1# vfs.py - Mercurial 'vfs' classes 2# 3# Copyright Olivia Mackall <olivia@selenic.com> 4# 5# This software may be used and distributed according to the terms of the 6# GNU General Public License version 2 or any later version. 7from __future__ import absolute_import 8 9import contextlib 10import errno 11import os 12import shutil 13import stat 14import threading 15 16from .i18n import _ 17from .pycompat import ( 18 delattr, 19 getattr, 20 setattr, 21) 22from . import ( 23 encoding, 24 error, 25 pathutil, 26 pycompat, 27 util, 28) 29 30 31def _avoidambig(path, oldstat): 32 """Avoid file stat ambiguity forcibly 33 34 This function causes copying ``path`` file, if it is owned by 35 another (see issue5418 and issue5584 for detail). 36 """ 37 38 def checkandavoid(): 39 newstat = util.filestat.frompath(path) 40 # return whether file stat ambiguity is (already) avoided 41 return not newstat.isambig(oldstat) or newstat.avoidambig(path, oldstat) 42 43 if not checkandavoid(): 44 # simply copy to change owner of path to get privilege to 45 # advance mtime (see issue5418) 46 util.rename(util.mktempcopy(path), path) 47 checkandavoid() 48 49 50class abstractvfs(object): 51 """Abstract base class; cannot be instantiated""" 52 53 # default directory separator for vfs 54 # 55 # Other vfs code always use `/` and this works fine because python file API 56 # abstract the use of `/` and make it work transparently. For consistency 57 # vfs will always use `/` when joining. This avoid some confusion in 58 # encoded vfs (see issue6546) 59 _dir_sep = b'/' 60 61 def __init__(self, *args, **kwargs): 62 '''Prevent instantiation; don't call this from subclasses.''' 63 raise NotImplementedError('attempted instantiating ' + str(type(self))) 64 65 def __call__(self, path, mode=b'rb', **kwargs): 66 raise NotImplementedError 67 68 def _auditpath(self, path, mode): 69 raise NotImplementedError 70 71 def join(self, path, *insidef): 72 raise NotImplementedError 73 74 def tryread(self, path): 75 '''gracefully return an empty string for missing files''' 76 try: 77 return self.read(path) 78 except IOError as inst: 79 if inst.errno != errno.ENOENT: 80 raise 81 return b"" 82 83 def tryreadlines(self, path, mode=b'rb'): 84 '''gracefully return an empty array for missing files''' 85 try: 86 return self.readlines(path, mode=mode) 87 except IOError as inst: 88 if inst.errno != errno.ENOENT: 89 raise 90 return [] 91 92 @util.propertycache 93 def open(self): 94 """Open ``path`` file, which is relative to vfs root. 95 96 Newly created directories are marked as "not to be indexed by 97 the content indexing service", if ``notindexed`` is specified 98 for "write" mode access. 99 """ 100 return self.__call__ 101 102 def read(self, path): 103 with self(path, b'rb') as fp: 104 return fp.read() 105 106 def readlines(self, path, mode=b'rb'): 107 with self(path, mode=mode) as fp: 108 return fp.readlines() 109 110 def write(self, path, data, backgroundclose=False, **kwargs): 111 with self(path, b'wb', backgroundclose=backgroundclose, **kwargs) as fp: 112 return fp.write(data) 113 114 def writelines(self, path, data, mode=b'wb', notindexed=False): 115 with self(path, mode=mode, notindexed=notindexed) as fp: 116 return fp.writelines(data) 117 118 def append(self, path, data): 119 with self(path, b'ab') as fp: 120 return fp.write(data) 121 122 def basename(self, path): 123 """return base element of a path (as os.path.basename would do) 124 125 This exists to allow handling of strange encoding if needed.""" 126 return os.path.basename(path) 127 128 def chmod(self, path, mode): 129 return os.chmod(self.join(path), mode) 130 131 def dirname(self, path): 132 """return dirname element of a path (as os.path.dirname would do) 133 134 This exists to allow handling of strange encoding if needed.""" 135 return os.path.dirname(path) 136 137 def exists(self, path=None): 138 return os.path.exists(self.join(path)) 139 140 def fstat(self, fp): 141 return util.fstat(fp) 142 143 def isdir(self, path=None): 144 return os.path.isdir(self.join(path)) 145 146 def isfile(self, path=None): 147 return os.path.isfile(self.join(path)) 148 149 def islink(self, path=None): 150 return os.path.islink(self.join(path)) 151 152 def isfileorlink(self, path=None): 153 """return whether path is a regular file or a symlink 154 155 Unlike isfile, this doesn't follow symlinks.""" 156 try: 157 st = self.lstat(path) 158 except OSError: 159 return False 160 mode = st.st_mode 161 return stat.S_ISREG(mode) or stat.S_ISLNK(mode) 162 163 def _join(self, *paths): 164 root_idx = 0 165 for idx, p in enumerate(paths): 166 if os.path.isabs(p) or p.startswith(self._dir_sep): 167 root_idx = idx 168 if root_idx != 0: 169 paths = paths[root_idx:] 170 paths = [p for p in paths if p] 171 return self._dir_sep.join(paths) 172 173 def reljoin(self, *paths): 174 """join various elements of a path together (as os.path.join would do) 175 176 The vfs base is not injected so that path stay relative. This exists 177 to allow handling of strange encoding if needed.""" 178 return self._join(*paths) 179 180 def split(self, path): 181 """split top-most element of a path (as os.path.split would do) 182 183 This exists to allow handling of strange encoding if needed.""" 184 return os.path.split(path) 185 186 def lexists(self, path=None): 187 return os.path.lexists(self.join(path)) 188 189 def lstat(self, path=None): 190 return os.lstat(self.join(path)) 191 192 def listdir(self, path=None): 193 return os.listdir(self.join(path)) 194 195 def makedir(self, path=None, notindexed=True): 196 return util.makedir(self.join(path), notindexed) 197 198 def makedirs(self, path=None, mode=None): 199 return util.makedirs(self.join(path), mode) 200 201 def makelock(self, info, path): 202 return util.makelock(info, self.join(path)) 203 204 def mkdir(self, path=None): 205 return os.mkdir(self.join(path)) 206 207 def mkstemp(self, suffix=b'', prefix=b'tmp', dir=None): 208 fd, name = pycompat.mkstemp( 209 suffix=suffix, prefix=prefix, dir=self.join(dir) 210 ) 211 dname, fname = util.split(name) 212 if dir: 213 return fd, os.path.join(dir, fname) 214 else: 215 return fd, fname 216 217 def readdir(self, path=None, stat=None, skip=None): 218 return util.listdir(self.join(path), stat, skip) 219 220 def readlock(self, path): 221 return util.readlock(self.join(path)) 222 223 def rename(self, src, dst, checkambig=False): 224 """Rename from src to dst 225 226 checkambig argument is used with util.filestat, and is useful 227 only if destination file is guarded by any lock 228 (e.g. repo.lock or repo.wlock). 229 230 To avoid file stat ambiguity forcibly, checkambig=True involves 231 copying ``src`` file, if it is owned by another. Therefore, use 232 checkambig=True only in limited cases (see also issue5418 and 233 issue5584 for detail). 234 """ 235 self._auditpath(dst, b'w') 236 srcpath = self.join(src) 237 dstpath = self.join(dst) 238 oldstat = checkambig and util.filestat.frompath(dstpath) 239 if oldstat and oldstat.stat: 240 ret = util.rename(srcpath, dstpath) 241 _avoidambig(dstpath, oldstat) 242 return ret 243 return util.rename(srcpath, dstpath) 244 245 def readlink(self, path): 246 return util.readlink(self.join(path)) 247 248 def removedirs(self, path=None): 249 """Remove a leaf directory and all empty intermediate ones""" 250 return util.removedirs(self.join(path)) 251 252 def rmdir(self, path=None): 253 """Remove an empty directory.""" 254 return os.rmdir(self.join(path)) 255 256 def rmtree(self, path=None, ignore_errors=False, forcibly=False): 257 """Remove a directory tree recursively 258 259 If ``forcibly``, this tries to remove READ-ONLY files, too. 260 """ 261 if forcibly: 262 263 def onerror(function, path, excinfo): 264 if function is not os.remove: 265 raise 266 # read-only files cannot be unlinked under Windows 267 s = os.stat(path) 268 if (s.st_mode & stat.S_IWRITE) != 0: 269 raise 270 os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE) 271 os.remove(path) 272 273 else: 274 onerror = None 275 return shutil.rmtree( 276 self.join(path), ignore_errors=ignore_errors, onerror=onerror 277 ) 278 279 def setflags(self, path, l, x): 280 return util.setflags(self.join(path), l, x) 281 282 def stat(self, path=None): 283 return os.stat(self.join(path)) 284 285 def unlink(self, path=None): 286 return util.unlink(self.join(path)) 287 288 def tryunlink(self, path=None): 289 """Attempt to remove a file, ignoring missing file errors.""" 290 util.tryunlink(self.join(path)) 291 292 def unlinkpath(self, path=None, ignoremissing=False, rmdir=True): 293 return util.unlinkpath( 294 self.join(path), ignoremissing=ignoremissing, rmdir=rmdir 295 ) 296 297 def utime(self, path=None, t=None): 298 return os.utime(self.join(path), t) 299 300 def walk(self, path=None, onerror=None): 301 """Yield (dirpath, dirs, files) tuple for each directories under path 302 303 ``dirpath`` is relative one from the root of this vfs. This 304 uses ``os.sep`` as path separator, even you specify POSIX 305 style ``path``. 306 307 "The root of this vfs" is represented as empty ``dirpath``. 308 """ 309 root = os.path.normpath(self.join(None)) 310 # when dirpath == root, dirpath[prefixlen:] becomes empty 311 # because len(dirpath) < prefixlen. 312 prefixlen = len(pathutil.normasprefix(root)) 313 for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror): 314 yield (dirpath[prefixlen:], dirs, files) 315 316 @contextlib.contextmanager 317 def backgroundclosing(self, ui, expectedcount=-1): 318 """Allow files to be closed asynchronously. 319 320 When this context manager is active, ``backgroundclose`` can be passed 321 to ``__call__``/``open`` to result in the file possibly being closed 322 asynchronously, on a background thread. 323 """ 324 # Sharing backgroundfilecloser between threads is complex and using 325 # multiple instances puts us at risk of running out of file descriptors 326 # only allow to use backgroundfilecloser when in main thread. 327 if not isinstance( 328 threading.current_thread(), 329 threading._MainThread, # pytype: disable=module-attr 330 ): 331 yield 332 return 333 vfs = getattr(self, 'vfs', self) 334 if getattr(vfs, '_backgroundfilecloser', None): 335 raise error.Abort( 336 _(b'can only have 1 active background file closer') 337 ) 338 339 with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc: 340 try: 341 vfs._backgroundfilecloser = ( 342 bfc # pytype: disable=attribute-error 343 ) 344 yield bfc 345 finally: 346 vfs._backgroundfilecloser = ( 347 None # pytype: disable=attribute-error 348 ) 349 350 def register_file(self, path): 351 """generic hook point to lets fncache steer its stew""" 352 353 354class vfs(abstractvfs): 355 """Operate files relative to a base directory 356 357 This class is used to hide the details of COW semantics and 358 remote file access from higher level code. 359 360 'cacheaudited' should be enabled only if (a) vfs object is short-lived, or 361 (b) the base directory is managed by hg and considered sort-of append-only. 362 See pathutil.pathauditor() for details. 363 """ 364 365 def __init__( 366 self, 367 base, 368 audit=True, 369 cacheaudited=False, 370 expandpath=False, 371 realpath=False, 372 ): 373 if expandpath: 374 base = util.expandpath(base) 375 if realpath: 376 base = os.path.realpath(base) 377 self.base = base 378 self._audit = audit 379 if audit: 380 self.audit = pathutil.pathauditor(self.base, cached=cacheaudited) 381 else: 382 self.audit = lambda path, mode=None: True 383 self.createmode = None 384 self._trustnlink = None 385 self.options = {} 386 387 @util.propertycache 388 def _cansymlink(self): 389 return util.checklink(self.base) 390 391 @util.propertycache 392 def _chmod(self): 393 return util.checkexec(self.base) 394 395 def _fixfilemode(self, name): 396 if self.createmode is None or not self._chmod: 397 return 398 os.chmod(name, self.createmode & 0o666) 399 400 def _auditpath(self, path, mode): 401 if self._audit: 402 if os.path.isabs(path) and path.startswith(self.base): 403 path = os.path.relpath(path, self.base) 404 r = util.checkosfilename(path) 405 if r: 406 raise error.Abort(b"%s: %r" % (r, path)) 407 self.audit(path, mode=mode) 408 409 def __call__( 410 self, 411 path, 412 mode=b"r", 413 atomictemp=False, 414 notindexed=False, 415 backgroundclose=False, 416 checkambig=False, 417 auditpath=True, 418 makeparentdirs=True, 419 ): 420 """Open ``path`` file, which is relative to vfs root. 421 422 By default, parent directories are created as needed. Newly created 423 directories are marked as "not to be indexed by the content indexing 424 service", if ``notindexed`` is specified for "write" mode access. 425 Set ``makeparentdirs=False`` to not create directories implicitly. 426 427 If ``backgroundclose`` is passed, the file may be closed asynchronously. 428 It can only be used if the ``self.backgroundclosing()`` context manager 429 is active. This should only be specified if the following criteria hold: 430 431 1. There is a potential for writing thousands of files. Unless you 432 are writing thousands of files, the performance benefits of 433 asynchronously closing files is not realized. 434 2. Files are opened exactly once for the ``backgroundclosing`` 435 active duration and are therefore free of race conditions between 436 closing a file on a background thread and reopening it. (If the 437 file were opened multiple times, there could be unflushed data 438 because the original file handle hasn't been flushed/closed yet.) 439 440 ``checkambig`` argument is passed to atomictempfile (valid 441 only for writing), and is useful only if target file is 442 guarded by any lock (e.g. repo.lock or repo.wlock). 443 444 To avoid file stat ambiguity forcibly, checkambig=True involves 445 copying ``path`` file opened in "append" mode (e.g. for 446 truncation), if it is owned by another. Therefore, use 447 combination of append mode and checkambig=True only in limited 448 cases (see also issue5418 and issue5584 for detail). 449 """ 450 if auditpath: 451 self._auditpath(path, mode) 452 f = self.join(path) 453 454 if b"b" not in mode: 455 mode += b"b" # for that other OS 456 457 nlink = -1 458 if mode not in (b'r', b'rb'): 459 dirname, basename = util.split(f) 460 # If basename is empty, then the path is malformed because it points 461 # to a directory. Let the posixfile() call below raise IOError. 462 if basename: 463 if atomictemp: 464 if makeparentdirs: 465 util.makedirs(dirname, self.createmode, notindexed) 466 return util.atomictempfile( 467 f, mode, self.createmode, checkambig=checkambig 468 ) 469 try: 470 if b'w' in mode: 471 util.unlink(f) 472 nlink = 0 473 else: 474 # nlinks() may behave differently for files on Windows 475 # shares if the file is open. 476 with util.posixfile(f): 477 nlink = util.nlinks(f) 478 if nlink < 1: 479 nlink = 2 # force mktempcopy (issue1922) 480 except (OSError, IOError) as e: 481 if e.errno != errno.ENOENT: 482 raise 483 nlink = 0 484 if makeparentdirs: 485 util.makedirs(dirname, self.createmode, notindexed) 486 if nlink > 0: 487 if self._trustnlink is None: 488 self._trustnlink = nlink > 1 or util.checknlink(f) 489 if nlink > 1 or not self._trustnlink: 490 util.rename(util.mktempcopy(f), f) 491 fp = util.posixfile(f, mode) 492 if nlink == 0: 493 self._fixfilemode(f) 494 495 if checkambig: 496 if mode in (b'r', b'rb'): 497 raise error.Abort( 498 _( 499 b'implementation error: mode %s is not' 500 b' valid for checkambig=True' 501 ) 502 % mode 503 ) 504 fp = checkambigatclosing(fp) 505 506 if backgroundclose and isinstance( 507 threading.current_thread(), 508 threading._MainThread, # pytype: disable=module-attr 509 ): 510 if ( 511 not self._backgroundfilecloser # pytype: disable=attribute-error 512 ): 513 raise error.Abort( 514 _( 515 b'backgroundclose can only be used when a ' 516 b'backgroundclosing context manager is active' 517 ) 518 ) 519 520 fp = delayclosedfile( 521 fp, 522 self._backgroundfilecloser, # pytype: disable=attribute-error 523 ) 524 525 return fp 526 527 def symlink(self, src, dst): 528 self.audit(dst) 529 linkname = self.join(dst) 530 util.tryunlink(linkname) 531 532 util.makedirs(os.path.dirname(linkname), self.createmode) 533 534 if self._cansymlink: 535 try: 536 os.symlink(src, linkname) 537 except OSError as err: 538 raise OSError( 539 err.errno, 540 _(b'could not symlink to %r: %s') 541 % (src, encoding.strtolocal(err.strerror)), 542 linkname, 543 ) 544 else: 545 self.write(dst, src) 546 547 def join(self, path, *insidef): 548 if path: 549 parts = [self.base, path] 550 parts.extend(insidef) 551 return self._join(*parts) 552 else: 553 return self.base 554 555 556opener = vfs 557 558 559class proxyvfs(abstractvfs): 560 def __init__(self, vfs): 561 self.vfs = vfs 562 563 def _auditpath(self, path, mode): 564 return self.vfs._auditpath(path, mode) 565 566 @property 567 def options(self): 568 return self.vfs.options 569 570 @options.setter 571 def options(self, value): 572 self.vfs.options = value 573 574 575class filtervfs(proxyvfs, abstractvfs): 576 '''Wrapper vfs for filtering filenames with a function.''' 577 578 def __init__(self, vfs, filter): 579 proxyvfs.__init__(self, vfs) 580 self._filter = filter 581 582 def __call__(self, path, *args, **kwargs): 583 return self.vfs(self._filter(path), *args, **kwargs) 584 585 def join(self, path, *insidef): 586 if path: 587 return self.vfs.join(self._filter(self.vfs.reljoin(path, *insidef))) 588 else: 589 return self.vfs.join(path) 590 591 592filteropener = filtervfs 593 594 595class readonlyvfs(proxyvfs): 596 '''Wrapper vfs preventing any writing.''' 597 598 def __init__(self, vfs): 599 proxyvfs.__init__(self, vfs) 600 601 def __call__(self, path, mode=b'r', *args, **kw): 602 if mode not in (b'r', b'rb'): 603 raise error.Abort(_(b'this vfs is read only')) 604 return self.vfs(path, mode, *args, **kw) 605 606 def join(self, path, *insidef): 607 return self.vfs.join(path, *insidef) 608 609 610class closewrapbase(object): 611 """Base class of wrapper, which hooks closing 612 613 Do not instantiate outside of the vfs layer. 614 """ 615 616 def __init__(self, fh): 617 object.__setattr__(self, '_origfh', fh) 618 619 def __getattr__(self, attr): 620 return getattr(self._origfh, attr) 621 622 def __setattr__(self, attr, value): 623 return setattr(self._origfh, attr, value) 624 625 def __delattr__(self, attr): 626 return delattr(self._origfh, attr) 627 628 def __enter__(self): 629 self._origfh.__enter__() 630 return self 631 632 def __exit__(self, exc_type, exc_value, exc_tb): 633 raise NotImplementedError('attempted instantiating ' + str(type(self))) 634 635 def close(self): 636 raise NotImplementedError('attempted instantiating ' + str(type(self))) 637 638 639class delayclosedfile(closewrapbase): 640 """Proxy for a file object whose close is delayed. 641 642 Do not instantiate outside of the vfs layer. 643 """ 644 645 def __init__(self, fh, closer): 646 super(delayclosedfile, self).__init__(fh) 647 object.__setattr__(self, '_closer', closer) 648 649 def __exit__(self, exc_type, exc_value, exc_tb): 650 self._closer.close(self._origfh) 651 652 def close(self): 653 self._closer.close(self._origfh) 654 655 656class backgroundfilecloser(object): 657 """Coordinates background closing of file handles on multiple threads.""" 658 659 def __init__(self, ui, expectedcount=-1): 660 self._running = False 661 self._entered = False 662 self._threads = [] 663 self._threadexception = None 664 665 # Only Windows/NTFS has slow file closing. So only enable by default 666 # on that platform. But allow to be enabled elsewhere for testing. 667 defaultenabled = pycompat.iswindows 668 enabled = ui.configbool(b'worker', b'backgroundclose', defaultenabled) 669 670 if not enabled: 671 return 672 673 # There is overhead to starting and stopping the background threads. 674 # Don't do background processing unless the file count is large enough 675 # to justify it. 676 minfilecount = ui.configint(b'worker', b'backgroundcloseminfilecount') 677 # FUTURE dynamically start background threads after minfilecount closes. 678 # (We don't currently have any callers that don't know their file count) 679 if expectedcount > 0 and expectedcount < minfilecount: 680 return 681 682 maxqueue = ui.configint(b'worker', b'backgroundclosemaxqueue') 683 threadcount = ui.configint(b'worker', b'backgroundclosethreadcount') 684 685 ui.debug( 686 b'starting %d threads for background file closing\n' % threadcount 687 ) 688 689 self._queue = pycompat.queue.Queue(maxsize=maxqueue) 690 self._running = True 691 692 for i in range(threadcount): 693 t = threading.Thread(target=self._worker, name='backgroundcloser') 694 self._threads.append(t) 695 t.start() 696 697 def __enter__(self): 698 self._entered = True 699 return self 700 701 def __exit__(self, exc_type, exc_value, exc_tb): 702 self._running = False 703 704 # Wait for threads to finish closing so open files don't linger for 705 # longer than lifetime of context manager. 706 for t in self._threads: 707 t.join() 708 709 def _worker(self): 710 """Main routine for worker thread.""" 711 while True: 712 try: 713 fh = self._queue.get(block=True, timeout=0.100) 714 # Need to catch or the thread will terminate and 715 # we could orphan file descriptors. 716 try: 717 fh.close() 718 except Exception as e: 719 # Stash so can re-raise from main thread later. 720 self._threadexception = e 721 except pycompat.queue.Empty: 722 if not self._running: 723 break 724 725 def close(self, fh): 726 """Schedule a file for closing.""" 727 if not self._entered: 728 raise error.Abort( 729 _(b'can only call close() when context manager active') 730 ) 731 732 # If a background thread encountered an exception, raise now so we fail 733 # fast. Otherwise we may potentially go on for minutes until the error 734 # is acted on. 735 if self._threadexception: 736 e = self._threadexception 737 self._threadexception = None 738 raise e 739 740 # If we're not actively running, close synchronously. 741 if not self._running: 742 fh.close() 743 return 744 745 self._queue.put(fh, block=True, timeout=None) 746 747 748class checkambigatclosing(closewrapbase): 749 """Proxy for a file object, to avoid ambiguity of file stat 750 751 See also util.filestat for detail about "ambiguity of file stat". 752 753 This proxy is useful only if the target file is guarded by any 754 lock (e.g. repo.lock or repo.wlock) 755 756 Do not instantiate outside of the vfs layer. 757 """ 758 759 def __init__(self, fh): 760 super(checkambigatclosing, self).__init__(fh) 761 object.__setattr__(self, '_oldstat', util.filestat.frompath(fh.name)) 762 763 def _checkambig(self): 764 oldstat = self._oldstat 765 if oldstat.stat: 766 _avoidambig(self._origfh.name, oldstat) 767 768 def __exit__(self, exc_type, exc_value, exc_tb): 769 self._origfh.__exit__(exc_type, exc_value, exc_tb) 770 self._checkambig() 771 772 def close(self): 773 self._origfh.close() 774 self._checkambig() 775