1# vfs.py - Mercurial 'vfs' classes
2#
3#  Copyright Olivia Mackall <olivia@selenic.com>
4#
5# This software may be used and distributed according to the terms of the
6# GNU General Public License version 2 or any later version.
7from __future__ import absolute_import
8
9import contextlib
10import errno
11import os
12import shutil
13import stat
14import threading
15
16from .i18n import _
17from .pycompat import (
18    delattr,
19    getattr,
20    setattr,
21)
22from . import (
23    encoding,
24    error,
25    pathutil,
26    pycompat,
27    util,
28)
29
30
def _avoidambig(path, oldstat):
    """Forcibly make the stat of ``path`` distinguishable from ``oldstat``

    ``oldstat`` is the ``util.filestat`` taken before ``path`` was
    changed. If the ambiguity cannot be resolved directly, ``path`` is
    replaced by a copy of itself and the attempt is repeated once; the
    copy is needed when the file is owned by another user (see
    issue5418 and issue5584 for detail).
    """

    def _resolved():
        # true once the current stat is unambiguous, either as-is or
        # because avoidambig() managed to fix it up
        current = util.filestat.frompath(path)
        if not current.isambig(oldstat):
            return True
        return current.avoidambig(path, oldstat)

    if _resolved():
        return

    # simply copy to change owner of path to get privilege to
    # advance mtime (see issue5418)
    tmpcopy = util.mktempcopy(path)
    util.rename(tmpcopy, path)
    _resolved()
48
49
class abstractvfs(object):
    """Abstract base class; cannot be instantiated

    Concrete subclasses have to supply ``__call__`` (open a file),
    ``join`` (map a path onto the vfs root) and ``_auditpath``; the
    versions defined here only raise ``NotImplementedError``. The other
    methods are helpers built on top of those, most of them handing
    ``self.join(path)`` to the matching ``os``/``util``/``shutil`` function.
    """

    # default directory separator for vfs
    #
    # Other vfs code always uses `/` and this works fine because the python
    # file API abstracts the use of `/` and makes it work transparently. For
    # consistency vfs will always use `/` when joining. This avoids some
    # confusion in encoded vfs (see issue6546)
    _dir_sep = b'/'

    def __init__(self, *args, **kwargs):
        '''Prevent instantiation; don't call this from subclasses.'''
        raise NotImplementedError('attempted instantiating ' + str(type(self)))

    def __call__(self, path, mode=b'rb', **kwargs):
        """Open ``path``; must be implemented by subclasses."""
        raise NotImplementedError

    def _auditpath(self, path, mode):
        """Audit ``path`` for ``mode`` access; must be implemented by
        subclasses."""
        raise NotImplementedError

    def join(self, path, *insidef):
        """Join ``path`` (and ``insidef``) onto the vfs root; must be
        implemented by subclasses."""
        raise NotImplementedError

    def tryread(self, path):
        '''gracefully return an empty string for missing files'''
        try:
            return self.read(path)
        except IOError as inst:
            # only a missing file is tolerated
            if inst.errno != errno.ENOENT:
                raise
        return b""

    def tryreadlines(self, path, mode=b'rb'):
        '''gracefully return an empty array for missing files'''
        try:
            return self.readlines(path, mode=mode)
        except IOError as inst:
            # only a missing file is tolerated
            if inst.errno != errno.ENOENT:
                raise
        return []

    @util.propertycache
    def open(self):
        """Open ``path`` file, which is relative to vfs root.

        Newly created directories are marked as "not to be indexed by
        the content indexing service", if ``notindexed`` is specified
        for "write" mode access.
        """
        # ``open`` is simply another spelling of calling the vfs itself
        return self.__call__

    def read(self, path):
        """Return the whole content of ``path``, opened in ``rb`` mode."""
        with self(path, b'rb') as fp:
            return fp.read()

    def readlines(self, path, mode=b'rb'):
        """Return the result of ``readlines()`` on ``path`` opened with
        ``mode``."""
        with self(path, mode=mode) as fp:
            return fp.readlines()

    def write(self, path, data, backgroundclose=False, **kwargs):
        """Write ``data`` to ``path`` opened in ``wb`` mode.

        ``backgroundclose`` and any extra keyword arguments are passed on
        to ``__call__``. Returns what the file object's ``write`` returns.
        """
        with self(path, b'wb', backgroundclose=backgroundclose, **kwargs) as fp:
            return fp.write(data)

    def writelines(self, path, data, mode=b'wb', notindexed=False):
        """Pass ``data`` to ``writelines()`` of ``path`` opened with
        ``mode``."""
        with self(path, mode=mode, notindexed=notindexed) as fp:
            return fp.writelines(data)

    def append(self, path, data):
        """Write ``data`` to ``path`` opened in ``ab`` mode."""
        with self(path, b'ab') as fp:
            return fp.write(data)

    def basename(self, path):
        """return base element of a path (as os.path.basename would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.basename(path)

    def chmod(self, path, mode):
        """Call ``os.chmod`` on the joined ``path``."""
        return os.chmod(self.join(path), mode)

    def dirname(self, path):
        """return dirname element of a path (as os.path.dirname would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.dirname(path)

    def exists(self, path=None):
        """Return ``os.path.exists`` of the joined ``path``."""
        return os.path.exists(self.join(path))

    def fstat(self, fp):
        """Return ``util.fstat`` of the file object ``fp``."""
        return util.fstat(fp)

    def isdir(self, path=None):
        """Return ``os.path.isdir`` of the joined ``path``."""
        return os.path.isdir(self.join(path))

    def isfile(self, path=None):
        """Return ``os.path.isfile`` of the joined ``path``."""
        return os.path.isfile(self.join(path))

    def islink(self, path=None):
        """Return ``os.path.islink`` of the joined ``path``."""
        return os.path.islink(self.join(path))

    def isfileorlink(self, path=None):
        """return whether path is a regular file or a symlink

        Unlike isfile, this doesn't follow symlinks."""
        try:
            st = self.lstat(path)
        except OSError:
            # anything that cannot be lstat()ed is neither
            return False
        mode = st.st_mode
        return stat.S_ISREG(mode) or stat.S_ISLNK(mode)

    def _join(self, *paths):
        """Join ``paths`` with ``_dir_sep``.

        If some element other than the first is absolute (or starts with
        ``_dir_sep``), everything before the last such element is
        discarded. Empty elements are dropped.
        """
        root_idx = 0
        for idx, p in enumerate(paths):
            if os.path.isabs(p) or p.startswith(self._dir_sep):
                root_idx = idx
        if root_idx != 0:
            paths = paths[root_idx:]
        paths = [p for p in paths if p]
        return self._dir_sep.join(paths)

    def reljoin(self, *paths):
        """join various elements of a path together (as os.path.join would do)

        The vfs base is not injected so that path stay relative. This exists
        to allow handling of strange encoding if needed."""
        return self._join(*paths)

    def split(self, path):
        """split top-most element of a path (as os.path.split would do)

        This exists to allow handling of strange encoding if needed."""
        return os.path.split(path)

    def lexists(self, path=None):
        """Return ``os.path.lexists`` of the joined ``path``."""
        return os.path.lexists(self.join(path))

    def lstat(self, path=None):
        """Return ``os.lstat`` of the joined ``path``."""
        return os.lstat(self.join(path))

    def listdir(self, path=None):
        """Return ``os.listdir`` of the joined ``path``."""
        return os.listdir(self.join(path))

    def makedir(self, path=None, notindexed=True):
        """Call ``util.makedir`` on the joined ``path``."""
        return util.makedir(self.join(path), notindexed)

    def makedirs(self, path=None, mode=None):
        """Call ``util.makedirs`` on the joined ``path``."""
        return util.makedirs(self.join(path), mode)

    def makelock(self, info, path):
        """Call ``util.makelock`` with ``info`` on the joined ``path``."""
        return util.makelock(info, self.join(path))

    def mkdir(self, path=None):
        """Call ``os.mkdir`` on the joined ``path``."""
        return os.mkdir(self.join(path))

    def mkstemp(self, suffix=b'', prefix=b'tmp', dir=None):
        """Create a temporary file inside the vfs.

        Returns ``(fd, name)``. ``name`` does not include the vfs root:
        it is the bare file name, prefixed with ``dir`` when that is given.
        """
        fd, name = pycompat.mkstemp(
            suffix=suffix, prefix=prefix, dir=self.join(dir)
        )
        # only the file name part is needed to build the returned path
        fname = util.split(name)[1]
        if dir:
            return fd, os.path.join(dir, fname)
        else:
            return fd, fname

    def readdir(self, path=None, stat=None, skip=None):
        """Call ``util.listdir`` on the joined ``path``."""
        return util.listdir(self.join(path), stat, skip)

    def readlock(self, path):
        """Call ``util.readlock`` on the joined ``path``."""
        return util.readlock(self.join(path))

    def rename(self, src, dst, checkambig=False):
        """Rename from src to dst

        checkambig argument is used with util.filestat, and is useful
        only if destination file is guarded by any lock
        (e.g. repo.lock or repo.wlock).

        To avoid file stat ambiguity forcibly, checkambig=True involves
        copying ``src`` file, if it is owned by another. Therefore, use
        checkambig=True only in limited cases (see also issue5418 and
        issue5584 for detail).
        """
        self._auditpath(dst, b'w')
        srcpath = self.join(src)
        dstpath = self.join(dst)
        # the stat of the destination has to be captured before renaming
        oldstat = checkambig and util.filestat.frompath(dstpath)
        if oldstat and oldstat.stat:
            ret = util.rename(srcpath, dstpath)
            _avoidambig(dstpath, oldstat)
            return ret
        return util.rename(srcpath, dstpath)

    def readlink(self, path):
        """Call ``util.readlink`` on the joined ``path``."""
        return util.readlink(self.join(path))

    def removedirs(self, path=None):
        """Remove a leaf directory and all empty intermediate ones"""
        return util.removedirs(self.join(path))

    def rmdir(self, path=None):
        """Remove an empty directory."""
        return os.rmdir(self.join(path))

    def rmtree(self, path=None, ignore_errors=False, forcibly=False):
        """Remove a directory tree recursively

        If ``forcibly``, this tries to remove READ-ONLY files, too.
        """
        if forcibly:

            def onerror(function, path, excinfo):
                # Python 3's shutil.rmtree() reports a failed file removal
                # with ``os.unlink`` while Python 2 used ``os.remove``.
                # These are distinct function objects, so both must be
                # accepted or the retry below would never happen.
                if function is not os.remove and function is not os.unlink:
                    # called by rmtree() while the error is being
                    # handled, so a bare raise re-raises that error
                    raise
                # read-only files cannot be unlinked under Windows
                s = os.stat(path)
                if (s.st_mode & stat.S_IWRITE) != 0:
                    # already writable: the failure has another cause
                    raise
                os.chmod(path, stat.S_IMODE(s.st_mode) | stat.S_IWRITE)
                os.remove(path)

        else:
            onerror = None
        return shutil.rmtree(
            self.join(path), ignore_errors=ignore_errors, onerror=onerror
        )

    def setflags(self, path, l, x):
        """Call ``util.setflags`` with ``l`` and ``x`` on the joined
        ``path``."""
        return util.setflags(self.join(path), l, x)

    def stat(self, path=None):
        """Return ``os.stat`` of the joined ``path``."""
        return os.stat(self.join(path))

    def unlink(self, path=None):
        """Call ``util.unlink`` on the joined ``path``."""
        return util.unlink(self.join(path))

    def tryunlink(self, path=None):
        """Attempt to remove a file, ignoring missing file errors."""
        util.tryunlink(self.join(path))

    def unlinkpath(self, path=None, ignoremissing=False, rmdir=True):
        """Call ``util.unlinkpath`` on the joined ``path``."""
        return util.unlinkpath(
            self.join(path), ignoremissing=ignoremissing, rmdir=rmdir
        )

    def utime(self, path=None, t=None):
        """Call ``os.utime`` with ``t`` on the joined ``path``."""
        return os.utime(self.join(path), t)

    def walk(self, path=None, onerror=None):
        """Yield (dirpath, dirs, files) tuple for each directory under path

        ``dirpath`` is relative to the root of this vfs. This
        uses ``os.sep`` as path separator, even if you specify POSIX
        style ``path``.

        "The root of this vfs" is represented as empty ``dirpath``.
        """
        root = os.path.normpath(self.join(None))
        # when dirpath == root, dirpath[prefixlen:] becomes empty
        # because len(dirpath) < prefixlen.
        prefixlen = len(pathutil.normasprefix(root))
        for dirpath, dirs, files in os.walk(self.join(path), onerror=onerror):
            yield (dirpath[prefixlen:], dirs, files)

    @contextlib.contextmanager
    def backgroundclosing(self, ui, expectedcount=-1):
        """Allow files to be closed asynchronously.

        When this context manager is active, ``backgroundclose`` can be passed
        to ``__call__``/``open`` to result in the file possibly being closed
        asynchronously, on a background thread.

        Yields the ``backgroundfilecloser`` in use, or ``None`` when not
        running in the main thread. Raises ``error.Abort`` if a background
        file closer is already active.
        """
        # Sharing backgroundfilecloser between threads is complex and using
        # multiple instances puts us at risk of running out of file descriptors
        # only allow to use backgroundfilecloser when in main thread.
        if not isinstance(
            threading.current_thread(),
            threading._MainThread,  # pytype: disable=module-attr
        ):
            yield
            return
        # a wrapper keeps the closer on the vfs it wraps (its ``vfs``
        # attribute); otherwise it lives on this object
        vfs = getattr(self, 'vfs', self)
        if getattr(vfs, '_backgroundfilecloser', None):
            raise error.Abort(
                _(b'can only have 1 active background file closer')
            )

        with backgroundfilecloser(ui, expectedcount=expectedcount) as bfc:
            try:
                vfs._backgroundfilecloser = (
                    bfc  # pytype: disable=attribute-error
                )
                yield bfc
            finally:
                vfs._backgroundfilecloser = (
                    None  # pytype: disable=attribute-error
                )

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
352
353
class vfs(abstractvfs):
    """Operate files relative to a base directory

    This class is used to hide the details of COW semantics and
    remote file access from higher level code.

    'cacheaudited' should be enabled only if (a) vfs object is short-lived, or
    (b) the base directory is managed by hg and considered sort-of append-only.
    See pathutil.pathauditor() for details.
    """

    # Replaced by an instance attribute while ``backgroundclosing()`` is
    # active. The class-level default lets ``__call__`` test it even when
    # no background closer was ever installed on this instance, so the
    # intended Abort is raised instead of an AttributeError.
    _backgroundfilecloser = None

    def __init__(
        self,
        base,
        audit=True,
        cacheaudited=False,
        expandpath=False,
        realpath=False,
    ):
        """Create a vfs rooted at ``base``.

        ``expandpath`` passes ``base`` through ``util.expandpath`` and
        ``realpath`` through ``os.path.realpath`` first. With ``audit``
        disabled, paths are accepted without any checking.
        """
        if expandpath:
            base = util.expandpath(base)
        if realpath:
            base = os.path.realpath(base)
        self.base = base
        self._audit = audit
        if audit:
            self.audit = pathutil.pathauditor(self.base, cached=cacheaudited)
        else:
            # auditing disabled: accept everything
            self.audit = lambda path, mode=None: True
        self.createmode = None
        self._trustnlink = None
        self.options = {}

    @util.propertycache
    def _cansymlink(self):
        # result of util.checklink() for the base directory, computed once
        return util.checklink(self.base)

    @util.propertycache
    def _chmod(self):
        # result of util.checkexec() for the base directory, computed once
        return util.checkexec(self.base)

    def _fixfilemode(self, name):
        """Apply ``createmode`` (without any execute bit) to ``name``.

        Does nothing when no ``createmode`` is set or ``_chmod`` is false.
        """
        if self.createmode is None or not self._chmod:
            return
        os.chmod(name, self.createmode & 0o666)

    def _auditpath(self, path, mode):
        """Check ``path`` before it is accessed with ``mode``.

        Raises ``error.Abort`` when ``util.checkosfilename`` reports a
        problem; otherwise the path is handed to ``self.audit``. Nothing
        is checked when auditing was disabled at construction time.
        """
        if self._audit:
            # absolute paths below the base are audited in relative form
            if os.path.isabs(path) and path.startswith(self.base):
                path = os.path.relpath(path, self.base)
            r = util.checkosfilename(path)
            if r:
                raise error.Abort(b"%s: %r" % (r, path))
            self.audit(path, mode=mode)

    def __call__(
        self,
        path,
        mode=b"r",
        atomictemp=False,
        notindexed=False,
        backgroundclose=False,
        checkambig=False,
        auditpath=True,
        makeparentdirs=True,
    ):
        """Open ``path`` file, which is relative to vfs root.

        By default, parent directories are created as needed. Newly created
        directories are marked as "not to be indexed by the content indexing
        service", if ``notindexed`` is specified for "write" mode access.
        Set ``makeparentdirs=False`` to not create directories implicitly.

        If ``backgroundclose`` is passed, the file may be closed asynchronously.
        It can only be used if the ``self.backgroundclosing()`` context manager
        is active. This should only be specified if the following criteria hold:

        1. There is a potential for writing thousands of files. Unless you
           are writing thousands of files, the performance benefits of
           asynchronously closing files is not realized.
        2. Files are opened exactly once for the ``backgroundclosing``
           active duration and are therefore free of race conditions between
           closing a file on a background thread and reopening it. (If the
           file were opened multiple times, there could be unflushed data
           because the original file handle hasn't been flushed/closed yet.)

        ``checkambig`` argument is passed to atomictempfile (valid
        only for writing), and is useful only if target file is
        guarded by any lock (e.g. repo.lock or repo.wlock).

        To avoid file stat ambiguity forcibly, checkambig=True involves
        copying ``path`` file opened in "append" mode (e.g. for
        truncation), if it is owned by another. Therefore, use
        combination of append mode and checkambig=True only in limited
        cases (see also issue5418 and issue5584 for detail).

        If anything fails after the file has been opened (including the
        ``error.Abort`` raised for an invalid ``checkambig`` or
        ``backgroundclose`` request), the file is closed again before the
        exception propagates.
        """
        if auditpath:
            self._auditpath(path, mode)
        f = self.join(path)

        if b"b" not in mode:
            mode += b"b"  # for that other OS

        # -1: not determined (read access), 0: file is (re)created here,
        # >0: link count of the existing file
        nlink = -1
        if mode not in (b'r', b'rb'):
            dirname, basename = util.split(f)
            # If basename is empty, then the path is malformed because it points
            # to a directory. Let the posixfile() call below raise IOError.
            if basename:
                if atomictemp:
                    if makeparentdirs:
                        util.makedirs(dirname, self.createmode, notindexed)
                    return util.atomictempfile(
                        f, mode, self.createmode, checkambig=checkambig
                    )
                try:
                    if b'w' in mode:
                        util.unlink(f)
                        nlink = 0
                    else:
                        # nlinks() may behave differently for files on Windows
                        # shares if the file is open.
                        with util.posixfile(f):
                            nlink = util.nlinks(f)
                            if nlink < 1:
                                nlink = 2  # force mktempcopy (issue1922)
                except (OSError, IOError) as e:
                    if e.errno != errno.ENOENT:
                        raise
                    nlink = 0
                    if makeparentdirs:
                        util.makedirs(dirname, self.createmode, notindexed)
                if nlink > 0:
                    if self._trustnlink is None:
                        self._trustnlink = nlink > 1 or util.checknlink(f)
                    if nlink > 1 or not self._trustnlink:
                        # replace the file by a private copy before writing
                        util.rename(util.mktempcopy(f), f)
        rawfp = util.posixfile(f, mode)
        try:
            fp = rawfp
            if nlink == 0:
                self._fixfilemode(f)

            if checkambig:
                if mode in (b'r', b'rb'):
                    raise error.Abort(
                        _(
                            b'implementation error: mode %s is not'
                            b' valid for checkambig=True'
                        )
                        % mode
                    )
                fp = checkambigatclosing(fp)

            if backgroundclose and isinstance(
                threading.current_thread(),
                threading._MainThread,  # pytype: disable=module-attr
            ):
                if not self._backgroundfilecloser:
                    raise error.Abort(
                        _(
                            b'backgroundclose can only be used when a '
                            b'backgroundclosing context manager is active'
                        )
                    )

                fp = delayclosedfile(fp, self._backgroundfilecloser)
        except BaseException:
            # Don't leak the handle opened above. The raw file is closed
            # rather than ``fp`` so that no closing hook of a wrapper runs.
            rawfp.close()
            raise

        return fp

    def symlink(self, src, dst):
        """Create ``dst`` as a symlink pointing to ``src``.

        An existing ``dst`` is removed first and missing parent
        directories are created. When ``_cansymlink`` is false, ``src``
        is written as the content of a regular file ``dst`` instead.
        """
        self.audit(dst)
        linkname = self.join(dst)
        util.tryunlink(linkname)

        util.makedirs(os.path.dirname(linkname), self.createmode)

        if self._cansymlink:
            try:
                os.symlink(src, linkname)
            except OSError as err:
                # re-raise with a message naming the link target
                raise OSError(
                    err.errno,
                    _(b'could not symlink to %r: %s')
                    % (src, encoding.strtolocal(err.strerror)),
                    linkname,
                )
        else:
            self.write(dst, src)

    def join(self, path, *insidef):
        """Return ``path`` (and ``insidef``) joined onto the base directory.

        An empty or ``None`` ``path`` yields the base directory itself.
        """
        if path:
            parts = [self.base, path]
            parts.extend(insidef)
            return self._join(*parts)
        else:
            return self.base
554
555
# ``opener`` is a second module-level name bound to the ``vfs`` class.
opener = vfs
557
558
class proxyvfs(abstractvfs):
    """Base for vfs objects that wrap another vfs

    The wrapped object is kept in ``self.vfs``; path auditing and the
    ``options`` attribute are forwarded to it.
    """

    def __init__(self, vfs):
        # abstractvfs.__init__ only raises, so it is not called here
        self.vfs = vfs

    def _auditpath(self, path, mode):
        """Audit ``path`` through the wrapped vfs."""
        wrapped = self.vfs
        return wrapped._auditpath(path, mode)

    @property
    def options(self):
        # read through to the wrapped vfs
        wrapped = self.vfs
        return wrapped.options

    @options.setter
    def options(self, value):
        # assignments land on the wrapped vfs as well
        wrapped = self.vfs
        wrapped.options = value
573
574
class filtervfs(proxyvfs, abstractvfs):
    '''vfs wrapper passing every filename through a function first.'''

    def __init__(self, vfs, filter):
        proxyvfs.__init__(self, vfs)
        self._filter = filter

    def __call__(self, path, *args, **kwargs):
        """Open the filtered form of ``path`` on the wrapped vfs."""
        filtered = self._filter(path)
        return self.vfs(filtered, *args, **kwargs)

    def join(self, path, *insidef):
        """Join on the wrapped vfs, filtering the relative path.

        An empty ``path`` is handed to the wrapped vfs unfiltered.
        """
        if not path:
            return self.vfs.join(path)
        # filter the relative path as a whole, then anchor it
        relative = self.vfs.reljoin(path, *insidef)
        return self.vfs.join(self._filter(relative))
590
591
# ``filteropener`` is a second module-level name bound to ``filtervfs``.
filteropener = filtervfs
593
594
class readonlyvfs(proxyvfs):
    '''vfs wrapper that refuses every non-read access.'''

    def __init__(self, vfs):
        proxyvfs.__init__(self, vfs)

    def __call__(self, path, mode=b'r', *args, **kw):
        """Open ``path`` on the wrapped vfs, for reading only.

        Any ``mode`` other than ``r``/``rb`` raises ``error.Abort``.
        """
        if mode in (b'r', b'rb'):
            return self.vfs(path, mode, *args, **kw)
        raise error.Abort(_(b'this vfs is read only'))

    def join(self, path, *insidef):
        """Join exactly as the wrapped vfs does."""
        wrapped = self.vfs
        return wrapped.join(path, *insidef)
608
609
class closewrapbase(object):
    """Base class of file object wrappers hooking into closing

    Attribute reads, writes and deletions are forwarded to the wrapped
    file object; subclasses have to provide ``__exit__`` and ``close``.

    Do not instantiate outside of the vfs layer.
    """

    def __init__(self, fh):
        # go through object.__setattr__: our own __setattr__ would store
        # the attribute on ``fh`` instead of on the wrapper
        object.__setattr__(self, '_origfh', fh)

    def __getattr__(self, attr):
        # only reached for names not found on the wrapper itself
        wrapped = self._origfh
        return getattr(wrapped, attr)

    def __setattr__(self, attr, value):
        wrapped = self._origfh
        return setattr(wrapped, attr, value)

    def __delattr__(self, attr):
        wrapped = self._origfh
        return delattr(wrapped, attr)

    def __enter__(self):
        # enter the wrapped file but hand out the wrapper, so that
        # leaving the ``with`` block goes through our __exit__
        self._origfh.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        kind = str(type(self))
        raise NotImplementedError('attempted instantiating ' + kind)

    def close(self):
        kind = str(type(self))
        raise NotImplementedError('attempted instantiating ' + kind)
637
638
class delayclosedfile(closewrapbase):
    """File object proxy that hands closing over to a closer object.

    Both ``close()`` and leaving a ``with`` block pass the wrapped file
    to ``closer.close()`` instead of closing it directly.

    Do not instantiate outside of the vfs layer.
    """

    def __init__(self, fh, closer):
        super(delayclosedfile, self).__init__(fh)
        # stored on the wrapper itself, not forwarded to ``fh``
        object.__setattr__(self, '_closer', closer)

    def __exit__(self, exc_type, exc_value, exc_tb):
        wrapped = self._origfh
        self._closer.close(wrapped)

    def close(self):
        wrapped = self._origfh
        self._closer.close(wrapped)
654
655
class backgroundfilecloser(object):
    """Coordinates background closing of file handles on multiple threads.

    File objects passed to ``close()`` are put on a queue drained by
    worker threads. When background closing is not enabled, or the
    expected file count is too small, no thread is started and
    ``close()`` closes files synchronously.
    """

    def __init__(self, ui, expectedcount=-1):
        self._running = False
        self._entered = False
        self._threads = []
        self._threadexception = None

        # Only Windows/NTFS has slow file closing. So only enable by default
        # on that platform. But allow to be enabled elsewhere for testing.
        enabled = ui.configbool(
            b'worker', b'backgroundclose', pycompat.iswindows
        )
        if not enabled:
            return

        # There is overhead to starting and stopping the background threads.
        # Don't do background processing unless the file count is large enough
        # to justify it.
        minfilecount = ui.configint(b'worker', b'backgroundcloseminfilecount')
        # FUTURE dynamically start background threads after minfilecount closes.
        # (We don't currently have any callers that don't know their file count)
        if 0 < expectedcount < minfilecount:
            return

        maxqueue = ui.configint(b'worker', b'backgroundclosemaxqueue')
        threadcount = ui.configint(b'worker', b'backgroundclosethreadcount')

        ui.debug(
            b'starting %d threads for background file closing\n' % threadcount
        )

        self._queue = pycompat.queue.Queue(maxsize=maxqueue)
        self._running = True

        for _index in range(threadcount):
            worker = threading.Thread(
                target=self._worker, name='backgroundcloser'
            )
            self._threads.append(worker)
            worker.start()

    def __enter__(self):
        self._entered = True
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        # tells the workers to stop once the queue has been drained
        self._running = False

        # Wait for threads to finish closing so open files don't linger for
        # longer than lifetime of context manager.
        for worker in self._threads:
            worker.join()

    def _worker(self):
        """Main routine for worker thread."""
        while True:
            try:
                fh = self._queue.get(block=True, timeout=0.100)
            except pycompat.queue.Empty:
                # nothing queued: stop if shutdown was requested
                if not self._running:
                    break
                continue

            # Need to catch or the thread will terminate and
            # we could orphan file descriptors.
            try:
                fh.close()
            except Exception as e:
                # Stash so can re-raise from main thread later.
                self._threadexception = e

    def close(self, fh):
        """Schedule a file for closing."""
        if not self._entered:
            raise error.Abort(
                _(b'can only call close() when context manager active')
            )

        # If a background thread encountered an exception, raise now so we fail
        # fast. Otherwise we may potentially go on for minutes until the error
        # is acted on.
        pending = self._threadexception
        if pending:
            self._threadexception = None
            raise pending

        if self._running:
            self._queue.put(fh, block=True, timeout=None)
        else:
            # If we're not actively running, close synchronously.
            fh.close()
746
747
class checkambigatclosing(closewrapbase):
    """File object proxy avoiding file stat ambiguity once it is closed

    The stat of the file is recorded when the proxy is created; after
    the file is closed, it is compared against that record through
    ``_avoidambig()``.

    See also util.filestat for detail about "ambiguity of file stat".

    This proxy is useful only if the target file is guarded by any
    lock (e.g. repo.lock or repo.wlock)

    Do not instantiate outside of the vfs layer.
    """

    def __init__(self, fh):
        super(checkambigatclosing, self).__init__(fh)
        # stored on the wrapper itself, not forwarded to ``fh``
        before = util.filestat.frompath(fh.name)
        object.__setattr__(self, '_oldstat', before)

    def _checkambig(self):
        previous = self._oldstat
        if not previous.stat:
            # no stat was recorded at creation time: nothing to compare
            return
        _avoidambig(self._origfh.name, previous)

    def __exit__(self, exc_type, exc_value, exc_tb):
        # let the wrapped file finish first, then compare stats
        self._origfh.__exit__(exc_type, exc_value, exc_tb)
        self._checkambig()

    def close(self):
        # let the wrapped file finish first, then compare stats
        self._origfh.close()
        self._checkambig()
775