1# Copyright (C) 2007 Giampaolo Rodola' <g.rodola@gmail.com>.
2# Use of this source code is governed by MIT license that can be
3# found in the LICENSE file.
4
5import os
6import stat
7import tempfile
8import time
9try:
10    from stat import filemode as _filemode  # PY 3.3
11except ImportError:
12    from tarfile import filemode as _filemode
13try:
14    import pwd
15    import grp
16except ImportError:
17    pwd = grp = None
18try:
19    from os import scandir  # py 3.5
20except ImportError:
21    try:
22        from scandir import scandir  # requires "pip install scandir"
23    except ImportError:
24        scandir = None
25
26from ._compat import PY3
27from ._compat import u
28from ._compat import unicode
29
30
# Public names of this module.
__all__ = ['FilesystemError', 'AbstractedFS']


# Month number (1-12) -> abbreviated English month name, used when
# rendering modification times in directory listings.
_months_map = dict(enumerate(
    ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1))
36
37
def _memoize(fun):
    """A simple memoize decorator for functions called with hashable
    positional and/or keyword arguments.

    Results are kept in a dict owned by the returned wrapper; entries
    are never evicted, so the cache lives as long as the wrapper does.
    The wrapper keeps name and docstring of the decorated function.
    Unhashable arguments result in TypeError.
    """
    # local import so that the module level imports are left untouched
    import functools

    cache = {}

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # a frozenset makes the key independent from keywords order
        key = (args, frozenset(kwargs.items()))
        try:
            return cache[key]
        except KeyError:
            ret = cache[key] = fun(*args, **kwargs)
            return ret

    return wrapper
52
53
54# ===================================================================
55# --- custom exceptions
56# ===================================================================
57
class FilesystemError(Exception):
    """Exception signalling a filesystem-related failure.

    An AbstractedFS subclass may raise it whenever a customized
    error string has to be sent to the client.
    """
63
64
65# ===================================================================
66# --- base class
67# ===================================================================
68
69class AbstractedFS(object):
70    """A class used to interact with the file system, providing a
71    cross-platform interface compatible with both Windows and
72    UNIX style filesystems where all paths use "/" separator.
73
74    AbstractedFS distinguishes between "real" filesystem paths and
75    "virtual" ftp paths emulating a UNIX chroot jail where the user
76    can not escape its home directory (example: real "/home/user"
77    path will be seen as "/" by the client)
78
79    It also provides some utility methods and wraps around all os.*
80    calls involving operations against the filesystem like creating
81    files or removing directories.
82
83    FilesystemError exception can be raised from within any of
84    the methods below in order to send a customized error string
85    to the client.
86    """
87
88    def __init__(self, root, cmd_channel):
89        """
90         - (str) root: the user "real" home directory (e.g. '/home/user')
91         - (instance) cmd_channel: the FTPHandler class instance
92        """
93        assert isinstance(root, unicode)
94        # Set initial current working directory.
95        # By default initial cwd is set to "/" to emulate a chroot jail.
96        # If a different behavior is desired (e.g. initial cwd = root,
97        # to reflect the real filesystem) users overriding this class
98        # are responsible to set _cwd attribute as necessary.
99        self._cwd = u('/')
100        self._root = root
101        self.cmd_channel = cmd_channel
102
    @property
    def root(self):
        """The user home directory, as stored in the _root attribute."""
        return self._root
107
    @property
    def cwd(self):
        """The user current working directory, as stored in the _cwd
        attribute.
        """
        return self._cwd
112
    @root.setter
    def root(self, path):
        # Replace the user home directory; only unicode strings are
        # accepted (the offending value is the assertion message).
        assert isinstance(path, unicode), path
        self._root = path
117
    @cwd.setter
    def cwd(self, path):
        # Replace the current working directory; only unicode strings
        # are accepted (the offending value is the assertion message).
        assert isinstance(path, unicode), path
        self._cwd = path
122
123    # --- Pathname / conversion utilities
124
125    def ftpnorm(self, ftppath):
126        """Normalize a "virtual" ftp pathname (typically the raw string
127        coming from client) depending on the current working directory.
128
129        Example (having "/foo" as current working directory):
130        >>> ftpnorm('bar')
131        '/foo/bar'
132
133        Note: directory separators are system independent ("/").
134        Pathname returned is always absolutized.
135        """
136        assert isinstance(ftppath, unicode), ftppath
137        if os.path.isabs(ftppath):
138            p = os.path.normpath(ftppath)
139        else:
140            p = os.path.normpath(os.path.join(self.cwd, ftppath))
141        # normalize string in a standard web-path notation having '/'
142        # as separator.
143        if os.sep == "\\":
144            p = p.replace("\\", "/")
145        # os.path.normpath supports UNC paths (e.g. "//a/b/c") but we
146        # don't need them.  In case we get an UNC path we collapse
147        # redundant separators appearing at the beginning of the string
148        while p[:2] == '//':
149            p = p[1:]
150        # Anti path traversal: don't trust user input, in the event
151        # that self.cwd is not absolute, return "/" as a safety measure.
152        # This is for extra protection, maybe not really necessary.
153        if not os.path.isabs(p):
154            p = u("/")
155        return p
156
157    def ftp2fs(self, ftppath):
158        """Translate a "virtual" ftp pathname (typically the raw string
159        coming from client) into equivalent absolute "real" filesystem
160        pathname.
161
162        Example (having "/home/user" as root directory):
163        >>> ftp2fs("foo")
164        '/home/user/foo'
165
166        Note: directory separators are system dependent.
167        """
168        assert isinstance(ftppath, unicode), ftppath
169        # as far as I know, it should always be path traversal safe...
170        if os.path.normpath(self.root) == os.sep:
171            return os.path.normpath(self.ftpnorm(ftppath))
172        else:
173            p = self.ftpnorm(ftppath)[1:]
174            return os.path.normpath(os.path.join(self.root, p))
175
176    def fs2ftp(self, fspath):
177        """Translate a "real" filesystem pathname into equivalent
178        absolute "virtual" ftp pathname depending on the user's
179        root directory.
180
181        Example (having "/home/user" as root directory):
182        >>> fs2ftp("/home/user/foo")
183        '/foo'
184
185        As for ftpnorm, directory separators are system independent
186        ("/") and pathname returned is always absolutized.
187
188        On invalid pathnames escaping from user's root directory
189        (e.g. "/home" when root is "/home/user") always return "/".
190        """
191        assert isinstance(fspath, unicode), fspath
192        if os.path.isabs(fspath):
193            p = os.path.normpath(fspath)
194        else:
195            p = os.path.normpath(os.path.join(self.root, fspath))
196        if not self.validpath(p):
197            return u('/')
198        p = p.replace(os.sep, "/")
199        p = p[len(self.root):]
200        if not p.startswith('/'):
201            p = '/' + p
202        return p
203
204    def validpath(self, path):
205        """Check whether the path belongs to user's home directory.
206        Expected argument is a "real" filesystem pathname.
207
208        If path is a symbolic link it is resolved to check its real
209        destination.
210
211        Pathnames escaping from user's root directory are considered
212        not valid.
213        """
214        assert isinstance(path, unicode), path
215        root = self.realpath(self.root)
216        path = self.realpath(path)
217        if not root.endswith(os.sep):
218            root = root + os.sep
219        if not path.endswith(os.sep):
220            path = path + os.sep
221        if path[0:len(root)] == root:
222            return True
223        return False
224
225    # --- Wrapper methods around open() and tempfile.mkstemp
226
227    def open(self, filename, mode):
228        """Open a file returning its handler."""
229        assert isinstance(filename, unicode), filename
230        return open(filename, mode)
231
232    def mkstemp(self, suffix='', prefix='', dir=None, mode='wb'):
233        """A wrap around tempfile.mkstemp creating a file with a unique
234        name.  Unlike mkstemp it returns an object with a file-like
235        interface.
236        """
237        class FileWrapper:
238
239            def __init__(self, fd, name):
240                self.file = fd
241                self.name = name
242
243            def __getattr__(self, attr):
244                return getattr(self.file, attr)
245
246        text = 'b' not in mode
247        # max number of tries to find out a unique file name
248        tempfile.TMP_MAX = 50
249        fd, name = tempfile.mkstemp(suffix, prefix, dir, text=text)
250        file = os.fdopen(fd, mode)
251        return FileWrapper(file, name)
252
253    # --- Wrapper methods around os.* calls
254
255    def chdir(self, path):
256        """Change the current directory. If this method is overridden
257        it is vital that `cwd` attribute gets set.
258        """
259        # note: process cwd will be reset by the caller
260        assert isinstance(path, unicode), path
261        os.chdir(path)
262        self.cwd = self.fs2ftp(path)
263
264    def mkdir(self, path):
265        """Create the specified directory."""
266        assert isinstance(path, unicode), path
267        os.mkdir(path)
268
269    def listdir(self, path):
270        """List the content of a directory."""
271        assert isinstance(path, unicode), path
272        return os.listdir(path)
273
274    def listdirinfo(self, path):
275        """List the content of a directory."""
276        assert isinstance(path, unicode), path
277        return os.listdir(path)
278
279    def rmdir(self, path):
280        """Remove the specified directory."""
281        assert isinstance(path, unicode), path
282        os.rmdir(path)
283
284    def remove(self, path):
285        """Remove the specified file."""
286        assert isinstance(path, unicode), path
287        os.remove(path)
288
289    def rename(self, src, dst):
290        """Rename the specified src file to the dst filename."""
291        assert isinstance(src, unicode), src
292        assert isinstance(dst, unicode), dst
293        os.rename(src, dst)
294
295    def chmod(self, path, mode):
296        """Change file/directory mode."""
297        assert isinstance(path, unicode), path
298        if not hasattr(os, 'chmod'):
299            raise NotImplementedError
300        os.chmod(path, mode)
301
302    def stat(self, path):
303        """Perform a stat() system call on the given path."""
304        # on python 2 we might also get bytes from os.lisdir()
305        # assert isinstance(path, unicode), path
306        return os.stat(path)
307
308    def utime(self, path, timeval):
309        """Perform a utime() call on the given path"""
310        # utime expects a int/float (atime, mtime) in seconds
311        # thus, setting both access and modify time to timeval
312        return os.utime(path, (timeval, timeval))
313
314    if hasattr(os, 'lstat'):
315        def lstat(self, path):
316            """Like stat but does not follow symbolic links."""
317            # on python 2 we might also get bytes from os.lisdir()
318            # assert isinstance(path, unicode), path
319            return os.lstat(path)
320    else:
321        lstat = stat
322
323    if hasattr(os, 'readlink'):
324        def readlink(self, path):
325            """Return a string representing the path to which a
326            symbolic link points.
327            """
328            assert isinstance(path, unicode), path
329            return os.readlink(path)
330
331    # --- Wrapper methods around os.path.* calls
332
333    def isfile(self, path):
334        """Return True if path is a file."""
335        assert isinstance(path, unicode), path
336        return os.path.isfile(path)
337
338    def islink(self, path):
339        """Return True if path is a symbolic link."""
340        assert isinstance(path, unicode), path
341        return os.path.islink(path)
342
343    def isdir(self, path):
344        """Return True if path is a directory."""
345        assert isinstance(path, unicode), path
346        return os.path.isdir(path)
347
348    def getsize(self, path):
349        """Return the size of the specified file in bytes."""
350        assert isinstance(path, unicode), path
351        return os.path.getsize(path)
352
353    def getmtime(self, path):
354        """Return the last modified time as a number of seconds since
355        the epoch."""
356        assert isinstance(path, unicode), path
357        return os.path.getmtime(path)
358
359    def realpath(self, path):
360        """Return the canonical version of path eliminating any
361        symbolic links encountered in the path (if they are
362        supported by the operating system).
363        """
364        assert isinstance(path, unicode), path
365        return os.path.realpath(path)
366
367    def lexists(self, path):
368        """Return True if path refers to an existing path, including
369        a broken or circular symbolic link.
370        """
371        assert isinstance(path, unicode), path
372        return os.path.lexists(path)
373
374    if pwd is not None:
375        def get_user_by_uid(self, uid):
376            """Return the username associated with user id.
377            If this can't be determined return raw uid instead.
378            On Windows just return "owner".
379            """
380            try:
381                return pwd.getpwuid(uid).pw_name
382            except KeyError:
383                return uid
384    else:
385        def get_user_by_uid(self, uid):
386            return "owner"
387
388    if grp is not None:
389        def get_group_by_gid(self, gid):
390            """Return the groupname associated with group id.
391            If this can't be determined return raw gid instead.
392            On Windows just return "group".
393            """
394            try:
395                return grp.getgrgid(gid).gr_name
396            except KeyError:
397                return gid
398    else:
399        def get_group_by_gid(self, gid):
400            return "group"
401
402    # --- Listing utilities
403
404    def format_list(self, basedir, listing, ignore_err=True):
405        """Return an iterator object that yields the entries of given
406        directory emulating the "/bin/ls -lA" UNIX command output.
407
408         - (str) basedir: the absolute dirname.
409         - (list) listing: the names of the entries in basedir
410         - (bool) ignore_err: when False raise exception if os.lstat()
411         call fails.
412
413        On platforms which do not support the pwd and grp modules (such
414        as Windows), ownership is printed as "owner" and "group" as a
415        default, and number of hard links is always "1". On UNIX
416        systems, the actual owner, group, and number of links are
417        printed.
418
419        This is how output appears to client:
420
421        -rw-rw-rw-   1 owner   group    7045120 Sep 02  3:47 music.mp3
422        drwxrwxrwx   1 owner   group          0 Aug 31 18:50 e-books
423        -rw-rw-rw-   1 owner   group        380 Sep 02  3:40 module.py
424        """
425        @_memoize
426        def get_user_by_uid(uid):
427            return self.get_user_by_uid(uid)
428
429        @_memoize
430        def get_group_by_gid(gid):
431            return self.get_group_by_gid(gid)
432
433        assert isinstance(basedir, unicode), basedir
434        if self.cmd_channel.use_gmt_times:
435            timefunc = time.gmtime
436        else:
437            timefunc = time.localtime
438        SIX_MONTHS = 180 * 24 * 60 * 60
439        readlink = getattr(self, 'readlink', None)
440        now = time.time()
441        for basename in listing:
442            if not PY3:
443                try:
444                    file = os.path.join(basedir, basename)
445                except UnicodeDecodeError:
446                    # (Python 2 only) might happen on filesystem not
447                    # supporting UTF8 meaning os.listdir() returned a list
448                    # of mixed bytes and unicode strings:
449                    # http://goo.gl/6DLHD
450                    # http://bugs.python.org/issue683592
451                    file = os.path.join(bytes(basedir), bytes(basename))
452                    if not isinstance(basename, unicode):
453                        basename = unicode(basename, 'utf8', 'ignore')
454            else:
455                file = os.path.join(basedir, basename)
456            try:
457                st = self.lstat(file)
458            except (OSError, FilesystemError):
459                if ignore_err:
460                    continue
461                raise
462
463            perms = _filemode(st.st_mode)  # permissions
464            nlinks = st.st_nlink  # number of links to inode
465            if not nlinks:  # non-posix system, let's use a bogus value
466                nlinks = 1
467            size = st.st_size  # file size
468            uname = get_user_by_uid(st.st_uid)
469            gname = get_group_by_gid(st.st_gid)
470            mtime = timefunc(st.st_mtime)
471            # if modification time > 6 months shows "month year"
472            # else "month hh:mm";  this matches proftpd format, see:
473            # https://github.com/giampaolo/pyftpdlib/issues/187
474            if (now - st.st_mtime) > SIX_MONTHS:
475                fmtstr = "%d  %Y"
476            else:
477                fmtstr = "%d %H:%M"
478            try:
479                mtimestr = "%s %s" % (_months_map[mtime.tm_mon],
480                                      time.strftime(fmtstr, mtime))
481            except ValueError:
482                # It could be raised if last mtime happens to be too
483                # old (prior to year 1900) in which case we return
484                # the current time as last mtime.
485                mtime = timefunc()
486                mtimestr = "%s %s" % (_months_map[mtime.tm_mon],
487                                      time.strftime("%d %H:%M", mtime))
488
489            # same as stat.S_ISLNK(st.st_mode) but slighlty faster
490            islink = (st.st_mode & 61440) == stat.S_IFLNK
491            if islink and readlink is not None:
492                # if the file is a symlink, resolve it, e.g.
493                # "symlink -> realfile"
494                try:
495                    basename = basename + " -> " + readlink(file)
496                except (OSError, FilesystemError):
497                    if not ignore_err:
498                        raise
499
500            # formatting is matched with proftpd ls output
501            line = "%s %3s %-8s %-8s %8s %s %s\r\n" % (
502                perms, nlinks, uname, gname, size, mtimestr, basename)
503            yield line.encode('utf8', self.cmd_channel.unicode_errors)
504
505    def format_mlsx(self, basedir, listing, perms, facts, ignore_err=True):
506        """Return an iterator object that yields the entries of a given
507        directory or of a single file in a form suitable with MLSD and
508        MLST commands.
509
510        Every entry includes a list of "facts" referring the listed
511        element.  See RFC-3659, chapter 7, to see what every single
512        fact stands for.
513
514         - (str) basedir: the absolute dirname.
515         - (list) listing: the names of the entries in basedir
516         - (str) perms: the string referencing the user permissions.
517         - (str) facts: the list of "facts" to be returned.
518         - (bool) ignore_err: when False raise exception if os.stat()
519         call fails.
520
521        Note that "facts" returned may change depending on the platform
522        and on what user specified by using the OPTS command.
523
524        This is how output could appear to the client issuing
525        a MLSD request:
526
527        type=file;size=156;perm=r;modify=20071029155301;unique=8012; music.mp3
528        type=dir;size=0;perm=el;modify=20071127230206;unique=801e33; ebooks
529        type=file;size=211;perm=r;modify=20071103093626;unique=192; module.py
530        """
531        assert isinstance(basedir, unicode), basedir
532        if self.cmd_channel.use_gmt_times:
533            timefunc = time.gmtime
534        else:
535            timefunc = time.localtime
536        permdir = ''.join([x for x in perms if x not in 'arw'])
537        permfile = ''.join([x for x in perms if x not in 'celmp'])
538        if ('w' in perms) or ('a' in perms) or ('f' in perms):
539            permdir += 'c'
540        if 'd' in perms:
541            permdir += 'p'
542        show_type = 'type' in facts
543        show_perm = 'perm' in facts
544        show_size = 'size' in facts
545        show_modify = 'modify' in facts
546        show_create = 'create' in facts
547        show_mode = 'unix.mode' in facts
548        show_uid = 'unix.uid' in facts
549        show_gid = 'unix.gid' in facts
550        show_unique = 'unique' in facts
551        for basename in listing:
552            retfacts = dict()
553            if not PY3:
554                try:
555                    file = os.path.join(basedir, basename)
556                except UnicodeDecodeError:
557                    # (Python 2 only) might happen on filesystem not
558                    # supporting UTF8 meaning os.listdir() returned a list
559                    # of mixed bytes and unicode strings:
560                    # http://goo.gl/6DLHD
561                    # http://bugs.python.org/issue683592
562                    file = os.path.join(bytes(basedir), bytes(basename))
563                    if not isinstance(basename, unicode):
564                        basename = unicode(basename, 'utf8', 'ignore')
565            else:
566                file = os.path.join(basedir, basename)
567            # in order to properly implement 'unique' fact (RFC-3659,
568            # chapter 7.5.2) we are supposed to follow symlinks, hence
569            # use os.stat() instead of os.lstat()
570            try:
571                st = self.stat(file)
572            except (OSError, FilesystemError):
573                if ignore_err:
574                    continue
575                raise
576            # type + perm
577            # same as stat.S_ISDIR(st.st_mode) but slightly faster
578            isdir = (st.st_mode & 61440) == stat.S_IFDIR
579            if isdir:
580                if show_type:
581                    if basename == '.':
582                        retfacts['type'] = 'cdir'
583                    elif basename == '..':
584                        retfacts['type'] = 'pdir'
585                    else:
586                        retfacts['type'] = 'dir'
587                if show_perm:
588                    retfacts['perm'] = permdir
589            else:
590                if show_type:
591                    retfacts['type'] = 'file'
592                if show_perm:
593                    retfacts['perm'] = permfile
594            if show_size:
595                retfacts['size'] = st.st_size  # file size
596            # last modification time
597            if show_modify:
598                try:
599                    retfacts['modify'] = time.strftime("%Y%m%d%H%M%S",
600                                                       timefunc(st.st_mtime))
601                # it could be raised if last mtime happens to be too old
602                # (prior to year 1900)
603                except ValueError:
604                    pass
605            if show_create:
606                # on Windows we can provide also the creation time
607                try:
608                    retfacts['create'] = time.strftime("%Y%m%d%H%M%S",
609                                                       timefunc(st.st_ctime))
610                except ValueError:
611                    pass
612            # UNIX only
613            if show_mode:
614                retfacts['unix.mode'] = oct(st.st_mode & 511)
615            if show_uid:
616                retfacts['unix.uid'] = st.st_uid
617            if show_gid:
618                retfacts['unix.gid'] = st.st_gid
619
620            # We provide unique fact (see RFC-3659, chapter 7.5.2) on
621            # posix platforms only; we get it by mixing st_dev and
622            # st_ino values which should be enough for granting an
623            # uniqueness for the file listed.
624            # The same approach is used by pure-ftpd.
625            # Implementors who want to provide unique fact on other
626            # platforms should use some platform-specific method (e.g.
627            # on Windows NTFS filesystems MTF records could be used).
628            if show_unique:
629                retfacts['unique'] = "%xg%x" % (st.st_dev, st.st_ino)
630
631            # facts can be in any order but we sort them by name
632            factstring = "".join(["%s=%s;" % (x, retfacts[x])
633                                  for x in sorted(retfacts.keys())])
634            line = "%s %s\r\n" % (factstring, basename)
635            yield line.encode('utf8', self.cmd_channel.unicode_errors)
636
637
638# ===================================================================
639# --- platform specific implementation
640# ===================================================================
641
if os.name == 'posix':
    __all__ += ['UnixFilesystem']

    class UnixFilesystem(AbstractedFS):
        """Represents the real UNIX filesystem.

        Differently from AbstractedFS the client will login into
        /home/<username> and will be able to escape its home directory
        and navigate the real filesystem.
        """

        def __init__(self, root, cmd_channel):
            """Same arguments as AbstractedFS; the initial current
            working directory is root instead of "/".
            """
            AbstractedFS.__init__(self, root, cmd_channel)
            # the base class set the initial cwd to "/" in order to
            # emulate a chroot jail, which is not wanted here
            self.cwd = root

        def ftp2fs(self, ftppath):
            """Return the normalized ftppath: no translation against
            the root directory is applied.
            """
            fspath = self.ftpnorm(ftppath)
            return fspath

        def fs2ftp(self, fspath):
            """Return fspath unchanged."""
            return fspath

        def validpath(self, path):
            """Always return True."""
            # validpath was used to check symlinks escaping user home
            # directory; this is no longer necessary.
            return True
668