# Copyright (C) 2007 Giampaolo Rodola' <g.rodola@gmail.com>.
# Use of this source code is governed by MIT license that can be
# found in the LICENSE file.

import functools
import os
import stat
import tempfile
import time
try:
    from stat import filemode as _filemode  # PY 3.3
except ImportError:
    from tarfile import filemode as _filemode
try:
    import pwd
    import grp
except ImportError:
    pwd = grp = None
try:
    from os import scandir  # py 3.5
except ImportError:
    try:
        from scandir import scandir  # requires "pip install scandir"
    except ImportError:
        scandir = None

from ._compat import PY3
from ._compat import u
from ._compat import unicode


__all__ = ['FilesystemError', 'AbstractedFS']


_months_map = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun',
               7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}


def _memoize(fun):
    """A simple memoize decorator for functions supporting (hashable)
    positional and keyword arguments.

    Results are kept in a dict private to the decorated function for
    as long as the decorated function itself is alive.
    """
    cache = {}

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # a frozenset is order-independent, so kwargs need no sorting
        key = (args, frozenset(kwargs.items()))
        try:
            return cache[key]
        except KeyError:
            ret = cache[key] = fun(*args, **kwargs)
            return ret

    return wrapper


def _join_entry(basedir, basename):
    """Join basedir and basename, returning a (fspath, basename) tuple
    where basename is suitable for being sent to the client.
    """
    if PY3:
        return os.path.join(basedir, basename), basename
    try:
        return os.path.join(basedir, basename), basename
    except UnicodeDecodeError:
        # (Python 2 only) might happen on filesystem not
        # supporting UTF8 meaning os.listdir() returned a list
        # of mixed bytes and unicode strings:
        # http://goo.gl/6DLHD
        # http://bugs.python.org/issue683592
        fspath = os.path.join(bytes(basedir), bytes(basename))
        if not isinstance(basename, unicode):
            basename = unicode(basename, 'utf8', 'ignore')
        return fspath, basename


# ===================================================================
# --- custom exceptions
# ===================================================================

class FilesystemError(Exception):
    """Custom class for filesystem-related exceptions.
    You can raise this from an AbstractedFS subclass in order to
    send a customized error string to the client.
    """


# ===================================================================
# --- base class
# ===================================================================

class AbstractedFS(object):
    """A class used to interact with the file system, providing a
    cross-platform interface compatible with both Windows and
    UNIX style filesystems where all paths use "/" separator.

    AbstractedFS distinguishes between "real" filesystem paths and
    "virtual" ftp paths emulating a UNIX chroot jail where the user
    can not escape its home directory (example: real "/home/user"
    path will be seen as "/" by the client)

    It also provides some utility methods and wraps around all os.*
    calls involving operations against the filesystem like creating
    files or removing directories.

    FilesystemError exception can be raised from within any of
    the methods below in order to send a customized error string
    to the client.
    """

    def __init__(self, root, cmd_channel):
        """
         - (str) root: the user "real" home directory (e.g. '/home/user')
         - (instance) cmd_channel: the FTPHandler class instance
        """
        assert isinstance(root, unicode)
        # Set initial current working directory.
        # By default initial cwd is set to "/" to emulate a chroot jail.
        # If a different behavior is desired (e.g. initial cwd = root,
        # to reflect the real filesystem) users overriding this class
        # are responsible to set _cwd attribute as necessary.
        self._cwd = u('/')
        self._root = root
        self.cmd_channel = cmd_channel

    @property
    def root(self):
        """The user home directory."""
        return self._root

    @property
    def cwd(self):
        """The user current working directory."""
        return self._cwd

    @root.setter
    def root(self, path):
        assert isinstance(path, unicode), path
        self._root = path

    @cwd.setter
    def cwd(self, path):
        assert isinstance(path, unicode), path
        self._cwd = path

    # --- Pathname / conversion utilities

    def ftpnorm(self, ftppath):
        """Normalize a "virtual" ftp pathname (typically the raw string
        coming from client) depending on the current working directory.

        Example (having "/foo" as current working directory):
        >>> ftpnorm('bar')
        '/foo/bar'

        Note: directory separators are system independent ("/").
        Pathname returned is always absolutized.
        """
        assert isinstance(ftppath, unicode), ftppath
        if os.path.isabs(ftppath):
            p = os.path.normpath(ftppath)
        else:
            p = os.path.normpath(os.path.join(self.cwd, ftppath))
        # normalize string in a standard web-path notation having '/'
        # as separator.
        if os.sep == "\\":
            p = p.replace("\\", "/")
        # os.path.normpath supports UNC paths (e.g. "//a/b/c") but we
        # don't need them.  In case we get an UNC path we collapse
        # redundant separators appearing at the beginning of the string
        while p[:2] == '//':
            p = p[1:]
        # Anti path traversal: don't trust user input, in the event
        # that self.cwd is not absolute, return "/" as a safety measure.
        # This is for extra protection, maybe not really necessary.
        if not os.path.isabs(p):
            p = u("/")
        return p

    def ftp2fs(self, ftppath):
        """Translate a "virtual" ftp pathname (typically the raw string
        coming from client) into equivalent absolute "real" filesystem
        pathname.

        Example (having "/home/user" as root directory):
        >>> ftp2fs("foo")
        '/home/user/foo'

        Note: directory separators are system dependent.
        """
        assert isinstance(ftppath, unicode), ftppath
        # as far as I know, it should always be path traversal safe...
        if os.path.normpath(self.root) == os.sep:
            return os.path.normpath(self.ftpnorm(ftppath))
        else:
            # strip the leading "/" so that os.path.join() does not
            # discard the root component
            p = self.ftpnorm(ftppath)[1:]
            return os.path.normpath(os.path.join(self.root, p))

    def fs2ftp(self, fspath):
        """Translate a "real" filesystem pathname into equivalent
        absolute "virtual" ftp pathname depending on the user's
        root directory.

        Example (having "/home/user" as root directory):
        >>> fs2ftp("/home/user/foo")
        '/foo'

        As for ftpnorm, directory separators are system independent
        ("/") and pathname returned is always absolutized.

        On invalid pathnames escaping from user's root directory
        (e.g. "/home" when root is "/home/user") always return "/".
        """
        assert isinstance(fspath, unicode), fspath
        if os.path.isabs(fspath):
            p = os.path.normpath(fspath)
        else:
            p = os.path.normpath(os.path.join(self.root, fspath))
        if not self.validpath(p):
            return u('/')
        p = p.replace(os.sep, "/")
        p = p[len(self.root):]
        if not p.startswith('/'):
            p = '/' + p
        return p

    def validpath(self, path):
        """Check whether the path belongs to user's home directory.
        Expected argument is a "real" filesystem pathname.

        If path is a symbolic link it is resolved to check its real
        destination.

        Pathnames escaping from user's root directory are considered
        not valid.
        """
        assert isinstance(path, unicode), path
        root = self.realpath(self.root)
        path = self.realpath(path)
        # Compare with a trailing separator on both sides so that
        # "/home/user2" is not mistaken for a child of "/home/user".
        if not root.endswith(os.sep):
            root = root + os.sep
        if not path.endswith(os.sep):
            path = path + os.sep
        return path.startswith(root)

    # --- Wrapper methods around open() and tempfile.mkstemp

    def open(self, filename, mode):
        """Open a file returning its handler."""
        assert isinstance(filename, unicode), filename
        return open(filename, mode)

    def mkstemp(self, suffix='', prefix='', dir=None, mode='wb'):
        """A wrap around tempfile.mkstemp creating a file with a unique
        name.  Unlike mkstemp it returns an object with a file-like
        interface.

        The returned object exposes the file name as its "name"
        attribute and delegates any other attribute to the underlying
        file object.  If the given mode is rejected (ValueError) the
        descriptor is closed and the file just created is removed
        before the exception is propagated.
        """
        # Deliberately not deriving from "object": keep the class
        # declaration as it originally was.
        class FileWrapper:

            def __init__(self, fd, name):
                self.file = fd
                self.name = name

            def __getattr__(self, attr):
                return getattr(self.file, attr)

        text = 'b' not in mode
        # max number of tries to find out a unique file name
        # (note: this is a process-wide tempfile setting)
        tempfile.TMP_MAX = 50
        fd, name = tempfile.mkstemp(suffix, prefix, dir, text=text)
        try:
            file = os.fdopen(fd, mode)
        except ValueError:
            # Invalid mode string: nothing took ownership of the
            # descriptor, so release it and get rid of the orphan file.
            os.close(fd)
            try:
                os.remove(name)
            except OSError:
                pass
            raise
        return FileWrapper(file, name)

    # --- Wrapper methods around os.* calls

    def chdir(self, path):
        """Change the current directory. If this method is overridden
        it is vital that `cwd` attribute gets set.
        """
        # note: process cwd will be reset by the caller
        assert isinstance(path, unicode), path
        os.chdir(path)
        self.cwd = self.fs2ftp(path)

    def mkdir(self, path):
        """Create the specified directory."""
        assert isinstance(path, unicode), path
        os.mkdir(path)

    def listdir(self, path):
        """List the content of a directory."""
        assert isinstance(path, unicode), path
        return os.listdir(path)

    def listdirinfo(self, path):
        """List the content of a directory."""
        assert isinstance(path, unicode), path
        return os.listdir(path)

    def rmdir(self, path):
        """Remove the specified directory."""
        assert isinstance(path, unicode), path
        os.rmdir(path)

    def remove(self, path):
        """Remove the specified file."""
        assert isinstance(path, unicode), path
        os.remove(path)

    def rename(self, src, dst):
        """Rename the specified src file to the dst filename."""
        assert isinstance(src, unicode), src
        assert isinstance(dst, unicode), dst
        os.rename(src, dst)

    def chmod(self, path, mode):
        """Change file/directory mode."""
        assert isinstance(path, unicode), path
        if not hasattr(os, 'chmod'):
            raise NotImplementedError
        os.chmod(path, mode)

    def stat(self, path):
        """Perform a stat() system call on the given path."""
        # on python 2 we might also get bytes from os.listdir()
        # assert isinstance(path, unicode), path
        return os.stat(path)

    def utime(self, path, timeval):
        """Perform a utime() call on the given path"""
        # utime expects a int/float (atime, mtime) in seconds
        # thus, setting both access and modify time to timeval
        return os.utime(path, (timeval, timeval))

    if hasattr(os, 'lstat'):
        def lstat(self, path):
            """Like stat but does not follow symbolic links."""
            # on python 2 we might also get bytes from os.listdir()
            # assert isinstance(path, unicode), path
            return os.lstat(path)
    else:
        # no os.lstat on this platform: fall back on the stat() method
        lstat = stat

    if hasattr(os, 'readlink'):
        def readlink(self, path):
            """Return a string representing the path to which a
            symbolic link points.
            """
            assert isinstance(path, unicode), path
            return os.readlink(path)

    # --- Wrapper methods around os.path.* calls

    def isfile(self, path):
        """Return True if path is a file."""
        assert isinstance(path, unicode), path
        return os.path.isfile(path)

    def islink(self, path):
        """Return True if path is a symbolic link."""
        assert isinstance(path, unicode), path
        return os.path.islink(path)

    def isdir(self, path):
        """Return True if path is a directory."""
        assert isinstance(path, unicode), path
        return os.path.isdir(path)

    def getsize(self, path):
        """Return the size of the specified file in bytes."""
        assert isinstance(path, unicode), path
        return os.path.getsize(path)

    def getmtime(self, path):
        """Return the last modified time as a number of seconds since
        the epoch."""
        assert isinstance(path, unicode), path
        return os.path.getmtime(path)

    def realpath(self, path):
        """Return the canonical version of path eliminating any
        symbolic links encountered in the path (if they are
        supported by the operating system).
        """
        assert isinstance(path, unicode), path
        return os.path.realpath(path)

    def lexists(self, path):
        """Return True if path refers to an existing path, including
        a broken or circular symbolic link.
        """
        assert isinstance(path, unicode), path
        return os.path.lexists(path)

    if pwd is not None:
        def get_user_by_uid(self, uid):
            """Return the username associated with user id.
            If this can't be determined return raw uid instead.
            On Windows just return "owner".
            """
            try:
                return pwd.getpwuid(uid).pw_name
            except KeyError:
                return uid
    else:
        def get_user_by_uid(self, uid):
            """Return "owner" (the pwd module is not available)."""
            return "owner"

    if grp is not None:
        def get_group_by_gid(self, gid):
            """Return the groupname associated with group id.
            If this can't be determined return raw gid instead.
            On Windows just return "group".
            """
            try:
                return grp.getgrgid(gid).gr_name
            except KeyError:
                return gid
    else:
        def get_group_by_gid(self, gid):
            """Return "group" (the grp module is not available)."""
            return "group"

    # --- Listing utilities

    def format_list(self, basedir, listing, ignore_err=True):
        """Return an iterator object that yields the entries of given
        directory emulating the "/bin/ls -lA" UNIX command output.

         - (str) basedir: the absolute dirname.
         - (list) listing: the names of the entries in basedir
         - (bool) ignore_err: when False raise exception if os.lstat()
         call fails.

        On platforms which do not support the pwd and grp modules (such
        as Windows), ownership is printed as "owner" and "group" as a
        default, and number of hard links is always "1". On UNIX
        systems, the actual owner, group, and number of links are
        printed.

        This is how output appears to client:

        -rw-rw-rw-   1 owner    group     7045120 Sep 02  3:47 music.mp3
        drwxrwxrwx   1 owner    group           0 Aug 31 18:50 e-books
        -rw-rw-rw-   1 owner    group         380 Sep 02  3:40 module.py
        """
        # uid / gid lookups are memoized for the duration of this
        # single listing only
        @_memoize
        def get_user_by_uid(uid):
            return self.get_user_by_uid(uid)

        @_memoize
        def get_group_by_gid(gid):
            return self.get_group_by_gid(gid)

        assert isinstance(basedir, unicode), basedir
        if self.cmd_channel.use_gmt_times:
            timefunc = time.gmtime
        else:
            timefunc = time.localtime
        SIX_MONTHS = 180 * 24 * 60 * 60
        # readlink() is defined only where os.readlink exists
        readlink = getattr(self, 'readlink', None)
        now = time.time()
        for basename in listing:
            fspath, basename = _join_entry(basedir, basename)
            try:
                st = self.lstat(fspath)
            except (OSError, FilesystemError):
                if ignore_err:
                    continue
                raise

            perms = _filemode(st.st_mode)  # permissions
            nlinks = st.st_nlink  # number of links to inode
            if not nlinks:  # non-posix system, let's use a bogus value
                nlinks = 1
            size = st.st_size  # file size
            uname = get_user_by_uid(st.st_uid)
            gname = get_group_by_gid(st.st_gid)
            mtime = timefunc(st.st_mtime)
            # if modification time > 6 months shows "month year"
            # else "month hh:mm";  this matches proftpd format, see:
            # https://github.com/giampaolo/pyftpdlib/issues/187
            # NOTE(review): ls-style listings usually pad the year
            # with two spaces ("%d  %Y"); confirm that the whitespace
            # inside the literal below is the intended one.
            if (now - st.st_mtime) > SIX_MONTHS:
                fmtstr = "%d %Y"
            else:
                fmtstr = "%d %H:%M"
            try:
                mtimestr = "%s %s" % (_months_map[mtime.tm_mon],
                                      time.strftime(fmtstr, mtime))
            except ValueError:
                # It could be raised if last mtime happens to be too
                # old (prior to year 1900) in which case we return
                # the current time as last mtime.
                mtime = timefunc()
                mtimestr = "%s %s" % (_months_map[mtime.tm_mon],
                                      time.strftime("%d %H:%M", mtime))

            # same as stat.S_ISLNK(st.st_mode) but slightly faster
            # (61440 == 0o170000, the file type bit mask)
            islink = (st.st_mode & 61440) == stat.S_IFLNK
            if islink and readlink is not None:
                # if the file is a symlink, resolve it, e.g.
                # "symlink -> realfile"
                try:
                    basename = basename + " -> " + readlink(fspath)
                except (OSError, FilesystemError):
                    if not ignore_err:
                        raise

            # formatting is matched with proftpd ls output
            line = "%s %3s %-8s %-8s %8s %s %s\r\n" % (
                perms, nlinks, uname, gname, size, mtimestr, basename)
            yield line.encode('utf8', self.cmd_channel.unicode_errors)

    def format_mlsx(self, basedir, listing, perms, facts, ignore_err=True):
        """Return an iterator object that yields the entries of a given
        directory or of a single file in a form suitable with MLSD and
        MLST commands.

        Every entry includes a list of "facts" referring the listed
        element.  See RFC-3659, chapter 7, to see what every single
        fact stands for.

         - (str) basedir: the absolute dirname.
         - (list) listing: the names of the entries in basedir
         - (str) perms: the string referencing the user permissions.
         - (str) facts: the list of "facts" to be returned.
         - (bool) ignore_err: when False raise exception if os.stat()
         call fails.

        Note that "facts" returned may change depending on the platform
        and on what user specified by using the OPTS command.

        This is how output could appear to the client issuing
        a MLSD request:

        type=file;size=156;perm=r;modify=20071029155301;unique=8012; music.mp3
        type=dir;size=0;perm=el;modify=20071127230206;unique=801e33; ebooks
        type=file;size=211;perm=r;modify=20071103093626;unique=192; module.py
        """
        assert isinstance(basedir, unicode), basedir
        if self.cmd_channel.use_gmt_times:
            timefunc = time.gmtime
        else:
            timefunc = time.localtime
        # split the user permissions into the subset reported for
        # directories and the one reported for files
        permdir = ''.join([x for x in perms if x not in 'arw'])
        permfile = ''.join([x for x in perms if x not in 'celmp'])
        if ('w' in perms) or ('a' in perms) or ('f' in perms):
            permdir += 'c'
        if 'd' in perms:
            permdir += 'p'
        # evaluate once which facts were requested
        show_type = 'type' in facts
        show_perm = 'perm' in facts
        show_size = 'size' in facts
        show_modify = 'modify' in facts
        show_create = 'create' in facts
        show_mode = 'unix.mode' in facts
        show_uid = 'unix.uid' in facts
        show_gid = 'unix.gid' in facts
        show_unique = 'unique' in facts
        for basename in listing:
            retfacts = {}
            fspath, basename = _join_entry(basedir, basename)
            # in order to properly implement 'unique' fact (RFC-3659,
            # chapter 7.5.2) we are supposed to follow symlinks, hence
            # use os.stat() instead of os.lstat()
            try:
                st = self.stat(fspath)
            except (OSError, FilesystemError):
                if ignore_err:
                    continue
                raise
            # type + perm
            # same as stat.S_ISDIR(st.st_mode) but slightly faster
            # (61440 == 0o170000, the file type bit mask)
            isdir = (st.st_mode & 61440) == stat.S_IFDIR
            if isdir:
                if show_type:
                    if basename == '.':
                        retfacts['type'] = 'cdir'
                    elif basename == '..':
                        retfacts['type'] = 'pdir'
                    else:
                        retfacts['type'] = 'dir'
                if show_perm:
                    retfacts['perm'] = permdir
            else:
                if show_type:
                    retfacts['type'] = 'file'
                if show_perm:
                    retfacts['perm'] = permfile
            if show_size:
                retfacts['size'] = st.st_size  # file size
            # last modification time
            if show_modify:
                try:
                    retfacts['modify'] = time.strftime("%Y%m%d%H%M%S",
                                                       timefunc(st.st_mtime))
                # it could be raised if last mtime happens to be too old
                # (prior to year 1900)
                except ValueError:
                    pass
            if show_create:
                # on Windows we can provide also the creation time
                try:
                    retfacts['create'] = time.strftime("%Y%m%d%H%M%S",
                                                       timefunc(st.st_ctime))
                except ValueError:
                    pass
            # UNIX only
            if show_mode:
                # 511 == 0o777, the permission bits
                # NOTE(review): on Python 3 oct() returns a string with
                # a "0o" prefix (e.g. '0o644') while on Python 2 it
                # returns '0644'; confirm which form clients expect.
                retfacts['unix.mode'] = oct(st.st_mode & 511)
            if show_uid:
                retfacts['unix.uid'] = st.st_uid
            if show_gid:
                retfacts['unix.gid'] = st.st_gid

            # We provide unique fact (see RFC-3659, chapter 7.5.2) on
            # posix platforms only; we get it by mixing st_dev and
            # st_ino values which should be enough for granting an
            # uniqueness for the file listed.
            # The same approach is used by pure-ftpd.
            # Implementors who want to provide unique fact on other
            # platforms should use some platform-specific method (e.g.
            # on Windows NTFS filesystems MFT records could be used).
            if show_unique:
                retfacts['unique'] = "%xg%x" % (st.st_dev, st.st_ino)

            # facts can be in any order but we sort them by name
            factstring = "".join(["%s=%s;" % (x, retfacts[x])
                                  for x in sorted(retfacts)])
            line = "%s %s\r\n" % (factstring, basename)
            yield line.encode('utf8', self.cmd_channel.unicode_errors)


# ===================================================================
# --- platform specific implementation
# ===================================================================

if os.name == 'posix':
    __all__.append('UnixFilesystem')

    class UnixFilesystem(AbstractedFS):
        """Represents the real UNIX filesystem.

        Differently from AbstractedFS the client will login into
        /home/<username> and will be able to escape its home directory
        and navigate the real filesystem.
        """

        def __init__(self, root, cmd_channel):
            AbstractedFS.__init__(self, root, cmd_channel)
            # initial cwd was set to "/" to emulate a chroot jail
            self.cwd = root

        def ftp2fs(self, ftppath):
            """Return the normalized ftp path, which is used as the
            real filesystem path as is.
            """
            return self.ftpnorm(ftppath)

        def fs2ftp(self, fspath):
            """Return fspath unchanged."""
            return fspath

        def validpath(self, path):
            """Always return True."""
            # validpath was used to check symlinks escaping user home
            # directory; this is no longer necessary.
            return True