"""scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib

scandir() is a generator version of os.listdir() that returns an
iterator over files in a directory, and also exposes the extra
information most OSes provide while iterating files in a directory
(such as type and stat information).

This module also includes a version of os.walk() that uses scandir()
to speed it up significantly.

See README.md or https://github.com/benhoyt/scandir for rationale and
docs, or read PEP 471 (https://www.python.org/dev/peps/pep-0471/) for
more details on its inclusion into Python 3.5

scandir is released under the new BSD 3-clause license. See
LICENSE.txt for the full license text.
"""

from __future__ import division

from errno import ENOENT
from os import listdir, lstat, stat, strerror
from os.path import join, islink
from stat import S_IFDIR, S_IFLNK, S_IFREG
import collections
import sys

try:
    import _scandir
except ImportError:
    _scandir = None

try:
    import ctypes
except ImportError:
    ctypes = None

if _scandir is None and ctypes is None:
    import warnings
    warnings.warn("scandir can't find the compiled _scandir C module "
                  "or ctypes, using slow generic fallback")

__version__ = '1.10.0'
__all__ = ['scandir', 'walk']

# Windows FILE_ATTRIBUTE constants for interpreting the
# FIND_DATA.dwFileAttributes member
FILE_ATTRIBUTE_ARCHIVE = 32
FILE_ATTRIBUTE_COMPRESSED = 2048
FILE_ATTRIBUTE_DEVICE = 64
FILE_ATTRIBUTE_DIRECTORY = 16
FILE_ATTRIBUTE_ENCRYPTED = 16384
FILE_ATTRIBUTE_HIDDEN = 2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768
FILE_ATTRIBUTE_NORMAL = 128
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072
FILE_ATTRIBUTE_OFFLINE = 4096
FILE_ATTRIBUTE_READONLY = 1
FILE_ATTRIBUTE_REPARSE_POINT = 1024
FILE_ATTRIBUTE_SPARSE_FILE = 512
FILE_ATTRIBUTE_SYSTEM = 4
FILE_ATTRIBUTE_TEMPORARY = 256
FILE_ATTRIBUTE_VIRTUAL = 65536

IS_PY3 = sys.version_info >= (3, 0)

if IS_PY3:
    unicode = str  # Because Python <= 3.2 doesn't have u'unicode' syntax


class GenericDirEntry(object):
    """Directory entry for the slow fallback based on os.listdir().

    No type or stat information comes for free here, so every is_*()
    query performs (and then caches) a stat()/lstat() system call.
    """

    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self._scandir_path = scandir_path
        self.name = name
        self._stat = None
        self._lstat = None
        self._path = None

    @property
    def path(self):
        """Return join(scandir_path, name), computed lazily and cached."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def stat(self, follow_symlinks=True):
        """Return the cached stat() (or lstat() if follow_symlinks is
        false) result for this entry, fetching it on first use.
        """
        if follow_symlinks:
            if self._stat is None:
                self._stat = stat(self.path)
            return self._stat
        else:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

    # The code duplication below is intentional: this is for slightly
    # better performance on systems that fall back to GenericDirEntry.
    # It avoids an additional attribute lookup and method call, which
    # are relatively slow on CPython.
    def is_dir(self, follow_symlinks=True):
        """Return True if this entry is a directory; False if it is
        anything else or no longer exists.
        """
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if this entry is a regular file; False if it is
        anything else or no longer exists.
        """
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if this entry is a symbolic link; False if it is
        anything else or no longer exists.
        """
        try:
            st = self.stat(follow_symlinks=False)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return the inode number taken from the entry's lstat() result."""
        st = self.stat(follow_symlinks=False)
        return st.st_ino

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__


def _scandir_generic(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.
    """
    for name in listdir(path):
        yield GenericDirEntry(path, name)


if IS_PY3 and sys.platform == 'win32':
    def scandir_generic(path=unicode('.')):
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_generic(path)
    scandir_generic.__doc__ = _scandir_generic.__doc__
else:
    scandir_generic = _scandir_generic


scandir_c = None
scandir_python = None


if sys.platform == 'win32':
    if ctypes is not None:
        from ctypes import wintypes

        # Various constants from windows.h
        INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
        ERROR_FILE_NOT_FOUND = 2
        ERROR_NO_MORE_FILES = 18
        IO_REPARSE_TAG_SYMLINK = 0xA000000C

        # Number of seconds between 1601-01-01 and 1970-01-01
        SECONDS_BETWEEN_EPOCHS = 11644473600

        kernel32 = ctypes.windll.kernel32

        # ctypes wrappers for (wide string versions of) FindFirstFile,
        # FindNextFile, and FindClose
        FindFirstFile = kernel32.FindFirstFileW
        FindFirstFile.argtypes = [
            wintypes.LPCWSTR,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]
        FindFirstFile.restype = wintypes.HANDLE

        FindNextFile = kernel32.FindNextFileW
        FindNextFile.argtypes = [
            wintypes.HANDLE,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]
        FindNextFile.restype = wintypes.BOOL

        FindClose = kernel32.FindClose
        FindClose.argtypes = [wintypes.HANDLE]
        FindClose.restype = wintypes.BOOL

        Win32StatResult = collections.namedtuple('Win32StatResult', [
            'st_mode',
            'st_ino',
            'st_dev',
            'st_nlink',
            'st_uid',
            'st_gid',
            'st_size',
            'st_atime',
            'st_mtime',
            'st_ctime',
            'st_atime_ns',
            'st_mtime_ns',
            'st_ctime_ns',
            'st_file_attributes',
        ])

        def _filetime_ticks(filetime):
            """Return a Win32 FILETIME as one integer count of
            100-nanosecond ticks since 1601-01-01.
            """
            return filetime.dwHighDateTime << 32 | filetime.dwLowDateTime

        def filetime_to_time(filetime):
            """Convert Win32 FILETIME to time since Unix epoch in seconds."""
            total = _filetime_ticks(filetime)
            return total / 10000000 - SECONDS_BETWEEN_EPOCHS

        def filetime_to_time_ns(filetime):
            """Convert Win32 FILETIME to integer nanoseconds since the
            Unix epoch.

            Pure integer arithmetic is used: going through the float
            seconds value would lose precision, because a double cannot
            represent nanoseconds since the epoch exactly.
            """
            total = _filetime_ticks(filetime)
            return (total - SECONDS_BETWEEN_EPOCHS * 10000000) * 100

        def find_data_to_stat(data):
            """Convert Win32 FIND_DATA struct to stat_result."""
            # First convert Win32 dwFileAttributes to st_mode
            attributes = data.dwFileAttributes
            st_mode = 0
            if attributes & FILE_ATTRIBUTE_DIRECTORY:
                st_mode |= S_IFDIR | 0o111
            else:
                st_mode |= S_IFREG
            if attributes & FILE_ATTRIBUTE_READONLY:
                st_mode |= 0o444
            else:
                st_mode |= 0o666
            if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
                    data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
                # Replace the file-type bits with S_IFLNK, keeping the
                # permission bits
                st_mode ^= st_mode & 0o170000
                st_mode |= S_IFLNK

            st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
            st_atime = filetime_to_time(data.ftLastAccessTime)
            st_mtime = filetime_to_time(data.ftLastWriteTime)
            st_ctime = filetime_to_time(data.ftCreationTime)

            # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
            # st_nlink, st_uid, st_gid
            return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
                                   st_atime, st_mtime, st_ctime,
                                   filetime_to_time_ns(data.ftLastAccessTime),
                                   filetime_to_time_ns(data.ftLastWriteTime),
                                   filetime_to_time_ns(data.ftCreationTime),
                                   attributes)

        class Win32DirEntryPython(object):
            """Directory entry backed by a WIN32_FIND_DATAW structure.

            Type checks and lstat() are answered from the find data
            without further system calls; only following a symlink or
            asking for the inode needs a real stat()/lstat() call.
            """

            __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path', '_inode')

            def __init__(self, scandir_path, name, find_data):
                self._scandir_path = scandir_path
                self.name = name
                self._stat = None
                self._lstat = None
                self._find_data = find_data
                self._path = None
                self._inode = None

            @property
            def path(self):
                """Return join(scandir_path, name), computed lazily and cached."""
                if self._path is None:
                    self._path = join(self._scandir_path, self.name)
                return self._path

            def stat(self, follow_symlinks=True):
                """Return a (cached) stat result for this entry.

                The result is built from the find data unless the entry
                is a symlink that must be followed, in which case
                os.stat() is called.
                """
                if follow_symlinks:
                    if self._stat is None:
                        if self.is_symlink():
                            # It's a symlink, call link-following stat()
                            self._stat = stat(self.path)
                        else:
                            # Not a symlink, stat is same as lstat value
                            if self._lstat is None:
                                self._lstat = find_data_to_stat(self._find_data)
                            self._stat = self._lstat
                    return self._stat
                else:
                    if self._lstat is None:
                        # Lazily convert to stat object, because it's slow
                        # in Python, and often we only need is_dir() etc
                        self._lstat = find_data_to_stat(self._find_data)
                    return self._lstat

            def is_dir(self, follow_symlinks=True):
                """Return True if this entry is a directory (or, when
                following symlinks, a symlink pointing to one).
                """
                is_symlink = self.is_symlink()
                if follow_symlinks and is_symlink:
                    try:
                        return self.stat().st_mode & 0o170000 == S_IFDIR
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                elif is_symlink:
                    return False
                else:
                    return (self._find_data.dwFileAttributes &
                            FILE_ATTRIBUTE_DIRECTORY != 0)

            def is_file(self, follow_symlinks=True):
                """Return True if this entry is a file (or, when
                following symlinks, a symlink pointing to one).
                """
                is_symlink = self.is_symlink()
                if follow_symlinks and is_symlink:
                    try:
                        return self.stat().st_mode & 0o170000 == S_IFREG
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                elif is_symlink:
                    return False
                else:
                    return (self._find_data.dwFileAttributes &
                            FILE_ATTRIBUTE_DIRECTORY == 0)

            def is_symlink(self):
                """Return True if the find data marks this entry as a
                reparse point with the symlink tag.
                """
                return (self._find_data.dwFileAttributes &
                        FILE_ATTRIBUTE_REPARSE_POINT != 0 and
                        self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)

            def inode(self):
                """Return the entry's st_ino, fetched via os.lstat() on
                first use because the find data does not include it.
                """
                if self._inode is None:
                    self._inode = lstat(self.path).st_ino
                return self._inode

            def __str__(self):
                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

            __repr__ = __str__

        def win_error(error, filename):
            """Build a WindowsError for the given Win32 error code, with
            its filename attribute set.
            """
            exc = WindowsError(error, ctypes.FormatError(error))
            exc.filename = filename
            return exc

        def _scandir_python(path=unicode('.')):
            """Like os.listdir(), but yield DirEntry objects instead of returning
            a list of names.
            """
            # Call FindFirstFile and handle errors
            if isinstance(path, bytes):
                is_bytes = True
                filename = join(path.decode('mbcs', 'strict'), '*.*')
            else:
                is_bytes = False
                filename = join(path, '*.*')
            data = wintypes.WIN32_FIND_DATAW()
            data_p = ctypes.byref(data)
            handle = FindFirstFile(filename, data_p)
            if handle == INVALID_HANDLE_VALUE:
                error = ctypes.GetLastError()
                if error == ERROR_FILE_NOT_FOUND:
                    # No files, don't yield anything
                    return
                raise win_error(error, path)

            # Call FindNextFile in a loop, stopping when no more files
            try:
                while True:
                    # Skip '.' and '..' (current and parent directory), but
                    # otherwise yield a directory entry object
                    name = data.cFileName
                    if name not in ('.', '..'):
                        if is_bytes:
                            name = name.encode('mbcs', 'replace')
                        yield Win32DirEntryPython(path, name, data)

                    # Use a fresh structure per entry: the yielded entry
                    # keeps a reference to its own find data
                    data = wintypes.WIN32_FIND_DATAW()
                    data_p = ctypes.byref(data)
                    success = FindNextFile(handle, data_p)
                    if not success:
                        error = ctypes.GetLastError()
                        if error == ERROR_NO_MORE_FILES:
                            break
                        raise win_error(error, path)
            finally:
                if not FindClose(handle):
                    raise win_error(ctypes.GetLastError(), path)

        if IS_PY3:
            def scandir_python(path=unicode('.')):
                if isinstance(path, bytes):
                    raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
                return _scandir_python(path)
            scandir_python.__doc__ = _scandir_python.__doc__
        else:
            scandir_python = _scandir_python

    if _scandir is not None:
        scandir_c = _scandir.scandir
        DirEntry_c = _scandir.DirEntry

    if _scandir is not None:
        scandir = scandir_c
        DirEntry = DirEntry_c
    elif ctypes is not None:
        scandir = scandir_python
        DirEntry = Win32DirEntryPython
    else:
        scandir = scandir_generic
        DirEntry = GenericDirEntry


# Linux, OS X, and BSD implementation
elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform:
    have_dirent_d_type = (sys.platform != 'sunos5')

    if ctypes is not None and have_dirent_d_type:
        import ctypes.util

        DIR_p = ctypes.c_void_p

        # Rather annoying how the dirent struct is slightly different on each
        # platform. The only fields we care about are d_name and d_type.
        class Dirent(ctypes.Structure):
            if sys.platform.startswith('linux'):
                _fields_ = (
                    ('d_ino', ctypes.c_ulong),
                    ('d_off', ctypes.c_long),
                    ('d_reclen', ctypes.c_ushort),
                    ('d_type', ctypes.c_byte),
                    ('d_name', ctypes.c_char * 256),
                )
            elif 'openbsd' in sys.platform:
                _fields_ = (
                    ('d_ino', ctypes.c_uint64),
                    ('d_off', ctypes.c_uint64),
                    ('d_reclen', ctypes.c_uint16),
                    ('d_type', ctypes.c_uint8),
                    ('d_namlen', ctypes.c_uint8),
                    ('__d_padding', ctypes.c_uint8 * 4),
                    ('d_name', ctypes.c_char * 256),
                )
            elif 'freebsd' in sys.platform:
                _fields_ = (
                    ('d_ino', ctypes.c_uint64),
                    ('d_off', ctypes.c_uint64),
                    ('d_reclen', ctypes.c_uint16),
                    ('d_type', ctypes.c_uint8),
                    ('d_namlen', ctypes.c_uint8),
                    ('__d_padding', ctypes.c_uint8 * 4),
                    ('d_name', ctypes.c_char * 256),
                )
            else:
                _fields_ = (
                    ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
                    ('d_reclen', ctypes.c_ushort),
                    ('d_type', ctypes.c_byte),
                    ('d_namlen', ctypes.c_byte),
                    ('d_name', ctypes.c_char * 256),
                )

        DT_UNKNOWN = 0
        DT_DIR = 4
        DT_REG = 8
        DT_LNK = 10

        Dirent_p = ctypes.POINTER(Dirent)
        Dirent_pp = ctypes.POINTER(Dirent_p)

        libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
        opendir = libc.opendir
        opendir.argtypes = [ctypes.c_char_p]
        opendir.restype = DIR_p

        readdir_r = libc.readdir_r
        readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]
        readdir_r.restype = ctypes.c_int

        closedir = libc.closedir
        closedir.argtypes = [DIR_p]
        closedir.restype = ctypes.c_int

        file_system_encoding = sys.getfilesystemencoding()

        # Prefer os.fsencode()/os.fsdecode() where the interpreter has
        # them: they apply the same error handler os.listdir() uses, so a
        # file name that isn't valid in the filesystem encoding round-trips
        # instead of raising UnicodeDecodeError in the middle of iteration.
        try:
            from os import fsdecode as _fsdecode, fsencode as _fsencode
        except ImportError:
            def _fsencode(filename):
                return filename.encode(file_system_encoding)

            def _fsdecode(filename):
                return filename.decode(file_system_encoding)

        class PosixDirEntry(object):
            """Directory entry backed by a dirent record.

            The d_type field answers is_dir()/is_file()/is_symlink()
            without a system call, unless it is DT_UNKNOWN or a symlink
            has to be followed.
            """

            __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode')

            def __init__(self, scandir_path, name, d_type, inode):
                self._scandir_path = scandir_path
                self.name = name
                self._d_type = d_type
                self._inode = inode
                self._stat = None
                self._lstat = None
                self._path = None

            @property
            def path(self):
                """Return join(scandir_path, name), computed lazily and cached."""
                if self._path is None:
                    self._path = join(self._scandir_path, self.name)
                return self._path

            def stat(self, follow_symlinks=True):
                """Return a (cached) stat result for this entry.

                For non-symlinks the lstat() result is reused as the
                stat() result, so at most one system call is made.
                """
                if follow_symlinks:
                    if self._stat is None:
                        if self.is_symlink():
                            self._stat = stat(self.path)
                        else:
                            if self._lstat is None:
                                self._lstat = lstat(self.path)
                            self._stat = self._lstat
                    return self._stat
                else:
                    if self._lstat is None:
                        self._lstat = lstat(self.path)
                    return self._lstat

            def is_dir(self, follow_symlinks=True):
                """Return True if this entry is a directory (or, when
                following symlinks, a symlink pointing to one).
                """
                if (self._d_type == DT_UNKNOWN or
                        (follow_symlinks and self.is_symlink())):
                    try:
                        st = self.stat(follow_symlinks=follow_symlinks)
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                    return st.st_mode & 0o170000 == S_IFDIR
                else:
                    return self._d_type == DT_DIR

            def is_file(self, follow_symlinks=True):
                """Return True if this entry is a regular file (or, when
                following symlinks, a symlink pointing to one).
                """
                if (self._d_type == DT_UNKNOWN or
                        (follow_symlinks and self.is_symlink())):
                    try:
                        st = self.stat(follow_symlinks=follow_symlinks)
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                    return st.st_mode & 0o170000 == S_IFREG
                else:
                    return self._d_type == DT_REG

            def is_symlink(self):
                """Return True if this entry is a symbolic link."""
                if self._d_type == DT_UNKNOWN:
                    try:
                        st = self.stat(follow_symlinks=False)
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                    return st.st_mode & 0o170000 == S_IFLNK
                else:
                    return self._d_type == DT_LNK

            def inode(self):
                """Return the d_ino value read from the dirent record."""
                return self._inode

            def __str__(self):
                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

            __repr__ = __str__

        def posix_error(filename):
            """Build an OSError from the errno ctypes saved for the last
            libc call, with its filename attribute set.
            """
            errno = ctypes.get_errno()
            exc = OSError(errno, strerror(errno))
            exc.filename = filename
            return exc

        def scandir_python(path=unicode('.')):
            """Like os.listdir(), but yield DirEntry objects instead of returning
            a list of names.
            """
            if isinstance(path, bytes):
                opendir_path = path
                is_bytes = True
            else:
                opendir_path = _fsencode(path)
                is_bytes = False
            dir_p = opendir(opendir_path)
            if not dir_p:
                raise posix_error(path)
            try:
                result = Dirent_p()
                while True:
                    entry = Dirent()
                    if readdir_r(dir_p, entry, result):
                        raise posix_error(path)
                    if not result:
                        # NULL result pointer means end of directory
                        break
                    name = entry.d_name
                    if name not in (b'.', b'..'):
                        if not is_bytes:
                            name = _fsdecode(name)
                        yield PosixDirEntry(path, name, entry.d_type, entry.d_ino)
            finally:
                if closedir(dir_p):
                    raise posix_error(path)

    if _scandir is not None:
        scandir_c = _scandir.scandir
        DirEntry_c = _scandir.DirEntry

    if _scandir is not None:
        scandir = scandir_c
        DirEntry = DirEntry_c
    elif ctypes is not None and have_dirent_d_type:
        scandir = scandir_python
        DirEntry = PosixDirEntry
    else:
        scandir = scandir_generic
        DirEntry = GenericDirEntry


# Some other system -- no d_type or stat information
else:
    scandir = scandir_generic
    DirEntry = GenericDirEntry


def _walk(top, topdown=True, onerror=None, followlinks=False):
    """Like Python 3.5's implementation of os.walk() -- faster than
    the pre-Python 3.5 version as it uses scandir() internally.

    Yields (top, dirs, nondirs) tuples. Errors from scandir() are passed
    to onerror (if given) and end the walk of that directory.
    """
    dirs = []
    nondirs = []

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains.  os.walk
    # always suppressed the exception then, rather than blow up for a
    # minor reason when (say) a thousand readable directories are still
    # left to visit.  That logic is copied here.
    try:
        scandir_it = scandir(top)
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    while True:
        try:
            try:
                entry = next(scandir_it)
            except StopIteration:
                break
        except OSError as error:
            if onerror is not None:
                onerror(error)
            return

        try:
            is_dir = entry.is_dir()
        except OSError:
            # If is_dir() raises an OSError, consider that the entry is not
            # a directory, same behaviour as os.path.isdir().
            is_dir = False

        if is_dir:
            dirs.append(entry.name)
        else:
            nondirs.append(entry.name)

        if not topdown and is_dir:
            # Bottom-up: recurse into sub-directory, but exclude symlinks to
            # directories if followlinks is False
            if followlinks:
                walk_into = True
            else:
                try:
                    is_symlink = entry.is_symlink()
                except OSError:
                    # If is_symlink() raises an OSError, consider that the
                    # entry is not a symbolic link, same behaviour as
                    # os.path.islink().
                    is_symlink = False
                walk_into = not is_symlink

            if walk_into:
                # A separate loop variable keeps `entry` (the directory
                # entry being processed) from being shadowed.
                for sub_result in walk(entry.path, topdown, onerror, followlinks):
                    yield sub_result

    # Yield before recursion if going top down
    if topdown:
        yield top, dirs, nondirs

        # Recurse into sub-directories
        for name in dirs:
            new_path = join(top, name)
            # Issue #23605: os.path.islink() is used instead of caching
            # entry.is_symlink() result during the loop on os.scandir() because
            # the caller can replace the directory entry during the "yield"
            # above.
            if followlinks or not islink(new_path):
                for sub_result in walk(new_path, topdown, onerror, followlinks):
                    yield sub_result
    else:
        # Yield after recursion if going bottom up
        yield top, dirs, nondirs


if IS_PY3 or sys.platform != 'win32':
    walk = _walk
else:
    # Fix for broken unicode handling on Windows on Python 2.x, see:
    # https://github.com/benhoyt/scandir/issues/54
    file_system_encoding = sys.getfilesystemencoding()

    def walk(top, topdown=True, onerror=None, followlinks=False):
        if isinstance(top, bytes):
            top = top.decode(file_system_encoding)
        return _walk(top, topdown, onerror, followlinks)