"""scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib

scandir() is a generator version of os.listdir() that returns an
iterator over files in a directory, and also exposes the extra
information most OSes provide while iterating files in a directory
(such as type and stat information).

This module also includes a version of os.walk() that uses scandir()
to speed it up significantly.

See README.md or https://github.com/benhoyt/scandir for rationale and
docs, or read PEP 471 (https://www.python.org/dev/peps/pep-0471/) for
more details on its inclusion into Python 3.5

scandir is released under the new BSD 3-clause license. See
LICENSE.txt for the full license text.
"""

from __future__ import division

from errno import ENOENT
from os import listdir, lstat, stat, strerror
from os.path import join, islink
from stat import S_IFDIR, S_IFLNK, S_IFREG
import collections
import sys

try:
    import _scandir
except ImportError:
    _scandir = None

try:
    import ctypes
except ImportError:
    ctypes = None

if _scandir is None and ctypes is None:
    import warnings
    warnings.warn("scandir can't find the compiled _scandir C module "
                  "or ctypes, using slow generic fallback")

__version__ = '1.10.0'
__all__ = ['scandir', 'walk']

# Windows FILE_ATTRIBUTE constants for interpreting the
# FIND_DATA.dwFileAttributes member
FILE_ATTRIBUTE_ARCHIVE = 32
FILE_ATTRIBUTE_COMPRESSED = 2048
FILE_ATTRIBUTE_DEVICE = 64
FILE_ATTRIBUTE_DIRECTORY = 16
FILE_ATTRIBUTE_ENCRYPTED = 16384
FILE_ATTRIBUTE_HIDDEN = 2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768
FILE_ATTRIBUTE_NORMAL = 128
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072
FILE_ATTRIBUTE_OFFLINE = 4096
FILE_ATTRIBUTE_READONLY = 1
FILE_ATTRIBUTE_REPARSE_POINT = 1024
FILE_ATTRIBUTE_SPARSE_FILE = 512
FILE_ATTRIBUTE_SYSTEM = 4
FILE_ATTRIBUTE_TEMPORARY = 256
FILE_ATTRIBUTE_VIRTUAL = 65536

IS_PY3 = sys.version_info >= (3, 0)

if IS_PY3:
    unicode = str  # Because Python <= 3.2 doesn't have u'unicode' syntax


class GenericDirEntry(object):
    """Directory entry backed purely by os.stat() / os.lstat().

    Used when neither the C extension nor a ctypes implementation is
    available. stat results are cached on the instance after the first
    successful call.
    """
    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self._scandir_path = scandir_path
        self.name = name
        self._stat = None
        self._lstat = None
        self._path = None

    @property
    def path(self):
        """The scanned directory joined with the entry name (cached)."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def stat(self, follow_symlinks=True):
        """Return the cached stat() (or lstat() if not following links)."""
        if follow_symlinks:
            if self._stat is None:
                self._stat = stat(self.path)
            return self._stat
        else:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

    # The code duplication below is intentional: this is for slightly
    # better performance on systems that fall back to GenericDirEntry.
    # It avoids an additional attribute lookup and method call, which
    # are relatively slow on CPython.
    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory; False if it is missing."""
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file; False if it is missing."""
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        try:
            st = self.stat(follow_symlinks=False)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFLNK

    def inode(self):
        """Return st_ino from the (cached) lstat() result."""
        st = self.stat(follow_symlinks=False)
        return st.st_ino

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__


def _scandir_generic(path=unicode('.')):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.
    """
    for name in listdir(path):
        yield GenericDirEntry(path, name)


if IS_PY3 and sys.platform == 'win32':
    def scandir_generic(path=unicode('.')):
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_generic(path)
    scandir_generic.__doc__ = _scandir_generic.__doc__
else:
    scandir_generic = _scandir_generic


scandir_c = None
scandir_python = None


if sys.platform == 'win32':
    if ctypes is not None:
        from ctypes import wintypes

        # Various constants from windows.h
        INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
        ERROR_FILE_NOT_FOUND = 2
        ERROR_NO_MORE_FILES = 18
        IO_REPARSE_TAG_SYMLINK = 0xA000000C

        # Number of seconds between 1601-01-01 and 1970-01-01
        SECONDS_BETWEEN_EPOCHS = 11644473600

        kernel32 = ctypes.windll.kernel32

        # ctypes wrappers for (wide string versions of) FindFirstFile,
        # FindNextFile, and FindClose
        FindFirstFile = kernel32.FindFirstFileW
        FindFirstFile.argtypes = [
            wintypes.LPCWSTR,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]
        FindFirstFile.restype = wintypes.HANDLE

        FindNextFile = kernel32.FindNextFileW
        FindNextFile.argtypes = [
            wintypes.HANDLE,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]
        FindNextFile.restype = wintypes.BOOL

        FindClose = kernel32.FindClose
        FindClose.argtypes = [wintypes.HANDLE]
        FindClose.restype = wintypes.BOOL

        Win32StatResult = collections.namedtuple('Win32StatResult', [
            'st_mode',
            'st_ino',
            'st_dev',
            'st_nlink',
            'st_uid',
            'st_gid',
            'st_size',
            'st_atime',
            'st_mtime',
            'st_ctime',
            'st_atime_ns',
            'st_mtime_ns',
            'st_ctime_ns',
            'st_file_attributes',
        ])

        def filetime_to_time(filetime):
            """Convert Win32 FILETIME to time since Unix epoch in seconds."""
            total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
            # True division (see the __future__ import) keeps the
            # sub-second part of the timestamp.
            return total / 10000000 - SECONDS_BETWEEN_EPOCHS

        def find_data_to_stat(data):
            """Convert Win32 FIND_DATA struct to stat_result."""
            # First convert Win32 dwFileAttributes to st_mode
            attributes = data.dwFileAttributes
            st_mode = 0
            if attributes & FILE_ATTRIBUTE_DIRECTORY:
                st_mode |= S_IFDIR | 0o111
            else:
                st_mode |= S_IFREG
            if attributes & FILE_ATTRIBUTE_READONLY:
                st_mode |= 0o444
            else:
                st_mode |= 0o666
            if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
                    data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
                # Clear the file-type bits and mark the entry as a symlink
                st_mode ^= st_mode & 0o170000
                st_mode |= S_IFLNK

            st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
            st_atime = filetime_to_time(data.ftLastAccessTime)
            st_mtime = filetime_to_time(data.ftLastWriteTime)
            st_ctime = filetime_to_time(data.ftCreationTime)

            # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
            # st_nlink, st_uid, st_gid
            return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
                                   st_atime, st_mtime, st_ctime,
                                   int(st_atime * 1000000000),
                                   int(st_mtime * 1000000000),
                                   int(st_ctime * 1000000000),
                                   attributes)

        class Win32DirEntryPython(object):
            """Directory entry built from a WIN32_FIND_DATAW record.

            Type queries on non-symlinks are answered from the find
            data, and the lstat-style result is derived from it lazily.
            """
            __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path', '_inode')

            def __init__(self, scandir_path, name, find_data):
                self._scandir_path = scandir_path
                self.name = name
                self._stat = None
                self._lstat = None
                self._find_data = find_data
                self._path = None
                self._inode = None

            @property
            def path(self):
                """The scanned directory joined with the entry name (cached)."""
                if self._path is None:
                    self._path = join(self._scandir_path, self.name)
                return self._path

            def stat(self, follow_symlinks=True):
                """Return a stat result, calling os.stat() only for symlinks."""
                if follow_symlinks:
                    if self._stat is None:
                        if self.is_symlink():
                            # It's a symlink, call link-following stat()
                            self._stat = stat(self.path)
                        else:
                            # Not a symlink, stat is same as lstat value
                            if self._lstat is None:
                                self._lstat = find_data_to_stat(self._find_data)
                            self._stat = self._lstat
                    return self._stat
                else:
                    if self._lstat is None:
                        # Lazily convert to stat object, because it's slow
                        # in Python, and often we only need is_dir() etc
                        self._lstat = find_data_to_stat(self._find_data)
                    return self._lstat

            def is_dir(self, follow_symlinks=True):
                """Return True if the entry is a directory."""
                is_symlink = self.is_symlink()
                if follow_symlinks and is_symlink:
                    try:
                        return self.stat().st_mode & 0o170000 == S_IFDIR
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                elif is_symlink:
                    return False
                else:
                    return (self._find_data.dwFileAttributes &
                            FILE_ATTRIBUTE_DIRECTORY != 0)

            def is_file(self, follow_symlinks=True):
                """Return True if the entry is a regular file."""
                is_symlink = self.is_symlink()
                if follow_symlinks and is_symlink:
                    try:
                        return self.stat().st_mode & 0o170000 == S_IFREG
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                elif is_symlink:
                    return False
                else:
                    return (self._find_data.dwFileAttributes &
                            FILE_ATTRIBUTE_DIRECTORY == 0)

            def is_symlink(self):
                """Return True if the find data marks a symlink reparse point."""
                return (self._find_data.dwFileAttributes &
                        FILE_ATTRIBUTE_REPARSE_POINT != 0 and
                        self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)

            def inode(self):
                """Return st_ino from os.lstat(); the find data carries none."""
                if self._inode is None:
                    self._inode = lstat(self.path).st_ino
                return self._inode

            def __str__(self):
                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

            __repr__ = __str__

        def win_error(error, filename):
            """Build a WindowsError for Win32 error code *error* on *filename*."""
            exc = WindowsError(error, ctypes.FormatError(error))
            exc.filename = filename
            return exc

        def _scandir_python(path=unicode('.')):
            """Like os.listdir(), but yield DirEntry objects instead of returning
            a list of names.
            """
            # Call FindFirstFile and handle errors
            if isinstance(path, bytes):
                is_bytes = True
                filename = join(path.decode('mbcs', 'strict'), '*.*')
            else:
                is_bytes = False
                filename = join(path, '*.*')
            data = wintypes.WIN32_FIND_DATAW()
            data_p = ctypes.byref(data)
            handle = FindFirstFile(filename, data_p)
            if handle == INVALID_HANDLE_VALUE:
                error = ctypes.GetLastError()
                if error == ERROR_FILE_NOT_FOUND:
                    # No files, don't yield anything
                    return
                raise win_error(error, path)

            # Call FindNextFile in a loop, stopping when no more files
            try:
                while True:
                    # Skip '.' and '..' (current and parent directory), but
                    # otherwise yield a DirEntry built from the find data
                    name = data.cFileName
                    if name not in ('.', '..'):
                        if is_bytes:
                            name = name.encode('mbcs', 'replace')
                        yield Win32DirEntryPython(path, name, data)

                    # A fresh struct per entry: the yielded DirEntry keeps
                    # a reference to its own find data.
                    data = wintypes.WIN32_FIND_DATAW()
                    data_p = ctypes.byref(data)
                    success = FindNextFile(handle, data_p)
                    if not success:
                        error = ctypes.GetLastError()
                        if error == ERROR_NO_MORE_FILES:
                            break
                        raise win_error(error, path)
            finally:
                if not FindClose(handle):
                    raise win_error(ctypes.GetLastError(), path)

        if IS_PY3:
            def scandir_python(path=unicode('.')):
                if isinstance(path, bytes):
                    raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
                return _scandir_python(path)
            scandir_python.__doc__ = _scandir_python.__doc__
        else:
            scandir_python = _scandir_python

    if _scandir is not None:
        scandir_c = _scandir.scandir
        DirEntry_c = _scandir.DirEntry

    if _scandir is not None:
        scandir = scandir_c
        DirEntry = DirEntry_c
    elif ctypes is not None:
        scandir = scandir_python
        DirEntry = Win32DirEntryPython
    else:
        scandir = scandir_generic
        DirEntry = GenericDirEntry


# Linux, OS X, and BSD implementation
elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform:
    have_dirent_d_type = (sys.platform != 'sunos5')

    if ctypes is not None and have_dirent_d_type:
        import ctypes.util

        DIR_p = ctypes.c_void_p

        # Rather annoying how the dirent struct is slightly different on each
        # platform. The only fields we care about are d_name and d_type.
        class Dirent(ctypes.Structure):
            if sys.platform.startswith('linux'):
                _fields_ = (
                    ('d_ino', ctypes.c_ulong),
                    ('d_off', ctypes.c_long),
                    ('d_reclen', ctypes.c_ushort),
                    ('d_type', ctypes.c_byte),
                    ('d_name', ctypes.c_char * 256),
                )
            elif 'openbsd' in sys.platform:
                _fields_ = (
                    ('d_ino', ctypes.c_uint64),
                    ('d_off', ctypes.c_uint64),
                    ('d_reclen', ctypes.c_uint16),
                    ('d_type', ctypes.c_uint8),
                    ('d_namlen', ctypes.c_uint8),
                    ('__d_padding', ctypes.c_uint8 * 4),
                    ('d_name', ctypes.c_char * 256),
                )
            else:
                _fields_ = (
                    ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
                    ('d_reclen', ctypes.c_ushort),
                    ('d_type', ctypes.c_byte),
                    ('d_namlen', ctypes.c_byte),
                    ('d_name', ctypes.c_char * 256),
                )

        DT_UNKNOWN = 0
        DT_DIR = 4
        DT_REG = 8
        DT_LNK = 10

        Dirent_p = ctypes.POINTER(Dirent)
        Dirent_pp = ctypes.POINTER(Dirent_p)

        libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
        opendir = libc.opendir
        opendir.argtypes = [ctypes.c_char_p]
        opendir.restype = DIR_p

        readdir_r = libc.readdir_r
        readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]
        readdir_r.restype = ctypes.c_int

        closedir = libc.closedir
        closedir.argtypes = [DIR_p]
        closedir.restype = ctypes.c_int

        file_system_encoding = sys.getfilesystemencoding()
        # On Python 3, os.listdir() and friends round-trip undecodable
        # filename bytes through the 'surrogateescape' handler. Do the
        # same here so one badly-encoded name cannot abort the listing
        # with a UnicodeDecodeError. Python 2 has no such handler.
        file_system_errors = 'surrogateescape' if IS_PY3 else 'strict'

        class PosixDirEntry(object):
            """Directory entry built from a dirent's name, d_type and d_ino.

            Type queries are answered from d_type where possible;
            os.stat() / os.lstat() are called only when d_type is
            DT_UNKNOWN or a symlink has to be followed.
            """
            __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode')

            def __init__(self, scandir_path, name, d_type, inode):
                self._scandir_path = scandir_path
                self.name = name
                self._d_type = d_type
                self._inode = inode
                self._stat = None
                self._lstat = None
                self._path = None

            @property
            def path(self):
                """The scanned directory joined with the entry name (cached)."""
                if self._path is None:
                    self._path = join(self._scandir_path, self.name)
                return self._path

            def stat(self, follow_symlinks=True):
                """Return the cached stat() (or lstat() if not following links)."""
                if follow_symlinks:
                    if self._stat is None:
                        if self.is_symlink():
                            self._stat = stat(self.path)
                        else:
                            # Not a symlink: lstat() and stat() agree, so
                            # share one result between both caches.
                            if self._lstat is None:
                                self._lstat = lstat(self.path)
                            self._stat = self._lstat
                    return self._stat
                else:
                    if self._lstat is None:
                        self._lstat = lstat(self.path)
                    return self._lstat

            def is_dir(self, follow_symlinks=True):
                """Return True if the entry is a directory."""
                if (self._d_type == DT_UNKNOWN or
                        (follow_symlinks and self.is_symlink())):
                    try:
                        st = self.stat(follow_symlinks=follow_symlinks)
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                    return st.st_mode & 0o170000 == S_IFDIR
                else:
                    return self._d_type == DT_DIR

            def is_file(self, follow_symlinks=True):
                """Return True if the entry is a regular file."""
                if (self._d_type == DT_UNKNOWN or
                        (follow_symlinks and self.is_symlink())):
                    try:
                        st = self.stat(follow_symlinks=follow_symlinks)
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                    return st.st_mode & 0o170000 == S_IFREG
                else:
                    return self._d_type == DT_REG

            def is_symlink(self):
                """Return True if the entry itself is a symbolic link."""
                if self._d_type == DT_UNKNOWN:
                    try:
                        st = self.stat(follow_symlinks=False)
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                    return st.st_mode & 0o170000 == S_IFLNK
                else:
                    return self._d_type == DT_LNK

            def inode(self):
                """Return the inode number read from the dirent (d_ino)."""
                return self._inode

            def __str__(self):
                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

            __repr__ = __str__

        def posix_error(filename, error_code=None):
            """Build an OSError for *filename*.

            *error_code* is the errno value to report. When omitted, the
            ctypes copy of errno is used, which suits calls such as
            opendir() and closedir() that report failure through errno.
            """
            if error_code is None:
                error_code = ctypes.get_errno()
            exc = OSError(error_code, strerror(error_code))
            exc.filename = filename
            return exc

        def scandir_python(path=unicode('.')):
            """Like os.listdir(), but yield DirEntry objects instead of returning
            a list of names.
            """
            if isinstance(path, bytes):
                opendir_path = path
                is_bytes = True
            else:
                opendir_path = path.encode(file_system_encoding,
                                           file_system_errors)
                is_bytes = False
            dir_p = opendir(opendir_path)
            if not dir_p:
                raise posix_error(path)
            try:
                result = Dirent_p()
                while True:
                    entry = Dirent()
                    # readdir_r() returns the error number directly and
                    # does not set errno, so pass its return value on.
                    error_code = readdir_r(dir_p, entry, result)
                    if error_code:
                        raise posix_error(path, error_code)
                    if not result:
                        # NULL result pointer: end of directory stream
                        break
                    name = entry.d_name
                    if name not in (b'.', b'..'):
                        if not is_bytes:
                            name = name.decode(file_system_encoding,
                                               file_system_errors)
                        yield PosixDirEntry(path, name, entry.d_type, entry.d_ino)
            finally:
                if closedir(dir_p):
                    raise posix_error(path)

    if _scandir is not None:
        scandir_c = _scandir.scandir
        DirEntry_c = _scandir.DirEntry

    if _scandir is not None:
        scandir = scandir_c
        DirEntry = DirEntry_c
    elif ctypes is not None and have_dirent_d_type:
        scandir = scandir_python
        DirEntry = PosixDirEntry
    else:
        scandir = scandir_generic
        DirEntry = GenericDirEntry


# Some other system -- no d_type or stat information
else:
    scandir = scandir_generic
    DirEntry = GenericDirEntry


def _walk(top, topdown=True, onerror=None, followlinks=False):
    """Like Python 3.5's implementation of os.walk() -- faster than
    the pre-Python 3.5 version as it uses scandir() internally.

    Yields (top, dirs, nondirs) tuples. OSErrors raised while listing
    a directory are passed to *onerror* (if given) and that directory
    is skipped.
    """
    dirs = []
    nondirs = []

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains.  os.walk
    # always suppressed the exception then, rather than blow up for a
    # minor reason when (say) a thousand readable directories are still
    # left to visit.  That logic is copied here.
    try:
        scandir_it = scandir(top)
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    while True:
        try:
            try:
                entry = next(scandir_it)
            except StopIteration:
                break
        except OSError as error:
            if onerror is not None:
                onerror(error)
            return

        try:
            is_dir = entry.is_dir()
        except OSError:
            # If is_dir() raises an OSError, consider that the entry is not
            # a directory, same behaviour as os.path.isdir().
            is_dir = False

        if is_dir:
            dirs.append(entry.name)
        else:
            nondirs.append(entry.name)

        if not topdown and is_dir:
            # Bottom-up: recurse into sub-directory, but exclude symlinks to
            # directories if followlinks is False
            if followlinks:
                walk_into = True
            else:
                try:
                    is_symlink = entry.is_symlink()
                except OSError:
                    # If is_symlink() raises an OSError, consider that the
                    # entry is not a symbolic link, same behaviour as
                    # os.path.islink().
                    is_symlink = False
                walk_into = not is_symlink

            if walk_into:
                # A distinct loop variable: reusing ``entry`` here would
                # shadow the directory entry being processed.
                for result in walk(entry.path, topdown, onerror, followlinks):
                    yield result

    # Yield before recursion if going top down
    if topdown:
        yield top, dirs, nondirs

        # Recurse into sub-directories
        for name in dirs:
            new_path = join(top, name)
            # Issue #23605: os.path.islink() is used instead of caching
            # entry.is_symlink() result during the loop on os.scandir() because
            # the caller can replace the directory entry during the "yield"
            # above.
            if followlinks or not islink(new_path):
                for result in walk(new_path, topdown, onerror, followlinks):
                    yield result
    else:
        # Yield after recursion if going bottom up
        yield top, dirs, nondirs


if IS_PY3 or sys.platform != 'win32':
    walk = _walk
else:
    # Fix for broken unicode handling on Windows on Python 2.x, see:
    # https://github.com/benhoyt/scandir/issues/54
    file_system_encoding = sys.getfilesystemencoding()

    def walk(top, topdown=True, onerror=None, followlinks=False):
        if isinstance(top, bytes):
            top = top.decode(file_system_encoding)
        return _walk(top, topdown, onerror, followlinks)