"""scandir, a better directory iterator that exposes all file info OS provides

scandir is a generator version of os.listdir() that returns an iterator over
files in a directory, and also exposes the extra information most OSes provide
while iterating files in a directory.

See README.md or https://github.com/benhoyt/scandir for rationale and docs.

scandir is released under the new BSD 3-clause license. See LICENSE.txt for
the full license text.
"""

from __future__ import division

from errno import ENOENT
from os import listdir, lstat, stat, strerror
from os.path import join
from stat import S_IFDIR, S_IFLNK, S_IFREG
import collections
import ctypes
import os
import sys

__version__ = '0.9'
__all__ = ['scandir', 'walk']

# Windows FILE_ATTRIBUTE constants for interpreting the
# FIND_DATA.dwFileAttributes member
FILE_ATTRIBUTE_ARCHIVE = 32
FILE_ATTRIBUTE_COMPRESSED = 2048
FILE_ATTRIBUTE_DEVICE = 64
FILE_ATTRIBUTE_DIRECTORY = 16
FILE_ATTRIBUTE_ENCRYPTED = 16384
FILE_ATTRIBUTE_HIDDEN = 2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768
FILE_ATTRIBUTE_NORMAL = 128
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072
FILE_ATTRIBUTE_OFFLINE = 4096
FILE_ATTRIBUTE_READONLY = 1
FILE_ATTRIBUTE_REPARSE_POINT = 1024
FILE_ATTRIBUTE_SPARSE_FILE = 512
FILE_ATTRIBUTE_SYSTEM = 4
FILE_ATTRIBUTE_TEMPORARY = 256
FILE_ATTRIBUTE_VIRTUAL = 65536

IS_PY3 = sys.version_info >= (3, 0)

if not IS_PY3:
    str = unicode

_scandir = None


class GenericDirEntry(object):
    """Directory entry for platforms with no d_type or find-data support.

    Every query is answered by calling os.stat() or os.lstat() on the
    entry's full path; each of the two results is cached after first use.
    """

    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self._scandir_path = scandir_path
        self.name = name
        self._stat = None
        self._lstat = None
        self._path = None

    @property
    def path(self):
        """Scanned directory joined with the entry name (computed lazily)."""
        if self._path is None:
            self._path = join(self._scandir_path, self.name)
        return self._path

    def stat(self, follow_symlinks=True):
        """Return the cached stat() (or lstat() if not following links)."""
        if follow_symlinks:
            if self._stat is None:
                self._stat = stat(self.path)
            return self._stat
        else:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory; False if it is missing."""
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file; False if missing."""
        try:
            st = self.stat(follow_symlinks=follow_symlinks)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        try:
            st = self.stat(follow_symlinks=False)
        except OSError as e:
            if e.errno != ENOENT:
                raise
            return False  # Path doesn't exist or is a broken symlink
        return st.st_mode & 0o170000 == S_IFLNK

    def __str__(self):
        return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

    __repr__ = __str__


def scandir_generic(path=u'.'):
    """Like os.listdir(), but yield DirEntry objects instead of returning
    a list of names.
    """
    for name in listdir(path):
        yield GenericDirEntry(path, name)


if sys.platform == 'win32':
    from ctypes import wintypes

    # Various constants from windows.h
    INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
    ERROR_FILE_NOT_FOUND = 2
    ERROR_NO_MORE_FILES = 18
    IO_REPARSE_TAG_SYMLINK = 0xA000000C

    # Number of seconds between 1601-01-01 and 1970-01-01
    SECONDS_BETWEEN_EPOCHS = 11644473600

    kernel32 = ctypes.windll.kernel32

    # ctypes wrappers for (wide string versions of) FindFirstFile,
    # FindNextFile, and FindClose
    FindFirstFile = kernel32.FindFirstFileW
    FindFirstFile.argtypes = [
        wintypes.LPCWSTR,
        ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
    ]
    FindFirstFile.restype = wintypes.HANDLE

    FindNextFile = kernel32.FindNextFileW
    FindNextFile.argtypes = [
        wintypes.HANDLE,
        ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
    ]
    FindNextFile.restype = wintypes.BOOL

    FindClose = kernel32.FindClose
    FindClose.argtypes = [wintypes.HANDLE]
    FindClose.restype = wintypes.BOOL

    Win32StatResult = collections.namedtuple('Win32StatResult', [
        'st_mode',
        'st_ino',
        'st_dev',
        'st_nlink',
        'st_uid',
        'st_gid',
        'st_size',
        'st_atime',
        'st_mtime',
        'st_ctime',
        'st_atime_ns',
        'st_mtime_ns',
        'st_ctime_ns',
        'st_file_attributes',
    ])

    def filetime_to_time(filetime):
        """Convert Win32 FILETIME to time since Unix epoch in seconds."""
        total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
        return total / 10000000 - SECONDS_BETWEEN_EPOCHS

    def filetime_to_ns(filetime):
        """Convert Win32 FILETIME to integer nanoseconds since Unix epoch.

        Works purely on integers (FILETIME counts 100-nanosecond ticks), so
        the result is exact rather than rounded through a float.
        """
        total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
        return (total - SECONDS_BETWEEN_EPOCHS * 10000000) * 100

    def find_data_to_stat(data):
        """Convert Win32 FIND_DATA struct to stat_result."""
        # First convert Win32 dwFileAttributes to st_mode
        attributes = data.dwFileAttributes
        st_mode = 0
        if attributes & FILE_ATTRIBUTE_DIRECTORY:
            st_mode |= S_IFDIR | 0o111
        else:
            st_mode |= S_IFREG
        if attributes & FILE_ATTRIBUTE_READONLY:
            st_mode |= 0o444
        else:
            st_mode |= 0o666
        if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
                data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
            # Clear the file-type bits and mark the entry as a symlink
            st_mode ^= st_mode & 0o170000
            st_mode |= S_IFLNK

        st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
        st_atime = filetime_to_time(data.ftLastAccessTime)
        st_mtime = filetime_to_time(data.ftLastWriteTime)
        st_ctime = filetime_to_time(data.ftCreationTime)

        # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
        # st_nlink, st_uid, st_gid
        return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
                               st_atime, st_mtime, st_ctime,
                               filetime_to_ns(data.ftLastAccessTime),
                               filetime_to_ns(data.ftLastWriteTime),
                               filetime_to_ns(data.ftCreationTime),
                               attributes)

    class Win32DirEntryPython(object):
        """Directory entry backed by a WIN32_FIND_DATAW struct (ctypes).

        is_dir(), is_file() and is_symlink() are answered from the find
        data without a system call, except when a symlink must be followed.
        """

        __slots__ = ('name', '_stat', '_lstat', '_find_data',
                     '_scandir_path', '_path')

        def __init__(self, scandir_path, name, find_data):
            self._scandir_path = scandir_path
            self.name = name
            self._stat = None
            self._lstat = None
            self._find_data = find_data
            self._path = None

        @property
        def path(self):
            """Scanned directory joined with the entry name (lazy)."""
            if self._path is None:
                self._path = join(self._scandir_path, self.name)
            return self._path

        def stat(self, follow_symlinks=True):
            """Return stat info, calling os.stat() only for symlinks."""
            if follow_symlinks:
                if self._stat is None:
                    if self.is_symlink():
                        # It's a symlink, call link-following stat()
                        self._stat = stat(self.path)
                    else:
                        # Not a symlink, stat is same as lstat value
                        if self._lstat is None:
                            self._lstat = find_data_to_stat(self._find_data)
                        self._stat = self._lstat
                return self._stat
            else:
                if self._lstat is None:
                    # Lazily convert to stat object, because it's slow
                    # in Python, and often we only need is_dir() etc
                    self._lstat = find_data_to_stat(self._find_data)
                return self._lstat

        def is_dir(self, follow_symlinks=True):
            """Return True if the entry is (or links to) a directory."""
            is_symlink = self.is_symlink()
            if follow_symlinks and is_symlink:
                try:
                    return self.stat().st_mode & 0o170000 == S_IFDIR
                except OSError as e:
                    if e.errno != ENOENT:
                        raise
                    return False
            elif is_symlink:
                return False
            else:
                return (self._find_data.dwFileAttributes &
                        FILE_ATTRIBUTE_DIRECTORY != 0)

        def is_file(self, follow_symlinks=True):
            """Return True if the entry is (or links to) a regular file."""
            is_symlink = self.is_symlink()
            if follow_symlinks and is_symlink:
                try:
                    return self.stat().st_mode & 0o170000 == S_IFREG
                except OSError as e:
                    if e.errno != ENOENT:
                        raise
                    return False
            elif is_symlink:
                return False
            else:
                return (self._find_data.dwFileAttributes &
                        FILE_ATTRIBUTE_DIRECTORY == 0)

        def is_symlink(self):
            """Return True if the find data marks a symlink reparse point."""
            return (self._find_data.dwFileAttributes &
                    FILE_ATTRIBUTE_REPARSE_POINT != 0 and
                    self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)

        def __str__(self):
            return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

        __repr__ = __str__

    def win_error(error, filename):
        """Build a WindowsError for a Win32 error code and file name."""
        exc = WindowsError(error, ctypes.FormatError(error))
        exc.filename = filename
        return exc

    def scandir_python(path=u'.'):
        """Like os.listdir(), but yield DirEntry objects instead of returning
        a list of names.
        """
        # Call FindFirstFile and handle errors
        if isinstance(path, bytes):
            is_bytes = True
            filename = join(path.decode('mbcs', 'strict'), '*.*')
        else:
            is_bytes = False
            filename = join(path, '*.*')
        data = wintypes.WIN32_FIND_DATAW()
        data_p = ctypes.byref(data)
        handle = FindFirstFile(filename, data_p)
        if handle == INVALID_HANDLE_VALUE:
            error = ctypes.GetLastError()
            if error == ERROR_FILE_NOT_FOUND:
                # No files, don't yield anything
                return
            raise win_error(error, path)

        # Call FindNextFile in a loop, stopping when no more files
        try:
            while True:
                # Skip '.' and '..' (current and parent directory), but
                # otherwise yield a DirEntry wrapping the find data
                name = data.cFileName
                if name not in ('.', '..'):
                    if is_bytes:
                        name = name.encode('mbcs', 'replace')
                    yield Win32DirEntryPython(path, name, data)

                # Fresh struct per entry: the yielded entry keeps a
                # reference to the previous one
                data = wintypes.WIN32_FIND_DATAW()
                data_p = ctypes.byref(data)
                success = FindNextFile(handle, data_p)
                if not success:
                    error = ctypes.GetLastError()
                    if error == ERROR_NO_MORE_FILES:
                        break
                    raise win_error(error, path)
        finally:
            if not FindClose(handle):
                raise win_error(ctypes.GetLastError(), path)

    try:
        import _scandir

        scandir_helper = _scandir.scandir_helper

        class Win32DirEntryC(object):
            """Directory entry whose lstat result comes from the C helper."""

            __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

            def __init__(self, scandir_path, name, lstat):
                self._scandir_path = scandir_path
                self.name = name
                self._stat = None
                self._lstat = lstat
                self._path = None

            @property
            def path(self):
                """Scanned directory joined with the entry name (lazy)."""
                if self._path is None:
                    self._path = join(self._scandir_path, self.name)
                return self._path

            def stat(self, follow_symlinks=True):
                """Return stat info, calling os.stat() only for symlinks."""
                if follow_symlinks:
                    if self._stat is None:
                        if self.is_symlink():
                            self._stat = stat(self.path)
                        else:
                            self._stat = self._lstat
                    return self._stat
                else:
                    return self._lstat

            def is_dir(self, follow_symlinks=True):
                """Return True if the entry is (or links to) a directory."""
                if follow_symlinks and self.is_symlink():
                    try:
                        st = self.stat()
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                else:
                    st = self._lstat
                return st.st_mode & 0o170000 == S_IFDIR

            def is_file(self, follow_symlinks=True):
                """Return True if the entry is (or links to) a regular file."""
                if follow_symlinks and self.is_symlink():
                    try:
                        st = self.stat()
                    except OSError as e:
                        if e.errno != ENOENT:
                            raise
                        return False
                else:
                    st = self._lstat
                return st.st_mode & 0o170000 == S_IFREG

            def is_symlink(self):
                """Return True if the lstat mode marks a symbolic link."""
                return self._lstat.st_mode & 0o170000 == S_IFLNK

            def __str__(self):
                return '<{0}: {1!r}>'.format(self.__class__.__name__,
                                             self.name)

            __repr__ = __str__

        def scandir_c(path=u'.'):
            """Yield Win32DirEntryC objects using the _scandir C helper."""
            if isinstance(path, bytes):
                for name, stat in scandir_helper(path.decode('mbcs', 'replace')):
                    name = name.encode('mbcs', 'replace')
                    yield Win32DirEntryC(path, name, stat)
            else:
                for name, stat in scandir_helper(path):
                    yield Win32DirEntryC(path, name, stat)

        scandir = scandir_c

    except ImportError:
        scandir = scandir_python


# Linux, OS X, and BSD implementation
elif sys.platform.startswith(('linux', 'darwin')) or 'bsd' in sys.platform:
    import ctypes.util

    DIR_p = ctypes.c_void_p

    # Rather annoying how the dirent struct is slightly different on each
    # platform. The only fields we care about are d_name and d_type.
    class Dirent(ctypes.Structure):
        if sys.platform.startswith('linux'):
            _fields_ = (
                ('d_ino', ctypes.c_ulong),
                ('d_off', ctypes.c_long),
                ('d_reclen', ctypes.c_ushort),
                ('d_type', ctypes.c_byte),
                ('d_name', ctypes.c_char * 256),
            )
        elif sys.platform.startswith('freebsd') and int(sys.platform[7:]) > 11:
            _fields_ = (
                ('d_ino', ctypes.c_uint64),
                ('d_off', ctypes.c_uint64),
                ('d_reclen', ctypes.c_ushort),
                ('d_type', ctypes.c_byte),
                ('d_pad0', ctypes.c_byte),
                ('d_namlen', ctypes.c_ushort),
                ('d_pad1', ctypes.c_ushort),
                ('d_name', ctypes.c_char * 256),
            )
        else:
            _fields_ = (
                ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
                ('d_reclen', ctypes.c_ushort),
                ('d_type', ctypes.c_byte),
                ('d_namlen', ctypes.c_byte),
                ('d_name', ctypes.c_char * 256),
            )

    DT_UNKNOWN = 0
    DT_DIR = 4
    DT_REG = 8
    DT_LNK = 10

    Dirent_p = ctypes.POINTER(Dirent)
    Dirent_pp = ctypes.POINTER(Dirent_p)

    libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
    opendir = libc.opendir
    opendir.argtypes = [ctypes.c_char_p]
    opendir.restype = DIR_p

    readdir_r = libc.readdir_r
    readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]
    readdir_r.restype = ctypes.c_int

    closedir = libc.closedir
    closedir.argtypes = [DIR_p]
    closedir.restype = ctypes.c_int

    file_system_encoding = sys.getfilesystemencoding()

    if hasattr(os, 'fsencode') and hasattr(os, 'fsdecode'):
        # Use the same codec and error handler as os.listdir(), so names
        # that are not valid in the filesystem encoding round-trip instead
        # of raising UnicodeDecodeError
        _fs_encode = os.fsencode
        _fs_decode = os.fsdecode
    else:
        def _fs_encode(text):
            """Encode a text path to bytes (fallback for old Pythons)."""
            return text.encode(file_system_encoding)

        def _fs_decode(raw):
            """Decode a bytes name to text (fallback for old Pythons)."""
            return raw.decode(file_system_encoding)

    class PosixDirEntry(object):
        """Directory entry backed by the d_type value from readdir.

        is_dir(), is_file() and is_symlink() use d_type when it is known,
        and fall back to stat()/lstat() when it is DT_UNKNOWN or a symlink
        has to be followed.
        """

        __slots__ = ('name', '_d_type', '_stat', '_lstat',
                     '_scandir_path', '_path')

        def __init__(self, scandir_path, name, d_type):
            self._scandir_path = scandir_path
            self.name = name
            self._d_type = d_type
            self._stat = None
            self._lstat = None
            self._path = None

        @property
        def path(self):
            """Scanned directory joined with the entry name (lazy)."""
            if self._path is None:
                self._path = join(self._scandir_path, self.name)
            return self._path

        def stat(self, follow_symlinks=True):
            """Return the cached stat() (or lstat() if not following links).

            For a non-symlink the lstat() result is reused as the stat()
            result.
            """
            if follow_symlinks:
                if self._stat is None:
                    if self.is_symlink():
                        self._stat = stat(self.path)
                    else:
                        if self._lstat is None:
                            self._lstat = lstat(self.path)
                        self._stat = self._lstat
                return self._stat
            else:
                if self._lstat is None:
                    self._lstat = lstat(self.path)
                return self._lstat

        def is_dir(self, follow_symlinks=True):
            """Return True if the entry is (or links to) a directory."""
            if (self._d_type == DT_UNKNOWN or
                    (follow_symlinks and self.is_symlink())):
                try:
                    st = self.stat(follow_symlinks=follow_symlinks)
                except OSError as e:
                    if e.errno != ENOENT:
                        raise
                    return False
                return st.st_mode & 0o170000 == S_IFDIR
            else:
                return self._d_type == DT_DIR

        def is_file(self, follow_symlinks=True):
            """Return True if the entry is (or links to) a regular file."""
            if (self._d_type == DT_UNKNOWN or
                    (follow_symlinks and self.is_symlink())):
                try:
                    st = self.stat(follow_symlinks=follow_symlinks)
                except OSError as e:
                    if e.errno != ENOENT:
                        raise
                    return False
                return st.st_mode & 0o170000 == S_IFREG
            else:
                return self._d_type == DT_REG

        def is_symlink(self):
            """Return True if the entry itself is a symbolic link."""
            if self._d_type == DT_UNKNOWN:
                try:
                    st = self.stat(follow_symlinks=False)
                except OSError as e:
                    if e.errno != ENOENT:
                        raise
                    return False
                return st.st_mode & 0o170000 == S_IFLNK
            else:
                return self._d_type == DT_LNK

        def __str__(self):
            return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)

        __repr__ = __str__

    def posix_error(filename, errno=None):
        """Build an OSError for filename.

        Uses the given error number, or the ctypes copy of the C errno
        when errno is None.
        """
        if errno is None:
            errno = ctypes.get_errno()
        exc = OSError(errno, strerror(errno))
        exc.filename = filename
        return exc

    def scandir_python(path=u'.'):
        """Like os.listdir(), but yield DirEntry objects instead of returning
        a list of names.
        """
        if isinstance(path, bytes):
            opendir_path = path
            is_bytes = True
        else:
            opendir_path = _fs_encode(path)
            is_bytes = False
        dir_p = opendir(opendir_path)
        if not dir_p:
            raise posix_error(path)
        try:
            result = Dirent_p()
            while True:
                entry = Dirent()
                # readdir_r() reports failure through its return value (an
                # error number), not through errno
                error = readdir_r(dir_p, entry, result)
                if error:
                    raise posix_error(path, error)
                if not result:
                    # NULL result pointer means end of directory
                    break
                name = entry.d_name
                if name not in (b'.', b'..'):
                    if not is_bytes:
                        name = _fs_decode(name)
                    yield PosixDirEntry(path, name, entry.d_type)
        finally:
            if closedir(dir_p):
                raise posix_error(path)

    try:
        import _scandir

        scandir_helper = _scandir.scandir_helper

        def scandir_c(path=u'.'):
            """Yield PosixDirEntry objects using the _scandir C helper."""
            is_bytes = isinstance(path, bytes)
            for name, d_type in scandir_helper(path):
                if not is_bytes:
                    name = _fs_decode(name)
                yield PosixDirEntry(path, name, d_type)

        scandir = scandir_c

    except ImportError:
        scandir = scandir_python


# Some other system -- no d_type or stat information
else:
    scandir = scandir_generic


def walk(top, topdown=True, onerror=None, followlinks=False):
    """Like os.walk(), but faster, as it uses scandir() internally."""
    # Determine which are files and which are directories
    dirs = []
    nondirs = []
    symlinks = set()
    try:
        for entry in scandir(top):
            try:
                if entry.is_dir():
                    dirs.append(entry.name)
                else:
                    nondirs.append(entry.name)
            except OSError:
                # Need this to emulate os.walk(), which uses
                # os.path.isdir(), and that returns False (nondir) on
                # any OSError; same with entry.is_symlink() below
                nondirs.append(entry.name)
            try:
                if entry.is_symlink():
                    symlinks.add(entry.name)
            except OSError:
                pass
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    # Yield before recursion if going top down
    if topdown:
        yield top, dirs, nondirs

    # Recurse into sub-directories, following symbolic links if "followlinks"
    for name in dirs:
        if followlinks or name not in symlinks:
            new_path = join(top, name)
            for x in walk(new_path, topdown, onerror, followlinks):
                yield x

    # Yield after recursion if going bottom up
    if not topdown:
        yield top, dirs, nondirs