1"""scandir, a better directory iterator and faster os.walk(), now in the Python 3.5 stdlib
2
3scandir() is a generator version of os.listdir() that returns an
4iterator over files in a directory, and also exposes the extra
5information most OSes provide while iterating files in a directory
6(such as type and stat information).
7
8This module also includes a version of os.walk() that uses scandir()
9to speed it up significantly.
10
11See README.md or https://github.com/benhoyt/scandir for rationale and
12docs, or read PEP 471 (https://www.python.org/dev/peps/pep-0471/) for
13more details on its inclusion into Python 3.5
14
15scandir is released under the new BSD 3-clause license. See
16LICENSE.txt for the full license text.
17"""
18
19from __future__ import division
20
21from errno import ENOENT
22from os import listdir, lstat, stat, strerror
23from os.path import join, islink
24from stat import S_IFDIR, S_IFLNK, S_IFREG
25import collections
26import sys
27
28try:
29    import _scandir
30except ImportError:
31    _scandir = None
32
33try:
34    import ctypes
35except ImportError:
36    ctypes = None
37
38if _scandir is None and ctypes is None:
39    import warnings
40    warnings.warn("scandir can't find the compiled _scandir C module "
41                  "or ctypes, using slow generic fallback")
42
# Package version string.
__version__ = '1.10.0'
# Names exported by "from scandir import *".
__all__ = ['scandir', 'walk']

# Windows FILE_ATTRIBUTE constants for interpreting the
# FIND_DATA.dwFileAttributes member. Each value is a single bit, so they
# are tested with "&" against the attribute word. The pure-Python code in
# this module only tests DIRECTORY, READONLY and REPARSE_POINT.
FILE_ATTRIBUTE_ARCHIVE = 32
FILE_ATTRIBUTE_COMPRESSED = 2048
FILE_ATTRIBUTE_DEVICE = 64
FILE_ATTRIBUTE_DIRECTORY = 16
FILE_ATTRIBUTE_ENCRYPTED = 16384
FILE_ATTRIBUTE_HIDDEN = 2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768
FILE_ATTRIBUTE_NORMAL = 128
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072
FILE_ATTRIBUTE_OFFLINE = 4096
FILE_ATTRIBUTE_READONLY = 1
FILE_ATTRIBUTE_REPARSE_POINT = 1024
FILE_ATTRIBUTE_SPARSE_FILE = 512
FILE_ATTRIBUTE_SYSTEM = 4
FILE_ATTRIBUTE_TEMPORARY = 256
FILE_ATTRIBUTE_VIRTUAL = 65536

# True when running under any Python 3.x interpreter.
IS_PY3 = sys.version_info >= (3, 0)

if IS_PY3:
    # Alias so that unicode('.') works as a text-string default argument on
    # both Python 2 and Python 3.
    unicode = str  # Because Python <= 3.2 doesn't have u'unicode' syntax
70
71
class GenericDirEntry(object):
    """Directory entry whose type information comes solely from stat calls.

    The entry only knows the directory it was found in and its name;
    everything else is obtained on demand through os.stat()/os.lstat()
    and cached on the instance.
    """

    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self.name = name
        self._scandir_path = scandir_path
        # Lazily populated caches
        self._path = None
        self._stat = None
        self._lstat = None

    @property
    def path(self):
        """Scanned directory joined with the entry name (computed once)."""
        full_path = self._path
        if full_path is None:
            full_path = self._path = join(self._scandir_path, self.name)
        return full_path

    def stat(self, follow_symlinks=True):
        """Return the cached os.stat() result, or the cached os.lstat()
        result when follow_symlinks is false.
        """
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat
        if self._stat is None:
            self._stat = stat(self.path)
        return self._stat

    # The three type checks below deliberately repeat the same code rather
    # than share a helper: on systems that fall back to GenericDirEntry this
    # saves an extra attribute lookup and method call, both of which are
    # relatively slow on CPython.
    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory; False if it vanished."""
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno == ENOENT:
                # Path doesn't exist or is a broken symlink
                return False
            raise
        return (info.st_mode & 0o170000) == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file; False if it vanished."""
        try:
            info = self.stat(follow_symlinks=follow_symlinks)
        except OSError as exc:
            if exc.errno == ENOENT:
                # Path doesn't exist or is a broken symlink
                return False
            raise
        return (info.st_mode & 0o170000) == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link."""
        try:
            info = self.stat(follow_symlinks=False)
        except OSError as exc:
            if exc.errno == ENOENT:
                # Path doesn't exist
                return False
            raise
        return (info.st_mode & 0o170000) == S_IFLNK

    def inode(self):
        """Return st_ino from the (cached) lstat result of the entry."""
        return self.stat(follow_symlinks=False).st_ino

    def __str__(self):
        class_name = self.__class__.__name__
        return '<{0}: {1!r}>'.format(class_name, self.name)

    __repr__ = __str__
137
138
def _scandir_generic(path=unicode('.')):
    """Lazily yield a GenericDirEntry for each name os.listdir() returns.

    Nothing is listed until the generator is first advanced, and no type
    or stat information is collected up front; each entry looks it up on
    demand.
    """
    for entry_name in listdir(path):
        yield GenericDirEntry(path, entry_name)
145
146
if IS_PY3 and sys.platform == 'win32':
    # On Python 3 for Windows, bytes paths are rejected with a TypeError
    # before the generic generator is created.
    def scandir_generic(path=unicode('.')):
        if isinstance(path, bytes):
            raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
        return _scandir_generic(path)
    # The wrapper advertises the wrapped generator's docstring.
    scandir_generic.__doc__ = _scandir_generic.__doc__
else:
    scandir_generic = _scandir_generic


# Defaults for the optional implementations; the platform-specific sections
# below rebind these when the C module or a ctypes implementation is
# available.
scandir_c = None
scandir_python = None
159
160
161if sys.platform == 'win32':
162    if ctypes is not None:
163        from ctypes import wintypes
164
        # Various constants from windows.h
        # INVALID_HANDLE_VALUE is (HANDLE)-1, held as the Python integer
        # that FindFirstFile's HANDLE return value is compared against.
        INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
        ERROR_FILE_NOT_FOUND = 2
        ERROR_NO_MORE_FILES = 18
        IO_REPARSE_TAG_SYMLINK = 0xA000000C

        # Number of seconds between 1601-01-01 and 1970-01-01
        SECONDS_BETWEEN_EPOCHS = 11644473600

        kernel32 = ctypes.windll.kernel32

        # ctypes wrappers for (wide string versions of) FindFirstFile,
        # FindNextFile, and FindClose. Declaring argtypes/restype makes
        # ctypes convert arguments and return values explicitly.
        FindFirstFile = kernel32.FindFirstFileW
        FindFirstFile.argtypes = [
            wintypes.LPCWSTR,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]
        FindFirstFile.restype = wintypes.HANDLE

        FindNextFile = kernel32.FindNextFileW
        FindNextFile.argtypes = [
            wintypes.HANDLE,
            ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
        ]
        FindNextFile.restype = wintypes.BOOL

        FindClose = kernel32.FindClose
        FindClose.argtypes = [wintypes.HANDLE]
        FindClose.restype = wintypes.BOOL

        # Lightweight stand-in for os.stat_result, filled in by
        # find_data_to_stat() from a FIND_DATA record.
        Win32StatResult = collections.namedtuple('Win32StatResult', [
            'st_mode',
            'st_ino',
            'st_dev',
            'st_nlink',
            'st_uid',
            'st_gid',
            'st_size',
            'st_atime',
            'st_mtime',
            'st_ctime',
            'st_atime_ns',
            'st_mtime_ns',
            'st_ctime_ns',
            'st_file_attributes',
        ])
212
213        def filetime_to_time(filetime):
214            """Convert Win32 FILETIME to time since Unix epoch in seconds."""
215            total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
216            return total / 10000000 - SECONDS_BETWEEN_EPOCHS
217
218        def find_data_to_stat(data):
219            """Convert Win32 FIND_DATA struct to stat_result."""
220            # First convert Win32 dwFileAttributes to st_mode
221            attributes = data.dwFileAttributes
222            st_mode = 0
223            if attributes & FILE_ATTRIBUTE_DIRECTORY:
224                st_mode |= S_IFDIR | 0o111
225            else:
226                st_mode |= S_IFREG
227            if attributes & FILE_ATTRIBUTE_READONLY:
228                st_mode |= 0o444
229            else:
230                st_mode |= 0o666
231            if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
232                    data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
233                st_mode ^= st_mode & 0o170000
234                st_mode |= S_IFLNK
235
236            st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
237            st_atime = filetime_to_time(data.ftLastAccessTime)
238            st_mtime = filetime_to_time(data.ftLastWriteTime)
239            st_ctime = filetime_to_time(data.ftCreationTime)
240
241            # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
242            # st_nlink, st_uid, st_gid
243            return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
244                                   st_atime, st_mtime, st_ctime,
245                                   int(st_atime * 1000000000),
246                                   int(st_mtime * 1000000000),
247                                   int(st_ctime * 1000000000),
248                                   attributes)
249
250        class Win32DirEntryPython(object):
251            __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path', '_inode')
252
253            def __init__(self, scandir_path, name, find_data):
254                self._scandir_path = scandir_path
255                self.name = name
256                self._stat = None
257                self._lstat = None
258                self._find_data = find_data
259                self._path = None
260                self._inode = None
261
262            @property
263            def path(self):
264                if self._path is None:
265                    self._path = join(self._scandir_path, self.name)
266                return self._path
267
268            def stat(self, follow_symlinks=True):
269                if follow_symlinks:
270                    if self._stat is None:
271                        if self.is_symlink():
272                            # It's a symlink, call link-following stat()
273                            self._stat = stat(self.path)
274                        else:
275                            # Not a symlink, stat is same as lstat value
276                            if self._lstat is None:
277                                self._lstat = find_data_to_stat(self._find_data)
278                            self._stat = self._lstat
279                    return self._stat
280                else:
281                    if self._lstat is None:
282                        # Lazily convert to stat object, because it's slow
283                        # in Python, and often we only need is_dir() etc
284                        self._lstat = find_data_to_stat(self._find_data)
285                    return self._lstat
286
287            def is_dir(self, follow_symlinks=True):
288                is_symlink = self.is_symlink()
289                if follow_symlinks and is_symlink:
290                    try:
291                        return self.stat().st_mode & 0o170000 == S_IFDIR
292                    except OSError as e:
293                        if e.errno != ENOENT:
294                            raise
295                        return False
296                elif is_symlink:
297                    return False
298                else:
299                    return (self._find_data.dwFileAttributes &
300                            FILE_ATTRIBUTE_DIRECTORY != 0)
301
302            def is_file(self, follow_symlinks=True):
303                is_symlink = self.is_symlink()
304                if follow_symlinks and is_symlink:
305                    try:
306                        return self.stat().st_mode & 0o170000 == S_IFREG
307                    except OSError as e:
308                        if e.errno != ENOENT:
309                            raise
310                        return False
311                elif is_symlink:
312                    return False
313                else:
314                    return (self._find_data.dwFileAttributes &
315                            FILE_ATTRIBUTE_DIRECTORY == 0)
316
317            def is_symlink(self):
318                return (self._find_data.dwFileAttributes &
319                            FILE_ATTRIBUTE_REPARSE_POINT != 0 and
320                        self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)
321
322            def inode(self):
323                if self._inode is None:
324                    self._inode = lstat(self.path).st_ino
325                return self._inode
326
327            def __str__(self):
328                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)
329
330            __repr__ = __str__
331
332        def win_error(error, filename):
333            exc = WindowsError(error, ctypes.FormatError(error))
334            exc.filename = filename
335            return exc
336
        def _scandir_python(path=unicode('.')):
            """Like os.listdir(), but yield DirEntry objects instead of returning
            a list of names.

            Enumerates *path* with FindFirstFile/FindNextFile and yields one
            Win32DirEntryPython per entry, skipping '.' and '..'. For a bytes
            path the entry names are yielded as 'mbcs'-encoded bytes. Raises
            the exception built by win_error() when a Win32 call fails.
            """
            # Call FindFirstFile and handle errors
            if isinstance(path, bytes):
                is_bytes = True
                # The wide-character API needs text, so decode the bytes path
                filename = join(path.decode('mbcs', 'strict'), '*.*')
            else:
                is_bytes = False
                filename = join(path, '*.*')
            data = wintypes.WIN32_FIND_DATAW()
            data_p = ctypes.byref(data)
            handle = FindFirstFile(filename, data_p)
            if handle == INVALID_HANDLE_VALUE:
                # Read the error code right after the failing call
                error = ctypes.GetLastError()
                if error == ERROR_FILE_NOT_FOUND:
                    # No files, don't yield anything
                    return
                raise win_error(error, path)

            # Call FindNextFile in a loop, stopping when no more files
            try:
                while True:
                    # Skip '.' and '..' (current and parent directory), but
                    # otherwise yield a Win32DirEntryPython for the entry
                    name = data.cFileName
                    if name not in ('.', '..'):
                        if is_bytes:
                            name = name.encode('mbcs', 'replace')
                        yield Win32DirEntryPython(path, name, data)

                    # Allocate a fresh struct for every entry: the entry
                    # yielded above keeps a reference to the previous one
                    data = wintypes.WIN32_FIND_DATAW()
                    data_p = ctypes.byref(data)
                    success = FindNextFile(handle, data_p)
                    if not success:
                        error = ctypes.GetLastError()
                        if error == ERROR_NO_MORE_FILES:
                            break
                        raise win_error(error, path)
            finally:
                # Always release the find handle, including when the
                # generator is closed before it is exhausted
                if not FindClose(handle):
                    raise win_error(ctypes.GetLastError(), path)
380
        if IS_PY3:
            # On Python 3, bytes paths are rejected with a TypeError before
            # the ctypes-based generator is created.
            def scandir_python(path=unicode('.')):
                if isinstance(path, bytes):
                    raise TypeError("os.scandir() doesn't support bytes path on Windows, use Unicode instead")
                return _scandir_python(path)
            # The wrapper advertises the wrapped generator's docstring.
            scandir_python.__doc__ = _scandir_python.__doc__
        else:
            # Python 2 accepts bytes paths, so use the generator directly.
            scandir_python = _scandir_python
389
    # Expose the C implementation under its own names when the compiled
    # module was importable.
    if _scandir is not None:
        scandir_c = _scandir.scandir
        DirEntry_c = _scandir.DirEntry

    # Pick the public scandir/DirEntry pair, in order of preference:
    # C extension, ctypes implementation, generic listdir-based fallback.
    if _scandir is not None:
        scandir = scandir_c
        DirEntry = DirEntry_c
    elif ctypes is not None:
        scandir = scandir_python
        DirEntry = Win32DirEntryPython
    else:
        scandir = scandir_generic
        DirEntry = GenericDirEntry
403
404
405# Linux, OS X, and BSD implementation
406elif sys.platform.startswith(('linux', 'darwin', 'sunos5')) or 'bsd' in sys.platform:
407    have_dirent_d_type = (sys.platform != 'sunos5')
408
409    if ctypes is not None and have_dirent_d_type:
410        import ctypes.util
411
        # Opaque DIR* handle returned by opendir()
        DIR_p = ctypes.c_void_p

        # Rather annoying how the dirent struct is slightly different on each
        # platform. The only fields we care about are d_name and d_type.
        # (The scanning code in this module also reads d_ino for the
        # entry's inode.)
        # NOTE(review): each layout must match the platform libc's struct
        # dirent exactly; confirm against the target ABI before changing.
        class Dirent(ctypes.Structure):
            if sys.platform.startswith('linux'):
                _fields_ = (
                    ('d_ino', ctypes.c_ulong),
                    ('d_off', ctypes.c_long),
                    ('d_reclen', ctypes.c_ushort),
                    ('d_type', ctypes.c_byte),
                    ('d_name', ctypes.c_char * 256),
                )
            elif 'openbsd' in sys.platform:
                _fields_ = (
                    ('d_ino', ctypes.c_uint64),
                    ('d_off', ctypes.c_uint64),
                    ('d_reclen', ctypes.c_uint16),
                    ('d_type', ctypes.c_uint8),
                    ('d_namlen', ctypes.c_uint8),
                    ('__d_padding', ctypes.c_uint8 * 4),
                    ('d_name', ctypes.c_char * 256),
                )
            else:
                # Every other platform that reaches this code (darwin and
                # the remaining BSDs)
                _fields_ = (
                    ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
                    ('d_reclen', ctypes.c_ushort),
                    ('d_type', ctypes.c_byte),
                    ('d_namlen', ctypes.c_byte),
                    ('d_name', ctypes.c_char * 256),
                )
443
        # Values of Dirent.d_type that PosixDirEntry compares against.
        # DT_UNKNOWN makes the entry fall back to a stat call.
        DT_UNKNOWN = 0
        DT_DIR = 4
        DT_REG = 8
        DT_LNK = 10

        Dirent_p = ctypes.POINTER(Dirent)
        Dirent_pp = ctypes.POINTER(Dirent_p)

        # use_errno=True makes ctypes keep a copy of errno that
        # ctypes.get_errno() can read after a libc call.
        libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
        opendir = libc.opendir
        opendir.argtypes = [ctypes.c_char_p]
        opendir.restype = DIR_p

        readdir_r = libc.readdir_r
        readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]
        readdir_r.restype = ctypes.c_int

        closedir = libc.closedir
        closedir.argtypes = [DIR_p]
        closedir.restype = ctypes.c_int

        # Encoding used to turn text paths into bytes for opendir() and
        # entry names back into text.
        file_system_encoding = sys.getfilesystemencoding()
466
467        class PosixDirEntry(object):
468            __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path', '_inode')
469
470            def __init__(self, scandir_path, name, d_type, inode):
471                self._scandir_path = scandir_path
472                self.name = name
473                self._d_type = d_type
474                self._inode = inode
475                self._stat = None
476                self._lstat = None
477                self._path = None
478
479            @property
480            def path(self):
481                if self._path is None:
482                    self._path = join(self._scandir_path, self.name)
483                return self._path
484
485            def stat(self, follow_symlinks=True):
486                if follow_symlinks:
487                    if self._stat is None:
488                        if self.is_symlink():
489                            self._stat = stat(self.path)
490                        else:
491                            if self._lstat is None:
492                                self._lstat = lstat(self.path)
493                            self._stat = self._lstat
494                    return self._stat
495                else:
496                    if self._lstat is None:
497                        self._lstat = lstat(self.path)
498                    return self._lstat
499
500            def is_dir(self, follow_symlinks=True):
501                if (self._d_type == DT_UNKNOWN or
502                        (follow_symlinks and self.is_symlink())):
503                    try:
504                        st = self.stat(follow_symlinks=follow_symlinks)
505                    except OSError as e:
506                        if e.errno != ENOENT:
507                            raise
508                        return False
509                    return st.st_mode & 0o170000 == S_IFDIR
510                else:
511                    return self._d_type == DT_DIR
512
513            def is_file(self, follow_symlinks=True):
514                if (self._d_type == DT_UNKNOWN or
515                        (follow_symlinks and self.is_symlink())):
516                    try:
517                        st = self.stat(follow_symlinks=follow_symlinks)
518                    except OSError as e:
519                        if e.errno != ENOENT:
520                            raise
521                        return False
522                    return st.st_mode & 0o170000 == S_IFREG
523                else:
524                    return self._d_type == DT_REG
525
526            def is_symlink(self):
527                if self._d_type == DT_UNKNOWN:
528                    try:
529                        st = self.stat(follow_symlinks=False)
530                    except OSError as e:
531                        if e.errno != ENOENT:
532                            raise
533                        return False
534                    return st.st_mode & 0o170000 == S_IFLNK
535                else:
536                    return self._d_type == DT_LNK
537
538            def inode(self):
539                return self._inode
540
541            def __str__(self):
542                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)
543
544            __repr__ = __str__
545
546        def posix_error(filename):
547            errno = ctypes.get_errno()
548            exc = OSError(errno, strerror(errno))
549            exc.filename = filename
550            return exc
551
552        def scandir_python(path=unicode('.')):
553            """Like os.listdir(), but yield DirEntry objects instead of returning
554            a list of names.
555            """
556            if isinstance(path, bytes):
557                opendir_path = path
558                is_bytes = True
559            else:
560                opendir_path = path.encode(file_system_encoding)
561                is_bytes = False
562            dir_p = opendir(opendir_path)
563            if not dir_p:
564                raise posix_error(path)
565            try:
566                result = Dirent_p()
567                while True:
568                    entry = Dirent()
569                    if readdir_r(dir_p, entry, result):
570                        raise posix_error(path)
571                    if not result:
572                        break
573                    name = entry.d_name
574                    if name not in (b'.', b'..'):
575                        if not is_bytes:
576                            name = name.decode(file_system_encoding)
577                        yield PosixDirEntry(path, name, entry.d_type, entry.d_ino)
578            finally:
579                if closedir(dir_p):
580                    raise posix_error(path)
581
    # Expose the C implementation under its own names when the compiled
    # module was importable.
    if _scandir is not None:
        scandir_c = _scandir.scandir
        DirEntry_c = _scandir.DirEntry

    # Pick the public scandir/DirEntry pair, in order of preference:
    # C extension, ctypes implementation (only where dirent carries d_type),
    # generic listdir-based fallback.
    if _scandir is not None:
        scandir = scandir_c
        DirEntry = DirEntry_c
    elif ctypes is not None and have_dirent_d_type:
        scandir = scandir_python
        DirEntry = PosixDirEntry
    else:
        scandir = scandir_generic
        DirEntry = GenericDirEntry


# Some other system -- no d_type or stat information
else:
    scandir = scandir_generic
    DirEntry = GenericDirEntry
601
602
603def _walk(top, topdown=True, onerror=None, followlinks=False):
604    """Like Python 3.5's implementation of os.walk() -- faster than
605    the pre-Python 3.5 version as it uses scandir() internally.
606    """
607    dirs = []
608    nondirs = []
609
610    # We may not have read permission for top, in which case we can't
611    # get a list of the files the directory contains.  os.walk
612    # always suppressed the exception then, rather than blow up for a
613    # minor reason when (say) a thousand readable directories are still
614    # left to visit.  That logic is copied here.
615    try:
616        scandir_it = scandir(top)
617    except OSError as error:
618        if onerror is not None:
619            onerror(error)
620        return
621
622    while True:
623        try:
624            try:
625                entry = next(scandir_it)
626            except StopIteration:
627                break
628        except OSError as error:
629            if onerror is not None:
630                onerror(error)
631            return
632
633        try:
634            is_dir = entry.is_dir()
635        except OSError:
636            # If is_dir() raises an OSError, consider that the entry is not
637            # a directory, same behaviour than os.path.isdir().
638            is_dir = False
639
640        if is_dir:
641            dirs.append(entry.name)
642        else:
643            nondirs.append(entry.name)
644
645        if not topdown and is_dir:
646            # Bottom-up: recurse into sub-directory, but exclude symlinks to
647            # directories if followlinks is False
648            if followlinks:
649                walk_into = True
650            else:
651                try:
652                    is_symlink = entry.is_symlink()
653                except OSError:
654                    # If is_symlink() raises an OSError, consider that the
655                    # entry is not a symbolic link, same behaviour than
656                    # os.path.islink().
657                    is_symlink = False
658                walk_into = not is_symlink
659
660            if walk_into:
661                for entry in walk(entry.path, topdown, onerror, followlinks):
662                    yield entry
663
664    # Yield before recursion if going top down
665    if topdown:
666        yield top, dirs, nondirs
667
668        # Recurse into sub-directories
669        for name in dirs:
670            new_path = join(top, name)
671            # Issue #23605: os.path.islink() is used instead of caching
672            # entry.is_symlink() result during the loop on os.scandir() because
673            # the caller can replace the directory entry during the "yield"
674            # above.
675            if followlinks or not islink(new_path):
676                for entry in walk(new_path, topdown, onerror, followlinks):
677                    yield entry
678    else:
679        # Yield after recursion if going bottom up
680        yield top, dirs, nondirs
681
682
# Public walk(): the plain generator everywhere except Python 2 on Windows.
if IS_PY3 or sys.platform != 'win32':
    walk = _walk
else:
    # Fix for broken unicode handling on Windows on Python 2.x, see:
    # https://github.com/benhoyt/scandir/issues/54
    file_system_encoding = sys.getfilesystemencoding()

    def walk(top, topdown=True, onerror=None, followlinks=False):
        """Decode a bytes *top* with the filesystem encoding, then
        delegate to _walk() with the same arguments.
        """
        if isinstance(top, bytes):
            top = top.decode(file_system_encoding)
        return _walk(top, topdown, onerror, followlinks)
694