1"""scandir, a better directory iterator that exposes all file info OS provides
2
3scandir is a generator version of os.listdir() that returns an iterator over
4files in a directory, and also exposes the extra information most OSes provide
5while iterating files in a directory.
6
7See README.md or https://github.com/benhoyt/scandir for rationale and docs.
8
9scandir is released under the new BSD 3-clause license. See LICENSE.txt for
10the full license text.
11"""
12
13from __future__ import division
14
15from errno import ENOENT
16from os import listdir, lstat, stat, strerror
17from os.path import join
18from stat import S_IFDIR, S_IFLNK, S_IFREG
19import collections
20import ctypes
21import os
22import sys
23
# Version of this module, and the names exported by "from <module> import *"
__version__ = '0.9'
__all__ = ['scandir', 'walk']

# Windows FILE_ATTRIBUTE constants for interpreting the
# FIND_DATA.dwFileAttributes member. Each value is a single bit, so they
# are tested with a bitwise AND against dwFileAttributes.
FILE_ATTRIBUTE_ARCHIVE = 32
FILE_ATTRIBUTE_COMPRESSED = 2048
FILE_ATTRIBUTE_DEVICE = 64
FILE_ATTRIBUTE_DIRECTORY = 16
FILE_ATTRIBUTE_ENCRYPTED = 16384
FILE_ATTRIBUTE_HIDDEN = 2
FILE_ATTRIBUTE_INTEGRITY_STREAM = 32768
FILE_ATTRIBUTE_NORMAL = 128
FILE_ATTRIBUTE_NOT_CONTENT_INDEXED = 8192
FILE_ATTRIBUTE_NO_SCRUB_DATA = 131072
FILE_ATTRIBUTE_OFFLINE = 4096
FILE_ATTRIBUTE_READONLY = 1
FILE_ATTRIBUTE_REPARSE_POINT = 1024
FILE_ATTRIBUTE_SPARSE_FILE = 512
FILE_ATTRIBUTE_SYSTEM = 4
FILE_ATTRIBUTE_TEMPORARY = 256
FILE_ATTRIBUTE_VIRTUAL = 65536

# True when running on Python 3.0 or later
IS_PY3 = sys.version_info >= (3, 0)

if not IS_PY3:
    # Python 2 only: rebind the module-level name "str" to the unicode type
    str = unicode

# Placeholder for the optional "_scandir" extension module. The
# "import _scandir" statements in the platform-specific sections below
# rebind this name when that import succeeds.
_scandir = None
53
54
class GenericDirEntry(object):
    """Directory entry whose file information comes from stat()/lstat().

    Only the entry name is known when the object is created; the full
    path and both stat results are computed on first use and then cached
    on the instance.
    """

    __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')

    def __init__(self, scandir_path, name):
        self.name = name
        self._scandir_path = scandir_path
        # Lazily filled caches
        self._path = None
        self._stat = None
        self._lstat = None

    @property
    def path(self):
        """The entry name joined onto the scanned directory path."""
        full_path = self._path
        if full_path is None:
            full_path = self._path = join(self._scandir_path, self.name)
        return full_path

    def stat(self, follow_symlinks=True):
        """Return the cached stat result for this entry.

        With follow_symlinks true the result comes from stat(), otherwise
        from lstat(). Each is fetched at most once. Any OSError from the
        underlying call propagates to the caller.
        """
        if not follow_symlinks:
            if self._lstat is None:
                self._lstat = lstat(self.path)
            return self._lstat
        if self._stat is None:
            self._stat = stat(self.path)
        return self._stat

    def is_dir(self, follow_symlinks=True):
        """Return True if the entry is a directory.

        A missing path (ENOENT) yields False; other OSErrors propagate.
        """
        try:
            mode = self.stat(follow_symlinks=follow_symlinks).st_mode
        except OSError as e:
            if e.errno == ENOENT:
                # Path doesn't exist or is a broken symlink
                return False
            raise
        return mode & 0o170000 == S_IFDIR

    def is_file(self, follow_symlinks=True):
        """Return True if the entry is a regular file.

        A missing path (ENOENT) yields False; other OSErrors propagate.
        """
        try:
            mode = self.stat(follow_symlinks=follow_symlinks).st_mode
        except OSError as e:
            if e.errno == ENOENT:
                # Path doesn't exist or is a broken symlink
                return False
            raise
        return mode & 0o170000 == S_IFREG

    def is_symlink(self):
        """Return True if the entry itself is a symbolic link.

        Uses the lstat() result, so the link is never followed. A missing
        path (ENOENT) yields False; other OSErrors propagate.
        """
        try:
            mode = self.stat(follow_symlinks=False).st_mode
        except OSError as e:
            if e.errno == ENOENT:
                # The entry no longer exists
                return False
            raise
        return mode & 0o170000 == S_IFLNK

    def __str__(self):
        cls_name = self.__class__.__name__
        return '<{0}: {1!r}>'.format(cls_name, self.name)

    __repr__ = __str__
112
113
def scandir_generic(path=u'.'):
    """Yield a GenericDirEntry for every name os.listdir() reports in path.

    This is a generator: the directory is not listed (and no OSError from
    listdir() is raised) until the first item is requested.
    """
    entry_names = listdir(path)
    for entry_name in entry_names:
        yield GenericDirEntry(path, entry_name)
120
121
122if sys.platform == 'win32':
    from ctypes import wintypes

    # Various constants from windows.h
    # INVALID_HANDLE_VALUE is the pointer-sized representation of -1, which
    # is what FindFirstFile's HANDLE result is compared against on failure.
    INVALID_HANDLE_VALUE = ctypes.c_void_p(-1).value
    ERROR_FILE_NOT_FOUND = 2
    ERROR_NO_MORE_FILES = 18
    IO_REPARSE_TAG_SYMLINK = 0xA000000C

    # Number of seconds between 1601-01-01 and 1970-01-01
    SECONDS_BETWEEN_EPOCHS = 11644473600

    kernel32 = ctypes.windll.kernel32

    # ctypes wrappers for (wide string versions of) FindFirstFile,
    # FindNextFile, and FindClose. argtypes/restype are declared explicitly
    # so ctypes converts arguments and results with the right types.
    FindFirstFile = kernel32.FindFirstFileW
    FindFirstFile.argtypes = [
        wintypes.LPCWSTR,
        ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
    ]
    FindFirstFile.restype = wintypes.HANDLE

    FindNextFile = kernel32.FindNextFileW
    FindNextFile.argtypes = [
        wintypes.HANDLE,
        ctypes.POINTER(wintypes.WIN32_FIND_DATAW),
    ]
    FindNextFile.restype = wintypes.BOOL

    FindClose = kernel32.FindClose
    FindClose.argtypes = [wintypes.HANDLE]
    FindClose.restype = wintypes.BOOL

    # Named tuple returned by find_data_to_stat(); it carries st_* fields
    # plus the raw Windows attribute bits in st_file_attributes.
    Win32StatResult = collections.namedtuple('Win32StatResult', [
        'st_mode',
        'st_ino',
        'st_dev',
        'st_nlink',
        'st_uid',
        'st_gid',
        'st_size',
        'st_atime',
        'st_mtime',
        'st_ctime',
        'st_atime_ns',
        'st_mtime_ns',
        'st_ctime_ns',
        'st_file_attributes',
    ])
172
173    def filetime_to_time(filetime):
174        """Convert Win32 FILETIME to time since Unix epoch in seconds."""
175        total = filetime.dwHighDateTime << 32 | filetime.dwLowDateTime
176        return total / 10000000 - SECONDS_BETWEEN_EPOCHS
177
178    def find_data_to_stat(data):
179        """Convert Win32 FIND_DATA struct to stat_result."""
180        # First convert Win32 dwFileAttributes to st_mode
181        attributes = data.dwFileAttributes
182        st_mode = 0
183        if attributes & FILE_ATTRIBUTE_DIRECTORY:
184            st_mode |= S_IFDIR | 0o111
185        else:
186            st_mode |= S_IFREG
187        if attributes & FILE_ATTRIBUTE_READONLY:
188            st_mode |= 0o444
189        else:
190            st_mode |= 0o666
191        if (attributes & FILE_ATTRIBUTE_REPARSE_POINT and
192                data.dwReserved0 == IO_REPARSE_TAG_SYMLINK):
193            st_mode ^= st_mode & 0o170000
194            st_mode |= S_IFLNK
195
196        st_size = data.nFileSizeHigh << 32 | data.nFileSizeLow
197        st_atime = filetime_to_time(data.ftLastAccessTime)
198        st_mtime = filetime_to_time(data.ftLastWriteTime)
199        st_ctime = filetime_to_time(data.ftCreationTime)
200
201        # Some fields set to zero per CPython's posixmodule.c: st_ino, st_dev,
202        # st_nlink, st_uid, st_gid
203        return Win32StatResult(st_mode, 0, 0, 0, 0, 0, st_size,
204                               st_atime, st_mtime, st_ctime,
205                               int(st_atime * 1000000000),
206                               int(st_mtime * 1000000000),
207                               int(st_ctime * 1000000000),
208                               attributes)
209
210    class Win32DirEntryPython(object):
211        __slots__ = ('name', '_stat', '_lstat', '_find_data', '_scandir_path', '_path')
212
213        def __init__(self, scandir_path, name, find_data):
214            self._scandir_path = scandir_path
215            self.name = name
216            self._stat = None
217            self._lstat = None
218            self._find_data = find_data
219            self._path = None
220
221        @property
222        def path(self):
223            if self._path is None:
224                self._path = join(self._scandir_path, self.name)
225            return self._path
226
227        def stat(self, follow_symlinks=True):
228            if follow_symlinks:
229                if self._stat is None:
230                    if self.is_symlink():
231                        # It's a symlink, call link-following stat()
232                        self._stat = stat(self.path)
233                    else:
234                        # Not a symlink, stat is same as lstat value
235                        if self._lstat is None:
236                            self._lstat = find_data_to_stat(self._find_data)
237                        self._stat = self._lstat
238                return self._stat
239            else:
240                if self._lstat is None:
241                    # Lazily convert to stat object, because it's slow
242                    # in Python, and often we only need is_dir() etc
243                    self._lstat = find_data_to_stat(self._find_data)
244                return self._lstat
245
246        def is_dir(self, follow_symlinks=True):
247            is_symlink = self.is_symlink()
248            if follow_symlinks and is_symlink:
249                try:
250                    return self.stat().st_mode & 0o170000 == S_IFDIR
251                except OSError as e:
252                    if e.errno != ENOENT:
253                        raise
254                    return False
255            elif is_symlink:
256                return False
257            else:
258                return (self._find_data.dwFileAttributes &
259                        FILE_ATTRIBUTE_DIRECTORY != 0)
260
261        def is_file(self, follow_symlinks=True):
262            is_symlink = self.is_symlink()
263            if follow_symlinks and is_symlink:
264                try:
265                    return self.stat().st_mode & 0o170000 == S_IFREG
266                except OSError as e:
267                    if e.errno != ENOENT:
268                        raise
269                    return False
270            elif is_symlink:
271                return False
272            else:
273                return (self._find_data.dwFileAttributes &
274                        FILE_ATTRIBUTE_DIRECTORY == 0)
275
276        def is_symlink(self):
277            return (self._find_data.dwFileAttributes &
278                        FILE_ATTRIBUTE_REPARSE_POINT != 0 and
279                    self._find_data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)
280
281        def __str__(self):
282            return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)
283
284        __repr__ = __str__
285
286    def win_error(error, filename):
287        exc = WindowsError(error, ctypes.FormatError(error))
288        exc.filename = filename
289        return exc
290
    def scandir_python(path=u'.'):
        """Like os.listdir(), but yield DirEntry objects instead of returning
        a list of names.

        Pure-Python (ctypes) Windows implementation built on FindFirstFile
        and FindNextFile. If path is bytes, it is decoded with the 'mbcs'
        codec for the API call and the yielded names are encoded back to
        bytes. Raises the exception built by win_error() when a Win32 call
        fails. The find handle is closed when the generator finishes or is
        closed.
        """
        # Call FindFirstFile and handle errors
        if isinstance(path, bytes):
            is_bytes = True
            filename = join(path.decode('mbcs', 'strict'), '*.*')
        else:
            is_bytes = False
            filename = join(path, '*.*')
        data = wintypes.WIN32_FIND_DATAW()
        data_p = ctypes.byref(data)
        handle = FindFirstFile(filename, data_p)
        if handle == INVALID_HANDLE_VALUE:
            # Read the error code immediately after the failing call
            error = ctypes.GetLastError()
            if error == ERROR_FILE_NOT_FOUND:
                # No files, don't yield anything
                return
            raise win_error(error, path)

        # Call FindNextFile in a loop, stopping when no more files
        try:
            while True:
                # Skip '.' and '..' (current and parent directory), but
                # otherwise yield a Win32DirEntryPython for the entry
                name = data.cFileName
                if name not in ('.', '..'):
                    if is_bytes:
                        name = name.encode('mbcs', 'replace')
                    yield Win32DirEntryPython(path, name, data)

                # Allocate a fresh struct for every call, because the entry
                # yielded above keeps a reference to its own find data
                data = wintypes.WIN32_FIND_DATAW()
                data_p = ctypes.byref(data)
                success = FindNextFile(handle, data_p)
                if not success:
                    error = ctypes.GetLastError()
                    if error == ERROR_NO_MORE_FILES:
                        break
                    raise win_error(error, path)
        finally:
            # Always release the find handle, even if iteration stops early.
            # NOTE(review): raising here replaces any exception that is
            # already propagating out of the loop above.
            if not FindClose(handle):
                raise win_error(ctypes.GetLastError(), path)
334
335    try:
336        import _scandir
337
338        scandir_helper = _scandir.scandir_helper
339
340        class Win32DirEntryC(object):
341            __slots__ = ('name', '_stat', '_lstat', '_scandir_path', '_path')
342
343            def __init__(self, scandir_path, name, lstat):
344                self._scandir_path = scandir_path
345                self.name = name
346                self._stat = None
347                self._lstat = lstat
348                self._path = None
349
350            @property
351            def path(self):
352                if self._path is None:
353                    self._path = join(self._scandir_path, self.name)
354                return self._path
355
356            def stat(self, follow_symlinks=True):
357                if follow_symlinks:
358                    if self._stat is None:
359                        if self.is_symlink():
360                            self._stat = stat(self.path)
361                        else:
362                            self._stat = self._lstat
363                    return self._stat
364                else:
365                    return self._lstat
366
367            def is_dir(self, follow_symlinks=True):
368                if follow_symlinks and self.is_symlink():
369                    try:
370                        st = self.stat()
371                    except OSError as e:
372                        if e.errno != ENOENT:
373                            raise
374                        return False
375                else:
376                    st = self._lstat
377                return st.st_mode & 0o170000 == S_IFDIR
378
379            def is_file(self, follow_symlinks=True):
380                if follow_symlinks and self.is_symlink():
381                    try:
382                        st = self.stat()
383                    except OSError as e:
384                        if e.errno != ENOENT:
385                            raise
386                        return False
387                else:
388                    st = self._lstat
389                return st.st_mode & 0o170000 == S_IFREG
390
391            def is_symlink(self):
392                return self._lstat.st_mode & 0o170000 == S_IFLNK
393
394            def __str__(self):
395                return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)
396
397            __repr__ = __str__
398
399        def scandir_c(path=u'.'):
400            if isinstance(path, bytes):
401                for name, stat in scandir_helper(path.decode('mbcs', 'replace')):
402                    name = name.encode('mbcs', 'replace')
403                    yield Win32DirEntryC(path, name, stat)
404            else:
405                for name, stat in scandir_helper(path):
406                    yield Win32DirEntryC(path, name, stat)
407
408        scandir = scandir_c
409
410    except ImportError:
411        scandir = scandir_python
412
413
414# Linux, OS X, and BSD implementation
415elif sys.platform.startswith(('linux', 'darwin')) or 'bsd' in sys.platform:
416    import ctypes.util
417
    # Opaque pointer type used for the directory handle that opendir()
    # returns and that readdir_r() and closedir() take
    DIR_p = ctypes.c_void_p
419
420    # Rather annoying how the dirent struct is slightly different on each
421    # platform. The only fields we care about are d_name and d_type.
    class Dirent(ctypes.Structure):
        """ctypes layout of the directory entry struct filled by readdir_r().

        The field list is chosen once, when the class is created, from
        sys.platform. Field order and types must match the C library's
        struct exactly, so do not reorder them.
        """
        if sys.platform.startswith('linux'):
            _fields_ = (
                ('d_ino', ctypes.c_ulong),
                ('d_off', ctypes.c_long),
                ('d_reclen', ctypes.c_ushort),
                ('d_type', ctypes.c_byte),
                ('d_name', ctypes.c_char * 256),
            )
        # NOTE(review): int(sys.platform[7:]) assumes sys.platform is
        # "freebsd" followed by a number; it raises ValueError otherwise
        elif sys.platform.startswith('freebsd') and int(sys.platform[7:]) > 11:
            _fields_ = (
                ('d_ino', ctypes.c_uint64),
                ('d_off', ctypes.c_uint64),
                ('d_reclen', ctypes.c_ushort),
                ('d_type', ctypes.c_byte),
                ('d_pad0', ctypes.c_byte),
                ('d_namlen', ctypes.c_ushort),
                ('d_pad1', ctypes.c_ushort),
                ('d_name', ctypes.c_char * 256),
            )
        else:
            # Every other platform that reaches this branch of the module
            _fields_ = (
                ('d_ino', ctypes.c_uint32),  # must be uint32, not ulong
                ('d_reclen', ctypes.c_ushort),
                ('d_type', ctypes.c_byte),
                ('d_namlen', ctypes.c_byte),
                ('d_name', ctypes.c_char * 256),
            )
450
    # Values of Dirent.d_type that this module distinguishes. DT_UNKNOWN
    # makes PosixDirEntry fall back to a stat()/lstat() call.
    DT_UNKNOWN = 0
    DT_DIR = 4
    DT_REG = 8
    DT_LNK = 10

    # Pointer types for readdir_r(): its entry buffer and its result slot
    Dirent_p = ctypes.POINTER(Dirent)
    Dirent_pp = ctypes.POINTER(Dirent_p)

    # Load the C library with use_errno=True so that posix_error() can read
    # the error number through ctypes.get_errno()
    libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
    opendir = libc.opendir
    opendir.argtypes = [ctypes.c_char_p]
    opendir.restype = DIR_p

    readdir_r = libc.readdir_r
    readdir_r.argtypes = [DIR_p, Dirent_p, Dirent_pp]
    readdir_r.restype = ctypes.c_int

    closedir = libc.closedir
    closedir.argtypes = [DIR_p]
    closedir.restype = ctypes.c_int

    # Codec used to convert between str paths/names and the bytes that the
    # C library functions above work with
    file_system_encoding = sys.getfilesystemencoding()
473
474    class PosixDirEntry(object):
475        __slots__ = ('name', '_d_type', '_stat', '_lstat', '_scandir_path', '_path')
476
477        def __init__(self, scandir_path, name, d_type):
478            self._scandir_path = scandir_path
479            self.name = name
480            self._d_type = d_type
481            self._stat = None
482            self._lstat = None
483            self._path = None
484
485        @property
486        def path(self):
487            if self._path is None:
488                self._path = join(self._scandir_path, self.name)
489            return self._path
490
491        def stat(self, follow_symlinks=True):
492            if follow_symlinks:
493                if self._stat is None:
494                    if self.is_symlink():
495                        self._stat = stat(self.path)
496                    else:
497                        if self._lstat is None:
498                            self._lstat = lstat(self.path)
499                        self._stat = self._lstat
500                return self._stat
501            else:
502                if self._lstat is None:
503                    self._lstat = lstat(self.path)
504                return self._lstat
505
506        def is_dir(self, follow_symlinks=True):
507            if (self._d_type == DT_UNKNOWN or
508                    (follow_symlinks and self.is_symlink())):
509                try:
510                    st = self.stat(follow_symlinks=follow_symlinks)
511                except OSError as e:
512                    if e.errno != ENOENT:
513                        raise
514                    return False
515                return st.st_mode & 0o170000 == S_IFDIR
516            else:
517                return self._d_type == DT_DIR
518
519        def is_file(self, follow_symlinks=True):
520            if (self._d_type == DT_UNKNOWN or
521                    (follow_symlinks and self.is_symlink())):
522                try:
523                    st = self.stat(follow_symlinks=follow_symlinks)
524                except OSError as e:
525                    if e.errno != ENOENT:
526                        raise
527                    return False
528                return st.st_mode & 0o170000 == S_IFREG
529            else:
530                return self._d_type == DT_REG
531
532        def is_symlink(self):
533            if self._d_type == DT_UNKNOWN:
534                try:
535                    st = self.stat(follow_symlinks=False)
536                except OSError as e:
537                    if e.errno != ENOENT:
538                        raise
539                    return False
540                return st.st_mode & 0o170000 == S_IFLNK
541            else:
542                return self._d_type == DT_LNK
543
544        def __str__(self):
545            return '<{0}: {1!r}>'.format(self.__class__.__name__, self.name)
546
547        __repr__ = __str__
548
549    def posix_error(filename):
550        errno = ctypes.get_errno()
551        exc = OSError(errno, strerror(errno))
552        exc.filename = filename
553        return exc
554
555    def scandir_python(path=u'.'):
556        """Like os.listdir(), but yield DirEntry objects instead of returning
557        a list of names.
558        """
559        if isinstance(path, bytes):
560            opendir_path = path
561            is_bytes = True
562        else:
563            opendir_path = path.encode(file_system_encoding)
564            is_bytes = False
565        dir_p = opendir(opendir_path)
566        if not dir_p:
567            raise posix_error(path)
568        try:
569            result = Dirent_p()
570            while True:
571                entry = Dirent()
572                if readdir_r(dir_p, entry, result):
573                    raise posix_error(path)
574                if not result:
575                    break
576                name = entry.d_name
577                if name not in (b'.', b'..'):
578                    if not is_bytes:
579                        name = name.decode(file_system_encoding)
580                    yield PosixDirEntry(path, name, entry.d_type)
581        finally:
582            if closedir(dir_p):
583                raise posix_error(path)
584
585    try:
586        import _scandir
587
588        scandir_helper = _scandir.scandir_helper
589
590        def scandir_c(path=u'.'):
591            is_bytes = isinstance(path, bytes)
592            for name, d_type in scandir_helper(path):
593                if not is_bytes:
594                    name = name.decode(file_system_encoding)
595                yield PosixDirEntry(path, name, d_type)
596
597        scandir = scandir_c
598
599    except ImportError:
600        scandir = scandir_python
601
602
603# Some other system -- no d_type or stat information
604else:
605    scandir = scandir_generic
606
607
def walk(top, topdown=True, onerror=None, followlinks=False):
    """Generate (dirpath, dirnames, filenames) tuples like os.walk().

    The tree is read with scandir(), so classifying entries normally needs
    no extra stat calls. If scanning a directory raises OSError, the error
    is passed to onerror (when given) and that directory is skipped.
    Directories that are symlinks are listed in dirnames but only
    descended into when followlinks is true.
    """
    subdirs = []
    files = []
    links = set()
    try:
        for item in scandir(top):
            # Sort the entry into directories or non-directories
            try:
                bucket = subdirs if item.is_dir() else files
            except OSError:
                # Need this to emulate os.walk(), which uses
                # os.path.isdir(), and that returns False (nondir) on
                # any OSError; same with item.is_symlink() below
                bucket = files
            bucket.append(item.name)

            try:
                if item.is_symlink():
                    links.add(item.name)
            except OSError:
                pass
    except OSError as error:
        if onerror is not None:
            onerror(error)
        return

    # Top-down: report this directory before its children, so the caller
    # can edit the directory list before it is iterated below
    if topdown:
        yield top, subdirs, files

    for name in subdirs:
        if not followlinks and name in links:
            continue
        for item in walk(join(top, name), topdown, onerror, followlinks):
            yield item

    # Bottom-up: report this directory after its children
    if not topdown:
        yield top, subdirs, files
650