1"""Handle path matching."""
2import re
3import os
4import stat
5import copyreg
6from . import util
7from typing import Pattern, Tuple, AnyStr, Optional, Generic, Any, Dict, cast
8
# Flags for opening a directory read-only. `O_DIRECTORY` is not defined on
# every platform, so it contributes nothing (0) where it is missing.
DIR_FLAGS = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)

# `dir_fd` can only be honored when `open` and `stat` accept `dir_fd` and
# `scandir` accepts a file descriptor. Either support set may be empty on
# platforms lacking the feature, which makes this `False`.
_DIR_FD_CALLS = {os.open, os.stat}
SUPPORT_DIR_FD = _DIR_FD_CALLS <= os.supports_dir_fd and os.scandir in os.supports_fd


# Pattern text for a leading mount/root. Each public constant below is a
# `(str pattern, bytes pattern)` pair built from the same text.
_WIN_MOUNT = r'\\|[a-z]:(?:\\|$)'
_POSIX_MOUNT = r'/'

RE_WIN_MOUNT = (
    re.compile(_WIN_MOUNT, re.I),
    re.compile(_WIN_MOUNT.encode('ascii'), re.I)
)
RE_MOUNT = (
    re.compile(_POSIX_MOUNT),
    re.compile(_POSIX_MOUNT.encode('ascii'))
)
23
24
class _Match(Generic[AnyStr]):
    """Match a single file name against compiled include and exclude patterns."""

    def __init__(
        self,
        filename: AnyStr,
        include: Tuple[Pattern[AnyStr], ...],
        exclude: Optional[Tuple[Pattern[AnyStr], ...]],
        real: bool,
        path: bool,
        follow: bool
    ) -> None:
        """
        Initialize.

        - `filename`: the name (string or bytes) that will be tested.
        - `include`: compiled patterns; at least one must match.
        - `exclude`: optional compiled patterns; any match rejects the name.
        - `real`: when true, `match` consults the file system.
        - `path`: stored as given; not read anywhere in this class.
        - `follow`: when true, include patterns skip the `globstar` symlink check.
        """

        self.filename = filename  # type: AnyStr
        self.include = include  # type: Tuple[Pattern[AnyStr], ...]
        self.exclude = exclude  # type: Optional[Tuple[Pattern[AnyStr], ...]]
        self.real = real
        self.path = path
        self.follow = follow
        # Used by `match` to pick the string or bytes variant of the mount patterns.
        self.ptype = util.BYTES if isinstance(self.filename, bytes) else util.UNICODE

    @staticmethod
    def _is_symlink(
        path: AnyStr,
        symlinks: Dict[Tuple[Optional[int], AnyStr], bool],
        dir_fd: Optional[int]
    ) -> bool:
        """Report whether `path` is a symlink, reading and updating the `symlinks` cache."""

        key = (dir_fd, path)
        is_link = symlinks.get(key)
        if is_link is None:
            if dir_fd is None:
                is_link = os.path.islink(path)
            else:
                try:
                    st = os.lstat(path, dir_fd=dir_fd)
                except (OSError, ValueError):  # pragma: no cover
                    is_link = False
                else:
                    is_link = stat.S_ISLNK(st.st_mode)
            symlinks[key] = is_link
        return is_link

    def _fs_match(
        self,
        pattern: Pattern[AnyStr],
        filename: AnyStr,
        is_dir: bool,
        sep: AnyStr,
        follow: bool,
        symlinks: Dict[Tuple[Optional[int], AnyStr], bool],
        root: AnyStr,
        dir_fd: Optional[int]
    ) -> bool:
        """
        Match path against the pattern.

        Since `globstar` doesn't match symlinks (unless `FOLLOW` is enabled), we must look for symlinks.
        If we identify a symlink in a `globstar` match, we know this result should not actually match.

        We only check for the symlink if we know we are looking at a directory.
        And we only call `lstat` if we can't find it in the cache.

        We know it's a directory if:

        1. If the base is a directory, all parts are directories.
        2. If we are not the last part of the `globstar`, the part is a directory.
        3. If the base is a file, but the part is not at the end, it is a directory.

        Returns `True` when `pattern` fully matches `filename` and, with `follow` disabled,
        no checked part of any captured group is a symlink. `symlinks` is a cache keyed by
        `(dir_fd, path)` and is updated in place.

        """

        m = pattern.fullmatch(filename)
        if m is None:
            return False

        # When symlinks are followed there is nothing on disk to verify.
        if follow:
            return True

        end = len(filename)
        groups = m.groups()
        last = len(groups)
        try:
            for i, star in enumerate(groups, 1):
                if not star:
                    continue

                # A trailing capture of a non-directory name is not inspected.
                at_end = m.end(i) == end
                if not (is_dir or i != last or not at_end):
                    continue

                # Rebuild the base from the text preceding *this* group. Carrying the
                # base over from an earlier group would drop the literal path segments
                # that sit between two groups and test the wrong location.
                base = os.path.join(root, filename[:m.start(i)])
                for part in star.strip(sep).split(sep):
                    base = os.path.join(base, part)
                    if self._is_symlink(base, symlinks, dir_fd):
                        return False
        except OSError:  # pragma: no cover
            return False
        return True

    def _match_real(
        self,
        symlinks: Dict[Tuple[Optional[int], AnyStr], bool],
        root: AnyStr,
        dir_fd: Optional[int]
    ) -> bool:
        """
        Match real filename includes and excludes.

        The name is treated as a directory when it ends with the platform separator or when
        the file system reports a directory at `root` joined with the name; in the latter case
        a trailing separator is appended before matching. Returns `True` when an include
        pattern matches and no exclude pattern does.
        """

        temp = '\\' if util.platform() == "windows" else '/'
        if isinstance(self.filename, bytes):
            sep = os.fsencode(temp)
        else:
            sep = temp

        is_dir = self.filename.endswith(sep)
        full_path = os.path.join(root, self.filename)
        try:
            if dir_fd is None:
                is_file_dir = os.path.isdir(full_path)
            else:
                try:
                    st = os.stat(full_path, dir_fd=dir_fd)
                except (OSError, ValueError):  # pragma: no cover
                    is_file_dir = False
                else:
                    is_file_dir = stat.S_ISDIR(st.st_mode)
        except OSError:  # pragma: no cover
            return False

        if not is_dir and is_file_dir:
            is_dir = True
            filename = self.filename + sep
        else:
            filename = self.filename

        for pattern in self.include:
            if self._fs_match(pattern, filename, is_dir, sep, self.follow, symlinks, root, dir_fd):
                break
        else:
            return False

        if self.exclude:
            for pattern in self.exclude:
                # Exclude patterns are always evaluated with `follow` enabled,
                # so they never perform the symlink check.
                if self._fs_match(pattern, filename, is_dir, sep, True, symlinks, root, dir_fd):
                    return False

        return True

    def match(self, root_dir: Optional[AnyStr] = None, dir_fd: Optional[int] = None) -> bool:
        """
        Match.

        When `real` is disabled this is a pure pattern test: the name must fully match an
        include pattern and no exclude pattern.

        When `real` is enabled the name must also exist (checked without following a final
        symlink), relative to `root_dir` (default `.`) unless it is absolute. `dir_fd` is
        ignored when the platform lacks `dir_fd` support. Raises `TypeError` if the file name,
        root directory, and patterns are not all of the same string type.
        """

        if self.real:
            if isinstance(self.filename, bytes):
                root = root_dir if root_dir is not None else b'.'  # type: AnyStr
            else:
                root = root_dir if root_dir is not None else '.'

            if dir_fd is not None and not SUPPORT_DIR_FD:
                dir_fd = None

            if not isinstance(self.filename, type(root)):
                raise TypeError(
                    "The filename and root directory should be of the same type, not {} and {}".format(
                        type(self.filename), type(root_dir)
                    )
                )

            if self.include and not isinstance(self.include[0].pattern, type(self.filename)):
                raise TypeError(
                    "The filename and pattern should be of the same type, not {} and {}".format(
                        type(self.filename), type(self.include[0].pattern)
                    )
                )

            re_mount = cast(Pattern[AnyStr], (RE_WIN_MOUNT if util.platform() == "windows" else RE_MOUNT)[self.ptype])
            is_abs = re_mount.match(self.filename) is not None

            if is_abs:
                exists = os.path.lexists(self.filename)
            elif dir_fd is None:
                exists = os.path.lexists(os.path.join(root, self.filename))
            else:
                try:
                    os.lstat(os.path.join(root, self.filename), dir_fd=dir_fd)
                except (OSError, ValueError):  # pragma: no cover
                    exists = False
                else:
                    exists = True

            if not exists:
                return False

            # Fresh symlink cache for this call; shared by all patterns tested below.
            symlinks = {}  # type: Dict[Tuple[Optional[int], AnyStr], bool]
            return self._match_real(symlinks, root, dir_fd)

        if not any(pattern.fullmatch(self.filename) for pattern in self.include):
            return False

        if self.exclude and any(pattern.fullmatch(self.filename) for pattern in self.exclude):
            return False

        return True
229
230
class WcRegexp(util.Immutable, Generic[AnyStr]):
    """Hashable container of compiled include/exclude patterns and their match options."""

    _include: Tuple[Pattern[AnyStr], ...]
    _exclude: Optional[Tuple[Pattern[AnyStr], ...]]
    _real: bool
    _path: bool
    _follow: bool
    _hash: int

    __slots__ = ("_include", "_exclude", "_real", "_path", "_follow", "_hash")

    def __init__(
        self,
        include: Tuple[Pattern[AnyStr], ...],
        exclude: Optional[Tuple[Pattern[AnyStr], ...]] = None,
        real: bool = False,
        path: bool = False,
        follow: bool = False
    ):
        """Store the patterns and options along with a hash computed once from them."""

        # The hash covers the class, and both the type and value of every argument.
        hash_value = hash(
            (
                type(self),
                type(include), include,
                type(exclude), exclude,
                type(real), real,
                type(path), path,
                type(follow), follow
            )
        )

        super().__init__(
            _include=include,
            _exclude=exclude,
            _real=real,
            _path=path,
            _follow=follow,
            _hash=hash_value
        )

    def __hash__(self) -> int:
        """Return the hash computed at construction."""

        return self._hash

    def __len__(self) -> int:
        """Return the number of include patterns plus the number of exclude patterns."""

        total = len(self._include)
        if self._exclude is not None:
            total += len(self._exclude)
        return total

    def __eq__(self, other: Any) -> bool:
        """Return whether `other` is a `WcRegexp` with the same patterns and options."""

        if not isinstance(other, WcRegexp):
            return False

        mine = (self._include, self._exclude, self._real, self._path, self._follow)
        theirs = (other._include, other._exclude, other._real, other._path, other._follow)
        return mine == theirs

    def __ne__(self, other: Any) -> bool:
        """Return whether `other` is not a `WcRegexp` or differs in patterns or options."""

        if not isinstance(other, WcRegexp):
            return True

        mine = (self._include, self._exclude, self._real, self._path, self._follow)
        theirs = (other._include, other._exclude, other._real, other._path, other._follow)
        return mine != theirs

    def match(self, filename: AnyStr, root_dir: Optional[AnyStr] = None, dir_fd: Optional[int] = None) -> bool:
        """Test `filename` against the stored patterns by delegating to `_Match`."""

        matcher = _Match(
            filename,
            self._include,
            self._exclude,
            self._real,
            self._path,
            self._follow
        )
        return matcher.match(root_dir=root_dir, dir_fd=dir_fd)
319
320
def _pickle(p):  # type: ignore[no-untyped-def]
    """Reduce a `WcRegexp` to its constructor and the positional arguments that rebuild it."""

    args = (p._include, p._exclude, p._real, p._path, p._follow)
    return WcRegexp, args


# Register the reducer so `WcRegexp` objects are pickled through `_pickle`.
copyreg.pickle(WcRegexp, _pickle)
326