"""Handle path matching."""
import re
import os
import stat
import copyreg
from . import util
from typing import Pattern, Tuple, AnyStr, Optional, Generic, Any, Dict, cast

# `O_DIRECTORY` may not always be defined
DIR_FLAGS = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0)
# Right half can return an empty set if not supported
SUPPORT_DIR_FD = {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd


# Patterns that recognize an absolute (mounted) path. Each tuple holds a `str` pattern
# followed by a `bytes` pattern and is indexed with `_Match.ptype`
# (presumably `util.UNICODE == 0` and `util.BYTES == 1` -- confirm against `util`).
RE_WIN_MOUNT = (
    re.compile(r'\\|[a-z]:(?:\\|$)', re.I),
    re.compile(br'\\|[a-z]:(?:\\|$)', re.I)
)
RE_MOUNT = (
    re.compile(r'/'),
    re.compile(br'/')
)


class _Match(Generic[AnyStr]):
    """
    Match a single filename against compiled include and exclude patterns.

    When `real` is set, the filename must exist on the file system and `globstar`
    captures are checked for symlinks; otherwise matching is purely textual.
    """

    def __init__(
        self,
        filename: AnyStr,
        include: Tuple[Pattern[AnyStr], ...],
        exclude: Optional[Tuple[Pattern[AnyStr], ...]],
        real: bool,
        path: bool,
        follow: bool
    ) -> None:
        """
        Initialize.

        `filename` is the path to test, `include` are the patterns of which at least one
        must match, and `exclude` are the patterns of which none may match.
        `real` enables file system aware matching and `follow` disables the symlink check
        applied to `globstar` captures. `path` is only stored here.
        """

        self.filename = filename  # type: AnyStr
        self.include = include  # type: Tuple[Pattern[AnyStr], ...]
        self.exclude = exclude  # type: Optional[Tuple[Pattern[AnyStr], ...]]
        self.real = real
        self.path = path
        self.follow = follow
        # Used to select the `str` or `bytes` variant of the mount patterns.
        self.ptype = util.BYTES if isinstance(self.filename, bytes) else util.UNICODE

    def _fs_match(
        self,
        pattern: Pattern[AnyStr],
        filename: AnyStr,
        is_dir: bool,
        sep: AnyStr,
        follow: bool,
        symlinks: Dict[Tuple[Optional[int], AnyStr], bool],
        root: AnyStr,
        dir_fd: Optional[int]
    ) -> bool:
        """
        Match path against the pattern.

        Since `globstar` doesn't match symlinks (unless `FOLLOW` is enabled), we must look for symlinks.
        If we identify a symlink in a `globstar` match, we know this result should not actually match.

        We only check for the symlink if we know we are looking at a directory.
        And we only call `lstat` if we can't find it in the cache.

        We know it's a directory if:

        1. If the base is a directory, all parts are directories.
        2. If we are not the last part of the `globstar`, the part is a directory.
        3. If the base is a file, but the part is not at the end, it is a directory.

        `symlinks` is the cache, keyed by `(dir_fd, path)`, and is updated in place.
        Returns `True` if the pattern matches and no checked part is a symlink.
        """

        m = pattern.fullmatch(filename)
        if not m:
            return False

        # When following symlinks a textual match is all that is required.
        if follow:
            return True

        # Lets look at the captured `globstar` groups and see if that part of the path
        # contains symlinks.
        groups = m.groups()
        last = len(groups)
        end = len(filename)
        try:
            for i, star in enumerate(groups, 1):
                if not star:
                    continue

                at_end = m.end(i) == end
                # NOTE(review): this is evaluated once per group, so when the final group
                # reaches the end of a non-directory path none of its parts are checked.
                # Confirm that this is the intended reading of rule 3 above.
                check = is_dir or i != last or not at_end

                # Rebuild the base from the text that precedes *this* group. Reusing the base
                # left over from an earlier group would drop any literal path segments that
                # sit between two `globstar` groups and test the wrong path for symlinks.
                base = os.path.join(root, filename[:m.start(i)])
                for part in star.strip(sep).split(sep):
                    base = os.path.join(base, part)
                    if not check:
                        continue

                    key = (dir_fd, base)
                    is_link = symlinks.get(key, None)
                    if is_link is None:
                        if dir_fd is None:
                            is_link = os.path.islink(base)
                            symlinks[key] = is_link
                        else:
                            try:
                                st = os.lstat(base, dir_fd=dir_fd)
                            except (OSError, ValueError):  # pragma: no cover
                                # A failed lookup is treated as "not a link" and is not cached.
                                is_link = False
                            else:
                                is_link = stat.S_ISLNK(st.st_mode)
                                symlinks[key] = is_link
                    if is_link:
                        return False
        except OSError:  # pragma: no cover
            return False
        return True

    def _match_real(
        self,
        symlinks: Dict[Tuple[Optional[int], AnyStr], bool],
        root: AnyStr,
        dir_fd: Optional[int]
    ) -> bool:
        """
        Match real filename includes and excludes.

        The filename is resolved relative to `root` (and `dir_fd` when given). If it names a
        directory but lacks a trailing separator, one is appended before matching.
        Exclude patterns are always matched with symlink following enabled.
        """

        temp = '\\' if util.platform() == "windows" else '/'
        if isinstance(self.filename, bytes):
            sep = os.fsencode(temp)
        else:
            sep = temp

        is_dir = self.filename.endswith(sep)
        try:
            if dir_fd is None:
                is_file_dir = os.path.isdir(os.path.join(root, self.filename))
            else:
                try:
                    st = os.stat(os.path.join(root, self.filename), dir_fd=dir_fd)
                except (OSError, ValueError):  # pragma: no cover
                    is_file_dir = False
                else:
                    is_file_dir = stat.S_ISDIR(st.st_mode)
        except OSError:  # pragma: no cover
            return False

        if not is_dir and is_file_dir:
            # Normalize directories so they always end with a separator.
            is_dir = True
            filename = self.filename + sep
        else:
            filename = self.filename

        included = any(
            self._fs_match(pattern, filename, is_dir, sep, self.follow, symlinks, root, dir_fd)
            for pattern in self.include
        )
        if not included:
            return False

        if self.exclude:
            return not any(
                self._fs_match(pattern, filename, is_dir, sep, True, symlinks, root, dir_fd)
                for pattern in self.exclude
            )
        return True

    def match(self, root_dir: Optional[AnyStr] = None, dir_fd: Optional[int] = None) -> bool:
        """
        Match.

        `root_dir` is the directory that relative filenames are resolved against; it defaults
        to the current directory. `dir_fd` is a directory descriptor used for the file system
        lookups and is ignored when the platform lacks the required `dir_fd` support.
        Both are only used when `real` matching is enabled.

        Returns `True` if an include pattern matches and no exclude pattern does. With `real`
        matching the file must also exist.

        Raises `TypeError` if the filename, root directory, and patterns are not all `str`
        or all `bytes` (checked for `real` matching only).
        """

        if self.real:
            if isinstance(self.filename, bytes):
                root = root_dir if root_dir is not None else b'.'  # type: AnyStr
            else:
                root = root_dir if root_dir is not None else '.'

            if dir_fd is not None and not SUPPORT_DIR_FD:
                dir_fd = None

            if not isinstance(self.filename, type(root)):
                raise TypeError(
                    "The filename and root directory should be of the same type, not {} and {}".format(
                        type(self.filename), type(root_dir)
                    )
                )

            if self.include and not isinstance(self.include[0].pattern, type(self.filename)):
                raise TypeError(
                    "The filename and pattern should be of the same type, not {} and {}".format(
                        type(self.filename), type(self.include[0].pattern)
                    )
                )

            re_mount = cast(Pattern[AnyStr], (RE_WIN_MOUNT if util.platform() == "windows" else RE_MOUNT)[self.ptype])
            is_abs = re_mount.match(self.filename) is not None

            # `lexists`/`lstat` are used so that broken symlinks still count as existing.
            if is_abs:
                exists = os.path.lexists(self.filename)
            elif dir_fd is None:
                exists = os.path.lexists(os.path.join(root, self.filename))
            else:
                try:
                    os.lstat(os.path.join(root, self.filename), dir_fd=dir_fd)
                except (OSError, ValueError):  # pragma: no cover
                    exists = False
                else:
                    exists = True

            if not exists:
                return False

            symlinks = {}  # type: Dict[Tuple[Optional[int], AnyStr], bool]
            return self._match_real(symlinks, root, dir_fd)

        # Purely textual matching: no file system access.
        if not any(pattern.fullmatch(self.filename) for pattern in self.include):
            return False
        if self.exclude:
            return not any(pattern.fullmatch(self.filename) for pattern in self.exclude)
        return True


class WcRegexp(util.Immutable, Generic[AnyStr]):
    """
    File name match object.

    Bundles the compiled include and exclude patterns together with the matching options.
    Instances are hashable, comparable, and picklable.
    """

    _include: Tuple[Pattern[AnyStr], ...]
    _exclude: Optional[Tuple[Pattern[AnyStr], ...]]
    _real: bool
    _path: bool
    _follow: bool
    _hash: int

    __slots__ = ("_include", "_exclude", "_real", "_path", "_follow", "_hash")

    def __init__(
        self,
        include: Tuple[Pattern[AnyStr], ...],
        exclude: Optional[Tuple[Pattern[AnyStr], ...]] = None,
        real: bool = False,
        path: bool = False,
        follow: bool = False
    ):
        """
        Initialization.

        The hash is computed once here from the type and value of every argument and
        stored alongside them.
        """

        super().__init__(
            _include=include,
            _exclude=exclude,
            _real=real,
            _path=path,
            _follow=follow,
            _hash=hash(
                (
                    type(self),
                    type(include), include,
                    type(exclude), exclude,
                    type(real), real,
                    type(path), path,
                    type(follow), follow
                )
            )
        )

    def __hash__(self) -> int:
        """Hash."""

        return self._hash

    def __len__(self) -> int:
        """Length: the total number of include and exclude patterns."""

        return len(self._include) + (len(self._exclude) if self._exclude is not None else 0)

    def __eq__(self, other: Any) -> bool:
        """Equal."""

        return (
            isinstance(other, WcRegexp) and
            self._include == other._include and
            self._exclude == other._exclude and
            self._real == other._real and
            self._path == other._path and
            self._follow == other._follow
        )

    def __ne__(self, other: Any) -> bool:
        """Not equal."""

        return (
            not isinstance(other, WcRegexp) or
            self._include != other._include or
            self._exclude != other._exclude or
            self._real != other._real or
            self._path != other._path or
            self._follow != other._follow
        )

    def match(self, filename: AnyStr, root_dir: Optional[AnyStr] = None, dir_fd: Optional[int] = None) -> bool:
        """
        Match filename.

        Builds a `_Match` for `filename` from the stored patterns and options and returns
        its result. `root_dir` and `dir_fd` are passed through unchanged.
        """

        return _Match(
            filename,
            self._include,
            self._exclude,
            self._real,
            self._path,
            self._follow
        ).match(
            root_dir=root_dir,
            dir_fd=dir_fd
        )


def _pickle(p):  # type: ignore[no-untyped-def]
    """Reduce a `WcRegexp` to its constructor and constructor arguments for pickling."""

    return WcRegexp, (p._include, p._exclude, p._real, p._path, p._follow)


copyreg.pickle(WcRegexp, _pickle)