1import re 2import os 3import itertools 4from collections import defaultdict 5 6MYPY = False 7if MYPY: 8 # MYPY is set to True when run under Mypy. 9 from typing import Any 10 from typing import Dict 11 from typing import Iterable 12 from typing import List 13 from typing import MutableMapping 14 from typing import Optional 15 from typing import Pattern 16 from typing import Tuple 17 from typing import TypeVar 18 from typing import Union 19 from typing import cast 20 21 T = TypeVar('T') 22 23 24end_space = re.compile(r"([^\\]\s)*$") 25 26 27def fnmatch_translate(pat): 28 # type: (bytes) -> Tuple[bool, Pattern[bytes]] 29 parts = [] 30 seq = None 31 i = 0 32 any_char = b"[^/]" 33 if pat[0:1] == b"/": 34 parts.append(b"^") 35 pat = pat[1:] 36 else: 37 # By default match the entire path up to a / 38 # but if / doesn't appear in the pattern we will mark is as 39 # a name pattern and just produce a pattern that matches against 40 # the filename 41 parts.append(b"^(?:.*/)?") 42 43 name_pattern = True 44 if pat[-1:] == b"/": 45 # If the last character is / match this directory or any subdirectory 46 pat = pat[:-1] 47 suffix = b"(?:/|$)" 48 else: 49 suffix = b"$" 50 while i < len(pat): 51 c = pat[i:i+1] 52 if c == b"\\": 53 if i < len(pat) - 1: 54 i += 1 55 c = pat[i:i+1] 56 parts.append(re.escape(c)) 57 else: 58 raise ValueError 59 elif seq is not None: 60 # TODO: this doesn't really handle invalid sequences in the right way 61 if c == b"]": 62 seq = None 63 if parts[-1:] == b"[": 64 parts = parts[:-1] 65 elif parts[-1:] == b"^" and parts[-2:-1] == b"[": 66 parts = parts[:-2] 67 else: 68 parts.append(c) 69 elif c == b"-": 70 parts.append(c) 71 elif c == b"[": 72 raise ValueError 73 else: 74 parts.append(re.escape(c)) 75 elif c == b"[": 76 parts.append(b"[") 77 if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"): 78 parts.append(b"^") 79 i += 1 80 seq = i 81 elif c == b"*": 82 if i < len(pat) - 1 and pat[i+1:i+2] == b"*": 83 if i > 0 and pat[i-1:i] != b"/": 84 raise ValueError 85 parts.append(b".*") 86 i += 1 87 if i < len(pat) - 1 and pat[i+1:i+2] != b"/": 88 raise ValueError 89 else: 90 parts.append(any_char + b"*") 91 elif c == b"?": 92 parts.append(any_char) 93 elif c == b"/" and not seq: 94 name_pattern = False 95 parts.append(c) 96 else: 97 parts.append(re.escape(c)) 98 i += 1 99 100 if name_pattern: 101 parts[0] = b"^" 102 103 if seq is not None: 104 raise ValueError 105 parts.append(suffix) 106 try: 107 return name_pattern, re.compile(b"".join(parts)) 108 except Exception: 109 raise ValueError 110 111# Regexp matching rules that have to be converted to patterns 112pattern_re = re.compile(br".*[\*\[\?]") 113 114 115def parse_line(line): 116 # type: (bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]] 117 line = line.rstrip() 118 if not line or line[0:1] == b"#": 119 return None 120 121 invert = line[0:1] == b"!" 122 if invert: 123 line = line[1:] 124 125 dir_only = line[-1:] == b"/" 126 127 if dir_only: 128 line = line[:-1] 129 130 # Could make a special case for **/foo, but we don't have any patterns like that 131 if not invert and not pattern_re.match(line): 132 literal = True 133 pattern = tuple(line.rsplit(b"/", 1)) # type: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]] 134 else: 135 pattern = fnmatch_translate(line) 136 literal = False 137 138 return invert, dir_only, literal, pattern 139 140 141class PathFilter(object): 142 def __init__(self, root, extras=None, cache=None): 143 # type: (bytes, Optional[List[bytes]], Optional[MutableMapping[bytes, bool]]) -> None 144 if root: 145 ignore_path = os.path.join(root, b".gitignore") # type: Optional[bytes] 146 else: 147 ignore_path = None 148 if not ignore_path and not extras: 149 self.trivial = True 150 return 151 self.trivial = False 152 153 self.literals_file = defaultdict(dict) # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] 154 self.literals_dir = defaultdict(dict) # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]] 155 self.patterns_file = [] # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] 156 self.patterns_dir = [] # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]] 157 158 if cache is None: 159 cache = {} 160 self.cache = cache # type: MutableMapping[bytes, bool] 161 162 if extras is None: 163 extras = [] 164 165 if ignore_path and os.path.exists(ignore_path): 166 args = ignore_path, extras # type: Tuple[Optional[bytes], List[bytes]] 167 else: 168 args = None, extras 169 self._read_ignore(*args) 170 171 def _read_ignore(self, ignore_path, extras): 172 # type: (Optional[bytes], List[bytes]) -> None 173 if ignore_path is not None: 174 with open(ignore_path, "rb") as f: 175 for line in f: 176 self._read_line(line) 177 for line in extras: 178 self._read_line(line) 179 180 def _read_line(self, line): 181 # type: (bytes) -> None 182 parsed = parse_line(line) 183 if not parsed: 184 return 185 invert, dir_only, literal, rule = parsed 186 187 if invert: 188 # For exclude rules, we attach the rules to all preceeding patterns, so 189 # that we can match patterns out of order and check if they were later 190 # overriden by an exclude rule 191 assert not literal 192 if MYPY: 193 rule = cast(Tuple[bool, Pattern[bytes]], rule) 194 if not dir_only: 195 rules_iter = itertools.chain( 196 itertools.chain(*(item.items() for item in self.literals_dir.values())), 197 itertools.chain(*(item.items() for item in self.literals_file.values())), 198 self.patterns_dir, 199 self.patterns_file) # type: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]] 200 else: 201 rules_iter = itertools.chain( 202 itertools.chain(*(item.items() for item in self.literals_dir.values())), 203 self.patterns_dir) 204 205 for rules in rules_iter: 206 rules[1].append(rule) 207 else: 208 if literal: 209 if MYPY: 210 rule = cast(Tuple[bytes, ...], rule) 211 if len(rule) == 1: 212 dir_name, pattern = None, rule[0] # type: Tuple[Optional[bytes], bytes] 213 else: 214 dir_name, pattern = rule 215 self.literals_dir[dir_name][pattern] = [] 216 if not dir_only: 217 self.literals_file[dir_name][pattern] = [] 218 else: 219 if MYPY: 220 rule = cast(Tuple[bool, Pattern[bytes]], rule) 221 self.patterns_dir.append((rule, [])) 222 if not dir_only: 223 self.patterns_file.append((rule, [])) 224 225 def filter(self, 226 iterator # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] 227 ): 228 # type: (...) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] 229 empty = {} # type: Dict[Any, Any] 230 for dirpath, dirnames, filenames in iterator: 231 orig_dirpath = dirpath 232 path_sep = os.path.sep.encode() 233 if path_sep != b"/": 234 dirpath = dirpath.replace(path_sep, b"/") 235 236 keep_dirs = [] # type: List[Tuple[bytes, T]] 237 keep_files = [] # type: List[Tuple[bytes, T]] 238 239 for iter_items, literals, patterns, target, suffix in [ 240 (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"), 241 (filenames, self.literals_file, self.patterns_file, keep_files, b"")]: 242 for item in iter_items: 243 name = item[0] 244 if dirpath: 245 path = b"%s/%s" % (dirpath, name) + suffix 246 else: 247 path = name + suffix 248 if path in self.cache: 249 if not self.cache[path]: 250 target.append(item) 251 continue 252 for rule_dir in [None, dirpath if dirpath != b"." else b""]: 253 if name in literals.get(rule_dir, empty): 254 exclude = literals[rule_dir][name] 255 if not any(rule.match(name if name_only else path) 256 for name_only, rule in exclude): 257 # Skip this item 258 self.cache[path] = True 259 break 260 else: 261 for (component_only, pattern), exclude in patterns: 262 if component_only: 263 match = pattern.match(name) 264 else: 265 match = pattern.match(path) 266 if match: 267 if not any(rule.match(name if name_only else path) 268 for name_only, rule in exclude): 269 # Skip this item 270 self.cache[path] = True 271 break 272 else: 273 self.cache[path] = False 274 target.append(item) 275 276 dirnames[:] = keep_dirs 277 assert not any(b".git" == name for name, _ in dirnames) 278 yield orig_dirpath, dirnames, keep_files 279 280 def __call__(self, 281 iterator # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] 282 ): 283 # type: (...) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]] 284 if self.trivial: 285 return iterator 286 287 return self.filter(iterator) 288 289 290def has_ignore(dirpath): 291 # type: (bytes) -> bool 292 return os.path.exists(os.path.join(dirpath, b".gitignore")) 293