1import re
2import os
3import itertools
4from collections import defaultdict
5
6MYPY = False
7if MYPY:
8    # MYPY is set to True when run under Mypy.
9    from typing import Any
10    from typing import Dict
11    from typing import Iterable
12    from typing import List
13    from typing import MutableMapping
14    from typing import Optional
15    from typing import Pattern
16    from typing import Tuple
17    from typing import TypeVar
18    from typing import Union
19    from typing import cast
20
21    T = TypeVar('T')
22
23
24end_space = re.compile(r"([^\\]\s)*$")
25
26
27def fnmatch_translate(pat):
28    # type: (bytes) -> Tuple[bool, Pattern[bytes]]
29    parts = []
30    seq = None
31    i = 0
32    any_char = b"[^/]"
33    if pat[0:1] == b"/":
34        parts.append(b"^")
35        pat = pat[1:]
36    else:
37        # By default match the entire path up to a /
38        # but if / doesn't appear in the pattern we will mark is as
39        # a name pattern and just produce a pattern that matches against
40        # the filename
41        parts.append(b"^(?:.*/)?")
42
43    name_pattern = True
44    if pat[-1:] == b"/":
45        # If the last character is / match this directory or any subdirectory
46        pat = pat[:-1]
47        suffix = b"(?:/|$)"
48    else:
49        suffix = b"$"
50    while i < len(pat):
51        c = pat[i:i+1]
52        if c == b"\\":
53            if i < len(pat) - 1:
54                i += 1
55                c = pat[i:i+1]
56                parts.append(re.escape(c))
57            else:
58                raise ValueError
59        elif seq is not None:
60            # TODO: this doesn't really handle invalid sequences in the right way
61            if c == b"]":
62                seq = None
63                if parts[-1:] == b"[":
64                    parts = parts[:-1]
65                elif parts[-1:] == b"^" and parts[-2:-1] == b"[":
66                    parts = parts[:-2]
67                else:
68                    parts.append(c)
69            elif c == b"-":
70                parts.append(c)
71            elif c == b"[":
72                raise ValueError
73            else:
74                parts.append(re.escape(c))
75        elif c == b"[":
76            parts.append(b"[")
77            if i < len(pat) - 1 and pat[i+1:i+2] in (b"!", b"^"):
78                parts.append(b"^")
79                i += 1
80            seq = i
81        elif c == b"*":
82            if i < len(pat) - 1 and pat[i+1:i+2] == b"*":
83                if i > 0 and pat[i-1:i] != b"/":
84                    raise ValueError
85                parts.append(b".*")
86                i += 1
87                if i < len(pat) - 1 and pat[i+1:i+2] != b"/":
88                    raise ValueError
89            else:
90                parts.append(any_char + b"*")
91        elif c == b"?":
92            parts.append(any_char)
93        elif c == b"/" and not seq:
94            name_pattern = False
95            parts.append(c)
96        else:
97            parts.append(re.escape(c))
98        i += 1
99
100    if name_pattern:
101        parts[0] = b"^"
102
103    if seq is not None:
104        raise ValueError
105    parts.append(suffix)
106    try:
107        return name_pattern, re.compile(b"".join(parts))
108    except Exception:
109        raise ValueError
110
111# Regexp matching rules that have to be converted to patterns
112pattern_re = re.compile(br".*[\*\[\?]")
113
114
115def parse_line(line):
116    # type: (bytes) -> Optional[Tuple[bool, bool, bool, Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]]]
117    line = line.rstrip()
118    if not line or line[0:1] == b"#":
119        return None
120
121    invert = line[0:1] == b"!"
122    if invert:
123        line = line[1:]
124
125    dir_only = line[-1:] == b"/"
126
127    if dir_only:
128        line = line[:-1]
129
130    # Could make a special case for **/foo, but we don't have any patterns like that
131    if not invert and not pattern_re.match(line):
132        literal = True
133        pattern = tuple(line.rsplit(b"/", 1))  # type: Union[Tuple[bytes, ...], Tuple[bool, Pattern[bytes]]]
134    else:
135        pattern = fnmatch_translate(line)
136        literal = False
137
138    return invert, dir_only, literal, pattern
139
140
141class PathFilter(object):
142    def __init__(self, root, extras=None, cache=None):
143        # type: (bytes, Optional[List[bytes]], Optional[MutableMapping[bytes, bool]]) -> None
144        if root:
145            ignore_path = os.path.join(root, b".gitignore")  # type: Optional[bytes]
146        else:
147            ignore_path = None
148        if not ignore_path and not extras:
149            self.trivial = True
150            return
151        self.trivial = False
152
153        self.literals_file = defaultdict(dict)  # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]]
154        self.literals_dir = defaultdict(dict)  # type: Dict[Optional[bytes], Dict[bytes, List[Tuple[bool, Pattern[bytes]]]]]
155        self.patterns_file = []  # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]]
156        self.patterns_dir = []  # type: List[Tuple[Tuple[bool, Pattern[bytes]], List[Tuple[bool, Pattern[bytes]]]]]
157
158        if cache is None:
159            cache = {}
160        self.cache = cache  # type: MutableMapping[bytes, bool]
161
162        if extras is None:
163            extras = []
164
165        if ignore_path and os.path.exists(ignore_path):
166            args = ignore_path, extras  # type: Tuple[Optional[bytes], List[bytes]]
167        else:
168            args = None, extras
169        self._read_ignore(*args)
170
171    def _read_ignore(self, ignore_path, extras):
172        # type: (Optional[bytes], List[bytes]) -> None
173        if ignore_path is not None:
174            with open(ignore_path, "rb") as f:
175                for line in f:
176                    self._read_line(line)
177        for line in extras:
178            self._read_line(line)
179
180    def _read_line(self, line):
181        # type: (bytes) -> None
182        parsed = parse_line(line)
183        if not parsed:
184            return
185        invert, dir_only, literal, rule = parsed
186
187        if invert:
188            # For exclude rules, we attach the rules to all preceeding patterns, so
189            # that we can match patterns out of order and check if they were later
190            # overriden by an exclude rule
191            assert not literal
192            if MYPY:
193                rule = cast(Tuple[bool, Pattern[bytes]], rule)
194            if not dir_only:
195                rules_iter = itertools.chain(
196                    itertools.chain(*(item.items() for item in self.literals_dir.values())),
197                    itertools.chain(*(item.items() for item in self.literals_file.values())),
198                    self.patterns_dir,
199                    self.patterns_file)  # type: Iterable[Tuple[Any, List[Tuple[bool, Pattern[bytes]]]]]
200            else:
201                rules_iter = itertools.chain(
202                    itertools.chain(*(item.items() for item in self.literals_dir.values())),
203                    self.patterns_dir)
204
205            for rules in rules_iter:
206                rules[1].append(rule)
207        else:
208            if literal:
209                if MYPY:
210                    rule = cast(Tuple[bytes, ...], rule)
211                if len(rule) == 1:
212                    dir_name, pattern = None, rule[0]  # type: Tuple[Optional[bytes], bytes]
213                else:
214                    dir_name, pattern = rule
215                self.literals_dir[dir_name][pattern] = []
216                if not dir_only:
217                    self.literals_file[dir_name][pattern] = []
218            else:
219                if MYPY:
220                    rule = cast(Tuple[bool, Pattern[bytes]], rule)
221                self.patterns_dir.append((rule, []))
222                if not dir_only:
223                    self.patterns_file.append((rule, []))
224
225    def filter(self,
226               iterator  # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
227               ):
228        # type: (...) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
229        empty = {}  # type: Dict[Any, Any]
230        for dirpath, dirnames, filenames in iterator:
231            orig_dirpath = dirpath
232            path_sep = os.path.sep.encode()
233            if path_sep != b"/":
234                dirpath = dirpath.replace(path_sep, b"/")
235
236            keep_dirs = []  # type: List[Tuple[bytes, T]]
237            keep_files = []  # type: List[Tuple[bytes, T]]
238
239            for iter_items, literals, patterns, target, suffix in [
240                    (dirnames, self.literals_dir, self.patterns_dir, keep_dirs, b"/"),
241                    (filenames, self.literals_file, self.patterns_file, keep_files, b"")]:
242                for item in iter_items:
243                    name = item[0]
244                    if dirpath:
245                        path = b"%s/%s" % (dirpath, name) + suffix
246                    else:
247                        path = name + suffix
248                    if path in self.cache:
249                        if not self.cache[path]:
250                            target.append(item)
251                        continue
252                    for rule_dir in [None, dirpath if dirpath != b"." else b""]:
253                        if name in literals.get(rule_dir, empty):
254                            exclude = literals[rule_dir][name]
255                            if not any(rule.match(name if name_only else path)
256                                       for name_only, rule in exclude):
257                                # Skip this item
258                                self.cache[path] = True
259                                break
260                    else:
261                        for (component_only, pattern), exclude in patterns:
262                            if component_only:
263                                match = pattern.match(name)
264                            else:
265                                match = pattern.match(path)
266                            if match:
267                                if not any(rule.match(name if name_only else path)
268                                           for name_only, rule in exclude):
269                                    # Skip this item
270                                    self.cache[path] = True
271                                    break
272                        else:
273                            self.cache[path] = False
274                            target.append(item)
275
276            dirnames[:] = keep_dirs
277            assert not any(b".git" == name for name, _ in dirnames)
278            yield orig_dirpath, dirnames, keep_files
279
280    def __call__(self,
281                 iterator  # type: Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
282                 ):
283        # type: (...) -> Iterable[Tuple[bytes, List[Tuple[bytes, T]], List[Tuple[bytes, T]]]]
284        if self.trivial:
285            return iterator
286
287        return self.filter(iterator)
288
289
290def has_ignore(dirpath):
291    # type: (bytes) -> bool
292    return os.path.exists(os.path.join(dirpath, b".gitignore"))
293