1"""Filename matching with shell patterns.
2
3fnmatch(FILENAME, PATTERN) matches according to the local convention.
4fnmatchcase(FILENAME, PATTERN) always takes case in account.
5
6The functions operate by translating the pattern into a regular
7expression.  They cache the compiled regular expressions for speed.
8
9The function translate(PATTERN) returns a regular expression
10corresponding to PATTERN.  (It does not compile it.)
11"""
12import os
13import posixpath
14import re
15import functools
16
17__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
18
19# Build a thread-safe incrementing counter to help create unique regexp group
20# names across calls.
21from itertools import count
22_nextgroupnum = count().__next__
23del count
24
25def fnmatch(name, pat):
26    """Test whether FILENAME matches PATTERN.
27
28    Patterns are Unix shell style:
29
30    *       matches everything
31    ?       matches any single character
32    [seq]   matches any character in seq
33    [!seq]  matches any char not in seq
34
35    An initial period in FILENAME is not special.
36    Both FILENAME and PATTERN are first case-normalized
37    if the operating system requires it.
38    If you don't want this, use fnmatchcase(FILENAME, PATTERN).
39    """
40    name = os.path.normcase(name)
41    pat = os.path.normcase(pat)
42    return fnmatchcase(name, pat)
43
44@functools.lru_cache(maxsize=256, typed=True)
45def _compile_pattern(pat):
46    if isinstance(pat, bytes):
47        pat_str = str(pat, 'ISO-8859-1')
48        res_str = translate(pat_str)
49        res = bytes(res_str, 'ISO-8859-1')
50    else:
51        res = translate(pat)
52    return re.compile(res).match
53
54def filter(names, pat):
55    """Construct a list from those elements of the iterable NAMES that match PAT."""
56    result = []
57    pat = os.path.normcase(pat)
58    match = _compile_pattern(pat)
59    if os.path is posixpath:
60        # normcase on posix is NOP. Optimize it away from the loop.
61        for name in names:
62            if match(name):
63                result.append(name)
64    else:
65        for name in names:
66            if match(os.path.normcase(name)):
67                result.append(name)
68    return result
69
70def fnmatchcase(name, pat):
71    """Test whether FILENAME matches PATTERN, including case.
72
73    This is a version of fnmatch() which doesn't case-normalize
74    its arguments.
75    """
76    match = _compile_pattern(pat)
77    return match(name) is not None
78
79
80def translate(pat):
81    """Translate a shell PATTERN to a regular expression.
82
83    There is no way to quote meta-characters.
84    """
85
86    STAR = object()
87    res = []
88    add = res.append
89    i, n = 0, len(pat)
90    while i < n:
91        c = pat[i]
92        i = i+1
93        if c == '*':
94            # compress consecutive `*` into one
95            if (not res) or res[-1] is not STAR:
96                add(STAR)
97        elif c == '?':
98            add('.')
99        elif c == '[':
100            j = i
101            if j < n and pat[j] == '!':
102                j = j+1
103            if j < n and pat[j] == ']':
104                j = j+1
105            while j < n and pat[j] != ']':
106                j = j+1
107            if j >= n:
108                add('\\[')
109            else:
110                stuff = pat[i:j]
111                if '--' not in stuff:
112                    stuff = stuff.replace('\\', r'\\')
113                else:
114                    chunks = []
115                    k = i+2 if pat[i] == '!' else i+1
116                    while True:
117                        k = pat.find('-', k, j)
118                        if k < 0:
119                            break
120                        chunks.append(pat[i:k])
121                        i = k+1
122                        k = k+3
123                    chunks.append(pat[i:j])
124                    # Escape backslashes and hyphens for set difference (--).
125                    # Hyphens that create ranges shouldn't be escaped.
126                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
127                                     for s in chunks)
128                # Escape set operations (&&, ~~ and ||).
129                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
130                i = j+1
131                if stuff[0] == '!':
132                    stuff = '^' + stuff[1:]
133                elif stuff[0] in ('^', '['):
134                    stuff = '\\' + stuff
135                add(f'[{stuff}]')
136        else:
137            add(re.escape(c))
138    assert i == n
139
140    # Deal with STARs.
141    inp = res
142    res = []
143    add = res.append
144    i, n = 0, len(inp)
145    # Fixed pieces at the start?
146    while i < n and inp[i] is not STAR:
147        add(inp[i])
148        i += 1
149    # Now deal with STAR fixed STAR fixed ...
150    # For an interior `STAR fixed` pairing, we want to do a minimal
151    # .*? match followed by `fixed`, with no possibility of backtracking.
152    # We can't spell that directly, but can trick it into working by matching
153    #    .*?fixed
154    # in a lookahead assertion, save the matched part in a group, then
155    # consume that group via a backreference. If the overall match fails,
156    # the lookahead assertion won't try alternatives. So the translation is:
157    #     (?=(?P<name>.*?fixed))(?P=name)
158    # Group names are created as needed: g0, g1, g2, ...
159    # The numbers are obtained from _nextgroupnum() to ensure they're unique
160    # across calls and across threads. This is because people rely on the
161    # undocumented ability to join multiple translate() results together via
162    # "|" to build large regexps matching "one of many" shell patterns.
163    while i < n:
164        assert inp[i] is STAR
165        i += 1
166        if i == n:
167            add(".*")
168            break
169        assert inp[i] is not STAR
170        fixed = []
171        while i < n and inp[i] is not STAR:
172            fixed.append(inp[i])
173            i += 1
174        fixed = "".join(fixed)
175        if i == n:
176            add(".*")
177            add(fixed)
178        else:
179            groupnum = _nextgroupnum()
180            add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
181    assert i == n
182    res = "".join(res)
183    return fr'(?s:{res})\Z'
184