1"""
2Wild Card Match.
3
4A custom implementation of `fnmatch`.
5
6Licensed under MIT
7Copyright (c) 2018 - 2020 Isaac Muse <isaacmuse@gmail.com>
8
9Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
10documentation files (the "Software"), to deal in the Software without restriction, including without limitation
11the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
12and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
13
14The above copyright notice and this permission notice shall be included in all copies or substantial portions
15of the Software.
16
17THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
18TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
20CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21IN THE SOFTWARE.
22"""
23import re
24import functools
25import bracex
26import os
27from . import util
28from . import posix
29from . _wcmatch import WcRegexp
30from typing import List, Tuple, AnyStr, Iterable, Pattern, Generic, Optional, Set, Sequence, Union, cast
31
# Character ranges: every Unicode code point, and every single-byte value.
# NOTE: despite its name, `ASCII_RANGE` extends to `\xff`, not `\x7f`.
UNICODE_RANGE = '\u0000-\U0010ffff'
ASCII_RANGE = '\x00-\xff'

# Default value of the `limit` argument of `translate` and `compile`.
PATTERN_LIMIT = 1000

# Helpers used by `_get_win_drive` to recognize a Windows drive or UNC style prefix in a pattern.
# In these expressions a separator is either an escaped backslash (two backslashes) or a forward slash.
# Groups: 1 = whole prefix, 2 = first name following a double separator, 3 = drive letter and colon
# (each optionally preceded by a backslash), 4 = trailing separator (empty at the end of the pattern).
RE_WIN_DRIVE_START = re.compile(r'((?:\\\\|/){2}((?:\\[^\\/]|[^\\/])+)|([\\]?[a-z][\\]?:))((?:\\\\|/)|$)', re.I)
# A plain `x:` drive followed by a single backslash, a forward slash, or the end of the string.
RE_WIN_DRIVE_LETTER = re.compile(r'([a-z]:)((?:\\|/)|$)', re.I)
# One path component (backslash escapes allowed) followed by a separator or the end of the string.
RE_WIN_DRIVE_PART = re.compile(r'((?:\\[^\\/]|[^\\/])+)((?:\\\\|/)|$)', re.I)
# A backslash followed by any character; substituting `\1` drops the escaping backslash.
RE_WIN_DRIVE_UNESCAPE = re.compile(r'\\(.)', re.I)
41
# Windows drive matcher, as a `(str, bytes)` pair selected with `util.UNICODE` / `util.BYTES`
# (see `escape` and `is_magic`). Alternatives, in order:
#   - `//?/` or `//./` followed by a drive letter, `unc/<name>/<name>`, or one or more `global/`
#     segments followed by a drive letter, `unc/<name>/<name>`, or a single name;
#   - `//<name>/<name>`;
#   - a bare drive letter such as `c:`.
# Group 1 is the drive; group 2 is a single trailing separator (empty at the end of the string).
RE_WIN_DRIVE = (
    re.compile(
        r'''(?x)
        (
            (?:\\\\|/){2}[?.](?:\\\\|/)(?:
                [a-z]:|
                unc(?:(?:\\\\|/)[^\\/]+){2} |
                (?:global(?:\\\\|/))+(?:[a-z]:|unc(?:(?:\\\\|/)[^\\/]+){2}|[^\\/]+)
            ) |
            (?:\\\\|/){2}[^\\/]+(?:\\\\|/)[^\\/]+|
            [a-z]:
        )((?:\\\\|/){1}|$)
        ''',
        re.I
    ),
    re.compile(
        br'''(?x)
        (
            (?:\\\\|/){2}[?.](?:\\\\|/)(?:
                [a-z]:|
                unc(?:(?:\\\\|/)[^\\/]+){2} |
                (?:global(?:\\\\|/))+(?:[a-z]:|unc(?:(?:\\\\|/)[^\\/]+){2}|[^\\/]+)
            ) |
            (?:\\\\|/){2}[^\\/]+(?:\\\\|/)[^\\/]+|
            [a-z]:
        )((?:\\\\|/){1}|$)
        ''',
        re.I
    )
)
72
# Every table below is a `(str, bytes)` pair selected with `util.UNICODE` / `util.BYTES`.

# Used by `escape`: matches any single magic character, or an odd-length run of backslashes
# (i.e. one that ends in a backslash which is not itself escaped).
RE_MAGIC_ESCAPE = (
    re.compile(r'([-!~*?()\[\]|{}]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))'),
    re.compile(br'([-!~*?()\[\]|{}]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))')
)

# Magic character sets combined by `_get_magic_symbols` according to the active flags.
# NOTE: building a `frozenset` from a `bytes` literal stores integer byte values, not 1-byte strings.
# Always magic.
MAGIC_DEF = (
    frozenset("*?[]\\"),
    frozenset(b"*?[]\\")
)
# Magic when `SPLIT` is enabled.
MAGIC_SPLIT = (
    frozenset("|"),
    frozenset(b"|")
)
# Magic when `NEGATE` is enabled without `MINUSNEGATE`.
MAGIC_NEGATE = (
    frozenset('!'),
    frozenset(b'!')
)
# Magic when both `NEGATE` and `MINUSNEGATE` are enabled.
MAGIC_MINUS_NEGATE = (
    frozenset('-'),
    frozenset(b'-')
)
# Magic when `GLOBTILDE` is enabled.
MAGIC_TILDE = (
    frozenset('~'),
    frozenset(b'~')
)
# Magic when `EXTMATCH` is enabled.
MAGIC_EXTMATCH = (
    frozenset('()'),
    frozenset(b'()')
)
# Magic when `BRACE` is enabled.
MAGIC_BRACE = (
    frozenset("{}"),
    frozenset(b"{}")
)

# Matches a single character that may introduce magic.
RE_MAGIC = (
    re.compile(r'([-!~*?(\[|{\\])'),
    re.compile(br'([-!~*?(\[|{\\])')
)
# Used by `escape` on the drive portion of a Windows path: only brace and split characters
# and odd-length runs of backslashes are escaped there.
RE_WIN_DRIVE_MAGIC = (
    re.compile(r'([{}|]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))'),
    re.compile(br'([{}|]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))')
)
# Appended by `compile` as a negative pattern when `NODIR` is set (Unix style paths):
# matches paths ending in a slash or in a `.`/`..` component.
RE_NO_DIR = (
    re.compile(r'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$'),
    re.compile(br'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$')
)
# Windows counterpart of `RE_NO_DIR`; accepts both backslash and forward slash as separators.
RE_WIN_NO_DIR = (
    re.compile(r'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$'),
    re.compile(br'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$')
)
# Used by `expand_tilde`: a tilde and everything up to the next slash (Unix style patterns).
RE_TILDE = (
    re.compile(r'~[^/]*(?=/|$)'),
    re.compile(br'~[^/]*(?=/|$)')
)
# Windows counterpart of `RE_TILDE`; stops at an escaped backslash or a forward slash.
RE_WIN_TILDE = (
    re.compile(r'~(?:\\(?![\\/])|[^\\/])*(?=\\\\|/|$)'),
    re.compile(br'~(?:\\(?![\\/])|[^\\/])*(?=\\\\|/|$)')
)

# The tilde symbol in both string types.
TILDE_SYM = (
    '~',
    b'~'
)
136
# One or more leading slashes (Unix style).
RE_ANCHOR = re.compile(r'^/+')
# One or more leading separators (escaped backslash or forward slash) for Windows style patterns.
RE_WIN_ANCHOR = re.compile(r'^(?:\\\\|/)+')
# Body and closing of a POSIX character class, e.g. `:alpha:]`.
RE_POSIX = re.compile(r':(alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit):\]')

SET_OPERATORS = frozenset(('&', '~', '|'))
# Leading symbols checked by `is_negative` and `tilde_pos`; each set holds the `str` and `bytes` form.
NEGATIVE_SYM = frozenset((b'!', '!'))
MINUS_NEGATIVE_SYM = frozenset((b'-', '-'))
# Opening round bracket; `is_negative` uses it to tell `!(` (an extended match list) from a negation.
ROUND_BRACKET = frozenset((b'(', '('))
# Characters that may prefix an extended match list `(...)`.
EXT_TYPES = frozenset(('*', '?', '+', '@', '!'))
146
# Common flags are found between `0x0001 - 0xffffff`
# Implementation specific (`glob` vs `fnmatch` vs `wcmatch`) are found between `0x01000000 - 0xff000000`
# Internal special flags are found at `0x100000000` and above
CASE = 0x0001
IGNORECASE = 0x0002
RAWCHARS = 0x0004
NEGATE = 0x0008
MINUSNEGATE = 0x0010
PATHNAME = 0x0020
DOTMATCH = 0x0040
EXTMATCH = 0x0080
GLOBSTAR = 0x0100
BRACE = 0x0200
REALPATH = 0x0400
FOLLOW = 0x0800
SPLIT = 0x1000
MATCHBASE = 0x2000
NODIR = 0x4000
NEGATEALL = 0x8000
FORCEWIN = 0x10000
FORCEUNIX = 0x20000
GLOBTILDE = 0x40000
NOUNIQUE = 0x80000
NODOTDIR = 0x100000

# Internal flag
_TRANSLATE = 0x100000000  # Lets us know we are performing a translation, and we just want the regex.
_ANCHOR = 0x200000000  # The pattern, if it starts with a slash, is anchored to the working directory; strip the slash.
_EXTMATCHBASE = 0x400000000  # Like `MATCHBASE`, but works for multiple directory levels.
_NOABSOLUTE = 0x800000000  # Do not allow absolute patterns
_RTL = 0x1000000000  # Match from right to left

# Union of every flag defined above. `translate` and `_compile` AND the incoming
# flags with this mask so that unknown bits never reach `WcParse`.
FLAG_MASK = (
    CASE |
    IGNORECASE |
    RAWCHARS |
    NEGATE |
    MINUSNEGATE |
    PATHNAME |
    DOTMATCH |
    EXTMATCH |
    GLOBSTAR |
    BRACE |
    REALPATH |
    FOLLOW |
    MATCHBASE |
    NODIR |
    NEGATEALL |
    FORCEWIN |
    FORCEUNIX |
    GLOBTILDE |
    SPLIT |
    NOUNIQUE |
    NODOTDIR |
    _TRANSLATE |
    _ANCHOR |
    _EXTMATCHBASE |
    _RTL |
    _NOABSOLUTE
)
# Flags that explicitly select case handling; `get_case` falls back to the platform default when neither is set.
CASE_FLAGS = IGNORECASE | CASE
208
# Pieces to construct search path
# Templates containing `{sep}` are filled in with `str.format` in `WcParse.__init__`,
# which is why literal regex braces are doubled (`{{1,2}}`) in those templates.

# Question Mark
_QMARK = r'.'
# Star
_STAR = r'.*?'
# For paths, allow trailing /
_PATH_TRAIL = r'{}*?'
# Disallow . and .. (usually applied right after path separator when needed)
_NO_DIR = r'(?!(?:\.{{1,2}})(?:$|[{sep}]))'
# Star for `PATHNAME`
_PATH_STAR = r'[^{sep}]*?'
# Star when at start of filename during `DOTMATCH`
# (allow dot, but don't allow directory match /./ or /../)
_PATH_STAR_DOTMATCH = _NO_DIR + _PATH_STAR
# Star for `PATHNAME` when `DOTMATCH` is disabled and start is at start of file.
# Disallow . and .. and don't allow match to start with a dot.
_PATH_STAR_NO_DOTMATCH = _NO_DIR + r'(?:(?!\.){})?'.format(_PATH_STAR)
# `GLOBSTAR` during `DOTMATCH`. Avoid directory match /./ or /../
_PATH_GSTAR_DOTMATCH = r'(?:(?!(?:[{sep}]|^)(?:\.{{1,2}})($|[{sep}])).)*?'
# `GLOBSTAR` with `DOTMATCH` disabled. Don't allow a dot to follow /
_PATH_GSTAR_NO_DOTMATCH = r'(?:(?!(?:[{sep}]|^)\.).)*?'
# Special right to left matching
_PATH_GSTAR_RTL_MATCH = r'.*?'
# Next char cannot be a dot
_NO_DOT = r'(?![.])'
# Following char from sequence cannot be a separator or a dot
_PATH_NO_SLASH_DOT = r'(?![{sep}.])'
# Following char from sequence cannot be a separator
_PATH_NO_SLASH = r'(?![{sep}])'
# One or more
_ONE_OR_MORE = r'+'
# End of pattern
_EOP = r'$'
_PATH_EOP = r'(?:$|[{sep}])'
# Divider between `globstar`. Can match start or end of pattern
# in addition to slashes.
_GLOBSTAR_DIV = r'(?:^|$|{})+'
# Lookahead to see there is one character.
_NEED_CHAR_PATH = r'(?=[^{sep}])'
_NEED_CHAR = r'(?=.)'
_NEED_SEP = r'(?={})'
# Group that matches one or none
_QMARK_GROUP = r'(?:{})?'
_QMARK_CAPTURE_GROUP = r'((?#)(?:{})?)'
# Group that matches Zero or more
_STAR_GROUP = r'(?:{})*'
_STAR_CAPTURE_GROUP = r'((?#)(?:{})*)'
# Group that matches one or more
_PLUS_GROUP = r'(?:{})+'
_PLUS_CAPTURE_GROUP = r'((?#)(?:{})+)'
# Group that matches exactly one
_GROUP = r'(?:{})'
_CAPTURE_GROUP = r'((?#){})'
# Inverse group that matches none.
# This is the opening part. Since Python can't do
# variable-width look-behinds, we have to stuff everything
# the group needs to look ahead for at the end.
# So there is an opening and a closing.
_EXCLA_GROUP = r'(?:(?!(?:{})'
_EXCLA_CAPTURE_GROUP = r'((?#)(?!(?:{})'
# Closing for inverse group
_EXCLA_GROUP_CLOSE = r'){})'
# Restrict root
_NO_ROOT = r'(?!/)'
_NO_WIN_ROOT = r'(?!(?:[\\/]|[a-zA-Z]:))'
# Restrict directories
# Uncompiled `(str, bytes)` pattern pairs appended by `translate` as a negative
# pattern when `NODIR` is set; same expressions as `RE_NO_DIR` / `RE_WIN_NO_DIR`.
_NO_NIX_DIR = (
    r'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$',
    rb'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$'
)
_NO_WIN_DIR = (
    r'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$',
    rb'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$'
)
284
285
class InvPlaceholder(str):
    """
    Placeholder for inverse pattern !(...).

    A plain `str` subclass that adds no behavior of its own.
    NOTE(review): presumably used so placeholders can be told apart from ordinary
    strings by type; the code using it is not visible in this part of the file.
    """
288
289
class PathNameException(Exception):
    """
    Path name exception.

    Raised by `WcSplit._references` when an escaped path separator is found inside
    a character sequence, and handled by `WcSplit._sequence`.
    """
292
293
class DotException(Exception):
    """
    Dot exception.

    NOTE(review): not raised or caught in the visible part of this file; confirm usage before relying on it.
    """
296
297
class PatternLimitException(Exception):
    """
    Pattern limit exception.

    Raised by `translate` and `compile` when expanding the given patterns produces
    more patterns than the `limit` argument allows.
    """
300
301
def to_str_sequence(patterns: Union[str, bytes, Sequence[AnyStr]]) -> Sequence[AnyStr]:
    """
    Normalize a pattern argument into a sequence of patterns.

    A lone `str` or `bytes` pattern is wrapped in a single item list;
    anything else is returned untouched.
    """

    if not isinstance(patterns, (str, bytes)):
        return patterns
    return cast(Sequence[AnyStr], [patterns])
309
310
def escape(pattern: AnyStr, unix: Optional[bool] = None, pathname: bool = True, raw: bool = False) -> AnyStr:
    """
    Escape magic characters so the pattern is matched literally.

    `unix`: use Unix style path logic (`None` means decide from the current platform).
    `pathname`: Use path logic.
    `raw`: Handle raw strings (deprecated); when set, backslashes are not doubled first.

    """

    if isinstance(pattern, bytes):
        kind = util.BYTES
        substitution = br'\\\1'
        single = b'\\'
        doubled = b'\\\\'
        prefix = b''
    else:
        kind = util.UNICODE
        substitution = r'\\\1'
        single = '\\'
        doubled = '\\\\'
        prefix = ''

    drive_re = cast(Pattern[AnyStr], RE_WIN_DRIVE[kind])
    magic_re = cast(Pattern[AnyStr], RE_MAGIC_ESCAPE[kind])
    drive_magic_re = cast(Pattern[AnyStr], RE_WIN_DRIVE_MAGIC[kind])

    text = pattern if raw else pattern.replace(single, doubled)

    # Windows drives are handled specially internally, so only the characters
    # that could split the pattern are escaped in the drive portion; escaping
    # more would just have to be detected and undone later.
    if pathname and (unix is False or (unix is None and util.platform() == "windows")):
        found = drive_re.match(text)
        if found:
            matched = found.group(0)
            prefix = drive_magic_re.sub(substitution, matched)
            text = text[len(matched):]

    return prefix + magic_re.sub(substitution, text)
356
357
def _get_win_drive(
    pattern: str,
    regex: bool = False,
    case_sensitive: bool = False
) -> Tuple[bool, Optional[str], bool, int]:
    """
    Get Windows drive.

    Looks at the start of `pattern` for a drive letter (`c:`) or a UNC style
    prefix (`//server/share`, `//?/...`, `//./...`).

    `regex`: return the drive as a regular expression fragment instead of literal text.
    `case_sensitive`: forwarded to `escape_drive` when `regex` is set.

    Returns `(root_specified, drive, slash, end)`:
    whether the pattern starts at a root, the drive (`None` when no complete drive
    was found), whether the last drive component consumed was followed by a separator,
    and the index in `pattern` at which the last successful match ended (0 if none).
    """

    drive = None
    slash = False
    end = 0
    root_specified = False
    m = RE_WIN_DRIVE_START.match(pattern)
    if m:
        end = m.end(0)
        if m.group(3) and RE_WIN_DRIVE_LETTER.match(m.group(0)):
            # Drive letter form. The regex form keeps only the `x:` part (group 3),
            # the literal form keeps the whole match including any separator.
            if regex:
                drive = escape_drive(RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(3)), case_sensitive)
            else:
                drive = RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(0))
            slash = bool(m.group(4))
            root_specified = True
        elif m.group(2):
            # Double separator form: collect the unescaped components that make up the drive.
            root_specified = True
            part = [RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(2))]
            # `//./` and `//?/` prefixes may be followed by `unc` or `global` keywords.
            is_special = part[-1].lower() in ('.', '?')
            # `complete` is how many further components are required; `first` is the
            # position at which a keyword is still recognized.
            complete = 1
            first = 1
            count = 0
            # NOTE: `m` is rebound by the loop; `m.end(0)` in the call below is evaluated once, before iteration.
            for count, m in enumerate(RE_WIN_DRIVE_PART.finditer(pattern, m.end(0)), 1):
                end = m.end(0)
                part.append(RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(1)))
                slash = bool(m.group(2))
                if is_special:
                    if count == first and part[-1].lower() == 'unc':
                        # `unc` must be followed by two more components.
                        complete += 2
                    elif count == first and part[-1].lower() == 'global':
                        # `global` requires one more component, which may itself be a keyword.
                        first += 1
                        complete += 1
                if count == complete:
                    break
            # A drive is only reported when all required components were present.
            if count == complete:
                if not regex:
                    drive = '\\\\{}{}'.format('\\'.join(part), '\\' if slash else '')
                else:
                    drive = r'[\\/]{2}' + r'[\\/]'.join([escape_drive(p, case_sensitive) for p in part])
    elif pattern.startswith(('\\\\', '/')):
        # Rooted pattern without a drive.
        root_specified = True

    return root_specified, drive, slash, end
407
408
def _get_magic_symbols(pattern: AnyStr, unix: bool, flags: int) -> Tuple[Set[AnyStr], Set[AnyStr]]:
    """
    Collect the characters considered magic under the given flags.

    Returns `(magic, magic_drive)`: the symbols that are magic in the body of the
    pattern, and the symbols that are magic inside a Windows drive.
    """

    is_bytes = isinstance(pattern, bytes)
    ptype = util.BYTES if is_bytes else util.UNICODE
    slash = cast(AnyStr, b'\\' if is_bytes else '\\')

    def table(pair):  # type: ignore[no-untyped-def]
        """Pick the set matching the pattern's string type."""

        return cast(Set[AnyStr], pair[ptype])

    magic = set()  # type: Set[AnyStr]
    magic |= table(MAGIC_DEF)

    # Only non-Unix patterns treat the backslash as magic within a drive.
    magic_drive = set() if unix else {slash}  # type: Set[AnyStr]

    # Brace and split symbols are magic everywhere, including within a drive.
    for flag, pair in ((BRACE, MAGIC_BRACE), (SPLIT, MAGIC_SPLIT)):
        if flags & flag:
            magic |= table(pair)
            magic_drive |= table(pair)

    for flag, pair in ((GLOBTILDE, MAGIC_TILDE), (EXTMATCH, MAGIC_EXTMATCH)):
        if flags & flag:
            magic |= table(pair)

    if flags & NEGATE:
        magic |= table(MAGIC_MINUS_NEGATE if flags & MINUSNEGATE else MAGIC_NEGATE)

    return magic, magic_drive
443
444
def is_magic(pattern: AnyStr, flags: int = 0) -> bool:
    """
    Check if pattern is magic.

    A pattern is magic when it contains any symbol that is special under `flags`.
    For Windows style path patterns the drive portion is checked against its own,
    smaller set of symbols.
    """

    unix = is_unix_style(flags)
    ptype = util.BYTES if isinstance(pattern, bytes) else util.UNICODE
    drive_pat = cast(Pattern[AnyStr], RE_WIN_DRIVE[ptype])
    magic, magic_drive = _get_magic_symbols(pattern, unix, flags)

    if flags & PATHNAME and (unix is False or (unix is None and util.platform() == "windows")):
        found = drive_pat.match(pattern)
        if found:
            drive = found.group(0)
            if any(symbol in drive for symbol in magic_drive):
                return True
            # The drive is not magic; only the remainder needs the general check.
            pattern = pattern[len(drive):]

    return any(symbol in pattern for symbol in magic)
480
481
def is_negative(pattern: AnyStr, flags: int) -> bool:
    """
    Check if negative pattern.

    Requires `NEGATE`. The negation symbol is `-` with `MINUSNEGATE` and `!`
    otherwise; with `EXTMATCH`, a leading `!(` is not treated as a negation.
    """

    if not flags & NEGATE:
        return False

    lead = pattern[0:1]
    if flags & MINUSNEGATE:
        return lead in MINUS_NEGATIVE_SYM
    if lead not in NEGATIVE_SYM:
        return False
    return not (flags & EXTMATCH and pattern[1:2] in ROUND_BRACKET)
491
492
def tilde_pos(pattern: AnyStr, flags: int) -> int:
    """
    Locate a leading tilde that is eligible for user folder expansion.

    Returns 0 when the pattern starts with `~`, 1 when `NEGATE` is set and the
    pattern starts with `!~`, and -1 otherwise. Both `GLOBTILDE` and `REALPATH`
    are required for a position to be reported.
    """

    if not (flags & GLOBTILDE and flags & REALPATH):
        return -1
    if pattern[0:1] in TILDE_SYM:
        return 0
    if flags & NEGATE and pattern[0:1] in NEGATIVE_SYM and pattern[1:2] in TILDE_SYM:
        return 1
    return -1
506
507
def expand_braces(patterns: AnyStr, flags: int, limit: int) -> Iterable[AnyStr]:
    """
    Expand braces.

    Accepts a single pattern or an iterable of patterns. Without `BRACE` the
    patterns are yielded unchanged.
    """

    items = [patterns] if isinstance(patterns, (str, bytes)) else patterns

    if not flags & BRACE:
        for item in items:
            yield item
        return

    for item in items:
        try:
            # The limit is forwarded so that `bracex` enforces it while expanding.
            yield from bracex.iexpand(item, keep_escapes=True, limit=limit)
        except bracex.ExpansionLimitException:
            # Exceeding the limit is reported to the caller.
            raise
        except Exception:  # pragma: no cover
            # We will probably never hit this as `bracex`
            # doesn't throw any specific exceptions and
            # should normally always parse, but just in case
            # fall back to the pattern as given.
            yield item
527
def expand_tilde(pattern: AnyStr, is_unix: bool, flags: int) -> AnyStr:
    """
    Expand tilde.

    Replaces a leading `~` or `~user` (optionally preceded by `!`) with the escaped
    user folder. The pattern is returned unchanged when no tilde is eligible, when
    expansion leaves the tilde in place, or when the expanded path does not exist.
    """

    pos = tilde_pos(pattern, flags)
    if pos < 0:
        return pattern

    string_type = util.BYTES if isinstance(pattern, bytes) else util.UNICODE
    tilde = cast(AnyStr, TILDE_SYM[string_type])
    candidates = RE_TILDE if is_unix else RE_WIN_TILDE
    re_tilde = cast(Pattern[AnyStr], candidates[string_type])

    found = re_tilde.match(pattern, pos)
    if not found:
        return pattern

    expanded = os.path.expanduser(found.group(0))
    if expanded.startswith(tilde) or not os.path.exists(expanded):
        return pattern

    # Keep the leading negation symbol when the tilde was found at position 1.
    lead = pattern[0:1] if pos else pattern[0:0]
    return lead + escape(expanded, is_unix) + pattern[found.end(0):]
543
544
def expand(pattern: AnyStr, flags: int, limit: int) -> Iterable[AnyStr]:
    """
    Expand and normalize.

    Applies brace expansion, then pattern splitting, then tilde expansion,
    yielding every resulting pattern.
    """

    for braced in expand_braces(pattern, flags, limit):
        for piece in split(braced, flags):
            unix_style = is_unix_style(flags)
            yield expand_tilde(piece, unix_style, flags)
551
552
def is_case_sensitive(flags: int) -> bool:
    """
    Is case sensitive.

    `FORCEWIN` selects case insensitive, `FORCEUNIX` selects case sensitive
    (`FORCEWIN` takes priority); otherwise the answer comes from `util.is_case_sensitive`.
    """

    if flags & FORCEWIN:
        return False
    if flags & FORCEUNIX:
        return True
    return util.is_case_sensitive()
563
564
def get_case(flags: int) -> bool:
    """
    Parse flags for case sensitivity settings.

    An explicit `CASE` wins over `IGNORECASE`; when neither is given the
    result of `is_case_sensitive` is used.
    """

    if flags & CASE_FLAGS:
        return bool(flags & CASE)
    return is_case_sensitive(flags)
575
576
def escape_drive(drive: str, case: bool) -> str:
    """
    Escape drive.

    Returns `drive` escaped for use in a regular expression. When `case` is true
    the result is wrapped in an inline `(?i:...)` group so that the drive is
    matched case insensitively.
    """

    escaped = re.escape(drive)
    if case:
        return '(?i:{})'.format(escaped)
    return escaped
581
582
def is_unix_style(flags: int) -> bool:
    """
    Check if we should use Unix style.

    `FORCEWIN` always disables Unix style. Otherwise Unix style is used on any
    non-Windows platform, and on Windows only when `FORCEUNIX` is set without `REALPATH`.
    """

    if flags & FORCEWIN:
        return False
    if util.platform() != "windows":
        return True
    return bool(flags & FORCEUNIX) and not bool(flags & REALPATH)
593
594
def translate(
    patterns: Sequence[AnyStr],
    flags: int,
    limit: int = PATTERN_LIMIT
) -> Tuple[List[AnyStr], List[AnyStr]]:
    """
    Translate patterns.

    Expands every pattern and converts each unique result into a regular expression
    string. Returns `(positive, negative)` lists of those strings.

    Raises `PatternLimitException` when a positive `limit` is exceeded.
    """

    flags = (flags | _TRANSLATE) & FLAG_MASK
    is_unix = is_unix_style(flags)

    positive = []  # type: List[AnyStr]
    negative = []  # type: List[AnyStr]
    seen = set()

    try:
        remaining = limit
        produced = 0
        for raw_pattern in patterns:
            normalized = util.norm_pattern(raw_pattern, not is_unix, bool(flags & RAWCHARS))
            emitted = 0
            for expanded in expand(normalized, flags, remaining):
                emitted += 1
                produced += 1
                if 0 < limit < produced:
                    raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))
                if expanded in seen:
                    continue
                seen.add(expanded)
                bucket = negative if is_negative(expanded, flags) else positive
                bucket.append(WcParse(expanded, flags).parse())
            if limit:
                # Shrink the budget handed to the next expansion, but never below one.
                remaining = max(remaining - emitted, 1)
    except bracex.ExpansionLimitException:
        raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))

    # Only negative patterns were given: optionally supply a match-all positive pattern.
    if patterns is not None and negative and not positive and flags & NEGATEALL:
        default = b'**' if isinstance(patterns[0], bytes) else '**'
        extra = GLOBSTAR if flags & PATHNAME else 0
        positive.append(WcParse(default, flags | extra).parse())

    if patterns and flags & NODIR:
        index = util.BYTES if isinstance(patterns[0], bytes) else util.UNICODE
        no_dir = _NO_NIX_DIR if is_unix else _NO_WIN_DIR
        negative.append(cast(AnyStr, no_dir[index]))

    return positive, negative
642
643
def split(pattern: AnyStr, flags: int) -> Iterable[AnyStr]:
    """
    Split patterns.

    With `SPLIT` the pattern is divided by `WcSplit`; otherwise it is yielded as is.
    """

    if not flags & SPLIT:
        yield pattern
        return

    yield from WcSplit(pattern, flags).split()
651
652
def compile(  # noqa: A001
    patterns: Sequence[AnyStr],
    flags: int,
    limit: int = PATTERN_LIMIT
) -> WcRegexp[AnyStr]:
    """
    Compile patterns.

    Expands every pattern, compiles each unique result to a regular expression and
    wraps the positive and negative expressions in a `WcRegexp`.

    `patterns`: sequence of `str` or `bytes` patterns; may be empty.
    `flags`: match flags.
    `limit`: maximum number of patterns expansion may produce (0 disables the check).

    Raises `PatternLimitException` when a positive `limit` is exceeded.
    """

    positive = []  # type: List[Pattern[AnyStr]]
    negative = []  # type: List[Pattern[AnyStr]]

    is_unix = is_unix_style(flags)
    seen = set()

    try:
        current_limit = limit
        total = 0
        for pattern in patterns:
            pattern = util.norm_pattern(pattern, not is_unix, bool(flags & RAWCHARS))
            count = 0
            for count, expanded in enumerate(expand(pattern, flags, current_limit), 1):
                total += 1
                if 0 < limit < total:
                    raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))
                # Identical expansions are only compiled once.
                if expanded not in seen:
                    seen.add(expanded)
                    (negative if is_negative(expanded, flags) else positive).append(_compile(expanded, flags))
            if limit:
                # Shrink the budget handed to the next expansion, but never below one.
                current_limit -= count
                if current_limit < 1:
                    current_limit = 1
    except bracex.ExpansionLimitException:
        raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))

    # Only negative patterns were given: optionally supply a match-all positive pattern.
    if patterns and negative and not positive:
        if flags & NEGATEALL:
            default = b'**' if isinstance(patterns[0], bytes) else '**'
            positive.append(_compile(default, flags | (GLOBSTAR if flags & PATHNAME else 0)))

    # Guard on truthiness (as `translate` does): `patterns[0]` below would raise
    # `IndexError` for an empty sequence if only `None` were excluded.
    if patterns and flags & NODIR:
        ptype = util.BYTES if isinstance(patterns[0], bytes) else util.UNICODE
        negative.append(cast(Pattern[AnyStr], RE_NO_DIR[ptype] if is_unix else RE_WIN_NO_DIR[ptype]))

    return WcRegexp(
        tuple(positive), tuple(negative),
        bool(flags & REALPATH), bool(flags & PATHNAME), bool(flags & FOLLOW)
    )
699
700
@functools.lru_cache(maxsize=256, typed=True)
def _compile(pattern: AnyStr, flags: int) -> Pattern[AnyStr]:
    """
    Compile the pattern to regex.

    Results are cached per `(pattern, flags)` pair; flags outside `FLAG_MASK`
    are dropped before parsing.
    """

    regex_source = WcParse(pattern, flags & FLAG_MASK).parse()
    return re.compile(regex_source)
706
707
class WcSplit(Generic[AnyStr]):
    """
    Class that splits patterns on |.

    A `|` only splits the pattern when it is outside of an escape, a character
    sequence (`[...]`) and, with `EXTMATCH`, an extended match list (`@(...)` etc.).
    """

    def __init__(self, pattern: AnyStr, flags: int) -> None:
        """
        Initialize.

        `pattern`: the pattern to split (`str` or `bytes`).
        `flags`: match flags; `PATHNAME`, `EXTMATCH` and the flags read by `is_unix_style` are consulted.
        """

        self.pattern = pattern  # type: AnyStr
        self.pathname = bool(flags & PATHNAME)
        self.extend = bool(flags & EXTMATCH)
        self.unix = is_unix_style(flags)
        # For non-Unix style patterns an escaped backslash inside a sequence aborts the sequence.
        self.bslash_abort = not self.unix

    def _sequence(self, i: util.StringIter) -> None:
        """
        Handle character group.

        Consumes characters from `i` up to and including the closing `]`.
        Raises `StopIteration` when the sequence is not valid: the input ran out
        (via `next`), or a path separator was found while it is not allowed.
        """

        c = next(i)
        # Skip the optional negation and an optional first literal character.
        if c == '!':
            c = next(i)
        if c in ('^', '-', '['):
            c = next(i)

        while c != ']':
            if c == '\\':
                # Handle escapes
                try:
                    self._references(i, True)
                except PathNameException:
                    # Report the failure the same way as running out of input.
                    raise StopIteration
            elif c == '/':
                if self.pathname:
                    raise StopIteration
            c = next(i)

    def _references(self, i: util.StringIter, sequence: bool = False) -> None:
        """
        Handle references.

        Consumes the character following a backslash. When called from within a
        sequence (`sequence=True`), raises `PathNameException` for an escaped
        backslash (if `bslash_abort` is set) or an escaped slash (if `pathname` is set).
        """

        c = next(i)
        if c == '\\':
            # \\
            if sequence and self.bslash_abort:
                raise PathNameException
        elif c == '/':
            # \/
            if sequence and self.pathname:
                raise PathNameException
        else:
            # \a, \b, \c, etc.
            pass

    def parse_extend(self, c: str, i: util.StringIter) -> bool:
        """
        Parse extended pattern lists.

        `c` is the list type character that was just read. Returns `True` when a
        complete `(...)` list followed and was consumed; otherwise rewinds `i`
        and returns `False`.
        """

        # Start list parsing
        success = True
        # NOTE(review): `index` is also reassigned when a `[` is met inside the list (below),
        # so the final rewind on failure may be relative to that later position.
        index = i.index
        list_type = c
        try:
            c = next(i)
            if c != '(':
                raise StopIteration
            while c != ')':
                c = next(i)

                # Nested extended match list.
                if self.extend and c in EXT_TYPES and self.parse_extend(c, i):
                    continue

                if c == '\\':
                    try:
                        self._references(i)
                    except StopIteration:
                        pass
                elif c == '[':
                    index = i.index
                    try:
                        self._sequence(i)
                    except StopIteration:
                        # Not a valid sequence: go back to just after the `[`
                        # (presumably what `rewind` does; `StringIter` lives in `util`).
                        i.rewind(i.index - index)

        except StopIteration:
            success = False
            c = list_type
            i.rewind(i.index - index)

        return success

    def _split(self, pattern: str) -> Iterable[str]:
        """
        Split the pattern.

        Yields each part of `pattern` found between top level `|` characters.
        """

        # Index of the most recent splitting `|`; -1 means none was seen yet.
        start = -1
        i = util.StringIter(pattern)

        for c in i:
            if self.extend and c in EXT_TYPES and self.parse_extend(c, i):
                continue

            if c == '|':
                # `i.index` is used here as the position just past the `|`.
                split = i.index - 1
                p = pattern[start + 1:split]
                yield p
                start = split
            elif c == '\\':
                index = i.index
                try:
                    self._references(i)
                except StopIteration:
                    i.rewind(i.index - index)
            elif c == '[':
                index = i.index
                try:
                    self._sequence(i)
                except StopIteration:
                    i.rewind(i.index - index)

        # Emit whatever follows the last `|` (the whole pattern if there was none).
        if start < len(pattern):
            yield pattern[start + 1:]

    def split(self) -> Iterable[AnyStr]:
        """
        Split the pattern.

        `bytes` patterns are decoded as `latin-1` for splitting and each part is
        encoded back, so the parts have the same type as the original pattern.
        """

        if isinstance(self.pattern, bytes):
            for p in self._split(self.pattern.decode('latin-1')):
                yield p.encode('latin-1')
        else:
            yield from self._split(self.pattern)
832
833
834class WcParse(Generic[AnyStr]):
835    """Parse the wildcard pattern."""
836
837    def __init__(self, pattern: AnyStr, flags: int = 0) -> None:
838        """Initialize."""
839
840        self.pattern = pattern  # type: AnyStr
841        self.no_abs = bool(flags & _NOABSOLUTE)
842        self.braces = bool(flags & BRACE)
843        self.is_bytes = isinstance(pattern, bytes)
844        self.pathname = bool(flags & PATHNAME)
845        self.raw_chars = bool(flags & RAWCHARS)
846        self.globstar = self.pathname and bool(flags & GLOBSTAR)
847        self.realpath = bool(flags & REALPATH) and self.pathname
848        self.translate = bool(flags & _TRANSLATE)
849        self.negate = bool(flags & NEGATE)
850        self.globstar_capture = self.realpath and not self.translate
851        self.dot = bool(flags & DOTMATCH)
852        self.extend = bool(flags & EXTMATCH)
853        self.matchbase = bool(flags & MATCHBASE)
854        self.extmatchbase = bool(flags & _EXTMATCHBASE)
855        self.rtl = bool(flags & _RTL)
856        self.anchor = bool(flags & _ANCHOR)
857        self.nodotdir = bool(flags & NODOTDIR)
858        self.capture = self.translate
859        self.case_sensitive = get_case(flags)
860        self.in_list = False
861        self.inv_nest = False
862        self.flags = flags
863        self.inv_ext = 0
864        self.unix = is_unix_style(self.flags)
865        if not self.unix:
866            self.win_drive_detect = self.pathname
867            self.char_avoid = (ord('\\'), ord('/'), ord('.'))  # type: Tuple[int, ...]
868            self.bslash_abort = self.pathname
869            sep = {"sep": re.escape('\\/')}
870        else:
871            self.win_drive_detect = False
872            self.char_avoid = (ord('/'), ord('.'))
873            self.bslash_abort = False
874            sep = {"sep": re.escape('/')}
875        self.bare_sep = sep['sep']
876        self.sep = '[{}]'.format(self.bare_sep)
877        self.path_eop = _PATH_EOP.format(**sep)
878        self.no_dir = _NO_DIR.format(**sep)
879        self.seq_path = _PATH_NO_SLASH.format(**sep)
880        self.seq_path_dot = _PATH_NO_SLASH_DOT.format(**sep)
881        self.path_star = _PATH_STAR.format(**sep)
882        self.path_star_dot1 = _PATH_STAR_DOTMATCH.format(**sep)
883        self.path_star_dot2 = _PATH_STAR_NO_DOTMATCH.format(**sep)
884        self.path_gstar_dot1 = _PATH_GSTAR_DOTMATCH.format(**sep)
885        self.path_gstar_dot2 = _PATH_GSTAR_NO_DOTMATCH.format(**sep)
886        if self.pathname:
887            self.need_char = _NEED_CHAR_PATH.format(**sep)
888        else:
889            self.need_char = _NEED_CHAR
890
891    def set_after_start(self) -> None:
892        """Set tracker for character after the start of a directory."""
893
894        self.after_start = True
895        self.dir_start = False
896
897    def set_start_dir(self) -> None:
898        """Set directory start."""
899
900        self.dir_start = True
901        self.after_start = False
902
903    def reset_dir_track(self) -> None:
904        """Reset directory tracker."""
905
906        self.dir_start = False
907        self.after_start = False
908
909    def update_dir_state(self) -> None:
910        """
911        Update the directory state.
912
913        If we are at the directory start,
914        update to after start state (the character right after).
915        If at after start, reset state.
916        """
917
918        if self.dir_start and not self.after_start:
919            self.set_after_start()
920        elif not self.dir_start and self.after_start:
921            self.reset_dir_track()
922
923    def _restrict_extended_slash(self) -> str:
924        """Restrict extended slash."""
925
926        return self.seq_path if self.pathname else ''
927
928    def _restrict_sequence(self) -> str:
929        """Restrict sequence."""
930
931        if self.pathname:
932            value = self.seq_path_dot if self.after_start and not self.dot else self.seq_path
933            if self.after_start:
934                value = self.no_dir + value
935        else:
936            value = _NO_DOT if self.after_start and not self.dot else ""
937        self.reset_dir_track()
938
939        return value
940
941    def _sequence_range_check(self, result: List[str], last: str) -> bool:
942        """
943        If range backwards, remove it.
944
945        A bad range will cause the regular expression to fail,
946        so we need to remove it, but return that we removed it
947        so the caller can know the sequence wasn't empty.
948        Caller will have to craft a sequence that makes sense
949        if empty at the end with either an impossible sequence
950        for inclusive sequences or a sequence that matches
951        everything for an exclusive sequence.
952        """
953
954        removed = False
955        first = result[-2]
956        v1 = ord(first[1:2] if len(first) > 1 else first)
957        v2 = ord(last[1:2] if len(last) > 1 else last)
958        if v2 < v1:
959            result.pop()
960            result.pop()
961            removed = True
962        else:
963            result.append(last)
964        return removed
965
966    def _handle_posix(self, i: util.StringIter, result: List[str], end_range: int) -> bool:
967        """Handle posix classes."""
968
969        last_posix = False
970        m = i.match(RE_POSIX)
971        if m:
972            last_posix = True
973            # Cannot do range with posix class
974            # so escape last `-` if we think this
975            # is the end of a range.
976            if end_range and i.index - 1 >= end_range:
977                result[-1] = '\\' + result[-1]
978            result.append(posix.get_posix_property(m.group(1), self.is_bytes))
979        return last_posix
980
    def _sequence(self, i: util.StringIter) -> str:
        """
        Handle character group.

        Reads the body of a `[...]` sequence from `i` (the first character is fetched
        here) and returns the equivalent regular expression character class. When
        `pathname` is enabled, or the sequence directly follows a directory start,
        the class is prefixed with the result of `_restrict_sequence`.

        `StopIteration` propagates out of this method when the pattern ends before
        a closing `]` is seen, when a `/` appears in the sequence while `pathname`
        is enabled, or when `_references` reports an escaped path separator.
        """

        result = ['[']
        # Index marking where the end of a pending range is expected (0 means no pending range).
        end_range = 0
        # Hyphens found at or before this index cannot start a new range.
        escape_hyphen = -1
        # Set once any backwards range has been dropped from the sequence.
        removed = False
        # True when the previous token was a POSIX class.
        last_posix = False

        c = next(i)
        if c in ('!', '^'):
            # Handle negate char
            result.append('^')
            c = next(i)
        if c == '[':
            # A leading `[` is either the start of a POSIX class or a literal bracket.
            last_posix = self._handle_posix(i, result, 0)
            if not last_posix:
                result.append(re.escape(c))
            c = next(i)
        elif c in ('-', ']'):
            # A leading `-` or `]` is always a literal.
            result.append(re.escape(c))
            c = next(i)

        while c != ']':
            if c == '-':
                if last_posix:
                    # A POSIX class cannot start a range; keep the hyphen literal.
                    result.append('\\' + c)
                    last_posix = False
                elif i.index - 1 > escape_hyphen:
                    # Found a range delimiter.
                    # Mark the next two characters as needing to be escaped if hyphens.
                    # The next character would be the end char range (s-e),
                    # and the one after that would be the potential start char range
                    # of a new range (s-es-e), so neither can be legitimate range delimiters.
                    result.append(c)
                    escape_hyphen = i.index + 1
                    end_range = i.index
                elif end_range and i.index - 1 >= end_range:
                    # This hyphen is itself the end of the pending range.
                    if self._sequence_range_check(result, '\\' + c):
                        removed = True
                    end_range = 0
                else:
                    result.append('\\' + c)
                c = next(i)
                continue
            last_posix = False

            if c == '[':
                last_posix = self._handle_posix(i, result, end_range)
                if last_posix:
                    c = next(i)
                    continue

            if c == '\\':
                # Handle escapes
                try:
                    value = self._references(i, True)
                except DotException:
                    # An escaped dot is just a literal dot inside a sequence.
                    value = re.escape(next(i))
                except PathNameException:
                    # An escaped path separator aborts the sequence.
                    raise StopIteration
            elif c == '/':
                if self.pathname:
                    # A path separator aborts the sequence in pathname mode.
                    raise StopIteration
                value = c
            elif c in SET_OPERATORS:
                # Escape &, |, and ~ to avoid &&, ||, and ~~
                value = '\\' + c
            else:
                # Anything else
                value = c

            if end_range and i.index - 1 >= end_range:
                # This value closes the pending range; drop the range if it is backwards.
                if self._sequence_range_check(result, value):
                    removed = True
                end_range = 0
            else:
                result.append(value)

            c = next(i)

        result.append(']')
        # Bad range removed.
        if removed:
            value = "".join(result)
            if value == '[]':
                # We specified some ranges, but they are all
                # out of reach.  Create an impossible sequence to match.
                result = ['[^{}]'.format(ASCII_RANGE if self.is_bytes else UNICODE_RANGE)]
            elif value == '[^]':
                # We specified some range, but they are all
                # out of reach. Since this is exclusive
                # that means we can match *anything*.
                result = ['[{}]'.format(ASCII_RANGE if self.is_bytes else UNICODE_RANGE)]
            else:
                result = [value]

        if self.pathname or self.after_start:
            return self._restrict_sequence() + ''.join(result)

        return ''.join(result)
1082
1083    def _references(self, i: util.StringIter, sequence: bool = False) -> str:
1084        """Handle references."""
1085
1086        value = ''
1087        c = next(i)
1088        if c == '\\':
1089            # \\
1090            if sequence and self.bslash_abort:
1091                raise PathNameException
1092            value = r'\\'
1093            if self.bslash_abort:
1094                if not self.in_list:
1095                    value = self.sep + _ONE_OR_MORE
1096                    self.set_start_dir()
1097                else:
1098                    value = self._restrict_extended_slash() + self.sep
1099            elif not self.unix:
1100                value = self.sep if not sequence else self.bare_sep
1101        elif c == '/':
1102            # \/
1103            if sequence and self.pathname:
1104                raise PathNameException
1105            if self.pathname:
1106                if not self.in_list:
1107                    value = self.sep + _ONE_OR_MORE
1108                    self.set_start_dir()
1109                else:
1110                    value = self._restrict_extended_slash() + self.sep
1111            else:
1112                value = self.sep if not sequence else self.bare_sep
1113        elif c == '.':
1114            # Let dots be handled special
1115            i.rewind(1)
1116            raise DotException
1117        else:
1118            # \a, \b, \c, etc.
1119            value = re.escape(c)
1120
1121        return value
1122
    def _handle_dot(self, i: util.StringIter, current: List[str]) -> None:
        """
        Handle dot.

        Appends the regular expression for a dot to `current`.

        When the dot directly follows a directory start, `pathname` is enabled and
        `nodotdir` is set, the method looks ahead (and then rewinds) to see whether
        the dot makes up a whole `.` or `..` segment, i.e. one or two dots followed
        by a path separator, the end of the pattern, or a list delimiter while in a
        list. If it does not, the dot is emitted with a negative lookahead built from
        `path_eop`; in every other case a plain escaped dot is emitted.
        """

        # `is_current`: so far the segment looks like `.`
        # `is_previous`: so far the segment looks like `..`
        is_current = True
        is_previous = False

        if self.after_start and self.pathname and self.nodotdir:
            try:
                # Remember where we are so the look-ahead can be undone.
                index = i.index
                while True:
                    c = next(i)
                    if c == '.' and is_current:
                        # Second dot: `.` becomes `..`
                        is_previous = True
                        is_current = False
                    elif c == '.' and is_previous:
                        # Third dot: neither `.` nor `..`
                        is_previous = False
                        raise StopIteration
                    elif c in ('|', ')') and self.in_list:
                        # The list delimiter ends the segment; keep the current flags.
                        raise StopIteration
                    elif c == '\\':
                        try:
                            self._references(i, True)
                            # Was not what we expected
                            is_current = False
                            is_previous = False
                            raise StopIteration
                        except DotException:
                            # An escaped dot counts like a plain dot.
                            if is_current:
                                is_previous = True
                                is_current = False
                                # `_references` rewound to the dot; consume it.
                                c = next(i)
                            else:
                                is_previous = False
                                raise StopIteration
                        except PathNameException:
                            # An escaped path separator ends the segment; keep the current flags.
                            raise StopIteration
                    elif c == '/':
                        # The path separator ends the segment; keep the current flags.
                        raise StopIteration
                    else:
                        # Any other character means this is a longer name.
                        is_current = False
                        is_previous = False
                        raise StopIteration
            except StopIteration:
                # Also reached when the pattern simply runs out; undo the look-ahead.
                i.rewind(i.index - index)

        if not is_current and not is_previous:
            # Guard the dot with a negative lookahead for `.` or `..` followed by `path_eop`.
            current.append(r'(?!\.[.]?{})\.'.format(self.path_eop))
        else:
            current.append(re.escape('.'))
1172
    def _handle_star(self, i: util.StringIter, current: List[str]) -> None:
        """
        Handle star.

        Appends the regular expression for `*` to `current`, or for a `globstar`
        (`**`) when `globstar` is enabled, we are right after a directory start, we
        are not inside an extended list, and the two stars are followed by a path
        separator (plain or escaped) or the end of the pattern.

        Recognizing a `globstar` followed by a path separator also turns off
        `matchbase`. The directory tracker is reset, and set to "directory start"
        again after a `globstar`.
        """

        # Pick the single star and `globstar` patterns that fit the current
        # pathname / leading dot situation.
        if self.pathname:
            if self.after_start and not self.dot:
                star = self.path_star_dot2
                globstar = self.path_gstar_dot2
            elif self.after_start:
                star = self.path_star_dot1
                globstar = self.path_gstar_dot1
            else:
                star = self.path_star
                globstar = self.path_gstar_dot1
            if self.globstar_capture:
                globstar = '({})'.format(globstar)
        else:
            if self.after_start and not self.dot:
                star = _NO_DOT + _STAR
            else:
                star = _STAR
            globstar = ''
        value = star

        if self.after_start and self.globstar and not self.in_list:
            skip = False
            try:
                c = next(i)
                if c != '*':
                    i.rewind(1)
                    raise StopIteration
            except StopIteration:
                # Could not acquire a second star, so assume single star pattern
                skip = True

            if not skip:
                try:
                    # Remember the position after `**` so the look-ahead can be undone.
                    index = i.index
                    c = next(i)
                    if c == '\\':
                        try:
                            self._references(i, True)
                            # Was not what we expected
                            # Assume two single stars
                        except DotException:
                            pass
                        except PathNameException:
                            # Looks like escape was a valid slash
                            # Store pattern accordingly
                            value = globstar
                            self.matchbase = False
                        except StopIteration:
                            # Escapes nothing, ignore and assume double star
                            value = globstar
                    elif c == '/':
                        value = globstar
                        self.matchbase = False

                    if value != globstar:
                        # Not a `globstar`; put back what was read after the second star.
                        i.rewind(i.index - index)
                except StopIteration:
                    # Could not acquire directory slash due to no more characters
                    # Use double star
                    value = globstar

        if self.after_start and value != globstar:
            value = self.need_char + value
            # Consume duplicate stars
            try:
                c = next(i)
                while c == '*':
                    c = next(i)
                i.rewind(1)
            except StopIteration:
                pass

        self.reset_dir_track()
        if value == globstar:
            sep = _GLOBSTAR_DIV.format(self.sep)
            # Check if the last entry was a `globstar`
            # If so, don't bother adding another.
            if current[-1] != sep:
                if current[-1] == '':
                    # At the beginning of the pattern
                    current[-1] = value
                else:
                    # Replace the last path separator
                    current[-1] = _NEED_SEP.format(self.sep)
                    current.append(value)
                self.consume_path_sep(i)
                current.append(sep)
            # What follows a `globstar` starts a new directory.
            self.set_start_dir()
        else:
            current.append(value)
1266
1267    def clean_up_inverse(self, current: List[str], nested: bool = False) -> None:
1268        """
1269        Clean up current.
1270
1271        Python doesn't have variable lookbehinds, so we have to do negative lookaheads.
1272        !(...) when converted to regular expression is atomic, so once it matches, that's it.
1273        So we use the pattern `(?:(?!(?:stuff|to|exclude)<x>))[^/]*?)` where <x> is everything
1274        that comes after the negative group. `!(this|that)other` --> `(?:(?!(?:this|that)other))[^/]*?)`.
1275
1276        We have to update the list before | in nested cases: *(!(...)|stuff). Before we close a parent
1277        `extmatch`: `*(!(...))`. And of course on path separators (when path mode is on): `!(...)/stuff`.
1278        Lastly we make sure all is accounted for when finishing the pattern at the end.  If there is nothing
1279        to store, we store `$`: `(?:(?!(?:this|that)$))[^/]*?)`.
1280        """
1281
1282        if not self.inv_ext:
1283            return
1284
1285        index = len(current) - 1
1286        while index >= 0:
1287            if isinstance(current[index], InvPlaceholder):
1288                content = current[index + 1:]
1289                if not nested:
1290                    content.append(_EOP if not self.pathname else self.path_eop)
1291                current[index] = (
1292                    (''.join(content).replace('(?#)', '?:') if self.capture else ''.join(content)) +
1293                    (_EXCLA_GROUP_CLOSE.format(str(current[index])))
1294                )
1295            index -= 1
1296        self.inv_ext = 0
1297
    def parse_extend(self, c: str, i: util.StringIter, current: List[str], reset_dot: bool = False) -> bool:
        """
        Parse extended pattern lists.

        `c` is the list type character that was just read (`?`, `*`, `+`, `@` or `!`),
        and the list itself is expected to follow in `i` as `(...)`. On success the
        regular expression group for the list is appended to `current` and `True` is
        returned.

        If no `(` follows, or the pattern ends before the list is closed, nothing is
        appended, `i` is rewound to where it was on entry, the directory tracker and
        pending inverse count are restored, and `False` is returned so the caller can
        treat `c` as an ordinary character.

        When `reset_dot` is true, `match_dot_dir` is cleared before parsing.
        """

        # Save state
        temp_dir_start = self.dir_start
        temp_after_start = self.after_start
        temp_in_list = self.in_list
        temp_inv_ext = self.inv_ext
        temp_inv_nest = self.inv_nest
        self.in_list = True
        self.inv_nest = c == '!'

        if reset_dot:
            self.match_dot_dir = False

        # Start list parsing
        success = True
        index = i.index
        list_type = c
        extended = []  # type: List[str]

        try:
            c = next(i)
            if c != '(':
                # Not a list after all.
                raise StopIteration

            while c != ')':
                c = next(i)

                if self.extend and c in EXT_TYPES and self.parse_extend(c, i, extended):
                    # Nothing more to do
                    pass
                elif c == '*':
                    self._handle_star(i, extended)
                elif c == '.':
                    self._handle_dot(i, extended)
                    if self.after_start:
                        self.match_dot_dir = self.dot and not self.nodotdir
                        self.reset_dir_track()
                elif c == '?':
                    extended.append(self._restrict_sequence() + _QMARK)
                elif c == '/':
                    if self.pathname:
                        extended.append(self._restrict_extended_slash())
                    extended.append(self.sep)
                elif c == "|":
                    self.clean_up_inverse(extended, temp_inv_nest and self.inv_nest)
                    extended.append(c)
                    if temp_after_start:
                        # Each alternative starts where the list itself started.
                        self.set_start_dir()
                elif c == '\\':
                    try:
                        extended.append(self._references(i))
                    except DotException:
                        # `_references` rewound to the dot; let the next pass handle it.
                        continue
                    except StopIteration:
                        # We've reached the end.
                        # Do nothing because this is going to abort the `extmatch` anyways.
                        pass
                elif c == '[':
                    subindex = i.index
                    try:
                        extended.append(self._sequence(i))
                    except StopIteration:
                        # Not a valid sequence; rewind and treat `[` as a literal.
                        i.rewind(i.index - subindex)
                        extended.append(r'\[')
                elif c != ')':
                    extended.append(re.escape(c))

                self.update_dir_state()

            # Wrap the collected content in the group that matches the list type.
            if list_type == '?':
                current.append((_QMARK_CAPTURE_GROUP if self.capture else _QMARK_GROUP).format(''.join(extended)))
            elif list_type == '*':
                current.append((_STAR_CAPTURE_GROUP if self.capture else _STAR_GROUP).format(''.join(extended)))
            elif list_type == '+':
                current.append((_PLUS_CAPTURE_GROUP if self.capture else _PLUS_GROUP).format(''.join(extended)))
            elif list_type == '@':
                current.append((_CAPTURE_GROUP if self.capture else _GROUP).format(''.join(extended)))
            elif list_type == '!':
                self.inv_ext += 1
                # If pattern is at the end, anchor the match to the end.
                current.append((_EXCLA_CAPTURE_GROUP if self.capture else _EXCLA_GROUP).format(''.join(extended)))
                # Choose the star that will match whatever the exclusion allows.
                if self.pathname:
                    if not temp_after_start or self.match_dot_dir:
                        star = self.path_star
                    elif temp_after_start and not self.dot:
                        star = self.path_star_dot2
                    else:
                        star = self.path_star_dot1
                else:
                    if not temp_after_start or self.dot:
                        star = _STAR
                    else:
                        star = _NO_DOT + _STAR

                if temp_after_start:
                    star = self.need_char + star
                # Place holder for closing, but store the proper star
                # so we know which one to use
                current.append(InvPlaceholder(star))

            if temp_in_list:
                # We are nested in another list, so resolve placeholders before returning to it.
                self.clean_up_inverse(current, temp_inv_nest and self.inv_nest)

        except StopIteration:
            # Parsing failed: undo the inverse count and put back everything that was read.
            success = False
            self.inv_ext = temp_inv_ext
            i.rewind(i.index - index)

        # Either restore if extend parsing failed, or reset if it worked
        if not temp_in_list:
            self.in_list = False
        if not temp_inv_nest:
            self.inv_nest = False

        if success:
            self.reset_dir_track()
        else:
            self.dir_start = temp_dir_start
            self.after_start = temp_after_start

        return success
1421
1422    def consume_path_sep(self, i: util.StringIter) -> None:
1423        """Consume any consecutive path separators as they count as one."""
1424
1425        try:
1426            if self.bslash_abort:
1427                count = -1
1428                c = '\\'
1429                while c in ('\\', '/'):
1430                    if c != '/' or count % 2:
1431                        count += 1
1432                    else:
1433                        count += 2
1434                    c = next(i)
1435                i.rewind(1)
1436                # Rewind one more if we have an odd number (escape): \\\*
1437                if count > 0 and count % 2:
1438                    i.rewind(1)
1439            else:
1440                c = '/'
1441                while c == '/':
1442                    c = next(i)
1443                i.rewind(1)
1444        except StopIteration:
1445            pass
1446
1447    def root(self, pattern: str, current: List[str]) -> None:
1448        """Start parsing the pattern."""
1449
1450        self.set_after_start()
1451        i = util.StringIter(pattern)
1452
1453        root_specified = False
1454        if self.win_drive_detect:
1455            root_specified, drive, slash, end = _get_win_drive(pattern, True, self.case_sensitive)
1456            if drive is not None:
1457                current.append(drive)
1458                if slash:
1459                    current.append(self.sep + _ONE_OR_MORE)
1460                i.advance(end)
1461                self.consume_path_sep(i)
1462            elif drive is None and root_specified:
1463                root_specified = True
1464        elif not self.win_drive_detect and self.pathname and pattern.startswith('/'):
1465            root_specified = True
1466
1467        if self.no_abs and root_specified:
1468            raise ValueError('The pattern must be a relative path pattern')
1469
1470        if root_specified:
1471            self.matchbase = False
1472            self.extmatchbase = False
1473            self.rtl = False
1474
1475        if not root_specified and self.realpath:
1476            current.append(_NO_WIN_ROOT if self.win_drive_detect else _NO_ROOT)
1477            current.append('')
1478
1479        for c in i:
1480
1481            index = i.index
1482            if self.extend and c in EXT_TYPES and self.parse_extend(c, i, current, True):
1483                # Nothing to do
1484                pass
1485            elif c == '.':
1486                self._handle_dot(i, current)
1487            elif c == '*':
1488                self._handle_star(i, current)
1489            elif c == '?':
1490                current.append(self._restrict_sequence() + _QMARK)
1491            elif c == '/':
1492                if self.pathname:
1493                    self.set_start_dir()
1494                    self.clean_up_inverse(current)
1495                    current.append(self.sep + _ONE_OR_MORE)
1496                    self.consume_path_sep(i)
1497                    self.matchbase = False
1498                else:
1499                    current.append(self.sep)
1500            elif c == '\\':
1501                index = i.index
1502                try:
1503                    value = self._references(i)
1504                    if self.dir_start:
1505                        self.clean_up_inverse(current)
1506                        self.consume_path_sep(i)
1507                        self.matchbase = False
1508                    current.append(value)
1509                except DotException:
1510                    continue
1511                except StopIteration:
1512                    # Escapes nothing, ignore
1513                    i.rewind(i.index - index)
1514            elif c == '[':
1515                index = i.index
1516                try:
1517                    current.append(self._sequence(i))
1518                except StopIteration:
1519                    i.rewind(i.index - index)
1520                    current.append(re.escape(c))
1521            else:
1522                current.append(re.escape(c))
1523
1524            self.update_dir_state()
1525
1526        self.clean_up_inverse(current)
1527
1528        if self.pathname:
1529            current.append(_PATH_TRAIL.format(self.sep))
1530
1531    def _parse(self, p: str) -> str:
1532        """Parse pattern."""
1533
1534        result = ['']
1535        prepend = ['']
1536
1537        self.negative = False
1538
1539        if is_negative(p, self.flags):
1540            self.negative = True
1541            p = p[1:]
1542
1543        if self.negative:
1544            # TODO: Do we prevent `NODOTDIR` for negative patterns?
1545            self.globstar_capture = False
1546            self.dot = True
1547
1548        if self.anchor:
1549            p, number = (RE_ANCHOR if not self.win_drive_detect else RE_WIN_ANCHOR).subn('', p)
1550            if number:
1551                self.matchbase = False
1552                self.extmatchbase = False
1553                self.rtl = False
1554
1555        if self.matchbase or self.extmatchbase:
1556            globstar = self.globstar
1557            self.globstar = True
1558            self.root('**', prepend)
1559            self.globstar = globstar
1560
1561        elif self.rtl:
1562            # Add a `**` that can capture anything: dots, special directories, symlinks, etc.
1563            # We are simulating right to left, so everything on the left should be accepted without
1564            # question.
1565            globstar = self.globstar
1566            dot = self.dot
1567            gstar = self.path_gstar_dot1
1568            globstar_capture = self.globstar_capture
1569            self.path_gstar_dot1 = _PATH_GSTAR_RTL_MATCH
1570            self.dot = True
1571            self.globstar = True
1572            self.globstar_capture = False
1573            self.root('**', prepend)
1574            self.globstar = globstar
1575            self.dot = dot
1576            self.path_gstar_dot1 = gstar
1577            self.globstar_capture = globstar_capture
1578
1579        # We have an escape, but it escapes nothing
1580        if p == '\\':
1581            p = ''
1582
1583        if p:
1584            self.root(p, result)
1585
1586        if p and (self.matchbase or self.extmatchbase or self.rtl):
1587            result = prepend + result
1588
1589        case_flag = 'i' if not self.case_sensitive else ''
1590        pattern = r'^(?s{}:{})$'.format(case_flag, ''.join(result))
1591
1592        if self.capture:
1593            # Strip out unnecessary regex comments
1594            pattern = pattern.replace('(?#)', '')
1595
1596        return pattern
1597
1598    def parse(self) -> AnyStr:
1599        """Parse pattern list."""
1600
1601        if isinstance(self.pattern, bytes):
1602            pattern = self._parse(self.pattern.decode('latin-1')).encode('latin-1')
1603        else:
1604            pattern = self._parse(self.pattern)
1605
1606        return pattern
1607