"""
Wild Card Match.

A custom implementation of `fnmatch`.

Licensed under MIT
Copyright (c) 2018 - 2020 Isaac Muse <isaacmuse@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions
of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
"""
import re
import functools
import bracex
import os
from . import util
from . import posix
from . _wcmatch import WcRegexp
from typing import List, Tuple, AnyStr, Iterable, Pattern, Generic, Optional, Set, Sequence, Union, cast

# Full character ranges, used by `WcParse._sequence` to build a character class
# that matches everything (or, negated, nothing) for str and bytes patterns.
UNICODE_RANGE = '\u0000-\U0010ffff'
ASCII_RANGE = '\x00-\xff'

# Default value of the `limit` parameter of `translate` and `compile`.
PATTERN_LIMIT = 1000

# Patterns used by `_get_win_drive` to walk an (escaped) Windows drive or UNC prefix.
RE_WIN_DRIVE_START = re.compile(r'((?:\\\\|/){2}((?:\\[^\\/]|[^\\/])+)|([\\]?[a-z][\\]?:))((?:\\\\|/)|$)', re.I)
RE_WIN_DRIVE_LETTER = re.compile(r'([a-z]:)((?:\\|/)|$)', re.I)
RE_WIN_DRIVE_PART = re.compile(r'((?:\\[^\\/]|[^\\/])+)((?:\\\\|/)|$)', re.I)
# Removes one level of backslash escaping (`\x` -> `x`).
RE_WIN_DRIVE_UNESCAPE = re.compile(r'\\(.)', re.I)

# Windows drive / UNC prefix matcher used by `escape` and `is_magic`.
# As with every two-item tuple below, the first entry is the str variant and
# the second the bytes variant; they are indexed with `util.UNICODE` / `util.BYTES`.
RE_WIN_DRIVE = (
    re.compile(
        r'''(?x)
        (
            (?:\\\\|/){2}[?.](?:\\\\|/)(?:
                [a-z]:|
                unc(?:(?:\\\\|/)[^\\/]+){2} |
                (?:global(?:\\\\|/))+(?:[a-z]:|unc(?:(?:\\\\|/)[^\\/]+){2}|[^\\/]+)
            ) |
            (?:\\\\|/){2}[^\\/]+(?:\\\\|/)[^\\/]+|
            [a-z]:
        )((?:\\\\|/){1}|$)
        ''',
        re.I
    ),
    re.compile(
        br'''(?x)
        (
            (?:\\\\|/){2}[?.](?:\\\\|/)(?:
                [a-z]:|
                unc(?:(?:\\\\|/)[^\\/]+){2} |
                (?:global(?:\\\\|/))+(?:[a-z]:|unc(?:(?:\\\\|/)[^\\/]+){2}|[^\\/]+)
            ) |
            (?:\\\\|/){2}[^\\/]+(?:\\\\|/)[^\\/]+|
            [a-z]:
        )((?:\\\\|/){1}|$)
        ''',
        re.I
    )
)

# Characters (and lone backslashes) that `escape` prefixes with a backslash.
RE_MAGIC_ESCAPE = (
    re.compile(r'([-!~*?()\[\]|{}]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))'),
    re.compile(br'([-!~*?()\[\]|{}]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))')
)

# Magic symbol sets. `_get_magic_symbols` unions these together depending on
# which flags are enabled (`MAGIC_DEF` is always included).
MAGIC_DEF = (
    frozenset("*?[]\\"),
    frozenset(b"*?[]\\")
)
MAGIC_SPLIT = (
    frozenset("|"),
    frozenset(b"|")
)
MAGIC_NEGATE = (
    frozenset('!'),
    frozenset(b'!')
)
MAGIC_MINUS_NEGATE = (
    frozenset('-'),
    frozenset(b'-')
)
MAGIC_TILDE = (
    frozenset('~'),
    frozenset(b'~')
)
MAGIC_EXTMATCH = (
    frozenset('()'),
    frozenset(b'()')
)
MAGIC_BRACE = (
    frozenset("{}"),
    frozenset(b"{}")
)

RE_MAGIC = (
    re.compile(r'([-!~*?(\[|{\\])'),
    re.compile(br'([-!~*?(\[|{\\])')
)
# Characters escaped by `escape` inside a Windows drive prefix.
RE_WIN_DRIVE_MAGIC = (
    re.compile(r'([{}|]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))'),
    re.compile(br'([{}|]|(?<!\\)(?:(?:[\\]{2})*)\\(?!\\))')
)
# Exclusion patterns that `compile` appends to the negative list when `NODIR` is set.
RE_NO_DIR = (
    re.compile(r'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$'),
    re.compile(br'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$')
)
RE_WIN_NO_DIR = (
    re.compile(r'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$'),
    re.compile(br'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$')
)
# Match a leading `~user` segment; used by `expand_tilde`.
RE_TILDE = (
    re.compile(r'~[^/]*(?=/|$)'),
    re.compile(br'~[^/]*(?=/|$)')
)
RE_WIN_TILDE = (
    re.compile(r'~(?:\\(?![\\/])|[^\\/])*(?=\\\\|/|$)'),
    re.compile(br'~(?:\\(?![\\/])|[^\\/])*(?=\\\\|/|$)')
)

TILDE_SYM = (
    '~',
    b'~'
)

RE_ANCHOR = re.compile(r'^/+')
RE_WIN_ANCHOR = re.compile(r'^(?:\\\\|/)+')
# Matches the remainder of a POSIX character class (`:name:]`) once `[` has been consumed.
RE_POSIX = re.compile(r':(alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit):\]')

SET_OPERATORS = frozenset(('&', '~', '|'))
# These hold both the bytes and str form so a one-item slice of either pattern type can be tested.
NEGATIVE_SYM = frozenset((b'!', '!'))
MINUS_NEGATIVE_SYM = frozenset((b'-', '-'))
ROUND_BRACKET = frozenset((b'(', '('))
EXT_TYPES = frozenset(('*', '?', '+', '@', '!'))

# Common flags are found between `0x0001 - 0xffffff`
# Implementation specific (`glob` vs `fnmatch` vs `wcmatch`) are found between `0x01000000 - 0xff000000`
# Internal special flags are found at `0x100000000` and above
CASE = 0x0001
IGNORECASE = 0x0002
RAWCHARS = 0x0004
NEGATE = 0x0008
MINUSNEGATE = 0x0010
PATHNAME = 0x0020
DOTMATCH = 0x0040
EXTMATCH = 0x0080
GLOBSTAR = 0x0100
BRACE = 0x0200
REALPATH = 0x0400
FOLLOW = 0x0800
SPLIT = 0x1000
MATCHBASE = 0x2000
NODIR = 0x4000
NEGATEALL = 0x8000
FORCEWIN = 0x10000
FORCEUNIX = 0x20000
GLOBTILDE = 0x40000
NOUNIQUE = 0x80000
NODOTDIR = 0x100000

# Internal flag
_TRANSLATE = 0x100000000  # Lets us know we are performing a translation, and we just want the regex.
_ANCHOR = 0x200000000  # The pattern, if it starts with a slash, is anchored to the working directory; strip the slash.
_EXTMATCHBASE = 0x400000000  # Like `MATCHBASE`, but works for multiple directory levels.
_NOABSOLUTE = 0x800000000  # Do not allow absolute patterns
_RTL = 0x1000000000  # Match from right to left

# Union of every public and internal flag defined in this module.
# `translate` and `_compile` AND incoming flags with this mask before parsing.
FLAG_MASK = (
    CASE |
    IGNORECASE |
    RAWCHARS |
    NEGATE |
    MINUSNEGATE |
    PATHNAME |
    DOTMATCH |
    EXTMATCH |
    GLOBSTAR |
    BRACE |
    REALPATH |
    FOLLOW |
    MATCHBASE |
    NODIR |
    NEGATEALL |
    FORCEWIN |
    FORCEUNIX |
    GLOBTILDE |
    SPLIT |
    NOUNIQUE |
    NODOTDIR |
    _TRANSLATE |
    _ANCHOR |
    _EXTMATCHBASE |
    _RTL |
    _NOABSOLUTE
)
# Flags that explicitly select case handling; `get_case` falls back to the
# platform/force-flag default when neither is set.
CASE_FLAGS = IGNORECASE | CASE

# Pieces to construct search path
#
# Templates containing `{sep}` are completed in `WcParse.__init__` with the
# escaped path separator(s); doubled braces (`{{1,2}}`) survive that
# `str.format` call as a literal regex quantifier.

# Question Mark
_QMARK = r'.'
# Star
_STAR = r'.*?'
# For paths, allow trailing /
_PATH_TRAIL = r'{}*?'
# Disallow . and .. (usually applied right after path separator when needed)
_NO_DIR = r'(?!(?:\.{{1,2}})(?:$|[{sep}]))'
# Star for `PATHNAME`
_PATH_STAR = r'[^{sep}]*?'
# Star when at start of filename during `DOTMATCH`
# (allow dot, but don't allow directory match /./ or /../)
_PATH_STAR_DOTMATCH = _NO_DIR + _PATH_STAR
# Star for `PATHNAME` when `DOTMATCH` is disabled and start is at start of file.
# Disallow . and .. and don't allow match to start with a dot.
# Note: `.format` binds only to the second literal; the `{sep}` placeholder
# carried in by `_PATH_STAR` is left in place for the later format call.
_PATH_STAR_NO_DOTMATCH = _NO_DIR + r'(?:(?!\.){})?'.format(_PATH_STAR)
# `GLOBSTAR` during `DOTMATCH`. Avoid directory match /./ or /../
_PATH_GSTAR_DOTMATCH = r'(?:(?!(?:[{sep}]|^)(?:\.{{1,2}})($|[{sep}])).)*?'
# `GLOBSTAR` with `DOTMATCH` disabled. Don't allow a dot to follow /
_PATH_GSTAR_NO_DOTMATCH = r'(?:(?!(?:[{sep}]|^)\.).)*?'
# Special right to left matching
_PATH_GSTAR_RTL_MATCH = r'.*?'
233# Next char cannot be a dot 234_NO_DOT = r'(?![.])' 235# Following char from sequence cannot be a separator or a dot 236_PATH_NO_SLASH_DOT = r'(?![{sep}.])' 237# Following char from sequence cannot be a separator 238_PATH_NO_SLASH = r'(?![{sep}])' 239# One or more 240_ONE_OR_MORE = r'+' 241# End of pattern 242_EOP = r'$' 243_PATH_EOP = r'(?:$|[{sep}])' 244# Divider between `globstar`. Can match start or end of pattern 245# in addition to slashes. 246_GLOBSTAR_DIV = r'(?:^|$|{})+' 247# Lookahead to see there is one character. 248_NEED_CHAR_PATH = r'(?=[^{sep}])' 249_NEED_CHAR = r'(?=.)' 250_NEED_SEP = r'(?={})' 251# Group that matches one or none 252_QMARK_GROUP = r'(?:{})?' 253_QMARK_CAPTURE_GROUP = r'((?#)(?:{})?)' 254# Group that matches Zero or more 255_STAR_GROUP = r'(?:{})*' 256_STAR_CAPTURE_GROUP = r'((?#)(?:{})*)' 257# Group that matches one or more 258_PLUS_GROUP = r'(?:{})+' 259_PLUS_CAPTURE_GROUP = r'((?#)(?:{})+)' 260# Group that matches exactly one 261_GROUP = r'(?:{})' 262_CAPTURE_GROUP = r'((?#){})' 263# Inverse group that matches none 264# This is the start. Since Python can't 265# do variable look behinds, we have stuff 266# everything at the end that it needs to lookahead 267# for. So there is an opening and a closing. 
_EXCLA_GROUP = r'(?:(?!(?:{})'
_EXCLA_CAPTURE_GROUP = r'((?#)(?!(?:{})'
# Closing for inverse group
_EXCLA_GROUP_CLOSE = r'){})'
# Restrict root
_NO_ROOT = r'(?!/)'
_NO_WIN_ROOT = r'(?!(?:[\\/]|[a-zA-Z]:))'
# Restrict directories
# (str variant first, bytes variant second; `translate` appends these
# uncompiled to the negative list when `NODIR` is set).
_NO_NIX_DIR = (
    r'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$',
    rb'^(?:.*?(?:/\.{1,2}/*|/)|\.{1,2}/*)$'
)
_NO_WIN_DIR = (
    r'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$',
    rb'^(?:.*?(?:[\\/]\.{1,2}[\\/]*|[\\/])|\.{1,2}[\\/]*)$'
)


class InvPlaceholder(str):
    """Placeholder for inverse pattern !(...)."""


class PathNameException(Exception):
    """Path name exception."""


class DotException(Exception):
    """Dot exception."""


class PatternLimitException(Exception):
    """Pattern limit exception."""


def to_str_sequence(patterns: Union[str, bytes, Sequence[AnyStr]]) -> Sequence[AnyStr]:
    """
    Return a simple string sequence.

    A lone `str` or `bytes` is wrapped in a one item list; anything else is
    returned unchanged.
    """

    if isinstance(patterns, (str, bytes)):
        return cast(Sequence[AnyStr], [patterns])
    else:
        return patterns


def escape(pattern: AnyStr, unix: Optional[bool] = None, pathname: bool = True, raw: bool = False) -> AnyStr:
    """
    Escape.

    Every match of `RE_MAGIC_ESCAPE` in `pattern` is prefixed with a backslash.

    `unix`: use Unix style path logic. `None` means decide from `util.platform()`.
    `pathname`: Use path logic.
    `raw`: Handle raw strings (deprecated). When false, existing backslashes
           are doubled before anything else is escaped.

    """

    if isinstance(pattern, bytes):
        drive_pat = cast(Pattern[AnyStr], RE_WIN_DRIVE[util.BYTES])
        magic = cast(Pattern[AnyStr], RE_MAGIC_ESCAPE[util.BYTES])
        drive_magic = cast(Pattern[AnyStr], RE_WIN_DRIVE_MAGIC[util.BYTES])
        # Replacement template: a literal backslash followed by the matched text.
        replace = br'\\\1'
        slash = b'\\'
        double_slash = b'\\\\'
        drive = b''
    else:
        drive_pat = cast(Pattern[AnyStr], RE_WIN_DRIVE[util.UNICODE])
        magic = cast(Pattern[AnyStr], RE_MAGIC_ESCAPE[util.UNICODE])
        drive_magic = cast(Pattern[AnyStr], RE_WIN_DRIVE_MAGIC[util.UNICODE])
        replace = r'\\\1'
        slash = '\\'
        double_slash = '\\\\'
        drive = ''

    if not raw:
        pattern = pattern.replace(slash, double_slash)

    # Handle windows drives special.
    # Windows drives are handled special internally.
    # So we shouldn't escape them as we'll just have to
    # detect and undo it later.
    length = 0
    if pathname and ((unix is None and util.platform() == "windows") or unix is False):
        m = drive_pat.match(pattern)
        if m:
            # Replace splitting magic chars
            drive = m.group(0)
            length = len(drive)
            drive = drive_magic.sub(replace, m.group(0))
    # Everything after the (possibly empty) drive gets the full escaping.
    pattern = pattern[length:]

    return drive + magic.sub(replace, pattern)


def _get_win_drive(
    pattern: str,
    regex: bool = False,
    case_sensitive: bool = False
) -> Tuple[bool, Optional[str], bool, int]:
    """
    Get Windows drive.

    Returns `(root_specified, drive, slash, end)`:

    - `root_specified`: the pattern starts with a drive or a root slash.
    - `drive`: the unescaped drive (or, when `regex` is true, a regular
      expression matching it); `None` when no complete drive was found.
    - `slash`: the last consumed drive part was followed by a separator.
    - `end`: index in `pattern` just past what was consumed.
    """

    drive = None
    slash = False
    end = 0
    root_specified = False
    m = RE_WIN_DRIVE_START.match(pattern)
    if m:
        end = m.end(0)
        if m.group(3) and RE_WIN_DRIVE_LETTER.match(m.group(0)):
            # Simple drive letter form: `c:`
            if regex:
                drive = escape_drive(RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(3)), case_sensitive)
            else:
                drive = RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(0))
            slash = bool(m.group(4))
            root_specified = True
        elif m.group(2):
            # Double slash form: collect parts until the drive is complete.
            root_specified = True
            part = [RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(2))]
            is_special = part[-1].lower() in ('.', '?')
            # `complete` is the number of additional parts required;
            # `first` is the part position at which `unc`/`global` is recognized.
            complete = 1
            first = 1
            count = 0
            for count, m in enumerate(RE_WIN_DRIVE_PART.finditer(pattern, m.end(0)), 1):
                end = m.end(0)
                part.append(RE_WIN_DRIVE_UNESCAPE.sub(r'\1', m.group(1)))
                slash = bool(m.group(2))
                if is_special:
                    if count == first and part[-1].lower() == 'unc':
                        complete += 2
                    elif count == first and part[-1].lower() == 'global':
                        first += 1
                        complete += 1
                if count == complete:
                    break
            # Only report a drive when every required part was present.
            if count == complete:
                if not regex:
                    drive = '\\\\{}{}'.format('\\'.join(part), '\\' if slash else '')
                else:
                    drive = r'[\\/]{2}' + r'[\\/]'.join([escape_drive(p, case_sensitive) for p in part])
    elif pattern.startswith(('\\\\', '/')):
        root_specified = True

    return root_specified, drive, slash, end


def _get_magic_symbols(pattern: AnyStr, unix: bool, flags: int) -> Tuple[Set[AnyStr], Set[AnyStr]]:
    """
    Get magic symbols.

    Returns `(magic, magic_drive)`: the symbols considered magic in the body of
    a pattern, and the symbols considered magic inside a Windows drive, for the
    given `flags`. The string type of `pattern` selects str or bytes symbols.
    """

    if isinstance(pattern, bytes):
        ptype = util.BYTES
        slash = b'\\'  # type: AnyStr
    else:
        ptype = util.UNICODE
        slash = '\\'

    magic = set()  # type: Set[AnyStr]
    if unix:
        magic_drive = set()  # type: Set[AnyStr]
    else:
        magic_drive = set([slash])

    magic |= cast(Set[AnyStr], MAGIC_DEF[ptype])
    if flags & BRACE:
        magic |= cast(Set[AnyStr], MAGIC_BRACE[ptype])
        magic_drive |= cast(Set[AnyStr], MAGIC_BRACE[ptype])
    if flags & SPLIT:
        magic |= cast(Set[AnyStr], MAGIC_SPLIT[ptype])
        magic_drive |= cast(Set[AnyStr], MAGIC_SPLIT[ptype])
    if flags & GLOBTILDE:
        magic |= cast(Set[AnyStr], MAGIC_TILDE[ptype])
    if flags & EXTMATCH:
        magic |= cast(Set[AnyStr], MAGIC_EXTMATCH[ptype])
    if flags & NEGATE:
        if flags & MINUSNEGATE:
            magic |= cast(Set[AnyStr], MAGIC_MINUS_NEGATE[ptype])
        else:
            magic |= cast(Set[AnyStr], MAGIC_NEGATE[ptype])

    return magic, magic_drive


def is_magic(pattern: AnyStr, flags: int = 0) -> bool:
    """
    Check if pattern is magic.

    The pattern is magic when it contains any symbol returned by
    `_get_magic_symbols` for `flags`. With `PATHNAME` and Windows style
    handling, a leading drive is checked against the drive symbols only.
    """

    magical = False
    unix = is_unix_style(flags)

    if isinstance(pattern, bytes):
        ptype = util.BYTES
    else:
        ptype = util.UNICODE

    drive_pat = cast(Pattern[AnyStr], RE_WIN_DRIVE[ptype])

    magic, magic_drive = _get_magic_symbols(pattern, unix, flags)
    is_path = flags & PATHNAME

    length = 0
    # `is_unix_style` always returns a `bool`, so "not Unix" is the only
    # condition needed to select Windows drive handling.
    if is_path and not unix:
        m = drive_pat.match(pattern)
        if m:
            drive = m.group(0)
            length = len(drive)
            for c in magic_drive:
                if c in drive:
                    magical = True
                    break

    if not magical:
        # Skip over the drive (if any) and test the remainder.
        pattern = pattern[length:]
        for c in magic:
            if c in pattern:
                magical = True
                break

    return magical


def is_negative(pattern: AnyStr, flags: int) -> bool:
    """
    Check if negative pattern.

    Requires `NEGATE`. The pattern must start with `-` under `MINUSNEGATE`,
    otherwise with `!`; under `EXTMATCH` a leading `!(` is not a negation.
    """

    if flags & MINUSNEGATE:
        return bool(flags & NEGATE and pattern[0:1] in MINUS_NEGATIVE_SYM)
    elif flags & EXTMATCH:
        return bool(flags & NEGATE and pattern[0:1] in NEGATIVE_SYM and pattern[1:2] not in ROUND_BRACKET)
    else:
        return bool(flags & NEGATE and pattern[0:1] in NEGATIVE_SYM)


def tilde_pos(pattern: AnyStr, flags: int) -> int:
    """
    Is user folder.

    Returns the index of a leading `~` (0, or 1 when it follows a `!` and
    `NEGATE` is set), or -1 when there is none or when `GLOBTILDE` and
    `REALPATH` are not both enabled.
    """

    pos = -1
    if flags & GLOBTILDE and flags & REALPATH:
        if flags & NEGATE:
            if pattern[0:1] in TILDE_SYM:
                pos = 0
            elif pattern[0:1] in NEGATIVE_SYM and pattern[1:2] in TILDE_SYM:
                pos = 1
        elif pattern[0:1] in TILDE_SYM:
            pos = 0
    return pos


def expand_braces(patterns: AnyStr, flags: int, limit: int) -> Iterable[AnyStr]:
    """
    Expand braces.

    `patterns` may be a single pattern or an iterable of patterns. Without
    `BRACE` the patterns are yielded unchanged.

    Raises `bracex.ExpansionLimitException` when `bracex` reports that `limit`
    was exceeded.
    """

    candidates = [patterns] if isinstance(patterns, (str, bytes)) else patterns

    if flags & BRACE:
        for p in candidates:
            try:
                # `bracex` enforces `limit` for this single pattern; callers
                # track the running total across patterns themselves.
                yield from bracex.iexpand(p, keep_escapes=True, limit=limit)
            except bracex.ExpansionLimitException:
                raise
            except Exception:  # pragma: no cover
                # We will probably never hit this as `bracex`
                # doesn't throw any specific exceptions and
                # should normally always parse, but just in case.
                yield p
    else:
        yield from candidates


def expand_tilde(pattern: AnyStr, is_unix: bool, flags: int) -> AnyStr:
    """
    Expand tilde.

    When `tilde_pos` finds a leading `~user` segment and `os.path.expanduser`
    resolves it to an existing path, the segment is replaced with the escaped
    expansion (a leading negation symbol is kept). Otherwise the pattern is
    returned unchanged.
    """

    pos = tilde_pos(pattern, flags)

    if pos > -1:
        string_type = util.BYTES if isinstance(pattern, bytes) else util.UNICODE
        tilde = cast(AnyStr, TILDE_SYM[string_type])
        re_tilde = cast(Pattern[AnyStr], RE_WIN_TILDE[string_type] if not is_unix else RE_TILDE[string_type])
        m = re_tilde.match(pattern, pos)
        if m:
            expanded = os.path.expanduser(m.group(0))
            # An unchanged result still starts with `~`: nothing was expanded.
            if not expanded.startswith(tilde) and os.path.exists(expanded):
                pattern = (pattern[0:1] if pos else pattern[0:0]) + escape(expanded, is_unix) + pattern[m.end(0):]
    return pattern


def expand(pattern: AnyStr, flags: int, limit: int) -> Iterable[AnyStr]:
    """Expand and normalize: brace expansion, then splitting, then tilde expansion."""

    for expanded in expand_braces(pattern, flags, limit):
        for splitted in split(expanded, flags):
            yield expand_tilde(splitted, is_unix_style(flags), flags)


def is_case_sensitive(flags: int) -> bool:
    """
    Is case sensitive.

    `FORCEWIN` forces insensitive, `FORCEUNIX` forces sensitive (in that order
    of precedence); otherwise defer to `util.is_case_sensitive()`.
    """

    if flags & FORCEWIN:
        return False
    if flags & FORCEUNIX:
        return True
    return util.is_case_sensitive()


def get_case(flags: int) -> bool:
    """
    Parse flags for case sensitivity settings.

    `CASE` wins over `IGNORECASE`; with neither set, the result comes from
    `is_case_sensitive`.
    """

    if not flags & CASE_FLAGS:
        return is_case_sensitive(flags)
    return bool(flags & CASE)


def escape_drive(drive: str, case: bool) -> str:
    """
    Escape drive.

    The drive is regex escaped. When `case` is true it is additionally wrapped
    in an inline `(?i:...)` group so the drive itself matches case insensitively.
    """

    return '(?i:{})'.format(re.escape(drive)) if case else re.escape(drive)


def is_unix_style(flags: int) -> bool:
    """
    Check if we should use Unix style.

    True when `FORCEWIN` is not set and either the platform is not Windows or
    `FORCEUNIX` is set without `REALPATH`.
    """

    return (
        (
            (util.platform() != "windows") or
            (not bool(flags & REALPATH) and bool(flags & FORCEUNIX))
        ) and
        not flags & FORCEWIN
    )


def translate(
    patterns: Sequence[AnyStr],
    flags: int,
    limit: int = PATTERN_LIMIT
) -> Tuple[List[AnyStr], List[AnyStr]]:
    """
    Translate patterns.

    Each pattern is normalized, expanded, and parsed with `WcParse`. Returns
    `(positive, negative)` lists holding the parsed results of the inclusion
    and exclusion patterns respectively. Duplicate expanded patterns are
    parsed only once.

    Raises `PatternLimitException` when `limit` is non-zero and expansion
    produces more than `limit` patterns.
    """

    positive = []  # type: List[AnyStr]
    negative = []  # type: List[AnyStr]

    flags = (flags | _TRANSLATE) & FLAG_MASK
    is_unix = is_unix_style(flags)
    seen = set()

    try:
        current_limit = limit
        total = 0
        for pattern in patterns:
            pattern = util.norm_pattern(pattern, not is_unix, bool(flags & RAWCHARS))
            count = 0
            for count, expanded in enumerate(expand(pattern, flags, current_limit), 1):
                total += 1
                if 0 < limit < total:
                    raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))
                if expanded not in seen:
                    seen.add(expanded)
                    (negative if is_negative(expanded, flags) else positive).append(WcParse(expanded, flags).parse())
            if limit:
                # Shrink the budget handed to the next expansion, but never
                # below 1; the running `total` check above enforces the limit.
                current_limit -= count
                if current_limit < 1:
                    current_limit = 1
    except bracex.ExpansionLimitException:
        raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))

    if patterns is not None and negative and not positive:
        if flags & NEGATEALL:
            # Only exclusions were given: supply a match-all inclusion pattern.
            default = b'**' if isinstance(patterns[0], bytes) else '**'
            positive.append(
                WcParse(default, flags | (GLOBSTAR if flags & PATHNAME else 0)).parse()
            )

    if patterns and flags & NODIR:
        index = util.BYTES if isinstance(patterns[0], bytes) else util.UNICODE
        exclude = cast(AnyStr, _NO_NIX_DIR[index] if is_unix else _NO_WIN_DIR[index])
        negative.append(exclude)

    return positive, negative


def split(pattern: AnyStr, flags: int) -> Iterable[AnyStr]:
    """Split patterns on `|` when `SPLIT` is set; otherwise yield the pattern as is."""

    if flags & SPLIT:
        yield from WcSplit(pattern, flags).split()
    else:
        yield pattern


def compile(  # noqa: A001
    patterns: Sequence[AnyStr],
    flags: int,
    limit: int = PATTERN_LIMIT
) -> WcRegexp[AnyStr]:
    """
    Compile patterns.

    Each pattern is normalized, expanded, and compiled via `_compile`. The
    compiled inclusion and exclusion patterns are wrapped in a `WcRegexp`.
    Duplicate expanded patterns are compiled only once.

    Raises `PatternLimitException` when `limit` is non-zero and expansion
    produces more than `limit` patterns.
    """

    positive = []  # type: List[Pattern[AnyStr]]
    negative = []  # type: List[Pattern[AnyStr]]

    is_unix = is_unix_style(flags)
    seen = set()

    try:
        current_limit = limit
        total = 0
        for pattern in patterns:
            pattern = util.norm_pattern(pattern, not is_unix, bool(flags & RAWCHARS))
            count = 0
            for count, expanded in enumerate(expand(pattern, flags, current_limit), 1):
                total += 1
                if 0 < limit < total:
                    raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))
                if expanded not in seen:
                    seen.add(expanded)
                    (negative if is_negative(expanded, flags) else positive).append(_compile(expanded, flags))
            if limit:
                # Shrink the budget handed to the next expansion, but never
                # below 1; the running `total` check above enforces the limit.
                current_limit -= count
                if current_limit < 1:
                    current_limit = 1
    except bracex.ExpansionLimitException:
        raise PatternLimitException("Pattern limit exceeded the limit of {:d}".format(limit))

    if patterns is not None and negative and not positive:
        if flags & NEGATEALL:
            # Only exclusions were given: supply a match-all inclusion pattern.
            default = b'**' if isinstance(patterns[0], bytes) else '**'
            positive.append(_compile(default, flags | (GLOBSTAR if flags & PATHNAME else 0)))

    # Test truthiness (as `translate` does) rather than `is not None`, so an
    # empty `patterns` sequence does not raise `IndexError` on `patterns[0]`.
    if patterns and flags & NODIR:
        ptype = util.BYTES if isinstance(patterns[0], bytes) else util.UNICODE
        negative.append(cast(Pattern[AnyStr], RE_NO_DIR[ptype] if is_unix else RE_WIN_NO_DIR[ptype]))

    return WcRegexp(
        tuple(positive), tuple(negative),
        bool(flags & REALPATH), bool(flags & PATHNAME), bool(flags & FOLLOW)
    )


@functools.lru_cache(maxsize=256, typed=True)
def _compile(pattern: AnyStr, flags: int) -> Pattern[AnyStr]:
    """Compile the pattern to regex (results are cached per pattern/flags pair)."""

    return re.compile(WcParse(pattern, flags & FLAG_MASK).parse())


class WcSplit(Generic[AnyStr]):
    """
    Class that splits patterns on |.

    A `|` that sits inside an extended match list, follows a backslash, or is
    inside a character sequence is skipped over and does not split the pattern.
    """

    def __init__(self, pattern: AnyStr, flags: int) -> None:
        """Initialize."""

        self.pattern = pattern  # type: AnyStr
        self.pathname = bool(flags & PATHNAME)
        self.extend = bool(flags & EXTMATCH)
        self.unix = is_unix_style(flags)
        # With Windows style handling, `\\` inside a sequence aborts the sequence.
        self.bslash_abort = not self.unix

    def _sequence(self, i: util.StringIter) -> None:
        """
        Handle character group.

        Consumes characters up to the closing `]`. Raises `StopIteration` when
        the input runs out, or when a path separator makes the sequence invalid.
        """

        c = next(i)
        if c == '!':
            c = next(i)
        if c in ('^', '-', '['):
            c = next(i)

        while c != ']':
            if c == '\\':
                # Handle escapes
                try:
                    self._references(i, True)
                except PathNameException:
                    raise StopIteration
            elif c == '/':
                if self.pathname:
                    raise StopIteration
            c = next(i)

    def _references(self, i: util.StringIter, sequence: bool = False) -> None:
        """
        Handle references.

        Consumes the character following a backslash. Inside a sequence,
        raises `PathNameException` when that character acts as a path separator.
        """

        c = next(i)
        if c == '\\':
            # \\
            if sequence and self.bslash_abort:
                raise PathNameException
        elif c == '/':
            # \/
            if sequence and self.pathname:
                raise PathNameException
        else:
            # \a, \b, \c, etc.
            pass

    def parse_extend(self, c: str, i: util.StringIter) -> bool:
        """
        Parse extended pattern lists.

        Returns true when a complete `(...)` list was consumed. On failure the
        iterator is rewound and false is returned.
        """

        # Start list parsing
        success = True
        index = i.index
        list_type = c
        try:
            c = next(i)
            if c != '(':
                raise StopIteration
            while c != ')':
                c = next(i)

                if self.extend and c in EXT_TYPES and self.parse_extend(c, i):
                    continue

                if c == '\\':
                    try:
                        self._references(i)
                    except StopIteration:
                        pass
                elif c == '[':
                    index = i.index
                    try:
                        self._sequence(i)
                    except StopIteration:
                        # Not a valid sequence; resume right after the `[`.
                        i.rewind(i.index - index)

        except StopIteration:
            success = False
            c = list_type
            i.rewind(i.index - index)

        return success

    def _split(self, pattern: str) -> Iterable[str]:
        """Split the pattern, yielding each piece between top level `|` characters."""

        start = -1
        i = util.StringIter(pattern)

        for c in i:
            if self.extend and c in EXT_TYPES and self.parse_extend(c, i):
                continue

            if c == '|':
                split = i.index - 1
                p = pattern[start + 1:split]
                yield p
                start = split
            elif c == '\\':
                index = i.index
                try:
                    self._references(i)
                except StopIteration:
                    i.rewind(i.index - index)
            elif c == '[':
                index = i.index
                try:
                    self._sequence(i)
                except StopIteration:
                    # Not a valid sequence; resume right after the `[`.
                    i.rewind(i.index - index)

        if start < len(pattern):
            yield pattern[start + 1:]

    def split(self) -> Iterable[AnyStr]:
        """
        Split the pattern.

        Bytes patterns are decoded as `latin-1` for splitting and each piece is
        encoded back, so the pieces have the same type as the input pattern.
        """

        if isinstance(self.pattern, bytes):
            for p in self._split(self.pattern.decode('latin-1')):
                yield p.encode('latin-1')
        else:
            yield from self._split(self.pattern)


class WcParse(Generic[AnyStr]):
    """Parse the wildcard pattern."""

    def __init__(self, pattern: AnyStr, flags: int = 0) -> None:
        """Initialize."""

        self.pattern = pattern  # type: AnyStr
        self.no_abs = bool(flags & _NOABSOLUTE)
        self.braces = bool(flags & BRACE)
        self.is_bytes = isinstance(pattern, bytes)
self.pathname = bool(flags & PATHNAME) 845 self.raw_chars = bool(flags & RAWCHARS) 846 self.globstar = self.pathname and bool(flags & GLOBSTAR) 847 self.realpath = bool(flags & REALPATH) and self.pathname 848 self.translate = bool(flags & _TRANSLATE) 849 self.negate = bool(flags & NEGATE) 850 self.globstar_capture = self.realpath and not self.translate 851 self.dot = bool(flags & DOTMATCH) 852 self.extend = bool(flags & EXTMATCH) 853 self.matchbase = bool(flags & MATCHBASE) 854 self.extmatchbase = bool(flags & _EXTMATCHBASE) 855 self.rtl = bool(flags & _RTL) 856 self.anchor = bool(flags & _ANCHOR) 857 self.nodotdir = bool(flags & NODOTDIR) 858 self.capture = self.translate 859 self.case_sensitive = get_case(flags) 860 self.in_list = False 861 self.inv_nest = False 862 self.flags = flags 863 self.inv_ext = 0 864 self.unix = is_unix_style(self.flags) 865 if not self.unix: 866 self.win_drive_detect = self.pathname 867 self.char_avoid = (ord('\\'), ord('/'), ord('.')) # type: Tuple[int, ...] 
868 self.bslash_abort = self.pathname 869 sep = {"sep": re.escape('\\/')} 870 else: 871 self.win_drive_detect = False 872 self.char_avoid = (ord('/'), ord('.')) 873 self.bslash_abort = False 874 sep = {"sep": re.escape('/')} 875 self.bare_sep = sep['sep'] 876 self.sep = '[{}]'.format(self.bare_sep) 877 self.path_eop = _PATH_EOP.format(**sep) 878 self.no_dir = _NO_DIR.format(**sep) 879 self.seq_path = _PATH_NO_SLASH.format(**sep) 880 self.seq_path_dot = _PATH_NO_SLASH_DOT.format(**sep) 881 self.path_star = _PATH_STAR.format(**sep) 882 self.path_star_dot1 = _PATH_STAR_DOTMATCH.format(**sep) 883 self.path_star_dot2 = _PATH_STAR_NO_DOTMATCH.format(**sep) 884 self.path_gstar_dot1 = _PATH_GSTAR_DOTMATCH.format(**sep) 885 self.path_gstar_dot2 = _PATH_GSTAR_NO_DOTMATCH.format(**sep) 886 if self.pathname: 887 self.need_char = _NEED_CHAR_PATH.format(**sep) 888 else: 889 self.need_char = _NEED_CHAR 890 891 def set_after_start(self) -> None: 892 """Set tracker for character after the start of a directory.""" 893 894 self.after_start = True 895 self.dir_start = False 896 897 def set_start_dir(self) -> None: 898 """Set directory start.""" 899 900 self.dir_start = True 901 self.after_start = False 902 903 def reset_dir_track(self) -> None: 904 """Reset directory tracker.""" 905 906 self.dir_start = False 907 self.after_start = False 908 909 def update_dir_state(self) -> None: 910 """ 911 Update the directory state. 912 913 If we are at the directory start, 914 update to after start state (the character right after). 915 If at after start, reset state. 
916 """ 917 918 if self.dir_start and not self.after_start: 919 self.set_after_start() 920 elif not self.dir_start and self.after_start: 921 self.reset_dir_track() 922 923 def _restrict_extended_slash(self) -> str: 924 """Restrict extended slash.""" 925 926 return self.seq_path if self.pathname else '' 927 928 def _restrict_sequence(self) -> str: 929 """Restrict sequence.""" 930 931 if self.pathname: 932 value = self.seq_path_dot if self.after_start and not self.dot else self.seq_path 933 if self.after_start: 934 value = self.no_dir + value 935 else: 936 value = _NO_DOT if self.after_start and not self.dot else "" 937 self.reset_dir_track() 938 939 return value 940 941 def _sequence_range_check(self, result: List[str], last: str) -> bool: 942 """ 943 If range backwards, remove it. 944 945 A bad range will cause the regular expression to fail, 946 so we need to remove it, but return that we removed it 947 so the caller can know the sequence wasn't empty. 948 Caller will have to craft a sequence that makes sense 949 if empty at the end with either an impossible sequence 950 for inclusive sequences or a sequence that matches 951 everything for an exclusive sequence. 952 """ 953 954 removed = False 955 first = result[-2] 956 v1 = ord(first[1:2] if len(first) > 1 else first) 957 v2 = ord(last[1:2] if len(last) > 1 else last) 958 if v2 < v1: 959 result.pop() 960 result.pop() 961 removed = True 962 else: 963 result.append(last) 964 return removed 965 966 def _handle_posix(self, i: util.StringIter, result: List[str], end_range: int) -> bool: 967 """Handle posix classes.""" 968 969 last_posix = False 970 m = i.match(RE_POSIX) 971 if m: 972 last_posix = True 973 # Cannot do range with posix class 974 # so escape last `-` if we think this 975 # is the end of a range. 
976 if end_range and i.index - 1 >= end_range: 977 result[-1] = '\\' + result[-1] 978 result.append(posix.get_posix_property(m.group(1), self.is_bytes)) 979 return last_posix 980 981 def _sequence(self, i: util.StringIter) -> str: 982 """Handle character group.""" 983 984 result = ['['] 985 end_range = 0 986 escape_hyphen = -1 987 removed = False 988 last_posix = False 989 990 c = next(i) 991 if c in ('!', '^'): 992 # Handle negate char 993 result.append('^') 994 c = next(i) 995 if c == '[': 996 last_posix = self._handle_posix(i, result, 0) 997 if not last_posix: 998 result.append(re.escape(c)) 999 c = next(i) 1000 elif c in ('-', ']'): 1001 result.append(re.escape(c)) 1002 c = next(i) 1003 1004 while c != ']': 1005 if c == '-': 1006 if last_posix: 1007 result.append('\\' + c) 1008 last_posix = False 1009 elif i.index - 1 > escape_hyphen: 1010 # Found a range delimiter. 1011 # Mark the next two characters as needing to be escaped if hyphens. 1012 # The next character would be the end char range (s-e), 1013 # and the one after that would be the potential start char range 1014 # of a new range (s-es-e), so neither can be legitimate range delimiters. 
1015 result.append(c) 1016 escape_hyphen = i.index + 1 1017 end_range = i.index 1018 elif end_range and i.index - 1 >= end_range: 1019 if self._sequence_range_check(result, '\\' + c): 1020 removed = True 1021 end_range = 0 1022 else: 1023 result.append('\\' + c) 1024 c = next(i) 1025 continue 1026 last_posix = False 1027 1028 if c == '[': 1029 last_posix = self._handle_posix(i, result, end_range) 1030 if last_posix: 1031 c = next(i) 1032 continue 1033 1034 if c == '\\': 1035 # Handle escapes 1036 try: 1037 value = self._references(i, True) 1038 except DotException: 1039 value = re.escape(next(i)) 1040 except PathNameException: 1041 raise StopIteration 1042 elif c == '/': 1043 if self.pathname: 1044 raise StopIteration 1045 value = c 1046 elif c in SET_OPERATORS: 1047 # Escape &, |, and ~ to avoid &&, ||, and ~~ 1048 value = '\\' + c 1049 else: 1050 # Anything else 1051 value = c 1052 1053 if end_range and i.index - 1 >= end_range: 1054 if self._sequence_range_check(result, value): 1055 removed = True 1056 end_range = 0 1057 else: 1058 result.append(value) 1059 1060 c = next(i) 1061 1062 result.append(']') 1063 # Bad range removed. 1064 if removed: 1065 value = "".join(result) 1066 if value == '[]': 1067 # We specified some ranges, but they are all 1068 # out of reach. Create an impossible sequence to match. 1069 result = ['[^{}]'.format(ASCII_RANGE if self.is_bytes else UNICODE_RANGE)] 1070 elif value == '[^]': 1071 # We specified some range, but hey are all 1072 # out of reach. Since this is exclusive 1073 # that means we can match *anything*. 
1074 result = ['[{}]'.format(ASCII_RANGE if self.is_bytes else UNICODE_RANGE)] 1075 else: 1076 result = [value] 1077 1078 if self.pathname or self.after_start: 1079 return self._restrict_sequence() + ''.join(result) 1080 1081 return ''.join(result) 1082 1083 def _references(self, i: util.StringIter, sequence: bool = False) -> str: 1084 """Handle references.""" 1085 1086 value = '' 1087 c = next(i) 1088 if c == '\\': 1089 # \\ 1090 if sequence and self.bslash_abort: 1091 raise PathNameException 1092 value = r'\\' 1093 if self.bslash_abort: 1094 if not self.in_list: 1095 value = self.sep + _ONE_OR_MORE 1096 self.set_start_dir() 1097 else: 1098 value = self._restrict_extended_slash() + self.sep 1099 elif not self.unix: 1100 value = self.sep if not sequence else self.bare_sep 1101 elif c == '/': 1102 # \/ 1103 if sequence and self.pathname: 1104 raise PathNameException 1105 if self.pathname: 1106 if not self.in_list: 1107 value = self.sep + _ONE_OR_MORE 1108 self.set_start_dir() 1109 else: 1110 value = self._restrict_extended_slash() + self.sep 1111 else: 1112 value = self.sep if not sequence else self.bare_sep 1113 elif c == '.': 1114 # Let dots be handled special 1115 i.rewind(1) 1116 raise DotException 1117 else: 1118 # \a, \b, \c, etc. 1119 value = re.escape(c) 1120 1121 return value 1122 1123 def _handle_dot(self, i: util.StringIter, current: List[str]) -> None: 1124 """Handle dot.""" 1125 1126 is_current = True 1127 is_previous = False 1128 1129 if self.after_start and self.pathname and self.nodotdir: 1130 try: 1131 index = i.index 1132 while True: 1133 c = next(i) 1134 if c == '.' and is_current: 1135 is_previous = True 1136 is_current = False 1137 elif c == '.' 
and is_previous: 1138 is_previous = False 1139 raise StopIteration 1140 elif c in ('|', ')') and self.in_list: 1141 raise StopIteration 1142 elif c == '\\': 1143 try: 1144 self._references(i, True) 1145 # Was not what we expected 1146 is_current = False 1147 is_previous = False 1148 raise StopIteration 1149 except DotException: 1150 if is_current: 1151 is_previous = True 1152 is_current = False 1153 c = next(i) 1154 else: 1155 is_previous = False 1156 raise StopIteration 1157 except PathNameException: 1158 raise StopIteration 1159 elif c == '/': 1160 raise StopIteration 1161 else: 1162 is_current = False 1163 is_previous = False 1164 raise StopIteration 1165 except StopIteration: 1166 i.rewind(i.index - index) 1167 1168 if not is_current and not is_previous: 1169 current.append(r'(?!\.[.]?{})\.'.format(self.path_eop)) 1170 else: 1171 current.append(re.escape('.')) 1172 1173 def _handle_star(self, i: util.StringIter, current: List[str]) -> None: 1174 """Handle star.""" 1175 1176 if self.pathname: 1177 if self.after_start and not self.dot: 1178 star = self.path_star_dot2 1179 globstar = self.path_gstar_dot2 1180 elif self.after_start: 1181 star = self.path_star_dot1 1182 globstar = self.path_gstar_dot1 1183 else: 1184 star = self.path_star 1185 globstar = self.path_gstar_dot1 1186 if self.globstar_capture: 1187 globstar = '({})'.format(globstar) 1188 else: 1189 if self.after_start and not self.dot: 1190 star = _NO_DOT + _STAR 1191 else: 1192 star = _STAR 1193 globstar = '' 1194 value = star 1195 1196 if self.after_start and self.globstar and not self.in_list: 1197 skip = False 1198 try: 1199 c = next(i) 1200 if c != '*': 1201 i.rewind(1) 1202 raise StopIteration 1203 except StopIteration: 1204 # Could not acquire a second star, so assume single star pattern 1205 skip = True 1206 1207 if not skip: 1208 try: 1209 index = i.index 1210 c = next(i) 1211 if c == '\\': 1212 try: 1213 self._references(i, True) 1214 # Was not what we expected 1215 # Assume two single stars 
1216 except DotException: 1217 pass 1218 except PathNameException: 1219 # Looks like escape was a valid slash 1220 # Store pattern accordingly 1221 value = globstar 1222 self.matchbase = False 1223 except StopIteration: 1224 # Escapes nothing, ignore and assume double star 1225 value = globstar 1226 elif c == '/': 1227 value = globstar 1228 self.matchbase = False 1229 1230 if value != globstar: 1231 i.rewind(i.index - index) 1232 except StopIteration: 1233 # Could not acquire directory slash due to no more characters 1234 # Use double star 1235 value = globstar 1236 1237 if self.after_start and value != globstar: 1238 value = self.need_char + value 1239 # Consume duplicate starts 1240 try: 1241 c = next(i) 1242 while c == '*': 1243 c = next(i) 1244 i.rewind(1) 1245 except StopIteration: 1246 pass 1247 1248 self.reset_dir_track() 1249 if value == globstar: 1250 sep = _GLOBSTAR_DIV.format(self.sep) 1251 # Check if the last entry was a `globstar` 1252 # If so, don't bother adding another. 1253 if current[-1] != sep: 1254 if current[-1] == '': 1255 # At the beginning of the pattern 1256 current[-1] = value 1257 else: 1258 # Replace the last path separator 1259 current[-1] = _NEED_SEP.format(self.sep) 1260 current.append(value) 1261 self.consume_path_sep(i) 1262 current.append(sep) 1263 self.set_start_dir() 1264 else: 1265 current.append(value) 1266 1267 def clean_up_inverse(self, current: List[str], nested: bool = False) -> None: 1268 """ 1269 Clean up current. 1270 1271 Python doesn't have variable lookbehinds, so we have to do negative lookaheads. 1272 !(...) when converted to regular expression is atomic, so once it matches, that's it. 1273 So we use the pattern `(?:(?!(?:stuff|to|exclude)<x>))[^/]*?)` where <x> is everything 1274 that comes after the negative group. `!(this|that)other` --> `(?:(?!(?:this|that)other))[^/]*?)`. 1275 1276 We have to update the list before | in nested cases: *(!(...)|stuff). Before we close a parent 1277 `extmatch`: `*(!(...))`. 
And of course on path separators (when path mode is on): `!(...)/stuff`. 1278 Lastly we make sure all is accounted for when finishing the pattern at the end. If there is nothing 1279 to store, we store `$`: `(?:(?!(?:this|that)$))[^/]*?)`. 1280 """ 1281 1282 if not self.inv_ext: 1283 return 1284 1285 index = len(current) - 1 1286 while index >= 0: 1287 if isinstance(current[index], InvPlaceholder): 1288 content = current[index + 1:] 1289 if not nested: 1290 content.append(_EOP if not self.pathname else self.path_eop) 1291 current[index] = ( 1292 (''.join(content).replace('(?#)', '?:') if self.capture else ''.join(content)) + 1293 (_EXCLA_GROUP_CLOSE.format(str(current[index]))) 1294 ) 1295 index -= 1 1296 self.inv_ext = 0 1297 1298 def parse_extend(self, c: str, i: util.StringIter, current: List[str], reset_dot: bool = False) -> bool: 1299 """Parse extended pattern lists.""" 1300 1301 # Save state 1302 temp_dir_start = self.dir_start 1303 temp_after_start = self.after_start 1304 temp_in_list = self.in_list 1305 temp_inv_ext = self.inv_ext 1306 temp_inv_nest = self.inv_nest 1307 self.in_list = True 1308 self.inv_nest = c == '!' 
1309 1310 if reset_dot: 1311 self.match_dot_dir = False 1312 1313 # Start list parsing 1314 success = True 1315 index = i.index 1316 list_type = c 1317 extended = [] # type: List[str] 1318 1319 try: 1320 c = next(i) 1321 if c != '(': 1322 raise StopIteration 1323 1324 while c != ')': 1325 c = next(i) 1326 1327 if self.extend and c in EXT_TYPES and self.parse_extend(c, i, extended): 1328 # Nothing more to do 1329 pass 1330 elif c == '*': 1331 self._handle_star(i, extended) 1332 elif c == '.': 1333 self._handle_dot(i, extended) 1334 if self.after_start: 1335 self.match_dot_dir = self.dot and not self.nodotdir 1336 self.reset_dir_track() 1337 elif c == '?': 1338 extended.append(self._restrict_sequence() + _QMARK) 1339 elif c == '/': 1340 if self.pathname: 1341 extended.append(self._restrict_extended_slash()) 1342 extended.append(self.sep) 1343 elif c == "|": 1344 self.clean_up_inverse(extended, temp_inv_nest and self.inv_nest) 1345 extended.append(c) 1346 if temp_after_start: 1347 self.set_start_dir() 1348 elif c == '\\': 1349 try: 1350 extended.append(self._references(i)) 1351 except DotException: 1352 continue 1353 except StopIteration: 1354 # We've reached the end. 1355 # Do nothing because this is going to abort the `extmatch` anyways. 
1356 pass 1357 elif c == '[': 1358 subindex = i.index 1359 try: 1360 extended.append(self._sequence(i)) 1361 except StopIteration: 1362 i.rewind(i.index - subindex) 1363 extended.append(r'\[') 1364 elif c != ')': 1365 extended.append(re.escape(c)) 1366 1367 self.update_dir_state() 1368 1369 if list_type == '?': 1370 current.append((_QMARK_CAPTURE_GROUP if self.capture else _QMARK_GROUP).format(''.join(extended))) 1371 elif list_type == '*': 1372 current.append((_STAR_CAPTURE_GROUP if self.capture else _STAR_GROUP).format(''.join(extended))) 1373 elif list_type == '+': 1374 current.append((_PLUS_CAPTURE_GROUP if self.capture else _PLUS_GROUP).format(''.join(extended))) 1375 elif list_type == '@': 1376 current.append((_CAPTURE_GROUP if self.capture else _GROUP).format(''.join(extended))) 1377 elif list_type == '!': 1378 self.inv_ext += 1 1379 # If pattern is at the end, anchor the match to the end. 1380 current.append((_EXCLA_CAPTURE_GROUP if self.capture else _EXCLA_GROUP).format(''.join(extended))) 1381 if self.pathname: 1382 if not temp_after_start or self.match_dot_dir: 1383 star = self.path_star 1384 elif temp_after_start and not self.dot: 1385 star = self.path_star_dot2 1386 else: 1387 star = self.path_star_dot1 1388 else: 1389 if not temp_after_start or self.dot: 1390 star = _STAR 1391 else: 1392 star = _NO_DOT + _STAR 1393 1394 if temp_after_start: 1395 star = self.need_char + star 1396 # Place holder for closing, but store the proper star 1397 # so we know which one to use 1398 current.append(InvPlaceholder(star)) 1399 1400 if temp_in_list: 1401 self.clean_up_inverse(current, temp_inv_nest and self.inv_nest) 1402 1403 except StopIteration: 1404 success = False 1405 self.inv_ext = temp_inv_ext 1406 i.rewind(i.index - index) 1407 1408 # Either restore if extend parsing failed, or reset if it worked 1409 if not temp_in_list: 1410 self.in_list = False 1411 if not temp_inv_nest: 1412 self.inv_nest = False 1413 1414 if success: 1415 self.reset_dir_track() 1416 
else: 1417 self.dir_start = temp_dir_start 1418 self.after_start = temp_after_start 1419 1420 return success 1421 1422 def consume_path_sep(self, i: util.StringIter) -> None: 1423 """Consume any consecutive path separators as they count as one.""" 1424 1425 try: 1426 if self.bslash_abort: 1427 count = -1 1428 c = '\\' 1429 while c in ('\\', '/'): 1430 if c != '/' or count % 2: 1431 count += 1 1432 else: 1433 count += 2 1434 c = next(i) 1435 i.rewind(1) 1436 # Rewind one more if we have an odd number (escape): \\\* 1437 if count > 0 and count % 2: 1438 i.rewind(1) 1439 else: 1440 c = '/' 1441 while c == '/': 1442 c = next(i) 1443 i.rewind(1) 1444 except StopIteration: 1445 pass 1446 1447 def root(self, pattern: str, current: List[str]) -> None: 1448 """Start parsing the pattern.""" 1449 1450 self.set_after_start() 1451 i = util.StringIter(pattern) 1452 1453 root_specified = False 1454 if self.win_drive_detect: 1455 root_specified, drive, slash, end = _get_win_drive(pattern, True, self.case_sensitive) 1456 if drive is not None: 1457 current.append(drive) 1458 if slash: 1459 current.append(self.sep + _ONE_OR_MORE) 1460 i.advance(end) 1461 self.consume_path_sep(i) 1462 elif drive is None and root_specified: 1463 root_specified = True 1464 elif not self.win_drive_detect and self.pathname and pattern.startswith('/'): 1465 root_specified = True 1466 1467 if self.no_abs and root_specified: 1468 raise ValueError('The pattern must be a relative path pattern') 1469 1470 if root_specified: 1471 self.matchbase = False 1472 self.extmatchbase = False 1473 self.rtl = False 1474 1475 if not root_specified and self.realpath: 1476 current.append(_NO_WIN_ROOT if self.win_drive_detect else _NO_ROOT) 1477 current.append('') 1478 1479 for c in i: 1480 1481 index = i.index 1482 if self.extend and c in EXT_TYPES and self.parse_extend(c, i, current, True): 1483 # Nothing to do 1484 pass 1485 elif c == '.': 1486 self._handle_dot(i, current) 1487 elif c == '*': 1488 self._handle_star(i, 
current) 1489 elif c == '?': 1490 current.append(self._restrict_sequence() + _QMARK) 1491 elif c == '/': 1492 if self.pathname: 1493 self.set_start_dir() 1494 self.clean_up_inverse(current) 1495 current.append(self.sep + _ONE_OR_MORE) 1496 self.consume_path_sep(i) 1497 self.matchbase = False 1498 else: 1499 current.append(self.sep) 1500 elif c == '\\': 1501 index = i.index 1502 try: 1503 value = self._references(i) 1504 if self.dir_start: 1505 self.clean_up_inverse(current) 1506 self.consume_path_sep(i) 1507 self.matchbase = False 1508 current.append(value) 1509 except DotException: 1510 continue 1511 except StopIteration: 1512 # Escapes nothing, ignore 1513 i.rewind(i.index - index) 1514 elif c == '[': 1515 index = i.index 1516 try: 1517 current.append(self._sequence(i)) 1518 except StopIteration: 1519 i.rewind(i.index - index) 1520 current.append(re.escape(c)) 1521 else: 1522 current.append(re.escape(c)) 1523 1524 self.update_dir_state() 1525 1526 self.clean_up_inverse(current) 1527 1528 if self.pathname: 1529 current.append(_PATH_TRAIL.format(self.sep)) 1530 1531 def _parse(self, p: str) -> str: 1532 """Parse pattern.""" 1533 1534 result = [''] 1535 prepend = [''] 1536 1537 self.negative = False 1538 1539 if is_negative(p, self.flags): 1540 self.negative = True 1541 p = p[1:] 1542 1543 if self.negative: 1544 # TODO: Do we prevent `NODOTDIR` for negative patterns? 1545 self.globstar_capture = False 1546 self.dot = True 1547 1548 if self.anchor: 1549 p, number = (RE_ANCHOR if not self.win_drive_detect else RE_WIN_ANCHOR).subn('', p) 1550 if number: 1551 self.matchbase = False 1552 self.extmatchbase = False 1553 self.rtl = False 1554 1555 if self.matchbase or self.extmatchbase: 1556 globstar = self.globstar 1557 self.globstar = True 1558 self.root('**', prepend) 1559 self.globstar = globstar 1560 1561 elif self.rtl: 1562 # Add a `**` that can capture anything: dots, special directories, symlinks, etc. 
1563 # We are simulating right to left, so everything on the left should be accepted without 1564 # question. 1565 globstar = self.globstar 1566 dot = self.dot 1567 gstar = self.path_gstar_dot1 1568 globstar_capture = self.globstar_capture 1569 self.path_gstar_dot1 = _PATH_GSTAR_RTL_MATCH 1570 self.dot = True 1571 self.globstar = True 1572 self.globstar_capture = False 1573 self.root('**', prepend) 1574 self.globstar = globstar 1575 self.dot = dot 1576 self.path_gstar_dot1 = gstar 1577 self.globstar_capture = globstar_capture 1578 1579 # We have an escape, but it escapes nothing 1580 if p == '\\': 1581 p = '' 1582 1583 if p: 1584 self.root(p, result) 1585 1586 if p and (self.matchbase or self.extmatchbase or self.rtl): 1587 result = prepend + result 1588 1589 case_flag = 'i' if not self.case_sensitive else '' 1590 pattern = r'^(?s{}:{})$'.format(case_flag, ''.join(result)) 1591 1592 if self.capture: 1593 # Strip out unnecessary regex comments 1594 pattern = pattern.replace('(?#)', '') 1595 1596 return pattern 1597 1598 def parse(self) -> AnyStr: 1599 """Parse pattern list.""" 1600 1601 if isinstance(self.pattern, bytes): 1602 pattern = self._parse(self.pattern.decode('latin-1')).encode('latin-1') 1603 else: 1604 pattern = self._parse(self.pattern) 1605 1606 return pattern 1607