1-- File created: 2008-10-10 13:29:26 2 3{-# LANGUAGE CPP #-} 4{-# LANGUAGE PatternGuards #-} 5 6module System.FilePath.Glob.Base 7 ( Token(..), Pattern(..) 8 9 , CompOptions(..), MatchOptions(..) 10 , compDefault, compPosix, matchDefault, matchPosix 11 12 , decompile 13 14 , compile 15 , compileWith, tryCompileWith 16 , tokenize -- for tests 17 18 , optimize 19 20 , liftP, tokToLower 21 22 , isLiteral 23 ) where 24 25import Control.Arrow (first) 26import Control.Monad.Trans.Class (lift) 27import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE) 28import Control.Monad.Trans.Writer.Strict (Writer, runWriter, tell) 29import Control.Exception (assert) 30import Data.Char (isDigit, isAlpha, toLower) 31import Data.List (find, sortBy) 32import Data.List.NonEmpty (toList) 33import Data.Maybe (fromMaybe) 34-- Monoid is re-exported from Prelude as of 4.8.0.0 35#if !MIN_VERSION_base(4,8,0) 36import Data.Monoid (Monoid, mappend, mempty, mconcat) 37#endif 38#if MIN_VERSION_base(4,11,0) 39import Data.Semigroup (sconcat, stimes) 40#else 41import Data.Semigroup (Semigroup, (<>), sconcat, stimes) 42#endif 43import Data.String (IsString(fromString)) 44import System.FilePath ( pathSeparator, extSeparator 45 , isExtSeparator, isPathSeparator 46 ) 47 48import System.FilePath.Glob.Utils ( dropLeadingZeroes 49 , isLeft, fromLeft 50 , increasingSeq 51 , addToRange, overlap 52 ) 53 54#if __GLASGOW_HASKELL__ 55import Text.Read (readPrec, lexP, parens, prec, Lexeme(Ident)) 56#endif 57 58data Token 59 -- primitives 60 = Literal !Char 61 | ExtSeparator -- . optimized away to Literal 62 | PathSeparator -- / 63 | NonPathSeparator -- ? 64 | CharRange !Bool [Either Char (Char,Char)] -- [] 65 | OpenRange (Maybe String) (Maybe String) -- <> 66 | AnyNonPathSeparator -- * 67 | AnyDirectory -- **/ 68 69 -- after optimization only 70 | LongLiteral !Int String 71 | Unmatchable -- [/], or [.] at the beginning or after a path separator 72 deriving (Eq) 73 74-- Note: CharRanges aren't converted, because this is tricky in general. 75-- Consider for instance [@-[], which includes the range A-Z. This would need 76-- to become [@[a-z]: so essentially we'd need to either: 77-- 78-- 1) Have a list of ranges of uppercase Unicode. Check if our range 79-- overlaps with any of them and if it does, take the non-overlapping 80-- part and combine it with the toLower of the overlapping part. 81-- 82-- 2) Simply expand the entire range to a list and map toLower over it. 83-- 84-- In either case we'd need to re-optimize the CharRange—we can't assume that 85-- if the uppercase characters are consecutive, so are the lowercase. 86-- 87-- 1) might be feasible if someone bothered to get the latest data. 88-- 89-- 2) obviously isn't since you might have 'Right (minBound, maxBound)' in 90-- there somewhere. 91-- 92-- The current solution is to just check both the toUpper of the character and 93-- the toLower. 94tokToLower :: Token -> Token 95tokToLower (Literal c) = Literal (toLower c) 96tokToLower (LongLiteral n s) = LongLiteral n (map toLower s) 97tokToLower tok = tok 98 99-- |An abstract data type representing a compiled pattern. 100-- 101-- Note that the 'Eq' instance cannot tell you whether two patterns behave in 102-- the same way; only whether they compile to the same 'Pattern'. For instance, 103-- @'compile' \"x\"@ and @'compile' \"[x]\"@ may or may not compare equal, 104-- though a @'match'@ will behave the exact same way no matter which 'Pattern' 105-- is used. 106newtype Pattern = Pattern { unPattern :: [Token] } deriving (Eq) 107 108liftP :: ([Token] -> [Token]) -> Pattern -> Pattern 109liftP f (Pattern pat) = Pattern (f pat) 110 111instance Show Token where 112 show (Literal c) 113 | c `elem` "*?[<" = ['[',c,']'] 114 | otherwise = assert (not $ isPathSeparator c) [c] 115 show ExtSeparator = [ extSeparator] 116 show PathSeparator = [pathSeparator] 117 show NonPathSeparator = "?" 118 show AnyNonPathSeparator = "*" 119 show AnyDirectory = "**/" 120 show (LongLiteral _ s) = concatMap (show . Literal) s 121 show (OpenRange a b) = 122 '<' : fromMaybe "" a ++ "-" ++ 123 fromMaybe "" b ++ ">" 124 125 -- We have to be careful here with ^ and ! lest [a!b] become [!ab]. So we 126 -- just put them at the end. 127 -- 128 -- Also, [^x-] was sorted and should not become [^-x]. 129 -- 130 -- Also, for something like [/!^] or /[.^!] that got optimized to have just ^ 131 -- and ! we need to add a dummy /. 132 show (CharRange b r) = 133 let f = either (:[]) (\(x,y) -> [x,'-',y]) 134 (caret,exclamation,fs) = 135 foldr (\c (ca,ex,ss) -> 136 case c of 137 Left '^' -> ("^",ex,ss) 138 Left '!' -> (ca,"!",ss) 139 _ -> (ca, ex,(f c ++) . ss) 140 ) 141 ("", "", id) 142 r 143 (beg,rest) = let s' = fs [] 144 (x,y) = splitAt 1 s' 145 in if not b && x == "-" 146 then (y,x) 147 else (s',"") 148 in concat [ "[" 149 , if b then "" else "^" 150 , if b && null beg && not (null caret && null exclamation) then "/" else "" 151 , beg, caret, exclamation, rest 152 , "]" 153 ] 154 155 show Unmatchable = "[.]" 156 157instance Show Pattern where 158 showsPrec d p = showParen (d > 10) $ 159 showString "compile " . showsPrec (d+1) (decompile p) 160 161instance Read Pattern where 162#if __GLASGOW_HASKELL__ 163 readPrec = parens . prec 10 $ do 164 Ident "compile" <- lexP 165 fmap compile readPrec 166#else 167 readsPrec d = readParen (d > 10) $ \r -> do 168 ("compile",string) <- lex r 169 (xs,rest) <- readsPrec (d+1) string 170 [(compile xs, rest)] 171#endif 172 173instance Semigroup Pattern where 174 Pattern a <> Pattern b = optimize $ Pattern (a <> b) 175 sconcat = optimize . Pattern . concatMap unPattern . toList 176 stimes n (Pattern a) = optimize $ Pattern (stimes n a) 177 178instance Monoid Pattern where 179 mempty = Pattern [] 180 mappend = (<>) 181 mconcat = optimize . Pattern . concatMap unPattern 182 183instance IsString Pattern where 184 fromString = compile 185 186-- |Options which can be passed to the 'tryCompileWith' or 'compileWith' 187-- functions: with these you can selectively toggle certain features at compile 188-- time. 189-- 190-- Note that some of these options depend on each other: classes can never 191-- occur if ranges aren't allowed, for instance. 192 193-- We could presumably put locale information in here, too. 194data CompOptions = CompOptions 195 { characterClasses :: Bool -- ^Allow character classes, @[[:...:]]@. 196 , characterRanges :: Bool -- ^Allow character ranges, @[...]@. 197 , numberRanges :: Bool -- ^Allow open ranges, @\<...>@. 198 , wildcards :: Bool -- ^Allow wildcards, @*@ and @?@. 199 , recursiveWildcards :: Bool -- ^Allow recursive wildcards, @**/@. 200 201 , pathSepInRanges :: Bool 202 -- ^Allow path separators in character ranges. 203 -- 204 -- If true, @a[/]b@ never matches anything (since character ranges can't 205 -- match path separators); if false and 'errorRecovery' is enabled, 206 -- @a[/]b@ matches itself, i.e. a file named @]b@ in the subdirectory 207 -- @a[@. 208 209 , errorRecovery :: Bool 210 -- ^If the input is invalid, recover by turning any invalid part into 211 -- literals. For instance, with 'characterRanges' enabled, @[abc@ is an 212 -- error by default (unclosed character range); with 'errorRecovery', the 213 -- @[@ is turned into a literal match, as though 'characterRanges' were 214 -- disabled. 215 } deriving (Show,Read,Eq) 216 217-- |The default set of compilation options: closest to the behaviour of the 218-- @zsh@ shell, with 'errorRecovery' enabled. 219-- 220-- All options are enabled. 221compDefault :: CompOptions 222compDefault = CompOptions 223 { characterClasses = True 224 , characterRanges = True 225 , numberRanges = True 226 , wildcards = True 227 , recursiveWildcards = True 228 , pathSepInRanges = True 229 , errorRecovery = True 230 } 231 232-- |Options for POSIX-compliance, as described in @man 7 glob@. 233-- 234-- 'numberRanges', 'recursiveWildcards', and 'pathSepInRanges' are disabled. 235compPosix :: CompOptions 236compPosix = CompOptions 237 { characterClasses = True 238 , characterRanges = True 239 , numberRanges = False 240 , wildcards = True 241 , recursiveWildcards = False 242 , pathSepInRanges = False 243 , errorRecovery = True 244 } 245 246-- |Options which can be passed to the 'matchWith' or 'globDirWith' functions: 247-- with these you can selectively toggle certain features at matching time. 248data MatchOptions = MatchOptions 249 { matchDotsImplicitly :: Bool 250 -- ^Allow @*@, @?@, and @**/@ to match @.@ at the beginning of paths. 251 252 , ignoreCase :: Bool 253 -- ^Case-independent matching. 254 255 , ignoreDotSlash :: Bool 256 -- ^Treat @./@ as a no-op in both paths and patterns. 257 -- 258 -- (Of course e.g. @../@ means something different and will not be 259 -- ignored.) 260 } 261 262-- |The default set of execution options: closest to the behaviour of the @zsh@ 263-- shell. 264-- 265-- Currently identical to 'matchPosix'. 266matchDefault :: MatchOptions 267matchDefault = matchPosix 268 269-- |Options for POSIX-compliance, as described in @man 7 glob@. 270-- 271-- 'ignoreDotSlash' is enabled, the rest are disabled. 272matchPosix :: MatchOptions 273matchPosix = MatchOptions 274 { matchDotsImplicitly = False 275 , ignoreCase = False 276 , ignoreDotSlash = True 277 } 278 279-- |Decompiles a 'Pattern' object into its textual representation: essentially 280-- the inverse of 'compile'. 281-- 282-- Note, however, that due to internal optimization, @decompile . compile@ is 283-- not the identity function. Instead, @compile . decompile@ is. 284-- 285-- Be careful with 'CompOptions': 'decompile' always produces a 'String' which 286-- can be passed to 'compile' to get back the same 'Pattern'. @compileWith 287-- options . decompile@ is /not/ the identity function unless @options@ is 288-- 'compDefault'. 289decompile :: Pattern -> String 290decompile = concatMap show . unPattern 291 292------------------------------------------ 293-- COMPILATION 294------------------------------------------ 295 296 297-- |Compiles a glob pattern from its textual representation into a 'Pattern' 298-- object. 299-- 300-- For the most part, a character matches itself. Recognized operators are as 301-- follows: 302-- 303-- [@?@] Matches any character except path separators. 304-- 305-- [@*@] Matches any number of characters except path separators, 306-- including the empty string. 307-- 308-- [@[..\]@] Matches any of the enclosed characters. Ranges of characters can 309-- be specified by separating the endpoints with a @\'-'@. @\'-'@ or 310-- @']'@ can be matched by including them as the first character(s) 311-- in the list. Never matches path separators: @[\/]@ matches 312-- nothing at all. Named character classes can also be matched: 313-- @[:x:]@ within @[]@ specifies the class named @x@, which matches 314-- certain predefined characters. See below for a full list. 315-- 316-- [@[^..\]@ or @[!..\]@] Like @[..]@, but matches any character /not/ listed. 317-- Note that @[^-x]@ is not the inverse of @[-x]@, but 318-- the range @[^-x]@. 319-- 320-- [@\<m-n>@] Matches any integer in the range m to n, inclusive. The range may 321-- be open-ended by leaving out either number: @\"\<->\"@, for 322-- instance, matches any integer. 323-- 324-- [@**/@] Matches any number of characters, including path separators, 325-- excluding the empty string. 326-- 327-- Supported character classes: 328-- 329-- [@[:alnum:\]@] Equivalent to @\"0-9A-Za-z\"@. 330-- 331-- [@[:alpha:\]@] Equivalent to @\"A-Za-z\"@. 332-- 333-- [@[:blank:\]@] Equivalent to @\"\\t \"@. 334-- 335-- [@[:cntrl:\]@] Equivalent to @\"\\0-\\x1f\\x7f\"@. 336-- 337-- [@[:digit:\]@] Equivalent to @\"0-9\"@. 338-- 339-- [@[:graph:\]@] Equivalent to @\"!-~\"@. 340-- 341-- [@[:lower:\]@] Equivalent to @\"a-z\"@. 342-- 343-- [@[:print:\]@] Equivalent to @\" -~\"@. 344-- 345-- [@[:punct:\]@] Equivalent to @\"!-\/:-\@[-`{-~\"@. 346-- 347-- [@[:space:\]@] Equivalent to @\"\\t-\\r \"@. 348-- 349-- [@[:upper:\]@] Equivalent to @\"A-Z\"@. 350-- 351-- [@[:xdigit:\]@] Equivalent to @\"0-9A-Fa-f\"@. 352-- 353-- Note that path separators (typically @\'/\'@) have to be matched explicitly 354-- or using the @**/@ pattern. In addition, extension separators (typically 355-- @\'.\'@) have to be matched explicitly at the beginning of the pattern or 356-- after any path separator. 357-- 358-- If a system supports multiple path separators, any one of them will match 359-- any of them. For instance, on Windows, @\'/\'@ will match itself as well as 360-- @\'\\\'@. 361-- 362-- Error recovery will be performed: erroneous operators will not be considered 363-- operators, but matched as literal strings. Such operators include: 364-- 365-- * An empty @[]@ or @[^]@ or @[!]@ 366-- 367-- * A @[@ or @\<@ without a matching @]@ or @>@ 368-- 369-- * A malformed @\<>@: e.g. nonnumeric characters or no hyphen 370-- 371-- So, e.g. @[]@ will match the string @\"[]\"@. 372compile :: String -> Pattern 373compile = compileWith compDefault 374 375-- |Like 'compile', but recognizes operators according to the given 376-- 'CompOptions' instead of the defaults. 377-- 378-- If an error occurs and 'errorRecovery' is disabled, 'error' will be called. 379compileWith :: CompOptions -> String -> Pattern 380compileWith opts = either error id . tryCompileWith opts 381 382-- |A safe version of 'compileWith'. 383-- 384-- If an error occurs and 'errorRecovery' is disabled, the error message will 385-- be returned in a 'Left'. 386tryCompileWith :: CompOptions -> String -> Either String Pattern 387tryCompileWith opts = fmap optimize . tokenize opts 388 389tokenize :: CompOptions -> String -> Either String Pattern 390tokenize opts = fmap Pattern . sequence . go 391 where 392 err _ c cs | errorRecovery opts = Right (Literal c) : go cs 393 err s _ _ = [Left s] 394 395 go :: String -> [Either String Token] 396 go [] = [] 397 go ('?':cs) | wcs = Right NonPathSeparator : go cs 398 go ('*':cs) | wcs = 399 case cs of 400 '*':p:xs | rwcs && isPathSeparator p 401 -> Right AnyDirectory : go xs 402 _ -> Right AnyNonPathSeparator : go cs 403 404 go ('[':cs) | crs = let (range,rest) = charRange opts cs 405 in case range of 406 Left s -> err s '[' cs 407 r -> r : go rest 408 409 go ('<':cs) | ors = 410 let (range, rest) = break (=='>') cs 411 in if null rest 412 then err "compile :: unclosed <> in pattern" '<' cs 413 else case openRange range of 414 Left s -> err s '<' cs 415 r -> r : go (tail rest) 416 go (c:cs) 417 | isPathSeparator c = Right PathSeparator : go cs 418 | isExtSeparator c = Right ExtSeparator : go cs 419 | otherwise = Right (Literal c) : go cs 420 421 wcs = wildcards opts 422 rwcs = recursiveWildcards opts 423 crs = characterRanges opts 424 ors = numberRanges opts 425 426-- <a-b> where a > b can never match anything; this is not considered an error 427openRange :: String -> Either String Token 428openRange ['-'] = Right $ OpenRange Nothing Nothing 429openRange ('-':s) = 430 case span isDigit s of 431 (b,"") -> Right $ OpenRange Nothing (openRangeNum b) 432 _ -> Left $ "compile :: bad <>, expected number, got " ++ s 433openRange s = 434 case span isDigit s of 435 (a,"-") -> Right $ OpenRange (openRangeNum a) Nothing 436 (a,'-':s') -> 437 case span isDigit s' of 438 (b,"") -> Right $ OpenRange (openRangeNum a) (openRangeNum b) 439 _ -> Left $ "compile :: bad <>, expected number, got " ++ s' 440 _ -> Left $ "compile :: bad <>, expected number followed by - in " ++ s 441 442openRangeNum :: String -> Maybe String 443openRangeNum = Just . dropLeadingZeroes 444 445type CharRange = [Either Char (Char,Char)] 446 447charRange :: CompOptions -> String -> (Either String Token, String) 448charRange opts zs = 449 case zs of 450 y:ys | y `elem` "^!" -> 451 case ys of 452 -- [!-#] is not the inverse of [-#], it is the range ! through 453 -- # 454 '-':']':xs -> (Right (CharRange False [Left '-']), xs) 455 '-' :_ -> first (fmap (CharRange True )) (start zs) 456 xs -> first (fmap (CharRange False)) (start xs) 457 _ -> first (fmap (CharRange True )) (start zs) 458 where 459 start :: String -> (Either String CharRange, String) 460 start (']':xs) = run $ char ']' xs 461 start ('-':xs) = run $ char '-' xs 462 start xs = run $ go xs 463 464 run :: ExceptT String (Writer CharRange) String 465 -> (Either String CharRange, String) 466 run m = case runWriter.runExceptT $ m of 467 (Left err, _) -> (Left err, []) 468 (Right rest, cs) -> (Right cs, rest) 469 470 go :: String -> ExceptT String (Writer CharRange) String 471 go ('[':':':xs) | characterClasses opts = readClass xs 472 go ( ']':xs) = return xs 473 go ( c:xs) = 474 if not (pathSepInRanges opts) && isPathSeparator c 475 then throwE "compile :: path separator within []" 476 else char c xs 477 go [] = throwE "compile :: unclosed [] in pattern" 478 479 char :: Char -> String -> ExceptT String (Writer CharRange) String 480 char c ('-':x:xs) = 481 if x == ']' 482 then ltell [Left c, Left '-'] >> return xs 483 else ltell [Right (c,x)] >> go xs 484 485 char c xs = ltell [Left c] >> go xs 486 487 readClass :: String -> ExceptT String (Writer CharRange) String 488 readClass xs = let (name,end) = span isAlpha xs 489 in case end of 490 ':':']':rest -> charClass name >> go rest 491 _ -> ltell [Left '[',Left ':'] >> go xs 492 493 charClass :: String -> ExceptT String (Writer CharRange) () 494 charClass name = 495 -- The POSIX classes 496 -- 497 -- TODO: this is ASCII-only, not sure how this should be extended 498 -- Unicode, or with a locale as input, or something else? 499 case name of 500 "alnum" -> ltell [digit,upper,lower] 501 "alpha" -> ltell [upper,lower] 502 "blank" -> ltell blanks 503 "cntrl" -> ltell [Right ('\0','\x1f'), Left '\x7f'] 504 "digit" -> ltell [digit] 505 "graph" -> ltell [Right ('!','~')] 506 "lower" -> ltell [lower] 507 "print" -> ltell [Right (' ','~')] 508 "punct" -> ltell punct 509 "space" -> ltell spaces 510 "upper" -> ltell [upper] 511 "xdigit" -> ltell [digit, Right ('A','F'), Right ('a','f')] 512 _ -> 513 throwE ("compile :: unknown character class '" ++name++ "'") 514 515 digit = Right ('0','9') 516 upper = Right ('A','Z') 517 lower = Right ('a','z') 518 punct = map Right [('!','/'), (':','@'), ('[','`'), ('{','~')] 519 blanks = [Left '\t', Left ' '] 520 spaces = [Right ('\t','\r'), Left ' '] 521 522 ltell = lift . tell 523 524 525------------------------------------------ 526-- OPTIMIZATION 527------------------------------------------ 528 529 530optimize :: Pattern -> Pattern 531optimize (Pattern pat) = 532 Pattern . fin $ 533 case pat of 534 e : ts | e == ExtSeparator || e == Literal '.' -> 535 checkUnmatchable (Literal '.' :) (go ts) 536 _ -> 537 -- Handle the case where the whole pattern starts with a 538 -- now-literalized [.]. LongLiterals haven't been created yet so 539 -- checking for Literal suffices. 540 case go pat of 541 Literal '.' : _ -> [Unmatchable] 542 opat -> checkUnmatchable id opat 543 where 544 fin [] = [] 545 546 -- Literals to LongLiteral 547 -- Has to be done here: we can't backtrack in go, but some cases might 548 -- result in consecutive Literals being generated. 549 -- E.g. "a[b]". 550 fin (x:y:xs) | Just x' <- isCharLiteral x, Just y' <- isCharLiteral y = 551 let (ls,rest) = spanMaybe isCharLiteral xs 552 in fin $ LongLiteral (length ls + 2) 553 (foldr (\a -> (a:)) [] (x':y':ls)) 554 : rest 555 556 -- concatenate LongLiterals 557 -- Has to be done here because LongLiterals are generated above. 558 -- 559 -- So one could say that we have one pass (go) which flattens everything as 560 -- much as it can and one pass (fin) which concatenates what it can. 561 fin (LongLiteral l1 s1 : LongLiteral l2 s2 : xs) = 562 fin $ LongLiteral (l1+l2) (s1++s2) : xs 563 564 fin (LongLiteral l s : Literal c : xs) = 565 fin $ LongLiteral (l+1) (s++[c]) : xs 566 567 fin (LongLiteral 1 s : xs) = Literal (head s) : fin xs 568 569 fin (Literal c : LongLiteral l s : xs) = 570 fin $ LongLiteral (l+1) (c:s) : xs 571 572 fin (x:xs) = x : fin xs 573 574 go [] = [] 575 576 -- Get rid of ExtSeparators, so that they can hopefully be combined into 577 -- LongLiterals later. 578 -- 579 -- /. -> fine 580 -- . elsewhere -> fine 581 -- /[.] -> Unmatchable 582 -- [.] at start of pattern -> handled outside 'go' 583 go (p@PathSeparator : ExtSeparator : xs) = p : Literal '.' : go xs 584 go (ExtSeparator : xs) = Literal '.' : go xs 585 go (p@PathSeparator : x@(CharRange _ _) : xs) = 586 p : case optimizeCharRange True x of 587 x'@(CharRange _ _) -> x' : go xs 588 Literal '.' -> [Unmatchable] 589 x' -> go (x':xs) 590 591 go (x@(CharRange _ _) : xs) = 592 case optimizeCharRange False x of 593 x'@(CharRange _ _) -> x' : go xs 594 x' -> go (x':xs) 595 596 -- Put [0-9] in front of <-> to allow compressing <->[0-9]<->. Handling the 597 -- [0-9] first in matching should also be faster in general. 598 go (o@(OpenRange Nothing Nothing) : d : xs) | d == anyDigit = 599 d : go (o : xs) 600 601 go (x:xs) = 602 case find ((== x) . fst) compressables of 603 Just (_, f) -> let (compressed,ys) = span (== x) xs 604 in if null compressed 605 then x : go ys 606 else f (length compressed) ++ go (x : ys) 607 Nothing -> x : go xs 608 609 checkUnmatchable f ts = if Unmatchable `elem` ts then [Unmatchable] else f ts 610 611 compressables = [ (AnyNonPathSeparator, const []) 612 , (AnyDirectory, const []) 613 , (OpenRange Nothing Nothing, \n -> replicate n anyDigit) 614 ] 615 616 isCharLiteral (Literal x) = Just x 617 isCharLiteral _ = Nothing 618 619 anyDigit = CharRange True [Right ('0', '9')] 620 621-- | Like 'span', but let's use a -> Maybe b predicate 622spanMaybe :: (a -> Maybe b) -> [a] -> ([b], [a]) 623spanMaybe f = go 624 where 625 go xs@[] = ([], xs) 626 go xs@(x : xs') = case f x of 627 Nothing -> ([], xs) 628 Just y -> let (ys, zs) = go xs' in (y : ys, zs) 629 630optimizeCharRange :: Bool -> Token -> Token 631optimizeCharRange precededBySlash (CharRange b rs) = 632 fin . stripUnmatchable . go . sortCharRange $ rs 633 where 634 -- [/] is interesting, it actually matches nothing at all 635 -- [.] can be Literalized though, just don't make it into an ExtSeparator so 636 -- that it doesn't match a leading dot 637 fin [Left c] | b = if isPathSeparator c then Unmatchable else Literal c 638 fin [Right r] | b && r == (minBound,maxBound) = NonPathSeparator 639 fin x = CharRange b x 640 641 stripUnmatchable xs@(_:_:_) | b = 642 filter (\x -> (not precededBySlash || x /= Left '.') && x /= Left '/') xs 643 stripUnmatchable xs = xs 644 645 go [] = [] 646 647 go (x@(Left c) : xs) = 648 case xs of 649 [] -> [x] 650 y@(Left d) : ys 651 -- [aaaaa] -> [a] 652 | c == d -> go$ Left c : ys 653 | d == succ c -> 654 let (ls,rest) = span isLeft xs -- start from y 655 (catable,others) = increasingSeq (map fromLeft ls) 656 range = (c, head catable) 657 658 in -- three (or more) Lefts make a Right 659 if null catable || null (tail catable) 660 then x : y : go ys 661 -- [abcd] -> [a-d] 662 else go$ Right range : map Left others ++ rest 663 664 | otherwise -> x : go xs 665 666 Right r : ys -> 667 case addToRange r c of 668 -- [da-c] -> [a-d] 669 Just r' -> go$ Right r' : ys 670 Nothing -> x : go xs 671 672 go (x@(Right r) : xs) = 673 case xs of 674 [] -> [x] 675 Left c : ys -> 676 case addToRange r c of 677 -- [a-cd] -> [a-d] 678 Just r' -> go$ Right r' : ys 679 Nothing -> x : go xs 680 681 Right r' : ys -> 682 case overlap r r' of 683 -- [a-cb-d] -> [a-d] 684 Just o -> go$ Right o : ys 685 Nothing -> x : go xs 686optimizeCharRange _ _ = error "Glob.optimizeCharRange :: internal error" 687 688sortCharRange :: [Either Char (Char,Char)] -> [Either Char (Char,Char)] 689sortCharRange = sortBy cmp 690 where 691 cmp (Left a) (Left b) = compare a b 692 cmp (Left a) (Right (b,_)) = compare a b 693 cmp (Right (a,_)) (Left b) = compare a b 694 cmp (Right (a,_)) (Right (b,_)) = compare a b 695 696-- |Returns `True` iff the given `Pattern` is a literal file path, i.e. it has 697-- no wildcards, character ranges, etc. 698isLiteral :: Pattern -> Bool 699isLiteral = all lit . unPattern 700 where 701 lit (Literal _) = True 702 lit (LongLiteral _ _) = True 703 lit PathSeparator = True 704 lit _ = False 705