1-- File created: 2008-10-16 12:12:50 2 3module System.FilePath.Glob.Directory 4 ( GlobOptions(..), globDefault 5 , globDir, globDirWith, globDir1, glob 6 , commonDirectory 7 ) where 8 9import Control.Arrow (first, second) 10import Control.Monad (forM) 11import qualified Data.DList as DL 12import Data.DList (DList) 13import Data.List ((\\), find) 14import System.Directory ( doesDirectoryExist, getDirectoryContents 15 , getCurrentDirectory 16 ) 17import System.FilePath ( (</>), takeDrive, splitDrive 18 , isExtSeparator 19 , pathSeparator, isPathSeparator 20 , takeDirectory 21 ) 22 23import System.FilePath.Glob.Base ( Pattern(..), Token(..) 24 , MatchOptions, matchDefault 25 , compile 26 ) 27import System.FilePath.Glob.Match (matchWith) 28import System.FilePath.Glob.Utils ( getRecursiveContents 29 , nubOrd 30 , pathParts 31 , partitionDL, tailDL 32 , catchIO 33 ) 34-- |Options which can be passed to the 'globDirWith' function. 35data GlobOptions = GlobOptions 36 { matchOptions :: MatchOptions 37 -- ^Options controlling how matching is performed; see 'MatchOptions'. 38 , includeUnmatched :: Bool 39 -- ^Whether to include unmatched files in the result. 40 } 41 42-- |The default set of globbing options: uses the default matching options, and 43-- does not include unmatched files. 44globDefault :: GlobOptions 45globDefault = GlobOptions matchDefault False 46 47-- The Patterns in TypedPattern don't contain PathSeparator or AnyDirectory 48-- 49-- We store the number of PathSeparators that Dir and AnyDir were followed by 50-- so that "foo////*" can match "foo/bar" but return "foo////bar". It's the 51-- exact number for convenience: (</>) doesn't add a path separator if one is 52-- already there. This way, '\(Dir n _) -> replicate n pathSeparator </> "bar"' 53-- results in the correct amount of slashes. 54data TypedPattern 55 = Any Pattern -- pattern 56 | Dir Int Pattern -- pattern/ 57 | AnyDir Int Pattern -- pattern**/ 58 deriving Show 59 60-- |Matches each given 'Pattern' against the contents of the given 'FilePath', 61-- recursively. The result contains the matched paths, grouped for each given 62-- 'Pattern'. The results are not in any defined order. 63-- 64-- The given directory is prepended to all the matches: the returned paths are 65-- all valid from the point of view of the current working directory. 66-- 67-- If multiple 'Pattern's match a single 'FilePath', that path will be included 68-- in multiple groups. 69-- 70-- Two 'FilePath's which can be canonicalized to the same file (e.g. @\"foo\"@ 71-- and @\"./foo\"@) may appear separately if explicit matching on paths 72-- beginning with @\".\"@ is done. Looking for @\".*/*\"@, for instance, will 73-- cause @\"./foo\"@ to return as a match but @\"foo\"@ to not be matched. 74-- 75-- This function is different from a simple 'filter' over all the contents of 76-- the directory: the matching is performed relative to the directory, so that 77-- for instance the following is true: 78-- 79-- > fmap head (globDir [compile "*"] dir) == getDirectoryContents dir 80-- 81-- (With the exception that that glob won't match anything beginning with @.@.) 82-- 83-- If the given 'FilePath' is @[]@, 'getCurrentDirectory' will be used. 84-- 85-- If the given 'Pattern' starts with a drive (as defined by 86-- 'System.FilePath'), it is not relative to the given directory and the 87-- 'FilePath' parameter is completely ignored! Similarly, if the given 88-- 'Pattern' starts with a path separator, only the drive part of the 89-- 'FilePath' is used. On Posix systems these behaviours are equivalent: 90-- 'Pattern's starting with @\/@ work relative to @\/@. On Windows, 'Pattern's 91-- starting with @\/@ or @\\@ work relative only to the drive part of the 92-- 'FilePath' and 'Pattern's starting with absolute paths ignore the 93-- 'FilePath'. 94-- 95-- Note that in some cases results outside the given directory may be returned: 96-- for instance the @.*@ pattern matches the @..@ directory. 97-- 98-- Any results deeper than in the given directory are enumerated lazily, using 99-- 'unsafeInterleaveIO'. 100-- 101-- Directories without read permissions are returned as entries but their 102-- contents, of course, are not. 103globDir :: [Pattern] -> FilePath -> IO [[FilePath]] 104globDir pats dir = fmap fst (globDirWith globDefault pats dir) 105 106-- |Like 'globDir', but applies the given 'GlobOptions' instead of the 107-- defaults when matching. The first component of the returned tuple contains 108-- the matched paths, grouped for each given 'Pattern', and the second contains 109-- Just the unmatched paths if the given 'GlobOptions' specified that unmatched 110-- files should be included, or otherwise Nothing. 111globDirWith :: GlobOptions -> [Pattern] -> FilePath 112 -> IO ([[FilePath]], Maybe [FilePath]) 113globDirWith opts [pat] dir | not (includeUnmatched opts) = 114 -- This is an optimization for the case where only one pattern has been 115 -- passed and we are not including unmatched files: we can use 116 -- 'commonDirectory' to avoid some calls to 'getDirectoryContents'. 117 let (prefix, pat') = commonDirectory pat 118 in globDirWith' opts [pat'] (dir </> prefix) 119 120globDirWith opts pats dir = 121 globDirWith' opts pats dir 122 123-- See 'globDirWith'. 124globDirWith' :: GlobOptions -> [Pattern] -> FilePath 125 -> IO ([[FilePath]], Maybe [FilePath]) 126globDirWith' opts [] dir = 127 if includeUnmatched opts 128 then do 129 dir' <- if null dir then getCurrentDirectory else return dir 130 c <- getRecursiveContents dir' 131 return ([], Just (DL.toList c)) 132 else 133 return ([], Nothing) 134 135globDirWith' opts pats@(_:_) dir = do 136 results <- mapM (\p -> globDir'0 opts p dir) pats 137 138 let (matches, others) = unzip results 139 allMatches = DL.toList . DL.concat $ matches 140 allOthers = DL.toList . DL.concat $ others 141 142 return ( map DL.toList matches 143 , if includeUnmatched opts 144 then Just (nubOrd allOthers \\ allMatches) 145 else Nothing 146 ) 147 148-- |A convenience wrapper on top of 'globDir', for when you only have one 149-- 'Pattern' you care about. Returns only the matched paths. 150globDir1 :: Pattern -> FilePath -> IO [FilePath] 151globDir1 p = fmap head . globDir [p] 152 153-- |The simplest IO function. Finds matches to the given pattern in the current 154-- working directory. Takes a 'String' instead of a 'Pattern' to avoid the need 155-- for a call to 'compile', simplifying usage further. 156-- 157-- Can also be seen as a convenience wrapper on top of 'globDir1', for when you 158-- want to work in the current directory or have a pattern referring to an 159-- absolute path. 160glob :: String -> IO [FilePath] 161glob = flip globDir1 "" . compile 162 163globDir'0 :: GlobOptions -> Pattern -> FilePath 164 -> IO (DList FilePath, DList FilePath) 165globDir'0 opts pat dir = do 166 let (pat', drive) = driveSplit pat 167 dir' <- case drive of 168 Just "" -> fmap takeDrive getCurrentDirectory 169 Just d -> return d 170 Nothing -> if null dir then getCurrentDirectory else return dir 171 globDir' opts (separate pat') dir' 172 173globDir' :: GlobOptions -> [TypedPattern] -> FilePath 174 -> IO (DList FilePath, DList FilePath) 175globDir' opts pats@(_:_) dir = do 176 entries <- getDirectoryContents dir `catchIO` const (return []) 177 178 results <- forM entries $ \e -> matchTypedAndGo opts pats e (dir </> e) 179 180 let (matches, others) = unzip results 181 182 return (DL.concat matches, DL.concat others) 183 184globDir' _ [] dir = 185 -- We can only get here from matchTypedAndGo getting a [Dir _]: it means the 186 -- original pattern had a trailing PathSeparator. Reproduce it here. 187 return (DL.singleton (dir ++ [pathSeparator]), DL.empty) 188 189matchTypedAndGo :: GlobOptions 190 -> [TypedPattern] 191 -> FilePath -> FilePath 192 -> IO (DList FilePath, DList FilePath) 193 194-- (Any p) is always the last element 195matchTypedAndGo opts [Any p] path absPath = 196 if matchWith (matchOptions opts) p path 197 then return (DL.singleton absPath, DL.empty) 198 else doesDirectoryExist absPath >>= didNotMatch opts path absPath 199 200matchTypedAndGo opts (Dir n p:ps) path absPath = do 201 isDir <- doesDirectoryExist absPath 202 if isDir && matchWith (matchOptions opts) p path 203 then globDir' opts ps (absPath ++ replicate n pathSeparator) 204 else didNotMatch opts path absPath isDir 205 206matchTypedAndGo opts (AnyDir n p:ps) path absPath = 207 if path `elem` [".",".."] 208 then didNotMatch opts path absPath True 209 else do 210 isDir <- doesDirectoryExist absPath 211 let m = matchWith (matchOptions opts) (unseparate ps) 212 unconditionalMatch = 213 null (unPattern p) && not (isExtSeparator $ head path) 214 p' = Pattern (unPattern p ++ [AnyNonPathSeparator]) 215 216 case unconditionalMatch || matchWith (matchOptions opts) p' path of 217 True | isDir -> do 218 contents <- getRecursiveContents absPath 219 return $ 220 -- foo**/ should match foo/ and nothing below it 221 -- relies on head contents == absPath 222 if null ps 223 then ( DL.singleton $ 224 DL.head contents 225 ++ replicate n pathSeparator 226 , tailDL contents 227 ) 228 else let (matches, nonMatches) = 229 partitionDL fst 230 (fmap (recursiveMatch n m) contents) 231 in (fmap snd matches, fmap snd nonMatches) 232 233 True | m path -> 234 return ( DL.singleton $ 235 takeDirectory absPath 236 ++ replicate n pathSeparator 237 ++ path 238 , DL.empty 239 ) 240 _ -> 241 didNotMatch opts path absPath isDir 242 243matchTypedAndGo _ _ _ _ = error "Glob.matchTypedAndGo :: internal error" 244 245-- To be called to check whether a filepath matches the part of a pattern 246-- following an **/ (AnyDirectory) token and reconstruct the filepath with the 247-- correct number of slashes. Arguments are: 248-- 249-- * Int: number of slashes in the AnyDirectory token, i.e. 1 for **/, 2 for 250-- **//, and so on 251-- 252-- * FilePath -> Bool: matching function for the remainder of the pattern, to 253-- determine whether the rest of the filepath following the AnyDirectory token 254-- matches 255-- 256-- * FilePath: the (entire) filepath to be checked: some file which is in a 257-- subdirectory of a directory which matches the prefix of the pattern up to 258-- the AnyDirectory token. 259-- 260-- The returned tuple contains both the result, where True means the filepath 261-- matches and should be included in the resulting list of matching files, and 262-- False otherwise. We also include the filepath in the returned tuple, because 263-- this function also takes care of including the correct number of slashes 264-- in the result. For example, with a pattern **//foo/bar.txt, this function 265-- would ensure that, if dir/foo/bar.txt exists, it would be returned as 266-- dir//foo/bar.txt. 267recursiveMatch :: Int -> (FilePath -> Bool) -> FilePath -> (Bool, FilePath) 268recursiveMatch n isMatch path = 269 case find isMatch (pathParts path) of 270 Just matchedSuffix -> 271 let dir = take (length path - length matchedSuffix) path 272 in ( True 273 , dir 274 ++ replicate (n-1) pathSeparator 275 ++ matchedSuffix 276 ) 277 Nothing -> 278 (False, path) 279 280-- To be called when a pattern didn't match a path: given the path and whether 281-- it was a directory, return all paths which didn't match (i.e. for a file, 282-- just the file, and for a directory, everything inside it). 283didNotMatch :: GlobOptions -> FilePath -> FilePath -> Bool 284 -> IO (DList FilePath, DList FilePath) 285didNotMatch opts path absPath isDir = 286 if includeUnmatched opts 287 then fmap ((,) DL.empty) $ 288 if isDir 289 then if path `elem` [".",".."] 290 then return DL.empty 291 else getRecursiveContents absPath 292 else return$ DL.singleton absPath 293 else 294 return (DL.empty, DL.empty) 295 296separate :: Pattern -> [TypedPattern] 297separate = go DL.empty . unPattern 298 where 299 go gr [] | null (DL.toList gr) = [] 300 go gr [] = [Any (pat gr)] 301 go gr (PathSeparator:ps) = slash gr Dir ps 302 go gr ( AnyDirectory:ps) = slash gr AnyDir ps 303 go gr ( p:ps) = go (gr `DL.snoc` p) ps 304 305 pat = Pattern . DL.toList 306 307 slash gr f ps = let (n,ps') = first length . span isSlash $ ps 308 in f (n+1) (pat gr) : go DL.empty ps' 309 310 isSlash PathSeparator = True 311 isSlash _ = False 312 313unseparate :: [TypedPattern] -> Pattern 314unseparate = Pattern . foldr f [] 315 where 316 f (AnyDir n p) ts = u p ++ AnyDirectory : replicate (n-1) PathSeparator ++ ts 317 f ( Dir n p) ts = u p ++ replicate n PathSeparator ++ ts 318 f (Any p) ts = u p ++ ts 319 320 u = unPattern 321 322-- Note that we consider "/foo" to specify a drive on Windows, even though it's 323-- relative to the current drive. 324-- 325-- Returns the [TypedPattern] of the Pattern (with the drive dropped if 326-- appropriate) and, if the Pattern specified a drive, a Maybe representing the 327-- drive to use. If it's a Just "", use the drive of the current working 328-- directory. 329driveSplit :: Pattern -> (Pattern, Maybe FilePath) 330driveSplit = check . split . unPattern 331 where 332 -- We can't just use something like commonDirectory because of Windows 333 -- drives being possibly longer than one "directory", like "//?/foo/bar/". 334 -- So just take as much as possible. 335 split (LongLiteral _ l : xs) = first (l++) (split xs) 336 split ( Literal l : xs) = first (l:) (split xs) 337 split (PathSeparator : xs) = first (pathSeparator:) (split xs) 338 split xs = ([],xs) 339 340 -- The isPathSeparator check is interesting in two ways: 341 -- 342 -- 1. It's correct to return simply Just "" because there can't be more than 343 -- one path separator if splitDrive gave a null drive: "//x" is a shared 344 -- "drive" in Windows and starts with the root "drive" in Posix. 345 -- 346 -- 2. The 'head' is safe because we have not (null d) && null drive. 347 check (d,ps) 348 | null d = (Pattern ps, Nothing) 349 | not (null drive) = (dirify rest ps, Just drive) 350 | isPathSeparator (head rest) = (Pattern ps, Just "") 351 | otherwise = (dirify d ps, Nothing) 352 where 353 (drive, rest) = splitDrive d 354 355 dirify path = Pattern . (comp path++) 356 357 comp s = let (p,l) = foldr f ([],[]) s in if null l then p else ll l p 358 where 359 f c (p,l) | isExtSeparator c = (Literal '.' : ll l p, []) 360 | isPathSeparator c = (PathSeparator : ll l p, []) 361 | otherwise = (p, c:l) 362 363 ll l p = if null l then p else LongLiteral (length l) l : p 364 365-- |Factors out the directory component of a 'Pattern'. Useful in conjunction 366-- with 'globDir'. 367-- 368-- Preserves the number of path separators: @commonDirectory (compile 369-- \"foo\/\/\/bar\")@ becomes @(\"foo\/\/\/\", compile \"bar\")@. 370commonDirectory :: Pattern -> (FilePath, Pattern) 371commonDirectory = second unseparate . splitP . separate 372 where 373 splitP pt@(Dir n p:ps) = 374 case fromConst DL.empty (unPattern p) of 375 Just d -> first ((d ++ replicate n pathSeparator) </>) (splitP ps) 376 Nothing -> ("", pt) 377 378 splitP pt = ("", pt) 379 380 fromConst d [] = Just (DL.toList d) 381 fromConst d (Literal c :xs) = fromConst (d `DL.snoc` c) xs 382 fromConst d (LongLiteral _ s:xs) = fromConst (d `DL.append`DL.fromList s) xs 383 fromConst _ _ = Nothing 384