1-- File created: 2008-10-16 12:12:50
2
3module System.FilePath.Glob.Directory
4   ( GlobOptions(..), globDefault
5   , globDir, globDirWith, globDir1, glob
6   , commonDirectory
7   ) where
8
9import Control.Arrow    (first, second)
10import Control.Monad    (forM)
11import qualified Data.DList as DL
12import Data.DList       (DList)
13import Data.List        ((\\), find)
14import System.Directory ( doesDirectoryExist, getDirectoryContents
15                        , getCurrentDirectory
16                        )
17import System.FilePath  ( (</>), takeDrive, splitDrive
18                        , isExtSeparator
19                        , pathSeparator, isPathSeparator
20                        , takeDirectory
21                        )
22
23import System.FilePath.Glob.Base  ( Pattern(..), Token(..)
24                                  , MatchOptions, matchDefault
25                                  , compile
26                                  )
27import System.FilePath.Glob.Match (matchWith)
28import System.FilePath.Glob.Utils ( getRecursiveContents
29                                  , nubOrd
30                                  , pathParts
31                                  , partitionDL, tailDL
32                                  , catchIO
33                                  )
34-- |Options which can be passed to the 'globDirWith' function.
35data GlobOptions = GlobOptions
36  { matchOptions :: MatchOptions
37  -- ^Options controlling how matching is performed; see 'MatchOptions'.
38  , includeUnmatched :: Bool
39  -- ^Whether to include unmatched files in the result.
40  }
41
42-- |The default set of globbing options: uses the default matching options, and
43-- does not include unmatched files.
44globDefault :: GlobOptions
45globDefault = GlobOptions matchDefault False
46
47-- The Patterns in TypedPattern don't contain PathSeparator or AnyDirectory
48--
49-- We store the number of PathSeparators that Dir and AnyDir were followed by
50-- so that "foo////*" can match "foo/bar" but return "foo////bar". It's the
51-- exact number for convenience: (</>) doesn't add a path separator if one is
52-- already there. This way, '\(Dir n _) -> replicate n pathSeparator </> "bar"'
53-- results in the correct amount of slashes.
54data TypedPattern
55   = Any Pattern        -- pattern
56   | Dir Int Pattern    -- pattern/
57   | AnyDir Int Pattern -- pattern**/
58   deriving Show
59
60-- |Matches each given 'Pattern' against the contents of the given 'FilePath',
61-- recursively. The result contains the matched paths, grouped for each given
62-- 'Pattern'. The results are not in any defined order.
63--
64-- The given directory is prepended to all the matches: the returned paths are
65-- all valid from the point of view of the current working directory.
66--
67-- If multiple 'Pattern's match a single 'FilePath', that path will be included
68-- in multiple groups.
69--
70-- Two 'FilePath's which can be canonicalized to the same file (e.g. @\"foo\"@
71-- and @\"./foo\"@) may appear separately if explicit matching on paths
72-- beginning with @\".\"@ is done. Looking for @\".*/*\"@, for instance, will
73-- cause @\"./foo\"@ to return as a match but @\"foo\"@ to not be matched.
74--
75-- This function is different from a simple 'filter' over all the contents of
76-- the directory: the matching is performed relative to the directory, so that
77-- for instance the following is true:
78--
79-- > fmap head (globDir [compile "*"] dir) == getDirectoryContents dir
80--
81-- (With the exception that that glob won't match anything beginning with @.@.)
82--
83-- If the given 'FilePath' is @[]@, 'getCurrentDirectory' will be used.
84--
85-- If the given 'Pattern' starts with a drive (as defined by
86-- 'System.FilePath'), it is not relative to the given directory and the
87-- 'FilePath' parameter is completely ignored! Similarly, if the given
88-- 'Pattern' starts with a path separator, only the drive part of the
89-- 'FilePath' is used. On Posix systems these behaviours are equivalent:
90-- 'Pattern's starting with @\/@ work relative to @\/@. On Windows, 'Pattern's
91-- starting with @\/@ or @\\@ work relative only to the drive part of the
92-- 'FilePath' and 'Pattern's starting with absolute paths ignore the
93-- 'FilePath'.
94--
95-- Note that in some cases results outside the given directory may be returned:
96-- for instance the @.*@ pattern matches the @..@ directory.
97--
98-- Any results deeper than in the given directory are enumerated lazily, using
99-- 'unsafeInterleaveIO'.
100--
101-- Directories without read permissions are returned as entries but their
102-- contents, of course, are not.
103globDir :: [Pattern] -> FilePath -> IO [[FilePath]]
104globDir pats dir = fmap fst (globDirWith globDefault pats dir)
105
106-- |Like 'globDir', but applies the given 'GlobOptions' instead of the
107-- defaults when matching. The first component of the returned tuple contains
108-- the matched paths, grouped for each given 'Pattern', and the second contains
109-- Just the unmatched paths if the given 'GlobOptions' specified that unmatched
110-- files should be included, or otherwise Nothing.
111globDirWith :: GlobOptions -> [Pattern] -> FilePath
112            -> IO ([[FilePath]], Maybe [FilePath])
113globDirWith opts [pat] dir | not (includeUnmatched opts) =
114   -- This is an optimization for the case where only one pattern has been
115   -- passed and we are not including unmatched files: we can use
116   -- 'commonDirectory' to avoid some calls to 'getDirectoryContents'.
117   let (prefix, pat') = commonDirectory pat
118    in globDirWith' opts [pat'] (dir </> prefix)
119
120globDirWith opts pats dir =
121   globDirWith' opts pats dir
122
123-- See 'globDirWith'.
124globDirWith' :: GlobOptions -> [Pattern] -> FilePath
125            -> IO ([[FilePath]], Maybe [FilePath])
126globDirWith' opts []   dir =
127   if includeUnmatched opts
128      then do
129         dir' <- if null dir then getCurrentDirectory else return dir
130         c <- getRecursiveContents dir'
131         return ([], Just (DL.toList c))
132      else
133         return ([], Nothing)
134
135globDirWith' opts pats@(_:_) dir = do
136   results <- mapM (\p -> globDir'0 opts p dir) pats
137
138   let (matches, others) = unzip results
139       allMatches        = DL.toList . DL.concat $ matches
140       allOthers         = DL.toList . DL.concat $ others
141
142   return ( map DL.toList matches
143          , if includeUnmatched opts
144               then Just (nubOrd allOthers \\ allMatches)
145               else Nothing
146          )
147
148-- |A convenience wrapper on top of 'globDir', for when you only have one
149-- 'Pattern' you care about. Returns only the matched paths.
150globDir1 :: Pattern -> FilePath -> IO [FilePath]
151globDir1 p = fmap head . globDir [p]
152
153-- |The simplest IO function. Finds matches to the given pattern in the current
154-- working directory. Takes a 'String' instead of a 'Pattern' to avoid the need
155-- for a call to 'compile', simplifying usage further.
156--
157-- Can also be seen as a convenience wrapper on top of 'globDir1', for when you
158-- want to work in the current directory or have a pattern referring to an
159-- absolute path.
160glob :: String -> IO [FilePath]
161glob = flip globDir1 "" . compile
162
163globDir'0 :: GlobOptions -> Pattern -> FilePath
164          -> IO (DList FilePath, DList FilePath)
165globDir'0 opts pat dir = do
166   let (pat', drive) = driveSplit pat
167   dir' <- case drive of
168                Just "" -> fmap takeDrive getCurrentDirectory
169                Just d  -> return d
170                Nothing -> if null dir then getCurrentDirectory else return dir
171   globDir' opts (separate pat') dir'
172
173globDir' :: GlobOptions -> [TypedPattern] -> FilePath
174         -> IO (DList FilePath, DList FilePath)
175globDir' opts pats@(_:_) dir = do
176   entries <- getDirectoryContents dir `catchIO` const (return [])
177
178   results <- forM entries $ \e -> matchTypedAndGo opts pats e (dir </> e)
179
180   let (matches, others) = unzip results
181
182   return (DL.concat matches, DL.concat others)
183
184globDir' _ [] dir =
185   -- We can only get here from matchTypedAndGo getting a [Dir _]: it means the
186   -- original pattern had a trailing PathSeparator. Reproduce it here.
187   return (DL.singleton (dir ++ [pathSeparator]), DL.empty)
188
189matchTypedAndGo :: GlobOptions
190                -> [TypedPattern]
191                -> FilePath -> FilePath
192                -> IO (DList FilePath, DList FilePath)
193
194-- (Any p) is always the last element
195matchTypedAndGo opts [Any p] path absPath =
196   if matchWith (matchOptions opts) p path
197      then return (DL.singleton absPath, DL.empty)
198      else doesDirectoryExist absPath >>= didNotMatch opts path absPath
199
200matchTypedAndGo opts (Dir n p:ps) path absPath = do
201   isDir <- doesDirectoryExist absPath
202   if isDir && matchWith (matchOptions opts) p path
203      then globDir' opts ps (absPath ++ replicate n pathSeparator)
204      else didNotMatch opts path absPath isDir
205
206matchTypedAndGo opts (AnyDir n p:ps) path absPath =
207   if path `elem` [".",".."]
208      then didNotMatch opts path absPath True
209      else do
210         isDir <- doesDirectoryExist absPath
211         let m = matchWith (matchOptions opts) (unseparate ps)
212             unconditionalMatch =
213                null (unPattern p) && not (isExtSeparator $ head path)
214             p' = Pattern (unPattern p ++ [AnyNonPathSeparator])
215
216         case unconditionalMatch || matchWith (matchOptions opts) p' path of
217              True | isDir -> do
218                 contents <- getRecursiveContents absPath
219                 return $
220                    -- foo**/ should match foo/ and nothing below it
221                    -- relies on head contents == absPath
222                    if null ps
223                       then ( DL.singleton $
224                                DL.head contents
225                                ++ replicate n pathSeparator
226                            , tailDL contents
227                            )
228                       else let (matches, nonMatches) =
229                                   partitionDL fst
230                                      (fmap (recursiveMatch n m) contents)
231                             in (fmap snd matches, fmap snd nonMatches)
232
233              True | m path ->
234                 return ( DL.singleton $
235                             takeDirectory absPath
236                             ++ replicate n pathSeparator
237                             ++ path
238                        , DL.empty
239                        )
240              _ ->
241                 didNotMatch opts path absPath isDir
242
243matchTypedAndGo _ _ _ _ = error "Glob.matchTypedAndGo :: internal error"
244
245-- To be called to check whether a filepath matches the part of a pattern
246-- following an **/ (AnyDirectory) token and reconstruct the filepath with the
247-- correct number of slashes. Arguments are:
248--
249-- * Int: number of slashes in the AnyDirectory token, i.e. 1 for **/, 2 for
250--   **//, and so on
251--
252-- * FilePath -> Bool: matching function for the remainder of the pattern, to
253--   determine whether the rest of the filepath following the AnyDirectory token
254--   matches
255--
256-- * FilePath: the (entire) filepath to be checked: some file which is in a
257--   subdirectory of a directory which matches the prefix of the pattern up to
258--   the AnyDirectory token.
259--
260-- The returned tuple contains both the result, where True means the filepath
261-- matches and should be included in the resulting list of matching files, and
262-- False otherwise. We also include the filepath in the returned tuple, because
263-- this function also takes care of including the correct number of slashes
264-- in the result. For example, with a pattern **//foo/bar.txt, this function
265-- would ensure that, if dir/foo/bar.txt exists, it would be returned as
266-- dir//foo/bar.txt.
267recursiveMatch :: Int -> (FilePath -> Bool) -> FilePath -> (Bool, FilePath)
268recursiveMatch n isMatch path =
269   case find isMatch (pathParts path) of
270        Just matchedSuffix ->
271           let dir = take (length path - length matchedSuffix) path
272            in ( True
273               , dir
274                 ++ replicate (n-1) pathSeparator
275                 ++ matchedSuffix
276               )
277        Nothing ->
278           (False, path)
279
280-- To be called when a pattern didn't match a path: given the path and whether
281-- it was a directory, return all paths which didn't match (i.e. for a file,
282-- just the file, and for a directory, everything inside it).
283didNotMatch :: GlobOptions -> FilePath -> FilePath -> Bool
284            -> IO (DList FilePath, DList FilePath)
285didNotMatch opts path absPath isDir =
286   if includeUnmatched opts
287      then fmap ((,) DL.empty) $
288         if isDir
289            then if path `elem` [".",".."]
290                    then return DL.empty
291                    else getRecursiveContents absPath
292            else return$ DL.singleton absPath
293      else
294         return (DL.empty, DL.empty)
295
296separate :: Pattern -> [TypedPattern]
297separate = go DL.empty . unPattern
298 where
299   go gr [] | null (DL.toList gr) = []
300   go gr []                       = [Any (pat gr)]
301   go gr (PathSeparator:ps)       = slash gr Dir ps
302   go gr ( AnyDirectory:ps)       = slash gr AnyDir ps
303   go gr (            p:ps)       = go (gr `DL.snoc` p) ps
304
305   pat = Pattern . DL.toList
306
307   slash gr f ps = let (n,ps') = first length . span isSlash $ ps
308                    in f (n+1) (pat gr) : go DL.empty ps'
309
310   isSlash PathSeparator = True
311   isSlash _             = False
312
313unseparate :: [TypedPattern] -> Pattern
314unseparate = Pattern . foldr f []
315 where
316   f (AnyDir n p) ts = u p ++ AnyDirectory : replicate (n-1) PathSeparator ++ ts
317   f (   Dir n p) ts = u p ++ replicate n PathSeparator ++ ts
318   f (Any      p) ts = u p ++ ts
319
320   u = unPattern
321
322-- Note that we consider "/foo" to specify a drive on Windows, even though it's
323-- relative to the current drive.
324--
325-- Returns the [TypedPattern] of the Pattern (with the drive dropped if
326-- appropriate) and, if the Pattern specified a drive, a Maybe representing the
327-- drive to use. If it's a Just "", use the drive of the current working
328-- directory.
329driveSplit :: Pattern -> (Pattern, Maybe FilePath)
330driveSplit = check . split . unPattern
331 where
332   -- We can't just use something like commonDirectory because of Windows
333   -- drives being possibly longer than one "directory", like "//?/foo/bar/".
334   -- So just take as much as possible.
335   split (LongLiteral _ l : xs) = first (l++) (split xs)
336   split (    Literal   l : xs) = first (l:) (split xs)
337   split (PathSeparator   : xs) = first (pathSeparator:) (split xs)
338   split xs                     = ([],xs)
339
340   -- The isPathSeparator check is interesting in two ways:
341   --
342   -- 1. It's correct to return simply Just "" because there can't be more than
343   --    one path separator if splitDrive gave a null drive: "//x" is a shared
344   --    "drive" in Windows and starts with the root "drive" in Posix.
345   --
346   -- 2. The 'head' is safe because we have not (null d) && null drive.
347   check (d,ps)
348      | null d                      = (Pattern     ps, Nothing)
349      | not (null drive)            = (dirify rest ps, Just drive)
350      | isPathSeparator (head rest) = (Pattern     ps, Just "")
351      | otherwise                   = (dirify d    ps, Nothing)
352    where
353      (drive, rest) = splitDrive d
354
355   dirify path = Pattern . (comp path++)
356
357   comp s = let (p,l) = foldr f ([],[]) s in if null l then p else ll l p
358    where
359      f c (p,l) | isExtSeparator  c = (Literal '.'   : ll l p, [])
360                | isPathSeparator c = (PathSeparator : ll l p, [])
361                | otherwise         = (p, c:l)
362
363      ll l p = if null l then p else LongLiteral (length l) l : p
364
365-- |Factors out the directory component of a 'Pattern'. Useful in conjunction
366-- with 'globDir'.
367--
368-- Preserves the number of path separators: @commonDirectory (compile
369-- \"foo\/\/\/bar\")@ becomes @(\"foo\/\/\/\", compile \"bar\")@.
370commonDirectory :: Pattern -> (FilePath, Pattern)
371commonDirectory = second unseparate . splitP . separate
372 where
373   splitP pt@(Dir n p:ps) =
374      case fromConst DL.empty (unPattern p) of
375           Just d  -> first ((d ++ replicate n pathSeparator) </>) (splitP ps)
376           Nothing -> ("", pt)
377
378   splitP pt = ("", pt)
379
380   fromConst d []                   = Just (DL.toList d)
381   fromConst d (Literal c      :xs) = fromConst (d `DL.snoc` c) xs
382   fromConst d (LongLiteral _ s:xs) = fromConst (d `DL.append`DL.fromList s) xs
383   fromConst _ _                    = Nothing
384