1{-# LANGUAGE CPP #-}
2{-# LANGUAGE DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving,
3             NamedFieldPuns, BangPatterns #-}
4{-# OPTIONS_GHC -fno-warn-orphans #-}
5
6-- | An abstraction to help with re-running actions when files or other
7-- input values they depend on have changed.
8--
9module Distribution.Client.FileMonitor (
10
11  -- * Declaring files to monitor
12  MonitorFilePath(..),
13  MonitorKindFile(..),
14  MonitorKindDir(..),
15  FilePathGlob(..),
16  monitorFile,
17  monitorFileHashed,
18  monitorNonExistentFile,
19  monitorFileExistence,
20  monitorDirectory,
21  monitorNonExistentDirectory,
22  monitorDirectoryExistence,
23  monitorFileOrDirectory,
24  monitorFileGlob,
25  monitorFileGlobExistence,
26  monitorFileSearchPath,
27  monitorFileHashedSearchPath,
28
29  -- * Creating and checking sets of monitored files
30  FileMonitor(..),
31  newFileMonitor,
32  MonitorChanged(..),
33  MonitorChangedReason(..),
34  checkFileMonitorChanged,
35  updateFileMonitor,
36  MonitorTimestamp,
37  beginUpdateFileMonitor,
38
39  -- * Internal
40  MonitorStateFileSet,
41  MonitorStateFile,
42  MonitorStateGlob,
43  ) where
44
45import Prelude ()
46import Distribution.Client.Compat.Prelude
47
48import qualified Data.Map.Strict as Map
49import qualified Data.ByteString.Lazy as BS
50import qualified Data.Hashable as Hashable
51
52import           Control.Monad
53import           Control.Monad.Trans (MonadIO, liftIO)
54import           Control.Monad.State (StateT, mapStateT)
55import qualified Control.Monad.State as State
56import           Control.Monad.Except (ExceptT, runExceptT, withExceptT,
57                                       throwError)
58import           Control.Exception
59
60import           Distribution.Compat.Time
61import           Distribution.Client.Glob
62import           Distribution.Simple.Utils (handleDoesNotExist, writeFileAtomic)
63import           Distribution.Client.Utils (mergeBy, MergeResult(..))
64import           Distribution.Utils.Structured (structuredDecodeOrFailIO, structuredEncode)
65import           System.FilePath
66import           System.Directory
67import           System.IO
68
69------------------------------------------------------------------------------
70-- Types for specifying files to monitor
71--
72
73
74-- | A description of a file (or set of files) to monitor for changes.
75--
76-- Where file paths are relative they are relative to a common directory
77-- (e.g. project root), not necessarily the process current directory.
78--
79data MonitorFilePath =
80     MonitorFile {
81       monitorKindFile :: !MonitorKindFile,
82       monitorKindDir  :: !MonitorKindDir,
83       monitorPath     :: !FilePath
84     }
85   | MonitorFileGlob {
86       monitorKindFile :: !MonitorKindFile,
87       monitorKindDir  :: !MonitorKindDir,
88       monitorPathGlob :: !FilePathGlob
89     }
90  deriving (Eq, Show, Generic)
91
92data MonitorKindFile = FileExists
93                     | FileModTime
94                     | FileHashed
95                     | FileNotExists
96  deriving (Eq, Show, Generic)
97
98data MonitorKindDir  = DirExists
99                     | DirModTime
100                     | DirNotExists
101  deriving (Eq, Show, Generic)
102
103instance Binary MonitorFilePath
104instance Binary MonitorKindFile
105instance Binary MonitorKindDir
106
107instance Structured MonitorFilePath
108instance Structured MonitorKindFile
109instance Structured MonitorKindDir
110
111-- | Monitor a single file for changes, based on its modification time.
112-- The monitored file is considered to have changed if it no longer
113-- exists or if its modification time has changed.
114--
115monitorFile :: FilePath -> MonitorFilePath
116monitorFile = MonitorFile FileModTime DirNotExists
117
118-- | Monitor a single file for changes, based on its modification time
119-- and content hash. The monitored file is considered to have changed if
120-- it no longer exists or if its modification time and content hash have
121-- changed.
122--
123monitorFileHashed :: FilePath -> MonitorFilePath
124monitorFileHashed = MonitorFile FileHashed DirNotExists
125
126-- | Monitor a single non-existent file for changes. The monitored file
127-- is considered to have changed if it exists.
128--
129monitorNonExistentFile :: FilePath -> MonitorFilePath
130monitorNonExistentFile = MonitorFile FileNotExists DirNotExists
131
132-- | Monitor a single file for existence only. The monitored file is
133-- considered to have changed if it no longer exists.
134--
135monitorFileExistence :: FilePath -> MonitorFilePath
136monitorFileExistence = MonitorFile FileExists DirNotExists
137
138-- | Monitor a single directory for changes, based on its modification
139-- time. The monitored directory is considered to have changed if it no
140-- longer exists or if its modification time has changed.
141--
142monitorDirectory :: FilePath -> MonitorFilePath
143monitorDirectory = MonitorFile FileNotExists DirModTime
144
145-- | Monitor a single non-existent directory for changes.  The monitored
146-- directory is considered to have changed if it exists.
147--
148monitorNonExistentDirectory :: FilePath -> MonitorFilePath
149-- Just an alias for monitorNonExistentFile, since you can't
150-- tell the difference between a non-existent directory and
151-- a non-existent file :)
152monitorNonExistentDirectory = monitorNonExistentFile
153
154-- | Monitor a single directory for existence. The monitored directory is
155-- considered to have changed only if it no longer exists.
156--
157monitorDirectoryExistence :: FilePath -> MonitorFilePath
158monitorDirectoryExistence = MonitorFile FileNotExists DirExists
159
160-- | Monitor a single file or directory for changes, based on its modification
161-- time. The monitored file is considered to have changed if it no longer
162-- exists or if its modification time has changed.
163--
164monitorFileOrDirectory :: FilePath -> MonitorFilePath
165monitorFileOrDirectory = MonitorFile FileModTime DirModTime
166
167-- | Monitor a set of files (or directories) identified by a file glob.
168-- The monitored glob is considered to have changed if the set of files
169-- matching the glob changes (i.e. creations or deletions), or for files if the
170-- modification time and content hash of any matching file has changed.
171--
172monitorFileGlob :: FilePathGlob -> MonitorFilePath
173monitorFileGlob = MonitorFileGlob FileHashed DirExists
174
175-- | Monitor a set of files (or directories) identified by a file glob for
176-- existence only. The monitored glob is considered to have changed if the set
177-- of files matching the glob changes (i.e. creations or deletions).
178--
179monitorFileGlobExistence :: FilePathGlob -> MonitorFilePath
180monitorFileGlobExistence = MonitorFileGlob FileExists DirExists
181
182-- | Creates a list of files to monitor when you search for a file which
183-- unsuccessfully looked in @notFoundAtPaths@ before finding it at
184-- @foundAtPath@.
185monitorFileSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath]
186monitorFileSearchPath notFoundAtPaths foundAtPath =
187    monitorFile foundAtPath
188  : map monitorNonExistentFile notFoundAtPaths
189
190-- | Similar to 'monitorFileSearchPath', but also instructs us to
191-- monitor the hash of the found file.
192monitorFileHashedSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath]
193monitorFileHashedSearchPath notFoundAtPaths foundAtPath =
194    monitorFileHashed foundAtPath
195  : map monitorNonExistentFile notFoundAtPaths
196
197
198------------------------------------------------------------------------------
199-- Implementation types, files status
200--
201
202-- | The state necessary to determine whether a set of monitored
203-- files has changed.  It consists of two parts: a set of specific
204-- files to be monitored (index by their path), and a list of
205-- globs, which monitor may files at once.
206data MonitorStateFileSet
207   = MonitorStateFileSet ![MonitorStateFile]
208                         ![MonitorStateGlob]
209     -- Morally this is not actually a set but a bag (represented by lists).
210     -- There is no principled reason to use a bag here rather than a set, but
211     -- there is also no particular gain either. That said, we do preserve the
212     -- order of the lists just to reduce confusion (and have predictable I/O
213     -- patterns).
214  deriving (Show, Generic)
215
216instance Binary MonitorStateFileSet
217instance Structured MonitorStateFileSet
218
219type Hash = Int
220
221-- | The state necessary to determine whether a monitored file has changed.
222--
223-- This covers all the cases of 'MonitorFilePath' except for globs which is
224-- covered separately by 'MonitorStateGlob'.
225--
226-- The @Maybe ModTime@ is to cover the case where we already consider the
227-- file to have changed, either because it had already changed by the time we
228-- did the snapshot (i.e. too new, changed since start of update process) or it
229-- no longer exists at all.
230--
231data MonitorStateFile = MonitorStateFile !MonitorKindFile !MonitorKindDir
232                                         !FilePath !MonitorStateFileStatus
233  deriving (Show, Generic)
234
235data MonitorStateFileStatus
236   = MonitorStateFileExists
237   | MonitorStateFileModTime !ModTime        -- ^ cached file mtime
238   | MonitorStateFileHashed  !ModTime !Hash  -- ^ cached mtime and content hash
239   | MonitorStateDirExists
240   | MonitorStateDirModTime  !ModTime        -- ^ cached dir mtime
241   | MonitorStateNonExistent
242   | MonitorStateAlreadyChanged
243  deriving (Show, Generic)
244
245instance Binary MonitorStateFile
246instance Binary MonitorStateFileStatus
247instance Structured MonitorStateFile
248instance Structured MonitorStateFileStatus
249
250-- | The state necessary to determine whether the files matched by a globbing
251-- match have changed.
252--
253data MonitorStateGlob = MonitorStateGlob !MonitorKindFile !MonitorKindDir
254                                         !FilePathRoot !MonitorStateGlobRel
255  deriving (Show, Generic)
256
257data MonitorStateGlobRel
258   = MonitorStateGlobDirs
259       !Glob !FilePathGlobRel
260       !ModTime
261       ![(FilePath, MonitorStateGlobRel)] -- invariant: sorted
262
263   | MonitorStateGlobFiles
264       !Glob
265       !ModTime
266       ![(FilePath, MonitorStateFileStatus)] -- invariant: sorted
267
268   | MonitorStateGlobDirTrailing
269  deriving (Show, Generic)
270
271instance Binary MonitorStateGlob
272instance Binary MonitorStateGlobRel
273
274instance Structured MonitorStateGlob
275instance Structured MonitorStateGlobRel
276
277-- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by
278-- inspecting the state of the file system, and we can go in the reverse
279-- direction by just forgetting the extra info.
280--
281reconstructMonitorFilePaths :: MonitorStateFileSet -> [MonitorFilePath]
282reconstructMonitorFilePaths (MonitorStateFileSet singlePaths globPaths) =
283    map getSinglePath singlePaths
284 ++ map getGlobPath globPaths
285  where
286    getSinglePath (MonitorStateFile kindfile kinddir filepath _) =
287      MonitorFile kindfile kinddir filepath
288
289    getGlobPath (MonitorStateGlob kindfile kinddir root gstate) =
290      MonitorFileGlob kindfile kinddir $ FilePathGlob root $
291        case gstate of
292          MonitorStateGlobDirs  glob globs _ _ -> GlobDir  glob globs
293          MonitorStateGlobFiles glob       _ _ -> GlobFile glob
294          MonitorStateGlobDirTrailing          -> GlobDirTrailing
295
296------------------------------------------------------------------------------
297-- Checking the status of monitored files
298--
299
300-- | A monitor for detecting changes to a set of files. It can be used to
301-- efficiently test if any of a set of files (specified individually or by
302-- glob patterns) has changed since some snapshot. In addition, it also checks
303-- for changes in a value (of type @a@), and when there are no changes in
304-- either it returns a saved value (of type @b@).
305--
306-- The main use case looks like this: suppose we have some expensive action
307-- that depends on certain pure inputs and reads some set of files, and
308-- produces some pure result. We want to avoid re-running this action when it
309-- would produce the same result. So we need to monitor the files the action
310-- looked at, the other pure input values, and we need to cache the result.
311-- Then at some later point, if the input value didn't change, and none of the
312-- files changed, then we can re-use the cached result rather than re-running
313-- the action.
314--
315-- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance
316-- saves state in a disk file, so the file for that has to be specified,
317-- making sure it is unique. The pattern is to use 'checkFileMonitorChanged'
318-- to see if there's been any change. If there is, re-run the action, keeping
319-- track of the files, then use 'updateFileMonitor' to record the current
320-- set of files to monitor, the current input value for the action, and the
321-- result of the action.
322--
323-- The typical occurrence of this pattern is captured by 'rerunIfChanged'
324-- and the 'Rebuild' monad. More complicated cases may need to use
325-- 'checkFileMonitorChanged' and 'updateFileMonitor' directly.
326--
327data FileMonitor a b
328   = FileMonitor {
329
330       -- | The file where this 'FileMonitor' should store its state.
331       --
332       fileMonitorCacheFile :: FilePath,
333
334       -- | Compares a new cache key with old one to determine if a
335       -- corresponding cached value is still valid.
336       --
337       -- Typically this is just an equality test, but in some
338       -- circumstances it can make sense to do things like subset
339       -- comparisons.
340       --
341       -- The first arg is the new value, the second is the old cached value.
342       --
343       fileMonitorKeyValid :: a -> a -> Bool,
344
345       -- | When this mode is enabled, if 'checkFileMonitorChanged' returns
346       -- 'MonitoredValueChanged' then we have the guarantee that no files
347       -- changed, that the value change was the only change. In the default
348       -- mode no such guarantee is provided which is slightly faster.
349       --
350       fileMonitorCheckIfOnlyValueChanged :: Bool
351  }
352
353-- | Define a new file monitor.
354--
355-- It's best practice to define file monitor values once, and then use the
356-- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this
357-- ensures you get the same types @a@ and @b@ for reading and writing.
358--
359-- The path of the file monitor itself must be unique because it keeps state
360-- on disk and these would clash.
361--
362newFileMonitor :: Eq a => FilePath -- ^ The file to cache the state of the
363                                   -- file monitor. Must be unique.
364                       -> FileMonitor a b
365newFileMonitor path = FileMonitor path (==) False
366
367-- | The result of 'checkFileMonitorChanged': either the monitored files or
368-- value changed (and it tells us which it was) or nothing changed and we get
369-- the cached result.
370--
371data MonitorChanged a b =
372     -- | The monitored files and value did not change. The cached result is
373     -- @b@.
374     --
375     -- The set of monitored files is also returned. This is useful
376     -- for composing or nesting 'FileMonitor's.
377     MonitorUnchanged b [MonitorFilePath]
378
379     -- | The monitor found that something changed. The reason is given.
380     --
381   | MonitorChanged (MonitorChangedReason a)
382  deriving Show
383
384-- | What kind of change 'checkFileMonitorChanged' detected.
385--
386data MonitorChangedReason a =
387
388     -- | One of the files changed (existence, file type, mtime or file
389     -- content, depending on the 'MonitorFilePath' in question)
390     MonitoredFileChanged FilePath
391
392     -- | The pure input value changed.
393     --
394     -- The previous cached key value is also returned. This is sometimes
395     -- useful when using a 'fileMonitorKeyValid' function that is not simply
396     -- '(==)', when invalidation can be partial. In such cases it can make
397     -- sense to 'updateFileMonitor' with a key value that's a combination of
398     -- the new and old (e.g. set union).
399   | MonitoredValueChanged a
400
401     -- | There was no saved monitor state, cached value etc. Ie the file
402     -- for the 'FileMonitor' does not exist.
403   | MonitorFirstRun
404
405     -- | There was existing state, but we could not read it. This typically
406     -- happens when the code has changed compared to an existing 'FileMonitor'
407     -- cache file and type of the input value or cached value has changed such
408     -- that we cannot decode the values. This is completely benign as we can
409     -- treat is just as if there were no cache file and re-run.
410   | MonitorCorruptCache
411  deriving (Eq, Show, Functor)
412
413-- | Test if the input value or files monitored by the 'FileMonitor' have
414-- changed. If not, return the cached value.
415--
416-- See 'FileMonitor' for a full explanation.
417--
418checkFileMonitorChanged
419  :: (Binary a, Structured a, Binary b, Structured b)
420  => FileMonitor a b            -- ^ cache file path
421  -> FilePath                   -- ^ root directory
422  -> a                          -- ^ guard or key value
423  -> IO (MonitorChanged a b)    -- ^ did the key or any paths change?
424checkFileMonitorChanged
425    monitor@FileMonitor { fileMonitorKeyValid,
426                          fileMonitorCheckIfOnlyValueChanged }
427    root currentKey =
428
429    -- Consider it a change if the cache file does not exist,
430    -- or we cannot decode it. Sadly ErrorCall can still happen, despite
431    -- using decodeFileOrFail, e.g. Data.Char.chr errors
432
433    handleDoesNotExist (MonitorChanged MonitorFirstRun) $
434    handleErrorCall    (MonitorChanged MonitorCorruptCache) $
435          readCacheFile monitor
436      >>= either (\_ -> return (MonitorChanged MonitorCorruptCache))
437                 checkStatusCache
438
439  where
440    checkStatusCache (cachedFileStatus, cachedKey, cachedResult) = do
441        change <- checkForChanges
442        case change of
443          Just reason -> return (MonitorChanged reason)
444          Nothing     -> return (MonitorUnchanged cachedResult monitorFiles)
445            where monitorFiles = reconstructMonitorFilePaths cachedFileStatus
446      where
447        -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that
448        -- if we return MonitoredValueChanged that only the value changed.
449        -- We do that by checkin for file changes first. Otherwise it makes
450        -- more sense to do the cheaper test first.
451        checkForChanges
452          | fileMonitorCheckIfOnlyValueChanged
453          = checkFileChange cachedFileStatus cachedKey cachedResult
454              `mplusMaybeT`
455            checkValueChange cachedKey
456
457          | otherwise
458          = checkValueChange cachedKey
459              `mplusMaybeT`
460            checkFileChange cachedFileStatus cachedKey cachedResult
461
462    mplusMaybeT :: Monad m => m (Maybe a) -> m (Maybe a) -> m (Maybe a)
463    mplusMaybeT ma mb = do
464      mx <- ma
465      case mx of
466        Nothing -> mb
467        Just x  -> return (Just x)
468
469    -- Check if the guard value has changed
470    checkValueChange cachedKey
471      | not (fileMonitorKeyValid currentKey cachedKey)
472      = return (Just (MonitoredValueChanged cachedKey))
473      | otherwise
474      = return Nothing
475
476    -- Check if any file has changed
477    checkFileChange cachedFileStatus cachedKey cachedResult = do
478      res <- probeFileSystem root cachedFileStatus
479      case res of
480        -- Some monitored file has changed
481        Left changedPath ->
482          return (Just (MonitoredFileChanged (normalise changedPath)))
483
484        -- No monitored file has changed
485        Right (cachedFileStatus', cacheStatus) -> do
486
487          -- But we might still want to update the cache
488          whenCacheChanged cacheStatus $
489            rewriteCacheFile monitor cachedFileStatus' cachedKey cachedResult
490
491          return Nothing
492
493-- | Helper for reading the cache file.
494--
495-- This determines the type and format of the binary cache file.
496--
497readCacheFile :: (Binary a, Structured a, Binary b, Structured b)
498              => FileMonitor a b
499              -> IO (Either String (MonitorStateFileSet, a, b))
500readCacheFile FileMonitor {fileMonitorCacheFile} =
501    withBinaryFile fileMonitorCacheFile ReadMode $ \hnd -> do
502        contents <- BS.hGetContents hnd
503        structuredDecodeOrFailIO contents
504
505-- | Helper for writing the cache file.
506--
507-- This determines the type and format of the binary cache file.
508--
509rewriteCacheFile :: (Binary a, Structured a, Binary b, Structured b)
510                 => FileMonitor a b
511                 -> MonitorStateFileSet -> a -> b -> IO ()
512rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result =
513    writeFileAtomic fileMonitorCacheFile $
514        structuredEncode (fileset, key, result)
515
516-- | Probe the file system to see if any of the monitored files have changed.
517--
518-- It returns Nothing if any file changed, or returns a possibly updated
519-- file 'MonitorStateFileSet' plus an indicator of whether it actually changed.
520--
521-- We may need to update the cache since there may be changes in the filesystem
522-- state which don't change any of our affected files.
523--
524-- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a
525-- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run
526-- and find @proj2@ was created, yet contains no files matching @*.cabal@ then
527-- we want to update the cache despite no changes in our relevant file set.
528-- Specifically, we should add an mtime for this directory so we can avoid
529-- re-traversing the directory in future runs.
530--
531probeFileSystem :: FilePath -> MonitorStateFileSet
532                -> IO (Either FilePath (MonitorStateFileSet, CacheChanged))
533probeFileSystem root (MonitorStateFileSet singlePaths globPaths) =
534  runChangedM $ do
535    sequence_
536      [ probeMonitorStateFileStatus root file status
537      | MonitorStateFile _ _ file status <- singlePaths ]
538    -- The glob monitors can require state changes
539    globPaths' <-
540      sequence
541        [ probeMonitorStateGlob root globPath
542        | globPath <- globPaths ]
543    return (MonitorStateFileSet singlePaths globPaths')
544
545
546-----------------------------------------------
547-- Monad for checking for file system changes
548--
549-- We need to be able to bail out if we detect a change (using ExceptT),
550-- but if there's no change we need to be able to rebuild the monitor
551-- state. And we want to optimise that rebuilding by keeping track if
552-- anything actually changed (using StateT), so that in the typical case
553-- we can avoid rewriting the state file.
554
555newtype ChangedM a = ChangedM (StateT CacheChanged (ExceptT FilePath IO) a)
556  deriving (Functor, Applicative, Monad, MonadIO)
557
558runChangedM :: ChangedM a -> IO (Either FilePath (a, CacheChanged))
559runChangedM (ChangedM action) =
560  runExceptT $ State.runStateT action CacheUnchanged
561
562somethingChanged :: FilePath -> ChangedM a
563somethingChanged path = ChangedM $ throwError path
564
565cacheChanged :: ChangedM ()
566cacheChanged = ChangedM $ State.put CacheChanged
567
568mapChangedFile :: (FilePath -> FilePath) -> ChangedM a -> ChangedM a
569mapChangedFile adjust (ChangedM a) =
570    ChangedM (mapStateT (withExceptT adjust) a)
571
572data CacheChanged = CacheChanged | CacheUnchanged
573
574whenCacheChanged :: Monad m => CacheChanged -> m () -> m ()
575whenCacheChanged CacheChanged action = action
576whenCacheChanged CacheUnchanged _    = return ()
577
578----------------------
579
580-- | Probe the file system to see if a single monitored file has changed.
581--
582probeMonitorStateFileStatus :: FilePath -> FilePath
583                            -> MonitorStateFileStatus
584                            -> ChangedM ()
585probeMonitorStateFileStatus root file status =
586    case status of
587      MonitorStateFileExists ->
588        probeFileExistence root file
589
590      MonitorStateFileModTime mtime ->
591        probeFileModificationTime root file mtime
592
593      MonitorStateFileHashed  mtime hash ->
594        probeFileModificationTimeAndHash root file mtime hash
595
596      MonitorStateDirExists ->
597        probeDirExistence root file
598
599      MonitorStateDirModTime mtime ->
600        probeFileModificationTime root file mtime
601
602      MonitorStateNonExistent ->
603        probeFileNonExistence root file
604
605      MonitorStateAlreadyChanged ->
606        somethingChanged file
607
608
609-- | Probe the file system to see if a monitored file glob has changed.
610--
611probeMonitorStateGlob :: FilePath      -- ^ root path
612                      -> MonitorStateGlob
613                      -> ChangedM MonitorStateGlob
614probeMonitorStateGlob relroot
615                      (MonitorStateGlob kindfile kinddir globroot glob) = do
616    root <- liftIO $ getFilePathRootDirectory globroot relroot
617    case globroot of
618      FilePathRelative ->
619        MonitorStateGlob kindfile kinddir globroot <$>
620        probeMonitorStateGlobRel kindfile kinddir root "." glob
621
622      -- for absolute cases, make the changed file we report absolute too
623      _ ->
624        mapChangedFile (root </>) $
625        MonitorStateGlob kindfile kinddir globroot <$>
626        probeMonitorStateGlobRel kindfile kinddir root "" glob
627
628probeMonitorStateGlobRel :: MonitorKindFile -> MonitorKindDir
629                         -> FilePath      -- ^ root path
630                         -> FilePath      -- ^ path of the directory we are
631                                          --   looking in relative to @root@
632                         -> MonitorStateGlobRel
633                         -> ChangedM MonitorStateGlobRel
634probeMonitorStateGlobRel kindfile kinddir root dirName
635                        (MonitorStateGlobDirs glob globPath mtime children) = do
636    change <- liftIO $ checkDirectoryModificationTime (root </> dirName) mtime
637    case change of
638      Nothing -> do
639        children' <- sequence
640          [ do fstate' <- probeMonitorStateGlobRel
641                            kindfile kinddir root
642                            (dirName </> fname) fstate
643               return (fname, fstate')
644          | (fname, fstate) <- children ]
645        return $! MonitorStateGlobDirs glob globPath mtime children'
646
647      Just mtime' -> do
648        -- directory modification time changed:
649        -- a matching subdir may have been added or deleted
650        matches <- filterM (\entry -> let subdir = root </> dirName </> entry
651                                       in liftIO $ doesDirectoryExist subdir)
652                 . filter (matchGlob glob)
653               =<< liftIO (getDirectoryContents (root </> dirName))
654
655        children' <- traverse probeMergeResult $
656                          mergeBy (\(path1,_) path2 -> compare path1 path2)
657                                  children
658                                  (sort matches)
659        return $! MonitorStateGlobDirs glob globPath mtime' children'
660        -- Note that just because the directory has changed, we don't force
661        -- a cache rewrite with 'cacheChanged' since that has some cost, and
662        -- all we're saving is scanning the directory. But we do rebuild the
663        -- cache with the new mtime', so that if the cache is rewritten for
664        -- some other reason, we'll take advantage of that.
665
666  where
667    probeMergeResult :: MergeResult (FilePath, MonitorStateGlobRel) FilePath
668                     -> ChangedM (FilePath, MonitorStateGlobRel)
669
670    -- Only in cached (directory deleted)
671    probeMergeResult (OnlyInLeft (path, fstate)) = do
672      case allMatchingFiles (dirName </> path) fstate of
673        [] -> return (path, fstate)
674        -- Strictly speaking we should be returning 'CacheChanged' above
675        -- as we should prune the now-missing 'MonitorStateGlobRel'. However
676        -- we currently just leave these now-redundant entries in the
677        -- cache as they cost no IO and keeping them allows us to avoid
678        -- rewriting the cache.
679        (file:_) -> somethingChanged file
680
681    -- Only in current filesystem state (directory added)
682    probeMergeResult (OnlyInRight path) = do
683      fstate <- liftIO $ buildMonitorStateGlobRel Nothing Map.empty
684                           kindfile kinddir root (dirName </> path) globPath
685      case allMatchingFiles (dirName </> path) fstate of
686        (file:_) -> somethingChanged file
687        -- This is the only case where we use 'cacheChanged' because we can
688        -- have a whole new dir subtree (of unbounded size and cost), so we
689        -- need to save the state of that new subtree in the cache.
690        [] -> cacheChanged >> return (path, fstate)
691
692    -- Found in path
693    probeMergeResult (InBoth (path, fstate) _) = do
694      fstate' <- probeMonitorStateGlobRel kindfile kinddir
695                                          root (dirName </> path) fstate
696      return (path, fstate')
697
698    -- | Does a 'MonitorStateGlob' have any relevant files within it?
699    allMatchingFiles :: FilePath -> MonitorStateGlobRel -> [FilePath]
700    allMatchingFiles dir (MonitorStateGlobFiles _ _   entries) =
701      [ dir </> fname | (fname, _) <- entries ]
702    allMatchingFiles dir (MonitorStateGlobDirs  _ _ _ entries) =
703      [ res
704      | (subdir, fstate) <- entries
705      , res <- allMatchingFiles (dir </> subdir) fstate ]
706    allMatchingFiles dir MonitorStateGlobDirTrailing =
707      [dir]
708
709probeMonitorStateGlobRel _ _ root dirName
710                         (MonitorStateGlobFiles glob mtime children) = do
711    change <- liftIO $ checkDirectoryModificationTime (root </> dirName) mtime
712    mtime' <- case change of
713      Nothing     -> return mtime
714      Just mtime' -> do
715        -- directory modification time changed:
716        -- a matching file may have been added or deleted
717        matches <- return . filter (matchGlob glob)
718               =<< liftIO (getDirectoryContents (root </> dirName))
719
720        traverse_ probeMergeResult $
721              mergeBy (\(path1,_) path2 -> compare path1 path2)
722                      children
723                      (sort matches)
724        return mtime'
725
726    -- Check that none of the children have changed
727    for_ children $ \(file, status) ->
728      probeMonitorStateFileStatus root (dirName </> file) status
729
730
731    return (MonitorStateGlobFiles glob mtime' children)
732    -- Again, we don't force a cache rewite with 'cacheChanged', but we do use
733    -- the new mtime' if any.
734  where
735    probeMergeResult :: MergeResult (FilePath, MonitorStateFileStatus) FilePath
736                     -> ChangedM ()
737    probeMergeResult mr = case mr of
738      InBoth _ _            -> return ()
739    -- this is just to be able to accurately report which file changed:
740      OnlyInLeft  (path, _) -> somethingChanged (dirName </> path)
741      OnlyInRight path      -> somethingChanged (dirName </> path)
742
743probeMonitorStateGlobRel _ _ _ _ MonitorStateGlobDirTrailing =
744    return MonitorStateGlobDirTrailing
745
746------------------------------------------------------------------------------
747
748-- | Update the input value and the set of files monitored by the
749-- 'FileMonitor', plus the cached value that may be returned in future.
750--
751-- This takes a snapshot of the state of the monitored files right now, so
752-- 'checkFileMonitorChanged' will look for file system changes relative to
753-- this snapshot.
754--
755-- This is typically done once the action has been completed successfully and
756-- we have the action's result and we know what files it looked at. See
757-- 'FileMonitor' for a full explanation.
758--
759-- If we do take the snapshot after the action has completed then we have a
760-- problem. The problem is that files might have changed /while/ the action was
761-- running but /after/ the action read them. If we take the snapshot after the
762-- action completes then we will miss these changes. The solution is to record
763-- a timestamp before beginning execution of the action and then we make the
764-- conservative assumption that any file that has changed since then has
765-- already changed, ie the file monitor state for these files will be such that
766-- 'checkFileMonitorChanged' will report that they have changed.
767--
768-- So if you do use 'updateFileMonitor' after the action (so you can discover
769-- the files used rather than predicting them in advance) then use
770-- 'beginUpdateFileMonitor' to get a timestamp and pass that. Alternatively,
771-- if you take the snapshot in advance of the action, or you're not monitoring
772-- any files then you can use @Nothing@ for the timestamp parameter.
773--
774updateFileMonitor
775  :: (Binary a, Structured a, Binary b, Structured b)
776  => FileMonitor a b          -- ^ cache file path
777  -> FilePath                 -- ^ root directory
778  -> Maybe MonitorTimestamp   -- ^ timestamp when the update action started
779  -> [MonitorFilePath]        -- ^ files of interest relative to root
780  -> a                        -- ^ the current key value
781  -> b                        -- ^ the current result value
782  -> IO ()
783updateFileMonitor monitor root startTime monitorFiles
784                  cachedKey cachedResult = do
785    hashcache <- readCacheFileHashes monitor
786    msfs <- buildMonitorStateFileSet startTime hashcache root monitorFiles
787    rewriteCacheFile monitor msfs cachedKey cachedResult
788
789-- | A timestamp to help with the problem of file changes during actions.
790-- See 'updateFileMonitor' for details.
791--
792newtype MonitorTimestamp = MonitorTimestamp ModTime
793
794-- | Record a timestamp at the beginning of an action, and when the action
795-- completes call 'updateFileMonitor' passing it the timestamp.
796-- See 'updateFileMonitor' for details.
797--
798beginUpdateFileMonitor :: IO MonitorTimestamp
799beginUpdateFileMonitor = MonitorTimestamp <$> getCurTime
800
801-- | Take the snapshot of the monitored files. That is, given the
802-- specification of the set of files we need to monitor, inspect the state
803-- of the file system now and collect the information we'll need later to
804-- determine if anything has changed.
805--
806buildMonitorStateFileSet :: Maybe MonitorTimestamp -- ^ optional: timestamp
807                                              -- of the start of the action
808                         -> FileHashCache     -- ^ existing file hashes
809                         -> FilePath          -- ^ root directory
810                         -> [MonitorFilePath] -- ^ patterns of interest
811                                              --   relative to root
812                         -> IO MonitorStateFileSet
813buildMonitorStateFileSet mstartTime hashcache root =
814    go [] []
815  where
816    go :: [MonitorStateFile] -> [MonitorStateGlob]
817       -> [MonitorFilePath] -> IO MonitorStateFileSet
818    go !singlePaths !globPaths [] =
819      return (MonitorStateFileSet (reverse singlePaths) (reverse globPaths))
820
821    go !singlePaths !globPaths
822       (MonitorFile kindfile kinddir path : monitors) = do
823      monitorState <- MonitorStateFile kindfile kinddir path
824                  <$> buildMonitorStateFile mstartTime hashcache
825                                            kindfile kinddir root path
826      go (monitorState : singlePaths) globPaths monitors
827
828    go !singlePaths !globPaths
829       (MonitorFileGlob kindfile kinddir globPath : monitors) = do
830      monitorState <- buildMonitorStateGlob mstartTime hashcache
831                                            kindfile kinddir root globPath
832      go singlePaths (monitorState : globPaths) monitors
833
834
835buildMonitorStateFile :: Maybe MonitorTimestamp -- ^ start time of update
836                      -> FileHashCache          -- ^ existing file hashes
837                      -> MonitorKindFile -> MonitorKindDir
838                      -> FilePath               -- ^ the root directory
839                      -> FilePath
840                      -> IO MonitorStateFileStatus
841buildMonitorStateFile mstartTime hashcache kindfile kinddir root path = do
842    let abspath = root </> path
843    isFile <- doesFileExist abspath
844    isDir  <- doesDirectoryExist abspath
845    case (isFile, kindfile, isDir, kinddir) of
846      (_, FileNotExists, _, DirNotExists) ->
847        -- we don't need to care if it exists now, since we check at probe time
848        return MonitorStateNonExistent
849
850      (False, _, False, _) ->
851        return MonitorStateAlreadyChanged
852
853      (True, FileExists, _, _)  ->
854        return MonitorStateFileExists
855
856      (True, FileModTime, _, _) ->
857        handleIOException MonitorStateAlreadyChanged $ do
858          mtime <- getModTime abspath
859          if changedDuringUpdate mstartTime mtime
860            then return MonitorStateAlreadyChanged
861            else return (MonitorStateFileModTime mtime)
862
863      (True, FileHashed, _, _) ->
864        handleIOException MonitorStateAlreadyChanged $ do
865          mtime <- getModTime abspath
866          if changedDuringUpdate mstartTime mtime
867            then return MonitorStateAlreadyChanged
868            else do hash <- getFileHash hashcache abspath abspath mtime
869                    return (MonitorStateFileHashed mtime hash)
870
871      (_, _, True, DirExists) ->
872        return MonitorStateDirExists
873
874      (_, _, True, DirModTime) ->
875        handleIOException MonitorStateAlreadyChanged $ do
876          mtime <- getModTime abspath
877          if changedDuringUpdate mstartTime mtime
878            then return MonitorStateAlreadyChanged
879            else return (MonitorStateDirModTime mtime)
880
881      (False, _, True,  DirNotExists) -> return MonitorStateAlreadyChanged
882      (True, FileNotExists, False, _) -> return MonitorStateAlreadyChanged
883
884-- | If we have a timestamp for the beginning of the update, then any file
885-- mtime later than this means that it changed during the update and we ought
886-- to consider the file as already changed.
887--
888changedDuringUpdate :: Maybe MonitorTimestamp -> ModTime -> Bool
889changedDuringUpdate (Just (MonitorTimestamp startTime)) mtime
890                        = mtime > startTime
891changedDuringUpdate _ _ = False
892
893-- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case
894-- of a file glob.
895--
896-- This gets used both by 'buildMonitorStateFileSet' when we're taking the
897-- file system snapshot, but also by 'probeGlobStatus' as part of checking
898-- the monitored (globed) files for changes when we find a whole new subtree.
899--
900buildMonitorStateGlob :: Maybe MonitorTimestamp -- ^ start time of update
901                      -> FileHashCache     -- ^ existing file hashes
902                      -> MonitorKindFile -> MonitorKindDir
903                      -> FilePath     -- ^ the root directory
904                      -> FilePathGlob -- ^ the matching glob
905                      -> IO MonitorStateGlob
906buildMonitorStateGlob mstartTime hashcache kindfile kinddir relroot
907                      (FilePathGlob globroot globPath) = do
908    root <- liftIO $ getFilePathRootDirectory globroot relroot
909    MonitorStateGlob kindfile kinddir globroot <$>
910      buildMonitorStateGlobRel
911        mstartTime hashcache kindfile kinddir root "." globPath
912
913buildMonitorStateGlobRel :: Maybe MonitorTimestamp -- ^ start time of update
914                         -> FileHashCache   -- ^ existing file hashes
915                         -> MonitorKindFile -> MonitorKindDir
916                         -> FilePath        -- ^ the root directory
917                         -> FilePath        -- ^ directory we are examining
918                                            --   relative to the root
919                         -> FilePathGlobRel -- ^ the matching glob
920                         -> IO MonitorStateGlobRel
921buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root
922                         dir globPath = do
923    let absdir = root </> dir
924    dirEntries <- getDirectoryContents absdir
925    dirMTime   <- getModTime absdir
926    case globPath of
927      GlobDir glob globPath' -> do
928        subdirs <- filterM (\subdir -> doesDirectoryExist (absdir </> subdir))
929                 $ filter (matchGlob glob) dirEntries
930        subdirStates <-
931          for (sort subdirs) $ \subdir -> do
932            fstate <- buildMonitorStateGlobRel
933                        mstartTime hashcache kindfile kinddir root
934                        (dir </> subdir) globPath'
935            return (subdir, fstate)
936        return $! MonitorStateGlobDirs glob globPath' dirMTime subdirStates
937
938      GlobFile glob -> do
939        let files = filter (matchGlob glob) dirEntries
940        filesStates <-
941          for (sort files) $ \file -> do
942            fstate <- buildMonitorStateFile
943                        mstartTime hashcache kindfile kinddir root
944                        (dir </> file)
945            return (file, fstate)
946        return $! MonitorStateGlobFiles glob dirMTime filesStates
947
948      GlobDirTrailing ->
949        return MonitorStateGlobDirTrailing
950
951
952-- | We really want to avoid re-hashing files all the time. We already make
953-- the assumption that if a file mtime has not changed then we don't need to
954-- bother checking if the content hash has changed. We can apply the same
955-- assumption when updating the file monitor state. In the typical case of
956-- updating a file monitor the set of files is the same or largely the same so
957-- we can grab the previously known content hashes with their corresponding
958-- mtimes.
959--
960type FileHashCache = Map FilePath (ModTime, Hash)
961
962-- | We declare it a cache hit if the mtime of a file is the same as before.
963--
964lookupFileHashCache :: FileHashCache -> FilePath -> ModTime -> Maybe Hash
965lookupFileHashCache hashcache file mtime = do
966    (mtime', hash) <- Map.lookup file hashcache
967    guard (mtime' == mtime)
968    return hash
969
970-- | Either get it from the cache or go read the file
971getFileHash :: FileHashCache -> FilePath -> FilePath -> ModTime -> IO Hash
972getFileHash hashcache relfile absfile mtime =
973    case lookupFileHashCache hashcache relfile mtime of
974      Just hash -> return hash
975      Nothing   -> readFileHash absfile
976
977-- | Build a 'FileHashCache' from the previous 'MonitorStateFileSet'. While
978-- in principle we could preserve the structure of the previous state, given
979-- that the set of files to monitor can change then it's simpler just to throw
980-- away the structure and use a finite map.
981--
982readCacheFileHashes :: (Binary a, Structured a, Binary b, Structured b)
983                    => FileMonitor a b -> IO FileHashCache
984readCacheFileHashes monitor =
985    handleDoesNotExist Map.empty $
986    handleErrorCall    Map.empty $ do
987      res <- readCacheFile monitor
988      case res of
989        Left _             -> return Map.empty
990        Right (msfs, _, _) -> return (mkFileHashCache msfs)
991  where
992    mkFileHashCache :: MonitorStateFileSet -> FileHashCache
993    mkFileHashCache (MonitorStateFileSet singlePaths globPaths) =
994                    collectAllFileHashes singlePaths
995        `Map.union` collectAllGlobHashes globPaths
996
997    collectAllFileHashes singlePaths =
998      Map.fromList [ (fpath, (mtime, hash))
999                   | MonitorStateFile _ _ fpath
1000                       (MonitorStateFileHashed mtime hash) <- singlePaths ]
1001
1002    collectAllGlobHashes globPaths =
1003      Map.fromList [ (fpath, (mtime, hash))
1004                   | MonitorStateGlob _ _ _ gstate <- globPaths
1005                   , (fpath, (mtime, hash)) <- collectGlobHashes "" gstate ]
1006
1007    collectGlobHashes dir (MonitorStateGlobDirs _ _ _ entries) =
1008      [ res
1009      | (subdir, fstate) <- entries
1010      , res <- collectGlobHashes (dir </> subdir) fstate ]
1011
1012    collectGlobHashes dir (MonitorStateGlobFiles  _ _ entries) =
1013      [ (dir </> fname, (mtime, hash))
1014      | (fname, MonitorStateFileHashed mtime hash) <- entries ]
1015
1016    collectGlobHashes _dir MonitorStateGlobDirTrailing =
1017      []
1018
1019
1020------------------------------------------------------------------------------
1021-- Utils
1022--
1023
1024-- | Within the @root@ directory, check if @file@ has its 'ModTime' is
1025-- the same as @mtime@, short-circuiting if it is different.
1026probeFileModificationTime :: FilePath -> FilePath -> ModTime -> ChangedM ()
1027probeFileModificationTime root file mtime = do
1028    unchanged <- liftIO $ checkModificationTimeUnchanged root file mtime
1029    unless unchanged (somethingChanged file)
1030
1031-- | Within the @root@ directory, check if @file@ has its 'ModTime' and
1032-- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is
1033-- different.
1034probeFileModificationTimeAndHash :: FilePath -> FilePath -> ModTime -> Hash
1035                                 -> ChangedM ()
1036probeFileModificationTimeAndHash root file mtime hash = do
1037    unchanged <- liftIO $
1038      checkFileModificationTimeAndHashUnchanged root file mtime hash
1039    unless unchanged (somethingChanged file)
1040
1041-- | Within the @root@ directory, check if @file@ still exists as a file.
1042-- If it *does not* exist, short-circuit.
1043probeFileExistence :: FilePath -> FilePath -> ChangedM ()
1044probeFileExistence root file = do
1045    existsFile <- liftIO $ doesFileExist (root </> file)
1046    unless existsFile (somethingChanged file)
1047
1048-- | Within the @root@ directory, check if @dir@ still exists.
1049-- If it *does not* exist, short-circuit.
1050probeDirExistence :: FilePath -> FilePath -> ChangedM ()
1051probeDirExistence root dir = do
1052    existsDir  <- liftIO $ doesDirectoryExist (root </> dir)
1053    unless existsDir (somethingChanged dir)
1054
1055-- | Within the @root@ directory, check if @file@ still does not exist.
1056-- If it *does* exist, short-circuit.
1057probeFileNonExistence :: FilePath -> FilePath -> ChangedM ()
1058probeFileNonExistence root file = do
1059    existsFile <- liftIO $ doesFileExist (root </> file)
1060    existsDir  <- liftIO $ doesDirectoryExist (root </> file)
1061    when (existsFile || existsDir) (somethingChanged file)
1062
1063-- | Returns @True@ if, inside the @root@ directory, @file@ has the same
1064-- 'ModTime' as @mtime@.
1065checkModificationTimeUnchanged :: FilePath -> FilePath
1066                               -> ModTime -> IO Bool
1067checkModificationTimeUnchanged root file mtime =
1068  handleIOException False $ do
1069    mtime' <- getModTime (root </> file)
1070    return (mtime == mtime')
1071
1072-- | Returns @True@ if, inside the @root@ directory, @file@ has the
1073-- same 'ModTime' and 'Hash' as @mtime and @chash@.
1074checkFileModificationTimeAndHashUnchanged :: FilePath -> FilePath
1075                                          -> ModTime -> Hash -> IO Bool
1076checkFileModificationTimeAndHashUnchanged root file mtime chash =
1077  handleIOException False $ do
1078    mtime' <- getModTime (root </> file)
1079    if mtime == mtime'
1080      then return True
1081      else do
1082        chash' <- readFileHash (root </> file)
1083        return (chash == chash')
1084
1085-- | Read a non-cryptographic hash of a @file@.
1086readFileHash :: FilePath -> IO Hash
1087readFileHash file =
1088    withBinaryFile file ReadMode $ \hnd ->
1089      evaluate . Hashable.hash =<< BS.hGetContents hnd
1090
1091-- | Given a directory @dir@, return @Nothing@ if its 'ModTime'
1092-- is the same as @mtime@, and the new 'ModTime' if it is not.
1093checkDirectoryModificationTime :: FilePath -> ModTime -> IO (Maybe ModTime)
1094checkDirectoryModificationTime dir mtime =
1095  handleIOException Nothing $ do
1096    mtime' <- getModTime dir
1097    if mtime == mtime'
1098      then return Nothing
1099      else return (Just mtime')
1100
1101-- | Run an IO computation, returning the first argument @e@ if there is an 'error'
1102-- call. ('ErrorCall')
1103handleErrorCall :: a -> IO a -> IO a
1104handleErrorCall e = handle handler where
1105#if MIN_VERSION_base(4,9,0)
1106    handler (ErrorCallWithLocation _ _) = return e
1107#else
1108    handler (ErrorCall _) = return e
1109#endif
1110
1111
1112-- | Run an IO computation, returning @e@ if there is any 'IOException'.
1113--
1114-- This policy is OK in the file monitor code because it just causes the
1115-- monitor to report that something changed, and then code reacting to that
1116-- will normally encounter the same IO exception when it re-runs the action
1117-- that uses the file.
1118--
1119handleIOException :: a -> IO a -> IO a
1120handleIOException e =
1121    handle (anyIOException e)
1122  where
1123    anyIOException :: a -> IOException -> IO a
1124    anyIOException x _ = return x
1125
1126
1127------------------------------------------------------------------------------
1128-- Instances
1129--
1130
1131