1{-# LANGUAGE CPP #-} 2{-# LANGUAGE DeriveGeneric, DeriveFunctor, GeneralizedNewtypeDeriving, 3 NamedFieldPuns, BangPatterns #-} 4{-# OPTIONS_GHC -fno-warn-orphans #-} 5 6-- | An abstraction to help with re-running actions when files or other 7-- input values they depend on have changed. 8-- 9module Distribution.Client.FileMonitor ( 10 11 -- * Declaring files to monitor 12 MonitorFilePath(..), 13 MonitorKindFile(..), 14 MonitorKindDir(..), 15 FilePathGlob(..), 16 monitorFile, 17 monitorFileHashed, 18 monitorNonExistentFile, 19 monitorFileExistence, 20 monitorDirectory, 21 monitorNonExistentDirectory, 22 monitorDirectoryExistence, 23 monitorFileOrDirectory, 24 monitorFileGlob, 25 monitorFileGlobExistence, 26 monitorFileSearchPath, 27 monitorFileHashedSearchPath, 28 29 -- * Creating and checking sets of monitored files 30 FileMonitor(..), 31 newFileMonitor, 32 MonitorChanged(..), 33 MonitorChangedReason(..), 34 checkFileMonitorChanged, 35 updateFileMonitor, 36 MonitorTimestamp, 37 beginUpdateFileMonitor, 38 39 -- * Internal 40 MonitorStateFileSet, 41 MonitorStateFile, 42 MonitorStateGlob, 43 ) where 44 45import Prelude () 46import Distribution.Client.Compat.Prelude 47 48import qualified Data.Map.Strict as Map 49import qualified Data.ByteString.Lazy as BS 50import qualified Data.Hashable as Hashable 51 52import Control.Monad 53import Control.Monad.Trans (MonadIO, liftIO) 54import Control.Monad.State (StateT, mapStateT) 55import qualified Control.Monad.State as State 56import Control.Monad.Except (ExceptT, runExceptT, withExceptT, 57 throwError) 58import Control.Exception 59 60import Distribution.Compat.Time 61import Distribution.Client.Glob 62import Distribution.Simple.Utils (handleDoesNotExist, writeFileAtomic) 63import Distribution.Client.Utils (mergeBy, MergeResult(..)) 64import Distribution.Utils.Structured (structuredDecodeOrFailIO, structuredEncode) 65import System.FilePath 66import System.Directory 67import System.IO 68 69------------------------------------------------------------------------------ 70-- Types for specifying files to monitor 71-- 72 73 74-- | A description of a file (or set of files) to monitor for changes. 75-- 76-- Where file paths are relative they are relative to a common directory 77-- (e.g. project root), not necessarily the process current directory. 78-- 79data MonitorFilePath = 80 MonitorFile { 81 monitorKindFile :: !MonitorKindFile, 82 monitorKindDir :: !MonitorKindDir, 83 monitorPath :: !FilePath 84 } 85 | MonitorFileGlob { 86 monitorKindFile :: !MonitorKindFile, 87 monitorKindDir :: !MonitorKindDir, 88 monitorPathGlob :: !FilePathGlob 89 } 90 deriving (Eq, Show, Generic) 91 92data MonitorKindFile = FileExists 93 | FileModTime 94 | FileHashed 95 | FileNotExists 96 deriving (Eq, Show, Generic) 97 98data MonitorKindDir = DirExists 99 | DirModTime 100 | DirNotExists 101 deriving (Eq, Show, Generic) 102 103instance Binary MonitorFilePath 104instance Binary MonitorKindFile 105instance Binary MonitorKindDir 106 107instance Structured MonitorFilePath 108instance Structured MonitorKindFile 109instance Structured MonitorKindDir 110 111-- | Monitor a single file for changes, based on its modification time. 112-- The monitored file is considered to have changed if it no longer 113-- exists or if its modification time has changed. 114-- 115monitorFile :: FilePath -> MonitorFilePath 116monitorFile = MonitorFile FileModTime DirNotExists 117 118-- | Monitor a single file for changes, based on its modification time 119-- and content hash. The monitored file is considered to have changed if 120-- it no longer exists or if its modification time and content hash have 121-- changed. 122-- 123monitorFileHashed :: FilePath -> MonitorFilePath 124monitorFileHashed = MonitorFile FileHashed DirNotExists 125 126-- | Monitor a single non-existent file for changes. The monitored file 127-- is considered to have changed if it exists. 128-- 129monitorNonExistentFile :: FilePath -> MonitorFilePath 130monitorNonExistentFile = MonitorFile FileNotExists DirNotExists 131 132-- | Monitor a single file for existence only. The monitored file is 133-- considered to have changed if it no longer exists. 134-- 135monitorFileExistence :: FilePath -> MonitorFilePath 136monitorFileExistence = MonitorFile FileExists DirNotExists 137 138-- | Monitor a single directory for changes, based on its modification 139-- time. The monitored directory is considered to have changed if it no 140-- longer exists or if its modification time has changed. 141-- 142monitorDirectory :: FilePath -> MonitorFilePath 143monitorDirectory = MonitorFile FileNotExists DirModTime 144 145-- | Monitor a single non-existent directory for changes. The monitored 146-- directory is considered to have changed if it exists. 147-- 148monitorNonExistentDirectory :: FilePath -> MonitorFilePath 149-- Just an alias for monitorNonExistentFile, since you can't 150-- tell the difference between a non-existent directory and 151-- a non-existent file :) 152monitorNonExistentDirectory = monitorNonExistentFile 153 154-- | Monitor a single directory for existence. The monitored directory is 155-- considered to have changed only if it no longer exists. 156-- 157monitorDirectoryExistence :: FilePath -> MonitorFilePath 158monitorDirectoryExistence = MonitorFile FileNotExists DirExists 159 160-- | Monitor a single file or directory for changes, based on its modification 161-- time. The monitored file is considered to have changed if it no longer 162-- exists or if its modification time has changed. 163-- 164monitorFileOrDirectory :: FilePath -> MonitorFilePath 165monitorFileOrDirectory = MonitorFile FileModTime DirModTime 166 167-- | Monitor a set of files (or directories) identified by a file glob. 168-- The monitored glob is considered to have changed if the set of files 169-- matching the glob changes (i.e. creations or deletions), or for files if the 170-- modification time and content hash of any matching file has changed. 171-- 172monitorFileGlob :: FilePathGlob -> MonitorFilePath 173monitorFileGlob = MonitorFileGlob FileHashed DirExists 174 175-- | Monitor a set of files (or directories) identified by a file glob for 176-- existence only. The monitored glob is considered to have changed if the set 177-- of files matching the glob changes (i.e. creations or deletions). 178-- 179monitorFileGlobExistence :: FilePathGlob -> MonitorFilePath 180monitorFileGlobExistence = MonitorFileGlob FileExists DirExists 181 182-- | Creates a list of files to monitor when you search for a file which 183-- unsuccessfully looked in @notFoundAtPaths@ before finding it at 184-- @foundAtPath@. 185monitorFileSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] 186monitorFileSearchPath notFoundAtPaths foundAtPath = 187 monitorFile foundAtPath 188 : map monitorNonExistentFile notFoundAtPaths 189 190-- | Similar to 'monitorFileSearchPath', but also instructs us to 191-- monitor the hash of the found file. 192monitorFileHashedSearchPath :: [FilePath] -> FilePath -> [MonitorFilePath] 193monitorFileHashedSearchPath notFoundAtPaths foundAtPath = 194 monitorFileHashed foundAtPath 195 : map monitorNonExistentFile notFoundAtPaths 196 197 198------------------------------------------------------------------------------ 199-- Implementation types, files status 200-- 201 202-- | The state necessary to determine whether a set of monitored 203-- files has changed. It consists of two parts: a set of specific 204-- files to be monitored (index by their path), and a list of 205-- globs, which monitor may files at once. 206data MonitorStateFileSet 207 = MonitorStateFileSet ![MonitorStateFile] 208 ![MonitorStateGlob] 209 -- Morally this is not actually a set but a bag (represented by lists). 210 -- There is no principled reason to use a bag here rather than a set, but 211 -- there is also no particular gain either. That said, we do preserve the 212 -- order of the lists just to reduce confusion (and have predictable I/O 213 -- patterns). 214 deriving (Show, Generic) 215 216instance Binary MonitorStateFileSet 217instance Structured MonitorStateFileSet 218 219type Hash = Int 220 221-- | The state necessary to determine whether a monitored file has changed. 222-- 223-- This covers all the cases of 'MonitorFilePath' except for globs which is 224-- covered separately by 'MonitorStateGlob'. 225-- 226-- The @Maybe ModTime@ is to cover the case where we already consider the 227-- file to have changed, either because it had already changed by the time we 228-- did the snapshot (i.e. too new, changed since start of update process) or it 229-- no longer exists at all. 230-- 231data MonitorStateFile = MonitorStateFile !MonitorKindFile !MonitorKindDir 232 !FilePath !MonitorStateFileStatus 233 deriving (Show, Generic) 234 235data MonitorStateFileStatus 236 = MonitorStateFileExists 237 | MonitorStateFileModTime !ModTime -- ^ cached file mtime 238 | MonitorStateFileHashed !ModTime !Hash -- ^ cached mtime and content hash 239 | MonitorStateDirExists 240 | MonitorStateDirModTime !ModTime -- ^ cached dir mtime 241 | MonitorStateNonExistent 242 | MonitorStateAlreadyChanged 243 deriving (Show, Generic) 244 245instance Binary MonitorStateFile 246instance Binary MonitorStateFileStatus 247instance Structured MonitorStateFile 248instance Structured MonitorStateFileStatus 249 250-- | The state necessary to determine whether the files matched by a globbing 251-- match have changed. 252-- 253data MonitorStateGlob = MonitorStateGlob !MonitorKindFile !MonitorKindDir 254 !FilePathRoot !MonitorStateGlobRel 255 deriving (Show, Generic) 256 257data MonitorStateGlobRel 258 = MonitorStateGlobDirs 259 !Glob !FilePathGlobRel 260 !ModTime 261 ![(FilePath, MonitorStateGlobRel)] -- invariant: sorted 262 263 | MonitorStateGlobFiles 264 !Glob 265 !ModTime 266 ![(FilePath, MonitorStateFileStatus)] -- invariant: sorted 267 268 | MonitorStateGlobDirTrailing 269 deriving (Show, Generic) 270 271instance Binary MonitorStateGlob 272instance Binary MonitorStateGlobRel 273 274instance Structured MonitorStateGlob 275instance Structured MonitorStateGlobRel 276 277-- | We can build a 'MonitorStateFileSet' from a set of 'MonitorFilePath' by 278-- inspecting the state of the file system, and we can go in the reverse 279-- direction by just forgetting the extra info. 280-- 281reconstructMonitorFilePaths :: MonitorStateFileSet -> [MonitorFilePath] 282reconstructMonitorFilePaths (MonitorStateFileSet singlePaths globPaths) = 283 map getSinglePath singlePaths 284 ++ map getGlobPath globPaths 285 where 286 getSinglePath (MonitorStateFile kindfile kinddir filepath _) = 287 MonitorFile kindfile kinddir filepath 288 289 getGlobPath (MonitorStateGlob kindfile kinddir root gstate) = 290 MonitorFileGlob kindfile kinddir $ FilePathGlob root $ 291 case gstate of 292 MonitorStateGlobDirs glob globs _ _ -> GlobDir glob globs 293 MonitorStateGlobFiles glob _ _ -> GlobFile glob 294 MonitorStateGlobDirTrailing -> GlobDirTrailing 295 296------------------------------------------------------------------------------ 297-- Checking the status of monitored files 298-- 299 300-- | A monitor for detecting changes to a set of files. It can be used to 301-- efficiently test if any of a set of files (specified individually or by 302-- glob patterns) has changed since some snapshot. In addition, it also checks 303-- for changes in a value (of type @a@), and when there are no changes in 304-- either it returns a saved value (of type @b@). 305-- 306-- The main use case looks like this: suppose we have some expensive action 307-- that depends on certain pure inputs and reads some set of files, and 308-- produces some pure result. We want to avoid re-running this action when it 309-- would produce the same result. So we need to monitor the files the action 310-- looked at, the other pure input values, and we need to cache the result. 311-- Then at some later point, if the input value didn't change, and none of the 312-- files changed, then we can re-use the cached result rather than re-running 313-- the action. 314-- 315-- This can be achieved using a 'FileMonitor'. Each 'FileMonitor' instance 316-- saves state in a disk file, so the file for that has to be specified, 317-- making sure it is unique. The pattern is to use 'checkFileMonitorChanged' 318-- to see if there's been any change. If there is, re-run the action, keeping 319-- track of the files, then use 'updateFileMonitor' to record the current 320-- set of files to monitor, the current input value for the action, and the 321-- result of the action. 322-- 323-- The typical occurrence of this pattern is captured by 'rerunIfChanged' 324-- and the 'Rebuild' monad. More complicated cases may need to use 325-- 'checkFileMonitorChanged' and 'updateFileMonitor' directly. 326-- 327data FileMonitor a b 328 = FileMonitor { 329 330 -- | The file where this 'FileMonitor' should store its state. 331 -- 332 fileMonitorCacheFile :: FilePath, 333 334 -- | Compares a new cache key with old one to determine if a 335 -- corresponding cached value is still valid. 336 -- 337 -- Typically this is just an equality test, but in some 338 -- circumstances it can make sense to do things like subset 339 -- comparisons. 340 -- 341 -- The first arg is the new value, the second is the old cached value. 342 -- 343 fileMonitorKeyValid :: a -> a -> Bool, 344 345 -- | When this mode is enabled, if 'checkFileMonitorChanged' returns 346 -- 'MonitoredValueChanged' then we have the guarantee that no files 347 -- changed, that the value change was the only change. In the default 348 -- mode no such guarantee is provided which is slightly faster. 349 -- 350 fileMonitorCheckIfOnlyValueChanged :: Bool 351 } 352 353-- | Define a new file monitor. 354-- 355-- It's best practice to define file monitor values once, and then use the 356-- same value for 'checkFileMonitorChanged' and 'updateFileMonitor' as this 357-- ensures you get the same types @a@ and @b@ for reading and writing. 358-- 359-- The path of the file monitor itself must be unique because it keeps state 360-- on disk and these would clash. 361-- 362newFileMonitor :: Eq a => FilePath -- ^ The file to cache the state of the 363 -- file monitor. Must be unique. 364 -> FileMonitor a b 365newFileMonitor path = FileMonitor path (==) False 366 367-- | The result of 'checkFileMonitorChanged': either the monitored files or 368-- value changed (and it tells us which it was) or nothing changed and we get 369-- the cached result. 370-- 371data MonitorChanged a b = 372 -- | The monitored files and value did not change. The cached result is 373 -- @b@. 374 -- 375 -- The set of monitored files is also returned. This is useful 376 -- for composing or nesting 'FileMonitor's. 377 MonitorUnchanged b [MonitorFilePath] 378 379 -- | The monitor found that something changed. The reason is given. 380 -- 381 | MonitorChanged (MonitorChangedReason a) 382 deriving Show 383 384-- | What kind of change 'checkFileMonitorChanged' detected. 385-- 386data MonitorChangedReason a = 387 388 -- | One of the files changed (existence, file type, mtime or file 389 -- content, depending on the 'MonitorFilePath' in question) 390 MonitoredFileChanged FilePath 391 392 -- | The pure input value changed. 393 -- 394 -- The previous cached key value is also returned. This is sometimes 395 -- useful when using a 'fileMonitorKeyValid' function that is not simply 396 -- '(==)', when invalidation can be partial. In such cases it can make 397 -- sense to 'updateFileMonitor' with a key value that's a combination of 398 -- the new and old (e.g. set union). 399 | MonitoredValueChanged a 400 401 -- | There was no saved monitor state, cached value etc. Ie the file 402 -- for the 'FileMonitor' does not exist. 403 | MonitorFirstRun 404 405 -- | There was existing state, but we could not read it. This typically 406 -- happens when the code has changed compared to an existing 'FileMonitor' 407 -- cache file and type of the input value or cached value has changed such 408 -- that we cannot decode the values. This is completely benign as we can 409 -- treat is just as if there were no cache file and re-run. 410 | MonitorCorruptCache 411 deriving (Eq, Show, Functor) 412 413-- | Test if the input value or files monitored by the 'FileMonitor' have 414-- changed. If not, return the cached value. 415-- 416-- See 'FileMonitor' for a full explanation. 417-- 418checkFileMonitorChanged 419 :: (Binary a, Structured a, Binary b, Structured b) 420 => FileMonitor a b -- ^ cache file path 421 -> FilePath -- ^ root directory 422 -> a -- ^ guard or key value 423 -> IO (MonitorChanged a b) -- ^ did the key or any paths change? 424checkFileMonitorChanged 425 monitor@FileMonitor { fileMonitorKeyValid, 426 fileMonitorCheckIfOnlyValueChanged } 427 root currentKey = 428 429 -- Consider it a change if the cache file does not exist, 430 -- or we cannot decode it. Sadly ErrorCall can still happen, despite 431 -- using decodeFileOrFail, e.g. Data.Char.chr errors 432 433 handleDoesNotExist (MonitorChanged MonitorFirstRun) $ 434 handleErrorCall (MonitorChanged MonitorCorruptCache) $ 435 readCacheFile monitor 436 >>= either (\_ -> return (MonitorChanged MonitorCorruptCache)) 437 checkStatusCache 438 439 where 440 checkStatusCache (cachedFileStatus, cachedKey, cachedResult) = do 441 change <- checkForChanges 442 case change of 443 Just reason -> return (MonitorChanged reason) 444 Nothing -> return (MonitorUnchanged cachedResult monitorFiles) 445 where monitorFiles = reconstructMonitorFilePaths cachedFileStatus 446 where 447 -- In fileMonitorCheckIfOnlyValueChanged mode we want to guarantee that 448 -- if we return MonitoredValueChanged that only the value changed. 449 -- We do that by checkin for file changes first. Otherwise it makes 450 -- more sense to do the cheaper test first. 451 checkForChanges 452 | fileMonitorCheckIfOnlyValueChanged 453 = checkFileChange cachedFileStatus cachedKey cachedResult 454 `mplusMaybeT` 455 checkValueChange cachedKey 456 457 | otherwise 458 = checkValueChange cachedKey 459 `mplusMaybeT` 460 checkFileChange cachedFileStatus cachedKey cachedResult 461 462 mplusMaybeT :: Monad m => m (Maybe a) -> m (Maybe a) -> m (Maybe a) 463 mplusMaybeT ma mb = do 464 mx <- ma 465 case mx of 466 Nothing -> mb 467 Just x -> return (Just x) 468 469 -- Check if the guard value has changed 470 checkValueChange cachedKey 471 | not (fileMonitorKeyValid currentKey cachedKey) 472 = return (Just (MonitoredValueChanged cachedKey)) 473 | otherwise 474 = return Nothing 475 476 -- Check if any file has changed 477 checkFileChange cachedFileStatus cachedKey cachedResult = do 478 res <- probeFileSystem root cachedFileStatus 479 case res of 480 -- Some monitored file has changed 481 Left changedPath -> 482 return (Just (MonitoredFileChanged (normalise changedPath))) 483 484 -- No monitored file has changed 485 Right (cachedFileStatus', cacheStatus) -> do 486 487 -- But we might still want to update the cache 488 whenCacheChanged cacheStatus $ 489 rewriteCacheFile monitor cachedFileStatus' cachedKey cachedResult 490 491 return Nothing 492 493-- | Helper for reading the cache file. 494-- 495-- This determines the type and format of the binary cache file. 496-- 497readCacheFile :: (Binary a, Structured a, Binary b, Structured b) 498 => FileMonitor a b 499 -> IO (Either String (MonitorStateFileSet, a, b)) 500readCacheFile FileMonitor {fileMonitorCacheFile} = 501 withBinaryFile fileMonitorCacheFile ReadMode $ \hnd -> do 502 contents <- BS.hGetContents hnd 503 structuredDecodeOrFailIO contents 504 505-- | Helper for writing the cache file. 506-- 507-- This determines the type and format of the binary cache file. 508-- 509rewriteCacheFile :: (Binary a, Structured a, Binary b, Structured b) 510 => FileMonitor a b 511 -> MonitorStateFileSet -> a -> b -> IO () 512rewriteCacheFile FileMonitor {fileMonitorCacheFile} fileset key result = 513 writeFileAtomic fileMonitorCacheFile $ 514 structuredEncode (fileset, key, result) 515 516-- | Probe the file system to see if any of the monitored files have changed. 517-- 518-- It returns Nothing if any file changed, or returns a possibly updated 519-- file 'MonitorStateFileSet' plus an indicator of whether it actually changed. 520-- 521-- We may need to update the cache since there may be changes in the filesystem 522-- state which don't change any of our affected files. 523-- 524-- Consider the glob @{proj1,proj2}\/\*.cabal@. Say we first run and find a 525-- @proj1@ directory containing @proj1.cabal@ yet no @proj2@. If we later run 526-- and find @proj2@ was created, yet contains no files matching @*.cabal@ then 527-- we want to update the cache despite no changes in our relevant file set. 528-- Specifically, we should add an mtime for this directory so we can avoid 529-- re-traversing the directory in future runs. 530-- 531probeFileSystem :: FilePath -> MonitorStateFileSet 532 -> IO (Either FilePath (MonitorStateFileSet, CacheChanged)) 533probeFileSystem root (MonitorStateFileSet singlePaths globPaths) = 534 runChangedM $ do 535 sequence_ 536 [ probeMonitorStateFileStatus root file status 537 | MonitorStateFile _ _ file status <- singlePaths ] 538 -- The glob monitors can require state changes 539 globPaths' <- 540 sequence 541 [ probeMonitorStateGlob root globPath 542 | globPath <- globPaths ] 543 return (MonitorStateFileSet singlePaths globPaths') 544 545 546----------------------------------------------- 547-- Monad for checking for file system changes 548-- 549-- We need to be able to bail out if we detect a change (using ExceptT), 550-- but if there's no change we need to be able to rebuild the monitor 551-- state. And we want to optimise that rebuilding by keeping track if 552-- anything actually changed (using StateT), so that in the typical case 553-- we can avoid rewriting the state file. 554 555newtype ChangedM a = ChangedM (StateT CacheChanged (ExceptT FilePath IO) a) 556 deriving (Functor, Applicative, Monad, MonadIO) 557 558runChangedM :: ChangedM a -> IO (Either FilePath (a, CacheChanged)) 559runChangedM (ChangedM action) = 560 runExceptT $ State.runStateT action CacheUnchanged 561 562somethingChanged :: FilePath -> ChangedM a 563somethingChanged path = ChangedM $ throwError path 564 565cacheChanged :: ChangedM () 566cacheChanged = ChangedM $ State.put CacheChanged 567 568mapChangedFile :: (FilePath -> FilePath) -> ChangedM a -> ChangedM a 569mapChangedFile adjust (ChangedM a) = 570 ChangedM (mapStateT (withExceptT adjust) a) 571 572data CacheChanged = CacheChanged | CacheUnchanged 573 574whenCacheChanged :: Monad m => CacheChanged -> m () -> m () 575whenCacheChanged CacheChanged action = action 576whenCacheChanged CacheUnchanged _ = return () 577 578---------------------- 579 580-- | Probe the file system to see if a single monitored file has changed. 581-- 582probeMonitorStateFileStatus :: FilePath -> FilePath 583 -> MonitorStateFileStatus 584 -> ChangedM () 585probeMonitorStateFileStatus root file status = 586 case status of 587 MonitorStateFileExists -> 588 probeFileExistence root file 589 590 MonitorStateFileModTime mtime -> 591 probeFileModificationTime root file mtime 592 593 MonitorStateFileHashed mtime hash -> 594 probeFileModificationTimeAndHash root file mtime hash 595 596 MonitorStateDirExists -> 597 probeDirExistence root file 598 599 MonitorStateDirModTime mtime -> 600 probeFileModificationTime root file mtime 601 602 MonitorStateNonExistent -> 603 probeFileNonExistence root file 604 605 MonitorStateAlreadyChanged -> 606 somethingChanged file 607 608 609-- | Probe the file system to see if a monitored file glob has changed. 610-- 611probeMonitorStateGlob :: FilePath -- ^ root path 612 -> MonitorStateGlob 613 -> ChangedM MonitorStateGlob 614probeMonitorStateGlob relroot 615 (MonitorStateGlob kindfile kinddir globroot glob) = do 616 root <- liftIO $ getFilePathRootDirectory globroot relroot 617 case globroot of 618 FilePathRelative -> 619 MonitorStateGlob kindfile kinddir globroot <$> 620 probeMonitorStateGlobRel kindfile kinddir root "." glob 621 622 -- for absolute cases, make the changed file we report absolute too 623 _ -> 624 mapChangedFile (root </>) $ 625 MonitorStateGlob kindfile kinddir globroot <$> 626 probeMonitorStateGlobRel kindfile kinddir root "" glob 627 628probeMonitorStateGlobRel :: MonitorKindFile -> MonitorKindDir 629 -> FilePath -- ^ root path 630 -> FilePath -- ^ path of the directory we are 631 -- looking in relative to @root@ 632 -> MonitorStateGlobRel 633 -> ChangedM MonitorStateGlobRel 634probeMonitorStateGlobRel kindfile kinddir root dirName 635 (MonitorStateGlobDirs glob globPath mtime children) = do 636 change <- liftIO $ checkDirectoryModificationTime (root </> dirName) mtime 637 case change of 638 Nothing -> do 639 children' <- sequence 640 [ do fstate' <- probeMonitorStateGlobRel 641 kindfile kinddir root 642 (dirName </> fname) fstate 643 return (fname, fstate') 644 | (fname, fstate) <- children ] 645 return $! MonitorStateGlobDirs glob globPath mtime children' 646 647 Just mtime' -> do 648 -- directory modification time changed: 649 -- a matching subdir may have been added or deleted 650 matches <- filterM (\entry -> let subdir = root </> dirName </> entry 651 in liftIO $ doesDirectoryExist subdir) 652 . filter (matchGlob glob) 653 =<< liftIO (getDirectoryContents (root </> dirName)) 654 655 children' <- traverse probeMergeResult $ 656 mergeBy (\(path1,_) path2 -> compare path1 path2) 657 children 658 (sort matches) 659 return $! MonitorStateGlobDirs glob globPath mtime' children' 660 -- Note that just because the directory has changed, we don't force 661 -- a cache rewrite with 'cacheChanged' since that has some cost, and 662 -- all we're saving is scanning the directory. But we do rebuild the 663 -- cache with the new mtime', so that if the cache is rewritten for 664 -- some other reason, we'll take advantage of that. 665 666 where 667 probeMergeResult :: MergeResult (FilePath, MonitorStateGlobRel) FilePath 668 -> ChangedM (FilePath, MonitorStateGlobRel) 669 670 -- Only in cached (directory deleted) 671 probeMergeResult (OnlyInLeft (path, fstate)) = do 672 case allMatchingFiles (dirName </> path) fstate of 673 [] -> return (path, fstate) 674 -- Strictly speaking we should be returning 'CacheChanged' above 675 -- as we should prune the now-missing 'MonitorStateGlobRel'. However 676 -- we currently just leave these now-redundant entries in the 677 -- cache as they cost no IO and keeping them allows us to avoid 678 -- rewriting the cache. 679 (file:_) -> somethingChanged file 680 681 -- Only in current filesystem state (directory added) 682 probeMergeResult (OnlyInRight path) = do 683 fstate <- liftIO $ buildMonitorStateGlobRel Nothing Map.empty 684 kindfile kinddir root (dirName </> path) globPath 685 case allMatchingFiles (dirName </> path) fstate of 686 (file:_) -> somethingChanged file 687 -- This is the only case where we use 'cacheChanged' because we can 688 -- have a whole new dir subtree (of unbounded size and cost), so we 689 -- need to save the state of that new subtree in the cache. 690 [] -> cacheChanged >> return (path, fstate) 691 692 -- Found in path 693 probeMergeResult (InBoth (path, fstate) _) = do 694 fstate' <- probeMonitorStateGlobRel kindfile kinddir 695 root (dirName </> path) fstate 696 return (path, fstate') 697 698 -- | Does a 'MonitorStateGlob' have any relevant files within it? 699 allMatchingFiles :: FilePath -> MonitorStateGlobRel -> [FilePath] 700 allMatchingFiles dir (MonitorStateGlobFiles _ _ entries) = 701 [ dir </> fname | (fname, _) <- entries ] 702 allMatchingFiles dir (MonitorStateGlobDirs _ _ _ entries) = 703 [ res 704 | (subdir, fstate) <- entries 705 , res <- allMatchingFiles (dir </> subdir) fstate ] 706 allMatchingFiles dir MonitorStateGlobDirTrailing = 707 [dir] 708 709probeMonitorStateGlobRel _ _ root dirName 710 (MonitorStateGlobFiles glob mtime children) = do 711 change <- liftIO $ checkDirectoryModificationTime (root </> dirName) mtime 712 mtime' <- case change of 713 Nothing -> return mtime 714 Just mtime' -> do 715 -- directory modification time changed: 716 -- a matching file may have been added or deleted 717 matches <- return . filter (matchGlob glob) 718 =<< liftIO (getDirectoryContents (root </> dirName)) 719 720 traverse_ probeMergeResult $ 721 mergeBy (\(path1,_) path2 -> compare path1 path2) 722 children 723 (sort matches) 724 return mtime' 725 726 -- Check that none of the children have changed 727 for_ children $ \(file, status) -> 728 probeMonitorStateFileStatus root (dirName </> file) status 729 730 731 return (MonitorStateGlobFiles glob mtime' children) 732 -- Again, we don't force a cache rewite with 'cacheChanged', but we do use 733 -- the new mtime' if any. 734 where 735 probeMergeResult :: MergeResult (FilePath, MonitorStateFileStatus) FilePath 736 -> ChangedM () 737 probeMergeResult mr = case mr of 738 InBoth _ _ -> return () 739 -- this is just to be able to accurately report which file changed: 740 OnlyInLeft (path, _) -> somethingChanged (dirName </> path) 741 OnlyInRight path -> somethingChanged (dirName </> path) 742 743probeMonitorStateGlobRel _ _ _ _ MonitorStateGlobDirTrailing = 744 return MonitorStateGlobDirTrailing 745 746------------------------------------------------------------------------------ 747 748-- | Update the input value and the set of files monitored by the 749-- 'FileMonitor', plus the cached value that may be returned in future. 750-- 751-- This takes a snapshot of the state of the monitored files right now, so 752-- 'checkFileMonitorChanged' will look for file system changes relative to 753-- this snapshot. 754-- 755-- This is typically done once the action has been completed successfully and 756-- we have the action's result and we know what files it looked at. See 757-- 'FileMonitor' for a full explanation. 758-- 759-- If we do take the snapshot after the action has completed then we have a 760-- problem. The problem is that files might have changed /while/ the action was 761-- running but /after/ the action read them. If we take the snapshot after the 762-- action completes then we will miss these changes. The solution is to record 763-- a timestamp before beginning execution of the action and then we make the 764-- conservative assumption that any file that has changed since then has 765-- already changed, ie the file monitor state for these files will be such that 766-- 'checkFileMonitorChanged' will report that they have changed. 767-- 768-- So if you do use 'updateFileMonitor' after the action (so you can discover 769-- the files used rather than predicting them in advance) then use 770-- 'beginUpdateFileMonitor' to get a timestamp and pass that. Alternatively, 771-- if you take the snapshot in advance of the action, or you're not monitoring 772-- any files then you can use @Nothing@ for the timestamp parameter. 773-- 774updateFileMonitor 775 :: (Binary a, Structured a, Binary b, Structured b) 776 => FileMonitor a b -- ^ cache file path 777 -> FilePath -- ^ root directory 778 -> Maybe MonitorTimestamp -- ^ timestamp when the update action started 779 -> [MonitorFilePath] -- ^ files of interest relative to root 780 -> a -- ^ the current key value 781 -> b -- ^ the current result value 782 -> IO () 783updateFileMonitor monitor root startTime monitorFiles 784 cachedKey cachedResult = do 785 hashcache <- readCacheFileHashes monitor 786 msfs <- buildMonitorStateFileSet startTime hashcache root monitorFiles 787 rewriteCacheFile monitor msfs cachedKey cachedResult 788 789-- | A timestamp to help with the problem of file changes during actions. 790-- See 'updateFileMonitor' for details. 791-- 792newtype MonitorTimestamp = MonitorTimestamp ModTime 793 794-- | Record a timestamp at the beginning of an action, and when the action 795-- completes call 'updateFileMonitor' passing it the timestamp. 796-- See 'updateFileMonitor' for details. 797-- 798beginUpdateFileMonitor :: IO MonitorTimestamp 799beginUpdateFileMonitor = MonitorTimestamp <$> getCurTime 800 801-- | Take the snapshot of the monitored files. That is, given the 802-- specification of the set of files we need to monitor, inspect the state 803-- of the file system now and collect the information we'll need later to 804-- determine if anything has changed. 805-- 806buildMonitorStateFileSet :: Maybe MonitorTimestamp -- ^ optional: timestamp 807 -- of the start of the action 808 -> FileHashCache -- ^ existing file hashes 809 -> FilePath -- ^ root directory 810 -> [MonitorFilePath] -- ^ patterns of interest 811 -- relative to root 812 -> IO MonitorStateFileSet 813buildMonitorStateFileSet mstartTime hashcache root = 814 go [] [] 815 where 816 go :: [MonitorStateFile] -> [MonitorStateGlob] 817 -> [MonitorFilePath] -> IO MonitorStateFileSet 818 go !singlePaths !globPaths [] = 819 return (MonitorStateFileSet (reverse singlePaths) (reverse globPaths)) 820 821 go !singlePaths !globPaths 822 (MonitorFile kindfile kinddir path : monitors) = do 823 monitorState <- MonitorStateFile kindfile kinddir path 824 <$> buildMonitorStateFile mstartTime hashcache 825 kindfile kinddir root path 826 go (monitorState : singlePaths) globPaths monitors 827 828 go !singlePaths !globPaths 829 (MonitorFileGlob kindfile kinddir globPath : monitors) = do 830 monitorState <- buildMonitorStateGlob mstartTime hashcache 831 kindfile kinddir root globPath 832 go singlePaths (monitorState : globPaths) monitors 833 834 835buildMonitorStateFile :: Maybe MonitorTimestamp -- ^ start time of update 836 -> FileHashCache -- ^ existing file hashes 837 -> MonitorKindFile -> MonitorKindDir 838 -> FilePath -- ^ the root directory 839 -> FilePath 840 -> IO MonitorStateFileStatus 841buildMonitorStateFile mstartTime hashcache kindfile kinddir root path = do 842 let abspath = root </> path 843 isFile <- doesFileExist abspath 844 isDir <- doesDirectoryExist abspath 845 case (isFile, kindfile, isDir, kinddir) of 846 (_, FileNotExists, _, DirNotExists) -> 847 -- we don't need to care if it exists now, since we check at probe time 848 return MonitorStateNonExistent 849 850 (False, _, False, _) -> 851 return MonitorStateAlreadyChanged 852 853 (True, FileExists, _, _) -> 854 return MonitorStateFileExists 855 856 (True, FileModTime, _, _) -> 857 handleIOException MonitorStateAlreadyChanged $ do 858 mtime <- getModTime abspath 859 if changedDuringUpdate mstartTime mtime 860 then return MonitorStateAlreadyChanged 861 else return (MonitorStateFileModTime mtime) 862 863 (True, FileHashed, _, _) -> 864 handleIOException MonitorStateAlreadyChanged $ do 865 mtime <- getModTime abspath 866 if changedDuringUpdate mstartTime mtime 867 then return MonitorStateAlreadyChanged 868 else do hash <- getFileHash hashcache abspath abspath mtime 869 return (MonitorStateFileHashed mtime hash) 870 871 (_, _, True, DirExists) -> 872 return MonitorStateDirExists 873 874 (_, _, True, DirModTime) -> 875 handleIOException MonitorStateAlreadyChanged $ do 876 mtime <- getModTime abspath 877 if changedDuringUpdate mstartTime mtime 878 then return MonitorStateAlreadyChanged 879 else return (MonitorStateDirModTime mtime) 880 881 (False, _, True, DirNotExists) -> return MonitorStateAlreadyChanged 882 (True, FileNotExists, False, _) -> return MonitorStateAlreadyChanged 883 884-- | If we have a timestamp for the beginning of the update, then any file 885-- mtime later than this means that it changed during the update and we ought 886-- to consider the file as already changed. 887-- 888changedDuringUpdate :: Maybe MonitorTimestamp -> ModTime -> Bool 889changedDuringUpdate (Just (MonitorTimestamp startTime)) mtime 890 = mtime > startTime 891changedDuringUpdate _ _ = False 892 893-- | Much like 'buildMonitorStateFileSet' but for the somewhat complicated case 894-- of a file glob. 895-- 896-- This gets used both by 'buildMonitorStateFileSet' when we're taking the 897-- file system snapshot, but also by 'probeGlobStatus' as part of checking 898-- the monitored (globed) files for changes when we find a whole new subtree. 899-- 900buildMonitorStateGlob :: Maybe MonitorTimestamp -- ^ start time of update 901 -> FileHashCache -- ^ existing file hashes 902 -> MonitorKindFile -> MonitorKindDir 903 -> FilePath -- ^ the root directory 904 -> FilePathGlob -- ^ the matching glob 905 -> IO MonitorStateGlob 906buildMonitorStateGlob mstartTime hashcache kindfile kinddir relroot 907 (FilePathGlob globroot globPath) = do 908 root <- liftIO $ getFilePathRootDirectory globroot relroot 909 MonitorStateGlob kindfile kinddir globroot <$> 910 buildMonitorStateGlobRel 911 mstartTime hashcache kindfile kinddir root "." globPath 912 913buildMonitorStateGlobRel :: Maybe MonitorTimestamp -- ^ start time of update 914 -> FileHashCache -- ^ existing file hashes 915 -> MonitorKindFile -> MonitorKindDir 916 -> FilePath -- ^ the root directory 917 -> FilePath -- ^ directory we are examining 918 -- relative to the root 919 -> FilePathGlobRel -- ^ the matching glob 920 -> IO MonitorStateGlobRel 921buildMonitorStateGlobRel mstartTime hashcache kindfile kinddir root 922 dir globPath = do 923 let absdir = root </> dir 924 dirEntries <- getDirectoryContents absdir 925 dirMTime <- getModTime absdir 926 case globPath of 927 GlobDir glob globPath' -> do 928 subdirs <- filterM (\subdir -> doesDirectoryExist (absdir </> subdir)) 929 $ filter (matchGlob glob) dirEntries 930 subdirStates <- 931 for (sort subdirs) $ \subdir -> do 932 fstate <- buildMonitorStateGlobRel 933 mstartTime hashcache kindfile kinddir root 934 (dir </> subdir) globPath' 935 return (subdir, fstate) 936 return $! MonitorStateGlobDirs glob globPath' dirMTime subdirStates 937 938 GlobFile glob -> do 939 let files = filter (matchGlob glob) dirEntries 940 filesStates <- 941 for (sort files) $ \file -> do 942 fstate <- buildMonitorStateFile 943 mstartTime hashcache kindfile kinddir root 944 (dir </> file) 945 return (file, fstate) 946 return $! MonitorStateGlobFiles glob dirMTime filesStates 947 948 GlobDirTrailing -> 949 return MonitorStateGlobDirTrailing 950 951 952-- | We really want to avoid re-hashing files all the time. We already make 953-- the assumption that if a file mtime has not changed then we don't need to 954-- bother checking if the content hash has changed. We can apply the same 955-- assumption when updating the file monitor state. In the typical case of 956-- updating a file monitor the set of files is the same or largely the same so 957-- we can grab the previously known content hashes with their corresponding 958-- mtimes. 959-- 960type FileHashCache = Map FilePath (ModTime, Hash) 961 962-- | We declare it a cache hit if the mtime of a file is the same as before. 963-- 964lookupFileHashCache :: FileHashCache -> FilePath -> ModTime -> Maybe Hash 965lookupFileHashCache hashcache file mtime = do 966 (mtime', hash) <- Map.lookup file hashcache 967 guard (mtime' == mtime) 968 return hash 969 970-- | Either get it from the cache or go read the file 971getFileHash :: FileHashCache -> FilePath -> FilePath -> ModTime -> IO Hash 972getFileHash hashcache relfile absfile mtime = 973 case lookupFileHashCache hashcache relfile mtime of 974 Just hash -> return hash 975 Nothing -> readFileHash absfile 976 977-- | Build a 'FileHashCache' from the previous 'MonitorStateFileSet'. While 978-- in principle we could preserve the structure of the previous state, given 979-- that the set of files to monitor can change then it's simpler just to throw 980-- away the structure and use a finite map. 981-- 982readCacheFileHashes :: (Binary a, Structured a, Binary b, Structured b) 983 => FileMonitor a b -> IO FileHashCache 984readCacheFileHashes monitor = 985 handleDoesNotExist Map.empty $ 986 handleErrorCall Map.empty $ do 987 res <- readCacheFile monitor 988 case res of 989 Left _ -> return Map.empty 990 Right (msfs, _, _) -> return (mkFileHashCache msfs) 991 where 992 mkFileHashCache :: MonitorStateFileSet -> FileHashCache 993 mkFileHashCache (MonitorStateFileSet singlePaths globPaths) = 994 collectAllFileHashes singlePaths 995 `Map.union` collectAllGlobHashes globPaths 996 997 collectAllFileHashes singlePaths = 998 Map.fromList [ (fpath, (mtime, hash)) 999 | MonitorStateFile _ _ fpath 1000 (MonitorStateFileHashed mtime hash) <- singlePaths ] 1001 1002 collectAllGlobHashes globPaths = 1003 Map.fromList [ (fpath, (mtime, hash)) 1004 | MonitorStateGlob _ _ _ gstate <- globPaths 1005 , (fpath, (mtime, hash)) <- collectGlobHashes "" gstate ] 1006 1007 collectGlobHashes dir (MonitorStateGlobDirs _ _ _ entries) = 1008 [ res 1009 | (subdir, fstate) <- entries 1010 , res <- collectGlobHashes (dir </> subdir) fstate ] 1011 1012 collectGlobHashes dir (MonitorStateGlobFiles _ _ entries) = 1013 [ (dir </> fname, (mtime, hash)) 1014 | (fname, MonitorStateFileHashed mtime hash) <- entries ] 1015 1016 collectGlobHashes _dir MonitorStateGlobDirTrailing = 1017 [] 1018 1019 1020------------------------------------------------------------------------------ 1021-- Utils 1022-- 1023 1024-- | Within the @root@ directory, check if @file@ has its 'ModTime' is 1025-- the same as @mtime@, short-circuiting if it is different. 1026probeFileModificationTime :: FilePath -> FilePath -> ModTime -> ChangedM () 1027probeFileModificationTime root file mtime = do 1028 unchanged <- liftIO $ checkModificationTimeUnchanged root file mtime 1029 unless unchanged (somethingChanged file) 1030 1031-- | Within the @root@ directory, check if @file@ has its 'ModTime' and 1032-- 'Hash' is the same as @mtime@ and @hash@, short-circuiting if it is 1033-- different. 1034probeFileModificationTimeAndHash :: FilePath -> FilePath -> ModTime -> Hash 1035 -> ChangedM () 1036probeFileModificationTimeAndHash root file mtime hash = do 1037 unchanged <- liftIO $ 1038 checkFileModificationTimeAndHashUnchanged root file mtime hash 1039 unless unchanged (somethingChanged file) 1040 1041-- | Within the @root@ directory, check if @file@ still exists as a file. 1042-- If it *does not* exist, short-circuit. 1043probeFileExistence :: FilePath -> FilePath -> ChangedM () 1044probeFileExistence root file = do 1045 existsFile <- liftIO $ doesFileExist (root </> file) 1046 unless existsFile (somethingChanged file) 1047 1048-- | Within the @root@ directory, check if @dir@ still exists. 1049-- If it *does not* exist, short-circuit. 1050probeDirExistence :: FilePath -> FilePath -> ChangedM () 1051probeDirExistence root dir = do 1052 existsDir <- liftIO $ doesDirectoryExist (root </> dir) 1053 unless existsDir (somethingChanged dir) 1054 1055-- | Within the @root@ directory, check if @file@ still does not exist. 1056-- If it *does* exist, short-circuit. 1057probeFileNonExistence :: FilePath -> FilePath -> ChangedM () 1058probeFileNonExistence root file = do 1059 existsFile <- liftIO $ doesFileExist (root </> file) 1060 existsDir <- liftIO $ doesDirectoryExist (root </> file) 1061 when (existsFile || existsDir) (somethingChanged file) 1062 1063-- | Returns @True@ if, inside the @root@ directory, @file@ has the same 1064-- 'ModTime' as @mtime@. 1065checkModificationTimeUnchanged :: FilePath -> FilePath 1066 -> ModTime -> IO Bool 1067checkModificationTimeUnchanged root file mtime = 1068 handleIOException False $ do 1069 mtime' <- getModTime (root </> file) 1070 return (mtime == mtime') 1071 1072-- | Returns @True@ if, inside the @root@ directory, @file@ has the 1073-- same 'ModTime' and 'Hash' as @mtime and @chash@. 1074checkFileModificationTimeAndHashUnchanged :: FilePath -> FilePath 1075 -> ModTime -> Hash -> IO Bool 1076checkFileModificationTimeAndHashUnchanged root file mtime chash = 1077 handleIOException False $ do 1078 mtime' <- getModTime (root </> file) 1079 if mtime == mtime' 1080 then return True 1081 else do 1082 chash' <- readFileHash (root </> file) 1083 return (chash == chash') 1084 1085-- | Read a non-cryptographic hash of a @file@. 1086readFileHash :: FilePath -> IO Hash 1087readFileHash file = 1088 withBinaryFile file ReadMode $ \hnd -> 1089 evaluate . Hashable.hash =<< BS.hGetContents hnd 1090 1091-- | Given a directory @dir@, return @Nothing@ if its 'ModTime' 1092-- is the same as @mtime@, and the new 'ModTime' if it is not. 1093checkDirectoryModificationTime :: FilePath -> ModTime -> IO (Maybe ModTime) 1094checkDirectoryModificationTime dir mtime = 1095 handleIOException Nothing $ do 1096 mtime' <- getModTime dir 1097 if mtime == mtime' 1098 then return Nothing 1099 else return (Just mtime') 1100 1101-- | Run an IO computation, returning the first argument @e@ if there is an 'error' 1102-- call. ('ErrorCall') 1103handleErrorCall :: a -> IO a -> IO a 1104handleErrorCall e = handle handler where 1105#if MIN_VERSION_base(4,9,0) 1106 handler (ErrorCallWithLocation _ _) = return e 1107#else 1108 handler (ErrorCall _) = return e 1109#endif 1110 1111 1112-- | Run an IO computation, returning @e@ if there is any 'IOException'. 1113-- 1114-- This policy is OK in the file monitor code because it just causes the 1115-- monitor to report that something changed, and then code reacting to that 1116-- will normally encounter the same IO exception when it re-runs the action 1117-- that uses the file. 1118-- 1119handleIOException :: a -> IO a -> IO a 1120handleIOException e = 1121 handle (anyIOException e) 1122 where 1123 anyIOException :: a -> IOException -> IO a 1124 anyIOException x _ = return x 1125 1126 1127------------------------------------------------------------------------------ 1128-- Instances 1129-- 1130 1131