1module Darcs.Repository.Traverse
2    ( cleanInventories
3    , cleanPatches
4    , cleanPristine
5    , cleanRepository
6    , diffHashLists
7    , listInventories
8    , listInventoriesLocal
9    , listInventoriesRepoDir
10    , listPatchesLocalBucketed
11    , specialPatches
12    ) where
13
14import Darcs.Prelude
15
16import Data.Maybe ( fromJust )
17import qualified Data.ByteString.Char8 as BC ( unpack, pack )
18import qualified Data.Set as Set
19
20import System.Directory ( listDirectory )
21import System.FilePath.Posix( (</>) )
22
23import Darcs.Repository.Cache ( HashedDir(..), bucketFolder )
24import Darcs.Repository.HashedIO ( cleanHashdir )
25import Darcs.Repository.Inventory
26    ( Inventory(..)
27    , emptyInventory
28    , getValidHash
29    , inventoryPatchNames
30    , parseInventory
31    , peekPristineHash
32    , skipPristineHash
33    )
34import Darcs.Repository.InternalTypes
35    ( Repository
36    , repoCache
37    , withRepoLocation
38    )
39import Darcs.Repository.Paths
40    ( hashedInventory
41    , hashedInventoryPath
42    , inventoriesDir
43    , inventoriesDirPath
44    , patchesDirPath
45    )
46import Darcs.Repository.Prefs ( globalCacheDir )
47
48import Darcs.Util.ByteString ( gzReadFilePS )
49import Darcs.Util.Exception ( ifDoesNotExistError )
50import Darcs.Util.Global ( darcsdir, debugMessage )
51import Darcs.Util.Lock ( removeFileMayNotExist )
52
53
54cleanRepository :: Repository rt p wR wU wT -> IO ()
55cleanRepository r = cleanPristine r >> cleanInventories r >> cleanPatches r
56
57-- | The way patchfiles, inventories, and pristine trees are stored.
58-- 'PlainLayout' means all files are in the same directory. 'BucketedLayout'
59-- means we create a second level of subdirectories, such that all files whose
60-- hash starts with the same two letters are in the same directory.
61-- Currently, only the global cache uses 'BucketedLayout' while repositories
62-- use the 'PlainLayout'.
63data DirLayout = PlainLayout | BucketedLayout
64
65-- | Remove unreferenced entries in the pristine cache.
66cleanPristine :: Repository rt p wR wU wT -> IO ()
67cleanPristine r = withRepoLocation r $ do
68    debugMessage "Cleaning out the pristine cache..."
69    i <- gzReadFilePS hashedInventoryPath
70    cleanHashdir (repoCache r) HashedPristineDir [peekPristineHash i]
71
72-- | Set difference between two lists of hashes.
73diffHashLists :: [String] -> [String] -> [String]
74diffHashLists xs ys = from_set $ (to_set xs) `Set.difference` (to_set ys)
75  where
76    to_set = Set.fromList . map BC.pack
77    from_set = map BC.unpack . Set.toList
78
79-- | Remove unreferenced files in the inventories directory.
80cleanInventories :: Repository rt p wR wU wT -> IO ()
81cleanInventories _ = do
82    debugMessage "Cleaning out inventories..."
83    hs <- listInventoriesLocal
84    fs <- ifDoesNotExistError [] $ listDirectory inventoriesDirPath
85    mapM_ (removeFileMayNotExist . (inventoriesDirPath </>))
86        (diffHashLists fs hs)
87
88-- FIXME this is ugly, these files should be directly under _darcs
89-- since they are not hashed. And 'unrevert' isn't even a real patch but
90-- a patch bundle.
91
92-- | List of special patch files that may exist in the directory
93-- _darcs/patches/. We must not clean those.
94specialPatches :: [FilePath]
95specialPatches = ["unrevert", "pending", "pending.tentative"]
96
97-- | Remove unreferenced files in the patches directory.
98cleanPatches :: Repository rt p wR wU wT -> IO ()
99cleanPatches _ = do
100    debugMessage "Cleaning out patches..."
101    hs <- (specialPatches ++) <$> listPatchesLocal PlainLayout darcsdir darcsdir
102    fs <- ifDoesNotExistError [] (listDirectory patchesDirPath)
103    mapM_ (removeFileMayNotExist . (patchesDirPath </>)) (diffHashLists fs hs)
104
105-- | Return a list of the inventories hashes.
106-- The first argument can be readInventory or readInventoryLocal.
107-- The second argument specifies whether the files are expected
108-- to be stored in plain or in bucketed format.
109-- The third argument is the directory of the parent inventory files.
110-- The fourth argument is the directory of the head inventory file.
111listInventoriesWith
112  :: (FilePath -> IO Inventory)
113  -> DirLayout
114  -> String -> String -> IO [String]
115listInventoriesWith readInv dirformat baseDir startDir = do
116    mbStartingWithInv <- getStartingWithHash startDir hashedInventory
117    followStartingWiths mbStartingWithInv
118  where
119    getStartingWithHash dir file = inventoryParent <$> readInv (dir </> file)
120
121    invDir = baseDir </> inventoriesDir
122    nextDir dir = case dirformat of
123        BucketedLayout -> invDir </> bucketFolder dir
124        PlainLayout -> invDir
125
126    followStartingWiths Nothing = return []
127    followStartingWiths (Just hash) = do
128        let startingWith = getValidHash hash
129        mbNextInv <- getStartingWithHash (nextDir startingWith) startingWith
130        (startingWith :) <$> followStartingWiths mbNextInv
131
132-- | Return a list of the inventories hashes.
133-- This function attempts to retrieve missing inventory files from the cache.
134listInventories :: IO [String]
135listInventories =
136    listInventoriesWith readInventory PlainLayout darcsdir darcsdir
137
138-- | Return inventories hashes by following the head inventory.
139-- This function does not attempt to retrieve missing inventory files.
140listInventoriesLocal :: IO [String]
141listInventoriesLocal =
142    listInventoriesWith readInventoryLocal PlainLayout darcsdir darcsdir
143
144-- | Return a list of the inventories hashes.
145-- The argument @repoDir@ is the directory of the repository from which
146-- we are going to read the head inventory file.
147-- The rest of hashed files are read from the global cache.
148listInventoriesRepoDir :: String -> IO [String]
149listInventoriesRepoDir repoDir = do
150    gCacheDir' <- globalCacheDir
151    let gCacheInvDir = fromJust gCacheDir'
152    listInventoriesWith
153        readInventoryLocal
154        BucketedLayout
155        gCacheInvDir
156        (repoDir </> darcsdir)
157
158-- | Return a list of the patch filenames, extracted from inventory
159-- files, by starting with the head inventory and then following the
160-- chain of parent inventories.
161--
162-- This function does not attempt to download missing inventory files.
163--
164-- * The first argument specifies whether the files are expected
165--   to be stored in plain or in bucketed format.
166-- * The second argument is the directory of the parent inventory.
167-- * The third argument is the directory of the head inventory.
168listPatchesLocal :: DirLayout -> String -> String -> IO [String]
169listPatchesLocal dirformat baseDir startDir = do
170  inventory <- readInventory (startDir </> hashedInventory)
171  followStartingWiths
172    (inventoryParent inventory)
173    (inventoryPatchNames inventory)
174  where
175    invDir = baseDir </> inventoriesDir
176    nextDir dir =
177      case dirformat of
178        BucketedLayout -> invDir </> bucketFolder dir
179        PlainLayout -> invDir
180    followStartingWiths Nothing patches = return patches
181    followStartingWiths (Just hash) patches = do
182      let startingWith = getValidHash hash
183      inv <- readInventoryLocal (nextDir startingWith </> startingWith)
184      (patches ++) <$>
185        followStartingWiths (inventoryParent inv) (inventoryPatchNames inv)
186
187-- |listPatchesLocalBucketed is similar to listPatchesLocal, but
188-- it read the inventory directory under @darcsDir@ in bucketed format.
189listPatchesLocalBucketed :: String -> String -> IO [String]
190listPatchesLocalBucketed = listPatchesLocal BucketedLayout
191
192-- | Read the given inventory file if it exist, otherwise return an empty
193-- inventory. Used when we expect that some inventory files may be missing.
194-- Still fails with an error message if file cannot be parsed.
195readInventoryLocal :: FilePath -> IO Inventory
196readInventoryLocal path =
197  ifDoesNotExistError emptyInventory $ readInventory path
198
199-- | Read an inventory from a file. Fails with an error message if
200-- file is not there or cannot be parsed.
201readInventory :: FilePath -> IO Inventory
202readInventory path = do
203  -- FIXME we should check the hash (if this is a hashed file)
204  inv <- skipPristineHash <$> gzReadFilePS path
205  case parseInventory inv of
206    Right r -> return r
207    Left e -> fail $ unlines [unwords ["parse error in file", path], e]
208