1-- Copyright (C) 2009-2011 Petr Rockai 2-- 3-- BSD3 4{-# LANGUAGE MultiParamTypeClasses #-} 5 6-- | The abstract representation of a Tree and useful abstract utilities to 7-- handle those. 8module Darcs.Util.Tree 9 ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..) 10 , makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS 11 12 -- * Unfolding stubbed (lazy) Trees. 13 -- 14 -- | By default, Tree obtained by a read function is stubbed: it will 15 -- contain Stub items that need to be executed in order to access the 16 -- respective subtrees. 'expand' will produce an unstubbed Tree. 17 , expandUpdate, expand, expandPath, checkExpand 18 19 -- * Tree access and lookup. 20 , items, list, listImmediate, treeHash 21 , lookup, find, findFile, findTree, itemHash, itemType 22 , zipCommonFiles, zipFiles, zipTrees, diffTrees 23 , explodePath, explodePaths 24 25 -- * Files (Blobs). 26 , readBlob 27 28 -- * Filtering trees. 29 , FilterTree(..), restrict 30 31 -- * Manipulating trees. 32 , modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay 33 , addMissingHashes 34 35 -- * Properties 36 , prop_explodePath 37 ) where 38 39import Darcs.Prelude hiding ( filter ) 40import qualified Prelude ( filter ) 41 42import Control.Exception( catch, IOException ) 43import Darcs.Util.Path 44import Darcs.Util.Hash 45 46import qualified Data.ByteString.Lazy as BL 47import qualified Data.ByteString as B 48import qualified Data.Map as M 49 50import Data.Maybe( catMaybes, isNothing ) 51import Data.Either( lefts, rights ) 52import Data.List( union, sort ) 53import Control.Monad( filterM ) 54 55-------------------------------- 56-- Tree, Blob and friends 57-- 58 59data Blob m = Blob !(m BL.ByteString) !Hash 60data TreeItem m = File !(Blob m) 61 | SubTree !(Tree m) 62 | Stub !(m (Tree m)) !Hash 63 64data ItemType = TreeType | BlobType deriving (Show, Eq, Ord) 65 66-- | Abstraction of a filesystem tree. 67-- Please note that the Tree returned by the respective read operations will 68-- have TreeStub items in it. To obtain a Tree without such stubs, call 69-- expand on it, eg.: 70-- 71-- > tree <- readDarcsPristine "." >>= expand 72-- 73-- When a Tree is expanded, it becomes \"final\". All stubs are forced and the 74-- Tree can be traversed purely. Access to actual file contents stays in IO 75-- though. 76-- 77-- A Tree may have a Hash associated with it. A pair of Tree's is identical 78-- whenever their hashes are (the reverse need not hold, since not all Trees 79-- come equipped with a hash). 80data Tree m = Tree { items :: M.Map Name (TreeItem m) 81 -- | Get hash of a Tree. This is guaranteed to uniquely 82 -- identify the Tree (including any blob content), as far as 83 -- cryptographic hashes are concerned. Sha256 is recommended. 84 , treeHash :: !Hash } 85 86listImmediate :: Tree m -> [(Name, TreeItem m)] 87listImmediate = M.toList . items 88 89-- | Get a hash of a TreeItem. May be Nothing. 90itemHash :: TreeItem m -> Hash 91itemHash (File (Blob _ h)) = h 92itemHash (SubTree t) = treeHash t 93itemHash (Stub _ h) = h 94 95itemType :: TreeItem m -> ItemType 96itemType (File _) = BlobType 97itemType (SubTree _) = TreeType 98itemType (Stub _ _) = TreeType 99 100emptyTree :: Tree m 101emptyTree = Tree { items = M.empty 102 , treeHash = NoHash } 103 104emptyBlob :: (Monad m) => Blob m 105emptyBlob = Blob (return BL.empty) NoHash 106 107makeBlob :: (Monad m) => BL.ByteString -> Blob m 108makeBlob str = Blob (return str) (sha256 str) 109 110makeBlobBS :: (Monad m) => B.ByteString -> Blob m 111makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s) 112 113makeTree :: [(Name,TreeItem m)] -> Tree m 114makeTree l = Tree { items = M.fromList l 115 , treeHash = NoHash } 116 117makeTreeWithHash :: [(Name,TreeItem m)] -> Hash -> Tree m 118makeTreeWithHash l h = Tree { items = M.fromList l 119 , treeHash = h } 120 121----------------------------------- 122-- Tree access and lookup 123-- 124 125-- | Look up a 'Tree' item (an immediate subtree or blob). 126lookup :: Tree m -> Name -> Maybe (TreeItem m) 127lookup t n = M.lookup n (items t) 128 129find' :: TreeItem m -> AnchoredPath -> Maybe (TreeItem m) 130find' t (AnchoredPath []) = Just t 131find' (SubTree t) (AnchoredPath (d : rest)) = 132 case lookup t d of 133 Just sub -> find' sub (AnchoredPath rest) 134 Nothing -> Nothing 135find' _ _ = Nothing 136 137-- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid. 138find :: Tree m -> AnchoredPath -> Maybe (TreeItem m) 139find = find' . SubTree 140 141-- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does 142-- not point to a Blob. 143findFile :: Tree m -> AnchoredPath -> Maybe (Blob m) 144findFile t p = case find t p of 145 Just (File x) -> Just x 146 _ -> Nothing 147 148-- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does 149-- not point to a Tree. 150findTree :: Tree m -> AnchoredPath -> Maybe (Tree m) 151findTree t p = case find t p of 152 Just (SubTree x) -> Just x 153 _ -> Nothing 154 155-- | List all contents of a 'Tree'. 156list :: Tree m -> [(AnchoredPath, TreeItem m)] 157list t_ = paths t_ (AnchoredPath []) 158 where paths t p = [ (appendPath p n, i) 159 | (n,i) <- listImmediate t ] ++ 160 concat [ paths subt (appendPath p subn) 161 | (subn, SubTree subt) <- listImmediate t ] 162 163-- | Like 'explodePath' but for multiple paths. 164explodePaths :: Tree IO -> [AnchoredPath] -> [AnchoredPath] 165explodePaths tree paths = concatMap (explodePath tree) paths 166 167-- | All paths in the tree that that have the given path as prefix. 168-- 169-- prop> explodePath t p == Prelude.filter (p `isPrefix`) (map fst (list t)) 170explodePath :: Tree m -> AnchoredPath -> [AnchoredPath] 171explodePath tree path = 172 path : maybe [] (map (catPaths path . fst) . list) (findTree tree path) 173 174expandUpdate :: (Monad m) => (AnchoredPath -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m) 175expandUpdate update t_ = go (AnchoredPath []) t_ 176 where go path t = do 177 let subtree (name, sub) = do tree <- go (path `appendPath` name) =<< unstub sub 178 return (name, SubTree tree) 179 expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ] 180 let orig_map = M.filter (not . isSub) (items t) 181 expanded_map = M.fromList expanded 182 tree = t { items = M.union orig_map expanded_map } 183 update path tree 184 185-- | Expand a stubbed Tree into a one with no stubs in it. You might want to 186-- filter the tree before expanding to save IO. This is the basic 187-- implementation, which may be overriden by some Tree instances (this is 188-- especially true of the Index case). 189expand :: (Monad m) => Tree m -> m (Tree m) 190expand = expandUpdate $ const return 191 192-- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is 193-- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub 194-- in the resulting Tree. A non-existent path is expanded as far as it can be. 195expandPath :: (Monad m) => Tree m -> AnchoredPath -> m (Tree m) 196expandPath t (AnchoredPath []) = return t 197expandPath t (AnchoredPath (n:rest)) = 198 case lookup t n of 199 (Just item) | isSub item -> amend t n rest =<< unstub item 200 _ -> return t -- fail $ "Descent error in expandPath: " ++ show path_ 201 where 202 amend t' name rest' sub = do 203 sub' <- expandPath sub (AnchoredPath rest') 204 let tree = t' { items = M.insert name (SubTree sub') (items t') } 205 return tree 206 207-- | Check the disk version of a Tree: expands it, and checks each 208-- hash. Returns either the expanded tree or a list of AnchoredPaths 209-- where there are problems. The first argument is the hashing function 210-- used to create the tree. 211checkExpand :: (TreeItem IO -> IO Hash) -> Tree IO 212 -> IO (Either [(AnchoredPath, Hash, Maybe Hash)] (Tree IO)) 213checkExpand hashFunc t = go (AnchoredPath []) t 214 where 215 go path t_ = do 216 let 217 subtree (name, sub) = 218 do let here = path `appendPath` name 219 sub' <- (Just <$> unstub sub) `catch` \(_ :: IOException) -> return Nothing 220 case sub' of 221 Nothing -> return $ Left [(here, treeHash t_, Nothing)] 222 Just sub'' -> do 223 treeOrTrouble <- go (path `appendPath` name) sub'' 224 return $ case treeOrTrouble of 225 Left problems -> Left problems 226 Right tree -> Right (name, SubTree tree) 227 badBlob (_, f@(File (Blob _ h))) = 228 fmap (/= h) (hashFunc f `catch` (\(_ :: IOException) -> return NoHash)) 229 badBlob _ = return False 230 render (name, f@(File (Blob _ h))) = do 231 h' <- (Just <$> hashFunc f) `catch` \(_ :: IOException) -> return Nothing 232 return (path `appendPath` name, h, h') 233 render (name, _) = return (path `appendPath` name, NoHash, Nothing) 234 subs <- mapM subtree [ x | x@(_, item) <- listImmediate t_, isSub item ] 235 badBlobs <- filterM badBlob (listImmediate t) >>= mapM render 236 let problems = badBlobs ++ concat (lefts subs) 237 if null problems 238 then do 239 let orig_map = M.filter (not . isSub) (items t) 240 expanded_map = M.fromList $ rights subs 241 tree = t_ {items = orig_map `M.union` expanded_map} 242 h' <- hashFunc (SubTree t_) 243 if h' `match` treeHash t_ 244 then return $ Right tree 245 else return $ Left [(path, treeHash t_, Just h')] 246 else return $ Left problems 247 248class (Monad m) => FilterTree a m where 249 -- | Given @pred tree@, produce a 'Tree' that only has items for which 250 -- @pred@ returns @True@. 251 -- The tree might contain stubs. When expanded, these will be subject to 252 -- filtering as well. 253 filter :: (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m 254 255instance (Monad m) => FilterTree Tree m where 256 filter predicate t_ = filter' t_ (AnchoredPath []) 257 where filter' t path = t { items = M.mapMaybeWithKey (wibble path) $ items t } 258 wibble path name item = 259 let npath = path `appendPath` name in 260 if predicate npath item 261 then Just $ filterSub npath item 262 else Nothing 263 filterSub npath (SubTree t) = SubTree $ filter' t npath 264 filterSub npath (Stub stub h) = 265 Stub (do x <- stub 266 return $ filter' x npath) h 267 filterSub _ x = x 268 269-- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a 270-- identical to @tree@, but only has those items that are present in both 271-- @tree@ and @guide@. The @guide@ Tree may not contain any stubs. 272restrict :: (FilterTree t m) => Tree n -> t m -> t m 273restrict guide tree = filter accept tree 274 where accept path item = 275 case (find guide path, item) of 276 (Just (SubTree _), SubTree _) -> True 277 (Just (SubTree _), Stub _ _) -> True 278 (Just (File _), File _) -> True 279 (Just (Stub _ _), _) -> 280 error "*sulk* Go away, you, you precondition violator!" 281 (_, _) -> False 282 283-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with 284-- care. 285readBlob :: Blob m -> m BL.ByteString 286readBlob (Blob r _) = r 287 288-- | For every pair of corresponding blobs from the two supplied trees, 289-- evaluate the supplied function and accumulate the results in a list. Hint: 290-- to get IO actions through, just use sequence on the resulting list. 291-- NB. This won't expand any stubs. 292zipCommonFiles :: (AnchoredPath -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a] 293zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p 294 | (p, File x) <- list b ] 295 296-- | For each file in each of the two supplied trees, evaluate the supplied 297-- function (supplying the corresponding file from the other tree, or Nothing) 298-- and accumulate the results in a list. Hint: to get IO actions through, just 299-- use sequence on the resulting list. NB. This won't expand any stubs. 300zipFiles :: (AnchoredPath -> Maybe (Blob m) -> Maybe (Blob m) -> a) 301 -> Tree m -> Tree m -> [a] 302zipFiles f a b = [ f p (findFile a p) (findFile b p) 303 | p <- paths a `sortedUnion` paths b ] 304 where paths t = sort [ p | (p, File _) <- list t ] 305 306zipTrees :: (AnchoredPath -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a) 307 -> Tree m -> Tree m -> [a] 308zipTrees f a b = [ f p (find a p) (find b p) 309 | p <- reverse (paths a `sortedUnion` paths b) ] 310 where paths t = sort [ p | (p, _) <- list t ] 311 312-- | Helper function for taking the union of AnchoredPath lists that 313-- are already sorted. This function does not check the precondition 314-- so use it carefully. 315sortedUnion :: [AnchoredPath] -> [AnchoredPath] -> [AnchoredPath] 316sortedUnion [] ys = ys 317sortedUnion xs [] = xs 318sortedUnion a@(x:xs) b@(y:ys) = case compare x y of 319 LT -> x : sortedUnion xs b 320 EQ -> x : sortedUnion xs ys 321 GT -> y : sortedUnion a ys 322 323-- | Cautiously extracts differing subtrees from a pair of Trees. It will never 324-- do any unneccessary expanding. Tree hashes are used to cut the comparison as 325-- high up the Tree branches as possible. The result is a pair of trees that do 326-- not share any identical subtrees. They are derived from the first and second 327-- parameters respectively and they are always fully expanded. It might be 328-- advantageous to feed the result into 'zipFiles' or 'zipTrees'. 329diffTrees :: forall m. (Monad m) => Tree m -> Tree m -> m (Tree m, Tree m) 330diffTrees left right = 331 if treeHash left `match` treeHash right 332 then return (emptyTree, emptyTree) 333 else diff left right 334 where isFile (File _) = True 335 isFile _ = False 336 notFile = not . isFile 337 isEmpty = null . listImmediate 338 subtree :: TreeItem m -> m (Tree m) 339 subtree (Stub x _) = x 340 subtree (SubTree x) = return x 341 subtree (File _) = error "diffTrees tried to descend a File as a subtree" 342 maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand) 343 maybeUnfold (SubTree x) = SubTree `fmap` expand x 344 maybeUnfold i = return i 345 immediateN t = [ n | (n, _) <- listImmediate t ] 346 diff left' right' = do 347 is <- sequence [ 348 case (lookup left' n, lookup right' n) of 349 (Just l, Nothing) -> do 350 l' <- maybeUnfold l 351 return (n, Just l', Nothing) 352 (Nothing, Just r) -> do 353 r' <- maybeUnfold r 354 return (n, Nothing, Just r') 355 (Just l, Just r) 356 | itemHash l `match` itemHash r -> 357 return (n, Nothing, Nothing) 358 | notFile l && notFile r -> 359 do x <- subtree l 360 y <- subtree r 361 (x', y') <- diffTrees x y 362 if isEmpty x' && isEmpty y' 363 then return (n, Nothing, Nothing) 364 else return (n, Just $ SubTree x', Just $ SubTree y') 365 | isFile l && isFile r -> 366 return (n, Just l, Just r) 367 | otherwise -> 368 do l' <- maybeUnfold l 369 r' <- maybeUnfold r 370 return (n, Just l', Just r') 371 _ -> error "n lookups failed" 372 | n <- immediateN left' `union` immediateN right' ] 373 let is_l = [ (n, l) | (n, Just l, _) <- is ] 374 is_r = [ (n, r) | (n, _, Just r) <- is ] 375 return (makeTree is_l, makeTree is_r) 376 377-- | Modify a Tree (by replacing, or removing or adding items). 378modifyTree :: (Monad m) => Tree m -> AnchoredPath -> Maybe (TreeItem m) -> Tree m 379modifyTree t_ p_ i_ = snd $ go t_ p_ i_ 380 where fix t unmod items' = (unmod, t { items = (countmap items':: Int) `seq` items' 381 , treeHash = if unmod then treeHash t else NoHash }) 382 383 go t (AnchoredPath []) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub) 384 385 go t (AnchoredPath [n]) (Just item) = fix t unmod items' 386 where !items' = M.insert n item (items t) 387 !unmod = itemHash item `match` case lookup t n of 388 Nothing -> NoHash 389 Just i -> itemHash i 390 391 go t (AnchoredPath [n]) Nothing = fix t unmod items' 392 where !items' = M.delete n (items t) 393 !unmod = isNothing $ lookup t n 394 395 go t path@(AnchoredPath (n:r)) item = fix t unmod items' 396 where subtree s = go s (AnchoredPath r) item 397 !items' = M.insert n sub (items t) 398 !sub = snd sub' 399 !unmod = fst sub' 400 !sub' = case lookup t n of 401 Just (SubTree s) -> let (mod', sub'') = subtree s in (mod', SubTree sub'') 402 Just (Stub s _) -> (False, Stub (do x <- s 403 return $! snd $! subtree x) NoHash) 404 Nothing -> (False, SubTree $! snd $! subtree emptyTree) 405 _ -> error $ "Modify tree at " ++ show path 406 407 go _ (AnchoredPath []) (Just (Stub _ _)) = 408 error $ "descending in modifyTree, case = (Just (Stub _ _)), path = " ++ show p_ 409 go _ (AnchoredPath []) (Just (File _)) = 410 error $ "descending in modifyTree, case = (Just (File _)), path = " ++ show p_ 411 go _ (AnchoredPath []) Nothing = 412 error $ "descending in modifyTree, case = Nothing, path = " ++ show p_ 413 414countmap :: forall a k. M.Map k a -> Int 415countmap = M.foldr (\_ i -> i + 1) 0 416 417updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m 418updateSubtrees fun t = 419 fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t 420 , treeHash = NoHash } 421 where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s) 422 update (k, File f) = (k, File f) 423 update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder" 424 425-- | Does /not/ expand the tree. 426updateTree :: (Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m) 427updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t 428 429-- | Does /not/ expand the tree. 430partiallyUpdateTree :: (Monad m) => (TreeItem m -> m (TreeItem m)) 431 -> (AnchoredPath -> TreeItem m -> Bool) -> Tree m -> m (Tree m) 432partiallyUpdateTree fun predi t' = go (AnchoredPath []) t' 433 where go path t = do 434 items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t) 435 subtree <- fun . SubTree $ t { items = items' 436 , treeHash = NoHash } 437 case subtree of 438 SubTree t'' -> return t'' 439 _ -> error "function passed to partiallyUpdateTree changed SubTree to something else" 440 maybeupdate path (k, item) = if predi (path `appendPath` k) item 441 then update (path `appendPath` k) (k, item) 442 else return (k, item) 443 update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree 444 update _ (k, item) = (\new -> (k, new)) <$> fun item 445 446-- | Lay one tree over another. The resulting Tree will look like the base (1st 447-- parameter) Tree, although any items also present in the overlay Tree will be 448-- taken from the overlay. It is not allowed to overlay a different kind of an 449-- object, nor it is allowed for the overlay to add new objects to base. This 450-- means that the overlay Tree should be a subset of the base Tree (although 451-- any extraneous items will be ignored by the implementation). 452overlay :: (Monad m) => Tree m -> Tree m -> Tree m 453overlay base over = Tree { items = M.fromList immediate 454 , treeHash = NoHash } 455 where immediate = [ (n, get n) | (n, _) <- listImmediate base ] 456 get n = case (M.lookup n $ items base, M.lookup n $ items over) of 457 (Just (File _), Just f@(File _)) -> f 458 (Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o 459 (Just (Stub b _), Just (SubTree o)) -> Stub (flip overlay o `fmap` b) NoHash 460 (Just (SubTree b), Just (Stub o _)) -> Stub (overlay b `fmap` o) NoHash 461 (Just (Stub b _), Just (Stub o _)) -> Stub (do o' <- o 462 b' <- b 463 return $ overlay b' o') NoHash 464 (Just x, _) -> x 465 (_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "." 466 467addMissingHashes :: (Monad m) => (TreeItem m -> m Hash) -> Tree m -> m (Tree m) 468addMissingHashes make = updateTree update -- use partiallyUpdateTree here 469 where update (SubTree t) = make (SubTree t) >>= \x -> return $ SubTree (t { treeHash = x }) 470 update (File blob@(Blob con NoHash)) = 471 do hash <- make $ File blob 472 return $ File (Blob con hash) 473 update (Stub s NoHash) = update . SubTree =<< s 474 update x = return x 475 476------ Private utilities shared among multiple functions. -------- 477 478unstub :: (Monad m) => TreeItem m -> m (Tree m) 479unstub (Stub s _) = s 480unstub (SubTree s) = return s 481unstub _ = return emptyTree 482 483isSub :: TreeItem m -> Bool 484isSub (File _) = False 485isSub _ = True 486 487-- Properties 488 489-- | Specification of 'explodePath' 490prop_explodePath :: Tree m -> AnchoredPath -> Bool 491prop_explodePath t p = 492 explodePath t p == Prelude.filter (isPrefix p) (map fst (list t)) 493