1--  Copyright (C) 2009-2011 Petr Rockai
2--
3--  BSD3
4{-# LANGUAGE MultiParamTypeClasses #-}
5
6-- | The abstract representation of a Tree and useful abstract utilities to
7-- handle those.
8module Darcs.Util.Tree
9    ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..)
10    , makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS
11
12    -- * Unfolding stubbed (lazy) Trees.
13    --
14    -- | By default, Tree obtained by a read function is stubbed: it will
15    -- contain Stub items that need to be executed in order to access the
16    -- respective subtrees. 'expand' will produce an unstubbed Tree.
17    , expandUpdate, expand, expandPath, checkExpand
18
19    -- * Tree access and lookup.
20    , items, list, listImmediate, treeHash
21    , lookup, find, findFile, findTree, itemHash, itemType
22    , zipCommonFiles, zipFiles, zipTrees, diffTrees
23    , explodePath, explodePaths
24
25    -- * Files (Blobs).
26    , readBlob
27
28    -- * Filtering trees.
29    , FilterTree(..), restrict
30
31    -- * Manipulating trees.
32    , modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay
33    , addMissingHashes
34
35    -- * Properties
36    , prop_explodePath
37    ) where
38
39import Darcs.Prelude hiding ( filter )
40import qualified Prelude ( filter )
41
42import Control.Exception( catch, IOException )
43import Darcs.Util.Path
44import Darcs.Util.Hash
45
46import qualified Data.ByteString.Lazy as BL
47import qualified Data.ByteString as B
48import qualified Data.Map as M
49
50import Data.Maybe( catMaybes, isNothing )
51import Data.Either( lefts, rights )
52import Data.List( union, sort )
53import Control.Monad( filterM )
54
55--------------------------------
56-- Tree, Blob and friends
57--
58
59data Blob m = Blob !(m BL.ByteString) !Hash
60data TreeItem m = File !(Blob m)
61                | SubTree !(Tree m)
62                | Stub !(m (Tree m)) !Hash
63
64data ItemType = TreeType | BlobType deriving (Show, Eq, Ord)
65
66-- | Abstraction of a filesystem tree.
67-- Please note that the Tree returned by the respective read operations will
68-- have TreeStub items in it. To obtain a Tree without such stubs, call
69-- expand on it, eg.:
70--
71-- > tree <- readDarcsPristine "." >>= expand
72--
73-- When a Tree is expanded, it becomes \"final\". All stubs are forced and the
74-- Tree can be traversed purely. Access to actual file contents stays in IO
75-- though.
76--
77-- A Tree may have a Hash associated with it. A pair of Tree's is identical
78-- whenever their hashes are (the reverse need not hold, since not all Trees
79-- come equipped with a hash).
80data Tree m = Tree { items :: M.Map Name (TreeItem m)
81                   -- | Get hash of a Tree. This is guaranteed to uniquely
82                   -- identify the Tree (including any blob content), as far as
83                   -- cryptographic hashes are concerned. Sha256 is recommended.
84                   , treeHash :: !Hash }
85
86listImmediate :: Tree m -> [(Name, TreeItem m)]
87listImmediate = M.toList . items
88
89-- | Get a hash of a TreeItem. May be Nothing.
90itemHash :: TreeItem m -> Hash
91itemHash (File (Blob _ h)) = h
92itemHash (SubTree t) = treeHash t
93itemHash (Stub _ h) = h
94
95itemType :: TreeItem m -> ItemType
96itemType (File _) = BlobType
97itemType (SubTree _) = TreeType
98itemType (Stub _ _) = TreeType
99
100emptyTree :: Tree m
101emptyTree = Tree { items = M.empty
102                 , treeHash = NoHash }
103
104emptyBlob :: (Monad m) => Blob m
105emptyBlob = Blob (return BL.empty) NoHash
106
107makeBlob :: (Monad m) => BL.ByteString -> Blob m
108makeBlob str = Blob (return str) (sha256 str)
109
110makeBlobBS :: (Monad m) => B.ByteString -> Blob m
111makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s)
112
113makeTree :: [(Name,TreeItem m)] -> Tree m
114makeTree l = Tree { items = M.fromList l
115                  , treeHash = NoHash }
116
117makeTreeWithHash :: [(Name,TreeItem m)] -> Hash -> Tree m
118makeTreeWithHash l h = Tree { items = M.fromList l
119                            , treeHash = h }
120
121-----------------------------------
122-- Tree access and lookup
123--
124
125-- | Look up a 'Tree' item (an immediate subtree or blob).
126lookup :: Tree m -> Name -> Maybe (TreeItem m)
127lookup t n = M.lookup n (items t)
128
129find' :: TreeItem m -> AnchoredPath -> Maybe (TreeItem m)
130find' t (AnchoredPath []) = Just t
131find' (SubTree t) (AnchoredPath (d : rest)) =
132    case lookup t d of
133      Just sub -> find' sub (AnchoredPath rest)
134      Nothing -> Nothing
135find' _ _ = Nothing
136
137-- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid.
138find :: Tree m -> AnchoredPath -> Maybe (TreeItem m)
139find = find' . SubTree
140
141-- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does
142-- not point to a Blob.
143findFile :: Tree m -> AnchoredPath -> Maybe (Blob m)
144findFile t p = case find t p of
145                 Just (File x) -> Just x
146                 _ -> Nothing
147
148-- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does
149-- not point to a Tree.
150findTree :: Tree m -> AnchoredPath -> Maybe (Tree m)
151findTree t p = case find t p of
152                 Just (SubTree x) -> Just x
153                 _ -> Nothing
154
155-- | List all contents of a 'Tree'.
156list :: Tree m -> [(AnchoredPath, TreeItem m)]
157list t_ = paths t_ (AnchoredPath [])
158    where paths t p = [ (appendPath p n, i)
159                          | (n,i) <- listImmediate t ] ++
160                    concat [ paths subt (appendPath p subn)
161                             | (subn, SubTree subt) <- listImmediate t ]
162
163-- | Like 'explodePath' but for multiple paths.
164explodePaths :: Tree IO -> [AnchoredPath] -> [AnchoredPath]
165explodePaths tree paths = concatMap (explodePath tree) paths
166
167-- | All paths in the tree that that have the given path as prefix.
168--
169-- prop> explodePath t p == Prelude.filter (p `isPrefix`) (map fst (list t))
170explodePath :: Tree m -> AnchoredPath -> [AnchoredPath]
171explodePath tree path =
172  path : maybe [] (map (catPaths path . fst) . list) (findTree tree path)
173
174expandUpdate :: (Monad m) => (AnchoredPath -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m)
175expandUpdate update t_ = go (AnchoredPath []) t_
176    where go path t = do
177            let subtree (name, sub) = do tree <- go (path `appendPath` name) =<< unstub sub
178                                         return (name, SubTree tree)
179            expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ]
180            let orig_map = M.filter (not . isSub) (items t)
181                expanded_map = M.fromList expanded
182                tree = t { items = M.union orig_map expanded_map }
183            update path tree
184
185-- | Expand a stubbed Tree into a one with no stubs in it. You might want to
186-- filter the tree before expanding to save IO. This is the basic
187-- implementation, which may be overriden by some Tree instances (this is
188-- especially true of the Index case).
189expand :: (Monad m) => Tree m -> m (Tree m)
190expand = expandUpdate $ const return
191
192-- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is
193-- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub
194-- in the resulting Tree. A non-existent path is expanded as far as it can be.
195expandPath :: (Monad m) => Tree m -> AnchoredPath -> m (Tree m)
196expandPath t (AnchoredPath []) = return t
197expandPath t (AnchoredPath (n:rest)) =
198  case lookup t n of
199    (Just item) | isSub item -> amend t n rest =<< unstub item
200    _ -> return t -- fail $ "Descent error in expandPath: " ++ show path_
201    where
202          amend t' name rest' sub = do
203            sub' <- expandPath sub (AnchoredPath rest')
204            let tree = t' { items = M.insert name (SubTree sub') (items t') }
205            return tree
206
207-- | Check the disk version of a Tree: expands it, and checks each
208-- hash. Returns either the expanded tree or a list of AnchoredPaths
209-- where there are problems. The first argument is the hashing function
210-- used to create the tree.
211checkExpand :: (TreeItem IO -> IO Hash) -> Tree IO
212            -> IO (Either [(AnchoredPath, Hash, Maybe Hash)] (Tree IO))
213checkExpand hashFunc t = go (AnchoredPath []) t
214    where
215      go path t_ = do
216        let
217            subtree (name, sub) =
218                do let here = path `appendPath` name
219                   sub' <- (Just <$> unstub sub) `catch` \(_ :: IOException) -> return Nothing
220                   case sub' of
221                     Nothing -> return $ Left [(here, treeHash t_, Nothing)]
222                     Just sub'' -> do
223                       treeOrTrouble <- go (path `appendPath` name) sub''
224                       return $ case treeOrTrouble of
225                              Left problems -> Left problems
226                              Right tree -> Right (name, SubTree tree)
227            badBlob (_, f@(File (Blob _ h))) =
228              fmap (/= h) (hashFunc f `catch` (\(_ :: IOException) -> return NoHash))
229            badBlob _ = return False
230            render (name, f@(File (Blob _ h))) = do
231              h' <- (Just <$> hashFunc f) `catch` \(_ :: IOException) -> return Nothing
232              return (path `appendPath` name, h, h')
233            render (name, _) = return (path `appendPath` name, NoHash, Nothing)
234        subs <- mapM subtree [ x | x@(_, item) <- listImmediate t_, isSub item ]
235        badBlobs <- filterM badBlob (listImmediate t) >>= mapM render
236        let problems = badBlobs ++ concat (lefts subs)
237        if null problems
238         then do
239           let orig_map = M.filter (not . isSub) (items t)
240               expanded_map = M.fromList $ rights subs
241               tree = t_ {items = orig_map `M.union` expanded_map}
242           h' <- hashFunc (SubTree t_)
243           if h' `match` treeHash t_
244            then return $ Right tree
245            else return $ Left [(path, treeHash t_, Just h')]
246         else return $ Left problems
247
248class (Monad m) => FilterTree a m where
249    -- | Given @pred tree@, produce a 'Tree' that only has items for which
250    -- @pred@ returns @True@.
251    -- The tree might contain stubs. When expanded, these will be subject to
252    -- filtering as well.
253    filter :: (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m
254
255instance (Monad m) => FilterTree Tree m where
256    filter predicate t_ = filter' t_ (AnchoredPath [])
257        where filter' t path = t { items = M.mapMaybeWithKey (wibble path) $ items t }
258              wibble path name item =
259                  let npath = path `appendPath` name in
260                      if predicate npath item
261                         then Just $ filterSub npath item
262                         else Nothing
263              filterSub npath (SubTree t) = SubTree $ filter' t npath
264              filterSub npath (Stub stub h) =
265                  Stub (do x <- stub
266                           return $ filter' x npath) h
267              filterSub _ x = x
268
269-- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a
270-- identical to @tree@, but only has those items that are present in both
271-- @tree@ and @guide@. The @guide@ Tree may not contain any stubs.
272restrict :: (FilterTree t m) => Tree n -> t m -> t m
273restrict guide tree = filter accept tree
274    where accept path item =
275              case (find guide path, item) of
276                (Just (SubTree _), SubTree _) -> True
277                (Just (SubTree _), Stub _ _) -> True
278                (Just (File _), File _) -> True
279                (Just (Stub _ _), _) ->
280                    error "*sulk* Go away, you, you precondition violator!"
281                (_, _) -> False
282
283-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with
284-- care.
285readBlob :: Blob m -> m BL.ByteString
286readBlob (Blob r _) = r
287
288-- | For every pair of corresponding blobs from the two supplied trees,
289-- evaluate the supplied function and accumulate the results in a list. Hint:
290-- to get IO actions through, just use sequence on the resulting list.
291-- NB. This won't expand any stubs.
292zipCommonFiles :: (AnchoredPath -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a]
293zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p
294                                   | (p, File x) <- list b ]
295
296-- | For each file in each of the two supplied trees, evaluate the supplied
297-- function (supplying the corresponding file from the other tree, or Nothing)
298-- and accumulate the results in a list. Hint: to get IO actions through, just
299-- use sequence on the resulting list.  NB. This won't expand any stubs.
300zipFiles :: (AnchoredPath -> Maybe (Blob m) -> Maybe (Blob m) -> a)
301         -> Tree m -> Tree m -> [a]
302zipFiles f a b = [ f p (findFile a p) (findFile b p)
303                   | p <- paths a `sortedUnion` paths b ]
304    where paths t = sort [ p | (p, File _) <- list t ]
305
306zipTrees :: (AnchoredPath -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a)
307         -> Tree m -> Tree m -> [a]
308zipTrees f a b = [ f p (find a p) (find b p)
309                   | p <- reverse (paths a `sortedUnion` paths b) ]
310    where paths t = sort [ p | (p, _) <- list t ]
311
312-- | Helper function for taking the union of AnchoredPath lists that
313-- are already sorted.  This function does not check the precondition
314-- so use it carefully.
315sortedUnion :: [AnchoredPath] -> [AnchoredPath] -> [AnchoredPath]
316sortedUnion [] ys = ys
317sortedUnion xs [] = xs
318sortedUnion a@(x:xs) b@(y:ys) = case compare x y of
319                                LT -> x : sortedUnion xs b
320                                EQ -> x : sortedUnion xs ys
321                                GT -> y : sortedUnion a ys
322
323-- | Cautiously extracts differing subtrees from a pair of Trees. It will never
324-- do any unneccessary expanding. Tree hashes are used to cut the comparison as
325-- high up the Tree branches as possible. The result is a pair of trees that do
326-- not share any identical subtrees. They are derived from the first and second
327-- parameters respectively and they are always fully expanded. It might be
328-- advantageous to feed the result into 'zipFiles' or 'zipTrees'.
329diffTrees :: forall m. (Monad m) => Tree m -> Tree m -> m (Tree m, Tree m)
330diffTrees left right =
331            if treeHash left `match` treeHash right
332               then return (emptyTree, emptyTree)
333               else diff left right
334  where isFile (File _) = True
335        isFile _ = False
336        notFile = not . isFile
337        isEmpty = null . listImmediate
338        subtree :: TreeItem m -> m (Tree m)
339        subtree (Stub x _) = x
340        subtree (SubTree x) = return x
341        subtree (File _) = error "diffTrees tried to descend a File as a subtree"
342        maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand)
343        maybeUnfold (SubTree x) = SubTree `fmap` expand x
344        maybeUnfold i = return i
345        immediateN t = [ n | (n, _) <- listImmediate t ]
346        diff left' right' = do
347          is <- sequence [
348                   case (lookup left' n, lookup right' n) of
349                     (Just l, Nothing) -> do
350                       l' <- maybeUnfold l
351                       return (n, Just l', Nothing)
352                     (Nothing, Just r) -> do
353                       r' <- maybeUnfold r
354                       return (n, Nothing, Just r')
355                     (Just l, Just r)
356                         | itemHash l `match` itemHash r ->
357                             return (n, Nothing, Nothing)
358                         | notFile l && notFile r ->
359                             do x <- subtree l
360                                y <- subtree r
361                                (x', y') <- diffTrees x y
362                                if isEmpty x' && isEmpty y'
363                                   then return (n, Nothing, Nothing)
364                                   else return (n, Just $ SubTree x', Just $ SubTree y')
365                         | isFile l && isFile r ->
366                             return (n, Just l, Just r)
367                         | otherwise ->
368                             do l' <- maybeUnfold l
369                                r' <- maybeUnfold r
370                                return (n, Just l', Just r')
371                     _ -> error "n lookups failed"
372                   | n <- immediateN left' `union` immediateN right' ]
373          let is_l = [ (n, l) | (n, Just l, _) <- is ]
374              is_r = [ (n, r) | (n, _, Just r) <- is ]
375          return (makeTree is_l, makeTree is_r)
376
377-- | Modify a Tree (by replacing, or removing or adding items).
378modifyTree :: (Monad m) => Tree m -> AnchoredPath -> Maybe (TreeItem m) -> Tree m
379modifyTree t_ p_ i_ = snd $ go t_ p_ i_
380  where fix t unmod items' = (unmod, t { items = (countmap items':: Int) `seq` items'
381                                       , treeHash = if unmod then treeHash t else NoHash })
382
383        go t (AnchoredPath []) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub)
384
385        go t (AnchoredPath [n]) (Just item) = fix t unmod items'
386            where !items' = M.insert n item (items t)
387                  !unmod = itemHash item `match` case lookup t n of
388                                             Nothing -> NoHash
389                                             Just i -> itemHash i
390
391        go t (AnchoredPath [n]) Nothing = fix t unmod items'
392            where !items' = M.delete n (items t)
393                  !unmod = isNothing $ lookup t n
394
395        go t path@(AnchoredPath (n:r)) item = fix t unmod items'
396            where subtree s = go s (AnchoredPath r) item
397                  !items' = M.insert n sub (items t)
398                  !sub = snd sub'
399                  !unmod = fst sub'
400                  !sub' = case lookup t n of
401                    Just (SubTree s) -> let (mod', sub'') = subtree s in (mod', SubTree sub'')
402                    Just (Stub s _) -> (False, Stub (do x <- s
403                                                        return $! snd $! subtree x) NoHash)
404                    Nothing -> (False, SubTree $! snd $! subtree emptyTree)
405                    _ -> error $ "Modify tree at " ++ show path
406
407        go _ (AnchoredPath []) (Just (Stub _ _)) =
408            error $ "descending in modifyTree, case = (Just (Stub _ _)), path = " ++ show p_
409        go _ (AnchoredPath []) (Just (File _)) =
410            error $ "descending in modifyTree, case = (Just (File _)), path = " ++ show p_
411        go _ (AnchoredPath []) Nothing =
412            error $ "descending in modifyTree, case = Nothing, path = " ++ show p_
413
414countmap :: forall a k. M.Map k a -> Int
415countmap = M.foldr (\_ i -> i + 1) 0
416
417updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m
418updateSubtrees fun t =
419    fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t
420            , treeHash = NoHash }
421  where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s)
422        update (k, File f) = (k, File f)
423        update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder"
424
425-- | Does /not/ expand the tree.
426updateTree :: (Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m)
427updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t
428
429-- | Does /not/ expand the tree.
430partiallyUpdateTree :: (Monad m) => (TreeItem m -> m (TreeItem m))
431                       -> (AnchoredPath -> TreeItem m -> Bool) -> Tree m -> m (Tree m)
432partiallyUpdateTree fun predi t' = go (AnchoredPath []) t'
433  where go path t = do
434          items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t)
435          subtree <- fun . SubTree $ t { items = items'
436                                       , treeHash = NoHash }
437          case subtree of
438            SubTree t'' -> return t''
439            _ -> error "function passed to partiallyUpdateTree changed SubTree to something else"
440        maybeupdate path (k, item) = if predi (path `appendPath` k) item
441          then update (path `appendPath` k) (k, item)
442          else return (k, item)
443        update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree
444        update    _ (k, item) = (\new -> (k, new)) <$> fun item
445
446-- | Lay one tree over another. The resulting Tree will look like the base (1st
447-- parameter) Tree, although any items also present in the overlay Tree will be
448-- taken from the overlay. It is not allowed to overlay a different kind of an
449-- object, nor it is allowed for the overlay to add new objects to base.  This
450-- means that the overlay Tree should be a subset of the base Tree (although
451-- any extraneous items will be ignored by the implementation).
452overlay :: (Monad m) => Tree m -> Tree m -> Tree m
453overlay base over = Tree { items = M.fromList immediate
454                         , treeHash = NoHash }
455    where immediate = [ (n, get n) | (n, _) <- listImmediate base ]
456          get n = case (M.lookup n $ items base, M.lookup n $ items over) of
457                    (Just (File _), Just f@(File _)) -> f
458                    (Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o
459                    (Just (Stub b _), Just (SubTree o)) -> Stub (flip overlay o `fmap` b) NoHash
460                    (Just (SubTree b), Just (Stub o _)) -> Stub (overlay b `fmap` o) NoHash
461                    (Just (Stub b _), Just (Stub o _)) -> Stub (do o' <- o
462                                                                   b' <- b
463                                                                   return $ overlay b' o') NoHash
464                    (Just x, _) -> x
465                    (_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "."
466
467addMissingHashes :: (Monad m) => (TreeItem m -> m Hash) -> Tree m -> m (Tree m)
468addMissingHashes make = updateTree update -- use partiallyUpdateTree here
469    where update (SubTree t) = make (SubTree t) >>= \x -> return $ SubTree (t { treeHash = x })
470          update (File blob@(Blob con NoHash)) =
471              do hash <- make $ File blob
472                 return $ File (Blob con hash)
473          update (Stub s NoHash) = update . SubTree =<< s
474          update x = return x
475
476------ Private utilities shared among multiple functions. --------
477
478unstub :: (Monad m) => TreeItem m -> m (Tree m)
479unstub (Stub s _) = s
480unstub (SubTree s) = return s
481unstub _ = return emptyTree
482
483isSub :: TreeItem m -> Bool
484isSub (File _) = False
485isSub _ = True
486
487-- Properties
488
489-- | Specification of 'explodePath'
490prop_explodePath :: Tree m -> AnchoredPath -> Bool
491prop_explodePath t p =
492  explodePath t p == Prelude.filter (isPrefix p) (map fst (list t))
493