1{- git trees
2 -
3 - Copyright 2016-2019 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE BangPatterns, TypeSynonymInstances, FlexibleInstances #-}
9
10module Git.Tree (
11	Tree(..),
12	TreeContent(..),
13	getTree,
14	recordTree,
15	recordTree',
16	TreeItem(..),
17	treeItemsToTree,
18	treeItemToLsTreeItem,
19	lsTreeItemToTreeItem,
20	adjustTree,
21	graftTree,
22	graftTree',
23	withMkTreeHandle,
24	treeMode,
25) where
26
27import Common
28import Git
29import Git.FilePath
30import Git.Types
31import Git.Command
32import Git.Sha
33import qualified Git.LsTree as LsTree
34import qualified Utility.CoProcess as CoProcess
35
36import Numeric
37import System.Posix.Types
38import Control.Monad.IO.Class
39import qualified Data.Set as S
40import qualified Data.Map as M
41import qualified Data.ByteString.Char8 as S8
42
43newtype Tree = Tree [TreeContent]
44	deriving (Show)
45
46data TreeContent
47	-- A blob object in the tree.
48	= TreeBlob TopFilePath FileMode Sha
49	-- A subtree that is already recorded in git, with a known sha.
50	| RecordedSubTree TopFilePath Sha [TreeContent]
51	-- A subtree that has not yet been recorded in git.
52	| NewSubTree TopFilePath [TreeContent]
53	-- A commit object that is part of a tree (used for submodules)
54	| TreeCommit TopFilePath FileMode Sha
55	deriving (Show, Eq, Ord)
56
57{- Gets the Tree for a Ref. -}
58getTree :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO Tree
59getTree recursive r repo = do
60	(l, cleanup) <- lsTreeWithObjects recursive r repo
61	let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id
62		(extractTree l)
63	void cleanup
64	return t
65
66lsTreeWithObjects :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
67lsTreeWithObjects recursive =
68	LsTree.lsTree' [Param "-t"] recursive (LsTree.LsTreeLong False)
69
70newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
71
72withMkTreeHandle :: (MonadIO m, MonadMask m) => Repo -> (MkTreeHandle -> m a) -> m a
73withMkTreeHandle repo a = bracketIO setup cleanup (a . MkTreeHandle)
74  where
75	setup = gitCoProcessStart False ps repo
76	ps = [Param "mktree", Param "--batch", Param "-z"]
77	cleanup = CoProcess.stop
78
79{- Records a Tree in the Repo, returning its Sha.
80 -
81 - Efficiently handles subtrees, by only recording ones that have not
82 - already been recorded before. And even when many subtrees need to be
83 - recorded, it's done with a single call to git mktree, using its batch
84 - interface.
85 -}
86recordTree :: Tree -> Repo -> IO Sha
87recordTree t repo = withMkTreeHandle repo $ \h -> recordTree' h t
88
89recordTree' :: MkTreeHandle -> Tree -> IO Sha
90recordTree' h (Tree l) = mkTree h =<< mapM (recordSubTree h) l
91
92{- Note that the returned RecordedSubTree does not have its [TreeContent]
93 - list populated. This is a memory optimisation, since the list is not
94 - used. -}
95recordSubTree :: MkTreeHandle -> TreeContent -> IO TreeContent
96recordSubTree h (NewSubTree d l) = do
97	sha <- mkTree h =<< mapM (recordSubTree h) l
98	return (RecordedSubTree d sha [])
99recordSubTree _ alreadyrecorded = return alreadyrecorded
100
101mkTree :: MkTreeHandle -> [TreeContent] -> IO Sha
102mkTree (MkTreeHandle cp) l = CoProcess.query cp send receive
103  where
104	send h = do
105		forM_ l $ \i ->	hPutStr h $ case i of
106			TreeBlob f fm s -> mkTreeOutput fm BlobObject s f
107			RecordedSubTree f s _ -> mkTreeOutput treeMode TreeObject s f
108			NewSubTree _ _ -> error "recordSubTree internal error; unexpected NewSubTree"
109			TreeCommit f fm s -> mkTreeOutput fm CommitObject s f
110		hPutStr h "\NUL" -- signal end of tree to --batch
111	receive h = getSha "mktree" (S8.hGetLine h)
112
113treeMode :: FileMode
114treeMode = 0o040000
115
116mkTreeOutput :: FileMode -> ObjectType -> Sha -> TopFilePath -> String
117mkTreeOutput fm ot s f = concat
118	[ showOct fm ""
119	, " "
120	, decodeBS (fmtObjectType ot)
121	, " "
122	, fromRef s
123	, "\t"
124	, takeFileName (fromRawFilePath (getTopFilePath f))
125	, "\NUL"
126	]
127
128data TreeItem = TreeItem TopFilePath FileMode Sha
129	deriving (Show, Eq)
130
131treeItemToTreeContent :: TreeItem -> TreeContent
132treeItemToTreeContent (TreeItem f m s) = case toTreeItemType m of
133	Just TreeSubmodule -> TreeCommit f m s
134	_ -> TreeBlob f m s
135
136treeItemToLsTreeItem :: TreeItem -> LsTree.TreeItem
137treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem
138	{ LsTree.mode = mode
139	, LsTree.typeobj = fmtObjectType $ case toTreeItemType mode of
140		Just TreeSubmodule -> CommitObject
141		Just TreeSubtree -> TreeObject
142		_ -> BlobObject
143	, LsTree.sha = sha
144	, LsTree.size = Nothing
145	, LsTree.file = f
146	}
147
148lsTreeItemToTreeItem :: LsTree.TreeItem -> TreeItem
149lsTreeItemToTreeItem ti = TreeItem
150	(LsTree.file ti)
151	(LsTree.mode ti)
152	(LsTree.sha ti)
153
154treeItemsToTree :: [TreeItem] -> Tree
155treeItemsToTree = go M.empty
156  where
157	go m [] = Tree $ filter inTopTree (M.elems m)
158	go m (i:is)
159		| inTopTree p =
160			go (M.insert p (treeItemToTreeContent i) m) is
161		| otherwise = case M.lookup idir m of
162			Just (NewSubTree d l) ->
163				go (addsubtree idir m (NewSubTree d (c:l))) is
164			_ ->
165				go (addsubtree idir m (NewSubTree (asTopFilePath (toRawFilePath idir)) [c])) is
166	  where
167		p = gitPath i
168		idir = takeDirectory p
169		c = treeItemToTreeContent i
170
171	addsubtree d m t
172		| not (inTopTree d) =
173			let m' = M.insert d t m
174			in case M.lookup parent m' of
175				Just (NewSubTree d' l) ->
176					let l' = filter (\ti -> gitPath ti /= d) l
177					in addsubtree parent m' (NewSubTree d' (t:l'))
178				_ -> addsubtree parent m' (NewSubTree (asTopFilePath (toRawFilePath parent)) [t])
179		| otherwise = M.insert d t m
180	  where
181		parent = takeDirectory d
182
183{- Flattens the top N levels of a Tree. -}
184flattenTree :: Int -> Tree -> Tree
185flattenTree 0 t = t
186flattenTree n (Tree l) = Tree (concatMap (go n) l)
187  where
188	go 0 c = [c]
189	go _ b@(TreeBlob _ _ _) = [b]
190	go n' (RecordedSubTree _ _ l') = concatMap (go (n'-1)) l'
191	go n' (NewSubTree _ l') = concatMap (go (n'-1)) l'
192	go _ c@(TreeCommit _ _ _) = [c]
193
194{- Applies an adjustment to items in a tree.
195 -
196 - While less flexible than using getTree and recordTree,
197 - this avoids buffering the whole tree in memory.
198 -}
199adjustTree
200	:: (Functor m, MonadIO m, MonadMask m)
201	=> (TreeItem -> m (Maybe TreeItem))
202	-- ^ Adjust an item in the tree. Nothing deletes the item.
203	-- Cannot move the item to a different tree.
204	-> [TreeItem]
205	-- ^ New items to add to the tree.
206	-> (TreeContent -> TreeContent -> TreeContent)
207	-- ^ When adding a new item to the tree and an item with the same
208	-- name already exists, this function picks which to use.
209	-- The first one is the item that was already in the tree.
210	-> [TopFilePath]
211	-- ^ Files to remove from the tree.
212	-> Ref
213	-> Repo
214	-> m Sha
215adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo =
216	withMkTreeHandle repo $ \h -> do
217		(l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
218		(l', _, _) <- go h False [] 1 inTopTree l
219		l'' <- adjustlist h 0 inTopTree (const True) l'
220		sha <- liftIO $ mkTree h l''
221		void $ liftIO cleanup
222		return sha
223  where
224	go _ wasmodified c _ _ [] = return (c, wasmodified, [])
225	go h wasmodified c depth intree (i:is)
226		| intree i = case readObjectType (LsTree.typeobj i) of
227			Just BlobObject -> do
228				let ti = TreeItem (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
229				v <- adjusttreeitem ti
230				case v of
231					Nothing -> go h True c depth intree is
232					Just ti'@(TreeItem f m s) ->
233						let !modified = wasmodified || ti' /= ti
234						    blob = TreeBlob f m s
235						in go h modified (blob:c) depth intree is
236			Just TreeObject -> do
237				(sl, modified, is') <- go h False [] (depth+1) (beneathSubTree i) is
238				sl' <- adjustlist h depth (inTree i) (beneathSubTree i) sl
239				let slmodified = sl' /= sl
240				subtree <- if modified || slmodified
241					then liftIO $ recordSubTree h $ NewSubTree (LsTree.file i) sl'
242					else return $ RecordedSubTree (LsTree.file i) (LsTree.sha i) []
243				let !modified' = modified || slmodified || wasmodified
244				go h modified' (subtree : c) depth intree is'
245			Just CommitObject -> do
246				let ti = TreeItem (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
247				v <- adjusttreeitem ti
248				case v of
249					Nothing -> go h True c depth intree is
250					Just (TreeItem f m s) ->
251						let commit = TreeCommit f m s
252						in go h wasmodified (commit:c) depth intree is
253			_ -> error ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"")
254		| otherwise = return (c, wasmodified, i:is)
255
256	adjustlist h depth ishere underhere l = do
257		let (addhere, rest) = partition ishere addtreeitems
258		let l' = filter (not . removed) $
259			addoldnew l (map treeItemToTreeContent addhere)
260		let inl i = any (\t -> beneathSubTree t i) l'
261		let (Tree addunderhere) = flattenTree depth $ treeItemsToTree $
262			filter (\i -> underhere i && not (inl i)) rest
263		addunderhere' <- liftIO $ mapM (recordSubTree h) addunderhere
264		return (addoldnew l' addunderhere')
265
266	removeset = S.fromList $ map (normalise . gitPath) removefiles
267	removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset
268	removed (TreeCommit f _ _) = S.member (normalise (gitPath f)) removeset
269	removed (RecordedSubTree _ _ _) = False
270	removed (NewSubTree _ _) = False
271
272	addoldnew [] new = new
273	addoldnew old [] = old
274	addoldnew old new = addoldnew' (M.fromList $ map (\i -> (mkk i, i)) old) new
275	addoldnew' oldm (n:ns) =
276			let k = mkk n
277			in case M.lookup k oldm of
278				Just o ->
279					resolveaddconflict o n
280					:
281					addoldnew' (M.delete k oldm) ns
282				Nothing -> n : addoldnew' oldm ns
283	addoldnew' oldm [] = M.elems oldm
284	mkk = normalise . gitPath
285
286{- Grafts subtree into the basetree at the specified location, replacing
287 - anything that the basetree already had at that location.
288 -
289 - This is generally much more efficient than using getTree and recordTree,
290 - or adjustTree, since it only needs to traverse from the top of the tree
291 - down to the graft location. It does not buffer the whole tree in memory.
292 -}
293graftTree
294	:: Sha
295	-> TopFilePath
296	-> Sha
297	-> Repo
298	-> IO Sha
299graftTree subtree graftloc basetree repo =
300	withMkTreeHandle repo $ graftTree' subtree graftloc basetree repo
301
302graftTree'
303	:: Sha
304	-> TopFilePath
305	-> Sha
306	-> Repo
307	-> MkTreeHandle
308	-> IO Sha
309graftTree' subtree graftloc basetree repo hdl = go basetree subdirs graftdirs
310  where
311	go tsha (subdir:restsubdirs) (topmostgraphdir:restgraphdirs) = do
312		Tree t <- getTree LsTree.LsTreeNonRecursive tsha repo
313		let abovegraftpoint i = gitPath i == gitPath subdir
314		t' <- case partition abovegraftpoint t of
315			-- the graft point is not already in the tree,
316			-- so graft it in, keeping the existing tree
317			-- content
318			([], _) -> do
319				graft <- graftin (topmostgraphdir:restgraphdirs)
320				return (graft:t)
321			(matching, rest) -> do
322				newshas <- forM matching $ \case
323					RecordedSubTree tloc tsha' _
324						| null restgraphdirs -> return $
325							RecordedSubTree tloc subtree []
326						| otherwise -> do
327							tsha'' <- go tsha' restsubdirs restgraphdirs
328							return $ RecordedSubTree tloc tsha'' []
329					_ -> graftin (topmostgraphdir:restgraphdirs)
330				return (newshas ++ rest)
331		mkTree hdl t'
332	go _ _ [] = return subtree
333	go _ [] _ = return subtree
334
335	graftin t = recordSubTree hdl $ graftin' t
336	graftin' [] = RecordedSubTree graftloc subtree []
337	graftin' (d:rest)
338		| d == graftloc = graftin' []
339		| otherwise = NewSubTree d [graftin' rest]
340
341	subdirs = splitDirectories $ gitPath graftloc
342
343	-- For a graftloc of "foo/bar/baz", this generates
344	-- ["foo", "foo/bar", "foo/bar/baz"]
345	graftdirs = map (asTopFilePath . toInternalGitPath . encodeBS) $
346		mkpaths [] subdirs
347	mkpaths _ [] = []
348	mkpaths base (d:rest) = (joinPath base </> d) : mkpaths (base ++ [d]) rest
349
350{- Assumes the list is ordered, with tree objects coming right before their
351 - contents. -}
352extractTree :: [LsTree.TreeItem] -> Either String Tree
353extractTree l = case go [] inTopTree l of
354	Right (t, []) -> Right (Tree t)
355	Right _ -> parseerr "unexpected tree form"
356	Left e -> parseerr e
357  where
358	go t _ [] = Right (t, [])
359	go t intree (i:is)
360		| intree i = case readObjectType (LsTree.typeobj i) of
361			Just BlobObject ->
362				let b = TreeBlob (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
363				in go (b:t) intree is
364			Just TreeObject -> case go [] (beneathSubTree i) is of
365				Right (subtree, is') ->
366					let st = RecordedSubTree (LsTree.file i) (LsTree.sha i) subtree
367					in go (st:t) intree is'
368				Left e -> Left e
369			Just CommitObject ->
370				let c = TreeCommit (LsTree.file i) (LsTree.mode i) (LsTree.sha i)
371				in go (c:t) intree is
372			_ -> parseerr ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"")
373		| otherwise = Right (t, i:is)
374	parseerr = Left
375
376class GitPath t where
377	gitPath :: t -> FilePath
378
379instance GitPath FilePath where
380	gitPath = id
381
382instance GitPath TopFilePath where
383	gitPath = fromRawFilePath . getTopFilePath
384
385instance GitPath TreeItem where
386	gitPath (TreeItem f _ _) = gitPath f
387
388instance GitPath LsTree.TreeItem where
389	gitPath = gitPath . LsTree.file
390
391instance GitPath TreeContent where
392	gitPath (TreeBlob f _ _) = gitPath f
393	gitPath (RecordedSubTree f _ _) = gitPath f
394	gitPath (NewSubTree f _) = gitPath f
395	gitPath (TreeCommit f _ _) = gitPath f
396
397inTopTree :: GitPath t => t -> Bool
398inTopTree = inTree "."
399
400inTree :: (GitPath t, GitPath f) => t -> f -> Bool
401inTree t f = gitPath t == takeDirectory (gitPath f)
402
403beneathSubTree :: (GitPath t, GitPath f) => t -> f -> Bool
404beneathSubTree t f = prefix `isPrefixOf` normalise (gitPath f)
405  where
406	tp = gitPath t
407	prefix = if null tp then tp else addTrailingPathSeparator (normalise tp)
408