1{- git ls-files interface
2 -
3 - Copyright 2010-2020 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8module Git.LsFiles (
9	Options(..),
10	inRepo,
11	inRepoDetails,
12	inRepoOrBranch,
13	notInRepo,
14	notInRepoIncludingEmptyDirectories,
15	allFiles,
16	deleted,
17	modified,
18	staged,
19	stagedNotDeleted,
20	usualStageNum,
21	mergeConflictHeadStageNum,
22	stagedDetails,
23	typeChanged,
24	typeChangedStaged,
25	Conflicting(..),
26	Unmerged(..),
27	unmerged,
28	StagedDetails,
29	inodeCaches,
30) where
31
32import Common
33import Git
34import Git.Command
35import Git.Types
36import Git.Sha
37import Utility.InodeCache
38import Utility.TimeStamp
39import Utility.Attoparsec
40import qualified Utility.RawFilePath as R
41
42import System.Posix.Types
43import qualified Data.Map as M
44import qualified Data.ByteString as S
45import qualified Data.Attoparsec.ByteString as A
46import qualified Data.Attoparsec.ByteString.Char8 as A8
47import qualified System.FilePath.ByteString as P
48
49{- It's only safe to use git ls-files on the current repo, not on a remote.
50 -
51 - Git has some strange behavior when git ls-files is used with repos
52 - that are not the one that the cwd is in:
53 - git --git-dir=../foo/.git --worktree=../foo ../foo fails saying
54 - "../foo is outside repository".
55 - That does not happen when an absolute path is provided.
56 -
57 - Also, the files output by ls-files are relative to the cwd.
58 - Unless it's run on remote. Then it's relative to the top of the remote
59 - repo.
60 -
61 - So, best to avoid that class of problems.
62 -}
63safeForLsFiles :: Repo -> Bool
64safeForLsFiles r = isNothing (remoteName r)
65
66guardSafeForLsFiles :: Repo -> IO a -> IO a
67guardSafeForLsFiles r a
68	| safeForLsFiles r = a
69	| otherwise = error $ "git ls-files is unsafe to run on repository " ++ repoDescribe r
70
71data Options = ErrorUnmatch
72
73opParam :: Options -> CommandParam
74opParam ErrorUnmatch = Param "--error-unmatch"
75
76{- Lists files that are checked into git's index at the specified paths.
77 - With no paths, all files are listed.
78 -}
79inRepo :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
80inRepo = inRepo' [Param "--cached"]
81
82inRepo' :: [CommandParam] -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
83inRepo' ps os l repo = guardSafeForLsFiles repo $ pipeNullSplit' params repo
84  where
85	params =
86		Param "ls-files" :
87		Param "-z" :
88		map opParam os ++ ps ++
89		(Param "--" : map (File . fromRawFilePath) l)
90
91{- Lists the same files inRepo does, but with sha and mode. -}
92inRepoDetails :: [Options] -> [RawFilePath] -> Repo -> IO ([(RawFilePath, Sha, FileMode)], IO Bool)
93inRepoDetails = stagedDetails' parser . map opParam
94  where
95	parser s = case parseStagedDetails s of
96		Just (file, sha, mode, stagenum)
97			| stagenum == usualStageNum || stagenum == mergeConflictHeadStageNum ->
98				Just (file, sha, mode)
99		_ -> Nothing
100
101{- Files that are checked into the index or have been committed to a
102 - branch. -}
103inRepoOrBranch :: Branch -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
104inRepoOrBranch b = inRepo'
105	[ Param "--cached"
106	, Param ("--with-tree=" ++ fromRef b)
107	]
108
109{- Scans for files at the specified locations that are not checked into git. -}
110notInRepo :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
111notInRepo = notInRepo' []
112
113notInRepo' :: [CommandParam] -> [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
114notInRepo' ps os include_ignored =
115	inRepo' (Param "--others" : ps ++ exclude) os
116  where
117	exclude
118		| include_ignored = []
119		| otherwise = [Param "--exclude-standard"]
120
121{- Scans for files at the specified locations that are not checked into
122 - git. Empty directories are included in the result. -}
123notInRepoIncludingEmptyDirectories :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
124notInRepoIncludingEmptyDirectories = notInRepo' [Param "--directory"]
125
126{- Finds all files in the specified locations, whether checked into git or
127 - not. -}
128allFiles :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
129allFiles = inRepo' [Param "--cached", Param "--others"]
130
131{- Returns a list of files in the specified locations that have been
132 - deleted. -}
133deleted :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
134deleted = inRepo' [Param "--deleted"]
135
136{- Returns a list of files in the specified locations that have been
137 - modified. -}
138modified :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
139modified = inRepo' [Param "--modified"]
140
141{- Returns a list of all files that are staged for commit. -}
142staged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
143staged = staged' []
144
145{- Returns a list of the files, staged for commit, that are being added,
146 - moved, or changed (but not deleted), from the specified locations. -}
147stagedNotDeleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
148stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"]
149
150staged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
151staged' ps l repo = guardSafeForLsFiles repo $
152	pipeNullSplit' (prefix ++ ps ++ suffix) repo
153  where
154	prefix = [Param "diff", Param "--cached", Param "--name-only", Param "-z"]
155	suffix = Param "--" : map (File . fromRawFilePath) l
156
157type StagedDetails = (RawFilePath, Sha, FileMode, StageNum)
158
159type StageNum = Int
160
161{- Used when not in a merge conflict. -}
162usualStageNum :: Int
163usualStageNum = 0
164
165{- WHen in a merge conflict, git uses stage number 2 for the local HEAD
166 - side of the merge conflict. -}
167mergeConflictHeadStageNum :: Int
168mergeConflictHeadStageNum = 2
169
170{- Returns details about all files that are staged in the index.
171 -
172 - Note that, during a conflict, a file will appear in the list
173 - more than once with different stage numbers.
174 -}
175stagedDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool)
176stagedDetails = stagedDetails' parseStagedDetails []
177
178stagedDetails' :: (S.ByteString -> Maybe t) -> [CommandParam] -> [RawFilePath] -> Repo -> IO ([t], IO Bool)
179stagedDetails' parser ps l repo = guardSafeForLsFiles repo $ do
180	(ls, cleanup) <- pipeNullSplit' params repo
181	return (mapMaybe parser ls, cleanup)
182  where
183	params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++
184		Param "--" : map (File . fromRawFilePath) l
185
186parseStagedDetails :: S.ByteString -> Maybe StagedDetails
187parseStagedDetails = eitherToMaybe . A.parseOnly parser
188  where
189	parser = do
190		mode <- octal
191		void $ A8.char ' '
192		sha <- maybe (fail "bad sha") return . extractSha =<< nextword
193		void $ A8.char ' '
194		stagenum <- A8.decimal
195		void $ A8.char '\t'
196		file <- A.takeByteString
197		return (file, sha, mode, stagenum)
198
199	nextword = A8.takeTill (== ' ')
200
201{- Returns a list of the files in the specified locations that are staged
202 - for commit, and whose type has changed. -}
203typeChangedStaged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
204typeChangedStaged = typeChanged' [Param "--cached"]
205
206{- Returns a list of the files in the specified locations whose type has
207 - changed.  Files only staged for commit will not be included. -}
208typeChanged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
209typeChanged = typeChanged' []
210
211typeChanged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool)
212typeChanged' ps l repo = guardSafeForLsFiles repo $ do
213	(fs, cleanup) <- pipeNullSplit' (prefix ++ ps ++ suffix) repo
214	-- git diff returns filenames relative to the top of the git repo;
215	-- convert to filenames relative to the cwd, like git ls-files.
216	top <- absPath (repoPath repo)
217	currdir <- R.getCurrentDirectory
218	return (map (\f -> relPathDirToFileAbs currdir $ top P.</> f) fs, cleanup)
219  where
220	prefix =
221		[ Param "diff"
222		, Param "--name-only"
223		, Param "--diff-filter=T"
224		, Param "-z"
225		]
226	suffix = Param "--" : (if null l then [File "."] else map (File . fromRawFilePath) l)
227
228{- A item in conflict has two possible values.
229 - Either can be Nothing, when that side deleted the file. -}
230data Conflicting v = Conflicting
231	{ valUs :: Maybe v
232	, valThem :: Maybe v
233	} deriving (Show)
234
235data Unmerged = Unmerged
236	{ unmergedFile :: RawFilePath
237	, unmergedTreeItemType :: Conflicting TreeItemType
238	, unmergedSha :: Conflicting Sha
239	}
240
241{- Returns a list of the files in the specified locations that have
242 - unresolved merge conflicts.
243 -
244 - ls-files outputs multiple lines per conflicting file, each with its own
245 - stage number:
246 -   1 = old version, can be ignored
247 -   2 = us
248 -   3 = them
249 - If a line is omitted, that side removed the file.
250 -}
251unmerged :: [RawFilePath] -> Repo -> IO ([Unmerged], IO Bool)
252unmerged l repo = guardSafeForLsFiles repo $ do
253	(fs, cleanup) <- pipeNullSplit params repo
254	return (reduceUnmerged [] $ catMaybes $ map (parseUnmerged . decodeBL) fs, cleanup)
255  where
256	params =
257		Param "ls-files" :
258		Param "--unmerged" :
259		Param "-z" :
260		Param "--" :
261		map (File . fromRawFilePath) l
262
263data InternalUnmerged = InternalUnmerged
264	{ isus :: Bool
265	, ifile :: RawFilePath
266	, itreeitemtype :: Maybe TreeItemType
267	, isha :: Maybe Sha
268	}
269
270parseUnmerged :: String -> Maybe InternalUnmerged
271parseUnmerged s
272	| null file = Nothing
273	| otherwise = case words metadata of
274		(rawtreeitemtype:rawsha:rawstage:_) -> do
275			stage <- readish rawstage :: Maybe Int
276			if stage /= 2 && stage /= 3
277				then Nothing
278				else do
279					treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype)
280					sha <- extractSha (encodeBS rawsha)
281					return $ InternalUnmerged (stage == 2) (toRawFilePath file)
282						(Just treeitemtype) (Just sha)
283		_ -> Nothing
284  where
285	(metadata, file) = separate (== '\t') s
286
287reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged]
288reduceUnmerged c [] = c
289reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest
290  where
291	(rest, sibi) = findsib i is
292	(treeitemtypeA, treeitemtypeB, shaA, shaB)
293		| isus i    = (itreeitemtype i, itreeitemtype sibi, isha i, isha sibi)
294		| otherwise = (itreeitemtype sibi, itreeitemtype i, isha sibi, isha i)
295	new = Unmerged
296		{ unmergedFile = ifile i
297		, unmergedTreeItemType = Conflicting treeitemtypeA treeitemtypeB
298		, unmergedSha = Conflicting shaA shaB
299		}
300	findsib templatei [] = ([], removed templatei)
301	findsib templatei (l:ls)
302		| ifile l == ifile templatei = (ls, l)
303		| otherwise = (l:ls, removed templatei)
304	removed templatei = templatei
305		{ isus = not (isus templatei)
306		, itreeitemtype = Nothing
307		, isha = Nothing
308		}
309
310{- Gets the InodeCache equivilant information stored in the git index.
311 -
312 - Note that this uses a --debug option whose output could change at some
313 - point in the future. If the output is not as expected, will use Nothing.
314 -}
315inodeCaches :: [RawFilePath] -> Repo -> IO ([(FilePath, Maybe InodeCache)], IO Bool)
316inodeCaches locs repo = guardSafeForLsFiles repo $ do
317	(ls, cleanup) <- pipeNullSplit params repo
318	return (parse Nothing (map decodeBL ls), cleanup)
319  where
320	params =
321		Param "ls-files" :
322		Param "--cached" :
323		Param "-z" :
324		Param "--debug" :
325		Param "--" :
326		map (File . fromRawFilePath) locs
327
328	parse Nothing (f:ls) = parse (Just f) ls
329	parse (Just f) (s:[]) =
330		let i = parsedebug s
331		in (f, i) : []
332	parse (Just f) (s:ls) =
333		let (d, f') = splitdebug s
334		    i = parsedebug d
335		in (f, i) : parse (Just f') ls
336	parse _ _ = []
337
338	-- First 5 lines are --debug output, remainder is the next filename.
339	-- This assumes that --debug does not start outputting more lines.
340	splitdebug s = case splitc '\n' s of
341		(d1:d2:d3:d4:d5:rest) ->
342			( intercalate "\n" [d1, d2, d3, d4, d5]
343			, intercalate "\n" rest
344			)
345		_ -> ("", s)
346
347	-- This parser allows for some changes to the --debug output,
348	-- including reordering, or adding more items.
349	parsedebug s = do
350		let l = words s
351		let iskey v = ":" `isSuffixOf` v
352		let m = M.fromList $ zip
353			(filter iskey l)
354			(filter (not . iskey) l)
355		mkInodeCache
356			<$> (readish =<< M.lookup "ino:" m)
357			<*> (readish =<< M.lookup "size:" m)
358			<*> (parsePOSIXTime =<< (replace ":" "." <$> M.lookup "mtime:" m))
359