1{- git ls-files interface 2 - 3 - Copyright 2010-2020 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8module Git.LsFiles ( 9 Options(..), 10 inRepo, 11 inRepoDetails, 12 inRepoOrBranch, 13 notInRepo, 14 notInRepoIncludingEmptyDirectories, 15 allFiles, 16 deleted, 17 modified, 18 staged, 19 stagedNotDeleted, 20 usualStageNum, 21 mergeConflictHeadStageNum, 22 stagedDetails, 23 typeChanged, 24 typeChangedStaged, 25 Conflicting(..), 26 Unmerged(..), 27 unmerged, 28 StagedDetails, 29 inodeCaches, 30) where 31 32import Common 33import Git 34import Git.Command 35import Git.Types 36import Git.Sha 37import Utility.InodeCache 38import Utility.TimeStamp 39import Utility.Attoparsec 40import qualified Utility.RawFilePath as R 41 42import System.Posix.Types 43import qualified Data.Map as M 44import qualified Data.ByteString as S 45import qualified Data.Attoparsec.ByteString as A 46import qualified Data.Attoparsec.ByteString.Char8 as A8 47import qualified System.FilePath.ByteString as P 48 49{- It's only safe to use git ls-files on the current repo, not on a remote. 50 - 51 - Git has some strange behavior when git ls-files is used with repos 52 - that are not the one that the cwd is in: 53 - git --git-dir=../foo/.git --worktree=../foo ../foo fails saying 54 - "../foo is outside repository". 55 - That does not happen when an absolute path is provided. 56 - 57 - Also, the files output by ls-files are relative to the cwd. 58 - Unless it's run on remote. Then it's relative to the top of the remote 59 - repo. 60 - 61 - So, best to avoid that class of problems. 62 -} 63safeForLsFiles :: Repo -> Bool 64safeForLsFiles r = isNothing (remoteName r) 65 66guardSafeForLsFiles :: Repo -> IO a -> IO a 67guardSafeForLsFiles r a 68 | safeForLsFiles r = a 69 | otherwise = error $ "git ls-files is unsafe to run on repository " ++ repoDescribe r 70 71data Options = ErrorUnmatch 72 73opParam :: Options -> CommandParam 74opParam ErrorUnmatch = Param "--error-unmatch" 75 76{- Lists files that are checked into git's index at the specified paths. 77 - With no paths, all files are listed. 78 -} 79inRepo :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 80inRepo = inRepo' [Param "--cached"] 81 82inRepo' :: [CommandParam] -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 83inRepo' ps os l repo = guardSafeForLsFiles repo $ pipeNullSplit' params repo 84 where 85 params = 86 Param "ls-files" : 87 Param "-z" : 88 map opParam os ++ ps ++ 89 (Param "--" : map (File . fromRawFilePath) l) 90 91{- Lists the same files inRepo does, but with sha and mode. -} 92inRepoDetails :: [Options] -> [RawFilePath] -> Repo -> IO ([(RawFilePath, Sha, FileMode)], IO Bool) 93inRepoDetails = stagedDetails' parser . map opParam 94 where 95 parser s = case parseStagedDetails s of 96 Just (file, sha, mode, stagenum) 97 | stagenum == usualStageNum || stagenum == mergeConflictHeadStageNum -> 98 Just (file, sha, mode) 99 _ -> Nothing 100 101{- Files that are checked into the index or have been committed to a 102 - branch. -} 103inRepoOrBranch :: Branch -> [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 104inRepoOrBranch b = inRepo' 105 [ Param "--cached" 106 , Param ("--with-tree=" ++ fromRef b) 107 ] 108 109{- Scans for files at the specified locations that are not checked into git. -} 110notInRepo :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 111notInRepo = notInRepo' [] 112 113notInRepo' :: [CommandParam] -> [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 114notInRepo' ps os include_ignored = 115 inRepo' (Param "--others" : ps ++ exclude) os 116 where 117 exclude 118 | include_ignored = [] 119 | otherwise = [Param "--exclude-standard"] 120 121{- Scans for files at the specified locations that are not checked into 122 - git. Empty directories are included in the result. -} 123notInRepoIncludingEmptyDirectories :: [Options] -> Bool -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 124notInRepoIncludingEmptyDirectories = notInRepo' [Param "--directory"] 125 126{- Finds all files in the specified locations, whether checked into git or 127 - not. -} 128allFiles :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 129allFiles = inRepo' [Param "--cached", Param "--others"] 130 131{- Returns a list of files in the specified locations that have been 132 - deleted. -} 133deleted :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 134deleted = inRepo' [Param "--deleted"] 135 136{- Returns a list of files in the specified locations that have been 137 - modified. -} 138modified :: [Options] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 139modified = inRepo' [Param "--modified"] 140 141{- Returns a list of all files that are staged for commit. -} 142staged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 143staged = staged' [] 144 145{- Returns a list of the files, staged for commit, that are being added, 146 - moved, or changed (but not deleted), from the specified locations. -} 147stagedNotDeleted :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 148stagedNotDeleted = staged' [Param "--diff-filter=ACMRT"] 149 150staged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 151staged' ps l repo = guardSafeForLsFiles repo $ 152 pipeNullSplit' (prefix ++ ps ++ suffix) repo 153 where 154 prefix = [Param "diff", Param "--cached", Param "--name-only", Param "-z"] 155 suffix = Param "--" : map (File . fromRawFilePath) l 156 157type StagedDetails = (RawFilePath, Sha, FileMode, StageNum) 158 159type StageNum = Int 160 161{- Used when not in a merge conflict. -} 162usualStageNum :: Int 163usualStageNum = 0 164 165{- WHen in a merge conflict, git uses stage number 2 for the local HEAD 166 - side of the merge conflict. -} 167mergeConflictHeadStageNum :: Int 168mergeConflictHeadStageNum = 2 169 170{- Returns details about all files that are staged in the index. 171 - 172 - Note that, during a conflict, a file will appear in the list 173 - more than once with different stage numbers. 174 -} 175stagedDetails :: [RawFilePath] -> Repo -> IO ([StagedDetails], IO Bool) 176stagedDetails = stagedDetails' parseStagedDetails [] 177 178stagedDetails' :: (S.ByteString -> Maybe t) -> [CommandParam] -> [RawFilePath] -> Repo -> IO ([t], IO Bool) 179stagedDetails' parser ps l repo = guardSafeForLsFiles repo $ do 180 (ls, cleanup) <- pipeNullSplit' params repo 181 return (mapMaybe parser ls, cleanup) 182 where 183 params = Param "ls-files" : Param "--stage" : Param "-z" : ps ++ 184 Param "--" : map (File . fromRawFilePath) l 185 186parseStagedDetails :: S.ByteString -> Maybe StagedDetails 187parseStagedDetails = eitherToMaybe . A.parseOnly parser 188 where 189 parser = do 190 mode <- octal 191 void $ A8.char ' ' 192 sha <- maybe (fail "bad sha") return . extractSha =<< nextword 193 void $ A8.char ' ' 194 stagenum <- A8.decimal 195 void $ A8.char '\t' 196 file <- A.takeByteString 197 return (file, sha, mode, stagenum) 198 199 nextword = A8.takeTill (== ' ') 200 201{- Returns a list of the files in the specified locations that are staged 202 - for commit, and whose type has changed. -} 203typeChangedStaged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 204typeChangedStaged = typeChanged' [Param "--cached"] 205 206{- Returns a list of the files in the specified locations whose type has 207 - changed. Files only staged for commit will not be included. -} 208typeChanged :: [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 209typeChanged = typeChanged' [] 210 211typeChanged' :: [CommandParam] -> [RawFilePath] -> Repo -> IO ([RawFilePath], IO Bool) 212typeChanged' ps l repo = guardSafeForLsFiles repo $ do 213 (fs, cleanup) <- pipeNullSplit' (prefix ++ ps ++ suffix) repo 214 -- git diff returns filenames relative to the top of the git repo; 215 -- convert to filenames relative to the cwd, like git ls-files. 216 top <- absPath (repoPath repo) 217 currdir <- R.getCurrentDirectory 218 return (map (\f -> relPathDirToFileAbs currdir $ top P.</> f) fs, cleanup) 219 where 220 prefix = 221 [ Param "diff" 222 , Param "--name-only" 223 , Param "--diff-filter=T" 224 , Param "-z" 225 ] 226 suffix = Param "--" : (if null l then [File "."] else map (File . fromRawFilePath) l) 227 228{- A item in conflict has two possible values. 229 - Either can be Nothing, when that side deleted the file. -} 230data Conflicting v = Conflicting 231 { valUs :: Maybe v 232 , valThem :: Maybe v 233 } deriving (Show) 234 235data Unmerged = Unmerged 236 { unmergedFile :: RawFilePath 237 , unmergedTreeItemType :: Conflicting TreeItemType 238 , unmergedSha :: Conflicting Sha 239 } 240 241{- Returns a list of the files in the specified locations that have 242 - unresolved merge conflicts. 243 - 244 - ls-files outputs multiple lines per conflicting file, each with its own 245 - stage number: 246 - 1 = old version, can be ignored 247 - 2 = us 248 - 3 = them 249 - If a line is omitted, that side removed the file. 250 -} 251unmerged :: [RawFilePath] -> Repo -> IO ([Unmerged], IO Bool) 252unmerged l repo = guardSafeForLsFiles repo $ do 253 (fs, cleanup) <- pipeNullSplit params repo 254 return (reduceUnmerged [] $ catMaybes $ map (parseUnmerged . decodeBL) fs, cleanup) 255 where 256 params = 257 Param "ls-files" : 258 Param "--unmerged" : 259 Param "-z" : 260 Param "--" : 261 map (File . fromRawFilePath) l 262 263data InternalUnmerged = InternalUnmerged 264 { isus :: Bool 265 , ifile :: RawFilePath 266 , itreeitemtype :: Maybe TreeItemType 267 , isha :: Maybe Sha 268 } 269 270parseUnmerged :: String -> Maybe InternalUnmerged 271parseUnmerged s 272 | null file = Nothing 273 | otherwise = case words metadata of 274 (rawtreeitemtype:rawsha:rawstage:_) -> do 275 stage <- readish rawstage :: Maybe Int 276 if stage /= 2 && stage /= 3 277 then Nothing 278 else do 279 treeitemtype <- readTreeItemType (encodeBS rawtreeitemtype) 280 sha <- extractSha (encodeBS rawsha) 281 return $ InternalUnmerged (stage == 2) (toRawFilePath file) 282 (Just treeitemtype) (Just sha) 283 _ -> Nothing 284 where 285 (metadata, file) = separate (== '\t') s 286 287reduceUnmerged :: [Unmerged] -> [InternalUnmerged] -> [Unmerged] 288reduceUnmerged c [] = c 289reduceUnmerged c (i:is) = reduceUnmerged (new:c) rest 290 where 291 (rest, sibi) = findsib i is 292 (treeitemtypeA, treeitemtypeB, shaA, shaB) 293 | isus i = (itreeitemtype i, itreeitemtype sibi, isha i, isha sibi) 294 | otherwise = (itreeitemtype sibi, itreeitemtype i, isha sibi, isha i) 295 new = Unmerged 296 { unmergedFile = ifile i 297 , unmergedTreeItemType = Conflicting treeitemtypeA treeitemtypeB 298 , unmergedSha = Conflicting shaA shaB 299 } 300 findsib templatei [] = ([], removed templatei) 301 findsib templatei (l:ls) 302 | ifile l == ifile templatei = (ls, l) 303 | otherwise = (l:ls, removed templatei) 304 removed templatei = templatei 305 { isus = not (isus templatei) 306 , itreeitemtype = Nothing 307 , isha = Nothing 308 } 309 310{- Gets the InodeCache equivilant information stored in the git index. 311 - 312 - Note that this uses a --debug option whose output could change at some 313 - point in the future. If the output is not as expected, will use Nothing. 314 -} 315inodeCaches :: [RawFilePath] -> Repo -> IO ([(FilePath, Maybe InodeCache)], IO Bool) 316inodeCaches locs repo = guardSafeForLsFiles repo $ do 317 (ls, cleanup) <- pipeNullSplit params repo 318 return (parse Nothing (map decodeBL ls), cleanup) 319 where 320 params = 321 Param "ls-files" : 322 Param "--cached" : 323 Param "-z" : 324 Param "--debug" : 325 Param "--" : 326 map (File . fromRawFilePath) locs 327 328 parse Nothing (f:ls) = parse (Just f) ls 329 parse (Just f) (s:[]) = 330 let i = parsedebug s 331 in (f, i) : [] 332 parse (Just f) (s:ls) = 333 let (d, f') = splitdebug s 334 i = parsedebug d 335 in (f, i) : parse (Just f') ls 336 parse _ _ = [] 337 338 -- First 5 lines are --debug output, remainder is the next filename. 339 -- This assumes that --debug does not start outputting more lines. 340 splitdebug s = case splitc '\n' s of 341 (d1:d2:d3:d4:d5:rest) -> 342 ( intercalate "\n" [d1, d2, d3, d4, d5] 343 , intercalate "\n" rest 344 ) 345 _ -> ("", s) 346 347 -- This parser allows for some changes to the --debug output, 348 -- including reordering, or adding more items. 349 parsedebug s = do 350 let l = words s 351 let iskey v = ":" `isSuffixOf` v 352 let m = M.fromList $ zip 353 (filter iskey l) 354 (filter (not . iskey) l) 355 mkInodeCache 356 <$> (readish =<< M.lookup "ino:" m) 357 <*> (readish =<< M.lookup "size:" m) 358 <*> (parsePOSIXTime =<< (replace ":" "." <$> M.lookup "mtime:" m)) 359