1{- git fsck interface 2i it is not fully repoducibleI repeated the same steps 3 - 4 - Copyright 2013 Joey Hess <id@joeyh.name> 5 - 6 - Licensed under the GNU AGPL version 3 or higher. 7 -} 8 9{-# LANGUAGE BangPatterns #-} 10 11module Git.Fsck ( 12 FsckResults(..), 13 MissingObjects, 14 findBroken, 15 foundBroken, 16 findMissing, 17 isMissing, 18 knownMissing, 19) where 20 21import Common 22import Git 23import Git.Command 24import Git.Sha 25import Utility.Batch 26 27import qualified Data.Set as S 28import Control.Concurrent.Async 29import qualified Data.Semigroup as Sem 30import Prelude 31 32data FsckResults 33 = FsckFoundMissing 34 { missingObjects :: MissingObjects 35 , missingObjectsTruncated :: Bool 36 } 37 | FsckFailed 38 deriving (Show) 39 40data FsckOutput 41 = FsckOutput MissingObjects Truncated 42 | NoFsckOutput 43 | AllDuplicateEntriesWarning 44 45type MissingObjects = S.Set Sha 46 47type Truncated = Bool 48 49appendFsckOutput :: FsckOutput -> FsckOutput -> FsckOutput 50appendFsckOutput (FsckOutput s1 t1) (FsckOutput s2 t2) = 51 FsckOutput (S.union s1 s2) (t1 || t2) 52appendFsckOutput (FsckOutput s t) _ = FsckOutput s t 53appendFsckOutput _ (FsckOutput s t) = FsckOutput s t 54appendFsckOutput NoFsckOutput NoFsckOutput = NoFsckOutput 55appendFsckOutput AllDuplicateEntriesWarning AllDuplicateEntriesWarning = AllDuplicateEntriesWarning 56appendFsckOutput AllDuplicateEntriesWarning NoFsckOutput = AllDuplicateEntriesWarning 57appendFsckOutput NoFsckOutput AllDuplicateEntriesWarning = AllDuplicateEntriesWarning 58 59instance Sem.Semigroup FsckOutput where 60 (<>) = appendFsckOutput 61 62instance Monoid FsckOutput where 63 mempty = NoFsckOutput 64 65{- Runs fsck to find some of the broken objects in the repository. 66 - May not find all broken objects, if fsck fails on bad data in some of 67 - the broken objects it does find. 68 - 69 - Strategy: Rather than parsing fsck's current specific output, 70 - look for anything in its output (both stdout and stderr) that appears 71 - to be a git sha. Not all such shas are of broken objects, so ask git 72 - to try to cat the object, and see if it fails. 73 - 74 - Note that there is a possible false positive: When changes are being 75 - made to the repo while this is running, fsck might complain about a 76 - missing object that has not made it to disk yet. Catting the object 77 - then succeeds, so it's not included in the FsckResults. But, fsck then 78 - exits nonzero, and so FsckFailed is returned. Set ignorenonzeroexit 79 - to avoid this false positive, at the risk of perhaps missing a problem 80 - so bad that fsck crashes without outputting any missing shas. 81 -} 82findBroken :: Bool -> Bool -> Repo -> IO FsckResults 83findBroken batchmode ignorenonzeroexit r = do 84 let (command, params) = ("git", fsckParams r) 85 (command', params') <- if batchmode 86 then toBatchCommand (command, params) 87 else return (command, params) 88 89 let p = (proc command' (toCommand params')) 90 { std_out = CreatePipe 91 , std_err = CreatePipe 92 } 93 withCreateProcess p go 94 where 95 go _ (Just outh) (Just errh) pid = do 96 (o1, o2) <- concurrently 97 (parseFsckOutput maxobjs r outh pid) 98 (parseFsckOutput maxobjs r errh pid) 99 fsckok <- checkSuccessProcess pid 100 case mappend o1 o2 of 101 FsckOutput badobjs truncated 102 | S.null badobjs && not fsckok -> return fsckfailed 103 | otherwise -> return $ FsckFoundMissing badobjs truncated 104 NoFsckOutput 105 | not fsckok -> return fsckfailed 106 | otherwise -> return noproblem 107 -- If all fsck output was duplicateEntries warnings, 108 -- the repository is not broken, it just has some 109 -- unusual tree objects in it. So ignore nonzero 110 -- exit status. 111 AllDuplicateEntriesWarning -> return noproblem 112 go _ _ _ _ = error "internal" 113 114 maxobjs = 10000 115 noproblem = FsckFoundMissing S.empty False 116 fsckfailed 117 | ignorenonzeroexit = noproblem 118 | otherwise = FsckFailed 119 120foundBroken :: FsckResults -> Bool 121foundBroken FsckFailed = True 122foundBroken (FsckFoundMissing s _) = not (S.null s) 123 124knownMissing :: FsckResults -> MissingObjects 125knownMissing FsckFailed = S.empty 126knownMissing (FsckFoundMissing s _) = s 127 128{- Finds objects that are missing from the git repsitory, or are corrupt. 129 - 130 - This does not use git cat-file --batch, because catting a corrupt 131 - object can cause it to crash, or to report incorrect size information. 132 -} 133findMissing :: [Sha] -> Repo -> IO MissingObjects 134findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs 135 136parseFsckOutput :: Int -> Repo -> Handle -> ProcessHandle -> IO FsckOutput 137parseFsckOutput maxobjs r h pid = do 138 ls <- getlines [] 139 if null ls 140 then return NoFsckOutput 141 else if all ("duplicateEntries" `isInfixOf`) ls 142 then return AllDuplicateEntriesWarning 143 else do 144 let shas = findShas ls 145 let !truncated = length shas > maxobjs 146 missingobjs <- findMissing (take maxobjs shas) r 147 return $ FsckOutput missingobjs truncated 148 where 149 getlines c = hGetLineUntilExitOrEOF pid h >>= \case 150 Nothing -> return (reverse c) 151 Just l -> getlines (l:c) 152 153isMissing :: Sha -> Repo -> IO Bool 154isMissing s r = either (const True) (const False) <$> tryIO dump 155 where 156 dump = runQuiet 157 [ Param "show" 158 , Param (fromRef s) 159 ] r 160 161findShas :: [String] -> [Sha] 162findShas = catMaybes . map (extractSha . encodeBS) 163 . concat . map words . filter wanted 164 where 165 wanted l = not ("dangling " `isPrefixOf` l) 166 167fsckParams :: Repo -> [CommandParam] 168fsckParams = gitCommandLine $ map Param 169 [ "fsck" 170 , "--no-dangling" 171 , "--no-reflogs" 172 ] 173