1{- git fsck interface
2i it is not fully repoducibleI repeated the same steps
3 -
4 - Copyright 2013 Joey Hess <id@joeyh.name>
5 -
6 - Licensed under the GNU AGPL version 3 or higher.
7 -}
8
9{-# LANGUAGE BangPatterns #-}
10
11module Git.Fsck (
12	FsckResults(..),
13	MissingObjects,
14	findBroken,
15	foundBroken,
16	findMissing,
17	isMissing,
18	knownMissing,
19) where
20
21import Common
22import Git
23import Git.Command
24import Git.Sha
25import Utility.Batch
26
27import qualified Data.Set as S
28import Control.Concurrent.Async
29import qualified Data.Semigroup as Sem
30import Prelude
31
32data FsckResults
33	= FsckFoundMissing
34		{ missingObjects :: MissingObjects
35		, missingObjectsTruncated :: Bool
36		}
37	| FsckFailed
38	deriving (Show)
39
40data FsckOutput
41	= FsckOutput MissingObjects Truncated
42	| NoFsckOutput
43	| AllDuplicateEntriesWarning
44
45type MissingObjects = S.Set Sha
46
47type Truncated = Bool
48
49appendFsckOutput :: FsckOutput -> FsckOutput -> FsckOutput
50appendFsckOutput (FsckOutput s1 t1) (FsckOutput s2 t2) =
51	FsckOutput (S.union s1 s2) (t1 || t2)
52appendFsckOutput (FsckOutput s t) _ = FsckOutput s t
53appendFsckOutput _ (FsckOutput s t) = FsckOutput s t
54appendFsckOutput NoFsckOutput NoFsckOutput = NoFsckOutput
55appendFsckOutput AllDuplicateEntriesWarning AllDuplicateEntriesWarning = AllDuplicateEntriesWarning
56appendFsckOutput AllDuplicateEntriesWarning NoFsckOutput = AllDuplicateEntriesWarning
57appendFsckOutput NoFsckOutput AllDuplicateEntriesWarning = AllDuplicateEntriesWarning
58
59instance Sem.Semigroup FsckOutput where
60	(<>) = appendFsckOutput
61
62instance Monoid FsckOutput where
63	mempty = NoFsckOutput
64
65{- Runs fsck to find some of the broken objects in the repository.
66 - May not find all broken objects, if fsck fails on bad data in some of
67 - the broken objects it does find.
68 -
69 - Strategy: Rather than parsing fsck's current specific output,
70 - look for anything in its output (both stdout and stderr) that appears
71 - to be a git sha. Not all such shas are of broken objects, so ask git
72 - to try to cat the object, and see if it fails.
73 -
74 - Note that there is a possible false positive: When changes are being
75 - made to the repo while this is running, fsck might complain about a
76 - missing object that has not made it to disk yet. Catting the object
77 - then succeeds, so it's not included in the FsckResults. But, fsck then
78 - exits nonzero, and so FsckFailed is returned. Set ignorenonzeroexit
79 - to avoid this false positive, at the risk of perhaps missing a problem
80 - so bad that fsck crashes without outputting any missing shas.
81 -}
82findBroken :: Bool -> Bool -> Repo -> IO FsckResults
83findBroken batchmode ignorenonzeroexit r = do
84	let (command, params) = ("git", fsckParams r)
85	(command', params') <- if batchmode
86		then toBatchCommand (command, params)
87		else return (command, params)
88
89	let p = (proc command' (toCommand params'))
90		{ std_out = CreatePipe
91		, std_err = CreatePipe
92		}
93	withCreateProcess p go
94  where
95	go _ (Just outh) (Just errh) pid = do
96		(o1, o2) <- concurrently
97			(parseFsckOutput maxobjs r outh pid)
98			(parseFsckOutput maxobjs r errh pid)
99		fsckok <- checkSuccessProcess pid
100		case mappend o1 o2 of
101			FsckOutput badobjs truncated
102				| S.null badobjs && not fsckok -> return fsckfailed
103				| otherwise -> return $ FsckFoundMissing badobjs truncated
104			NoFsckOutput
105				| not fsckok -> return fsckfailed
106				| otherwise -> return noproblem
107			-- If all fsck output was duplicateEntries warnings,
108			-- the repository is not broken, it just has some
109			-- unusual tree objects in it. So ignore nonzero
110			-- exit status.
111			AllDuplicateEntriesWarning -> return noproblem
112	go _ _ _ _ = error "internal"
113
114	maxobjs = 10000
115	noproblem = FsckFoundMissing S.empty False
116	fsckfailed
117		| ignorenonzeroexit = noproblem
118		| otherwise = FsckFailed
119
120foundBroken :: FsckResults -> Bool
121foundBroken FsckFailed = True
122foundBroken (FsckFoundMissing s _) = not (S.null s)
123
124knownMissing :: FsckResults -> MissingObjects
125knownMissing FsckFailed = S.empty
126knownMissing (FsckFoundMissing s _) = s
127
128{- Finds objects that are missing from the git repsitory, or are corrupt.
129 -
130 - This does not use git cat-file --batch, because catting a corrupt
131 - object can cause it to crash, or to report incorrect size information.
132 -}
133findMissing :: [Sha] -> Repo -> IO MissingObjects
134findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
135
136parseFsckOutput :: Int -> Repo -> Handle -> ProcessHandle -> IO FsckOutput
137parseFsckOutput maxobjs r h pid = do
138	ls <- getlines []
139	if null ls
140		then return NoFsckOutput
141		else if all ("duplicateEntries" `isInfixOf`) ls
142			then return AllDuplicateEntriesWarning
143			else do
144				let shas = findShas ls
145				let !truncated = length shas > maxobjs
146				missingobjs <- findMissing (take maxobjs shas) r
147				return $ FsckOutput missingobjs truncated
148  where
149	getlines c = hGetLineUntilExitOrEOF pid h >>= \case
150		Nothing -> return (reverse c)
151		Just l -> getlines (l:c)
152
153isMissing :: Sha -> Repo -> IO Bool
154isMissing s r = either (const True) (const False) <$> tryIO dump
155  where
156	dump = runQuiet
157		[ Param "show"
158		, Param (fromRef s)
159		] r
160
161findShas :: [String] -> [Sha]
162findShas = catMaybes . map (extractSha . encodeBS)
163	. concat . map words . filter wanted
164  where
165	wanted l = not ("dangling " `isPrefixOf` l)
166
167fsckParams :: Repo -> [CommandParam]
168fsckParams = gitCommandLine $ map Param
169	[ "fsck"
170	, "--no-dangling"
171	, "--no-reflogs"
172	]
173