1{- git cat-file interface
2 -
3 - Copyright 2011-2020 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE OverloadedStrings #-}
9{-# LANGUAGE BangPatterns #-}
10
11module Git.CatFile (
12	CatFileHandle,
13	catFileStart,
14	catFileStart',
15	catFileStop,
16	catFile,
17	catFileDetails,
18	catTree,
19	catCommit,
20	catObject,
21	catObjectDetails,
22	catObjectMetaData,
23	catObjectStreamLsTree,
24	catObjectStream,
25	catObjectMetaDataStream,
26) where
27
28import System.IO
29import qualified Data.ByteString as S
30import qualified Data.ByteString.Lazy as L
31import qualified Data.ByteString.Char8 as S8
32import qualified Data.Attoparsec.ByteString as A
33import qualified Data.Attoparsec.ByteString.Char8 as A8
34import qualified Data.Map.Strict as M
35import Data.String
36import Data.Char
37import Numeric
38import System.Posix.Types
39import Text.Read
40import Control.Concurrent.Async
41import Control.Concurrent.Chan
42import Control.Monad.IO.Class (MonadIO)
43
44import Common
45import Git
46import Git.Sha
47import qualified Git.Ref
48import Git.Command
49import Git.Types
50import Git.HashObject
51import qualified Git.LsTree as LsTree
52import qualified Utility.CoProcess as CoProcess
53import qualified Git.BuildVersion as BuildVersion
54import Utility.Tuple
55
56data CatFileHandle = CatFileHandle
57	{ catFileProcess :: CoProcess.CoProcessHandle
58	, checkFileProcess :: CoProcess.CoProcessHandle
59	, gitRepo :: Repo
60	}
61
62catFileStart :: Repo -> IO CatFileHandle
63catFileStart = catFileStart' True
64
65catFileStart' :: Bool -> Repo -> IO CatFileHandle
66catFileStart' restartable repo = CatFileHandle
67	<$> startp "--batch"
68	<*> startp ("--batch-check=" ++ batchFormat)
69	<*> pure repo
70  where
71	startp p = gitCoProcessStart restartable
72		[ Param "cat-file"
73		, Param p
74		] repo
75
76batchFormat :: String
77batchFormat = "%(objectname) %(objecttype) %(objectsize)"
78
79catFileStop :: CatFileHandle -> IO ()
80catFileStop h = do
81	CoProcess.stop (catFileProcess h)
82	CoProcess.stop (checkFileProcess h)
83
84{- Reads a file from a specified branch. -}
85catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
86catFile h branch file = catObject h $
87	Git.Ref.branchFileRef branch file
88
89catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
90catFileDetails h branch file = catObjectDetails h $
91	Git.Ref.branchFileRef branch file
92
93{- Uses a running git cat-file read the content of an object.
94 - Objects that do not exist will have "" returned. -}
95catObject :: CatFileHandle -> Ref -> IO L.ByteString
96catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
97
98catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
99catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
100	header <- S8.hGetLine from
101	case parseResp object header of
102		Just r@(ParsedResp sha objtype _size) -> do
103			content <- readObjectContent from r
104			return $ Just (content, sha, objtype)
105		Just DNE -> return Nothing
106		Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
107  where
108	-- Slow fallback path for filenames containing newlines.
109	newlinefallback = queryObjectType object (gitRepo h) >>= \case
110		Nothing -> return Nothing
111		Just objtype -> queryContent object (gitRepo h) >>= \case
112			Nothing -> return Nothing
113			Just content -> do
114				-- only the --batch interface allows getting
115				-- the sha, so have to re-hash the object
116				sha <- hashObject' objtype
117					(flip L.hPut content)
118					(gitRepo h)
119				return (Just (content, sha, objtype))
120
121readObjectContent :: Handle -> ParsedResp -> IO L.ByteString
122readObjectContent h (ParsedResp _ _ size) = do
123	content <- S.hGet h (fromIntegral size)
124	eatchar '\n'
125	return (L.fromChunks [content])
126  where
127	eatchar expected = do
128		c <- hGetChar h
129		when (c /= expected) $
130			error $ "missing " ++ (show expected) ++ " from git cat-file"
131readObjectContent _ DNE = error "internal"
132
133{- Gets the size and type of an object, without reading its content. -}
134catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
135catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
136	resp <- S8.hGetLine from
137	case parseResp object resp of
138		Just (ParsedResp sha objtype size) ->
139			return $ Just (sha, size, objtype)
140		Just DNE -> return Nothing
141		Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
142  where
143	-- Slow fallback path for filenames containing newlines.
144	newlinefallback = do
145		sha <- Git.Ref.sha object (gitRepo h)
146		sz <- querySize object (gitRepo h)
147		objtype <- queryObjectType object (gitRepo h)
148		return $ (,,) <$> sha <*> sz <*> objtype
149
150data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
151	deriving (Show)
152
153query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
154query hdl object newlinefallback receive
155	-- git cat-file --batch uses a line based protocol, so when the
156	-- filename itself contains a newline, have to fall back to another
157	-- method of getting the information.
158	| '\n' `S8.elem` s = newlinefallback
159	-- git strips carriage return from the end of a line, out of some
160	-- misplaced desire to support windows, so also use the newline
161	-- fallback for those.
162	| "\r" `S8.isSuffixOf` s = newlinefallback
163	| otherwise = CoProcess.query hdl send receive
164  where
165	send to = S8.hPutStrLn to s
166	s = fromRef' object
167
168parseResp :: Ref -> S.ByteString -> Maybe ParsedResp
169parseResp object s
170	| " missing" `S.isSuffixOf` s -- less expensive than full check
171		&& s == fromRef' object <> " missing" = Just DNE
172	| otherwise = eitherToMaybe $ A.parseOnly respParser s
173
174respParser :: A.Parser ParsedResp
175respParser = ParsedResp
176	<$> (maybe (fail "bad sha") return . extractSha =<< nextword)
177	<* A8.char ' '
178	<*> (maybe (fail "bad object type") return . readObjectType =<< nextword)
179	<* A8.char ' '
180	<*> A8.decimal
181  where
182	nextword = A8.takeTill (== ' ')
183
184querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
185querySingle o r repo reader = assertLocal repo $
186	-- In non-batch mode, git cat-file warns on stderr when
187	-- asked for an object that does not exist.
188	-- Squelch that warning to behave the same as batch mode.
189	withNullHandle $ \nullh -> do
190		let p = gitCreateProcess
191			[ Param "cat-file"
192			, o
193			, Param (fromRef r)
194			] repo
195		let p' = p
196			{ std_err = UseHandle nullh
197			, std_in = Inherit
198			, std_out = CreatePipe
199			}
200		withCreateProcess p' go
201  where
202	go _ (Just outh) _ pid = do
203		output <- reader outh
204		hClose outh
205		ifM (checkSuccessProcess pid)
206			( return (Just output)
207			, return Nothing
208			)
209	go _ _ _ _ = error "internal"
210
211querySize :: Ref -> Repo -> IO (Maybe FileSize)
212querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n'))
213	<$> querySingle (Param "-s") r repo hGetContentsStrict
214
215queryObjectType :: Ref -> Repo -> IO (Maybe ObjectType)
216queryObjectType r repo = maybe Nothing (readObjectType . encodeBS . takeWhile (/= '\n'))
217	<$> querySingle (Param "-t") r repo hGetContentsStrict
218
219queryContent :: Ref -> Repo -> IO (Maybe L.ByteString)
220queryContent r repo = fmap (\b -> L.fromChunks [b])
221	<$> querySingle (Param "-p") r repo S.hGetContents
222
223{- Gets a list of files and directories in a tree. (Not recursive.) -}
224catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
225catTree h treeref = go <$> catObjectDetails h treeref
226  where
227	go (Just (b, _, TreeObject)) = parsetree [] b
228	go _ = []
229
230	parsetree c b = case L.break (== 0) b of
231		(modefile, rest)
232			| L.null modefile -> c
233			| otherwise -> parsetree
234				(parsemodefile modefile:c)
235				(dropsha rest)
236
237	-- these 20 bytes after the NUL hold the file's sha
238	dropsha = L.drop 21
239
240	parsemodefile b =
241		let (modestr, file) = separate (== ' ') (decodeBL b)
242		in (file, readmode modestr)
243	readmode = fromMaybe 0 . fmap fst . headMaybe . readOct
244
245catCommit :: CatFileHandle -> Ref -> IO (Maybe Commit)
246catCommit h commitref = go <$> catObjectDetails h commitref
247  where
248	go (Just (b, _, CommitObject)) = parseCommit (L.toStrict b)
249	go _ = Nothing
250
251parseCommit :: S.ByteString -> Maybe Commit
252parseCommit b = Commit
253	<$> (extractSha =<< field "tree")
254	<*> Just (maybe [] (mapMaybe extractSha) (fields "parent"))
255	<*> (parsemetadata <$> field "author")
256	<*> (parsemetadata <$> field "committer")
257	<*> Just (decodeBS $ S.intercalate (S.singleton nl) message)
258  where
259	field n = headMaybe =<< fields n
260	fields n = M.lookup (fromString n) fieldmap
261	fieldmap = M.fromListWith (++) ((map breakfield) header)
262	breakfield l =
263		let (k, sp_v) = S.break (== sp) l
264		in (k, [S.drop 1 sp_v])
265	(header, message) = separate S.null ls
266	ls = S.split nl b
267
268	-- author and committer lines have the form: "name <email> date"
269	-- The email is always present, even if empty "<>"
270	parsemetadata l = CommitMetaData
271		{ commitName = whenset $ S.init name_sp
272		, commitEmail = whenset email
273		, commitDate = whenset $ S.drop 2 gt_sp_date
274		}
275	  where
276		(name_sp, rest) = S.break (== lt) l
277		(email, gt_sp_date) = S.break (== gt) (S.drop 1 rest)
278		whenset v
279			| S.null v = Nothing
280			| otherwise = Just (decodeBS v)
281
282	nl = fromIntegral (ord '\n')
283	sp = fromIntegral (ord ' ')
284	lt = fromIntegral (ord '<')
285	gt = fromIntegral (ord '>')
286
287{- Uses cat-file to stream the contents of the files as efficiently
288 - as possible. This is much faster than querying it repeatedly per file.
289 -}
290catObjectStreamLsTree
291	:: (MonadMask m, MonadIO m)
292	=> [LsTree.TreeItem]
293	-> (LsTree.TreeItem -> Maybe v)
294	-> Repo
295	-> (IO (Maybe (v, Maybe L.ByteString)) -> m a)
296	-> m a
297catObjectStreamLsTree l want repo reader = withCatFileStream False repo $
298	\c hin hout -> bracketIO
299		(async $ feeder c hin)
300		cancel
301		(const (reader (catObjectReader readObjectContent c hout)))
302  where
303	feeder c h = do
304		forM_ l $ \ti -> case want ti of
305			Nothing -> return ()
306			Just v -> do
307				let sha = LsTree.sha ti
308				liftIO $ writeChan c (sha, v)
309				S8.hPutStrLn h (fromRef' sha)
310		hClose h
311
312catObjectStream
313	:: (MonadMask m, MonadIO m)
314	=> Repo
315	-> (
316	    ((v, Ref) -> IO ()) -- ^ call to feed values in
317	    -> IO () -- call once all values are fed in
318	    -> IO (Maybe (v, Maybe L.ByteString)) -- call to read results
319	    -> m a
320	   )
321	-> m a
322catObjectStream repo a = withCatFileStream False repo go
323  where
324	go c hin hout = a
325		(feeder c hin)
326		(hClose hin)
327		(catObjectReader readObjectContent c hout)
328	feeder c h (v, ref) = do
329		writeChan c (ref, v)
330		S8.hPutStrLn h (fromRef' ref)
331
332catObjectMetaDataStream
333	:: (MonadMask m, MonadIO m)
334	=> Repo
335	-> (
336	    ((v, Ref) -> IO ()) -- ^ call to feed values in
337	    -> IO () -- call once all values are fed in
338	    -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results
339	    -> m a
340	   )
341	-> m a
342catObjectMetaDataStream repo a = withCatFileStream True repo go
343  where
344	go c hin hout = a
345		(feeder c hin)
346		(hClose hin)
347		(catObjectReader (\_h r -> pure (conv r)) c hout)
348
349	feeder c h (v, ref) = do
350		liftIO $ writeChan c (ref, v)
351		S8.hPutStrLn h (fromRef' ref)
352
353	conv (ParsedResp sha ty sz) = (sha, sz, ty)
354	conv DNE = error "internal"
355
356catObjectReader
357	:: (Handle -> ParsedResp -> IO t)
358	-> Chan (Ref, a)
359	-> Handle
360	-> IO (Maybe (a, Maybe t))
361catObjectReader getv c h = ifM (hIsEOF h)
362	( return Nothing
363	, do
364		(ref, f) <- liftIO $ readChan c
365		resp <- S8.hGetLine h
366		case parseResp ref resp of
367			Just r@(ParsedResp {}) -> do
368				v <- getv h r
369				return (Just (f, Just v))
370			Just DNE -> return (Just (f, Nothing))
371			Nothing -> error $ "unknown response from git cat-file " ++ show resp
372	)
373
374withCatFileStream
375	:: (MonadMask m, MonadIO m)
376	=> Bool
377	-> Repo
378	-> (Chan v -> Handle -> Handle -> m a)
379	-> m a
380withCatFileStream check repo reader = assertLocal repo $
381	bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout
382  where
383	params = catMaybes
384		[ Just $ Param "cat-file"
385		, Just $ Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat)
386		-- This option makes it faster, but is not present in
387		-- older versions of git.
388		, if BuildVersion.older "2.4.3"
389			then Nothing
390			else Just $ Param "--buffer"
391		]
392
393	start = do
394		let p = gitCreateProcess params repo
395		(Just hin, Just hout, _, pid) <- createProcess p
396			{ std_in = CreatePipe
397			, std_out = CreatePipe
398			}
399		c <- newChan
400		return (c, hin, hout, pid)
401
402	stop (_, hin, hout, pid) = do
403		hClose hin
404		hClose hout
405		void $ checkSuccessProcess pid
406