1{- git cat-file interface 2 - 3 - Copyright 2011-2020 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8{-# LANGUAGE OverloadedStrings #-} 9{-# LANGUAGE BangPatterns #-} 10 11module Git.CatFile ( 12 CatFileHandle, 13 catFileStart, 14 catFileStart', 15 catFileStop, 16 catFile, 17 catFileDetails, 18 catTree, 19 catCommit, 20 catObject, 21 catObjectDetails, 22 catObjectMetaData, 23 catObjectStreamLsTree, 24 catObjectStream, 25 catObjectMetaDataStream, 26) where 27 28import System.IO 29import qualified Data.ByteString as S 30import qualified Data.ByteString.Lazy as L 31import qualified Data.ByteString.Char8 as S8 32import qualified Data.Attoparsec.ByteString as A 33import qualified Data.Attoparsec.ByteString.Char8 as A8 34import qualified Data.Map.Strict as M 35import Data.String 36import Data.Char 37import Numeric 38import System.Posix.Types 39import Text.Read 40import Control.Concurrent.Async 41import Control.Concurrent.Chan 42import Control.Monad.IO.Class (MonadIO) 43 44import Common 45import Git 46import Git.Sha 47import qualified Git.Ref 48import Git.Command 49import Git.Types 50import Git.HashObject 51import qualified Git.LsTree as LsTree 52import qualified Utility.CoProcess as CoProcess 53import qualified Git.BuildVersion as BuildVersion 54import Utility.Tuple 55 56data CatFileHandle = CatFileHandle 57 { catFileProcess :: CoProcess.CoProcessHandle 58 , checkFileProcess :: CoProcess.CoProcessHandle 59 , gitRepo :: Repo 60 } 61 62catFileStart :: Repo -> IO CatFileHandle 63catFileStart = catFileStart' True 64 65catFileStart' :: Bool -> Repo -> IO CatFileHandle 66catFileStart' restartable repo = CatFileHandle 67 <$> startp "--batch" 68 <*> startp ("--batch-check=" ++ batchFormat) 69 <*> pure repo 70 where 71 startp p = gitCoProcessStart restartable 72 [ Param "cat-file" 73 , Param p 74 ] repo 75 76batchFormat :: String 77batchFormat = "%(objectname) %(objecttype) %(objectsize)" 78 79catFileStop :: CatFileHandle -> IO () 80catFileStop h = do 81 CoProcess.stop (catFileProcess h) 82 CoProcess.stop (checkFileProcess h) 83 84{- Reads a file from a specified branch. -} 85catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString 86catFile h branch file = catObject h $ 87 Git.Ref.branchFileRef branch file 88 89catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType)) 90catFileDetails h branch file = catObjectDetails h $ 91 Git.Ref.branchFileRef branch file 92 93{- Uses a running git cat-file read the content of an object. 94 - Objects that do not exist will have "" returned. -} 95catObject :: CatFileHandle -> Ref -> IO L.ByteString 96catObject h object = maybe L.empty fst3 <$> catObjectDetails h object 97 98catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType)) 99catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do 100 header <- S8.hGetLine from 101 case parseResp object header of 102 Just r@(ParsedResp sha objtype _size) -> do 103 content <- readObjectContent from r 104 return $ Just (content, sha, objtype) 105 Just DNE -> return Nothing 106 Nothing -> error $ "unknown response from git cat-file " ++ show (header, object) 107 where 108 -- Slow fallback path for filenames containing newlines. 109 newlinefallback = queryObjectType object (gitRepo h) >>= \case 110 Nothing -> return Nothing 111 Just objtype -> queryContent object (gitRepo h) >>= \case 112 Nothing -> return Nothing 113 Just content -> do 114 -- only the --batch interface allows getting 115 -- the sha, so have to re-hash the object 116 sha <- hashObject' objtype 117 (flip L.hPut content) 118 (gitRepo h) 119 return (Just (content, sha, objtype)) 120 121readObjectContent :: Handle -> ParsedResp -> IO L.ByteString 122readObjectContent h (ParsedResp _ _ size) = do 123 content <- S.hGet h (fromIntegral size) 124 eatchar '\n' 125 return (L.fromChunks [content]) 126 where 127 eatchar expected = do 128 c <- hGetChar h 129 when (c /= expected) $ 130 error $ "missing " ++ (show expected) ++ " from git cat-file" 131readObjectContent _ DNE = error "internal" 132 133{- Gets the size and type of an object, without reading its content. -} 134catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType)) 135catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do 136 resp <- S8.hGetLine from 137 case parseResp object resp of 138 Just (ParsedResp sha objtype size) -> 139 return $ Just (sha, size, objtype) 140 Just DNE -> return Nothing 141 Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object) 142 where 143 -- Slow fallback path for filenames containing newlines. 144 newlinefallback = do 145 sha <- Git.Ref.sha object (gitRepo h) 146 sz <- querySize object (gitRepo h) 147 objtype <- queryObjectType object (gitRepo h) 148 return $ (,,) <$> sha <*> sz <*> objtype 149 150data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE 151 deriving (Show) 152 153query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a 154query hdl object newlinefallback receive 155 -- git cat-file --batch uses a line based protocol, so when the 156 -- filename itself contains a newline, have to fall back to another 157 -- method of getting the information. 158 | '\n' `S8.elem` s = newlinefallback 159 -- git strips carriage return from the end of a line, out of some 160 -- misplaced desire to support windows, so also use the newline 161 -- fallback for those. 162 | "\r" `S8.isSuffixOf` s = newlinefallback 163 | otherwise = CoProcess.query hdl send receive 164 where 165 send to = S8.hPutStrLn to s 166 s = fromRef' object 167 168parseResp :: Ref -> S.ByteString -> Maybe ParsedResp 169parseResp object s 170 | " missing" `S.isSuffixOf` s -- less expensive than full check 171 && s == fromRef' object <> " missing" = Just DNE 172 | otherwise = eitherToMaybe $ A.parseOnly respParser s 173 174respParser :: A.Parser ParsedResp 175respParser = ParsedResp 176 <$> (maybe (fail "bad sha") return . extractSha =<< nextword) 177 <* A8.char ' ' 178 <*> (maybe (fail "bad object type") return . readObjectType =<< nextword) 179 <* A8.char ' ' 180 <*> A8.decimal 181 where 182 nextword = A8.takeTill (== ' ') 183 184querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a) 185querySingle o r repo reader = assertLocal repo $ 186 -- In non-batch mode, git cat-file warns on stderr when 187 -- asked for an object that does not exist. 188 -- Squelch that warning to behave the same as batch mode. 189 withNullHandle $ \nullh -> do 190 let p = gitCreateProcess 191 [ Param "cat-file" 192 , o 193 , Param (fromRef r) 194 ] repo 195 let p' = p 196 { std_err = UseHandle nullh 197 , std_in = Inherit 198 , std_out = CreatePipe 199 } 200 withCreateProcess p' go 201 where 202 go _ (Just outh) _ pid = do 203 output <- reader outh 204 hClose outh 205 ifM (checkSuccessProcess pid) 206 ( return (Just output) 207 , return Nothing 208 ) 209 go _ _ _ _ = error "internal" 210 211querySize :: Ref -> Repo -> IO (Maybe FileSize) 212querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n')) 213 <$> querySingle (Param "-s") r repo hGetContentsStrict 214 215queryObjectType :: Ref -> Repo -> IO (Maybe ObjectType) 216queryObjectType r repo = maybe Nothing (readObjectType . encodeBS . takeWhile (/= '\n')) 217 <$> querySingle (Param "-t") r repo hGetContentsStrict 218 219queryContent :: Ref -> Repo -> IO (Maybe L.ByteString) 220queryContent r repo = fmap (\b -> L.fromChunks [b]) 221 <$> querySingle (Param "-p") r repo S.hGetContents 222 223{- Gets a list of files and directories in a tree. (Not recursive.) -} 224catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)] 225catTree h treeref = go <$> catObjectDetails h treeref 226 where 227 go (Just (b, _, TreeObject)) = parsetree [] b 228 go _ = [] 229 230 parsetree c b = case L.break (== 0) b of 231 (modefile, rest) 232 | L.null modefile -> c 233 | otherwise -> parsetree 234 (parsemodefile modefile:c) 235 (dropsha rest) 236 237 -- these 20 bytes after the NUL hold the file's sha 238 dropsha = L.drop 21 239 240 parsemodefile b = 241 let (modestr, file) = separate (== ' ') (decodeBL b) 242 in (file, readmode modestr) 243 readmode = fromMaybe 0 . fmap fst . headMaybe . readOct 244 245catCommit :: CatFileHandle -> Ref -> IO (Maybe Commit) 246catCommit h commitref = go <$> catObjectDetails h commitref 247 where 248 go (Just (b, _, CommitObject)) = parseCommit (L.toStrict b) 249 go _ = Nothing 250 251parseCommit :: S.ByteString -> Maybe Commit 252parseCommit b = Commit 253 <$> (extractSha =<< field "tree") 254 <*> Just (maybe [] (mapMaybe extractSha) (fields "parent")) 255 <*> (parsemetadata <$> field "author") 256 <*> (parsemetadata <$> field "committer") 257 <*> Just (decodeBS $ S.intercalate (S.singleton nl) message) 258 where 259 field n = headMaybe =<< fields n 260 fields n = M.lookup (fromString n) fieldmap 261 fieldmap = M.fromListWith (++) ((map breakfield) header) 262 breakfield l = 263 let (k, sp_v) = S.break (== sp) l 264 in (k, [S.drop 1 sp_v]) 265 (header, message) = separate S.null ls 266 ls = S.split nl b 267 268 -- author and committer lines have the form: "name <email> date" 269 -- The email is always present, even if empty "<>" 270 parsemetadata l = CommitMetaData 271 { commitName = whenset $ S.init name_sp 272 , commitEmail = whenset email 273 , commitDate = whenset $ S.drop 2 gt_sp_date 274 } 275 where 276 (name_sp, rest) = S.break (== lt) l 277 (email, gt_sp_date) = S.break (== gt) (S.drop 1 rest) 278 whenset v 279 | S.null v = Nothing 280 | otherwise = Just (decodeBS v) 281 282 nl = fromIntegral (ord '\n') 283 sp = fromIntegral (ord ' ') 284 lt = fromIntegral (ord '<') 285 gt = fromIntegral (ord '>') 286 287{- Uses cat-file to stream the contents of the files as efficiently 288 - as possible. This is much faster than querying it repeatedly per file. 289 -} 290catObjectStreamLsTree 291 :: (MonadMask m, MonadIO m) 292 => [LsTree.TreeItem] 293 -> (LsTree.TreeItem -> Maybe v) 294 -> Repo 295 -> (IO (Maybe (v, Maybe L.ByteString)) -> m a) 296 -> m a 297catObjectStreamLsTree l want repo reader = withCatFileStream False repo $ 298 \c hin hout -> bracketIO 299 (async $ feeder c hin) 300 cancel 301 (const (reader (catObjectReader readObjectContent c hout))) 302 where 303 feeder c h = do 304 forM_ l $ \ti -> case want ti of 305 Nothing -> return () 306 Just v -> do 307 let sha = LsTree.sha ti 308 liftIO $ writeChan c (sha, v) 309 S8.hPutStrLn h (fromRef' sha) 310 hClose h 311 312catObjectStream 313 :: (MonadMask m, MonadIO m) 314 => Repo 315 -> ( 316 ((v, Ref) -> IO ()) -- ^ call to feed values in 317 -> IO () -- call once all values are fed in 318 -> IO (Maybe (v, Maybe L.ByteString)) -- call to read results 319 -> m a 320 ) 321 -> m a 322catObjectStream repo a = withCatFileStream False repo go 323 where 324 go c hin hout = a 325 (feeder c hin) 326 (hClose hin) 327 (catObjectReader readObjectContent c hout) 328 feeder c h (v, ref) = do 329 writeChan c (ref, v) 330 S8.hPutStrLn h (fromRef' ref) 331 332catObjectMetaDataStream 333 :: (MonadMask m, MonadIO m) 334 => Repo 335 -> ( 336 ((v, Ref) -> IO ()) -- ^ call to feed values in 337 -> IO () -- call once all values are fed in 338 -> IO (Maybe (v, Maybe (Sha, FileSize, ObjectType))) -- call to read results 339 -> m a 340 ) 341 -> m a 342catObjectMetaDataStream repo a = withCatFileStream True repo go 343 where 344 go c hin hout = a 345 (feeder c hin) 346 (hClose hin) 347 (catObjectReader (\_h r -> pure (conv r)) c hout) 348 349 feeder c h (v, ref) = do 350 liftIO $ writeChan c (ref, v) 351 S8.hPutStrLn h (fromRef' ref) 352 353 conv (ParsedResp sha ty sz) = (sha, sz, ty) 354 conv DNE = error "internal" 355 356catObjectReader 357 :: (Handle -> ParsedResp -> IO t) 358 -> Chan (Ref, a) 359 -> Handle 360 -> IO (Maybe (a, Maybe t)) 361catObjectReader getv c h = ifM (hIsEOF h) 362 ( return Nothing 363 , do 364 (ref, f) <- liftIO $ readChan c 365 resp <- S8.hGetLine h 366 case parseResp ref resp of 367 Just r@(ParsedResp {}) -> do 368 v <- getv h r 369 return (Just (f, Just v)) 370 Just DNE -> return (Just (f, Nothing)) 371 Nothing -> error $ "unknown response from git cat-file " ++ show resp 372 ) 373 374withCatFileStream 375 :: (MonadMask m, MonadIO m) 376 => Bool 377 -> Repo 378 -> (Chan v -> Handle -> Handle -> m a) 379 -> m a 380withCatFileStream check repo reader = assertLocal repo $ 381 bracketIO start stop $ \(c, hin, hout, _) -> reader c hin hout 382 where 383 params = catMaybes 384 [ Just $ Param "cat-file" 385 , Just $ Param ("--batch" ++ (if check then "-check" else "") ++ "=" ++ batchFormat) 386 -- This option makes it faster, but is not present in 387 -- older versions of git. 388 , if BuildVersion.older "2.4.3" 389 then Nothing 390 else Just $ Param "--buffer" 391 ] 392 393 start = do 394 let p = gitCreateProcess params repo 395 (Just hin, Just hout, _, pid) <- createProcess p 396 { std_in = CreatePipe 397 , std_out = CreatePipe 398 } 399 c <- newChan 400 return (c, hin, hout, pid) 401 402 stop (_, hin, hout, pid) = do 403 hClose hin 404 hClose hout 405 void $ checkSuccessProcess pid 406