1{- git cat-file interface, with handle automatically stored in the Annex monad 2 - 3 - Copyright 2011-2020 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8{-# LANGUAGE BangPatterns #-} 9 10module Annex.CatFile ( 11 catFile, 12 catFileDetails, 13 catObject, 14 catTree, 15 catCommit, 16 catObjectDetails, 17 withCatFileHandle, 18 catObjectMetaData, 19 catFileStop, 20 catKey, 21 catKey', 22 catSymLinkTarget, 23 catKeyFile, 24 catKeyFileHEAD, 25 catKeyFileHidden, 26 catObjectMetaDataHidden, 27) where 28 29import qualified Data.ByteString.Lazy as L 30import qualified Data.Map as M 31import System.PosixCompat.Types 32import Control.Concurrent.STM 33 34import Annex.Common 35import qualified Git 36import qualified Git.CatFile 37import qualified Annex 38import Git.Types 39import Git.FilePath 40import Git.Index 41import qualified Git.Ref 42import Annex.Link 43import Annex.CurrentBranch 44import Types.AdjustedBranch 45import Types.CatFileHandles 46import Utility.ResourcePool 47 48catFile :: Git.Branch -> RawFilePath -> Annex L.ByteString 49catFile branch file = withCatFileHandle $ \h -> 50 liftIO $ Git.CatFile.catFile h branch file 51 52catFileDetails :: Git.Branch -> RawFilePath -> Annex (Maybe (L.ByteString, Sha, ObjectType)) 53catFileDetails branch file = withCatFileHandle $ \h -> 54 liftIO $ Git.CatFile.catFileDetails h branch file 55 56catObject :: Git.Ref -> Annex L.ByteString 57catObject ref = withCatFileHandle $ \h -> 58 liftIO $ Git.CatFile.catObject h ref 59 60catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType)) 61catObjectMetaData ref = withCatFileHandle $ \h -> 62 liftIO $ Git.CatFile.catObjectMetaData h ref 63 64catTree :: Git.Ref -> Annex [(FilePath, FileMode)] 65catTree ref = withCatFileHandle $ \h -> 66 liftIO $ Git.CatFile.catTree h ref 67 68catCommit :: Git.Ref -> Annex (Maybe Commit) 69catCommit ref = withCatFileHandle $ \h -> 70 liftIO $ Git.CatFile.catCommit h ref 71 72catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType)) 73catObjectDetails ref = withCatFileHandle $ \h -> 74 liftIO $ Git.CatFile.catObjectDetails h ref 75 76{- There can be multiple index files, and a different cat-file is needed 77 - for each. That is selected by setting GIT_INDEX_FILE in the gitEnv 78 - before running this. -} 79withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a 80withCatFileHandle a = do 81 cfh <- Annex.getState Annex.catfilehandles 82 indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv) 83 <$> fromRepo gitEnv 84 p <- case cfh of 85 CatFileHandlesNonConcurrent m -> case M.lookup indexfile m of 86 Just p -> return p 87 Nothing -> do 88 p <- mkResourcePoolNonConcurrent startcatfile 89 let !m' = M.insert indexfile p m 90 Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent m' } 91 return p 92 CatFileHandlesPool tm -> do 93 m <- liftIO $ atomically $ takeTMVar tm 94 case M.lookup indexfile m of 95 Just p -> do 96 liftIO $ atomically $ putTMVar tm m 97 return p 98 Nothing -> do 99 p <- mkResourcePool maxCatFiles 100 let !m' = M.insert indexfile p m 101 liftIO $ atomically $ putTMVar tm m' 102 return p 103 withResourcePool p startcatfile a 104 where 105 startcatfile = inRepo Git.CatFile.catFileStart 106 107{- A lot of git cat-file processes are unlikely to improve concurrency, 108 - because a query to them takes only a little bit of CPU, and tends to be 109 - bottlenecked on disk. Also, they each open a number of files, so 110 - using too many might run out of file handles. So, only start a maximum 111 - of 2. 112 - 113 - Note that each different index file gets its own pool of cat-files; 114 - this is the size of each pool. In all, 4 times this many cat-files 115 - may end up running. 116 -} 117maxCatFiles :: Int 118maxCatFiles = 2 119 120{- Stops all running cat-files. Should only be run when it's known that 121 - nothing is using the handles, eg at shutdown. -} 122catFileStop :: Annex () 123catFileStop = do 124 cfh <- Annex.getState Annex.catfilehandles 125 m <- case cfh of 126 CatFileHandlesNonConcurrent m -> do 127 Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent M.empty } 128 return m 129 CatFileHandlesPool tm -> 130 liftIO $ atomically $ swapTMVar tm M.empty 131 liftIO $ forM_ (M.elems m) $ \p -> 132 freeResourcePool p Git.CatFile.catFileStop 133 134{- From ref to a symlink or a pointer file, get the key. -} 135catKey :: Ref -> Annex (Maybe Key) 136catKey ref = catObjectMetaData ref >>= \case 137 Just (_, sz, _) -> catKey' ref sz 138 Nothing -> return Nothing 139 140catKey' :: Ref -> FileSize -> Annex (Maybe Key) 141catKey' ref sz 142 -- Avoid catting large files, that cannot be symlinks or 143 -- pointer files, which would require buffering their 144 -- content in memory, as well as a lot of IO. 145 | sz <= maxPointerSz = 146 parseLinkTargetOrPointer . L.toStrict <$> catObject ref 147catKey' _ _ = return Nothing 148 149{- Gets a symlink target. -} 150catSymLinkTarget :: Sha -> Annex RawFilePath 151catSymLinkTarget sha = fromInternalGitPath . L.toStrict <$> get 152 where 153 -- Avoid buffering the whole file content, which might be large. 154 -- 8192 is enough if it really is a symlink. 155 get = L.take 8192 <$> catObject sha 156 157{- From a file in the repository back to the key. 158 - 159 - Ideally, this should reflect the key that's staged in the index, 160 - not the key that's committed to HEAD. Unfortunately, git cat-file 161 - does not refresh the index file after it's started up, so things 162 - newly staged in the index won't show up. It does, however, notice 163 - when branches change. 164 - 165 - For command-line git-annex use, that doesn't matter. It's perfectly 166 - reasonable for things staged in the index after the currently running 167 - git-annex process to not be noticed by it. However, we do want to see 168 - what's in the index, since it may have uncommitted changes not in HEAD 169 - 170 - For the assistant, this is much more of a problem, since it commits 171 - files and then needs to be able to immediately look up their keys. 172 - OTOH, the assistant doesn't keep changes staged in the index for very 173 - long at all before committing them -- and it won't look at the keys 174 - of files until after committing them. 175 - 176 - So, this gets info from the index, unless running as a daemon. 177 -} 178catKeyFile :: RawFilePath -> Annex (Maybe Key) 179catKeyFile f = ifM (Annex.getState Annex.daemon) 180 ( catKeyFileHEAD f 181 , catKey =<< liftIO (Git.Ref.fileRef f) 182 ) 183 184catKeyFileHEAD :: RawFilePath -> Annex (Maybe Key) 185catKeyFileHEAD f = catKey =<< liftIO (Git.Ref.fileFromRef Git.Ref.headRef f) 186 187{- Look in the original branch from whence an adjusted branch is based 188 - to find the file. But only when the adjustment hides some files. -} 189catKeyFileHidden :: RawFilePath -> CurrBranch -> Annex (Maybe Key) 190catKeyFileHidden = hiddenCat catKey 191 192catObjectMetaDataHidden :: RawFilePath -> CurrBranch -> Annex (Maybe (Sha, Integer, ObjectType)) 193catObjectMetaDataHidden = hiddenCat catObjectMetaData 194 195hiddenCat :: (Ref -> Annex (Maybe a)) -> RawFilePath -> CurrBranch -> Annex (Maybe a) 196hiddenCat a f (Just origbranch, Just adj) 197 | adjustmentHidesFiles adj = 198 a =<< liftIO (Git.Ref.fileFromRef origbranch f) 199hiddenCat _ _ _ = return Nothing 200