1{- git cat-file interface, with handle automatically stored in the Annex monad
2 -
3 - Copyright 2011-2020 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE BangPatterns #-}
9
10module Annex.CatFile (
11	catFile,
12	catFileDetails,
13	catObject,
14	catTree,
15	catCommit,
16	catObjectDetails,
17	withCatFileHandle,
18	catObjectMetaData,
19	catFileStop,
20	catKey,
21	catKey',
22	catSymLinkTarget,
23	catKeyFile,
24	catKeyFileHEAD,
25	catKeyFileHidden,
26	catObjectMetaDataHidden,
27) where
28
29import qualified Data.ByteString.Lazy as L
30import qualified Data.Map as M
31import System.PosixCompat.Types
32import Control.Concurrent.STM
33
34import Annex.Common
35import qualified Git
36import qualified Git.CatFile
37import qualified Annex
38import Git.Types
39import Git.FilePath
40import Git.Index
41import qualified Git.Ref
42import Annex.Link
43import Annex.CurrentBranch
44import Types.AdjustedBranch
45import Types.CatFileHandles
46import Utility.ResourcePool
47
48catFile :: Git.Branch -> RawFilePath -> Annex L.ByteString
49catFile branch file = withCatFileHandle $ \h ->
50	liftIO $ Git.CatFile.catFile h branch file
51
52catFileDetails :: Git.Branch -> RawFilePath -> Annex (Maybe (L.ByteString, Sha, ObjectType))
53catFileDetails branch file = withCatFileHandle $ \h ->
54	liftIO $ Git.CatFile.catFileDetails h branch file
55
56catObject :: Git.Ref -> Annex L.ByteString
57catObject ref = withCatFileHandle $ \h ->
58	liftIO $ Git.CatFile.catObject h ref
59
60catObjectMetaData :: Git.Ref -> Annex (Maybe (Sha, Integer, ObjectType))
61catObjectMetaData ref = withCatFileHandle $ \h ->
62	liftIO $ Git.CatFile.catObjectMetaData h ref
63
64catTree :: Git.Ref -> Annex [(FilePath, FileMode)]
65catTree ref = withCatFileHandle $ \h ->
66	liftIO $ Git.CatFile.catTree h ref
67
68catCommit :: Git.Ref -> Annex (Maybe Commit)
69catCommit ref = withCatFileHandle $ \h ->
70	liftIO $ Git.CatFile.catCommit h ref
71
72catObjectDetails :: Git.Ref -> Annex (Maybe (L.ByteString, Sha, ObjectType))
73catObjectDetails ref = withCatFileHandle $ \h ->
74	liftIO $ Git.CatFile.catObjectDetails h ref
75
76{- There can be multiple index files, and a different cat-file is needed
77 - for each. That is selected by setting GIT_INDEX_FILE in the gitEnv
78 - before running this. -}
79withCatFileHandle :: (Git.CatFile.CatFileHandle -> Annex a) -> Annex a
80withCatFileHandle a = do
81	cfh <- Annex.getState Annex.catfilehandles
82	indexfile <- fromMaybe "" . maybe Nothing (lookup indexEnv)
83		<$> fromRepo gitEnv
84	p <- case cfh of
85		CatFileHandlesNonConcurrent m -> case M.lookup indexfile m of
86			Just p -> return p
87			Nothing -> do
88				p <- mkResourcePoolNonConcurrent startcatfile
89				let !m' = M.insert indexfile p m
90				Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent m' }
91				return p
92		CatFileHandlesPool tm -> do
93			m <- liftIO $ atomically $ takeTMVar tm
94			case M.lookup indexfile m of
95				Just p -> do
96					liftIO $ atomically $ putTMVar tm m
97					return p
98				Nothing -> do
99					p  <- mkResourcePool maxCatFiles
100					let !m' = M.insert indexfile p m
101					liftIO $ atomically $ putTMVar tm m'
102					return p
103	withResourcePool p startcatfile a
104  where
105	startcatfile = inRepo Git.CatFile.catFileStart
106
107{- A lot of git cat-file processes are unlikely to improve concurrency,
108 - because a query to them takes only a little bit of CPU, and tends to be
109 - bottlenecked on disk. Also, they each open a number of files, so
110 - using too many might run out of file handles. So, only start a maximum
111 - of 2.
112 -
113 - Note that each different index file gets its own pool of cat-files;
114 - this is the size of each pool. In all, 4 times this many cat-files
115 - may end up running.
116 -}
117maxCatFiles :: Int
118maxCatFiles = 2
119
120{- Stops all running cat-files. Should only be run when it's known that
121 - nothing is using the handles, eg at shutdown. -}
122catFileStop :: Annex ()
123catFileStop = do
124	cfh <- Annex.getState Annex.catfilehandles
125	m <- case cfh of
126		CatFileHandlesNonConcurrent m -> do
127			Annex.changeState $ \s -> s { Annex.catfilehandles = CatFileHandlesNonConcurrent M.empty }
128			return m
129		CatFileHandlesPool tm ->
130			liftIO $ atomically $ swapTMVar tm M.empty
131	liftIO $ forM_ (M.elems m) $ \p ->
132		freeResourcePool p Git.CatFile.catFileStop
133
134{- From ref to a symlink or a pointer file, get the key. -}
135catKey :: Ref -> Annex (Maybe Key)
136catKey ref = catObjectMetaData ref >>= \case
137	Just (_, sz, _) -> catKey' ref sz
138	Nothing -> return Nothing
139
140catKey' :: Ref -> FileSize -> Annex (Maybe Key)
141catKey' ref sz
142	-- Avoid catting large files, that cannot be symlinks or
143	-- pointer files, which would require buffering their
144	-- content in memory, as well as a lot of IO.
145	| sz <= maxPointerSz =
146		parseLinkTargetOrPointer . L.toStrict <$> catObject ref
147catKey' _ _ = return Nothing
148
149{- Gets a symlink target. -}
150catSymLinkTarget :: Sha -> Annex RawFilePath
151catSymLinkTarget sha = fromInternalGitPath . L.toStrict <$> get
152  where
153	-- Avoid buffering the whole file content, which might be large.
154	-- 8192 is enough if it really is a symlink.
155	get = L.take 8192 <$> catObject sha
156
157{- From a file in the repository back to the key.
158 -
159 - Ideally, this should reflect the key that's staged in the index,
160 - not the key that's committed to HEAD. Unfortunately, git cat-file
161 - does not refresh the index file after it's started up, so things
162 - newly staged in the index won't show up. It does, however, notice
163 - when branches change.
164 -
165 - For command-line git-annex use, that doesn't matter. It's perfectly
166 - reasonable for things staged in the index after the currently running
167 - git-annex process to not be noticed by it. However, we do want to see
168 - what's in the index, since it may have uncommitted changes not in HEAD
169 -
170 - For the assistant, this is much more of a problem, since it commits
171 - files and then needs to be able to immediately look up their keys.
172 - OTOH, the assistant doesn't keep changes staged in the index for very
173 - long at all before committing them -- and it won't look at the keys
174 - of files until after committing them.
175 -
176 - So, this gets info from the index, unless running as a daemon.
177 -}
178catKeyFile :: RawFilePath -> Annex (Maybe Key)
179catKeyFile f = ifM (Annex.getState Annex.daemon)
180	( catKeyFileHEAD f
181	, catKey =<< liftIO (Git.Ref.fileRef f)
182	)
183
184catKeyFileHEAD :: RawFilePath -> Annex (Maybe Key)
185catKeyFileHEAD f = catKey =<< liftIO (Git.Ref.fileFromRef Git.Ref.headRef f)
186
187{- Look in the original branch from whence an adjusted branch is based
188 - to find the file. But only when the adjustment hides some files. -}
189catKeyFileHidden :: RawFilePath -> CurrBranch -> Annex (Maybe Key)
190catKeyFileHidden = hiddenCat catKey
191
192catObjectMetaDataHidden :: RawFilePath -> CurrBranch -> Annex (Maybe (Sha, Integer, ObjectType))
193catObjectMetaDataHidden = hiddenCat catObjectMetaData
194
195hiddenCat :: (Ref -> Annex (Maybe a)) -> RawFilePath -> CurrBranch -> Annex (Maybe a)
196hiddenCat a f (Just origbranch, Just adj)
197	| adjustmentHidesFiles adj =
198		a =<< liftIO (Git.Ref.fileFromRef origbranch f)
199hiddenCat _ _ _ = return Nothing
200