1{- Helper to make remotes support export and import (or not).
2 -
3 - Copyright 2017-2019 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
9
10module Remote.Helper.ExportImport where
11
12import Annex.Common
13import Types.Remote
14import Types.Key
15import Types.ProposedAccepted
16import Annex.Verify
17import Remote.Helper.Encryptable (encryptionIsEnabled)
18import qualified Database.Export as Export
19import qualified Database.ContentIdentifier as ContentIdentifier
20import Annex.Export
21import Annex.LockFile
22import Annex.SpecialRemote.Config
23import Git.Types (fromRef)
24import Logs.Export
25import Logs.ContentIdentifier (recordContentIdentifier)
26
27import Control.Concurrent.STM
28
29-- | Use for remotes that do not support exports.
30class HasExportUnsupported a where
31	exportUnsupported :: a
32
33instance HasExportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool) where
34	exportUnsupported = \_ _ -> return False
35
36instance HasExportUnsupported (ExportActions Annex) where
37	exportUnsupported = ExportActions
38		{ storeExport = nope
39		, retrieveExport = nope
40		, checkPresentExport = \_ _ -> return False
41		, removeExport = nope
42		, versionedExport = False
43		, removeExportDirectory = nope
44		, renameExport = \_ _ _ -> return Nothing
45		}
46	 where
47	 	nope = giveup "export not supported"
48
49-- | Use for remotes that do not support imports.
50class HasImportUnsupported a where
51	importUnsupported :: a
52
53instance HasImportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool) where
54	importUnsupported = \_ _ -> return False
55
56instance HasImportUnsupported (ImportActions Annex) where
57	importUnsupported = ImportActions
58		{ listImportableContents = nope
59		, importKey = Nothing
60		, retrieveExportWithContentIdentifier = nope
61		, storeExportWithContentIdentifier = nope
62		, removeExportWithContentIdentifier = nope
63		, removeExportDirectoryWhenEmpty = nope
64		, checkPresentExportWithContentIdentifier = \_ _ _ -> return False
65		}
66	  where
67		nope = giveup "import not supported"
68
69exportIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool
70exportIsSupported = \_ _ -> return True
71
72importIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool
73importIsSupported = \_ _ -> return True
74
75-- | Prevent or allow exporttree=yes and importtree=yes when
76-- setting up a new remote, depending on the remote's capabilities.
77adjustExportImportRemoteType :: RemoteType -> RemoteType
78adjustExportImportRemoteType rt = rt { setup = setup' }
79  where
80	setup' st mu cp c gc = do
81		pc <- either giveup return . parseRemoteConfig c
82			=<< configParser rt c
83		let checkconfig supported configured configfield cont =
84			ifM (supported rt pc gc <&&> pure (not (thirdPartyPopulated rt)))
85				( case st of
86					Init
87						| configured pc && encryptionIsEnabled pc ->
88							giveup $ "cannot enable both encryption and " ++ fromProposedAccepted configfield
89						| otherwise -> cont
90					Enable oldc -> enable oldc pc configured configfield cont
91					AutoEnable oldc -> enable oldc pc configured configfield cont
92				, if configured pc
93					then giveup $ fromProposedAccepted configfield ++ " is not supported by this special remote"
94					else cont
95				)
96		checkconfig exportSupported exportTree exportTreeField $
97			checkconfig importSupported importTree importTreeField $
98				setup rt st mu cp c gc
99
100	enable oldc pc configured configfield cont = do
101		oldpc <- parsedRemoteConfig rt oldc
102		if configured pc /= configured oldpc
103			then giveup $ "cannot change " ++ fromProposedAccepted configfield ++ " of existing special remote"
104			else cont
105
106-- | Adjust a remote to support exporttree=yes and/or importree=yes.
107adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote
108adjustExportImport r rs = do
109	isexport <- pure (exportTree (config r))
110		<&&> isExportSupported r
111	-- When thirdPartyPopulated is True, the remote
112	-- does not need to be configured with importTree to support
113	-- imports.
114	isimport <- pure (importTree (config r) || thirdPartyPopulated (remotetype r))
115		<&&> isImportSupported r
116	let r' = r
117		{ remotetype = (remotetype r)
118			{ exportSupported = if isexport
119				then exportSupported (remotetype r)
120				else exportUnsupported
121			, importSupported = if isimport
122				then importSupported (remotetype r)
123				else importUnsupported
124			}
125		}
126	if not isexport && not isimport
127		then return r'
128		else adjustExportImport' isexport isimport r' rs
129
130adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote
131adjustExportImport' isexport isimport r rs = do
132	dbv <- prepdbv
133	ciddbv <- prepciddb
134	let versioned = versionedExport (exportActions r)
135	return $ r
136		{ exportActions = if isexport
137			then if isimport
138				then exportActionsForImport dbv ciddbv (exportActions r)
139				else exportActions r
140			else exportUnsupported
141		, importActions = if isimport
142			then importActions r
143			else importUnsupported
144		, storeKey = \k af p ->
145			-- Storing a key on an export could be implemented,
146			-- but it would perform unncessary work
147			-- when another repository has already stored the
148			-- key, and the local repository does not know
149			-- about it. To avoid unnecessary costs, don't do it.
150			if thirdpartypopulated
151				then giveup "remote is not populated by git-annex"
152				else if isexport
153					then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
154					else if isimport
155						then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
156						else storeKey r k af p
157		, removeKey = \k ->
158			-- Removing a key from an export would need to
159			-- change the tree in the export log to not include
160			-- the file. Otherwise, conflicts when removing
161			-- files would not be dealt with correctly.
162			-- There does not seem to be a good use case for
163			-- removing a key from an export in any case.
164			if thirdpartypopulated
165				then giveup "dropping content from this remote is not supported"
166				else if isexport
167					then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
168					else if isimport
169						then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
170						else removeKey r k
171		, lockContent = if versioned
172			then lockContent r
173			else Nothing
174		, retrieveKeyFile = \k af dest p vc ->
175			if isimport
176				then supportversionedretrieve k af dest p vc $
177					retrieveKeyFileFromImport dbv ciddbv k af dest p
178				else if isexport
179					then supportversionedretrieve k af dest p vc $
180						retrieveKeyFileFromExport dbv k af dest p
181					else retrieveKeyFile r k af dest p vc
182		, retrieveKeyFileCheap = if versioned
183			then retrieveKeyFileCheap r
184			else Nothing
185		, checkPresent = \k -> if versioned
186			then checkPresent r k
187			else if isimport
188				then anyM (checkPresentImport ciddbv k)
189					=<< getanyexportlocs dbv k
190				else if isexport
191					-- Check if any of the files a key
192					-- was exported to are present. This
193					-- doesn't guarantee the export
194					-- contains the right content,
195					-- if the remote is an export,
196					-- or if something else can write
197					-- to it. Remotes that have such
198					-- problems are made untrusted,
199					-- so it's not worried about here.
200					then anyM (checkPresentExport (exportActions r) k)
201						=<< getanyexportlocs dbv k
202					else checkPresent r k
203		-- checkPresent from an export is more expensive
204		-- than otherwise, so not cheap. Also, this
205		-- avoids things that look at checkPresentCheap and
206		-- silently skip non-present files from behaving
207		-- in confusing ways when there's an export
208		-- conflict (or an import conflict).
209		, checkPresentCheap = False
210		-- Export/import remotes can lose content stored on them in
211		-- many ways. This is not a problem with versioned
212		-- ones though, since they still allow accessing by Key.
213		-- And for thirdPartyPopulated, it depends on how the
214		-- content gets actually stored in the remote, so
215		-- is not overriddden here.
216		, untrustworthy =
217			if versioned || thirdPartyPopulated (remotetype r)
218				then untrustworthy r
219				else False
220		-- git-annex testremote cannot be used to test
221		-- import/export since it stores keys.
222		, mkUnavailable = return Nothing
223		, getInfo = do
224			is <- getInfo r
225			is' <- if isexport && not thirdpartypopulated
226				then do
227					ts <- map fromRef . exportedTreeishes
228						<$> getExport (uuid r)
229					return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)])
230				else return is
231			return $ if isimport && not thirdpartypopulated
232				then (is'++[("importtree", "yes")])
233				else is'
234		}
235  where
236	thirdpartypopulated = thirdPartyPopulated (remotetype r)
237
238	-- exportActions adjusted to use the equivilant import actions,
239	-- which take ContentIdentifiers into account.
240	exportActionsForImport dbv ciddbv ea = ea
241  		{ storeExport = \f k loc p -> do
242			db <- getciddb ciddbv
243			exportdb <- getexportdb dbv
244			oldks <- liftIO $ Export.getExportTreeKey exportdb loc
245			oldcids <- liftIO $ concat
246				<$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks
247			newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p
248			withExclusiveLock gitAnnexContentIdentifierLock $ do
249				liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k
250				liftIO $ ContentIdentifier.flushDbQueue db
251			recordContentIdentifier rs newcid k
252		, removeExport = \k loc ->
253			removeExportWithContentIdentifier (importActions r) k loc
254				=<< getkeycids ciddbv k
255		, removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r)
256		-- renameExport is optional, and the remote's
257		-- implementation may lose modifications to the file
258		-- (by eg copying and then deleting) so don't use it
259		, renameExport = \_ _ _ -> return Nothing
260		, checkPresentExport = checkPresentImport ciddbv
261		}
262
263	prepciddb = do
264		lcklckv <- liftIO newEmptyTMVarIO
265		dbtv <- liftIO newEmptyTMVarIO
266		return (dbtv, lcklckv)
267
268	prepdbv = do
269		lcklckv <- liftIO newEmptyTMVarIO
270		dbv <- liftIO newEmptyTMVarIO
271		exportinconflict <- liftIO $ newTVarIO False
272		return (dbv, lcklckv, exportinconflict)
273
274	-- Only open the database once it's needed.
275	getciddb (dbtv, lcklckv) =
276		liftIO (atomically (tryReadTMVar dbtv)) >>= \case
277			Just db -> return db
278			-- let only one thread take the lock
279			Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
280				( do
281					db <- ContentIdentifier.openDb
282					ContentIdentifier.needsUpdateFromLog db >>= \case
283						Just v -> withExclusiveLock gitAnnexContentIdentifierLock $
284							ContentIdentifier.updateFromLog db v
285						Nothing -> noop
286					liftIO $ atomically $ putTMVar dbtv db
287					return db
288				-- loser waits for winner to open the db and
289				-- can then also use its handle
290				, liftIO $ atomically (readTMVar dbtv)
291				)
292
293	-- Only open the database once it's needed.
294	--
295	-- After opening the database, check if the export log is
296	-- different than the database, and update the database, to notice
297	-- when an export has been updated from another repository.
298	getexportdb (dbv, lcklckv, exportinconflict) =
299		liftIO (atomically (tryReadTMVar dbv)) >>= \case
300			Just db -> return db
301			-- let only one thread take the lock
302			Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
303				( do
304					db <- Export.openDb (uuid r)
305					updateexportdb db exportinconflict
306					liftIO $ atomically $ putTMVar dbv db
307					return db
308				-- loser waits for winner to open the db and
309				-- can then also use its handle
310				, liftIO $ atomically (readTMVar dbv)
311				)
312
313	getexportinconflict (_, _, v) = v
314
315	updateexportdb db exportinconflict =
316		Export.updateExportTreeFromLog db >>= \case
317			Export.ExportUpdateSuccess -> return ()
318			Export.ExportUpdateConflict -> do
319				warnExportImportConflict r
320				liftIO $ atomically $
321					writeTVar exportinconflict True
322
323	getanyexportlocs dbv k = do
324		db <- getexportdb dbv
325		liftIO $ Export.getExportTree db k
326
327	getfirstexportloc dbv k = do
328		getexportlocs dbv k >>= \case
329			[] -> giveup "unknown export location"
330			(l:_) -> return l
331
332	getexportlocs dbv k = do
333		db <- getexportdb dbv
334		liftIO $ Export.getExportTree db k >>= \case
335			[] -> ifM (atomically $ readTVar $ getexportinconflict dbv)
336				( giveup "unknown export location, likely due to the export conflict"
337				, return []
338				)
339			ls -> return ls
340
341	getkeycids ciddbv k = do
342		db <- getciddb ciddbv
343		liftIO $ ContentIdentifier.getContentIdentifiers db rs k
344
345	-- Keys can be retrieved using retrieveExport, but since that
346	-- retrieves from a path in the remote that another writer could
347	-- have replaced with content not of the requested key, the content
348	-- has to be strongly verified.
349	retrieveKeyFileFromExport dbv k _af dest p = ifM (isVerifiable k)
350		( do
351			l <- getfirstexportloc dbv k
352			retrieveExport (exportActions r) k l dest p
353			return MustVerify
354		, giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend"
355		)
356
357	retrieveKeyFileFromImport dbv ciddbv k af dest p =
358		getkeycids ciddbv k >>= \case
359			(cid:_) -> do
360				l <- getfirstexportloc dbv k
361				void $ retrieveExportWithContentIdentifier (importActions r) l cid dest (pure k) p
362				return UnVerified
363			-- In case a content identifier is somehow missing,
364			-- try this instead.
365			[] -> if isexport
366				then retrieveKeyFileFromExport dbv k af dest p
367				else giveup "no content identifier is recorded, unable to retrieve"
368
369	-- versionedExport remotes have a key/value store, so can use
370	-- the usual retrieveKeyFile, rather than an import/export
371	-- variant. However, fall back to that if retrieveKeyFile fails.
372	supportversionedretrieve k af dest p vc a
373		| versionedExport (exportActions r) =
374			retrieveKeyFile r k af dest p vc
375				`catchNonAsync` const a
376		| otherwise = a
377
378	checkPresentImport ciddbv k loc =
379		checkPresentExportWithContentIdentifier
380			(importActions r)
381			k loc
382			=<< getkeycids ciddbv k
383