1{- Using borg as a remote.
2 -
3 - Copyright 2020,2021 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE OverloadedStrings #-}
9
10module Remote.Borg (remote) where
11
12import Annex.Common
13import Types.Remote
14import Types.Creds
15import Types.Import
16import qualified Git
17import qualified Git.LsTree as LsTree
18import Git.Types (toTreeItemType, TreeItemType(..))
19import Git.FilePath
20import Config
21import Config.Cost
22import Annex.Tmp
23import Annex.SpecialRemote.Config
24import Remote.Helper.Special
25import Remote.Helper.ExportImport
26import Annex.UUID
27import Types.ProposedAccepted
28import Utility.Metered
29import Logs.Export
30import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
31import Utility.Env
32
33import Data.Either
34import Text.Read
35import Control.Exception (evaluate)
36import Control.DeepSeq
37import qualified Data.Map as M
38import qualified Data.ByteString as S
39import qualified Data.ByteString.Lazy as L
40import qualified System.FilePath.ByteString as P
41
42newtype BorgRepo = BorgRepo { locBorgRepo :: String }
43
44type BorgArchiveName = S.ByteString
45
46remote :: RemoteType
47remote = RemoteType
48	{ typename = "borg"
49	, enumerate = const (findSpecialRemotes "borgrepo")
50	, generate = gen
51	, configParser = mkRemoteConfigParser
52		[ optionalStringParser borgrepoField
53			(FieldDesc "(required) borg repository to use")
54		, optionalStringParser subdirField
55			(FieldDesc "limit to a subdirectory of the borg repository")
56		, yesNoParser appendonlyField (Just False)
57			(FieldDesc "you will not use borg to delete from the repository")
58		]
59	, setup = borgSetup
60	, exportSupported = exportUnsupported
61	, importSupported = importIsSupported
62	, thirdPartyPopulated = True
63	}
64
65borgrepoField :: RemoteConfigField
66borgrepoField = Accepted "borgrepo"
67
68subdirField :: RemoteConfigField
69subdirField = Accepted "subdir"
70
71appendonlyField :: RemoteConfigField
72appendonlyField = Accepted "appendonly"
73
74gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
75gen r u rc gc rs = do
76	c <- parsedRemoteConfig remote rc
77	cst <- remoteCost gc $
78		if borgLocal borgrepo
79			then nearlyCheapRemoteCost
80			else expensiveRemoteCost
81	return $ Just $ Remote
82		{ uuid = u
83		, cost = cst
84		, name = Git.repoDescribe r
85		, storeKey = storeKeyDummy
86		, retrieveKeyFile = retrieveKeyFileDummy
87		, retrieveKeyFileCheap = Nothing
88		-- Borg cryptographically verifies content.
89		, retrievalSecurityPolicy = RetrievalAllKeysSecure
90		, removeKey = removeKeyDummy
91		, lockContent = Nothing
92		, checkPresent = checkPresentDummy
93		, checkPresentCheap = borgLocal borgrepo
94		, exportActions = exportUnsupported
95		, importActions = ImportActions
96			{ listImportableContents = listImportableContentsM u borgrepo c
97			, importKey = Just ThirdPartyPopulated.importKey
98			, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo
99			, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo
100			-- This remote is thirdPartyPopulated, so these
101			-- actions will never be used.
102			, storeExportWithContentIdentifier = storeExportWithContentIdentifier importUnsupported
103			, removeExportDirectoryWhenEmpty = removeExportDirectoryWhenEmpty importUnsupported
104			, removeExportWithContentIdentifier = removeExportWithContentIdentifier importUnsupported
105			}
106		, whereisKey = Nothing
107		, remoteFsck = Nothing
108		, repairRepo = Nothing
109		, config = c
110		, getRepo = return r
111		, gitconfig = gc
112		, localpath = borgRepoLocalPath borgrepo
113		, remotetype = remote
114		, availability = if borgLocal borgrepo then LocallyAvailable else GloballyAvailable
115		, readonly = False
116		, appendonly = False
117		-- When the user sets the appendonly field, they are
118		-- promising not to delete content out from under git-annex
119		-- using borg, so the remote is not untrustworthy.
120		, untrustworthy = maybe True not $
121			getRemoteConfigValue appendonlyField c
122		, mkUnavailable = return Nothing
123		, getInfo = return [("repo", locBorgRepo borgrepo)]
124		, claimUrl = Nothing
125		, checkUrl = Nothing
126		, remoteStateHandle = rs
127		}
128  where
129	borgrepo = maybe
130		(giveup "missing borgrepo")
131		BorgRepo
132		(remoteAnnexBorgRepo gc)
133
134borgSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
135borgSetup _ mu _ c _gc = do
136	u <- maybe (liftIO genUUID) return mu
137
138	-- verify configuration is sane
139	let borgrepo = maybe (giveup "Specify borgrepo=") fromProposedAccepted $
140		M.lookup borgrepoField c
141
142	-- The borgrepo is stored in git config, as well as this repo's
143	-- persistant state, so it can vary between hosts.
144	gitConfigSpecialRemote u c [("borgrepo", borgrepo)]
145
146	return (c, u)
147
148borgLocal :: BorgRepo -> Bool
149borgLocal (BorgRepo r) = notElem ':' r
150
151borgArchive :: BorgRepo -> BorgArchiveName -> String
152borgArchive (BorgRepo r) n = r ++ "::" ++ decodeBS n
153
154absBorgRepo :: BorgRepo -> IO BorgRepo
155absBorgRepo r@(BorgRepo p)
156	| borgLocal r = BorgRepo . fromRawFilePath
157		<$> absPath (toRawFilePath p)
158	| otherwise = return r
159
160borgRepoLocalPath :: BorgRepo -> Maybe FilePath
161borgRepoLocalPath r@(BorgRepo p)
162	| borgLocal r && not (null p) = Just p
163	| otherwise = Nothing
164
165listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
166listImportableContentsM u borgrepo c = prompt $ do
167	imported <- getImported u
168	ls <- withborglist (locBorgRepo borgrepo) Nothing formatarchivelist $ \as ->
169		forM (filter (not . S.null) as) $ \archivename ->
170			case M.lookup archivename imported of
171				Just getfast -> return $ Left (archivename, getfast)
172				Nothing -> Right <$>
173					let archive = borgArchive borgrepo archivename
174					in withborglist archive subdir formatfilelist $
175						liftIO . evaluate . force . parsefilelist archivename
176	if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls)))
177		then return Nothing -- unchanged since last time, avoid work
178		else Just . mkimportablecontents <$> mapM (either snd pure) ls
179  where
180	withborglist what addparam format a = do
181		environ <- liftIO getEnvironment
182		let p = proc "borg" $ toCommand $ catMaybes
183			[ Just (Param "list")
184			, Just (Param "--format")
185			, Just (Param format)
186			, Just (Param what)
187			, addparam
188			]
189		(Nothing, Just h, Nothing, pid) <- liftIO $ createProcess $ p
190			{ std_out = CreatePipe
191			-- Run in C locale because the file list can
192			-- include some possibly translatable text in the
193			-- "extra" field.
194			, env = Just (addEntry "LANG" "C" environ)
195			}
196		l <- liftIO $ map L.toStrict
197			. L.split 0
198			<$> L.hGetContents h
199		let cleanup = liftIO $ do
200			hClose h
201			forceSuccessProcess p pid
202		a l `finally` cleanup
203
204	formatarchivelist = "{barchive}{NUL}"
205
206	formatfilelist = "{size}{NUL}{path}{NUL}{extra}{NUL}"
207
208	subdir = File <$> getRemoteConfigValue subdirField c
209
210	parsefilelist archivename (bsz:f:extra:rest) = case readMaybe (fromRawFilePath bsz) of
211		Nothing -> parsefilelist archivename rest
212		Just sz ->
213			let loc = genImportLocation archivename f
214			-- borg list reports hard links as 0 byte files,
215			-- with the extra field set to " link to ".
216			-- When the annex object is a hard link to
217			-- something else, we'll assume it has not been
218			-- modified, since usually git-annex does prevent
219			-- this. Since the 0 byte size is not the actual
220			-- size,  report the key size instead, when available.
221			    (reqsz, retsz) = case extra of
222				" link to " -> (Nothing, fromMaybe sz . fromKey keySize)
223				_ -> (Just sz, const sz)
224			-- This does a little unncessary work to parse the
225			-- key, which is then thrown away. But, it lets the
226			-- file list be shrank down to only the ones that are
227			-- importable keys, so avoids needing to buffer all
228			-- the rest of the files in memory.
229			in case ThirdPartyPopulated.importKey' loc reqsz of
230				Just k -> (loc, (borgContentIdentifier, retsz k))
231					: parsefilelist archivename rest
232				Nothing -> parsefilelist archivename rest
233	parsefilelist _ _ = []
234
235	-- importableHistory is not used for retrieval, so is not
236	-- populated with old archives. Instead, a tree of archives
237	-- is constructed, by genImportLocation including the archive
238	-- name in the ImportLocation.
239	mkimportablecontents l = ImportableContents
240		{ importableContents = concat l
241		, importableHistory = []
242		}
243
244-- We do not need a ContentIdentifier in order to retrieve a file from
245-- borg; the ImportLocation contains all that's needed. So, this is left
246-- empty.
247borgContentIdentifier :: ContentIdentifier
248borgContentIdentifier = ContentIdentifier mempty
249
250-- Borg does not allow / in the name of an archive, so the archive
251-- name will always be the first directory in the ImportLocation.
252--
253-- Paths in a borg archive are always relative, not absolute, so the use of
254-- </> to combine the archive name with the path will always work.
255genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation
256genImportLocation archivename p  =
257	ThirdPartyPopulated.mkThirdPartyImportLocation $
258		archivename P.</> p
259
260extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
261extractImportLocation loc = go $ P.splitDirectories $
262	ThirdPartyPopulated.fromThirdPartyImportLocation loc
263  where
264	go (archivename:rest) = (archivename, P.joinPath rest)
265	go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc)
266
267-- Since the ImportLocation starts with the archive name, a list of all
268-- archive names we've already imported can be found by just listing the
269-- last imported tree. And the contents of those archives can be retrieved
270-- by listing the subtree recursively, which will likely be quite a lot
271-- faster than running borg.
272getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))]))
273getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
274  where
275	go t = M.fromList . mapMaybe mk
276		<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive (LsTree.LsTreeLong False) t)
277
278	mk ti
279		| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
280			( getTopFilePath (LsTree.file ti)
281			, getcontents
282				(getTopFilePath (LsTree.file ti))
283				(LsTree.sha ti)
284			)
285		| otherwise = Nothing
286
287	getcontents archivename t = mapMaybe (mkcontents archivename)
288		<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t)
289
290	mkcontents archivename ti = do
291		let f = ThirdPartyPopulated.fromThirdPartyImportLocation $
292			mkImportLocation $ getTopFilePath $ LsTree.file ti
293		k <- fileKey (P.takeFileName f)
294		return
295			( genImportLocation archivename f
296			,
297				( borgContentIdentifier
298				-- defaulting to 0 size is ok, this size
299				-- only gets used by
300				-- ThirdPartyPopulated.importKey,
301				-- which ignores the size when the key
302				-- does not have a size.
303				, fromMaybe 0 (fromKey keySize k)
304				)
305			)
306
307-- Check if the file is still there in the borg archive.
308-- Does not check that the content is unchanged; we assume that
309-- the content of files in borg archives does not change, which is normally
310-- the case. But archives may be deleted, and files may be deleted.
311checkPresentExportWithContentIdentifierM :: BorgRepo -> Key -> ImportLocation -> [ContentIdentifier] -> Annex Bool
312checkPresentExportWithContentIdentifierM borgrepo _ loc _ = prompt $ liftIO $ do
313	let p = proc "borg" $ toCommand
314		[ Param "list"
315		, Param "--format"
316		, Param "1"
317		, Param (borgArchive borgrepo archivename)
318		, File (fromRawFilePath archivefile)
319		]
320	-- borg list exits nonzero with an error message if an archive
321	-- no longer exists. But, the user can delete archives at any
322	-- time they want. So, hide errors, and if it exists nonzero,
323	-- check if the borg repository still exists, and only throw an
324	-- exception if not.
325	(Nothing, Just h, Nothing, pid) <- withNullHandle $ \nullh ->
326		createProcess $ p
327			{ std_out = CreatePipe
328			, std_err = UseHandle nullh
329			}
330	ok <- (== "1") <$> hGetContentsStrict h
331	hClose h
332	ifM (checkSuccessProcess pid)
333		( return ok
334		, checkrepoexists
335		)
336  where
337	(archivename, archivefile) = extractImportLocation loc
338
339	checkrepoexists = do
340		let p = proc "borg" $ toCommand
341			[ Param "list"
342			, Param "--format"
343			, Param "1"
344			, Param (locBorgRepo borgrepo)
345			]
346		(Nothing, Nothing, Nothing, pid) <- withNullHandle $ \nullh ->
347			createProcess $ p
348				{ std_out = UseHandle nullh }
349		ifM (checkSuccessProcess pid)
350			( return False -- repo exists, content not in it
351			, giveup $ "Unable to access borg repository " ++ locBorgRepo borgrepo
352			)
353
354retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
355retrieveExportWithContentIdentifierM borgrepo loc _ dest mkk _ = do
356	showOutput
357	prompt $ withOtherTmp $ \othertmp -> liftIO $ do
358		-- borgrepo could be relative, and borg has to be run
359		-- in the temp directory to get it to write there
360		absborgrepo <- absBorgRepo borgrepo
361		let p = proc "borg" $ toCommand
362			[ Param "extract"
363			, Param (borgArchive absborgrepo archivename)
364			, File (fromRawFilePath archivefile)
365			]
366		(Nothing, Nothing, Nothing, pid) <- createProcess $ p
367			{ cwd = Just (fromRawFilePath othertmp) }
368		forceSuccessProcess p pid
369		-- Filepaths in borg archives are relative, so it's ok to
370		-- combine with </>
371		moveFile (fromRawFilePath othertmp </> fromRawFilePath archivefile) dest
372		removeDirectoryRecursive (fromRawFilePath othertmp)
373	mkk
374  where
375	(archivename, archivefile) = extractImportLocation loc
376