1{- Using borg as a remote. 2 - 3 - Copyright 2020,2021 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8{-# LANGUAGE OverloadedStrings #-} 9 10module Remote.Borg (remote) where 11 12import Annex.Common 13import Types.Remote 14import Types.Creds 15import Types.Import 16import qualified Git 17import qualified Git.LsTree as LsTree 18import Git.Types (toTreeItemType, TreeItemType(..)) 19import Git.FilePath 20import Config 21import Config.Cost 22import Annex.Tmp 23import Annex.SpecialRemote.Config 24import Remote.Helper.Special 25import Remote.Helper.ExportImport 26import Annex.UUID 27import Types.ProposedAccepted 28import Utility.Metered 29import Logs.Export 30import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated 31import Utility.Env 32 33import Data.Either 34import Text.Read 35import Control.Exception (evaluate) 36import Control.DeepSeq 37import qualified Data.Map as M 38import qualified Data.ByteString as S 39import qualified Data.ByteString.Lazy as L 40import qualified System.FilePath.ByteString as P 41 42newtype BorgRepo = BorgRepo { locBorgRepo :: String } 43 44type BorgArchiveName = S.ByteString 45 46remote :: RemoteType 47remote = RemoteType 48 { typename = "borg" 49 , enumerate = const (findSpecialRemotes "borgrepo") 50 , generate = gen 51 , configParser = mkRemoteConfigParser 52 [ optionalStringParser borgrepoField 53 (FieldDesc "(required) borg repository to use") 54 , optionalStringParser subdirField 55 (FieldDesc "limit to a subdirectory of the borg repository") 56 , yesNoParser appendonlyField (Just False) 57 (FieldDesc "you will not use borg to delete from the repository") 58 ] 59 , setup = borgSetup 60 , exportSupported = exportUnsupported 61 , importSupported = importIsSupported 62 , thirdPartyPopulated = True 63 } 64 65borgrepoField :: RemoteConfigField 66borgrepoField = Accepted "borgrepo" 67 68subdirField :: RemoteConfigField 69subdirField = Accepted "subdir" 70 71appendonlyField :: RemoteConfigField 72appendonlyField = Accepted "appendonly" 73 74gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) 75gen r u rc gc rs = do 76 c <- parsedRemoteConfig remote rc 77 cst <- remoteCost gc $ 78 if borgLocal borgrepo 79 then nearlyCheapRemoteCost 80 else expensiveRemoteCost 81 return $ Just $ Remote 82 { uuid = u 83 , cost = cst 84 , name = Git.repoDescribe r 85 , storeKey = storeKeyDummy 86 , retrieveKeyFile = retrieveKeyFileDummy 87 , retrieveKeyFileCheap = Nothing 88 -- Borg cryptographically verifies content. 89 , retrievalSecurityPolicy = RetrievalAllKeysSecure 90 , removeKey = removeKeyDummy 91 , lockContent = Nothing 92 , checkPresent = checkPresentDummy 93 , checkPresentCheap = borgLocal borgrepo 94 , exportActions = exportUnsupported 95 , importActions = ImportActions 96 { listImportableContents = listImportableContentsM u borgrepo c 97 , importKey = Just ThirdPartyPopulated.importKey 98 , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo 99 , checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo 100 -- This remote is thirdPartyPopulated, so these 101 -- actions will never be used. 102 , storeExportWithContentIdentifier = storeExportWithContentIdentifier importUnsupported 103 , removeExportDirectoryWhenEmpty = removeExportDirectoryWhenEmpty importUnsupported 104 , removeExportWithContentIdentifier = removeExportWithContentIdentifier importUnsupported 105 } 106 , whereisKey = Nothing 107 , remoteFsck = Nothing 108 , repairRepo = Nothing 109 , config = c 110 , getRepo = return r 111 , gitconfig = gc 112 , localpath = borgRepoLocalPath borgrepo 113 , remotetype = remote 114 , availability = if borgLocal borgrepo then LocallyAvailable else GloballyAvailable 115 , readonly = False 116 , appendonly = False 117 -- When the user sets the appendonly field, they are 118 -- promising not to delete content out from under git-annex 119 -- using borg, so the remote is not untrustworthy. 120 , untrustworthy = maybe True not $ 121 getRemoteConfigValue appendonlyField c 122 , mkUnavailable = return Nothing 123 , getInfo = return [("repo", locBorgRepo borgrepo)] 124 , claimUrl = Nothing 125 , checkUrl = Nothing 126 , remoteStateHandle = rs 127 } 128 where 129 borgrepo = maybe 130 (giveup "missing borgrepo") 131 BorgRepo 132 (remoteAnnexBorgRepo gc) 133 134borgSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) 135borgSetup _ mu _ c _gc = do 136 u <- maybe (liftIO genUUID) return mu 137 138 -- verify configuration is sane 139 let borgrepo = maybe (giveup "Specify borgrepo=") fromProposedAccepted $ 140 M.lookup borgrepoField c 141 142 -- The borgrepo is stored in git config, as well as this repo's 143 -- persistant state, so it can vary between hosts. 144 gitConfigSpecialRemote u c [("borgrepo", borgrepo)] 145 146 return (c, u) 147 148borgLocal :: BorgRepo -> Bool 149borgLocal (BorgRepo r) = notElem ':' r 150 151borgArchive :: BorgRepo -> BorgArchiveName -> String 152borgArchive (BorgRepo r) n = r ++ "::" ++ decodeBS n 153 154absBorgRepo :: BorgRepo -> IO BorgRepo 155absBorgRepo r@(BorgRepo p) 156 | borgLocal r = BorgRepo . fromRawFilePath 157 <$> absPath (toRawFilePath p) 158 | otherwise = return r 159 160borgRepoLocalPath :: BorgRepo -> Maybe FilePath 161borgRepoLocalPath r@(BorgRepo p) 162 | borgLocal r && not (null p) = Just p 163 | otherwise = Nothing 164 165listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) 166listImportableContentsM u borgrepo c = prompt $ do 167 imported <- getImported u 168 ls <- withborglist (locBorgRepo borgrepo) Nothing formatarchivelist $ \as -> 169 forM (filter (not . S.null) as) $ \archivename -> 170 case M.lookup archivename imported of 171 Just getfast -> return $ Left (archivename, getfast) 172 Nothing -> Right <$> 173 let archive = borgArchive borgrepo archivename 174 in withborglist archive subdir formatfilelist $ 175 liftIO . evaluate . force . parsefilelist archivename 176 if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls))) 177 then return Nothing -- unchanged since last time, avoid work 178 else Just . mkimportablecontents <$> mapM (either snd pure) ls 179 where 180 withborglist what addparam format a = do 181 environ <- liftIO getEnvironment 182 let p = proc "borg" $ toCommand $ catMaybes 183 [ Just (Param "list") 184 , Just (Param "--format") 185 , Just (Param format) 186 , Just (Param what) 187 , addparam 188 ] 189 (Nothing, Just h, Nothing, pid) <- liftIO $ createProcess $ p 190 { std_out = CreatePipe 191 -- Run in C locale because the file list can 192 -- include some possibly translatable text in the 193 -- "extra" field. 194 , env = Just (addEntry "LANG" "C" environ) 195 } 196 l <- liftIO $ map L.toStrict 197 . L.split 0 198 <$> L.hGetContents h 199 let cleanup = liftIO $ do 200 hClose h 201 forceSuccessProcess p pid 202 a l `finally` cleanup 203 204 formatarchivelist = "{barchive}{NUL}" 205 206 formatfilelist = "{size}{NUL}{path}{NUL}{extra}{NUL}" 207 208 subdir = File <$> getRemoteConfigValue subdirField c 209 210 parsefilelist archivename (bsz:f:extra:rest) = case readMaybe (fromRawFilePath bsz) of 211 Nothing -> parsefilelist archivename rest 212 Just sz -> 213 let loc = genImportLocation archivename f 214 -- borg list reports hard links as 0 byte files, 215 -- with the extra field set to " link to ". 216 -- When the annex object is a hard link to 217 -- something else, we'll assume it has not been 218 -- modified, since usually git-annex does prevent 219 -- this. Since the 0 byte size is not the actual 220 -- size, report the key size instead, when available. 221 (reqsz, retsz) = case extra of 222 " link to " -> (Nothing, fromMaybe sz . fromKey keySize) 223 _ -> (Just sz, const sz) 224 -- This does a little unncessary work to parse the 225 -- key, which is then thrown away. But, it lets the 226 -- file list be shrank down to only the ones that are 227 -- importable keys, so avoids needing to buffer all 228 -- the rest of the files in memory. 229 in case ThirdPartyPopulated.importKey' loc reqsz of 230 Just k -> (loc, (borgContentIdentifier, retsz k)) 231 : parsefilelist archivename rest 232 Nothing -> parsefilelist archivename rest 233 parsefilelist _ _ = [] 234 235 -- importableHistory is not used for retrieval, so is not 236 -- populated with old archives. Instead, a tree of archives 237 -- is constructed, by genImportLocation including the archive 238 -- name in the ImportLocation. 239 mkimportablecontents l = ImportableContents 240 { importableContents = concat l 241 , importableHistory = [] 242 } 243 244-- We do not need a ContentIdentifier in order to retrieve a file from 245-- borg; the ImportLocation contains all that's needed. So, this is left 246-- empty. 247borgContentIdentifier :: ContentIdentifier 248borgContentIdentifier = ContentIdentifier mempty 249 250-- Borg does not allow / in the name of an archive, so the archive 251-- name will always be the first directory in the ImportLocation. 252-- 253-- Paths in a borg archive are always relative, not absolute, so the use of 254-- </> to combine the archive name with the path will always work. 255genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation 256genImportLocation archivename p = 257 ThirdPartyPopulated.mkThirdPartyImportLocation $ 258 archivename P.</> p 259 260extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath) 261extractImportLocation loc = go $ P.splitDirectories $ 262 ThirdPartyPopulated.fromThirdPartyImportLocation loc 263 where 264 go (archivename:rest) = (archivename, P.joinPath rest) 265 go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc) 266 267-- Since the ImportLocation starts with the archive name, a list of all 268-- archive names we've already imported can be found by just listing the 269-- last imported tree. And the contents of those archives can be retrieved 270-- by listing the subtree recursively, which will likely be quite a lot 271-- faster than running borg. 272getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))])) 273getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u) 274 where 275 go t = M.fromList . mapMaybe mk 276 <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive (LsTree.LsTreeLong False) t) 277 278 mk ti 279 | toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just 280 ( getTopFilePath (LsTree.file ti) 281 , getcontents 282 (getTopFilePath (LsTree.file ti)) 283 (LsTree.sha ti) 284 ) 285 | otherwise = Nothing 286 287 getcontents archivename t = mapMaybe (mkcontents archivename) 288 <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t) 289 290 mkcontents archivename ti = do 291 let f = ThirdPartyPopulated.fromThirdPartyImportLocation $ 292 mkImportLocation $ getTopFilePath $ LsTree.file ti 293 k <- fileKey (P.takeFileName f) 294 return 295 ( genImportLocation archivename f 296 , 297 ( borgContentIdentifier 298 -- defaulting to 0 size is ok, this size 299 -- only gets used by 300 -- ThirdPartyPopulated.importKey, 301 -- which ignores the size when the key 302 -- does not have a size. 303 , fromMaybe 0 (fromKey keySize k) 304 ) 305 ) 306 307-- Check if the file is still there in the borg archive. 308-- Does not check that the content is unchanged; we assume that 309-- the content of files in borg archives does not change, which is normally 310-- the case. But archives may be deleted, and files may be deleted. 311checkPresentExportWithContentIdentifierM :: BorgRepo -> Key -> ImportLocation -> [ContentIdentifier] -> Annex Bool 312checkPresentExportWithContentIdentifierM borgrepo _ loc _ = prompt $ liftIO $ do 313 let p = proc "borg" $ toCommand 314 [ Param "list" 315 , Param "--format" 316 , Param "1" 317 , Param (borgArchive borgrepo archivename) 318 , File (fromRawFilePath archivefile) 319 ] 320 -- borg list exits nonzero with an error message if an archive 321 -- no longer exists. But, the user can delete archives at any 322 -- time they want. So, hide errors, and if it exists nonzero, 323 -- check if the borg repository still exists, and only throw an 324 -- exception if not. 325 (Nothing, Just h, Nothing, pid) <- withNullHandle $ \nullh -> 326 createProcess $ p 327 { std_out = CreatePipe 328 , std_err = UseHandle nullh 329 } 330 ok <- (== "1") <$> hGetContentsStrict h 331 hClose h 332 ifM (checkSuccessProcess pid) 333 ( return ok 334 , checkrepoexists 335 ) 336 where 337 (archivename, archivefile) = extractImportLocation loc 338 339 checkrepoexists = do 340 let p = proc "borg" $ toCommand 341 [ Param "list" 342 , Param "--format" 343 , Param "1" 344 , Param (locBorgRepo borgrepo) 345 ] 346 (Nothing, Nothing, Nothing, pid) <- withNullHandle $ \nullh -> 347 createProcess $ p 348 { std_out = UseHandle nullh } 349 ifM (checkSuccessProcess pid) 350 ( return False -- repo exists, content not in it 351 , giveup $ "Unable to access borg repository " ++ locBorgRepo borgrepo 352 ) 353 354retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key 355retrieveExportWithContentIdentifierM borgrepo loc _ dest mkk _ = do 356 showOutput 357 prompt $ withOtherTmp $ \othertmp -> liftIO $ do 358 -- borgrepo could be relative, and borg has to be run 359 -- in the temp directory to get it to write there 360 absborgrepo <- absBorgRepo borgrepo 361 let p = proc "borg" $ toCommand 362 [ Param "extract" 363 , Param (borgArchive absborgrepo archivename) 364 , File (fromRawFilePath archivefile) 365 ] 366 (Nothing, Nothing, Nothing, pid) <- createProcess $ p 367 { cwd = Just (fromRawFilePath othertmp) } 368 forceSuccessProcess p pid 369 -- Filepaths in borg archives are relative, so it's ok to 370 -- combine with </> 371 moveFile (fromRawFilePath othertmp </> fromRawFilePath archivefile) dest 372 removeDirectoryRecursive (fromRawFilePath othertmp) 373 mkk 374 where 375 (archivename, archivefile) = extractImportLocation loc 376