1{- git-annex file content managing 2 - 3 - Copyright 2010-2021 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8{-# LANGUAGE CPP #-} 9 10module Annex.Content ( 11 inAnnex, 12 inAnnex', 13 inAnnexSafe, 14 inAnnexCheck, 15 objectFileExists, 16 lockContentShared, 17 lockContentForRemoval, 18 ContentRemovalLock, 19 RetrievalSecurityPolicy(..), 20 getViaTmp, 21 getViaTmpFromDisk, 22 checkDiskSpaceToGet, 23 checkSecureHashes, 24 prepTmp, 25 withTmp, 26 checkDiskSpace, 27 needMoreDiskSpace, 28 moveAnnex, 29 populatePointerFile, 30 linkToAnnex, 31 linkFromAnnex, 32 linkFromAnnex', 33 LinkAnnexResult(..), 34 unlinkAnnex, 35 checkedCopyFile, 36 linkOrCopy, 37 linkOrCopy', 38 sendAnnex, 39 prepSendAnnex, 40 prepSendAnnex', 41 removeAnnex, 42 moveBad, 43 KeyLocation(..), 44 listKeys, 45 saveState, 46 downloadUrl, 47 preseedTmp, 48 dirKeys, 49 withObjectLoc, 50 staleKeysPrune, 51 pruneTmpWorkDirBefore, 52 isUnmodified, 53 isUnmodifiedCheap, 54 verifyKeyContentPostRetrieval, 55 verifyKeyContent, 56 VerifyConfig, 57 VerifyConfigA(..), 58 Verification(..), 59 unVerified, 60 withTmpWorkDir, 61) where 62 63import System.IO.Unsafe (unsafeInterleaveIO) 64import qualified Data.Set as S 65 66import Annex.Common 67import Annex.Content.Presence 68import Annex.Content.LowLevel 69import Annex.Content.PointerFile 70import Annex.Verify 71import qualified Git 72import qualified Annex 73import qualified Annex.Queue 74import qualified Annex.Branch 75import qualified Annex.Url as Url 76import qualified Backend 77import qualified Database.Keys 78import Git.FilePath 79import Annex.Perms 80import Annex.Link 81import Annex.LockPool 82import Annex.UUID 83import Annex.InodeSentinal 84import Annex.ReplaceFile 85import Annex.AdjustedBranch (adjustedBranchRefresh) 86import Messages.Progress 87import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..)) 88import Types.NumCopies 89import Types.Key 90import Types.Transfer 91import Logs.Transfer 92import Logs.Location 93import Utility.InodeCache 94import Utility.CopyFile 95import Utility.Metered 96import qualified Utility.RawFilePath as R 97 98import qualified System.FilePath.ByteString as P 99 100{- Prevents the content from being removed while the action is running. 101 - Uses a shared lock. 102 - 103 - If locking fails, or the content is not present, throws an exception 104 - rather than running the action. 105 -} 106lockContentShared :: Key -> (VerifiedCopy -> Annex a) -> Annex a 107lockContentShared key a = lockContentUsing lock key notpresent $ 108 ifM (inAnnex key) 109 ( do 110 u <- getUUID 111 withVerifiedCopy LockedCopy u (return True) a 112 , notpresent 113 ) 114 where 115 notpresent = giveup $ "failed to lock content: not present" 116#ifndef mingw32_HOST_OS 117 lock contentfile Nothing = tryLockShared Nothing contentfile 118 lock _ (Just lockfile) = posixLocker tryLockShared lockfile 119#else 120 lock = winLocker lockShared 121#endif 122 123{- Exclusively locks content, while performing an action that 124 - might remove it. 125 - 126 - If locking fails, throws an exception rather than running the action. 127 - 128 - But, if locking fails because the the content is not present, runs the 129 - fallback action instead. 130 -} 131lockContentForRemoval :: Key -> Annex a -> (ContentRemovalLock -> Annex a) -> Annex a 132lockContentForRemoval key fallback a = lockContentUsing lock key fallback $ 133 a (ContentRemovalLock key) 134 where 135#ifndef mingw32_HOST_OS 136 {- Since content files are stored with the write bit disabled, have 137 - to fiddle with permissions to open for an exclusive lock. -} 138 lock contentfile Nothing = bracket_ 139 (thawContent contentfile) 140 (freezeContent contentfile) 141 (tryLockExclusive Nothing contentfile) 142 lock _ (Just lockfile) = posixLocker tryLockExclusive lockfile 143#else 144 lock = winLocker lockExclusive 145#endif 146 147{- Passed the object content file, and maybe a separate lock file to use, 148 - when the content file itself should not be locked. -} 149type ContentLocker = RawFilePath -> Maybe LockFile -> Annex (Maybe LockHandle) 150 151#ifndef mingw32_HOST_OS 152posixLocker :: (Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)) -> LockFile -> Annex (Maybe LockHandle) 153posixLocker takelock lockfile = do 154 mode <- annexFileMode 155 modifyContent lockfile $ 156 takelock (Just mode) lockfile 157 158#else 159winLocker :: (LockFile -> IO (Maybe LockHandle)) -> ContentLocker 160winLocker takelock _ (Just lockfile) = do 161 modifyContent lockfile $ 162 void $ liftIO $ tryIO $ 163 writeFile (fromRawFilePath lockfile) "" 164 liftIO $ takelock lockfile 165-- never reached; windows always uses a separate lock file 166winLocker _ _ Nothing = return Nothing 167#endif 168 169{- The fallback action is run if the ContentLocker throws an IO exception 170 - and the content is not present. It's not guaranteed to always run when 171 - the content is not present, because the content file is not always 172 - the file that is locked eg on Windows a different file is locked. -} 173lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a -> Annex a 174lockContentUsing locker key fallback a = do 175 contentfile <- calcRepo (gitAnnexLocation key) 176 lockfile <- contentLockFile key 177 bracket 178 (lock contentfile lockfile) 179 (either (const noop) (unlock lockfile)) 180 go 181 where 182 alreadylocked = giveup "content is locked" 183 failedtolock e = giveup $ "failed to lock content: " ++ show e 184 185 lock contentfile lockfile = tryIO $ 186 maybe alreadylocked return 187 =<< locker contentfile lockfile 188 189 go (Right _) = a 190 go (Left e) = ifM (inAnnex key) 191 ( failedtolock e 192 , fallback 193 ) 194 195#ifndef mingw32_HOST_OS 196 unlock mlockfile lck = do 197 maybe noop cleanuplockfile mlockfile 198 liftIO $ dropLock lck 199#else 200 unlock mlockfile lck = do 201 -- Can't delete a locked file on Windows 202 liftIO $ dropLock lck 203 maybe noop cleanuplockfile mlockfile 204#endif 205 206 cleanuplockfile lockfile = modifyContent lockfile $ 207 void $ liftIO $ tryIO $ 208 removeWhenExistsWith R.removeLink lockfile 209 210{- Runs an action, passing it the temp file to get, 211 - and if the action succeeds, verifies the file matches 212 - the key and moves the file into the annex as a key's content. -} 213getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool 214getViaTmp rsp v key af action = checkDiskSpaceToGet key False $ 215 getViaTmpFromDisk rsp v key af action 216 217{- Like getViaTmp, but does not check that there is enough disk space 218 - for the incoming key. For use when the key content is already on disk 219 - and not being copied into place. -} 220getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool 221getViaTmpFromDisk rsp v key af action = checkallowed $ do 222 tmpfile <- prepTmp key 223 resuming <- liftIO $ R.doesPathExist tmpfile 224 (ok, verification) <- action tmpfile 225 -- When the temp file already had content, we don't know if 226 -- that content is good or not, so only trust if it the action 227 -- Verified it in passing. Otherwise, force verification even 228 -- if the VerifyConfig normally disables it. 229 let verification' = if resuming 230 then case verification of 231 Verified -> Verified 232 _ -> MustVerify 233 else verification 234 if ok 235 then ifM (verifyKeyContentPostRetrieval rsp v verification' key tmpfile) 236 ( pruneTmpWorkDirBefore tmpfile (moveAnnex key af) 237 , do 238 warning "verification of content failed" 239 -- The bad content is not retained, because 240 -- a retry should not try to resume from it 241 -- since it's apparently corrupted. 242 -- Also, the bad content could be any data, 243 -- including perhaps the content of another 244 -- file than the one that was requested, 245 -- and so it's best not to keep it on disk. 246 pruneTmpWorkDirBefore tmpfile 247 (liftIO . removeWhenExistsWith R.removeLink) 248 return False 249 ) 250 -- On transfer failure, the tmp file is left behind, in case 251 -- caller wants to resume its transfer 252 else return False 253 where 254 -- Avoid running the action to get the content when the 255 -- RetrievalSecurityPolicy would cause verification to always fail. 256 checkallowed a = case rsp of 257 RetrievalAllKeysSecure -> a 258 RetrievalVerifiableKeysSecure -> ifM (isVerifiable key) 259 ( a 260 , ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig) 261 ( a 262 , warnUnverifiableInsecure key >> return False 263 ) 264 ) 265 266{- Checks if there is enough free disk space to download a key 267 - to its temp file. 268 - 269 - When the temp file already exists, count the space it is using as 270 - free, since the download will overwrite it or resume. 271 - 272 - Wen there's enough free space, runs the download action. 273 -} 274checkDiskSpaceToGet :: Key -> a -> Annex a -> Annex a 275checkDiskSpaceToGet key unabletoget getkey = do 276 tmp <- fromRepo (gitAnnexTmpObjectLocation key) 277 e <- liftIO $ doesFileExist (fromRawFilePath tmp) 278 alreadythere <- liftIO $ if e 279 then getFileSize tmp 280 else return 0 281 ifM (checkDiskSpace Nothing key alreadythere True) 282 ( do 283 -- The tmp file may not have been left writable 284 when e $ thawContent tmp 285 getkey 286 , return unabletoget 287 ) 288 289prepTmp :: Key -> Annex RawFilePath 290prepTmp key = do 291 tmp <- fromRepo $ gitAnnexTmpObjectLocation key 292 createAnnexDirectory (parentDir tmp) 293 return tmp 294 295{- Prepares a temp file for a key, runs an action on it, and cleans up 296 - the temp file. If the action throws an exception, the temp file is 297 - left behind, which allows for resuming. 298 -} 299withTmp :: Key -> (RawFilePath -> Annex a) -> Annex a 300withTmp key action = do 301 tmp <- prepTmp key 302 res <- action tmp 303 pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink) 304 return res 305 306{- Moves a key's content into .git/annex/objects/ 307 - 308 - When a key has associated pointer files, the object is hard 309 - linked (or copied) to the files, and the object file is left thawed. 310 - 311 - What if the key there already has content? This could happen for 312 - various reasons; perhaps the same content is being annexed again. 313 - Perhaps there has been a hash collision generating the keys. 314 - 315 - The current strategy is to assume that in this case it's safe to delete 316 - one of the two copies of the content; and the one already in the annex 317 - is left there, assuming it's the original, canonical copy. 318 - 319 - I considered being more paranoid, and checking that both files had 320 - the same content. Decided against it because A) users explicitly choose 321 - a backend based on its hashing properties and so if they're dealing 322 - with colliding files it's their own fault and B) adding such a check 323 - would not catch all cases of colliding keys. For example, perhaps 324 - a remote has a key; if it's then added again with different content then 325 - the overall system now has two different peices of content for that 326 - key, and one of them will probably get deleted later. So, adding the 327 - check here would only raise expectations that git-annex cannot truely 328 - meet. 329 - 330 - May return false, when a particular variety of key is not being 331 - accepted into the repository. Will display a warning message in this 332 - case. May also throw exceptions in some cases. 333 -} 334moveAnnex :: Key -> AssociatedFile -> RawFilePath -> Annex Bool 335moveAnnex key af src = ifM (checkSecureHashes' key) 336 ( do 337 withObjectLoc key storeobject 338 return True 339 , return False 340 ) 341 where 342 storeobject dest = ifM (liftIO $ R.doesPathExist dest) 343 ( alreadyhave 344 , adjustedBranchRefresh af $ modifyContent dest $ do 345 freezeContent src 346 liftIO $ moveFile 347 (fromRawFilePath src) 348 (fromRawFilePath dest) 349 g <- Annex.gitRepo 350 fs <- map (`fromTopFilePath` g) 351 <$> Database.Keys.getAssociatedFiles key 352 unless (null fs) $ do 353 destic <- withTSDelta $ 354 liftIO . genInodeCache dest 355 ics <- mapM (populatePointerFile (Restage True) key dest) fs 356 Database.Keys.addInodeCaches key 357 (catMaybes (destic:ics)) 358 ) 359 alreadyhave = liftIO $ R.removeLink src 360 361checkSecureHashes :: Key -> Annex (Maybe String) 362checkSecureHashes key = ifM (Backend.isCryptographicallySecure key) 363 ( return Nothing 364 , ifM (annexSecureHashesOnly <$> Annex.getGitConfig) 365 ( return $ Just $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key" 366 , return Nothing 367 ) 368 ) 369 370checkSecureHashes' :: Key -> Annex Bool 371checkSecureHashes' key = checkSecureHashes key >>= \case 372 Nothing -> return True 373 Just msg -> do 374 warning $ msg ++ "to annex objects" 375 return False 376 377data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop 378 deriving (Eq) 379 380{- Populates the annex object file by hard linking or copying a source 381 - file to it. -} 382linkToAnnex :: Key -> RawFilePath -> Maybe InodeCache -> Annex LinkAnnexResult 383linkToAnnex key src srcic = ifM (checkSecureHashes' key) 384 ( do 385 dest <- calcRepo (gitAnnexLocation key) 386 modifyContent dest $ linkAnnex To key src srcic dest Nothing 387 , return LinkAnnexFailed 388 ) 389 390{- Makes a destination file be a link or copy from the annex object. 391 - 392 - linkAnnex stats the file after copying it to add to the inode 393 - cache. But dest may be a file in the working tree, which could 394 - get modified immediately after being populated. To avoid such a 395 - race, call linkAnnex on a temporary file and move it into place 396 - afterwards. Note that a consequence of this is that, if the file 397 - already exists, it will be overwritten. 398 -} 399linkFromAnnex :: Key -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult 400linkFromAnnex key dest destmode = 401 replaceFile' (const noop) (fromRawFilePath dest) (== LinkAnnexOk) $ \tmp -> 402 linkFromAnnex' key (toRawFilePath tmp) destmode 403 404{- This is only safe to use when dest is not a worktree file. -} 405linkFromAnnex' :: Key -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult 406linkFromAnnex' key dest destmode = do 407 src <- calcRepo (gitAnnexLocation key) 408 srcic <- withTSDelta (liftIO . genInodeCache src) 409 linkAnnex From key src srcic dest destmode 410 411data FromTo = From | To 412 413{- Hard links or copies from or to the annex object location. 414 - Updates inode cache. 415 - 416 - Freezes or thaws the destination appropriately. 417 - 418 - When a hard link is made, the annex object necessarily has to be thawed 419 - too. So, adding an object to the annex with a hard link can prevent 420 - losing the content if the source file is deleted, but does not 421 - guard against modifications. 422 - 423 - Nothing is done if the destination file already exists. 424 -} 425linkAnnex :: FromTo -> Key -> RawFilePath -> Maybe InodeCache -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult 426linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed 427linkAnnex fromto key src (Just srcic) dest destmode = 428 withTSDelta (liftIO . genInodeCache dest) >>= \case 429 Just destic -> do 430 cs <- Database.Keys.getInodeCaches key 431 if null cs 432 then Database.Keys.addInodeCaches key [srcic, destic] 433 else Database.Keys.addInodeCaches key [srcic] 434 return LinkAnnexNoop 435 Nothing -> linkOrCopy key src dest destmode >>= \case 436 Nothing -> failed 437 Just r -> do 438 case fromto of 439 From -> thawContent dest 440 To -> case r of 441 Copied -> freezeContent dest 442 Linked -> noop 443 checksrcunchanged 444 where 445 failed = do 446 Database.Keys.addInodeCaches key [srcic] 447 return LinkAnnexFailed 448 checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case 449 Just srcic' | compareStrong srcic srcic' -> do 450 destic <- withTSDelta (liftIO . genInodeCache dest) 451 Database.Keys.addInodeCaches key $ 452 catMaybes [destic, Just srcic] 453 return LinkAnnexOk 454 _ -> do 455 liftIO $ removeWhenExistsWith R.removeLink dest 456 failed 457 458{- Removes the annex object file for a key. Lowlevel. -} 459unlinkAnnex :: Key -> Annex () 460unlinkAnnex key = do 461 obj <- calcRepo (gitAnnexLocation key) 462 modifyContent obj $ do 463 secureErase obj 464 liftIO $ removeWhenExistsWith R.removeLink obj 465 466{- Runs an action to transfer an object's content. 467 - 468 - In some cases, it's possible for the file to change as it's being sent. 469 - If this happens, runs the rollback action and throws an exception. 470 - The rollback action should remove the data that was transferred. 471 -} 472sendAnnex :: Key -> Annex () -> (FilePath -> Annex a) -> Annex a 473sendAnnex key rollback sendobject = go =<< prepSendAnnex' key 474 where 475 go (Just (f, check)) = do 476 r <- sendobject f 477 check >>= \case 478 Nothing -> return r 479 Just err -> do 480 rollback 481 giveup err 482 go Nothing = giveup "content not available to send" 483 484{- Returns a file that contains an object's content, 485 - and a check to run after the transfer is complete. 486 - 487 - When a file is unlocked, it's possble for its content to 488 - change as it's being sent. The check detects this case 489 - and returns False. 490 - 491 - Note that the returned check action is, in some cases, run in the 492 - Annex monad of the remote that is receiving the object, rather than 493 - the sender. So it cannot rely on Annex state. 494 -} 495prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool)) 496prepSendAnnex key = withObjectLoc key $ \f -> do 497 let retval c = return $ Just (fromRawFilePath f, sameInodeCache f c) 498 cache <- Database.Keys.getInodeCaches key 499 if null cache 500 -- Since no inode cache is in the database, this 501 -- object is not currently unlocked. But that could 502 -- change while the transfer is in progress, so 503 -- generate an inode cache for the starting 504 -- content. 505 then maybe (return Nothing) (retval . (:[])) 506 =<< withTSDelta (liftIO . genInodeCache f) 507 -- Verify that the object is not modified. Usually this 508 -- only has to check the inode cache, but if the cache 509 -- is somehow stale, it will fall back to verifying its 510 -- content. 511 else withTSDelta (liftIO . genInodeCache f) >>= \case 512 Just fc -> ifM (isUnmodified' key f fc cache) 513 ( retval (fc:cache) 514 , return Nothing 515 ) 516 Nothing -> return Nothing 517 518prepSendAnnex' :: Key -> Annex (Maybe (FilePath, Annex (Maybe String))) 519prepSendAnnex' key = prepSendAnnex key >>= \case 520 Just (f, checksuccess) -> 521 let checksuccess' = ifM checksuccess 522 ( return Nothing 523 , return (Just "content changed while it was being sent") 524 ) 525 in return (Just (f, checksuccess')) 526 Nothing -> return Nothing 527 528cleanObjectLoc :: Key -> Annex () -> Annex () 529cleanObjectLoc key cleaner = do 530 file <- calcRepo (gitAnnexLocation key) 531 void $ tryIO $ thawContentDir file 532 cleaner 533 liftIO $ removeparents file (3 :: Int) 534 where 535 removeparents _ 0 = noop 536 removeparents file n = do 537 let dir = parentDir file 538 maybe noop (const $ removeparents dir (n-1)) 539 <=< catchMaybeIO $ removeDirectory (fromRawFilePath dir) 540 541{- Removes a key's file from .git/annex/objects/ -} 542removeAnnex :: ContentRemovalLock -> Annex () 543removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file -> 544 cleanObjectLoc key $ do 545 secureErase file 546 liftIO $ removeWhenExistsWith R.removeLink file 547 g <- Annex.gitRepo 548 mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g) 549 =<< Database.Keys.getAssociatedFiles key 550 Database.Keys.removeInodeCaches key 551 where 552 -- Check associated pointer file for modifications, and reset if 553 -- it's unmodified. 554 resetpointer file = ifM (isUnmodified key file) 555 ( adjustedBranchRefresh (AssociatedFile (Just file)) $ 556 depopulatePointerFile key file 557 -- Modified file, so leave it alone. 558 -- If it was a hard link to the annex object, 559 -- that object might have been frozen as part of the 560 -- removal process, so thaw it. 561 , void $ tryIO $ thawContent file 562 ) 563 564{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and 565 - returns the file it was moved to. -} 566moveBad :: Key -> Annex FilePath 567moveBad key = do 568 src <- calcRepo (gitAnnexLocation key) 569 bad <- fromRepo gitAnnexBadDir 570 let dest = bad P.</> P.takeFileName src 571 let dest' = fromRawFilePath dest 572 createAnnexDirectory (parentDir dest) 573 cleanObjectLoc key $ 574 liftIO $ moveFile (fromRawFilePath src) dest' 575 logStatus key InfoMissing 576 return dest' 577 578data KeyLocation = InAnnex | InAnywhere 579 580{- InAnnex only lists keys with content in .git/annex/objects. 581 - InAnywhere lists all keys that have directories in 582 - .git/annex/objects, whether or not the content is present. 583 -} 584listKeys :: KeyLocation -> Annex [Key] 585listKeys keyloc = do 586 dir <- fromRepo gitAnnexObjectDir 587 {- In order to run Annex monad actions within unsafeInterleaveIO, 588 - the current state is taken and reused. No changes made to this 589 - state will be preserved. 590 -} 591 s <- Annex.getState id 592 depth <- gitAnnexLocationDepth <$> Annex.getGitConfig 593 liftIO $ walk s depth (fromRawFilePath dir) 594 where 595 walk s depth dir = do 596 contents <- catchDefaultIO [] (dirContents dir) 597 if depth < 2 598 then do 599 contents' <- filterM (present s) contents 600 let keys = mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents' 601 continue keys [] 602 else do 603 let deeper = walk s (depth - 1) 604 continue [] (map deeper contents) 605 continue keys [] = return keys 606 continue keys (a:as) = do 607 {- Force lazy traversal with unsafeInterleaveIO. -} 608 morekeys <- unsafeInterleaveIO a 609 continue (morekeys++keys) as 610 611 inanywhere = case keyloc of 612 InAnywhere -> True 613 _ -> False 614 615 present _ _ | inanywhere = pure True 616 present _ d = presentInAnnex d 617 618 presentInAnnex = doesFileExist . contentfile 619 contentfile d = d </> takeFileName d 620 621{- Things to do to record changes to content when shutting down. 622 - 623 - It's acceptable to avoid committing changes to the branch, 624 - especially if performing a short-lived action. 625 -} 626saveState :: Bool -> Annex () 627saveState nocommit = doSideAction $ do 628 Annex.Queue.flush 629 Database.Keys.closeDb 630 unless nocommit $ 631 whenM (annexAlwaysCommit <$> Annex.getGitConfig) $ 632 Annex.Branch.commit =<< Annex.Branch.commitMessage 633 634{- Downloads content from any of a list of urls, displaying a progress 635 - meter. 636 - 637 - Only displays error message if all the urls fail to download. 638 - When listfailedurls is set, lists each url and why it failed. 639 - Otherwise, only displays one error message, from one of the urls 640 - that failed. 641 -} 642downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool 643downloadUrl listfailedurls k p iv urls file uo = 644 -- Poll the file to handle configurations where an external 645 -- download command is used. 646 meteredFile file (Just p) k (go urls []) 647 where 648 go (u:us) errs = Url.download' p iv u file uo >>= \case 649 Right () -> return True 650 Left err -> do 651 -- If the incremental verifier was fed anything 652 -- while the download that failed ran, it's unable 653 -- to be used for the other urls. 654 case iv of 655 Just iv' -> 656 liftIO $ positionIncremental iv' >>= \case 657 Just n | n > 0 -> unableIncremental iv' 658 _ -> noop 659 Nothing -> noop 660 go us ((u, err) : errs) 661 go [] [] = return False 662 go [] errs@((_, err):_) = do 663 if listfailedurls 664 then warning $ unlines $ flip map errs $ \(u, err') -> 665 u ++ " " ++ err' 666 else warning err 667 return False 668 669{- Copies a key's content, when present, to a temp file. 670 - This is used to speed up some rsyncs. -} 671preseedTmp :: Key -> FilePath -> Annex Bool 672preseedTmp key file = go =<< inAnnex key 673 where 674 go False = return False 675 go True = do 676 ok <- copy 677 when ok $ thawContent (toRawFilePath file) 678 return ok 679 copy = ifM (liftIO $ doesFileExist file) 680 ( return True 681 , do 682 s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key) 683 liftIO $ ifM (doesFileExist s) 684 ( copyFileExternal CopyTimeStamps s file 685 , return False 686 ) 687 ) 688 689{- Finds files directly inside a directory like gitAnnexBadDir 690 - (not in subdirectories) and returns the corresponding keys. -} 691dirKeys :: (Git.Repo -> RawFilePath) -> Annex [Key] 692dirKeys dirspec = do 693 dir <- fromRawFilePath <$> fromRepo dirspec 694 ifM (liftIO $ doesDirectoryExist dir) 695 ( do 696 contents <- liftIO $ getDirectoryContents dir 697 files <- liftIO $ filterM doesFileExist $ 698 map (dir </>) contents 699 return $ mapMaybe (fileKey . P.takeFileName . toRawFilePath) files 700 , return [] 701 ) 702 703{- Looks in the specified directory for bad/tmp keys, and returns a list 704 - of those that might still have value, or might be stale and removable. 705 - 706 - Also, stale keys that can be proven to have no value 707 - (ie, their content is already present) are deleted. 708 -} 709staleKeysPrune :: (Git.Repo -> RawFilePath) -> Bool -> Annex [Key] 710staleKeysPrune dirspec nottransferred = do 711 contents <- dirKeys dirspec 712 713 dups <- filterM inAnnex contents 714 let stale = contents `exclude` dups 715 716 dir <- fromRepo dirspec 717 forM_ dups $ \k -> 718 pruneTmpWorkDirBefore (dir P.</> keyFile k) 719 (liftIO . R.removeLink) 720 721 if nottransferred 722 then do 723 inprogress <- S.fromList . map (transferKey . fst) 724 <$> getTransfers 725 return $ filter (`S.notMember` inprogress) stale 726 else return stale 727 728{- Prune the work dir associated with the specified content file, 729 - before performing an action that deletes the file, or moves it away. 730 - 731 - This preserves the invariant that the workdir never exists without 732 - the content file. 733 -} 734pruneTmpWorkDirBefore :: RawFilePath -> (RawFilePath -> Annex a) -> Annex a 735pruneTmpWorkDirBefore f action = do 736 let workdir = fromRawFilePath $ gitAnnexTmpWorkDir f 737 liftIO $ whenM (doesDirectoryExist workdir) $ 738 removeDirectoryRecursive workdir 739 action f 740 741{- Runs an action, passing it a temporary work directory where 742 - it can write files while receiving the content of a key. 743 - 744 - Preserves the invariant that the workdir never exists without the 745 - content file, by creating an empty content file first. 746 - 747 - On exception, or when the action returns Nothing, 748 - the temporary work directory is retained (unless 749 - empty), so anything in it can be used on resume. 750 -} 751withTmpWorkDir :: Key -> (RawFilePath -> Annex (Maybe a)) -> Annex (Maybe a) 752withTmpWorkDir key action = do 753 -- Create the object file if it does not exist. This way, 754 -- staleKeysPrune only has to look for object files, and can 755 -- clean up gitAnnexTmpWorkDir for those it finds. 756 obj <- prepTmp key 757 let obj' = fromRawFilePath obj 758 unlessM (liftIO $ doesFileExist obj') $ do 759 liftIO $ writeFile obj' "" 760 setAnnexFilePerm obj 761 let tmpdir = gitAnnexTmpWorkDir obj 762 createAnnexDirectory tmpdir 763 res <- action tmpdir 764 case res of 765 Just _ -> liftIO $ removeDirectoryRecursive (fromRawFilePath tmpdir) 766 Nothing -> liftIO $ void $ tryIO $ removeDirectory (fromRawFilePath tmpdir) 767 return res 768 769{- Finds items in the first, smaller list, that are not 770 - present in the second, larger list. 771 - 772 - Constructing a single set, of the list that tends to be 773 - smaller, appears more efficient in both memory and CPU 774 - than constructing and taking the S.difference of two sets. -} 775exclude :: Ord a => [a] -> [a] -> [a] 776exclude [] _ = [] -- optimisation 777exclude smaller larger = S.toList $ remove larger $ S.fromList smaller 778 where 779 remove a b = foldl (flip S.delete) b a 780