1{- git-annex file content managing
2 -
3 - Copyright 2010-2021 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE CPP #-}
9
10module Annex.Content (
11	inAnnex,
12	inAnnex',
13	inAnnexSafe,
14	inAnnexCheck,
15	objectFileExists,
16	lockContentShared,
17	lockContentForRemoval,
18	ContentRemovalLock,
19	RetrievalSecurityPolicy(..),
20	getViaTmp,
21	getViaTmpFromDisk,
22	checkDiskSpaceToGet,
23	checkSecureHashes,
24	prepTmp,
25	withTmp,
26	checkDiskSpace,
27	needMoreDiskSpace,
28	moveAnnex,
29	populatePointerFile,
30	linkToAnnex,
31	linkFromAnnex,
32	linkFromAnnex',
33	LinkAnnexResult(..),
34	unlinkAnnex,
35	checkedCopyFile,
36	linkOrCopy,
37	linkOrCopy',
38	sendAnnex,
39	prepSendAnnex,
40	prepSendAnnex',
41	removeAnnex,
42	moveBad,
43	KeyLocation(..),
44	listKeys,
45	saveState,
46	downloadUrl,
47	preseedTmp,
48	dirKeys,
49	withObjectLoc,
50	staleKeysPrune,
51	pruneTmpWorkDirBefore,
52	isUnmodified,
53	isUnmodifiedCheap,
54	verifyKeyContentPostRetrieval,
55	verifyKeyContent,
56	VerifyConfig,
57	VerifyConfigA(..),
58	Verification(..),
59	unVerified,
60	withTmpWorkDir,
61) where
62
63import System.IO.Unsafe (unsafeInterleaveIO)
64import qualified Data.Set as S
65
66import Annex.Common
67import Annex.Content.Presence
68import Annex.Content.LowLevel
69import Annex.Content.PointerFile
70import Annex.Verify
71import qualified Git
72import qualified Annex
73import qualified Annex.Queue
74import qualified Annex.Branch
75import qualified Annex.Url as Url
76import qualified Backend
77import qualified Database.Keys
78import Git.FilePath
79import Annex.Perms
80import Annex.Link
81import Annex.LockPool
82import Annex.UUID
83import Annex.InodeSentinal
84import Annex.ReplaceFile
85import Annex.AdjustedBranch (adjustedBranchRefresh)
86import Messages.Progress
87import Types.Remote (RetrievalSecurityPolicy(..), VerifyConfigA(..))
88import Types.NumCopies
89import Types.Key
90import Types.Transfer
91import Logs.Transfer
92import Logs.Location
93import Utility.InodeCache
94import Utility.CopyFile
95import Utility.Metered
96import qualified Utility.RawFilePath as R
97
98import qualified System.FilePath.ByteString as P
99
100{- Prevents the content from being removed while the action is running.
101 - Uses a shared lock.
102 -
103 - If locking fails, or the content is not present, throws an exception
104 - rather than running the action.
105 -}
106lockContentShared :: Key -> (VerifiedCopy -> Annex a) -> Annex a
107lockContentShared key a = lockContentUsing lock key notpresent $
108	ifM (inAnnex key)
109		( do
110			u <- getUUID
111			withVerifiedCopy LockedCopy u (return True) a
112		, notpresent
113		)
114  where
115	notpresent = giveup $ "failed to lock content: not present"
116#ifndef mingw32_HOST_OS
117	lock contentfile Nothing = tryLockShared Nothing contentfile
118	lock _ (Just lockfile) = posixLocker tryLockShared lockfile
119#else
120	lock = winLocker lockShared
121#endif
122
123{- Exclusively locks content, while performing an action that
124 - might remove it.
125 -
126 - If locking fails, throws an exception rather than running the action.
127 -
128 - But, if locking fails because the the content is not present, runs the
129 - fallback action instead.
130 -}
131lockContentForRemoval :: Key -> Annex a -> (ContentRemovalLock -> Annex a) -> Annex a
132lockContentForRemoval key fallback a = lockContentUsing lock key fallback $
133	a (ContentRemovalLock key)
134  where
135#ifndef mingw32_HOST_OS
136	{- Since content files are stored with the write bit disabled, have
137	 - to fiddle with permissions to open for an exclusive lock. -}
138	lock contentfile Nothing = bracket_
139		(thawContent contentfile)
140		(freezeContent contentfile)
141		(tryLockExclusive Nothing contentfile)
142	lock _ (Just lockfile) = posixLocker tryLockExclusive lockfile
143#else
144	lock = winLocker lockExclusive
145#endif
146
147{- Passed the object content file, and maybe a separate lock file to use,
148 - when the content file itself should not be locked. -}
149type ContentLocker = RawFilePath -> Maybe LockFile -> Annex (Maybe LockHandle)
150
151#ifndef mingw32_HOST_OS
152posixLocker :: (Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)) -> LockFile -> Annex (Maybe LockHandle)
153posixLocker takelock lockfile = do
154	mode <- annexFileMode
155	modifyContent lockfile $
156		takelock (Just mode) lockfile
157
158#else
159winLocker :: (LockFile -> IO (Maybe LockHandle)) -> ContentLocker
160winLocker takelock _ (Just lockfile) = do
161	modifyContent lockfile $
162		void $ liftIO $ tryIO $
163			writeFile (fromRawFilePath lockfile) ""
164	liftIO $ takelock lockfile
165-- never reached; windows always uses a separate lock file
166winLocker _ _ Nothing = return Nothing
167#endif
168
169{- The fallback action is run if the ContentLocker throws an IO exception
170 - and the content is not present. It's not guaranteed to always run when
171 - the content is not present, because the content file is not always
172 - the file that is locked eg on Windows a different file is locked. -}
173lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a -> Annex a
174lockContentUsing locker key fallback a = do
175	contentfile <- calcRepo (gitAnnexLocation key)
176	lockfile <- contentLockFile key
177	bracket
178		(lock contentfile lockfile)
179		(either (const noop) (unlock lockfile))
180		go
181  where
182	alreadylocked = giveup "content is locked"
183	failedtolock e = giveup $ "failed to lock content: " ++ show e
184
185	lock contentfile lockfile = tryIO $
186		maybe alreadylocked return
187			=<< locker contentfile lockfile
188
189	go (Right _) = a
190	go (Left e) = ifM (inAnnex key)
191		( failedtolock e
192		, fallback
193		)
194
195#ifndef mingw32_HOST_OS
196	unlock mlockfile lck = do
197		maybe noop cleanuplockfile mlockfile
198		liftIO $ dropLock lck
199#else
200	unlock mlockfile lck = do
201		-- Can't delete a locked file on Windows
202		liftIO $ dropLock lck
203		maybe noop cleanuplockfile mlockfile
204#endif
205
206	cleanuplockfile lockfile = modifyContent lockfile $
207		void $ liftIO $ tryIO $
208			removeWhenExistsWith R.removeLink lockfile
209
210{- Runs an action, passing it the temp file to get,
211 - and if the action succeeds, verifies the file matches
212 - the key and moves the file into the annex as a key's content. -}
213getViaTmp :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool
214getViaTmp rsp v key af action = checkDiskSpaceToGet key False $
215	getViaTmpFromDisk rsp v key af action
216
217{- Like getViaTmp, but does not check that there is enough disk space
218 - for the incoming key. For use when the key content is already on disk
219 - and not being copied into place. -}
220getViaTmpFromDisk :: RetrievalSecurityPolicy -> VerifyConfig -> Key -> AssociatedFile -> (RawFilePath -> Annex (Bool, Verification)) -> Annex Bool
221getViaTmpFromDisk rsp v key af action = checkallowed $ do
222	tmpfile <- prepTmp key
223	resuming <- liftIO $ R.doesPathExist tmpfile
224	(ok, verification) <- action tmpfile
225	-- When the temp file already had content, we don't know if
226	-- that content is good or not, so only trust if it the action
227	-- Verified it in passing. Otherwise, force verification even
228	-- if the VerifyConfig normally disables it.
229	let verification' = if resuming
230		then case verification of
231			Verified -> Verified
232			_ -> MustVerify
233		else verification
234	if ok
235		then ifM (verifyKeyContentPostRetrieval rsp v verification' key tmpfile)
236			( pruneTmpWorkDirBefore tmpfile (moveAnnex key af)
237			, do
238				warning "verification of content failed"
239				-- The bad content is not retained, because
240				-- a retry should not try to resume from it
241				-- since it's apparently corrupted.
242				-- Also, the bad content could be any data,
243				-- including perhaps the content of another
244				-- file than the one that was requested,
245				-- and so it's best not to keep it on disk.
246				pruneTmpWorkDirBefore tmpfile
247					(liftIO . removeWhenExistsWith R.removeLink)
248				return False
249			)
250		-- On transfer failure, the tmp file is left behind, in case
251		-- caller wants to resume its transfer
252		else return False
253  where
254	-- Avoid running the action to get the content when the
255	-- RetrievalSecurityPolicy would cause verification to always fail.
256	checkallowed a = case rsp of
257		RetrievalAllKeysSecure -> a
258		RetrievalVerifiableKeysSecure -> ifM (isVerifiable key)
259			( a
260			, ifM (annexAllowUnverifiedDownloads <$> Annex.getGitConfig)
261				( a
262				, warnUnverifiableInsecure key >> return False
263				)
264			)
265
266{- Checks if there is enough free disk space to download a key
267 - to its temp file.
268 -
269 - When the temp file already exists, count the space it is using as
270 - free, since the download will overwrite it or resume.
271 -
272 - Wen there's enough free space, runs the download action.
273 -}
274checkDiskSpaceToGet :: Key -> a -> Annex a -> Annex a
275checkDiskSpaceToGet key unabletoget getkey = do
276	tmp <- fromRepo (gitAnnexTmpObjectLocation key)
277	e <- liftIO $ doesFileExist (fromRawFilePath tmp)
278	alreadythere <- liftIO $ if e
279		then getFileSize tmp
280		else return 0
281	ifM (checkDiskSpace Nothing key alreadythere True)
282		( do
283			-- The tmp file may not have been left writable
284			when e $ thawContent tmp
285			getkey
286		, return unabletoget
287		)
288
289prepTmp :: Key -> Annex RawFilePath
290prepTmp key = do
291	tmp <- fromRepo $ gitAnnexTmpObjectLocation key
292	createAnnexDirectory (parentDir tmp)
293	return tmp
294
295{- Prepares a temp file for a key, runs an action on it, and cleans up
296 - the temp file. If the action throws an exception, the temp file is
297 - left behind, which allows for resuming.
298 -}
299withTmp :: Key -> (RawFilePath -> Annex a) -> Annex a
300withTmp key action = do
301	tmp <- prepTmp key
302	res <- action tmp
303	pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)
304	return res
305
306{- Moves a key's content into .git/annex/objects/
307 -
308 - When a key has associated pointer files, the object is hard
309 - linked (or copied) to the files, and the object file is left thawed.
310 -
311 - What if the key there already has content? This could happen for
312 - various reasons; perhaps the same content is being annexed again.
313 - Perhaps there has been a hash collision generating the keys.
314 -
315 - The current strategy is to assume that in this case it's safe to delete
316 - one of the two copies of the content; and the one already in the annex
317 - is left there, assuming it's the original, canonical copy.
318 -
319 - I considered being more paranoid, and checking that both files had
320 - the same content. Decided against it because A) users explicitly choose
321 - a backend based on its hashing properties and so if they're dealing
322 - with colliding files it's their own fault and B) adding such a check
323 - would not catch all cases of colliding keys. For example, perhaps
324 - a remote has a key; if it's then added again with different content then
325 - the overall system now has two different peices of content for that
326 - key, and one of them will probably get deleted later. So, adding the
327 - check here would only raise expectations that git-annex cannot truely
328 - meet.
329 -
330 - May return false, when a particular variety of key is not being
331 - accepted into the repository. Will display a warning message in this
332 - case. May also throw exceptions in some cases.
333 -}
334moveAnnex :: Key -> AssociatedFile -> RawFilePath -> Annex Bool
335moveAnnex key af src = ifM (checkSecureHashes' key)
336	( do
337		withObjectLoc key storeobject
338		return True
339	, return False
340	)
341  where
342	storeobject dest = ifM (liftIO $ R.doesPathExist dest)
343		( alreadyhave
344		, adjustedBranchRefresh af $ modifyContent dest $ do
345			freezeContent src
346			liftIO $ moveFile
347				(fromRawFilePath src)
348				(fromRawFilePath dest)
349			g <- Annex.gitRepo
350			fs <- map (`fromTopFilePath` g)
351				<$> Database.Keys.getAssociatedFiles key
352			unless (null fs) $ do
353				destic <- withTSDelta $
354					liftIO . genInodeCache dest
355				ics <- mapM (populatePointerFile (Restage True) key dest) fs
356				Database.Keys.addInodeCaches key
357					(catMaybes (destic:ics))
358		)
359	alreadyhave = liftIO $ R.removeLink src
360
361checkSecureHashes :: Key -> Annex (Maybe String)
362checkSecureHashes key = ifM (Backend.isCryptographicallySecure key)
363	( return Nothing
364	, ifM (annexSecureHashesOnly <$> Annex.getGitConfig)
365		( return $ Just $ "annex.securehashesonly blocked adding " ++ decodeBS (formatKeyVariety (fromKey keyVariety key)) ++ " key"
366		, return Nothing
367		)
368	)
369
370checkSecureHashes' :: Key -> Annex Bool
371checkSecureHashes' key = checkSecureHashes key >>= \case
372	Nothing -> return True
373	Just msg -> do
374		warning $ msg ++ "to annex objects"
375		return False
376
377data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
378	deriving (Eq)
379
380{- Populates the annex object file by hard linking or copying a source
381 - file to it. -}
382linkToAnnex :: Key -> RawFilePath -> Maybe InodeCache -> Annex LinkAnnexResult
383linkToAnnex key src srcic = ifM (checkSecureHashes' key)
384	( do
385		dest <- calcRepo (gitAnnexLocation key)
386		modifyContent dest $ linkAnnex To key src srcic dest Nothing
387	, return LinkAnnexFailed
388	)
389
390{- Makes a destination file be a link or copy from the annex object.
391 -
392 - linkAnnex stats the file after copying it to add to the inode
393 - cache. But dest may be a file in the working tree, which could
394 - get modified immediately after being populated. To avoid such a
395 - race, call linkAnnex on a temporary file and move it into place
396 - afterwards. Note that a consequence of this is that, if the file
397 - already exists, it will be overwritten.
398 -}
399linkFromAnnex :: Key -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult
400linkFromAnnex key dest destmode =
401	replaceFile' (const noop) (fromRawFilePath dest) (== LinkAnnexOk) $ \tmp ->
402		linkFromAnnex' key (toRawFilePath tmp) destmode
403
404{- This is only safe to use when dest is not a worktree file. -}
405linkFromAnnex' :: Key -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult
406linkFromAnnex' key dest destmode = do
407	src <- calcRepo (gitAnnexLocation key)
408	srcic <- withTSDelta (liftIO . genInodeCache src)
409	linkAnnex From key src srcic dest destmode
410
411data FromTo = From | To
412
413{- Hard links or copies from or to the annex object location.
414 - Updates inode cache.
415 -
416 - Freezes or thaws the destination appropriately.
417 -
418 - When a hard link is made, the annex object necessarily has to be thawed
419 - too. So, adding an object to the annex with a hard link can prevent
420 - losing the content if the source file is deleted, but does not
421 - guard against modifications.
422 -
423 - Nothing is done if the destination file already exists.
424 -}
425linkAnnex :: FromTo -> Key -> RawFilePath -> Maybe InodeCache -> RawFilePath -> Maybe FileMode -> Annex LinkAnnexResult
426linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed
427linkAnnex fromto key src (Just srcic) dest destmode =
428	withTSDelta (liftIO . genInodeCache dest) >>= \case
429		Just destic -> do
430			cs <- Database.Keys.getInodeCaches key
431			if null cs
432				then Database.Keys.addInodeCaches key [srcic, destic]
433				else Database.Keys.addInodeCaches key [srcic]
434			return LinkAnnexNoop
435		Nothing -> linkOrCopy key src dest destmode >>= \case
436			Nothing -> failed
437			Just r -> do
438				case fromto of
439					From -> thawContent dest
440					To -> case r of
441						Copied -> freezeContent dest
442						Linked -> noop
443				checksrcunchanged
444  where
445	failed = do
446		Database.Keys.addInodeCaches key [srcic]
447		return LinkAnnexFailed
448	checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case
449		Just srcic' | compareStrong srcic srcic' -> do
450			destic <- withTSDelta (liftIO . genInodeCache dest)
451			Database.Keys.addInodeCaches key $
452				catMaybes [destic, Just srcic]
453			return LinkAnnexOk
454		_ -> do
455			liftIO $ removeWhenExistsWith R.removeLink dest
456			failed
457
458{- Removes the annex object file for a key. Lowlevel. -}
459unlinkAnnex :: Key -> Annex ()
460unlinkAnnex key = do
461	obj <- calcRepo (gitAnnexLocation key)
462	modifyContent obj $ do
463		secureErase obj
464		liftIO $ removeWhenExistsWith R.removeLink obj
465
466{- Runs an action to transfer an object's content.
467 -
468 - In some cases, it's possible for the file to change as it's being sent.
469 - If this happens, runs the rollback action and throws an exception.
470 - The rollback action should remove the data that was transferred.
471 -}
472sendAnnex :: Key -> Annex () -> (FilePath -> Annex a) -> Annex a
473sendAnnex key rollback sendobject = go =<< prepSendAnnex' key
474  where
475	go (Just (f, check)) = do
476		r <- sendobject f
477		check >>= \case
478			Nothing -> return r
479			Just err -> do
480				rollback
481				giveup err
482	go Nothing = giveup "content not available to send"
483
484{- Returns a file that contains an object's content,
485 - and a check to run after the transfer is complete.
486 -
487 - When a file is unlocked, it's possble for its content to
488 - change as it's being sent. The check detects this case
489 - and returns False.
490 -
491 - Note that the returned check action is, in some cases, run in the
492 - Annex monad of the remote that is receiving the object, rather than
493 - the sender. So it cannot rely on Annex state.
494 -}
495prepSendAnnex :: Key -> Annex (Maybe (FilePath, Annex Bool))
496prepSendAnnex key = withObjectLoc key $ \f -> do
497	let retval c = return $ Just (fromRawFilePath f, sameInodeCache f c)
498	cache <- Database.Keys.getInodeCaches key
499	if null cache
500		-- Since no inode cache is in the database, this
501		-- object is not currently unlocked. But that could
502		-- change while the transfer is in progress, so
503		-- generate an inode cache for the starting
504		-- content.
505		then maybe (return Nothing) (retval . (:[]))
506			=<< withTSDelta (liftIO . genInodeCache f)
507		-- Verify that the object is not modified. Usually this
508		-- only has to check the inode cache, but if the cache
509		-- is somehow stale, it will fall back to verifying its
510		-- content.
511		else withTSDelta (liftIO . genInodeCache f) >>= \case
512			Just fc -> ifM (isUnmodified' key f fc cache)
513				( retval (fc:cache)
514				, return Nothing
515				)
516			Nothing -> return Nothing
517
518prepSendAnnex' :: Key -> Annex (Maybe (FilePath, Annex (Maybe String)))
519prepSendAnnex' key = prepSendAnnex key >>= \case
520	Just (f, checksuccess) ->
521		let checksuccess' = ifM checksuccess
522			( return Nothing
523			, return (Just "content changed while it was being sent")
524			)
525		in return (Just (f, checksuccess'))
526	Nothing -> return Nothing
527
528cleanObjectLoc :: Key -> Annex () -> Annex ()
529cleanObjectLoc key cleaner = do
530	file <- calcRepo (gitAnnexLocation key)
531	void $ tryIO $ thawContentDir file
532	cleaner
533	liftIO $ removeparents file (3 :: Int)
534  where
535	removeparents _ 0 = noop
536	removeparents file n = do
537		let dir = parentDir file
538		maybe noop (const $ removeparents dir (n-1))
539			<=< catchMaybeIO $ removeDirectory (fromRawFilePath dir)
540
541{- Removes a key's file from .git/annex/objects/ -}
542removeAnnex :: ContentRemovalLock -> Annex ()
543removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
544	cleanObjectLoc key $ do
545		secureErase file
546		liftIO $ removeWhenExistsWith R.removeLink file
547		g <- Annex.gitRepo
548		mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g)
549			=<< Database.Keys.getAssociatedFiles key
550		Database.Keys.removeInodeCaches key
551  where
552	-- Check associated pointer file for modifications, and reset if
553	-- it's unmodified.
554	resetpointer file = ifM (isUnmodified key file)
555		( adjustedBranchRefresh (AssociatedFile (Just file)) $
556			depopulatePointerFile key file
557		-- Modified file, so leave it alone.
558		-- If it was a hard link to the annex object,
559		-- that object might have been frozen as part of the
560		-- removal process, so thaw it.
561		, void $ tryIO $ thawContent file
562		)
563
564{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
565 - returns the file it was moved to. -}
566moveBad :: Key -> Annex FilePath
567moveBad key = do
568	src <- calcRepo (gitAnnexLocation key)
569	bad <- fromRepo gitAnnexBadDir
570	let dest = bad P.</> P.takeFileName src
571	let dest' = fromRawFilePath dest
572	createAnnexDirectory (parentDir dest)
573	cleanObjectLoc key $
574		liftIO $ moveFile (fromRawFilePath src) dest'
575	logStatus key InfoMissing
576	return dest'
577
578data KeyLocation = InAnnex | InAnywhere
579
580{- InAnnex only lists keys with content in .git/annex/objects.
581 - InAnywhere lists all keys that have directories in
582 - .git/annex/objects, whether or not the content is present.
583 -}
584listKeys :: KeyLocation -> Annex [Key]
585listKeys keyloc = do
586	dir <- fromRepo gitAnnexObjectDir
587	{- In order to run Annex monad actions within unsafeInterleaveIO,
588	 - the current state is taken and reused. No changes made to this
589	 - state will be preserved.
590	 -}
591	s <- Annex.getState id
592	depth <- gitAnnexLocationDepth <$> Annex.getGitConfig
593	liftIO $ walk s depth (fromRawFilePath dir)
594  where
595	walk s depth dir = do
596		contents <- catchDefaultIO [] (dirContents dir)
597		if depth < 2
598			then do
599				contents' <- filterM (present s) contents
600				let keys = mapMaybe (fileKey . P.takeFileName . toRawFilePath) contents'
601				continue keys []
602			else do
603				let deeper = walk s (depth - 1)
604				continue [] (map deeper contents)
605	continue keys [] = return keys
606	continue keys (a:as) = do
607		{- Force lazy traversal with unsafeInterleaveIO. -}
608		morekeys <- unsafeInterleaveIO a
609		continue (morekeys++keys) as
610
611	inanywhere = case keyloc of
612		InAnywhere -> True
613		_ -> False
614
615	present _ _ | inanywhere = pure True
616	present _ d = presentInAnnex d
617
618	presentInAnnex = doesFileExist . contentfile
619	contentfile d = d </> takeFileName d
620
621{- Things to do to record changes to content when shutting down.
622 -
623 - It's acceptable to avoid committing changes to the branch,
624 - especially if performing a short-lived action.
625 -}
626saveState :: Bool -> Annex ()
627saveState nocommit = doSideAction $ do
628	Annex.Queue.flush
629	Database.Keys.closeDb
630	unless nocommit $
631		whenM (annexAlwaysCommit <$> Annex.getGitConfig) $
632			Annex.Branch.commit =<< Annex.Branch.commitMessage
633
634{- Downloads content from any of a list of urls, displaying a progress
635 - meter.
636 -
637 - Only displays error message if all the urls fail to download.
638 - When listfailedurls is set, lists each url and why it failed.
639 - Otherwise, only displays one error message, from one of the urls
640 - that failed.
641 -}
642downloadUrl :: Bool -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> [Url.URLString] -> FilePath -> Url.UrlOptions -> Annex Bool
643downloadUrl listfailedurls k p iv urls file uo =
644	-- Poll the file to handle configurations where an external
645	-- download command is used.
646	meteredFile file (Just p) k (go urls [])
647  where
648	go (u:us) errs = Url.download' p iv u file uo >>= \case
649		Right () -> return True
650		Left err -> do
651			-- If the incremental verifier was fed anything
652			-- while the download that failed ran, it's unable
653			-- to be used for the other urls.
654			case iv of
655				Just iv' ->
656					liftIO $ positionIncremental iv' >>= \case
657					Just n | n > 0 -> unableIncremental iv'
658					_ -> noop
659				Nothing -> noop
660			go us ((u, err) : errs)
661	go [] [] = return False
662	go [] errs@((_, err):_) = do
663		if listfailedurls
664			then warning $ unlines $ flip map errs $ \(u, err') ->
665				u ++ " " ++ err'
666			else warning err
667		return False
668
669{- Copies a key's content, when present, to a temp file.
670 - This is used to speed up some rsyncs. -}
671preseedTmp :: Key -> FilePath -> Annex Bool
672preseedTmp key file = go =<< inAnnex key
673  where
674	go False = return False
675	go True = do
676		ok <- copy
677		when ok $ thawContent (toRawFilePath file)
678		return ok
679	copy = ifM (liftIO $ doesFileExist file)
680		( return True
681		, do
682			s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key)
683			liftIO $ ifM (doesFileExist s)
684				( copyFileExternal CopyTimeStamps s file
685				, return False
686				)
687		)
688
689{- Finds files directly inside a directory like gitAnnexBadDir
690 - (not in subdirectories) and returns the corresponding keys. -}
691dirKeys :: (Git.Repo -> RawFilePath) -> Annex [Key]
692dirKeys dirspec = do
693	dir <- fromRawFilePath <$> fromRepo dirspec
694	ifM (liftIO $ doesDirectoryExist dir)
695		( do
696			contents <- liftIO $ getDirectoryContents dir
697			files <- liftIO $ filterM doesFileExist $
698				map (dir </>) contents
699			return $ mapMaybe (fileKey . P.takeFileName . toRawFilePath) files
700		, return []
701		)
702
703{- Looks in the specified directory for bad/tmp keys, and returns a list
704 - of those that might still have value, or might be stale and removable.
705 -
706 - Also, stale keys that can be proven to have no value
707 - (ie, their content is already present) are deleted.
708 -}
709staleKeysPrune :: (Git.Repo -> RawFilePath) -> Bool -> Annex [Key]
710staleKeysPrune dirspec nottransferred = do
711	contents <- dirKeys dirspec
712
713	dups <- filterM inAnnex contents
714	let stale = contents `exclude` dups
715
716	dir <- fromRepo dirspec
717	forM_ dups $ \k ->
718		pruneTmpWorkDirBefore (dir P.</> keyFile k)
719			(liftIO . R.removeLink)
720
721	if nottransferred
722		then do
723			inprogress <- S.fromList . map (transferKey . fst)
724				<$> getTransfers
725			return $ filter (`S.notMember` inprogress) stale
726		else return stale
727
728{- Prune the work dir associated with the specified content file,
729 - before performing an action that deletes the file, or moves it away.
730 -
731 - This preserves the invariant that the workdir never exists without
732 - the content file.
733 -}
734pruneTmpWorkDirBefore :: RawFilePath -> (RawFilePath -> Annex a) -> Annex a
735pruneTmpWorkDirBefore f action = do
736	let workdir = fromRawFilePath $ gitAnnexTmpWorkDir f
737	liftIO $ whenM (doesDirectoryExist workdir) $
738		removeDirectoryRecursive workdir
739	action f
740
741{- Runs an action, passing it a temporary work directory where
742 - it can write files while receiving the content of a key.
743 -
744 - Preserves the invariant that the workdir never exists without the
745 - content file, by creating an empty content file first.
746 -
747 - On exception, or when the action returns Nothing,
748 - the temporary work directory is retained (unless
749 - empty), so anything in it can be used on resume.
750 -}
751withTmpWorkDir :: Key -> (RawFilePath -> Annex (Maybe a)) -> Annex (Maybe a)
752withTmpWorkDir key action = do
753	-- Create the object file if it does not exist. This way,
754	-- staleKeysPrune only has to look for object files, and can
755	-- clean up gitAnnexTmpWorkDir for those it finds.
756	obj <- prepTmp key
757	let obj' = fromRawFilePath obj
758	unlessM (liftIO $ doesFileExist obj') $ do
759		liftIO $ writeFile obj' ""
760		setAnnexFilePerm obj
761	let tmpdir = gitAnnexTmpWorkDir obj
762	createAnnexDirectory tmpdir
763	res <- action tmpdir
764	case res of
765		Just _ -> liftIO $ removeDirectoryRecursive (fromRawFilePath tmpdir)
766		Nothing -> liftIO $ void $ tryIO $ removeDirectory (fromRawFilePath tmpdir)
767	return res
768
769{- Finds items in the first, smaller list, that are not
770 - present in the second, larger list.
771 -
772 - Constructing a single set, of the list that tends to be
773 - smaller, appears more efficient in both memory and CPU
774 - than constructing and taking the S.difference of two sets. -}
775exclude :: Ord a => [a] -> [a] -> [a]
776exclude [] _ = [] -- optimisation
777exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
778  where
779	remove a b = foldl (flip S.delete) b a
780