1{- management of the git-annex branch
2 -
3 - Copyright 2011-2021 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE OverloadedStrings #-}
9
10module Annex.Branch (
11	fullname,
12	name,
13	hasOrigin,
14	hasSibling,
15	siblingBranches,
16	create,
17	UpdateMade(..),
18	update,
19	forceUpdate,
20	updateTo,
21	get,
22	getHistorical,
23	RegardingUUID(..),
24	change,
25	maybeChange,
26	commitMessage,
27	createMessage,
28	commit,
29	forceCommit,
30	getBranch,
31	files,
32	rememberTreeish,
33	performTransitions,
34	withIndex,
35	precache,
36	overBranchFileContents,
37) where
38
39import qualified Data.ByteString as B
40import qualified Data.ByteString.Lazy as L
41import qualified Data.ByteString.Char8 as B8
42import qualified Data.Set as S
43import qualified Data.Map as M
44import Data.Function
45import Data.Char
46import Data.ByteString.Builder
47import Control.Concurrent (threadDelay)
48import Control.Concurrent.MVar
49import qualified System.FilePath.ByteString as P
50
51import Annex.Common
52import Types.BranchState
53import Annex.BranchState
54import Annex.Journal
55import Annex.GitOverlay
56import Annex.Tmp
57import qualified Git
58import qualified Git.Command
59import qualified Git.Ref
60import qualified Git.RefLog
61import qualified Git.Sha
62import qualified Git.Branch
63import qualified Git.UnionMerge
64import qualified Git.UpdateIndex
65import qualified Git.Tree
66import qualified Git.LsTree
67import Git.LsTree (lsTreeParams)
68import qualified Git.HashObject
69import Annex.HashObject
70import Git.Types (Ref(..), fromRef, fromRef', RefDate, TreeItemType(..))
71import Git.FilePath
72import Annex.CatFile
73import Git.CatFile (catObjectStreamLsTree)
74import Annex.Perms
75import Logs
76import Logs.Transitions
77import Logs.File
78import Logs.Trust.Pure
79import Logs.Remote.Pure
80import Logs.Export.Pure
81import Logs.Difference.Pure
82import qualified Annex.Queue
83import Annex.Branch.Transitions
84import qualified Annex
85import Annex.Hook
86import Utility.Directory.Stream
87import Utility.Tmp
88import qualified Utility.RawFilePath as R
89
90{- Name of the branch that is used to store git-annex's information. -}
91name :: Git.Ref
92name = Git.Ref "git-annex"
93
94{- Fully qualified name of the branch. -}
95fullname :: Git.Ref
96fullname = Git.Ref $ "refs/heads/" <> fromRef' name
97
98{- Branch's name in origin. -}
99originname :: Git.Ref
100originname = Git.Ref $ "refs/remotes/origin/" <> fromRef' name
101
102{- Does origin/git-annex exist? -}
103hasOrigin :: Annex Bool
104hasOrigin = inRepo $ Git.Ref.exists originname
105
106{- Does the git-annex branch or a sibling foo/git-annex branch exist? -}
107hasSibling :: Annex Bool
108hasSibling = not . null <$> siblingBranches
109
110{- List of git-annex (shas, branches), including the main one and any
111 - from remotes. Duplicates are filtered out. -}
112siblingBranches :: Annex [(Git.Sha, Git.Branch)]
113siblingBranches = inRepo $ Git.Ref.matchingUniq [name]
114
115{- Creates the branch, if it does not already exist. -}
116create :: Annex ()
117create = void getBranch
118
119{- Returns the ref of the branch, creating it first if necessary. -}
120getBranch :: Annex Git.Ref
121getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
122  where
123	go True = do
124		inRepo $ Git.Command.run
125			[ Param "branch"
126			, Param "--no-track"
127			, Param $ fromRef name
128			, Param $ fromRef originname
129			]
130		fromMaybe (error $ "failed to create " ++ fromRef name)
131			<$> branchsha
132	go False = withIndex' True $ do
133		cmode <- annexCommitMode <$> Annex.getGitConfig
134		cmessage <- createMessage
135		inRepo $ Git.Branch.commitAlways cmode cmessage fullname []
136	use sha = do
137		setIndexSha sha
138		return sha
139	branchsha = inRepo $ Git.Ref.sha fullname
140
141{- Ensures that the branch and index are up-to-date; should be
142 - called before data is read from it. Runs only once per git-annex run. -}
143update :: Annex BranchState
144update = runUpdateOnce $ journalClean <$$> updateTo =<< siblingBranches
145
146{- Forces an update even if one has already been run. -}
147forceUpdate :: Annex UpdateMade
148forceUpdate = updateTo =<< siblingBranches
149
150data UpdateMade = UpdateMade
151	{ refsWereMerged :: Bool
152	, journalClean :: Bool
153	}
154
155{- Merges the specified Refs into the index, if they have any changes not
156 - already in it. The Branch names are only used in the commit message;
157 - it's even possible that the provided Branches have not been updated to
158 - point to the Refs yet.
159 -
160 - The branch is fast-forwarded if possible, otherwise a merge commit is
161 - made.
162 -
163 - Before Refs are merged into the index, it's important to first stage the
164 - journal into the index. Otherwise, any changes in the journal would
165 - later get staged, and might overwrite changes made during the merge.
166 - This is only done if some of the Refs do need to be merged.
167 -
168 - Also handles performing any Transitions that have not yet been
169 - performed, in either the local branch, or the Refs.
170 -
171 - Returns True if any refs were merged in, False otherwise.
172 -}
173updateTo :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
174updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig)
175	( updateTo' pairs
176	, return (UpdateMade False False)
177	)
178
179updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade
180updateTo' pairs = do
181	-- ensure branch exists, and get its current ref
182	branchref <- getBranch
183	dirty <- journalDirty gitAnnexJournalDir
184	ignoredrefs <- getIgnoredRefs
185	let unignoredrefs = excludeset ignoredrefs pairs
186	tomerge <- if null unignoredrefs
187		then return []
188		else do
189			mergedrefs <- getMergedRefs
190			filterM isnewer (excludeset mergedrefs unignoredrefs)
191	journalcleaned <- if null tomerge
192		{- Even when no refs need to be merged, the index
193		 - may still be updated if the branch has gotten ahead
194		 - of the index, or just if the journal is dirty. -}
195		then ifM (needUpdateIndex branchref)
196			( lockJournal $ \jl -> do
197				forceUpdateIndex jl branchref
198				{- When there are journalled changes
199				 - as well as the branch being updated,
200				 - a commit needs to be done. -}
201				when dirty $
202					go branchref dirty [] jl
203				return True
204			, if dirty
205				then ifM (annexAlwaysCommit <$> Annex.getGitConfig)
206					( do
207						lockJournal $ go branchref dirty []
208						return True
209					, return False
210					)
211				else return True
212			)
213		else do
214			lockJournal $ go branchref dirty tomerge
215			return True
216	journalclean <- if journalcleaned
217		then not <$> privateUUIDsKnown
218		else pure False
219	return $ UpdateMade
220		{ refsWereMerged = not (null tomerge)
221		, journalClean = journalclean
222		}
223  where
224	excludeset s = filter (\(r, _) -> S.notMember r s)
225	isnewer (r, _) = inRepo $ Git.Branch.changed fullname r
226	go branchref dirty tomerge jl = stagejournalwhen dirty jl $ do
227		let (refs, branches) = unzip tomerge
228		merge_desc <- if null tomerge
229			then commitMessage
230			else return $ "merging " ++
231				unwords (map Git.Ref.describe branches) ++
232				" into " ++ fromRef name
233		localtransitions <- parseTransitionsStrictly "local"
234			<$> getLocal transitionsLog
235		unless (null tomerge) $ do
236			showSideAction merge_desc
237			mapM_ checkBranchDifferences refs
238			mergeIndex jl refs
239		let commitrefs = nub $ fullname:refs
240		ifM (handleTransitions jl localtransitions commitrefs)
241			( runAnnexHook postUpdateAnnexHook
242			, do
243				ff <- if dirty
244					then return False
245					else inRepo $ Git.Branch.fastForward fullname refs
246				if ff
247					then updateIndex jl branchref
248					else commitIndex jl branchref merge_desc commitrefs
249			)
250		addMergedRefs tomerge
251		invalidateCache
252	stagejournalwhen dirty jl a
253		| dirty = stageJournal jl a
254		| otherwise = withIndex a
255
256{- Gets the content of a file, which may be in the journal, or in the index
257 - (and committed to the branch).
258 -
259 - Updates the branch if necessary, to ensure the most up-to-date available
260 - content is returned.
261 -
262 - Returns an empty string if the file doesn't exist yet. -}
263get :: RawFilePath -> Annex L.ByteString
264get file = getCache file >>= \case
265	Just content -> return content
266	Nothing -> do
267		st <- update
268		content <- if journalIgnorable st
269			then getRef fullname file
270			else getLocal file
271		setCache file content
272		return content
273
274{- Used to cache the value of a file, which has been read from the branch
275 - using some optimised method. The journal has to be checked, in case
276 - it has a newer version of the file that has not reached the branch yet.
277 -}
278precache :: RawFilePath -> L.ByteString -> Annex ()
279precache file branchcontent = do
280	st <- getState
281	content <- if journalIgnorable st
282		then pure branchcontent
283		else fromMaybe branchcontent
284			<$> getJournalFileStale (GetPrivate True) file
285	Annex.BranchState.setCache file content
286
287{- Like get, but does not merge the branch, so the info returned may not
288 - reflect changes in remotes.
289 - (Changing the value this returns, and then merging is always the
290 - same as using get, and then changing its value.) -}
291getLocal :: RawFilePath -> Annex L.ByteString
292getLocal = getLocal' (GetPrivate True)
293
294getLocal' :: GetPrivate -> RawFilePath -> Annex L.ByteString
295getLocal' getprivate file = do
296	fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file)
297	go =<< getJournalFileStale getprivate file
298  where
299	go (Just journalcontent) = return journalcontent
300	go Nothing = getRef fullname file
301
302{- Gets the content of a file as staged in the branch's index. -}
303getStaged :: RawFilePath -> Annex L.ByteString
304getStaged = getRef indexref
305  where
306	-- This makes git cat-file be run with ":file",
307	-- so it looks at the index.
308	indexref = Ref ""
309
310getHistorical :: RefDate -> RawFilePath -> Annex L.ByteString
311getHistorical date file =
312	-- This check avoids some ugly error messages when the reflog
313	-- is empty.
314	ifM (null <$> inRepo (Git.RefLog.get' [Param (fromRef fullname), Param "-n1"]))
315		( giveup ("No reflog for " ++ fromRef fullname)
316		, getRef (Git.Ref.dateRef fullname date) file
317		)
318
319getRef :: Ref -> RawFilePath -> Annex L.ByteString
320getRef ref file = withIndex $ catFile ref file
321
322{- Applies a function to modify the content of a file.
323 -
324 - Note that this does not cause the branch to be merged, it only
325 - modifes the current content of the file on the branch.
326 -}
327change :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> content) -> Annex ()
328change ru file f = lockJournal $ \jl -> f <$> getToChange ru file >>= set jl ru file
329
330{- Applies a function which can modify the content of a file, or not. -}
331maybeChange :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> Maybe content) -> Annex ()
332maybeChange ru file f = lockJournal $ \jl -> do
333	v <- getToChange ru file
334	case f v of
335		Just jv ->
336			let b = journalableByteString jv
337			in when (v /= b) $ set jl ru file b
338		_ -> noop
339
340{- Only get private information when the RegardingUUID is itself private. -}
341getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString
342getToChange ru f = flip getLocal' f . GetPrivate =<< regardingPrivateUUID ru
343
344{- Records new content of a file into the journal.
345 -
346 - This is not exported; all changes have to be made via change. This
347 - ensures that information that was written to the branch is not
348 - overwritten. Also, it avoids a get followed by a set without taking into
349 - account whether private information was gotten from the private
350 - git-annex index, and should not be written to the public git-annex
351 - branch.
352 -}
353set :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
354set jl ru f c = do
355	journalChanged
356	setJournalFile jl ru f c
357	fastDebug "Annex.Branch" ("set " ++ fromRawFilePath f)
358	-- Could cache the new content, but it would involve
359	-- evaluating a Journalable Builder twice, which is not very
360	-- efficient. Instead, assume that it's not common to need to read
361	-- a log file immediately after writing it.
362	invalidateCache
363
364{- Commit message used when making a commit of whatever data has changed
365 - to the git-annex brach. -}
366commitMessage :: Annex String
367commitMessage = fromMaybe "update" . annexCommitMessage <$> Annex.getGitConfig
368
369{- Commit message used when creating the branch. -}
370createMessage :: Annex String
371createMessage = fromMaybe "branch created" . annexCommitMessage <$> Annex.getGitConfig
372
373{- Stages the journal, and commits staged changes to the branch. -}
374commit :: String -> Annex ()
375commit = whenM (journalDirty gitAnnexJournalDir) . forceCommit
376
377{- Commits the current index to the branch even without any journalled
378 - changes. -}
379forceCommit :: String -> Annex ()
380forceCommit message = lockJournal $ \jl ->
381	stageJournal jl $ do
382		ref <- getBranch
383		commitIndex jl ref message [fullname]
384
385{- Commits the staged changes in the index to the branch.
386 -
387 - Ensures that the branch's index file is first updated to merge the state
388 - of the branch at branchref, before running the commit action. This
389 - is needed because the branch may have had changes pushed to it, that
390 - are not yet reflected in the index.
391 -
392 - The branchref value can have been obtained using getBranch at any
393 - previous point, though getting it a long time ago makes the race
394 - more likely to occur.
395 -
396 - Note that changes may be pushed to the branch at any point in time!
397 - So, there's a race. If the commit is made using the newly pushed tip of
398 - the branch as its parent, and that ref has not yet been merged into the
399 - index, then the result is that the commit will revert the pushed
400 - changes, since they have not been merged into the index. This race
401 - is detected and another commit made to fix it.
402 -
403 - (It's also possible for the branch to be overwritten,
404 - losing the commit made here. But that's ok; the data is still in the
405 - index and will get committed again later.)
406 -}
407commitIndex :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex ()
408commitIndex jl branchref message parents = do
409	showStoringStateAction
410	commitIndex' jl branchref message message 0 parents
411commitIndex' :: JournalLocked -> Git.Ref -> String -> String -> Integer -> [Git.Ref] -> Annex ()
412commitIndex' jl branchref message basemessage retrynum parents = do
413	updateIndex jl branchref
414	cmode <- annexCommitMode <$> Annex.getGitConfig
415	committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname parents
416	setIndexSha committedref
417	parentrefs <- commitparents <$> catObject committedref
418	when (racedetected branchref parentrefs) $
419		fixrace committedref parentrefs
420  where
421	-- look for "parent ref" lines and return the refs
422	commitparents = map (Git.Ref . snd) . filter isparent .
423		map (toassoc . L.toStrict) . L.split newline
424	newline = fromIntegral (ord '\n')
425	toassoc = separate' (== (fromIntegral (ord ' ')))
426	isparent (k,_) = k == "parent"
427
428	{- The race can be detected by checking the commit's
429	 - parent, which will be the newly pushed branch,
430	 - instead of the expected ref that the index was updated to. -}
431	racedetected expectedref parentrefs
432		| expectedref `elem` parentrefs = False -- good parent
433		| otherwise = True -- race!
434
435	{- To recover from the race, union merge the lost refs
436	 - into the index. -}
437	fixrace committedref lostrefs = do
438		showSideAction "recovering from race"
439		let retrynum' = retrynum+1
440		-- small sleep to let any activity that caused
441		-- the race settle down
442		liftIO $ threadDelay (100000 + fromInteger retrynum')
443		mergeIndex jl lostrefs
444		let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )"
445		commitIndex' jl committedref racemessage basemessage retrynum' [committedref]
446
447{- Lists all files on the branch. including ones in the journal
448 - that have not been committed yet. There may be duplicates in the list. -}
449files :: Annex ([RawFilePath], IO Bool)
450files = do
451	_  <- update
452	(bfs, cleanup) <- branchFiles
453	-- ++ forces the content of the first list to be buffered in
454	-- memory, so use journalledFiles, which should be much smaller
455	-- most of the time. branchFiles will stream as the list is consumed.
456	l <- (++) <$> journalledFiles <*> pure bfs
457	return (l, cleanup)
458
459{- Lists all files currently in the journal. There may be duplicates in
460 - the list when using a private journal. -}
461journalledFiles :: Annex [RawFilePath]
462journalledFiles = ifM privateUUIDsKnown
463	( (++)
464		<$> getJournalledFilesStale gitAnnexPrivateJournalDir
465		<*> getJournalledFilesStale gitAnnexJournalDir
466	, getJournalledFilesStale gitAnnexJournalDir
467	)
468
469{- Files in the branch, not including any from journalled changes,
470 - and without updating the branch. -}
471branchFiles :: Annex ([RawFilePath], IO Bool)
472branchFiles = withIndex $ inRepo branchFiles'
473
474branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool)
475branchFiles' = Git.Command.pipeNullSplit' $
476	lsTreeParams Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False)
477		fullname
478		[Param "--name-only"]
479
480{- Populates the branch's index file with the current branch contents.
481 -
482 - This is only done when the index doesn't yet exist, and the index
483 - is used to build up changes to be commited to the branch, and merge
484 - in changes from other branches.
485 -}
486genIndex :: Git.Repo -> IO ()
487genIndex g = Git.UpdateIndex.streamUpdateIndex g
488	[Git.UpdateIndex.lsTree fullname g]
489
490{- Merges the specified refs into the index.
491 - Any changes staged in the index will be preserved. -}
492mergeIndex :: JournalLocked -> [Git.Ref] -> Annex ()
493mergeIndex jl branches = do
494	prepareModifyIndex jl
495	hashhandle <- hashObjectHandle
496	withCatFileHandle $ \ch ->
497		inRepo $ \g -> Git.UnionMerge.mergeIndex hashhandle ch g branches
498
499{- Removes any stale git lock file, to avoid git falling over when
500 - updating the index.
501 -
502 - Since all modifications of the index are performed inside this module,
503 - and only when the journal is locked, the fact that the journal has to be
504 - locked when this is called ensures that no other process is currently
505 - modifying the index. So any index.lock file must be stale, caused
506 - by git running when the system crashed, or the repository's disk was
507 - removed, etc.
508 -}
509prepareModifyIndex :: JournalLocked -> Annex ()
510prepareModifyIndex _jl = do
511	index <- fromRepo gitAnnexIndex
512	void $ liftIO $ tryIO $ R.removeLink (index <> ".lock")
513
514{- Runs an action using the branch's index file. -}
515withIndex :: Annex a -> Annex a
516withIndex = withIndex' False
517withIndex' :: Bool -> Annex a -> Annex a
518withIndex' bootstrapping a = withIndexFile AnnexIndexFile $ \f -> do
519	checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do
520		unless bootstrapping create
521		createAnnexDirectory $ toRawFilePath $ takeDirectory f
522		unless bootstrapping $ inRepo genIndex
523	a
524
525{- Updates the branch's index to reflect the current contents of the branch.
526 - Any changes staged in the index will be preserved.
527 -
528 - Compares the ref stored in the lock file with the current
529 - ref of the branch to see if an update is needed.
530 -}
531updateIndex :: JournalLocked -> Git.Ref -> Annex ()
532updateIndex jl branchref = whenM (needUpdateIndex branchref) $
533	forceUpdateIndex jl branchref
534
535forceUpdateIndex :: JournalLocked -> Git.Ref -> Annex ()
536forceUpdateIndex jl branchref = do
537	withIndex $ mergeIndex jl [fullname]
538	setIndexSha branchref
539
540{- Checks if the index needs to be updated. -}
541needUpdateIndex :: Git.Ref -> Annex Bool
542needUpdateIndex branchref = do
543	f <- fromRawFilePath <$> fromRepo gitAnnexIndexStatus
544	committedref <- Git.Ref . firstLine' <$>
545		liftIO (catchDefaultIO mempty $ B.readFile f)
546	return (committedref /= branchref)
547
548{- Record that the branch's index has been updated to correspond to a
549 - given ref of the branch. -}
550setIndexSha :: Git.Ref -> Annex ()
551setIndexSha ref = do
552	f <- fromRepo gitAnnexIndexStatus
553	writeLogFile f $ fromRef ref ++ "\n"
554	runAnnexHook postUpdateAnnexHook
555
556{- Stages the journal into the index, and runs an action that
557 - commits the index to the branch. Note that the action is run
558 - inside withIndex so will automatically use the branch's index.
559 -
560 - Before staging, this removes any existing git index file lock.
561 - This is safe to do because stageJournal is the only thing that
562 - modifies this index file, and only one can run at a time, because
563 - the journal is locked. So any existing git index file lock must be
564 - stale, and the journal must contain any data that was in the process
565 - of being written to the index file when it crashed.
566 -}
567stageJournal :: JournalLocked -> Annex () -> Annex ()
568stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
569	prepareModifyIndex jl
570	g <- gitRepo
571	let dir = gitAnnexJournalDir g
572	(jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir)
573	h <- hashObjectHandle
574	withJournalHandle gitAnnexJournalDir $ \jh ->
575		Git.UpdateIndex.streamUpdateIndex g
576			[genstream dir h jh jlogh]
577	commitindex
578	liftIO $ cleanup (fromRawFilePath dir) jlogh jlogf
579  where
580	genstream dir h jh jlogh streamer = readDirectory jh >>= \case
581		Nothing -> return ()
582		Just file -> do
583			unless (dirCruft file) $ do
584				let path = dir P.</> toRawFilePath file
585				sha <- Git.HashObject.hashFile h path
586				hPutStrLn jlogh file
587				streamer $ Git.UpdateIndex.updateIndexLine
588					sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file)
589			genstream dir h jh jlogh streamer
590	-- Clean up the staged files, as listed in the temp log file.
591	-- The temp file is used to avoid needing to buffer all the
592	-- filenames in memory.
593	cleanup dir jlogh jlogf = do
594		hFlush jlogh
595		hSeek jlogh AbsoluteSeek 0
596		stagedfs <- lines <$> hGetContents jlogh
597		mapM_ (removeFile . (dir </>)) stagedfs
598		hClose jlogh
599		removeWhenExistsWith (R.removeLink) (toRawFilePath jlogf)
600	openjlog tmpdir = liftIO $ openTmpFileIn tmpdir "jlog"
601
602{- This is run after the refs have been merged into the index,
603 - but before the result is committed to the branch.
604 - (Which is why it's passed the contents of the local branches's
605 - transition log before that merge took place.)
606 -
607 - When the refs contain transitions that have not yet been done locally,
608 - the transitions are performed on the index, and a new branch
609 - is created from the result.
610 -
611 - When there are transitions recorded locally that have not been done
612 - to the remote refs, the transitions are performed in the index,
613 - and committed to the existing branch. In this case, the untransitioned
614 - remote refs cannot be merged into the branch (since transitions
615 - throw away history), so they are added to the list of refs to ignore,
616 - to avoid re-merging content from them again.
617 -}
618handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool
619handleTransitions jl localts refs = do
620	m <- M.fromList <$> mapM getreftransition refs
621	let remotets = M.elems m
622	if all (localts ==) remotets
623		then return False
624		else do
625			let allts = combineTransitions (localts:remotets)
626			let (transitionedrefs, untransitionedrefs) =
627				partition (\r -> M.lookup r m == Just allts) refs
628			performTransitionsLocked jl allts (localts /= allts) transitionedrefs
629			ignoreRefs untransitionedrefs
630			return True
631  where
632	getreftransition ref = do
633		ts <- parseTransitionsStrictly "remote"
634			<$> catFile ref transitionsLog
635		return (ref, ts)
636
637{- Performs the specified transitions on the contents of the index file,
638 - commits it to the branch, or creates a new branch.
639 -}
640performTransitions :: Transitions -> Bool -> [Ref] -> Annex ()
641performTransitions ts neednewlocalbranch transitionedrefs = lockJournal $ \jl ->
642	performTransitionsLocked jl ts neednewlocalbranch transitionedrefs
643performTransitionsLocked :: JournalLocked -> Transitions -> Bool -> [Ref] -> Annex ()
644performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
645	-- For simplicity & speed, we're going to use the Annex.Queue to
646	-- update the git-annex branch, while it usually holds changes
647	-- for the head branch. Flush any such changes.
648	Annex.Queue.flush
649	-- Stop any running git cat-files, to ensure that the
650	-- getStaged calls below use the current index, and not some older
651	-- one.
652	catFileStop
653	withIndex $ do
654		prepareModifyIndex jl
655		run $ mapMaybe getTransitionCalculator tlist
656		Annex.Queue.flush
657		if neednewlocalbranch
658			then do
659				cmode <- annexCommitMode <$> Annex.getGitConfig
660				committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs
661				setIndexSha committedref
662			else do
663				ref <- getBranch
664				commitIndex jl ref message (nub $ fullname:transitionedrefs)
665	regraftexports
666  where
667	message
668		| neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc
669		| otherwise = "continuing transition " ++ tdesc
670	tdesc = show $ map describeTransition tlist
671	tlist = knownTransitionList ts
672
673	{- The changes to make to the branch are calculated and applied to
674	 - the branch directly, rather than going through the journal,
675	 - which would be innefficient. (And the journal is not designed
676	 - to hold changes to every file in the branch at once.)
677	 -
678	 - When a file in the branch is changed by transition code,
679	 - its new content is remembered and fed into the code for subsequent
680	 - transitions.
681	 -}
682	run [] = noop
683	run changers = do
684		config <- Annex.getGitConfig
685		trustmap <- calcTrustMap <$> getStaged trustLog
686		remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog
687		-- partially apply, improves performance
688		let changers' = map (\c -> c trustmap remoteconfigmap config) changers
689		(fs, cleanup) <- branchFiles
690		forM_ fs $ \f -> do
691			content <- getStaged f
692			apply changers' f content
693		liftIO $ void cleanup
694
695	apply [] _ _ = return ()
696	apply (changer:rest) file content = case changer file content of
697		PreserveFile -> apply rest file content
698		ChangeFile builder -> do
699			let content' = toLazyByteString builder
700			if L.null content'
701				then do
702					Annex.Queue.addUpdateIndex
703						=<< inRepo (Git.UpdateIndex.unstageFile (fromRawFilePath file))
704					-- File is deleted; can't run any other
705					-- transitions on it.
706					return ()
707				else do
708					sha <- hashBlob content'
709					Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $
710						Git.UpdateIndex.updateIndexLine sha TreeFile (asTopFilePath file)
711					apply rest file content'
712
713	-- Trees mentioned in export.log were grafted into the old
714	-- git-annex branch to make sure they remain available. Re-graft
715	-- the trees into the new branch.
716	regraftexports = do
717		l <- exportedTreeishes . M.elems . parseExportLogMap
718			<$> getStaged exportLog
719		forM_ l $ \t ->
720			rememberTreeishLocked t (asTopFilePath exportTreeGraftPoint) jl
721
722checkBranchDifferences :: Git.Ref -> Annex ()
723checkBranchDifferences ref = do
724	theirdiffs <- allDifferences . parseDifferencesLog
725		<$> catFile ref differenceLog
726	mydiffs <- annexDifferences <$> Annex.getGitConfig
727	when (theirdiffs /= mydiffs) $
728		giveup "Remote repository is tuned in incompatible way; cannot be merged with local repository."
729
730ignoreRefs :: [Git.Sha] -> Annex ()
731ignoreRefs rs = do
732	old <- getIgnoredRefs
733	let s = S.unions [old, S.fromList rs]
734	f <- fromRepo gitAnnexIgnoredRefs
735	writeLogFile f $
736		unlines $ map fromRef $ S.elems s
737
738getIgnoredRefs :: Annex (S.Set Git.Sha)
739getIgnoredRefs =
740	S.fromList . mapMaybe Git.Sha.extractSha . B8.lines <$> content
741  where
742	content = do
743		f <- fromRawFilePath <$> fromRepo gitAnnexIgnoredRefs
744		liftIO $ catchDefaultIO mempty $ B.readFile f
745
746addMergedRefs :: [(Git.Sha, Git.Branch)] -> Annex ()
747addMergedRefs [] = return ()
748addMergedRefs new = do
749	old <- getMergedRefs'
750	-- Keep only the newest sha for each branch.
751	let l = nubBy ((==) `on` snd) (new ++ old)
752	f <- fromRepo gitAnnexMergedRefs
753	writeLogFile f $
754		unlines $ map (\(s, b) -> fromRef s ++ '\t' : fromRef b) l
755
756getMergedRefs :: Annex (S.Set Git.Sha)
757getMergedRefs = S.fromList . map fst <$> getMergedRefs'
758
759getMergedRefs' :: Annex [(Git.Sha, Git.Branch)]
760getMergedRefs' = do
761	f <- fromRawFilePath <$> fromRepo gitAnnexMergedRefs
762	s <- liftIO $ catchDefaultIO mempty $ B.readFile f
763	return $ map parse $ B8.lines s
764  where
765	parse l =
766		let (s, b) = separate' (== (fromIntegral (ord '\t'))) l
767		in (Ref s, Ref b)
768
769{- Grafts a treeish into the branch at the specified location,
770 - and then removes it. This ensures that the treeish won't get garbage
771 - collected, and will always be available as long as the git-annex branch
772 - is available. -}
773rememberTreeish :: Git.Ref -> TopFilePath -> Annex ()
774rememberTreeish treeish graftpoint = lockJournal $ rememberTreeishLocked treeish graftpoint
775rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex ()
776rememberTreeishLocked treeish graftpoint jl = do
777	branchref <- getBranch
778	updateIndex jl branchref
779	origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
780		inRepo (Git.Ref.tree branchref)
781	addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
782	cmode <- annexCommitMode <$> Annex.getGitConfig
783	c <- inRepo $ Git.Branch.commitTree cmode
784		"graft" [branchref] addedt
785	c' <- inRepo $ Git.Branch.commitTree cmode
786		"graft cleanup" [c] origtree
787	inRepo $ Git.Branch.update' fullname c'
788	-- The tree in c' is the same as the tree in branchref,
789	-- and the index was updated to that above, so it's safe to
790	-- say that the index contains c'.
791	setIndexSha c'
792
793{- Runs an action on the content of selected files from the branch.
794 - This is much faster than reading the content of each file in turn,
795 - because it lets git cat-file stream content without blocking.
796 -
797 - The action is passed a callback that it can repeatedly call to read
798 - the next file and its contents. When there are no more files, the
799 - callback will return Nothing.
800 -}
801overBranchFileContents
802	:: (RawFilePath -> Maybe v)
803	-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
804	-> Annex a
805overBranchFileContents select go = do
806	st <- update
807	g <- Annex.gitRepo
808	(l, cleanup) <- inRepo $ Git.LsTree.lsTree
809		Git.LsTree.LsTreeRecursive
810		(Git.LsTree.LsTreeLong False)
811		fullname
812	let select' f = fmap (\v -> (v, f)) (select f)
813	buf <- liftIO newEmptyMVar
814	let go' reader = go $ liftIO reader >>= \case
815		Just ((v, f), content) -> do
816			-- Check the journal if it did not get
817			-- committed to the branch
818			content' <- if journalIgnorable st
819				then pure content
820				else maybe content Just
821					<$> getJournalFileStale (GetPrivate True) f
822			return (Just (v, f, content'))
823		Nothing
824			| journalIgnorable st -> return Nothing
825			-- The journal did not get committed to the
826			-- branch, and may contain files that
827			-- are not present in the branch, which
828			-- need to be provided to the action still.
829			-- This can cause the action to be run a
830			-- second time with a file it already ran on.
831			| otherwise -> liftIO (tryTakeMVar buf) >>= \case
832				Nothing -> drain buf =<< journalledFiles
833				Just fs -> drain buf fs
834	catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
835		`finally` liftIO (void cleanup)
836  where
837	getnext [] = Nothing
838	getnext (f:fs) = case select f of
839		Nothing -> getnext fs
840		Just v -> Just (v, f, fs)
841
842	drain buf fs = case getnext fs of
843		Just (v, f, fs') -> do
844			liftIO $ putMVar buf fs'
845			content <- getJournalFileStale (GetPrivate True) f
846			return (Just (v, f, content))
847		Nothing -> do
848			liftIO $ putMVar buf []
849			return Nothing
850