1{- management of the git-annex branch 2 - 3 - Copyright 2011-2021 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8{-# LANGUAGE OverloadedStrings #-} 9 10module Annex.Branch ( 11 fullname, 12 name, 13 hasOrigin, 14 hasSibling, 15 siblingBranches, 16 create, 17 UpdateMade(..), 18 update, 19 forceUpdate, 20 updateTo, 21 get, 22 getHistorical, 23 RegardingUUID(..), 24 change, 25 maybeChange, 26 commitMessage, 27 createMessage, 28 commit, 29 forceCommit, 30 getBranch, 31 files, 32 rememberTreeish, 33 performTransitions, 34 withIndex, 35 precache, 36 overBranchFileContents, 37) where 38 39import qualified Data.ByteString as B 40import qualified Data.ByteString.Lazy as L 41import qualified Data.ByteString.Char8 as B8 42import qualified Data.Set as S 43import qualified Data.Map as M 44import Data.Function 45import Data.Char 46import Data.ByteString.Builder 47import Control.Concurrent (threadDelay) 48import Control.Concurrent.MVar 49import qualified System.FilePath.ByteString as P 50 51import Annex.Common 52import Types.BranchState 53import Annex.BranchState 54import Annex.Journal 55import Annex.GitOverlay 56import Annex.Tmp 57import qualified Git 58import qualified Git.Command 59import qualified Git.Ref 60import qualified Git.RefLog 61import qualified Git.Sha 62import qualified Git.Branch 63import qualified Git.UnionMerge 64import qualified Git.UpdateIndex 65import qualified Git.Tree 66import qualified Git.LsTree 67import Git.LsTree (lsTreeParams) 68import qualified Git.HashObject 69import Annex.HashObject 70import Git.Types (Ref(..), fromRef, fromRef', RefDate, TreeItemType(..)) 71import Git.FilePath 72import Annex.CatFile 73import Git.CatFile (catObjectStreamLsTree) 74import Annex.Perms 75import Logs 76import Logs.Transitions 77import Logs.File 78import Logs.Trust.Pure 79import Logs.Remote.Pure 80import Logs.Export.Pure 81import Logs.Difference.Pure 82import qualified Annex.Queue 83import Annex.Branch.Transitions 84import qualified Annex 85import Annex.Hook 86import Utility.Directory.Stream 87import Utility.Tmp 88import qualified Utility.RawFilePath as R 89 90{- Name of the branch that is used to store git-annex's information. -} 91name :: Git.Ref 92name = Git.Ref "git-annex" 93 94{- Fully qualified name of the branch. -} 95fullname :: Git.Ref 96fullname = Git.Ref $ "refs/heads/" <> fromRef' name 97 98{- Branch's name in origin. -} 99originname :: Git.Ref 100originname = Git.Ref $ "refs/remotes/origin/" <> fromRef' name 101 102{- Does origin/git-annex exist? -} 103hasOrigin :: Annex Bool 104hasOrigin = inRepo $ Git.Ref.exists originname 105 106{- Does the git-annex branch or a sibling foo/git-annex branch exist? -} 107hasSibling :: Annex Bool 108hasSibling = not . null <$> siblingBranches 109 110{- List of git-annex (shas, branches), including the main one and any 111 - from remotes. Duplicates are filtered out. -} 112siblingBranches :: Annex [(Git.Sha, Git.Branch)] 113siblingBranches = inRepo $ Git.Ref.matchingUniq [name] 114 115{- Creates the branch, if it does not already exist. -} 116create :: Annex () 117create = void getBranch 118 119{- Returns the ref of the branch, creating it first if necessary. -} 120getBranch :: Annex Git.Ref 121getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha 122 where 123 go True = do 124 inRepo $ Git.Command.run 125 [ Param "branch" 126 , Param "--no-track" 127 , Param $ fromRef name 128 , Param $ fromRef originname 129 ] 130 fromMaybe (error $ "failed to create " ++ fromRef name) 131 <$> branchsha 132 go False = withIndex' True $ do 133 cmode <- annexCommitMode <$> Annex.getGitConfig 134 cmessage <- createMessage 135 inRepo $ Git.Branch.commitAlways cmode cmessage fullname [] 136 use sha = do 137 setIndexSha sha 138 return sha 139 branchsha = inRepo $ Git.Ref.sha fullname 140 141{- Ensures that the branch and index are up-to-date; should be 142 - called before data is read from it. Runs only once per git-annex run. -} 143update :: Annex BranchState 144update = runUpdateOnce $ journalClean <$$> updateTo =<< siblingBranches 145 146{- Forces an update even if one has already been run. -} 147forceUpdate :: Annex UpdateMade 148forceUpdate = updateTo =<< siblingBranches 149 150data UpdateMade = UpdateMade 151 { refsWereMerged :: Bool 152 , journalClean :: Bool 153 } 154 155{- Merges the specified Refs into the index, if they have any changes not 156 - already in it. The Branch names are only used in the commit message; 157 - it's even possible that the provided Branches have not been updated to 158 - point to the Refs yet. 159 - 160 - The branch is fast-forwarded if possible, otherwise a merge commit is 161 - made. 162 - 163 - Before Refs are merged into the index, it's important to first stage the 164 - journal into the index. Otherwise, any changes in the journal would 165 - later get staged, and might overwrite changes made during the merge. 166 - This is only done if some of the Refs do need to be merged. 167 - 168 - Also handles performing any Transitions that have not yet been 169 - performed, in either the local branch, or the Refs. 170 - 171 - Returns True if any refs were merged in, False otherwise. 172 -} 173updateTo :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade 174updateTo pairs = ifM (annexMergeAnnexBranches <$> Annex.getGitConfig) 175 ( updateTo' pairs 176 , return (UpdateMade False False) 177 ) 178 179updateTo' :: [(Git.Sha, Git.Branch)] -> Annex UpdateMade 180updateTo' pairs = do 181 -- ensure branch exists, and get its current ref 182 branchref <- getBranch 183 dirty <- journalDirty gitAnnexJournalDir 184 ignoredrefs <- getIgnoredRefs 185 let unignoredrefs = excludeset ignoredrefs pairs 186 tomerge <- if null unignoredrefs 187 then return [] 188 else do 189 mergedrefs <- getMergedRefs 190 filterM isnewer (excludeset mergedrefs unignoredrefs) 191 journalcleaned <- if null tomerge 192 {- Even when no refs need to be merged, the index 193 - may still be updated if the branch has gotten ahead 194 - of the index, or just if the journal is dirty. -} 195 then ifM (needUpdateIndex branchref) 196 ( lockJournal $ \jl -> do 197 forceUpdateIndex jl branchref 198 {- When there are journalled changes 199 - as well as the branch being updated, 200 - a commit needs to be done. -} 201 when dirty $ 202 go branchref dirty [] jl 203 return True 204 , if dirty 205 then ifM (annexAlwaysCommit <$> Annex.getGitConfig) 206 ( do 207 lockJournal $ go branchref dirty [] 208 return True 209 , return False 210 ) 211 else return True 212 ) 213 else do 214 lockJournal $ go branchref dirty tomerge 215 return True 216 journalclean <- if journalcleaned 217 then not <$> privateUUIDsKnown 218 else pure False 219 return $ UpdateMade 220 { refsWereMerged = not (null tomerge) 221 , journalClean = journalclean 222 } 223 where 224 excludeset s = filter (\(r, _) -> S.notMember r s) 225 isnewer (r, _) = inRepo $ Git.Branch.changed fullname r 226 go branchref dirty tomerge jl = stagejournalwhen dirty jl $ do 227 let (refs, branches) = unzip tomerge 228 merge_desc <- if null tomerge 229 then commitMessage 230 else return $ "merging " ++ 231 unwords (map Git.Ref.describe branches) ++ 232 " into " ++ fromRef name 233 localtransitions <- parseTransitionsStrictly "local" 234 <$> getLocal transitionsLog 235 unless (null tomerge) $ do 236 showSideAction merge_desc 237 mapM_ checkBranchDifferences refs 238 mergeIndex jl refs 239 let commitrefs = nub $ fullname:refs 240 ifM (handleTransitions jl localtransitions commitrefs) 241 ( runAnnexHook postUpdateAnnexHook 242 , do 243 ff <- if dirty 244 then return False 245 else inRepo $ Git.Branch.fastForward fullname refs 246 if ff 247 then updateIndex jl branchref 248 else commitIndex jl branchref merge_desc commitrefs 249 ) 250 addMergedRefs tomerge 251 invalidateCache 252 stagejournalwhen dirty jl a 253 | dirty = stageJournal jl a 254 | otherwise = withIndex a 255 256{- Gets the content of a file, which may be in the journal, or in the index 257 - (and committed to the branch). 258 - 259 - Updates the branch if necessary, to ensure the most up-to-date available 260 - content is returned. 261 - 262 - Returns an empty string if the file doesn't exist yet. -} 263get :: RawFilePath -> Annex L.ByteString 264get file = getCache file >>= \case 265 Just content -> return content 266 Nothing -> do 267 st <- update 268 content <- if journalIgnorable st 269 then getRef fullname file 270 else getLocal file 271 setCache file content 272 return content 273 274{- Used to cache the value of a file, which has been read from the branch 275 - using some optimised method. The journal has to be checked, in case 276 - it has a newer version of the file that has not reached the branch yet. 277 -} 278precache :: RawFilePath -> L.ByteString -> Annex () 279precache file branchcontent = do 280 st <- getState 281 content <- if journalIgnorable st 282 then pure branchcontent 283 else fromMaybe branchcontent 284 <$> getJournalFileStale (GetPrivate True) file 285 Annex.BranchState.setCache file content 286 287{- Like get, but does not merge the branch, so the info returned may not 288 - reflect changes in remotes. 289 - (Changing the value this returns, and then merging is always the 290 - same as using get, and then changing its value.) -} 291getLocal :: RawFilePath -> Annex L.ByteString 292getLocal = getLocal' (GetPrivate True) 293 294getLocal' :: GetPrivate -> RawFilePath -> Annex L.ByteString 295getLocal' getprivate file = do 296 fastDebug "Annex.Branch" ("read " ++ fromRawFilePath file) 297 go =<< getJournalFileStale getprivate file 298 where 299 go (Just journalcontent) = return journalcontent 300 go Nothing = getRef fullname file 301 302{- Gets the content of a file as staged in the branch's index. -} 303getStaged :: RawFilePath -> Annex L.ByteString 304getStaged = getRef indexref 305 where 306 -- This makes git cat-file be run with ":file", 307 -- so it looks at the index. 308 indexref = Ref "" 309 310getHistorical :: RefDate -> RawFilePath -> Annex L.ByteString 311getHistorical date file = 312 -- This check avoids some ugly error messages when the reflog 313 -- is empty. 314 ifM (null <$> inRepo (Git.RefLog.get' [Param (fromRef fullname), Param "-n1"])) 315 ( giveup ("No reflog for " ++ fromRef fullname) 316 , getRef (Git.Ref.dateRef fullname date) file 317 ) 318 319getRef :: Ref -> RawFilePath -> Annex L.ByteString 320getRef ref file = withIndex $ catFile ref file 321 322{- Applies a function to modify the content of a file. 323 - 324 - Note that this does not cause the branch to be merged, it only 325 - modifes the current content of the file on the branch. 326 -} 327change :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> content) -> Annex () 328change ru file f = lockJournal $ \jl -> f <$> getToChange ru file >>= set jl ru file 329 330{- Applies a function which can modify the content of a file, or not. -} 331maybeChange :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> Maybe content) -> Annex () 332maybeChange ru file f = lockJournal $ \jl -> do 333 v <- getToChange ru file 334 case f v of 335 Just jv -> 336 let b = journalableByteString jv 337 in when (v /= b) $ set jl ru file b 338 _ -> noop 339 340{- Only get private information when the RegardingUUID is itself private. -} 341getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString 342getToChange ru f = flip getLocal' f . GetPrivate =<< regardingPrivateUUID ru 343 344{- Records new content of a file into the journal. 345 - 346 - This is not exported; all changes have to be made via change. This 347 - ensures that information that was written to the branch is not 348 - overwritten. Also, it avoids a get followed by a set without taking into 349 - account whether private information was gotten from the private 350 - git-annex index, and should not be written to the public git-annex 351 - branch. 352 -} 353set :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () 354set jl ru f c = do 355 journalChanged 356 setJournalFile jl ru f c 357 fastDebug "Annex.Branch" ("set " ++ fromRawFilePath f) 358 -- Could cache the new content, but it would involve 359 -- evaluating a Journalable Builder twice, which is not very 360 -- efficient. Instead, assume that it's not common to need to read 361 -- a log file immediately after writing it. 362 invalidateCache 363 364{- Commit message used when making a commit of whatever data has changed 365 - to the git-annex brach. -} 366commitMessage :: Annex String 367commitMessage = fromMaybe "update" . annexCommitMessage <$> Annex.getGitConfig 368 369{- Commit message used when creating the branch. -} 370createMessage :: Annex String 371createMessage = fromMaybe "branch created" . annexCommitMessage <$> Annex.getGitConfig 372 373{- Stages the journal, and commits staged changes to the branch. -} 374commit :: String -> Annex () 375commit = whenM (journalDirty gitAnnexJournalDir) . forceCommit 376 377{- Commits the current index to the branch even without any journalled 378 - changes. -} 379forceCommit :: String -> Annex () 380forceCommit message = lockJournal $ \jl -> 381 stageJournal jl $ do 382 ref <- getBranch 383 commitIndex jl ref message [fullname] 384 385{- Commits the staged changes in the index to the branch. 386 - 387 - Ensures that the branch's index file is first updated to merge the state 388 - of the branch at branchref, before running the commit action. This 389 - is needed because the branch may have had changes pushed to it, that 390 - are not yet reflected in the index. 391 - 392 - The branchref value can have been obtained using getBranch at any 393 - previous point, though getting it a long time ago makes the race 394 - more likely to occur. 395 - 396 - Note that changes may be pushed to the branch at any point in time! 397 - So, there's a race. If the commit is made using the newly pushed tip of 398 - the branch as its parent, and that ref has not yet been merged into the 399 - index, then the result is that the commit will revert the pushed 400 - changes, since they have not been merged into the index. This race 401 - is detected and another commit made to fix it. 402 - 403 - (It's also possible for the branch to be overwritten, 404 - losing the commit made here. But that's ok; the data is still in the 405 - index and will get committed again later.) 406 -} 407commitIndex :: JournalLocked -> Git.Ref -> String -> [Git.Ref] -> Annex () 408commitIndex jl branchref message parents = do 409 showStoringStateAction 410 commitIndex' jl branchref message message 0 parents 411commitIndex' :: JournalLocked -> Git.Ref -> String -> String -> Integer -> [Git.Ref] -> Annex () 412commitIndex' jl branchref message basemessage retrynum parents = do 413 updateIndex jl branchref 414 cmode <- annexCommitMode <$> Annex.getGitConfig 415 committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname parents 416 setIndexSha committedref 417 parentrefs <- commitparents <$> catObject committedref 418 when (racedetected branchref parentrefs) $ 419 fixrace committedref parentrefs 420 where 421 -- look for "parent ref" lines and return the refs 422 commitparents = map (Git.Ref . snd) . filter isparent . 423 map (toassoc . L.toStrict) . L.split newline 424 newline = fromIntegral (ord '\n') 425 toassoc = separate' (== (fromIntegral (ord ' '))) 426 isparent (k,_) = k == "parent" 427 428 {- The race can be detected by checking the commit's 429 - parent, which will be the newly pushed branch, 430 - instead of the expected ref that the index was updated to. -} 431 racedetected expectedref parentrefs 432 | expectedref `elem` parentrefs = False -- good parent 433 | otherwise = True -- race! 434 435 {- To recover from the race, union merge the lost refs 436 - into the index. -} 437 fixrace committedref lostrefs = do 438 showSideAction "recovering from race" 439 let retrynum' = retrynum+1 440 -- small sleep to let any activity that caused 441 -- the race settle down 442 liftIO $ threadDelay (100000 + fromInteger retrynum') 443 mergeIndex jl lostrefs 444 let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )" 445 commitIndex' jl committedref racemessage basemessage retrynum' [committedref] 446 447{- Lists all files on the branch. including ones in the journal 448 - that have not been committed yet. There may be duplicates in the list. -} 449files :: Annex ([RawFilePath], IO Bool) 450files = do 451 _ <- update 452 (bfs, cleanup) <- branchFiles 453 -- ++ forces the content of the first list to be buffered in 454 -- memory, so use journalledFiles, which should be much smaller 455 -- most of the time. branchFiles will stream as the list is consumed. 456 l <- (++) <$> journalledFiles <*> pure bfs 457 return (l, cleanup) 458 459{- Lists all files currently in the journal. There may be duplicates in 460 - the list when using a private journal. -} 461journalledFiles :: Annex [RawFilePath] 462journalledFiles = ifM privateUUIDsKnown 463 ( (++) 464 <$> getJournalledFilesStale gitAnnexPrivateJournalDir 465 <*> getJournalledFilesStale gitAnnexJournalDir 466 , getJournalledFilesStale gitAnnexJournalDir 467 ) 468 469{- Files in the branch, not including any from journalled changes, 470 - and without updating the branch. -} 471branchFiles :: Annex ([RawFilePath], IO Bool) 472branchFiles = withIndex $ inRepo branchFiles' 473 474branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool) 475branchFiles' = Git.Command.pipeNullSplit' $ 476 lsTreeParams Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False) 477 fullname 478 [Param "--name-only"] 479 480{- Populates the branch's index file with the current branch contents. 481 - 482 - This is only done when the index doesn't yet exist, and the index 483 - is used to build up changes to be commited to the branch, and merge 484 - in changes from other branches. 485 -} 486genIndex :: Git.Repo -> IO () 487genIndex g = Git.UpdateIndex.streamUpdateIndex g 488 [Git.UpdateIndex.lsTree fullname g] 489 490{- Merges the specified refs into the index. 491 - Any changes staged in the index will be preserved. -} 492mergeIndex :: JournalLocked -> [Git.Ref] -> Annex () 493mergeIndex jl branches = do 494 prepareModifyIndex jl 495 hashhandle <- hashObjectHandle 496 withCatFileHandle $ \ch -> 497 inRepo $ \g -> Git.UnionMerge.mergeIndex hashhandle ch g branches 498 499{- Removes any stale git lock file, to avoid git falling over when 500 - updating the index. 501 - 502 - Since all modifications of the index are performed inside this module, 503 - and only when the journal is locked, the fact that the journal has to be 504 - locked when this is called ensures that no other process is currently 505 - modifying the index. So any index.lock file must be stale, caused 506 - by git running when the system crashed, or the repository's disk was 507 - removed, etc. 508 -} 509prepareModifyIndex :: JournalLocked -> Annex () 510prepareModifyIndex _jl = do 511 index <- fromRepo gitAnnexIndex 512 void $ liftIO $ tryIO $ R.removeLink (index <> ".lock") 513 514{- Runs an action using the branch's index file. -} 515withIndex :: Annex a -> Annex a 516withIndex = withIndex' False 517withIndex' :: Bool -> Annex a -> Annex a 518withIndex' bootstrapping a = withIndexFile AnnexIndexFile $ \f -> do 519 checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do 520 unless bootstrapping create 521 createAnnexDirectory $ toRawFilePath $ takeDirectory f 522 unless bootstrapping $ inRepo genIndex 523 a 524 525{- Updates the branch's index to reflect the current contents of the branch. 526 - Any changes staged in the index will be preserved. 527 - 528 - Compares the ref stored in the lock file with the current 529 - ref of the branch to see if an update is needed. 530 -} 531updateIndex :: JournalLocked -> Git.Ref -> Annex () 532updateIndex jl branchref = whenM (needUpdateIndex branchref) $ 533 forceUpdateIndex jl branchref 534 535forceUpdateIndex :: JournalLocked -> Git.Ref -> Annex () 536forceUpdateIndex jl branchref = do 537 withIndex $ mergeIndex jl [fullname] 538 setIndexSha branchref 539 540{- Checks if the index needs to be updated. -} 541needUpdateIndex :: Git.Ref -> Annex Bool 542needUpdateIndex branchref = do 543 f <- fromRawFilePath <$> fromRepo gitAnnexIndexStatus 544 committedref <- Git.Ref . firstLine' <$> 545 liftIO (catchDefaultIO mempty $ B.readFile f) 546 return (committedref /= branchref) 547 548{- Record that the branch's index has been updated to correspond to a 549 - given ref of the branch. -} 550setIndexSha :: Git.Ref -> Annex () 551setIndexSha ref = do 552 f <- fromRepo gitAnnexIndexStatus 553 writeLogFile f $ fromRef ref ++ "\n" 554 runAnnexHook postUpdateAnnexHook 555 556{- Stages the journal into the index, and runs an action that 557 - commits the index to the branch. Note that the action is run 558 - inside withIndex so will automatically use the branch's index. 559 - 560 - Before staging, this removes any existing git index file lock. 561 - This is safe to do because stageJournal is the only thing that 562 - modifies this index file, and only one can run at a time, because 563 - the journal is locked. So any existing git index file lock must be 564 - stale, and the journal must contain any data that was in the process 565 - of being written to the index file when it crashed. 566 -} 567stageJournal :: JournalLocked -> Annex () -> Annex () 568stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do 569 prepareModifyIndex jl 570 g <- gitRepo 571 let dir = gitAnnexJournalDir g 572 (jlogf, jlogh) <- openjlog (fromRawFilePath tmpdir) 573 h <- hashObjectHandle 574 withJournalHandle gitAnnexJournalDir $ \jh -> 575 Git.UpdateIndex.streamUpdateIndex g 576 [genstream dir h jh jlogh] 577 commitindex 578 liftIO $ cleanup (fromRawFilePath dir) jlogh jlogf 579 where 580 genstream dir h jh jlogh streamer = readDirectory jh >>= \case 581 Nothing -> return () 582 Just file -> do 583 unless (dirCruft file) $ do 584 let path = dir P.</> toRawFilePath file 585 sha <- Git.HashObject.hashFile h path 586 hPutStrLn jlogh file 587 streamer $ Git.UpdateIndex.updateIndexLine 588 sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file) 589 genstream dir h jh jlogh streamer 590 -- Clean up the staged files, as listed in the temp log file. 591 -- The temp file is used to avoid needing to buffer all the 592 -- filenames in memory. 593 cleanup dir jlogh jlogf = do 594 hFlush jlogh 595 hSeek jlogh AbsoluteSeek 0 596 stagedfs <- lines <$> hGetContents jlogh 597 mapM_ (removeFile . (dir </>)) stagedfs 598 hClose jlogh 599 removeWhenExistsWith (R.removeLink) (toRawFilePath jlogf) 600 openjlog tmpdir = liftIO $ openTmpFileIn tmpdir "jlog" 601 602{- This is run after the refs have been merged into the index, 603 - but before the result is committed to the branch. 604 - (Which is why it's passed the contents of the local branches's 605 - transition log before that merge took place.) 606 - 607 - When the refs contain transitions that have not yet been done locally, 608 - the transitions are performed on the index, and a new branch 609 - is created from the result. 610 - 611 - When there are transitions recorded locally that have not been done 612 - to the remote refs, the transitions are performed in the index, 613 - and committed to the existing branch. In this case, the untransitioned 614 - remote refs cannot be merged into the branch (since transitions 615 - throw away history), so they are added to the list of refs to ignore, 616 - to avoid re-merging content from them again. 617 -} 618handleTransitions :: JournalLocked -> Transitions -> [Git.Ref] -> Annex Bool 619handleTransitions jl localts refs = do 620 m <- M.fromList <$> mapM getreftransition refs 621 let remotets = M.elems m 622 if all (localts ==) remotets 623 then return False 624 else do 625 let allts = combineTransitions (localts:remotets) 626 let (transitionedrefs, untransitionedrefs) = 627 partition (\r -> M.lookup r m == Just allts) refs 628 performTransitionsLocked jl allts (localts /= allts) transitionedrefs 629 ignoreRefs untransitionedrefs 630 return True 631 where 632 getreftransition ref = do 633 ts <- parseTransitionsStrictly "remote" 634 <$> catFile ref transitionsLog 635 return (ref, ts) 636 637{- Performs the specified transitions on the contents of the index file, 638 - commits it to the branch, or creates a new branch. 639 -} 640performTransitions :: Transitions -> Bool -> [Ref] -> Annex () 641performTransitions ts neednewlocalbranch transitionedrefs = lockJournal $ \jl -> 642 performTransitionsLocked jl ts neednewlocalbranch transitionedrefs 643performTransitionsLocked :: JournalLocked -> Transitions -> Bool -> [Ref] -> Annex () 644performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do 645 -- For simplicity & speed, we're going to use the Annex.Queue to 646 -- update the git-annex branch, while it usually holds changes 647 -- for the head branch. Flush any such changes. 648 Annex.Queue.flush 649 -- Stop any running git cat-files, to ensure that the 650 -- getStaged calls below use the current index, and not some older 651 -- one. 652 catFileStop 653 withIndex $ do 654 prepareModifyIndex jl 655 run $ mapMaybe getTransitionCalculator tlist 656 Annex.Queue.flush 657 if neednewlocalbranch 658 then do 659 cmode <- annexCommitMode <$> Annex.getGitConfig 660 committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs 661 setIndexSha committedref 662 else do 663 ref <- getBranch 664 commitIndex jl ref message (nub $ fullname:transitionedrefs) 665 regraftexports 666 where 667 message 668 | neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc 669 | otherwise = "continuing transition " ++ tdesc 670 tdesc = show $ map describeTransition tlist 671 tlist = knownTransitionList ts 672 673 {- The changes to make to the branch are calculated and applied to 674 - the branch directly, rather than going through the journal, 675 - which would be innefficient. (And the journal is not designed 676 - to hold changes to every file in the branch at once.) 677 - 678 - When a file in the branch is changed by transition code, 679 - its new content is remembered and fed into the code for subsequent 680 - transitions. 681 -} 682 run [] = noop 683 run changers = do 684 config <- Annex.getGitConfig 685 trustmap <- calcTrustMap <$> getStaged trustLog 686 remoteconfigmap <- calcRemoteConfigMap <$> getStaged remoteLog 687 -- partially apply, improves performance 688 let changers' = map (\c -> c trustmap remoteconfigmap config) changers 689 (fs, cleanup) <- branchFiles 690 forM_ fs $ \f -> do 691 content <- getStaged f 692 apply changers' f content 693 liftIO $ void cleanup 694 695 apply [] _ _ = return () 696 apply (changer:rest) file content = case changer file content of 697 PreserveFile -> apply rest file content 698 ChangeFile builder -> do 699 let content' = toLazyByteString builder 700 if L.null content' 701 then do 702 Annex.Queue.addUpdateIndex 703 =<< inRepo (Git.UpdateIndex.unstageFile (fromRawFilePath file)) 704 -- File is deleted; can't run any other 705 -- transitions on it. 706 return () 707 else do 708 sha <- hashBlob content' 709 Annex.Queue.addUpdateIndex $ Git.UpdateIndex.pureStreamer $ 710 Git.UpdateIndex.updateIndexLine sha TreeFile (asTopFilePath file) 711 apply rest file content' 712 713 -- Trees mentioned in export.log were grafted into the old 714 -- git-annex branch to make sure they remain available. Re-graft 715 -- the trees into the new branch. 716 regraftexports = do 717 l <- exportedTreeishes . M.elems . parseExportLogMap 718 <$> getStaged exportLog 719 forM_ l $ \t -> 720 rememberTreeishLocked t (asTopFilePath exportTreeGraftPoint) jl 721 722checkBranchDifferences :: Git.Ref -> Annex () 723checkBranchDifferences ref = do 724 theirdiffs <- allDifferences . parseDifferencesLog 725 <$> catFile ref differenceLog 726 mydiffs <- annexDifferences <$> Annex.getGitConfig 727 when (theirdiffs /= mydiffs) $ 728 giveup "Remote repository is tuned in incompatible way; cannot be merged with local repository." 729 730ignoreRefs :: [Git.Sha] -> Annex () 731ignoreRefs rs = do 732 old <- getIgnoredRefs 733 let s = S.unions [old, S.fromList rs] 734 f <- fromRepo gitAnnexIgnoredRefs 735 writeLogFile f $ 736 unlines $ map fromRef $ S.elems s 737 738getIgnoredRefs :: Annex (S.Set Git.Sha) 739getIgnoredRefs = 740 S.fromList . mapMaybe Git.Sha.extractSha . B8.lines <$> content 741 where 742 content = do 743 f <- fromRawFilePath <$> fromRepo gitAnnexIgnoredRefs 744 liftIO $ catchDefaultIO mempty $ B.readFile f 745 746addMergedRefs :: [(Git.Sha, Git.Branch)] -> Annex () 747addMergedRefs [] = return () 748addMergedRefs new = do 749 old <- getMergedRefs' 750 -- Keep only the newest sha for each branch. 751 let l = nubBy ((==) `on` snd) (new ++ old) 752 f <- fromRepo gitAnnexMergedRefs 753 writeLogFile f $ 754 unlines $ map (\(s, b) -> fromRef s ++ '\t' : fromRef b) l 755 756getMergedRefs :: Annex (S.Set Git.Sha) 757getMergedRefs = S.fromList . map fst <$> getMergedRefs' 758 759getMergedRefs' :: Annex [(Git.Sha, Git.Branch)] 760getMergedRefs' = do 761 f <- fromRawFilePath <$> fromRepo gitAnnexMergedRefs 762 s <- liftIO $ catchDefaultIO mempty $ B.readFile f 763 return $ map parse $ B8.lines s 764 where 765 parse l = 766 let (s, b) = separate' (== (fromIntegral (ord '\t'))) l 767 in (Ref s, Ref b) 768 769{- Grafts a treeish into the branch at the specified location, 770 - and then removes it. This ensures that the treeish won't get garbage 771 - collected, and will always be available as long as the git-annex branch 772 - is available. -} 773rememberTreeish :: Git.Ref -> TopFilePath -> Annex () 774rememberTreeish treeish graftpoint = lockJournal $ rememberTreeishLocked treeish graftpoint 775rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex () 776rememberTreeishLocked treeish graftpoint jl = do 777 branchref <- getBranch 778 updateIndex jl branchref 779 origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$> 780 inRepo (Git.Ref.tree branchref) 781 addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree 782 cmode <- annexCommitMode <$> Annex.getGitConfig 783 c <- inRepo $ Git.Branch.commitTree cmode 784 "graft" [branchref] addedt 785 c' <- inRepo $ Git.Branch.commitTree cmode 786 "graft cleanup" [c] origtree 787 inRepo $ Git.Branch.update' fullname c' 788 -- The tree in c' is the same as the tree in branchref, 789 -- and the index was updated to that above, so it's safe to 790 -- say that the index contains c'. 791 setIndexSha c' 792 793{- Runs an action on the content of selected files from the branch. 794 - This is much faster than reading the content of each file in turn, 795 - because it lets git cat-file stream content without blocking. 796 - 797 - The action is passed a callback that it can repeatedly call to read 798 - the next file and its contents. When there are no more files, the 799 - callback will return Nothing. 800 -} 801overBranchFileContents 802 :: (RawFilePath -> Maybe v) 803 -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) 804 -> Annex a 805overBranchFileContents select go = do 806 st <- update 807 g <- Annex.gitRepo 808 (l, cleanup) <- inRepo $ Git.LsTree.lsTree 809 Git.LsTree.LsTreeRecursive 810 (Git.LsTree.LsTreeLong False) 811 fullname 812 let select' f = fmap (\v -> (v, f)) (select f) 813 buf <- liftIO newEmptyMVar 814 let go' reader = go $ liftIO reader >>= \case 815 Just ((v, f), content) -> do 816 -- Check the journal if it did not get 817 -- committed to the branch 818 content' <- if journalIgnorable st 819 then pure content 820 else maybe content Just 821 <$> getJournalFileStale (GetPrivate True) f 822 return (Just (v, f, content')) 823 Nothing 824 | journalIgnorable st -> return Nothing 825 -- The journal did not get committed to the 826 -- branch, and may contain files that 827 -- are not present in the branch, which 828 -- need to be provided to the action still. 829 -- This can cause the action to be run a 830 -- second time with a file it already ran on. 831 | otherwise -> liftIO (tryTakeMVar buf) >>= \case 832 Nothing -> drain buf =<< journalledFiles 833 Just fs -> drain buf fs 834 catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go' 835 `finally` liftIO (void cleanup) 836 where 837 getnext [] = Nothing 838 getnext (f:fs) = case select f of 839 Nothing -> getnext fs 840 Just v -> Just (v, f, fs) 841 842 drain buf fs = case getnext fs of 843 Just (v, f, fs') -> do 844 liftIO $ putMVar buf fs' 845 content <- getJournalFileStale (GetPrivate True) f 846 return (Just (v, f, content)) 847 Nothing -> do 848 liftIO $ putMVar buf [] 849 return Nothing 850