1{- management of the git-annex journal 2 - 3 - The journal is used to queue up changes before they are committed to the 4 - git-annex branch. Among other things, it ensures that if git-annex is 5 - interrupted, its recorded data is not lost. 6 - 7 - Copyright 2011-2021 Joey Hess <id@joeyh.name> 8 - 9 - Licensed under the GNU AGPL version 3 or higher. 10 -} 11 12{-# LANGUAGE OverloadedStrings #-} 13 14module Annex.Journal where 15 16import Annex.Common 17import qualified Annex 18import qualified Git 19import Annex.Perms 20import Annex.Tmp 21import Annex.LockFile 22import Utility.Directory.Stream 23 24import qualified Data.Set as S 25import qualified Data.ByteString.Lazy as L 26import qualified Data.ByteString as B 27import qualified System.FilePath.ByteString as P 28import Data.ByteString.Builder 29import Data.Char 30 31class Journalable t where 32 writeJournalHandle :: Handle -> t -> IO () 33 journalableByteString :: t -> L.ByteString 34 35instance Journalable L.ByteString where 36 writeJournalHandle = L.hPut 37 journalableByteString = id 38 39-- This is more efficient than the ByteString instance. 40instance Journalable Builder where 41 writeJournalHandle = hPutBuilder 42 journalableByteString = toLazyByteString 43 44{- When a file in the git-annex branch is changed, this indicates what 45 - repository UUID (or in some cases, UUIDs) a change is regarding. 46 - 47 - Using this lets changes regarding private UUIDs be stored separately 48 - from the git-annex branch, so its information does not get exposed 49 - outside the repo. 50 -} 51data RegardingUUID = RegardingUUID [UUID] 52 53regardingPrivateUUID :: RegardingUUID -> Annex Bool 54regardingPrivateUUID (RegardingUUID []) = pure False 55regardingPrivateUUID (RegardingUUID us) = do 56 s <- annexPrivateRepos <$> Annex.getGitConfig 57 return (any (flip S.member s) us) 58 59{- Are any private UUIDs known to exist? If so, extra work has to be done, 60 - to check for information separately recorded for them, outside the usual 61 - locations. 62 -} 63privateUUIDsKnown :: Annex Bool 64privateUUIDsKnown = privateUUIDsKnown' <$> Annex.getState id 65 66privateUUIDsKnown' :: Annex.AnnexState -> Bool 67privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig 68 69{- Records content for a file in the branch to the journal. 70 - 71 - Using the journal, rather than immediatly staging content to the index 72 - avoids git needing to rewrite the index after every change. 73 - 74 - The file in the journal is updated atomically, which allows 75 - getJournalFileStale to always return a consistent journal file 76 - content, although possibly not the most current one. 77 -} 78setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex () 79setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do 80 jd <- fromRepo =<< ifM (regardingPrivateUUID ru) 81 ( return gitAnnexPrivateJournalDir 82 , return gitAnnexJournalDir 83 ) 84 createAnnexDirectory jd 85 -- journal file is written atomically 86 let jfile = journalFile file 87 let tmpfile = fromRawFilePath (tmp P.</> jfile) 88 liftIO $ do 89 withFile tmpfile WriteMode $ \h -> writeJournalHandle h content 90 moveFile tmpfile (fromRawFilePath (jd P.</> jfile)) 91 92{- Gets any journalled content for a file in the branch. -} 93getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString) 94getJournalFile _jl = getJournalFileStale 95 96data GetPrivate = GetPrivate Bool 97 98{- Without locking, this is not guaranteed to be the most recent 99 - version of the file in the journal, so should not be used as a basis for 100 - changes. 101 - 102 - The file is read strictly so that its content can safely be fed into 103 - an operation that modifies the file. While setJournalFile doesn't 104 - write directly to journal files and so probably avoids problems with 105 - writing to the same file that's being read, but there could be 106 - concurrency or other issues with a lazy read, and the minor loss of 107 - laziness doesn't matter much, as the files are not very large. 108 -} 109getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString) 110getJournalFileStale (GetPrivate getprivate) file = do 111 -- Optimisation to avoid a second MVar access. 112 st <- Annex.getState id 113 let g = Annex.repo st 114 liftIO $ 115 if getprivate && privateUUIDsKnown' st 116 then do 117 x <- getfrom (gitAnnexJournalDir g) 118 y <- getfrom (gitAnnexPrivateJournalDir g) 119 -- This concacenation is the same as happens in a 120 -- merge of two git-annex branches. 121 return (x <> y) 122 else getfrom (gitAnnexJournalDir g) 123 where 124 jfile = journalFile file 125 getfrom d = catchMaybeIO $ 126 L.fromStrict <$> B.readFile (fromRawFilePath (d P.</> jfile)) 127 128{- List of existing journal files in a journal directory, but without locking, 129 - may miss new ones just being added, or may have false positives if the 130 - journal is staged as it is run. -} 131getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath] 132getJournalledFilesStale getjournaldir = do 133 g <- gitRepo 134 fs <- liftIO $ catchDefaultIO [] $ 135 getDirectoryContents $ fromRawFilePath (getjournaldir g) 136 return $ filter (`notElem` [".", ".."]) $ 137 map (fileJournal . toRawFilePath) fs 138 139{- Directory handle open on a journal directory. -} 140withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a 141withJournalHandle getjournaldir a = do 142 d <- fromRawFilePath <$> fromRepo getjournaldir 143 bracketIO (openDirectory d) closeDirectory (liftIO . a) 144 145{- Checks if there are changes in the journal. -} 146journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool 147journalDirty getjournaldir = do 148 d <- fromRawFilePath <$> fromRepo getjournaldir 149 liftIO $ 150 (not <$> isDirectoryEmpty d) 151 `catchIO` (const $ doesDirectoryExist d) 152 153{- Produces a filename to use in the journal for a file on the branch. 154 - The filename does not include the journal directory. 155 - 156 - The journal typically won't have a lot of files in it, so the hashing 157 - used in the branch is not necessary, and all the files are put directly 158 - in the journal directory. 159 -} 160journalFile :: RawFilePath -> RawFilePath 161journalFile file = B.concatMap mangle file 162 where 163 mangle c 164 | P.isPathSeparator c = B.singleton underscore 165 | c == underscore = B.pack [underscore, underscore] 166 | otherwise = B.singleton c 167 underscore = fromIntegral (ord '_') 168 169{- Converts a journal file (relative to the journal dir) back to the 170 - filename on the branch. -} 171fileJournal :: RawFilePath -> RawFilePath 172fileJournal = go 173 where 174 go b = 175 let (h, t) = B.break (== underscore) b 176 in h <> case B.uncons t of 177 Nothing -> t 178 Just (_u, t') -> case B.uncons t' of 179 Nothing -> t' 180 Just (w, t'') 181 | w == underscore -> 182 B.cons underscore (go t'') 183 | otherwise -> 184 B.cons P.pathSeparator (go t') 185 186 underscore = fromIntegral (ord '_') 187 188{- Sentinal value, only produced by lockJournal; required 189 - as a parameter by things that need to ensure the journal is 190 - locked. -} 191data JournalLocked = ProduceJournalLocked 192 193{- Runs an action that modifies the journal, using locking to avoid 194 - contention with other git-annex processes. -} 195lockJournal :: (JournalLocked -> Annex a) -> Annex a 196lockJournal a = withExclusiveLock gitAnnexJournalLock $ a ProduceJournalLocked 197