1{- management of the git-annex journal
2 -
3 - The journal is used to queue up changes before they are committed to the
4 - git-annex branch. Among other things, it ensures that if git-annex is
5 - interrupted, its recorded data is not lost.
6 -
7 - Copyright 2011-2021 Joey Hess <id@joeyh.name>
8 -
9 - Licensed under the GNU AGPL version 3 or higher.
10 -}
11
12{-# LANGUAGE OverloadedStrings #-}
13
14module Annex.Journal where
15
16import Annex.Common
17import qualified Annex
18import qualified Git
19import Annex.Perms
20import Annex.Tmp
21import Annex.LockFile
22import Utility.Directory.Stream
23
24import qualified Data.Set as S
25import qualified Data.ByteString.Lazy as L
26import qualified Data.ByteString as B
27import qualified System.FilePath.ByteString as P
28import Data.ByteString.Builder
29import Data.Char
30
31class Journalable t where
32	writeJournalHandle :: Handle -> t -> IO ()
33	journalableByteString :: t -> L.ByteString
34
35instance Journalable L.ByteString where
36	writeJournalHandle = L.hPut
37	journalableByteString = id
38
39-- This is more efficient than the ByteString instance.
40instance Journalable Builder where
41	writeJournalHandle = hPutBuilder
42	journalableByteString = toLazyByteString
43
44{- When a file in the git-annex branch is changed, this indicates what
45 - repository UUID (or in some cases, UUIDs) a change is regarding.
46 -
47 - Using this lets changes regarding private UUIDs be stored separately
48 - from the git-annex branch, so its information does not get exposed
49 - outside the repo.
50 -}
51data RegardingUUID = RegardingUUID [UUID]
52
53regardingPrivateUUID :: RegardingUUID -> Annex Bool
54regardingPrivateUUID (RegardingUUID []) = pure False
55regardingPrivateUUID (RegardingUUID us) = do
56	s <- annexPrivateRepos <$> Annex.getGitConfig
57	return (any (flip S.member s) us)
58
59{- Are any private UUIDs known to exist? If so, extra work has to be done,
60 - to check for information separately recorded for them, outside the usual
61 - locations.
62 -}
63privateUUIDsKnown :: Annex Bool
64privateUUIDsKnown = privateUUIDsKnown' <$> Annex.getState id
65
66privateUUIDsKnown' :: Annex.AnnexState -> Bool
67privateUUIDsKnown' = not . S.null . annexPrivateRepos . Annex.gitconfig
68
69{- Records content for a file in the branch to the journal.
70 -
71 - Using the journal, rather than immediatly staging content to the index
72 - avoids git needing to rewrite the index after every change.
73 -
74 - The file in the journal is updated atomically, which allows
75 - getJournalFileStale to always return a consistent journal file
76 - content, although possibly not the most current one.
77 -}
78setJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> content -> Annex ()
79setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
80	jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
81		( return gitAnnexPrivateJournalDir
82		, return gitAnnexJournalDir
83		)
84	createAnnexDirectory jd
85	-- journal file is written atomically
86	let jfile = journalFile file
87	let tmpfile = fromRawFilePath (tmp P.</> jfile)
88	liftIO $ do
89		withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
90		moveFile tmpfile (fromRawFilePath (jd P.</> jfile))
91
92{- Gets any journalled content for a file in the branch. -}
93getJournalFile :: JournalLocked -> GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
94getJournalFile _jl = getJournalFileStale
95
96data GetPrivate = GetPrivate Bool
97
98{- Without locking, this is not guaranteed to be the most recent
99 - version of the file in the journal, so should not be used as a basis for
100 - changes.
101 -
102 - The file is read strictly so that its content can safely be fed into
103 - an operation that modifies the file. While setJournalFile doesn't
104 - write directly to journal files and so probably avoids problems with
105 - writing to the same file that's being read, but there could be
106 - concurrency or other issues with a lazy read, and the minor loss of
107 - laziness doesn't matter much, as the files are not very large.
108 -}
109getJournalFileStale :: GetPrivate -> RawFilePath -> Annex (Maybe L.ByteString)
110getJournalFileStale (GetPrivate getprivate) file = do
111	-- Optimisation to avoid a second MVar access.
112	st <- Annex.getState id
113	let g = Annex.repo st
114	liftIO $
115		if getprivate && privateUUIDsKnown' st
116		then do
117			x <- getfrom (gitAnnexJournalDir g)
118			y <- getfrom (gitAnnexPrivateJournalDir g)
119			-- This concacenation is the same as happens in a
120			-- merge of two git-annex branches.
121			return (x <> y)
122		else getfrom (gitAnnexJournalDir g)
123  where
124	jfile = journalFile file
125	getfrom d = catchMaybeIO $
126		L.fromStrict <$> B.readFile (fromRawFilePath (d P.</> jfile))
127
128{- List of existing journal files in a journal directory, but without locking,
129 - may miss new ones just being added, or may have false positives if the
130 - journal is staged as it is run. -}
131getJournalledFilesStale :: (Git.Repo -> RawFilePath) -> Annex [RawFilePath]
132getJournalledFilesStale getjournaldir = do
133	g <- gitRepo
134	fs <- liftIO $ catchDefaultIO [] $
135		getDirectoryContents $ fromRawFilePath (getjournaldir g)
136	return $ filter (`notElem` [".", ".."]) $
137		map (fileJournal . toRawFilePath) fs
138
139{- Directory handle open on a journal directory. -}
140withJournalHandle :: (Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
141withJournalHandle getjournaldir a = do
142	d <- fromRawFilePath <$> fromRepo getjournaldir
143	bracketIO (openDirectory d) closeDirectory (liftIO . a)
144
145{- Checks if there are changes in the journal. -}
146journalDirty :: (Git.Repo -> RawFilePath) -> Annex Bool
147journalDirty getjournaldir = do
148	d <- fromRawFilePath <$> fromRepo getjournaldir
149	liftIO $
150		(not <$> isDirectoryEmpty d)
151			`catchIO` (const $ doesDirectoryExist d)
152
153{- Produces a filename to use in the journal for a file on the branch.
154 - The filename does not include the journal directory.
155 -
156 - The journal typically won't have a lot of files in it, so the hashing
157 - used in the branch is not necessary, and all the files are put directly
158 - in the journal directory.
159 -}
160journalFile :: RawFilePath -> RawFilePath
161journalFile file = B.concatMap mangle file
162  where
163	mangle c
164		| P.isPathSeparator c = B.singleton underscore
165		| c == underscore = B.pack [underscore, underscore]
166		| otherwise = B.singleton c
167	underscore = fromIntegral (ord '_')
168
169{- Converts a journal file (relative to the journal dir) back to the
170 - filename on the branch. -}
171fileJournal :: RawFilePath -> RawFilePath
172fileJournal = go
173  where
174	go b =
175		let (h, t) = B.break (== underscore) b
176		in h <> case B.uncons t of
177			Nothing -> t
178			Just (_u, t') -> case B.uncons t' of
179				Nothing -> t'
180				Just (w, t'')
181					| w == underscore ->
182						B.cons underscore (go t'')
183					| otherwise ->
184						B.cons P.pathSeparator (go t')
185
186	underscore = fromIntegral (ord '_')
187
188{- Sentinal value, only produced by lockJournal; required
189 - as a parameter by things that need to ensure the journal is
190 - locked. -}
191data JournalLocked = ProduceJournalLocked
192
193{- Runs an action that modifies the journal, using locking to avoid
194 - contention with other git-annex processes. -}
195lockJournal :: (JournalLocked -> Annex a) -> Annex a
196lockJournal a = withExclusiveLock gitAnnexJournalLock $ a ProduceJournalLocked
197