1{- git-annex content ingestion
2 -
3 - Copyright 2010-2021 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8module Annex.Ingest (
9	LockedDown(..),
10	LockDownConfig(..),
11	lockDown,
12	checkLockedDownWritePerms,
13	ingestAdd,
14	ingestAdd',
15	ingest,
16	ingest',
17	finishIngestUnlocked,
18	cleanOldKeys,
19	addLink,
20	makeLink,
21	addUnlocked,
22	CheckGitIgnore(..),
23	gitAddParams,
24	addAnnexedFile,
25	addingExistingLink,
26) where
27
28import Annex.Common
29import Types.KeySource
30import Types.FileMatcher
31import Backend
32import Annex.Content
33import Annex.Perms
34import Annex.Link
35import Annex.MetaData
36import Annex.CurrentBranch
37import Annex.CheckIgnore
38import Logs.Location
39import qualified Annex
40import qualified Annex.Queue
41import qualified Database.Keys
42import Config
43import Utility.InodeCache
44import Annex.ReplaceFile
45import Utility.Tmp
46import Utility.CopyFile
47import Utility.Touch
48import Utility.Metered
49import Git.FilePath
50import Annex.InodeSentinal
51import Annex.AdjustedBranch
52import Annex.FileMatcher
53import qualified Utility.RawFilePath as R
54
55data LockedDown = LockedDown
56	{ lockDownConfig :: LockDownConfig
57	, keySource :: KeySource
58	}
59	deriving (Show)
60
61data LockDownConfig = LockDownConfig
62	{ lockingFile :: Bool
63	-- ^ write bit removed during lock down
64	, hardlinkFileTmpDir :: Maybe RawFilePath
65	-- ^ hard link to temp directorya
66	, checkWritePerms :: Bool
67	-- ^ check that write perms are successfully removed
68	}
69	deriving (Show)
70
71{- The file that's being ingested is locked down before a key is generated,
72 - to prevent it from being modified in between. This lock down is not
73 - perfect at best (and pretty weak at worst). For example, it does not
74 - guard against files that are already opened for write by another process.
75 - So, the InodeCache can be used to detect any changes that might be made
76 - to the file after it was locked down.
77 -
78 - When possible, the file is hard linked to a temp directory. This guards
79 - against some changes, like deletion or overwrite of the file, and
80 - allows lsof checks to be done more efficiently when adding a lot of files.
81 -
82 - Lockdown can fail if a file gets deleted, or if it's unable to remove
83 - write permissions, and Nothing will be returned.
84 -}
85lockDown :: LockDownConfig-> FilePath -> Annex (Maybe LockedDown)
86lockDown cfg file = either
87		(\e -> warning (show e) >> return Nothing)
88		(return . Just)
89	=<< lockDown' cfg file
90
91lockDown' :: LockDownConfig -> FilePath -> Annex (Either SomeException LockedDown)
92lockDown' cfg file = tryNonAsync $ ifM crippledFileSystem
93	( nohardlink
94	, case hardlinkFileTmpDir cfg of
95		Nothing -> nohardlink
96		Just tmpdir -> withhardlink tmpdir
97	)
98  where
99	file' = toRawFilePath file
100
101	nohardlink = do
102		setperms
103		withTSDelta $ liftIO . nohardlink'
104
105	nohardlink' delta = do
106		cache <- genInodeCache file' delta
107		return $ LockedDown cfg $ KeySource
108			{ keyFilename = file'
109			, contentLocation = file'
110			, inodeCache = cache
111			}
112
113	withhardlink tmpdir = do
114		setperms
115		withTSDelta $ \delta -> liftIO $ do
116			(tmpfile, h) <- openTmpFileIn (fromRawFilePath tmpdir) $
117				relatedTemplate $ "ingest-" ++ takeFileName file
118			hClose h
119			removeWhenExistsWith R.removeLink (toRawFilePath tmpfile)
120			withhardlink' delta tmpfile
121				`catchIO` const (nohardlink' delta)
122
123	withhardlink' delta tmpfile = do
124		createLink file tmpfile
125		cache <- genInodeCache (toRawFilePath tmpfile) delta
126		return $ LockedDown cfg $ KeySource
127			{ keyFilename = file'
128			, contentLocation = toRawFilePath tmpfile
129			, inodeCache = cache
130			}
131
132	setperms = when (lockingFile cfg) $ do
133		freezeContent file'
134		when (checkWritePerms cfg) $
135			maybe noop giveup =<< checkLockedDownWritePerms file' file'
136
137checkLockedDownWritePerms :: RawFilePath -> RawFilePath -> Annex (Maybe String)
138checkLockedDownWritePerms file displayfile = checkContentWritePerm file >>= return . \case
139	Just False -> Just $ unwords
140		[ "Unable to remove all write permissions from"
141		, fromRawFilePath displayfile
142		, "-- perhaps it has an xattr or ACL set."
143		]
144	_ -> Nothing
145
146{- Ingests a locked down file into the annex. Updates the work tree and
147 - index. -}
148ingestAdd :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key)
149ingestAdd ci meterupdate ld = ingestAdd' ci meterupdate ld Nothing
150
151ingestAdd' :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key)
152ingestAdd' _ _ Nothing _ = return Nothing
153ingestAdd' ci meterupdate ld@(Just (LockedDown cfg source)) mk = do
154	(mk', mic) <- ingest meterupdate ld mk
155	case mk' of
156		Nothing -> return Nothing
157		Just k -> do
158			let f = keyFilename source
159			if lockingFile cfg
160				then addLink ci f k mic
161				else do
162					mode <- liftIO $ catchMaybeIO $
163						fileMode <$> R.getFileStatus (contentLocation source)
164					stagePointerFile f mode =<< hashPointerFile k
165			return (Just k)
166
167{- Ingests a locked down file into the annex. Does not update the working
168 - tree or the index. -}
169ingest :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache)
170ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk (Restage True)
171
172ingest' :: Maybe Backend -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache)
173ingest' _ _ Nothing _ _ = return (Nothing, Nothing)
174ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = withTSDelta $ \delta -> do
175	k <- case mk of
176		Nothing -> do
177			backend <- maybe
178				(chooseBackend $ keyFilename source)
179				(return . Just)
180				preferredbackend
181			fst <$> genKey source meterupdate backend
182		Just k -> return k
183	let src = contentLocation source
184	ms <- liftIO $ catchMaybeIO $ R.getFileStatus src
185	mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms
186	case (mcache, inodeCache source) of
187		(_, Nothing) -> go k mcache ms
188		(Just newc, Just c) | compareStrong c newc -> go k mcache ms
189		_ -> failure "changed while it was being added"
190  where
191	go key mcache (Just s)
192		| lockingFile cfg = golocked key mcache s
193		| otherwise = gounlocked key mcache s
194	go _ _ Nothing = failure "failed to generate a key"
195
196	golocked key mcache s =
197		tryNonAsync (moveAnnex key naf (contentLocation source)) >>= \case
198			Right True -> success key mcache s
199			Right False -> giveup "failed to add content to annex"
200			Left e -> restoreFile (keyFilename source) key e
201
202	-- moveAnnex uses the AssociatedFile provided to it to unlock
203	-- locked files when getting a file in an adjusted branch.
204	-- That case does not apply here, where we're adding an unlocked
205	-- file, so provide it nothing.
206	naf = AssociatedFile Nothing
207
208	gounlocked key (Just cache) s = do
209		-- Remove temp directory hard link first because
210		-- linkToAnnex falls back to copying if a file
211		-- already has a hard link.
212		cleanCruft source
213		cleanOldKeys (keyFilename source) key
214		linkToAnnex key (keyFilename source) (Just cache) >>= \case
215			LinkAnnexFailed -> failure "failed to link to annex"
216			lar -> do
217				finishIngestUnlocked' key source restage (Just lar)
218				success key (Just cache) s
219	gounlocked _ _ _ = failure "failed statting file"
220
221	success k mcache s = do
222		genMetaData k (keyFilename source) s
223		return (Just k, mcache)
224
225	failure msg = do
226		warning $ fromRawFilePath (keyFilename source) ++ " " ++ msg
227		cleanCruft source
228		return (Nothing, Nothing)
229
230finishIngestUnlocked :: Key -> KeySource -> Annex ()
231finishIngestUnlocked key source = do
232	cleanCruft source
233	finishIngestUnlocked' key source (Restage True) Nothing
234
235finishIngestUnlocked' :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex ()
236finishIngestUnlocked' key source restage lar = do
237	Database.Keys.addAssociatedFile key
238		=<< inRepo (toTopFilePath (keyFilename source))
239	populateUnlockedFiles key source restage lar
240
241{- Copy to any other unlocked files using the same key.
242 -
243 - When linkToAnnex did not have to do anything, the object file
244 - was already present, and so other unlocked files are already populated,
245 - and nothing needs to be done here.
246 -}
247populateUnlockedFiles :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex ()
248populateUnlockedFiles _ _ _ (Just LinkAnnexNoop) = return ()
249populateUnlockedFiles key source restage _ = do
250	obj <- calcRepo (gitAnnexLocation key)
251	g <- Annex.gitRepo
252	ingestedf <- flip fromTopFilePath g
253		<$> inRepo (toTopFilePath (keyFilename source))
254	afs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key
255	forM_ (filter (/= ingestedf) afs) $
256		populatePointerFile restage key obj
257
258cleanCruft :: KeySource -> Annex ()
259cleanCruft source = when (contentLocation source /= keyFilename source) $
260	liftIO $ removeWhenExistsWith R.removeLink $ contentLocation source
261
262-- If a worktree file was was hard linked to an annex object before,
263-- modifying the file would have caused the object to have the wrong
264-- content. Clean up from that.
265cleanOldKeys :: RawFilePath -> Key -> Annex ()
266cleanOldKeys file newkey = do
267	g <- Annex.gitRepo
268	topf <- inRepo (toTopFilePath file)
269	ingestedf <- fromRepo $ fromTopFilePath topf
270	oldkeys <- filter (/= newkey)
271		<$> Database.Keys.getAssociatedKey topf
272	forM_ oldkeys $ \key ->
273		unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
274			caches <- Database.Keys.getInodeCaches key
275			unlinkAnnex key
276			fs <- filter (/= ingestedf)
277				. map (`fromTopFilePath` g)
278				<$> Database.Keys.getAssociatedFiles key
279			filterM (`sameInodeCache` caches) fs >>= \case
280				-- If linkToAnnex fails, the associated
281				-- file with the content is still present,
282				-- so no need for any recovery.
283				(f:_) -> do
284					ic <- withTSDelta (liftIO . genInodeCache f)
285					void $ linkToAnnex key f ic
286				_ -> logStatus key InfoMissing
287
288{- On error, put the file back so it doesn't seem to have vanished.
289 - This can be called before or after the symlink is in place. -}
290restoreFile :: RawFilePath -> Key -> SomeException -> Annex a
291restoreFile file key e = do
292	whenM (inAnnex key) $ do
293		liftIO $ removeWhenExistsWith R.removeLink file
294		-- The key could be used by other files too, so leave the
295		-- content in the annex, and make a copy back to the file.
296		obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
297		unlessM (liftIO $ copyFileExternal CopyTimeStamps obj (fromRawFilePath file)) $
298			warning $ "Unable to restore content of " ++ fromRawFilePath file ++ "; it should be located in " ++ obj
299		thawContent file
300	throwM e
301
302{- Creates the symlink to the annexed content, returns the link target. -}
303makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget
304makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
305	l <- calcRepo $ gitAnnexLink file key
306	replaceWorkTreeFile file' $ makeAnnexLink l . toRawFilePath
307
308	-- touch symlink to have same time as the original file,
309	-- as provided in the InodeCache
310	case mcache of
311		Just c -> liftIO $ touch file (inodeCacheToMtime c) False
312		Nothing -> noop
313
314	return l
315  where
316	file' = fromRawFilePath file
317
318{- Creates the symlink to the annexed content, and stages it in git.
319 -
320 - As long as the filesystem supports symlinks, we use
321 - git add, rather than directly staging the symlink to git.
322 - Using git add is best because it allows the queuing to work
323 - and is faster (staging the symlink runs hash-object commands each time).
324 - Also, using git add allows it to skip gitignored files, unless forced
325 - to include them.
326 -}
327addLink :: CheckGitIgnore -> RawFilePath -> Key -> Maybe InodeCache -> Annex ()
328addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
329	( do
330		_ <- makeLink file key mcache
331		ps <- gitAddParams ci
332		Annex.Queue.addCommand [] "add" (ps++[Param "--"])
333			[fromRawFilePath file]
334	, do
335		l <- makeLink file key mcache
336		addAnnexLink l file
337	)
338
339{- Parameters to pass to git add, forcing addition of ignored files.
340 -
341 - Note that, when git add is being run on an ignored file that is already
342 - checked in, CheckGitIgnore True has no effect.
343 -}
344gitAddParams :: CheckGitIgnore -> Annex [CommandParam]
345gitAddParams (CheckGitIgnore True) = ifM (Annex.getState Annex.force)
346	( return [Param "-f"]
347	, return []
348	)
349gitAddParams (CheckGitIgnore False) = return [Param "-f"]
350
351{- Whether a file should be added unlocked or not. Default is to not,
352 - unless symlinks are not supported. annex.addunlocked can override that.
353 - Also, when in an adjusted branch that unlocked files, always add files
354 - unlocked.
355 -}
356addUnlocked :: AddUnlockedMatcher -> MatchInfo -> Bool -> Annex Bool
357addUnlocked matcher mi contentpresent =
358	((not . coreSymlinks <$> Annex.getGitConfig) <||>
359	 (checkAddUnlockedMatcher matcher mi) <||>
360	 (maybe False go . snd <$> getCurrentBranch)
361	)
362  where
363	go (LinkAdjustment UnlockAdjustment) = True
364	go (LinkAdjustment LockAdjustment) = False
365	go (LinkAdjustment FixAdjustment) = False
366	go (LinkAdjustment UnFixAdjustment) = False
367	go (PresenceAdjustment _ (Just la)) = go (LinkAdjustment la)
368	go (PresenceAdjustment _ Nothing) = False
369	go (LinkPresentAdjustment UnlockPresentAdjustment) = contentpresent
370	go (LinkPresentAdjustment LockPresentAdjustment) = False
371
372{- Adds a file to the work tree for the key, and stages it in the index.
373 - The content of the key may be provided in a temp file, which will be
374 - moved into place. If no content is provided, adds an annex link but does
375 - not ingest the content.
376 -
377 - When the content of the key is not accepted into the annex, returns False.
378 -}
379addAnnexedFile :: CheckGitIgnore -> AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool
380addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp))
381	( do
382		mode <- maybe
383			(pure Nothing)
384			(\tmp -> liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus tmp)
385			mtmp
386		stagePointerFile file mode =<< hashPointerFile key
387		Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
388		case mtmp of
389			Just tmp -> ifM (moveAnnex key af tmp)
390				( linkunlocked mode >> return True
391				, writepointer mode >> return False
392				)
393			Nothing -> ifM (inAnnex key)
394				( linkunlocked mode >> return True
395				, writepointer mode >> return True
396				)
397	, do
398		addLink ci file key Nothing
399		case mtmp of
400			Just tmp -> moveAnnex key af tmp
401			Nothing -> return True
402	)
403  where
404	af = AssociatedFile (Just file)
405	mi = case mtmp of
406		Just tmp -> MatchingFile $ FileInfo
407			{ contentFile = tmp
408			, matchFile = file
409			, matchKey = Just key
410			}
411		Nothing -> keyMatchInfoWithoutContent key file
412
413	linkunlocked mode = linkFromAnnex key file mode >>= \case
414		LinkAnnexFailed -> writepointer mode
415		_ -> return ()
416
417	writepointer mode = liftIO $ writePointerFile file key mode
418
419{- Use with actions that add an already existing annex symlink or pointer
420 - file. The warning avoids a confusing situation where the file got copied
421 - from another git-annex repo, probably by accident. -}
422addingExistingLink :: RawFilePath -> Key -> Annex a -> Annex a
423addingExistingLink f k a = do
424	unlessM (isKnownKey k <||> inAnnex k) $ do
425		islink <- isJust <$> isAnnexLink f
426		warning $ unwords
427			[ fromRawFilePath f
428			, "is a git-annex"
429			, if islink then "symlink." else "pointer file."
430			, "Its content is not available in this repository."
431			, "(Maybe " ++ fromRawFilePath f ++ " was copied from another repository?)"
432			]
433	a
434