1{- git-annex content ingestion 2 - 3 - Copyright 2010-2021 Joey Hess <id@joeyh.name> 4 - 5 - Licensed under the GNU AGPL version 3 or higher. 6 -} 7 8module Annex.Ingest ( 9 LockedDown(..), 10 LockDownConfig(..), 11 lockDown, 12 checkLockedDownWritePerms, 13 ingestAdd, 14 ingestAdd', 15 ingest, 16 ingest', 17 finishIngestUnlocked, 18 cleanOldKeys, 19 addLink, 20 makeLink, 21 addUnlocked, 22 CheckGitIgnore(..), 23 gitAddParams, 24 addAnnexedFile, 25 addingExistingLink, 26) where 27 28import Annex.Common 29import Types.KeySource 30import Types.FileMatcher 31import Backend 32import Annex.Content 33import Annex.Perms 34import Annex.Link 35import Annex.MetaData 36import Annex.CurrentBranch 37import Annex.CheckIgnore 38import Logs.Location 39import qualified Annex 40import qualified Annex.Queue 41import qualified Database.Keys 42import Config 43import Utility.InodeCache 44import Annex.ReplaceFile 45import Utility.Tmp 46import Utility.CopyFile 47import Utility.Touch 48import Utility.Metered 49import Git.FilePath 50import Annex.InodeSentinal 51import Annex.AdjustedBranch 52import Annex.FileMatcher 53import qualified Utility.RawFilePath as R 54 55data LockedDown = LockedDown 56 { lockDownConfig :: LockDownConfig 57 , keySource :: KeySource 58 } 59 deriving (Show) 60 61data LockDownConfig = LockDownConfig 62 { lockingFile :: Bool 63 -- ^ write bit removed during lock down 64 , hardlinkFileTmpDir :: Maybe RawFilePath 65 -- ^ hard link to temp directorya 66 , checkWritePerms :: Bool 67 -- ^ check that write perms are successfully removed 68 } 69 deriving (Show) 70 71{- The file that's being ingested is locked down before a key is generated, 72 - to prevent it from being modified in between. This lock down is not 73 - perfect at best (and pretty weak at worst). For example, it does not 74 - guard against files that are already opened for write by another process. 75 - So, the InodeCache can be used to detect any changes that might be made 76 - to the file after it was locked down. 77 - 78 - When possible, the file is hard linked to a temp directory. This guards 79 - against some changes, like deletion or overwrite of the file, and 80 - allows lsof checks to be done more efficiently when adding a lot of files. 81 - 82 - Lockdown can fail if a file gets deleted, or if it's unable to remove 83 - write permissions, and Nothing will be returned. 84 -} 85lockDown :: LockDownConfig-> FilePath -> Annex (Maybe LockedDown) 86lockDown cfg file = either 87 (\e -> warning (show e) >> return Nothing) 88 (return . Just) 89 =<< lockDown' cfg file 90 91lockDown' :: LockDownConfig -> FilePath -> Annex (Either SomeException LockedDown) 92lockDown' cfg file = tryNonAsync $ ifM crippledFileSystem 93 ( nohardlink 94 , case hardlinkFileTmpDir cfg of 95 Nothing -> nohardlink 96 Just tmpdir -> withhardlink tmpdir 97 ) 98 where 99 file' = toRawFilePath file 100 101 nohardlink = do 102 setperms 103 withTSDelta $ liftIO . nohardlink' 104 105 nohardlink' delta = do 106 cache <- genInodeCache file' delta 107 return $ LockedDown cfg $ KeySource 108 { keyFilename = file' 109 , contentLocation = file' 110 , inodeCache = cache 111 } 112 113 withhardlink tmpdir = do 114 setperms 115 withTSDelta $ \delta -> liftIO $ do 116 (tmpfile, h) <- openTmpFileIn (fromRawFilePath tmpdir) $ 117 relatedTemplate $ "ingest-" ++ takeFileName file 118 hClose h 119 removeWhenExistsWith R.removeLink (toRawFilePath tmpfile) 120 withhardlink' delta tmpfile 121 `catchIO` const (nohardlink' delta) 122 123 withhardlink' delta tmpfile = do 124 createLink file tmpfile 125 cache <- genInodeCache (toRawFilePath tmpfile) delta 126 return $ LockedDown cfg $ KeySource 127 { keyFilename = file' 128 , contentLocation = toRawFilePath tmpfile 129 , inodeCache = cache 130 } 131 132 setperms = when (lockingFile cfg) $ do 133 freezeContent file' 134 when (checkWritePerms cfg) $ 135 maybe noop giveup =<< checkLockedDownWritePerms file' file' 136 137checkLockedDownWritePerms :: RawFilePath -> RawFilePath -> Annex (Maybe String) 138checkLockedDownWritePerms file displayfile = checkContentWritePerm file >>= return . \case 139 Just False -> Just $ unwords 140 [ "Unable to remove all write permissions from" 141 , fromRawFilePath displayfile 142 , "-- perhaps it has an xattr or ACL set." 143 ] 144 _ -> Nothing 145 146{- Ingests a locked down file into the annex. Updates the work tree and 147 - index. -} 148ingestAdd :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key) 149ingestAdd ci meterupdate ld = ingestAdd' ci meterupdate ld Nothing 150 151ingestAdd' :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key) 152ingestAdd' _ _ Nothing _ = return Nothing 153ingestAdd' ci meterupdate ld@(Just (LockedDown cfg source)) mk = do 154 (mk', mic) <- ingest meterupdate ld mk 155 case mk' of 156 Nothing -> return Nothing 157 Just k -> do 158 let f = keyFilename source 159 if lockingFile cfg 160 then addLink ci f k mic 161 else do 162 mode <- liftIO $ catchMaybeIO $ 163 fileMode <$> R.getFileStatus (contentLocation source) 164 stagePointerFile f mode =<< hashPointerFile k 165 return (Just k) 166 167{- Ingests a locked down file into the annex. Does not update the working 168 - tree or the index. -} 169ingest :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key, Maybe InodeCache) 170ingest meterupdate ld mk = ingest' Nothing meterupdate ld mk (Restage True) 171 172ingest' :: Maybe Backend -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Restage -> Annex (Maybe Key, Maybe InodeCache) 173ingest' _ _ Nothing _ _ = return (Nothing, Nothing) 174ingest' preferredbackend meterupdate (Just (LockedDown cfg source)) mk restage = withTSDelta $ \delta -> do 175 k <- case mk of 176 Nothing -> do 177 backend <- maybe 178 (chooseBackend $ keyFilename source) 179 (return . Just) 180 preferredbackend 181 fst <$> genKey source meterupdate backend 182 Just k -> return k 183 let src = contentLocation source 184 ms <- liftIO $ catchMaybeIO $ R.getFileStatus src 185 mcache <- maybe (pure Nothing) (liftIO . toInodeCache delta src) ms 186 case (mcache, inodeCache source) of 187 (_, Nothing) -> go k mcache ms 188 (Just newc, Just c) | compareStrong c newc -> go k mcache ms 189 _ -> failure "changed while it was being added" 190 where 191 go key mcache (Just s) 192 | lockingFile cfg = golocked key mcache s 193 | otherwise = gounlocked key mcache s 194 go _ _ Nothing = failure "failed to generate a key" 195 196 golocked key mcache s = 197 tryNonAsync (moveAnnex key naf (contentLocation source)) >>= \case 198 Right True -> success key mcache s 199 Right False -> giveup "failed to add content to annex" 200 Left e -> restoreFile (keyFilename source) key e 201 202 -- moveAnnex uses the AssociatedFile provided to it to unlock 203 -- locked files when getting a file in an adjusted branch. 204 -- That case does not apply here, where we're adding an unlocked 205 -- file, so provide it nothing. 206 naf = AssociatedFile Nothing 207 208 gounlocked key (Just cache) s = do 209 -- Remove temp directory hard link first because 210 -- linkToAnnex falls back to copying if a file 211 -- already has a hard link. 212 cleanCruft source 213 cleanOldKeys (keyFilename source) key 214 linkToAnnex key (keyFilename source) (Just cache) >>= \case 215 LinkAnnexFailed -> failure "failed to link to annex" 216 lar -> do 217 finishIngestUnlocked' key source restage (Just lar) 218 success key (Just cache) s 219 gounlocked _ _ _ = failure "failed statting file" 220 221 success k mcache s = do 222 genMetaData k (keyFilename source) s 223 return (Just k, mcache) 224 225 failure msg = do 226 warning $ fromRawFilePath (keyFilename source) ++ " " ++ msg 227 cleanCruft source 228 return (Nothing, Nothing) 229 230finishIngestUnlocked :: Key -> KeySource -> Annex () 231finishIngestUnlocked key source = do 232 cleanCruft source 233 finishIngestUnlocked' key source (Restage True) Nothing 234 235finishIngestUnlocked' :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex () 236finishIngestUnlocked' key source restage lar = do 237 Database.Keys.addAssociatedFile key 238 =<< inRepo (toTopFilePath (keyFilename source)) 239 populateUnlockedFiles key source restage lar 240 241{- Copy to any other unlocked files using the same key. 242 - 243 - When linkToAnnex did not have to do anything, the object file 244 - was already present, and so other unlocked files are already populated, 245 - and nothing needs to be done here. 246 -} 247populateUnlockedFiles :: Key -> KeySource -> Restage -> Maybe LinkAnnexResult -> Annex () 248populateUnlockedFiles _ _ _ (Just LinkAnnexNoop) = return () 249populateUnlockedFiles key source restage _ = do 250 obj <- calcRepo (gitAnnexLocation key) 251 g <- Annex.gitRepo 252 ingestedf <- flip fromTopFilePath g 253 <$> inRepo (toTopFilePath (keyFilename source)) 254 afs <- map (`fromTopFilePath` g) <$> Database.Keys.getAssociatedFiles key 255 forM_ (filter (/= ingestedf) afs) $ 256 populatePointerFile restage key obj 257 258cleanCruft :: KeySource -> Annex () 259cleanCruft source = when (contentLocation source /= keyFilename source) $ 260 liftIO $ removeWhenExistsWith R.removeLink $ contentLocation source 261 262-- If a worktree file was was hard linked to an annex object before, 263-- modifying the file would have caused the object to have the wrong 264-- content. Clean up from that. 265cleanOldKeys :: RawFilePath -> Key -> Annex () 266cleanOldKeys file newkey = do 267 g <- Annex.gitRepo 268 topf <- inRepo (toTopFilePath file) 269 ingestedf <- fromRepo $ fromTopFilePath topf 270 oldkeys <- filter (/= newkey) 271 <$> Database.Keys.getAssociatedKey topf 272 forM_ oldkeys $ \key -> 273 unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do 274 caches <- Database.Keys.getInodeCaches key 275 unlinkAnnex key 276 fs <- filter (/= ingestedf) 277 . map (`fromTopFilePath` g) 278 <$> Database.Keys.getAssociatedFiles key 279 filterM (`sameInodeCache` caches) fs >>= \case 280 -- If linkToAnnex fails, the associated 281 -- file with the content is still present, 282 -- so no need for any recovery. 283 (f:_) -> do 284 ic <- withTSDelta (liftIO . genInodeCache f) 285 void $ linkToAnnex key f ic 286 _ -> logStatus key InfoMissing 287 288{- On error, put the file back so it doesn't seem to have vanished. 289 - This can be called before or after the symlink is in place. -} 290restoreFile :: RawFilePath -> Key -> SomeException -> Annex a 291restoreFile file key e = do 292 whenM (inAnnex key) $ do 293 liftIO $ removeWhenExistsWith R.removeLink file 294 -- The key could be used by other files too, so leave the 295 -- content in the annex, and make a copy back to the file. 296 obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key) 297 unlessM (liftIO $ copyFileExternal CopyTimeStamps obj (fromRawFilePath file)) $ 298 warning $ "Unable to restore content of " ++ fromRawFilePath file ++ "; it should be located in " ++ obj 299 thawContent file 300 throwM e 301 302{- Creates the symlink to the annexed content, returns the link target. -} 303makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget 304makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do 305 l <- calcRepo $ gitAnnexLink file key 306 replaceWorkTreeFile file' $ makeAnnexLink l . toRawFilePath 307 308 -- touch symlink to have same time as the original file, 309 -- as provided in the InodeCache 310 case mcache of 311 Just c -> liftIO $ touch file (inodeCacheToMtime c) False 312 Nothing -> noop 313 314 return l 315 where 316 file' = fromRawFilePath file 317 318{- Creates the symlink to the annexed content, and stages it in git. 319 - 320 - As long as the filesystem supports symlinks, we use 321 - git add, rather than directly staging the symlink to git. 322 - Using git add is best because it allows the queuing to work 323 - and is faster (staging the symlink runs hash-object commands each time). 324 - Also, using git add allows it to skip gitignored files, unless forced 325 - to include them. 326 -} 327addLink :: CheckGitIgnore -> RawFilePath -> Key -> Maybe InodeCache -> Annex () 328addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) 329 ( do 330 _ <- makeLink file key mcache 331 ps <- gitAddParams ci 332 Annex.Queue.addCommand [] "add" (ps++[Param "--"]) 333 [fromRawFilePath file] 334 , do 335 l <- makeLink file key mcache 336 addAnnexLink l file 337 ) 338 339{- Parameters to pass to git add, forcing addition of ignored files. 340 - 341 - Note that, when git add is being run on an ignored file that is already 342 - checked in, CheckGitIgnore True has no effect. 343 -} 344gitAddParams :: CheckGitIgnore -> Annex [CommandParam] 345gitAddParams (CheckGitIgnore True) = ifM (Annex.getState Annex.force) 346 ( return [Param "-f"] 347 , return [] 348 ) 349gitAddParams (CheckGitIgnore False) = return [Param "-f"] 350 351{- Whether a file should be added unlocked or not. Default is to not, 352 - unless symlinks are not supported. annex.addunlocked can override that. 353 - Also, when in an adjusted branch that unlocked files, always add files 354 - unlocked. 355 -} 356addUnlocked :: AddUnlockedMatcher -> MatchInfo -> Bool -> Annex Bool 357addUnlocked matcher mi contentpresent = 358 ((not . coreSymlinks <$> Annex.getGitConfig) <||> 359 (checkAddUnlockedMatcher matcher mi) <||> 360 (maybe False go . snd <$> getCurrentBranch) 361 ) 362 where 363 go (LinkAdjustment UnlockAdjustment) = True 364 go (LinkAdjustment LockAdjustment) = False 365 go (LinkAdjustment FixAdjustment) = False 366 go (LinkAdjustment UnFixAdjustment) = False 367 go (PresenceAdjustment _ (Just la)) = go (LinkAdjustment la) 368 go (PresenceAdjustment _ Nothing) = False 369 go (LinkPresentAdjustment UnlockPresentAdjustment) = contentpresent 370 go (LinkPresentAdjustment LockPresentAdjustment) = False 371 372{- Adds a file to the work tree for the key, and stages it in the index. 373 - The content of the key may be provided in a temp file, which will be 374 - moved into place. If no content is provided, adds an annex link but does 375 - not ingest the content. 376 - 377 - When the content of the key is not accepted into the annex, returns False. 378 -} 379addAnnexedFile :: CheckGitIgnore -> AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool 380addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)) 381 ( do 382 mode <- maybe 383 (pure Nothing) 384 (\tmp -> liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus tmp) 385 mtmp 386 stagePointerFile file mode =<< hashPointerFile key 387 Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file) 388 case mtmp of 389 Just tmp -> ifM (moveAnnex key af tmp) 390 ( linkunlocked mode >> return True 391 , writepointer mode >> return False 392 ) 393 Nothing -> ifM (inAnnex key) 394 ( linkunlocked mode >> return True 395 , writepointer mode >> return True 396 ) 397 , do 398 addLink ci file key Nothing 399 case mtmp of 400 Just tmp -> moveAnnex key af tmp 401 Nothing -> return True 402 ) 403 where 404 af = AssociatedFile (Just file) 405 mi = case mtmp of 406 Just tmp -> MatchingFile $ FileInfo 407 { contentFile = tmp 408 , matchFile = file 409 , matchKey = Just key 410 } 411 Nothing -> keyMatchInfoWithoutContent key file 412 413 linkunlocked mode = linkFromAnnex key file mode >>= \case 414 LinkAnnexFailed -> writepointer mode 415 _ -> return () 416 417 writepointer mode = liftIO $ writePointerFile file key mode 418 419{- Use with actions that add an already existing annex symlink or pointer 420 - file. The warning avoids a confusing situation where the file got copied 421 - from another git-annex repo, probably by accident. -} 422addingExistingLink :: RawFilePath -> Key -> Annex a -> Annex a 423addingExistingLink f k a = do 424 unlessM (isKnownKey k <||> inAnnex k) $ do 425 islink <- isJust <$> isAnnexLink f 426 warning $ unwords 427 [ fromRawFilePath f 428 , "is a git-annex" 429 , if islink then "symlink." else "pointer file." 430 , "Its content is not available in this repository." 431 , "(Maybe " ++ fromRawFilePath f ++ " was copied from another repository?)" 432 ] 433 a 434