1{- Url downloading, with git-annex user agent and configured http 2 - headers, security restrictions, etc. 3 - 4 - Copyright 2013-2020 Joey Hess <id@joeyh.name> 5 - 6 - Licensed under the GNU AGPL version 3 or higher. 7 -} 8 9{-# LANGUAGE CPP #-} 10 11module Annex.Url ( 12 withUrlOptions, 13 withUrlOptionsPromptingCreds, 14 getUrlOptions, 15 getUserAgent, 16 ipAddressesUnlimited, 17 checkBoth, 18 download, 19 download', 20 exists, 21 getUrlInfo, 22 U.URLString, 23 U.UrlOptions(..), 24 U.UrlInfo(..), 25 U.sinkResponseFile, 26 U.matchStatusCodeException, 27 U.downloadConduit, 28 U.downloadPartial, 29 U.parseURIRelaxed, 30 U.allowedScheme, 31 U.assumeUrlExists, 32) where 33 34import Annex.Common 35import qualified Annex 36import qualified Utility.Url as U 37import Utility.Hash (IncrementalVerifier) 38import Utility.IPAddress 39#ifdef WITH_HTTP_CLIENT_RESTRICTED 40import Network.HTTP.Client.Restricted 41#else 42import Utility.HttpManagerRestricted 43#endif 44import Utility.Metered 45import Git.Credential 46import qualified BuildInfo 47 48import Network.Socket 49import Network.HTTP.Client 50import Network.HTTP.Client.TLS 51import Text.Read 52 53defaultUserAgent :: U.UserAgent 54defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion 55 56getUserAgent :: Annex U.UserAgent 57getUserAgent = Annex.getState $ 58 fromMaybe defaultUserAgent . Annex.useragent 59 60getUrlOptions :: Annex U.UrlOptions 61getUrlOptions = Annex.getState Annex.urloptions >>= \case 62 Just uo -> return uo 63 Nothing -> do 64 uo <- mk 65 Annex.changeState $ \s -> s 66 { Annex.urloptions = Just uo } 67 return uo 68 where 69 mk = do 70 (urldownloader, manager) <- checkallowedaddr 71 U.mkUrlOptions 72 <$> (Just <$> getUserAgent) 73 <*> headers 74 <*> pure urldownloader 75 <*> pure manager 76 <*> (annexAllowedUrlSchemes <$> Annex.getGitConfig) 77 <*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u)) 78 <*> pure U.noBasicAuth 79 80 headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case 81 Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd]) 82 Nothing -> annexHttpHeaders <$> Annex.getGitConfig 83 84 checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case 85 ["all"] -> do 86 curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig 87 let urldownloader = if null curlopts 88 then U.DownloadWithConduit $ 89 U.DownloadWithCurlRestricted mempty 90 else U.DownloadWithCurl curlopts 91 manager <- liftIO $ U.newManager $ 92 avoidtimeout $ tlsManagerSettings 93 return (urldownloader, manager) 94 allowedaddrsports -> do 95 addrmatcher <- liftIO $ 96 (\l v -> any (\f -> f v) l) . catMaybes 97 <$> mapM (uncurry makeAddressMatcher) 98 (mapMaybe splitAddrPort allowedaddrsports) 99 -- Default to not allowing access to loopback 100 -- and private IP addresses to avoid data 101 -- leakage. 102 let isallowed addr 103 | addrmatcher addr = True 104 | isLoopbackAddress addr = False 105 | isPrivateAddress addr = False 106 | otherwise = True 107 let connectionrestricted = connectionRestricted 108 ("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++) 109 let r = addressRestriction $ \addr -> 110 if isallowed (addrAddress addr) 111 then Nothing 112 else Just (connectionrestricted addr) 113 (settings, pr) <- liftIO $ 114 mkRestrictedManagerSettings r Nothing Nothing 115 case pr of 116 Nothing -> return () 117 Just ProxyRestricted -> toplevelWarning True 118 "http proxy settings not used due to annex.security.allowed-ip-addresses configuration" 119 manager <- liftIO $ U.newManager $ 120 avoidtimeout settings 121 -- Curl is not used, as its interface does not allow 122 -- preventing it from accessing specific IP addresses. 123 let urldownloader = U.DownloadWithConduit $ 124 U.DownloadWithCurlRestricted r 125 return (urldownloader, manager) 126 127 -- http-client defailts to timing out a request after 30 seconds 128 -- or so, but some web servers are slower and git-annex has its own 129 -- separate timeout controls, so disable that. 130 avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone } 131 132splitAddrPort :: String -> Maybe (String, Maybe PortNumber) 133splitAddrPort s 134 -- "[addr]:port" (also allow "[addr]") 135 | "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of 136 [a,cp] -> case splitc ':' cp of 137 ["",p] -> do 138 pn <- readMaybe p 139 return (a, Just pn) 140 [""] -> Just (a, Nothing) 141 _ -> Nothing 142 _ -> Nothing 143 | otherwise = Just (s, Nothing) 144 145ipAddressesUnlimited :: Annex Bool 146ipAddressesUnlimited = 147 ("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig 148 149withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a 150withUrlOptions a = a =<< getUrlOptions 151 152-- When downloading an url, if authentication is needed, uses 153-- git-credential to prompt for username and password. 154withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a 155withUrlOptionsPromptingCreds a = do 156 g <- Annex.gitRepo 157 uo <- getUrlOptions 158 prompter <- mkPrompter 159 a $ uo 160 { U.getBasicAuth = \u -> prompter $ 161 getBasicAuthFromCredential g u 162 -- Can't download with curl and handle basic auth, 163 -- so make sure it uses conduit. 164 , U.urlDownloader = case U.urlDownloader uo of 165 U.DownloadWithCurl _ -> U.DownloadWithConduit $ 166 U.DownloadWithCurlRestricted mempty 167 v -> v 168 } 169 170checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool 171checkBoth url expected_size uo = 172 liftIO (U.checkBoth url expected_size uo) >>= \case 173 Right r -> return r 174 Left err -> warning err >> return False 175 176download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool 177download meterupdate iv url file uo = 178 liftIO (U.download meterupdate iv url file uo) >>= \case 179 Right () -> return True 180 Left err -> warning err >> return False 181 182download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ()) 183download' meterupdate iv url file uo = 184 liftIO (U.download meterupdate iv url file uo) 185 186exists :: U.URLString -> U.UrlOptions -> Annex Bool 187exists url uo = liftIO (U.exists url uo) >>= \case 188 Right b -> return b 189 Left err -> warning err >> return False 190 191getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo) 192getUrlInfo url uo = liftIO (U.getUrlInfo url uo) 193