1{- Url downloading, with git-annex user agent and configured http
2 - headers, security restrictions, etc.
3 -
4 - Copyright 2013-2020 Joey Hess <id@joeyh.name>
5 -
6 - Licensed under the GNU AGPL version 3 or higher.
7 -}
8
9{-# LANGUAGE CPP #-}
10
11module Annex.Url (
12	withUrlOptions,
13	withUrlOptionsPromptingCreds,
14	getUrlOptions,
15	getUserAgent,
16	ipAddressesUnlimited,
17	checkBoth,
18	download,
19	download',
20	exists,
21	getUrlInfo,
22	U.URLString,
23	U.UrlOptions(..),
24	U.UrlInfo(..),
25	U.sinkResponseFile,
26	U.matchStatusCodeException,
27	U.downloadConduit,
28	U.downloadPartial,
29	U.parseURIRelaxed,
30	U.allowedScheme,
31	U.assumeUrlExists,
32) where
33
34import Annex.Common
35import qualified Annex
36import qualified Utility.Url as U
37import Utility.Hash (IncrementalVerifier)
38import Utility.IPAddress
39#ifdef WITH_HTTP_CLIENT_RESTRICTED
40import Network.HTTP.Client.Restricted
41#else
42import Utility.HttpManagerRestricted
43#endif
44import Utility.Metered
45import Git.Credential
46import qualified BuildInfo
47
48import Network.Socket
49import Network.HTTP.Client
50import Network.HTTP.Client.TLS
51import Text.Read
52
53defaultUserAgent :: U.UserAgent
54defaultUserAgent = "git-annex/" ++ BuildInfo.packageversion
55
56getUserAgent :: Annex U.UserAgent
57getUserAgent = Annex.getState $
58	fromMaybe defaultUserAgent . Annex.useragent
59
60getUrlOptions :: Annex U.UrlOptions
61getUrlOptions = Annex.getState Annex.urloptions >>= \case
62	Just uo -> return uo
63	Nothing -> do
64		uo <- mk
65		Annex.changeState $ \s -> s
66			{ Annex.urloptions = Just uo }
67		return uo
68  where
69	mk = do
70		(urldownloader, manager) <- checkallowedaddr
71		U.mkUrlOptions
72			<$> (Just <$> getUserAgent)
73			<*> headers
74			<*> pure urldownloader
75			<*> pure manager
76			<*> (annexAllowedUrlSchemes <$> Annex.getGitConfig)
77			<*> pure (Just (\u -> "Configuration of annex.security.allowed-url-schemes does not allow accessing " ++ show u))
78			<*> pure U.noBasicAuth
79
80	headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
81		Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
82		Nothing -> annexHttpHeaders <$> Annex.getGitConfig
83
84	checkallowedaddr = words . annexAllowedIPAddresses <$> Annex.getGitConfig >>= \case
85		["all"] -> do
86			curlopts <- map Param . annexWebOptions <$> Annex.getGitConfig
87			let urldownloader = if null curlopts
88				then U.DownloadWithConduit $
89					U.DownloadWithCurlRestricted mempty
90				else U.DownloadWithCurl curlopts
91			manager <- liftIO $ U.newManager $
92				avoidtimeout $ tlsManagerSettings
93			return (urldownloader, manager)
94		allowedaddrsports -> do
95			addrmatcher <- liftIO $
96				(\l v -> any (\f -> f v) l) . catMaybes
97					<$> mapM (uncurry makeAddressMatcher)
98						(mapMaybe splitAddrPort allowedaddrsports)
99			-- Default to not allowing access to loopback
100			-- and private IP addresses to avoid data
101			-- leakage.
102			let isallowed addr
103				| addrmatcher addr = True
104				| isLoopbackAddress addr = False
105				| isPrivateAddress addr = False
106				| otherwise = True
107			let connectionrestricted = connectionRestricted
108				("Configuration of annex.security.allowed-ip-addresses does not allow accessing address " ++)
109			let r = addressRestriction $ \addr ->
110				if isallowed (addrAddress addr)
111					then Nothing
112					else Just (connectionrestricted addr)
113			(settings, pr) <- liftIO $
114				mkRestrictedManagerSettings r Nothing Nothing
115			case pr of
116				Nothing -> return ()
117				Just ProxyRestricted -> toplevelWarning True
118					"http proxy settings not used due to annex.security.allowed-ip-addresses configuration"
119			manager <- liftIO $ U.newManager $
120				avoidtimeout settings
121			-- Curl is not used, as its interface does not allow
122			-- preventing it from accessing specific IP addresses.
123			let urldownloader = U.DownloadWithConduit $
124				U.DownloadWithCurlRestricted r
125			return (urldownloader, manager)
126
127	-- http-client defailts to timing out a request after 30 seconds
128	-- or so, but some web servers are slower and git-annex has its own
129	-- separate timeout controls, so disable that.
130	avoidtimeout s = s { managerResponseTimeout = responseTimeoutNone }
131
132splitAddrPort :: String -> Maybe (String, Maybe PortNumber)
133splitAddrPort s
134	-- "[addr]:port" (also allow "[addr]")
135	| "[" `isPrefixOf` s = case splitc ']' (drop 1 s) of
136		[a,cp] -> case splitc ':' cp of
137			["",p] -> do
138				pn <- readMaybe p
139				return (a, Just pn)
140			[""] -> Just (a, Nothing)
141			_ -> Nothing
142		_ -> Nothing
143	| otherwise = Just (s, Nothing)
144
145ipAddressesUnlimited :: Annex Bool
146ipAddressesUnlimited =
147	("all" == ) . annexAllowedIPAddresses <$> Annex.getGitConfig
148
149withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
150withUrlOptions a = a =<< getUrlOptions
151
152-- When downloading an url, if authentication is needed, uses
153-- git-credential to prompt for username and password.
154withUrlOptionsPromptingCreds :: (U.UrlOptions -> Annex a) -> Annex a
155withUrlOptionsPromptingCreds a = do
156	g <- Annex.gitRepo
157	uo <- getUrlOptions
158	prompter <- mkPrompter
159	a $ uo
160		{ U.getBasicAuth = \u -> prompter $
161			getBasicAuthFromCredential g u
162		-- Can't download with curl and handle basic auth,
163		-- so make sure it uses conduit.
164		, U.urlDownloader = case U.urlDownloader uo of
165			U.DownloadWithCurl _ -> U.DownloadWithConduit $
166				U.DownloadWithCurlRestricted mempty
167			v -> v
168		}
169
170checkBoth :: U.URLString -> Maybe Integer -> U.UrlOptions -> Annex Bool
171checkBoth url expected_size uo =
172	liftIO (U.checkBoth url expected_size uo) >>= \case
173		Right r -> return r
174		Left err -> warning err >> return False
175
176download :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex Bool
177download meterupdate iv url file uo =
178	liftIO (U.download meterupdate iv url file uo) >>= \case
179		Right () -> return True
180		Left err -> warning err >> return False
181
182download' :: MeterUpdate -> Maybe IncrementalVerifier -> U.URLString -> FilePath -> U.UrlOptions -> Annex (Either String ())
183download' meterupdate iv url file uo =
184	liftIO (U.download meterupdate iv url file uo)
185
186exists :: U.URLString -> U.UrlOptions -> Annex Bool
187exists url uo = liftIO (U.exists url uo) >>= \case
188	Right b -> return b
189	Left err -> warning err >> return False
190
191getUrlInfo :: U.URLString -> U.UrlOptions -> Annex (Either String U.UrlInfo)
192getUrlInfo url uo = liftIO (U.getUrlInfo url uo)
193