1{- git-annex assistant ssh utilities
2 -
3 - Copyright 2012-2013 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8module Assistant.Ssh where
9
10import Annex.Common
11import Utility.Tmp
12import Utility.Tmp.Dir
13import Utility.Shell
14import Utility.Rsync
15import Utility.FileMode
16import Utility.SshConfig
17import Git.Remote
18import Utility.SshHost
19import Utility.Process.Transcript
20
21import Data.Text (Text)
22import qualified Data.Text as T
23import Data.Char
24import Network.URI
25
26data SshData = SshData
27	{ sshHostName :: Text
28	, sshUserName :: Maybe Text
29	, sshDirectory :: Text
30	, sshRepoName :: String
31	, sshPort :: Int
32	, needsPubKey :: Bool
33	, sshCapabilities :: [SshServerCapability]
34	, sshRepoUrl :: Maybe String
35	}
36	deriving (Read, Show, Eq)
37
38data SshServerCapability
39	= GitAnnexShellCapable -- server has git-annex-shell installed
40	| GitCapable -- server has git installed
41	| RsyncCapable -- server supports raw rsync access (not only via git-annex-shell)
42	| PushCapable -- repo on server is set up already, and ready to accept pushes
43	deriving (Read, Show, Eq)
44
45hasCapability :: SshData -> SshServerCapability -> Bool
46hasCapability d c = c `elem` sshCapabilities d
47
48addCapability :: SshData -> SshServerCapability -> SshData
49addCapability d c = d { sshCapabilities = c : sshCapabilities d }
50
51onlyCapability :: SshData -> SshServerCapability -> Bool
52onlyCapability d c = all (== c) (sshCapabilities d)
53
54type SshPubKey = String
55type SshPrivKey = String
56
57data SshKeyPair = SshKeyPair
58	{ sshPubKey :: SshPubKey
59	, sshPrivKey :: SshPrivKey
60	}
61
62instance Show SshKeyPair where
63	show = sshPubKey
64
65{- ssh -ofoo=bar command-line option -}
66sshOpt :: String -> String -> String
67sshOpt k v = concat ["-o", k, "=", v]
68
69{- user@host or host -}
70genSshHost :: Text -> Maybe Text -> SshHost
71genSshHost host user = either error id $ mkSshHost $
72	maybe "" (\v -> T.unpack v ++ "@") user ++ T.unpack host
73
74{- Generates a ssh or rsync url from a SshData. -}
75genSshUrl :: SshData -> String
76genSshUrl sshdata = case sshRepoUrl sshdata of
77	Just repourl -> repourl
78	Nothing -> addtrailingslash $ T.unpack $ T.concat $
79		if (onlyCapability sshdata RsyncCapable)
80			then [u, h, T.pack ":", sshDirectory sshdata]
81			else [T.pack "ssh://", u, h, d]
82  where
83	u = maybe (T.pack "") (\v -> T.concat [v, T.pack "@"]) $ sshUserName sshdata
84	h = sshHostName sshdata
85	d
86		| T.pack "/" `T.isPrefixOf` sshDirectory sshdata = sshDirectory sshdata
87		| T.pack "~/" `T.isPrefixOf` sshDirectory sshdata = T.concat [T.pack "/", sshDirectory sshdata]
88		| otherwise = T.concat [T.pack "/~/", sshDirectory sshdata]
89	addtrailingslash s
90		| "/" `isSuffixOf` s = s
91		| otherwise = s ++ "/"
92
93{- Reverses genSshUrl -}
94parseSshUrl :: String -> Maybe SshData
95parseSshUrl u
96	| "ssh://" `isPrefixOf` u = fromssh (drop (length "ssh://") u)
97	| otherwise = fromrsync u
98  where
99	mkdata (userhost, dir) = Just $ SshData
100		{ sshHostName = T.pack host
101		, sshUserName = if null user then Nothing else Just $ T.pack user
102		, sshDirectory = T.pack dir
103		, sshRepoName = genSshRepoName host dir
104		-- dummy values, cannot determine from url
105		, sshPort = 22
106		, needsPubKey = True
107		, sshCapabilities = []
108		, sshRepoUrl = Nothing
109		}
110	  where
111		(user, host) = if '@' `elem` userhost
112			then separate (== '@') userhost
113			else ("", userhost)
114	fromrsync s
115		| not (rsyncUrlIsShell u) = Nothing
116		| otherwise = mkdata $ separate (== ':') s
117	fromssh = mkdata . break (== '/')
118
119{- Generates a git remote name, like host_dir or host -}
120genSshRepoName :: String -> FilePath -> String
121genSshRepoName host dir
122	| null dir = makeLegalName host
123	| otherwise = makeLegalName $ host ++ "_" ++ dir
124
125{- The output of ssh, including both stdout and stderr. -}
126sshTranscript :: [String] -> SshHost -> String -> (Maybe String) -> IO (String, Bool)
127sshTranscript opts sshhost cmd input = processTranscript "ssh"
128	(opts ++ [fromSshHost sshhost, cmd]) input
129
130{- Ensure that the ssh public key doesn't include any ssh options, like
131 - command=foo, or other weirdness.
132 -
133 - The returned version of the key has its comment removed.
134 -}
135validateSshPubKey :: SshPubKey -> Either String SshPubKey
136validateSshPubKey pubkey
137	| length (lines pubkey) == 1 = check $ words pubkey
138	| otherwise = Left "too many lines in ssh public key"
139  where
140	check (prefix:key:_) = checkprefix prefix (unwords [prefix, key])
141	check _ = err "wrong number of words in ssh public key"
142
143	err msg = Left $ unwords [msg, pubkey]
144
145	checkprefix prefix validpubkey
146		| ssh == "ssh" && all isAlphaNum keytype = Right validpubkey
147		| otherwise = err "bad ssh public key prefix"
148	  where
149		(ssh, keytype) = separate (== '-') prefix
150
151addAuthorizedKeys :: Bool -> FilePath -> SshPubKey -> IO Bool
152addAuthorizedKeys gitannexshellonly dir pubkey = boolSystem "sh"
153	[ Param "-c" , Param $ addAuthorizedKeysCommand gitannexshellonly dir pubkey ]
154
155{- Should only be used within the same process that added the line;
156 - the layout of the line is not kepy stable across versions. -}
157removeAuthorizedKeys :: Bool -> FilePath -> SshPubKey -> IO ()
158removeAuthorizedKeys gitannexshellonly dir pubkey = do
159	let keyline = authorizedKeysLine gitannexshellonly dir pubkey
160	sshdir <- sshDir
161	let keyfile = sshdir </> "authorized_keys"
162	tryWhenExists (lines <$> readFileStrict keyfile) >>= \case
163		Just ls -> viaTmp writeSshConfig keyfile $
164			unlines $ filter (/= keyline) ls
165		Nothing -> noop
166
167{- Implemented as a shell command, so it can be run on remote servers over
168 - ssh.
169 -
170 - The ~/.ssh/git-annex-shell wrapper script is created if not already
171 - present.
172 -}
173addAuthorizedKeysCommand :: Bool -> FilePath -> SshPubKey -> String
174addAuthorizedKeysCommand gitannexshellonly dir pubkey = intercalate "&&"
175	[ "mkdir -p ~/.ssh"
176	, intercalate "; "
177		[ "if [ ! -e " ++ wrapper ++ " ]"
178		, "then (" ++ intercalate ";" (map echoval script) ++ ") > " ++ wrapper
179		, "fi"
180		]
181	, "chmod 700 " ++ wrapper
182	, "touch ~/.ssh/authorized_keys"
183	, "chmod 600 ~/.ssh/authorized_keys"
184	, unwords
185		[ "echo"
186		, shellEscape $ authorizedKeysLine gitannexshellonly dir pubkey
187		, ">>~/.ssh/authorized_keys"
188		]
189	]
190  where
191	echoval v = "echo " ++ shellEscape v
192	wrapper = "~/.ssh/git-annex-shell"
193	script =
194		[ shebang
195		, "set -e"
196		, "if [ \"x$SSH_ORIGINAL_COMMAND\" != \"x\" ]; then"
197		,   runshell "$SSH_ORIGINAL_COMMAND"
198		, "else"
199		,   runshell "$@"
200		, "fi"
201		]
202	runshell var = "exec git-annex-shell -c \"" ++ var ++ "\""
203
204authorizedKeysLine :: Bool -> FilePath -> SshPubKey -> String
205authorizedKeysLine gitannexshellonly dir pubkey
206	| gitannexshellonly = limitcommand ++ pubkey
207	{- TODO: Locking down rsync is difficult, requiring a rather
208	 - long perl script. -}
209	| otherwise = pubkey
210  where
211	limitcommand = "command=\"env GIT_ANNEX_SHELL_DIRECTORY="++shellEscape dir++" ~/.ssh/git-annex-shell\",no-agent-forwarding,no-port-forwarding,no-X11-forwarding,no-pty "
212
213{- Generates a ssh key pair. -}
214genSshKeyPair :: IO SshKeyPair
215genSshKeyPair = withTmpDir "git-annex-keygen" $ \dir -> do
216	ok <- boolSystem "ssh-keygen"
217		[ Param "-P", Param "" -- no password
218		, Param "-f", File $ dir </> "key"
219		]
220	unless ok $
221		error "ssh-keygen failed"
222	SshKeyPair
223		<$> readFile (dir </> "key.pub")
224		<*> readFile (dir </> "key")
225
226{- Installs a ssh key pair, and sets up ssh config with a mangled hostname
227 - that will enable use of the key. This way we avoid changing the user's
228 - regular ssh experience at all. Returns a modified SshData containing the
229 - mangled hostname.
230 -
231 - Note that the key files are put in ~/.ssh/git-annex/, rather than directly
232 - in ssh because of an **INSANE** behavior of gnome-keyring: It loads
233 - ~/.ssh/ANYTHING.pub, and uses them indiscriminately. But using this key
234 - for a normal login to the server will force git-annex-shell to run,
235 - and locks the user out. Luckily, it does not recurse into subdirectories.
236 -
237 - Similarly, IdentitiesOnly is set in the ssh config to prevent the
238 - ssh-agent from forcing use of a different key.
239 -
240 - Force strict host key checking to avoid repeated prompts
241 - when git-annex and git try to access the remote, if its
242 - host key has changed.
243 -}
244installSshKeyPair :: SshKeyPair -> SshData -> IO SshData
245installSshKeyPair sshkeypair sshdata = do
246	sshdir <- sshDir
247	createDirectoryIfMissing True $ fromRawFilePath $
248		parentDir $ toRawFilePath $ sshdir </> sshPrivKeyFile sshdata
249
250	unlessM (doesFileExist $ sshdir </> sshPrivKeyFile sshdata) $
251		writeFileProtected (toRawFilePath (sshdir </> sshPrivKeyFile sshdata)) (sshPrivKey sshkeypair)
252	unlessM (doesFileExist $ sshdir </> sshPubKeyFile sshdata) $
253		writeFile (sshdir </> sshPubKeyFile sshdata) (sshPubKey sshkeypair)
254
255	setSshConfig sshdata
256		[ ("IdentityFile", "~/.ssh/" ++ sshPrivKeyFile sshdata)
257		, ("IdentitiesOnly", "yes")
258		, ("StrictHostKeyChecking", "yes")
259		]
260
261sshPrivKeyFile :: SshData -> FilePath
262sshPrivKeyFile sshdata = "git-annex" </> "key." ++ mangleSshHostName sshdata
263
264sshPubKeyFile :: SshData -> FilePath
265sshPubKeyFile sshdata = sshPrivKeyFile sshdata ++ ".pub"
266
267{- Generates an installs a new ssh key pair if one is not already
268 - installed. Returns the modified SshData that will use the key pair,
269 - and the key pair. -}
270setupSshKeyPair :: SshData -> IO (SshData, SshKeyPair)
271setupSshKeyPair sshdata = do
272	sshdir <- sshDir
273	mprivkey <- catchMaybeIO $ readFile (sshdir </> sshPrivKeyFile sshdata)
274	mpubkey <- catchMaybeIO $ readFile (sshdir </> sshPubKeyFile sshdata)
275	keypair <- case (mprivkey, mpubkey) of
276		(Just privkey, Just pubkey) -> return $ SshKeyPair
277			{ sshPubKey = pubkey
278			, sshPrivKey = privkey
279			}
280		_ -> genSshKeyPair
281	sshdata' <- installSshKeyPair keypair sshdata
282	return (sshdata', keypair)
283
284{- Fixes git-annex ssh key pairs configured in .ssh/config
285 - by old versions to set IdentitiesOnly.
286 -
287 - Strategy: Search for IdentityFile lines with key.git-annex
288 - in their names. These are for git-annex ssh key pairs.
289 - Add the IdentitiesOnly line immediately after them, if not already
290 - present.
291 -}
292fixSshKeyPairIdentitiesOnly :: IO ()
293fixSshKeyPairIdentitiesOnly = changeUserSshConfig $ unlines . go [] . lines
294  where
295	go c [] = reverse c
296	go c (l:[])
297		| all (`isInfixOf` l) indicators = go (fixedline l:l:c) []
298		| otherwise = go (l:c) []
299	go c (l:next:rest)
300		| all (`isInfixOf` l) indicators && not ("IdentitiesOnly" `isInfixOf` next) =
301			go (fixedline l:l:c) (next:rest)
302		| otherwise = go (l:c) (next:rest)
303	indicators = ["IdentityFile", "key.git-annex"]
304	fixedline tmpl = takeWhile isSpace tmpl ++ "IdentitiesOnly yes"
305
306{- Add StrictHostKeyChecking to any ssh config stanzas that were written
307 - by git-annex. -}
308fixUpSshRemotes :: IO ()
309fixUpSshRemotes = modifyUserSshConfig (map go)
310  where
311	go c@(HostConfig h _)
312		| "git-annex-" `isPrefixOf` h = fixupconfig c
313		| otherwise = c
314	go other = other
315
316	fixupconfig c = case findHostConfigKey c "StrictHostKeyChecking" of
317		Nothing -> addToHostConfig c "StrictHostKeyChecking" "yes"
318		Just _ -> c
319
320{- Setups up a ssh config with a mangled hostname.
321 - Returns a modified SshData containing the mangled hostname. -}
322setSshConfig :: SshData -> [(String, String)] -> IO SshData
323setSshConfig sshdata config = do
324	sshdir <- sshDir
325	createDirectoryIfMissing True sshdir
326	let configfile = sshdir </> "config"
327	unlessM (catchBoolIO $ isInfixOf mangledhost <$> readFile configfile) $ do
328		appendFile configfile $ unlines $
329			[ ""
330			, "# Added automatically by git-annex"
331			, "Host " ++ mangledhost
332			] ++ map (\(k, v) -> "\t" ++ k ++ " " ++ v)
333				(settings ++ config)
334		setSshConfigMode (toRawFilePath configfile)
335
336	return $ sshdata
337		{ sshHostName = T.pack mangledhost
338		, sshRepoUrl = replace orighost mangledhost
339			<$> sshRepoUrl sshdata
340		}
341  where
342	orighost = T.unpack $ sshHostName sshdata
343	mangledhost = mangleSshHostName sshdata
344	settings =
345		[ ("Hostname", orighost)
346		, ("Port", show $ sshPort sshdata)
347		]
348
349{- This hostname is specific to a given repository on the ssh host,
350 - so it is based on the real hostname, the username, and the directory.
351 -
352 - The mangled hostname has the form:
353 - "git-annex-realhostname-username_port_dir"
354 - Note that "-" is only used in the realhostname and as a separator;
355 - this is necessary to allow unMangleSshHostName to work.
356 -
357 - Unusual characters are url encoded, but using "." rather than "%"
358 - (the latter has special meaning to ssh).
359 -
360 - In the username and directory, unusual characters are any
361 - non-alphanumerics, other than "_"
362 -
363 - The real hostname is not normally encoded at all. This is done for
364 - backwards compatability and to avoid unnecessary ugliness in the
365 - filename. However, when it contains special characters
366 - (notably ":" which cannot be used on some filesystems), it is url
367 - encoded. To indicate it was encoded, the mangled hostname
368 - has the form
369 - "git-annex-.encodedhostname-username_port_dir"
370 -}
371mangleSshHostName :: SshData -> String
372mangleSshHostName sshdata = intercalate "-"
373	[ "git-annex"
374	, escapehostname (T.unpack (sshHostName sshdata))
375	, escape extra
376	]
377  where
378	extra = intercalate "_" $ map T.unpack $ catMaybes
379		[ sshUserName sshdata
380		, Just $ T.pack $ show $ sshPort sshdata
381		, Just $ sshDirectory sshdata
382		]
383	safe c
384		| isAlphaNum c = True
385		| c == '_' = True
386		| otherwise = False
387	escape s = replace "%" "." $ escapeURIString safe s
388	escapehostname s
389		| all (\c -> c == '.' || safe c) s = s
390		| otherwise = '.' : escape s
391
392{- Extracts the real hostname from a mangled ssh hostname. -}
393unMangleSshHostName :: String -> String
394unMangleSshHostName h = case splitc '-' h of
395	("git":"annex":rest) -> unescape (intercalate "-" (beginning rest))
396	_ -> h
397  where
398	unescape ('.':s) = unEscapeString (replace "." "%" s)
399	unescape s = s
400
401{- Does ssh have known_hosts data for a hostname? -}
402knownHost :: Text -> IO Bool
403knownHost hostname = do
404	sshdir <- sshDir
405	ifM (doesFileExist $ sshdir </> "known_hosts")
406		( not . null <$> checkhost
407		, return False
408		)
409  where
410	{- ssh-keygen -F can crash on some old known_hosts file -}
411	checkhost = catchDefaultIO "" $
412		readProcess "ssh-keygen" ["-F", T.unpack hostname]
413