1{- git-annex hashing backends
2 -
3 - Copyright 2011-2021 Joey Hess <id@joeyh.name>
4 -
5 - Licensed under the GNU AGPL version 3 or higher.
6 -}
7
8{-# LANGUAGE OverloadedStrings #-}
9
10module Backend.Hash (
11	backends,
12	testKeyBackend,
13	keyHash,
14) where
15
16import Annex.Common
17import qualified Annex
18import Backend.Utilities
19import Types.Key
20import Types.Backend
21import Types.KeySource
22import Utility.Hash
23import Utility.Metered
24import qualified Utility.RawFilePath as R
25
26import qualified Data.ByteString as S
27import qualified Data.ByteString.Char8 as S8
28import qualified Data.ByteString.Lazy as L
29import Control.DeepSeq
30import Control.Exception (evaluate)
31
32data Hash
33	= MD5Hash
34	| SHA1Hash
35	| SHA2Hash HashSize
36	| SHA3Hash HashSize
37	| SkeinHash HashSize
38	| Blake2bHash HashSize
39	| Blake2bpHash HashSize
40	| Blake2sHash HashSize
41	| Blake2spHash HashSize
42
43cryptographicallySecure :: Hash -> Bool
44cryptographicallySecure (SHA2Hash _) = True
45cryptographicallySecure (SHA3Hash _) = True
46cryptographicallySecure (SkeinHash _) = True
47cryptographicallySecure (Blake2bHash _) = True
48cryptographicallySecure (Blake2bpHash _) = True
49cryptographicallySecure (Blake2sHash _) = True
50cryptographicallySecure (Blake2spHash _) = True
51cryptographicallySecure SHA1Hash = False
52cryptographicallySecure MD5Hash = False
53
54{- Order is slightly significant; want SHA256 first, and more general
55 - sizes earlier. -}
56hashes :: [Hash]
57hashes = concat
58	[ map (SHA2Hash . HashSize) [256, 512, 224, 384]
59	, map (SHA3Hash . HashSize) [256, 512, 224, 384]
60	, map (SkeinHash . HashSize) [256, 512]
61	, map (Blake2bHash . HashSize) [256, 512, 160, 224, 384]
62	, map (Blake2bpHash . HashSize) [512]
63	, map (Blake2sHash . HashSize) [256, 160, 224]
64	, map (Blake2spHash . HashSize) [256, 224]
65	, [SHA1Hash]
66	, [MD5Hash]
67	]
68
69{- The SHA256E backend is the default, so genBackendE comes first. -}
70backends :: [Backend]
71backends = concatMap (\h -> [genBackendE h, genBackend h]) hashes
72
73genBackend :: Hash -> Backend
74genBackend hash = Backend
75	{ backendVariety = hashKeyVariety hash (HasExt False)
76	, genKey = Just (keyValue hash)
77	, verifyKeyContent = Just $ checkKeyChecksum hash
78	, verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash
79	, canUpgradeKey = Just needsUpgrade
80	, fastMigrate = Just trivialMigrate
81	, isStableKey = const True
82	, isCryptographicallySecure = const (cryptographicallySecure hash)
83	}
84
85genBackendE :: Hash -> Backend
86genBackendE hash = (genBackend hash)
87	{ backendVariety = hashKeyVariety hash (HasExt True)
88	, genKey = Just (keyValueE hash)
89	}
90
91hashKeyVariety :: Hash -> HasExt -> KeyVariety
92hashKeyVariety MD5Hash he = MD5Key he
93hashKeyVariety SHA1Hash he = SHA1Key he
94hashKeyVariety (SHA2Hash size) he = SHA2Key size he
95hashKeyVariety (SHA3Hash size) he = SHA3Key size he
96hashKeyVariety (SkeinHash size) he = SKEINKey size he
97hashKeyVariety (Blake2bHash size) he = Blake2bKey size he
98hashKeyVariety (Blake2bpHash size) he = Blake2bpKey size he
99hashKeyVariety (Blake2sHash size) he = Blake2sKey size he
100hashKeyVariety (Blake2spHash size) he = Blake2spKey size he
101
102{- A key is a hash of its contents. -}
103keyValue :: Hash -> KeySource -> MeterUpdate -> Annex Key
104keyValue hash source meterupdate = do
105	let file = contentLocation source
106	filesize <- liftIO $ getFileSize file
107	s <- hashFile hash file meterupdate
108	return $ mkKey $ \k -> k
109		{ keyName = encodeBS s
110		, keyVariety = hashKeyVariety hash (HasExt False)
111		, keySize = Just filesize
112		}
113
114{- Extension preserving keys. -}
115keyValueE :: Hash -> KeySource -> MeterUpdate -> Annex Key
116keyValueE hash source meterupdate =
117	keyValue hash source meterupdate
118		>>= addE source (const $ hashKeyVariety hash (HasExt True))
119
120checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool
121checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do
122	fast <- Annex.getState Annex.fast
123	exists <- liftIO $ R.doesPathExist file
124	case (exists, fast) of
125		(True, False) -> do
126			showAction descChecksum
127			sameCheckSum key
128				<$> hashFile hash file nullMeterUpdate
129		_ -> return True
130  where
131	hwfault e = do
132		warning $ "hardware fault: " ++ show e
133		return False
134
135sameCheckSum :: Key -> String -> Bool
136sameCheckSum key s
137	| s == expected = True
138	{- A bug caused checksums to be prefixed with \ in some
139	 - cases; still accept these as legal now that the bug
140	 - has been fixed. -}
141	| '\\' : s == expected = True
142	| otherwise = False
143  where
144	expected = decodeBS (keyHash key)
145
146checkKeyChecksumIncremental :: Hash -> Key -> Annex IncrementalVerifier
147checkKeyChecksumIncremental hash key = liftIO $ (snd $ hasher hash) key
148
149keyHash :: Key -> S.ByteString
150keyHash = fst . splitKeyNameExtension
151
152{- Upgrade keys that have the \ prefix on their hash due to a bug, or
153 - that contain non-alphanumeric characters in their extension.
154 -
155 - Also, for a while migrate from eg SHA256E to SHA256 resulted in a SHA256
156 - key that contained an extension inside its keyName. Upgrade those
157 - keys, removing the extension.
158 -}
159needsUpgrade :: Key -> Bool
160needsUpgrade key = or
161	[ "\\" `S8.isPrefixOf` keyHash key
162	, S.any (not . validInExtension) (snd $ splitKeyNameExtension key)
163	, not (hasExt (fromKey keyVariety key)) && keyHash key /= fromKey keyName key
164	]
165
166trivialMigrate :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key)
167trivialMigrate oldkey newbackend afile = trivialMigrate' oldkey newbackend afile
168	<$> (annexMaxExtensionLength <$> Annex.getGitConfig)
169
170trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key
171trivialMigrate' oldkey newbackend afile maxextlen
172	{- Fast migration from hashE to hash backend. -}
173	| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
174		{ keyName = keyHash oldkey
175		, keyVariety = newvariety
176		}
177	{- Fast migration from hash to hashE backend. -}
178	| migratable && hasExt newvariety = case afile of
179		AssociatedFile Nothing -> Nothing
180		AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
181			{ keyName = keyHash oldkey
182				<> selectExtension maxextlen file
183			, keyVariety = newvariety
184			}
185	{- Upgrade to fix bad previous migration that created a
186	 - non-extension preserving key, with an extension
187	 - in its keyName. -}
188	| newvariety == oldvariety && not (hasExt oldvariety) &&
189		keyHash oldkey /= fromKey keyName oldkey =
190			Just $ alterKey oldkey $ \d -> d
191				{ keyName = keyHash oldkey
192				}
193	| otherwise = Nothing
194  where
195	migratable = oldvariety /= newvariety
196		&& sameExceptExt oldvariety newvariety
197	oldvariety = fromKey keyVariety oldkey
198	newvariety = backendVariety newbackend
199
200hashFile :: Hash -> RawFilePath -> MeterUpdate -> Annex String
201hashFile hash file meterupdate =
202	liftIO $ withMeteredFile (fromRawFilePath file) meterupdate $ \b -> do
203		let h = (fst $ hasher hash) b
204		-- Force full evaluation of hash so whole file is read
205		-- before returning.
206		evaluate (rnf h)
207		return h
208
209type Hasher = (L.ByteString -> String, Key -> IO IncrementalVerifier)
210
211hasher :: Hash -> Hasher
212hasher MD5Hash = md5Hasher
213hasher SHA1Hash = sha1Hasher
214hasher (SHA2Hash hashsize) = sha2Hasher hashsize
215hasher (SHA3Hash hashsize) = sha3Hasher hashsize
216hasher (SkeinHash hashsize) = skeinHasher hashsize
217hasher (Blake2bHash hashsize) = blake2bHasher hashsize
218hasher (Blake2bpHash hashsize) = blake2bpHasher hashsize
219hasher (Blake2sHash hashsize) = blake2sHasher hashsize
220hasher (Blake2spHash hashsize) = blake2spHasher hashsize
221
222mkHasher :: HashAlgorithm h => (L.ByteString -> Digest h) -> Context h -> Hasher
223mkHasher h c = (show . h, mkIncrementalVerifier c descChecksum . sameCheckSum)
224
225sha2Hasher :: HashSize -> Hasher
226sha2Hasher (HashSize hashsize)
227	| hashsize == 256 = mkHasher sha2_256 sha2_256_context
228	| hashsize == 224 = mkHasher sha2_224 sha2_224_context
229	| hashsize == 384 = mkHasher sha2_384 sha2_384_context
230	| hashsize == 512 = mkHasher sha2_512 sha2_512_context
231	| otherwise = error $ "unsupported SHA2 size " ++ show hashsize
232
233sha3Hasher :: HashSize -> Hasher
234sha3Hasher (HashSize hashsize)
235	| hashsize == 256 = mkHasher sha3_256 sha3_256_context
236	| hashsize == 224 = mkHasher sha3_224 sha3_224_context
237	| hashsize == 384 = mkHasher sha3_384 sha3_384_context
238	| hashsize == 512 = mkHasher sha3_512 sha3_512_context
239	| otherwise = error $ "unsupported SHA3 size " ++ show hashsize
240
241skeinHasher :: HashSize -> Hasher
242skeinHasher (HashSize hashsize)
243	| hashsize == 256 = mkHasher skein256 skein256_context
244	| hashsize == 512 = mkHasher skein512 skein512_context
245	| otherwise = error $ "unsupported SKEIN size " ++ show hashsize
246
247blake2bHasher :: HashSize -> Hasher
248blake2bHasher (HashSize hashsize)
249	| hashsize == 256 = mkHasher blake2b_256 blake2b_256_context
250	| hashsize == 512 = mkHasher blake2b_512 blake2b_512_context
251	| hashsize == 160 = mkHasher blake2b_160 blake2b_160_context
252	| hashsize == 224 = mkHasher blake2b_224 blake2b_224_context
253	| hashsize == 384 = mkHasher blake2b_384 blake2b_384_context
254	| otherwise = error $ "unsupported BLAKE2B size " ++ show hashsize
255
256blake2bpHasher :: HashSize -> Hasher
257blake2bpHasher (HashSize hashsize)
258	| hashsize == 512 = mkHasher blake2bp_512 blake2bp_512_context
259	| otherwise = error $ "unsupported BLAKE2BP size " ++ show hashsize
260
261blake2sHasher :: HashSize -> Hasher
262blake2sHasher (HashSize hashsize)
263	| hashsize == 256 = mkHasher blake2s_256 blake2s_256_context
264	| hashsize == 160 = mkHasher blake2s_160 blake2s_160_context
265	| hashsize == 224 = mkHasher blake2s_224 blake2s_224_context
266	| otherwise = error $ "unsupported BLAKE2S size " ++ show hashsize
267
268blake2spHasher :: HashSize -> Hasher
269blake2spHasher (HashSize hashsize)
270	| hashsize == 256 = mkHasher blake2sp_256 blake2sp_256_context
271	| hashsize == 224 = mkHasher blake2sp_224 blake2sp_224_context
272	| otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize
273
274sha1Hasher :: Hasher
275sha1Hasher = mkHasher sha1 sha1_context
276
277md5Hasher :: Hasher
278md5Hasher = mkHasher md5 md5_context
279
280descChecksum :: String
281descChecksum = "checksum"
282
283{- A varient of the SHA256E backend, for testing that needs special keys
284 - that cannot collide with legitimate keys in the repository.
285 -
286 - This is accomplished by appending a special extension to the key,
287 - that is not one that selectExtension would select (due to being too
288 - long).
289 -}
290testKeyBackend :: Backend
291testKeyBackend =
292	let b = genBackendE (SHA2Hash (HashSize 256))
293	    gk = case genKey b of
294		Nothing -> Nothing
295		Just f -> Just (\ks p -> addTestE <$> f ks p)
296	in b { genKey = gk }
297  where
298	addTestE k = alterKey k $ \d -> d
299		{ keyName = keyName d <> longext
300		}
301	longext = ".this-is-a-test-key"
302