1 // scrypt.cpp - written and placed in public domain by Jeffrey Walton.
2 // Based on reference source code by Colin Percival for
3 // Scrypt and Daniel Bernstein for Salsa20 core.
4
5 #include "pch.h"
6
7 #include "scrypt.h"
8 #include "algparam.h"
9 #include "argnames.h"
10 #include "pwdbased.h"
11 #include "stdcpp.h"
12 #include "salsa.h"
13 #include "misc.h"
14 #include "sha.h"
15
16 #include <sstream>
17 #include <limits>
18
19 #ifdef _OPENMP
20 # include <omp.h>
21 #endif
22
23 // https://github.com/weidai11/cryptopp/issues/777
24 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
25 # if defined(__clang__)
26 # pragma GCC diagnostic ignored "-Wtautological-compare"
27 # elif defined(__GNUC__)
28 # pragma GCC diagnostic ignored "-Wtype-limits"
29 # endif
30 #endif
31
32 ANONYMOUS_NAMESPACE_BEGIN
33
34 using CryptoPP::byte;
35 using CryptoPP::word32;
36 using CryptoPP::word64;
37 using CryptoPP::GetWord;
38 using CryptoPP::PutWord;
39 using CryptoPP::Salsa20_Core;
40 using CryptoPP::rotlConstant;
41 using CryptoPP::LITTLE_ENDIAN_ORDER;
42 using CryptoPP::AlignedSecByteBlock;
43
LE32ENC(byte * out,word32 in)44 inline void LE32ENC(byte* out, word32 in)
45 {
46 PutWord(false, LITTLE_ENDIAN_ORDER, out, in);
47 }
48
LE32DEC(const byte * in)49 inline word32 LE32DEC(const byte* in)
50 {
51 return GetWord<word32>(false, LITTLE_ENDIAN_ORDER, in);
52 }
53
LE64DEC(const byte * in)54 inline word64 LE64DEC(const byte* in)
55 {
56 return GetWord<word64>(false, LITTLE_ENDIAN_ORDER, in);
57 }
58
BlockCopy(byte * dest,byte * src,size_t len)59 inline void BlockCopy(byte* dest, byte* src, size_t len)
60 {
61 // OpenMP 4.0 released July 2013.
62 #if _OPENMP >= 201307
63 #pragma omp simd
64 for (size_t i = 0; i < len; ++i)
65 dest[i] = src[i];
66 #else
67 for (size_t i = 0; i < len; ++i)
68 dest[i] = src[i];
69 #endif
70 }
71
BlockXOR(byte * dest,byte * src,size_t len)72 inline void BlockXOR(byte* dest, byte* src, size_t len)
73 {
74 // OpenMP 4.0 released July 2013.
75 #if _OPENMP >= 201307
76 #pragma omp simd
77 for (size_t i = 0; i < len; ++i)
78 dest[i] ^= src[i];
79 #else
80 for (size_t i = 0; i < len; ++i)
81 dest[i] ^= src[i];
82 #endif
83 }
84
PBKDF2_SHA256(byte * buf,size_t dkLen,const byte * passwd,size_t passwdlen,const byte * salt,size_t saltlen,byte count)85 inline void PBKDF2_SHA256(byte* buf, size_t dkLen,
86 const byte* passwd, size_t passwdlen,
87 const byte* salt, size_t saltlen, byte count)
88 {
89 using CryptoPP::SHA256;
90 using CryptoPP::PKCS5_PBKDF2_HMAC;
91
92 PKCS5_PBKDF2_HMAC<SHA256> pbkdf;
93 pbkdf.DeriveKey(buf, dkLen, 0, passwd, passwdlen, salt, saltlen, count, 0.0f);
94 }
95
Salsa20_8(byte B[64])96 inline void Salsa20_8(byte B[64])
97 {
98 word32 B32[16];
99
100 for (size_t i = 0; i < 16; ++i)
101 B32[i] = LE32DEC(&B[i * 4]);
102
103 Salsa20_Core(B32, 8);
104
105 for (size_t i = 0; i < 16; ++i)
106 LE32ENC(&B[4 * i], B32[i]);
107 }
108
BlockMix(byte * B,byte * Y,size_t r)109 inline void BlockMix(byte* B, byte* Y, size_t r)
110 {
111 byte X[64];
112
113 // 1: X <-- B_{2r - 1}
114 BlockCopy(X, &B[(2 * r - 1) * 64], 64);
115
116 // 2: for i = 0 to 2r - 1 do
117 for (size_t i = 0; i < 2 * r; ++i)
118 {
119 // 3: X <-- H(X \xor B_i)
120 BlockXOR(X, &B[i * 64], 64);
121 Salsa20_8(X);
122
123 // 4: Y_i <-- X
124 BlockCopy(&Y[i * 64], X, 64);
125 }
126
127 // 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1})
128 for (size_t i = 0; i < r; ++i)
129 BlockCopy(&B[i * 64], &Y[(i * 2) * 64], 64);
130
131 for (size_t i = 0; i < r; ++i)
132 BlockCopy(&B[(i + r) * 64], &Y[(i * 2 + 1) * 64], 64);
133 }
134
Integerify(byte * B,size_t r)135 inline word64 Integerify(byte* B, size_t r)
136 {
137 byte* X = &B[(2 * r - 1) * 64];
138 return LE64DEC(X);
139 }
140
Smix(byte * B,size_t r,word64 N,byte * V,byte * XY)141 inline void Smix(byte* B, size_t r, word64 N, byte* V, byte* XY)
142 {
143 byte* X = XY;
144 byte* Y = XY+128*r;
145
146 // 1: X <-- B
147 BlockCopy(X, B, 128 * r);
148
149 // 2: for i = 0 to N - 1 do
150 for (word64 i = 0; i < N; ++i)
151 {
152 // 3: V_i <-- X
153 BlockCopy(&V[i * (128 * r)], X, 128 * r);
154
155 // 4: X <-- H(X)
156 BlockMix(X, Y, r);
157 }
158
159 // 6: for i = 0 to N - 1 do
160 for (word64 i = 0; i < N; ++i)
161 {
162 // 7: j <-- Integerify(X) mod N
163 word64 j = Integerify(X, r) & (N - 1);
164
165 // 8: X <-- H(X \xor V_j)
166 BlockXOR(X, &V[j * (128 * r)], 128 * r);
167 BlockMix(X, Y, r);
168 }
169
170 // 10: B' <-- X
171 BlockCopy(B, X, 128 * r);
172 }
173
174 ANONYMOUS_NAMESPACE_END
175
NAMESPACE_BEGIN(CryptoPP)176 NAMESPACE_BEGIN(CryptoPP)
177
178 size_t Scrypt::GetValidDerivedLength(size_t keylength) const
179 {
180 if (keylength > MaxDerivedKeyLength())
181 return MaxDerivedKeyLength();
182 return keylength;
183 }
184
ValidateParameters(size_t derivedLen,word64 cost,word64 blockSize,word64 parallelization) const185 void Scrypt::ValidateParameters(size_t derivedLen, word64 cost, word64 blockSize, word64 parallelization) const
186 {
187 // https://github.com/weidai11/cryptopp/issues/842
188 CRYPTOPP_ASSERT(derivedLen != 0);
189 CRYPTOPP_ASSERT(cost != 0);
190 CRYPTOPP_ASSERT(blockSize != 0);
191 CRYPTOPP_ASSERT(parallelization != 0);
192
193 if (cost == 0)
194 throw InvalidArgument("Scrypt: cost cannot be 0");
195
196 if (blockSize == 0)
197 throw InvalidArgument("Scrypt: block size cannot be 0");
198
199 if (parallelization == 0)
200 throw InvalidArgument("Scrypt: parallelization cannot be 0");
201
202 // Optimizer should remove this on 32-bit platforms
203 if (std::numeric_limits<size_t>::max() > std::numeric_limits<word32>::max())
204 {
205 const word64 maxLen = ((static_cast<word64>(1) << 32) - 1) * 32;
206 if (derivedLen > maxLen) {
207 std::ostringstream oss;
208 oss << "derivedLen " << derivedLen << " is larger than " << maxLen;
209 throw InvalidArgument("Scrypt: " + oss.str());
210 }
211 }
212
213 // https://github.com/weidai11/cryptopp/issues/787
214 CRYPTOPP_ASSERT(parallelization <= static_cast<word64>(std::numeric_limits<int>::max()));
215 if (parallelization > static_cast<word64>(std::numeric_limits<int>::max()))
216 {
217 std::ostringstream oss;
218 oss << " parallelization " << parallelization << " is larger than ";
219 oss << std::numeric_limits<int>::max();
220 throw InvalidArgument("Scrypt: " + oss.str());
221 }
222
223 CRYPTOPP_ASSERT(IsPowerOf2(cost));
224 if (IsPowerOf2(cost) == false)
225 throw InvalidArgument("Scrypt: cost must be a power of 2");
226
227 const word64 prod = static_cast<word64>(blockSize) * parallelization;
228 CRYPTOPP_ASSERT(prod < (1U << 30));
229
230 if (prod >= (1U << 30)) {
231 std::ostringstream oss;
232 oss << "r*p " << prod << " is larger than " << (1U << 30);
233 throw InvalidArgument("Scrypt: " + oss.str());
234 }
235
236 // Scrypt has several tests that effectively verify allocations like
237 // '128 * r * N' and '128 * r * p' do not overflow. They are the tests
238 // that set errno to ENOMEM. We can make the logic a little more clear
239 // using word128. At first blush the word128 may seem like overkill.
240 // However, this alogirthm is dominated by slow moving parts, so a
241 // one-time check is insignificant in the bigger picture.
242 #if defined(CRYPTOPP_WORD128_AVAILABLE)
243 const word128 maxElems = static_cast<word128>(SIZE_MAX);
244 bool bLimit = (maxElems >= static_cast<word128>(cost) * blockSize * 128U);
245 bool xyLimit = (maxElems >= static_cast<word128>(parallelization) * blockSize * 128U);
246 bool vLimit = (maxElems >= static_cast<word128>(blockSize) * 256U + 64U);
247 #else
248 const word64 maxElems = static_cast<word64>(SIZE_MAX);
249 bool bLimit = (blockSize < maxElems / 128U / cost);
250 bool xyLimit = (blockSize < maxElems / 128U / parallelization);
251 bool vLimit = (blockSize < (maxElems - 64U) / 256U);
252 #endif
253
254 CRYPTOPP_ASSERT(bLimit); CRYPTOPP_ASSERT(xyLimit); CRYPTOPP_ASSERT(vLimit);
255 if (!bLimit || !xyLimit || !vLimit)
256 throw std::bad_alloc();
257 }
258
DeriveKey(byte * derived,size_t derivedLen,const byte * secret,size_t secretLen,const NameValuePairs & params) const259 size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen,
260 const byte*secret, size_t secretLen, const NameValuePairs& params) const
261 {
262 CRYPTOPP_ASSERT(secret /*&& secretLen*/);
263 CRYPTOPP_ASSERT(derived && derivedLen);
264 CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength());
265
266 word64 cost=0, blockSize=0, parallelization=0;
267 if(params.GetValue("Cost", cost) == false)
268 cost = defaultCost;
269
270 if(params.GetValue("BlockSize", blockSize) == false)
271 blockSize = defaultBlockSize;
272
273 if(params.GetValue("Parallelization", parallelization) == false)
274 parallelization = defaultParallelization;
275
276 ConstByteArrayParameter salt;
277 (void)params.GetValue("Salt", salt);
278
279 return DeriveKey(derived, derivedLen, secret, secretLen, salt.begin(), salt.size(), cost, blockSize, parallelization);
280 }
281
DeriveKey(byte * derived,size_t derivedLen,const byte * secret,size_t secretLen,const byte * salt,size_t saltLen,word64 cost,word64 blockSize,word64 parallel) const282 size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen,
283 const byte*salt, size_t saltLen, word64 cost, word64 blockSize, word64 parallel) const
284 {
285 CRYPTOPP_ASSERT(secret /*&& secretLen*/);
286 CRYPTOPP_ASSERT(derived && derivedLen);
287 CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength());
288
289 ThrowIfInvalidDerivedKeyLength(derivedLen);
290 ValidateParameters(derivedLen, cost, blockSize, parallel);
291
292 AlignedSecByteBlock B(static_cast<size_t>(blockSize * parallel * 128U));
293
294 // 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen)
295 PBKDF2_SHA256(B, B.size(), secret, secretLen, salt, saltLen, 1);
296
297 // Visual Studio and OpenMP 2.0 fixup. We must use int, not size_t.
298 int maxParallel=0;
299 if (!SafeConvert(parallel, maxParallel))
300 maxParallel = std::numeric_limits<int>::max();
301
302 #ifdef _OPENMP
303 int threads = STDMIN(omp_get_max_threads(), maxParallel);
304 #endif
305
306 // http://stackoverflow.com/q/49604260/608639
307 #pragma omp parallel num_threads(threads)
308 {
309 // Each thread gets its own copy
310 AlignedSecByteBlock XY(static_cast<size_t>(blockSize * 256U));
311 AlignedSecByteBlock V(static_cast<size_t>(blockSize * cost * 128U));
312
313 // 2: for i = 0 to p - 1 do
314 #pragma omp for
315 for (int i = 0; i < maxParallel; ++i)
316 {
317 // 3: B_i <-- MF(B_i, N)
318 const ptrdiff_t offset = static_cast<ptrdiff_t>(blockSize*i*128);
319 Smix(B+offset, static_cast<size_t>(blockSize), cost, V, XY);
320 }
321 }
322
323 // 5: DK <-- PBKDF2(P, B, 1, dkLen)
324 PBKDF2_SHA256(derived, derivedLen, secret, secretLen, B, B.size(), 1);
325
326 return 1;
327 }
328
329 NAMESPACE_END
330