1 // scrypt.cpp - written and placed in public domain by Jeffrey Walton.
2 //              Based on reference source code by Colin Percival for
3 //              Scrypt and Daniel Bernstein for Salsa20 core.
4 
5 #include "pch.h"
6 
7 #include "scrypt.h"
8 #include "algparam.h"
9 #include "argnames.h"
10 #include "pwdbased.h"
11 #include "stdcpp.h"
12 #include "salsa.h"
13 #include "misc.h"
14 #include "sha.h"
15 
16 #include <sstream>
17 #include <limits>
18 
19 #ifdef _OPENMP
20 # include <omp.h>
21 #endif
22 
23 // https://github.com/weidai11/cryptopp/issues/777
24 #if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
25 # if defined(__clang__)
26 #  pragma GCC diagnostic ignored "-Wtautological-compare"
27 # elif defined(__GNUC__)
28 #  pragma GCC diagnostic ignored "-Wtype-limits"
29 # endif
30 #endif
31 
32 ANONYMOUS_NAMESPACE_BEGIN
33 
34 using CryptoPP::byte;
35 using CryptoPP::word32;
36 using CryptoPP::word64;
37 using CryptoPP::GetWord;
38 using CryptoPP::PutWord;
39 using CryptoPP::Salsa20_Core;
40 using CryptoPP::rotlConstant;
41 using CryptoPP::LITTLE_ENDIAN_ORDER;
42 using CryptoPP::AlignedSecByteBlock;
43 
LE32ENC(byte * out,word32 in)44 inline void LE32ENC(byte* out, word32 in)
45 {
46     PutWord(false, LITTLE_ENDIAN_ORDER, out, in);
47 }
48 
LE32DEC(const byte * in)49 inline word32 LE32DEC(const byte* in)
50 {
51     return GetWord<word32>(false, LITTLE_ENDIAN_ORDER, in);
52 }
53 
LE64DEC(const byte * in)54 inline word64 LE64DEC(const byte* in)
55 {
56     return GetWord<word64>(false, LITTLE_ENDIAN_ORDER, in);
57 }
58 
BlockCopy(byte * dest,byte * src,size_t len)59 inline void BlockCopy(byte* dest, byte* src, size_t len)
60 {
61 // OpenMP 4.0 released July 2013.
62 #if _OPENMP >= 201307
63     #pragma omp simd
64     for (size_t i = 0; i < len; ++i)
65         dest[i] = src[i];
66 #else
67     for (size_t i = 0; i < len; ++i)
68         dest[i] = src[i];
69 #endif
70 }
71 
BlockXOR(byte * dest,byte * src,size_t len)72 inline void BlockXOR(byte* dest, byte* src, size_t len)
73 {
74 // OpenMP 4.0 released July 2013.
75 #if _OPENMP >= 201307
76     #pragma omp simd
77     for (size_t i = 0; i < len; ++i)
78         dest[i] ^= src[i];
79 #else
80     for (size_t i = 0; i < len; ++i)
81         dest[i] ^= src[i];
82 #endif
83 }
84 
PBKDF2_SHA256(byte * buf,size_t dkLen,const byte * passwd,size_t passwdlen,const byte * salt,size_t saltlen,byte count)85 inline void PBKDF2_SHA256(byte* buf, size_t dkLen,
86     const byte* passwd, size_t passwdlen,
87     const byte* salt, size_t saltlen, byte count)
88 {
89     using CryptoPP::SHA256;
90     using CryptoPP::PKCS5_PBKDF2_HMAC;
91 
92     PKCS5_PBKDF2_HMAC<SHA256> pbkdf;
93     pbkdf.DeriveKey(buf, dkLen, 0, passwd, passwdlen, salt, saltlen, count, 0.0f);
94 }
95 
Salsa20_8(byte B[64])96 inline void Salsa20_8(byte B[64])
97 {
98     word32 B32[16];
99 
100     for (size_t i = 0; i < 16; ++i)
101         B32[i] = LE32DEC(&B[i * 4]);
102 
103     Salsa20_Core(B32, 8);
104 
105     for (size_t i = 0; i < 16; ++i)
106         LE32ENC(&B[4 * i], B32[i]);
107 }
108 
BlockMix(byte * B,byte * Y,size_t r)109 inline void BlockMix(byte* B, byte* Y, size_t r)
110 {
111     byte X[64];
112 
113     // 1: X <-- B_{2r - 1}
114     BlockCopy(X, &B[(2 * r - 1) * 64], 64);
115 
116     // 2: for i = 0 to 2r - 1 do
117     for (size_t i = 0; i < 2 * r; ++i)
118     {
119         // 3: X <-- H(X \xor B_i)
120         BlockXOR(X, &B[i * 64], 64);
121         Salsa20_8(X);
122 
123         // 4: Y_i <-- X
124         BlockCopy(&Y[i * 64], X, 64);
125     }
126 
127     // 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1})
128     for (size_t i = 0; i < r; ++i)
129         BlockCopy(&B[i * 64], &Y[(i * 2) * 64], 64);
130 
131     for (size_t i = 0; i < r; ++i)
132         BlockCopy(&B[(i + r) * 64], &Y[(i * 2 + 1) * 64], 64);
133 }
134 
Integerify(byte * B,size_t r)135 inline word64 Integerify(byte* B, size_t r)
136 {
137     byte* X = &B[(2 * r - 1) * 64];
138     return LE64DEC(X);
139 }
140 
Smix(byte * B,size_t r,word64 N,byte * V,byte * XY)141 inline void Smix(byte* B, size_t r, word64 N, byte* V, byte* XY)
142 {
143     byte* X = XY;
144     byte* Y = XY+128*r;
145 
146     // 1: X <-- B
147     BlockCopy(X, B, 128 * r);
148 
149     // 2: for i = 0 to N - 1 do
150     for (word64 i = 0; i < N; ++i)
151     {
152         // 3: V_i <-- X
153         BlockCopy(&V[i * (128 * r)], X, 128 * r);
154 
155         // 4: X <-- H(X)
156         BlockMix(X, Y, r);
157     }
158 
159     // 6: for i = 0 to N - 1 do
160     for (word64 i = 0; i < N; ++i)
161     {
162         // 7: j <-- Integerify(X) mod N
163         word64 j = Integerify(X, r) & (N - 1);
164 
165         // 8: X <-- H(X \xor V_j)
166         BlockXOR(X, &V[j * (128 * r)], 128 * r);
167         BlockMix(X, Y, r);
168     }
169 
170     // 10: B' <-- X
171     BlockCopy(B, X, 128 * r);
172 }
173 
174 ANONYMOUS_NAMESPACE_END
175 
NAMESPACE_BEGIN(CryptoPP)176 NAMESPACE_BEGIN(CryptoPP)
177 
178 size_t Scrypt::GetValidDerivedLength(size_t keylength) const
179 {
180     if (keylength > MaxDerivedKeyLength())
181         return MaxDerivedKeyLength();
182     return keylength;
183 }
184 
ValidateParameters(size_t derivedLen,word64 cost,word64 blockSize,word64 parallelization) const185 void Scrypt::ValidateParameters(size_t derivedLen, word64 cost, word64 blockSize, word64 parallelization) const
186 {
187     // https://github.com/weidai11/cryptopp/issues/842
188     CRYPTOPP_ASSERT(derivedLen != 0);
189     CRYPTOPP_ASSERT(cost != 0);
190     CRYPTOPP_ASSERT(blockSize != 0);
191     CRYPTOPP_ASSERT(parallelization != 0);
192 
193     if (cost == 0)
194         throw InvalidArgument("Scrypt: cost cannot be 0");
195 
196     if (blockSize == 0)
197         throw InvalidArgument("Scrypt: block size cannot be 0");
198 
199     if (parallelization == 0)
200         throw InvalidArgument("Scrypt: parallelization cannot be 0");
201 
202     // Optimizer should remove this on 32-bit platforms
203     if (std::numeric_limits<size_t>::max() > std::numeric_limits<word32>::max())
204     {
205         const word64 maxLen = ((static_cast<word64>(1) << 32) - 1) * 32;
206         if (derivedLen > maxLen) {
207             std::ostringstream oss;
208             oss << "derivedLen " << derivedLen << " is larger than " << maxLen;
209             throw InvalidArgument("Scrypt: " + oss.str());
210         }
211     }
212 
213     // https://github.com/weidai11/cryptopp/issues/787
214     CRYPTOPP_ASSERT(parallelization <= static_cast<word64>(std::numeric_limits<int>::max()));
215     if (parallelization > static_cast<word64>(std::numeric_limits<int>::max()))
216     {
217         std::ostringstream oss;
218         oss << " parallelization " << parallelization << " is larger than ";
219         oss << std::numeric_limits<int>::max();
220         throw InvalidArgument("Scrypt: " + oss.str());
221     }
222 
223     CRYPTOPP_ASSERT(IsPowerOf2(cost));
224     if (IsPowerOf2(cost) == false)
225         throw InvalidArgument("Scrypt: cost must be a power of 2");
226 
227     const word64 prod = static_cast<word64>(blockSize) * parallelization;
228     CRYPTOPP_ASSERT(prod < (1U << 30));
229 
230     if (prod >= (1U << 30)) {
231         std::ostringstream oss;
232         oss << "r*p " << prod << " is larger than " << (1U << 30);
233         throw InvalidArgument("Scrypt: " + oss.str());
234     }
235 
236     // Scrypt has several tests that effectively verify allocations like
237     // '128 * r * N' and '128 * r * p' do not overflow. They are the tests
238     // that set errno to ENOMEM. We can make the logic a little more clear
239     // using word128. At first blush the word128 may seem like  overkill.
240     // However, this alogirthm is dominated by slow moving parts, so a
241     // one-time check is insignificant in the bigger picture.
242 #if defined(CRYPTOPP_WORD128_AVAILABLE)
243     const word128 maxElems = static_cast<word128>(SIZE_MAX);
244     bool  bLimit = (maxElems >= static_cast<word128>(cost) * blockSize * 128U);
245     bool xyLimit = (maxElems >= static_cast<word128>(parallelization) * blockSize * 128U);
246     bool  vLimit = (maxElems >= static_cast<word128>(blockSize) * 256U + 64U);
247 #else
248     const word64 maxElems = static_cast<word64>(SIZE_MAX);
249     bool  bLimit = (blockSize < maxElems / 128U / cost);
250     bool xyLimit = (blockSize < maxElems / 128U / parallelization);
251     bool  vLimit = (blockSize < (maxElems - 64U) / 256U);
252 #endif
253 
254     CRYPTOPP_ASSERT(bLimit); CRYPTOPP_ASSERT(xyLimit); CRYPTOPP_ASSERT(vLimit);
255     if (!bLimit || !xyLimit || !vLimit)
256         throw std::bad_alloc();
257 }
258 
DeriveKey(byte * derived,size_t derivedLen,const byte * secret,size_t secretLen,const NameValuePairs & params) const259 size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen,
260     const byte*secret, size_t secretLen, const NameValuePairs& params) const
261 {
262     CRYPTOPP_ASSERT(secret /*&& secretLen*/);
263     CRYPTOPP_ASSERT(derived && derivedLen);
264     CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength());
265 
266     word64 cost=0, blockSize=0, parallelization=0;
267     if(params.GetValue("Cost", cost) == false)
268         cost = defaultCost;
269 
270     if(params.GetValue("BlockSize", blockSize) == false)
271         blockSize = defaultBlockSize;
272 
273     if(params.GetValue("Parallelization", parallelization) == false)
274         parallelization = defaultParallelization;
275 
276     ConstByteArrayParameter salt;
277     (void)params.GetValue("Salt", salt);
278 
279     return DeriveKey(derived, derivedLen, secret, secretLen, salt.begin(), salt.size(), cost, blockSize, parallelization);
280 }
281 
DeriveKey(byte * derived,size_t derivedLen,const byte * secret,size_t secretLen,const byte * salt,size_t saltLen,word64 cost,word64 blockSize,word64 parallel) const282 size_t Scrypt::DeriveKey(byte*derived, size_t derivedLen, const byte*secret, size_t secretLen,
283     const byte*salt, size_t saltLen, word64 cost, word64 blockSize, word64 parallel) const
284 {
285     CRYPTOPP_ASSERT(secret /*&& secretLen*/);
286     CRYPTOPP_ASSERT(derived && derivedLen);
287     CRYPTOPP_ASSERT(derivedLen <= MaxDerivedKeyLength());
288 
289     ThrowIfInvalidDerivedKeyLength(derivedLen);
290     ValidateParameters(derivedLen, cost, blockSize, parallel);
291 
292     AlignedSecByteBlock B(static_cast<size_t>(blockSize * parallel * 128U));
293 
294     // 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen)
295     PBKDF2_SHA256(B, B.size(), secret, secretLen, salt, saltLen, 1);
296 
297     // Visual Studio and OpenMP 2.0 fixup. We must use int, not size_t.
298     int maxParallel=0;
299     if (!SafeConvert(parallel, maxParallel))
300         maxParallel = std::numeric_limits<int>::max();
301 
302     #ifdef _OPENMP
303     int threads = STDMIN(omp_get_max_threads(), maxParallel);
304     #endif
305 
306     // http://stackoverflow.com/q/49604260/608639
307     #pragma omp parallel num_threads(threads)
308     {
309         // Each thread gets its own copy
310         AlignedSecByteBlock XY(static_cast<size_t>(blockSize * 256U));
311         AlignedSecByteBlock  V(static_cast<size_t>(blockSize * cost * 128U));
312 
313         // 2: for i = 0 to p - 1 do
314         #pragma omp for
315         for (int i = 0; i < maxParallel; ++i)
316         {
317             // 3: B_i <-- MF(B_i, N)
318             const ptrdiff_t offset = static_cast<ptrdiff_t>(blockSize*i*128);
319             Smix(B+offset, static_cast<size_t>(blockSize), cost, V, XY);
320         }
321     }
322 
323     // 5: DK <-- PBKDF2(P, B, 1, dkLen)
324     PBKDF2_SHA256(derived, derivedLen, secret, secretLen, B, B.size(), 1);
325 
326     return 1;
327 }
328 
329 NAMESPACE_END
330