1 /*
2 * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * This file was originally written by Colin Percival as part of the Tarsnap
27 * online backup system.
28 */
29
30 #include "cpuminer-config.h"
31 #include "miner.h"
32
33 #include <stdlib.h>
34 #include <string.h>
35 #include <inttypes.h>
36
/*
 * SHA-256 padding for the fixed-size messages hashed in this file.
 * Each table holds the 0x80000000 end-of-message marker, zero fill, and
 * the total message length in bits as its last word.
 */

/* Tail of an 80-byte message: 0x280 = 640 bits. */
static const uint32_t keypad[12] = {
	[0]  = 0x80000000,
	[11] = 0x00000280
};

/* Tail of a 64 + 80 + 4 byte message: 0x4a0 = 1184 bits. */
static const uint32_t innerpad[11] = {
	[0]  = 0x80000000,
	[10] = 0x000004a0
};

/* Tail of a 64 + 32 byte message: 0x300 = 768 bits. */
static const uint32_t outerpad[8] = {
	[0] = 0x80000000,
	[7] = 0x00000300
};

/*
 * Whole final block of a 64 + 128 + 4 byte message: a block counter of 1,
 * the end marker, zero fill, and 0x620 = 1568 bits.
 */
static const uint32_t finalblk[16] = {
	[0]  = 0x00000001,
	[1]  = 0x80000000,
	[15] = 0x00000620
};
49
HMAC_SHA256_80_init(const uint32_t * key,uint32_t * tstate,uint32_t * ostate)50 static inline void HMAC_SHA256_80_init(const uint32_t *key,
51 uint32_t *tstate, uint32_t *ostate)
52 {
53 uint32_t ihash[8];
54 uint32_t pad[16];
55 int i;
56
57 /* tstate is assumed to contain the midstate of key */
58 memcpy(pad, key + 16, 16);
59 memcpy(pad + 4, keypad, 48);
60 sha256_transform(tstate, pad, 0);
61 memcpy(ihash, tstate, 32);
62
63 sha256_init(ostate);
64 for (i = 0; i < 8; i++)
65 pad[i] = ihash[i] ^ 0x5c5c5c5c;
66 for (; i < 16; i++)
67 pad[i] = 0x5c5c5c5c;
68 sha256_transform(ostate, pad, 0);
69
70 sha256_init(tstate);
71 for (i = 0; i < 8; i++)
72 pad[i] = ihash[i] ^ 0x36363636;
73 for (; i < 16; i++)
74 pad[i] = 0x36363636;
75 sha256_transform(tstate, pad, 0);
76 }
77
PBKDF2_SHA256_80_128(const uint32_t * tstate,const uint32_t * ostate,const uint32_t * salt,uint32_t * output)78 static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
79 const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
80 {
81 uint32_t istate[8], ostate2[8];
82 uint32_t ibuf[16], obuf[16];
83 int i, j;
84
85 memcpy(istate, tstate, 32);
86 sha256_transform(istate, salt, 0);
87
88 memcpy(ibuf, salt + 16, 16);
89 memcpy(ibuf + 5, innerpad, 44);
90 memcpy(obuf + 8, outerpad, 32);
91
92 for (i = 0; i < 4; i++) {
93 memcpy(obuf, istate, 32);
94 ibuf[4] = i + 1;
95 sha256_transform(obuf, ibuf, 0);
96
97 memcpy(ostate2, ostate, 32);
98 sha256_transform(ostate2, obuf, 0);
99 for (j = 0; j < 8; j++)
100 output[8 * i + j] = swab32(ostate2[j]);
101 }
102 }
103
PBKDF2_SHA256_128_32(uint32_t * tstate,uint32_t * ostate,const uint32_t * salt,uint32_t * output)104 static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
105 const uint32_t *salt, uint32_t *output)
106 {
107 uint32_t buf[16];
108 int i;
109
110 sha256_transform(tstate, salt, 1);
111 sha256_transform(tstate, salt + 16, 1);
112 sha256_transform(tstate, finalblk, 0);
113 memcpy(buf, tstate, 32);
114 memcpy(buf + 8, outerpad, 32);
115
116 sha256_transform(ostate, buf, 0);
117 for (i = 0; i < 8; i++)
118 output[i] = swab32(ostate[i]);
119 }
120
121
122 #ifdef HAVE_SHA256_4WAY
123
/*
 * 4-lane copies of the SHA-256 padding tables: every padding word appears
 * four times in a row, once per lane.  Words that are not listed are zero.
 */
static const uint32_t keypad_4way[4 * 12] = {
	[4 *  0] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 11] = 0x00000280, 0x00000280, 0x00000280, 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
	[4 *  0] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 10] = 0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 7] = 0x00000300, 0x00000300, 0x00000300, 0x00000300
};
/* Handed to sha256_transform_4way() directly, hence the 16-byte alignment. */
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
	[4 *  0] = 0x00000001, 0x00000001, 0x00000001, 0x00000001,
	[4 *  1] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 15] = 0x00000620, 0x00000620, 0x00000620, 0x00000620
};
179
HMAC_SHA256_80_init_4way(const uint32_t * key,uint32_t * tstate,uint32_t * ostate)180 static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
181 uint32_t *tstate, uint32_t *ostate)
182 {
183 uint32_t ihash[4 * 8] __attribute__((aligned(16)));
184 uint32_t pad[4 * 16] __attribute__((aligned(16)));
185 int i;
186
187 /* tstate is assumed to contain the midstate of key */
188 memcpy(pad, key + 4 * 16, 4 * 16);
189 memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
190 sha256_transform_4way(tstate, pad, 0);
191 memcpy(ihash, tstate, 4 * 32);
192
193 sha256_init_4way(ostate);
194 for (i = 0; i < 4 * 8; i++)
195 pad[i] = ihash[i] ^ 0x5c5c5c5c;
196 for (; i < 4 * 16; i++)
197 pad[i] = 0x5c5c5c5c;
198 sha256_transform_4way(ostate, pad, 0);
199
200 sha256_init_4way(tstate);
201 for (i = 0; i < 4 * 8; i++)
202 pad[i] = ihash[i] ^ 0x36363636;
203 for (; i < 4 * 16; i++)
204 pad[i] = 0x36363636;
205 sha256_transform_4way(tstate, pad, 0);
206 }
207
PBKDF2_SHA256_80_128_4way(const uint32_t * tstate,const uint32_t * ostate,const uint32_t * salt,uint32_t * output)208 static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
209 const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
210 {
211 uint32_t istate[4 * 8] __attribute__((aligned(16)));
212 uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
213 uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
214 uint32_t obuf[4 * 16] __attribute__((aligned(16)));
215 int i, j;
216
217 memcpy(istate, tstate, 4 * 32);
218 sha256_transform_4way(istate, salt, 0);
219
220 memcpy(ibuf, salt + 4 * 16, 4 * 16);
221 memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
222 memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
223
224 for (i = 0; i < 4; i++) {
225 memcpy(obuf, istate, 4 * 32);
226 ibuf[4 * 4 + 0] = i + 1;
227 ibuf[4 * 4 + 1] = i + 1;
228 ibuf[4 * 4 + 2] = i + 1;
229 ibuf[4 * 4 + 3] = i + 1;
230 sha256_transform_4way(obuf, ibuf, 0);
231
232 memcpy(ostate2, ostate, 4 * 32);
233 sha256_transform_4way(ostate2, obuf, 0);
234 for (j = 0; j < 4 * 8; j++)
235 output[4 * 8 * i + j] = swab32(ostate2[j]);
236 }
237 }
238
PBKDF2_SHA256_128_32_4way(uint32_t * tstate,uint32_t * ostate,const uint32_t * salt,uint32_t * output)239 static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
240 uint32_t *ostate, const uint32_t *salt, uint32_t *output)
241 {
242 uint32_t buf[4 * 16] __attribute__((aligned(16)));
243 int i;
244
245 sha256_transform_4way(tstate, salt, 1);
246 sha256_transform_4way(tstate, salt + 4 * 16, 1);
247 sha256_transform_4way(tstate, finalblk_4way, 0);
248 memcpy(buf, tstate, 4 * 32);
249 memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
250
251 sha256_transform_4way(ostate, buf, 0);
252 for (i = 0; i < 4 * 8; i++)
253 output[i] = swab32(ostate[i]);
254 }
255
256 #endif /* HAVE_SHA256_4WAY */
257
258
259 #ifdef HAVE_SHA256_8WAY
260
/*
 * 8-lane final inner block: a block counter of 1, the 0x80000000 end
 * marker and a bit length of 0x620, each repeated once per lane.  Words
 * that are not listed are zero.  Handed to sha256_transform_8way()
 * directly, hence the 32-byte alignment.
 */
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
	[8 *  0] = 0x00000001, 0x00000001, 0x00000001, 0x00000001,
	           0x00000001, 0x00000001, 0x00000001, 0x00000001,
	[8 *  1] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	           0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[8 * 15] = 0x00000620, 0x00000620, 0x00000620, 0x00000620,
	           0x00000620, 0x00000620, 0x00000620, 0x00000620
};
279
/*
 * Eight-lane counterpart of HMAC_SHA256_80_init().
 *
 * Every word is replicated per lane (8 consecutive entries per word).  The
 * key padding is generated in place instead of being copied from a table.
 *
 * key:    8 x 20 words; only word rows 16..19 are read here.
 * tstate: in:  8-lane state after the first 64 key bytes (assumed);
 *         out: fresh 8-lane state after the 0x36-masked key block.
 * ostate: out: fresh 8-lane state after the 0x5c-masked key block.
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t khash[8 * 8] __attribute__((aligned(32)));
	uint32_t block[8 * 16] __attribute__((aligned(32)));
	int lane, n;

	/* tstate is assumed to contain the midstate of key */
	memcpy(&block[0], &key[8 * 16], 8 * 4 * sizeof(uint32_t));
	/* Row 4: end marker; rows 5..14: zero; row 15: length of 640 bits. */
	for (lane = 0; lane < 8; lane++) {
		block[8 * 4 + lane] = 0x80000000;
		block[8 * 15 + lane] = 0x00000280;
	}
	memset(&block[8 * 5], 0x00, 8 * 10 * sizeof(uint32_t));
	sha256_transform_8way(tstate, block, 0);
	memcpy(khash, tstate, sizeof(khash));

	/* Outer state: key digest XOR 0x5c, the rest of the block is 0x5c. */
	sha256_init_8way(ostate);
	for (n = 0; n < 8 * 16; n++)
		block[n] = (n < 8 * 8) ? (khash[n] ^ 0x5c5c5c5c) : 0x5c5c5c5c;
	sha256_transform_8way(ostate, block, 0);

	/* Inner state: key digest XOR 0x36, the rest of the block is 0x36. */
	sha256_init_8way(tstate);
	for (n = 0; n < 8 * 16; n++)
		block[n] = (n < 8 * 8) ? (khash[n] ^ 0x36363636) : 0x36363636;
	sha256_transform_8way(tstate, block, 0);
}
311
/*
 * Eight-lane counterpart of PBKDF2_SHA256_80_128().
 *
 * tstate, ostate: 8-lane inner and outer HMAC states; only read.
 * salt:           8 x 20 words, one entry per lane for each word.
 * output:         8 x 32 words, written as four 8 x 8 word blocks for
 *                 block counters 1..4, each word passed through swab32().
 *
 * salt is fully consumed before output is first written, so callers may
 * pass the same buffer for both.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner_mid[8 * 8] __attribute__((aligned(32)));
	uint32_t outer[8 * 8] __attribute__((aligned(32)));
	uint32_t inner_blk[8 * 16] __attribute__((aligned(32)));
	uint32_t outer_blk[8 * 16] __attribute__((aligned(32)));
	uint32_t blkno;
	int lane, n;

	/* The first 64 salt bytes per lane are common to all four blocks. */
	memcpy(inner_mid, tstate, sizeof(inner_mid));
	sha256_transform_8way(inner_mid, salt, 0);

	/*
	 * Inner block: rows 0..3 salt tail, row 4 counter (set per block),
	 * row 5 end marker, rows 6..14 zero, row 15 length of 1184 bits.
	 */
	memcpy(&inner_blk[0], &salt[8 * 16], 8 * 4 * sizeof(uint32_t));
	memset(&inner_blk[8 * 6], 0x00, 8 * 9 * sizeof(uint32_t));
	/*
	 * Outer block: rows 0..7 inner digest (set per block), row 8 end
	 * marker, rows 9..14 zero, row 15 length of 768 bits.
	 */
	memset(&outer_blk[8 * 9], 0x00, 8 * 6 * sizeof(uint32_t));
	for (lane = 0; lane < 8; lane++) {
		inner_blk[8 * 5 + lane] = 0x80000000;
		inner_blk[8 * 15 + lane] = 0x000004a0;
		outer_blk[8 * 8 + lane] = 0x80000000;
		outer_blk[8 * 15 + lane] = 0x00000300;
	}

	for (blkno = 1; blkno <= 4; blkno++) {
		uint32_t *out = output + 8 * 8 * (blkno - 1);

		memcpy(outer_blk, inner_mid, sizeof(inner_mid));
		for (lane = 0; lane < 8; lane++)
			inner_blk[8 * 4 + lane] = blkno;
		sha256_transform_8way(outer_blk, inner_blk, 0);

		memcpy(outer, ostate, sizeof(outer));
		sha256_transform_8way(outer, outer_blk, 0);
		for (n = 0; n < 8 * 8; n++)
			out[n] = swab32(outer[n]);
	}
}
355
PBKDF2_SHA256_128_32_8way(uint32_t * tstate,uint32_t * ostate,const uint32_t * salt,uint32_t * output)356 static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
357 uint32_t *ostate, const uint32_t *salt, uint32_t *output)
358 {
359 uint32_t buf[8 * 16] __attribute__((aligned(32)));
360 int i;
361
362 sha256_transform_8way(tstate, salt, 1);
363 sha256_transform_8way(tstate, salt + 8 * 16, 1);
364 sha256_transform_8way(tstate, finalblk_8way, 0);
365
366 memcpy(buf, tstate, 8 * 32);
367 for (i = 0; i < 8; i++)
368 buf[8 * 8 + i] = 0x80000000;
369 memset(buf + 8 * 9, 0x00, 8 * 24);
370 for (i = 0; i < 8; i++)
371 buf[8 * 15 + i] = 0x00000300;
372 sha256_transform_8way(ostate, buf, 0);
373
374 for (i = 0; i < 8 * 8; i++)
375 output[i] = swab32(ostate[i]);
376 }
377
378 #endif /* HAVE_SHA256_8WAY */
379
380
381 #if defined(USE_ASM) && defined(__x86_64__)
382
383 #define SCRYPT_MAX_WAYS 12
384 #define HAVE_SCRYPT_3WAY 1
385 int scrypt_best_throughput();
386 void scrypt_core(uint32_t *X, uint32_t *V, int N);
387 void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
388 #if defined(USE_AVX2)
389 #undef SCRYPT_MAX_WAYS
390 #define SCRYPT_MAX_WAYS 24
391 #define HAVE_SCRYPT_6WAY 1
392 void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
393 #endif
394
395 #elif defined(USE_ASM) && defined(__i386__)
396
397 #define SCRYPT_MAX_WAYS 4
398 #define scrypt_best_throughput() 1
399 void scrypt_core(uint32_t *X, uint32_t *V, int N);
400
401 #elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
402
403 void scrypt_core(uint32_t *X, uint32_t *V, int N);
404 #if defined(__ARM_NEON__)
405 #undef HAVE_SHA256_4WAY
406 #define SCRYPT_MAX_WAYS 3
407 #define HAVE_SCRYPT_3WAY 1
408 #define scrypt_best_throughput() 3
409 void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
410 #endif
411
412 #elif defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
413
414 #define SCRYPT_MAX_WAYS 4
415 #define scrypt_best_throughput() 1
416 void scrypt_core(uint32_t *X, uint32_t *V, int N);
417
418 #else
419
/*
 * B ^= Bx, then B += P(B), where P is eight rounds of the Salsa20
 * permutation (four column/row double-rounds) applied to the XORed block.
 *
 * B:  16 words, updated in place.
 * Bx: 16 words, only read.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x[16];
	int i, dround;

	for (i = 0; i < 16; i++)
		x[i] = (B[i] ^= Bx[i]);

#define ROTL32(v, c) (((v) << (c)) | ((v) >> (32 - (c))))
/* One Salsa20 quarter-round on words a, b, c, d of x[]. */
#define QROUND(a, b, c, d) do {                  \
		x[b] ^= ROTL32(x[a] + x[d],  7); \
		x[c] ^= ROTL32(x[b] + x[a],  9); \
		x[d] ^= ROTL32(x[c] + x[b], 13); \
		x[a] ^= ROTL32(x[d] + x[c], 18); \
	} while (0)

	for (dround = 0; dround < 4; dround++) {
		/* Operate on columns. */
		QROUND( 0,  4,  8, 12);
		QROUND( 5,  9, 13,  1);
		QROUND(10, 14,  2,  6);
		QROUND(15,  3,  7, 11);

		/* Operate on rows. */
		QROUND( 0,  1,  2,  3);
		QROUND( 5,  6,  7,  4);
		QROUND(10, 11,  8,  9);
		QROUND(15, 12, 13, 14);
	}

#undef QROUND
#undef ROTL32

	for (i = 0; i < 16; i++)
		B[i] += x[i];
}
487
/*
 * Portable scrypt core operating on one 128-byte block.
 *
 * X: 32 words, transformed in place.
 * V: scratch area of at least N * 32 words.
 * N: number of blocks stored and number of mixing steps.  The lookup index
 *    is reduced with the mask N - 1, which is only a true "modulo N" when
 *    N is a power of two.  N <= 0 leaves X untouched.
 *
 * Phase 1 stores N successive values of X in V; phase 2 performs N steps,
 * each XORing in the stored block selected by X[16] before mixing again.
 * Offsets into V are computed in size_t so that they do not wrap in
 * 32 bits for large N.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
{
	const size_t count = N > 0 ? (size_t)N : 0;
	const uint32_t mask = (uint32_t)N - 1u;
	size_t i, j, k;

	for (i = 0; i < count; i++) {
		memcpy(&V[i * 32], X, 128);
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}
	for (i = 0; i < count; i++) {
		j = (size_t)32 * (X[16] & mask);
		for (k = 0; k < 32; k++)
			X[k] ^= V[j + k];
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}
}
505
506 #endif
507
#ifndef SCRYPT_MAX_WAYS
/* No back end set a batch size: hash one input at a time. */
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif

/*
 * Allocate a scratchpad for the scrypt routines in this file.
 *
 * The buffer holds N * SCRYPT_MAX_WAYS * 128 bytes plus 63 bytes of slack,
 * which lets the hashing routines round the pointer up to a 64-byte
 * boundary.  It comes from malloc() and is owned by the caller.
 *
 * Returns NULL if N is negative, if the size does not fit in size_t, or if
 * malloc() fails.
 */
unsigned char *scrypt_buffer_alloc(int N)
{
	const size_t bytes_per_n = (size_t)SCRYPT_MAX_WAYS * 128;

	/* Reject sizes that would wrap and yield an undersized buffer. */
	if (N < 0 || (size_t)N > (SIZE_MAX - 63) / bytes_per_n)
		return NULL;

	return malloc((size_t)N * bytes_per_n + 63);
}
517
/*
 * Compute one scrypt hash.
 *
 * input:      80 bytes (20 words).
 * output:     8 words.
 * midstate:   8-word SHA-256 state, copied and used as the state after the
 *             first 64 bytes of input.
 * scratchpad: work area for scrypt_core(); rounded up to a 64-byte boundary
 *             before use.
 * N:          passed through to scrypt_core().
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad, int N)
{
	const uintptr_t align_mask = 63;
	uint32_t inner[8], outer[8];
	uint32_t block[32] __attribute__((aligned(128)));
	uint32_t *const V =
		(uint32_t *)(((uintptr_t)scratchpad + align_mask) & ~align_mask);

	/* Expand the input into the 128-byte working block. */
	memcpy(inner, midstate, sizeof(inner));
	HMAC_SHA256_80_init(input, inner, outer);
	PBKDF2_SHA256_80_128(inner, outer, input, block);

	scrypt_core(block, V, N);

	/* Compress the mixed block into the final 32-byte hash. */
	PBKDF2_SHA256_128_32(inner, outer, block, output);
}
535
536 #ifdef HAVE_SHA256_4WAY
/*
 * Compute four scrypt hashes, using 4-lane SHA-256 for the PBKDF2 stages
 * and the single-lane scrypt_core() once per hash.
 *
 * input:      4 consecutive 20-word inputs.
 * output:     4 consecutive 8-word hashes.
 * midstate:   8-word SHA-256 state shared by all four inputs.
 * scratchpad: work area for scrypt_core(); rounded up to a 64-byte boundary.
 * N:          passed through to scrypt_core().
 *
 * W holds data in the per-word lane order expected by the 4-way SHA-256
 * routines (index 4 * word + lane); X holds it one hash after another
 * (index 32 * lane + word) for scrypt_core().
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
	uint32_t W[4 * 32] __attribute__((aligned(128)));
	uint32_t X[4 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int lane, w;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Spread the inputs and the shared midstate across the lanes. */
	for (lane = 0; lane < 4; lane++) {
		for (w = 0; w < 20; w++)
			W[4 * w + lane] = input[20 * lane + w];
		for (w = 0; w < 8; w++)
			tstate[4 * w + lane] = midstate[w];
	}
	HMAC_SHA256_80_init_4way(W, tstate, ostate);
	PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

	/* Gather each lane, mix it, and scatter it back; lanes are disjoint. */
	for (lane = 0; lane < 4; lane++) {
		uint32_t *Xl = X + 32 * lane;

		for (w = 0; w < 32; w++)
			Xl[w] = W[4 * w + lane];
		scrypt_core(Xl, V, N);
		for (w = 0; w < 32; w++)
			W[4 * w + lane] = Xl[w];
	}

	PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			output[8 * lane + w] = W[4 * w + lane];
}
572 #endif /* HAVE_SHA256_4WAY */
573
574 #ifdef HAVE_SCRYPT_3WAY
575
/*
 * Compute three scrypt hashes, using scalar SHA-256 for the PBKDF2 stages
 * and one call to scrypt_core_3way() for the mixing stage.
 *
 * input:      3 consecutive 20-word inputs.
 * output:     3 consecutive 8-word hashes.
 * midstate:   8-word SHA-256 state shared by all three inputs.
 * scratchpad: work area for scrypt_core_3way(); rounded up to a 64-byte
 *             boundary.
 * N:          passed through to scrypt_core_3way().
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[3 * 8], ostate[3 * 8];
	uint32_t X[3 * 32] __attribute__((aligned(64)));
	uint32_t *V;
	int lane;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Per-lane expansion; the lanes use disjoint slices of every buffer. */
	for (lane = 0; lane < 3; lane++) {
		const uint32_t *in = input + 20 * lane;
		uint32_t *ts = tstate + 8 * lane;
		uint32_t *os = ostate + 8 * lane;

		memcpy(ts, midstate, 8 * sizeof(uint32_t));
		HMAC_SHA256_80_init(in, ts, os);
		PBKDF2_SHA256_80_128(ts, os, in, X + 32 * lane);
	}

	scrypt_core_3way(X, V, N);

	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_128_32(tstate + 8 * lane, ostate + 8 * lane,
			X + 32 * lane, output + 8 * lane);
}
601
602 #ifdef HAVE_SHA256_4WAY
/*
 * Compute twelve scrypt hashes: three groups of four go through the 4-lane
 * SHA-256 PBKDF2 stages, and the twelve working blocks are mixed by four
 * calls to scrypt_core_3way().
 *
 * input:      12 consecutive 20-word inputs.
 * output:     12 consecutive 8-word hashes.
 * midstate:   8-word SHA-256 state shared by all inputs.
 * scratchpad: work area for scrypt_core_3way(); rounded up to a 64-byte
 *             boundary.
 * N:          passed through to scrypt_core_3way().
 *
 * Within group g, W is indexed 128 * g + 4 * word + lane (SHA-256 layout)
 * and X is indexed 128 * g + 32 * lane + word (one hash after another).
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
	uint32_t W[12 * 32] __attribute__((aligned(128)));
	uint32_t X[12 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int g, lane, w, c;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Spread the inputs and the shared midstate across groups and lanes. */
	for (g = 0; g < 3; g++) {
		for (lane = 0; lane < 4; lane++) {
			for (w = 0; w < 20; w++)
				W[128 * g + 4 * w + lane] = input[80 * g + 20 * lane + w];
			for (w = 0; w < 8; w++)
				tstate[32 * g + 4 * w + lane] = midstate[w];
		}
	}
	for (g = 0; g < 3; g++)
		HMAC_SHA256_80_init_4way(W + 128 * g, tstate + 32 * g, ostate + 32 * g);
	for (g = 0; g < 3; g++)
		PBKDF2_SHA256_80_128_4way(tstate + 32 * g, ostate + 32 * g,
			W + 128 * g, W + 128 * g);

	/* SHA-256 lane layout -> one 32-word block per hash. */
	for (g = 0; g < 3; g++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				X[128 * g + 32 * lane + w] = W[128 * g + 4 * w + lane];

	/* Twelve blocks, three at a time. */
	for (c = 0; c < 4; c++)
		scrypt_core_3way(X + 96 * c, V, N);

	/* Back to the SHA-256 lane layout. */
	for (g = 0; g < 3; g++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				W[128 * g + 4 * w + lane] = X[128 * g + 32 * lane + w];

	for (g = 0; g < 3; g++)
		PBKDF2_SHA256_128_32_4way(tstate + 32 * g, ostate + 32 * g,
			W + 128 * g, W + 128 * g);

	for (g = 0; g < 3; g++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 8; w++)
				output[32 * g + 8 * lane + w] = W[128 * g + 4 * w + lane];
}
649 #endif /* HAVE_SHA256_4WAY */
650
651 #endif /* HAVE_SCRYPT_3WAY */
652
653 #ifdef HAVE_SCRYPT_6WAY
/*
 * Compute twenty-four scrypt hashes: three groups of eight go through the
 * 8-lane SHA-256 PBKDF2 stages, and the twenty-four working blocks are
 * mixed by four calls to scrypt_core_6way().
 *
 * input:      24 consecutive 20-word inputs.
 * output:     24 consecutive 8-word hashes.
 * midstate:   8-word SHA-256 state shared by all inputs.
 * scratchpad: work area for scrypt_core_6way(); rounded up to a 64-byte
 *             boundary.
 * N:          passed through to scrypt_core_6way().
 *
 * Within group g, W is indexed 256 * g + 8 * word + lane (SHA-256 layout)
 * and X is indexed 256 * g + 32 * lane + word (one hash after another).
 */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[24 * 8] __attribute__((aligned(128)));
	uint32_t ostate[24 * 8] __attribute__((aligned(128)));
	uint32_t W[24 * 32] __attribute__((aligned(128)));
	uint32_t X[24 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int g, lane, w, c;

	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Spread the inputs and the shared midstate across groups and lanes. */
	for (g = 0; g < 3; g++) {
		for (lane = 0; lane < 8; lane++) {
			for (w = 0; w < 20; w++)
				W[256 * g + 8 * w + lane] = input[160 * g + 20 * lane + w];
			for (w = 0; w < 8; w++)
				tstate[64 * g + 8 * w + lane] = midstate[w];
		}
	}
	for (g = 0; g < 3; g++)
		HMAC_SHA256_80_init_8way(W + 256 * g, tstate + 64 * g, ostate + 64 * g);
	for (g = 0; g < 3; g++)
		PBKDF2_SHA256_80_128_8way(tstate + 64 * g, ostate + 64 * g,
			W + 256 * g, W + 256 * g);

	/* SHA-256 lane layout -> one 32-word block per hash. */
	for (g = 0; g < 3; g++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 32; w++)
				X[256 * g + 32 * lane + w] = W[256 * g + 8 * w + lane];

	/* Twenty-four blocks, six at a time. */
	for (c = 0; c < 4; c++)
		scrypt_core_6way(X + 6 * 32 * c, V, N);

	/* Back to the SHA-256 lane layout. */
	for (g = 0; g < 3; g++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 32; w++)
				W[256 * g + 8 * w + lane] = X[256 * g + 32 * lane + w];

	for (g = 0; g < 3; g++)
		PBKDF2_SHA256_128_32_8way(tstate + 64 * g, ostate + 64 * g,
			W + 256 * g, W + 256 * g);

	for (g = 0; g < 3; g++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 8; w++)
				output[64 * g + 8 * lane + w] = W[256 * g + 8 * w + lane];
}
700 #endif /* HAVE_SCRYPT_6WAY */
701
scanhash_scrypt(int thr_id,uint32_t * pdata,unsigned char * scratchbuf,const uint32_t * ptarget,uint32_t max_nonce,unsigned long * hashes_done,int N)702 int scanhash_scrypt(int thr_id, uint32_t *pdata,
703 unsigned char *scratchbuf, const uint32_t *ptarget,
704 uint32_t max_nonce, unsigned long *hashes_done, int N)
705 {
706 uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
707 uint32_t midstate[8];
708 uint32_t n = pdata[19] - 1;
709 const uint32_t Htarg = ptarget[7];
710 int throughput = scrypt_best_throughput();
711 int i;
712
713 #ifdef HAVE_SHA256_4WAY
714 if (sha256_use_4way())
715 throughput *= 4;
716 #endif
717
718 for (i = 0; i < throughput; i++)
719 memcpy(data + i * 20, pdata, 80);
720
721 sha256_init(midstate);
722 sha256_transform(midstate, data, 0);
723
724 do {
725 for (i = 0; i < throughput; i++)
726 data[i * 20 + 19] = ++n;
727
728 #if defined(HAVE_SHA256_4WAY)
729 if (throughput == 4)
730 scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
731 else
732 #endif
733 #if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
734 if (throughput == 12)
735 scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
736 else
737 #endif
738 #if defined(HAVE_SCRYPT_6WAY)
739 if (throughput == 24)
740 scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
741 else
742 #endif
743 #if defined(HAVE_SCRYPT_3WAY)
744 if (throughput == 3)
745 scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
746 else
747 #endif
748 scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
749
750 for (i = 0; i < throughput; i++) {
751 if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
752 *hashes_done = n - pdata[19] + 1;
753 pdata[19] = data[i * 20 + 19];
754 return 1;
755 }
756 }
757 } while (n < max_nonce && !work_restart[thr_id].restart);
758
759 *hashes_done = n - pdata[19] + 1;
760 pdata[19] = n;
761 return 0;
762 }
763