1 /*
2  * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * This file was originally written by Colin Percival as part of the Tarsnap
27  * online backup system.
28  */
29 
30 #include "cpuminer-config.h"
31 #include "miner.h"
32 
33 #include <stdlib.h>
34 #include <string.h>
35 #include <inttypes.h>
36 
/*
 * Constant tail words for the fixed-size messages hashed in this file.
 * Every table contains the 0x80000000 marker word and ends with a length
 * word; all words not listed are zero.  The length words match the total
 * message size in bits (e.g. 0x280 = 640 bits = 80 bytes).
 */

/* 12 words appended after the last 4 key words in HMAC_SHA256_80_init(). */
static const uint32_t keypad[12] = {
	[0]  = 0x80000000,
	[11] = 0x00000280
};
/* 11 words placed after the block counter in PBKDF2_SHA256_80_128(). */
static const uint32_t innerpad[11] = {
	[0]  = 0x80000000,
	[10] = 0x000004a0
};
/* 8 words placed after an 8-word inner digest before the outer transform. */
static const uint32_t outerpad[8] = {
	[0] = 0x80000000,
	[7] = 0x00000300
};
/* Complete 16-word final block used by PBKDF2_SHA256_128_32(). */
static const uint32_t finalblk[16] = {
	[0]  = 0x00000001,
	[1]  = 0x80000000,
	[15] = 0x00000620
};
49 
HMAC_SHA256_80_init(const uint32_t * key,uint32_t * tstate,uint32_t * ostate)50 static inline void HMAC_SHA256_80_init(const uint32_t *key,
51 	uint32_t *tstate, uint32_t *ostate)
52 {
53 	uint32_t ihash[8];
54 	uint32_t pad[16];
55 	int i;
56 
57 	/* tstate is assumed to contain the midstate of key */
58 	memcpy(pad, key + 16, 16);
59 	memcpy(pad + 4, keypad, 48);
60 	sha256_transform(tstate, pad, 0);
61 	memcpy(ihash, tstate, 32);
62 
63 	sha256_init(ostate);
64 	for (i = 0; i < 8; i++)
65 		pad[i] = ihash[i] ^ 0x5c5c5c5c;
66 	for (; i < 16; i++)
67 		pad[i] = 0x5c5c5c5c;
68 	sha256_transform(ostate, pad, 0);
69 
70 	sha256_init(tstate);
71 	for (i = 0; i < 8; i++)
72 		pad[i] = ihash[i] ^ 0x36363636;
73 	for (; i < 16; i++)
74 		pad[i] = 0x36363636;
75 	sha256_transform(tstate, pad, 0);
76 }
77 
PBKDF2_SHA256_80_128(const uint32_t * tstate,const uint32_t * ostate,const uint32_t * salt,uint32_t * output)78 static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
79 	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
80 {
81 	uint32_t istate[8], ostate2[8];
82 	uint32_t ibuf[16], obuf[16];
83 	int i, j;
84 
85 	memcpy(istate, tstate, 32);
86 	sha256_transform(istate, salt, 0);
87 
88 	memcpy(ibuf, salt + 16, 16);
89 	memcpy(ibuf + 5, innerpad, 44);
90 	memcpy(obuf + 8, outerpad, 32);
91 
92 	for (i = 0; i < 4; i++) {
93 		memcpy(obuf, istate, 32);
94 		ibuf[4] = i + 1;
95 		sha256_transform(obuf, ibuf, 0);
96 
97 		memcpy(ostate2, ostate, 32);
98 		sha256_transform(ostate2, obuf, 0);
99 		for (j = 0; j < 8; j++)
100 			output[8 * i + j] = swab32(ostate2[j]);
101 	}
102 }
103 
PBKDF2_SHA256_128_32(uint32_t * tstate,uint32_t * ostate,const uint32_t * salt,uint32_t * output)104 static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
105 	const uint32_t *salt, uint32_t *output)
106 {
107 	uint32_t buf[16];
108 	int i;
109 
110 	sha256_transform(tstate, salt, 1);
111 	sha256_transform(tstate, salt + 16, 1);
112 	sha256_transform(tstate, finalblk, 0);
113 	memcpy(buf, tstate, 32);
114 	memcpy(buf + 8, outerpad, 32);
115 
116 	sha256_transform(ostate, buf, 0);
117 	for (i = 0; i < 8; i++)
118 		output[i] = swab32(ostate[i]);
119 }
120 
121 
122 #ifdef HAVE_SHA256_4WAY
123 
/*
 * 4-way versions of the padding tables: every word of the scalar table is
 * repeated four times in a row (one copy per lane).  Words not listed are
 * zero.
 */
static const uint32_t keypad_4way[4 * 12] = {
	[4 * 0 + 0]  = 0x80000000, [4 * 0 + 1]  = 0x80000000,
	[4 * 0 + 2]  = 0x80000000, [4 * 0 + 3]  = 0x80000000,
	[4 * 11 + 0] = 0x00000280, [4 * 11 + 1] = 0x00000280,
	[4 * 11 + 2] = 0x00000280, [4 * 11 + 3] = 0x00000280
};
static const uint32_t innerpad_4way[4 * 11] = {
	[4 * 0 + 0]  = 0x80000000, [4 * 0 + 1]  = 0x80000000,
	[4 * 0 + 2]  = 0x80000000, [4 * 0 + 3]  = 0x80000000,
	[4 * 10 + 0] = 0x000004a0, [4 * 10 + 1] = 0x000004a0,
	[4 * 10 + 2] = 0x000004a0, [4 * 10 + 3] = 0x000004a0
};
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0 + 0] = 0x80000000, [4 * 0 + 1] = 0x80000000,
	[4 * 0 + 2] = 0x80000000, [4 * 0 + 3] = 0x80000000,
	[4 * 7 + 0] = 0x00000300, [4 * 7 + 1] = 0x00000300,
	[4 * 7 + 2] = 0x00000300, [4 * 7 + 3] = 0x00000300
};
/* Passed directly to sha256_transform_4way(), hence the 16-byte alignment. */
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
	[4 * 0 + 0]  = 0x00000001, [4 * 0 + 1]  = 0x00000001,
	[4 * 0 + 2]  = 0x00000001, [4 * 0 + 3]  = 0x00000001,
	[4 * 1 + 0]  = 0x80000000, [4 * 1 + 1]  = 0x80000000,
	[4 * 1 + 2]  = 0x80000000, [4 * 1 + 3]  = 0x80000000,
	[4 * 15 + 0] = 0x00000620, [4 * 15 + 1] = 0x00000620,
	[4 * 15 + 2] = 0x00000620, [4 * 15 + 3] = 0x00000620
};
179 
HMAC_SHA256_80_init_4way(const uint32_t * key,uint32_t * tstate,uint32_t * ostate)180 static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
181 	uint32_t *tstate, uint32_t *ostate)
182 {
183 	uint32_t ihash[4 * 8] __attribute__((aligned(16)));
184 	uint32_t pad[4 * 16] __attribute__((aligned(16)));
185 	int i;
186 
187 	/* tstate is assumed to contain the midstate of key */
188 	memcpy(pad, key + 4 * 16, 4 * 16);
189 	memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
190 	sha256_transform_4way(tstate, pad, 0);
191 	memcpy(ihash, tstate, 4 * 32);
192 
193 	sha256_init_4way(ostate);
194 	for (i = 0; i < 4 * 8; i++)
195 		pad[i] = ihash[i] ^ 0x5c5c5c5c;
196 	for (; i < 4 * 16; i++)
197 		pad[i] = 0x5c5c5c5c;
198 	sha256_transform_4way(ostate, pad, 0);
199 
200 	sha256_init_4way(tstate);
201 	for (i = 0; i < 4 * 8; i++)
202 		pad[i] = ihash[i] ^ 0x36363636;
203 	for (; i < 4 * 16; i++)
204 		pad[i] = 0x36363636;
205 	sha256_transform_4way(tstate, pad, 0);
206 }
207 
PBKDF2_SHA256_80_128_4way(const uint32_t * tstate,const uint32_t * ostate,const uint32_t * salt,uint32_t * output)208 static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
209 	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
210 {
211 	uint32_t istate[4 * 8] __attribute__((aligned(16)));
212 	uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
213 	uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
214 	uint32_t obuf[4 * 16] __attribute__((aligned(16)));
215 	int i, j;
216 
217 	memcpy(istate, tstate, 4 * 32);
218 	sha256_transform_4way(istate, salt, 0);
219 
220 	memcpy(ibuf, salt + 4 * 16, 4 * 16);
221 	memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
222 	memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
223 
224 	for (i = 0; i < 4; i++) {
225 		memcpy(obuf, istate, 4 * 32);
226 		ibuf[4 * 4 + 0] = i + 1;
227 		ibuf[4 * 4 + 1] = i + 1;
228 		ibuf[4 * 4 + 2] = i + 1;
229 		ibuf[4 * 4 + 3] = i + 1;
230 		sha256_transform_4way(obuf, ibuf, 0);
231 
232 		memcpy(ostate2, ostate, 4 * 32);
233 		sha256_transform_4way(ostate2, obuf, 0);
234 		for (j = 0; j < 4 * 8; j++)
235 			output[4 * 8 * i + j] = swab32(ostate2[j]);
236 	}
237 }
238 
PBKDF2_SHA256_128_32_4way(uint32_t * tstate,uint32_t * ostate,const uint32_t * salt,uint32_t * output)239 static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
240 	uint32_t *ostate, const uint32_t *salt, uint32_t *output)
241 {
242 	uint32_t buf[4 * 16] __attribute__((aligned(16)));
243 	int i;
244 
245 	sha256_transform_4way(tstate, salt, 1);
246 	sha256_transform_4way(tstate, salt + 4 * 16, 1);
247 	sha256_transform_4way(tstate, finalblk_4way, 0);
248 	memcpy(buf, tstate, 4 * 32);
249 	memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
250 
251 	sha256_transform_4way(ostate, buf, 0);
252 	for (i = 0; i < 4 * 8; i++)
253 		output[i] = swab32(ostate[i]);
254 }
255 
256 #endif /* HAVE_SHA256_4WAY */
257 
258 
259 #ifdef HAVE_SHA256_8WAY
260 
/*
 * 8-way final block for PBKDF2_SHA256_128_32_8way(): each of the 16 words
 * is repeated eight times in a row (one copy per lane).  Row 0 is 1, row 1
 * is 0x80000000, row 15 is 0x620; every other row is zero.
 */
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
	[8 * 0 + 0]  = 0x00000001, [8 * 0 + 1]  = 0x00000001,
	[8 * 0 + 2]  = 0x00000001, [8 * 0 + 3]  = 0x00000001,
	[8 * 0 + 4]  = 0x00000001, [8 * 0 + 5]  = 0x00000001,
	[8 * 0 + 6]  = 0x00000001, [8 * 0 + 7]  = 0x00000001,
	[8 * 1 + 0]  = 0x80000000, [8 * 1 + 1]  = 0x80000000,
	[8 * 1 + 2]  = 0x80000000, [8 * 1 + 3]  = 0x80000000,
	[8 * 1 + 4]  = 0x80000000, [8 * 1 + 5]  = 0x80000000,
	[8 * 1 + 6]  = 0x80000000, [8 * 1 + 7]  = 0x80000000,
	[8 * 15 + 0] = 0x00000620, [8 * 15 + 1] = 0x00000620,
	[8 * 15 + 2] = 0x00000620, [8 * 15 + 3] = 0x00000620,
	[8 * 15 + 4] = 0x00000620, [8 * 15 + 5] = 0x00000620,
	[8 * 15 + 6] = 0x00000620, [8 * 15 + 7] = 0x00000620
};
279 
/*
 * 8-way variant of HMAC_SHA256_80_init().  All buffers hold 8 interleaved
 * lanes: word w of lane k lives at index 8 * w + k.
 *
 * key    - 8 * 20 interleaved words; only rows 16..19 are read here
 * tstate - in:  8-way midstate of the first 16 key rows
 *          out: fresh 8-way state that absorbed (key hash ^ 0x36...)
 * ostate - out: fresh 8-way state that absorbed (key hash ^ 0x5c...)
 */
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t khash[8 * 8] __attribute__((aligned(32)));
	uint32_t block[8 * 16] __attribute__((aligned(32)));
	int lane, w;

	/*
	 * Finish hashing the key: rows 0..3 are the key tail, row 4 is the
	 * 0x80000000 marker, rows 5..14 are zero and row 15 is 0x280.
	 */
	memcpy(block, &key[8 * 16], 8 * 4 * sizeof(uint32_t));
	memset(&block[8 * 5], 0x00, 8 * 10 * sizeof(uint32_t));
	for (lane = 0; lane < 8; lane++) {
		block[8 * 4 + lane] = 0x80000000;
		block[8 * 15 + lane] = 0x00000280;
	}
	sha256_transform_8way(tstate, block, 0);
	memcpy(khash, tstate, sizeof(khash));

	/* Outer state: key hash XOR 0x5c, remaining rows are plain 0x5c. */
	sha256_init_8way(ostate);
	for (w = 0; w < 8 * 16; w++)
		block[w] = (w < 8 * 8 ? khash[w] : 0) ^ 0x5c5c5c5c;
	sha256_transform_8way(ostate, block, 0);

	/* Inner state: same construction with 0x36. */
	sha256_init_8way(tstate);
	for (w = 0; w < 8 * 16; w++)
		block[w] = (w < 8 * 8 ? khash[w] : 0) ^ 0x36363636;
	sha256_transform_8way(tstate, block, 0);
}
311 
/*
 * 8-way variant of PBKDF2_SHA256_80_128() on lane-interleaved buffers
 * (word w of lane k at index 8 * w + k).
 *
 * tstate, ostate - 8-way inner and outer HMAC states (not modified)
 * salt           - 8 * 20 interleaved input words
 * output         - receives 8 * 32 interleaved, byte-swapped words
 *
 * salt is completely read before output is written, so callers in this
 * file pass the same buffer for both.
 */
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t inner_mid[8 * 8] __attribute__((aligned(32)));
	uint32_t outer[8 * 8] __attribute__((aligned(32)));
	uint32_t inner_blk[8 * 16] __attribute__((aligned(32)));
	uint32_t outer_blk[8 * 16] __attribute__((aligned(32)));
	uint32_t blk;
	int lane, w;

	/* Absorb the first 16 salt rows once; they are shared by all blocks. */
	memcpy(inner_mid, tstate, sizeof(inner_mid));
	sha256_transform_8way(inner_mid, salt, 0);

	/*
	 * Inner block: rows 0..3 salt tail, row 4 counter (set per block),
	 * row 5 marker, rows 6..14 zero, row 15 = 0x4a0.
	 * Outer block: rows 0..7 digest (set per block), row 8 marker,
	 * rows 9..14 zero, row 15 = 0x300.
	 */
	memcpy(inner_blk, &salt[8 * 16], 8 * 4 * sizeof(uint32_t));
	memset(&inner_blk[8 * 6], 0x00, 8 * 9 * sizeof(uint32_t));
	memset(&outer_blk[8 * 9], 0x00, 8 * 6 * sizeof(uint32_t));
	for (lane = 0; lane < 8; lane++) {
		inner_blk[8 * 5 + lane] = 0x80000000;
		inner_blk[8 * 15 + lane] = 0x000004a0;
		outer_blk[8 * 8 + lane] = 0x80000000;
		outer_blk[8 * 15 + lane] = 0x00000300;
	}

	for (blk = 1; blk <= 4; blk++) {
		uint32_t *out = output + 8 * 8 * (blk - 1);

		memcpy(outer_blk, inner_mid, sizeof(inner_mid));
		for (lane = 0; lane < 8; lane++)
			inner_blk[8 * 4 + lane] = blk;
		sha256_transform_8way(outer_blk, inner_blk, 0);

		memcpy(outer, ostate, sizeof(outer));
		sha256_transform_8way(outer, outer_blk, 0);
		for (w = 0; w < 8 * 8; w++)
			out[w] = swab32(outer[w]);
	}
}
355 
PBKDF2_SHA256_128_32_8way(uint32_t * tstate,uint32_t * ostate,const uint32_t * salt,uint32_t * output)356 static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
357 	uint32_t *ostate, const uint32_t *salt, uint32_t *output)
358 {
359 	uint32_t buf[8 * 16] __attribute__((aligned(32)));
360 	int i;
361 
362 	sha256_transform_8way(tstate, salt, 1);
363 	sha256_transform_8way(tstate, salt + 8 * 16, 1);
364 	sha256_transform_8way(tstate, finalblk_8way, 0);
365 
366 	memcpy(buf, tstate, 8 * 32);
367 	for (i = 0; i < 8; i++)
368 		buf[8 * 8 + i] = 0x80000000;
369 	memset(buf + 8 * 9, 0x00, 8 * 24);
370 	for (i = 0; i < 8; i++)
371 		buf[8 * 15 + i] = 0x00000300;
372 	sha256_transform_8way(ostate, buf, 0);
373 
374 	for (i = 0; i < 8 * 8; i++)
375 		output[i] = swab32(ostate[i]);
376 }
377 
378 #endif /* HAVE_SHA256_8WAY */
379 
380 
381 #if defined(USE_ASM) && defined(__x86_64__)
382 
383 #define SCRYPT_MAX_WAYS 12
384 #define HAVE_SCRYPT_3WAY 1
385 int scrypt_best_throughput();
386 void scrypt_core(uint32_t *X, uint32_t *V, int N);
387 void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
388 #if defined(USE_AVX2)
389 #undef SCRYPT_MAX_WAYS
390 #define SCRYPT_MAX_WAYS 24
391 #define HAVE_SCRYPT_6WAY 1
392 void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
393 #endif
394 
395 #elif defined(USE_ASM) && defined(__i386__)
396 
397 #define SCRYPT_MAX_WAYS 4
398 #define scrypt_best_throughput() 1
399 void scrypt_core(uint32_t *X, uint32_t *V, int N);
400 
401 #elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
402 
403 void scrypt_core(uint32_t *X, uint32_t *V, int N);
404 #if defined(__ARM_NEON__)
405 #undef HAVE_SHA256_4WAY
406 #define SCRYPT_MAX_WAYS 3
407 #define HAVE_SCRYPT_3WAY 1
408 #define scrypt_best_throughput() 3
409 void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
410 #endif
411 
412 #elif defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
413 
414 #define SCRYPT_MAX_WAYS 4
415 #define scrypt_best_throughput() 1
416 void scrypt_core(uint32_t *X, uint32_t *V, int N);
417 
418 #else
419 
/*
 * XOR Bx into B, run 8 Salsa20 rounds (4 column/row double rounds) on a
 * copy of the result, and add that copy back into B word by word.
 *
 * B  - 16-word block, updated in place
 * Bx - 16-word block XORed into B first (not modified)
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t s[16];
	int w, dround;

	for (w = 0; w < 16; w++)
		s[w] = (B[w] ^= Bx[w]);

#define ROTL32(v, c) (((v) << (c)) | ((v) >> (32 - (c))))
/* One quarter round; the four in each half round touch disjoint words. */
#define QUARTER(a, b, c, d) do { \
		s[b] ^= ROTL32(s[a] + s[d],  7); \
		s[c] ^= ROTL32(s[b] + s[a],  9); \
		s[d] ^= ROTL32(s[c] + s[b], 13); \
		s[a] ^= ROTL32(s[d] + s[c], 18); \
	} while (0)

	for (dround = 0; dround < 4; dround++) {
		/* Operate on columns. */
		QUARTER( 0,  4,  8, 12);
		QUARTER( 5,  9, 13,  1);
		QUARTER(10, 14,  2,  6);
		QUARTER(15,  3,  7, 11);

		/* Operate on rows. */
		QUARTER( 0,  1,  2,  3);
		QUARTER( 5,  6,  7,  4);
		QUARTER(10, 11,  8,  9);
		QUARTER(15, 12, 13, 14);
	}

#undef QUARTER
#undef ROTL32

	for (w = 0; w < 16; w++)
		B[w] += s[w];
}
487 
/*
 * Portable C scrypt core operating on one 32-word (128-byte) block.
 *
 * X - 32-word working block, transformed in place
 * V - scratch area of at least N * 32 words
 * N - number of V entries; the second loop selects an entry with
 *     X[16] & (N - 1), so N is expected to be a power of two
 *
 * Offsets into V are computed in size_t so that N * 32 cannot wrap a
 * 32-bit index for large N.  A non-positive N is a no-op.
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
{
	size_t i, j, k;
	size_t n;

	if (N <= 0)
		return;
	n = (size_t)N;

	/* Phase 1: record each successive X in V, then mix X. */
	for (i = 0; i < n; i++) {
		memcpy(&V[i * 32], X, 128);
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}
	/* Phase 2: XOR in the V entry selected by X[16], then mix X. */
	for (i = 0; i < n; i++) {
		j = (size_t)32 * (X[16] & (uint32_t)(N - 1));
		for (k = 0; k < 32; k++)
			X[k] ^= V[j + k];
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}
}
505 
506 #endif
507 
/* No multi-way configuration was selected above: hash one input per call. */
#if !defined(SCRYPT_MAX_WAYS)
# define SCRYPT_MAX_WAYS 1
# define scrypt_best_throughput() 1
#endif
512 
scrypt_buffer_alloc(int N)513 unsigned char *scrypt_buffer_alloc(int N)
514 {
515 	return malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
516 }
517 
/*
 * Hash one 80-byte input to a 32-byte output.
 *
 * input      - 20 words
 * output     - receives 8 words
 * midstate   - SHA-256 state after the first 16 input words
 * scratchpad - buffer from scrypt_buffer_alloc(); rounded up to a 64-byte
 *              boundary here before being handed to scrypt_core()
 * N          - passed through to scrypt_core()
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[8], ostate[8];
	uint32_t X[32] __attribute__((aligned(128)));
	const uintptr_t base = (uintptr_t)(scratchpad) + 63;
	uint32_t *V = (uint32_t *)(base & ~ (uintptr_t)(63));

	memcpy(tstate, midstate, sizeof(tstate));
	HMAC_SHA256_80_init(input, tstate, ostate);
	PBKDF2_SHA256_80_128(tstate, ostate, input, X);

	scrypt_core(X, V, N);

	PBKDF2_SHA256_128_32(tstate, ostate, X, output);
}
535 
536 #ifdef HAVE_SHA256_4WAY
/*
 * Hash four 80-byte inputs at once, using the 4-way SHA-256 routines and
 * four sequential scrypt_core() calls that share the same scratch area.
 *
 * input    - 4 * 20 words, one input after another
 * output   - receives 4 * 8 words, one digest after another
 * midstate - SHA-256 midstate, copied to all four lanes
 *
 * W holds data lane-interleaved (word w of lane k at 4 * w + k) for the
 * SHA-256 code; X holds it lane-contiguous (lane k at 32 * k) for the core.
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
	uint32_t W[4 * 32] __attribute__((aligned(128)));
	uint32_t X[4 * 32] __attribute__((aligned(128)));
	const uintptr_t base = (uintptr_t)(scratchpad) + 63;
	uint32_t *V = (uint32_t *)(base & ~ (uintptr_t)(63));
	int lane, w;

	/* Interleave the inputs and broadcast the midstate. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 20; w++)
			W[4 * w + lane] = input[20 * lane + w];
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			tstate[4 * w + lane] = midstate[w];

	HMAC_SHA256_80_init_4way(W, tstate, ostate);
	PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

	/* De-interleave, run the core once per lane, re-interleave. */
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			X[32 * lane + w] = W[4 * w + lane];
	for (lane = 0; lane < 4; lane++)
		scrypt_core(X + lane * 32, V, N);
	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 32; w++)
			W[4 * w + lane] = X[32 * lane + w];

	PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

	for (lane = 0; lane < 4; lane++)
		for (w = 0; w < 8; w++)
			output[8 * lane + w] = W[4 * w + lane];
}
572 #endif /* HAVE_SHA256_4WAY */
573 
574 #ifdef HAVE_SCRYPT_3WAY
575 
/*
 * Hash three 80-byte inputs with scalar SHA-256 and one scrypt_core_3way()
 * call.
 *
 * input    - 3 * 20 words, one input after another
 * output   - receives 3 * 8 words, one digest after another
 * midstate - SHA-256 midstate, copied for each of the three inputs
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[3 * 8], ostate[3 * 8];
	uint32_t X[3 * 32] __attribute__((aligned(64)));
	const uintptr_t base = (uintptr_t)(scratchpad) + 63;
	uint32_t *V = (uint32_t *)(base & ~ (uintptr_t)(63));
	int lane;

	for (lane = 0; lane < 3; lane++)
		memcpy(&tstate[8 * lane], midstate, 8 * sizeof(uint32_t));
	for (lane = 0; lane < 3; lane++)
		HMAC_SHA256_80_init(input + 20 * lane,
			tstate + 8 * lane, ostate + 8 * lane);
	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_80_128(tstate + 8 * lane, ostate + 8 * lane,
			input + 20 * lane, X + 32 * lane);

	scrypt_core_3way(X, V, N);

	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_128_32(tstate + 8 * lane, ostate + 8 * lane,
			X + 32 * lane, output + 8 * lane);
}
601 
602 #ifdef HAVE_SHA256_4WAY
/*
 * Hash twelve 80-byte inputs: three groups of four go through the 4-way
 * SHA-256 routines, and the twelve resulting blocks go through four
 * scrypt_core_3way() calls.
 *
 * input    - 12 * 20 words, one input after another
 * output   - receives 12 * 8 words, one digest after another
 * midstate - SHA-256 midstate, copied to every lane
 *
 * Within group g, W is lane-interleaved (128 * g + 4 * w + k) and X is
 * lane-contiguous (128 * g + 32 * k + w).
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
	uint32_t W[12 * 32] __attribute__((aligned(128)));
	uint32_t X[12 * 32] __attribute__((aligned(128)));
	const uintptr_t base = (uintptr_t)(scratchpad) + 63;
	uint32_t *V = (uint32_t *)(base & ~ (uintptr_t)(63));
	int grp, lane, w, t;

	/* Interleave the inputs and broadcast the midstate. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 20; w++)
				W[128 * grp + 4 * w + lane] =
					input[80 * grp + 20 * lane + w];
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 8; w++)
				tstate[32 * grp + 4 * w + lane] = midstate[w];

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_4way(W + 128 * grp,
			tstate + 32 * grp, ostate + 32 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	/* De-interleave, run the core on 4 triples, re-interleave. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				X[128 * grp + 32 * lane + w] =
					W[128 * grp + 4 * w + lane];
	for (t = 0; t < 4; t++)
		scrypt_core_3way(X + t * 96, V, N);
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 32; w++)
				W[128 * grp + 4 * w + lane] =
					X[128 * grp + 32 * lane + w];

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_4way(tstate + 32 * grp, ostate + 32 * grp,
			W + 128 * grp, W + 128 * grp);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 4; lane++)
			for (w = 0; w < 8; w++)
				output[32 * grp + 8 * lane + w] =
					W[128 * grp + 4 * w + lane];
}
649 #endif /* HAVE_SHA256_4WAY */
650 
651 #endif /* HAVE_SCRYPT_3WAY */
652 
653 #ifdef HAVE_SCRYPT_6WAY
/*
 * Hash twenty-four 80-byte inputs: three groups of eight go through the
 * 8-way SHA-256 routines, and the twenty-four resulting blocks go through
 * four scrypt_core_6way() calls.
 *
 * input    - 24 * 20 words, one input after another
 * output   - receives 24 * 8 words, one digest after another
 * midstate - SHA-256 midstate, copied to every lane
 *
 * Within group g, W is lane-interleaved (256 * g + 8 * w + k) and X is
 * lane-contiguous (256 * g + 32 * k + w).
 */
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
{
	uint32_t tstate[24 * 8] __attribute__((aligned(128)));
	uint32_t ostate[24 * 8] __attribute__((aligned(128)));
	uint32_t W[24 * 32] __attribute__((aligned(128)));
	uint32_t X[24 * 32] __attribute__((aligned(128)));
	const uintptr_t base = (uintptr_t)(scratchpad) + 63;
	uint32_t *V = (uint32_t *)(base & ~ (uintptr_t)(63));
	int grp, lane, w, t;

	/* Interleave the inputs and broadcast the midstate. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 20; w++)
				W[8 * 32 * grp + 8 * w + lane] =
					input[8 * 20 * grp + 20 * lane + w];
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 8; w++)
				tstate[8 * 8 * grp + 8 * w + lane] = midstate[w];

	for (grp = 0; grp < 3; grp++)
		HMAC_SHA256_80_init_8way(W + 256 * grp,
			tstate + 64 * grp, ostate + 64 * grp);
	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_80_128_8way(tstate + 64 * grp, ostate + 64 * grp,
			W + 256 * grp, W + 256 * grp);

	/* De-interleave, run the core on 4 sextuples, re-interleave. */
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 32; w++)
				X[8 * 32 * grp + 32 * lane + w] =
					W[8 * 32 * grp + 8 * w + lane];
	for (t = 0; t < 4; t++)
		scrypt_core_6way(X + t * 6 * 32, V, N);
	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 32; w++)
				W[8 * 32 * grp + 8 * w + lane] =
					X[8 * 32 * grp + 32 * lane + w];

	for (grp = 0; grp < 3; grp++)
		PBKDF2_SHA256_128_32_8way(tstate + 64 * grp, ostate + 64 * grp,
			W + 256 * grp, W + 256 * grp);

	for (grp = 0; grp < 3; grp++)
		for (lane = 0; lane < 8; lane++)
			for (w = 0; w < 8; w++)
				output[8 * 8 * grp + 8 * lane + w] =
					W[8 * 32 * grp + 8 * w + lane];
}
700 #endif /* HAVE_SCRYPT_6WAY */
701 
scanhash_scrypt(int thr_id,uint32_t * pdata,unsigned char * scratchbuf,const uint32_t * ptarget,uint32_t max_nonce,unsigned long * hashes_done,int N)702 int scanhash_scrypt(int thr_id, uint32_t *pdata,
703 	unsigned char *scratchbuf, const uint32_t *ptarget,
704 	uint32_t max_nonce, unsigned long *hashes_done, int N)
705 {
706 	uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
707 	uint32_t midstate[8];
708 	uint32_t n = pdata[19] - 1;
709 	const uint32_t Htarg = ptarget[7];
710 	int throughput = scrypt_best_throughput();
711 	int i;
712 
713 #ifdef HAVE_SHA256_4WAY
714 	if (sha256_use_4way())
715 		throughput *= 4;
716 #endif
717 
718 	for (i = 0; i < throughput; i++)
719 		memcpy(data + i * 20, pdata, 80);
720 
721 	sha256_init(midstate);
722 	sha256_transform(midstate, data, 0);
723 
724 	do {
725 		for (i = 0; i < throughput; i++)
726 			data[i * 20 + 19] = ++n;
727 
728 #if defined(HAVE_SHA256_4WAY)
729 		if (throughput == 4)
730 			scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
731 		else
732 #endif
733 #if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
734 		if (throughput == 12)
735 			scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
736 		else
737 #endif
738 #if defined(HAVE_SCRYPT_6WAY)
739 		if (throughput == 24)
740 			scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
741 		else
742 #endif
743 #if defined(HAVE_SCRYPT_3WAY)
744 		if (throughput == 3)
745 			scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
746 		else
747 #endif
748 		scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
749 
750 		for (i = 0; i < throughput; i++) {
751 			if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
752 				*hashes_done = n - pdata[19] + 1;
753 				pdata[19] = data[i * 20 + 19];
754 				return 1;
755 			}
756 		}
757 	} while (n < max_nonce && !work_restart[thr_id].restart);
758 
759 	*hashes_done = n - pdata[19] + 1;
760 	pdata[19] = n;
761 	return 0;
762 }
763