1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6#ifdef KERNEL_STATIC
7#include "inc_vendor.h"
8#include "inc_types.h"
9#include "inc_platform.cl"
10#include "inc_common.cl"
11#include "inc_hash_sha256.cl"
12#endif
13
// File names of the two comparison includes ("single" and "multi", going by
// their names).
// NOTE(review): neither macro is referenced in this part of the file;
// presumably they are consumed by a later #include - confirm.
#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

// Smaller of two values; yields b when both compare equal.
// Function-like macro: an argument can be evaluated twice, so do not pass
// expressions with side effects.
#define MIN(a,b) (((b) > (a)) ? (a) : (b))
18
19typedef struct sha256crypt_tmp
20{
21  // pure version
22
23  u32 alt_result[8];
24  u32 p_bytes[64];
25  u32 s_bytes[64];
26
27} sha256crypt_tmp_t;
28
29DECLSPEC void init_ctx (u32 *digest)
30{
31  digest[0] = SHA256M_A;
32  digest[1] = SHA256M_B;
33  digest[2] = SHA256M_C;
34  digest[3] = SHA256M_D;
35  digest[4] = SHA256M_E;
36  digest[5] = SHA256M_F;
37  digest[6] = SHA256M_G;
38  digest[7] = SHA256M_H;
39}
40
41DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u32 append_len)
42{
43  u32 in0 = append[0];
44  u32 in1 = append[1];
45  u32 in2 = append[2];
46  u32 in3 = append[3];
47
48  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
49  const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
50  const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
51  const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
52  const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
53  const u32 tmp4 = hc_bytealign_be (in3,   0, offset);
54  #endif
55
56  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
57
58  #if defined IS_NV
59  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
60  #endif
61
62  #if (defined IS_AMD || defined IS_HIP)
63  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
64  #endif
65
66  const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
67  const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
68  const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
69  const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
70  const u32 tmp4 = hc_byte_perm_S (0,   in3, selector);
71  #endif
72
73  switch (offset / 4)
74  {
75    case  0:  block[ 0] |= tmp0;
76              block[ 1]  = tmp1;
77              block[ 2]  = tmp2;
78              block[ 3]  = tmp3;
79              block[ 4]  = tmp4;
80              break;
81    case  1:  block[ 1] |= tmp0;
82              block[ 2]  = tmp1;
83              block[ 3]  = tmp2;
84              block[ 4]  = tmp3;
85              block[ 5]  = tmp4;
86              break;
87    case  2:  block[ 2] |= tmp0;
88              block[ 3]  = tmp1;
89              block[ 4]  = tmp2;
90              block[ 5]  = tmp3;
91              block[ 6]  = tmp4;
92              break;
93    case  3:  block[ 3] |= tmp0;
94              block[ 4]  = tmp1;
95              block[ 5]  = tmp2;
96              block[ 6]  = tmp3;
97              block[ 7]  = tmp4;
98              break;
99    case  4:  block[ 4] |= tmp0;
100              block[ 5]  = tmp1;
101              block[ 6]  = tmp2;
102              block[ 7]  = tmp3;
103              block[ 8]  = tmp4;
104              break;
105    case  5:  block[ 5] |= tmp0;
106              block[ 6]  = tmp1;
107              block[ 7]  = tmp2;
108              block[ 8]  = tmp3;
109              block[ 9]  = tmp4;
110              break;
111    case  6:  block[ 6] |= tmp0;
112              block[ 7]  = tmp1;
113              block[ 8]  = tmp2;
114              block[ 9]  = tmp3;
115              block[10]  = tmp4;
116              break;
117    case  7:  block[ 7] |= tmp0;
118              block[ 8]  = tmp1;
119              block[ 9]  = tmp2;
120              block[10]  = tmp3;
121              block[11]  = tmp4;
122              break;
123    case  8:  block[ 8] |= tmp0;
124              block[ 9]  = tmp1;
125              block[10]  = tmp2;
126              block[11]  = tmp3;
127              block[12]  = tmp4;
128              break;
129    case  9:  block[ 9] |= tmp0;
130              block[10]  = tmp1;
131              block[11]  = tmp2;
132              block[12]  = tmp3;
133              block[13]  = tmp4;
134              break;
135    case 10:  block[10] |= tmp0;
136              block[11]  = tmp1;
137              block[12]  = tmp2;
138              block[13]  = tmp3;
139              block[14]  = tmp4;
140              break;
141    case 11:  block[11] |= tmp0;
142              block[12]  = tmp1;
143              block[13]  = tmp2;
144              block[14]  = tmp3;
145              block[15]  = tmp4;
146              break;
147    case 12:  block[12] |= tmp0;
148              block[13]  = tmp1;
149              block[14]  = tmp2;
150              block[15]  = tmp3;
151              break;
152    case 13:  block[13] |= tmp0;
153              block[14]  = tmp1;
154              block[15]  = tmp2;
155              break;
156    case 14:  block[14] |= tmp0;
157              block[15]  = tmp1;
158              break;
159    case 15:  block[15] |= tmp0;
160              break;
161  }
162
163  u32 new_len = offset + append_len;
164
165  return new_len;
166}
167
168DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u32 append_len, u32 *digest)
169{
170  u32 in0 = append[0];
171  u32 in1 = append[1];
172  u32 in2 = append[2];
173  u32 in3 = append[3];
174
175  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
176  const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
177  const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
178  const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
179  const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
180  const u32 tmp4 = hc_bytealign_be (in3,   0, offset);
181  #endif
182
183  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
184
185  #if defined IS_NV
186  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
187  #endif
188
189  #if (defined IS_AMD || defined IS_HIP)
190  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
191  #endif
192
193  const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
194  const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
195  const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
196  const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
197  const u32 tmp4 = hc_byte_perm_S (0,   in3, selector);
198  #endif
199
200  u32 carry[4] = { 0 };
201
202  switch (offset / 4)
203  {
204    case  0:  block[ 0] |= tmp0;
205              block[ 1]  = tmp1;
206              block[ 2]  = tmp2;
207              block[ 3]  = tmp3;
208              block[ 4]  = tmp4;
209              break;
210    case  1:  block[ 1] |= tmp0;
211              block[ 2]  = tmp1;
212              block[ 3]  = tmp2;
213              block[ 4]  = tmp3;
214              block[ 5]  = tmp4;
215              break;
216    case  2:  block[ 2] |= tmp0;
217              block[ 3]  = tmp1;
218              block[ 4]  = tmp2;
219              block[ 5]  = tmp3;
220              block[ 6]  = tmp4;
221              break;
222    case  3:  block[ 3] |= tmp0;
223              block[ 4]  = tmp1;
224              block[ 5]  = tmp2;
225              block[ 6]  = tmp3;
226              block[ 7]  = tmp4;
227              break;
228    case  4:  block[ 4] |= tmp0;
229              block[ 5]  = tmp1;
230              block[ 6]  = tmp2;
231              block[ 7]  = tmp3;
232              block[ 8]  = tmp4;
233              break;
234    case  5:  block[ 5] |= tmp0;
235              block[ 6]  = tmp1;
236              block[ 7]  = tmp2;
237              block[ 8]  = tmp3;
238              block[ 9]  = tmp4;
239              break;
240    case  6:  block[ 6] |= tmp0;
241              block[ 7]  = tmp1;
242              block[ 8]  = tmp2;
243              block[ 9]  = tmp3;
244              block[10]  = tmp4;
245              break;
246    case  7:  block[ 7] |= tmp0;
247              block[ 8]  = tmp1;
248              block[ 9]  = tmp2;
249              block[10]  = tmp3;
250              block[11]  = tmp4;
251              break;
252    case  8:  block[ 8] |= tmp0;
253              block[ 9]  = tmp1;
254              block[10]  = tmp2;
255              block[11]  = tmp3;
256              block[12]  = tmp4;
257              break;
258    case  9:  block[ 9] |= tmp0;
259              block[10]  = tmp1;
260              block[11]  = tmp2;
261              block[12]  = tmp3;
262              block[13]  = tmp4;
263              break;
264    case 10:  block[10] |= tmp0;
265              block[11]  = tmp1;
266              block[12]  = tmp2;
267              block[13]  = tmp3;
268              block[14]  = tmp4;
269              break;
270    case 11:  block[11] |= tmp0;
271              block[12]  = tmp1;
272              block[13]  = tmp2;
273              block[14]  = tmp3;
274              block[15]  = tmp4;
275              break;
276    case 12:  block[12] |= tmp0;
277              block[13]  = tmp1;
278              block[14]  = tmp2;
279              block[15]  = tmp3;
280              carry[ 0]  = tmp4;
281              break;
282    case 13:  block[13] |= tmp0;
283              block[14]  = tmp1;
284              block[15]  = tmp2;
285              carry[ 0]  = tmp3;
286              carry[ 1]  = tmp4;
287              break;
288    case 14:  block[14] |= tmp0;
289              block[15]  = tmp1;
290              carry[ 0]  = tmp2;
291              carry[ 1]  = tmp3;
292              carry[ 2]  = tmp4;
293              break;
294    case 15:  block[15] |= tmp0;
295              carry[ 0]  = tmp1;
296              carry[ 1]  = tmp2;
297              carry[ 2]  = tmp3;
298              carry[ 3]  = tmp4;
299              break;
300  }
301
302  u32 new_len = offset + append_len;
303
304  if (new_len >= 64)
305  {
306    new_len -= 64;
307
308    sha256_transform (block + 0, block + 4, block + 8, block + 12, digest);
309
310    block[ 0] = carry[0];
311    block[ 1] = carry[1];
312    block[ 2] = carry[2];
313    block[ 3] = carry[3];
314    block[ 4] = 0;
315    block[ 5] = 0;
316    block[ 6] = 0;
317    block[ 7] = 0;
318    block[ 8] = 0;
319    block[ 9] = 0;
320    block[10] = 0;
321    block[11] = 0;
322    block[12] = 0;
323    block[13] = 0;
324    block[14] = 0;
325    block[15] = 0;
326  }
327
328  return new_len;
329}
330
331DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u32 append_len)
332{
333  u32 in0 = append[0];
334  u32 in1 = append[1];
335  u32 in2 = append[2];
336  u32 in3 = append[3];
337  u32 in4 = append[4];
338
339  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
340  const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
341  const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
342  const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
343  const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
344  const u32 tmp4 = hc_bytealign_be (in3, in4, offset);
345  const u32 tmp5 = hc_bytealign_be (in4,   0, offset);
346  #endif
347
348  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
349
350  #if defined IS_NV
351  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
352  #endif
353
354  #if (defined IS_AMD || defined IS_HIP)
355  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
356  #endif
357
358  const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
359  const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
360  const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
361  const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
362  const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
363  const u32 tmp5 = hc_byte_perm_S (0,   in4, selector);
364  #endif
365
366  switch (offset / 4)
367  {
368    case  0:  block[ 0] |= tmp0;
369              block[ 1]  = tmp1;
370              block[ 2]  = tmp2;
371              block[ 3]  = tmp3;
372              block[ 4]  = tmp4;
373              block[ 5]  = tmp5;
374              break;
375    case  1:  block[ 1] |= tmp0;
376              block[ 2]  = tmp1;
377              block[ 3]  = tmp2;
378              block[ 4]  = tmp3;
379              block[ 5]  = tmp4;
380              block[ 6]  = tmp5;
381              break;
382    case  2:  block[ 2] |= tmp0;
383              block[ 3]  = tmp1;
384              block[ 4]  = tmp2;
385              block[ 5]  = tmp3;
386              block[ 6]  = tmp4;
387              block[ 7]  = tmp5;
388              break;
389    case  3:  block[ 3] |= tmp0;
390              block[ 4]  = tmp1;
391              block[ 5]  = tmp2;
392              block[ 6]  = tmp3;
393              block[ 7]  = tmp4;
394              block[ 8]  = tmp5;
395              break;
396    case  4:  block[ 4] |= tmp0;
397              block[ 5]  = tmp1;
398              block[ 6]  = tmp2;
399              block[ 7]  = tmp3;
400              block[ 8]  = tmp4;
401              block[ 9]  = tmp5;
402              break;
403    case  5:  block[ 5] |= tmp0;
404              block[ 6]  = tmp1;
405              block[ 7]  = tmp2;
406              block[ 8]  = tmp3;
407              block[ 9]  = tmp4;
408              block[10]  = tmp5;
409              break;
410    case  6:  block[ 6] |= tmp0;
411              block[ 7]  = tmp1;
412              block[ 8]  = tmp2;
413              block[ 9]  = tmp3;
414              block[10]  = tmp4;
415              block[11]  = tmp5;
416              break;
417    case  7:  block[ 7] |= tmp0;
418              block[ 8]  = tmp1;
419              block[ 9]  = tmp2;
420              block[10]  = tmp3;
421              block[11]  = tmp4;
422              block[12]  = tmp5;
423              break;
424    case  8:  block[ 8] |= tmp0;
425              block[ 9]  = tmp1;
426              block[10]  = tmp2;
427              block[11]  = tmp3;
428              block[12]  = tmp4;
429              block[13]  = tmp5;
430              break;
431    case  9:  block[ 9] |= tmp0;
432              block[10]  = tmp1;
433              block[11]  = tmp2;
434              block[12]  = tmp3;
435              block[13]  = tmp4;
436              block[14]  = tmp5;
437              break;
438    case 10:  block[10] |= tmp0;
439              block[11]  = tmp1;
440              block[12]  = tmp2;
441              block[13]  = tmp3;
442              block[14]  = tmp4;
443              block[15]  = tmp5;
444              break;
445    case 11:  block[11] |= tmp0;
446              block[12]  = tmp1;
447              block[13]  = tmp2;
448              block[14]  = tmp3;
449              block[15]  = tmp4;
450              break;
451    case 12:  block[12] |= tmp0;
452              block[13]  = tmp1;
453              block[14]  = tmp2;
454              block[15]  = tmp3;
455              break;
456    case 13:  block[13] |= tmp0;
457              block[14]  = tmp1;
458              block[15]  = tmp2;
459              break;
460    case 14:  block[14] |= tmp0;
461              block[15]  = tmp1;
462              break;
463    case 15:  block[15] |= tmp0;
464              break;
465  }
466
467  u32 new_len = offset + append_len;
468
469  return new_len;
470}
471
472DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const u32 append_len, u32 *digest)
473{
474  u32 in0 = append[0];
475  u32 in1 = append[1];
476  u32 in2 = append[2];
477  u32 in3 = append[3];
478  u32 in4 = append[4];
479
480  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
481  const u32 tmp0 = hc_bytealign_be (  0, in0, offset);
482  const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
483  const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
484  const u32 tmp3 = hc_bytealign_be (in2, in3, offset);
485  const u32 tmp4 = hc_bytealign_be (in3, in4, offset);
486  const u32 tmp5 = hc_bytealign_be (in4,   0, offset);
487  #endif
488
489  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
490
491  #if defined IS_NV
492  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
493  #endif
494
495  #if (defined IS_AMD || defined IS_HIP)
496  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
497  #endif
498
499  const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
500  const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
501  const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
502  const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
503  const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
504  const u32 tmp5 = hc_byte_perm_S (0,   in4, selector);
505  #endif
506
507  u32 carry[5] = { 0 };
508
509  switch (offset / 4)
510  {
511    case  0:  block[ 0] |= tmp0;
512              block[ 1]  = tmp1;
513              block[ 2]  = tmp2;
514              block[ 3]  = tmp3;
515              block[ 4]  = tmp4;
516              block[ 5]  = tmp5;
517              break;
518    case  1:  block[ 1] |= tmp0;
519              block[ 2]  = tmp1;
520              block[ 3]  = tmp2;
521              block[ 4]  = tmp3;
522              block[ 5]  = tmp4;
523              block[ 6]  = tmp5;
524              break;
525    case  2:  block[ 2] |= tmp0;
526              block[ 3]  = tmp1;
527              block[ 4]  = tmp2;
528              block[ 5]  = tmp3;
529              block[ 6]  = tmp4;
530              block[ 7]  = tmp5;
531              break;
532    case  3:  block[ 3] |= tmp0;
533              block[ 4]  = tmp1;
534              block[ 5]  = tmp2;
535              block[ 6]  = tmp3;
536              block[ 7]  = tmp4;
537              block[ 8]  = tmp5;
538              break;
539    case  4:  block[ 4] |= tmp0;
540              block[ 5]  = tmp1;
541              block[ 6]  = tmp2;
542              block[ 7]  = tmp3;
543              block[ 8]  = tmp4;
544              block[ 9]  = tmp5;
545              break;
546    case  5:  block[ 5] |= tmp0;
547              block[ 6]  = tmp1;
548              block[ 7]  = tmp2;
549              block[ 8]  = tmp3;
550              block[ 9]  = tmp4;
551              block[10]  = tmp5;
552              break;
553    case  6:  block[ 6] |= tmp0;
554              block[ 7]  = tmp1;
555              block[ 8]  = tmp2;
556              block[ 9]  = tmp3;
557              block[10]  = tmp4;
558              block[11]  = tmp5;
559              break;
560    case  7:  block[ 7] |= tmp0;
561              block[ 8]  = tmp1;
562              block[ 9]  = tmp2;
563              block[10]  = tmp3;
564              block[11]  = tmp4;
565              block[12]  = tmp5;
566              break;
567    case  8:  block[ 8] |= tmp0;
568              block[ 9]  = tmp1;
569              block[10]  = tmp2;
570              block[11]  = tmp3;
571              block[12]  = tmp4;
572              block[13]  = tmp5;
573              break;
574    case  9:  block[ 9] |= tmp0;
575              block[10]  = tmp1;
576              block[11]  = tmp2;
577              block[12]  = tmp3;
578              block[13]  = tmp4;
579              block[14]  = tmp5;
580              break;
581    case 10:  block[10] |= tmp0;
582              block[11]  = tmp1;
583              block[12]  = tmp2;
584              block[13]  = tmp3;
585              block[14]  = tmp4;
586              block[15]  = tmp5;
587              break;
588    case 11:  block[11] |= tmp0;
589              block[12]  = tmp1;
590              block[13]  = tmp2;
591              block[14]  = tmp3;
592              block[15]  = tmp4;
593              carry[ 0]  = tmp5;
594              break;
595    case 12:  block[12] |= tmp0;
596              block[13]  = tmp1;
597              block[14]  = tmp2;
598              block[15]  = tmp3;
599              carry[ 0]  = tmp4;
600              carry[ 1]  = tmp5;
601              break;
602    case 13:  block[13] |= tmp0;
603              block[14]  = tmp1;
604              block[15]  = tmp2;
605              carry[ 0]  = tmp3;
606              carry[ 1]  = tmp4;
607              carry[ 2]  = tmp5;
608              break;
609    case 14:  block[14] |= tmp0;
610              block[15]  = tmp1;
611              carry[ 0]  = tmp2;
612              carry[ 1]  = tmp3;
613              carry[ 2]  = tmp4;
614              carry[ 3]  = tmp5;
615              break;
616    case 15:  block[15] |= tmp0;
617              carry[ 0]  = tmp1;
618              carry[ 1]  = tmp2;
619              carry[ 2]  = tmp3;
620              carry[ 3]  = tmp4;
621              carry[ 4]  = tmp5;
622              break;
623  }
624
625  u32 new_len = offset + append_len;
626
627  if (new_len >= 64)
628  {
629    new_len -= 64;
630
631    sha256_transform (block + 0, block + 4, block + 8, block + 12, digest);
632
633    block[ 0] = carry[0];
634    block[ 1] = carry[1];
635    block[ 2] = carry[2];
636    block[ 3] = carry[3];
637    block[ 4] = carry[4];
638    block[ 5] = 0;
639    block[ 6] = 0;
640    block[ 7] = 0;
641    block[ 8] = 0;
642    block[ 9] = 0;
643    block[10] = 0;
644    block[11] = 0;
645    block[12] = 0;
646    block[13] = 0;
647    block[14] = 0;
648    block[15] = 0;
649  }
650
651  return new_len;
652}
653
654DECLSPEC void truncate_block_5x4_be_S (u32 *w0, const u32 len)
655{
656  switch (len)
657  {
658    case  0:
659      w0[0]  = 0;
660      w0[1]  = 0;
661      w0[2]  = 0;
662      w0[3]  = 0;
663      w0[4]  = 0;
664      break;
665
666    case  1:
667      w0[0] &= 0xff000000;
668      w0[1]  = 0;
669      w0[2]  = 0;
670      w0[3]  = 0;
671      w0[4]  = 0;
672      break;
673
674    case  2:
675      w0[0] &= 0xffff0000;
676      w0[1]  = 0;
677      w0[2]  = 0;
678      w0[3]  = 0;
679      w0[4]  = 0;
680      break;
681
682    case  3:
683      w0[0] &= 0xffffff00;
684      w0[1]  = 0;
685      w0[2]  = 0;
686      w0[3]  = 0;
687      w0[4]  = 0;
688      break;
689
690    case  4:
691      w0[1]  = 0;
692      w0[2]  = 0;
693      w0[3]  = 0;
694      w0[4]  = 0;
695      break;
696
697    case  5:
698      w0[1] &= 0xff000000;
699      w0[2]  = 0;
700      w0[3]  = 0;
701      w0[4]  = 0;
702      break;
703
704    case  6:
705      w0[1] &= 0xffff0000;
706      w0[2]  = 0;
707      w0[3]  = 0;
708      w0[4]  = 0;
709      break;
710
711    case  7:
712      w0[1] &= 0xffffff00;
713      w0[2]  = 0;
714      w0[3]  = 0;
715      w0[4]  = 0;
716      break;
717
718    case  8:
719      w0[2]  = 0;
720      w0[3]  = 0;
721      w0[4]  = 0;
722      break;
723
724    case  9:
725      w0[2] &= 0xff000000;
726      w0[3]  = 0;
727      w0[4]  = 0;
728      break;
729
730    case 10:
731      w0[2] &= 0xffff0000;
732      w0[3]  = 0;
733      w0[4]  = 0;
734      break;
735
736    case 11:
737      w0[2] &= 0xffffff00;
738      w0[3]  = 0;
739      w0[4]  = 0;
740      break;
741
742    case 12:
743      w0[3]  = 0;
744      w0[4]  = 0;
745      break;
746
747    case 13:
748      w0[3] &= 0xff000000;
749      w0[4]  = 0;
750      break;
751
752    case 14:
753      w0[3] &= 0xffff0000;
754      w0[4]  = 0;
755      break;
756
757    case 15:
758      w0[3] &= 0xffffff00;
759      w0[4]  = 0;
760      break;
761
762    case 16:
763      w0[4]  = 0;
764      break;
765
766    case 17:
767      w0[4] &= 0xff000000;
768      break;
769
770    case 18:
771      w0[4] &= 0xffff0000;
772      break;
773
774    case 19:
775      w0[4] &= 0xffffff00;
776      break;
777  }
778}
779
780DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u32 append_len)
781{
782  u32 in0 = append[0];
783  u32 in1 = append[1];
784  u32 in2 = append[2];
785  u32 in3 = append[3];
786
787  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
788  const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
789  const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
790  const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
791  const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
792  const u32 tmp4 = hc_bytealign_be_S (in3,   0, offset);
793  #endif
794
795  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
796
797  #if defined IS_NV
798  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
799  #endif
800
801  #if (defined IS_AMD || defined IS_HIP)
802  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
803  #endif
804
805  const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
806  const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
807  const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
808  const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
809  const u32 tmp4 = hc_byte_perm_S (0,   in3, selector);
810  #endif
811
812  switch (offset / 4)
813  {
814    case  0:  block[ 0] |= tmp0;
815              block[ 1]  = tmp1;
816              block[ 2]  = tmp2;
817              block[ 3]  = tmp3;
818              block[ 4]  = tmp4;
819              break;
820    case  1:  block[ 1] |= tmp0;
821              block[ 2]  = tmp1;
822              block[ 3]  = tmp2;
823              block[ 4]  = tmp3;
824              block[ 5]  = tmp4;
825              break;
826    case  2:  block[ 2] |= tmp0;
827              block[ 3]  = tmp1;
828              block[ 4]  = tmp2;
829              block[ 5]  = tmp3;
830              block[ 6]  = tmp4;
831              break;
832    case  3:  block[ 3] |= tmp0;
833              block[ 4]  = tmp1;
834              block[ 5]  = tmp2;
835              block[ 6]  = tmp3;
836              block[ 7]  = tmp4;
837              break;
838    case  4:  block[ 4] |= tmp0;
839              block[ 5]  = tmp1;
840              block[ 6]  = tmp2;
841              block[ 7]  = tmp3;
842              block[ 8]  = tmp4;
843              break;
844    case  5:  block[ 5] |= tmp0;
845              block[ 6]  = tmp1;
846              block[ 7]  = tmp2;
847              block[ 8]  = tmp3;
848              block[ 9]  = tmp4;
849              break;
850    case  6:  block[ 6] |= tmp0;
851              block[ 7]  = tmp1;
852              block[ 8]  = tmp2;
853              block[ 9]  = tmp3;
854              block[10]  = tmp4;
855              break;
856    case  7:  block[ 7] |= tmp0;
857              block[ 8]  = tmp1;
858              block[ 9]  = tmp2;
859              block[10]  = tmp3;
860              block[11]  = tmp4;
861              break;
862    case  8:  block[ 8] |= tmp0;
863              block[ 9]  = tmp1;
864              block[10]  = tmp2;
865              block[11]  = tmp3;
866              block[12]  = tmp4;
867              break;
868    case  9:  block[ 9] |= tmp0;
869              block[10]  = tmp1;
870              block[11]  = tmp2;
871              block[12]  = tmp3;
872              block[13]  = tmp4;
873              break;
874    case 10:  block[10] |= tmp0;
875              block[11]  = tmp1;
876              block[12]  = tmp2;
877              block[13]  = tmp3;
878              block[14]  = tmp4;
879              break;
880    case 11:  block[11] |= tmp0;
881              block[12]  = tmp1;
882              block[13]  = tmp2;
883              block[14]  = tmp3;
884              block[15]  = tmp4;
885              break;
886    case 12:  block[12] |= tmp0;
887              block[13]  = tmp1;
888              block[14]  = tmp2;
889              block[15]  = tmp3;
890              block[16]  = tmp4;
891              break;
892    case 13:  block[13] |= tmp0;
893              block[14]  = tmp1;
894              block[15]  = tmp2;
895              block[16]  = tmp3;
896              block[17]  = tmp4;
897              break;
898    case 14:  block[14] |= tmp0;
899              block[15]  = tmp1;
900              block[16]  = tmp2;
901              block[17]  = tmp3;
902              block[18]  = tmp4;
903              break;
904    case 15:  block[15] |= tmp0;
905              block[16]  = tmp1;
906              block[17]  = tmp2;
907              block[18]  = tmp3;
908              block[19]  = tmp4;
909              break;
910    case 16:  block[16] |= tmp0;
911              block[17]  = tmp1;
912              block[18]  = tmp2;
913              block[19]  = tmp3;
914              block[20]  = tmp4;
915              break;
916    case 17:  block[17] |= tmp0;
917              block[18]  = tmp1;
918              block[19]  = tmp2;
919              block[20]  = tmp3;
920              block[21]  = tmp4;
921              break;
922    case 18:  block[18] |= tmp0;
923              block[19]  = tmp1;
924              block[20]  = tmp2;
925              block[21]  = tmp3;
926              block[22]  = tmp4;
927              break;
928    case 19:  block[19] |= tmp0;
929              block[20]  = tmp1;
930              block[21]  = tmp2;
931              block[22]  = tmp3;
932              block[23]  = tmp4;
933              break;
934    case 20:  block[20] |= tmp0;
935              block[21]  = tmp1;
936              block[22]  = tmp2;
937              block[23]  = tmp3;
938              block[24]  = tmp4;
939              break;
940  }
941
942  return offset + append_len;
943}
944
945DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, const u32 append_len)
946{
947  u32 in0 = append[0];
948  u32 in1 = append[1];
949  u32 in2 = append[2];
950  u32 in3 = append[3];
951  u32 in4 = 0x80000000;
952
953  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
954  const u32 tmp0 = hc_bytealign_be_S (  0, in0, offset);
955  const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
956  const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
957  const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset);
958  const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
959  #endif
960
961  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
962
963  #if defined IS_NV
964  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
965  #endif
966
967  #if (defined IS_AMD || defined IS_HIP)
968  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
969  #endif
970
971  const u32 tmp0 = hc_byte_perm_S (in0,   0, selector);
972  const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
973  const u32 tmp2 = hc_byte_perm_S (in2, in1, selector);
974  const u32 tmp3 = hc_byte_perm_S (in3, in2, selector);
975  const u32 tmp4 = hc_byte_perm_S (in4, in3, selector);
976  #endif
977
978  switch (offset / 4)
979  {
980    case  0:  block[ 0] |= tmp0;
981              block[ 1]  = tmp1;
982              block[ 2]  = tmp2;
983              block[ 3]  = tmp3;
984              block[ 4]  = tmp4;
985              break;
986    case  1:  block[ 1] |= tmp0;
987              block[ 2]  = tmp1;
988              block[ 3]  = tmp2;
989              block[ 4]  = tmp3;
990              block[ 5]  = tmp4;
991              break;
992    case  2:  block[ 2] |= tmp0;
993              block[ 3]  = tmp1;
994              block[ 4]  = tmp2;
995              block[ 5]  = tmp3;
996              block[ 6]  = tmp4;
997              break;
998    case  3:  block[ 3] |= tmp0;
999              block[ 4]  = tmp1;
1000              block[ 5]  = tmp2;
1001              block[ 6]  = tmp3;
1002              block[ 7]  = tmp4;
1003              break;
1004    case  4:  block[ 4] |= tmp0;
1005              block[ 5]  = tmp1;
1006              block[ 6]  = tmp2;
1007              block[ 7]  = tmp3;
1008              block[ 8]  = tmp4;
1009              break;
1010    case  5:  block[ 5] |= tmp0;
1011              block[ 6]  = tmp1;
1012              block[ 7]  = tmp2;
1013              block[ 8]  = tmp3;
1014              block[ 9]  = tmp4;
1015              break;
1016    case  6:  block[ 6] |= tmp0;
1017              block[ 7]  = tmp1;
1018              block[ 8]  = tmp2;
1019              block[ 9]  = tmp3;
1020              block[10]  = tmp4;
1021              break;
1022    case  7:  block[ 7] |= tmp0;
1023              block[ 8]  = tmp1;
1024              block[ 9]  = tmp2;
1025              block[10]  = tmp3;
1026              block[11]  = tmp4;
1027              break;
1028    case  8:  block[ 8] |= tmp0;
1029              block[ 9]  = tmp1;
1030              block[10]  = tmp2;
1031              block[11]  = tmp3;
1032              block[12]  = tmp4;
1033              break;
1034    case  9:  block[ 9] |= tmp0;
1035              block[10]  = tmp1;
1036              block[11]  = tmp2;
1037              block[12]  = tmp3;
1038              block[13]  = tmp4;
1039              break;
1040    case 10:  block[10] |= tmp0;
1041              block[11]  = tmp1;
1042              block[12]  = tmp2;
1043              block[13]  = tmp3;
1044              block[14]  = tmp4;
1045              break;
1046    case 11:  block[11] |= tmp0;
1047              block[12]  = tmp1;
1048              block[13]  = tmp2;
1049              block[14]  = tmp3;
1050              block[15]  = tmp4;
1051              break;
1052    case 12:  block[12] |= tmp0;
1053              block[13]  = tmp1;
1054              block[14]  = tmp2;
1055              block[15]  = tmp3;
1056              block[16]  = tmp4;
1057              break;
1058    case 13:  block[13] |= tmp0;
1059              block[14]  = tmp1;
1060              block[15]  = tmp2;
1061              block[16]  = tmp3;
1062              block[17]  = tmp4;
1063              break;
1064    case 14:  block[14] |= tmp0;
1065              block[15]  = tmp1;
1066              block[16]  = tmp2;
1067              block[17]  = tmp3;
1068              block[18]  = tmp4;
1069              break;
1070    case 15:  block[15] |= tmp0;
1071              block[16]  = tmp1;
1072              block[17]  = tmp2;
1073              block[18]  = tmp3;
1074              block[19]  = tmp4;
1075              break;
1076    case 16:  block[16] |= tmp0;
1077              block[17]  = tmp1;
1078              block[18]  = tmp2;
1079              block[19]  = tmp3;
1080              block[20]  = tmp4;
1081              break;
1082    case 17:  block[17] |= tmp0;
1083              block[18]  = tmp1;
1084              block[19]  = tmp2;
1085              block[20]  = tmp3;
1086              block[21]  = tmp4;
1087              break;
1088    case 18:  block[18] |= tmp0;
1089              block[19]  = tmp1;
1090              block[20]  = tmp2;
1091              block[21]  = tmp3;
1092              block[22]  = tmp4;
1093              break;
1094    case 19:  block[19] |= tmp0;
1095              block[20]  = tmp1;
1096              block[21]  = tmp2;
1097              block[22]  = tmp3;
1098              block[23]  = tmp4;
1099              break;
1100    case 20:  block[20] |= tmp0;
1101              block[21]  = tmp1;
1102              block[22]  = tmp2;
1103              block[23]  = tmp3;
1104              block[24]  = tmp4;
1105              break;
1106  }
1107
1108  return offset + append_len;
1109}
1110
/**
 * Append the five words append[0..4] to block at byte position offset.
 *
 * The input words are combined pairwise with the platform's byte-align /
 * byte-permute primitive (driven by offset) into six output words.  The
 * first output word is OR-ed into block[offset / 4] so bytes already present
 * in that word survive; the following five words are overwritten.
 *
 * Only word positions 0..3 (offset 0..15) are handled; for any other offset
 * block is left untouched.  append_len is not used to limit the copy, it
 * only contributes to the return value.
 *
 * Returns offset + append_len.
 */
DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u32 append_len)
{
  const u32 a0 = append[0];
  const u32 a1 = append[1];
  const u32 a2 = append[2];
  const u32 a3 = append[3];
  const u32 a4 = append[4];

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
  const u32 tmp0 = hc_bytealign_be_S ( 0, a0, offset);
  const u32 tmp1 = hc_bytealign_be_S (a0, a1, offset);
  const u32 tmp2 = hc_bytealign_be_S (a1, a2, offset);
  const u32 tmp3 = hc_bytealign_be_S (a2, a3, offset);
  const u32 tmp4 = hc_bytealign_be_S (a3, a4, offset);
  const u32 tmp5 = hc_bytealign_be_S (a4,  0, offset);
  #endif

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV

  // the permute selector only depends on the position inside the word

  #if defined IS_NV
  const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
  #endif

  #if (defined IS_AMD || defined IS_HIP)
  const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
  #endif

  const u32 tmp0 = hc_byte_perm_S (a0,  0, selector);
  const u32 tmp1 = hc_byte_perm_S (a1, a0, selector);
  const u32 tmp2 = hc_byte_perm_S (a2, a1, selector);
  const u32 tmp3 = hc_byte_perm_S (a3, a2, selector);
  const u32 tmp4 = hc_byte_perm_S (a4, a3, selector);
  const u32 tmp5 = hc_byte_perm_S ( 0, a4, selector);
  #endif

  // One switch arm per starting word; n is a literal so every block index
  // stays a compile-time constant.

  #define MEMCAT24_CASE(n)            \
    case (n):                         \
      block[(n) + 0] |= tmp0;         \
      block[(n) + 1]  = tmp1;         \
      block[(n) + 2]  = tmp2;         \
      block[(n) + 3]  = tmp3;         \
      block[(n) + 4]  = tmp4;         \
      block[(n) + 5]  = tmp5;         \
      break;

  switch (offset / 4)
  {
    MEMCAT24_CASE (0)
    MEMCAT24_CASE (1)
    MEMCAT24_CASE (2)
    MEMCAT24_CASE (3)
  }

  #undef MEMCAT24_CASE

  return offset + append_len;
}
1180
/**
 * Zero all 16 words of a single SHA-256 message block.
 */
DECLSPEC void sha256crypt_clear_block (u32 *block)
{
  for (u32 i = 0; i < 16; i++) block[i] = 0;
}

/**
 * Finish a digest whose not yet transformed tail is buffered in block.
 *
 * block         : 16-word message block, holds block_len buffered bytes
 * block_len     : number of bytes currently buffered in block
 * transform_len : total message length in bytes, stored as bit count in w[15]
 * digest        : running SHA-256 state, updated in place
 *
 * NOTE(review): the `^ 3` presumably maps the byte position onto the
 * big-endian word layout expected by append_0x80_1x16 - confirm against
 * inc_common.cl.
 */
DECLSPEC void sha256crypt_finish (u32 *block, const u32 block_len, const u32 transform_len, u32 *digest)
{
  append_0x80_1x16 (block, block_len ^ 3);

  // With 56 or more buffered bytes the length word no longer fits into this
  // block: flush it and put the length into an otherwise empty block.

  if (block_len >= 56)
  {
    sha256_transform (block + 0, block + 4, block + 8, block + 12, digest);

    sha256crypt_clear_block (block);
  }

  block[15] = transform_len * 8;

  sha256_transform (block + 0, block + 4, block + 8, block + 12, digest);
}

/**
 * Init kernel: computes the starting digest plus the P and S byte sequences
 * for one password candidate and stores them in tmps[gid] for the loop
 * kernel.  Password input is capped at 15 bytes, salt input at 20 bytes.
 */
KERNEL_FQ void m07400_init (KERN_ATTR_TMPS (sha256crypt_tmp_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  u32 w0[4];

  w0[0] = hc_swap32_S (pws[gid].i[0]);
  w0[1] = hc_swap32_S (pws[gid].i[1]);
  w0[2] = hc_swap32_S (pws[gid].i[2]);
  w0[3] = hc_swap32_S (pws[gid].i[3]);

  const u32 pw_len = MIN (pws[gid].pw_len, 15);

  /**
   * salt
   */

  u32 salt_buf[5];

  salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]);
  salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]);
  salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[2]);
  salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[3]);
  salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[4]);

  const u32 salt_len = MIN (salt_bufs[SALT_POS].salt_len, 20);

  /**
   * buffers
   */

  u32 block_len;     // bytes buffered in block
  u32 transform_len; // total message length, required for w[15] = len * 8

  u32 block[16];

  u32 alt_result[8];
  u32 p_bytes[8];
  u32 s_bytes[8];

  /* First digest: key, salt, key.  */

  sha256crypt_clear_block (block);

  block_len = 0;

  block_len = memcat16  (block, block_len, w0, pw_len);
  block_len = memcat16s (block, block_len, salt_buf, salt_len);
  block_len = memcat16  (block, block_len, w0, pw_len);

  // The capped lengths (2 * 15 + 20 bytes) keep this message inside one
  // block, so it is padded and transformed directly.

  append_0x80_1x16 (block, block_len ^ 3);

  block[15] = block_len * 8;

  init_ctx (alt_result);

  sha256_transform (block + 0, block + 4, block + 8, block + 12, alt_result);

  /* Take the first 16 bytes of that digest and cut them down with
     truncate_block_4x4_be_S using pw_len.  */

  u32 alt_result_tmp[8];

  alt_result_tmp[0] = alt_result[0];
  alt_result_tmp[1] = alt_result[1];
  alt_result_tmp[2] = alt_result[2];
  alt_result_tmp[3] = alt_result[3];
  alt_result_tmp[4] = 0;
  alt_result_tmp[5] = 0;
  alt_result_tmp[6] = 0;
  alt_result_tmp[7] = 0;

  truncate_block_4x4_be_S (alt_result_tmp, pw_len);

  /* Second digest: key, salt, then pw_len bytes of the first digest.  */

  sha256crypt_clear_block (block);

  block_len = 0;

  block_len = memcat16  (block, block_len, w0, pw_len);
  block_len = memcat16s (block, block_len, salt_buf, salt_len);
  block_len = memcat16  (block, block_len, alt_result_tmp, pw_len);

  transform_len = block_len;

  /* Take the binary representation of the length of the key and for every
     1 add the full first digest (32 bytes), for every 0 the key.  */

  for (u32 i = 0; i < 8; i++) alt_result_tmp[i] = alt_result[i];

  init_ctx (alt_result);

  for (u32 j = pw_len; j; j >>= 1)
  {
    if (j & 1)
    {
      block_len = memcat16c (block, block_len, &alt_result_tmp[0], 16, alt_result);
      block_len = memcat16c (block, block_len, &alt_result_tmp[4], 16, alt_result);

      transform_len += 32;
    }
    else
    {
      block_len = memcat16c (block, block_len, w0, pw_len, alt_result);

      transform_len += pw_len;
    }
  }

  sha256crypt_finish (block, block_len, transform_len, alt_result);

  for (u32 i = 0; i < 8; i++) tmps[gid].alt_result[i] = alt_result[i];

  /* Start computation of P byte sequence.  */

  transform_len = 0;

  sha256crypt_clear_block (block);

  block_len = 0;

  /* For every character in the password add the entire password.  */

  init_ctx (p_bytes);

  for (u32 j = 0; j < pw_len; j++)
  {
    block_len = memcat16c (block, block_len, w0, pw_len, p_bytes);

    transform_len += pw_len;
  }

  /* Finish the digest.  */

  sha256crypt_finish (block, block_len, transform_len, p_bytes);

  truncate_block_4x4_be_S (p_bytes, pw_len);

  tmps[gid].p_bytes[0] = p_bytes[0];
  tmps[gid].p_bytes[1] = p_bytes[1];
  tmps[gid].p_bytes[2] = p_bytes[2];
  tmps[gid].p_bytes[3] = p_bytes[3];

  /* Start computation of S byte sequence.  */

  transform_len = 0;

  sha256crypt_clear_block (block);

  block_len = 0;

  /* Add the entire salt 16 + N times, where N is the top byte of word 0 of
     the digest stored in tmps[gid].alt_result above.  */

  init_ctx (s_bytes);

  const u32 salt_rounds = 16 + (alt_result[0] >> 24);

  for (u32 j = 0; j < salt_rounds; j++)
  {
    block_len = memcat16sc (block, block_len, salt_buf, salt_len, s_bytes);

    transform_len += salt_len;
  }

  /* Finish the digest.  */

  sha256crypt_finish (block, block_len, transform_len, s_bytes);

  truncate_block_5x4_be_S (s_bytes, salt_len);

  tmps[gid].s_bytes[0] = s_bytes[0];
  tmps[gid].s_bytes[1] = s_bytes[1];
  tmps[gid].s_bytes[2] = s_bytes[2];
  tmps[gid].s_bytes[3] = s_bytes[3];
  tmps[gid].s_bytes[4] = s_bytes[4];
}
1528
/**
 * Loop kernel: runs loop_cnt rounds, numbered from loop_pos, over the state
 * kept in tmps[gid].  Each round hashes a message assembled from the running
 * digest and the P and S byte sequences, and the result becomes the running
 * digest for the next round.
 */
KERNEL_FQ void m07400_loop (KERN_ATTR_TMPS (sha256crypt_tmp_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  const u32 pw_len   = MIN (pws[gid].pw_len, 15);
  const u32 salt_len = MIN (salt_bufs[SALT_POS].salt_len, 20);

  /**
   * state left behind by the init kernel or an earlier loop invocation
   */

  u32 p_bytes[4];

  for (u32 k = 0; k < 4; k++) p_bytes[k] = tmps[gid].p_bytes[k];

  u32 s_bytes[5]; // fifth word: 4 extra bytes for MySQL 7.5+ hashes

  for (u32 k = 0; k < 5; k++) s_bytes[k] = tmps[gid].s_bytes[k];

  u32 alt_result[8];

  for (u32 k = 0; k < 8; k++) alt_result[k] = tmps[gid].alt_result[k];

  // Optimization: a copy of P with the 0x80 marker already placed at
  // pw_len.  Even rounds end their message with P, so appending this copy
  // adds the data and the terminator in a single call.

  u32 p_bytes_x80[4];

  for (u32 k = 0; k < 4; k++) p_bytes_x80[k] = p_bytes[k];

  append_0x80_1x4_S (p_bytes_x80, pw_len ^ 3);

  /* Repeatedly run the collected hash value through SHA256 to burn
     CPU cycles.  */

  for (u32 i = 0, j = loop_pos; i < loop_cnt; i++, j++)
  {
    const u32 odd_round = j & 1;
    const u32 add_salt  = (j % 3) != 0;
    const u32 add_pw    = (j % 7) != 0;

    u32 digest[8];

    init_ctx (digest);

    // 25 words: one full block plus room for the spill-over of the appends

    u32 block[25];

    for (u32 k = 0; k < 25; k++) block[k] = 0;

    u32 block_len;

    if (odd_round)
    {
      // odd rounds start with P

      for (u32 k = 0; k < 4; k++) block[k] = p_bytes[k];

      block_len = pw_len;

      if (add_salt)
      {
        block_len = memcat24 (block, block_len, s_bytes, salt_len);
      }
    }
    else
    {
      // even rounds start with the running digest; it is exactly 8 words,
      // so S can be stored word-aligned right behind it

      for (u32 k = 0; k < 8; k++) block[k] = alt_result[k];

      block_len = 32;

      if (add_salt)
      {
        for (u32 k = 0; k < 5; k++) block[8 + k] = s_bytes[k];

        block_len += salt_len;
      }
    }

    if (add_pw)
    {
      block_len = memcat20 (block, block_len, p_bytes, pw_len);
    }

    if (odd_round)
    {
      // odd rounds end with the running digest; the _x80 variant is used
      // for its second half

      block_len = memcat20     (block, block_len, &alt_result[0], 16);
      block_len = memcat20_x80 (block, block_len, &alt_result[4], 16);
    }
    else
    {
      block_len = memcat20 (block, block_len, p_bytes_x80, pw_len);
    }

    if (block_len >= 56)
    {
      // no room left for the length word: hash the first block, then move
      // the spill-over words to the front of a fresh one

      sha256_transform (block + 0, block + 4, block + 8, block + 12, digest);

      for (u32 k = 0; k <  9; k++) block[k] = block[16 + k];
      for (u32 k = 9; k < 16; k++) block[k] = 0;
    }

    block[14] = 0;
    block[15] = block_len * 8;

    sha256_transform (block + 0, block + 4, block + 8, block + 12, digest);

    for (u32 k = 0; k < 8; k++) alt_result[k] = digest[k];
  }

  for (u32 k = 0; k < 8; k++) tmps[gid].alt_result[k] = alt_result[k];
}
1739
/**
 * Comparison kernel: loads the first four words of the final digest from
 * tmps[gid], byte-swaps them and hands over to the shared comparison code
 * pulled in through COMPARE_M.
 */
KERNEL_FQ void m07400_comp (KERN_ATTR_TMPS (sha256crypt_tmp_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  // not referenced in this body; presumably consumed by the COMPARE_M include
  const u64 lid = get_local_id (0);

  // digest words 0..3, byte-swapped; presumably the values the COMPARE_M
  // include matches against the target hashes - verify against that file
  const u32 r0 = hc_swap32_S (tmps[gid].alt_result[0]);
  const u32 r1 = hc_swap32_S (tmps[gid].alt_result[1]);
  const u32 r2 = hc_swap32_S (tmps[gid].alt_result[2]);
  const u32 r3 = hc_swap32_S (tmps[gid].alt_result[3]);

  // fixed inner-loop position for the included comparison code
  #define il_pos 0

  // COMPARE_M is defined near the top of this file as "inc_comp_multi.cl"
  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
1763