1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6#define NEW_SIMD_CODE
7
8#ifdef KERNEL_STATIC
9#include "inc_vendor.h"
10#include "inc_types.h"
11#include "inc_platform.cl"
12#include "inc_common.cl"
13#include "inc_simd.cl"
14#include "inc_hash_blake2b.cl"
15#endif
16
DECLSPEC void m00600m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
{
  /**
   * Inner loop shared by the m00600_mXX kernels.
   *
   * For every vector of right-hand candidate words in words_buf_r, the word
   * is OR-ed into w[0], a single 16-word message block is built from w[],
   * one BLAKE2b transform is run on it and the upper four 32-bit words of
   * the result (h[0], h[1]) are handed to COMPARE_M_SIMD.
   *
   * w      : 16 x u32 message words prepared by the calling kernel
   * pw_len : length value forwarded unchanged to blake2b_transform_vector
   */

  // NOTE(review): gid is not referenced directly in this function; presumably
  // the COMPARE_M_SIMD macro picks it up by name - confirm in inc_simd.cl.
  const u64 gid = get_global_id (0);

  // invariant part of the first message word
  const u32 w0_fixed = w[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0_var = words_buf_r[il_pos / VECT_SIZE];

    // vectorised copy of the 16 input words, word 0 carries the candidate part

    u32x blk[16];

    blk[0] = w0_fixed | w0_var;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 1; i < 16; i++)
    {
      blk[i] = w[i];
    }

    // pack pairs of 32-bit words into 64-bit message words (odd index = high half),
    // the upper eight message words are always zero

    u64x msg[16];

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 8; i++)
    {
      msg[i] = hl32_to_64 (blk[2 * i + 1], blk[2 * i]);
    }

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 8; i < 16; i++)
    {
      msg[i] = 0;
    }

    // initial chaining value: the IV with 0x01010040 folded into the first word
    // (per RFC 7693 this is the parameter word: fanout 1, depth 1, no key,
    // 0x40 = 64-byte digest)

    u64x state[8];

    state[0] = BLAKE2B_IV_00 ^ 0x01010040;
    state[1] = BLAKE2B_IV_01;
    state[2] = BLAKE2B_IV_02;
    state[3] = BLAKE2B_IV_03;
    state[4] = BLAKE2B_IV_04;
    state[5] = BLAKE2B_IV_05;
    state[6] = BLAKE2B_IV_06;
    state[7] = BLAKE2B_IV_07;

    // single transform call, flagged as the final block
    blake2b_transform_vector (state, msg, pw_len, BLAKE2B_FINAL);

    // only the first two 64-bit state words take part in the comparison

    const u32x r0 = h32_from_64 (state[0]);
    const u32x r1 = l32_from_64 (state[0]);
    const u32x r2 = h32_from_64 (state[1]);
    const u32x r3 = l32_from_64 (state[1]);

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
98
DECLSPEC void m00600s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
{
  /**
   * Inner loop shared by the m00600_sXX kernels.
   *
   * Same hashing steps as the COMPARE_M_SIMD variant of this routine, but the
   * result is checked with COMPARE_S_SIMD against the four digest words read
   * from digests_buf[DIGESTS_OFFSET].
   *
   * w      : 16 x u32 message words prepared by the calling kernel
   * pw_len : length value forwarded unchanged to blake2b_transform_vector
   */

  // NOTE(review): gid is not referenced directly in this function; presumably
  // the COMPARE_S_SIMD macro picks it up by name - confirm in inc_simd.cl.
  const u64 gid = get_global_id (0);

  // digest words to look for; the name "search" is kept because the compare
  // macro presumably refers to it by name - confirm in inc_simd.cl
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
  };

  // invariant part of the first message word
  const u32 w0_fixed = w[0];

  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
  {
    const u32x w0_var = words_buf_r[il_pos / VECT_SIZE];

    // vectorised copy of the 16 input words, word 0 carries the candidate part

    u32x blk[16];

    blk[0] = w0_fixed | w0_var;

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 1; i < 16; i++)
    {
      blk[i] = w[i];
    }

    // pack pairs of 32-bit words into 64-bit message words (odd index = high half),
    // the upper eight message words are always zero

    u64x msg[16];

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 8; i++)
    {
      msg[i] = hl32_to_64 (blk[2 * i + 1], blk[2 * i]);
    }

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 8; i < 16; i++)
    {
      msg[i] = 0;
    }

    // initial chaining value: the IV with 0x01010040 folded into the first word
    // (per RFC 7693 this is the parameter word: fanout 1, depth 1, no key,
    // 0x40 = 64-byte digest)

    u64x state[8];

    state[0] = BLAKE2B_IV_00 ^ 0x01010040;
    state[1] = BLAKE2B_IV_01;
    state[2] = BLAKE2B_IV_02;
    state[3] = BLAKE2B_IV_03;
    state[4] = BLAKE2B_IV_04;
    state[5] = BLAKE2B_IV_05;
    state[6] = BLAKE2B_IV_06;
    state[7] = BLAKE2B_IV_07;

    // single transform call, flagged as the final block
    blake2b_transform_vector (state, msg, pw_len, BLAKE2B_FINAL);

    // only the first two 64-bit state words take part in the comparison

    const u32x r0 = h32_from_64 (state[0]);
    const u32x r1 = l32_from_64 (state[0]);
    const u32x r2 = h32_from_64 (state[1]);
    const u32x r3 = l32_from_64 (state[1]);

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}
192
KERNEL_FQ void m00600_m04 (KERN_ATTR_VECTOR ())
{
  /**
   * Kernel entry point: loads words 0..3 of this work-item's candidate,
   * leaves words 4..15 zero and runs the m00600m loop on the result.
   * NOTE(review): the _m04 suffix presumably marks the variant for
   * candidates that fit into four words - confirm against the host code.
   */

  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no candidate to process
  if (gid >= gid_max) return;

  // 16-word buffer, everything not copied from the candidate stays zero
  u32 w[16] = { 0 };

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 0; i < 4; i++)
  {
    w[i] = pws[gid].i[i];
  }

  // the stored length is masked down to the range 0..63
  const u32 pw_len = pws[gid].pw_len & 63;

  m00600m (w, pw_len,
           pws, rules_buf, combs_buf, words_buf_r, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
           combs_mode, salt_repeat, pws_pos, gid_max);
}
230
KERNEL_FQ void m00600_m08 (KERN_ATTR_VECTOR ())
{
  /**
   * Kernel entry point: loads words 0..7 of this work-item's candidate,
   * leaves words 8..15 zero and runs the m00600m loop on the result.
   * NOTE(review): the _m08 suffix presumably marks the variant for
   * candidates that fit into eight words - confirm against the host code.
   */

  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no candidate to process
  if (gid >= gid_max) return;

  // 16-word buffer, everything not copied from the candidate stays zero
  u32 w[16] = { 0 };

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 0; i < 8; i++)
  {
    w[i] = pws[gid].i[i];
  }

  // the stored length is masked down to the range 0..63
  const u32 pw_len = pws[gid].pw_len & 63;

  m00600m (w, pw_len,
           pws, rules_buf, combs_buf, words_buf_r, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
           combs_mode, salt_repeat, pws_pos, gid_max);
}
268
KERNEL_FQ void m00600_m16 (KERN_ATTR_VECTOR ())
{
  /**
   * Kernel entry point: loads all 16 words of this work-item's candidate
   * and runs the m00600m loop on them.
   */

  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no candidate to process
  if (gid >= gid_max) return;

  // every one of the 16 words is taken from the candidate buffer
  u32 w[16];

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 0; i < 16; i++)
  {
    w[i] = pws[gid].i[i];
  }

  // the stored length is masked down to the range 0..63
  const u32 pw_len = pws[gid].pw_len & 63;

  m00600m (w, pw_len,
           pws, rules_buf, combs_buf, words_buf_r, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
           combs_mode, salt_repeat, pws_pos, gid_max);
}
306
KERNEL_FQ void m00600_s04 (KERN_ATTR_VECTOR ())
{
  /**
   * Kernel entry point: loads words 0..3 of this work-item's candidate,
   * leaves words 4..15 zero and runs the m00600s loop on the result.
   * NOTE(review): the _s04 suffix presumably marks the variant for
   * candidates that fit into four words - confirm against the host code.
   */

  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no candidate to process
  if (gid >= gid_max) return;

  // 16-word buffer, everything not copied from the candidate stays zero
  u32 w[16] = { 0 };

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 0; i < 4; i++)
  {
    w[i] = pws[gid].i[i];
  }

  // the stored length is masked down to the range 0..63
  const u32 pw_len = pws[gid].pw_len & 63;

  m00600s (w, pw_len,
           pws, rules_buf, combs_buf, words_buf_r, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
           combs_mode, salt_repeat, pws_pos, gid_max);
}
344
KERNEL_FQ void m00600_s08 (KERN_ATTR_VECTOR ())
{
  /**
   * Kernel entry point: loads words 0..7 of this work-item's candidate,
   * leaves words 8..15 zero and runs the m00600s loop on the result.
   * NOTE(review): the _s08 suffix presumably marks the variant for
   * candidates that fit into eight words - confirm against the host code.
   */

  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no candidate to process
  if (gid >= gid_max) return;

  // 16-word buffer, everything not copied from the candidate stays zero
  u32 w[16] = { 0 };

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 0; i < 8; i++)
  {
    w[i] = pws[gid].i[i];
  }

  // the stored length is masked down to the range 0..63
  const u32 pw_len = pws[gid].pw_len & 63;

  m00600s (w, pw_len,
           pws, rules_buf, combs_buf, words_buf_r, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
           combs_mode, salt_repeat, pws_pos, gid_max);
}
382
KERNEL_FQ void m00600_s16 (KERN_ATTR_VECTOR ())
{
  /**
   * Kernel entry point: loads all 16 words of this work-item's candidate
   * and runs the m00600s loop on them.
   */

  const u64 gid = get_global_id (0);

  // work-items beyond gid_max have no candidate to process
  if (gid >= gid_max) return;

  // every one of the 16 words is taken from the candidate buffer
  u32 w[16];

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 0; i < 16; i++)
  {
    w[i] = pws[gid].i[i];
  }

  // the stored length is masked down to the range 0..63
  const u32 pw_len = pws[gid].pw_len & 63;

  m00600s (w, pw_len,
           pws, rules_buf, combs_buf, words_buf_r, tmps, hooks,
           bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d,
           bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d,
           plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs,
           d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf,
           bitmap_mask, bitmap_shift1, bitmap_shift2,
           SALT_POS, loop_pos, loop_cnt, il_cnt, digests_cnt, DIGESTS_OFFSET,
           combs_mode, salt_repeat, pws_pos, gid_max);
}
420