1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6//#define NEW_SIMD_CODE
7
8#ifdef KERNEL_STATIC
9#include "inc_vendor.h"
10#include "inc_types.h"
11#include "inc_platform.cl"
12#include "inc_common.cl"
13#include "inc_scalar.cl"
14#include "inc_simd.cl"
15#include "inc_cipher_aes.cl"
16#endif
17
18KERNEL_FQ void m26401_m04 (KERN_ATTR_BASIC ())
19{
20  const u64 gid = get_global_id (0);
21  const u64 lid = get_local_id (0);
22  const u64 lsz = get_local_size (0);
23
24  /**
25   * aes shared
26   */
27
28  #ifdef REAL_SHM
29
30  LOCAL_VK u32 s_td0[256];
31  LOCAL_VK u32 s_td1[256];
32  LOCAL_VK u32 s_td2[256];
33  LOCAL_VK u32 s_td3[256];
34  LOCAL_VK u32 s_td4[256];
35
36  LOCAL_VK u32 s_te0[256];
37  LOCAL_VK u32 s_te1[256];
38  LOCAL_VK u32 s_te2[256];
39  LOCAL_VK u32 s_te3[256];
40  LOCAL_VK u32 s_te4[256];
41
42  for (u32 i = lid; i < 256; i += lsz)
43  {
44    s_td0[i] = td0[i];
45    s_td1[i] = td1[i];
46    s_td2[i] = td2[i];
47    s_td3[i] = td3[i];
48    s_td4[i] = td4[i];
49
50    s_te0[i] = te0[i];
51    s_te1[i] = te1[i];
52    s_te2[i] = te2[i];
53    s_te3[i] = te3[i];
54    s_te4[i] = te4[i];
55  }
56
57  SYNC_THREADS ();
58
59  #else
60
61  CONSTANT_AS u32a *s_td0 = td0;
62  CONSTANT_AS u32a *s_td1 = td1;
63  CONSTANT_AS u32a *s_td2 = td2;
64  CONSTANT_AS u32a *s_td3 = td3;
65  CONSTANT_AS u32a *s_td4 = td4;
66
67  CONSTANT_AS u32a *s_te0 = te0;
68  CONSTANT_AS u32a *s_te1 = te1;
69  CONSTANT_AS u32a *s_te2 = te2;
70  CONSTANT_AS u32a *s_te3 = te3;
71  CONSTANT_AS u32a *s_te4 = te4;
72
73  #endif
74
75  if (gid >= gid_max) return;
76
77  /**
78   * base
79   */
80
81  u32 pw_buf0[4];
82  u32 pw_buf1[4];
83
84  pw_buf0[0] = pws[gid].i[0];
85  pw_buf0[1] = pws[gid].i[1];
86  pw_buf0[2] = pws[gid].i[2];
87  pw_buf0[3] = pws[gid].i[3];
88  pw_buf1[0] = pws[gid].i[4];
89  pw_buf1[1] = pws[gid].i[5];
90  pw_buf1[2] = pws[gid].i[6];
91  pw_buf1[3] = pws[gid].i[7];
92
93  const u32 pw_l_len = pws[gid].pw_len & 63;
94
95  /**
96   * Salt prep
97   */
98
99  u32 pt[4];
100
101  pt[0] = salt_bufs[SALT_POS].salt_buf[0];
102  pt[1] = salt_bufs[SALT_POS].salt_buf[1];
103  pt[2] = salt_bufs[SALT_POS].salt_buf[2];
104  pt[3] = salt_bufs[SALT_POS].salt_buf[3];
105
106  /**
107   * loop
108   */
109
110  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
111  {
112    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15;
113
114    const u32x pw_len = (pw_l_len + pw_r_len) & 15;
115
116    /**
117     * concat password candidate
118     */
119
120    u32x wordl0[4] = { 0 };
121    u32x wordl1[4] = { 0 };
122    u32x wordl2[4] = { 0 };
123    u32x wordl3[4] = { 0 };
124
125    wordl0[0] = pw_buf0[0];
126    wordl0[1] = pw_buf0[1];
127    wordl0[2] = pw_buf0[2];
128    wordl0[3] = pw_buf0[3];
129
130    u32x wordr0[4] = { 0 };
131    u32x wordr1[4] = { 0 };
132    u32x wordr2[4] = { 0 };
133    u32x wordr3[4] = { 0 };
134
135    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
136    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
137    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
138    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
139
140    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
141    {
142      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
143    }
144    else
145    {
146      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
147    }
148
149    u32x w0[4];
150    u32x w1[4];
151    u32x w2[4];
152    u32x w3[4];
153
154    w0[0] = wordl0[0] | wordr0[0];
155    w0[1] = wordl0[1] | wordr0[1];
156    w0[2] = wordl0[2] | wordr0[2];
157    w0[3] = wordl0[3] | wordr0[3];
158    w1[0] = 0;
159    w1[1] = 0;
160    w1[2] = 0;
161    w1[3] = 0;
162    w2[0] = 0;
163    w2[1] = 0;
164    w2[2] = 0;
165    w2[3] = 0;
166    w3[0] = 0;
167    w3[1] = 0;
168    w3[2] = 0;
169    w3[3] = 0;
170
171    u32 ukey[4];
172
173    ukey[0] = w0[0];
174    ukey[1] = w0[1];
175    ukey[2] = w0[2];
176    ukey[3] = w0[3];
177
178    #define KEYLEN 44
179
180    u32 ks[KEYLEN];
181
182    aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3);
183
184    u32 ct[4];
185
186    aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4);
187
188    const u32 r0 = ct[0];
189    const u32 r1 = ct[1];
190    const u32 r2 = ct[2];
191    const u32 r3 = ct[3];
192
193    COMPARE_M_SIMD (r0, r1, r2, r3);
194  }
195}
196
197KERNEL_FQ void m26401_m08 (KERN_ATTR_BASIC ())
198{
199}
200
201KERNEL_FQ void m26401_m16 (KERN_ATTR_BASIC ())
202{
203}
204
205KERNEL_FQ void m26401_s04 (KERN_ATTR_BASIC ())
206{
207  const u64 gid = get_global_id (0);
208  const u64 lid = get_local_id (0);
209  const u64 lsz = get_local_size (0);
210
211  /**
212   * aes shared
213   */
214
215  #ifdef REAL_SHM
216
217  LOCAL_VK u32 s_td0[256];
218  LOCAL_VK u32 s_td1[256];
219  LOCAL_VK u32 s_td2[256];
220  LOCAL_VK u32 s_td3[256];
221  LOCAL_VK u32 s_td4[256];
222
223  LOCAL_VK u32 s_te0[256];
224  LOCAL_VK u32 s_te1[256];
225  LOCAL_VK u32 s_te2[256];
226  LOCAL_VK u32 s_te3[256];
227  LOCAL_VK u32 s_te4[256];
228
229  for (u32 i = lid; i < 256; i += lsz)
230  {
231    s_td0[i] = td0[i];
232    s_td1[i] = td1[i];
233    s_td2[i] = td2[i];
234    s_td3[i] = td3[i];
235    s_td4[i] = td4[i];
236
237    s_te0[i] = te0[i];
238    s_te1[i] = te1[i];
239    s_te2[i] = te2[i];
240    s_te3[i] = te3[i];
241    s_te4[i] = te4[i];
242  }
243
244  SYNC_THREADS ();
245
246  #else
247
248  CONSTANT_AS u32a *s_td0 = td0;
249  CONSTANT_AS u32a *s_td1 = td1;
250  CONSTANT_AS u32a *s_td2 = td2;
251  CONSTANT_AS u32a *s_td3 = td3;
252  CONSTANT_AS u32a *s_td4 = td4;
253
254  CONSTANT_AS u32a *s_te0 = te0;
255  CONSTANT_AS u32a *s_te1 = te1;
256  CONSTANT_AS u32a *s_te2 = te2;
257  CONSTANT_AS u32a *s_te3 = te3;
258  CONSTANT_AS u32a *s_te4 = te4;
259
260  #endif
261
262  if (gid >= gid_max) return;
263
264  /**
265   * base
266   */
267
268  u32 pw_buf0[4];
269  u32 pw_buf1[4];
270
271  pw_buf0[0] = pws[gid].i[0];
272  pw_buf0[1] = pws[gid].i[1];
273  pw_buf0[2] = pws[gid].i[2];
274  pw_buf0[3] = pws[gid].i[3];
275  pw_buf1[0] = pws[gid].i[4];
276  pw_buf1[1] = pws[gid].i[5];
277  pw_buf1[2] = pws[gid].i[6];
278  pw_buf1[3] = pws[gid].i[7];
279
280  const u32 pw_l_len = pws[gid].pw_len & 63;
281
282  /**
283   * Salt prep
284   */
285
286  u32 pt[4];
287
288  pt[0] = salt_bufs[SALT_POS].salt_buf[0];
289  pt[1] = salt_bufs[SALT_POS].salt_buf[1];
290  pt[2] = salt_bufs[SALT_POS].salt_buf[2];
291  pt[3] = salt_bufs[SALT_POS].salt_buf[3];
292
293  /**
294   * digest
295   */
296
297  const u32 search[4] =
298  {
299    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
300    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
301    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
302    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
303  };
304
305  /**
306   * loop
307   */
308
309  for (u32 il_pos = 0; il_pos < il_cnt; il_pos += VECT_SIZE)
310  {
311    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 15;
312
313    const u32x pw_len = (pw_l_len + pw_r_len) & 15;
314
315    /**
316     * concat password candidate
317     */
318
319    u32x wordl0[4] = { 0 };
320    u32x wordl1[4] = { 0 };
321    u32x wordl2[4] = { 0 };
322    u32x wordl3[4] = { 0 };
323
324    wordl0[0] = pw_buf0[0];
325    wordl0[1] = pw_buf0[1];
326    wordl0[2] = pw_buf0[2];
327    wordl0[3] = pw_buf0[3];
328
329    u32x wordr0[4] = { 0 };
330    u32x wordr1[4] = { 0 };
331    u32x wordr2[4] = { 0 };
332    u32x wordr3[4] = { 0 };
333
334    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
335    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
336    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
337    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
338
339    if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
340    {
341      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
342    }
343    else
344    {
345      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
346    }
347
348    u32x w0[4];
349    u32x w1[4];
350    u32x w2[4];
351    u32x w3[4];
352
353    w0[0] = wordl0[0] | wordr0[0];
354    w0[1] = wordl0[1] | wordr0[1];
355    w0[2] = wordl0[2] | wordr0[2];
356    w0[3] = wordl0[3] | wordr0[3];
357    w1[0] = 0;
358    w1[1] = 0;
359    w1[2] = 0;
360    w1[3] = 0;
361    w2[0] = 0;
362    w2[1] = 0;
363    w2[2] = 0;
364    w2[3] = 0;
365    w3[0] = 0;
366    w3[1] = 0;
367    w3[2] = 0;
368    w3[3] = 0;
369
370    u32 ukey[4];
371
372    ukey[0] = w0[0];
373    ukey[1] = w0[1];
374    ukey[2] = w0[2];
375    ukey[3] = w0[3];
376
377    #define KEYLEN 44
378
379    u32 ks[KEYLEN];
380
381    aes128_set_encrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3);
382
383    u32 ct[4];
384
385    aes128_encrypt (ks, pt, ct, s_te0, s_te1, s_te2, s_te3, s_te4);
386
387    const u32 r0 = ct[0];
388    const u32 r1 = ct[1];
389    const u32 r2 = ct[2];
390    const u32 r3 = ct[3];
391
392    COMPARE_S_SIMD (r0, r1, r2, r3);
393  }
394}
395
396KERNEL_FQ void m26401_s08 (KERN_ATTR_BASIC ())
397{
398}
399
400KERNEL_FQ void m26401_s16 (KERN_ATTR_BASIC ())
401{
402}
403