1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6//#define NEW_SIMD_CODE
7
8#ifdef KERNEL_STATIC
9#include "inc_vendor.h"
10#include "inc_types.h"
11#include "inc_platform.cl"
12#include "inc_common.cl"
13#include "inc_scalar.cl"
14#include "inc_hash_md5.cl"
15#include "inc_cipher_aes.cl"
16#endif
17
18typedef struct pem
19{
20  u32 data_buf[16384];
21  int data_len;
22
23  int cipher;
24
25} pem_t;
26
27KERNEL_FQ void m22941_mxx (KERN_ATTR_ESALT (pem_t))
28{
29  const u64 gid = get_global_id (0);
30  const u64 lid = get_local_id (0);
31  const u64 lsz = get_local_size (0);
32
33  /**
34   * aes shared
35   */
36
37  #ifdef REAL_SHM
38
39  LOCAL_VK u32 s_td0[256];
40  LOCAL_VK u32 s_td1[256];
41  LOCAL_VK u32 s_td2[256];
42  LOCAL_VK u32 s_td3[256];
43  LOCAL_VK u32 s_td4[256];
44
45  LOCAL_VK u32 s_te0[256];
46  LOCAL_VK u32 s_te1[256];
47  LOCAL_VK u32 s_te2[256];
48  LOCAL_VK u32 s_te3[256];
49  LOCAL_VK u32 s_te4[256];
50
51  for (u32 i = lid; i < 256; i += lsz)
52  {
53    s_td0[i] = td0[i];
54    s_td1[i] = td1[i];
55    s_td2[i] = td2[i];
56    s_td3[i] = td3[i];
57    s_td4[i] = td4[i];
58
59    s_te0[i] = te0[i];
60    s_te1[i] = te1[i];
61    s_te2[i] = te2[i];
62    s_te3[i] = te3[i];
63    s_te4[i] = te4[i];
64  }
65
66  SYNC_THREADS ();
67
68  #else
69
70  CONSTANT_AS u32a *s_td0 = td0;
71  CONSTANT_AS u32a *s_td1 = td1;
72  CONSTANT_AS u32a *s_td2 = td2;
73  CONSTANT_AS u32a *s_td3 = td3;
74  CONSTANT_AS u32a *s_td4 = td4;
75
76  CONSTANT_AS u32a *s_te0 = te0;
77  CONSTANT_AS u32a *s_te1 = te1;
78  CONSTANT_AS u32a *s_te2 = te2;
79  CONSTANT_AS u32a *s_te3 = te3;
80  CONSTANT_AS u32a *s_te4 = te4;
81
82  #endif
83
84  if (gid >= gid_max) return;
85
86  /**
87   * digest
88   */
89
90  const u32 search[4] =
91  {
92    digests_buf[DIGESTS_OFFSET].digest_buf[0],
93    digests_buf[DIGESTS_OFFSET].digest_buf[1],
94    digests_buf[DIGESTS_OFFSET].digest_buf[2],
95    digests_buf[DIGESTS_OFFSET].digest_buf[3]
96  };
97
98  /**
99   * base
100   */
101
102  u32 s[4];
103
104  s[0] = salt_bufs[SALT_POS].salt_buf[0];
105  s[1] = salt_bufs[SALT_POS].salt_buf[1];
106  s[2] = salt_bufs[SALT_POS].salt_buf[2];
107  s[3] = salt_bufs[SALT_POS].salt_buf[3];
108
109  u32 first_data[4];
110
111  first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0];
112  first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1];
113  first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2];
114  first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3];
115
116  const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len;
117
118  const int last_pad_pos = data_len - 1;
119
120  const int last_pad_elem = last_pad_pos / 4;
121
122  u32 iv[4];
123
124  iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7];
125  iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6];
126  iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5];
127  iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4];
128
129  u32 enc[4];
130
131  enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3];
132  enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2];
133  enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1];
134  enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0];
135
136  /**
137   * loop
138   */
139
140  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
141  {
142    md5_ctx_t ctx;
143
144    md5_init (&ctx);
145
146    md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len);
147
148    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
149
150    u32 t[16];
151
152    t[ 0] = s[0];
153    t[ 1] = s[1];
154    t[ 2] = 0;
155    t[ 3] = 0;
156    t[ 4] = 0;
157    t[ 5] = 0;
158    t[ 6] = 0;
159    t[ 7] = 0;
160    t[ 8] = 0;
161    t[ 9] = 0;
162    t[10] = 0;
163    t[11] = 0;
164    t[12] = 0;
165    t[13] = 0;
166    t[14] = 0;
167    t[15] = 0;
168
169    md5_update (&ctx, t, 8);
170
171    md5_final (&ctx);
172
173    u32 ukey[6];
174
175    ukey[0] = ctx.h[0];
176    ukey[1] = ctx.h[1];
177    ukey[2] = ctx.h[2];
178    ukey[3] = ctx.h[3];
179
180    md5_init (&ctx);
181
182    ctx.w0[0] = ukey[0];
183    ctx.w0[1] = ukey[1];
184    ctx.w0[2] = ukey[2];
185    ctx.w0[3] = ukey[3];
186
187    ctx.len = 16;
188
189    md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len);
190
191    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
192
193    md5_update (&ctx, t, 8);
194
195    md5_final (&ctx);
196
197    ukey[4] = ctx.h[0];
198    ukey[5] = ctx.h[1];
199
200    // AES
201
202    ukey[0] = hc_swap32_S (ukey[0]);
203    ukey[1] = hc_swap32_S (ukey[1]);
204    ukey[2] = hc_swap32_S (ukey[2]);
205    ukey[3] = hc_swap32_S (ukey[3]);
206    ukey[4] = hc_swap32_S (ukey[4]);
207    ukey[5] = hc_swap32_S (ukey[5]);
208
209    u32 ks[52];
210
211    AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);
212
213    u32 dec[4];
214
215    // first check the padding
216
217    aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4);
218
219    dec[0] ^= iv[0];
220    dec[1] ^= iv[1];
221    dec[2] ^= iv[2];
222    dec[3] ^= iv[3];
223
224    const int paddingv = pkcs_padding_bs16 (dec, 16);
225
226    if (paddingv == -1) continue;
227
228    // second check (naive code) ASN.1 structure
229
230    aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4);
231
232    dec[0] ^= s[0];
233    dec[1] ^= s[1];
234    dec[2] ^= s[2];
235    dec[3] ^= s[3];
236
237    const int real_len = (data_len - 16) + paddingv;
238
239    const int asn1_ok = asn1_detect (dec, real_len);
240
241    if (asn1_ok == 0) continue;
242
243    const u32 r0 = search[0];
244    const u32 r1 = search[1];
245    const u32 r2 = search[2];
246    const u32 r3 = search[3];
247
248    COMPARE_M_SCALAR (r0, r1, r2, r3);
249  }
250}
251
252KERNEL_FQ void m22941_sxx (KERN_ATTR_ESALT (pem_t))
253{
254  const u64 gid = get_global_id (0);
255  const u64 lid = get_local_id (0);
256  const u64 lsz = get_local_size (0);
257
258  /**
259   * aes shared
260   */
261
262  #ifdef REAL_SHM
263
264  LOCAL_VK u32 s_td0[256];
265  LOCAL_VK u32 s_td1[256];
266  LOCAL_VK u32 s_td2[256];
267  LOCAL_VK u32 s_td3[256];
268  LOCAL_VK u32 s_td4[256];
269
270  LOCAL_VK u32 s_te0[256];
271  LOCAL_VK u32 s_te1[256];
272  LOCAL_VK u32 s_te2[256];
273  LOCAL_VK u32 s_te3[256];
274  LOCAL_VK u32 s_te4[256];
275
276  for (u32 i = lid; i < 256; i += lsz)
277  {
278    s_td0[i] = td0[i];
279    s_td1[i] = td1[i];
280    s_td2[i] = td2[i];
281    s_td3[i] = td3[i];
282    s_td4[i] = td4[i];
283
284    s_te0[i] = te0[i];
285    s_te1[i] = te1[i];
286    s_te2[i] = te2[i];
287    s_te3[i] = te3[i];
288    s_te4[i] = te4[i];
289  }
290
291  SYNC_THREADS ();
292
293  #else
294
295  CONSTANT_AS u32a *s_td0 = td0;
296  CONSTANT_AS u32a *s_td1 = td1;
297  CONSTANT_AS u32a *s_td2 = td2;
298  CONSTANT_AS u32a *s_td3 = td3;
299  CONSTANT_AS u32a *s_td4 = td4;
300
301  CONSTANT_AS u32a *s_te0 = te0;
302  CONSTANT_AS u32a *s_te1 = te1;
303  CONSTANT_AS u32a *s_te2 = te2;
304  CONSTANT_AS u32a *s_te3 = te3;
305  CONSTANT_AS u32a *s_te4 = te4;
306
307  #endif
308
309  if (gid >= gid_max) return;
310
311  /**
312   * digest
313   */
314
315  const u32 search[4] =
316  {
317    digests_buf[DIGESTS_OFFSET].digest_buf[0],
318    digests_buf[DIGESTS_OFFSET].digest_buf[1],
319    digests_buf[DIGESTS_OFFSET].digest_buf[2],
320    digests_buf[DIGESTS_OFFSET].digest_buf[3]
321  };
322
323  /**
324   * base
325   */
326
327  u32 s[4];
328
329  s[0] = salt_bufs[SALT_POS].salt_buf[0];
330  s[1] = salt_bufs[SALT_POS].salt_buf[1];
331  s[2] = salt_bufs[SALT_POS].salt_buf[2];
332  s[3] = salt_bufs[SALT_POS].salt_buf[3];
333
334  u32 first_data[4];
335
336  first_data[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[0];
337  first_data[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[1];
338  first_data[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[2];
339  first_data[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[3];
340
341  const int data_len = esalt_bufs[DIGESTS_OFFSET].data_len;
342
343  const int last_pad_pos = data_len - 1;
344
345  const int last_pad_elem = last_pad_pos / 4;
346
347  u32 iv[4];
348
349  iv[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 7];
350  iv[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 6];
351  iv[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 5];
352  iv[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 4];
353
354  u32 enc[4];
355
356  enc[0] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 3];
357  enc[1] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 2];
358  enc[2] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 1];
359  enc[3] = esalt_bufs[DIGESTS_OFFSET].data_buf[last_pad_elem - 0];
360
361  /**
362   * loop
363   */
364
365  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
366  {
367    md5_ctx_t ctx;
368
369    md5_init (&ctx);
370
371    md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len);
372
373    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
374
375    u32 t[16];
376
377    t[ 0] = s[0];
378    t[ 1] = s[1];
379    t[ 2] = 0;
380    t[ 3] = 0;
381    t[ 4] = 0;
382    t[ 5] = 0;
383    t[ 6] = 0;
384    t[ 7] = 0;
385    t[ 8] = 0;
386    t[ 9] = 0;
387    t[10] = 0;
388    t[11] = 0;
389    t[12] = 0;
390    t[13] = 0;
391    t[14] = 0;
392    t[15] = 0;
393
394    md5_update (&ctx, t, 8);
395
396    md5_final (&ctx);
397
398    u32 ukey[6];
399
400    ukey[0] = ctx.h[0];
401    ukey[1] = ctx.h[1];
402    ukey[2] = ctx.h[2];
403    ukey[3] = ctx.h[3];
404
405    md5_init (&ctx);
406
407    ctx.w0[0] = ukey[0];
408    ctx.w0[1] = ukey[1];
409    ctx.w0[2] = ukey[2];
410    ctx.w0[3] = ukey[3];
411
412    ctx.len = 16;
413
414    md5_update_global (&ctx, pws[gid].i, pws[gid].pw_len);
415
416    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
417
418    md5_update (&ctx, t, 8);
419
420    md5_final (&ctx);
421
422    ukey[4] = ctx.h[0];
423    ukey[5] = ctx.h[1];
424
425    // AES
426
427    ukey[0] = hc_swap32_S (ukey[0]);
428    ukey[1] = hc_swap32_S (ukey[1]);
429    ukey[2] = hc_swap32_S (ukey[2]);
430    ukey[3] = hc_swap32_S (ukey[3]);
431    ukey[4] = hc_swap32_S (ukey[4]);
432    ukey[5] = hc_swap32_S (ukey[5]);
433
434    u32 ks[52];
435
436    AES192_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);
437
438    u32 dec[4];
439
440    // first check the padding
441
442    aes192_decrypt (ks, enc, dec, s_td0, s_td1, s_td2, s_td3, s_td4);
443
444    dec[0] ^= iv[0];
445    dec[1] ^= iv[1];
446    dec[2] ^= iv[2];
447    dec[3] ^= iv[3];
448
449    const int paddingv = pkcs_padding_bs16 (dec, 16);
450
451    if (paddingv == -1) continue;
452
453    // second check (naive code) ASN.1 structure
454
455    aes192_decrypt (ks, first_data, dec, s_td0, s_td1, s_td2, s_td3, s_td4);
456
457    dec[0] ^= s[0];
458    dec[1] ^= s[1];
459    dec[2] ^= s[2];
460    dec[3] ^= s[3];
461
462    const int real_len = (data_len - 16) + paddingv;
463
464    const int asn1_ok = asn1_detect (dec, real_len);
465
466    if (asn1_ok == 0) continue;
467
468    const u32 r0 = search[0];
469    const u32 r1 = search[1];
470    const u32 r2 = search[2];
471    const u32 r3 = search[3];
472
473    COMPARE_S_SCALAR (r0, r1, r2, r3);
474  }
475}
476