1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6//#define NEW_SIMD_CODE
7
8#ifdef KERNEL_STATIC
9#include "inc_vendor.h"
10#include "inc_types.h"
11#include "inc_platform.cl"
12#include "inc_common.cl"
13#include "inc_scalar.cl"
14#include "inc_hash_md5.cl"
15#endif
16
/**
 * uint_to_hex_lower8 (v): use each lane of v as an index into l_bin2asc and
 * hand the looked-up entries to make_u32x. One variant exists per supported
 * VECT_SIZE. l_bin2asc is not a parameter: it has to be visible at the point
 * where the macro is expanded (the kernels in this file declare and fill it).
 */

#if   VECT_SIZE == 1
#define uint_to_hex_lower8(v) make_u32x (l_bin2asc[(v)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], \
             l_bin2asc[(v).s2], l_bin2asc[(v).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], \
             l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], \
             l_bin2asc[(v).s6], l_bin2asc[(v).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(v) \
  make_u32x (l_bin2asc[(v).s0], l_bin2asc[(v).s1], \
             l_bin2asc[(v).s2], l_bin2asc[(v).s3], \
             l_bin2asc[(v).s4], l_bin2asc[(v).s5], \
             l_bin2asc[(v).s6], l_bin2asc[(v).s7], \
             l_bin2asc[(v).s8], l_bin2asc[(v).s9], \
             l_bin2asc[(v).sa], l_bin2asc[(v).sb], \
             l_bin2asc[(v).sc], l_bin2asc[(v).sd], \
             l_bin2asc[(v).se], l_bin2asc[(v).sf])
#endif
28
29KERNEL_FQ void m04110_mxx (KERN_ATTR_BASIC ())
30{
31  /**
32   * modifier
33   */
34
35  const u64 gid = get_global_id (0);
36  const u64 lid = get_local_id (0);
37  const u64 lsz = get_local_size (0);
38
39  /**
40   * bin2asc table
41   */
42
43  LOCAL_VK u32 l_bin2asc[256];
44
45  for (u32 i = lid; i < 256; i += lsz)
46  {
47    const u32 i0 = (i >> 0) & 15;
48    const u32 i1 = (i >> 4) & 15;
49
50    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
51                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
52  }
53
54  SYNC_THREADS ();
55
56  if (gid >= gid_max) return;
57
58  /**
59   * base
60   */
61
62  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
63
64  u32 s[64] = { 0 };
65
66  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
67  {
68    s[idx] = salt_bufs[SALT_POS].salt_buf[idx];
69  }
70
71  md5_ctx_t ctx0;
72
73  md5_init (&ctx0);
74
75  md5_update (&ctx0, s, salt_len);
76
77  md5_ctx_t ctx0t;
78
79  md5_init (&ctx0t);
80
81  md5_update_global (&ctx0t, pws[gid].i, pws[gid].pw_len);
82
83  /**
84   * loop
85   */
86
87  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
88  {
89    md5_ctx_t ctx1 = ctx0t;
90
91    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
92
93    md5_update (&ctx1, s, salt_len);
94
95    md5_final (&ctx1);
96
97    const u32 a = ctx1.h[0];
98    const u32 b = ctx1.h[1];
99    const u32 c = ctx1.h[2];
100    const u32 d = ctx1.h[3];
101
102    u32 w0[4];
103    u32 w1[4];
104    u32 w2[4];
105    u32 w3[4];
106
107    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
108          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
109    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
110          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
111    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
112          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
113    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
114          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
115    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
116          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
117    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
118          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
119    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
120          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
121    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
122          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
123    w2[0] = 0;
124    w2[1] = 0;
125    w2[2] = 0;
126    w2[3] = 0;
127    w3[0] = 0;
128    w3[1] = 0;
129    w3[2] = 0;
130    w3[3] = 0;
131
132    md5_ctx_t ctx = ctx0;
133
134    md5_update_64 (&ctx, w0, w1, w2, w3, 32);
135
136    md5_final (&ctx);
137
138    const u32 r0 = ctx.h[DGST_R0];
139    const u32 r1 = ctx.h[DGST_R1];
140    const u32 r2 = ctx.h[DGST_R2];
141    const u32 r3 = ctx.h[DGST_R3];
142
143    COMPARE_M_SCALAR (r0, r1, r2, r3);
144  }
145}
146
147KERNEL_FQ void m04110_sxx (KERN_ATTR_BASIC ())
148{
149  /**
150   * modifier
151   */
152
153  const u64 gid = get_global_id (0);
154  const u64 lid = get_local_id (0);
155  const u64 lsz = get_local_size (0);
156
157  /**
158   * bin2asc table
159   */
160
161  LOCAL_VK u32 l_bin2asc[256];
162
163  for (u32 i = lid; i < 256; i += lsz)
164  {
165    const u32 i0 = (i >> 0) & 15;
166    const u32 i1 = (i >> 4) & 15;
167
168    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
169                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
170  }
171
172  SYNC_THREADS ();
173
174  if (gid >= gid_max) return;
175
176  /**
177   * digest
178   */
179
180  const u32 search[4] =
181  {
182    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R0],
183    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R1],
184    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R2],
185    digests_buf[DIGESTS_OFFSET].digest_buf[DGST_R3]
186  };
187
188  /**
189   * base
190   */
191
192  const u32 salt_len = salt_bufs[SALT_POS].salt_len;
193
194  u32 s[64] = { 0 };
195
196  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
197  {
198    s[idx] = salt_bufs[SALT_POS].salt_buf[idx];
199  }
200
201  md5_ctx_t ctx0;
202
203  md5_init (&ctx0);
204
205  md5_update (&ctx0, s, salt_len);
206
207  md5_ctx_t ctx0t;
208
209  md5_init (&ctx0t);
210
211  md5_update_global (&ctx0t, pws[gid].i, pws[gid].pw_len);
212
213  /**
214   * loop
215   */
216
217  for (u32 il_pos = 0; il_pos < il_cnt; il_pos++)
218  {
219    md5_ctx_t ctx1 = ctx0t;
220
221    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
222
223    md5_update (&ctx1, s, salt_len);
224
225    md5_final (&ctx1);
226
227    const u32 a = ctx1.h[0];
228    const u32 b = ctx1.h[1];
229    const u32 c = ctx1.h[2];
230    const u32 d = ctx1.h[3];
231
232    u32 w0[4];
233    u32 w1[4];
234    u32 w2[4];
235    u32 w3[4];
236
237    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
238          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
239    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
240          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
241    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
242          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
243    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
244          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
245    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
246          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
247    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
248          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
249    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
250          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
251    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
252          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;
253    w2[0] = 0;
254    w2[1] = 0;
255    w2[2] = 0;
256    w2[3] = 0;
257    w3[0] = 0;
258    w3[1] = 0;
259    w3[2] = 0;
260    w3[3] = 0;
261
262    md5_ctx_t ctx = ctx0;
263
264    md5_update_64 (&ctx, w0, w1, w2, w3, 32);
265
266    md5_final (&ctx);
267
268    const u32 r0 = ctx.h[DGST_R0];
269    const u32 r1 = ctx.h[DGST_R1];
270    const u32 r2 = ctx.h[DGST_R2];
271    const u32 r3 = ctx.h[DGST_R3];
272
273    COMPARE_S_SCALAR (r0, r1, r2, r3);
274  }
275}
276