/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#ifdef KERNEL_STATIC
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_hash_sha1.cl"
#include "inc_cipher_aes.cl"
#endif

#define COMPARE_S "inc_comp_single.cl"
#define COMPARE_M "inc_comp_multi.cl"

#define ROUNDS 0x40000

#define MIN(a,b) (((a) < (b)) ? (a) : (b))
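
// RAR3 key derivation, as implemented below (editor's summary): one SHA-1 is
// computed over ROUNDS = 0x40000 repetitions of
//
//   password (UTF-16LE) || salt (8 bytes) || counter (3 bytes, little-endian)
//
// with the counter running from 0 to ROUNDS - 1. The AES-128 key is built
// from the final digest, and one IV byte is taken from a finalized copy of
// the running context every ROUNDS / 16 iterations.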

typedef struct rar3
{
  u32 data[81920];

  u32 pack_size;
  u32 unpack_size;

} rar3_t;

typedef struct rar3_tmp
{
  u32 dgst[5];

  u32 w[66]; // 256 byte pass + 8 byte salt

  u32 iv[4];

} rar3_tmp_t;
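
// layout example for w[] (hypothetical 4-character password): UTF-16LE
// encoding makes the password 8 bytes, filling w[0]..w[1]; the 8-byte salt
// follows in w[2]..w[3]. A maximum-size 256-byte password pushes the salt to
// w[64]..w[65], which is why w[] has 66 words.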

CONSTANT_VK u32a crc32tab[0x100] =
{
  0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
  0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
  0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
  0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
  0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
  0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
  0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
  0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
  0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
  0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
  0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
  0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
  0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
  0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
  0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
  0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
  0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
  0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
  0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
  0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
  0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
  0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
  0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
  0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
  0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
  0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
  0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
  0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
  0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
  0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
  0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
  0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
  0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
  0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
  0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
  0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
  0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
  0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
  0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
  0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
  0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
  0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
  0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
  0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
  0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
  0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
  0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
  0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
  0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
  0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
  0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
  0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
  0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
  0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
  0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
  0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
  0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
  0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
  0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
  0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
  0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
  0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
  0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
  0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

DECLSPEC u32 round_crc32 (const u32 a, const u32 v, LOCAL_AS u32 *l_crc32tab)
{
  const u32 k = (a ^ v) & 0xff;

  const u32 s = a >> 8;

  return l_crc32tab[k] ^ s;
}

DECLSPEC u32 round_crc32_16 (const u32 crc32, const u32 *buf, const u32 len, LOCAL_AS u32 *l_crc32tab)
{
  const int crc_len = MIN (len, 16);

  u32 c = crc32;

  for (int i = 0; i < crc_len; i++)
  {
    const u32 idx = i / 4;
    const u32 mod = i % 4;
    const u32 sht = (3 - mod) * 8;

    const u32 b = buf[idx] >> sht; // b & 0xff (but already done in round_crc32 ())

    c = round_crc32 (c, b, l_crc32tab);
  }

  return c;
}
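
// note: round_crc32 () is the standard table-driven CRC-32 byte step,
// crc' = tab[(crc ^ byte) & 0xff] ^ (crc >> 8), and round_crc32_16 () feeds
// it up to 16 big-endian bytes from buf[]. The caller below starts from ~0;
// whether a final inversion is needed depends on how the host side prepared
// the comparison value, so none is applied here.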

DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, const u32 append, u32 *digest)
{
  const u32 func_len = len & 63;

  //const u32 mod = func_len & 3;
  const u32 div = func_len / 4;

  u32 tmp0;
  u32 tmp1;

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
  tmp0 = hc_bytealign_be (0, append, func_len);
  tmp1 = hc_bytealign_be (append, 0, func_len);
  #endif

  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV

  #if defined IS_NV
  const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
  #endif

  #if (defined IS_AMD || defined IS_HIP)
  const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
  #endif

  tmp0 = hc_byte_perm (append, 0, selector);
  tmp1 = hc_byte_perm (0, append, selector);
  #endif

  u32 carry = 0;

  switch (div)
  {
    case  0:  w0[0] |= tmp0; w0[1]  = tmp1; break;
    case  1:  w0[1] |= tmp0; w0[2]  = tmp1; break;
    case  2:  w0[2] |= tmp0; w0[3]  = tmp1; break;
    case  3:  w0[3] |= tmp0; w1[0]  = tmp1; break;
    case  4:  w1[0] |= tmp0; w1[1]  = tmp1; break;
    case  5:  w1[1] |= tmp0; w1[2]  = tmp1; break;
    case  6:  w1[2] |= tmp0; w1[3]  = tmp1; break;
    case  7:  w1[3] |= tmp0; w2[0]  = tmp1; break;
    case  8:  w2[0] |= tmp0; w2[1]  = tmp1; break;
    case  9:  w2[1] |= tmp0; w2[2]  = tmp1; break;
    case 10:  w2[2] |= tmp0; w2[3]  = tmp1; break;
    case 11:  w2[3] |= tmp0; w3[0]  = tmp1; break;
    case 12:  w3[0] |= tmp0; w3[1]  = tmp1; break;
    case 13:  w3[1] |= tmp0; w3[2]  = tmp1; break;
    case 14:  w3[2] |= tmp0; w3[3]  = tmp1; break;
    default:  w3[3] |= tmp0; carry  = tmp1; break; // a bit weird, but using default instead of case 15 works around an AMD JIT compiler segfault
  }

  const u32 new_len = func_len + 3;

  if (new_len >= 64)
  {
    sha1_transform (w0, w1, w2, w3, digest);

    w0[0] = carry;
    w0[1] = 0;
    w0[2] = 0;
    w0[3] = 0;
    w1[0] = 0;
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;
  }
}
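
// note on memcat8c_be (): it writes the 32-bit big-endian word 'append' into
// the block buffer, but the callers below only account for 3 bytes
// (ctx.len += 3). That is safe because the appended counter stays below
// 0x01000000, so the fourth byte written is zero and the next append simply
// ORs over it.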

// the only change in this function compared to OpenCL/inc_hash_sha1.cl is
// that it returns the expanded 64 byte buffer w0_t..wf_t in t[]:

DECLSPEC void sha1_transform_rar29 (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, u32 *t)
{
  u32 a = digest[0];
  u32 b = digest[1];
  u32 c = digest[2];
  u32 d = digest[3];
  u32 e = digest[4];

  #ifdef IS_CPU

  u32 w0_t = w0[0];
  u32 w1_t = w0[1];
  u32 w2_t = w0[2];
  u32 w3_t = w0[3];
  u32 w4_t = w1[0];
  u32 w5_t = w1[1];
  u32 w6_t = w1[2];
  u32 w7_t = w1[3];
  u32 w8_t = w2[0];
  u32 w9_t = w2[1];
  u32 wa_t = w2[2];
  u32 wb_t = w2[3];
  u32 wc_t = w3[0];
  u32 wd_t = w3[1];
  u32 we_t = w3[2];
  u32 wf_t = w3[3];

  #define K SHA1C00

  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w1_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w2_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w3_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w4_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w5_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w6_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w7_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w8_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w9_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wa_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, wb_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, wc_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, wd_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, we_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w3_t);

  #undef K
  #define K SHA1C01

  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w7_t);
  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wb_t);
  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3_t);
  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w7_t);

  #undef K
  #define K SHA1C02

  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wb_t);
  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w3_t);
  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w7_t);
  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, wb_t);

  #undef K
  #define K SHA1C03

  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, wf_t);
  w0_t = hc_rotl32_S ((wd_t ^ w8_t ^ w2_t ^ w0_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w0_t);
  w1_t = hc_rotl32_S ((we_t ^ w9_t ^ w3_t ^ w1_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1_t);
  w2_t = hc_rotl32_S ((wf_t ^ wa_t ^ w4_t ^ w2_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w2_t);
  w3_t = hc_rotl32_S ((w0_t ^ wb_t ^ w5_t ^ w3_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3_t);
  w4_t = hc_rotl32_S ((w1_t ^ wc_t ^ w6_t ^ w4_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4_t);
  w5_t = hc_rotl32_S ((w2_t ^ wd_t ^ w7_t ^ w5_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w5_t);
  w6_t = hc_rotl32_S ((w3_t ^ we_t ^ w8_t ^ w6_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w6_t);
  w7_t = hc_rotl32_S ((w4_t ^ wf_t ^ w9_t ^ w7_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w7_t);
  w8_t = hc_rotl32_S ((w5_t ^ w0_t ^ wa_t ^ w8_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w8_t);
  w9_t = hc_rotl32_S ((w6_t ^ w1_t ^ wb_t ^ w9_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w9_t);
  wa_t = hc_rotl32_S ((w7_t ^ w2_t ^ wc_t ^ wa_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wa_t);
  wb_t = hc_rotl32_S ((w8_t ^ w3_t ^ wd_t ^ wb_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, wb_t);
  wc_t = hc_rotl32_S ((w9_t ^ w4_t ^ we_t ^ wc_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, wc_t);
  wd_t = hc_rotl32_S ((wa_t ^ w5_t ^ wf_t ^ wd_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, wd_t);
  we_t = hc_rotl32_S ((wb_t ^ w6_t ^ w0_t ^ we_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, we_t);
  wf_t = hc_rotl32_S ((wc_t ^ w7_t ^ w1_t ^ wf_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, wf_t);

  t[ 0] = w0_t;
  t[ 1] = w1_t;
  t[ 2] = w2_t;
  t[ 3] = w3_t;
  t[ 4] = w4_t;
  t[ 5] = w5_t;
  t[ 6] = w6_t;
  t[ 7] = w7_t;
  t[ 8] = w8_t;
  t[ 9] = w9_t;
  t[10] = wa_t;
  t[11] = wb_t;
  t[12] = wc_t;
  t[13] = wd_t;
  t[14] = we_t;
  t[15] = wf_t;

  #undef K

  #else

  u32 w00_t = w0[0];
  u32 w01_t = w0[1];
  u32 w02_t = w0[2];
  u32 w03_t = w0[3];
  u32 w04_t = w1[0];
  u32 w05_t = w1[1];
  u32 w06_t = w1[2];
  u32 w07_t = w1[3];
  u32 w08_t = w2[0];
  u32 w09_t = w2[1];
  u32 w0a_t = w2[2];
  u32 w0b_t = w2[3];
  u32 w0c_t = w3[0];
  u32 w0d_t = w3[1];
  u32 w0e_t = w3[2];
  u32 w0f_t = w3[3];
  u32 w10_t;
  u32 w11_t;
  u32 w12_t;
  u32 w13_t;
  u32 w14_t;
  u32 w15_t;
  u32 w16_t;
  u32 w17_t;
  u32 w18_t;
  u32 w19_t;
  u32 w1a_t;
  u32 w1b_t;
  u32 w1c_t;
  u32 w1d_t;
  u32 w1e_t;
  u32 w1f_t;
  u32 w20_t;
  u32 w21_t;
  u32 w22_t;
  u32 w23_t;
  u32 w24_t;
  u32 w25_t;
  u32 w26_t;
  u32 w27_t;
  u32 w28_t;
  u32 w29_t;
  u32 w2a_t;
  u32 w2b_t;
  u32 w2c_t;
  u32 w2d_t;
  u32 w2e_t;
  u32 w2f_t;
  u32 w30_t;
  u32 w31_t;
  u32 w32_t;
  u32 w33_t;
  u32 w34_t;
  u32 w35_t;
  u32 w36_t;
  u32 w37_t;
  u32 w38_t;
  u32 w39_t;
  u32 w3a_t;
  u32 w3b_t;
  u32 w3c_t;
  u32 w3d_t;
  u32 w3e_t;
  u32 w3f_t;
  u32 w40_t;
  u32 w41_t;
  u32 w42_t;
  u32 w43_t;
  u32 w44_t;
  u32 w45_t;
  u32 w46_t;
  u32 w47_t;
  u32 w48_t;
  u32 w49_t;
  u32 w4a_t;
  u32 w4b_t;
  u32 w4c_t;
  u32 w4d_t;
  u32 w4e_t;
  u32 w4f_t;

  #define K SHA1C00

  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w00_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w01_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w02_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w03_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w04_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w05_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w06_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w07_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w08_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w09_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0a_t);
  SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w0b_t);
  SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w0c_t);
  SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w0d_t);
  SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w0e_t);
  SHA1_STEP_S (SHA1_F0o, a, b, c, d, e, w0f_t);
  w10_t = hc_rotl32_S ((w0d_t ^ w08_t ^ w02_t ^ w00_t), 1u); SHA1_STEP_S (SHA1_F0o, e, a, b, c, d, w10_t);
  w11_t = hc_rotl32_S ((w0e_t ^ w09_t ^ w03_t ^ w01_t), 1u); SHA1_STEP_S (SHA1_F0o, d, e, a, b, c, w11_t);
  w12_t = hc_rotl32_S ((w0f_t ^ w0a_t ^ w04_t ^ w02_t), 1u); SHA1_STEP_S (SHA1_F0o, c, d, e, a, b, w12_t);
  w13_t = hc_rotl32_S ((w10_t ^ w0b_t ^ w05_t ^ w03_t), 1u); SHA1_STEP_S (SHA1_F0o, b, c, d, e, a, w13_t);

  #undef K
  #define K SHA1C01

  w14_t = hc_rotl32_S ((w11_t ^ w0c_t ^ w06_t ^ w04_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w14_t);
  w15_t = hc_rotl32_S ((w12_t ^ w0d_t ^ w07_t ^ w05_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w15_t);
  w16_t = hc_rotl32_S ((w13_t ^ w0e_t ^ w08_t ^ w06_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w16_t);
  w17_t = hc_rotl32_S ((w14_t ^ w0f_t ^ w09_t ^ w07_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w17_t);
  w18_t = hc_rotl32_S ((w15_t ^ w10_t ^ w0a_t ^ w08_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w18_t);
  w19_t = hc_rotl32_S ((w16_t ^ w11_t ^ w0b_t ^ w09_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w19_t);
  w1a_t = hc_rotl32_S ((w17_t ^ w12_t ^ w0c_t ^ w0a_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1a_t);
  w1b_t = hc_rotl32_S ((w18_t ^ w13_t ^ w0d_t ^ w0b_t), 1u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w1b_t);
  w1c_t = hc_rotl32_S ((w19_t ^ w14_t ^ w0e_t ^ w0c_t), 1u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w1c_t);
  w1d_t = hc_rotl32_S ((w1a_t ^ w15_t ^ w0f_t ^ w0d_t), 1u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w1d_t);
  w1e_t = hc_rotl32_S ((w1b_t ^ w16_t ^ w10_t ^ w0e_t), 1u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w1e_t);
  w1f_t = hc_rotl32_S ((w1c_t ^ w17_t ^ w11_t ^ w0f_t), 1u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w1f_t);
  w20_t = hc_rotl32_S ((w1a_t ^ w10_t ^ w04_t ^ w00_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w20_t);
  w21_t = hc_rotl32_S ((w1b_t ^ w11_t ^ w05_t ^ w01_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w21_t);
  w22_t = hc_rotl32_S ((w1c_t ^ w12_t ^ w06_t ^ w02_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w22_t);
  w23_t = hc_rotl32_S ((w1d_t ^ w13_t ^ w07_t ^ w03_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w23_t);
  w24_t = hc_rotl32_S ((w1e_t ^ w14_t ^ w08_t ^ w04_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w24_t);
  w25_t = hc_rotl32_S ((w1f_t ^ w15_t ^ w09_t ^ w05_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w25_t);
  w26_t = hc_rotl32_S ((w20_t ^ w16_t ^ w0a_t ^ w06_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w26_t);
  w27_t = hc_rotl32_S ((w21_t ^ w17_t ^ w0b_t ^ w07_t), 2u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w27_t);

  #undef K
  #define K SHA1C02

  w28_t = hc_rotl32_S ((w22_t ^ w18_t ^ w0c_t ^ w08_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w28_t);
  w29_t = hc_rotl32_S ((w23_t ^ w19_t ^ w0d_t ^ w09_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w29_t);
  w2a_t = hc_rotl32_S ((w24_t ^ w1a_t ^ w0e_t ^ w0a_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2a_t);
  w2b_t = hc_rotl32_S ((w25_t ^ w1b_t ^ w0f_t ^ w0b_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w2b_t);
  w2c_t = hc_rotl32_S ((w26_t ^ w1c_t ^ w10_t ^ w0c_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w2c_t);
  w2d_t = hc_rotl32_S ((w27_t ^ w1d_t ^ w11_t ^ w0d_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w2d_t);
  w2e_t = hc_rotl32_S ((w28_t ^ w1e_t ^ w12_t ^ w0e_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w2e_t);
  w2f_t = hc_rotl32_S ((w29_t ^ w1f_t ^ w13_t ^ w0f_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w2f_t);
  w30_t = hc_rotl32_S ((w2a_t ^ w20_t ^ w14_t ^ w10_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w30_t);
  w31_t = hc_rotl32_S ((w2b_t ^ w21_t ^ w15_t ^ w11_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w31_t);
  w32_t = hc_rotl32_S ((w2c_t ^ w22_t ^ w16_t ^ w12_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w32_t);
  w33_t = hc_rotl32_S ((w2d_t ^ w23_t ^ w17_t ^ w13_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w33_t);
  w34_t = hc_rotl32_S ((w2e_t ^ w24_t ^ w18_t ^ w14_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w34_t);
  w35_t = hc_rotl32_S ((w2f_t ^ w25_t ^ w19_t ^ w15_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w35_t);
  w36_t = hc_rotl32_S ((w30_t ^ w26_t ^ w1a_t ^ w16_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w36_t);
  w37_t = hc_rotl32_S ((w31_t ^ w27_t ^ w1b_t ^ w17_t), 2u); SHA1_STEP_S (SHA1_F2o, a, b, c, d, e, w37_t);
  w38_t = hc_rotl32_S ((w32_t ^ w28_t ^ w1c_t ^ w18_t), 2u); SHA1_STEP_S (SHA1_F2o, e, a, b, c, d, w38_t);
  w39_t = hc_rotl32_S ((w33_t ^ w29_t ^ w1d_t ^ w19_t), 2u); SHA1_STEP_S (SHA1_F2o, d, e, a, b, c, w39_t);
  w3a_t = hc_rotl32_S ((w34_t ^ w2a_t ^ w1e_t ^ w1a_t), 2u); SHA1_STEP_S (SHA1_F2o, c, d, e, a, b, w3a_t);
  w3b_t = hc_rotl32_S ((w35_t ^ w2b_t ^ w1f_t ^ w1b_t), 2u); SHA1_STEP_S (SHA1_F2o, b, c, d, e, a, w3b_t);

  #undef K
  #define K SHA1C03

  w3c_t = hc_rotl32_S ((w36_t ^ w2c_t ^ w20_t ^ w1c_t), 2u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w3c_t);
  w3d_t = hc_rotl32_S ((w37_t ^ w2d_t ^ w21_t ^ w1d_t), 2u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w3d_t);
  w3e_t = hc_rotl32_S ((w38_t ^ w2e_t ^ w22_t ^ w1e_t), 2u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w3e_t);
  w3f_t = hc_rotl32_S ((w39_t ^ w2f_t ^ w23_t ^ w1f_t), 2u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w3f_t);
  w40_t = hc_rotl32_S ((w34_t ^ w20_t ^ w08_t ^ w00_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w40_t);
  w41_t = hc_rotl32_S ((w35_t ^ w21_t ^ w09_t ^ w01_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w41_t);
  w42_t = hc_rotl32_S ((w36_t ^ w22_t ^ w0a_t ^ w02_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w42_t);
  w43_t = hc_rotl32_S ((w37_t ^ w23_t ^ w0b_t ^ w03_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w43_t);
  w44_t = hc_rotl32_S ((w38_t ^ w24_t ^ w0c_t ^ w04_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w44_t);
  w45_t = hc_rotl32_S ((w39_t ^ w25_t ^ w0d_t ^ w05_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w45_t);
  w46_t = hc_rotl32_S ((w3a_t ^ w26_t ^ w0e_t ^ w06_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w46_t);
  w47_t = hc_rotl32_S ((w3b_t ^ w27_t ^ w0f_t ^ w07_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w47_t);
  w48_t = hc_rotl32_S ((w3c_t ^ w28_t ^ w10_t ^ w08_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w48_t);
  w49_t = hc_rotl32_S ((w3d_t ^ w29_t ^ w11_t ^ w09_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w49_t);
  w4a_t = hc_rotl32_S ((w3e_t ^ w2a_t ^ w12_t ^ w0a_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4a_t);
  w4b_t = hc_rotl32_S ((w3f_t ^ w2b_t ^ w13_t ^ w0b_t), 4u); SHA1_STEP_S (SHA1_F1, a, b, c, d, e, w4b_t);
  w4c_t = hc_rotl32_S ((w40_t ^ w2c_t ^ w14_t ^ w0c_t), 4u); SHA1_STEP_S (SHA1_F1, e, a, b, c, d, w4c_t);
  w4d_t = hc_rotl32_S ((w41_t ^ w2d_t ^ w15_t ^ w0d_t), 4u); SHA1_STEP_S (SHA1_F1, d, e, a, b, c, w4d_t);
  w4e_t = hc_rotl32_S ((w42_t ^ w2e_t ^ w16_t ^ w0e_t), 4u); SHA1_STEP_S (SHA1_F1, c, d, e, a, b, w4e_t);
  w4f_t = hc_rotl32_S ((w43_t ^ w2f_t ^ w17_t ^ w0f_t), 4u); SHA1_STEP_S (SHA1_F1, b, c, d, e, a, w4f_t);

  t[ 0] = w40_t;
  t[ 1] = w41_t;
  t[ 2] = w42_t;
  t[ 3] = w43_t;
  t[ 4] = w44_t;
  t[ 5] = w45_t;
  t[ 6] = w46_t;
  t[ 7] = w47_t;
  t[ 8] = w48_t;
  t[ 9] = w49_t;
  t[10] = w4a_t;
  t[11] = w4b_t;
  t[12] = w4c_t;
  t[13] = w4d_t;
  t[14] = w4e_t;
  t[15] = w4f_t;

  #undef K
  #endif

  digest[0] += a;
  digest[1] += b;
  digest[2] += c;
  digest[3] += d;
  digest[4] += e;
}
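
// note: t[] receives the last 16 words of the expanded message schedule,
// i.e. W[64]..W[79], on both the CPU and the GPU code path;
// sha1_update_rar29 () below writes them back into the source buffer w[].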

// the only change in this function compared to OpenCL/inc_hash_sha1.cl is
// that it calls our modified sha1_transform_rar29 () function

DECLSPEC void sha1_update_64_rar29 (sha1_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int bytes, u32 *t)
{
  if (bytes == 0) return;

  const int pos = ctx->len & 63;

  int len = 64;

  if (bytes < 64)
  {
    len = bytes;
  }

  ctx->len += len;

  if (pos == 0)
  {
    ctx->w0[0] = w0[0];
    ctx->w0[1] = w0[1];
    ctx->w0[2] = w0[2];
    ctx->w0[3] = w0[3];
    ctx->w1[0] = w1[0];
    ctx->w1[1] = w1[1];
    ctx->w1[2] = w1[2];
    ctx->w1[3] = w1[3];
    ctx->w2[0] = w2[0];
    ctx->w2[1] = w2[1];
    ctx->w2[2] = w2[2];
    ctx->w2[3] = w2[3];
    ctx->w3[0] = w3[0];
    ctx->w3[1] = w3[1];
    ctx->w3[2] = w3[2];
    ctx->w3[3] = w3[3];

    if (len == 64)
    {
      sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t);

      ctx->w0[0] = 0;
      ctx->w0[1] = 0;
      ctx->w0[2] = 0;
      ctx->w0[3] = 0;
      ctx->w1[0] = 0;
      ctx->w1[1] = 0;
      ctx->w1[2] = 0;
      ctx->w1[3] = 0;
      ctx->w2[0] = 0;
      ctx->w2[1] = 0;
      ctx->w2[2] = 0;
      ctx->w2[3] = 0;
      ctx->w3[0] = 0;
      ctx->w3[1] = 0;
      ctx->w3[2] = 0;
      ctx->w3[3] = 0;
    }
  }
  else
  {
    if ((pos + len) < 64)
    {
      switch_buffer_by_offset_be_S (w0, w1, w2, w3, pos);

      ctx->w0[0] |= w0[0];
      ctx->w0[1] |= w0[1];
      ctx->w0[2] |= w0[2];
      ctx->w0[3] |= w0[3];
      ctx->w1[0] |= w1[0];
      ctx->w1[1] |= w1[1];
      ctx->w1[2] |= w1[2];
      ctx->w1[3] |= w1[3];
      ctx->w2[0] |= w2[0];
      ctx->w2[1] |= w2[1];
      ctx->w2[2] |= w2[2];
      ctx->w2[3] |= w2[3];
      ctx->w3[0] |= w3[0];
      ctx->w3[1] |= w3[1];
      ctx->w3[2] |= w3[2];
      ctx->w3[3] |= w3[3];
    }
    else
    {
      u32 c0[4] = { 0 };
      u32 c1[4] = { 0 };
      u32 c2[4] = { 0 };
      u32 c3[4] = { 0 };

      switch_buffer_by_offset_carry_be_S (w0, w1, w2, w3, c0, c1, c2, c3, pos);

      ctx->w0[0] |= w0[0];
      ctx->w0[1] |= w0[1];
      ctx->w0[2] |= w0[2];
      ctx->w0[3] |= w0[3];
      ctx->w1[0] |= w1[0];
      ctx->w1[1] |= w1[1];
      ctx->w1[2] |= w1[2];
      ctx->w1[3] |= w1[3];
      ctx->w2[0] |= w2[0];
      ctx->w2[1] |= w2[1];
      ctx->w2[2] |= w2[2];
      ctx->w2[3] |= w2[3];
      ctx->w3[0] |= w3[0];
      ctx->w3[1] |= w3[1];
      ctx->w3[2] |= w3[2];
      ctx->w3[3] |= w3[3];

      sha1_transform_rar29 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, t);

      ctx->w0[0] = c0[0];
      ctx->w0[1] = c0[1];
      ctx->w0[2] = c0[2];
      ctx->w0[3] = c0[3];
      ctx->w1[0] = c1[0];
      ctx->w1[1] = c1[1];
      ctx->w1[2] = c1[2];
      ctx->w1[3] = c1[3];
      ctx->w2[0] = c2[0];
      ctx->w2[1] = c2[1];
      ctx->w2[2] = c2[2];
      ctx->w2[3] = c2[3];
      ctx->w3[0] = c3[0];
      ctx->w3[1] = c3[1];
      ctx->w3[2] = c3[2];
      ctx->w3[3] = c3[3];
    }
  }
}
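
// note: this is sha1_update_64 () with the full-block transforms routed
// through sha1_transform_rar29 (), so the expanded schedule lands in t[].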

// the main change in this function compared to OpenCL/inc_hash_sha1.cl is
// that we call sha1_update_64_rar29 () and sometimes replace w[]

DECLSPEC void sha1_update_rar29 (sha1_ctx_t *ctx, u32 *w, const int len)
{
  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  if (len == 0) return;

  const int pos = ctx->len & 63;

  int pos1 = 0;
  int pos4 = 0;

  if (len > 64) // or: if (pos1 < (len - 64))
  {
    w0[0] = w[pos4 +  0];
    w0[1] = w[pos4 +  1];
    w0[2] = w[pos4 +  2];
    w0[3] = w[pos4 +  3];
    w1[0] = w[pos4 +  4];
    w1[1] = w[pos4 +  5];
    w1[2] = w[pos4 +  6];
    w1[3] = w[pos4 +  7];
    w2[0] = w[pos4 +  8];
    w2[1] = w[pos4 +  9];
    w2[2] = w[pos4 + 10];
    w2[3] = w[pos4 + 11];
    w3[0] = w[pos4 + 12];
    w3[1] = w[pos4 + 13];
    w3[2] = w[pos4 + 14];
    w3[3] = w[pos4 + 15];

    sha1_update_64 (ctx, w0, w1, w2, w3, 64);

    pos1 += 64;
    pos4 += 16;
  }

  for (int diff = 64 - pos; pos1 < len; pos1 += 64, pos4 += 16, diff += 64)
  {
    w0[0] = w[pos4 +  0];
    w0[1] = w[pos4 +  1];
    w0[2] = w[pos4 +  2];
    w0[3] = w[pos4 +  3];
    w1[0] = w[pos4 +  4];
    w1[1] = w[pos4 +  5];
    w1[2] = w[pos4 +  6];
    w1[3] = w[pos4 +  7];
    w2[0] = w[pos4 +  8];
    w2[1] = w[pos4 +  9];
    w2[2] = w[pos4 + 10];
    w2[3] = w[pos4 + 11];
    w3[0] = w[pos4 + 12];
    w3[1] = w[pos4 + 13];
    w3[2] = w[pos4 + 14];
    w3[3] = w[pos4 + 15];

    // the only major change in this function compared to OpenCL/inc_hash_sha1.cl:

    u32 t[17] = { 0 };

    sha1_update_64_rar29 (ctx, w0, w1, w2, w3, len - pos1, t);

    if ((diff + 63) >= len) break;

    // replaces 64 bytes (at offset diff) of the underlying data w[] with t[]:

    // for (int i = 0; i < 16; i++) t[i] = hc_swap32_S (t[i]);

    t[ 0] = hc_swap32_S (t[ 0]); // unroll seems to be faster
    t[ 1] = hc_swap32_S (t[ 1]);
    t[ 2] = hc_swap32_S (t[ 2]);
    t[ 3] = hc_swap32_S (t[ 3]);
    t[ 4] = hc_swap32_S (t[ 4]);
    t[ 5] = hc_swap32_S (t[ 5]);
    t[ 6] = hc_swap32_S (t[ 6]);
    t[ 7] = hc_swap32_S (t[ 7]);
    t[ 8] = hc_swap32_S (t[ 8]);
    t[ 9] = hc_swap32_S (t[ 9]);
    t[10] = hc_swap32_S (t[10]);
    t[11] = hc_swap32_S (t[11]);
    t[12] = hc_swap32_S (t[12]);
    t[13] = hc_swap32_S (t[13]);
    t[14] = hc_swap32_S (t[14]);
    t[15] = hc_swap32_S (t[15]);

    const u32 n_idx = diff / 4;
    const u32 n_off = diff % 4;

    if (n_off)
    {
      const u32 off_mul = n_off * 8;
      const u32 off_sub = 32 - off_mul;

      t[16] =                      (t[15] << off_sub);
      t[15] = (t[15] >> off_mul) | (t[14] << off_sub);
      t[14] = (t[14] >> off_mul) | (t[13] << off_sub);
      t[13] = (t[13] >> off_mul) | (t[12] << off_sub);
      t[12] = (t[12] >> off_mul) | (t[11] << off_sub);
      t[11] = (t[11] >> off_mul) | (t[10] << off_sub);
      t[10] = (t[10] >> off_mul) | (t[ 9] << off_sub);
      t[ 9] = (t[ 9] >> off_mul) | (t[ 8] << off_sub);
      t[ 8] = (t[ 8] >> off_mul) | (t[ 7] << off_sub);
      t[ 7] = (t[ 7] >> off_mul) | (t[ 6] << off_sub);
      t[ 6] = (t[ 6] >> off_mul) | (t[ 5] << off_sub);
      t[ 5] = (t[ 5] >> off_mul) | (t[ 4] << off_sub);
      t[ 4] = (t[ 4] >> off_mul) | (t[ 3] << off_sub);
      t[ 3] = (t[ 3] >> off_mul) | (t[ 2] << off_sub);
      t[ 2] = (t[ 2] >> off_mul) | (t[ 1] << off_sub);
      t[ 1] = (t[ 1] >> off_mul) | (t[ 0] << off_sub);
      t[ 0] = (t[ 0] >> off_mul);
    }

    w[n_idx] &= 0xffffff00 << ((3 - n_off) * 8);

    w[n_idx] |= t[0];

    w[n_idx +  1] = t[ 1];
    w[n_idx +  2] = t[ 2];
    w[n_idx +  3] = t[ 3];
    w[n_idx +  4] = t[ 4];
    w[n_idx +  5] = t[ 5];
    w[n_idx +  6] = t[ 6];
    w[n_idx +  7] = t[ 7];
    w[n_idx +  8] = t[ 8];
    w[n_idx +  9] = t[ 9];
    w[n_idx + 10] = t[10];
    w[n_idx + 11] = t[11];
    w[n_idx + 12] = t[12];
    w[n_idx + 13] = t[13];
    w[n_idx + 14] = t[14];
    w[n_idx + 15] = t[15];

    // the final word only carries new data if (n_off != 0); t[16] is zero otherwise

    w[n_idx + 16] &= 0xffffffff >> (n_off * 8);

    w[n_idx + 16] |= t[16];
  }
}
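
// worked example for the write-back above (hypothetical values): diff = 3
// gives n_idx = 0, n_off = 3, off_mul = 24, off_sub = 8. Every t[i] is
// shifted right by 3 bytes, the spill moves into the following word and
// t[16] catches the overflow; w[0] keeps its top 3 bytes
// (mask 0xffffff00 << 0) and takes 1 byte from t[0], while w[16] keeps only
// its lowest byte (mask 0xffffffff >> 24) and takes 3 bytes from t[16].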

KERNEL_FQ void m23700_init (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  tmps[gid].dgst[0] = SHA1M_A;
  tmps[gid].dgst[1] = SHA1M_B;
  tmps[gid].dgst[2] = SHA1M_C;
  tmps[gid].dgst[3] = SHA1M_D;
  tmps[gid].dgst[4] = SHA1M_E;

  // store pass and salt in tmps:

  const u32 pw_len = pws[gid].pw_len;

  u32 w[80] = { 0 };

  for (int i = 0, j = 0; i < pw_len; i += 4, j += 1)
  {
    w[j] = hc_swap32_S (pws[gid].i[j]);
  }

  // append salt:

  const u32 salt_idx = pw_len / 4;
  const u32 salt_off = pw_len & 3;

  u32 salt_buf[3];

  salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[0]); // swap needed due to -O kernel
  salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS].salt_buf[1]);
  salt_buf[2] = 0;

  // switch buffer by offset (can only be 0 or 2 because of utf16):

  if (salt_off == 2) // or just: if (salt_off)
  {
    salt_buf[2] =                       (salt_buf[1] << 16);
    salt_buf[1] = (salt_buf[1] >> 16) | (salt_buf[0] << 16);
    salt_buf[0] = (salt_buf[0] >> 16);
  }

  w[salt_idx + 0] |= salt_buf[0];
  w[salt_idx + 1]  = salt_buf[1];
  w[salt_idx + 2]  = salt_buf[2];

  // store initial w[] (pass and salt) in tmps:

  for (u32 i = 0; i < 66; i++) // unroll ?
  {
    tmps[gid].w[i] = w[i];
  }

  // iv:

  tmps[gid].iv[0] = 0;
  tmps[gid].iv[1] = 0;
  tmps[gid].iv[2] = 0;
  tmps[gid].iv[3] = 0;
}
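
// init-stage example (hypothetical 4-character password): pw_len = 8 after
// UTF-16LE encoding, so salt_idx = 2 and salt_off = 0, and the salt is
// copied into w[2]..w[3] without any shifting; an odd word boundary
// (salt_off == 2) triggers the 16-bit shift above instead.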

KERNEL_FQ void m23700_loop (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= gid_max) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  const u32 salt_len = 8;

  const u32 pw_salt_len = pw_len + salt_len;

  const u32 p3 = pw_salt_len + 3;

  u32 w[80] = { 0 };

  for (u32 i = 0; i < 66; i++)
  {
    w[i] = tmps[gid].w[i];
  }

  // update IV:

  const u32 init_pos = loop_pos / (ROUNDS / 16);

  sha1_ctx_t ctx_iv;

  sha1_init (&ctx_iv);

  ctx_iv.h[0] = tmps[gid].dgst[0];
  ctx_iv.h[1] = tmps[gid].dgst[1];
  ctx_iv.h[2] = tmps[gid].dgst[2];
  ctx_iv.h[3] = tmps[gid].dgst[3];
  ctx_iv.h[4] = tmps[gid].dgst[4];

  ctx_iv.len = loop_pos * p3;

  sha1_update_rar29 (&ctx_iv, w, pw_salt_len);

  memcat8c_be (ctx_iv.w0, ctx_iv.w1, ctx_iv.w2, ctx_iv.w3, ctx_iv.len, hc_swap32_S (loop_pos), ctx_iv.h);

  ctx_iv.len += 3;
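
  // length bookkeeping: one KDF iteration covers p3 = pw_salt_len + 3 bytes,
  // so ctx_iv.len started at loop_pos * p3 and now stands at
  // (loop_pos + 1) * p3 after the update and the 3 counter bytes above.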

  // copy the context from ctx_iv to ctx:

  sha1_ctx_t ctx;

  ctx.h[0] = ctx_iv.h[0];
  ctx.h[1] = ctx_iv.h[1];
  ctx.h[2] = ctx_iv.h[2];
  ctx.h[3] = ctx_iv.h[3];
  ctx.h[4] = ctx_iv.h[4];

  ctx.w0[0] = ctx_iv.w0[0];
  ctx.w0[1] = ctx_iv.w0[1];
  ctx.w0[2] = ctx_iv.w0[2];
  ctx.w0[3] = ctx_iv.w0[3];

  ctx.w1[0] = ctx_iv.w1[0];
  ctx.w1[1] = ctx_iv.w1[1];
  ctx.w1[2] = ctx_iv.w1[2];
  ctx.w1[3] = ctx_iv.w1[3];

  ctx.w2[0] = ctx_iv.w2[0];
  ctx.w2[1] = ctx_iv.w2[1];
  ctx.w2[2] = ctx_iv.w2[2];
  ctx.w2[3] = ctx_iv.w2[3];

  ctx.w3[0] = ctx_iv.w3[0];
  ctx.w3[1] = ctx_iv.w3[1];
  ctx.w3[2] = ctx_iv.w3[2];
  ctx.w3[3] = ctx_iv.w3[3];

  ctx.len = p3; // or ctx_iv.len: both are congruent modulo 64, since loop_pos is a multiple of 16384

  // final () to derive the IV byte:

  sha1_final (&ctx_iv);

  const u32 iv_idx = init_pos / 4;
  const u32 iv_off = init_pos % 4;

  tmps[gid].iv[iv_idx] |= (ctx_iv.h[4] & 0xff) << (iv_off * 8);

  // main loop:
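  // 16383 iterations here plus the IV iteration above give 16384 per kernel
  // invocation, i.e. ROUNDS / 16; hc_swap32_S (j) lays the counter out as
  // 3 little-endian bytes (the top byte of j is always zero).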

  for (u32 i = 0, j = (loop_pos + 1); i < 16383; i++, j++)
  {
    sha1_update_rar29 (&ctx, w, pw_salt_len);

    memcat8c_be (ctx.w0, ctx.w1, ctx.w2, ctx.w3, ctx.len, hc_swap32_S (j), ctx.h);

    ctx.len += 3;
  }

  tmps[gid].dgst[0] = ctx.h[0];
  tmps[gid].dgst[1] = ctx.h[1];
  tmps[gid].dgst[2] = ctx.h[2];
  tmps[gid].dgst[3] = ctx.h[3];
  tmps[gid].dgst[4] = ctx.h[4];

  // only needed if pw_len > 28:

  for (u32 i = 0; i < 66; i++) // unroll ?
  {
    tmps[gid].w[i] = w[i];
  }
}
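
// the comp kernel finalizes the KDF: it pads the last block, sets the total
// length to ROUNDS * p3 bytes (stored in bits), runs one last transform and
// uses the first 16 digest bytes as the AES-128 key; the archive data is
// then CBC-decrypted with the IV collected in the loop kernel, and a CRC-32
// over the plaintext is handed to the comparison code.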

KERNEL_FQ void m23700_comp (KERN_ATTR_TMPS_ESALT (rar3_tmp_t, rar3_t))
{
  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * aes shared
   */

  #ifdef REAL_SHM

  LOCAL_VK u32 s_td0[256];
  LOCAL_VK u32 s_td1[256];
  LOCAL_VK u32 s_td2[256];
  LOCAL_VK u32 s_td3[256];
  LOCAL_VK u32 s_td4[256];

  LOCAL_VK u32 s_te0[256];
  LOCAL_VK u32 s_te1[256];
  LOCAL_VK u32 s_te2[256];
  LOCAL_VK u32 s_te3[256];
  LOCAL_VK u32 s_te4[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    s_td0[i] = td0[i];
    s_td1[i] = td1[i];
    s_td2[i] = td2[i];
    s_td3[i] = td3[i];
    s_td4[i] = td4[i];

    s_te0[i] = te0[i];
    s_te1[i] = te1[i];
    s_te2[i] = te2[i];
    s_te3[i] = te3[i];
    s_te4[i] = te4[i];
  }

  #else

  CONSTANT_AS u32a *s_td0 = td0;
  CONSTANT_AS u32a *s_td1 = td1;
  CONSTANT_AS u32a *s_td2 = td2;
  CONSTANT_AS u32a *s_td3 = td3;
  CONSTANT_AS u32a *s_td4 = td4;

  CONSTANT_AS u32a *s_te0 = te0;
  CONSTANT_AS u32a *s_te1 = te1;
  CONSTANT_AS u32a *s_te2 = te2;
  CONSTANT_AS u32a *s_te3 = te3;
  CONSTANT_AS u32a *s_te4 = te4;

  #endif

  LOCAL_VK u32 l_crc32tab[256];

  for (int i = lid; i < 256; i += lsz)
  {
    l_crc32tab[i] = crc32tab[i];
  }

  SYNC_THREADS ();

  if (gid >= gid_max) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  const u32 salt_len = 8;

  const u32 pw_salt_len = pw_len + salt_len;

  const u32 p3 = pw_salt_len + 3;

  u32 h[5];

  h[0] = tmps[gid].dgst[0];
  h[1] = tmps[gid].dgst[1];
  h[2] = tmps[gid].dgst[2];
  h[3] = tmps[gid].dgst[3];
  h[4] = tmps[gid].dgst[4];

  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  w0[0] = 0x80000000;
  w0[1] = 0;
  w0[2] = 0;
  w0[3] = 0;
  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;
  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;
  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = (ROUNDS * p3) * 8;

  sha1_transform (w0, w1, w2, w3, h);

  u32 ukey[4];

  ukey[0] = hc_swap32_S (h[0]);
  ukey[1] = hc_swap32_S (h[1]);
  ukey[2] = hc_swap32_S (h[2]);
  ukey[3] = hc_swap32_S (h[3]);

  u32 ks[44];

  AES128_set_decrypt_key (ks, ukey, s_te0, s_te1, s_te2, s_te3, s_td0, s_td1, s_td2, s_td3);

  const u32 pack_size   = esalt_bufs[DIGESTS_OFFSET].pack_size;
  const u32 unpack_size = esalt_bufs[DIGESTS_OFFSET].unpack_size;

  if (pack_size > unpack_size) // pack_size is padded to the AES block size, so it can exceed unpack_size
  {
    if (pack_size >= 32) // need a previous ciphertext block to act as IV for the last-block check
    {
      const u32 pack_size_elements = pack_size / 4;

      u32 last_block_encrypted[4];

      last_block_encrypted[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 0];
      last_block_encrypted[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 1];
      last_block_encrypted[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 2];
      last_block_encrypted[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 4 + 3];

      u32 last_block_decrypted[4];

      AES128_decrypt (ks, last_block_encrypted, last_block_decrypted, s_td0, s_td1, s_td2, s_td3, s_td4);

      u32 last_block_iv[4];

      last_block_iv[0] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 0];
      last_block_iv[1] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 1];
      last_block_iv[2] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 2];
      last_block_iv[3] = esalt_bufs[DIGESTS_OFFSET].data[pack_size_elements - 8 + 3];

      last_block_decrypted[0] ^= last_block_iv[0];
      last_block_decrypted[1] ^= last_block_iv[1];
      last_block_decrypted[2] ^= last_block_iv[2];
      last_block_decrypted[3] ^= last_block_iv[3];

      if ((last_block_decrypted[3] & 0xff) != 0) return; // early reject: expect a zero padding byte at the end
    }
  }

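  // CBC decryption: decrypt each 16-byte block, XOR with the previous
  // ciphertext block (the collected IV for the first one), feed up to 16
  // plaintext bytes into the CRC, and keep the ciphertext as the next IV.
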
  u32 iv[4];

  iv[0] = tmps[gid].iv[0];
  iv[1] = tmps[gid].iv[1];
  iv[2] = tmps[gid].iv[2];
  iv[3] = tmps[gid].iv[3];

  iv[0] = hc_swap32_S (iv[0]);
  iv[1] = hc_swap32_S (iv[1]);
  iv[2] = hc_swap32_S (iv[2]);
  iv[3] = hc_swap32_S (iv[3]);

  u32 data_left = unpack_size;

  u32 crc32 = ~0;

  for (u32 i = 0, j = 0; i < pack_size / 16; i += 1, j += 4)
  {
    u32 data[4];

    data[0] = esalt_bufs[DIGESTS_OFFSET].data[j + 0];
    data[1] = esalt_bufs[DIGESTS_OFFSET].data[j + 1];
    data[2] = esalt_bufs[DIGESTS_OFFSET].data[j + 2];
    data[3] = esalt_bufs[DIGESTS_OFFSET].data[j + 3];

    u32 out[4];

    AES128_decrypt (ks, data, out, s_td0, s_td1, s_td2, s_td3, s_td4);

    out[0] ^= iv[0];
    out[1] ^= iv[1];
    out[2] ^= iv[2];
    out[3] ^= iv[3];

    crc32 = round_crc32_16 (crc32, out, data_left, l_crc32tab);

    iv[0] = data[0];
    iv[1] = data[1];
    iv[2] = data[2];
    iv[3] = data[3];

    data_left -= 16;
  }

  const u32 r0 = crc32;
  const u32 r1 = 0;
  const u32 r2 = 0;
  const u32 r3 = 0;

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}