1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6#include "inc_vendor.h"
7#include "inc_types.h"
8#include "inc_platform.h"
9#include "inc_common.h"
10#include "inc_hash_md4.h"
11
// important notes on this:
// unused bytes of the input buf need to be set to zero
// input buf needs to be in the algorithm's native byte order (md4 = LE, sha1 = BE, etc)
// input buf needs to be 64 byte aligned when using md4_update()
16
17DECLSPEC void md4_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest)
18{
19  u32 a = digest[0];
20  u32 b = digest[1];
21  u32 c = digest[2];
22  u32 d = digest[3];
23
24  MD4_STEP_S (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
25  MD4_STEP_S (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
26  MD4_STEP_S (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02);
27  MD4_STEP_S (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03);
28  MD4_STEP_S (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00);
29  MD4_STEP_S (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01);
30  MD4_STEP_S (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02);
31  MD4_STEP_S (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03);
32  MD4_STEP_S (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00);
33  MD4_STEP_S (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01);
34  MD4_STEP_S (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02);
35  MD4_STEP_S (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03);
36  MD4_STEP_S (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00);
37  MD4_STEP_S (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01);
38  MD4_STEP_S (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02);
39  MD4_STEP_S (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03);
40
41  MD4_STEP_S (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10);
42  MD4_STEP_S (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11);
43  MD4_STEP_S (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12);
44  MD4_STEP_S (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13);
45  MD4_STEP_S (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10);
46  MD4_STEP_S (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11);
47  MD4_STEP_S (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12);
48  MD4_STEP_S (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13);
49  MD4_STEP_S (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10);
50  MD4_STEP_S (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11);
51  MD4_STEP_S (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12);
52  MD4_STEP_S (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13);
53  MD4_STEP_S (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10);
54  MD4_STEP_S (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11);
55  MD4_STEP_S (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12);
56  MD4_STEP_S (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13);
57
58  MD4_STEP_S (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20);
59  MD4_STEP_S (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21);
60  MD4_STEP_S (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22);
61  MD4_STEP_S (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23);
62  MD4_STEP_S (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20);
63  MD4_STEP_S (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21);
64  MD4_STEP_S (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22);
65  MD4_STEP_S (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23);
66  MD4_STEP_S (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20);
67  MD4_STEP_S (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21);
68  MD4_STEP_S (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22);
69  MD4_STEP_S (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23);
70  MD4_STEP_S (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20);
71  MD4_STEP_S (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
72  MD4_STEP_S (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
73  MD4_STEP_S (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
74
75  digest[0] += a;
76  digest[1] += b;
77  digest[2] += c;
78  digest[3] += d;
79}
80
81DECLSPEC void md4_init (md4_ctx_t *ctx)
82{
83  ctx->h[0] = MD4M_A;
84  ctx->h[1] = MD4M_B;
85  ctx->h[2] = MD4M_C;
86  ctx->h[3] = MD4M_D;
87
88  ctx->w0[0] = 0;
89  ctx->w0[1] = 0;
90  ctx->w0[2] = 0;
91  ctx->w0[3] = 0;
92  ctx->w1[0] = 0;
93  ctx->w1[1] = 0;
94  ctx->w1[2] = 0;
95  ctx->w1[3] = 0;
96  ctx->w2[0] = 0;
97  ctx->w2[1] = 0;
98  ctx->w2[2] = 0;
99  ctx->w2[3] = 0;
100  ctx->w3[0] = 0;
101  ctx->w3[1] = 0;
102  ctx->w3[2] = 0;
103  ctx->w3[3] = 0;
104
105  ctx->len = 0;
106}
107
108DECLSPEC void md4_update_64 (md4_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
109{
110  if (len == 0) return;
111
112  const int pos = ctx->len & 63;
113
114  ctx->len += len;
115
116  if (pos == 0)
117  {
118    ctx->w0[0] = w0[0];
119    ctx->w0[1] = w0[1];
120    ctx->w0[2] = w0[2];
121    ctx->w0[3] = w0[3];
122    ctx->w1[0] = w1[0];
123    ctx->w1[1] = w1[1];
124    ctx->w1[2] = w1[2];
125    ctx->w1[3] = w1[3];
126    ctx->w2[0] = w2[0];
127    ctx->w2[1] = w2[1];
128    ctx->w2[2] = w2[2];
129    ctx->w2[3] = w2[3];
130    ctx->w3[0] = w3[0];
131    ctx->w3[1] = w3[1];
132    ctx->w3[2] = w3[2];
133    ctx->w3[3] = w3[3];
134
135    if (len == 64)
136    {
137      md4_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
138
139      ctx->w0[0] = 0;
140      ctx->w0[1] = 0;
141      ctx->w0[2] = 0;
142      ctx->w0[3] = 0;
143      ctx->w1[0] = 0;
144      ctx->w1[1] = 0;
145      ctx->w1[2] = 0;
146      ctx->w1[3] = 0;
147      ctx->w2[0] = 0;
148      ctx->w2[1] = 0;
149      ctx->w2[2] = 0;
150      ctx->w2[3] = 0;
151      ctx->w3[0] = 0;
152      ctx->w3[1] = 0;
153      ctx->w3[2] = 0;
154      ctx->w3[3] = 0;
155    }
156  }
157  else
158  {
159    if ((pos + len) < 64)
160    {
161      switch_buffer_by_offset_le_S (w0, w1, w2, w3, pos);
162
163      ctx->w0[0] |= w0[0];
164      ctx->w0[1] |= w0[1];
165      ctx->w0[2] |= w0[2];
166      ctx->w0[3] |= w0[3];
167      ctx->w1[0] |= w1[0];
168      ctx->w1[1] |= w1[1];
169      ctx->w1[2] |= w1[2];
170      ctx->w1[3] |= w1[3];
171      ctx->w2[0] |= w2[0];
172      ctx->w2[1] |= w2[1];
173      ctx->w2[2] |= w2[2];
174      ctx->w2[3] |= w2[3];
175      ctx->w3[0] |= w3[0];
176      ctx->w3[1] |= w3[1];
177      ctx->w3[2] |= w3[2];
178      ctx->w3[3] |= w3[3];
179    }
180    else
181    {
182      u32 c0[4] = { 0 };
183      u32 c1[4] = { 0 };
184      u32 c2[4] = { 0 };
185      u32 c3[4] = { 0 };
186
187      switch_buffer_by_offset_carry_le_S (w0, w1, w2, w3, c0, c1, c2, c3, pos);
188
189      ctx->w0[0] |= w0[0];
190      ctx->w0[1] |= w0[1];
191      ctx->w0[2] |= w0[2];
192      ctx->w0[3] |= w0[3];
193      ctx->w1[0] |= w1[0];
194      ctx->w1[1] |= w1[1];
195      ctx->w1[2] |= w1[2];
196      ctx->w1[3] |= w1[3];
197      ctx->w2[0] |= w2[0];
198      ctx->w2[1] |= w2[1];
199      ctx->w2[2] |= w2[2];
200      ctx->w2[3] |= w2[3];
201      ctx->w3[0] |= w3[0];
202      ctx->w3[1] |= w3[1];
203      ctx->w3[2] |= w3[2];
204      ctx->w3[3] |= w3[3];
205
206      md4_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
207
208      ctx->w0[0] = c0[0];
209      ctx->w0[1] = c0[1];
210      ctx->w0[2] = c0[2];
211      ctx->w0[3] = c0[3];
212      ctx->w1[0] = c1[0];
213      ctx->w1[1] = c1[1];
214      ctx->w1[2] = c1[2];
215      ctx->w1[3] = c1[3];
216      ctx->w2[0] = c2[0];
217      ctx->w2[1] = c2[1];
218      ctx->w2[2] = c2[2];
219      ctx->w2[3] = c2[3];
220      ctx->w3[0] = c3[0];
221      ctx->w3[1] = c3[1];
222      ctx->w3[2] = c3[2];
223      ctx->w3[3] = c3[3];
224    }
225  }
226}
227
228DECLSPEC void md4_update (md4_ctx_t *ctx, const u32 *w, const int len)
229{
230  u32 w0[4];
231  u32 w1[4];
232  u32 w2[4];
233  u32 w3[4];
234
235  int pos1;
236  int pos4;
237
238  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
239  {
240    w0[0] = w[pos4 +  0];
241    w0[1] = w[pos4 +  1];
242    w0[2] = w[pos4 +  2];
243    w0[3] = w[pos4 +  3];
244    w1[0] = w[pos4 +  4];
245    w1[1] = w[pos4 +  5];
246    w1[2] = w[pos4 +  6];
247    w1[3] = w[pos4 +  7];
248    w2[0] = w[pos4 +  8];
249    w2[1] = w[pos4 +  9];
250    w2[2] = w[pos4 + 10];
251    w2[3] = w[pos4 + 11];
252    w3[0] = w[pos4 + 12];
253    w3[1] = w[pos4 + 13];
254    w3[2] = w[pos4 + 14];
255    w3[3] = w[pos4 + 15];
256
257    md4_update_64 (ctx, w0, w1, w2, w3, 64);
258  }
259
260  w0[0] = w[pos4 +  0];
261  w0[1] = w[pos4 +  1];
262  w0[2] = w[pos4 +  2];
263  w0[3] = w[pos4 +  3];
264  w1[0] = w[pos4 +  4];
265  w1[1] = w[pos4 +  5];
266  w1[2] = w[pos4 +  6];
267  w1[3] = w[pos4 +  7];
268  w2[0] = w[pos4 +  8];
269  w2[1] = w[pos4 +  9];
270  w2[2] = w[pos4 + 10];
271  w2[3] = w[pos4 + 11];
272  w3[0] = w[pos4 + 12];
273  w3[1] = w[pos4 + 13];
274  w3[2] = w[pos4 + 14];
275  w3[3] = w[pos4 + 15];
276
277  md4_update_64 (ctx, w0, w1, w2, w3, len - pos1);
278}
279
280DECLSPEC void md4_update_swap (md4_ctx_t *ctx, const u32 *w, const int len)
281{
282  u32 w0[4];
283  u32 w1[4];
284  u32 w2[4];
285  u32 w3[4];
286
287  int pos1;
288  int pos4;
289
290  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
291  {
292    w0[0] = w[pos4 +  0];
293    w0[1] = w[pos4 +  1];
294    w0[2] = w[pos4 +  2];
295    w0[3] = w[pos4 +  3];
296    w1[0] = w[pos4 +  4];
297    w1[1] = w[pos4 +  5];
298    w1[2] = w[pos4 +  6];
299    w1[3] = w[pos4 +  7];
300    w2[0] = w[pos4 +  8];
301    w2[1] = w[pos4 +  9];
302    w2[2] = w[pos4 + 10];
303    w2[3] = w[pos4 + 11];
304    w3[0] = w[pos4 + 12];
305    w3[1] = w[pos4 + 13];
306    w3[2] = w[pos4 + 14];
307    w3[3] = w[pos4 + 15];
308
309    w0[0] = hc_swap32_S (w0[0]);
310    w0[1] = hc_swap32_S (w0[1]);
311    w0[2] = hc_swap32_S (w0[2]);
312    w0[3] = hc_swap32_S (w0[3]);
313    w1[0] = hc_swap32_S (w1[0]);
314    w1[1] = hc_swap32_S (w1[1]);
315    w1[2] = hc_swap32_S (w1[2]);
316    w1[3] = hc_swap32_S (w1[3]);
317    w2[0] = hc_swap32_S (w2[0]);
318    w2[1] = hc_swap32_S (w2[1]);
319    w2[2] = hc_swap32_S (w2[2]);
320    w2[3] = hc_swap32_S (w2[3]);
321    w3[0] = hc_swap32_S (w3[0]);
322    w3[1] = hc_swap32_S (w3[1]);
323    w3[2] = hc_swap32_S (w3[2]);
324    w3[3] = hc_swap32_S (w3[3]);
325
326    md4_update_64 (ctx, w0, w1, w2, w3, 64);
327  }
328
329  w0[0] = w[pos4 +  0];
330  w0[1] = w[pos4 +  1];
331  w0[2] = w[pos4 +  2];
332  w0[3] = w[pos4 +  3];
333  w1[0] = w[pos4 +  4];
334  w1[1] = w[pos4 +  5];
335  w1[2] = w[pos4 +  6];
336  w1[3] = w[pos4 +  7];
337  w2[0] = w[pos4 +  8];
338  w2[1] = w[pos4 +  9];
339  w2[2] = w[pos4 + 10];
340  w2[3] = w[pos4 + 11];
341  w3[0] = w[pos4 + 12];
342  w3[1] = w[pos4 + 13];
343  w3[2] = w[pos4 + 14];
344  w3[3] = w[pos4 + 15];
345
346  w0[0] = hc_swap32_S (w0[0]);
347  w0[1] = hc_swap32_S (w0[1]);
348  w0[2] = hc_swap32_S (w0[2]);
349  w0[3] = hc_swap32_S (w0[3]);
350  w1[0] = hc_swap32_S (w1[0]);
351  w1[1] = hc_swap32_S (w1[1]);
352  w1[2] = hc_swap32_S (w1[2]);
353  w1[3] = hc_swap32_S (w1[3]);
354  w2[0] = hc_swap32_S (w2[0]);
355  w2[1] = hc_swap32_S (w2[1]);
356  w2[2] = hc_swap32_S (w2[2]);
357  w2[3] = hc_swap32_S (w2[3]);
358  w3[0] = hc_swap32_S (w3[0]);
359  w3[1] = hc_swap32_S (w3[1]);
360  w3[2] = hc_swap32_S (w3[2]);
361  w3[3] = hc_swap32_S (w3[3]);
362
363  md4_update_64 (ctx, w0, w1, w2, w3, len - pos1);
364}
365
366DECLSPEC void md4_update_utf16le (md4_ctx_t *ctx, const u32 *w, const int len)
367{
368  if (hc_enc_scan (w, len))
369  {
370    hc_enc_t hc_enc;
371
372    hc_enc_init (&hc_enc);
373
374    while (hc_enc_has_next (&hc_enc, len))
375    {
376      u32 enc_buf[16] = { 0 };
377
378      const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
379
380      md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
381    }
382
383    return;
384  }
385
386  u32 w0[4];
387  u32 w1[4];
388  u32 w2[4];
389  u32 w3[4];
390
391  int pos1;
392  int pos4;
393
394  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
395  {
396    w0[0] = w[pos4 + 0];
397    w0[1] = w[pos4 + 1];
398    w0[2] = w[pos4 + 2];
399    w0[3] = w[pos4 + 3];
400    w1[0] = w[pos4 + 4];
401    w1[1] = w[pos4 + 5];
402    w1[2] = w[pos4 + 6];
403    w1[3] = w[pos4 + 7];
404
405    make_utf16le_S (w1, w2, w3);
406    make_utf16le_S (w0, w0, w1);
407
408    md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
409  }
410
411  w0[0] = w[pos4 + 0];
412  w0[1] = w[pos4 + 1];
413  w0[2] = w[pos4 + 2];
414  w0[3] = w[pos4 + 3];
415  w1[0] = w[pos4 + 4];
416  w1[1] = w[pos4 + 5];
417  w1[2] = w[pos4 + 6];
418  w1[3] = w[pos4 + 7];
419
420  make_utf16le_S (w1, w2, w3);
421  make_utf16le_S (w0, w0, w1);
422
423  md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
424}
425
426DECLSPEC void md4_update_utf16le_swap (md4_ctx_t *ctx, const u32 *w, const int len)
427{
428  if (hc_enc_scan (w, len))
429  {
430    hc_enc_t hc_enc;
431
432    hc_enc_init (&hc_enc);
433
434    while (hc_enc_has_next (&hc_enc, len))
435    {
436      u32 enc_buf[16] = { 0 };
437
438      const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
439
440      enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
441      enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
442      enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
443      enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
444      enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
445      enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
446      enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
447      enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
448      enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
449      enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
450      enc_buf[10] = hc_swap32_S (enc_buf[10]);
451      enc_buf[11] = hc_swap32_S (enc_buf[11]);
452      enc_buf[12] = hc_swap32_S (enc_buf[12]);
453      enc_buf[13] = hc_swap32_S (enc_buf[13]);
454      enc_buf[14] = hc_swap32_S (enc_buf[14]);
455      enc_buf[15] = hc_swap32_S (enc_buf[15]);
456
457      md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
458    }
459
460    return;
461  }
462
463  u32 w0[4];
464  u32 w1[4];
465  u32 w2[4];
466  u32 w3[4];
467
468  int pos1;
469  int pos4;
470
471  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
472  {
473    w0[0] = w[pos4 + 0];
474    w0[1] = w[pos4 + 1];
475    w0[2] = w[pos4 + 2];
476    w0[3] = w[pos4 + 3];
477    w1[0] = w[pos4 + 4];
478    w1[1] = w[pos4 + 5];
479    w1[2] = w[pos4 + 6];
480    w1[3] = w[pos4 + 7];
481
482    make_utf16le_S (w1, w2, w3);
483    make_utf16le_S (w0, w0, w1);
484
485    w0[0] = hc_swap32_S (w0[0]);
486    w0[1] = hc_swap32_S (w0[1]);
487    w0[2] = hc_swap32_S (w0[2]);
488    w0[3] = hc_swap32_S (w0[3]);
489    w1[0] = hc_swap32_S (w1[0]);
490    w1[1] = hc_swap32_S (w1[1]);
491    w1[2] = hc_swap32_S (w1[2]);
492    w1[3] = hc_swap32_S (w1[3]);
493    w2[0] = hc_swap32_S (w2[0]);
494    w2[1] = hc_swap32_S (w2[1]);
495    w2[2] = hc_swap32_S (w2[2]);
496    w2[3] = hc_swap32_S (w2[3]);
497    w3[0] = hc_swap32_S (w3[0]);
498    w3[1] = hc_swap32_S (w3[1]);
499    w3[2] = hc_swap32_S (w3[2]);
500    w3[3] = hc_swap32_S (w3[3]);
501
502    md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
503  }
504
505  w0[0] = w[pos4 + 0];
506  w0[1] = w[pos4 + 1];
507  w0[2] = w[pos4 + 2];
508  w0[3] = w[pos4 + 3];
509  w1[0] = w[pos4 + 4];
510  w1[1] = w[pos4 + 5];
511  w1[2] = w[pos4 + 6];
512  w1[3] = w[pos4 + 7];
513
514  make_utf16le_S (w1, w2, w3);
515  make_utf16le_S (w0, w0, w1);
516
517  w0[0] = hc_swap32_S (w0[0]);
518  w0[1] = hc_swap32_S (w0[1]);
519  w0[2] = hc_swap32_S (w0[2]);
520  w0[3] = hc_swap32_S (w0[3]);
521  w1[0] = hc_swap32_S (w1[0]);
522  w1[1] = hc_swap32_S (w1[1]);
523  w1[2] = hc_swap32_S (w1[2]);
524  w1[3] = hc_swap32_S (w1[3]);
525  w2[0] = hc_swap32_S (w2[0]);
526  w2[1] = hc_swap32_S (w2[1]);
527  w2[2] = hc_swap32_S (w2[2]);
528  w2[3] = hc_swap32_S (w2[3]);
529  w3[0] = hc_swap32_S (w3[0]);
530  w3[1] = hc_swap32_S (w3[1]);
531  w3[2] = hc_swap32_S (w3[2]);
532  w3[3] = hc_swap32_S (w3[3]);
533
534  md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
535}
536
537DECLSPEC void md4_update_global (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
538{
539  u32 w0[4];
540  u32 w1[4];
541  u32 w2[4];
542  u32 w3[4];
543
544  int pos1;
545  int pos4;
546
547  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
548  {
549    w0[0] = w[pos4 +  0];
550    w0[1] = w[pos4 +  1];
551    w0[2] = w[pos4 +  2];
552    w0[3] = w[pos4 +  3];
553    w1[0] = w[pos4 +  4];
554    w1[1] = w[pos4 +  5];
555    w1[2] = w[pos4 +  6];
556    w1[3] = w[pos4 +  7];
557    w2[0] = w[pos4 +  8];
558    w2[1] = w[pos4 +  9];
559    w2[2] = w[pos4 + 10];
560    w2[3] = w[pos4 + 11];
561    w3[0] = w[pos4 + 12];
562    w3[1] = w[pos4 + 13];
563    w3[2] = w[pos4 + 14];
564    w3[3] = w[pos4 + 15];
565
566    md4_update_64 (ctx, w0, w1, w2, w3, 64);
567  }
568
569  w0[0] = w[pos4 +  0];
570  w0[1] = w[pos4 +  1];
571  w0[2] = w[pos4 +  2];
572  w0[3] = w[pos4 +  3];
573  w1[0] = w[pos4 +  4];
574  w1[1] = w[pos4 +  5];
575  w1[2] = w[pos4 +  6];
576  w1[3] = w[pos4 +  7];
577  w2[0] = w[pos4 +  8];
578  w2[1] = w[pos4 +  9];
579  w2[2] = w[pos4 + 10];
580  w2[3] = w[pos4 + 11];
581  w3[0] = w[pos4 + 12];
582  w3[1] = w[pos4 + 13];
583  w3[2] = w[pos4 + 14];
584  w3[3] = w[pos4 + 15];
585
586  md4_update_64 (ctx, w0, w1, w2, w3, len - pos1);
587}
588
589DECLSPEC void md4_update_global_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
590{
591  u32 w0[4];
592  u32 w1[4];
593  u32 w2[4];
594  u32 w3[4];
595
596  int pos1;
597  int pos4;
598
599  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
600  {
601    w0[0] = w[pos4 +  0];
602    w0[1] = w[pos4 +  1];
603    w0[2] = w[pos4 +  2];
604    w0[3] = w[pos4 +  3];
605    w1[0] = w[pos4 +  4];
606    w1[1] = w[pos4 +  5];
607    w1[2] = w[pos4 +  6];
608    w1[3] = w[pos4 +  7];
609    w2[0] = w[pos4 +  8];
610    w2[1] = w[pos4 +  9];
611    w2[2] = w[pos4 + 10];
612    w2[3] = w[pos4 + 11];
613    w3[0] = w[pos4 + 12];
614    w3[1] = w[pos4 + 13];
615    w3[2] = w[pos4 + 14];
616    w3[3] = w[pos4 + 15];
617
618    w0[0] = hc_swap32_S (w0[0]);
619    w0[1] = hc_swap32_S (w0[1]);
620    w0[2] = hc_swap32_S (w0[2]);
621    w0[3] = hc_swap32_S (w0[3]);
622    w1[0] = hc_swap32_S (w1[0]);
623    w1[1] = hc_swap32_S (w1[1]);
624    w1[2] = hc_swap32_S (w1[2]);
625    w1[3] = hc_swap32_S (w1[3]);
626    w2[0] = hc_swap32_S (w2[0]);
627    w2[1] = hc_swap32_S (w2[1]);
628    w2[2] = hc_swap32_S (w2[2]);
629    w2[3] = hc_swap32_S (w2[3]);
630    w3[0] = hc_swap32_S (w3[0]);
631    w3[1] = hc_swap32_S (w3[1]);
632    w3[2] = hc_swap32_S (w3[2]);
633    w3[3] = hc_swap32_S (w3[3]);
634
635    md4_update_64 (ctx, w0, w1, w2, w3, 64);
636  }
637
638  w0[0] = w[pos4 +  0];
639  w0[1] = w[pos4 +  1];
640  w0[2] = w[pos4 +  2];
641  w0[3] = w[pos4 +  3];
642  w1[0] = w[pos4 +  4];
643  w1[1] = w[pos4 +  5];
644  w1[2] = w[pos4 +  6];
645  w1[3] = w[pos4 +  7];
646  w2[0] = w[pos4 +  8];
647  w2[1] = w[pos4 +  9];
648  w2[2] = w[pos4 + 10];
649  w2[3] = w[pos4 + 11];
650  w3[0] = w[pos4 + 12];
651  w3[1] = w[pos4 + 13];
652  w3[2] = w[pos4 + 14];
653  w3[3] = w[pos4 + 15];
654
655  w0[0] = hc_swap32_S (w0[0]);
656  w0[1] = hc_swap32_S (w0[1]);
657  w0[2] = hc_swap32_S (w0[2]);
658  w0[3] = hc_swap32_S (w0[3]);
659  w1[0] = hc_swap32_S (w1[0]);
660  w1[1] = hc_swap32_S (w1[1]);
661  w1[2] = hc_swap32_S (w1[2]);
662  w1[3] = hc_swap32_S (w1[3]);
663  w2[0] = hc_swap32_S (w2[0]);
664  w2[1] = hc_swap32_S (w2[1]);
665  w2[2] = hc_swap32_S (w2[2]);
666  w2[3] = hc_swap32_S (w2[3]);
667  w3[0] = hc_swap32_S (w3[0]);
668  w3[1] = hc_swap32_S (w3[1]);
669  w3[2] = hc_swap32_S (w3[2]);
670  w3[3] = hc_swap32_S (w3[3]);
671
672  md4_update_64 (ctx, w0, w1, w2, w3, len - pos1);
673}
674
675DECLSPEC void md4_update_global_utf16le (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
676{
677  if (hc_enc_scan_global (w, len))
678  {
679    hc_enc_t hc_enc;
680
681    hc_enc_init (&hc_enc);
682
683    while (hc_enc_has_next (&hc_enc, len))
684    {
685      u32 enc_buf[16] = { 0 };
686
687      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
688
689      md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
690    }
691
692    return;
693  }
694
695  u32 w0[4];
696  u32 w1[4];
697  u32 w2[4];
698  u32 w3[4];
699
700  int pos1;
701  int pos4;
702
703  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
704  {
705    w0[0] = w[pos4 + 0];
706    w0[1] = w[pos4 + 1];
707    w0[2] = w[pos4 + 2];
708    w0[3] = w[pos4 + 3];
709    w1[0] = w[pos4 + 4];
710    w1[1] = w[pos4 + 5];
711    w1[2] = w[pos4 + 6];
712    w1[3] = w[pos4 + 7];
713
714    make_utf16le_S (w1, w2, w3);
715    make_utf16le_S (w0, w0, w1);
716
717    md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
718  }
719
720  w0[0] = w[pos4 + 0];
721  w0[1] = w[pos4 + 1];
722  w0[2] = w[pos4 + 2];
723  w0[3] = w[pos4 + 3];
724  w1[0] = w[pos4 + 4];
725  w1[1] = w[pos4 + 5];
726  w1[2] = w[pos4 + 6];
727  w1[3] = w[pos4 + 7];
728
729  make_utf16le_S (w1, w2, w3);
730  make_utf16le_S (w0, w0, w1);
731
732  md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
733}
734
735DECLSPEC void md4_update_global_utf16le_swap (md4_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
736{
737  if (hc_enc_scan_global (w, len))
738  {
739    hc_enc_t hc_enc;
740
741    hc_enc_init (&hc_enc);
742
743    while (hc_enc_has_next (&hc_enc, len))
744    {
745      u32 enc_buf[16] = { 0 };
746
747      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
748
749      enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
750      enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
751      enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
752      enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
753      enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
754      enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
755      enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
756      enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
757      enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
758      enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
759      enc_buf[10] = hc_swap32_S (enc_buf[10]);
760      enc_buf[11] = hc_swap32_S (enc_buf[11]);
761      enc_buf[12] = hc_swap32_S (enc_buf[12]);
762      enc_buf[13] = hc_swap32_S (enc_buf[13]);
763      enc_buf[14] = hc_swap32_S (enc_buf[14]);
764      enc_buf[15] = hc_swap32_S (enc_buf[15]);
765
766      md4_update_64 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_len);
767    }
768
769    return;
770  }
771
772  u32 w0[4];
773  u32 w1[4];
774  u32 w2[4];
775  u32 w3[4];
776
777  int pos1;
778  int pos4;
779
780  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
781  {
782    w0[0] = w[pos4 + 0];
783    w0[1] = w[pos4 + 1];
784    w0[2] = w[pos4 + 2];
785    w0[3] = w[pos4 + 3];
786    w1[0] = w[pos4 + 4];
787    w1[1] = w[pos4 + 5];
788    w1[2] = w[pos4 + 6];
789    w1[3] = w[pos4 + 7];
790
791    make_utf16le_S (w1, w2, w3);
792    make_utf16le_S (w0, w0, w1);
793
794    w0[0] = hc_swap32_S (w0[0]);
795    w0[1] = hc_swap32_S (w0[1]);
796    w0[2] = hc_swap32_S (w0[2]);
797    w0[3] = hc_swap32_S (w0[3]);
798    w1[0] = hc_swap32_S (w1[0]);
799    w1[1] = hc_swap32_S (w1[1]);
800    w1[2] = hc_swap32_S (w1[2]);
801    w1[3] = hc_swap32_S (w1[3]);
802    w2[0] = hc_swap32_S (w2[0]);
803    w2[1] = hc_swap32_S (w2[1]);
804    w2[2] = hc_swap32_S (w2[2]);
805    w2[3] = hc_swap32_S (w2[3]);
806    w3[0] = hc_swap32_S (w3[0]);
807    w3[1] = hc_swap32_S (w3[1]);
808    w3[2] = hc_swap32_S (w3[2]);
809    w3[3] = hc_swap32_S (w3[3]);
810
811    md4_update_64 (ctx, w0, w1, w2, w3, 32 * 2);
812  }
813
814  w0[0] = w[pos4 + 0];
815  w0[1] = w[pos4 + 1];
816  w0[2] = w[pos4 + 2];
817  w0[3] = w[pos4 + 3];
818  w1[0] = w[pos4 + 4];
819  w1[1] = w[pos4 + 5];
820  w1[2] = w[pos4 + 6];
821  w1[3] = w[pos4 + 7];
822
823  make_utf16le_S (w1, w2, w3);
824  make_utf16le_S (w0, w0, w1);
825
826  w0[0] = hc_swap32_S (w0[0]);
827  w0[1] = hc_swap32_S (w0[1]);
828  w0[2] = hc_swap32_S (w0[2]);
829  w0[3] = hc_swap32_S (w0[3]);
830  w1[0] = hc_swap32_S (w1[0]);
831  w1[1] = hc_swap32_S (w1[1]);
832  w1[2] = hc_swap32_S (w1[2]);
833  w1[3] = hc_swap32_S (w1[3]);
834  w2[0] = hc_swap32_S (w2[0]);
835  w2[1] = hc_swap32_S (w2[1]);
836  w2[2] = hc_swap32_S (w2[2]);
837  w2[3] = hc_swap32_S (w2[3]);
838  w3[0] = hc_swap32_S (w3[0]);
839  w3[1] = hc_swap32_S (w3[1]);
840  w3[2] = hc_swap32_S (w3[2]);
841  w3[3] = hc_swap32_S (w3[3]);
842
843  md4_update_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
844}
845
846DECLSPEC void md4_final (md4_ctx_t *ctx)
847{
848  const int pos = ctx->len & 63;
849
850  append_0x80_4x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
851
852  if (pos >= 56)
853  {
854    md4_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
855
856    ctx->w0[0] = 0;
857    ctx->w0[1] = 0;
858    ctx->w0[2] = 0;
859    ctx->w0[3] = 0;
860    ctx->w1[0] = 0;
861    ctx->w1[1] = 0;
862    ctx->w1[2] = 0;
863    ctx->w1[3] = 0;
864    ctx->w2[0] = 0;
865    ctx->w2[1] = 0;
866    ctx->w2[2] = 0;
867    ctx->w2[3] = 0;
868    ctx->w3[0] = 0;
869    ctx->w3[1] = 0;
870    ctx->w3[2] = 0;
871    ctx->w3[3] = 0;
872  }
873
874  ctx->w3[2] = ctx->len * 8;
875  ctx->w3[3] = 0;
876
877  md4_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
878}
879
880// md4_hmac
881
882DECLSPEC void md4_hmac_init_64 (md4_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3)
883{
884  u32 a0[4];
885  u32 a1[4];
886  u32 a2[4];
887  u32 a3[4];
888
889  // ipad
890
891  a0[0] = w0[0] ^ 0x36363636;
892  a0[1] = w0[1] ^ 0x36363636;
893  a0[2] = w0[2] ^ 0x36363636;
894  a0[3] = w0[3] ^ 0x36363636;
895  a1[0] = w1[0] ^ 0x36363636;
896  a1[1] = w1[1] ^ 0x36363636;
897  a1[2] = w1[2] ^ 0x36363636;
898  a1[3] = w1[3] ^ 0x36363636;
899  a2[0] = w2[0] ^ 0x36363636;
900  a2[1] = w2[1] ^ 0x36363636;
901  a2[2] = w2[2] ^ 0x36363636;
902  a2[3] = w2[3] ^ 0x36363636;
903  a3[0] = w3[0] ^ 0x36363636;
904  a3[1] = w3[1] ^ 0x36363636;
905  a3[2] = w3[2] ^ 0x36363636;
906  a3[3] = w3[3] ^ 0x36363636;
907
908  md4_init (&ctx->ipad);
909
910  md4_update_64 (&ctx->ipad, a0, a1, a2, a3, 64);
911
912  // opad
913
914  u32 b0[4];
915  u32 b1[4];
916  u32 b2[4];
917  u32 b3[4];
918
919  b0[0] = w0[0] ^ 0x5c5c5c5c;
920  b0[1] = w0[1] ^ 0x5c5c5c5c;
921  b0[2] = w0[2] ^ 0x5c5c5c5c;
922  b0[3] = w0[3] ^ 0x5c5c5c5c;
923  b1[0] = w1[0] ^ 0x5c5c5c5c;
924  b1[1] = w1[1] ^ 0x5c5c5c5c;
925  b1[2] = w1[2] ^ 0x5c5c5c5c;
926  b1[3] = w1[3] ^ 0x5c5c5c5c;
927  b2[0] = w2[0] ^ 0x5c5c5c5c;
928  b2[1] = w2[1] ^ 0x5c5c5c5c;
929  b2[2] = w2[2] ^ 0x5c5c5c5c;
930  b2[3] = w2[3] ^ 0x5c5c5c5c;
931  b3[0] = w3[0] ^ 0x5c5c5c5c;
932  b3[1] = w3[1] ^ 0x5c5c5c5c;
933  b3[2] = w3[2] ^ 0x5c5c5c5c;
934  b3[3] = w3[3] ^ 0x5c5c5c5c;
935
936  md4_init (&ctx->opad);
937
938  md4_update_64 (&ctx->opad, b0, b1, b2, b3, 64);
939}
940
941DECLSPEC void md4_hmac_init (md4_hmac_ctx_t *ctx, const u32 *w, const int len)
942{
943  u32 w0[4];
944  u32 w1[4];
945  u32 w2[4];
946  u32 w3[4];
947
948  if (len > 64)
949  {
950    md4_ctx_t tmp;
951
952    md4_init (&tmp);
953
954    md4_update (&tmp, w, len);
955
956    md4_final (&tmp);
957
958    w0[0] = tmp.h[0];
959    w0[1] = tmp.h[1];
960    w0[2] = tmp.h[2];
961    w0[3] = tmp.h[3];
962    w1[0] = 0;
963    w1[1] = 0;
964    w1[2] = 0;
965    w1[3] = 0;
966    w2[0] = 0;
967    w2[1] = 0;
968    w2[2] = 0;
969    w2[3] = 0;
970    w3[0] = 0;
971    w3[1] = 0;
972    w3[2] = 0;
973    w3[3] = 0;
974  }
975  else
976  {
977    w0[0] = w[ 0];
978    w0[1] = w[ 1];
979    w0[2] = w[ 2];
980    w0[3] = w[ 3];
981    w1[0] = w[ 4];
982    w1[1] = w[ 5];
983    w1[2] = w[ 6];
984    w1[3] = w[ 7];
985    w2[0] = w[ 8];
986    w2[1] = w[ 9];
987    w2[2] = w[10];
988    w2[3] = w[11];
989    w3[0] = w[12];
990    w3[1] = w[13];
991    w3[2] = w[14];
992    w3[3] = w[15];
993  }
994
995  md4_hmac_init_64 (ctx, w0, w1, w2, w3);
996}
997
998DECLSPEC void md4_hmac_init_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int len)
999{
1000  u32 w0[4];
1001  u32 w1[4];
1002  u32 w2[4];
1003  u32 w3[4];
1004
1005  if (len > 64)
1006  {
1007    md4_ctx_t tmp;
1008
1009    md4_init (&tmp);
1010
1011    md4_update_swap (&tmp, w, len);
1012
1013    md4_final (&tmp);
1014
1015    w0[0] = tmp.h[0];
1016    w0[1] = tmp.h[1];
1017    w0[2] = tmp.h[2];
1018    w0[3] = tmp.h[3];
1019    w1[0] = 0;
1020    w1[1] = 0;
1021    w1[2] = 0;
1022    w1[3] = 0;
1023    w2[0] = 0;
1024    w2[1] = 0;
1025    w2[2] = 0;
1026    w2[3] = 0;
1027    w3[0] = 0;
1028    w3[1] = 0;
1029    w3[2] = 0;
1030    w3[3] = 0;
1031  }
1032  else
1033  {
1034    w0[0] = hc_swap32_S (w[ 0]);
1035    w0[1] = hc_swap32_S (w[ 1]);
1036    w0[2] = hc_swap32_S (w[ 2]);
1037    w0[3] = hc_swap32_S (w[ 3]);
1038    w1[0] = hc_swap32_S (w[ 4]);
1039    w1[1] = hc_swap32_S (w[ 5]);
1040    w1[2] = hc_swap32_S (w[ 6]);
1041    w1[3] = hc_swap32_S (w[ 7]);
1042    w2[0] = hc_swap32_S (w[ 8]);
1043    w2[1] = hc_swap32_S (w[ 9]);
1044    w2[2] = hc_swap32_S (w[10]);
1045    w2[3] = hc_swap32_S (w[11]);
1046    w3[0] = hc_swap32_S (w[12]);
1047    w3[1] = hc_swap32_S (w[13]);
1048    w3[2] = hc_swap32_S (w[14]);
1049    w3[3] = hc_swap32_S (w[15]);
1050  }
1051
1052  md4_hmac_init_64 (ctx, w0, w1, w2, w3);
1053}
1054
1055DECLSPEC void md4_hmac_init_global (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1056{
1057  u32 w0[4];
1058  u32 w1[4];
1059  u32 w2[4];
1060  u32 w3[4];
1061
1062  if (len > 64)
1063  {
1064    md4_ctx_t tmp;
1065
1066    md4_init (&tmp);
1067
1068    md4_update_global (&tmp, w, len);
1069
1070    md4_final (&tmp);
1071
1072    w0[0] = tmp.h[0];
1073    w0[1] = tmp.h[1];
1074    w0[2] = tmp.h[2];
1075    w0[3] = tmp.h[3];
1076    w1[0] = 0;
1077    w1[1] = 0;
1078    w1[2] = 0;
1079    w1[3] = 0;
1080    w2[0] = 0;
1081    w2[1] = 0;
1082    w2[2] = 0;
1083    w2[3] = 0;
1084    w3[0] = 0;
1085    w3[1] = 0;
1086    w3[2] = 0;
1087    w3[3] = 0;
1088  }
1089  else
1090  {
1091    w0[0] = w[ 0];
1092    w0[1] = w[ 1];
1093    w0[2] = w[ 2];
1094    w0[3] = w[ 3];
1095    w1[0] = w[ 4];
1096    w1[1] = w[ 5];
1097    w1[2] = w[ 6];
1098    w1[3] = w[ 7];
1099    w2[0] = w[ 8];
1100    w2[1] = w[ 9];
1101    w2[2] = w[10];
1102    w2[3] = w[11];
1103    w3[0] = w[12];
1104    w3[1] = w[13];
1105    w3[2] = w[14];
1106    w3[3] = w[15];
1107  }
1108
1109  md4_hmac_init_64 (ctx, w0, w1, w2, w3);
1110}
1111
1112DECLSPEC void md4_hmac_init_global_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1113{
1114  u32 w0[4];
1115  u32 w1[4];
1116  u32 w2[4];
1117  u32 w3[4];
1118
1119  if (len > 64)
1120  {
1121    md4_ctx_t tmp;
1122
1123    md4_init (&tmp);
1124
1125    md4_update_global_swap (&tmp, w, len);
1126
1127    md4_final (&tmp);
1128
1129    w0[0] = tmp.h[0];
1130    w0[1] = tmp.h[1];
1131    w0[2] = tmp.h[2];
1132    w0[3] = tmp.h[3];
1133    w1[0] = 0;
1134    w1[1] = 0;
1135    w1[2] = 0;
1136    w1[3] = 0;
1137    w2[0] = 0;
1138    w2[1] = 0;
1139    w2[2] = 0;
1140    w2[3] = 0;
1141    w3[0] = 0;
1142    w3[1] = 0;
1143    w3[2] = 0;
1144    w3[3] = 0;
1145  }
1146  else
1147  {
1148    w0[0] = hc_swap32_S (w[ 0]);
1149    w0[1] = hc_swap32_S (w[ 1]);
1150    w0[2] = hc_swap32_S (w[ 2]);
1151    w0[3] = hc_swap32_S (w[ 3]);
1152    w1[0] = hc_swap32_S (w[ 4]);
1153    w1[1] = hc_swap32_S (w[ 5]);
1154    w1[2] = hc_swap32_S (w[ 6]);
1155    w1[3] = hc_swap32_S (w[ 7]);
1156    w2[0] = hc_swap32_S (w[ 8]);
1157    w2[1] = hc_swap32_S (w[ 9]);
1158    w2[2] = hc_swap32_S (w[10]);
1159    w2[3] = hc_swap32_S (w[11]);
1160    w3[0] = hc_swap32_S (w[12]);
1161    w3[1] = hc_swap32_S (w[13]);
1162    w3[2] = hc_swap32_S (w[14]);
1163    w3[3] = hc_swap32_S (w[15]);
1164  }
1165
1166  md4_hmac_init_64 (ctx, w0, w1, w2, w3);
1167}
1168
1169DECLSPEC void md4_hmac_update_64 (md4_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len)
1170{
1171  md4_update_64 (&ctx->ipad, w0, w1, w2, w3, len);
1172}
1173
1174DECLSPEC void md4_hmac_update (md4_hmac_ctx_t *ctx, const u32 *w, const int len)
1175{
1176  md4_update (&ctx->ipad, w, len);
1177}
1178
1179DECLSPEC void md4_hmac_update_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int len)
1180{
1181  md4_update_swap (&ctx->ipad, w, len);
1182}
1183
1184DECLSPEC void md4_hmac_update_utf16le (md4_hmac_ctx_t *ctx, const u32 *w, const int len)
1185{
1186  md4_update_utf16le (&ctx->ipad, w, len);
1187}
1188
1189DECLSPEC void md4_hmac_update_utf16le_swap (md4_hmac_ctx_t *ctx, const u32 *w, const int len)
1190{
1191  md4_update_utf16le_swap (&ctx->ipad, w, len);
1192}
1193
1194DECLSPEC void md4_hmac_update_global (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1195{
1196  md4_update_global (&ctx->ipad, w, len);
1197}
1198
1199DECLSPEC void md4_hmac_update_global_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1200{
1201  md4_update_global_swap (&ctx->ipad, w, len);
1202}
1203
1204DECLSPEC void md4_hmac_update_global_utf16le (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1205{
1206  md4_update_global_utf16le (&ctx->ipad, w, len);
1207}
1208
1209DECLSPEC void md4_hmac_update_global_utf16le_swap (md4_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1210{
1211  md4_update_global_utf16le_swap (&ctx->ipad, w, len);
1212}
1213
1214DECLSPEC void md4_hmac_final (md4_hmac_ctx_t *ctx)
1215{
1216  md4_final (&ctx->ipad);
1217
1218  ctx->opad.w0[0] = ctx->ipad.h[0];
1219  ctx->opad.w0[1] = ctx->ipad.h[1];
1220  ctx->opad.w0[2] = ctx->ipad.h[2];
1221  ctx->opad.w0[3] = ctx->ipad.h[3];
1222  ctx->opad.w1[0] = 0;
1223  ctx->opad.w1[1] = 0;
1224  ctx->opad.w1[2] = 0;
1225  ctx->opad.w1[3] = 0;
1226  ctx->opad.w2[0] = 0;
1227  ctx->opad.w2[1] = 0;
1228  ctx->opad.w2[2] = 0;
1229  ctx->opad.w2[3] = 0;
1230  ctx->opad.w3[0] = 0;
1231  ctx->opad.w3[1] = 0;
1232  ctx->opad.w3[2] = 0;
1233  ctx->opad.w3[3] = 0;
1234
1235  ctx->opad.len += 16;
1236
1237  md4_final (&ctx->opad);
1238}
1239
// while the input buf can be a vector datatype, the length cannot: it is a single scalar shared by all vector elements
1241
1242DECLSPEC void md4_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest)
1243{
1244  u32x a = digest[0];
1245  u32x b = digest[1];
1246  u32x c = digest[2];
1247  u32x d = digest[3];
1248
1249  MD4_STEP (MD4_Fo, a, b, c, d, w0[0], MD4C00, MD4S00);
1250  MD4_STEP (MD4_Fo, d, a, b, c, w0[1], MD4C00, MD4S01);
1251  MD4_STEP (MD4_Fo, c, d, a, b, w0[2], MD4C00, MD4S02);
1252  MD4_STEP (MD4_Fo, b, c, d, a, w0[3], MD4C00, MD4S03);
1253  MD4_STEP (MD4_Fo, a, b, c, d, w1[0], MD4C00, MD4S00);
1254  MD4_STEP (MD4_Fo, d, a, b, c, w1[1], MD4C00, MD4S01);
1255  MD4_STEP (MD4_Fo, c, d, a, b, w1[2], MD4C00, MD4S02);
1256  MD4_STEP (MD4_Fo, b, c, d, a, w1[3], MD4C00, MD4S03);
1257  MD4_STEP (MD4_Fo, a, b, c, d, w2[0], MD4C00, MD4S00);
1258  MD4_STEP (MD4_Fo, d, a, b, c, w2[1], MD4C00, MD4S01);
1259  MD4_STEP (MD4_Fo, c, d, a, b, w2[2], MD4C00, MD4S02);
1260  MD4_STEP (MD4_Fo, b, c, d, a, w2[3], MD4C00, MD4S03);
1261  MD4_STEP (MD4_Fo, a, b, c, d, w3[0], MD4C00, MD4S00);
1262  MD4_STEP (MD4_Fo, d, a, b, c, w3[1], MD4C00, MD4S01);
1263  MD4_STEP (MD4_Fo, c, d, a, b, w3[2], MD4C00, MD4S02);
1264  MD4_STEP (MD4_Fo, b, c, d, a, w3[3], MD4C00, MD4S03);
1265
1266  MD4_STEP (MD4_Go, a, b, c, d, w0[0], MD4C01, MD4S10);
1267  MD4_STEP (MD4_Go, d, a, b, c, w1[0], MD4C01, MD4S11);
1268  MD4_STEP (MD4_Go, c, d, a, b, w2[0], MD4C01, MD4S12);
1269  MD4_STEP (MD4_Go, b, c, d, a, w3[0], MD4C01, MD4S13);
1270  MD4_STEP (MD4_Go, a, b, c, d, w0[1], MD4C01, MD4S10);
1271  MD4_STEP (MD4_Go, d, a, b, c, w1[1], MD4C01, MD4S11);
1272  MD4_STEP (MD4_Go, c, d, a, b, w2[1], MD4C01, MD4S12);
1273  MD4_STEP (MD4_Go, b, c, d, a, w3[1], MD4C01, MD4S13);
1274  MD4_STEP (MD4_Go, a, b, c, d, w0[2], MD4C01, MD4S10);
1275  MD4_STEP (MD4_Go, d, a, b, c, w1[2], MD4C01, MD4S11);
1276  MD4_STEP (MD4_Go, c, d, a, b, w2[2], MD4C01, MD4S12);
1277  MD4_STEP (MD4_Go, b, c, d, a, w3[2], MD4C01, MD4S13);
1278  MD4_STEP (MD4_Go, a, b, c, d, w0[3], MD4C01, MD4S10);
1279  MD4_STEP (MD4_Go, d, a, b, c, w1[3], MD4C01, MD4S11);
1280  MD4_STEP (MD4_Go, c, d, a, b, w2[3], MD4C01, MD4S12);
1281  MD4_STEP (MD4_Go, b, c, d, a, w3[3], MD4C01, MD4S13);
1282
1283  MD4_STEP (MD4_H , a, b, c, d, w0[0], MD4C02, MD4S20);
1284  MD4_STEP (MD4_H , d, a, b, c, w2[0], MD4C02, MD4S21);
1285  MD4_STEP (MD4_H , c, d, a, b, w1[0], MD4C02, MD4S22);
1286  MD4_STEP (MD4_H , b, c, d, a, w3[0], MD4C02, MD4S23);
1287  MD4_STEP (MD4_H , a, b, c, d, w0[2], MD4C02, MD4S20);
1288  MD4_STEP (MD4_H , d, a, b, c, w2[2], MD4C02, MD4S21);
1289  MD4_STEP (MD4_H , c, d, a, b, w1[2], MD4C02, MD4S22);
1290  MD4_STEP (MD4_H , b, c, d, a, w3[2], MD4C02, MD4S23);
1291  MD4_STEP (MD4_H , a, b, c, d, w0[1], MD4C02, MD4S20);
1292  MD4_STEP (MD4_H , d, a, b, c, w2[1], MD4C02, MD4S21);
1293  MD4_STEP (MD4_H , c, d, a, b, w1[1], MD4C02, MD4S22);
1294  MD4_STEP (MD4_H , b, c, d, a, w3[1], MD4C02, MD4S23);
1295  MD4_STEP (MD4_H , a, b, c, d, w0[3], MD4C02, MD4S20);
1296  MD4_STEP (MD4_H , d, a, b, c, w2[3], MD4C02, MD4S21);
1297  MD4_STEP (MD4_H , c, d, a, b, w1[3], MD4C02, MD4S22);
1298  MD4_STEP (MD4_H , b, c, d, a, w3[3], MD4C02, MD4S23);
1299
1300  digest[0] += a;
1301  digest[1] += b;
1302  digest[2] += c;
1303  digest[3] += d;
1304}
1305
1306DECLSPEC void md4_init_vector (md4_ctx_vector_t *ctx)
1307{
1308  ctx->h[0] = MD4M_A;
1309  ctx->h[1] = MD4M_B;
1310  ctx->h[2] = MD4M_C;
1311  ctx->h[3] = MD4M_D;
1312
1313  ctx->w0[0] = 0;
1314  ctx->w0[1] = 0;
1315  ctx->w0[2] = 0;
1316  ctx->w0[3] = 0;
1317  ctx->w1[0] = 0;
1318  ctx->w1[1] = 0;
1319  ctx->w1[2] = 0;
1320  ctx->w1[3] = 0;
1321  ctx->w2[0] = 0;
1322  ctx->w2[1] = 0;
1323  ctx->w2[2] = 0;
1324  ctx->w2[3] = 0;
1325  ctx->w3[0] = 0;
1326  ctx->w3[1] = 0;
1327  ctx->w3[2] = 0;
1328  ctx->w3[3] = 0;
1329
1330  ctx->len = 0;
1331}
1332
1333DECLSPEC void md4_init_vector_from_scalar (md4_ctx_vector_t *ctx, md4_ctx_t *ctx0)
1334{
1335  ctx->h[0] = ctx0->h[0];
1336  ctx->h[1] = ctx0->h[1];
1337  ctx->h[2] = ctx0->h[2];
1338  ctx->h[3] = ctx0->h[3];
1339
1340  ctx->w0[0] = ctx0->w0[0];
1341  ctx->w0[1] = ctx0->w0[1];
1342  ctx->w0[2] = ctx0->w0[2];
1343  ctx->w0[3] = ctx0->w0[3];
1344  ctx->w1[0] = ctx0->w1[0];
1345  ctx->w1[1] = ctx0->w1[1];
1346  ctx->w1[2] = ctx0->w1[2];
1347  ctx->w1[3] = ctx0->w1[3];
1348  ctx->w2[0] = ctx0->w2[0];
1349  ctx->w2[1] = ctx0->w2[1];
1350  ctx->w2[2] = ctx0->w2[2];
1351  ctx->w2[3] = ctx0->w2[3];
1352  ctx->w3[0] = ctx0->w3[0];
1353  ctx->w3[1] = ctx0->w3[1];
1354  ctx->w3[2] = ctx0->w3[2];
1355  ctx->w3[3] = ctx0->w3[3];
1356
1357  ctx->len = ctx0->len;
1358}
1359
1360DECLSPEC void md4_update_vector_64 (md4_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
1361{
1362  if (len == 0) return;
1363
1364  const int pos = ctx->len & 63;
1365
1366  ctx->len += len;
1367
1368  if (pos == 0)
1369  {
1370    ctx->w0[0] = w0[0];
1371    ctx->w0[1] = w0[1];
1372    ctx->w0[2] = w0[2];
1373    ctx->w0[3] = w0[3];
1374    ctx->w1[0] = w1[0];
1375    ctx->w1[1] = w1[1];
1376    ctx->w1[2] = w1[2];
1377    ctx->w1[3] = w1[3];
1378    ctx->w2[0] = w2[0];
1379    ctx->w2[1] = w2[1];
1380    ctx->w2[2] = w2[2];
1381    ctx->w2[3] = w2[3];
1382    ctx->w3[0] = w3[0];
1383    ctx->w3[1] = w3[1];
1384    ctx->w3[2] = w3[2];
1385    ctx->w3[3] = w3[3];
1386
1387    if (len == 64)
1388    {
1389      md4_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
1390
1391      ctx->w0[0] = 0;
1392      ctx->w0[1] = 0;
1393      ctx->w0[2] = 0;
1394      ctx->w0[3] = 0;
1395      ctx->w1[0] = 0;
1396      ctx->w1[1] = 0;
1397      ctx->w1[2] = 0;
1398      ctx->w1[3] = 0;
1399      ctx->w2[0] = 0;
1400      ctx->w2[1] = 0;
1401      ctx->w2[2] = 0;
1402      ctx->w2[3] = 0;
1403      ctx->w3[0] = 0;
1404      ctx->w3[1] = 0;
1405      ctx->w3[2] = 0;
1406      ctx->w3[3] = 0;
1407    }
1408  }
1409  else
1410  {
1411    if ((pos + len) < 64)
1412    {
1413      switch_buffer_by_offset_le (w0, w1, w2, w3, pos);
1414
1415      ctx->w0[0] |= w0[0];
1416      ctx->w0[1] |= w0[1];
1417      ctx->w0[2] |= w0[2];
1418      ctx->w0[3] |= w0[3];
1419      ctx->w1[0] |= w1[0];
1420      ctx->w1[1] |= w1[1];
1421      ctx->w1[2] |= w1[2];
1422      ctx->w1[3] |= w1[3];
1423      ctx->w2[0] |= w2[0];
1424      ctx->w2[1] |= w2[1];
1425      ctx->w2[2] |= w2[2];
1426      ctx->w2[3] |= w2[3];
1427      ctx->w3[0] |= w3[0];
1428      ctx->w3[1] |= w3[1];
1429      ctx->w3[2] |= w3[2];
1430      ctx->w3[3] |= w3[3];
1431    }
1432    else
1433    {
1434      u32x c0[4] = { 0 };
1435      u32x c1[4] = { 0 };
1436      u32x c2[4] = { 0 };
1437      u32x c3[4] = { 0 };
1438
1439      switch_buffer_by_offset_carry_le (w0, w1, w2, w3, c0, c1, c2, c3, pos);
1440
1441      ctx->w0[0] |= w0[0];
1442      ctx->w0[1] |= w0[1];
1443      ctx->w0[2] |= w0[2];
1444      ctx->w0[3] |= w0[3];
1445      ctx->w1[0] |= w1[0];
1446      ctx->w1[1] |= w1[1];
1447      ctx->w1[2] |= w1[2];
1448      ctx->w1[3] |= w1[3];
1449      ctx->w2[0] |= w2[0];
1450      ctx->w2[1] |= w2[1];
1451      ctx->w2[2] |= w2[2];
1452      ctx->w2[3] |= w2[3];
1453      ctx->w3[0] |= w3[0];
1454      ctx->w3[1] |= w3[1];
1455      ctx->w3[2] |= w3[2];
1456      ctx->w3[3] |= w3[3];
1457
1458      md4_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
1459
1460      ctx->w0[0] = c0[0];
1461      ctx->w0[1] = c0[1];
1462      ctx->w0[2] = c0[2];
1463      ctx->w0[3] = c0[3];
1464      ctx->w1[0] = c1[0];
1465      ctx->w1[1] = c1[1];
1466      ctx->w1[2] = c1[2];
1467      ctx->w1[3] = c1[3];
1468      ctx->w2[0] = c2[0];
1469      ctx->w2[1] = c2[1];
1470      ctx->w2[2] = c2[2];
1471      ctx->w2[3] = c2[3];
1472      ctx->w3[0] = c3[0];
1473      ctx->w3[1] = c3[1];
1474      ctx->w3[2] = c3[2];
1475      ctx->w3[3] = c3[3];
1476    }
1477  }
1478}
1479
1480DECLSPEC void md4_update_vector (md4_ctx_vector_t *ctx, const u32x *w, const int len)
1481{
1482  u32x w0[4];
1483  u32x w1[4];
1484  u32x w2[4];
1485  u32x w3[4];
1486
1487  int pos1;
1488  int pos4;
1489
1490  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
1491  {
1492    w0[0] = w[pos4 +  0];
1493    w0[1] = w[pos4 +  1];
1494    w0[2] = w[pos4 +  2];
1495    w0[3] = w[pos4 +  3];
1496    w1[0] = w[pos4 +  4];
1497    w1[1] = w[pos4 +  5];
1498    w1[2] = w[pos4 +  6];
1499    w1[3] = w[pos4 +  7];
1500    w2[0] = w[pos4 +  8];
1501    w2[1] = w[pos4 +  9];
1502    w2[2] = w[pos4 + 10];
1503    w2[3] = w[pos4 + 11];
1504    w3[0] = w[pos4 + 12];
1505    w3[1] = w[pos4 + 13];
1506    w3[2] = w[pos4 + 14];
1507    w3[3] = w[pos4 + 15];
1508
1509    md4_update_vector_64 (ctx, w0, w1, w2, w3, 64);
1510  }
1511
1512  w0[0] = w[pos4 +  0];
1513  w0[1] = w[pos4 +  1];
1514  w0[2] = w[pos4 +  2];
1515  w0[3] = w[pos4 +  3];
1516  w1[0] = w[pos4 +  4];
1517  w1[1] = w[pos4 +  5];
1518  w1[2] = w[pos4 +  6];
1519  w1[3] = w[pos4 +  7];
1520  w2[0] = w[pos4 +  8];
1521  w2[1] = w[pos4 +  9];
1522  w2[2] = w[pos4 + 10];
1523  w2[3] = w[pos4 + 11];
1524  w3[0] = w[pos4 + 12];
1525  w3[1] = w[pos4 + 13];
1526  w3[2] = w[pos4 + 14];
1527  w3[3] = w[pos4 + 15];
1528
1529  md4_update_vector_64 (ctx, w0, w1, w2, w3, len - pos1);
1530}
1531
1532DECLSPEC void md4_update_vector_swap (md4_ctx_vector_t *ctx, const u32x *w, const int len)
1533{
1534  u32x w0[4];
1535  u32x w1[4];
1536  u32x w2[4];
1537  u32x w3[4];
1538
1539  int pos1;
1540  int pos4;
1541
1542  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
1543  {
1544    w0[0] = w[pos4 +  0];
1545    w0[1] = w[pos4 +  1];
1546    w0[2] = w[pos4 +  2];
1547    w0[3] = w[pos4 +  3];
1548    w1[0] = w[pos4 +  4];
1549    w1[1] = w[pos4 +  5];
1550    w1[2] = w[pos4 +  6];
1551    w1[3] = w[pos4 +  7];
1552    w2[0] = w[pos4 +  8];
1553    w2[1] = w[pos4 +  9];
1554    w2[2] = w[pos4 + 10];
1555    w2[3] = w[pos4 + 11];
1556    w3[0] = w[pos4 + 12];
1557    w3[1] = w[pos4 + 13];
1558    w3[2] = w[pos4 + 14];
1559    w3[3] = w[pos4 + 15];
1560
1561    w0[0] = hc_swap32 (w0[0]);
1562    w0[1] = hc_swap32 (w0[1]);
1563    w0[2] = hc_swap32 (w0[2]);
1564    w0[3] = hc_swap32 (w0[3]);
1565    w1[0] = hc_swap32 (w1[0]);
1566    w1[1] = hc_swap32 (w1[1]);
1567    w1[2] = hc_swap32 (w1[2]);
1568    w1[3] = hc_swap32 (w1[3]);
1569    w2[0] = hc_swap32 (w2[0]);
1570    w2[1] = hc_swap32 (w2[1]);
1571    w2[2] = hc_swap32 (w2[2]);
1572    w2[3] = hc_swap32 (w2[3]);
1573    w3[0] = hc_swap32 (w3[0]);
1574    w3[1] = hc_swap32 (w3[1]);
1575    w3[2] = hc_swap32 (w3[2]);
1576    w3[3] = hc_swap32 (w3[3]);
1577
1578    md4_update_vector_64 (ctx, w0, w1, w2, w3, 64);
1579  }
1580
1581  w0[0] = w[pos4 +  0];
1582  w0[1] = w[pos4 +  1];
1583  w0[2] = w[pos4 +  2];
1584  w0[3] = w[pos4 +  3];
1585  w1[0] = w[pos4 +  4];
1586  w1[1] = w[pos4 +  5];
1587  w1[2] = w[pos4 +  6];
1588  w1[3] = w[pos4 +  7];
1589  w2[0] = w[pos4 +  8];
1590  w2[1] = w[pos4 +  9];
1591  w2[2] = w[pos4 + 10];
1592  w2[3] = w[pos4 + 11];
1593  w3[0] = w[pos4 + 12];
1594  w3[1] = w[pos4 + 13];
1595  w3[2] = w[pos4 + 14];
1596  w3[3] = w[pos4 + 15];
1597
1598  w0[0] = hc_swap32 (w0[0]);
1599  w0[1] = hc_swap32 (w0[1]);
1600  w0[2] = hc_swap32 (w0[2]);
1601  w0[3] = hc_swap32 (w0[3]);
1602  w1[0] = hc_swap32 (w1[0]);
1603  w1[1] = hc_swap32 (w1[1]);
1604  w1[2] = hc_swap32 (w1[2]);
1605  w1[3] = hc_swap32 (w1[3]);
1606  w2[0] = hc_swap32 (w2[0]);
1607  w2[1] = hc_swap32 (w2[1]);
1608  w2[2] = hc_swap32 (w2[2]);
1609  w2[3] = hc_swap32 (w2[3]);
1610  w3[0] = hc_swap32 (w3[0]);
1611  w3[1] = hc_swap32 (w3[1]);
1612  w3[2] = hc_swap32 (w3[2]);
1613  w3[3] = hc_swap32 (w3[3]);
1614
1615  md4_update_vector_64 (ctx, w0, w1, w2, w3, len - pos1);
1616}
1617
1618DECLSPEC void md4_update_vector_utf16le (md4_ctx_vector_t *ctx, const u32x *w, const int len)
1619{
1620  u32x w0[4];
1621  u32x w1[4];
1622  u32x w2[4];
1623  u32x w3[4];
1624
1625  int pos1;
1626  int pos4;
1627
1628  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
1629  {
1630    w0[0] = w[pos4 + 0];
1631    w0[1] = w[pos4 + 1];
1632    w0[2] = w[pos4 + 2];
1633    w0[3] = w[pos4 + 3];
1634    w1[0] = w[pos4 + 4];
1635    w1[1] = w[pos4 + 5];
1636    w1[2] = w[pos4 + 6];
1637    w1[3] = w[pos4 + 7];
1638
1639    make_utf16le (w1, w2, w3);
1640    make_utf16le (w0, w0, w1);
1641
1642    md4_update_vector_64 (ctx, w0, w1, w2, w3, 32 * 2);
1643  }
1644
1645  w0[0] = w[pos4 + 0];
1646  w0[1] = w[pos4 + 1];
1647  w0[2] = w[pos4 + 2];
1648  w0[3] = w[pos4 + 3];
1649  w1[0] = w[pos4 + 4];
1650  w1[1] = w[pos4 + 5];
1651  w1[2] = w[pos4 + 6];
1652  w1[3] = w[pos4 + 7];
1653
1654  make_utf16le (w1, w2, w3);
1655  make_utf16le (w0, w0, w1);
1656
1657  md4_update_vector_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
1658}
1659
1660DECLSPEC void md4_update_vector_utf16le_swap (md4_ctx_vector_t *ctx, const u32x *w, const int len)
1661{
1662  u32x w0[4];
1663  u32x w1[4];
1664  u32x w2[4];
1665  u32x w3[4];
1666
1667  int pos1;
1668  int pos4;
1669
1670  for (pos1 = 0, pos4 = 0; pos1 < len - 32; pos1 += 32, pos4 += 8)
1671  {
1672    w0[0] = w[pos4 + 0];
1673    w0[1] = w[pos4 + 1];
1674    w0[2] = w[pos4 + 2];
1675    w0[3] = w[pos4 + 3];
1676    w1[0] = w[pos4 + 4];
1677    w1[1] = w[pos4 + 5];
1678    w1[2] = w[pos4 + 6];
1679    w1[3] = w[pos4 + 7];
1680
1681    make_utf16le (w1, w2, w3);
1682    make_utf16le (w0, w0, w1);
1683
1684    w0[0] = hc_swap32 (w0[0]);
1685    w0[1] = hc_swap32 (w0[1]);
1686    w0[2] = hc_swap32 (w0[2]);
1687    w0[3] = hc_swap32 (w0[3]);
1688    w1[0] = hc_swap32 (w1[0]);
1689    w1[1] = hc_swap32 (w1[1]);
1690    w1[2] = hc_swap32 (w1[2]);
1691    w1[3] = hc_swap32 (w1[3]);
1692    w2[0] = hc_swap32 (w2[0]);
1693    w2[1] = hc_swap32 (w2[1]);
1694    w2[2] = hc_swap32 (w2[2]);
1695    w2[3] = hc_swap32 (w2[3]);
1696    w3[0] = hc_swap32 (w3[0]);
1697    w3[1] = hc_swap32 (w3[1]);
1698    w3[2] = hc_swap32 (w3[2]);
1699    w3[3] = hc_swap32 (w3[3]);
1700
1701    md4_update_vector_64 (ctx, w0, w1, w2, w3, 32 * 2);
1702  }
1703
1704  w0[0] = w[pos4 + 0];
1705  w0[1] = w[pos4 + 1];
1706  w0[2] = w[pos4 + 2];
1707  w0[3] = w[pos4 + 3];
1708  w1[0] = w[pos4 + 4];
1709  w1[1] = w[pos4 + 5];
1710  w1[2] = w[pos4 + 6];
1711  w1[3] = w[pos4 + 7];
1712
1713  make_utf16le (w1, w2, w3);
1714  make_utf16le (w0, w0, w1);
1715
1716  w0[0] = hc_swap32 (w0[0]);
1717  w0[1] = hc_swap32 (w0[1]);
1718  w0[2] = hc_swap32 (w0[2]);
1719  w0[3] = hc_swap32 (w0[3]);
1720  w1[0] = hc_swap32 (w1[0]);
1721  w1[1] = hc_swap32 (w1[1]);
1722  w1[2] = hc_swap32 (w1[2]);
1723  w1[3] = hc_swap32 (w1[3]);
1724  w2[0] = hc_swap32 (w2[0]);
1725  w2[1] = hc_swap32 (w2[1]);
1726  w2[2] = hc_swap32 (w2[2]);
1727  w2[3] = hc_swap32 (w2[3]);
1728  w3[0] = hc_swap32 (w3[0]);
1729  w3[1] = hc_swap32 (w3[1]);
1730  w3[2] = hc_swap32 (w3[2]);
1731  w3[3] = hc_swap32 (w3[3]);
1732
1733  md4_update_vector_64 (ctx, w0, w1, w2, w3, (len - pos1) * 2);
1734}
1735
1736DECLSPEC void md4_final_vector (md4_ctx_vector_t *ctx)
1737{
1738  const int pos = ctx->len & 63;
1739
1740  append_0x80_4x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, pos);
1741
1742  if (pos >= 56)
1743  {
1744    md4_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
1745
1746    ctx->w0[0] = 0;
1747    ctx->w0[1] = 0;
1748    ctx->w0[2] = 0;
1749    ctx->w0[3] = 0;
1750    ctx->w1[0] = 0;
1751    ctx->w1[1] = 0;
1752    ctx->w1[2] = 0;
1753    ctx->w1[3] = 0;
1754    ctx->w2[0] = 0;
1755    ctx->w2[1] = 0;
1756    ctx->w2[2] = 0;
1757    ctx->w2[3] = 0;
1758    ctx->w3[0] = 0;
1759    ctx->w3[1] = 0;
1760    ctx->w3[2] = 0;
1761    ctx->w3[3] = 0;
1762  }
1763
1764  ctx->w3[2] = ctx->len * 8;
1765  ctx->w3[3] = 0;
1766
1767  md4_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h);
1768}
1769
1770// HMAC + Vector
1771
1772DECLSPEC void md4_hmac_init_vector_64 (md4_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3)
1773{
1774  u32x a0[4];
1775  u32x a1[4];
1776  u32x a2[4];
1777  u32x a3[4];
1778
1779  // ipad
1780
1781  a0[0] = w0[0] ^ 0x36363636;
1782  a0[1] = w0[1] ^ 0x36363636;
1783  a0[2] = w0[2] ^ 0x36363636;
1784  a0[3] = w0[3] ^ 0x36363636;
1785  a1[0] = w1[0] ^ 0x36363636;
1786  a1[1] = w1[1] ^ 0x36363636;
1787  a1[2] = w1[2] ^ 0x36363636;
1788  a1[3] = w1[3] ^ 0x36363636;
1789  a2[0] = w2[0] ^ 0x36363636;
1790  a2[1] = w2[1] ^ 0x36363636;
1791  a2[2] = w2[2] ^ 0x36363636;
1792  a2[3] = w2[3] ^ 0x36363636;
1793  a3[0] = w3[0] ^ 0x36363636;
1794  a3[1] = w3[1] ^ 0x36363636;
1795  a3[2] = w3[2] ^ 0x36363636;
1796  a3[3] = w3[3] ^ 0x36363636;
1797
1798  md4_init_vector (&ctx->ipad);
1799
1800  md4_update_vector_64 (&ctx->ipad, a0, a1, a2, a3, 64);
1801
1802  // opad
1803
1804  u32x b0[4];
1805  u32x b1[4];
1806  u32x b2[4];
1807  u32x b3[4];
1808
1809  b0[0] = w0[0] ^ 0x5c5c5c5c;
1810  b0[1] = w0[1] ^ 0x5c5c5c5c;
1811  b0[2] = w0[2] ^ 0x5c5c5c5c;
1812  b0[3] = w0[3] ^ 0x5c5c5c5c;
1813  b1[0] = w1[0] ^ 0x5c5c5c5c;
1814  b1[1] = w1[1] ^ 0x5c5c5c5c;
1815  b1[2] = w1[2] ^ 0x5c5c5c5c;
1816  b1[3] = w1[3] ^ 0x5c5c5c5c;
1817  b2[0] = w2[0] ^ 0x5c5c5c5c;
1818  b2[1] = w2[1] ^ 0x5c5c5c5c;
1819  b2[2] = w2[2] ^ 0x5c5c5c5c;
1820  b2[3] = w2[3] ^ 0x5c5c5c5c;
1821  b3[0] = w3[0] ^ 0x5c5c5c5c;
1822  b3[1] = w3[1] ^ 0x5c5c5c5c;
1823  b3[2] = w3[2] ^ 0x5c5c5c5c;
1824  b3[3] = w3[3] ^ 0x5c5c5c5c;
1825
1826  md4_init_vector (&ctx->opad);
1827
1828  md4_update_vector_64 (&ctx->opad, b0, b1, b2, b3, 64);
1829}
1830
1831DECLSPEC void md4_hmac_init_vector (md4_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
1832{
1833  u32x w0[4];
1834  u32x w1[4];
1835  u32x w2[4];
1836  u32x w3[4];
1837
1838  if (len > 64)
1839  {
1840    md4_ctx_vector_t tmp;
1841
1842    md4_init_vector (&tmp);
1843
1844    md4_update_vector (&tmp, w, len);
1845
1846    md4_final_vector (&tmp);
1847
1848    w0[0] = tmp.h[0];
1849    w0[1] = tmp.h[1];
1850    w0[2] = tmp.h[2];
1851    w0[3] = tmp.h[3];
1852    w1[0] = 0;
1853    w1[1] = 0;
1854    w1[2] = 0;
1855    w1[3] = 0;
1856    w2[0] = 0;
1857    w2[1] = 0;
1858    w2[2] = 0;
1859    w2[3] = 0;
1860    w3[0] = 0;
1861    w3[1] = 0;
1862    w3[2] = 0;
1863    w3[3] = 0;
1864  }
1865  else
1866  {
1867    w0[0] = w[ 0];
1868    w0[1] = w[ 1];
1869    w0[2] = w[ 2];
1870    w0[3] = w[ 3];
1871    w1[0] = w[ 4];
1872    w1[1] = w[ 5];
1873    w1[2] = w[ 6];
1874    w1[3] = w[ 7];
1875    w2[0] = w[ 8];
1876    w2[1] = w[ 9];
1877    w2[2] = w[10];
1878    w2[3] = w[11];
1879    w3[0] = w[12];
1880    w3[1] = w[13];
1881    w3[2] = w[14];
1882    w3[3] = w[15];
1883  }
1884
1885  md4_hmac_init_vector_64 (ctx, w0, w1, w2, w3);
1886}
1887
1888DECLSPEC void md4_hmac_update_vector_64 (md4_hmac_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len)
1889{
1890  md4_update_vector_64 (&ctx->ipad, w0, w1, w2, w3, len);
1891}
1892
1893DECLSPEC void md4_hmac_update_vector (md4_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
1894{
1895  md4_update_vector (&ctx->ipad, w, len);
1896}
1897
1898DECLSPEC void md4_hmac_final_vector (md4_hmac_ctx_vector_t *ctx)
1899{
1900  md4_final_vector (&ctx->ipad);
1901
1902  ctx->opad.w0[0] = ctx->ipad.h[0];
1903  ctx->opad.w0[1] = ctx->ipad.h[1];
1904  ctx->opad.w0[2] = ctx->ipad.h[2];
1905  ctx->opad.w0[3] = ctx->ipad.h[3];
1906  ctx->opad.w1[0] = 0;
1907  ctx->opad.w1[1] = 0;
1908  ctx->opad.w1[2] = 0;
1909  ctx->opad.w1[3] = 0;
1910  ctx->opad.w2[0] = 0;
1911  ctx->opad.w2[1] = 0;
1912  ctx->opad.w2[2] = 0;
1913  ctx->opad.w2[3] = 0;
1914  ctx->opad.w3[0] = 0;
1915  ctx->opad.w3[1] = 0;
1916  ctx->opad.w3[2] = 0;
1917  ctx->opad.w3[3] = 0;
1918
1919  ctx->opad.len += 16;
1920
1921  md4_final_vector (&ctx->opad);
1922}
1923