1/**
2 * Author......: See docs/credits.txt
3 * License.....: MIT
4 */
5
6#include "inc_vendor.h"
7#include "inc_types.h"
8#include "inc_platform.h"
9#include "inc_common.h"
10#include "inc_hash_sha512.h"
11
12CONSTANT_VK u64a k_sha512[80] =
13{
14  SHA512C00, SHA512C01, SHA512C02, SHA512C03,
15  SHA512C04, SHA512C05, SHA512C06, SHA512C07,
16  SHA512C08, SHA512C09, SHA512C0a, SHA512C0b,
17  SHA512C0c, SHA512C0d, SHA512C0e, SHA512C0f,
18  SHA512C10, SHA512C11, SHA512C12, SHA512C13,
19  SHA512C14, SHA512C15, SHA512C16, SHA512C17,
20  SHA512C18, SHA512C19, SHA512C1a, SHA512C1b,
21  SHA512C1c, SHA512C1d, SHA512C1e, SHA512C1f,
22  SHA512C20, SHA512C21, SHA512C22, SHA512C23,
23  SHA512C24, SHA512C25, SHA512C26, SHA512C27,
24  SHA512C28, SHA512C29, SHA512C2a, SHA512C2b,
25  SHA512C2c, SHA512C2d, SHA512C2e, SHA512C2f,
26  SHA512C30, SHA512C31, SHA512C32, SHA512C33,
27  SHA512C34, SHA512C35, SHA512C36, SHA512C37,
28  SHA512C38, SHA512C39, SHA512C3a, SHA512C3b,
29  SHA512C3c, SHA512C3d, SHA512C3e, SHA512C3f,
30  SHA512C40, SHA512C41, SHA512C42, SHA512C43,
31  SHA512C44, SHA512C45, SHA512C46, SHA512C47,
32  SHA512C48, SHA512C49, SHA512C4a, SHA512C4b,
33  SHA512C4c, SHA512C4d, SHA512C4e, SHA512C4f,
34};
35
36// important notes on this:
// input buf unused bytes need to be set to zero
38// input buf needs to be in algorithm native byte order (md5 = LE, sha1 = BE, etc)
39// input buf needs to be 128 byte aligned when using sha512_update()
40
41DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, const u32 *w4, const u32 *w5, const u32 *w6, const u32 *w7, u64 *digest)
42{
43  u64 a = digest[0];
44  u64 b = digest[1];
45  u64 c = digest[2];
46  u64 d = digest[3];
47  u64 e = digest[4];
48  u64 f = digest[5];
49  u64 g = digest[6];
50  u64 h = digest[7];
51
52  u64 w0_t = hl32_to_64_S (w0[0], w0[1]);
53  u64 w1_t = hl32_to_64_S (w0[2], w0[3]);
54  u64 w2_t = hl32_to_64_S (w1[0], w1[1]);
55  u64 w3_t = hl32_to_64_S (w1[2], w1[3]);
56  u64 w4_t = hl32_to_64_S (w2[0], w2[1]);
57  u64 w5_t = hl32_to_64_S (w2[2], w2[3]);
58  u64 w6_t = hl32_to_64_S (w3[0], w3[1]);
59  u64 w7_t = hl32_to_64_S (w3[2], w3[3]);
60  u64 w8_t = hl32_to_64_S (w4[0], w4[1]);
61  u64 w9_t = hl32_to_64_S (w4[2], w4[3]);
62  u64 wa_t = hl32_to_64_S (w5[0], w5[1]);
63  u64 wb_t = hl32_to_64_S (w5[2], w5[3]);
64  u64 wc_t = hl32_to_64_S (w6[0], w6[1]);
65  u64 wd_t = hl32_to_64_S (w6[2], w6[3]);
66  u64 we_t = hl32_to_64_S (w7[0], w7[1]);
67  u64 wf_t = hl32_to_64_S (w7[2], w7[3]);
68
69  #define ROUND_EXPAND_S()                            \
70  {                                                   \
71    w0_t = SHA512_EXPAND_S (we_t, w9_t, w1_t, w0_t);  \
72    w1_t = SHA512_EXPAND_S (wf_t, wa_t, w2_t, w1_t);  \
73    w2_t = SHA512_EXPAND_S (w0_t, wb_t, w3_t, w2_t);  \
74    w3_t = SHA512_EXPAND_S (w1_t, wc_t, w4_t, w3_t);  \
75    w4_t = SHA512_EXPAND_S (w2_t, wd_t, w5_t, w4_t);  \
76    w5_t = SHA512_EXPAND_S (w3_t, we_t, w6_t, w5_t);  \
77    w6_t = SHA512_EXPAND_S (w4_t, wf_t, w7_t, w6_t);  \
78    w7_t = SHA512_EXPAND_S (w5_t, w0_t, w8_t, w7_t);  \
79    w8_t = SHA512_EXPAND_S (w6_t, w1_t, w9_t, w8_t);  \
80    w9_t = SHA512_EXPAND_S (w7_t, w2_t, wa_t, w9_t);  \
81    wa_t = SHA512_EXPAND_S (w8_t, w3_t, wb_t, wa_t);  \
82    wb_t = SHA512_EXPAND_S (w9_t, w4_t, wc_t, wb_t);  \
83    wc_t = SHA512_EXPAND_S (wa_t, w5_t, wd_t, wc_t);  \
84    wd_t = SHA512_EXPAND_S (wb_t, w6_t, we_t, wd_t);  \
85    we_t = SHA512_EXPAND_S (wc_t, w7_t, wf_t, we_t);  \
86    wf_t = SHA512_EXPAND_S (wd_t, w8_t, w0_t, wf_t);  \
87  }
88
89  #define ROUND_STEP_S(i)                                                                   \
90  {                                                                                         \
91    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i +  0]); \
92    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i +  1]); \
93    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i +  2]); \
94    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i +  3]); \
95    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i +  4]); \
96    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i +  5]); \
97    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i +  6]); \
98    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i +  7]); \
99    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i +  8]); \
100    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i +  9]); \
101    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
102    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
103    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
104    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
105    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
106    SHA512_STEP_S (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
107  }
108
109  ROUND_STEP_S (0);
110
111  #ifdef _unroll
112  #pragma unroll
113  #endif
114  for (int i = 16; i < 80; i += 16)
115  {
116    ROUND_EXPAND_S (); ROUND_STEP_S (i);
117  }
118
119  #undef ROUND_EXPAND_S
120  #undef ROUND_STEP_S
121
122  digest[0] += a;
123  digest[1] += b;
124  digest[2] += c;
125  digest[3] += d;
126  digest[4] += e;
127  digest[5] += f;
128  digest[6] += g;
129  digest[7] += h;
130}
131
132DECLSPEC void sha512_init (sha512_ctx_t *ctx)
133{
134  ctx->h[0] = SHA512M_A;
135  ctx->h[1] = SHA512M_B;
136  ctx->h[2] = SHA512M_C;
137  ctx->h[3] = SHA512M_D;
138  ctx->h[4] = SHA512M_E;
139  ctx->h[5] = SHA512M_F;
140  ctx->h[6] = SHA512M_G;
141  ctx->h[7] = SHA512M_H;
142
143  ctx->w0[0] = 0;
144  ctx->w0[1] = 0;
145  ctx->w0[2] = 0;
146  ctx->w0[3] = 0;
147  ctx->w1[0] = 0;
148  ctx->w1[1] = 0;
149  ctx->w1[2] = 0;
150  ctx->w1[3] = 0;
151  ctx->w2[0] = 0;
152  ctx->w2[1] = 0;
153  ctx->w2[2] = 0;
154  ctx->w2[3] = 0;
155  ctx->w3[0] = 0;
156  ctx->w3[1] = 0;
157  ctx->w3[2] = 0;
158  ctx->w3[3] = 0;
159  ctx->w4[0] = 0;
160  ctx->w4[1] = 0;
161  ctx->w4[2] = 0;
162  ctx->w4[3] = 0;
163  ctx->w5[0] = 0;
164  ctx->w5[1] = 0;
165  ctx->w5[2] = 0;
166  ctx->w5[3] = 0;
167  ctx->w6[0] = 0;
168  ctx->w6[1] = 0;
169  ctx->w6[2] = 0;
170  ctx->w6[3] = 0;
171  ctx->w7[0] = 0;
172  ctx->w7[1] = 0;
173  ctx->w7[2] = 0;
174  ctx->w7[3] = 0;
175
176  ctx->len = 0;
177}
178
179DECLSPEC void sha512_update_128 (sha512_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len)
180{
181  if (len == 0) return;
182
183  const int pos = ctx->len & 127;
184
185  ctx->len += len;
186
187  if (pos == 0)
188  {
189    ctx->w0[0] = w0[0];
190    ctx->w0[1] = w0[1];
191    ctx->w0[2] = w0[2];
192    ctx->w0[3] = w0[3];
193    ctx->w1[0] = w1[0];
194    ctx->w1[1] = w1[1];
195    ctx->w1[2] = w1[2];
196    ctx->w1[3] = w1[3];
197    ctx->w2[0] = w2[0];
198    ctx->w2[1] = w2[1];
199    ctx->w2[2] = w2[2];
200    ctx->w2[3] = w2[3];
201    ctx->w3[0] = w3[0];
202    ctx->w3[1] = w3[1];
203    ctx->w3[2] = w3[2];
204    ctx->w3[3] = w3[3];
205    ctx->w4[0] = w4[0];
206    ctx->w4[1] = w4[1];
207    ctx->w4[2] = w4[2];
208    ctx->w4[3] = w4[3];
209    ctx->w5[0] = w5[0];
210    ctx->w5[1] = w5[1];
211    ctx->w5[2] = w5[2];
212    ctx->w5[3] = w5[3];
213    ctx->w6[0] = w6[0];
214    ctx->w6[1] = w6[1];
215    ctx->w6[2] = w6[2];
216    ctx->w6[3] = w6[3];
217    ctx->w7[0] = w7[0];
218    ctx->w7[1] = w7[1];
219    ctx->w7[2] = w7[2];
220    ctx->w7[3] = w7[3];
221
222    if (len == 128)
223    {
224      sha512_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
225
226      ctx->w0[0] = 0;
227      ctx->w0[1] = 0;
228      ctx->w0[2] = 0;
229      ctx->w0[3] = 0;
230      ctx->w1[0] = 0;
231      ctx->w1[1] = 0;
232      ctx->w1[2] = 0;
233      ctx->w1[3] = 0;
234      ctx->w2[0] = 0;
235      ctx->w2[1] = 0;
236      ctx->w2[2] = 0;
237      ctx->w2[3] = 0;
238      ctx->w3[0] = 0;
239      ctx->w3[1] = 0;
240      ctx->w3[2] = 0;
241      ctx->w3[3] = 0;
242      ctx->w4[0] = 0;
243      ctx->w4[1] = 0;
244      ctx->w4[2] = 0;
245      ctx->w4[3] = 0;
246      ctx->w5[0] = 0;
247      ctx->w5[1] = 0;
248      ctx->w5[2] = 0;
249      ctx->w5[3] = 0;
250      ctx->w6[0] = 0;
251      ctx->w6[1] = 0;
252      ctx->w6[2] = 0;
253      ctx->w6[3] = 0;
254      ctx->w7[0] = 0;
255      ctx->w7[1] = 0;
256      ctx->w7[2] = 0;
257      ctx->w7[3] = 0;
258    }
259  }
260  else
261  {
262    if ((pos + len) < 128)
263    {
264      switch_buffer_by_offset_8x4_be_S (w0, w1, w2, w3, w4, w5, w6, w7, pos);
265
266      ctx->w0[0] |= w0[0];
267      ctx->w0[1] |= w0[1];
268      ctx->w0[2] |= w0[2];
269      ctx->w0[3] |= w0[3];
270      ctx->w1[0] |= w1[0];
271      ctx->w1[1] |= w1[1];
272      ctx->w1[2] |= w1[2];
273      ctx->w1[3] |= w1[3];
274      ctx->w2[0] |= w2[0];
275      ctx->w2[1] |= w2[1];
276      ctx->w2[2] |= w2[2];
277      ctx->w2[3] |= w2[3];
278      ctx->w3[0] |= w3[0];
279      ctx->w3[1] |= w3[1];
280      ctx->w3[2] |= w3[2];
281      ctx->w3[3] |= w3[3];
282      ctx->w4[0] |= w4[0];
283      ctx->w4[1] |= w4[1];
284      ctx->w4[2] |= w4[2];
285      ctx->w4[3] |= w4[3];
286      ctx->w5[0] |= w5[0];
287      ctx->w5[1] |= w5[1];
288      ctx->w5[2] |= w5[2];
289      ctx->w5[3] |= w5[3];
290      ctx->w6[0] |= w6[0];
291      ctx->w6[1] |= w6[1];
292      ctx->w6[2] |= w6[2];
293      ctx->w6[3] |= w6[3];
294      ctx->w7[0] |= w7[0];
295      ctx->w7[1] |= w7[1];
296      ctx->w7[2] |= w7[2];
297      ctx->w7[3] |= w7[3];
298    }
299    else
300    {
301      u32 c0[4] = { 0 };
302      u32 c1[4] = { 0 };
303      u32 c2[4] = { 0 };
304      u32 c3[4] = { 0 };
305      u32 c4[4] = { 0 };
306      u32 c5[4] = { 0 };
307      u32 c6[4] = { 0 };
308      u32 c7[4] = { 0 };
309
310      switch_buffer_by_offset_8x4_carry_be_S (w0, w1, w2, w3, w4, w5, w6, w7, c0, c1, c2, c3, c4, c5, c6, c7, pos);
311
312      ctx->w0[0] |= w0[0];
313      ctx->w0[1] |= w0[1];
314      ctx->w0[2] |= w0[2];
315      ctx->w0[3] |= w0[3];
316      ctx->w1[0] |= w1[0];
317      ctx->w1[1] |= w1[1];
318      ctx->w1[2] |= w1[2];
319      ctx->w1[3] |= w1[3];
320      ctx->w2[0] |= w2[0];
321      ctx->w2[1] |= w2[1];
322      ctx->w2[2] |= w2[2];
323      ctx->w2[3] |= w2[3];
324      ctx->w3[0] |= w3[0];
325      ctx->w3[1] |= w3[1];
326      ctx->w3[2] |= w3[2];
327      ctx->w3[3] |= w3[3];
328      ctx->w4[0] |= w4[0];
329      ctx->w4[1] |= w4[1];
330      ctx->w4[2] |= w4[2];
331      ctx->w4[3] |= w4[3];
332      ctx->w5[0] |= w5[0];
333      ctx->w5[1] |= w5[1];
334      ctx->w5[2] |= w5[2];
335      ctx->w5[3] |= w5[3];
336      ctx->w6[0] |= w6[0];
337      ctx->w6[1] |= w6[1];
338      ctx->w6[2] |= w6[2];
339      ctx->w6[3] |= w6[3];
340      ctx->w7[0] |= w7[0];
341      ctx->w7[1] |= w7[1];
342      ctx->w7[2] |= w7[2];
343      ctx->w7[3] |= w7[3];
344
345      sha512_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
346
347      ctx->w0[0] = c0[0];
348      ctx->w0[1] = c0[1];
349      ctx->w0[2] = c0[2];
350      ctx->w0[3] = c0[3];
351      ctx->w1[0] = c1[0];
352      ctx->w1[1] = c1[1];
353      ctx->w1[2] = c1[2];
354      ctx->w1[3] = c1[3];
355      ctx->w2[0] = c2[0];
356      ctx->w2[1] = c2[1];
357      ctx->w2[2] = c2[2];
358      ctx->w2[3] = c2[3];
359      ctx->w3[0] = c3[0];
360      ctx->w3[1] = c3[1];
361      ctx->w3[2] = c3[2];
362      ctx->w3[3] = c3[3];
363      ctx->w4[0] = c4[0];
364      ctx->w4[1] = c4[1];
365      ctx->w4[2] = c4[2];
366      ctx->w4[3] = c4[3];
367      ctx->w5[0] = c5[0];
368      ctx->w5[1] = c5[1];
369      ctx->w5[2] = c5[2];
370      ctx->w5[3] = c5[3];
371      ctx->w6[0] = c6[0];
372      ctx->w6[1] = c6[1];
373      ctx->w6[2] = c6[2];
374      ctx->w6[3] = c6[3];
375      ctx->w7[0] = c7[0];
376      ctx->w7[1] = c7[1];
377      ctx->w7[2] = c7[2];
378      ctx->w7[3] = c7[3];
379    }
380  }
381}
382
383DECLSPEC void sha512_update (sha512_ctx_t *ctx, const u32 *w, const int len)
384{
385  u32 w0[4];
386  u32 w1[4];
387  u32 w2[4];
388  u32 w3[4];
389  u32 w4[4];
390  u32 w5[4];
391  u32 w6[4];
392  u32 w7[4];
393
394  int pos1;
395  int pos4;
396
397  for (pos1 = 0, pos4 = 0; pos1 < len - 128; pos1 += 128, pos4 += 32)
398  {
399    w0[0] = w[pos4 +  0];
400    w0[1] = w[pos4 +  1];
401    w0[2] = w[pos4 +  2];
402    w0[3] = w[pos4 +  3];
403    w1[0] = w[pos4 +  4];
404    w1[1] = w[pos4 +  5];
405    w1[2] = w[pos4 +  6];
406    w1[3] = w[pos4 +  7];
407    w2[0] = w[pos4 +  8];
408    w2[1] = w[pos4 +  9];
409    w2[2] = w[pos4 + 10];
410    w2[3] = w[pos4 + 11];
411    w3[0] = w[pos4 + 12];
412    w3[1] = w[pos4 + 13];
413    w3[2] = w[pos4 + 14];
414    w3[3] = w[pos4 + 15];
415    w4[0] = w[pos4 + 16];
416    w4[1] = w[pos4 + 17];
417    w4[2] = w[pos4 + 18];
418    w4[3] = w[pos4 + 19];
419    w5[0] = w[pos4 + 20];
420    w5[1] = w[pos4 + 21];
421    w5[2] = w[pos4 + 22];
422    w5[3] = w[pos4 + 23];
423    w6[0] = w[pos4 + 24];
424    w6[1] = w[pos4 + 25];
425    w6[2] = w[pos4 + 26];
426    w6[3] = w[pos4 + 27];
427    w7[0] = w[pos4 + 28];
428    w7[1] = w[pos4 + 29];
429    w7[2] = w[pos4 + 30];
430    w7[3] = w[pos4 + 31];
431
432    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);
433  }
434
435  w0[0] = w[pos4 +  0];
436  w0[1] = w[pos4 +  1];
437  w0[2] = w[pos4 +  2];
438  w0[3] = w[pos4 +  3];
439  w1[0] = w[pos4 +  4];
440  w1[1] = w[pos4 +  5];
441  w1[2] = w[pos4 +  6];
442  w1[3] = w[pos4 +  7];
443  w2[0] = w[pos4 +  8];
444  w2[1] = w[pos4 +  9];
445  w2[2] = w[pos4 + 10];
446  w2[3] = w[pos4 + 11];
447  w3[0] = w[pos4 + 12];
448  w3[1] = w[pos4 + 13];
449  w3[2] = w[pos4 + 14];
450  w3[3] = w[pos4 + 15];
451  w4[0] = w[pos4 + 16];
452  w4[1] = w[pos4 + 17];
453  w4[2] = w[pos4 + 18];
454  w4[3] = w[pos4 + 19];
455  w5[0] = w[pos4 + 20];
456  w5[1] = w[pos4 + 21];
457  w5[2] = w[pos4 + 22];
458  w5[3] = w[pos4 + 23];
459  w6[0] = w[pos4 + 24];
460  w6[1] = w[pos4 + 25];
461  w6[2] = w[pos4 + 26];
462  w6[3] = w[pos4 + 27];
463  w7[0] = w[pos4 + 28];
464  w7[1] = w[pos4 + 29];
465  w7[2] = w[pos4 + 30];
466  w7[3] = w[pos4 + 31];
467
468  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - pos1);
469}
470
471DECLSPEC void sha512_update_swap (sha512_ctx_t *ctx, const u32 *w, const int len)
472{
473  u32 w0[4];
474  u32 w1[4];
475  u32 w2[4];
476  u32 w3[4];
477  u32 w4[4];
478  u32 w5[4];
479  u32 w6[4];
480  u32 w7[4];
481
482  int pos1;
483  int pos4;
484
485  for (pos1 = 0, pos4 = 0; pos1 < len - 128; pos1 += 128, pos4 += 32)
486  {
487    w0[0] = w[pos4 +  0];
488    w0[1] = w[pos4 +  1];
489    w0[2] = w[pos4 +  2];
490    w0[3] = w[pos4 +  3];
491    w1[0] = w[pos4 +  4];
492    w1[1] = w[pos4 +  5];
493    w1[2] = w[pos4 +  6];
494    w1[3] = w[pos4 +  7];
495    w2[0] = w[pos4 +  8];
496    w2[1] = w[pos4 +  9];
497    w2[2] = w[pos4 + 10];
498    w2[3] = w[pos4 + 11];
499    w3[0] = w[pos4 + 12];
500    w3[1] = w[pos4 + 13];
501    w3[2] = w[pos4 + 14];
502    w3[3] = w[pos4 + 15];
503    w4[0] = w[pos4 + 16];
504    w4[1] = w[pos4 + 17];
505    w4[2] = w[pos4 + 18];
506    w4[3] = w[pos4 + 19];
507    w5[0] = w[pos4 + 20];
508    w5[1] = w[pos4 + 21];
509    w5[2] = w[pos4 + 22];
510    w5[3] = w[pos4 + 23];
511    w6[0] = w[pos4 + 24];
512    w6[1] = w[pos4 + 25];
513    w6[2] = w[pos4 + 26];
514    w6[3] = w[pos4 + 27];
515    w7[0] = w[pos4 + 28];
516    w7[1] = w[pos4 + 29];
517    w7[2] = w[pos4 + 30];
518    w7[3] = w[pos4 + 31];
519
520    w0[0] = hc_swap32_S (w0[0]);
521    w0[1] = hc_swap32_S (w0[1]);
522    w0[2] = hc_swap32_S (w0[2]);
523    w0[3] = hc_swap32_S (w0[3]);
524    w1[0] = hc_swap32_S (w1[0]);
525    w1[1] = hc_swap32_S (w1[1]);
526    w1[2] = hc_swap32_S (w1[2]);
527    w1[3] = hc_swap32_S (w1[3]);
528    w2[0] = hc_swap32_S (w2[0]);
529    w2[1] = hc_swap32_S (w2[1]);
530    w2[2] = hc_swap32_S (w2[2]);
531    w2[3] = hc_swap32_S (w2[3]);
532    w3[0] = hc_swap32_S (w3[0]);
533    w3[1] = hc_swap32_S (w3[1]);
534    w3[2] = hc_swap32_S (w3[2]);
535    w3[3] = hc_swap32_S (w3[3]);
536    w4[0] = hc_swap32_S (w4[0]);
537    w4[1] = hc_swap32_S (w4[1]);
538    w4[2] = hc_swap32_S (w4[2]);
539    w4[3] = hc_swap32_S (w4[3]);
540    w5[0] = hc_swap32_S (w5[0]);
541    w5[1] = hc_swap32_S (w5[1]);
542    w5[2] = hc_swap32_S (w5[2]);
543    w5[3] = hc_swap32_S (w5[3]);
544    w6[0] = hc_swap32_S (w6[0]);
545    w6[1] = hc_swap32_S (w6[1]);
546    w6[2] = hc_swap32_S (w6[2]);
547    w6[3] = hc_swap32_S (w6[3]);
548    w7[0] = hc_swap32_S (w7[0]);
549    w7[1] = hc_swap32_S (w7[1]);
550    w7[2] = hc_swap32_S (w7[2]);
551    w7[3] = hc_swap32_S (w7[3]);
552
553    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);
554  }
555
556  w0[0] = w[pos4 +  0];
557  w0[1] = w[pos4 +  1];
558  w0[2] = w[pos4 +  2];
559  w0[3] = w[pos4 +  3];
560  w1[0] = w[pos4 +  4];
561  w1[1] = w[pos4 +  5];
562  w1[2] = w[pos4 +  6];
563  w1[3] = w[pos4 +  7];
564  w2[0] = w[pos4 +  8];
565  w2[1] = w[pos4 +  9];
566  w2[2] = w[pos4 + 10];
567  w2[3] = w[pos4 + 11];
568  w3[0] = w[pos4 + 12];
569  w3[1] = w[pos4 + 13];
570  w3[2] = w[pos4 + 14];
571  w3[3] = w[pos4 + 15];
572  w4[0] = w[pos4 + 16];
573  w4[1] = w[pos4 + 17];
574  w4[2] = w[pos4 + 18];
575  w4[3] = w[pos4 + 19];
576  w5[0] = w[pos4 + 20];
577  w5[1] = w[pos4 + 21];
578  w5[2] = w[pos4 + 22];
579  w5[3] = w[pos4 + 23];
580  w6[0] = w[pos4 + 24];
581  w6[1] = w[pos4 + 25];
582  w6[2] = w[pos4 + 26];
583  w6[3] = w[pos4 + 27];
584  w7[0] = w[pos4 + 28];
585  w7[1] = w[pos4 + 29];
586  w7[2] = w[pos4 + 30];
587  w7[3] = w[pos4 + 31];
588
589  w0[0] = hc_swap32_S (w0[0]);
590  w0[1] = hc_swap32_S (w0[1]);
591  w0[2] = hc_swap32_S (w0[2]);
592  w0[3] = hc_swap32_S (w0[3]);
593  w1[0] = hc_swap32_S (w1[0]);
594  w1[1] = hc_swap32_S (w1[1]);
595  w1[2] = hc_swap32_S (w1[2]);
596  w1[3] = hc_swap32_S (w1[3]);
597  w2[0] = hc_swap32_S (w2[0]);
598  w2[1] = hc_swap32_S (w2[1]);
599  w2[2] = hc_swap32_S (w2[2]);
600  w2[3] = hc_swap32_S (w2[3]);
601  w3[0] = hc_swap32_S (w3[0]);
602  w3[1] = hc_swap32_S (w3[1]);
603  w3[2] = hc_swap32_S (w3[2]);
604  w3[3] = hc_swap32_S (w3[3]);
605  w4[0] = hc_swap32_S (w4[0]);
606  w4[1] = hc_swap32_S (w4[1]);
607  w4[2] = hc_swap32_S (w4[2]);
608  w4[3] = hc_swap32_S (w4[3]);
609  w5[0] = hc_swap32_S (w5[0]);
610  w5[1] = hc_swap32_S (w5[1]);
611  w5[2] = hc_swap32_S (w5[2]);
612  w5[3] = hc_swap32_S (w5[3]);
613  w6[0] = hc_swap32_S (w6[0]);
614  w6[1] = hc_swap32_S (w6[1]);
615  w6[2] = hc_swap32_S (w6[2]);
616  w6[3] = hc_swap32_S (w6[3]);
617  w7[0] = hc_swap32_S (w7[0]);
618  w7[1] = hc_swap32_S (w7[1]);
619  w7[2] = hc_swap32_S (w7[2]);
620  w7[3] = hc_swap32_S (w7[3]);
621
622  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - pos1);
623}
624
625DECLSPEC void sha512_update_utf16le (sha512_ctx_t *ctx, const u32 *w, const int len)
626{
627  if (hc_enc_scan (w, len))
628  {
629    hc_enc_t hc_enc;
630
631    hc_enc_init (&hc_enc);
632
633    while (hc_enc_has_next (&hc_enc, len))
634    {
635      u32 enc_buf[32] = { 0 };
636
637      const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
638
639      sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len);
640    }
641
642    return;
643  }
644
645  u32 w0[4];
646  u32 w1[4];
647  u32 w2[4];
648  u32 w3[4];
649  u32 w4[4];
650  u32 w5[4];
651  u32 w6[4];
652  u32 w7[4];
653
654  int pos1;
655  int pos4;
656
657  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
658  {
659    w0[0] = w[pos4 +  0];
660    w0[1] = w[pos4 +  1];
661    w0[2] = w[pos4 +  2];
662    w0[3] = w[pos4 +  3];
663    w1[0] = w[pos4 +  4];
664    w1[1] = w[pos4 +  5];
665    w1[2] = w[pos4 +  6];
666    w1[3] = w[pos4 +  7];
667    w2[0] = w[pos4 +  8];
668    w2[1] = w[pos4 +  9];
669    w2[2] = w[pos4 + 10];
670    w2[3] = w[pos4 + 11];
671    w3[0] = w[pos4 + 12];
672    w3[1] = w[pos4 + 13];
673    w3[2] = w[pos4 + 14];
674    w3[3] = w[pos4 + 15];
675
676    make_utf16le_S (w3, w6, w7);
677    make_utf16le_S (w2, w4, w5);
678    make_utf16le_S (w1, w2, w3);
679    make_utf16le_S (w0, w0, w1);
680
681    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
682  }
683
684  w0[0] = w[pos4 +  0];
685  w0[1] = w[pos4 +  1];
686  w0[2] = w[pos4 +  2];
687  w0[3] = w[pos4 +  3];
688  w1[0] = w[pos4 +  4];
689  w1[1] = w[pos4 +  5];
690  w1[2] = w[pos4 +  6];
691  w1[3] = w[pos4 +  7];
692  w2[0] = w[pos4 +  8];
693  w2[1] = w[pos4 +  9];
694  w2[2] = w[pos4 + 10];
695  w2[3] = w[pos4 + 11];
696  w3[0] = w[pos4 + 12];
697  w3[1] = w[pos4 + 13];
698  w3[2] = w[pos4 + 14];
699  w3[3] = w[pos4 + 15];
700
701  make_utf16le_S (w3, w6, w7);
702  make_utf16le_S (w2, w4, w5);
703  make_utf16le_S (w1, w2, w3);
704  make_utf16le_S (w0, w0, w1);
705
706  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
707}
708
709DECLSPEC void sha512_update_utf16le_swap (sha512_ctx_t *ctx, const u32 *w, const int len)
710{
711  if (hc_enc_scan (w, len))
712  {
713    hc_enc_t hc_enc;
714
715    hc_enc_init (&hc_enc);
716
717    while (hc_enc_has_next (&hc_enc, len))
718    {
719      u32 enc_buf[32] = { 0 };
720
721      const int enc_len = hc_enc_next (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
722
723      enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
724      enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
725      enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
726      enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
727      enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
728      enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
729      enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
730      enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
731      enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
732      enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
733      enc_buf[10] = hc_swap32_S (enc_buf[10]);
734      enc_buf[11] = hc_swap32_S (enc_buf[11]);
735      enc_buf[12] = hc_swap32_S (enc_buf[12]);
736      enc_buf[13] = hc_swap32_S (enc_buf[13]);
737      enc_buf[14] = hc_swap32_S (enc_buf[14]);
738      enc_buf[15] = hc_swap32_S (enc_buf[15]);
739      enc_buf[16] = hc_swap32_S (enc_buf[16]);
740      enc_buf[17] = hc_swap32_S (enc_buf[17]);
741      enc_buf[18] = hc_swap32_S (enc_buf[18]);
742      enc_buf[19] = hc_swap32_S (enc_buf[19]);
743      enc_buf[20] = hc_swap32_S (enc_buf[20]);
744      enc_buf[21] = hc_swap32_S (enc_buf[21]);
745      enc_buf[22] = hc_swap32_S (enc_buf[22]);
746      enc_buf[23] = hc_swap32_S (enc_buf[23]);
747      enc_buf[24] = hc_swap32_S (enc_buf[24]);
748      enc_buf[25] = hc_swap32_S (enc_buf[25]);
749      enc_buf[26] = hc_swap32_S (enc_buf[26]);
750      enc_buf[27] = hc_swap32_S (enc_buf[27]);
751      enc_buf[28] = hc_swap32_S (enc_buf[28]);
752      enc_buf[29] = hc_swap32_S (enc_buf[29]);
753      enc_buf[30] = hc_swap32_S (enc_buf[30]);
754      enc_buf[31] = hc_swap32_S (enc_buf[31]);
755
756      sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len);
757    }
758
759    return;
760  }
761
762  u32 w0[4];
763  u32 w1[4];
764  u32 w2[4];
765  u32 w3[4];
766  u32 w4[4];
767  u32 w5[4];
768  u32 w6[4];
769  u32 w7[4];
770
771  int pos1;
772  int pos4;
773
774  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
775  {
776    w0[0] = w[pos4 +  0];
777    w0[1] = w[pos4 +  1];
778    w0[2] = w[pos4 +  2];
779    w0[3] = w[pos4 +  3];
780    w1[0] = w[pos4 +  4];
781    w1[1] = w[pos4 +  5];
782    w1[2] = w[pos4 +  6];
783    w1[3] = w[pos4 +  7];
784    w2[0] = w[pos4 +  8];
785    w2[1] = w[pos4 +  9];
786    w2[2] = w[pos4 + 10];
787    w2[3] = w[pos4 + 11];
788    w3[0] = w[pos4 + 12];
789    w3[1] = w[pos4 + 13];
790    w3[2] = w[pos4 + 14];
791    w3[3] = w[pos4 + 15];
792
793    make_utf16le_S (w3, w6, w7);
794    make_utf16le_S (w2, w4, w5);
795    make_utf16le_S (w1, w2, w3);
796    make_utf16le_S (w0, w0, w1);
797
798    w0[0] = hc_swap32_S (w0[0]);
799    w0[1] = hc_swap32_S (w0[1]);
800    w0[2] = hc_swap32_S (w0[2]);
801    w0[3] = hc_swap32_S (w0[3]);
802    w1[0] = hc_swap32_S (w1[0]);
803    w1[1] = hc_swap32_S (w1[1]);
804    w1[2] = hc_swap32_S (w1[2]);
805    w1[3] = hc_swap32_S (w1[3]);
806    w2[0] = hc_swap32_S (w2[0]);
807    w2[1] = hc_swap32_S (w2[1]);
808    w2[2] = hc_swap32_S (w2[2]);
809    w2[3] = hc_swap32_S (w2[3]);
810    w3[0] = hc_swap32_S (w3[0]);
811    w3[1] = hc_swap32_S (w3[1]);
812    w3[2] = hc_swap32_S (w3[2]);
813    w3[3] = hc_swap32_S (w3[3]);
814    w4[0] = hc_swap32_S (w4[0]);
815    w4[1] = hc_swap32_S (w4[1]);
816    w4[2] = hc_swap32_S (w4[2]);
817    w4[3] = hc_swap32_S (w4[3]);
818    w5[0] = hc_swap32_S (w5[0]);
819    w5[1] = hc_swap32_S (w5[1]);
820    w5[2] = hc_swap32_S (w5[2]);
821    w5[3] = hc_swap32_S (w5[3]);
822    w6[0] = hc_swap32_S (w6[0]);
823    w6[1] = hc_swap32_S (w6[1]);
824    w6[2] = hc_swap32_S (w6[2]);
825    w6[3] = hc_swap32_S (w6[3]);
826    w7[0] = hc_swap32_S (w7[0]);
827    w7[1] = hc_swap32_S (w7[1]);
828    w7[2] = hc_swap32_S (w7[2]);
829    w7[3] = hc_swap32_S (w7[3]);
830
831    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
832  }
833
834  w0[0] = w[pos4 +  0];
835  w0[1] = w[pos4 +  1];
836  w0[2] = w[pos4 +  2];
837  w0[3] = w[pos4 +  3];
838  w1[0] = w[pos4 +  4];
839  w1[1] = w[pos4 +  5];
840  w1[2] = w[pos4 +  6];
841  w1[3] = w[pos4 +  7];
842  w2[0] = w[pos4 +  8];
843  w2[1] = w[pos4 +  9];
844  w2[2] = w[pos4 + 10];
845  w2[3] = w[pos4 + 11];
846  w3[0] = w[pos4 + 12];
847  w3[1] = w[pos4 + 13];
848  w3[2] = w[pos4 + 14];
849  w3[3] = w[pos4 + 15];
850
851  make_utf16le_S (w3, w6, w7);
852  make_utf16le_S (w2, w4, w5);
853  make_utf16le_S (w1, w2, w3);
854  make_utf16le_S (w0, w0, w1);
855
856  w0[0] = hc_swap32_S (w0[0]);
857  w0[1] = hc_swap32_S (w0[1]);
858  w0[2] = hc_swap32_S (w0[2]);
859  w0[3] = hc_swap32_S (w0[3]);
860  w1[0] = hc_swap32_S (w1[0]);
861  w1[1] = hc_swap32_S (w1[1]);
862  w1[2] = hc_swap32_S (w1[2]);
863  w1[3] = hc_swap32_S (w1[3]);
864  w2[0] = hc_swap32_S (w2[0]);
865  w2[1] = hc_swap32_S (w2[1]);
866  w2[2] = hc_swap32_S (w2[2]);
867  w2[3] = hc_swap32_S (w2[3]);
868  w3[0] = hc_swap32_S (w3[0]);
869  w3[1] = hc_swap32_S (w3[1]);
870  w3[2] = hc_swap32_S (w3[2]);
871  w3[3] = hc_swap32_S (w3[3]);
872  w4[0] = hc_swap32_S (w4[0]);
873  w4[1] = hc_swap32_S (w4[1]);
874  w4[2] = hc_swap32_S (w4[2]);
875  w4[3] = hc_swap32_S (w4[3]);
876  w5[0] = hc_swap32_S (w5[0]);
877  w5[1] = hc_swap32_S (w5[1]);
878  w5[2] = hc_swap32_S (w5[2]);
879  w5[3] = hc_swap32_S (w5[3]);
880  w6[0] = hc_swap32_S (w6[0]);
881  w6[1] = hc_swap32_S (w6[1]);
882  w6[2] = hc_swap32_S (w6[2]);
883  w6[3] = hc_swap32_S (w6[3]);
884  w7[0] = hc_swap32_S (w7[0]);
885  w7[1] = hc_swap32_S (w7[1]);
886  w7[2] = hc_swap32_S (w7[2]);
887  w7[3] = hc_swap32_S (w7[3]);
888
889  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
890}
891
892DECLSPEC void sha512_update_global (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
893{
894  u32 w0[4];
895  u32 w1[4];
896  u32 w2[4];
897  u32 w3[4];
898  u32 w4[4];
899  u32 w5[4];
900  u32 w6[4];
901  u32 w7[4];
902
903  int pos1;
904  int pos4;
905
906  for (pos1 = 0, pos4 = 0; pos1 < len - 128; pos1 += 128, pos4 += 32)
907  {
908    w0[0] = w[pos4 +  0];
909    w0[1] = w[pos4 +  1];
910    w0[2] = w[pos4 +  2];
911    w0[3] = w[pos4 +  3];
912    w1[0] = w[pos4 +  4];
913    w1[1] = w[pos4 +  5];
914    w1[2] = w[pos4 +  6];
915    w1[3] = w[pos4 +  7];
916    w2[0] = w[pos4 +  8];
917    w2[1] = w[pos4 +  9];
918    w2[2] = w[pos4 + 10];
919    w2[3] = w[pos4 + 11];
920    w3[0] = w[pos4 + 12];
921    w3[1] = w[pos4 + 13];
922    w3[2] = w[pos4 + 14];
923    w3[3] = w[pos4 + 15];
924    w4[0] = w[pos4 + 16];
925    w4[1] = w[pos4 + 17];
926    w4[2] = w[pos4 + 18];
927    w4[3] = w[pos4 + 19];
928    w5[0] = w[pos4 + 20];
929    w5[1] = w[pos4 + 21];
930    w5[2] = w[pos4 + 22];
931    w5[3] = w[pos4 + 23];
932    w6[0] = w[pos4 + 24];
933    w6[1] = w[pos4 + 25];
934    w6[2] = w[pos4 + 26];
935    w6[3] = w[pos4 + 27];
936    w7[0] = w[pos4 + 28];
937    w7[1] = w[pos4 + 29];
938    w7[2] = w[pos4 + 30];
939    w7[3] = w[pos4 + 31];
940
941    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);
942  }
943
944  w0[0] = w[pos4 +  0];
945  w0[1] = w[pos4 +  1];
946  w0[2] = w[pos4 +  2];
947  w0[3] = w[pos4 +  3];
948  w1[0] = w[pos4 +  4];
949  w1[1] = w[pos4 +  5];
950  w1[2] = w[pos4 +  6];
951  w1[3] = w[pos4 +  7];
952  w2[0] = w[pos4 +  8];
953  w2[1] = w[pos4 +  9];
954  w2[2] = w[pos4 + 10];
955  w2[3] = w[pos4 + 11];
956  w3[0] = w[pos4 + 12];
957  w3[1] = w[pos4 + 13];
958  w3[2] = w[pos4 + 14];
959  w3[3] = w[pos4 + 15];
960  w4[0] = w[pos4 + 16];
961  w4[1] = w[pos4 + 17];
962  w4[2] = w[pos4 + 18];
963  w4[3] = w[pos4 + 19];
964  w5[0] = w[pos4 + 20];
965  w5[1] = w[pos4 + 21];
966  w5[2] = w[pos4 + 22];
967  w5[3] = w[pos4 + 23];
968  w6[0] = w[pos4 + 24];
969  w6[1] = w[pos4 + 25];
970  w6[2] = w[pos4 + 26];
971  w6[3] = w[pos4 + 27];
972  w7[0] = w[pos4 + 28];
973  w7[1] = w[pos4 + 29];
974  w7[2] = w[pos4 + 30];
975  w7[3] = w[pos4 + 31];
976
977  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - pos1);
978}
979
980DECLSPEC void sha512_update_global_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
981{
982  u32 w0[4];
983  u32 w1[4];
984  u32 w2[4];
985  u32 w3[4];
986  u32 w4[4];
987  u32 w5[4];
988  u32 w6[4];
989  u32 w7[4];
990
991  int pos1;
992  int pos4;
993
994  for (pos1 = 0, pos4 = 0; pos1 < len - 128; pos1 += 128, pos4 += 32)
995  {
996    w0[0] = w[pos4 +  0];
997    w0[1] = w[pos4 +  1];
998    w0[2] = w[pos4 +  2];
999    w0[3] = w[pos4 +  3];
1000    w1[0] = w[pos4 +  4];
1001    w1[1] = w[pos4 +  5];
1002    w1[2] = w[pos4 +  6];
1003    w1[3] = w[pos4 +  7];
1004    w2[0] = w[pos4 +  8];
1005    w2[1] = w[pos4 +  9];
1006    w2[2] = w[pos4 + 10];
1007    w2[3] = w[pos4 + 11];
1008    w3[0] = w[pos4 + 12];
1009    w3[1] = w[pos4 + 13];
1010    w3[2] = w[pos4 + 14];
1011    w3[3] = w[pos4 + 15];
1012    w4[0] = w[pos4 + 16];
1013    w4[1] = w[pos4 + 17];
1014    w4[2] = w[pos4 + 18];
1015    w4[3] = w[pos4 + 19];
1016    w5[0] = w[pos4 + 20];
1017    w5[1] = w[pos4 + 21];
1018    w5[2] = w[pos4 + 22];
1019    w5[3] = w[pos4 + 23];
1020    w6[0] = w[pos4 + 24];
1021    w6[1] = w[pos4 + 25];
1022    w6[2] = w[pos4 + 26];
1023    w6[3] = w[pos4 + 27];
1024    w7[0] = w[pos4 + 28];
1025    w7[1] = w[pos4 + 29];
1026    w7[2] = w[pos4 + 30];
1027    w7[3] = w[pos4 + 31];
1028
1029    w0[0] = hc_swap32_S (w0[0]);
1030    w0[1] = hc_swap32_S (w0[1]);
1031    w0[2] = hc_swap32_S (w0[2]);
1032    w0[3] = hc_swap32_S (w0[3]);
1033    w1[0] = hc_swap32_S (w1[0]);
1034    w1[1] = hc_swap32_S (w1[1]);
1035    w1[2] = hc_swap32_S (w1[2]);
1036    w1[3] = hc_swap32_S (w1[3]);
1037    w2[0] = hc_swap32_S (w2[0]);
1038    w2[1] = hc_swap32_S (w2[1]);
1039    w2[2] = hc_swap32_S (w2[2]);
1040    w2[3] = hc_swap32_S (w2[3]);
1041    w3[0] = hc_swap32_S (w3[0]);
1042    w3[1] = hc_swap32_S (w3[1]);
1043    w3[2] = hc_swap32_S (w3[2]);
1044    w3[3] = hc_swap32_S (w3[3]);
1045    w4[0] = hc_swap32_S (w4[0]);
1046    w4[1] = hc_swap32_S (w4[1]);
1047    w4[2] = hc_swap32_S (w4[2]);
1048    w4[3] = hc_swap32_S (w4[3]);
1049    w5[0] = hc_swap32_S (w5[0]);
1050    w5[1] = hc_swap32_S (w5[1]);
1051    w5[2] = hc_swap32_S (w5[2]);
1052    w5[3] = hc_swap32_S (w5[3]);
1053    w6[0] = hc_swap32_S (w6[0]);
1054    w6[1] = hc_swap32_S (w6[1]);
1055    w6[2] = hc_swap32_S (w6[2]);
1056    w6[3] = hc_swap32_S (w6[3]);
1057    w7[0] = hc_swap32_S (w7[0]);
1058    w7[1] = hc_swap32_S (w7[1]);
1059    w7[2] = hc_swap32_S (w7[2]);
1060    w7[3] = hc_swap32_S (w7[3]);
1061
1062    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);
1063  }
1064
1065  w0[0] = w[pos4 +  0];
1066  w0[1] = w[pos4 +  1];
1067  w0[2] = w[pos4 +  2];
1068  w0[3] = w[pos4 +  3];
1069  w1[0] = w[pos4 +  4];
1070  w1[1] = w[pos4 +  5];
1071  w1[2] = w[pos4 +  6];
1072  w1[3] = w[pos4 +  7];
1073  w2[0] = w[pos4 +  8];
1074  w2[1] = w[pos4 +  9];
1075  w2[2] = w[pos4 + 10];
1076  w2[3] = w[pos4 + 11];
1077  w3[0] = w[pos4 + 12];
1078  w3[1] = w[pos4 + 13];
1079  w3[2] = w[pos4 + 14];
1080  w3[3] = w[pos4 + 15];
1081  w4[0] = w[pos4 + 16];
1082  w4[1] = w[pos4 + 17];
1083  w4[2] = w[pos4 + 18];
1084  w4[3] = w[pos4 + 19];
1085  w5[0] = w[pos4 + 20];
1086  w5[1] = w[pos4 + 21];
1087  w5[2] = w[pos4 + 22];
1088  w5[3] = w[pos4 + 23];
1089  w6[0] = w[pos4 + 24];
1090  w6[1] = w[pos4 + 25];
1091  w6[2] = w[pos4 + 26];
1092  w6[3] = w[pos4 + 27];
1093  w7[0] = w[pos4 + 28];
1094  w7[1] = w[pos4 + 29];
1095  w7[2] = w[pos4 + 30];
1096  w7[3] = w[pos4 + 31];
1097
1098  w0[0] = hc_swap32_S (w0[0]);
1099  w0[1] = hc_swap32_S (w0[1]);
1100  w0[2] = hc_swap32_S (w0[2]);
1101  w0[3] = hc_swap32_S (w0[3]);
1102  w1[0] = hc_swap32_S (w1[0]);
1103  w1[1] = hc_swap32_S (w1[1]);
1104  w1[2] = hc_swap32_S (w1[2]);
1105  w1[3] = hc_swap32_S (w1[3]);
1106  w2[0] = hc_swap32_S (w2[0]);
1107  w2[1] = hc_swap32_S (w2[1]);
1108  w2[2] = hc_swap32_S (w2[2]);
1109  w2[3] = hc_swap32_S (w2[3]);
1110  w3[0] = hc_swap32_S (w3[0]);
1111  w3[1] = hc_swap32_S (w3[1]);
1112  w3[2] = hc_swap32_S (w3[2]);
1113  w3[3] = hc_swap32_S (w3[3]);
1114  w4[0] = hc_swap32_S (w4[0]);
1115  w4[1] = hc_swap32_S (w4[1]);
1116  w4[2] = hc_swap32_S (w4[2]);
1117  w4[3] = hc_swap32_S (w4[3]);
1118  w5[0] = hc_swap32_S (w5[0]);
1119  w5[1] = hc_swap32_S (w5[1]);
1120  w5[2] = hc_swap32_S (w5[2]);
1121  w5[3] = hc_swap32_S (w5[3]);
1122  w6[0] = hc_swap32_S (w6[0]);
1123  w6[1] = hc_swap32_S (w6[1]);
1124  w6[2] = hc_swap32_S (w6[2]);
1125  w6[3] = hc_swap32_S (w6[3]);
1126  w7[0] = hc_swap32_S (w7[0]);
1127  w7[1] = hc_swap32_S (w7[1]);
1128  w7[2] = hc_swap32_S (w7[2]);
1129  w7[3] = hc_swap32_S (w7[3]);
1130
1131  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - pos1);
1132}
1133
1134DECLSPEC void sha512_update_global_utf16le (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1135{
1136  if (hc_enc_scan_global (w, len))
1137  {
1138    hc_enc_t hc_enc;
1139
1140    hc_enc_init (&hc_enc);
1141
1142    while (hc_enc_has_next (&hc_enc, len))
1143    {
1144      u32 enc_buf[32] = { 0 };
1145
1146      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
1147
1148      sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len);
1149    }
1150
1151    return;
1152  }
1153
1154  u32 w0[4];
1155  u32 w1[4];
1156  u32 w2[4];
1157  u32 w3[4];
1158  u32 w4[4];
1159  u32 w5[4];
1160  u32 w6[4];
1161  u32 w7[4];
1162
1163  int pos1;
1164  int pos4;
1165
1166  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
1167  {
1168    w0[0] = w[pos4 +  0];
1169    w0[1] = w[pos4 +  1];
1170    w0[2] = w[pos4 +  2];
1171    w0[3] = w[pos4 +  3];
1172    w1[0] = w[pos4 +  4];
1173    w1[1] = w[pos4 +  5];
1174    w1[2] = w[pos4 +  6];
1175    w1[3] = w[pos4 +  7];
1176    w2[0] = w[pos4 +  8];
1177    w2[1] = w[pos4 +  9];
1178    w2[2] = w[pos4 + 10];
1179    w2[3] = w[pos4 + 11];
1180    w3[0] = w[pos4 + 12];
1181    w3[1] = w[pos4 + 13];
1182    w3[2] = w[pos4 + 14];
1183    w3[3] = w[pos4 + 15];
1184
1185    make_utf16le_S (w3, w6, w7);
1186    make_utf16le_S (w2, w4, w5);
1187    make_utf16le_S (w1, w2, w3);
1188    make_utf16le_S (w0, w0, w1);
1189
1190    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
1191  }
1192
1193  w0[0] = w[pos4 +  0];
1194  w0[1] = w[pos4 +  1];
1195  w0[2] = w[pos4 +  2];
1196  w0[3] = w[pos4 +  3];
1197  w1[0] = w[pos4 +  4];
1198  w1[1] = w[pos4 +  5];
1199  w1[2] = w[pos4 +  6];
1200  w1[3] = w[pos4 +  7];
1201  w2[0] = w[pos4 +  8];
1202  w2[1] = w[pos4 +  9];
1203  w2[2] = w[pos4 + 10];
1204  w2[3] = w[pos4 + 11];
1205  w3[0] = w[pos4 + 12];
1206  w3[1] = w[pos4 + 13];
1207  w3[2] = w[pos4 + 14];
1208  w3[3] = w[pos4 + 15];
1209
1210  make_utf16le_S (w3, w6, w7);
1211  make_utf16le_S (w2, w4, w5);
1212  make_utf16le_S (w1, w2, w3);
1213  make_utf16le_S (w0, w0, w1);
1214
1215  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
1216}
1217
1218DECLSPEC void sha512_update_global_utf16le_swap (sha512_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1219{
1220  if (hc_enc_scan_global (w, len))
1221  {
1222    hc_enc_t hc_enc;
1223
1224    hc_enc_init (&hc_enc);
1225
1226    while (hc_enc_has_next (&hc_enc, len))
1227    {
1228      u32 enc_buf[32] = { 0 };
1229
1230      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
1231
1232      enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
1233      enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
1234      enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
1235      enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
1236      enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
1237      enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
1238      enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
1239      enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
1240      enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
1241      enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
1242      enc_buf[10] = hc_swap32_S (enc_buf[10]);
1243      enc_buf[11] = hc_swap32_S (enc_buf[11]);
1244      enc_buf[12] = hc_swap32_S (enc_buf[12]);
1245      enc_buf[13] = hc_swap32_S (enc_buf[13]);
1246      enc_buf[14] = hc_swap32_S (enc_buf[14]);
1247      enc_buf[15] = hc_swap32_S (enc_buf[15]);
1248      enc_buf[16] = hc_swap32_S (enc_buf[16]);
1249      enc_buf[17] = hc_swap32_S (enc_buf[17]);
1250      enc_buf[18] = hc_swap32_S (enc_buf[18]);
1251      enc_buf[19] = hc_swap32_S (enc_buf[19]);
1252      enc_buf[20] = hc_swap32_S (enc_buf[20]);
1253      enc_buf[21] = hc_swap32_S (enc_buf[21]);
1254      enc_buf[22] = hc_swap32_S (enc_buf[22]);
1255      enc_buf[23] = hc_swap32_S (enc_buf[23]);
1256      enc_buf[24] = hc_swap32_S (enc_buf[24]);
1257      enc_buf[25] = hc_swap32_S (enc_buf[25]);
1258      enc_buf[26] = hc_swap32_S (enc_buf[26]);
1259      enc_buf[27] = hc_swap32_S (enc_buf[27]);
1260      enc_buf[28] = hc_swap32_S (enc_buf[28]);
1261      enc_buf[29] = hc_swap32_S (enc_buf[29]);
1262      enc_buf[30] = hc_swap32_S (enc_buf[30]);
1263      enc_buf[31] = hc_swap32_S (enc_buf[31]);
1264
1265      sha512_update_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28, enc_len);
1266    }
1267
1268    return;
1269  }
1270
1271  u32 w0[4];
1272  u32 w1[4];
1273  u32 w2[4];
1274  u32 w3[4];
1275  u32 w4[4];
1276  u32 w5[4];
1277  u32 w6[4];
1278  u32 w7[4];
1279
1280  int pos1;
1281  int pos4;
1282
1283  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
1284  {
1285    w0[0] = w[pos4 +  0];
1286    w0[1] = w[pos4 +  1];
1287    w0[2] = w[pos4 +  2];
1288    w0[3] = w[pos4 +  3];
1289    w1[0] = w[pos4 +  4];
1290    w1[1] = w[pos4 +  5];
1291    w1[2] = w[pos4 +  6];
1292    w1[3] = w[pos4 +  7];
1293    w2[0] = w[pos4 +  8];
1294    w2[1] = w[pos4 +  9];
1295    w2[2] = w[pos4 + 10];
1296    w2[3] = w[pos4 + 11];
1297    w3[0] = w[pos4 + 12];
1298    w3[1] = w[pos4 + 13];
1299    w3[2] = w[pos4 + 14];
1300    w3[3] = w[pos4 + 15];
1301
1302    make_utf16le_S (w3, w6, w7);
1303    make_utf16le_S (w2, w4, w5);
1304    make_utf16le_S (w1, w2, w3);
1305    make_utf16le_S (w0, w0, w1);
1306
1307    w0[0] = hc_swap32_S (w0[0]);
1308    w0[1] = hc_swap32_S (w0[1]);
1309    w0[2] = hc_swap32_S (w0[2]);
1310    w0[3] = hc_swap32_S (w0[3]);
1311    w1[0] = hc_swap32_S (w1[0]);
1312    w1[1] = hc_swap32_S (w1[1]);
1313    w1[2] = hc_swap32_S (w1[2]);
1314    w1[3] = hc_swap32_S (w1[3]);
1315    w2[0] = hc_swap32_S (w2[0]);
1316    w2[1] = hc_swap32_S (w2[1]);
1317    w2[2] = hc_swap32_S (w2[2]);
1318    w2[3] = hc_swap32_S (w2[3]);
1319    w3[0] = hc_swap32_S (w3[0]);
1320    w3[1] = hc_swap32_S (w3[1]);
1321    w3[2] = hc_swap32_S (w3[2]);
1322    w3[3] = hc_swap32_S (w3[3]);
1323    w4[0] = hc_swap32_S (w4[0]);
1324    w4[1] = hc_swap32_S (w4[1]);
1325    w4[2] = hc_swap32_S (w4[2]);
1326    w4[3] = hc_swap32_S (w4[3]);
1327    w5[0] = hc_swap32_S (w5[0]);
1328    w5[1] = hc_swap32_S (w5[1]);
1329    w5[2] = hc_swap32_S (w5[2]);
1330    w5[3] = hc_swap32_S (w5[3]);
1331    w6[0] = hc_swap32_S (w6[0]);
1332    w6[1] = hc_swap32_S (w6[1]);
1333    w6[2] = hc_swap32_S (w6[2]);
1334    w6[3] = hc_swap32_S (w6[3]);
1335    w7[0] = hc_swap32_S (w7[0]);
1336    w7[1] = hc_swap32_S (w7[1]);
1337    w7[2] = hc_swap32_S (w7[2]);
1338    w7[3] = hc_swap32_S (w7[3]);
1339
1340    sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
1341  }
1342
1343  w0[0] = w[pos4 +  0];
1344  w0[1] = w[pos4 +  1];
1345  w0[2] = w[pos4 +  2];
1346  w0[3] = w[pos4 +  3];
1347  w1[0] = w[pos4 +  4];
1348  w1[1] = w[pos4 +  5];
1349  w1[2] = w[pos4 +  6];
1350  w1[3] = w[pos4 +  7];
1351  w2[0] = w[pos4 +  8];
1352  w2[1] = w[pos4 +  9];
1353  w2[2] = w[pos4 + 10];
1354  w2[3] = w[pos4 + 11];
1355  w3[0] = w[pos4 + 12];
1356  w3[1] = w[pos4 + 13];
1357  w3[2] = w[pos4 + 14];
1358  w3[3] = w[pos4 + 15];
1359
1360  make_utf16le_S (w3, w6, w7);
1361  make_utf16le_S (w2, w4, w5);
1362  make_utf16le_S (w1, w2, w3);
1363  make_utf16le_S (w0, w0, w1);
1364
1365  w0[0] = hc_swap32_S (w0[0]);
1366  w0[1] = hc_swap32_S (w0[1]);
1367  w0[2] = hc_swap32_S (w0[2]);
1368  w0[3] = hc_swap32_S (w0[3]);
1369  w1[0] = hc_swap32_S (w1[0]);
1370  w1[1] = hc_swap32_S (w1[1]);
1371  w1[2] = hc_swap32_S (w1[2]);
1372  w1[3] = hc_swap32_S (w1[3]);
1373  w2[0] = hc_swap32_S (w2[0]);
1374  w2[1] = hc_swap32_S (w2[1]);
1375  w2[2] = hc_swap32_S (w2[2]);
1376  w2[3] = hc_swap32_S (w2[3]);
1377  w3[0] = hc_swap32_S (w3[0]);
1378  w3[1] = hc_swap32_S (w3[1]);
1379  w3[2] = hc_swap32_S (w3[2]);
1380  w3[3] = hc_swap32_S (w3[3]);
1381  w4[0] = hc_swap32_S (w4[0]);
1382  w4[1] = hc_swap32_S (w4[1]);
1383  w4[2] = hc_swap32_S (w4[2]);
1384  w4[3] = hc_swap32_S (w4[3]);
1385  w5[0] = hc_swap32_S (w5[0]);
1386  w5[1] = hc_swap32_S (w5[1]);
1387  w5[2] = hc_swap32_S (w5[2]);
1388  w5[3] = hc_swap32_S (w5[3]);
1389  w6[0] = hc_swap32_S (w6[0]);
1390  w6[1] = hc_swap32_S (w6[1]);
1391  w6[2] = hc_swap32_S (w6[2]);
1392  w6[3] = hc_swap32_S (w6[3]);
1393  w7[0] = hc_swap32_S (w7[0]);
1394  w7[1] = hc_swap32_S (w7[1]);
1395  w7[2] = hc_swap32_S (w7[2]);
1396  w7[3] = hc_swap32_S (w7[3]);
1397
1398  sha512_update_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
1399}
1400
1401DECLSPEC void sha512_final (sha512_ctx_t *ctx)
1402{
1403  const int pos = ctx->len & 127;
1404
1405  append_0x80_8x4_S (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);
1406
1407  if (pos >= 112)
1408  {
1409    sha512_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
1410
1411    ctx->w0[0] = 0;
1412    ctx->w0[1] = 0;
1413    ctx->w0[2] = 0;
1414    ctx->w0[3] = 0;
1415    ctx->w1[0] = 0;
1416    ctx->w1[1] = 0;
1417    ctx->w1[2] = 0;
1418    ctx->w1[3] = 0;
1419    ctx->w2[0] = 0;
1420    ctx->w2[1] = 0;
1421    ctx->w2[2] = 0;
1422    ctx->w2[3] = 0;
1423    ctx->w3[0] = 0;
1424    ctx->w3[1] = 0;
1425    ctx->w3[2] = 0;
1426    ctx->w3[3] = 0;
1427    ctx->w4[0] = 0;
1428    ctx->w4[1] = 0;
1429    ctx->w4[2] = 0;
1430    ctx->w4[3] = 0;
1431    ctx->w5[0] = 0;
1432    ctx->w5[1] = 0;
1433    ctx->w5[2] = 0;
1434    ctx->w5[3] = 0;
1435    ctx->w6[0] = 0;
1436    ctx->w6[1] = 0;
1437    ctx->w6[2] = 0;
1438    ctx->w6[3] = 0;
1439    ctx->w7[0] = 0;
1440    ctx->w7[1] = 0;
1441    ctx->w7[2] = 0;
1442    ctx->w7[3] = 0;
1443  }
1444
1445  ctx->w7[2] = 0;
1446  ctx->w7[3] = ctx->len * 8;
1447
1448  sha512_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
1449}
1450
1451// sha512_hmac
1452
1453DECLSPEC void sha512_hmac_init_128 (sha512_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, const u32 *w4, const u32 *w5, const u32 *w6, const u32 *w7)
1454{
1455  u32 a0[4];
1456  u32 a1[4];
1457  u32 a2[4];
1458  u32 a3[4];
1459  u32 a4[4];
1460  u32 a5[4];
1461  u32 a6[4];
1462  u32 a7[4];
1463
1464  // ipad
1465
1466  a0[0] = w0[0] ^ 0x36363636;
1467  a0[1] = w0[1] ^ 0x36363636;
1468  a0[2] = w0[2] ^ 0x36363636;
1469  a0[3] = w0[3] ^ 0x36363636;
1470  a1[0] = w1[0] ^ 0x36363636;
1471  a1[1] = w1[1] ^ 0x36363636;
1472  a1[2] = w1[2] ^ 0x36363636;
1473  a1[3] = w1[3] ^ 0x36363636;
1474  a2[0] = w2[0] ^ 0x36363636;
1475  a2[1] = w2[1] ^ 0x36363636;
1476  a2[2] = w2[2] ^ 0x36363636;
1477  a2[3] = w2[3] ^ 0x36363636;
1478  a3[0] = w3[0] ^ 0x36363636;
1479  a3[1] = w3[1] ^ 0x36363636;
1480  a3[2] = w3[2] ^ 0x36363636;
1481  a3[3] = w3[3] ^ 0x36363636;
1482  a4[0] = w4[0] ^ 0x36363636;
1483  a4[1] = w4[1] ^ 0x36363636;
1484  a4[2] = w4[2] ^ 0x36363636;
1485  a4[3] = w4[3] ^ 0x36363636;
1486  a5[0] = w5[0] ^ 0x36363636;
1487  a5[1] = w5[1] ^ 0x36363636;
1488  a5[2] = w5[2] ^ 0x36363636;
1489  a5[3] = w5[3] ^ 0x36363636;
1490  a6[0] = w6[0] ^ 0x36363636;
1491  a6[1] = w6[1] ^ 0x36363636;
1492  a6[2] = w6[2] ^ 0x36363636;
1493  a6[3] = w6[3] ^ 0x36363636;
1494  a7[0] = w7[0] ^ 0x36363636;
1495  a7[1] = w7[1] ^ 0x36363636;
1496  a7[2] = w7[2] ^ 0x36363636;
1497  a7[3] = w7[3] ^ 0x36363636;
1498
1499  sha512_init (&ctx->ipad);
1500
1501  sha512_update_128 (&ctx->ipad, a0, a1, a2, a3, a4, a5, a6, a7, 128);
1502
1503  // opad
1504
1505  u32 b0[4];
1506  u32 b1[4];
1507  u32 b2[4];
1508  u32 b3[4];
1509  u32 b4[4];
1510  u32 b5[4];
1511  u32 b6[4];
1512  u32 b7[4];
1513
1514  b0[0] = w0[0] ^ 0x5c5c5c5c;
1515  b0[1] = w0[1] ^ 0x5c5c5c5c;
1516  b0[2] = w0[2] ^ 0x5c5c5c5c;
1517  b0[3] = w0[3] ^ 0x5c5c5c5c;
1518  b1[0] = w1[0] ^ 0x5c5c5c5c;
1519  b1[1] = w1[1] ^ 0x5c5c5c5c;
1520  b1[2] = w1[2] ^ 0x5c5c5c5c;
1521  b1[3] = w1[3] ^ 0x5c5c5c5c;
1522  b2[0] = w2[0] ^ 0x5c5c5c5c;
1523  b2[1] = w2[1] ^ 0x5c5c5c5c;
1524  b2[2] = w2[2] ^ 0x5c5c5c5c;
1525  b2[3] = w2[3] ^ 0x5c5c5c5c;
1526  b3[0] = w3[0] ^ 0x5c5c5c5c;
1527  b3[1] = w3[1] ^ 0x5c5c5c5c;
1528  b3[2] = w3[2] ^ 0x5c5c5c5c;
1529  b3[3] = w3[3] ^ 0x5c5c5c5c;
1530  b4[0] = w4[0] ^ 0x5c5c5c5c;
1531  b4[1] = w4[1] ^ 0x5c5c5c5c;
1532  b4[2] = w4[2] ^ 0x5c5c5c5c;
1533  b4[3] = w4[3] ^ 0x5c5c5c5c;
1534  b5[0] = w5[0] ^ 0x5c5c5c5c;
1535  b5[1] = w5[1] ^ 0x5c5c5c5c;
1536  b5[2] = w5[2] ^ 0x5c5c5c5c;
1537  b5[3] = w5[3] ^ 0x5c5c5c5c;
1538  b6[0] = w6[0] ^ 0x5c5c5c5c;
1539  b6[1] = w6[1] ^ 0x5c5c5c5c;
1540  b6[2] = w6[2] ^ 0x5c5c5c5c;
1541  b6[3] = w6[3] ^ 0x5c5c5c5c;
1542  b7[0] = w7[0] ^ 0x5c5c5c5c;
1543  b7[1] = w7[1] ^ 0x5c5c5c5c;
1544  b7[2] = w7[2] ^ 0x5c5c5c5c;
1545  b7[3] = w7[3] ^ 0x5c5c5c5c;
1546
1547  sha512_init (&ctx->opad);
1548
1549  sha512_update_128 (&ctx->opad, b0, b1, b2, b3, b4, b5, b6, b7, 128);
1550}
1551
1552DECLSPEC void sha512_hmac_init (sha512_hmac_ctx_t *ctx, const u32 *w, const int len)
1553{
1554  u32 w0[4];
1555  u32 w1[4];
1556  u32 w2[4];
1557  u32 w3[4];
1558  u32 w4[4];
1559  u32 w5[4];
1560  u32 w6[4];
1561  u32 w7[4];
1562
1563  if (len > 128)
1564  {
1565    sha512_ctx_t tmp;
1566
1567    sha512_init (&tmp);
1568
1569    sha512_update (&tmp, w, len);
1570
1571    sha512_final (&tmp);
1572
1573    w0[0] = h32_from_64_S (tmp.h[0]);
1574    w0[1] = l32_from_64_S (tmp.h[0]);
1575    w0[2] = h32_from_64_S (tmp.h[1]);
1576    w0[3] = l32_from_64_S (tmp.h[1]);
1577    w1[0] = h32_from_64_S (tmp.h[2]);
1578    w1[1] = l32_from_64_S (tmp.h[2]);
1579    w1[2] = h32_from_64_S (tmp.h[3]);
1580    w1[3] = l32_from_64_S (tmp.h[3]);
1581    w2[0] = h32_from_64_S (tmp.h[4]);
1582    w2[1] = l32_from_64_S (tmp.h[4]);
1583    w2[2] = h32_from_64_S (tmp.h[5]);
1584    w2[3] = l32_from_64_S (tmp.h[5]);
1585    w3[0] = h32_from_64_S (tmp.h[6]);
1586    w3[1] = l32_from_64_S (tmp.h[6]);
1587    w3[2] = h32_from_64_S (tmp.h[7]);
1588    w3[3] = l32_from_64_S (tmp.h[7]);
1589    w4[0] = 0;
1590    w4[1] = 0;
1591    w4[2] = 0;
1592    w4[3] = 0;
1593    w5[0] = 0;
1594    w5[1] = 0;
1595    w5[2] = 0;
1596    w5[3] = 0;
1597    w6[0] = 0;
1598    w6[1] = 0;
1599    w6[2] = 0;
1600    w6[3] = 0;
1601    w7[0] = 0;
1602    w7[1] = 0;
1603    w7[2] = 0;
1604    w7[3] = 0;
1605  }
1606  else
1607  {
1608    w0[0] = w[ 0];
1609    w0[1] = w[ 1];
1610    w0[2] = w[ 2];
1611    w0[3] = w[ 3];
1612    w1[0] = w[ 4];
1613    w1[1] = w[ 5];
1614    w1[2] = w[ 6];
1615    w1[3] = w[ 7];
1616    w2[0] = w[ 8];
1617    w2[1] = w[ 9];
1618    w2[2] = w[10];
1619    w2[3] = w[11];
1620    w3[0] = w[12];
1621    w3[1] = w[13];
1622    w3[2] = w[14];
1623    w3[3] = w[15];
1624    w4[0] = w[16];
1625    w4[1] = w[17];
1626    w4[2] = w[18];
1627    w4[3] = w[19];
1628    w5[0] = w[20];
1629    w5[1] = w[21];
1630    w5[2] = w[22];
1631    w5[3] = w[23];
1632    w6[0] = w[24];
1633    w6[1] = w[25];
1634    w6[2] = w[26];
1635    w6[3] = w[27];
1636    w7[0] = w[28];
1637    w7[1] = w[29];
1638    w7[2] = w[30];
1639    w7[3] = w[31];
1640  }
1641
1642  sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
1643}
1644
1645DECLSPEC void sha512_hmac_init_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len)
1646{
1647  u32 w0[4];
1648  u32 w1[4];
1649  u32 w2[4];
1650  u32 w3[4];
1651  u32 w4[4];
1652  u32 w5[4];
1653  u32 w6[4];
1654  u32 w7[4];
1655
1656  if (len > 128)
1657  {
1658    sha512_ctx_t tmp;
1659
1660    sha512_init (&tmp);
1661
1662    sha512_update_swap (&tmp, w, len);
1663
1664    sha512_final (&tmp);
1665
1666    w0[0] = h32_from_64_S (tmp.h[0]);
1667    w0[1] = l32_from_64_S (tmp.h[0]);
1668    w0[2] = h32_from_64_S (tmp.h[1]);
1669    w0[3] = l32_from_64_S (tmp.h[1]);
1670    w1[0] = h32_from_64_S (tmp.h[2]);
1671    w1[1] = l32_from_64_S (tmp.h[2]);
1672    w1[2] = h32_from_64_S (tmp.h[3]);
1673    w1[3] = l32_from_64_S (tmp.h[3]);
1674    w2[0] = h32_from_64_S (tmp.h[4]);
1675    w2[1] = l32_from_64_S (tmp.h[4]);
1676    w2[2] = h32_from_64_S (tmp.h[5]);
1677    w2[3] = l32_from_64_S (tmp.h[5]);
1678    w3[0] = h32_from_64_S (tmp.h[6]);
1679    w3[1] = l32_from_64_S (tmp.h[6]);
1680    w3[2] = h32_from_64_S (tmp.h[7]);
1681    w3[3] = l32_from_64_S (tmp.h[7]);
1682    w4[0] = 0;
1683    w4[1] = 0;
1684    w4[2] = 0;
1685    w4[3] = 0;
1686    w5[0] = 0;
1687    w5[1] = 0;
1688    w5[2] = 0;
1689    w5[3] = 0;
1690    w6[0] = 0;
1691    w6[1] = 0;
1692    w6[2] = 0;
1693    w6[3] = 0;
1694    w7[0] = 0;
1695    w7[1] = 0;
1696    w7[2] = 0;
1697    w7[3] = 0;
1698  }
1699  else
1700  {
1701    w0[0] = hc_swap32_S (w[ 0]);
1702    w0[1] = hc_swap32_S (w[ 1]);
1703    w0[2] = hc_swap32_S (w[ 2]);
1704    w0[3] = hc_swap32_S (w[ 3]);
1705    w1[0] = hc_swap32_S (w[ 4]);
1706    w1[1] = hc_swap32_S (w[ 5]);
1707    w1[2] = hc_swap32_S (w[ 6]);
1708    w1[3] = hc_swap32_S (w[ 7]);
1709    w2[0] = hc_swap32_S (w[ 8]);
1710    w2[1] = hc_swap32_S (w[ 9]);
1711    w2[2] = hc_swap32_S (w[10]);
1712    w2[3] = hc_swap32_S (w[11]);
1713    w3[0] = hc_swap32_S (w[12]);
1714    w3[1] = hc_swap32_S (w[13]);
1715    w3[2] = hc_swap32_S (w[14]);
1716    w3[3] = hc_swap32_S (w[15]);
1717    w4[0] = hc_swap32_S (w[16]);
1718    w4[1] = hc_swap32_S (w[17]);
1719    w4[2] = hc_swap32_S (w[18]);
1720    w4[3] = hc_swap32_S (w[19]);
1721    w5[0] = hc_swap32_S (w[20]);
1722    w5[1] = hc_swap32_S (w[21]);
1723    w5[2] = hc_swap32_S (w[22]);
1724    w5[3] = hc_swap32_S (w[23]);
1725    w6[0] = hc_swap32_S (w[24]);
1726    w6[1] = hc_swap32_S (w[25]);
1727    w6[2] = hc_swap32_S (w[26]);
1728    w6[3] = hc_swap32_S (w[27]);
1729    w7[0] = hc_swap32_S (w[28]);
1730    w7[1] = hc_swap32_S (w[29]);
1731    w7[2] = hc_swap32_S (w[30]);
1732    w7[3] = hc_swap32_S (w[31]);
1733  }
1734
1735  sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
1736}
1737
1738DECLSPEC void sha512_hmac_init_global (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1739{
1740  u32 w0[4];
1741  u32 w1[4];
1742  u32 w2[4];
1743  u32 w3[4];
1744  u32 w4[4];
1745  u32 w5[4];
1746  u32 w6[4];
1747  u32 w7[4];
1748
1749  if (len > 128)
1750  {
1751    sha512_ctx_t tmp;
1752
1753    sha512_init (&tmp);
1754
1755    sha512_update_global (&tmp, w, len);
1756
1757    sha512_final (&tmp);
1758
1759    w0[0] = h32_from_64_S (tmp.h[0]);
1760    w0[1] = l32_from_64_S (tmp.h[0]);
1761    w0[2] = h32_from_64_S (tmp.h[1]);
1762    w0[3] = l32_from_64_S (tmp.h[1]);
1763    w1[0] = h32_from_64_S (tmp.h[2]);
1764    w1[1] = l32_from_64_S (tmp.h[2]);
1765    w1[2] = h32_from_64_S (tmp.h[3]);
1766    w1[3] = l32_from_64_S (tmp.h[3]);
1767    w2[0] = h32_from_64_S (tmp.h[4]);
1768    w2[1] = l32_from_64_S (tmp.h[4]);
1769    w2[2] = h32_from_64_S (tmp.h[5]);
1770    w2[3] = l32_from_64_S (tmp.h[5]);
1771    w3[0] = h32_from_64_S (tmp.h[6]);
1772    w3[1] = l32_from_64_S (tmp.h[6]);
1773    w3[2] = h32_from_64_S (tmp.h[7]);
1774    w3[3] = l32_from_64_S (tmp.h[7]);
1775    w4[0] = 0;
1776    w4[1] = 0;
1777    w4[2] = 0;
1778    w4[3] = 0;
1779    w5[0] = 0;
1780    w5[1] = 0;
1781    w5[2] = 0;
1782    w5[3] = 0;
1783    w6[0] = 0;
1784    w6[1] = 0;
1785    w6[2] = 0;
1786    w6[3] = 0;
1787    w7[0] = 0;
1788    w7[1] = 0;
1789    w7[2] = 0;
1790    w7[3] = 0;
1791  }
1792  else
1793  {
1794    w0[0] = w[ 0];
1795    w0[1] = w[ 1];
1796    w0[2] = w[ 2];
1797    w0[3] = w[ 3];
1798    w1[0] = w[ 4];
1799    w1[1] = w[ 5];
1800    w1[2] = w[ 6];
1801    w1[3] = w[ 7];
1802    w2[0] = w[ 8];
1803    w2[1] = w[ 9];
1804    w2[2] = w[10];
1805    w2[3] = w[11];
1806    w3[0] = w[12];
1807    w3[1] = w[13];
1808    w3[2] = w[14];
1809    w3[3] = w[15];
1810    w4[0] = w[16];
1811    w4[1] = w[17];
1812    w4[2] = w[18];
1813    w4[3] = w[19];
1814    w5[0] = w[20];
1815    w5[1] = w[21];
1816    w5[2] = w[22];
1817    w5[3] = w[23];
1818    w6[0] = w[24];
1819    w6[1] = w[25];
1820    w6[2] = w[26];
1821    w6[3] = w[27];
1822    w7[0] = w[28];
1823    w7[1] = w[29];
1824    w7[2] = w[30];
1825    w7[3] = w[31];
1826  }
1827
1828  sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
1829}
1830
1831DECLSPEC void sha512_hmac_init_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1832{
1833  u32 w0[4];
1834  u32 w1[4];
1835  u32 w2[4];
1836  u32 w3[4];
1837  u32 w4[4];
1838  u32 w5[4];
1839  u32 w6[4];
1840  u32 w7[4];
1841
1842  if (len > 128)
1843  {
1844    sha512_ctx_t tmp;
1845
1846    sha512_init (&tmp);
1847
1848    sha512_update_global_swap (&tmp, w, len);
1849
1850    sha512_final (&tmp);
1851
1852    w0[0] = h32_from_64_S (tmp.h[0]);
1853    w0[1] = l32_from_64_S (tmp.h[0]);
1854    w0[2] = h32_from_64_S (tmp.h[1]);
1855    w0[3] = l32_from_64_S (tmp.h[1]);
1856    w1[0] = h32_from_64_S (tmp.h[2]);
1857    w1[1] = l32_from_64_S (tmp.h[2]);
1858    w1[2] = h32_from_64_S (tmp.h[3]);
1859    w1[3] = l32_from_64_S (tmp.h[3]);
1860    w2[0] = h32_from_64_S (tmp.h[4]);
1861    w2[1] = l32_from_64_S (tmp.h[4]);
1862    w2[2] = h32_from_64_S (tmp.h[5]);
1863    w2[3] = l32_from_64_S (tmp.h[5]);
1864    w3[0] = h32_from_64_S (tmp.h[6]);
1865    w3[1] = l32_from_64_S (tmp.h[6]);
1866    w3[2] = h32_from_64_S (tmp.h[7]);
1867    w3[3] = l32_from_64_S (tmp.h[7]);
1868    w4[0] = 0;
1869    w4[1] = 0;
1870    w4[2] = 0;
1871    w4[3] = 0;
1872    w5[0] = 0;
1873    w5[1] = 0;
1874    w5[2] = 0;
1875    w5[3] = 0;
1876    w6[0] = 0;
1877    w6[1] = 0;
1878    w6[2] = 0;
1879    w6[3] = 0;
1880    w7[0] = 0;
1881    w7[1] = 0;
1882    w7[2] = 0;
1883    w7[3] = 0;
1884  }
1885  else
1886  {
1887    w0[0] = hc_swap32_S (w[ 0]);
1888    w0[1] = hc_swap32_S (w[ 1]);
1889    w0[2] = hc_swap32_S (w[ 2]);
1890    w0[3] = hc_swap32_S (w[ 3]);
1891    w1[0] = hc_swap32_S (w[ 4]);
1892    w1[1] = hc_swap32_S (w[ 5]);
1893    w1[2] = hc_swap32_S (w[ 6]);
1894    w1[3] = hc_swap32_S (w[ 7]);
1895    w2[0] = hc_swap32_S (w[ 8]);
1896    w2[1] = hc_swap32_S (w[ 9]);
1897    w2[2] = hc_swap32_S (w[10]);
1898    w2[3] = hc_swap32_S (w[11]);
1899    w3[0] = hc_swap32_S (w[12]);
1900    w3[1] = hc_swap32_S (w[13]);
1901    w3[2] = hc_swap32_S (w[14]);
1902    w3[3] = hc_swap32_S (w[15]);
1903    w4[0] = hc_swap32_S (w[16]);
1904    w4[1] = hc_swap32_S (w[17]);
1905    w4[2] = hc_swap32_S (w[18]);
1906    w4[3] = hc_swap32_S (w[19]);
1907    w5[0] = hc_swap32_S (w[20]);
1908    w5[1] = hc_swap32_S (w[21]);
1909    w5[2] = hc_swap32_S (w[22]);
1910    w5[3] = hc_swap32_S (w[23]);
1911    w6[0] = hc_swap32_S (w[24]);
1912    w6[1] = hc_swap32_S (w[25]);
1913    w6[2] = hc_swap32_S (w[26]);
1914    w6[3] = hc_swap32_S (w[27]);
1915    w7[0] = hc_swap32_S (w[28]);
1916    w7[1] = hc_swap32_S (w[29]);
1917    w7[2] = hc_swap32_S (w[30]);
1918    w7[3] = hc_swap32_S (w[31]);
1919  }
1920
1921  sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
1922}
1923
1924DECLSPEC void sha512_hmac_init_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
1925{
1926  if (hc_enc_scan_global (w, len))
1927  {
1928    hc_enc_t hc_enc;
1929
1930    hc_enc_init (&hc_enc);
1931
1932    while (hc_enc_has_next (&hc_enc, len))
1933    {
1934      // forced full decode in one round
1935
1936      u32 enc_buf[256];
1937
1938      const int enc_len = hc_enc_next_global (&hc_enc, w, len, 256, enc_buf, sizeof (enc_buf));
1939
1940      if (enc_len > 128)
1941      {
1942        sha512_ctx_t tmp;
1943
1944        sha512_init (&tmp);
1945
1946        sha512_update_utf16le_swap (&tmp, enc_buf, enc_len);
1947
1948        sha512_final (&tmp);
1949
1950        enc_buf[ 0] = h32_from_64_S (tmp.h[0]);
1951        enc_buf[ 1] = l32_from_64_S (tmp.h[0]);
1952        enc_buf[ 2] = h32_from_64_S (tmp.h[1]);
1953        enc_buf[ 3] = l32_from_64_S (tmp.h[1]);
1954        enc_buf[ 4] = h32_from_64_S (tmp.h[2]);
1955        enc_buf[ 5] = l32_from_64_S (tmp.h[2]);
1956        enc_buf[ 6] = h32_from_64_S (tmp.h[3]);
1957        enc_buf[ 7] = l32_from_64_S (tmp.h[3]);
1958        enc_buf[ 8] = h32_from_64_S (tmp.h[4]);
1959        enc_buf[ 9] = l32_from_64_S (tmp.h[4]);
1960        enc_buf[10] = h32_from_64_S (tmp.h[5]);
1961        enc_buf[11] = l32_from_64_S (tmp.h[5]);
1962        enc_buf[12] = h32_from_64_S (tmp.h[6]);
1963        enc_buf[13] = l32_from_64_S (tmp.h[6]);
1964        enc_buf[14] = h32_from_64_S (tmp.h[7]);
1965        enc_buf[15] = l32_from_64_S (tmp.h[7]);
1966        enc_buf[16] = 0;
1967        enc_buf[17] = 0;
1968        enc_buf[18] = 0;
1969        enc_buf[19] = 0;
1970        enc_buf[20] = 0;
1971        enc_buf[21] = 0;
1972        enc_buf[22] = 0;
1973        enc_buf[23] = 0;
1974        enc_buf[24] = 0;
1975        enc_buf[25] = 0;
1976        enc_buf[26] = 0;
1977        enc_buf[27] = 0;
1978        enc_buf[28] = 0;
1979        enc_buf[29] = 0;
1980        enc_buf[30] = 0;
1981        enc_buf[31] = 0;
1982      }
1983      else
1984      {
1985        enc_buf[ 0] = hc_swap32_S (enc_buf[ 0]);
1986        enc_buf[ 1] = hc_swap32_S (enc_buf[ 1]);
1987        enc_buf[ 2] = hc_swap32_S (enc_buf[ 2]);
1988        enc_buf[ 3] = hc_swap32_S (enc_buf[ 3]);
1989        enc_buf[ 4] = hc_swap32_S (enc_buf[ 4]);
1990        enc_buf[ 5] = hc_swap32_S (enc_buf[ 5]);
1991        enc_buf[ 6] = hc_swap32_S (enc_buf[ 6]);
1992        enc_buf[ 7] = hc_swap32_S (enc_buf[ 7]);
1993        enc_buf[ 8] = hc_swap32_S (enc_buf[ 8]);
1994        enc_buf[ 9] = hc_swap32_S (enc_buf[ 9]);
1995        enc_buf[10] = hc_swap32_S (enc_buf[10]);
1996        enc_buf[11] = hc_swap32_S (enc_buf[11]);
1997        enc_buf[12] = hc_swap32_S (enc_buf[12]);
1998        enc_buf[13] = hc_swap32_S (enc_buf[13]);
1999        enc_buf[14] = hc_swap32_S (enc_buf[14]);
2000        enc_buf[15] = hc_swap32_S (enc_buf[15]);
2001        enc_buf[16] = hc_swap32_S (enc_buf[16]);
2002        enc_buf[17] = hc_swap32_S (enc_buf[17]);
2003        enc_buf[18] = hc_swap32_S (enc_buf[18]);
2004        enc_buf[19] = hc_swap32_S (enc_buf[19]);
2005        enc_buf[20] = hc_swap32_S (enc_buf[20]);
2006        enc_buf[21] = hc_swap32_S (enc_buf[21]);
2007        enc_buf[22] = hc_swap32_S (enc_buf[22]);
2008        enc_buf[23] = hc_swap32_S (enc_buf[23]);
2009        enc_buf[24] = hc_swap32_S (enc_buf[24]);
2010        enc_buf[25] = hc_swap32_S (enc_buf[25]);
2011        enc_buf[26] = hc_swap32_S (enc_buf[26]);
2012        enc_buf[27] = hc_swap32_S (enc_buf[27]);
2013        enc_buf[28] = hc_swap32_S (enc_buf[28]);
2014        enc_buf[29] = hc_swap32_S (enc_buf[29]);
2015        enc_buf[30] = hc_swap32_S (enc_buf[30]);
2016        enc_buf[31] = hc_swap32_S (enc_buf[31]);
2017      }
2018
2019      sha512_hmac_init_128 (ctx, enc_buf + 0, enc_buf + 4, enc_buf + 8, enc_buf + 12, enc_buf + 16, enc_buf + 20, enc_buf + 24, enc_buf + 28);
2020    }
2021
2022    return;
2023  }
2024
2025  u32 w0[4];
2026  u32 w1[4];
2027  u32 w2[4];
2028  u32 w3[4];
2029  u32 w4[4];
2030  u32 w5[4];
2031  u32 w6[4];
2032  u32 w7[4];
2033
2034  const int len_new = len * 2;
2035
2036  if (len_new > 128)
2037  {
2038    sha512_ctx_t tmp;
2039
2040    sha512_init (&tmp);
2041
2042    sha512_update_global_utf16le_swap (&tmp, w, len);
2043
2044    sha512_final (&tmp);
2045
2046    w0[0] = h32_from_64_S (tmp.h[0]);
2047    w0[1] = l32_from_64_S (tmp.h[0]);
2048    w0[2] = h32_from_64_S (tmp.h[1]);
2049    w0[3] = l32_from_64_S (tmp.h[1]);
2050    w1[0] = h32_from_64_S (tmp.h[2]);
2051    w1[1] = l32_from_64_S (tmp.h[2]);
2052    w1[2] = h32_from_64_S (tmp.h[3]);
2053    w1[3] = l32_from_64_S (tmp.h[3]);
2054    w2[0] = h32_from_64_S (tmp.h[4]);
2055    w2[1] = l32_from_64_S (tmp.h[4]);
2056    w2[2] = h32_from_64_S (tmp.h[5]);
2057    w2[3] = l32_from_64_S (tmp.h[5]);
2058    w3[0] = h32_from_64_S (tmp.h[6]);
2059    w3[1] = l32_from_64_S (tmp.h[6]);
2060    w3[2] = h32_from_64_S (tmp.h[7]);
2061    w3[3] = l32_from_64_S (tmp.h[7]);
2062    w4[0] = 0;
2063    w4[1] = 0;
2064    w4[2] = 0;
2065    w4[3] = 0;
2066    w5[0] = 0;
2067    w5[1] = 0;
2068    w5[2] = 0;
2069    w5[3] = 0;
2070    w6[0] = 0;
2071    w6[1] = 0;
2072    w6[2] = 0;
2073    w6[3] = 0;
2074    w7[0] = 0;
2075    w7[1] = 0;
2076    w7[2] = 0;
2077    w7[3] = 0;
2078  }
2079  else
2080  {
2081    w0[0] = w[ 0];
2082    w0[1] = w[ 1];
2083    w0[2] = w[ 2];
2084    w0[3] = w[ 3];
2085    w1[0] = w[ 4];
2086    w1[1] = w[ 5];
2087    w1[2] = w[ 6];
2088    w1[3] = w[ 7];
2089    w2[0] = w[ 8];
2090    w2[1] = w[ 9];
2091    w2[2] = w[10];
2092    w2[3] = w[11];
2093    w3[0] = w[12];
2094    w3[1] = w[13];
2095    w3[2] = w[14];
2096    w3[3] = w[15];
2097
2098    make_utf16le_S (w3, w6, w7);
2099    make_utf16le_S (w2, w4, w5);
2100    make_utf16le_S (w1, w2, w3);
2101    make_utf16le_S (w0, w0, w1);
2102
2103    w0[0] = hc_swap32_S (w0[0]);
2104    w0[1] = hc_swap32_S (w0[1]);
2105    w0[2] = hc_swap32_S (w0[2]);
2106    w0[3] = hc_swap32_S (w0[3]);
2107    w1[0] = hc_swap32_S (w1[0]);
2108    w1[1] = hc_swap32_S (w1[1]);
2109    w1[2] = hc_swap32_S (w1[2]);
2110    w1[3] = hc_swap32_S (w1[3]);
2111    w2[0] = hc_swap32_S (w2[0]);
2112    w2[1] = hc_swap32_S (w2[1]);
2113    w2[2] = hc_swap32_S (w2[2]);
2114    w2[3] = hc_swap32_S (w2[3]);
2115    w3[0] = hc_swap32_S (w3[0]);
2116    w3[1] = hc_swap32_S (w3[1]);
2117    w3[2] = hc_swap32_S (w3[2]);
2118    w3[3] = hc_swap32_S (w3[3]);
2119    w4[0] = hc_swap32_S (w4[0]);
2120    w4[1] = hc_swap32_S (w4[1]);
2121    w4[2] = hc_swap32_S (w4[2]);
2122    w4[3] = hc_swap32_S (w4[3]);
2123    w5[0] = hc_swap32_S (w5[0]);
2124    w5[1] = hc_swap32_S (w5[1]);
2125    w5[2] = hc_swap32_S (w5[2]);
2126    w5[3] = hc_swap32_S (w5[3]);
2127    w6[0] = hc_swap32_S (w6[0]);
2128    w6[1] = hc_swap32_S (w6[1]);
2129    w6[2] = hc_swap32_S (w6[2]);
2130    w6[3] = hc_swap32_S (w6[3]);
2131    w7[0] = hc_swap32_S (w7[0]);
2132    w7[1] = hc_swap32_S (w7[1]);
2133    w7[2] = hc_swap32_S (w7[2]);
2134    w7[3] = hc_swap32_S (w7[3]);
2135  }
2136
2137  sha512_hmac_init_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
2138}
2139
2140DECLSPEC void sha512_hmac_update_128 (sha512_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *w4, u32 *w5, u32 *w6, u32 *w7, const int len)
2141{
2142  sha512_update_128 (&ctx->ipad, w0, w1, w2, w3, w4, w5, w6, w7, len);
2143}
2144
2145DECLSPEC void sha512_hmac_update (sha512_hmac_ctx_t *ctx, const u32 *w, const int len)
2146{
2147  sha512_update (&ctx->ipad, w, len);
2148}
2149
2150DECLSPEC void sha512_hmac_update_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len)
2151{
2152  sha512_update_swap (&ctx->ipad, w, len);
2153}
2154
2155DECLSPEC void sha512_hmac_update_utf16le (sha512_hmac_ctx_t *ctx, const u32 *w, const int len)
2156{
2157  sha512_update_utf16le (&ctx->ipad, w, len);
2158}
2159
2160DECLSPEC void sha512_hmac_update_utf16le_swap (sha512_hmac_ctx_t *ctx, const u32 *w, const int len)
2161{
2162  sha512_update_utf16le_swap (&ctx->ipad, w, len);
2163}
2164
2165DECLSPEC void sha512_hmac_update_global (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
2166{
2167  sha512_update_global (&ctx->ipad, w, len);
2168}
2169
2170DECLSPEC void sha512_hmac_update_global_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
2171{
2172  sha512_update_global_swap (&ctx->ipad, w, len);
2173}
2174
2175DECLSPEC void sha512_hmac_update_global_utf16le (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
2176{
2177  sha512_update_global_utf16le (&ctx->ipad, w, len);
2178}
2179
2180DECLSPEC void sha512_hmac_update_global_utf16le_swap (sha512_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len)
2181{
2182  sha512_update_global_utf16le_swap (&ctx->ipad, w, len);
2183}
2184
2185DECLSPEC void sha512_hmac_final (sha512_hmac_ctx_t *ctx)
2186{
2187  sha512_final (&ctx->ipad);
2188
2189  ctx->opad.w0[0] = h32_from_64_S (ctx->ipad.h[0]);
2190  ctx->opad.w0[1] = l32_from_64_S (ctx->ipad.h[0]);
2191  ctx->opad.w0[2] = h32_from_64_S (ctx->ipad.h[1]);
2192  ctx->opad.w0[3] = l32_from_64_S (ctx->ipad.h[1]);
2193  ctx->opad.w1[0] = h32_from_64_S (ctx->ipad.h[2]);
2194  ctx->opad.w1[1] = l32_from_64_S (ctx->ipad.h[2]);
2195  ctx->opad.w1[2] = h32_from_64_S (ctx->ipad.h[3]);
2196  ctx->opad.w1[3] = l32_from_64_S (ctx->ipad.h[3]);
2197  ctx->opad.w2[0] = h32_from_64_S (ctx->ipad.h[4]);
2198  ctx->opad.w2[1] = l32_from_64_S (ctx->ipad.h[4]);
2199  ctx->opad.w2[2] = h32_from_64_S (ctx->ipad.h[5]);
2200  ctx->opad.w2[3] = l32_from_64_S (ctx->ipad.h[5]);
2201  ctx->opad.w3[0] = h32_from_64_S (ctx->ipad.h[6]);
2202  ctx->opad.w3[1] = l32_from_64_S (ctx->ipad.h[6]);
2203  ctx->opad.w3[2] = h32_from_64_S (ctx->ipad.h[7]);
2204  ctx->opad.w3[3] = l32_from_64_S (ctx->ipad.h[7]);
2205  ctx->opad.w4[0] = 0;
2206  ctx->opad.w4[1] = 0;
2207  ctx->opad.w4[2] = 0;
2208  ctx->opad.w4[3] = 0;
2209  ctx->opad.w5[0] = 0;
2210  ctx->opad.w5[1] = 0;
2211  ctx->opad.w5[2] = 0;
2212  ctx->opad.w5[3] = 0;
2213  ctx->opad.w6[0] = 0;
2214  ctx->opad.w6[1] = 0;
2215  ctx->opad.w6[2] = 0;
2216  ctx->opad.w6[3] = 0;
2217  ctx->opad.w7[0] = 0;
2218  ctx->opad.w7[1] = 0;
2219  ctx->opad.w7[2] = 0;
2220  ctx->opad.w7[3] = 0;
2221
2222  ctx->opad.len += 64;
2223
2224  sha512_final (&ctx->opad);
2225}
2226
// while the input buf can be a vector datatype, the length must be the same for all of its elements
2228
// Vector variant of the block transform: processes one block, given as
// eight groups of four 32-bit words (w0..w7), and adds the result into the
// eight 64-bit state words in digest[0..7].
//
// w0..w7 are only read; digest is read at the start and updated at the end.
DECLSPEC void sha512_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x *w4, const u32x *w5, const u32x *w6, const u32x *w7, u64x *digest)
{
  // working copies of the current state

  u64x a = digest[0];
  u64x b = digest[1];
  u64x c = digest[2];
  u64x d = digest[3];
  u64x e = digest[4];
  u64x f = digest[5];
  u64x g = digest[6];
  u64x h = digest[7];

  // combine each pair of consecutive 32-bit input words into one 64-bit
  // message word (hl32_to_64: presumably first argument = high half,
  // second = low half -- confirm against the helper)

  u64x w0_t = hl32_to_64 (w0[0], w0[1]);
  u64x w1_t = hl32_to_64 (w0[2], w0[3]);
  u64x w2_t = hl32_to_64 (w1[0], w1[1]);
  u64x w3_t = hl32_to_64 (w1[2], w1[3]);
  u64x w4_t = hl32_to_64 (w2[0], w2[1]);
  u64x w5_t = hl32_to_64 (w2[2], w2[3]);
  u64x w6_t = hl32_to_64 (w3[0], w3[1]);
  u64x w7_t = hl32_to_64 (w3[2], w3[3]);
  u64x w8_t = hl32_to_64 (w4[0], w4[1]);
  u64x w9_t = hl32_to_64 (w4[2], w4[3]);
  u64x wa_t = hl32_to_64 (w5[0], w5[1]);
  u64x wb_t = hl32_to_64 (w5[2], w5[3]);
  u64x wc_t = hl32_to_64 (w6[0], w6[1]);
  u64x wd_t = hl32_to_64 (w6[2], w6[3]);
  u64x we_t = hl32_to_64 (w7[0], w7[1]);
  u64x wf_t = hl32_to_64 (w7[2], w7[3]);

  // ROUND_EXPAND: replaces the sixteen message words in place with the next
  // sixteen. The statements are order dependent: later lines use words that
  // earlier lines of the same expansion have already overwritten.

  #define ROUND_EXPAND()                            \
  {                                                 \
    w0_t = SHA512_EXPAND (we_t, w9_t, w1_t, w0_t);  \
    w1_t = SHA512_EXPAND (wf_t, wa_t, w2_t, w1_t);  \
    w2_t = SHA512_EXPAND (w0_t, wb_t, w3_t, w2_t);  \
    w3_t = SHA512_EXPAND (w1_t, wc_t, w4_t, w3_t);  \
    w4_t = SHA512_EXPAND (w2_t, wd_t, w5_t, w4_t);  \
    w5_t = SHA512_EXPAND (w3_t, we_t, w6_t, w5_t);  \
    w6_t = SHA512_EXPAND (w4_t, wf_t, w7_t, w6_t);  \
    w7_t = SHA512_EXPAND (w5_t, w0_t, w8_t, w7_t);  \
    w8_t = SHA512_EXPAND (w6_t, w1_t, w9_t, w8_t);  \
    w9_t = SHA512_EXPAND (w7_t, w2_t, wa_t, w9_t);  \
    wa_t = SHA512_EXPAND (w8_t, w3_t, wb_t, wa_t);  \
    wb_t = SHA512_EXPAND (w9_t, w4_t, wc_t, wb_t);  \
    wc_t = SHA512_EXPAND (wa_t, w5_t, wd_t, wc_t);  \
    wd_t = SHA512_EXPAND (wb_t, w6_t, we_t, wd_t);  \
    we_t = SHA512_EXPAND (wc_t, w7_t, wf_t, we_t);  \
    wf_t = SHA512_EXPAND (wd_t, w8_t, w0_t, wf_t);  \
  }

  // ROUND_STEP (i): sixteen SHA512_STEP invocations using message words
  // w0_t..wf_t and constants k_sha512[i .. i + 15]. The a..h arguments are
  // rotated by one position per step instead of moving the values around.

  #define ROUND_STEP(i)                                                                   \
  {                                                                                       \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha512[i +  0]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha512[i +  1]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha512[i +  2]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha512[i +  3]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha512[i +  4]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha512[i +  5]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha512[i +  6]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha512[i +  7]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha512[i +  8]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha512[i +  9]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha512[i + 10]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha512[i + 11]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha512[i + 12]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha512[i + 13]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, c, d, e, f, g, h, a, b, we_t, k_sha512[i + 14]); \
    SHA512_STEP (SHA512_F0o, SHA512_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha512[i + 15]); \
  }

  // steps 0..15 use the message words exactly as loaded

  ROUND_STEP (0);

  // steps 16..79: four passes (i = 16, 32, 48, 64), each expanding the
  // message words first and then running sixteen more steps

  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 16; i < 80; i += 16)
  {
    ROUND_EXPAND (); ROUND_STEP (i);
  }

  #undef ROUND_EXPAND
  #undef ROUND_STEP

  // fold the working variables back into the caller's state

  digest[0] += a;
  digest[1] += b;
  digest[2] += c;
  digest[3] += d;
  digest[4] += e;
  digest[5] += f;
  digest[6] += g;
  digest[7] += h;
}
2319
2320DECLSPEC void sha512_init_vector (sha512_ctx_vector_t *ctx)
2321{
2322  ctx->h[0] = SHA512M_A;
2323  ctx->h[1] = SHA512M_B;
2324  ctx->h[2] = SHA512M_C;
2325  ctx->h[3] = SHA512M_D;
2326  ctx->h[4] = SHA512M_E;
2327  ctx->h[5] = SHA512M_F;
2328  ctx->h[6] = SHA512M_G;
2329  ctx->h[7] = SHA512M_H;
2330
2331  ctx->w0[0] = 0;
2332  ctx->w0[1] = 0;
2333  ctx->w0[2] = 0;
2334  ctx->w0[3] = 0;
2335  ctx->w1[0] = 0;
2336  ctx->w1[1] = 0;
2337  ctx->w1[2] = 0;
2338  ctx->w1[3] = 0;
2339  ctx->w2[0] = 0;
2340  ctx->w2[1] = 0;
2341  ctx->w2[2] = 0;
2342  ctx->w2[3] = 0;
2343  ctx->w3[0] = 0;
2344  ctx->w3[1] = 0;
2345  ctx->w3[2] = 0;
2346  ctx->w3[3] = 0;
2347  ctx->w4[0] = 0;
2348  ctx->w4[1] = 0;
2349  ctx->w4[2] = 0;
2350  ctx->w4[3] = 0;
2351  ctx->w5[0] = 0;
2352  ctx->w5[1] = 0;
2353  ctx->w5[2] = 0;
2354  ctx->w5[3] = 0;
2355  ctx->w6[0] = 0;
2356  ctx->w6[1] = 0;
2357  ctx->w6[2] = 0;
2358  ctx->w6[3] = 0;
2359  ctx->w7[0] = 0;
2360  ctx->w7[1] = 0;
2361  ctx->w7[2] = 0;
2362  ctx->w7[3] = 0;
2363
2364  ctx->len = 0;
2365}
2366
2367DECLSPEC void sha512_init_vector_from_scalar (sha512_ctx_vector_t *ctx, sha512_ctx_t *ctx0)
2368{
2369  ctx->h[0] = ctx0->h[0];
2370  ctx->h[1] = ctx0->h[1];
2371  ctx->h[2] = ctx0->h[2];
2372  ctx->h[3] = ctx0->h[3];
2373  ctx->h[4] = ctx0->h[4];
2374  ctx->h[5] = ctx0->h[5];
2375  ctx->h[6] = ctx0->h[6];
2376  ctx->h[7] = ctx0->h[7];
2377
2378  ctx->w0[0] = ctx0->w0[0];
2379  ctx->w0[1] = ctx0->w0[1];
2380  ctx->w0[2] = ctx0->w0[2];
2381  ctx->w0[3] = ctx0->w0[3];
2382  ctx->w1[0] = ctx0->w1[0];
2383  ctx->w1[1] = ctx0->w1[1];
2384  ctx->w1[2] = ctx0->w1[2];
2385  ctx->w1[3] = ctx0->w1[3];
2386  ctx->w2[0] = ctx0->w2[0];
2387  ctx->w2[1] = ctx0->w2[1];
2388  ctx->w2[2] = ctx0->w2[2];
2389  ctx->w2[3] = ctx0->w2[3];
2390  ctx->w3[0] = ctx0->w3[0];
2391  ctx->w3[1] = ctx0->w3[1];
2392  ctx->w3[2] = ctx0->w3[2];
2393  ctx->w3[3] = ctx0->w3[3];
2394  ctx->w4[0] = ctx0->w4[0];
2395  ctx->w4[1] = ctx0->w4[1];
2396  ctx->w4[2] = ctx0->w4[2];
2397  ctx->w4[3] = ctx0->w4[3];
2398  ctx->w5[0] = ctx0->w5[0];
2399  ctx->w5[1] = ctx0->w5[1];
2400  ctx->w5[2] = ctx0->w5[2];
2401  ctx->w5[3] = ctx0->w5[3];
2402  ctx->w6[0] = ctx0->w6[0];
2403  ctx->w6[1] = ctx0->w6[1];
2404  ctx->w6[2] = ctx0->w6[2];
2405  ctx->w6[3] = ctx0->w6[3];
2406  ctx->w7[0] = ctx0->w7[0];
2407  ctx->w7[1] = ctx0->w7[1];
2408  ctx->w7[2] = ctx0->w7[2];
2409  ctx->w7[3] = ctx0->w7[3];
2410
2411  ctx->len = ctx0->len;
2412}
2413
2414DECLSPEC void sha512_update_vector_128 (sha512_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len)
2415{
2416  if (len == 0) return;
2417
2418  const int pos = ctx->len & 127;
2419
2420  ctx->len += len;
2421
2422  if (pos == 0)
2423  {
2424    ctx->w0[0] = w0[0];
2425    ctx->w0[1] = w0[1];
2426    ctx->w0[2] = w0[2];
2427    ctx->w0[3] = w0[3];
2428    ctx->w1[0] = w1[0];
2429    ctx->w1[1] = w1[1];
2430    ctx->w1[2] = w1[2];
2431    ctx->w1[3] = w1[3];
2432    ctx->w2[0] = w2[0];
2433    ctx->w2[1] = w2[1];
2434    ctx->w2[2] = w2[2];
2435    ctx->w2[3] = w2[3];
2436    ctx->w3[0] = w3[0];
2437    ctx->w3[1] = w3[1];
2438    ctx->w3[2] = w3[2];
2439    ctx->w3[3] = w3[3];
2440    ctx->w4[0] = w4[0];
2441    ctx->w4[1] = w4[1];
2442    ctx->w4[2] = w4[2];
2443    ctx->w4[3] = w4[3];
2444    ctx->w5[0] = w5[0];
2445    ctx->w5[1] = w5[1];
2446    ctx->w5[2] = w5[2];
2447    ctx->w5[3] = w5[3];
2448    ctx->w6[0] = w6[0];
2449    ctx->w6[1] = w6[1];
2450    ctx->w6[2] = w6[2];
2451    ctx->w6[3] = w6[3];
2452    ctx->w7[0] = w7[0];
2453    ctx->w7[1] = w7[1];
2454    ctx->w7[2] = w7[2];
2455    ctx->w7[3] = w7[3];
2456
2457    if (len == 128)
2458    {
2459      sha512_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
2460
2461      ctx->w0[0] = 0;
2462      ctx->w0[1] = 0;
2463      ctx->w0[2] = 0;
2464      ctx->w0[3] = 0;
2465      ctx->w1[0] = 0;
2466      ctx->w1[1] = 0;
2467      ctx->w1[2] = 0;
2468      ctx->w1[3] = 0;
2469      ctx->w2[0] = 0;
2470      ctx->w2[1] = 0;
2471      ctx->w2[2] = 0;
2472      ctx->w2[3] = 0;
2473      ctx->w3[0] = 0;
2474      ctx->w3[1] = 0;
2475      ctx->w3[2] = 0;
2476      ctx->w3[3] = 0;
2477      ctx->w4[0] = 0;
2478      ctx->w4[1] = 0;
2479      ctx->w4[2] = 0;
2480      ctx->w4[3] = 0;
2481      ctx->w5[0] = 0;
2482      ctx->w5[1] = 0;
2483      ctx->w5[2] = 0;
2484      ctx->w5[3] = 0;
2485      ctx->w6[0] = 0;
2486      ctx->w6[1] = 0;
2487      ctx->w6[2] = 0;
2488      ctx->w6[3] = 0;
2489      ctx->w7[0] = 0;
2490      ctx->w7[1] = 0;
2491      ctx->w7[2] = 0;
2492      ctx->w7[3] = 0;
2493    }
2494  }
2495  else
2496  {
2497    if ((pos + len) < 128)
2498    {
2499      switch_buffer_by_offset_8x4_be (w0, w1, w2, w3, w4, w5, w6, w7, pos);
2500
2501      ctx->w0[0] |= w0[0];
2502      ctx->w0[1] |= w0[1];
2503      ctx->w0[2] |= w0[2];
2504      ctx->w0[3] |= w0[3];
2505      ctx->w1[0] |= w1[0];
2506      ctx->w1[1] |= w1[1];
2507      ctx->w1[2] |= w1[2];
2508      ctx->w1[3] |= w1[3];
2509      ctx->w2[0] |= w2[0];
2510      ctx->w2[1] |= w2[1];
2511      ctx->w2[2] |= w2[2];
2512      ctx->w2[3] |= w2[3];
2513      ctx->w3[0] |= w3[0];
2514      ctx->w3[1] |= w3[1];
2515      ctx->w3[2] |= w3[2];
2516      ctx->w3[3] |= w3[3];
2517      ctx->w4[0] |= w4[0];
2518      ctx->w4[1] |= w4[1];
2519      ctx->w4[2] |= w4[2];
2520      ctx->w4[3] |= w4[3];
2521      ctx->w5[0] |= w5[0];
2522      ctx->w5[1] |= w5[1];
2523      ctx->w5[2] |= w5[2];
2524      ctx->w5[3] |= w5[3];
2525      ctx->w6[0] |= w6[0];
2526      ctx->w6[1] |= w6[1];
2527      ctx->w6[2] |= w6[2];
2528      ctx->w6[3] |= w6[3];
2529      ctx->w7[0] |= w7[0];
2530      ctx->w7[1] |= w7[1];
2531      ctx->w7[2] |= w7[2];
2532      ctx->w7[3] |= w7[3];
2533    }
2534    else
2535    {
2536      u32x c0[4] = { 0 };
2537      u32x c1[4] = { 0 };
2538      u32x c2[4] = { 0 };
2539      u32x c3[4] = { 0 };
2540      u32x c4[4] = { 0 };
2541      u32x c5[4] = { 0 };
2542      u32x c6[4] = { 0 };
2543      u32x c7[4] = { 0 };
2544
2545      switch_buffer_by_offset_8x4_carry_be (w0, w1, w2, w3, w4, w5, w6, w7, c0, c1, c2, c3, c4, c5, c6, c7, pos);
2546
2547      ctx->w0[0] |= w0[0];
2548      ctx->w0[1] |= w0[1];
2549      ctx->w0[2] |= w0[2];
2550      ctx->w0[3] |= w0[3];
2551      ctx->w1[0] |= w1[0];
2552      ctx->w1[1] |= w1[1];
2553      ctx->w1[2] |= w1[2];
2554      ctx->w1[3] |= w1[3];
2555      ctx->w2[0] |= w2[0];
2556      ctx->w2[1] |= w2[1];
2557      ctx->w2[2] |= w2[2];
2558      ctx->w2[3] |= w2[3];
2559      ctx->w3[0] |= w3[0];
2560      ctx->w3[1] |= w3[1];
2561      ctx->w3[2] |= w3[2];
2562      ctx->w3[3] |= w3[3];
2563      ctx->w4[0] |= w4[0];
2564      ctx->w4[1] |= w4[1];
2565      ctx->w4[2] |= w4[2];
2566      ctx->w4[3] |= w4[3];
2567      ctx->w5[0] |= w5[0];
2568      ctx->w5[1] |= w5[1];
2569      ctx->w5[2] |= w5[2];
2570      ctx->w5[3] |= w5[3];
2571      ctx->w6[0] |= w6[0];
2572      ctx->w6[1] |= w6[1];
2573      ctx->w6[2] |= w6[2];
2574      ctx->w6[3] |= w6[3];
2575      ctx->w7[0] |= w7[0];
2576      ctx->w7[1] |= w7[1];
2577      ctx->w7[2] |= w7[2];
2578      ctx->w7[3] |= w7[3];
2579
2580      sha512_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
2581
2582      ctx->w0[0] = c0[0];
2583      ctx->w0[1] = c0[1];
2584      ctx->w0[2] = c0[2];
2585      ctx->w0[3] = c0[3];
2586      ctx->w1[0] = c1[0];
2587      ctx->w1[1] = c1[1];
2588      ctx->w1[2] = c1[2];
2589      ctx->w1[3] = c1[3];
2590      ctx->w2[0] = c2[0];
2591      ctx->w2[1] = c2[1];
2592      ctx->w2[2] = c2[2];
2593      ctx->w2[3] = c2[3];
2594      ctx->w3[0] = c3[0];
2595      ctx->w3[1] = c3[1];
2596      ctx->w3[2] = c3[2];
2597      ctx->w3[3] = c3[3];
2598      ctx->w4[0] = c4[0];
2599      ctx->w4[1] = c4[1];
2600      ctx->w4[2] = c4[2];
2601      ctx->w4[3] = c4[3];
2602      ctx->w5[0] = c5[0];
2603      ctx->w5[1] = c5[1];
2604      ctx->w5[2] = c5[2];
2605      ctx->w5[3] = c5[3];
2606      ctx->w6[0] = c6[0];
2607      ctx->w6[1] = c6[1];
2608      ctx->w6[2] = c6[2];
2609      ctx->w6[3] = c6[3];
2610      ctx->w7[0] = c7[0];
2611      ctx->w7[1] = c7[1];
2612      ctx->w7[2] = c7[2];
2613      ctx->w7[3] = c7[3];
2614    }
2615  }
2616}
2617
2618DECLSPEC void sha512_update_vector (sha512_ctx_vector_t *ctx, const u32x *w, const int len)
2619{
2620  u32x w0[4];
2621  u32x w1[4];
2622  u32x w2[4];
2623  u32x w3[4];
2624  u32x w4[4];
2625  u32x w5[4];
2626  u32x w6[4];
2627  u32x w7[4];
2628
2629  int pos1;
2630  int pos4;
2631
2632  for (pos1 = 0, pos4 = 0; pos1 < len - 128; pos1 += 128, pos4 += 32)
2633  {
2634    w0[0] = w[pos4 +  0];
2635    w0[1] = w[pos4 +  1];
2636    w0[2] = w[pos4 +  2];
2637    w0[3] = w[pos4 +  3];
2638    w1[0] = w[pos4 +  4];
2639    w1[1] = w[pos4 +  5];
2640    w1[2] = w[pos4 +  6];
2641    w1[3] = w[pos4 +  7];
2642    w2[0] = w[pos4 +  8];
2643    w2[1] = w[pos4 +  9];
2644    w2[2] = w[pos4 + 10];
2645    w2[3] = w[pos4 + 11];
2646    w3[0] = w[pos4 + 12];
2647    w3[1] = w[pos4 + 13];
2648    w3[2] = w[pos4 + 14];
2649    w3[3] = w[pos4 + 15];
2650    w4[0] = w[pos4 + 16];
2651    w4[1] = w[pos4 + 17];
2652    w4[2] = w[pos4 + 18];
2653    w4[3] = w[pos4 + 19];
2654    w5[0] = w[pos4 + 20];
2655    w5[1] = w[pos4 + 21];
2656    w5[2] = w[pos4 + 22];
2657    w5[3] = w[pos4 + 23];
2658    w6[0] = w[pos4 + 24];
2659    w6[1] = w[pos4 + 25];
2660    w6[2] = w[pos4 + 26];
2661    w6[3] = w[pos4 + 27];
2662    w7[0] = w[pos4 + 28];
2663    w7[1] = w[pos4 + 29];
2664    w7[2] = w[pos4 + 30];
2665    w7[3] = w[pos4 + 31];
2666
2667    sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);
2668  }
2669
2670  w0[0] = w[pos4 +  0];
2671  w0[1] = w[pos4 +  1];
2672  w0[2] = w[pos4 +  2];
2673  w0[3] = w[pos4 +  3];
2674  w1[0] = w[pos4 +  4];
2675  w1[1] = w[pos4 +  5];
2676  w1[2] = w[pos4 +  6];
2677  w1[3] = w[pos4 +  7];
2678  w2[0] = w[pos4 +  8];
2679  w2[1] = w[pos4 +  9];
2680  w2[2] = w[pos4 + 10];
2681  w2[3] = w[pos4 + 11];
2682  w3[0] = w[pos4 + 12];
2683  w3[1] = w[pos4 + 13];
2684  w3[2] = w[pos4 + 14];
2685  w3[3] = w[pos4 + 15];
2686  w4[0] = w[pos4 + 16];
2687  w4[1] = w[pos4 + 17];
2688  w4[2] = w[pos4 + 18];
2689  w4[3] = w[pos4 + 19];
2690  w5[0] = w[pos4 + 20];
2691  w5[1] = w[pos4 + 21];
2692  w5[2] = w[pos4 + 22];
2693  w5[3] = w[pos4 + 23];
2694  w6[0] = w[pos4 + 24];
2695  w6[1] = w[pos4 + 25];
2696  w6[2] = w[pos4 + 26];
2697  w6[3] = w[pos4 + 27];
2698  w7[0] = w[pos4 + 28];
2699  w7[1] = w[pos4 + 29];
2700  w7[2] = w[pos4 + 30];
2701  w7[3] = w[pos4 + 31];
2702
2703  sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - pos1);
2704}
2705
2706DECLSPEC void sha512_update_vector_swap (sha512_ctx_vector_t *ctx, const u32x *w, const int len)
2707{
2708  u32x w0[4];
2709  u32x w1[4];
2710  u32x w2[4];
2711  u32x w3[4];
2712  u32x w4[4];
2713  u32x w5[4];
2714  u32x w6[4];
2715  u32x w7[4];
2716
2717  int pos1;
2718  int pos4;
2719
2720  for (pos1 = 0, pos4 = 0; pos1 < len - 128; pos1 += 128, pos4 += 32)
2721  {
2722    w0[0] = w[pos4 +  0];
2723    w0[1] = w[pos4 +  1];
2724    w0[2] = w[pos4 +  2];
2725    w0[3] = w[pos4 +  3];
2726    w1[0] = w[pos4 +  4];
2727    w1[1] = w[pos4 +  5];
2728    w1[2] = w[pos4 +  6];
2729    w1[3] = w[pos4 +  7];
2730    w2[0] = w[pos4 +  8];
2731    w2[1] = w[pos4 +  9];
2732    w2[2] = w[pos4 + 10];
2733    w2[3] = w[pos4 + 11];
2734    w3[0] = w[pos4 + 12];
2735    w3[1] = w[pos4 + 13];
2736    w3[2] = w[pos4 + 14];
2737    w3[3] = w[pos4 + 15];
2738    w4[0] = w[pos4 + 16];
2739    w4[1] = w[pos4 + 17];
2740    w4[2] = w[pos4 + 18];
2741    w4[3] = w[pos4 + 19];
2742    w5[0] = w[pos4 + 20];
2743    w5[1] = w[pos4 + 21];
2744    w5[2] = w[pos4 + 22];
2745    w5[3] = w[pos4 + 23];
2746    w6[0] = w[pos4 + 24];
2747    w6[1] = w[pos4 + 25];
2748    w6[2] = w[pos4 + 26];
2749    w6[3] = w[pos4 + 27];
2750    w7[0] = w[pos4 + 28];
2751    w7[1] = w[pos4 + 29];
2752    w7[2] = w[pos4 + 30];
2753    w7[3] = w[pos4 + 31];
2754
2755    w0[0] = hc_swap32 (w0[0]);
2756    w0[1] = hc_swap32 (w0[1]);
2757    w0[2] = hc_swap32 (w0[2]);
2758    w0[3] = hc_swap32 (w0[3]);
2759    w1[0] = hc_swap32 (w1[0]);
2760    w1[1] = hc_swap32 (w1[1]);
2761    w1[2] = hc_swap32 (w1[2]);
2762    w1[3] = hc_swap32 (w1[3]);
2763    w2[0] = hc_swap32 (w2[0]);
2764    w2[1] = hc_swap32 (w2[1]);
2765    w2[2] = hc_swap32 (w2[2]);
2766    w2[3] = hc_swap32 (w2[3]);
2767    w3[0] = hc_swap32 (w3[0]);
2768    w3[1] = hc_swap32 (w3[1]);
2769    w3[2] = hc_swap32 (w3[2]);
2770    w3[3] = hc_swap32 (w3[3]);
2771    w4[0] = hc_swap32 (w4[0]);
2772    w4[1] = hc_swap32 (w4[1]);
2773    w4[2] = hc_swap32 (w4[2]);
2774    w4[3] = hc_swap32 (w4[3]);
2775    w5[0] = hc_swap32 (w5[0]);
2776    w5[1] = hc_swap32 (w5[1]);
2777    w5[2] = hc_swap32 (w5[2]);
2778    w5[3] = hc_swap32 (w5[3]);
2779    w6[0] = hc_swap32 (w6[0]);
2780    w6[1] = hc_swap32 (w6[1]);
2781    w6[2] = hc_swap32 (w6[2]);
2782    w6[3] = hc_swap32 (w6[3]);
2783    w7[0] = hc_swap32 (w7[0]);
2784    w7[1] = hc_swap32 (w7[1]);
2785    w7[2] = hc_swap32 (w7[2]);
2786    w7[3] = hc_swap32 (w7[3]);
2787
2788    sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 128);
2789  }
2790
2791  w0[0] = w[pos4 +  0];
2792  w0[1] = w[pos4 +  1];
2793  w0[2] = w[pos4 +  2];
2794  w0[3] = w[pos4 +  3];
2795  w1[0] = w[pos4 +  4];
2796  w1[1] = w[pos4 +  5];
2797  w1[2] = w[pos4 +  6];
2798  w1[3] = w[pos4 +  7];
2799  w2[0] = w[pos4 +  8];
2800  w2[1] = w[pos4 +  9];
2801  w2[2] = w[pos4 + 10];
2802  w2[3] = w[pos4 + 11];
2803  w3[0] = w[pos4 + 12];
2804  w3[1] = w[pos4 + 13];
2805  w3[2] = w[pos4 + 14];
2806  w3[3] = w[pos4 + 15];
2807  w4[0] = w[pos4 + 16];
2808  w4[1] = w[pos4 + 17];
2809  w4[2] = w[pos4 + 18];
2810  w4[3] = w[pos4 + 19];
2811  w5[0] = w[pos4 + 20];
2812  w5[1] = w[pos4 + 21];
2813  w5[2] = w[pos4 + 22];
2814  w5[3] = w[pos4 + 23];
2815  w6[0] = w[pos4 + 24];
2816  w6[1] = w[pos4 + 25];
2817  w6[2] = w[pos4 + 26];
2818  w6[3] = w[pos4 + 27];
2819  w7[0] = w[pos4 + 28];
2820  w7[1] = w[pos4 + 29];
2821  w7[2] = w[pos4 + 30];
2822  w7[3] = w[pos4 + 31];
2823
2824  w0[0] = hc_swap32 (w0[0]);
2825  w0[1] = hc_swap32 (w0[1]);
2826  w0[2] = hc_swap32 (w0[2]);
2827  w0[3] = hc_swap32 (w0[3]);
2828  w1[0] = hc_swap32 (w1[0]);
2829  w1[1] = hc_swap32 (w1[1]);
2830  w1[2] = hc_swap32 (w1[2]);
2831  w1[3] = hc_swap32 (w1[3]);
2832  w2[0] = hc_swap32 (w2[0]);
2833  w2[1] = hc_swap32 (w2[1]);
2834  w2[2] = hc_swap32 (w2[2]);
2835  w2[3] = hc_swap32 (w2[3]);
2836  w3[0] = hc_swap32 (w3[0]);
2837  w3[1] = hc_swap32 (w3[1]);
2838  w3[2] = hc_swap32 (w3[2]);
2839  w3[3] = hc_swap32 (w3[3]);
2840  w4[0] = hc_swap32 (w4[0]);
2841  w4[1] = hc_swap32 (w4[1]);
2842  w4[2] = hc_swap32 (w4[2]);
2843  w4[3] = hc_swap32 (w4[3]);
2844  w5[0] = hc_swap32 (w5[0]);
2845  w5[1] = hc_swap32 (w5[1]);
2846  w5[2] = hc_swap32 (w5[2]);
2847  w5[3] = hc_swap32 (w5[3]);
2848  w6[0] = hc_swap32 (w6[0]);
2849  w6[1] = hc_swap32 (w6[1]);
2850  w6[2] = hc_swap32 (w6[2]);
2851  w6[3] = hc_swap32 (w6[3]);
2852  w7[0] = hc_swap32 (w7[0]);
2853  w7[1] = hc_swap32 (w7[1]);
2854  w7[2] = hc_swap32 (w7[2]);
2855  w7[3] = hc_swap32 (w7[3]);
2856
2857  sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, len - pos1);
2858}
2859
2860DECLSPEC void sha512_update_vector_utf16le (sha512_ctx_vector_t *ctx, const u32x *w, const int len)
2861{
2862  u32x w0[4];
2863  u32x w1[4];
2864  u32x w2[4];
2865  u32x w3[4];
2866  u32x w4[4];
2867  u32x w5[4];
2868  u32x w6[4];
2869  u32x w7[4];
2870
2871  int pos1;
2872  int pos4;
2873
2874  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
2875  {
2876    w0[0] = w[pos4 +  0];
2877    w0[1] = w[pos4 +  1];
2878    w0[2] = w[pos4 +  2];
2879    w0[3] = w[pos4 +  3];
2880    w1[0] = w[pos4 +  4];
2881    w1[1] = w[pos4 +  5];
2882    w1[2] = w[pos4 +  6];
2883    w1[3] = w[pos4 +  7];
2884    w2[0] = w[pos4 +  8];
2885    w2[1] = w[pos4 +  9];
2886    w2[2] = w[pos4 + 10];
2887    w2[3] = w[pos4 + 11];
2888    w3[0] = w[pos4 + 12];
2889    w3[1] = w[pos4 + 13];
2890    w3[2] = w[pos4 + 14];
2891    w3[3] = w[pos4 + 15];
2892
2893    make_utf16le (w3, w6, w7);
2894    make_utf16le (w2, w4, w5);
2895    make_utf16le (w1, w2, w3);
2896    make_utf16le (w0, w0, w1);
2897
2898    sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
2899  }
2900
2901  w0[0] = w[pos4 +  0];
2902  w0[1] = w[pos4 +  1];
2903  w0[2] = w[pos4 +  2];
2904  w0[3] = w[pos4 +  3];
2905  w1[0] = w[pos4 +  4];
2906  w1[1] = w[pos4 +  5];
2907  w1[2] = w[pos4 +  6];
2908  w1[3] = w[pos4 +  7];
2909  w2[0] = w[pos4 +  8];
2910  w2[1] = w[pos4 +  9];
2911  w2[2] = w[pos4 + 10];
2912  w2[3] = w[pos4 + 11];
2913  w3[0] = w[pos4 + 12];
2914  w3[1] = w[pos4 + 13];
2915  w3[2] = w[pos4 + 14];
2916  w3[3] = w[pos4 + 15];
2917
2918  make_utf16le (w3, w6, w7);
2919  make_utf16le (w2, w4, w5);
2920  make_utf16le (w1, w2, w3);
2921  make_utf16le (w0, w0, w1);
2922
2923  sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
2924}
2925
2926DECLSPEC void sha512_update_vector_utf16le_swap (sha512_ctx_vector_t *ctx, const u32x *w, const int len)
2927{
2928  u32x w0[4];
2929  u32x w1[4];
2930  u32x w2[4];
2931  u32x w3[4];
2932  u32x w4[4];
2933  u32x w5[4];
2934  u32x w6[4];
2935  u32x w7[4];
2936
2937  int pos1;
2938  int pos4;
2939
2940  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
2941  {
2942    w0[0] = w[pos4 +  0];
2943    w0[1] = w[pos4 +  1];
2944    w0[2] = w[pos4 +  2];
2945    w0[3] = w[pos4 +  3];
2946    w1[0] = w[pos4 +  4];
2947    w1[1] = w[pos4 +  5];
2948    w1[2] = w[pos4 +  6];
2949    w1[3] = w[pos4 +  7];
2950    w2[0] = w[pos4 +  8];
2951    w2[1] = w[pos4 +  9];
2952    w2[2] = w[pos4 + 10];
2953    w2[3] = w[pos4 + 11];
2954    w3[0] = w[pos4 + 12];
2955    w3[1] = w[pos4 + 13];
2956    w3[2] = w[pos4 + 14];
2957    w3[3] = w[pos4 + 15];
2958
2959    make_utf16le (w3, w6, w7);
2960    make_utf16le (w2, w4, w5);
2961    make_utf16le (w1, w2, w3);
2962    make_utf16le (w0, w0, w1);
2963
2964    w0[0] = hc_swap32 (w0[0]);
2965    w0[1] = hc_swap32 (w0[1]);
2966    w0[2] = hc_swap32 (w0[2]);
2967    w0[3] = hc_swap32 (w0[3]);
2968    w1[0] = hc_swap32 (w1[0]);
2969    w1[1] = hc_swap32 (w1[1]);
2970    w1[2] = hc_swap32 (w1[2]);
2971    w1[3] = hc_swap32 (w1[3]);
2972    w2[0] = hc_swap32 (w2[0]);
2973    w2[1] = hc_swap32 (w2[1]);
2974    w2[2] = hc_swap32 (w2[2]);
2975    w2[3] = hc_swap32 (w2[3]);
2976    w3[0] = hc_swap32 (w3[0]);
2977    w3[1] = hc_swap32 (w3[1]);
2978    w3[2] = hc_swap32 (w3[2]);
2979    w3[3] = hc_swap32 (w3[3]);
2980    w4[0] = hc_swap32 (w4[0]);
2981    w4[1] = hc_swap32 (w4[1]);
2982    w4[2] = hc_swap32 (w4[2]);
2983    w4[3] = hc_swap32 (w4[3]);
2984    w5[0] = hc_swap32 (w5[0]);
2985    w5[1] = hc_swap32 (w5[1]);
2986    w5[2] = hc_swap32 (w5[2]);
2987    w5[3] = hc_swap32 (w5[3]);
2988    w6[0] = hc_swap32 (w6[0]);
2989    w6[1] = hc_swap32 (w6[1]);
2990    w6[2] = hc_swap32 (w6[2]);
2991    w6[3] = hc_swap32 (w6[3]);
2992    w7[0] = hc_swap32 (w7[0]);
2993    w7[1] = hc_swap32 (w7[1]);
2994    w7[2] = hc_swap32 (w7[2]);
2995    w7[3] = hc_swap32 (w7[3]);
2996
2997    sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
2998  }
2999
3000  w0[0] = w[pos4 +  0];
3001  w0[1] = w[pos4 +  1];
3002  w0[2] = w[pos4 +  2];
3003  w0[3] = w[pos4 +  3];
3004  w1[0] = w[pos4 +  4];
3005  w1[1] = w[pos4 +  5];
3006  w1[2] = w[pos4 +  6];
3007  w1[3] = w[pos4 +  7];
3008  w2[0] = w[pos4 +  8];
3009  w2[1] = w[pos4 +  9];
3010  w2[2] = w[pos4 + 10];
3011  w2[3] = w[pos4 + 11];
3012  w3[0] = w[pos4 + 12];
3013  w3[1] = w[pos4 + 13];
3014  w3[2] = w[pos4 + 14];
3015  w3[3] = w[pos4 + 15];
3016
3017  make_utf16le (w3, w6, w7);
3018  make_utf16le (w2, w4, w5);
3019  make_utf16le (w1, w2, w3);
3020  make_utf16le (w0, w0, w1);
3021
3022  w0[0] = hc_swap32 (w0[0]);
3023  w0[1] = hc_swap32 (w0[1]);
3024  w0[2] = hc_swap32 (w0[2]);
3025  w0[3] = hc_swap32 (w0[3]);
3026  w1[0] = hc_swap32 (w1[0]);
3027  w1[1] = hc_swap32 (w1[1]);
3028  w1[2] = hc_swap32 (w1[2]);
3029  w1[3] = hc_swap32 (w1[3]);
3030  w2[0] = hc_swap32 (w2[0]);
3031  w2[1] = hc_swap32 (w2[1]);
3032  w2[2] = hc_swap32 (w2[2]);
3033  w2[3] = hc_swap32 (w2[3]);
3034  w3[0] = hc_swap32 (w3[0]);
3035  w3[1] = hc_swap32 (w3[1]);
3036  w3[2] = hc_swap32 (w3[2]);
3037  w3[3] = hc_swap32 (w3[3]);
3038  w4[0] = hc_swap32 (w4[0]);
3039  w4[1] = hc_swap32 (w4[1]);
3040  w4[2] = hc_swap32 (w4[2]);
3041  w4[3] = hc_swap32 (w4[3]);
3042  w5[0] = hc_swap32 (w5[0]);
3043  w5[1] = hc_swap32 (w5[1]);
3044  w5[2] = hc_swap32 (w5[2]);
3045  w5[3] = hc_swap32 (w5[3]);
3046  w6[0] = hc_swap32 (w6[0]);
3047  w6[1] = hc_swap32 (w6[1]);
3048  w6[2] = hc_swap32 (w6[2]);
3049  w6[3] = hc_swap32 (w6[3]);
3050  w7[0] = hc_swap32 (w7[0]);
3051  w7[1] = hc_swap32 (w7[1]);
3052  w7[2] = hc_swap32 (w7[2]);
3053  w7[3] = hc_swap32 (w7[3]);
3054
3055  sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
3056}
3057
3058DECLSPEC void sha512_update_vector_utf16beN (sha512_ctx_vector_t *ctx, const u32x *w, const int len)
3059{
3060  u32x w0[4];
3061  u32x w1[4];
3062  u32x w2[4];
3063  u32x w3[4];
3064  u32x w4[4];
3065  u32x w5[4];
3066  u32x w6[4];
3067  u32x w7[4];
3068
3069  int pos1;
3070  int pos4;
3071
3072  for (pos1 = 0, pos4 = 0; pos1 < len - 64; pos1 += 64, pos4 += 16)
3073  {
3074    w0[0] = w[pos4 +  0];
3075    w0[1] = w[pos4 +  1];
3076    w0[2] = w[pos4 +  2];
3077    w0[3] = w[pos4 +  3];
3078    w1[0] = w[pos4 +  4];
3079    w1[1] = w[pos4 +  5];
3080    w1[2] = w[pos4 +  6];
3081    w1[3] = w[pos4 +  7];
3082    w2[0] = w[pos4 +  8];
3083    w2[1] = w[pos4 +  9];
3084    w2[2] = w[pos4 + 10];
3085    w2[3] = w[pos4 + 11];
3086    w3[0] = w[pos4 + 12];
3087    w3[1] = w[pos4 + 13];
3088    w3[2] = w[pos4 + 14];
3089    w3[3] = w[pos4 + 15];
3090
3091    make_utf16beN (w3, w6, w7);
3092    make_utf16beN (w2, w4, w5);
3093    make_utf16beN (w1, w2, w3);
3094    make_utf16beN (w0, w0, w1);
3095
3096    sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, 64 * 2);
3097  }
3098
3099  w0[0] = w[pos4 +  0];
3100  w0[1] = w[pos4 +  1];
3101  w0[2] = w[pos4 +  2];
3102  w0[3] = w[pos4 +  3];
3103  w1[0] = w[pos4 +  4];
3104  w1[1] = w[pos4 +  5];
3105  w1[2] = w[pos4 +  6];
3106  w1[3] = w[pos4 +  7];
3107  w2[0] = w[pos4 +  8];
3108  w2[1] = w[pos4 +  9];
3109  w2[2] = w[pos4 + 10];
3110  w2[3] = w[pos4 + 11];
3111  w3[0] = w[pos4 + 12];
3112  w3[1] = w[pos4 + 13];
3113  w3[2] = w[pos4 + 14];
3114  w3[3] = w[pos4 + 15];
3115
3116  make_utf16beN (w3, w6, w7);
3117  make_utf16beN (w2, w4, w5);
3118  make_utf16beN (w1, w2, w3);
3119  make_utf16beN (w0, w0, w1);
3120
3121  sha512_update_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7, (len - pos1) * 2);
3122}
3123
3124DECLSPEC void sha512_final_vector (sha512_ctx_vector_t *ctx)
3125{
3126  const int pos = ctx->len & 127;
3127
3128  append_0x80_8x4 (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, pos ^ 3);
3129
3130  if (pos >= 112)
3131  {
3132    sha512_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
3133
3134    ctx->w0[0] = 0;
3135    ctx->w0[1] = 0;
3136    ctx->w0[2] = 0;
3137    ctx->w0[3] = 0;
3138    ctx->w1[0] = 0;
3139    ctx->w1[1] = 0;
3140    ctx->w1[2] = 0;
3141    ctx->w1[3] = 0;
3142    ctx->w2[0] = 0;
3143    ctx->w2[1] = 0;
3144    ctx->w2[2] = 0;
3145    ctx->w2[3] = 0;
3146    ctx->w3[0] = 0;
3147    ctx->w3[1] = 0;
3148    ctx->w3[2] = 0;
3149    ctx->w3[3] = 0;
3150    ctx->w4[0] = 0;
3151    ctx->w4[1] = 0;
3152    ctx->w4[2] = 0;
3153    ctx->w4[3] = 0;
3154    ctx->w5[0] = 0;
3155    ctx->w5[1] = 0;
3156    ctx->w5[2] = 0;
3157    ctx->w5[3] = 0;
3158    ctx->w6[0] = 0;
3159    ctx->w6[1] = 0;
3160    ctx->w6[2] = 0;
3161    ctx->w6[3] = 0;
3162    ctx->w7[0] = 0;
3163    ctx->w7[1] = 0;
3164    ctx->w7[2] = 0;
3165    ctx->w7[3] = 0;
3166  }
3167
3168  ctx->w7[2] = 0;
3169  ctx->w7[3] = ctx->len * 8;
3170
3171  sha512_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->w4, ctx->w5, ctx->w6, ctx->w7, ctx->h);
3172}
3173
3174// HMAC + Vector
3175
3176DECLSPEC void sha512_hmac_init_vector_128 (sha512_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, const u32x *w4, const u32x *w5, const u32x *w6, const u32x *w7)
3177{
3178  u32x a0[4];
3179  u32x a1[4];
3180  u32x a2[4];
3181  u32x a3[4];
3182  u32x a4[4];
3183  u32x a5[4];
3184  u32x a6[4];
3185  u32x a7[4];
3186
3187  // ipad
3188
3189  a0[0] = w0[0] ^ 0x36363636;
3190  a0[1] = w0[1] ^ 0x36363636;
3191  a0[2] = w0[2] ^ 0x36363636;
3192  a0[3] = w0[3] ^ 0x36363636;
3193  a1[0] = w1[0] ^ 0x36363636;
3194  a1[1] = w1[1] ^ 0x36363636;
3195  a1[2] = w1[2] ^ 0x36363636;
3196  a1[3] = w1[3] ^ 0x36363636;
3197  a2[0] = w2[0] ^ 0x36363636;
3198  a2[1] = w2[1] ^ 0x36363636;
3199  a2[2] = w2[2] ^ 0x36363636;
3200  a2[3] = w2[3] ^ 0x36363636;
3201  a3[0] = w3[0] ^ 0x36363636;
3202  a3[1] = w3[1] ^ 0x36363636;
3203  a3[2] = w3[2] ^ 0x36363636;
3204  a3[3] = w3[3] ^ 0x36363636;
3205  a4[0] = w4[0] ^ 0x36363636;
3206  a4[1] = w4[1] ^ 0x36363636;
3207  a4[2] = w4[2] ^ 0x36363636;
3208  a4[3] = w4[3] ^ 0x36363636;
3209  a5[0] = w5[0] ^ 0x36363636;
3210  a5[1] = w5[1] ^ 0x36363636;
3211  a5[2] = w5[2] ^ 0x36363636;
3212  a5[3] = w5[3] ^ 0x36363636;
3213  a6[0] = w6[0] ^ 0x36363636;
3214  a6[1] = w6[1] ^ 0x36363636;
3215  a6[2] = w6[2] ^ 0x36363636;
3216  a6[3] = w6[3] ^ 0x36363636;
3217  a7[0] = w7[0] ^ 0x36363636;
3218  a7[1] = w7[1] ^ 0x36363636;
3219  a7[2] = w7[2] ^ 0x36363636;
3220  a7[3] = w7[3] ^ 0x36363636;
3221
3222  sha512_init_vector (&ctx->ipad);
3223
3224  sha512_update_vector_128 (&ctx->ipad, a0, a1, a2, a3, a4, a5, a6, a7, 128);
3225
3226  // opad
3227
3228  u32x b0[4];
3229  u32x b1[4];
3230  u32x b2[4];
3231  u32x b3[4];
3232  u32x b4[4];
3233  u32x b5[4];
3234  u32x b6[4];
3235  u32x b7[4];
3236
3237  b0[0] = w0[0] ^ 0x5c5c5c5c;
3238  b0[1] = w0[1] ^ 0x5c5c5c5c;
3239  b0[2] = w0[2] ^ 0x5c5c5c5c;
3240  b0[3] = w0[3] ^ 0x5c5c5c5c;
3241  b1[0] = w1[0] ^ 0x5c5c5c5c;
3242  b1[1] = w1[1] ^ 0x5c5c5c5c;
3243  b1[2] = w1[2] ^ 0x5c5c5c5c;
3244  b1[3] = w1[3] ^ 0x5c5c5c5c;
3245  b2[0] = w2[0] ^ 0x5c5c5c5c;
3246  b2[1] = w2[1] ^ 0x5c5c5c5c;
3247  b2[2] = w2[2] ^ 0x5c5c5c5c;
3248  b2[3] = w2[3] ^ 0x5c5c5c5c;
3249  b3[0] = w3[0] ^ 0x5c5c5c5c;
3250  b3[1] = w3[1] ^ 0x5c5c5c5c;
3251  b3[2] = w3[2] ^ 0x5c5c5c5c;
3252  b3[3] = w3[3] ^ 0x5c5c5c5c;
3253  b4[0] = w4[0] ^ 0x5c5c5c5c;
3254  b4[1] = w4[1] ^ 0x5c5c5c5c;
3255  b4[2] = w4[2] ^ 0x5c5c5c5c;
3256  b4[3] = w4[3] ^ 0x5c5c5c5c;
3257  b5[0] = w5[0] ^ 0x5c5c5c5c;
3258  b5[1] = w5[1] ^ 0x5c5c5c5c;
3259  b5[2] = w5[2] ^ 0x5c5c5c5c;
3260  b5[3] = w5[3] ^ 0x5c5c5c5c;
3261  b6[0] = w6[0] ^ 0x5c5c5c5c;
3262  b6[1] = w6[1] ^ 0x5c5c5c5c;
3263  b6[2] = w6[2] ^ 0x5c5c5c5c;
3264  b6[3] = w6[3] ^ 0x5c5c5c5c;
3265  b7[0] = w7[0] ^ 0x5c5c5c5c;
3266  b7[1] = w7[1] ^ 0x5c5c5c5c;
3267  b7[2] = w7[2] ^ 0x5c5c5c5c;
3268  b7[3] = w7[3] ^ 0x5c5c5c5c;
3269
3270  sha512_init_vector (&ctx->opad);
3271
3272  sha512_update_vector_128 (&ctx->opad, b0, b1, b2, b3, b4, b5, b6, b7, 128);
3273}
3274
3275DECLSPEC void sha512_hmac_init_vector (sha512_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
3276{
3277  u32x w0[4];
3278  u32x w1[4];
3279  u32x w2[4];
3280  u32x w3[4];
3281  u32x w4[4];
3282  u32x w5[4];
3283  u32x w6[4];
3284  u32x w7[4];
3285
3286  if (len > 128)
3287  {
3288    sha512_ctx_vector_t tmp;
3289
3290    sha512_init_vector (&tmp);
3291
3292    sha512_update_vector (&tmp, w, len);
3293
3294    sha512_final_vector (&tmp);
3295
3296    w0[0] = h32_from_64 (tmp.h[0]);
3297    w0[1] = l32_from_64 (tmp.h[0]);
3298    w0[2] = h32_from_64 (tmp.h[1]);
3299    w0[3] = l32_from_64 (tmp.h[1]);
3300    w1[0] = h32_from_64 (tmp.h[2]);
3301    w1[1] = l32_from_64 (tmp.h[2]);
3302    w1[2] = h32_from_64 (tmp.h[3]);
3303    w1[3] = l32_from_64 (tmp.h[3]);
3304    w2[0] = h32_from_64 (tmp.h[4]);
3305    w2[1] = l32_from_64 (tmp.h[4]);
3306    w2[2] = h32_from_64 (tmp.h[5]);
3307    w2[3] = l32_from_64 (tmp.h[5]);
3308    w3[0] = h32_from_64 (tmp.h[6]);
3309    w3[1] = l32_from_64 (tmp.h[6]);
3310    w3[2] = h32_from_64 (tmp.h[7]);
3311    w3[3] = l32_from_64 (tmp.h[7]);
3312    w4[0] = 0;
3313    w4[1] = 0;
3314    w4[2] = 0;
3315    w4[3] = 0;
3316    w5[0] = 0;
3317    w5[1] = 0;
3318    w5[2] = 0;
3319    w5[3] = 0;
3320    w6[0] = 0;
3321    w6[1] = 0;
3322    w6[2] = 0;
3323    w6[3] = 0;
3324    w7[0] = 0;
3325    w7[1] = 0;
3326    w7[2] = 0;
3327    w7[3] = 0;
3328  }
3329  else
3330  {
3331    w0[0] = w[ 0];
3332    w0[1] = w[ 1];
3333    w0[2] = w[ 2];
3334    w0[3] = w[ 3];
3335    w1[0] = w[ 4];
3336    w1[1] = w[ 5];
3337    w1[2] = w[ 6];
3338    w1[3] = w[ 7];
3339    w2[0] = w[ 8];
3340    w2[1] = w[ 9];
3341    w2[2] = w[10];
3342    w2[3] = w[11];
3343    w3[0] = w[12];
3344    w3[1] = w[13];
3345    w3[2] = w[14];
3346    w3[3] = w[15];
3347    w4[0] = w[16];
3348    w4[1] = w[17];
3349    w4[2] = w[18];
3350    w4[3] = w[19];
3351    w5[0] = w[20];
3352    w5[1] = w[21];
3353    w5[2] = w[22];
3354    w5[3] = w[23];
3355    w6[0] = w[24];
3356    w6[1] = w[25];
3357    w6[2] = w[26];
3358    w6[3] = w[27];
3359    w7[0] = w[28];
3360    w7[1] = w[29];
3361    w7[2] = w[30];
3362    w7[3] = w[31];
3363  }
3364
3365  sha512_hmac_init_vector_128 (ctx, w0, w1, w2, w3, w4, w5, w6, w7);
3366}
3367
3368DECLSPEC void sha512_hmac_update_vector_128 (sha512_hmac_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *w4, u32x *w5, u32x *w6, u32x *w7, const int len)
3369{
3370  sha512_update_vector_128 (&ctx->ipad, w0, w1, w2, w3, w4, w5, w6, w7, len);
3371}
3372
3373DECLSPEC void sha512_hmac_update_vector (sha512_hmac_ctx_vector_t *ctx, const u32x *w, const int len)
3374{
3375  sha512_update_vector (&ctx->ipad, w, len);
3376}
3377
3378DECLSPEC void sha512_hmac_final_vector (sha512_hmac_ctx_vector_t *ctx)
3379{
3380  sha512_final_vector (&ctx->ipad);
3381
3382  ctx->opad.w0[0] = h32_from_64 (ctx->ipad.h[0]);
3383  ctx->opad.w0[1] = l32_from_64 (ctx->ipad.h[0]);
3384  ctx->opad.w0[2] = h32_from_64 (ctx->ipad.h[1]);
3385  ctx->opad.w0[3] = l32_from_64 (ctx->ipad.h[1]);
3386  ctx->opad.w1[0] = h32_from_64 (ctx->ipad.h[2]);
3387  ctx->opad.w1[1] = l32_from_64 (ctx->ipad.h[2]);
3388  ctx->opad.w1[2] = h32_from_64 (ctx->ipad.h[3]);
3389  ctx->opad.w1[3] = l32_from_64 (ctx->ipad.h[3]);
3390  ctx->opad.w2[0] = h32_from_64 (ctx->ipad.h[4]);
3391  ctx->opad.w2[1] = l32_from_64 (ctx->ipad.h[4]);
3392  ctx->opad.w2[2] = h32_from_64 (ctx->ipad.h[5]);
3393  ctx->opad.w2[3] = l32_from_64 (ctx->ipad.h[5]);
3394  ctx->opad.w3[0] = h32_from_64 (ctx->ipad.h[6]);
3395  ctx->opad.w3[1] = l32_from_64 (ctx->ipad.h[6]);
3396  ctx->opad.w3[2] = h32_from_64 (ctx->ipad.h[7]);
3397  ctx->opad.w3[3] = l32_from_64 (ctx->ipad.h[7]);
3398  ctx->opad.w4[0] = 0;
3399  ctx->opad.w4[1] = 0;
3400  ctx->opad.w4[2] = 0;
3401  ctx->opad.w4[3] = 0;
3402  ctx->opad.w5[0] = 0;
3403  ctx->opad.w5[1] = 0;
3404  ctx->opad.w5[2] = 0;
3405  ctx->opad.w5[3] = 0;
3406  ctx->opad.w6[0] = 0;
3407  ctx->opad.w6[1] = 0;
3408  ctx->opad.w6[2] = 0;
3409  ctx->opad.w6[3] = 0;
3410  ctx->opad.w7[0] = 0;
3411  ctx->opad.w7[1] = 0;
3412  ctx->opad.w7[2] = 0;
3413  ctx->opad.w7[3] = 0;
3414
3415  ctx->opad.len += 64;
3416
3417  sha512_final_vector (&ctx->opad);
3418}
3419