1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3 * Copyright (c) 2004 The OpenSSL Project. All rights reserved
4 * according to the OpenSSL license [found in ../../LICENSE].
5 * ====================================================================
6 */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*-
10 * IMPLEMENTATION NOTES.
11 *
12 * As you might have noticed, 32-bit hash algorithms:
13 *
14 * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15 * - optimized versions implement two transform functions: one operating
16 * on [aligned] data in host byte order and one - on data in input
17 * stream byte order;
18 * - share common byte-order neutral collector and padding function
19 * implementations, ../md32_common.h;
20 *
21 * Neither of the above applies to this SHA-512 implementation. Reasons
22 * [in reverse order] are:
23 *
24 * - it's the only 64-bit hash algorithm at the time of this writing,
25 * there is no need for common collector/padding implementation [yet];
26 * - by supporting only one transform function [which operates on
27 * *aligned* data in input stream byte order, big-endian in this case]
28 * we minimize burden of maintenance in two ways: a) collector/padding
29 * function is simpler; b) only one transform function to stare at;
30 * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31 * apply a number of optimizations to mitigate potential performance
32 * penalties caused by previous design decision;
33 *
34 * Caveat lector.
35 *
36 * Implementation relies on the fact that "long long" is 64-bit on
37 * both 32- and 64-bit platforms. If some compiler vendor comes up
38 * with 128-bit long long, adjustment to sha.h would be required.
39 * As this implementation relies on 64-bit integer type, it's totally
40 * inappropriate for platforms which don't support it, most notably
41 * 16-bit platforms.
42 * <appro@fy.chalmers.se>
43 */
44 # include <stdlib.h>
45 # include <string.h>
46
47 # include <openssl/crypto.h>
48 # include <openssl/sha.h>
49 # include <openssl/opensslv.h>
50
51 # include "cryptlib.h"
52
/* Version string: the literal "SHA-512" followed by OPENSSL_VERSION_PTEXT. */
53 const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;
54
/*
 * SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is defined when building for one
 * of the x86, x86-64 or s390 targets tested below, or when SHA512_ASM is
 * defined.  SHA512_Update() and SHA512_Transform() check it: when it is
 * NOT defined, input that is not aligned to sizeof(c->u.d[0]) is first
 * copied into the context buffer before being handed to the block
 * transform.
 */
55 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
56 defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
57 defined(__s390__) || defined(__s390x__) || \
58 defined(SHA512_ASM)
59 # define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
60 # endif
61
fips_md_init_ctx(SHA384,SHA512)62 fips_md_init_ctx(SHA384, SHA512)
63 {
64 c->h[0] = U64(0xcbbb9d5dc1059ed8);
65 c->h[1] = U64(0x629a292a367cd507);
66 c->h[2] = U64(0x9159015a3070dd17);
67 c->h[3] = U64(0x152fecd8f70e5939);
68 c->h[4] = U64(0x67332667ffc00b31);
69 c->h[5] = U64(0x8eb44a8768581511);
70 c->h[6] = U64(0xdb0c2e0d64f98fa7);
71 c->h[7] = U64(0x47b5481dbefa4fa4);
72
73 c->Nl = 0;
74 c->Nh = 0;
75 c->num = 0;
76 c->md_len = SHA384_DIGEST_LENGTH;
77 return 1;
78 }
79
fips_md_init(SHA512)80 fips_md_init(SHA512)
81 {
82 c->h[0] = U64(0x6a09e667f3bcc908);
83 c->h[1] = U64(0xbb67ae8584caa73b);
84 c->h[2] = U64(0x3c6ef372fe94f82b);
85 c->h[3] = U64(0xa54ff53a5f1d36f1);
86 c->h[4] = U64(0x510e527fade682d1);
87 c->h[5] = U64(0x9b05688c2b3e6c1f);
88 c->h[6] = U64(0x1f83d9abfb41bd6b);
89 c->h[7] = U64(0x5be0cd19137e2179);
90
91 c->Nl = 0;
92 c->Nh = 0;
93 c->num = 0;
94 c->md_len = SHA512_DIGEST_LENGTH;
95 return 1;
96 }
97
/*
 * Forward declaration of the block transform used by the functions below.
 * It is called with a context, a pointer to input data and a block count
 * `num`.  When SHA512_ASM is not defined the function is one of the C
 * implementations later in this file and is therefore `static`; when
 * SHA512_ASM is defined it has external linkage (presumably provided by an
 * assembler module -- confirm against the build).
 */
98 # ifndef SHA512_ASM
99 static
100 # endif
101 void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
102
/*
 * SHA512_Final - pad the buffered tail, run the last block(s) and emit the
 * digest.
 *
 *   md  output buffer; receives c->md_len bytes (SHA384_DIGEST_LENGTH or
 *       SHA512_DIGEST_LENGTH), most significant byte of each 64-bit
 *       chaining word first.
 *   c   hashing context; its buffer and chaining state are modified.
 *
 * Returns 1 on success.  Returns 0 when md is NULL or when c->md_len is
 * neither of the two supported digest lengths; note that in both failure
 * cases the padding has already been applied and the final block already
 * processed.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *buf = (unsigned char *)c->u.p;
    const size_t blk = sizeof(c->u);
    size_t used = c->num;
    size_t words, k;

    /* The 0x80 marker always fits: the buffer is never left completely full. */
    buf[used] = 0x80;
    used++;

    /* No room left for the 16-byte length trailer: flush a padded block. */
    if (used > (blk - 16)) {
        memset(buf + used, 0, blk - used);
        used = 0;
        sha512_block_data_order(c, buf, 1);
    }

    memset(buf + used, 0, blk - 16 - used);

    /* Store the 128-bit message bit count (Nh:Nl) big-endian in the last 16 bytes. */
# ifdef B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
# else
    for (k = 0; k < 8; k++) {
        buf[blk - 1 - k] = (unsigned char)(c->Nl >> (8 * k));
        buf[blk - 9 - k] = (unsigned char)(c->Nh >> (8 * k));
    }
# endif

    sha512_block_data_order(c, buf, 1);

    if (md == 0)
        return 0;

    /* Only the two known digest lengths are honoured; anything else is refused. */
    switch (c->md_len) {
    case SHA384_DIGEST_LENGTH:
    case SHA512_DIGEST_LENGTH:
        words = c->md_len / 8;
        break;
    default:
        return 0;
    }

    for (k = 0; k < words; k++) {
        SHA_LONG64 v = c->h[k];
        int shift;

        for (shift = 56; shift >= 0; shift -= 8)
            *(md++) = (unsigned char)(v >> shift);
    }

    return 1;
}
179
/*
 * SHA384_Final - finalise a SHA-384 computation.
 *
 * Delegates to SHA512_Final(), which selects the output length from
 * c->md_len; the return value is passed through unchanged.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    const int status = SHA512_Final(md, c);

    return status;
}
184
/*
 * SHA512_Update - absorb `len` bytes of message data into the context.
 *
 *   c      hashing context (chaining state, bit counters, partial buffer)
 *   _data  input bytes
 *   len    number of input bytes; 0 is a no-op
 *
 * The 128-bit bit counter Nh:Nl is advanced first.  Any partially filled
 * buffer is then topped up and processed, whole blocks are passed to
 * sha512_block_data_order(), and whatever is left over is stored in the
 * context buffer with its length in c->num.  Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    SHA_LONG64 bits;

    if (len == 0)
        return 1;

    /* Add len*8 to the low counter word and propagate the carry into Nh. */
    bits = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (bits < c->Nl)
        c->Nh++;
    /* With a 64-bit size_t the top three bits of len overflow into Nh. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = bits;

    /* Complete a previously buffered partial block, if there is one. */
    if (c->num != 0) {
        size_t room = sizeof(c->u) - c->num;

        if (len < room) {
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= sizeof(c->u)) {
# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        /* Misaligned input is bounced through the context buffer block by block. */
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            while (len >= sizeof(c->u)) {
                memcpy(buf, src, sizeof(c->u));
                sha512_block_data_order(c, buf, 1);
                len -= sizeof(c->u);
                src += sizeof(c->u);
            }
        } else
# endif
        {
            size_t whole = len / sizeof(c->u);

            sha512_block_data_order(c, src, whole);
            src += whole * sizeof(c->u);
            len %= sizeof(c->u);
        }
    }

    /* Keep the remaining tail for the next Update/Final call. */
    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
232
/*
 * SHA384_Update - absorb message data into a SHA-384 computation.
 *
 * Forwards all arguments to SHA512_Update() and passes its return value
 * through unchanged.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    const int status = SHA512_Update(c, data, len);

    return status;
}
237
/*
 * SHA512_Transform - run the block transform over exactly one block.
 *
 *   c     hashing context whose chaining state is updated
 *   data  one block of input
 *
 * When SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA is not defined and `data` is
 * not aligned to sizeof(c->u.d[0]), the block is first copied into the
 * context buffer c->u.p (overwriting its contents) and processed from
 * there.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
    const unsigned char *block = data;

# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)block % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, block, sizeof(c->u.p));
        block = c->u.p;
    }
# endif
    sha512_block_data_order(c, block, 1);
}
246
SHA384(const unsigned char * d,size_t n,unsigned char * md)247 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
248 {
249 SHA512_CTX c;
250 static unsigned char m[SHA384_DIGEST_LENGTH];
251
252 if (md == NULL)
253 md = m;
254 SHA384_Init(&c);
255 SHA512_Update(&c, d, n);
256 SHA512_Final(md, &c);
257 OPENSSL_cleanse(&c, sizeof(c));
258 return (md);
259 }
260
SHA512(const unsigned char * d,size_t n,unsigned char * md)261 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
262 {
263 SHA512_CTX c;
264 static unsigned char m[SHA512_DIGEST_LENGTH];
265
266 if (md == NULL)
267 md = m;
268 SHA512_Init(&c);
269 SHA512_Update(&c, d, n);
270 SHA512_Final(md, &c);
271 OPENSSL_cleanse(&c, sizeof(c));
272 return (md);
273 }
274
275 # ifndef SHA512_ASM
/*
 * Table of 80 64-bit constants; entry i is added into the working state
 * in round i of the C implementations of sha512_block_data_order() below.
 * NOTE(review): these look like the SHA-384/SHA-512 round constants from
 * FIPS 180-4 -- verify against the standard before touching any value.
 */
276 static const SHA_LONG64 K512[80] = {
277 U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
278 U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
279 U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
280 U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
281 U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
282 U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
283 U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
284 U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
285 U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
286 U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
287 U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
288 U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
289 U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
290 U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
291 U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
292 U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
293 U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
294 U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
295 U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
296 U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
297 U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
298 U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
299 U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
300 U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
301 U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
302 U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
303 U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
304 U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
305 U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
306 U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
307 U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
308 U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
309 U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
310 U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
311 U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
312 U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
313 U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
314 U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
315 U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
316 U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
317 };
318
/*
 * Optional compiler/CPU-specific definitions of ROTR(a,n) and PULL64(x).
 * The whole section is skipped when PEDANTIC is defined.  Any macro left
 * undefined here is given a portable C definition by the #ifndef PULL64 /
 * #ifndef ROTR fallbacks that follow this section.
 */
319 # ifndef PEDANTIC
320 # if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
321 # if defined(__x86_64) || defined(__x86_64__)
/* GCC, x86-64: ROTR uses the "rorq" instruction; n must satisfy the "J" constraint. */
322 # define ROTR(a,n) ({ SHA_LONG64 ret; \
323 asm ("rorq %1,%0" \
324 : "=r"(ret) \
325 : "J"(n),"0"(a) \
326 : "cc"); ret; })
327 # if !defined(B_ENDIAN)
/* GCC, x86-64: read the 64-bit object x directly, then byte-swap it with "bswapq". */
328 # define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
329 asm ("bswapq %0" \
330 : "=r"(ret) \
331 : "0"(ret)); ret; })
332 # endif
333 # elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
334 # if defined(I386_ONLY)
/*
 * GCC, i386 with I386_ONLY: read x as two 32-bit words and reverse the
 * bytes of each using xchgb/roll only (no bswap); the first word in
 * memory becomes the high half of the result.
 */
335 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
336 unsigned int hi=p[0],lo=p[1]; \
337 asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
338 "roll $16,%%eax; roll $16,%%edx; "\
339 "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
340 : "=a"(lo),"=d"(hi) \
341 : "0"(lo),"1"(hi) : "cc"); \
342 ((SHA_LONG64)hi)<<32|lo; })
343 # else
/* GCC, i386: same two-word read, each half reversed with "bswapl". */
344 # define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
345 unsigned int hi=p[0],lo=p[1]; \
346 asm ("bswapl %0; bswapl %1;" \
347 : "=r"(lo),"=r"(hi) \
348 : "0"(lo),"1"(hi)); \
349 ((SHA_LONG64)hi)<<32|lo; })
350 # endif
351 # elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* GCC, 64-bit PowerPC: ROTR uses the "rotrdi" mnemonic with an immediate count. */
352 # define ROTR(a,n) ({ SHA_LONG64 ret; \
353 asm ("rotrdi %0,%1,%2" \
354 : "=r"(ret) \
355 : "r"(a),"K"(n)); ret; })
356 # endif
357 # elif defined(_MSC_VER)
358 # if defined(_WIN64) /* applies to both IA-64 and AMD64 */
/* MSVC, 64-bit Windows: ROTR maps onto the _rotr64 intrinsic. */
359 # pragma intrinsic(_rotr64)
360 # define ROTR(a,n) _rotr64((a),n)
361 # endif
362 # if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
363 # if defined(I386_ONLY)
/*
 * MSVC, x86 with I386_ONLY: load the two 32-bit words at [ecx] and
 * [ecx+4] into edx and eax and reverse the bytes of each using xchg/rol.
 * NOTE(review): there is no return statement; this relies on __fastcall
 * passing x in ecx and on the 64-bit result being taken from edx:eax --
 * confirm against the MSVC calling-convention documentation.
 */
__pull64be(const void * x)364 static SHA_LONG64 __fastcall __pull64be(const void *x)
365 {
366 _asm mov edx,[ecx + 0]
367 _asm mov eax,[ecx + 4]
368 _asm xchg dh, dl
369 _asm xchg ah, al
370 _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al}
371 # else
/*
 * MSVC, x86: same loads as the I386_ONLY variant, with each half reversed
 * by "bswap".  The same NOTE(review) about ecx / edx:eax applies.
 */
__pull64be(const void * x)372 static SHA_LONG64 __fastcall __pull64be(const void *x)
373 {
374 _asm mov edx,[ecx + 0]
375 _asm mov eax,[ecx + 4]
376 _asm bswap edx _asm bswap eax}
377 # endif
/* PULL64 passes the address of its argument to the helper above. */
378 # define PULL64(x) __pull64be(&(x))
379 # if _MSC_VER<=1200
380 # pragma inline_depth(0)
381 # endif
382 # endif
383 # endif
384 # endif
/*
 * Portable fallbacks and the SHA-512 logical functions.
 *
 * PULL64(x)  assembles a 64-bit value from the eight bytes of the object
 *            x, first byte most significant.  x must be an lvalue because
 *            its address is taken.  Defined only when no platform-specific
 *            PULL64 exists.
 * ROTR(x,s)  rotates the 64-bit value x right by s bits.  s must lie in
 *            1..63: the fallback shifts left by 64-s, so s == 0 would be
 *            a shift by the full width.  Defined only when no
 *            platform-specific ROTR exists.
 *
 * Every macro parameter is parenthesized in the expansions, so expression
 * arguments (e.g. ROTR(v, a+b)) expand with the intended grouping.  x is
 * evaluated more than once; do not pass expressions with side effects.
 */
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s)       (((x)>>(s)) | ((x)<<(64-(s))))
# endif
/* "Big" sigma functions: applied to the working variables a and e in each round. */
# define Sigma0(x)        (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# define Sigma1(x)        (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
/* "Small" sigma functions: used when extending the message schedule. */
# define sigma0(x)        (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
# define sigma1(x)        (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Ch: each bit of x selects the corresponding bit of y (x bit set) or z (x bit clear). */
# define Ch(x,y,z)        (((x) & (y)) ^ ((~(x)) & (z)))
/* Maj: bitwise majority of x, y and z. */
# define Maj(x,y,z)       (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
398 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
399 /*
400 * This code should give better results on 32-bit CPU with less than
401 * ~24 registers, both size and performance wise...
sha512_block_data_order(SHA512_CTX * ctx,const void * in,size_t num)402 */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
403 size_t num)
404 {
405 const SHA_LONG64 *W = in;
406 SHA_LONG64 A, E, T;
407 SHA_LONG64 X[9 + 80], *F;
408 int i;
409
410 while (num--) {
411
412 F = X + 80;
413 A = ctx->h[0];
414 F[1] = ctx->h[1];
415 F[2] = ctx->h[2];
416 F[3] = ctx->h[3];
417 E = ctx->h[4];
418 F[5] = ctx->h[5];
419 F[6] = ctx->h[6];
420 F[7] = ctx->h[7];
421
422 for (i = 0; i < 16; i++, F--) {
423 # ifdef B_ENDIAN
424 T = W[i];
425 # else
426 T = PULL64(W[i]);
427 # endif
428 F[0] = A;
429 F[4] = E;
430 F[8] = T;
431 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
432 E = F[3] + T;
433 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
434 }
435
436 for (; i < 80; i++, F--) {
437 T = sigma0(F[8 + 16 - 1]);
438 T += sigma1(F[8 + 16 - 14]);
439 T += F[8 + 16] + F[8 + 16 - 9];
440
441 F[0] = A;
442 F[4] = E;
443 F[8] = T;
444 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
445 E = F[3] + T;
446 A = T + Sigma0(A) + Maj(A, F[1], F[2]);
447 }
448
449 ctx->h[0] += A;
450 ctx->h[1] += F[1];
451 ctx->h[2] += F[2];
452 ctx->h[3] += F[3];
453 ctx->h[4] += E;
454 ctx->h[5] += F[5];
455 ctx->h[6] += F[6];
456 ctx->h[7] += F[7];
457
458 W += SHA_LBLOCK;
459 }
460 }
461
462 # elif defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Block transform, variant selected by OPENSSL_SMALL_FOOTPRINT.
 *
 * Uses plain loops over the 80 rounds, eight named working variables that
 * are shifted along after every round, and a 16-entry circular buffer for
 * the message schedule.
 *
 *   ctx  context whose chaining values h[0..7] are updated
 *   in   input data, read as SHA_LBLOCK 64-bit words per block
 *   num  number of consecutive blocks to process
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 sched[16];
    SHA_LONG64 a, b, c, d, e, f, g, h;
    SHA_LONG64 w_lo, w_hi, t1, t2;
    int round;

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        /* Rounds 0..15: message words come directly from the input block. */
        for (round = 0; round < 16; round++) {
#  ifdef B_ENDIAN
            sched[round] = blk[round];
#  else
            sched[round] = PULL64(blk[round]);
#  endif
            t1 = sched[round];
            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[round];
            t2 = Sigma0(a) + Maj(a, b, c);

            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* Rounds 16..79: extend the schedule in place, modulo 16. */
        for (; round < 80; round++) {
            w_lo = sched[(round + 1) & 0x0f];
            w_lo = sigma0(w_lo);
            w_hi = sched[(round + 14) & 0x0f];
            w_hi = sigma1(w_hi);

            sched[round & 0xf] += w_lo + w_hi + sched[(round + 9) & 0xf];
            t1 = sched[round & 0xf];
            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[round];
            t2 = Sigma0(a) + Maj(a, b, c);

            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* Fold the working variables back into the chaining state. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
531
532 # else
533 # define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
534 T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
535 h = Sigma0(a) + Maj(a,b,c); \
536 d += T1; h += T1; } while (0)
537 # define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
538 s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
539 s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
540 T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
541 ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
sha512_block_data_order(SHA512_CTX * ctx,const void * in,size_t num)542 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
543 size_t num)
544 {
545 const SHA_LONG64 *W = in;
546 SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
547 SHA_LONG64 X[16];
548 int i;
549
550 while (num--) {
551
552 a = ctx->h[0];
553 b = ctx->h[1];
554 c = ctx->h[2];
555 d = ctx->h[3];
556 e = ctx->h[4];
557 f = ctx->h[5];
558 g = ctx->h[6];
559 h = ctx->h[7];
560
561 # ifdef B_ENDIAN
562 T1 = X[0] = W[0];
563 ROUND_00_15(0, a, b, c, d, e, f, g, h);
564 T1 = X[1] = W[1];
565 ROUND_00_15(1, h, a, b, c, d, e, f, g);
566 T1 = X[2] = W[2];
567 ROUND_00_15(2, g, h, a, b, c, d, e, f);
568 T1 = X[3] = W[3];
569 ROUND_00_15(3, f, g, h, a, b, c, d, e);
570 T1 = X[4] = W[4];
571 ROUND_00_15(4, e, f, g, h, a, b, c, d);
572 T1 = X[5] = W[5];
573 ROUND_00_15(5, d, e, f, g, h, a, b, c);
574 T1 = X[6] = W[6];
575 ROUND_00_15(6, c, d, e, f, g, h, a, b);
576 T1 = X[7] = W[7];
577 ROUND_00_15(7, b, c, d, e, f, g, h, a);
578 T1 = X[8] = W[8];
579 ROUND_00_15(8, a, b, c, d, e, f, g, h);
580 T1 = X[9] = W[9];
581 ROUND_00_15(9, h, a, b, c, d, e, f, g);
582 T1 = X[10] = W[10];
583 ROUND_00_15(10, g, h, a, b, c, d, e, f);
584 T1 = X[11] = W[11];
585 ROUND_00_15(11, f, g, h, a, b, c, d, e);
586 T1 = X[12] = W[12];
587 ROUND_00_15(12, e, f, g, h, a, b, c, d);
588 T1 = X[13] = W[13];
589 ROUND_00_15(13, d, e, f, g, h, a, b, c);
590 T1 = X[14] = W[14];
591 ROUND_00_15(14, c, d, e, f, g, h, a, b);
592 T1 = X[15] = W[15];
593 ROUND_00_15(15, b, c, d, e, f, g, h, a);
594 # else
595 T1 = X[0] = PULL64(W[0]);
596 ROUND_00_15(0, a, b, c, d, e, f, g, h);
597 T1 = X[1] = PULL64(W[1]);
598 ROUND_00_15(1, h, a, b, c, d, e, f, g);
599 T1 = X[2] = PULL64(W[2]);
600 ROUND_00_15(2, g, h, a, b, c, d, e, f);
601 T1 = X[3] = PULL64(W[3]);
602 ROUND_00_15(3, f, g, h, a, b, c, d, e);
603 T1 = X[4] = PULL64(W[4]);
604 ROUND_00_15(4, e, f, g, h, a, b, c, d);
605 T1 = X[5] = PULL64(W[5]);
606 ROUND_00_15(5, d, e, f, g, h, a, b, c);
607 T1 = X[6] = PULL64(W[6]);
608 ROUND_00_15(6, c, d, e, f, g, h, a, b);
609 T1 = X[7] = PULL64(W[7]);
610 ROUND_00_15(7, b, c, d, e, f, g, h, a);
611 T1 = X[8] = PULL64(W[8]);
612 ROUND_00_15(8, a, b, c, d, e, f, g, h);
613 T1 = X[9] = PULL64(W[9]);
614 ROUND_00_15(9, h, a, b, c, d, e, f, g);
615 T1 = X[10] = PULL64(W[10]);
616 ROUND_00_15(10, g, h, a, b, c, d, e, f);
617 T1 = X[11] = PULL64(W[11]);
618 ROUND_00_15(11, f, g, h, a, b, c, d, e);
619 T1 = X[12] = PULL64(W[12]);
620 ROUND_00_15(12, e, f, g, h, a, b, c, d);
621 T1 = X[13] = PULL64(W[13]);
622 ROUND_00_15(13, d, e, f, g, h, a, b, c);
623 T1 = X[14] = PULL64(W[14]);
624 ROUND_00_15(14, c, d, e, f, g, h, a, b);
625 T1 = X[15] = PULL64(W[15]);
626 ROUND_00_15(15, b, c, d, e, f, g, h, a);
627 # endif
628
629 for (i = 16; i < 80; i += 16) {
630 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
631 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
632 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
633 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
634 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
635 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
636 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
637 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
638 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
639 ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
640 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
641 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
642 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
643 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
644 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
645 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
646 }
647
648 ctx->h[0] += a;
649 ctx->h[1] += b;
650 ctx->h[2] += c;
651 ctx->h[3] += d;
652 ctx->h[4] += e;
653 ctx->h[5] += f;
654 ctx->h[6] += g;
655 ctx->h[7] += h;
656
657 W += SHA_LBLOCK;
658 }
659 }
660
661 # endif
662
663 # endif /* SHA512_ASM */
664
665 #else /* !OPENSSL_NO_SHA512 */
666
/*
 * This branch is compiled when the #if at the top of the file is false,
 * i.e. SHA/SHA-512 support is configured out.  For PEDANTIC builds, DEC C
 * and Mac OS X it defines a single self-referencing static pointer --
 * presumably so the translation unit is not empty; confirm the exact
 * toolchain reason before removing it.
 */
667 # if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
668 static void *dummy = &dummy;
669 # endif
670
671 #endif /* !OPENSSL_NO_SHA512 */
672