1 /* crypto/sha/sha512.c */
2 /* ====================================================================
3  * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
4  * according to the OpenSSL license [found in ../../LICENSE].
5  * ====================================================================
6  */
7 #include <openssl/opensslconf.h>
8 #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
9 /*-
10  * IMPLEMENTATION NOTES.
11  *
12  * As you might have noticed 32-bit hash algorithms:
13  *
14  * - permit SHA_LONG to be wider than 32-bit (case on CRAY);
15  * - optimized versions implement two transform functions: one operating
16  *   on [aligned] data in host byte order and one - on data in input
17  *   stream byte order;
18  * - share common byte-order neutral collector and padding function
19  *   implementations, ../md32_common.h;
20  *
 * Neither of the above applies to this SHA-512 implementation. Reasons
22  * [in reverse order] are:
23  *
24  * - it's the only 64-bit hash algorithm for the moment of this writing,
25  *   there is no need for common collector/padding implementation [yet];
26  * - by supporting only one transform function [which operates on
27  *   *aligned* data in input stream byte order, big-endian in this case]
28  *   we minimize burden of maintenance in two ways: a) collector/padding
29  *   function is simpler; b) only one transform function to stare at;
30  * - SHA_LONG64 is required to be exactly 64-bit in order to be able to
31  *   apply a number of optimizations to mitigate potential performance
32  *   penalties caused by previous design decision;
33  *
34  * Caveat lector.
35  *
36  * Implementation relies on the fact that "long long" is 64-bit on
37  * both 32- and 64-bit platforms. If some compiler vendor comes up
38  * with 128-bit long long, adjustment to sha.h would be required.
39  * As this implementation relies on 64-bit integer type, it's totally
40  * inappropriate for platforms which don't support it, most notably
41  * 16-bit platforms.
42  *                                      <appro@fy.chalmers.se>
43  */
44 # include <stdlib.h>
45 # include <string.h>
46 
47 # include <openssl/crypto.h>
48 # include <openssl/sha.h>
49 # include <openssl/opensslv.h>
50 
51 # include "cryptlib.h"
52 
/*
 * Identification string for this module: the literal "SHA-512" followed by
 * OPENSSL_VERSION_PTEXT (a macro supplied by a header not shown here).
 */
const char SHA512_version[] = "SHA-512" OPENSSL_VERSION_PTEXT;

/*
 * On x86, x86-64 and s390/s390x targets, and whenever SHA512_ASM is
 * defined, mark sha512_block_data_order() as able to take input at any
 * alignment.  SHA512_Update() and SHA512_Transform() test this macro and,
 * when it is absent, copy misaligned input through the context buffer
 * before hashing it.
 */
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(SHA512_ASM)
#  define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
# endif
61 
fips_md_init_ctx(SHA384,SHA512)62 fips_md_init_ctx(SHA384, SHA512)
63 {
64     c->h[0] = U64(0xcbbb9d5dc1059ed8);
65     c->h[1] = U64(0x629a292a367cd507);
66     c->h[2] = U64(0x9159015a3070dd17);
67     c->h[3] = U64(0x152fecd8f70e5939);
68     c->h[4] = U64(0x67332667ffc00b31);
69     c->h[5] = U64(0x8eb44a8768581511);
70     c->h[6] = U64(0xdb0c2e0d64f98fa7);
71     c->h[7] = U64(0x47b5481dbefa4fa4);
72 
73     c->Nl = 0;
74     c->Nh = 0;
75     c->num = 0;
76     c->md_len = SHA384_DIGEST_LENGTH;
77     return 1;
78 }
79 
fips_md_init(SHA512)80 fips_md_init(SHA512)
81 {
82     c->h[0] = U64(0x6a09e667f3bcc908);
83     c->h[1] = U64(0xbb67ae8584caa73b);
84     c->h[2] = U64(0x3c6ef372fe94f82b);
85     c->h[3] = U64(0xa54ff53a5f1d36f1);
86     c->h[4] = U64(0x510e527fade682d1);
87     c->h[5] = U64(0x9b05688c2b3e6c1f);
88     c->h[6] = U64(0x1f83d9abfb41bd6b);
89     c->h[7] = U64(0x5be0cd19137e2179);
90 
91     c->Nl = 0;
92     c->Nh = 0;
93     c->num = 0;
94     c->md_len = SHA512_DIGEST_LENGTH;
95     return 1;
96 }
97 
/*
 * Block transform used by the update/final/transform entry points in this
 * file: `in` points at the input and `num` is a count of whole
 * sizeof(ctx->u)-byte blocks.  The symbol has internal linkage unless
 * SHA512_ASM is defined, in which case the definition is expected to come
 * from outside this file (presumably an assembler module -- confirm against
 * the build).
 */
# ifndef SHA512_ASM
static
# endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);
102 
/*
 * Finish a SHA-384/SHA-512 computation.
 *
 * Appends the 0x80 marker, zero padding and the 128-bit message length
 * (Nh:Nl, big-endian) to the buffered data, runs the remaining block(s)
 * through sha512_block_data_order(), then serialises the hash words into
 * md most-significant byte first.
 *
 * md  receives c->md_len bytes of digest.
 * c   context previously fed through SHA512_Update().
 *
 * Returns 1 on success.  Returns 0 when md is NULL or when c->md_len is
 * neither SHA384_DIGEST_LENGTH nor SHA512_DIGEST_LENGTH; in both cases the
 * final compression has already been applied to the context.
 */
int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *buf = (unsigned char *)c->u.p;
    const size_t blk = sizeof(c->u);
    size_t used = c->num;
    size_t words, w;

    /* c->num is always below the block size, so one byte always fits. */
    buf[used++] = 0x80;

    /* No room left for the 16 length bytes: flush a padding-only block. */
    if (used > blk - 16) {
        memset(buf + used, 0, blk - used);
        used = 0;
        sha512_block_data_order(c, buf, 1);
    }

    memset(buf + used, 0, blk - 16 - used);
# ifdef  B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
# else
    {
        int k;

        /* Store Nh then Nl big-endian in the last 16 bytes of the block. */
        for (k = 0; k < 8; k++) {
            buf[blk - 1 - k] = (unsigned char)(c->Nl >> (8 * k));
            buf[blk - 9 - k] = (unsigned char)(c->Nh >> (8 * k));
        }
    }
# endif

    sha512_block_data_order(c, buf, 1);

    if (md == NULL)
        return 0;

    /* Only the two known digest sizes are honoured; anything else fails. */
    switch (c->md_len) {
    case SHA384_DIGEST_LENGTH:
        words = SHA384_DIGEST_LENGTH / 8;
        break;
    case SHA512_DIGEST_LENGTH:
        words = SHA512_DIGEST_LENGTH / 8;
        break;
    default:
        return 0;
    }

    for (w = 0; w < words; w++) {
        SHA_LONG64 t = c->h[w];
        int shift;

        for (shift = 56; shift >= 0; shift -= 8)
            *(md++) = (unsigned char)(t >> shift);
    }

    return 1;
}
179 
/*
 * SHA-384 finaliser: forwards to SHA512_Final(), which picks the output
 * length from c->md_len.  Returns whatever SHA512_Final() returns.
 */
int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}
184 
/*
 * Absorb len bytes of message data into the context.
 *
 * c      hash context.
 * _data  input bytes; not dereferenced when len is 0.
 * len    number of input bytes.
 *
 * The 128-bit bit counter Nh:Nl is advanced first.  Data is then used to
 * top up a partially filled buffer, whole blocks are hashed (directly from
 * the caller's memory when that is permitted), and any tail shorter than a
 * block is kept in the buffer with its length in c->num.
 *
 * Always returns 1.
 */
int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned char *buf = c->u.p;
    SHA_LONG64 bits;

    if (len == 0)
        return 1;

    /* Add len*8 to Nl, carrying into Nh on wrap-around. */
    bits = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (bits < c->Nl)
        c->Nh++;
    /* With a 64-bit size_t the top three bits of len belong in Nh. */
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = bits;

    if (c->num != 0) {
        size_t room = sizeof(c->u) - c->num;

        if (len < room) {
            /* Still not a full block: just stash the bytes. */
            memcpy(buf + c->num, src, len);
            c->num += (unsigned int)len;
            return 1;
        }

        /* Complete the buffered block and hash it. */
        memcpy(buf + c->num, src, room);
        c->num = 0;
        len -= room;
        src += room;
        sha512_block_data_order(c, buf, 1);
    }

    if (len >= sizeof(c->u)) {
# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)src % sizeof(c->u.d[0]) != 0) {
            /* Misaligned input: bounce each block through the buffer. */
            while (len >= sizeof(c->u)) {
                memcpy(buf, src, sizeof(c->u));
                sha512_block_data_order(c, buf, 1);
                len -= sizeof(c->u);
                src += sizeof(c->u);
            }
        } else
# endif
        {
            size_t blocks = len / sizeof(c->u);

            sha512_block_data_order(c, src, blocks);
            src += blocks * sizeof(c->u);
            len %= sizeof(c->u);
        }
    }

    /* Keep the sub-block remainder for the next call. */
    if (len != 0) {
        memcpy(buf, src, len);
        c->num = (int)len;
    }

    return 1;
}
232 
/*
 * SHA-384 update: forwards to SHA512_Update() unchanged and returns its
 * result.
 */
int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    return SHA512_Update(c, data, len);
}
237 
/*
 * Run exactly one block at `data` through the block transform.
 *
 * When the transform is not flagged as alignment-tolerant and `data` is not
 * aligned to the size of a c->u.d element, the block is first copied into
 * the context buffer c->u.p (overwriting whatever was buffered there) and
 * hashed from that copy.  The bit counters and c->num are not touched.
 */
void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
    const unsigned char *src = data;

# ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0) {
        memcpy(c->u.p, data, sizeof(c->u.p));
        src = c->u.p;
    }
# endif
    sha512_block_data_order(c, src, 1);
}
246 
SHA384(const unsigned char * d,size_t n,unsigned char * md)247 unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
248 {
249     SHA512_CTX c;
250     static unsigned char m[SHA384_DIGEST_LENGTH];
251 
252     if (md == NULL)
253         md = m;
254     SHA384_Init(&c);
255     SHA512_Update(&c, d, n);
256     SHA512_Final(md, &c);
257     OPENSSL_cleanse(&c, sizeof(c));
258     return (md);
259 }
260 
SHA512(const unsigned char * d,size_t n,unsigned char * md)261 unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
262 {
263     SHA512_CTX c;
264     static unsigned char m[SHA512_DIGEST_LENGTH];
265 
266     if (md == NULL)
267         md = m;
268     SHA512_Init(&c);
269     SHA512_Update(&c, d, n);
270     SHA512_Final(md, &c);
271     OPENSSL_cleanse(&c, sizeof(c));
272     return (md);
273 }
274 
275 # ifndef SHA512_ASM
/*
 * Per-round additive constants, one for each of the 80 rounds; the C
 * implementations of sha512_block_data_order() in this file index the table
 * by round number.  The values are intended to be the SHA-384/SHA-512
 * constants K[0..79] of FIPS 180-4 -- check against the standard before
 * editing any entry.
 */
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
318 
/*
 * Optional platform-specific definitions of two primitives:
 *
 *   ROTR(a,n)  - rotate the 64-bit value a right by n bits;
 *   PULL64(x)  - read the 8 bytes stored at &x as a big-endian 64-bit word.
 *
 * Nothing in this section is selected when PEDANTIC is defined.  Any
 * primitive left undefined here is expected to be supplied by the generic
 * #ifndef fallbacks that follow this section.
 */
#  ifndef PEDANTIC
/* GCC-compatible compilers with inline assembler allowed. */
#   if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(__x86_64) || defined(__x86_64__)
/* x86-64: rotate with a single rorq. */
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rorq %1,%0"       \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#     if !defined(B_ENDIAN)
/* x86-64: load the word through a SHA_LONG64 pointer, then bswapq it. */
#      define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));  \
                                asm ("bswapq    %0"             \
                                : "=r"(ret)                     \
                                : "0"(ret)); ret;               })
#     endif
#    elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#     if defined(I386_ONLY)
/*
 * 32-bit x86 with I386_ONLY: byte-reverse each 32-bit half using only
 * xchgb and roll (no bswapl), then join the halves with the first word in
 * memory as the high half.
 */
#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;"\
                                    "roll $16,%%eax; roll $16,%%edx; "\
                                    "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                                : "=a"(lo),"=d"(hi)             \
                                : "0"(lo),"1"(hi) : "cc");      \
                                ((SHA_LONG64)hi)<<32|lo;        })
#     else
/* 32-bit x86: bswapl each half, first word in memory is the high half. */
#      define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
                         unsigned int hi=p[0],lo=p[1];          \
                                asm ("bswapl %0; bswapl %1;"    \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                                ((SHA_LONG64)hi)<<32|lo;        })
#     endif
#    elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
/* 64-bit PowerPC: rotate with rotrdi. */
#     define ROTR(a,n)    ({ SHA_LONG64 ret;              \
                                asm ("rotrdi %0,%1,%2"  \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#    endif
/* Microsoft compilers. */
#   elif defined(_MSC_VER)
#    if defined(_WIN64)         /* applies to both IA-64 and AMD64 */
#     pragma intrinsic(_rotr64)
#     define ROTR(a,n)    _rotr64((a),n)
#    endif
#    if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#     if defined(I386_ONLY)
/*
 * __pull64be (I386_ONLY flavour): loads the two 32-bit words at x (taken
 * from ecx, the function being __fastcall) into edx and eax and
 * byte-reverses each with xchg/rol.  There is no return statement; the
 * result is presumably picked up from edx:eax -- confirm against the MSVC
 * calling convention.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
_asm xchg dh, dl
        _asm xchg ah, al
        _asm rol edx, 16 _asm rol eax, 16 _asm xchg dh, dl _asm xchg ah, al}
#     else
/*
 * __pull64be: same contract as the I386_ONLY flavour, but byte-reverses
 * each half with bswap.
 */
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
_asm bswap edx _asm bswap eax}
#     endif
#     define PULL64(x) __pull64be(&(x))
/* NOTE(review): presumably keeps old MSVC from inlining __pull64be -- confirm. */
#     if _MSC_VER<=1200
#      pragma inline_depth(0)
#     endif
#    endif
#   endif
#  endif
/*
 * Portable fallbacks, used only when nothing earlier in the file has
 * already defined PULL64 or ROTR.
 *
 *   B(x,j)     - byte j (0 = first in memory) of the object x, widened to
 *                SHA_LONG64 and shifted to its big-endian position;
 *   PULL64(x)  - the 8 bytes starting at &x assembled as a big-endian word;
 *   ROTR(x,s)  - x rotated right by s bits; s must be in 1..63, and x is
 *                expected to be a 64-bit unsigned value.
 *
 * Every macro parameter is parenthesised so that expression arguments such
 * as ROTR(v, a + b) expand with the intended precedence.
 */
#  ifndef PULL64
#   define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&(x)))+(j))))<<((7-(j))*8))
#   define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#  endif
#  ifndef ROTR
#   define ROTR(x,s)       (((x)>>(s)) | (x)<<(64-(s)))
#  endif
/*
 * Bitwise helper functions used by the round computation, built on ROTR
 * with the rotation and shift amounts listed below.
 */
#  define Sigma0(x)       (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#  define Sigma1(x)       (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#  define sigma0(x)       (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#  define sigma1(x)       (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
/* Ch: each result bit comes from y where x has a 1 bit, from z otherwise. */
#  define Ch(x,y,z)       (((x) & (y)) ^ ((~(x)) & (z)))
/* Maj: each result bit is the majority vote of the three input bits. */
#  define Maj(x,y,z)      (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
398 #  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
399 /*
400  * This code should give better results on 32-bit CPU with less than
401  * ~24 registers, both size and performance wise...
sha512_block_data_order(SHA512_CTX * ctx,const void * in,size_t num)402  */ static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
403                                         size_t num)
404 {
405     const SHA_LONG64 *W = in;
406     SHA_LONG64 A, E, T;
407     SHA_LONG64 X[9 + 80], *F;
408     int i;
409 
410     while (num--) {
411 
412         F = X + 80;
413         A = ctx->h[0];
414         F[1] = ctx->h[1];
415         F[2] = ctx->h[2];
416         F[3] = ctx->h[3];
417         E = ctx->h[4];
418         F[5] = ctx->h[5];
419         F[6] = ctx->h[6];
420         F[7] = ctx->h[7];
421 
422         for (i = 0; i < 16; i++, F--) {
423 #   ifdef B_ENDIAN
424             T = W[i];
425 #   else
426             T = PULL64(W[i]);
427 #   endif
428             F[0] = A;
429             F[4] = E;
430             F[8] = T;
431             T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
432             E = F[3] + T;
433             A = T + Sigma0(A) + Maj(A, F[1], F[2]);
434         }
435 
436         for (; i < 80; i++, F--) {
437             T = sigma0(F[8 + 16 - 1]);
438             T += sigma1(F[8 + 16 - 14]);
439             T += F[8 + 16] + F[8 + 16 - 9];
440 
441             F[0] = A;
442             F[4] = E;
443             F[8] = T;
444             T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
445             E = F[3] + T;
446             A = T + Sigma0(A) + Maj(A, F[1], F[2]);
447         }
448 
449         ctx->h[0] += A;
450         ctx->h[1] += F[1];
451         ctx->h[2] += F[2];
452         ctx->h[3] += F[3];
453         ctx->h[4] += E;
454         ctx->h[5] += F[5];
455         ctx->h[6] += F[6];
456         ctx->h[7] += F[7];
457 
458         W += SHA_LBLOCK;
459     }
460 }
461 
462 #  elif defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Compact block transform (OPENSSL_SMALL_FOOTPRINT build).
 *
 * Hashes `num` consecutive blocks of SHA_LBLOCK 64-bit words starting at
 * `in`, updating ctx->h in place.  The message schedule is kept in a
 * 16-entry ring (`sched`), indexed modulo 16, instead of being expanded to
 * all 80 words.
 */
static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *blk = in;
    SHA_LONG64 sched[16];

    for (; num != 0; num--, blk += SHA_LBLOCK) {
        SHA_LONG64 a = ctx->h[0];
        SHA_LONG64 b = ctx->h[1];
        SHA_LONG64 c = ctx->h[2];
        SHA_LONG64 d = ctx->h[3];
        SHA_LONG64 e = ctx->h[4];
        SHA_LONG64 f = ctx->h[5];
        SHA_LONG64 g = ctx->h[6];
        SHA_LONG64 h = ctx->h[7];
        SHA_LONG64 t1, t2;
        int r;

        for (r = 0; r < 80; r++) {
            if (r < 16) {
                /* First 16 rounds consume the input words directly. */
#   ifdef B_ENDIAN
                sched[r] = blk[r];
#   else
                sched[r] = PULL64(blk[r]);
#   endif
            } else {
                /* Ring slots r+1 and r+14 hold the words 15 and 2 back. */
                SHA_LONG64 w15 = sched[(r + 1) & 0x0f];
                SHA_LONG64 w2 = sched[(r + 14) & 0x0f];

                sched[r & 0xf] += sigma0(w15) + sigma1(w2)
                    + sched[(r + 9) & 0xf];
            }

            t1 = sched[r & 0xf];
            t1 += h + Sigma1(e) + Ch(e, f, g) + K512[r];
            t2 = Sigma0(a) + Maj(a, b, c);

            /* Rotate the eight working variables by one position. */
            h = g;
            g = f;
            f = e;
            e = d + t1;
            d = c;
            c = b;
            b = a;
            a = t1 + t2;
        }

        /* Fold the working state back into the chaining value. */
        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;
    }
}
531 
532 #  else
533 #   define ROUND_00_15(i,a,b,c,d,e,f,g,h)          do {    \
534         T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];      \
535         h = Sigma0(a) + Maj(a,b,c);                     \
536         d += T1;        h += T1;                } while (0)
537 #   define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)      do {    \
538         s0 = X[(j+1)&0x0f];     s0 = sigma0(s0);        \
539         s1 = X[(j+14)&0x0f];    s1 = sigma1(s1);        \
540         T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];    \
541         ROUND_00_15(i+j,a,b,c,d,e,f,g,h);               } while (0)
sha512_block_data_order(SHA512_CTX * ctx,const void * in,size_t num)542 static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
543                                     size_t num)
544 {
545     const SHA_LONG64 *W = in;
546     SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
547     SHA_LONG64 X[16];
548     int i;
549 
550     while (num--) {
551 
552         a = ctx->h[0];
553         b = ctx->h[1];
554         c = ctx->h[2];
555         d = ctx->h[3];
556         e = ctx->h[4];
557         f = ctx->h[5];
558         g = ctx->h[6];
559         h = ctx->h[7];
560 
561 #   ifdef B_ENDIAN
562         T1 = X[0] = W[0];
563         ROUND_00_15(0, a, b, c, d, e, f, g, h);
564         T1 = X[1] = W[1];
565         ROUND_00_15(1, h, a, b, c, d, e, f, g);
566         T1 = X[2] = W[2];
567         ROUND_00_15(2, g, h, a, b, c, d, e, f);
568         T1 = X[3] = W[3];
569         ROUND_00_15(3, f, g, h, a, b, c, d, e);
570         T1 = X[4] = W[4];
571         ROUND_00_15(4, e, f, g, h, a, b, c, d);
572         T1 = X[5] = W[5];
573         ROUND_00_15(5, d, e, f, g, h, a, b, c);
574         T1 = X[6] = W[6];
575         ROUND_00_15(6, c, d, e, f, g, h, a, b);
576         T1 = X[7] = W[7];
577         ROUND_00_15(7, b, c, d, e, f, g, h, a);
578         T1 = X[8] = W[8];
579         ROUND_00_15(8, a, b, c, d, e, f, g, h);
580         T1 = X[9] = W[9];
581         ROUND_00_15(9, h, a, b, c, d, e, f, g);
582         T1 = X[10] = W[10];
583         ROUND_00_15(10, g, h, a, b, c, d, e, f);
584         T1 = X[11] = W[11];
585         ROUND_00_15(11, f, g, h, a, b, c, d, e);
586         T1 = X[12] = W[12];
587         ROUND_00_15(12, e, f, g, h, a, b, c, d);
588         T1 = X[13] = W[13];
589         ROUND_00_15(13, d, e, f, g, h, a, b, c);
590         T1 = X[14] = W[14];
591         ROUND_00_15(14, c, d, e, f, g, h, a, b);
592         T1 = X[15] = W[15];
593         ROUND_00_15(15, b, c, d, e, f, g, h, a);
594 #   else
595         T1 = X[0] = PULL64(W[0]);
596         ROUND_00_15(0, a, b, c, d, e, f, g, h);
597         T1 = X[1] = PULL64(W[1]);
598         ROUND_00_15(1, h, a, b, c, d, e, f, g);
599         T1 = X[2] = PULL64(W[2]);
600         ROUND_00_15(2, g, h, a, b, c, d, e, f);
601         T1 = X[3] = PULL64(W[3]);
602         ROUND_00_15(3, f, g, h, a, b, c, d, e);
603         T1 = X[4] = PULL64(W[4]);
604         ROUND_00_15(4, e, f, g, h, a, b, c, d);
605         T1 = X[5] = PULL64(W[5]);
606         ROUND_00_15(5, d, e, f, g, h, a, b, c);
607         T1 = X[6] = PULL64(W[6]);
608         ROUND_00_15(6, c, d, e, f, g, h, a, b);
609         T1 = X[7] = PULL64(W[7]);
610         ROUND_00_15(7, b, c, d, e, f, g, h, a);
611         T1 = X[8] = PULL64(W[8]);
612         ROUND_00_15(8, a, b, c, d, e, f, g, h);
613         T1 = X[9] = PULL64(W[9]);
614         ROUND_00_15(9, h, a, b, c, d, e, f, g);
615         T1 = X[10] = PULL64(W[10]);
616         ROUND_00_15(10, g, h, a, b, c, d, e, f);
617         T1 = X[11] = PULL64(W[11]);
618         ROUND_00_15(11, f, g, h, a, b, c, d, e);
619         T1 = X[12] = PULL64(W[12]);
620         ROUND_00_15(12, e, f, g, h, a, b, c, d);
621         T1 = X[13] = PULL64(W[13]);
622         ROUND_00_15(13, d, e, f, g, h, a, b, c);
623         T1 = X[14] = PULL64(W[14]);
624         ROUND_00_15(14, c, d, e, f, g, h, a, b);
625         T1 = X[15] = PULL64(W[15]);
626         ROUND_00_15(15, b, c, d, e, f, g, h, a);
627 #   endif
628 
629         for (i = 16; i < 80; i += 16) {
630             ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
631             ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
632             ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
633             ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
634             ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
635             ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
636             ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
637             ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
638             ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
639             ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
640             ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
641             ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
642             ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
643             ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
644             ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
645             ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
646         }
647 
648         ctx->h[0] += a;
649         ctx->h[1] += b;
650         ctx->h[2] += c;
651         ctx->h[3] += d;
652         ctx->h[4] += e;
653         ctx->h[5] += f;
654         ctx->h[6] += g;
655         ctx->h[7] += h;
656 
657         W += SHA_LBLOCK;
658     }
659 }
660 
661 #  endif
662 
663 # endif                         /* SHA512_ASM */
664 
665 #else                           /* !OPENSSL_NO_SHA512 */
666 
/*
 * SHA-512 support is compiled out on this branch.  For PEDANTIC builds,
 * DEC C and Mac OS X, emit a single self-referencing static object,
 * presumably so that the translation unit is not empty -- confirm against
 * the toolchains concerned.
 */
# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX)
static void *dummy = &dummy;
# endif
670 
671 #endif                          /* !OPENSSL_NO_SHA512 */
672