/* $OpenBSD: sha512.c,v 1.16 2021/11/09 18:40:21 bcook Exp $ */
/* ====================================================================
 * Copyright (c) 2004 The OpenSSL Project.  All rights reserved
 * according to the OpenSSL license [found in ../../LICENSE].
 * ====================================================================
 */

#include <endian.h>
#include <stdlib.h>
#include <string.h>

#include <openssl/opensslconf.h>

#if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA512)
/*
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, the 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32 bits (as is the case on CRAY);
 * - provide, in their optimized versions, two transform functions: one
 *   operating on [aligned] data in host byte order and one on data in
 *   input stream byte order;
 * - share a common byte-order-neutral collector and padding function
 *   implementation, ../md32_common.h.
 *
 * None of the above applies to this SHA-512 implementation. The
 * reasons [in reverse order] are:
 *
 * - it's the only 64-bit hash algorithm at the moment of this writing,
 *   so there is no need for a common collector/padding implementation
 *   [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this
 *   case] we minimize the maintenance burden in two ways: a) the
 *   collector/padding function is simpler; b) there is only one
 *   transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64 bits wide in order to
 *   apply a number of optimizations that mitigate the potential
 *   performance penalties caused by the previous design decision.
 *
 * Caveat lector.
 *
 * The implementation relies on the fact that "long long" is 64 bits
 * wide on both 32- and 64-bit platforms. If some compiler vendor comes
 * up with a 128-bit long long, an adjustment to sha.h would be
 * required. As this implementation relies on a 64-bit integer type,
 * it's totally inappropriate for platforms which don't support one,
 * most notably 16-bit platforms.
 *					<appro@fy.chalmers.se>
 */
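
/*
 * Illustrative sketch, not part of the library: the streaming interface
 * is the usual Init/Update/Final triple, and any number of Update calls
 * may precede Final.  Assuming hypothetical buffers part1/part2 with
 * lengths part1_len/part2_len:
 *
 *	SHA512_CTX ctx;
 *	unsigned char digest[SHA512_DIGEST_LENGTH];
 *
 *	SHA512_Init(&ctx);
 *	SHA512_Update(&ctx, part1, part1_len);
 *	SHA512_Update(&ctx, part2, part2_len);
 *	SHA512_Final(digest, &ctx);
 */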

#include <openssl/crypto.h>
#include <openssl/opensslv.h>
#include <openssl/sha.h>

#if !defined(__STRICT_ALIGNMENT) || defined(SHA512_ASM)
#define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

int SHA384_Init(SHA512_CTX *c)
	{
	c->h[0]=U64(0xcbbb9d5dc1059ed8);
	c->h[1]=U64(0x629a292a367cd507);
	c->h[2]=U64(0x9159015a3070dd17);
	c->h[3]=U64(0x152fecd8f70e5939);
	c->h[4]=U64(0x67332667ffc00b31);
	c->h[5]=U64(0x8eb44a8768581511);
	c->h[6]=U64(0xdb0c2e0d64f98fa7);
	c->h[7]=U64(0x47b5481dbefa4fa4);

	c->Nl=0;	c->Nh=0;
	c->num=0;	c->md_len=SHA384_DIGEST_LENGTH;
	return 1;
	}

int SHA512_Init(SHA512_CTX *c)
	{
	c->h[0]=U64(0x6a09e667f3bcc908);
	c->h[1]=U64(0xbb67ae8584caa73b);
	c->h[2]=U64(0x3c6ef372fe94f82b);
	c->h[3]=U64(0xa54ff53a5f1d36f1);
	c->h[4]=U64(0x510e527fade682d1);
	c->h[5]=U64(0x9b05688c2b3e6c1f);
	c->h[6]=U64(0x1f83d9abfb41bd6b);
	c->h[7]=U64(0x5be0cd19137e2179);

	c->Nl=0;	c->Nh=0;
	c->num=0;	c->md_len=SHA512_DIGEST_LENGTH;
	return 1;
	}

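/*
 * The single block transform: processes num 128-byte blocks of
 * big-endian input.  When SHA512_ASM is defined, the "static" is
 * dropped and an external assembly implementation of the same name is
 * used instead of the C versions below.
 */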
#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num);

int SHA512_Final (unsigned char *md, SHA512_CTX *c)
	{
	unsigned char *p=(unsigned char *)c->u.p;
	size_t n=c->num;

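	/*
	 * Pad per FIPS 180-4: append the 0x80 terminator, zero-fill up
	 * to the last 16 bytes of the block (spilling into an extra
	 * all-padding block if fewer than 16 bytes remain), then store
	 * the 128-bit message bit length big-endian.
	 */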
	p[n]=0x80;	/* There is always room for one byte */
	n++;
	if (n > (sizeof(c->u)-16))
		memset (p+n,0,sizeof(c->u)-n), n=0,
		sha512_block_data_order (c,p,1);

	memset (p+n,0,sizeof(c->u)-16-n);
#if BYTE_ORDER == BIG_ENDIAN
	c->u.d[SHA_LBLOCK-2] = c->Nh;
	c->u.d[SHA_LBLOCK-1] = c->Nl;
#else
	p[sizeof(c->u)-1]  = (unsigned char)(c->Nl);
	p[sizeof(c->u)-2]  = (unsigned char)(c->Nl>>8);
	p[sizeof(c->u)-3]  = (unsigned char)(c->Nl>>16);
	p[sizeof(c->u)-4]  = (unsigned char)(c->Nl>>24);
	p[sizeof(c->u)-5]  = (unsigned char)(c->Nl>>32);
	p[sizeof(c->u)-6]  = (unsigned char)(c->Nl>>40);
	p[sizeof(c->u)-7]  = (unsigned char)(c->Nl>>48);
	p[sizeof(c->u)-8]  = (unsigned char)(c->Nl>>56);
	p[sizeof(c->u)-9]  = (unsigned char)(c->Nh);
	p[sizeof(c->u)-10] = (unsigned char)(c->Nh>>8);
	p[sizeof(c->u)-11] = (unsigned char)(c->Nh>>16);
	p[sizeof(c->u)-12] = (unsigned char)(c->Nh>>24);
	p[sizeof(c->u)-13] = (unsigned char)(c->Nh>>32);
	p[sizeof(c->u)-14] = (unsigned char)(c->Nh>>40);
	p[sizeof(c->u)-15] = (unsigned char)(c->Nh>>48);
	p[sizeof(c->u)-16] = (unsigned char)(c->Nh>>56);
#endif

	sha512_block_data_order (c,p,1);

	if (md==NULL) return 0;

	switch (c->md_len)
		{
		/* Let compiler decide if it's appropriate to unroll... */
		case SHA384_DIGEST_LENGTH:
			for (n=0;n<SHA384_DIGEST_LENGTH/8;n++)
				{
				SHA_LONG64 t = c->h[n];

				*(md++)	= (unsigned char)(t>>56);
				*(md++)	= (unsigned char)(t>>48);
				*(md++)	= (unsigned char)(t>>40);
				*(md++)	= (unsigned char)(t>>32);
				*(md++)	= (unsigned char)(t>>24);
				*(md++)	= (unsigned char)(t>>16);
				*(md++)	= (unsigned char)(t>>8);
				*(md++)	= (unsigned char)(t);
				}
			break;
		case SHA512_DIGEST_LENGTH:
			for (n=0;n<SHA512_DIGEST_LENGTH/8;n++)
				{
				SHA_LONG64 t = c->h[n];

				*(md++)	= (unsigned char)(t>>56);
				*(md++)	= (unsigned char)(t>>48);
				*(md++)	= (unsigned char)(t>>40);
				*(md++)	= (unsigned char)(t>>32);
				*(md++)	= (unsigned char)(t>>24);
				*(md++)	= (unsigned char)(t>>16);
				*(md++)	= (unsigned char)(t>>8);
				*(md++)	= (unsigned char)(t);
				}
			break;
		/* ... as well as make sure md_len is not abused. */
		default:	return 0;
		}

	return 1;
	}

int SHA384_Final (unsigned char *md,SHA512_CTX *c)
{   return SHA512_Final (md,c);   }
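
/*
 * SHA-384 shares the SHA-512 block size and compression function; only
 * the initial hash values and the output length differ, the latter via
 * md_len, which SHA512_Final checks before serializing the state.
 */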

int SHA512_Update (SHA512_CTX *c, const void *_data, size_t len)
	{
	SHA_LONG64	l;
	unsigned char *p=c->u.p;
	const unsigned char *data=(const unsigned char *)_data;

	if (len==0) return 1;

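	/*
	 * Maintain the 128-bit message bit count in Nh:Nl.  len is in
	 * bytes, so the low word advances by len<<3; a wrap of Nl and,
	 * for 64-bit size_t, the bits shifted out of len<<3 (len>>61)
	 * carry into Nh.
	 */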
	l = (c->Nl+(((SHA_LONG64)len)<<3))&U64(0xffffffffffffffff);
	if (l < c->Nl)		c->Nh++;
	if (sizeof(len)>=8)	c->Nh+=(((SHA_LONG64)len)>>61);
	c->Nl=l;

	if (c->num != 0)
		{
		size_t n = sizeof(c->u) - c->num;

		if (len < n)
			{
			memcpy (p+c->num,data,len), c->num += (unsigned int)len;
			return 1;
			}
		else	{
			memcpy (p+c->num,data,n), c->num = 0;
			len-=n, data+=n;
			sha512_block_data_order (c,p,1);
			}
		}

	if (len >= sizeof(c->u))
		{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
		if ((size_t)data%sizeof(c->u.d[0]) != 0)
			while (len >= sizeof(c->u))
				memcpy (p,data,sizeof(c->u)),
				sha512_block_data_order (c,p,1),
				len  -= sizeof(c->u),
				data += sizeof(c->u);
		else
#endif
			sha512_block_data_order (c,data,len/sizeof(c->u)),
			data += len,
			len  %= sizeof(c->u),
			data -= len;
		}

	if (len != 0)	memcpy (p,data,len), c->num = (unsigned int)len;

	return 1;
	}

int SHA384_Update (SHA512_CTX *c, const void *data, size_t len)
{   return SHA512_Update (c,data,len);   }

void SHA512_Transform (SHA512_CTX *c, const unsigned char *data)
	{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
	if ((size_t)data%sizeof(c->u.d[0]) != 0)
		memcpy(c->u.p,data,sizeof(c->u.p)),
		data = c->u.p;
#endif
	sha512_block_data_order (c,data,1);
	}

unsigned char *SHA384(const unsigned char *d, size_t n, unsigned char *md)
	{
	SHA512_CTX c;
	static unsigned char m[SHA384_DIGEST_LENGTH];

	if (md == NULL) md=m;
	SHA384_Init(&c);
	SHA512_Update(&c,d,n);
	SHA512_Final(md,&c);
	explicit_bzero(&c,sizeof(c));
	return(md);
	}

unsigned char *SHA512(const unsigned char *d, size_t n, unsigned char *md)
	{
	SHA512_CTX c;
	static unsigned char m[SHA512_DIGEST_LENGTH];

	if (md == NULL) md=m;
	SHA512_Init(&c);
	SHA512_Update(&c,d,n);
	SHA512_Final(md,&c);
	explicit_bzero(&c,sizeof(c));
	return(md);
	}
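
/*
 * Illustrative sketch, not part of the library: one-shot hashing.  With
 * md == NULL the digest is written to a static buffer, which is neither
 * reentrant nor thread-safe, so callers normally pass their own ("abc"
 * below is just sample input):
 *
 *	unsigned char digest[SHA512_DIGEST_LENGTH];
 *
 *	SHA512((const unsigned char *)"abc", 3, digest);
 */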

#ifndef SHA512_ASM
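/*
 * The round constants of FIPS 180-4: the first 64 bits of the
 * fractional parts of the cube roots of the first eighty primes.
 */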
static const SHA_LONG64 K512[80] = {
	U64(0x428a2f98d728ae22),U64(0x7137449123ef65cd),
	U64(0xb5c0fbcfec4d3b2f),U64(0xe9b5dba58189dbbc),
	U64(0x3956c25bf348b538),U64(0x59f111f1b605d019),
	U64(0x923f82a4af194f9b),U64(0xab1c5ed5da6d8118),
	U64(0xd807aa98a3030242),U64(0x12835b0145706fbe),
	U64(0x243185be4ee4b28c),U64(0x550c7dc3d5ffb4e2),
	U64(0x72be5d74f27b896f),U64(0x80deb1fe3b1696b1),
	U64(0x9bdc06a725c71235),U64(0xc19bf174cf692694),
	U64(0xe49b69c19ef14ad2),U64(0xefbe4786384f25e3),
	U64(0x0fc19dc68b8cd5b5),U64(0x240ca1cc77ac9c65),
	U64(0x2de92c6f592b0275),U64(0x4a7484aa6ea6e483),
	U64(0x5cb0a9dcbd41fbd4),U64(0x76f988da831153b5),
	U64(0x983e5152ee66dfab),U64(0xa831c66d2db43210),
	U64(0xb00327c898fb213f),U64(0xbf597fc7beef0ee4),
	U64(0xc6e00bf33da88fc2),U64(0xd5a79147930aa725),
	U64(0x06ca6351e003826f),U64(0x142929670a0e6e70),
	U64(0x27b70a8546d22ffc),U64(0x2e1b21385c26c926),
	U64(0x4d2c6dfc5ac42aed),U64(0x53380d139d95b3df),
	U64(0x650a73548baf63de),U64(0x766a0abb3c77b2a8),
	U64(0x81c2c92e47edaee6),U64(0x92722c851482353b),
	U64(0xa2bfe8a14cf10364),U64(0xa81a664bbc423001),
	U64(0xc24b8b70d0f89791),U64(0xc76c51a30654be30),
	U64(0xd192e819d6ef5218),U64(0xd69906245565a910),
	U64(0xf40e35855771202a),U64(0x106aa07032bbd1b8),
	U64(0x19a4c116b8d2d0c8),U64(0x1e376c085141ab53),
	U64(0x2748774cdf8eeb99),U64(0x34b0bcb5e19b48a8),
	U64(0x391c0cb3c5c95a63),U64(0x4ed8aa4ae3418acb),
	U64(0x5b9cca4f7763e373),U64(0x682e6ff3d6b2b8a3),
	U64(0x748f82ee5defb2fc),U64(0x78a5636f43172f60),
	U64(0x84c87814a1f0ab72),U64(0x8cc702081a6439ec),
	U64(0x90befffa23631e28),U64(0xa4506cebde82bde9),
	U64(0xbef9a3f7b2c67915),U64(0xc67178f2e372532b),
	U64(0xca273eceea26619c),U64(0xd186b8c721c0c207),
	U64(0xeada7dd6cde0eb1e),U64(0xf57d4f7fee6ed178),
	U64(0x06f067aa72176fba),U64(0x0a637dc5a2c898a6),
	U64(0x113f9804bef90dae),U64(0x1b710b35131c471b),
	U64(0x28db77f523047d84),U64(0x32caab7b40c72493),
	U64(0x3c9ebe0a15c9bebc),U64(0x431d67c49c100d4c),
	U64(0x4cc5d4becb3e42b6),U64(0x597f299cfc657e2a),
	U64(0x5fcb6fab3ad6faec),U64(0x6c44198c4a475817) };

#if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
# if defined(__x86_64) || defined(__x86_64__)
#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rorq %1,%0"	\
				: "=r"(ret)		\
				: "J"(n),"0"(a)		\
				: "cc"); ret;		})
#  define PULL64(x) ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x)));	\
				asm ("bswapq	%0"		\
				: "=r"(ret)			\
				: "0"(ret)); ret;		})
# elif (defined(__i386) || defined(__i386__))
#  define PULL64(x) ({ const unsigned int *p=(const unsigned int *)(&(x));\
			 unsigned int hi=p[0],lo=p[1];		\
				asm ("bswapl %0; bswapl %1;"	\
				: "=r"(lo),"=r"(hi)		\
				: "0"(lo),"1"(hi));		\
				((SHA_LONG64)hi)<<32|lo;	})
# elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#  define ROTR(a,n)	({ SHA_LONG64 ret;		\
				asm ("rotrdi %0,%1,%2"	\
				: "=r"(ret)		\
				: "r"(a),"K"(n)); ret;	})
# endif
#endif

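/*
 * Portable fallbacks: PULL64 assembles a big-endian 64-bit load byte by
 * byte and ROTR is the generic rotate-right, used whenever no inline
 * assembly version was selected above.
 */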
#ifndef PULL64
#define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
#endif

#ifndef ROTR
#define ROTR(x,s)	(((x)>>(s)) | ((x)<<(64-(s))))
#endif

#define Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
#define Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
#define sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))

#define Ch(x,y,z)	(((x) & (y)) ^ ((~(x)) & (z)))
#define Maj(x,y,z)	(((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
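
/*
 * Sigma0/Sigma1, sigma0/sigma1, Ch and Maj are the SHA-384/512 logical
 * functions of FIPS 180-4, section 4.1.3, expressed with 64-bit rotates
 * and shifts.
 */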

#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on a 32-bit CPU with fewer than
 * ~24 registers, both size- and performance-wise...
 */
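/*
 * The trick: F slides down X by one slot per round, so F[1..3] and
 * F[5..7] alias the previous round's b,c,d and f,g,h without explicit
 * shuffling, while F[8] onward doubles as the 16-entry message schedule
 * window (the word stored sixteen rounds ago reappears at F[8+16]).
 */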
static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
	{
	const SHA_LONG64 *W=in;
	SHA_LONG64	A,E,T;
	SHA_LONG64	X[9+80],*F;
	int i;

			while (num--) {

	F    = X+80;
	A    = ctx->h[0];	F[1] = ctx->h[1];
	F[2] = ctx->h[2];	F[3] = ctx->h[3];
	E    = ctx->h[4];	F[5] = ctx->h[5];
	F[6] = ctx->h[6];	F[7] = ctx->h[7];

	for (i=0;i<16;i++,F--)
		{
		T = PULL64(W[i]);
		F[0] = A;
		F[4] = E;
		F[8] = T;
		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
		E    = F[3] + T;
		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
		}

	for (;i<80;i++,F--)
		{
		T    = sigma0(F[8+16-1]);
		T   += sigma1(F[8+16-14]);
		T   += F[8+16] + F[8+16-9];

		F[0] = A;
		F[4] = E;
		F[8] = T;
		T   += F[7] + Sigma1(E) + Ch(E,F[5],F[6]) + K512[i];
		E    = F[3] + T;
		A    = T + Sigma0(A) + Maj(A,F[1],F[2]);
		}

	ctx->h[0] += A;		ctx->h[1] += F[1];
	ctx->h[2] += F[2];	ctx->h[3] += F[3];
	ctx->h[4] += E;		ctx->h[5] += F[5];
	ctx->h[6] += F[6];	ctx->h[7] += F[7];

			W+=SHA_LBLOCK;
			}
	}

#elif defined(OPENSSL_SMALL_FOOTPRINT)

static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
	{
	const SHA_LONG64 *W=in;
	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1,T2;
	SHA_LONG64	X[16];
	int i;

			while (num--) {

	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];

	for (i=0;i<16;i++)
		{
#if BYTE_ORDER == BIG_ENDIAN
		T1 = X[i] = W[i];
#else
		T1 = X[i] = PULL64(W[i]);
#endif
		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
		T2 = Sigma0(a) + Maj(a,b,c);
		h = g;	g = f;	f = e;	e = d + T1;
		d = c;	c = b;	b = a;	a = T1 + T2;
		}

	for (;i<80;i++)
		{
		s0 = X[(i+1)&0x0f];	s0 = sigma0(s0);
		s1 = X[(i+14)&0x0f];	s1 = sigma1(s1);

		T1 = X[i&0xf] += s0 + s1 + X[(i+9)&0xf];
		T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];
		T2 = Sigma0(a) + Maj(a,b,c);
		h = g;	g = f;	f = e;	e = d + T1;
		d = c;	c = b;	b = a;	a = T1 + T2;
		}

	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;

			W+=SHA_LBLOCK;
			}
	}

#else

#define	ROUND_00_15(i,a,b,c,d,e,f,g,h)		do {	\
	T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i];	\
	h = Sigma0(a) + Maj(a,b,c);			\
	d += T1;	h += T1;		} while (0)

#define	ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X)	do {	\
	s0 = X[(j+1)&0x0f];	s0 = sigma0(s0);	\
	s1 = X[(j+14)&0x0f];	s1 = sigma1(s1);	\
	T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f];	\
	ROUND_00_15(i+j,a,b,c,d,e,f,g,h);		} while (0)
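
/*
 * ROUND_00_15 is one compression round with T1 preloaded with the
 * message word; ROUND_16_80 first extends the schedule in place in the
 * 16-word circular buffer X.  Rotating the a..h argument order between
 * consecutive invocations below replaces the usual variable shuffle.
 */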

static void sha512_block_data_order (SHA512_CTX *ctx, const void *in, size_t num)
	{
	const SHA_LONG64 *W=in;
	SHA_LONG64	a,b,c,d,e,f,g,h,s0,s1,T1;
	SHA_LONG64	X[16];
	int i;

			while (num--) {

	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];

#if BYTE_ORDER == BIG_ENDIAN
	T1 = X[0] = W[0];	ROUND_00_15(0,a,b,c,d,e,f,g,h);
	T1 = X[1] = W[1];	ROUND_00_15(1,h,a,b,c,d,e,f,g);
	T1 = X[2] = W[2];	ROUND_00_15(2,g,h,a,b,c,d,e,f);
	T1 = X[3] = W[3];	ROUND_00_15(3,f,g,h,a,b,c,d,e);
	T1 = X[4] = W[4];	ROUND_00_15(4,e,f,g,h,a,b,c,d);
	T1 = X[5] = W[5];	ROUND_00_15(5,d,e,f,g,h,a,b,c);
	T1 = X[6] = W[6];	ROUND_00_15(6,c,d,e,f,g,h,a,b);
	T1 = X[7] = W[7];	ROUND_00_15(7,b,c,d,e,f,g,h,a);
	T1 = X[8] = W[8];	ROUND_00_15(8,a,b,c,d,e,f,g,h);
	T1 = X[9] = W[9];	ROUND_00_15(9,h,a,b,c,d,e,f,g);
	T1 = X[10] = W[10];	ROUND_00_15(10,g,h,a,b,c,d,e,f);
	T1 = X[11] = W[11];	ROUND_00_15(11,f,g,h,a,b,c,d,e);
	T1 = X[12] = W[12];	ROUND_00_15(12,e,f,g,h,a,b,c,d);
	T1 = X[13] = W[13];	ROUND_00_15(13,d,e,f,g,h,a,b,c);
	T1 = X[14] = W[14];	ROUND_00_15(14,c,d,e,f,g,h,a,b);
	T1 = X[15] = W[15];	ROUND_00_15(15,b,c,d,e,f,g,h,a);
#else
	T1 = X[0]  = PULL64(W[0]);	ROUND_00_15(0,a,b,c,d,e,f,g,h);
	T1 = X[1]  = PULL64(W[1]);	ROUND_00_15(1,h,a,b,c,d,e,f,g);
	T1 = X[2]  = PULL64(W[2]);	ROUND_00_15(2,g,h,a,b,c,d,e,f);
	T1 = X[3]  = PULL64(W[3]);	ROUND_00_15(3,f,g,h,a,b,c,d,e);
	T1 = X[4]  = PULL64(W[4]);	ROUND_00_15(4,e,f,g,h,a,b,c,d);
	T1 = X[5]  = PULL64(W[5]);	ROUND_00_15(5,d,e,f,g,h,a,b,c);
	T1 = X[6]  = PULL64(W[6]);	ROUND_00_15(6,c,d,e,f,g,h,a,b);
	T1 = X[7]  = PULL64(W[7]);	ROUND_00_15(7,b,c,d,e,f,g,h,a);
	T1 = X[8]  = PULL64(W[8]);	ROUND_00_15(8,a,b,c,d,e,f,g,h);
	T1 = X[9]  = PULL64(W[9]);	ROUND_00_15(9,h,a,b,c,d,e,f,g);
	T1 = X[10] = PULL64(W[10]);	ROUND_00_15(10,g,h,a,b,c,d,e,f);
	T1 = X[11] = PULL64(W[11]);	ROUND_00_15(11,f,g,h,a,b,c,d,e);
	T1 = X[12] = PULL64(W[12]);	ROUND_00_15(12,e,f,g,h,a,b,c,d);
	T1 = X[13] = PULL64(W[13]);	ROUND_00_15(13,d,e,f,g,h,a,b,c);
	T1 = X[14] = PULL64(W[14]);	ROUND_00_15(14,c,d,e,f,g,h,a,b);
	T1 = X[15] = PULL64(W[15]);	ROUND_00_15(15,b,c,d,e,f,g,h,a);
#endif

	for (i=16;i<80;i+=16)
		{
		ROUND_16_80(i, 0,a,b,c,d,e,f,g,h,X);
		ROUND_16_80(i, 1,h,a,b,c,d,e,f,g,X);
		ROUND_16_80(i, 2,g,h,a,b,c,d,e,f,X);
		ROUND_16_80(i, 3,f,g,h,a,b,c,d,e,X);
		ROUND_16_80(i, 4,e,f,g,h,a,b,c,d,X);
		ROUND_16_80(i, 5,d,e,f,g,h,a,b,c,X);
		ROUND_16_80(i, 6,c,d,e,f,g,h,a,b,X);
		ROUND_16_80(i, 7,b,c,d,e,f,g,h,a,X);
		ROUND_16_80(i, 8,a,b,c,d,e,f,g,h,X);
		ROUND_16_80(i, 9,h,a,b,c,d,e,f,g,X);
		ROUND_16_80(i,10,g,h,a,b,c,d,e,f,X);
		ROUND_16_80(i,11,f,g,h,a,b,c,d,e,X);
		ROUND_16_80(i,12,e,f,g,h,a,b,c,d,X);
		ROUND_16_80(i,13,d,e,f,g,h,a,b,c,X);
		ROUND_16_80(i,14,c,d,e,f,g,h,a,b,X);
		ROUND_16_80(i,15,b,c,d,e,f,g,h,a,X);
		}

	ctx->h[0] += a;	ctx->h[1] += b;	ctx->h[2] += c;	ctx->h[3] += d;
	ctx->h[4] += e;	ctx->h[5] += f;	ctx->h[6] += g;	ctx->h[7] += h;

			W+=SHA_LBLOCK;
			}
	}

#endif

#endif /* SHA512_ASM */

#endif /* !OPENSSL_NO_SHA512 */