xref: /freebsd/crypto/openssl/crypto/bn/bn_asm.c (revision 5c87c606)
174664626SKris Kennaway /* crypto/bn/bn_asm.c */
274664626SKris Kennaway /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
374664626SKris Kennaway  * All rights reserved.
474664626SKris Kennaway  *
574664626SKris Kennaway  * This package is an SSL implementation written
674664626SKris Kennaway  * by Eric Young (eay@cryptsoft.com).
774664626SKris Kennaway  * The implementation was written so as to conform with Netscapes SSL.
874664626SKris Kennaway  *
974664626SKris Kennaway  * This library is free for commercial and non-commercial use as long as
1074664626SKris Kennaway  * the following conditions are aheared to.  The following conditions
1174664626SKris Kennaway  * apply to all code found in this distribution, be it the RC4, RSA,
1274664626SKris Kennaway  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1374664626SKris Kennaway  * included with this distribution is covered by the same copyright terms
1474664626SKris Kennaway  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1574664626SKris Kennaway  *
1674664626SKris Kennaway  * Copyright remains Eric Young's, and as such any Copyright notices in
1774664626SKris Kennaway  * the code are not to be removed.
1874664626SKris Kennaway  * If this package is used in a product, Eric Young should be given attribution
1974664626SKris Kennaway  * as the author of the parts of the library used.
2074664626SKris Kennaway  * This can be in the form of a textual message at program startup or
2174664626SKris Kennaway  * in documentation (online or textual) provided with the package.
2274664626SKris Kennaway  *
2374664626SKris Kennaway  * Redistribution and use in source and binary forms, with or without
2474664626SKris Kennaway  * modification, are permitted provided that the following conditions
2574664626SKris Kennaway  * are met:
2674664626SKris Kennaway  * 1. Redistributions of source code must retain the copyright
2774664626SKris Kennaway  *    notice, this list of conditions and the following disclaimer.
2874664626SKris Kennaway  * 2. Redistributions in binary form must reproduce the above copyright
2974664626SKris Kennaway  *    notice, this list of conditions and the following disclaimer in the
3074664626SKris Kennaway  *    documentation and/or other materials provided with the distribution.
3174664626SKris Kennaway  * 3. All advertising materials mentioning features or use of this software
3274664626SKris Kennaway  *    must display the following acknowledgement:
3374664626SKris Kennaway  *    "This product includes cryptographic software written by
3474664626SKris Kennaway  *     Eric Young (eay@cryptsoft.com)"
3574664626SKris Kennaway  *    The word 'cryptographic' can be left out if the rouines from the library
3674664626SKris Kennaway  *    being used are not cryptographic related :-).
3774664626SKris Kennaway  * 4. If you include any Windows specific code (or a derivative thereof) from
3874664626SKris Kennaway  *    the apps directory (application code) you must include an acknowledgement:
3974664626SKris Kennaway  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
4074664626SKris Kennaway  *
4174664626SKris Kennaway  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4274664626SKris Kennaway  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4374664626SKris Kennaway  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4474664626SKris Kennaway  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4574664626SKris Kennaway  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4674664626SKris Kennaway  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4774664626SKris Kennaway  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4874664626SKris Kennaway  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4974664626SKris Kennaway  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5074664626SKris Kennaway  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5174664626SKris Kennaway  * SUCH DAMAGE.
5274664626SKris Kennaway  *
5374664626SKris Kennaway  * The licence and distribution terms for any publically available version or
5474664626SKris Kennaway  * derivative of this code cannot be changed.  i.e. this code cannot simply be
5574664626SKris Kennaway  * copied and put under another distribution licence
5674664626SKris Kennaway  * [including the GNU Public Licence.]
5774664626SKris Kennaway  */
5874664626SKris Kennaway 
59f579bf8eSKris Kennaway #ifndef BN_DEBUG
60f579bf8eSKris Kennaway # undef NDEBUG /* avoid conflicting definitions */
61f579bf8eSKris Kennaway # define NDEBUG
62f579bf8eSKris Kennaway #endif
63f579bf8eSKris Kennaway 
6474664626SKris Kennaway #include <stdio.h>
65f579bf8eSKris Kennaway #include <assert.h>
6674664626SKris Kennaway #include "cryptlib.h"
6774664626SKris Kennaway #include "bn_lcl.h"
6874664626SKris Kennaway 
69f579bf8eSKris Kennaway #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
7074664626SKris Kennaway 
715c87c606SMark Murray BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
7274664626SKris Kennaway 	{
7374664626SKris Kennaway 	BN_ULONG c1=0;
7474664626SKris Kennaway 
75f579bf8eSKris Kennaway 	assert(num >= 0);
7674664626SKris Kennaway 	if (num <= 0) return(c1);
7774664626SKris Kennaway 
78f579bf8eSKris Kennaway 	while (num&~3)
7974664626SKris Kennaway 		{
8074664626SKris Kennaway 		mul_add(rp[0],ap[0],w,c1);
8174664626SKris Kennaway 		mul_add(rp[1],ap[1],w,c1);
8274664626SKris Kennaway 		mul_add(rp[2],ap[2],w,c1);
8374664626SKris Kennaway 		mul_add(rp[3],ap[3],w,c1);
84f579bf8eSKris Kennaway 		ap+=4; rp+=4; num-=4;
85f579bf8eSKris Kennaway 		}
86f579bf8eSKris Kennaway 	if (num)
87f579bf8eSKris Kennaway 		{
88f579bf8eSKris Kennaway 		mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
89f579bf8eSKris Kennaway 		mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
90f579bf8eSKris Kennaway 		mul_add(rp[2],ap[2],w,c1); return c1;
9174664626SKris Kennaway 		}
9274664626SKris Kennaway 
9374664626SKris Kennaway 	return(c1);
9474664626SKris Kennaway 	}
9574664626SKris Kennaway 
965c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
9774664626SKris Kennaway 	{
9874664626SKris Kennaway 	BN_ULONG c1=0;
9974664626SKris Kennaway 
100f579bf8eSKris Kennaway 	assert(num >= 0);
10174664626SKris Kennaway 	if (num <= 0) return(c1);
10274664626SKris Kennaway 
103f579bf8eSKris Kennaway 	while (num&~3)
10474664626SKris Kennaway 		{
10574664626SKris Kennaway 		mul(rp[0],ap[0],w,c1);
10674664626SKris Kennaway 		mul(rp[1],ap[1],w,c1);
10774664626SKris Kennaway 		mul(rp[2],ap[2],w,c1);
10874664626SKris Kennaway 		mul(rp[3],ap[3],w,c1);
109f579bf8eSKris Kennaway 		ap+=4; rp+=4; num-=4;
110f579bf8eSKris Kennaway 		}
111f579bf8eSKris Kennaway 	if (num)
112f579bf8eSKris Kennaway 		{
113f579bf8eSKris Kennaway 		mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
114f579bf8eSKris Kennaway 		mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
115f579bf8eSKris Kennaway 		mul(rp[2],ap[2],w,c1);
11674664626SKris Kennaway 		}
11774664626SKris Kennaway 	return(c1);
11874664626SKris Kennaway 	}
11974664626SKris Kennaway 
1205c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
12174664626SKris Kennaway         {
122f579bf8eSKris Kennaway 	assert(n >= 0);
12374664626SKris Kennaway 	if (n <= 0) return;
124f579bf8eSKris Kennaway 	while (n&~3)
12574664626SKris Kennaway 		{
126f579bf8eSKris Kennaway 		sqr(r[0],r[1],a[0]);
127f579bf8eSKris Kennaway 		sqr(r[2],r[3],a[1]);
128f579bf8eSKris Kennaway 		sqr(r[4],r[5],a[2]);
129f579bf8eSKris Kennaway 		sqr(r[6],r[7],a[3]);
130f579bf8eSKris Kennaway 		a+=4; r+=8; n-=4;
131f579bf8eSKris Kennaway 		}
132f579bf8eSKris Kennaway 	if (n)
133f579bf8eSKris Kennaway 		{
134f579bf8eSKris Kennaway 		sqr(r[0],r[1],a[0]); if (--n == 0) return;
135f579bf8eSKris Kennaway 		sqr(r[2],r[3],a[1]); if (--n == 0) return;
136f579bf8eSKris Kennaway 		sqr(r[4],r[5],a[2]);
13774664626SKris Kennaway 		}
13874664626SKris Kennaway 	}
13974664626SKris Kennaway 
140f579bf8eSKris Kennaway #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
14174664626SKris Kennaway 
1425c87c606SMark Murray BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
14374664626SKris Kennaway 	{
14474664626SKris Kennaway 	BN_ULONG c=0;
14574664626SKris Kennaway 	BN_ULONG bl,bh;
14674664626SKris Kennaway 
147f579bf8eSKris Kennaway 	assert(num >= 0);
14874664626SKris Kennaway 	if (num <= 0) return((BN_ULONG)0);
14974664626SKris Kennaway 
15074664626SKris Kennaway 	bl=LBITS(w);
15174664626SKris Kennaway 	bh=HBITS(w);
15274664626SKris Kennaway 
15374664626SKris Kennaway 	for (;;)
15474664626SKris Kennaway 		{
15574664626SKris Kennaway 		mul_add(rp[0],ap[0],bl,bh,c);
15674664626SKris Kennaway 		if (--num == 0) break;
15774664626SKris Kennaway 		mul_add(rp[1],ap[1],bl,bh,c);
15874664626SKris Kennaway 		if (--num == 0) break;
15974664626SKris Kennaway 		mul_add(rp[2],ap[2],bl,bh,c);
16074664626SKris Kennaway 		if (--num == 0) break;
16174664626SKris Kennaway 		mul_add(rp[3],ap[3],bl,bh,c);
16274664626SKris Kennaway 		if (--num == 0) break;
16374664626SKris Kennaway 		ap+=4;
16474664626SKris Kennaway 		rp+=4;
16574664626SKris Kennaway 		}
16674664626SKris Kennaway 	return(c);
16774664626SKris Kennaway 	}
16874664626SKris Kennaway 
1695c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
17074664626SKris Kennaway 	{
17174664626SKris Kennaway 	BN_ULONG carry=0;
17274664626SKris Kennaway 	BN_ULONG bl,bh;
17374664626SKris Kennaway 
174f579bf8eSKris Kennaway 	assert(num >= 0);
17574664626SKris Kennaway 	if (num <= 0) return((BN_ULONG)0);
17674664626SKris Kennaway 
17774664626SKris Kennaway 	bl=LBITS(w);
17874664626SKris Kennaway 	bh=HBITS(w);
17974664626SKris Kennaway 
18074664626SKris Kennaway 	for (;;)
18174664626SKris Kennaway 		{
18274664626SKris Kennaway 		mul(rp[0],ap[0],bl,bh,carry);
18374664626SKris Kennaway 		if (--num == 0) break;
18474664626SKris Kennaway 		mul(rp[1],ap[1],bl,bh,carry);
18574664626SKris Kennaway 		if (--num == 0) break;
18674664626SKris Kennaway 		mul(rp[2],ap[2],bl,bh,carry);
18774664626SKris Kennaway 		if (--num == 0) break;
18874664626SKris Kennaway 		mul(rp[3],ap[3],bl,bh,carry);
18974664626SKris Kennaway 		if (--num == 0) break;
19074664626SKris Kennaway 		ap+=4;
19174664626SKris Kennaway 		rp+=4;
19274664626SKris Kennaway 		}
19374664626SKris Kennaway 	return(carry);
19474664626SKris Kennaway 	}
19574664626SKris Kennaway 
1965c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
19774664626SKris Kennaway         {
198f579bf8eSKris Kennaway 	assert(n >= 0);
19974664626SKris Kennaway 	if (n <= 0) return;
20074664626SKris Kennaway 	for (;;)
20174664626SKris Kennaway 		{
20274664626SKris Kennaway 		sqr64(r[0],r[1],a[0]);
20374664626SKris Kennaway 		if (--n == 0) break;
20474664626SKris Kennaway 
20574664626SKris Kennaway 		sqr64(r[2],r[3],a[1]);
20674664626SKris Kennaway 		if (--n == 0) break;
20774664626SKris Kennaway 
20874664626SKris Kennaway 		sqr64(r[4],r[5],a[2]);
20974664626SKris Kennaway 		if (--n == 0) break;
21074664626SKris Kennaway 
21174664626SKris Kennaway 		sqr64(r[6],r[7],a[3]);
21274664626SKris Kennaway 		if (--n == 0) break;
21374664626SKris Kennaway 
21474664626SKris Kennaway 		a+=4;
21574664626SKris Kennaway 		r+=8;
21674664626SKris Kennaway 		}
21774664626SKris Kennaway 	}
21874664626SKris Kennaway 
219f579bf8eSKris Kennaway #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
22074664626SKris Kennaway 
22174664626SKris Kennaway #if defined(BN_LLONG) && defined(BN_DIV2W)
22274664626SKris Kennaway 
22374664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
22474664626SKris Kennaway 	{
22574664626SKris Kennaway 	return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
22674664626SKris Kennaway 	}
22774664626SKris Kennaway 
22874664626SKris Kennaway #else
22974664626SKris Kennaway 
230ddd58736SKris Kennaway /* Divide h,l by d and return the result. */
23174664626SKris Kennaway /* I need to test this some more :-( */
23274664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
23374664626SKris Kennaway 	{
23474664626SKris Kennaway 	BN_ULONG dh,dl,q,ret=0,th,tl,t;
23574664626SKris Kennaway 	int i,count=2;
23674664626SKris Kennaway 
23774664626SKris Kennaway 	if (d == 0) return(BN_MASK2);
23874664626SKris Kennaway 
23974664626SKris Kennaway 	i=BN_num_bits_word(d);
240ddd58736SKris Kennaway 	assert((i == BN_BITS2) || (h > (BN_ULONG)1<<i));
241ddd58736SKris Kennaway 
24274664626SKris Kennaway 	i=BN_BITS2-i;
24374664626SKris Kennaway 	if (h >= d) h-=d;
24474664626SKris Kennaway 
24574664626SKris Kennaway 	if (i)
24674664626SKris Kennaway 		{
24774664626SKris Kennaway 		d<<=i;
24874664626SKris Kennaway 		h=(h<<i)|(l>>(BN_BITS2-i));
24974664626SKris Kennaway 		l<<=i;
25074664626SKris Kennaway 		}
25174664626SKris Kennaway 	dh=(d&BN_MASK2h)>>BN_BITS4;
25274664626SKris Kennaway 	dl=(d&BN_MASK2l);
25374664626SKris Kennaway 	for (;;)
25474664626SKris Kennaway 		{
25574664626SKris Kennaway 		if ((h>>BN_BITS4) == dh)
25674664626SKris Kennaway 			q=BN_MASK2l;
25774664626SKris Kennaway 		else
25874664626SKris Kennaway 			q=h/dh;
25974664626SKris Kennaway 
26074664626SKris Kennaway 		th=q*dh;
26174664626SKris Kennaway 		tl=dl*q;
26274664626SKris Kennaway 		for (;;)
26374664626SKris Kennaway 			{
26474664626SKris Kennaway 			t=h-th;
26574664626SKris Kennaway 			if ((t&BN_MASK2h) ||
26674664626SKris Kennaway 				((tl) <= (
26774664626SKris Kennaway 					(t<<BN_BITS4)|
26874664626SKris Kennaway 					((l&BN_MASK2h)>>BN_BITS4))))
26974664626SKris Kennaway 				break;
27074664626SKris Kennaway 			q--;
27174664626SKris Kennaway 			th-=dh;
27274664626SKris Kennaway 			tl-=dl;
27374664626SKris Kennaway 			}
27474664626SKris Kennaway 		t=(tl>>BN_BITS4);
27574664626SKris Kennaway 		tl=(tl<<BN_BITS4)&BN_MASK2h;
27674664626SKris Kennaway 		th+=t;
27774664626SKris Kennaway 
27874664626SKris Kennaway 		if (l < tl) th++;
27974664626SKris Kennaway 		l-=tl;
28074664626SKris Kennaway 		if (h < th)
28174664626SKris Kennaway 			{
28274664626SKris Kennaway 			h+=d;
28374664626SKris Kennaway 			q--;
28474664626SKris Kennaway 			}
28574664626SKris Kennaway 		h-=th;
28674664626SKris Kennaway 
28774664626SKris Kennaway 		if (--count == 0) break;
28874664626SKris Kennaway 
28974664626SKris Kennaway 		ret=q<<BN_BITS4;
29074664626SKris Kennaway 		h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
29174664626SKris Kennaway 		l=(l&BN_MASK2l)<<BN_BITS4;
29274664626SKris Kennaway 		}
29374664626SKris Kennaway 	ret|=q;
29474664626SKris Kennaway 	return(ret);
29574664626SKris Kennaway 	}
#endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
29774664626SKris Kennaway 
29874664626SKris Kennaway #ifdef BN_LLONG
2995c87c606SMark Murray BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
30074664626SKris Kennaway         {
30174664626SKris Kennaway 	BN_ULLONG ll=0;
30274664626SKris Kennaway 
303f579bf8eSKris Kennaway 	assert(n >= 0);
30474664626SKris Kennaway 	if (n <= 0) return((BN_ULONG)0);
30574664626SKris Kennaway 
30674664626SKris Kennaway 	for (;;)
30774664626SKris Kennaway 		{
30874664626SKris Kennaway 		ll+=(BN_ULLONG)a[0]+b[0];
30974664626SKris Kennaway 		r[0]=(BN_ULONG)ll&BN_MASK2;
31074664626SKris Kennaway 		ll>>=BN_BITS2;
31174664626SKris Kennaway 		if (--n <= 0) break;
31274664626SKris Kennaway 
31374664626SKris Kennaway 		ll+=(BN_ULLONG)a[1]+b[1];
31474664626SKris Kennaway 		r[1]=(BN_ULONG)ll&BN_MASK2;
31574664626SKris Kennaway 		ll>>=BN_BITS2;
31674664626SKris Kennaway 		if (--n <= 0) break;
31774664626SKris Kennaway 
31874664626SKris Kennaway 		ll+=(BN_ULLONG)a[2]+b[2];
31974664626SKris Kennaway 		r[2]=(BN_ULONG)ll&BN_MASK2;
32074664626SKris Kennaway 		ll>>=BN_BITS2;
32174664626SKris Kennaway 		if (--n <= 0) break;
32274664626SKris Kennaway 
32374664626SKris Kennaway 		ll+=(BN_ULLONG)a[3]+b[3];
32474664626SKris Kennaway 		r[3]=(BN_ULONG)ll&BN_MASK2;
32574664626SKris Kennaway 		ll>>=BN_BITS2;
32674664626SKris Kennaway 		if (--n <= 0) break;
32774664626SKris Kennaway 
32874664626SKris Kennaway 		a+=4;
32974664626SKris Kennaway 		b+=4;
33074664626SKris Kennaway 		r+=4;
33174664626SKris Kennaway 		}
33274664626SKris Kennaway 	return((BN_ULONG)ll);
33374664626SKris Kennaway 	}
334f579bf8eSKris Kennaway #else /* !BN_LLONG */
3355c87c606SMark Murray BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
33674664626SKris Kennaway         {
33774664626SKris Kennaway 	BN_ULONG c,l,t;
33874664626SKris Kennaway 
339f579bf8eSKris Kennaway 	assert(n >= 0);
34074664626SKris Kennaway 	if (n <= 0) return((BN_ULONG)0);
34174664626SKris Kennaway 
34274664626SKris Kennaway 	c=0;
34374664626SKris Kennaway 	for (;;)
34474664626SKris Kennaway 		{
34574664626SKris Kennaway 		t=a[0];
34674664626SKris Kennaway 		t=(t+c)&BN_MASK2;
34774664626SKris Kennaway 		c=(t < c);
34874664626SKris Kennaway 		l=(t+b[0])&BN_MASK2;
34974664626SKris Kennaway 		c+=(l < t);
35074664626SKris Kennaway 		r[0]=l;
35174664626SKris Kennaway 		if (--n <= 0) break;
35274664626SKris Kennaway 
35374664626SKris Kennaway 		t=a[1];
35474664626SKris Kennaway 		t=(t+c)&BN_MASK2;
35574664626SKris Kennaway 		c=(t < c);
35674664626SKris Kennaway 		l=(t+b[1])&BN_MASK2;
35774664626SKris Kennaway 		c+=(l < t);
35874664626SKris Kennaway 		r[1]=l;
35974664626SKris Kennaway 		if (--n <= 0) break;
36074664626SKris Kennaway 
36174664626SKris Kennaway 		t=a[2];
36274664626SKris Kennaway 		t=(t+c)&BN_MASK2;
36374664626SKris Kennaway 		c=(t < c);
36474664626SKris Kennaway 		l=(t+b[2])&BN_MASK2;
36574664626SKris Kennaway 		c+=(l < t);
36674664626SKris Kennaway 		r[2]=l;
36774664626SKris Kennaway 		if (--n <= 0) break;
36874664626SKris Kennaway 
36974664626SKris Kennaway 		t=a[3];
37074664626SKris Kennaway 		t=(t+c)&BN_MASK2;
37174664626SKris Kennaway 		c=(t < c);
37274664626SKris Kennaway 		l=(t+b[3])&BN_MASK2;
37374664626SKris Kennaway 		c+=(l < t);
37474664626SKris Kennaway 		r[3]=l;
37574664626SKris Kennaway 		if (--n <= 0) break;
37674664626SKris Kennaway 
37774664626SKris Kennaway 		a+=4;
37874664626SKris Kennaway 		b+=4;
37974664626SKris Kennaway 		r+=4;
38074664626SKris Kennaway 		}
38174664626SKris Kennaway 	return((BN_ULONG)c);
38274664626SKris Kennaway 	}
383f579bf8eSKris Kennaway #endif /* !BN_LLONG */
38474664626SKris Kennaway 
3855c87c606SMark Murray BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
38674664626SKris Kennaway         {
38774664626SKris Kennaway 	BN_ULONG t1,t2;
38874664626SKris Kennaway 	int c=0;
38974664626SKris Kennaway 
390f579bf8eSKris Kennaway 	assert(n >= 0);
39174664626SKris Kennaway 	if (n <= 0) return((BN_ULONG)0);
39274664626SKris Kennaway 
39374664626SKris Kennaway 	for (;;)
39474664626SKris Kennaway 		{
39574664626SKris Kennaway 		t1=a[0]; t2=b[0];
39674664626SKris Kennaway 		r[0]=(t1-t2-c)&BN_MASK2;
39774664626SKris Kennaway 		if (t1 != t2) c=(t1 < t2);
39874664626SKris Kennaway 		if (--n <= 0) break;
39974664626SKris Kennaway 
40074664626SKris Kennaway 		t1=a[1]; t2=b[1];
40174664626SKris Kennaway 		r[1]=(t1-t2-c)&BN_MASK2;
40274664626SKris Kennaway 		if (t1 != t2) c=(t1 < t2);
40374664626SKris Kennaway 		if (--n <= 0) break;
40474664626SKris Kennaway 
40574664626SKris Kennaway 		t1=a[2]; t2=b[2];
40674664626SKris Kennaway 		r[2]=(t1-t2-c)&BN_MASK2;
40774664626SKris Kennaway 		if (t1 != t2) c=(t1 < t2);
40874664626SKris Kennaway 		if (--n <= 0) break;
40974664626SKris Kennaway 
41074664626SKris Kennaway 		t1=a[3]; t2=b[3];
41174664626SKris Kennaway 		r[3]=(t1-t2-c)&BN_MASK2;
41274664626SKris Kennaway 		if (t1 != t2) c=(t1 < t2);
41374664626SKris Kennaway 		if (--n <= 0) break;
41474664626SKris Kennaway 
41574664626SKris Kennaway 		a+=4;
41674664626SKris Kennaway 		b+=4;
41774664626SKris Kennaway 		r+=4;
41874664626SKris Kennaway 		}
41974664626SKris Kennaway 	return(c);
42074664626SKris Kennaway 	}
42174664626SKris Kennaway 
42274664626SKris Kennaway #ifdef BN_MUL_COMBA
42374664626SKris Kennaway 
42474664626SKris Kennaway #undef bn_mul_comba8
42574664626SKris Kennaway #undef bn_mul_comba4
42674664626SKris Kennaway #undef bn_sqr_comba8
42774664626SKris Kennaway #undef bn_sqr_comba4
42874664626SKris Kennaway 
429f579bf8eSKris Kennaway /* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
430f579bf8eSKris Kennaway /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
431f579bf8eSKris Kennaway /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
433f579bf8eSKris Kennaway 
43474664626SKris Kennaway #ifdef BN_LLONG
/*
 * BN_LLONG versions of the comba accumulation helpers.
 *
 * (c2,c1,c0) is a three-word accumulator with c0 least significant.  The
 * macros declare no temporaries of their own: the expanding function must
 * provide BN_ULLONG t (and tt when mul_add_c2/sqr_add_c2 is used) and
 * BN_ULONG t1,t2.
 *
 * Every body is wrapped in do { } while (0) so that an invocation
 * followed by ';' forms exactly one statement, and the value arguments
 * are parenthesised so that expression arguments expand as intended.
 */

/* (c2,c1,c0) += a*b */
#define mul_add_c(a,b,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)*(b); \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	(c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	(c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
	} while (0)

/* (c2,c1,c0) += 2*a*b; the bit shifted out of the doubled product and a
 * wrap of the incremented middle word both go into c2. */
#define mul_add_c2(a,b,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)*(b); \
	tt=(t+t)&BN_MASK; \
	if (tt < t) (c2)++; \
	t1=(BN_ULONG)Lw(tt); \
	t2=(BN_ULONG)Hw(tt); \
	(c0)=((c0)+t1)&BN_MASK2; \
	if (((c0) < t1) && (((++t2)&BN_MASK2) == 0)) (c2)++; \
	(c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
	} while (0)

/* (c2,c1,c0) += a[i]*a[i] */
#define sqr_add_c(a,i,c0,c1,c2) do { \
	t=(BN_ULLONG)(a)[i]*(a)[i]; \
	t1=(BN_ULONG)Lw(t); \
	t2=(BN_ULONG)Hw(t); \
	(c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	(c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
	} while (0)

/* (c2,c1,c0) += 2*a[i]*a[j] */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
461f579bf8eSKris Kennaway 
462f579bf8eSKris Kennaway #elif defined(BN_UMULT_HIGH)
463f579bf8eSKris Kennaway 
/*
 * BN_UMULT_HIGH versions of the comba accumulation helpers.
 *
 * (c2,c1,c0) is a three-word accumulator with c0 least significant.
 * t1 and t2 are BN_ULONG scratch variables that the expanding function
 * must declare.  The arithmetic relies on BN_ULONG wrapping at the word
 * size (no masking is applied).
 */

/* (c2,c1,c0) += a*b */
#define mul_add_c(a,b,c0,c1,c2)	{	\
	BN_ULONG ta=(a),tb=(b);		\
	t1 = ta * tb;			\
	t2 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

/*
 * (c2,c1,c0) += 2*a*b
 *
 * The product (t1,t0) is added to the accumulator twice instead of being
 * doubled first.  The high word of a single product is never all ones,
 * so adding the carry out of c0 to it cannot wrap.  The high word of the
 * doubled product can be all ones; the previous code added the carry to
 * that value, which wrapped it to zero and dropped a carry into c2.
 */
#define mul_add_c2(a,b,c0,c1,c2) {	\
	BN_ULONG ta=(a),tb=(b),t0;	\
	t0 = ta * tb;			\
	t1 = BN_UMULT_HIGH(ta,tb);	\
	c0 += t0; t2 = t1+((c0<t0)?1:0);	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	c0 += t0; t1 += (c0<t0)?1:0;	\
	c1 += t1; c2 += (c1<t1)?1:0;	\
	}

/* (c2,c1,c0) += a[i]*a[i] */
#define sqr_add_c(a,i,c0,c1,c2)	{	\
	BN_ULONG ta=(a)[i];		\
	t1 = ta * ta;			\
	t2 = BN_UMULT_HIGH(ta,ta);	\
	c0 += t1; t2 += (c0<t1)?1:0;	\
	c1 += t2; c2 += (c1<t2)?1:0;	\
	}

/* (c2,c1,c0) += 2*a[i]*a[j] */
#define sqr_add_c2(a,i,j,c0,c1,c2)	\
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
492f579bf8eSKris Kennaway 
493f579bf8eSKris Kennaway #else /* !BN_LLONG */
/*
 * Portable versions of the comba accumulation helpers, built on the
 * half-word primitives LBITS/HBITS/mul64/sqr64 (defined outside this
 * file).
 *
 * (c2,c1,c0) is a three-word accumulator with c0 least significant.  The
 * macros declare no temporaries of their own: the expanding function must
 * provide BN_ULONG t1,t2,bl,bh.
 *
 * Every body is wrapped in do { } while (0) so that an invocation
 * followed by ';' forms exactly one statement, and the accumulator
 * arguments are parenthesised.
 */

/* (c2,c1,c0) += a*b */
#define mul_add_c(a,b,c0,c1,c2) do { \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	(c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	(c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
	} while (0)

/* (c2,c1,c0) += 2*a*b; the product in (t2,t1) is doubled by shifting,
 * with the bits tested through BN_TBIT moved up one word. */
#define mul_add_c2(a,b,c0,c1,c2) do { \
	t1=LBITS(a); t2=HBITS(a); \
	bl=LBITS(b); bh=HBITS(b); \
	mul64(t1,t2,bl,bh); \
	if (t2 & BN_TBIT) (c2)++; \
	t2=(t2+t2)&BN_MASK2; \
	if (t1 & BN_TBIT) t2++; \
	t1=(t1+t1)&BN_MASK2; \
	(c0)=((c0)+t1)&BN_MASK2; \
	if (((c0) < t1) && (((++t2)&BN_MASK2) == 0)) (c2)++; \
	(c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
	} while (0)

/* (c2,c1,c0) += a[i]*a[i] */
#define sqr_add_c(a,i,c0,c1,c2) do { \
	sqr64(t1,t2,(a)[i]); \
	(c0)=((c0)+t1)&BN_MASK2; if ((c0) < t1) t2++; \
	(c1)=((c1)+t2)&BN_MASK2; if ((c1) < t2) (c2)++; \
	} while (0)

/* (c2,c1,c0) += 2*a[i]*a[j] */
#define sqr_add_c2(a,i,j,c0,c1,c2) \
	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
520f579bf8eSKris Kennaway #endif /* !BN_LLONG */
52174664626SKris Kennaway 
52274664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
52374664626SKris Kennaway 	{
52474664626SKris Kennaway #ifdef BN_LLONG
52574664626SKris Kennaway 	BN_ULLONG t;
52674664626SKris Kennaway #else
52774664626SKris Kennaway 	BN_ULONG bl,bh;
52874664626SKris Kennaway #endif
52974664626SKris Kennaway 	BN_ULONG t1,t2;
53074664626SKris Kennaway 	BN_ULONG c1,c2,c3;
53174664626SKris Kennaway 
53274664626SKris Kennaway 	c1=0;
53374664626SKris Kennaway 	c2=0;
53474664626SKris Kennaway 	c3=0;
53574664626SKris Kennaway 	mul_add_c(a[0],b[0],c1,c2,c3);
53674664626SKris Kennaway 	r[0]=c1;
53774664626SKris Kennaway 	c1=0;
53874664626SKris Kennaway 	mul_add_c(a[0],b[1],c2,c3,c1);
53974664626SKris Kennaway 	mul_add_c(a[1],b[0],c2,c3,c1);
54074664626SKris Kennaway 	r[1]=c2;
54174664626SKris Kennaway 	c2=0;
54274664626SKris Kennaway 	mul_add_c(a[2],b[0],c3,c1,c2);
54374664626SKris Kennaway 	mul_add_c(a[1],b[1],c3,c1,c2);
54474664626SKris Kennaway 	mul_add_c(a[0],b[2],c3,c1,c2);
54574664626SKris Kennaway 	r[2]=c3;
54674664626SKris Kennaway 	c3=0;
54774664626SKris Kennaway 	mul_add_c(a[0],b[3],c1,c2,c3);
54874664626SKris Kennaway 	mul_add_c(a[1],b[2],c1,c2,c3);
54974664626SKris Kennaway 	mul_add_c(a[2],b[1],c1,c2,c3);
55074664626SKris Kennaway 	mul_add_c(a[3],b[0],c1,c2,c3);
55174664626SKris Kennaway 	r[3]=c1;
55274664626SKris Kennaway 	c1=0;
55374664626SKris Kennaway 	mul_add_c(a[4],b[0],c2,c3,c1);
55474664626SKris Kennaway 	mul_add_c(a[3],b[1],c2,c3,c1);
55574664626SKris Kennaway 	mul_add_c(a[2],b[2],c2,c3,c1);
55674664626SKris Kennaway 	mul_add_c(a[1],b[3],c2,c3,c1);
55774664626SKris Kennaway 	mul_add_c(a[0],b[4],c2,c3,c1);
55874664626SKris Kennaway 	r[4]=c2;
55974664626SKris Kennaway 	c2=0;
56074664626SKris Kennaway 	mul_add_c(a[0],b[5],c3,c1,c2);
56174664626SKris Kennaway 	mul_add_c(a[1],b[4],c3,c1,c2);
56274664626SKris Kennaway 	mul_add_c(a[2],b[3],c3,c1,c2);
56374664626SKris Kennaway 	mul_add_c(a[3],b[2],c3,c1,c2);
56474664626SKris Kennaway 	mul_add_c(a[4],b[1],c3,c1,c2);
56574664626SKris Kennaway 	mul_add_c(a[5],b[0],c3,c1,c2);
56674664626SKris Kennaway 	r[5]=c3;
56774664626SKris Kennaway 	c3=0;
56874664626SKris Kennaway 	mul_add_c(a[6],b[0],c1,c2,c3);
56974664626SKris Kennaway 	mul_add_c(a[5],b[1],c1,c2,c3);
57074664626SKris Kennaway 	mul_add_c(a[4],b[2],c1,c2,c3);
57174664626SKris Kennaway 	mul_add_c(a[3],b[3],c1,c2,c3);
57274664626SKris Kennaway 	mul_add_c(a[2],b[4],c1,c2,c3);
57374664626SKris Kennaway 	mul_add_c(a[1],b[5],c1,c2,c3);
57474664626SKris Kennaway 	mul_add_c(a[0],b[6],c1,c2,c3);
57574664626SKris Kennaway 	r[6]=c1;
57674664626SKris Kennaway 	c1=0;
57774664626SKris Kennaway 	mul_add_c(a[0],b[7],c2,c3,c1);
57874664626SKris Kennaway 	mul_add_c(a[1],b[6],c2,c3,c1);
57974664626SKris Kennaway 	mul_add_c(a[2],b[5],c2,c3,c1);
58074664626SKris Kennaway 	mul_add_c(a[3],b[4],c2,c3,c1);
58174664626SKris Kennaway 	mul_add_c(a[4],b[3],c2,c3,c1);
58274664626SKris Kennaway 	mul_add_c(a[5],b[2],c2,c3,c1);
58374664626SKris Kennaway 	mul_add_c(a[6],b[1],c2,c3,c1);
58474664626SKris Kennaway 	mul_add_c(a[7],b[0],c2,c3,c1);
58574664626SKris Kennaway 	r[7]=c2;
58674664626SKris Kennaway 	c2=0;
58774664626SKris Kennaway 	mul_add_c(a[7],b[1],c3,c1,c2);
58874664626SKris Kennaway 	mul_add_c(a[6],b[2],c3,c1,c2);
58974664626SKris Kennaway 	mul_add_c(a[5],b[3],c3,c1,c2);
59074664626SKris Kennaway 	mul_add_c(a[4],b[4],c3,c1,c2);
59174664626SKris Kennaway 	mul_add_c(a[3],b[5],c3,c1,c2);
59274664626SKris Kennaway 	mul_add_c(a[2],b[6],c3,c1,c2);
59374664626SKris Kennaway 	mul_add_c(a[1],b[7],c3,c1,c2);
59474664626SKris Kennaway 	r[8]=c3;
59574664626SKris Kennaway 	c3=0;
59674664626SKris Kennaway 	mul_add_c(a[2],b[7],c1,c2,c3);
59774664626SKris Kennaway 	mul_add_c(a[3],b[6],c1,c2,c3);
59874664626SKris Kennaway 	mul_add_c(a[4],b[5],c1,c2,c3);
59974664626SKris Kennaway 	mul_add_c(a[5],b[4],c1,c2,c3);
60074664626SKris Kennaway 	mul_add_c(a[6],b[3],c1,c2,c3);
60174664626SKris Kennaway 	mul_add_c(a[7],b[2],c1,c2,c3);
60274664626SKris Kennaway 	r[9]=c1;
60374664626SKris Kennaway 	c1=0;
60474664626SKris Kennaway 	mul_add_c(a[7],b[3],c2,c3,c1);
60574664626SKris Kennaway 	mul_add_c(a[6],b[4],c2,c3,c1);
60674664626SKris Kennaway 	mul_add_c(a[5],b[5],c2,c3,c1);
60774664626SKris Kennaway 	mul_add_c(a[4],b[6],c2,c3,c1);
60874664626SKris Kennaway 	mul_add_c(a[3],b[7],c2,c3,c1);
60974664626SKris Kennaway 	r[10]=c2;
61074664626SKris Kennaway 	c2=0;
61174664626SKris Kennaway 	mul_add_c(a[4],b[7],c3,c1,c2);
61274664626SKris Kennaway 	mul_add_c(a[5],b[6],c3,c1,c2);
61374664626SKris Kennaway 	mul_add_c(a[6],b[5],c3,c1,c2);
61474664626SKris Kennaway 	mul_add_c(a[7],b[4],c3,c1,c2);
61574664626SKris Kennaway 	r[11]=c3;
61674664626SKris Kennaway 	c3=0;
61774664626SKris Kennaway 	mul_add_c(a[7],b[5],c1,c2,c3);
61874664626SKris Kennaway 	mul_add_c(a[6],b[6],c1,c2,c3);
61974664626SKris Kennaway 	mul_add_c(a[5],b[7],c1,c2,c3);
62074664626SKris Kennaway 	r[12]=c1;
62174664626SKris Kennaway 	c1=0;
62274664626SKris Kennaway 	mul_add_c(a[6],b[7],c2,c3,c1);
62374664626SKris Kennaway 	mul_add_c(a[7],b[6],c2,c3,c1);
62474664626SKris Kennaway 	r[13]=c2;
62574664626SKris Kennaway 	c2=0;
62674664626SKris Kennaway 	mul_add_c(a[7],b[7],c3,c1,c2);
62774664626SKris Kennaway 	r[14]=c3;
62874664626SKris Kennaway 	r[15]=c1;
62974664626SKris Kennaway 	}
63074664626SKris Kennaway 
/*
 * bn_mul_comba4 (BN_MUL_COMBA variant): fully unrolled routine that reads
 * a[0..3] and b[0..3] and writes r[0..7].
 *
 * The work is organised by output word: before each store to r[k], every
 * pair (a[i], b[j]) with i+j == k is handed to mul_add_c.  c1, c2 and c3
 * act as a rotating three-word accumulator: once r[k] has been stored from
 * one of them, that variable is cleared and is passed as the last (top)
 * accumulator argument for the next group.
 *
 * NOTE(review): mul_add_c is defined outside this chunk.  It must be a
 * macro, since it updates c1/c2/c3 that are passed by name; presumably it
 * adds the double-word product of its first two arguments into the
 * accumulator given by its last three arguments (low word first), which
 * would make r the 8-word product a*b -- confirm at its definition.
 */
63174664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
63274664626SKris Kennaway 	{
	/* t (or bl,bh), t1 and t2 are never referenced directly in this
	 * function; presumably they are scratch variables that mul_add_c
	 * uses by name -- do not rename or remove without checking it. */
63374664626SKris Kennaway #ifdef BN_LLONG
63474664626SKris Kennaway 	BN_ULLONG t;
63574664626SKris Kennaway #else
63674664626SKris Kennaway 	BN_ULONG bl,bh;
63774664626SKris Kennaway #endif
63874664626SKris Kennaway 	BN_ULONG t1,t2;
63974664626SKris Kennaway 	BN_ULONG c1,c2,c3;
64074664626SKris Kennaway 
64174664626SKris Kennaway 	c1=0;
64274664626SKris Kennaway 	c2=0;
64374664626SKris Kennaway 	c3=0;
	/* r[0]: pairs with i+j == 0 */
64474664626SKris Kennaway 	mul_add_c(a[0],b[0],c1,c2,c3);
64574664626SKris Kennaway 	r[0]=c1;
64674664626SKris Kennaway 	c1=0;
	/* r[1]: pairs with i+j == 1 */
64774664626SKris Kennaway 	mul_add_c(a[0],b[1],c2,c3,c1);
64874664626SKris Kennaway 	mul_add_c(a[1],b[0],c2,c3,c1);
64974664626SKris Kennaway 	r[1]=c2;
65074664626SKris Kennaway 	c2=0;
	/* r[2]: pairs with i+j == 2 */
65174664626SKris Kennaway 	mul_add_c(a[2],b[0],c3,c1,c2);
65274664626SKris Kennaway 	mul_add_c(a[1],b[1],c3,c1,c2);
65374664626SKris Kennaway 	mul_add_c(a[0],b[2],c3,c1,c2);
65474664626SKris Kennaway 	r[2]=c3;
65574664626SKris Kennaway 	c3=0;
	/* r[3]: pairs with i+j == 3 */
65674664626SKris Kennaway 	mul_add_c(a[0],b[3],c1,c2,c3);
65774664626SKris Kennaway 	mul_add_c(a[1],b[2],c1,c2,c3);
65874664626SKris Kennaway 	mul_add_c(a[2],b[1],c1,c2,c3);
65974664626SKris Kennaway 	mul_add_c(a[3],b[0],c1,c2,c3);
66074664626SKris Kennaway 	r[3]=c1;
66174664626SKris Kennaway 	c1=0;
	/* r[4]: pairs with i+j == 4 */
66274664626SKris Kennaway 	mul_add_c(a[3],b[1],c2,c3,c1);
66374664626SKris Kennaway 	mul_add_c(a[2],b[2],c2,c3,c1);
66474664626SKris Kennaway 	mul_add_c(a[1],b[3],c2,c3,c1);
66574664626SKris Kennaway 	r[4]=c2;
66674664626SKris Kennaway 	c2=0;
	/* r[5]: pairs with i+j == 5 */
66774664626SKris Kennaway 	mul_add_c(a[2],b[3],c3,c1,c2);
66874664626SKris Kennaway 	mul_add_c(a[3],b[2],c3,c1,c2);
66974664626SKris Kennaway 	r[5]=c3;
67074664626SKris Kennaway 	c3=0;
	/* Last group: its low accumulator word goes to r[6] and the next
	 * word to r[7]; c3 is not stored. */
67174664626SKris Kennaway 	mul_add_c(a[3],b[3],c1,c2,c3);
67274664626SKris Kennaway 	r[6]=c1;
67374664626SKris Kennaway 	r[7]=c2;
67474664626SKris Kennaway 	}
67574664626SKris Kennaway 
/*
 * bn_sqr_comba8 (BN_MUL_COMBA variant): fully unrolled routine that reads
 * a[0..7] and writes r[0..15].
 *
 * The work is organised by output word r[k].  For even k a single
 * sqr_add_c(a,k/2,...) is issued, and every index pair i>j with i+j == k is
 * issued once through sqr_add_c2(a,i,j,...).  c1, c2 and c3 act as a
 * rotating three-word accumulator: once r[k] has been stored from one of
 * them, that variable is cleared and is passed as the last (top)
 * accumulator argument for the next group.
 *
 * NOTE(review): sqr_add_c and sqr_add_c2 are defined outside this chunk.
 * They must be macros, since they update c1/c2/c3 that are passed by name;
 * presumably sqr_add_c accumulates a[i]*a[i] and sqr_add_c2 accumulates
 * 2*a[i]*a[j], which would make r the 16-word square of a -- confirm at
 * their definitions.
 */
6765c87c606SMark Murray void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
67774664626SKris Kennaway 	{
	/* t,tt (or bl,bh), t1 and t2 are never referenced directly in this
	 * function; presumably they are scratch variables that the
	 * sqr_add_c/sqr_add_c2 macros use by name -- do not rename or
	 * remove without checking them. */
67874664626SKris Kennaway #ifdef BN_LLONG
67974664626SKris Kennaway 	BN_ULLONG t,tt;
68074664626SKris Kennaway #else
68174664626SKris Kennaway 	BN_ULONG bl,bh;
68274664626SKris Kennaway #endif
68374664626SKris Kennaway 	BN_ULONG t1,t2;
68474664626SKris Kennaway 	BN_ULONG c1,c2,c3;
68574664626SKris Kennaway 
68674664626SKris Kennaway 	c1=0;
68774664626SKris Kennaway 	c2=0;
68874664626SKris Kennaway 	c3=0;
	/* r[0] */
68974664626SKris Kennaway 	sqr_add_c(a,0,c1,c2,c3);
69074664626SKris Kennaway 	r[0]=c1;
69174664626SKris Kennaway 	c1=0;
	/* r[1] */
69274664626SKris Kennaway 	sqr_add_c2(a,1,0,c2,c3,c1);
69374664626SKris Kennaway 	r[1]=c2;
69474664626SKris Kennaway 	c2=0;
	/* r[2] */
69574664626SKris Kennaway 	sqr_add_c(a,1,c3,c1,c2);
69674664626SKris Kennaway 	sqr_add_c2(a,2,0,c3,c1,c2);
69774664626SKris Kennaway 	r[2]=c3;
69874664626SKris Kennaway 	c3=0;
	/* r[3] */
69974664626SKris Kennaway 	sqr_add_c2(a,3,0,c1,c2,c3);
70074664626SKris Kennaway 	sqr_add_c2(a,2,1,c1,c2,c3);
70174664626SKris Kennaway 	r[3]=c1;
70274664626SKris Kennaway 	c1=0;
	/* r[4] */
70374664626SKris Kennaway 	sqr_add_c(a,2,c2,c3,c1);
70474664626SKris Kennaway 	sqr_add_c2(a,3,1,c2,c3,c1);
70574664626SKris Kennaway 	sqr_add_c2(a,4,0,c2,c3,c1);
70674664626SKris Kennaway 	r[4]=c2;
70774664626SKris Kennaway 	c2=0;
	/* r[5] */
70874664626SKris Kennaway 	sqr_add_c2(a,5,0,c3,c1,c2);
70974664626SKris Kennaway 	sqr_add_c2(a,4,1,c3,c1,c2);
71074664626SKris Kennaway 	sqr_add_c2(a,3,2,c3,c1,c2);
71174664626SKris Kennaway 	r[5]=c3;
71274664626SKris Kennaway 	c3=0;
	/* r[6] */
71374664626SKris Kennaway 	sqr_add_c(a,3,c1,c2,c3);
71474664626SKris Kennaway 	sqr_add_c2(a,4,2,c1,c2,c3);
71574664626SKris Kennaway 	sqr_add_c2(a,5,1,c1,c2,c3);
71674664626SKris Kennaway 	sqr_add_c2(a,6,0,c1,c2,c3);
71774664626SKris Kennaway 	r[6]=c1;
71874664626SKris Kennaway 	c1=0;
	/* r[7] */
71974664626SKris Kennaway 	sqr_add_c2(a,7,0,c2,c3,c1);
72074664626SKris Kennaway 	sqr_add_c2(a,6,1,c2,c3,c1);
72174664626SKris Kennaway 	sqr_add_c2(a,5,2,c2,c3,c1);
72274664626SKris Kennaway 	sqr_add_c2(a,4,3,c2,c3,c1);
72374664626SKris Kennaway 	r[7]=c2;
72474664626SKris Kennaway 	c2=0;
	/* r[8] */
72574664626SKris Kennaway 	sqr_add_c(a,4,c3,c1,c2);
72674664626SKris Kennaway 	sqr_add_c2(a,5,3,c3,c1,c2);
72774664626SKris Kennaway 	sqr_add_c2(a,6,2,c3,c1,c2);
72874664626SKris Kennaway 	sqr_add_c2(a,7,1,c3,c1,c2);
72974664626SKris Kennaway 	r[8]=c3;
73074664626SKris Kennaway 	c3=0;
	/* r[9] */
73174664626SKris Kennaway 	sqr_add_c2(a,7,2,c1,c2,c3);
73274664626SKris Kennaway 	sqr_add_c2(a,6,3,c1,c2,c3);
73374664626SKris Kennaway 	sqr_add_c2(a,5,4,c1,c2,c3);
73474664626SKris Kennaway 	r[9]=c1;
73574664626SKris Kennaway 	c1=0;
	/* r[10] */
73674664626SKris Kennaway 	sqr_add_c(a,5,c2,c3,c1);
73774664626SKris Kennaway 	sqr_add_c2(a,6,4,c2,c3,c1);
73874664626SKris Kennaway 	sqr_add_c2(a,7,3,c2,c3,c1);
73974664626SKris Kennaway 	r[10]=c2;
74074664626SKris Kennaway 	c2=0;
	/* r[11] */
74174664626SKris Kennaway 	sqr_add_c2(a,7,4,c3,c1,c2);
74274664626SKris Kennaway 	sqr_add_c2(a,6,5,c3,c1,c2);
74374664626SKris Kennaway 	r[11]=c3;
74474664626SKris Kennaway 	c3=0;
	/* r[12] */
74574664626SKris Kennaway 	sqr_add_c(a,6,c1,c2,c3);
74674664626SKris Kennaway 	sqr_add_c2(a,7,5,c1,c2,c3);
74774664626SKris Kennaway 	r[12]=c1;
74874664626SKris Kennaway 	c1=0;
	/* r[13] */
74974664626SKris Kennaway 	sqr_add_c2(a,7,6,c2,c3,c1);
75074664626SKris Kennaway 	r[13]=c2;
75174664626SKris Kennaway 	c2=0;
	/* Last group: its low accumulator word goes to r[14] and the next
	 * word to r[15]; c2 is not stored. */
75274664626SKris Kennaway 	sqr_add_c(a,7,c3,c1,c2);
75374664626SKris Kennaway 	r[14]=c3;
75474664626SKris Kennaway 	r[15]=c1;
75574664626SKris Kennaway 	}
75674664626SKris Kennaway 
/*
 * bn_sqr_comba4 (BN_MUL_COMBA variant): fully unrolled routine that reads
 * a[0..3] and writes r[0..7].
 *
 * The work is organised by output word r[k].  For even k a single
 * sqr_add_c(a,k/2,...) is issued, and every index pair i>j with i+j == k is
 * issued once through sqr_add_c2(a,i,j,...).  c1, c2 and c3 act as a
 * rotating three-word accumulator: once r[k] has been stored from one of
 * them, that variable is cleared and is passed as the last (top)
 * accumulator argument for the next group.
 *
 * NOTE(review): sqr_add_c and sqr_add_c2 are defined outside this chunk.
 * They must be macros, since they update c1/c2/c3 that are passed by name;
 * presumably sqr_add_c accumulates a[i]*a[i] and sqr_add_c2 accumulates
 * 2*a[i]*a[j], which would make r the 8-word square of a -- confirm at
 * their definitions.
 */
7575c87c606SMark Murray void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
75874664626SKris Kennaway 	{
	/* t,tt (or bl,bh), t1 and t2 are never referenced directly in this
	 * function; presumably they are scratch variables that the
	 * sqr_add_c/sqr_add_c2 macros use by name -- do not rename or
	 * remove without checking them. */
75974664626SKris Kennaway #ifdef BN_LLONG
76074664626SKris Kennaway 	BN_ULLONG t,tt;
76174664626SKris Kennaway #else
76274664626SKris Kennaway 	BN_ULONG bl,bh;
76374664626SKris Kennaway #endif
76474664626SKris Kennaway 	BN_ULONG t1,t2;
76574664626SKris Kennaway 	BN_ULONG c1,c2,c3;
76674664626SKris Kennaway 
76774664626SKris Kennaway 	c1=0;
76874664626SKris Kennaway 	c2=0;
76974664626SKris Kennaway 	c3=0;
	/* r[0] */
77074664626SKris Kennaway 	sqr_add_c(a,0,c1,c2,c3);
77174664626SKris Kennaway 	r[0]=c1;
77274664626SKris Kennaway 	c1=0;
	/* r[1] */
77374664626SKris Kennaway 	sqr_add_c2(a,1,0,c2,c3,c1);
77474664626SKris Kennaway 	r[1]=c2;
77574664626SKris Kennaway 	c2=0;
	/* r[2] */
77674664626SKris Kennaway 	sqr_add_c(a,1,c3,c1,c2);
77774664626SKris Kennaway 	sqr_add_c2(a,2,0,c3,c1,c2);
77874664626SKris Kennaway 	r[2]=c3;
77974664626SKris Kennaway 	c3=0;
	/* r[3] */
78074664626SKris Kennaway 	sqr_add_c2(a,3,0,c1,c2,c3);
78174664626SKris Kennaway 	sqr_add_c2(a,2,1,c1,c2,c3);
78274664626SKris Kennaway 	r[3]=c1;
78374664626SKris Kennaway 	c1=0;
	/* r[4] */
78474664626SKris Kennaway 	sqr_add_c(a,2,c2,c3,c1);
78574664626SKris Kennaway 	sqr_add_c2(a,3,1,c2,c3,c1);
78674664626SKris Kennaway 	r[4]=c2;
78774664626SKris Kennaway 	c2=0;
	/* r[5] */
78874664626SKris Kennaway 	sqr_add_c2(a,3,2,c3,c1,c2);
78974664626SKris Kennaway 	r[5]=c3;
79074664626SKris Kennaway 	c3=0;
	/* Last group: its low accumulator word goes to r[6] and the next
	 * word to r[7]; c3 is not stored. */
79174664626SKris Kennaway 	sqr_add_c(a,3,c1,c2,c3);
79274664626SKris Kennaway 	r[6]=c1;
79374664626SKris Kennaway 	r[7]=c2;
79474664626SKris Kennaway 	}
795f579bf8eSKris Kennaway #else /* !BN_MUL_COMBA */
79674664626SKris Kennaway 
79774664626SKris Kennaway /* Open question: would it be faster to do these squarings with a plain multiply instead? */
79874664626SKris Kennaway #undef bn_sqr_comba4
79974664626SKris Kennaway void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
80074664626SKris Kennaway 	{
80174664626SKris Kennaway 	BN_ULONG t[8];
80274664626SKris Kennaway 	bn_sqr_normal(r,a,4,t);
80374664626SKris Kennaway 	}
80474664626SKris Kennaway 
80574664626SKris Kennaway #undef bn_sqr_comba8
80674664626SKris Kennaway void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
80774664626SKris Kennaway 	{
80874664626SKris Kennaway 	BN_ULONG t[16];
80974664626SKris Kennaway 	bn_sqr_normal(r,a,8,t);
81074664626SKris Kennaway 	}
81174664626SKris Kennaway 
81274664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
81374664626SKris Kennaway 	{
81474664626SKris Kennaway 	r[4]=bn_mul_words(    &(r[0]),a,4,b[0]);
81574664626SKris Kennaway 	r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
81674664626SKris Kennaway 	r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
81774664626SKris Kennaway 	r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
81874664626SKris Kennaway 	}
81974664626SKris Kennaway 
82074664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
82174664626SKris Kennaway 	{
82274664626SKris Kennaway 	r[ 8]=bn_mul_words(    &(r[0]),a,8,b[0]);
82374664626SKris Kennaway 	r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
82474664626SKris Kennaway 	r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
82574664626SKris Kennaway 	r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
82674664626SKris Kennaway 	r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
82774664626SKris Kennaway 	r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
82874664626SKris Kennaway 	r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
82974664626SKris Kennaway 	r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
83074664626SKris Kennaway 	}
83174664626SKris Kennaway 
832f579bf8eSKris Kennaway #endif /* !BN_MUL_COMBA */
833