174664626SKris Kennaway /* crypto/bn/bn_asm.c */ 274664626SKris Kennaway /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 374664626SKris Kennaway * All rights reserved. 474664626SKris Kennaway * 574664626SKris Kennaway * This package is an SSL implementation written 674664626SKris Kennaway * by Eric Young (eay@cryptsoft.com). 774664626SKris Kennaway * The implementation was written so as to conform with Netscapes SSL. 874664626SKris Kennaway * 974664626SKris Kennaway * This library is free for commercial and non-commercial use as long as 1074664626SKris Kennaway * the following conditions are aheared to. The following conditions 1174664626SKris Kennaway * apply to all code found in this distribution, be it the RC4, RSA, 1274664626SKris Kennaway * lhash, DES, etc., code; not just the SSL code. The SSL documentation 1374664626SKris Kennaway * included with this distribution is covered by the same copyright terms 1474664626SKris Kennaway * except that the holder is Tim Hudson (tjh@cryptsoft.com). 1574664626SKris Kennaway * 1674664626SKris Kennaway * Copyright remains Eric Young's, and as such any Copyright notices in 1774664626SKris Kennaway * the code are not to be removed. 1874664626SKris Kennaway * If this package is used in a product, Eric Young should be given attribution 1974664626SKris Kennaway * as the author of the parts of the library used. 2074664626SKris Kennaway * This can be in the form of a textual message at program startup or 2174664626SKris Kennaway * in documentation (online or textual) provided with the package. 2274664626SKris Kennaway * 2374664626SKris Kennaway * Redistribution and use in source and binary forms, with or without 2474664626SKris Kennaway * modification, are permitted provided that the following conditions 2574664626SKris Kennaway * are met: 2674664626SKris Kennaway * 1. Redistributions of source code must retain the copyright 2774664626SKris Kennaway * notice, this list of conditions and the following disclaimer. 2874664626SKris Kennaway * 2. Redistributions in binary form must reproduce the above copyright 2974664626SKris Kennaway * notice, this list of conditions and the following disclaimer in the 3074664626SKris Kennaway * documentation and/or other materials provided with the distribution. 3174664626SKris Kennaway * 3. All advertising materials mentioning features or use of this software 3274664626SKris Kennaway * must display the following acknowledgement: 3374664626SKris Kennaway * "This product includes cryptographic software written by 3474664626SKris Kennaway * Eric Young (eay@cryptsoft.com)" 3574664626SKris Kennaway * The word 'cryptographic' can be left out if the rouines from the library 3674664626SKris Kennaway * being used are not cryptographic related :-). 3774664626SKris Kennaway * 4. If you include any Windows specific code (or a derivative thereof) from 3874664626SKris Kennaway * the apps directory (application code) you must include an acknowledgement: 3974664626SKris Kennaway * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 4074664626SKris Kennaway * 4174664626SKris Kennaway * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 4274664626SKris Kennaway * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 4374664626SKris Kennaway * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4474664626SKris Kennaway * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 4574664626SKris Kennaway * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4674664626SKris Kennaway * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 4774664626SKris Kennaway * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 4874664626SKris Kennaway * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 4974664626SKris Kennaway * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 5074664626SKris Kennaway * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5174664626SKris Kennaway * SUCH DAMAGE. 5274664626SKris Kennaway * 5374664626SKris Kennaway * The licence and distribution terms for any publically available version or 5474664626SKris Kennaway * derivative of this code cannot be changed. i.e. this code cannot simply be 5574664626SKris Kennaway * copied and put under another distribution licence 5674664626SKris Kennaway * [including the GNU Public Licence.] 5774664626SKris Kennaway */ 5874664626SKris Kennaway 59f579bf8eSKris Kennaway #ifndef BN_DEBUG 60f579bf8eSKris Kennaway # undef NDEBUG /* avoid conflicting definitions */ 61f579bf8eSKris Kennaway # define NDEBUG 62f579bf8eSKris Kennaway #endif 63f579bf8eSKris Kennaway 6474664626SKris Kennaway #include <stdio.h> 65f579bf8eSKris Kennaway #include <assert.h> 6674664626SKris Kennaway #include "cryptlib.h" 6774664626SKris Kennaway #include "bn_lcl.h" 6874664626SKris Kennaway 69f579bf8eSKris Kennaway #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) 7074664626SKris Kennaway 715c87c606SMark Murray BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 7274664626SKris Kennaway { 7374664626SKris Kennaway BN_ULONG c1=0; 7474664626SKris Kennaway 75f579bf8eSKris Kennaway assert(num >= 0); 7674664626SKris Kennaway if (num <= 0) return(c1); 7774664626SKris Kennaway 781f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 79f579bf8eSKris Kennaway while (num&~3) 8074664626SKris Kennaway { 8174664626SKris Kennaway mul_add(rp[0],ap[0],w,c1); 8274664626SKris Kennaway mul_add(rp[1],ap[1],w,c1); 8374664626SKris Kennaway mul_add(rp[2],ap[2],w,c1); 8474664626SKris Kennaway mul_add(rp[3],ap[3],w,c1); 85f579bf8eSKris Kennaway ap+=4; rp+=4; num-=4; 86f579bf8eSKris Kennaway } 871f13597dSJung-uk Kim #endif 881f13597dSJung-uk Kim while (num) 89f579bf8eSKris Kennaway { 901f13597dSJung-uk Kim mul_add(rp[0],ap[0],w,c1); 911f13597dSJung-uk Kim ap++; rp++; num--; 9274664626SKris Kennaway } 9374664626SKris Kennaway 9474664626SKris Kennaway return(c1); 9574664626SKris Kennaway } 9674664626SKris Kennaway 975c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 9874664626SKris Kennaway { 9974664626SKris Kennaway BN_ULONG c1=0; 10074664626SKris Kennaway 101f579bf8eSKris Kennaway assert(num >= 0); 10274664626SKris Kennaway if (num <= 0) return(c1); 10374664626SKris Kennaway 1041f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 105f579bf8eSKris Kennaway while (num&~3) 10674664626SKris Kennaway { 10774664626SKris Kennaway mul(rp[0],ap[0],w,c1); 10874664626SKris Kennaway mul(rp[1],ap[1],w,c1); 10974664626SKris Kennaway mul(rp[2],ap[2],w,c1); 11074664626SKris Kennaway mul(rp[3],ap[3],w,c1); 111f579bf8eSKris Kennaway ap+=4; rp+=4; num-=4; 112f579bf8eSKris Kennaway } 1131f13597dSJung-uk Kim #endif 1141f13597dSJung-uk Kim while (num) 115f579bf8eSKris Kennaway { 1161f13597dSJung-uk Kim mul(rp[0],ap[0],w,c1); 1171f13597dSJung-uk Kim ap++; rp++; num--; 11874664626SKris Kennaway } 11974664626SKris Kennaway return(c1); 12074664626SKris Kennaway } 12174664626SKris Kennaway 1225c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) 12374664626SKris Kennaway { 124f579bf8eSKris Kennaway assert(n >= 0); 12574664626SKris Kennaway if (n <= 0) return; 1261f13597dSJung-uk Kim 1271f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 128f579bf8eSKris Kennaway while (n&~3) 12974664626SKris Kennaway { 130f579bf8eSKris Kennaway sqr(r[0],r[1],a[0]); 131f579bf8eSKris Kennaway sqr(r[2],r[3],a[1]); 132f579bf8eSKris Kennaway sqr(r[4],r[5],a[2]); 133f579bf8eSKris Kennaway sqr(r[6],r[7],a[3]); 134f579bf8eSKris Kennaway a+=4; r+=8; n-=4; 135f579bf8eSKris Kennaway } 1361f13597dSJung-uk Kim #endif 1371f13597dSJung-uk Kim while (n) 138f579bf8eSKris Kennaway { 1391f13597dSJung-uk Kim sqr(r[0],r[1],a[0]); 1401f13597dSJung-uk Kim a++; r+=2; n--; 14174664626SKris Kennaway } 14274664626SKris Kennaway } 14374664626SKris Kennaway 144f579bf8eSKris Kennaway #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 14574664626SKris Kennaway 1465c87c606SMark Murray BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 14774664626SKris Kennaway { 14874664626SKris Kennaway BN_ULONG c=0; 14974664626SKris Kennaway BN_ULONG bl,bh; 15074664626SKris Kennaway 151f579bf8eSKris Kennaway assert(num >= 0); 15274664626SKris Kennaway if (num <= 0) return((BN_ULONG)0); 15374664626SKris Kennaway 15474664626SKris Kennaway bl=LBITS(w); 15574664626SKris Kennaway bh=HBITS(w); 15674664626SKris Kennaway 1571f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 1581f13597dSJung-uk Kim while (num&~3) 15974664626SKris Kennaway { 16074664626SKris Kennaway mul_add(rp[0],ap[0],bl,bh,c); 16174664626SKris Kennaway mul_add(rp[1],ap[1],bl,bh,c); 16274664626SKris Kennaway mul_add(rp[2],ap[2],bl,bh,c); 16374664626SKris Kennaway mul_add(rp[3],ap[3],bl,bh,c); 1641f13597dSJung-uk Kim ap+=4; rp+=4; num-=4; 1651f13597dSJung-uk Kim } 1661f13597dSJung-uk Kim #endif 1671f13597dSJung-uk Kim while (num) 1681f13597dSJung-uk Kim { 1691f13597dSJung-uk Kim mul_add(rp[0],ap[0],bl,bh,c); 1701f13597dSJung-uk Kim ap++; rp++; num--; 17174664626SKris Kennaway } 17274664626SKris Kennaway return(c); 17374664626SKris Kennaway } 17474664626SKris Kennaway 1755c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) 17674664626SKris Kennaway { 17774664626SKris Kennaway BN_ULONG carry=0; 17874664626SKris Kennaway BN_ULONG bl,bh; 17974664626SKris Kennaway 180f579bf8eSKris Kennaway assert(num >= 0); 18174664626SKris Kennaway if (num <= 0) return((BN_ULONG)0); 18274664626SKris Kennaway 18374664626SKris Kennaway bl=LBITS(w); 18474664626SKris Kennaway bh=HBITS(w); 18574664626SKris Kennaway 1861f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 1871f13597dSJung-uk Kim while (num&~3) 18874664626SKris Kennaway { 18974664626SKris Kennaway mul(rp[0],ap[0],bl,bh,carry); 19074664626SKris Kennaway mul(rp[1],ap[1],bl,bh,carry); 19174664626SKris Kennaway mul(rp[2],ap[2],bl,bh,carry); 19274664626SKris Kennaway mul(rp[3],ap[3],bl,bh,carry); 1931f13597dSJung-uk Kim ap+=4; rp+=4; num-=4; 1941f13597dSJung-uk Kim } 1951f13597dSJung-uk Kim #endif 1961f13597dSJung-uk Kim while (num) 1971f13597dSJung-uk Kim { 1981f13597dSJung-uk Kim mul(rp[0],ap[0],bl,bh,carry); 1991f13597dSJung-uk Kim ap++; rp++; num--; 20074664626SKris Kennaway } 20174664626SKris Kennaway return(carry); 20274664626SKris Kennaway } 20374664626SKris Kennaway 2045c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) 20574664626SKris Kennaway { 206f579bf8eSKris Kennaway assert(n >= 0); 20774664626SKris Kennaway if (n <= 0) return; 2081f13597dSJung-uk Kim 2091f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 2101f13597dSJung-uk Kim while (n&~3) 21174664626SKris Kennaway { 21274664626SKris Kennaway sqr64(r[0],r[1],a[0]); 21374664626SKris Kennaway sqr64(r[2],r[3],a[1]); 21474664626SKris Kennaway sqr64(r[4],r[5],a[2]); 21574664626SKris Kennaway sqr64(r[6],r[7],a[3]); 2161f13597dSJung-uk Kim a+=4; r+=8; n-=4; 2171f13597dSJung-uk Kim } 2181f13597dSJung-uk Kim #endif 2191f13597dSJung-uk Kim while (n) 2201f13597dSJung-uk Kim { 2211f13597dSJung-uk Kim sqr64(r[0],r[1],a[0]); 2221f13597dSJung-uk Kim a++; r+=2; n--; 22374664626SKris Kennaway } 22474664626SKris Kennaway } 22574664626SKris Kennaway 226f579bf8eSKris Kennaway #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ 22774664626SKris Kennaway 22874664626SKris Kennaway #if defined(BN_LLONG) && defined(BN_DIV2W) 22974664626SKris Kennaway 23074664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) 23174664626SKris Kennaway { 23274664626SKris Kennaway return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d)); 23374664626SKris Kennaway } 23474664626SKris Kennaway 23574664626SKris Kennaway #else 23674664626SKris Kennaway 237ddd58736SKris Kennaway /* Divide h,l by d and return the result. */ 23874664626SKris Kennaway /* I need to test this some more :-( */ 23974664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) 24074664626SKris Kennaway { 24174664626SKris Kennaway BN_ULONG dh,dl,q,ret=0,th,tl,t; 24274664626SKris Kennaway int i,count=2; 24374664626SKris Kennaway 24474664626SKris Kennaway if (d == 0) return(BN_MASK2); 24574664626SKris Kennaway 24674664626SKris Kennaway i=BN_num_bits_word(d); 2473b4e3dcbSSimon L. B. Nielsen assert((i == BN_BITS2) || (h <= (BN_ULONG)1<<i)); 248ddd58736SKris Kennaway 24974664626SKris Kennaway i=BN_BITS2-i; 25074664626SKris Kennaway if (h >= d) h-=d; 25174664626SKris Kennaway 25274664626SKris Kennaway if (i) 25374664626SKris Kennaway { 25474664626SKris Kennaway d<<=i; 25574664626SKris Kennaway h=(h<<i)|(l>>(BN_BITS2-i)); 25674664626SKris Kennaway l<<=i; 25774664626SKris Kennaway } 25874664626SKris Kennaway dh=(d&BN_MASK2h)>>BN_BITS4; 25974664626SKris Kennaway dl=(d&BN_MASK2l); 26074664626SKris Kennaway for (;;) 26174664626SKris Kennaway { 26274664626SKris Kennaway if ((h>>BN_BITS4) == dh) 26374664626SKris Kennaway q=BN_MASK2l; 26474664626SKris Kennaway else 26574664626SKris Kennaway q=h/dh; 26674664626SKris Kennaway 26774664626SKris Kennaway th=q*dh; 26874664626SKris Kennaway tl=dl*q; 26974664626SKris Kennaway for (;;) 27074664626SKris Kennaway { 27174664626SKris Kennaway t=h-th; 27274664626SKris Kennaway if ((t&BN_MASK2h) || 27374664626SKris Kennaway ((tl) <= ( 27474664626SKris Kennaway (t<<BN_BITS4)| 27574664626SKris Kennaway ((l&BN_MASK2h)>>BN_BITS4)))) 27674664626SKris Kennaway break; 27774664626SKris Kennaway q--; 27874664626SKris Kennaway th-=dh; 27974664626SKris Kennaway tl-=dl; 28074664626SKris Kennaway } 28174664626SKris Kennaway t=(tl>>BN_BITS4); 28274664626SKris Kennaway tl=(tl<<BN_BITS4)&BN_MASK2h; 28374664626SKris Kennaway th+=t; 28474664626SKris Kennaway 28574664626SKris Kennaway if (l < tl) th++; 28674664626SKris Kennaway l-=tl; 28774664626SKris Kennaway if (h < th) 28874664626SKris Kennaway { 28974664626SKris Kennaway h+=d; 29074664626SKris Kennaway q--; 29174664626SKris Kennaway } 29274664626SKris Kennaway h-=th; 29374664626SKris Kennaway 29474664626SKris Kennaway if (--count == 0) break; 29574664626SKris Kennaway 29674664626SKris Kennaway ret=q<<BN_BITS4; 29774664626SKris Kennaway h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; 29874664626SKris Kennaway l=(l&BN_MASK2l)<<BN_BITS4; 29974664626SKris Kennaway } 30074664626SKris Kennaway ret|=q; 30174664626SKris Kennaway return(ret); 30274664626SKris Kennaway } 303f579bf8eSKris Kennaway #endif /* !defined(BN_LLONG) && defined(BN_DIV2W) */ 30474664626SKris Kennaway 30574664626SKris Kennaway #ifdef BN_LLONG 3065c87c606SMark Murray BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) 30774664626SKris Kennaway { 30874664626SKris Kennaway BN_ULLONG ll=0; 30974664626SKris Kennaway 310f579bf8eSKris Kennaway assert(n >= 0); 31174664626SKris Kennaway if (n <= 0) return((BN_ULONG)0); 31274664626SKris Kennaway 3131f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 3141f13597dSJung-uk Kim while (n&~3) 31574664626SKris Kennaway { 31674664626SKris Kennaway ll+=(BN_ULLONG)a[0]+b[0]; 31774664626SKris Kennaway r[0]=(BN_ULONG)ll&BN_MASK2; 31874664626SKris Kennaway ll>>=BN_BITS2; 31974664626SKris Kennaway ll+=(BN_ULLONG)a[1]+b[1]; 32074664626SKris Kennaway r[1]=(BN_ULONG)ll&BN_MASK2; 32174664626SKris Kennaway ll>>=BN_BITS2; 32274664626SKris Kennaway ll+=(BN_ULLONG)a[2]+b[2]; 32374664626SKris Kennaway r[2]=(BN_ULONG)ll&BN_MASK2; 32474664626SKris Kennaway ll>>=BN_BITS2; 32574664626SKris Kennaway ll+=(BN_ULLONG)a[3]+b[3]; 32674664626SKris Kennaway r[3]=(BN_ULONG)ll&BN_MASK2; 32774664626SKris Kennaway ll>>=BN_BITS2; 3281f13597dSJung-uk Kim a+=4; b+=4; r+=4; n-=4; 3291f13597dSJung-uk Kim } 3301f13597dSJung-uk Kim #endif 3311f13597dSJung-uk Kim while (n) 3321f13597dSJung-uk Kim { 3331f13597dSJung-uk Kim ll+=(BN_ULLONG)a[0]+b[0]; 3341f13597dSJung-uk Kim r[0]=(BN_ULONG)ll&BN_MASK2; 3351f13597dSJung-uk Kim ll>>=BN_BITS2; 3361f13597dSJung-uk Kim a++; b++; r++; n--; 33774664626SKris Kennaway } 33874664626SKris Kennaway return((BN_ULONG)ll); 33974664626SKris Kennaway } 340f579bf8eSKris Kennaway #else /* !BN_LLONG */ 3415c87c606SMark Murray BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) 34274664626SKris Kennaway { 34374664626SKris Kennaway BN_ULONG c,l,t; 34474664626SKris Kennaway 345f579bf8eSKris Kennaway assert(n >= 0); 34674664626SKris Kennaway if (n <= 0) return((BN_ULONG)0); 34774664626SKris Kennaway 34874664626SKris Kennaway c=0; 3491f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 3501f13597dSJung-uk Kim while (n&~3) 35174664626SKris Kennaway { 35274664626SKris Kennaway t=a[0]; 35374664626SKris Kennaway t=(t+c)&BN_MASK2; 35474664626SKris Kennaway c=(t < c); 35574664626SKris Kennaway l=(t+b[0])&BN_MASK2; 35674664626SKris Kennaway c+=(l < t); 35774664626SKris Kennaway r[0]=l; 35874664626SKris Kennaway t=a[1]; 35974664626SKris Kennaway t=(t+c)&BN_MASK2; 36074664626SKris Kennaway c=(t < c); 36174664626SKris Kennaway l=(t+b[1])&BN_MASK2; 36274664626SKris Kennaway c+=(l < t); 36374664626SKris Kennaway r[1]=l; 36474664626SKris Kennaway t=a[2]; 36574664626SKris Kennaway t=(t+c)&BN_MASK2; 36674664626SKris Kennaway c=(t < c); 36774664626SKris Kennaway l=(t+b[2])&BN_MASK2; 36874664626SKris Kennaway c+=(l < t); 36974664626SKris Kennaway r[2]=l; 37074664626SKris Kennaway t=a[3]; 37174664626SKris Kennaway t=(t+c)&BN_MASK2; 37274664626SKris Kennaway c=(t < c); 37374664626SKris Kennaway l=(t+b[3])&BN_MASK2; 37474664626SKris Kennaway c+=(l < t); 37574664626SKris Kennaway r[3]=l; 3761f13597dSJung-uk Kim a+=4; b+=4; r+=4; n-=4; 3771f13597dSJung-uk Kim } 3781f13597dSJung-uk Kim #endif 3791f13597dSJung-uk Kim while(n) 3801f13597dSJung-uk Kim { 3811f13597dSJung-uk Kim t=a[0]; 3821f13597dSJung-uk Kim t=(t+c)&BN_MASK2; 3831f13597dSJung-uk Kim c=(t < c); 3841f13597dSJung-uk Kim l=(t+b[0])&BN_MASK2; 3851f13597dSJung-uk Kim c+=(l < t); 3861f13597dSJung-uk Kim r[0]=l; 3871f13597dSJung-uk Kim a++; b++; r++; n--; 38874664626SKris Kennaway } 38974664626SKris Kennaway return((BN_ULONG)c); 39074664626SKris Kennaway } 391f579bf8eSKris Kennaway #endif /* !BN_LLONG */ 39274664626SKris Kennaway 3935c87c606SMark Murray BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) 39474664626SKris Kennaway { 39574664626SKris Kennaway BN_ULONG t1,t2; 39674664626SKris Kennaway int c=0; 39774664626SKris Kennaway 398f579bf8eSKris Kennaway assert(n >= 0); 39974664626SKris Kennaway if (n <= 0) return((BN_ULONG)0); 40074664626SKris Kennaway 4011f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT 4021f13597dSJung-uk Kim while (n&~3) 40374664626SKris Kennaway { 40474664626SKris Kennaway t1=a[0]; t2=b[0]; 40574664626SKris Kennaway r[0]=(t1-t2-c)&BN_MASK2; 40674664626SKris Kennaway if (t1 != t2) c=(t1 < t2); 40774664626SKris Kennaway t1=a[1]; t2=b[1]; 40874664626SKris Kennaway r[1]=(t1-t2-c)&BN_MASK2; 40974664626SKris Kennaway if (t1 != t2) c=(t1 < t2); 41074664626SKris Kennaway t1=a[2]; t2=b[2]; 41174664626SKris Kennaway r[2]=(t1-t2-c)&BN_MASK2; 41274664626SKris Kennaway if (t1 != t2) c=(t1 < t2); 41374664626SKris Kennaway t1=a[3]; t2=b[3]; 41474664626SKris Kennaway r[3]=(t1-t2-c)&BN_MASK2; 41574664626SKris Kennaway if (t1 != t2) c=(t1 < t2); 4161f13597dSJung-uk Kim a+=4; b+=4; r+=4; n-=4; 4171f13597dSJung-uk Kim } 4181f13597dSJung-uk Kim #endif 4191f13597dSJung-uk Kim while (n) 4201f13597dSJung-uk Kim { 4211f13597dSJung-uk Kim t1=a[0]; t2=b[0]; 4221f13597dSJung-uk Kim r[0]=(t1-t2-c)&BN_MASK2; 4231f13597dSJung-uk Kim if (t1 != t2) c=(t1 < t2); 4241f13597dSJung-uk Kim a++; b++; r++; n--; 42574664626SKris Kennaway } 42674664626SKris Kennaway return(c); 42774664626SKris Kennaway } 42874664626SKris Kennaway 4291f13597dSJung-uk Kim #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) 43074664626SKris Kennaway 43174664626SKris Kennaway #undef bn_mul_comba8 43274664626SKris Kennaway #undef bn_mul_comba4 43374664626SKris Kennaway #undef bn_sqr_comba8 43474664626SKris Kennaway #undef bn_sqr_comba4 43574664626SKris Kennaway 436f579bf8eSKris Kennaway /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ 437f579bf8eSKris Kennaway /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ 438f579bf8eSKris Kennaway /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 439f579bf8eSKris Kennaway /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 440f579bf8eSKris Kennaway 44174664626SKris Kennaway #ifdef BN_LLONG 44274664626SKris Kennaway #define mul_add_c(a,b,c0,c1,c2) \ 44374664626SKris Kennaway t=(BN_ULLONG)a*b; \ 44474664626SKris Kennaway t1=(BN_ULONG)Lw(t); \ 44574664626SKris Kennaway t2=(BN_ULONG)Hw(t); \ 44674664626SKris Kennaway c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 44774664626SKris Kennaway c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 44874664626SKris Kennaway 44974664626SKris Kennaway #define mul_add_c2(a,b,c0,c1,c2) \ 45074664626SKris Kennaway t=(BN_ULLONG)a*b; \ 45174664626SKris Kennaway tt=(t+t)&BN_MASK; \ 45274664626SKris Kennaway if (tt < t) c2++; \ 45374664626SKris Kennaway t1=(BN_ULONG)Lw(tt); \ 45474664626SKris Kennaway t2=(BN_ULONG)Hw(tt); \ 45574664626SKris Kennaway c0=(c0+t1)&BN_MASK2; \ 45674664626SKris Kennaway if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 45774664626SKris Kennaway c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 45874664626SKris Kennaway 45974664626SKris Kennaway #define sqr_add_c(a,i,c0,c1,c2) \ 46074664626SKris Kennaway t=(BN_ULLONG)a[i]*a[i]; \ 46174664626SKris Kennaway t1=(BN_ULONG)Lw(t); \ 46274664626SKris Kennaway t2=(BN_ULONG)Hw(t); \ 46374664626SKris Kennaway c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 46474664626SKris Kennaway c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 46574664626SKris Kennaway 46674664626SKris Kennaway #define sqr_add_c2(a,i,j,c0,c1,c2) \ 46774664626SKris Kennaway mul_add_c2((a)[i],(a)[j],c0,c1,c2) 468f579bf8eSKris Kennaway 4693b4e3dcbSSimon L. B. Nielsen #elif defined(BN_UMULT_LOHI) 4703b4e3dcbSSimon L. B. Nielsen 4713b4e3dcbSSimon L. B. Nielsen #define mul_add_c(a,b,c0,c1,c2) { \ 4723b4e3dcbSSimon L. B. Nielsen BN_ULONG ta=(a),tb=(b); \ 4733b4e3dcbSSimon L. B. Nielsen BN_UMULT_LOHI(t1,t2,ta,tb); \ 4743b4e3dcbSSimon L. B. Nielsen c0 += t1; t2 += (c0<t1)?1:0; \ 4753b4e3dcbSSimon L. B. Nielsen c1 += t2; c2 += (c1<t2)?1:0; \ 4763b4e3dcbSSimon L. B. Nielsen } 4773b4e3dcbSSimon L. B. Nielsen 4783b4e3dcbSSimon L. B. Nielsen #define mul_add_c2(a,b,c0,c1,c2) { \ 4793b4e3dcbSSimon L. B. Nielsen BN_ULONG ta=(a),tb=(b),t0; \ 4803b4e3dcbSSimon L. B. Nielsen BN_UMULT_LOHI(t0,t1,ta,tb); \ 4813b4e3dcbSSimon L. B. Nielsen t2 = t1+t1; c2 += (t2<t1)?1:0; \ 4823b4e3dcbSSimon L. B. Nielsen t1 = t0+t0; t2 += (t1<t0)?1:0; \ 4833b4e3dcbSSimon L. B. Nielsen c0 += t1; t2 += (c0<t1)?1:0; \ 4843b4e3dcbSSimon L. B. Nielsen c1 += t2; c2 += (c1<t2)?1:0; \ 4853b4e3dcbSSimon L. B. Nielsen } 4863b4e3dcbSSimon L. B. Nielsen 4873b4e3dcbSSimon L. B. Nielsen #define sqr_add_c(a,i,c0,c1,c2) { \ 4883b4e3dcbSSimon L. B. Nielsen BN_ULONG ta=(a)[i]; \ 4893b4e3dcbSSimon L. B. Nielsen BN_UMULT_LOHI(t1,t2,ta,ta); \ 4903b4e3dcbSSimon L. B. Nielsen c0 += t1; t2 += (c0<t1)?1:0; \ 4913b4e3dcbSSimon L. B. Nielsen c1 += t2; c2 += (c1<t2)?1:0; \ 4923b4e3dcbSSimon L. B. Nielsen } 4933b4e3dcbSSimon L. B. Nielsen 4943b4e3dcbSSimon L. B. Nielsen #define sqr_add_c2(a,i,j,c0,c1,c2) \ 4953b4e3dcbSSimon L. B. Nielsen mul_add_c2((a)[i],(a)[j],c0,c1,c2) 4963b4e3dcbSSimon L. B. Nielsen 497f579bf8eSKris Kennaway #elif defined(BN_UMULT_HIGH) 498f579bf8eSKris Kennaway 499f579bf8eSKris Kennaway #define mul_add_c(a,b,c0,c1,c2) { \ 500f579bf8eSKris Kennaway BN_ULONG ta=(a),tb=(b); \ 501f579bf8eSKris Kennaway t1 = ta * tb; \ 502f579bf8eSKris Kennaway t2 = BN_UMULT_HIGH(ta,tb); \ 503f579bf8eSKris Kennaway c0 += t1; t2 += (c0<t1)?1:0; \ 504f579bf8eSKris Kennaway c1 += t2; c2 += (c1<t2)?1:0; \ 505f579bf8eSKris Kennaway } 506f579bf8eSKris Kennaway 507f579bf8eSKris Kennaway #define mul_add_c2(a,b,c0,c1,c2) { \ 508f579bf8eSKris Kennaway BN_ULONG ta=(a),tb=(b),t0; \ 509f579bf8eSKris Kennaway t1 = BN_UMULT_HIGH(ta,tb); \ 510f579bf8eSKris Kennaway t0 = ta * tb; \ 511f579bf8eSKris Kennaway t2 = t1+t1; c2 += (t2<t1)?1:0; \ 512f579bf8eSKris Kennaway t1 = t0+t0; t2 += (t1<t0)?1:0; \ 513f579bf8eSKris Kennaway c0 += t1; t2 += (c0<t1)?1:0; \ 514f579bf8eSKris Kennaway c1 += t2; c2 += (c1<t2)?1:0; \ 515f579bf8eSKris Kennaway } 516f579bf8eSKris Kennaway 517f579bf8eSKris Kennaway #define sqr_add_c(a,i,c0,c1,c2) { \ 518f579bf8eSKris Kennaway BN_ULONG ta=(a)[i]; \ 519f579bf8eSKris Kennaway t1 = ta * ta; \ 520f579bf8eSKris Kennaway t2 = BN_UMULT_HIGH(ta,ta); \ 521f579bf8eSKris Kennaway c0 += t1; t2 += (c0<t1)?1:0; \ 522f579bf8eSKris Kennaway c1 += t2; c2 += (c1<t2)?1:0; \ 523f579bf8eSKris Kennaway } 524f579bf8eSKris Kennaway 525f579bf8eSKris Kennaway #define sqr_add_c2(a,i,j,c0,c1,c2) \ 526f579bf8eSKris Kennaway mul_add_c2((a)[i],(a)[j],c0,c1,c2) 527f579bf8eSKris Kennaway 528f579bf8eSKris Kennaway #else /* !BN_LLONG */ 52974664626SKris Kennaway #define mul_add_c(a,b,c0,c1,c2) \ 53074664626SKris Kennaway t1=LBITS(a); t2=HBITS(a); \ 53174664626SKris Kennaway bl=LBITS(b); bh=HBITS(b); \ 53274664626SKris Kennaway mul64(t1,t2,bl,bh); \ 53374664626SKris Kennaway c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 53474664626SKris Kennaway c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 53574664626SKris Kennaway 53674664626SKris Kennaway #define mul_add_c2(a,b,c0,c1,c2) \ 53774664626SKris Kennaway t1=LBITS(a); t2=HBITS(a); \ 53874664626SKris Kennaway bl=LBITS(b); bh=HBITS(b); \ 53974664626SKris Kennaway mul64(t1,t2,bl,bh); \ 54074664626SKris Kennaway if (t2 & BN_TBIT) c2++; \ 54174664626SKris Kennaway t2=(t2+t2)&BN_MASK2; \ 54274664626SKris Kennaway if (t1 & BN_TBIT) t2++; \ 54374664626SKris Kennaway t1=(t1+t1)&BN_MASK2; \ 54474664626SKris Kennaway c0=(c0+t1)&BN_MASK2; \ 54574664626SKris Kennaway if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \ 54674664626SKris Kennaway c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 54774664626SKris Kennaway 54874664626SKris Kennaway #define sqr_add_c(a,i,c0,c1,c2) \ 54974664626SKris Kennaway sqr64(t1,t2,(a)[i]); \ 55074664626SKris Kennaway c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \ 55174664626SKris Kennaway c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++; 55274664626SKris Kennaway 55374664626SKris Kennaway #define sqr_add_c2(a,i,j,c0,c1,c2) \ 55474664626SKris Kennaway mul_add_c2((a)[i],(a)[j],c0,c1,c2) 555f579bf8eSKris Kennaway #endif /* !BN_LLONG */ 55674664626SKris Kennaway 55774664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 55874664626SKris Kennaway { 55974664626SKris Kennaway #ifdef BN_LLONG 56074664626SKris Kennaway BN_ULLONG t; 56174664626SKris Kennaway #else 56274664626SKris Kennaway BN_ULONG bl,bh; 56374664626SKris Kennaway #endif 56474664626SKris Kennaway BN_ULONG t1,t2; 56574664626SKris Kennaway BN_ULONG c1,c2,c3; 56674664626SKris Kennaway 56774664626SKris Kennaway c1=0; 56874664626SKris Kennaway c2=0; 56974664626SKris Kennaway c3=0; 57074664626SKris Kennaway mul_add_c(a[0],b[0],c1,c2,c3); 57174664626SKris Kennaway r[0]=c1; 57274664626SKris Kennaway c1=0; 57374664626SKris Kennaway mul_add_c(a[0],b[1],c2,c3,c1); 57474664626SKris Kennaway mul_add_c(a[1],b[0],c2,c3,c1); 57574664626SKris Kennaway r[1]=c2; 57674664626SKris Kennaway c2=0; 57774664626SKris Kennaway mul_add_c(a[2],b[0],c3,c1,c2); 57874664626SKris Kennaway mul_add_c(a[1],b[1],c3,c1,c2); 57974664626SKris Kennaway mul_add_c(a[0],b[2],c3,c1,c2); 58074664626SKris Kennaway r[2]=c3; 58174664626SKris Kennaway c3=0; 58274664626SKris Kennaway mul_add_c(a[0],b[3],c1,c2,c3); 58374664626SKris Kennaway mul_add_c(a[1],b[2],c1,c2,c3); 58474664626SKris Kennaway mul_add_c(a[2],b[1],c1,c2,c3); 58574664626SKris Kennaway mul_add_c(a[3],b[0],c1,c2,c3); 58674664626SKris Kennaway r[3]=c1; 58774664626SKris Kennaway c1=0; 58874664626SKris Kennaway mul_add_c(a[4],b[0],c2,c3,c1); 58974664626SKris Kennaway mul_add_c(a[3],b[1],c2,c3,c1); 59074664626SKris Kennaway mul_add_c(a[2],b[2],c2,c3,c1); 59174664626SKris Kennaway mul_add_c(a[1],b[3],c2,c3,c1); 59274664626SKris Kennaway mul_add_c(a[0],b[4],c2,c3,c1); 59374664626SKris Kennaway r[4]=c2; 59474664626SKris Kennaway c2=0; 59574664626SKris Kennaway mul_add_c(a[0],b[5],c3,c1,c2); 59674664626SKris Kennaway mul_add_c(a[1],b[4],c3,c1,c2); 59774664626SKris Kennaway mul_add_c(a[2],b[3],c3,c1,c2); 59874664626SKris Kennaway mul_add_c(a[3],b[2],c3,c1,c2); 59974664626SKris Kennaway mul_add_c(a[4],b[1],c3,c1,c2); 60074664626SKris Kennaway mul_add_c(a[5],b[0],c3,c1,c2); 60174664626SKris Kennaway r[5]=c3; 60274664626SKris Kennaway c3=0; 60374664626SKris Kennaway mul_add_c(a[6],b[0],c1,c2,c3); 60474664626SKris Kennaway mul_add_c(a[5],b[1],c1,c2,c3); 60574664626SKris Kennaway mul_add_c(a[4],b[2],c1,c2,c3); 60674664626SKris Kennaway mul_add_c(a[3],b[3],c1,c2,c3); 60774664626SKris Kennaway mul_add_c(a[2],b[4],c1,c2,c3); 60874664626SKris Kennaway mul_add_c(a[1],b[5],c1,c2,c3); 60974664626SKris Kennaway mul_add_c(a[0],b[6],c1,c2,c3); 61074664626SKris Kennaway r[6]=c1; 61174664626SKris Kennaway c1=0; 61274664626SKris Kennaway mul_add_c(a[0],b[7],c2,c3,c1); 61374664626SKris Kennaway mul_add_c(a[1],b[6],c2,c3,c1); 61474664626SKris Kennaway mul_add_c(a[2],b[5],c2,c3,c1); 61574664626SKris Kennaway mul_add_c(a[3],b[4],c2,c3,c1); 61674664626SKris Kennaway mul_add_c(a[4],b[3],c2,c3,c1); 61774664626SKris Kennaway mul_add_c(a[5],b[2],c2,c3,c1); 61874664626SKris Kennaway mul_add_c(a[6],b[1],c2,c3,c1); 61974664626SKris Kennaway mul_add_c(a[7],b[0],c2,c3,c1); 62074664626SKris Kennaway r[7]=c2; 62174664626SKris Kennaway c2=0; 62274664626SKris Kennaway mul_add_c(a[7],b[1],c3,c1,c2); 62374664626SKris Kennaway mul_add_c(a[6],b[2],c3,c1,c2); 62474664626SKris Kennaway mul_add_c(a[5],b[3],c3,c1,c2); 62574664626SKris Kennaway mul_add_c(a[4],b[4],c3,c1,c2); 62674664626SKris Kennaway mul_add_c(a[3],b[5],c3,c1,c2); 62774664626SKris Kennaway mul_add_c(a[2],b[6],c3,c1,c2); 62874664626SKris Kennaway mul_add_c(a[1],b[7],c3,c1,c2); 62974664626SKris Kennaway r[8]=c3; 63074664626SKris Kennaway c3=0; 63174664626SKris Kennaway mul_add_c(a[2],b[7],c1,c2,c3); 63274664626SKris Kennaway mul_add_c(a[3],b[6],c1,c2,c3); 63374664626SKris Kennaway mul_add_c(a[4],b[5],c1,c2,c3); 63474664626SKris Kennaway mul_add_c(a[5],b[4],c1,c2,c3); 63574664626SKris Kennaway mul_add_c(a[6],b[3],c1,c2,c3); 63674664626SKris Kennaway mul_add_c(a[7],b[2],c1,c2,c3); 63774664626SKris Kennaway r[9]=c1; 63874664626SKris Kennaway c1=0; 63974664626SKris Kennaway mul_add_c(a[7],b[3],c2,c3,c1); 64074664626SKris Kennaway mul_add_c(a[6],b[4],c2,c3,c1); 64174664626SKris Kennaway mul_add_c(a[5],b[5],c2,c3,c1); 64274664626SKris Kennaway mul_add_c(a[4],b[6],c2,c3,c1); 64374664626SKris Kennaway mul_add_c(a[3],b[7],c2,c3,c1); 64474664626SKris Kennaway r[10]=c2; 64574664626SKris Kennaway c2=0; 64674664626SKris Kennaway mul_add_c(a[4],b[7],c3,c1,c2); 64774664626SKris Kennaway mul_add_c(a[5],b[6],c3,c1,c2); 64874664626SKris Kennaway mul_add_c(a[6],b[5],c3,c1,c2); 64974664626SKris Kennaway mul_add_c(a[7],b[4],c3,c1,c2); 65074664626SKris Kennaway r[11]=c3; 65174664626SKris Kennaway c3=0; 65274664626SKris Kennaway mul_add_c(a[7],b[5],c1,c2,c3); 65374664626SKris Kennaway mul_add_c(a[6],b[6],c1,c2,c3); 65474664626SKris Kennaway mul_add_c(a[5],b[7],c1,c2,c3); 65574664626SKris Kennaway r[12]=c1; 65674664626SKris Kennaway c1=0; 65774664626SKris Kennaway mul_add_c(a[6],b[7],c2,c3,c1); 65874664626SKris Kennaway mul_add_c(a[7],b[6],c2,c3,c1); 65974664626SKris Kennaway r[13]=c2; 66074664626SKris Kennaway c2=0; 66174664626SKris Kennaway mul_add_c(a[7],b[7],c3,c1,c2); 66274664626SKris Kennaway r[14]=c3; 66374664626SKris Kennaway r[15]=c1; 66474664626SKris Kennaway } 66574664626SKris Kennaway 66674664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 66774664626SKris Kennaway { 66874664626SKris Kennaway #ifdef BN_LLONG 66974664626SKris Kennaway BN_ULLONG t; 67074664626SKris Kennaway #else 67174664626SKris Kennaway BN_ULONG bl,bh; 67274664626SKris Kennaway #endif 67374664626SKris Kennaway BN_ULONG t1,t2; 67474664626SKris Kennaway BN_ULONG c1,c2,c3; 67574664626SKris Kennaway 67674664626SKris Kennaway c1=0; 67774664626SKris Kennaway c2=0; 67874664626SKris Kennaway c3=0; 67974664626SKris Kennaway mul_add_c(a[0],b[0],c1,c2,c3); 68074664626SKris Kennaway r[0]=c1; 68174664626SKris Kennaway c1=0; 68274664626SKris Kennaway mul_add_c(a[0],b[1],c2,c3,c1); 68374664626SKris Kennaway mul_add_c(a[1],b[0],c2,c3,c1); 68474664626SKris Kennaway r[1]=c2; 68574664626SKris Kennaway c2=0; 68674664626SKris Kennaway mul_add_c(a[2],b[0],c3,c1,c2); 68774664626SKris Kennaway mul_add_c(a[1],b[1],c3,c1,c2); 68874664626SKris Kennaway mul_add_c(a[0],b[2],c3,c1,c2); 68974664626SKris Kennaway r[2]=c3; 69074664626SKris Kennaway c3=0; 69174664626SKris Kennaway mul_add_c(a[0],b[3],c1,c2,c3); 69274664626SKris Kennaway mul_add_c(a[1],b[2],c1,c2,c3); 69374664626SKris Kennaway mul_add_c(a[2],b[1],c1,c2,c3); 69474664626SKris Kennaway mul_add_c(a[3],b[0],c1,c2,c3); 69574664626SKris Kennaway r[3]=c1; 69674664626SKris Kennaway c1=0; 69774664626SKris Kennaway mul_add_c(a[3],b[1],c2,c3,c1); 69874664626SKris Kennaway mul_add_c(a[2],b[2],c2,c3,c1); 69974664626SKris Kennaway mul_add_c(a[1],b[3],c2,c3,c1); 70074664626SKris Kennaway r[4]=c2; 70174664626SKris Kennaway c2=0; 70274664626SKris Kennaway mul_add_c(a[2],b[3],c3,c1,c2); 70374664626SKris Kennaway mul_add_c(a[3],b[2],c3,c1,c2); 70474664626SKris Kennaway r[5]=c3; 70574664626SKris Kennaway c3=0; 70674664626SKris Kennaway mul_add_c(a[3],b[3],c1,c2,c3); 70774664626SKris Kennaway r[6]=c1; 70874664626SKris Kennaway r[7]=c2; 70974664626SKris Kennaway } 71074664626SKris Kennaway 7115c87c606SMark Murray void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 71274664626SKris Kennaway { 71374664626SKris Kennaway #ifdef BN_LLONG 71474664626SKris Kennaway BN_ULLONG t,tt; 71574664626SKris Kennaway #else 71674664626SKris Kennaway BN_ULONG bl,bh; 71774664626SKris Kennaway #endif 71874664626SKris Kennaway BN_ULONG t1,t2; 71974664626SKris Kennaway BN_ULONG c1,c2,c3; 72074664626SKris Kennaway 72174664626SKris Kennaway c1=0; 72274664626SKris Kennaway c2=0; 72374664626SKris Kennaway c3=0; 72474664626SKris Kennaway sqr_add_c(a,0,c1,c2,c3); 72574664626SKris Kennaway r[0]=c1; 72674664626SKris Kennaway c1=0; 72774664626SKris Kennaway sqr_add_c2(a,1,0,c2,c3,c1); 72874664626SKris Kennaway r[1]=c2; 72974664626SKris Kennaway c2=0; 73074664626SKris Kennaway sqr_add_c(a,1,c3,c1,c2); 73174664626SKris Kennaway sqr_add_c2(a,2,0,c3,c1,c2); 73274664626SKris Kennaway r[2]=c3; 73374664626SKris Kennaway c3=0; 73474664626SKris Kennaway sqr_add_c2(a,3,0,c1,c2,c3); 73574664626SKris Kennaway sqr_add_c2(a,2,1,c1,c2,c3); 73674664626SKris Kennaway r[3]=c1; 73774664626SKris Kennaway c1=0; 73874664626SKris Kennaway sqr_add_c(a,2,c2,c3,c1); 73974664626SKris Kennaway sqr_add_c2(a,3,1,c2,c3,c1); 74074664626SKris Kennaway sqr_add_c2(a,4,0,c2,c3,c1); 74174664626SKris Kennaway r[4]=c2; 74274664626SKris Kennaway c2=0; 74374664626SKris Kennaway sqr_add_c2(a,5,0,c3,c1,c2); 74474664626SKris Kennaway sqr_add_c2(a,4,1,c3,c1,c2); 74574664626SKris Kennaway sqr_add_c2(a,3,2,c3,c1,c2); 74674664626SKris Kennaway r[5]=c3; 74774664626SKris Kennaway c3=0; 74874664626SKris Kennaway sqr_add_c(a,3,c1,c2,c3); 74974664626SKris Kennaway sqr_add_c2(a,4,2,c1,c2,c3); 75074664626SKris Kennaway sqr_add_c2(a,5,1,c1,c2,c3); 75174664626SKris Kennaway sqr_add_c2(a,6,0,c1,c2,c3); 75274664626SKris Kennaway r[6]=c1; 75374664626SKris Kennaway c1=0; 75474664626SKris Kennaway sqr_add_c2(a,7,0,c2,c3,c1); 75574664626SKris Kennaway sqr_add_c2(a,6,1,c2,c3,c1); 75674664626SKris Kennaway sqr_add_c2(a,5,2,c2,c3,c1); 75774664626SKris Kennaway sqr_add_c2(a,4,3,c2,c3,c1); 75874664626SKris Kennaway r[7]=c2; 75974664626SKris Kennaway c2=0; 76074664626SKris Kennaway sqr_add_c(a,4,c3,c1,c2); 76174664626SKris Kennaway sqr_add_c2(a,5,3,c3,c1,c2); 76274664626SKris Kennaway sqr_add_c2(a,6,2,c3,c1,c2); 76374664626SKris Kennaway sqr_add_c2(a,7,1,c3,c1,c2); 76474664626SKris Kennaway r[8]=c3; 76574664626SKris Kennaway c3=0; 76674664626SKris Kennaway sqr_add_c2(a,7,2,c1,c2,c3); 76774664626SKris Kennaway sqr_add_c2(a,6,3,c1,c2,c3); 76874664626SKris Kennaway sqr_add_c2(a,5,4,c1,c2,c3); 76974664626SKris Kennaway r[9]=c1; 77074664626SKris Kennaway c1=0; 77174664626SKris Kennaway sqr_add_c(a,5,c2,c3,c1); 77274664626SKris Kennaway sqr_add_c2(a,6,4,c2,c3,c1); 77374664626SKris Kennaway sqr_add_c2(a,7,3,c2,c3,c1); 77474664626SKris Kennaway r[10]=c2; 77574664626SKris Kennaway c2=0; 77674664626SKris Kennaway sqr_add_c2(a,7,4,c3,c1,c2); 77774664626SKris Kennaway sqr_add_c2(a,6,5,c3,c1,c2); 77874664626SKris Kennaway r[11]=c3; 77974664626SKris Kennaway c3=0; 78074664626SKris Kennaway sqr_add_c(a,6,c1,c2,c3); 78174664626SKris Kennaway sqr_add_c2(a,7,5,c1,c2,c3); 78274664626SKris Kennaway r[12]=c1; 78374664626SKris Kennaway c1=0; 78474664626SKris Kennaway sqr_add_c2(a,7,6,c2,c3,c1); 78574664626SKris Kennaway r[13]=c2; 78674664626SKris Kennaway c2=0; 78774664626SKris Kennaway sqr_add_c(a,7,c3,c1,c2); 78874664626SKris Kennaway r[14]=c3; 78974664626SKris Kennaway r[15]=c1; 79074664626SKris Kennaway } 79174664626SKris Kennaway 7925c87c606SMark Murray void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 79374664626SKris Kennaway { 79474664626SKris Kennaway #ifdef BN_LLONG 79574664626SKris Kennaway BN_ULLONG t,tt; 79674664626SKris Kennaway #else 79774664626SKris Kennaway BN_ULONG bl,bh; 79874664626SKris Kennaway #endif 79974664626SKris Kennaway BN_ULONG t1,t2; 80074664626SKris Kennaway BN_ULONG c1,c2,c3; 80174664626SKris Kennaway 80274664626SKris Kennaway c1=0; 80374664626SKris Kennaway c2=0; 80474664626SKris Kennaway c3=0; 80574664626SKris Kennaway sqr_add_c(a,0,c1,c2,c3); 80674664626SKris Kennaway r[0]=c1; 80774664626SKris Kennaway c1=0; 80874664626SKris Kennaway sqr_add_c2(a,1,0,c2,c3,c1); 80974664626SKris Kennaway r[1]=c2; 81074664626SKris Kennaway c2=0; 81174664626SKris Kennaway sqr_add_c(a,1,c3,c1,c2); 81274664626SKris Kennaway sqr_add_c2(a,2,0,c3,c1,c2); 81374664626SKris Kennaway r[2]=c3; 81474664626SKris Kennaway c3=0; 81574664626SKris Kennaway sqr_add_c2(a,3,0,c1,c2,c3); 81674664626SKris Kennaway sqr_add_c2(a,2,1,c1,c2,c3); 81774664626SKris Kennaway r[3]=c1; 81874664626SKris Kennaway c1=0; 81974664626SKris Kennaway sqr_add_c(a,2,c2,c3,c1); 82074664626SKris Kennaway sqr_add_c2(a,3,1,c2,c3,c1); 82174664626SKris Kennaway r[4]=c2; 82274664626SKris Kennaway c2=0; 82374664626SKris Kennaway sqr_add_c2(a,3,2,c3,c1,c2); 82474664626SKris Kennaway r[5]=c3; 82574664626SKris Kennaway c3=0; 82674664626SKris Kennaway sqr_add_c(a,3,c1,c2,c3); 82774664626SKris Kennaway r[6]=c1; 82874664626SKris Kennaway r[7]=c2; 82974664626SKris Kennaway } 8301f13597dSJung-uk Kim 8311f13597dSJung-uk Kim #ifdef OPENSSL_NO_ASM 8321f13597dSJung-uk Kim #ifdef OPENSSL_BN_ASM_MONT 8331f13597dSJung-uk Kim #include <alloca.h> 8341f13597dSJung-uk Kim /* 8351f13597dSJung-uk Kim * This is essentially reference implementation, which may or may not 8361f13597dSJung-uk Kim * result in performance improvement. E.g. on IA-32 this routine was 8371f13597dSJung-uk Kim * observed to give 40% faster rsa1024 private key operations and 10% 8381f13597dSJung-uk Kim * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only 8391f13597dSJung-uk Kim * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a 8401f13597dSJung-uk Kim * reference implementation, one to be used as starting point for 8411f13597dSJung-uk Kim * platform-specific assembler. Mentioned numbers apply to compiler 8421f13597dSJung-uk Kim * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and 8431f13597dSJung-uk Kim * can vary not only from platform to platform, but even for compiler 8441f13597dSJung-uk Kim * versions. Assembler vs. assembler improvement coefficients can 8451f13597dSJung-uk Kim * [and are known to] differ and are to be documented elsewhere. 8461f13597dSJung-uk Kim */ 8471f13597dSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) 8481f13597dSJung-uk Kim { 8491f13597dSJung-uk Kim BN_ULONG c0,c1,ml,*tp,n0; 8501f13597dSJung-uk Kim #ifdef mul64 8511f13597dSJung-uk Kim BN_ULONG mh; 8521f13597dSJung-uk Kim #endif 8531f13597dSJung-uk Kim volatile BN_ULONG *vp; 8541f13597dSJung-uk Kim int i=0,j; 8551f13597dSJung-uk Kim 8561f13597dSJung-uk Kim #if 0 /* template for platform-specific implementation */ 8571f13597dSJung-uk Kim if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num); 8581f13597dSJung-uk Kim #endif 8591f13597dSJung-uk Kim vp = tp = alloca((num+2)*sizeof(BN_ULONG)); 8601f13597dSJung-uk Kim 8611f13597dSJung-uk Kim n0 = *n0p; 8621f13597dSJung-uk Kim 8631f13597dSJung-uk Kim c0 = 0; 8641f13597dSJung-uk Kim ml = bp[0]; 8651f13597dSJung-uk Kim #ifdef mul64 8661f13597dSJung-uk Kim mh = HBITS(ml); 8671f13597dSJung-uk Kim ml = LBITS(ml); 8681f13597dSJung-uk Kim for (j=0;j<num;++j) 8691f13597dSJung-uk Kim mul(tp[j],ap[j],ml,mh,c0); 8701f13597dSJung-uk Kim #else 8711f13597dSJung-uk Kim for (j=0;j<num;++j) 8721f13597dSJung-uk Kim mul(tp[j],ap[j],ml,c0); 8731f13597dSJung-uk Kim #endif 8741f13597dSJung-uk Kim 8751f13597dSJung-uk Kim tp[num] = c0; 8761f13597dSJung-uk Kim tp[num+1] = 0; 8771f13597dSJung-uk Kim goto enter; 8781f13597dSJung-uk Kim 8791f13597dSJung-uk Kim for(i=0;i<num;i++) 8801f13597dSJung-uk Kim { 8811f13597dSJung-uk Kim c0 = 0; 8821f13597dSJung-uk Kim ml = bp[i]; 8831f13597dSJung-uk Kim #ifdef mul64 8841f13597dSJung-uk Kim mh = HBITS(ml); 8851f13597dSJung-uk Kim ml = LBITS(ml); 8861f13597dSJung-uk Kim for (j=0;j<num;++j) 8871f13597dSJung-uk Kim mul_add(tp[j],ap[j],ml,mh,c0); 8881f13597dSJung-uk Kim #else 8891f13597dSJung-uk Kim for (j=0;j<num;++j) 8901f13597dSJung-uk Kim mul_add(tp[j],ap[j],ml,c0); 8911f13597dSJung-uk Kim #endif 8921f13597dSJung-uk Kim c1 = (tp[num] + c0)&BN_MASK2; 8931f13597dSJung-uk Kim tp[num] = c1; 8941f13597dSJung-uk Kim tp[num+1] = (c1<c0?1:0); 8951f13597dSJung-uk Kim enter: 8961f13597dSJung-uk Kim c1 = tp[0]; 8971f13597dSJung-uk Kim ml = (c1*n0)&BN_MASK2; 8981f13597dSJung-uk Kim c0 = 0; 8991f13597dSJung-uk Kim #ifdef mul64 9001f13597dSJung-uk Kim mh = HBITS(ml); 9011f13597dSJung-uk Kim ml = LBITS(ml); 9021f13597dSJung-uk Kim mul_add(c1,np[0],ml,mh,c0); 9031f13597dSJung-uk Kim #else 9041f13597dSJung-uk Kim mul_add(c1,ml,np[0],c0); 9051f13597dSJung-uk Kim #endif 9061f13597dSJung-uk Kim for(j=1;j<num;j++) 9071f13597dSJung-uk Kim { 9081f13597dSJung-uk Kim c1 = tp[j]; 9091f13597dSJung-uk Kim #ifdef mul64 9101f13597dSJung-uk Kim mul_add(c1,np[j],ml,mh,c0); 9111f13597dSJung-uk Kim #else 9121f13597dSJung-uk Kim mul_add(c1,ml,np[j],c0); 9131f13597dSJung-uk Kim #endif 9141f13597dSJung-uk Kim tp[j-1] = c1&BN_MASK2; 9151f13597dSJung-uk Kim } 9161f13597dSJung-uk Kim c1 = (tp[num] + c0)&BN_MASK2; 9171f13597dSJung-uk Kim tp[num-1] = c1; 9181f13597dSJung-uk Kim tp[num] = tp[num+1] + (c1<c0?1:0); 9191f13597dSJung-uk Kim } 9201f13597dSJung-uk Kim 9211f13597dSJung-uk Kim if (tp[num]!=0 || tp[num-1]>=np[num-1]) 9221f13597dSJung-uk Kim { 9231f13597dSJung-uk Kim c0 = bn_sub_words(rp,tp,np,num); 9241f13597dSJung-uk Kim if (tp[num]!=0 || c0==0) 9251f13597dSJung-uk Kim { 9261f13597dSJung-uk Kim for(i=0;i<num+2;i++) vp[i] = 0; 9271f13597dSJung-uk Kim return 1; 9281f13597dSJung-uk Kim } 9291f13597dSJung-uk Kim } 9301f13597dSJung-uk Kim for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; 9311f13597dSJung-uk Kim vp[num] = 0; 9321f13597dSJung-uk Kim vp[num+1] = 0; 9331f13597dSJung-uk Kim return 1; 9341f13597dSJung-uk Kim } 9351f13597dSJung-uk Kim #else 9361f13597dSJung-uk Kim /* 9371f13597dSJung-uk Kim * Return value of 0 indicates that multiplication/convolution was not 9381f13597dSJung-uk Kim * performed to signal the caller to fall down to alternative/original 9391f13597dSJung-uk Kim * code-path. 9401f13597dSJung-uk Kim */ 9411f13597dSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) 9421f13597dSJung-uk Kim { return 0; } 9431f13597dSJung-uk Kim #endif /* OPENSSL_BN_ASM_MONT */ 9441f13597dSJung-uk Kim #endif 9451f13597dSJung-uk Kim 946f579bf8eSKris Kennaway #else /* !BN_MUL_COMBA */ 94774664626SKris Kennaway 94874664626SKris Kennaway /* hmm... is it faster just to do a multiply? */ 94974664626SKris Kennaway #undef bn_sqr_comba4 9501f13597dSJung-uk Kim void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 95174664626SKris Kennaway { 95274664626SKris Kennaway BN_ULONG t[8]; 95374664626SKris Kennaway bn_sqr_normal(r,a,4,t); 95474664626SKris Kennaway } 95574664626SKris Kennaway 95674664626SKris Kennaway #undef bn_sqr_comba8 9571f13597dSJung-uk Kim void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 95874664626SKris Kennaway { 95974664626SKris Kennaway BN_ULONG t[16]; 96074664626SKris Kennaway bn_sqr_normal(r,a,8,t); 96174664626SKris Kennaway } 96274664626SKris Kennaway 96374664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 96474664626SKris Kennaway { 96574664626SKris Kennaway r[4]=bn_mul_words( &(r[0]),a,4,b[0]); 96674664626SKris Kennaway r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]); 96774664626SKris Kennaway r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]); 96874664626SKris Kennaway r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]); 96974664626SKris Kennaway } 97074664626SKris Kennaway 97174664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 97274664626SKris Kennaway { 97374664626SKris Kennaway r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]); 97474664626SKris Kennaway r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]); 97574664626SKris Kennaway r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]); 97674664626SKris Kennaway r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]); 97774664626SKris Kennaway r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]); 97874664626SKris Kennaway r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]); 97974664626SKris Kennaway r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]); 98074664626SKris Kennaway r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]); 98174664626SKris Kennaway } 98274664626SKris Kennaway 9831f13597dSJung-uk Kim #ifdef OPENSSL_NO_ASM 9841f13597dSJung-uk Kim #ifdef OPENSSL_BN_ASM_MONT 9851f13597dSJung-uk Kim #include <alloca.h> 9861f13597dSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) 9871f13597dSJung-uk Kim { 9881f13597dSJung-uk Kim BN_ULONG c0,c1,*tp,n0=*n0p; 9891f13597dSJung-uk Kim volatile BN_ULONG *vp; 9901f13597dSJung-uk Kim int i=0,j; 9911f13597dSJung-uk Kim 9921f13597dSJung-uk Kim vp = tp = alloca((num+2)*sizeof(BN_ULONG)); 9931f13597dSJung-uk Kim 9941f13597dSJung-uk Kim for(i=0;i<=num;i++) tp[i]=0; 9951f13597dSJung-uk Kim 9961f13597dSJung-uk Kim for(i=0;i<num;i++) 9971f13597dSJung-uk Kim { 9981f13597dSJung-uk Kim c0 = bn_mul_add_words(tp,ap,num,bp[i]); 9991f13597dSJung-uk Kim c1 = (tp[num] + c0)&BN_MASK2; 10001f13597dSJung-uk Kim tp[num] = c1; 10011f13597dSJung-uk Kim tp[num+1] = (c1<c0?1:0); 10021f13597dSJung-uk Kim 10031f13597dSJung-uk Kim c0 = bn_mul_add_words(tp,np,num,tp[0]*n0); 10041f13597dSJung-uk Kim c1 = (tp[num] + c0)&BN_MASK2; 10051f13597dSJung-uk Kim tp[num] = c1; 10061f13597dSJung-uk Kim tp[num+1] += (c1<c0?1:0); 10071f13597dSJung-uk Kim for(j=0;j<=num;j++) tp[j]=tp[j+1]; 10081f13597dSJung-uk Kim } 10091f13597dSJung-uk Kim 10101f13597dSJung-uk Kim if (tp[num]!=0 || tp[num-1]>=np[num-1]) 10111f13597dSJung-uk Kim { 10121f13597dSJung-uk Kim c0 = bn_sub_words(rp,tp,np,num); 10131f13597dSJung-uk Kim if (tp[num]!=0 || c0==0) 10141f13597dSJung-uk Kim { 10151f13597dSJung-uk Kim for(i=0;i<num+2;i++) vp[i] = 0; 10161f13597dSJung-uk Kim return 1; 10171f13597dSJung-uk Kim } 10181f13597dSJung-uk Kim } 10191f13597dSJung-uk Kim for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0; 10201f13597dSJung-uk Kim vp[num] = 0; 10211f13597dSJung-uk Kim vp[num+1] = 0; 10221f13597dSJung-uk Kim return 1; 10231f13597dSJung-uk Kim } 10241f13597dSJung-uk Kim #else 10251f13597dSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) 10261f13597dSJung-uk Kim { return 0; } 10271f13597dSJung-uk Kim #endif /* OPENSSL_BN_ASM_MONT */ 10281f13597dSJung-uk Kim #endif 10291f13597dSJung-uk Kim 1030f579bf8eSKris Kennaway #endif /* !BN_MUL_COMBA */ 1031