xref: /freebsd/crypto/openssl/crypto/bn/bn_asm.c (revision 6f9291ce)
174664626SKris Kennaway /* crypto/bn/bn_asm.c */
274664626SKris Kennaway /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
374664626SKris Kennaway  * All rights reserved.
474664626SKris Kennaway  *
574664626SKris Kennaway  * This package is an SSL implementation written
674664626SKris Kennaway  * by Eric Young (eay@cryptsoft.com).
774664626SKris Kennaway  * The implementation was written so as to conform with Netscapes SSL.
874664626SKris Kennaway  *
974664626SKris Kennaway  * This library is free for commercial and non-commercial use as long as
1074664626SKris Kennaway  * the following conditions are aheared to.  The following conditions
1174664626SKris Kennaway  * apply to all code found in this distribution, be it the RC4, RSA,
1274664626SKris Kennaway  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1374664626SKris Kennaway  * included with this distribution is covered by the same copyright terms
1474664626SKris Kennaway  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1574664626SKris Kennaway  *
1674664626SKris Kennaway  * Copyright remains Eric Young's, and as such any Copyright notices in
1774664626SKris Kennaway  * the code are not to be removed.
1874664626SKris Kennaway  * If this package is used in a product, Eric Young should be given attribution
1974664626SKris Kennaway  * as the author of the parts of the library used.
2074664626SKris Kennaway  * This can be in the form of a textual message at program startup or
2174664626SKris Kennaway  * in documentation (online or textual) provided with the package.
2274664626SKris Kennaway  *
2374664626SKris Kennaway  * Redistribution and use in source and binary forms, with or without
2474664626SKris Kennaway  * modification, are permitted provided that the following conditions
2574664626SKris Kennaway  * are met:
2674664626SKris Kennaway  * 1. Redistributions of source code must retain the copyright
2774664626SKris Kennaway  *    notice, this list of conditions and the following disclaimer.
2874664626SKris Kennaway  * 2. Redistributions in binary form must reproduce the above copyright
2974664626SKris Kennaway  *    notice, this list of conditions and the following disclaimer in the
3074664626SKris Kennaway  *    documentation and/or other materials provided with the distribution.
3174664626SKris Kennaway  * 3. All advertising materials mentioning features or use of this software
3274664626SKris Kennaway  *    must display the following acknowledgement:
3374664626SKris Kennaway  *    "This product includes cryptographic software written by
3474664626SKris Kennaway  *     Eric Young (eay@cryptsoft.com)"
3574664626SKris Kennaway  *    The word 'cryptographic' can be left out if the rouines from the library
3674664626SKris Kennaway  *    being used are not cryptographic related :-).
3774664626SKris Kennaway  * 4. If you include any Windows specific code (or a derivative thereof) from
3874664626SKris Kennaway  *    the apps directory (application code) you must include an acknowledgement:
3974664626SKris Kennaway  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
4074664626SKris Kennaway  *
4174664626SKris Kennaway  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4274664626SKris Kennaway  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4374664626SKris Kennaway  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4474664626SKris Kennaway  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4574664626SKris Kennaway  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4674664626SKris Kennaway  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4774664626SKris Kennaway  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4874664626SKris Kennaway  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4974664626SKris Kennaway  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5074664626SKris Kennaway  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5174664626SKris Kennaway  * SUCH DAMAGE.
5274664626SKris Kennaway  *
5374664626SKris Kennaway  * The licence and distribution terms for any publically available version or
5474664626SKris Kennaway  * derivative of this code cannot be changed.  i.e. this code cannot simply be
5574664626SKris Kennaway  * copied and put under another distribution licence
5674664626SKris Kennaway  * [including the GNU Public Licence.]
5774664626SKris Kennaway  */
5874664626SKris Kennaway 
59f579bf8eSKris Kennaway #ifndef BN_DEBUG
60f579bf8eSKris Kennaway # undef NDEBUG                  /* avoid conflicting definitions */
61f579bf8eSKris Kennaway # define NDEBUG
62f579bf8eSKris Kennaway #endif
63f579bf8eSKris Kennaway 
6474664626SKris Kennaway #include <stdio.h>
65f579bf8eSKris Kennaway #include <assert.h>
6674664626SKris Kennaway #include "cryptlib.h"
6774664626SKris Kennaway #include "bn_lcl.h"
6874664626SKris Kennaway 
69f579bf8eSKris Kennaway #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
7074664626SKris Kennaway 
716f9291ceSJung-uk Kim BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
726f9291ceSJung-uk Kim                           BN_ULONG w)
7374664626SKris Kennaway {
7474664626SKris Kennaway     BN_ULONG c1 = 0;
7574664626SKris Kennaway 
76f579bf8eSKris Kennaway     assert(num >= 0);
776f9291ceSJung-uk Kim     if (num <= 0)
786f9291ceSJung-uk Kim         return (c1);
7974664626SKris Kennaway 
801f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
816f9291ceSJung-uk Kim     while (num & ~3) {
8274664626SKris Kennaway         mul_add(rp[0], ap[0], w, c1);
8374664626SKris Kennaway         mul_add(rp[1], ap[1], w, c1);
8474664626SKris Kennaway         mul_add(rp[2], ap[2], w, c1);
8574664626SKris Kennaway         mul_add(rp[3], ap[3], w, c1);
866f9291ceSJung-uk Kim         ap += 4;
876f9291ceSJung-uk Kim         rp += 4;
886f9291ceSJung-uk Kim         num -= 4;
89f579bf8eSKris Kennaway     }
901f13597dSJung-uk Kim # endif
916f9291ceSJung-uk Kim     while (num) {
921f13597dSJung-uk Kim         mul_add(rp[0], ap[0], w, c1);
936f9291ceSJung-uk Kim         ap++;
946f9291ceSJung-uk Kim         rp++;
956f9291ceSJung-uk Kim         num--;
9674664626SKris Kennaway     }
9774664626SKris Kennaway 
9874664626SKris Kennaway     return (c1);
9974664626SKris Kennaway }
10074664626SKris Kennaway 
1015c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
10274664626SKris Kennaway {
10374664626SKris Kennaway     BN_ULONG c1 = 0;
10474664626SKris Kennaway 
105f579bf8eSKris Kennaway     assert(num >= 0);
1066f9291ceSJung-uk Kim     if (num <= 0)
1076f9291ceSJung-uk Kim         return (c1);
10874664626SKris Kennaway 
1091f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
1106f9291ceSJung-uk Kim     while (num & ~3) {
11174664626SKris Kennaway         mul(rp[0], ap[0], w, c1);
11274664626SKris Kennaway         mul(rp[1], ap[1], w, c1);
11374664626SKris Kennaway         mul(rp[2], ap[2], w, c1);
11474664626SKris Kennaway         mul(rp[3], ap[3], w, c1);
1156f9291ceSJung-uk Kim         ap += 4;
1166f9291ceSJung-uk Kim         rp += 4;
1176f9291ceSJung-uk Kim         num -= 4;
118f579bf8eSKris Kennaway     }
1191f13597dSJung-uk Kim # endif
1206f9291ceSJung-uk Kim     while (num) {
1211f13597dSJung-uk Kim         mul(rp[0], ap[0], w, c1);
1226f9291ceSJung-uk Kim         ap++;
1236f9291ceSJung-uk Kim         rp++;
1246f9291ceSJung-uk Kim         num--;
12574664626SKris Kennaway     }
12674664626SKris Kennaway     return (c1);
12774664626SKris Kennaway }
12874664626SKris Kennaway 
1295c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
13074664626SKris Kennaway {
131f579bf8eSKris Kennaway     assert(n >= 0);
1326f9291ceSJung-uk Kim     if (n <= 0)
1336f9291ceSJung-uk Kim         return;
1341f13597dSJung-uk Kim 
1351f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
1366f9291ceSJung-uk Kim     while (n & ~3) {
137f579bf8eSKris Kennaway         sqr(r[0], r[1], a[0]);
138f579bf8eSKris Kennaway         sqr(r[2], r[3], a[1]);
139f579bf8eSKris Kennaway         sqr(r[4], r[5], a[2]);
140f579bf8eSKris Kennaway         sqr(r[6], r[7], a[3]);
1416f9291ceSJung-uk Kim         a += 4;
1426f9291ceSJung-uk Kim         r += 8;
1436f9291ceSJung-uk Kim         n -= 4;
144f579bf8eSKris Kennaway     }
1451f13597dSJung-uk Kim # endif
1466f9291ceSJung-uk Kim     while (n) {
1471f13597dSJung-uk Kim         sqr(r[0], r[1], a[0]);
1486f9291ceSJung-uk Kim         a++;
1496f9291ceSJung-uk Kim         r += 2;
1506f9291ceSJung-uk Kim         n--;
15174664626SKris Kennaway     }
15274664626SKris Kennaway }
15374664626SKris Kennaway 
1546f9291ceSJung-uk Kim #else                           /* !(defined(BN_LLONG) ||
1556f9291ceSJung-uk Kim                                  * defined(BN_UMULT_HIGH)) */
15674664626SKris Kennaway 
1576f9291ceSJung-uk Kim BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
1586f9291ceSJung-uk Kim                           BN_ULONG w)
15974664626SKris Kennaway {
16074664626SKris Kennaway     BN_ULONG c = 0;
16174664626SKris Kennaway     BN_ULONG bl, bh;
16274664626SKris Kennaway 
163f579bf8eSKris Kennaway     assert(num >= 0);
1646f9291ceSJung-uk Kim     if (num <= 0)
1656f9291ceSJung-uk Kim         return ((BN_ULONG)0);
16674664626SKris Kennaway 
16774664626SKris Kennaway     bl = LBITS(w);
16874664626SKris Kennaway     bh = HBITS(w);
16974664626SKris Kennaway 
1701f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
1716f9291ceSJung-uk Kim     while (num & ~3) {
17274664626SKris Kennaway         mul_add(rp[0], ap[0], bl, bh, c);
17374664626SKris Kennaway         mul_add(rp[1], ap[1], bl, bh, c);
17474664626SKris Kennaway         mul_add(rp[2], ap[2], bl, bh, c);
17574664626SKris Kennaway         mul_add(rp[3], ap[3], bl, bh, c);
1766f9291ceSJung-uk Kim         ap += 4;
1776f9291ceSJung-uk Kim         rp += 4;
1786f9291ceSJung-uk Kim         num -= 4;
1791f13597dSJung-uk Kim     }
1801f13597dSJung-uk Kim # endif
1816f9291ceSJung-uk Kim     while (num) {
1821f13597dSJung-uk Kim         mul_add(rp[0], ap[0], bl, bh, c);
1836f9291ceSJung-uk Kim         ap++;
1846f9291ceSJung-uk Kim         rp++;
1856f9291ceSJung-uk Kim         num--;
18674664626SKris Kennaway     }
18774664626SKris Kennaway     return (c);
18874664626SKris Kennaway }
18974664626SKris Kennaway 
1905c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
19174664626SKris Kennaway {
19274664626SKris Kennaway     BN_ULONG carry = 0;
19374664626SKris Kennaway     BN_ULONG bl, bh;
19474664626SKris Kennaway 
195f579bf8eSKris Kennaway     assert(num >= 0);
1966f9291ceSJung-uk Kim     if (num <= 0)
1976f9291ceSJung-uk Kim         return ((BN_ULONG)0);
19874664626SKris Kennaway 
19974664626SKris Kennaway     bl = LBITS(w);
20074664626SKris Kennaway     bh = HBITS(w);
20174664626SKris Kennaway 
2021f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
2036f9291ceSJung-uk Kim     while (num & ~3) {
20474664626SKris Kennaway         mul(rp[0], ap[0], bl, bh, carry);
20574664626SKris Kennaway         mul(rp[1], ap[1], bl, bh, carry);
20674664626SKris Kennaway         mul(rp[2], ap[2], bl, bh, carry);
20774664626SKris Kennaway         mul(rp[3], ap[3], bl, bh, carry);
2086f9291ceSJung-uk Kim         ap += 4;
2096f9291ceSJung-uk Kim         rp += 4;
2106f9291ceSJung-uk Kim         num -= 4;
2111f13597dSJung-uk Kim     }
2121f13597dSJung-uk Kim # endif
2136f9291ceSJung-uk Kim     while (num) {
2141f13597dSJung-uk Kim         mul(rp[0], ap[0], bl, bh, carry);
2156f9291ceSJung-uk Kim         ap++;
2166f9291ceSJung-uk Kim         rp++;
2176f9291ceSJung-uk Kim         num--;
21874664626SKris Kennaway     }
21974664626SKris Kennaway     return (carry);
22074664626SKris Kennaway }
22174664626SKris Kennaway 
2225c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
22374664626SKris Kennaway {
224f579bf8eSKris Kennaway     assert(n >= 0);
2256f9291ceSJung-uk Kim     if (n <= 0)
2266f9291ceSJung-uk Kim         return;
2271f13597dSJung-uk Kim 
2281f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
2296f9291ceSJung-uk Kim     while (n & ~3) {
23074664626SKris Kennaway         sqr64(r[0], r[1], a[0]);
23174664626SKris Kennaway         sqr64(r[2], r[3], a[1]);
23274664626SKris Kennaway         sqr64(r[4], r[5], a[2]);
23374664626SKris Kennaway         sqr64(r[6], r[7], a[3]);
2346f9291ceSJung-uk Kim         a += 4;
2356f9291ceSJung-uk Kim         r += 8;
2366f9291ceSJung-uk Kim         n -= 4;
2371f13597dSJung-uk Kim     }
2381f13597dSJung-uk Kim # endif
2396f9291ceSJung-uk Kim     while (n) {
2401f13597dSJung-uk Kim         sqr64(r[0], r[1], a[0]);
2416f9291ceSJung-uk Kim         a++;
2426f9291ceSJung-uk Kim         r += 2;
2436f9291ceSJung-uk Kim         n--;
24474664626SKris Kennaway     }
24574664626SKris Kennaway }
24674664626SKris Kennaway 
2476f9291ceSJung-uk Kim #endif                          /* !(defined(BN_LLONG) ||
2486f9291ceSJung-uk Kim                                  * defined(BN_UMULT_HIGH)) */
24974664626SKris Kennaway 
25074664626SKris Kennaway #if defined(BN_LLONG) && defined(BN_DIV2W)
25174664626SKris Kennaway 
25274664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
25374664626SKris Kennaway {
25474664626SKris Kennaway     return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d));
25574664626SKris Kennaway }
25674664626SKris Kennaway 
25774664626SKris Kennaway #else
25874664626SKris Kennaway 
259ddd58736SKris Kennaway /* Divide h,l by d and return the result. */
26074664626SKris Kennaway /* I need to test this some more :-( */
26174664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
26274664626SKris Kennaway {
26374664626SKris Kennaway     BN_ULONG dh, dl, q, ret = 0, th, tl, t;
26474664626SKris Kennaway     int i, count = 2;
26574664626SKris Kennaway 
2666f9291ceSJung-uk Kim     if (d == 0)
2676f9291ceSJung-uk Kim         return (BN_MASK2);
26874664626SKris Kennaway 
26974664626SKris Kennaway     i = BN_num_bits_word(d);
2703b4e3dcbSSimon L. B. Nielsen     assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
271ddd58736SKris Kennaway 
27274664626SKris Kennaway     i = BN_BITS2 - i;
2736f9291ceSJung-uk Kim     if (h >= d)
2746f9291ceSJung-uk Kim         h -= d;
27574664626SKris Kennaway 
2766f9291ceSJung-uk Kim     if (i) {
27774664626SKris Kennaway         d <<= i;
27874664626SKris Kennaway         h = (h << i) | (l >> (BN_BITS2 - i));
27974664626SKris Kennaway         l <<= i;
28074664626SKris Kennaway     }
28174664626SKris Kennaway     dh = (d & BN_MASK2h) >> BN_BITS4;
28274664626SKris Kennaway     dl = (d & BN_MASK2l);
2836f9291ceSJung-uk Kim     for (;;) {
28474664626SKris Kennaway         if ((h >> BN_BITS4) == dh)
28574664626SKris Kennaway             q = BN_MASK2l;
28674664626SKris Kennaway         else
28774664626SKris Kennaway             q = h / dh;
28874664626SKris Kennaway 
28974664626SKris Kennaway         th = q * dh;
29074664626SKris Kennaway         tl = dl * q;
2916f9291ceSJung-uk Kim         for (;;) {
29274664626SKris Kennaway             t = h - th;
29374664626SKris Kennaway             if ((t & BN_MASK2h) ||
2946f9291ceSJung-uk Kim                 ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
29574664626SKris Kennaway                 break;
29674664626SKris Kennaway             q--;
29774664626SKris Kennaway             th -= dh;
29874664626SKris Kennaway             tl -= dl;
29974664626SKris Kennaway         }
30074664626SKris Kennaway         t = (tl >> BN_BITS4);
30174664626SKris Kennaway         tl = (tl << BN_BITS4) & BN_MASK2h;
30274664626SKris Kennaway         th += t;
30374664626SKris Kennaway 
3046f9291ceSJung-uk Kim         if (l < tl)
3056f9291ceSJung-uk Kim             th++;
30674664626SKris Kennaway         l -= tl;
3076f9291ceSJung-uk Kim         if (h < th) {
30874664626SKris Kennaway             h += d;
30974664626SKris Kennaway             q--;
31074664626SKris Kennaway         }
31174664626SKris Kennaway         h -= th;
31274664626SKris Kennaway 
3136f9291ceSJung-uk Kim         if (--count == 0)
3146f9291ceSJung-uk Kim             break;
31574664626SKris Kennaway 
31674664626SKris Kennaway         ret = q << BN_BITS4;
31774664626SKris Kennaway         h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
31874664626SKris Kennaway         l = (l & BN_MASK2l) << BN_BITS4;
31974664626SKris Kennaway     }
32074664626SKris Kennaway     ret |= q;
32174664626SKris Kennaway     return (ret);
32274664626SKris Kennaway }
#endif                          /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
32474664626SKris Kennaway 
32574664626SKris Kennaway #ifdef BN_LLONG
3266f9291ceSJung-uk Kim BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
3276f9291ceSJung-uk Kim                       int n)
32874664626SKris Kennaway {
32974664626SKris Kennaway     BN_ULLONG ll = 0;
33074664626SKris Kennaway 
331f579bf8eSKris Kennaway     assert(n >= 0);
3326f9291ceSJung-uk Kim     if (n <= 0)
3336f9291ceSJung-uk Kim         return ((BN_ULONG)0);
33474664626SKris Kennaway 
3351f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
3366f9291ceSJung-uk Kim     while (n & ~3) {
33774664626SKris Kennaway         ll += (BN_ULLONG) a[0] + b[0];
33874664626SKris Kennaway         r[0] = (BN_ULONG)ll & BN_MASK2;
33974664626SKris Kennaway         ll >>= BN_BITS2;
34074664626SKris Kennaway         ll += (BN_ULLONG) a[1] + b[1];
34174664626SKris Kennaway         r[1] = (BN_ULONG)ll & BN_MASK2;
34274664626SKris Kennaway         ll >>= BN_BITS2;
34374664626SKris Kennaway         ll += (BN_ULLONG) a[2] + b[2];
34474664626SKris Kennaway         r[2] = (BN_ULONG)ll & BN_MASK2;
34574664626SKris Kennaway         ll >>= BN_BITS2;
34674664626SKris Kennaway         ll += (BN_ULLONG) a[3] + b[3];
34774664626SKris Kennaway         r[3] = (BN_ULONG)ll & BN_MASK2;
34874664626SKris Kennaway         ll >>= BN_BITS2;
3496f9291ceSJung-uk Kim         a += 4;
3506f9291ceSJung-uk Kim         b += 4;
3516f9291ceSJung-uk Kim         r += 4;
3526f9291ceSJung-uk Kim         n -= 4;
3531f13597dSJung-uk Kim     }
3541f13597dSJung-uk Kim # endif
3556f9291ceSJung-uk Kim     while (n) {
3561f13597dSJung-uk Kim         ll += (BN_ULLONG) a[0] + b[0];
3571f13597dSJung-uk Kim         r[0] = (BN_ULONG)ll & BN_MASK2;
3581f13597dSJung-uk Kim         ll >>= BN_BITS2;
3596f9291ceSJung-uk Kim         a++;
3606f9291ceSJung-uk Kim         b++;
3616f9291ceSJung-uk Kim         r++;
3626f9291ceSJung-uk Kim         n--;
36374664626SKris Kennaway     }
36474664626SKris Kennaway     return ((BN_ULONG)ll);
36574664626SKris Kennaway }
366f579bf8eSKris Kennaway #else                           /* !BN_LLONG */
3676f9291ceSJung-uk Kim BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
3686f9291ceSJung-uk Kim                       int n)
36974664626SKris Kennaway {
37074664626SKris Kennaway     BN_ULONG c, l, t;
37174664626SKris Kennaway 
372f579bf8eSKris Kennaway     assert(n >= 0);
3736f9291ceSJung-uk Kim     if (n <= 0)
3746f9291ceSJung-uk Kim         return ((BN_ULONG)0);
37574664626SKris Kennaway 
37674664626SKris Kennaway     c = 0;
3771f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
3786f9291ceSJung-uk Kim     while (n & ~3) {
37974664626SKris Kennaway         t = a[0];
38074664626SKris Kennaway         t = (t + c) & BN_MASK2;
38174664626SKris Kennaway         c = (t < c);
38274664626SKris Kennaway         l = (t + b[0]) & BN_MASK2;
38374664626SKris Kennaway         c += (l < t);
38474664626SKris Kennaway         r[0] = l;
38574664626SKris Kennaway         t = a[1];
38674664626SKris Kennaway         t = (t + c) & BN_MASK2;
38774664626SKris Kennaway         c = (t < c);
38874664626SKris Kennaway         l = (t + b[1]) & BN_MASK2;
38974664626SKris Kennaway         c += (l < t);
39074664626SKris Kennaway         r[1] = l;
39174664626SKris Kennaway         t = a[2];
39274664626SKris Kennaway         t = (t + c) & BN_MASK2;
39374664626SKris Kennaway         c = (t < c);
39474664626SKris Kennaway         l = (t + b[2]) & BN_MASK2;
39574664626SKris Kennaway         c += (l < t);
39674664626SKris Kennaway         r[2] = l;
39774664626SKris Kennaway         t = a[3];
39874664626SKris Kennaway         t = (t + c) & BN_MASK2;
39974664626SKris Kennaway         c = (t < c);
40074664626SKris Kennaway         l = (t + b[3]) & BN_MASK2;
40174664626SKris Kennaway         c += (l < t);
40274664626SKris Kennaway         r[3] = l;
4036f9291ceSJung-uk Kim         a += 4;
4046f9291ceSJung-uk Kim         b += 4;
4056f9291ceSJung-uk Kim         r += 4;
4066f9291ceSJung-uk Kim         n -= 4;
4071f13597dSJung-uk Kim     }
4081f13597dSJung-uk Kim # endif
4096f9291ceSJung-uk Kim     while (n) {
4101f13597dSJung-uk Kim         t = a[0];
4111f13597dSJung-uk Kim         t = (t + c) & BN_MASK2;
4121f13597dSJung-uk Kim         c = (t < c);
4131f13597dSJung-uk Kim         l = (t + b[0]) & BN_MASK2;
4141f13597dSJung-uk Kim         c += (l < t);
4151f13597dSJung-uk Kim         r[0] = l;
4166f9291ceSJung-uk Kim         a++;
4176f9291ceSJung-uk Kim         b++;
4186f9291ceSJung-uk Kim         r++;
4196f9291ceSJung-uk Kim         n--;
42074664626SKris Kennaway     }
42174664626SKris Kennaway     return ((BN_ULONG)c);
42274664626SKris Kennaway }
423f579bf8eSKris Kennaway #endif                          /* !BN_LLONG */
42474664626SKris Kennaway 
4256f9291ceSJung-uk Kim BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
4266f9291ceSJung-uk Kim                       int n)
42774664626SKris Kennaway {
42874664626SKris Kennaway     BN_ULONG t1, t2;
42974664626SKris Kennaway     int c = 0;
43074664626SKris Kennaway 
431f579bf8eSKris Kennaway     assert(n >= 0);
4326f9291ceSJung-uk Kim     if (n <= 0)
4336f9291ceSJung-uk Kim         return ((BN_ULONG)0);
43474664626SKris Kennaway 
4351f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT
4366f9291ceSJung-uk Kim     while (n & ~3) {
4376f9291ceSJung-uk Kim         t1 = a[0];
4386f9291ceSJung-uk Kim         t2 = b[0];
43974664626SKris Kennaway         r[0] = (t1 - t2 - c) & BN_MASK2;
4406f9291ceSJung-uk Kim         if (t1 != t2)
4416f9291ceSJung-uk Kim             c = (t1 < t2);
4426f9291ceSJung-uk Kim         t1 = a[1];
4436f9291ceSJung-uk Kim         t2 = b[1];
44474664626SKris Kennaway         r[1] = (t1 - t2 - c) & BN_MASK2;
4456f9291ceSJung-uk Kim         if (t1 != t2)
4466f9291ceSJung-uk Kim             c = (t1 < t2);
4476f9291ceSJung-uk Kim         t1 = a[2];
4486f9291ceSJung-uk Kim         t2 = b[2];
44974664626SKris Kennaway         r[2] = (t1 - t2 - c) & BN_MASK2;
4506f9291ceSJung-uk Kim         if (t1 != t2)
4516f9291ceSJung-uk Kim             c = (t1 < t2);
4526f9291ceSJung-uk Kim         t1 = a[3];
4536f9291ceSJung-uk Kim         t2 = b[3];
45474664626SKris Kennaway         r[3] = (t1 - t2 - c) & BN_MASK2;
4556f9291ceSJung-uk Kim         if (t1 != t2)
4566f9291ceSJung-uk Kim             c = (t1 < t2);
4576f9291ceSJung-uk Kim         a += 4;
4586f9291ceSJung-uk Kim         b += 4;
4596f9291ceSJung-uk Kim         r += 4;
4606f9291ceSJung-uk Kim         n -= 4;
4611f13597dSJung-uk Kim     }
4621f13597dSJung-uk Kim #endif
4636f9291ceSJung-uk Kim     while (n) {
4646f9291ceSJung-uk Kim         t1 = a[0];
4656f9291ceSJung-uk Kim         t2 = b[0];
4661f13597dSJung-uk Kim         r[0] = (t1 - t2 - c) & BN_MASK2;
4676f9291ceSJung-uk Kim         if (t1 != t2)
4686f9291ceSJung-uk Kim             c = (t1 < t2);
4696f9291ceSJung-uk Kim         a++;
4706f9291ceSJung-uk Kim         b++;
4716f9291ceSJung-uk Kim         r++;
4726f9291ceSJung-uk Kim         n--;
47374664626SKris Kennaway     }
47474664626SKris Kennaway     return (c);
47574664626SKris Kennaway }
47674664626SKris Kennaway 
4771f13597dSJung-uk Kim #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
47874664626SKris Kennaway 
47974664626SKris Kennaway # undef bn_mul_comba8
48074664626SKris Kennaway # undef bn_mul_comba4
48174664626SKris Kennaway # undef bn_sqr_comba8
48274664626SKris Kennaway # undef bn_sqr_comba4
48374664626SKris Kennaway 
/*
 * Column-accumulator helpers for the comba routines.  The accumulator is
 * the three-word number c = (c2,c1,c0), c0 being the least significant
 * word:
 *
 *   mul_add_c(a,b,c0,c1,c2)      -- c += a*b
 *   mul_add_c2(a,b,c0,c1,c2)     -- c += 2*a*b
 *   sqr_add_c(a,i,c0,c1,c2)      -- c += a[i]^2
 *   sqr_add_c2(a,i,j,c0,c1,c2)   -- c += 2*a[i]*a[j]
 *
 * All variants use t1 and t2 without declaring them, so the expanding
 * function must provide them.  The BN_LLONG variants additionally expect
 * t (and tt for mul_add_c2) to be in scope, and the fallback variants
 * expect bl and bh.  The BN_LLONG and fallback variants expand to a bare
 * sequence of statements rather than a single block, so they are only
 * usable where a statement list is allowed.
 */

/*
 * Keep in mind that carrying into the high part of a multiplication result
 * cannot overflow, because that high part cannot be all-ones.
 */
# ifdef BN_LLONG
#  define mul_add_c(a,b,c0,c1,c2) \
        t = (BN_ULLONG)a * b; \
        t1 = (BN_ULONG)Lw(t); \
        t2 = (BN_ULONG)Hw(t); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++;

#  define mul_add_c2(a,b,c0,c1,c2) \
        t = (BN_ULLONG)a * b; \
        tt = (t + t) & BN_MASK; \
        if (tt < t) \
            c2++; \
        t1 = (BN_ULONG)Lw(tt); \
        t2 = (BN_ULONG)Hw(tt); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0 < t1) && (((++t2) & BN_MASK2) == 0)) \
            c2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++;

#  define sqr_add_c(a,i,c0,c1,c2) \
        t = (BN_ULLONG)a[i] * a[i]; \
        t1 = (BN_ULONG)Lw(t); \
        t2 = (BN_ULONG)Hw(t); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++;

# elif defined(BN_UMULT_LOHI)

#  define mul_add_c(a,b,c0,c1,c2) { \
        BN_ULONG ta = (a), tb = (b); \
        BN_UMULT_LOHI(t1, t2, ta, tb); \
        c0 += t1; \
        t2 += (c0 < t1) ? 1 : 0; \
        c1 += t2; \
        c2 += (c1 < t2) ? 1 : 0; \
        }

        /* The product is added in twice instead of being doubled first. */
#  define mul_add_c2(a,b,c0,c1,c2) { \
        BN_ULONG ta = (a), tb = (b), t0; \
        BN_UMULT_LOHI(t0, t1, ta, tb); \
        c0 += t0; \
        t2 = t1 + ((c0 < t0) ? 1 : 0); \
        c1 += t2; \
        c2 += (c1 < t2) ? 1 : 0; \
        c0 += t0; \
        t1 += (c0 < t0) ? 1 : 0; \
        c1 += t1; \
        c2 += (c1 < t1) ? 1 : 0; \
        }

#  define sqr_add_c(a,i,c0,c1,c2) { \
        BN_ULONG ta = (a)[i]; \
        BN_UMULT_LOHI(t1, t2, ta, ta); \
        c0 += t1; \
        t2 += (c0 < t1) ? 1 : 0; \
        c1 += t2; \
        c2 += (c1 < t2) ? 1 : 0; \
        }

# elif defined(BN_UMULT_HIGH)

#  define mul_add_c(a,b,c0,c1,c2) { \
        BN_ULONG ta = (a), tb = (b); \
        t1 = ta * tb; \
        t2 = BN_UMULT_HIGH(ta, tb); \
        c0 += t1; \
        t2 += (c0 < t1) ? 1 : 0; \
        c1 += t2; \
        c2 += (c1 < t2) ? 1 : 0; \
        }

        /* The product is added in twice instead of being doubled first. */
#  define mul_add_c2(a,b,c0,c1,c2) { \
        BN_ULONG ta = (a), tb = (b), t0; \
        t1 = BN_UMULT_HIGH(ta, tb); \
        t0 = ta * tb; \
        c0 += t0; \
        t2 = t1 + ((c0 < t0) ? 1 : 0); \
        c1 += t2; \
        c2 += (c1 < t2) ? 1 : 0; \
        c0 += t0; \
        t1 += (c0 < t0) ? 1 : 0; \
        c1 += t1; \
        c2 += (c1 < t1) ? 1 : 0; \
        }

#  define sqr_add_c(a,i,c0,c1,c2) { \
        BN_ULONG ta = (a)[i]; \
        t1 = ta * ta; \
        t2 = BN_UMULT_HIGH(ta, ta); \
        c0 += t1; \
        t2 += (c0 < t1) ? 1 : 0; \
        c1 += t2; \
        c2 += (c1 < t2) ? 1 : 0; \
        }

# else                          /* no BN_LLONG, BN_UMULT_LOHI or BN_UMULT_HIGH */
#  define mul_add_c(a,b,c0,c1,c2) \
        t1 = LBITS(a); \
        t2 = HBITS(a); \
        bl = LBITS(b); \
        bh = HBITS(b); \
        mul64(t1, t2, bl, bh); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++;

#  define mul_add_c2(a,b,c0,c1,c2) \
        t1 = LBITS(a); \
        t2 = HBITS(a); \
        bl = LBITS(b); \
        bh = HBITS(b); \
        mul64(t1, t2, bl, bh); \
        if (t2 & BN_TBIT) \
            c2++; \
        t2 = (t2 + t2) & BN_MASK2; \
        if (t1 & BN_TBIT) \
            t2++; \
        t1 = (t1 + t1) & BN_MASK2; \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0 < t1) && (((++t2) & BN_MASK2) == 0)) \
            c2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++;

#  define sqr_add_c(a,i,c0,c1,c2) \
        sqr64(t1, t2, (a)[i]); \
        c0 = (c0 + t1) & BN_MASK2; \
        if ((c0) < t1) \
            t2++; \
        c1 = (c1 + t2) & BN_MASK2; \
        if ((c1) < t2) \
            c2++;
# endif                         /* accumulator variant selection */

/* Identical for every variant: defer to whichever mul_add_c2 was chosen. */
# define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i], (a)[j], c0, c1, c2)
61174664626SKris Kennaway 
/*
 * bn_mul_comba8: fully unrolled column-wise multiply of two 8-word
 * operands.
 *
 *   r  receives 16 words (r[0]..r[15]).
 *   a  8 input words.
 *   b  8 input words.
 *
 * For every output index k the routine feeds each pair a[i], b[j] with
 * i + j == k to mul_add_c, accumulating into three words.  After a column
 * is finished its lowest accumulator word is stored to r[k] and cleared,
 * and the roles of c1/c2/c3 rotate for the next column.  Statement order
 * matters: each store must come after all mul_add_c calls of its column.
 */
61274664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
61374664626SKris Kennaway {
    /* Scratch variables consumed by the mul_add_c macro expansion. */
61474664626SKris Kennaway # ifdef BN_LLONG
61574664626SKris Kennaway     BN_ULLONG t;
61674664626SKris Kennaway # else
61774664626SKris Kennaway     BN_ULONG bl, bh;
61874664626SKris Kennaway # endif
61974664626SKris Kennaway     BN_ULONG t1, t2;
    /* Three-word rotating accumulator. */
62074664626SKris Kennaway     BN_ULONG c1, c2, c3;
62174664626SKris Kennaway 
62274664626SKris Kennaway     c1 = 0;
62374664626SKris Kennaway     c2 = 0;
62474664626SKris Kennaway     c3 = 0;
    /* Column 0: accumulator order is (c1, c2, c3). */
62574664626SKris Kennaway     mul_add_c(a[0], b[0], c1, c2, c3);
62674664626SKris Kennaway     r[0] = c1;
62774664626SKris Kennaway     c1 = 0;
    /* Column 1: accumulator order rotates to (c2, c3, c1). */
62874664626SKris Kennaway     mul_add_c(a[0], b[1], c2, c3, c1);
62974664626SKris Kennaway     mul_add_c(a[1], b[0], c2, c3, c1);
63074664626SKris Kennaway     r[1] = c2;
63174664626SKris Kennaway     c2 = 0;
    /* Column 2: accumulator order rotates to (c3, c1, c2); and so on. */
63274664626SKris Kennaway     mul_add_c(a[2], b[0], c3, c1, c2);
63374664626SKris Kennaway     mul_add_c(a[1], b[1], c3, c1, c2);
63474664626SKris Kennaway     mul_add_c(a[0], b[2], c3, c1, c2);
63574664626SKris Kennaway     r[2] = c3;
63674664626SKris Kennaway     c3 = 0;
63774664626SKris Kennaway     mul_add_c(a[0], b[3], c1, c2, c3);
63874664626SKris Kennaway     mul_add_c(a[1], b[2], c1, c2, c3);
63974664626SKris Kennaway     mul_add_c(a[2], b[1], c1, c2, c3);
64074664626SKris Kennaway     mul_add_c(a[3], b[0], c1, c2, c3);
64174664626SKris Kennaway     r[3] = c1;
64274664626SKris Kennaway     c1 = 0;
64374664626SKris Kennaway     mul_add_c(a[4], b[0], c2, c3, c1);
64474664626SKris Kennaway     mul_add_c(a[3], b[1], c2, c3, c1);
64574664626SKris Kennaway     mul_add_c(a[2], b[2], c2, c3, c1);
64674664626SKris Kennaway     mul_add_c(a[1], b[3], c2, c3, c1);
64774664626SKris Kennaway     mul_add_c(a[0], b[4], c2, c3, c1);
64874664626SKris Kennaway     r[4] = c2;
64974664626SKris Kennaway     c2 = 0;
65074664626SKris Kennaway     mul_add_c(a[0], b[5], c3, c1, c2);
65174664626SKris Kennaway     mul_add_c(a[1], b[4], c3, c1, c2);
65274664626SKris Kennaway     mul_add_c(a[2], b[3], c3, c1, c2);
65374664626SKris Kennaway     mul_add_c(a[3], b[2], c3, c1, c2);
65474664626SKris Kennaway     mul_add_c(a[4], b[1], c3, c1, c2);
65574664626SKris Kennaway     mul_add_c(a[5], b[0], c3, c1, c2);
65674664626SKris Kennaway     r[5] = c3;
65774664626SKris Kennaway     c3 = 0;
65874664626SKris Kennaway     mul_add_c(a[6], b[0], c1, c2, c3);
65974664626SKris Kennaway     mul_add_c(a[5], b[1], c1, c2, c3);
66074664626SKris Kennaway     mul_add_c(a[4], b[2], c1, c2, c3);
66174664626SKris Kennaway     mul_add_c(a[3], b[3], c1, c2, c3);
66274664626SKris Kennaway     mul_add_c(a[2], b[4], c1, c2, c3);
66374664626SKris Kennaway     mul_add_c(a[1], b[5], c1, c2, c3);
66474664626SKris Kennaway     mul_add_c(a[0], b[6], c1, c2, c3);
66574664626SKris Kennaway     r[6] = c1;
66674664626SKris Kennaway     c1 = 0;
    /* Column 7 is the widest: all eight index pairs with i + j == 7. */
66774664626SKris Kennaway     mul_add_c(a[0], b[7], c2, c3, c1);
66874664626SKris Kennaway     mul_add_c(a[1], b[6], c2, c3, c1);
66974664626SKris Kennaway     mul_add_c(a[2], b[5], c2, c3, c1);
67074664626SKris Kennaway     mul_add_c(a[3], b[4], c2, c3, c1);
67174664626SKris Kennaway     mul_add_c(a[4], b[3], c2, c3, c1);
67274664626SKris Kennaway     mul_add_c(a[5], b[2], c2, c3, c1);
67374664626SKris Kennaway     mul_add_c(a[6], b[1], c2, c3, c1);
67474664626SKris Kennaway     mul_add_c(a[7], b[0], c2, c3, c1);
67574664626SKris Kennaway     r[7] = c2;
67674664626SKris Kennaway     c2 = 0;
67774664626SKris Kennaway     mul_add_c(a[7], b[1], c3, c1, c2);
67874664626SKris Kennaway     mul_add_c(a[6], b[2], c3, c1, c2);
67974664626SKris Kennaway     mul_add_c(a[5], b[3], c3, c1, c2);
68074664626SKris Kennaway     mul_add_c(a[4], b[4], c3, c1, c2);
68174664626SKris Kennaway     mul_add_c(a[3], b[5], c3, c1, c2);
68274664626SKris Kennaway     mul_add_c(a[2], b[6], c3, c1, c2);
68374664626SKris Kennaway     mul_add_c(a[1], b[7], c3, c1, c2);
68474664626SKris Kennaway     r[8] = c3;
68574664626SKris Kennaway     c3 = 0;
68674664626SKris Kennaway     mul_add_c(a[2], b[7], c1, c2, c3);
68774664626SKris Kennaway     mul_add_c(a[3], b[6], c1, c2, c3);
68874664626SKris Kennaway     mul_add_c(a[4], b[5], c1, c2, c3);
68974664626SKris Kennaway     mul_add_c(a[5], b[4], c1, c2, c3);
69074664626SKris Kennaway     mul_add_c(a[6], b[3], c1, c2, c3);
69174664626SKris Kennaway     mul_add_c(a[7], b[2], c1, c2, c3);
69274664626SKris Kennaway     r[9] = c1;
69374664626SKris Kennaway     c1 = 0;
69474664626SKris Kennaway     mul_add_c(a[7], b[3], c2, c3, c1);
69574664626SKris Kennaway     mul_add_c(a[6], b[4], c2, c3, c1);
69674664626SKris Kennaway     mul_add_c(a[5], b[5], c2, c3, c1);
69774664626SKris Kennaway     mul_add_c(a[4], b[6], c2, c3, c1);
69874664626SKris Kennaway     mul_add_c(a[3], b[7], c2, c3, c1);
69974664626SKris Kennaway     r[10] = c2;
70074664626SKris Kennaway     c2 = 0;
70174664626SKris Kennaway     mul_add_c(a[4], b[7], c3, c1, c2);
70274664626SKris Kennaway     mul_add_c(a[5], b[6], c3, c1, c2);
70374664626SKris Kennaway     mul_add_c(a[6], b[5], c3, c1, c2);
70474664626SKris Kennaway     mul_add_c(a[7], b[4], c3, c1, c2);
70574664626SKris Kennaway     r[11] = c3;
70674664626SKris Kennaway     c3 = 0;
70774664626SKris Kennaway     mul_add_c(a[7], b[5], c1, c2, c3);
70874664626SKris Kennaway     mul_add_c(a[6], b[6], c1, c2, c3);
70974664626SKris Kennaway     mul_add_c(a[5], b[7], c1, c2, c3);
71074664626SKris Kennaway     r[12] = c1;
71174664626SKris Kennaway     c1 = 0;
71274664626SKris Kennaway     mul_add_c(a[6], b[7], c2, c3, c1);
71374664626SKris Kennaway     mul_add_c(a[7], b[6], c2, c3, c1);
71474664626SKris Kennaway     r[13] = c2;
71574664626SKris Kennaway     c2 = 0;
    /*
     * Last column: the two low accumulator words become r[14] and r[15];
     * the third word (c2) is not stored.
     */
71674664626SKris Kennaway     mul_add_c(a[7], b[7], c3, c1, c2);
71774664626SKris Kennaway     r[14] = c3;
71874664626SKris Kennaway     r[15] = c1;
71974664626SKris Kennaway }
72074664626SKris Kennaway 
/*
 * bn_mul_comba4: fully unrolled column-wise multiply of two 4-word
 * operands.
 *
 *   r  receives 8 words (r[0]..r[7]).
 *   a  4 input words.
 *   b  4 input words.
 *
 * For every output index k, each pair a[i], b[j] with i + j == k is fed to
 * mul_add_c.  After a column its lowest accumulator word is stored to r[k]
 * and cleared, and the roles of c1/c2/c3 rotate for the next column.
 */
72174664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
72274664626SKris Kennaway {
    /* Scratch variables consumed by the mul_add_c macro expansion. */
72374664626SKris Kennaway # ifdef BN_LLONG
72474664626SKris Kennaway     BN_ULLONG t;
72574664626SKris Kennaway # else
72674664626SKris Kennaway     BN_ULONG bl, bh;
72774664626SKris Kennaway # endif
72874664626SKris Kennaway     BN_ULONG t1, t2;
    /* Three-word rotating accumulator. */
72974664626SKris Kennaway     BN_ULONG c1, c2, c3;
73074664626SKris Kennaway 
73174664626SKris Kennaway     c1 = 0;
73274664626SKris Kennaway     c2 = 0;
73374664626SKris Kennaway     c3 = 0;
    /* Column 0. */
73474664626SKris Kennaway     mul_add_c(a[0], b[0], c1, c2, c3);
73574664626SKris Kennaway     r[0] = c1;
73674664626SKris Kennaway     c1 = 0;
    /* Column 1. */
73774664626SKris Kennaway     mul_add_c(a[0], b[1], c2, c3, c1);
73874664626SKris Kennaway     mul_add_c(a[1], b[0], c2, c3, c1);
73974664626SKris Kennaway     r[1] = c2;
74074664626SKris Kennaway     c2 = 0;
    /* Column 2. */
74174664626SKris Kennaway     mul_add_c(a[2], b[0], c3, c1, c2);
74274664626SKris Kennaway     mul_add_c(a[1], b[1], c3, c1, c2);
74374664626SKris Kennaway     mul_add_c(a[0], b[2], c3, c1, c2);
74474664626SKris Kennaway     r[2] = c3;
74574664626SKris Kennaway     c3 = 0;
    /* Column 3: the widest, four index pairs. */
74674664626SKris Kennaway     mul_add_c(a[0], b[3], c1, c2, c3);
74774664626SKris Kennaway     mul_add_c(a[1], b[2], c1, c2, c3);
74874664626SKris Kennaway     mul_add_c(a[2], b[1], c1, c2, c3);
74974664626SKris Kennaway     mul_add_c(a[3], b[0], c1, c2, c3);
75074664626SKris Kennaway     r[3] = c1;
75174664626SKris Kennaway     c1 = 0;
    /* Column 4. */
75274664626SKris Kennaway     mul_add_c(a[3], b[1], c2, c3, c1);
75374664626SKris Kennaway     mul_add_c(a[2], b[2], c2, c3, c1);
75474664626SKris Kennaway     mul_add_c(a[1], b[3], c2, c3, c1);
75574664626SKris Kennaway     r[4] = c2;
75674664626SKris Kennaway     c2 = 0;
    /* Column 5. */
75774664626SKris Kennaway     mul_add_c(a[2], b[3], c3, c1, c2);
75874664626SKris Kennaway     mul_add_c(a[3], b[2], c3, c1, c2);
75974664626SKris Kennaway     r[5] = c3;
76074664626SKris Kennaway     c3 = 0;
    /*
     * Column 6: the two low accumulator words become r[6] and r[7]; the
     * third word (c3) is not stored.
     */
76174664626SKris Kennaway     mul_add_c(a[3], b[3], c1, c2, c3);
76274664626SKris Kennaway     r[6] = c1;
76374664626SKris Kennaway     r[7] = c2;
76474664626SKris Kennaway }
76574664626SKris Kennaway 
/*
 * bn_sqr_comba8: fully unrolled column-wise squaring of an 8-word operand.
 *
 *   r  receives 16 words (r[0]..r[15]).
 *   a  8 input words.
 *
 * For every output index k, the diagonal term (when k is even) goes through
 * sqr_add_c(a, k/2, ...) and each off-diagonal index pair i > j with
 * i + j == k goes through sqr_add_c2(a, i, j, ...) once.  After a column
 * its lowest accumulator word is stored to r[k] and cleared, and the roles
 * of c1/c2/c3 rotate for the next column.
 */
7665c87c606SMark Murray void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
76774664626SKris Kennaway {
    /* Scratch variables consumed by the sqr_add_c / sqr_add_c2 macros. */
76874664626SKris Kennaway # ifdef BN_LLONG
76974664626SKris Kennaway     BN_ULLONG t, tt;
77074664626SKris Kennaway # else
77174664626SKris Kennaway     BN_ULONG bl, bh;
77274664626SKris Kennaway # endif
77374664626SKris Kennaway     BN_ULONG t1, t2;
    /* Three-word rotating accumulator. */
77474664626SKris Kennaway     BN_ULONG c1, c2, c3;
77574664626SKris Kennaway 
77674664626SKris Kennaway     c1 = 0;
77774664626SKris Kennaway     c2 = 0;
77874664626SKris Kennaway     c3 = 0;
    /* Column 0: diagonal term only. */
77974664626SKris Kennaway     sqr_add_c(a, 0, c1, c2, c3);
78074664626SKris Kennaway     r[0] = c1;
78174664626SKris Kennaway     c1 = 0;
    /* Column 1: one cross term. */
78274664626SKris Kennaway     sqr_add_c2(a, 1, 0, c2, c3, c1);
78374664626SKris Kennaway     r[1] = c2;
78474664626SKris Kennaway     c2 = 0;
    /* Column 2: diagonal term plus one cross term; and so on. */
78574664626SKris Kennaway     sqr_add_c(a, 1, c3, c1, c2);
78674664626SKris Kennaway     sqr_add_c2(a, 2, 0, c3, c1, c2);
78774664626SKris Kennaway     r[2] = c3;
78874664626SKris Kennaway     c3 = 0;
78974664626SKris Kennaway     sqr_add_c2(a, 3, 0, c1, c2, c3);
79074664626SKris Kennaway     sqr_add_c2(a, 2, 1, c1, c2, c3);
79174664626SKris Kennaway     r[3] = c1;
79274664626SKris Kennaway     c1 = 0;
79374664626SKris Kennaway     sqr_add_c(a, 2, c2, c3, c1);
79474664626SKris Kennaway     sqr_add_c2(a, 3, 1, c2, c3, c1);
79574664626SKris Kennaway     sqr_add_c2(a, 4, 0, c2, c3, c1);
79674664626SKris Kennaway     r[4] = c2;
79774664626SKris Kennaway     c2 = 0;
79874664626SKris Kennaway     sqr_add_c2(a, 5, 0, c3, c1, c2);
79974664626SKris Kennaway     sqr_add_c2(a, 4, 1, c3, c1, c2);
80074664626SKris Kennaway     sqr_add_c2(a, 3, 2, c3, c1, c2);
80174664626SKris Kennaway     r[5] = c3;
80274664626SKris Kennaway     c3 = 0;
80374664626SKris Kennaway     sqr_add_c(a, 3, c1, c2, c3);
80474664626SKris Kennaway     sqr_add_c2(a, 4, 2, c1, c2, c3);
80574664626SKris Kennaway     sqr_add_c2(a, 5, 1, c1, c2, c3);
80674664626SKris Kennaway     sqr_add_c2(a, 6, 0, c1, c2, c3);
80774664626SKris Kennaway     r[6] = c1;
80874664626SKris Kennaway     c1 = 0;
    /* Column 7: four cross terms, no diagonal term. */
80974664626SKris Kennaway     sqr_add_c2(a, 7, 0, c2, c3, c1);
81074664626SKris Kennaway     sqr_add_c2(a, 6, 1, c2, c3, c1);
81174664626SKris Kennaway     sqr_add_c2(a, 5, 2, c2, c3, c1);
81274664626SKris Kennaway     sqr_add_c2(a, 4, 3, c2, c3, c1);
81374664626SKris Kennaway     r[7] = c2;
81474664626SKris Kennaway     c2 = 0;
81574664626SKris Kennaway     sqr_add_c(a, 4, c3, c1, c2);
81674664626SKris Kennaway     sqr_add_c2(a, 5, 3, c3, c1, c2);
81774664626SKris Kennaway     sqr_add_c2(a, 6, 2, c3, c1, c2);
81874664626SKris Kennaway     sqr_add_c2(a, 7, 1, c3, c1, c2);
81974664626SKris Kennaway     r[8] = c3;
82074664626SKris Kennaway     c3 = 0;
82174664626SKris Kennaway     sqr_add_c2(a, 7, 2, c1, c2, c3);
82274664626SKris Kennaway     sqr_add_c2(a, 6, 3, c1, c2, c3);
82374664626SKris Kennaway     sqr_add_c2(a, 5, 4, c1, c2, c3);
82474664626SKris Kennaway     r[9] = c1;
82574664626SKris Kennaway     c1 = 0;
82674664626SKris Kennaway     sqr_add_c(a, 5, c2, c3, c1);
82774664626SKris Kennaway     sqr_add_c2(a, 6, 4, c2, c3, c1);
82874664626SKris Kennaway     sqr_add_c2(a, 7, 3, c2, c3, c1);
82974664626SKris Kennaway     r[10] = c2;
83074664626SKris Kennaway     c2 = 0;
83174664626SKris Kennaway     sqr_add_c2(a, 7, 4, c3, c1, c2);
83274664626SKris Kennaway     sqr_add_c2(a, 6, 5, c3, c1, c2);
83374664626SKris Kennaway     r[11] = c3;
83474664626SKris Kennaway     c3 = 0;
83574664626SKris Kennaway     sqr_add_c(a, 6, c1, c2, c3);
83674664626SKris Kennaway     sqr_add_c2(a, 7, 5, c1, c2, c3);
83774664626SKris Kennaway     r[12] = c1;
83874664626SKris Kennaway     c1 = 0;
83974664626SKris Kennaway     sqr_add_c2(a, 7, 6, c2, c3, c1);
84074664626SKris Kennaway     r[13] = c2;
84174664626SKris Kennaway     c2 = 0;
    /*
     * Last column: the two low accumulator words become r[14] and r[15];
     * the third word (c2) is not stored.
     */
84274664626SKris Kennaway     sqr_add_c(a, 7, c3, c1, c2);
84374664626SKris Kennaway     r[14] = c3;
84474664626SKris Kennaway     r[15] = c1;
84574664626SKris Kennaway }
84674664626SKris Kennaway 
/*
 * bn_sqr_comba4: fully unrolled column-wise squaring of a 4-word operand.
 *
 *   r  receives 8 words (r[0]..r[7]).
 *   a  4 input words.
 *
 * For every output index k, the diagonal term (when k is even) goes through
 * sqr_add_c(a, k/2, ...) and each off-diagonal index pair i > j with
 * i + j == k goes through sqr_add_c2(a, i, j, ...) once.  After a column
 * its lowest accumulator word is stored to r[k] and cleared, and the roles
 * of c1/c2/c3 rotate for the next column.
 */
8475c87c606SMark Murray void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
84874664626SKris Kennaway {
    /* Scratch variables consumed by the sqr_add_c / sqr_add_c2 macros. */
84974664626SKris Kennaway # ifdef BN_LLONG
85074664626SKris Kennaway     BN_ULLONG t, tt;
85174664626SKris Kennaway # else
85274664626SKris Kennaway     BN_ULONG bl, bh;
85374664626SKris Kennaway # endif
85474664626SKris Kennaway     BN_ULONG t1, t2;
    /* Three-word rotating accumulator. */
85574664626SKris Kennaway     BN_ULONG c1, c2, c3;
85674664626SKris Kennaway 
85774664626SKris Kennaway     c1 = 0;
85874664626SKris Kennaway     c2 = 0;
85974664626SKris Kennaway     c3 = 0;
    /* Column 0: diagonal term only. */
86074664626SKris Kennaway     sqr_add_c(a, 0, c1, c2, c3);
86174664626SKris Kennaway     r[0] = c1;
86274664626SKris Kennaway     c1 = 0;
    /* Column 1: one cross term. */
86374664626SKris Kennaway     sqr_add_c2(a, 1, 0, c2, c3, c1);
86474664626SKris Kennaway     r[1] = c2;
86574664626SKris Kennaway     c2 = 0;
    /* Column 2: diagonal term plus one cross term. */
86674664626SKris Kennaway     sqr_add_c(a, 1, c3, c1, c2);
86774664626SKris Kennaway     sqr_add_c2(a, 2, 0, c3, c1, c2);
86874664626SKris Kennaway     r[2] = c3;
86974664626SKris Kennaway     c3 = 0;
    /* Column 3: two cross terms. */
87074664626SKris Kennaway     sqr_add_c2(a, 3, 0, c1, c2, c3);
87174664626SKris Kennaway     sqr_add_c2(a, 2, 1, c1, c2, c3);
87274664626SKris Kennaway     r[3] = c1;
87374664626SKris Kennaway     c1 = 0;
    /* Column 4: diagonal term plus one cross term. */
87474664626SKris Kennaway     sqr_add_c(a, 2, c2, c3, c1);
87574664626SKris Kennaway     sqr_add_c2(a, 3, 1, c2, c3, c1);
87674664626SKris Kennaway     r[4] = c2;
87774664626SKris Kennaway     c2 = 0;
    /* Column 5: one cross term. */
87874664626SKris Kennaway     sqr_add_c2(a, 3, 2, c3, c1, c2);
87974664626SKris Kennaway     r[5] = c3;
88074664626SKris Kennaway     c3 = 0;
    /*
     * Column 6: the two low accumulator words become r[6] and r[7]; the
     * third word (c3) is not stored.
     */
88174664626SKris Kennaway     sqr_add_c(a, 3, c1, c2, c3);
88274664626SKris Kennaway     r[6] = c1;
88374664626SKris Kennaway     r[7] = c2;
88474664626SKris Kennaway }
8851f13597dSJung-uk Kim 
8861f13597dSJung-uk Kim # ifdef OPENSSL_NO_ASM
8871f13597dSJung-uk Kim #  ifdef OPENSSL_BN_ASM_MONT
8881f13597dSJung-uk Kim #   include <alloca.h>
8891f13597dSJung-uk Kim /*
8901f13597dSJung-uk Kim  * This is essentially reference implementation, which may or may not
8911f13597dSJung-uk Kim  * result in performance improvement. E.g. on IA-32 this routine was
8921f13597dSJung-uk Kim  * observed to give 40% faster rsa1024 private key operations and 10%
8931f13597dSJung-uk Kim  * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
8941f13597dSJung-uk Kim  * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
8951f13597dSJung-uk Kim  * reference implementation, one to be used as starting point for
8961f13597dSJung-uk Kim  * platform-specific assembler. Mentioned numbers apply to compiler
8971f13597dSJung-uk Kim  * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
8981f13597dSJung-uk Kim  * can vary not only from platform to platform, but even for compiler
8991f13597dSJung-uk Kim  * versions. Assembler vs. assembler improvement coefficients can
9001f13597dSJung-uk Kim  * [and are known to] differ and are to be documented elsewhere.
9011f13597dSJung-uk Kim  */
9026f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
9036f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
9041f13597dSJung-uk Kim {
9051f13597dSJung-uk Kim     BN_ULONG c0, c1, ml, *tp, n0;
9061f13597dSJung-uk Kim #   ifdef mul64
9071f13597dSJung-uk Kim     BN_ULONG mh;
9081f13597dSJung-uk Kim #   endif
9091f13597dSJung-uk Kim     volatile BN_ULONG *vp;
9101f13597dSJung-uk Kim     int i = 0, j;
9111f13597dSJung-uk Kim 
9126f9291ceSJung-uk Kim #   if 0                        /* template for platform-specific
9136f9291ceSJung-uk Kim                                  * implementation */
9146f9291ceSJung-uk Kim     if (ap == bp)
9156f9291ceSJung-uk Kim         return bn_sqr_mont(rp, ap, np, n0p, num);
9161f13597dSJung-uk Kim #   endif
9171f13597dSJung-uk Kim     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
9181f13597dSJung-uk Kim 
9191f13597dSJung-uk Kim     n0 = *n0p;
9201f13597dSJung-uk Kim 
9211f13597dSJung-uk Kim     c0 = 0;
9221f13597dSJung-uk Kim     ml = bp[0];
9231f13597dSJung-uk Kim #   ifdef mul64
9241f13597dSJung-uk Kim     mh = HBITS(ml);
9251f13597dSJung-uk Kim     ml = LBITS(ml);
9261f13597dSJung-uk Kim     for (j = 0; j < num; ++j)
9271f13597dSJung-uk Kim         mul(tp[j], ap[j], ml, mh, c0);
9281f13597dSJung-uk Kim #   else
9291f13597dSJung-uk Kim     for (j = 0; j < num; ++j)
9301f13597dSJung-uk Kim         mul(tp[j], ap[j], ml, c0);
9311f13597dSJung-uk Kim #   endif
9321f13597dSJung-uk Kim 
9331f13597dSJung-uk Kim     tp[num] = c0;
9341f13597dSJung-uk Kim     tp[num + 1] = 0;
9351f13597dSJung-uk Kim     goto enter;
9361f13597dSJung-uk Kim 
9376f9291ceSJung-uk Kim     for (i = 0; i < num; i++) {
9381f13597dSJung-uk Kim         c0 = 0;
9391f13597dSJung-uk Kim         ml = bp[i];
9401f13597dSJung-uk Kim #   ifdef mul64
9411f13597dSJung-uk Kim         mh = HBITS(ml);
9421f13597dSJung-uk Kim         ml = LBITS(ml);
9431f13597dSJung-uk Kim         for (j = 0; j < num; ++j)
9441f13597dSJung-uk Kim             mul_add(tp[j], ap[j], ml, mh, c0);
9451f13597dSJung-uk Kim #   else
9461f13597dSJung-uk Kim         for (j = 0; j < num; ++j)
9471f13597dSJung-uk Kim             mul_add(tp[j], ap[j], ml, c0);
9481f13597dSJung-uk Kim #   endif
9491f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
9501f13597dSJung-uk Kim         tp[num] = c1;
9511f13597dSJung-uk Kim         tp[num + 1] = (c1 < c0 ? 1 : 0);
9521f13597dSJung-uk Kim  enter:
9531f13597dSJung-uk Kim         c1 = tp[0];
9541f13597dSJung-uk Kim         ml = (c1 * n0) & BN_MASK2;
9551f13597dSJung-uk Kim         c0 = 0;
9561f13597dSJung-uk Kim #   ifdef mul64
9571f13597dSJung-uk Kim         mh = HBITS(ml);
9581f13597dSJung-uk Kim         ml = LBITS(ml);
9591f13597dSJung-uk Kim         mul_add(c1, np[0], ml, mh, c0);
9601f13597dSJung-uk Kim #   else
9611f13597dSJung-uk Kim         mul_add(c1, ml, np[0], c0);
9621f13597dSJung-uk Kim #   endif
9636f9291ceSJung-uk Kim         for (j = 1; j < num; j++) {
9641f13597dSJung-uk Kim             c1 = tp[j];
9651f13597dSJung-uk Kim #   ifdef mul64
9661f13597dSJung-uk Kim             mul_add(c1, np[j], ml, mh, c0);
9671f13597dSJung-uk Kim #   else
9681f13597dSJung-uk Kim             mul_add(c1, ml, np[j], c0);
9691f13597dSJung-uk Kim #   endif
9701f13597dSJung-uk Kim             tp[j - 1] = c1 & BN_MASK2;
9711f13597dSJung-uk Kim         }
9721f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
9731f13597dSJung-uk Kim         tp[num - 1] = c1;
9741f13597dSJung-uk Kim         tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
9751f13597dSJung-uk Kim     }
9761f13597dSJung-uk Kim 
9776f9291ceSJung-uk Kim     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
9781f13597dSJung-uk Kim         c0 = bn_sub_words(rp, tp, np, num);
9796f9291ceSJung-uk Kim         if (tp[num] != 0 || c0 == 0) {
9806f9291ceSJung-uk Kim             for (i = 0; i < num + 2; i++)
9816f9291ceSJung-uk Kim                 vp[i] = 0;
9821f13597dSJung-uk Kim             return 1;
9831f13597dSJung-uk Kim         }
9841f13597dSJung-uk Kim     }
9856f9291ceSJung-uk Kim     for (i = 0; i < num; i++)
9866f9291ceSJung-uk Kim         rp[i] = tp[i], vp[i] = 0;
9871f13597dSJung-uk Kim     vp[num] = 0;
9881f13597dSJung-uk Kim     vp[num + 1] = 0;
9891f13597dSJung-uk Kim     return 1;
9901f13597dSJung-uk Kim }
9911f13597dSJung-uk Kim #  else
9921f13597dSJung-uk Kim /*
9931f13597dSJung-uk Kim  * Return value of 0 indicates that multiplication/convolution was not
9941f13597dSJung-uk Kim  * performed to signal the caller to fall down to alternative/original
9951f13597dSJung-uk Kim  * code-path.
9961f13597dSJung-uk Kim  */
9976f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
9986f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0, int num)
9996f9291ceSJung-uk Kim {
10006f9291ceSJung-uk Kim     return 0;
10016f9291ceSJung-uk Kim }
10021f13597dSJung-uk Kim #  endif                        /* OPENSSL_BN_ASM_MONT */
10031f13597dSJung-uk Kim # endif
10041f13597dSJung-uk Kim 
1005f579bf8eSKris Kennaway #else                           /* !BN_MUL_COMBA */
100674664626SKris Kennaway 
100774664626SKris Kennaway /* hmm... is it faster just to do a multiply? */
100874664626SKris Kennaway # undef bn_sqr_comba4
10091f13597dSJung-uk Kim void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
101074664626SKris Kennaway {
101174664626SKris Kennaway     BN_ULONG t[8];
101274664626SKris Kennaway     bn_sqr_normal(r, a, 4, t);
101374664626SKris Kennaway }
101474664626SKris Kennaway 
101574664626SKris Kennaway # undef bn_sqr_comba8
10161f13597dSJung-uk Kim void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
101774664626SKris Kennaway {
101874664626SKris Kennaway     BN_ULONG t[16];
101974664626SKris Kennaway     bn_sqr_normal(r, a, 8, t);
102074664626SKris Kennaway }
102174664626SKris Kennaway 
102274664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
102374664626SKris Kennaway {
102474664626SKris Kennaway     r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
102574664626SKris Kennaway     r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
102674664626SKris Kennaway     r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
102774664626SKris Kennaway     r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
102874664626SKris Kennaway }
102974664626SKris Kennaway 
103074664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
103174664626SKris Kennaway {
103274664626SKris Kennaway     r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
103374664626SKris Kennaway     r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
103474664626SKris Kennaway     r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
103574664626SKris Kennaway     r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
103674664626SKris Kennaway     r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
103774664626SKris Kennaway     r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
103874664626SKris Kennaway     r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
103974664626SKris Kennaway     r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
104074664626SKris Kennaway }
104174664626SKris Kennaway 
10421f13597dSJung-uk Kim # ifdef OPENSSL_NO_ASM
10431f13597dSJung-uk Kim #  ifdef OPENSSL_BN_ASM_MONT
10441f13597dSJung-uk Kim #   include <alloca.h>
10456f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
10466f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
10471f13597dSJung-uk Kim {
10481f13597dSJung-uk Kim     BN_ULONG c0, c1, *tp, n0 = *n0p;
10491f13597dSJung-uk Kim     volatile BN_ULONG *vp;
10501f13597dSJung-uk Kim     int i = 0, j;
10511f13597dSJung-uk Kim 
10521f13597dSJung-uk Kim     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
10531f13597dSJung-uk Kim 
10546f9291ceSJung-uk Kim     for (i = 0; i <= num; i++)
10556f9291ceSJung-uk Kim         tp[i] = 0;
10561f13597dSJung-uk Kim 
10576f9291ceSJung-uk Kim     for (i = 0; i < num; i++) {
10581f13597dSJung-uk Kim         c0 = bn_mul_add_words(tp, ap, num, bp[i]);
10591f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
10601f13597dSJung-uk Kim         tp[num] = c1;
10611f13597dSJung-uk Kim         tp[num + 1] = (c1 < c0 ? 1 : 0);
10621f13597dSJung-uk Kim 
10631f13597dSJung-uk Kim         c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
10641f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
10651f13597dSJung-uk Kim         tp[num] = c1;
10661f13597dSJung-uk Kim         tp[num + 1] += (c1 < c0 ? 1 : 0);
10676f9291ceSJung-uk Kim         for (j = 0; j <= num; j++)
10686f9291ceSJung-uk Kim             tp[j] = tp[j + 1];
10691f13597dSJung-uk Kim     }
10701f13597dSJung-uk Kim 
10716f9291ceSJung-uk Kim     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
10721f13597dSJung-uk Kim         c0 = bn_sub_words(rp, tp, np, num);
10736f9291ceSJung-uk Kim         if (tp[num] != 0 || c0 == 0) {
10746f9291ceSJung-uk Kim             for (i = 0; i < num + 2; i++)
10756f9291ceSJung-uk Kim                 vp[i] = 0;
10761f13597dSJung-uk Kim             return 1;
10771f13597dSJung-uk Kim         }
10781f13597dSJung-uk Kim     }
10796f9291ceSJung-uk Kim     for (i = 0; i < num; i++)
10806f9291ceSJung-uk Kim         rp[i] = tp[i], vp[i] = 0;
10811f13597dSJung-uk Kim     vp[num] = 0;
10821f13597dSJung-uk Kim     vp[num + 1] = 0;
10831f13597dSJung-uk Kim     return 1;
10841f13597dSJung-uk Kim }
10851f13597dSJung-uk Kim #  else
10866f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
10876f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0, int num)
10886f9291ceSJung-uk Kim {
10896f9291ceSJung-uk Kim     return 0;
10906f9291ceSJung-uk Kim }
10911f13597dSJung-uk Kim #  endif                        /* OPENSSL_BN_ASM_MONT */
10921f13597dSJung-uk Kim # endif
10931f13597dSJung-uk Kim 
1094f579bf8eSKris Kennaway #endif                          /* !BN_MUL_COMBA */
1095