xref: /freebsd/crypto/openssl/crypto/bn/bn_asm.c (revision 7bded2db)
174664626SKris Kennaway /* crypto/bn/bn_asm.c */
274664626SKris Kennaway /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
374664626SKris Kennaway  * All rights reserved.
474664626SKris Kennaway  *
574664626SKris Kennaway  * This package is an SSL implementation written
674664626SKris Kennaway  * by Eric Young (eay@cryptsoft.com).
774664626SKris Kennaway  * The implementation was written so as to conform with Netscapes SSL.
874664626SKris Kennaway  *
974664626SKris Kennaway  * This library is free for commercial and non-commercial use as long as
1074664626SKris Kennaway  * the following conditions are aheared to.  The following conditions
1174664626SKris Kennaway  * apply to all code found in this distribution, be it the RC4, RSA,
1274664626SKris Kennaway  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
1374664626SKris Kennaway  * included with this distribution is covered by the same copyright terms
1474664626SKris Kennaway  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
1574664626SKris Kennaway  *
1674664626SKris Kennaway  * Copyright remains Eric Young's, and as such any Copyright notices in
1774664626SKris Kennaway  * the code are not to be removed.
1874664626SKris Kennaway  * If this package is used in a product, Eric Young should be given attribution
1974664626SKris Kennaway  * as the author of the parts of the library used.
2074664626SKris Kennaway  * This can be in the form of a textual message at program startup or
2174664626SKris Kennaway  * in documentation (online or textual) provided with the package.
2274664626SKris Kennaway  *
2374664626SKris Kennaway  * Redistribution and use in source and binary forms, with or without
2474664626SKris Kennaway  * modification, are permitted provided that the following conditions
2574664626SKris Kennaway  * are met:
2674664626SKris Kennaway  * 1. Redistributions of source code must retain the copyright
2774664626SKris Kennaway  *    notice, this list of conditions and the following disclaimer.
2874664626SKris Kennaway  * 2. Redistributions in binary form must reproduce the above copyright
2974664626SKris Kennaway  *    notice, this list of conditions and the following disclaimer in the
3074664626SKris Kennaway  *    documentation and/or other materials provided with the distribution.
3174664626SKris Kennaway  * 3. All advertising materials mentioning features or use of this software
3274664626SKris Kennaway  *    must display the following acknowledgement:
3374664626SKris Kennaway  *    "This product includes cryptographic software written by
3474664626SKris Kennaway  *     Eric Young (eay@cryptsoft.com)"
3574664626SKris Kennaway  *    The word 'cryptographic' can be left out if the rouines from the library
3674664626SKris Kennaway  *    being used are not cryptographic related :-).
3774664626SKris Kennaway  * 4. If you include any Windows specific code (or a derivative thereof) from
3874664626SKris Kennaway  *    the apps directory (application code) you must include an acknowledgement:
3974664626SKris Kennaway  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
4074664626SKris Kennaway  *
4174664626SKris Kennaway  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
4274664626SKris Kennaway  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4374664626SKris Kennaway  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
4474664626SKris Kennaway  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
4574664626SKris Kennaway  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
4674664626SKris Kennaway  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
4774664626SKris Kennaway  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
4874664626SKris Kennaway  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
4974664626SKris Kennaway  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
5074664626SKris Kennaway  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
5174664626SKris Kennaway  * SUCH DAMAGE.
5274664626SKris Kennaway  *
5374664626SKris Kennaway  * The licence and distribution terms for any publically available version or
5474664626SKris Kennaway  * derivative of this code cannot be changed.  i.e. this code cannot simply be
5574664626SKris Kennaway  * copied and put under another distribution licence
5674664626SKris Kennaway  * [including the GNU Public Licence.]
5774664626SKris Kennaway  */
5874664626SKris Kennaway 
59f579bf8eSKris Kennaway #ifndef BN_DEBUG
60f579bf8eSKris Kennaway # undef NDEBUG                  /* avoid conflicting definitions */
61f579bf8eSKris Kennaway # define NDEBUG
62f579bf8eSKris Kennaway #endif
63f579bf8eSKris Kennaway 
6474664626SKris Kennaway #include <stdio.h>
65f579bf8eSKris Kennaway #include <assert.h>
6674664626SKris Kennaway #include "cryptlib.h"
6774664626SKris Kennaway #include "bn_lcl.h"
6874664626SKris Kennaway 
69f579bf8eSKris Kennaway #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
7074664626SKris Kennaway 
716f9291ceSJung-uk Kim BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
726f9291ceSJung-uk Kim                           BN_ULONG w)
7374664626SKris Kennaway {
7474664626SKris Kennaway     BN_ULONG c1 = 0;
7574664626SKris Kennaway 
76f579bf8eSKris Kennaway     assert(num >= 0);
776f9291ceSJung-uk Kim     if (num <= 0)
786f9291ceSJung-uk Kim         return (c1);
7974664626SKris Kennaway 
801f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
816f9291ceSJung-uk Kim     while (num & ~3) {
8274664626SKris Kennaway         mul_add(rp[0], ap[0], w, c1);
8374664626SKris Kennaway         mul_add(rp[1], ap[1], w, c1);
8474664626SKris Kennaway         mul_add(rp[2], ap[2], w, c1);
8574664626SKris Kennaway         mul_add(rp[3], ap[3], w, c1);
866f9291ceSJung-uk Kim         ap += 4;
876f9291ceSJung-uk Kim         rp += 4;
886f9291ceSJung-uk Kim         num -= 4;
89f579bf8eSKris Kennaway     }
901f13597dSJung-uk Kim # endif
916f9291ceSJung-uk Kim     while (num) {
921f13597dSJung-uk Kim         mul_add(rp[0], ap[0], w, c1);
936f9291ceSJung-uk Kim         ap++;
946f9291ceSJung-uk Kim         rp++;
956f9291ceSJung-uk Kim         num--;
9674664626SKris Kennaway     }
9774664626SKris Kennaway 
9874664626SKris Kennaway     return (c1);
9974664626SKris Kennaway }
10074664626SKris Kennaway 
1015c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
10274664626SKris Kennaway {
10374664626SKris Kennaway     BN_ULONG c1 = 0;
10474664626SKris Kennaway 
105f579bf8eSKris Kennaway     assert(num >= 0);
1066f9291ceSJung-uk Kim     if (num <= 0)
1076f9291ceSJung-uk Kim         return (c1);
10874664626SKris Kennaway 
1091f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
1106f9291ceSJung-uk Kim     while (num & ~3) {
11174664626SKris Kennaway         mul(rp[0], ap[0], w, c1);
11274664626SKris Kennaway         mul(rp[1], ap[1], w, c1);
11374664626SKris Kennaway         mul(rp[2], ap[2], w, c1);
11474664626SKris Kennaway         mul(rp[3], ap[3], w, c1);
1156f9291ceSJung-uk Kim         ap += 4;
1166f9291ceSJung-uk Kim         rp += 4;
1176f9291ceSJung-uk Kim         num -= 4;
118f579bf8eSKris Kennaway     }
1191f13597dSJung-uk Kim # endif
1206f9291ceSJung-uk Kim     while (num) {
1211f13597dSJung-uk Kim         mul(rp[0], ap[0], w, c1);
1226f9291ceSJung-uk Kim         ap++;
1236f9291ceSJung-uk Kim         rp++;
1246f9291ceSJung-uk Kim         num--;
12574664626SKris Kennaway     }
12674664626SKris Kennaway     return (c1);
12774664626SKris Kennaway }
12874664626SKris Kennaway 
1295c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
13074664626SKris Kennaway {
131f579bf8eSKris Kennaway     assert(n >= 0);
1326f9291ceSJung-uk Kim     if (n <= 0)
1336f9291ceSJung-uk Kim         return;
1341f13597dSJung-uk Kim 
1351f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
1366f9291ceSJung-uk Kim     while (n & ~3) {
137f579bf8eSKris Kennaway         sqr(r[0], r[1], a[0]);
138f579bf8eSKris Kennaway         sqr(r[2], r[3], a[1]);
139f579bf8eSKris Kennaway         sqr(r[4], r[5], a[2]);
140f579bf8eSKris Kennaway         sqr(r[6], r[7], a[3]);
1416f9291ceSJung-uk Kim         a += 4;
1426f9291ceSJung-uk Kim         r += 8;
1436f9291ceSJung-uk Kim         n -= 4;
144f579bf8eSKris Kennaway     }
1451f13597dSJung-uk Kim # endif
1466f9291ceSJung-uk Kim     while (n) {
1471f13597dSJung-uk Kim         sqr(r[0], r[1], a[0]);
1486f9291ceSJung-uk Kim         a++;
1496f9291ceSJung-uk Kim         r += 2;
1506f9291ceSJung-uk Kim         n--;
15174664626SKris Kennaway     }
15274664626SKris Kennaway }
15374664626SKris Kennaway 
1546f9291ceSJung-uk Kim #else                           /* !(defined(BN_LLONG) ||
1556f9291ceSJung-uk Kim                                  * defined(BN_UMULT_HIGH)) */
15674664626SKris Kennaway 
1576f9291ceSJung-uk Kim BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
1586f9291ceSJung-uk Kim                           BN_ULONG w)
15974664626SKris Kennaway {
16074664626SKris Kennaway     BN_ULONG c = 0;
16174664626SKris Kennaway     BN_ULONG bl, bh;
16274664626SKris Kennaway 
163f579bf8eSKris Kennaway     assert(num >= 0);
1646f9291ceSJung-uk Kim     if (num <= 0)
1656f9291ceSJung-uk Kim         return ((BN_ULONG)0);
16674664626SKris Kennaway 
16774664626SKris Kennaway     bl = LBITS(w);
16874664626SKris Kennaway     bh = HBITS(w);
16974664626SKris Kennaway 
1701f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
1716f9291ceSJung-uk Kim     while (num & ~3) {
17274664626SKris Kennaway         mul_add(rp[0], ap[0], bl, bh, c);
17374664626SKris Kennaway         mul_add(rp[1], ap[1], bl, bh, c);
17474664626SKris Kennaway         mul_add(rp[2], ap[2], bl, bh, c);
17574664626SKris Kennaway         mul_add(rp[3], ap[3], bl, bh, c);
1766f9291ceSJung-uk Kim         ap += 4;
1776f9291ceSJung-uk Kim         rp += 4;
1786f9291ceSJung-uk Kim         num -= 4;
1791f13597dSJung-uk Kim     }
1801f13597dSJung-uk Kim # endif
1816f9291ceSJung-uk Kim     while (num) {
1821f13597dSJung-uk Kim         mul_add(rp[0], ap[0], bl, bh, c);
1836f9291ceSJung-uk Kim         ap++;
1846f9291ceSJung-uk Kim         rp++;
1856f9291ceSJung-uk Kim         num--;
18674664626SKris Kennaway     }
18774664626SKris Kennaway     return (c);
18874664626SKris Kennaway }
18974664626SKris Kennaway 
1905c87c606SMark Murray BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
19174664626SKris Kennaway {
19274664626SKris Kennaway     BN_ULONG carry = 0;
19374664626SKris Kennaway     BN_ULONG bl, bh;
19474664626SKris Kennaway 
195f579bf8eSKris Kennaway     assert(num >= 0);
1966f9291ceSJung-uk Kim     if (num <= 0)
1976f9291ceSJung-uk Kim         return ((BN_ULONG)0);
19874664626SKris Kennaway 
19974664626SKris Kennaway     bl = LBITS(w);
20074664626SKris Kennaway     bh = HBITS(w);
20174664626SKris Kennaway 
2021f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
2036f9291ceSJung-uk Kim     while (num & ~3) {
20474664626SKris Kennaway         mul(rp[0], ap[0], bl, bh, carry);
20574664626SKris Kennaway         mul(rp[1], ap[1], bl, bh, carry);
20674664626SKris Kennaway         mul(rp[2], ap[2], bl, bh, carry);
20774664626SKris Kennaway         mul(rp[3], ap[3], bl, bh, carry);
2086f9291ceSJung-uk Kim         ap += 4;
2096f9291ceSJung-uk Kim         rp += 4;
2106f9291ceSJung-uk Kim         num -= 4;
2111f13597dSJung-uk Kim     }
2121f13597dSJung-uk Kim # endif
2136f9291ceSJung-uk Kim     while (num) {
2141f13597dSJung-uk Kim         mul(rp[0], ap[0], bl, bh, carry);
2156f9291ceSJung-uk Kim         ap++;
2166f9291ceSJung-uk Kim         rp++;
2176f9291ceSJung-uk Kim         num--;
21874664626SKris Kennaway     }
21974664626SKris Kennaway     return (carry);
22074664626SKris Kennaway }
22174664626SKris Kennaway 
2225c87c606SMark Murray void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
22374664626SKris Kennaway {
224f579bf8eSKris Kennaway     assert(n >= 0);
2256f9291ceSJung-uk Kim     if (n <= 0)
2266f9291ceSJung-uk Kim         return;
2271f13597dSJung-uk Kim 
2281f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
2296f9291ceSJung-uk Kim     while (n & ~3) {
23074664626SKris Kennaway         sqr64(r[0], r[1], a[0]);
23174664626SKris Kennaway         sqr64(r[2], r[3], a[1]);
23274664626SKris Kennaway         sqr64(r[4], r[5], a[2]);
23374664626SKris Kennaway         sqr64(r[6], r[7], a[3]);
2346f9291ceSJung-uk Kim         a += 4;
2356f9291ceSJung-uk Kim         r += 8;
2366f9291ceSJung-uk Kim         n -= 4;
2371f13597dSJung-uk Kim     }
2381f13597dSJung-uk Kim # endif
2396f9291ceSJung-uk Kim     while (n) {
2401f13597dSJung-uk Kim         sqr64(r[0], r[1], a[0]);
2416f9291ceSJung-uk Kim         a++;
2426f9291ceSJung-uk Kim         r += 2;
2436f9291ceSJung-uk Kim         n--;
24474664626SKris Kennaway     }
24574664626SKris Kennaway }
24674664626SKris Kennaway 
2476f9291ceSJung-uk Kim #endif                          /* !(defined(BN_LLONG) ||
2486f9291ceSJung-uk Kim                                  * defined(BN_UMULT_HIGH)) */
24974664626SKris Kennaway 
25074664626SKris Kennaway #if defined(BN_LLONG) && defined(BN_DIV2W)
25174664626SKris Kennaway 
25274664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
25374664626SKris Kennaway {
25474664626SKris Kennaway     return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d));
25574664626SKris Kennaway }
25674664626SKris Kennaway 
25774664626SKris Kennaway #else
25874664626SKris Kennaway 
259ddd58736SKris Kennaway /* Divide h,l by d and return the result. */
26074664626SKris Kennaway /* I need to test this some more :-( */
26174664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
26274664626SKris Kennaway {
26374664626SKris Kennaway     BN_ULONG dh, dl, q, ret = 0, th, tl, t;
26474664626SKris Kennaway     int i, count = 2;
26574664626SKris Kennaway 
2666f9291ceSJung-uk Kim     if (d == 0)
2676f9291ceSJung-uk Kim         return (BN_MASK2);
26874664626SKris Kennaway 
26974664626SKris Kennaway     i = BN_num_bits_word(d);
2703b4e3dcbSSimon L. B. Nielsen     assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
271ddd58736SKris Kennaway 
27274664626SKris Kennaway     i = BN_BITS2 - i;
2736f9291ceSJung-uk Kim     if (h >= d)
2746f9291ceSJung-uk Kim         h -= d;
27574664626SKris Kennaway 
2766f9291ceSJung-uk Kim     if (i) {
27774664626SKris Kennaway         d <<= i;
27874664626SKris Kennaway         h = (h << i) | (l >> (BN_BITS2 - i));
27974664626SKris Kennaway         l <<= i;
28074664626SKris Kennaway     }
28174664626SKris Kennaway     dh = (d & BN_MASK2h) >> BN_BITS4;
28274664626SKris Kennaway     dl = (d & BN_MASK2l);
2836f9291ceSJung-uk Kim     for (;;) {
28474664626SKris Kennaway         if ((h >> BN_BITS4) == dh)
28574664626SKris Kennaway             q = BN_MASK2l;
28674664626SKris Kennaway         else
28774664626SKris Kennaway             q = h / dh;
28874664626SKris Kennaway 
28974664626SKris Kennaway         th = q * dh;
29074664626SKris Kennaway         tl = dl * q;
2916f9291ceSJung-uk Kim         for (;;) {
29274664626SKris Kennaway             t = h - th;
29374664626SKris Kennaway             if ((t & BN_MASK2h) ||
2946f9291ceSJung-uk Kim                 ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
29574664626SKris Kennaway                 break;
29674664626SKris Kennaway             q--;
29774664626SKris Kennaway             th -= dh;
29874664626SKris Kennaway             tl -= dl;
29974664626SKris Kennaway         }
30074664626SKris Kennaway         t = (tl >> BN_BITS4);
30174664626SKris Kennaway         tl = (tl << BN_BITS4) & BN_MASK2h;
30274664626SKris Kennaway         th += t;
30374664626SKris Kennaway 
3046f9291ceSJung-uk Kim         if (l < tl)
3056f9291ceSJung-uk Kim             th++;
30674664626SKris Kennaway         l -= tl;
3076f9291ceSJung-uk Kim         if (h < th) {
30874664626SKris Kennaway             h += d;
30974664626SKris Kennaway             q--;
31074664626SKris Kennaway         }
31174664626SKris Kennaway         h -= th;
31274664626SKris Kennaway 
3136f9291ceSJung-uk Kim         if (--count == 0)
3146f9291ceSJung-uk Kim             break;
31574664626SKris Kennaway 
31674664626SKris Kennaway         ret = q << BN_BITS4;
31774664626SKris Kennaway         h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
31874664626SKris Kennaway         l = (l & BN_MASK2l) << BN_BITS4;
31974664626SKris Kennaway     }
32074664626SKris Kennaway     ret |= q;
32174664626SKris Kennaway     return (ret);
32274664626SKris Kennaway }
#endif                          /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
32474664626SKris Kennaway 
32574664626SKris Kennaway #ifdef BN_LLONG
3266f9291ceSJung-uk Kim BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
3276f9291ceSJung-uk Kim                       int n)
32874664626SKris Kennaway {
32974664626SKris Kennaway     BN_ULLONG ll = 0;
33074664626SKris Kennaway 
331f579bf8eSKris Kennaway     assert(n >= 0);
3326f9291ceSJung-uk Kim     if (n <= 0)
3336f9291ceSJung-uk Kim         return ((BN_ULONG)0);
33474664626SKris Kennaway 
3351f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
3366f9291ceSJung-uk Kim     while (n & ~3) {
33774664626SKris Kennaway         ll += (BN_ULLONG) a[0] + b[0];
33874664626SKris Kennaway         r[0] = (BN_ULONG)ll & BN_MASK2;
33974664626SKris Kennaway         ll >>= BN_BITS2;
34074664626SKris Kennaway         ll += (BN_ULLONG) a[1] + b[1];
34174664626SKris Kennaway         r[1] = (BN_ULONG)ll & BN_MASK2;
34274664626SKris Kennaway         ll >>= BN_BITS2;
34374664626SKris Kennaway         ll += (BN_ULLONG) a[2] + b[2];
34474664626SKris Kennaway         r[2] = (BN_ULONG)ll & BN_MASK2;
34574664626SKris Kennaway         ll >>= BN_BITS2;
34674664626SKris Kennaway         ll += (BN_ULLONG) a[3] + b[3];
34774664626SKris Kennaway         r[3] = (BN_ULONG)ll & BN_MASK2;
34874664626SKris Kennaway         ll >>= BN_BITS2;
3496f9291ceSJung-uk Kim         a += 4;
3506f9291ceSJung-uk Kim         b += 4;
3516f9291ceSJung-uk Kim         r += 4;
3526f9291ceSJung-uk Kim         n -= 4;
3531f13597dSJung-uk Kim     }
3541f13597dSJung-uk Kim # endif
3556f9291ceSJung-uk Kim     while (n) {
3561f13597dSJung-uk Kim         ll += (BN_ULLONG) a[0] + b[0];
3571f13597dSJung-uk Kim         r[0] = (BN_ULONG)ll & BN_MASK2;
3581f13597dSJung-uk Kim         ll >>= BN_BITS2;
3596f9291ceSJung-uk Kim         a++;
3606f9291ceSJung-uk Kim         b++;
3616f9291ceSJung-uk Kim         r++;
3626f9291ceSJung-uk Kim         n--;
36374664626SKris Kennaway     }
36474664626SKris Kennaway     return ((BN_ULONG)ll);
36574664626SKris Kennaway }
366f579bf8eSKris Kennaway #else                           /* !BN_LLONG */
3676f9291ceSJung-uk Kim BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
3686f9291ceSJung-uk Kim                       int n)
36974664626SKris Kennaway {
37074664626SKris Kennaway     BN_ULONG c, l, t;
37174664626SKris Kennaway 
372f579bf8eSKris Kennaway     assert(n >= 0);
3736f9291ceSJung-uk Kim     if (n <= 0)
3746f9291ceSJung-uk Kim         return ((BN_ULONG)0);
37574664626SKris Kennaway 
37674664626SKris Kennaway     c = 0;
3771f13597dSJung-uk Kim # ifndef OPENSSL_SMALL_FOOTPRINT
3786f9291ceSJung-uk Kim     while (n & ~3) {
37974664626SKris Kennaway         t = a[0];
38074664626SKris Kennaway         t = (t + c) & BN_MASK2;
38174664626SKris Kennaway         c = (t < c);
38274664626SKris Kennaway         l = (t + b[0]) & BN_MASK2;
38374664626SKris Kennaway         c += (l < t);
38474664626SKris Kennaway         r[0] = l;
38574664626SKris Kennaway         t = a[1];
38674664626SKris Kennaway         t = (t + c) & BN_MASK2;
38774664626SKris Kennaway         c = (t < c);
38874664626SKris Kennaway         l = (t + b[1]) & BN_MASK2;
38974664626SKris Kennaway         c += (l < t);
39074664626SKris Kennaway         r[1] = l;
39174664626SKris Kennaway         t = a[2];
39274664626SKris Kennaway         t = (t + c) & BN_MASK2;
39374664626SKris Kennaway         c = (t < c);
39474664626SKris Kennaway         l = (t + b[2]) & BN_MASK2;
39574664626SKris Kennaway         c += (l < t);
39674664626SKris Kennaway         r[2] = l;
39774664626SKris Kennaway         t = a[3];
39874664626SKris Kennaway         t = (t + c) & BN_MASK2;
39974664626SKris Kennaway         c = (t < c);
40074664626SKris Kennaway         l = (t + b[3]) & BN_MASK2;
40174664626SKris Kennaway         c += (l < t);
40274664626SKris Kennaway         r[3] = l;
4036f9291ceSJung-uk Kim         a += 4;
4046f9291ceSJung-uk Kim         b += 4;
4056f9291ceSJung-uk Kim         r += 4;
4066f9291ceSJung-uk Kim         n -= 4;
4071f13597dSJung-uk Kim     }
4081f13597dSJung-uk Kim # endif
4096f9291ceSJung-uk Kim     while (n) {
4101f13597dSJung-uk Kim         t = a[0];
4111f13597dSJung-uk Kim         t = (t + c) & BN_MASK2;
4121f13597dSJung-uk Kim         c = (t < c);
4131f13597dSJung-uk Kim         l = (t + b[0]) & BN_MASK2;
4141f13597dSJung-uk Kim         c += (l < t);
4151f13597dSJung-uk Kim         r[0] = l;
4166f9291ceSJung-uk Kim         a++;
4176f9291ceSJung-uk Kim         b++;
4186f9291ceSJung-uk Kim         r++;
4196f9291ceSJung-uk Kim         n--;
42074664626SKris Kennaway     }
42174664626SKris Kennaway     return ((BN_ULONG)c);
42274664626SKris Kennaway }
423f579bf8eSKris Kennaway #endif                          /* !BN_LLONG */
42474664626SKris Kennaway 
4256f9291ceSJung-uk Kim BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
4266f9291ceSJung-uk Kim                       int n)
42774664626SKris Kennaway {
42874664626SKris Kennaway     BN_ULONG t1, t2;
42974664626SKris Kennaway     int c = 0;
43074664626SKris Kennaway 
431f579bf8eSKris Kennaway     assert(n >= 0);
4326f9291ceSJung-uk Kim     if (n <= 0)
4336f9291ceSJung-uk Kim         return ((BN_ULONG)0);
43474664626SKris Kennaway 
4351f13597dSJung-uk Kim #ifndef OPENSSL_SMALL_FOOTPRINT
4366f9291ceSJung-uk Kim     while (n & ~3) {
4376f9291ceSJung-uk Kim         t1 = a[0];
4386f9291ceSJung-uk Kim         t2 = b[0];
43974664626SKris Kennaway         r[0] = (t1 - t2 - c) & BN_MASK2;
4406f9291ceSJung-uk Kim         if (t1 != t2)
4416f9291ceSJung-uk Kim             c = (t1 < t2);
4426f9291ceSJung-uk Kim         t1 = a[1];
4436f9291ceSJung-uk Kim         t2 = b[1];
44474664626SKris Kennaway         r[1] = (t1 - t2 - c) & BN_MASK2;
4456f9291ceSJung-uk Kim         if (t1 != t2)
4466f9291ceSJung-uk Kim             c = (t1 < t2);
4476f9291ceSJung-uk Kim         t1 = a[2];
4486f9291ceSJung-uk Kim         t2 = b[2];
44974664626SKris Kennaway         r[2] = (t1 - t2 - c) & BN_MASK2;
4506f9291ceSJung-uk Kim         if (t1 != t2)
4516f9291ceSJung-uk Kim             c = (t1 < t2);
4526f9291ceSJung-uk Kim         t1 = a[3];
4536f9291ceSJung-uk Kim         t2 = b[3];
45474664626SKris Kennaway         r[3] = (t1 - t2 - c) & BN_MASK2;
4556f9291ceSJung-uk Kim         if (t1 != t2)
4566f9291ceSJung-uk Kim             c = (t1 < t2);
4576f9291ceSJung-uk Kim         a += 4;
4586f9291ceSJung-uk Kim         b += 4;
4596f9291ceSJung-uk Kim         r += 4;
4606f9291ceSJung-uk Kim         n -= 4;
4611f13597dSJung-uk Kim     }
4621f13597dSJung-uk Kim #endif
4636f9291ceSJung-uk Kim     while (n) {
4646f9291ceSJung-uk Kim         t1 = a[0];
4656f9291ceSJung-uk Kim         t2 = b[0];
4661f13597dSJung-uk Kim         r[0] = (t1 - t2 - c) & BN_MASK2;
4676f9291ceSJung-uk Kim         if (t1 != t2)
4686f9291ceSJung-uk Kim             c = (t1 < t2);
4696f9291ceSJung-uk Kim         a++;
4706f9291ceSJung-uk Kim         b++;
4716f9291ceSJung-uk Kim         r++;
4726f9291ceSJung-uk Kim         n--;
47374664626SKris Kennaway     }
47474664626SKris Kennaway     return (c);
47574664626SKris Kennaway }
47674664626SKris Kennaway 
4771f13597dSJung-uk Kim #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
47874664626SKris Kennaway 
47974664626SKris Kennaway # undef bn_mul_comba8
48074664626SKris Kennaway # undef bn_mul_comba4
48174664626SKris Kennaway # undef bn_sqr_comba8
48274664626SKris Kennaway # undef bn_sqr_comba4
48374664626SKris Kennaway 
/* mul_add_c(a,b,c0,c1,c2)    -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2)   -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)    -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/*
 * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
 * c=(c2,c1,c0)
 */
491f579bf8eSKris Kennaway 
49274664626SKris Kennaway # ifdef BN_LLONG
/*
 * Column accumulators built on the double-width BN_ULLONG type.  In every
 * macro (c2,c1,c0) is a three-word running sum with c0 the least
 * significant word; c0, c1 and c2 are assigned to, so they must be
 * modifiable BN_ULONG lvalues.
 *
 * Keep in mind that additions to multiplication result can not
 * overflow, because its high half cannot be all-ones.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/*
 * (c2,c1,c0) += 2 * a * b.  The product is added twice -- first through the
 * copy tt, then through t itself (using the already updated c0) -- so the
 * doubled product never has to be formed.
 */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        BN_ULLONG tt = t+c0;    /* no carry */  \
        c0 = (BN_ULONG)Lw(tt);                  \
        hi = (BN_ULONG)Hw(tt);                  \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/*
 * (c2,c1,c0) += a[i]^2.  The array argument is parenthesized so that an
 * expression such as "p + 1" indexes correctly, matching the other
 * sqr_add_c variants in this file.
 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)[i]*(a)[i]; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
530f579bf8eSKris Kennaway 
5313b4e3dcbSSimon L. B. Nielsen # elif defined(BN_UMULT_LOHI)
/*
 * Column accumulators built on BN_UMULT_LOHI, which is handed two result
 * variables and the two operands.  In every macro (c2,c1,c0) is a
 * three-word running sum with c0 the least significant word; all three are
 * updated in place.
 *
 * Bumping the high product word by one cannot wrap, because the high word
 * of a multiplication result is never all-ones.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a, b, c0, c1, c2) do {                      \
        BN_ULONG x_ = (a), y_ = (b);                            \
        BN_ULONG p_lo_, p_hi_;                                  \
        BN_UMULT_LOHI(p_lo_, p_hi_, x_, y_);                    \
        c0 += p_lo_;                                            \
        if (c0 < p_lo_)                                         \
            p_hi_++;                                            \
        c1 += p_hi_;                                            \
        if (c1 < p_hi_)                                         \
            c2++;                                               \
    } while (0)

/* (c2,c1,c0) += 2 * a * b, done as two additions of the same product */
#  define mul_add_c2(a, b, c0, c1, c2) do {                     \
        BN_ULONG x_ = (a), y_ = (b);                            \
        BN_ULONG p_lo_, p_hi_, p_hi1_;                          \
        BN_UMULT_LOHI(p_lo_, p_hi_, x_, y_);                    \
        c0 += p_lo_;                                            \
        p_hi1_ = p_hi_ + (c0 < p_lo_);                          \
        c1 += p_hi1_;                                           \
        c2 += (c1 < p_hi1_);                                    \
        c0 += p_lo_;                                            \
        p_hi_ += (c0 < p_lo_);                                  \
        c1 += p_hi_;                                            \
        c2 += (c1 < p_hi_);                                     \
    } while (0)

/* (c2,c1,c0) += a[i]^2 */
#  define sqr_add_c(a, i, c0, c1, c2) do {                      \
        BN_ULONG x_ = (a)[i];                                   \
        BN_ULONG p_lo_, p_hi_;                                  \
        BN_UMULT_LOHI(p_lo_, p_hi_, x_, x_);                    \
        c0 += p_lo_;                                            \
        if (c0 < p_lo_)                                         \
            p_hi_++;                                            \
        c1 += p_hi_;                                            \
        if (c1 < p_hi_)                                         \
            c2++;                                               \
    } while (0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a, i, j, c0, c1, c2) \
        mul_add_c2((a)[i], (a)[j], c0, c1, c2)
5643b4e3dcbSSimon L. B. Nielsen 
565f579bf8eSKris Kennaway # elif defined(BN_UMULT_HIGH)
/*
 * Column accumulators built on BN_UMULT_HIGH: the low product word comes
 * from an ordinary (truncating) multiply and the high word from
 * BN_UMULT_HIGH.  In every macro (c2,c1,c0) is a three-word running sum
 * with c0 the least significant word; all three are updated in place.
 *
 * Bumping the high product word by one cannot wrap, because the high word
 * of a multiplication result is never all-ones.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a, b, c0, c1, c2) do {                      \
        BN_ULONG x_ = (a), y_ = (b);                            \
        BN_ULONG p_lo_ = x_ * y_;                               \
        BN_ULONG p_hi_ = BN_UMULT_HIGH(x_, y_);                 \
        c0 += p_lo_;                                            \
        if (c0 < p_lo_)                                         \
            p_hi_++;                                            \
        c1 += p_hi_;                                            \
        if (c1 < p_hi_)                                         \
            c2++;                                               \
    } while (0)

/* (c2,c1,c0) += 2 * a * b, done as two additions of the same product */
#  define mul_add_c2(a, b, c0, c1, c2) do {                     \
        BN_ULONG x_ = (a), y_ = (b), p_hi1_;                    \
        BN_ULONG p_lo_ = x_ * y_;                               \
        BN_ULONG p_hi_ = BN_UMULT_HIGH(x_, y_);                 \
        c0 += p_lo_;                                            \
        p_hi1_ = p_hi_ + (c0 < p_lo_);                          \
        c1 += p_hi1_;                                           \
        c2 += (c1 < p_hi1_);                                    \
        c0 += p_lo_;                                            \
        p_hi_ += (c0 < p_lo_);                                  \
        c1 += p_hi_;                                            \
        c2 += (c1 < p_hi_);                                     \
    } while (0)

/* (c2,c1,c0) += a[i]^2 */
#  define sqr_add_c(a, i, c0, c1, c2) do {                      \
        BN_ULONG x_ = (a)[i];                                   \
        BN_ULONG p_lo_ = x_ * x_;                               \
        BN_ULONG p_hi_ = BN_UMULT_HIGH(x_, x_);                 \
        c0 += p_lo_;                                            \
        if (c0 < p_lo_)                                         \
            p_hi_++;                                            \
        c1 += p_hi_;                                            \
        if (c1 < p_hi_)                                         \
            c2++;                                               \
    } while (0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a, i, j, c0, c1, c2) \
        mul_add_c2((a)[i], (a)[j], c0, c1, c2)
598f579bf8eSKris Kennaway 
599f579bf8eSKris Kennaway # else                          /* !BN_LLONG */
6007bded2dbSJung-uk Kim /*
6017bded2dbSJung-uk Kim  * Keep in mind that additions to hi can not overflow, because
6027bded2dbSJung-uk Kim  * the high word of a multiplication result cannot be all-ones.
6037bded2dbSJung-uk Kim  */
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a * b, half-word variant.
 *
 * a and b are captured once in ta/tb so that an argument with side
 * effects is evaluated a single time (each used to be expanded twice, once
 * for LBITS and once for HBITS). mul64 (defined elsewhere) is expected to
 * replace lo/hi with the low/high word of the product.
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo = LBITS(ta), hi = HBITS(ta); \
        BN_ULONG bl = LBITS(tb), bh = HBITS(tb); \
        mul64(lo,hi,bl,bh);                     \
        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)
61174664626SKris Kennaway 
/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2 * a * b, half-word variant.
 *
 * The product is added twice. tt carries the high word (plus carry) for
 * the first pass so hi is still the plain high word for the second pass.
 * a and b are captured once in ta/tb so each argument is evaluated a
 * single time. mul64 (defined elsewhere) is expected to replace lo/hi
 * with the low/high word of the product.
 */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG ta = (a), tb = (b), tt;        \
        BN_ULONG lo = LBITS(ta), hi = HBITS(ta); \
        BN_ULONG bl = LBITS(tb), bh = HBITS(tb); \
        mul64(lo,hi,bl,bh);                     \
        tt = hi;                                \
        c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
        c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)
62374664626SKris Kennaway 
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i] * a[i], half-word variant.
 *
 * sqr64 (defined elsewhere) is expected to store the low/high word of the
 * square in lo/hi; the carry out of c0 is folded into hi, the carry out of
 * c1 goes into c2.
 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG lo, hi;                        \
        sqr64(lo,hi,(a)[i]);                    \
        c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)
63074664626SKris Kennaway 
/*
 * sqr_add_c2(a,i,j,c0,c1,c2): forwards a[i] and a[j] to mul_add_c2, i.e.
 * accumulates the doubled cross term of a square.
 */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
633f579bf8eSKris Kennaway # endif                         /* !BN_LLONG */
63474664626SKris Kennaway 
63574664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
63674664626SKris Kennaway {
63774664626SKris Kennaway     BN_ULONG c1, c2, c3;
63874664626SKris Kennaway 
63974664626SKris Kennaway     c1 = 0;
64074664626SKris Kennaway     c2 = 0;
64174664626SKris Kennaway     c3 = 0;
64274664626SKris Kennaway     mul_add_c(a[0], b[0], c1, c2, c3);
64374664626SKris Kennaway     r[0] = c1;
64474664626SKris Kennaway     c1 = 0;
64574664626SKris Kennaway     mul_add_c(a[0], b[1], c2, c3, c1);
64674664626SKris Kennaway     mul_add_c(a[1], b[0], c2, c3, c1);
64774664626SKris Kennaway     r[1] = c2;
64874664626SKris Kennaway     c2 = 0;
64974664626SKris Kennaway     mul_add_c(a[2], b[0], c3, c1, c2);
65074664626SKris Kennaway     mul_add_c(a[1], b[1], c3, c1, c2);
65174664626SKris Kennaway     mul_add_c(a[0], b[2], c3, c1, c2);
65274664626SKris Kennaway     r[2] = c3;
65374664626SKris Kennaway     c3 = 0;
65474664626SKris Kennaway     mul_add_c(a[0], b[3], c1, c2, c3);
65574664626SKris Kennaway     mul_add_c(a[1], b[2], c1, c2, c3);
65674664626SKris Kennaway     mul_add_c(a[2], b[1], c1, c2, c3);
65774664626SKris Kennaway     mul_add_c(a[3], b[0], c1, c2, c3);
65874664626SKris Kennaway     r[3] = c1;
65974664626SKris Kennaway     c1 = 0;
66074664626SKris Kennaway     mul_add_c(a[4], b[0], c2, c3, c1);
66174664626SKris Kennaway     mul_add_c(a[3], b[1], c2, c3, c1);
66274664626SKris Kennaway     mul_add_c(a[2], b[2], c2, c3, c1);
66374664626SKris Kennaway     mul_add_c(a[1], b[3], c2, c3, c1);
66474664626SKris Kennaway     mul_add_c(a[0], b[4], c2, c3, c1);
66574664626SKris Kennaway     r[4] = c2;
66674664626SKris Kennaway     c2 = 0;
66774664626SKris Kennaway     mul_add_c(a[0], b[5], c3, c1, c2);
66874664626SKris Kennaway     mul_add_c(a[1], b[4], c3, c1, c2);
66974664626SKris Kennaway     mul_add_c(a[2], b[3], c3, c1, c2);
67074664626SKris Kennaway     mul_add_c(a[3], b[2], c3, c1, c2);
67174664626SKris Kennaway     mul_add_c(a[4], b[1], c3, c1, c2);
67274664626SKris Kennaway     mul_add_c(a[5], b[0], c3, c1, c2);
67374664626SKris Kennaway     r[5] = c3;
67474664626SKris Kennaway     c3 = 0;
67574664626SKris Kennaway     mul_add_c(a[6], b[0], c1, c2, c3);
67674664626SKris Kennaway     mul_add_c(a[5], b[1], c1, c2, c3);
67774664626SKris Kennaway     mul_add_c(a[4], b[2], c1, c2, c3);
67874664626SKris Kennaway     mul_add_c(a[3], b[3], c1, c2, c3);
67974664626SKris Kennaway     mul_add_c(a[2], b[4], c1, c2, c3);
68074664626SKris Kennaway     mul_add_c(a[1], b[5], c1, c2, c3);
68174664626SKris Kennaway     mul_add_c(a[0], b[6], c1, c2, c3);
68274664626SKris Kennaway     r[6] = c1;
68374664626SKris Kennaway     c1 = 0;
68474664626SKris Kennaway     mul_add_c(a[0], b[7], c2, c3, c1);
68574664626SKris Kennaway     mul_add_c(a[1], b[6], c2, c3, c1);
68674664626SKris Kennaway     mul_add_c(a[2], b[5], c2, c3, c1);
68774664626SKris Kennaway     mul_add_c(a[3], b[4], c2, c3, c1);
68874664626SKris Kennaway     mul_add_c(a[4], b[3], c2, c3, c1);
68974664626SKris Kennaway     mul_add_c(a[5], b[2], c2, c3, c1);
69074664626SKris Kennaway     mul_add_c(a[6], b[1], c2, c3, c1);
69174664626SKris Kennaway     mul_add_c(a[7], b[0], c2, c3, c1);
69274664626SKris Kennaway     r[7] = c2;
69374664626SKris Kennaway     c2 = 0;
69474664626SKris Kennaway     mul_add_c(a[7], b[1], c3, c1, c2);
69574664626SKris Kennaway     mul_add_c(a[6], b[2], c3, c1, c2);
69674664626SKris Kennaway     mul_add_c(a[5], b[3], c3, c1, c2);
69774664626SKris Kennaway     mul_add_c(a[4], b[4], c3, c1, c2);
69874664626SKris Kennaway     mul_add_c(a[3], b[5], c3, c1, c2);
69974664626SKris Kennaway     mul_add_c(a[2], b[6], c3, c1, c2);
70074664626SKris Kennaway     mul_add_c(a[1], b[7], c3, c1, c2);
70174664626SKris Kennaway     r[8] = c3;
70274664626SKris Kennaway     c3 = 0;
70374664626SKris Kennaway     mul_add_c(a[2], b[7], c1, c2, c3);
70474664626SKris Kennaway     mul_add_c(a[3], b[6], c1, c2, c3);
70574664626SKris Kennaway     mul_add_c(a[4], b[5], c1, c2, c3);
70674664626SKris Kennaway     mul_add_c(a[5], b[4], c1, c2, c3);
70774664626SKris Kennaway     mul_add_c(a[6], b[3], c1, c2, c3);
70874664626SKris Kennaway     mul_add_c(a[7], b[2], c1, c2, c3);
70974664626SKris Kennaway     r[9] = c1;
71074664626SKris Kennaway     c1 = 0;
71174664626SKris Kennaway     mul_add_c(a[7], b[3], c2, c3, c1);
71274664626SKris Kennaway     mul_add_c(a[6], b[4], c2, c3, c1);
71374664626SKris Kennaway     mul_add_c(a[5], b[5], c2, c3, c1);
71474664626SKris Kennaway     mul_add_c(a[4], b[6], c2, c3, c1);
71574664626SKris Kennaway     mul_add_c(a[3], b[7], c2, c3, c1);
71674664626SKris Kennaway     r[10] = c2;
71774664626SKris Kennaway     c2 = 0;
71874664626SKris Kennaway     mul_add_c(a[4], b[7], c3, c1, c2);
71974664626SKris Kennaway     mul_add_c(a[5], b[6], c3, c1, c2);
72074664626SKris Kennaway     mul_add_c(a[6], b[5], c3, c1, c2);
72174664626SKris Kennaway     mul_add_c(a[7], b[4], c3, c1, c2);
72274664626SKris Kennaway     r[11] = c3;
72374664626SKris Kennaway     c3 = 0;
72474664626SKris Kennaway     mul_add_c(a[7], b[5], c1, c2, c3);
72574664626SKris Kennaway     mul_add_c(a[6], b[6], c1, c2, c3);
72674664626SKris Kennaway     mul_add_c(a[5], b[7], c1, c2, c3);
72774664626SKris Kennaway     r[12] = c1;
72874664626SKris Kennaway     c1 = 0;
72974664626SKris Kennaway     mul_add_c(a[6], b[7], c2, c3, c1);
73074664626SKris Kennaway     mul_add_c(a[7], b[6], c2, c3, c1);
73174664626SKris Kennaway     r[13] = c2;
73274664626SKris Kennaway     c2 = 0;
73374664626SKris Kennaway     mul_add_c(a[7], b[7], c3, c1, c2);
73474664626SKris Kennaway     r[14] = c3;
73574664626SKris Kennaway     r[15] = c1;
73674664626SKris Kennaway }
73774664626SKris Kennaway 
73874664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
73974664626SKris Kennaway {
74074664626SKris Kennaway     BN_ULONG c1, c2, c3;
74174664626SKris Kennaway 
74274664626SKris Kennaway     c1 = 0;
74374664626SKris Kennaway     c2 = 0;
74474664626SKris Kennaway     c3 = 0;
74574664626SKris Kennaway     mul_add_c(a[0], b[0], c1, c2, c3);
74674664626SKris Kennaway     r[0] = c1;
74774664626SKris Kennaway     c1 = 0;
74874664626SKris Kennaway     mul_add_c(a[0], b[1], c2, c3, c1);
74974664626SKris Kennaway     mul_add_c(a[1], b[0], c2, c3, c1);
75074664626SKris Kennaway     r[1] = c2;
75174664626SKris Kennaway     c2 = 0;
75274664626SKris Kennaway     mul_add_c(a[2], b[0], c3, c1, c2);
75374664626SKris Kennaway     mul_add_c(a[1], b[1], c3, c1, c2);
75474664626SKris Kennaway     mul_add_c(a[0], b[2], c3, c1, c2);
75574664626SKris Kennaway     r[2] = c3;
75674664626SKris Kennaway     c3 = 0;
75774664626SKris Kennaway     mul_add_c(a[0], b[3], c1, c2, c3);
75874664626SKris Kennaway     mul_add_c(a[1], b[2], c1, c2, c3);
75974664626SKris Kennaway     mul_add_c(a[2], b[1], c1, c2, c3);
76074664626SKris Kennaway     mul_add_c(a[3], b[0], c1, c2, c3);
76174664626SKris Kennaway     r[3] = c1;
76274664626SKris Kennaway     c1 = 0;
76374664626SKris Kennaway     mul_add_c(a[3], b[1], c2, c3, c1);
76474664626SKris Kennaway     mul_add_c(a[2], b[2], c2, c3, c1);
76574664626SKris Kennaway     mul_add_c(a[1], b[3], c2, c3, c1);
76674664626SKris Kennaway     r[4] = c2;
76774664626SKris Kennaway     c2 = 0;
76874664626SKris Kennaway     mul_add_c(a[2], b[3], c3, c1, c2);
76974664626SKris Kennaway     mul_add_c(a[3], b[2], c3, c1, c2);
77074664626SKris Kennaway     r[5] = c3;
77174664626SKris Kennaway     c3 = 0;
77274664626SKris Kennaway     mul_add_c(a[3], b[3], c1, c2, c3);
77374664626SKris Kennaway     r[6] = c1;
77474664626SKris Kennaway     r[7] = c2;
77574664626SKris Kennaway }
77674664626SKris Kennaway 
7775c87c606SMark Murray void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
77874664626SKris Kennaway {
77974664626SKris Kennaway     BN_ULONG c1, c2, c3;
78074664626SKris Kennaway 
78174664626SKris Kennaway     c1 = 0;
78274664626SKris Kennaway     c2 = 0;
78374664626SKris Kennaway     c3 = 0;
78474664626SKris Kennaway     sqr_add_c(a, 0, c1, c2, c3);
78574664626SKris Kennaway     r[0] = c1;
78674664626SKris Kennaway     c1 = 0;
78774664626SKris Kennaway     sqr_add_c2(a, 1, 0, c2, c3, c1);
78874664626SKris Kennaway     r[1] = c2;
78974664626SKris Kennaway     c2 = 0;
79074664626SKris Kennaway     sqr_add_c(a, 1, c3, c1, c2);
79174664626SKris Kennaway     sqr_add_c2(a, 2, 0, c3, c1, c2);
79274664626SKris Kennaway     r[2] = c3;
79374664626SKris Kennaway     c3 = 0;
79474664626SKris Kennaway     sqr_add_c2(a, 3, 0, c1, c2, c3);
79574664626SKris Kennaway     sqr_add_c2(a, 2, 1, c1, c2, c3);
79674664626SKris Kennaway     r[3] = c1;
79774664626SKris Kennaway     c1 = 0;
79874664626SKris Kennaway     sqr_add_c(a, 2, c2, c3, c1);
79974664626SKris Kennaway     sqr_add_c2(a, 3, 1, c2, c3, c1);
80074664626SKris Kennaway     sqr_add_c2(a, 4, 0, c2, c3, c1);
80174664626SKris Kennaway     r[4] = c2;
80274664626SKris Kennaway     c2 = 0;
80374664626SKris Kennaway     sqr_add_c2(a, 5, 0, c3, c1, c2);
80474664626SKris Kennaway     sqr_add_c2(a, 4, 1, c3, c1, c2);
80574664626SKris Kennaway     sqr_add_c2(a, 3, 2, c3, c1, c2);
80674664626SKris Kennaway     r[5] = c3;
80774664626SKris Kennaway     c3 = 0;
80874664626SKris Kennaway     sqr_add_c(a, 3, c1, c2, c3);
80974664626SKris Kennaway     sqr_add_c2(a, 4, 2, c1, c2, c3);
81074664626SKris Kennaway     sqr_add_c2(a, 5, 1, c1, c2, c3);
81174664626SKris Kennaway     sqr_add_c2(a, 6, 0, c1, c2, c3);
81274664626SKris Kennaway     r[6] = c1;
81374664626SKris Kennaway     c1 = 0;
81474664626SKris Kennaway     sqr_add_c2(a, 7, 0, c2, c3, c1);
81574664626SKris Kennaway     sqr_add_c2(a, 6, 1, c2, c3, c1);
81674664626SKris Kennaway     sqr_add_c2(a, 5, 2, c2, c3, c1);
81774664626SKris Kennaway     sqr_add_c2(a, 4, 3, c2, c3, c1);
81874664626SKris Kennaway     r[7] = c2;
81974664626SKris Kennaway     c2 = 0;
82074664626SKris Kennaway     sqr_add_c(a, 4, c3, c1, c2);
82174664626SKris Kennaway     sqr_add_c2(a, 5, 3, c3, c1, c2);
82274664626SKris Kennaway     sqr_add_c2(a, 6, 2, c3, c1, c2);
82374664626SKris Kennaway     sqr_add_c2(a, 7, 1, c3, c1, c2);
82474664626SKris Kennaway     r[8] = c3;
82574664626SKris Kennaway     c3 = 0;
82674664626SKris Kennaway     sqr_add_c2(a, 7, 2, c1, c2, c3);
82774664626SKris Kennaway     sqr_add_c2(a, 6, 3, c1, c2, c3);
82874664626SKris Kennaway     sqr_add_c2(a, 5, 4, c1, c2, c3);
82974664626SKris Kennaway     r[9] = c1;
83074664626SKris Kennaway     c1 = 0;
83174664626SKris Kennaway     sqr_add_c(a, 5, c2, c3, c1);
83274664626SKris Kennaway     sqr_add_c2(a, 6, 4, c2, c3, c1);
83374664626SKris Kennaway     sqr_add_c2(a, 7, 3, c2, c3, c1);
83474664626SKris Kennaway     r[10] = c2;
83574664626SKris Kennaway     c2 = 0;
83674664626SKris Kennaway     sqr_add_c2(a, 7, 4, c3, c1, c2);
83774664626SKris Kennaway     sqr_add_c2(a, 6, 5, c3, c1, c2);
83874664626SKris Kennaway     r[11] = c3;
83974664626SKris Kennaway     c3 = 0;
84074664626SKris Kennaway     sqr_add_c(a, 6, c1, c2, c3);
84174664626SKris Kennaway     sqr_add_c2(a, 7, 5, c1, c2, c3);
84274664626SKris Kennaway     r[12] = c1;
84374664626SKris Kennaway     c1 = 0;
84474664626SKris Kennaway     sqr_add_c2(a, 7, 6, c2, c3, c1);
84574664626SKris Kennaway     r[13] = c2;
84674664626SKris Kennaway     c2 = 0;
84774664626SKris Kennaway     sqr_add_c(a, 7, c3, c1, c2);
84874664626SKris Kennaway     r[14] = c3;
84974664626SKris Kennaway     r[15] = c1;
85074664626SKris Kennaway }
85174664626SKris Kennaway 
8525c87c606SMark Murray void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
85374664626SKris Kennaway {
85474664626SKris Kennaway     BN_ULONG c1, c2, c3;
85574664626SKris Kennaway 
85674664626SKris Kennaway     c1 = 0;
85774664626SKris Kennaway     c2 = 0;
85874664626SKris Kennaway     c3 = 0;
85974664626SKris Kennaway     sqr_add_c(a, 0, c1, c2, c3);
86074664626SKris Kennaway     r[0] = c1;
86174664626SKris Kennaway     c1 = 0;
86274664626SKris Kennaway     sqr_add_c2(a, 1, 0, c2, c3, c1);
86374664626SKris Kennaway     r[1] = c2;
86474664626SKris Kennaway     c2 = 0;
86574664626SKris Kennaway     sqr_add_c(a, 1, c3, c1, c2);
86674664626SKris Kennaway     sqr_add_c2(a, 2, 0, c3, c1, c2);
86774664626SKris Kennaway     r[2] = c3;
86874664626SKris Kennaway     c3 = 0;
86974664626SKris Kennaway     sqr_add_c2(a, 3, 0, c1, c2, c3);
87074664626SKris Kennaway     sqr_add_c2(a, 2, 1, c1, c2, c3);
87174664626SKris Kennaway     r[3] = c1;
87274664626SKris Kennaway     c1 = 0;
87374664626SKris Kennaway     sqr_add_c(a, 2, c2, c3, c1);
87474664626SKris Kennaway     sqr_add_c2(a, 3, 1, c2, c3, c1);
87574664626SKris Kennaway     r[4] = c2;
87674664626SKris Kennaway     c2 = 0;
87774664626SKris Kennaway     sqr_add_c2(a, 3, 2, c3, c1, c2);
87874664626SKris Kennaway     r[5] = c3;
87974664626SKris Kennaway     c3 = 0;
88074664626SKris Kennaway     sqr_add_c(a, 3, c1, c2, c3);
88174664626SKris Kennaway     r[6] = c1;
88274664626SKris Kennaway     r[7] = c2;
88374664626SKris Kennaway }
8841f13597dSJung-uk Kim 
8851f13597dSJung-uk Kim # ifdef OPENSSL_NO_ASM
8861f13597dSJung-uk Kim #  ifdef OPENSSL_BN_ASM_MONT
8871f13597dSJung-uk Kim #   include <alloca.h>
8881f13597dSJung-uk Kim /*
8891f13597dSJung-uk Kim  * This is essentially reference implementation, which may or may not
8901f13597dSJung-uk Kim  * result in performance improvement. E.g. on IA-32 this routine was
8911f13597dSJung-uk Kim  * observed to give 40% faster rsa1024 private key operations and 10%
8921f13597dSJung-uk Kim  * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
8931f13597dSJung-uk Kim  * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
8941f13597dSJung-uk Kim  * reference implementation, one to be used as starting point for
8951f13597dSJung-uk Kim  * platform-specific assembler. Mentioned numbers apply to compiler
8961f13597dSJung-uk Kim  * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
8971f13597dSJung-uk Kim  * can vary not only from platform to platform, but even for compiler
8981f13597dSJung-uk Kim  * versions. Assembler vs. assembler improvement coefficients can
8991f13597dSJung-uk Kim  * [and are known to] differ and are to be documented elsewhere.
9001f13597dSJung-uk Kim  */
9016f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
9026f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
9031f13597dSJung-uk Kim {
9041f13597dSJung-uk Kim     BN_ULONG c0, c1, ml, *tp, n0;
9051f13597dSJung-uk Kim #   ifdef mul64
9061f13597dSJung-uk Kim     BN_ULONG mh;
9071f13597dSJung-uk Kim #   endif
9081f13597dSJung-uk Kim     volatile BN_ULONG *vp;
9091f13597dSJung-uk Kim     int i = 0, j;
9101f13597dSJung-uk Kim 
9116f9291ceSJung-uk Kim #   if 0                        /* template for platform-specific
9126f9291ceSJung-uk Kim                                  * implementation */
9136f9291ceSJung-uk Kim     if (ap == bp)
9146f9291ceSJung-uk Kim         return bn_sqr_mont(rp, ap, np, n0p, num);
9151f13597dSJung-uk Kim #   endif
9161f13597dSJung-uk Kim     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
9171f13597dSJung-uk Kim 
9181f13597dSJung-uk Kim     n0 = *n0p;
9191f13597dSJung-uk Kim 
9201f13597dSJung-uk Kim     c0 = 0;
9211f13597dSJung-uk Kim     ml = bp[0];
9221f13597dSJung-uk Kim #   ifdef mul64
9231f13597dSJung-uk Kim     mh = HBITS(ml);
9241f13597dSJung-uk Kim     ml = LBITS(ml);
9251f13597dSJung-uk Kim     for (j = 0; j < num; ++j)
9261f13597dSJung-uk Kim         mul(tp[j], ap[j], ml, mh, c0);
9271f13597dSJung-uk Kim #   else
9281f13597dSJung-uk Kim     for (j = 0; j < num; ++j)
9291f13597dSJung-uk Kim         mul(tp[j], ap[j], ml, c0);
9301f13597dSJung-uk Kim #   endif
9311f13597dSJung-uk Kim 
9321f13597dSJung-uk Kim     tp[num] = c0;
9331f13597dSJung-uk Kim     tp[num + 1] = 0;
9341f13597dSJung-uk Kim     goto enter;
9351f13597dSJung-uk Kim 
9366f9291ceSJung-uk Kim     for (i = 0; i < num; i++) {
9371f13597dSJung-uk Kim         c0 = 0;
9381f13597dSJung-uk Kim         ml = bp[i];
9391f13597dSJung-uk Kim #   ifdef mul64
9401f13597dSJung-uk Kim         mh = HBITS(ml);
9411f13597dSJung-uk Kim         ml = LBITS(ml);
9421f13597dSJung-uk Kim         for (j = 0; j < num; ++j)
9431f13597dSJung-uk Kim             mul_add(tp[j], ap[j], ml, mh, c0);
9441f13597dSJung-uk Kim #   else
9451f13597dSJung-uk Kim         for (j = 0; j < num; ++j)
9461f13597dSJung-uk Kim             mul_add(tp[j], ap[j], ml, c0);
9471f13597dSJung-uk Kim #   endif
9481f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
9491f13597dSJung-uk Kim         tp[num] = c1;
9501f13597dSJung-uk Kim         tp[num + 1] = (c1 < c0 ? 1 : 0);
9511f13597dSJung-uk Kim  enter:
9521f13597dSJung-uk Kim         c1 = tp[0];
9531f13597dSJung-uk Kim         ml = (c1 * n0) & BN_MASK2;
9541f13597dSJung-uk Kim         c0 = 0;
9551f13597dSJung-uk Kim #   ifdef mul64
9561f13597dSJung-uk Kim         mh = HBITS(ml);
9571f13597dSJung-uk Kim         ml = LBITS(ml);
9581f13597dSJung-uk Kim         mul_add(c1, np[0], ml, mh, c0);
9591f13597dSJung-uk Kim #   else
9601f13597dSJung-uk Kim         mul_add(c1, ml, np[0], c0);
9611f13597dSJung-uk Kim #   endif
9626f9291ceSJung-uk Kim         for (j = 1; j < num; j++) {
9631f13597dSJung-uk Kim             c1 = tp[j];
9641f13597dSJung-uk Kim #   ifdef mul64
9651f13597dSJung-uk Kim             mul_add(c1, np[j], ml, mh, c0);
9661f13597dSJung-uk Kim #   else
9671f13597dSJung-uk Kim             mul_add(c1, ml, np[j], c0);
9681f13597dSJung-uk Kim #   endif
9691f13597dSJung-uk Kim             tp[j - 1] = c1 & BN_MASK2;
9701f13597dSJung-uk Kim         }
9711f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
9721f13597dSJung-uk Kim         tp[num - 1] = c1;
9731f13597dSJung-uk Kim         tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
9741f13597dSJung-uk Kim     }
9751f13597dSJung-uk Kim 
9766f9291ceSJung-uk Kim     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
9771f13597dSJung-uk Kim         c0 = bn_sub_words(rp, tp, np, num);
9786f9291ceSJung-uk Kim         if (tp[num] != 0 || c0 == 0) {
9796f9291ceSJung-uk Kim             for (i = 0; i < num + 2; i++)
9806f9291ceSJung-uk Kim                 vp[i] = 0;
9811f13597dSJung-uk Kim             return 1;
9821f13597dSJung-uk Kim         }
9831f13597dSJung-uk Kim     }
9846f9291ceSJung-uk Kim     for (i = 0; i < num; i++)
9856f9291ceSJung-uk Kim         rp[i] = tp[i], vp[i] = 0;
9861f13597dSJung-uk Kim     vp[num] = 0;
9871f13597dSJung-uk Kim     vp[num + 1] = 0;
9881f13597dSJung-uk Kim     return 1;
9891f13597dSJung-uk Kim }
9901f13597dSJung-uk Kim #  else
9911f13597dSJung-uk Kim /*
9921f13597dSJung-uk Kim  * Return value of 0 indicates that multiplication/convolution was not
9931f13597dSJung-uk Kim  * performed to signal the caller to fall down to alternative/original
9941f13597dSJung-uk Kim  * code-path.
9951f13597dSJung-uk Kim  */
9966f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
9976f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0, int num)
9986f9291ceSJung-uk Kim {
9996f9291ceSJung-uk Kim     return 0;
10006f9291ceSJung-uk Kim }
10011f13597dSJung-uk Kim #  endif                        /* OPENSSL_BN_ASM_MONT */
10021f13597dSJung-uk Kim # endif
10031f13597dSJung-uk Kim 
1004f579bf8eSKris Kennaway #else                           /* !BN_MUL_COMBA */
100574664626SKris Kennaway 
100674664626SKris Kennaway /* hmm... is it faster just to do a multiply? */
100774664626SKris Kennaway # undef bn_sqr_comba4
10081f13597dSJung-uk Kim void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
100974664626SKris Kennaway {
101074664626SKris Kennaway     BN_ULONG t[8];
101174664626SKris Kennaway     bn_sqr_normal(r, a, 4, t);
101274664626SKris Kennaway }
101374664626SKris Kennaway 
101474664626SKris Kennaway # undef bn_sqr_comba8
10151f13597dSJung-uk Kim void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
101674664626SKris Kennaway {
101774664626SKris Kennaway     BN_ULONG t[16];
101874664626SKris Kennaway     bn_sqr_normal(r, a, 8, t);
101974664626SKris Kennaway }
102074664626SKris Kennaway 
102174664626SKris Kennaway void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
102274664626SKris Kennaway {
102374664626SKris Kennaway     r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
102474664626SKris Kennaway     r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
102574664626SKris Kennaway     r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
102674664626SKris Kennaway     r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
102774664626SKris Kennaway }
102874664626SKris Kennaway 
102974664626SKris Kennaway void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
103074664626SKris Kennaway {
103174664626SKris Kennaway     r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
103274664626SKris Kennaway     r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
103374664626SKris Kennaway     r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
103474664626SKris Kennaway     r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
103574664626SKris Kennaway     r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
103674664626SKris Kennaway     r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
103774664626SKris Kennaway     r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
103874664626SKris Kennaway     r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
103974664626SKris Kennaway }
104074664626SKris Kennaway 
10411f13597dSJung-uk Kim # ifdef OPENSSL_NO_ASM
10421f13597dSJung-uk Kim #  ifdef OPENSSL_BN_ASM_MONT
10431f13597dSJung-uk Kim #   include <alloca.h>
10446f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
10456f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
10461f13597dSJung-uk Kim {
10471f13597dSJung-uk Kim     BN_ULONG c0, c1, *tp, n0 = *n0p;
10481f13597dSJung-uk Kim     volatile BN_ULONG *vp;
10491f13597dSJung-uk Kim     int i = 0, j;
10501f13597dSJung-uk Kim 
10511f13597dSJung-uk Kim     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
10521f13597dSJung-uk Kim 
10536f9291ceSJung-uk Kim     for (i = 0; i <= num; i++)
10546f9291ceSJung-uk Kim         tp[i] = 0;
10551f13597dSJung-uk Kim 
10566f9291ceSJung-uk Kim     for (i = 0; i < num; i++) {
10571f13597dSJung-uk Kim         c0 = bn_mul_add_words(tp, ap, num, bp[i]);
10581f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
10591f13597dSJung-uk Kim         tp[num] = c1;
10601f13597dSJung-uk Kim         tp[num + 1] = (c1 < c0 ? 1 : 0);
10611f13597dSJung-uk Kim 
10621f13597dSJung-uk Kim         c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
10631f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
10641f13597dSJung-uk Kim         tp[num] = c1;
10651f13597dSJung-uk Kim         tp[num + 1] += (c1 < c0 ? 1 : 0);
10666f9291ceSJung-uk Kim         for (j = 0; j <= num; j++)
10676f9291ceSJung-uk Kim             tp[j] = tp[j + 1];
10681f13597dSJung-uk Kim     }
10691f13597dSJung-uk Kim 
10706f9291ceSJung-uk Kim     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
10711f13597dSJung-uk Kim         c0 = bn_sub_words(rp, tp, np, num);
10726f9291ceSJung-uk Kim         if (tp[num] != 0 || c0 == 0) {
10736f9291ceSJung-uk Kim             for (i = 0; i < num + 2; i++)
10746f9291ceSJung-uk Kim                 vp[i] = 0;
10751f13597dSJung-uk Kim             return 1;
10761f13597dSJung-uk Kim         }
10771f13597dSJung-uk Kim     }
10786f9291ceSJung-uk Kim     for (i = 0; i < num; i++)
10796f9291ceSJung-uk Kim         rp[i] = tp[i], vp[i] = 0;
10801f13597dSJung-uk Kim     vp[num] = 0;
10811f13597dSJung-uk Kim     vp[num + 1] = 0;
10821f13597dSJung-uk Kim     return 1;
10831f13597dSJung-uk Kim }
10841f13597dSJung-uk Kim #  else
10856f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
10866f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0, int num)
10876f9291ceSJung-uk Kim {
10886f9291ceSJung-uk Kim     return 0;
10896f9291ceSJung-uk Kim }
10901f13597dSJung-uk Kim #  endif                        /* OPENSSL_BN_ASM_MONT */
10911f13597dSJung-uk Kim # endif
10921f13597dSJung-uk Kim 
1093f579bf8eSKris Kennaway #endif                          /* !BN_MUL_COMBA */
1094