xref: /freebsd/crypto/openssl/crypto/bn/bn_asm.c (revision b077aed3)
1e71b7053SJung-uk Kim /*
28f1ef87aSJung-uk Kim  * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
374664626SKris Kennaway  *
4b077aed3SPierre Pronchery  * Licensed under the Apache License 2.0 (the "License").  You may not use
5e71b7053SJung-uk Kim  * this file except in compliance with the License.  You can obtain a copy
6e71b7053SJung-uk Kim  * in the file LICENSE in the source distribution or at
7e71b7053SJung-uk Kim  * https://www.openssl.org/source/license.html
874664626SKris Kennaway  */
974664626SKris Kennaway 
10f579bf8eSKris Kennaway #include <assert.h>
11e71b7053SJung-uk Kim #include <openssl/crypto.h>
12e71b7053SJung-uk Kim #include "internal/cryptlib.h"
1317f01e99SJung-uk Kim #include "bn_local.h"
1474664626SKris Kennaway 
15f579bf8eSKris Kennaway #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
1674664626SKris Kennaway 
/*
 * Apply the mul_add() macro (defined outside this part of the file) to each
 * of the num word pairs (rp[i], ap[i]) with multiplier w, threading a single
 * carry word through every step.  The carry left after the last word is
 * returned.  Presumably mul_add(r, a, w, c) performs r += a * w + c and
 * leaves the overflow word in c -- confirm against its definition.
 *
 * A non-positive num returns 0 without touching rp.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
                          BN_ULONG w)
{
    BN_ULONG carry = 0;

    assert(num >= 0);
    if (num <= 0)
        return carry;

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as num has a bit set above bit 1. */
    for (; num & ~3; num -= 4, ap += 4, rp += 4) {
        mul_add(rp[0], ap[0], w, carry);
        mul_add(rp[1], ap[1], w, carry);
        mul_add(rp[2], ap[2], w, carry);
        mul_add(rp[3], ap[3], w, carry);
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; num; num--, ap++, rp++) {
        mul_add(rp[0], ap[0], w, carry);
    }

    return carry;
}
4674664626SKris Kennaway 
/*
 * Apply the mul() macro (defined outside this part of the file) to each of
 * the num word pairs (rp[i], ap[i]) with multiplier w, passing one carry
 * word from step to step, and return the carry left at the end.
 * Presumably mul(r, a, w, c) stores the low word of a * w + c in r and the
 * high word in c -- confirm against its definition.
 *
 * A non-positive num returns 0 without touching rp.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG carry = 0;

    assert(num >= 0);
    if (num <= 0)
        return carry;

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as num has a bit set above bit 1. */
    for (; num & ~3; num -= 4, ap += 4, rp += 4) {
        mul(rp[0], ap[0], w, carry);
        mul(rp[1], ap[1], w, carry);
        mul(rp[2], ap[2], w, carry);
        mul(rp[3], ap[3], w, carry);
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; num; num--, ap++, rp++) {
        mul(rp[0], ap[0], w, carry);
    }
    return carry;
}
7474664626SKris Kennaway 
/*
 * For each of the n input words a[i], invoke the sqr() macro (defined
 * outside this part of the file) with the output pair (r[2*i], r[2*i+1]).
 * Presumably sqr(lo, hi, x) stores the double-word square of x -- confirm
 * against its definition.  The loops below consume two output words per
 * input word, so r is advanced twice as fast as a.
 *
 * A non-positive n returns without touching r.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
    assert(n >= 0);
    if (n <= 0)
        return;

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four input words (eight output words) per pass. */
    for (; n & ~3; n -= 4, a += 4, r += 8) {
        sqr(r[0], r[1], a[0]);
        sqr(r[2], r[3], a[1]);
        sqr(r[4], r[5], a[2]);
        sqr(r[6], r[7], a[3]);
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; n; n--, a++, r += 2) {
        sqr(r[0], r[1], a[0]);
    }
}
9974664626SKris Kennaway 
1006f9291ceSJung-uk Kim #else                           /* !(defined(BN_LLONG) ||
1016f9291ceSJung-uk Kim                                  * defined(BN_UMULT_HIGH)) */
10274664626SKris Kennaway 
/*
 * Variant used when neither BN_LLONG nor BN_UMULT_HIGH is defined.
 *
 * The multiplier w is split once with LBITS()/HBITS(), then the five-argument
 * mul_add() macro (defined outside this part of the file) is applied to each
 * of the num word pairs (rp[i], ap[i]), with one carry word passed from step
 * to step.  The final carry is returned.  Presumably this amounts to
 * rp[] += ap[] * w -- confirm against the mul_add() definition.
 *
 * A non-positive num returns 0 without touching rp.
 */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
                          BN_ULONG w)
{
    BN_ULONG carry = 0;
    BN_ULONG w_lo, w_hi;

    assert(num >= 0);
    if (num <= 0)
        return (BN_ULONG)0;

    w_lo = LBITS(w);
    w_hi = HBITS(w);

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as num has a bit set above bit 1. */
    for (; num & ~3; num -= 4, ap += 4, rp += 4) {
        mul_add(rp[0], ap[0], w_lo, w_hi, carry);
        mul_add(rp[1], ap[1], w_lo, w_hi, carry);
        mul_add(rp[2], ap[2], w_lo, w_hi, carry);
        mul_add(rp[3], ap[3], w_lo, w_hi, carry);
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; num; num--, ap++, rp++) {
        mul_add(rp[0], ap[0], w_lo, w_hi, carry);
    }
    return carry;
}
13574664626SKris Kennaway 
/*
 * Variant used when neither BN_LLONG nor BN_UMULT_HIGH is defined.
 *
 * The multiplier w is split once with LBITS()/HBITS(), then the five-argument
 * mul() macro (defined outside this part of the file) is applied to each of
 * the num word pairs (rp[i], ap[i]), with one carry word passed from step to
 * step.  The final carry is returned.  Presumably this amounts to
 * rp[] = ap[] * w -- confirm against the mul() definition.
 *
 * A non-positive num returns 0 without touching rp.
 */
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
    BN_ULONG out_carry = 0;
    BN_ULONG w_lo, w_hi;

    assert(num >= 0);
    if (num <= 0)
        return (BN_ULONG)0;

    w_lo = LBITS(w);
    w_hi = HBITS(w);

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as num has a bit set above bit 1. */
    for (; num & ~3; num -= 4, ap += 4, rp += 4) {
        mul(rp[0], ap[0], w_lo, w_hi, out_carry);
        mul(rp[1], ap[1], w_lo, w_hi, out_carry);
        mul(rp[2], ap[2], w_lo, w_hi, out_carry);
        mul(rp[3], ap[3], w_lo, w_hi, out_carry);
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; num; num--, ap++, rp++) {
        mul(rp[0], ap[0], w_lo, w_hi, out_carry);
    }
    return out_carry;
}
16774664626SKris Kennaway 
/*
 * Variant used when neither BN_LLONG nor BN_UMULT_HIGH is defined.
 *
 * For each of the n input words a[i], invoke the sqr64() macro (defined
 * outside this part of the file) with the output pair (r[2*i], r[2*i+1]).
 * Presumably sqr64(lo, hi, x) stores the double-word square of x -- confirm
 * against its definition.  Two output words are consumed per input word.
 *
 * A non-positive n returns without touching r.
 */
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
    assert(n >= 0);
    if (n <= 0)
        return;

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four input words (eight output words) per pass. */
    for (; n & ~3; n -= 4, a += 4, r += 8) {
        sqr64(r[0], r[1], a[0]);
        sqr64(r[2], r[3], a[1]);
        sqr64(r[4], r[5], a[2]);
        sqr64(r[6], r[7], a[3]);
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; n; n--, a++, r += 2) {
        sqr64(r[0], r[1], a[0]);
    }
}
19274664626SKris Kennaway 
1936f9291ceSJung-uk Kim #endif                          /* !(defined(BN_LLONG) ||
1946f9291ceSJung-uk Kim                                  * defined(BN_UMULT_HIGH)) */
19574664626SKris Kennaway 
19674664626SKris Kennaway #if defined(BN_LLONG) && defined(BN_DIV2W)
19774664626SKris Kennaway 
/*
 * Divide the double-word value (h:l) by d using BN_ULLONG arithmetic and
 * return the quotient converted to BN_ULONG (any bits that do not fit in a
 * BN_ULONG are discarded by the cast).
 *
 * A zero divisor returns BN_MASK2, the same sentinel the portable
 * implementation in the #else branch uses, instead of executing an
 * integer division by zero (undefined behaviour in C).
 */
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
{
    BN_ULLONG dividend;

    if (d == 0)
        return BN_MASK2;

    /* Assemble h as the upper word and l as the lower word. */
    dividend = (((BN_ULLONG)h) << BN_BITS2) | l;

    return (BN_ULONG)(dividend / (BN_ULLONG)d);
}
20274664626SKris Kennaway 
20374664626SKris Kennaway #else
20474664626SKris Kennaway 
205ddd58736SKris Kennaway /* Divide h,l by d and return the result. */
20674664626SKris Kennaway /* I need to test this some more :-( */
bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d)20774664626SKris Kennaway BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
20874664626SKris Kennaway {
20974664626SKris Kennaway     BN_ULONG dh, dl, q, ret = 0, th, tl, t;
21074664626SKris Kennaway     int i, count = 2;
21174664626SKris Kennaway 
2126f9291ceSJung-uk Kim     if (d == 0)
213e71b7053SJung-uk Kim         return BN_MASK2;
21474664626SKris Kennaway 
21574664626SKris Kennaway     i = BN_num_bits_word(d);
2163b4e3dcbSSimon L. B. Nielsen     assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
217ddd58736SKris Kennaway 
21874664626SKris Kennaway     i = BN_BITS2 - i;
2196f9291ceSJung-uk Kim     if (h >= d)
2206f9291ceSJung-uk Kim         h -= d;
22174664626SKris Kennaway 
2226f9291ceSJung-uk Kim     if (i) {
22374664626SKris Kennaway         d <<= i;
22474664626SKris Kennaway         h = (h << i) | (l >> (BN_BITS2 - i));
22574664626SKris Kennaway         l <<= i;
22674664626SKris Kennaway     }
22774664626SKris Kennaway     dh = (d & BN_MASK2h) >> BN_BITS4;
22874664626SKris Kennaway     dl = (d & BN_MASK2l);
2296f9291ceSJung-uk Kim     for (;;) {
23074664626SKris Kennaway         if ((h >> BN_BITS4) == dh)
23174664626SKris Kennaway             q = BN_MASK2l;
23274664626SKris Kennaway         else
23374664626SKris Kennaway             q = h / dh;
23474664626SKris Kennaway 
23574664626SKris Kennaway         th = q * dh;
23674664626SKris Kennaway         tl = dl * q;
2376f9291ceSJung-uk Kim         for (;;) {
23874664626SKris Kennaway             t = h - th;
23974664626SKris Kennaway             if ((t & BN_MASK2h) ||
2406f9291ceSJung-uk Kim                 ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
24174664626SKris Kennaway                 break;
24274664626SKris Kennaway             q--;
24374664626SKris Kennaway             th -= dh;
24474664626SKris Kennaway             tl -= dl;
24574664626SKris Kennaway         }
24674664626SKris Kennaway         t = (tl >> BN_BITS4);
24774664626SKris Kennaway         tl = (tl << BN_BITS4) & BN_MASK2h;
24874664626SKris Kennaway         th += t;
24974664626SKris Kennaway 
2506f9291ceSJung-uk Kim         if (l < tl)
2516f9291ceSJung-uk Kim             th++;
25274664626SKris Kennaway         l -= tl;
2536f9291ceSJung-uk Kim         if (h < th) {
25474664626SKris Kennaway             h += d;
25574664626SKris Kennaway             q--;
25674664626SKris Kennaway         }
25774664626SKris Kennaway         h -= th;
25874664626SKris Kennaway 
2596f9291ceSJung-uk Kim         if (--count == 0)
2606f9291ceSJung-uk Kim             break;
26174664626SKris Kennaway 
26274664626SKris Kennaway         ret = q << BN_BITS4;
26374664626SKris Kennaway         h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
26474664626SKris Kennaway         l = (l & BN_MASK2l) << BN_BITS4;
26574664626SKris Kennaway     }
26674664626SKris Kennaway     ret |= q;
267e71b7053SJung-uk Kim     return ret;
26874664626SKris Kennaway }
#endif                          /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
27074664626SKris Kennaway 
27174664626SKris Kennaway #ifdef BN_LLONG
/*
 * BN_LLONG variant: r[i] = a[i] + b[i] + carry for i in [0, n), using a
 * double-width BN_ULLONG accumulator.  After each word the accumulator is
 * shifted right by BN_BITS2 so that only the carry remains for the next
 * word.  Returns what is left in the accumulator after the last word.
 *
 * A non-positive n returns 0 without touching r.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n)
{
    BN_ULLONG acc = 0;

    assert(n >= 0);
    if (n <= 0)
        return (BN_ULONG)0;

# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as n has a bit set above bit 1. */
    for (; n & ~3; n -= 4, a += 4, b += 4, r += 4) {
        acc += (BN_ULLONG)a[0] + b[0];
        r[0] = (BN_ULONG)acc & BN_MASK2;
        acc >>= BN_BITS2;

        acc += (BN_ULLONG)a[1] + b[1];
        r[1] = (BN_ULONG)acc & BN_MASK2;
        acc >>= BN_BITS2;

        acc += (BN_ULLONG)a[2] + b[2];
        r[2] = (BN_ULONG)acc & BN_MASK2;
        acc >>= BN_BITS2;

        acc += (BN_ULLONG)a[3] + b[3];
        r[3] = (BN_ULONG)acc & BN_MASK2;
        acc >>= BN_BITS2;
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; n; n--, a++, b++, r++) {
        acc += (BN_ULLONG)a[0] + b[0];
        r[0] = (BN_ULONG)acc & BN_MASK2;
        acc >>= BN_BITS2;
    }
    return (BN_ULONG)acc;
}
312f579bf8eSKris Kennaway #else                           /* !BN_LLONG */
/*
 * Single-word variant (no BN_LLONG): r[i] = a[i] + b[i] + carry for i in
 * [0, n).  Each word is handled in two masked additions; a carry is
 * detected whenever a sum comes out smaller than one of its operands.
 * Returns the carry out of the last word.
 *
 * Within one word a[i] and b[i] are both read before r[i] is written.
 * A non-positive n returns 0 without touching r.
 */
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n)
{
    BN_ULONG carry, sum, partial;

    assert(n >= 0);
    if (n <= 0)
        return (BN_ULONG)0;

    carry = 0;
# ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as n has a bit set above bit 1. */
    for (; n & ~3; n -= 4, a += 4, b += 4, r += 4) {
        partial = (a[0] + carry) & BN_MASK2;
        carry = (partial < carry);
        sum = (partial + b[0]) & BN_MASK2;
        carry += (sum < partial);
        r[0] = sum;

        partial = (a[1] + carry) & BN_MASK2;
        carry = (partial < carry);
        sum = (partial + b[1]) & BN_MASK2;
        carry += (sum < partial);
        r[1] = sum;

        partial = (a[2] + carry) & BN_MASK2;
        carry = (partial < carry);
        sum = (partial + b[2]) & BN_MASK2;
        carry += (sum < partial);
        r[2] = sum;

        partial = (a[3] + carry) & BN_MASK2;
        carry = (partial < carry);
        sum = (partial + b[3]) & BN_MASK2;
        carry += (sum < partial);
        r[3] = sum;
    }
# endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; n; n--, a++, b++, r++) {
        partial = (a[0] + carry) & BN_MASK2;
        carry = (partial < carry);
        sum = (partial + b[0]) & BN_MASK2;
        carry += (sum < partial);
        r[0] = sum;
    }
    return (BN_ULONG)carry;
}
369f579bf8eSKris Kennaway #endif                          /* !BN_LLONG */
37074664626SKris Kennaway 
/*
 * r[i] = a[i] - b[i] - borrow for i in [0, n).  Each word is handled in two
 * masked subtractions; a borrow is detected whenever a difference comes out
 * larger than the value it was subtracted from.  Returns the borrow out of
 * the last word.
 *
 * Within one word a[i] and b[i] are both read before r[i] is written.
 * A non-positive n returns 0 without touching r.
 */
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                      int n)
{
    BN_ULONG minuend, diff;
    int borrow = 0;

    assert(n >= 0);
    if (n <= 0)
        return (BN_ULONG)0;

#ifndef OPENSSL_SMALL_FOOTPRINT
    /* Four words per pass for as long as n has a bit set above bit 1. */
    for (; n & ~3; n -= 4, a += 4, b += 4, r += 4) {
        minuend = a[0];
        diff = (minuend - borrow) & BN_MASK2;
        borrow = (diff > minuend);
        minuend = diff;
        diff = (minuend - b[0]) & BN_MASK2;
        r[0] = diff;
        borrow += (diff > minuend);

        minuend = a[1];
        diff = (minuend - borrow) & BN_MASK2;
        borrow = (diff > minuend);
        minuend = diff;
        diff = (minuend - b[1]) & BN_MASK2;
        r[1] = diff;
        borrow += (diff > minuend);

        minuend = a[2];
        diff = (minuend - borrow) & BN_MASK2;
        borrow = (diff > minuend);
        minuend = diff;
        diff = (minuend - b[2]) & BN_MASK2;
        r[2] = diff;
        borrow += (diff > minuend);

        minuend = a[3];
        diff = (minuend - borrow) & BN_MASK2;
        borrow = (diff > minuend);
        minuend = diff;
        diff = (minuend - b[3]) & BN_MASK2;
        r[3] = diff;
        borrow += (diff > minuend);
    }
#endif
    /* Whatever is left (everything, in a small-footprint build). */
    for (; n; n--, a++, b++, r++) {
        minuend = a[0];
        diff = (minuend - borrow) & BN_MASK2;
        borrow = (diff > minuend);
        minuend = diff;
        diff = (minuend - b[0]) & BN_MASK2;
        r[0] = diff;
        borrow += (diff > minuend);
    }
    return borrow;
}
43274664626SKris Kennaway 
4331f13597dSJung-uk Kim #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
43474664626SKris Kennaway 
43574664626SKris Kennaway # undef bn_mul_comba8
43674664626SKris Kennaway # undef bn_mul_comba4
43774664626SKris Kennaway # undef bn_sqr_comba8
43874664626SKris Kennaway # undef bn_sqr_comba4
43974664626SKris Kennaway 
/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
/*
 * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
 * c=(c2,c1,c0)
 */
447f579bf8eSKris Kennaway 
44874664626SKris Kennaway # ifdef BN_LLONG
/*
 * BN_LLONG flavour of the comba helpers: the product is formed in a
 * double-width BN_ULLONG and split with Lw()/Hw().
 *
 * Keep in mind that additions to multiplication result can not
 * overflow, because its high half cannot be all-ones.
 *
 * In every macro (c2,c1,c0) is a three-word accumulator: the low product
 * word is folded into c0, the high word (plus carry) into c1, and a carry
 * out of c1 is counted in c2.
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
        } while(0)

/* Adds the product a*b twice; t keeps the bare product for the 2nd round. */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        BN_ULLONG tt = t+c0;    /* no carry */  \
        c0 = (BN_ULONG)Lw(tt);                  \
        hi = (BN_ULONG)Hw(tt);                  \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
        } while(0)

/*
 * The array argument is parenthesised, as in the other flavours of this
 * macro, so that an expression such as "p + 1" indexes correctly.
 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)((a)[i])*(a)[i]; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1<hi);   \
        } while(0)

#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)

486f579bf8eSKris Kennaway 
4873b4e3dcbSSimon L. B. Nielsen # elif defined(BN_UMULT_LOHI)
/*
 * BN_UMULT_LOHI flavour of the comba helpers: both product words come from
 * a single BN_UMULT_LOHI() invocation (defined outside this file section).
 *
 * Folding a carry bit into the high product word is safe: the high word of
 * a multiplication result is never all-ones, so that addition cannot wrap.
 *
 * In every macro (c2,c1,c0) is a three-word accumulator: the low product
 * word goes into c0, the high word (plus carry) into c1, and a carry out
 * of c1 is counted in c2.
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG x_ = (a);                      \
        BN_ULONG y_ = (b);                      \
        BN_ULONG low_, high_;                   \
        BN_UMULT_LOHI(low_,high_,x_,y_);        \
        c0 += low_;                             \
        high_ += (c0 < low_);                   \
        c1 += high_;                            \
        c2 += (c1 < high_);                     \
        } while(0)

/* Adds the product twice; twin_ carries the high word for the 1st round. */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG x_ = (a);                      \
        BN_ULONG y_ = (b);                      \
        BN_ULONG low_, high_, twin_;            \
        BN_UMULT_LOHI(low_,high_,x_,y_);        \
        c0 += low_;                             \
        twin_ = high_ + (c0 < low_);            \
        c1 += twin_;                            \
        c2 += (c1 < twin_);                     \
        c0 += low_;                             \
        high_ += (c0 < low_);                   \
        c1 += high_;                            \
        c2 += (c1 < high_);                     \
        } while(0)

#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG x_ = (a)[i];                   \
        BN_ULONG low_, high_;                   \
        BN_UMULT_LOHI(low_,high_,x_,x_);        \
        c0 += low_;                             \
        high_ += (c0 < low_);                   \
        c1 += high_;                            \
        c2 += (c1 < high_);                     \
        } while(0)

#  define sqr_add_c2(a,i,j,c0,c1,c2)    \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)

5203b4e3dcbSSimon L. B. Nielsen 
521f579bf8eSKris Kennaway # elif defined(BN_UMULT_HIGH)
/*
 * BN_UMULT_HIGH flavour of the comba helpers: the low product word is an
 * ordinary word multiplication, the high word comes from BN_UMULT_HIGH()
 * (defined outside this file section).
 *
 * Folding a carry bit into the high product word is safe: the high word of
 * a multiplication result is never all-ones, so that addition cannot wrap.
 *
 * In every macro (c2,c1,c0) is a three-word accumulator: the low product
 * word goes into c0, the high word (plus carry) into c1, and a carry out
 * of c1 is counted in c2.
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG x_ = (a);                      \
        BN_ULONG y_ = (b);                      \
        BN_ULONG low_ = x_ * y_;                \
        BN_ULONG high_ = BN_UMULT_HIGH(x_,y_);  \
        c0 += low_;                             \
        high_ += (c0 < low_);                   \
        c1 += high_;                            \
        c2 += (c1 < high_);                     \
        } while(0)

/* Adds the product twice; twin_ carries the high word for the 1st round. */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG x_ = (a);                      \
        BN_ULONG y_ = (b);                      \
        BN_ULONG twin_;                         \
        BN_ULONG low_ = x_ * y_;                \
        BN_ULONG high_ = BN_UMULT_HIGH(x_,y_);  \
        c0 += low_;                             \
        twin_ = high_ + (c0 < low_);            \
        c1 += twin_;                            \
        c2 += (c1 < twin_);                     \
        c0 += low_;                             \
        high_ += (c0 < low_);                   \
        c1 += high_;                            \
        c2 += (c1 < high_);                     \
        } while(0)

#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG x_ = (a)[i];                   \
        BN_ULONG low_ = x_ * x_;                \
        BN_ULONG high_ = BN_UMULT_HIGH(x_,x_);  \
        c0 += low_;                             \
        high_ += (c0 < low_);                   \
        c1 += high_;                            \
        c2 += (c1 < high_);                     \
        } while(0)

#  define sqr_add_c2(a,i,j,c0,c1,c2)      \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)

554f579bf8eSKris Kennaway 
555f579bf8eSKris Kennaway # else                          /* !BN_LLONG */
/*
 * Fallback flavour of the comba helpers (none of BN_LLONG, BN_UMULT_LOHI or
 * BN_UMULT_HIGH): operands are split with LBITS()/HBITS() and multiplied
 * via mul64()/sqr64(), all defined outside this file section.  Presumably
 * mul64(l,h,bl,bh) and sqr64(lo,hi,x) leave the low and high product words
 * in their first two arguments -- confirm against their definitions.
 *
 * Folding a carry bit into the high product word is safe: the high word of
 * a multiplication result is never all-ones, so that addition cannot wrap.
 *
 * In every macro (c2,c1,c0) is a three-word accumulator: the low product
 * word goes into c0, the high word (plus carry) into c1, and a carry out
 * of c1 is counted in c2.
 */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG p_lo = LBITS(a);               \
        BN_ULONG p_hi = HBITS(a);               \
        BN_ULONG b_lo = LBITS(b);               \
        BN_ULONG b_hi = HBITS(b);               \
        mul64(p_lo,p_hi,b_lo,b_hi);             \
        c0 = (c0 + p_lo) & BN_MASK2;            \
        p_hi += (c0 < p_lo);                    \
        c1 = (c1 + p_hi) & BN_MASK2;            \
        c2 += (c1 < p_hi);                      \
        } while(0)

/* Adds the product twice; p_twin carries the high word for the 1st round. */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG p_twin;                        \
        BN_ULONG p_lo = LBITS(a);               \
        BN_ULONG p_hi = HBITS(a);               \
        BN_ULONG b_lo = LBITS(b);               \
        BN_ULONG b_hi = HBITS(b);               \
        mul64(p_lo,p_hi,b_lo,b_hi);             \
        p_twin = p_hi;                          \
        c0 = (c0 + p_lo) & BN_MASK2;            \
        p_twin += (c0 < p_lo);                  \
        c1 = (c1 + p_twin) & BN_MASK2;          \
        c2 += (c1 < p_twin);                    \
        c0 = (c0 + p_lo) & BN_MASK2;            \
        p_hi += (c0 < p_lo);                    \
        c1 = (c1 + p_hi) & BN_MASK2;            \
        c2 += (c1 < p_hi);                      \
        } while(0)

#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG p_lo, p_hi;                    \
        sqr64(p_lo,p_hi,(a)[i]);                \
        c0 = (c0 + p_lo) & BN_MASK2;            \
        p_hi += (c0 < p_lo);                    \
        c1 = (c1 + p_hi) & BN_MASK2;            \
        c2 += (c1 < p_hi);                      \
        } while(0)

#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
589f579bf8eSKris Kennaway # endif                         /* !BN_LLONG */
59074664626SKris Kennaway 
/*
 * Column-wise (comba style) product of two 8-word operands.
 *
 * r: output, words r[0]..r[15] are written.
 * a, b: inputs, words [0]..[7] of each are read.
 *
 * For every output column the partial products a[i]*b[col-i] are fed to
 * mul_add_c() (defined earlier in this file) together with a three-word
 * accumulator; the lowest accumulator word is then stored to r[col] and
 * the accumulator is shifted down by one word.
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, idx, first, last;

    for (col = 0; col < 15; col++) {
        /* Range of a[] indices that have a partner b[col - idx] in 0..7. */
        first = (col > 7) ? col - 7 : 0;
        last = (col < 7) ? col : 7;

        /*
         * Odd columns walk a[] upwards, even columns walk it downwards,
         * so the partial products are accumulated in a zig-zag order.
         */
        if (col & 1) {
            for (idx = first; idx <= last; idx++)
                mul_add_c(a[idx], b[col - idx], acc0, acc1, acc2);
        } else {
            for (idx = last; idx >= first; idx--)
                mul_add_c(a[idx], b[col - idx], acc0, acc1, acc2);
        }

        /* Emit the finished word and slide the accumulator down. */
        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }

    /* What is left in the lowest accumulator word is the top word. */
    r[15] = acc0;
}
69374664626SKris Kennaway 
/*
 * Column-wise (comba style) product of two 4-word operands.
 *
 * r: output, words r[0]..r[7] are written.
 * a, b: inputs, words [0]..[3] of each are read.
 *
 * Same scheme as the 8-word variant: each column's partial products are
 * passed to mul_add_c() with a three-word accumulator, then the lowest
 * accumulator word is stored and the accumulator is shifted down.
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, idx, first, last;

    for (col = 0; col < 7; col++) {
        /* Range of a[] indices that have a partner b[col - idx] in 0..3. */
        first = (col > 3) ? col - 3 : 0;
        last = (col < 3) ? col : 3;

        /* Odd columns go upwards through a[], even columns downwards. */
        if (col & 1) {
            for (idx = first; idx <= last; idx++)
                mul_add_c(a[idx], b[col - idx], acc0, acc1, acc2);
        } else {
            for (idx = last; idx >= first; idx--)
                mul_add_c(a[idx], b[col - idx], acc0, acc1, acc2);
        }

        /* Emit the finished word and slide the accumulator down. */
        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }

    /* What is left in the lowest accumulator word is the top word. */
    r[7] = acc0;
}
73274664626SKris Kennaway 
/*
 * Column-wise (comba style) square of an 8-word operand.
 *
 * r: output, words r[0]..r[15] are written.
 * a: input, words a[0]..a[7] are read.
 *
 * Even columns start with the diagonal term (sqr_add_c on a[col/2]) and
 * then take the off-diagonal pairs with the larger index ascending; odd
 * columns take only off-diagonal pairs, larger index descending.  Each
 * pair (i, j) with i > j is handed once to sqr_add_c2().
 */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, idx, half, top;

    for (col = 0; col < 15; col++) {
        /* Largest index that can take part in this column. */
        top = (col < 7) ? col : 7;

        if (col & 1) {
            /* Pairs (idx, col - idx) while idx is the larger index. */
            for (idx = top; idx > col - idx; idx--)
                sqr_add_c2(a, idx, col - idx, acc0, acc1, acc2);
        } else {
            half = col / 2;
            sqr_add_c(a, half, acc0, acc1, acc2);
            for (idx = half + 1; idx <= top; idx++)
                sqr_add_c2(a, idx, col - idx, acc0, acc1, acc2);
        }

        /* Emit the finished word and slide the accumulator down. */
        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }

    /* What is left in the lowest accumulator word is the top word. */
    r[15] = acc0;
}
80774664626SKris Kennaway 
/*
 * Column-wise (comba style) square of a 4-word operand.
 *
 * r: output, words r[0]..r[7] are written.
 * a: input, words a[0]..a[3] are read.
 *
 * Even columns start with the diagonal term (sqr_add_c on a[col/2]) and
 * then take the off-diagonal pairs with the larger index ascending; odd
 * columns take only off-diagonal pairs, larger index descending.
 */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG acc0 = 0, acc1 = 0, acc2 = 0;
    int col, idx, half, top;

    for (col = 0; col < 7; col++) {
        /* Largest index that can take part in this column. */
        top = (col < 3) ? col : 3;

        if (col & 1) {
            /* Pairs (idx, col - idx) while idx is the larger index. */
            for (idx = top; idx > col - idx; idx--)
                sqr_add_c2(a, idx, col - idx, acc0, acc1, acc2);
        } else {
            half = col / 2;
            sqr_add_c(a, half, acc0, acc1, acc2);
            for (idx = half + 1; idx <= top; idx++)
                sqr_add_c2(a, idx, col - idx, acc0, acc1, acc2);
        }

        /* Emit the finished word and slide the accumulator down. */
        r[col] = acc0;
        acc0 = acc1;
        acc1 = acc2;
        acc2 = 0;
    }

    /* What is left in the lowest accumulator word is the top word. */
    r[7] = acc0;
}
8401f13597dSJung-uk Kim 
8411f13597dSJung-uk Kim # ifdef OPENSSL_NO_ASM
8421f13597dSJung-uk Kim #  ifdef OPENSSL_BN_ASM_MONT
8431f13597dSJung-uk Kim #   include <alloca.h>
8441f13597dSJung-uk Kim /*
8451f13597dSJung-uk Kim  * This is essentially reference implementation, which may or may not
8461f13597dSJung-uk Kim  * result in performance improvement. E.g. on IA-32 this routine was
8471f13597dSJung-uk Kim  * observed to give 40% faster rsa1024 private key operations and 10%
8481f13597dSJung-uk Kim  * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
8491f13597dSJung-uk Kim  * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
8501f13597dSJung-uk Kim  * reference implementation, one to be used as starting point for
8511f13597dSJung-uk Kim  * platform-specific assembler. Mentioned numbers apply to compiler
8521f13597dSJung-uk Kim  * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
8531f13597dSJung-uk Kim  * can vary not only from platform to platform, but even for compiler
8541f13597dSJung-uk Kim  * versions. Assembler vs. assembler improvement coefficients can
8551f13597dSJung-uk Kim  * [and are known to] differ and are to be documented elsewhere.
8561f13597dSJung-uk Kim  */
bn_mul_mont(BN_ULONG * rp,const BN_ULONG * ap,const BN_ULONG * bp,const BN_ULONG * np,const BN_ULONG * n0p,int num)8576f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
8586f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0p, int num)
8591f13597dSJung-uk Kim {
8601f13597dSJung-uk Kim     BN_ULONG c0, c1, ml, *tp, n0;
8611f13597dSJung-uk Kim #   ifdef mul64
8621f13597dSJung-uk Kim     BN_ULONG mh;
8631f13597dSJung-uk Kim #   endif
8641f13597dSJung-uk Kim     volatile BN_ULONG *vp;
8651f13597dSJung-uk Kim     int i = 0, j;
8661f13597dSJung-uk Kim 
8676f9291ceSJung-uk Kim #   if 0                        /* template for platform-specific
8686f9291ceSJung-uk Kim                                  * implementation */
8696f9291ceSJung-uk Kim     if (ap == bp)
8706f9291ceSJung-uk Kim         return bn_sqr_mont(rp, ap, np, n0p, num);
8711f13597dSJung-uk Kim #   endif
8721f13597dSJung-uk Kim     vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
8731f13597dSJung-uk Kim 
8741f13597dSJung-uk Kim     n0 = *n0p;
8751f13597dSJung-uk Kim 
8761f13597dSJung-uk Kim     c0 = 0;
8771f13597dSJung-uk Kim     ml = bp[0];
8781f13597dSJung-uk Kim #   ifdef mul64
8791f13597dSJung-uk Kim     mh = HBITS(ml);
8801f13597dSJung-uk Kim     ml = LBITS(ml);
8811f13597dSJung-uk Kim     for (j = 0; j < num; ++j)
8821f13597dSJung-uk Kim         mul(tp[j], ap[j], ml, mh, c0);
8831f13597dSJung-uk Kim #   else
8841f13597dSJung-uk Kim     for (j = 0; j < num; ++j)
8851f13597dSJung-uk Kim         mul(tp[j], ap[j], ml, c0);
8861f13597dSJung-uk Kim #   endif
8871f13597dSJung-uk Kim 
8881f13597dSJung-uk Kim     tp[num] = c0;
8891f13597dSJung-uk Kim     tp[num + 1] = 0;
8901f13597dSJung-uk Kim     goto enter;
8911f13597dSJung-uk Kim 
8926f9291ceSJung-uk Kim     for (i = 0; i < num; i++) {
8931f13597dSJung-uk Kim         c0 = 0;
8941f13597dSJung-uk Kim         ml = bp[i];
8951f13597dSJung-uk Kim #   ifdef mul64
8961f13597dSJung-uk Kim         mh = HBITS(ml);
8971f13597dSJung-uk Kim         ml = LBITS(ml);
8981f13597dSJung-uk Kim         for (j = 0; j < num; ++j)
8991f13597dSJung-uk Kim             mul_add(tp[j], ap[j], ml, mh, c0);
9001f13597dSJung-uk Kim #   else
9011f13597dSJung-uk Kim         for (j = 0; j < num; ++j)
9021f13597dSJung-uk Kim             mul_add(tp[j], ap[j], ml, c0);
9031f13597dSJung-uk Kim #   endif
9041f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
9051f13597dSJung-uk Kim         tp[num] = c1;
9061f13597dSJung-uk Kim         tp[num + 1] = (c1 < c0 ? 1 : 0);
9071f13597dSJung-uk Kim  enter:
9081f13597dSJung-uk Kim         c1 = tp[0];
9091f13597dSJung-uk Kim         ml = (c1 * n0) & BN_MASK2;
9101f13597dSJung-uk Kim         c0 = 0;
9111f13597dSJung-uk Kim #   ifdef mul64
9121f13597dSJung-uk Kim         mh = HBITS(ml);
9131f13597dSJung-uk Kim         ml = LBITS(ml);
9141f13597dSJung-uk Kim         mul_add(c1, np[0], ml, mh, c0);
9151f13597dSJung-uk Kim #   else
9161f13597dSJung-uk Kim         mul_add(c1, ml, np[0], c0);
9171f13597dSJung-uk Kim #   endif
9186f9291ceSJung-uk Kim         for (j = 1; j < num; j++) {
9191f13597dSJung-uk Kim             c1 = tp[j];
9201f13597dSJung-uk Kim #   ifdef mul64
9211f13597dSJung-uk Kim             mul_add(c1, np[j], ml, mh, c0);
9221f13597dSJung-uk Kim #   else
9231f13597dSJung-uk Kim             mul_add(c1, ml, np[j], c0);
9241f13597dSJung-uk Kim #   endif
9251f13597dSJung-uk Kim             tp[j - 1] = c1 & BN_MASK2;
9261f13597dSJung-uk Kim         }
9271f13597dSJung-uk Kim         c1 = (tp[num] + c0) & BN_MASK2;
9281f13597dSJung-uk Kim         tp[num - 1] = c1;
9291f13597dSJung-uk Kim         tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
9301f13597dSJung-uk Kim     }
9311f13597dSJung-uk Kim 
9326f9291ceSJung-uk Kim     if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
9331f13597dSJung-uk Kim         c0 = bn_sub_words(rp, tp, np, num);
9346f9291ceSJung-uk Kim         if (tp[num] != 0 || c0 == 0) {
9356f9291ceSJung-uk Kim             for (i = 0; i < num + 2; i++)
9366f9291ceSJung-uk Kim                 vp[i] = 0;
9371f13597dSJung-uk Kim             return 1;
9381f13597dSJung-uk Kim         }
9391f13597dSJung-uk Kim     }
9406f9291ceSJung-uk Kim     for (i = 0; i < num; i++)
9416f9291ceSJung-uk Kim         rp[i] = tp[i], vp[i] = 0;
9421f13597dSJung-uk Kim     vp[num] = 0;
9431f13597dSJung-uk Kim     vp[num + 1] = 0;
9441f13597dSJung-uk Kim     return 1;
9451f13597dSJung-uk Kim }
9461f13597dSJung-uk Kim #  else
9471f13597dSJung-uk Kim /*
9481f13597dSJung-uk Kim  * Return value of 0 indicates that multiplication/convolution was not
9491f13597dSJung-uk Kim  * performed to signal the caller to fall down to alternative/original
9501f13597dSJung-uk Kim  * code-path.
9511f13597dSJung-uk Kim  */
bn_mul_mont(BN_ULONG * rp,const BN_ULONG * ap,const BN_ULONG * bp,const BN_ULONG * np,const BN_ULONG * n0,int num)9526f9291ceSJung-uk Kim int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
9536f9291ceSJung-uk Kim                 const BN_ULONG *np, const BN_ULONG *n0, int num)
9546f9291ceSJung-uk Kim {
9556f9291ceSJung-uk Kim     return 0;
9566f9291ceSJung-uk Kim }
9571f13597dSJung-uk Kim #  endif                        /* OPENSSL_BN_ASM_MONT */
9581f13597dSJung-uk Kim # endif
9591f13597dSJung-uk Kim 
960f579bf8eSKris Kennaway #else                           /* !BN_MUL_COMBA */
96174664626SKris Kennaway 
96274664626SKris Kennaway /* hmm... is it faster just to do a multiply? */
96374664626SKris Kennaway # undef bn_sqr_comba4
964e71b7053SJung-uk Kim # undef bn_sqr_comba8
/*
 * 4-word square for builds without BN_MUL_COMBA: delegates to
 * bn_sqr_normal() with an 8-word scratch buffer on the stack.
 */
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG scratch[8];

    bn_sqr_normal(r, a, 4, scratch);
}
97074664626SKris Kennaway 
/*
 * 8-word square for builds without BN_MUL_COMBA: delegates to
 * bn_sqr_normal() with a 16-word scratch buffer on the stack.
 */
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
    BN_ULONG scratch[16];

    bn_sqr_normal(r, a, 8, scratch);
}
97674664626SKris Kennaway 
/*
 * 4-word product for builds without BN_MUL_COMBA, built from the word
 * routines: row 0 is written with bn_mul_words(), rows 1..3 are added in
 * with bn_mul_add_words(), each shifted one word further into r.  The
 * value returned by each call is stored in the word just above that row.
 * Writes r[0]..r[7]; reads a[0]..a[3] and b[0]..b[3].
 */
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    int row;

    r[4] = bn_mul_words(r, a, 4, b[0]);
    for (row = 1; row < 4; row++)
        r[4 + row] = bn_mul_add_words(r + row, a, 4, b[row]);
}
98474664626SKris Kennaway 
/*
 * 8-word product for builds without BN_MUL_COMBA, built from the word
 * routines: row 0 is written with bn_mul_words(), rows 1..7 are added in
 * with bn_mul_add_words(), each shifted one word further into r.  The
 * value returned by each call is stored in the word just above that row.
 * Writes r[0]..r[15]; reads a[0]..a[7] and b[0]..b[7].
 */
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
{
    int row;

    r[8] = bn_mul_words(r, a, 8, b[0]);
    for (row = 1; row < 8; row++)
        r[8 + row] = bn_mul_add_words(r + row, a, 8, b[row]);
}
99674664626SKris Kennaway 
9971f13597dSJung-uk Kim # ifdef OPENSSL_NO_ASM
9981f13597dSJung-uk Kim #  ifdef OPENSSL_BN_ASM_MONT
9991f13597dSJung-uk Kim #   include <alloca.h>
/*
 * Word-routine based counterpart of the reference bn_mul_mont() for
 * builds without BN_MUL_COMBA.
 *
 * rp:  output, num words.
 * ap, bp, np: inputs, num words each (np is the value subtracted at the
 *      end, i.e. presumably the modulus).
 * n0p: points to the single word multiplied with tp[0] to obtain each
 *      reduction multiplier.
 * num: word count; the scratch area holds num + 2 words on the stack.
 *
 * Always returns 1.
 */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0p, int num)
{
    BN_ULONG carry, top, *tp, n0 = *n0p;
    volatile BN_ULONG *vp;
    int i, j;
    int take_tp = 1;            /* cleared when rp already holds tp - np */

    tp = alloca((num + 2) * sizeof(BN_ULONG));
    vp = tp;                    /* volatile alias used for the final wipe */

    /* tp[num + 1] is assigned inside the loop before it is ever read. */
    for (i = 0; i <= num; i++)
        tp[i] = 0;

    for (i = 0; i < num; i++) {
        /* tp += ap * bp[i]; carry out of tp[num] goes to tp[num + 1]. */
        carry = bn_mul_add_words(tp, ap, num, bp[i]);
        top = (tp[num] + carry) & BN_MASK2;
        tp[num] = top;
        tp[num + 1] = (top < carry) ? 1 : 0;

        /* tp += np * (tp[0] * n0), with the same carry handling. */
        carry = bn_mul_add_words(tp, np, num, tp[0] * n0);
        top = (tp[num] + carry) & BN_MASK2;
        tp[num] = top;
        tp[num + 1] += (top < carry) ? 1 : 0;

        /* Drop the lowest word by moving everything down one slot. */
        for (j = 0; j <= num; j++)
            tp[j] = tp[j + 1];
    }

    /*
     * Final correction: when tp may be >= np, try rp = tp - np and keep
     * it if there was an overflow word or the subtraction did not borrow.
     */
    if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
        carry = bn_sub_words(rp, tp, np, num);
        if (tp[num] != 0 || carry == 0)
            take_tp = 0;
    }
    if (take_tp) {
        for (i = 0; i < num; i++)
            rp[i] = tp[i];
    }

    /* Clear the whole scratch area through the volatile pointer. */
    for (i = 0; i < num + 2; i++)
        vp[i] = 0;
    return 1;
}
10401f13597dSJung-uk Kim #  else
/*
 * Placeholder used when OPENSSL_BN_ASM_MONT is not defined: performs no
 * work, ignores every argument and returns 0.
 */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num)
{
    (void)rp;
    (void)ap;
    (void)bp;
    (void)np;
    (void)n0;
    (void)num;

    return 0;
}
10461f13597dSJung-uk Kim #  endif                        /* OPENSSL_BN_ASM_MONT */
10471f13597dSJung-uk Kim # endif
10481f13597dSJung-uk Kim 
1049f579bf8eSKris Kennaway #endif                          /* !BN_MUL_COMBA */
1050