14a1767b4Smrg /* mpn_powm -- Compute R = U^E mod M.
24a1767b4Smrg
34a1767b4Smrg Contributed to the GNU project by Torbjorn Granlund.
44a1767b4Smrg
54a1767b4Smrg THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
64a1767b4Smrg SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
74a1767b4Smrg GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
84a1767b4Smrg
9*671ea119Smrg Copyright 2007-2012, 2019 Free Software Foundation, Inc.
104a1767b4Smrg
114a1767b4Smrg This file is part of the GNU MP Library.
124a1767b4Smrg
134a1767b4Smrg The GNU MP Library is free software; you can redistribute it and/or modify
14f81b1c5bSmrg it under the terms of either:
15f81b1c5bSmrg
16f81b1c5bSmrg * the GNU Lesser General Public License as published by the Free
17f81b1c5bSmrg Software Foundation; either version 3 of the License, or (at your
184a1767b4Smrg option) any later version.
194a1767b4Smrg
20f81b1c5bSmrg or
21f81b1c5bSmrg
22f81b1c5bSmrg * the GNU General Public License as published by the Free Software
23f81b1c5bSmrg Foundation; either version 2 of the License, or (at your option) any
24f81b1c5bSmrg later version.
25f81b1c5bSmrg
26f81b1c5bSmrg or both in parallel, as here.
27f81b1c5bSmrg
284a1767b4Smrg The GNU MP Library is distributed in the hope that it will be useful, but
294a1767b4Smrg WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30f81b1c5bSmrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
31f81b1c5bSmrg for more details.
324a1767b4Smrg
33f81b1c5bSmrg You should have received copies of the GNU General Public License and the
34f81b1c5bSmrg GNU Lesser General Public License along with the GNU MP Library. If not,
35f81b1c5bSmrg see https://www.gnu.org/licenses/. */
364a1767b4Smrg
374a1767b4Smrg
384a1767b4Smrg /*
394a1767b4Smrg BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
404a1767b4Smrg
414a1767b4Smrg 1. W <- U
424a1767b4Smrg
434a1767b4Smrg 2. T <- (B^n * U) mod M Convert to REDC form
444a1767b4Smrg
454a1767b4Smrg 3. Compute table U^1, U^3, U^5... of E-dependent size
464a1767b4Smrg
474a1767b4Smrg 4. While there are more bits in E
484a1767b4Smrg W <- power left-to-right base-k
494a1767b4Smrg
504a1767b4Smrg
514a1767b4Smrg TODO:
524a1767b4Smrg
534a1767b4Smrg * Make getbits a macro, thereby allowing it to update the index operand.
544a1767b4Smrg That will simplify the code using getbits. (Perhaps make getbits' sibling
554a1767b4Smrg getbit then have similar form, for symmetry.)
564a1767b4Smrg
574a1767b4Smrg * Write an itch function. Or perhaps get rid of tp parameter since the huge
584a1767b4Smrg pp area is allocated locally anyway?
594a1767b4Smrg
604a1767b4Smrg * Choose window size without looping. (Superoptimize or think(tm).)
614a1767b4Smrg
624a1767b4Smrg * Handle small bases with initial, reduction-free exponentiation.
634a1767b4Smrg
644a1767b4Smrg * Call new division functions, not mpn_tdiv_qr.
654a1767b4Smrg
664a1767b4Smrg * Consider special code for one-limb M.
674a1767b4Smrg
684a1767b4Smrg * How should we handle the redc1/redc2/redc_n choice?
694a1767b4Smrg - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1))
704a1767b4Smrg - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
714a1767b4Smrg - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
724a1767b4Smrg This disregards the addmul_N constant term, but we could think of
734a1767b4Smrg that as part of the respective mullo.
744a1767b4Smrg
754a1767b4Smrg * When U (the base) is small, we should start the exponentiation with plain
764a1767b4Smrg operations, then convert that partial result to REDC form.
774a1767b4Smrg
784a1767b4Smrg * When U is just one limb, should it be handled without the k-ary tricks?
794a1767b4Smrg We could keep a factor of B^n in W, but use U' = BU as base. After
804a1767b4Smrg multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
814a1767b4Smrg mod M.
824a1767b4Smrg */
834a1767b4Smrg
844a1767b4Smrg #include "gmp-impl.h"
854a1767b4Smrg #include "longlong.h"
864a1767b4Smrg
87*671ea119Smrg #undef MPN_REDC_0
88*671ea119Smrg #define MPN_REDC_0(rp, up, mp, invm) \
89*671ea119Smrg do { \
90*671ea119Smrg mp_limb_t p1, r0, u0, _dummy; \
91*671ea119Smrg u0 = *(up); \
92*671ea119Smrg umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK); \
93*671ea119Smrg ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0); \
94*671ea119Smrg p1 += (u0 != 0); \
95*671ea119Smrg r0 = (up)[1] + p1; \
96*671ea119Smrg if (p1 > r0) \
97*671ea119Smrg r0 -= *(mp); \
98*671ea119Smrg *(rp) = r0; \
99*671ea119Smrg } while (0)
100*671ea119Smrg
101d25e02daSmrg #undef MPN_REDC_1
102*671ea119Smrg #if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103*671ea119Smrg #define MPN_REDC_1(rp, up, mp, n, invm) \
104*671ea119Smrg do { \
105*671ea119Smrg mp_limb_t cy; \
106*671ea119Smrg cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm); \
107*671ea119Smrg if (cy != 0) \
108*671ea119Smrg mpn_sub_n (rp, up + n, mp, n); \
109*671ea119Smrg else \
110*671ea119Smrg MPN_COPY (rp, up + n, n); \
111*671ea119Smrg } while (0)
112*671ea119Smrg #else
113d25e02daSmrg #define MPN_REDC_1(rp, up, mp, n, invm) \
114d25e02daSmrg do { \
115d25e02daSmrg mp_limb_t cy; \
116d25e02daSmrg cy = mpn_redc_1 (rp, up, mp, n, invm); \
117d25e02daSmrg if (cy != 0) \
118d25e02daSmrg mpn_sub_n (rp, rp, mp, n); \
119d25e02daSmrg } while (0)
120*671ea119Smrg #endif
121d25e02daSmrg
122d25e02daSmrg #undef MPN_REDC_2
123d25e02daSmrg #define MPN_REDC_2(rp, up, mp, n, mip) \
124d25e02daSmrg do { \
125d25e02daSmrg mp_limb_t cy; \
126d25e02daSmrg cy = mpn_redc_2 (rp, up, mp, n, mip); \
127d25e02daSmrg if (cy != 0) \
128d25e02daSmrg mpn_sub_n (rp, rp, mp, n); \
129d25e02daSmrg } while (0)
130d25e02daSmrg
1314a1767b4Smrg #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
1324a1767b4Smrg #define WANT_REDC_2 1
1334a1767b4Smrg #endif
1344a1767b4Smrg
1354a1767b4Smrg #define getbit(p,bi) \
1364a1767b4Smrg ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
1374a1767b4Smrg
1384a1767b4Smrg static inline mp_limb_t
getbits(const mp_limb_t * p,mp_bitcnt_t bi,int nbits)1394a1767b4Smrg getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
1404a1767b4Smrg {
1414a1767b4Smrg int nbits_in_r;
1424a1767b4Smrg mp_limb_t r;
1434a1767b4Smrg mp_size_t i;
1444a1767b4Smrg
1454a1767b4Smrg if (bi < nbits)
1464a1767b4Smrg {
1474a1767b4Smrg return p[0] & (((mp_limb_t) 1 << bi) - 1);
1484a1767b4Smrg }
1494a1767b4Smrg else
1504a1767b4Smrg {
1514a1767b4Smrg bi -= nbits; /* bit index of low bit to extract */
1524a1767b4Smrg i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
1534a1767b4Smrg bi %= GMP_NUMB_BITS; /* bit index in low word */
1544a1767b4Smrg r = p[i] >> bi; /* extract (low) bits */
1554a1767b4Smrg nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
1564a1767b4Smrg if (nbits_in_r < nbits) /* did we get enough bits? */
1574a1767b4Smrg r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
1584a1767b4Smrg return r & (((mp_limb_t) 1 << nbits) - 1);
1594a1767b4Smrg }
1604a1767b4Smrg }
1614a1767b4Smrg
1624a1767b4Smrg static inline int
win_size(mp_bitcnt_t eb)1634a1767b4Smrg win_size (mp_bitcnt_t eb)
1644a1767b4Smrg {
1654a1767b4Smrg int k;
1664a1767b4Smrg static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
1674a1767b4Smrg for (k = 1; eb > x[k]; k++)
1684a1767b4Smrg ;
1694a1767b4Smrg return k;
1704a1767b4Smrg }
1714a1767b4Smrg
1724a1767b4Smrg /* Convert U to REDC form, U_r = B^n * U mod M */
1734a1767b4Smrg static void
redcify(mp_ptr rp,mp_srcptr up,mp_size_t un,mp_srcptr mp,mp_size_t n)1744a1767b4Smrg redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
1754a1767b4Smrg {
1764a1767b4Smrg mp_ptr tp, qp;
1774a1767b4Smrg TMP_DECL;
1784a1767b4Smrg TMP_MARK;
1794a1767b4Smrg
180f81b1c5bSmrg TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
1814a1767b4Smrg
1824a1767b4Smrg MPN_ZERO (tp, n);
1834a1767b4Smrg MPN_COPY (tp + n, up, un);
1844a1767b4Smrg mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
1854a1767b4Smrg TMP_FREE;
1864a1767b4Smrg }
1874a1767b4Smrg
1884a1767b4Smrg /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
1894a1767b4Smrg Requires that mp[n-1..0] is odd.
1904a1767b4Smrg Requires that ep[en-1..0] is > 1.
1914a1767b4Smrg Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
1924a1767b4Smrg void
mpn_powm(mp_ptr rp,mp_srcptr bp,mp_size_t bn,mp_srcptr ep,mp_size_t en,mp_srcptr mp,mp_size_t n,mp_ptr tp)1934a1767b4Smrg mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
1944a1767b4Smrg mp_srcptr ep, mp_size_t en,
1954a1767b4Smrg mp_srcptr mp, mp_size_t n, mp_ptr tp)
1964a1767b4Smrg {
1974a1767b4Smrg mp_limb_t ip[2], *mip;
1984a1767b4Smrg int cnt;
1994a1767b4Smrg mp_bitcnt_t ebi;
2004a1767b4Smrg int windowsize, this_windowsize;
2014a1767b4Smrg mp_limb_t expbits;
2024a1767b4Smrg mp_ptr pp, this_pp;
2034a1767b4Smrg long i;
2044a1767b4Smrg TMP_DECL;
2054a1767b4Smrg
2064a1767b4Smrg ASSERT (en > 1 || (en == 1 && ep[0] > 1));
2074a1767b4Smrg ASSERT (n >= 1 && ((mp[0] & 1) != 0));
2084a1767b4Smrg
2094a1767b4Smrg TMP_MARK;
2104a1767b4Smrg
211d25e02daSmrg MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
2124a1767b4Smrg
2134a1767b4Smrg #if 0
2144a1767b4Smrg if (bn < n)
2154a1767b4Smrg {
2164a1767b4Smrg /* Do the first few exponent bits without mod reductions,
2174a1767b4Smrg until the result is greater than the mod argument. */
2184a1767b4Smrg for (;;)
2194a1767b4Smrg {
2204a1767b4Smrg mpn_sqr (tp, this_pp, tn);
2214a1767b4Smrg tn = tn * 2 - 1, tn += tp[tn] != 0;
2224a1767b4Smrg if (getbit (ep, ebi) != 0)
2234a1767b4Smrg mpn_mul (..., tp, tn, bp, bn);
2244a1767b4Smrg ebi--;
2254a1767b4Smrg }
2264a1767b4Smrg }
2274a1767b4Smrg #endif
2284a1767b4Smrg
2294a1767b4Smrg windowsize = win_size (ebi);
2304a1767b4Smrg
2314a1767b4Smrg #if WANT_REDC_2
2324a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
2334a1767b4Smrg {
2344a1767b4Smrg mip = ip;
2354a1767b4Smrg binvert_limb (mip[0], mp[0]);
2364a1767b4Smrg mip[0] = -mip[0];
2374a1767b4Smrg }
2384a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
2394a1767b4Smrg {
2404a1767b4Smrg mip = ip;
2414a1767b4Smrg mpn_binvert (mip, mp, 2, tp);
2424a1767b4Smrg mip[0] = -mip[0]; mip[1] = ~mip[1];
2434a1767b4Smrg }
2444a1767b4Smrg #else
2454a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
2464a1767b4Smrg {
2474a1767b4Smrg mip = ip;
2484a1767b4Smrg binvert_limb (mip[0], mp[0]);
2494a1767b4Smrg mip[0] = -mip[0];
2504a1767b4Smrg }
2514a1767b4Smrg #endif
2524a1767b4Smrg else
2534a1767b4Smrg {
2544a1767b4Smrg mip = TMP_ALLOC_LIMBS (n);
2554a1767b4Smrg mpn_binvert (mip, mp, n, tp);
2564a1767b4Smrg }
2574a1767b4Smrg
2584a1767b4Smrg pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
2594a1767b4Smrg
2604a1767b4Smrg this_pp = pp;
2614a1767b4Smrg redcify (this_pp, bp, bn, mp, n);
2624a1767b4Smrg
2634a1767b4Smrg /* Store b^2 at rp. */
2644a1767b4Smrg mpn_sqr (tp, this_pp, n);
265*671ea119Smrg #if 0
266*671ea119Smrg if (n == 1) {
267*671ea119Smrg MPN_REDC_0 (rp, tp, mp, mip[0]);
268*671ea119Smrg } else
269*671ea119Smrg #endif
2704a1767b4Smrg #if WANT_REDC_2
2714a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
272d25e02daSmrg MPN_REDC_1 (rp, tp, mp, n, mip[0]);
2734a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
274d25e02daSmrg MPN_REDC_2 (rp, tp, mp, n, mip);
2754a1767b4Smrg #else
2764a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
277d25e02daSmrg MPN_REDC_1 (rp, tp, mp, n, mip[0]);
2784a1767b4Smrg #endif
2794a1767b4Smrg else
2804a1767b4Smrg mpn_redc_n (rp, tp, mp, n, mip);
2814a1767b4Smrg
2824a1767b4Smrg /* Precompute odd powers of b and put them in the temporary area at pp. */
2834a1767b4Smrg for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
284*671ea119Smrg #if 1
285*671ea119Smrg if (n == 1) {
286*671ea119Smrg umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
287*671ea119Smrg ++this_pp ;
288*671ea119Smrg MPN_REDC_0 (this_pp, tp, mp, mip[0]);
289*671ea119Smrg } else
290*671ea119Smrg #endif
2914a1767b4Smrg {
2924a1767b4Smrg mpn_mul_n (tp, this_pp, rp, n);
2934a1767b4Smrg this_pp += n;
2944a1767b4Smrg #if WANT_REDC_2
2954a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
296d25e02daSmrg MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
2974a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
298d25e02daSmrg MPN_REDC_2 (this_pp, tp, mp, n, mip);
2994a1767b4Smrg #else
3004a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
301d25e02daSmrg MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
3024a1767b4Smrg #endif
3034a1767b4Smrg else
3044a1767b4Smrg mpn_redc_n (this_pp, tp, mp, n, mip);
3054a1767b4Smrg }
3064a1767b4Smrg
3074a1767b4Smrg expbits = getbits (ep, ebi, windowsize);
3084a1767b4Smrg if (ebi < windowsize)
3094a1767b4Smrg ebi = 0;
3104a1767b4Smrg else
3114a1767b4Smrg ebi -= windowsize;
3124a1767b4Smrg
3134a1767b4Smrg count_trailing_zeros (cnt, expbits);
3144a1767b4Smrg ebi += cnt;
3154a1767b4Smrg expbits >>= cnt;
3164a1767b4Smrg
3174a1767b4Smrg MPN_COPY (rp, pp + n * (expbits >> 1), n);
3184a1767b4Smrg
3194a1767b4Smrg #define INNERLOOP \
3204a1767b4Smrg while (ebi != 0) \
3214a1767b4Smrg { \
3224a1767b4Smrg while (getbit (ep, ebi) == 0) \
3234a1767b4Smrg { \
3244a1767b4Smrg MPN_SQR (tp, rp, n); \
3254a1767b4Smrg MPN_REDUCE (rp, tp, mp, n, mip); \
326*671ea119Smrg if (--ebi == 0) \
3274a1767b4Smrg goto done; \
3284a1767b4Smrg } \
3294a1767b4Smrg \
3304a1767b4Smrg /* The next bit of the exponent is 1. Now extract the largest \
3314a1767b4Smrg block of bits <= windowsize, and such that the least \
3324a1767b4Smrg significant bit is 1. */ \
3334a1767b4Smrg \
3344a1767b4Smrg expbits = getbits (ep, ebi, windowsize); \
3354a1767b4Smrg this_windowsize = windowsize; \
3364a1767b4Smrg if (ebi < windowsize) \
3374a1767b4Smrg { \
3384a1767b4Smrg this_windowsize -= windowsize - ebi; \
3394a1767b4Smrg ebi = 0; \
3404a1767b4Smrg } \
3414a1767b4Smrg else \
3424a1767b4Smrg ebi -= windowsize; \
3434a1767b4Smrg \
3444a1767b4Smrg count_trailing_zeros (cnt, expbits); \
3454a1767b4Smrg this_windowsize -= cnt; \
3464a1767b4Smrg ebi += cnt; \
3474a1767b4Smrg expbits >>= cnt; \
3484a1767b4Smrg \
3494a1767b4Smrg do \
3504a1767b4Smrg { \
3514a1767b4Smrg MPN_SQR (tp, rp, n); \
3524a1767b4Smrg MPN_REDUCE (rp, tp, mp, n, mip); \
3534a1767b4Smrg } \
354*671ea119Smrg while (--this_windowsize != 0); \
3554a1767b4Smrg \
3564a1767b4Smrg MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \
3574a1767b4Smrg MPN_REDUCE (rp, tp, mp, n, mip); \
3584a1767b4Smrg }
3594a1767b4Smrg
3604a1767b4Smrg
361*671ea119Smrg if (n == 1)
362*671ea119Smrg {
363*671ea119Smrg #undef MPN_MUL_N
364*671ea119Smrg #undef MPN_SQR
365*671ea119Smrg #undef MPN_REDUCE
366*671ea119Smrg #define MPN_MUL_N(r,a,b,n) umul_ppmm((r)[1], *(r), *(a), *(b))
367*671ea119Smrg #define MPN_SQR(r,a,n) umul_ppmm((r)[1], *(r), *(a), *(a))
368*671ea119Smrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_0(rp, tp, mp, mip[0])
369*671ea119Smrg INNERLOOP;
370*671ea119Smrg }
371*671ea119Smrg else
3724a1767b4Smrg #if WANT_REDC_2
3734a1767b4Smrg if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
3744a1767b4Smrg {
3754a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
3764a1767b4Smrg {
377d25e02daSmrg if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
378d25e02daSmrg || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
379d25e02daSmrg {
3804a1767b4Smrg #undef MPN_MUL_N
3814a1767b4Smrg #undef MPN_SQR
3824a1767b4Smrg #undef MPN_REDUCE
3834a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
384d25e02daSmrg #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
385d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
3864a1767b4Smrg INNERLOOP;
3874a1767b4Smrg }
388d25e02daSmrg else
3894a1767b4Smrg {
3904a1767b4Smrg #undef MPN_MUL_N
3914a1767b4Smrg #undef MPN_SQR
3924a1767b4Smrg #undef MPN_REDUCE
3934a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
3944a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
395d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
3964a1767b4Smrg INNERLOOP;
3974a1767b4Smrg }
398d25e02daSmrg }
399d25e02daSmrg else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
400d25e02daSmrg {
401d25e02daSmrg if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
402d25e02daSmrg || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
403d25e02daSmrg {
404d25e02daSmrg #undef MPN_MUL_N
405d25e02daSmrg #undef MPN_SQR
406d25e02daSmrg #undef MPN_REDUCE
407d25e02daSmrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
408d25e02daSmrg #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
409d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
410d25e02daSmrg INNERLOOP;
411d25e02daSmrg }
412d25e02daSmrg else
413d25e02daSmrg {
414d25e02daSmrg #undef MPN_MUL_N
415d25e02daSmrg #undef MPN_SQR
416d25e02daSmrg #undef MPN_REDUCE
417d25e02daSmrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
418d25e02daSmrg #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
419d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
420d25e02daSmrg INNERLOOP;
421d25e02daSmrg }
422d25e02daSmrg }
4234a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
4244a1767b4Smrg {
4254a1767b4Smrg #undef MPN_MUL_N
4264a1767b4Smrg #undef MPN_SQR
4274a1767b4Smrg #undef MPN_REDUCE
4284a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
4294a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
430d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
4314a1767b4Smrg INNERLOOP;
4324a1767b4Smrg }
4334a1767b4Smrg else
4344a1767b4Smrg {
4354a1767b4Smrg #undef MPN_MUL_N
4364a1767b4Smrg #undef MPN_SQR
4374a1767b4Smrg #undef MPN_REDUCE
4384a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
4394a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
4404a1767b4Smrg #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
4414a1767b4Smrg INNERLOOP;
4424a1767b4Smrg }
4434a1767b4Smrg }
4444a1767b4Smrg else
4454a1767b4Smrg {
4464a1767b4Smrg if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
4474a1767b4Smrg {
448d25e02daSmrg if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
449d25e02daSmrg || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
450d25e02daSmrg {
451d25e02daSmrg #undef MPN_MUL_N
452d25e02daSmrg #undef MPN_SQR
453d25e02daSmrg #undef MPN_REDUCE
454d25e02daSmrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
455d25e02daSmrg #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
456d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
457d25e02daSmrg INNERLOOP;
458d25e02daSmrg }
459d25e02daSmrg else
460d25e02daSmrg {
4614a1767b4Smrg #undef MPN_MUL_N
4624a1767b4Smrg #undef MPN_SQR
4634a1767b4Smrg #undef MPN_REDUCE
4644a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
4654a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
466d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
4674a1767b4Smrg INNERLOOP;
4684a1767b4Smrg }
469d25e02daSmrg }
4704a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
4714a1767b4Smrg {
4724a1767b4Smrg #undef MPN_MUL_N
4734a1767b4Smrg #undef MPN_SQR
4744a1767b4Smrg #undef MPN_REDUCE
4754a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
4764a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
477d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
4784a1767b4Smrg INNERLOOP;
4794a1767b4Smrg }
4804a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
4814a1767b4Smrg {
4824a1767b4Smrg #undef MPN_MUL_N
4834a1767b4Smrg #undef MPN_SQR
4844a1767b4Smrg #undef MPN_REDUCE
4854a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
4864a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
487d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
4884a1767b4Smrg INNERLOOP;
4894a1767b4Smrg }
4904a1767b4Smrg else
4914a1767b4Smrg {
4924a1767b4Smrg #undef MPN_MUL_N
4934a1767b4Smrg #undef MPN_SQR
4944a1767b4Smrg #undef MPN_REDUCE
4954a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
4964a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
4974a1767b4Smrg #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
4984a1767b4Smrg INNERLOOP;
4994a1767b4Smrg }
5004a1767b4Smrg }
5014a1767b4Smrg
5024a1767b4Smrg #else /* WANT_REDC_2 */
5034a1767b4Smrg
5044a1767b4Smrg if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
5054a1767b4Smrg {
5064a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
5074a1767b4Smrg {
508d25e02daSmrg if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
509d25e02daSmrg || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
510d25e02daSmrg {
511d25e02daSmrg #undef MPN_MUL_N
512d25e02daSmrg #undef MPN_SQR
513d25e02daSmrg #undef MPN_REDUCE
514d25e02daSmrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
515d25e02daSmrg #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
516d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
517d25e02daSmrg INNERLOOP;
518d25e02daSmrg }
519d25e02daSmrg else
520d25e02daSmrg {
5214a1767b4Smrg #undef MPN_MUL_N
5224a1767b4Smrg #undef MPN_SQR
5234a1767b4Smrg #undef MPN_REDUCE
5244a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
5254a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
526d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
5274a1767b4Smrg INNERLOOP;
5284a1767b4Smrg }
529d25e02daSmrg }
5304a1767b4Smrg else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
5314a1767b4Smrg {
532d25e02daSmrg if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
533d25e02daSmrg || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
534d25e02daSmrg {
535d25e02daSmrg #undef MPN_MUL_N
536d25e02daSmrg #undef MPN_SQR
537d25e02daSmrg #undef MPN_REDUCE
538d25e02daSmrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
539d25e02daSmrg #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
540d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
541d25e02daSmrg INNERLOOP;
542d25e02daSmrg }
543d25e02daSmrg else
544d25e02daSmrg {
5454a1767b4Smrg #undef MPN_MUL_N
5464a1767b4Smrg #undef MPN_SQR
5474a1767b4Smrg #undef MPN_REDUCE
5484a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
5494a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
5504a1767b4Smrg #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
5514a1767b4Smrg INNERLOOP;
5524a1767b4Smrg }
553d25e02daSmrg }
5544a1767b4Smrg else
5554a1767b4Smrg {
5564a1767b4Smrg #undef MPN_MUL_N
5574a1767b4Smrg #undef MPN_SQR
5584a1767b4Smrg #undef MPN_REDUCE
5594a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
5604a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
5614a1767b4Smrg #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
5624a1767b4Smrg INNERLOOP;
5634a1767b4Smrg }
5644a1767b4Smrg }
5654a1767b4Smrg else
5664a1767b4Smrg {
5674a1767b4Smrg if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
5684a1767b4Smrg {
569d25e02daSmrg if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
570d25e02daSmrg || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
571d25e02daSmrg {
572d25e02daSmrg #undef MPN_MUL_N
573d25e02daSmrg #undef MPN_SQR
574d25e02daSmrg #undef MPN_REDUCE
575d25e02daSmrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
576d25e02daSmrg #define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
577d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
578d25e02daSmrg INNERLOOP;
579d25e02daSmrg }
580d25e02daSmrg else
581d25e02daSmrg {
5824a1767b4Smrg #undef MPN_MUL_N
5834a1767b4Smrg #undef MPN_SQR
5844a1767b4Smrg #undef MPN_REDUCE
5854a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
5864a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
587d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
5884a1767b4Smrg INNERLOOP;
5894a1767b4Smrg }
590d25e02daSmrg }
5914a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
5924a1767b4Smrg {
5934a1767b4Smrg #undef MPN_MUL_N
5944a1767b4Smrg #undef MPN_SQR
5954a1767b4Smrg #undef MPN_REDUCE
5964a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
5974a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
598d25e02daSmrg #define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
5994a1767b4Smrg INNERLOOP;
6004a1767b4Smrg }
6014a1767b4Smrg else
6024a1767b4Smrg {
6034a1767b4Smrg #undef MPN_MUL_N
6044a1767b4Smrg #undef MPN_SQR
6054a1767b4Smrg #undef MPN_REDUCE
6064a1767b4Smrg #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
6074a1767b4Smrg #define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
6084a1767b4Smrg #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
6094a1767b4Smrg INNERLOOP;
6104a1767b4Smrg }
6114a1767b4Smrg }
6124a1767b4Smrg #endif /* WANT_REDC_2 */
6134a1767b4Smrg
6144a1767b4Smrg done:
6154a1767b4Smrg
6164a1767b4Smrg MPN_COPY (tp, rp, n);
6174a1767b4Smrg MPN_ZERO (tp + n, n);
6184a1767b4Smrg
6194a1767b4Smrg #if WANT_REDC_2
6204a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
621d25e02daSmrg MPN_REDC_1 (rp, tp, mp, n, mip[0]);
6224a1767b4Smrg else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
623d25e02daSmrg MPN_REDC_2 (rp, tp, mp, n, mip);
6244a1767b4Smrg #else
6254a1767b4Smrg if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
626d25e02daSmrg MPN_REDC_1 (rp, tp, mp, n, mip[0]);
6274a1767b4Smrg #endif
6284a1767b4Smrg else
6294a1767b4Smrg mpn_redc_n (rp, tp, mp, n, mip);
6304a1767b4Smrg
6314a1767b4Smrg if (mpn_cmp (rp, mp, n) >= 0)
6324a1767b4Smrg mpn_sub_n (rp, rp, mp, n);
6334a1767b4Smrg
6344a1767b4Smrg TMP_FREE;
6354a1767b4Smrg }
636