1 /* mpn_powm -- Compute R = U^E mod M.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2007-2012, 2019 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 /*
39   BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
40 
41   1. W <- U
42 
43   2. T <- (B^n * U) mod M                Convert to REDC form
44 
45   3. Compute table U^1, U^3, U^5... of E-dependent size
46 
47   4. While there are more bits in E
48        W <- power left-to-right base-k
49 
50 
51   TODO:
52 
53    * Make getbits a macro, thereby allowing it to update the index operand.
54      That will simplify the code using getbits.  (Perhaps make getbits' sibling
55      getbit then have similar form, for symmetry.)
56 
57    * Write an itch function.  Or perhaps get rid of tp parameter since the huge
58      pp area is allocated locally anyway?
59 
60    * Choose window size without looping.  (Superoptimize or think(tm).)
61 
62    * Handle small bases with initial, reduction-free exponentiation.
63 
64    * Call new division functions, not mpn_tdiv_qr.
65 
66    * Consider special code for one-limb M.
67 
68    * How should we handle the redc1/redc2/redc_n choice?
69      - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
70      - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
71      - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
72      This disregards the addmul_N constant term, but we could think of
73      that as part of the respective mullo.
74 
75    * When U (the base) is small, we should start the exponentiation with plain
76      operations, then convert that partial result to REDC form.
77 
78    * When U is just one limb, should it be handled without the k-ary tricks?
79      We could keep a factor of B^n in W, but use U' = BU as base.  After
80      multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
81      mod M.
82 */
83 
84 #include "gmp-impl.h"
85 #include "longlong.h"
86 
87 #undef MPN_REDC_0
88 #define MPN_REDC_0(rp, up, mp, invm)					\
89   do {									\
90     mp_limb_t p1, r0, u0, _dummy;					\
91     u0 = *(up);								\
92     umul_ppmm (p1, _dummy, *(mp), (u0 * (invm)) & GMP_NUMB_MASK);	\
93     ASSERT (((u0 + _dummy) & GMP_NUMB_MASK) == 0);			\
94     p1 += (u0 != 0);							\
95     r0 = (up)[1] + p1;							\
96     if (p1 > r0)							\
97       r0 -= *(mp);							\
98     *(rp) = r0;								\
99   } while (0)
100 
101 #undef MPN_REDC_1
102 #if HAVE_NATIVE_mpn_sbpi1_bdiv_r
103 #define MPN_REDC_1(rp, up, mp, n, invm)					\
104   do {									\
105     mp_limb_t cy;							\
106     cy = mpn_sbpi1_bdiv_r (up, 2 * n, mp, n, invm);			\
107     if (cy != 0)							\
108       mpn_sub_n (rp, up + n, mp, n);					\
109     else								\
110       MPN_COPY (rp, up + n, n);						\
111   } while (0)
112 #else
113 #define MPN_REDC_1(rp, up, mp, n, invm)					\
114   do {									\
115     mp_limb_t cy;							\
116     cy = mpn_redc_1 (rp, up, mp, n, invm);				\
117     if (cy != 0)							\
118       mpn_sub_n (rp, rp, mp, n);					\
119   } while (0)
120 #endif
121 
122 #undef MPN_REDC_2
123 #define MPN_REDC_2(rp, up, mp, n, mip)					\
124   do {									\
125     mp_limb_t cy;							\
126     cy = mpn_redc_2 (rp, up, mp, n, mip);				\
127     if (cy != 0)							\
128       mpn_sub_n (rp, rp, mp, n);					\
129   } while (0)
130 
131 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
132 #define WANT_REDC_2 1
133 #endif
134 
135 #define getbit(p,bi) \
136   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
137 
138 static inline mp_limb_t
getbits(const mp_limb_t * p,mp_bitcnt_t bi,int nbits)139 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
140 {
141   int nbits_in_r;
142   mp_limb_t r;
143   mp_size_t i;
144 
145   if (bi < nbits)
146     {
147       return p[0] & (((mp_limb_t) 1 << bi) - 1);
148     }
149   else
150     {
151       bi -= nbits;			/* bit index of low bit to extract */
152       i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
153       bi %= GMP_NUMB_BITS;		/* bit index in low word */
154       r = p[i] >> bi;			/* extract (low) bits */
155       nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
156       if (nbits_in_r < nbits)		/* did we get enough bits? */
157 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
158       return r & (((mp_limb_t) 1 << nbits) - 1);
159     }
160 }
161 
162 static inline int
win_size(mp_bitcnt_t eb)163 win_size (mp_bitcnt_t eb)
164 {
165   int k;
166   static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
167   for (k = 1; eb > x[k]; k++)
168     ;
169   return k;
170 }
171 
172 /* Convert U to REDC form, U_r = B^n * U mod M */
173 static void
redcify(mp_ptr rp,mp_srcptr up,mp_size_t un,mp_srcptr mp,mp_size_t n)174 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
175 {
176   mp_ptr tp, qp;
177   TMP_DECL;
178   TMP_MARK;
179 
180   TMP_ALLOC_LIMBS_2 (tp, un + n, qp, un + 1);
181 
182   MPN_ZERO (tp, n);
183   MPN_COPY (tp + n, up, un);
184   mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
185   TMP_FREE;
186 }
187 
188 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
189    Requires that mp[n-1..0] is odd.
190    Requires that ep[en-1..0] is > 1.
191    Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
192 void
mpn_powm(mp_ptr rp,mp_srcptr bp,mp_size_t bn,mp_srcptr ep,mp_size_t en,mp_srcptr mp,mp_size_t n,mp_ptr tp)193 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
194 	  mp_srcptr ep, mp_size_t en,
195 	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
196 {
197   mp_limb_t ip[2], *mip;
198   int cnt;
199   mp_bitcnt_t ebi;
200   int windowsize, this_windowsize;
201   mp_limb_t expbits;
202   mp_ptr pp, this_pp;
203   long i;
204   TMP_DECL;
205 
206   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
207   ASSERT (n >= 1 && ((mp[0] & 1) != 0));
208 
209   TMP_MARK;
210 
211   MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
212 
213 #if 0
214   if (bn < n)
215     {
216       /* Do the first few exponent bits without mod reductions,
217 	 until the result is greater than the mod argument.  */
218       for (;;)
219 	{
220 	  mpn_sqr (tp, this_pp, tn);
221 	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
222 	  if (getbit (ep, ebi) != 0)
223 	    mpn_mul (..., tp, tn, bp, bn);
224 	  ebi--;
225 	}
226     }
227 #endif
228 
229   windowsize = win_size (ebi);
230 
231 #if WANT_REDC_2
232   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
233     {
234       mip = ip;
235       binvert_limb (mip[0], mp[0]);
236       mip[0] = -mip[0];
237     }
238   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
239     {
240       mip = ip;
241       mpn_binvert (mip, mp, 2, tp);
242       mip[0] = -mip[0]; mip[1] = ~mip[1];
243     }
244 #else
245   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
246     {
247       mip = ip;
248       binvert_limb (mip[0], mp[0]);
249       mip[0] = -mip[0];
250     }
251 #endif
252   else
253     {
254       mip = TMP_ALLOC_LIMBS (n);
255       mpn_binvert (mip, mp, n, tp);
256     }
257 
258   pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
259 
260   this_pp = pp;
261   redcify (this_pp, bp, bn, mp, n);
262 
263   /* Store b^2 at rp.  */
264   mpn_sqr (tp, this_pp, n);
265 #if 0
266   if (n == 1) {
267     MPN_REDC_0 (rp, tp, mp, mip[0]);
268   } else
269 #endif
270 #if WANT_REDC_2
271   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
272     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
273   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
274     MPN_REDC_2 (rp, tp, mp, n, mip);
275 #else
276   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
277     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
278 #endif
279   else
280     mpn_redc_n (rp, tp, mp, n, mip);
281 
282   /* Precompute odd powers of b and put them in the temporary area at pp.  */
283   for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
284 #if 1
285     if (n == 1) {
286       umul_ppmm((tp)[1], *(tp), *(this_pp), *(rp));
287       ++this_pp ;
288       MPN_REDC_0 (this_pp, tp, mp, mip[0]);
289     } else
290 #endif
291     {
292       mpn_mul_n (tp, this_pp, rp, n);
293       this_pp += n;
294 #if WANT_REDC_2
295       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
296 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
297       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
298 	MPN_REDC_2 (this_pp, tp, mp, n, mip);
299 #else
300       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
301 	MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
302 #endif
303       else
304 	mpn_redc_n (this_pp, tp, mp, n, mip);
305     }
306 
307   expbits = getbits (ep, ebi, windowsize);
308   if (ebi < windowsize)
309     ebi = 0;
310   else
311     ebi -= windowsize;
312 
313   count_trailing_zeros (cnt, expbits);
314   ebi += cnt;
315   expbits >>= cnt;
316 
317   MPN_COPY (rp, pp + n * (expbits >> 1), n);
318 
319 #define INNERLOOP							\
320   while (ebi != 0)							\
321     {									\
322       while (getbit (ep, ebi) == 0)					\
323 	{								\
324 	  MPN_SQR (tp, rp, n);						\
325 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
326 	  if (--ebi == 0)						\
327 	    goto done;							\
328 	}								\
329 									\
330       /* The next bit of the exponent is 1.  Now extract the largest	\
331 	 block of bits <= windowsize, and such that the least		\
332 	 significant bit is 1.  */					\
333 									\
334       expbits = getbits (ep, ebi, windowsize);				\
335       this_windowsize = windowsize;					\
336       if (ebi < windowsize)						\
337 	{								\
338 	  this_windowsize -= windowsize - ebi;				\
339 	  ebi = 0;							\
340 	}								\
341       else								\
342         ebi -= windowsize;						\
343 									\
344       count_trailing_zeros (cnt, expbits);				\
345       this_windowsize -= cnt;						\
346       ebi += cnt;							\
347       expbits >>= cnt;							\
348 									\
349       do								\
350 	{								\
351 	  MPN_SQR (tp, rp, n);						\
352 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
353 	}								\
354       while (--this_windowsize != 0);					\
355 									\
356       MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
357       MPN_REDUCE (rp, tp, mp, n, mip);					\
358     }
359 
360 
361   if (n == 1)
362     {
363 #undef MPN_MUL_N
364 #undef MPN_SQR
365 #undef MPN_REDUCE
366 #define MPN_MUL_N(r,a,b,n)		umul_ppmm((r)[1], *(r), *(a), *(b))
367 #define MPN_SQR(r,a,n)			umul_ppmm((r)[1], *(r), *(a), *(a))
368 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_0(rp, tp, mp, mip[0])
369       INNERLOOP;
370     }
371   else
372 #if WANT_REDC_2
373   if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
374     {
375       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
376 	{
377 	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
378 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
379 	    {
380 #undef MPN_MUL_N
381 #undef MPN_SQR
382 #undef MPN_REDUCE
383 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
384 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
385 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
386 	      INNERLOOP;
387 	    }
388 	  else
389 	    {
390 #undef MPN_MUL_N
391 #undef MPN_SQR
392 #undef MPN_REDUCE
393 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
394 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
395 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
396 	      INNERLOOP;
397 	    }
398 	}
399       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
400 	{
401 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
402 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
403 	    {
404 #undef MPN_MUL_N
405 #undef MPN_SQR
406 #undef MPN_REDUCE
407 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
408 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
409 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
410 	      INNERLOOP;
411 	    }
412 	  else
413 	    {
414 #undef MPN_MUL_N
415 #undef MPN_SQR
416 #undef MPN_REDUCE
417 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
418 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
419 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
420 	      INNERLOOP;
421 	    }
422 	}
423       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
424 	{
425 #undef MPN_MUL_N
426 #undef MPN_SQR
427 #undef MPN_REDUCE
428 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
429 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
430 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
431 	  INNERLOOP;
432 	}
433       else
434 	{
435 #undef MPN_MUL_N
436 #undef MPN_SQR
437 #undef MPN_REDUCE
438 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
439 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
440 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
441 	  INNERLOOP;
442 	}
443     }
444   else
445     {
446       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
447 	{
448 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
449 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
450 	    {
451 #undef MPN_MUL_N
452 #undef MPN_SQR
453 #undef MPN_REDUCE
454 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
455 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
456 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
457 	      INNERLOOP;
458 	    }
459 	  else
460 	    {
461 #undef MPN_MUL_N
462 #undef MPN_SQR
463 #undef MPN_REDUCE
464 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
465 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
466 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
467 	      INNERLOOP;
468 	    }
469 	}
470       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
471 	{
472 #undef MPN_MUL_N
473 #undef MPN_SQR
474 #undef MPN_REDUCE
475 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
476 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
477 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
478 	  INNERLOOP;
479 	}
480       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
481 	{
482 #undef MPN_MUL_N
483 #undef MPN_SQR
484 #undef MPN_REDUCE
485 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
486 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
487 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
488 	  INNERLOOP;
489 	}
490       else
491 	{
492 #undef MPN_MUL_N
493 #undef MPN_SQR
494 #undef MPN_REDUCE
495 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
496 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
497 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
498 	  INNERLOOP;
499 	}
500     }
501 
502 #else  /* WANT_REDC_2 */
503 
504   if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
505     {
506       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
507 	{
508 	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
509 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
510 	    {
511 #undef MPN_MUL_N
512 #undef MPN_SQR
513 #undef MPN_REDUCE
514 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
515 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
516 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
517 	      INNERLOOP;
518 	    }
519 	  else
520 	    {
521 #undef MPN_MUL_N
522 #undef MPN_SQR
523 #undef MPN_REDUCE
524 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
525 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
526 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
527 	      INNERLOOP;
528 	    }
529 	}
530       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
531 	{
532 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
533 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
534 	    {
535 #undef MPN_MUL_N
536 #undef MPN_SQR
537 #undef MPN_REDUCE
538 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
539 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
540 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
541 	      INNERLOOP;
542 	    }
543 	  else
544 	    {
545 #undef MPN_MUL_N
546 #undef MPN_SQR
547 #undef MPN_REDUCE
548 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
549 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
550 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
551 	      INNERLOOP;
552 	    }
553 	}
554       else
555 	{
556 #undef MPN_MUL_N
557 #undef MPN_SQR
558 #undef MPN_REDUCE
559 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
560 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
561 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
562 	  INNERLOOP;
563 	}
564     }
565   else
566     {
567       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
568 	{
569 	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
570 	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
571 	    {
572 #undef MPN_MUL_N
573 #undef MPN_SQR
574 #undef MPN_REDUCE
575 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
576 #define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
577 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
578 	      INNERLOOP;
579 	    }
580 	  else
581 	    {
582 #undef MPN_MUL_N
583 #undef MPN_SQR
584 #undef MPN_REDUCE
585 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
586 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
587 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
588 	      INNERLOOP;
589 	    }
590 	}
591       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
592 	{
593 #undef MPN_MUL_N
594 #undef MPN_SQR
595 #undef MPN_REDUCE
596 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
597 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
598 #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
599 	  INNERLOOP;
600 	}
601       else
602 	{
603 #undef MPN_MUL_N
604 #undef MPN_SQR
605 #undef MPN_REDUCE
606 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
607 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
608 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
609 	  INNERLOOP;
610 	}
611     }
612 #endif  /* WANT_REDC_2 */
613 
614  done:
615 
616   MPN_COPY (tp, rp, n);
617   MPN_ZERO (tp + n, n);
618 
619 #if WANT_REDC_2
620   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
621     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
622   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
623     MPN_REDC_2 (rp, tp, mp, n, mip);
624 #else
625   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
626     MPN_REDC_1 (rp, tp, mp, n, mip[0]);
627 #endif
628   else
629     mpn_redc_n (rp, tp, mp, n, mip);
630 
631   if (mpn_cmp (rp, mp, n) >= 0)
632     mpn_sub_n (rp, rp, mp, n);
633 
634   TMP_FREE;
635 }
636