xref: /dragonfly/contrib/gmp/mpn/generic/powm.c (revision 0ca59c34)
1 /* mpn_powm -- Compute R = U^E mod M.
2 
3    Contributed to the GNU project by Torbjorn Granlund.
4 
5    THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
6    SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of the GNU Lesser General Public License as published by
15 the Free Software Foundation; either version 3 of the License, or (at your
16 option) any later version.
17 
18 The GNU MP Library is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21 License for more details.
22 
23 You should have received a copy of the GNU Lesser General Public License
24 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25 
26 
27 /*
28   BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
29 
30   1. W <- U
31 
32   2. T <- (B^n * U) mod M                Convert to REDC form
33 
34   3. Compute table U^1, U^3, U^5... of E-dependent size
35 
36   4. While there are more bits in E
37        W <- power left-to-right base-k
38 
39 
40   TODO:
41 
42    * Make getbits a macro, thereby allowing it to update the index operand.
43      That will simplify the code using getbits.  (Perhaps make getbits' sibling
44      getbit then have similar form, for symmetry.)
45 
46    * Write an itch function.  Or perhaps get rid of tp parameter since the huge
47      pp area is allocated locally anyway?
48 
49    * Choose window size without looping.  (Superoptimize or think(tm).)
50 
51    * Handle small bases with initial, reduction-free exponentiation.
52 
53    * Call new division functions, not mpn_tdiv_qr.
54 
55    * Consider special code for one-limb M.
56 
57    * How should we handle the redc1/redc2/redc_n choice?
58      - redc1:  T(binvert_1limb)  + e * (n)   * (T(mullo-1x1) + n*T(addmul_1))
59      - redc2:  T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2))
60      - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n)))
61      This disregards the addmul_N constant term, but we could think of
62      that as part of the respective mullo.
63 
64    * When U (the base) is small, we should start the exponentiation with plain
65      operations, then convert that partial result to REDC form.
66 
67    * When U is just one limb, should it be handled without the k-ary tricks?
68      We could keep a factor of B^n in W, but use U' = BU as base.  After
69      multiplying by this (pseudo two-limb) number, we need to multiply by 1/B
70      mod M.
71 */
72 
73 #include "gmp.h"
74 #include "gmp-impl.h"
75 #include "longlong.h"
76 
77 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
78 #define WANT_REDC_2 1
79 #endif
80 
81 #define getbit(p,bi) \
82   ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
83 
84 static inline mp_limb_t
85 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
86 {
87   int nbits_in_r;
88   mp_limb_t r;
89   mp_size_t i;
90 
91   if (bi < nbits)
92     {
93       return p[0] & (((mp_limb_t) 1 << bi) - 1);
94     }
95   else
96     {
97       bi -= nbits;			/* bit index of low bit to extract */
98       i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
99       bi %= GMP_NUMB_BITS;		/* bit index in low word */
100       r = p[i] >> bi;			/* extract (low) bits */
101       nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
102       if (nbits_in_r < nbits)		/* did we get enough bits? */
103 	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
104       return r & (((mp_limb_t ) 1 << nbits) - 1);
105     }
106 }
107 
108 static inline int
109 win_size (mp_bitcnt_t eb)
110 {
111   int k;
112   static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0};
113   for (k = 1; eb > x[k]; k++)
114     ;
115   return k;
116 }
117 
118 /* Convert U to REDC form, U_r = B^n * U mod M */
119 static void
120 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n)
121 {
122   mp_ptr tp, qp;
123   TMP_DECL;
124   TMP_MARK;
125 
126   tp = TMP_ALLOC_LIMBS (un + n);
127   qp = TMP_ALLOC_LIMBS (un + 1);	/* FIXME: Put at tp+? */
128 
129   MPN_ZERO (tp, n);
130   MPN_COPY (tp + n, up, un);
131   mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
132   TMP_FREE;
133 }
134 
135 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
136    Requires that mp[n-1..0] is odd.
137    Requires that ep[en-1..0] is > 1.
138    Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
139 void
140 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
141 	  mp_srcptr ep, mp_size_t en,
142 	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
143 {
144   mp_limb_t ip[2], *mip;
145   int cnt;
146   mp_bitcnt_t ebi;
147   int windowsize, this_windowsize;
148   mp_limb_t expbits;
149   mp_ptr pp, this_pp;
150   long i;
151   TMP_DECL;
152 
153   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
154   ASSERT (n >= 1 && ((mp[0] & 1) != 0));
155 
156   TMP_MARK;
157 
158   count_leading_zeros (cnt, ep[en - 1]);
159   ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
160 
161 #if 0
162   if (bn < n)
163     {
164       /* Do the first few exponent bits without mod reductions,
165 	 until the result is greater than the mod argument.  */
166       for (;;)
167 	{
168 	  mpn_sqr (tp, this_pp, tn);
169 	  tn = tn * 2 - 1,  tn += tp[tn] != 0;
170 	  if (getbit (ep, ebi) != 0)
171 	    mpn_mul (..., tp, tn, bp, bn);
172 	  ebi--;
173 	}
174     }
175 #endif
176 
177   windowsize = win_size (ebi);
178 
179 #if WANT_REDC_2
180   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
181     {
182       mip = ip;
183       binvert_limb (mip[0], mp[0]);
184       mip[0] = -mip[0];
185     }
186   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
187     {
188       mip = ip;
189       mpn_binvert (mip, mp, 2, tp);
190       mip[0] = -mip[0]; mip[1] = ~mip[1];
191     }
192 #else
193   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
194     {
195       mip = ip;
196       binvert_limb (mip[0], mp[0]);
197       mip[0] = -mip[0];
198     }
199 #endif
200   else
201     {
202       mip = TMP_ALLOC_LIMBS (n);
203       mpn_binvert (mip, mp, n, tp);
204     }
205 
206   pp = TMP_ALLOC_LIMBS (n << (windowsize - 1));
207 
208   this_pp = pp;
209   redcify (this_pp, bp, bn, mp, n);
210 
211   /* Store b^2 at rp.  */
212   mpn_sqr (tp, this_pp, n);
213 #if WANT_REDC_2
214   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
215     mpn_redc_1 (rp, tp, mp, n, mip[0]);
216   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
217     mpn_redc_2 (rp, tp, mp, n, mip);
218 #else
219   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
220     mpn_redc_1 (rp, tp, mp, n, mip[0]);
221 #endif
222   else
223     mpn_redc_n (rp, tp, mp, n, mip);
224 
225   /* Precompute odd powers of b and put them in the temporary area at pp.  */
226   for (i = (1 << (windowsize - 1)) - 1; i > 0; i--)
227     {
228       mpn_mul_n (tp, this_pp, rp, n);
229       this_pp += n;
230 #if WANT_REDC_2
231       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
232 	mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
233       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
234 	mpn_redc_2 (this_pp, tp, mp, n, mip);
235 #else
236       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
237 	mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
238 #endif
239       else
240 	mpn_redc_n (this_pp, tp, mp, n, mip);
241     }
242 
243   expbits = getbits (ep, ebi, windowsize);
244   if (ebi < windowsize)
245     ebi = 0;
246   else
247     ebi -= windowsize;
248 
249   count_trailing_zeros (cnt, expbits);
250   ebi += cnt;
251   expbits >>= cnt;
252 
253   MPN_COPY (rp, pp + n * (expbits >> 1), n);
254 
255 #define INNERLOOP							\
256   while (ebi != 0)							\
257     {									\
258       while (getbit (ep, ebi) == 0)					\
259 	{								\
260 	  MPN_SQR (tp, rp, n);						\
261 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
262 	  ebi--;							\
263 	  if (ebi == 0)							\
264 	    goto done;							\
265 	}								\
266 									\
267       /* The next bit of the exponent is 1.  Now extract the largest	\
268 	 block of bits <= windowsize, and such that the least		\
269 	 significant bit is 1.  */					\
270 									\
271       expbits = getbits (ep, ebi, windowsize);				\
272       this_windowsize = windowsize;					\
273       if (ebi < windowsize)						\
274 	{								\
275 	  this_windowsize -= windowsize - ebi;				\
276 	  ebi = 0;							\
277 	}								\
278       else								\
279         ebi -= windowsize;						\
280 									\
281       count_trailing_zeros (cnt, expbits);				\
282       this_windowsize -= cnt;						\
283       ebi += cnt;							\
284       expbits >>= cnt;							\
285 									\
286       do								\
287 	{								\
288 	  MPN_SQR (tp, rp, n);						\
289 	  MPN_REDUCE (rp, tp, mp, n, mip);				\
290 	  this_windowsize--;						\
291 	}								\
292       while (this_windowsize != 0);					\
293 									\
294       MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n);			\
295       MPN_REDUCE (rp, tp, mp, n, mip);					\
296     }
297 
298 
299 #if WANT_REDC_2
300   if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
301     {
302       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
303 	{
304 #undef MPN_MUL_N
305 #undef MPN_SQR
306 #undef MPN_REDUCE
307 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
308 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
309 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
310 	  INNERLOOP;
311 	}
312       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
313 	{
314 #undef MPN_MUL_N
315 #undef MPN_SQR
316 #undef MPN_REDUCE
317 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
318 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
319 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
320 	  INNERLOOP;
321 	}
322       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
323 	{
324 #undef MPN_MUL_N
325 #undef MPN_SQR
326 #undef MPN_REDUCE
327 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
328 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
329 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
330 	  INNERLOOP;
331 	}
332       else
333 	{
334 #undef MPN_MUL_N
335 #undef MPN_SQR
336 #undef MPN_REDUCE
337 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
338 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
339 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
340 	  INNERLOOP;
341 	}
342     }
343   else
344     {
345       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
346 	{
347 #undef MPN_MUL_N
348 #undef MPN_SQR
349 #undef MPN_REDUCE
350 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
351 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
352 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
353 	  INNERLOOP;
354 	}
355       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
356 	{
357 #undef MPN_MUL_N
358 #undef MPN_SQR
359 #undef MPN_REDUCE
360 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
361 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
362 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
363 	  INNERLOOP;
364 	}
365       else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
366 	{
367 #undef MPN_MUL_N
368 #undef MPN_SQR
369 #undef MPN_REDUCE
370 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
371 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
372 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_2 (rp, tp, mp, n, mip)
373 	  INNERLOOP;
374 	}
375       else
376 	{
377 #undef MPN_MUL_N
378 #undef MPN_SQR
379 #undef MPN_REDUCE
380 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
381 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
382 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
383 	  INNERLOOP;
384 	}
385     }
386 
387 #else  /* WANT_REDC_2 */
388 
389   if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
390     {
391       if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
392 	{
393 #undef MPN_MUL_N
394 #undef MPN_SQR
395 #undef MPN_REDUCE
396 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
397 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
398 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
399 	  INNERLOOP;
400 	}
401       else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
402 	{
403 #undef MPN_MUL_N
404 #undef MPN_SQR
405 #undef MPN_REDUCE
406 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
407 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
408 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
409 	  INNERLOOP;
410 	}
411       else
412 	{
413 #undef MPN_MUL_N
414 #undef MPN_SQR
415 #undef MPN_REDUCE
416 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
417 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
418 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
419 	  INNERLOOP;
420 	}
421     }
422   else
423     {
424       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
425 	{
426 #undef MPN_MUL_N
427 #undef MPN_SQR
428 #undef MPN_REDUCE
429 #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
430 #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
431 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
432 	  INNERLOOP;
433 	}
434       else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
435 	{
436 #undef MPN_MUL_N
437 #undef MPN_SQR
438 #undef MPN_REDUCE
439 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
440 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
441 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_1 (rp, tp, mp, n, mip[0])
442 	  INNERLOOP;
443 	}
444       else
445 	{
446 #undef MPN_MUL_N
447 #undef MPN_SQR
448 #undef MPN_REDUCE
449 #define MPN_MUL_N(r,a,b,n)		mpn_mul_n (r,a,b,n)
450 #define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
451 #define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
452 	  INNERLOOP;
453 	}
454     }
455 #endif  /* WANT_REDC_2 */
456 
457  done:
458 
459   MPN_COPY (tp, rp, n);
460   MPN_ZERO (tp + n, n);
461 
462 #if WANT_REDC_2
463   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
464     mpn_redc_1 (rp, tp, mp, n, mip[0]);
465   else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
466     mpn_redc_2 (rp, tp, mp, n, mip);
467 #else
468   if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
469     mpn_redc_1 (rp, tp, mp, n, mip[0]);
470 #endif
471   else
472     mpn_redc_n (rp, tp, mp, n, mip);
473 
474   if (mpn_cmp (rp, mp, n) >= 0)
475     mpn_sub_n (rp, rp, mp, n);
476 
477   TMP_FREE;
478 }
479