1 /* mpn_powm -- Compute R = U^E mod M. 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 6 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2007, 2008, 2009 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 /* 28 BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd. 29 30 1. W <- U 31 32 2. T <- (B^n * U) mod M Convert to REDC form 33 34 3. Compute table U^1, U^3, U^5... of E-dependent size 35 36 4. While there are more bits in E 37 W <- power left-to-right base-k 38 39 40 TODO: 41 42 * Make getbits a macro, thereby allowing it to update the index operand. 43 That will simplify the code using getbits. (Perhaps make getbits' sibling 44 getbit then have similar form, for symmetry.) 45 46 * Write an itch function. Or perhaps get rid of tp parameter since the huge 47 pp area is allocated locally anyway? 48 49 * Choose window size without looping. (Superoptimize or think(tm).) 50 51 * Handle small bases with initial, reduction-free exponentiation. 52 53 * Call new division functions, not mpn_tdiv_qr. 54 55 * Consider special code for one-limb M. 56 57 * How should we handle the redc1/redc2/redc_n choice? 58 - redc1: T(binvert_1limb) + e * (n) * (T(mullo-1x1) + n*T(addmul_1)) 59 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo-2x2) + n*T(addmul_2)) 60 - redc_n: T(binvert_nlimbs) + e * (T(mullo-nxn) + T(M(n))) 61 This disregards the addmul_N constant term, but we could think of 62 that as part of the respective mullo. 63 64 * When U (the base) is small, we should start the exponentiation with plain 65 operations, then convert that partial result to REDC form. 66 67 * When U is just one limb, should it be handled without the k-ary tricks? 68 We could keep a factor of B^n in W, but use U' = BU as base. After 69 multiplying by this (pseudo two-limb) number, we need to multiply by 1/B 70 mod M. 71 */ 72 73 #include "gmp.h" 74 #include "gmp-impl.h" 75 #include "longlong.h" 76 77 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 78 #define WANT_REDC_2 1 79 #endif 80 81 #define getbit(p,bi) \ 82 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1) 83 84 static inline mp_limb_t 85 getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits) 86 { 87 int nbits_in_r; 88 mp_limb_t r; 89 mp_size_t i; 90 91 if (bi < nbits) 92 { 93 return p[0] & (((mp_limb_t) 1 << bi) - 1); 94 } 95 else 96 { 97 bi -= nbits; /* bit index of low bit to extract */ 98 i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */ 99 bi %= GMP_NUMB_BITS; /* bit index in low word */ 100 r = p[i] >> bi; /* extract (low) bits */ 101 nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */ 102 if (nbits_in_r < nbits) /* did we get enough bits? */ 103 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ 104 return r & (((mp_limb_t ) 1 << nbits) - 1); 105 } 106 } 107 108 static inline int 109 win_size (mp_bitcnt_t eb) 110 { 111 int k; 112 static mp_bitcnt_t x[] = {0,7,25,81,241,673,1793,4609,11521,28161,~(mp_bitcnt_t)0}; 113 for (k = 1; eb > x[k]; k++) 114 ; 115 return k; 116 } 117 118 /* Convert U to REDC form, U_r = B^n * U mod M */ 119 static void 120 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n) 121 { 122 mp_ptr tp, qp; 123 TMP_DECL; 124 TMP_MARK; 125 126 tp = TMP_ALLOC_LIMBS (un + n); 127 qp = TMP_ALLOC_LIMBS (un + 1); /* FIXME: Put at tp+? */ 128 129 MPN_ZERO (tp, n); 130 MPN_COPY (tp + n, up, un); 131 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n); 132 TMP_FREE; 133 } 134 135 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0] 136 Requires that mp[n-1..0] is odd. 137 Requires that ep[en-1..0] is > 1. 138 Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */ 139 void 140 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, 141 mp_srcptr ep, mp_size_t en, 142 mp_srcptr mp, mp_size_t n, mp_ptr tp) 143 { 144 mp_limb_t ip[2], *mip; 145 int cnt; 146 mp_bitcnt_t ebi; 147 int windowsize, this_windowsize; 148 mp_limb_t expbits; 149 mp_ptr pp, this_pp; 150 long i; 151 TMP_DECL; 152 153 ASSERT (en > 1 || (en == 1 && ep[0] > 1)); 154 ASSERT (n >= 1 && ((mp[0] & 1) != 0)); 155 156 TMP_MARK; 157 158 count_leading_zeros (cnt, ep[en - 1]); 159 ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt; 160 161 #if 0 162 if (bn < n) 163 { 164 /* Do the first few exponent bits without mod reductions, 165 until the result is greater than the mod argument. */ 166 for (;;) 167 { 168 mpn_sqr (tp, this_pp, tn); 169 tn = tn * 2 - 1, tn += tp[tn] != 0; 170 if (getbit (ep, ebi) != 0) 171 mpn_mul (..., tp, tn, bp, bn); 172 ebi--; 173 } 174 } 175 #endif 176 177 windowsize = win_size (ebi); 178 179 #if WANT_REDC_2 180 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 181 { 182 mip = ip; 183 binvert_limb (mip[0], mp[0]); 184 mip[0] = -mip[0]; 185 } 186 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 187 { 188 mip = ip; 189 mpn_binvert (mip, mp, 2, tp); 190 mip[0] = -mip[0]; mip[1] = ~mip[1]; 191 } 192 #else 193 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 194 { 195 mip = ip; 196 binvert_limb (mip[0], mp[0]); 197 mip[0] = -mip[0]; 198 } 199 #endif 200 else 201 { 202 mip = TMP_ALLOC_LIMBS (n); 203 mpn_binvert (mip, mp, n, tp); 204 } 205 206 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1)); 207 208 this_pp = pp; 209 redcify (this_pp, bp, bn, mp, n); 210 211 /* Store b^2 at rp. */ 212 mpn_sqr (tp, this_pp, n); 213 #if WANT_REDC_2 214 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 215 mpn_redc_1 (rp, tp, mp, n, mip[0]); 216 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 217 mpn_redc_2 (rp, tp, mp, n, mip); 218 #else 219 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 220 mpn_redc_1 (rp, tp, mp, n, mip[0]); 221 #endif 222 else 223 mpn_redc_n (rp, tp, mp, n, mip); 224 225 /* Precompute odd powers of b and put them in the temporary area at pp. */ 226 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--) 227 { 228 mpn_mul_n (tp, this_pp, rp, n); 229 this_pp += n; 230 #if WANT_REDC_2 231 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 232 mpn_redc_1 (this_pp, tp, mp, n, mip[0]); 233 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 234 mpn_redc_2 (this_pp, tp, mp, n, mip); 235 #else 236 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 237 mpn_redc_1 (this_pp, tp, mp, n, mip[0]); 238 #endif 239 else 240 mpn_redc_n (this_pp, tp, mp, n, mip); 241 } 242 243 expbits = getbits (ep, ebi, windowsize); 244 if (ebi < windowsize) 245 ebi = 0; 246 else 247 ebi -= windowsize; 248 249 count_trailing_zeros (cnt, expbits); 250 ebi += cnt; 251 expbits >>= cnt; 252 253 MPN_COPY (rp, pp + n * (expbits >> 1), n); 254 255 #define INNERLOOP \ 256 while (ebi != 0) \ 257 { \ 258 while (getbit (ep, ebi) == 0) \ 259 { \ 260 MPN_SQR (tp, rp, n); \ 261 MPN_REDUCE (rp, tp, mp, n, mip); \ 262 ebi--; \ 263 if (ebi == 0) \ 264 goto done; \ 265 } \ 266 \ 267 /* The next bit of the exponent is 1. Now extract the largest \ 268 block of bits <= windowsize, and such that the least \ 269 significant bit is 1. */ \ 270 \ 271 expbits = getbits (ep, ebi, windowsize); \ 272 this_windowsize = windowsize; \ 273 if (ebi < windowsize) \ 274 { \ 275 this_windowsize -= windowsize - ebi; \ 276 ebi = 0; \ 277 } \ 278 else \ 279 ebi -= windowsize; \ 280 \ 281 count_trailing_zeros (cnt, expbits); \ 282 this_windowsize -= cnt; \ 283 ebi += cnt; \ 284 expbits >>= cnt; \ 285 \ 286 do \ 287 { \ 288 MPN_SQR (tp, rp, n); \ 289 MPN_REDUCE (rp, tp, mp, n, mip); \ 290 this_windowsize--; \ 291 } \ 292 while (this_windowsize != 0); \ 293 \ 294 MPN_MUL_N (tp, rp, pp + n * (expbits >> 1), n); \ 295 MPN_REDUCE (rp, tp, mp, n, mip); \ 296 } 297 298 299 #if WANT_REDC_2 300 if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD) 301 { 302 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 303 { 304 #undef MPN_MUL_N 305 #undef MPN_SQR 306 #undef MPN_REDUCE 307 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 308 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 309 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 310 INNERLOOP; 311 } 312 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 313 { 314 #undef MPN_MUL_N 315 #undef MPN_SQR 316 #undef MPN_REDUCE 317 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 318 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 319 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) 320 INNERLOOP; 321 } 322 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 323 { 324 #undef MPN_MUL_N 325 #undef MPN_SQR 326 #undef MPN_REDUCE 327 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 328 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 329 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) 330 INNERLOOP; 331 } 332 else 333 { 334 #undef MPN_MUL_N 335 #undef MPN_SQR 336 #undef MPN_REDUCE 337 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 338 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 339 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 340 INNERLOOP; 341 } 342 } 343 else 344 { 345 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 346 { 347 #undef MPN_MUL_N 348 #undef MPN_SQR 349 #undef MPN_REDUCE 350 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 351 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 352 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 353 INNERLOOP; 354 } 355 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 356 { 357 #undef MPN_MUL_N 358 #undef MPN_SQR 359 #undef MPN_REDUCE 360 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 361 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 362 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 363 INNERLOOP; 364 } 365 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 366 { 367 #undef MPN_MUL_N 368 #undef MPN_SQR 369 #undef MPN_REDUCE 370 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 371 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 372 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip) 373 INNERLOOP; 374 } 375 else 376 { 377 #undef MPN_MUL_N 378 #undef MPN_SQR 379 #undef MPN_REDUCE 380 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 381 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 382 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 383 INNERLOOP; 384 } 385 } 386 387 #else /* WANT_REDC_2 */ 388 389 if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD) 390 { 391 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 392 { 393 #undef MPN_MUL_N 394 #undef MPN_SQR 395 #undef MPN_REDUCE 396 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 397 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 398 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 399 INNERLOOP; 400 } 401 else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 402 { 403 #undef MPN_MUL_N 404 #undef MPN_SQR 405 #undef MPN_REDUCE 406 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 407 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 408 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 409 INNERLOOP; 410 } 411 else 412 { 413 #undef MPN_MUL_N 414 #undef MPN_SQR 415 #undef MPN_REDUCE 416 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 417 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 418 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 419 INNERLOOP; 420 } 421 } 422 else 423 { 424 if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) 425 { 426 #undef MPN_MUL_N 427 #undef MPN_SQR 428 #undef MPN_REDUCE 429 #define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n) 430 #define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n) 431 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 432 INNERLOOP; 433 } 434 else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 435 { 436 #undef MPN_MUL_N 437 #undef MPN_SQR 438 #undef MPN_REDUCE 439 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 440 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 441 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0]) 442 INNERLOOP; 443 } 444 else 445 { 446 #undef MPN_MUL_N 447 #undef MPN_SQR 448 #undef MPN_REDUCE 449 #define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n) 450 #define MPN_SQR(r,a,n) mpn_sqr (r,a,n) 451 #define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip) 452 INNERLOOP; 453 } 454 } 455 #endif /* WANT_REDC_2 */ 456 457 done: 458 459 MPN_COPY (tp, rp, n); 460 MPN_ZERO (tp + n, n); 461 462 #if WANT_REDC_2 463 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD)) 464 mpn_redc_1 (rp, tp, mp, n, mip[0]); 465 else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD)) 466 mpn_redc_2 (rp, tp, mp, n, mip); 467 #else 468 if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD)) 469 mpn_redc_1 (rp, tp, mp, n, mip[0]); 470 #endif 471 else 472 mpn_redc_n (rp, tp, mp, n, mip); 473 474 if (mpn_cmp (rp, mp, n) >= 0) 475 mpn_sub_n (rp, rp, mp, n); 476 477 TMP_FREE; 478 } 479