1 /* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD. 2 3 Contributed by Paul Zimmermann. 4 5 Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2009 6 Free Software Foundation, Inc. 7 8 This file is part of the GNU MP Library. 9 10 The GNU MP Library is free software; you can redistribute it and/or modify 11 it under the terms of the GNU Lesser General Public License as published by 12 the Free Software Foundation; either version 3 of the License, or (at your 13 option) any later version. 14 15 The GNU MP Library is distributed in the hope that it will be useful, but 16 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 18 License for more details. 19 20 You should have received a copy of the GNU Lesser General Public License 21 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 22 23 24 #include "gmp.h" 25 #include "gmp-impl.h" 26 #include "longlong.h" 27 #ifdef BERKELEY_MP 28 #include "mp.h" 29 #endif 30 31 /* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and 32 t is defined by (tp,mn). */ 33 static void 34 reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn) 35 { 36 mp_ptr qp; 37 TMP_DECL; 38 39 TMP_MARK; 40 qp = TMP_ALLOC_LIMBS (an - mn + 1); 41 42 mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn); 43 44 TMP_FREE; 45 } 46 47 #if REDUCE_EXPONENT 48 /* Return the group order of the ring mod m. 
*/ 49 static mp_limb_t 50 phi (mp_limb_t t) 51 { 52 mp_limb_t d, m, go; 53 54 go = 1; 55 56 if (t % 2 == 0) 57 { 58 t = t / 2; 59 while (t % 2 == 0) 60 { 61 go *= 2; 62 t = t / 2; 63 } 64 } 65 for (d = 3;; d += 2) 66 { 67 m = d - 1; 68 for (;;) 69 { 70 unsigned long int q = t / d; 71 if (q < d) 72 { 73 if (t <= 1) 74 return go; 75 if (t == d) 76 return go * m; 77 return go * (t - 1); 78 } 79 if (t != q * d) 80 break; 81 go *= m; 82 m = d; 83 t = q; 84 } 85 } 86 } 87 #endif 88 89 /* average number of calls to redc for an exponent of n bits 90 with the sliding window algorithm of base 2^k: the optimal is 91 obtained for the value of k which minimizes 2^(k-1)+n/(k+1): 92 93 n\k 4 5 6 7 8 94 128 156* 159 171 200 261 95 256 309 307* 316 343 403 96 512 617 607* 610 632 688 97 1024 1231 1204 1195* 1207 1256 98 2048 2461 2399 2366 2360* 2396 99 4096 4918 4787 4707 4665* 4670 100 */ 101 102 103 /* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC 104 each modular multiplication costs about 2*n^2 limbs operations, whereas 105 using usual reduction it costs 3*K(n), where K(n) is the cost of a 106 multiplication using Karatsuba, and a division is assumed to cost 2*K(n), 107 for example using Burnikel-Ziegler's algorithm. This gives a theoretical 108 threshold of a*SQR_KARATSUBA_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~ 109 2.66. */ 110 /* For now, also disable REDC when MOD is even, as the inverse can't handle 111 that. At some point, we might want to make the code faster for that case, 112 perhaps using CRR. 
*/

#ifndef POWM_THRESHOLD
#define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3)
#endif

#define HANDLE_NEGATIVE_EXPONENT 1
#undef REDUCE_EXPONENT

/* Set r = b^e mod m using 2^k-ary sliding-window exponentiation, with
   Montgomery (REDC) reduction for small odd moduli and plain division
   otherwise.

   r   result, reduced to [0, |m|)
   b   base; a negative base with an odd exponent is folded back into
       range by a final subtraction from m
   e   exponent; e = 0 yields 1 mod m, and e < 0 uses 1/b mod m when
       HANDLE_NEGATIVE_EXPONENT is set (non-invertible b, like m = 0,
       raises DIVIDE_BY_ZERO)
   m   modulus; m = 0 raises DIVIDE_BY_ZERO

   Under Berkeley MP compatibility the entry point is pow() with the
   result parameter last; the body is shared.  */
void
#ifndef BERKELEY_MP
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
#else /* BERKELEY_MP */
pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
#endif /* BERKELEY_MP */
{
  mp_ptr xp, tp, qp, gp, this_gp;
  mp_srcptr bp, ep, mp;
  mp_size_t bn, es, en, mn, xn;
  mp_limb_t invm, c;
  unsigned long int enb;
  mp_size_t i, K, j, l, k;
  int m_zero_cnt, e_zero_cnt;
  int sh;
  int use_redc;
#if HANDLE_NEGATIVE_EXPONENT
  mpz_t new_b;
#endif
#if REDUCE_EXPONENT
  mpz_t new_e;
#endif
  TMP_DECL;

  mp = PTR(m);
  mn = ABSIZ (m);
  if (mn == 0)
    DIVIDE_BY_ZERO;

  TMP_MARK;

  es = SIZ (e);
  if (es <= 0)
    {
      if (es == 0)
	{
	  /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
	     m equals 1.  */
	  SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
	  PTR(r)[0] = 1;
	  TMP_FREE;	/* we haven't really allocated anything here */
	  return;
	}
#if HANDLE_NEGATIVE_EXPONENT
      /* Negative exponent: replace b by its inverse mod m and negate e.
	 A non-invertible base triggers the same error as division by 0.  */
      MPZ_TMP_INIT (new_b, mn + 1);

      if (! mpz_invert (new_b, b, m))
	DIVIDE_BY_ZERO;
      b = new_b;
      es = -es;
#else
      DIVIDE_BY_ZERO;
#endif
    }
  en = es;

#if REDUCE_EXPONENT
  /* Reduce exponent by dividing it by phi(m) when m small.  */
  if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150)
    {
      MPZ_TMP_INIT (new_e, 2);
      mpz_mod_ui (new_e, e, phi (mp[0]));
      e = new_e;
    }
#endif

  /* REDC needs an odd modulus and only pays off below the threshold.  */
  use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
  if (use_redc)
    {
      /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
      modlimb_invert (invm, mp[0]);
      invm = -invm;
    }
  else
    {
      /* Normalize m (i.e. make its most significant bit set) as required by
	 division functions below.  */
      count_leading_zeros (m_zero_cnt, mp[mn - 1]);
      m_zero_cnt -= GMP_NAIL_BITS;
      if (m_zero_cnt != 0)
	{
	  mp_ptr new_mp;
	  new_mp = TMP_ALLOC_LIMBS (mn);
	  mpn_lshift (new_mp, mp, mn, m_zero_cnt);
	  mp = new_mp;
	}
    }

  /* Determine optimal value of k, the number of exponent bits we look at
     at a time.  */
  count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
  e_zero_cnt -= GMP_NAIL_BITS;
  enb = en * GMP_NUMB_BITS - e_zero_cnt;	/* number of bits of exponent */
  k = 1;
  K = 2;					/* K = 2^k */
  while (2 * enb > K * (2 + k * (3 + k)))
    {
      k++;
      K *= 2;
      if (k == 10)				/* cap allocation */
	break;
    }

  tp = TMP_ALLOC_LIMBS (2 * mn);	/* double-size product scratch */
  qp = TMP_ALLOC_LIMBS (mn + 1);	/* quotient scratch for mpn_tdiv_qr */

  /* Table of the K/2 odd powers of the base, mn limbs each.  Allocated on
     the heap since K/2*mn can be large; freed at the end of the function.  */
  gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);

  /* Compute x*R^n where R=2^BITS_PER_MP_LIMB.  */
  bn = ABSIZ (b);
  bp = PTR(b);
  /* Handle |b| >= m by computing b mod m.  FIXME: It is not strictly necessary
     for speed or correctness to do this when b and m have the same number of
     limbs, perhaps remove mpn_cmp call.  */
  if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
    {
      /* Reduce possibly huge base while moving it to gp[0].  Use a function
	 call to reduce, since we don't want the quotient allocation to
	 live until function return.  */
      if (use_redc)
	{
	  reduce (tp + mn, bp, bn, mp, mn);	/* b mod m */
	  MPN_ZERO (tp, mn);	/* shift up by mn limbs: multiply by R^n */
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);	/* unnormalized! */
	}
      else
	{
	  reduce (gp, bp, bn, mp, mn);
	}
    }
  else
    {
      /* |b| < m.  We pad out operands to become mn limbs, which simplifies
	 the rest of the function, but slows things down when |b| << m.  */
      if (use_redc)
	{
	  MPN_ZERO (tp, mn);
	  MPN_COPY (tp + mn, bp, bn);
	  MPN_ZERO (tp + mn + bn, mn - bn);
	  mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
	}
      else
	{
	  MPN_COPY (gp, bp, bn);
	  MPN_ZERO (gp + bn, mn - bn);
	}
    }

  /* Compute xx^i for odd g < 2^i.  With x = gp[0] the (possibly
     Montgomery-form) base, xx = x^2, and each table slot gp[i] is the
     previous slot times xx, i.e. gp[i] = x^(2i+1) for i = 0..K/2-1.  */
  xp = TMP_ALLOC_LIMBS (mn);
  mpn_sqr_n (tp, gp, mn);
  if (use_redc)
    mpn_redc_1 (xp, tp, mp, mn, invm);		/* xx = x^2*R^n */
  else
    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
  this_gp = gp;
  for (i = 1; i < K / 2; i++)
    {
      mpn_mul_n (tp, this_gp, xp, mn);
      this_gp += mn;
      if (use_redc)
	mpn_redc_1 (this_gp, tp, mp, mn, invm);	/* g[i] = x^(2i+1)*R^n */
      else
	mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
    }

  /* Start the real stuff.  Scan the exponent from the most significant
     end in windows of at most k bits: i indexes the current exponent
     limb, sh counts the unread low bits of ep[i], c holds window bits.  */
  ep = PTR (e);
  i = en - 1;				/* current index */
  c = ep[i];				/* current limb */
  sh = GMP_NUMB_BITS - e_zero_cnt;	/* significant bits in ep[i] */
  sh -= k;				/* index of lower bit of ep[i] to take into account */
  if (sh < 0)
    {					/* k-sh extra bits are needed */
      if (i > 0)
	{
	  i--;
	  c <<= (-sh);
	  sh += GMP_NUMB_BITS;
	  c |= ep[i] >> sh;
	}
    }
  else
    c >>= sh;

  /* Strip trailing zero bits from the first window; they will be
     re-applied as squarings, so the table lookup needs only odd c.  */
  for (j = 0; c % 2 == 0; j++)
    c >>= 1;

  MPN_COPY (xp, gp + mn * (c >> 1), mn);   /* xx = precomputed power for odd c */
  while (--j >= 0)
    {
      mpn_sqr_n (tp, xp, mn);
      if (use_redc)
	mpn_redc_1 (xp, tp, mp, mn, invm);
      else
	mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
    }

  /* Main loop: one iteration per window of up to k exponent bits.  */
  while (i > 0 || sh > 0)
    {
      c = ep[i];
      l = k;				/* number of bits treated */
      sh -= l;
      if (sh < 0)
	{
	  if (i > 0)
	    {
	      i--;
	      c <<= (-sh);
	      sh += GMP_NUMB_BITS;
	      c |= ep[i] >> sh;
	    }
	  else
	    {
	      l += sh;			/* last chunk of bits from e; l < k */
	    }
	}
      else
	c >>= sh;
      c &= ((mp_limb_t) 1 << l) - 1;

      /* This while loop implements the sliding window improvement--loop while
	 the most significant bit of c is zero, squaring xx as we go.  */
      while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
	{
	  mpn_sqr_n (tp, xp, mn);
	  if (use_redc)
	    mpn_redc_1 (xp, tp, mp, mn, invm);
	  else
	    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
	  if (sh != 0)
	    {
	      sh--;
	      c = (c << 1) + ((ep[i] >> sh) & 1);
	    }
	  else
	    {
	      i--;			/* move on to the next limb */
	      sh = GMP_NUMB_BITS - 1;
	      c = (c << 1) + (ep[i] >> sh);
	    }
	}

      /* Replace xx by xx^(2^l)*x^c.  */
      if (c != 0)
	{
	  /* Split off the trailing zero bits of c so that only one table
	     multiplication (by an odd power) is needed.  */
	  for (j = 0; c % 2 == 0; j++)
	    c >>= 1;

	  /* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
	  l -= j;
	  while (--l >= 0)
	    {
	      mpn_sqr_n (tp, xp, mn);
	      if (use_redc)
		mpn_redc_1 (xp, tp, mp, mn, invm);
	      else
		mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
	    }
	  mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
	  if (use_redc)
	    mpn_redc_1 (xp, tp, mp, mn, invm);
	  else
	    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
	}
      else
	j = l;				/* case c=0 */
      /* Apply the j squarings deferred above (or all l of them if c=0).  */
      while (--j >= 0)
	{
	  mpn_sqr_n (tp, xp, mn);
	  if (use_redc)
	    mpn_redc_1 (xp, tp, mp, mn, invm);
	  else
	    mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
	}
    }

  if (use_redc)
    {
      /* Convert back xx to xx/R^n.  */
      MPN_COPY (tp, xp, mn);
      MPN_ZERO (tp + mn, mn);
      mpn_redc_1 (xp, tp, mp, mn, invm);
      /* One conditional subtraction canonicalizes the REDC result.  */
      if (mpn_cmp (xp, mp, mn) >= 0)
	mpn_sub_n (xp, xp, mp, mn);
    }
  else
    {
      /* If m was shifted for normalization, shift the residue up the same
	 amount, reduce once more, and shift back down.  */
      if (m_zero_cnt != 0)
	{
	  mp_limb_t cy;
	  cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
	  tp[mn] = cy;
	  mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
	  mpn_rshift (xp, xp, mn, m_zero_cnt);
	}
    }
  xn = mn;
  MPN_NORMALIZE (xp, xn);

  /* A negative base with an odd exponent gives a negative power; fold a
     nonzero residue back into [0, m) by subtracting it from m.  */
  if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
    {
      mp = PTR(m);		/* want original, unnormalized m */
      mpn_sub (xp, mp, mn, xp, xn);
      xn = mn;
      MPN_NORMALIZE (xp, xn);
    }
  MPZ_REALLOC (r, xn);
  SIZ (r) = xn;
  MPN_COPY (PTR(r), xp, xn);

  __GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);
  TMP_FREE;
}