1 /* mpn_powm -- Compute R = U^E mod M. 2 3 Copyright 2007, 2008, 2009 Free Software Foundation, Inc. 4 5 This file is part of the GNU MP Library. 6 7 The GNU MP Library is free software; you can redistribute it and/or modify 8 it under the terms of the GNU Lesser General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or (at your 10 option) any later version. 11 12 The GNU MP Library is distributed in the hope that it will be useful, but 13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15 License for more details. 16 17 You should have received a copy of the GNU Lesser General Public License 18 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 19 20 21 /* 22 BASIC ALGORITHM, Compute b^e mod n, where n is odd. 23 24 1. w <- b 25 26 2. While w^2 < n (and there are more bits in e) 27 w <- power left-to-right base-2 without reduction 28 29 3. t <- (B^n * b) / n Convert to REDC form 30 31 4. Compute power table of e-dependent size 32 33 5. While there are more bits in e 34 w <- power left-to-right base-k with reduction 35 36 37 TODO: 38 39 * Make getbits a macro, thereby allowing it to update the index operand. 40 That will simplify the code using getbits. (Perhaps make getbits' sibling 41 getbit then have similar form, for symmetry.) 42 43 * Write an itch function. 44 45 * Choose window size without looping. (Superoptimize or think(tm).) 46 47 * How do we handle small bases? 48 49 * This is slower than old mpz code, in particular if we base it on redc_1 50 (use: #undef HAVE_NATIVE_mpn_addmul_2). Why? 51 52 * Make it sub-quadratic. 53 54 * Call new division functions, not mpn_tdiv_qr. 55 56 * Is redc obsolete with improved SB division? 57 58 * Consider special code for one-limb M. 59 60 * CRT for N = odd*2^t: 61 Using Newton's method and 2-adic arithmetic: 62 m1_inv_m2 = 1/odd mod 2^t 63 Plain 2-adic (REDC) modexp: 64 r1 = a ^ b mod odd 65 Mullo+sqrlo-based modexp: 66 r2 = a ^ b mod 2^t 67 mullo, mul, add: 68 r = ((r2 - r1) * m1_i_m2 mod 2^t) * odd + r1 69 70 * How should we handle the redc1/redc2/redc2/redc4/redc_subquad choice? 71 - redc1: T(binvert_1limb) + e * (n) * (T(mullo1x1) + n*T(addmul_1)) 72 - redc2: T(binvert_2limbs) + e * (n/2) * (T(mullo2x2) + n*T(addmul_2)) 73 - redc3: T(binvert_3limbs) + e * (n/3) * (T(mullo3x3) + n*T(addmul_3)) 74 This disregards the addmul_N constant term, but we could think of 75 that as part of the respective mulloNxN. 76 */ 77 78 #include "gmp.h" 79 #include "gmp-impl.h" 80 #include "longlong.h" 81 82 83 #define getbit(p,bi) \ 84 ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1) 85 86 static inline mp_limb_t 87 getbits (const mp_limb_t *p, unsigned long bi, int nbits) 88 { 89 int nbits_in_r; 90 mp_limb_t r; 91 mp_size_t i; 92 93 if (bi < nbits) 94 { 95 return p[0] & (((mp_limb_t) 1 << bi) - 1); 96 } 97 else 98 { 99 bi -= nbits; /* bit index of low bit to extract */ 100 i = bi / GMP_LIMB_BITS; /* word index of low bit to extract */ 101 bi %= GMP_LIMB_BITS; /* bit index in low word */ 102 r = p[i] >> bi; /* extract (low) bits */ 103 nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */ 104 if (nbits_in_r < nbits) /* did we get enough bits? */ 105 r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */ 106 return r & (((mp_limb_t ) 1 << nbits) - 1); 107 } 108 } 109 110 #undef HAVE_NATIVE_mpn_addmul_2 111 112 #ifndef HAVE_NATIVE_mpn_addmul_2 113 #define REDC_2_THRESHOLD MP_SIZE_T_MAX 114 #endif 115 116 #ifndef REDC_2_THRESHOLD 117 #define REDC_2_THRESHOLD 4 118 #endif 119 120 static void mpn_redc_n () {ASSERT_ALWAYS(0);} 121 122 static inline int 123 win_size (unsigned long eb) 124 { 125 int k; 126 static unsigned long x[] = {1,7,25,81,241,673,1793,4609,11521,28161,~0ul}; 127 for (k = 0; eb > x[k]; k++) 128 ; 129 return k; 130 } 131 132 #define MPN_REDC_X(rp, tp, mp, n, mip) \ 133 do { \ 134 if (redc_x == 1) \ 135 mpn_redc_1 (rp, tp, mp, n, mip[0]); \ 136 else if (redc_x == 2) \ 137 mpn_redc_2 (rp, tp, mp, n, mip); \ 138 else \ 139 mpn_redc_n (rp, tp, mp, n, mip); \ 140 } while (0) 141 142 /* Convert U to REDC form, U_r = B^n * U mod M */ 143 static void 144 redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n) 145 { 146 mp_ptr tp, qp; 147 TMP_DECL; 148 TMP_MARK; 149 150 tp = TMP_ALLOC_LIMBS (un + n); 151 qp = TMP_ALLOC_LIMBS (un + 1); /* FIXME: Put at tp+? */ 152 153 MPN_ZERO (tp, n); 154 MPN_COPY (tp + n, up, un); 155 mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n); 156 TMP_FREE; 157 } 158 159 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0] 160 Requires that mp[n-1..0] is odd. 161 Requires that ep[en-1..0] is > 1. 162 Uses scratch space tp[3n..0], i.e., 3n+1 words. */ 163 void 164 mpn_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn, 165 mp_srcptr ep, mp_size_t en, 166 mp_srcptr mp, mp_size_t n, mp_ptr tp) 167 { 168 mp_limb_t mip[2]; 169 int cnt; 170 long ebi; 171 int windowsize, this_windowsize; 172 mp_limb_t expbits; 173 mp_ptr pp, this_pp, last_pp; 174 mp_ptr b2p; 175 long i; 176 int redc_x; 177 TMP_DECL; 178 179 ASSERT (en > 1 || (en == 1 && ep[0] > 1)); 180 ASSERT (n >= 1 && ((mp[0] & 1) != 0)); 181 182 TMP_MARK; 183 184 count_leading_zeros (cnt, ep[en - 1]); 185 ebi = en * GMP_LIMB_BITS - cnt; 186 187 #if 0 188 if (bn < n) 189 { 190 /* Do the first few exponent bits without mod reductions, 191 until the result is greater than the mod argument. */ 192 for (;;) 193 { 194 mpn_sqr_n (tp, this_pp, tn); 195 tn = tn * 2 - 1, tn += tp[tn] != 0; 196 if (getbit (ep, ebi) != 0) 197 mpn_mul (..., tp, tn, bp, bn); 198 ebi--; 199 } 200 } 201 #endif 202 203 windowsize = win_size (ebi); 204 205 if (BELOW_THRESHOLD (n, REDC_2_THRESHOLD)) 206 { 207 binvert_limb (mip[0], mp[0]); 208 mip[0] = -mip[0]; 209 redc_x = 1; 210 } 211 #if defined (HAVE_NATIVE_mpn_addmul_2) 212 else 213 { 214 mpn_binvert (mip, mp, 2, tp); 215 mip[0] = -mip[0]; mip[1] = ~mip[1]; 216 redc_x = 2; 217 } 218 #endif 219 #if 0 220 mpn_binvert (mip, mp, n, tp); 221 redc_x = 0; 222 #endif 223 224 pp = TMP_ALLOC_LIMBS (n << (windowsize - 1)); 225 226 this_pp = pp; 227 redcify (this_pp, bp, bn, mp, n); 228 229 b2p = tp + 2*n; 230 231 /* Store b^2 in b2. */ 232 mpn_sqr_n (tp, this_pp, n); 233 MPN_REDC_X (b2p, tp, mp, n, mip); 234 235 /* Precompute odd powers of b and put them in the temporary area at pp. */ 236 for (i = (1 << (windowsize - 1)) - 1; i > 0; i--) 237 { 238 last_pp = this_pp; 239 this_pp += n; 240 mpn_mul_n (tp, last_pp, b2p, n); 241 MPN_REDC_X (this_pp, tp, mp, n, mip); 242 } 243 244 expbits = getbits (ep, ebi, windowsize); 245 ebi -= windowsize; 246 if (ebi < 0) 247 ebi = 0; 248 249 count_trailing_zeros (cnt, expbits); 250 ebi += cnt; 251 expbits >>= cnt; 252 253 MPN_COPY (rp, pp + n * (expbits >> 1), n); 254 255 while (ebi != 0) 256 { 257 while (getbit (ep, ebi) == 0) 258 { 259 mpn_sqr_n (tp, rp, n); 260 MPN_REDC_X (rp, tp, mp, n, mip); 261 ebi--; 262 if (ebi == 0) 263 goto done; 264 } 265 266 /* The next bit of the exponent is 1. Now extract the largest block of 267 bits <= windowsize, and such that the least significant bit is 1. */ 268 269 expbits = getbits (ep, ebi, windowsize); 270 ebi -= windowsize; 271 this_windowsize = windowsize; 272 if (ebi < 0) 273 { 274 this_windowsize += ebi; 275 ebi = 0; 276 } 277 278 count_trailing_zeros (cnt, expbits); 279 this_windowsize -= cnt; 280 ebi += cnt; 281 expbits >>= cnt; 282 283 do 284 { 285 mpn_sqr_n (tp, rp, n); 286 MPN_REDC_X (rp, tp, mp, n, mip); 287 this_windowsize--; 288 } 289 while (this_windowsize != 0); 290 291 mpn_mul_n (tp, rp, pp + n * (expbits >> 1), n); 292 MPN_REDC_X (rp, tp, mp, n, mip); 293 } 294 295 done: 296 MPN_COPY (tp, rp, n); 297 MPN_ZERO (tp + n, n); 298 MPN_REDC_X (rp, tp, mp, n, mip); 299 if (mpn_cmp (rp, mp, n) >= 0) 300 mpn_sub_n (rp, rp, mp, n); 301 TMP_FREE; 302 } 303