1 /* sqrmod_bnm1.c -- squaring mod B^n-1. 2 3 Contributed to the GNU project by Niels M�ller, Torbjorn Granlund and 4 Marco Bodrato. 5 6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2009, 2010 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of the GNU Lesser General Public License as published by 16 the Free Software Foundation; either version 3 of the License, or (at your 17 option) any later version. 18 19 The GNU MP Library is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 22 License for more details. 23 24 You should have received a copy of the GNU Lesser General Public License 25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 26 27 28 #include "gmp.h" 29 #include "gmp-impl.h" 30 #include "longlong.h" 31 32 /* Input is {ap,rn}; output is {rp,rn}, computation is 33 mod B^rn - 1, and values are semi-normalised; zero is represented 34 as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp. 35 tp==rp is allowed. */ 36 static void 37 mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp) 38 { 39 mp_limb_t cy; 40 41 ASSERT (0 < rn); 42 43 mpn_sqr (tp, ap, rn); 44 cy = mpn_add_n (rp, tp, tp + rn, rn); 45 /* If cy == 1, then the value of rp is at most B^rn - 2, so there can 46 * be no overflow when adding in the carry. */ 47 MPN_INCR_U (rp, rn, cy); 48 } 49 50 51 /* Input is {ap,rn+1}; output is {rp,rn+1}, in 52 semi-normalised representation, computation is mod B^rn + 1. Needs 53 a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed. 54 Output is normalised. */ 55 static void 56 mpn_bc_sqrmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp) 57 { 58 mp_limb_t cy; 59 60 ASSERT (0 < rn); 61 62 mpn_sqr (tp, ap, rn + 1); 63 ASSERT (tp[2*rn+1] == 0); 64 ASSERT (tp[2*rn] < GMP_NUMB_MAX); 65 cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn); 66 rp[rn] = 0; 67 MPN_INCR_U (rp, rn+1, cy ); 68 } 69 70 71 /* Computes {rp,MIN(rn,2an)} <- {ap,an}^2 Mod(B^rn-1) 72 * 73 * The result is expected to be ZERO if and only if the operand 74 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by 75 * B^rn-1. 76 * It should not be a problem if sqrmod_bnm1 is used to 77 * compute the full square with an <= 2*rn, because this condition 78 * implies (B^an-1)^2 < (B^rn-1) . 79 * 80 * Requires rn/4 < an <= rn 81 * Scratch need: rn/2 + (need for recursive call OR rn + 3). This gives 82 * 83 * S(n) <= rn/2 + MAX (rn + 4, S(n/2)) <= 3/2 rn + 4 84 */ 85 void 86 mpn_sqrmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_ptr tp) 87 { 88 ASSERT (0 < an); 89 ASSERT (an <= rn); 90 91 if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, SQRMOD_BNM1_THRESHOLD)) 92 { 93 if (UNLIKELY (an < rn)) 94 { 95 if (UNLIKELY (2*an <= rn)) 96 { 97 mpn_sqr (rp, ap, an); 98 } 99 else 100 { 101 mp_limb_t cy; 102 mpn_sqr (tp, ap, an); 103 cy = mpn_add (rp, tp, rn, tp + rn, 2*an - rn); 104 MPN_INCR_U (rp, rn, cy); 105 } 106 } 107 else 108 mpn_bc_sqrmod_bnm1 (rp, ap, rn, tp); 109 } 110 else 111 { 112 mp_size_t n; 113 mp_limb_t cy; 114 mp_limb_t hi; 115 116 n = rn >> 1; 117 118 ASSERT (2*an > n); 119 120 /* Compute xm = a^2 mod (B^n - 1), xp = a^2 mod (B^n + 1) 121 and crt together as 122 123 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)] 124 */ 125 126 #define a0 ap 127 #define a1 (ap + n) 128 129 #define xp tp /* 2n + 2 */ 130 /* am1 maybe in {xp, n} */ 131 #define sp1 (tp + 2*n + 2) 132 /* ap1 maybe in {sp1, n + 1} */ 133 134 { 135 mp_srcptr am1; 136 mp_size_t anm; 137 mp_ptr so; 138 139 if (LIKELY (an > n)) 140 { 141 so = xp + n; 142 am1 = xp; 143 cy = mpn_add (xp, a0, n, a1, an - n); 144 MPN_INCR_U (xp, n, cy); 145 anm = n; 146 } 147 else 148 { 149 so = xp; 150 am1 = a0; 151 anm = an; 152 } 153 154 mpn_sqrmod_bnm1 (rp, n, am1, anm, so); 155 } 156 157 { 158 int k; 159 mp_srcptr ap1; 160 mp_size_t anp; 161 162 if (LIKELY (an > n)) { 163 ap1 = sp1; 164 cy = mpn_sub (sp1, a0, n, a1, an - n); 165 sp1[n] = 0; 166 MPN_INCR_U (sp1, n + 1, cy); 167 anp = n + ap1[n]; 168 } else { 169 ap1 = a0; 170 anp = an; 171 } 172 173 if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD)) 174 k=0; 175 else 176 { 177 int mask; 178 k = mpn_fft_best_k (n, 1); 179 mask = (1<<k) -1; 180 while (n & mask) {k--; mask >>=1;}; 181 } 182 if (k >= FFT_FIRST_K) 183 xp[n] = mpn_mul_fft (xp, n, ap1, anp, ap1, anp, k); 184 else if (UNLIKELY (ap1 == a0)) 185 { 186 ASSERT (anp <= n); 187 ASSERT (2*anp > n); 188 mpn_sqr (xp, a0, an); 189 anp = 2*an - n; 190 cy = mpn_sub (xp, xp, n, xp + n, anp); 191 xp[n] = 0; 192 MPN_INCR_U (xp, n+1, cy); 193 } 194 else 195 mpn_bc_sqrmod_bnp1 (xp, ap1, n, xp); 196 } 197 198 /* Here the CRT recomposition begins. 199 200 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1) 201 Division by 2 is a bitwise rotation. 202 203 Assumes xp normalised mod (B^n+1). 204 205 The residue class [0] is represented by [B^n-1]; except when 206 both input are ZERO. 207 */ 208 209 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc 210 #if HAVE_NATIVE_mpn_rsh1add_nc 211 cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */ 212 hi = cy << (GMP_NUMB_BITS - 1); 213 cy = 0; 214 /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi 215 overflows, i.e. a further increment will not overflow again. */ 216 #else /* ! _nc */ 217 cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */ 218 hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */ 219 cy >>= 1; 220 /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that 221 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */ 222 #endif 223 #if GMP_NAIL_BITS == 0 224 add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi); 225 #else 226 cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1); 227 rp[n-1] ^= hi; 228 #endif 229 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */ 230 #if HAVE_NATIVE_mpn_add_nc 231 cy = mpn_add_nc(rp, rp, xp, n, xp[n]); 232 #else /* ! _nc */ 233 cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */ 234 #endif 235 cy += (rp[0]&1); 236 mpn_rshift(rp, rp, n, 1); 237 ASSERT (cy <= 2); 238 hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */ 239 cy >>= 1; 240 /* We can have cy != 0 only if hi = 0... */ 241 ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0); 242 rp[n-1] |= hi; 243 /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */ 244 #endif 245 ASSERT (cy <= 1); 246 /* Next increment can not overflow, read the previous comments about cy. */ 247 ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0)); 248 MPN_INCR_U(rp, n, cy); 249 250 /* Compute the highest half: 251 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n 252 */ 253 if (UNLIKELY (2*an < rn)) 254 { 255 /* Note that in this case, the only way the result can equal 256 zero mod B^{rn} - 1 is if the input is zero, and 257 then the output of both the recursive calls and this CRT 258 reconstruction is zero, not B^{rn} - 1. */ 259 cy = mpn_sub_n (rp + n, rp, xp, 2*an - n); 260 261 /* FIXME: This subtraction of the high parts is not really 262 necessary, we do it to get the carry out, and for sanity 263 checking. */ 264 cy = xp[n] + mpn_sub_nc (xp + 2*an - n, rp + 2*an - n, 265 xp + 2*an - n, rn - 2*an, cy); 266 ASSERT (mpn_zero_p (xp + 2*an - n+1, rn - 1 - 2*an)); 267 cy = mpn_sub_1 (rp, rp, 2*an, cy); 268 ASSERT (cy == (xp + 2*an - n)[0]); 269 } 270 else 271 { 272 cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n); 273 /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO. 274 DECR will affect _at most_ the lowest n limbs. */ 275 MPN_DECR_U (rp, 2*n, cy); 276 } 277 #undef a0 278 #undef a1 279 #undef xp 280 #undef sp1 281 } 282 } 283 284 mp_size_t 285 mpn_sqrmod_bnm1_next_size (mp_size_t n) 286 { 287 mp_size_t nh; 288 289 if (BELOW_THRESHOLD (n, SQRMOD_BNM1_THRESHOLD)) 290 return n; 291 if (BELOW_THRESHOLD (n, 4 * (SQRMOD_BNM1_THRESHOLD - 1) + 1)) 292 return (n + (2-1)) & (-2); 293 if (BELOW_THRESHOLD (n, 8 * (SQRMOD_BNM1_THRESHOLD - 1) + 1)) 294 return (n + (4-1)) & (-4); 295 296 nh = (n + 1) >> 1; 297 298 if (BELOW_THRESHOLD (nh, SQR_FFT_MODF_THRESHOLD)) 299 return (n + (8-1)) & (-8); 300 301 return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 1)); 302 } 303