/* mulmod_bnm1.c -- multiplication mod B^n-1.

   Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
   Marco Bodrato.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */


#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
   mod B^rn - 1, and values are semi-normalised; zero is represented
   as either 0 or B^n - 1. Needs a scratch of 2rn limbs at tp.
   tp==rp is allowed.

   Basecase: compute the full 2rn-limb product, then reduce by folding
   the high half onto the low half, using B^rn == 1 (mod B^rn - 1). */
void
mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
		    mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  /* Full product {tp,2rn} = {ap,rn} * {bp,rn}.  */
  mpn_mul_n (tp, ap, bp, rn);
  /* Fold: low half + high half; the carry out represents one more B^rn,
     i.e. one more unit mod B^rn - 1.  */
  cy = mpn_add_n (rp, tp, tp + rn, rn);
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
   * be no overflow when adding in the carry. */
  MPN_INCR_U (rp, rn, cy);
}


/* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
   semi-normalised representation, computation is mod B^rn + 1.
   Needs a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
   Output is normalised. */
static void
mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
		    mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  /* Full (rn+1)x(rn+1) product into {tp, 2rn+2}.  Since both inputs are
     semi-normalised (value < B^rn + 1, at most 1 in the top limb), the
     product's top limb is zero and the limb below it is not all-ones,
     as the two ASSERTs below check.  */
  mpn_mul_n (tp, ap, bp, rn + 1);
  ASSERT (tp[2*rn+1] == 0);
  ASSERT (tp[2*rn] < GMP_NUMB_MAX);
  /* Reduce using B^rn == -1 (mod B^rn + 1): subtract the high half from
     the low half; tp[2*rn] carries the same weight as the borrow.  */
  cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
  rp[rn] = 0;
  /* Add back cy * 1 (each subtracted B^rn is worth -1 taken twice);
     result is normalised with rp[rn] at most 1.  */
  MPN_INCR_U (rp, rn+1, cy );
}


/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
 *
 * The result is expected to be ZERO if and only if one of the operand
 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
 * combine results and obtain a natural number when one knows in
 * advance that the final value is less than (B^rn-1).
 * Moreover it should not be a problem if mulmod_bnm1 is used to
 * compute the full product with an+bn <= rn, because this condition
 * implies (B^an-1)(B^bn-1) < (B^rn-1) .
 *
 * Requires 0 < bn <= an <= rn and an + bn > rn/2
 * Scratch need: rn + (need for recursive call OR rn + 4).
 * This gives
 *
 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
 */
void
mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
{
  ASSERT (0 < bn);
  ASSERT (bn <= an);
  ASSERT (an <= rn);

  /* Odd rn cannot be split in half; small rn is below the threshold
     where the CRT scheme pays off.  Either way, fall back to plain
     multiplication plus an explicit wrap-around.  */
  if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
    {
      if (UNLIKELY (bn < rn))
	{
	  if (UNLIKELY (an + bn <= rn))
	    {
	      /* Product fits without reduction.  */
	      mpn_mul (rp, ap, an, bp, bn);
	    }
	  else
	    {
	      /* Product overflows rn limbs: fold the high an+bn-rn limbs
		 back onto the low part (B^rn == 1 mod B^rn - 1).  */
	      mp_limb_t cy;
	      mpn_mul (tp, ap, an, bp, bn);
	      cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
	      MPN_INCR_U (rp, rn, cy);
	    }
	}
      else
	mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
    }
  else
    {
      mp_size_t n;
      mp_limb_t cy;
      mp_limb_t hi;

      n = rn >> 1;

      /* We need at least an + bn >= n, to be able to fit one of the
	 recursive products at rp. Requiring strict inequality makes
	 the code slightly simpler. If desired, we could avoid this
	 restriction by initially halving rn as long as rn is even and
	 an + bn <= rn/2. */

      ASSERT (an + bn > n);

      /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
	 and crt together as

	 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
      */

#define a0 ap
#define a1 (ap + n)
#define b0 bp
#define b1 (bp + n)

#define xp  tp	/* 2n + 2 */
      /* am1  maybe in {xp, n} */
      /* bm1  maybe in {xp + n, n} */
#define sp1 (tp + 2*n + 2)
      /* ap1  maybe in {sp1, n + 1} */
      /* bp1  maybe in {sp1 + n + 1, n + 1} */

      /* First half: xm = a*b mod (B^n - 1), computed recursively into
	 {rp,n}.  The operands are first reduced mod B^n - 1 by adding
	 their halves (am1 = a0 + a1, bm1 = b0 + b1) when they exceed n
	 limbs; otherwise they are used in place.  */
      {
	mp_srcptr am1, bm1;
	mp_size_t anm, bnm;
	mp_ptr so;

	if (LIKELY (an > n))
	  {
	    am1 = xp;
	    cy = mpn_add (xp, a0, n, a1, an - n);
	    MPN_INCR_U (xp, n, cy);
	    anm = n;
	    if (LIKELY (bn > n))
	      {
		bm1 = xp + n;
		cy = mpn_add (xp + n, b0, n, b1, bn - n);
		MPN_INCR_U (xp + n, n, cy);
		bnm = n;
		so = xp + 2*n;
	      }
	    else
	      {
		/* b already fits in n limbs; scratch starts right after am1.  */
		so = xp + n;
		bm1 = b0;
		bnm = bn;
	      }
	  }
	else
	  {
	    /* Both operands fit in n limbs; use them unreduced.  */
	    so = xp;
	    am1 = a0;
	    anm = an;
	    bm1 = b0;
	    bnm = bn;
	  }

	mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
      }

      /* Second half: xp = a*b mod (B^n + 1), computed into {xp,n+1}.
	 Operands exceeding n limbs are reduced by subtracting their high
	 half (ap1 = a0 - a1 mod B^n + 1), normalised to n+1 limbs.  */
      {
	int k;
	mp_srcptr ap1, bp1;
	mp_size_t anp, bnp;

	if (LIKELY (an > n)) {
	  ap1 = sp1;
	  cy = mpn_sub (sp1, a0, n, a1, an - n);
	  sp1[n] = 0;
	  /* Add back the borrow as +1 (since -B^n == 1 mod B^n + 1).  */
	  MPN_INCR_U (sp1, n + 1, cy);
	  anp = n + ap1[n];
	} else {
	  ap1 = a0;
	  anp = an;
	}

	if (LIKELY (bn > n)) {
	  bp1 = sp1 + n + 1;
	  cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
	  sp1[2*n+1] = 0;
	  MPN_INCR_U (sp1 + n + 1, n + 1, cy);
	  bnp = n + bp1[n];
	} else {
	  bp1 = b0;
	  bnp = bn;
	}

	/* Pick an FFT splitting parameter k, then shrink it until n is
	   divisible by 2^k (the loop clears k while any masked bit of n
	   is set); k = 0 disables the FFT path below threshold.  */
	if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
	  k=0;
	else
	  {
	    int mask;
	    k = mpn_fft_best_k (n, 0);
	    mask = (1<<k) -1;
	    while (n & mask) {k--; mask >>=1;};
	  }
	if (k >= FFT_FIRST_K)
	  xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
	else if (UNLIKELY (bp1 == b0))
	  {
	    /* b was short and unreduced: plain multiply, then reduce the
	       product mod B^n + 1 by subtracting its high part.  */
	    ASSERT (anp + bnp <= 2*n+1);
	    ASSERT (anp + bnp > n);
	    ASSERT (anp >= bnp);
	    mpn_mul (xp, ap1, anp, bp1, bnp);
	    anp = anp + bnp - n;
	    ASSERT (anp <= n || xp[2*n]==0);
	    anp-= anp > n;
	    cy = mpn_sub (xp, xp, n, xp + n, anp);
	    xp[n] = 0;
	    MPN_INCR_U (xp, n+1, cy);
	  }
	else
	  mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
      }

      /* Here the CRT recomposition begins.

	 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
	 Division by 2 is a bitwise rotation.

	 Assumes xp normalised mod (B^n+1).

	 The residue class [0] is represented by [B^n-1]; except when
	 both input are ZERO.
      */

#if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
#if HAVE_NATIVE_mpn_rsh1add_nc
      cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
      hi = cy << (GMP_NUMB_BITS - 1);
      cy = 0;
      /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
	 overflows, i.e. a further increment will not overflow again. */
#else /* ! _nc */
      cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
	 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
#endif
#if GMP_NAIL_BITS == 0
      add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
#else
      /* With nails the shifted-in bit cannot be added with add_ssaaaa;
	 fold it in with explicit carry extraction and an xor.  */
      cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
      rp[n-1] ^= hi;
#endif
#else /* ! HAVE_NATIVE_mpn_rsh1add_n */
#if HAVE_NATIVE_mpn_add_nc
      cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
#else /* ! _nc */
      cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
#endif
      /* Rotate right by one bit: the bit shifted out at the bottom
	 (rp[0]&1) plus the carry re-enter at the top as hi.  */
      cy += (rp[0]&1);
      mpn_rshift(rp, rp, n, 1);
      ASSERT (cy <= 2);
      hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
      cy >>= 1;
      /* We can have cy != 0 only if hi = 0... */
      ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
      rp[n-1] |= hi;
      /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
#endif
      ASSERT (cy <= 1);
      /* Next increment can not overflow, read the previous comments about cy. */
      ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
      MPN_INCR_U(rp, n, cy);

      /* Compute the highest half:
	 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
      */
      if (UNLIKELY (an + bn < rn))
	{
	  /* Note that in this case, the only way the result can equal
	     zero mod B^{rn} - 1 is if one of the inputs is zero, and
	     then the output of both the recursive calls and this CRT
	     reconstruction is zero, not B^{rn} - 1. Which is good,
	     since the latter representation doesn't fit in the output
	     area.*/
	  cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);

	  /* FIXME: This subtraction of the high parts is not really
	     necessary, we do it to get the carry out, and for sanity
	     checking. */
	  cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
				   xp + an + bn - n, rn - (an + bn), cy);
	  ASSERT (an + bn == rn - 1 ||
		  mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
	  cy = mpn_sub_1 (rp, rp, an + bn, cy);
	  ASSERT (cy == (xp + an + bn - n)[0]);
	}
      else
	{
	  cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
	  /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
	     DECR will affect _at most_ the lowest n limbs. */
	  MPN_DECR_U (rp, 2*n, cy);
	}
#undef a0
#undef a1
#undef b0
#undef b1
#undef xp
#undef sp1
    }
}

/* Returns the smallest size rn >= n for which mpn_mulmod_bnm1 runs
   efficiently: below the threshold any n works; in the intermediate
   ranges rn is rounded up to a multiple of 2, 4 or 8 so the recursive
   halving stays even; for FFT-sized operands the size is derived from
   mpn_fft_next_size on the half.  */
mp_size_t
mpn_mulmod_bnm1_next_size (mp_size_t n)
{
  mp_size_t nh;

  if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
    return n;
  if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (2-1)) & (-2);
  if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
    return (n + (4-1)) & (-4);

  nh = (n + 1) >> 1;

  if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
    return (n + (8-1)) & (-8);

  return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));
}