/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.

Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
for more details.

You should have received a copy of the GNU Lesser General Public License along
with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* Change this to "#define TRACE(x) x" for some traces. */
#define TRACE(x)


/* Right shift {src,size} by "shift" bits into dst, or plain MPN_COPY when
   shift is zero.  The bits shifted out must all be zero (ASSERT_NOCARRY
   checks this in debug builds).  NOTE: after a non-zero shift, "size" is
   decremented when the top limb of dst became zero, i.e. the macro keeps
   dst normalized -- so "size" must be an lvalue and is evaluated more than
   once; don't pass an expression with side effects.  */
#define MPN_RSHIFT_OR_COPY(dst,src,size,shift)                  \
  do {                                                          \
    if ((shift) != 0)                                           \
      {                                                         \
        ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift));    \
        (size) -= ((dst)[(size)-1] == 0);                       \
      }                                                         \
    else                                                        \
      MPN_COPY (dst, src, size);                                \
  } while (0)


/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.

   mpz_jacobi could assume b is odd, but the improvements from that seem
   small compared to other operations, and anything significant should be
   checked at run-time since we'd like odd b to go fast in mpz_kronecker
   too.

   mpz_legendre could assume b is an odd prime, but knowing this doesn't
   present any obvious benefits.  Result 0 wouldn't arise (unless "a" is a
   multiple of b), but the checking for that takes little time compared to
   other operations.

   The main loop is just a simple binary GCD with the jacobi symbol result
   tracked during the reduction.

   The special cases for a or b fitting in one limb let mod_1 or modexact_1
   get used, without any copying, and end up just as efficient as the mixed
   precision mpz_kronecker_ui etc.

   When tdiv_qr is called it's not necessary to make "a" odd or make a
   working copy of it, but tdiv_qr is going to be pretty slow so it's not
   worth bothering trying to save anything for that case.

   Enhancements:

   mpn_bdivmod could be used instead of mpn_tdiv_qr, like in mpn_gcd.
   Currently tdiv_qr is preferred since it's sub-quadratic on big sizes,
   although bdivmod might be a touch quicker on small sizes.  This can be
   revised when bdivmod becomes sub-quadratic too.

   Some sort of multi-step algorithm should be used.  The current subtract
   and shift for every bit is very inefficient.  Lehmer (per current gcdext)
   would need some low bits included in its calculation to apply the sign
   change for reciprocity.  Binary Lehmer keeps low bits to strip twos
   anyway, so might be better suited.  Maybe the accelerated GCD style k-ary
   reduction would work, if sign changes due to the extra factors it
   introduces can be accounted for (or maybe they can be ignored).
*/


/* Return the Jacobi/Kronecker symbol (a/b).  The sign-twist accumulated
   during the reduction is carried in bit 1 of result_bit1 and converted to
   the final value by JACOBI_BIT1_TO_PN / mpn_jacobi_base at the exit
   points (NOTE(review): the JACOBI_* macro semantics are defined in
   gmp-impl.h -- presumably the usual -1/0/+1 convention; confirm there).  */
int
mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
{
  mp_srcptr  asrcp, bsrcp;
  mp_size_t  asize, bsize;
  mp_ptr     ap, bp;
  mp_limb_t  alow, blow, ahigh, bhigh, asecond, bsecond;
  unsigned   atwos, btwos;
  int        result_bit1;
  TMP_DECL;

  TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
         mpz_trace (" a", a);
         mpz_trace (" b", b));

  asize = SIZ(a);
  asrcp = PTR(a);
  alow = asrcp[0];

  bsize = SIZ(b);
  if (bsize == 0)
    return JACOBI_LS0 (alow, asize);  /* (a/0) */

  bsrcp = PTR(b);
  blow = bsrcp[0];

  if (asize == 0)
    return JACOBI_0LS (blow, bsize);  /* (0/b) */

  /* (even/even)=0 */
  if (((alow | blow) & 1) == 0)
    return 0;

  /* account for effect of sign of b, then ignore it */
  result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
  bsize = ABS (bsize);

  /* low zero limbs on b can be discarded */
  JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);

  count_trailing_zeros (btwos, blow);
  TRACE (printf ("b twos %u\n", btwos));

  /* establish shifted blow: pull in bits from the second limb so blow is
     the low limb of b with its twos stripped */
  blow >>= btwos;
  if (bsize > 1)
    {
      bsecond = bsrcp[1];
      if (btwos != 0)
        blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
    }

  /* account for effect of sign of a, then ignore it */
  result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
  asize = ABS (asize);

  /* bsecond is only read when bsize >= 2, in which case it was set above */
  if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
    {
      /* special case one limb b, use modexact and no copying */

      /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);

      if (blow == 1)   /* (a/1)=1 always */
        return JACOBI_BIT1_TO_PN (result_bit1);

      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
      TRACE (printf ("base (%lu/%lu) with %d\n",
                     alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
      return mpn_jacobi_base (alow, blow, result_bit1);
    }

  /* Discard low zero limbs of a.  Usually there won't be anything to
     strip, hence not bothering with it for the bsize==1 case.  */
  JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);

  count_trailing_zeros (atwos, alow);
  TRACE (printf ("a twos %u\n", atwos));
  result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);

  /* establish shifted alow, same construction as blow above */
  alow >>= atwos;
  if (asize > 1)
    {
      asecond = asrcp[1];
      if (atwos != 0)
        alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
    }

  /* (a/2)=(2/a) with a odd */
  result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);

  /* asecond is only read when asize >= 2, in which case it was set above */
  if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
    {
      /* another special case with modexact and no copying */

      if (alow == 1)  /* (1/b)=1 always */
        return JACOBI_BIT1_TO_PN (result_bit1);

      /* b still has its twos, so cancel out their effect */
      result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);

      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);  /* now (b/a) */
      JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
      TRACE (printf ("base (%lu/%lu) with %d\n",
                     blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
      return mpn_jacobi_base (blow, alow, result_bit1);
    }


  /* General case: working copies of a and b with their twos stripped,
     released via TMP_FREE at the two exit points below.  */
  TMP_MARK;
  TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);

  MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
  ASSERT (alow == ap[0]);
  TRACE (mpn_trace ("stripped a", ap, asize));

  MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
  ASSERT (blow == bp[0]);
  TRACE (mpn_trace ("stripped b", bp, bsize));

  /* swap if necessary to make a longer than b */
  if (asize < bsize)
    {
      TRACE (printf ("swap\n"));
      MPN_PTR_SWAP (ap,asize, bp,bsize);
      MP_LIMB_T_SWAP (alow, blow);
      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
    }

  /* If a is bigger than b then reduce to a mod b.
     Division is much faster than chipping away at "a" bit-by-bit.  */
  if (asize > bsize)
    {
      mp_ptr  rp, qp;

      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));

      TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
      mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
      ap = rp;            /* quotient qp is discarded, only remainder kept */
      asize = bsize;
      MPN_NORMALIZE (ap, asize);

      TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
             mpn_trace (" a", ap, asize);
             mpn_trace (" b", bp, bsize));

      if (asize == 0)  /* (0/b)=0 for b!=1 */
        goto zero;

      alow = ap[0];
      goto strip_a;
    }

  /* Binary GCD main loop: keep a >= b, subtract, strip twos, repeat,
     folding each swap and each factor of 2 into result_bit1.  */
  for (;;)
    {
      ASSERT (asize >= 1);         /* a,b non-empty */
      ASSERT (bsize >= 1);
      ASSERT (ap[asize-1] != 0);   /* a,b normalized (and hence non-zero) */
      ASSERT (bp[bsize-1] != 0);
      ASSERT (alow == ap[0]);      /* low limb copies should be correct */
      ASSERT (blow == bp[0]);
      ASSERT (alow & 1);           /* a,b odd */
      ASSERT (blow & 1);

      TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
             mpn_trace (" a", ap, asize);
             mpn_trace (" b", bp, bsize));

      /* swap if necessary to make a>=b, applying reciprocity
         high limbs are almost always enough to tell which is bigger */
      if (asize < bsize
          || (asize == bsize
              && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
                  || (ahigh == bhigh
                      && mpn_cmp (ap, bp, asize-1) < 0))))
        {
          TRACE (printf ("swap\n"));
          MPN_PTR_SWAP (ap,asize, bp,bsize);
          MP_LIMB_T_SWAP (alow, blow);
          result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
        }

      if (asize == 1)
        break;

      /* a = a-b */
      ASSERT (asize >= bsize);
      ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
      MPN_NORMALIZE (ap, asize);
      alow = ap[0];

      /* (0/b)=0 for b!=1.  b!=1 when a==0 because otherwise would have had
         a==1 which is asize==1 and would have exited above.  */
      if (asize == 0)
        goto zero;

    strip_a:
      /* low zero limbs on a can be discarded */
      JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);

      if ((alow & 1) == 0)
        {
          /* factors of 2 from a */
          unsigned  twos;
          count_trailing_zeros (twos, alow);
          TRACE (printf ("twos %u\n", twos));
          result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
          ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
          asize -= (ap[asize-1] == 0);  /* renormalize after the shift */
          alow = ap[0];
        }
    }

  ASSERT (asize == 1 && bsize == 1);  /* just alow and blow left */
  TMP_FREE;

  /* (1/b)=1 always (in this case have b==1 because a>=b) */
  if (alow == 1)
    return JACOBI_BIT1_TO_PN (result_bit1);

  /* swap with reciprocity and do (b/a) */
  result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
  TRACE (printf ("base (%lu/%lu) with %d\n",
                 blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
  return mpn_jacobi_base (blow, alow, result_bit1);

 zero:
  TMP_FREE;
  return 0;
}