1 /* mpn_gcdext -- Extended Greatest Common Divisor. 2 3 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software 4 Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MP Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21 #include "gmp.h" 22 #include "gmp-impl.h" 23 #include "longlong.h" 24 25 #ifndef GCDEXT_1_USE_BINARY 26 #define GCDEXT_1_USE_BINARY 0 27 #endif 28 29 #ifndef GCDEXT_1_BINARY_METHOD 30 #define GCDEXT_1_BINARY_METHOD 2 31 #endif 32 33 #ifndef USE_ZEROTAB 34 #define USE_ZEROTAB 1 35 #endif 36 37 #if GCDEXT_1_USE_BINARY 38 39 #if USE_ZEROTAB 40 static unsigned char zerotab[0x40] = { 41 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 42 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 43 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 44 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 45 }; 46 #endif 47 48 mp_limb_t 49 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp, 50 mp_limb_t u, mp_limb_t v) 51 { 52 /* Maintain 53 54 U = t1 u + t0 v 55 V = s1 u + s0 v 56 57 where U, V are the inputs (without any shared power of two), 58 and the matris has determinant � 2^{shift}. 59 */ 60 mp_limb_t s0 = 1; 61 mp_limb_t t0 = 0; 62 mp_limb_t s1 = 0; 63 mp_limb_t t1 = 1; 64 mp_limb_t ug; 65 mp_limb_t vg; 66 mp_limb_t ugh; 67 mp_limb_t vgh; 68 unsigned zero_bits; 69 unsigned shift; 70 unsigned i; 71 #if GCDEXT_1_BINARY_METHOD == 2 72 mp_limb_t det_sign; 73 #endif 74 75 ASSERT (u > 0); 76 ASSERT (v > 0); 77 78 count_trailing_zeros (zero_bits, u | v); 79 u >>= zero_bits; 80 v >>= zero_bits; 81 82 if ((u & 1) == 0) 83 { 84 count_trailing_zeros (shift, u); 85 u >>= shift; 86 t1 <<= shift; 87 } 88 else if ((v & 1) == 0) 89 { 90 count_trailing_zeros (shift, v); 91 v >>= shift; 92 s0 <<= shift; 93 } 94 else 95 shift = 0; 96 97 #if GCDEXT_1_BINARY_METHOD == 1 98 while (u != v) 99 { 100 unsigned count; 101 if (u > v) 102 { 103 u -= v; 104 #if USE_ZEROTAB 105 count = zerotab [u & 0x3f]; 106 u >>= count; 107 if (UNLIKELY (count == 6)) 108 { 109 unsigned c; 110 do 111 { 112 c = zerotab[u & 0x3f]; 113 u >>= c; 114 count += c; 115 } 116 while (c == 6); 117 } 118 #else 119 count_trailing_zeros (count, u); 120 u >>= count; 121 #endif 122 t0 += t1; t1 <<= count; 123 s0 += s1; s1 <<= count; 124 } 125 else 126 { 127 v -= u; 128 #if USE_ZEROTAB 129 count = zerotab [v & 0x3f]; 130 v >>= count; 131 if (UNLIKELY (count == 6)) 132 { 133 unsigned c; 134 do 135 { 136 c = zerotab[v & 0x3f]; 137 v >>= c; 138 count += c; 139 } 140 while (c == 6); 141 } 142 #else 143 count_trailing_zeros (count, v); 144 v >>= count; 145 #endif 146 t1 += t0; t0 <<= count; 147 s1 += s0; s0 <<= count; 148 } 149 shift += count; 150 } 151 #else 152 # if GCDEXT_1_BINARY_METHOD == 2 153 u >>= 1; 154 v >>= 1; 155 156 det_sign = 0; 157 158 while (u != v) 159 { 160 unsigned count; 161 mp_limb_t d = u - v; 162 mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d); 163 mp_limb_t sx; 164 mp_limb_t tx; 165 166 /* When v <= u (vgtu == 0), the updates are: 167 168 (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor) 169 (t1, t0) <-- (t1 << count, t0 + t1) 170 171 and when v > 0, the updates are 172 173 (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count)) 174 (t1, t0) <-- (t0 << count, t0 + t1) 175 176 and similarly for s1, s0 177 */ 178 179 /* v <-- min (u, v) */ 180 v += (vgtu & d); 181 182 /* u <-- |u - v| */ 183 u = (d ^ vgtu) - vgtu; 184 185 /* Number of trailing zeros is the same no matter if we look at 186 * d or u, but using d gives more parallelism. */ 187 #if USE_ZEROTAB 188 count = zerotab[d & 0x3f]; 189 if (UNLIKELY (count == 6)) 190 { 191 unsigned c = 6; 192 do 193 { 194 d >>= c; 195 c = zerotab[d & 0x3f]; 196 count += c; 197 } 198 while (c == 6); 199 } 200 #else 201 count_trailing_zeros (count, d); 202 #endif 203 det_sign ^= vgtu; 204 205 tx = vgtu & (t0 - t1); 206 sx = vgtu & (s0 - s1); 207 t0 += t1; 208 s0 += s1; 209 t1 += tx; 210 s1 += sx; 211 212 count++; 213 u >>= count; 214 t1 <<= count; 215 s1 <<= count; 216 shift += count; 217 } 218 u = (u << 1) + 1; 219 # else /* GCDEXT_1_BINARY_METHOD == 2 */ 220 # error Unknown GCDEXT_1_BINARY_METHOD 221 # endif 222 #endif 223 224 /* Now u = v = g = gcd (u,v). Compute U/g and V/g */ 225 ug = t0 + t1; 226 vg = s0 + s1; 227 228 ugh = ug/2 + (ug & 1); 229 vgh = vg/2 + (vg & 1); 230 231 /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using 232 s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */ 233 for (i = 0; i < shift; i++) 234 { 235 mp_limb_t mask = - ( (s0 | t0) & 1); 236 237 s0 /= 2; 238 t0 /= 2; 239 s0 += mask & vgh; 240 t0 += mask & ugh; 241 } 242 /* FIXME: Try simplifying this condition. */ 243 if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) ) 244 { 245 s0 -= vg; 246 t0 -= ug; 247 } 248 #if GCDEXT_1_BINARY_METHOD == 2 249 /* Conditional negation. */ 250 s0 = (s0 ^ det_sign) - det_sign; 251 t0 = (t0 ^ det_sign) - det_sign; 252 #endif 253 *sp = s0; 254 *tp = -t0; 255 256 return u << zero_bits; 257 } 258 259 #else /* !GCDEXT_1_USE_BINARY */ 260 261 262 /* FIXME: Takes two single-word limbs. It could be extended to a 263 * function that accepts a bignum for the first input, and only 264 * returns the first co-factor. */ 265 266 mp_limb_t 267 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp, 268 mp_limb_t a, mp_limb_t b) 269 { 270 /* Maintain 271 272 a = u0 A + v0 B 273 b = u1 A + v1 B 274 275 where A, B are the original inputs. 276 */ 277 mp_limb_signed_t u0 = 1; 278 mp_limb_signed_t v0 = 0; 279 mp_limb_signed_t u1 = 0; 280 mp_limb_signed_t v1 = 1; 281 282 ASSERT (a > 0); 283 ASSERT (b > 0); 284 285 if (a < b) 286 goto divide_by_b; 287 288 for (;;) 289 { 290 mp_limb_t q; 291 292 q = a / b; 293 a -= q * b; 294 295 if (a == 0) 296 { 297 *up = u1; 298 *vp = v1; 299 return b; 300 } 301 u0 -= q * u1; 302 v0 -= q * v1; 303 304 divide_by_b: 305 q = b / a; 306 b -= q * a; 307 308 if (b == 0) 309 { 310 *up = u0; 311 *vp = v0; 312 return a; 313 } 314 u1 -= q * u0; 315 v1 -= q * v0; 316 } 317 } 318 #endif /* !GCDEXT_1_USE_BINARY */ 319