1 /* mpn_gcdext -- Extended Greatest Common Divisor.
2
3 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
4 Foundation, Inc.
5
6 This file is part of the GNU MP Library.
7
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
20
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24
25 #ifndef GCDEXT_1_USE_BINARY
26 #define GCDEXT_1_USE_BINARY 0
27 #endif
28
29 #ifndef GCDEXT_1_BINARY_METHOD
30 #define GCDEXT_1_BINARY_METHOD 2
31 #endif
32
33 #ifndef USE_ZEROTAB
34 #define USE_ZEROTAB 1
35 #endif
36
37 #if GCDEXT_1_USE_BINARY
38
39 #if USE_ZEROTAB
40 static unsigned char zerotab[0x40] = {
41 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
42 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
43 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
44 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
45 };
46 #endif
47
48 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * sp,mp_limb_signed_t * tp,mp_limb_t u,mp_limb_t v)49 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
50 mp_limb_t u, mp_limb_t v)
51 {
52 /* Maintain
53
54 U = t1 u + t0 v
55 V = s1 u + s0 v
56
57 where U, V are the inputs (without any shared power of two),
58 and the matris has determinant � 2^{shift}.
59 */
60 mp_limb_t s0 = 1;
61 mp_limb_t t0 = 0;
62 mp_limb_t s1 = 0;
63 mp_limb_t t1 = 1;
64 mp_limb_t ug;
65 mp_limb_t vg;
66 mp_limb_t ugh;
67 mp_limb_t vgh;
68 unsigned zero_bits;
69 unsigned shift;
70 unsigned i;
71 #if GCDEXT_1_BINARY_METHOD == 2
72 mp_limb_t det_sign;
73 #endif
74
75 ASSERT (u > 0);
76 ASSERT (v > 0);
77
78 count_trailing_zeros (zero_bits, u | v);
79 u >>= zero_bits;
80 v >>= zero_bits;
81
82 if ((u & 1) == 0)
83 {
84 count_trailing_zeros (shift, u);
85 u >>= shift;
86 t1 <<= shift;
87 }
88 else if ((v & 1) == 0)
89 {
90 count_trailing_zeros (shift, v);
91 v >>= shift;
92 s0 <<= shift;
93 }
94 else
95 shift = 0;
96
97 #if GCDEXT_1_BINARY_METHOD == 1
98 while (u != v)
99 {
100 unsigned count;
101 if (u > v)
102 {
103 u -= v;
104 #if USE_ZEROTAB
105 count = zerotab [u & 0x3f];
106 u >>= count;
107 if (UNLIKELY (count == 6))
108 {
109 unsigned c;
110 do
111 {
112 c = zerotab[u & 0x3f];
113 u >>= c;
114 count += c;
115 }
116 while (c == 6);
117 }
118 #else
119 count_trailing_zeros (count, u);
120 u >>= count;
121 #endif
122 t0 += t1; t1 <<= count;
123 s0 += s1; s1 <<= count;
124 }
125 else
126 {
127 v -= u;
128 #if USE_ZEROTAB
129 count = zerotab [v & 0x3f];
130 v >>= count;
131 if (UNLIKELY (count == 6))
132 {
133 unsigned c;
134 do
135 {
136 c = zerotab[v & 0x3f];
137 v >>= c;
138 count += c;
139 }
140 while (c == 6);
141 }
142 #else
143 count_trailing_zeros (count, v);
144 v >>= count;
145 #endif
146 t1 += t0; t0 <<= count;
147 s1 += s0; s0 <<= count;
148 }
149 shift += count;
150 }
151 #else
152 # if GCDEXT_1_BINARY_METHOD == 2
153 u >>= 1;
154 v >>= 1;
155
156 det_sign = 0;
157
158 while (u != v)
159 {
160 unsigned count;
161 mp_limb_t d = u - v;
162 mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
163 mp_limb_t sx;
164 mp_limb_t tx;
165
166 /* When v <= u (vgtu == 0), the updates are:
167
168 (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor)
169 (t1, t0) <-- (t1 << count, t0 + t1)
170
171 and when v > 0, the updates are
172
173 (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count))
174 (t1, t0) <-- (t0 << count, t0 + t1)
175
176 and similarly for s1, s0
177 */
178
179 /* v <-- min (u, v) */
180 v += (vgtu & d);
181
182 /* u <-- |u - v| */
183 u = (d ^ vgtu) - vgtu;
184
185 /* Number of trailing zeros is the same no matter if we look at
186 * d or u, but using d gives more parallelism. */
187 #if USE_ZEROTAB
188 count = zerotab[d & 0x3f];
189 if (UNLIKELY (count == 6))
190 {
191 unsigned c = 6;
192 do
193 {
194 d >>= c;
195 c = zerotab[d & 0x3f];
196 count += c;
197 }
198 while (c == 6);
199 }
200 #else
201 count_trailing_zeros (count, d);
202 #endif
203 det_sign ^= vgtu;
204
205 tx = vgtu & (t0 - t1);
206 sx = vgtu & (s0 - s1);
207 t0 += t1;
208 s0 += s1;
209 t1 += tx;
210 s1 += sx;
211
212 count++;
213 u >>= count;
214 t1 <<= count;
215 s1 <<= count;
216 shift += count;
217 }
218 u = (u << 1) + 1;
219 # else /* GCDEXT_1_BINARY_METHOD == 2 */
220 # error Unknown GCDEXT_1_BINARY_METHOD
221 # endif
222 #endif
223
224 /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
225 ug = t0 + t1;
226 vg = s0 + s1;
227
228 ugh = ug/2 + (ug & 1);
229 vgh = vg/2 + (vg & 1);
230
231 /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
232 s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
233 for (i = 0; i < shift; i++)
234 {
235 mp_limb_t mask = - ( (s0 | t0) & 1);
236
237 s0 /= 2;
238 t0 /= 2;
239 s0 += mask & vgh;
240 t0 += mask & ugh;
241 }
242 /* FIXME: Try simplifying this condition. */
243 if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
244 {
245 s0 -= vg;
246 t0 -= ug;
247 }
248 #if GCDEXT_1_BINARY_METHOD == 2
249 /* Conditional negation. */
250 s0 = (s0 ^ det_sign) - det_sign;
251 t0 = (t0 ^ det_sign) - det_sign;
252 #endif
253 *sp = s0;
254 *tp = -t0;
255
256 return u << zero_bits;
257 }
258
259 #else /* !GCDEXT_1_USE_BINARY */
260
261
262 /* FIXME: Takes two single-word limbs. It could be extended to a
263 * function that accepts a bignum for the first input, and only
264 * returns the first co-factor. */
265
266 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * up,mp_limb_signed_t * vp,mp_limb_t a,mp_limb_t b)267 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
268 mp_limb_t a, mp_limb_t b)
269 {
270 /* Maintain
271
272 a = u0 A + v0 B
273 b = u1 A + v1 B
274
275 where A, B are the original inputs.
276 */
277 mp_limb_signed_t u0 = 1;
278 mp_limb_signed_t v0 = 0;
279 mp_limb_signed_t u1 = 0;
280 mp_limb_signed_t v1 = 1;
281
282 ASSERT (a > 0);
283 ASSERT (b > 0);
284
285 if (a < b)
286 goto divide_by_b;
287
288 for (;;)
289 {
290 mp_limb_t q;
291
292 q = a / b;
293 a -= q * b;
294
295 if (a == 0)
296 {
297 *up = u1;
298 *vp = v1;
299 return b;
300 }
301 u0 -= q * u1;
302 v0 -= q * v1;
303
304 divide_by_b:
305 q = b / a;
306 b -= q * a;
307
308 if (b == 0)
309 {
310 *up = u0;
311 *vp = v0;
312 return a;
313 }
314 u1 -= q * u0;
315 v1 -= q * v0;
316 }
317 }
318 #endif /* !GCDEXT_1_USE_BINARY */
319