1 /* mpn_gcdext -- Extended Greatest Common Divisor.
2
3 Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
4
5 This file is part of the GNU MP Library.
6
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9
10 * the GNU Lesser General Public License as published by the Free
11 Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13
14 or
15
16 * the GNU General Public License as published by the Free Software
17 Foundation; either version 2 of the License, or (at your option) any
18 later version.
19
20 or both in parallel, as here.
21
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
25 for more details.
26
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library. If not,
29 see https://www.gnu.org/licenses/. */
30
31 #include "gmp-impl.h"
32 #include "longlong.h"
33
34 #ifndef GCDEXT_1_USE_BINARY
35 #define GCDEXT_1_USE_BINARY 0
36 #endif
37
38 #ifndef GCDEXT_1_BINARY_METHOD
39 #define GCDEXT_1_BINARY_METHOD 2
40 #endif
41
42 #if GCDEXT_1_USE_BINARY
43
44 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * sp,mp_limb_signed_t * tp,mp_limb_t u,mp_limb_t v)45 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
46 mp_limb_t u, mp_limb_t v)
47 {
48 /* Maintain
49
50 U = t1 u + t0 v
51 V = s1 u + s0 v
52
53 where U, V are the inputs (without any shared power of two),
54 and the matrix has determinant � 2^{shift}.
55 */
56 mp_limb_t s0 = 1;
57 mp_limb_t t0 = 0;
58 mp_limb_t s1 = 0;
59 mp_limb_t t1 = 1;
60 mp_limb_t ug;
61 mp_limb_t vg;
62 mp_limb_t ugh;
63 mp_limb_t vgh;
64 unsigned zero_bits;
65 unsigned shift;
66 unsigned i;
67 #if GCDEXT_1_BINARY_METHOD == 2
68 mp_limb_t det_sign;
69 #endif
70
71 ASSERT (u > 0);
72 ASSERT (v > 0);
73
74 count_trailing_zeros (zero_bits, u | v);
75 u >>= zero_bits;
76 v >>= zero_bits;
77
78 if ((u & 1) == 0)
79 {
80 count_trailing_zeros (shift, u);
81 u >>= shift;
82 t1 <<= shift;
83 }
84 else if ((v & 1) == 0)
85 {
86 count_trailing_zeros (shift, v);
87 v >>= shift;
88 s0 <<= shift;
89 }
90 else
91 shift = 0;
92
93 #if GCDEXT_1_BINARY_METHOD == 1
94 while (u != v)
95 {
96 unsigned count;
97 if (u > v)
98 {
99 u -= v;
100
101 count_trailing_zeros (count, u);
102 u >>= count;
103
104 t0 += t1; t1 <<= count;
105 s0 += s1; s1 <<= count;
106 }
107 else
108 {
109 v -= u;
110
111 count_trailing_zeros (count, v);
112 v >>= count;
113
114 t1 += t0; t0 <<= count;
115 s1 += s0; s0 <<= count;
116 }
117 shift += count;
118 }
119 #else
120 # if GCDEXT_1_BINARY_METHOD == 2
121 u >>= 1;
122 v >>= 1;
123
124 det_sign = 0;
125
126 while (u != v)
127 {
128 unsigned count;
129 mp_limb_t d = u - v;
130 mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
131 mp_limb_t sx;
132 mp_limb_t tx;
133
134 /* When v <= u (vgtu == 0), the updates are:
135
136 (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor)
137 (t1, t0) <-- (t1 << count, t0 + t1)
138
139 and when v > 0, the updates are
140
141 (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count))
142 (t1, t0) <-- (t0 << count, t0 + t1)
143
144 and similarly for s1, s0
145 */
146
147 /* v <-- min (u, v) */
148 v += (vgtu & d);
149
150 /* u <-- |u - v| */
151 u = (d ^ vgtu) - vgtu;
152
153 /* Number of trailing zeros is the same no matter if we look at
154 * d or u, but using d gives more parallelism. */
155 count_trailing_zeros (count, d);
156
157 det_sign ^= vgtu;
158
159 tx = vgtu & (t0 - t1);
160 sx = vgtu & (s0 - s1);
161 t0 += t1;
162 s0 += s1;
163 t1 += tx;
164 s1 += sx;
165
166 count++;
167 u >>= count;
168 t1 <<= count;
169 s1 <<= count;
170 shift += count;
171 }
172 u = (u << 1) + 1;
173 # else /* GCDEXT_1_BINARY_METHOD == 2 */
174 # error Unknown GCDEXT_1_BINARY_METHOD
175 # endif
176 #endif
177
178 /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
179 ug = t0 + t1;
180 vg = s0 + s1;
181
182 ugh = ug/2 + (ug & 1);
183 vgh = vg/2 + (vg & 1);
184
185 /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
186 s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
187 for (i = 0; i < shift; i++)
188 {
189 mp_limb_t mask = - ( (s0 | t0) & 1);
190
191 s0 /= 2;
192 t0 /= 2;
193 s0 += mask & vgh;
194 t0 += mask & ugh;
195 }
196
197 ASSERT_ALWAYS (s0 <= vg);
198 ASSERT_ALWAYS (t0 <= ug);
199
200 if (s0 > vg - s0)
201 {
202 s0 -= vg;
203 t0 -= ug;
204 }
205 #if GCDEXT_1_BINARY_METHOD == 2
206 /* Conditional negation. */
207 s0 = (s0 ^ det_sign) - det_sign;
208 t0 = (t0 ^ det_sign) - det_sign;
209 #endif
210 *sp = s0;
211 *tp = -t0;
212
213 return u << zero_bits;
214 }
215
216 #else /* !GCDEXT_1_USE_BINARY */
217
218
219 /* FIXME: Takes two single-word limbs. It could be extended to a
220 * function that accepts a bignum for the first input, and only
221 * returns the first co-factor. */
222
223 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * up,mp_limb_signed_t * vp,mp_limb_t a,mp_limb_t b)224 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
225 mp_limb_t a, mp_limb_t b)
226 {
227 /* Maintain
228
229 a = u0 A + v0 B
230 b = u1 A + v1 B
231
232 where A, B are the original inputs.
233 */
234 mp_limb_signed_t u0 = 1;
235 mp_limb_signed_t v0 = 0;
236 mp_limb_signed_t u1 = 0;
237 mp_limb_signed_t v1 = 1;
238
239 ASSERT (a > 0);
240 ASSERT (b > 0);
241
242 if (a < b)
243 goto divide_by_b;
244
245 for (;;)
246 {
247 mp_limb_t q;
248
249 q = a / b;
250 a -= q * b;
251
252 if (a == 0)
253 {
254 *up = u1;
255 *vp = v1;
256 return b;
257 }
258 u0 -= q * u1;
259 v0 -= q * v1;
260
261 divide_by_b:
262 q = b / a;
263 b -= q * a;
264
265 if (b == 0)
266 {
267 *up = u0;
268 *vp = v0;
269 return a;
270 }
271 u1 -= q * u0;
272 v1 -= q * v0;
273 }
274 }
275 #endif /* !GCDEXT_1_USE_BINARY */
276