1 /* mpn_gcdext -- Extended Greatest Common Divisor.
2 
3 Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 #include "gmp-impl.h"
32 #include "longlong.h"
33 
34 #ifndef GCDEXT_1_USE_BINARY
35 #define GCDEXT_1_USE_BINARY 0
36 #endif
37 
38 #ifndef GCDEXT_1_BINARY_METHOD
39 #define GCDEXT_1_BINARY_METHOD 2
40 #endif
41 
42 #if GCDEXT_1_USE_BINARY
43 
44 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * sp,mp_limb_signed_t * tp,mp_limb_t u,mp_limb_t v)45 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
46 	      mp_limb_t u, mp_limb_t v)
47 {
48   /* Maintain
49 
50      U = t1 u + t0 v
51      V = s1 u + s0 v
52 
53      where U, V are the inputs (without any shared power of two),
54      and the matrix has determinant � 2^{shift}.
55   */
56   mp_limb_t s0 = 1;
57   mp_limb_t t0 = 0;
58   mp_limb_t s1 = 0;
59   mp_limb_t t1 = 1;
60   mp_limb_t ug;
61   mp_limb_t vg;
62   mp_limb_t ugh;
63   mp_limb_t vgh;
64   unsigned zero_bits;
65   unsigned shift;
66   unsigned i;
67 #if GCDEXT_1_BINARY_METHOD == 2
68   mp_limb_t det_sign;
69 #endif
70 
71   ASSERT (u > 0);
72   ASSERT (v > 0);
73 
74   count_trailing_zeros (zero_bits, u | v);
75   u >>= zero_bits;
76   v >>= zero_bits;
77 
78   if ((u & 1) == 0)
79     {
80       count_trailing_zeros (shift, u);
81       u >>= shift;
82       t1 <<= shift;
83     }
84   else if ((v & 1) == 0)
85     {
86       count_trailing_zeros (shift, v);
87       v >>= shift;
88       s0 <<= shift;
89     }
90   else
91     shift = 0;
92 
93 #if GCDEXT_1_BINARY_METHOD == 1
94   while (u != v)
95     {
96       unsigned count;
97       if (u > v)
98 	{
99 	  u -= v;
100 
101 	  count_trailing_zeros (count, u);
102 	  u >>= count;
103 
104 	  t0 += t1; t1 <<= count;
105 	  s0 += s1; s1 <<= count;
106 	}
107       else
108 	{
109 	  v -= u;
110 
111 	  count_trailing_zeros (count, v);
112 	  v >>= count;
113 
114 	  t1 += t0; t0 <<= count;
115 	  s1 += s0; s0 <<= count;
116 	}
117       shift += count;
118     }
119 #else
120 # if GCDEXT_1_BINARY_METHOD == 2
121   u >>= 1;
122   v >>= 1;
123 
124   det_sign = 0;
125 
126   while (u != v)
127     {
128       unsigned count;
129       mp_limb_t d =  u - v;
130       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
131       mp_limb_t sx;
132       mp_limb_t tx;
133 
134       /* When v <= u (vgtu == 0), the updates are:
135 
136 	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
137 	   (t1, t0) <-- (t1 << count, t0 + t1)
138 
139 	 and when v > 0, the updates are
140 
141 	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
142 	   (t1, t0) <-- (t0 << count, t0 + t1)
143 
144 	 and similarly for s1, s0
145       */
146 
147       /* v <-- min (u, v) */
148       v += (vgtu & d);
149 
150       /* u <-- |u - v| */
151       u = (d ^ vgtu) - vgtu;
152 
153       /* Number of trailing zeros is the same no matter if we look at
154        * d or u, but using d gives more parallelism. */
155       count_trailing_zeros (count, d);
156 
157       det_sign ^= vgtu;
158 
159       tx = vgtu & (t0 - t1);
160       sx = vgtu & (s0 - s1);
161       t0 += t1;
162       s0 += s1;
163       t1 += tx;
164       s1 += sx;
165 
166       count++;
167       u >>= count;
168       t1 <<= count;
169       s1 <<= count;
170       shift += count;
171     }
172   u = (u << 1) + 1;
173 # else /* GCDEXT_1_BINARY_METHOD == 2 */
174 #  error Unknown GCDEXT_1_BINARY_METHOD
175 # endif
176 #endif
177 
178   /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
179   ug = t0 + t1;
180   vg = s0 + s1;
181 
182   ugh = ug/2 + (ug & 1);
183   vgh = vg/2 + (vg & 1);
184 
185   /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
186      s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
187   for (i = 0; i < shift; i++)
188     {
189       mp_limb_t mask = - ( (s0 | t0) & 1);
190 
191       s0 /= 2;
192       t0 /= 2;
193       s0 += mask & vgh;
194       t0 += mask & ugh;
195     }
196 
197   ASSERT_ALWAYS (s0 <= vg);
198   ASSERT_ALWAYS (t0 <= ug);
199 
200   if (s0 > vg - s0)
201     {
202       s0 -= vg;
203       t0 -= ug;
204     }
205 #if GCDEXT_1_BINARY_METHOD == 2
206   /* Conditional negation. */
207   s0 = (s0 ^ det_sign) - det_sign;
208   t0 = (t0 ^ det_sign) - det_sign;
209 #endif
210   *sp = s0;
211   *tp = -t0;
212 
213   return u << zero_bits;
214 }
215 
216 #else /* !GCDEXT_1_USE_BINARY */
217 
218 
219 /* FIXME: Takes two single-word limbs. It could be extended to a
220  * function that accepts a bignum for the first input, and only
221  * returns the first co-factor. */
222 
223 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * up,mp_limb_signed_t * vp,mp_limb_t a,mp_limb_t b)224 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
225 	      mp_limb_t a, mp_limb_t b)
226 {
227   /* Maintain
228 
229      a =  u0 A + v0 B
230      b =  u1 A + v1 B
231 
232      where A, B are the original inputs.
233   */
234   mp_limb_signed_t u0 = 1;
235   mp_limb_signed_t v0 = 0;
236   mp_limb_signed_t u1 = 0;
237   mp_limb_signed_t v1 = 1;
238 
239   ASSERT (a > 0);
240   ASSERT (b > 0);
241 
242   if (a < b)
243     goto divide_by_b;
244 
245   for (;;)
246     {
247       mp_limb_t q;
248 
249       q = a / b;
250       a -= q * b;
251 
252       if (a == 0)
253 	{
254 	  *up = u1;
255 	  *vp = v1;
256 	  return b;
257 	}
258       u0 -= q * u1;
259       v0 -= q * v1;
260 
261     divide_by_b:
262       q = b / a;
263       b -= q * a;
264 
265       if (b == 0)
266 	{
267 	  *up = u0;
268 	  *vp = v0;
269 	  return a;
270 	}
271       u1 -= q * u0;
272       v1 -= q * v0;
273     }
274 }
275 #endif /* !GCDEXT_1_USE_BINARY */
276