1 /* mpn_gcdext -- Extended Greatest Common Divisor.
2 
3 Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of either:
9 
10   * the GNU Lesser General Public License as published by the Free
11     Software Foundation; either version 3 of the License, or (at your
12     option) any later version.
13 
14 or
15 
16   * the GNU General Public License as published by the Free Software
17     Foundation; either version 2 of the License, or (at your option) any
18     later version.
19 
20 or both in parallel, as here.
21 
22 The GNU MP Library is distributed in the hope that it will be useful, but
23 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25 for more details.
26 
27 You should have received copies of the GNU General Public License and the
28 GNU Lesser General Public License along with the GNU MP Library.  If not,
29 see https://www.gnu.org/licenses/.  */
30 
31 #include "gmp.h"
32 #include "gmp-impl.h"
33 #include "longlong.h"
34 
35 #ifndef GCDEXT_1_USE_BINARY
36 #define GCDEXT_1_USE_BINARY 0
37 #endif
38 
39 #ifndef GCDEXT_1_BINARY_METHOD
40 #define GCDEXT_1_BINARY_METHOD 2
41 #endif
42 
43 #ifndef USE_ZEROTAB
44 #define USE_ZEROTAB 1
45 #endif
46 
47 #if GCDEXT_1_USE_BINARY
48 
49 #if USE_ZEROTAB
50 static unsigned char zerotab[0x40] = {
51   6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
52   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
53   5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
54   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
55 };
56 #endif
57 
58 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * sp,mp_limb_signed_t * tp,mp_limb_t u,mp_limb_t v)59 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
60 	      mp_limb_t u, mp_limb_t v)
61 {
62   /* Maintain
63 
64      U = t1 u + t0 v
65      V = s1 u + s0 v
66 
67      where U, V are the inputs (without any shared power of two),
68      and the matrix has determinant � 2^{shift}.
69   */
70   mp_limb_t s0 = 1;
71   mp_limb_t t0 = 0;
72   mp_limb_t s1 = 0;
73   mp_limb_t t1 = 1;
74   mp_limb_t ug;
75   mp_limb_t vg;
76   mp_limb_t ugh;
77   mp_limb_t vgh;
78   unsigned zero_bits;
79   unsigned shift;
80   unsigned i;
81 #if GCDEXT_1_BINARY_METHOD == 2
82   mp_limb_t det_sign;
83 #endif
84 
85   ASSERT (u > 0);
86   ASSERT (v > 0);
87 
88   count_trailing_zeros (zero_bits, u | v);
89   u >>= zero_bits;
90   v >>= zero_bits;
91 
92   if ((u & 1) == 0)
93     {
94       count_trailing_zeros (shift, u);
95       u >>= shift;
96       t1 <<= shift;
97     }
98   else if ((v & 1) == 0)
99     {
100       count_trailing_zeros (shift, v);
101       v >>= shift;
102       s0 <<= shift;
103     }
104   else
105     shift = 0;
106 
107 #if GCDEXT_1_BINARY_METHOD == 1
108   while (u != v)
109     {
110       unsigned count;
111       if (u > v)
112 	{
113 	  u -= v;
114 #if USE_ZEROTAB
115 	  count = zerotab [u & 0x3f];
116 	  u >>= count;
117 	  if (UNLIKELY (count == 6))
118 	    {
119 	      unsigned c;
120 	      do
121 		{
122 		  c = zerotab[u & 0x3f];
123 		  u >>= c;
124 		  count += c;
125 		}
126 	      while (c == 6);
127 	    }
128 #else
129 	  count_trailing_zeros (count, u);
130 	  u >>= count;
131 #endif
132 	  t0 += t1; t1 <<= count;
133 	  s0 += s1; s1 <<= count;
134 	}
135       else
136 	{
137 	  v -= u;
138 #if USE_ZEROTAB
139 	  count = zerotab [v & 0x3f];
140 	  v >>= count;
141 	  if (UNLIKELY (count == 6))
142 	    {
143 	      unsigned c;
144 	      do
145 		{
146 		  c = zerotab[v & 0x3f];
147 		  v >>= c;
148 		  count += c;
149 		}
150 	      while (c == 6);
151 	    }
152 #else
153 	  count_trailing_zeros (count, v);
154 	  v >>= count;
155 #endif
156 	  t1 += t0; t0 <<= count;
157 	  s1 += s0; s0 <<= count;
158 	}
159       shift += count;
160     }
161 #else
162 # if GCDEXT_1_BINARY_METHOD == 2
163   u >>= 1;
164   v >>= 1;
165 
166   det_sign = 0;
167 
168   while (u != v)
169     {
170       unsigned count;
171       mp_limb_t d =  u - v;
172       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
173       mp_limb_t sx;
174       mp_limb_t tx;
175 
176       /* When v <= u (vgtu == 0), the updates are:
177 
178 	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
179 	   (t1, t0) <-- (t1 << count, t0 + t1)
180 
181 	 and when v > 0, the updates are
182 
183 	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
184 	   (t1, t0) <-- (t0 << count, t0 + t1)
185 
186 	 and similarly for s1, s0
187       */
188 
189       /* v <-- min (u, v) */
190       v += (vgtu & d);
191 
192       /* u <-- |u - v| */
193       u = (d ^ vgtu) - vgtu;
194 
195       /* Number of trailing zeros is the same no matter if we look at
196        * d or u, but using d gives more parallelism. */
197 #if USE_ZEROTAB
198       count = zerotab[d & 0x3f];
199       if (UNLIKELY (count == 6))
200 	{
201 	  unsigned c = 6;
202 	  do
203 	    {
204 	      d >>= c;
205 	      c = zerotab[d & 0x3f];
206 	      count += c;
207 	    }
208 	  while (c == 6);
209 	}
210 #else
211       count_trailing_zeros (count, d);
212 #endif
213       det_sign ^= vgtu;
214 
215       tx = vgtu & (t0 - t1);
216       sx = vgtu & (s0 - s1);
217       t0 += t1;
218       s0 += s1;
219       t1 += tx;
220       s1 += sx;
221 
222       count++;
223       u >>= count;
224       t1 <<= count;
225       s1 <<= count;
226       shift += count;
227     }
228   u = (u << 1) + 1;
229 # else /* GCDEXT_1_BINARY_METHOD == 2 */
230 #  error Unknown GCDEXT_1_BINARY_METHOD
231 # endif
232 #endif
233 
234   /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
235   ug = t0 + t1;
236   vg = s0 + s1;
237 
238   ugh = ug/2 + (ug & 1);
239   vgh = vg/2 + (vg & 1);
240 
241   /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
242      s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
243   for (i = 0; i < shift; i++)
244     {
245       mp_limb_t mask = - ( (s0 | t0) & 1);
246 
247       s0 /= 2;
248       t0 /= 2;
249       s0 += mask & vgh;
250       t0 += mask & ugh;
251     }
252   /* FIXME: Try simplifying this condition. */
253   if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
254     {
255       s0 -= vg;
256       t0 -= ug;
257     }
258 #if GCDEXT_1_BINARY_METHOD == 2
259   /* Conditional negation. */
260   s0 = (s0 ^ det_sign) - det_sign;
261   t0 = (t0 ^ det_sign) - det_sign;
262 #endif
263   *sp = s0;
264   *tp = -t0;
265 
266   return u << zero_bits;
267 }
268 
269 #else /* !GCDEXT_1_USE_BINARY */
270 
271 
272 /* FIXME: Takes two single-word limbs. It could be extended to a
273  * function that accepts a bignum for the first input, and only
274  * returns the first co-factor. */
275 
276 mp_limb_t
mpn_gcdext_1(mp_limb_signed_t * up,mp_limb_signed_t * vp,mp_limb_t a,mp_limb_t b)277 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
278 	      mp_limb_t a, mp_limb_t b)
279 {
280   /* Maintain
281 
282      a =  u0 A + v0 B
283      b =  u1 A + v1 B
284 
285      where A, B are the original inputs.
286   */
287   mp_limb_signed_t u0 = 1;
288   mp_limb_signed_t v0 = 0;
289   mp_limb_signed_t u1 = 0;
290   mp_limb_signed_t v1 = 1;
291 
292   ASSERT (a > 0);
293   ASSERT (b > 0);
294 
295   if (a < b)
296     goto divide_by_b;
297 
298   for (;;)
299     {
300       mp_limb_t q;
301 
302       q = a / b;
303       a -= q * b;
304 
305       if (a == 0)
306 	{
307 	  *up = u1;
308 	  *vp = v1;
309 	  return b;
310 	}
311       u0 -= q * u1;
312       v0 -= q * v1;
313 
314     divide_by_b:
315       q = b / a;
316       b -= q * a;
317 
318       if (b == 0)
319 	{
320 	  *up = u0;
321 	  *vp = v0;
322 	  return a;
323 	}
324       u1 -= q * u0;
325       v1 -= q * v0;
326     }
327 }
328 #endif /* !GCDEXT_1_USE_BINARY */
329