xref: /dragonfly/contrib/gmp/mpn/generic/gcdext_1.c (revision f2c43266)
1 /* mpn_gcdext -- Extended Greatest Common Divisor.
2 
3 Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
4 Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20 
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24 
25 #ifndef GCDEXT_1_USE_BINARY
26 #define GCDEXT_1_USE_BINARY 0
27 #endif
28 
29 #ifndef GCDEXT_1_BINARY_METHOD
30 #define GCDEXT_1_BINARY_METHOD 2
31 #endif
32 
33 #ifndef USE_ZEROTAB
34 #define USE_ZEROTAB 1
35 #endif
36 
37 #if GCDEXT_1_USE_BINARY
38 
39 #if USE_ZEROTAB
40 static unsigned char zerotab[0x40] = {
41   6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
42   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
43   5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
44   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
45 };
46 #endif
47 
48 mp_limb_t
49 mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
50 	      mp_limb_t u, mp_limb_t v)
51 {
52   /* Maintain
53 
54      U = t1 u + t0 v
55      V = s1 u + s0 v
56 
57      where U, V are the inputs (without any shared power of two),
58      and the matris has determinant � 2^{shift}.
59   */
60   mp_limb_t s0 = 1;
61   mp_limb_t t0 = 0;
62   mp_limb_t s1 = 0;
63   mp_limb_t t1 = 1;
64   mp_limb_t ug;
65   mp_limb_t vg;
66   mp_limb_t ugh;
67   mp_limb_t vgh;
68   unsigned zero_bits;
69   unsigned shift;
70   unsigned i;
71 #if GCDEXT_1_BINARY_METHOD == 2
72   mp_limb_t det_sign;
73 #endif
74 
75   ASSERT (u > 0);
76   ASSERT (v > 0);
77 
78   count_trailing_zeros (zero_bits, u | v);
79   u >>= zero_bits;
80   v >>= zero_bits;
81 
82   if ((u & 1) == 0)
83     {
84       count_trailing_zeros (shift, u);
85       u >>= shift;
86       t1 <<= shift;
87     }
88   else if ((v & 1) == 0)
89     {
90       count_trailing_zeros (shift, v);
91       v >>= shift;
92       s0 <<= shift;
93     }
94   else
95     shift = 0;
96 
97 #if GCDEXT_1_BINARY_METHOD == 1
98   while (u != v)
99     {
100       unsigned count;
101       if (u > v)
102 	{
103 	  u -= v;
104 #if USE_ZEROTAB
105 	  count = zerotab [u & 0x3f];
106 	  u >>= count;
107 	  if (UNLIKELY (count == 6))
108 	    {
109 	      unsigned c;
110 	      do
111 		{
112 		  c = zerotab[u & 0x3f];
113 		  u >>= c;
114 		  count += c;
115 		}
116 	      while (c == 6);
117 	    }
118 #else
119 	  count_trailing_zeros (count, u);
120 	  u >>= count;
121 #endif
122 	  t0 += t1; t1 <<= count;
123 	  s0 += s1; s1 <<= count;
124 	}
125       else
126 	{
127 	  v -= u;
128 #if USE_ZEROTAB
129 	  count = zerotab [v & 0x3f];
130 	  v >>= count;
131 	  if (UNLIKELY (count == 6))
132 	    {
133 	      unsigned c;
134 	      do
135 		{
136 		  c = zerotab[v & 0x3f];
137 		  v >>= c;
138 		  count += c;
139 		}
140 	      while (c == 6);
141 	    }
142 #else
143 	  count_trailing_zeros (count, v);
144 	  v >>= count;
145 #endif
146 	  t1 += t0; t0 <<= count;
147 	  s1 += s0; s0 <<= count;
148 	}
149       shift += count;
150     }
151 #else
152 # if GCDEXT_1_BINARY_METHOD == 2
153   u >>= 1;
154   v >>= 1;
155 
156   det_sign = 0;
157 
158   while (u != v)
159     {
160       unsigned count;
161       mp_limb_t d =  u - v;
162       mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
163       mp_limb_t sx;
164       mp_limb_t tx;
165 
166       /* When v <= u (vgtu == 0), the updates are:
167 
168 	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
169 	   (t1, t0) <-- (t1 << count, t0 + t1)
170 
171 	 and when v > 0, the updates are
172 
173 	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
174 	   (t1, t0) <-- (t0 << count, t0 + t1)
175 
176 	 and similarly for s1, s0
177       */
178 
179       /* v <-- min (u, v) */
180       v += (vgtu & d);
181 
182       /* u <-- |u - v| */
183       u = (d ^ vgtu) - vgtu;
184 
185       /* Number of trailing zeros is the same no matter if we look at
186        * d or u, but using d gives more parallelism. */
187 #if USE_ZEROTAB
188       count = zerotab[d & 0x3f];
189       if (UNLIKELY (count == 6))
190 	{
191 	  unsigned c = 6;
192 	  do
193 	    {
194 	      d >>= c;
195 	      c = zerotab[d & 0x3f];
196 	      count += c;
197 	    }
198 	  while (c == 6);
199 	}
200 #else
201       count_trailing_zeros (count, d);
202 #endif
203       det_sign ^= vgtu;
204 
205       tx = vgtu & (t0 - t1);
206       sx = vgtu & (s0 - s1);
207       t0 += t1;
208       s0 += s1;
209       t1 += tx;
210       s1 += sx;
211 
212       count++;
213       u >>= count;
214       t1 <<= count;
215       s1 <<= count;
216       shift += count;
217     }
218   u = (u << 1) + 1;
219 # else /* GCDEXT_1_BINARY_METHOD == 2 */
220 #  error Unknown GCDEXT_1_BINARY_METHOD
221 # endif
222 #endif
223 
224   /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
225   ug = t0 + t1;
226   vg = s0 + s1;
227 
228   ugh = ug/2 + (ug & 1);
229   vgh = vg/2 + (vg & 1);
230 
231   /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
232      s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
233   for (i = 0; i < shift; i++)
234     {
235       mp_limb_t mask = - ( (s0 | t0) & 1);
236 
237       s0 /= 2;
238       t0 /= 2;
239       s0 += mask & vgh;
240       t0 += mask & ugh;
241     }
242   /* FIXME: Try simplifying this condition. */
243   if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
244     {
245       s0 -= vg;
246       t0 -= ug;
247     }
248 #if GCDEXT_1_BINARY_METHOD == 2
249   /* Conditional negation. */
250   s0 = (s0 ^ det_sign) - det_sign;
251   t0 = (t0 ^ det_sign) - det_sign;
252 #endif
253   *sp = s0;
254   *tp = -t0;
255 
256   return u << zero_bits;
257 }
258 
259 #else /* !GCDEXT_1_USE_BINARY */
260 
261 
262 /* FIXME: Takes two single-word limbs. It could be extended to a
263  * function that accepts a bignum for the first input, and only
264  * returns the first co-factor. */
265 
266 mp_limb_t
267 mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
268 	      mp_limb_t a, mp_limb_t b)
269 {
270   /* Maintain
271 
272      a =  u0 A + v0 B
273      b =  u1 A + v1 B
274 
275      where A, B are the original inputs.
276   */
277   mp_limb_signed_t u0 = 1;
278   mp_limb_signed_t v0 = 0;
279   mp_limb_signed_t u1 = 0;
280   mp_limb_signed_t v1 = 1;
281 
282   ASSERT (a > 0);
283   ASSERT (b > 0);
284 
285   if (a < b)
286     goto divide_by_b;
287 
288   for (;;)
289     {
290       mp_limb_t q;
291 
292       q = a / b;
293       a -= q * b;
294 
295       if (a == 0)
296 	{
297 	  *up = u1;
298 	  *vp = v1;
299 	  return b;
300 	}
301       u0 -= q * u1;
302       v0 -= q * v1;
303 
304     divide_by_b:
305       q = b / a;
306       b -= q * a;
307 
308       if (b == 0)
309 	{
310 	  *up = u0;
311 	  *vp = v0;
312 	  return a;
313 	}
314       u1 -= q * u0;
315       v1 -= q * v0;
316     }
317 }
318 #endif /* !GCDEXT_1_USE_BINARY */
319