1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2 
3 Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
4 Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
10 
11   * the GNU Lesser General Public License as published by the Free
12     Software Foundation; either version 3 of the License, or (at your
13     option) any later version.
14 
15 or
16 
17   * the GNU General Public License as published by the Free Software
18     Foundation; either version 2 of the License, or (at your option) any
19     later version.
20 
21 or both in parallel, as here.
22 
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26 for more details.
27 
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library.  If not,
30 see https://www.gnu.org/licenses/.  */
31 
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
35 
36 /* Uses the HGCD operation described in
37 
38      N. Möller, On Schönhage's algorithm and subquadratic integer gcd
39      computation, Math. Comp. 77 (2008), 589-607.
40 
41   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
42   then uses Lehmer's algorithm.
43 */
44 
45 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
46  * 2)/3, which gives a balanced multiplication in
47  * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
48  * performance. The matrix-vector multiplication is then
49  * 4:1-unbalanced, with matrix elements of size n/6, and vector
50  * elements of size p = 2n/3. */
51 
52 /* From analysis of the theoretical running time, it appears that when
53  * multiplication takes time O(n^alpha), p should be chosen so that
54  * the ratio of the time for the mpn_hgcd call, and the time for the
55  * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
56  * 1). */
57 #ifdef TUNE_GCD_P
58 #define P_TABLE_SIZE 10000
59 mp_size_t p_table[P_TABLE_SIZE];
60 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
61 #else
62 #define CHOOSE_P(n) (2*(n) / 3)
63 #endif
64 
65 struct gcd_ctx
66 {
67   mp_ptr gp;
68   mp_size_t gn;
69 };
70 
71 static void
gcd_hook(void * p,mp_srcptr gp,mp_size_t gn,mp_srcptr qp,mp_size_t qn,int d)72 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
73 	  mp_srcptr qp, mp_size_t qn, int d)
74 {
75   struct gcd_ctx *ctx = (struct gcd_ctx *) p;
76   MPN_COPY (ctx->gp, gp, gn);
77   ctx->gn = gn;
78 }
79 
80 #if GMP_NAIL_BITS > 0
81 /* Nail supports should be easy, replacing the sub_ddmmss with nails
82  * logic. */
83 #error Nails not supported.
84 #endif
85 
86 /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
87    Both U and V must be odd. */
88 static inline mp_size_t
gcd_2(mp_ptr gp,mp_srcptr up,mp_srcptr vp)89 gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
90 {
91   mp_limb_t u0, u1, v0, v1;
92   mp_size_t gn;
93 
94   u0 = up[0];
95   u1 = up[1];
96   v0 = vp[0];
97   v1 = vp[1];
98 
99   ASSERT (u0 & 1);
100   ASSERT (v0 & 1);
101 
102   /* Check for u0 != v0 needed to ensure that argument to
103    * count_trailing_zeros is non-zero. */
104   while (u1 != v1 && u0 != v0)
105     {
106       unsigned long int r;
107       if (u1 > v1)
108 	{
109 	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
110 	  count_trailing_zeros (r, u0);
111 	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
112 	  u1 >>= r;
113 	}
114       else  /* u1 < v1.  */
115 	{
116 	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
117 	  count_trailing_zeros (r, v0);
118 	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
119 	  v1 >>= r;
120 	}
121     }
122 
123   gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
124 
125   /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
126   if (u1 == v1 && u0 == v0)
127     return gn;
128 
129   v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
130   gp[0] = mpn_gcd_1 (gp, gn, v0);
131 
132   return 1;
133 }
134 
135 mp_size_t
mpn_gcd(mp_ptr gp,mp_ptr up,mp_size_t usize,mp_ptr vp,mp_size_t n)136 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
137 {
138   mp_size_t talloc;
139   mp_size_t scratch;
140   mp_size_t matrix_scratch;
141 
142   struct gcd_ctx ctx;
143   mp_ptr tp;
144   TMP_DECL;
145 
146   ASSERT (usize >= n);
147   ASSERT (n > 0);
148   ASSERT (vp[n-1] > 0);
149 
150   /* FIXME: Check for small sizes first, before setting up temporary
151      storage etc. */
152   talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
153 
154   /* For initial division */
155   scratch = usize - n + 1;
156   if (scratch > talloc)
157     talloc = scratch;
158 
159 #if TUNE_GCD_P
160   if (CHOOSE_P (n) > 0)
161 #else
162   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
163 #endif
164     {
165       mp_size_t hgcd_scratch;
166       mp_size_t update_scratch;
167       mp_size_t p = CHOOSE_P (n);
168       mp_size_t scratch;
169 #if TUNE_GCD_P
170       /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
171 	 is increasing */
172       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
173       hgcd_scratch = mpn_hgcd_itch (n);
174       update_scratch = 2*(n - 1);
175 #else
176       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
177       hgcd_scratch = mpn_hgcd_itch (n - p);
178       update_scratch = p + n - 1;
179 #endif
180       scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
181       if (scratch > talloc)
182 	talloc = scratch;
183     }
184 
185   TMP_MARK;
186   tp = TMP_ALLOC_LIMBS(talloc);
187 
188   if (usize > n)
189     {
190       mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
191 
192       if (mpn_zero_p (up, n))
193 	{
194 	  MPN_COPY (gp, vp, n);
195 	  ctx.gn = n;
196 	  goto done;
197 	}
198     }
199 
200   ctx.gp = gp;
201 
202 #if TUNE_GCD_P
203   while (CHOOSE_P (n) > 0)
204 #else
205   while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
206 #endif
207     {
208       struct hgcd_matrix M;
209       mp_size_t p = CHOOSE_P (n);
210       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
211       mp_size_t nn;
212       mpn_hgcd_matrix_init (&M, n - p, tp);
213       nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
214       if (nn > 0)
215 	{
216 	  ASSERT (M.n <= (n - p - 1)/2);
217 	  ASSERT (M.n + p <= (p + n - 1) / 2);
218 	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
219 	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
220 	}
221       else
222 	{
223 	  /* Temporary storage n */
224 	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
225 	  if (n == 0)
226 	    goto done;
227 	}
228     }
229 
230   while (n > 2)
231     {
232       struct hgcd_matrix1 M;
233       mp_limb_t uh, ul, vh, vl;
234       mp_limb_t mask;
235 
236       mask = up[n-1] | vp[n-1];
237       ASSERT (mask > 0);
238 
239       if (mask & GMP_NUMB_HIGHBIT)
240 	{
241 	  uh = up[n-1]; ul = up[n-2];
242 	  vh = vp[n-1]; vl = vp[n-2];
243 	}
244       else
245 	{
246 	  int shift;
247 
248 	  count_leading_zeros (shift, mask);
249 	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
250 	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
251 	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
252 	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
253 	}
254 
255       /* Try an mpn_hgcd2 step */
256       if (mpn_hgcd2 (uh, ul, vh, vl, &M))
257 	{
258 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
259 	  MP_PTR_SWAP (up, tp);
260 	}
261       else
262 	{
263 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
264 	     small, or the difference is very small. Perform one
265 	     subtraction followed by one division. */
266 
267 	  /* Temporary storage n */
268 	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
269 	  if (n == 0)
270 	    goto done;
271 	}
272     }
273 
274   ASSERT(up[n-1] | vp[n-1]);
275 
276   if (n == 1)
277     {
278       *gp = mpn_gcd_1(up, 1, vp[0]);
279       ctx.gn = 1;
280       goto done;
281     }
282 
283   /* Due to the calling convention for mpn_gcd, at most one can be
284      even. */
285 
286   if (! (up[0] & 1))
287     MP_PTR_SWAP (up, vp);
288 
289   ASSERT (up[0] & 1);
290 
291   if (vp[0] == 0)
292     {
293       *gp = mpn_gcd_1 (up, 2, vp[1]);
294       ctx.gn = 1;
295       goto done;
296     }
297   else if (! (vp[0] & 1))
298     {
299       int r;
300       count_trailing_zeros (r, vp[0]);
301       vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
302       vp[1] >>= r;
303     }
304 
305   ctx.gn = gcd_2(gp, up, vp);
306 
307 done:
308   TMP_FREE;
309   return ctx.gn;
310 }
311