1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2 
3 Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012, 2019 Free Software
4 Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
10 
11   * the GNU Lesser General Public License as published by the Free
12     Software Foundation; either version 3 of the License, or (at your
13     option) any later version.
14 
15 or
16 
17   * the GNU General Public License as published by the Free Software
18     Foundation; either version 2 of the License, or (at your option) any
19     later version.
20 
21 or both in parallel, as here.
22 
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26 for more details.
27 
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library.  If not,
30 see https://www.gnu.org/licenses/.  */
31 
32 #include "gmp-impl.h"
33 #include "longlong.h"
34 
35 /* Uses the HGCD operation described in
36 
37      N. Möller, On Schönhage's algorithm and subquadratic integer gcd
38      computation, Math. Comp. 77 (2008), 589-607.
39 
40   to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
41   then uses Lehmer's algorithm.
42 */
43 
44 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
45  * 2)/3, which gives a balanced multiplication in
46  * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
47  * performance. The matrix-vector multiplication is then
48  * 4:1-unbalanced, with matrix elements of size n/6, and vector
49  * elements of size p = 2n/3. */
50 
51 /* From analysis of the theoretical running time, it appears that when
52  * multiplication takes time O(n^alpha), p should be chosen so that
53  * the ratio of the time for the mpn_hgcd call, and the time for the
54  * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
55  * 1). */
56 #ifdef TUNE_GCD_P
57 #define P_TABLE_SIZE 10000
58 mp_size_t p_table[P_TABLE_SIZE];
59 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
60 #else
61 #define CHOOSE_P(n) (2*(n) / 3)
62 #endif
63 
64 struct gcd_ctx
65 {
66   mp_ptr gp;
67   mp_size_t gn;
68 };
69 
70 static void
gcd_hook(void * p,mp_srcptr gp,mp_size_t gn,mp_srcptr qp,mp_size_t qn,int d)71 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
72 	  mp_srcptr qp, mp_size_t qn, int d)
73 {
74   struct gcd_ctx *ctx = (struct gcd_ctx *) p;
75   MPN_COPY (ctx->gp, gp, gn);
76   ctx->gn = gn;
77 }
78 
79 mp_size_t
mpn_gcd(mp_ptr gp,mp_ptr up,mp_size_t usize,mp_ptr vp,mp_size_t n)80 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
81 {
82   mp_size_t talloc;
83   mp_size_t scratch;
84   mp_size_t matrix_scratch;
85 
86   struct gcd_ctx ctx;
87   mp_ptr tp;
88   TMP_DECL;
89 
90   ASSERT (usize >= n);
91   ASSERT (n > 0);
92   ASSERT (vp[n-1] > 0);
93 
94   /* FIXME: Check for small sizes first, before setting up temporary
95      storage etc. */
96   talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
97 
98   /* For initial division */
99   scratch = usize - n + 1;
100   if (scratch > talloc)
101     talloc = scratch;
102 
103 #if TUNE_GCD_P
104   if (CHOOSE_P (n) > 0)
105 #else
106   if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
107 #endif
108     {
109       mp_size_t hgcd_scratch;
110       mp_size_t update_scratch;
111       mp_size_t p = CHOOSE_P (n);
112       mp_size_t scratch;
113 #if TUNE_GCD_P
114       /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
115 	 is increasing */
116       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
117       hgcd_scratch = mpn_hgcd_itch (n);
118       update_scratch = 2*(n - 1);
119 #else
120       matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
121       hgcd_scratch = mpn_hgcd_itch (n - p);
122       update_scratch = p + n - 1;
123 #endif
124       scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
125       if (scratch > talloc)
126 	talloc = scratch;
127     }
128 
129   TMP_MARK;
130   tp = TMP_ALLOC_LIMBS(talloc);
131 
132   if (usize > n)
133     {
134       mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
135 
136       if (mpn_zero_p (up, n))
137 	{
138 	  MPN_COPY (gp, vp, n);
139 	  ctx.gn = n;
140 	  goto done;
141 	}
142     }
143 
144   ctx.gp = gp;
145 
146 #if TUNE_GCD_P
147   while (CHOOSE_P (n) > 0)
148 #else
149   while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
150 #endif
151     {
152       struct hgcd_matrix M;
153       mp_size_t p = CHOOSE_P (n);
154       mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
155       mp_size_t nn;
156       mpn_hgcd_matrix_init (&M, n - p, tp);
157       nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
158       if (nn > 0)
159 	{
160 	  ASSERT (M.n <= (n - p - 1)/2);
161 	  ASSERT (M.n + p <= (p + n - 1) / 2);
162 	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
163 	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
164 	}
165       else
166 	{
167 	  /* Temporary storage n */
168 	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
169 	  if (n == 0)
170 	    goto done;
171 	}
172     }
173 
174   while (n > 2)
175     {
176       struct hgcd_matrix1 M;
177       mp_limb_t uh, ul, vh, vl;
178       mp_limb_t mask;
179 
180       mask = up[n-1] | vp[n-1];
181       ASSERT (mask > 0);
182 
183       if (mask & GMP_NUMB_HIGHBIT)
184 	{
185 	  uh = up[n-1]; ul = up[n-2];
186 	  vh = vp[n-1]; vl = vp[n-2];
187 	}
188       else
189 	{
190 	  int shift;
191 
192 	  count_leading_zeros (shift, mask);
193 	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
194 	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
195 	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
196 	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
197 	}
198 
199       /* Try an mpn_hgcd2 step */
200       if (mpn_hgcd2 (uh, ul, vh, vl, &M))
201 	{
202 	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
203 	  MP_PTR_SWAP (up, tp);
204 	}
205       else
206 	{
207 	  /* mpn_hgcd2 has failed. Then either one of a or b is very
208 	     small, or the difference is very small. Perform one
209 	     subtraction followed by one division. */
210 
211 	  /* Temporary storage n */
212 	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
213 	  if (n == 0)
214 	    goto done;
215 	}
216     }
217 
218   ASSERT(up[n-1] | vp[n-1]);
219 
220   /* Due to the calling convention for mpn_gcd, at most one can be even. */
221   if ((up[0] & 1) == 0)
222     MP_PTR_SWAP (up, vp);
223   ASSERT ((up[0] & 1) != 0);
224 
225   {
226     mp_limb_t u0, u1, v0, v1;
227     mp_double_limb_t g;
228 
229     u0 = up[0];
230     v0 = vp[0];
231 
232     if (n == 1)
233       {
234 	int cnt;
235 	count_trailing_zeros (cnt, v0);
236 	*gp = mpn_gcd_11 (u0, v0 >> cnt);
237 	ctx.gn = 1;
238 	goto done;
239       }
240 
241     v1 = vp[1];
242     if (UNLIKELY (v0 == 0))
243       {
244 	v0 = v1;
245 	v1 = 0;
246 	/* FIXME: We could invoke a mpn_gcd_21 here, just like mpn_gcd_22 could
247 	   when this situation occurs internally.  */
248       }
249     if ((v0 & 1) == 0)
250       {
251 	int cnt;
252 	count_trailing_zeros (cnt, v0);
253 	v0 = ((v1 << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK) | (v0 >> cnt);
254 	v1 >>= cnt;
255       }
256 
257     u1 = up[1];
258     g = mpn_gcd_22 (u1, u0, v1, v0);
259     gp[0] = g.d0;
260     gp[1] = g.d1;
261     ctx.gn = 1 + (g.d1 > 0);
262   }
263 done:
264   TMP_FREE;
265   return ctx.gn;
266 }
267