1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2
3 Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
4 Foundation, Inc.
5
6 This file is part of the GNU MP Library.
7
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
10
11 * the GNU Lesser General Public License as published by the Free
12 Software Foundation; either version 3 of the License, or (at your
13 option) any later version.
14
15 or
16
17 * the GNU General Public License as published by the Free Software
18 Foundation; either version 2 of the License, or (at your option) any
19 later version.
20
21 or both in parallel, as here.
22
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
26 for more details.
27
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library. If not,
30 see https://www.gnu.org/licenses/. */
31
32 #include "gmp.h"
33 #include "gmp-impl.h"
34 #include "longlong.h"
35
36 /* Uses the HGCD operation described in
37
38 N. Möller, On Schönhage's algorithm and subquadratic integer gcd
39 computation, Math. Comp. 77 (2008), 589-607.
40
41 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
42 then uses Lehmer's algorithm.
43 */
44
45 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
46 * 2)/3, which gives a balanced multiplication in
47 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
48 * performance. The matrix-vector multiplication is then
49 * 4:1-unbalanced, with matrix elements of size n/6, and vector
50 * elements of size p = 2n/3. */
51
52 /* From analysis of the theoretical running time, it appears that when
53 * multiplication takes time O(n^alpha), p should be chosen so that
54 * the ratio of the time for the mpn_hgcd call, and the time for the
55 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
56 * 1). */
57 #ifdef TUNE_GCD_P
58 #define P_TABLE_SIZE 10000
59 mp_size_t p_table[P_TABLE_SIZE];
60 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
61 #else
62 #define CHOOSE_P(n) (2*(n) / 3)
63 #endif
64
65 struct gcd_ctx
66 {
67 mp_ptr gp;
68 mp_size_t gn;
69 };
70
71 static void
gcd_hook(void * p,mp_srcptr gp,mp_size_t gn,mp_srcptr qp,mp_size_t qn,int d)72 gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
73 mp_srcptr qp, mp_size_t qn, int d)
74 {
75 struct gcd_ctx *ctx = (struct gcd_ctx *) p;
76 MPN_COPY (ctx->gp, gp, gn);
77 ctx->gn = gn;
78 }
79
80 #if GMP_NAIL_BITS > 0
81 /* Nail supports should be easy, replacing the sub_ddmmss with nails
82 * logic. */
83 #error Nails not supported.
84 #endif
85
86 /* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
87 Both U and V must be odd. */
88 static inline mp_size_t
gcd_2(mp_ptr gp,mp_srcptr up,mp_srcptr vp)89 gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
90 {
91 mp_limb_t u0, u1, v0, v1;
92 mp_size_t gn;
93
94 u0 = up[0];
95 u1 = up[1];
96 v0 = vp[0];
97 v1 = vp[1];
98
99 ASSERT (u0 & 1);
100 ASSERT (v0 & 1);
101
102 /* Check for u0 != v0 needed to ensure that argument to
103 * count_trailing_zeros is non-zero. */
104 while (u1 != v1 && u0 != v0)
105 {
106 unsigned long int r;
107 if (u1 > v1)
108 {
109 sub_ddmmss (u1, u0, u1, u0, v1, v0);
110 count_trailing_zeros (r, u0);
111 u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
112 u1 >>= r;
113 }
114 else /* u1 < v1. */
115 {
116 sub_ddmmss (v1, v0, v1, v0, u1, u0);
117 count_trailing_zeros (r, v0);
118 v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
119 v1 >>= r;
120 }
121 }
122
123 gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
124
125 /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
126 if (u1 == v1 && u0 == v0)
127 return gn;
128
129 v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
130 gp[0] = mpn_gcd_1 (gp, gn, v0);
131
132 return 1;
133 }
134
135 mp_size_t
mpn_gcd(mp_ptr gp,mp_ptr up,mp_size_t usize,mp_ptr vp,mp_size_t n)136 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
137 {
138 mp_size_t talloc;
139 mp_size_t scratch;
140 mp_size_t matrix_scratch;
141
142 struct gcd_ctx ctx;
143 mp_ptr tp;
144 TMP_DECL;
145
146 ASSERT (usize >= n);
147 ASSERT (n > 0);
148 ASSERT (vp[n-1] > 0);
149
150 /* FIXME: Check for small sizes first, before setting up temporary
151 storage etc. */
152 talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
153
154 /* For initial division */
155 scratch = usize - n + 1;
156 if (scratch > talloc)
157 talloc = scratch;
158
159 #if TUNE_GCD_P
160 if (CHOOSE_P (n) > 0)
161 #else
162 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
163 #endif
164 {
165 mp_size_t hgcd_scratch;
166 mp_size_t update_scratch;
167 mp_size_t p = CHOOSE_P (n);
168 mp_size_t scratch;
169 #if TUNE_GCD_P
170 /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
171 is increasing */
172 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
173 hgcd_scratch = mpn_hgcd_itch (n);
174 update_scratch = 2*(n - 1);
175 #else
176 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
177 hgcd_scratch = mpn_hgcd_itch (n - p);
178 update_scratch = p + n - 1;
179 #endif
180 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
181 if (scratch > talloc)
182 talloc = scratch;
183 }
184
185 TMP_MARK;
186 tp = TMP_ALLOC_LIMBS(talloc);
187
188 if (usize > n)
189 {
190 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
191
192 if (mpn_zero_p (up, n))
193 {
194 MPN_COPY (gp, vp, n);
195 ctx.gn = n;
196 goto done;
197 }
198 }
199
200 ctx.gp = gp;
201
202 #if TUNE_GCD_P
203 while (CHOOSE_P (n) > 0)
204 #else
205 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
206 #endif
207 {
208 struct hgcd_matrix M;
209 mp_size_t p = CHOOSE_P (n);
210 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
211 mp_size_t nn;
212 mpn_hgcd_matrix_init (&M, n - p, tp);
213 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
214 if (nn > 0)
215 {
216 ASSERT (M.n <= (n - p - 1)/2);
217 ASSERT (M.n + p <= (p + n - 1) / 2);
218 /* Temporary storage 2 (p + M->n) <= p + n - 1. */
219 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
220 }
221 else
222 {
223 /* Temporary storage n */
224 n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
225 if (n == 0)
226 goto done;
227 }
228 }
229
230 while (n > 2)
231 {
232 struct hgcd_matrix1 M;
233 mp_limb_t uh, ul, vh, vl;
234 mp_limb_t mask;
235
236 mask = up[n-1] | vp[n-1];
237 ASSERT (mask > 0);
238
239 if (mask & GMP_NUMB_HIGHBIT)
240 {
241 uh = up[n-1]; ul = up[n-2];
242 vh = vp[n-1]; vl = vp[n-2];
243 }
244 else
245 {
246 int shift;
247
248 count_leading_zeros (shift, mask);
249 uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
250 ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
251 vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
252 vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
253 }
254
255 /* Try an mpn_hgcd2 step */
256 if (mpn_hgcd2 (uh, ul, vh, vl, &M))
257 {
258 n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
259 MP_PTR_SWAP (up, tp);
260 }
261 else
262 {
263 /* mpn_hgcd2 has failed. Then either one of a or b is very
264 small, or the difference is very small. Perform one
265 subtraction followed by one division. */
266
267 /* Temporary storage n */
268 n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
269 if (n == 0)
270 goto done;
271 }
272 }
273
274 ASSERT(up[n-1] | vp[n-1]);
275
276 if (n == 1)
277 {
278 *gp = mpn_gcd_1(up, 1, vp[0]);
279 ctx.gn = 1;
280 goto done;
281 }
282
283 /* Due to the calling convention for mpn_gcd, at most one can be
284 even. */
285
286 if (! (up[0] & 1))
287 MP_PTR_SWAP (up, vp);
288
289 ASSERT (up[0] & 1);
290
291 if (vp[0] == 0)
292 {
293 *gp = mpn_gcd_1 (up, 2, vp[1]);
294 ctx.gn = 1;
295 goto done;
296 }
297 else if (! (vp[0] & 1))
298 {
299 int r;
300 count_trailing_zeros (r, vp[0]);
301 vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
302 vp[1] >>= r;
303 }
304
305 ctx.gn = gcd_2(gp, up, vp);
306
307 done:
308 TMP_FREE;
309 return ctx.gn;
310 }
311