1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
2
3 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
4 2004, 2005, 2008 Free Software Foundation, Inc.
5
6 This file is part of the GNU MP Library.
7
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
20
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "longlong.h"
24
25 /* Uses the HGCD operation described in
26
27 N. M�ller, On Sch�nhage's algorithm and subquadratic integer gcd
28 computation, Math. Comp. 77 (2008), 589-607.
29
30 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
31 then uses Lehmer's algorithm.
32 */
33
34 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
35 * 2)/3, which gives a balanced multiplication in
36 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
37 * performance. The matrix-vector multiplication is then
38 * 4:1-unbalanced, with matrix elements of size n/6, and vector
39 * elements of size p = 2n/3. */
40
41 /* From analysis of the theoretical running time, it appears that when
42 * multiplication takes time O(n^alpha), p should be chosen so that
43 * the ratio of the time for the mpn_hgcd call, and the time for the
44 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
45 * 1). */
46 #ifdef TUNE_GCD_P
47 #define P_TABLE_SIZE 10000
48 mp_size_t p_table[P_TABLE_SIZE];
49 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
50 #else
51 #define CHOOSE_P(n) (2*(n) / 3)
52 #endif
53
54 mp_size_t
mpn_gcd(mp_ptr gp,mp_ptr up,mp_size_t usize,mp_ptr vp,mp_size_t n)55 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
56 {
57 mp_size_t talloc;
58 mp_size_t scratch;
59 mp_size_t matrix_scratch;
60
61 mp_size_t gn;
62 mp_ptr tp;
63 TMP_DECL;
64
65 /* FIXME: Check for small sizes first, before setting up temporary
66 storage etc. */
67 talloc = MPN_GCD_LEHMER_N_ITCH(n);
68
69 /* For initial division */
70 scratch = usize - n + 1;
71 if (scratch > talloc)
72 talloc = scratch;
73
74 #if TUNE_GCD_P
75 if (CHOOSE_P (n) > 0)
76 #else
77 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
78 #endif
79 {
80 mp_size_t hgcd_scratch;
81 mp_size_t update_scratch;
82 mp_size_t p = CHOOSE_P (n);
83 mp_size_t scratch;
84 #if TUNE_GCD_P
85 /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
86 is increasing */
87 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
88 hgcd_scratch = mpn_hgcd_itch (n);
89 update_scratch = 2*(n - 1);
90 #else
91 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
92 hgcd_scratch = mpn_hgcd_itch (n - p);
93 update_scratch = p + n - 1;
94 #endif
95 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
96 if (scratch > talloc)
97 talloc = scratch;
98 }
99
100 TMP_MARK;
101 tp = TMP_ALLOC_LIMBS(talloc);
102
103 if (usize > n)
104 {
105 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
106
107 if (mpn_zero_p (up, n))
108 {
109 MPN_COPY (gp, vp, n);
110 TMP_FREE;
111 return n;
112 }
113 }
114
115 #if TUNE_GCD_P
116 while (CHOOSE_P (n) > 0)
117 #else
118 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
119 #endif
120 {
121 struct hgcd_matrix M;
122 mp_size_t p = CHOOSE_P (n);
123 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
124 mp_size_t nn;
125 mpn_hgcd_matrix_init (&M, n - p, tp);
126 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
127 if (nn > 0)
128 {
129 ASSERT (M.n <= (n - p - 1)/2);
130 ASSERT (M.n + p <= (p + n - 1) / 2);
131 /* Temporary storage 2 (p + M->n) <= p + n - 1. */
132 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
133 }
134 else
135 {
136 /* Temporary storage n */
137 n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
138 if (n == 0)
139 {
140 TMP_FREE;
141 return gn;
142 }
143 }
144 }
145
146 gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
147 TMP_FREE;
148 return gn;
149 }
150
151 #ifdef TUNE_GCD_P
152 #include <stdio.h>
153 #include <string.h>
154 #include <time.h>
155 #include "speed.h"
156
157 static int
compare_double(const void * ap,const void * bp)158 compare_double(const void *ap, const void *bp)
159 {
160 double a = * (const double *) ap;
161 double b = * (const double *) bp;
162
163 if (a < b)
164 return -1;
165 else if (a > b)
166 return 1;
167 else
168 return 0;
169 }
170
171 static double
median(double * v,size_t n)172 median (double *v, size_t n)
173 {
174 qsort(v, n, sizeof(*v), compare_double);
175
176 return v[n/2];
177 }
178
179 #define TIME(res, code) do { \
180 double time_measurement[5]; \
181 unsigned time_i; \
182 \
183 for (time_i = 0; time_i < 5; time_i++) \
184 { \
185 speed_starttime(); \
186 code; \
187 time_measurement[time_i] = speed_endtime(); \
188 } \
189 res = median(time_measurement, 5); \
190 } while (0)
191
192 int
main(int argc,char * argv)193 main(int argc, char *argv)
194 {
195 gmp_randstate_t rands;
196 mp_size_t n;
197 mp_ptr ap;
198 mp_ptr bp;
199 mp_ptr up;
200 mp_ptr vp;
201 mp_ptr gp;
202 mp_ptr tp;
203 TMP_DECL;
204
205 /* Unbuffered so if output is redirected to a file it isn't lost if the
206 program is killed part way through. */
207 setbuf (stdout, NULL);
208 setbuf (stderr, NULL);
209
210 gmp_randinit_default (rands);
211
212 TMP_MARK;
213
214 ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
215 bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
216 up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
217 vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
218 gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
219 tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
220
221 mpn_random (ap, P_TABLE_SIZE);
222 mpn_random (bp, P_TABLE_SIZE);
223
224 memset (p_table, 0, sizeof(p_table));
225
226 for (n = 100; n++; n < P_TABLE_SIZE)
227 {
228 mp_size_t p;
229 mp_size_t best_p;
230 double best_time;
231 double lehmer_time;
232
233 if (ap[n-1] == 0)
234 ap[n-1] = 1;
235
236 if (bp[n-1] == 0)
237 bp[n-1] = 1;
238
239 p_table[n] = 0;
240 TIME(lehmer_time, {
241 MPN_COPY (up, ap, n);
242 MPN_COPY (vp, bp, n);
243 mpn_gcd_lehmer_n (gp, up, vp, n, tp);
244 });
245
246 best_time = lehmer_time;
247 best_p = 0;
248
249 for (p = n * 0.48; p < n * 0.77; p++)
250 {
251 double t;
252
253 p_table[n] = p;
254
255 TIME(t, {
256 MPN_COPY (up, ap, n);
257 MPN_COPY (vp, bp, n);
258 mpn_gcd (gp, up, n, vp, n);
259 });
260
261 if (t < best_time)
262 {
263 best_time = t;
264 best_p = p;
265 }
266 }
267 printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
268 if (best_p > 0)
269 {
270 double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
271 printf(" %5.3g%%", speedup);
272 if (speedup < 1.0)
273 {
274 printf(" (ignored)");
275 best_p = 0;
276 }
277 }
278 printf("\n");
279
280 p_table[n] = best_p;
281 }
282 TMP_FREE;
283 gmp_randclear(rands);
284 return 0;
285 }
286 #endif /* TUNE_GCD_P */
287