1 /* mpn/gcd.c: mpn_gcd for gcd of two odd integers. 2 3 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 4 2004, 2005, 2008 Free Software Foundation, Inc. 5 6 This file is part of the GNU MP Library. 7 8 The GNU MP Library is free software; you can redistribute it and/or modify 9 it under the terms of the GNU Lesser General Public License as published by 10 the Free Software Foundation; either version 3 of the License, or (at your 11 option) any later version. 12 13 The GNU MP Library is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16 License for more details. 17 18 You should have received a copy of the GNU Lesser General Public License 19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21 #include "gmp.h" 22 #include "gmp-impl.h" 23 #include "longlong.h" 24 25 /* Uses the HGCD operation described in 26 27 N. M�ller, On Sch�nhage's algorithm and subquadratic integer gcd 28 computation, Math. Comp. 77 (2008), 589-607. 29 30 to reduce inputs until they are of size below GCD_DC_THRESHOLD, and 31 then uses Lehmer's algorithm. 32 */ 33 34 /* Some reasonable choices are n / 2 (same as in hgcd), and p = (n + 35 * 2)/3, which gives a balanced multiplication in 36 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better 37 * performance. The matrix-vector multiplication is then 38 * 4:1-unbalanced, with matrix elements of size n/6, and vector 39 * elements of size p = 2n/3. */ 40 41 /* From analysis of the theoretical running time, it appears that when 42 * multiplication takes time O(n^alpha), p should be chosen so that 43 * the ratio of the time for the mpn_hgcd call, and the time for the 44 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha - 45 * 1). */ 46 #ifdef TUNE_GCD_P 47 #define P_TABLE_SIZE 10000 48 mp_size_t p_table[P_TABLE_SIZE]; 49 #define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3) 50 #else 51 #define CHOOSE_P(n) (2*(n) / 3) 52 #endif 53 54 mp_size_t 55 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n) 56 { 57 mp_size_t talloc; 58 mp_size_t scratch; 59 mp_size_t matrix_scratch; 60 61 mp_size_t gn; 62 mp_ptr tp; 63 TMP_DECL; 64 65 /* FIXME: Check for small sizes first, before setting up temporary 66 storage etc. */ 67 talloc = MPN_GCD_LEHMER_N_ITCH(n); 68 69 /* For initial division */ 70 scratch = usize - n + 1; 71 if (scratch > talloc) 72 talloc = scratch; 73 74 #if TUNE_GCD_P 75 if (CHOOSE_P (n) > 0) 76 #else 77 if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 78 #endif 79 { 80 mp_size_t hgcd_scratch; 81 mp_size_t update_scratch; 82 mp_size_t p = CHOOSE_P (n); 83 mp_size_t scratch; 84 #if TUNE_GCD_P 85 /* Worst case, since we don't guarantee that n - CHOOSE_P(n) 86 is increasing */ 87 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n); 88 hgcd_scratch = mpn_hgcd_itch (n); 89 update_scratch = 2*(n - 1); 90 #else 91 matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 92 hgcd_scratch = mpn_hgcd_itch (n - p); 93 update_scratch = p + n - 1; 94 #endif 95 scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); 96 if (scratch > talloc) 97 talloc = scratch; 98 } 99 100 TMP_MARK; 101 tp = TMP_ALLOC_LIMBS(talloc); 102 103 if (usize > n) 104 { 105 mpn_tdiv_qr (tp, up, 0, up, usize, vp, n); 106 107 if (mpn_zero_p (up, n)) 108 { 109 MPN_COPY (gp, vp, n); 110 TMP_FREE; 111 return n; 112 } 113 } 114 115 #if TUNE_GCD_P 116 while (CHOOSE_P (n) > 0) 117 #else 118 while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) 119 #endif 120 { 121 struct hgcd_matrix M; 122 mp_size_t p = CHOOSE_P (n); 123 mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); 124 mp_size_t nn; 125 mpn_hgcd_matrix_init (&M, n - p, tp); 126 nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch); 127 if (nn > 0) 128 { 129 ASSERT (M.n <= (n - p - 1)/2); 130 ASSERT (M.n + p <= (p + n - 1) / 2); 131 /* Temporary storage 2 (p + M->n) <= p + n - 1. */ 132 n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch); 133 } 134 else 135 { 136 /* Temporary storage n */ 137 n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp); 138 if (n == 0) 139 { 140 TMP_FREE; 141 return gn; 142 } 143 } 144 } 145 146 gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp); 147 TMP_FREE; 148 return gn; 149 } 150 151 #ifdef TUNE_GCD_P 152 #include <stdio.h> 153 #include <string.h> 154 #include <time.h> 155 #include "speed.h" 156 157 static int 158 compare_double(const void *ap, const void *bp) 159 { 160 double a = * (const double *) ap; 161 double b = * (const double *) bp; 162 163 if (a < b) 164 return -1; 165 else if (a > b) 166 return 1; 167 else 168 return 0; 169 } 170 171 static double 172 median (double *v, size_t n) 173 { 174 qsort(v, n, sizeof(*v), compare_double); 175 176 return v[n/2]; 177 } 178 179 #define TIME(res, code) do { \ 180 double time_measurement[5]; \ 181 unsigned time_i; \ 182 \ 183 for (time_i = 0; time_i < 5; time_i++) \ 184 { \ 185 speed_starttime(); \ 186 code; \ 187 time_measurement[time_i] = speed_endtime(); \ 188 } \ 189 res = median(time_measurement, 5); \ 190 } while (0) 191 192 int 193 main(int argc, char *argv) 194 { 195 gmp_randstate_t rands; 196 mp_size_t n; 197 mp_ptr ap; 198 mp_ptr bp; 199 mp_ptr up; 200 mp_ptr vp; 201 mp_ptr gp; 202 mp_ptr tp; 203 TMP_DECL; 204 205 /* Unbuffered so if output is redirected to a file it isn't lost if the 206 program is killed part way through. */ 207 setbuf (stdout, NULL); 208 setbuf (stderr, NULL); 209 210 gmp_randinit_default (rands); 211 212 TMP_MARK; 213 214 ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE); 215 bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE); 216 up = TMP_ALLOC_LIMBS (P_TABLE_SIZE); 217 vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE); 218 gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE); 219 tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE)); 220 221 mpn_random (ap, P_TABLE_SIZE); 222 mpn_random (bp, P_TABLE_SIZE); 223 224 memset (p_table, 0, sizeof(p_table)); 225 226 for (n = 100; n++; n < P_TABLE_SIZE) 227 { 228 mp_size_t p; 229 mp_size_t best_p; 230 double best_time; 231 double lehmer_time; 232 233 if (ap[n-1] == 0) 234 ap[n-1] = 1; 235 236 if (bp[n-1] == 0) 237 bp[n-1] = 1; 238 239 p_table[n] = 0; 240 TIME(lehmer_time, { 241 MPN_COPY (up, ap, n); 242 MPN_COPY (vp, bp, n); 243 mpn_gcd_lehmer_n (gp, up, vp, n, tp); 244 }); 245 246 best_time = lehmer_time; 247 best_p = 0; 248 249 for (p = n * 0.48; p < n * 0.77; p++) 250 { 251 double t; 252 253 p_table[n] = p; 254 255 TIME(t, { 256 MPN_COPY (up, ap, n); 257 MPN_COPY (vp, bp, n); 258 mpn_gcd (gp, up, n, vp, n); 259 }); 260 261 if (t < best_time) 262 { 263 best_time = t; 264 best_p = p; 265 } 266 } 267 printf("%6d %6d %5.3g", n, best_p, (double) best_p / n); 268 if (best_p > 0) 269 { 270 double speedup = 100 * (lehmer_time - best_time) / lehmer_time; 271 printf(" %5.3g%%", speedup); 272 if (speedup < 1.0) 273 { 274 printf(" (ignored)"); 275 best_p = 0; 276 } 277 } 278 printf("\n"); 279 280 p_table[n] = best_p; 281 } 282 TMP_FREE; 283 gmp_randclear(rands); 284 return 0; 285 } 286 #endif /* TUNE_GCD_P */ 287