1 #include "ecm-gpu.h"
2
3 #ifdef WITH_GPU
4
/* Upper bound used when generating and validating sigma values in gpu_ecm() */
#define TWO32 4294967296 /* 2^32 */

/* Defined outside this file. gpu_ecm() calls it with the requested device,
   a pointer to the number of curves and a verbosity flag, and treats any
   non-zero return value as an initialization failure. */
extern int select_and_init_GPU (int, unsigned int*, int);
/* Defined outside this file. gpu_ecm_stage1() calls it with N, 3*N, M, invN,
   the four per-curve host arrays, the batch exponent s, the first sigma, the
   number of curves and the verbosity; the returned float is reported by
   gpu_ecm() as the GPU time in milliseconds. */
extern float cuda_Main (biguint_t, biguint_t, biguint_t, digit_t, biguint_t*,
                        biguint_t*, biguint_t*, biguint_t*, mpz_t, unsigned int,
                        unsigned int, int);
11
/* Turn the final projective point (xfin : zfin) of one curve into a result.

   If gcd (zfin, N) is not 1, that gcd is stored in factor and
   ECM_FACTOR_FOUND_STEP1 is returned.
   Otherwise zfin is replaced by its inverse modulo N, xfin by xfin/zfin mod N,
   this residue is copied into factor and ECM_NO_FACTOR_FOUND is returned. */
int
findfactor (mpz_t factor, mpz_t N, mpz_t xfin, mpz_t zfin)
{
  mpz_t g;

  mpz_init (g);
  mpz_gcd (g, zfin, N);

  if (mpz_cmp_ui (g, 1) != 0)
    {
      /* g != 1 (and g > 0 because N > 0), so we found a factor */
      mpz_set (factor, g);
      mpz_clear (g);
      return ECM_FACTOR_FOUND_STEP1;
    }

  mpz_clear (g);

  /* zfin is invertible: normalise the point to the affine residue x/z */
  mpz_invert (zfin, zfin, N);
  mpz_mul (xfin, xfin, zfin);
  mpz_mod (xfin, xfin, N);
  mpz_set (factor, xfin);

  return ECM_NO_FACTOR_FOUND;
}
38
/* Replace x by x * 2^ECM_GPU_MAX_BITS mod n (in place). */
void
to_mont_repr (mpz_t x, mpz_t n)
{
  /* x <- x * 2^ECM_GPU_MAX_BITS */
  mpz_mul_2exp (x, x, ECM_GPU_MAX_BITS);
  /* x <- x mod n */
  mpz_mod (x, x, n);
}
44
/* Replace x by x * invB mod n (in place). The caller in this file passes
   invB = 2^(-ECM_GPU_MAX_BITS) mod n, which undoes to_mont_repr(). */
void
from_mont_repr (mpz_t x, mpz_t n, mpz_t invB)
{
  /* x <- x * invB */
  mpz_mul (x, x, invB);
  /* x <- x mod n */
  mpz_mod (x, x, n);
}
50
/* Copy the limbs of b into the ECM_GPU_NB_DIGITS digits of a, least
   significant digit first. With 64-bit GMP limbs each limb is split into
   its low and high 32-bit halves. */
void
mpz_to_biguint (biguint_t a, mpz_t b)
{
  int digit;

#if GMP_NUMB_BITS == 32
  /* One GMP limb per digit */
  for (digit = 0; digit < ECM_GPU_NB_DIGITS; digit++)
    a[digit] = mpz_getlimbn (b, digit);
#else /* GMP_NUMB_BITS == 64 */
  /* Two digits per GMP limb: even index = low half, odd index = high half */
  for (digit = 0; digit < ECM_GPU_NB_DIGITS; digit++)
    {
      const mp_limb_t limb = mpz_getlimbn (b, digit / 2);

      if (digit & 1)
        a[digit] = (limb >> 32);
      else
        a[digit] = (limb & 0x00000000ffffffff);
    }
#endif
}
67
/* Rebuild a from the ECM_GPU_NB_DIGITS digits of b, each digit contributing
   32 bits; b[0] is the least significant digit. */
void
biguint_to_mpz (mpz_t a, biguint_t b)
{
  int pos = ECM_GPU_NB_DIGITS;

  mpz_set_ui (a, 0);

  /* Horner scheme from the most significant digit downwards */
  while (pos-- > 0)
    {
      mpz_mul_2exp (a, a, 32);
      mpz_add_ui (a, a, b[pos]);
    }
}
80
/* Run stage 1 on the GPU for number_of_curves curves with parameters
   sigma = firstsigma, firstsigma + 1, ...

   factors            one initialised mpz_t per curve; receives either the
                      factor found or the residue x/z mod N of the curve
   array_stage_found  one int per curve; receives the value returned by
                      findfactor() for that curve
   N                  number to factor
   s                  batch exponent, passed unchanged to cuda_Main()
   gputime            receives the value returned by cuda_Main()
   verbose            passed unchanged to cuda_Main()

   Returns ECM_NO_FACTOR_FOUND if no curve found a factor, otherwise the
   status of the last curve that did.
   A failed allocation of the host arrays aborts through ASSERT_ALWAYS, as
   for the other allocations in this file. */
int
gpu_ecm_stage1 (mpz_t *factors, int *array_stage_found, mpz_t N, mpz_t s,
                unsigned int number_of_curves, unsigned int firstsigma,
                float *gputime, int verbose)
{
  int youpi = ECM_NO_FACTOR_FOUND;

  unsigned int sigma;
  unsigned int i;

  mpz_t N3;    /* N3 = 3*N */
  mpz_t w;     /* w = 2^(SIZE_DIGIT) */
  mpz_t invN;  /* invN = -N^-1 mod w */
  mpz_t invB;  /* invB = 2^(-MAX_BITS) mod N ; B is w^NB_DIGITS */
  mpz_t invw;  /* w^(-1) mod N */
  mpz_t M;     /* (invN*N+1)/w */
  mpz_t xp, zp, x2p, z2p;

  /* The same variables but for the GPU */
  biguint_t *h_xarray, *h_zarray, *h_x2array, *h_z2array;
  digit_t h_invN;
  biguint_t h_N, h_3N, h_M;

  /*****************************/
  /* Initialize some variables */
  /*****************************/
  mpz_init (N3);
  mpz_init (invw);
  mpz_init (w);
  mpz_init (M);
  mpz_init (xp);
  mpz_init (zp);
  mpz_init (x2p);
  mpz_init (z2p);
  mpz_init (invN);
  mpz_init (invB);

  h_xarray = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  h_zarray = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  h_x2array = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  h_z2array = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));

  /* The arrays are written to right below: fail loudly instead of
     dereferencing NULL. malloc (0) may legitimately return NULL, so a
     request for zero curves is not treated as an error. */
  ASSERT_ALWAYS (number_of_curves == 0 ||
                 (h_xarray != NULL && h_zarray != NULL &&
                  h_x2array != NULL && h_z2array != NULL));

  /* Some computation depending on N */
  mpz_mul_ui (N3, N, 3); /* Compute N3 = 3*N */
  mpz_ui_pow_ui (w, 2, ECM_GPU_SIZE_DIGIT); /* Compute w = 2^SIZE_DIGIT */

  mpz_invert (invN, N, w);
  mpz_sub (invN, w, invN); /* Compute invN = -N^-1 mod w */

  mpz_mul (M, invN, N);
  mpz_add_ui (M, M, 1);
  mpz_divexact (M, M, w); /* Compute M = (invN*N+1)/w */

  mpz_to_biguint (h_N, N);
  mpz_to_biguint (h_3N, N3);
  mpz_to_biguint (h_M, M);
  h_invN = mpz_get_ui (invN);

  mpz_ui_pow_ui (invB, 2, ECM_GPU_MAX_BITS);
  mpz_invert (invB, invB, N); /* Compute invB = 2^(-MAX_BITS) mod N */

  mpz_invert (invw, w, N); /* Compute invw = 2^-SIZE_DIGIT % N */

  /* xp zp x2p are independent of N and the curve */
  mpz_set_ui (xp, 2);
  mpz_set_ui (zp, 1);
  mpz_set_ui (x2p, 9);

  /* Compute their Montgomery representation */
  to_mont_repr (xp, N);
  to_mont_repr (zp, N);
  to_mont_repr (x2p, N);

  /* for each curve, compute z2p and put xp, zp, x2p, z2p in the h_*array */
  for (i = 0; i < number_of_curves; i++)
    {
      sigma = firstsigma + i;

      /* d = sigma / w mod N */
      mpz_mul_ui (z2p, invw, sigma);
      mpz_mod (z2p, z2p, N);
      mpz_mul_2exp (z2p, z2p, 6);
      mpz_add_ui (z2p, z2p, 8);
      mpz_mod (z2p, z2p, N); /* z2p = 8+64*d */

      to_mont_repr (z2p, N);

      mpz_to_biguint (h_xarray[i], xp);
      mpz_to_biguint (h_zarray[i], zp);
      mpz_to_biguint (h_x2array[i], x2p);
      mpz_to_biguint (h_z2array[i], z2p);
    }

  /* Call the wrapper function that calls the GPU */
  *gputime = cuda_Main (h_N, h_3N, h_M, h_invN, h_xarray, h_zarray, h_x2array,
                        h_z2array, s, firstsigma, number_of_curves, verbose);

  /* Analyse results */
  for (i = 0; i < number_of_curves; i++)
    {
      sigma = firstsigma + i;

      biguint_to_mpz (xp, h_xarray[i]);
      biguint_to_mpz (zp, h_zarray[i]);

      /* Leave the Montgomery representation before looking for a factor */
      from_mont_repr (xp, N, invB);
      from_mont_repr (zp, N, invB);

      array_stage_found[i] = findfactor (factors[i], N, xp, zp);

      if (array_stage_found[i] != ECM_NO_FACTOR_FOUND)
        {
          youpi = array_stage_found[i];
          outputf (OUTPUT_NORMAL, "GPU: factor %Zd found in Step 1 with"
                   " curve %u (-sigma 3:%u)\n", factors[i], i, sigma);
        }
    }

  mpz_clear (N3);
  mpz_clear (invN);
  mpz_clear (invw);
  mpz_clear (w);
  mpz_clear (M);
  mpz_clear (xp);
  mpz_clear (zp);
  mpz_clear (x2p);
  mpz_clear (z2p);
  mpz_clear (invB);

  free ((void *) h_xarray);
  free ((void *) h_zarray);
  free ((void *) h_x2array);
  free ((void *) h_z2array);

  return youpi;
}
215
216 static void
A_from_sigma(mpz_t A,unsigned int sigma,mpz_t n)217 A_from_sigma (mpz_t A, unsigned int sigma, mpz_t n)
218 {
219 mpz_t tmp;
220 int i;
221 mpz_init_set_ui (tmp, sigma);
222 /* Compute d = sigma/2^ECM_GPU_SIZE_DIGIT */
223 for (i = 0; i < ECM_GPU_SIZE_DIGIT; i++)
224 {
225 if (mpz_tstbit (tmp, 0) == 1)
226 mpz_add (tmp, tmp, n);
227 mpz_div_2exp (tmp, tmp, 1);
228 }
229 mpz_mul_2exp (tmp, tmp, 2); /* 4d */
230 mpz_sub_ui (tmp, tmp, 2); /* 4d-2 */
231
232 mpz_set (A, tmp);
233
234 mpz_clear (tmp);
235 }
236
237 int
gpu_ecm(mpz_t f,mpz_t x,int * param,mpz_t firstsigma,mpz_t n,mpz_t go,double * B1done,double B1,mpz_t B2min_parm,mpz_t B2_parm,unsigned long k,const int S,int verbose,int repr,int nobase2step2,int use_ntt,int sigma_is_A,FILE * os,FILE * es,char * chkfilename ATTRIBUTE_UNUSED,char * TreeFilename,double maxmem,int (* stop_asap)(void),mpz_t batch_s,double * batch_last_B1_used,int device,int * device_init,unsigned int * nb_curves)238 gpu_ecm (mpz_t f, mpz_t x, int *param, mpz_t firstsigma, mpz_t n, mpz_t go,
239 double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm,
240 unsigned long k, const int S, int verbose, int repr,
241 int nobase2step2, int use_ntt, int sigma_is_A, FILE *os, FILE* es,
242 char *chkfilename ATTRIBUTE_UNUSED, char *TreeFilename, double maxmem,
243 int (*stop_asap)(void), mpz_t batch_s, double *batch_last_B1_used,
244 int device, int *device_init, unsigned int *nb_curves)
245 {
246 unsigned int i;
247 int youpi = ECM_NO_FACTOR_FOUND;
248 int factor_found = ECM_NO_FACTOR_FOUND;
249 long st, st2;
250 long tottime; /* at the end, total time in ms */
251 unsigned int firstsigma_ui;
252 float gputime = 0.0;
253 mpz_t tmp_A;
254 mpz_t *factors = NULL; /* Contains either a factor of n either end-of-stage-1
255 residue (depending of the value of array_stage_found */
256 int *array_stage_found = NULL;
257 /* Only for stage 2 */
258 int base2 = 0; /* If n is of form 2^n[+-]1, set base to [+-]n */
259 int Fermat = 0; /* If base2 > 0 is a power of 2, set Fermat to base2 */
260 int po2 = 0; /* Whether we should use power-of-2 poly degree */
261 /* Use only in stage 2 */
262 mpmod_t modulus;
263 curve P;
264 mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */
265 unsigned long dF;
266 root_params_t root_params;
267
268 ASSERT((-1 <= sigma_is_A) && (sigma_is_A <= 1));
269 ASSERT((GMP_NUMB_BITS == 32) || (GMP_NUMB_BITS == 64));
270
271 set_verbose (verbose);
272 ECM_STDOUT = (os == NULL) ? stdout : os;
273 ECM_STDERR = (es == NULL) ? stdout : es;
274
275
276 /* Check that N is not too big */
277 if (mpz_sizeinbase (n, 2) > ECM_GPU_MAX_BITS-6)
278 {
279 outputf (OUTPUT_ERROR, "GPU: Error, input number should be stricly lower"
280 " than 2^%d\n", ECM_GPU_MAX_BITS-6);
281 return ECM_ERROR;
282 }
283
284 /* Only param = ECM_PARAM_BATCH_32BITS_D is accepted on GPU */
285 if (*param == ECM_PARAM_DEFAULT)
286 *param = ECM_PARAM_BATCH_32BITS_D;
287
288 if (*param != ECM_PARAM_BATCH_32BITS_D)
289 {
290 outputf (OUTPUT_ERROR, "GPU: Error, only param = ECM_PARAM_BATCH_32BITS_D "
291 "is accepted on GPU.\n");
292 return ECM_ERROR;
293 }
294
295 /* check that repr == ECM_MOD_DEFAULT or ECM_MOD_BASE2 (only for stage 2) */
296 if (repr != ECM_MOD_DEFAULT && repr != ECM_MOD_BASE2)
297 outputf (OUTPUT_ERROR, "GPU: Warning, the value of repr will be ignored "
298 "for step 1 on GPU.\n");
299
300 /* It is only for stage 2, it is not taken into account for GPU code */
301 if (mpmod_init (modulus, n, repr) != 0)
302 return ECM_ERROR;
303
304 /* See what kind of number we have as that may influence optimal parameter
305 selection. Test for base 2 number. Note: this was already done by
306 mpmod_init. */
307
308 if (modulus->repr == ECM_MOD_BASE2)
309 base2 = modulus->bits;
310
311 /* For a Fermat number (base2 a positive power of 2) */
312 for (Fermat = base2; Fermat > 0 && (Fermat & 1) == 0; Fermat >>= 1);
313 if (Fermat == 1)
314 {
315 Fermat = base2;
316 po2 = 1;
317 }
318 else
319 Fermat = 0;
320
321 /* Cannot do resume on GPU */
322 if (!ECM_IS_DEFAULT_B1_DONE(*B1done) && *B1done < B1)
323 {
324 outputf (OUTPUT_ERROR, "GPU: Error, cannot resume on GPU.\n");
325 return ECM_ERROR;
326 }
327
328 /* Compute s */
329 if (B1 != *batch_last_B1_used || mpz_cmp_ui (batch_s, 1) <= 0)
330 {
331 *batch_last_B1_used = B1;
332
333 st = cputime ();
334 /* construct the batch exponent */
335 compute_s (batch_s, B1, NULL);
336 outputf (OUTPUT_VERBOSE, "Computing batch product (of %" PRIu64
337 " bits) of primes up to B1=%1.0f took %ldms\n",
338 mpz_sizeinbase (batch_s, 2), B1, cputime () - st);
339 }
340
341 /* Set parameters for stage 2 */
342 mpres_init (P.x, modulus);
343 mpres_init (P.y, modulus);
344 mpres_init (P.A, modulus);
345 mpz_init (tmp_A);
346 mpz_init (B2);
347 mpz_init (B2min);
348
349 youpi = set_stage_2_params (B2, B2_parm, B2min, B2min_parm, &root_params,
350 B1, &k, S, use_ntt, &po2, &dF,
351 TreeFilename, maxmem, Fermat, modulus);
352 if (youpi == ECM_ERROR)
353 goto end_gpu_ecm;
354
355
356 /* Initialize the GPU if necessary */
357 if (!*device_init)
358 {
359 st = cputime ();
360 youpi = select_and_init_GPU (device, nb_curves,
361 test_verbose (OUTPUT_VERBOSE));
362
363 if (youpi != 0)
364 {
365 youpi = ECM_ERROR;
366 goto end_gpu_ecm2;
367 }
368
369 outputf (OUTPUT_VERBOSE, "GPU: Selection and initialization of the device "
370 "took %ldms\n", elltime (st, cputime ()));
371 /* TRICKS: If initialization of the device is too long (few seconds), */
372 /* try running 'nvidia-smi -q -l' on the background . */
373 *device_init = 1;
374 }
375
376 /* Init arrays */
377 factors = (mpz_t *) malloc (*nb_curves * sizeof (mpz_t));
378 ASSERT_ALWAYS (factors != NULL);
379
380 array_stage_found = (int *) malloc (*nb_curves * sizeof (int));
381 ASSERT_ALWAYS (array_stage_found != NULL);
382
383 for (i = 0; i < *nb_curves; i++)
384 {
385 mpz_init (factors[i]);
386 array_stage_found[i] = ECM_NO_FACTOR_FOUND;
387 }
388
389
390 /* Current code works only for sigma_is_A = 0 */
391 if (sigma_is_A != 0)
392 {
393 outputf (OUTPUT_ERROR, "GPU: Not yet implemented.\n");
394 youpi= ECM_ERROR;
395 goto end_gpu_ecm;
396 }
397
398 ASSERT (sigma_is_A == 0);
399 if (mpz_sgn (firstsigma) == 0)
400 {
401 /* generate random value in [2, 2^32 - nb_curves - 1] */
402 mpz_set_ui (firstsigma, (get_random_ul () %
403 (TWO32 - 2 - *nb_curves)) + 2);
404 }
405 else /* sigma should be in [2, 2^32-nb_curves] */
406 {
407 if (mpz_cmp_ui (firstsigma, 2) < 0 ||
408 mpz_cmp_ui (firstsigma, TWO32 - *nb_curves) >= 0)
409 {
410 outputf (OUTPUT_ERROR, "GPU: Error, sigma should be in [2,%lu]\n",
411 TWO32 - *nb_curves - 1);
412 youpi= ECM_ERROR;
413 goto end_gpu_ecm;
414 }
415 }
416 firstsigma_ui = mpz_get_ui (firstsigma);
417
418 print_B1_B2_poly (OUTPUT_NORMAL, ECM_ECM, B1, *B1done, B2min_parm, B2min,
419 B2, S, firstsigma, sigma_is_A, ECM_EC_TYPE_MONTGOMERY,
420 go, *param, *nb_curves);
421 outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n",
422 dF, k, root_params.d1, root_params.d2, root_params.i0);
423
424 if (go != NULL && mpz_cmp_ui (go, 1) > 0)
425 {
426 outputf (OUTPUT_ERROR, "GPU: Error, option -go is not allowed\n");
427 youpi= ECM_ERROR;
428 goto end_gpu_ecm;
429 }
430
431 if (test_verbose (OUTPUT_VERBOSE))
432 {
433 if (mpz_cmp_d (B2min, B1) != 0)
434 {
435 outputf (OUTPUT_VERBOSE,
436 "Can't compute success probabilities for B1 <> B2min\n");
437 }
438 else
439 {
440 rhoinit (256, 10);
441 print_expcurves (B1, B2, dF, k, root_params.S, *param);
442 }
443 }
444
445 st = cputime ();
446 youpi = gpu_ecm_stage1 (factors, array_stage_found, n, batch_s, *nb_curves,
447 firstsigma_ui, &gputime, verbose);
448
449 outputf (OUTPUT_NORMAL, "Computing %u Step 1 took %ldms of CPU time / "
450 "%.0fms of GPU time\n", *nb_curves,
451 elltime (st, cputime ()), gputime);
452 outputf (OUTPUT_VERBOSE, "Throughput: %.3f curves per second ",
453 1000 * (*nb_curves)/gputime);
454 outputf (OUTPUT_VERBOSE, "(on average %.2fms per Step 1)\n",
455 gputime/(*nb_curves));
456 tottime = (long) gputime;
457
458 *B1done=B1;
459
460 /* Save stage 1 residues */
461 mpz_set_ui (x, 0);
462 for (i = 0; i < *nb_curves; i++)
463 {
464 mpz_mul (x, x, n);
465 mpz_add (x, x, factors[i]);
466 }
467
468 /* was a factor found in stage 1 ? */
469 if (youpi != ECM_NO_FACTOR_FOUND)
470 goto end_gpu_ecm_rhotable;
471
472 /* If using 2^k +/-1 modulus and 'nobase2step2' flag is set,
473 set default (-nobase2) modular method and remap P.x, P.y, and P.A */
474 if (modulus->repr == ECM_MOD_BASE2 && nobase2step2)
475 {
476 mpmod_clear (modulus);
477
478 repr = ECM_MOD_NOBASE2;
479 if (mpmod_init (modulus, n, repr) != 0) /* reset modulus for nobase2 */
480 {
481 youpi = ECM_ERROR;
482 goto end_gpu_ecm_rhotable;
483 }
484 }
485
486 if (mpz_cmp (B2, B2min) < 0)
487 goto end_gpu_ecm_rhotable;
488
489 st2 = cputime ();
490
491 P.disc = 0; /* For stage2 this needs to be 0, in order not to use CM stuff */
492
493 for (i = 0; i < *nb_curves; i++)
494 {
495 if (test_verbose (OUTPUT_RESVERBOSE))
496 outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", factors[i]);
497
498 if (stop_asap != NULL && (*stop_asap) ())
499 goto end_gpu_ecm_rhotable;
500
501 mpres_set_z (P.x, factors[i], modulus);
502 mpres_set_ui (P.y, 1, modulus);
503 A_from_sigma (tmp_A, i+firstsigma_ui, modulus->orig_modulus);
504 mpres_set_z (P.A, tmp_A, modulus);
505
506 /* compute stage 2 */
507 youpi = montgomery_to_weierstrass (factors[i], P.x, P.y, P.A, modulus);
508 if (youpi != ECM_NO_FACTOR_FOUND)
509 goto next_curve;
510
511 if (test_verbose (OUTPUT_RESVERBOSE) && youpi == ECM_NO_FACTOR_FOUND
512 && mpz_cmp (B2, B2min) >= 0)
513 {
514 mpz_t t;
515
516 mpz_init (t);
517 mpres_get_z (t, P.x, modulus);
518 outputf (OUTPUT_RESVERBOSE, "After switch to Weierstrass form, "
519 "P=(%Zd", t);
520 mpres_get_z (t, P.y, modulus);
521 outputf (OUTPUT_RESVERBOSE, ", %Zd)\n", t);
522 mpres_get_z (t, P.A, modulus);
523 outputf (OUTPUT_RESVERBOSE, "on curve Y^2 = X^3 + %Zd * X + b\n",
524 t);
525 mpz_clear (t);
526 }
527
528 /* It is a hack to avoid very verbose Step 2
529 (without it, stage2() prints a least a line by curves) */
530 if (!test_verbose (OUTPUT_VERBOSE))
531 set_verbose (0);
532 youpi = stage2 (factors[i], &P, modulus, dF, k, &root_params, use_ntt,
533 TreeFilename, stop_asap);
534 set_verbose (verbose);
535
536 next_curve:
537 if (youpi != ECM_NO_FACTOR_FOUND)
538 {
539 array_stage_found[i] = youpi;
540 outputf (OUTPUT_NORMAL, "GPU: factor %Zd found in Step 2 with"
541 " curve %u (-sigma 3:%u)\n", factors[i], i, i+firstsigma_ui);
542 /* factor_found corresponds to the first factor found */
543 if (factor_found == ECM_NO_FACTOR_FOUND)
544 factor_found = youpi;
545 }
546 }
547
548 /* If a factor was found in Step 2, make sure we set
549 * our return value "youpi" appropriately
550 */
551 youpi = factor_found;
552
553 st2 = elltime (st2, cputime ());
554 outputf (OUTPUT_NORMAL, "Computing %u Step 2 on CPU took %ldms\n",
555 *nb_curves, st2);
556 outputf (OUTPUT_VERBOSE, "Throughput: %.3f Step 2 per second ",
557 1000 * ((double)(*nb_curves))/((double)st2));
558 outputf (OUTPUT_VERBOSE, "(on average %0.2fms per Step 2)\n",
559 ((double) st2)/((double) *nb_curves));
560 tottime += st2;
561
562
563 end_gpu_ecm_rhotable:
564 if (test_verbose (OUTPUT_VERBOSE))
565 {
566 if (mpz_cmp_d (B2min, B1) == 0)
567 {
568 if (youpi == ECM_NO_FACTOR_FOUND &&
569 (stop_asap == NULL || !(*stop_asap)()))
570 print_exptime (B1, B2, dF, k, root_params.S,
571 (long) (tottime / *nb_curves), *param);
572 rhoinit (1, 0); /* Free memory of rhotable */
573 }
574 }
575
576 /* If f0, ,fk are the factors found (in stage 1 or 2)
577 * f = f0 + f1*n + .. + fk*n^k
578 * The purpose of this construction is to be able to return more than one
579 * factor if needed without breaking the lib interface (as gcd(f,n)=gcd(f0,n).
580 */
581 mpz_set_ui (f, 0);
582 for (i = 0; i < *nb_curves; i++)
583 {
584 if (array_stage_found[i] != ECM_NO_FACTOR_FOUND)
585 {
586 mpz_mul (f, f, n);
587 mpz_add (f, f, factors[i]);
588 }
589 }
590
591 end_gpu_ecm:
592 mpz_clear (root_params.i0);
593 mpz_clear (B2);
594 mpz_clear (B2min);
595
596 for (i = 0; i < *nb_curves; i++)
597 mpz_clear (factors[i]);
598
599 free (array_stage_found);
600 free (factors);
601
602 end_gpu_ecm2:
603 mpz_clear (tmp_A);
604 mpres_clear (P.A, modulus);
605 mpres_clear (P.y, modulus);
606 mpres_clear (P.x, modulus);
607 mpmod_clear (modulus);
608
609 return youpi;
610 }
611 #endif
612
613
614
615