1 #include "ecm-gpu.h"
2 
3 #ifdef WITH_GPU
4 
5 #define TWO32 4294967296 /* 2^32 */
6 
7 extern int select_and_init_GPU (int, unsigned int*, int);
8 extern float cuda_Main (biguint_t, biguint_t, biguint_t, digit_t, biguint_t*,
9                         biguint_t*, biguint_t*, biguint_t*, mpz_t, unsigned int,
10                         unsigned int, int);
11 
/* Inspect the projective result (xfin : zfin) of one curve modulo N.
 *
 * When gcd (zfin, N) differs from 1, that gcd is copied into factor and
 * ECM_FACTOR_FOUND_STEP1 is returned; xfin and zfin are left untouched.
 *
 * Otherwise zfin is replaced by its inverse modulo N, xfin is replaced by
 * xfin * zfin^(-1) mod N, that value is copied into factor, and
 * ECM_NO_FACTOR_FOUND is returned.
 */
int findfactor (mpz_t factor, mpz_t N, mpz_t xfin, mpz_t zfin)
{
  mpz_t g;
  int nontrivial;

  mpz_init (g);
  mpz_gcd (g, zfin, N);
  nontrivial = (mpz_cmp_ui (g, 1) != 0);

  if (nontrivial)
    {
      /* gcd (zfin, N) != 1: report the gcd itself as the factor */
      mpz_set (factor, g);
      mpz_clear (g);
      return ECM_FACTOR_FOUND_STEP1;
    }

  mpz_clear (g);

  /* zfin is invertible modulo N: normalise the point to x/z mod N */
  mpz_invert (zfin, zfin, N);
  mpz_mul (xfin, xfin, zfin);
  mpz_mod (xfin, xfin, N);
  mpz_set (factor, xfin);

  return ECM_NO_FACTOR_FOUND;
}
38 
/* In place, replace x by (x * 2^ECM_GPU_MAX_BITS) mod n. */
void
to_mont_repr (mpz_t x, mpz_t n)
{
  /* shift left by ECM_GPU_MAX_BITS bits, then reduce modulo n */
  mpz_mul_2exp (x, x, ECM_GPU_MAX_BITS);

  mpz_mod (x, x, n);
}
44 
/* In place, replace x by (x * invB) mod n.
 * The caller in this file passes invB = 2^(-ECM_GPU_MAX_BITS) mod n, which
 * undoes the scaling applied by to_mont_repr.
 */
void
from_mont_repr (mpz_t x, mpz_t n, mpz_t invB)
{
  /* multiply by the supplied inverse, then reduce modulo n */
  mpz_mul (x, x, invB);

  mpz_mod (x, x, n);
}
50 
mpz_to_biguint(biguint_t a,mpz_t b)51 void mpz_to_biguint (biguint_t a, mpz_t b)
52 {
53   int i;
54 
55   for (i=0;i<ECM_GPU_NB_DIGITS;i++)
56   {
57 #if GMP_NUMB_BITS == 32
58     a[i]=mpz_getlimbn (b, i);
59 #else // GMP_NUMB_BITS == 64
60     if (i%2 == 0)
61       a[i]=(mpz_getlimbn (b, i/2) & 0x00000000ffffffff);
62     else
63       a[i]=(mpz_getlimbn (b, i/2) >> 32);
64 #endif
65   }
66 }
67 
/* Set a to the integer whose 32-bit words are b[0] (least significant)
 * through b[ECM_GPU_NB_DIGITS-1] (most significant).
 */
void biguint_to_mpz (mpz_t a, biguint_t b)
{
  int remaining = ECM_GPU_NB_DIGITS;

  mpz_set_ui (a, 0);

  /* Horner scheme in base 2^32, starting from the most significant word */
  while (remaining-- > 0)
    {
      mpz_mul_2exp (a, a, 32);
      mpz_add_ui (a, a, b[remaining]);
    }
}
80 
/* Prepare the input of number_of_curves curves, hand them to cuda_Main and
 * post-process what comes back.
 *
 * factors            array of number_of_curves initialised mpz_t; entry i
 *                    receives what findfactor() produces for curve i (a gcd
 *                    with N, or the residue x/z mod N).
 * array_stage_found  array of number_of_curves int; entry i receives the
 *                    return value of findfactor() for curve i.
 * N                  the number to factor.
 * s                  passed unchanged to cuda_Main.
 * number_of_curves   number of curves; curve i uses sigma = firstsigma + i.
 * firstsigma         sigma of the first curve.
 * gputime            receives the value returned by cuda_Main.
 * verbose            passed unchanged to cuda_Main.
 *
 * Returns ECM_NO_FACTOR_FOUND when findfactor() reported no factor for any
 * curve; otherwise the status of the last curve for which it reported one.
 *
 * The program is aborted through ASSERT_ALWAYS when one of the host arrays
 * cannot be allocated.
 */
int gpu_ecm_stage1 (mpz_t *factors, int *array_stage_found, mpz_t N, mpz_t s,
                    unsigned int number_of_curves, unsigned int firstsigma,
                    float *gputime, int verbose)
{
  int youpi = ECM_NO_FACTOR_FOUND;

  unsigned int sigma;
  unsigned int i;

  mpz_t N3; /* N3 = 3*N */
  mpz_t w; /* w = 2^(SIZE_DIGIT) */
  mpz_t invN; /* invN = -N^-1 mod w */
  mpz_t invB; /* invB = 2^(-MAX_BITS) mod N ; B is w^NB_DIGITS */
  mpz_t invw; /* w^(-1) mod N */
  mpz_t M; /* (invN*N+1)/w */
  mpz_t xp, zp, x2p, z2p;

  /* The same variables but for the GPU */
  biguint_t *h_xarray, *h_zarray, *h_x2array, *h_z2array;
  digit_t h_invN;
  biguint_t h_N, h_3N, h_M;

  /*****************************/
  /* Initialize some variables */
  /*****************************/
  mpz_init (N3);
  mpz_init (invw);
  mpz_init (w);
  mpz_init (M);
  mpz_init (xp);
  mpz_init (zp);
  mpz_init (x2p);
  mpz_init (z2p);
  mpz_init (invN);
  mpz_init (invB);

  /* Host-side arrays, one biguint_t per curve. The allocations are checked
     the same way gpu_ecm() checks its own arrays, since every entry is
     written in the loop below. */
  h_xarray = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  ASSERT_ALWAYS (h_xarray != NULL);
  h_zarray = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  ASSERT_ALWAYS (h_zarray != NULL);
  h_x2array = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  ASSERT_ALWAYS (h_x2array != NULL);
  h_z2array = (biguint_t *) malloc (number_of_curves * sizeof (biguint_t));
  ASSERT_ALWAYS (h_z2array != NULL);

  /* Some computation depending on N */
  mpz_mul_ui (N3, N, 3); /* Compute N3 = 3*N */
  mpz_ui_pow_ui (w, 2, ECM_GPU_SIZE_DIGIT); /* Compute w = 2^SIZE_DIGIT */

  /* NOTE(review): the return value of mpz_invert is not checked; the
     inverse modulo w only exists for odd N -- confirm callers guarantee it */
  mpz_invert (invN, N, w);
  mpz_sub (invN, w, invN); /* Compute invN = -N^-1 mod w */

  mpz_mul (M, invN, N);
  mpz_add_ui (M, M, 1);
  mpz_divexact (M, M, w); /* Compute M = (invN*N+1)/w */

  mpz_to_biguint (h_N, N);
  mpz_to_biguint (h_3N, N3);
  mpz_to_biguint (h_M, M);
  h_invN = mpz_get_ui (invN);

  mpz_ui_pow_ui (invB, 2, ECM_GPU_MAX_BITS);
  mpz_invert (invB, invB, N); /* Compute invB = 2^(-MAX_BITS) mod N */

  mpz_invert (invw, w, N); /* Compute invw = 2^-SIZE_DIGIT % N */

  /* xp zp x2p are independent of N and the curve */
  mpz_set_ui (xp, 2);
  mpz_set_ui (zp, 1);
  mpz_set_ui (x2p, 9);

  /* Compute their Montgomery representation */
  to_mont_repr (xp, N);
  to_mont_repr (zp, N);
  to_mont_repr (x2p, N);

  /* for each curve, compute z2p and put xp, zp, x2p, z2p in the h_*array  */
  for (i = 0; i < number_of_curves; i++)
    {
      sigma = firstsigma + i;

      /* d = sigma / 2^SIZE_DIGIT mod N, then z2p = 8 + 64*d mod N */
      mpz_mul_ui (z2p, invw, sigma);
      mpz_mod (z2p, z2p, N);
      mpz_mul_2exp (z2p, z2p, 6);
      mpz_add_ui (z2p, z2p, 8);
      mpz_mod (z2p, z2p, N); /* z2p = 8+64*d */

      to_mont_repr (z2p, N);

      mpz_to_biguint (h_xarray[i], xp);
      mpz_to_biguint (h_zarray[i], zp);
      mpz_to_biguint (h_x2array[i], x2p);
      mpz_to_biguint (h_z2array[i], z2p);
    }

  /* Call the wrapper function that call the GPU */
  *gputime = cuda_Main (h_N, h_3N, h_M, h_invN, h_xarray, h_zarray, h_x2array,
                        h_z2array, s, firstsigma, number_of_curves, verbose);

  /* Analyse results: h_xarray / h_zarray are read back as the final
     (x : z) of every curve, still in Montgomery representation */
  for (i = 0; i < number_of_curves; i++)
    {
      sigma = firstsigma + i;

      biguint_to_mpz (xp, h_xarray[i]);
      biguint_to_mpz (zp, h_zarray[i]);

      from_mont_repr (xp, N, invB);
      from_mont_repr (zp, N, invB);

      array_stage_found[i] = findfactor (factors[i], N, xp, zp);

      if (array_stage_found[i] != ECM_NO_FACTOR_FOUND)
        {
          youpi = array_stage_found[i];
          outputf (OUTPUT_NORMAL, "GPU: factor %Zd found in Step 1 with"
                  " curve %u (-sigma 3:%u)\n", factors[i], i, sigma);
        }
    }

  mpz_clear (N3);
  mpz_clear (invN);
  mpz_clear (invw);
  mpz_clear (w);
  mpz_clear (M);
  mpz_clear (xp);
  mpz_clear (zp);
  mpz_clear (x2p);
  mpz_clear (z2p);
  mpz_clear (invB);

  free ((void *) h_xarray);
  free ((void *) h_zarray);
  free ((void *) h_x2array);
  free ((void *) h_z2array);

  return youpi;
}
215 
216 static void
A_from_sigma(mpz_t A,unsigned int sigma,mpz_t n)217 A_from_sigma (mpz_t A, unsigned int sigma, mpz_t n)
218 {
219   mpz_t tmp;
220   int i;
221   mpz_init_set_ui (tmp, sigma);
222   /* Compute d = sigma/2^ECM_GPU_SIZE_DIGIT */
223   for (i = 0; i < ECM_GPU_SIZE_DIGIT; i++)
224     {
225       if (mpz_tstbit (tmp, 0) == 1)
226       mpz_add (tmp, tmp, n);
227       mpz_div_2exp (tmp, tmp, 1);
228     }
229   mpz_mul_2exp (tmp, tmp, 2);           /* 4d */
230   mpz_sub_ui (tmp, tmp, 2);             /* 4d-2 */
231 
232   mpz_set (A, tmp);
233 
234   mpz_clear (tmp);
235 }
236 
237 int
gpu_ecm(mpz_t f,mpz_t x,int * param,mpz_t firstsigma,mpz_t n,mpz_t go,double * B1done,double B1,mpz_t B2min_parm,mpz_t B2_parm,unsigned long k,const int S,int verbose,int repr,int nobase2step2,int use_ntt,int sigma_is_A,FILE * os,FILE * es,char * chkfilename ATTRIBUTE_UNUSED,char * TreeFilename,double maxmem,int (* stop_asap)(void),mpz_t batch_s,double * batch_last_B1_used,int device,int * device_init,unsigned int * nb_curves)238 gpu_ecm (mpz_t f, mpz_t x, int *param, mpz_t firstsigma, mpz_t n, mpz_t go,
239          double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm,
240          unsigned long k, const int S, int verbose, int repr,
241          int nobase2step2, int use_ntt, int sigma_is_A, FILE *os, FILE* es,
242          char *chkfilename ATTRIBUTE_UNUSED, char *TreeFilename, double maxmem,
243          int (*stop_asap)(void), mpz_t batch_s, double *batch_last_B1_used,
244          int device, int *device_init, unsigned int *nb_curves)
245 {
246   unsigned int i;
247   int youpi = ECM_NO_FACTOR_FOUND;
248   int factor_found = ECM_NO_FACTOR_FOUND;
249   long st, st2;
250   long tottime; /* at the end, total time in ms */
251   unsigned int firstsigma_ui;
252   float gputime = 0.0;
253   mpz_t tmp_A;
254   mpz_t *factors = NULL; /* Contains either a factor of n either end-of-stage-1
255                          residue (depending of the value of array_stage_found */
256   int *array_stage_found = NULL;
257   /* Only for stage 2 */
258   int base2 = 0;  /* If n is of form 2^n[+-]1, set base to [+-]n */
259   int Fermat = 0; /* If base2 > 0 is a power of 2, set Fermat to base2 */
260   int po2 = 0;    /* Whether we should use power-of-2 poly degree */
261   /* Use only in stage 2 */
262   mpmod_t modulus;
263   curve P;
264   mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */
265   unsigned long dF;
266   root_params_t root_params;
267 
268   ASSERT((-1 <= sigma_is_A) && (sigma_is_A <= 1));
269   ASSERT((GMP_NUMB_BITS == 32) || (GMP_NUMB_BITS == 64));
270 
271   set_verbose (verbose);
272   ECM_STDOUT = (os == NULL) ? stdout : os;
273   ECM_STDERR = (es == NULL) ? stdout : es;
274 
275 
276   /* Check that N is not too big */
277   if (mpz_sizeinbase (n, 2) > ECM_GPU_MAX_BITS-6)
278     {
279       outputf (OUTPUT_ERROR, "GPU: Error, input number should be stricly lower"
280                              " than 2^%d\n", ECM_GPU_MAX_BITS-6);
281       return ECM_ERROR;
282     }
283 
284   /* Only param = ECM_PARAM_BATCH_32BITS_D is accepted on GPU */
285   if (*param == ECM_PARAM_DEFAULT)
286       *param = ECM_PARAM_BATCH_32BITS_D;
287 
288   if (*param != ECM_PARAM_BATCH_32BITS_D)
289     {
290       outputf (OUTPUT_ERROR, "GPU: Error, only param = ECM_PARAM_BATCH_32BITS_D "
291                              "is accepted on GPU.\n");
292       return ECM_ERROR;
293     }
294 
295   /* check that repr == ECM_MOD_DEFAULT or ECM_MOD_BASE2 (only for stage 2) */
296   if (repr != ECM_MOD_DEFAULT && repr != ECM_MOD_BASE2)
297       outputf (OUTPUT_ERROR, "GPU: Warning, the value of repr will be ignored "
298       "for step 1 on GPU.\n");
299 
300   /* It is only for stage 2, it is not taken into account for GPU code */
301   if (mpmod_init (modulus, n, repr) != 0)
302     return ECM_ERROR;
303 
304   /* See what kind of number we have as that may influence optimal parameter
305      selection. Test for base 2 number. Note: this was already done by
306      mpmod_init. */
307 
308   if (modulus->repr == ECM_MOD_BASE2)
309     base2 = modulus->bits;
310 
311   /* For a Fermat number (base2 a positive power of 2) */
312   for (Fermat = base2; Fermat > 0 && (Fermat & 1) == 0; Fermat >>= 1);
313   if (Fermat == 1)
314     {
315       Fermat = base2;
316       po2 = 1;
317     }
318   else
319       Fermat = 0;
320 
321   /* Cannot do resume on GPU */
322   if (!ECM_IS_DEFAULT_B1_DONE(*B1done) && *B1done < B1)
323     {
324       outputf (OUTPUT_ERROR, "GPU: Error, cannot resume on GPU.\n");
325       return ECM_ERROR;
326     }
327 
328   /* Compute s */
329   if (B1 != *batch_last_B1_used || mpz_cmp_ui (batch_s, 1) <= 0)
330     {
331       *batch_last_B1_used = B1;
332 
333       st = cputime ();
334       /* construct the batch exponent */
335       compute_s (batch_s, B1, NULL);
336       outputf (OUTPUT_VERBOSE, "Computing batch product (of %" PRIu64
337                                " bits) of primes up to B1=%1.0f took %ldms\n",
338                                mpz_sizeinbase (batch_s, 2), B1, cputime () - st);
339     }
340 
341   /* Set parameters for stage 2 */
342   mpres_init (P.x, modulus);
343   mpres_init (P.y, modulus);
344   mpres_init (P.A, modulus);
345   mpz_init (tmp_A);
346   mpz_init (B2);
347   mpz_init (B2min);
348 
349   youpi = set_stage_2_params (B2, B2_parm, B2min, B2min_parm, &root_params,
350                               B1, &k, S, use_ntt, &po2, &dF,
351                               TreeFilename, maxmem, Fermat, modulus);
352   if (youpi == ECM_ERROR)
353       goto end_gpu_ecm;
354 
355 
356   /* Initialize the GPU if necessary */
357   if (!*device_init)
358     {
359       st = cputime ();
360       youpi = select_and_init_GPU (device, nb_curves,
361                                    test_verbose (OUTPUT_VERBOSE));
362 
363       if (youpi != 0)
364         {
365           youpi = ECM_ERROR;
366           goto end_gpu_ecm2;
367         }
368 
369       outputf (OUTPUT_VERBOSE, "GPU: Selection and initialization of the device "
370                                "took %ldms\n", elltime (st, cputime ()));
371       /* TRICKS: If initialization of the device is too long (few seconds), */
372       /* try running 'nvidia-smi -q -l' on the background .                 */
373       *device_init = 1;
374     }
375 
376   /* Init arrays */
377   factors = (mpz_t *) malloc (*nb_curves * sizeof (mpz_t));
378   ASSERT_ALWAYS (factors != NULL);
379 
380   array_stage_found = (int *) malloc (*nb_curves * sizeof (int));
381   ASSERT_ALWAYS (array_stage_found != NULL);
382 
383   for (i = 0; i < *nb_curves; i++)
384     {
385       mpz_init (factors[i]);
386       array_stage_found[i] = ECM_NO_FACTOR_FOUND;
387     }
388 
389 
390   /* Current code works only for sigma_is_A = 0 */
391   if (sigma_is_A != 0)
392     {
393       outputf (OUTPUT_ERROR, "GPU: Not yet implemented.\n");
394       youpi= ECM_ERROR;
395       goto end_gpu_ecm;
396     }
397 
398   ASSERT (sigma_is_A == 0);
399   if (mpz_sgn (firstsigma) == 0)
400     {
401       /* generate random value in [2, 2^32 - nb_curves - 1] */
402       mpz_set_ui (firstsigma, (get_random_ul () %
403                                (TWO32 - 2 - *nb_curves)) + 2);
404     }
405   else /* sigma should be in [2, 2^32-nb_curves] */
406     {
407       if (mpz_cmp_ui (firstsigma, 2) < 0 ||
408           mpz_cmp_ui (firstsigma, TWO32 - *nb_curves) >= 0)
409         {
410           outputf (OUTPUT_ERROR, "GPU: Error, sigma should be in [2,%lu]\n",
411                                  TWO32 - *nb_curves - 1);
412           youpi= ECM_ERROR;
413           goto end_gpu_ecm;
414         }
415     }
416   firstsigma_ui = mpz_get_ui (firstsigma);
417 
418   print_B1_B2_poly (OUTPUT_NORMAL, ECM_ECM, B1, *B1done,  B2min_parm, B2min,
419                     B2, S, firstsigma, sigma_is_A, ECM_EC_TYPE_MONTGOMERY,
420                     go, *param, *nb_curves);
421   outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n",
422            dF, k, root_params.d1, root_params.d2, root_params.i0);
423 
424   if (go != NULL && mpz_cmp_ui (go, 1) > 0)
425     {
426       outputf (OUTPUT_ERROR, "GPU: Error, option -go is not allowed\n");
427       youpi= ECM_ERROR;
428       goto end_gpu_ecm;
429     }
430 
431   if (test_verbose (OUTPUT_VERBOSE))
432     {
433       if (mpz_cmp_d (B2min, B1) != 0)
434         {
435           outputf (OUTPUT_VERBOSE,
436             "Can't compute success probabilities for B1 <> B2min\n");
437         }
438       else
439         {
440           rhoinit (256, 10);
441           print_expcurves (B1, B2, dF, k, root_params.S, *param);
442         }
443     }
444 
445   st = cputime ();
446   youpi = gpu_ecm_stage1 (factors, array_stage_found, n, batch_s, *nb_curves,
447                           firstsigma_ui, &gputime, verbose);
448 
449   outputf (OUTPUT_NORMAL, "Computing %u Step 1 took %ldms of CPU time / "
450                           "%.0fms of GPU time\n", *nb_curves,
451                                            elltime (st, cputime ()), gputime);
452   outputf (OUTPUT_VERBOSE, "Throughput: %.3f curves per second ",
453                                                  1000 * (*nb_curves)/gputime);
454   outputf (OUTPUT_VERBOSE, "(on average %.2fms per Step 1)\n",
455                                                         gputime/(*nb_curves));
456   tottime = (long) gputime;
457 
458   *B1done=B1;
459 
460   /* Save stage 1 residues */
461   mpz_set_ui (x, 0);
462   for (i = 0; i < *nb_curves; i++)
463     {
464       mpz_mul (x, x, n);
465       mpz_add (x, x, factors[i]);
466     }
467 
468   /* was a factor found in stage 1 ? */
469   if (youpi != ECM_NO_FACTOR_FOUND)
470       goto end_gpu_ecm_rhotable;
471 
472   /* If using 2^k +/-1 modulus and 'nobase2step2' flag is set,
473      set default (-nobase2) modular method and remap P.x, P.y, and P.A */
474   if (modulus->repr == ECM_MOD_BASE2 && nobase2step2)
475     {
476       mpmod_clear (modulus);
477 
478       repr = ECM_MOD_NOBASE2;
479       if (mpmod_init (modulus, n, repr) != 0) /* reset modulus for nobase2 */
480         {
481           youpi = ECM_ERROR;
482           goto end_gpu_ecm_rhotable;
483         }
484     }
485 
486   if (mpz_cmp (B2, B2min) < 0)
487       goto end_gpu_ecm_rhotable;
488 
489   st2 = cputime ();
490 
491   P.disc = 0; /* For stage2 this needs to be 0, in order not to use CM stuff */
492 
493   for (i = 0; i < *nb_curves; i++)
494     {
495       if (test_verbose (OUTPUT_RESVERBOSE))
496         outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", factors[i]);
497 
498       if (stop_asap != NULL && (*stop_asap) ())
499           goto end_gpu_ecm_rhotable;
500 
501       mpres_set_z (P.x, factors[i], modulus);
502       mpres_set_ui (P.y, 1, modulus);
503       A_from_sigma (tmp_A, i+firstsigma_ui, modulus->orig_modulus);
504       mpres_set_z (P.A, tmp_A, modulus);
505 
506       /* compute stage 2 */
507       youpi = montgomery_to_weierstrass (factors[i], P.x, P.y, P.A, modulus);
508       if (youpi != ECM_NO_FACTOR_FOUND)
509         goto next_curve;
510 
511       if (test_verbose (OUTPUT_RESVERBOSE) && youpi == ECM_NO_FACTOR_FOUND
512           && mpz_cmp (B2, B2min) >= 0)
513         {
514           mpz_t t;
515 
516           mpz_init (t);
517           mpres_get_z (t, P.x, modulus);
518           outputf (OUTPUT_RESVERBOSE, "After switch to Weierstrass form, "
519                                       "P=(%Zd", t);
520           mpres_get_z (t, P.y, modulus);
521           outputf (OUTPUT_RESVERBOSE, ", %Zd)\n", t);
522           mpres_get_z (t, P.A, modulus);
523           outputf (OUTPUT_RESVERBOSE, "on curve Y^2 = X^3 + %Zd * X + b\n",
524                        t);
525           mpz_clear (t);
526         }
527 
528       /* It is a hack to avoid very verbose Step 2
529         (without it, stage2() prints a least a line by curves) */
530       if (!test_verbose (OUTPUT_VERBOSE))
531         set_verbose (0);
532       youpi = stage2 (factors[i], &P, modulus, dF, k, &root_params, use_ntt,
533                       TreeFilename, stop_asap);
534       set_verbose (verbose);
535 
536     next_curve:
537       if (youpi != ECM_NO_FACTOR_FOUND)
538         {
539           array_stage_found[i] = youpi;
540           outputf (OUTPUT_NORMAL, "GPU: factor %Zd found in Step 2 with"
541                 " curve %u (-sigma 3:%u)\n", factors[i], i, i+firstsigma_ui);
542           /* factor_found corresponds to the first factor found */
543           if (factor_found == ECM_NO_FACTOR_FOUND)
544             factor_found = youpi;
545         }
546     }
547 
548   /* If a factor was found in Step 2, make sure we set
549    * our return value "youpi" appropriately
550    */
551   youpi = factor_found;
552 
553   st2 = elltime (st2, cputime ());
554   outputf (OUTPUT_NORMAL, "Computing %u Step 2 on CPU took %ldms\n",
555                                                               *nb_curves, st2);
556   outputf (OUTPUT_VERBOSE, "Throughput: %.3f Step 2 per second ",
557                                   1000 * ((double)(*nb_curves))/((double)st2));
558   outputf (OUTPUT_VERBOSE, "(on average %0.2fms per Step 2)\n",
559                                          ((double) st2)/((double) *nb_curves));
560   tottime += st2;
561 
562 
563 end_gpu_ecm_rhotable:
564   if (test_verbose (OUTPUT_VERBOSE))
565     {
566       if (mpz_cmp_d (B2min, B1) == 0)
567         {
568           if (youpi == ECM_NO_FACTOR_FOUND &&
569               (stop_asap == NULL || !(*stop_asap)()))
570               print_exptime (B1, B2, dF, k, root_params.S,
571                              (long) (tottime / *nb_curves), *param);
572           rhoinit (1, 0); /* Free memory of rhotable */
573         }
574     }
575 
576   /* If f0, ,fk are the factors found (in stage 1 or 2)
577    * f = f0 + f1*n + .. + fk*n^k
578    * The purpose of this construction is to be able to return more than one
579    * factor if needed without breaking the lib interface (as gcd(f,n)=gcd(f0,n).
580    */
581   mpz_set_ui (f, 0);
582   for (i = 0; i < *nb_curves; i++)
583   {
584     if (array_stage_found[i] != ECM_NO_FACTOR_FOUND)
585       {
586         mpz_mul (f, f, n);
587         mpz_add (f, f, factors[i]);
588       }
589   }
590 
591 end_gpu_ecm:
592   mpz_clear (root_params.i0);
593   mpz_clear (B2);
594   mpz_clear (B2min);
595 
596   for (i = 0; i < *nb_curves; i++)
597       mpz_clear (factors[i]);
598 
599   free (array_stage_found);
600   free (factors);
601 
602 end_gpu_ecm2:
603   mpz_clear (tmp_A);
604   mpres_clear (P.A, modulus);
605   mpres_clear (P.y, modulus);
606   mpres_clear (P.x, modulus);
607   mpmod_clear (modulus);
608 
609   return youpi;
610 }
611 #endif
612 
613 
614 
615