1 /* Common stage 2 for ECM, P-1 and P+1 (improved standard continuation
2    with subquadratic polynomial arithmetic).
3 
4 Copyright 2001-2015 Paul Zimmermann, Alexander Kruppa, Dave Newman.
5 
6 This file is part of the ECM Library.
7 
8 The ECM Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The ECM Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the ECM Library; see the file COPYING.LIB.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include "config.h"
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <math.h> /* for floor */
27 #include <string.h> /* for strlen */
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h> /* for unlink */
31 #endif
32 
33 #include "ecm-impl.h"
34 #include "sp.h"
35 
36 extern unsigned int Fermat;
37 
38 /* r <- Dickson(n,a)(x) */
39 static void
dickson(mpz_t r,mpz_t x,unsigned int n,int a)40 dickson (mpz_t r, mpz_t x, unsigned int n, int a)
41 {
42   unsigned int i, b = 0;
43   mpz_t t, u;
44 
45   ASSERT_ALWAYS(n > 0);
46 
47   while (n > 2 && (n & 1) == 0)
48     {
49       b++;
50       n >>= 1;
51     }
52 
53   mpz_set (r, x);
54 
55   mpz_init (t);
56   mpz_init (u);
57 
58   if (n > 1)
59     {
60       mpz_set (r, x);
61       mpz_mul (r, r, r);
62       mpz_sub_si (r, r, a);
63       mpz_sub_si (r, r, a); /* r = dickson(x, 2, a) */
64 
65       mpz_set (t, x);    /* t = dickson(x, 1, a) */
66 
67       for (i = 2; i < n; i++)
68         {
69           mpz_mul_si (u, t, a);
70           mpz_set (t, r);     /* t = dickson(x, i, a) */
71           mpz_mul (r, r, x);
72           mpz_sub (r, r, u);  /* r = dickson(x, i+1, a) */
73         }
74     }
75 
76   for ( ; b > 0; b--)
77     {
78       mpz_mul (t, r, r); /* t = dickson(x, n, a) ^ 2 */
79       mpz_ui_pow_ui (u, abs (a), n);
80       if (n & 1 && a < 0)
81         mpz_neg (u, u);
82       mpz_mul_2exp (u, u, 1); /* u = 2 * a^n */
83       mpz_sub (r, t, u); /* r = dickson(x, 2*n, a) */
84       n <<= 1;
85     }
86 
87   mpz_clear (t);
88   mpz_clear (u);
89 }
90 
91 
92 /* Init table to allow computation of
93 
94    Dickson_{E, a} (s + n*D),
95 
96    for successive n, where Dickson_{E, a} is the Dickson polynomial
97    of degree E with parameter a. For a == 0, Dickson_{E, a} (x) = x^E .
98 
99    See Knuth, TAOCP vol.2, 4.6.4 and exercise 7 in 4.6.4, and
100    "An FFT Extension of the Elliptic Curve Method of Factorization",
101    Peter Montgomery, Dissertation, 1992, Chapter 5.
102 
103    Ternary return value.
104 */
105 
106 static void
fin_diff_coeff(listz_t coeffs,mpz_t s,mpz_t D,unsigned int E,int dickson_a)107 fin_diff_coeff (listz_t coeffs, mpz_t s, mpz_t D, unsigned int E,
108                 int dickson_a)
109 {
110   unsigned int i, k;
111   mpz_t t;
112 
113   mpz_init (t);
114   mpz_set (t, s);
115 
116   for (i = 0; i <= E; i++)
117     {
118       if (dickson_a != 0)         /* fd[i] = dickson_{E,a} (s+i*D) */
119         dickson (coeffs[i], t, E, dickson_a);
120       else                        /* fd[i] = (s+i*D)^E */
121         mpz_pow_ui (coeffs[i], t, E);
122       mpz_add (t, t, D);          /* t = s + i * D */
123     }
124 
125   for (k = 1; k <= E; k++)
126     for (i = E; i >= k; i--)
127       mpz_sub (coeffs[i], coeffs[i], coeffs[i-1]);
128 
129   mpz_clear (t);
130 }
131 
132 
133 /* Init several disjoint progressions for the computation of
134 
135    Dickson_{E,a} (e * (i0 + i + n * d * k)), for 0 <= i < d * k   (1)
136                   with gcd(e * (i0 + i), d) == 1, i == 1 (mod m),
137 		  where m divides d
138 
139    for successive n (the variable n does not appear here, it is the
140    application that called this function that wants to evaluate (1)
141    for n = 0, 1, 2, ...
142 
143    This means there will be k sets of progressions, where each set contains
144    eulerphi(d) progressions that generate the values of Dickson_{E,a} (x)
145    with x coprime to d and
146    with i == 1 (mod m), where x == e * (i0 + i) (mod m).
147 
148    i0 may be a NULL pointer, in this case i0 = 0 is assumed.
149 
150    Return NULL if an error occurred.
151 */
152 
153 listz_t
init_progression_coeffs(mpz_t i0,const unsigned long d,const unsigned long e,const unsigned int k,const unsigned int m,const unsigned int E,const int dickson_a)154 init_progression_coeffs (mpz_t i0, const unsigned long d,
155 			 const unsigned long e, const unsigned int k,
156 			 const unsigned int m, const unsigned int E,
157 			 const int dickson_a)
158 {
159   unsigned int i, j, size_fd;
160   mpz_t t, dke, em;
161   listz_t fd;
162 
163   ASSERT (d % m == 0);
164 
165   size_fd = k * (eulerphi(d) / eulerphi(m)) * (E + 1);
166   fd = (listz_t) malloc (size_fd * sizeof (mpz_t));
167   ASSERT_ALWAYS(fd != NULL);
168   for (i = 0; i < size_fd; i++)
169     mpz_init (fd[i]);
170 
171   mpz_init (t);
172   if (i0 != NULL)
173     mpz_set (t, i0);
174 
175   outputf (OUTPUT_TRACE, "init_progression_coeffs: i0 = %Zd, d = %u, e = %u, "
176            "k = %u, m = %u, E = %u, a = %d, size_fd = %u\n",
177            t, d, e, k, m, E, dickson_a, size_fd);
178 
179   /* Due to the condition i == 1 (mod m) we start at i = 1 or i = 0,
180      depending on whether m > 1 or m == 1 */
181   i = (m > 1) ? 1 : 0;
182   mpz_add_ui (t, t, (unsigned long) i);
183   mpz_mul_ui (t, t, e);
184   /* Now t = e * (i0 + i + n * d * k), for n = 0 */
185 
186   /* dke = d * k * e, the common difference of the arithmetic progressions
187      (it is the same for all arithmetic progressions we initialise) */
188   mpz_init (dke);
189   mpz_set_ui (dke, d);
190   mpz_mul_ui (dke, dke, k);
191   mpz_mul_ui (dke, dke, e);
192   /* em = e * m, the value by which t advances if we increase i by m */
193   mpz_init (em);
194   mpz_set_ui (em, e);
195   mpz_mul_ui (em, em, (unsigned long) m);
196 
197   for (j = 0; i < k * d; i += m)
198     {
199       if (mpz_gcd_ui (NULL, t, d) == 1)
200         {
201           outputf (OUTPUT_TRACE, "init_progression_coeffs: initing a "
202                    "progression for Dickson_{%d,%d}(%Zd + n * %Zd)\n",
203                    E, dickson_a, t, dke);
204 	  /* Initialise for the evaluation of Dickson_{E,a} (t + n*dke)
205 	     for n = 0, 1, 2, ... */
206           fin_diff_coeff (fd + j, t, dke, E, dickson_a);
207           j += E + 1;
208         } else
209           if (test_verbose (OUTPUT_TRACE))
210             outputf (OUTPUT_TRACE, "init_progression_coeffs: NOT initing a "
211                      "progression for Dickson_{%d,%d}(%Zd + n * %Zd), "
212                      "gcd (%Zd, %u) == %u)\n", E, dickson_a, t, dke, t, d,
213                      mpz_gcd_ui (NULL, t, d));
214       /* We increase i by m, so we increase t by e*m */
215       mpz_add (t, t, em);
216     }
217 
218   mpz_clear (em);
219   mpz_clear (dke);
220   mpz_clear (t);
221   return fd;
222 }
223 
224 void
init_roots_params(progression_params_t * params,const int S,const unsigned long d1,const unsigned long d2,const double cost)225 init_roots_params (progression_params_t *params, const int S,
226 		   const unsigned long d1, const unsigned long d2,
227 		   const double cost)
228 {
229   ASSERT (gcd (d1, d2) == 1);
230   /* If S < 0, use degree |S| Dickson poly, otherwise use x^S */
231   params->S = abs (S);
232   params->dickson_a = (S < 0) ? -1 : 0;
233 
234   /* We only calculate Dickson_{S, a}(j * d2) * s where
235      gcd (j, dsieve) == 1 and j == 1 (mod 6)
236      by doing nr = eulerphi(dsieve)/2 separate progressions. */
237   /* Now choose a value for dsieve. */
238   params->dsieve = 6;
239   params->nr = 1;
240 
241   /* Prospective saving by sieving out multiples of 5:
242      d1 / params->dsieve * params->nr / 5 roots, each one costs S point adds
243      Prospective cost increase:
244      4 times as many progressions to init (that is, 3 * params->nr more),
245      each costs ~ S * S * log_2(5 * dsieve * d2) / 2 point adds
246      The params->nr and one S cancel.
247   */
248   if (d1 % 5 == 0 &&
249       d1 / params->dsieve / 5. * cost >
250       3. * params->S * log (5. * params->dsieve * d2) / 2.)
251     {
252       params->dsieve *= 5;
253       params->nr *= 4;
254     }
255 
256   if (d1 % 7 == 0 &&
257       d1 / params->dsieve / 7. * cost >
258       5. * params->S * log (7. * params->dsieve * d2) / 2.)
259     {
260       params->dsieve *= 7;
261       params->nr *= 6;
262     }
263 
264 #if 0 /* commented out since not covered by unit tests */
265   if (d1 % 11 == 0 &&
266       d1 / params->dsieve / 11. * cost >
267       9. * params->S * log (11. * params->dsieve * d2) / 2.)
268     {
269       params->dsieve *= 11;
270       params->nr *= 10;
271     }
272 #endif
273 
274   params->size_fd = params->nr * (params->S + 1);
275   params->next = 0;
276   params->rsieve = 1;
277 }
278 
279 double
memory_use(unsigned long dF,unsigned int sp_num,unsigned int Ftreelvl,mpmod_t modulus)280 memory_use (unsigned long dF, unsigned int sp_num, unsigned int Ftreelvl,
281             mpmod_t modulus)
282 {
283   double mem;
284 
285   /* printf ("memory_use (%lu, %d, %d, )\n", dF, sp_num, Ftreelvl); */
286 
287   mem = 9.0; /* F:1, T:3*2, invF:1, G:1 */
288   mem += (double) Ftreelvl;
289   mem *= (double) dF;
290   mem += 2. * list_mul_mem (dF); /* Also in T */
291   /* estimated memory for list_mult_n /
292      wrap-case in PrerevertDivision respectively */
293   mem += (24.0 + 1.0) * (double) (sp_num ? MIN(MUL_NTT_THRESHOLD, dF) : dF);
294   mem *= (double) (mpz_size (modulus->orig_modulus)) * sizeof (mp_limb_t)
295          + sizeof (mpz_t);
296 
297   if (sp_num)
298     mem += /* peak malloc in ecm_ntt.c */
299          (4.0 * dF * sp_num * sizeof (sp_t))
300 
301 	 /* mpzspv_normalise */
302 	 + (MPZSPV_NORMALISE_STRIDE * ((double) sp_num *
303 	 	sizeof (sp_t) + 6.0 * sizeof (sp_t) + sizeof (float)))
304 
305 	 /* sp_F, sp_invF */
306 	 + ((1.0 + 2.0) * dF * sp_num * sizeof (sp_t));
307 
308   return mem;
309 }
310 
311 /* Input:  X is the point at end of stage 1
312            n is the number to factor
313            B2min-B2 is the stage 2 range (we consider B2min is done)
314            k0 is the number of blocks (if 0, use default)
315            S is the exponent for Brent-Suyama's extension
316            invtrick is non-zero iff one uses x+1/x instead of x.
317            Cf "Speeding the Pollard and Elliptic Curve Methods
318                of Factorization", Peter Montgomery, Math. of Comp., 1987,
319                page 257: using x^(i^e)+1/x^(i^e) instead of x^(i^(2e))
320                reduces the cost of Brent-Suyama's extension from 2*e
321                to e+3 multiplications per value of i.
322    Output: f is the factor found
323    Return value: 2 (step number) iff a factor was found,
324                  or ECM_ERROR if an error occurred.
325 */
326 int
stage2(mpz_t f,void * X,mpmod_t modulus,unsigned long dF,unsigned long k,root_params_t * root_params,int use_ntt,char * TreeFilename,int (* stop_asap)(void))327 stage2 (mpz_t f, void *X, mpmod_t modulus, unsigned long dF, unsigned long k,
328         root_params_t *root_params, int use_ntt, char *TreeFilename,
329         int (*stop_asap)(void))
330 {
331   unsigned long i, sizeT;
332   mpz_t n;
333   listz_t F, G, H, T;
334   int youpi = ECM_NO_FACTOR_FOUND;
335   long st, st0;
336   void *rootsG_state = NULL;
337   listz_t *Tree = NULL; /* stores the product tree for F */
338   unsigned int lgk; /* ceil(log(k)/log(2)) */
339   listz_t invF = NULL;
340   double mem;
341   mpzspm_t mpzspm = NULL;
342   mpzspv_t sp_F = NULL, sp_invF = NULL;
343 
344   /* check alloc. size of f */
345   mpres_realloc (f, modulus);
346 
347   st0 = cputime ();
348 
349   Fermat = 0;
350   if (modulus->repr == ECM_MOD_BASE2 && modulus->Fermat > 0)
351     {
352       Fermat = modulus->Fermat;
353       use_ntt = 0; /* don't use NTT for Fermat numbers */
354     }
355 
356   if (use_ntt)
357     {
358       mpzspm = mpzspm_init (2 * dF, modulus->orig_modulus);
359       ASSERT_ALWAYS(mpzspm != NULL);
360 
361       outputf (OUTPUT_VERBOSE,
362 	  "Using %u small primes for NTT\n", mpzspm->sp_num);
363     }
364 
365   lgk = ceil_log2 (dF);
366 
367   mem = memory_use (dF, use_ntt ? mpzspm->sp_num : 0,
368       (TreeFilename == NULL) ? lgk : 0, modulus);
369 
370   /* we want at least two significant digits */
371   if (mem < 1048576.0)
372     outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.0fKB\n", mem / 1024.);
373   else if (mem < 1073741824.0)
374     outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.2fMB\n",
375              mem / 1048576.);
376   else
377     outputf (OUTPUT_VERBOSE, "Estimated memory usage: %1.2fGB\n",
378              mem / 1073741824.);
379 
380   F = init_list2 (dF + 1, mpz_sizeinbase (modulus->orig_modulus, 2) +
381                           3 * GMP_NUMB_BITS);
382   ASSERT_ALWAYS(F != NULL);
383 
384   sizeT = 3 * dF + list_mul_mem (dF);
385   if (dF > 3)
386     sizeT += dF;
387   T = init_list2 (sizeT, 2 * mpz_sizeinbase (modulus->orig_modulus, 2) +
388                          3 * GMP_NUMB_BITS);
389   ASSERT_ALWAYS(T != NULL);
390   H = T;
391 
392   /* needs dF+1 cells in T */
393   youpi = ecm_rootsF (f, F, root_params, dF, (curve*) X, modulus);
394 
395   if (youpi != ECM_NO_FACTOR_FOUND)
396     {
397       if (youpi != ECM_ERROR)
398 	youpi = ECM_FACTOR_FOUND_STEP2;
399       goto clear_T;
400     }
401   if (stop_asap != NULL && (*stop_asap)())
402     goto clear_T;
403 
404   if (test_verbose (OUTPUT_TRACE))
405     {
406       unsigned long j;
407       for (j = 0; j < dF; j++)
408 	outputf (OUTPUT_TRACE, "f_%lu = %Zd\n", j, F[j]);
409     }
410 
411   /* ----------------------------------------------
412      |   F    |  invF  |   G   |         T        |
413      ----------------------------------------------
414      | rootsF |  ???   |  ???  |      ???         |
415      ---------------------------------------------- */
416 
417   if (TreeFilename == NULL)
418     {
419       Tree = (listz_t*) malloc (lgk * sizeof (listz_t));
420       ASSERT_ALWAYS(Tree != NULL);
421       for (i = 0; i < lgk; i++)
422         {
423           Tree[i] = init_list2 (dF, mpz_sizeinbase (modulus->orig_modulus, 2)
424                                     + GMP_NUMB_BITS);
425           ASSERT_ALWAYS(Tree[i] != NULL);
426         }
427     }
428   else
429     Tree = NULL;
430 
431 #ifdef TELLEGEN_DEBUG
432   outputf (OUTPUT_ALWAYS, "Roots = ");
433   print_list (os, F, dF);
434 #endif
435   mpz_init_set (n, modulus->orig_modulus);
436   st = cputime ();
437   if (TreeFilename != NULL)
438     {
439       FILE *TreeFile;
440       char *fullname = (char *) malloc (strlen (TreeFilename) + 1 + 2 + 1);
441       int ret;
442       ASSERT_ALWAYS(fullname != NULL);
443 
444       for (i = lgk; i > 0; i--)
445         {
446           if (stop_asap != NULL && (*stop_asap)())
447             goto free_Tree_i;
448           sprintf (fullname, "%s.%lu", TreeFilename, i - 1);
449 
450 	  TreeFile = fopen (fullname, "wb");
451           if (TreeFile == NULL)
452             {
453               outputf (OUTPUT_ERROR,
454                        "Error opening file for product tree of F\n");
455               youpi = ECM_ERROR;
456               goto free_Tree_i;
457             }
458 
459           ret = (use_ntt) ? ntt_PolyFromRoots_Tree (F, F, dF, T, i - 1,
460                                                     mpzspm, NULL, TreeFile)
461             : PolyFromRoots_Tree (F, F, dF, T, i - 1, n, NULL, TreeFile, 0);
462 	  if (ret == ECM_ERROR)
463 	    {
464               fclose (TreeFile);
465               youpi = ECM_ERROR;
466               goto free_Tree_i;
467             }
468           fclose (TreeFile);
469         }
470       free (fullname);
471     }
472   else
473     {
474       /* TODO: how to check for stop_asap() here? */
475       if (use_ntt)
476         ntt_PolyFromRoots_Tree (F, F, dF, T, -1, mpzspm, Tree, NULL);
477       else
478 	PolyFromRoots_Tree (F, F, dF, T, -1, n, Tree, NULL, 0);
479     }
480 
481 
482   if (test_verbose (OUTPUT_TRACE))
483     {
484       unsigned long j;
485       for (j = 0; j < dF; j++)
486 	outputf (OUTPUT_TRACE, "F[%lu] = %Zd\n", j, F[j]);
487     }
488   outputf (OUTPUT_VERBOSE, "Building F from its roots took %ldms\n",
489            elltime (st, cputime ()));
490 
491   if (stop_asap != NULL && (*stop_asap)())
492     goto free_Tree_i;
493 
494 
495   /* needs dF+list_mul_mem(dF/2) cells in T */
496 
497   mpz_set_ui (F[dF], 1); /* the leading monic coefficient needs to be stored
498                              explicitly for PrerevertDivision */
499 
500   /* ----------------------------------------------
501      |   F    |  invF  |   G   |         T        |
502      ----------------------------------------------
503      |  F(x)  |  ???   |  ???  |      ???         |
504      ---------------------------------------------- */
505 
506   /* G*H has degree 2*dF-2, hence we must cancel dF-1 coefficients
507      to get degree dF-1 */
508   if (dF > 1)
509     {
510       /* only dF-1 coefficients of 1/F are needed to reduce G*H,
511          but we need one more for TUpTree */
512       invF = init_list2 (dF + 1, mpz_sizeinbase (modulus->orig_modulus, 2) +
513                                  2 * GMP_NUMB_BITS);
514       ASSERT_ALWAYS(invF != NULL);
515       st = cputime ();
516 
517       if (use_ntt)
518         {
519 	  sp_F = mpzspv_init (dF, mpzspm);
520 	  mpzspv_from_mpzv (sp_F, 0, F, dF, mpzspm);
521 	  mpzspv_to_ntt (sp_F, 0, dF, dF, 1, mpzspm);
522 
523 	  ntt_PolyInvert (invF, F + 1, dF, T, mpzspm);
524 	  sp_invF = mpzspv_init (2 * dF, mpzspm);
525 	  mpzspv_from_mpzv (sp_invF, 0, invF, dF, mpzspm);
526 	  mpzspv_to_ntt (sp_invF, 0, dF, 2 * dF, 0, mpzspm);
527 	}
528       else
529         PolyInvert (invF, F + 1, dF, T, n);
530 
531       /* now invF[0..dF-1] = Quo(x^(2dF-1), F) */
532       outputf (OUTPUT_VERBOSE, "Computing 1/F took %ldms\n",
533 	       elltime (st, cputime ()));
534 
535       /* ----------------------------------------------
536          |   F    |  invF  |   G   |         T        |
537          ----------------------------------------------
538          |  F(x)  | 1/F(x) |  ???  |      ???         |
539          ---------------------------------------------- */
540     }
541 
542   if (stop_asap != NULL && (*stop_asap)())
543     goto clear_invF;
544 
545   /* start computing G with dF roots.
546 
547      In the non CM case, roots are at i0*d, (i0+1)*d, (i0+2)*d, ...
548      where i0*d <= B2min < (i0+1)*d .
549   */
550   G = init_list2 (dF, mpz_sizeinbase (modulus->orig_modulus, 2) +
551                       3 * GMP_NUMB_BITS);
552   ASSERT_ALWAYS(G != NULL);
553 
554   st = cputime ();
555   rootsG_state = ecm_rootsG_init (f, (curve *) X, root_params, dF, k,
556                                   modulus);
557 
558   /* rootsG_state=NULL if an error occurred or (ecm only) a factor was found */
559   if (rootsG_state == NULL)
560     {
561       /* ecm: f = -1 if an error occurred */
562       youpi = (mpz_cmp_si (f, -1)) ? ECM_FACTOR_FOUND_STEP2 : ECM_ERROR;
563       goto clear_G;
564     }
565 
566   if (stop_asap != NULL && (*stop_asap)())
567     goto clear_fd;
568 
569   for (i = 0; i < k; i++)
570     {
571       /* needs dF+1 cells in T+dF */
572 	youpi = ecm_rootsG (f, G, dF, (ecm_roots_state_t *) rootsG_state,
573 			      modulus);
574 
575       if (test_verbose (OUTPUT_TRACE))
576 	{
577 	  unsigned long j;
578 	  for (j = 0; j < dF; j++)
579 	    outputf (OUTPUT_TRACE, "g_%lu = %Zd\n", j, G[j]);
580 	}
581 
582       ASSERT(youpi != ECM_ERROR); /* xxx_rootsG cannot fail */
583       if (youpi) /* factor found */
584         {
585           youpi = ECM_FACTOR_FOUND_STEP2;
586           goto clear_fd;
587         }
588 
589     if (stop_asap != NULL && (*stop_asap)())
590       goto clear_fd;
591 
592   /* -----------------------------------------------
593      |   F    |  invF  |   G    |         T        |
594      -----------------------------------------------
595      |  F(x)  | 1/F(x) | rootsG |      ???         |
596      ----------------------------------------------- */
597 
598       st = cputime ();
599 
600       if (use_ntt)
601         ntt_PolyFromRoots (G, G, dF, T + dF, mpzspm);
602       else
603         PolyFromRoots (G, G, dF, T + dF, n);
604 
605       if (test_verbose (OUTPUT_TRACE))
606 	{
607 	  unsigned long j;
608 	  outputf (OUTPUT_TRACE, "G(x) = x^%lu ", dF);
609 	  for (j = 0; j < dF; j++)
610 	    outputf (OUTPUT_TRACE, "+ (%Zd * x^%lu)", G[j], j);
611 	  outputf (OUTPUT_TRACE, "\n");
612 	}
613 
614       /* needs 2*dF+list_mul_mem(dF/2) cells in T */
615       outputf (OUTPUT_VERBOSE, "Building G from its roots took %ldms\n",
616                elltime (st, cputime ()));
617 
618     if (stop_asap != NULL && (*stop_asap)())
619       goto clear_fd;
620 
621   /* -----------------------------------------------
622      |   F    |  invF  |   G    |         T        |
623      -----------------------------------------------
624      |  F(x)  | 1/F(x) |  G(x)  |      ???         |
625      ----------------------------------------------- */
626 
627       if (i == 0)
628         {
629           list_sub (H, G, F, dF); /* coefficients 1 of degree cancel,
630                                      thus T is of degree < dF */
631           list_mod (H, H, dF, n);
632           /* ------------------------------------------------
633              |   F    |  invF  |    G    |         T        |
634              ------------------------------------------------
635              |  F(x)  | 1/F(x) |  ???    |G(x)-F(x)|  ???   |
636              ------------------------------------------------ */
637         }
638       else
639 	{
640           /* since F and G are monic of same degree, G mod F = G - F */
641           list_sub (G, G, F, dF);
642           list_mod (G, G, dF, n);
643 
644           /* ------------------------------------------------
645              |   F    |  invF  |    G    |         T        |
646              ------------------------------------------------
647              |  F(x)  | 1/F(x) |G(x)-F(x)|  H(x)  |         |
648              ------------------------------------------------ */
649 
650 	  st = cputime ();
651 	  /* previous G mod F is in H, with degree < dF, i.e. dF coefficients:
652 	     requires 3dF-1+list_mul_mem(dF) cells in T */
653           if (use_ntt)
654 	    {
655 	      ntt_mul (T + dF, G, H, dF, T + 3 * dF, 0, mpzspm);
656 	      list_mod (H, T + dF, 2 * dF, n);
657 	    }
658 	  else
659 	    list_mulmod (H, T + dF, G, H, dF, T + 3 * dF, n);
660 
661           outputf (OUTPUT_VERBOSE, "Computing G * H took %ldms\n",
662                    elltime (st, cputime ()));
663 
664           if (stop_asap != NULL && (*stop_asap)())
665             goto clear_fd;
666 
667           /* ------------------------------------------------
668              |   F    |  invF  |    G    |         T        |
669              ------------------------------------------------
670              |  F(x)  | 1/F(x) |G(x)-F(x)| G * H  |         |
671              ------------------------------------------------ */
672 
673 	  st = cputime ();
674 
675           if (use_ntt)
676 	    {
677 	      ntt_PrerevertDivision (H, F, invF + 1, sp_F, sp_invF, dF,
678 		  T + 2 * dF, mpzspm);
679 	    }
680 	  else
681 	    {
682 	      if (PrerevertDivision (H, F, invF + 1, dF, T + 2 * dF, n))
683 	        {
684 	          youpi = ECM_ERROR;
685 	          goto clear_fd;
686 	        }
687 	    }
688 
689 	  outputf (OUTPUT_VERBOSE, "Reducing  G * H mod F took %ldms\n",
690                    elltime (st, cputime ()));
691 
692           if (stop_asap != NULL && (*stop_asap)())
693             goto clear_fd;
694 	}
695     }
696 
697   clear_list (F, dF + 1);
698   F = NULL;
699   clear_list (G, dF);
700   G = NULL;
701   st = cputime ();
702   if (use_ntt)
703     youpi = ntt_polyevalT (T, dF, Tree, T + dF + 1, sp_invF,
704 	mpzspm, TreeFilename);
705   else
706     youpi = polyeval_tellegen (T, dF, Tree, T + dF + 1, sizeT - dF - 1, invF,
707 	n, TreeFilename);
708 
709   if (youpi)
710     {
711       outputf (OUTPUT_ERROR, "Error, not enough memory\n");
712       goto clear_fd;
713     }
714 
715   if (test_verbose (OUTPUT_TRACE))
716     {
717       unsigned long j;
718       for (j = 0; j < dF; j++)
719 	outputf (OUTPUT_TRACE, "G(x_%lu) = %Zd\n", j, T[j]);
720     }
721 
722   outputf (OUTPUT_VERBOSE, "Computing polyeval(F,G) took %ldms\n",
723            elltime (st, cputime ()));
724 
725   st = cputime ();
726   list_mulup (T, dF, n, T[dF]);
727   outputf (OUTPUT_VERBOSE, "Computing product of all F(g_i) took %ldms\n",
728            elltime (st, cputime ()));
729 
730   mpz_gcd (f, T[dF - 1], n);
731   if (mpz_cmp_ui (f, 1) > 0)
732     youpi = ECM_FACTOR_FOUND_STEP2;
733   else
734     /* Here, mpz_cmp_ui (f, 1) == 0, i.e. no factor was found */
735     outputf (OUTPUT_RESVERBOSE, "Product of G(f_i) = %Zd\n", T[0]);
736 
737 clear_fd:
738   ecm_rootsG_clear ((ecm_roots_state_t *) rootsG_state, modulus);
739 
740 clear_G:
741   clear_list (G, dF);
742 clear_invF:
743   clear_list (invF, dF + 1);
744 
745   if (use_ntt)
746     {
747       mpzspv_clear (sp_F, mpzspm);
748       mpzspv_clear (sp_invF, mpzspm);
749     }
750 free_Tree_i:
751   if (Tree != NULL)
752     {
753       for (i = 0; i < lgk; i++)
754         clear_list (Tree[i], dF);
755       free (Tree);
756     }
757   /* the trees are already cleared by ntt_polyevalT or polyeval_tellegen */
758   mpz_clear (n);
759 
760 clear_T:
761   clear_list (T, sizeT);
762   clear_list (F, dF + 1);
763 
764   if (use_ntt)
765     mpzspm_clear (mpzspm);
766 
767   if (Fermat)
768     F_clear ();
769 
770 
771   if (stop_asap == NULL || !(*stop_asap)())
772     {
773       st0 = elltime (st0, cputime ());
774       outputf (OUTPUT_NORMAL, "Step 2 took %ldms\n", st0);
775     }
776 
777   return youpi;
778 }
779