1 /* Pollard 'P-1' algorithm.
2 
3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
4 2012, Paul Zimmermann and Alexander Kruppa.
5 
6 This file is part of the ECM Library.
7 
8 The ECM Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The ECM Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the ECM Library; see the file COPYING.LIB.  If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include <math.h>
24 #include <stdlib.h>
25 #include "ecm-impl.h"
26 #include "getprime_r.h"
27 
28 #define CASCADE_THRES 3
29 #define CASCADE_MAX 50000000.0
30 
31 typedef struct {
32   unsigned int size;
33   mpz_t *val;
34 } mul_casc;
35 
36 /******************************************************************************
37 *                                                                             *
38 *                                  Stage 1                                    *
39 *                                                                             *
40 ******************************************************************************/
41 
42 /* prime powers are accumulated up to about n^L1 */
43 #define L1 16
44 
45 /*** Cascaded multiply ***/
46 
47 /* return NULL if an error occurred */
48 static mul_casc *
mulcascade_init(void)49 mulcascade_init (void)
50 {
51   mul_casc *t;
52 
53   t = (mul_casc *) malloc (sizeof (mul_casc));
54   ASSERT_ALWAYS(t != NULL);
55   t->val = (mpz_t*) malloc (sizeof (mpz_t));
56   ASSERT_ALWAYS(t->val != NULL);
57   mpz_init (t->val[0]);
58   t->size = 1;
59   return t;
60 }
61 
62 static void
mulcascade_free(mul_casc * c)63 mulcascade_free (mul_casc *c)
64 {
65   unsigned int i;
66 
67   for (i = 0; i < c->size; i++)
68     mpz_clear (c->val[i]);
69   free (c->val);
70   free (c);
71 }
72 
73 static void
mulcascade_mul_d(mul_casc * c,const double n,ATTRIBUTE_UNUSED mpz_t t)74 mulcascade_mul_d (mul_casc *c, const double n, ATTRIBUTE_UNUSED mpz_t t)
75 {
76   unsigned int i;
77 
78   if (mpz_sgn (c->val[0]) == 0)
79     {
80       mpz_set_d (c->val[0], n);
81       return;
82     }
83 
84   mpz_mul_d (c->val[0], c->val[0], n, t);
85   if (mpz_size (c->val[0]) <= CASCADE_THRES)
86     return;
87 
88   for (i = 1; i < c->size; i++)
89     {
90       if (mpz_sgn (c->val[i]) == 0)
91         {
92           mpz_set (c->val[i], c->val[i-1]);
93           mpz_set_ui (c->val[i-1], 0);
94           return;
95         }
96       else
97 	{
98           mpz_mul (c->val[i], c->val[i], c->val[i-1]);
99           mpz_set_ui (c->val[i-1], 0);
100         }
101     }
102 
103   /* Allocate more space for cascade */
104 
105   i = c->size++;
106   c->val = (mpz_t*) realloc (c->val, c->size * sizeof (mpz_t));
107   ASSERT_ALWAYS(c->val != NULL);
108   mpz_init (c->val[i]);
109   mpz_swap (c->val[i], c->val[i-1]);
110 }
111 
112 /* initialize c to n (assumes c is empty) */
113 static void
mulcascade_set(mul_casc * c,mpz_t n)114 mulcascade_set (mul_casc *c, mpz_t n)
115 {
116   ASSERT(mpz_sgn (c->val[0]) == 0);
117 
118   mpz_set (c->val[0], n);
119 }
120 
121 static void
mulcascade_get_z(mpz_t r,mul_casc * c)122 mulcascade_get_z (mpz_t r, mul_casc *c)
123 {
124   unsigned int i;
125 
126   ASSERT(c->size != 0);
127 
128   mpz_set_ui (r, 1);
129 
130   for (i = 0; i < c->size; i++)
131     if (mpz_sgn (c->val[i]) != 0)
132       mpz_mul (r, r, c->val[i]);
133 }
134 
135 
136 /* Input:  a is the generator (sigma)
137            n is the number to factor
138            B1 is the stage 1 bound
139 	   B1done: stage 1 was already done up to that limit
140 	   go is the group order to preload
141    Output: f is the factor found, a is the value at end of stage 1
142 	   B1done is set to B1 if stage 1 completed normally,
143 	   or to the largest prime processed if interrupted, but never
144 	   to a smaller value than B1done was upon function entry.
145    Return value: non-zero iff a factor was found (or an error occurred).
146 */
147 
148 static int
pm1_stage1(mpz_t f,mpres_t a,mpmod_t n,double B1,double * B1done,mpz_t go,int (* stop_asap)(void),char * chkfilename)149 pm1_stage1 (mpz_t f, mpres_t a, mpmod_t n, double B1, double *B1done,
150             mpz_t go, int (*stop_asap)(void), char *chkfilename)
151 {
152   double p, q, r, cascade_limit, last_chkpnt_p;
153   mpz_t g, d;
154   int youpi = ECM_NO_FACTOR_FOUND;
155   unsigned int size_n, max_size;
156   unsigned int smallbase = 0;
157   mul_casc *cascade;
158   long last_chkpnt_time;
159   const double B0 = sqrt (B1);
160   prime_info_t prime_info;
161 
162   mpz_init (g);
163   mpz_init (d);
164 
165   size_n = mpz_sizeinbase (n->orig_modulus, 2);
166   max_size = L1 * size_n;
167 
168   mpres_get_z (g, a, n);
169   if (mpz_fits_uint_p (g))
170     smallbase = mpz_get_ui (g);
171 
172   mpz_set_ui (g, 1);
173 
174   /* Set a limit of roughly 10000 * log_10(N) for the primes that are
175      multiplied up in the exponent, i.e. 1M for a 100 digit number,
176      but limit to CASCADE_MAX to avoid problems with stack allocation */
177 
178   cascade_limit = 3000.0 * (double) size_n;
179 
180   if (cascade_limit > CASCADE_MAX)
181     cascade_limit = CASCADE_MAX;
182 
183   if (cascade_limit > B1)
184     cascade_limit = B1;
185 
186   cascade = mulcascade_init ();
187   if (cascade == NULL)
188     {
189       youpi = ECM_ERROR;
190       goto clear_pm1_stage1;
191     }
192 
193   /* since B0 = sqrt(B1), we can have B0 > cascade_limit only when
194      B1 > cascade_limit^2. This cannot happen when cascade_limit=B1,
195      thus we need B1 > min(CASCADE_MAX, 3000*sizeinbase(n,2))^2.
196      For sizeinbase(n,2) <= CASCADE_MAX/3000 (less than 5017 digits
197      for CASCADE_MAX=5e7) this means B1 > 9e6*sizeinbase(n,2)^2.
198      For sizeinbase(n,2) > CASCADE_MAX/3000, this means B1 > CASCADE_MAX^2,
199      i.e. B1 > 25e14 for CASCADE_MAX=5e7.
200   */
201 
202   /* if the user knows that P-1 has a given divisor, he can supply it */
203   if (mpz_cmp_ui (go, 1) > 0)
204     mulcascade_set (cascade, go);
205 
206   last_chkpnt_time = cputime ();
207   last_chkpnt_p = 2.;
208 
209   /* Fill the multiplication cascade with the product of small stage 1
210      primes */
211   /* Add small primes <= MIN(sqrt(B1), cascade_limit) in the appropriate
212      power to the cascade */
213   prime_info_init (prime_info);
214   for (p = 2.; p <= MIN(B0, cascade_limit); p = (double) getprime_mt (prime_info))
215     {
216       for (q = 1., r = p; r <= B1; r *= p)
217         if (r > *B1done) q *= p;
218       mulcascade_mul_d (cascade, q, d);
219     }
220 
221   /* If B0 < cascade_limit, we can add some primes > sqrt(B1) with
222      exponent 1 to the cascade */
223   for ( ; p <= cascade_limit; p = (double) getprime_mt (prime_info))
224     if (p > *B1done)
225       mulcascade_mul_d (cascade, p, d);
226 
227   /* Now p > cascade_limit, flush cascade and exponentiate */
228   mulcascade_get_z (g, cascade);
229   mulcascade_free (cascade);
230   outputf (OUTPUT_DEVVERBOSE, "Exponent has %u bits\n",
231            mpz_sizeinbase (g, 2));
232 
233   if (smallbase)
234     {
235       outputf (OUTPUT_DEVVERBOSE, "Using mpres_ui_pow, base %u\n", smallbase);
236       mpres_ui_pow (a, smallbase, g, n);
237     }
238   else
239     {
240       mpres_pow (a, a, g, n);
241     }
242   mpz_set_ui (g, 1);
243 
244   /* If B0 > cascade_limit, we need to process the primes
245      cascade_limit < p < B0 in the appropriate exponent yet */
246   for ( ; p <= B0; p = (double) getprime_mt (prime_info))
247     {
248       for (q = 1, r = p; r <= B1; r *= p)
249         if (r > *B1done) q *= p;
250       mpz_mul_d (g, g, q, d);
251       if (mpz_sizeinbase (g, 2) >= max_size)
252         {
253           mpres_pow (a, a, g, n);
254           mpz_set_ui (g, 1);
255         if (stop_asap != NULL && (*stop_asap) ())
256           {
257             outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
258             if (p > *B1done)
259               *B1done = p;
260             goto clear_pm1_stage1;
261           }
262         }
263     }
264 
265   /* All primes sqrt(B1) < p <= B1 appear with exponent 1. All primes <= B1done
266      are already included with exponent at least 1, so it's safe to skip
267      ahead to B1done+1. */
268 
269   while (p <= *B1done)
270     p = (double) getprime_mt (prime_info);
271 
272   /* then remaining primes > max(sqrt(B1), cascade_limit) and taken
273      with exponent 1 */
274   for (; p <= B1; p = (double) getprime_mt (prime_info))
275   {
276     mpz_mul_d (g, g, p, d);
277     if (mpz_sizeinbase (g, 2) >= max_size)
278       {
279         mpres_pow (a, a, g, n);
280         mpz_set_ui (g, 1);
281         if (stop_asap != NULL && (*stop_asap) ())
282           {
283             outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
284              if (p > *B1done)
285               *B1done = p;
286             goto clear_pm1_stage1;
287           }
288         if (chkfilename != NULL && p > last_chkpnt_p + 10000. &&
289             elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD)
290           {
291 	    writechkfile (chkfilename, ECM_PM1, p, n, NULL, a, NULL, NULL);
292             last_chkpnt_p = p;
293             last_chkpnt_time = cputime ();
294           }
295       }
296   }
297 
298   mpres_pow (a, a, g, n);
299 
300   /* If stage 1 finished normally, p is the smallest prime >B1 here.
301      In that case, set to B1 */
302   if (p > B1)
303       p = B1;
304 
305   if (p > *B1done)
306     *B1done = p;
307 
308   mpres_sub_ui (a, a, 1, n);
309   mpres_gcd (f, a, n);
310   if (mpz_cmp_ui (f, 1) > 0)
311     youpi = ECM_FACTOR_FOUND_STEP1;
312   mpres_add_ui (a, a, 1, n);
313 
314  clear_pm1_stage1:
315   if (chkfilename != NULL)
316     writechkfile (chkfilename, ECM_PM1, *B1done, n, NULL, a, NULL, NULL);
317   prime_info_clear (prime_info); /* free the prime table */
318   mpz_clear (d);
319   mpz_clear (g);
320 
321   return youpi;
322 }
323 
324 
325 static void
print_prob(double B1,const mpz_t B2,unsigned long dF,unsigned long k,int S,const mpz_t go)326 print_prob (double B1, const mpz_t B2, unsigned long dF, unsigned long k,
327             int S, const mpz_t go)
328 {
329   double prob;
330   int i;
331   char sep;
332 
333   outputf (OUTPUT_VERBOSE, "Probability of finding a factor of n digits:\n");
334   outputf (OUTPUT_VERBOSE, "20\t25\t30\t35\t40\t45\t50\t55\t60\t65\n");
335   for (i = 20; i <= 65; i += 5)
336     {
337       sep = (i < 65) ? '\t' : '\n';
338       prob = pm1prob (B1, mpz_get_d (B2),
339                       pow (10., i - .5), (double) dF * dF * k, S, go);
340       outputf (OUTPUT_VERBOSE, "%.2g%c", prob, sep);
341     }
342 }
343 
344 
345 
346 /******************************************************************************
347 *                                                                             *
348 *                                Pollard P-1                                  *
349 *                                                                             *
350 ******************************************************************************/
351 
352 /* Input: p is the initial generator (sigma), if 0, generate it at random.
353           N is the number to factor
354 	  B1 is the stage 1 bound
355 	  B2 is the stage 2 bound
356 	  B1done is the stage 1 limit to which supplied residue has
357 	    already been computed
358           k is the number of blocks for stage 2
359           verbose is the verbosity level
360    Output: f is the factor found, p is the residue at end of stage 1
361    Return value: non-zero iff a factor is found (1 for stage 1, 2 for stage 2)
362 */
363 int
pm1(mpz_t f,mpz_t p,mpz_t N,mpz_t go,double * B1done,double B1,mpz_t B2min_parm,mpz_t B2_parm,unsigned long k,int verbose,int repr,int use_ntt,FILE * os,FILE * es,char * chkfilename,char * TreeFilename,double maxmem,gmp_randstate_t rng,int (* stop_asap)(void))364 pm1 (mpz_t f, mpz_t p, mpz_t N, mpz_t go, double *B1done, double B1,
365      mpz_t B2min_parm, mpz_t B2_parm, unsigned long k,
366      int verbose, int repr, int use_ntt, FILE *os, FILE *es,
367      char *chkfilename, char *TreeFilename, double maxmem,
368      gmp_randstate_t rng, int (*stop_asap)(void))
369 {
370   int youpi = ECM_NO_FACTOR_FOUND;
371   long st;
372   mpmod_t modulus;
373   mpres_t x;
374   mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */
375   faststage2_param_t params;
376 
377   set_verbose (verbose);
378   ECM_STDOUT = (os == NULL) ? stdout : os;
379   ECM_STDERR = (es == NULL) ? stdout : es;
380 
381   /* if n is even, return 2 */
382   if (mpz_divisible_2exp_p (N, 1))
383     {
384       mpz_set_ui (f, 2);
385       return ECM_FACTOR_FOUND_STEP1;
386     }
387 
388   st = cputime ();
389 
390   if (mpz_cmp_ui (p, 0) == 0)
391     pm1_random_seed (p, N, rng);
392 
393   mpz_init_set (B2min, B2min_parm);
394   mpz_init_set (B2, B2_parm);
395 
396   /* Set default B2. See ecm.c for comments */
397   if (ECM_IS_DEFAULT_B2(B2))
398     mpz_set_d (B2, pow (B1 * PM1FS2_COST, PM1FS2_DEFAULT_B2_EXPONENT));
399 
400   /* set B2min */
401   if (mpz_sgn (B2min) < 0)
402     mpz_set_d (B2min, B1);
403 
404   /* choice of modular arithmetic: if default choice, choose mpzmod which
405      is always faster, since mpz_powm uses base-k sliding window exponentiation
406      and mpres_pow does not */
407   if (repr == ECM_MOD_DEFAULT && isbase2 (N, BASE2_THRESHOLD) == 0)
408     mpmod_init (modulus, N, ECM_MOD_MPZ);
409   else
410     mpmod_init (modulus, N, repr);
411 
412   /* Determine parameters (polynomial degree etc.) */
413 
414     {
415       long P_ntt, P_nontt;
416       const unsigned long lmax = 1UL << 30; /* An upper bound */
417       unsigned long lmax_NTT, lmax_noNTT;
418       faststage2_param_t params_ntt, params_nontt, *better_params;
419       mpz_t effB2min_ntt, effB2_ntt, effB2min_nontt, effB2_nontt;
420 
421       mpz_init (params.m_1);
422       params.l = 0;
423       mpz_init (params_ntt.m_1);
424       params_ntt.l = 0;
425       mpz_init (params_nontt.m_1);
426       params_nontt.l = 0;
427       mpz_init (effB2min_ntt);
428       mpz_init (effB2_ntt);
429       mpz_init (effB2min_nontt);
430       mpz_init (effB2_nontt);
431 
432       /* Find out what the longest transform length is we can do at all.
433 	 If no maxmem is given, the non-NTT can theoretically do any length. */
434 
435       lmax_NTT = 0;
436       if (use_ntt)
437 	{
438 	  unsigned long t;
439 	  /* See what transform length the NTT can handle (due to limited
440 	     primes and limited memory) */
441 	  t = mpzspm_max_len (N);
442 	  lmax_NTT = MIN (lmax, t);
443 	  if (maxmem != 0.)
444 	    {
445 	      t = pm1fs2_maxlen (double_to_size (maxmem), N, use_ntt);
446 	      lmax_NTT = MIN (lmax_NTT, t);
447 	    }
448 	  outputf (OUTPUT_DEVVERBOSE, "NTT can handle lmax <= %lu\n", lmax_NTT);
449           P_ntt = choose_P (B2min, B2, lmax_NTT, k, &params_ntt,
450                             effB2min_ntt, effB2_ntt, 1, ECM_PM1);
451           if (P_ntt != ECM_ERROR)
452             outputf (OUTPUT_DEVVERBOSE,
453 	             "Parameters for NTT: P=%lu, l=%lu\n",
454 	             params_ntt.P, params_ntt.l);
455 	}
456       else
457         P_ntt = 0; /* or GCC complains about uninitialized var */
458 
459       /* See what transform length the non-NTT code can handle */
460       lmax_noNTT = lmax;
461       if (maxmem != 0.)
462 	{
463 	  unsigned long t;
464 	  t = pm1fs2_maxlen (double_to_size (maxmem), N, 0);
465 	  lmax_noNTT = MIN (lmax_noNTT, t);
466 	  outputf (OUTPUT_DEVVERBOSE, "non-NTT can handle lmax <= %lu\n",
467 		   lmax_noNTT);
468 	}
469       if (use_ntt != 2)
470         P_nontt = choose_P (B2min, B2, lmax_noNTT, k, &params_nontt,
471                             effB2min_nontt, effB2_nontt, 0, ECM_PM1);
472       else
473         P_nontt = ECM_ERROR;
474       if (P_nontt != ECM_ERROR)
475         outputf (OUTPUT_DEVVERBOSE,
476                  "Parameters for non-NTT: P=%lu, l=%lu\n",
477                  params_nontt.P, params_nontt.l);
478 
479       if (((!use_ntt || P_ntt == ECM_ERROR) && P_nontt == ECM_ERROR) ||
480           (use_ntt == 2 && P_ntt == ECM_ERROR))
481         {
482           outputf (OUTPUT_ERROR,
483                    "Error: cannot choose suitable P value for your stage 2 "
484                    "parameters.\nTry a shorter B2min,B2 interval.\n");
485           mpz_clear (params.m_1);
486           mpz_clear (params_ntt.m_1);
487           mpz_clear (params_nontt.m_1);
488           return ECM_ERROR;
489         }
490 
491       /* Now decide whether to take NTT or non-NTT. Since the non-NTT code
492          uses more memory, we only use it when -no-ntt was given, or when
493          we can't find good parameters for the NTT code. */
494       if (use_ntt == 0 || P_ntt == ECM_ERROR)
495         {
496           better_params = &params_nontt;
497           mpz_set (B2min, effB2min_nontt);
498           mpz_set (B2, effB2_nontt);
499           use_ntt = 0;
500         }
501       else
502         {
503           better_params = &params_ntt;
504           mpz_set (B2min, effB2min_ntt);
505           mpz_set (B2, effB2_ntt);
506           use_ntt = 1;
507         }
508 
509       params.P = better_params->P;
510       params.s_1 = better_params->s_1;
511       params.s_2 = better_params->s_2;
512       params.l = better_params->l;
513       mpz_set (params.m_1, better_params->m_1);
514       params.file_stem = TreeFilename;
515       params.file_stem = TreeFilename;
516 
517       mpz_clear (params_ntt.m_1);
518       mpz_clear (params_nontt.m_1);
519       mpz_clear (effB2min_ntt);
520       mpz_clear (effB2_ntt);
521       mpz_clear (effB2min_nontt);
522       mpz_clear (effB2_nontt);
523 
524       outputf (OUTPUT_VERBOSE, "Using lmax = %lu with%s NTT which takes "
525                "about %luMB of memory\n", params.l,
526                (use_ntt) ? "" : "out",
527                pm1fs2_memory_use (params.l, N, use_ntt) / 1048576);
528     }
529 
530   /* Print B1, B2, polynomial and x0 */
531   print_B1_B2_poly (OUTPUT_NORMAL, ECM_PM1, B1, *B1done, B2min_parm, B2min,
532                     B2, 1, p, 0, 0, NULL, 0, 0);
533 
534   /* If we do a stage 2, print its parameters */
535   if (mpz_cmp (B2, B2min) >= 0)
536     {
537       /* can't mix 64-bit types and mpz_t on win32 for some reason */
538       outputf (OUTPUT_VERBOSE, "P = %" PRId64 ", l = %lu"
539                     ", s_1 = %" PRId64 ", k = s_2 = %" PRId64 ,
540              params.P, params.l,
541              params.s_1,params.s_2);
542       outputf (OUTPUT_VERBOSE, ", m_1 = %Zd\n", params.m_1);
543     }
544 
545   if (test_verbose (OUTPUT_VERBOSE))
546     {
547       if (mpz_sgn (B2min_parm) >= 0)
548         {
549           outputf (OUTPUT_VERBOSE,
550             "Can't compute success probabilities for B1 <> B2min\n");
551         }
552       else
553         {
554           rhoinit (256, 10);
555           print_prob (B1, B2, 0, k, 1, go);
556         }
557     }
558 
559   mpres_init (x, modulus);
560   mpres_set_z (x, p, modulus);
561 
562   st = cputime ();
563 
564   if (B1 > *B1done || mpz_cmp_ui (go, 1) > 0)
565     youpi = pm1_stage1 (f, x, modulus, B1, B1done, go, stop_asap, chkfilename);
566 
567   st = elltime (st, cputime ());
568 
569   outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", st);
570   if (test_verbose (OUTPUT_RESVERBOSE))
571     {
572       mpz_t tx;
573       mpz_init (tx);
574       mpres_get_z (tx, x, modulus);
575       outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", tx);
576       mpz_clear (tx);
577     }
578 
579   if (stop_asap != NULL && (*stop_asap) ())
580     goto clear_and_exit;
581 
582   if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0)
583     {
584       if (use_ntt)
585         youpi = pm1fs2_ntt (f, x, modulus, &params);
586       else
587         youpi = pm1fs2 (f, x, modulus, &params);
588     }
589 
590   if (test_verbose (OUTPUT_VERBOSE))
591     {
592       if (mpz_sgn (B2min_parm) < 0)
593         rhoinit (1, 0); /* Free memory of rhotable */
594     }
595 
596 clear_and_exit:
597   mpres_get_z (p, x, modulus);
598   mpres_clear (x, modulus);
599   mpmod_clear (modulus);
600   mpz_clear (params.m_1);
601   mpz_clear (B2);
602   mpz_clear (B2min);
603 
604   return youpi;
605 }
606