1 /* batch.c - Implement batch mode for step 1 of ECM
2 
3 Copyright 2011, 2012, 2016 Cyril Bouvier, Paul Zimmermann and David Cleaver.
4 
5 This file is part of the ECM Library.
6 
7 The ECM Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 The ECM Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 License for more details.
16 
17 You should have received a copy of the GNU Lesser General Public License
18 along with the ECM Library; see the file COPYING.LIB.  If not, see
19 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
20 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
21 
22 
23 /* ECM stage 1 in batch mode, for initial point (x:z) with small coordinates,
24    such that x and z fit into a mp_limb_t.
25    For example we can start with (x=2:y=1) with the curve by^2 = x^3 + ax^2 + x
26    with a = 4d-2 and b=16d+2, then we have to multiply by d=(a+2)/4 in the
27    duplicates.
28    With the change of variable x=b*X, y=b*Y, this curve becomes:
29    Y^2 = X^3 + a/b*X^2 + 1/b^2*X.
30 */
31 
32 #include <stdlib.h>
33 #include "ecm-impl.h"
34 #include "getprime_r.h"
35 
36 #define MAX_HEIGHT 32
37 
38 #if ECM_UINT_MAX == 4294967295
39 /* On a 32-bit machine, with no access to a 64-bit type,
40     the maximum value that can be returned by mpz_sizeinbase(s,2)
41     is = (2^32-1).  Multiplying all primes up to the following
42     will result in a product that has (2^32-1) bits. */
43 #define MAX_B1_BATCH 2977044736UL
44 #elif defined(_WIN32)
45 /* Due to a limitation in GMP on 64-bit Windows, should also
46     affect 32-bit Windows, sufficient memory cannot be allocated
47     for the batch product s when using primes larger than the following */
48 #define MAX_B1_BATCH 3124253146UL
49 #else
50 /* nth_prime(2^(MAX_HEIGHT-1)) */
51 #define MAX_B1_BATCH 50685770167ULL
52 #endif
53 
54 /* If forbiddenres != NULL, forbiddenres = "m r_1 ... r_k -1" indicating that
55    if p = r_i mod m, then p^2 should be considered instead of p. This has
56    only a sense for CM curves. We assume r_1 < r_2 < ... < r_k.
57    Typical example: "4 3 -1" for curves Y^2 = X^3 + a * X.
58 */
59 void
compute_s(mpz_t s,ecm_uint B1,int * forbiddenres ATTRIBUTE_UNUSED)60 compute_s (mpz_t s, ecm_uint B1, int *forbiddenres ATTRIBUTE_UNUSED)
61 {
62   mpz_t acc[MAX_HEIGHT]; /* To accumulate products of prime powers */
63   mpz_t ppz;
64   unsigned int i, j;
65   ecm_uint pi = 2, pp, maxpp, qi;
66   prime_info_t prime_info;
67 
68   prime_info_init (prime_info);
69 
70   ASSERT_ALWAYS (B1 < MAX_B1_BATCH);
71 
72   for (j = 0; j < MAX_HEIGHT; j++)
73     mpz_init (acc[j]); /* sets acc[j] to 0 */
74   mpz_init (ppz);
75 
76   i = 0;
77   while (pi <= B1)
78     {
79       pp = qi = pi;
80       maxpp = B1 / qi;
81 #ifdef HAVE_ADDLAWS
82       if(forbiddenres != NULL && pi > 2){
83 	  /* non splitting primes can occur in even powers only */
84 	  int rp = (int)(pi % forbiddenres[0]);
85 	  for(j = 1; forbiddenres[j] >= 0; j++)
86 	      if(rp >= forbiddenres[j])
87 		  break;
88 	  if(rp == forbiddenres[j]){
89 	      /*	      printf("p=%lu is forbidden\n", pi);*/
90 	      if(qi <= maxpp){
91 		  /* qi <= B1/qi => qi^2 <= B1, let it go */
92 		  qi *= qi;
93 	      }
94 	      else{
95 		  /* qi is too large, do not increment i */
96 		  pi = getprime_mt (prime_info);
97 		  continue;
98 	      }
99 	  }
100       }
101 #endif
102       while (pp <= maxpp)
103           pp *= qi;
104 
105 #if ECM_UINT_MAX == 4294967295
106           mpz_set_ui (ppz, pp);
107 #else
108           mpz_set_uint64 (ppz, pp);
109 #endif
110 
111       if ((i & 1) == 0)
112           mpz_set (acc[0], ppz);
113       else
114           mpz_mul (acc[0], acc[0], ppz);
115 
116       j = 0;
117       /* We have accumulated i+1 products so far. If bits 0..j of i are all
118          set, then i+1 is a multiple of 2^(j+1). */
119       while ((i & (1 << j)) != 0)
120         {
121           /* we use acc[MAX_HEIGHT-1] as 0-sentinel below, thus we need
122              j+1 < MAX_HEIGHT-1 */
123           ASSERT (j + 1 < MAX_HEIGHT - 1);
124           if ((i & (1 << (j + 1))) == 0) /* i+1 is not multiple of 2^(j+2),
125                                             thus add[j+1] is "empty" */
126             mpz_swap (acc[j+1], acc[j]); /* avoid a copy with mpz_set */
127           else
128             mpz_mul (acc[j+1], acc[j+1], acc[j]); /* accumulate in acc[j+1] */
129           mpz_set_ui (acc[j], 1);
130           j++;
131         }
132 
133       i++;
134       pi = getprime_mt (prime_info);
135     }
136 
137   for (mpz_set (s, acc[0]), j = 1; mpz_cmp_ui (acc[j], 0) != 0; j++)
138     mpz_mul (s, s, acc[j]);
139 
140   prime_info_clear (prime_info); /* free the prime tables */
141 
142   for (i = 0; i < MAX_HEIGHT; i++)
143       mpz_clear (acc[i]);
144   mpz_clear (ppz);
145 }
146 
147 #if 0
148 /* this function is useful in debug mode to print non-normalized residues */
149 static void
150 mpresn_print (mpres_t x, mpmod_t n)
151 {
152   mp_size_t m, xn;
153 
154   xn = SIZ(x);
155   m = ABSIZ(x);
156   MPN_NORMALIZE(PTR(x), m);
157   SIZ(x) = xn >= 0 ? m : -m;
158   gmp_printf ("%Zd\n", x);
159   SIZ(x) = xn;
160 }
161 #endif
162 
163 /* (x1:z1) <- 2(x1:z1)
164    (x2:z2) <- (x1:z1) + (x2:z2)
165    assume (x2:z2) - (x1:z1) = (2:1)
166    Uses 4 modular multiplies and 4 modular squarings.
167    Inputs are x1, z1, x2, z2, d, n.
168    Use two auxiliary variables: t, w (it seems using one only is not possible
169    if all mpresn_mul and mpresn_sqr calls don't overlap input and output).
170 
171    In the batch 1 mode, we pass d_prime such that the actual d is d_prime/beta.
172    Since beta is a square, if d_prime is a square (on 64-bit machines),
173    so is d.
174    In mpresn_mul_1, we multiply by d_prime = beta*d and divide by beta.
175 */
176 static void
dup_add_batch1(mpres_t x1,mpres_t z1,mpres_t x2,mpres_t z2,mpres_t t,mpres_t w,mp_limb_t d_prime,mpmod_t n)177 dup_add_batch1 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
178                 mpres_t t, mpres_t w, mp_limb_t d_prime, mpmod_t n)
179 {
180   /* active: x1 z1 x2 z2 */
181   mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
182   /* active: w z1 x2 z2 */
183   mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
184   /* active: w z1 x1 x2 */
185 
186   mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
187   /* active: w z1 x1 z2 */
188   mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
189   /* active: w z1 x2 z2 */
190   mpresn_sqr (t, z1, n);    /* t = (x1-z1)^2 */
191   /* active: w t x2 z2 */
192   mpresn_sqr (z1, w, n);    /* z1 = (x1+z1)^2 */
193   /* active: z1 t x2 z2 */
194 
195   mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
196   /* active: x1 z1 t x2 z2 */
197 
198   mpresn_sub (w, z1, t, n);   /* w = (x1+z1)^2 - (x1-z1)^2 */
199   /* active: x1 w t x2 z2 */
200 
201   mpresn_mul_1 (z1, w, d_prime, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
202   /* active: x1 z1 w t x2 z2 */
203 
204   mpresn_add (t, t, z1, n);  /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
205   /* active: x1 w t x2 z2 */
206   mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
207   /* active: x1 z1 x2 z2 */
208 
209   mpresn_addsub (w, z2, x2, z2, n);
210   /* active: x1 z1 w z2 */
211 
212   mpresn_sqr (x2, w, n);
213   /* active: x1 z1 x2 z2 */
214   mpresn_sqr (w, z2, n);
215   /* active: x1 z1 x2 w */
216   mpresn_add (z2, w, w, n);
217 }
218 
219 static void
dup_add_batch2(mpres_t x1,mpres_t z1,mpres_t x2,mpres_t z2,mpres_t t,mpres_t w,mpres_t d,mpmod_t n)220 dup_add_batch2 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
221                 mpres_t t, mpres_t w, mpres_t d, mpmod_t n)
222 {
223   /* active: x1 z1 x2 z2 */
224   mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
225   /* active: w z1 x2 z2 */
226   mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
227   /* active: w z1 x1 x2 */
228 
229   mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
230   /* active: w z1 x1 z2 */
231   mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
232   /* active: w z1 x2 z2 */
233   mpresn_sqr (t, z1, n);    /* t = (x1-z1)^2 */
234   /* active: w t x2 z2 */
235   mpresn_sqr (z1, w, n);    /* z1 = (x1+z1)^2 */
236   /* active: z1 t x2 z2 */
237 
238   mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
239   /* active: x1 z1 t x2 z2 */
240 
241   mpresn_sub (w, z1, t, n);   /* w = (x1+z1)^2 - (x1-z1)^2 */
242   /* active: x1 w t x2 z2 */
243 
244   mpresn_mul (z1, w, d, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
245   /* active: x1 z1 w t x2 z2 */
246 
247   mpresn_add (t, t, z1, n);  /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
248   /* active: x1 w t x2 z2 */
249   mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
250   /* active: x1 z1 x2 z2 */
251 
252   mpresn_addsub (w, z2, x2, z2, n);
253   /* active: x1 z1 w z2 */
254 
255   mpresn_sqr (x2, w, n);
256   /* active: x1 z1 x2 z2 */
257   mpresn_sqr (w, z2, n);
258   /* active: x1 z1 x2 w */
259   mpresn_add (z2, w, w, n);
260 }
261 
262 
263 /* Input: x is initial point
264           A is curve parameter in Montgomery's form:
265           g*y^2*z = x^3 + a*x^2*z + x*z^2
266           n is the number to factor
267 	  B1 is the stage 1 bound
268    Output: If a factor is found, it is returned in x.
269            Otherwise, x contains the x-coordinate of the point computed
270            in stage 1 (with z coordinate normalized to 1).
271 	   B1done is set to B1 if stage 1 completed normally,
272 	   or to the largest prime processed if interrupted, but never
273 	   to a smaller value than B1done was upon function entry.
274    Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise
275            ECM_NO_FACTOR_FOUND
276 */
277 /*
278 For now we don't take into account go stop_asap and chkfilename
279 */
280 int
ecm_stage1_batch(mpz_t f,mpres_t x,mpres_t A,mpmod_t n,double B1,double * B1done,int batch,mpz_t s)281 ecm_stage1_batch (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1,
282                   double *B1done, int batch, mpz_t s)
283 {
284   mp_limb_t d_1;
285   mpz_t d_2;
286 
287   mpres_t x1, z1, x2, z2;
288   ecm_uint i;
289   mpres_t t, u;
290   int ret = ECM_NO_FACTOR_FOUND;
291 
292   mpres_init (x1, n);
293   mpres_init (z1, n);
294   mpres_init (x2, n);
295   mpres_init (z2, n);
296   mpres_init (t, n);
297   mpres_init (u, n);
298   if (batch == ECM_PARAM_BATCH_2)
299     mpres_init (d_2, n);
300 
301   /* initialize P */
302   mpres_set (x1, x, n);
303   mpres_set_ui (z1, 1, n); /* P1 <- 1P */
304 
305   /* Compute d=(A+2)/4 from A and d'=B*d thus d' = 2^(GMP_NUMB_BITS-2)*(A+2) */
306   if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
307   {
308       mpres_get_z (u, A, n);
309       mpz_add_ui (u, u, 2);
310       mpz_mul_2exp (u, u, GMP_NUMB_BITS - 2);
311       mpres_set_z_for_gcd (u, u, n); /* reduces u mod n */
312       if (mpz_size (u) > 1)
313         {
314           mpres_get_z (u, A, n);
315           outputf (OUTPUT_ERROR,
316                "Error, 2^%d*(A+2) should fit in a mp_limb_t, A=%Zd\n",
317                    GMP_NUMB_BITS - 2, u);
318           return ECM_ERROR;
319         }
320       d_1 = mpz_getlimbn (u, 0);
321     }
322   else
323     {
324       /* b = (A0+2)*B/4, where B=2^(k*GMP_NUMB_BITS)
325          for MODMULN or REDC, B=2^GMP_NUMB_BITS for batch1,
326          and B=1 otherwise */
327       mpres_add_ui (d_2, A, 2, n);
328       mpres_div_2exp (d_2, d_2, 2, n);
329     }
330 
331   /* Compute 2P : no need to duplicate P, the coordinates are simple. */
332   mpres_set_ui (x2, 9, n);
333   /* here d = d_1 / GMP_NUMB_BITS */
334   if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
335     {
336       /* warning: mpres_set_ui takes an unsigned long which has only 32 bits
337          on Windows, while d_1 might have 64 bits */
338       ASSERT_ALWAYS (mpz_size (u) == 1 && mpz_getlimbn (u, 0) == d_1);
339       mpres_set_z (z2, u, n);
340       mpres_div_2exp (z2, z2, GMP_NUMB_BITS, n);
341     }
342   else
343       mpres_set (z2, d_2, n);
344 
345   mpres_mul_2exp (z2, z2, 6, n);
346   mpres_add_ui (z2, z2, 8, n); /* P2 <- 2P = (9 : : 64d+8) */
347 
348   /* invariant: if j represents the upper bits of s,
349      then P1 = j*P and P2=(j+1)*P */
350 
351   mpresn_pad (x1, n);
352   mpresn_pad (z1, n);
353   mpresn_pad (x2, n);
354   mpresn_pad (z2, n);
355 
356   /* now perform the double-and-add ladder */
357   if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
358     {
359       for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
360         {
361           if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
362             /* P2 <- P1+P2    P1 <- 2*P1 */
363             dup_add_batch1 (x1, z1, x2, z2, t, u, d_1, n);
364           else /* (j,j+1) -> (2j+1,2j+2) */
365               /* P1 <- P1+P2     P2 <- 2*P2 */
366             dup_add_batch1 (x2, z2, x1, z1, t, u, d_1, n);
367         }
368     }
369   else /* batch = ECM_PARAM_BATCH_2 */
370     {
371       mpresn_pad (d_2, n);
372       for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
373         {
374           if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
375             /* P2 <- P1+P2    P1 <- 2*P1 */
376             dup_add_batch2 (x1, z1, x2, z2, t, u, d_2, n);
377           else /* (j,j+1) -> (2j+1,2j+2) */
378               /* P1 <- P1+P2     P2 <- 2*P2 */
379             dup_add_batch2 (x2, z2, x1, z1, t, u, d_2, n);
380         }
381     }
382 
383   *B1done=B1;
384 
385   mpresn_unpad (x1);
386   mpresn_unpad (z1);
387 
388   if (!mpres_invert (u, z1, n)) /* Factor found? */
389     {
390       mpres_gcd (f, z1, n);
391       ret = ECM_FACTOR_FOUND_STEP1;
392     }
393   mpres_mul (x, x1, u, n);
394 
395   mpz_clear (x1);
396   mpz_clear (z1);
397   mpz_clear (x2);
398   mpz_clear (z2);
399   mpz_clear (t);
400   mpz_clear (u);
401   if (batch == 2)
402     {
403       mpz_clear (d_2);
404     }
405 
406   return ret;
407 }
408