1 /* batch.c - Implement batch mode for step 1 of ECM
2
3 Copyright 2011, 2012, 2016 Cyril Bouvier, Paul Zimmermann and David Cleaver.
4
5 This file is part of the ECM Library.
6
7 The ECM Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11
12 The ECM Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with the ECM Library; see the file COPYING.LIB. If not, see
19 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
20 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22
23 /* ECM stage 1 in batch mode, for initial point (x:z) with small coordinates,
24 such that x and z fit into a mp_limb_t.
25 For example we can start with (x=2:y=1) with the curve by^2 = x^3 + ax^2 + x
26 with a = 4d-2 and b=16d+2, then we have to multiply by d=(a+2)/4 in the
27 duplicates.
28 With the change of variable x=b*X, y=b*Y, this curve becomes:
29 Y^2 = X^3 + a/b*X^2 + 1/b^2*X.
30 */
31
32 #include <stdlib.h>
33 #include "ecm-impl.h"
34 #include "getprime_r.h"
35
36 #define MAX_HEIGHT 32
37
38 #if ECM_UINT_MAX == 4294967295
39 /* On a 32-bit machine, with no access to a 64-bit type,
40 the maximum value that can be returned by mpz_sizeinbase(s,2)
41 is = (2^32-1). Multiplying all primes up to the following
42 will result in a product that has (2^32-1) bits. */
43 #define MAX_B1_BATCH 2977044736UL
44 #elif defined(_WIN32)
45 /* Due to a limitation in GMP on 64-bit Windows, should also
46 affect 32-bit Windows, sufficient memory cannot be allocated
47 for the batch product s when using primes larger than the following */
48 #define MAX_B1_BATCH 3124253146UL
49 #else
50 /* nth_prime(2^(MAX_HEIGHT-1)) */
51 #define MAX_B1_BATCH 50685770167ULL
52 #endif
53
54 /* If forbiddenres != NULL, forbiddenres = "m r_1 ... r_k -1" indicating that
55 if p = r_i mod m, then p^2 should be considered instead of p. This has
56 only a sense for CM curves. We assume r_1 < r_2 < ... < r_k.
57 Typical example: "4 3 -1" for curves Y^2 = X^3 + a * X.
58 */
59 void
compute_s(mpz_t s,ecm_uint B1,int * forbiddenres ATTRIBUTE_UNUSED)60 compute_s (mpz_t s, ecm_uint B1, int *forbiddenres ATTRIBUTE_UNUSED)
61 {
62 mpz_t acc[MAX_HEIGHT]; /* To accumulate products of prime powers */
63 mpz_t ppz;
64 unsigned int i, j;
65 ecm_uint pi = 2, pp, maxpp, qi;
66 prime_info_t prime_info;
67
68 prime_info_init (prime_info);
69
70 ASSERT_ALWAYS (B1 < MAX_B1_BATCH);
71
72 for (j = 0; j < MAX_HEIGHT; j++)
73 mpz_init (acc[j]); /* sets acc[j] to 0 */
74 mpz_init (ppz);
75
76 i = 0;
77 while (pi <= B1)
78 {
79 pp = qi = pi;
80 maxpp = B1 / qi;
81 #ifdef HAVE_ADDLAWS
82 if(forbiddenres != NULL && pi > 2){
83 /* non splitting primes can occur in even powers only */
84 int rp = (int)(pi % forbiddenres[0]);
85 for(j = 1; forbiddenres[j] >= 0; j++)
86 if(rp >= forbiddenres[j])
87 break;
88 if(rp == forbiddenres[j]){
89 /* printf("p=%lu is forbidden\n", pi);*/
90 if(qi <= maxpp){
91 /* qi <= B1/qi => qi^2 <= B1, let it go */
92 qi *= qi;
93 }
94 else{
95 /* qi is too large, do not increment i */
96 pi = getprime_mt (prime_info);
97 continue;
98 }
99 }
100 }
101 #endif
102 while (pp <= maxpp)
103 pp *= qi;
104
105 #if ECM_UINT_MAX == 4294967295
106 mpz_set_ui (ppz, pp);
107 #else
108 mpz_set_uint64 (ppz, pp);
109 #endif
110
111 if ((i & 1) == 0)
112 mpz_set (acc[0], ppz);
113 else
114 mpz_mul (acc[0], acc[0], ppz);
115
116 j = 0;
117 /* We have accumulated i+1 products so far. If bits 0..j of i are all
118 set, then i+1 is a multiple of 2^(j+1). */
119 while ((i & (1 << j)) != 0)
120 {
121 /* we use acc[MAX_HEIGHT-1] as 0-sentinel below, thus we need
122 j+1 < MAX_HEIGHT-1 */
123 ASSERT (j + 1 < MAX_HEIGHT - 1);
124 if ((i & (1 << (j + 1))) == 0) /* i+1 is not multiple of 2^(j+2),
125 thus add[j+1] is "empty" */
126 mpz_swap (acc[j+1], acc[j]); /* avoid a copy with mpz_set */
127 else
128 mpz_mul (acc[j+1], acc[j+1], acc[j]); /* accumulate in acc[j+1] */
129 mpz_set_ui (acc[j], 1);
130 j++;
131 }
132
133 i++;
134 pi = getprime_mt (prime_info);
135 }
136
137 for (mpz_set (s, acc[0]), j = 1; mpz_cmp_ui (acc[j], 0) != 0; j++)
138 mpz_mul (s, s, acc[j]);
139
140 prime_info_clear (prime_info); /* free the prime tables */
141
142 for (i = 0; i < MAX_HEIGHT; i++)
143 mpz_clear (acc[i]);
144 mpz_clear (ppz);
145 }
146
147 #if 0
148 /* this function is useful in debug mode to print non-normalized residues */
149 static void
150 mpresn_print (mpres_t x, mpmod_t n)
151 {
152 mp_size_t m, xn;
153
154 xn = SIZ(x);
155 m = ABSIZ(x);
156 MPN_NORMALIZE(PTR(x), m);
157 SIZ(x) = xn >= 0 ? m : -m;
158 gmp_printf ("%Zd\n", x);
159 SIZ(x) = xn;
160 }
161 #endif
162
163 /* (x1:z1) <- 2(x1:z1)
164 (x2:z2) <- (x1:z1) + (x2:z2)
165 assume (x2:z2) - (x1:z1) = (2:1)
166 Uses 4 modular multiplies and 4 modular squarings.
167 Inputs are x1, z1, x2, z2, d, n.
168 Use two auxiliary variables: t, w (it seems using one only is not possible
169 if all mpresn_mul and mpresn_sqr calls don't overlap input and output).
170
171 In the batch 1 mode, we pass d_prime such that the actual d is d_prime/beta.
172 Since beta is a square, if d_prime is a square (on 64-bit machines),
173 so is d.
174 In mpresn_mul_1, we multiply by d_prime = beta*d and divide by beta.
175 */
176 static void
dup_add_batch1(mpres_t x1,mpres_t z1,mpres_t x2,mpres_t z2,mpres_t t,mpres_t w,mp_limb_t d_prime,mpmod_t n)177 dup_add_batch1 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
178 mpres_t t, mpres_t w, mp_limb_t d_prime, mpmod_t n)
179 {
180 /* active: x1 z1 x2 z2 */
181 mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
182 /* active: w z1 x2 z2 */
183 mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
184 /* active: w z1 x1 x2 */
185
186 mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
187 /* active: w z1 x1 z2 */
188 mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
189 /* active: w z1 x2 z2 */
190 mpresn_sqr (t, z1, n); /* t = (x1-z1)^2 */
191 /* active: w t x2 z2 */
192 mpresn_sqr (z1, w, n); /* z1 = (x1+z1)^2 */
193 /* active: z1 t x2 z2 */
194
195 mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
196 /* active: x1 z1 t x2 z2 */
197
198 mpresn_sub (w, z1, t, n); /* w = (x1+z1)^2 - (x1-z1)^2 */
199 /* active: x1 w t x2 z2 */
200
201 mpresn_mul_1 (z1, w, d_prime, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
202 /* active: x1 z1 w t x2 z2 */
203
204 mpresn_add (t, t, z1, n); /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
205 /* active: x1 w t x2 z2 */
206 mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
207 /* active: x1 z1 x2 z2 */
208
209 mpresn_addsub (w, z2, x2, z2, n);
210 /* active: x1 z1 w z2 */
211
212 mpresn_sqr (x2, w, n);
213 /* active: x1 z1 x2 z2 */
214 mpresn_sqr (w, z2, n);
215 /* active: x1 z1 x2 w */
216 mpresn_add (z2, w, w, n);
217 }
218
219 static void
dup_add_batch2(mpres_t x1,mpres_t z1,mpres_t x2,mpres_t z2,mpres_t t,mpres_t w,mpres_t d,mpmod_t n)220 dup_add_batch2 (mpres_t x1, mpres_t z1, mpres_t x2, mpres_t z2,
221 mpres_t t, mpres_t w, mpres_t d, mpmod_t n)
222 {
223 /* active: x1 z1 x2 z2 */
224 mpresn_addsub (w, z1, x1, z1, n); /* w = x1+z1, z1 = x1-z1 */
225 /* active: w z1 x2 z2 */
226 mpresn_addsub (x1, x2, x2, z2, n); /* x1 = x2+z2, x2 = x2-z2 */
227 /* active: w z1 x1 x2 */
228
229 mpresn_mul (z2, w, x2, n); /* w = (x1+z1)(x2-z2) */
230 /* active: w z1 x1 z2 */
231 mpresn_mul (x2, z1, x1, n); /* x2 = (x1-z1)(x2+z2) */
232 /* active: w z1 x2 z2 */
233 mpresn_sqr (t, z1, n); /* t = (x1-z1)^2 */
234 /* active: w t x2 z2 */
235 mpresn_sqr (z1, w, n); /* z1 = (x1+z1)^2 */
236 /* active: z1 t x2 z2 */
237
238 mpresn_mul (x1, z1, t, n); /* xdup = (x1+z1)^2 * (x1-z1)^2 */
239 /* active: x1 z1 t x2 z2 */
240
241 mpresn_sub (w, z1, t, n); /* w = (x1+z1)^2 - (x1-z1)^2 */
242 /* active: x1 w t x2 z2 */
243
244 mpresn_mul (z1, w, d, n); /* z1 = d * ((x1+z1)^2 - (x1-z1)^2) */
245 /* active: x1 z1 w t x2 z2 */
246
247 mpresn_add (t, t, z1, n); /* t = (x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2) */
248 /* active: x1 w t x2 z2 */
249 mpresn_mul (z1, w, t, n); /* zdup = w * [(x1-z1)^2 - d* ((x1+z1)^2 - (x1-z1)^2)] */
250 /* active: x1 z1 x2 z2 */
251
252 mpresn_addsub (w, z2, x2, z2, n);
253 /* active: x1 z1 w z2 */
254
255 mpresn_sqr (x2, w, n);
256 /* active: x1 z1 x2 z2 */
257 mpresn_sqr (w, z2, n);
258 /* active: x1 z1 x2 w */
259 mpresn_add (z2, w, w, n);
260 }
261
262
263 /* Input: x is initial point
264 A is curve parameter in Montgomery's form:
265 g*y^2*z = x^3 + a*x^2*z + x*z^2
266 n is the number to factor
267 B1 is the stage 1 bound
268 Output: If a factor is found, it is returned in x.
269 Otherwise, x contains the x-coordinate of the point computed
270 in stage 1 (with z coordinate normalized to 1).
271 B1done is set to B1 if stage 1 completed normally,
272 or to the largest prime processed if interrupted, but never
273 to a smaller value than B1done was upon function entry.
274 Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise
275 ECM_NO_FACTOR_FOUND
276 */
277 /*
278 For now we don't take into account go stop_asap and chkfilename
279 */
280 int
ecm_stage1_batch(mpz_t f,mpres_t x,mpres_t A,mpmod_t n,double B1,double * B1done,int batch,mpz_t s)281 ecm_stage1_batch (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1,
282 double *B1done, int batch, mpz_t s)
283 {
284 mp_limb_t d_1;
285 mpz_t d_2;
286
287 mpres_t x1, z1, x2, z2;
288 ecm_uint i;
289 mpres_t t, u;
290 int ret = ECM_NO_FACTOR_FOUND;
291
292 mpres_init (x1, n);
293 mpres_init (z1, n);
294 mpres_init (x2, n);
295 mpres_init (z2, n);
296 mpres_init (t, n);
297 mpres_init (u, n);
298 if (batch == ECM_PARAM_BATCH_2)
299 mpres_init (d_2, n);
300
301 /* initialize P */
302 mpres_set (x1, x, n);
303 mpres_set_ui (z1, 1, n); /* P1 <- 1P */
304
305 /* Compute d=(A+2)/4 from A and d'=B*d thus d' = 2^(GMP_NUMB_BITS-2)*(A+2) */
306 if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
307 {
308 mpres_get_z (u, A, n);
309 mpz_add_ui (u, u, 2);
310 mpz_mul_2exp (u, u, GMP_NUMB_BITS - 2);
311 mpres_set_z_for_gcd (u, u, n); /* reduces u mod n */
312 if (mpz_size (u) > 1)
313 {
314 mpres_get_z (u, A, n);
315 outputf (OUTPUT_ERROR,
316 "Error, 2^%d*(A+2) should fit in a mp_limb_t, A=%Zd\n",
317 GMP_NUMB_BITS - 2, u);
318 return ECM_ERROR;
319 }
320 d_1 = mpz_getlimbn (u, 0);
321 }
322 else
323 {
324 /* b = (A0+2)*B/4, where B=2^(k*GMP_NUMB_BITS)
325 for MODMULN or REDC, B=2^GMP_NUMB_BITS for batch1,
326 and B=1 otherwise */
327 mpres_add_ui (d_2, A, 2, n);
328 mpres_div_2exp (d_2, d_2, 2, n);
329 }
330
331 /* Compute 2P : no need to duplicate P, the coordinates are simple. */
332 mpres_set_ui (x2, 9, n);
333 /* here d = d_1 / GMP_NUMB_BITS */
334 if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
335 {
336 /* warning: mpres_set_ui takes an unsigned long which has only 32 bits
337 on Windows, while d_1 might have 64 bits */
338 ASSERT_ALWAYS (mpz_size (u) == 1 && mpz_getlimbn (u, 0) == d_1);
339 mpres_set_z (z2, u, n);
340 mpres_div_2exp (z2, z2, GMP_NUMB_BITS, n);
341 }
342 else
343 mpres_set (z2, d_2, n);
344
345 mpres_mul_2exp (z2, z2, 6, n);
346 mpres_add_ui (z2, z2, 8, n); /* P2 <- 2P = (9 : : 64d+8) */
347
348 /* invariant: if j represents the upper bits of s,
349 then P1 = j*P and P2=(j+1)*P */
350
351 mpresn_pad (x1, n);
352 mpresn_pad (z1, n);
353 mpresn_pad (x2, n);
354 mpresn_pad (z2, n);
355
356 /* now perform the double-and-add ladder */
357 if (batch == ECM_PARAM_BATCH_SQUARE || batch == ECM_PARAM_BATCH_32BITS_D)
358 {
359 for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
360 {
361 if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
362 /* P2 <- P1+P2 P1 <- 2*P1 */
363 dup_add_batch1 (x1, z1, x2, z2, t, u, d_1, n);
364 else /* (j,j+1) -> (2j+1,2j+2) */
365 /* P1 <- P1+P2 P2 <- 2*P2 */
366 dup_add_batch1 (x2, z2, x1, z1, t, u, d_1, n);
367 }
368 }
369 else /* batch = ECM_PARAM_BATCH_2 */
370 {
371 mpresn_pad (d_2, n);
372 for (i = mpz_sizeinbase (s, 2) - 1; i-- > 0;)
373 {
374 if (mpz_tstbit (s, i) == 0) /* (j,j+1) -> (2j,2j+1) */
375 /* P2 <- P1+P2 P1 <- 2*P1 */
376 dup_add_batch2 (x1, z1, x2, z2, t, u, d_2, n);
377 else /* (j,j+1) -> (2j+1,2j+2) */
378 /* P1 <- P1+P2 P2 <- 2*P2 */
379 dup_add_batch2 (x2, z2, x1, z1, t, u, d_2, n);
380 }
381 }
382
383 *B1done=B1;
384
385 mpresn_unpad (x1);
386 mpresn_unpad (z1);
387
388 if (!mpres_invert (u, z1, n)) /* Factor found? */
389 {
390 mpres_gcd (f, z1, n);
391 ret = ECM_FACTOR_FOUND_STEP1;
392 }
393 mpres_mul (x, x1, u, n);
394
395 mpz_clear (x1);
396 mpz_clear (z1);
397 mpz_clear (x2);
398 mpz_clear (z2);
399 mpz_clear (t);
400 mpz_clear (u);
401 if (batch == 2)
402 {
403 mpz_clear (d_2);
404 }
405
406 return ret;
407 }
408