1 /* Elliptic Curve Method: toplevel and stage 1 routines.
2
3 Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
4 2012, 2016 Paul Zimmermann, Alexander Kruppa, Cyril Bouvier, David Cleaver.
5
6 This file is part of the ECM Library.
7
8 The ECM Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The ECM Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the ECM Library; see the file COPYING.LIB. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include "ecm-impl.h"
27 #include "getprime_r.h"
28 #include <math.h>
29
30 #ifdef HAVE_ADDLAWS
31 #include "addlaws.h"
32 #endif
33
34 #ifdef HAVE_LIMITS_H
35 # include <limits.h>
36 #else
37 # define ULONG_MAX __GMP_ULONG_MAX
38 #endif
39
40 #ifdef TIMING_CRT
41 extern int mpzspv_from_mpzv_slow_time, mpzspv_to_mpzv_time,
42 mpzspv_normalise_time;
43 #endif
44
45 /* the following factor takes into account the smaller expected smoothness
46 for Montgomery's curves (batch mode) with respect to Suyama's curves */
47 /* For param 1 we use A=4d-2 with d a square (see main.c). In that
48 case, Cyril Bouvier and Razvan Barbulescu have shown that the average
49 expected torsion is that of a generic Suyama curve multiplied by the
50 constant 2^(1/3)/(3*3^(1/128)) */
51 #define EXTRA_SMOOTHNESS_SQUARE 0.416384512396064
52 /* For A=4d-2 (param 3) for d a random integer, the average expected torsion
53 is that of a generic Suyama curve multiplied by the constant
54 1/(3*3^(1/128)) */
55 #define EXTRA_SMOOTHNESS_32BITS_D 0.330484606500389
56 /******************************************************************************
57 * *
58 * Elliptic Curve Method *
59 * *
60 ******************************************************************************/
61
62 void duplicate (mpres_t, mpres_t, mpres_t, mpres_t, mpmod_t, mpres_t,
63 mpres_t, mpres_t, mpres_t) ATTRIBUTE_HOT;
64 void add3 (mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t, mpres_t,
65 mpres_t, mpmod_t, mpres_t, mpres_t, mpres_t) ATTRIBUTE_HOT;
66
67 #define mpz_mulmod5(r,s1,s2,m,t) { mpz_mul(t,s1,s2); mpz_mod(r, t, m); }
68
69 /* switch from Montgomery's form g*y^2 = x^3 + a*x^2 + x
70 to Weierstrass' form Y^2 = X^3 + A*X + B
71 by change of variables x -> g*X-a/3, y -> g*Y.
72 We have A = (3-a^2)/(3g^2), X = (3x+a)/(3g), Y = y/g.
73 If a factor is found during the modular inverse, returns
74 ECM_FACTOR_FOUND_STEP1 and the factor in f, otherwise returns
75 ECM_NO_FACTOR_FOUND.
76
77 The input value of y is the y0 value in the initial equation:
78 g*y0^2 = x0^3 + a*x0^2 + x0.
79 */
80 int
montgomery_to_weierstrass(mpz_t f,mpres_t x,mpres_t y,mpres_t A,mpmod_t n)81 montgomery_to_weierstrass (mpz_t f, mpres_t x, mpres_t y, mpres_t A, mpmod_t n)
82 {
83 mpres_t g;
84
85 mpres_init (g, n);
86 mpres_add (g, x, A, n);
87 mpres_mul (g, g, x, n);
88 mpres_add_ui (g, g, 1, n);
89 mpres_mul (g, g, x, n); /* g = x^3+a*x^2+x (y=1) */
90 mpres_mul_ui (y, g, 3, n);
91 mpres_mul (y, y, g, n); /* y = 3g^2 */
92 if (!mpres_invert (y, y, n)) /* y = 1/(3g^2) temporarily */
93 {
94 mpres_gcd (f, y, n);
95 mpres_clear (g, n);
96 return ECM_FACTOR_FOUND_STEP1;
97 }
98
99 /* update x */
100 mpres_mul_ui (x, x, 3, n); /* 3x */
101 mpres_add (x, x, A, n); /* 3x+a */
102 mpres_mul (x, x, g, n); /* (3x+a)*g */
103 mpres_mul (x, x, y, n); /* (3x+a)/(3g) */
104
105 /* update A */
106 mpres_sqr (A, A, n); /* a^2 */
107 mpres_sub_ui (A, A, 3, n);
108 mpres_neg (A, A, n); /* 3-a^2 */
109 mpres_mul (A, A, y, n); /* (3-a^2)/(3g^2) */
110
111 /* update y */
112 mpres_mul_ui (g, g, 3, n); /* 3g */
113 mpres_mul (y, y, g, n); /* (3g)/(3g^2) = 1/g */
114
115 mpres_clear (g, n);
116 return ECM_NO_FACTOR_FOUND;
117 }
118
119 /* adds Q=(x2:z2) and R=(x1:z1) and puts the result in (x3:z3),
120 using 6 muls (4 muls and 2 squares), and 6 add/sub.
121 One assumes that Q-R=P or R-Q=P where P=(x:z).
122 - n : number to factor
123 - u, v, w : auxiliary variables
124 Modifies: x3, z3, u, v, w.
125 (x3,z3) may be identical to (x2,z2) and to (x,z)
126 */
127 void
add3(mpres_t x3,mpres_t z3,mpres_t x2,mpres_t z2,mpres_t x1,mpres_t z1,mpres_t x,mpres_t z,mpmod_t n,mpres_t u,mpres_t v,mpres_t w)128 add3 (mpres_t x3, mpres_t z3, mpres_t x2, mpres_t z2, mpres_t x1, mpres_t z1,
129 mpres_t x, mpres_t z, mpmod_t n, mpres_t u, mpres_t v, mpres_t w)
130 {
131 mpres_sub (u, x2, z2, n);
132 mpres_add (v, x1, z1, n); /* u = x2-z2, v = x1+z1 */
133
134 mpres_mul (u, u, v, n); /* u = (x2-z2)*(x1+z1) */
135
136 mpres_add (w, x2, z2, n);
137 mpres_sub (v, x1, z1, n); /* w = x2+z2, v = x1-z1 */
138
139 mpres_mul (v, w, v, n); /* v = (x2+z2)*(x1-z1) */
140
141 mpres_add (w, u, v, n); /* w = 2*(x1*x2-z1*z2) */
142 mpres_sub (v, u, v, n); /* v = 2*(x2*z1-x1*z2) */
143
144 mpres_sqr (w, w, n); /* w = 4*(x1*x2-z1*z2)^2 */
145 mpres_sqr (v, v, n); /* v = 4*(x2*z1-x1*z2)^2 */
146
147 if (x == x3) /* same variable: in-place variant */
148 {
149 /* x3 <- w * z mod n
150 z3 <- x * v mod n */
151 mpres_mul (z3, w, z, n);
152 mpres_mul (x3, x, v, n);
153 mpres_swap (x3, z3, n);
154 }
155 else
156 {
157 mpres_mul (x3, w, z, n); /* x3 = 4*z*(x1*x2-z1*z2)^2 mod n */
158 mpres_mul (z3, x, v, n); /* z3 = 4*x*(x2*z1-x1*z2)^2 mod n */
159 }
160 /* mul += 6; */
161 }
162
163 /* computes 2P=(x2:z2) from P=(x1:z1), with 5 muls (3 muls and 2 squares)
164 and 4 add/sub.
165 - n : number to factor
166 - b : (a+2)/4 mod n
167 - t, u, v, w : auxiliary variables
168 */
169 void
duplicate(mpres_t x2,mpres_t z2,mpres_t x1,mpres_t z1,mpmod_t n,mpres_t b,mpres_t u,mpres_t v,mpres_t w)170 duplicate (mpres_t x2, mpres_t z2, mpres_t x1, mpres_t z1, mpmod_t n,
171 mpres_t b, mpres_t u, mpres_t v, mpres_t w)
172 {
173 mpres_add (u, x1, z1, n);
174 mpres_sqr (u, u, n); /* u = (x1+z1)^2 mod n */
175 mpres_sub (v, x1, z1, n);
176 mpres_sqr (v, v, n); /* v = (x1-z1)^2 mod n */
177 mpres_mul (x2, u, v, n); /* x2 = u*v = (x1^2 - z1^2)^2 mod n */
178 mpres_sub (w, u, v, n); /* w = u-v = 4*x1*z1 */
179 mpres_mul (u, w, b, n); /* u = w*b = ((A+2)/4*(4*x1*z1)) mod n */
180 mpres_add (u, u, v, n); /* u = (x1-z1)^2+(A+2)/4*(4*x1*z1) */
181 mpres_mul (z2, w, u, n); /* z2 = ((4*x1*z1)*((x1-z1)^2+(A+2)/4*(4*x1*z1))) mod n */
182 }
183
184 /* multiply P=(x:z) by e and puts the result in (x:z). */
185 void
ecm_mul(mpres_t x,mpres_t z,mpz_t e,mpmod_t n,mpres_t b)186 ecm_mul (mpres_t x, mpres_t z, mpz_t e, mpmod_t n, mpres_t b)
187 {
188 size_t l;
189 int negated = 0;
190 mpres_t x0, z0, x1, z1, u, v, w;
191
192 /* In Montgomery coordinates, the point at infinity is (0::0) */
193 if (mpz_sgn (e) == 0)
194 {
195 mpz_set_ui (x, 0);
196 mpz_set_ui (z, 0);
197 return;
198 }
199
200 /* The negative of a point (x:y:z) is (x:-y:u). Since we do not compute
201 y, e*(x::z) == (-e)*(x::z). */
202 if (mpz_sgn (e) < 0)
203 {
204 negated = 1;
205 mpz_neg (e, e);
206 }
207
208 if (mpz_cmp_ui (e, 1) == 0)
209 goto ecm_mul_end;
210
211 mpres_init (x0, n);
212 mpres_init (z0, n);
213 mpres_init (x1, n);
214 mpres_init (z1, n);
215 mpres_init (u, n);
216 mpres_init (v, n);
217 mpres_init (w, n);
218
219 l = mpz_sizeinbase (e, 2) - 1; /* l >= 1 */
220
221 mpres_set (x0, x, n);
222 mpres_set (z0, z, n);
223 duplicate (x1, z1, x0, z0, n, b, u, v, w);
224
225 /* invariant: (P1,P0) = ((k+1)P, kP) where k = floor(e/2^l) */
226
227 while (l-- > 0)
228 {
229 if (mpz_tstbit (e, l)) /* k, k+1 -> 2k+1, 2k+2 */
230 {
231 add3 (x0, z0, x0, z0, x1, z1, x, z, n, u, v, w); /* 2k+1 */
232 duplicate (x1, z1, x1, z1, n, b, u, v, w); /* 2k+2 */
233 }
234 else /* k, k+1 -> 2k, 2k+1 */
235 {
236 add3 (x1, z1, x1, z1, x0, z0, x, z, n, u, v, w); /* 2k+1 */
237 duplicate (x0, z0, x0, z0, n, b, u, v, w); /* 2k */
238 }
239 }
240
241 mpres_set (x, x0, n);
242 mpres_set (z, z0, n);
243
244 mpres_clear (x0, n);
245 mpres_clear (z0, n);
246 mpres_clear (x1, n);
247 mpres_clear (z1, n);
248 mpres_clear (u, n);
249 mpres_clear (v, n);
250 mpres_clear (w, n);
251
252 ecm_mul_end:
253
254 /* Undo negation to avoid changing the caller's e value */
255 if (negated)
256 mpz_neg (e, e);
257 }
258
259 #define ADD 6.0 /* number of multiplications in an addition */
260 #define DUP 5.0 /* number of multiplications in a duplicate */
261
262 /* returns the number of modular multiplications for computing
263 V_n from V_r * V_{n-r} - V_{n-2r}.
264 ADD is the cost of an addition
265 DUP is the cost of a duplicate
266 */
267 static double
lucas_cost(ecm_uint n,double v)268 lucas_cost (ecm_uint n, double v)
269 {
270 ecm_uint d, e, r;
271 double c; /* cost */
272
273 d = n;
274 r = (ecm_uint) ((double) d * v + 0.5);
275 if (r >= n)
276 return (ADD * (double) n);
277 d = n - r;
278 e = 2 * r - n;
279 c = DUP + ADD; /* initial duplicate and final addition */
280 while (d != e)
281 {
282 if (d < e)
283 {
284 r = d;
285 d = e;
286 e = r;
287 }
288 if (d - e <= e / 4 && ((d + e) % 3) == 0)
289 { /* condition 1 */
290 d = (2 * d - e) / 3;
291 e = (e - d) / 2;
292 c += 3.0 * ADD; /* 3 additions */
293 }
294 else if (d - e <= e / 4 && (d - e) % 6 == 0)
295 { /* condition 2 */
296 d = (d - e) / 2;
297 c += ADD + DUP; /* one addition, one duplicate */
298 }
299 else if ((d + 3) / 4 <= e)
300 { /* condition 3 */
301 d -= e;
302 c += ADD; /* one addition */
303 }
304 else if ((d + e) % 2 == 0)
305 { /* condition 4 */
306 d = (d - e) / 2;
307 c += ADD + DUP; /* one addition, one duplicate */
308 }
309 /* now d+e is odd */
310 else if (d % 2 == 0)
311 { /* condition 5 */
312 d /= 2;
313 c += ADD + DUP; /* one addition, one duplicate */
314 }
315 /* now d is odd and e is even */
316 else if (d % 3 == 0)
317 { /* condition 6 */
318 d = d / 3 - e;
319 c += 3.0 * ADD + DUP; /* three additions, one duplicate */
320 }
321 else if ((d + e) % 3 == 0)
322 { /* condition 7 */
323 d = (d - 2 * e) / 3;
324 c += 3.0 * ADD + DUP; /* three additions, one duplicate */
325 }
326 else if ((d - e) % 3 == 0)
327 { /* condition 8 */
328 d = (d - e) / 3;
329 c += 3.0 * ADD + DUP; /* three additions, one duplicate */
330 }
331 else /* necessarily e is even: catches all cases */
332 { /* condition 9 */
333 e /= 2;
334 c += ADD + DUP; /* one addition, one duplicate */
335 }
336 }
337
338 return c;
339 }
340
341
342 /* computes kP from P=(xA:zA) and puts the result in (xA:zA). Assumes k>2.
343 WARNING! The calls to add3() assume that the two input points are distinct,
344 which is not neccessarily satisfied. The result can be that in rare cases
345 the point at infinity (z==0) results when it shouldn't. A test case is
346 echo 33554520197234177 | ./ecm -sigma 2046841451 373 1
347 which finds the prime even though it shouldn't (23^2=529 divides order).
348 This is not a problem for ECM since at worst we'll find a factor we
349 shouldn't have found. For other purposes (i.e. primality proving) this
350 would have to be fixed first.
351 */
352
353 static void
prac(mpres_t xA,mpres_t zA,ecm_uint k,mpmod_t n,mpres_t b,mpres_t u,mpres_t v,mpres_t w,mpres_t xB,mpres_t zB,mpres_t xC,mpres_t zC,mpres_t xT,mpres_t zT,mpres_t xT2,mpres_t zT2)354 prac (mpres_t xA, mpres_t zA, ecm_uint k, mpmod_t n, mpres_t b,
355 mpres_t u, mpres_t v, mpres_t w, mpres_t xB, mpres_t zB, mpres_t xC,
356 mpres_t zC, mpres_t xT, mpres_t zT, mpres_t xT2, mpres_t zT2)
357 {
358 ecm_uint d, e, r, i = 0, nv;
359 double c, cmin;
360 __mpz_struct *tmp;
361 #define NV 10
362 /* 1/val[0] = the golden ratio (1+sqrt(5))/2, and 1/val[i] for i>0
363 is the real number whose continued fraction expansion is all 1s
364 except for a 2 in i+1-st place */
365 static double val[NV] =
366 { 0.61803398874989485, 0.72360679774997897, 0.58017872829546410,
367 0.63283980608870629, 0.61242994950949500, 0.62018198080741576,
368 0.61721461653440386, 0.61834711965622806, 0.61791440652881789,
369 0.61807966846989581};
370
371 /* for small n, it makes no sense to try 10 different Lucas chains */
372 nv = mpz_size ((mpz_ptr) n);
373 if (nv > NV)
374 nv = NV;
375
376 if (nv > 1)
377 {
378 /* chooses the best value of v */
379 for (d = 0, cmin = ADD * (double) k; d < nv; d++)
380 {
381 c = lucas_cost (k, val[d]);
382 if (c < cmin)
383 {
384 cmin = c;
385 i = d;
386 }
387 }
388 }
389
390 d = k;
391 r = (ecm_uint) ((double) d * val[i] + 0.5);
392
393 /* first iteration always begins by Condition 3, then a swap */
394 d = k - r;
395 e = 2 * r - k;
396 mpres_set (xB, xA, n);
397 mpres_set (zB, zA, n); /* B=A */
398 mpres_set (xC, xA, n);
399 mpres_set (zC, zA, n); /* C=A */
400 duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
401 while (d != e)
402 {
403 if (d < e)
404 {
405 r = d;
406 d = e;
407 e = r;
408 mpres_swap (xA, xB, n);
409 mpres_swap (zA, zB, n);
410 }
411 /* do the first line of Table 4 whose condition qualifies */
412 if (d - e <= e / 4 && ((d + e) % 3) == 0)
413 { /* condition 1 */
414 d = (2 * d - e) / 3;
415 e = (e - d) / 2;
416 add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */
417 add3 (xT2, zT2, xT, zT, xA, zA, xB, zB, n, u, v, w); /* T2 = f(T,A,B) */
418 add3 (xB, zB, xB, zB, xT, zT, xA, zA, n, u, v, w); /* B = f(B,T,A) */
419 mpres_swap (xA, xT2, n);
420 mpres_swap (zA, zT2, n); /* swap A and T2 */
421 }
422 else if (d - e <= e / 4 && (d - e) % 6 == 0)
423 { /* condition 2 */
424 d = (d - e) / 2;
425 add3 (xB, zB, xA, zA, xB, zB, xC, zC, n, u, v, w); /* B = f(A,B,C) */
426 duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
427 }
428 else if ((d + 3) / 4 <= e)
429 { /* condition 3 */
430 d -= e;
431 add3 (xT, zT, xB, zB, xA, zA, xC, zC, n, u, v, w); /* T = f(B,A,C) */
432 /* circular permutation (B,T,C) */
433 tmp = xB;
434 xB = xT;
435 xT = xC;
436 xC = tmp;
437 tmp = zB;
438 zB = zT;
439 zT = zC;
440 zC = tmp;
441 }
442 else if ((d + e) % 2 == 0)
443 { /* condition 4 */
444 d = (d - e) / 2;
445 add3 (xB, zB, xB, zB, xA, zA, xC, zC, n, u, v, w); /* B = f(B,A,C) */
446 duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
447 }
448 /* now d+e is odd */
449 else if (d % 2 == 0)
450 { /* condition 5 */
451 d /= 2;
452 add3 (xC, zC, xC, zC, xA, zA, xB, zB, n, u, v, w); /* C = f(C,A,B) */
453 duplicate (xA, zA, xA, zA, n, b, u, v, w); /* A = 2*A */
454 }
455 /* now d is odd, e is even */
456 else if (d % 3 == 0)
457 { /* condition 6 */
458 d = d / 3 - e;
459 duplicate (xT, zT, xA, zA, n, b, u, v, w); /* T = 2*A */
460 add3 (xT2, zT2, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T2 = f(A,B,C) */
461 add3 (xA, zA, xT, zT, xA, zA, xA, zA, n, u, v, w); /* A = f(T,A,A) */
462 add3 (xT, zT, xT, zT, xT2, zT2, xC, zC, n, u, v, w); /* T = f(T,T2,C) */
463 /* circular permutation (C,B,T) */
464 tmp = xC;
465 xC = xB;
466 xB = xT;
467 xT = tmp;
468 tmp = zC;
469 zC = zB;
470 zB = zT;
471 zT = tmp;
472 }
473 else if ((d + e) % 3 == 0)
474 { /* condition 7 */
475 d = (d - 2 * e) / 3;
476 add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */
477 add3 (xB, zB, xT, zT, xA, zA, xB, zB, n, u, v, w); /* B = f(T,A,B) */
478 duplicate (xT, zT, xA, zA, n, b, u, v, w);
479 add3 (xA, zA, xA, zA, xT, zT, xA, zA, n, u, v, w); /* A = 3*A */
480 }
481 else if ((d - e) % 3 == 0)
482 { /* condition 8 */
483 d = (d - e) / 3;
484 add3 (xT, zT, xA, zA, xB, zB, xC, zC, n, u, v, w); /* T = f(A,B,C) */
485 add3 (xC, zC, xC, zC, xA, zA, xB, zB, n, u, v, w); /* C = f(A,C,B) */
486 mpres_swap (xB, xT, n);
487 mpres_swap (zB, zT, n); /* swap B and T */
488 duplicate (xT, zT, xA, zA, n, b, u, v, w);
489 add3 (xA, zA, xA, zA, xT, zT, xA, zA, n, u, v, w); /* A = 3*A */
490 }
491 else /* necessarily e is even here */
492 { /* condition 9 */
493 e /= 2;
494 add3 (xC, zC, xC, zC, xB, zB, xA, zA, n, u, v, w); /* C = f(C,B,A) */
495 duplicate (xB, zB, xB, zB, n, b, u, v, w); /* B = 2*B */
496 }
497 }
498
499 add3 (xA, zA, xA, zA, xB, zB, xC, zC, n, u, v, w);
500
501 ASSERT(d == 1);
502 }
503
504 /* Input: x is initial point
505 A is curve parameter in Montgomery's form:
506 g*y^2*z = x^3 + a*x^2*z + x*z^2
507 n is the number to factor
508 B1 is the stage 1 bound
509 Output: If a factor is found, it is returned in f.
510 Otherwise, x contains the x-coordinate of the point computed
511 in stage 1 (with z coordinate normalized to 1).
512 B1done is set to B1 if stage 1 completed normally,
513 or to the largest prime processed if interrupted, but never
514 to a smaller value than B1done was upon function entry.
515 Return value: ECM_FACTOR_FOUND_STEP1 if a factor, otherwise
516 ECM_NO_FACTOR_FOUND
517 */
518 static int
ecm_stage1(mpz_t f,mpres_t x,mpres_t A,mpmod_t n,double B1,double * B1done,mpz_t go,int (* stop_asap)(void),char * chkfilename)519 ecm_stage1 (mpz_t f, mpres_t x, mpres_t A, mpmod_t n, double B1,
520 double *B1done, mpz_t go, int (*stop_asap)(void),
521 char *chkfilename)
522 {
523 mpres_t b, z, u, v, w, xB, zB, xC, zC, xT, zT, xT2, zT2;
524 uint64_t p, r, last_chkpnt_p;
525 int ret = ECM_NO_FACTOR_FOUND;
526 long last_chkpnt_time;
527 prime_info_t prime_info;
528
529 prime_info_init (prime_info);
530
531 mpres_init (b, n);
532 mpres_init (z, n);
533 mpres_init (u, n);
534 mpres_init (v, n);
535 mpres_init (w, n);
536 mpres_init (xB, n);
537 mpres_init (zB, n);
538 mpres_init (xC, n);
539 mpres_init (zC, n);
540 mpres_init (xT, n);
541 mpres_init (zT, n);
542 mpres_init (xT2, n);
543 mpres_init (zT2, n);
544
545 last_chkpnt_time = cputime ();
546
547 mpres_set_ui (z, 1, n);
548
549 mpres_add_ui (b, A, 2, n);
550 mpres_div_2exp (b, b, 2, n); /* b == (A0+2)*B/4 */
551
552 /* preload group order */
553 if (go != NULL)
554 ecm_mul (x, z, go, n, b);
555
556 /* prac() wants multiplicands > 2 */
557 for (r = 2; r <= B1; r *= 2)
558 if (r > *B1done)
559 duplicate (x, z, x, z, n, b, u, v, w);
560
561 /* We'll do 3 manually, too (that's what ecm4 did..) */
562 for (r = 3; r <= B1; r *= 3)
563 if (r > *B1done)
564 {
565 duplicate (xB, zB, x, z, n, b, u, v, w);
566 add3 (x, z, x, z, xB, zB, x, z, n, u, v, w);
567 }
568
569 last_chkpnt_p = 3;
570 p = getprime_mt (prime_info); /* Puts 3 into p. Next call gives 5 */
571 for (p = getprime_mt (prime_info); p <= B1; p = getprime_mt (prime_info))
572 {
573 for (r = p; r <= B1; r *= p)
574 if (r > *B1done)
575 prac (x, z, (ecm_uint) p, n, b, u, v, w, xB, zB, xC, zC, xT,
576 zT, xT2, zT2);
577
578 if (mpres_is_zero (z, n))
579 {
580 outputf (OUTPUT_VERBOSE, "Reached point at infinity, %.0f divides "
581 "group orders\n", p);
582 break;
583 }
584
585 if (stop_asap != NULL && (*stop_asap) ())
586 {
587 outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
588 break;
589 }
590
591 if (chkfilename != NULL && p > last_chkpnt_p + 10000 &&
592 elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD)
593 {
594 writechkfile (chkfilename, ECM_ECM, MAX(p, *B1done), n, A, x, NULL, z);
595 last_chkpnt_p = p;
596 last_chkpnt_time = cputime ();
597 }
598 }
599
600 /* If stage 1 finished normally, p is the smallest prime >B1 here.
601 In that case, set to B1 */
602 if (p > B1)
603 p = B1;
604
605 if (p > *B1done)
606 *B1done = p;
607
608 if (chkfilename != NULL)
609 writechkfile (chkfilename, ECM_ECM, *B1done, n, A, x, NULL, z);
610
611 prime_info_clear (prime_info);
612
613 if (!mpres_invert (u, z, n)) /* Factor found? */
614 {
615 mpres_gcd (f, z, n);
616 ret = ECM_FACTOR_FOUND_STEP1;
617 }
618 mpres_mul (x, x, u, n);
619
620 mpres_clear (zT2, n);
621 mpres_clear (xT2, n);
622 mpres_clear (zT, n);
623 mpres_clear (xT, n);
624 mpres_clear (zC, n);
625 mpres_clear (xC, n);
626 mpres_clear (zB, n);
627 mpres_clear (xB, n);
628 mpres_clear (w, n);
629 mpres_clear (v, n);
630 mpres_clear (u, n);
631 mpres_clear (z, n);
632 mpres_clear (b, n);
633
634 return ret;
635 }
636
637 #define DEBUG_EC_W 0
638
639 #ifdef HAVE_ADDLAWS
640 /* Input: when Etype == ECM_EC_TYPE_WEIERSTRASS*:
641 (x, y) is initial point
642 A is curve parameter in Weierstrass's form:
643 Y^2 = X^3 + A*X + B, where B = y^2-(x^3+A*x) is implicit
644 when Etype == ECM_EC_TYPE_HESSIAN:
645 (x, y) is initial point
646 A is curve parameter in Hessian form: X^3+Y^3+Z^3=3*A*X*Y*Z
647 n is the number to factor
648 B1 is the stage 1 bound
649 batch_s = prod(p^e <= B1) if != 1
650 Output: If a factor is found, it is returned in f.
651 Otherwise, (x, y) contains the point computed in stage 1.
652 B1done is set to B1 if stage 1 completed normally,
653 or to the largest prime processed if interrupted, but never
654 to a smaller value than B1done was upon function entry.
655 Return value: ECM_FACTOR_FOUND_STEP1 if a factor is found, otherwise
656 ECM_NO_FACTOR_FOUND
657 */
658 static int
ecm_stage1_W(mpz_t f,ell_curve_t E,ell_point_t P,mpmod_t n,double B1,double * B1done,mpz_t batch_s,mpz_t go,int (* stop_asap)(void),char * chkfilename)659 ecm_stage1_W (mpz_t f, ell_curve_t E, ell_point_t P, mpmod_t n,
660 double B1, double *B1done, mpz_t batch_s, mpz_t go,
661 int (*stop_asap)(void), char *chkfilename)
662 {
663 mpres_t xB;
664 ell_point_t Q;
665 uint64_t p = 0, r, last_chkpnt_p;
666 int ret = ECM_NO_FACTOR_FOUND;
667 long last_chkpnt_time;
668 prime_info_t prime_info;
669
670 prime_info_init (prime_info);
671
672 mpres_init (xB, n);
673
674 ell_point_init(Q, E, n);
675
676 last_chkpnt_time = cputime ();
677
678 #if DEBUG_EC_W >= 2
679 gmp_printf("N:=%Zd;\n", n->orig_modulus);
680 printf("E:="); ell_curve_print(E, n);
681 printf("E:=[E[4], E[5]];\n");
682 printf("P:="); ell_point_print(P, E, n); printf("; Q:=P;\n");
683 #endif
684 /* preload group order */
685 if (go != NULL){
686 if (ell_point_mul (Q, go, P, E, n) == 0){
687 mpz_set (f, Q->x);
688 ret = ECM_FACTOR_FOUND_STEP1;
689 goto end_of_stage1_w;
690 }
691 ell_point_set(P, Q, E, n);
692 }
693 #if DEBUG_EC_W >= 1
694 gmp_printf("go:=%Zd;\n", go);
695 printf("goP:="); ell_point_print(P, E, n); printf(";\n");
696 #endif
697 if(mpz_cmp_ui(batch_s, 1) == 0){
698 outputf (OUTPUT_VERBOSE, "Using traditional approach to Step 1\n");
699 for (r = 2; r <= B1; r *= 2)
700 if (r > *B1done){
701 if(ell_point_duplicate (Q, P, E, n) == 0){
702 mpz_set(f, Q->x);
703 ret = ECM_FACTOR_FOUND_STEP1;
704 goto end_of_stage1_w;
705 }
706 ell_point_set(P, Q, E, n);
707 #if DEBUG_EC_W >= 2
708 printf("P%ld:=", (long)r); ell_point_print(P, E, n); printf(";\n");
709 printf("Q:=EcmMult(2, Q, E, N);\n");
710 printf("(Q[1]*P%ld[3]-Q[3]*P%ld[1]) mod N;\n",(long)r,(long)r);
711 #endif
712 }
713
714 last_chkpnt_p = 3;
715 for (p = getprime_mt (prime_info); p <= B1; p = getprime_mt (prime_info)){
716 mpz_set_ui(f, (ecm_uint)p);
717 for (r = p; r <= B1; r *= p){
718 if (r > *B1done){
719 if(ell_point_mul (Q, f, P, E, n) == 0){
720 mpz_set(f, Q->x);
721 ret = ECM_FACTOR_FOUND_STEP1;
722 goto end_of_stage1_w;
723 }
724 #if DEBUG_EC_W >= 2
725 printf("R%ld:=", (long)r); ell_point_print(Q, E, n);
726 printf(";\nQ:=EcmMult(%ld, Q, E, N);\n", (long)p);
727 printf("(Q[1]*R%ld[3]-Q[3]*R%ld[1]) mod N;\n",(long)r,(long)r);
728 #endif
729 ell_point_set(P, Q, E, n);
730 }
731 }
732 if (ell_point_is_zero (P, E, n)){
733 outputf (OUTPUT_VERBOSE, "Reached point at infinity, "
734 "%.0f divides group orders\n", p);
735 break;
736 }
737
738 if (stop_asap != NULL && (*stop_asap) ()){
739 outputf (OUTPUT_NORMAL, "Interrupted at prime %.0f\n", p);
740 break;
741 }
742
743 if (chkfilename != NULL && p > last_chkpnt_p + 10000 &&
744 elltime (last_chkpnt_time, cputime ()) > CHKPNT_PERIOD){
745 writechkfile (chkfilename, ECM_ECM, MAX(p, *B1done),
746 n, E->a4, P->x, P->y, P->z);
747 last_chkpnt_p = p;
748 last_chkpnt_time = cputime ();
749 }
750 }
751 }
752 else{
753 #if USE_ADD_SUB_CHAINS == 0 /* keeping it simple */
754 if (ell_point_mul (Q, batch_s, P, E, n) == 0){
755 mpz_set (f, Q->x);
756 ret = ECM_FACTOR_FOUND_STEP1;
757 goto end_of_stage1_w;
758 }
759 #else
760 /* batch mode and special coding... */
761 short *S = NULL;
762 int w, iS;
763 add_sub_unpack(&w, &S, &iS, batch_s);
764 if (ell_point_mul_add_sub_with_S(Q, P, E, n, w, S, iS) == 0){
765 mpz_set (f, Q->x);
766 ret = ECM_FACTOR_FOUND_STEP1;
767 }
768 #endif
769 ell_point_set(P, Q, E, n);
770 p = B1;
771 }
772 end_of_stage1_w:
773 /* If stage 1 finished normally, p is the smallest prime > B1 here.
774 In that case, set to B1 */
775 if (p > B1)
776 p = B1;
777
778 if (p > *B1done)
779 *B1done = p;
780
781 if (chkfilename != NULL)
782 writechkfile (chkfilename, ECM_ECM, *B1done, n, E->a4, P->x, P->y,P->z);
783 prime_info_clear (prime_info);
784
785 if(ret != ECM_FACTOR_FOUND_STEP1){
786 if(ell_point_is_zero(P, E, n) == 1){
787 /* too bad */
788 ell_point_set_to_zero(P, E, n);
789 mpz_set(f, n->orig_modulus);
790 ret = ECM_FACTOR_FOUND_STEP1;
791 }
792 else{
793 /* for affine case, z = 1 anyway */
794 if(E->law == ECM_LAW_HOMOGENEOUS){
795 if (!mpres_invert (xB, P->z, n)){ /* Factor found? */
796 mpres_gcd (f, P->z, n);
797 gmp_printf("# factor found during normalization: %Zd\n", f);
798 ret = ECM_FACTOR_FOUND_STEP1;
799 }
800 else{
801 /* normalize to get (x:y:1) valid in W or H form... */
802 #if DEBUG_EC_W >= 2
803 mpres_get_z(f, xB, n); gmp_printf("1/z=%Zd\n", f);
804 #endif
805 mpres_mul (P->x, P->x, xB, n);
806 mpres_mul (P->y, P->y, xB, n);
807 #if DEBUG_EC_W >= 2
808 mpres_get_z(f, P->x, n); gmp_printf("x/z=%Zd\n", f);
809 mpres_get_z(f, P->y, n); gmp_printf("y/z=%Zd\n", f);
810 #endif
811 mpres_set_ui (P->z, 1, n);
812 }
813 }
814 }
815 }
816
817 mpres_clear (xB, n);
818 ell_point_clear(Q, E, n);
819
820 return ret;
821 }
822 #endif
823
824 /* choose "optimal" S according to step 2 range B2 */
825 int
choose_S(mpz_t B2len)826 choose_S (mpz_t B2len)
827 {
828 if (mpz_cmp_d (B2len, 1e7) < 0)
829 return 1; /* x^1 */
830 else if (mpz_cmp_d (B2len, 1e8) < 0)
831 return 2; /* x^2 */
832 else if (mpz_cmp_d (B2len, 1e9) < 0)
833 return -3; /* Dickson(3) */
834 else if (mpz_cmp_d (B2len, 1e10) < 0)
835 return -6; /* Dickson(6) */
836 else if (mpz_cmp_d (B2len, 3e11) < 0)
837 return -12; /* Dickson(12) */
838 else
839 return -30; /* Dickson(30) */
840 }
841
842 #define DIGITS_START 35
843 #define DIGITS_INCR 5
844 #define DIGITS_END 80
845
846 void
print_expcurves(double B1,const mpz_t B2,unsigned long dF,unsigned long k,int S,int param)847 print_expcurves (double B1, const mpz_t B2, unsigned long dF, unsigned long k,
848 int S, int param)
849 {
850 double prob;
851 int i, j;
852 char sep, outs[128], flt[16];
853 double smoothness_correction;
854
855 if (param == ECM_PARAM_SUYAMA || param == ECM_PARAM_BATCH_2)
856 smoothness_correction = 1.0;
857 else if (param == ECM_PARAM_BATCH_SQUARE)
858 smoothness_correction = EXTRA_SMOOTHNESS_SQUARE;
859 else if (param == ECM_PARAM_BATCH_32BITS_D)
860 smoothness_correction = EXTRA_SMOOTHNESS_32BITS_D;
861 else /* This case should never happen */
862 smoothness_correction = 0.0;
863
864 for (i = DIGITS_START, j = 0; i <= DIGITS_END; i += DIGITS_INCR)
865 j += sprintf (outs + j, "%u%c", i, (i < DIGITS_END) ? '\t' : '\n');
866 outs[j] = '\0';
867 outputf (OUTPUT_VERBOSE, "Expected number of curves to find a factor "
868 "of n digits:\n%s", outs);
869 for (i = DIGITS_START; i <= DIGITS_END; i += DIGITS_INCR)
870 {
871 sep = (i < DIGITS_END) ? '\t' : '\n';
872 prob = ecmprob (B1, mpz_get_d (B2),
873 /* smoothness depends on the parametrization */
874 pow (10., i - .5) / smoothness_correction,
875 (double) dF * dF * k, S);
876 if (prob > 1. / 10000000)
877 outputf (OUTPUT_VERBOSE, "%.0f%c", floor (1. / prob + .5), sep);
878 else if (prob > 0.)
879 {
880 /* on Windows: 2.6e+009 or 2.6e+025 (8 characters in string) */
881 /* on Linux : 2.6e+09 or 2.6e+25 (7 characters in string) */
882 /* This will normalize the output so that the Windows ouptut will match the Linux output */
883 if (sprintf (flt, "%.2g", floor (1. / prob + .5)) == 8)
884 memmove (&flt[5], &flt[6], strlen(flt) - 5);
885 outputf (OUTPUT_VERBOSE, "%s%c", flt, sep);
886 }
887 else
888 outputf (OUTPUT_VERBOSE, "Inf%c", sep);
889 }
890 }
891
892 void
print_exptime(double B1,const mpz_t B2,unsigned long dF,unsigned long k,int S,double tottime,int param)893 print_exptime (double B1, const mpz_t B2, unsigned long dF, unsigned long k,
894 int S, double tottime, int param)
895 {
896 double prob, exptime;
897 int i, j;
898 char sep, outs[128];
899 double smoothness_correction;
900
901 if (param == ECM_PARAM_SUYAMA || param == ECM_PARAM_BATCH_2)
902 smoothness_correction = 1.0;
903 else if (param == ECM_PARAM_BATCH_SQUARE)
904 smoothness_correction = EXTRA_SMOOTHNESS_SQUARE;
905 else if (param == ECM_PARAM_BATCH_32BITS_D)
906 smoothness_correction = EXTRA_SMOOTHNESS_32BITS_D;
907 else /* This case should never happen */
908 smoothness_correction = 0.0;
909
910 for (i = DIGITS_START, j = 0; i <= DIGITS_END; i += DIGITS_INCR)
911 j += sprintf (outs + j, "%u%c", i, (i < DIGITS_END) ? '\t' : '\n');
912 outs[j] = '\0';
913 outputf (OUTPUT_VERBOSE, "Expected time to find a factor of n digits:\n%s",
914 outs);
915 for (i = DIGITS_START; i <= DIGITS_END; i += DIGITS_INCR)
916 {
917 sep = (i < DIGITS_END) ? '\t' : '\n';
918 prob = ecmprob (B1, mpz_get_d (B2),
919 /* in batch mode, the extra smoothness is smaller */
920 pow (10., i - .5) / smoothness_correction,
921 (double) dF * dF * k, S);
922 exptime = (prob > 0.) ? tottime / prob : HUGE_VAL;
923 outputf (OUTPUT_TRACE, "Digits: %d, Total time: %.0f, probability: "
924 "%g, expected time: %.0f\n", i, tottime, prob, exptime);
925 if (exptime < 1000.)
926 outputf (OUTPUT_VERBOSE, "%.0fms%c", exptime, sep);
927 else if (exptime < 60000.) /* One minute */
928 outputf (OUTPUT_VERBOSE, "%.2fs%c", exptime / 1000., sep);
929 else if (exptime < 3600000.) /* One hour */
930 outputf (OUTPUT_VERBOSE, "%.2fm%c", exptime / 60000., sep);
931 else if (exptime < 86400000.) /* One day */
932 outputf (OUTPUT_VERBOSE, "%.2fh%c", exptime / 3600000., sep);
933 else if (exptime < 31536000000.) /* One year */
934 outputf (OUTPUT_VERBOSE, "%.2fd%c", exptime / 86400000., sep);
935 else if (exptime < 31536000000000.) /* One thousand years */
936 outputf (OUTPUT_VERBOSE, "%.2fy%c", exptime / 31536000000., sep);
937 else if (exptime < 31536000000000000.) /* One million years */
938 outputf (OUTPUT_VERBOSE, "%.0fy%c", exptime / 31536000000., sep);
939 else if (prob > 0.)
940 outputf (OUTPUT_VERBOSE, "%.1gy%c", exptime / 31536000000., sep);
941 else
942 outputf (OUTPUT_VERBOSE, "Inf%c", sep);
943 }
944 }
945
946 /* y should be NULL for P+1, and P-1, it contains the y coordinate for the
947 Weierstrass form for ECM (when sigma_is_A = -1). */
948 /* if gpu != 0 then it contains the number of curves that will be computed on
949 the GPU */
950 void
print_B1_B2_poly(int verbosity,int method,double B1,double B1done,mpz_t B2min_param,mpz_t B2min,mpz_t B2,int S,mpz_t sigma,int sigma_is_A,int Etype,mpz_t y,int param,unsigned int gpu)951 print_B1_B2_poly (int verbosity, int method, double B1, double B1done,
952 mpz_t B2min_param, mpz_t B2min, mpz_t B2, int S, mpz_t sigma,
953 int sigma_is_A, int Etype,
954 mpz_t y, int param, unsigned int gpu)
955 {
956 ASSERT ((method == ECM_ECM) || (y == NULL));
957 ASSERT ((-1 <= sigma_is_A) && (sigma_is_A <= 1));
958 ASSERT (param != ECM_PARAM_DEFAULT || sigma_is_A == 1 || sigma_is_A == -1);
959
960 if (test_verbose (verbosity))
961 {
962 outputf (verbosity, "Using ");
963 if (ECM_IS_DEFAULT_B1_DONE(B1done))
964 outputf (verbosity, "B1=%1.0f, ", B1);
965 else
966 outputf (verbosity, "B1=%1.0f-%1.0f, ", B1done, B1);
967 if (mpz_sgn (B2min_param) < 0)
968 outputf (verbosity, "B2=%Zd", B2);
969 else
970 outputf (verbosity, "B2=%Zd-%Zd", B2min, B2);
971
972 if (S > 0)
973 outputf (verbosity, ", polynomial x^%u", S);
974 else if (S < 0)
975 outputf (verbosity, ", polynomial Dickson(%u)", -S);
976
977 /* don't print in resume case, since x0 is saved in resume file */
978 if (method == ECM_ECM)
979 {
980 if (sigma_is_A == 1)
981 outputf (verbosity, ", A=%Zd", sigma);
982 else if (sigma_is_A == 0)
983 {
984 if (gpu) /* if not 0, contains number_of_curves */
985 {
986 outputf (verbosity, ", sigma=%d:%Zd", param, sigma);
987 mpz_add_ui (sigma, sigma, gpu-1);
988 outputf (verbosity, "-%d:%Zd", param, sigma);
989 mpz_sub_ui (sigma, sigma, gpu-1);
990 outputf (verbosity, " (%u curves)", gpu);
991 }
992 else
993 outputf (verbosity, ", sigma=%d:%Zd", param, sigma);
994 }
995 else{
996 if (Etype == ECM_EC_TYPE_WEIERSTRASS)
997 outputf (verbosity, ", Weierstrass(A=%Zd,y=%Zd)", sigma, y);
998 else if (Etype == ECM_EC_TYPE_HESSIAN)
999 outputf (verbosity, ", Hessian(D=%Zd,y=%Zd)", sigma, y);
1000 }
1001 }
1002 else if (ECM_IS_DEFAULT_B1_DONE(B1done))
1003 /* in case of P-1 or P+1, we store the initial point in sigma */
1004 outputf (verbosity, ", x0=%Zd", sigma);
1005
1006 outputf (verbosity, "\n");
1007 }
1008 }
1009
1010 /* Compute parameters for stage 2 */
1011 int
set_stage_2_params(mpz_t B2,mpz_t B2_parm,mpz_t B2min,mpz_t B2min_parm,root_params_t * root_params,double B1,unsigned long * k,const int S,int use_ntt,int * po2,unsigned long * dF,char * TreeFilename,double maxmem,int Fermat,mpmod_t modulus)1012 set_stage_2_params (mpz_t B2, mpz_t B2_parm, mpz_t B2min, mpz_t B2min_parm,
1013 root_params_t *root_params, double B1,
1014 unsigned long *k, const int S, int use_ntt, int *po2,
1015 unsigned long *dF, char *TreeFilename, double maxmem,
1016 int Fermat, mpmod_t modulus)
1017 {
1018 mpz_set (B2min, B2min_parm);
1019 mpz_set (B2, B2_parm);
1020
1021 mpz_init (root_params->i0);
1022
1023 /* set second stage bound B2: when using polynomial multiplication of
1024 complexity n^alpha, stage 2 has complexity about B2^(alpha/2), and
1025 we want stage 2 to take about half of stage 1, thus we choose
1026 B2 = (c*B1)^(2/alpha). Experimentally, c=1/4 seems to work well.
1027 For Toom-Cook 3, this gives alpha=log(5)/log(3), and B2 ~ (c*B1)^1.365.
1028 For Toom-Cook 4, this gives alpha=log(7)/log(4), and B2 ~ (c*B1)^1.424. */
1029
1030 /* We take the cost of P+1 stage 1 to be about twice that of P-1.
1031 Since nai"ve P+1 and ECM cost respectively 2 and 11 multiplies per
1032 addition and duplicate, and both are optimized with PRAC, we can
1033 assume the ratio remains about 11/2. */
1034
1035 /* Also scale B2 by what the user said (or by the default scaling of 1.0) */
1036
1037 if (ECM_IS_DEFAULT_B2(B2))
1038 mpz_set_d (B2, pow (ECM_COST * B1, DEFAULT_B2_EXPONENT));
1039
1040 /* set B2min */
1041 if (mpz_sgn (B2min) < 0)
1042 mpz_set_d (B2min, B1);
1043
1044 /* Let bestD determine parameters for root generation and the
1045 effective B2 */
1046
1047 if (use_ntt)
1048 *po2 = 1;
1049
1050 root_params->d2 = 0; /* Enable automatic choice of d2 */
1051 if (bestD (root_params, k, dF, B2min, B2, *po2, use_ntt, maxmem,
1052 (TreeFilename != NULL), modulus) == ECM_ERROR)
1053 return ECM_ERROR;
1054
1055 /* Set default degree for Brent-Suyama extension */
1056 /* We try to keep the time used by the Brent-Suyama extension
1057 at about 10% of the stage 2 time */
1058 /* Degree S Dickson polys and x^S are equally fast for ECM, so we go for
1059 the better Dickson polys whenever possible. For S == 1, 2, they behave
1060 identically. */
1061
1062 root_params->S = S;
1063 if (root_params->S == ECM_DEFAULT_S)
1064 {
1065 if (Fermat > 0)
1066 {
1067 /* For Fermat numbers, default is 1 (no Brent-Suyama) */
1068 root_params->S = 1;
1069 }
1070 else
1071 {
1072 mpz_t t;
1073 mpz_init (t);
1074 mpz_sub (t, B2, B2min);
1075 root_params->S = choose_S (t);
1076 mpz_clear (t);
1077 }
1078 }
1079 return ECM_NO_FACTOR_FOUND;
1080 }
1081
1082 /* Input: x is starting point or zero
1083 y is used for Weierstrass curves (even in Step1)
1084 sigma is sigma value (if x is set to zero) or
1085 A parameter (if x is non-zero) of curve
1086 n is the number to factor
1087 go is the initial group order to preload
1088 B1, B2 are the stage 1/stage 2 bounds, respectively
1089 B2min the lower bound for stage 2
1090 k is the number of blocks to do in stage 2
1091 S is the degree of the Suyama-Brent extension for stage 2
1092 verbose is verbosity level: 0 no output, 1 normal output,
1093 2 diagnostic output.
1094 sigma_is_A: If true, the sigma parameter contains the curve's A value
1095 Etype
1096 zE is a curve that is used when a special torsion group was used; in
1097 that case, (x, y) must be a point on E.
1098 Output: f is the factor found.
1099 Return value: ECM_FACTOR_FOUND_STEPn if a factor was found,
1100 ECM_NO_FACTOR_FOUND if no factor was found,
1101 ECM_ERROR in case of error.
1102 (x, y) contains the new point at the end of Stage 1.
1103 */
1104 int
ecm(mpz_t f,mpz_t x,mpz_t y,int * param,mpz_t sigma,mpz_t n,mpz_t go,double * B1done,double B1,mpz_t B2min_parm,mpz_t B2_parm,unsigned long k,const int S,int verbose,int repr,int nobase2step2,int use_ntt,int sigma_is_A,ell_curve_t zE,FILE * os,FILE * es,char * chkfilename,char * TreeFilename,double maxmem,double stage1time,gmp_randstate_t rng,int (* stop_asap)(void),mpz_t batch_s,double * batch_last_B1_used,ATTRIBUTE_UNUSED double gw_k,ATTRIBUTE_UNUSED unsigned long gw_b,ATTRIBUTE_UNUSED unsigned long gw_n,ATTRIBUTE_UNUSED signed long gw_c)1105 ecm (mpz_t f, mpz_t x, mpz_t y, int *param, mpz_t sigma, mpz_t n, mpz_t go,
1106 double *B1done, double B1, mpz_t B2min_parm, mpz_t B2_parm,
1107 unsigned long k, const int S, int verbose, int repr, int nobase2step2,
1108 int use_ntt, int sigma_is_A, ell_curve_t zE,
1109 FILE *os, FILE* es, char *chkfilename, char
1110 *TreeFilename, double maxmem, double stage1time, gmp_randstate_t rng, int
1111 (*stop_asap)(void), mpz_t batch_s, double *batch_last_B1_used,
1112 ATTRIBUTE_UNUSED double gw_k, ATTRIBUTE_UNUSED unsigned long gw_b,
1113 ATTRIBUTE_UNUSED unsigned long gw_n, ATTRIBUTE_UNUSED signed long gw_c)
1114 {
1115 int youpi = ECM_NO_FACTOR_FOUND;
1116 int base2 = 0; /* If n is of form 2^n[+-]1, set base to [+-]n */
1117 int Fermat = 0; /* If base2 > 0 is a power of 2, set Fermat to base2 */
1118 int po2 = 0; /* Whether we should use power-of-2 poly degree */
1119 long st;
1120 mpmod_t modulus;
1121 curve P;
1122 ell_curve_t E;
1123 #ifdef HAVE_ADDLAWS
1124 ell_point_t PE;
1125 #endif
1126 mpz_t B2min, B2; /* Local B2, B2min to avoid changing caller's values */
1127 unsigned long dF;
1128 root_params_t root_params;
1129
1130 /* 1: sigma contains A from Montgomery form By^2 = x^3 + Ax^2 + x
1131 0: sigma contains sigma
1132 -1: sigma contains A from Weierstrass form y^2 = x^3 + Ax + B,
1133 and y contains y */
1134 ASSERT((-1 <= sigma_is_A) && (sigma_is_A <= 1));
1135 ASSERT((GMP_NUMB_BITS == 32) || (GMP_NUMB_BITS == 64));
1136
1137 set_verbose (verbose);
1138 ECM_STDOUT = (os == NULL) ? stdout : os;
1139 ECM_STDERR = (es == NULL) ? stdout : es;
1140
1141 #ifdef MPRESN_NO_ADJUSTMENT
1142 /* When no adjustment is made in mpresn_ functions, N should be smaller
1143 than B^n/16 */
1144 if (mpz_sizeinbase (n, 2) > mpz_size (n) * GMP_NUMB_BITS - 4)
1145 {
1146 outputf (OUTPUT_ERROR, "Error, N should be smaller than B^n/16\n");
1147 return ECM_ERROR;
1148 }
1149 #endif
1150
1151 /* if a batch mode is requested by the user, this implies ECM_MOD_MODMULN */
1152 if (repr == ECM_MOD_DEFAULT && IS_BATCH_MODE(*param))
1153 repr = ECM_MOD_MODMULN;
1154
1155 /* choose the arithmetic used before the parametrization, since for divisors
1156 of 2^n+/-1 the default choice param=1 might not be optimal */
1157 if (mpmod_init (modulus, n, repr) != 0)
1158 return ECM_ERROR;
1159
1160 repr = modulus->repr;
1161
1162 /* If the parametrization is not given, choose it. */
1163 if (*param == ECM_PARAM_DEFAULT)
1164 *param = get_default_param (sigma_is_A, *B1done, repr);
1165
1166 /* In batch mode,
1167 we force repr=MODMULN,
1168 B1done should be either the default value or greater than B1
1169 x should be either 0 (undetermined) or 2 */
1170 if (IS_BATCH_MODE(*param))
1171 {
1172 if (repr != ECM_MOD_MODMULN)
1173 {
1174 outputf (OUTPUT_ERROR, "Error, with param %d, repr should be "
1175 "ECM_MOD_MODMULN.\n", *param);
1176 return ECM_ERROR;
1177 }
1178
1179 if (!ECM_IS_DEFAULT_B1_DONE(*B1done) && *B1done < B1)
1180 {
1181 outputf (OUTPUT_ERROR, "Error, cannot resume with param %d, except "
1182 "for doing only stage 2\n", *param);
1183 return ECM_ERROR;
1184 }
1185
1186 if (sigma_is_A >= 0 && mpz_sgn (x) != 0 && mpz_cmp_ui (x, 2) != 0)
1187 {
1188 if (ECM_IS_DEFAULT_B1_DONE(*B1done))
1189 {
1190 outputf (OUTPUT_ERROR, "Error, x0 should be equal to 2 with this "
1191 "parametrization\n");
1192 return ECM_ERROR;
1193 }
1194 }
1195 }
1196
1197 /* check that if ECM_PARAM_BATCH_SQUARE is used, GMP_NUMB_BITS == 64 */
1198 if (*param == ECM_PARAM_BATCH_SQUARE && GMP_NUMB_BITS == 32)
1199 {
1200 outputf (OUTPUT_ERROR, "Error, parametrization ECM_PARAM_BATCH_SQUARE "
1201 "works only with GMP_NUMB_BITS=64\n");
1202 return ECM_ERROR;
1203 }
1204
1205 /* check that B1 is not too large */
1206 if (B1 > (double) ECM_UINT_MAX)
1207 {
1208 outputf (OUTPUT_ERROR, "Error, maximal step 1 bound for ECM is %lu.\n",
1209 ECM_UINT_MAX);
1210 return ECM_ERROR;
1211 }
1212
1213 /* loading stage 1 exponent makes sense only in batch mode */
1214 if (!IS_BATCH_MODE(*param) && mpz_cmp_ui (batch_s, 1) > 0)
1215 {
1216 fprintf (stderr, "Error, -bsaves/-bloads makes sense in batch mode only\n");
1217 exit (EXIT_FAILURE);
1218 }
1219
1220 /* Compute s for the batch mode */
1221 if (IS_BATCH_MODE(*param) && ECM_IS_DEFAULT_B1_DONE(*B1done) &&
1222 (B1 != *batch_last_B1_used || mpz_cmp_ui (batch_s, 1) <= 0))
1223 {
1224 *batch_last_B1_used = B1;
1225
1226 st = cputime ();
1227 /* construct the batch exponent */
1228 compute_s (batch_s, B1, NULL);
1229 outputf (OUTPUT_VERBOSE, "Computing batch product (of %" PRIu64
1230 " bits) of primes up to B1=%1.0f took %ldms\n",
1231 mpz_sizeinbase (batch_s, 2), B1, cputime () - st);
1232 }
1233
1234 st = cputime ();
1235
1236 /* See what kind of number we have as that may influence optimal parameter
1237 selection. Test for base 2 number. Note: this was already done by
1238 mpmod_init. */
1239
1240 if (modulus->repr == ECM_MOD_BASE2)
1241 base2 = modulus->bits;
1242
1243 /* For a Fermat number (base2 a positive power of 2) */
1244 for (Fermat = base2; Fermat > 0 && (Fermat & 1) == 0; Fermat >>= 1);
1245 if (Fermat == 1)
1246 {
1247 Fermat = base2;
1248 po2 = 1;
1249 }
1250 else
1251 Fermat = 0;
1252
1253 mpres_init (P.x, modulus);
1254 mpres_init (P.y, modulus);
1255 mpres_init (P.A, modulus);
1256
1257 #ifdef HAVE_ADDLAWS
1258 ell_curve_set_z (E, zE, modulus);
1259 #else
1260 E->type = ECM_EC_TYPE_MONTGOMERY;
1261 #endif
1262
1263 mpz_init (B2);
1264 mpz_init (B2min);
1265 youpi = set_stage_2_params (B2, B2_parm, B2min, B2min_parm,
1266 &root_params, B1, &k, S, use_ntt,
1267 &po2, &dF, TreeFilename, maxmem, Fermat,modulus);
1268
1269 /* if the user gave B2, print that B2 on the Using B1=..., B2=... line */
1270 if(!ECM_IS_DEFAULT_B2(B2_parm))
1271 mpz_set (B2, B2_parm);
1272
1273 if (youpi == ECM_ERROR)
1274 goto end_of_ecm;
1275
1276 if (sigma_is_A == 0)
1277 {
1278 if (mpz_sgn (sigma) == 0)
1279 {
1280 youpi = get_curve_from_random_parameter (f, P.A, P.x, sigma, *param,
1281 modulus, rng);
1282
1283 if (youpi == ECM_ERROR)
1284 {
1285 outputf (OUTPUT_ERROR, "Error, invalid parametrization.\n");
1286 goto end_of_ecm;
1287 }
1288 }
1289 else /* Compute A and x0 from given sigma values */
1290 {
1291 if (*param == ECM_PARAM_SUYAMA)
1292 youpi = get_curve_from_param0 (f, P.A, P.x, sigma, modulus);
1293 else if (*param == ECM_PARAM_BATCH_SQUARE)
1294 youpi = get_curve_from_param1 (P.A, P.x, sigma, modulus);
1295 else if (*param == ECM_PARAM_BATCH_2)
1296 youpi = get_curve_from_param2 (f, P.A, P.x, sigma, modulus);
1297 else if (*param == ECM_PARAM_BATCH_32BITS_D)
1298 youpi = get_curve_from_param3 (P.A, P.x, sigma, modulus);
1299 else
1300 {
1301 outputf (OUTPUT_ERROR, "Error, invalid parametrization.\n");
1302 youpi = ECM_ERROR;
1303 goto end_of_ecm;
1304 }
1305
1306 if (youpi != ECM_NO_FACTOR_FOUND)
1307 {
1308 if (youpi == ECM_ERROR)
1309 outputf (OUTPUT_ERROR, "Error, invalid value of sigma.\n");
1310
1311 goto end_of_ecm;
1312 }
1313 }
1314 }
1315 else if (sigma_is_A == 1)
1316 {
1317 /* sigma contains the A value */
1318 mpres_set_z (P.A, sigma, modulus);
1319 /* TODO: make a valid, random starting point in case none was given */
1320 /* Problem: this may be as hard as factoring as we'd need to determine
1321 whether x^3 + a*x^2 + x is a quadratic residue or not */
1322 /* For now, we'll just chicken out. */
1323 /* Except for batch mode where we know that x0=2 */
1324 if (mpz_sgn (x) == 0)
1325 {
1326 if (IS_BATCH_MODE(*param))
1327 mpres_set_ui (P.x, 2, modulus);
1328 else
1329 {
1330 outputf (OUTPUT_ERROR,
1331 "Error, -A requires a starting point (-x0 x).\n");
1332 youpi = ECM_ERROR;
1333 goto end_of_ecm;
1334 }
1335 }
1336 else
1337 mpres_set_z (P.x, x, modulus);
1338 }
1339
1340 /* Print B1, B2, polynomial and sigma */
1341 print_B1_B2_poly (OUTPUT_NORMAL, ECM_ECM, B1, *B1done, B2min_parm, B2min,
1342 B2, root_params.S, sigma, sigma_is_A, E->type,
1343 y, *param, 0);
1344
1345 #if 0
1346 outputf (OUTPUT_VERBOSE, "b2=%1.0f, dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n",
1347 b2, dF, k, root_params.d1, root_params.d2, root_params.i0);
1348 #else
1349 outputf (OUTPUT_VERBOSE, "dF=%lu, k=%lu, d=%lu, d2=%lu, i0=%Zd\n",
1350 dF, k, root_params.d1, root_params.d2, root_params.i0);
1351 #endif
1352
1353 if (sigma_is_A == -1) /* Weierstrass or Hessian form.
1354 It is known that all curves in Weierstrass form do
1355 not admit a Montgomery form. However, we could
1356 be interested in performing some plain Step 1
1357 on some special curves.
1358 */
1359 {
1360 if (*param != ECM_PARAM_TORSION)
1361 {
1362 mpres_set_z (P.A, sigma, modulus); /* sigma contains A */
1363 mpres_set_z (P.x, x, modulus);
1364 mpres_set_z (P.y, y, modulus);
1365 }
1366 else
1367 {
1368 #ifdef HAVE_ADDLAWS
1369 if(E->type == ECM_EC_TYPE_WEIERSTRASS)
1370 #endif
1371 mpres_set_z(P.A, zE->a4, modulus);
1372 #ifdef HAVE_ADDLAWS
1373 else if(E->type == ECM_EC_TYPE_MONTGOMERY)
1374 mpres_set_z(P.A, zE->a2, modulus);
1375 #endif
1376 mpres_set_z (P.x, x, modulus);
1377 mpres_set_z (P.y, y, modulus);
1378 }
1379 }
1380
1381 if (test_verbose (OUTPUT_RESVERBOSE))
1382 {
1383 mpz_t t;
1384
1385 mpz_init (t);
1386 mpres_get_z (t, P.A, modulus);
1387 outputf (OUTPUT_RESVERBOSE, "A=%Zd\n", t);
1388 mpres_get_z (t, P.x, modulus);
1389 outputf (OUTPUT_RESVERBOSE, "starting point: x0=%Zd\n", t);
1390 #ifdef HAVE_ADDLAWS
1391 if (E->type == ECM_EC_TYPE_WEIERSTRASS)
1392 {
1393 mpres_get_z (t, P.y, modulus);
1394 outputf (OUTPUT_RESVERBOSE, " y0=%Zd\n", t);
1395 }
1396 #endif
1397 mpz_clear (t);
1398 }
1399
1400 if (go != NULL && mpz_cmp_ui (go, 1) > 0)
1401 outputf (OUTPUT_VERBOSE, "initial group order: %Zd\n", go);
1402
1403 if (test_verbose (OUTPUT_VERBOSE))
1404 {
1405 if (mpz_cmp_d (B2min, B1) != 0)
1406 {
1407 outputf (OUTPUT_VERBOSE,
1408 "Can't compute success probabilities for B1 <> B2min\n");
1409 }
1410 else if (*param == ECM_PARAM_DEFAULT)
1411 {
1412 outputf (OUTPUT_VERBOSE, "Can't compute success probabilities "
1413 "for this parametrization.\n");
1414 }
1415 else
1416 {
1417 rhoinit (256, 10);
1418 print_expcurves (B1, B2, dF, k, root_params.S, *param);
1419 }
1420 }
1421
1422 #ifdef HAVE_GWNUM
1423 /* We will only use GWNUM for numbers of the form k*b^n+c */
1424
1425 if (gw_b != 0 && B1 >= *B1done && *param == ECM_PARAM_SUYAMA)
1426 youpi = gw_ecm_stage1 (f, &P, modulus, B1, B1done, go, gw_k, gw_b, gw_n, gw_c);
1427
1428 /* At this point B1 == *B1done unless interrupted, or no GWNUM ecm_stage1
1429 is available */
1430
1431 if (youpi != ECM_NO_FACTOR_FOUND)
1432 goto end_of_ecm_rhotable;
1433 #endif /* HAVE_GWNUM */
1434
1435 if (B1 > *B1done || mpz_cmp_ui (go, 1) > 0)
1436 {
1437 if (IS_BATCH_MODE(*param))
1438 /* FIXME: go, stop_asap and chkfilename are ignored in batch mode */
1439 youpi = ecm_stage1_batch (f, P.x, P.A, modulus, B1, B1done,
1440 *param, batch_s);
1441 else{
1442 #ifdef HAVE_ADDLAWS
1443 if(E->type == ECM_EC_TYPE_MONTGOMERY)
1444 #endif
1445 youpi = ecm_stage1 (f, P.x, P.A, modulus, B1, B1done, go,
1446 stop_asap, chkfilename);
1447 #ifdef HAVE_ADDLAWS
1448 else{
1449 ell_point_init(PE, E, modulus);
1450 mpres_set(PE->x, P.x, modulus);
1451 mpres_set(PE->y, P.y, modulus);
1452 youpi = ecm_stage1_W (f, E, PE, modulus, B1, B1done, batch_s,
1453 go, stop_asap, chkfilename);
1454 mpres_set(P.x, PE->x, modulus);
1455 mpres_set(P.y, PE->y, modulus);
1456 }
1457 #endif
1458 }
1459 }
1460 else if (mpz_sgn (x) != 0)
1461 {
1462 /* when x <> 0, we initialize P to (x:y) */
1463 mpres_set_z (P.x, x, modulus);
1464 mpres_set_z (P.y, y, modulus);
1465 }
1466
1467 if (stage1time > 0.)
1468 {
1469 const long st2 = elltime (st, cputime ());
1470 const long s1t = (long) (stage1time * 1000.);
1471 outputf (OUTPUT_NORMAL,
1472 "Step 1 took %ldms (%ld in this run, %ld from previous runs)\n",
1473 st2 + s1t, st2, s1t);
1474 }
1475 else
1476 outputf (OUTPUT_NORMAL, "Step 1 took %ldms\n", elltime (st, cputime ()));
1477
1478 /* Store end-of-stage-1 residue in x in case we write it to a save file,
1479 before P.x is (perhaps) converted to Weierstrass form */
1480
1481 mpres_get_z (x, P.x, modulus);
1482 #ifdef HAVE_ADDLAWS
1483 if (E->type == ECM_EC_TYPE_WEIERSTRASS || E->type == ECM_EC_TYPE_HESSIAN)
1484 mpres_get_z (y, P.y, modulus);
1485 #endif
1486
1487 if (youpi != ECM_NO_FACTOR_FOUND)
1488 goto end_of_ecm_rhotable;
1489
1490 if (test_verbose (OUTPUT_RESVERBOSE))
1491 {
1492 mpz_t t;
1493
1494 mpz_init (t);
1495 mpres_get_z (t, P.x, modulus);
1496 outputf (OUTPUT_RESVERBOSE, "x=%Zd\n", t);
1497 #ifdef HAVE_ADDLAWS
1498 if (E->type == ECM_EC_TYPE_WEIERSTRASS)
1499 {
1500 mpres_get_z (t, P.y, modulus);
1501 outputf (OUTPUT_RESVERBOSE, "y=%Zd\n", t);
1502 }
1503 #endif
1504 mpz_clear (t);
1505 }
1506
1507 /* In case of a signal, we'll exit after the residue is printed. If no save
1508 file is specified, the user may still resume from the residue */
1509 if (stop_asap != NULL && (*stop_asap) ())
1510 goto end_of_ecm_rhotable;
1511
1512 /* If using 2^k +/-1 modulus and 'nobase2step2' flag is set,
1513 set default (-nobase2) modular method and remap P.x, P.y, and P.A */
1514 if (modulus->repr == ECM_MOD_BASE2 && nobase2step2)
1515 {
1516 mpz_t x_t, y_t, A_t;
1517
1518 mpz_init (x_t);
1519 mpz_init (y_t);
1520 mpz_init (A_t);
1521
1522 mpz_mod (x_t, P.x, modulus->orig_modulus);
1523 mpz_mod (y_t, P.y, modulus->orig_modulus);
1524 mpz_mod (A_t, P.A, modulus->orig_modulus);
1525
1526 mpmod_clear (modulus);
1527
1528 repr = ECM_MOD_NOBASE2;
1529 if (mpmod_init (modulus, n, repr) != 0) /* reset modulus for nobase2 */
1530 return ECM_ERROR;
1531
1532 /* remap x, y, and A for new modular method */
1533 mpres_set_z (P.x, x_t, modulus);
1534 mpres_set_z (P.y, y_t, modulus);
1535 mpres_set_z (P.A, A_t, modulus);
1536
1537 mpz_clear (x_t);
1538 mpz_clear (y_t);
1539 mpz_clear (A_t);
1540 }
1541
1542 #ifdef HAVE_ADDLAWS
1543 if (E->type == ECM_EC_TYPE_MONTGOMERY)
1544 #endif
1545 youpi = montgomery_to_weierstrass (f, P.x, P.y, P.A, modulus);
1546 #ifdef HAVE_ADDLAWS
1547 else if (E->type == ECM_EC_TYPE_HESSIAN)
1548 {
1549 youpi = hessian_to_weierstrass (f, P.x, P.y, P.A, modulus);
1550 if(youpi == ECM_NO_FACTOR_FOUND)
1551 /* due to that non-trivial kernel(?) */
1552 youpi = mult_by_3(f, P.x, P.y, P.A, modulus);
1553 }
1554 #endif
1555
1556 if (test_verbose (OUTPUT_RESVERBOSE) && youpi == ECM_NO_FACTOR_FOUND &&
1557 mpz_cmp (B2, B2min) >= 0)
1558 {
1559 mpz_t t;
1560
1561 mpz_init (t);
1562 mpres_get_z (t, P.x, modulus);
1563 outputf (OUTPUT_RESVERBOSE, "After switch to Weierstrass form, "
1564 "P=(%Zd", t);
1565 mpres_get_z (t, P.y, modulus);
1566 outputf (OUTPUT_RESVERBOSE, ", %Zd)\n", t);
1567 mpres_get_z (t, P.A, modulus);
1568 outputf (OUTPUT_RESVERBOSE, "on curve Y^2 = X^3 + %Zd * X + b\n", t);
1569 mpz_clear (t);
1570 }
1571
1572 P.disc = 0; /* FIXME: should disappear one day */
1573
1574 if (youpi == ECM_NO_FACTOR_FOUND && mpz_cmp (B2, B2min) >= 0)
1575 youpi = stage2 (f, &P, modulus, dF, k, &root_params, use_ntt,
1576 TreeFilename, stop_asap);
1577 #ifdef TIMING_CRT
1578 printf ("mpzspv_from_mpzv_slow: %dms\n", mpzspv_from_mpzv_slow_time);
1579 printf ("mpzspv_to_mpzv: %dms\n", mpzspv_to_mpzv_time);
1580 printf ("mpzspv_normalise: %dms\n", mpzspv_normalise_time);
1581 #endif
1582
1583 end_of_ecm_rhotable:
1584 if (test_verbose (OUTPUT_VERBOSE))
1585 {
1586 if (mpz_cmp_d (B2min, B1) == 0 && *param != ECM_PARAM_DEFAULT)
1587 {
1588 if (youpi == ECM_NO_FACTOR_FOUND &&
1589 (stop_asap == NULL || !(*stop_asap)()))
1590 print_exptime (B1, B2, dF, k, root_params.S,
1591 (long) (stage1time * 1000.) +
1592 elltime (st, cputime ()), *param);
1593 rhoinit (1, 0); /* Free memory of rhotable */
1594 }
1595 }
1596
1597 end_of_ecm:
1598 #ifdef HAVE_ADDLAWS
1599 ell_curve_clear(E, modulus);
1600 #endif
1601 mpres_clear (P.A, modulus);
1602 mpres_clear (P.y, modulus);
1603 mpres_clear (P.x, modulus);
1604 mpz_clear (root_params.i0);
1605 mpz_clear (B2);
1606 mpz_clear (B2min);
1607 mpmod_clear (modulus);
1608 return youpi;
1609 }
1610