1 /* Copyright (C) 2000 The PARI group.
2
3 This file is part of the PARI/GP package.
4
5 PARI/GP is free software; you can redistribute it and/or modify it under the
6 terms of the GNU General Public License as published by the Free Software
7 Foundation; either version 2 of the License, or (at your option) any later
8 version. It is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY WHATSOEVER.
10
11 Check the License for details. You should have received a copy of it, along
12 with the package; see the file 'COPYING'. If not, write to the Free Software
13 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 #include "pari.h"
15 #include "paripriv.h"
16
17 /***********************************************************************/
18 /** PRIMES IN SUCCESSION **/
19 /***********************************************************************/
20
21 /* map from prime residue classes mod 210 to their numbers in {0...47}.
22 * Subscripts into this array take the form ((k-1)%210)/2, ranging from
23 * 0 to 104. Unused entries are */
24 #define NPRC 128 /* nonprime residue class */
25
26 static unsigned char prc210_no[] = {
27 0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
28 5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
29 10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
30 NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
31 NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
32 24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
33 28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
34 33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
35 38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
36 43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
37 };
38
39 /* first differences of the preceding */
40 static unsigned char prc210_d1[] = {
41 10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
42 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
43 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
44 };
45
46 static int
unextprime_overflow(ulong n)47 unextprime_overflow(ulong n)
48 {
49 #ifdef LONG_IS_64BIT
50 return (n > (ulong)-59);
51 #else
52 return (n > (ulong)-5);
53 #endif
54 }
55
56 /* return 0 for overflow */
57 ulong
unextprime(ulong n)58 unextprime(ulong n)
59 {
60 long rc, rc0, rcn;
61
62 switch(n) {
63 case 0: case 1: case 2: return 2;
64 case 3: return 3;
65 case 4: case 5: return 5;
66 case 6: case 7: return 7;
67 }
68 if (unextprime_overflow(n)) return 0;
69 /* here n > 7 */
70 n |= 1; /* make it odd */
71 rc = rc0 = n % 210;
72 /* find next prime residue class mod 210 */
73 for(;;)
74 {
75 rcn = (long)(prc210_no[rc>>1]);
76 if (rcn != NPRC) break;
77 rc += 2; /* cannot wrap since 209 is coprime and rc odd */
78 }
79 if (rc > rc0) n += rc - rc0;
80 /* now find an actual (pseudo)prime */
81 for(;;)
82 {
83 if (uisprime(n)) break;
84 n += prc210_d1[rcn];
85 if (++rcn > 47) rcn = 0;
86 }
87 return n;
88 }
89
90 GEN
nextprime(GEN n)91 nextprime(GEN n)
92 {
93 long rc, rc0, rcn;
94 pari_sp av = avma;
95
96 if (typ(n) != t_INT)
97 {
98 n = gceil(n);
99 if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
100 }
101 if (signe(n) <= 0) { set_avma(av); return gen_2; }
102 if (lgefint(n) == 3)
103 {
104 ulong k = unextprime(uel(n,2));
105 set_avma(av);
106 if (k) return utoipos(k);
107 #ifdef LONG_IS_64BIT
108 return uutoi(1,13);
109 #else
110 return uutoi(1,15);
111 #endif
112 }
113 /* here n > 7 */
114 if (!mod2(n)) n = addui(1,n);
115 rc = rc0 = umodiu(n, 210);
116 /* find next prime residue class mod 210 */
117 for(;;)
118 {
119 rcn = (long)(prc210_no[rc>>1]);
120 if (rcn != NPRC) break;
121 rc += 2; /* cannot wrap since 209 is coprime and rc odd */
122 }
123 if (rc > rc0) n = addui(rc - rc0, n);
124 /* now find an actual (pseudo)prime */
125 for(;;)
126 {
127 if (BPSW_psp(n)) break;
128 n = addui(prc210_d1[rcn], n);
129 if (++rcn > 47) rcn = 0;
130 }
131 if (avma == av) return icopy(n);
132 return gerepileuptoint(av, n);
133 }
134
135 ulong
uprecprime(ulong n)136 uprecprime(ulong n)
137 {
138 long rc, rc0, rcn;
139 { /* check if n <= 10 */
140 if (n <= 1) return 0;
141 if (n == 2) return 2;
142 if (n <= 4) return 3;
143 if (n <= 6) return 5;
144 if (n <= 10) return 7;
145 }
146 /* here n >= 11 */
147 if (!(n % 2)) n--;
148 rc = rc0 = n % 210;
149 /* find previous prime residue class mod 210 */
150 for(;;)
151 {
152 rcn = (long)(prc210_no[rc>>1]);
153 if (rcn != NPRC) break;
154 rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
155 }
156 if (rc < rc0) n += rc - rc0;
157 /* now find an actual (pseudo)prime */
158 for(;;)
159 {
160 if (uisprime(n)) break;
161 if (--rcn < 0) rcn = 47;
162 n -= prc210_d1[rcn];
163 }
164 return n;
165 }
166
167 GEN
precprime(GEN n)168 precprime(GEN n)
169 {
170 long rc, rc0, rcn;
171 pari_sp av = avma;
172
173 if (typ(n) != t_INT)
174 {
175 n = gfloor(n);
176 if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
177 }
178 if (signe(n) <= 0) { set_avma(av); return gen_0; }
179 if (lgefint(n) <= 3)
180 {
181 ulong k = uel(n,2);
182 set_avma(av);
183 return utoi(uprecprime(k));
184 }
185 if (!mod2(n)) n = subiu(n,1);
186 rc = rc0 = umodiu(n, 210);
187 /* find previous prime residue class mod 210 */
188 for(;;)
189 {
190 rcn = (long)(prc210_no[rc>>1]);
191 if (rcn != NPRC) break;
192 rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
193 }
194 if (rc0 > rc) n = subiu(n, rc0 - rc);
195 /* now find an actual (pseudo)prime */
196 for(;;)
197 {
198 if (BPSW_psp(n)) break;
199 if (--rcn < 0) rcn = 47;
200 n = subiu(n, prc210_d1[rcn]);
201 }
202 if (avma == av) return icopy(n);
203 return gerepileuptoint(av, n);
204 }
205
206 /* Find next single-word prime strictly larger than p.
207 * If **d is non-NULL (somewhere in a diffptr), this is p + *(*d)++;
208 * otherwise imitate nextprime().
209 * *rcn = NPRC or the correct residue class for the current p; we'll use this
210 * to track the current prime residue class mod 210 once we're out of range of
211 * the diffptr table, and we'll update it before that if it isn't NPRC.
212 *
213 * *q is incremented whenever q!=NULL and we wrap from 209 mod 210 to
214 * 1 mod 210 */
215 ulong
snextpr(ulong p,byteptr * d,long * rcn,long * q,int (* ispsp)(ulong))216 snextpr(ulong p, byteptr *d, long *rcn, long *q, int (*ispsp)(ulong))
217 {
218 if (**d)
219 {
220 byteptr dd = *d;
221 long d1 = 0;
222
223 NEXT_PRIME_VIADIFF(d1,dd);
224 /* d1 = nextprime(p+1) - p */
225 if (*rcn != NPRC)
226 {
227 while (d1 > 0)
228 {
229 d1 -= prc210_d1[*rcn];
230 if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
231 }
232 /* assert(d1 == 0) */
233 }
234 NEXT_PRIME_VIADIFF(p,*d);
235 return p;
236 }
237 if (unextprime_overflow(p)) pari_err_OVERFLOW("snextpr");
238 /* we are beyond the diffptr table, initialize if needed */
239 if (*rcn == NPRC) *rcn = prc210_no[(p % 210) >> 1]; /* != NPRC */
240 /* look for the next one */
241 do {
242 p += prc210_d1[*rcn];
243 if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
244 } while (!ispsp(p));
245 return p;
246 }
247
248 /********************************************************************/
249 /** **/
250 /** INTEGER FACTORIZATION **/
251 /** **/
252 /********************************************************************/
253 int factor_add_primes = 0, factor_proven = 0;
254
255 /***********************************************************************/
256 /** **/
257 /** FACTORIZATION (ECM) -- GN Jul-Aug 1998 **/
258 /** Integer factorization using the elliptic curves method (ECM). **/
259 /** ellfacteur() returns a non trivial factor of N, assuming N>0, **/
260 /** is composite, and has no prime divisor below 2^14 or so. **/
261 /** Thanks to Paul Zimmermann for much helpful advice and to **/
262 /** Guillaume Hanrot and Igor Schein for intensive testing **/
263 /** **/
264 /***********************************************************************/
265 #define nbcmax 64 /* max number of simultaneous curves */
266
267 static const ulong TB1[] = {
268 142,172,208,252,305,370,450,545,661,801,972,1180,1430,
269 1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
270 14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
271 81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
272 314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
273 1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
274 3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
275 12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
276 32047300UL,38856400UL, /* 110 times that still fits into 32bits */
277 #ifdef LONG_IS_64BIT
278 47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
279 123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
280 323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
281 847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
282 2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL
283 #endif
284 };
285 static const ulong TB1_for_stage[] = {
286 /* Start below the optimal B1 for finding factors which would just have been
287 * missed by pollardbrent(), and escalate, changing curves to give good
288 * coverage of the small factor ranges. Entries grow faster than what would
289 * be optimal but a table instead of a 2D array keeps the code simple */
290 500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
291 2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
292 7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
293 19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
294 48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
295 107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
296 241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
297 540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL
298 };
299
300 /* addition/doubling/multiplication of a point on an 'elliptic curve mod N'
301 * may result in one of three things:
302 * - a new bona fide point
303 * - a point at infinity (denominator divisible by N)
304 * - a point at infinity mod some p | N but finite mod q | N betraying itself
305 * by a denominator which has nontrivial gcd with N.
306 *
307 * In the second case, addition/doubling aborts, copying one of the summands
308 * to the destination array of points unless they coincide.
309 * Multiplication will stop at some unpredictable intermediate stage: The
310 * destination will contain _some_ multiple of the input point, but not
311 * necessarily the desired one, which doesn't matter. As long as we're
312 * multiplying (B1 phase) we simply carry on with the next multiplier.
313 * During the B2 phase, the only additions are the giant steps, and the
314 * worst that can happen here is that we lose one residue class mod 210
315 * of prime multipliers on 4 of the curves, so again, we ignore the problem
316 * and just carry on.)
317 *
318 * Idea: select nbc curves mod N and one point P on each of them. For each
319 * such P, compute [M]P = Q where M is the product of all powers <= B2 of
320 * primes <= nextprime(B1). Then check whether [p]Q for p < nextprime(B2)
321 * betrays a factor. This second stage looks separately at the primes in
322 * each residue class mod 210, four curves at a time, and steps additively
323 * to ever larger multipliers, by comparing X coordinates of points which we
324 * would need to add in order to reach another prime multiplier in the same
325 * residue class. 'Comparing' means that we accumulate a product of
326 * differences of X coordinates, and from time to time take a gcd of this
327 * product with N. Montgomery's multi-inverse trick is used heavily. */
328
329 /* *** auxiliary functions for ellfacteur: *** */
330 /* (Rx,Ry) <- (Px,Py)+(Qx,Qy) over Z/NZ, z=1/(Px-Qx). If Ry = NULL, don't set */
331 static void
FpE_add_i(GEN N,GEN z,GEN Px,GEN Py,GEN Qx,GEN Qy,GEN * Rx,GEN * Ry)332 FpE_add_i(GEN N, GEN z, GEN Px, GEN Py, GEN Qx, GEN Qy, GEN *Rx, GEN *Ry)
333 {
334 GEN slope = modii(mulii(subii(Py, Qy), z), N);
335 GEN t = subii(sqri(slope), addii(Qx, Px));
336 affii(modii(t, N), *Rx);
337 if (Ry) {
338 t = subii(mulii(slope, subii(Px, *Rx)), Py);
339 affii(modii(t, N), *Ry);
340 }
341 }
342 /* X -> Z; cannot add on one of the curves: make sure Z contains
343 * something useful before letting caller proceed */
344 static void
ZV_aff(long n,GEN * X,GEN * Z)345 ZV_aff(long n, GEN *X, GEN *Z)
346 {
347 if (X != Z) {
348 long k;
349 for (k = n; k--; ) affii(X[k],Z[k]);
350 }
351 }
352
353 /* Parallel addition on nbc curves, assigning the result to locations at and
354 * following *X3, *Y3. (If Y-coords of result not desired, set Y=NULL.)
355 * Safe even if (X3,Y3) = (X2,Y2), _not_ if (X1,Y1). It is also safe to
356 * overwrite Y2 with X3. If nbc1 < nbc, the first summand is
357 * assumed to hold only nbc1 distinct points, repeated as often as we need
358 * them (to add one point on each of a few curves to several other points on
359 * the same curves): only used with nbc1 = nbc or nbc1 = 4 | nbc.
360 *
361 * Return 0 [SUCCESS], 1 [N | den], 2 [gcd(den, N) is a factor of N, preserved
362 * in gl.
363 * Stack space is bounded by a constant multiple of lgefint(N)*nbc:
364 * - Phase 2 creates 12 items on the stack per iteration, of which 4 are twice
365 * as long and 1 is thrice as long as N, i.e. 18 units per iteration.
366 * - Phase 1 creates 4 units.
367 * Total can be as large as 4*nbcmax + 18*8 units; ecm_elladd2() is
368 * just as bad, and elldouble() comes to 3*nbcmax + 29*8 units. */
369 static int
ecm_elladd0(GEN N,GEN * gl,long nbc,long nbc1,GEN * X1,GEN * Y1,GEN * X2,GEN * Y2,GEN * X3,GEN * Y3)370 ecm_elladd0(GEN N, GEN *gl, long nbc, long nbc1,
371 GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
372 {
373 const ulong mask = (nbc1 == 4)? 3: ~0UL; /*nbc1 = 4 or nbc*/
374 GEN W[2*nbcmax], *A = W+nbc; /* W[0],A[0] unused */
375 long i;
376 pari_sp av = avma;
377
378 W[1] = subii(X1[0], X2[0]);
379 for (i=1; i<nbc; i++)
380 { /*prepare for multi-inverse*/
381 A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N */
382 W[i+1] = modii(mulii(A[i], W[i]), N);
383 }
384 if (!invmod(W[nbc], N, gl))
385 {
386 if (!equalii(N,*gl)) return 2;
387 ZV_aff(nbc, X2,X3);
388 if (Y3) ZV_aff(nbc, Y2,Y3);
389 return gc_int(av,1);
390 }
391
392 while (i--) /* nbc times */
393 {
394 pari_sp av2 = avma;
395 GEN Px = X1[i&mask], Py = Y1[i&mask], Qx = X2[i], Qy = Y2[i];
396 GEN z = i? mulii(*gl,W[i]): *gl; /*1/(Px-Qx)*/
397 FpE_add_i(N,z, Px,Py,Qx,Qy, X3+i, Y3? Y3+i: NULL);
398 if (!i) break;
399 set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
400 }
401 return gc_int(av,0);
402 }
403
404 /* Shortcut, for use in cases where Y coordinates follow their corresponding
405 * X coordinates, and first summand doesn't need to be repeated */
406 static int
ecm_elladd(GEN N,GEN * gl,long nbc,GEN * X1,GEN * X2,GEN * X3)407 ecm_elladd(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2, GEN *X3) {
408 return ecm_elladd0(N, gl, nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
409 }
410
411 /* As ecm_elladd except it does twice as many additions (and hides even more
412 * of the cost of the modular inverse); the net effect is the same as
413 * ecm_elladd(nbc,X1,X2,X3) && ecm_elladd(nbc,X4,X5,X6). Safe to
414 * have X2=X3, X5=X6, or X1,X2 coincide with X4,X5 in any order. */
415 static int
ecm_elladd2(GEN N,GEN * gl,long nbc,GEN * X1,GEN * X2,GEN * X3,GEN * X4,GEN * X5,GEN * X6)416 ecm_elladd2(GEN N, GEN *gl, long nbc,
417 GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
418 {
419 GEN *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
420 GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
421 GEN W[4*nbcmax], *A = W+2*nbc; /* W[0],A[0] unused */
422 long i, j;
423 pari_sp av = avma;
424
425 W[1] = subii(X1[0], X2[0]);
426 for (i=1; i<nbc; i++)
427 {
428 A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
429 W[i+1] = modii(mulii(A[i], W[i]), N);
430 }
431 for (j=0; j<nbc; i++,j++)
432 {
433 A[i] = subii(X4[j], X5[j]);
434 W[i+1] = modii(mulii(A[i], W[i]), N);
435 }
436 if (!invmod(W[2*nbc], N, gl))
437 {
438 if (!equalii(N,*gl)) return 2;
439 ZV_aff(2*nbc, X2,X3); /* hack: 2*nbc => copy Y2->Y3 */
440 ZV_aff(2*nbc, X5,X6); /* also copy Y5->Y6 */
441 return gc_int(av,1);
442 }
443
444 while (j--) /* nbc times */
445 {
446 pari_sp av2 = avma;
447 GEN Px = X4[j], Py = Y4[j], Qx = X5[j], Qy = Y5[j];
448 GEN z = mulii(*gl,W[--i]); /*1/(Px-Qx)*/
449 FpE_add_i(N,z, Px,Py, Qx,Qy, X6+j,Y6+j);
450 set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
451 }
452 while (i--) /* nbc times */
453 {
454 pari_sp av2 = avma;
455 GEN Px = X1[i], Py = Y1[i], Qx = X2[i], Qy = Y2[i];
456 GEN z = i? mulii(*gl, W[i]): *gl; /*1/(Px-Qx)*/
457 FpE_add_i(N,z, Px,Py, Qx,Qy, X3+i,Y3+i);
458 if (!i) break;
459 set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
460 }
461 return gc_int(av,0);
462 }
463
464 /* Parallel doubling on nbc curves, assigning the result to locations at
465 * and following *X2. Safe to be called with X2 equal to X1. Return
466 * value as for ecm_elladd. If we find a point at infinity mod N,
467 * and if X1 != X2, we copy the points at X1 to X2. */
468 static int
elldouble(GEN N,GEN * gl,long nbc,GEN * X1,GEN * X2)469 elldouble(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2)
470 {
471 GEN *Y1 = X1+nbc, *Y2 = X2+nbc;
472 GEN W[nbcmax+1]; /* W[0] unused */
473 long i;
474 pari_sp av = avma;
475 /*W[0] = gen_1;*/ W[1] = Y1[0];
476 for (i=1; i<nbc; i++) W[i+1] = modii(mulii(Y1[i], W[i]), N);
477 if (!invmod(W[nbc], N, gl))
478 {
479 if (!equalii(N,*gl)) return 2;
480 ZV_aff(2*nbc,X1,X2); /* also copies Y1->Y2 */
481 return gc_int(av,1);
482 }
483 while (i--) /* nbc times */
484 {
485 pari_sp av2;
486 GEN v, w, L, z = i? mulii(*gl,W[i]): *gl;
487 if (i) *gl = modii(mulii(*gl, Y1[i]), N);
488 av2 = avma;
489 L = modii(mulii(addui(1, mului(3, Fp_sqr(X1[i],N))), z), N);
490 if (signe(L)) /* half of zero is still zero */
491 L = shifti(mod2(L)? addii(L, N): L, -1);
492 v = modii(subii(sqri(L), shifti(X1[i],1)), N);
493 w = modii(subii(mulii(L, subii(X1[i], v)), Y1[i]), N);
494 affii(v, X2[i]);
495 affii(w, Y2[i]);
496 set_avma(av2);
497 }
498 return gc_int(av,0);
499 }
500
501 /* Parallel multiplication by an odd prime k on nbc curves, storing the
502 * result to locations at and following *X2. Safe to be called with X2 = X1.
503 * Return values as ecm_elladd. Uses (a simplified variant of) Montgomery's
504 * PRAC algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
505 * With thanks to Paul Zimmermann for the reference. --GN1998Aug13 */
506 static int
get_rule(ulong d,ulong e)507 get_rule(ulong d, ulong e)
508 {
509 if (d <= e + (e>>2)) /* floor(1.25*e) */
510 {
511 if ((d+e)%3 == 0) return 0; /* rule 1 */
512 if ((d-e)%6 == 0) return 1; /* rule 2 */
513 }
514 /* d <= 4*e but no ofl */
515 if ((d+3)>>2 <= e) return 2; /* rule 3, common case */
516 if ((d&1)==(e&1)) return 1; /* rule 4 = rule 2 */
517 if (!(d&1)) return 3; /* rule 5 */
518 if (d%3 == 0) return 4; /* rule 6 */
519 if ((d+e)%3 == 0) return 5; /* rule 7 */
520 if ((d-e)%3 == 0) return 6; /* rule 8 */
521 /* when we get here, e is even, otherwise one of rules 4,5 would apply */
522 return 7; /* rule 9 */
523 }
524
525 /* PRAC implementation notes - main changes against the paper version:
526 * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for m!=n)
527 * to an ecm_elladd() which does not depend on the third argument; thus
528 * references to the third variable (C in the paper) can be eliminated.
529 * (2) Since our multipliers are prime, the outer loop of the paper
530 * version executes only once, and thus is invisible above.
531 * (3) The first step in the inner loop of the paper version will always be
532 * rule 3, but the addition requested by this rule amounts to a doubling, and
533 * will always be followed by a swap, so we have unrolled this first iteration.
534 * (4) Simplifications in rules 6 and 7 are possible given the above, and we
535 * save one addition in each of the two cases. NB none of the other
536 * ecm_elladd()s in the loop can ever degenerate into an elldouble.
537 * (5) I tried to optimize for rule 3, which is used more frequently than all
538 * others together, but it didn't improve things, so I removed the nested
539 * tight loop again. --GN */
540 /* The main loop body of ellfacteur() runs _slower_ under PRAC than under a
541 * straightforward left-shift binary multiplication when N has <30 digits and
542 * B1 is small; PRAC wins when N and B1 get larger. Weird. --GN */
543 /* k>2 assumed prime, XAUX = scratchpad */
544 static int
ellmult(GEN N,GEN * gl,long nbc,ulong k,GEN * X1,GEN * X2,GEN * XAUX)545 ellmult(GEN N, GEN *gl, long nbc, ulong k, GEN *X1, GEN *X2, GEN *XAUX)
546 {
547 ulong r, d, e, e1;
548 int res;
549 GEN *A = X2, *B = XAUX, *T = XAUX + 2*nbc;
550
551 ZV_aff(2*nbc,X1,XAUX);
552 /* first doubling picks up X1; after this we'll be working in XAUX and
553 * X2 only, mostly via A and B and T */
554 if ((res = elldouble(N, gl, nbc, X1, X2)) != 0) return res;
555
556 /* split the work at the golden ratio */
557 r = (ulong)(k*0.61803398875 + .5);
558 d = k - r;
559 e = r - d; /* d+e == r, so no danger of ofl below */
560 while (d != e)
561 { /* apply one of the nine transformations from PM's Table 4. */
562 switch(get_rule(d,e))
563 {
564 case 0: /* rule 1 */
565 if ( (res = ecm_elladd(N, gl, nbc, A, B, T)) ) return res;
566 if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
567 e1 = d - e; d = (d + e1)/3; e = (e - e1)/3; break;
568 case 1: /* rules 2 and 4 */
569 if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
570 if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
571 d = (d-e)>>1; break;
572 case 3: /* rule 5 */
573 if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
574 d >>= 1; break;
575 case 4: /* rule 6 */
576 if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
577 if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
578 if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
579 d = d/3 - e; break;
580 case 2: /* rule 3 */
581 if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
582 d -= e; break;
583 case 5: /* rule 7 */
584 if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
585 if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
586 d = (d - 2*e)/3; break;
587 case 6: /* rule 8 */
588 if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
589 if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
590 if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
591 d = (d - e)/3; break;
592 case 7: /* rule 9 */
593 if ( (res = elldouble(N, gl, nbc, B, B)) ) return res;
594 e >>= 1; break;
595 }
596 /* swap d <-> e and A <-> B if necessary */
597 if (d < e) { lswap(d,e); pswap(A,B); }
598 }
599 return ecm_elladd(N, gl, nbc, XAUX, X2, X2);
600 }
601
602 struct ECM {
603 pari_timer T;
604 long nbc, nbc2, seed;
605 GEN *X, *XAUX, *XT, *XD, *XB, *XB2, *XH, *Xh, *Yh;
606 };
607
608 /* memory layout in ellfacteur(): a large array of GEN pointers, and one
609 * huge chunk of memory containing all the actual GEN (t_INT) objects.
610 * nbc is constant throughout the invocation:
611 * - The B1 stage of each iteration through the main loop needs little
612 * space: enough for the X and Y coordinates of the current points,
613 * and twice as much again as scratchpad for ellmult().
614 * - The B2 stage, starting from some current set of points Q, needs, in
615 * succession:
616 * + space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
617 * + space for 48*nbc X and Y coordinates to hold the helix. This could
618 * re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
619 * know in advance which residue class mod 210 our p is going to be in.
620 * It can and should re-use [p]Q, though;
621 * + space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
622 * further doublings until the giant step multiplier is reached. This
623 * can re-use the remaining cells from above. The computation of [210]Q
624 * will have been the last call to ellmult() within this iteration of the
625 * main loop, so the scratchpad is now also free to be re-used. We also
626 * compute [630]Q by a parallel addition; we'll need it later to get the
627 * baby-step table bootstrapped a little faster.
628 * + Finally, for no more than 4 curves at a time, room for up to 1024 X
629 * coordinates only: the Y coordinates needed whilst setting up this baby
630 * step table are temporarily stored in the upper half, and overwritten
631 * during the last series of additions.
632 *
633 * Graphically: after end of B1 stage (X,Y are the coords of Q):
634 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
635 * | X Y | scratch | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q| ... | ...
636 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
637 * *X *XAUX *XT *XD *XB
638 *
639 * [30]Q is computed from [10]Q. [210]Q can go into XY, etc:
640 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
641 * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210] |bstp table...
642 * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
643 * *X *XAUX *XT *XD [*XG, somewhere here] *XB .... *XH
644 *
645 * So we need (13 + 48) * 2 * nbc slots here + 4096 slots for the baby step
646 * table (not all of which will be used when we start with a small B1, but
647 * better to allocate and initialize ahead of time all the slots that might
648 * be needed later).
649 *
650 * Note on memory locality: During the B2 phase, accesses to the helix
651 * (once it is set up) will be clustered by curves (4 out of nbc at a time).
652 * Accesses to the baby steps table will wander from one end of the array to
653 * the other and back, one such cycle per giant step, and during a full cycle
654 * we would expect on the order of 2E4 accesses when using the largest giant
655 * step size. Thus we shouldn't be doing too bad with respect to thrashing
656 * a 512KBy L2 cache. However, we don't want the baby step table to grow
657 * larger than this, even if it would reduce the number of EC operations by a
658 * few more per cent for very large B2, lest cache thrashing slow down
659 * everything disproportionally. --GN */
660 /* Auxiliary routines need < (3*nbc+240)*lN words on the PARI stack, in
661 * addition to the spc*(lN+1) words occupied by our main table. */
662 static void
ECM_alloc(struct ECM * E,long lN)663 ECM_alloc(struct ECM *E, long lN)
664 {
665 const long bstpmax = 1024; /* max number of baby step table entries */
666 long spc = (13 + 48) * E->nbc2 + bstpmax * 4;
667 long len = spc + 385 + spc*lN;
668 long i, tw = evallg(lN) | evaltyp(t_INT);
669 GEN w, *X = (GEN*)new_chunk(len);
670 /* hack for X[i] = cgeti(lN). X = current point in B1 phase */
671 w = (GEN)(X + spc + 385);
672 for (i = spc-1; i >= 0; i--) { X[i] = w; *w = tw; w += lN; }
673 E->X = X;
674 E->XAUX = E->X + E->nbc2; /* scratchpad for ellmult() */
675 E->XT = E->XAUX + E->nbc2; /* ditto, will later hold [3*210]Q */
676 E->XD = E->XT + E->nbc2; /* room for various multiples */
677 E->XB = E->XD + 10*E->nbc2; /* start of baby steps table */
678 E->XB2 = E->XB + 2 * bstpmax; /* middle of baby steps table */
679 E->XH = E->XB2 + 2 * bstpmax; /* end of bstps table, start of helix */
680 E->Xh = E->XH + 48*E->nbc2; /* little helix, X coords */
681 E->Yh = E->XH + 192; /* ditto, Y coords */
682 /* XG,YG set inside the main loop, since they depend on B2 */
683 /* E.Xh range of 384 pointers not set; these will later duplicate the pointers
684 * in the E.XH range, 4 curves at a time. Some of the cells reserved here for
685 * the E.XB range will never be used, instead, we'll warp the pointers to
686 * connect to (read-only) GENs in the X/E.XD range */
687 }
688 /* N.B. E->seed is not initialized here */
689 static void
ECM_init(struct ECM * E,GEN N,long nbc)690 ECM_init(struct ECM *E, GEN N, long nbc)
691 {
692 if (nbc < 0)
693 { /* choose a sensible default */
694 const long size = expi(N) + 1;
695 nbc = ((size >> 3) << 2) - 80;
696 if (nbc < 8) nbc = 8;
697 }
698 if (nbc > nbcmax) nbc = nbcmax;
699 E->nbc = nbc;
700 E->nbc2 = nbc << 1;
701 ECM_alloc(E, lgefint(N));
702 }
703
704 static GEN
ECM_loop(struct ECM * E,GEN N,ulong B1)705 ECM_loop(struct ECM *E, GEN N, ulong B1)
706 {
707 const ulong B2 = 110 * B1, B2_rt = usqrt(B2);
708 const ulong nbc = E->nbc, nbc2 = E->nbc2;
709 pari_sp av1, avtmp;
710 byteptr d0, d = diffptr;
711 long i, gse, gss, bstp, bstp0, rcn0, rcn;
712 ulong B2_p, m, p, p0;
713 GEN g, *XG, *YG;
714 GEN *X = E->X, *XAUX = E->XAUX, *XT = E->XT, *XD = E->XD;
715 GEN *XB = E->XB, *XB2 = E->XB2, *XH = E->XH, *Xh = E->Xh, *Yh = E->Yh;
716 /* pick curves */
717 for (i = nbc2; i--; ) affui(E->seed++, X[i]);
718 /* pick giant step exponent and size */
719 gse = B1 < 656
720 ? (B1 < 200? 5: 6)
721 : (B1 < 10500
722 ? (B1 < 2625? 7: 8)
723 : (B1 < 42000? 9: 10));
724 gss = 1UL << gse;
725 /* With 32 baby steps, a giant step corresponds to 32*420 = 13440,
726 * appropriate for the smallest B2s. With 1024, a giant step will be 430080;
727 * appropriate for B1 >~ 42000, where 512 baby steps would imply roughly
728 * the same number of curve additions. */
729 XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
730 YG = XG + nbc;
731
732 if (DEBUGLEVEL >= 4) {
733 err_printf("ECM: time = %6ld ms\nECM: B1 = %4lu,", timer_delay(&E->T), B1);
734 err_printf("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
735 }
736 p = 0;
737 NEXT_PRIME_VIADIFF(p,d);
738
739 /* ---B1 PHASE--- */
740 /* treat p=2 separately */
741 B2_p = B2 >> 1;
742 for (m=1; m<=B2_p; m<<=1)
743 {
744 int fl = elldouble(N, &g, nbc, X, X);
745 if (fl > 1) return g; else if (fl) break;
746 }
747 rcn = NPRC; /* multipliers begin at the beginning */
748 /* p=3,...,nextprime(B1) */
749 while (p < B1 && p <= B2_rt)
750 {
751 pari_sp av2 = avma;
752 p = snextpr(p, &d, &rcn, NULL, uisprime);
753 B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
754 for (m=1; m<=B2_p; m*=p)
755 {
756 int fl = ellmult(N, &g, nbc, p, X, X, XAUX);
757 if (fl > 1) return g; else if (fl) break;
758 set_avma(av2);
759 }
760 set_avma(av2);
761 }
762 /* primes p larger than sqrt(B2) appear only to the 1st power */
763 while (p < B1)
764 {
765 pari_sp av2 = avma;
766 p = snextpr(p, &d, &rcn, NULL, uisprime);
767 if (ellmult(N, &g, nbc, p, X, X, XAUX) > 1) return g;
768 set_avma(av2);
769 }
770 if (DEBUGLEVEL >= 4) {
771 err_printf("ECM: time = %6ld ms, B1 phase done, ", timer_delay(&E->T));
772 err_printf("p = %lu, setting up for B2\n", p);
773 }
774
775 /* ---B2 PHASE--- */
776 /* compute [2]Q,...,[10]Q, needed to build the helix */
777 if (elldouble(N, &g, nbc, X, XD) > 1) return g; /*[2]Q*/
778 if (elldouble(N, &g, nbc, XD, XD + nbc2) > 1) return g; /*[4]Q*/
779 if (ecm_elladd(N, &g, nbc,
780 XD, XD + nbc2, XD + (nbc<<2)) > 1) return g; /* [6]Q */
781 if (ecm_elladd2(N, &g, nbc,
782 XD, XD + (nbc<<2), XT + (nbc<<3),
783 XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
784 return g; /* [8]Q and [10]Q */
785 if (DEBUGLEVEL >= 7) err_printf("\t(got [2]Q...[10]Q)\n");
786
787 /* get next prime (still using the foolproof test) */
788 p = snextpr(p, &d, &rcn, NULL, uisprime);
789 /* make sure we have the residue class number (mod 210) */
790 if (rcn == NPRC)
791 {
792 rcn = prc210_no[(p % 210) >> 1];
793 if (rcn == NPRC)
794 {
795 err_printf("ECM: %lu should have been prime but isn\'t\n", p);
796 pari_err_BUG("ellfacteur");
797 }
798 }
799
800 /* compute [p]Q and put it into its place in the helix */
801 if (ellmult(N, &g, nbc, p, X, XH + rcn*nbc2, XAUX) > 1)
802 return g;
803 if (DEBUGLEVEL >= 7)
804 err_printf("\t(got [p]Q, p = %lu = prc210_rp[%ld] mod 210)\n", p, rcn);
805
806 /* save current p, d, and rcn; we'll need them more than once below */
807 p0 = p;
808 d0 = d;
809 rcn0 = rcn; /* remember where the helix wraps */
810 bstp0 = 0; /* p is at baby-step offset 0 from itself */
811
812 /* fill up the helix, stepping forward through the prime residue classes
813 * mod 210 until we're back at the r'class of p0. Keep updating p so
814 * that we can print meaningful diagnostics if a factor shows up; don't
815 * bother checking which of these p's are in fact prime */
816 for (i = 47; i; i--) /* 47 iterations */
817 {
818 ulong dp = (ulong)prc210_d1[rcn];
819 p += dp;
820 if (rcn == 47)
821 { /* wrap mod 210 */
822 if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH) > 1) return g;
823 rcn = 0; continue;
824 }
825 if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH+rcn*nbc2+nbc2) > 1)
826 return g;
827 rcn++;
828 }
829 if (DEBUGLEVEL >= 7) err_printf("\t(got initial helix)\n");
830 /* compute [210]Q etc, needed for the baby step table */
831 if (ellmult(N, &g, nbc, 3, XD + (nbc<<3), X, XAUX) > 1) return g;
832 if (ellmult(N, &g, nbc, 7, X, X, XAUX) > 1) return g; /* [210]Q */
833 /* this was the last call to ellmult() in the main loop body; may now
834 * overwrite XAUX and slots XD and following */
835 if (elldouble(N, &g, nbc, X, XAUX) > 1) return g; /* [420]Q */
836 if (ecm_elladd(N, &g, nbc, X, XAUX, XT) > 1) return g;/*[630]Q*/
837 if (ecm_elladd(N, &g, nbc, X, XT, XD) > 1) return g; /*[840]Q*/
838 for (i=1; i <= gse; i++)
839 if (elldouble(N, &g, nbc, XT + i*nbc2, XD + i*nbc2) > 1) return g;
840 /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
841
842 if (DEBUGLEVEL >= 4)
843 err_printf("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
844 timer_delay(&E->T), p);
845
846 for (i = nbc - 4; i >= 0; i -= 4)
847 { /* loop over small sets of 4 curves at a time */
848 GEN *Xb;
849 long j, k;
850 if (DEBUGLEVEL >= 6)
851 err_printf("ECM: finishing curves %ld...%ld\n", i, i+3);
852 /* Copy relevant pointers from XH to Xh. Memory layout in XH:
853 * nbc X coordinates, nbc Y coordinates for residue class
854 * 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory layout for
855 * Xh is: four X coords for 1 mod 210, four for 11 mod 210, ..., four
856 * for 209 mod 210, then the corresponding Y coordinates in the same
857 * order. This allows a giant step on Xh using just three calls to
858 * ecm_elladd0() each acting on 64 points in parallel */
859 for (j = 48; j--; )
860 {
861 k = nbc2*j + i;
862 m = j << 2; /* X coordinates */
863 Xh[m] = XH[k]; Xh[m+1] = XH[k+1];
864 Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
865 k += nbc; /* Y coordinates */
866 Yh[m] = XH[k]; Yh[m+1] = XH[k+1];
867 Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
868 }
869 /* Build baby step table of X coords of multiples of [210]Q. XB[4*j]
870 * will point at X coords on four curves from [(j+1)*210]Q. Until
871 * we're done, we need some Y coords as well, which we keep in the
872 * second half of the table, overwriting them at the end when gse=10.
873 * Multiples which we already have (by 1,2,3,4,8,16,...,2^gse) are
874 * entered simply by copying the pointers, ignoring the few slots in w
875 * that were initially reserved for them. Here are the initial entries */
876 for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* first X, then Y coords */
877 {
878 Xb[0] = X[j]; Xb[1] = X[j+1]; /* [210]Q */
879 Xb[2] = X[j+2]; Xb[3] = X[j+3];
880 Xb[4] = XAUX[j]; Xb[5] = XAUX[j+1]; /* [420]Q */
881 Xb[6] = XAUX[j+2]; Xb[7] = XAUX[j+3];
882 Xb[8] = XT[j]; Xb[9] = XT[j+1]; /* [630]Q */
883 Xb[10] = XT[j+2]; Xb[11] = XT[j+3];
884 Xb += 4; /* points at [420]Q */
885 /* ... entries at powers of 2 times 210 .... */
886 for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
887 {
888 long m2 = m*nbc2 + j;
889 Xb += (2UL<<m); /* points at [2^m*210]Q */
890 Xb[0] = XAUX[m2]; Xb[1] = XAUX[m2+1];
891 Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
892 }
893 }
894 if (DEBUGLEVEL >= 7)
895 err_printf("\t(extracted precomputed helix / baby step entries)\n");
896 /* ... glue in between, up to 16*210 ... */
897 if (ecm_elladd0(N, &g, 12, 4, /* 12 pts + (4 pts replicated thrice) */
898 XB + 12, XB2 + 12,
899 XB, XB2,
900 XB + 16, XB2 + 16) > 1) return g; /*4+{1,2,3} = {5,6,7}*/
901 if (ecm_elladd0(N, &g, 28, 4, /* 28 pts + (4 pts replicated 7fold) */
902 XB + 28, XB2 + 28,
903 XB, XB2,
904 XB + 32, XB2 + 32) > 1) return g;/*8+{1...7} = {9...15}*/
905 /* ... and the remainder of the lot */
906 for (m = 5; m <= (ulong)gse; m++)
907 { /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
908 ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
909 for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m = 5 */
910 {
911 if (ecm_elladd0(N, &g, 64, 4,
912 XB + m2-4, XB2 + m2-4,
913 XB + j, XB2 + j,
914 XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
915 return g;
916 } /* j = m2-64 here, 60 points left */
917 if (ecm_elladd0(N, &g, 60, 4,
918 XB + m2-4, XB2 + m2-4,
919 XB + j, XB2 + j,
920 XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
921 return g;
922 /* when m=gse, drop Y coords of result, and when both equal 1024,
923 * overwrite Y coords of second argument with X coords of result */
924 }
925 if (DEBUGLEVEL >= 7) err_printf("\t(baby step table complete)\n");
926 /* initialize a few other things */
927 bstp = bstp0;
928 p = p0; d = d0; rcn = rcn0;
929 g = gen_1; av1 = avma;
930 /* scratchspace for prod (x_i-x_j) */
931 avtmp = (pari_sp)new_chunk(8 * lgefint(N));
932 /* The correct entry in XB to use depends on bstp and on where we are
933 * on the helix. As we skip from prime to prime, bstp is incremented
934 * by snextpr each time we wrap around through residue class number 0
935 * (1 mod 210), but the baby step should not be taken until rcn>=rcn0,
936 * i.e. until we pass again the residue class of p0.
937 *
938 * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
939 * and the offset from XB is four times (|k| - 1). When k=0, we ignore
940 * the current prime: if it had led to a factorization, this
941 * would have been noted during the last giant step, or -- when we
942 * first get here -- whilst initializing the helix. When k > gss,
943 * we must do a giant step and bump bstp back by -2*gss.
944 *
945 * The gcd of the product of X coord differences against N is taken just
946 * before we do a giant step. */
947 while (p < B2)
948 {/* loop over probable primes p0 < p <= nextprime(B2), inserting giant
949 * steps as necessary */
950 p = snextpr(p, &d, &rcn, &bstp, uis2psp); /* next probable prime */
951 /* work out the corresponding baby-step multiplier */
952 k = bstp - (rcn < rcn0 ? 1 : 0);
953 if (k > gss)
954 { /* giant-step time, take gcd */
955 g = gcdii(g, N);
956 if (!is_pm1(g) && !equalii(g, N)) return g;
957 g = gen_1; set_avma(av1);
958 while (k > gss)
959 { /* giant step */
960 if (DEBUGLEVEL >= 7) err_printf("\t(giant step at p = %lu)\n", p);
961 if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
962 Xh, Yh, Xh, Yh) > 1) return g;
963 if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
964 Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1)
965 return g;
966 if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
967 Xh + 128, Yh + 128, Xh + 128, Yh + 128) > 1)
968 return g;
969 bstp -= (gss << 1);
970 k = bstp - (rcn < rcn0? 1: 0); /* recompute multiplier */
971 }
972 }
973 if (!k) continue; /* point of interest is already in Xh */
974 if (k < 0) k = -k;
975 m = ((ulong)k - 1) << 2;
976 /* accumulate product of differences of X coordinates */
977 j = rcn<<2;
978 avma = avtmp; /* go to garbage zone; don't use set_avma */
979 g = modii(mulii(g, subii(XB[m], Xh[j])), N);
980 g = modii(mulii(g, subii(XB[m+1], Xh[j+1])), N);
981 g = modii(mulii(g, subii(XB[m+2], Xh[j+2])), N);
982 g = mulii(g, subii(XB[m+3], Xh[j+3]));
983 set_avma(av1);
984 g = modii(g, N);
985 }
986 set_avma(av1);
987 }
988 return NULL;
989 }
990
991 /* ellfacteur() tuned to be useful as a first stage before MPQS, especially for
992 * large arguments, when 'insist' is false, and now also for the case when
993 * 'insist' is true, vaguely following suggestions by Paul Zimmermann
994 * (http://www.loria.fr/~zimmerma/records/ecmnet.html). --GN 1998Jul,Aug */
995 static GEN
ellfacteur(GEN N,int insist)996 ellfacteur(GEN N, int insist)
997 {
998 const long size = expi(N) + 1;
999 pari_sp av = avma;
1000 struct ECM E;
1001 long nbc, dsn, dsnmax, rep = 0;
1002 if (insist)
1003 {
1004 const long DSNMAX = numberof(TB1)-1;
1005 dsnmax = (size >> 2) - 10;
1006 if (dsnmax < 0) dsnmax = 0;
1007 else if (dsnmax > DSNMAX) dsnmax = DSNMAX;
1008 E.seed = 1 + (nbcmax<<7)*(size&0xffff); /* seed for choice of curves */
1009
1010 dsn = (size >> 3) - 5;
1011 if (dsn < 0) dsn = 0; else if (dsn > 47) dsn = 47;
1012 /* pick up the torch where noninsistent stage would have given up */
1013 nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
1014 nbc &= ~3; /* 4 | nbc */
1015 }
1016 else
1017 {
1018 dsn = (size - 140) >> 3;
1019 if (dsn < 0)
1020 {
1021 #ifndef __EMX__ /* unless DOS/EMX: MPQS's disk access is abysmally slow */
1022 if (DEBUGLEVEL >= 4)
1023 err_printf("ECM: number too small to justify this stage\n");
1024 return NULL; /* too small, decline the task */
1025 #endif
1026 dsn = 0;
1027 } else if (dsn > 12) dsn = 12;
1028 rep = (size <= 248 ?
1029 (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
1030 (size - 224) >> 1);
1031 #ifdef __EMX__ /* DOS/EMX: extra rounds (shun MPQS) */
1032 rep += 20;
1033 #endif
1034 dsnmax = 72;
1035 /* Use disjoint sets of curves for non-insist and insist phases; moreover,
1036 * repeated calls acting on factors of the same original number should try
1037 * to use fresh curves. The following achieves this */
1038 E.seed = 1 + (nbcmax<<3)*(size & 0xf);
1039 nbc = -1;
1040 }
1041 ECM_init(&E, N, nbc);
1042 if (DEBUGLEVEL >= 4)
1043 {
1044 timer_start(&E.T);
1045 err_printf("ECM: working on %ld curves at a time; initializing", E.nbc);
1046 if (!insist)
1047 {
1048 if (rep == 1) err_printf(" for one round");
1049 else err_printf(" for up to %ld rounds", rep);
1050 }
1051 err_printf("...\n");
1052 }
1053 if (dsn > dsnmax) dsn = dsnmax;
1054 for(;;)
1055 {
1056 ulong B1 = insist? TB1[dsn]: TB1_for_stage[dsn];
1057 GEN g = ECM_loop(&E, N, B1);
1058 if (g)
1059 {
1060 if (DEBUGLEVEL >= 4)
1061 err_printf("ECM: time = %6ld ms\n\tfound factor = %Ps\n",
1062 timer_delay(&E.T), g);
1063 return gerepilecopy(av, g);
1064 }
1065 if (dsn < dsnmax)
1066 {
1067 if (insist) dsn++;
1068 else { dsn += 2; if (dsn > dsnmax) dsn = dsnmax; }
1069 }
1070 if (!insist && !--rep)
1071 {
1072 if (DEBUGLEVEL >= 4)
1073 err_printf("ECM: time = %6ld ms,\tellfacteur giving up.\n",
1074 timer_delay(&E.T));
1075 return gc_NULL(av);
1076 }
1077 }
1078 }
1079 /* assume rounds >= 1, seed >= 1, B1 <= ULONG_MAX / 110 */
1080 GEN
Z_ECM(GEN N,long rounds,long seed,ulong B1)1081 Z_ECM(GEN N, long rounds, long seed, ulong B1)
1082 {
1083 pari_sp av = avma;
1084 struct ECM E;
1085 long i;
1086 E.seed = seed;
1087 ECM_init(&E, N, -1);
1088 if (DEBUGLEVEL >= 4) timer_start(&E.T);
1089 for (i = rounds; i--; )
1090 {
1091 GEN g = ECM_loop(&E, N, B1);
1092 if (g) return gerepilecopy(av, g);
1093 }
1094 return gc_NULL(av);
1095 }
1096
1097 /***********************************************************************/
1098 /** **/
1099 /** FACTORIZATION (Pollard-Brent rho) --GN1998Jun18-26 **/
1100 /** pollardbrent() returns a nontrivial factor of n, assuming n is **/
1101 /** composite and has no small prime divisor, or NULL if going on **/
1102 /** would take more time than we want to spend. Sometimes it finds **/
1103 /** more than one factor, and returns a structure suitable for **/
1104 /** interpretation by ifac_crack. (Cf Algo 8.5.2 in ACiCNT) **/
1105 /** **/
1106 /***********************************************************************/
1107 #define VALUE(x) gel(x,0)
1108 #define EXPON(x) gel(x,1)
1109 #define CLASS(x) gel(x,2)
1110
1111 INLINE void
INIT(GEN x,GEN v,GEN e,GEN c)1112 INIT(GEN x, GEN v, GEN e, GEN c) {
1113 VALUE(x) = v;
1114 EXPON(x) = e;
1115 CLASS(x) = c;
1116 }
1117 static void
ifac_delete(GEN x)1118 ifac_delete(GEN x) { INIT(x,NULL,NULL,NULL); }
1119
1120 static void
rho_dbg(pari_timer * T,long c,long msg_mask)1121 rho_dbg(pari_timer *T, long c, long msg_mask)
1122 {
1123 if (c & msg_mask) return;
1124 err_printf("Rho: time = %6ld ms,\t%3ld round%s\n",
1125 timer_delay(T), c, (c==1?"":"s"));
1126 }
1127
1128 static void
one_iter(GEN * x,GEN * P,GEN x1,GEN n,long delta)1129 one_iter(GEN *x, GEN *P, GEN x1, GEN n, long delta)
1130 {
1131 *x = addis(remii(sqri(*x), n), delta);
1132 *P = modii(mulii(*P, subii(x1, *x)), n);
1133 }
1134 /* Return NULL when we run out of time, or a single t_INT containing a
1135 * nontrivial factor of n, or a vector of t_INTs, each triple of successive
1136 * entries containing a factor, an exponent (equal to one), and a factor
1137 * class (NULL for unknown or zero for known composite), matching the
1138 * internal representation used by the ifac_*() routines below. Repeated
1139 * factors may arise; the caller will sort the factors anyway. Result
1140 * is not gerepile-able (contains NULL) */
1141 static GEN
pollardbrent_i(GEN n,long size,long c0,long retries)1142 pollardbrent_i(GEN n, long size, long c0, long retries)
1143 {
1144 long tf = lgefint(n), delta, msg_mask, c, k, k1, l;
1145 pari_sp av;
1146 GEN x, x1, y, P, g, g1, res;
1147 pari_timer T;
1148
1149 if (DEBUGLEVEL >= 4) timer_start(&T);
1150 c = c0 << 5; /* 2^5 iterations per round */
1151 msg_mask = (size >= 448? 0x1fff:
1152 (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
1153 y = cgeti(tf);
1154 x1= cgeti(tf);
1155 av = avma;
1156
1157 PB_RETRY:
1158 /* trick to make a 'random' choice determined by n. Don't use x^2+0 or
1159 * x^2-2, ever. Don't use x^2-3 or x^2-7 with a starting value of 2.
1160 * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
1161 *
1162 * (the point being that when we get called again on a composite cofactor
1163 * of something we've already seen, we had better avoid the same delta) */
1164 switch ((size + retries) & 7)
1165 {
1166 case 0: delta= 1; break;
1167 case 1: delta= -1; break;
1168 case 2: delta= 3; break;
1169 case 3: delta= 5; break;
1170 case 4: delta= -5; break;
1171 case 5: delta= 7; break;
1172 case 6: delta= 11; break;
1173 /* case 7: */
1174 default: delta=-11; break;
1175 }
1176 if (DEBUGLEVEL >= 4)
1177 {
1178 if (!retries)
1179 err_printf("Rho: searching small factor of %ld-bit integer\n", size);
1180 else
1181 err_printf("Rho: restarting for remaining rounds...\n");
1182 err_printf("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
1183 delta, c >> 5);
1184 }
1185 x = gen_2; P = gen_1; g1 = NULL; k = 1; l = 1;
1186 affui(2, y);
1187 affui(2, x1);
1188 for (;;) /* terminated under the control of c */
1189 { /* use the polynomial x^2 + delta */
1190 one_iter(&x, &P, x1, n, delta);
1191
1192 if ((--c & 0x1f)==0)
1193 { /* one round complete */
1194 g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1195 if (c <= 0)
1196 { /* getting bored */
1197 if (DEBUGLEVEL >= 4)
1198 err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1199 timer_delay(&T));
1200 return NULL;
1201 }
1202 P = gen_1;
1203 if (DEBUGLEVEL >= 4) rho_dbg(&T, c0-(c>>5), msg_mask);
1204 affii(x,y); x = y; set_avma(av);
1205 }
1206
1207 if (--k) continue; /* normal end of loop body */
1208
1209 if (c & 0x1f) /* otherwise, we already checked */
1210 {
1211 g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1212 P = gen_1;
1213 }
1214
1215 /* Fast forward phase, doing l inner iterations without computing gcds.
1216 * Check first whether it would take us beyond the alloted time.
1217 * Fast forward rounds count only half (although they're taking
1218 * more like 2/3 the time of normal rounds). This to counteract the
1219 * nuisance that all c0 between 4096 and 6144 would act exactly as
1220 * 4096; with the halving trick only the range 4096..5120 collapses
1221 * (similarly for all other powers of two) */
1222 if ((c -= (l>>1)) <= 0)
1223 { /* got bored */
1224 if (DEBUGLEVEL >= 4)
1225 err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1226 timer_delay(&T));
1227 return NULL;
1228 }
1229 c &= ~0x1f; /* keep it on multiples of 32 */
1230
1231 /* Fast forward loop */
1232 affii(x, x1); set_avma(av); x = x1;
1233 k = l; l <<= 1;
1234 /* don't show this for the first several (short) fast forward phases. */
1235 if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1236 err_printf("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
1237 for (k1=k; k1; k1--)
1238 {
1239 one_iter(&x, &P, x1, n, delta);
1240 if ((k1 & 0x1f) == 0) gerepileall(av, 2, &x, &P);
1241 }
1242 if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1243 err_printf("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
1244 timer_delay(&T), c0-(c>>5));
1245 affii(x,y); P = gerepileuptoint(av, P); x = y;
1246 } /* forever */
1247
1248 fin:
1249 /* An accumulated gcd was > 1 */
1250 if (!equalii(g,n))
1251 { /* if it isn't n, and looks prime, return it */
1252 if (MR_Jaeschke(g))
1253 {
1254 if (DEBUGLEVEL >= 4)
1255 {
1256 rho_dbg(&T, c0-(c>>5), 0);
1257 err_printf("\tfound factor = %Ps\n",g);
1258 }
1259 return g;
1260 }
1261 set_avma(av); g1 = icopy(g); /* known composite, keep it safe */
1262 av = avma;
1263 }
1264 else g1 = n; /* and work modulo g1 for backtracking */
1265
1266 /* Here g1 is known composite */
1267 if (DEBUGLEVEL >= 4 && size > 192)
1268 err_printf("Rho: hang on a second, we got something here...\n");
1269 x = y;
1270 for(;;)
1271 { /* backtrack until period recovered. Must terminate */
1272 x = addis(remii(sqri(x), g1), delta);
1273 g = gcdii(subii(x1, x), g1); if (!is_pm1(g)) break;
1274
1275 if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(&T, c0-(c>>5), msg_mask);
1276 }
1277
1278 if (g1 == n || equalii(g,g1))
1279 {
1280 if (g1 == n && equalii(g,g1))
1281 { /* out of luck */
1282 if (DEBUGLEVEL >= 4)
1283 {
1284 rho_dbg(&T, c0-(c>>5), 0);
1285 err_printf("\tPollard-Brent failed.\n");
1286 }
1287 if (++retries >= 4) pari_err_BUG("");
1288 goto PB_RETRY;
1289 }
1290 /* half lucky: we've split n, but g1 equals either g or n */
1291 if (DEBUGLEVEL >= 4)
1292 {
1293 rho_dbg(&T, c0-(c>>5), 0);
1294 err_printf("\tfound %sfactor = %Ps\n", (g1!=n ? "composite " : ""), g);
1295 }
1296 res = cgetg(7, t_VEC);
1297 /* g^1: known composite when g1!=n */
1298 INIT(res+1, g, gen_1, (g1!=n? gen_0: NULL));
1299 /* cofactor^1: status unknown */
1300 INIT(res+4, diviiexact(n,g), gen_1, NULL);
1301 return res;
1302 }
1303 /* g < g1 < n : our lucky day -- we've split g1, too */
1304 res = cgetg(10, t_VEC);
1305 /* unknown status for all three factors */
1306 INIT(res+1, g, gen_1, NULL);
1307 INIT(res+4, diviiexact(g1,g), gen_1, NULL);
1308 INIT(res+7, diviiexact(n,g1), gen_1, NULL);
1309 if (DEBUGLEVEL >= 4)
1310 {
1311 rho_dbg(&T, c0-(c>>5), 0);
1312 err_printf("\tfound factors = %Ps, %Ps,\n\tand %Ps\n",
1313 gel(res,1), gel(res,4), gel(res,7));
1314 }
1315 return res;
1316 }
1317 /* Tuning parameter: for input up to 64 bits long, we must not spend more
1318 * than a very short time, for fear of slowing things down on average.
1319 * With the current tuning formula, increase our efforts somewhat at 49 bit
1320 * input (an extra round for each bit at first), and go up more and more
1321 * rapidly after we pass 80 bits.-- Changed this to adjust for the presence of
1322 * squfof, which will finish input up to 59 bits quickly. */
1323 static GEN
pollardbrent(GEN n)1324 pollardbrent(GEN n)
1325 {
1326 const long tune_pb_min = 14; /* even 15 seems too much. */
1327 long c0, size = expi(n) + 1;
1328 if (size <= 28)
1329 c0 = 32;/* amounts very nearly to 'insist'. Now that we have squfof(), we
1330 * don't insist any more when input is 2^29 ... 2^32 */
1331 else if (size <= 42)
1332 c0 = tune_pb_min;
1333 else if (size <= 59) /* match squfof() cutoff point */
1334 c0 = tune_pb_min + ((size - 42)<<1);
1335 else if (size <= 72)
1336 c0 = tune_pb_min + size - 24;
1337 else if (size <= 301)
1338 /* nonlinear increase in effort, kicking in around 80 bits */
1339 /* 301 gives 48121 + tune_pb_min */
1340 c0 = tune_pb_min + size - 60 +
1341 ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
1342 else
1343 c0 = 49152; /* ECM is faster when it'd take longer */
1344 return pollardbrent_i(n, size, c0, 0);
1345 }
1346 GEN
Z_pollardbrent(GEN n,long rounds,long seed)1347 Z_pollardbrent(GEN n, long rounds, long seed)
1348 {
1349 pari_sp av = avma;
1350 GEN v = pollardbrent_i(n, expi(n)+1, rounds, seed);
1351 if (!v) return NULL;
1352 if (typ(v) == t_INT) v = mkvec2(v, diviiexact(n,v));
1353 else if (lg(v) == 7) v = mkvec2(gel(v,1), gel(v,4));
1354 else v = mkvec3(gel(v,1), gel(v,4), gel(v,7));
1355 return gerepilecopy(av, v);
1356 }
1357
1358 /***********************************************************************/
1359 /** FACTORIZATION (Shanks' SQUFOF) --GN2000Sep30-Oct01 **/
1360 /** squfof() returns a nontrivial factor of n, assuming n is odd, **/
1361 /** composite, not a pure square, and has no small prime divisor, **/
1362 /** or NULL if it fails to find one. It works on two discriminants **/
1363 /** simultaneously (n and 5n for n=1(4), 3n and 4n for n=3(4)). **/
1364 /** Present implementation is limited to input <2^59, and works most **/
1365 /** of the time in signed arithmetic on integers <2^31 in absolute **/
1366 /** size. (Cf. Algo 8.7.2 in ACiCNT) **/
1367 /***********************************************************************/
1368
1369 /* The following is invoked to walk back along the ambiguous cycle* until we
1370 * hit an ambiguous form and thus the desired factor, which it returns. If it
1371 * fails for any reason, it returns 0. It doesn't interfere with timing and
1372 * diagnostics, which it leaves to squfof().
1373 *
1374 * Before we invoke this, we've found a form (A, B, -C) with A = a^2, where a
1375 * isn't blacklisted and where gcd(a, B) = 1. According to ACiCANT, we should
1376 * now proceed reducing the form (a, -B, -aC), but it is easy to show that the
1377 * first reduction step always sends this to (-aC, B, a), and the next one,
1378 * with q computed as usual from B and a (occupying the c position), gives a
1379 * reduced form, whose third member is easiest to recover by going back to D.
1380 * From this point onwards, we're once again working with single-word numbers.
1381 * No need to track signs, just work with the abs values of the coefficients. */
1382 static long
squfof_ambig(long a,long B,long dd,GEN D)1383 squfof_ambig(long a, long B, long dd, GEN D)
1384 {
1385 long b, c, q, qa, qc, qcb, a0, b0, b1, c0;
1386 long cnt = 0; /* count reduction steps on the cycle */
1387
1388 q = (dd + (B>>1)) / a;
1389 qa = q * a;
1390 b = (qa - B) + qa; /* avoid overflow */
1391 {
1392 pari_sp av = avma;
1393 c = itos(divis(shifti(subii(D, sqrs(b)), -2), a));
1394 set_avma(av);
1395 }
1396 #ifdef DEBUG_SQUFOF
1397 err_printf("SQUFOF: ambigous cycle of discriminant %Ps\n", D);
1398 err_printf("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n", a, b, c);
1399 #endif
1400
1401 a0 = a; b0 = b1 = b; /* end of loop detection and safeguard */
1402
1403 for (;;) /* reduced cycles are finite */
1404 { /* reduction step */
1405 c0 = c;
1406 if (c0 > dd)
1407 q = 1;
1408 else
1409 q = (dd + (b>>1)) / c0;
1410 if (q == 1)
1411 {
1412 qcb = c0 - b; b = c0 + qcb; c = a - qcb;
1413 }
1414 else
1415 {
1416 qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
1417 }
1418 a = c0;
1419
1420 cnt++; if (b == b1) break;
1421
1422 /* safeguard against infinite loop: recognize when we've walked the entire
1423 * cycle in vain. (I don't think this can actually happen -- exercise.) */
1424 if (b == b0 && a == a0) return 0;
1425
1426 b1 = b;
1427 }
1428 q = a&1 ? a : a>>1;
1429 if (DEBUGLEVEL >= 4)
1430 {
1431 if (q > 1)
1432 err_printf("SQUFOF: found factor %ld from ambiguous form\n"
1433 "\tafter %ld steps on the ambiguous cycle\n",
1434 q / ugcd(q,15), cnt);
1435 else
1436 err_printf("SQUFOF: ...found nothing on the ambiguous cycle\n"
1437 "\tafter %ld steps there\n", cnt);
1438 if (DEBUGLEVEL >= 6) err_printf("SQUFOF: squfof_ambig returned %ld\n", q);
1439 }
1440 return q;
1441 }
1442
1443 #define SQUFOF_BLACKLIST_SZ 64
1444
1445 /* assume 2,3,5 do not divide n */
1446 static GEN
squfof(GEN n)1447 squfof(GEN n)
1448 {
1449 ulong d1, d2;
1450 long tf = lgefint(n), nm4, cnt = 0;
1451 long a1, b1, c1, dd1, L1, a2, b2, c2, dd2, L2, a, q, c, qc, qcb;
1452 GEN D1, D2;
1453 pari_sp av = avma;
1454 long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
1455 long blp1 = 0, blp2 = 0;
1456 int act1 = 1, act2 = 1;
1457
1458 #ifdef LONG_IS_64BIT
1459 if (tf > 3 || (tf == 3 && uel(n,2) >= (1UL << (BITS_IN_LONG-5))))
1460 #else /* 32 bits */
1461 if (tf > 4 || (tf == 4 && (ulong)(*int_MSW(n)) >= (1UL << (BITS_IN_LONG-5))))
1462 #endif
1463 return NULL; /* n too large */
1464
1465 /* now we have 5 < n < 2^59 */
1466 nm4 = mod4(n);
1467 if (nm4 == 1)
1468 { /* n = 1 (mod4): run one iteration on D1 = n, another on D2 = 5n */
1469 D1 = n;
1470 D2 = mului(5,n); d2 = itou(sqrti(D2)); dd2 = (long)((d2>>1) + (d2&1));
1471 b2 = (long)((d2-1) | 1); /* b1, b2 will always stay odd */
1472 }
1473 else
1474 { /* n = 3 (mod4): run one iteration on D1 = 3n, another on D2 = 4n */
1475 D1 = mului(3,n);
1476 D2 = shifti(n,2); dd2 = itou(sqrti(n)); d2 = dd2 << 1;
1477 b2 = (long)(d2 & (~1UL)); /* largest even below d2, will stay even */
1478 }
1479 d1 = itou(sqrti(D1));
1480 b1 = (long)((d1-1) | 1); /* largest odd number not exceeding d1 */
1481 c1 = itos(shifti(subii(D1, sqru((ulong)b1)), -2));
1482 if (!c1) pari_err_BUG("squfof [caller of] (n or 3n is a square)");
1483 c2 = itos(shifti(subii(D2, sqru((ulong)b2)), -2));
1484 if (!c2) pari_err_BUG("squfof [caller of] (5n is a square)");
1485 L1 = (long)usqrt(d1);
1486 L2 = (long)usqrt(d2);
1487 /* dd1 used to compute floor((d1+b1)/2) as dd1+floor(b1/2), without
1488 * overflowing the 31bit signed integer size limit. Same for dd2. */
1489 dd1 = (long) ((d1>>1) + (d1&1));
1490 a1 = a2 = 1;
1491
1492 /* The two (identity) forms (a1,b1,-c1) and (a2,b2,-c2) are now set up.
1493 *
1494 * a1 and c1 represent the absolute values of the a,c coefficients; we keep
1495 * track of the sign separately, via the iteration counter cnt: when cnt is
1496 * even, c is understood to be negative, else c is positive and a < 0.
1497 *
1498 * L1, L2 are the limits for blacklisting small leading coefficients
1499 * on the principal cycle, to guarantee that when we find a square form,
1500 * its square root will belong to an ambiguous cycle (i.e. won't be an
1501 * earlier form on the principal cycle).
1502 *
1503 * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
1504 * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
1505 * one of a,c can be divisible by 2 at most to the first power. This fact
1506 * is used a couple of times below.
1507 *
1508 * The flags act1, act2 remain true while the respective cycle is still
1509 * active; we drop them to false when we return to the identity form with-
1510 * out having found a square form (or when the blacklist overflows, which
1511 * shouldn't happen). */
1512 if (DEBUGLEVEL >= 4)
1513 err_printf("SQUFOF: entering main loop with forms\n"
1514 "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
1515 "\t%Ps and %Ps, respectively\n", b1, -c1, b2, -c2, D1, D2);
1516
1517 /* MAIN LOOP: walk around the principal cycle looking for a square form.
1518 * Blacklist small leading coefficients.
1519 *
1520 * The reduction operator can be computed entirely in 32-bit arithmetic:
1521 * Let q = floor(floor((d1+b1)/2)/c1) (when c1>dd1, q=1, which happens
1522 * often enough to special-case it). Then the new b1 = (q*c1-b1) + q*c1,
1523 * which does not overflow, and the new c1 = a1 - q*(q*c1-b1), which is
1524 * bounded by d1 in abs size since both the old and the new a1 are positive
1525 * and bounded by d1. */
1526 while (act1 || act2)
1527 {
1528 if (act1)
1529 { /* send first form through reduction operator if active */
1530 c = c1;
1531 q = (c > dd1)? 1: (dd1 + (b1>>1)) / c;
1532 if (q == 1)
1533 { qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb; }
1534 else
1535 { qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb; }
1536 a1 = c;
1537
1538 if (a1 <= L1)
1539 { /* blacklist this */
1540 if (blp1 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1541 act1 = 0; /* silently */
1542 else
1543 {
1544 if (DEBUGLEVEL >= 6)
1545 err_printf("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
1546 blacklist1[blp1++] = a1;
1547 }
1548 }
1549 }
1550 if (act2)
1551 { /* send second form through reduction operator if active */
1552 c = c2;
1553 q = (c > dd2)? 1: (dd2 + (b2>>1)) / c;
1554 if (q == 1)
1555 { qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb; }
1556 else
1557 { qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb; }
1558 a2 = c;
1559
1560 if (a2 <= L2)
1561 { /* blacklist this */
1562 if (blp2 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1563 act2 = 0; /* silently */
1564 else
1565 {
1566 if (DEBUGLEVEL >= 6)
1567 err_printf("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
1568 blacklist2[blp2++] = a2;
1569 }
1570 }
1571 }
1572
1573 /* bump counter, loop if this is an odd iteration (i.e. if the real
1574 * leading coefficients are negative) */
1575 if (++cnt & 1) continue;
1576
1577 /* second half of main loop entered only when the leading coefficients
1578 * are positive (i.e., during even-numbered iterations) */
1579
1580 /* examine first form if active */
1581 if (act1 && a1 == 1) /* back to identity */
1582 { /* drop this discriminant */
1583 act1 = 0;
1584 if (DEBUGLEVEL >= 4)
1585 err_printf("SQUFOF: first cycle exhausted after %ld iterations,\n"
1586 "\tdropping it\n", cnt);
1587 }
1588 if (act1)
1589 {
1590 if (uissquareall((ulong)a1, (ulong*)&a))
1591 { /* square form */
1592 if (DEBUGLEVEL >= 4)
1593 err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
1594 "\tafter %ld iterations\n", a, b1, -c1, cnt);
1595 if (a <= L1)
1596 { /* blacklisted? */
1597 long j;
1598 for (j = 0; j < blp1; j++)
1599 if (a == blacklist1[j]) { a = 0; break; }
1600 }
1601 if (a > 0)
1602 { /* not blacklisted */
1603 q = ugcd(a, b1); /* imprimitive form? */
1604 if (q > 1)
1605 { /* q^2 divides D1 hence n [ assuming n % 3 != 0 ] */
1606 set_avma(av);
1607 if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1608 return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1609 }
1610 /* chase the inverse root form back along the ambiguous cycle */
1611 q = squfof_ambig(a, b1, dd1, D1);
1612 if (nm4 == 3 && q % 3 == 0) q /= 3;
1613 if (q > 1) { set_avma(av); return utoipos(q); } /* SUCCESS! */
1614 }
1615 else if (DEBUGLEVEL >= 4) /* blacklisted */
1616 err_printf("SQUFOF: ...but the root form seems to be on the "
1617 "principal cycle\n");
1618 }
1619 }
1620
1621 /* examine second form if active */
1622 if (act2 && a2 == 1) /* back to identity form */
1623 { /* drop this discriminant */
1624 act2 = 0;
1625 if (DEBUGLEVEL >= 4)
1626 err_printf("SQUFOF: second cycle exhausted after %ld iterations,\n"
1627 "\tdropping it\n", cnt);
1628 }
1629 if (act2)
1630 {
1631 if (uissquareall((ulong)a2, (ulong*)&a))
1632 { /* square form */
1633 if (DEBUGLEVEL >= 4)
1634 err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
1635 "\tafter %ld iterations\n", a, b2, -c2, cnt);
1636 if (a <= L2)
1637 { /* blacklisted? */
1638 long j;
1639 for (j = 0; j < blp2; j++)
1640 if (a == blacklist2[j]) { a = 0; break; }
1641 }
1642 if (a > 0)
1643 { /* not blacklisted */
1644 q = ugcd(a, b2); /* imprimitive form? */
1645 /* NB if b2 is even, a is odd, so the gcd is always odd */
1646 if (q > 1)
1647 { /* q^2 divides D2 hence n [ assuming n % 5 != 0 ] */
1648 set_avma(av);
1649 if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1650 return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1651 }
1652 /* chase the inverse root form along the ambiguous cycle */
1653 q = squfof_ambig(a, b2, dd2, D2);
1654 if (nm4 == 1 && q % 5 == 0) q /= 5;
1655 if (q > 1) { set_avma(av); return utoipos(q); } /* SUCCESS! */
1656 }
1657 else if (DEBUGLEVEL >= 4) /* blacklisted */
1658 err_printf("SQUFOF: ...but the root form seems to be on the "
1659 "principal cycle\n");
1660 }
1661 }
1662 } /* end main loop */
1663
1664 /* both discriminants turned out to be useless. */
1665 if (DEBUGLEVEL>=4) err_printf("SQUFOF: giving up\n");
1666 return gc_NULL(av);
1667 }
1668
1669 /***********************************************************************/
1670 /* DETECTING ODD POWERS --GN1998Jun28 */
1671 /* Factoring engines like MPQS which ultimately rely on computing */
1672 /* gcd(N, x^2-y^2) to find a nontrivial factor of N can't split */
1673 /* N = p^k for an odd prime p, since (Z/p^k)^* is then cyclic. Here */
1674 /* is an analogue of Z_issquareall() for 3rd, 5th and 7th powers. */
1675 /* The general case is handled by is_kth_power */
1676 /***********************************************************************/
1677
1678 /* Multistage sieve. First stages work mod 211, 209, 61, 203 in this order
1679 * (first reduce mod the product of these and then take the remainder apart).
1680 * Second stages use 117, 31, 43, 71. Moduli which are no longer interesting
1681 * are skipped. Everything is encoded in a table of 106 24-bit masks. We only
1682 * need the first half of the residues. Three bits per modulus indicate which
1683 * residues are 7th (bit 2), 5th (bit 1) or 3rd (bit 0) powers; the eight
1684 * moduli above are assigned right-to-left. The table was generated using: */
1685
1686 #if 0
1687 L = [71, 43, 31, [O(3^2),O(13)], [O(7),O(29)], 61, [O(11),O(19)], 211];
1688 ispow(x, N, k)=
1689 {
1690 if (type(N) == "t_INT", return (ispower(Mod(x,N), k)));
1691 for (i = 1, #N, if (!ispower(x + N[i], k), return (0))); 1
1692 }
1693 check(r) =
1694 {
1695 print1(" 0");
1696 for (i=1,#L,
1697 N = 0;
1698 if (ispow(r, L[i], 3), N += 1);
1699 if (ispow(r, L[i], 5), N += 2);
1700 if (ispow(r, L[i], 7), N += 4);
1701 print1(N);
1702 ); print("ul, /* ", r, " */")
1703 }
1704 for (r = 0, 105, check(r))
1705 #endif
1706 static ulong powersmod[106] = {
1707 077777777ul, /* 0 */
1708 077777777ul, /* 1 */
1709 013562440ul, /* 2 */
1710 012402540ul, /* 3 */
1711 013562440ul, /* 4 */
1712 052662441ul, /* 5 */
1713 016603440ul, /* 6 */
1714 016463450ul, /* 7 */
1715 013573551ul, /* 8 */
1716 012462540ul, /* 9 */
1717 012462464ul, /* 10 */
1718 013462771ul, /* 11 */
1719 012406473ul, /* 12 */
1720 012463641ul, /* 13 */
1721 052463646ul, /* 14 */
1722 012503446ul, /* 15 */
1723 013562440ul, /* 16 */
1724 052466440ul, /* 17 */
1725 012472451ul, /* 18 */
1726 012462454ul, /* 19 */
1727 032463550ul, /* 20 */
1728 013403664ul, /* 21 */
1729 013463460ul, /* 22 */
1730 032562565ul, /* 23 */
1731 012402540ul, /* 24 */
1732 052662441ul, /* 25 */
1733 032672452ul, /* 26 */
1734 013573551ul, /* 27 */
1735 012467541ul, /* 28 */
1736 012567640ul, /* 29 */
1737 032706450ul, /* 30 */
1738 012762452ul, /* 31 */
1739 033762662ul, /* 32 */
1740 012502562ul, /* 33 */
1741 032463562ul, /* 34 */
1742 013563440ul, /* 35 */
1743 016663440ul, /* 36 */
1744 036662550ul, /* 37 */
1745 012462552ul, /* 38 */
1746 033502450ul, /* 39 */
1747 012462643ul, /* 40 */
1748 033467540ul, /* 41 */
1749 017403441ul, /* 42 */
1750 017463462ul, /* 43 */
1751 017472460ul, /* 44 */
1752 033462470ul, /* 45 */
1753 052566450ul, /* 46 */
1754 013562640ul, /* 47 */
1755 032403640ul, /* 48 */
1756 016463450ul, /* 49 */
1757 016463752ul, /* 50 */
1758 033402440ul, /* 51 */
1759 012462540ul, /* 52 */
1760 012472540ul, /* 53 */
1761 053562462ul, /* 54 */
1762 012463465ul, /* 55 */
1763 012663470ul, /* 56 */
1764 052607450ul, /* 57 */
1765 012566553ul, /* 58 */
1766 013466440ul, /* 59 */
1767 012502741ul, /* 60 */
1768 012762744ul, /* 61 */
1769 012763740ul, /* 62 */
1770 012763443ul, /* 63 */
1771 013573551ul, /* 64 */
1772 013462471ul, /* 65 */
1773 052502460ul, /* 66 */
1774 012662463ul, /* 67 */
1775 012662451ul, /* 68 */
1776 012403550ul, /* 69 */
1777 073567540ul, /* 70 */
1778 072463445ul, /* 71 */
1779 072462740ul, /* 72 */
1780 012472442ul, /* 73 */
1781 012462644ul, /* 74 */
1782 013406650ul, /* 75 */
1783 052463471ul, /* 76 */
1784 012563474ul, /* 77 */
1785 013503460ul, /* 78 */
1786 016462441ul, /* 79 */
1787 016462440ul, /* 80 */
1788 012462540ul, /* 81 */
1789 013462641ul, /* 82 */
1790 012463454ul, /* 83 */
1791 013403550ul, /* 84 */
1792 057563540ul, /* 85 */
1793 017466441ul, /* 86 */
1794 017606471ul, /* 87 */
1795 053666573ul, /* 88 */
1796 012562561ul, /* 89 */
1797 013473641ul, /* 90 */
1798 032573440ul, /* 91 */
1799 016763440ul, /* 92 */
1800 016702640ul, /* 93 */
1801 033762552ul, /* 94 */
1802 012562550ul, /* 95 */
1803 052402451ul, /* 96 */
1804 033563441ul, /* 97 */
1805 012663561ul, /* 98 */
1806 012677560ul, /* 99 */
1807 012462464ul, /* 100 */
1808 032562642ul, /* 101 */
1809 013402551ul, /* 102 */
1810 032462450ul, /* 103 */
1811 012467445ul, /* 104 */
1812 032403440ul, /* 105 */
1813 };
1814
1815 static int
check_res(ulong x,ulong N,int shift,ulong * mask)1816 check_res(ulong x, ulong N, int shift, ulong *mask)
1817 {
1818 long r = x%N; if ((ulong)r> (N>>1)) r = N - r;
1819 *mask &= (powersmod[r] >> shift);
1820 return *mask;
1821 }
1822
1823 /* is x mod 211*209*61*203*117*31*43*71 a 3rd, 5th or 7th power ? */
1824 int
uis_357_powermod(ulong x,ulong * mask)1825 uis_357_powermod(ulong x, ulong *mask)
1826 {
1827 if ( !check_res(x, 211UL, 0, mask)) return 0;
1828 if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1829 if (*mask & 3 && !check_res(x, 61UL, 6, mask)) return 0;
1830 if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1831 if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1832 if (*mask & 3 && !check_res(x, 31UL,15, mask)) return 0;
1833 if (*mask & 5 && !check_res(x, 43UL,18, mask)) return 0;
1834 if (*mask & 6 && !check_res(x, 71UL,21, mask)) return 0;
1835 return 1;
1836 }
1837 /* asume x > 0 and pt != NULL */
1838 int
uis_357_power(ulong x,ulong * pt,ulong * mask)1839 uis_357_power(ulong x, ulong *pt, ulong *mask)
1840 {
1841 double logx;
1842 if (!odd(x))
1843 {
1844 long v = vals(x);
1845 if (v % 7) *mask &= ~4;
1846 if (v % 5) *mask &= ~2;
1847 if (v % 3) *mask &= ~1;
1848 if (!*mask) return 0;
1849 }
1850 if (!uis_357_powermod(x, mask)) return 0;
1851 logx = log((double)x);
1852 while (*mask)
1853 {
1854 long e, b;
1855 ulong y, ye;
1856 if (*mask & 1) { b = 1; e = 3; }
1857 else if (*mask & 2) { b = 2; e = 5; }
1858 else { b = 4; e = 7; }
1859 y = (ulong)(exp(logx / e) + 0.5);
1860 ye = upowuu(y,e);
1861 if (ye == x) { *pt = y; return e; }
1862 #ifdef LONG_IS_64BIT
1863 if (ye > x) y--; else y++;
1864 ye = upowuu(y,e);
1865 if (ye == x) { *pt = y; return e; }
1866 #endif
1867 *mask &= ~b; /* turn the bit off */
1868 }
1869 return 0;
1870 }
1871
1872 #ifndef LONG_IS_64BIT
1873 /* as above, split in two functions */
1874 /* is x mod 211*209*61*203 a 3rd, 5th or 7th power ? */
1875 static int
uis_357_powermod_32bit_1(ulong x,ulong * mask)1876 uis_357_powermod_32bit_1(ulong x, ulong *mask)
1877 {
1878 if ( !check_res(x, 211UL, 0, mask)) return 0;
1879 if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1880 if (*mask & 3 && !check_res(x, 61UL, 6, mask)) return 0;
1881 if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1882 return 1;
1883 }
1884 /* is x mod 117*31*43*71 a 3rd, 5th or 7th power ? */
1885 static int
uis_357_powermod_32bit_2(ulong x,ulong * mask)1886 uis_357_powermod_32bit_2(ulong x, ulong *mask)
1887 {
1888 if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1889 if (*mask & 3 && !check_res(x, 31UL,15, mask)) return 0;
1890 if (*mask & 5 && !check_res(x, 43UL,18, mask)) return 0;
1891 if (*mask & 6 && !check_res(x, 71UL,21, mask)) return 0;
1892 return 1;
1893 }
1894 #endif
1895
1896 /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power), a 5th
1897 * power (but not a 7th), or a 7th power, and in this case creates the
1898 * base on the stack and assigns its address to *pt. Otherwise returns 0.
1899 * x must be of type t_INT and positive; this is not checked. The *mask
1900 * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
1901 * bit 2: 7th pwr; set a bit to have the corresponding power examined --
1902 * and is updated appropriately for a possible follow-up call */
1903 int
is_357_power(GEN x,GEN * pt,ulong * mask)1904 is_357_power(GEN x, GEN *pt, ulong *mask)
1905 {
1906 long lx = lgefint(x);
1907 ulong r;
1908 pari_sp av;
1909 GEN y;
1910
1911 if (!*mask) return 0; /* useful when running in a loop */
1912 if (DEBUGLEVEL>4)
1913 err_printf("OddPwrs: examining %ld-bit integer\n", expi(x)+1);
1914 if (lgefint(x) == 3) {
1915 ulong t;
1916 long e = uis_357_power(x[2], &t, mask);
1917 if (e)
1918 {
1919 if (pt) *pt = utoi(t);
1920 return e;
1921 }
1922 return 0;
1923 }
1924 #ifdef LONG_IS_64BIT
1925 r = (lx == 3)? uel(x,2): umodiu(x, 6046846918939827UL);
1926 if (!uis_357_powermod(r, mask)) return 0;
1927 #else
1928 r = (lx == 3)? uel(x,2): umodiu(x, 211*209*61*203);
1929 if (!uis_357_powermod_32bit_1(r, mask)) return 0;
1930 r = (lx == 3)? uel(x,2): umodiu(x, 117*31*43*71);
1931 if (!uis_357_powermod_32bit_2(r, mask)) return 0;
1932 #endif
1933 av = avma;
1934 while (*mask)
1935 {
1936 long e, b;
1937 /* priority to higher powers: if we have a 21st, it is easier to rediscover
1938 * that its 7th root is a cube than that its cube root is a 7th power */
1939 if (*mask & 4) { b = 4; e = 7; }
1940 else if (*mask & 2) { b = 2; e = 5; }
1941 else { b = 1; e = 3; }
1942 y = mpround( sqrtnr(itor(x, nbits2prec(64 + bit_accuracy(lx) / e)), e) );
1943 if (equalii(powiu(y,e), x))
1944 {
1945 if (!pt) return gc_int(av,e);
1946 set_avma((pari_sp)y); *pt = gerepileuptoint(av, y);
1947 return e;
1948 }
1949 *mask &= ~b; /* turn the bit off */
1950 set_avma(av);
1951 }
1952 return 0;
1953 }
1954
1955 /* Is x a n-th power ?
1956 * if d = NULL, n not necessarily prime, otherwise, n prime and d the
1957 * corresponding diffptr to go on looping over primes.
1958 * If pt != NULL, it receives the n-th root */
1959 ulong
is_kth_power(GEN x,ulong n,GEN * pt)1960 is_kth_power(GEN x, ulong n, GEN *pt)
1961 {
1962 forprime_t T;
1963 long j;
1964 ulong q, residue;
1965 GEN y;
1966 pari_sp av = avma;
1967
1968 (void)u_forprime_arith_init(&T, odd(n)? 2*n+1: n+1, ULONG_MAX, 1,n);
1969 /* we'll start at q, smallest prime >= n */
1970
1971 /* Modular checks, use small primes q congruent 1 mod n */
1972 /* A non n-th power nevertheless passes the test with proba n^(-#checks),
1973 * We'd like this < 1e-6 but let j = floor(log(1e-6) / log(n)) which
1974 * ensures much less. */
1975 if (n < 16)
1976 j = 5;
1977 else if (n < 32)
1978 j = 4;
1979 else if (n < 101)
1980 j = 3;
1981 else if (n < 1001)
1982 j = 2;
1983 else if (n < 17886697) /* smallest such that smallest suitable q is > 2^32 */
1984 j = 1;
1985 else
1986 j = 0;
1987 for (; j > 0; j--)
1988 {
1989 if (!(q = u_forprime_next(&T))) break;
1990 /* q a prime = 1 mod n */
1991 residue = umodiu(x, q);
1992 if (residue == 0)
1993 {
1994 if (Z_lval(x,q) % n) return gc_ulong(av,0);
1995 continue;
1996 }
1997 /* n-th power mod q ? */
1998 if (Fl_powu(residue, (q-1)/n, q) != 1) return gc_ulong(av,0);
1999 }
2000 set_avma(av);
2001
2002 if (DEBUGLEVEL>4) err_printf("\nOddPwrs: [%lu] passed modular checks\n",n);
2003 /* go to the horse's mouth... */
2004 y = roundr( sqrtnr(itor(x, nbits2prec(expi(x)/n + 16)), n) );
2005 if (!equalii(powiu(y, n), x)) {
2006 if (DEBUGLEVEL>4) err_printf("\tBut it wasn't a pure power.\n");
2007 return gc_ulong(av,0);
2008 }
2009 if (!pt) set_avma(av); else { set_avma((pari_sp)y); *pt = gerepileuptoint(av,y); }
2010 return 1;
2011 }
2012
2013 /* is x a p^i-th power, p >= 11 prime ? Similar to is_357_power(), but instead
2014 * of the mask, we keep the current test exponent around. Cut off when
2015 * log_2 x^(1/k) < cutoffbits since we would have found it by trial division.
2016 * Everything needed here (primitive roots etc.) is computed from scratch on
2017 * the fly; compared to the size of numbers under consideration, these
2018 * word-sized computations take negligible time.
2019 * Any cutoffbits > 0 is safe, but direct root extraction attempts are faster
2020 * when trial division has been used to discover very small bases. We become
2021 * competitive at cutoffbits ~ 10 */
2022 int
is_pth_power(GEN x,GEN * pt,forprime_t * T,ulong cutoffbits)2023 is_pth_power(GEN x, GEN *pt, forprime_t *T, ulong cutoffbits)
2024 {
2025 long cnt=0, size = expi(x) /* not +1 */;
2026 ulong p;
2027 pari_sp av = avma;
2028 while ((p = u_forprime_next(T)) && size/p >= cutoffbits) {
2029 long v = 1;
2030 if (DEBUGLEVEL>5 && cnt++==2000)
2031 { cnt=0; err_printf("%lu%% ", 100*p*cutoffbits/size); }
2032 while (is_kth_power(x, p, pt)) {
2033 v *= p; x = *pt;
2034 size = expi(x);
2035 }
2036 if (v > 1)
2037 {
2038 if (DEBUGLEVEL>5) err_printf("\nOddPwrs: is a %ld power\n",v);
2039 return v;
2040 }
2041 }
2042 if (DEBUGLEVEL>5) err_printf("\nOddPwrs: not a power\n",p);
2043 return gc_int(av,0); /* give up */
2044 }
2045
2046 /***********************************************************************/
2047 /** FACTORIZATION (master iteration) **/
2048 /** Driver for the various methods of finding large factors **/
2049 /** (after trial division has cast out the very small ones). **/
2050 /** GN1998Jun24--30 **/
2051 /***********************************************************************/
2052
2053 /* Direct use:
2054 * ifac_start_hint(n,moebius,hint) registers with the iterative factorizer
2055 * - an integer n (without prime factors < tridiv_bound(n))
2056 * - registers whether or not we should terminate early if we find a square
2057 * factor,
2058 * - a hint about which method(s) to use.
2059 * This must always be called first. If input is not composite, oo loop.
2060 * The routine decomposes n nontrivially into a product of two factors except
2061 * in squarefreeness ('Moebius') mode.
2062 *
2063 * ifac_start(n,moebius) same using default hint.
2064 *
2065 * ifac_primary_factor() returns a prime divisor (not necessarily the
2066 * smallest) and the corresponding exponent.
2067 *
2068 * Encapsulated user interface: Many arithmetic functions have a 'contributor'
2069 * ifac_xxx, to be called on any large composite cofactor left over after trial
2070 * division by small primes: xxx is one of moebius, issquarefree, totient, etc.
2071 *
2072 * We never test whether the input number is prime or composite, since
2073 * presumably it will have come out of the small factors finder stage
2074 * (which doesn't really exist yet but which will test the left-over
2075 * cofactor for primality once it does). */
2076
2077 /* The data structure in which we preserve whatever we know about our number N
2078 * is kept on the PARI stack, and updated as needed.
2079 * This makes the machinery re-entrant, and avoids memory leaks when a lengthy
2080 * factorization is interrupted. We try to keep the whole affair connected,
2081 * and the parent object is always older than its children. This may in
2082 * rare cases lead to some extra copying around, and knowing what is garbage
2083 * at any given time is not trivial. See below for examples how to do it right.
2084 * (Connectedness is destroyed if callers of ifac_main() create stuff on the
2085 * stack in between calls. This is harmless as long as ifac_realloc() is used
2086 * to re-create a connected object at the head of the stack just before
2087 * collecting garbage.)
2088 * A t_INT may well have > 10^6 distinct prime factors larger than 2^16. Since
2089 * we need not find factors in order of increasing size, we must be prepared to
2090 * drag a very large amount of data around. We start with a small structure
2091 * and extend it when necessary. */
2092
2093 /* The idea of the algorithm is:
2094 * Let N0 be whatever is currently left of N after dividing off all the
2095 * prime powers we have already returned to the caller. Then we maintain
2096 * N0 as a product
2097 * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
2098 * where the P_i and Q_j are distinct primes, each C_k is known composite,
2099 * none of the P_i divides any C_k, and we also know the total ordering
2100 * of all the P_i, Q_j and C_k; in particular, we will never try to divide
2101 * a C_k by a larger Q_j. Some of the C_k may have common factors.
2102 *
2103 * Caveat implementor: Taking gcds among C_k's is very likely to cost at
2104 * least as much time as dividing off any primes as we find them, and book-
2105 * keeping would be tough (since D=gcd(C_1,C_2) can still have common factors
2106 * with both C_1/D and C_2/D, and so on...).
2107 *
2108 * At startup, we just initialize the structure to
2109 * (2) N = C_1^1 (composite).
2110 *
2111 * Whenever ifac_primary_factor() or one of the arithmetic user interface
2112 * routines needs a primary factor, and the smallest thing in our list is P_1,
2113 * we return that and its exponent, and remove it from our list. (When nothing
2114 * is left, we return a sentinel value -- gen_1. And in Moebius mode, when we
2115 * see something with exponent > 1, whether prime or composite, we return gen_0
2116 * or 0, depending on the function). In all other cases, ifac_main() iterates
2117 * the following steps until we have a P_1 in the smallest position.
2118 *
2119 * When the smallest item is C_1, as it is initially:
2120 * (3.1) Crack C_1 into a nontrivial product U_1 * U_2 by whatever method
2121 * comes to mind for this size. (U for 'unknown'.) Cracking will detect
2122 * perfect powers, so we may instead see a power of some U_1 here, or even
2123 * something of the form U_1^k*U_2^k; of course the exponent already attached
2124 * to C_1 is taken into account in the following.
2125 * (3.2) If we have U_1*U_2, sort the two factors (distinct: squares are caught
2126 * in stage 3.1). N.B. U_1 and U_2 are smaller than anything else in our list.
2127 * (3.3) Check U_1 and U_2 for primality, and flag them accordingly.
2128 * (3.4) Iterate.
2129 *
2130 * When the smallest item is Q_1:
2131 * This is the unpleasant case. We go through the entire list and try to
2132 * divide Q_1 off each of the current C_k's, which usually fails, but may
2133 * succeed several times. When a division was successful, the corresponding
2134 * C_k is removed from our list, and the cofactor becomes a U_l for the moment
2135 * unless it is 1 (which happens when C_k was a power of Q_1). When we're
2136 * through we upgrade Q_1 to P_1 status, then do a primality check on each U_l
2137 * and sort it back into the list either as a Q_j or as a C_k. If during the
2138 * insertion sort we discover that some U_l equals some P_i or Q_j or C_k we
2139 * already have, we just add U_l's exponent to that of its twin. (The sorting
2140 * therefore happens before the primality test). Since this may produce one or
2141 * more elements smaller than the P_1 we just confirmed, we may have to repeat
2142 * the iteration.
2143 * A trick avoids some Q_1 instances: just after the sweep classifying
2144 * all current unknowns as either composites or primes, we do another downward
2145 * sweep beginning with the largest current factor and stopping just above the
2146 * largest current composite. Every Q_j we pass is turned into a P_i.
2147 * (Different primes are automatically coprime among each other, and primes do
2148 * not divide smaller composites.)
2149 * NB: We have no use for comparing the square of a prime to N0. Normally
2150 * we will get called after casting out only the smallest primes, and
2151 * since we cannot guarantee that we see the large prime factors in as-
2152 * cending order, we cannot stop when we find one larger than sqrt(N0). */
2153
2154 /* Data structure: We keep everything in a single t_VEC of t_INTs. The
2155 * first 2 components are read-only:
2156 * 1) the first records whether we're doing full (NULL) or Moebius (gen_1)
2157 * factorization; in the latter case subroutines return a sentinel value as
2158 * soon as they spot an exponent > 1.
2159 * 2) the second records the hint from factorint()'s optional flag, for use by
2160 * ifac_crack().
2161 *
2162 * The remaining components (initially 15) are used in groups of three:
2163 * [ factor (t_INT), exponent (t_INT), factor class ], where factor class is
2164 * NULL : unknown
2165 * gen_0: known composite C_k
2166 * gen_1: known prime Q_j awaiting trial division
2167 * gen_2: finished prime P_i.
2168 * When during the division stage we re-sort a C_k-turned-U_l to a lower
2169 * position, we rotate any intervening material upward towards its old
2170 * slot. When a C_k was divided down to 1, its slot is left empty at
2171 * first; similarly when the re-sorting detects a repeated factor.
2172 * After the sorting phase, we de-fragment the list and squeeze all the
2173 * occupied slots together to the high end, so that ifac_crack() has room
2174 * for new factors. When this doesn't suffice, we abandon the current vector
2175 * and allocate a somewhat larger one, defragmenting again while copying.
2176 *
2177 * For internal use: note that all exponents will fit into C longs, given
2178 * PARI's lgefint field size. When we work with them, we sometimes read
2179 * out the GEN pointer, and sometimes do an itos, whatever is more con-
2180 * venient for the task at hand. */
2181
2182 /*** Overview ***/
2183
2184 /* The '*where' argument in the following points into *partial at the first of
2185 * the three fields of the first occupied slot. It's there because the caller
2186 * would already know where 'here' is, so we don't want to search for it again.
2187 * We do not preserve this from one user-interface call to the next. */
2188
2189 /* In the most cases, control flows from the user interface to ifac_main() and
2190 * then to a succession of ifac_crack()s and ifac_divide()s, with (typically)
2191 * none of the latter finding anything. */
2192
2193 #define LAST(x) x+lg(x)-3
2194 #define FIRST(x) x+3
2195
2196 #define MOEBIUS(x) gel(x,1)
2197 #define HINT(x) gel(x,2)
2198
2199 /* y <- x */
2200 INLINE void
SHALLOWCOPY(GEN x,GEN y)2201 SHALLOWCOPY(GEN x, GEN y) {
2202 VALUE(y) = VALUE(x);
2203 EXPON(y) = EXPON(x);
2204 CLASS(y) = CLASS(x);
2205 }
2206 /* y <- x */
2207 INLINE void
COPY(GEN x,GEN y)2208 COPY(GEN x, GEN y) {
2209 icopyifstack(VALUE(x), VALUE(y));
2210 icopyifstack(EXPON(x), EXPON(y));
2211 CLASS(y) = CLASS(x);
2212 }
2213
2214 /* Diagnostics */
2215 static void
ifac_factor_dbg(GEN x)2216 ifac_factor_dbg(GEN x)
2217 {
2218 GEN c = CLASS(x), v = VALUE(x);
2219 if (c == gen_2) err_printf("IFAC: factor %Ps\n\tis prime (finished)\n", v);
2220 else if (c == gen_1) err_printf("IFAC: factor %Ps\n\tis prime\n", v);
2221 else if (c == gen_0) err_printf("IFAC: factor %Ps\n\tis composite\n", v);
2222 }
2223 static void
ifac_check(GEN partial,GEN where)2224 ifac_check(GEN partial, GEN where)
2225 {
2226 if (!where || where < FIRST(partial) || where > LAST(partial))
2227 pari_err_BUG("ifac_check ['where' out of bounds]");
2228 }
2229 static void
ifac_print(GEN part,GEN where)2230 ifac_print(GEN part, GEN where)
2231 {
2232 long l = lg(part);
2233 GEN p;
2234
2235 err_printf("ifac partial factorization structure: %ld slots, ", (l-3)/3);
2236 if (MOEBIUS(part)) err_printf("Moebius mode, ");
2237 err_printf("hint = %ld\n", itos(HINT(part)));
2238 ifac_check(part, where);
2239 for (p = part+3; p < part + l; p += 3)
2240 {
2241 GEN v = VALUE(p), e = EXPON(p), c = CLASS(p);
2242 const char *s = "";
2243 if (!v) { err_printf("[empty slot]\n"); continue; }
2244 if (c == NULL) s = "unknown";
2245 else if (c == gen_0) s = "composite";
2246 else if (c == gen_1) s = "unfinished prime";
2247 else if (c == gen_2) s = "prime";
2248 else pari_err_BUG("unknown factor class");
2249 err_printf("[%Ps, %Ps, %s]\n", v, e, s);
2250 }
2251 err_printf("Done.\n");
2252 }
2253
2254 static const long decomp_default_hint = 0;
2255 /* assume n > 0, which we can assign to */
2256 /* return initial data structure, see ifac_crack() for the hint argument */
2257 static GEN
ifac_start_hint(GEN n,int moebius,long hint)2258 ifac_start_hint(GEN n, int moebius, long hint)
2259 {
2260 const long ifac_initial_length = 3 + 7*3;
2261 /* codeword, moebius, hint, 7 slots -- a 512-bit product of distinct 8-bit
2262 * primes needs at most 7 slots at a time) */
2263 GEN here, part = cgetg(ifac_initial_length, t_VEC);
2264
2265 MOEBIUS(part) = moebius? gen_1 : NULL;
2266 HINT(part) = stoi(hint);
2267 /* fill first slot at the top end */
2268 here = part + ifac_initial_length - 3; /* LAST(part) */
2269 INIT(here, n,gen_1,gen_0); /* n^1: composite */
2270 while ((here -= 3) > part) ifac_delete(here);
2271 return part;
2272 }
2273 GEN
ifac_start(GEN n,int moebius)2274 ifac_start(GEN n, int moebius)
2275 { return ifac_start_hint(n,moebius,decomp_default_hint); }
2276
2277 /* Return next nonempty slot after 'here', NULL if none exist */
2278 static GEN
ifac_find(GEN partial)2279 ifac_find(GEN partial)
2280 {
2281 GEN scan, end = partial + lg(partial);
2282
2283 #ifdef IFAC_DEBUG
2284 ifac_check(partial, partial);
2285 #endif
2286 for (scan = partial+3; scan < end; scan += 3)
2287 if (VALUE(scan)) return scan;
2288 return NULL;
2289 }
2290
2291 /* Defragment: squeeze out unoccupied slots above *where. Unoccupied slots
2292 * arise when a composite factor dissolves completely whilst dividing off a
2293 * prime, or when ifac_resort() spots a coincidence and merges two factors.
2294 * Update *where */
2295 static void
ifac_defrag(GEN * partial,GEN * where)2296 ifac_defrag(GEN *partial, GEN *where)
2297 {
2298 GEN scan_new = LAST(*partial), scan_old;
2299
2300 for (scan_old = scan_new; scan_old >= *where; scan_old -= 3)
2301 {
2302 if (!VALUE(scan_old)) continue; /* empty slot */
2303 if (scan_old < scan_new) SHALLOWCOPY(scan_old, scan_new);
2304 scan_new -= 3; /* point at next slot to be written */
2305 }
2306 scan_new += 3; /* back up to last slot written */
2307 *where = scan_new;
2308 while ((scan_new -= 3) > *partial) ifac_delete(scan_new); /* erase junk */
2309 }
2310
2311 /* Move to a larger main vector, updating *where if it points into it, and
2312 * *partial in any case. Can be used as a specialized gcopy before
2313 * a gerepileupto() (pass 0 as the new length). Normally, one would pass
2314 * new_lg=1 to let this function guess the new size. To be used sparingly.
2315 * Complex version of ifac_defrag(), combined with reallocation. If new_lg
2316 * is 0, use the old length, so this acts just like gcopy except that the
2317 * 'where' pointer is carried along; if it is 1, we make an educated guess.
2318 * Exception: If new_lg is 0, the vector is full to the brim, and the first
2319 * entry is composite, we make it longer to avoid being called again a
2320 * microsecond later. It is safe to call this with *where = NULL:
2321 * if it doesn't point anywhere within the old structure, it is left alone */
2322 static void
ifac_realloc(GEN * partial,GEN * where,long new_lg)2323 ifac_realloc(GEN *partial, GEN *where, long new_lg)
2324 {
2325 long old_lg = lg(*partial);
2326 GEN newpart, scan_new, scan_old;
2327
2328 if (new_lg == 1)
2329 new_lg = 2*old_lg - 6; /* from 7 slots to 13 to 25... */
2330 else if (new_lg <= old_lg) /* includes case new_lg == 0 */
2331 {
2332 GEN first = *partial + 3;
2333 new_lg = old_lg;
2334 /* structure full and first entry composite or unknown */
2335 if (VALUE(first) && (CLASS(first) == gen_0 || CLASS(first)==NULL))
2336 new_lg += 6; /* give it a little more breathing space */
2337 }
2338 newpart = cgetg(new_lg, t_VEC);
2339 if (DEBUGMEM >= 3)
2340 err_printf("IFAC: new partial factorization structure (%ld slots)\n",
2341 (new_lg - 3)/3);
2342 MOEBIUS(newpart) = MOEBIUS(*partial);
2343 icopyifstack(HINT(*partial), HINT(newpart));
2344 /* Downward sweep through the old *partial. Pick up 'where' and carry it
2345 * over if we pass it. (Only useful if it pointed at a nonempty slot.)
2346 * Factors are COPY'd so that we again have a nice object (parent older
2347 * than children, connected), except the one factor that may still be living
2348 * in a clone where n originally was; exponents are similarly copied if they
2349 * aren't global constants; class-of-factor fields are global constants so we
2350 * need only copy them as pointers. Caller may then do a gerepileupto() */
2351 scan_new = newpart + new_lg - 3; /* LAST(newpart) */
2352 scan_old = *partial + old_lg - 3; /* LAST(*partial) */
2353 for (; scan_old > *partial + 2; scan_old -= 3)
2354 {
2355 if (*where == scan_old) *where = scan_new;
2356 if (!VALUE(scan_old)) continue; /* skip empty slots */
2357 COPY(scan_old, scan_new); scan_new -= 3;
2358 }
2359 scan_new += 3; /* back up to last slot written */
2360 while ((scan_new -= 3) > newpart) ifac_delete(scan_new);
2361 *partial = newpart;
2362 }
2363
2364 /* Re-sort one (typically unknown) entry from washere to a new position,
2365 * rotating intervening entries upward to fill the vacant space. If the new
2366 * position is the same as the old one, or the new value of the entry coincides
2367 * with a value already occupying a lower slot, then we just add exponents (and
2368 * use the 'more known' class, and return 1 immediately when in Moebius mode).
2369 * Slots between *where and washere must be in sorted order, so a sweep using
2370 * this to re-sort several unknowns must proceed upward, see ifac_resort().
2371 * Bubble-sort-of-thing sort. Won't be exercised frequently, so this is ok */
2372 static void
ifac_sort_one(GEN * where,GEN washere)2373 ifac_sort_one(GEN *where, GEN washere)
2374 {
2375 GEN old, scan = washere - 3;
2376 GEN value, exponent, class0, class1;
2377 long cmp_res;
2378
2379 if (scan < *where) return; /* nothing to do, washere==*where */
2380 value = VALUE(washere);
2381 exponent = EXPON(washere);
2382 class0 = CLASS(washere);
2383 cmp_res = -1; /* sentinel */
2384 while (scan >= *where) /* at least once */
2385 {
2386 if (VALUE(scan))
2387 { /* current slot nonempty, check against where */
2388 cmp_res = cmpii(value, VALUE(scan));
2389 if (cmp_res >= 0) break; /* have found where to stop */
2390 }
2391 /* copy current slot upward by one position and move pointers down */
2392 SHALLOWCOPY(scan, scan+3);
2393 scan -= 3;
2394 }
2395 scan += 3;
2396 /* At this point there are the following possibilities:
2397 * 1) cmp_res == -1. Either value is less than that at *where, or *where was
2398 * pointing at vacant slots and any factors we saw en route were larger than
2399 * value. At any rate, scan == *where now, and scan is pointing at an empty
2400 * slot, into which we'll stash our entry.
2401 * 2) cmp_res == 0. The entry at scan-3 is the one, we compare class0
2402 * fields and add exponents, and put it all into the vacated scan slot,
2403 * NULLing the one at scan-3 (and possibly updating *where).
2404 * 3) cmp_res == 1. The slot at scan is the one to store our entry into. */
2405 if (cmp_res)
2406 {
2407 if (cmp_res < 0 && scan != *where)
2408 pari_err_BUG("ifact_sort_one [misaligned partial]");
2409 INIT(scan, value, exponent, class0); return;
2410 }
2411 /* case cmp_res == 0: repeated factor detected */
2412 if (DEBUGLEVEL >= 4)
2413 err_printf("IFAC: repeated factor %Ps\n\tin ifac_sort_one\n", value);
2414 old = scan - 3;
2415 /* if old class0 was composite and new is prime, or vice versa, complain
2416 * (and if one class0 was unknown and the other wasn't, use the known one) */
2417 class1 = CLASS(old);
2418 if (class0) /* should never be used */
2419 {
2420 if (class1)
2421 {
2422 if (class0 == gen_0 && class1 != gen_0)
2423 pari_err_BUG("ifac_sort_one (composite = prime)");
2424 else if (class0 != gen_0 && class1 == gen_0)
2425 pari_err_BUG("ifac_sort_one (prime = composite)");
2426 else if (class0 == gen_2)
2427 CLASS(scan) = class0;
2428 }
2429 else
2430 CLASS(scan) = class0;
2431 }
2432 /* else stay with the existing known class0 */
2433 CLASS(scan) = class1;
2434 /* in any case, add exponents */
2435 if (EXPON(old) == gen_1 && exponent == gen_1)
2436 EXPON(scan) = gen_2;
2437 else
2438 EXPON(scan) = addii(EXPON(old), exponent);
2439 /* move the value over and null out the vacated slot below */
2440 old = scan - 3;
2441 *scan = *old;
2442 ifac_delete(old);
2443 /* finally, see whether *where should be pulled in */
2444 if (old == *where) *where += 3;
2445 }
2446
2447 /* Sort all current unknowns downward to where they belong. Sweeps in the
2448 * upward direction. Not needed after ifac_crack(), only when ifac_divide()
2449 * returned true. Update *where. */
2450 static void
ifac_resort(GEN * partial,GEN * where)2451 ifac_resort(GEN *partial, GEN *where)
2452 {
2453 GEN scan, end;
2454 ifac_defrag(partial, where); end = LAST(*partial);
2455 for (scan = *where; scan <= end; scan += 3)
2456 if (VALUE(scan) && !CLASS(scan)) ifac_sort_one(where, scan); /*unknown*/
2457 ifac_defrag(partial, where); /* remove newly created gaps */
2458 }
2459
2460 /* Let x be a t_INT known not to have small divisors (< 2^14). Return 0 if x
2461 * is a proven composite. Return 1 if we believe it to be prime (fully proven
2462 * prime if factor_proven is set). */
2463 int
ifac_isprime(GEN x)2464 ifac_isprime(GEN x)
2465 {
2466 if (!BPSW_psp_nosmalldiv(x)) return 0; /* composite */
2467 if (factor_proven && ! BPSW_isprime(x))
2468 {
2469 pari_warn(warner,
2470 "IFAC: pseudo-prime %Ps\n\tis not prime. PLEASE REPORT!\n", x);
2471 return 0;
2472 }
2473 return 1;
2474 }
2475
2476 static int
ifac_checkprime(GEN x)2477 ifac_checkprime(GEN x)
2478 {
2479 int res = ifac_isprime(VALUE(x));
2480 CLASS(x) = res? gen_1: gen_0;
2481 if (DEBUGLEVEL>2) ifac_factor_dbg(x);
2482 return res;
2483 }
2484
2485 /* Determine primality or compositeness of all current unknowns, and set
2486 * class Q primes to finished (class P) if everything larger is already
2487 * known to be prime. When after_crack >= 0, only look at the
2488 * first after_crack things in the list (do nothing when it's 0) */
2489 static void
ifac_whoiswho(GEN * partial,GEN * where,long after_crack)2490 ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
2491 {
2492 GEN scan, scan_end = LAST(*partial);
2493
2494 #ifdef IFAC_DEBUG
2495 ifac_check(*partial, *where);
2496 #endif
2497 if (after_crack == 0) return;
2498 if (after_crack > 0) /* check at most after_crack entries */
2499 scan = *where + 3*(after_crack - 1); /* assert(scan <= scan_end) */
2500 else
2501 for (scan = scan_end; scan >= *where; scan -= 3)
2502 {
2503 if (CLASS(scan))
2504 { /* known class of factor */
2505 if (CLASS(scan) == gen_0) break;
2506 if (CLASS(scan) == gen_1)
2507 {
2508 if (DEBUGLEVEL>=3)
2509 {
2510 err_printf("IFAC: factor %Ps\n\tis prime (no larger composite)\n",
2511 VALUE(*where));
2512 err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2513 VALUE(*where), itos(EXPON(*where)));
2514 }
2515 CLASS(scan) = gen_2;
2516 }
2517 continue;
2518 }
2519 if (!ifac_checkprime(scan)) break; /* must disable Q-to-P */
2520 CLASS(scan) = gen_2; /* P_i, finished prime */
2521 if (DEBUGLEVEL>2) ifac_factor_dbg(scan);
2522 }
2523 /* go on, Q-to-P trick now disabled */
2524 for (; scan >= *where; scan -= 3)
2525 {
2526 if (CLASS(scan)) continue;
2527 (void)ifac_checkprime(scan); /* Qj | Ck */
2528 }
2529 }
2530
2531 /* Divide all current composites by first (prime, class Q) entry, updating its
2532 * exponent, and turning it into a finished prime (class P). Return 1 if any
2533 * such divisions succeeded (in Moebius mode, the update may then not have
2534 * been completed), or 0 if none of them succeeded. Doesn't modify *where.
2535 * Here we normally do not check that the first entry is a not-finished
2536 * prime. Stack management: we may allocate a new exponent */
2537 static long
ifac_divide(GEN * partial,GEN * where,long moebius_mode)2538 ifac_divide(GEN *partial, GEN *where, long moebius_mode)
2539 {
2540 GEN scan, scan_end = LAST(*partial);
2541 long res = 0, exponent, newexp, otherexp;
2542
2543 #ifdef IFAC_DEBUG
2544 ifac_check(*partial, *where);
2545 if (CLASS(*where) != gen_1)
2546 pari_err_BUG("ifac_divide [division by composite or finished prime]");
2547 if (!VALUE(*where)) pari_err_BUG("ifac_divide [division by nothing]");
2548 #endif
2549 newexp = exponent = itos(EXPON(*where));
2550 if (exponent > 1 && moebius_mode) return 1;
2551 /* should've been caught by caller */
2552
2553 for (scan = *where+3; scan <= scan_end; scan += 3)
2554 {
2555 if (CLASS(scan) != gen_0) continue; /* the other thing ain't composite */
2556 otherexp = 0;
2557 /* divide in place to keep stack clutter minimal */
2558 while (dvdiiz(VALUE(scan), VALUE(*where), VALUE(scan)))
2559 {
2560 if (moebius_mode) return 1; /* immediately */
2561 if (!otherexp) otherexp = itos(EXPON(scan));
2562 newexp += otherexp;
2563 }
2564 if (newexp > exponent) /* did anything happen? */
2565 {
2566 EXPON(*where) = (newexp == 2 ? gen_2 : utoipos(newexp));
2567 exponent = newexp;
2568 if (is_pm1((GEN)*scan)) /* factor dissolved completely */
2569 {
2570 ifac_delete(scan);
2571 if (DEBUGLEVEL >= 4)
2572 err_printf("IFAC: a factor was a power of another prime factor\n");
2573 } else {
2574 CLASS(scan) = NULL; /* at any rate it's Unknown now */
2575 if (DEBUGLEVEL >= 4)
2576 err_printf("IFAC: a factor was divisible by another prime factor,\n"
2577 "\tleaving a cofactor = %Ps\n", VALUE(scan));
2578 }
2579 res = 1;
2580 if (DEBUGLEVEL >= 5)
2581 err_printf("IFAC: prime %Ps\n\tappears at least to the power %ld\n",
2582 VALUE(*where), newexp);
2583 }
2584 } /* for */
2585 CLASS(*where) = gen_2; /* make it a finished prime */
2586 if (DEBUGLEVEL >= 3)
2587 err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2588 VALUE(*where), newexp);
2589 return res;
2590 }
2591
2592 /* found out our integer was factor^exp. Update */
2593 static void
update_pow(GEN where,GEN factor,long exp,pari_sp * av)2594 update_pow(GEN where, GEN factor, long exp, pari_sp *av)
2595 {
2596 GEN ex = EXPON(where);
2597 if (DEBUGLEVEL>3)
2598 err_printf("IFAC: found %Ps =\n\t%Ps ^%ld\n", *where, factor, exp);
2599 affii(factor, VALUE(where)); set_avma(*av);
2600 if (ex == gen_1)
2601 { EXPON(where) = exp == 2? gen_2: utoipos(exp); *av = avma; }
2602 else if (ex == gen_2)
2603 { EXPON(where) = utoipos(exp<<1); *av = avma; }
2604 else
2605 affsi(exp * itos(ex), EXPON(where));
2606 }
2607 /* hint = 0 : Use a default strategy
2608 * hint & 1 : avoid MPQS
2609 * hint & 2 : avoid first-stage ECM (may fall back to ECM if MPQS gives up)
2610 * hint & 4 : avoid Pollard and SQUFOF stages.
2611 * hint & 8 : avoid final ECM; may flag a composite as prime. */
2612 #define get_hint(partial) (itos(HINT(*partial)) & 15)
2613
2614 /* Complete ifac_crack's job when a factoring engine splits the current factor
2615 * into a product of three or more new factors. Makes room for them if
2616 * necessary, sorts them, gives them the right exponents and class. Returns the
2617 * number of factors actually written, which may be less than #facvec if there
2618 * are duplicates. Vectors of factors (cf pollardbrent() or mpqs()) contain
2619 * 'slots' of 3 GENs per factor, interpreted as in our partial factorization
2620 * data structure. Thus engines can tell us what they already know about
2621 * factors being prime/composite or appearing to a power larger than thefirst.
2622 * Don't collect garbage. No diagnostics: engine has printed what it found.
2623 * facvec contains slots of three components per factor; repeated factors are
2624 * allowed (their classes shouldn't contradict each other whereas their
2625 * exponents will be added up) */
2626 static long
ifac_insert_multiplet(GEN * partial,GEN * where,GEN facvec,long moebius_mode)2627 ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec, long moebius_mode)
2628 {
2629 long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
2630 /* one of the factors will go into the *where slot, so room is now 3 times
2631 * the number of slots we can use */
2632 long needroom = lfv - room;
2633 GEN newexp, cur, sorted, auxvec = cgetg(nf+1, t_VEC), factor;
2634 long E = itos(EXPON(*where)); /* the old exponent */
2635
2636 if (DEBUGLEVEL >= 5) /* squfof may return a single squared factor as a set */
2637 err_printf("IFAC: incorporating set of %ld factor(s)\n", nf);
2638 if (needroom > 0) ifac_realloc(partial, where, lg(*partial) + needroom);
2639
2640 /* create sort permutation from the values of the factors */
2641 for (j=nf; j; j--) gel(auxvec,j) = gel(facvec,3*j-2);
2642 sorted = ZV_indexsort(auxvec);
2643 /* store factors, beginning at *where, and catching any duplicates */
2644 cur = facvec + 3*sorted[nf]-2; /* adjust for triple spacing */
2645 VALUE(*where) = VALUE(cur);
2646 newexp = EXPON(cur);
2647 /* if new exponent is 1, the old exponent already in place will do */
2648 if (newexp != gen_1) EXPON(*where) = mului(E,newexp);
2649 CLASS(*where) = CLASS(cur);
2650 if (DEBUGLEVEL >= 6) err_printf("\tstored (largest) factor no. %ld...\n", nf);
2651
2652 for (j=nf-1; j; j--)
2653 {
2654 GEN e;
2655 cur = facvec + 3*sorted[j]-2;
2656 factor = VALUE(cur);
2657 if (equalii(factor, VALUE(*where)))
2658 {
2659 if (DEBUGLEVEL >= 6)
2660 err_printf("\tfactor no. %ld is a duplicate%s\n", j, (j>1? "...": ""));
2661 /* update exponent, ignore class which would already have been set,
2662 * then forget current factor */
2663 newexp = EXPON(cur);
2664 if (newexp != gen_1) /* new exp > 1 */
2665 e = addiu(EXPON(*where), E * itou(newexp));
2666 else if (EXPON(*where) == gen_1 && E == 1)
2667 e = gen_2;
2668 else
2669 e = addiu(EXPON(*where), E);
2670 EXPON(*where) = e;
2671
2672 if (moebius_mode) return 0; /* stop now, with exponent updated */
2673 continue;
2674 }
2675
2676 *where -= 3;
2677 CLASS(*where) = CLASS(cur); /* class as given */
2678 newexp = EXPON(cur);
2679 if (newexp != gen_1) /* new exp > 1 */
2680 e = (E == 1 && newexp == gen_2)? gen_2: mului(E, newexp);
2681 else /* inherit parent's exponent */
2682 e = (E == 1 ? gen_1 : (E == 2 ? gen_2 : utoipos(E)));
2683 EXPON(*where) = e;
2684 /* keep components younger than *partial */
2685 icopyifstack(factor, VALUE(*where));
2686 k++;
2687 if (DEBUGLEVEL >= 6)
2688 err_printf("\tfactor no. %ld was unique%s\n", j, j>1? " (so far)...": "");
2689 }
2690 return k;
2691 }
2692
2693 /* Split the first (composite) entry. There _must_ already be room for another
2694 * factor below *where, and *where is updated. Two cases:
2695 * - entry = factor^k is a pure power: factor^k is inserted, leaving *where
2696 * unchanged;
2697 * - entry = factor * cofactor (not necessarily coprime): both factors are
2698 * inserted in the correct order, updating *where
2699 * The inserted factors class is set to unknown, they inherit the exponent
2700 * (or a multiple thereof) of their ancestor.
2701 *
2702 * Returns number of factors written into the structure, normally 2 (1 if pure
2703 * power, maybe > 2 if a factoring engine returned a vector of factors instead
2704 * of a single factor). Can reallocate the data structure in the
2705 * vector-of-factors case, not in the most common single-factor case.
2706 * Stack housekeeping: this routine may create one or more objects (a new
2707 * factor, or possibly several, and perhaps one or more new exponents > 2) */
2708 static long
ifac_crack(GEN * partial,GEN * where,long moebius_mode)2709 ifac_crack(GEN *partial, GEN *where, long moebius_mode)
2710 {
2711 long cmp_res, hint = get_hint(partial);
2712 GEN factor, exponent;
2713
2714 #ifdef IFAC_DEBUG
2715 ifac_check(*partial, *where);
2716 if (*where < *partial + 6)
2717 pari_err_BUG("ifac_crack ['*where' out of bounds]");
2718 if (!(VALUE(*where)) || typ(VALUE(*where)) != t_INT)
2719 pari_err_BUG("ifac_crack [incorrect VALUE(*where)]");
2720 if (CLASS(*where) != gen_0)
2721 pari_err_BUG("ifac_crack [operand not known composite]");
2722 #endif
2723
2724 if (DEBUGLEVEL>2) {
2725 err_printf("IFAC: cracking composite\n\t%Ps\n", **where);
2726 if (DEBUGLEVEL>3) err_printf("IFAC: checking for pure square\n");
2727 }
2728 /* MPQS cannot factor prime powers. Look for pure powers even if MPQS is
2729 * blocked by hint: fast and useful in bounded factorization */
2730 {
2731 forprime_t T;
2732 ulong exp = 1, mask = 7;
2733 long good = 0;
2734 pari_sp av = avma;
2735 (void)u_forprime_init(&T, 11, ULONG_MAX);
2736 /* crack squares */
2737 while (Z_issquareall(VALUE(*where), &factor))
2738 {
2739 good = 1; /* remember we succeeded once */
2740 update_pow(*where, factor, 2, &av);
2741 if (moebius_mode) return 0; /* no need to carry on */
2742 }
2743 while ( (exp = is_357_power(VALUE(*where), &factor, &mask)) )
2744 {
2745 good = 1; /* remember we succeeded once */
2746 update_pow(*where, factor, exp, &av);
2747 if (moebius_mode) return 0; /* no need to carry on */
2748 }
2749 /* cutoff at 14 bits as trial division must have found everything below */
2750 while ( (exp = is_pth_power(VALUE(*where), &factor, &T, 15)) )
2751 {
2752 good = 1; /* remember we succeeded once */
2753 update_pow(*where, factor, exp, &av);
2754 if (moebius_mode) return 0; /* no need to carry on */
2755 }
2756
2757 if (good && hint != 15 && ifac_checkprime(*where))
2758 { /* our composite was a prime power */
2759 if (DEBUGLEVEL>3)
2760 err_printf("IFAC: factor %Ps\n\tis prime\n", VALUE(*where));
2761 return 0; /* bypass subsequent ifac_whoiswho() call */
2762 }
2763 } /* pure power stage */
2764
2765 factor = NULL;
2766 if (!(hint & 4))
2767 { /* pollardbrent() Rho usually gets a first chance */
2768 if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Pollard-Brent rho method\n");
2769 factor = pollardbrent(VALUE(*where));
2770 if (!factor)
2771 { /* Shanks' squfof() */
2772 if (DEBUGLEVEL >= 4)
2773 err_printf("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
2774 " is too large for it.\n");
2775 factor = squfof(VALUE(*where));
2776 }
2777 }
2778 if (!factor && !(hint & 2))
2779 { /* First ECM stage */
2780 if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Lenstra-Montgomery ECM\n");
2781 factor = ellfacteur(VALUE(*where), 0); /* do not insist */
2782 }
2783 if (!factor && !(hint & 1))
2784 { /* MPQS stage */
2785 if (DEBUGLEVEL >= 4) err_printf("IFAC: trying MPQS\n");
2786 factor = mpqs(VALUE(*where));
2787 }
2788 if (!factor)
2789 {
2790 if (!(hint & 8))
2791 { /* still no luck? Final ECM stage, guaranteed to succeed */
2792 if (DEBUGLEVEL >= 4)
2793 err_printf("IFAC: forcing ECM, may take some time\n");
2794 factor = ellfacteur(VALUE(*where), 1);
2795 }
2796 else
2797 { /* limited factorization */
2798 if (DEBUGLEVEL >= 2)
2799 {
2800 if (hint != 15)
2801 pari_warn(warner, "IFAC: unfactored composite declared prime");
2802 else
2803 pari_warn(warner, "IFAC: untested integer declared prime");
2804
2805 /* don't print it out at level 3 or above, where it would appear
2806 * several times before and after this message already */
2807 if (DEBUGLEVEL == 2) err_printf("\t%Ps\n", VALUE(*where));
2808 }
2809 CLASS(*where) = gen_1; /* might as well trial-divide by it... */
2810 return 1;
2811 }
2812 }
2813 if (typ(factor) == t_VEC) /* delegate this case */
2814 return ifac_insert_multiplet(partial, where, factor, moebius_mode);
2815 /* typ(factor) == t_INT */
2816 /* got single integer back: work out the cofactor (in place) */
2817 if (!dvdiiz(VALUE(*where), factor, VALUE(*where)))
2818 {
2819 err_printf("IFAC: factoring %Ps\n", VALUE(*where));
2820 err_printf("\tyielded 'factor' %Ps\n\twhich isn't!\n", factor);
2821 pari_err_BUG("factoring");
2822 }
2823 /* factoring engines report the factor found; tell about the cofactor */
2824 if (DEBUGLEVEL >= 4) err_printf("IFAC: cofactor = %Ps\n", VALUE(*where));
2825
2826 /* The two factors are 'factor' and VALUE(*where), find out which is larger */
2827 cmp_res = cmpii(factor, VALUE(*where));
2828 CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2829 exponent = EXPON(*where);
2830 *where -= 3;
2831 CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2832 icopyifstack(exponent, EXPON(*where));
2833 if (cmp_res < 0)
2834 VALUE(*where) = factor; /* common case */
2835 else if (cmp_res > 0)
2836 { /* factor > cofactor, rearrange */
2837 GEN old = *where + 3;
2838 VALUE(*where) = VALUE(old); /* move cofactor pointer to lowest slot */
2839 VALUE(old) = factor; /* save factor */
2840 }
2841 else pari_err_BUG("ifac_crack [Z_issquareall miss]");
2842 return 2;
2843 }
2844
2845 /* main loop: iterate until smallest entry is a finished prime; returns
2846 * a 'where' pointer, or NULL if nothing left, or gen_0 in Moebius mode if
2847 * we aren't squarefree */
2848 static GEN
ifac_main(GEN * partial)2849 ifac_main(GEN *partial)
2850 {
2851 const long moebius_mode = !!MOEBIUS(*partial);
2852 GEN here = ifac_find(*partial);
2853 long nf;
2854
2855 if (!here) return NULL; /* nothing left */
2856 /* loop until first entry is a finished prime. May involve reallocations,
2857 * thus updates of *partial */
2858 while (CLASS(here) != gen_2)
2859 {
2860 if (CLASS(here) == gen_0) /* composite: crack it */
2861 { /* make sure there's room for another factor */
2862 if (here < *partial + 6)
2863 {
2864 ifac_defrag(partial, &here);
2865 if (here < *partial + 6) ifac_realloc(partial, &here, 1); /* no luck */
2866 }
2867 nf = ifac_crack(partial, &here, moebius_mode);
2868 if (moebius_mode && EXPON(here) != gen_1) /* that was a power */
2869 {
2870 if (DEBUGLEVEL >= 3)
2871 err_printf("IFAC: main loop: repeated new factor\n\t%Ps\n", *here);
2872 return gen_0;
2873 }
2874 /* deal with the new unknowns. No sort: ifac_crack did it */
2875 ifac_whoiswho(partial, &here, nf);
2876 continue;
2877 }
2878 if (CLASS(here) == gen_1) /* prime but not yet finished: finish it */
2879 {
2880 if (ifac_divide(partial, &here, moebius_mode))
2881 {
2882 if (moebius_mode)
2883 {
2884 if (DEBUGLEVEL >= 3)
2885 err_printf("IFAC: main loop: another factor was divisible by\n"
2886 "\t%Ps\n", *here);
2887 return gen_0;
2888 }
2889 ifac_resort(partial, &here); /* sort new cofactors down */
2890 ifac_whoiswho(partial, &here, -1);
2891 }
2892 continue;
2893 }
2894 pari_err_BUG("ifac_main [nonexistent factor class]");
2895 } /* while */
2896 if (moebius_mode && EXPON(here) != gen_1)
2897 {
2898 if (DEBUGLEVEL >= 3)
2899 err_printf("IFAC: after main loop: repeated old factor\n\t%Ps\n", *here);
2900 return gen_0;
2901 }
2902 if (DEBUGLEVEL >= 4)
2903 {
2904 nf = (*partial + lg(*partial) - here - 3)/3;
2905 if (nf)
2906 err_printf("IFAC: main loop: %ld factor%s left\n", nf, (nf>1)? "s": "");
2907 else
2908 err_printf("IFAC: main loop: this was the last factor\n");
2909 }
2910 if (factor_add_primes && !(get_hint(partial) & 8))
2911 {
2912 GEN p = VALUE(here);
2913 if (lgefint(p)>3 || uel(p,2) > 0x1000000UL) (void)addprimes(p);
2914 }
2915 return here;
2916 }
2917
2918 /* Encapsulated routines */
2919
2920 /* prime/exponent pairs need to appear contiguously on the stack, but we also
2921 * need our data structure somewhere, and we don't know in advance how many
2922 * primes will turn up. The following discipline achieves this: When
2923 * ifac_decomp() is called, n should point at an object older than the oldest
2924 * small prime/exponent pair (ifactor() guarantees this).
2925 * We allocate sufficient space to accommodate several pairs -- eleven pairs
2926 * ought to fit in a space not much larger than n itself -- before calling
2927 * ifac_start(). If we manage to complete the factorization before we run out
2928 * of space, we free the data structure and cull the excess reserved space
2929 * before returning. When we do run out, we have to leapfrog to generate more
2930 * (guesstimating the requirements from what is left in the partial
2931 * factorization structure); room for fresh pairs is allocated at the head of
2932 * the stack, followed by an ifac_realloc() to reconnect the data structure and
2933 * move it out of the way, followed by a few pointer tweaks to connect the new
2934 * pairs space to the old one. This whole affair translates into a surprisingly
2935 * compact routine. */
2936
2937 /* find primary factors of n; destroy n */
2938 static long
ifac_decomp(GEN n,long hint)2939 ifac_decomp(GEN n, long hint)
2940 {
2941 pari_sp av = avma;
2942 long nb = 0;
2943 GEN part, here, workspc, pairs = (GEN)av;
2944
2945 /* workspc will be doled out in pairs of smaller t_INTs. For n = prod p^{e_p}
2946 * (p not necessarily prime), need room to store all p and e_p [ cgeti(3) ],
2947 * bounded by
2948 * sum_{p | n} ( log_{2^BIL} (p) + 6 ) <= log_{2^BIL} n + 6 log_2 n */
2949 workspc = new_chunk((expi(n) + 1) * 7);
2950 part = ifac_start_hint(n, 0, hint);
2951 for (;;)
2952 {
2953 here = ifac_main(&part);
2954 if (!here) break;
2955 if (gc_needed(av,1))
2956 {
2957 long offset;
2958 if(DEBUGMEM>1)
2959 {
2960 pari_warn(warnmem,"[2] ifac_decomp");
2961 ifac_print(part, here);
2962 }
2963 ifac_realloc(&part, &here, 0);
2964 offset = here - part;
2965 part = gerepileupto((pari_sp)workspc, part);
2966 here = part + offset;
2967 }
2968 nb++;
2969 pairs = icopy_avma(VALUE(here), (pari_sp)pairs);
2970 pairs = icopy_avma(EXPON(here), (pari_sp)pairs);
2971 ifac_delete(here);
2972 }
2973 set_avma((pari_sp)pairs);
2974 if (DEBUGLEVEL >= 3)
2975 err_printf("IFAC: found %ld large prime (power) factor%s.\n",
2976 nb, (nb>1? "s": ""));
2977 return nb;
2978 }
2979
2980 /***********************************************************************/
2981 /** ARITHMETIC FUNCTIONS WITH EARLY-ABORT **/
2982 /** needing direct access to the factoring machinery to avoid work: **/
2983 /** e.g. if we find a square factor, moebius returns 0, core doesn't **/
2984 /** need to factor it, etc. **/
2985 /***********************************************************************/
2986 /* memory management */
2987 static void
ifac_GC(pari_sp av,GEN * part)2988 ifac_GC(pari_sp av, GEN *part)
2989 {
2990 GEN here = NULL;
2991 if(DEBUGMEM>1) pari_warn(warnmem,"ifac_xxx");
2992 ifac_realloc(part, &here, 0);
2993 *part = gerepileupto(av, *part);
2994 }
2995
2996 /* destroys n */
2997 static long
ifac_moebius(GEN n)2998 ifac_moebius(GEN n)
2999 {
3000 long mu = 1;
3001 pari_sp av = avma;
3002 GEN part = ifac_start(n, 1);
3003 for(;;)
3004 {
3005 long v;
3006 GEN p;
3007 if (!ifac_next(&part,&p,&v)) return v? 0: mu;
3008 mu = -mu;
3009 if (gc_needed(av,1)) ifac_GC(av,&part);
3010 }
3011 }
3012
3013 int
ifac_read(GEN part,GEN * p,long * e)3014 ifac_read(GEN part, GEN *p, long *e)
3015 {
3016 GEN here = ifac_find(part);
3017 if (!here) return 0;
3018 *p = VALUE(here);
3019 *e = EXPON(here)[2];
3020 return 1;
3021 }
3022 void
ifac_skip(GEN part)3023 ifac_skip(GEN part)
3024 {
3025 GEN here = ifac_find(part);
3026 if (here) ifac_delete(here);
3027 }
3028
3029 /* destroys n */
3030 static int
ifac_ispowerful(GEN n)3031 ifac_ispowerful(GEN n)
3032 {
3033 pari_sp av = avma;
3034 GEN part = ifac_start(n, 0);
3035 for(;;)
3036 {
3037 long e;
3038 GEN p;
3039 if (!ifac_read(part,&p,&e)) return 1;
3040 /* power: skip */
3041 if (e != 1 || Z_isanypower(p,NULL)) { ifac_skip(part); continue; }
3042 if (!ifac_next(&part,&p,&e)) return 1;
3043 if (e == 1) return 0;
3044 if (gc_needed(av,1)) ifac_GC(av,&part);
3045 }
3046 }
3047 /* destroys n */
3048 static GEN
ifac_core(GEN n)3049 ifac_core(GEN n)
3050 {
3051 GEN m = gen_1, c = cgeti(lgefint(n));
3052 pari_sp av = avma;
3053 GEN part = ifac_start(n, 0);
3054 for(;;)
3055 {
3056 long e;
3057 GEN p;
3058 if (!ifac_read(part,&p,&e)) return m;
3059 /* square: skip */
3060 if (!odd(e) || Z_issquare(p)) { ifac_skip(part); continue; }
3061 if (!ifac_next(&part,&p,&e)) return m;
3062 if (odd(e)) m = mulii(m, p);
3063 if (gc_needed(av,1)) { affii(m,c); m=c; ifac_GC(av,&part); }
3064 }
3065 }
3066
3067 /* Where to stop trial dividing in factorization. Guaranteed >= 2^14 */
3068 ulong
tridiv_bound(GEN n)3069 tridiv_bound(GEN n)
3070 {
3071 ulong l = (ulong)expi(n) + 1;
3072 if (l <= 32) return 1UL<<14;
3073 if (l <= 512) return (l-16) << 10;
3074 return 1UL<<19; /* Rho is generally faster above this */
3075 }
3076
3077 /* return a value <= (48 << 10) = 49152 < primelinit */
3078 static ulong
utridiv_bound(ulong n)3079 utridiv_bound(ulong n)
3080 {
3081 #ifdef LONG_IS_64BIT
3082 if (n & HIGHMASK)
3083 return ((ulong)expu(n) + 1 - 16) << 10;
3084 #else
3085 (void)n;
3086 #endif
3087 return 1UL<<14;
3088 }
3089
3090 /* destroys n */
3091 static void
ifac_factoru(GEN n,long hint,GEN P,GEN E,long * pi)3092 ifac_factoru(GEN n, long hint, GEN P, GEN E, long *pi)
3093 {
3094 GEN part = ifac_start_hint(n, 0, hint);
3095 for(;;)
3096 {
3097 long v;
3098 GEN p;
3099 if (!ifac_next(&part,&p,&v)) return;
3100 P[*pi] = itou(p);
3101 E[*pi] = v;
3102 (*pi)++;
3103 }
3104 }
3105 /* destroys n */
3106 static long
ifac_moebiusu(GEN n)3107 ifac_moebiusu(GEN n)
3108 {
3109 GEN part = ifac_start(n, 1);
3110 long s = 1;
3111 for(;;)
3112 {
3113 long v;
3114 GEN p;
3115 if (!ifac_next(&part,&p,&v)) return v? 0: s;
3116 s = -s;
3117 }
3118 }
3119
3120 INLINE ulong
u_forprime_next_fast(forprime_t * T)3121 u_forprime_next_fast(forprime_t *T)
3122 {
3123 if (*(T->d))
3124 {
3125 NEXT_PRIME_VIADIFF(T->p, T->d);
3126 return T->p > T->b ? 0: T->p;
3127 }
3128 return u_forprime_next(T);
3129 }
3130
3131 /* Factor n and output [p,e] where
3132 * p, e are vecsmall with n = prod{p[i]^e[i]}. If all != 0:
3133 * if pU is not NULL, set it to unfactored composite; else include it
3134 * in factorization */
3135 static GEN
factoru_sign(ulong n,ulong all,long hint,ulong * pU1,ulong * pU2)3136 factoru_sign(ulong n, ulong all, long hint, ulong *pU1, ulong *pU2)
3137 {
3138 GEN f, E, E2, P, P2;
3139 pari_sp av;
3140 ulong p, lim;
3141 long i;
3142 forprime_t S;
3143
3144 if (pU1) *pU1 = *pU2 = 1;
3145 if (n == 0) retmkvec2(mkvecsmall(0), mkvecsmall(1));
3146 if (n == 1) retmkvec2(cgetg(1,t_VECSMALL), cgetg(1,t_VECSMALL));
3147
3148 f = cgetg(3,t_VEC); av = avma;
3149 lim = all; if (!lim) lim = utridiv_bound(n);
3150 /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3151 (void)new_chunk(16*2);
3152 P = cgetg(16, t_VECSMALL); i = 1;
3153 E = cgetg(16, t_VECSMALL);
3154 if (lim > 2)
3155 {
3156 long v = vals(n), oldi;
3157 if (v)
3158 {
3159 P[1] = 2; E[1] = v; i = 2;
3160 n >>= v; if (n == 1) goto END;
3161 }
3162 u_forprime_init(&S, 3, lim-1);
3163 oldi = i;
3164 while ( (p = u_forprime_next_fast(&S)) )
3165 {
3166 int stop;
3167 /* tiny integers without small factors are often primes */
3168 if (p == 673)
3169 {
3170 oldi = i;
3171 if (uisprime_661(n)) { P[i] = n; E[i] = 1; i++; goto END; }
3172 }
3173 v = u_lvalrem_stop(&n, p, &stop);
3174 if (v) {
3175 P[i] = p;
3176 E[i] = v; i++;
3177 }
3178 if (stop) {
3179 if (n != 1) { P[i] = n; E[i] = 1; i++; }
3180 goto END;
3181 }
3182 }
3183 if (oldi != i && !all && uisprime_661(n))
3184 { P[i] = n; E[i] = 1; i++; goto END; }
3185 }
3186 if (all)
3187 { /* smallfact: look for easy pure powers then stop */
3188 #ifdef LONG_IS_64BIT
3189 ulong mask = all > 563 ? (all > 7129 ? 1: 3): 7;
3190 #else
3191 ulong mask = all > 22 ? (all > 83 ? 1: 3): 7;
3192 #endif
3193 long k = 1, ex;
3194 while (uissquareall(n, &n)) k <<= 1;
3195 while ( (ex = uis_357_power(n, &n, &mask)) ) k *= ex;
3196 if (pU1 && !uisprime(n))
3197 { *pU1 = n; *pU2 = (ulong)k; }
3198 else
3199 { P[i] = n; E[i] = k; i++; }
3200 goto END;
3201 }
3202 {
3203 GEN perm;
3204 ifac_factoru(utoipos(n), hint, P, E, &i);
3205 setlg(P, i);
3206 perm = vecsmall_indexsort(P);
3207 P = vecsmallpermute(P, perm);
3208 E = vecsmallpermute(E, perm);
3209 }
3210 END:
3211 set_avma(av);
3212 P2 = cgetg(i, t_VECSMALL); gel(f,1) = P2;
3213 E2 = cgetg(i, t_VECSMALL); gel(f,2) = E2;
3214 while (--i >= 1) { P2[i] = P[i]; E2[i] = E[i]; }
3215 return f;
3216 }
3217 GEN
factoru(ulong n)3218 factoru(ulong n)
3219 { return factoru_sign(n, 0, decomp_default_hint, NULL, NULL); }
3220
3221 ulong
radicalu(ulong n)3222 radicalu(ulong n)
3223 {
3224 pari_sp av = avma;
3225 return gc_long(av, zv_prod(gel(factoru(n),1)));
3226 }
3227
3228 long
moebiusu_fact(GEN f)3229 moebiusu_fact(GEN f)
3230 {
3231 GEN E = gel(f,2);
3232 long i, l = lg(E);
3233 for (i = 1; i < l; i++)
3234 if (E[i] > 1) return 0;
3235 return odd(l)? 1: -1;
3236 }
3237
3238 long
moebiusu(ulong n)3239 moebiusu(ulong n)
3240 {
3241 pari_sp av;
3242 ulong p;
3243 long s, v, test_prime;
3244 forprime_t S;
3245
3246 switch(n)
3247 {
3248 case 0: (void)check_arith_non0(gen_0,"moebius");/*error*/
3249 case 1: return 1;
3250 case 2: return -1;
3251 }
3252 v = vals(n);
3253 if (v == 0)
3254 s = 1;
3255 else
3256 {
3257 if (v > 1) return 0;
3258 n >>= 1;
3259 s = -1;
3260 }
3261 av = avma;
3262 u_forprime_init(&S, 3, utridiv_bound(n));
3263 test_prime = 0;
3264 while ((p = u_forprime_next_fast(&S)))
3265 {
3266 int stop;
3267 /* tiny integers without small factors are often primes */
3268 if (p == 673)
3269 {
3270 test_prime = 0;
3271 if (uisprime_661(n)) return gc_long(av,-s);
3272 }
3273 v = u_lvalrem_stop(&n, p, &stop);
3274 if (v) {
3275 if (v > 1) return gc_long(av,0);
3276 test_prime = 1;
3277 s = -s;
3278 }
3279 if (stop) return gc_long(av, n==1? s: -s);
3280 }
3281 set_avma(av);
3282 if (test_prime && uisprime_661(n)) return -s;
3283 else
3284 {
3285 long t = ifac_moebiusu(utoipos(n));
3286 set_avma(av);
3287 if (t == 0) return 0;
3288 return (s == t)? 1: -1;
3289 }
3290 }
3291
3292 long
moebius(GEN n)3293 moebius(GEN n)
3294 {
3295 pari_sp av = avma;
3296 GEN F;
3297 ulong p;
3298 long i, l, s, v;
3299 forprime_t S;
3300
3301 if ((F = check_arith_non0(n,"moebius")))
3302 {
3303 GEN E;
3304 F = clean_Z_factor(F);
3305 E = gel(F,2);
3306 l = lg(E);
3307 for(i = 1; i < l; i++)
3308 if (!equali1(gel(E,i))) return gc_long(av,0);
3309 return gc_long(av, odd(l)? 1: -1);
3310 }
3311 if (lgefint(n) == 3) return moebiusu(uel(n,2));
3312 p = mod4(n); if (!p) return 0;
3313 if (p == 2) { s = -1; n = shifti(n, -1); } else { s = 1; n = icopy(n); }
3314 setabssign(n);
3315
3316 u_forprime_init(&S, 3, tridiv_bound(n));
3317 while ((p = u_forprime_next_fast(&S)))
3318 {
3319 int stop;
3320 v = Z_lvalrem_stop(&n, p, &stop);
3321 if (v)
3322 {
3323 if (v > 1) return gc_long(av,0);
3324 s = -s;
3325 if (stop) return gc_long(av, is_pm1(n)? s: -s);
3326 }
3327 }
3328 l = lg(primetab);
3329 for (i = 1; i < l; i++)
3330 {
3331 v = Z_pvalrem(n, gel(primetab,i), &n);
3332 if (v)
3333 {
3334 if (v > 1) return gc_long(av,0);
3335 s = -s;
3336 if (is_pm1(n)) return gc_long(av,s);
3337 }
3338 }
3339 if (ifac_isprime(n)) return gc_long(av,-s);
3340 /* large composite without small factors */
3341 v = ifac_moebius(n);
3342 return gc_long(av, s < 0? -v: v); /* correct also if v==0 */
3343 }
3344
3345 long
ispowerful(GEN n)3346 ispowerful(GEN n)
3347 {
3348 pari_sp av = avma;
3349 GEN F;
3350 ulong p, bound;
3351 long i, l, v;
3352 forprime_t S;
3353
3354 if ((F = check_arith_all(n, "ispowerful")))
3355 {
3356 GEN p, P = gel(F,1), E = gel(F,2);
3357 if (lg(P) == 1) return 1; /* 1 */
3358 p = gel(P,1);
3359 if (!signe(p)) return 1; /* 0 */
3360 i = is_pm1(p)? 2: 1; /* skip -1 */
3361 l = lg(E);
3362 for (; i < l; i++)
3363 if (equali1(gel(E,i))) return 0;
3364 return 1;
3365 }
3366 if (!signe(n)) return 1;
3367
3368 if (mod4(n) == 2) return 0;
3369 n = shifti(n, -vali(n));
3370 if (is_pm1(n)) return 1;
3371 setabssign(n);
3372 bound = tridiv_bound(n);
3373 u_forprime_init(&S, 3, bound);
3374 while ((p = u_forprime_next_fast(&S)))
3375 {
3376 int stop;
3377 v = Z_lvalrem_stop(&n, p, &stop);
3378 if (v)
3379 {
3380 if (v == 1) return gc_long(av,0);
3381 if (stop) return gc_long(av, is_pm1(n));
3382 }
3383 }
3384 l = lg(primetab);
3385 for (i = 1; i < l; i++)
3386 {
3387 v = Z_pvalrem(n, gel(primetab,i), &n);
3388 if (v)
3389 {
3390 if (v == 1) return gc_long(av,0);
3391 if (is_pm1(n)) return gc_long(av,1);
3392 }
3393 }
3394 /* no need to factor: must be p^2 or not powerful */
3395 if (cmpii(powuu(bound+1, 3), n) > 0) return gc_long(av, Z_issquare(n));
3396
3397 if (ifac_isprime(n)) return gc_long(av,0);
3398 /* large composite without small factors */
3399 return gc_long(av, ifac_ispowerful(n));
3400 }
3401
3402 ulong
coreu_fact(GEN f)3403 coreu_fact(GEN f)
3404 {
3405 GEN P = gel(f,1), E = gel(f,2);
3406 long i, l = lg(P), m = 1;
3407 for (i = 1; i < l; i++)
3408 {
3409 ulong p = P[i], e = E[i];
3410 if (e & 1) m *= p;
3411 }
3412 return m;
3413 }
3414 ulong
coreu(ulong n)3415 coreu(ulong n)
3416 {
3417 pari_sp av;
3418 if (n == 0) return 0;
3419 av = avma; return gc_ulong(av, coreu_fact(factoru(n)));
3420 }
3421 GEN
core(GEN n)3422 core(GEN n)
3423 {
3424 pari_sp av = avma;
3425 GEN m, F;
3426 ulong p;
3427 long i, l, v;
3428 forprime_t S;
3429
3430 if ((F = check_arith_all(n, "core")))
3431 {
3432 GEN p, x, P = gel(F,1), E = gel(F,2);
3433 long j = 1;
3434 if (lg(P) == 1) return gen_1;
3435 p = gel(P,1);
3436 if (!signe(p)) return gen_0;
3437 l = lg(P); x = cgetg(l, t_VEC);
3438 for (i = 1; i < l; i++)
3439 if (mpodd(gel(E,i))) gel(x,j++) = gel(P,i);
3440 setlg(x, j); return ZV_prod(x);
3441 }
3442 switch(lgefint(n))
3443 {
3444 case 2: return gen_0;
3445 case 3:
3446 p = coreu(uel(n,2));
3447 return signe(n) > 0? utoipos(p): utoineg(p);
3448 }
3449
3450 m = signe(n) < 0? gen_m1: gen_1;
3451 n = absi_shallow(n);
3452 u_forprime_init(&S, 2, tridiv_bound(n));
3453 while ((p = u_forprime_next_fast(&S)))
3454 {
3455 int stop;
3456 v = Z_lvalrem_stop(&n, p, &stop);
3457 if (v)
3458 {
3459 if (v & 1) m = muliu(m, p);
3460 if (stop)
3461 {
3462 if (!is_pm1(n)) m = mulii(m, n);
3463 return gerepileuptoint(av, m);
3464 }
3465 }
3466 }
3467 l = lg(primetab);
3468 for (i = 1; i < l; i++)
3469 {
3470 GEN q = gel(primetab,i);
3471 v = Z_pvalrem(n, q, &n);
3472 if (v)
3473 {
3474 if (v & 1) m = mulii(m, q);
3475 if (is_pm1(n)) return gerepileuptoint(av, m);
3476 }
3477 }
3478 if (ifac_isprime(n)) { m = mulii(m, n); return gerepileuptoint(av, m); }
3479 if (m == gen_1) n = icopy(n); /* ifac_core destroys n */
3480 /* large composite without small factors */
3481 return gerepileuptoint(av, mulii(m, ifac_core(n)));
3482 }
3483
3484 long
Z_issmooth(GEN m,ulong lim)3485 Z_issmooth(GEN m, ulong lim)
3486 {
3487 pari_sp av = avma;
3488 ulong p = 2;
3489 forprime_t S;
3490 u_forprime_init(&S, 2, lim);
3491 while ((p = u_forprime_next_fast(&S)))
3492 {
3493 int stop;
3494 (void)Z_lvalrem_stop(&m, p, &stop);
3495 if (stop) return gc_long(av, abscmpiu(m,lim) <= 0);
3496 }
3497 return gc_long(av,0);
3498 }
3499
3500 GEN
Z_issmooth_fact(GEN m,ulong lim)3501 Z_issmooth_fact(GEN m, ulong lim)
3502 {
3503 pari_sp av = avma;
3504 GEN F, P, E;
3505 ulong p;
3506 long i = 1, l = expi(m)+1;
3507 forprime_t S;
3508 P = cgetg(l, t_VECSMALL);
3509 E = cgetg(l, t_VECSMALL);
3510 F = mkmat2(P,E);
3511 u_forprime_init(&S, 2, lim);
3512 while ((p = u_forprime_next_fast(&S)))
3513 {
3514 long v;
3515 int stop;
3516 if ((v = Z_lvalrem_stop(&m, p, &stop)))
3517 {
3518 P[i] = p;
3519 E[i] = v; i++;
3520 if (stop)
3521 {
3522 if (abscmpiu(m,lim) > 0) break;
3523 if (m[2] > 1) { P[i] = m[2]; E[i] = 1; i++; }
3524 setlg(P, i);
3525 setlg(E, i); return gc_const((pari_sp)F, F);
3526 }
3527 }
3528 }
3529 return gc_NULL(av);
3530 }
3531
3532 /***********************************************************************/
3533 /** **/
3534 /** COMPUTING THE MATRIX OF PRIME DIVISORS AND EXPONENTS **/
3535 /** **/
3536 /***********************************************************************/
3537 static GEN
aux_end(GEN M,GEN n,long nb)3538 aux_end(GEN M, GEN n, long nb)
3539 {
3540 GEN P,E, z = (GEN)avma;
3541 long i;
3542
3543 guncloneNULL(n);
3544 P = cgetg(nb+1,t_COL);
3545 E = cgetg(nb+1,t_COL);
3546 for (i=nb; i; i--)
3547 { /* allow a stackdummy in the middle */
3548 while (typ(z) != t_INT) z += lg(z);
3549 gel(E,i) = z; z += lg(z);
3550 gel(P,i) = z; z += lg(z);
3551 }
3552 gel(M,1) = P;
3553 gel(M,2) = E;
3554 return sort_factor(M, (void*)&abscmpii, cmp_nodata);
3555 }
3556
3557 static void
STORE(long * nb,GEN x,long e)3558 STORE(long *nb, GEN x, long e) { (*nb)++; (void)x; (void)utoipos(e); }
3559 static void
STOREu(long * nb,ulong x,long e)3560 STOREu(long *nb, ulong x, long e) { STORE(nb, utoipos(x), e); }
3561 static void
STOREi(long * nb,GEN x,long e)3562 STOREi(long *nb, GEN x, long e) { STORE(nb, icopy(x), e); }
3563 /* no prime less than p divides n; return 1 if factored completely */
3564 static int
special_primes(GEN n,ulong p,long * nb,GEN T)3565 special_primes(GEN n, ulong p, long *nb, GEN T)
3566 {
3567 long i, l = lg(T);
3568 if (l > 1)
3569 { /* pp = square of biggest p tried so far */
3570 long pp[] = { evaltyp(t_INT)|_evallg(4), 0,0,0 };
3571 pari_sp av = avma; affii(sqru(p), pp); set_avma(av);
3572
3573 for (i = 1; i < l; i++)
3574 if (dvdiiz(n, gel(T,i), n))
3575 {
3576 long k = 1; while (dvdiiz(n, gel(T,i), n)) k++;
3577 STOREi(nb, gel(T,i), k);
3578 if (abscmpii(pp, n) > 0)
3579 {
3580 if (!is_pm1(n)) STOREi(nb, n, 1);
3581 return 1;
3582 }
3583 }
3584 }
3585 return 0;
3586 }
3587
3588 /* factor(sn*|n|), where sn = -1 or 1.
3589 * all != 0 : only look for prime divisors < all. If pU is not NULL,
3590 * set it to unfactored composite */
3591 static GEN
ifactor_sign(GEN n,ulong all,long hint,long sn,GEN * pU)3592 ifactor_sign(GEN n, ulong all, long hint, long sn, GEN *pU)
3593 {
3594 GEN M, N;
3595 pari_sp av;
3596 long nb = 0, i;
3597 ulong lim;
3598 forprime_t T;
3599
3600 if (lgefint(n) == 3)
3601 { /* small integer */
3602 GEN f, Pf, Ef, P, E, F = cgetg(3, t_MAT);
3603 ulong U1, U2;
3604 long l;
3605 av = avma;
3606 /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3607 (void)new_chunk((15*3 + 15 + 1) * 2);
3608 f = factoru_sign(uel(n,2), all, hint, pU? &U1: NULL, pU? &U2: NULL);
3609 set_avma(av);
3610 Pf = gel(f,1);
3611 Ef = gel(f,2);
3612 l = lg(Pf);
3613 if (sn < 0)
3614 { /* add sign */
3615 long L = l+1;
3616 gel(F,1) = P = cgetg(L, t_COL);
3617 gel(F,2) = E = cgetg(L, t_COL);
3618 gel(P,1) = gen_m1; P++;
3619 gel(E,1) = gen_1; E++;
3620 }
3621 else
3622 {
3623 gel(F,1) = P = cgetg(l, t_COL);
3624 gel(F,2) = E = cgetg(l, t_COL);
3625 }
3626 for (i = 1; i < l; i++)
3627 {
3628 gel(P,i) = utoipos(Pf[i]);
3629 gel(E,i) = utoipos(Ef[i]);
3630 }
3631 if (pU) *pU = U1 == 1? NULL: mkvec2(utoipos(U1), utoipos(U2));
3632 return F;
3633 }
3634 if (pU) *pU = NULL;
3635 M = cgetg(3,t_MAT);
3636 if (sn < 0) STORE(&nb, utoineg(1), 1);
3637 if (is_pm1(n)) return aux_end(M,NULL,nb);
3638
3639 n = N = gclone(n); setabssign(n);
3640 /* trial division bound */
3641 lim = all; if (!lim) lim = tridiv_bound(n);
3642 if (lim > 2)
3643 {
3644 ulong maxp, p;
3645 pari_sp av2;
3646 i = vali(n);
3647 if (i)
3648 {
3649 STOREu(&nb, 2, i);
3650 av = avma; affii(shifti(n,-i), n); set_avma(av);
3651 }
3652 if (is_pm1(n)) return aux_end(M,n,nb);
3653 /* trial division */
3654 maxp = maxprime();
3655 av = avma; u_forprime_init(&T, 3, minss(lim, maxp)); av2 = avma;
3656 /* first pass: known to fit in private prime table */
3657 while ((p = u_forprime_next_fast(&T)))
3658 {
3659 pari_sp av3 = avma;
3660 int stop;
3661 long k = Z_lvalrem_stop(&n, p, &stop);
3662 if (k)
3663 {
3664 affii(n, N); n = N; set_avma(av3);
3665 STOREu(&nb, p, k);
3666 }
3667 if (stop)
3668 {
3669 if (!is_pm1(n)) STOREi(&nb, n, 1);
3670 stackdummy(av, av2);
3671 return aux_end(M,n,nb);
3672 }
3673 }
3674 stackdummy(av, av2);
3675 if (lim > maxp)
3676 { /* second pass, usually empty: outside private prime table */
3677 av = avma; u_forprime_init(&T, maxp+1, lim); av2 = avma;
3678 while ((p = u_forprime_next(&T)))
3679 {
3680 pari_sp av3 = avma;
3681 int stop;
3682 long k = Z_lvalrem_stop(&n, p, &stop);
3683 if (k)
3684 {
3685 affii(n, N); n = N; set_avma(av3);
3686 STOREu(&nb, p, k);
3687 }
3688 if (stop)
3689 {
3690 if (!is_pm1(n)) STOREi(&nb, n, 1);
3691 stackdummy(av, av2);
3692 return aux_end(M,n,nb);
3693 }
3694 }
3695 stackdummy(av, av2);
3696 }
3697 }
3698 if (special_primes(n, lim, &nb, primetab)) return aux_end(M,n, nb);
3699 if (all)
3700 { /* smallfact: look for easy pure powers then stop. Cf Z_isanypower */
3701 GEN x;
3702 long k;
3703 av = avma;
3704 k = isanypower_nosmalldiv(n, &x);
3705 if (k > 1) affii(x, n);
3706 if (pU)
3707 {
3708 GEN F;
3709 if (abscmpiu(n, lim) <= 0
3710 || cmpii(n, sqru(lim)) <= 0 || ifac_isprime(n))
3711 { set_avma(av); STOREi(&nb, n, k); return aux_end(M,n, nb); }
3712 set_avma(av); F = aux_end(M, NULL, nb); /* don't destroy n */
3713 *pU = mkvec2(icopy(n), utoipos(k)); /* composite cofactor */
3714 gunclone(n); return F;
3715 }
3716 else
3717 { set_avma(av); STOREi(&nb, n, k); }
3718 if (DEBUGLEVEL >= 2) {
3719 pari_warn(warner,
3720 "IFAC: untested %ld-bit integer declared prime", expi(n)+1);
3721 if (expi(n) <= 256) err_printf("\t%Ps\n", n);
3722 }
3723 }
3724 else if (ifac_isprime(n)) STOREi(&nb, n, 1);
3725 else nb += ifac_decomp(n, hint);
3726 return aux_end(M,n, nb);
3727 }
3728
3729 static GEN
ifactor(GEN n,ulong all,long hint)3730 ifactor(GEN n, ulong all, long hint)
3731 {
3732 long s = signe(n);
3733 if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3734 return ifactor_sign(n, all, hint, s, NULL);
3735 }
3736
3737 int
ifac_next(GEN * part,GEN * p,long * e)3738 ifac_next(GEN *part, GEN *p, long *e)
3739 {
3740 GEN here = ifac_main(part);
3741 if (here == gen_0) { *p = NULL; *e = 1; return 0; }
3742 if (!here) { *p = NULL; *e = 0; return 0; }
3743 *p = VALUE(here);
3744 *e = EXPON(here)[2];
3745 ifac_delete(here); return 1;
3746 }
3747
3748 /* see before ifac_crack for current semantics of 'hint' (factorint's 'flag') */
3749 GEN
factorint(GEN n,long flag)3750 factorint(GEN n, long flag)
3751 {
3752 GEN F;
3753 if ((F = check_arith_all(n,"factorint"))) return gcopy(F);
3754 return ifactor(n,0,flag);
3755 }
3756
3757 GEN
Z_factor_limit(GEN n,ulong all)3758 Z_factor_limit(GEN n, ulong all)
3759 {
3760 if (!all) all = GP_DATA->primelimit + 1;
3761 return ifactor(n, all, decomp_default_hint);
3762 }
3763 GEN
absZ_factor_limit_strict(GEN n,ulong all,GEN * pU)3764 absZ_factor_limit_strict(GEN n, ulong all, GEN *pU)
3765 {
3766 GEN F, U;
3767 if (!signe(n))
3768 {
3769 if (pU) *pU = NULL;
3770 retmkmat2(mkcol(gen_0), mkcol(gen_1));
3771 }
3772 if (!all) all = GP_DATA->primelimit + 1;
3773 F = ifactor_sign(n, all, decomp_default_hint, 1, &U);
3774 if (pU) *pU = U;
3775 return F;
3776 }
3777 GEN
absZ_factor_limit(GEN n,ulong all)3778 absZ_factor_limit(GEN n, ulong all)
3779 {
3780 if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3781 if (!all) all = GP_DATA->primelimit + 1;
3782 return ifactor_sign(n, all, decomp_default_hint, 1, NULL);
3783 }
3784 GEN
Z_factor(GEN n)3785 Z_factor(GEN n)
3786 { return ifactor(n,0,decomp_default_hint); }
3787 GEN
absZ_factor(GEN n)3788 absZ_factor(GEN n)
3789 {
3790 if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3791 return ifactor_sign(n, 0, decomp_default_hint, 1, NULL);
3792 }
3793 /* Factor until the unfactored part is smaller than limit. Return the
3794 * factored part. Hence factorback(output) may be smaller than n */
3795 GEN
Z_factor_until(GEN n,GEN limit)3796 Z_factor_until(GEN n, GEN limit)
3797 {
3798 pari_sp av = avma;
3799 long s = signe(n), eq;
3800 GEN q, F, U;
3801
3802 if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3803 F = ifactor_sign(n, tridiv_bound(n), decomp_default_hint, s, &U);
3804 if (!U) return F;
3805 q = gel(U,1); /* composite, q^eq = unfactored part */
3806 eq = itou(gel(U,2));
3807 if (cmpii(eq == 1? q: powiu(q, eq), limit) > 0)
3808 { /* factor further */
3809 long l2 = expi(q)+1;
3810 GEN P2, E2, F2, part;
3811 if (eq > 1) limit = sqrtnint(limit, eq);
3812 P2 = coltrunc_init(l2);
3813 E2 = coltrunc_init(l2); F2 = mkmat2(P2,E2);
3814 part = ifac_start(icopy(q), 0); /* ifac_next would destroy q */
3815 for(;;)
3816 {
3817 long e;
3818 GEN p;
3819 if (!ifac_next(&part,&p,&e)) break;
3820 vectrunc_append(P2, p);
3821 vectrunc_append(E2, utoipos(e * eq));
3822 q = diviiexact(q, powiu(p, e));
3823 if (cmpii(q, limit) <= 0) break;
3824 }
3825 F2 = sort_factor(F2, (void*)&abscmpii, cmp_nodata);
3826 F = merge_factor(F, F2, (void*)&abscmpii, cmp_nodata);
3827 }
3828 return gerepilecopy(av, F);
3829 }
3830
3831 static void
matsmalltrunc_append(GEN m,ulong p,ulong e)3832 matsmalltrunc_append(GEN m, ulong p, ulong e)
3833 {
3834 GEN P = gel(m,1), E = gel(m,2);
3835 long l = lg(P);
3836 P[l] = p; lg_increase(P);
3837 E[l] = e; lg_increase(E);
3838 }
3839 static GEN
matsmalltrunc_init(long l)3840 matsmalltrunc_init(long l)
3841 {
3842 GEN P = vecsmalltrunc_init(l);
3843 GEN E = vecsmalltrunc_init(l); return mkvec2(P,E);
3844 }
3845
3846 /* If a <= c <= b , factoru(c) = L[c-a+1] */
3847 GEN
vecfactoru_i(ulong a,ulong b)3848 vecfactoru_i(ulong a, ulong b)
3849 {
3850 ulong N, k, p, n = b-a+1;
3851 GEN v = const_vecsmall(n, 1);
3852 GEN L = cgetg(n+1, t_VEC);
3853 forprime_t T;
3854 if (b < 510510UL) N = 7;
3855 else if (b < 9699690UL) N = 8;
3856 else if (b < 223092870UL) N = 9;
3857 #ifdef LONG_IS_64BIT
3858 else if (b < 6469693230UL) N = 10;
3859 else if (b < 200560490130UL) N = 11;
3860 else if (b < 7420738134810UL) N = 12;
3861 else if (b < 304250263527210UL) N = 13;
3862 else N = 16; /* don't bother */
3863 #else
3864 else N = 10;
3865 #endif
3866 for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
3867 u_forprime_init(&T, 2, usqrt(b));
3868 while ((p = u_forprime_next(&T)))
3869 { /* p <= sqrt(b) */
3870 ulong pk = p, K = ulogint(b, p);
3871 for (k = 1; k <= K; k++)
3872 {
3873 ulong j, t = a / pk, ap = t * pk;
3874 if (ap < a) { ap += pk; t++; }
3875 /* t = (j+a-1) \ pk */
3876 t %= p;
3877 for (j = ap-a+1; j <= n; j += pk)
3878 {
3879 if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
3880 if (++t == p) t = 0;
3881 }
3882 pk *= p;
3883 }
3884 }
3885 /* complete factorisation of non-sqrt(b)-smooth numbers */
3886 for (k = 1, N = a; k <= n; k++, N++)
3887 if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
3888 return L;
3889 }
3890 GEN
vecfactoru(ulong a,ulong b)3891 vecfactoru(ulong a, ulong b)
3892 {
3893 pari_sp av = avma;
3894 return gerepilecopy(av, vecfactoru_i(a,b));
3895 }
3896
3897 /* Assume a and b odd, return L s.t. L[k] = factoru(a + 2*(k-1))
3898 * If a <= c <= b odd, factoru(c) = L[(c-a)>>1 + 1] */
3899 GEN
vecfactoroddu_i(ulong a,ulong b)3900 vecfactoroddu_i(ulong a, ulong b)
3901 {
3902 ulong N, k, p, n = ((b-a)>>1) + 1;
3903 GEN v = const_vecsmall(n, 1);
3904 GEN L = cgetg(n+1, t_VEC);
3905 forprime_t T;
3906 /* f(N)=my(a=primes(n+1));vecprod(a[2..#a]); */
3907 if (b < 255255UL) N = 6;
3908 else if (b < 4849845UL) N = 7;
3909 else if (b < 111546435UL) N = 8;
3910 else if (b < 3234846615UL) N = 9;
3911 #ifdef LONG_IS_64BIT
3912 else if (b < 100280245065UL) N = 10;
3913 else if (b < 3710369067405UL) N = 11;
3914 else if (b < 152125131763605UL) N = 12;
3915 else N = 16; /* don't bother */
3916 #else
3917 else N = 10;
3918 #endif
3919 for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
3920 u_forprime_init(&T, 3, usqrt(b));
3921 while ((p = u_forprime_next(&T)))
3922 { /* p <= sqrt(b) */
3923 ulong pk = p, K = ulogint(b, p);
3924 for (k = 1; k <= K; k++)
3925 {
3926 ulong j, t = (a / pk) | 1UL, ap = t * pk;
3927 /* t and ap are odd, ap multiple of pk = p^k */
3928 if (ap < a) { ap += pk<<1; t+=2; }
3929 /* c=t*p^k by steps of 2*p^k; factorization of c*=p^k if (t,p)=1 */
3930 t %= p;
3931 for (j = ((ap-a)>>1)+1; j <= n; j += pk)
3932 {
3933 if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
3934 t += 2; if (t >= p) t -= p;
3935 }
3936 pk *= p;
3937 }
3938 }
3939 /* complete factorisation of non-sqrt(b)-smooth numbers */
3940 for (k = 1, N = a; k <= n; k++, N+=2)
3941 if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
3942 return L;
3943 }
3944 GEN
vecfactoroddu(ulong a,ulong b)3945 vecfactoroddu(ulong a, ulong b)
3946 {
3947 pari_sp av = avma;
3948 return gerepilecopy(av, vecfactoroddu_i(a,b));
3949 }
3950
3951 /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree, else NULL */
3952 GEN
vecfactorsquarefreeu(ulong a,ulong b)3953 vecfactorsquarefreeu(ulong a, ulong b)
3954 {
3955 ulong N, k, p, n = b-a+1;
3956 GEN v = const_vecsmall(n, 1);
3957 GEN L = cgetg(n+1, t_VEC);
3958 forprime_t T;
3959 if (b < 510510UL) N = 7;
3960 else if (b < 9699690UL) N = 8;
3961 else if (b < 223092870UL) N = 9;
3962 #ifdef LONG_IS_64BIT
3963 else if (b < 6469693230UL) N = 10;
3964 else if (b < 200560490130UL) N = 11;
3965 else if (b < 7420738134810UL) N = 12;
3966 else if (b < 304250263527210UL) N = 13;
3967 else N = 16; /* don't bother */
3968 #else
3969 else N = 10;
3970 #endif
3971 for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
3972 u_forprime_init(&T, 2, usqrt(b));
3973 while ((p = u_forprime_next(&T)))
3974 { /* p <= sqrt(b), kill nonsquarefree */
3975 ulong j, pk = p*p, t = a / pk, ap = t * pk;
3976 if (ap < a) ap += pk;
3977 for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
3978
3979 t = a / p; ap = t * p;
3980 if (ap < a) { ap += p; t++; }
3981 for (j = ap-a+1; j <= n; j += p, t++)
3982 if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
3983 }
3984 /* complete factorisation of non-sqrt(b)-smooth numbers */
3985 for (k = 1, N = a; k <= n; k++, N++)
3986 if (gel(L,k) && uel(v,k) != N) vecsmalltrunc_append(gel(L,k), N/uel(v,k));
3987 return L;
3988 }
3989 /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree and coprime
3990 * to all the primes in sorted zv P, else NULL */
3991 GEN
vecfactorsquarefreeu_coprime(ulong a,ulong b,GEN P)3992 vecfactorsquarefreeu_coprime(ulong a, ulong b, GEN P)
3993 {
3994 ulong N, k, p, n = b-a+1, sqb = usqrt(b);
3995 GEN v = const_vecsmall(n, 1);
3996 GEN L = cgetg(n+1, t_VEC);
3997 forprime_t T;
3998 if (b < 510510UL) N = 7;
3999 else if (b < 9699690UL) N = 8;
4000 else if (b < 223092870UL) N = 9;
4001 #ifdef LONG_IS_64BIT
4002 else if (b < 6469693230UL) N = 10;
4003 else if (b < 200560490130UL) N = 11;
4004 else if (b < 7420738134810UL) N = 12;
4005 else if (b < 304250263527210UL) N = 13;
4006 else N = 16; /* don't bother */
4007 #else
4008 else N = 10;
4009 #endif
4010 for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
4011 u_forprime_init(&T, 2, sqb);
4012 while ((p = u_forprime_next(&T)))
4013 { /* p <= sqrt(b), kill nonsquarefree */
4014 ulong j, t, ap, bad = zv_search(P, p), pk = bad ? p: p * p;
4015 t = a / pk; ap = t * pk; if (ap < a) ap += pk;
4016 for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
4017 if (bad) continue;
4018
4019 t = a / p; ap = t * p;
4020 if (ap < a) { ap += p; t++; }
4021 for (j = ap-a+1; j <= n; j += p, t++)
4022 if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
4023 }
4024 if (uel(P,lg(P)-1) <= sqb) P = NULL;
4025 /* complete factorisation of non-sqrt(b)-smooth numbers */
4026 for (k = 1, N = a; k <= n; k++, N++)
4027 if (gel(L,k) && uel(v,k) != N)
4028 {
4029 ulong q = N / uel(v,k);
4030 if (!P || !zv_search(P, q)) vecsmalltrunc_append(gel(L,k), q);
4031 }
4032 return L;
4033 }
4034
4035 GEN
vecsquarefreeu(ulong a,ulong b)4036 vecsquarefreeu(ulong a, ulong b)
4037 {
4038 ulong j, k, p, n = b-a+1;
4039 GEN L = const_vecsmall(n, 1);
4040 forprime_t T;
4041 u_forprime_init(&T, 2, usqrt(b));
4042 while ((p = u_forprime_next(&T)))
4043 { /* p <= sqrt(b), kill nonsquarefree */
4044 ulong pk = p*p, t = a / pk, ap = t * pk;
4045 if (ap < a) { ap += pk; t++; }
4046 /* t = (j+a-1) \ pk */
4047 for (j = ap-a+1; j <= n; j += pk, t++) L[j] = 0;
4048 }
4049 for (k = j = 1; k <= n; k++)
4050 if (L[k]) L[j++] = a+k-1;
4051 setlg(L,j); return L;
4052 }
4053