1 /* Copyright (C) 2000  The PARI group.
2 
3 This file is part of the PARI/GP package.
4 
5 PARI/GP is free software; you can redistribute it and/or modify it under the
6 terms of the GNU General Public License as published by the Free Software
7 Foundation; either version 2 of the License, or (at your option) any later
8 version. It is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY WHATSOEVER.
10 
11 Check the License for details. You should have received a copy of it, along
12 with the package; see the file 'COPYING'. If not, write to the Free Software
13 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
14 #include "pari.h"
15 #include "paripriv.h"
16 
17 /***********************************************************************/
18 /**                       PRIMES IN SUCCESSION                        **/
19 /***********************************************************************/
20 
21 /* map from prime residue classes mod 210 to their numbers in {0...47}.
22  * Subscripts into this array take the form ((k-1)%210)/2, ranging from
23  * 0 to 104.  Unused entries are */
24 #define NPRC 128 /* nonprime residue class */
25 
26 static unsigned char prc210_no[] = {
27   0, NPRC, NPRC, NPRC, NPRC, 1, 2, NPRC, 3, 4, NPRC, /* 21 */
28   5, NPRC, NPRC, 6, 7, NPRC, NPRC, 8, NPRC, 9, /* 41 */
29   10, NPRC, 11, NPRC, NPRC, 12, NPRC, NPRC, 13, 14, NPRC, /* 63 */
30   NPRC, 15, NPRC, 16, 17, NPRC, NPRC, 18, NPRC, 19, /* 83 */
31   NPRC, NPRC, 20, NPRC, NPRC, NPRC, 21, NPRC, 22, 23, NPRC, /* 105 */
32   24, 25, NPRC, 26, NPRC, NPRC, NPRC, 27, NPRC, NPRC, /* 125 */
33   28, NPRC, 29, NPRC, NPRC, 30, 31, NPRC, 32, NPRC, NPRC, /* 147 */
34   33, 34, NPRC, NPRC, 35, NPRC, NPRC, 36, NPRC, 37, /* 167 */
35   38, NPRC, 39, NPRC, NPRC, 40, 41, NPRC, NPRC, 42, NPRC, /* 189 */
36   43, 44, NPRC, 45, 46, NPRC, NPRC, NPRC, NPRC, 47, /* 209 */
37 };
38 
39 /* first differences of the preceding */
40 static unsigned char prc210_d1[] = {
41   10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6,
42   4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6,
43   2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2, 10, 2,
44 };
45 
46 static int
unextprime_overflow(ulong n)47 unextprime_overflow(ulong n)
48 {
49 #ifdef LONG_IS_64BIT
50   return (n > (ulong)-59);
51 #else
52   return (n > (ulong)-5);
53 #endif
54 }
55 
56 /* return 0 for overflow */
57 ulong
unextprime(ulong n)58 unextprime(ulong n)
59 {
60   long rc, rc0, rcn;
61 
62   switch(n) {
63     case 0: case 1: case 2: return 2;
64     case 3: return 3;
65     case 4: case 5: return 5;
66     case 6: case 7: return 7;
67   }
68   if (unextprime_overflow(n)) return 0;
69   /* here n > 7 */
70   n |= 1; /* make it odd */
71   rc = rc0 = n % 210;
72   /* find next prime residue class mod 210 */
73   for(;;)
74   {
75     rcn = (long)(prc210_no[rc>>1]);
76     if (rcn != NPRC) break;
77     rc += 2; /* cannot wrap since 209 is coprime and rc odd */
78   }
79   if (rc > rc0) n += rc - rc0;
80   /* now find an actual (pseudo)prime */
81   for(;;)
82   {
83     if (uisprime(n)) break;
84     n += prc210_d1[rcn];
85     if (++rcn > 47) rcn = 0;
86   }
87   return n;
88 }
89 
90 GEN
nextprime(GEN n)91 nextprime(GEN n)
92 {
93   long rc, rc0, rcn;
94   pari_sp av = avma;
95 
96   if (typ(n) != t_INT)
97   {
98     n = gceil(n);
99     if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
100   }
101   if (signe(n) <= 0) { set_avma(av); return gen_2; }
102   if (lgefint(n) == 3)
103   {
104     ulong k = unextprime(uel(n,2));
105     set_avma(av);
106     if (k) return utoipos(k);
107 #ifdef LONG_IS_64BIT
108     return uutoi(1,13);
109 #else
110     return uutoi(1,15);
111 #endif
112   }
113   /* here n > 7 */
114   if (!mod2(n)) n = addui(1,n);
115   rc = rc0 = umodiu(n, 210);
116   /* find next prime residue class mod 210 */
117   for(;;)
118   {
119     rcn = (long)(prc210_no[rc>>1]);
120     if (rcn != NPRC) break;
121     rc += 2; /* cannot wrap since 209 is coprime and rc odd */
122   }
123   if (rc > rc0) n = addui(rc - rc0, n);
124   /* now find an actual (pseudo)prime */
125   for(;;)
126   {
127     if (BPSW_psp(n)) break;
128     n = addui(prc210_d1[rcn], n);
129     if (++rcn > 47) rcn = 0;
130   }
131   if (avma == av) return icopy(n);
132   return gerepileuptoint(av, n);
133 }
134 
135 ulong
uprecprime(ulong n)136 uprecprime(ulong n)
137 {
138   long rc, rc0, rcn;
139   { /* check if n <= 10 */
140     if (n <= 1)  return 0;
141     if (n == 2)  return 2;
142     if (n <= 4)  return 3;
143     if (n <= 6)  return 5;
144     if (n <= 10) return 7;
145   }
146   /* here n >= 11 */
147   if (!(n % 2)) n--;
148   rc = rc0 = n % 210;
149   /* find previous prime residue class mod 210 */
150   for(;;)
151   {
152     rcn = (long)(prc210_no[rc>>1]);
153     if (rcn != NPRC) break;
154     rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
155   }
156   if (rc < rc0) n += rc - rc0;
157   /* now find an actual (pseudo)prime */
158   for(;;)
159   {
160     if (uisprime(n)) break;
161     if (--rcn < 0) rcn = 47;
162     n -= prc210_d1[rcn];
163   }
164   return n;
165 }
166 
167 GEN
precprime(GEN n)168 precprime(GEN n)
169 {
170   long rc, rc0, rcn;
171   pari_sp av = avma;
172 
173   if (typ(n) != t_INT)
174   {
175     n = gfloor(n);
176     if (typ(n) != t_INT) pari_err_TYPE("nextprime",n);
177   }
178   if (signe(n) <= 0) { set_avma(av); return gen_0; }
179   if (lgefint(n) <= 3)
180   {
181     ulong k = uel(n,2);
182     set_avma(av);
183     return utoi(uprecprime(k));
184   }
185   if (!mod2(n)) n = subiu(n,1);
186   rc = rc0 = umodiu(n, 210);
187   /* find previous prime residue class mod 210 */
188   for(;;)
189   {
190     rcn = (long)(prc210_no[rc>>1]);
191     if (rcn != NPRC) break;
192     rc -= 2; /* cannot wrap since 1 is coprime and rc odd */
193   }
194   if (rc0 > rc) n = subiu(n, rc0 - rc);
195   /* now find an actual (pseudo)prime */
196   for(;;)
197   {
198     if (BPSW_psp(n)) break;
199     if (--rcn < 0) rcn = 47;
200     n = subiu(n, prc210_d1[rcn]);
201   }
202   if (avma == av) return icopy(n);
203   return gerepileuptoint(av, n);
204 }
205 
206 /* Find next single-word prime strictly larger than p.
207  * If **d is non-NULL (somewhere in a diffptr), this is p + *(*d)++;
208  * otherwise imitate nextprime().
209  * *rcn = NPRC or the correct residue class for the current p; we'll use this
210  * to track the current prime residue class mod 210 once we're out of range of
211  * the diffptr table, and we'll update it before that if it isn't NPRC.
212  *
213  * *q is incremented whenever q!=NULL and we wrap from 209 mod 210 to
214  * 1 mod 210 */
215 ulong
snextpr(ulong p,byteptr * d,long * rcn,long * q,int (* ispsp)(ulong))216 snextpr(ulong p, byteptr *d, long *rcn, long *q, int (*ispsp)(ulong))
217 {
218   if (**d)
219   {
220     byteptr dd = *d;
221     long d1 = 0;
222 
223     NEXT_PRIME_VIADIFF(d1,dd);
224     /* d1 = nextprime(p+1) - p */
225     if (*rcn != NPRC)
226     {
227       while (d1 > 0)
228       {
229         d1 -= prc210_d1[*rcn];
230         if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
231       }
232       /* assert(d1 == 0) */
233     }
234     NEXT_PRIME_VIADIFF(p,*d);
235     return p;
236   }
237   if (unextprime_overflow(p)) pari_err_OVERFLOW("snextpr");
238   /* we are beyond the diffptr table, initialize if needed */
239   if (*rcn == NPRC) *rcn = prc210_no[(p % 210) >> 1]; /* != NPRC */
240   /* look for the next one */
241   do {
242     p += prc210_d1[*rcn];
243     if (++*rcn > 47) { *rcn = 0; if (q) (*q)++; }
244   } while (!ispsp(p));
245   return p;
246 }
247 
248 /********************************************************************/
249 /**                                                                **/
250 /**                     INTEGER FACTORIZATION                      **/
251 /**                                                                **/
252 /********************************************************************/
253 int factor_add_primes = 0, factor_proven = 0;
254 
255 /***********************************************************************/
256 /**                                                                   **/
257 /**                 FACTORIZATION (ECM) -- GN Jul-Aug 1998            **/
258 /**   Integer factorization using the elliptic curves method (ECM).   **/
259 /**   ellfacteur() returns a non trivial factor of N, assuming N>0,   **/
260 /**   is composite, and has no prime divisor below 2^14 or so.        **/
261 /**   Thanks to Paul Zimmermann for much helpful advice and to        **/
262 /**   Guillaume Hanrot and Igor Schein for intensive testing          **/
263 /**                                                                   **/
264 /***********************************************************************/
265 #define nbcmax 64 /* max number of simultaneous curves */
266 
267 static const ulong TB1[] = {
268   142,172,208,252,305,370,450,545,661,801,972,1180,1430,
269   1735,2100,2550,3090,3745,4540,5505,6675,8090,9810,11900,
270   14420,17490,21200,25700,31160,37780UL,45810UL,55550UL,67350UL,
271   81660UL,99010UL,120050UL,145550UL,176475UL,213970UL,259430UL,
272   314550UL,381380UL,462415UL,560660UL,679780UL,824220UL,999340UL,
273   1211670UL,1469110UL,1781250UL,2159700UL,2618600UL,3175000UL,
274   3849600UL,4667500UL,5659200UL,6861600UL,8319500UL,10087100UL,
275   12230300UL,14828900UL,17979600UL,21799700UL,26431500UL,
276   32047300UL,38856400UL, /* 110 times that still fits into 32bits */
277 #ifdef LONG_IS_64BIT
278   47112200UL,57122100UL,69258800UL,83974200UL,101816200UL,
279   123449000UL,149678200UL,181480300UL,220039400UL,266791100UL,
280   323476100UL,392204900UL,475536500UL,576573500UL,699077800UL,
281   847610500UL,1027701900UL,1246057200UL,1510806400UL,1831806700UL,
282   2221009800UL,2692906700UL,3265067200UL,3958794400UL,4799917500UL
283 #endif
284 };
285 static const ulong TB1_for_stage[] = {
286  /* Start below the optimal B1 for finding factors which would just have been
287   * missed by pollardbrent(), and escalate, changing curves to give good
288   * coverage of the small factor ranges. Entries grow faster than what would
289   * be optimal but a table instead of a 2D array keeps the code simple */
290   500,520,560,620,700,800,900,1000,1150,1300,1450,1600,1800,2000,
291   2200,2450,2700,2950,3250,3600,4000,4400,4850,5300,5800,6400,
292   7100,7850,8700,9600,10600,11700,12900,14200,15700,17300,
293   19000,21000,23200,25500,28000,31000,34500UL,38500UL,43000UL,
294   48000UL,53800UL,60400UL,67750UL,76000UL,85300UL,95700UL,
295   107400UL,120500UL,135400UL,152000UL,170800UL,191800UL,215400UL,
296   241800UL,271400UL,304500UL,341500UL,383100UL,429700UL,481900UL,
297   540400UL,606000UL,679500UL,761800UL,854100UL,957500UL,1073500UL
298 };
299 
300 /* addition/doubling/multiplication of a point on an 'elliptic curve mod N'
301  * may result in one of three things:
302  * - a new bona fide point
303  * - a point at infinity (denominator divisible by N)
304  * - a point at infinity mod some p | N but finite mod q | N betraying itself
305  *   by a denominator which has nontrivial gcd with N.
306  *
307  * In the second case, addition/doubling aborts, copying one of the summands
308  * to the destination array of points unless they coincide.
309  * Multiplication will stop at some unpredictable intermediate stage:  The
310  * destination will contain _some_ multiple of the input point, but not
311  * necessarily the desired one, which doesn't matter.  As long as we're
312  * multiplying (B1 phase) we simply carry on with the next multiplier.
313  * During the B2 phase, the only additions are the giant steps, and the
314  * worst that can happen here is that we lose one residue class mod 210
315  * of prime multipliers on 4 of the curves, so again, we ignore the problem
316  * and just carry on.)
317  *
318  * Idea: select nbc curves mod N and one point P on each of them. For each
319  * such P, compute [M]P = Q where M is the product of all powers <= B2 of
320  * primes <= nextprime(B1). Then check whether [p]Q for p < nextprime(B2)
321  * betrays a factor. This second stage looks separately at the primes in
322  * each residue class mod 210, four curves at a time, and steps additively
323  * to ever larger multipliers, by comparing X coordinates of points which we
324  * would need to add in order to reach another prime multiplier in the same
325  * residue class. 'Comparing' means that we accumulate a product of
326  * differences of X coordinates, and from time to time take a gcd of this
327  * product with N. Montgomery's multi-inverse trick is used heavily. */
328 
329 /* *** auxiliary functions for ellfacteur: *** */
330 /* (Rx,Ry) <- (Px,Py)+(Qx,Qy) over Z/NZ, z=1/(Px-Qx). If Ry = NULL, don't set */
331 static void
FpE_add_i(GEN N,GEN z,GEN Px,GEN Py,GEN Qx,GEN Qy,GEN * Rx,GEN * Ry)332 FpE_add_i(GEN N, GEN z, GEN Px, GEN Py, GEN Qx, GEN Qy, GEN *Rx, GEN *Ry)
333 {
334   GEN slope = modii(mulii(subii(Py, Qy), z), N);
335   GEN t = subii(sqri(slope), addii(Qx, Px));
336   affii(modii(t, N), *Rx);
337   if (Ry) {
338     t = subii(mulii(slope, subii(Px, *Rx)), Py);
339     affii(modii(t, N), *Ry);
340   }
341 }
342 /* X -> Z; cannot add on one of the curves: make sure Z contains
343  * something useful before letting caller proceed */
344 static void
ZV_aff(long n,GEN * X,GEN * Z)345 ZV_aff(long n, GEN *X, GEN *Z)
346 {
347   if (X != Z) {
348     long k;
349     for (k = n; k--; ) affii(X[k],Z[k]);
350   }
351 }
352 
353 /* Parallel addition on nbc curves, assigning the result to locations at and
354  * following *X3, *Y3. (If Y-coords of result not desired, set Y=NULL.)
355  * Safe even if (X3,Y3) = (X2,Y2), _not_ if (X1,Y1). It is also safe to
356  * overwrite Y2 with X3. If nbc1 < nbc, the first summand is
357  * assumed to hold only nbc1 distinct points, repeated as often as we need
358  * them  (to add one point on each of a few curves to several other points on
359  * the same curves): only used with nbc1 = nbc or nbc1 = 4 | nbc.
360  *
361  * Return 0 [SUCCESS], 1 [N | den], 2 [gcd(den, N) is a factor of N, preserved
362  * in gl.
363  * Stack space is bounded by a constant multiple of lgefint(N)*nbc:
364  * - Phase 2 creates 12 items on the stack per iteration, of which 4 are twice
365  *   as long and 1 is thrice as long as N, i.e. 18 units per iteration.
366  * - Phase  1 creates 4 units.
367  * Total can be as large as 4*nbcmax + 18*8 units; ecm_elladd2() is
368  * just as bad, and elldouble() comes to 3*nbcmax + 29*8 units. */
369 static int
ecm_elladd0(GEN N,GEN * gl,long nbc,long nbc1,GEN * X1,GEN * Y1,GEN * X2,GEN * Y2,GEN * X3,GEN * Y3)370 ecm_elladd0(GEN N, GEN *gl, long nbc, long nbc1,
371             GEN *X1, GEN *Y1, GEN *X2, GEN *Y2, GEN *X3, GEN *Y3)
372 {
373   const ulong mask = (nbc1 == 4)? 3: ~0UL; /*nbc1 = 4 or nbc*/
374   GEN W[2*nbcmax], *A = W+nbc; /* W[0],A[0] unused */
375   long i;
376   pari_sp av = avma;
377 
378   W[1] = subii(X1[0], X2[0]);
379   for (i=1; i<nbc; i++)
380   { /*prepare for multi-inverse*/
381     A[i] = subii(X1[i&mask], X2[i]); /* don't waste time reducing mod N */
382     W[i+1] = modii(mulii(A[i], W[i]), N);
383   }
384   if (!invmod(W[nbc], N, gl))
385   {
386     if (!equalii(N,*gl)) return 2;
387     ZV_aff(nbc, X2,X3);
388     if (Y3) ZV_aff(nbc, Y2,Y3);
389     return gc_int(av,1);
390   }
391 
392   while (i--) /* nbc times */
393   {
394     pari_sp av2 = avma;
395     GEN Px = X1[i&mask], Py = Y1[i&mask], Qx = X2[i], Qy = Y2[i];
396     GEN z = i? mulii(*gl,W[i]): *gl; /*1/(Px-Qx)*/
397     FpE_add_i(N,z,  Px,Py,Qx,Qy, X3+i, Y3? Y3+i: NULL);
398     if (!i) break;
399     set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
400   }
401   return gc_int(av,0);
402 }
403 
404 /* Shortcut, for use in cases where Y coordinates follow their corresponding
405  * X coordinates, and first summand doesn't need to be repeated */
406 static int
ecm_elladd(GEN N,GEN * gl,long nbc,GEN * X1,GEN * X2,GEN * X3)407 ecm_elladd(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2, GEN *X3) {
408   return ecm_elladd0(N, gl, nbc, nbc, X1, X1+nbc, X2, X2+nbc, X3, X3+nbc);
409 }
410 
411 /* As ecm_elladd except it does twice as many additions (and hides even more
412  * of the cost of the modular inverse); the net effect is the same as
413  * ecm_elladd(nbc,X1,X2,X3) && ecm_elladd(nbc,X4,X5,X6). Safe to
414  * have X2=X3, X5=X6, or X1,X2 coincide with X4,X5 in any order. */
415 static int
ecm_elladd2(GEN N,GEN * gl,long nbc,GEN * X1,GEN * X2,GEN * X3,GEN * X4,GEN * X5,GEN * X6)416 ecm_elladd2(GEN N, GEN *gl, long nbc,
417             GEN *X1, GEN *X2, GEN *X3, GEN *X4, GEN *X5, GEN *X6)
418 {
419   GEN *Y1 = X1+nbc, *Y2 = X2+nbc, *Y3 = X3+nbc;
420   GEN *Y4 = X4+nbc, *Y5 = X5+nbc, *Y6 = X6+nbc;
421   GEN W[4*nbcmax], *A = W+2*nbc; /* W[0],A[0] unused */
422   long i, j;
423   pari_sp av = avma;
424 
425   W[1] = subii(X1[0], X2[0]);
426   for (i=1; i<nbc; i++)
427   {
428     A[i] = subii(X1[i], X2[i]); /* don't waste time reducing mod N here */
429     W[i+1] = modii(mulii(A[i], W[i]), N);
430   }
431   for (j=0; j<nbc; i++,j++)
432   {
433     A[i] = subii(X4[j], X5[j]);
434     W[i+1] = modii(mulii(A[i], W[i]), N);
435   }
436   if (!invmod(W[2*nbc], N, gl))
437   {
438     if (!equalii(N,*gl)) return 2;
439     ZV_aff(2*nbc, X2,X3); /* hack: 2*nbc => copy Y2->Y3 */
440     ZV_aff(2*nbc, X5,X6); /* also copy Y5->Y6 */
441     return gc_int(av,1);
442   }
443 
444   while (j--) /* nbc times */
445   {
446     pari_sp av2 = avma;
447     GEN Px = X4[j], Py = Y4[j], Qx = X5[j], Qy = Y5[j];
448     GEN z = mulii(*gl,W[--i]); /*1/(Px-Qx)*/
449     FpE_add_i(N,z, Px,Py, Qx,Qy, X6+j,Y6+j);
450     set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
451   }
452   while (i--) /* nbc times */
453   {
454     pari_sp av2 = avma;
455     GEN Px = X1[i], Py = Y1[i], Qx = X2[i], Qy = Y2[i];
456     GEN z = i? mulii(*gl, W[i]): *gl; /*1/(Px-Qx)*/
457     FpE_add_i(N,z, Px,Py, Qx,Qy, X3+i,Y3+i);
458     if (!i) break;
459     set_avma(av2); *gl = modii(mulii(*gl, A[i]), N);
460   }
461   return gc_int(av,0);
462 }
463 
464 /* Parallel doubling on nbc curves, assigning the result to locations at
465  * and following *X2.  Safe to be called with X2 equal to X1.  Return
466  * value as for ecm_elladd.  If we find a point at infinity mod N,
467  * and if X1 != X2, we copy the points at X1 to X2. */
468 static int
elldouble(GEN N,GEN * gl,long nbc,GEN * X1,GEN * X2)469 elldouble(GEN N, GEN *gl, long nbc, GEN *X1, GEN *X2)
470 {
471   GEN *Y1 = X1+nbc, *Y2 = X2+nbc;
472   GEN W[nbcmax+1]; /* W[0] unused */
473   long i;
474   pari_sp av = avma;
475   /*W[0] = gen_1;*/ W[1] = Y1[0];
476   for (i=1; i<nbc; i++) W[i+1] = modii(mulii(Y1[i], W[i]), N);
477   if (!invmod(W[nbc], N, gl))
478   {
479     if (!equalii(N,*gl)) return 2;
480     ZV_aff(2*nbc,X1,X2); /* also copies Y1->Y2 */
481     return gc_int(av,1);
482   }
483   while (i--) /* nbc times */
484   {
485     pari_sp av2;
486     GEN v, w, L, z = i? mulii(*gl,W[i]): *gl;
487     if (i) *gl = modii(mulii(*gl, Y1[i]), N);
488     av2 = avma;
489     L = modii(mulii(addui(1, mului(3, Fp_sqr(X1[i],N))), z), N);
490     if (signe(L)) /* half of zero is still zero */
491       L = shifti(mod2(L)? addii(L, N): L, -1);
492     v = modii(subii(sqri(L), shifti(X1[i],1)), N);
493     w = modii(subii(mulii(L, subii(X1[i], v)), Y1[i]), N);
494     affii(v, X2[i]);
495     affii(w, Y2[i]);
496     set_avma(av2);
497   }
498   return gc_int(av,0);
499 }
500 
501 /* Parallel multiplication by an odd prime k on nbc curves, storing the
502  * result to locations at and following *X2. Safe to be called with X2 = X1.
503  * Return values as ecm_elladd. Uses (a simplified variant of) Montgomery's
504  * PRAC algorithm; see ftp://ftp.cwi.nl/pub/pmontgom/Lucas.ps.gz .
505  * With thanks to Paul Zimmermann for the reference.  --GN1998Aug13 */
506 static int
get_rule(ulong d,ulong e)507 get_rule(ulong d, ulong e)
508 {
509   if (d <= e + (e>>2)) /* floor(1.25*e) */
510   {
511     if ((d+e)%3 == 0) return 0; /* rule 1 */
512     if ((d-e)%6 == 0) return 1;  /* rule 2 */
513   }
514   /* d <= 4*e but no ofl */
515   if ((d+3)>>2 <= e) return 2; /* rule 3, common case */
516   if ((d&1)==(e&1))  return 1; /* rule 4 = rule 2 */
517   if (!(d&1))        return 3; /* rule 5 */
518   if (d%3 == 0)      return 4; /* rule 6 */
519   if ((d+e)%3 == 0)  return 5; /* rule 7 */
520   if ((d-e)%3 == 0)  return 6; /* rule 8 */
521   /* when we get here, e is even, otherwise one of rules 4,5 would apply */
522   return 7; /* rule 9 */
523 }
524 
525 /* PRAC implementation notes - main changes against the paper version:
526  * (1) The general function [m+n]P = f([m]P,[n]P,[m-n]P) collapses (for m!=n)
527  * to an ecm_elladd() which does not depend on the third argument; thus
528  * references to the third variable (C in the paper) can be eliminated.
529  * (2) Since our multipliers are prime, the outer loop of the paper
530  * version executes only once, and thus is invisible above.
531  * (3) The first step in the inner loop of the paper version will always be
532  * rule 3, but the addition requested by this rule amounts to a doubling, and
533  * will always be followed by a swap, so we have unrolled this first iteration.
534  * (4) Simplifications in rules 6 and 7 are possible given the above, and we
535  * save one addition in each of the two cases.  NB none of the other
536  * ecm_elladd()s in the loop can ever degenerate into an elldouble.
537  * (5) I tried to optimize for rule 3, which is used more frequently than all
538  * others together, but it didn't improve things, so I removed the nested
539  * tight loop again.  --GN */
540 /* The main loop body of ellfacteur() runs _slower_ under PRAC than under a
541  * straightforward left-shift binary multiplication when N has <30 digits and
542  * B1 is small;  PRAC wins when N and B1 get larger.  Weird. --GN */
543 /* k>2 assumed prime, XAUX = scratchpad */
544 static int
ellmult(GEN N,GEN * gl,long nbc,ulong k,GEN * X1,GEN * X2,GEN * XAUX)545 ellmult(GEN N, GEN *gl, long nbc, ulong k, GEN *X1, GEN *X2, GEN *XAUX)
546 {
547   ulong r, d, e, e1;
548   int res;
549   GEN *A = X2, *B = XAUX, *T = XAUX + 2*nbc;
550 
551   ZV_aff(2*nbc,X1,XAUX);
552   /* first doubling picks up X1;  after this we'll be working in XAUX and
553    * X2 only, mostly via A and B and T */
554   if ((res = elldouble(N, gl, nbc, X1, X2)) != 0) return res;
555 
556   /* split the work at the golden ratio */
557   r = (ulong)(k*0.61803398875 + .5);
558   d = k - r;
559   e = r - d; /* d+e == r, so no danger of ofl below */
560   while (d != e)
561   { /* apply one of the nine transformations from PM's Table 4. */
562     switch(get_rule(d,e))
563     {
564     case 0: /* rule 1 */
565       if ( (res = ecm_elladd(N, gl, nbc, A, B, T)) ) return res;
566       if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
567       e1 = d - e; d = (d + e1)/3; e = (e - e1)/3; break;
568     case 1: /* rules 2 and 4 */
569       if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
570       if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
571       d = (d-e)>>1; break;
572     case 3: /* rule 5 */
573       if ( (res = elldouble(N, gl, nbc, A, A)) ) return res;
574       d >>= 1; break;
575     case 4: /* rule 6 */
576       if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
577       if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
578       if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
579       d = d/3 - e; break;
580     case 2: /* rule 3 */
581       if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
582       d -= e; break;
583     case 5: /* rule 7 */
584       if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
585       if ( (res = ecm_elladd2(N, gl, nbc, T, A, A, T, B, B)) != 0) return res;
586       d = (d - 2*e)/3; break;
587     case 6: /* rule 8 */
588       if ( (res = ecm_elladd(N, gl, nbc, A, B, B)) ) return res;
589       if ( (res = elldouble(N, gl, nbc, A, T)) ) return res;
590       if ( (res = ecm_elladd(N, gl, nbc, T, A, A)) ) return res;
591       d = (d - e)/3; break;
592     case 7: /* rule 9 */
593       if ( (res = elldouble(N, gl, nbc, B, B)) ) return res;
594       e >>= 1; break;
595     }
596     /* swap d <-> e and A <-> B if necessary */
597     if (d < e) { lswap(d,e); pswap(A,B); }
598   }
599   return ecm_elladd(N, gl, nbc, XAUX, X2, X2);
600 }
601 
602 struct ECM {
603   pari_timer T;
604   long nbc, nbc2, seed;
605   GEN *X, *XAUX, *XT, *XD, *XB, *XB2, *XH, *Xh, *Yh;
606 };
607 
608 /* memory layout in ellfacteur():  a large array of GEN pointers, and one
609  * huge chunk of memory containing all the actual GEN (t_INT) objects.
610  * nbc is constant throughout the invocation:
611  * - The B1 stage of each iteration through the main loop needs little
612  * space:  enough for the X and Y coordinates of the current points,
613  * and twice as much again as scratchpad for ellmult().
614  * - The B2 stage, starting from some current set of points Q, needs, in
615  * succession:
616  *   + space for [2]Q, [4]Q, ..., [10]Q, and [p]Q for building the helix;
617  *   + space for 48*nbc X and Y coordinates to hold the helix.  This could
618  *   re-use [2]Q,...,[8]Q, but only with difficulty, since we don't
619  *   know in advance which residue class mod 210 our p is going to be in.
620  *   It can and should re-use [p]Q, though;
621  *   + space for (temporarily [30]Q and then) [210]Q, [420]Q, and several
622  *   further doublings until the giant step multiplier is reached.  This
623  *   can re-use the remaining cells from above.  The computation of [210]Q
624  *   will have been the last call to ellmult() within this iteration of the
625  *   main loop, so the scratchpad is now also free to be re-used. We also
626  *   compute [630]Q by a parallel addition;  we'll need it later to get the
627  *   baby-step table bootstrapped a little faster.
628  *   + Finally, for no more than 4 curves at a time, room for up to 1024 X
629  *   coordinates only: the Y coordinates needed whilst setting up this baby
630  *   step table are temporarily stored in the upper half, and overwritten
631  *   during the last series of additions.
632  *
633  * Graphically:  after end of B1 stage (X,Y are the coords of Q):
634  * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
635  * | X Y |  scratch  | [2]Q| [4]Q| [6]Q| [8]Q|[10]Q|    ...    | ...
636  * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
637  * *X    *XAUX *XT   *XD                                       *XB
638  *
639  * [30]Q is computed from [10]Q.  [210]Q can go into XY, etc:
640  * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
641  * |[210]|[420]|[630]|[840]|[1680,3360,6720,...,2048*210]      |bstp table...
642  * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--
643  * *X    *XAUX *XT   *XD      [*XG, somewhere here]            *XB .... *XH
644  *
645  * So we need (13 + 48) * 2 * nbc slots here + 4096 slots for the baby step
646  * table (not all of which will be used when we start with a small B1, but
647  * better to allocate and initialize ahead of time all the slots that might
648  * be needed later).
649  *
650  * Note on memory locality:  During the B2 phase, accesses to the helix
651  * (once it is set up) will be clustered by curves (4 out of nbc at a time).
652  * Accesses to the baby steps table will wander from one end of the array to
653  * the other and back, one such cycle per giant step, and during a full cycle
654  * we would expect on the order of 2E4 accesses when using the largest giant
655  * step size.  Thus we shouldn't be doing too bad with respect to thrashing
656  * a 512KBy L2 cache.  However, we don't want the baby step table to grow
657  * larger than this, even if it would reduce the number of EC operations by a
658  * few more per cent for very large B2, lest cache thrashing slow down
659  * everything disproportionally. --GN */
660 /* Auxiliary routines need < (3*nbc+240)*lN words on the PARI stack, in
661  * addition to the spc*(lN+1) words occupied by our main table. */
662 static void
ECM_alloc(struct ECM * E,long lN)663 ECM_alloc(struct ECM *E, long lN)
664 {
665   const long bstpmax = 1024; /* max number of baby step table entries */
666   long spc = (13 + 48) * E->nbc2 + bstpmax * 4;
667   long len = spc + 385 + spc*lN;
668   long i, tw = evallg(lN) | evaltyp(t_INT);
669   GEN w, *X = (GEN*)new_chunk(len);
670   /* hack for X[i] = cgeti(lN). X = current point in B1 phase */
671   w = (GEN)(X + spc + 385);
672   for (i = spc-1; i >= 0; i--) { X[i] = w; *w = tw; w += lN; }
673   E->X = X;
674   E->XAUX = E->X    + E->nbc2; /* scratchpad for ellmult() */
675   E->XT   = E->XAUX + E->nbc2; /* ditto, will later hold [3*210]Q */
676   E->XD   = E->XT   + E->nbc2; /* room for various multiples */
677   E->XB   = E->XD   + 10*E->nbc2; /* start of baby steps table */
678   E->XB2  = E->XB   + 2 * bstpmax; /* middle of baby steps table */
679   E->XH   = E->XB2  + 2 * bstpmax; /* end of bstps table, start of helix */
680   E->Xh   = E->XH   + 48*E->nbc2; /* little helix, X coords */
681   E->Yh   = E->XH   + 192;     /* ditto, Y coords */
682   /* XG,YG set inside the main loop, since they depend on B2 */
683   /* E.Xh range of 384 pointers not set; these will later duplicate the pointers
684    * in the E.XH range, 4 curves at a time. Some of the cells reserved here for
685    * the E.XB range will never be used, instead, we'll warp the pointers to
686    * connect to (read-only) GENs in the X/E.XD range */
687 }
688 /* N.B. E->seed is not initialized here */
689 static void
ECM_init(struct ECM * E,GEN N,long nbc)690 ECM_init(struct ECM *E, GEN N, long nbc)
691 {
692   if (nbc < 0)
693   { /* choose a sensible default */
694     const long size = expi(N) + 1;
695     nbc = ((size >> 3) << 2) - 80;
696     if (nbc < 8) nbc = 8;
697   }
698   if (nbc > nbcmax) nbc = nbcmax;
699   E->nbc = nbc;
700   E->nbc2 = nbc << 1;
701   ECM_alloc(E, lgefint(N));
702 }
703 
704 static GEN
ECM_loop(struct ECM * E,GEN N,ulong B1)705 ECM_loop(struct ECM *E, GEN N, ulong B1)
706 {
707   const ulong B2 = 110 * B1, B2_rt = usqrt(B2);
708   const ulong nbc = E->nbc, nbc2 = E->nbc2;
709   pari_sp av1, avtmp;
710   byteptr d0, d = diffptr;
711   long i, gse, gss, bstp, bstp0, rcn0, rcn;
712   ulong B2_p, m, p, p0;
713   GEN g, *XG, *YG;
714   GEN *X = E->X, *XAUX = E->XAUX, *XT = E->XT, *XD = E->XD;
715   GEN *XB = E->XB, *XB2 = E->XB2, *XH = E->XH, *Xh = E->Xh, *Yh = E->Yh;
716   /* pick curves */
717   for (i = nbc2; i--; ) affui(E->seed++, X[i]);
718   /* pick giant step exponent and size */
719   gse = B1 < 656
720           ? (B1 < 200? 5: 6)
721           : (B1 < 10500
722             ? (B1 < 2625? 7: 8)
723             : (B1 < 42000? 9: 10));
724   gss = 1UL << gse;
725   /* With 32 baby steps, a giant step corresponds to 32*420 = 13440,
726    * appropriate for the smallest B2s. With 1024, a giant step will be 430080;
727    * appropriate for B1 >~ 42000, where 512 baby steps would imply roughly
728    * the same number of curve additions. */
729   XG = XT + gse*nbc2; /* will later hold [2^(gse+1)*210]Q */
730   YG = XG + nbc;
731 
732   if (DEBUGLEVEL >= 4) {
733     err_printf("ECM: time = %6ld ms\nECM: B1 = %4lu,", timer_delay(&E->T), B1);
734     err_printf("\tB2 = %6lu,\tgss = %4ld*420\n", B2, gss);
735   }
736   p = 0;
737   NEXT_PRIME_VIADIFF(p,d);
738 
739   /* ---B1 PHASE--- */
740   /* treat p=2 separately */
741   B2_p = B2 >> 1;
742   for (m=1; m<=B2_p; m<<=1)
743   {
744     int fl = elldouble(N, &g, nbc, X, X);
745     if (fl > 1) return g; else if (fl) break;
746   }
747   rcn = NPRC; /* multipliers begin at the beginning */
748   /* p=3,...,nextprime(B1) */
749   while (p < B1 && p <= B2_rt)
750   {
751     pari_sp av2 = avma;
752     p = snextpr(p, &d, &rcn, NULL, uisprime);
753     B2_p = B2/p; /* beware integer overflow on 32-bit CPUs */
754     for (m=1; m<=B2_p; m*=p)
755     {
756       int fl = ellmult(N, &g, nbc, p, X, X, XAUX);
757       if (fl > 1) return g; else if (fl) break;
758       set_avma(av2);
759     }
760     set_avma(av2);
761   }
762   /* primes p larger than sqrt(B2) appear only to the 1st power */
763   while (p < B1)
764   {
765     pari_sp av2 = avma;
766     p = snextpr(p, &d, &rcn, NULL, uisprime);
767     if (ellmult(N, &g, nbc, p, X, X, XAUX) > 1) return g;
768     set_avma(av2);
769   }
770   if (DEBUGLEVEL >= 4) {
771     err_printf("ECM: time = %6ld ms, B1 phase done, ", timer_delay(&E->T));
772     err_printf("p = %lu, setting up for B2\n", p);
773   }
774 
775   /* ---B2 PHASE--- */
776   /* compute [2]Q,...,[10]Q, needed to build the helix */
777   if (elldouble(N, &g, nbc, X, XD) > 1) return g; /*[2]Q*/
778   if (elldouble(N, &g, nbc, XD, XD + nbc2) > 1) return g; /*[4]Q*/
779   if (ecm_elladd(N, &g, nbc,
780         XD, XD + nbc2, XD + (nbc<<2)) > 1) return g; /* [6]Q */
781   if (ecm_elladd2(N, &g, nbc,
782         XD, XD + (nbc<<2), XT + (nbc<<3),
783         XD + nbc2, XD + (nbc<<2), XD + (nbc<<3)) > 1)
784     return g; /* [8]Q and [10]Q */
785   if (DEBUGLEVEL >= 7) err_printf("\t(got [2]Q...[10]Q)\n");
786 
787   /* get next prime (still using the foolproof test) */
788   p = snextpr(p, &d, &rcn, NULL, uisprime);
789   /* make sure we have the residue class number (mod 210) */
790   if (rcn == NPRC)
791   {
792     rcn = prc210_no[(p % 210) >> 1];
793     if (rcn == NPRC)
794     {
795       err_printf("ECM: %lu should have been prime but isn\'t\n", p);
796       pari_err_BUG("ellfacteur");
797     }
798   }
799 
800   /* compute [p]Q and put it into its place in the helix */
801   if (ellmult(N, &g, nbc, p, X, XH + rcn*nbc2, XAUX) > 1)
802     return g;
803   if (DEBUGLEVEL >= 7)
804     err_printf("\t(got [p]Q, p = %lu = prc210_rp[%ld] mod 210)\n", p, rcn);
805 
806   /* save current p, d, and rcn;  we'll need them more than once below */
807   p0 = p;
808   d0 = d;
809   rcn0 = rcn; /* remember where the helix wraps */
810   bstp0 = 0; /* p is at baby-step offset 0 from itself */
811 
812   /* fill up the helix, stepping forward through the prime residue classes
813    * mod 210 until we're back at the r'class of p0.  Keep updating p so
814    * that we can print meaningful diagnostics if a factor shows up; don't
815    * bother checking which of these p's are in fact prime */
816   for (i = 47; i; i--) /* 47 iterations */
817   {
818     ulong dp = (ulong)prc210_d1[rcn];
819     p += dp;
820     if (rcn == 47)
821     { /* wrap mod 210 */
822       if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH) > 1) return g;
823       rcn = 0; continue;
824     }
825     if (ecm_elladd(N, &g, nbc, XT+dp*nbc, XH+rcn*nbc2, XH+rcn*nbc2+nbc2) > 1)
826       return g;
827     rcn++;
828   }
829   if (DEBUGLEVEL >= 7) err_printf("\t(got initial helix)\n");
830   /* compute [210]Q etc, needed for the baby step table */
831   if (ellmult(N, &g, nbc, 3, XD + (nbc<<3), X, XAUX) > 1) return g;
832   if (ellmult(N, &g, nbc, 7, X, X, XAUX) > 1) return g; /* [210]Q */
833   /* this was the last call to ellmult() in the main loop body; may now
834    * overwrite XAUX and slots XD and following */
835   if (elldouble(N, &g, nbc, X, XAUX) > 1) return g; /* [420]Q */
836   if (ecm_elladd(N, &g, nbc, X, XAUX, XT) > 1) return g;/*[630]Q*/
837   if (ecm_elladd(N, &g, nbc, X, XT, XD) > 1) return g;  /*[840]Q*/
838   for (i=1; i <= gse; i++)
839     if (elldouble(N, &g, nbc, XT + i*nbc2, XD + i*nbc2) > 1) return g;
840   /* (the last iteration has initialized XG to [210*2^(gse+1)]Q) */
841 
842   if (DEBUGLEVEL >= 4)
843     err_printf("ECM: time = %6ld ms, entering B2 phase, p = %lu\n",
844                timer_delay(&E->T), p);
845 
846   for (i = nbc - 4; i >= 0; i -= 4)
847   { /* loop over small sets of 4 curves at a time */
848     GEN *Xb;
849     long j, k;
850     if (DEBUGLEVEL >= 6)
851       err_printf("ECM: finishing curves %ld...%ld\n", i, i+3);
852     /* Copy relevant pointers from XH to Xh. Memory layout in XH:
853      * nbc X coordinates, nbc Y coordinates for residue class
854      * 1 mod 210, then the same for r.c. 11 mod 210, etc. Memory layout for
855      * Xh is: four X coords for 1 mod 210, four for 11 mod 210, ..., four
856      * for 209 mod 210, then the corresponding Y coordinates in the same
857      * order. This allows a giant step on Xh using just three calls to
858      * ecm_elladd0() each acting on 64 points in parallel */
859     for (j = 48; j--; )
860     {
861       k = nbc2*j + i;
862       m = j << 2; /* X coordinates */
863       Xh[m]   = XH[k];   Xh[m+1] = XH[k+1];
864       Xh[m+2] = XH[k+2]; Xh[m+3] = XH[k+3];
865       k += nbc; /* Y coordinates */
866       Yh[m]   = XH[k];   Yh[m+1] = XH[k+1];
867       Yh[m+2] = XH[k+2]; Yh[m+3] = XH[k+3];
868     }
869     /* Build baby step table of X coords of multiples of [210]Q.  XB[4*j]
870      * will point at X coords on four curves from [(j+1)*210]Q.  Until
871      * we're done, we need some Y coords as well, which we keep in the
872      * second half of the table, overwriting them at the end when gse=10.
873      * Multiples which we already have  (by 1,2,3,4,8,16,...,2^gse) are
874      * entered simply by copying the pointers, ignoring the few slots in w
875      * that were initially reserved for them. Here are the initial entries */
876     for (Xb=XB,k=2,j=i; k--; Xb=XB2,j+=nbc) /* first X, then Y coords */
877     {
878       Xb[0]  = X[j];      Xb[1]  = X[j+1]; /* [210]Q */
879       Xb[2]  = X[j+2];    Xb[3]  = X[j+3];
880       Xb[4]  = XAUX[j];   Xb[5]  = XAUX[j+1]; /* [420]Q */
881       Xb[6]  = XAUX[j+2]; Xb[7]  = XAUX[j+3];
882       Xb[8]  = XT[j];     Xb[9]  = XT[j+1]; /* [630]Q */
883       Xb[10] = XT[j+2];   Xb[11] = XT[j+3];
884       Xb += 4; /* points at [420]Q */
885       /* ... entries at powers of 2 times 210 .... */
886       for (m = 2; m < (ulong)gse+k; m++) /* omit Y coords of [2^gse*210]Q */
887       {
888         long m2 = m*nbc2 + j;
889         Xb += (2UL<<m); /* points at [2^m*210]Q */
890         Xb[0] = XAUX[m2];   Xb[1] = XAUX[m2+1];
891         Xb[2] = XAUX[m2+2]; Xb[3] = XAUX[m2+3];
892       }
893     }
894     if (DEBUGLEVEL >= 7)
895       err_printf("\t(extracted precomputed helix / baby step entries)\n");
896     /* ... glue in between, up to 16*210 ... */
897     if (ecm_elladd0(N, &g, 12, 4, /* 12 pts + (4 pts replicated thrice) */
898           XB + 12, XB2 + 12,
899           XB,      XB2,
900           XB + 16, XB2 + 16) > 1) return g; /*4+{1,2,3} = {5,6,7}*/
901     if (ecm_elladd0(N, &g, 28, 4, /* 28 pts + (4 pts replicated 7fold) */
902           XB + 28, XB2 + 28,
903           XB,      XB2,
904           XB + 32, XB2 + 32) > 1) return g;/*8+{1...7} = {9...15}*/
905     /* ... and the remainder of the lot */
906     for (m = 5; m <= (ulong)gse; m++)
907     { /* fill in from 2^(m-1)+1 to 2^m-1 in chunks of 64 and 60 points */
908       ulong m2 = 2UL << m; /* will point at 2^(m-1)+1 */
909       for (j = 0; (ulong)j < m2-64; j+=64) /* executed 0 times when m = 5 */
910       {
911         if (ecm_elladd0(N, &g, 64, 4,
912               XB + m2-4, XB2 + m2-4,
913               XB + j,    XB2 + j,
914               XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
915           return g;
916       } /* j = m2-64 here, 60 points left */
917       if (ecm_elladd0(N, &g, 60, 4,
918             XB + m2-4, XB2 + m2-4,
919             XB + j,    XB2 + j,
920             XB + m2+j, (m<(ulong)gse? XB2+m2+j: NULL)) > 1)
921         return g;
922       /* when m=gse, drop Y coords of result, and when both equal 1024,
923        * overwrite Y coords of second argument with X coords of result */
924     }
925     if (DEBUGLEVEL >= 7) err_printf("\t(baby step table complete)\n");
926     /* initialize a few other things */
927     bstp = bstp0;
928     p = p0; d = d0; rcn = rcn0;
929     g = gen_1; av1 = avma;
930     /* scratchspace for prod (x_i-x_j) */
931     avtmp = (pari_sp)new_chunk(8 * lgefint(N));
932     /* The correct entry in XB to use depends on bstp and on where we are
933      * on the helix. As we skip from prime to prime, bstp is incremented
934      * by snextpr each time we wrap around through residue class number 0
935      * (1 mod 210), but the baby step should not be taken until rcn>=rcn0,
936      * i.e. until we pass again the residue class of p0.
937      *
938      * The correct signed multiplier is thus k = bstp - (rcn < rcn0),
939      * and the offset from XB is four times (|k| - 1).  When k=0, we ignore
940      * the current prime: if it had led to a factorization, this
941      * would have been noted during the last giant step, or -- when we
942      * first get here -- whilst initializing the helix.  When k > gss,
943      * we must do a giant step and bump bstp back by -2*gss.
944      *
945      * The gcd of the product of X coord differences against N is taken just
946      * before we do a giant step. */
947     while (p < B2)
948     {/* loop over probable primes p0 < p <= nextprime(B2), inserting giant
949       * steps as necessary */
950       p = snextpr(p, &d, &rcn, &bstp, uis2psp); /* next probable prime */
951       /* work out the corresponding baby-step multiplier */
952       k = bstp - (rcn < rcn0 ? 1 : 0);
953       if (k > gss)
954       { /* giant-step time, take gcd */
955         g = gcdii(g, N);
956         if (!is_pm1(g) && !equalii(g, N)) return g;
957         g = gen_1; set_avma(av1);
958         while (k > gss)
959         { /* giant step */
960           if (DEBUGLEVEL >= 7) err_printf("\t(giant step at p = %lu)\n", p);
961           if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
962                 Xh, Yh, Xh, Yh) > 1) return g;
963           if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
964                 Xh + 64, Yh + 64, Xh + 64, Yh + 64) > 1)
965             return g;
966           if (ecm_elladd0(N, &g, 64, 4, XG + i, YG + i,
967                 Xh + 128, Yh + 128, Xh + 128, Yh + 128) > 1)
968             return g;
969           bstp -= (gss << 1);
970           k = bstp - (rcn < rcn0? 1: 0); /* recompute multiplier */
971         }
972       }
973       if (!k) continue; /* point of interest is already in Xh */
974       if (k < 0) k = -k;
975       m = ((ulong)k - 1) << 2;
976       /* accumulate product of differences of X coordinates */
977       j = rcn<<2;
978       avma = avtmp; /* go to garbage zone; don't use set_avma */
979       g = modii(mulii(g, subii(XB[m],   Xh[j])), N);
980       g = modii(mulii(g, subii(XB[m+1], Xh[j+1])), N);
981       g = modii(mulii(g, subii(XB[m+2], Xh[j+2])), N);
982       g = mulii(g, subii(XB[m+3], Xh[j+3]));
983       set_avma(av1);
984       g = modii(g, N);
985     }
986     set_avma(av1);
987   }
988   return NULL;
989 }
990 
991 /* ellfacteur() tuned to be useful as a first stage before MPQS, especially for
992  * large arguments, when 'insist' is false, and now also for the case when
993  * 'insist' is true, vaguely following suggestions by Paul Zimmermann
994  * (http://www.loria.fr/~zimmerma/records/ecmnet.html). --GN 1998Jul,Aug */
995 static GEN
ellfacteur(GEN N,int insist)996 ellfacteur(GEN N, int insist)
997 {
998   const long size = expi(N) + 1;
999   pari_sp av = avma;
1000   struct ECM E;
1001   long nbc, dsn, dsnmax, rep = 0;
1002   if (insist)
1003   {
1004     const long DSNMAX = numberof(TB1)-1;
1005     dsnmax = (size >> 2) - 10;
1006     if (dsnmax < 0) dsnmax = 0;
1007     else if (dsnmax > DSNMAX) dsnmax = DSNMAX;
1008     E.seed = 1 + (nbcmax<<7)*(size&0xffff); /* seed for choice of curves */
1009 
1010     dsn = (size >> 3) - 5;
1011     if (dsn < 0) dsn = 0; else if (dsn > 47) dsn = 47;
1012     /* pick up the torch where noninsistent stage would have given up */
1013     nbc = dsn + (dsn >> 2) + 9; /* 8 or more curves in parallel */
1014     nbc &= ~3; /* 4 | nbc */
1015   }
1016   else
1017   {
1018     dsn = (size - 140) >> 3;
1019     if (dsn < 0)
1020     {
1021 #ifndef __EMX__ /* unless DOS/EMX: MPQS's disk access is abysmally slow */
1022       if (DEBUGLEVEL >= 4)
1023         err_printf("ECM: number too small to justify this stage\n");
1024       return NULL; /* too small, decline the task */
1025 #endif
1026       dsn = 0;
1027     } else if (dsn > 12) dsn = 12;
1028     rep = (size <= 248 ?
1029            (size <= 176 ? (size - 124) >> 4 : (size - 148) >> 3) :
1030            (size - 224) >> 1);
1031 #ifdef __EMX__ /* DOS/EMX: extra rounds (shun MPQS) */
1032     rep += 20;
1033 #endif
1034     dsnmax = 72;
1035     /* Use disjoint sets of curves for non-insist and insist phases; moreover,
1036      * repeated calls acting on factors of the same original number should try
1037      * to use fresh curves. The following achieves this */
1038     E.seed = 1 + (nbcmax<<3)*(size & 0xf);
1039     nbc = -1;
1040   }
1041   ECM_init(&E, N, nbc);
1042   if (DEBUGLEVEL >= 4)
1043   {
1044     timer_start(&E.T);
1045     err_printf("ECM: working on %ld curves at a time; initializing", E.nbc);
1046     if (!insist)
1047     {
1048       if (rep == 1) err_printf(" for one round");
1049       else          err_printf(" for up to %ld rounds", rep);
1050     }
1051     err_printf("...\n");
1052   }
1053   if (dsn > dsnmax) dsn = dsnmax;
1054   for(;;)
1055   {
1056     ulong B1 = insist? TB1[dsn]: TB1_for_stage[dsn];
1057     GEN g = ECM_loop(&E, N, B1);
1058     if (g)
1059     {
1060       if (DEBUGLEVEL >= 4)
1061         err_printf("ECM: time = %6ld ms\n\tfound factor = %Ps\n",
1062                    timer_delay(&E.T), g);
1063       return gerepilecopy(av, g);
1064     }
1065     if (dsn < dsnmax)
1066     {
1067       if (insist) dsn++;
1068       else { dsn += 2; if (dsn > dsnmax) dsn = dsnmax; }
1069     }
1070     if (!insist && !--rep)
1071     {
1072       if (DEBUGLEVEL >= 4)
1073         err_printf("ECM: time = %6ld ms,\tellfacteur giving up.\n",
1074                    timer_delay(&E.T));
1075       return gc_NULL(av);
1076     }
1077   }
1078 }
1079 /* assume rounds >= 1, seed >= 1, B1 <= ULONG_MAX / 110 */
1080 GEN
Z_ECM(GEN N,long rounds,long seed,ulong B1)1081 Z_ECM(GEN N, long rounds, long seed, ulong B1)
1082 {
1083   pari_sp av = avma;
1084   struct ECM E;
1085   long i;
1086   E.seed = seed;
1087   ECM_init(&E, N, -1);
1088   if (DEBUGLEVEL >= 4) timer_start(&E.T);
1089   for (i = rounds; i--; )
1090   {
1091     GEN g = ECM_loop(&E, N, B1);
1092     if (g) return gerepilecopy(av, g);
1093   }
1094   return gc_NULL(av);
1095 }
1096 
1097 /***********************************************************************/
1098 /**                                                                   **/
1099 /**                FACTORIZATION (Pollard-Brent rho) --GN1998Jun18-26 **/
1100 /**  pollardbrent() returns a nontrivial factor of n, assuming n is   **/
1101 /**  composite and has no small prime divisor, or NULL if going on    **/
1102 /**  would take more time than we want to spend.  Sometimes it finds  **/
1103 /**  more than one factor, and returns a structure suitable for       **/
1104 /**  interpretation by ifac_crack. (Cf Algo 8.5.2 in ACiCNT)          **/
1105 /**                                                                   **/
1106 /***********************************************************************/
1107 #define VALUE(x) gel(x,0)
1108 #define EXPON(x) gel(x,1)
1109 #define CLASS(x) gel(x,2)
1110 
1111 INLINE void
INIT(GEN x,GEN v,GEN e,GEN c)1112 INIT(GEN x, GEN v, GEN e, GEN c) {
1113   VALUE(x) = v;
1114   EXPON(x) = e;
1115   CLASS(x) = c;
1116 }
1117 static void
ifac_delete(GEN x)1118 ifac_delete(GEN x) { INIT(x,NULL,NULL,NULL); }
1119 
1120 static void
rho_dbg(pari_timer * T,long c,long msg_mask)1121 rho_dbg(pari_timer *T, long c, long msg_mask)
1122 {
1123   if (c & msg_mask) return;
1124   err_printf("Rho: time = %6ld ms,\t%3ld round%s\n",
1125              timer_delay(T), c, (c==1?"":"s"));
1126 }
1127 
1128 static void
one_iter(GEN * x,GEN * P,GEN x1,GEN n,long delta)1129 one_iter(GEN *x, GEN *P, GEN x1, GEN n, long delta)
1130 {
1131   *x = addis(remii(sqri(*x), n), delta);
1132   *P = modii(mulii(*P, subii(x1, *x)), n);
1133 }
1134 /* Return NULL when we run out of time, or a single t_INT containing a
1135  * nontrivial factor of n, or a vector of t_INTs, each triple of successive
1136  * entries containing a factor, an exponent (equal to one),  and a factor
1137  * class (NULL for unknown or zero for known composite),  matching the
1138  * internal representation used by the ifac_*() routines below. Repeated
1139  * factors may arise; the caller will sort the factors anyway. Result
1140  * is not gerepile-able (contains NULL) */
1141 static GEN
pollardbrent_i(GEN n,long size,long c0,long retries)1142 pollardbrent_i(GEN n, long size, long c0, long retries)
1143 {
1144   long tf = lgefint(n), delta, msg_mask, c, k, k1, l;
1145   pari_sp av;
1146   GEN x, x1, y, P, g, g1, res;
1147   pari_timer T;
1148 
1149   if (DEBUGLEVEL >= 4) timer_start(&T);
1150   c = c0 << 5; /* 2^5 iterations per round */
1151   msg_mask = (size >= 448? 0x1fff:
1152                            (size >= 192? (256L<<((size-128)>>6))-1: 0xff));
1153   y = cgeti(tf);
1154   x1= cgeti(tf);
1155   av = avma;
1156 
1157 PB_RETRY:
1158  /* trick to make a 'random' choice determined by n.  Don't use x^2+0 or
1159   * x^2-2, ever.  Don't use x^2-3 or x^2-7 with a starting value of 2.
1160   * x^2+4, x^2+9 are affine conjugate to x^2+1, so don't use them either.
1161   *
1162   * (the point being that when we get called again on a composite cofactor
1163   * of something we've already seen, we had better avoid the same delta) */
1164   switch ((size + retries) & 7)
1165   {
1166     case 0:  delta=  1; break;
1167     case 1:  delta= -1; break;
1168     case 2:  delta=  3; break;
1169     case 3:  delta=  5; break;
1170     case 4:  delta= -5; break;
1171     case 5:  delta=  7; break;
1172     case 6:  delta= 11; break;
1173     /* case 7: */
1174     default: delta=-11; break;
1175   }
1176   if (DEBUGLEVEL >= 4)
1177   {
1178     if (!retries)
1179       err_printf("Rho: searching small factor of %ld-bit integer\n", size);
1180     else
1181       err_printf("Rho: restarting for remaining rounds...\n");
1182     err_printf("Rho: using X^2%+1ld for up to %ld rounds of 32 iterations\n",
1183                delta, c >> 5);
1184   }
1185   x = gen_2; P = gen_1; g1 = NULL; k = 1; l = 1;
1186   affui(2, y);
1187   affui(2, x1);
1188   for (;;) /* terminated under the control of c */
1189   { /* use the polynomial  x^2 + delta */
1190     one_iter(&x, &P, x1, n, delta);
1191 
1192     if ((--c & 0x1f)==0)
1193     { /* one round complete */
1194       g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1195       if (c <= 0)
1196       { /* getting bored */
1197         if (DEBUGLEVEL >= 4)
1198           err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1199                      timer_delay(&T));
1200         return NULL;
1201       }
1202       P = gen_1;
1203       if (DEBUGLEVEL >= 4) rho_dbg(&T, c0-(c>>5), msg_mask);
1204       affii(x,y); x = y; set_avma(av);
1205     }
1206 
1207     if (--k) continue; /* normal end of loop body */
1208 
1209     if (c & 0x1f) /* otherwise, we already checked */
1210     {
1211       g = gcdii(n, P); if (!is_pm1(g)) goto fin;
1212       P = gen_1;
1213     }
1214 
1215    /* Fast forward phase, doing l inner iterations without computing gcds.
1216     * Check first whether it would take us beyond the alloted time.
1217     * Fast forward rounds count only half (although they're taking
1218     * more like 2/3 the time of normal rounds).  This to counteract the
1219     * nuisance that all c0 between 4096 and 6144 would act exactly as
1220     * 4096;  with the halving trick only the range 4096..5120 collapses
1221     * (similarly for all other powers of two) */
1222     if ((c -= (l>>1)) <= 0)
1223     { /* got bored */
1224       if (DEBUGLEVEL >= 4)
1225         err_printf("Rho: time = %6ld ms,\tPollard-Brent giving up.\n",
1226                    timer_delay(&T));
1227       return NULL;
1228     }
1229     c &= ~0x1f; /* keep it on multiples of 32 */
1230 
1231     /* Fast forward loop */
1232     affii(x, x1); set_avma(av); x = x1;
1233     k = l; l <<= 1;
1234     /* don't show this for the first several (short) fast forward phases. */
1235     if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1236       err_printf("Rho: fast forward phase (%ld rounds of 64)...\n", l>>7);
1237     for (k1=k; k1; k1--)
1238     {
1239       one_iter(&x, &P, x1, n, delta);
1240       if ((k1 & 0x1f) == 0) gerepileall(av, 2, &x, &P);
1241     }
1242     if (DEBUGLEVEL >= 4 && (l>>7) > msg_mask)
1243       err_printf("Rho: time = %6ld ms,\t%3ld rounds, back to normal mode\n",
1244                  timer_delay(&T), c0-(c>>5));
1245     affii(x,y); P = gerepileuptoint(av, P); x = y;
1246   } /* forever */
1247 
1248 fin:
1249   /* An accumulated gcd was > 1 */
1250   if  (!equalii(g,n))
1251   { /* if it isn't n, and looks prime, return it */
1252     if (MR_Jaeschke(g))
1253     {
1254       if (DEBUGLEVEL >= 4)
1255       {
1256         rho_dbg(&T, c0-(c>>5), 0);
1257         err_printf("\tfound factor = %Ps\n",g);
1258       }
1259       return g;
1260     }
1261     set_avma(av); g1 = icopy(g);  /* known composite, keep it safe */
1262     av = avma;
1263   }
1264   else g1 = n; /* and work modulo g1 for backtracking */
1265 
1266   /* Here g1 is known composite */
1267   if (DEBUGLEVEL >= 4 && size > 192)
1268     err_printf("Rho: hang on a second, we got something here...\n");
1269   x = y;
1270   for(;;)
1271   { /* backtrack until period recovered. Must terminate */
1272     x = addis(remii(sqri(x), g1), delta);
1273     g = gcdii(subii(x1, x), g1); if (!is_pm1(g)) break;
1274 
1275     if (DEBUGLEVEL >= 4 && (--c & 0x1f) == 0) rho_dbg(&T, c0-(c>>5), msg_mask);
1276   }
1277 
1278   if (g1 == n || equalii(g,g1))
1279   {
1280     if (g1 == n && equalii(g,g1))
1281     { /* out of luck */
1282       if (DEBUGLEVEL >= 4)
1283       {
1284         rho_dbg(&T, c0-(c>>5), 0);
1285         err_printf("\tPollard-Brent failed.\n");
1286       }
1287       if (++retries >= 4) pari_err_BUG("");
1288       goto PB_RETRY;
1289     }
1290     /* half lucky: we've split n, but g1 equals either g or n */
1291     if (DEBUGLEVEL >= 4)
1292     {
1293       rho_dbg(&T, c0-(c>>5), 0);
1294       err_printf("\tfound %sfactor = %Ps\n", (g1!=n ? "composite " : ""), g);
1295     }
1296     res = cgetg(7, t_VEC);
1297     /* g^1: known composite when g1!=n */
1298     INIT(res+1, g, gen_1, (g1!=n? gen_0: NULL));
1299     /* cofactor^1: status unknown */
1300     INIT(res+4, diviiexact(n,g), gen_1, NULL);
1301     return res;
1302   }
1303   /* g < g1 < n : our lucky day -- we've split g1, too */
1304   res = cgetg(10, t_VEC);
1305   /* unknown status for all three factors */
1306   INIT(res+1, g,                gen_1, NULL);
1307   INIT(res+4, diviiexact(g1,g), gen_1, NULL);
1308   INIT(res+7, diviiexact(n,g1), gen_1, NULL);
1309   if (DEBUGLEVEL >= 4)
1310   {
1311     rho_dbg(&T, c0-(c>>5), 0);
1312     err_printf("\tfound factors = %Ps, %Ps,\n\tand %Ps\n",
1313                gel(res,1), gel(res,4), gel(res,7));
1314   }
1315   return res;
1316 }
1317 /* Tuning parameter:  for input up to 64 bits long, we must not spend more
1318  * than a very short time, for fear of slowing things down on average.
1319  * With the current tuning formula, increase our efforts somewhat at 49 bit
1320  * input (an extra round for each bit at first),  and go up more and more
1321  * rapidly after we pass 80 bits.-- Changed this to adjust for the presence of
1322  * squfof, which will finish input up to 59 bits quickly. */
1323 static GEN
pollardbrent(GEN n)1324 pollardbrent(GEN n)
1325 {
1326   const long tune_pb_min = 14; /* even 15 seems too much. */
1327   long c0, size = expi(n) + 1;
1328   if (size <= 28)
1329     c0 = 32;/* amounts very nearly to 'insist'. Now that we have squfof(), we
1330              * don't insist any more when input is 2^29 ... 2^32 */
1331   else if (size <= 42)
1332     c0 = tune_pb_min;
1333   else if (size <= 59) /* match squfof() cutoff point */
1334     c0 = tune_pb_min + ((size - 42)<<1);
1335   else if (size <= 72)
1336     c0 = tune_pb_min + size - 24;
1337   else if (size <= 301)
1338     /* nonlinear increase in effort, kicking in around 80 bits */
1339     /* 301 gives 48121 + tune_pb_min */
1340     c0 = tune_pb_min + size - 60 +
1341       ((size-73)>>1)*((size-70)>>3)*((size-56)>>4);
1342   else
1343     c0 = 49152; /* ECM is faster when it'd take longer */
1344   return pollardbrent_i(n, size, c0, 0);
1345 }
1346 GEN
Z_pollardbrent(GEN n,long rounds,long seed)1347 Z_pollardbrent(GEN n, long rounds, long seed)
1348 {
1349   pari_sp av = avma;
1350   GEN v = pollardbrent_i(n, expi(n)+1, rounds, seed);
1351   if (!v) return NULL;
1352   if (typ(v) == t_INT) v = mkvec2(v, diviiexact(n,v));
1353   else if (lg(v) == 7) v = mkvec2(gel(v,1), gel(v,4));
1354   else v = mkvec3(gel(v,1), gel(v,4), gel(v,7));
1355   return gerepilecopy(av, v);
1356 }
1357 
1358 /***********************************************************************/
1359 /**              FACTORIZATION (Shanks' SQUFOF) --GN2000Sep30-Oct01   **/
1360 /**  squfof() returns a nontrivial factor of n, assuming n is odd,    **/
1361 /**  composite, not a pure square, and has no small prime divisor,    **/
1362 /**  or NULL if it fails to find one.  It works on two discriminants  **/
1363 /**  simultaneously  (n and 5n for n=1(4), 3n and 4n for n=3(4)).     **/
1364 /**  Present implementation is limited to input <2^59, and works most **/
1365 /**  of the time in signed arithmetic on integers <2^31 in absolute   **/
1366 /**  size. (Cf. Algo 8.7.2 in ACiCNT)                                 **/
1367 /***********************************************************************/
1368 
1369 /* The following is invoked to walk back along the ambiguous cycle* until we
1370  * hit an ambiguous form and thus the desired factor, which it returns.  If it
1371  * fails for any reason, it returns 0.  It doesn't interfere with timing and
1372  * diagnostics, which it leaves to squfof().
1373  *
1374  * Before we invoke this, we've found a form (A, B, -C) with A = a^2, where a
1375  * isn't blacklisted and where gcd(a, B) = 1.  According to ACiCANT, we should
1376  * now proceed reducing the form (a, -B, -aC), but it is easy to show that the
1377  * first reduction step always sends this to (-aC, B, a), and the next one,
1378  * with q computed as usual from B and a (occupying the c position), gives a
1379  * reduced form, whose third member is easiest to recover by going back to D.
1380  * From this point onwards, we're once again working with single-word numbers.
1381  * No need to track signs, just work with the abs values of the coefficients. */
1382 static long
squfof_ambig(long a,long B,long dd,GEN D)1383 squfof_ambig(long a, long B, long dd, GEN D)
1384 {
1385   long b, c, q, qa, qc, qcb, a0, b0, b1, c0;
1386   long cnt = 0; /* count reduction steps on the cycle */
1387 
1388   q = (dd + (B>>1)) / a;
1389   qa = q * a;
1390   b = (qa - B) + qa; /* avoid overflow */
1391   {
1392     pari_sp av = avma;
1393     c = itos(divis(shifti(subii(D, sqrs(b)), -2), a));
1394     set_avma(av);
1395   }
1396 #ifdef DEBUG_SQUFOF
1397   err_printf("SQUFOF: ambigous cycle of discriminant %Ps\n", D);
1398   err_printf("SQUFOF: Form on ambigous cycle (%ld, %ld, %ld)\n", a, b, c);
1399 #endif
1400 
1401   a0 = a; b0 = b1 = b;        /* end of loop detection and safeguard */
1402 
1403   for (;;) /* reduced cycles are finite */
1404   { /* reduction step */
1405     c0 = c;
1406     if (c0 > dd)
1407       q = 1;
1408     else
1409       q = (dd + (b>>1)) / c0;
1410     if (q == 1)
1411     {
1412       qcb = c0 - b; b = c0 + qcb; c = a - qcb;
1413     }
1414     else
1415     {
1416       qc = q*c0; qcb = qc - b; b = qc + qcb; c = a - q*qcb;
1417     }
1418     a = c0;
1419 
1420     cnt++; if (b == b1) break;
1421 
1422     /* safeguard against infinite loop: recognize when we've walked the entire
1423      * cycle in vain. (I don't think this can actually happen -- exercise.) */
1424     if (b == b0 && a == a0) return 0;
1425 
1426     b1 = b;
1427   }
1428   q = a&1 ? a : a>>1;
1429   if (DEBUGLEVEL >= 4)
1430   {
1431     if (q > 1)
1432       err_printf("SQUFOF: found factor %ld from ambiguous form\n"
1433                  "\tafter %ld steps on the ambiguous cycle\n",
1434                  q / ugcd(q,15), cnt);
1435     else
1436       err_printf("SQUFOF: ...found nothing on the ambiguous cycle\n"
1437                  "\tafter %ld steps there\n", cnt);
1438     if (DEBUGLEVEL >= 6) err_printf("SQUFOF: squfof_ambig returned %ld\n", q);
1439   }
1440   return q;
1441 }
1442 
1443 #define SQUFOF_BLACKLIST_SZ 64
1444 
1445 /* assume 2,3,5 do not divide n */
1446 static GEN
squfof(GEN n)1447 squfof(GEN n)
1448 {
1449   ulong d1, d2;
1450   long tf = lgefint(n), nm4, cnt = 0;
1451   long a1, b1, c1, dd1, L1, a2, b2, c2, dd2, L2, a, q, c, qc, qcb;
1452   GEN D1, D2;
1453   pari_sp av = avma;
1454   long blacklist1[SQUFOF_BLACKLIST_SZ], blacklist2[SQUFOF_BLACKLIST_SZ];
1455   long blp1 = 0, blp2 = 0;
1456   int act1 = 1, act2 = 1;
1457 
1458 #ifdef LONG_IS_64BIT
1459   if (tf > 3 || (tf == 3 && uel(n,2)             >= (1UL << (BITS_IN_LONG-5))))
1460 #else  /* 32 bits */
1461   if (tf > 4 || (tf == 4 && (ulong)(*int_MSW(n)) >= (1UL << (BITS_IN_LONG-5))))
1462 #endif
1463     return NULL; /* n too large */
1464 
1465   /* now we have 5 < n < 2^59 */
1466   nm4 = mod4(n);
1467   if (nm4 == 1)
1468   { /* n = 1 (mod4):  run one iteration on D1 = n, another on D2 = 5n */
1469     D1 = n;
1470     D2 = mului(5,n); d2 = itou(sqrti(D2)); dd2 = (long)((d2>>1) + (d2&1));
1471     b2 = (long)((d2-1) | 1);        /* b1, b2 will always stay odd */
1472   }
1473   else
1474   { /* n = 3 (mod4):  run one iteration on D1 = 3n, another on D2 = 4n */
1475     D1 = mului(3,n);
1476     D2 = shifti(n,2); dd2 = itou(sqrti(n)); d2 =  dd2 << 1;
1477     b2 = (long)(d2 & (~1UL)); /* largest even below d2, will stay even */
1478   }
1479   d1 = itou(sqrti(D1));
1480   b1 = (long)((d1-1) | 1); /* largest odd number not exceeding d1 */
1481   c1 = itos(shifti(subii(D1, sqru((ulong)b1)), -2));
1482   if (!c1) pari_err_BUG("squfof [caller of] (n or 3n is a square)");
1483   c2 = itos(shifti(subii(D2, sqru((ulong)b2)), -2));
1484   if (!c2) pari_err_BUG("squfof [caller of] (5n is a square)");
1485   L1 = (long)usqrt(d1);
1486   L2 = (long)usqrt(d2);
1487   /* dd1 used to compute floor((d1+b1)/2) as dd1+floor(b1/2), without
1488    * overflowing the 31bit signed integer size limit. Same for dd2. */
1489   dd1 = (long) ((d1>>1) + (d1&1));
1490   a1 = a2 = 1;
1491 
1492   /* The two (identity) forms (a1,b1,-c1) and (a2,b2,-c2) are now set up.
1493    *
1494    * a1 and c1 represent the absolute values of the a,c coefficients; we keep
1495    * track of the sign separately, via the iteration counter cnt: when cnt is
1496    * even, c is understood to be negative, else c is positive and a < 0.
1497    *
1498    * L1, L2 are the limits for blacklisting small leading coefficients
1499    * on the principal cycle, to guarantee that when we find a square form,
1500    * its square root will belong to an ambiguous cycle  (i.e. won't be an
1501    * earlier form on the principal cycle).
1502    *
1503    * When n = 3(mod 4), D2 = 12(mod 16), and b^2 is always 0 or 4 mod 16.
1504    * It follows that 4*a*c must be 4 or 8 mod 16, respectively, so at most
1505    * one of a,c can be divisible by 2 at most to the first power.  This fact
1506    * is used a couple of times below.
1507    *
1508    * The flags act1, act2 remain true while the respective cycle is still
1509    * active;  we drop them to false when we return to the identity form with-
1510    * out having found a square form  (or when the blacklist overflows, which
1511    * shouldn't happen). */
1512   if (DEBUGLEVEL >= 4)
1513     err_printf("SQUFOF: entering main loop with forms\n"
1514                "\t(1, %ld, %ld) and (1, %ld, %ld)\n\tof discriminants\n"
1515                "\t%Ps and %Ps, respectively\n", b1, -c1, b2, -c2, D1, D2);
1516 
1517   /* MAIN LOOP: walk around the principal cycle looking for a square form.
1518    * Blacklist small leading coefficients.
1519    *
1520    * The reduction operator can be computed entirely in 32-bit arithmetic:
1521    * Let q = floor(floor((d1+b1)/2)/c1)  (when c1>dd1, q=1, which happens
1522    * often enough to special-case it).  Then the new b1 = (q*c1-b1) + q*c1,
1523    * which does not overflow, and the new c1 = a1 - q*(q*c1-b1), which is
1524    * bounded by d1 in abs size since both the old and the new a1 are positive
1525    * and bounded by d1. */
1526   while (act1 || act2)
1527   {
1528     if (act1)
1529     { /* send first form through reduction operator if active */
1530       c = c1;
1531       q = (c > dd1)? 1: (dd1 + (b1>>1)) / c;
1532       if (q == 1)
1533       { qcb = c - b1; b1 = c + qcb; c1 = a1 - qcb; }
1534       else
1535       { qc = q*c; qcb = qc - b1; b1 = qc + qcb; c1 = a1 - q*qcb; }
1536       a1 = c;
1537 
1538       if (a1 <= L1)
1539       { /* blacklist this */
1540         if (blp1 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1541           act1 = 0; /* silently */
1542         else
1543         {
1544           if (DEBUGLEVEL >= 6)
1545             err_printf("SQUFOF: blacklisting a = %ld on first cycle\n", a1);
1546           blacklist1[blp1++] = a1;
1547         }
1548       }
1549     }
1550     if (act2)
1551     { /* send second form through reduction operator if active */
1552       c = c2;
1553       q = (c > dd2)? 1: (dd2 + (b2>>1)) / c;
1554       if (q == 1)
1555       { qcb = c - b2; b2 = c + qcb; c2 = a2 - qcb; }
1556       else
1557       { qc = q*c; qcb = qc - b2; b2 = qc + qcb; c2 = a2 - q*qcb; }
1558       a2 = c;
1559 
1560       if (a2 <= L2)
1561       { /* blacklist this */
1562         if (blp2 >= SQUFOF_BLACKLIST_SZ) /* overflows: shouldn't happen */
1563           act2 = 0; /* silently */
1564         else
1565         {
1566           if (DEBUGLEVEL >= 6)
1567             err_printf("SQUFOF: blacklisting a = %ld on second cycle\n", a2);
1568           blacklist2[blp2++] = a2;
1569         }
1570       }
1571     }
1572 
1573     /* bump counter, loop if this is an odd iteration (i.e. if the real
1574      * leading coefficients are negative) */
1575     if (++cnt & 1) continue;
1576 
1577     /* second half of main loop entered only when the leading coefficients
1578      * are positive (i.e., during even-numbered iterations) */
1579 
1580     /* examine first form if active */
1581     if (act1 && a1 == 1) /* back to identity */
1582     { /* drop this discriminant */
1583       act1 = 0;
1584       if (DEBUGLEVEL >= 4)
1585         err_printf("SQUFOF: first cycle exhausted after %ld iterations,\n"
1586                    "\tdropping it\n", cnt);
1587     }
1588     if (act1)
1589     {
1590       if (uissquareall((ulong)a1, (ulong*)&a))
1591       { /* square form */
1592         if (DEBUGLEVEL >= 4)
1593           err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on first cycle\n"
1594                      "\tafter %ld iterations\n", a, b1, -c1, cnt);
1595         if (a <= L1)
1596         { /* blacklisted? */
1597           long j;
1598           for (j = 0; j < blp1; j++)
1599             if (a == blacklist1[j]) { a = 0; break; }
1600         }
1601         if (a > 0)
1602         { /* not blacklisted */
1603           q = ugcd(a, b1); /* imprimitive form? */
1604           if (q > 1)
1605           { /* q^2 divides D1 hence n [ assuming n % 3 != 0 ] */
1606             set_avma(av);
1607             if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1608             return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1609           }
1610           /* chase the inverse root form back along the ambiguous cycle */
1611           q = squfof_ambig(a, b1, dd1, D1);
1612           if (nm4 == 3 && q % 3 == 0) q /= 3;
1613           if (q > 1) { set_avma(av); return utoipos(q); } /* SUCCESS! */
1614         }
1615         else if (DEBUGLEVEL >= 4) /* blacklisted */
1616           err_printf("SQUFOF: ...but the root form seems to be on the "
1617                      "principal cycle\n");
1618       }
1619     }
1620 
1621     /* examine second form if active */
1622     if (act2 && a2 == 1) /* back to identity form */
1623     { /* drop this discriminant */
1624       act2 = 0;
1625       if (DEBUGLEVEL >= 4)
1626         err_printf("SQUFOF: second cycle exhausted after %ld iterations,\n"
1627                    "\tdropping it\n", cnt);
1628     }
1629     if (act2)
1630     {
1631       if (uissquareall((ulong)a2, (ulong*)&a))
1632       { /* square form */
1633         if (DEBUGLEVEL >= 4)
1634           err_printf("SQUFOF: square form (%ld^2, %ld, %ld) on second cycle\n"
1635                      "\tafter %ld iterations\n", a, b2, -c2, cnt);
1636         if (a <= L2)
1637         { /* blacklisted? */
1638           long j;
1639           for (j = 0; j < blp2; j++)
1640             if (a == blacklist2[j]) { a = 0; break; }
1641         }
1642         if (a > 0)
1643         { /* not blacklisted */
1644           q = ugcd(a, b2); /* imprimitive form? */
1645           /* NB if b2 is even, a is odd, so the gcd is always odd */
1646           if (q > 1)
1647           { /* q^2 divides D2 hence n [ assuming n % 5 != 0 ] */
1648             set_avma(av);
1649             if (DEBUGLEVEL >= 4) err_printf("SQUFOF: found factor %ld^2\n", q);
1650             return mkvec3(utoipos(q), gen_2, NULL);/* exponent 2, unknown status */
1651           }
1652           /* chase the inverse root form along the ambiguous cycle */
1653           q = squfof_ambig(a, b2, dd2, D2);
1654           if (nm4 == 1 && q % 5 == 0) q /= 5;
1655           if (q > 1) { set_avma(av); return utoipos(q); } /* SUCCESS! */
1656         }
1657         else if (DEBUGLEVEL >= 4)        /* blacklisted */
1658           err_printf("SQUFOF: ...but the root form seems to be on the "
1659                      "principal cycle\n");
1660       }
1661     }
1662   } /* end main loop */
1663 
1664   /* both discriminants turned out to be useless. */
1665   if (DEBUGLEVEL>=4) err_printf("SQUFOF: giving up\n");
1666   return gc_NULL(av);
1667 }
1668 
1669 /***********************************************************************/
1670 /*                    DETECTING ODD POWERS  --GN1998Jun28              */
1671 /*   Factoring engines like MPQS which ultimately rely on computing    */
1672 /*   gcd(N, x^2-y^2) to find a nontrivial factor of N can't split      */
1673 /*   N = p^k for an odd prime p, since (Z/p^k)^* is then cyclic. Here  */
1674 /*   is an analogue of Z_issquareall() for 3rd, 5th and 7th powers.    */
1675 /*   The general case is handled by is_kth_power                       */
1676 /***********************************************************************/
1677 
1678 /* Multistage sieve. First stages work mod 211, 209, 61, 203 in this order
1679  * (first reduce mod the product of these and then take the remainder apart).
1680  * Second stages use 117, 31, 43, 71. Moduli which are no longer interesting
1681  * are skipped. Everything is encoded in a table of 106 24-bit masks. We only
1682  * need the first half of the residues.  Three bits per modulus indicate which
1683  * residues are 7th (bit 2), 5th (bit 1) or 3rd (bit 0) powers; the eight
1684  * moduli above are assigned right-to-left. The table was generated using: */
1685 
1686 #if 0
1687 L = [71, 43, 31, [O(3^2),O(13)], [O(7),O(29)], 61, [O(11),O(19)], 211];
1688 ispow(x, N, k)=
1689 {
1690   if (type(N) == "t_INT", return (ispower(Mod(x,N), k)));
1691   for (i = 1, #N, if (!ispower(x + N[i], k), return (0))); 1
1692 }
1693 check(r) =
1694 {
1695   print1("  0");
1696   for (i=1,#L,
1697     N = 0;
1698     if (ispow(r, L[i], 3), N += 1);
1699     if (ispow(r, L[i], 5), N += 2);
1700     if (ispow(r, L[i], 7), N += 4);
1701     print1(N);
1702   ); print("ul,  /* ", r, " */")
1703 }
1704 for (r = 0, 105, check(r))
1705 #endif
1706 static ulong powersmod[106] = {
1707   077777777ul,  /* 0 */
1708   077777777ul,  /* 1 */
1709   013562440ul,  /* 2 */
1710   012402540ul,  /* 3 */
1711   013562440ul,  /* 4 */
1712   052662441ul,  /* 5 */
1713   016603440ul,  /* 6 */
1714   016463450ul,  /* 7 */
1715   013573551ul,  /* 8 */
1716   012462540ul,  /* 9 */
1717   012462464ul,  /* 10 */
1718   013462771ul,  /* 11 */
1719   012406473ul,  /* 12 */
1720   012463641ul,  /* 13 */
1721   052463646ul,  /* 14 */
1722   012503446ul,  /* 15 */
1723   013562440ul,  /* 16 */
1724   052466440ul,  /* 17 */
1725   012472451ul,  /* 18 */
1726   012462454ul,  /* 19 */
1727   032463550ul,  /* 20 */
1728   013403664ul,  /* 21 */
1729   013463460ul,  /* 22 */
1730   032562565ul,  /* 23 */
1731   012402540ul,  /* 24 */
1732   052662441ul,  /* 25 */
1733   032672452ul,  /* 26 */
1734   013573551ul,  /* 27 */
1735   012467541ul,  /* 28 */
1736   012567640ul,  /* 29 */
1737   032706450ul,  /* 30 */
1738   012762452ul,  /* 31 */
1739   033762662ul,  /* 32 */
1740   012502562ul,  /* 33 */
1741   032463562ul,  /* 34 */
1742   013563440ul,  /* 35 */
1743   016663440ul,  /* 36 */
1744   036662550ul,  /* 37 */
1745   012462552ul,  /* 38 */
1746   033502450ul,  /* 39 */
1747   012462643ul,  /* 40 */
1748   033467540ul,  /* 41 */
1749   017403441ul,  /* 42 */
1750   017463462ul,  /* 43 */
1751   017472460ul,  /* 44 */
1752   033462470ul,  /* 45 */
1753   052566450ul,  /* 46 */
1754   013562640ul,  /* 47 */
1755   032403640ul,  /* 48 */
1756   016463450ul,  /* 49 */
1757   016463752ul,  /* 50 */
1758   033402440ul,  /* 51 */
1759   012462540ul,  /* 52 */
1760   012472540ul,  /* 53 */
1761   053562462ul,  /* 54 */
1762   012463465ul,  /* 55 */
1763   012663470ul,  /* 56 */
1764   052607450ul,  /* 57 */
1765   012566553ul,  /* 58 */
1766   013466440ul,  /* 59 */
1767   012502741ul,  /* 60 */
1768   012762744ul,  /* 61 */
1769   012763740ul,  /* 62 */
1770   012763443ul,  /* 63 */
1771   013573551ul,  /* 64 */
1772   013462471ul,  /* 65 */
1773   052502460ul,  /* 66 */
1774   012662463ul,  /* 67 */
1775   012662451ul,  /* 68 */
1776   012403550ul,  /* 69 */
1777   073567540ul,  /* 70 */
1778   072463445ul,  /* 71 */
1779   072462740ul,  /* 72 */
1780   012472442ul,  /* 73 */
1781   012462644ul,  /* 74 */
1782   013406650ul,  /* 75 */
1783   052463471ul,  /* 76 */
1784   012563474ul,  /* 77 */
1785   013503460ul,  /* 78 */
1786   016462441ul,  /* 79 */
1787   016462440ul,  /* 80 */
1788   012462540ul,  /* 81 */
1789   013462641ul,  /* 82 */
1790   012463454ul,  /* 83 */
1791   013403550ul,  /* 84 */
1792   057563540ul,  /* 85 */
1793   017466441ul,  /* 86 */
1794   017606471ul,  /* 87 */
1795   053666573ul,  /* 88 */
1796   012562561ul,  /* 89 */
1797   013473641ul,  /* 90 */
1798   032573440ul,  /* 91 */
1799   016763440ul,  /* 92 */
1800   016702640ul,  /* 93 */
1801   033762552ul,  /* 94 */
1802   012562550ul,  /* 95 */
1803   052402451ul,  /* 96 */
1804   033563441ul,  /* 97 */
1805   012663561ul,  /* 98 */
1806   012677560ul,  /* 99 */
1807   012462464ul,  /* 100 */
1808   032562642ul,  /* 101 */
1809   013402551ul,  /* 102 */
1810   032462450ul,  /* 103 */
1811   012467445ul,  /* 104 */
1812   032403440ul,  /* 105 */
1813 };
1814 
1815 static int
check_res(ulong x,ulong N,int shift,ulong * mask)1816 check_res(ulong x, ulong N, int shift, ulong *mask)
1817 {
1818   long r = x%N; if ((ulong)r> (N>>1)) r = N - r;
1819   *mask &= (powersmod[r] >> shift);
1820   return *mask;
1821 }
1822 
1823 /* is x mod 211*209*61*203*117*31*43*71 a 3rd, 5th or 7th power ? */
1824 int
uis_357_powermod(ulong x,ulong * mask)1825 uis_357_powermod(ulong x, ulong *mask)
1826 {
1827   if (             !check_res(x, 211UL, 0, mask)) return 0;
1828   if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1829   if (*mask & 3 && !check_res(x,  61UL, 6, mask)) return 0;
1830   if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1831   if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1832   if (*mask & 3 && !check_res(x,  31UL,15, mask)) return 0;
1833   if (*mask & 5 && !check_res(x,  43UL,18, mask)) return 0;
1834   if (*mask & 6 && !check_res(x,  71UL,21, mask)) return 0;
1835   return 1;
1836 }
1837 /* asume x > 0 and pt != NULL */
1838 int
uis_357_power(ulong x,ulong * pt,ulong * mask)1839 uis_357_power(ulong x, ulong *pt, ulong *mask)
1840 {
1841   double logx;
1842   if (!odd(x))
1843   {
1844     long v = vals(x);
1845     if (v % 7) *mask &= ~4;
1846     if (v % 5) *mask &= ~2;
1847     if (v % 3) *mask &= ~1;
1848     if (!*mask) return 0;
1849   }
1850   if (!uis_357_powermod(x, mask)) return 0;
1851   logx = log((double)x);
1852   while (*mask)
1853   {
1854     long e, b;
1855     ulong y, ye;
1856     if (*mask & 1)      { b = 1; e = 3; }
1857     else if (*mask & 2) { b = 2; e = 5; }
1858     else                { b = 4; e = 7; }
1859     y = (ulong)(exp(logx / e) + 0.5);
1860     ye = upowuu(y,e);
1861     if (ye == x) { *pt = y; return e; }
1862 #ifdef LONG_IS_64BIT
1863     if (ye > x) y--; else y++;
1864     ye = upowuu(y,e);
1865     if (ye == x) { *pt = y; return e; }
1866 #endif
1867     *mask &= ~b; /* turn the bit off */
1868   }
1869   return 0;
1870 }
1871 
1872 #ifndef LONG_IS_64BIT
1873 /* as above, split in two functions */
1874 /* is x mod 211*209*61*203 a 3rd, 5th or 7th power ? */
1875 static int
uis_357_powermod_32bit_1(ulong x,ulong * mask)1876 uis_357_powermod_32bit_1(ulong x, ulong *mask)
1877 {
1878   if (             !check_res(x, 211UL, 0, mask)) return 0;
1879   if (*mask & 3 && !check_res(x, 209UL, 3, mask)) return 0;
1880   if (*mask & 3 && !check_res(x,  61UL, 6, mask)) return 0;
1881   if (*mask & 5 && !check_res(x, 203UL, 9, mask)) return 0;
1882   return 1;
1883 }
1884 /* is x mod 117*31*43*71 a 3rd, 5th or 7th power ? */
1885 static int
uis_357_powermod_32bit_2(ulong x,ulong * mask)1886 uis_357_powermod_32bit_2(ulong x, ulong *mask)
1887 {
1888   if (*mask & 1 && !check_res(x, 117UL,12, mask)) return 0;
1889   if (*mask & 3 && !check_res(x,  31UL,15, mask)) return 0;
1890   if (*mask & 5 && !check_res(x,  43UL,18, mask)) return 0;
1891   if (*mask & 6 && !check_res(x,  71UL,21, mask)) return 0;
1892   return 1;
1893 }
1894 #endif
1895 
1896 /* Returns 3, 5, or 7 if x is a cube (but not a 5th or 7th power),  a 5th
1897  * power (but not a 7th),  or a 7th power, and in this case creates the
1898  * base on the stack and assigns its address to *pt.  Otherwise returns 0.
1899  * x must be of type t_INT and positive;  this is not checked.  The *mask
1900  * argument tells us which things to check -- bit 0: 3rd, bit 1: 5th,
1901  * bit 2: 7th pwr;  set a bit to have the corresponding power examined --
1902  * and is updated appropriately for a possible follow-up call */
1903 int
is_357_power(GEN x,GEN * pt,ulong * mask)1904 is_357_power(GEN x, GEN *pt, ulong *mask)
1905 {
1906   long lx = lgefint(x);
1907   ulong r;
1908   pari_sp av;
1909   GEN y;
1910 
1911   if (!*mask) return 0; /* useful when running in a loop */
1912   if (DEBUGLEVEL>4)
1913     err_printf("OddPwrs: examining %ld-bit integer\n", expi(x)+1);
1914   if (lgefint(x) == 3) {
1915     ulong t;
1916     long e = uis_357_power(x[2], &t, mask);
1917     if (e)
1918     {
1919       if (pt) *pt = utoi(t);
1920       return e;
1921     }
1922     return 0;
1923   }
1924 #ifdef LONG_IS_64BIT
1925   r = (lx == 3)? uel(x,2): umodiu(x, 6046846918939827UL);
1926   if (!uis_357_powermod(r, mask)) return 0;
1927 #else
1928   r = (lx == 3)? uel(x,2): umodiu(x, 211*209*61*203);
1929   if (!uis_357_powermod_32bit_1(r, mask)) return 0;
1930   r = (lx == 3)? uel(x,2): umodiu(x, 117*31*43*71);
1931   if (!uis_357_powermod_32bit_2(r, mask)) return 0;
1932 #endif
1933   av = avma;
1934   while (*mask)
1935   {
1936     long e, b;
1937     /* priority to higher powers: if we have a 21st, it is easier to rediscover
1938      * that its 7th root is a cube than that its cube root is a 7th power */
1939          if (*mask & 4) { b = 4; e = 7; }
1940     else if (*mask & 2) { b = 2; e = 5; }
1941     else                { b = 1; e = 3; }
1942     y = mpround( sqrtnr(itor(x, nbits2prec(64 + bit_accuracy(lx) / e)), e) );
1943     if (equalii(powiu(y,e), x))
1944     {
1945       if (!pt) return gc_int(av,e);
1946       set_avma((pari_sp)y); *pt = gerepileuptoint(av, y);
1947       return e;
1948     }
1949     *mask &= ~b; /* turn the bit off */
1950     set_avma(av);
1951   }
1952   return 0;
1953 }
1954 
1955 /* Is x a n-th power ?
1956  * if d = NULL, n not necessarily prime, otherwise, n prime and d the
1957  * corresponding diffptr to go on looping over primes.
1958  * If pt != NULL, it receives the n-th root */
1959 ulong
is_kth_power(GEN x,ulong n,GEN * pt)1960 is_kth_power(GEN x, ulong n, GEN *pt)
1961 {
1962   forprime_t T;
1963   long j;
1964   ulong q, residue;
1965   GEN y;
1966   pari_sp av = avma;
1967 
1968   (void)u_forprime_arith_init(&T, odd(n)? 2*n+1: n+1, ULONG_MAX, 1,n);
1969   /* we'll start at q, smallest prime >= n */
1970 
1971   /* Modular checks, use small primes q congruent 1 mod n */
1972   /* A non n-th power nevertheless passes the test with proba n^(-#checks),
1973    * We'd like this < 1e-6 but let j = floor(log(1e-6) / log(n)) which
1974    * ensures much less. */
1975   if (n < 16)
1976     j = 5;
1977   else if (n < 32)
1978     j = 4;
1979   else if (n < 101)
1980     j = 3;
1981   else if (n < 1001)
1982     j = 2;
1983   else if (n < 17886697) /* smallest such that smallest suitable q is > 2^32 */
1984     j = 1;
1985   else
1986     j = 0;
1987   for (; j > 0; j--)
1988   {
1989     if (!(q = u_forprime_next(&T))) break;
1990     /* q a prime = 1 mod n */
1991     residue = umodiu(x, q);
1992     if (residue == 0)
1993     {
1994       if (Z_lval(x,q) % n) return gc_ulong(av,0);
1995       continue;
1996     }
1997     /* n-th power mod q ? */
1998     if (Fl_powu(residue, (q-1)/n, q) != 1) return gc_ulong(av,0);
1999   }
2000   set_avma(av);
2001 
2002   if (DEBUGLEVEL>4) err_printf("\nOddPwrs: [%lu] passed modular checks\n",n);
2003   /* go to the horse's mouth... */
2004   y = roundr( sqrtnr(itor(x, nbits2prec(expi(x)/n + 16)), n) );
2005   if (!equalii(powiu(y, n), x)) {
2006     if (DEBUGLEVEL>4) err_printf("\tBut it wasn't a pure power.\n");
2007     return gc_ulong(av,0);
2008   }
2009   if (!pt) set_avma(av); else { set_avma((pari_sp)y); *pt = gerepileuptoint(av,y); }
2010   return 1;
2011 }
2012 
2013 /* is x a p^i-th power, p >= 11 prime ? Similar to is_357_power(), but instead
2014  * of the mask, we keep the current test exponent around. Cut off when
2015  * log_2 x^(1/k) < cutoffbits since we would have found it by trial division.
2016  * Everything needed here (primitive roots etc.) is computed from scratch on
2017  * the fly; compared to the size of numbers under consideration, these
2018  * word-sized computations take negligible time.
2019  * Any cutoffbits > 0 is safe, but direct root extraction attempts are faster
2020  * when trial division has been used to discover very small bases. We become
2021  * competitive at cutoffbits ~ 10 */
2022 int
is_pth_power(GEN x,GEN * pt,forprime_t * T,ulong cutoffbits)2023 is_pth_power(GEN x, GEN *pt, forprime_t *T, ulong cutoffbits)
2024 {
2025   long cnt=0, size = expi(x) /* not +1 */;
2026   ulong p;
2027   pari_sp av = avma;
2028   while ((p = u_forprime_next(T)) && size/p >= cutoffbits) {
2029     long v = 1;
2030     if (DEBUGLEVEL>5 && cnt++==2000)
2031       { cnt=0; err_printf("%lu%% ", 100*p*cutoffbits/size); }
2032     while (is_kth_power(x, p, pt)) {
2033       v *= p; x = *pt;
2034       size = expi(x);
2035     }
2036     if (v > 1)
2037     {
2038       if (DEBUGLEVEL>5) err_printf("\nOddPwrs: is a %ld power\n",v);
2039       return v;
2040     }
2041   }
2042   if (DEBUGLEVEL>5) err_printf("\nOddPwrs: not a power\n",p);
2043   return gc_int(av,0); /* give up */
2044 }
2045 
2046 /***********************************************************************/
2047 /**                FACTORIZATION  (master iteration)                  **/
2048 /**      Driver for the various methods of finding large factors      **/
2049 /**      (after trial division has cast out the very small ones).     **/
2050 /**                        GN1998Jun24--30                            **/
2051 /***********************************************************************/
2052 
2053 /* Direct use:
2054  *  ifac_start_hint(n,moebius,hint) registers with the iterative factorizer
2055  *  - an integer n (without prime factors  < tridiv_bound(n))
2056  *  - registers whether or not we should terminate early if we find a square
2057  *    factor,
2058  *  - a hint about which method(s) to use.
2059  *  This must always be called first. If input is not composite, oo loop.
2060  *  The routine decomposes n nontrivially into a product of two factors except
2061  *  in squarefreeness ('Moebius') mode.
2062  *
2063  *  ifac_start(n,moebius) same using default hint.
2064  *
2065  *  ifac_primary_factor()  returns a prime divisor (not necessarily the
2066  *    smallest) and the corresponding exponent.
2067  *
2068  * Encapsulated user interface: Many arithmetic functions have a 'contributor'
2069  * ifac_xxx, to be called on any large composite cofactor left over after trial
2070  * division by small primes: xxx is one of moebius, issquarefree, totient, etc.
2071  *
2072  * We never test whether the input number is prime or composite, since
2073  * presumably it will have come out of the small factors finder stage
2074  * (which doesn't really exist yet but which will test the left-over
2075  * cofactor for primality once it does). */
2076 
2077 /* The data structure in which we preserve whatever we know about our number N
2078  * is kept on the PARI stack, and updated as needed.
2079  * This makes the machinery re-entrant, and avoids memory leaks when a lengthy
2080  * factorization is interrupted. We try to keep the whole affair connected,
2081  * and the parent object is always older than its children.  This may in
2082  * rare cases lead to some extra copying around, and knowing what is garbage
2083  * at any given time is not trivial. See below for examples how to do it right.
2084  * (Connectedness is destroyed if callers of ifac_main() create stuff on the
2085  * stack in between calls. This is harmless as long as ifac_realloc() is used
2086  * to re-create a connected object at the head of the stack just before
2087  * collecting garbage.)
2088  * A t_INT may well have > 10^6 distinct prime factors larger than 2^16. Since
2089  * we need not find factors in order of increasing size, we must be prepared to
2090  * drag a very large amount of data around.  We start with a small structure
2091  * and extend it when necessary. */
2092 
2093 /* The idea of the algorithm is:
2094  * Let N0 be whatever is currently left of N after dividing off all the
2095  * prime powers we have already returned to the caller.  Then we maintain
2096  * N0 as a product
2097  * (1) N0 = \prod_i P_i^{e_i} * \prod_j Q_j^{f_j} * \prod_k C_k^{g_k}
2098  * where the P_i and Q_j are distinct primes, each C_k is known composite,
2099  * none of the P_i divides any C_k, and we also know the total ordering
2100  * of all the P_i, Q_j and C_k; in particular, we will never try to divide
2101  * a C_k by a larger Q_j.  Some of the C_k may have common factors.
2102  *
2103  * Caveat implementor:  Taking gcds among C_k's is very likely to cost at
2104  * least as much time as dividing off any primes as we find them, and book-
2105  * keeping would be tough (since D=gcd(C_1,C_2) can still have common factors
2106  * with both C_1/D and C_2/D, and so on...).
2107  *
2108  * At startup, we just initialize the structure to
2109  * (2) N = C_1^1   (composite).
2110  *
2111  * Whenever ifac_primary_factor() or one of the arithmetic user interface
2112  * routines needs a primary factor, and the smallest thing in our list is P_1,
2113  * we return that and its exponent, and remove it from our list. (When nothing
2114  * is left, we return a sentinel value -- gen_1.  And in Moebius mode, when we
2115  * see something with exponent > 1, whether prime or composite, we return gen_0
2116  * or 0, depending on the function). In all other cases, ifac_main() iterates
2117  * the following steps until we have a P_1 in the smallest position.
2118  *
2119  * When the smallest item is C_1, as it is initially:
2120  * (3.1) Crack C_1 into a nontrivial product  U_1 * U_2  by whatever method
2121  * comes to mind for this size. (U for 'unknown'.)  Cracking will detect
2122  * perfect powers, so we may instead see a power of some U_1 here, or even
2123  * something of the form U_1^k*U_2^k; of course the exponent already attached
2124  * to C_1 is taken into account in the following.
2125  * (3.2) If we have U_1*U_2, sort the two factors (distinct: squares are caught
2126  * in stage 3.1). N.B. U_1 and U_2 are smaller than anything else in our list.
2127  * (3.3) Check U_1 and U_2 for primality, and flag them accordingly.
2128  * (3.4) Iterate.
2129  *
2130  * When the smallest item is Q_1:
2131  * This is the unpleasant case.  We go through the entire list and try to
2132  * divide Q_1 off each of the current C_k's, which usually fails, but may
2133  * succeed several times. When a division was successful, the corresponding
2134  * C_k is removed from our list, and the cofactor becomes a U_l for the moment
2135  * unless it is 1 (which happens when C_k was a power of Q_1).  When we're
2136  * through we upgrade Q_1 to P_1 status, then do a primality check on each U_l
2137  * and sort it back into the list either as a Q_j or as a C_k.  If during the
2138  * insertion sort we discover that some U_l equals some P_i or Q_j or C_k we
2139  * already have, we just add U_l's exponent to that of its twin. (The sorting
2140  * therefore happens before the primality test). Since this may produce one or
2141  * more elements smaller than the P_1 we just confirmed, we may have to repeat
2142  * the iteration.
2143  * A trick avoids some Q_1 instances: just after the sweep classifying
2144  * all current unknowns as either composites or primes, we do another downward
2145  * sweep beginning with the largest current factor and stopping just above the
2146  * largest current composite.  Every Q_j we pass is turned into a P_i.
2147  * (Different primes are automatically coprime among each other, and primes do
2148  * not divide smaller composites.)
2149  * NB: We have no use for comparing the square of a prime to N0.  Normally
2150  * we will get called after casting out only the smallest primes, and
2151  * since we cannot guarantee that we see the large prime factors in as-
2152  * cending order, we cannot stop when we find one larger than sqrt(N0). */
2153 
2154 /* Data structure: We keep everything in a single t_VEC of t_INTs.  The
2155  * first 2 components are read-only:
2156  * 1) the first records whether we're doing full (NULL) or Moebius (gen_1)
2157  * factorization; in the latter case subroutines return a sentinel value as
2158  * soon as they spot an exponent > 1.
2159  * 2) the second records the hint from factorint()'s optional flag, for use by
2160  * ifac_crack().
2161  *
2162  * The remaining components (initially 15) are used in groups of three:
2163  * [ factor (t_INT), exponent (t_INT), factor class ], where factor class is
2164  *  NULL : unknown
2165  *  gen_0: known composite C_k
2166  *  gen_1: known prime Q_j awaiting trial division
2167  *  gen_2: finished prime P_i.
2168  * When during the division stage we re-sort a C_k-turned-U_l to a lower
2169  * position, we rotate any intervening material upward towards its old
2170  * slot.  When a C_k was divided down to 1, its slot is left empty at
2171  * first; similarly when the re-sorting detects a repeated factor.
2172  * After the sorting phase, we de-fragment the list and squeeze all the
2173  * occupied slots together to the high end, so that ifac_crack() has room
2174  * for new factors.  When this doesn't suffice, we abandon the current vector
2175  * and allocate a somewhat larger one, defragmenting again while copying.
2176  *
2177  * For internal use: note that all exponents will fit into C longs, given
2178  * PARI's lgefint field size.  When we work with them, we sometimes read
2179  * out the GEN pointer, and sometimes do an itos, whatever is more con-
2180  * venient for the task at hand. */
2181 
2182 /*** Overview ***/
2183 
2184 /* The '*where' argument in the following points into *partial at the first of
2185  * the three fields of the first occupied slot.  It's there because the caller
2186  * would already know where 'here' is, so we don't want to search for it again.
2187  * We do not preserve this from one user-interface call to the next. */
2188 
2189 /* In the most cases, control flows from the user interface to ifac_main() and
2190  * then to a succession of ifac_crack()s and ifac_divide()s, with (typically)
2191  * none of the latter finding anything. */
2192 
2193 #define LAST(x) x+lg(x)-3
2194 #define FIRST(x) x+3
2195 
2196 #define MOEBIUS(x) gel(x,1)
2197 #define HINT(x) gel(x,2)
2198 
2199 /* y <- x */
2200 INLINE void
SHALLOWCOPY(GEN x,GEN y)2201 SHALLOWCOPY(GEN x, GEN y) {
2202   VALUE(y) = VALUE(x);
2203   EXPON(y) = EXPON(x);
2204   CLASS(y) = CLASS(x);
2205 }
2206 /* y <- x */
2207 INLINE void
COPY(GEN x,GEN y)2208 COPY(GEN x, GEN y) {
2209   icopyifstack(VALUE(x), VALUE(y));
2210   icopyifstack(EXPON(x), EXPON(y));
2211   CLASS(y) = CLASS(x);
2212 }
2213 
2214 /* Diagnostics */
2215 static void
ifac_factor_dbg(GEN x)2216 ifac_factor_dbg(GEN x)
2217 {
2218   GEN c = CLASS(x), v = VALUE(x);
2219   if (c == gen_2) err_printf("IFAC: factor %Ps\n\tis prime (finished)\n", v);
2220   else if (c == gen_1) err_printf("IFAC: factor %Ps\n\tis prime\n", v);
2221   else if (c == gen_0) err_printf("IFAC: factor %Ps\n\tis composite\n", v);
2222 }
2223 static void
ifac_check(GEN partial,GEN where)2224 ifac_check(GEN partial, GEN where)
2225 {
2226   if (!where || where < FIRST(partial) || where > LAST(partial))
2227     pari_err_BUG("ifac_check ['where' out of bounds]");
2228 }
2229 static void
ifac_print(GEN part,GEN where)2230 ifac_print(GEN part, GEN where)
2231 {
2232   long l = lg(part);
2233   GEN p;
2234 
2235   err_printf("ifac partial factorization structure: %ld slots, ", (l-3)/3);
2236   if (MOEBIUS(part)) err_printf("Moebius mode, ");
2237   err_printf("hint = %ld\n", itos(HINT(part)));
2238   ifac_check(part, where);
2239   for (p = part+3; p < part + l; p += 3)
2240   {
2241     GEN v = VALUE(p), e = EXPON(p), c = CLASS(p);
2242     const char *s = "";
2243     if (!v) { err_printf("[empty slot]\n"); continue; }
2244     if (c == NULL) s = "unknown";
2245     else if (c == gen_0) s = "composite";
2246     else if (c == gen_1) s = "unfinished prime";
2247     else if (c == gen_2) s = "prime";
2248     else pari_err_BUG("unknown factor class");
2249     err_printf("[%Ps, %Ps, %s]\n", v, e, s);
2250   }
2251   err_printf("Done.\n");
2252 }
2253 
2254 static const long decomp_default_hint = 0;
2255 /* assume n > 0, which we can assign to */
2256 /* return initial data structure, see ifac_crack() for the hint argument */
2257 static GEN
ifac_start_hint(GEN n,int moebius,long hint)2258 ifac_start_hint(GEN n, int moebius, long hint)
2259 {
2260   const long ifac_initial_length = 3 + 7*3;
2261   /* codeword, moebius, hint, 7 slots -- a 512-bit product of distinct 8-bit
2262    * primes needs at most 7 slots at a time) */
2263   GEN here, part = cgetg(ifac_initial_length, t_VEC);
2264 
2265   MOEBIUS(part) = moebius? gen_1 : NULL;
2266   HINT(part) = stoi(hint);
2267   /* fill first slot at the top end */
2268   here = part + ifac_initial_length - 3; /* LAST(part) */
2269   INIT(here, n,gen_1,gen_0); /* n^1: composite */
2270   while ((here -= 3) > part) ifac_delete(here);
2271   return part;
2272 }
2273 GEN
ifac_start(GEN n,int moebius)2274 ifac_start(GEN n, int moebius)
2275 { return ifac_start_hint(n,moebius,decomp_default_hint); }
2276 
2277 /* Return next nonempty slot after 'here', NULL if none exist */
2278 static GEN
ifac_find(GEN partial)2279 ifac_find(GEN partial)
2280 {
2281   GEN scan, end = partial + lg(partial);
2282 
2283 #ifdef IFAC_DEBUG
2284   ifac_check(partial, partial);
2285 #endif
2286   for (scan = partial+3; scan < end; scan += 3)
2287     if (VALUE(scan)) return scan;
2288   return NULL;
2289 }
2290 
2291 /* Defragment: squeeze out unoccupied slots above *where. Unoccupied slots
2292  * arise when a composite factor dissolves completely whilst dividing off a
2293  * prime, or when ifac_resort() spots a coincidence and merges two factors.
2294  * Update *where */
2295 static void
ifac_defrag(GEN * partial,GEN * where)2296 ifac_defrag(GEN *partial, GEN *where)
2297 {
2298   GEN scan_new = LAST(*partial), scan_old;
2299 
2300   for (scan_old = scan_new; scan_old >= *where; scan_old -= 3)
2301   {
2302     if (!VALUE(scan_old)) continue; /* empty slot */
2303     if (scan_old < scan_new) SHALLOWCOPY(scan_old, scan_new);
2304     scan_new -= 3; /* point at next slot to be written */
2305   }
2306   scan_new += 3; /* back up to last slot written */
2307   *where = scan_new;
2308   while ((scan_new -= 3) > *partial) ifac_delete(scan_new); /* erase junk */
2309 }
2310 
2311 /* Move to a larger main vector, updating *where if it points into it, and
2312  * *partial in any case. Can be used as a specialized gcopy before
2313  * a gerepileupto() (pass 0 as the new length). Normally, one would pass
2314  * new_lg=1 to let this function guess the new size.  To be used sparingly.
2315  * Complex version of ifac_defrag(), combined with reallocation.  If new_lg
2316  * is 0, use the old length, so this acts just like gcopy except that the
2317  * 'where' pointer is carried along; if it is 1, we make an educated guess.
2318  * Exception:  If new_lg is 0, the vector is full to the brim, and the first
2319  * entry is composite, we make it longer to avoid being called again a
2320  * microsecond later. It is safe to call this with *where = NULL:
2321  * if it doesn't point anywhere within the old structure, it is left alone */
2322 static void
ifac_realloc(GEN * partial,GEN * where,long new_lg)2323 ifac_realloc(GEN *partial, GEN *where, long new_lg)
2324 {
2325   long old_lg = lg(*partial);
2326   GEN newpart, scan_new, scan_old;
2327 
2328   if (new_lg == 1)
2329     new_lg = 2*old_lg - 6;        /* from 7 slots to 13 to 25... */
2330   else if (new_lg <= old_lg)        /* includes case new_lg == 0 */
2331   {
2332     GEN first = *partial + 3;
2333     new_lg = old_lg;
2334     /* structure full and first entry composite or unknown */
2335     if (VALUE(first) && (CLASS(first) == gen_0 || CLASS(first)==NULL))
2336       new_lg += 6; /* give it a little more breathing space */
2337   }
2338   newpart = cgetg(new_lg, t_VEC);
2339   if (DEBUGMEM >= 3)
2340     err_printf("IFAC: new partial factorization structure (%ld slots)\n",
2341                (new_lg - 3)/3);
2342   MOEBIUS(newpart) = MOEBIUS(*partial);
2343   icopyifstack(HINT(*partial), HINT(newpart));
2344   /* Downward sweep through the old *partial. Pick up 'where' and carry it
2345    * over if we pass it. (Only useful if it pointed at a nonempty slot.)
2346    * Factors are COPY'd so that we again have a nice object (parent older
2347    * than children, connected), except the one factor that may still be living
2348    * in a clone where n originally was; exponents are similarly copied if they
2349    * aren't global constants; class-of-factor fields are global constants so we
2350    * need only copy them as pointers. Caller may then do a gerepileupto() */
2351   scan_new = newpart + new_lg - 3; /* LAST(newpart) */
2352   scan_old = *partial + old_lg - 3; /* LAST(*partial) */
2353   for (; scan_old > *partial + 2; scan_old -= 3)
2354   {
2355     if (*where == scan_old) *where = scan_new;
2356     if (!VALUE(scan_old)) continue; /* skip empty slots */
2357     COPY(scan_old, scan_new); scan_new -= 3;
2358   }
2359   scan_new += 3; /* back up to last slot written */
2360   while ((scan_new -= 3) > newpart) ifac_delete(scan_new);
2361   *partial = newpart;
2362 }
2363 
2364 /* Re-sort one (typically unknown) entry from washere to a new position,
2365  * rotating intervening entries upward to fill the vacant space. If the new
2366  * position is the same as the old one, or the new value of the entry coincides
2367  * with a value already occupying a lower slot, then we just add exponents (and
2368  * use the 'more known' class, and return 1 immediately when in Moebius mode).
2369  * Slots between *where and washere must be in sorted order, so a sweep using
2370  * this to re-sort several unknowns must proceed upward, see ifac_resort().
2371  * Bubble-sort-of-thing sort. Won't be exercised frequently, so this is ok */
2372 static void
ifac_sort_one(GEN * where,GEN washere)2373 ifac_sort_one(GEN *where, GEN washere)
2374 {
2375   GEN old, scan = washere - 3;
2376   GEN value, exponent, class0, class1;
2377   long cmp_res;
2378 
2379   if (scan < *where) return; /* nothing to do, washere==*where */
2380   value    = VALUE(washere);
2381   exponent = EXPON(washere);
2382   class0 = CLASS(washere);
2383   cmp_res = -1; /* sentinel */
2384   while (scan >= *where) /* at least once */
2385   {
2386     if (VALUE(scan))
2387     { /* current slot nonempty, check against where */
2388       cmp_res = cmpii(value, VALUE(scan));
2389       if (cmp_res >= 0) break; /* have found where to stop */
2390     }
2391     /* copy current slot upward by one position and move pointers down */
2392     SHALLOWCOPY(scan, scan+3);
2393     scan -= 3;
2394   }
2395   scan += 3;
2396   /* At this point there are the following possibilities:
2397    * 1) cmp_res == -1. Either value is less than that at *where, or *where was
2398    * pointing at vacant slots and any factors we saw en route were larger than
2399    * value. At any rate, scan == *where now, and scan is pointing at an empty
2400    * slot, into which we'll stash our entry.
2401    * 2) cmp_res == 0. The entry at scan-3 is the one, we compare class0
2402    * fields and add exponents, and put it all into the vacated scan slot,
2403    * NULLing the one at scan-3 (and possibly updating *where).
2404    * 3) cmp_res == 1. The slot at scan is the one to store our entry into. */
2405   if (cmp_res)
2406   {
2407     if (cmp_res < 0 && scan != *where)
2408       pari_err_BUG("ifact_sort_one [misaligned partial]");
2409     INIT(scan, value, exponent, class0); return;
2410   }
2411   /* case cmp_res == 0: repeated factor detected */
2412   if (DEBUGLEVEL >= 4)
2413     err_printf("IFAC: repeated factor %Ps\n\tin ifac_sort_one\n", value);
2414   old = scan - 3;
2415   /* if old class0 was composite and new is prime, or vice versa, complain
2416    * (and if one class0 was unknown and the other wasn't, use the known one) */
2417   class1 = CLASS(old);
2418   if (class0) /* should never be used */
2419   {
2420     if (class1)
2421     {
2422       if (class0 == gen_0 && class1 != gen_0)
2423         pari_err_BUG("ifac_sort_one (composite = prime)");
2424       else if (class0 != gen_0 && class1 == gen_0)
2425         pari_err_BUG("ifac_sort_one (prime = composite)");
2426       else if (class0 == gen_2)
2427         CLASS(scan) = class0;
2428     }
2429     else
2430       CLASS(scan) = class0;
2431   }
2432   /* else stay with the existing known class0 */
2433   CLASS(scan) = class1;
2434   /* in any case, add exponents */
2435   if (EXPON(old) == gen_1 && exponent == gen_1)
2436     EXPON(scan) = gen_2;
2437   else
2438     EXPON(scan) = addii(EXPON(old), exponent);
2439   /* move the value over and null out the vacated slot below */
2440   old = scan - 3;
2441   *scan = *old;
2442   ifac_delete(old);
2443   /* finally, see whether *where should be pulled in */
2444   if (old == *where) *where += 3;
2445 }
2446 
2447 /* Sort all current unknowns downward to where they belong. Sweeps in the
2448  * upward direction. Not needed after ifac_crack(), only when ifac_divide()
2449  * returned true. Update *where. */
2450 static void
ifac_resort(GEN * partial,GEN * where)2451 ifac_resort(GEN *partial, GEN *where)
2452 {
2453   GEN scan, end;
2454   ifac_defrag(partial, where); end = LAST(*partial);
2455   for (scan = *where; scan <= end; scan += 3)
2456     if (VALUE(scan) && !CLASS(scan)) ifac_sort_one(where, scan); /*unknown*/
2457   ifac_defrag(partial, where); /* remove newly created gaps */
2458 }
2459 
2460 /* Let x be a t_INT known not to have small divisors (< 2^14). Return 0 if x
2461  * is a proven composite. Return 1 if we believe it to be prime (fully proven
2462  * prime if factor_proven is set).  */
2463 int
ifac_isprime(GEN x)2464 ifac_isprime(GEN x)
2465 {
2466   if (!BPSW_psp_nosmalldiv(x)) return 0; /* composite */
2467   if (factor_proven && ! BPSW_isprime(x))
2468   {
2469     pari_warn(warner,
2470               "IFAC: pseudo-prime %Ps\n\tis not prime. PLEASE REPORT!\n", x);
2471     return 0;
2472   }
2473   return 1;
2474 }
2475 
2476 static int
ifac_checkprime(GEN x)2477 ifac_checkprime(GEN x)
2478 {
2479   int res = ifac_isprime(VALUE(x));
2480   CLASS(x) = res? gen_1: gen_0;
2481   if (DEBUGLEVEL>2) ifac_factor_dbg(x);
2482   return res;
2483 }
2484 
2485 /* Determine primality or compositeness of all current unknowns, and set
2486  * class Q primes to finished (class P) if everything larger is already
2487  * known to be prime.  When after_crack >= 0, only look at the
2488  * first after_crack things in the list (do nothing when it's 0) */
2489 static void
ifac_whoiswho(GEN * partial,GEN * where,long after_crack)2490 ifac_whoiswho(GEN *partial, GEN *where, long after_crack)
2491 {
2492   GEN scan, scan_end = LAST(*partial);
2493 
2494 #ifdef IFAC_DEBUG
2495   ifac_check(*partial, *where);
2496 #endif
2497   if (after_crack == 0) return;
2498   if (after_crack > 0) /* check at most after_crack entries */
2499     scan = *where + 3*(after_crack - 1); /* assert(scan <= scan_end) */
2500   else
2501     for (scan = scan_end; scan >= *where; scan -= 3)
2502     {
2503       if (CLASS(scan))
2504       { /* known class of factor */
2505         if (CLASS(scan) == gen_0) break;
2506         if (CLASS(scan) == gen_1)
2507         {
2508           if (DEBUGLEVEL>=3)
2509           {
2510             err_printf("IFAC: factor %Ps\n\tis prime (no larger composite)\n",
2511                        VALUE(*where));
2512             err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2513                        VALUE(*where), itos(EXPON(*where)));
2514           }
2515           CLASS(scan) = gen_2;
2516         }
2517         continue;
2518       }
2519       if (!ifac_checkprime(scan)) break; /* must disable Q-to-P */
2520       CLASS(scan) = gen_2; /* P_i, finished prime */
2521       if (DEBUGLEVEL>2) ifac_factor_dbg(scan);
2522     }
2523   /* go on, Q-to-P trick now disabled */
2524   for (; scan >= *where; scan -= 3)
2525   {
2526     if (CLASS(scan)) continue;
2527     (void)ifac_checkprime(scan); /* Qj | Ck */
2528   }
2529 }
2530 
2531 /* Divide all current composites by first (prime, class Q) entry, updating its
2532  * exponent, and turning it into a finished prime (class P).  Return 1 if any
2533  * such divisions succeeded  (in Moebius mode, the update may then not have
2534  * been completed), or 0 if none of them succeeded.  Doesn't modify *where.
2535  * Here we normally do not check that the first entry is a not-finished
2536  * prime.  Stack management: we may allocate a new exponent */
2537 static long
ifac_divide(GEN * partial,GEN * where,long moebius_mode)2538 ifac_divide(GEN *partial, GEN *where, long moebius_mode)
2539 {
2540   GEN scan, scan_end = LAST(*partial);
2541   long res = 0, exponent, newexp, otherexp;
2542 
2543 #ifdef IFAC_DEBUG
2544   ifac_check(*partial, *where);
2545   if (CLASS(*where) != gen_1)
2546     pari_err_BUG("ifac_divide [division by composite or finished prime]");
2547   if (!VALUE(*where)) pari_err_BUG("ifac_divide [division by nothing]");
2548 #endif
2549   newexp = exponent = itos(EXPON(*where));
2550   if (exponent > 1 && moebius_mode) return 1;
2551   /* should've been caught by caller */
2552 
2553   for (scan = *where+3; scan <= scan_end; scan += 3)
2554   {
2555     if (CLASS(scan) != gen_0) continue; /* the other thing ain't composite */
2556     otherexp = 0;
2557     /* divide in place to keep stack clutter minimal */
2558     while (dvdiiz(VALUE(scan), VALUE(*where), VALUE(scan)))
2559     {
2560       if (moebius_mode) return 1; /* immediately */
2561       if (!otherexp) otherexp = itos(EXPON(scan));
2562       newexp += otherexp;
2563     }
2564     if (newexp > exponent)        /* did anything happen? */
2565     {
2566       EXPON(*where) = (newexp == 2 ? gen_2 : utoipos(newexp));
2567       exponent = newexp;
2568       if (is_pm1((GEN)*scan)) /* factor dissolved completely */
2569       {
2570         ifac_delete(scan);
2571         if (DEBUGLEVEL >= 4)
2572           err_printf("IFAC: a factor was a power of another prime factor\n");
2573       } else {
2574         CLASS(scan) = NULL;        /* at any rate it's Unknown now */
2575         if (DEBUGLEVEL >= 4)
2576           err_printf("IFAC: a factor was divisible by another prime factor,\n"
2577                      "\tleaving a cofactor = %Ps\n", VALUE(scan));
2578       }
2579       res = 1;
2580       if (DEBUGLEVEL >= 5)
2581         err_printf("IFAC: prime %Ps\n\tappears at least to the power %ld\n",
2582                    VALUE(*where), newexp);
2583     }
2584   } /* for */
2585   CLASS(*where) = gen_2; /* make it a finished prime */
2586   if (DEBUGLEVEL >= 3)
2587     err_printf("IFAC: prime %Ps\n\tappears with exponent = %ld\n",
2588                VALUE(*where), newexp);
2589   return res;
2590 }
2591 
2592 /* found out our integer was factor^exp. Update */
2593 static void
update_pow(GEN where,GEN factor,long exp,pari_sp * av)2594 update_pow(GEN where, GEN factor, long exp, pari_sp *av)
2595 {
2596   GEN ex = EXPON(where);
2597   if (DEBUGLEVEL>3)
2598     err_printf("IFAC: found %Ps =\n\t%Ps ^%ld\n", *where, factor, exp);
2599   affii(factor, VALUE(where)); set_avma(*av);
2600   if (ex == gen_1)
2601   { EXPON(where) = exp == 2? gen_2: utoipos(exp); *av = avma; }
2602   else if (ex == gen_2)
2603   { EXPON(where) = utoipos(exp<<1); *av = avma; }
2604   else
2605     affsi(exp * itos(ex), EXPON(where));
2606 }
2607 /* hint = 0 : Use a default strategy
2608  * hint & 1 : avoid MPQS
2609  * hint & 2 : avoid first-stage ECM (may fall back to ECM if MPQS gives up)
2610  * hint & 4 : avoid Pollard and SQUFOF stages.
2611  * hint & 8 : avoid final ECM; may flag a composite as prime. */
2612 #define get_hint(partial) (itos(HINT(*partial)) & 15)
2613 
2614 /* Complete ifac_crack's job when a factoring engine splits the current factor
2615  * into a product of three or more new factors. Makes room for them if
2616  * necessary, sorts them, gives them the right exponents and class. Returns the
2617  * number of factors actually written, which may be less than #facvec if there
2618  * are duplicates. Vectors of factors (cf pollardbrent() or mpqs()) contain
2619  * 'slots' of 3 GENs per factor, interpreted as in our partial factorization
2620  * data structure. Thus engines can tell us what they already know about
2621  * factors being prime/composite or appearing to a power larger than thefirst.
2622  * Don't collect garbage.  No diagnostics: engine has printed what it found.
2623  * facvec contains slots of three components per factor; repeated factors are
2624  * allowed (their classes shouldn't contradict each other whereas their
2625  * exponents will be added up) */
2626 static long
ifac_insert_multiplet(GEN * partial,GEN * where,GEN facvec,long moebius_mode)2627 ifac_insert_multiplet(GEN *partial, GEN *where, GEN facvec, long moebius_mode)
2628 {
2629   long j,k=1, lfv=lg(facvec)-1, nf=lfv/3, room=(long)(*where-*partial);
2630   /* one of the factors will go into the *where slot, so room is now 3 times
2631    * the number of slots we can use */
2632   long needroom = lfv - room;
2633   GEN newexp, cur, sorted, auxvec = cgetg(nf+1, t_VEC), factor;
2634   long E = itos(EXPON(*where)); /* the old exponent */
2635 
2636   if (DEBUGLEVEL >= 5) /* squfof may return a single squared factor as a set */
2637     err_printf("IFAC: incorporating set of %ld factor(s)\n", nf);
2638   if (needroom > 0) ifac_realloc(partial, where, lg(*partial) + needroom);
2639 
2640   /* create sort permutation from the values of the factors */
2641   for (j=nf; j; j--) gel(auxvec,j) = gel(facvec,3*j-2);
2642   sorted = ZV_indexsort(auxvec);
2643   /* store factors, beginning at *where, and catching any duplicates */
2644   cur = facvec + 3*sorted[nf]-2; /* adjust for triple spacing */
2645   VALUE(*where) = VALUE(cur);
2646   newexp = EXPON(cur);
2647   /* if new exponent is 1, the old exponent already in place will do */
2648   if (newexp != gen_1) EXPON(*where) = mului(E,newexp);
2649   CLASS(*where) = CLASS(cur);
2650   if (DEBUGLEVEL >= 6) err_printf("\tstored (largest) factor no. %ld...\n", nf);
2651 
2652   for (j=nf-1; j; j--)
2653   {
2654     GEN e;
2655     cur = facvec + 3*sorted[j]-2;
2656     factor = VALUE(cur);
2657     if (equalii(factor, VALUE(*where)))
2658     {
2659       if (DEBUGLEVEL >= 6)
2660         err_printf("\tfactor no. %ld is a duplicate%s\n", j, (j>1? "...": ""));
2661       /* update exponent, ignore class which would already have been set,
2662        * then forget current factor */
2663       newexp = EXPON(cur);
2664       if (newexp != gen_1) /* new exp > 1 */
2665         e = addiu(EXPON(*where), E * itou(newexp));
2666       else if (EXPON(*where) == gen_1 && E == 1)
2667         e = gen_2;
2668       else
2669         e = addiu(EXPON(*where), E);
2670       EXPON(*where) = e;
2671 
2672       if (moebius_mode) return 0; /* stop now, with exponent updated */
2673       continue;
2674     }
2675 
2676     *where -= 3;
2677     CLASS(*where) = CLASS(cur); /* class as given */
2678     newexp = EXPON(cur);
2679     if (newexp != gen_1) /* new exp > 1 */
2680       e = (E == 1 && newexp == gen_2)? gen_2: mului(E, newexp);
2681     else /* inherit parent's exponent */
2682       e = (E == 1 ? gen_1 : (E == 2 ? gen_2 : utoipos(E)));
2683     EXPON(*where) = e;
2684     /* keep components younger than *partial */
2685     icopyifstack(factor, VALUE(*where));
2686     k++;
2687     if (DEBUGLEVEL >= 6)
2688       err_printf("\tfactor no. %ld was unique%s\n", j, j>1? " (so far)...": "");
2689   }
2690   return k;
2691 }
2692 
2693 /* Split the first (composite) entry.  There _must_ already be room for another
2694  * factor below *where, and *where is updated. Two cases:
2695  * - entry = factor^k is a pure power: factor^k is inserted, leaving *where
2696  *   unchanged;
2697  * - entry = factor * cofactor (not necessarily coprime): both factors are
2698  *   inserted in the correct order, updating *where
2699  * The inserted factors class is set to unknown, they inherit the exponent
2700  * (or a multiple thereof) of their ancestor.
2701  *
2702  * Returns number of factors written into the structure, normally 2 (1 if pure
2703  * power, maybe > 2 if a factoring engine returned a vector of factors instead
2704  * of a single factor). Can reallocate the data structure in the
2705  * vector-of-factors case, not in the most common single-factor case.
2706  * Stack housekeeping:  this routine may create one or more objects  (a new
2707  * factor, or possibly several, and perhaps one or more new exponents > 2) */
2708 static long
ifac_crack(GEN * partial,GEN * where,long moebius_mode)2709 ifac_crack(GEN *partial, GEN *where, long moebius_mode)
2710 {
2711   long cmp_res, hint = get_hint(partial);
2712   GEN factor, exponent;
2713 
2714 #ifdef IFAC_DEBUG
2715   ifac_check(*partial, *where);
2716   if (*where < *partial + 6)
2717     pari_err_BUG("ifac_crack ['*where' out of bounds]");
2718   if (!(VALUE(*where)) || typ(VALUE(*where)) != t_INT)
2719     pari_err_BUG("ifac_crack [incorrect VALUE(*where)]");
2720   if (CLASS(*where) != gen_0)
2721     pari_err_BUG("ifac_crack [operand not known composite]");
2722 #endif
2723 
2724   if (DEBUGLEVEL>2) {
2725     err_printf("IFAC: cracking composite\n\t%Ps\n", **where);
2726     if (DEBUGLEVEL>3) err_printf("IFAC: checking for pure square\n");
2727   }
2728   /* MPQS cannot factor prime powers. Look for pure powers even if MPQS is
2729    * blocked by hint: fast and useful in bounded factorization */
2730   {
2731     forprime_t T;
2732     ulong exp = 1, mask = 7;
2733     long good = 0;
2734     pari_sp av = avma;
2735     (void)u_forprime_init(&T, 11, ULONG_MAX);
2736     /* crack squares */
2737     while (Z_issquareall(VALUE(*where), &factor))
2738     {
2739       good = 1; /* remember we succeeded once */
2740       update_pow(*where, factor, 2, &av);
2741       if (moebius_mode) return 0; /* no need to carry on */
2742     }
2743     while ( (exp = is_357_power(VALUE(*where), &factor, &mask)) )
2744     {
2745       good = 1; /* remember we succeeded once */
2746       update_pow(*where, factor, exp, &av);
2747       if (moebius_mode) return 0; /* no need to carry on */
2748     }
2749     /* cutoff at 14 bits as trial division must have found everything below */
2750     while ( (exp = is_pth_power(VALUE(*where), &factor, &T, 15)) )
2751     {
2752       good = 1; /* remember we succeeded once */
2753       update_pow(*where, factor, exp, &av);
2754       if (moebius_mode) return 0; /* no need to carry on */
2755     }
2756 
2757     if (good && hint != 15 && ifac_checkprime(*where))
2758     { /* our composite was a prime power */
2759       if (DEBUGLEVEL>3)
2760         err_printf("IFAC: factor %Ps\n\tis prime\n", VALUE(*where));
2761       return 0; /* bypass subsequent ifac_whoiswho() call */
2762     }
2763   } /* pure power stage */
2764 
2765   factor = NULL;
2766   if (!(hint & 4))
2767   { /* pollardbrent() Rho usually gets a first chance */
2768     if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Pollard-Brent rho method\n");
2769     factor = pollardbrent(VALUE(*where));
2770     if (!factor)
2771     { /* Shanks' squfof() */
2772       if (DEBUGLEVEL >= 4)
2773         err_printf("IFAC: trying Shanks' SQUFOF, will fail silently if input\n"
2774                    "      is too large for it.\n");
2775       factor = squfof(VALUE(*where));
2776     }
2777   }
2778   if (!factor && !(hint & 2))
2779   { /* First ECM stage */
2780     if (DEBUGLEVEL >= 4) err_printf("IFAC: trying Lenstra-Montgomery ECM\n");
2781     factor = ellfacteur(VALUE(*where), 0); /* do not insist */
2782   }
2783   if (!factor && !(hint & 1))
2784   { /* MPQS stage */
2785     if (DEBUGLEVEL >= 4) err_printf("IFAC: trying MPQS\n");
2786     factor = mpqs(VALUE(*where));
2787   }
2788   if (!factor)
2789   {
2790     if (!(hint & 8))
2791     { /* still no luck? Final ECM stage, guaranteed to succeed */
2792       if (DEBUGLEVEL >= 4)
2793         err_printf("IFAC: forcing ECM, may take some time\n");
2794       factor = ellfacteur(VALUE(*where), 1);
2795     }
2796     else
2797     { /* limited factorization */
2798       if (DEBUGLEVEL >= 2)
2799       {
2800         if (hint != 15)
2801           pari_warn(warner, "IFAC: unfactored composite declared prime");
2802         else
2803           pari_warn(warner, "IFAC: untested integer declared prime");
2804 
2805         /* don't print it out at level 3 or above, where it would appear
2806          * several times before and after this message already */
2807         if (DEBUGLEVEL == 2) err_printf("\t%Ps\n", VALUE(*where));
2808       }
2809       CLASS(*where) = gen_1; /* might as well trial-divide by it... */
2810       return 1;
2811     }
2812   }
2813   if (typ(factor) == t_VEC) /* delegate this case */
2814     return ifac_insert_multiplet(partial, where, factor, moebius_mode);
2815   /* typ(factor) == t_INT */
2816   /* got single integer back:  work out the cofactor (in place) */
2817   if (!dvdiiz(VALUE(*where), factor, VALUE(*where)))
2818   {
2819     err_printf("IFAC: factoring %Ps\n", VALUE(*where));
2820     err_printf("\tyielded 'factor' %Ps\n\twhich isn't!\n", factor);
2821     pari_err_BUG("factoring");
2822   }
2823   /* factoring engines report the factor found; tell about the cofactor */
2824   if (DEBUGLEVEL >= 4) err_printf("IFAC: cofactor = %Ps\n", VALUE(*where));
2825 
2826   /* The two factors are 'factor' and VALUE(*where), find out which is larger */
2827   cmp_res = cmpii(factor, VALUE(*where));
2828   CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2829   exponent = EXPON(*where);
2830   *where -= 3;
2831   CLASS(*where) = NULL; /* mark factor /cofactor 'unknown' */
2832   icopyifstack(exponent, EXPON(*where));
2833   if (cmp_res < 0)
2834     VALUE(*where) = factor; /* common case */
2835   else if (cmp_res > 0)
2836   { /* factor > cofactor, rearrange */
2837     GEN old = *where + 3;
2838     VALUE(*where) = VALUE(old); /* move cofactor pointer to lowest slot */
2839     VALUE(old) = factor; /* save factor */
2840   }
2841   else pari_err_BUG("ifac_crack [Z_issquareall miss]");
2842   return 2;
2843 }
2844 
2845 /* main loop:  iterate until smallest entry is a finished prime;  returns
2846  * a 'where' pointer, or NULL if nothing left, or gen_0 in Moebius mode if
2847  * we aren't squarefree */
2848 static GEN
ifac_main(GEN * partial)2849 ifac_main(GEN *partial)
2850 {
2851   const long moebius_mode = !!MOEBIUS(*partial);
2852   GEN here = ifac_find(*partial);
2853   long nf;
2854 
2855   if (!here) return NULL; /* nothing left */
2856   /* loop until first entry is a finished prime.  May involve reallocations,
2857    * thus updates of *partial */
2858   while (CLASS(here) != gen_2)
2859   {
2860     if (CLASS(here) == gen_0) /* composite: crack it */
2861     { /* make sure there's room for another factor */
2862       if (here < *partial + 6)
2863       {
2864         ifac_defrag(partial, &here);
2865         if (here < *partial + 6) ifac_realloc(partial, &here, 1); /* no luck */
2866       }
2867       nf = ifac_crack(partial, &here, moebius_mode);
2868       if (moebius_mode && EXPON(here) != gen_1) /* that was a power */
2869       {
2870         if (DEBUGLEVEL >= 3)
2871           err_printf("IFAC: main loop: repeated new factor\n\t%Ps\n", *here);
2872         return gen_0;
2873       }
2874       /* deal with the new unknowns.  No sort: ifac_crack did it */
2875       ifac_whoiswho(partial, &here, nf);
2876       continue;
2877     }
2878     if (CLASS(here) == gen_1) /* prime but not yet finished: finish it */
2879     {
2880       if (ifac_divide(partial, &here, moebius_mode))
2881       {
2882         if (moebius_mode)
2883         {
2884           if (DEBUGLEVEL >= 3)
2885             err_printf("IFAC: main loop: another factor was divisible by\n"
2886                        "\t%Ps\n", *here);
2887           return gen_0;
2888         }
2889         ifac_resort(partial, &here); /* sort new cofactors down */
2890         ifac_whoiswho(partial, &here, -1);
2891       }
2892       continue;
2893     }
2894     pari_err_BUG("ifac_main [nonexistent factor class]");
2895   } /* while */
2896   if (moebius_mode && EXPON(here) != gen_1)
2897   {
2898     if (DEBUGLEVEL >= 3)
2899       err_printf("IFAC: after main loop: repeated old factor\n\t%Ps\n", *here);
2900     return gen_0;
2901   }
2902   if (DEBUGLEVEL >= 4)
2903   {
2904     nf = (*partial + lg(*partial) - here - 3)/3;
2905     if (nf)
2906       err_printf("IFAC: main loop: %ld factor%s left\n", nf, (nf>1)? "s": "");
2907     else
2908       err_printf("IFAC: main loop: this was the last factor\n");
2909   }
2910   if (factor_add_primes && !(get_hint(partial) & 8))
2911   {
2912     GEN p = VALUE(here);
2913     if (lgefint(p)>3 || uel(p,2) > 0x1000000UL) (void)addprimes(p);
2914   }
2915   return here;
2916 }
2917 
2918 /* Encapsulated routines */
2919 
2920 /* prime/exponent pairs need to appear contiguously on the stack, but we also
2921  * need our data structure somewhere, and we don't know in advance how many
2922  * primes will turn up.  The following discipline achieves this:  When
2923  * ifac_decomp() is called, n should point at an object older than the oldest
2924  * small prime/exponent pair  (ifactor() guarantees this).
2925  * We allocate sufficient space to accommodate several pairs -- eleven pairs
2926  * ought to fit in a space not much larger than n itself -- before calling
2927  * ifac_start().  If we manage to complete the factorization before we run out
2928  * of space, we free the data structure and cull the excess reserved space
2929  * before returning.  When we do run out, we have to leapfrog to generate more
2930  * (guesstimating the requirements from what is left in the partial
2931  * factorization structure);  room for fresh pairs is allocated at the head of
2932  * the stack, followed by an ifac_realloc() to reconnect the data structure and
2933  * move it out of the way, followed by a few pointer tweaks to connect the new
2934  * pairs space to the old one. This whole affair translates into a surprisingly
2935  * compact routine. */
2936 
2937 /* find primary factors of n; destroy n */
2938 static long
ifac_decomp(GEN n,long hint)2939 ifac_decomp(GEN n, long hint)
2940 {
2941   pari_sp av = avma;
2942   long nb = 0;
2943   GEN part, here, workspc, pairs = (GEN)av;
2944 
2945   /* workspc will be doled out in pairs of smaller t_INTs. For n = prod p^{e_p}
2946    * (p not necessarily prime), need room to store all p and e_p [ cgeti(3) ],
2947    * bounded by
2948    *    sum_{p | n} ( log_{2^BIL} (p) + 6 ) <= log_{2^BIL} n + 6 log_2 n */
2949   workspc = new_chunk((expi(n) + 1) * 7);
2950   part = ifac_start_hint(n, 0, hint);
2951   for (;;)
2952   {
2953     here = ifac_main(&part);
2954     if (!here) break;
2955     if (gc_needed(av,1))
2956     {
2957       long offset;
2958       if(DEBUGMEM>1)
2959       {
2960         pari_warn(warnmem,"[2] ifac_decomp");
2961         ifac_print(part, here);
2962       }
2963       ifac_realloc(&part, &here, 0);
2964       offset = here - part;
2965       part = gerepileupto((pari_sp)workspc, part);
2966       here = part + offset;
2967     }
2968     nb++;
2969     pairs = icopy_avma(VALUE(here), (pari_sp)pairs);
2970     pairs = icopy_avma(EXPON(here), (pari_sp)pairs);
2971     ifac_delete(here);
2972   }
2973   set_avma((pari_sp)pairs);
2974   if (DEBUGLEVEL >= 3)
2975     err_printf("IFAC: found %ld large prime (power) factor%s.\n",
2976                nb, (nb>1? "s": ""));
2977   return nb;
2978 }
2979 
2980 /***********************************************************************/
2981 /**            ARITHMETIC FUNCTIONS WITH EARLY-ABORT                  **/
2982 /**  needing direct access to the factoring machinery to avoid work:  **/
2983 /**  e.g. if we find a square factor, moebius returns 0, core doesn't **/
2984 /**  need to factor it, etc.                                          **/
2985 /***********************************************************************/
2986 /* memory management */
2987 static void
ifac_GC(pari_sp av,GEN * part)2988 ifac_GC(pari_sp av, GEN *part)
2989 {
2990   GEN here = NULL;
2991   if(DEBUGMEM>1) pari_warn(warnmem,"ifac_xxx");
2992   ifac_realloc(part, &here, 0);
2993   *part = gerepileupto(av, *part);
2994 }
2995 
2996 /* destroys n */
2997 static long
ifac_moebius(GEN n)2998 ifac_moebius(GEN n)
2999 {
3000   long mu = 1;
3001   pari_sp av = avma;
3002   GEN part = ifac_start(n, 1);
3003   for(;;)
3004   {
3005     long v;
3006     GEN p;
3007     if (!ifac_next(&part,&p,&v)) return v? 0: mu;
3008     mu = -mu;
3009     if (gc_needed(av,1)) ifac_GC(av,&part);
3010   }
3011 }
3012 
3013 int
ifac_read(GEN part,GEN * p,long * e)3014 ifac_read(GEN part, GEN *p, long *e)
3015 {
3016   GEN here = ifac_find(part);
3017   if (!here) return 0;
3018   *p = VALUE(here);
3019   *e = EXPON(here)[2];
3020   return 1;
3021 }
3022 void
ifac_skip(GEN part)3023 ifac_skip(GEN part)
3024 {
3025   GEN here = ifac_find(part);
3026   if (here) ifac_delete(here);
3027 }
3028 
3029 /* destroys n */
3030 static int
ifac_ispowerful(GEN n)3031 ifac_ispowerful(GEN n)
3032 {
3033   pari_sp av = avma;
3034   GEN part = ifac_start(n, 0);
3035   for(;;)
3036   {
3037     long e;
3038     GEN p;
3039     if (!ifac_read(part,&p,&e)) return 1;
3040     /* power: skip */
3041     if (e != 1 || Z_isanypower(p,NULL)) { ifac_skip(part); continue; }
3042     if (!ifac_next(&part,&p,&e)) return 1;
3043     if (e == 1) return 0;
3044     if (gc_needed(av,1)) ifac_GC(av,&part);
3045   }
3046 }
3047 /* destroys n */
3048 static GEN
ifac_core(GEN n)3049 ifac_core(GEN n)
3050 {
3051   GEN m = gen_1, c = cgeti(lgefint(n));
3052   pari_sp av = avma;
3053   GEN part = ifac_start(n, 0);
3054   for(;;)
3055   {
3056     long e;
3057     GEN p;
3058     if (!ifac_read(part,&p,&e)) return m;
3059     /* square: skip */
3060     if (!odd(e) || Z_issquare(p)) { ifac_skip(part); continue; }
3061     if (!ifac_next(&part,&p,&e)) return m;
3062     if (odd(e)) m = mulii(m, p);
3063     if (gc_needed(av,1)) { affii(m,c); m=c; ifac_GC(av,&part); }
3064   }
3065 }
3066 
3067 /* Where to stop trial dividing in factorization. Guaranteed >= 2^14 */
3068 ulong
tridiv_bound(GEN n)3069 tridiv_bound(GEN n)
3070 {
3071   ulong l = (ulong)expi(n) + 1;
3072   if (l <= 32)  return 1UL<<14;
3073   if (l <= 512) return (l-16) << 10;
3074   return 1UL<<19; /* Rho is generally faster above this */
3075 }
3076 
3077 /* return a value <= (48 << 10) = 49152 < primelinit */
3078 static ulong
utridiv_bound(ulong n)3079 utridiv_bound(ulong n)
3080 {
3081 #ifdef LONG_IS_64BIT
3082   if (n & HIGHMASK)
3083     return ((ulong)expu(n) + 1 - 16) << 10;
3084 #else
3085   (void)n;
3086 #endif
3087   return 1UL<<14;
3088 }
3089 
3090 /* destroys n */
3091 static void
ifac_factoru(GEN n,long hint,GEN P,GEN E,long * pi)3092 ifac_factoru(GEN n, long hint, GEN P, GEN E, long *pi)
3093 {
3094   GEN part = ifac_start_hint(n, 0, hint);
3095   for(;;)
3096   {
3097     long v;
3098     GEN p;
3099     if (!ifac_next(&part,&p,&v)) return;
3100     P[*pi] = itou(p);
3101     E[*pi] = v;
3102     (*pi)++;
3103   }
3104 }
3105 /* destroys n */
3106 static long
ifac_moebiusu(GEN n)3107 ifac_moebiusu(GEN n)
3108 {
3109   GEN part = ifac_start(n, 1);
3110   long s = 1;
3111   for(;;)
3112   {
3113     long v;
3114     GEN p;
3115     if (!ifac_next(&part,&p,&v)) return v? 0: s;
3116     s = -s;
3117   }
3118 }
3119 
3120 INLINE ulong
u_forprime_next_fast(forprime_t * T)3121 u_forprime_next_fast(forprime_t *T)
3122 {
3123   if (*(T->d))
3124   {
3125     NEXT_PRIME_VIADIFF(T->p, T->d);
3126     return T->p > T->b ? 0: T->p;
3127   }
3128   return u_forprime_next(T);
3129 }
3130 
3131 /* Factor n and output [p,e] where
3132  * p, e are vecsmall with n = prod{p[i]^e[i]}. If all != 0:
3133  * if pU is not NULL, set it to unfactored composite; else include it
3134  * in factorization */
3135 static GEN
factoru_sign(ulong n,ulong all,long hint,ulong * pU1,ulong * pU2)3136 factoru_sign(ulong n, ulong all, long hint, ulong *pU1, ulong *pU2)
3137 {
3138   GEN f, E, E2, P, P2;
3139   pari_sp av;
3140   ulong p, lim;
3141   long i;
3142   forprime_t S;
3143 
3144   if (pU1) *pU1 = *pU2 = 1;
3145   if (n == 0) retmkvec2(mkvecsmall(0), mkvecsmall(1));
3146   if (n == 1) retmkvec2(cgetg(1,t_VECSMALL), cgetg(1,t_VECSMALL));
3147 
3148   f = cgetg(3,t_VEC); av = avma;
3149   lim = all; if (!lim) lim = utridiv_bound(n);
3150   /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3151   (void)new_chunk(16*2);
3152   P = cgetg(16, t_VECSMALL); i = 1;
3153   E = cgetg(16, t_VECSMALL);
3154   if (lim > 2)
3155   {
3156     long v = vals(n), oldi;
3157     if (v)
3158     {
3159       P[1] = 2; E[1] = v; i = 2;
3160       n >>= v; if (n == 1) goto END;
3161     }
3162     u_forprime_init(&S, 3, lim-1);
3163     oldi = i;
3164     while ( (p = u_forprime_next_fast(&S)) )
3165     {
3166       int stop;
3167       /* tiny integers without small factors are often primes */
3168       if (p == 673)
3169       {
3170         oldi = i;
3171         if (uisprime_661(n)) { P[i] = n; E[i] = 1; i++; goto END; }
3172       }
3173       v = u_lvalrem_stop(&n, p, &stop);
3174       if (v) {
3175         P[i] = p;
3176         E[i] = v; i++;
3177       }
3178       if (stop) {
3179         if (n != 1) { P[i] = n; E[i] = 1; i++; }
3180         goto END;
3181       }
3182     }
3183     if (oldi != i && !all && uisprime_661(n))
3184     { P[i] = n; E[i] = 1; i++; goto END; }
3185   }
3186   if (all)
3187   { /* smallfact: look for easy pure powers then stop */
3188 #ifdef LONG_IS_64BIT
3189     ulong mask = all > 563 ? (all > 7129 ? 1: 3): 7;
3190 #else
3191     ulong mask = all > 22 ? (all > 83 ? 1: 3): 7;
3192 #endif
3193     long k = 1, ex;
3194     while (uissquareall(n, &n)) k <<= 1;
3195     while ( (ex = uis_357_power(n, &n, &mask)) ) k *= ex;
3196     if (pU1 && !uisprime(n))
3197     { *pU1 = n; *pU2 = (ulong)k; }
3198     else
3199     { P[i] = n; E[i] = k; i++; }
3200     goto END;
3201   }
3202   {
3203     GEN perm;
3204     ifac_factoru(utoipos(n), hint, P, E, &i);
3205     setlg(P, i);
3206     perm = vecsmall_indexsort(P);
3207     P = vecsmallpermute(P, perm);
3208     E = vecsmallpermute(E, perm);
3209   }
3210 END:
3211   set_avma(av);
3212   P2 = cgetg(i, t_VECSMALL); gel(f,1) = P2;
3213   E2 = cgetg(i, t_VECSMALL); gel(f,2) = E2;
3214   while (--i >= 1) { P2[i] = P[i]; E2[i] = E[i]; }
3215   return f;
3216 }
3217 GEN
factoru(ulong n)3218 factoru(ulong n)
3219 { return factoru_sign(n, 0, decomp_default_hint, NULL, NULL); }
3220 
3221 ulong
radicalu(ulong n)3222 radicalu(ulong n)
3223 {
3224   pari_sp av = avma;
3225   return gc_long(av, zv_prod(gel(factoru(n),1)));
3226 }
3227 
3228 long
moebiusu_fact(GEN f)3229 moebiusu_fact(GEN f)
3230 {
3231   GEN E = gel(f,2);
3232   long i, l = lg(E);
3233   for (i = 1; i < l; i++)
3234     if (E[i] > 1) return 0;
3235   return odd(l)? 1: -1;
3236 }
3237 
3238 long
moebiusu(ulong n)3239 moebiusu(ulong n)
3240 {
3241   pari_sp av;
3242   ulong p;
3243   long s, v, test_prime;
3244   forprime_t S;
3245 
3246   switch(n)
3247   {
3248     case 0: (void)check_arith_non0(gen_0,"moebius");/*error*/
3249     case 1: return  1;
3250     case 2: return -1;
3251   }
3252   v = vals(n);
3253   if (v == 0)
3254     s = 1;
3255   else
3256   {
3257     if (v > 1) return 0;
3258     n >>= 1;
3259     s = -1;
3260   }
3261   av = avma;
3262   u_forprime_init(&S, 3, utridiv_bound(n));
3263   test_prime = 0;
3264   while ((p = u_forprime_next_fast(&S)))
3265   {
3266     int stop;
3267     /* tiny integers without small factors are often primes */
3268     if (p == 673)
3269     {
3270       test_prime = 0;
3271       if (uisprime_661(n)) return gc_long(av,-s);
3272     }
3273     v = u_lvalrem_stop(&n, p, &stop);
3274     if (v) {
3275       if (v > 1) return gc_long(av,0);
3276       test_prime = 1;
3277       s = -s;
3278     }
3279     if (stop) return gc_long(av, n==1? s: -s);
3280   }
3281   set_avma(av);
3282   if (test_prime && uisprime_661(n)) return -s;
3283   else
3284   {
3285     long t = ifac_moebiusu(utoipos(n));
3286     set_avma(av);
3287     if (t == 0) return 0;
3288     return (s == t)? 1: -1;
3289   }
3290 }
3291 
3292 long
moebius(GEN n)3293 moebius(GEN n)
3294 {
3295   pari_sp av = avma;
3296   GEN F;
3297   ulong p;
3298   long i, l, s, v;
3299   forprime_t S;
3300 
3301   if ((F = check_arith_non0(n,"moebius")))
3302   {
3303     GEN E;
3304     F = clean_Z_factor(F);
3305     E = gel(F,2);
3306     l = lg(E);
3307     for(i = 1; i < l; i++)
3308       if (!equali1(gel(E,i))) return gc_long(av,0);
3309     return gc_long(av, odd(l)? 1: -1);
3310   }
3311   if (lgefint(n) == 3) return moebiusu(uel(n,2));
3312   p = mod4(n); if (!p) return 0;
3313   if (p == 2) { s = -1; n = shifti(n, -1); } else { s = 1; n = icopy(n); }
3314   setabssign(n);
3315 
3316   u_forprime_init(&S, 3, tridiv_bound(n));
3317   while ((p = u_forprime_next_fast(&S)))
3318   {
3319     int stop;
3320     v = Z_lvalrem_stop(&n, p, &stop);
3321     if (v)
3322     {
3323       if (v > 1) return gc_long(av,0);
3324       s = -s;
3325       if (stop) return gc_long(av, is_pm1(n)? s: -s);
3326     }
3327   }
3328   l = lg(primetab);
3329   for (i = 1; i < l; i++)
3330   {
3331     v = Z_pvalrem(n, gel(primetab,i), &n);
3332     if (v)
3333     {
3334       if (v > 1) return gc_long(av,0);
3335       s = -s;
3336       if (is_pm1(n)) return gc_long(av,s);
3337     }
3338   }
3339   if (ifac_isprime(n)) return gc_long(av,-s);
3340   /* large composite without small factors */
3341   v = ifac_moebius(n);
3342   return gc_long(av, s < 0? -v: v); /* correct also if v==0 */
3343 }
3344 
3345 long
ispowerful(GEN n)3346 ispowerful(GEN n)
3347 {
3348   pari_sp av = avma;
3349   GEN F;
3350   ulong p, bound;
3351   long i, l, v;
3352   forprime_t S;
3353 
3354   if ((F = check_arith_all(n, "ispowerful")))
3355   {
3356     GEN p, P = gel(F,1), E = gel(F,2);
3357     if (lg(P) == 1) return 1; /* 1 */
3358     p = gel(P,1);
3359     if (!signe(p)) return 1; /* 0 */
3360     i = is_pm1(p)? 2: 1; /* skip -1 */
3361     l = lg(E);
3362     for (; i < l; i++)
3363       if (equali1(gel(E,i))) return 0;
3364     return 1;
3365   }
3366   if (!signe(n)) return 1;
3367 
3368   if (mod4(n) == 2) return 0;
3369   n = shifti(n, -vali(n));
3370   if (is_pm1(n)) return 1;
3371   setabssign(n);
3372   bound = tridiv_bound(n);
3373   u_forprime_init(&S, 3, bound);
3374   while ((p = u_forprime_next_fast(&S)))
3375   {
3376     int stop;
3377     v = Z_lvalrem_stop(&n, p, &stop);
3378     if (v)
3379     {
3380       if (v == 1) return gc_long(av,0);
3381       if (stop) return gc_long(av, is_pm1(n));
3382     }
3383   }
3384   l = lg(primetab);
3385   for (i = 1; i < l; i++)
3386   {
3387     v = Z_pvalrem(n, gel(primetab,i), &n);
3388     if (v)
3389     {
3390       if (v == 1) return gc_long(av,0);
3391       if (is_pm1(n)) return gc_long(av,1);
3392     }
3393   }
3394   /* no need to factor: must be p^2 or not powerful */
3395   if (cmpii(powuu(bound+1, 3), n) > 0) return gc_long(av,  Z_issquare(n));
3396 
3397   if (ifac_isprime(n)) return gc_long(av,0);
3398   /* large composite without small factors */
3399   return gc_long(av, ifac_ispowerful(n));
3400 }
3401 
3402 ulong
coreu_fact(GEN f)3403 coreu_fact(GEN f)
3404 {
3405   GEN P = gel(f,1), E = gel(f,2);
3406   long i, l = lg(P), m = 1;
3407   for (i = 1; i < l; i++)
3408   {
3409     ulong p = P[i], e = E[i];
3410     if (e & 1) m *= p;
3411   }
3412   return m;
3413 }
3414 ulong
coreu(ulong n)3415 coreu(ulong n)
3416 {
3417   pari_sp av;
3418   if (n == 0) return 0;
3419   av = avma; return gc_ulong(av, coreu_fact(factoru(n)));
3420 }
3421 GEN
core(GEN n)3422 core(GEN n)
3423 {
3424   pari_sp av = avma;
3425   GEN m, F;
3426   ulong p;
3427   long i, l, v;
3428   forprime_t S;
3429 
3430   if ((F = check_arith_all(n, "core")))
3431   {
3432     GEN p, x, P = gel(F,1), E = gel(F,2);
3433     long j = 1;
3434     if (lg(P) == 1) return gen_1;
3435     p = gel(P,1);
3436     if (!signe(p)) return gen_0;
3437     l = lg(P); x = cgetg(l, t_VEC);
3438     for (i = 1; i < l; i++)
3439       if (mpodd(gel(E,i))) gel(x,j++) = gel(P,i);
3440     setlg(x, j); return ZV_prod(x);
3441   }
3442   switch(lgefint(n))
3443   {
3444     case 2: return gen_0;
3445     case 3:
3446       p = coreu(uel(n,2));
3447       return signe(n) > 0? utoipos(p): utoineg(p);
3448   }
3449 
3450   m = signe(n) < 0? gen_m1: gen_1;
3451   n = absi_shallow(n);
3452   u_forprime_init(&S, 2, tridiv_bound(n));
3453   while ((p = u_forprime_next_fast(&S)))
3454   {
3455     int stop;
3456     v = Z_lvalrem_stop(&n, p, &stop);
3457     if (v)
3458     {
3459       if (v & 1) m = muliu(m, p);
3460       if (stop)
3461       {
3462         if (!is_pm1(n)) m = mulii(m, n);
3463         return gerepileuptoint(av, m);
3464       }
3465     }
3466   }
3467   l = lg(primetab);
3468   for (i = 1; i < l; i++)
3469   {
3470     GEN q = gel(primetab,i);
3471     v = Z_pvalrem(n, q, &n);
3472     if (v)
3473     {
3474       if (v & 1) m = mulii(m, q);
3475       if (is_pm1(n)) return gerepileuptoint(av, m);
3476     }
3477   }
3478   if (ifac_isprime(n)) { m = mulii(m, n); return gerepileuptoint(av, m); }
3479   if (m == gen_1) n = icopy(n); /* ifac_core destroys n */
3480   /* large composite without small factors */
3481   return gerepileuptoint(av, mulii(m, ifac_core(n)));
3482 }
3483 
3484 long
Z_issmooth(GEN m,ulong lim)3485 Z_issmooth(GEN m, ulong lim)
3486 {
3487   pari_sp av = avma;
3488   ulong p = 2;
3489   forprime_t S;
3490   u_forprime_init(&S, 2, lim);
3491   while ((p = u_forprime_next_fast(&S)))
3492   {
3493     int stop;
3494     (void)Z_lvalrem_stop(&m, p, &stop);
3495     if (stop) return gc_long(av, abscmpiu(m,lim) <= 0);
3496   }
3497   return gc_long(av,0);
3498 }
3499 
3500 GEN
Z_issmooth_fact(GEN m,ulong lim)3501 Z_issmooth_fact(GEN m, ulong lim)
3502 {
3503   pari_sp av = avma;
3504   GEN F, P, E;
3505   ulong p;
3506   long i = 1, l = expi(m)+1;
3507   forprime_t S;
3508   P = cgetg(l, t_VECSMALL);
3509   E = cgetg(l, t_VECSMALL);
3510   F = mkmat2(P,E);
3511   u_forprime_init(&S, 2, lim);
3512   while ((p = u_forprime_next_fast(&S)))
3513   {
3514     long v;
3515     int stop;
3516     if ((v = Z_lvalrem_stop(&m, p, &stop)))
3517     {
3518       P[i] = p;
3519       E[i] = v; i++;
3520       if (stop)
3521       {
3522         if (abscmpiu(m,lim) > 0) break;
3523         if (m[2] > 1) { P[i] = m[2]; E[i] = 1; i++; }
3524         setlg(P, i);
3525         setlg(E, i); return gc_const((pari_sp)F, F);
3526       }
3527     }
3528   }
3529   return gc_NULL(av);
3530 }
3531 
3532 /***********************************************************************/
3533 /**                                                                   **/
3534 /**       COMPUTING THE MATRIX OF PRIME DIVISORS AND EXPONENTS        **/
3535 /**                                                                   **/
3536 /***********************************************************************/
3537 static GEN
aux_end(GEN M,GEN n,long nb)3538 aux_end(GEN M, GEN n, long nb)
3539 {
3540   GEN P,E, z = (GEN)avma;
3541   long i;
3542 
3543   guncloneNULL(n);
3544   P = cgetg(nb+1,t_COL);
3545   E = cgetg(nb+1,t_COL);
3546   for (i=nb; i; i--)
3547   { /* allow a stackdummy in the middle */
3548     while (typ(z) != t_INT) z += lg(z);
3549     gel(E,i) = z; z += lg(z);
3550     gel(P,i) = z; z += lg(z);
3551   }
3552   gel(M,1) = P;
3553   gel(M,2) = E;
3554   return sort_factor(M, (void*)&abscmpii, cmp_nodata);
3555 }
3556 
3557 static void
STORE(long * nb,GEN x,long e)3558 STORE(long *nb, GEN x, long e) { (*nb)++; (void)x; (void)utoipos(e); }
3559 static void
STOREu(long * nb,ulong x,long e)3560 STOREu(long *nb, ulong x, long e) { STORE(nb, utoipos(x), e); }
3561 static void
STOREi(long * nb,GEN x,long e)3562 STOREi(long *nb, GEN x, long e) { STORE(nb, icopy(x), e); }
3563 /* no prime less than p divides n; return 1 if factored completely */
3564 static int
special_primes(GEN n,ulong p,long * nb,GEN T)3565 special_primes(GEN n, ulong p, long *nb, GEN T)
3566 {
3567   long i, l = lg(T);
3568   if (l > 1)
3569   { /* pp = square of biggest p tried so far */
3570     long pp[] = { evaltyp(t_INT)|_evallg(4), 0,0,0 };
3571     pari_sp av = avma; affii(sqru(p), pp); set_avma(av);
3572 
3573     for (i = 1; i < l; i++)
3574       if (dvdiiz(n, gel(T,i), n))
3575       {
3576         long k = 1; while (dvdiiz(n, gel(T,i), n)) k++;
3577         STOREi(nb, gel(T,i), k);
3578         if (abscmpii(pp, n) > 0)
3579         {
3580           if (!is_pm1(n)) STOREi(nb, n, 1);
3581           return 1;
3582         }
3583       }
3584   }
3585   return 0;
3586 }
3587 
3588 /* factor(sn*|n|), where sn = -1 or 1.
3589  * all != 0 : only look for prime divisors < all. If pU is not NULL,
3590  * set it to unfactored composite */
3591 static GEN
ifactor_sign(GEN n,ulong all,long hint,long sn,GEN * pU)3592 ifactor_sign(GEN n, ulong all, long hint, long sn, GEN *pU)
3593 {
3594   GEN M, N;
3595   pari_sp av;
3596   long nb = 0, i;
3597   ulong lim;
3598   forprime_t T;
3599 
3600   if (lgefint(n) == 3)
3601   { /* small integer */
3602     GEN f, Pf, Ef, P, E, F = cgetg(3, t_MAT);
3603     ulong U1, U2;
3604     long l;
3605     av = avma;
3606     /* enough room to store <= 15 primes and exponents (OK if n < 2^64) */
3607     (void)new_chunk((15*3 + 15 + 1) * 2);
3608     f = factoru_sign(uel(n,2), all, hint, pU? &U1: NULL, pU? &U2: NULL);
3609     set_avma(av);
3610     Pf = gel(f,1);
3611     Ef = gel(f,2);
3612     l = lg(Pf);
3613     if (sn < 0)
3614     { /* add sign */
3615       long L = l+1;
3616       gel(F,1) = P = cgetg(L, t_COL);
3617       gel(F,2) = E = cgetg(L, t_COL);
3618       gel(P,1) = gen_m1; P++;
3619       gel(E,1) = gen_1;  E++;
3620     }
3621     else
3622     {
3623       gel(F,1) = P = cgetg(l, t_COL);
3624       gel(F,2) = E = cgetg(l, t_COL);
3625     }
3626     for (i = 1; i < l; i++)
3627     {
3628       gel(P,i) = utoipos(Pf[i]);
3629       gel(E,i) = utoipos(Ef[i]);
3630     }
3631     if (pU) *pU = U1 == 1? NULL: mkvec2(utoipos(U1), utoipos(U2));
3632     return F;
3633   }
3634   if (pU) *pU = NULL;
3635   M = cgetg(3,t_MAT);
3636   if (sn < 0) STORE(&nb, utoineg(1), 1);
3637   if (is_pm1(n)) return aux_end(M,NULL,nb);
3638 
3639   n = N = gclone(n); setabssign(n);
3640   /* trial division bound */
3641   lim = all; if (!lim) lim = tridiv_bound(n);
3642   if (lim > 2)
3643   {
3644     ulong maxp, p;
3645     pari_sp av2;
3646     i = vali(n);
3647     if (i)
3648     {
3649       STOREu(&nb, 2, i);
3650       av = avma; affii(shifti(n,-i), n); set_avma(av);
3651     }
3652     if (is_pm1(n)) return aux_end(M,n,nb);
3653     /* trial division */
3654     maxp = maxprime();
3655     av = avma; u_forprime_init(&T, 3, minss(lim, maxp)); av2 = avma;
3656     /* first pass: known to fit in private prime table */
3657     while ((p = u_forprime_next_fast(&T)))
3658     {
3659       pari_sp av3 = avma;
3660       int stop;
3661       long k = Z_lvalrem_stop(&n, p, &stop);
3662       if (k)
3663       {
3664         affii(n, N); n = N; set_avma(av3);
3665         STOREu(&nb, p, k);
3666       }
3667       if (stop)
3668       {
3669         if (!is_pm1(n)) STOREi(&nb, n, 1);
3670         stackdummy(av, av2);
3671         return aux_end(M,n,nb);
3672       }
3673     }
3674     stackdummy(av, av2);
3675     if (lim > maxp)
3676     { /* second pass, usually empty: outside private prime table */
3677       av = avma; u_forprime_init(&T, maxp+1, lim); av2 = avma;
3678       while ((p = u_forprime_next(&T)))
3679       {
3680         pari_sp av3 = avma;
3681         int stop;
3682         long k = Z_lvalrem_stop(&n, p, &stop);
3683         if (k)
3684         {
3685           affii(n, N); n = N; set_avma(av3);
3686           STOREu(&nb, p, k);
3687         }
3688         if (stop)
3689         {
3690           if (!is_pm1(n)) STOREi(&nb, n, 1);
3691           stackdummy(av, av2);
3692           return aux_end(M,n,nb);
3693         }
3694       }
3695       stackdummy(av, av2);
3696     }
3697   }
3698   if (special_primes(n, lim, &nb, primetab)) return aux_end(M,n, nb);
3699   if (all)
3700   { /* smallfact: look for easy pure powers then stop. Cf Z_isanypower */
3701     GEN x;
3702     long k;
3703     av = avma;
3704     k = isanypower_nosmalldiv(n, &x);
3705     if (k > 1) affii(x, n);
3706     if (pU)
3707     {
3708       GEN F;
3709       if (abscmpiu(n, lim) <= 0
3710           || cmpii(n, sqru(lim)) <= 0 || ifac_isprime(n))
3711       { set_avma(av); STOREi(&nb, n, k); return aux_end(M,n, nb); }
3712       set_avma(av); F = aux_end(M, NULL, nb); /* don't destroy n */
3713       *pU = mkvec2(icopy(n), utoipos(k)); /* composite cofactor */
3714       gunclone(n); return F;
3715     }
3716     else
3717     { set_avma(av); STOREi(&nb, n, k); }
3718     if (DEBUGLEVEL >= 2) {
3719       pari_warn(warner,
3720         "IFAC: untested %ld-bit integer declared prime", expi(n)+1);
3721       if (expi(n) <= 256) err_printf("\t%Ps\n", n);
3722     }
3723   }
3724   else if (ifac_isprime(n)) STOREi(&nb, n, 1);
3725   else nb += ifac_decomp(n, hint);
3726   return aux_end(M,n, nb);
3727 }
3728 
3729 static GEN
ifactor(GEN n,ulong all,long hint)3730 ifactor(GEN n, ulong all, long hint)
3731 {
3732   long s = signe(n);
3733   if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3734   return ifactor_sign(n, all, hint, s, NULL);
3735 }
3736 
3737 int
ifac_next(GEN * part,GEN * p,long * e)3738 ifac_next(GEN *part, GEN *p, long *e)
3739 {
3740   GEN here = ifac_main(part);
3741   if (here == gen_0) { *p = NULL; *e = 1; return 0; }
3742   if (!here) { *p = NULL; *e = 0; return 0; }
3743   *p = VALUE(here);
3744   *e = EXPON(here)[2];
3745   ifac_delete(here); return 1;
3746 }
3747 
3748 /* see before ifac_crack for current semantics of 'hint' (factorint's 'flag') */
3749 GEN
factorint(GEN n,long flag)3750 factorint(GEN n, long flag)
3751 {
3752   GEN F;
3753   if ((F = check_arith_all(n,"factorint"))) return gcopy(F);
3754   return ifactor(n,0,flag);
3755 }
3756 
3757 GEN
Z_factor_limit(GEN n,ulong all)3758 Z_factor_limit(GEN n, ulong all)
3759 {
3760   if (!all) all = GP_DATA->primelimit + 1;
3761   return ifactor(n, all, decomp_default_hint);
3762 }
3763 GEN
absZ_factor_limit_strict(GEN n,ulong all,GEN * pU)3764 absZ_factor_limit_strict(GEN n, ulong all, GEN *pU)
3765 {
3766   GEN F, U;
3767   if (!signe(n))
3768   {
3769     if (pU) *pU = NULL;
3770     retmkmat2(mkcol(gen_0), mkcol(gen_1));
3771   }
3772   if (!all) all = GP_DATA->primelimit + 1;
3773   F = ifactor_sign(n, all, decomp_default_hint, 1, &U);
3774   if (pU) *pU = U;
3775   return F;
3776 }
3777 GEN
absZ_factor_limit(GEN n,ulong all)3778 absZ_factor_limit(GEN n, ulong all)
3779 {
3780   if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3781   if (!all) all = GP_DATA->primelimit + 1;
3782   return ifactor_sign(n, all, decomp_default_hint, 1, NULL);
3783 }
3784 GEN
Z_factor(GEN n)3785 Z_factor(GEN n)
3786 { return ifactor(n,0,decomp_default_hint); }
3787 GEN
absZ_factor(GEN n)3788 absZ_factor(GEN n)
3789 {
3790   if (!signe(n)) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3791   return ifactor_sign(n, 0, decomp_default_hint, 1, NULL);
3792 }
3793 /* Factor until the unfactored part is smaller than limit. Return the
3794  * factored part. Hence factorback(output) may be smaller than n */
3795 GEN
Z_factor_until(GEN n,GEN limit)3796 Z_factor_until(GEN n, GEN limit)
3797 {
3798   pari_sp av = avma;
3799   long s = signe(n), eq;
3800   GEN q, F, U;
3801 
3802   if (!s) retmkmat2(mkcol(gen_0), mkcol(gen_1));
3803   F = ifactor_sign(n, tridiv_bound(n), decomp_default_hint, s, &U);
3804   if (!U) return F;
3805   q = gel(U,1); /* composite, q^eq = unfactored part */
3806   eq = itou(gel(U,2));
3807   if (cmpii(eq == 1? q: powiu(q, eq), limit) > 0)
3808   { /* factor further */
3809     long l2 = expi(q)+1;
3810     GEN P2, E2, F2, part;
3811     if (eq > 1) limit = sqrtnint(limit, eq);
3812     P2 = coltrunc_init(l2);
3813     E2 = coltrunc_init(l2); F2 = mkmat2(P2,E2);
3814     part = ifac_start(icopy(q), 0); /* ifac_next would destroy q */
3815     for(;;)
3816     {
3817       long e;
3818       GEN p;
3819       if (!ifac_next(&part,&p,&e)) break;
3820       vectrunc_append(P2, p);
3821       vectrunc_append(E2, utoipos(e * eq));
3822       q = diviiexact(q, powiu(p, e));
3823       if (cmpii(q, limit) <= 0) break;
3824     }
3825     F2 = sort_factor(F2, (void*)&abscmpii, cmp_nodata);
3826     F = merge_factor(F, F2, (void*)&abscmpii, cmp_nodata);
3827   }
3828   return gerepilecopy(av, F);
3829 }
3830 
3831 static void
matsmalltrunc_append(GEN m,ulong p,ulong e)3832 matsmalltrunc_append(GEN m, ulong p, ulong e)
3833 {
3834   GEN P = gel(m,1), E = gel(m,2);
3835   long l = lg(P);
3836   P[l] = p; lg_increase(P);
3837   E[l] = e; lg_increase(E);
3838 }
3839 static GEN
matsmalltrunc_init(long l)3840 matsmalltrunc_init(long l)
3841 {
3842   GEN P = vecsmalltrunc_init(l);
3843   GEN E = vecsmalltrunc_init(l); return mkvec2(P,E);
3844 }
3845 
3846 /* If a <= c <= b , factoru(c) = L[c-a+1] */
3847 GEN
vecfactoru_i(ulong a,ulong b)3848 vecfactoru_i(ulong a, ulong b)
3849 {
3850   ulong N, k, p, n = b-a+1;
3851   GEN v = const_vecsmall(n, 1);
3852   GEN L = cgetg(n+1, t_VEC);
3853   forprime_t T;
3854   if (b < 510510UL) N = 7;
3855   else if (b < 9699690UL) N = 8;
3856   else if (b < 223092870UL) N = 9;
3857 #ifdef LONG_IS_64BIT
3858   else if (b < 6469693230UL) N = 10;
3859   else if (b < 200560490130UL) N = 11;
3860   else if (b < 7420738134810UL) N = 12;
3861   else if (b < 304250263527210UL) N = 13;
3862   else N = 16; /* don't bother */
3863 #else
3864   else N = 10;
3865 #endif
3866   for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
3867   u_forprime_init(&T, 2, usqrt(b));
3868   while ((p = u_forprime_next(&T)))
3869   { /* p <= sqrt(b) */
3870     ulong pk = p, K = ulogint(b, p);
3871     for (k = 1; k <= K; k++)
3872     {
3873       ulong j, t = a / pk, ap = t * pk;
3874       if (ap < a) { ap += pk; t++; }
3875       /* t = (j+a-1) \ pk */
3876       t %= p;
3877       for (j = ap-a+1; j <= n; j += pk)
3878       {
3879         if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
3880         if (++t == p) t = 0;
3881       }
3882       pk *= p;
3883     }
3884   }
3885   /* complete factorisation of non-sqrt(b)-smooth numbers */
3886   for (k = 1, N = a; k <= n; k++, N++)
3887     if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
3888   return L;
3889 }
3890 GEN
vecfactoru(ulong a,ulong b)3891 vecfactoru(ulong a, ulong b)
3892 {
3893   pari_sp av = avma;
3894   return gerepilecopy(av, vecfactoru_i(a,b));
3895 }
3896 
3897 /* Assume a and b odd, return L s.t. L[k] = factoru(a + 2*(k-1))
3898  * If a <= c <= b odd, factoru(c) = L[(c-a)>>1 + 1] */
3899 GEN
vecfactoroddu_i(ulong a,ulong b)3900 vecfactoroddu_i(ulong a, ulong b)
3901 {
3902   ulong N, k, p, n = ((b-a)>>1) + 1;
3903   GEN v = const_vecsmall(n, 1);
3904   GEN L = cgetg(n+1, t_VEC);
3905   forprime_t T;
3906   /* f(N)=my(a=primes(n+1));vecprod(a[2..#a]); */
3907   if (b < 255255UL) N = 6;
3908   else if (b < 4849845UL) N = 7;
3909   else if (b < 111546435UL) N = 8;
3910   else if (b < 3234846615UL) N = 9;
3911 #ifdef LONG_IS_64BIT
3912   else if (b < 100280245065UL) N = 10;
3913   else if (b < 3710369067405UL) N = 11;
3914   else if (b < 152125131763605UL) N = 12;
3915   else N = 16; /* don't bother */
3916 #else
3917   else N = 10;
3918 #endif
3919   for (k = 1; k <= n; k++) gel(L,k) = matsmalltrunc_init(N);
3920   u_forprime_init(&T, 3, usqrt(b));
3921   while ((p = u_forprime_next(&T)))
3922   { /* p <= sqrt(b) */
3923     ulong pk = p, K = ulogint(b, p);
3924     for (k = 1; k <= K; k++)
3925     {
3926       ulong j, t = (a / pk) | 1UL, ap = t * pk;
3927       /* t and ap are odd, ap multiple of pk = p^k */
3928       if (ap < a) { ap += pk<<1; t+=2; }
3929       /* c=t*p^k by steps of 2*p^k; factorization of c*=p^k if (t,p)=1 */
3930       t %= p;
3931       for (j = ((ap-a)>>1)+1; j <= n; j += pk)
3932       {
3933         if (t) { v[j] *= pk; matsmalltrunc_append(gel(L,j), p,k); }
3934         t += 2; if (t >= p) t -= p;
3935       }
3936       pk *= p;
3937     }
3938   }
3939   /* complete factorisation of non-sqrt(b)-smooth numbers */
3940   for (k = 1, N = a; k <= n; k++, N+=2)
3941     if (uel(v,k) != N) matsmalltrunc_append(gel(L,k), N/uel(v,k),1UL);
3942   return L;
3943 }
3944 GEN
vecfactoroddu(ulong a,ulong b)3945 vecfactoroddu(ulong a, ulong b)
3946 {
3947   pari_sp av = avma;
3948   return gerepilecopy(av, vecfactoroddu_i(a,b));
3949 }
3950 
3951 /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree, else NULL */
3952 GEN
vecfactorsquarefreeu(ulong a,ulong b)3953 vecfactorsquarefreeu(ulong a, ulong b)
3954 {
3955   ulong N, k, p, n = b-a+1;
3956   GEN v = const_vecsmall(n, 1);
3957   GEN L = cgetg(n+1, t_VEC);
3958   forprime_t T;
3959   if (b < 510510UL) N = 7;
3960   else if (b < 9699690UL) N = 8;
3961   else if (b < 223092870UL) N = 9;
3962 #ifdef LONG_IS_64BIT
3963   else if (b < 6469693230UL) N = 10;
3964   else if (b < 200560490130UL) N = 11;
3965   else if (b < 7420738134810UL) N = 12;
3966   else if (b < 304250263527210UL) N = 13;
3967   else N = 16; /* don't bother */
3968 #else
3969   else N = 10;
3970 #endif
3971   for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
3972   u_forprime_init(&T, 2, usqrt(b));
3973   while ((p = u_forprime_next(&T)))
3974   { /* p <= sqrt(b), kill nonsquarefree */
3975     ulong j, pk = p*p, t = a / pk, ap = t * pk;
3976     if (ap < a) ap += pk;
3977     for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
3978 
3979     t = a / p; ap = t * p;
3980     if (ap < a) { ap += p; t++; }
3981     for (j = ap-a+1; j <= n; j += p, t++)
3982       if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
3983   }
3984   /* complete factorisation of non-sqrt(b)-smooth numbers */
3985   for (k = 1, N = a; k <= n; k++, N++)
3986     if (gel(L,k) && uel(v,k) != N) vecsmalltrunc_append(gel(L,k), N/uel(v,k));
3987   return L;
3988 }
3989 /* If 0 <= a <= c <= b; L[c-a+1] = factoru(c)[,1] if c squarefree and coprime
3990  * to all the primes in sorted zv P, else NULL */
3991 GEN
vecfactorsquarefreeu_coprime(ulong a,ulong b,GEN P)3992 vecfactorsquarefreeu_coprime(ulong a, ulong b, GEN P)
3993 {
3994   ulong N, k, p, n = b-a+1, sqb = usqrt(b);
3995   GEN v = const_vecsmall(n, 1);
3996   GEN L = cgetg(n+1, t_VEC);
3997   forprime_t T;
3998   if (b < 510510UL) N = 7;
3999   else if (b < 9699690UL) N = 8;
4000   else if (b < 223092870UL) N = 9;
4001 #ifdef LONG_IS_64BIT
4002   else if (b < 6469693230UL) N = 10;
4003   else if (b < 200560490130UL) N = 11;
4004   else if (b < 7420738134810UL) N = 12;
4005   else if (b < 304250263527210UL) N = 13;
4006   else N = 16; /* don't bother */
4007 #else
4008   else N = 10;
4009 #endif
4010   for (k = 1; k <= n; k++) gel(L,k) = vecsmalltrunc_init(N);
4011   u_forprime_init(&T, 2, sqb);
4012   while ((p = u_forprime_next(&T)))
4013   { /* p <= sqrt(b), kill nonsquarefree */
4014     ulong j, t, ap, bad = zv_search(P, p), pk = bad ? p: p * p;
4015     t = a / pk; ap = t * pk; if (ap < a) ap += pk;
4016     for (j = ap-a+1; j <= n; j += pk) gel(L,j) = NULL;
4017     if (bad) continue;
4018 
4019     t = a / p; ap = t * p;
4020     if (ap < a) { ap += p; t++; }
4021     for (j = ap-a+1; j <= n; j += p, t++)
4022       if (gel(L,j)) { v[j] *= p; vecsmalltrunc_append(gel(L,j), p); }
4023   }
4024   if (uel(P,lg(P)-1) <= sqb) P = NULL;
4025   /* complete factorisation of non-sqrt(b)-smooth numbers */
4026   for (k = 1, N = a; k <= n; k++, N++)
4027     if (gel(L,k) && uel(v,k) != N)
4028     {
4029       ulong q = N / uel(v,k);
4030       if (!P || !zv_search(P, q)) vecsmalltrunc_append(gel(L,k), q);
4031     }
4032   return L;
4033 }
4034 
4035 GEN
vecsquarefreeu(ulong a,ulong b)4036 vecsquarefreeu(ulong a, ulong b)
4037 {
4038   ulong j, k, p, n = b-a+1;
4039   GEN L = const_vecsmall(n, 1);
4040   forprime_t T;
4041   u_forprime_init(&T, 2, usqrt(b));
4042   while ((p = u_forprime_next(&T)))
4043   { /* p <= sqrt(b), kill nonsquarefree */
4044     ulong pk = p*p, t = a / pk, ap = t * pk;
4045     if (ap < a) { ap += pk; t++; }
4046     /* t = (j+a-1) \ pk */
4047     for (j = ap-a+1; j <= n; j += pk, t++) L[j] = 0;
4048   }
4049   for (k = j = 1; k <= n; k++)
4050     if (L[k]) L[j++] = a+k-1;
4051   setlg(L,j); return L;
4052 }
4053