1 /* $Id: arith2.c 8535 2007-04-03 12:24:25Z bill $
2 
3 Copyright (C) 2000  The PARI group.
4 
5 This file is part of the PARI/GP package.
6 
7 PARI/GP is free software; you can redistribute it and/or modify it under the
8 terms of the GNU General Public License as published by the Free Software
9 Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY WHATSOEVER.
11 
12 Check the License for details. You should have received a copy of it, along
13 with the package; see the file 'COPYING'. If not, write to the Free Software
14 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15 
16 /*********************************************************************/
17 /**                                                                 **/
18 /**                     ARITHMETIC FUNCTIONS                        **/
19 /**                        (second part)                            **/
20 /**                                                                 **/
21 /*********************************************************************/
22 #include "pari.h"
23 #include "paripriv.h"
24 /***********************************************************************/
25 /**                                                                   **/
26 /**                          PRIME NUMBERS                            **/
27 /**                                                                   **/
28 /***********************************************************************/
29 
30 /* assume all primes up to 59359 are precomputed */
31 GEN
prime(long n)32 prime(long n)
33 {
34   byteptr p;
35   ulong prime;
36 
37   if (n <= 0) pari_err(talker, "n-th prime meaningless if n = %ld",n);
38   if (n < 1000) {
39     p = diffptr;
40     prime = 0;
41   } else if (n < 2000) {
42     n -= 1000; p = diffptr+1000;
43     prime = 7919;
44   } else if (n < 3000) {
45     n -= 2000; p = diffptr+2000;
46     prime = 17389;
47   } else if (n < 4000) {
48     n -= 3000; p = diffptr+3000;
49     prime = 27449;
50   } else if (n < 5000) {
51     n -= 4000; p = diffptr+4000;
52     prime = 37813;
53   } else if (n < 6000) {
54     n -= 5000; p = diffptr+5000;
55     prime = 48611;
56   } else if (n < 10000 || maxprime() < 500000) {
57     n -= 6000; p = diffptr+6000;
58     prime = 59359;
59   } else if (n < 20000) {
60     n -= 10000; p = diffptr+10000;
61     prime = 104729;
62   } else if (n < 30000) {
63     n -= 20000; p = diffptr+20000;
64     prime = 224737;
65   } else if (n < 40000) {
66     n -= 30000; p = diffptr+30000;
67     prime = 350377;
68   } else {
69     n -= 40000; p = diffptr+40000;
70     prime = 479909;
71   }
72   while (n--) NEXT_PRIME_VIADIFF_CHECK(prime,p);
73   return utoipos(prime);
74 }
75 
76 GEN
primepi(GEN x)77 primepi(GEN x)
78 {
79   pari_sp av = avma;
80   byteptr p = diffptr;
81   ulong prime = 0, res = 0, n;
82   GEN N = typ(x) == t_INT? x: gfloor(x);
83 
84   if (typ(N) != t_INT || signe(N) <= 0) pari_err(typeer, "primepi");
85   avma = av; n = itou(N); maxprime_check(n);
86   while (prime <= n) { res++; NEXT_PRIME_VIADIFF(prime,p); }
87   return utoi(res-1);
88 }
89 
90 GEN
primes(long n)91 primes(long n)
92 {
93   byteptr p = diffptr;
94   ulong prime = 0;
95   GEN y,z;
96 
97   if (n < 0) n = 0;
98   z = y = cgetg(n+1,t_VEC);
99   while (n--)
100   {
101     NEXT_PRIME_VIADIFF_CHECK(prime,p);
102     gel(++z, 0) = utoi(prime);
103   }
104   return y;
105 }
106 
107 /* Building/Rebuilding the diffptr table. The actual work is done by the
108  * following two subroutines;  the user entry point is the function
109  * initprimes() below.  initprimes1() is the old algorithm, called when
110  * maxnum (size) is moderate. */
111 static byteptr
initprimes1(ulong size,long * lenp,long * lastp)112 initprimes1(ulong size, long *lenp, long *lastp)
113 {
114   long k;
115   byteptr q, r, s, fin, p = (byteptr) gpmalloc(size+2);
116 
117   memset(p, 0, size + 2); fin = p + size;
118   for (r=q=p,k=1; r<=fin; )
119   {
120     do { r+=k; k+=2; r+=k; } while (*++q);
121     for (s=r; s<=fin; s+=k) *s = 1;
122   }
123   r = p; *r++ = 2; *r++ = 1; /* 2 and 3 */
124   for (s=q=r-1; ; s=q)
125   {
126     do q++; while (*q);
127     if (q > fin) break;
128     *r++ = (unsigned char) ((q-s) << 1);
129   }
130   *r++ = 0;
131   *lenp = r - p;
132   *lastp = ((s - p) << 1) + 1;
133   return (byteptr) gprealloc(p,r-p);
134 }
135 
136 /*  Timing in ms (Athlon/850; reports 512K of secondary cache; looks
137     like there is 64K of quickier cache too).
138 
139       arena|    30m     100m    300m    1000m    2000m  <-- primelimit
140       =================================================
141       16K       1.1053  1.1407  1.2589  1.4368   1.6086
142       24K       1.0000  1.0625  1.1320  1.2443   1.3095
143       32K       1.0000  1.0469  1.0761  1.1336   1.1776
144       48K       1.0000  1.0000  1.0254  1.0445   1.0546
145       50K       1.0000  1.0000  1.0152  1.0345   1.0464
146       52K       1.0000  1.0000  1.0203  1.0273   1.0362
147       54K       1.0000  1.0000  1.0812  1.0216   1.0281
148       56K       1.0526  1.0000  1.0051  1.0144   1.0205
149       58K       1.0000  1.0000  1.0000  1.0086   1.0123
150       60K       0.9473  0.9844  1.0051  1.0014   1.0055
151       62K       1.0000  0.9844  0.9949  0.9971   0.9993
152       64K       1.0000  1.0000  1.0000  1.0000   1.0000
153       66K       1.2632  1.2187  1.2183  1.2055   1.1953
154       68K       1.4211  1.4844  1.4721  1.4425   1.4188
155       70K       1.7368  1.7188  1.7107  1.6767   1.6421
156       72K       1.9474  1.9531  1.9594  1.9023   1.8573
157       74K       2.2105  2.1875  2.1827  2.1207   2.0650
158       76K       2.4211  2.4219  2.4010  2.3305   2.2644
159       78K       2.5789  2.6250  2.6091  2.5330   2.4571
160       80K       2.8421  2.8125  2.8223  2.7213   2.6380
161       84K       3.1053  3.1875  3.1776  3.0819   2.9802
162       88K       3.5263  3.5312  3.5228  3.4124   3.2992
163       92K       3.7895  3.8438  3.8375  3.7213   3.5971
164       96K       4.0000  4.1093  4.1218  3.9986   3.9659
165       112K      4.3684  4.5781  4.5787  4.4583   4.6115
166       128K      4.7368  4.8750  4.9188  4.8075   4.8997
167       192K      5.5263  5.7188  5.8020  5.6911   5.7064
168       256K      6.0000  6.2187  6.3045  6.1954   6.1033
169       384K      6.7368  6.9531  7.0405  6.9181   6.7912
170       512K      7.3158  7.5156  7.6294  7.5000   7.4654
171       768K      9.1579  9.4531  9.6395  9.5014   9.1075
172       1024K    10.368  10.7497 10.9999 10.878   10.8201
173       1536K    12.579  13.3124 13.7660 13.747   13.4739
174       2048K    13.737  14.4839 15.0509 15.151   15.1282
175       3076K    14.789  15.5780 16.2993 16.513   16.3365
176 
177     Now the same number relative to the model
178 
179     (1 + 0.36*sqrt(primelimit)/arena) * (arena <= 64 ? 1.05 : (arena-64)**0.38)
180 
181      [SLOW2_IN_ROOTS = 0.36, ALPHA = 0.38]
182 
183       arena|    30m     100m    300m    1000m    2000m  <-- primelimit
184       =================================================
185         16K    1.014    0.9835  0.9942  0.9889  1.004
186         24K    0.9526   0.9758  0.9861  0.9942  0.981
187         32K    0.971    0.9939  0.9884  0.9849  0.9806
188         48K    0.9902   0.9825  0.996   0.9945  0.9885
189         50K    0.9917   0.9853  0.9906  0.9926  0.9907
190         52K    0.9932   0.9878  0.9999  0.9928  0.9903
191         54K    0.9945   0.9902  1.064   0.9939  0.9913
192         56K    1.048    0.9924  0.9925  0.993   0.9921
193         58K    0.9969   0.9945  0.9909  0.9932  0.9918
194         60K    0.9455   0.9809  0.9992  0.9915  0.9923
195         62K    0.9991   0.9827  0.9921  0.9924  0.9929
196         64K    1        1       1       1       1
197         66K    1.02     0.9849  0.9857  0.9772  0.9704
198         68K    0.8827   0.9232  0.9176  0.9025  0.8903
199         70K    0.9255   0.9177  0.9162  0.9029  0.8881
200         72K    0.9309   0.936   0.9429  0.9219  0.9052
201         74K    0.9715   0.9644  0.967   0.9477  0.9292
202         76K    0.9935   0.9975  0.9946  0.9751  0.9552
203         78K    0.9987   1.021   1.021   1.003   0.9819
204         80K    1.047    1.041   1.052   1.027   1.006
205         84K    1.052    1.086   1.092   1.075   1.053
206         88K    1.116    1.125   1.133   1.117   1.096
207         92K    1.132    1.156   1.167   1.155   1.134
208         96K    1.137    1.177   1.195   1.185   1.196
209        112K    1.067    1.13    1.148   1.15    1.217
210        128K    1.04     1.083   1.113   1.124   1.178
211        192K    0.9368   0.985   1.025   1.051   1.095
212        256K    0.8741   0.9224  0.9619  0.995   1.024
213        384K    0.8103   0.8533  0.8917  0.9282  0.9568
214        512K    0.7753   0.8135  0.8537  0.892   0.935
215        768K    0.8184   0.8638  0.9121  0.9586  0.9705
216       1024K    0.8241   0.8741  0.927   0.979   1.03
217       1536K    0.8505   0.9212  0.9882  1.056   1.096
218       2048K    0.8294   0.8954  0.9655  1.041   1.102
219 
220 */
221 
222 #ifndef SLOW2_IN_ROOTS
223   /* SLOW2_IN_ROOTS below 3: some slowdown starts to be noticable
224    * when things fit into the cache on Sparc.
225    * XXX The choice of 2.6 gives a slowdown of 1-2% on UltraSparcII,
226    * but makes calculations for "maximum" of 436273009 (not applicable anymore)
227    * fit into 256K cache (still common for some architectures).
228    *
229    * One may change it when small caches become uncommon, but the gain
230    * is not going to be very noticable... */
231 #  ifdef i386           /* gcc defines this? */
232 #    define SLOW2_IN_ROOTS      0.36
233 #  else
234 #    define SLOW2_IN_ROOTS      2.6
235 #  endif
236 #endif
237 #ifndef CACHE_ARENA
238 #  ifdef i386           /* gcc defines this? */
239    /* Due to smaller SLOW2_IN_ROOTS, smaller arena is OK; fit L1 cache */
240 #    define CACHE_ARENA (63 * 1024UL) /* No slowdown even with 64K L1 cache */
241 #  else
242 #    define CACHE_ARENA (200 * 1024UL) /* No slowdown even with 256K L2 cache */
243 #  endif
244 #endif
245 
246 #define CACHE_ALPHA     (0.38)          /* Cache performance model parameter */
247 #define CACHE_CUTOFF    (0.018)         /* Cache performance not smooth here */
248 
249 static double slow2_in_roots = SLOW2_IN_ROOTS;
250 
251 typedef struct {
252     ulong arena;
253     double power;
254     double cutoff;
255 } cache_model_t;
256 
257 static cache_model_t cache_model = { CACHE_ARENA, CACHE_ALPHA, CACHE_CUTOFF };
258 
259 /* Assume that some calculation requires a chunk of memory to be
260    accessed often in more or less random fashion (as in sieving).
261    Assume that the calculation can be done in steps by subdividing the
262    chunk into smaller subchunks (arenas) and treathing them
263    separately.  Assume that the overhead of subdivision is equivalent
264    to the number of arenas.
265 
266    Find an optimal size of the arena taking into account the overhead
267    of subdivision, and the overhead of arena not fitting into the
268    cache.  Assume that arenas of size slow2_in_roots slows down the
269    calculation 2x (comparing to very big arenas; when cache hits do
270    not matter).  Since cache performance varies wildly with
271    architecture, load, and wheather (especially with cache coloring
272    enabled), use an idealized cache model based on benchmarks above.
273 
274    Assume that an independent region of FIXED_TO_CACHE bytes is accessed
275    very often concurrently with the arena access.
276  */
277 static ulong
good_arena_size(ulong slow2_size,ulong total,ulong fixed_to_cache,cache_model_t * cache_model,long model_type)278 good_arena_size(ulong slow2_size, ulong total, ulong fixed_to_cache,
279                 cache_model_t *cache_model, long model_type)
280 {
281   ulong asize, cache_arena = cache_model->arena;
282   double Xmin, Xmax, A, B, C1, C2, D, V;
283   double alpha = cache_model->power, cut_off = cache_model->cutoff;
284 
285   /* Estimated relative slowdown,
286      with overhead = max((fixed_to_cache+arena)/cache_arena - 1, 0):
287 
288      1 + slow2_size/arena due to initialization overhead;
289 
290      max(1, 4.63 * overhead^0.38 ) due to footprint > cache size.
291 
292      [The latter is hard to substantiate theoretically, but this
293      function describes benchmarks pretty close; it does not hurt that
294      one can minimize it explicitly too ;-).  The switch between
295      diffenent choices of max() happens whe overhead=0.018.]
296 
297      Thus the problem is minimizing (1 + slow2_size/arena)*overhead**0.29.
298      This boils down to F=((X+A)/(X+B))X^alpha, X=overhead,
299      B = (1 - fixed_to_cache/cache_arena), A = B +
300      slow2_size/cache_arena, alpha = 0.38, and X>=0.018, X>-B.  It
301      turns out that the minimization problem is not very nasty.  (As
302      usual with minimization problems which depend on parameters,
303      different values of A,B lead to different algorithms.  However,
304      the boundaries between the domains in (A,B) plane where different
305      algorithms work are explicitly calculatable.)
306 
307      Thus we need to find the rightmost root of (X+A)*(X+B) -
308      alpha(A-B)X to the right of 0.018 (if such exists and is below
309      Xmax).  Then we manually check the remaining region [0, 0.018].
310 
311      Since we cannot trust the purely-experimental cache-hit slowdown
312      function, as a sanity check always prefer fitting into the
313      cache (or "almost fitting") if F-law predicts that the larger
314      value of the arena provides less than 10% speedup.
315 
316    */
317 
318   if (model_type != 0)
319       pari_err(talker, "unsupported type of cache model"); /* Future expansion */
320 
321   /* The simplest case: we fit into cache */
322   if (total + fixed_to_cache <= cache_arena)
323       return total;
324   /* The simple case: fitting into cache doesn't slow us down more than 10% */
325   if (cache_arena - fixed_to_cache > 10 * slow2_size) {
326       asize = cache_arena - fixed_to_cache;
327       if (asize > total) asize = total; /* Automatically false... */
328       return asize;
329   }
330   /* Slowdown of not fitting into cache is significant.  Try to optimize.
331      Do not be afraid to spend some time on optimization - in trivial
332      cases we do not reach this point; any gain we get should
333      compensate the time spent on optimization.  */
334 
335   B = (1 - ((double)fixed_to_cache)/cache_arena);
336   A = B + ((double)slow2_size)/cache_arena;
337   C2 = A*B;
338   C1 = (A + B - 1/alpha*(A - B))/2;
339   D = C1*C1 - C2;
340   if (D > 0)
341       V = cut_off*cut_off + 2*C1*cut_off + C2; /* Value at CUT_OFF */
342   else
343       V = 0;                            /* Peacify the warning */
344   Xmin = cut_off;
345   Xmax = ((double)total - fixed_to_cache)/cache_arena; /* Two candidates */
346 
347   if ( D <= 0 || (V >= 0 && C1 + cut_off >= 0) ) /* slowdown increasing */
348       Xmax = cut_off;                   /* Only one candidate */
349   else if (V >= 0 &&                    /* slowdown concave down */
350            ((Xmax + C1) <= 0 || (Xmax*Xmax + 2*C1*Xmax + C2) <= 0))
351       /* DO NOTHING */;                 /* Keep both candidates */
352   else if (V <= 0 && (Xmax*Xmax + 2*C1*Xmax + C2) <= 0) /* slowdown decreasing */
353       Xmin = cut_off;                   /* Only one candidate */
354   else /* Now we know: 2 roots, the largest is in CUT_OFF..Xmax */
355       Xmax = sqrt(D) - C1;
356   if (Xmax != Xmin) {   /* Xmin == CUT_OFF; Check which one is better */
357       double v1 = (cut_off + A)/(cut_off + B);
358       double v2 = 2.33 * (Xmax + A)/(Xmax + B) * pow(Xmax, alpha);
359 
360       if (1.1 * v2 >= v1) /* Prefer fitting into the cache if slowdown < 10% */
361           V = v1;
362       else {
363           Xmin = Xmax;
364           V = v2;
365       }
366   } else if (B > 0)                     /* We need V */
367       V = 2.33 * (Xmin + A)/(Xmin + B) * pow(Xmin, alpha);
368   if (B > 0 && 1.1 * V > A/B)  /* Now Xmin is the minumum.  Compare with 0 */
369       Xmin = 0;
370 
371   asize = (ulong)((1 + Xmin)*cache_arena - fixed_to_cache);
372   if (asize > total) asize = total;     /* May happen due to approximations */
373   return asize;
374 }
375 
376 /* Use as in
377     install(set_optimize,lLDG)          \\ Through some M too?
378     set_optimize(2,1) \\ disable dependence on limit
379     \\ 1: how much cache usable, 2: slowdown of setup, 3: alpha, 4: cutoff
380     \\ 2,3,4 are in units of 0.001
381 
382     { time_primes_arena(ar,limit) =     \\ ar = arena size in K
383         set_optimize(1,floor(ar*1024));
384         default(primelimit, 200 000);   \\ 100000 results in *larger* malloc()!
385         gettime;
386         default(primelimit, floor(limit));
387         if(ar >= 1, ar=floor(ar));
388         print("arena "ar"K => "gettime"ms");
389     }
390 */
391 long
set_optimize(long what,GEN g)392 set_optimize(long what, GEN g)
393 {
394   long ret = 0;
395 
396   switch (what) {
397   case 1:
398     ret = (long)cache_model.arena;
399     break;
400   case 2:
401     ret = (long)(slow2_in_roots * 1000);
402     break;
403   case 3:
404     ret = (long)(cache_model.power * 1000);
405     break;
406   case 4:
407     ret = (long)(cache_model.cutoff * 1000);
408     break;
409   default:
410     pari_err(talker, "panic: set_optimize");
411     break;
412   }
413   if (g != NULL) {
414     ulong val = itou(g);
415 
416     switch (what) {
417     case 1: cache_model.arena = val; break;
418     case 2: slow2_in_roots     = (double)val / 1000.; break;
419     case 3: cache_model.power  = (double)val / 1000.; break;
420     case 4: cache_model.cutoff = (double)val / 1000.; break;
421     }
422   }
423   return ret;
424 }
425 
426 static void
sieve_chunk(byteptr known_primes,ulong s,byteptr data,ulong count)427 sieve_chunk(byteptr known_primes, ulong s, byteptr data, ulong count)
428 {   /* start must be odd;
429        prime differences (starting from (5-3)=2) start at known_primes[2],
430        are terminated by a 0 byte;
431 
432        Checks cnt odd numbers starting at `start', setting bytes
433        starting at data to 0 or 1 depending on whether the
434        corresponding odd number is prime or not */
435     ulong p;
436     byteptr q;
437     register byteptr write_to = data;   /* Better code with gcc 2.8.1 */
438     register ulong   cnt      = count;  /* Better code with gcc 2.8.1 */
439     register ulong   start    = s;      /* Better code with gcc 2.8.1 */
440     register ulong   delta    = 1;      /* Better code with gcc 2.8.1 */
441 
442     memset(data, 0, cnt);
443     start >>= 1;                        /* (start - 1)/2 */
444     start += cnt;                       /* Corresponds to the end */
445     cnt -= 1;
446     /* data corresponds to start.  q runs over primediffs.  */
447     /* Don't care about DIFFPTR_SKIP: false positives provide no problem */
448     for (q = known_primes + 1, p = 3; delta; delta = *++q, p += delta) {
449         /* first odd number which is >= start > p and divisible by p
450            = last odd number which is <= start + 2p - 1 and 0 (mod p)
451            = p + the last even number which is <= start + p - 1 and 0 (mod p)
452            = p + the last even number which is <= start + p - 2 and 0 (mod p)
453            = p + start + p - 2 - (start + p - 2) % 2p
454            = start + 2(p - 1 - ((start-1)/2 + (p-1)/2) % p). */
455       long off = cnt - ((start+(p>>1)) % p);
456 
457       while (off >= 0) {
458           write_to[off] = 1;
459           off -= p;
460       }
461     }
462 }
463 
464 /* Do not inline sieve_chunk()!  It fits into registers in ix86 non-inlined */
465 void (*sieve_chunk_p)(byteptr known_primes, ulong s, byteptr data, ulong count)
466     = sieve_chunk;
467 
468 /* Here's the workhorse.  This is recursive, although normally the first
469    recursive call will bottom out and invoke initprimes1() at once.
470    (Not static;  might conceivably be useful to someone in library mode) */
471 byteptr
initprimes0(ulong maxnum,long * lenp,ulong * lastp)472 initprimes0(ulong maxnum, long *lenp, ulong *lastp)
473 {
474   long size, alloced, psize;
475   byteptr q, fin, p, p1, fin1, plast, curdiff;
476   ulong last, remains, curlow, rootnum, asize;
477   ulong prime_above = 3;
478   byteptr p_prime_above;
479 
480   if (maxnum <= 1ul<<17)        /* Arbitrary. */
481     return initprimes1(maxnum>>1, lenp, (long*)lastp); /* Break recursion */
482 
483   maxnum |= 1;                  /* make it odd. */
484 
485   /* Checked to be enough up to 40e6, attained at 155893 */
486   /* Due to multibyte representation of large gaps, this estimate will
487      be broken by large enough maxnum.  However, assuming exponential
488      distribution of gaps with the average log(n), we are safe up to
489      circa exp(-256/log(1/0.09)) = 1.5e46.  OK with LONG_BITS <= 128. ;-) */
490   size = (long) (1.09 * maxnum/log((double)maxnum)) + 146;
491   p1 = (byteptr) gpmalloc(size);
492   rootnum = (ulong) sqrt((double)maxnum); /* cast it back to a long */
493   rootnum |= 1;
494   {
495     byteptr p2 = initprimes0(rootnum, &psize, &last); /* recursive call */
496     memcpy(p1, p2, psize); free(p2);
497   }
498   fin1 = p1 + psize - 1;
499   remains = (maxnum - rootnum) >> 1; /* number of odd numbers to check */
500 
501   /* Actually, we access primes array of psize too; but we access it
502      consequently, thus we do not include it in fixed_to_cache */
503   asize = good_arena_size((ulong)(rootnum * slow2_in_roots), remains + 1, 0,
504                           &cache_model, 0) - 1;
505   /* enough room on the stack ? */
506   alloced = (((byteptr)avma) <= ((byteptr)bot) + asize);
507   if (alloced)
508     p = (byteptr) gpmalloc(asize + 1);
509   else
510     p = (byteptr) bot;
511   fin = p + asize;              /* the 0 sentinel goes at fin. */
512   curlow = rootnum + 2; /* First candidate: know primes up to rootnum (odd). */
513   curdiff = fin1;
514 
515   /* During each iteration p..fin-1 represents a range of odd
516      numbers.  plast is a pointer which represents the last prime seen,
517      it may point before p..fin-1. */
518   plast = p - ((rootnum - last) >> 1) - 1;
519   p_prime_above = p1 + 2;
520   while (remains)       /* Cycle over arenas.  Performance is not crucial */
521   {
522     unsigned char was_delta;
523 
524     if (asize > remains) {
525       asize = remains;
526       fin = p + asize;
527     }
528     /* Fake the upper limit appropriate for the given arena */
529     while (prime_above*prime_above <= curlow + (asize << 1) && *p_prime_above)
530         prime_above += *p_prime_above++;
531     was_delta = *p_prime_above;
532     *p_prime_above = 0;                 /* Put a 0 sentinel for sieve_chunk */
533 
534     (*sieve_chunk_p)(p1, curlow, p, asize);
535 
536     *p_prime_above = was_delta;         /* Restore */
537     p[asize] = 0;                       /* Put a 0 sentinel for ZZZ */
538     /* now q runs over addresses corresponding to primes */
539     for (q = p; ; plast = q++)
540     {
541       long d;
542 
543       while (*q) q++;                   /* use ZZZ 0-sentinel at end */
544       if (q >= fin) break;
545       d = (q - plast) << 1;
546       while (d >= DIFFPTR_SKIP)
547         *curdiff++ = DIFFPTR_SKIP, d -= DIFFPTR_SKIP;
548       *curdiff++ = (unsigned char)d;
549     }
550     plast -= asize;
551     remains -= asize;
552     curlow += (asize<<1);
553   } /* while (remains) */
554   last = curlow - ((p - plast) << 1);
555   *curdiff++ = 0;               /* sentinel */
556   *lenp = curdiff - p1;
557   *lastp = last;
558   if (alloced) free(p);
559   return (byteptr) gprealloc(p1, *lenp);
560 }
561 #if 0 /* not yet... GN */
562 /* The diffptr table will contain at least 6548 entries (up to and including
563    the 6547th prime, 65557, and the terminal null byte), because the isprime/
564    small-factor-extraction machinery wants to depend on everything up to 65539
565    being in the table, and we might as well go to a multiple of 4 Bytes.--GN */
566 
567 void
568 init_tinyprimes_tridiv(byteptr p);      /* in ifactor2.c */
569 #endif
570 
571 static ulong _maxprime = 0;
572 
573 ulong
maxprime(void)574 maxprime(void) { return _maxprime; }
575 
576 void
maxprime_check(ulong c)577 maxprime_check(ulong c)
578 {
579   if (_maxprime < c) pari_err(primer2, c);
580 }
581 
582 byteptr
initprimes(ulong maxnum)583 initprimes(ulong maxnum)
584 {
585   long len;
586   ulong last;
587   byteptr p;
588   /* The algorithm must see the next prime beyond maxnum, whence the +512. */
589   ulong maxnum1 = ((maxnum<65302?65302:maxnum)+512ul);
590 
591   if ((maxnum>>1) > VERYBIGINT - 1024)
592       pari_err(talker, "Too large primelimit");
593   p = initprimes0(maxnum1, &len, &last);
594 #if 0 /* not yet... GN */
595   static int build_the_tables = 1;
596   if (build_the_tables) { init_tinyprimes_tridiv(p); build_the_tables=0; }
597 #endif
598   _maxprime = last; return p;
599 }
600 
601 static void
cleanprimetab(void)602 cleanprimetab(void)
603 {
604   long i,j, lp = lg(primetab);
605 
606   for (i=j=1; i < lp; i++)
607     if (primetab[i]) primetab[j++] = primetab[i];
608   setlg(primetab,j);
609 }
610 
611 /* p is a single element or a row vector with "primes" to add to primetab.
612  * If p shares a non-trivial factor with another element in primetab, take it
613  * into account. */
614 GEN
addprimes(GEN p)615 addprimes(GEN p)
616 {
617   pari_sp av;
618   long i,k,tx,lp;
619   GEN L;
620 
621   if (!p) return primetab;
622   tx = typ(p);
623   if (is_vec_t(tx))
624   {
625     for (i=1; i < lg(p); i++) (void)addprimes(gel(p,i));
626     return primetab;
627   }
628   if (tx != t_INT) pari_err(typeer,"addprime");
629   if (is_pm1(p)) return primetab;
630   av = avma; i = signe(p);
631   if (i == 0) pari_err(talker,"can't accept 0 in addprimes");
632   if (i < 0) p = absi(p);
633 
634   lp = lg(primetab);
635   L = cgetg(2*lp,t_VEC); k = 1;
636   for (i=1; i < lp; i++)
637   {
638     GEN n = gel(primetab,i), d = gcdii(n, p);
639     if (! is_pm1(d))
640     {
641       if (!equalii(p,d)) gel(L,k++) = d;
642       gel(L,k++) = diviiexact(n,d);
643       gunclone(n); primetab[i] = 0;
644     }
645   }
646   primetab = (GEN) gprealloc(primetab, (lp+1)*sizeof(long));
647   gel(primetab,i) = gclone(p); setlg(primetab, lp+1);
648   if (k > 1) { cleanprimetab(); setlg(L,k); (void)addprimes(L); }
649   avma = av; return primetab;
650 }
651 
652 static GEN
removeprime(GEN prime)653 removeprime(GEN prime)
654 {
655   long i;
656 
657   if (typ(prime) != t_INT) pari_err(typeer,"removeprime");
658   for (i=lg(primetab) - 1; i; i--)
659     if (absi_equal(gel(primetab,i), prime))
660     {
661       gunclone(gel(primetab,i)); primetab[i]=0;
662       cleanprimetab(); break;
663     }
664   if (!i) pari_err(talker,"prime %Z is not in primetable", prime);
665   return primetab;
666 }
667 
668 GEN
removeprimes(GEN prime)669 removeprimes(GEN prime)
670 {
671   long i,tx;
672 
673   if (!prime) return primetab;
674   tx = typ(prime);
675   if (is_vec_t(tx))
676   {
677     if (prime == primetab)
678     {
679       for (i=1; i < lg(prime); i++) gunclone(gel(prime,i));
680       setlg(prime, 1);
681     }
682     else
683     {
684       for (i=1; i < lg(prime); i++) (void)removeprime(gel(prime,i));
685     }
686     return primetab;
687   }
688   return removeprime(prime);
689 }
690 
691 /***********************************************************************/
692 /**                                                                   **/
693 /**       COMPUTING THE MATRIX OF PRIME DIVISORS AND EXPONENTS        **/
694 /**                                                                   **/
695 /***********************************************************************/
696 static const long decomp_default_hint = 0;
697 
698 /* where to stop trial dividing in factorization */
699 static ulong
default_bound(GEN n,ulong all)700 default_bound(GEN n, ulong all)
701 {
702   ulong l;
703   if (all > 1) return all; /* use supplied limit */
704   if (!all) return (ulong)VERYBIGINT; /* smallfact() case */
705   l = (ulong)expi(n) + 1;
706   if (l <= 32)  return 1UL<<14;
707   if (l <= 512) return (l-16) << 10;
708   return 1UL<<19; /* Rho is generally faster above this */
709 }
710 static ulong
tridiv_bound(GEN n,ulong all)711 tridiv_bound(GEN n, ulong all)
712 {
713   ulong p = maxprime(), l = default_bound(n, all);
714   return min(p, l);
715 }
716 
717 static GEN
aux_end(GEN n,long nb)718 aux_end(GEN n, long nb)
719 {
720   GEN p1,p2, z = (GEN)avma;
721   long i;
722 
723   if (n) gunclone(n);
724   p1 = cgetg(nb+1,t_COL);
725   p2 = cgetg(nb+1,t_COL);
726   for (i=nb; i; i--)
727   {
728     gel(p2,i) = z; z += lg(z);
729     gel(p1,i) = z; z += lg(z);
730   }
731   gel(z,1) = p1;
732   gel(z,2) = p2;
733   if (nb) (void)sort_factor_gen(z, absi_cmp);
734   return z;
735 }
736 
737 static GEN
auxdecomp1(GEN n,long (* ifac_break)(GEN n,GEN pairs,GEN here,GEN state),GEN state,ulong all,long hint)738 auxdecomp1(GEN n, long (*ifac_break)(GEN n, GEN pairs, GEN here, GEN state),
739                   GEN state, ulong all, long hint)
740 {
741   pari_sp av;
742   long pp[] = { evaltyp(t_INT)|_evallg(4), 0,0,0 };
743   long nb = 0, i, lp;
744   ulong p, k, lim;
745   byteptr d = diffptr+1; /* start at p = 3 */
746 #define STORE(x,e) { nb++; (void)x; (void)utoipos(e); }
747 #define STOREu(x,e) STORE(utoipos(x), e)
748 #define STOREi(x,e) STORE(icopy(x),   e)
749 
750   if (typ(n) != t_INT) pari_err(arither1);
751   i = signe(n); if (!i) pari_err(talker, "zero argument in factorint");
752   (void)cgetg(3,t_MAT);
753   if (i < 0) STORE(utoineg(1), 1);
754   if (is_pm1(n)) return aux_end(NULL,nb);
755 
756   n = gclone(n); setsigne(n,1);
757   i = vali(n);
758   if (i)
759   {
760     STOREu(2, i);
761     av = avma; affii(shifti(n,-i), n); avma = av;
762   }
763   if (is_pm1(n)) return aux_end(n,nb);
764   lim = tridiv_bound(n, all);
765 
766   /* trial division */
767   p = 2;
768   for(;;)
769   {
770     int stop;
771     NEXT_PRIME_VIADIFF(p,d);
772     if (p >= lim) break;
773 
774     k = Z_lvalrem_stop(n, p, &stop);
775     if (k) STOREu(p, k);
776     if (stop)
777     {
778       if (!is_pm1(n)) STOREi(n, 1);
779       return aux_end(n,nb);
780     }
781   }
782 
783   /* pp = square of biggest p tried so far */
784   av = avma; affii(muluu(p,p), pp); avma = av;
785 
786   /* trial divide by the "special primes" (usually huge composites) */
787   lp = lg(primetab);
788   for (i=1; i<lp; i++)
789     if (dvdiiz(n,gel(primetab,i), n))
790     {
791       long k = 1; while (dvdiiz(n,gel(primetab,i), n)) k++;
792       STOREi(gel(primetab,i), k);
793       if (absi_cmp(pp, n) > 0)
794       {
795         if (!is_pm1(n)) STOREi(n, 1);
796         return aux_end(n,nb);
797       }
798     }
799 
800   if (all != 1) hint = 15; /* smallfact: turn off all except pure powers */
801   else /* else test primality */
802     if (BSW_psp(n)) { STOREi(n, 1); return aux_end(n,nb); }
803 
804   /* now we have a large composite */
805   if (ifac_break && (*ifac_break)(n,NULL,NULL,state)) /*initialize ifac_break*/
806   {
807     if (DEBUGLEVEL>2)
808       fprintferr("IFAC: (Partial fact.) Initial stop requested.\n");
809   }
810   else
811     nb += ifac_decomp_break(n, ifac_break, state, hint);
812 
813   return aux_end(n, nb);
814 }
815 
816 /* state[1]: current unfactored part.
817  * state[2]: limit. */
818 static long
ifac_break_limit(GEN n,GEN pairs,GEN here,GEN state)819 ifac_break_limit(GEN n, GEN pairs/*unused*/, GEN here, GEN state)
820 {
821   pari_sp ltop = avma;
822   GEN N;
823   int res;
824   (void)pairs;
825   if (!here) /* initial call */
826    /*Small primes have been removed, n is the new unfactored part.*/
827     N = n;
828   else
829   {
830     GEN q = powgi(gel(here,0),gel(here,1)); /* primary factor found.*/
831     if (DEBUGLEVEL>2) fprintferr("IFAC: Stop: Primary factor: %Z\n",q);
832     N = diviiexact(gel(state,1),q); /* divide unfactored part by q */
833   }
834   affii(N, gel(state,1)); /* affect()ed to state[1] to preserve stack. */
835   if (DEBUGLEVEL>2) fprintferr("IFAC: Stop: remaining %Z\n",state[1]);
836   /* check the stopping criterion, then restore stack */
837   res = cmpii(gel(state,1),gel(state,2)) <= 0;
838   avma = ltop; return res;
839 }
840 
841 static GEN
auxdecomp0(GEN n,ulong all,long hint)842 auxdecomp0(GEN n, ulong all, long hint)
843 {
844   return auxdecomp1(n, NULL, gen_0, all, hint);
845 }
846 
847 GEN
auxdecomp(GEN n,long all)848 auxdecomp(GEN n, long all)
849 {
850   return auxdecomp0(n,all,decomp_default_hint);
851 }
852 
853 /* see before ifac_crack() in ifactor1.c for current semantics of `hint'
854    (factorint's `flag') */
855 GEN
factorint(GEN n,long flag)856 factorint(GEN n, long flag)
857 {
858   return auxdecomp0(n,1,flag);
859 }
860 
861 GEN
Z_factor(GEN n)862 Z_factor(GEN n)
863 {
864   return auxdecomp0(n,1,decomp_default_hint);
865 }
866 
867 /* Factor until the unfactored part is smaller than limit. */
868 GEN
Z_factor_limit(GEN n,GEN limit)869 Z_factor_limit(GEN n, GEN limit)
870 {
871   GEN state = cgetg(3,t_VEC);
872  /* icopy is mainly done to allocate memory for affect().
873   * Currently state[1] is discarded in initial call to ifac_break_limit */
874   gel(state,1) = icopy(n);
875   gel(state,2) = gcopy(limit);
876   return auxdecomp1(n, &ifac_break_limit, state, 1, decomp_default_hint);
877 }
878 
879 GEN
smallfact(GEN n)880 smallfact(GEN n)
881 {
882   return boundfact(n,0);
883 }
884 
885 GEN
gboundfact(GEN n,long lim)886 gboundfact(GEN n, long lim)
887 {
888   return garith_proto2gs(boundfact,n,lim);
889 }
890 
891 GEN
boundfact(GEN n,long lim)892 boundfact(GEN n, long lim)
893 {
894   GEN p1, p2;
895   pari_sp av = avma;
896 
897   if (lim <= 1) lim = 0;
898   switch(typ(n))
899   {
900     case t_INT: return auxdecomp(n,lim);
901     case t_FRAC:
902       p1 = auxdecomp(gel(n,1),lim);
903       p2 = auxdecomp(gel(n,2),lim); gel(p2,2) = gneg_i(gel(p2,2));
904       return gerepilecopy(av, merge_factor_i(p1,p2));
905   }
906   pari_err(arither1);
907   return NULL; /* not reached */
908 }
909 /***********************************************************************/
910 /**                                                                   **/
911 /**                    SIMPLE FACTORISATIONS                          **/
912 /**                                                                   **/
913 /***********************************************************************/
914 
915 /* Factor n and output [p,e] where
916  * p, e are vecsmall with n = prod{p[i]^e[i]} */
917 GEN
factoru(ulong n)918 factoru(ulong n)
919 {
920   pari_sp ltop = avma;
921   GEN F = Z_factor(utoi(n)), P = gel(F,1), E = gel(F,2);
922   long i, l = lg(P);
923   GEN f = cgetg(3,t_VEC);
924   GEN p = cgetg(l,t_VECSMALL);
925   GEN e = cgetg(l,t_VECSMALL);
926   pari_sp lbot = avma;
927   gel(f,1) = p;
928   gel(f,2) = e;
929   for (i = 1; i < l; i++)
930   {
931     p[i] = itou(gel(P,i));
932     e[i] = itou(gel(E,i));
933   }
934   avma = lbot; return gerepileupto(ltop,f);
935 }
936 /* Factor n and output [p,e,c] where
937  * p, e and c are vecsmall with n = prod{p[i]^e[i]} and c[i] = p[i]^e[i] */
938 GEN
factoru_pow(ulong n)939 factoru_pow(ulong n)
940 {
941   pari_sp ltop = avma;
942   GEN F = Z_factor(utoi(n)), P = gel(F,1), E = gel(F,2);
943   long i, l = lg(P);
944   GEN f = cgetg(4,t_VEC);
945   GEN p = cgetg(l,t_VECSMALL);
946   GEN e = cgetg(l,t_VECSMALL);
947   GEN c = cgetg(l,t_VECSMALL);
948   pari_sp lbot = avma;
949   gel(f,1) = p;
950   gel(f,2) = e;
951   gel(f,3) = c;
952   for(i = 1; i < l; i++)
953   {
954     p[i] = itou(gel(P,i));
955     e[i] = itou(gel(E,i));
956     c[i] = itou(powiu(gel(P,i), e[i]));
957   }
958   avma = lbot; return gerepileupto(ltop,f);
959 }
960 
961 /***********************************************************************/
962 /**                                                                   **/
963 /**                    BASIC ARITHMETIC FUNCTIONS                     **/
964 /**                                                                   **/
965 /***********************************************************************/
966 
967 GEN
gmu(GEN n)968 gmu(GEN n) { return arith_proto(mu,n,1); }
969 
970 INLINE void
chk_arith(GEN n)971 chk_arith(GEN n) {
972   if (typ(n) != t_INT) pari_err(arither1);
973   if (!signe(n)) pari_err(talker, "zero argument in an arithmetic function");
974 }
975 
976 long
mu(GEN n)977 mu(GEN n)
978 {
979   byteptr d = diffptr+1; /* point at 3 - 2 */
980   pari_sp av = avma;
981   ulong p, lim;
982   long s, v;
983 
984   chk_arith(n); if (is_pm1(n)) return 1;
985   if (equaliu(n, 2)) return -1;
986   p = mod4(n); if (!p) return 0;
987   if (p == 2) { s = -1; n = shifti(n, -1); } else { s = 1; n = icopy(n); }
988   setsigne(n, 1);
989 
990   lim = tridiv_bound(n,1);
991   p = 2;
992   while (p < lim)
993   {
994     int stop;
995     NEXT_PRIME_VIADIFF(p,d);
996     v = Z_lvalrem_stop(n, p, &stop);
997     if (v > 1) { avma = av; return 0; }
998     if (v) s = -s;
999     if (stop) { avma = av; return is_pm1(n)? s: -s; }
1000   }
1001   if (BSW_psp(n)) { avma=av; return -s; }
1002   /* large composite without small factors */
1003   v = ifac_moebius(n, decomp_default_hint);
1004   avma = av; return (s<0 ? -v : v); /* correct also if v==0 */
1005 }
1006 
1007 GEN
gissquarefree(GEN x)1008 gissquarefree(GEN x) { return arith_proto(issquarefree,x,0); }
1009 
1010 long
Z_issquarefree(GEN x)1011 Z_issquarefree(GEN x)
1012 {
1013   byteptr d = diffptr+1;
1014   pari_sp av = avma;
1015   ulong p, lim;
1016   long v;
1017 
1018   if (!signe(x)) return 0;
1019   if (cmpiu(x, 2) <= 0) return 1;
1020   p = mod4(x); if (!p) return 0;
1021   x = (p == 2)? shifti(x, -1): icopy(x);
1022   setsigne(x, 1);
1023 
1024   lim = tridiv_bound(x,1);
1025   p = 2;
1026   while (p < lim)
1027   {
1028     int stop;
1029     NEXT_PRIME_VIADIFF(p,d);
1030     v = Z_lvalrem_stop(x, p, &stop);
1031     if (v > 1) { avma = av; return 0; }
1032     if (stop) { avma = av; return 1; }
1033   }
1034   if (BSW_psp(x)) { avma = av; return 1; }
1035   v = ifac_issquarefree(x, decomp_default_hint);
1036   avma = av; return v;
1037 }
1038 
1039 long
issquarefree(GEN x)1040 issquarefree(GEN x)
1041 {
1042   pari_sp av;
1043   GEN d;
1044   switch(typ(x))
1045   {
1046     case t_INT: return Z_issquarefree(x);
1047     case t_POL:
1048       if (!signe(x)) return 0;
1049       av = avma; d = ggcd(x, derivpol(x));
1050       avma = av; return (lg(d) == 3);
1051     default: pari_err(typeer,"issquarefree");
1052       return 0; /* not reached */
1053   }
1054 }
1055 
1056 GEN
gomega(GEN n)1057 gomega(GEN n) { return arith_proto(omega,n,1); }
1058 
1059 long
omega(GEN n)1060 omega(GEN n)
1061 {
1062   byteptr d = diffptr+1;
1063   pari_sp av = avma;
1064   long nb,v;
1065   ulong p, lim;
1066 
1067   chk_arith(n); if (is_pm1(n)) return 0;
1068   v = vali(n); nb = v ? 1 : 0;
1069   n = shifti(n, -v);
1070   if (is_pm1(n)) return nb;
1071   setsigne(n, 1);
1072 
1073   lim = tridiv_bound(n,1);
1074   p = 2;
1075   while (p < lim)
1076   {
1077     int stop;
1078     NEXT_PRIME_VIADIFF(p,d);
1079     v = Z_lvalrem_stop(n, p, &stop);
1080     if (v) nb++;
1081     if (stop) { avma = av; return is_pm1(n)? nb: nb+1; }
1082   }
1083   if (BSW_psp(n)) { avma = av; return nb+1; }
1084   /* large composite without small factors */
1085   nb += ifac_omega(n, decomp_default_hint);
1086   avma = av; return nb;
1087 }
1088 
1089 GEN
gbigomega(GEN n)1090 gbigomega(GEN n) { return arith_proto(bigomega,n,1); }
1091 
1092 long
bigomega(GEN n)1093 bigomega(GEN n)
1094 {
1095   byteptr d=diffptr+1;
1096   pari_sp av = avma;
1097   ulong p, lim;
1098   long nb,v;
1099 
1100   chk_arith(n); if (is_pm1(n)) return 0;
1101   nb = v = vali(n); n = shifti(n, -v);
1102   if (is_pm1(n)) { avma = av; return nb; }
1103   setsigne(n, 1);
1104 
1105   lim = tridiv_bound(n,1);
1106   p = 2;
1107   while (p < lim)
1108   {
1109     int stop;
1110     NEXT_PRIME_VIADIFF(p,d);
1111     v = Z_lvalrem_stop(n, p, &stop);
1112     nb += v;
1113     if (stop) { avma = av; return is_pm1(n)? nb: nb+1; }
1114   }
1115   if (BSW_psp(n)) { avma = av; return nb+1; }
1116   nb += ifac_bigomega(n, decomp_default_hint);
1117   avma = av; return nb;
1118 }
1119 
1120 GEN
gphi(GEN n)1121 gphi(GEN n) { return garith_proto(phi,n,1); }
1122 
1123 GEN
phi(GEN n)1124 phi(GEN n)
1125 {
1126   byteptr d = diffptr+1;
1127   pari_sp av = avma;
1128   GEN m;
1129   ulong p, lim;
1130   long v;
1131 
1132   chk_arith(n); if (is_pm1(n)) return gen_1;
1133   v = vali(n); n = shifti(n,-v); setsigne(n, 1);
1134   m = v > 1 ? int2n(v-1) : gen_1;
1135   if (is_pm1(n)) return gerepileuptoint(av,m);
1136 
1137   lim = tridiv_bound(n,1);
1138   p = 2;
1139   while (p < lim)
1140   {
1141     int stop;
1142     NEXT_PRIME_VIADIFF(p,d);
1143     v = Z_lvalrem_stop(n, p, &stop);
1144     if (v) {
1145       m = mulis(m, p-1);
1146       if (v > 2) m = mulii(m, powuu(p, v-1));
1147       else if (v == 2) m = mulis(m, p);
1148     }
1149     if (stop) {
1150       if (!is_pm1(n)) m = mulii(m, addis(n,-1));
1151       return gerepileuptoint(av,m);
1152     }
1153   }
1154   if (BSW_psp(n)) return gerepileuptoint(av, mulii(m, addis(n,-1)));
1155   m = mulii(m, ifac_totient(n, decomp_default_hint));
1156   return gerepileuptoint(av,m);
1157 }
1158 
1159 GEN
gnumbdiv(GEN n)1160 gnumbdiv(GEN n) { return garith_proto(numbdiv,n,1); }
1161 
1162 GEN
numbdiv(GEN n)1163 numbdiv(GEN n)
1164 {
1165   byteptr d = diffptr+1;
1166   pari_sp av = avma;
1167   GEN m;
1168   long v;
1169   ulong p, lim;
1170 
1171   chk_arith(n); if (is_pm1(n)) return gen_1;
1172   v = vali(n); n = shifti(n,-v); setsigne(n,1);
1173   m = utoipos(v+1);
1174   if (is_pm1(n)) return gerepileuptoint(av,m);
1175 
1176   lim = tridiv_bound(n,1);
1177   p = 2;
1178   while (p < lim)
1179   {
1180     int stop;
1181     NEXT_PRIME_VIADIFF(p,d);
1182     v = Z_lvalrem_stop(n, p, &stop);
1183     if (v) m = mulis(m, v+1);
1184     if (stop)
1185     {
1186       if (!is_pm1(n)) m = shifti(m,1);
1187       return gerepileuptoint(av,m);
1188     }
1189   }
1190   if(BSW_psp(n)) return gerepileuptoint(av, shifti(m,1));
1191   m = mulii(m, ifac_numdiv(n, decomp_default_hint));
1192   return gerepileuptoint(av,m);
1193 }
1194 
1195 GEN
gsumdiv(GEN n)1196 gsumdiv(GEN n) { return garith_proto(sumdiv,n,1); }
1197 
1198 GEN
sumdiv(GEN n)1199 sumdiv(GEN n)
1200 {
1201   byteptr d = diffptr+1;
1202   pari_sp av = avma;
1203   GEN m;
1204   ulong p, lim;
1205   long v;
1206 
1207   chk_arith(n); if (is_pm1(n)) return gen_1;
1208   v = vali(n); n = shifti(n,-v); setsigne(n,1);
1209   m = v ? addsi(-1, int2n(v+1)) : gen_1;
1210   if (is_pm1(n)) return gerepileuptoint(av,m);
1211 
1212   lim = tridiv_bound(n,1);
1213   p = 2;
1214   while (p < lim)
1215   {
1216     int stop;
1217     NEXT_PRIME_VIADIFF(p,d);
1218     v = Z_lvalrem_stop(n, p, &stop);
1219     if (v)
1220     {
1221       GEN m1 = utoipos(p+1);
1222       long i;
1223       for (i = 1; i < v; i++) m1 = addsi(1, mului(p,m1));
1224       m = mulii(m1,m);
1225     }
1226     if (stop)
1227     {
1228       if (!is_pm1(n)) m = mulii(m,addis(n,1));
1229       return gerepileuptoint(av, m);
1230     }
1231   }
1232   if(BSW_psp(n)) return gerepileuptoint(av, mulii(m,addsi(1,n)));
1233   m = mulii(m, ifac_sumdiv(n, decomp_default_hint));
1234   return gerepileuptoint(av,m);
1235 }
1236 
1237 GEN
gsumdivk(GEN n,long k)1238 gsumdivk(GEN n, long k) { return garith_proto2gs(sumdivk,n,k); }
1239 
1240 GEN
sumdivk(GEN n,long k)1241 sumdivk(GEN n, long k)
1242 {
1243   byteptr d = diffptr+1;
1244   pari_sp av = avma;
1245   GEN n1, m;
1246   ulong p, lim;
1247   long k1,v;
1248 
1249   if (!k) return numbdiv(n);
1250   if (k == 1) return sumdiv(n);
1251   chk_arith(n); if (is_pm1(n)) return gen_1;
1252   k1 = k; n1 = n;
1253   if (k < 0)  k = -k;
1254   if (k == 1) { m = sumdiv(n); goto fin; }
1255   v = vali(n); n = shifti(n,-v); setsigne(n,1);
1256   m = gen_1;
1257   while (v--)  m = addsi(1,shifti(m,k));
1258   if (is_pm1(n)) goto fin;
1259 
1260   lim = tridiv_bound(n,1);
1261   p = 2;
1262   while (p < lim)
1263   {
1264     int stop;
1265     NEXT_PRIME_VIADIFF(p,d);
1266     v = Z_lvalrem_stop(n, p, &stop);
1267     if (v)
1268     {
1269       long i;
1270       GEN pk = powuu(p,k), m1 = addsi(1,pk);
1271       for (i = 1; i < v; i++) m1 = addsi(1, mulii(pk,m1));
1272       m = mulii(m1,m);
1273     }
1274     if (stop)
1275     {
1276       if (!is_pm1(n)) m = mulii(m, addsi(1, powiu(n,k)));
1277       goto fin;
1278     }
1279   }
1280   if (BSW_psp(n)) { m = mulii(m, addsi(1, powiu(n,k))); goto fin; }
1281   m = mulii(m, ifac_sumdivk(n, k, decomp_default_hint));
1282  fin:
1283   if (k1 < 0) m = gdiv(m, powiu(n1,k));
1284   return gerepileupto(av,m);
1285 }
1286 
1287 /***********************************************************************/
1288 /**                                                                   **/
1289 /**                MISCELLANEOUS ARITHMETIC FUNCTIONS                 **/
1290 /**         (all of these ultimately depend on auxdecomp())           **/
1291 /**                                                                   **/
1292 /***********************************************************************/
1293 
1294 GEN
divisors(GEN n)1295 divisors(GEN n)
1296 {
1297   pari_sp av = avma;
1298   long i, j, l, tn = typ(n);
1299   ulong nbdiv;
1300   int isint = 1;
1301   GEN *d, *t, *t1, *t2, *t3, P, E, e;
1302 
1303   if (tn == t_MAT && lg(n) == 3)
1304   {
1305     P = gel(n,1); l = lg(P);
1306     for (i = 1; i < l; i++)
1307       if (typ(gel(P,i)) != t_INT) { isint = 0; break; }
1308   }
1309   else
1310   {
1311     if (tn == t_INT)
1312       n = auxdecomp(n,1);
1313     else {
1314       if (is_matvec_t(tn)) pari_err(typeer,"divisors");
1315       isint = 0;
1316       n = factor(n);
1317     }
1318     P = gel(n,1); l = lg(P);
1319   }
1320   E = gel(n,2);
1321   if (isint && l>1 && signe(P[1]) < 0) { E++; P++; l--; } /* skip -1 */
1322   e = cgetg(l, t_VECSMALL);
1323   nbdiv = 1;
1324   for (i=1; i<l; i++)
1325   {
1326     e[i] = itos(gel(E,i));
1327     if (e[i] < 0) pari_err(talker, "denominators not allowed in divisors");
1328     nbdiv = itou_or_0( muluu(nbdiv, 1+e[i]) );
1329   }
1330   if (!nbdiv || nbdiv & ~LGBITS)
1331     pari_err(talker, "too many divisors (more than %ld)", LGBITS - 1);
1332   d = t = (GEN*) cgetg(nbdiv+1,t_VEC);
1333   *++d = gen_1;
1334   if (isint)
1335   {
1336     for (i=1; i<l; i++)
1337       for (t1=t,j=e[i]; j; j--,t1=t2)
1338         for (t2=d, t3=t1; t3<t2; ) *++d = mulii(*++t3, gel(P,i));
1339     e = sort((GEN)t);
1340   } else {
1341     for (i=1; i<l; i++)
1342       for (t1=t,j=e[i]; j; j--,t1=t2)
1343         for (t2=d, t3=t1; t3<t2; ) *++d = gmul(*++t3, gel(P,i));
1344     e = (GEN)t;
1345   }
1346   return gerepileupto(av, e);
1347 }
1348 
1349 GEN
corepartial(GEN n,long all)1350 corepartial(GEN n, long all)
1351 {
1352   pari_sp av = avma;
1353   long i;
1354   GEN fa, P, E, c = gen_1;
1355 
1356   fa = auxdecomp(n,all);
1357   P = gel(fa,1);
1358   E = gel(fa,2);
1359   for (i=1; i<lg(P); i++)
1360     if (mod2(gel(E,i))) c = mulii(c,gel(P,i));
1361   return gerepileuptoint(av, c);
1362 }
1363 
1364 GEN
core2partial(GEN n,long all)1365 core2partial(GEN n, long all)
1366 {
1367   pari_sp av = avma;
1368   long i;
1369   GEN fa, P, E, c = gen_1, f = gen_1;
1370 
1371   fa = auxdecomp(n,all);
1372   P = gel(fa,1);
1373   E = gel(fa,2);
1374   for (i=1; i<lg(P); i++)
1375   {
1376     long e = itos(gel(E,i));
1377     if (e & 1)  c = mulii(c, gel(P,i));
1378     if (e != 1) f = mulii(f, gpowgs(gel(P,i), e >> 1));
1379   }
1380   return gerepilecopy(av, mkvec2(c,f));
1381 }
1382 
core(GEN n)1383 GEN core(GEN n)  { return corepartial(n,1); }
core2(GEN n)1384 GEN core2(GEN n) { return core2partial(n,1); }
1385 
1386 GEN
core0(GEN n,long flag)1387 core0(GEN n,long flag) { return flag? core2(n): core(n); }
1388 
1389 static long
_mod4(GEN c)1390 _mod4(GEN c) { long r = mod4(c); if (signe(c) < 0) r = 4-r; return r; }
1391 
1392 GEN
coredisc(GEN n)1393 coredisc(GEN n)
1394 {
1395   pari_sp av = avma;
1396   GEN c = core(n);
1397   if (_mod4(c)==1) return c;
1398   return gerepileuptoint(av, shifti(c,2));
1399 }
1400 
1401 GEN
coredisc2(GEN n)1402 coredisc2(GEN n)
1403 {
1404   pari_sp av = avma;
1405   GEN y = core2(n);
1406   GEN c = gel(y,1), f = gel(y,2);
1407   if (_mod4(c)==1) return y;
1408   y = cgetg(3,t_VEC);
1409   gel(y,1) = shifti(c,2);
1410   gel(y,2) = gmul2n(f,-1); return gerepileupto(av, y);
1411 }
1412 
1413 GEN
coredisc0(GEN n,long flag)1414 coredisc0(GEN n,long flag) { return flag? coredisc2(n): coredisc(n); }
1415 
1416 /*********************************************************************/
1417 /**                                                                 **/
1418 /**                       BINARY DECOMPOSITION                      **/
1419 /**                                                                 **/
1420 /*********************************************************************/
1421 
1422 INLINE GEN
inegate(GEN z)1423 inegate(GEN z) { return subsi(-1,z); }
1424 
1425 GEN
binaire(GEN x)1426 binaire(GEN x)
1427 {
1428   ulong m,u;
1429   long i,lx,ex,ly,tx=typ(x);
1430   GEN y,p1,p2;
1431 
1432   switch(tx)
1433   {
1434     case t_INT:
1435     {
1436       GEN xp=int_MSW(x);
1437       lx=lgefint(x);
1438       if (lx==2) return mkvec(gen_0);
1439       ly = BITS_IN_LONG+1; m=HIGHBIT; u=*xp;
1440       while (!(m & u)) { m>>=1; ly--; }
1441       y = cgetg(ly+((lx-3)<<TWOPOTBITS_IN_LONG),t_VEC); ly=1;
1442       do { gel(y,ly) = m & u ? gen_1 : gen_0; ly++; } while (m>>=1);
1443       for (i=3; i<lx; i++)
1444       {
1445         m=HIGHBIT; xp=int_precW(xp); u=*xp;
1446         do { gel(y,ly) = m & u ? gen_1 : gen_0; ly++; } while (m>>=1);
1447       }
1448       break;
1449     }
1450     case t_REAL:
1451       ex=expo(x);
1452       if (!signe(x))
1453       {
1454         lx=1+max(-ex,0); y=cgetg(lx,t_VEC);
1455         for (i=1; i<lx; i++) gel(y,i) = gen_0;
1456         return y;
1457       }
1458 
1459       lx=lg(x); y=cgetg(3,t_VEC);
1460       if (ex > bit_accuracy(lx)) pari_err(precer,"binary");
1461       p1 = cgetg(max(ex,0)+2,t_VEC);
1462       p2 = cgetg(bit_accuracy(lx)-ex,t_VEC);
1463       gel(y,1) = p1;
1464       gel(y,2) = p2;
1465       ly = -ex; ex++; m = HIGHBIT;
1466       if (ex<=0)
1467       {
1468         gel(p1,1) = gen_0; for (i=1; i <= -ex; i++) gel(p2,i) = gen_0;
1469         i=2;
1470       }
1471       else
1472       {
1473         ly=1;
1474         for (i=2; i<lx && ly<=ex; i++)
1475         {
1476           m=HIGHBIT; u=x[i];
1477           do
1478             { gel(p1,ly) = (m & u) ? gen_1 : gen_0; ly++; }
1479           while ((m>>=1) && ly<=ex);
1480         }
1481         ly=1;
1482         if (m) i--; else m=HIGHBIT;
1483       }
1484       for (; i<lx; i++)
1485       {
1486         u=x[i];
1487         do { gel(p2,ly) = m & u ? gen_1 : gen_0; ly++; } while (m>>=1);
1488         m=HIGHBIT;
1489       }
1490       break;
1491 
1492     case t_VEC: case t_COL: case t_MAT:
1493       lx=lg(x); y=cgetg(lx,tx);
1494       for (i=1; i<lx; i++) gel(y,i) = binaire(gel(x,i));
1495       break;
1496     default: pari_err(typeer,"binary");
1497       return NULL; /* not reached */
1498   }
1499   return y;
1500 }
1501 
1502 /* return 1 if bit n of x is set, 0 otherwise */
1503 long
bittest(GEN x,long n)1504 bittest(GEN x, long n)
1505 {
1506   ulong u;
1507   long l;
1508 
1509   if (!signe(x) || n < 0) return 0;
1510   if (signe(x) < 0)
1511   {
1512     pari_sp ltop=avma;
1513     long b = !bittest(inegate(x),n);
1514     avma=ltop;
1515     return b;
1516   }
1517   l = n>>TWOPOTBITS_IN_LONG;
1518   if (l+3 > lgefint(x)) return 0;
1519   u = (1UL << (n & (BITS_IN_LONG-1))) & *int_W(x,l);
1520   return u? 1: 0;
1521 }
1522 
1523 GEN
gbittest(GEN x,GEN n)1524 gbittest(GEN x, GEN n)
1525 {
1526   return arith_proto2gs(bittest,x,itos(n));
1527 }
1528 
1529 /***********************************************************************/
1530 /**                                                                   **/
1531 /**                          BITMAP OPS                               **/
1532 /** x & y (and), x | y (or), x ^ y (xor), ~x (neg), x & ~y (negimply) **/
1533 /**                                                                   **/
1534 /***********************************************************************/
1535 /* Truncate a non-negative integer to a number of bits.  */
1536 static GEN
ibittrunc(GEN x,long bits)1537 ibittrunc(GEN x, long bits)
1538 {
1539   long known_zero_words, xl = lgefint(x) - 2;
1540   long len_out = ((bits + BITS_IN_LONG - 1) >> TWOPOTBITS_IN_LONG);
1541 
1542   if (xl < len_out)
1543       return x;
1544       /* Check whether mask is trivial */
1545   if (!(bits & (BITS_IN_LONG - 1))) {
1546       if (xl == len_out)
1547           return x;
1548   } else if (len_out <= xl) {
1549     GEN xi = int_W(x, len_out-1);
1550     /* Non-trival mask is given by a formula, if x is not
1551        normalized, this works even in the exceptional case */
1552     *xi = *xi & ((1 << (bits & (BITS_IN_LONG - 1))) - 1);
1553     if (*xi && xl == len_out) return x;
1554   }
1555   /* Normalize */
1556   known_zero_words = xl - len_out;
1557   if (known_zero_words < 0) known_zero_words = 0;
1558   return int_normalize(x, known_zero_words);
1559 }
1560 
1561 GEN
gbitneg(GEN x,long bits)1562 gbitneg(GEN x, long bits)
1563 {
1564   const ulong uzero = 0;
1565   long xl, len_out, i;
1566 
1567   if (typ(x) != t_INT)
1568       pari_err(typeer, "bitwise negation");
1569   if (bits < -1)
1570       pari_err(talker, "negative exponent in bitwise negation");
1571   if (bits == -1) return inegate(x);
1572   if (bits == 0) return gen_0;
1573   if (signe(x) < 0) { /* Consider as if mod big power of 2 */
1574     pari_sp ltop = avma;
1575     return gerepileuptoint(ltop, ibittrunc(inegate(x), bits));
1576   }
1577   xl = lgefint(x);
1578   len_out = ((bits + BITS_IN_LONG - 1) >> TWOPOTBITS_IN_LONG) + 2;
1579   if (len_out > xl) { /* Need to grow */
1580     GEN out, outp, xp = int_MSW(x);
1581     out = cgeti(len_out); out[1] = evalsigne(1) | evallgefint(len_out);
1582     outp = int_MSW(out);
1583     if (!(bits & (BITS_IN_LONG - 1)))
1584       *outp = ~uzero;
1585     else
1586       *outp = (1 << (bits & (BITS_IN_LONG - 1))) - 1;
1587     for (i = 3; i < len_out - xl + 2; i++)
1588     {
1589       outp = int_precW(outp); *outp = ~uzero;
1590     }
1591     for (     ; i < len_out; i++)
1592     {
1593       outp = int_precW(outp); *outp = ~*xp;
1594       xp   = int_precW(xp);
1595     }
1596     return out;
1597   }
1598   x = icopy(x);
1599   for (i = 2; i < xl; i++) x[i] = ~x[i];
1600   return ibittrunc(int_normalize(x,0), bits);
1601 }
1602 
1603 /* bitwise 'and' of two positive integers (any integers, but we ignore sign).
1604  * Inputs are not necessary normalized. */
1605 GEN
ibitand(GEN x,GEN y)1606 ibitand(GEN x, GEN y)
1607 {
1608   long lx, ly, lout;
1609   long *xp, *yp, *outp;
1610   GEN out;
1611   long i;
1612 
1613   if (!signe(x) || !signe(y)) return gen_0;
1614   lx=lgefint(x); ly=lgefint(y);
1615   lout = min(lx,ly); /* > 2 */
1616   xp = int_LSW(x);
1617   yp = int_LSW(y);
1618   out = cgeti(lout); out[1] = evalsigne(1) | evallgefint(lout);
1619   outp = int_LSW(out);
1620   for (i=2; i<lout; i++)
1621   {
1622     *outp = (*xp) & (*yp);
1623     outp  = int_nextW(outp);
1624     xp    = int_nextW(xp);
1625     yp    = int_nextW(yp);
1626   }
1627   if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1628   return out;
1629 }
1630 
1631 /* bitwise 'or' of absolute values of two integers */
1632 GEN
ibitor(GEN x,GEN y)1633 ibitor(GEN x, GEN y)
1634 {
1635   long lx, ly;
1636   long *xp, *yp, *outp;
1637   GEN  out;
1638   long i;
1639   if (!signe(x)) return absi(y);
1640   if (!signe(y)) return absi(x);
1641 
1642   lx = lgefint(x); xp = int_LSW(x);
1643   ly = lgefint(y); yp = int_LSW(y);
1644   if (lx < ly) swapspec(xp,yp,lx,ly);
1645   /* lx > 2 */
1646   out = cgeti(lx); out[1] = evalsigne(1) | evallgefint(lx);
1647   outp = int_LSW(out);
1648   for (i=2;i<ly;i++)
1649   {
1650     *outp = (*xp) | (*yp);
1651     outp  = int_nextW(outp);
1652     xp    = int_nextW(xp);
1653     yp    = int_nextW(yp);
1654   }
1655   for (   ;i<lx;i++)
1656   {
1657     *outp = *xp;
1658     outp  = int_nextW(outp);
1659     xp    = int_nextW(xp);
1660   }
1661   /* If input is normalized, this is not needed */
1662   if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1663   return out;
1664 }
1665 
1666 /* bitwise 'xor' of absolute values of two integers */
1667 GEN
ibitxor(GEN x,GEN y)1668 ibitxor(GEN x, GEN y)
1669 {
1670   long lx, ly;
1671   long *xp, *yp, *outp;
1672   GEN  out;
1673   long i;
1674   if (!signe(x)) return absi(y);
1675   if (!signe(y)) return absi(x);
1676 
1677   lx = lgefint(x); xp = int_LSW(x);
1678   ly = lgefint(y); yp = int_LSW(y);
1679   if (lx < ly) swapspec(xp,yp,lx,ly);
1680   /* lx > 2 */
1681   out = cgeti(lx); out[1] = evalsigne(1) | evallgefint(lx);
1682   outp = int_LSW(out);
1683   for (i=2;i<ly;i++)
1684   {
1685     *outp = (*xp) ^ (*yp);
1686     outp  = int_nextW(outp);
1687     xp    = int_nextW(xp);
1688     yp    = int_nextW(yp);
1689   }
1690   for (   ;i<lx;i++)
1691   {
1692     *outp = *xp;
1693     outp  = int_nextW(outp);
1694     xp    = int_nextW(xp);
1695   }
1696   if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1697   return out;
1698 }
1699 
1700 /* bitwise 'negimply' of absolute values of two integers */
1701 /* "negimply(x,y)" is ~(x => y) == ~(~x | y) == x & ~y   */
1702 GEN
ibitnegimply(GEN x,GEN y)1703 ibitnegimply(GEN x, GEN y)
1704 {
1705   long lx, ly, lin;
1706   long *xp, *yp, *outp;
1707   GEN out;
1708   long i;
1709   if (!signe(x)) return gen_0;
1710   if (!signe(y)) return absi(x);
1711 
1712   lx = lgefint(x); xp = int_LSW(x);
1713   ly = lgefint(y); yp = int_LSW(y);
1714   lin = min(lx,ly);
1715   out = cgeti(lx); out[1] = evalsigne(1) | evallgefint(lx);
1716   outp = int_LSW(out);
1717   for (i=2; i<lin; i++)
1718   {
1719     *outp = (*xp) & ~(*yp);
1720     outp  = int_nextW(outp);
1721     xp    = int_nextW(xp);
1722     yp    = int_nextW(yp);
1723   }
1724   for (   ;i<lx;i++)
1725   {
1726     *outp = *xp;
1727     outp  = int_nextW(outp);
1728     xp    = int_nextW(xp);
1729   }
1730   if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1731   return out;
1732 }
1733 
1734 #define signs(x,y) (((signe(x) >= 0) << 1) | (signe(y) >= 0))
1735 
1736 GEN
gbitor(GEN x,GEN y)1737 gbitor(GEN x, GEN y)
1738 {
1739   pari_sp ltop = avma;
1740   GEN z;
1741 
1742   if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise or");
1743   switch (signs(x, y))
1744   {
1745     case 3: /*1,1*/
1746       return ibitor(x,y);
1747     case 2: /*1,-1*/
1748       z = ibitnegimply(inegate(y),x);
1749       break;
1750     case 1: /*-1,1*/
1751       z = ibitnegimply(inegate(x),y);
1752       break;
1753     case 0: /*-1,-1*/
1754       z = ibitand(inegate(x),inegate(y));
1755       break;
1756     default: return NULL;
1757   }
1758   return gerepileuptoint(ltop, inegate(z));
1759 }
1760 
1761 GEN
gbitand(GEN x,GEN y)1762 gbitand(GEN x, GEN y)
1763 {
1764   pari_sp ltop = avma;
1765   GEN z;
1766 
1767   if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise and");
1768   switch (signs(x, y))
1769   {
1770     case 3: /*1,1*/
1771       return ibitand(x,y);
1772     case 2: /*1,-1*/
1773       z = ibitnegimply(x,inegate(y));
1774       break;
1775     case 1: /*-1,1*/
1776       z = ibitnegimply(y,inegate(x));
1777       break;
1778     case 0: /*-1,-1*/
1779       z = inegate(ibitor(inegate(x),inegate(y)));
1780       break;
1781     default: return NULL;
1782   }
1783   return gerepileuptoint(ltop, z);
1784 }
1785 
1786 GEN
gbitxor(GEN x,GEN y)1787 gbitxor(GEN x, GEN y)
1788 {
1789   pari_sp ltop = avma;
1790   GEN z;
1791 
1792   if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise xor");
1793   switch (signs(x, y))
1794   {
1795     case 3: /*1,1*/
1796       return ibitxor(x,y);
1797     case 2: /*1,-1*/
1798       z = inegate(ibitxor(x,inegate(y)));
1799       break;
1800     case 1: /*-1,1*/
1801       z = inegate(ibitxor(inegate(x),y));
1802       break;
1803     case 0: /*-1,-1*/
1804       z = ibitxor(inegate(x),inegate(y));
1805       break;
1806     default: return NULL;
1807   }
1808   return gerepileuptoint(ltop,z);
1809 }
1810 
1811 /* x & ~y */
1812 GEN
gbitnegimply(GEN x,GEN y)1813 gbitnegimply(GEN x, GEN y)
1814 {
1815   pari_sp ltop = avma;
1816   GEN z;
1817 
1818   if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise negated imply");
1819   switch (signs(x, y))
1820   {
1821     case 3: /*1,1*/
1822       return ibitnegimply(x,y);
1823     case 2: /*1,-1*/
1824       z = ibitand(x,inegate(y));
1825       break;
1826     case 1: /*-1,1*/
1827       z = inegate(ibitor(y,inegate(x)));
1828       break;
1829     case 0: /*-1,-1*/
1830       z = ibitnegimply(inegate(y),inegate(x));
1831       break;
1832     default: return NULL;
1833   }
1834   return gerepileuptoint(ltop,z);
1835 }
1836