1 /* $Id: arith2.c 8535 2007-04-03 12:24:25Z bill $
2
3 Copyright (C) 2000 The PARI group.
4
5 This file is part of the PARI/GP package.
6
7 PARI/GP is free software; you can redistribute it and/or modify it under the
8 terms of the GNU General Public License as published by the Free Software
9 Foundation. It is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY WHATSOEVER.
11
12 Check the License for details. You should have received a copy of it, along
13 with the package; see the file 'COPYING'. If not, write to the Free Software
14 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
15
16 /*********************************************************************/
17 /** **/
18 /** ARITHMETIC FUNCTIONS **/
19 /** (second part) **/
20 /** **/
21 /*********************************************************************/
22 #include "pari.h"
23 #include "paripriv.h"
24 /***********************************************************************/
25 /** **/
26 /** PRIME NUMBERS **/
27 /** **/
28 /***********************************************************************/
29
30 /* assume all primes up to 59359 are precomputed */
31 GEN
prime(long n)32 prime(long n)
33 {
34 byteptr p;
35 ulong prime;
36
37 if (n <= 0) pari_err(talker, "n-th prime meaningless if n = %ld",n);
38 if (n < 1000) {
39 p = diffptr;
40 prime = 0;
41 } else if (n < 2000) {
42 n -= 1000; p = diffptr+1000;
43 prime = 7919;
44 } else if (n < 3000) {
45 n -= 2000; p = diffptr+2000;
46 prime = 17389;
47 } else if (n < 4000) {
48 n -= 3000; p = diffptr+3000;
49 prime = 27449;
50 } else if (n < 5000) {
51 n -= 4000; p = diffptr+4000;
52 prime = 37813;
53 } else if (n < 6000) {
54 n -= 5000; p = diffptr+5000;
55 prime = 48611;
56 } else if (n < 10000 || maxprime() < 500000) {
57 n -= 6000; p = diffptr+6000;
58 prime = 59359;
59 } else if (n < 20000) {
60 n -= 10000; p = diffptr+10000;
61 prime = 104729;
62 } else if (n < 30000) {
63 n -= 20000; p = diffptr+20000;
64 prime = 224737;
65 } else if (n < 40000) {
66 n -= 30000; p = diffptr+30000;
67 prime = 350377;
68 } else {
69 n -= 40000; p = diffptr+40000;
70 prime = 479909;
71 }
72 while (n--) NEXT_PRIME_VIADIFF_CHECK(prime,p);
73 return utoipos(prime);
74 }
75
76 GEN
primepi(GEN x)77 primepi(GEN x)
78 {
79 pari_sp av = avma;
80 byteptr p = diffptr;
81 ulong prime = 0, res = 0, n;
82 GEN N = typ(x) == t_INT? x: gfloor(x);
83
84 if (typ(N) != t_INT || signe(N) <= 0) pari_err(typeer, "primepi");
85 avma = av; n = itou(N); maxprime_check(n);
86 while (prime <= n) { res++; NEXT_PRIME_VIADIFF(prime,p); }
87 return utoi(res-1);
88 }
89
90 GEN
primes(long n)91 primes(long n)
92 {
93 byteptr p = diffptr;
94 ulong prime = 0;
95 GEN y,z;
96
97 if (n < 0) n = 0;
98 z = y = cgetg(n+1,t_VEC);
99 while (n--)
100 {
101 NEXT_PRIME_VIADIFF_CHECK(prime,p);
102 gel(++z, 0) = utoi(prime);
103 }
104 return y;
105 }
106
107 /* Building/Rebuilding the diffptr table. The actual work is done by the
108 * following two subroutines; the user entry point is the function
109 * initprimes() below. initprimes1() is the old algorithm, called when
110 * maxnum (size) is moderate. */
111 static byteptr
initprimes1(ulong size,long * lenp,long * lastp)112 initprimes1(ulong size, long *lenp, long *lastp)
113 {
114 long k;
115 byteptr q, r, s, fin, p = (byteptr) gpmalloc(size+2);
116
117 memset(p, 0, size + 2); fin = p + size;
118 for (r=q=p,k=1; r<=fin; )
119 {
120 do { r+=k; k+=2; r+=k; } while (*++q);
121 for (s=r; s<=fin; s+=k) *s = 1;
122 }
123 r = p; *r++ = 2; *r++ = 1; /* 2 and 3 */
124 for (s=q=r-1; ; s=q)
125 {
126 do q++; while (*q);
127 if (q > fin) break;
128 *r++ = (unsigned char) ((q-s) << 1);
129 }
130 *r++ = 0;
131 *lenp = r - p;
132 *lastp = ((s - p) << 1) + 1;
133 return (byteptr) gprealloc(p,r-p);
134 }
135
136 /* Timing in ms (Athlon/850; reports 512K of secondary cache; looks
137 like there is 64K of quickier cache too).
138
139 arena| 30m 100m 300m 1000m 2000m <-- primelimit
140 =================================================
141 16K 1.1053 1.1407 1.2589 1.4368 1.6086
142 24K 1.0000 1.0625 1.1320 1.2443 1.3095
143 32K 1.0000 1.0469 1.0761 1.1336 1.1776
144 48K 1.0000 1.0000 1.0254 1.0445 1.0546
145 50K 1.0000 1.0000 1.0152 1.0345 1.0464
146 52K 1.0000 1.0000 1.0203 1.0273 1.0362
147 54K 1.0000 1.0000 1.0812 1.0216 1.0281
148 56K 1.0526 1.0000 1.0051 1.0144 1.0205
149 58K 1.0000 1.0000 1.0000 1.0086 1.0123
150 60K 0.9473 0.9844 1.0051 1.0014 1.0055
151 62K 1.0000 0.9844 0.9949 0.9971 0.9993
152 64K 1.0000 1.0000 1.0000 1.0000 1.0000
153 66K 1.2632 1.2187 1.2183 1.2055 1.1953
154 68K 1.4211 1.4844 1.4721 1.4425 1.4188
155 70K 1.7368 1.7188 1.7107 1.6767 1.6421
156 72K 1.9474 1.9531 1.9594 1.9023 1.8573
157 74K 2.2105 2.1875 2.1827 2.1207 2.0650
158 76K 2.4211 2.4219 2.4010 2.3305 2.2644
159 78K 2.5789 2.6250 2.6091 2.5330 2.4571
160 80K 2.8421 2.8125 2.8223 2.7213 2.6380
161 84K 3.1053 3.1875 3.1776 3.0819 2.9802
162 88K 3.5263 3.5312 3.5228 3.4124 3.2992
163 92K 3.7895 3.8438 3.8375 3.7213 3.5971
164 96K 4.0000 4.1093 4.1218 3.9986 3.9659
165 112K 4.3684 4.5781 4.5787 4.4583 4.6115
166 128K 4.7368 4.8750 4.9188 4.8075 4.8997
167 192K 5.5263 5.7188 5.8020 5.6911 5.7064
168 256K 6.0000 6.2187 6.3045 6.1954 6.1033
169 384K 6.7368 6.9531 7.0405 6.9181 6.7912
170 512K 7.3158 7.5156 7.6294 7.5000 7.4654
171 768K 9.1579 9.4531 9.6395 9.5014 9.1075
172 1024K 10.368 10.7497 10.9999 10.878 10.8201
173 1536K 12.579 13.3124 13.7660 13.747 13.4739
174 2048K 13.737 14.4839 15.0509 15.151 15.1282
175 3076K 14.789 15.5780 16.2993 16.513 16.3365
176
177 Now the same number relative to the model
178
179 (1 + 0.36*sqrt(primelimit)/arena) * (arena <= 64 ? 1.05 : (arena-64)**0.38)
180
181 [SLOW2_IN_ROOTS = 0.36, ALPHA = 0.38]
182
183 arena| 30m 100m 300m 1000m 2000m <-- primelimit
184 =================================================
185 16K 1.014 0.9835 0.9942 0.9889 1.004
186 24K 0.9526 0.9758 0.9861 0.9942 0.981
187 32K 0.971 0.9939 0.9884 0.9849 0.9806
188 48K 0.9902 0.9825 0.996 0.9945 0.9885
189 50K 0.9917 0.9853 0.9906 0.9926 0.9907
190 52K 0.9932 0.9878 0.9999 0.9928 0.9903
191 54K 0.9945 0.9902 1.064 0.9939 0.9913
192 56K 1.048 0.9924 0.9925 0.993 0.9921
193 58K 0.9969 0.9945 0.9909 0.9932 0.9918
194 60K 0.9455 0.9809 0.9992 0.9915 0.9923
195 62K 0.9991 0.9827 0.9921 0.9924 0.9929
196 64K 1 1 1 1 1
197 66K 1.02 0.9849 0.9857 0.9772 0.9704
198 68K 0.8827 0.9232 0.9176 0.9025 0.8903
199 70K 0.9255 0.9177 0.9162 0.9029 0.8881
200 72K 0.9309 0.936 0.9429 0.9219 0.9052
201 74K 0.9715 0.9644 0.967 0.9477 0.9292
202 76K 0.9935 0.9975 0.9946 0.9751 0.9552
203 78K 0.9987 1.021 1.021 1.003 0.9819
204 80K 1.047 1.041 1.052 1.027 1.006
205 84K 1.052 1.086 1.092 1.075 1.053
206 88K 1.116 1.125 1.133 1.117 1.096
207 92K 1.132 1.156 1.167 1.155 1.134
208 96K 1.137 1.177 1.195 1.185 1.196
209 112K 1.067 1.13 1.148 1.15 1.217
210 128K 1.04 1.083 1.113 1.124 1.178
211 192K 0.9368 0.985 1.025 1.051 1.095
212 256K 0.8741 0.9224 0.9619 0.995 1.024
213 384K 0.8103 0.8533 0.8917 0.9282 0.9568
214 512K 0.7753 0.8135 0.8537 0.892 0.935
215 768K 0.8184 0.8638 0.9121 0.9586 0.9705
216 1024K 0.8241 0.8741 0.927 0.979 1.03
217 1536K 0.8505 0.9212 0.9882 1.056 1.096
218 2048K 0.8294 0.8954 0.9655 1.041 1.102
219
220 */
221
222 #ifndef SLOW2_IN_ROOTS
223 /* SLOW2_IN_ROOTS below 3: some slowdown starts to be noticable
224 * when things fit into the cache on Sparc.
225 * XXX The choice of 2.6 gives a slowdown of 1-2% on UltraSparcII,
226 * but makes calculations for "maximum" of 436273009 (not applicable anymore)
227 * fit into 256K cache (still common for some architectures).
228 *
229 * One may change it when small caches become uncommon, but the gain
230 * is not going to be very noticable... */
231 # ifdef i386 /* gcc defines this? */
232 # define SLOW2_IN_ROOTS 0.36
233 # else
234 # define SLOW2_IN_ROOTS 2.6
235 # endif
236 #endif
237 #ifndef CACHE_ARENA
238 # ifdef i386 /* gcc defines this? */
239 /* Due to smaller SLOW2_IN_ROOTS, smaller arena is OK; fit L1 cache */
240 # define CACHE_ARENA (63 * 1024UL) /* No slowdown even with 64K L1 cache */
241 # else
242 # define CACHE_ARENA (200 * 1024UL) /* No slowdown even with 256K L2 cache */
243 # endif
244 #endif
245
246 #define CACHE_ALPHA (0.38) /* Cache performance model parameter */
247 #define CACHE_CUTOFF (0.018) /* Cache performance not smooth here */
248
249 static double slow2_in_roots = SLOW2_IN_ROOTS;
250
251 typedef struct {
252 ulong arena;
253 double power;
254 double cutoff;
255 } cache_model_t;
256
257 static cache_model_t cache_model = { CACHE_ARENA, CACHE_ALPHA, CACHE_CUTOFF };
258
259 /* Assume that some calculation requires a chunk of memory to be
260 accessed often in more or less random fashion (as in sieving).
261 Assume that the calculation can be done in steps by subdividing the
262 chunk into smaller subchunks (arenas) and treathing them
263 separately. Assume that the overhead of subdivision is equivalent
264 to the number of arenas.
265
266 Find an optimal size of the arena taking into account the overhead
267 of subdivision, and the overhead of arena not fitting into the
268 cache. Assume that arenas of size slow2_in_roots slows down the
269 calculation 2x (comparing to very big arenas; when cache hits do
270 not matter). Since cache performance varies wildly with
271 architecture, load, and wheather (especially with cache coloring
272 enabled), use an idealized cache model based on benchmarks above.
273
274 Assume that an independent region of FIXED_TO_CACHE bytes is accessed
275 very often concurrently with the arena access.
276 */
277 static ulong
good_arena_size(ulong slow2_size,ulong total,ulong fixed_to_cache,cache_model_t * cache_model,long model_type)278 good_arena_size(ulong slow2_size, ulong total, ulong fixed_to_cache,
279 cache_model_t *cache_model, long model_type)
280 {
281 ulong asize, cache_arena = cache_model->arena;
282 double Xmin, Xmax, A, B, C1, C2, D, V;
283 double alpha = cache_model->power, cut_off = cache_model->cutoff;
284
285 /* Estimated relative slowdown,
286 with overhead = max((fixed_to_cache+arena)/cache_arena - 1, 0):
287
288 1 + slow2_size/arena due to initialization overhead;
289
290 max(1, 4.63 * overhead^0.38 ) due to footprint > cache size.
291
292 [The latter is hard to substantiate theoretically, but this
293 function describes benchmarks pretty close; it does not hurt that
294 one can minimize it explicitly too ;-). The switch between
295 diffenent choices of max() happens whe overhead=0.018.]
296
297 Thus the problem is minimizing (1 + slow2_size/arena)*overhead**0.29.
298 This boils down to F=((X+A)/(X+B))X^alpha, X=overhead,
299 B = (1 - fixed_to_cache/cache_arena), A = B +
300 slow2_size/cache_arena, alpha = 0.38, and X>=0.018, X>-B. It
301 turns out that the minimization problem is not very nasty. (As
302 usual with minimization problems which depend on parameters,
303 different values of A,B lead to different algorithms. However,
304 the boundaries between the domains in (A,B) plane where different
305 algorithms work are explicitly calculatable.)
306
307 Thus we need to find the rightmost root of (X+A)*(X+B) -
308 alpha(A-B)X to the right of 0.018 (if such exists and is below
309 Xmax). Then we manually check the remaining region [0, 0.018].
310
311 Since we cannot trust the purely-experimental cache-hit slowdown
312 function, as a sanity check always prefer fitting into the
313 cache (or "almost fitting") if F-law predicts that the larger
314 value of the arena provides less than 10% speedup.
315
316 */
317
318 if (model_type != 0)
319 pari_err(talker, "unsupported type of cache model"); /* Future expansion */
320
321 /* The simplest case: we fit into cache */
322 if (total + fixed_to_cache <= cache_arena)
323 return total;
324 /* The simple case: fitting into cache doesn't slow us down more than 10% */
325 if (cache_arena - fixed_to_cache > 10 * slow2_size) {
326 asize = cache_arena - fixed_to_cache;
327 if (asize > total) asize = total; /* Automatically false... */
328 return asize;
329 }
330 /* Slowdown of not fitting into cache is significant. Try to optimize.
331 Do not be afraid to spend some time on optimization - in trivial
332 cases we do not reach this point; any gain we get should
333 compensate the time spent on optimization. */
334
335 B = (1 - ((double)fixed_to_cache)/cache_arena);
336 A = B + ((double)slow2_size)/cache_arena;
337 C2 = A*B;
338 C1 = (A + B - 1/alpha*(A - B))/2;
339 D = C1*C1 - C2;
340 if (D > 0)
341 V = cut_off*cut_off + 2*C1*cut_off + C2; /* Value at CUT_OFF */
342 else
343 V = 0; /* Peacify the warning */
344 Xmin = cut_off;
345 Xmax = ((double)total - fixed_to_cache)/cache_arena; /* Two candidates */
346
347 if ( D <= 0 || (V >= 0 && C1 + cut_off >= 0) ) /* slowdown increasing */
348 Xmax = cut_off; /* Only one candidate */
349 else if (V >= 0 && /* slowdown concave down */
350 ((Xmax + C1) <= 0 || (Xmax*Xmax + 2*C1*Xmax + C2) <= 0))
351 /* DO NOTHING */; /* Keep both candidates */
352 else if (V <= 0 && (Xmax*Xmax + 2*C1*Xmax + C2) <= 0) /* slowdown decreasing */
353 Xmin = cut_off; /* Only one candidate */
354 else /* Now we know: 2 roots, the largest is in CUT_OFF..Xmax */
355 Xmax = sqrt(D) - C1;
356 if (Xmax != Xmin) { /* Xmin == CUT_OFF; Check which one is better */
357 double v1 = (cut_off + A)/(cut_off + B);
358 double v2 = 2.33 * (Xmax + A)/(Xmax + B) * pow(Xmax, alpha);
359
360 if (1.1 * v2 >= v1) /* Prefer fitting into the cache if slowdown < 10% */
361 V = v1;
362 else {
363 Xmin = Xmax;
364 V = v2;
365 }
366 } else if (B > 0) /* We need V */
367 V = 2.33 * (Xmin + A)/(Xmin + B) * pow(Xmin, alpha);
368 if (B > 0 && 1.1 * V > A/B) /* Now Xmin is the minumum. Compare with 0 */
369 Xmin = 0;
370
371 asize = (ulong)((1 + Xmin)*cache_arena - fixed_to_cache);
372 if (asize > total) asize = total; /* May happen due to approximations */
373 return asize;
374 }
375
376 /* Use as in
377 install(set_optimize,lLDG) \\ Through some M too?
378 set_optimize(2,1) \\ disable dependence on limit
379 \\ 1: how much cache usable, 2: slowdown of setup, 3: alpha, 4: cutoff
380 \\ 2,3,4 are in units of 0.001
381
382 { time_primes_arena(ar,limit) = \\ ar = arena size in K
383 set_optimize(1,floor(ar*1024));
384 default(primelimit, 200 000); \\ 100000 results in *larger* malloc()!
385 gettime;
386 default(primelimit, floor(limit));
387 if(ar >= 1, ar=floor(ar));
388 print("arena "ar"K => "gettime"ms");
389 }
390 */
391 long
set_optimize(long what,GEN g)392 set_optimize(long what, GEN g)
393 {
394 long ret = 0;
395
396 switch (what) {
397 case 1:
398 ret = (long)cache_model.arena;
399 break;
400 case 2:
401 ret = (long)(slow2_in_roots * 1000);
402 break;
403 case 3:
404 ret = (long)(cache_model.power * 1000);
405 break;
406 case 4:
407 ret = (long)(cache_model.cutoff * 1000);
408 break;
409 default:
410 pari_err(talker, "panic: set_optimize");
411 break;
412 }
413 if (g != NULL) {
414 ulong val = itou(g);
415
416 switch (what) {
417 case 1: cache_model.arena = val; break;
418 case 2: slow2_in_roots = (double)val / 1000.; break;
419 case 3: cache_model.power = (double)val / 1000.; break;
420 case 4: cache_model.cutoff = (double)val / 1000.; break;
421 }
422 }
423 return ret;
424 }
425
426 static void
sieve_chunk(byteptr known_primes,ulong s,byteptr data,ulong count)427 sieve_chunk(byteptr known_primes, ulong s, byteptr data, ulong count)
428 { /* start must be odd;
429 prime differences (starting from (5-3)=2) start at known_primes[2],
430 are terminated by a 0 byte;
431
432 Checks cnt odd numbers starting at `start', setting bytes
433 starting at data to 0 or 1 depending on whether the
434 corresponding odd number is prime or not */
435 ulong p;
436 byteptr q;
437 register byteptr write_to = data; /* Better code with gcc 2.8.1 */
438 register ulong cnt = count; /* Better code with gcc 2.8.1 */
439 register ulong start = s; /* Better code with gcc 2.8.1 */
440 register ulong delta = 1; /* Better code with gcc 2.8.1 */
441
442 memset(data, 0, cnt);
443 start >>= 1; /* (start - 1)/2 */
444 start += cnt; /* Corresponds to the end */
445 cnt -= 1;
446 /* data corresponds to start. q runs over primediffs. */
447 /* Don't care about DIFFPTR_SKIP: false positives provide no problem */
448 for (q = known_primes + 1, p = 3; delta; delta = *++q, p += delta) {
449 /* first odd number which is >= start > p and divisible by p
450 = last odd number which is <= start + 2p - 1 and 0 (mod p)
451 = p + the last even number which is <= start + p - 1 and 0 (mod p)
452 = p + the last even number which is <= start + p - 2 and 0 (mod p)
453 = p + start + p - 2 - (start + p - 2) % 2p
454 = start + 2(p - 1 - ((start-1)/2 + (p-1)/2) % p). */
455 long off = cnt - ((start+(p>>1)) % p);
456
457 while (off >= 0) {
458 write_to[off] = 1;
459 off -= p;
460 }
461 }
462 }
463
464 /* Do not inline sieve_chunk()! It fits into registers in ix86 non-inlined */
465 void (*sieve_chunk_p)(byteptr known_primes, ulong s, byteptr data, ulong count)
466 = sieve_chunk;
467
468 /* Here's the workhorse. This is recursive, although normally the first
469 recursive call will bottom out and invoke initprimes1() at once.
470 (Not static; might conceivably be useful to someone in library mode) */
471 byteptr
initprimes0(ulong maxnum,long * lenp,ulong * lastp)472 initprimes0(ulong maxnum, long *lenp, ulong *lastp)
473 {
474 long size, alloced, psize;
475 byteptr q, fin, p, p1, fin1, plast, curdiff;
476 ulong last, remains, curlow, rootnum, asize;
477 ulong prime_above = 3;
478 byteptr p_prime_above;
479
480 if (maxnum <= 1ul<<17) /* Arbitrary. */
481 return initprimes1(maxnum>>1, lenp, (long*)lastp); /* Break recursion */
482
483 maxnum |= 1; /* make it odd. */
484
485 /* Checked to be enough up to 40e6, attained at 155893 */
486 /* Due to multibyte representation of large gaps, this estimate will
487 be broken by large enough maxnum. However, assuming exponential
488 distribution of gaps with the average log(n), we are safe up to
489 circa exp(-256/log(1/0.09)) = 1.5e46. OK with LONG_BITS <= 128. ;-) */
490 size = (long) (1.09 * maxnum/log((double)maxnum)) + 146;
491 p1 = (byteptr) gpmalloc(size);
492 rootnum = (ulong) sqrt((double)maxnum); /* cast it back to a long */
493 rootnum |= 1;
494 {
495 byteptr p2 = initprimes0(rootnum, &psize, &last); /* recursive call */
496 memcpy(p1, p2, psize); free(p2);
497 }
498 fin1 = p1 + psize - 1;
499 remains = (maxnum - rootnum) >> 1; /* number of odd numbers to check */
500
501 /* Actually, we access primes array of psize too; but we access it
502 consequently, thus we do not include it in fixed_to_cache */
503 asize = good_arena_size((ulong)(rootnum * slow2_in_roots), remains + 1, 0,
504 &cache_model, 0) - 1;
505 /* enough room on the stack ? */
506 alloced = (((byteptr)avma) <= ((byteptr)bot) + asize);
507 if (alloced)
508 p = (byteptr) gpmalloc(asize + 1);
509 else
510 p = (byteptr) bot;
511 fin = p + asize; /* the 0 sentinel goes at fin. */
512 curlow = rootnum + 2; /* First candidate: know primes up to rootnum (odd). */
513 curdiff = fin1;
514
515 /* During each iteration p..fin-1 represents a range of odd
516 numbers. plast is a pointer which represents the last prime seen,
517 it may point before p..fin-1. */
518 plast = p - ((rootnum - last) >> 1) - 1;
519 p_prime_above = p1 + 2;
520 while (remains) /* Cycle over arenas. Performance is not crucial */
521 {
522 unsigned char was_delta;
523
524 if (asize > remains) {
525 asize = remains;
526 fin = p + asize;
527 }
528 /* Fake the upper limit appropriate for the given arena */
529 while (prime_above*prime_above <= curlow + (asize << 1) && *p_prime_above)
530 prime_above += *p_prime_above++;
531 was_delta = *p_prime_above;
532 *p_prime_above = 0; /* Put a 0 sentinel for sieve_chunk */
533
534 (*sieve_chunk_p)(p1, curlow, p, asize);
535
536 *p_prime_above = was_delta; /* Restore */
537 p[asize] = 0; /* Put a 0 sentinel for ZZZ */
538 /* now q runs over addresses corresponding to primes */
539 for (q = p; ; plast = q++)
540 {
541 long d;
542
543 while (*q) q++; /* use ZZZ 0-sentinel at end */
544 if (q >= fin) break;
545 d = (q - plast) << 1;
546 while (d >= DIFFPTR_SKIP)
547 *curdiff++ = DIFFPTR_SKIP, d -= DIFFPTR_SKIP;
548 *curdiff++ = (unsigned char)d;
549 }
550 plast -= asize;
551 remains -= asize;
552 curlow += (asize<<1);
553 } /* while (remains) */
554 last = curlow - ((p - plast) << 1);
555 *curdiff++ = 0; /* sentinel */
556 *lenp = curdiff - p1;
557 *lastp = last;
558 if (alloced) free(p);
559 return (byteptr) gprealloc(p1, *lenp);
560 }
561 #if 0 /* not yet... GN */
562 /* The diffptr table will contain at least 6548 entries (up to and including
563 the 6547th prime, 65557, and the terminal null byte), because the isprime/
564 small-factor-extraction machinery wants to depend on everything up to 65539
565 being in the table, and we might as well go to a multiple of 4 Bytes.--GN */
566
567 void
568 init_tinyprimes_tridiv(byteptr p); /* in ifactor2.c */
569 #endif
570
571 static ulong _maxprime = 0;
572
573 ulong
maxprime(void)574 maxprime(void) { return _maxprime; }
575
576 void
maxprime_check(ulong c)577 maxprime_check(ulong c)
578 {
579 if (_maxprime < c) pari_err(primer2, c);
580 }
581
582 byteptr
initprimes(ulong maxnum)583 initprimes(ulong maxnum)
584 {
585 long len;
586 ulong last;
587 byteptr p;
588 /* The algorithm must see the next prime beyond maxnum, whence the +512. */
589 ulong maxnum1 = ((maxnum<65302?65302:maxnum)+512ul);
590
591 if ((maxnum>>1) > VERYBIGINT - 1024)
592 pari_err(talker, "Too large primelimit");
593 p = initprimes0(maxnum1, &len, &last);
594 #if 0 /* not yet... GN */
595 static int build_the_tables = 1;
596 if (build_the_tables) { init_tinyprimes_tridiv(p); build_the_tables=0; }
597 #endif
598 _maxprime = last; return p;
599 }
600
601 static void
cleanprimetab(void)602 cleanprimetab(void)
603 {
604 long i,j, lp = lg(primetab);
605
606 for (i=j=1; i < lp; i++)
607 if (primetab[i]) primetab[j++] = primetab[i];
608 setlg(primetab,j);
609 }
610
611 /* p is a single element or a row vector with "primes" to add to primetab.
612 * If p shares a non-trivial factor with another element in primetab, take it
613 * into account. */
614 GEN
addprimes(GEN p)615 addprimes(GEN p)
616 {
617 pari_sp av;
618 long i,k,tx,lp;
619 GEN L;
620
621 if (!p) return primetab;
622 tx = typ(p);
623 if (is_vec_t(tx))
624 {
625 for (i=1; i < lg(p); i++) (void)addprimes(gel(p,i));
626 return primetab;
627 }
628 if (tx != t_INT) pari_err(typeer,"addprime");
629 if (is_pm1(p)) return primetab;
630 av = avma; i = signe(p);
631 if (i == 0) pari_err(talker,"can't accept 0 in addprimes");
632 if (i < 0) p = absi(p);
633
634 lp = lg(primetab);
635 L = cgetg(2*lp,t_VEC); k = 1;
636 for (i=1; i < lp; i++)
637 {
638 GEN n = gel(primetab,i), d = gcdii(n, p);
639 if (! is_pm1(d))
640 {
641 if (!equalii(p,d)) gel(L,k++) = d;
642 gel(L,k++) = diviiexact(n,d);
643 gunclone(n); primetab[i] = 0;
644 }
645 }
646 primetab = (GEN) gprealloc(primetab, (lp+1)*sizeof(long));
647 gel(primetab,i) = gclone(p); setlg(primetab, lp+1);
648 if (k > 1) { cleanprimetab(); setlg(L,k); (void)addprimes(L); }
649 avma = av; return primetab;
650 }
651
652 static GEN
removeprime(GEN prime)653 removeprime(GEN prime)
654 {
655 long i;
656
657 if (typ(prime) != t_INT) pari_err(typeer,"removeprime");
658 for (i=lg(primetab) - 1; i; i--)
659 if (absi_equal(gel(primetab,i), prime))
660 {
661 gunclone(gel(primetab,i)); primetab[i]=0;
662 cleanprimetab(); break;
663 }
664 if (!i) pari_err(talker,"prime %Z is not in primetable", prime);
665 return primetab;
666 }
667
668 GEN
removeprimes(GEN prime)669 removeprimes(GEN prime)
670 {
671 long i,tx;
672
673 if (!prime) return primetab;
674 tx = typ(prime);
675 if (is_vec_t(tx))
676 {
677 if (prime == primetab)
678 {
679 for (i=1; i < lg(prime); i++) gunclone(gel(prime,i));
680 setlg(prime, 1);
681 }
682 else
683 {
684 for (i=1; i < lg(prime); i++) (void)removeprime(gel(prime,i));
685 }
686 return primetab;
687 }
688 return removeprime(prime);
689 }
690
691 /***********************************************************************/
692 /** **/
693 /** COMPUTING THE MATRIX OF PRIME DIVISORS AND EXPONENTS **/
694 /** **/
695 /***********************************************************************/
696 static const long decomp_default_hint = 0;
697
698 /* where to stop trial dividing in factorization */
699 static ulong
default_bound(GEN n,ulong all)700 default_bound(GEN n, ulong all)
701 {
702 ulong l;
703 if (all > 1) return all; /* use supplied limit */
704 if (!all) return (ulong)VERYBIGINT; /* smallfact() case */
705 l = (ulong)expi(n) + 1;
706 if (l <= 32) return 1UL<<14;
707 if (l <= 512) return (l-16) << 10;
708 return 1UL<<19; /* Rho is generally faster above this */
709 }
710 static ulong
tridiv_bound(GEN n,ulong all)711 tridiv_bound(GEN n, ulong all)
712 {
713 ulong p = maxprime(), l = default_bound(n, all);
714 return min(p, l);
715 }
716
717 static GEN
aux_end(GEN n,long nb)718 aux_end(GEN n, long nb)
719 {
720 GEN p1,p2, z = (GEN)avma;
721 long i;
722
723 if (n) gunclone(n);
724 p1 = cgetg(nb+1,t_COL);
725 p2 = cgetg(nb+1,t_COL);
726 for (i=nb; i; i--)
727 {
728 gel(p2,i) = z; z += lg(z);
729 gel(p1,i) = z; z += lg(z);
730 }
731 gel(z,1) = p1;
732 gel(z,2) = p2;
733 if (nb) (void)sort_factor_gen(z, absi_cmp);
734 return z;
735 }
736
737 static GEN
auxdecomp1(GEN n,long (* ifac_break)(GEN n,GEN pairs,GEN here,GEN state),GEN state,ulong all,long hint)738 auxdecomp1(GEN n, long (*ifac_break)(GEN n, GEN pairs, GEN here, GEN state),
739 GEN state, ulong all, long hint)
740 {
741 pari_sp av;
742 long pp[] = { evaltyp(t_INT)|_evallg(4), 0,0,0 };
743 long nb = 0, i, lp;
744 ulong p, k, lim;
745 byteptr d = diffptr+1; /* start at p = 3 */
746 #define STORE(x,e) { nb++; (void)x; (void)utoipos(e); }
747 #define STOREu(x,e) STORE(utoipos(x), e)
748 #define STOREi(x,e) STORE(icopy(x), e)
749
750 if (typ(n) != t_INT) pari_err(arither1);
751 i = signe(n); if (!i) pari_err(talker, "zero argument in factorint");
752 (void)cgetg(3,t_MAT);
753 if (i < 0) STORE(utoineg(1), 1);
754 if (is_pm1(n)) return aux_end(NULL,nb);
755
756 n = gclone(n); setsigne(n,1);
757 i = vali(n);
758 if (i)
759 {
760 STOREu(2, i);
761 av = avma; affii(shifti(n,-i), n); avma = av;
762 }
763 if (is_pm1(n)) return aux_end(n,nb);
764 lim = tridiv_bound(n, all);
765
766 /* trial division */
767 p = 2;
768 for(;;)
769 {
770 int stop;
771 NEXT_PRIME_VIADIFF(p,d);
772 if (p >= lim) break;
773
774 k = Z_lvalrem_stop(n, p, &stop);
775 if (k) STOREu(p, k);
776 if (stop)
777 {
778 if (!is_pm1(n)) STOREi(n, 1);
779 return aux_end(n,nb);
780 }
781 }
782
783 /* pp = square of biggest p tried so far */
784 av = avma; affii(muluu(p,p), pp); avma = av;
785
786 /* trial divide by the "special primes" (usually huge composites) */
787 lp = lg(primetab);
788 for (i=1; i<lp; i++)
789 if (dvdiiz(n,gel(primetab,i), n))
790 {
791 long k = 1; while (dvdiiz(n,gel(primetab,i), n)) k++;
792 STOREi(gel(primetab,i), k);
793 if (absi_cmp(pp, n) > 0)
794 {
795 if (!is_pm1(n)) STOREi(n, 1);
796 return aux_end(n,nb);
797 }
798 }
799
800 if (all != 1) hint = 15; /* smallfact: turn off all except pure powers */
801 else /* else test primality */
802 if (BSW_psp(n)) { STOREi(n, 1); return aux_end(n,nb); }
803
804 /* now we have a large composite */
805 if (ifac_break && (*ifac_break)(n,NULL,NULL,state)) /*initialize ifac_break*/
806 {
807 if (DEBUGLEVEL>2)
808 fprintferr("IFAC: (Partial fact.) Initial stop requested.\n");
809 }
810 else
811 nb += ifac_decomp_break(n, ifac_break, state, hint);
812
813 return aux_end(n, nb);
814 }
815
816 /* state[1]: current unfactored part.
817 * state[2]: limit. */
818 static long
ifac_break_limit(GEN n,GEN pairs,GEN here,GEN state)819 ifac_break_limit(GEN n, GEN pairs/*unused*/, GEN here, GEN state)
820 {
821 pari_sp ltop = avma;
822 GEN N;
823 int res;
824 (void)pairs;
825 if (!here) /* initial call */
826 /*Small primes have been removed, n is the new unfactored part.*/
827 N = n;
828 else
829 {
830 GEN q = powgi(gel(here,0),gel(here,1)); /* primary factor found.*/
831 if (DEBUGLEVEL>2) fprintferr("IFAC: Stop: Primary factor: %Z\n",q);
832 N = diviiexact(gel(state,1),q); /* divide unfactored part by q */
833 }
834 affii(N, gel(state,1)); /* affect()ed to state[1] to preserve stack. */
835 if (DEBUGLEVEL>2) fprintferr("IFAC: Stop: remaining %Z\n",state[1]);
836 /* check the stopping criterion, then restore stack */
837 res = cmpii(gel(state,1),gel(state,2)) <= 0;
838 avma = ltop; return res;
839 }
840
841 static GEN
auxdecomp0(GEN n,ulong all,long hint)842 auxdecomp0(GEN n, ulong all, long hint)
843 {
844 return auxdecomp1(n, NULL, gen_0, all, hint);
845 }
846
847 GEN
auxdecomp(GEN n,long all)848 auxdecomp(GEN n, long all)
849 {
850 return auxdecomp0(n,all,decomp_default_hint);
851 }
852
853 /* see before ifac_crack() in ifactor1.c for current semantics of `hint'
854 (factorint's `flag') */
855 GEN
factorint(GEN n,long flag)856 factorint(GEN n, long flag)
857 {
858 return auxdecomp0(n,1,flag);
859 }
860
861 GEN
Z_factor(GEN n)862 Z_factor(GEN n)
863 {
864 return auxdecomp0(n,1,decomp_default_hint);
865 }
866
867 /* Factor until the unfactored part is smaller than limit. */
868 GEN
Z_factor_limit(GEN n,GEN limit)869 Z_factor_limit(GEN n, GEN limit)
870 {
871 GEN state = cgetg(3,t_VEC);
872 /* icopy is mainly done to allocate memory for affect().
873 * Currently state[1] is discarded in initial call to ifac_break_limit */
874 gel(state,1) = icopy(n);
875 gel(state,2) = gcopy(limit);
876 return auxdecomp1(n, &ifac_break_limit, state, 1, decomp_default_hint);
877 }
878
879 GEN
smallfact(GEN n)880 smallfact(GEN n)
881 {
882 return boundfact(n,0);
883 }
884
885 GEN
gboundfact(GEN n,long lim)886 gboundfact(GEN n, long lim)
887 {
888 return garith_proto2gs(boundfact,n,lim);
889 }
890
891 GEN
boundfact(GEN n,long lim)892 boundfact(GEN n, long lim)
893 {
894 GEN p1, p2;
895 pari_sp av = avma;
896
897 if (lim <= 1) lim = 0;
898 switch(typ(n))
899 {
900 case t_INT: return auxdecomp(n,lim);
901 case t_FRAC:
902 p1 = auxdecomp(gel(n,1),lim);
903 p2 = auxdecomp(gel(n,2),lim); gel(p2,2) = gneg_i(gel(p2,2));
904 return gerepilecopy(av, merge_factor_i(p1,p2));
905 }
906 pari_err(arither1);
907 return NULL; /* not reached */
908 }
909 /***********************************************************************/
910 /** **/
911 /** SIMPLE FACTORISATIONS **/
912 /** **/
913 /***********************************************************************/
914
915 /* Factor n and output [p,e] where
916 * p, e are vecsmall with n = prod{p[i]^e[i]} */
917 GEN
factoru(ulong n)918 factoru(ulong n)
919 {
920 pari_sp ltop = avma;
921 GEN F = Z_factor(utoi(n)), P = gel(F,1), E = gel(F,2);
922 long i, l = lg(P);
923 GEN f = cgetg(3,t_VEC);
924 GEN p = cgetg(l,t_VECSMALL);
925 GEN e = cgetg(l,t_VECSMALL);
926 pari_sp lbot = avma;
927 gel(f,1) = p;
928 gel(f,2) = e;
929 for (i = 1; i < l; i++)
930 {
931 p[i] = itou(gel(P,i));
932 e[i] = itou(gel(E,i));
933 }
934 avma = lbot; return gerepileupto(ltop,f);
935 }
936 /* Factor n and output [p,e,c] where
937 * p, e and c are vecsmall with n = prod{p[i]^e[i]} and c[i] = p[i]^e[i] */
938 GEN
factoru_pow(ulong n)939 factoru_pow(ulong n)
940 {
941 pari_sp ltop = avma;
942 GEN F = Z_factor(utoi(n)), P = gel(F,1), E = gel(F,2);
943 long i, l = lg(P);
944 GEN f = cgetg(4,t_VEC);
945 GEN p = cgetg(l,t_VECSMALL);
946 GEN e = cgetg(l,t_VECSMALL);
947 GEN c = cgetg(l,t_VECSMALL);
948 pari_sp lbot = avma;
949 gel(f,1) = p;
950 gel(f,2) = e;
951 gel(f,3) = c;
952 for(i = 1; i < l; i++)
953 {
954 p[i] = itou(gel(P,i));
955 e[i] = itou(gel(E,i));
956 c[i] = itou(powiu(gel(P,i), e[i]));
957 }
958 avma = lbot; return gerepileupto(ltop,f);
959 }
960
961 /***********************************************************************/
962 /** **/
963 /** BASIC ARITHMETIC FUNCTIONS **/
964 /** **/
965 /***********************************************************************/
966
967 GEN
gmu(GEN n)968 gmu(GEN n) { return arith_proto(mu,n,1); }
969
970 INLINE void
chk_arith(GEN n)971 chk_arith(GEN n) {
972 if (typ(n) != t_INT) pari_err(arither1);
973 if (!signe(n)) pari_err(talker, "zero argument in an arithmetic function");
974 }
975
976 long
mu(GEN n)977 mu(GEN n)
978 {
979 byteptr d = diffptr+1; /* point at 3 - 2 */
980 pari_sp av = avma;
981 ulong p, lim;
982 long s, v;
983
984 chk_arith(n); if (is_pm1(n)) return 1;
985 if (equaliu(n, 2)) return -1;
986 p = mod4(n); if (!p) return 0;
987 if (p == 2) { s = -1; n = shifti(n, -1); } else { s = 1; n = icopy(n); }
988 setsigne(n, 1);
989
990 lim = tridiv_bound(n,1);
991 p = 2;
992 while (p < lim)
993 {
994 int stop;
995 NEXT_PRIME_VIADIFF(p,d);
996 v = Z_lvalrem_stop(n, p, &stop);
997 if (v > 1) { avma = av; return 0; }
998 if (v) s = -s;
999 if (stop) { avma = av; return is_pm1(n)? s: -s; }
1000 }
1001 if (BSW_psp(n)) { avma=av; return -s; }
1002 /* large composite without small factors */
1003 v = ifac_moebius(n, decomp_default_hint);
1004 avma = av; return (s<0 ? -v : v); /* correct also if v==0 */
1005 }
1006
1007 GEN
gissquarefree(GEN x)1008 gissquarefree(GEN x) { return arith_proto(issquarefree,x,0); }
1009
1010 long
Z_issquarefree(GEN x)1011 Z_issquarefree(GEN x)
1012 {
1013 byteptr d = diffptr+1;
1014 pari_sp av = avma;
1015 ulong p, lim;
1016 long v;
1017
1018 if (!signe(x)) return 0;
1019 if (cmpiu(x, 2) <= 0) return 1;
1020 p = mod4(x); if (!p) return 0;
1021 x = (p == 2)? shifti(x, -1): icopy(x);
1022 setsigne(x, 1);
1023
1024 lim = tridiv_bound(x,1);
1025 p = 2;
1026 while (p < lim)
1027 {
1028 int stop;
1029 NEXT_PRIME_VIADIFF(p,d);
1030 v = Z_lvalrem_stop(x, p, &stop);
1031 if (v > 1) { avma = av; return 0; }
1032 if (stop) { avma = av; return 1; }
1033 }
1034 if (BSW_psp(x)) { avma = av; return 1; }
1035 v = ifac_issquarefree(x, decomp_default_hint);
1036 avma = av; return v;
1037 }
1038
1039 long
issquarefree(GEN x)1040 issquarefree(GEN x)
1041 {
1042 pari_sp av;
1043 GEN d;
1044 switch(typ(x))
1045 {
1046 case t_INT: return Z_issquarefree(x);
1047 case t_POL:
1048 if (!signe(x)) return 0;
1049 av = avma; d = ggcd(x, derivpol(x));
1050 avma = av; return (lg(d) == 3);
1051 default: pari_err(typeer,"issquarefree");
1052 return 0; /* not reached */
1053 }
1054 }
1055
1056 GEN
gomega(GEN n)1057 gomega(GEN n) { return arith_proto(omega,n,1); }
1058
1059 long
omega(GEN n)1060 omega(GEN n)
1061 {
1062 byteptr d = diffptr+1;
1063 pari_sp av = avma;
1064 long nb,v;
1065 ulong p, lim;
1066
1067 chk_arith(n); if (is_pm1(n)) return 0;
1068 v = vali(n); nb = v ? 1 : 0;
1069 n = shifti(n, -v);
1070 if (is_pm1(n)) return nb;
1071 setsigne(n, 1);
1072
1073 lim = tridiv_bound(n,1);
1074 p = 2;
1075 while (p < lim)
1076 {
1077 int stop;
1078 NEXT_PRIME_VIADIFF(p,d);
1079 v = Z_lvalrem_stop(n, p, &stop);
1080 if (v) nb++;
1081 if (stop) { avma = av; return is_pm1(n)? nb: nb+1; }
1082 }
1083 if (BSW_psp(n)) { avma = av; return nb+1; }
1084 /* large composite without small factors */
1085 nb += ifac_omega(n, decomp_default_hint);
1086 avma = av; return nb;
1087 }
1088
1089 GEN
gbigomega(GEN n)1090 gbigomega(GEN n) { return arith_proto(bigomega,n,1); }
1091
1092 long
bigomega(GEN n)1093 bigomega(GEN n)
1094 {
1095 byteptr d=diffptr+1;
1096 pari_sp av = avma;
1097 ulong p, lim;
1098 long nb,v;
1099
1100 chk_arith(n); if (is_pm1(n)) return 0;
1101 nb = v = vali(n); n = shifti(n, -v);
1102 if (is_pm1(n)) { avma = av; return nb; }
1103 setsigne(n, 1);
1104
1105 lim = tridiv_bound(n,1);
1106 p = 2;
1107 while (p < lim)
1108 {
1109 int stop;
1110 NEXT_PRIME_VIADIFF(p,d);
1111 v = Z_lvalrem_stop(n, p, &stop);
1112 nb += v;
1113 if (stop) { avma = av; return is_pm1(n)? nb: nb+1; }
1114 }
1115 if (BSW_psp(n)) { avma = av; return nb+1; }
1116 nb += ifac_bigomega(n, decomp_default_hint);
1117 avma = av; return nb;
1118 }
1119
1120 GEN
gphi(GEN n)1121 gphi(GEN n) { return garith_proto(phi,n,1); }
1122
1123 GEN
phi(GEN n)1124 phi(GEN n)
1125 {
1126 byteptr d = diffptr+1;
1127 pari_sp av = avma;
1128 GEN m;
1129 ulong p, lim;
1130 long v;
1131
1132 chk_arith(n); if (is_pm1(n)) return gen_1;
1133 v = vali(n); n = shifti(n,-v); setsigne(n, 1);
1134 m = v > 1 ? int2n(v-1) : gen_1;
1135 if (is_pm1(n)) return gerepileuptoint(av,m);
1136
1137 lim = tridiv_bound(n,1);
1138 p = 2;
1139 while (p < lim)
1140 {
1141 int stop;
1142 NEXT_PRIME_VIADIFF(p,d);
1143 v = Z_lvalrem_stop(n, p, &stop);
1144 if (v) {
1145 m = mulis(m, p-1);
1146 if (v > 2) m = mulii(m, powuu(p, v-1));
1147 else if (v == 2) m = mulis(m, p);
1148 }
1149 if (stop) {
1150 if (!is_pm1(n)) m = mulii(m, addis(n,-1));
1151 return gerepileuptoint(av,m);
1152 }
1153 }
1154 if (BSW_psp(n)) return gerepileuptoint(av, mulii(m, addis(n,-1)));
1155 m = mulii(m, ifac_totient(n, decomp_default_hint));
1156 return gerepileuptoint(av,m);
1157 }
1158
1159 GEN
gnumbdiv(GEN n)1160 gnumbdiv(GEN n) { return garith_proto(numbdiv,n,1); }
1161
1162 GEN
numbdiv(GEN n)1163 numbdiv(GEN n)
1164 {
1165 byteptr d = diffptr+1;
1166 pari_sp av = avma;
1167 GEN m;
1168 long v;
1169 ulong p, lim;
1170
1171 chk_arith(n); if (is_pm1(n)) return gen_1;
1172 v = vali(n); n = shifti(n,-v); setsigne(n,1);
1173 m = utoipos(v+1);
1174 if (is_pm1(n)) return gerepileuptoint(av,m);
1175
1176 lim = tridiv_bound(n,1);
1177 p = 2;
1178 while (p < lim)
1179 {
1180 int stop;
1181 NEXT_PRIME_VIADIFF(p,d);
1182 v = Z_lvalrem_stop(n, p, &stop);
1183 if (v) m = mulis(m, v+1);
1184 if (stop)
1185 {
1186 if (!is_pm1(n)) m = shifti(m,1);
1187 return gerepileuptoint(av,m);
1188 }
1189 }
1190 if(BSW_psp(n)) return gerepileuptoint(av, shifti(m,1));
1191 m = mulii(m, ifac_numdiv(n, decomp_default_hint));
1192 return gerepileuptoint(av,m);
1193 }
1194
1195 GEN
gsumdiv(GEN n)1196 gsumdiv(GEN n) { return garith_proto(sumdiv,n,1); }
1197
1198 GEN
sumdiv(GEN n)1199 sumdiv(GEN n)
1200 {
1201 byteptr d = diffptr+1;
1202 pari_sp av = avma;
1203 GEN m;
1204 ulong p, lim;
1205 long v;
1206
1207 chk_arith(n); if (is_pm1(n)) return gen_1;
1208 v = vali(n); n = shifti(n,-v); setsigne(n,1);
1209 m = v ? addsi(-1, int2n(v+1)) : gen_1;
1210 if (is_pm1(n)) return gerepileuptoint(av,m);
1211
1212 lim = tridiv_bound(n,1);
1213 p = 2;
1214 while (p < lim)
1215 {
1216 int stop;
1217 NEXT_PRIME_VIADIFF(p,d);
1218 v = Z_lvalrem_stop(n, p, &stop);
1219 if (v)
1220 {
1221 GEN m1 = utoipos(p+1);
1222 long i;
1223 for (i = 1; i < v; i++) m1 = addsi(1, mului(p,m1));
1224 m = mulii(m1,m);
1225 }
1226 if (stop)
1227 {
1228 if (!is_pm1(n)) m = mulii(m,addis(n,1));
1229 return gerepileuptoint(av, m);
1230 }
1231 }
1232 if(BSW_psp(n)) return gerepileuptoint(av, mulii(m,addsi(1,n)));
1233 m = mulii(m, ifac_sumdiv(n, decomp_default_hint));
1234 return gerepileuptoint(av,m);
1235 }
1236
1237 GEN
gsumdivk(GEN n,long k)1238 gsumdivk(GEN n, long k) { return garith_proto2gs(sumdivk,n,k); }
1239
1240 GEN
sumdivk(GEN n,long k)1241 sumdivk(GEN n, long k)
1242 {
1243 byteptr d = diffptr+1;
1244 pari_sp av = avma;
1245 GEN n1, m;
1246 ulong p, lim;
1247 long k1,v;
1248
1249 if (!k) return numbdiv(n);
1250 if (k == 1) return sumdiv(n);
1251 chk_arith(n); if (is_pm1(n)) return gen_1;
1252 k1 = k; n1 = n;
1253 if (k < 0) k = -k;
1254 if (k == 1) { m = sumdiv(n); goto fin; }
1255 v = vali(n); n = shifti(n,-v); setsigne(n,1);
1256 m = gen_1;
1257 while (v--) m = addsi(1,shifti(m,k));
1258 if (is_pm1(n)) goto fin;
1259
1260 lim = tridiv_bound(n,1);
1261 p = 2;
1262 while (p < lim)
1263 {
1264 int stop;
1265 NEXT_PRIME_VIADIFF(p,d);
1266 v = Z_lvalrem_stop(n, p, &stop);
1267 if (v)
1268 {
1269 long i;
1270 GEN pk = powuu(p,k), m1 = addsi(1,pk);
1271 for (i = 1; i < v; i++) m1 = addsi(1, mulii(pk,m1));
1272 m = mulii(m1,m);
1273 }
1274 if (stop)
1275 {
1276 if (!is_pm1(n)) m = mulii(m, addsi(1, powiu(n,k)));
1277 goto fin;
1278 }
1279 }
1280 if (BSW_psp(n)) { m = mulii(m, addsi(1, powiu(n,k))); goto fin; }
1281 m = mulii(m, ifac_sumdivk(n, k, decomp_default_hint));
1282 fin:
1283 if (k1 < 0) m = gdiv(m, powiu(n1,k));
1284 return gerepileupto(av,m);
1285 }
1286
1287 /***********************************************************************/
1288 /** **/
1289 /** MISCELLANEOUS ARITHMETIC FUNCTIONS **/
1290 /** (all of these ultimately depend on auxdecomp()) **/
1291 /** **/
1292 /***********************************************************************/
1293
1294 GEN
divisors(GEN n)1295 divisors(GEN n)
1296 {
1297 pari_sp av = avma;
1298 long i, j, l, tn = typ(n);
1299 ulong nbdiv;
1300 int isint = 1;
1301 GEN *d, *t, *t1, *t2, *t3, P, E, e;
1302
1303 if (tn == t_MAT && lg(n) == 3)
1304 {
1305 P = gel(n,1); l = lg(P);
1306 for (i = 1; i < l; i++)
1307 if (typ(gel(P,i)) != t_INT) { isint = 0; break; }
1308 }
1309 else
1310 {
1311 if (tn == t_INT)
1312 n = auxdecomp(n,1);
1313 else {
1314 if (is_matvec_t(tn)) pari_err(typeer,"divisors");
1315 isint = 0;
1316 n = factor(n);
1317 }
1318 P = gel(n,1); l = lg(P);
1319 }
1320 E = gel(n,2);
1321 if (isint && l>1 && signe(P[1]) < 0) { E++; P++; l--; } /* skip -1 */
1322 e = cgetg(l, t_VECSMALL);
1323 nbdiv = 1;
1324 for (i=1; i<l; i++)
1325 {
1326 e[i] = itos(gel(E,i));
1327 if (e[i] < 0) pari_err(talker, "denominators not allowed in divisors");
1328 nbdiv = itou_or_0( muluu(nbdiv, 1+e[i]) );
1329 }
1330 if (!nbdiv || nbdiv & ~LGBITS)
1331 pari_err(talker, "too many divisors (more than %ld)", LGBITS - 1);
1332 d = t = (GEN*) cgetg(nbdiv+1,t_VEC);
1333 *++d = gen_1;
1334 if (isint)
1335 {
1336 for (i=1; i<l; i++)
1337 for (t1=t,j=e[i]; j; j--,t1=t2)
1338 for (t2=d, t3=t1; t3<t2; ) *++d = mulii(*++t3, gel(P,i));
1339 e = sort((GEN)t);
1340 } else {
1341 for (i=1; i<l; i++)
1342 for (t1=t,j=e[i]; j; j--,t1=t2)
1343 for (t2=d, t3=t1; t3<t2; ) *++d = gmul(*++t3, gel(P,i));
1344 e = (GEN)t;
1345 }
1346 return gerepileupto(av, e);
1347 }
1348
1349 GEN
corepartial(GEN n,long all)1350 corepartial(GEN n, long all)
1351 {
1352 pari_sp av = avma;
1353 long i;
1354 GEN fa, P, E, c = gen_1;
1355
1356 fa = auxdecomp(n,all);
1357 P = gel(fa,1);
1358 E = gel(fa,2);
1359 for (i=1; i<lg(P); i++)
1360 if (mod2(gel(E,i))) c = mulii(c,gel(P,i));
1361 return gerepileuptoint(av, c);
1362 }
1363
1364 GEN
core2partial(GEN n,long all)1365 core2partial(GEN n, long all)
1366 {
1367 pari_sp av = avma;
1368 long i;
1369 GEN fa, P, E, c = gen_1, f = gen_1;
1370
1371 fa = auxdecomp(n,all);
1372 P = gel(fa,1);
1373 E = gel(fa,2);
1374 for (i=1; i<lg(P); i++)
1375 {
1376 long e = itos(gel(E,i));
1377 if (e & 1) c = mulii(c, gel(P,i));
1378 if (e != 1) f = mulii(f, gpowgs(gel(P,i), e >> 1));
1379 }
1380 return gerepilecopy(av, mkvec2(c,f));
1381 }
1382
core(GEN n)1383 GEN core(GEN n) { return corepartial(n,1); }
core2(GEN n)1384 GEN core2(GEN n) { return core2partial(n,1); }
1385
1386 GEN
core0(GEN n,long flag)1387 core0(GEN n,long flag) { return flag? core2(n): core(n); }
1388
1389 static long
_mod4(GEN c)1390 _mod4(GEN c) { long r = mod4(c); if (signe(c) < 0) r = 4-r; return r; }
1391
1392 GEN
coredisc(GEN n)1393 coredisc(GEN n)
1394 {
1395 pari_sp av = avma;
1396 GEN c = core(n);
1397 if (_mod4(c)==1) return c;
1398 return gerepileuptoint(av, shifti(c,2));
1399 }
1400
1401 GEN
coredisc2(GEN n)1402 coredisc2(GEN n)
1403 {
1404 pari_sp av = avma;
1405 GEN y = core2(n);
1406 GEN c = gel(y,1), f = gel(y,2);
1407 if (_mod4(c)==1) return y;
1408 y = cgetg(3,t_VEC);
1409 gel(y,1) = shifti(c,2);
1410 gel(y,2) = gmul2n(f,-1); return gerepileupto(av, y);
1411 }
1412
1413 GEN
coredisc0(GEN n,long flag)1414 coredisc0(GEN n,long flag) { return flag? coredisc2(n): coredisc(n); }
1415
1416 /*********************************************************************/
1417 /** **/
1418 /** BINARY DECOMPOSITION **/
1419 /** **/
1420 /*********************************************************************/
1421
1422 INLINE GEN
inegate(GEN z)1423 inegate(GEN z) { return subsi(-1,z); }
1424
1425 GEN
binaire(GEN x)1426 binaire(GEN x)
1427 {
1428 ulong m,u;
1429 long i,lx,ex,ly,tx=typ(x);
1430 GEN y,p1,p2;
1431
1432 switch(tx)
1433 {
1434 case t_INT:
1435 {
1436 GEN xp=int_MSW(x);
1437 lx=lgefint(x);
1438 if (lx==2) return mkvec(gen_0);
1439 ly = BITS_IN_LONG+1; m=HIGHBIT; u=*xp;
1440 while (!(m & u)) { m>>=1; ly--; }
1441 y = cgetg(ly+((lx-3)<<TWOPOTBITS_IN_LONG),t_VEC); ly=1;
1442 do { gel(y,ly) = m & u ? gen_1 : gen_0; ly++; } while (m>>=1);
1443 for (i=3; i<lx; i++)
1444 {
1445 m=HIGHBIT; xp=int_precW(xp); u=*xp;
1446 do { gel(y,ly) = m & u ? gen_1 : gen_0; ly++; } while (m>>=1);
1447 }
1448 break;
1449 }
1450 case t_REAL:
1451 ex=expo(x);
1452 if (!signe(x))
1453 {
1454 lx=1+max(-ex,0); y=cgetg(lx,t_VEC);
1455 for (i=1; i<lx; i++) gel(y,i) = gen_0;
1456 return y;
1457 }
1458
1459 lx=lg(x); y=cgetg(3,t_VEC);
1460 if (ex > bit_accuracy(lx)) pari_err(precer,"binary");
1461 p1 = cgetg(max(ex,0)+2,t_VEC);
1462 p2 = cgetg(bit_accuracy(lx)-ex,t_VEC);
1463 gel(y,1) = p1;
1464 gel(y,2) = p2;
1465 ly = -ex; ex++; m = HIGHBIT;
1466 if (ex<=0)
1467 {
1468 gel(p1,1) = gen_0; for (i=1; i <= -ex; i++) gel(p2,i) = gen_0;
1469 i=2;
1470 }
1471 else
1472 {
1473 ly=1;
1474 for (i=2; i<lx && ly<=ex; i++)
1475 {
1476 m=HIGHBIT; u=x[i];
1477 do
1478 { gel(p1,ly) = (m & u) ? gen_1 : gen_0; ly++; }
1479 while ((m>>=1) && ly<=ex);
1480 }
1481 ly=1;
1482 if (m) i--; else m=HIGHBIT;
1483 }
1484 for (; i<lx; i++)
1485 {
1486 u=x[i];
1487 do { gel(p2,ly) = m & u ? gen_1 : gen_0; ly++; } while (m>>=1);
1488 m=HIGHBIT;
1489 }
1490 break;
1491
1492 case t_VEC: case t_COL: case t_MAT:
1493 lx=lg(x); y=cgetg(lx,tx);
1494 for (i=1; i<lx; i++) gel(y,i) = binaire(gel(x,i));
1495 break;
1496 default: pari_err(typeer,"binary");
1497 return NULL; /* not reached */
1498 }
1499 return y;
1500 }
1501
1502 /* return 1 if bit n of x is set, 0 otherwise */
1503 long
bittest(GEN x,long n)1504 bittest(GEN x, long n)
1505 {
1506 ulong u;
1507 long l;
1508
1509 if (!signe(x) || n < 0) return 0;
1510 if (signe(x) < 0)
1511 {
1512 pari_sp ltop=avma;
1513 long b = !bittest(inegate(x),n);
1514 avma=ltop;
1515 return b;
1516 }
1517 l = n>>TWOPOTBITS_IN_LONG;
1518 if (l+3 > lgefint(x)) return 0;
1519 u = (1UL << (n & (BITS_IN_LONG-1))) & *int_W(x,l);
1520 return u? 1: 0;
1521 }
1522
1523 GEN
gbittest(GEN x,GEN n)1524 gbittest(GEN x, GEN n)
1525 {
1526 return arith_proto2gs(bittest,x,itos(n));
1527 }
1528
1529 /***********************************************************************/
1530 /** **/
1531 /** BITMAP OPS **/
1532 /** x & y (and), x | y (or), x ^ y (xor), ~x (neg), x & ~y (negimply) **/
1533 /** **/
1534 /***********************************************************************/
1535 /* Truncate a non-negative integer to a number of bits. */
1536 static GEN
ibittrunc(GEN x,long bits)1537 ibittrunc(GEN x, long bits)
1538 {
1539 long known_zero_words, xl = lgefint(x) - 2;
1540 long len_out = ((bits + BITS_IN_LONG - 1) >> TWOPOTBITS_IN_LONG);
1541
1542 if (xl < len_out)
1543 return x;
1544 /* Check whether mask is trivial */
1545 if (!(bits & (BITS_IN_LONG - 1))) {
1546 if (xl == len_out)
1547 return x;
1548 } else if (len_out <= xl) {
1549 GEN xi = int_W(x, len_out-1);
1550 /* Non-trival mask is given by a formula, if x is not
1551 normalized, this works even in the exceptional case */
1552 *xi = *xi & ((1 << (bits & (BITS_IN_LONG - 1))) - 1);
1553 if (*xi && xl == len_out) return x;
1554 }
1555 /* Normalize */
1556 known_zero_words = xl - len_out;
1557 if (known_zero_words < 0) known_zero_words = 0;
1558 return int_normalize(x, known_zero_words);
1559 }
1560
1561 GEN
gbitneg(GEN x,long bits)1562 gbitneg(GEN x, long bits)
1563 {
1564 const ulong uzero = 0;
1565 long xl, len_out, i;
1566
1567 if (typ(x) != t_INT)
1568 pari_err(typeer, "bitwise negation");
1569 if (bits < -1)
1570 pari_err(talker, "negative exponent in bitwise negation");
1571 if (bits == -1) return inegate(x);
1572 if (bits == 0) return gen_0;
1573 if (signe(x) < 0) { /* Consider as if mod big power of 2 */
1574 pari_sp ltop = avma;
1575 return gerepileuptoint(ltop, ibittrunc(inegate(x), bits));
1576 }
1577 xl = lgefint(x);
1578 len_out = ((bits + BITS_IN_LONG - 1) >> TWOPOTBITS_IN_LONG) + 2;
1579 if (len_out > xl) { /* Need to grow */
1580 GEN out, outp, xp = int_MSW(x);
1581 out = cgeti(len_out); out[1] = evalsigne(1) | evallgefint(len_out);
1582 outp = int_MSW(out);
1583 if (!(bits & (BITS_IN_LONG - 1)))
1584 *outp = ~uzero;
1585 else
1586 *outp = (1 << (bits & (BITS_IN_LONG - 1))) - 1;
1587 for (i = 3; i < len_out - xl + 2; i++)
1588 {
1589 outp = int_precW(outp); *outp = ~uzero;
1590 }
1591 for ( ; i < len_out; i++)
1592 {
1593 outp = int_precW(outp); *outp = ~*xp;
1594 xp = int_precW(xp);
1595 }
1596 return out;
1597 }
1598 x = icopy(x);
1599 for (i = 2; i < xl; i++) x[i] = ~x[i];
1600 return ibittrunc(int_normalize(x,0), bits);
1601 }
1602
1603 /* bitwise 'and' of two positive integers (any integers, but we ignore sign).
1604 * Inputs are not necessary normalized. */
1605 GEN
ibitand(GEN x,GEN y)1606 ibitand(GEN x, GEN y)
1607 {
1608 long lx, ly, lout;
1609 long *xp, *yp, *outp;
1610 GEN out;
1611 long i;
1612
1613 if (!signe(x) || !signe(y)) return gen_0;
1614 lx=lgefint(x); ly=lgefint(y);
1615 lout = min(lx,ly); /* > 2 */
1616 xp = int_LSW(x);
1617 yp = int_LSW(y);
1618 out = cgeti(lout); out[1] = evalsigne(1) | evallgefint(lout);
1619 outp = int_LSW(out);
1620 for (i=2; i<lout; i++)
1621 {
1622 *outp = (*xp) & (*yp);
1623 outp = int_nextW(outp);
1624 xp = int_nextW(xp);
1625 yp = int_nextW(yp);
1626 }
1627 if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1628 return out;
1629 }
1630
1631 /* bitwise 'or' of absolute values of two integers */
1632 GEN
ibitor(GEN x,GEN y)1633 ibitor(GEN x, GEN y)
1634 {
1635 long lx, ly;
1636 long *xp, *yp, *outp;
1637 GEN out;
1638 long i;
1639 if (!signe(x)) return absi(y);
1640 if (!signe(y)) return absi(x);
1641
1642 lx = lgefint(x); xp = int_LSW(x);
1643 ly = lgefint(y); yp = int_LSW(y);
1644 if (lx < ly) swapspec(xp,yp,lx,ly);
1645 /* lx > 2 */
1646 out = cgeti(lx); out[1] = evalsigne(1) | evallgefint(lx);
1647 outp = int_LSW(out);
1648 for (i=2;i<ly;i++)
1649 {
1650 *outp = (*xp) | (*yp);
1651 outp = int_nextW(outp);
1652 xp = int_nextW(xp);
1653 yp = int_nextW(yp);
1654 }
1655 for ( ;i<lx;i++)
1656 {
1657 *outp = *xp;
1658 outp = int_nextW(outp);
1659 xp = int_nextW(xp);
1660 }
1661 /* If input is normalized, this is not needed */
1662 if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1663 return out;
1664 }
1665
1666 /* bitwise 'xor' of absolute values of two integers */
1667 GEN
ibitxor(GEN x,GEN y)1668 ibitxor(GEN x, GEN y)
1669 {
1670 long lx, ly;
1671 long *xp, *yp, *outp;
1672 GEN out;
1673 long i;
1674 if (!signe(x)) return absi(y);
1675 if (!signe(y)) return absi(x);
1676
1677 lx = lgefint(x); xp = int_LSW(x);
1678 ly = lgefint(y); yp = int_LSW(y);
1679 if (lx < ly) swapspec(xp,yp,lx,ly);
1680 /* lx > 2 */
1681 out = cgeti(lx); out[1] = evalsigne(1) | evallgefint(lx);
1682 outp = int_LSW(out);
1683 for (i=2;i<ly;i++)
1684 {
1685 *outp = (*xp) ^ (*yp);
1686 outp = int_nextW(outp);
1687 xp = int_nextW(xp);
1688 yp = int_nextW(yp);
1689 }
1690 for ( ;i<lx;i++)
1691 {
1692 *outp = *xp;
1693 outp = int_nextW(outp);
1694 xp = int_nextW(xp);
1695 }
1696 if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1697 return out;
1698 }
1699
1700 /* bitwise 'negimply' of absolute values of two integers */
1701 /* "negimply(x,y)" is ~(x => y) == ~(~x | y) == x & ~y */
1702 GEN
ibitnegimply(GEN x,GEN y)1703 ibitnegimply(GEN x, GEN y)
1704 {
1705 long lx, ly, lin;
1706 long *xp, *yp, *outp;
1707 GEN out;
1708 long i;
1709 if (!signe(x)) return gen_0;
1710 if (!signe(y)) return absi(x);
1711
1712 lx = lgefint(x); xp = int_LSW(x);
1713 ly = lgefint(y); yp = int_LSW(y);
1714 lin = min(lx,ly);
1715 out = cgeti(lx); out[1] = evalsigne(1) | evallgefint(lx);
1716 outp = int_LSW(out);
1717 for (i=2; i<lin; i++)
1718 {
1719 *outp = (*xp) & ~(*yp);
1720 outp = int_nextW(outp);
1721 xp = int_nextW(xp);
1722 yp = int_nextW(yp);
1723 }
1724 for ( ;i<lx;i++)
1725 {
1726 *outp = *xp;
1727 outp = int_nextW(outp);
1728 xp = int_nextW(xp);
1729 }
1730 if ( !*int_MSW(out) ) out = int_normalize(out, 1);
1731 return out;
1732 }
1733
1734 #define signs(x,y) (((signe(x) >= 0) << 1) | (signe(y) >= 0))
1735
1736 GEN
gbitor(GEN x,GEN y)1737 gbitor(GEN x, GEN y)
1738 {
1739 pari_sp ltop = avma;
1740 GEN z;
1741
1742 if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise or");
1743 switch (signs(x, y))
1744 {
1745 case 3: /*1,1*/
1746 return ibitor(x,y);
1747 case 2: /*1,-1*/
1748 z = ibitnegimply(inegate(y),x);
1749 break;
1750 case 1: /*-1,1*/
1751 z = ibitnegimply(inegate(x),y);
1752 break;
1753 case 0: /*-1,-1*/
1754 z = ibitand(inegate(x),inegate(y));
1755 break;
1756 default: return NULL;
1757 }
1758 return gerepileuptoint(ltop, inegate(z));
1759 }
1760
1761 GEN
gbitand(GEN x,GEN y)1762 gbitand(GEN x, GEN y)
1763 {
1764 pari_sp ltop = avma;
1765 GEN z;
1766
1767 if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise and");
1768 switch (signs(x, y))
1769 {
1770 case 3: /*1,1*/
1771 return ibitand(x,y);
1772 case 2: /*1,-1*/
1773 z = ibitnegimply(x,inegate(y));
1774 break;
1775 case 1: /*-1,1*/
1776 z = ibitnegimply(y,inegate(x));
1777 break;
1778 case 0: /*-1,-1*/
1779 z = inegate(ibitor(inegate(x),inegate(y)));
1780 break;
1781 default: return NULL;
1782 }
1783 return gerepileuptoint(ltop, z);
1784 }
1785
1786 GEN
gbitxor(GEN x,GEN y)1787 gbitxor(GEN x, GEN y)
1788 {
1789 pari_sp ltop = avma;
1790 GEN z;
1791
1792 if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise xor");
1793 switch (signs(x, y))
1794 {
1795 case 3: /*1,1*/
1796 return ibitxor(x,y);
1797 case 2: /*1,-1*/
1798 z = inegate(ibitxor(x,inegate(y)));
1799 break;
1800 case 1: /*-1,1*/
1801 z = inegate(ibitxor(inegate(x),y));
1802 break;
1803 case 0: /*-1,-1*/
1804 z = ibitxor(inegate(x),inegate(y));
1805 break;
1806 default: return NULL;
1807 }
1808 return gerepileuptoint(ltop,z);
1809 }
1810
1811 /* x & ~y */
1812 GEN
gbitnegimply(GEN x,GEN y)1813 gbitnegimply(GEN x, GEN y)
1814 {
1815 pari_sp ltop = avma;
1816 GEN z;
1817
1818 if (typ(x) != t_INT || typ(y) != t_INT) pari_err(typeer, "bitwise negated imply");
1819 switch (signs(x, y))
1820 {
1821 case 3: /*1,1*/
1822 return ibitnegimply(x,y);
1823 case 2: /*1,-1*/
1824 z = ibitand(x,inegate(y));
1825 break;
1826 case 1: /*-1,1*/
1827 z = inegate(ibitor(y,inegate(x)));
1828 break;
1829 case 0: /*-1,-1*/
1830 z = ibitnegimply(inegate(y),inegate(x));
1831 break;
1832 default: return NULL;
1833 }
1834 return gerepileuptoint(ltop,z);
1835 }
1836