1 #line 2 "../src/kernel/none/mp.c"
2 /* $Id: mp.c 8000 2006-08-24 21:51:50Z kb $
3
4 Copyright (C) 2000-2003 The PARI group.
5
6 This file is part of the PARI/GP package.
7
8 PARI/GP is free software; you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation. It is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY WHATSOEVER.
12
13 Check the License for details. You should have received a copy of it, along
14 with the package; see the file 'COPYING'. If not, write to the Free Software
15 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
16
17 /***********************************************************************/
18 /** **/
19 /** MULTIPRECISION KERNEL **/
20 /** **/
21 /***********************************************************************/
22 #include "pari.h"
23 #include "paripriv.h"
24 #include "../src/kernel/none/tune-gen.h"
25
pari_kernel_init(void)26 int pari_kernel_init(void) { return 0; } /*nothing to do*/
27
28 /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
29 * GENs but pairs (long *a, long na) representing a list of digits (in basis
30 * BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no
31 * need to reintroduce codewords ] */
32
33 /* Normalize a non-negative integer */
34 GEN
int_normalize(GEN x,long known_zero_words)35 int_normalize(GEN x, long known_zero_words)
36 {
37 long lx = lgefint(x);
38 long i = 2 + known_zero_words;
39 for ( ; i < lx; i++)
40 if (x[i])
41 {
42 if (i != 2)
43 {
44 GEN x0 = x;
45 i -= 2; x += i;
46 if (x0 == (GEN)avma) avma = (pari_sp)x;
47 else stackdummy((pari_sp)(x0+i), (pari_sp)x0);
48 lx -= i;
49 x[0] = evaltyp(t_INT) | evallg(lx);
50 x[1] = evalsigne(1) | evallgefint(lx);
51 }
52 return x;
53 }
54 x[1] = evalsigne(0) | evallgefint(2); return x;
55 }
56
57 /***********************************************************************/
58 /** **/
59 /** ADDITION / SUBTRACTION **/
60 /** **/
61 /***********************************************************************/
62
63 GEN
setloop(GEN a)64 setloop(GEN a)
65 {
66 GEN z0 = (GEN)avma; (void)cgetg(lgefint(a) + 3, t_VECSMALL);
67 return icopy_av(a, z0); /* two cells of extra space before a */
68 }
69
70 /* we had a = setloop(?), then some incloops. Reset a to b */
71 GEN
resetloop(GEN a,GEN b)72 resetloop(GEN a, GEN b) {
73 long lb = lgefint(b);
74 a += lgefint(a) - lb;
75 a[0] = evaltyp(t_INT) | evallg(lb);
76 affii(b, a); return a;
77 }
78
79 /* assume a > 0, initialized by setloop. Do a++ */
80 static GEN
incpos(GEN a)81 incpos(GEN a)
82 {
83 long i, l = lgefint(a);
84 for (i=l-1; i>1; i--)
85 if (++a[i]) return a;
86 l++; a--; /* use extra cell */
87 a[0]=evaltyp(t_INT) | _evallg(l);
88 a[1]=evalsigne(1) | evallgefint(l);
89 a[2]=1; return a;
90 }
91
92 /* assume a < 0, initialized by setloop. Do a++ */
93 static GEN
incneg(GEN a)94 incneg(GEN a)
95 {
96 long l = lgefint(a)-1;
97 if (a[l]--)
98 {
99 if (l == 2 && !a[2])
100 {
101 a++; /* save one cell */
102 a[0] = evaltyp(t_INT) | _evallg(2);
103 a[1] = evalsigne(0) | evallgefint(2);
104 }
105 return a;
106 }
107 for (l--; l>1; l--)
108 if (a[l]--) break;
109 l++; a++; /* save one cell */
110 a[0] = evaltyp(t_INT) | _evallg(l);
111 a[1] = evalsigne(-1) | evallgefint(l);
112 return a;
113 }
114
115 /* assume a initialized by setloop. Do a++ */
116 GEN
incloop(GEN a)117 incloop(GEN a)
118 {
119 switch(signe(a))
120 {
121 case 0: a--; /* use extra cell */
122 a[0]=evaltyp(t_INT) | _evallg(3);
123 a[1]=evalsigne(1) | evallgefint(3);
124 a[2]=1; return a;
125 case -1: return incneg(a);
126 default: return incpos(a);
127 }
128 }
129
130 INLINE GEN
addsispec(long s,GEN x,long nx)131 addsispec(long s, GEN x, long nx)
132 {
133 GEN xd, zd = (GEN)avma;
134 long lz;
135
136 lz = nx+3; (void)new_chunk(lz);
137 xd = x + nx;
138 *--zd = *--xd + s;
139 if ((ulong)*zd < (ulong)s)
140 for(;;)
141 {
142 if (xd == x) { *--zd = 1; break; } /* enlarge z */
143 *--zd = ((ulong)*--xd) + 1;
144 if (*zd) { lz--; break; }
145 }
146 else lz--;
147 while (xd > x) *--zd = *--xd;
148 *--zd = evalsigne(1) | evallgefint(lz);
149 *--zd = evaltyp(t_INT) | evallg(lz);
150 avma=(pari_sp)zd; return zd;
151 }
152
153 static GEN
addiispec(GEN x,GEN y,long nx,long ny)154 addiispec(GEN x, GEN y, long nx, long ny)
155 {
156 GEN xd,yd,zd;
157 long lz;
158 LOCAL_OVERFLOW;
159
160 if (nx < ny) swapspec(x,y, nx,ny);
161 if (ny == 1) return addsispec(*y,x,nx);
162 zd = (GEN)avma;
163 lz = nx+3; (void)new_chunk(lz);
164 xd = x + nx;
165 yd = y + ny;
166 *--zd = addll(*--xd, *--yd);
167 while (yd > y) *--zd = addllx(*--xd, *--yd);
168 if (overflow)
169 for(;;)
170 {
171 if (xd == x) { *--zd = 1; break; } /* enlarge z */
172 *--zd = ((ulong)*--xd) + 1;
173 if (*zd) { lz--; break; }
174 }
175 else lz--;
176 while (xd > x) *--zd = *--xd;
177 *--zd = evalsigne(1) | evallgefint(lz);
178 *--zd = evaltyp(t_INT) | evallg(lz);
179 avma=(pari_sp)zd; return zd;
180 }
181
182 /* assume x >= y */
183 INLINE GEN
subisspec(GEN x,long s,long nx)184 subisspec(GEN x, long s, long nx)
185 {
186 GEN xd, zd = (GEN)avma;
187 long lz;
188 LOCAL_OVERFLOW;
189
190 lz = nx+2; (void)new_chunk(lz);
191 xd = x + nx;
192 *--zd = subll(*--xd, s);
193 if (overflow)
194 for(;;)
195 {
196 *--zd = ((ulong)*--xd) - 1;
197 if (*xd) break;
198 }
199 if (xd == x)
200 while (*zd == 0) { zd++; lz--; } /* shorten z */
201 else
202 do *--zd = *--xd; while (xd > x);
203 *--zd = evalsigne(1) | evallgefint(lz);
204 *--zd = evaltyp(t_INT) | evallg(lz);
205 avma=(pari_sp)zd; return zd;
206 }
207
208 /* assume x > y */
209 static GEN
subiispec(GEN x,GEN y,long nx,long ny)210 subiispec(GEN x, GEN y, long nx, long ny)
211 {
212 GEN xd,yd,zd;
213 long lz;
214 LOCAL_OVERFLOW;
215
216 if (ny==1) return subisspec(x,*y,nx);
217 zd = (GEN)avma;
218 lz = nx+2; (void)new_chunk(lz);
219 xd = x + nx;
220 yd = y + ny;
221 *--zd = subll(*--xd, *--yd);
222 while (yd > y) *--zd = subllx(*--xd, *--yd);
223 if (overflow)
224 for(;;)
225 {
226 *--zd = ((ulong)*--xd) - 1;
227 if (*xd) break;
228 }
229 if (xd == x)
230 while (*zd == 0) { zd++; lz--; } /* shorten z */
231 else
232 do *--zd = *--xd; while (xd > x);
233 *--zd = evalsigne(1) | evallgefint(lz);
234 *--zd = evaltyp(t_INT) | evallg(lz);
235 avma=(pari_sp)zd; return zd;
236 }
237
238 static void
roundr_up_ip(GEN x,long l)239 roundr_up_ip(GEN x, long l)
240 {
241 long i = l;
242 for(;;)
243 {
244 if (++x[--i]) break;
245 if (i == 2) { x[2] = HIGHBIT; setexpo(x, expo(x)+1); break; }
246 }
247 }
248
249 void
affir(GEN x,GEN y)250 affir(GEN x, GEN y)
251 {
252 const long s = signe(x), ly = lg(y);
253 long lx, sh, i;
254
255 if (!s)
256 {
257 y[1] = evalexpo(-bit_accuracy(ly));
258 return;
259 }
260
261 lx = lgefint(x); sh = bfffo(x[2]);
262 y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
263 if (sh) {
264 if (lx <= ly)
265 {
266 for (i=lx; i<ly; i++) y[i]=0;
267 shift_left(y,x,2,lx-1, 0,sh);
268 return;
269 }
270 shift_left(y,x,2,ly-1, x[ly],sh);
271 /* lx > ly: round properly */
272 if ((x[ly]<<sh) & HIGHBIT) roundr_up_ip(y, ly);
273 }
274 else {
275 if (lx <= ly)
276 {
277 for (i=2; i<lx; i++) y[i]=x[i];
278 for ( ; i<ly; i++) y[i]=0;
279 return;
280 }
281 for (i=2; i<ly; i++) y[i]=x[i];
282 /* lx > ly: round properly */
283 if (x[ly] & HIGHBIT) roundr_up_ip(y, ly);
284 }
285 }
286
287 static GEN
shifti_spec(GEN x,long lx,long n)288 shifti_spec(GEN x, long lx, long n)
289 {
290 long ly, i, m, s = signe(x);
291 GEN y;
292 if (!s) return gen_0;
293 if (!n)
294 {
295 y = cgeti(lx);
296 y[1] = evalsigne(s) | evallgefint(lx);
297 while (--lx > 1) y[lx]=x[lx];
298 return y;
299 }
300 if (n > 0)
301 {
302 GEN z = (GEN)avma;
303 long d = n>>TWOPOTBITS_IN_LONG;
304
305 ly = lx+d; y = new_chunk(ly);
306 for ( ; d; d--) *--z = 0;
307 m = n & (BITS_IN_LONG-1);
308 if (!m) for (i=2; i<lx; i++) y[i]=x[i];
309 else
310 {
311 register const ulong sh = BITS_IN_LONG - m;
312 shift_left2(y,x, 2,lx-1, 0,m,sh);
313 i = ((ulong)x[2]) >> sh;
314 /* Extend y on the left? */
315 if (i) { ly++; y = new_chunk(1); y[2] = i; }
316 }
317 }
318 else
319 {
320 n = -n;
321 ly = lx - (n>>TWOPOTBITS_IN_LONG);
322 if (ly<3) return gen_0;
323 y = new_chunk(ly);
324 m = n & (BITS_IN_LONG-1);
325 if (m) {
326 shift_right(y,x, 2,ly, 0,m);
327 if (y[2] == 0)
328 {
329 if (ly==3) { avma = (pari_sp)(y+3); return gen_0; }
330 ly--; avma = (pari_sp)(++y);
331 }
332 } else {
333 for (i=2; i<ly; i++) y[i]=x[i];
334 }
335 }
336 y[1] = evalsigne(s)|evallgefint(ly);
337 y[0] = evaltyp(t_INT)|evallg(ly); return y;
338 }
339
340 GEN
shifti(GEN x,long n)341 shifti(GEN x, long n)
342 {
343 return shifti_spec(x, lgefint(x), n);
344 }
345
346 GEN
ishiftr_lg(GEN x,long lx,long n)347 ishiftr_lg(GEN x, long lx, long n)
348 { /*This is a kludge since x is not an integer*/
349 return shifti_spec(x, lx, n);
350 }
351
352 GEN
truncr(GEN x)353 truncr(GEN x)
354 {
355 long d,e,i,s,m;
356 GEN y;
357
358 if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gen_0;
359 d = (e>>TWOPOTBITS_IN_LONG) + 3;
360 m = e & (BITS_IN_LONG-1);
361 if (d > lg(x)) pari_err(precer, "truncr (precision loss in truncation)");
362
363 y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
364 if (++m == BITS_IN_LONG)
365 for (i=2; i<d; i++) y[i]=x[i];
366 else
367 {
368 register const ulong sh = BITS_IN_LONG - m;
369 shift_right2(y,x, 2,d,0, sh,m);
370 }
371 return y;
372 }
373
374 /* integral part */
375 GEN
floorr(GEN x)376 floorr(GEN x)
377 {
378 long d,e,i,lx,m;
379 GEN y;
380
381 if (signe(x) >= 0) return truncr(x);
382 if ((e=expo(x)) < 0) return gen_m1;
383 d = (e>>TWOPOTBITS_IN_LONG) + 3;
384 m = e & (BITS_IN_LONG-1);
385 lx=lg(x); if (d>lx) pari_err(precer, "floorr (precision loss in truncation)");
386 y = new_chunk(d);
387 if (++m == BITS_IN_LONG)
388 {
389 for (i=2; i<d; i++) y[i]=x[i];
390 i=d; while (i<lx && !x[i]) i++;
391 if (i==lx) goto END;
392 }
393 else
394 {
395 register const ulong sh = BITS_IN_LONG - m;
396 shift_right2(y,x, 2,d,0, sh,m);
397 if (x[d-1]<<m == 0)
398 {
399 i=d; while (i<lx && !x[i]) i++;
400 if (i==lx) goto END;
401 }
402 }
403 /* set y:=y+1 */
404 for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
405 y=new_chunk(1); y[2]=1; d++;
406 END:
407 y[1] = evalsigne(-1) | evallgefint(d);
408 y[0] = evaltyp(t_INT) | evallg(d); return y;
409 }
410
411 INLINE int
absi_cmp_lg(GEN x,GEN y,long l)412 absi_cmp_lg(GEN x, GEN y, long l)
413 {
414 long i=2;
415 while (i<l && x[i]==y[i]) i++;
416 if (i==l) return 0;
417 return ((ulong)x[i] > (ulong)y[i])? 1: -1;
418 }
419
420 INLINE int
absi_equal_lg(GEN x,GEN y,long l)421 absi_equal_lg(GEN x, GEN y, long l)
422 {
423 long i = l-1; while (i>1 && x[i]==y[i]) i--;
424 return i==1;
425 }
426
427 /***********************************************************************/
428 /** **/
429 /** MULTIPLICATION **/
430 /** **/
431 /***********************************************************************/
432 GEN
mulss(long x,long y)433 mulss(long x, long y)
434 {
435 long s,p1;
436 GEN z;
437 LOCAL_HIREMAINDER;
438
439 if (!x || !y) return gen_0;
440 if (x<0) { s = -1; x = -x; } else s=1;
441 if (y<0) { s = -s; y = -y; }
442 p1 = mulll(x,y);
443 if (hiremainder)
444 {
445 z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
446 z[2]=hiremainder; z[3]=p1; return z;
447 }
448 z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
449 z[2]=p1; return z;
450 }
451
452 GEN
muluu(ulong x,ulong y)453 muluu(ulong x, ulong y)
454 {
455 long p1;
456 GEN z;
457 LOCAL_HIREMAINDER;
458
459 if (!x || !y) return gen_0;
460 p1 = mulll(x,y);
461 if (hiremainder)
462 {
463 z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
464 z[2]=hiremainder; z[3]=p1; return z;
465 }
466 z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
467 z[2]=p1; return z;
468 }
469
470 /* assume ny > 0 */
471 INLINE GEN
muluispec(ulong x,GEN y,long ny)472 muluispec(ulong x, GEN y, long ny)
473 {
474 GEN yd, z = (GEN)avma;
475 long lz = ny+3;
476 LOCAL_HIREMAINDER;
477
478 (void)new_chunk(lz);
479 yd = y + ny; *--z = mulll(x, *--yd);
480 while (yd > y) *--z = addmul(x,*--yd);
481 if (hiremainder) *--z = hiremainder; else lz--;
482 *--z = evalsigne(1) | evallgefint(lz);
483 *--z = evaltyp(t_INT) | evallg(lz);
484 avma=(pari_sp)z; return z;
485 }
486
487 /* a + b*|Y| */
488 GEN
addumului(ulong a,ulong b,GEN Y)489 addumului(ulong a, ulong b, GEN Y)
490 {
491 GEN yd,y,z;
492 long ny,lz;
493 LOCAL_HIREMAINDER;
494 LOCAL_OVERFLOW;
495
496 if (!signe(Y)) return utoi(a);
497
498 y = Y+2; z = (GEN)avma;
499 ny = lgefint(Y)-2;
500 lz = ny+3;
501
502 (void)new_chunk(lz);
503 yd = y + ny; *--z = addll(a, mulll(b, *--yd));
504 if (overflow) hiremainder++; /* can't overflow */
505 while (yd > y) *--z = addmul(b,*--yd);
506 if (hiremainder) *--z = hiremainder; else lz--;
507 *--z = evalsigne(1) | evallgefint(lz);
508 *--z = evaltyp(t_INT) | evallg(lz);
509 avma=(pari_sp)z; return z;
510 }
511
512 GEN muliispec(GEN a, GEN b, long na, long nb);
513 /*#define KARAMULR_VARIANT*/
514 #define muliispec_mirror muliispec
515
516 /***********************************************************************/
517 /** **/
518 /** DIVISION **/
519 /** **/
520 /***********************************************************************/
521
522 ulong
umodiu(GEN y,ulong x)523 umodiu(GEN y, ulong x)
524 {
525 long sy=signe(y),ly,i;
526 LOCAL_HIREMAINDER;
527
528 if (!x) pari_err(gdiver);
529 if (!sy) return 0;
530 ly = lgefint(y);
531 if (x <= (ulong)y[2]) hiremainder=0;
532 else
533 {
534 if (ly==3) return (sy > 0)? (ulong)y[2]: x - (ulong)y[2];
535 hiremainder=y[2]; ly--; y++;
536 }
537 for (i=2; i<ly; i++) (void)divll(y[i],x);
538 if (!hiremainder) return 0;
539 return (sy > 0)? hiremainder: x - hiremainder;
540 }
541
542 /* return |y| \/ x */
543 GEN
diviu_rem(GEN y,ulong x,ulong * rem)544 diviu_rem(GEN y, ulong x, ulong *rem)
545 {
546 long ly,i;
547 GEN z;
548 LOCAL_HIREMAINDER;
549
550 if (!x) pari_err(gdiver);
551 if (!signe(y)) { *rem = 0; return gen_0; }
552
553 ly = lgefint(y);
554 if (x <= (ulong)y[2]) hiremainder=0;
555 else
556 {
557 if (ly==3) { *rem = (ulong)y[2]; return gen_0; }
558 hiremainder=y[2]; ly--; y++;
559 }
560 z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(1);
561 for (i=2; i<ly; i++) z[i]=divll(y[i],x);
562 *rem = hiremainder; return z;
563 }
564
565 GEN
divis_rem(GEN y,long x,long * rem)566 divis_rem(GEN y, long x, long *rem)
567 {
568 long sy=signe(y),ly,s,i;
569 GEN z;
570 LOCAL_HIREMAINDER;
571
572 if (!x) pari_err(gdiver);
573 if (!sy) { *rem=0; return gen_0; }
574 if (x<0) { s = -sy; x = -x; } else s = sy;
575
576 ly = lgefint(y);
577 if ((ulong)x <= (ulong)y[2]) hiremainder=0;
578 else
579 {
580 if (ly==3) { *rem = itos(y); return gen_0; }
581 hiremainder=y[2]; ly--; y++;
582 }
583 z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
584 for (i=2; i<ly; i++) z[i]=divll(y[i],x);
585 if (sy<0) hiremainder = - ((long)hiremainder);
586 *rem = (long)hiremainder; return z;
587 }
588
589 GEN
divis(GEN y,long x)590 divis(GEN y, long x)
591 {
592 long sy=signe(y),ly,s,i;
593 GEN z;
594 LOCAL_HIREMAINDER;
595
596 if (!x) pari_err(gdiver);
597 if (!sy) return gen_0;
598 if (x<0) { s = -sy; x = -x; } else s = sy;
599
600 ly = lgefint(y);
601 if ((ulong)x <= (ulong)y[2]) hiremainder=0;
602 else
603 {
604 if (ly==3) return gen_0;
605 hiremainder=y[2]; ly--; y++;
606 }
607 z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
608 for (i=2; i<ly; i++) z[i]=divll(y[i],x);
609 return z;
610 }
611
612 GEN
divrr(GEN x,GEN y)613 divrr(GEN x, GEN y)
614 {
615 long sx=signe(x), sy=signe(y), lx,ly,lr,e,i,j;
616 ulong y0,y1;
617 GEN r, r1;
618
619 if (!sy) pari_err(gdiver);
620 e = expo(x) - expo(y);
621 if (!sx) return real_0_bit(e);
622 if (sy<0) sx = -sx;
623
624 lx=lg(x); ly=lg(y);
625 if (ly==3)
626 {
627 ulong k = x[2], l = (lx>3)? x[3]: 0;
628 LOCAL_HIREMAINDER;
629 if (k < (ulong)y[2]) e--;
630 else
631 {
632 l >>= 1; if (k&1) l |= HIGHBIT;
633 k >>= 1;
634 }
635 r = cgetr(3); r[1] = evalsigne(sx) | evalexpo(e);
636 hiremainder=k; r[2]=divll(l,y[2]); return r;
637 }
638
639 lr = min(lx,ly); r = new_chunk(lr);
640 r1 = r-1;
641 r1[1] = 0; for (i=2; i<lr; i++) r1[i]=x[i];
642 r1[lr] = (lx>ly)? x[lr]: 0;
643 y0 = y[2]; y1 = y[3];
644 for (i=0; i<lr-1; i++)
645 { /* r1 = r + (i-1) */
646 ulong k, qp;
647 LOCAL_HIREMAINDER;
648 LOCAL_OVERFLOW;
649
650 if ((ulong)r1[1] == y0)
651 {
652 qp = MAXULONG; k = addll(y0,r1[2]);
653 }
654 else
655 {
656 if ((ulong)r1[1] > y0) /* can't happen if i=0 */
657 {
658 GEN y1 = y+1;
659 j = lr-i; r1[j] = subll(r1[j],y1[j]);
660 for (j--; j>0; j--) r1[j] = subllx(r1[j],y1[j]);
661 j=i; do r[--j]++; while (j && !r[j]);
662 }
663 hiremainder = r1[1]; overflow = 0;
664 qp = divll(r1[2],y0); k = hiremainder;
665 }
666 if (!overflow)
667 {
668 long k3 = subll(mulll(qp,y1), r1[3]);
669 long k4 = subllx(hiremainder,k);
670 while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
671 }
672 j = lr-i+1;
673 if (j<ly) (void)mulll(qp,y[j]); else { hiremainder = 0 ; j = ly; }
674 for (j--; j>1; j--)
675 {
676 r1[j] = subll(r1[j], addmul(qp,y[j]));
677 hiremainder += overflow;
678 }
679 if ((ulong)r1[1] != hiremainder)
680 {
681 if ((ulong)r1[1] < hiremainder)
682 {
683 qp--;
684 j = lr-i-(lr-i>=ly); r1[j] = addll(r1[j], y[j]);
685 for (j--; j>1; j--) r1[j] = addllx(r1[j], y[j]);
686 }
687 else
688 {
689 r1[1] -= hiremainder;
690 while (r1[1])
691 {
692 qp++; if (!qp) { j=i; do r[--j]++; while (j && !r[j]); }
693 j = lr-i-(lr-i>=ly); r1[j] = subll(r1[j],y[j]);
694 for (j--; j>1; j--) r1[j] = subllx(r1[j],y[j]);
695 r1[1] -= overflow;
696 }
697 }
698 }
699 *++r1 = qp;
700 }
701 /* i = lr-1 */
702 /* round correctly */
703 if ((ulong)r1[1] > (y0>>1))
704 {
705 j=i; do r[--j]++; while (j && !r[j]);
706 }
707 r1 = r-1; for (j=i; j>=2; j--) r[j]=r1[j];
708 if (r[0] == 0) e--;
709 else if (r[0] == 1) { shift_right(r,r, 2,lr, 1,1); }
710 else { /* possible only when rounding up to 0x2 0x0 ... */
711 r[2] = HIGHBIT; e++;
712 }
713 r[0] = evaltyp(t_REAL)|evallg(lr);
714 r[1] = evalsigne(sx) | evalexpo(e);
715 return r;
716 }
717
718 GEN
divri(GEN x,GEN y)719 divri(GEN x, GEN y)
720 {
721 long lx, s = signe(y);
722 pari_sp av;
723 GEN z;
724
725 if (!s) pari_err(gdiver);
726 if (!signe(x)) return real_0_bit(expo(x) - expi(y));
727 if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
728
729 lx = lg(x); z = cgetr(lx); av = avma;
730 affrr(divrr(x, itor(y, lx+1)), z);
731 avma = av; return z;
732 }
733
734 /* Integer division x / y: such that sign(r) = sign(x)
735 * if z = ONLY_REM return remainder, otherwise return quotient
736 * if z != NULL set *z to remainder
737 * *z is the last object on stack (and thus can be disposed of with cgiv
738 * instead of gerepile)
739 * If *z is zero, we put gen_0 here and no copy.
740 * space needed: lx + ly */
741 GEN
dvmdii(GEN x,GEN y,GEN * z)742 dvmdii(GEN x, GEN y, GEN *z)
743 {
744 long sx=signe(x),sy=signe(y);
745 long lx, ly, lz, i, j, sh, lq, lr;
746 pari_sp av;
747 ulong y0,y1, *xd,*rd,*qd;
748 GEN q, r, r1;
749
750 if (!sy) { if (z == ONLY_REM && !sx) return gen_0; pari_err(gdiver); }
751 if (!sx)
752 {
753 if (!z || z == ONLY_REM) return gen_0;
754 *z=gen_0; return gen_0;
755 }
756 lx=lgefint(x);
757 ly=lgefint(y); lz=lx-ly;
758 if (lz <= 0)
759 {
760 if (lz == 0)
761 {
762 for (i=2; i<lx; i++)
763 if (x[i] != y[i])
764 {
765 if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
766 goto TRIVIAL;
767 }
768 if (z == ONLY_REM) return gen_0;
769 if (z) *z = gen_0;
770 if (sx < 0) sy = -sy;
771 return stoi(sy);
772 }
773 TRIVIAL:
774 if (z == ONLY_REM) return icopy(x);
775 if (z) *z = icopy(x);
776 return gen_0;
777 }
778 DIVIDE: /* quotient is non-zero */
779 av=avma; if (sx<0) sy = -sy;
780 if (ly==3)
781 {
782 LOCAL_HIREMAINDER;
783 y0 = y[2];
784 if (y0 <= (ulong)x[2]) hiremainder=0;
785 else
786 {
787 hiremainder = x[2]; lx--; x++;
788 }
789 q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],y0);
790 if (z == ONLY_REM)
791 {
792 avma=av; if (!hiremainder) return gen_0;
793 r=cgeti(3);
794 r[1] = evalsigne(sx) | evallgefint(3);
795 r[2]=hiremainder; return r;
796 }
797 q[1] = evalsigne(sy) | evallgefint(lx);
798 q[0] = evaltyp(t_INT) | evallg(lx);
799 if (!z) return q;
800 if (!hiremainder) { *z=gen_0; return q; }
801 r=cgeti(3);
802 r[1] = evalsigne(sx) | evallgefint(3);
803 r[2] = hiremainder; *z=r; return q;
804 }
805
806 r1 = new_chunk(lx); sh = bfffo(y[2]);
807 if (sh)
808 { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
809 register const ulong m = BITS_IN_LONG - sh;
810 r = new_chunk(ly);
811 shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
812 shift_left2(r1,x,2,lx-1, 0,sh,m);
813 r1[1] = ((ulong)x[2]) >> m;
814 }
815 else
816 {
817 r1[1] = 0; for (j=2; j<lx; j++) r1[j] = x[j];
818 }
819 x = r1;
820 y0 = y[2]; y1 = y[3];
821 for (i=0; i<=lz; i++)
822 { /* r1 = x + i */
823 ulong k, qp;
824 LOCAL_HIREMAINDER;
825 LOCAL_OVERFLOW;
826
827 if ((ulong)r1[1] == y0)
828 {
829 qp = MAXULONG; k = addll(y0,r1[2]);
830 }
831 else
832 {
833 hiremainder = r1[1]; overflow = 0;
834 qp = divll(r1[2],y0); k = hiremainder;
835 }
836 if (!overflow)
837 {
838 long k3 = subll(mulll(qp,y1), r1[3]);
839 long k4 = subllx(hiremainder,k);
840 while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
841 }
842 hiremainder = 0; j = ly;
843 for (j--; j>1; j--)
844 {
845 r1[j] = subll(r1[j], addmul(qp,y[j]));
846 hiremainder += overflow;
847 }
848 if ((ulong)r1[1] < hiremainder)
849 {
850 qp--;
851 j = ly-1; r1[j] = addll(r1[j],y[j]);
852 for (j--; j>1; j--) r1[j] = addllx(r1[j],y[j]);
853 }
854 *++r1 = qp;
855 }
856
857 lq = lz+2;
858 if (!z)
859 {
860 qd = (ulong*)av;
861 xd = (ulong*)(x + lq);
862 if (x[1]) { lz++; lq++; }
863 while (lz--) *--qd = *--xd;
864 *--qd = evalsigne(sy) | evallgefint(lq);
865 *--qd = evaltyp(t_INT) | evallg(lq);
866 avma = (pari_sp)qd; return (GEN)qd;
867 }
868
869 j=lq; while (j<lx && !x[j]) j++;
870 lz = lx-j;
871 if (z == ONLY_REM)
872 {
873 if (lz==0) { avma = av; return gen_0; }
874 rd = (ulong*)av; lr = lz+2;
875 xd = (ulong*)(x + lx);
876 if (!sh) while (lz--) *--rd = *--xd;
877 else
878 { /* shift remainder right by sh bits */
879 const ulong shl = BITS_IN_LONG - sh;
880 ulong l;
881 xd--;
882 while (--lz) /* fill r[3..] */
883 {
884 l = *xd >> sh;
885 *--rd = l | (*--xd << shl);
886 }
887 l = *xd >> sh;
888 if (l) *--rd = l; else lr--;
889 }
890 *--rd = evalsigne(sx) | evallgefint(lr);
891 *--rd = evaltyp(t_INT) | evallg(lr);
892 avma = (pari_sp)rd; return (GEN)rd;
893 }
894
895 lr = lz+2;
896 rd = NULL; /* gcc -Wall */
897 if (lz)
898 { /* non zero remainder: initialize rd */
899 xd = (ulong*)(x + lx);
900 if (!sh)
901 {
902 rd = (ulong*)avma; (void)new_chunk(lr);
903 while (lz--) *--rd = *--xd;
904 }
905 else
906 { /* shift remainder right by sh bits */
907 const ulong shl = BITS_IN_LONG - sh;
908 ulong l;
909 rd = (ulong*)x; /* overwrite shifted y */
910 xd--;
911 while (--lz)
912 {
913 l = *xd >> sh;
914 *--rd = l | (*--xd << shl);
915 }
916 l = *xd >> sh;
917 if (l) *--rd = l; else lr--;
918 }
919 *--rd = evalsigne(sx) | evallgefint(lr);
920 *--rd = evaltyp(t_INT) | evallg(lr);
921 rd += lr;
922 }
923 qd = (ulong*)av;
924 xd = (ulong*)(x + lq);
925 if (x[1]) lq++;
926 j = lq-2; while (j--) *--qd = *--xd;
927 *--qd = evalsigne(sy) | evallgefint(lq);
928 *--qd = evaltyp(t_INT) | evallg(lq);
929 q = (GEN)qd;
930 if (lr==2) *z = gen_0;
931 else
932 { /* rd has been properly initialized: we had lz > 0 */
933 while (lr--) *--qd = *--rd;
934 *z = (GEN)qd;
935 }
936 avma = (pari_sp)qd; return q;
937 }
938
939 /* Montgomery reduction.
940 * N has k words, assume T >= 0 has less than 2k.
941 * Return res := T / B^k mod N, where B = 2^BIL
942 * such that 0 <= res < T/B^k + N and res has less than k words */
943 GEN
red_montgomery(GEN T,GEN N,ulong inv)944 red_montgomery(GEN T, GEN N, ulong inv)
945 {
946 pari_sp av;
947 GEN Te, Td, Ne, Nd, scratch;
948 ulong i, j, m, t, d, k = lgefint(N)-2;
949 int carry;
950 LOCAL_HIREMAINDER;
951 LOCAL_OVERFLOW;
952
953 if (k == 0) return gen_0;
954 d = lgefint(T)-2; /* <= 2*k */
955 #ifdef DEBUG
956 if (d > 2*k) pari_err(bugparier,"red_montgomery");
957 #endif
958 if (k == 1)
959 { /* as below, special cased for efficiency */
960 ulong n = (ulong)N[2];
961 t = (ulong)T[d+1];
962 m = t * inv;
963 (void)addll(mulll(m, n), t); /* = 0 */
964 t = hiremainder + overflow;
965 if (d == 2)
966 {
967 t = addll((ulong)T[2], t);
968 if (overflow) t -= n; /* t > n doesn't fit in 1 word */
969 }
970 return utoi(t);
971 }
972 /* assume k >= 2 */
973 av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
974
975 /* copy T to scratch space (pad with zeroes to 2k words) */
976 Td = (GEN)av;
977 Te = T + (d+2);
978 for (i=0; i < d ; i++) *--Td = *--Te;
979 for ( ; i < (k<<1); i++) *--Td = 0;
980
981 Te = (GEN)av; /* 1 beyond end of T mantissa */
982 Ne = N + k+2; /* 1 beyond end of N mantissa */
983
984 carry = 0;
985 for (i=0; i<k; i++) /* set T := T/B nod N, k times */
986 {
987 Td = Te; /* one beyond end of (new) T mantissa */
988 Nd = Ne;
989 m = *--Td * inv; /* solve T + m N = O(B) */
990
991 /* set T := (T + mN) / B */
992 Te = Td;
993 (void)addll(mulll(m, *--Nd), *Td); /* = 0 */
994 for (j=1; j<k; j++)
995 {
996 hiremainder += overflow;
997 t = addll(addmul(m, *--Nd), *--Td); *Td = t;
998 }
999 overflow += hiremainder;
1000 t = addll(overflow, *--Td); *Td = t + carry;
1001 carry = (overflow || (carry && *Td == 0));
1002 }
1003 if (carry)
1004 { /* Td > N overflows (k+1 words), set Td := Td - N */
1005 Td = Te;
1006 Nd = Ne;
1007 t = subll(*--Td, *--Nd); *Td = t;
1008 while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
1009 }
1010
1011 /* copy result */
1012 Td = (GEN)av;
1013 while (! *scratch && Te > scratch) scratch++; /* strip leading 0s */
1014 while (Te > scratch) *--Td = *--Te;
1015 k = (GEN)av - Td; if (!k) return gen_0;
1016 k += 2;
1017 *--Td = evalsigne(1) | evallgefint(k);
1018 *--Td = evaltyp(t_INT) | evallg(k);
1019 #ifdef DEBUG
1020 {
1021 long l = lgefint(N)-2, s = BITS_IN_LONG*l;
1022 GEN R = int2n(s);
1023 GEN res = remii(mulii(T, Fp_inv(R, N)), N);
1024 if (k > lgefint(N)
1025 || !equalii(remii(Td,N),res)
1026 || cmpii(Td, addii(shifti(T, -s), N)) >= 0) pari_err(bugparier,"red_montgomery");
1027 }
1028 #endif
1029 avma = (pari_sp)Td; return Td;
1030 }
1031
1032 /* EXACT INTEGER DIVISION */
1033
1034 /* assume xy>0, the division is exact and y is odd. Destroy x */
1035 static GEN
diviuexact_i(GEN x,ulong y)1036 diviuexact_i(GEN x, ulong y)
1037 {
1038 long i, lz, lx;
1039 ulong q, yinv;
1040 GEN z, z0, x0, x0min;
1041
1042 if (y == 1) return icopy(x);
1043 lx = lgefint(x);
1044 if (lx == 3) return utoipos((ulong)x[2] / y);
1045 yinv = invrev(y);
1046 lz = (y <= (ulong)x[2]) ? lx : lx-1;
1047 z = new_chunk(lz);
1048 z0 = z + lz;
1049 x0 = x + lx; x0min = x + lx-lz+2;
1050
1051 while (x0 > x0min)
1052 {
1053 *--z0 = q = yinv*((ulong)*--x0); /* i-th quotient */
1054 if (!q) continue;
1055 /* x := x - q * y */
1056 { /* update neither lowest word (could set it to 0) nor highest ones */
1057 register GEN x1 = x0 - 1;
1058 LOCAL_HIREMAINDER;
1059 (void)mulll(q,y);
1060 if (hiremainder)
1061 {
1062 if ((ulong)*x1 < hiremainder)
1063 {
1064 *x1 -= hiremainder;
1065 do (*--x1)--; while ((ulong)*x1 == MAXULONG);
1066 }
1067 else
1068 *x1 -= hiremainder;
1069 }
1070 }
1071 }
1072 i=2; while(!z[i]) i++;
1073 z += i-2; lz -= i-2;
1074 z[0] = evaltyp(t_INT)|evallg(lz);
1075 z[1] = evalsigne(1)|evallg(lz);
1076 avma = (pari_sp)z; return z;
1077 }
1078
1079 /* assume y != 0 and the division is exact */
1080 GEN
diviuexact(GEN x,ulong y)1081 diviuexact(GEN x, ulong y)
1082 {
1083 pari_sp av;
1084 long lx, vy, s = signe(x);
1085 GEN z;
1086
1087 if (!s) return gen_0;
1088 if (y == 1) return icopy(x);
1089 lx = lgefint(x);
1090 if (lx == 3) {
1091 ulong q = (ulong)x[2] / y;
1092 return (s > 0)? utoipos(q): utoineg(q);
1093 }
1094 av = avma; (void)new_chunk(lx); vy = vals(y);
1095 if (vy) {
1096 y >>= vy;
1097 if (y == 1) { avma = av; return shifti(x, -vy); }
1098 x = shifti(x, -vy);
1099 if (lx == 3) {
1100 ulong q = (ulong)x[2] / y;
1101 avma = av;
1102 return (s > 0)? utoipos(q): utoineg(q);
1103 }
1104 } else x = icopy(x);
1105 avma = av;
1106 z = diviuexact_i(x, y);
1107 setsigne(z, s); return z;
1108 }
1109
1110 /* Find z such that x=y*z, knowing that y | x (unchecked)
1111 * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
1112 * Set x := (x - z0 y) / B, updating only relevant words, and repeat */
1113 GEN
diviiexact(GEN x,GEN y)1114 diviiexact(GEN x, GEN y)
1115 {
1116 long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
1117 pari_sp av;
1118 ulong y0inv,q;
1119 GEN z;
1120
1121 if (!sy) pari_err(gdiver);
1122 if (!sx) return gen_0;
1123 lx = lgefint(x);
1124 if (lx == 3) {
1125 q = (ulong)x[2] / (ulong)y[2];
1126 return (sx+sy) ? utoipos(q): utoineg(q);
1127 }
1128 vy = vali(y); av = avma;
1129 (void)new_chunk(lx); /* enough room for z */
1130 if (vy)
1131 { /* make y odd */
1132 y = shifti(y,-vy);
1133 x = shifti(x,-vy); lx = lgefint(x);
1134 }
1135 else x = icopy(x); /* necessary because we destroy x */
1136 avma = av; /* will erase our x,y when exiting */
1137 /* now y is odd */
1138 ly = lgefint(y);
1139 if (ly == 3)
1140 {
1141 x = diviuexact_i(x,(ulong)y[2]); /* x != 0 */
1142 setsigne(x, (sx+sy)? 1: -1); return x;
1143 }
1144 y0inv = invrev(y[ly-1]);
1145 i=2; while (i<ly && y[i]==x[i]) i++;
1146 lz = (i==ly || (ulong)y[i] < (ulong)x[i]) ? lx-ly+3 : lx-ly+2;
1147 z = new_chunk(lz);
1148
1149 y += ly - 1; /* now y[-i] = i-th word of y */
1150 for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1151 {
1152 long limj;
1153 LOCAL_HIREMAINDER;
1154 LOCAL_OVERFLOW;
1155
1156 z[i] = q = y0inv*((ulong)x[ii]); /* i-th quotient */
1157 if (!q) continue;
1158
1159 /* x := x - q * y */
1160 (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly);
1161 { /* update neither lowest word (could set it to 0) nor highest ones */
1162 register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
1163 for (; x0 >= xlim; x0--, y0--)
1164 {
1165 *x0 = subll(*x0, addmul(q,*y0));
1166 hiremainder += overflow;
1167 }
1168 if (hiremainder && limj != lx - lz)
1169 {
1170 if ((ulong)*x0 < hiremainder)
1171 {
1172 *x0 -= hiremainder;
1173 do (*--x0)--; while ((ulong)*x0 == MAXULONG);
1174 }
1175 else
1176 *x0 -= hiremainder;
1177 }
1178 }
1179 }
1180 i=2; while(!z[i]) i++;
1181 z += i-2; lz -= (i-2);
1182 z[0] = evaltyp(t_INT)|evallg(lz);
1183 z[1] = evalsigne((sx+sy)? 1: -1) | evallg(lz);
1184 avma = (pari_sp)z; return z;
1185 }
1186
1187
1188 /********************************************************************/
1189 /** **/
1190 /** INTEGER MULTIPLICATION (KARATSUBA) **/
1191 /** **/
1192 /********************************************************************/
1193 /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
1194 INLINE GEN
muliispec_basecase(GEN x,GEN y,long nx,long ny)1195 muliispec_basecase(GEN x, GEN y, long nx, long ny)
1196 {
1197 GEN z2e,z2d,yd,xd,ye,zd;
1198 long p1,lz;
1199 LOCAL_HIREMAINDER;
1200
1201 if (!ny) return gen_0;
1202 zd = (GEN)avma; lz = nx+ny+2;
1203 (void)new_chunk(lz);
1204 xd = x + nx;
1205 yd = y + ny;
1206 ye = yd; p1 = *--xd;
1207
1208 *--zd = mulll(p1, *--yd); z2e = zd;
1209 while (yd > y) *--zd = addmul(p1, *--yd);
1210 *--zd = hiremainder;
1211
1212 while (xd > x)
1213 {
1214 LOCAL_OVERFLOW;
1215 yd = ye; p1 = *--xd;
1216
1217 z2d = --z2e;
1218 *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1219 while (yd > y)
1220 {
1221 hiremainder += overflow;
1222 *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1223 }
1224 *--zd = hiremainder + overflow;
1225 }
1226 if (*zd == 0) { zd++; lz--; } /* normalize */
1227 *--zd = evalsigne(1) | evallgefint(lz);
1228 *--zd = evaltyp(t_INT) | evallg(lz);
1229 avma=(pari_sp)zd; return zd;
1230 }
1231
1232 INLINE GEN
sqrispec_basecase(GEN x,long nx)1233 sqrispec_basecase(GEN x, long nx)
1234 {
1235 GEN z2e,z2d,yd,xd,zd,x0,z0;
1236 long p1,lz;
1237 LOCAL_HIREMAINDER;
1238 LOCAL_OVERFLOW;
1239
1240 if (!nx) return gen_0;
1241 zd = (GEN)avma; lz = (nx+1) << 1;
1242 z0 = new_chunk(lz);
1243 if (nx == 1)
1244 {
1245 *--zd = mulll(*x, *x);
1246 *--zd = hiremainder; goto END;
1247 }
1248 xd = x + nx;
1249
1250 /* compute double products --> zd */
1251 p1 = *--xd; yd = xd; --zd;
1252 *--zd = mulll(p1, *--yd); z2e = zd;
1253 while (yd > x) *--zd = addmul(p1, *--yd);
1254 *--zd = hiremainder;
1255
1256 x0 = x+1;
1257 while (xd > x0)
1258 {
1259 LOCAL_OVERFLOW;
1260 p1 = *--xd; yd = xd;
1261
1262 z2e -= 2; z2d = z2e;
1263 *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1264 while (yd > x)
1265 {
1266 hiremainder += overflow;
1267 *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1268 }
1269 *--zd = hiremainder + overflow;
1270 }
1271 /* multiply zd by 2 (put result in zd - 1) */
1272 zd[-1] = ((*zd & HIGHBIT) != 0);
1273 shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
1274
1275 /* add the squares */
1276 xd = x + nx; zd = z0 + lz;
1277 p1 = *--xd;
1278 zd--; *zd = mulll(p1,p1);
1279 zd--; *zd = addll(hiremainder, *zd);
1280 while (xd > x)
1281 {
1282 p1 = *--xd;
1283 zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
1284 zd--; *zd = addll(hiremainder + overflow, *zd);
1285 }
1286
1287 END:
1288 if (*zd == 0) { zd++; lz--; } /* normalize */
1289 *--zd = evalsigne(1) | evallgefint(lz);
1290 *--zd = evaltyp(t_INT) | evallg(lz);
1291 avma=(pari_sp)zd; return zd;
1292 }
1293
1294 /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
1295 static GEN
addshiftw(GEN x,GEN y,long d)1296 addshiftw(GEN x, GEN y, long d)
1297 {
1298 GEN z,z0,y0,yd, zd = (GEN)avma;
1299 long a,lz,ly = lgefint(y);
1300
1301 z0 = new_chunk(d);
1302 a = ly-2; yd = y+ly;
1303 if (a >= d)
1304 {
1305 y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
1306 a -= d;
1307 if (a)
1308 z = addiispec(x+2, y+2, lgefint(x)-2, a);
1309 else
1310 z = icopy(x);
1311 }
1312 else
1313 {
1314 y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
1315 while (zd >= z0) *--zd = 0; /* complete with 0s */
1316 z = icopy(x);
1317 }
1318 lz = lgefint(z)+d;
1319 z[1] = evalsigne(1) | evallgefint(lz);
1320 z[0] = evaltyp(t_INT) | evallg(lz); return z;
1321 }
1322
1323 /* Fast product (Karatsuba) of integers. a and b are "special" GENs
1324 * c,c0,c1,c2 are genuine GENs.
1325 */
1326 GEN
muliispec(GEN a,GEN b,long na,long nb)1327 muliispec(GEN a, GEN b, long na, long nb)
1328 {
1329 GEN a0,c,c0;
1330 long n0, n0a, i;
1331 pari_sp av;
1332
1333 if (na < nb) swapspec(a,b, na,nb);
1334 if (nb == 1) return muluispec((ulong)*b, a, na);
1335 if (nb == 0) return gen_0;
1336 if (nb < KARATSUBA_MULI_LIMIT) return muliispec_basecase(a,b,na,nb);
1337 i=(na>>1); n0=na-i; na=i;
1338 av=avma; a0=a+na; n0a=n0;
1339 while (!*a0 && n0a) { a0++; n0a--; }
1340
1341 if (n0a && nb > n0)
1342 { /* nb <= na <= n0 */
1343 GEN b0,c1,c2;
1344 long n0b;
1345
1346 nb -= n0;
1347 c = muliispec(a,b,na,nb);
1348 b0 = b+nb; n0b = n0;
1349 while (!*b0 && n0b) { b0++; n0b--; }
1350 if (n0b)
1351 {
1352 c0 = muliispec(a0,b0, n0a,n0b);
1353
1354 c2 = addiispec(a0,a, n0a,na);
1355 c1 = addiispec(b0,b, n0b,nb);
1356 c1 = muliispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
1357 c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
1358
1359 c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
1360 }
1361 else
1362 {
1363 c0 = gen_0;
1364 c1 = muliispec(a0,b, n0a,nb);
1365 }
1366 c = addshiftw(c,c1, n0);
1367 }
1368 else
1369 {
1370 c = muliispec(a,b,na,nb);
1371 c0 = muliispec(a0,b,n0a,nb);
1372 }
1373 return gerepileuptoint(av, addshiftw(c,c0, n0));
1374 }
1375
1376 /* x % (2^n), assuming x, n >= 0 */
1377 GEN
resmod2n(GEN x,long n)1378 resmod2n(GEN x, long n)
1379 {
1380 long hi,l,k,lx,ly;
1381 GEN z, xd, zd;
1382
1383 if (!signe(x) || !n) return gen_0;
1384
1385 l = n & (BITS_IN_LONG-1); /* n % BITS_IN_LONG */
1386 k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */
1387 lx = lgefint(x);
1388 if (lx < k+3) return icopy(x);
1389
1390 xd = x + (lx-k-1);
1391 /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
1392 * ^--- initial xd */
1393 hi = ((ulong)*xd) & ((1UL<<l)-1); /* last l bits of # = top bits of result */
1394 if (!hi)
1395 { /* strip leading zeroes from result */
1396 xd++; while (k && !*xd) { k--; xd++; }
1397 if (!k) return gen_0;
1398 ly = k+2; xd--;
1399 }
1400 else
1401 ly = k+3;
1402
1403 zd = z = cgeti(ly);
1404 *++zd = evalsigne(1) | evallgefint(ly);
1405 if (hi) *++zd = hi;
1406 for ( ;k; k--) *++zd = *++xd;
1407 return z;
1408 }
1409
1410 GEN
sqrispec(GEN a,long na)1411 sqrispec(GEN a, long na)
1412 {
1413 GEN a0,c;
1414 long n0, n0a, i;
1415 pari_sp av;
1416
1417 if (na < KARATSUBA_SQRI_LIMIT) return sqrispec_basecase(a,na);
1418 i=(na>>1); n0=na-i; na=i;
1419 av=avma; a0=a+na; n0a=n0;
1420 while (!*a0 && n0a) { a0++; n0a--; }
1421 c = sqrispec(a,na);
1422 if (n0a)
1423 {
1424 GEN t, c1, c0 = sqrispec(a0,n0a);
1425 #if 0
1426 c1 = shifti(muliispec(a0,a, n0a,na),1);
1427 #else /* faster */
1428 t = addiispec(a0,a,n0a,na);
1429 t = sqrispec(t+2,lgefint(t)-2);
1430 c1= addiispec(c0+2,c+2, lgefint(c0)-2, lgefint(c)-2);
1431 c1= subiispec(t+2, c1+2, lgefint(t)-2, lgefint(c1)-2);
1432 #endif
1433 c = addshiftw(c,c1, n0);
1434 c = addshiftw(c,c0, n0);
1435 }
1436 else
1437 c = addshiftw(c,gen_0,n0<<1);
1438 return gerepileuptoint(av, c);
1439 }
1440
1441 /********************************************************************/
1442 /** **/
1443 /** KARATSUBA SQUARE ROOT **/
1444 /** adapted from Paul Zimmermann's implementation of **/
1445 /** his algorithm in GMP (mpn_sqrtrem) **/
1446 /** **/
1447 /********************************************************************/
1448
1449 /* Square roots table */
1450 static const unsigned char approx_tab[192] = {
1451 128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
1452 143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
1453 156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
1454 169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
1455 181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
1456 192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
1457 202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
1458 212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
1459 221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
1460 230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
1461 239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
1462 247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
1463 };
1464
1465 /* N[0], assume N[0] >= 2^(BIL-2).
1466 * Return r,s such that s^2 + r = N, 0 <= r <= 2s */
1467 static void
p_sqrtu1(ulong * N,ulong * ps,ulong * pr)1468 p_sqrtu1(ulong *N, ulong *ps, ulong *pr)
1469 {
1470 ulong prec, r, s, q, u, n0 = N[0];
1471
1472 q = n0 >> (BITS_IN_LONG - 8);
1473 /* 2^6 = 64 <= q < 256 = 2^8 */
1474 s = approx_tab[q - 64]; /* 128 <= s < 255 */
1475 r = (n0 >> (BITS_IN_LONG - 16)) - s * s; /* r <= 2*s */
1476 if (r > (s << 1)) { r -= (s << 1) | 1; s++; }
1477
1478 /* 8-bit approximation from the high 8-bits of N[0] */
1479 prec = 8;
1480 n0 <<= 2 * prec;
1481 while (2 * prec < BITS_IN_LONG)
1482 { /* invariant: s has prec bits, and r <= 2*s */
1483 r = (r << prec) + (n0 >> (BITS_IN_LONG - prec));
1484 n0 <<= prec;
1485 u = 2 * s;
1486 q = r / u; u = r - q * u;
1487 s = (s << prec) + q;
1488 u = (u << prec) + (n0 >> (BITS_IN_LONG - prec));
1489 q = q * q;
1490 r = u - q;
1491 if (u < q) { s--; r += (s << 1) | 1; }
1492 n0 <<= prec;
1493 prec = 2 * prec;
1494 }
1495 *ps = s;
1496 *pr = r;
1497 }
1498
1499 /* N[0..1], assume N[0] >= 2^(BIL-2).
1500 * Return 1 if remainder overflows, 0 otherwise */
1501 static int
p_sqrtu2(ulong * N,ulong * ps,ulong * pr)1502 p_sqrtu2(ulong *N, ulong *ps, ulong *pr)
1503 {
1504 ulong cc, qhl, r, s, q, u, n1 = N[1];
1505 LOCAL_OVERFLOW;
1506
1507 p_sqrtu1(N, &s, &r); /* r <= 2s */
1508 qhl = 0; while (r >= s) { qhl++; r -= s; }
1509 /* now r < s < 2^(BIL/2) */
1510 r = (r << BITS_IN_HALFULONG) | (n1 >> BITS_IN_HALFULONG);
1511 u = s << 1;
1512 q = r / u; u = r - q * u;
1513 q += (qhl & 1) << (BITS_IN_HALFULONG - 1);
1514 qhl >>= 1;
1515 /* (initial r)<<(BIL/2) + n1>>(BIL/2) = (qhl<<(BIL/2) + q) * 2s + u */
1516 s = ((s + qhl) << BITS_IN_HALFULONG) + q;
1517 cc = u >> BITS_IN_HALFULONG;
1518 r = (u << BITS_IN_HALFULONG) | (n1 & LOWMASK);
1519 r = subll(r, q * q);
1520 cc -= overflow + qhl;
1521 /* now subtract 2*q*2^(BIL/2) + 2^BIL if qhl is set */
1522 if ((long)cc < 0)
1523 {
1524 if (s) {
1525 r = addll(r, s);
1526 cc += overflow;
1527 s--;
1528 } else {
1529 cc++;
1530 s = ~0UL;
1531 }
1532 r = addll(r, s);
1533 cc += overflow;
1534 }
1535 *ps = s;
1536 *pr = r; return cc;
1537 }
1538
1539 static void
xmpn_zero(GEN x,long n)1540 xmpn_zero(GEN x, long n)
1541 {
1542 while (--n >= 0) x[n]=0;
1543 }
1544 static void
xmpn_copy(GEN z,GEN x,long n)1545 xmpn_copy(GEN z, GEN x, long n)
1546 {
1547 long k = n;
1548 while (--k >= 0) z[k] = x[k];
1549 }
1550 static GEN
cat1u(ulong d)1551 cat1u(ulong d)
1552 {
1553 GEN R = cgeti(4);
1554 R[1] = evalsigne(1)|evallgefint(4);
1555 R[2] = 1;
1556 R[3] = d; return R;
1557 }
1558 /* a[0..la-1] * 2^(lb BIL) | b[0..lb-1] */
1559 static GEN
catii(GEN a,long la,GEN b,long lb)1560 catii(GEN a, long la, GEN b, long lb)
1561 {
1562 long l = la + lb + 2;
1563 GEN z = cgeti(l);
1564 z[1] = evalsigne(1) | evallgefint(l);
1565 xmpn_copy(z + 2, a, la);
1566 xmpn_copy(z + 2 + la, b, lb);
1567 return int_normalize(z, 0);
1568 }
1569
1570 /* sqrt n[0..1], assume n normalized */
1571 static GEN
sqrtispec2(GEN n,GEN * pr)1572 sqrtispec2(GEN n, GEN *pr)
1573 {
1574 ulong s, r;
1575 int hi = p_sqrtu2((ulong*)n, &s, &r);
1576 GEN S = utoi(s);
1577 *pr = hi? cat1u(r): utoi(r);
1578 return S;
1579 }
1580
1581 /* sqrt n[0], _dont_ assume n normalized */
1582 static GEN
sqrtispec1_sh(GEN n,GEN * pr)1583 sqrtispec1_sh(GEN n, GEN *pr)
1584 {
1585 GEN S;
1586 ulong r, s, u0 = (ulong)n[0];
1587 int sh = bfffo(u0) & ~1UL;
1588 if (sh) u0 <<= sh;
1589 p_sqrtu1(&u0, &s, &r);
1590 /* s^2 + r = u0, s < 2^(BIL/2). Rescale back:
1591 * 2^(2k) n = S^2 + R
1592 * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1593 if (sh) {
1594 int k = sh >> 1;
1595 ulong s0 = s & ((1<<k) - 1);
1596 r += s * (s0<<1);
1597 s >>= k;
1598 r >>= sh;
1599 }
1600 S = utoi(s);
1601 if (pr) *pr = utoi(r);
1602 return S;
1603 }
1604
1605 /* sqrt n[0..1], _dont_ assume n normalized */
1606 static GEN
sqrtispec2_sh(GEN n,GEN * pr)1607 sqrtispec2_sh(GEN n, GEN *pr)
1608 {
1609 GEN S;
1610 ulong U[2], r, s, u0 = (ulong)n[0], u1 = (ulong)n[1];
1611 int hi, sh = bfffo(u0) & ~1UL;
1612 if (sh) {
1613 u0 = (u0 << sh) | (u1 >> (BITS_IN_LONG-sh));
1614 u1 <<= sh;
1615 }
1616 U[0] = u0;
1617 U[1] = u1; hi = p_sqrtu2(U, &s, &r);
1618 /* s^2 + R = u0|u1. Rescale back:
1619 * 2^(2k) n = S^2 + R
1620 * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1621 if (sh) {
1622 int k = sh >> 1;
1623 ulong s0 = s & ((1<<k) - 1);
1624 LOCAL_HIREMAINDER;
1625 LOCAL_OVERFLOW;
1626 r = addll(r, mulll(s, (s0<<1)));
1627 if (overflow) hiremainder++;
1628 hiremainder += hi; /* + 0 or 1 */
1629 s >>= k;
1630 r = (r>>sh) | (hiremainder << (BITS_IN_LONG-sh));
1631 hi = (hiremainder & (1<<sh));
1632 }
1633 S = utoi(s);
1634 if (pr) *pr = hi? cat1u(r): utoi(r);
1635 return S;
1636 }
1637
1638 /* Let N = N[0..2n-1]. Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S
1639 * Assume N normalized */
1640 static GEN
sqrtispec(GEN N,long n,GEN * r)1641 sqrtispec(GEN N, long n, GEN *r)
1642 {
1643 GEN S, R, q, z, u;
1644 long l, h;
1645
1646 if (n == 1) return sqrtispec2(N, r);
1647 l = n >> 1;
1648 h = n - l; /* N = a3(h) | a2(h) | a1(l) | a0(l words) */
1649 S = sqrtispec(N, h, &R); /* S^2 + R = a3|a2 */
1650
1651 z = catii(R+2, lgefint(R)-2, N + 2*h, l); /* = R | a1(l) */
1652 q = dvmdii(z, shifti(S,1), &u);
1653 z = catii(u+2, lgefint(u)-2, N + n + h, l); /* = u | a0(l) */
1654
1655 S = addshiftw(S, q, l);
1656 R = subii(z, sqri(q));
1657 if (signe(R) < 0)
1658 {
1659 GEN S2 = shifti(S,1);
1660 R = addis(subiispec(S2+2, R+2, lgefint(S2)-2,lgefint(R)-2), -1);
1661 S = addis(S, -1);
1662 }
1663 *r = R; return S;
1664 }
1665
1666 /* Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S.
1667 * As for dvmdii, R is last on stack and guaranteed to be gen_0 in case the
1668 * remainder is 0. R = NULL is allowed. */
1669 GEN
sqrtremi(GEN N,GEN * r)1670 sqrtremi(GEN N, GEN *r)
1671 {
1672 pari_sp av;
1673 GEN S, R, n = N+2;
1674 long k, l2, ln = lgefint(N) - 2;
1675 int sh;
1676
1677 if (ln <= 2)
1678 {
1679 if (ln == 2) return sqrtispec2_sh(n, r);
1680 if (ln == 1) return sqrtispec1_sh(n, r);
1681 if (r) *r = gen_0;
1682 return gen_0;
1683 }
1684 av = avma;
1685 sh = bfffo(n[0]) >> 1;
1686 l2 = (ln + 1) >> 1;
1687 if (sh || (ln & 1)) { /* normalize n, so that n[0] >= 2^BIL / 4 */
1688 GEN s0, t = new_chunk(ln + 1);
1689 t[ln] = 0;
1690 if (sh)
1691 { shift_left(t, n, 0,ln-1, 0, (sh << 1)); }
1692 else
1693 xmpn_copy(t, n, ln);
1694 S = sqrtispec(t, l2, &R); /* t normalized, 2 * l2 words */
1695 /* Rescale back:
1696 * 2^(2k) n = S^2 + R, k = sh + (ln & 1)*BIL/2
1697 * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1698 k = sh + (ln & 1) * (BITS_IN_LONG/2);
1699 s0 = resmod2n(S, k);
1700 R = addii(shifti(R,-1), mulii(s0, S));
1701 R = shifti(R, 1 - (k<<1));
1702 S = shifti(S, -k);
1703 }
1704 else
1705 S = sqrtispec(n, l2, &R);
1706
1707 if (!r) { avma = (pari_sp)S; return gerepileuptoint(av, S); }
1708 gerepileall(av, 2, &S, &R); *r = R; return S;
1709 }
1710
1711 /* compute sqrt(|a|), assuming a != 0 */
1712
1713 #if 1
1714 GEN
sqrtr_abs(GEN x)1715 sqrtr_abs(GEN x)
1716 {
1717 long l = lg(x) - 2, e = expo(x), er = e>>1;
1718 GEN b, c, res = cgetr(2 + l);
1719 res[1] = evalsigne(1) | evalexpo(er);
1720 if (e&1) {
1721 b = new_chunk(l << 1);
1722 xmpn_copy(b, x+2, l);
1723 xmpn_zero(b + l,l);
1724 b = sqrtispec(b, l, &c);
1725 xmpn_copy(res+2, b+2, l);
1726 if (cmpii(c, b) > 0) roundr_up_ip(res, l+2);
1727 } else {
1728 ulong u;
1729 b = new_chunk(2 + (l << 1));
1730 shift_left(b+1, x+2, 0,l-1, 0, BITS_IN_LONG-1);
1731 b[0] = ((ulong)x[2])>>1;
1732 xmpn_zero(b + l+1,l+1);
1733 b = sqrtispec(b, l+1, &c);
1734 xmpn_copy(res+2, b+2, l);
1735 u = (ulong)b[l+2];
1736 if ( u&HIGHBIT || (u == ~HIGHBIT && cmpii(c,b) > 0))
1737 roundr_up_ip(res, l+2);
1738 }
1739 avma = (pari_sp)res; return res;
1740 }
1741
1742 #else /* use t_REAL: currently much slower (quadratic division) */
1743
1744 #ifdef LONG_IS_64BIT
1745 /* 64 bits of b = sqrt(a[0] * 2^64 + a[1]) [ up to 1ulp ] */
1746 static ulong
sqrtu2(ulong * a)1747 sqrtu2(ulong *a)
1748 {
1749 ulong c, b = dblmantissa( sqrt((double)a[0]) );
1750 LOCAL_HIREMAINDER;
1751 LOCAL_OVERFLOW;
1752
1753 /* > 32 correct bits, 1 Newton iteration to reach 64 */
1754 if (b <= a[0]) return HIGHBIT | (a[0] >> 1);
1755 hiremainder = a[0]; c = divll(a[1], b);
1756 return (addll(c, b) >> 1) | HIGHBIT;
1757 }
1758 /* 64 bits of sqrt(a[0] * 2^63) */
1759 static ulong
sqrtu2_1(ulong * a)1760 sqrtu2_1(ulong *a)
1761 {
1762 ulong t[2];
1763 t[0] = (a[0] >> 1);
1764 t[1] = (a[0] << (BITS_IN_LONG-1)) | (a[1] >> 1);
1765 return sqrtu2(t);
1766 }
1767 #else
1768 /* 32 bits of sqrt(a[0] * 2^32) */
1769 static ulong
sqrtu2(ulong * a)1770 sqrtu2(ulong *a) { return dblmantissa( sqrt((double)a[0]) ); }
1771 /* 32 bits of sqrt(a[0] * 2^31) */
1772 static ulong
sqrtu2_1(ulong * a)1773 sqrtu2_1(ulong *a) { return dblmantissa( sqrt(2. * a[0]) ); }
1774 #endif
1775
1776 GEN
sqrtr_abs(GEN x)1777 sqrtr_abs(GEN x)
1778 {
1779 long l1, i, l = lg(x), ex = expo(x);
1780 GEN a, t, y = cgetr(l);
1781 pari_sp av, av0 = avma;
1782
1783 a = cgetr(l+1); affrr(x,a);
1784 t = cgetr(l+1);
1785 if (ex & 1) { /* odd exponent */
1786 a[1] = evalsigne(1) | evalexpo(1);
1787 t[2] = (long)sqrtu2((ulong*)a + 2);
1788 } else { /* even exponent */
1789 a[1] = evalsigne(1) | evalexpo(0);
1790 t[2] = (long)sqrtu2_1((ulong*)a + 2);
1791 }
1792 t[1] = evalsigne(1) | evalexpo(0);
1793 for (i = 3; i <= l; i++) t[i] = 0;
1794
1795 /* |x| = 2^(ex/2) a, t ~ sqrt(a) */
1796 l--; l1 = 1; av = avma;
1797 while (l1 < l) { /* let t := (t + a/t)/2 */
1798 l1 <<= 1; if (l1 > l) l1 = l;
1799 setlg(a, l1 + 2);
1800 setlg(t, l1 + 2);
1801 affrr(addrr(t, divrr(a,t)), t); setexpo(t, expo(t)-1);
1802 avma = av;
1803 }
1804 affrr(t,y); setexpo(y, expo(y) + (ex>>1));
1805 avma = av0; return y;
1806 }
1807
1808 #endif
1809