1 #line 2 "../src/kernel/none/mp.c"
2 /* $Id: mp.c 8000 2006-08-24 21:51:50Z kb $
3 
4 Copyright (C) 2000-2003 The PARI group.
5 
6 This file is part of the PARI/GP package.
7 
8 PARI/GP is free software; you can redistribute it and/or modify it under the
9 terms of the GNU General Public License as published by the Free Software
10 Foundation. It is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY WHATSOEVER.
12 
13 Check the License for details. You should have received a copy of it, along
14 with the package; see the file 'COPYING'. If not, write to the Free Software
15 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
16 
17 /***********************************************************************/
18 /**								      **/
19 /**		         MULTIPRECISION KERNEL           	      **/
20 /**                                                                   **/
21 /***********************************************************************/
22 #include "pari.h"
23 #include "paripriv.h"
24 #include "../src/kernel/none/tune-gen.h"
25 
pari_kernel_init(void)26 int pari_kernel_init(void) { return 0; } /*nothing to do*/
27 
28 /* NOTE: arguments of "spec" routines (muliispec, addiispec, etc.) aren't
29  * GENs but pairs (long *a, long na) representing a list of digits (in basis
30  * BITS_IN_LONG) : a[0], ..., a[na-1]. [ In ordre to facilitate splitting: no
31  * need to reintroduce codewords ] */
32 
33 /* Normalize a non-negative integer */
34 GEN
int_normalize(GEN x,long known_zero_words)35 int_normalize(GEN x, long known_zero_words)
36 {
37   long lx = lgefint(x);
38   long i = 2 + known_zero_words;
39   for ( ; i < lx; i++)
40     if (x[i])
41     {
42       if (i != 2)
43       {
44         GEN x0 = x;
45         i -= 2; x += i;
46         if (x0 == (GEN)avma) avma = (pari_sp)x;
47         else stackdummy((pari_sp)(x0+i), (pari_sp)x0);
48         lx -= i;
49         x[0] = evaltyp(t_INT) | evallg(lx);
50         x[1] = evalsigne(1) | evallgefint(lx);
51       }
52       return x;
53     }
54   x[1] = evalsigne(0) | evallgefint(2); return x;
55 }
56 
57 /***********************************************************************/
58 /**								      **/
59 /**		         ADDITION / SUBTRACTION          	      **/
60 /**                                                                   **/
61 /***********************************************************************/
62 
63 GEN
setloop(GEN a)64 setloop(GEN a)
65 {
66   GEN z0 = (GEN)avma; (void)cgetg(lgefint(a) + 3, t_VECSMALL);
67   return icopy_av(a, z0); /* two cells of extra space before a */
68 }
69 
70 /* we had a = setloop(?), then some incloops. Reset a to b */
71 GEN
resetloop(GEN a,GEN b)72 resetloop(GEN a, GEN b) {
73   long lb = lgefint(b);
74   a += lgefint(a) - lb;
75   a[0] = evaltyp(t_INT) | evallg(lb);
76   affii(b, a); return a;
77 }
78 
79 /* assume a > 0, initialized by setloop. Do a++ */
80 static GEN
incpos(GEN a)81 incpos(GEN a)
82 {
83   long i, l = lgefint(a);
84   for (i=l-1; i>1; i--)
85     if (++a[i]) return a;
86   l++; a--; /* use extra cell */
87   a[0]=evaltyp(t_INT) | _evallg(l);
88   a[1]=evalsigne(1) | evallgefint(l);
89   a[2]=1; return a;
90 }
91 
92 /* assume a < 0, initialized by setloop. Do a++ */
93 static GEN
incneg(GEN a)94 incneg(GEN a)
95 {
96   long l = lgefint(a)-1;
97   if (a[l]--)
98   {
99     if (l == 2 && !a[2])
100     {
101       a++; /* save one cell */
102       a[0] = evaltyp(t_INT) | _evallg(2);
103       a[1] = evalsigne(0) | evallgefint(2);
104     }
105     return a;
106   }
107   for (l--; l>1; l--)
108     if (a[l]--) break;
109   l++; a++; /* save one cell */
110   a[0] = evaltyp(t_INT) | _evallg(l);
111   a[1] = evalsigne(-1) | evallgefint(l);
112   return a;
113 }
114 
115 /* assume a initialized by setloop. Do a++ */
116 GEN
incloop(GEN a)117 incloop(GEN a)
118 {
119   switch(signe(a))
120   {
121     case 0: a--; /* use extra cell */
122       a[0]=evaltyp(t_INT) | _evallg(3);
123       a[1]=evalsigne(1) | evallgefint(3);
124       a[2]=1; return a;
125     case -1: return incneg(a);
126     default: return incpos(a);
127   }
128 }
129 
130 INLINE GEN
addsispec(long s,GEN x,long nx)131 addsispec(long s, GEN x, long nx)
132 {
133   GEN xd, zd = (GEN)avma;
134   long lz;
135 
136   lz = nx+3; (void)new_chunk(lz);
137   xd = x + nx;
138   *--zd = *--xd + s;
139   if ((ulong)*zd < (ulong)s)
140     for(;;)
141     {
142       if (xd == x) { *--zd = 1; break; } /* enlarge z */
143       *--zd = ((ulong)*--xd) + 1;
144       if (*zd) { lz--; break; }
145     }
146   else lz--;
147   while (xd > x) *--zd = *--xd;
148   *--zd = evalsigne(1) | evallgefint(lz);
149   *--zd = evaltyp(t_INT) | evallg(lz);
150   avma=(pari_sp)zd; return zd;
151 }
152 
153 static GEN
addiispec(GEN x,GEN y,long nx,long ny)154 addiispec(GEN x, GEN y, long nx, long ny)
155 {
156   GEN xd,yd,zd;
157   long lz;
158   LOCAL_OVERFLOW;
159 
160   if (nx < ny) swapspec(x,y, nx,ny);
161   if (ny == 1) return addsispec(*y,x,nx);
162   zd = (GEN)avma;
163   lz = nx+3; (void)new_chunk(lz);
164   xd = x + nx;
165   yd = y + ny;
166   *--zd = addll(*--xd, *--yd);
167   while (yd > y) *--zd = addllx(*--xd, *--yd);
168   if (overflow)
169     for(;;)
170     {
171       if (xd == x) { *--zd = 1; break; } /* enlarge z */
172       *--zd = ((ulong)*--xd) + 1;
173       if (*zd) { lz--; break; }
174     }
175   else lz--;
176   while (xd > x) *--zd = *--xd;
177   *--zd = evalsigne(1) | evallgefint(lz);
178   *--zd = evaltyp(t_INT) | evallg(lz);
179   avma=(pari_sp)zd; return zd;
180 }
181 
182 /* assume x >= y */
183 INLINE GEN
subisspec(GEN x,long s,long nx)184 subisspec(GEN x, long s, long nx)
185 {
186   GEN xd, zd = (GEN)avma;
187   long lz;
188   LOCAL_OVERFLOW;
189 
190   lz = nx+2; (void)new_chunk(lz);
191   xd = x + nx;
192   *--zd = subll(*--xd, s);
193   if (overflow)
194     for(;;)
195     {
196       *--zd = ((ulong)*--xd) - 1;
197       if (*xd) break;
198     }
199   if (xd == x)
200     while (*zd == 0) { zd++; lz--; } /* shorten z */
201   else
202     do  *--zd = *--xd; while (xd > x);
203   *--zd = evalsigne(1) | evallgefint(lz);
204   *--zd = evaltyp(t_INT) | evallg(lz);
205   avma=(pari_sp)zd; return zd;
206 }
207 
208 /* assume x > y */
209 static GEN
subiispec(GEN x,GEN y,long nx,long ny)210 subiispec(GEN x, GEN y, long nx, long ny)
211 {
212   GEN xd,yd,zd;
213   long lz;
214   LOCAL_OVERFLOW;
215 
216   if (ny==1) return subisspec(x,*y,nx);
217   zd = (GEN)avma;
218   lz = nx+2; (void)new_chunk(lz);
219   xd = x + nx;
220   yd = y + ny;
221   *--zd = subll(*--xd, *--yd);
222   while (yd > y) *--zd = subllx(*--xd, *--yd);
223   if (overflow)
224     for(;;)
225     {
226       *--zd = ((ulong)*--xd) - 1;
227       if (*xd) break;
228     }
229   if (xd == x)
230     while (*zd == 0) { zd++; lz--; } /* shorten z */
231   else
232     do  *--zd = *--xd; while (xd > x);
233   *--zd = evalsigne(1) | evallgefint(lz);
234   *--zd = evaltyp(t_INT) | evallg(lz);
235   avma=(pari_sp)zd; return zd;
236 }
237 
238 static void
roundr_up_ip(GEN x,long l)239 roundr_up_ip(GEN x, long l)
240 {
241   long i = l;
242   for(;;)
243   {
244     if (++x[--i]) break;
245     if (i == 2) { x[2] = HIGHBIT; setexpo(x, expo(x)+1); break; }
246   }
247 }
248 
249 void
affir(GEN x,GEN y)250 affir(GEN x, GEN y)
251 {
252   const long s = signe(x), ly = lg(y);
253   long lx, sh, i;
254 
255   if (!s)
256   {
257     y[1] = evalexpo(-bit_accuracy(ly));
258     return;
259   }
260 
261   lx = lgefint(x); sh = bfffo(x[2]);
262   y[1] = evalsigne(s) | evalexpo(bit_accuracy(lx)-sh-1);
263   if (sh) {
264     if (lx <= ly)
265     {
266       for (i=lx; i<ly; i++) y[i]=0;
267       shift_left(y,x,2,lx-1, 0,sh);
268       return;
269     }
270     shift_left(y,x,2,ly-1, x[ly],sh);
271     /* lx > ly: round properly */
272     if ((x[ly]<<sh) & HIGHBIT) roundr_up_ip(y, ly);
273   }
274   else {
275     if (lx <= ly)
276     {
277       for (i=2; i<lx; i++) y[i]=x[i];
278       for (   ; i<ly; i++) y[i]=0;
279       return;
280     }
281     for (i=2; i<ly; i++) y[i]=x[i];
282     /* lx > ly: round properly */
283     if (x[ly] & HIGHBIT) roundr_up_ip(y, ly);
284   }
285 }
286 
287 static GEN
shifti_spec(GEN x,long lx,long n)288 shifti_spec(GEN x, long lx, long n)
289 {
290   long ly, i, m, s = signe(x);
291   GEN y;
292   if (!s) return gen_0;
293   if (!n)
294   {
295     y = cgeti(lx);
296     y[1] = evalsigne(s) | evallgefint(lx);
297     while (--lx > 1) y[lx]=x[lx];
298     return y;
299   }
300   if (n > 0)
301   {
302     GEN z = (GEN)avma;
303     long d = n>>TWOPOTBITS_IN_LONG;
304 
305     ly = lx+d; y = new_chunk(ly);
306     for ( ; d; d--) *--z = 0;
307     m = n & (BITS_IN_LONG-1);
308     if (!m) for (i=2; i<lx; i++) y[i]=x[i];
309     else
310     {
311       register const ulong sh = BITS_IN_LONG - m;
312       shift_left2(y,x, 2,lx-1, 0,m,sh);
313       i = ((ulong)x[2]) >> sh;
314       /* Extend y on the left? */
315       if (i) { ly++; y = new_chunk(1); y[2] = i; }
316     }
317   }
318   else
319   {
320     n = -n;
321     ly = lx - (n>>TWOPOTBITS_IN_LONG);
322     if (ly<3) return gen_0;
323     y = new_chunk(ly);
324     m = n & (BITS_IN_LONG-1);
325     if (m) {
326       shift_right(y,x, 2,ly, 0,m);
327       if (y[2] == 0)
328       {
329         if (ly==3) { avma = (pari_sp)(y+3); return gen_0; }
330         ly--; avma = (pari_sp)(++y);
331       }
332     } else {
333       for (i=2; i<ly; i++) y[i]=x[i];
334     }
335   }
336   y[1] = evalsigne(s)|evallgefint(ly);
337   y[0] = evaltyp(t_INT)|evallg(ly); return y;
338 }
339 
340 GEN
shifti(GEN x,long n)341 shifti(GEN x, long n)
342 {
343   return shifti_spec(x, lgefint(x), n);
344 }
345 
346 GEN
ishiftr_lg(GEN x,long lx,long n)347 ishiftr_lg(GEN x, long lx, long n)
348 { /*This is a kludge since x is not an integer*/
349   return shifti_spec(x, lx, n);
350 }
351 
352 GEN
truncr(GEN x)353 truncr(GEN x)
354 {
355   long d,e,i,s,m;
356   GEN y;
357 
358   if ((s=signe(x)) == 0 || (e=expo(x)) < 0) return gen_0;
359   d = (e>>TWOPOTBITS_IN_LONG) + 3;
360   m = e & (BITS_IN_LONG-1);
361   if (d > lg(x)) pari_err(precer, "truncr (precision loss in truncation)");
362 
363   y=cgeti(d); y[1] = evalsigne(s) | evallgefint(d);
364   if (++m == BITS_IN_LONG)
365     for (i=2; i<d; i++) y[i]=x[i];
366   else
367   {
368     register const ulong sh = BITS_IN_LONG - m;
369     shift_right2(y,x, 2,d,0, sh,m);
370   }
371   return y;
372 }
373 
374 /* integral part */
375 GEN
floorr(GEN x)376 floorr(GEN x)
377 {
378   long d,e,i,lx,m;
379   GEN y;
380 
381   if (signe(x) >= 0) return truncr(x);
382   if ((e=expo(x)) < 0) return gen_m1;
383   d = (e>>TWOPOTBITS_IN_LONG) + 3;
384   m = e & (BITS_IN_LONG-1);
385   lx=lg(x); if (d>lx) pari_err(precer, "floorr (precision loss in truncation)");
386   y = new_chunk(d);
387   if (++m == BITS_IN_LONG)
388   {
389     for (i=2; i<d; i++) y[i]=x[i];
390     i=d; while (i<lx && !x[i]) i++;
391     if (i==lx) goto END;
392   }
393   else
394   {
395     register const ulong sh = BITS_IN_LONG - m;
396     shift_right2(y,x, 2,d,0, sh,m);
397     if (x[d-1]<<m == 0)
398     {
399       i=d; while (i<lx && !x[i]) i++;
400       if (i==lx) goto END;
401     }
402   }
403   /* set y:=y+1 */
404   for (i=d-1; i>=2; i--) { y[i]++; if (y[i]) goto END; }
405   y=new_chunk(1); y[2]=1; d++;
406 END:
407   y[1] = evalsigne(-1) | evallgefint(d);
408   y[0] = evaltyp(t_INT) | evallg(d); return y;
409 }
410 
411 INLINE int
absi_cmp_lg(GEN x,GEN y,long l)412 absi_cmp_lg(GEN x, GEN y, long l)
413 {
414   long i=2;
415   while (i<l && x[i]==y[i]) i++;
416   if (i==l) return 0;
417   return ((ulong)x[i] > (ulong)y[i])? 1: -1;
418 }
419 
420 INLINE int
absi_equal_lg(GEN x,GEN y,long l)421 absi_equal_lg(GEN x, GEN y, long l)
422 {
423   long i = l-1; while (i>1 && x[i]==y[i]) i--;
424   return i==1;
425 }
426 
427 /***********************************************************************/
428 /**								      **/
429 /**		          MULTIPLICATION                 	      **/
430 /**                                                                   **/
431 /***********************************************************************/
432 GEN
mulss(long x,long y)433 mulss(long x, long y)
434 {
435   long s,p1;
436   GEN z;
437   LOCAL_HIREMAINDER;
438 
439   if (!x || !y) return gen_0;
440   if (x<0) { s = -1; x = -x; } else s=1;
441   if (y<0) { s = -s; y = -y; }
442   p1 = mulll(x,y);
443   if (hiremainder)
444   {
445     z=cgeti(4); z[1] = evalsigne(s) | evallgefint(4);
446     z[2]=hiremainder; z[3]=p1; return z;
447   }
448   z=cgeti(3); z[1] = evalsigne(s) | evallgefint(3);
449   z[2]=p1; return z;
450 }
451 
452 GEN
muluu(ulong x,ulong y)453 muluu(ulong x, ulong y)
454 {
455   long p1;
456   GEN z;
457   LOCAL_HIREMAINDER;
458 
459   if (!x || !y) return gen_0;
460   p1 = mulll(x,y);
461   if (hiremainder)
462   {
463     z=cgeti(4); z[1] = evalsigne(1) | evallgefint(4);
464     z[2]=hiremainder; z[3]=p1; return z;
465   }
466   z=cgeti(3); z[1] = evalsigne(1) | evallgefint(3);
467   z[2]=p1; return z;
468 }
469 
470 /* assume ny > 0 */
471 INLINE GEN
muluispec(ulong x,GEN y,long ny)472 muluispec(ulong x, GEN y, long ny)
473 {
474   GEN yd, z = (GEN)avma;
475   long lz = ny+3;
476   LOCAL_HIREMAINDER;
477 
478   (void)new_chunk(lz);
479   yd = y + ny; *--z = mulll(x, *--yd);
480   while (yd > y) *--z = addmul(x,*--yd);
481   if (hiremainder) *--z = hiremainder; else lz--;
482   *--z = evalsigne(1) | evallgefint(lz);
483   *--z = evaltyp(t_INT) | evallg(lz);
484   avma=(pari_sp)z; return z;
485 }
486 
487 /* a + b*|Y| */
488 GEN
addumului(ulong a,ulong b,GEN Y)489 addumului(ulong a, ulong b, GEN Y)
490 {
491   GEN yd,y,z;
492   long ny,lz;
493   LOCAL_HIREMAINDER;
494   LOCAL_OVERFLOW;
495 
496   if (!signe(Y)) return utoi(a);
497 
498   y = Y+2; z = (GEN)avma;
499   ny = lgefint(Y)-2;
500   lz = ny+3;
501 
502   (void)new_chunk(lz);
503   yd = y + ny; *--z = addll(a, mulll(b, *--yd));
504   if (overflow) hiremainder++; /* can't overflow */
505   while (yd > y) *--z = addmul(b,*--yd);
506   if (hiremainder) *--z = hiremainder; else lz--;
507   *--z = evalsigne(1) | evallgefint(lz);
508   *--z = evaltyp(t_INT) | evallg(lz);
509   avma=(pari_sp)z; return z;
510 }
511 
512 GEN muliispec(GEN a, GEN b, long na, long nb);
513 /*#define KARAMULR_VARIANT*/
514 #define muliispec_mirror muliispec
515 
516 /***********************************************************************/
517 /**								      **/
518 /**		          DIVISION                       	      **/
519 /**                                                                   **/
520 /***********************************************************************/
521 
522 ulong
umodiu(GEN y,ulong x)523 umodiu(GEN y, ulong x)
524 {
525   long sy=signe(y),ly,i;
526   LOCAL_HIREMAINDER;
527 
528   if (!x) pari_err(gdiver);
529   if (!sy) return 0;
530   ly = lgefint(y);
531   if (x <= (ulong)y[2]) hiremainder=0;
532   else
533   {
534     if (ly==3) return (sy > 0)? (ulong)y[2]: x - (ulong)y[2];
535     hiremainder=y[2]; ly--; y++;
536   }
537   for (i=2; i<ly; i++) (void)divll(y[i],x);
538   if (!hiremainder) return 0;
539   return (sy > 0)? hiremainder: x - hiremainder;
540 }
541 
542 /* return |y| \/ x */
543 GEN
diviu_rem(GEN y,ulong x,ulong * rem)544 diviu_rem(GEN y, ulong x, ulong *rem)
545 {
546   long ly,i;
547   GEN z;
548   LOCAL_HIREMAINDER;
549 
550   if (!x) pari_err(gdiver);
551   if (!signe(y)) { *rem = 0; return gen_0; }
552 
553   ly = lgefint(y);
554   if (x <= (ulong)y[2]) hiremainder=0;
555   else
556   {
557     if (ly==3) { *rem = (ulong)y[2]; return gen_0; }
558     hiremainder=y[2]; ly--; y++;
559   }
560   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(1);
561   for (i=2; i<ly; i++) z[i]=divll(y[i],x);
562   *rem = hiremainder; return z;
563 }
564 
565 GEN
divis_rem(GEN y,long x,long * rem)566 divis_rem(GEN y, long x, long *rem)
567 {
568   long sy=signe(y),ly,s,i;
569   GEN z;
570   LOCAL_HIREMAINDER;
571 
572   if (!x) pari_err(gdiver);
573   if (!sy) { *rem=0; return gen_0; }
574   if (x<0) { s = -sy; x = -x; } else s = sy;
575 
576   ly = lgefint(y);
577   if ((ulong)x <= (ulong)y[2]) hiremainder=0;
578   else
579   {
580     if (ly==3) { *rem = itos(y); return gen_0; }
581     hiremainder=y[2]; ly--; y++;
582   }
583   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
584   for (i=2; i<ly; i++) z[i]=divll(y[i],x);
585   if (sy<0) hiremainder = - ((long)hiremainder);
586   *rem = (long)hiremainder; return z;
587 }
588 
589 GEN
divis(GEN y,long x)590 divis(GEN y, long x)
591 {
592   long sy=signe(y),ly,s,i;
593   GEN z;
594   LOCAL_HIREMAINDER;
595 
596   if (!x) pari_err(gdiver);
597   if (!sy) return gen_0;
598   if (x<0) { s = -sy; x = -x; } else s = sy;
599 
600   ly = lgefint(y);
601   if ((ulong)x <= (ulong)y[2]) hiremainder=0;
602   else
603   {
604     if (ly==3) return gen_0;
605     hiremainder=y[2]; ly--; y++;
606   }
607   z = cgeti(ly); z[1] = evallgefint(ly) | evalsigne(s);
608   for (i=2; i<ly; i++) z[i]=divll(y[i],x);
609   return z;
610 }
611 
612 GEN
divrr(GEN x,GEN y)613 divrr(GEN x, GEN y)
614 {
615   long sx=signe(x), sy=signe(y), lx,ly,lr,e,i,j;
616   ulong y0,y1;
617   GEN r, r1;
618 
619   if (!sy) pari_err(gdiver);
620   e = expo(x) - expo(y);
621   if (!sx) return real_0_bit(e);
622   if (sy<0) sx = -sx;
623 
624   lx=lg(x); ly=lg(y);
625   if (ly==3)
626   {
627     ulong k = x[2], l = (lx>3)? x[3]: 0;
628     LOCAL_HIREMAINDER;
629     if (k < (ulong)y[2]) e--;
630     else
631     {
632       l >>= 1; if (k&1) l |= HIGHBIT;
633       k >>= 1;
634     }
635     r = cgetr(3); r[1] = evalsigne(sx) | evalexpo(e);
636     hiremainder=k; r[2]=divll(l,y[2]); return r;
637   }
638 
639   lr = min(lx,ly); r = new_chunk(lr);
640   r1 = r-1;
641   r1[1] = 0; for (i=2; i<lr; i++) r1[i]=x[i];
642   r1[lr] = (lx>ly)? x[lr]: 0;
643   y0 = y[2]; y1 = y[3];
644   for (i=0; i<lr-1; i++)
645   { /* r1 = r + (i-1) */
646     ulong k, qp;
647     LOCAL_HIREMAINDER;
648     LOCAL_OVERFLOW;
649 
650     if ((ulong)r1[1] == y0)
651     {
652       qp = MAXULONG; k = addll(y0,r1[2]);
653     }
654     else
655     {
656       if ((ulong)r1[1] > y0) /* can't happen if i=0 */
657       {
658         GEN y1 = y+1;
659         j = lr-i; r1[j] = subll(r1[j],y1[j]);
660 	for (j--; j>0; j--) r1[j] = subllx(r1[j],y1[j]);
661 	j=i; do r[--j]++; while (j && !r[j]);
662       }
663       hiremainder = r1[1]; overflow = 0;
664       qp = divll(r1[2],y0); k = hiremainder;
665     }
666     if (!overflow)
667     {
668       long k3 = subll(mulll(qp,y1), r1[3]);
669       long k4 = subllx(hiremainder,k);
670       while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
671     }
672     j = lr-i+1;
673     if (j<ly) (void)mulll(qp,y[j]); else { hiremainder = 0 ; j = ly; }
674     for (j--; j>1; j--)
675     {
676       r1[j] = subll(r1[j], addmul(qp,y[j]));
677       hiremainder += overflow;
678     }
679     if ((ulong)r1[1] != hiremainder)
680     {
681       if ((ulong)r1[1] < hiremainder)
682       {
683         qp--;
684         j = lr-i-(lr-i>=ly); r1[j] = addll(r1[j], y[j]);
685         for (j--; j>1; j--) r1[j] = addllx(r1[j], y[j]);
686       }
687       else
688       {
689 	r1[1] -= hiremainder;
690 	while (r1[1])
691 	{
692 	  qp++; if (!qp) { j=i; do r[--j]++; while (j && !r[j]); }
693           j = lr-i-(lr-i>=ly); r1[j] = subll(r1[j],y[j]);
694           for (j--; j>1; j--) r1[j] = subllx(r1[j],y[j]);
695 	  r1[1] -= overflow;
696 	}
697       }
698     }
699     *++r1 = qp;
700   }
701   /* i = lr-1 */
702   /* round correctly */
703   if ((ulong)r1[1] > (y0>>1))
704   {
705     j=i; do r[--j]++; while (j && !r[j]);
706   }
707   r1 = r-1; for (j=i; j>=2; j--) r[j]=r1[j];
708   if (r[0] == 0) e--;
709   else if (r[0] == 1) { shift_right(r,r, 2,lr, 1,1); }
710   else { /* possible only when rounding up to 0x2 0x0 ... */
711     r[2] = HIGHBIT; e++;
712   }
713   r[0] = evaltyp(t_REAL)|evallg(lr);
714   r[1] = evalsigne(sx) | evalexpo(e);
715   return r;
716 }
717 
718 GEN
divri(GEN x,GEN y)719 divri(GEN x, GEN y)
720 {
721   long lx, s = signe(y);
722   pari_sp av;
723   GEN z;
724 
725   if (!s) pari_err(gdiver);
726   if (!signe(x)) return real_0_bit(expo(x) - expi(y));
727   if (!is_bigint(y)) return divrs(x, s>0? y[2]: -y[2]);
728 
729   lx = lg(x); z = cgetr(lx); av = avma;
730   affrr(divrr(x, itor(y, lx+1)), z);
731   avma = av; return z;
732 }
733 
734 /* Integer division x / y: such that sign(r) = sign(x)
735  *   if z = ONLY_REM return remainder, otherwise return quotient
736  *   if z != NULL set *z to remainder
737  *   *z is the last object on stack (and thus can be disposed of with cgiv
738  *   instead of gerepile)
739  * If *z is zero, we put gen_0 here and no copy.
740  * space needed: lx + ly */
741 GEN
dvmdii(GEN x,GEN y,GEN * z)742 dvmdii(GEN x, GEN y, GEN *z)
743 {
744   long sx=signe(x),sy=signe(y);
745   long lx, ly, lz, i, j, sh, lq, lr;
746   pari_sp av;
747   ulong y0,y1, *xd,*rd,*qd;
748   GEN q, r, r1;
749 
750   if (!sy) { if (z == ONLY_REM && !sx) return gen_0; pari_err(gdiver); }
751   if (!sx)
752   {
753     if (!z || z == ONLY_REM) return gen_0;
754     *z=gen_0; return gen_0;
755   }
756   lx=lgefint(x);
757   ly=lgefint(y); lz=lx-ly;
758   if (lz <= 0)
759   {
760     if (lz == 0)
761     {
762       for (i=2; i<lx; i++)
763         if (x[i] != y[i])
764         {
765           if ((ulong)x[i] > (ulong)y[i]) goto DIVIDE;
766           goto TRIVIAL;
767         }
768       if (z == ONLY_REM) return gen_0;
769       if (z) *z = gen_0;
770       if (sx < 0) sy = -sy;
771       return stoi(sy);
772     }
773 TRIVIAL:
774     if (z == ONLY_REM) return icopy(x);
775     if (z) *z = icopy(x);
776     return gen_0;
777   }
778 DIVIDE: /* quotient is non-zero */
779   av=avma; if (sx<0) sy = -sy;
780   if (ly==3)
781   {
782     LOCAL_HIREMAINDER;
783     y0 = y[2];
784     if (y0 <= (ulong)x[2]) hiremainder=0;
785     else
786     {
787       hiremainder = x[2]; lx--; x++;
788     }
789     q = new_chunk(lx); for (i=2; i<lx; i++) q[i]=divll(x[i],y0);
790     if (z == ONLY_REM)
791     {
792       avma=av; if (!hiremainder) return gen_0;
793       r=cgeti(3);
794       r[1] = evalsigne(sx) | evallgefint(3);
795       r[2]=hiremainder; return r;
796     }
797     q[1] = evalsigne(sy) | evallgefint(lx);
798     q[0] = evaltyp(t_INT) | evallg(lx);
799     if (!z) return q;
800     if (!hiremainder) { *z=gen_0; return q; }
801     r=cgeti(3);
802     r[1] = evalsigne(sx) | evallgefint(3);
803     r[2] = hiremainder; *z=r; return q;
804   }
805 
806   r1 = new_chunk(lx); sh = bfffo(y[2]);
807   if (sh)
808   { /* normalize so that highbit(y) = 1 (shift left x and y by sh bits)*/
809     register const ulong m = BITS_IN_LONG - sh;
810     r = new_chunk(ly);
811     shift_left2(r, y,2,ly-1, 0,sh,m); y = r;
812     shift_left2(r1,x,2,lx-1, 0,sh,m);
813     r1[1] = ((ulong)x[2]) >> m;
814   }
815   else
816   {
817     r1[1] = 0; for (j=2; j<lx; j++) r1[j] = x[j];
818   }
819   x = r1;
820   y0 = y[2]; y1 = y[3];
821   for (i=0; i<=lz; i++)
822   { /* r1 = x + i */
823     ulong k, qp;
824     LOCAL_HIREMAINDER;
825     LOCAL_OVERFLOW;
826 
827     if ((ulong)r1[1] == y0)
828     {
829       qp = MAXULONG; k = addll(y0,r1[2]);
830     }
831     else
832     {
833       hiremainder = r1[1]; overflow = 0;
834       qp = divll(r1[2],y0); k = hiremainder;
835     }
836     if (!overflow)
837     {
838       long k3 = subll(mulll(qp,y1), r1[3]);
839       long k4 = subllx(hiremainder,k);
840       while (!overflow && k4) { qp--; k3 = subll(k3,y1); k4 = subllx(k4,y0); }
841     }
842     hiremainder = 0; j = ly;
843     for (j--; j>1; j--)
844     {
845       r1[j] = subll(r1[j], addmul(qp,y[j]));
846       hiremainder += overflow;
847     }
848     if ((ulong)r1[1] < hiremainder)
849     {
850       qp--;
851       j = ly-1; r1[j] = addll(r1[j],y[j]);
852       for (j--; j>1; j--) r1[j] = addllx(r1[j],y[j]);
853     }
854     *++r1 = qp;
855   }
856 
857   lq = lz+2;
858   if (!z)
859   {
860     qd = (ulong*)av;
861     xd = (ulong*)(x + lq);
862     if (x[1]) { lz++; lq++; }
863     while (lz--) *--qd = *--xd;
864     *--qd = evalsigne(sy) | evallgefint(lq);
865     *--qd = evaltyp(t_INT) | evallg(lq);
866     avma = (pari_sp)qd; return (GEN)qd;
867   }
868 
869   j=lq; while (j<lx && !x[j]) j++;
870   lz = lx-j;
871   if (z == ONLY_REM)
872   {
873     if (lz==0) { avma = av; return gen_0; }
874     rd = (ulong*)av; lr = lz+2;
875     xd = (ulong*)(x + lx);
876     if (!sh) while (lz--) *--rd = *--xd;
877     else
878     { /* shift remainder right by sh bits */
879       const ulong shl = BITS_IN_LONG - sh;
880       ulong l;
881       xd--;
882       while (--lz) /* fill r[3..] */
883       {
884         l = *xd >> sh;
885         *--rd = l | (*--xd << shl);
886       }
887       l = *xd >> sh;
888       if (l) *--rd = l; else lr--;
889     }
890     *--rd = evalsigne(sx) | evallgefint(lr);
891     *--rd = evaltyp(t_INT) | evallg(lr);
892     avma = (pari_sp)rd; return (GEN)rd;
893   }
894 
895   lr = lz+2;
896   rd = NULL; /* gcc -Wall */
897   if (lz)
898   { /* non zero remainder: initialize rd */
899     xd = (ulong*)(x + lx);
900     if (!sh)
901     {
902       rd = (ulong*)avma; (void)new_chunk(lr);
903       while (lz--) *--rd = *--xd;
904     }
905     else
906     { /* shift remainder right by sh bits */
907       const ulong shl = BITS_IN_LONG - sh;
908       ulong l;
909       rd = (ulong*)x; /* overwrite shifted y */
910       xd--;
911       while (--lz)
912       {
913         l = *xd >> sh;
914         *--rd = l | (*--xd << shl);
915       }
916       l = *xd >> sh;
917       if (l) *--rd = l; else lr--;
918     }
919     *--rd = evalsigne(sx) | evallgefint(lr);
920     *--rd = evaltyp(t_INT) | evallg(lr);
921     rd += lr;
922   }
923   qd = (ulong*)av;
924   xd = (ulong*)(x + lq);
925   if (x[1]) lq++;
926   j = lq-2; while (j--) *--qd = *--xd;
927   *--qd = evalsigne(sy) | evallgefint(lq);
928   *--qd = evaltyp(t_INT) | evallg(lq);
929   q = (GEN)qd;
930   if (lr==2) *z = gen_0;
931   else
932   { /* rd has been properly initialized: we had lz > 0 */
933     while (lr--) *--qd = *--rd;
934     *z = (GEN)qd;
935   }
936   avma = (pari_sp)qd; return q;
937 }
938 
939 /* Montgomery reduction.
940  * N has k words, assume T >= 0 has less than 2k.
941  * Return res := T / B^k mod N, where B = 2^BIL
942  * such that 0 <= res < T/B^k + N  and  res has less than k words */
943 GEN
red_montgomery(GEN T,GEN N,ulong inv)944 red_montgomery(GEN T, GEN N, ulong inv)
945 {
946   pari_sp av;
947   GEN Te, Td, Ne, Nd, scratch;
948   ulong i, j, m, t, d, k = lgefint(N)-2;
949   int carry;
950   LOCAL_HIREMAINDER;
951   LOCAL_OVERFLOW;
952 
953   if (k == 0) return gen_0;
954   d = lgefint(T)-2; /* <= 2*k */
955 #ifdef DEBUG
956   if (d > 2*k) pari_err(bugparier,"red_montgomery");
957 #endif
958   if (k == 1)
959   { /* as below, special cased for efficiency */
960     ulong n = (ulong)N[2];
961     t = (ulong)T[d+1];
962     m = t * inv;
963     (void)addll(mulll(m, n), t); /* = 0 */
964     t = hiremainder + overflow;
965     if (d == 2)
966     {
967       t = addll((ulong)T[2], t);
968       if (overflow) t -= n; /* t > n doesn't fit in 1 word */
969     }
970     return utoi(t);
971   }
972   /* assume k >= 2 */
973   av = avma; scratch = new_chunk(k<<1); /* >= k + 2: result fits */
974 
975   /* copy T to scratch space (pad with zeroes to 2k words) */
976   Td = (GEN)av;
977   Te = T + (d+2);
978   for (i=0; i < d     ; i++) *--Td = *--Te;
979   for (   ; i < (k<<1); i++) *--Td = 0;
980 
981   Te = (GEN)av; /* 1 beyond end of T mantissa */
982   Ne = N + k+2; /* 1 beyond end of N mantissa */
983 
984   carry = 0;
985   for (i=0; i<k; i++) /* set T := T/B nod N, k times */
986   {
987     Td = Te; /* one beyond end of (new) T mantissa */
988     Nd = Ne;
989     m = *--Td * inv; /* solve T + m N = O(B) */
990 
991     /* set T := (T + mN) / B */
992     Te = Td;
993     (void)addll(mulll(m, *--Nd), *Td); /* = 0 */
994     for (j=1; j<k; j++)
995     {
996       hiremainder += overflow;
997       t = addll(addmul(m, *--Nd), *--Td); *Td = t;
998     }
999     overflow += hiremainder;
1000     t = addll(overflow, *--Td); *Td = t + carry;
1001     carry = (overflow || (carry && *Td == 0));
1002   }
1003   if (carry)
1004   { /* Td > N overflows (k+1 words), set Td := Td - N */
1005     Td = Te;
1006     Nd = Ne;
1007     t = subll(*--Td, *--Nd); *Td = t;
1008     while (Td > scratch) { t = subllx(*--Td, *--Nd); *Td = t; }
1009   }
1010 
1011   /* copy result */
1012   Td = (GEN)av;
1013   while (! *scratch && Te > scratch) scratch++; /* strip leading 0s */
1014   while (Te > scratch) *--Td = *--Te;
1015   k = (GEN)av - Td; if (!k) return gen_0;
1016   k += 2;
1017   *--Td = evalsigne(1) | evallgefint(k);
1018   *--Td = evaltyp(t_INT) | evallg(k);
1019 #ifdef DEBUG
1020 {
1021   long l = lgefint(N)-2, s = BITS_IN_LONG*l;
1022   GEN R = int2n(s);
1023   GEN res = remii(mulii(T, Fp_inv(R, N)), N);
1024   if (k > lgefint(N)
1025     || !equalii(remii(Td,N),res)
1026     || cmpii(Td, addii(shifti(T, -s), N)) >= 0) pari_err(bugparier,"red_montgomery");
1027 }
1028 #endif
1029   avma = (pari_sp)Td; return Td;
1030 }
1031 
1032 /* EXACT INTEGER DIVISION */
1033 
1034 /* assume xy>0, the division is exact and y is odd. Destroy x */
1035 static GEN
diviuexact_i(GEN x,ulong y)1036 diviuexact_i(GEN x, ulong y)
1037 {
1038   long i, lz, lx;
1039   ulong q, yinv;
1040   GEN z, z0, x0, x0min;
1041 
1042   if (y == 1) return icopy(x);
1043   lx = lgefint(x);
1044   if (lx == 3) return utoipos((ulong)x[2] / y);
1045   yinv = invrev(y);
1046   lz = (y <= (ulong)x[2]) ? lx : lx-1;
1047   z = new_chunk(lz);
1048   z0 = z + lz;
1049   x0 = x + lx; x0min = x + lx-lz+2;
1050 
1051   while (x0 > x0min)
1052   {
1053     *--z0 = q = yinv*((ulong)*--x0); /* i-th quotient */
1054     if (!q) continue;
1055     /* x := x - q * y */
1056     { /* update neither lowest word (could set it to 0) nor highest ones */
1057       register GEN x1 = x0 - 1;
1058       LOCAL_HIREMAINDER;
1059       (void)mulll(q,y);
1060       if (hiremainder)
1061       {
1062         if ((ulong)*x1 < hiremainder)
1063         {
1064           *x1 -= hiremainder;
1065           do (*--x1)--; while ((ulong)*x1 == MAXULONG);
1066         }
1067         else
1068           *x1 -= hiremainder;
1069       }
1070     }
1071   }
1072   i=2; while(!z[i]) i++;
1073   z += i-2; lz -= i-2;
1074   z[0] = evaltyp(t_INT)|evallg(lz);
1075   z[1] = evalsigne(1)|evallg(lz);
1076   avma = (pari_sp)z; return z;
1077 }
1078 
1079 /* assume y != 0 and the division is exact */
1080 GEN
diviuexact(GEN x,ulong y)1081 diviuexact(GEN x, ulong y)
1082 {
1083   pari_sp av;
1084   long lx, vy, s = signe(x);
1085   GEN z;
1086 
1087   if (!s) return gen_0;
1088   if (y == 1) return icopy(x);
1089   lx = lgefint(x);
1090   if (lx == 3) {
1091     ulong q = (ulong)x[2] / y;
1092     return (s > 0)? utoipos(q): utoineg(q);
1093   }
1094   av = avma; (void)new_chunk(lx); vy = vals(y);
1095   if (vy) {
1096     y >>= vy;
1097     if (y == 1) { avma = av; return shifti(x, -vy); }
1098     x = shifti(x, -vy);
1099     if (lx == 3) {
1100       ulong q = (ulong)x[2] / y;
1101       avma = av;
1102       return (s > 0)? utoipos(q): utoineg(q);
1103     }
1104   } else x = icopy(x);
1105   avma = av;
1106   z = diviuexact_i(x, y);
1107   setsigne(z, s); return z;
1108 }
1109 
1110 /* Find z such that x=y*z, knowing that y | x (unchecked)
1111  * Method: y0 z0 = x0 mod B = 2^BITS_IN_LONG ==> z0 = 1/y0 mod B.
1112  *    Set x := (x - z0 y) / B, updating only relevant words, and repeat */
1113 GEN
diviiexact(GEN x,GEN y)1114 diviiexact(GEN x, GEN y)
1115 {
1116   long lx, ly, lz, vy, i, ii, sx = signe(x), sy = signe(y);
1117   pari_sp av;
1118   ulong y0inv,q;
1119   GEN z;
1120 
1121   if (!sy) pari_err(gdiver);
1122   if (!sx) return gen_0;
1123   lx = lgefint(x);
1124   if (lx == 3) {
1125     q = (ulong)x[2] / (ulong)y[2];
1126     return (sx+sy) ? utoipos(q): utoineg(q);
1127   }
1128   vy = vali(y); av = avma;
1129   (void)new_chunk(lx); /* enough room for z */
1130   if (vy)
1131   { /* make y odd */
1132     y = shifti(y,-vy);
1133     x = shifti(x,-vy); lx = lgefint(x);
1134   }
1135   else x = icopy(x); /* necessary because we destroy x */
1136   avma = av; /* will erase our x,y when exiting */
1137   /* now y is odd */
1138   ly = lgefint(y);
1139   if (ly == 3)
1140   {
1141     x = diviuexact_i(x,(ulong)y[2]); /* x != 0 */
1142     setsigne(x, (sx+sy)? 1: -1); return x;
1143   }
1144   y0inv = invrev(y[ly-1]);
1145   i=2; while (i<ly && y[i]==x[i]) i++;
1146   lz = (i==ly || (ulong)y[i] < (ulong)x[i]) ? lx-ly+3 : lx-ly+2;
1147   z = new_chunk(lz);
1148 
1149   y += ly - 1; /* now y[-i] = i-th word of y */
1150   for (ii=lx-1,i=lz-1; i>=2; i--,ii--)
1151   {
1152     long limj;
1153     LOCAL_HIREMAINDER;
1154     LOCAL_OVERFLOW;
1155 
1156     z[i] = q = y0inv*((ulong)x[ii]); /* i-th quotient */
1157     if (!q) continue;
1158 
1159     /* x := x - q * y */
1160     (void)mulll(q,y[0]); limj = max(lx - lz, ii+3-ly);
1161     { /* update neither lowest word (could set it to 0) nor highest ones */
1162       register GEN x0 = x + (ii - 1), y0 = y - 1, xlim = x + limj;
1163       for (; x0 >= xlim; x0--, y0--)
1164       {
1165         *x0 = subll(*x0, addmul(q,*y0));
1166         hiremainder += overflow;
1167       }
1168       if (hiremainder && limj != lx - lz)
1169       {
1170         if ((ulong)*x0 < hiremainder)
1171         {
1172           *x0 -= hiremainder;
1173           do (*--x0)--; while ((ulong)*x0 == MAXULONG);
1174         }
1175         else
1176           *x0 -= hiremainder;
1177       }
1178     }
1179   }
1180   i=2; while(!z[i]) i++;
1181   z += i-2; lz -= (i-2);
1182   z[0] = evaltyp(t_INT)|evallg(lz);
1183   z[1] = evalsigne((sx+sy)? 1: -1) | evallg(lz);
1184   avma = (pari_sp)z; return z;
1185 }
1186 
1187 
1188 /********************************************************************/
1189 /**                                                                **/
1190 /**               INTEGER MULTIPLICATION (KARATSUBA)               **/
1191 /**                                                                **/
1192 /********************************************************************/
1193 /* nx >= ny = num. of digits of x, y (not GEN, see mulii) */
1194 INLINE GEN
muliispec_basecase(GEN x,GEN y,long nx,long ny)1195 muliispec_basecase(GEN x, GEN y, long nx, long ny)
1196 {
1197   GEN z2e,z2d,yd,xd,ye,zd;
1198   long p1,lz;
1199   LOCAL_HIREMAINDER;
1200 
1201   if (!ny) return gen_0;
1202   zd = (GEN)avma; lz = nx+ny+2;
1203   (void)new_chunk(lz);
1204   xd = x + nx;
1205   yd = y + ny;
1206   ye = yd; p1 = *--xd;
1207 
1208   *--zd = mulll(p1, *--yd); z2e = zd;
1209   while (yd > y) *--zd = addmul(p1, *--yd);
1210   *--zd = hiremainder;
1211 
1212   while (xd > x)
1213   {
1214     LOCAL_OVERFLOW;
1215     yd = ye; p1 = *--xd;
1216 
1217     z2d = --z2e;
1218     *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1219     while (yd > y)
1220     {
1221       hiremainder += overflow;
1222       *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1223     }
1224     *--zd = hiremainder + overflow;
1225   }
1226   if (*zd == 0) { zd++; lz--; } /* normalize */
1227   *--zd = evalsigne(1) | evallgefint(lz);
1228   *--zd = evaltyp(t_INT) | evallg(lz);
1229   avma=(pari_sp)zd; return zd;
1230 }
1231 
1232 INLINE GEN
sqrispec_basecase(GEN x,long nx)1233 sqrispec_basecase(GEN x, long nx)
1234 {
1235   GEN z2e,z2d,yd,xd,zd,x0,z0;
1236   long p1,lz;
1237   LOCAL_HIREMAINDER;
1238   LOCAL_OVERFLOW;
1239 
1240   if (!nx) return gen_0;
1241   zd = (GEN)avma; lz = (nx+1) << 1;
1242   z0 = new_chunk(lz);
1243   if (nx == 1)
1244   {
1245     *--zd = mulll(*x, *x);
1246     *--zd = hiremainder; goto END;
1247   }
1248   xd = x + nx;
1249 
1250   /* compute double products --> zd */
1251   p1 = *--xd; yd = xd; --zd;
1252   *--zd = mulll(p1, *--yd); z2e = zd;
1253   while (yd > x) *--zd = addmul(p1, *--yd);
1254   *--zd = hiremainder;
1255 
1256   x0 = x+1;
1257   while (xd > x0)
1258   {
1259     LOCAL_OVERFLOW;
1260     p1 = *--xd; yd = xd;
1261 
1262     z2e -= 2; z2d = z2e;
1263     *z2d = addll(mulll(p1, *--yd), *z2d); z2d--;
1264     while (yd > x)
1265     {
1266       hiremainder += overflow;
1267       *z2d = addll(addmul(p1, *--yd), *z2d); z2d--;
1268     }
1269     *--zd = hiremainder + overflow;
1270   }
1271   /* multiply zd by 2 (put result in zd - 1) */
1272   zd[-1] = ((*zd & HIGHBIT) != 0);
1273   shift_left(zd, zd, 0, (nx<<1)-3, 0, 1);
1274 
1275   /* add the squares */
1276   xd = x + nx; zd = z0 + lz;
1277   p1 = *--xd;
1278   zd--; *zd = mulll(p1,p1);
1279   zd--; *zd = addll(hiremainder, *zd);
1280   while (xd > x)
1281   {
1282     p1 = *--xd;
1283     zd--; *zd = addll(mulll(p1,p1)+ overflow, *zd);
1284     zd--; *zd = addll(hiremainder + overflow, *zd);
1285   }
1286 
1287 END:
1288   if (*zd == 0) { zd++; lz--; } /* normalize */
1289   *--zd = evalsigne(1) | evallgefint(lz);
1290   *--zd = evaltyp(t_INT) | evallg(lz);
1291   avma=(pari_sp)zd; return zd;
1292 }
1293 
1294 /* return (x shifted left d words) + y. Assume d > 0, x > 0 and y >= 0 */
1295 static GEN
addshiftw(GEN x,GEN y,long d)1296 addshiftw(GEN x, GEN y, long d)
1297 {
1298   GEN z,z0,y0,yd, zd = (GEN)avma;
1299   long a,lz,ly = lgefint(y);
1300 
1301   z0 = new_chunk(d);
1302   a = ly-2; yd = y+ly;
1303   if (a >= d)
1304   {
1305     y0 = yd-d; while (yd > y0) *--zd = *--yd; /* copy last d words of y */
1306     a -= d;
1307     if (a)
1308       z = addiispec(x+2, y+2, lgefint(x)-2, a);
1309     else
1310       z = icopy(x);
1311   }
1312   else
1313   {
1314     y0 = yd-a; while (yd > y0) *--zd = *--yd; /* copy last a words of y */
1315     while (zd >= z0) *--zd = 0;    /* complete with 0s */
1316     z = icopy(x);
1317   }
1318   lz = lgefint(z)+d;
1319   z[1] = evalsigne(1) | evallgefint(lz);
1320   z[0] = evaltyp(t_INT) | evallg(lz); return z;
1321 }
1322 
1323 /* Fast product (Karatsuba) of integers. a and b are "special" GENs
1324  * c,c0,c1,c2 are genuine GENs.
1325  */
1326 GEN
muliispec(GEN a,GEN b,long na,long nb)1327 muliispec(GEN a, GEN b, long na, long nb)
1328 {
1329   GEN a0,c,c0;
1330   long n0, n0a, i;
1331   pari_sp av;
1332 
1333   if (na < nb) swapspec(a,b, na,nb);
1334   if (nb == 1) return muluispec((ulong)*b, a, na);
1335   if (nb == 0) return gen_0;
1336   if (nb < KARATSUBA_MULI_LIMIT) return muliispec_basecase(a,b,na,nb);
1337   i=(na>>1); n0=na-i; na=i;
1338   av=avma; a0=a+na; n0a=n0;
1339   while (!*a0 && n0a) { a0++; n0a--; }
1340 
1341   if (n0a && nb > n0)
1342   { /* nb <= na <= n0 */
1343     GEN b0,c1,c2;
1344     long n0b;
1345 
1346     nb -= n0;
1347     c = muliispec(a,b,na,nb);
1348     b0 = b+nb; n0b = n0;
1349     while (!*b0 && n0b) { b0++; n0b--; }
1350     if (n0b)
1351     {
1352       c0 = muliispec(a0,b0, n0a,n0b);
1353 
1354       c2 = addiispec(a0,a, n0a,na);
1355       c1 = addiispec(b0,b, n0b,nb);
1356       c1 = muliispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
1357       c2 = addiispec(c0+2, c+2, lgefint(c0)-2,lgefint(c) -2);
1358 
1359       c1 = subiispec(c1+2,c2+2, lgefint(c1)-2,lgefint(c2)-2);
1360     }
1361     else
1362     {
1363       c0 = gen_0;
1364       c1 = muliispec(a0,b, n0a,nb);
1365     }
1366     c = addshiftw(c,c1, n0);
1367   }
1368   else
1369   {
1370     c = muliispec(a,b,na,nb);
1371     c0 = muliispec(a0,b,n0a,nb);
1372   }
1373   return gerepileuptoint(av, addshiftw(c,c0, n0));
1374 }
1375 
1376 /* x % (2^n), assuming x, n >= 0 */
1377 GEN
resmod2n(GEN x,long n)1378 resmod2n(GEN x, long n)
1379 {
1380   long hi,l,k,lx,ly;
1381   GEN z, xd, zd;
1382 
1383   if (!signe(x) || !n) return gen_0;
1384 
1385   l = n & (BITS_IN_LONG-1);    /* n % BITS_IN_LONG */
1386   k = n >> TWOPOTBITS_IN_LONG; /* n / BITS_IN_LONG */
1387   lx = lgefint(x);
1388   if (lx < k+3) return icopy(x);
1389 
1390   xd = x + (lx-k-1);
1391   /* x = |_|...|#|1|...|k| : copy the last l bits of # and the last k words
1392    *            ^--- initial xd  */
1393   hi = ((ulong)*xd) & ((1UL<<l)-1); /* last l bits of # = top bits of result */
1394   if (!hi)
1395   { /* strip leading zeroes from result */
1396     xd++; while (k && !*xd) { k--; xd++; }
1397     if (!k) return gen_0;
1398     ly = k+2; xd--;
1399   }
1400   else
1401     ly = k+3;
1402 
1403   zd = z = cgeti(ly);
1404   *++zd = evalsigne(1) | evallgefint(ly);
1405   if (hi) *++zd = hi;
1406   for ( ;k; k--) *++zd = *++xd;
1407   return z;
1408 }
1409 
1410 GEN
sqrispec(GEN a,long na)1411 sqrispec(GEN a, long na)
1412 {
1413   GEN a0,c;
1414   long n0, n0a, i;
1415   pari_sp av;
1416 
1417   if (na < KARATSUBA_SQRI_LIMIT) return sqrispec_basecase(a,na);
1418   i=(na>>1); n0=na-i; na=i;
1419   av=avma; a0=a+na; n0a=n0;
1420   while (!*a0 && n0a) { a0++; n0a--; }
1421   c = sqrispec(a,na);
1422   if (n0a)
1423   {
1424     GEN t, c1, c0 = sqrispec(a0,n0a);
1425 #if 0
1426     c1 = shifti(muliispec(a0,a, n0a,na),1);
1427 #else /* faster */
1428     t = addiispec(a0,a,n0a,na);
1429     t = sqrispec(t+2,lgefint(t)-2);
1430     c1= addiispec(c0+2,c+2, lgefint(c0)-2, lgefint(c)-2);
1431     c1= subiispec(t+2, c1+2, lgefint(t)-2, lgefint(c1)-2);
1432 #endif
1433     c = addshiftw(c,c1, n0);
1434     c = addshiftw(c,c0, n0);
1435   }
1436   else
1437     c = addshiftw(c,gen_0,n0<<1);
1438   return gerepileuptoint(av, c);
1439 }
1440 
1441 /********************************************************************/
1442 /**                                                                **/
1443 /**                    KARATSUBA SQUARE ROOT                       **/
1444 /**      adapted from Paul Zimmermann's implementation of          **/
1445 /**      his algorithm in GMP (mpn_sqrtrem)                        **/
1446 /**                                                                **/
1447 /********************************************************************/
1448 
1449 /* Square roots table */
1450 static const unsigned char approx_tab[192] = {
1451   128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
1452   143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
1453   156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
1454   169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
1455   181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
1456   192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
1457   202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
1458   212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
1459   221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
1460   230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
1461   239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
1462   247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
1463 };
1464 
1465 /* N[0], assume N[0] >= 2^(BIL-2).
1466  * Return r,s such that s^2 + r = N, 0 <= r <= 2s */
1467 static void
p_sqrtu1(ulong * N,ulong * ps,ulong * pr)1468 p_sqrtu1(ulong *N, ulong *ps, ulong *pr)
1469 {
1470   ulong prec, r, s, q, u, n0 = N[0];
1471 
1472   q = n0 >> (BITS_IN_LONG - 8);
1473   /* 2^6 = 64 <= q < 256 = 2^8 */
1474   s = approx_tab[q - 64];				/* 128 <= s < 255 */
1475   r = (n0 >> (BITS_IN_LONG - 16)) - s * s;		/* r <= 2*s */
1476   if (r > (s << 1)) { r -= (s << 1) | 1; s++; }
1477 
1478   /* 8-bit approximation from the high 8-bits of N[0] */
1479   prec = 8;
1480   n0 <<= 2 * prec;
1481   while (2 * prec < BITS_IN_LONG)
1482   { /* invariant: s has prec bits, and r <= 2*s */
1483     r = (r << prec) + (n0 >> (BITS_IN_LONG - prec));
1484     n0 <<= prec;
1485     u = 2 * s;
1486     q = r / u; u = r - q * u;
1487     s = (s << prec) + q;
1488     u = (u << prec) + (n0 >> (BITS_IN_LONG - prec));
1489     q = q * q;
1490     r = u - q;
1491     if (u < q) { s--; r += (s << 1) | 1; }
1492     n0 <<= prec;
1493     prec = 2 * prec;
1494   }
1495   *ps = s;
1496   *pr = r;
1497 }
1498 
1499 /* N[0..1], assume N[0] >= 2^(BIL-2).
1500  * Return 1 if remainder overflows, 0 otherwise */
1501 static int
p_sqrtu2(ulong * N,ulong * ps,ulong * pr)1502 p_sqrtu2(ulong *N, ulong *ps, ulong *pr)
1503 {
1504   ulong cc, qhl, r, s, q, u, n1 = N[1];
1505   LOCAL_OVERFLOW;
1506 
1507   p_sqrtu1(N, &s, &r); /* r <= 2s */
1508   qhl = 0; while (r >= s) { qhl++; r -= s; }
1509   /* now r < s < 2^(BIL/2) */
1510   r = (r << BITS_IN_HALFULONG) | (n1 >> BITS_IN_HALFULONG);
1511   u = s << 1;
1512   q = r / u; u = r - q * u;
1513   q += (qhl & 1) << (BITS_IN_HALFULONG - 1);
1514   qhl >>= 1;
1515   /* (initial r)<<(BIL/2) + n1>>(BIL/2) = (qhl<<(BIL/2) + q) * 2s + u */
1516   s = ((s + qhl) << BITS_IN_HALFULONG) + q;
1517   cc = u >> BITS_IN_HALFULONG;
1518   r = (u << BITS_IN_HALFULONG) | (n1 & LOWMASK);
1519   r = subll(r, q * q);
1520   cc -= overflow + qhl;
1521   /* now subtract 2*q*2^(BIL/2) + 2^BIL if qhl is set */
1522   if ((long)cc < 0)
1523   {
1524     if (s) {
1525       r = addll(r, s);
1526       cc += overflow;
1527       s--;
1528     } else {
1529       cc++;
1530       s = ~0UL;
1531     }
1532     r = addll(r, s);
1533     cc += overflow;
1534   }
1535   *ps = s;
1536   *pr = r; return cc;
1537 }
1538 
1539 static void
xmpn_zero(GEN x,long n)1540 xmpn_zero(GEN x, long n)
1541 {
1542   while (--n >= 0) x[n]=0;
1543 }
1544 static void
xmpn_copy(GEN z,GEN x,long n)1545 xmpn_copy(GEN z, GEN x, long n)
1546 {
1547   long k = n;
1548   while (--k >= 0) z[k] = x[k];
1549 }
1550 static GEN
cat1u(ulong d)1551 cat1u(ulong d)
1552 {
1553   GEN R = cgeti(4);
1554   R[1] = evalsigne(1)|evallgefint(4);
1555   R[2] = 1;
1556   R[3] = d; return R;
1557 }
1558 /* a[0..la-1] * 2^(lb BIL) | b[0..lb-1] */
1559 static GEN
catii(GEN a,long la,GEN b,long lb)1560 catii(GEN a, long la, GEN b, long lb)
1561 {
1562   long l = la + lb + 2;
1563   GEN z = cgeti(l);
1564   z[1] = evalsigne(1) | evallgefint(l);
1565   xmpn_copy(z + 2, a, la);
1566   xmpn_copy(z + 2 + la, b, lb);
1567   return int_normalize(z, 0);
1568 }
1569 
1570 /* sqrt n[0..1], assume n normalized */
1571 static GEN
sqrtispec2(GEN n,GEN * pr)1572 sqrtispec2(GEN n, GEN *pr)
1573 {
1574   ulong s, r;
1575   int hi = p_sqrtu2((ulong*)n, &s, &r);
1576   GEN S = utoi(s);
1577   *pr = hi? cat1u(r): utoi(r);
1578   return S;
1579 }
1580 
1581 /* sqrt n[0], _dont_ assume n normalized */
1582 static GEN
sqrtispec1_sh(GEN n,GEN * pr)1583 sqrtispec1_sh(GEN n, GEN *pr)
1584 {
1585   GEN S;
1586   ulong r, s, u0 = (ulong)n[0];
1587   int sh = bfffo(u0) & ~1UL;
1588   if (sh) u0 <<= sh;
1589   p_sqrtu1(&u0, &s, &r);
1590   /* s^2 + r = u0, s < 2^(BIL/2). Rescale back:
1591    * 2^(2k) n = S^2 + R
1592    * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1593   if (sh) {
1594     int k = sh >> 1;
1595     ulong s0 = s & ((1<<k) - 1);
1596     r += s * (s0<<1);
1597     s >>= k;
1598     r >>= sh;
1599   }
1600   S = utoi(s);
1601   if (pr) *pr = utoi(r);
1602   return S;
1603 }
1604 
1605 /* sqrt n[0..1], _dont_ assume n normalized */
1606 static GEN
sqrtispec2_sh(GEN n,GEN * pr)1607 sqrtispec2_sh(GEN n, GEN *pr)
1608 {
1609   GEN S;
1610   ulong U[2], r, s, u0 = (ulong)n[0], u1 = (ulong)n[1];
1611   int hi, sh = bfffo(u0) & ~1UL;
1612   if (sh) {
1613     u0 = (u0 << sh) | (u1 >> (BITS_IN_LONG-sh));
1614     u1 <<= sh;
1615   }
1616   U[0] = u0;
1617   U[1] = u1; hi = p_sqrtu2(U, &s, &r);
1618   /* s^2 + R = u0|u1. Rescale back:
1619    * 2^(2k) n = S^2 + R
1620    * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1621   if (sh) {
1622     int k = sh >> 1;
1623     ulong s0 = s & ((1<<k) - 1);
1624     LOCAL_HIREMAINDER;
1625     LOCAL_OVERFLOW;
1626     r = addll(r, mulll(s, (s0<<1)));
1627     if (overflow) hiremainder++;
1628     hiremainder += hi; /* + 0 or 1 */
1629     s >>= k;
1630     r = (r>>sh) | (hiremainder << (BITS_IN_LONG-sh));
1631     hi = (hiremainder & (1<<sh));
1632   }
1633   S = utoi(s);
1634   if (pr) *pr = hi? cat1u(r): utoi(r);
1635   return S;
1636 }
1637 
1638 /* Let N = N[0..2n-1]. Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S
1639  * Assume N normalized */
1640 static GEN
sqrtispec(GEN N,long n,GEN * r)1641 sqrtispec(GEN N, long n, GEN *r)
1642 {
1643   GEN S, R, q, z, u;
1644   long l, h;
1645 
1646   if (n == 1) return sqrtispec2(N, r);
1647   l = n >> 1;
1648   h = n - l; /* N = a3(h) | a2(h) | a1(l) | a0(l words) */
1649   S = sqrtispec(N, h, &R); /* S^2 + R = a3|a2 */
1650 
1651   z = catii(R+2, lgefint(R)-2, N + 2*h, l); /* = R | a1(l) */
1652   q = dvmdii(z, shifti(S,1), &u);
1653   z = catii(u+2, lgefint(u)-2, N + n + h, l); /* = u | a0(l) */
1654 
1655   S = addshiftw(S, q, l);
1656   R = subii(z, sqri(q));
1657   if (signe(R) < 0)
1658   {
1659     GEN S2 = shifti(S,1);
1660     R = addis(subiispec(S2+2, R+2, lgefint(S2)-2,lgefint(R)-2), -1);
1661     S = addis(S, -1);
1662   }
1663   *r = R; return S;
1664 }
1665 
1666 /* Return S (and set R) s.t S^2 + R = N, 0 <= R <= 2S.
1667  * As for dvmdii, R is last on stack and guaranteed to be gen_0 in case the
1668  * remainder is 0. R = NULL is allowed. */
1669 GEN
sqrtremi(GEN N,GEN * r)1670 sqrtremi(GEN N, GEN *r)
1671 {
1672   pari_sp av;
1673   GEN S, R, n = N+2;
1674   long k, l2, ln = lgefint(N) - 2;
1675   int sh;
1676 
1677   if (ln <= 2)
1678   {
1679     if (ln == 2) return sqrtispec2_sh(n, r);
1680     if (ln == 1) return sqrtispec1_sh(n, r);
1681     if (r) *r = gen_0;
1682     return gen_0;
1683   }
1684   av = avma;
1685   sh = bfffo(n[0]) >> 1;
1686   l2 = (ln + 1) >> 1;
1687   if (sh || (ln & 1)) { /* normalize n, so that n[0] >= 2^BIL / 4 */
1688     GEN s0, t = new_chunk(ln + 1);
1689     t[ln] = 0;
1690     if (sh)
1691     { shift_left(t, n, 0,ln-1, 0, (sh << 1)); }
1692     else
1693       xmpn_copy(t, n, ln);
1694     S = sqrtispec(t, l2, &R); /* t normalized, 2 * l2 words */
1695     /* Rescale back:
1696      * 2^(2k) n = S^2 + R, k = sh + (ln & 1)*BIL/2
1697      * so 2^(2k) n = (S - s0)^2 + (2*S*s0 - s0^2 + R), s0 = S mod 2^k. */
1698     k = sh + (ln & 1) * (BITS_IN_LONG/2);
1699     s0 = resmod2n(S, k);
1700     R = addii(shifti(R,-1), mulii(s0, S));
1701     R = shifti(R, 1 - (k<<1));
1702     S = shifti(S, -k);
1703   }
1704   else
1705     S = sqrtispec(n, l2, &R);
1706 
1707   if (!r) { avma = (pari_sp)S; return gerepileuptoint(av, S); }
1708   gerepileall(av, 2, &S, &R); *r = R; return S;
1709 }
1710 
1711 /* compute sqrt(|a|), assuming a != 0 */
1712 
1713 #if 1
1714 GEN
sqrtr_abs(GEN x)1715 sqrtr_abs(GEN x)
1716 {
1717   long l = lg(x) - 2, e = expo(x), er = e>>1;
1718   GEN b, c, res = cgetr(2 + l);
1719   res[1] = evalsigne(1) | evalexpo(er);
1720   if (e&1) {
1721     b = new_chunk(l << 1);
1722     xmpn_copy(b, x+2, l);
1723     xmpn_zero(b + l,l);
1724     b = sqrtispec(b, l, &c);
1725     xmpn_copy(res+2, b+2, l);
1726     if (cmpii(c, b) > 0) roundr_up_ip(res, l+2);
1727   } else {
1728     ulong u;
1729     b = new_chunk(2 + (l << 1));
1730     shift_left(b+1, x+2, 0,l-1, 0, BITS_IN_LONG-1);
1731     b[0] = ((ulong)x[2])>>1;
1732     xmpn_zero(b + l+1,l+1);
1733     b = sqrtispec(b, l+1, &c);
1734     xmpn_copy(res+2, b+2, l);
1735     u = (ulong)b[l+2];
1736     if ( u&HIGHBIT || (u == ~HIGHBIT && cmpii(c,b) > 0))
1737       roundr_up_ip(res, l+2);
1738   }
1739   avma = (pari_sp)res; return res;
1740 }
1741 
1742 #else /* use t_REAL: currently much slower (quadratic division) */
1743 
1744 #ifdef LONG_IS_64BIT
1745 /* 64 bits of b = sqrt(a[0] * 2^64 + a[1])  [ up to 1ulp ] */
1746 static ulong
sqrtu2(ulong * a)1747 sqrtu2(ulong *a)
1748 {
1749   ulong c, b = dblmantissa( sqrt((double)a[0]) );
1750   LOCAL_HIREMAINDER;
1751   LOCAL_OVERFLOW;
1752 
1753   /* > 32 correct bits, 1 Newton iteration to reach 64 */
1754   if (b <= a[0]) return HIGHBIT | (a[0] >> 1);
1755   hiremainder = a[0]; c = divll(a[1], b);
1756   return (addll(c, b) >> 1) | HIGHBIT;
1757 }
1758 /* 64 bits of sqrt(a[0] * 2^63) */
1759 static ulong
sqrtu2_1(ulong * a)1760 sqrtu2_1(ulong *a)
1761 {
1762   ulong t[2];
1763   t[0] = (a[0] >> 1);
1764   t[1] = (a[0] << (BITS_IN_LONG-1)) | (a[1] >> 1);
1765   return sqrtu2(t);
1766 }
1767 #else
1768 /* 32 bits of sqrt(a[0] * 2^32) */
1769 static ulong
sqrtu2(ulong * a)1770 sqrtu2(ulong *a)   { return dblmantissa( sqrt((double)a[0]) ); }
1771 /* 32 bits of sqrt(a[0] * 2^31) */
1772 static ulong
sqrtu2_1(ulong * a)1773 sqrtu2_1(ulong *a) { return dblmantissa( sqrt(2. * a[0]) ); }
1774 #endif
1775 
1776 GEN
sqrtr_abs(GEN x)1777 sqrtr_abs(GEN x)
1778 {
1779   long l1, i, l = lg(x), ex = expo(x);
1780   GEN a, t, y = cgetr(l);
1781   pari_sp av, av0 = avma;
1782 
1783   a = cgetr(l+1); affrr(x,a);
1784   t = cgetr(l+1);
1785   if (ex & 1) { /* odd exponent */
1786     a[1] = evalsigne(1) | evalexpo(1);
1787     t[2] = (long)sqrtu2((ulong*)a + 2);
1788   } else { /* even exponent */
1789     a[1] = evalsigne(1) | evalexpo(0);
1790     t[2] = (long)sqrtu2_1((ulong*)a + 2);
1791   }
1792   t[1] = evalsigne(1) | evalexpo(0);
1793   for (i = 3; i <= l; i++) t[i] = 0;
1794 
1795   /* |x| = 2^(ex/2) a, t ~ sqrt(a) */
1796   l--; l1 = 1; av = avma;
1797   while (l1 < l) { /* let t := (t + a/t)/2 */
1798     l1 <<= 1; if (l1 > l) l1 = l;
1799     setlg(a, l1 + 2);
1800     setlg(t, l1 + 2);
1801     affrr(addrr(t, divrr(a,t)), t); setexpo(t, expo(t)-1);
1802     avma = av;
1803   }
1804   affrr(t,y); setexpo(y, expo(y) + (ex>>1));
1805   avma = av0; return y;
1806 }
1807 
1808 #endif
1809