1 /* Reference mpn functions, designed to be simple, portable and independent
2    of the normal gmp code.  Speed isn't a consideration.
3 
4 Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library test suite.
7 
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12 
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
16 Public License for more details.
17 
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
20 
21 
22 /* Most routines have assertions representing what the mpn routines are
23    supposed to accept.  Many of these reference routines do sensible things
24    outside these ranges (eg. for size==0), but the assertions are present to
25    pick up bad parameters passed here that are about to be passed the same
26    to a real mpn routine being compared.  */
27 
28 /* always do assertion checking */
29 #define WANT_ASSERT  1
30 
31 #include <stdio.h>  /* for NULL */
32 #include <stdlib.h> /* for malloc */
33 
34 #include "gmp-impl.h"
35 #include "longlong.h"
36 
37 #include "tests.h"
38 
39 
40 
41 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
42    in bytes. */
43 int
44 byte_overlap_p (const void *v_xp, mp_size_t xsize,
45 		const void *v_yp, mp_size_t ysize)
46 {
47   const char *xp = (const char *) v_xp;
48   const char *yp = (const char *) v_yp;
49 
50   ASSERT (xsize >= 0);
51   ASSERT (ysize >= 0);
52 
53   /* no wraparounds */
54   ASSERT (xp+xsize >= xp);
55   ASSERT (yp+ysize >= yp);
56 
57   if (xp + xsize <= yp)
58     return 0;
59 
60   if (yp + ysize <= xp)
61     return 0;
62 
63   return 1;
64 }
65 
66 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
67 int
68 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
69 {
70   return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
71 			 yp, ysize * GMP_LIMB_BYTES);
72 }
73 
74 /* Check overlap for a routine defined to work low to high. */
75 int
76 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
77 {
78   return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
79 }
80 
81 /* Check overlap for a routine defined to work high to low. */
82 int
83 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
84 {
85   return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
86 }
87 
88 /* Check overlap for a standard routine requiring equal or separate. */
89 int
90 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
91 {
92   return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
93 }
94 int
95 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
96 			       mp_size_t size)
97 {
98   return (refmpn_overlap_fullonly_p (dst, src1, size)
99 	  && refmpn_overlap_fullonly_p (dst, src2, size));
100 }
101 
102 
103 mp_ptr
104 refmpn_malloc_limbs (mp_size_t size)
105 {
106   mp_ptr  p;
107   ASSERT (size >= 0);
108   if (size == 0)
109     size = 1;
110   p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
111   ASSERT (p != NULL);
112   return p;
113 }
114 
115 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
116  * memory allocated by refmpn_malloc_limbs_aligned. */
117 void
118 refmpn_free_limbs (mp_ptr p)
119 {
120   free (p);
121 }
122 
123 mp_ptr
124 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
125 {
126   mp_ptr  p;
127   p = refmpn_malloc_limbs (size);
128   refmpn_copyi (p, ptr, size);
129   return p;
130 }
131 
132 /* malloc n limbs, aligned to a multiple of m bytes */
133 mp_ptr
134 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
135 {
136   return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
137 }
138 
139 
140 void
141 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
142 {
143   mp_size_t  i;
144   ASSERT (size >= 0);
145   for (i = 0; i < size; i++)
146     ptr[i] = value;
147 }
148 
149 void
150 refmpn_zero (mp_ptr ptr, mp_size_t size)
151 {
152   refmpn_fill (ptr, size, CNST_LIMB(0));
153 }
154 
155 void
156 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
157 {
158   ASSERT (newsize >= oldsize);
159   refmpn_zero (ptr+oldsize, newsize-oldsize);
160 }
161 
162 int
163 refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
164 {
165   mp_size_t  i;
166   for (i = 0; i < size; i++)
167     if (ptr[i] != 0)
168       return 0;
169   return 1;
170 }
171 
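/* Return the size of {ptr,size} with high zero limbs stripped off. */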
172 mp_size_t
173 refmpn_normalize (mp_srcptr ptr, mp_size_t size)
174 {
175   ASSERT (size >= 0);
176   while (size > 0 && ptr[size-1] == 0)
177     size--;
178   return size;
179 }
180 
181 /* the highest one bit in x */
182 mp_limb_t
183 refmpn_msbone (mp_limb_t x)
184 {
185   mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
186 
187   while (n != 0)
188     {
189       if (x & n)
190 	break;
191       n >>= 1;
192     }
193   return n;
194 }
195 
196 /* a mask of the highest one bit and all bits below */
197 mp_limb_t
198 refmpn_msbone_mask (mp_limb_t x)
199 {
200   if (x == 0)
201     return 0;
202 
203   return (refmpn_msbone (x) << 1) - 1;
204 }
205 
206 /* How many digits in the given base will fit in a limb.
207    Notice that the product b is allowed to be equal to the limit
208    2^GMP_NUMB_BITS; this ensures the result for base==2 will be
209    GMP_NUMB_BITS (and similarly other powers of 2).  */
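/* For example, with 64-bit limbs and no nails (GMP_NUMB_BITS == 64),
   refmpn_chars_per_limb (10) is 19, since 10^19 < 2^64 < 10^20.  */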
210 int
211 refmpn_chars_per_limb (int base)
212 {
213   mp_limb_t  limit[2], b[2];
214   int        chars_per_limb;
215 
216   ASSERT (base >= 2);
217 
218   limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
219   limit[1] = 1;
220   b[0] = 1;      /* b = 1 */
221   b[1] = 0;
222 
223   chars_per_limb = 0;
224   for (;;)
225     {
226       if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
227 	break;
228       if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
229 	break;
230       chars_per_limb++;
231     }
232   return chars_per_limb;
233 }
234 
235 /* The biggest value base**n which fits in GMP_NUMB_BITS. */
236 mp_limb_t
237 refmpn_big_base (int base)
238 {
239   int        chars_per_limb = refmpn_chars_per_limb (base);
240   int        i;
241   mp_limb_t  bb;
242 
243   ASSERT (base >= 2);
244   bb = 1;
245   for (i = 0; i < chars_per_limb; i++)
246     bb *= base;
247   return bb;
248 }
249 
250 
251 void
252 refmpn_setbit (mp_ptr ptr, unsigned long bit)
253 {
254   ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
255 }
256 
257 void
258 refmpn_clrbit (mp_ptr ptr, unsigned long bit)
259 {
260   ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
261 }
262 
263 #define REFMPN_TSTBIT(ptr,bit) \
264   (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
265 
266 int
267 refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
268 {
269   return REFMPN_TSTBIT (ptr, bit);
270 }
271 
272 unsigned long
273 refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
274 {
275   while (REFMPN_TSTBIT (ptr, bit) != 0)
276     bit++;
277   return bit;
278 }
279 
280 unsigned long
281 refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
282 {
283   while (REFMPN_TSTBIT (ptr, bit) == 0)
284     bit++;
285   return bit;
286 }
287 
288 void
289 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
290 {
291   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
292   refmpn_copyi (rp, sp, size);
293 }
294 
295 void
296 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
297 {
298   mp_size_t i;
299 
300   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
301   ASSERT (size >= 0);
302 
303   for (i = 0; i < size; i++)
304     rp[i] = sp[i];
305 }
306 
307 void
308 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
309 {
310   mp_size_t i;
311 
312   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
313   ASSERT (size >= 0);
314 
315   for (i = size-1; i >= 0; i--)
316     rp[i] = sp[i];
317 }
318 
319 /* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
320    zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
321 void
322 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
323 {
324   ASSERT (wsize >= 0);
325   ASSERT (xsize >= 0);
326 
327   /* high part of x if x bigger than w */
328   if (xsize > wsize)
329     {
330       xp += xsize - wsize;
331       xsize = wsize;
332     }
333 
334   refmpn_copy (wp + wsize-xsize, xp, xsize);
335   refmpn_zero (wp, wsize-xsize);
336 }
337 
338 int
339 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
340 {
341   mp_size_t  i;
342 
343   ASSERT (size >= 1);
344   ASSERT_MPN (xp, size);
345   ASSERT_MPN (yp, size);
346 
347   for (i = size-1; i >= 0; i--)
348     {
349       if (xp[i] > yp[i])  return 1;
350       if (xp[i] < yp[i])  return -1;
351     }
352   return 0;
353 }
354 
355 int
356 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
357 {
358   if (size == 0)
359     return 0;
360   else
361     return refmpn_cmp (xp, yp, size);
362 }
363 
364 int
365 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
366 		     mp_srcptr yp, mp_size_t ysize)
367 {
368   int  opp, cmp;
369 
370   ASSERT_MPN (xp, xsize);
371   ASSERT_MPN (yp, ysize);
372 
373   opp = (xsize < ysize);
374   if (opp)
375     MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
376 
377   if (! refmpn_zero_p (xp+ysize, xsize-ysize))
378     cmp = 1;
379   else
380     cmp = refmpn_cmp (xp, yp, ysize);
381 
382   return (opp ? -cmp : cmp);
383 }
384 
385 int
386 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
387 {
388   mp_size_t  i;
389   ASSERT (size >= 0);
390 
391   for (i = 0; i < size; i++)
392       if (xp[i] != yp[i])
393 	return 0;
394   return 1;
395 }
396 
397 
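/* Shared loop body for the limb-wise logical operations below: apply the
   given expression to each limb pair of {s1p,size} and {s2p,size}, storing
   the results in {rp,size}.  */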
398 #define LOGOPS(operation)                                               \
399   {                                                                     \
400     mp_size_t  i;                                                       \
401 									\
402     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
403     ASSERT (size >= 1);                                                 \
404     ASSERT_MPN (s1p, size);                                             \
405     ASSERT_MPN (s2p, size);                                             \
406 									\
407     for (i = 0; i < size; i++)                                          \
408       rp[i] = operation;                                                \
409   }
410 
411 void
412 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
413 {
414   LOGOPS (s1p[i] & s2p[i]);
415 }
416 void
417 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
418 {
419   LOGOPS (s1p[i] & ~s2p[i]);
420 }
421 void
422 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
423 {
424   LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
425 }
426 void
427 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
428 {
429   LOGOPS (s1p[i] | s2p[i]);
430 }
431 void
432 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
433 {
434   LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
435 }
436 void
437 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
438 {
439   LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
440 }
441 void
442 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
443 {
444   LOGOPS (s1p[i] ^ s2p[i]);
445 }
446 void
447 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
448 {
449   LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
450 }
451 
452 
453 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
454 void
455 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
456 		   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
457 {
458   *dl = ml - sl;
459   *dh = mh - sh - (ml < sl);
460 }
461 
462 
463 /* set *w to x+y, return 0 or 1 carry */
464 mp_limb_t
465 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
466 {
467   mp_limb_t  sum, cy;
468 
469   ASSERT_LIMB (x);
470   ASSERT_LIMB (y);
471 
472   sum = x + y;
473 #if GMP_NAIL_BITS == 0
474   *w = sum;
475   cy = (sum < x);
476 #else
477   *w = sum & GMP_NUMB_MASK;
478   cy = (sum >> GMP_NUMB_BITS);
479 #endif
480   return cy;
481 }
482 
483 /* set *w to x-y, return 0 or 1 borrow */
484 mp_limb_t
485 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
486 {
487   mp_limb_t  diff, cy;
488 
489   ASSERT_LIMB (x);
490   ASSERT_LIMB (y);
491 
492   diff = x - y;
493 #if GMP_NAIL_BITS == 0
494   *w = diff;
495   cy = (diff > x);
496 #else
497   *w = diff & GMP_NUMB_MASK;
498   cy = (diff >> GMP_NUMB_BITS) & 1;
499 #endif
500   return cy;
501 }
502 
503 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
504 mp_limb_t
505 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
506 {
507   mp_limb_t  r;
508 
509   ASSERT_LIMB (x);
510   ASSERT_LIMB (y);
511   ASSERT (c == 0 || c == 1);
512 
513   r = ref_addc_limb (w, x, y);
514   return r + ref_addc_limb (w, *w, c);
515 }
516 
517 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
518 mp_limb_t
519 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
520 {
521   mp_limb_t  r;
522 
523   ASSERT_LIMB (x);
524   ASSERT_LIMB (y);
525   ASSERT (c == 0 || c == 1);
526 
527   r = ref_subc_limb (w, x, y);
528   return r + ref_subc_limb (w, *w, c);
529 }
530 
531 
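/* Shared body for refmpn_add_1 and refmpn_sub_1: propagate the single limb
   n through {sp,size} with the given carry/borrow primitive.  */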
532 #define AORS_1(operation)                               \
533   {                                                     \
534     mp_size_t  i;                                       \
535 							\
536     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
537     ASSERT (size >= 1);                                 \
538     ASSERT_MPN (sp, size);                              \
539     ASSERT_LIMB (n);                                    \
540 							\
541     for (i = 0; i < size; i++)                          \
542       n = operation (&rp[i], sp[i], n);                 \
543     return n;                                           \
544   }
545 
546 mp_limb_t
547 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
548 {
549   AORS_1 (ref_addc_limb);
550 }
551 mp_limb_t
552 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
553 {
554   AORS_1 (ref_subc_limb);
555 }
556 
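/* Shared body for refmpn_add_nc and refmpn_sub_nc: limb-by-limb
   add-with-carry or subtract-with-borrow over {s1p,size} and {s2p,size}.  */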
557 #define AORS_NC(operation)                                              \
558   {                                                                     \
559     mp_size_t  i;                                                       \
560 									\
561     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
562     ASSERT (carry == 0 || carry == 1);                                  \
563     ASSERT (size >= 1);                                                 \
564     ASSERT_MPN (s1p, size);                                             \
565     ASSERT_MPN (s2p, size);                                             \
566 									\
567     for (i = 0; i < size; i++)                                          \
568       carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
569     return carry;                                                       \
570   }
571 
572 mp_limb_t
573 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
574 	       mp_limb_t carry)
575 {
576   AORS_NC (adc);
577 }
578 mp_limb_t
579 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
580 	       mp_limb_t carry)
581 {
582   AORS_NC (sbb);
583 }
584 
585 
586 mp_limb_t
587 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
588 {
589   return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
590 }
591 mp_limb_t
592 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
593 {
594   return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
595 }
596 
597 mp_limb_t
598 refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
599 {
600   if (cnd != 0)
601     return refmpn_add_n (rp, s1p, s2p, size);
602   else
603     {
604       refmpn_copyi (rp, s1p, size);
605       return 0;
606     }
607 }
608 mp_limb_t
609 refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
610 {
611   if (cnd != 0)
612     return refmpn_sub_n (rp, s1p, s2p, size);
613   else
614     {
615       refmpn_copyi (rp, s1p, size);
616       return 0;
617     }
618 }
619 
620 
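/* Shared body for the _err1 add/sub variants: whenever a carry (or borrow)
   comes out of limb position i, accumulate yp[size-1-i] into the two-limb
   error estimate {ep,2}.  */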
621 #define AORS_ERR1_N(operation)						\
622   {                                                                     \
623     mp_size_t  i;                                                       \
624     mp_limb_t carry2;							\
625 									\
626     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
627     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
628     ASSERT (! refmpn_overlap_p (rp, size, yp, size));			\
629     ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));			\
630     ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));			\
631     ASSERT (! refmpn_overlap_p (ep, 2, yp, size));			\
632     ASSERT (! refmpn_overlap_p (ep, 2, rp, size));			\
633 									\
634     ASSERT (carry == 0 || carry == 1);					\
635     ASSERT (size >= 1);							\
636     ASSERT_MPN (s1p, size);						\
637     ASSERT_MPN (s2p, size);						\
638     ASSERT_MPN (yp, size);						\
639 									\
640     ep[0] = ep[1] = CNST_LIMB(0);					\
641 									\
642     for (i = 0; i < size; i++)                                          \
643       {									\
644 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
645 	if (carry == 1)							\
646 	  {								\
647 	    carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);	\
648 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
649 	    ASSERT (carry2 == 0);					\
650 	  }								\
651       }									\
652     return carry;                                                       \
653   }
654 
655 mp_limb_t
656 refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
657 		   mp_ptr ep, mp_srcptr yp,
658 		   mp_size_t size, mp_limb_t carry)
659 {
660   AORS_ERR1_N (adc);
661 }
662 mp_limb_t
663 refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
664 		   mp_ptr ep, mp_srcptr yp,
665 		   mp_size_t size, mp_limb_t carry)
666 {
667   AORS_ERR1_N (sbb);
668 }
669 
670 
671 #define AORS_ERR2_N(operation)						\
672   {                                                                     \
673     mp_size_t  i;                                                       \
674     mp_limb_t carry2;							\
675 									\
676     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
677     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
678     ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
679     ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
680     ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));			\
681     ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));			\
682     ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));			\
683     ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));			\
684     ASSERT (! refmpn_overlap_p (ep, 4, rp, size));			\
685 									\
686     ASSERT (carry == 0 || carry == 1);					\
687     ASSERT (size >= 1);							\
688     ASSERT_MPN (s1p, size);						\
689     ASSERT_MPN (s2p, size);						\
690     ASSERT_MPN (y1p, size);						\
691     ASSERT_MPN (y2p, size);						\
692 									\
693     ep[0] = ep[1] = CNST_LIMB(0);					\
694     ep[2] = ep[3] = CNST_LIMB(0);					\
695 									\
696     for (i = 0; i < size; i++)                                          \
697       {									\
698 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
699 	if (carry == 1)							\
700 	  {								\
701 	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
702 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
703 	    ASSERT (carry2 == 0);					\
704 	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
705 	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
706 	    ASSERT (carry2 == 0);					\
707 	  }								\
708       }									\
709     return carry;                                                       \
710   }
711 
712 mp_limb_t
713 refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
714 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
715 		   mp_size_t size, mp_limb_t carry)
716 {
717   AORS_ERR2_N (adc);
718 }
719 mp_limb_t
720 refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
721 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
722 		   mp_size_t size, mp_limb_t carry)
723 {
724   AORS_ERR2_N (sbb);
725 }
726 
727 
728 #define AORS_ERR3_N(operation)						\
729   {                                                                     \
730     mp_size_t  i;                                                       \
731     mp_limb_t carry2;							\
732 									\
733     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
734     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
735     ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
736     ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
737     ASSERT (! refmpn_overlap_p (rp, size, y3p, size));			\
738     ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));			\
739     ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));			\
740     ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));			\
741     ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));			\
742     ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));			\
743     ASSERT (! refmpn_overlap_p (ep, 6, rp, size));			\
744 									\
745     ASSERT (carry == 0 || carry == 1);					\
746     ASSERT (size >= 1);							\
747     ASSERT_MPN (s1p, size);						\
748     ASSERT_MPN (s2p, size);						\
749     ASSERT_MPN (y1p, size);						\
750     ASSERT_MPN (y2p, size);						\
751     ASSERT_MPN (y3p, size);						\
752 									\
753     ep[0] = ep[1] = CNST_LIMB(0);					\
754     ep[2] = ep[3] = CNST_LIMB(0);					\
755     ep[4] = ep[5] = CNST_LIMB(0);					\
756 									\
757     for (i = 0; i < size; i++)                                          \
758       {									\
759 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
760 	if (carry == 1)							\
761 	  {								\
762 	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
763 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
764 	    ASSERT (carry2 == 0);					\
765 	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
766 	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
767 	    ASSERT (carry2 == 0);					\
768 	    carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);	\
769 	    carry2 = ref_addc_limb (&ep[5], ep[5], carry2);		\
770 	    ASSERT (carry2 == 0);					\
771 	  }								\
772       }									\
773     return carry;                                                       \
774   }
775 
776 mp_limb_t
777 refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
778 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
779 		   mp_size_t size, mp_limb_t carry)
780 {
781   AORS_ERR3_N (adc);
782 }
783 mp_limb_t
784 refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
785 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
786 		   mp_size_t size, mp_limb_t carry)
787 {
788   AORS_ERR3_N (sbb);
789 }
790 
791 
792 mp_limb_t
793 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
794 		 mp_size_t n, unsigned int s)
795 {
796   mp_limb_t cy;
797   mp_ptr tp;
798 
799   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
800   ASSERT (n >= 1);
801   ASSERT (0 < s && s < GMP_NUMB_BITS);
802   ASSERT_MPN (up, n);
803   ASSERT_MPN (vp, n);
804 
805   tp = refmpn_malloc_limbs (n);
806   cy  = refmpn_lshift (tp, vp, n, s);
807   cy += refmpn_add_n (rp, up, tp, n);
808   free (tp);
809   return cy;
810 }
811 mp_limb_t
812 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
813 {
814   return refmpn_addlsh_n (rp, up, vp, n, 1);
815 }
816 mp_limb_t
817 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
818 {
819   return refmpn_addlsh_n (rp, up, vp, n, 2);
820 }
821 mp_limb_t
822 refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
823 {
824   return refmpn_addlsh_n (rp, rp, vp, n, s);
825 }
826 mp_limb_t
827 refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
828 {
829   return refmpn_addlsh_n (rp, rp, vp, n, 1);
830 }
831 mp_limb_t
832 refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
833 {
834   return refmpn_addlsh_n (rp, rp, vp, n, 2);
835 }
836 mp_limb_t
837 refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
838 {
839   return refmpn_addlsh_n (rp, vp, rp, n, s);
840 }
841 mp_limb_t
842 refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
843 {
844   return refmpn_addlsh_n (rp, vp, rp, n, 1);
845 }
846 mp_limb_t
847 refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
848 {
849   return refmpn_addlsh_n (rp, vp, rp, n, 2);
850 }
851 mp_limb_t
852 refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
853 		  mp_size_t n, unsigned int s, mp_limb_t carry)
854 {
855   mp_limb_t cy;
856 
857   ASSERT (carry <= (CNST_LIMB(1) << s));
858 
859   cy = refmpn_addlsh_n (rp, up, vp, n, s);
860   cy += refmpn_add_1 (rp, rp, n, carry);
861   return cy;
862 }
863 mp_limb_t
864 refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
865 {
866   return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
867 }
868 mp_limb_t
869 refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
870 {
871   return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
872 }
873 
874 mp_limb_t
875 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
876 		 mp_size_t n, unsigned int s)
877 {
878   mp_limb_t cy;
879   mp_ptr tp;
880 
881   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
882   ASSERT (n >= 1);
883   ASSERT (0 < s && s < GMP_NUMB_BITS);
884   ASSERT_MPN (up, n);
885   ASSERT_MPN (vp, n);
886 
887   tp = refmpn_malloc_limbs (n);
888   cy  = mpn_lshift (tp, vp, n, s);
889   cy += mpn_sub_n (rp, up, tp, n);
890   free (tp);
891   return cy;
892 }
893 mp_limb_t
894 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
895 {
896   return refmpn_sublsh_n (rp, up, vp, n, 1);
897 }
898 mp_limb_t
899 refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
900 {
901   return refmpn_sublsh_n (rp, up, vp, n, 2);
902 }
903 mp_limb_t
904 refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
905 {
906   return refmpn_sublsh_n (rp, rp, vp, n, s);
907 }
908 mp_limb_t
909 refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
910 {
911   return refmpn_sublsh_n (rp, rp, vp, n, 1);
912 }
913 mp_limb_t
914 refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
915 {
916   return refmpn_sublsh_n (rp, rp, vp, n, 2);
917 }
918 mp_limb_t
919 refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
920 {
921   return refmpn_sublsh_n (rp, vp, rp, n, s);
922 }
923 mp_limb_t
924 refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
925 {
926   return refmpn_sublsh_n (rp, vp, rp, n, 1);
927 }
928 mp_limb_t
929 refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
930 {
931   return refmpn_sublsh_n (rp, vp, rp, n, 2);
932 }
933 mp_limb_t
934 refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
935 		  mp_size_t n, unsigned int s, mp_limb_t carry)
936 {
937   mp_limb_t cy;
938 
939   ASSERT (carry <= (CNST_LIMB(1) << s));
940 
941   cy = refmpn_sublsh_n (rp, up, vp, n, s);
942   cy += refmpn_sub_1 (rp, rp, n, carry);
943   return cy;
944 }
945 mp_limb_t
946 refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
947 {
948   return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
949 }
950 mp_limb_t
951 refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
952 {
953   return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
954 }
955 
956 mp_limb_signed_t
957 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
958 		 mp_size_t n, unsigned int s)
959 {
960   mp_limb_signed_t cy;
961   mp_ptr tp;
962 
963   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
964   ASSERT (n >= 1);
965   ASSERT (0 < s && s < GMP_NUMB_BITS);
966   ASSERT_MPN (up, n);
967   ASSERT_MPN (vp, n);
968 
969   tp = refmpn_malloc_limbs (n);
970   cy  = mpn_lshift (tp, vp, n, s);
971   cy -= mpn_sub_n (rp, tp, up, n);
972   free (tp);
973   return cy;
974 }
975 mp_limb_signed_t
976 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
977 {
978   return refmpn_rsblsh_n (rp, up, vp, n, 1);
979 }
980 mp_limb_signed_t
981 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
982 {
983   return refmpn_rsblsh_n (rp, up, vp, n, 2);
984 }
985 mp_limb_signed_t
986 refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
987 		  mp_size_t n, unsigned int s, mp_limb_signed_t carry)
988 {
989   mp_limb_signed_t cy;
990 
991   ASSERT (carry == -1 || (carry >> s) == 0);
992 
993   cy = refmpn_rsblsh_n (rp, up, vp, n, s);
994   if (carry > 0)
995     cy += refmpn_add_1 (rp, rp, n, carry);
996   else
997     cy -= refmpn_sub_1 (rp, rp, n, -carry);
998   return cy;
999 }
1000 mp_limb_signed_t
1001 refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1002 {
1003   return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
1004 }
1005 mp_limb_signed_t
1006 refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1007 {
1008   return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
1009 }
1010 
1011 mp_limb_t
1012 refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1013 {
1014   mp_limb_t cya, cys;
1015 
1016   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1017   ASSERT (n >= 1);
1018   ASSERT_MPN (up, n);
1019   ASSERT_MPN (vp, n);
1020 
1021   cya = mpn_add_n (rp, up, vp, n);
1022   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1023   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1024   return cys;
1025 }
1026 mp_limb_t
1027 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1028 {
1029   mp_limb_t cya, cys;
1030 
1031   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1032   ASSERT (n >= 1);
1033   ASSERT_MPN (up, n);
1034   ASSERT_MPN (vp, n);
1035 
1036   cya = mpn_sub_n (rp, up, vp, n);
1037   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1038   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1039   return cys;
1040 }
1041 
1042 /* Twos complement, return borrow. */
1043 mp_limb_t
1044 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
1045 {
1046   mp_ptr     zeros;
1047   mp_limb_t  ret;
1048 
1049   ASSERT (size >= 1);
1050 
1051   zeros = refmpn_malloc_limbs (size);
1052   refmpn_fill (zeros, size, CNST_LIMB(0));
1053   ret = refmpn_sub_n (dst, zeros, src, size);
1054   free (zeros);
1055   return ret;
1056 }
1057 
1058 
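/* Shared body for refmpn_add and refmpn_sub: operate on the low s2size
   limbs, then propagate the resulting carry or borrow through the remaining
   high limbs of s1.  */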
1059 #define AORS(aors_n, aors_1)                                    \
1060   {                                                             \
1061     mp_limb_t  c;                                               \
1062     ASSERT (s1size >= s2size);                                  \
1063     ASSERT (s2size >= 1);                                       \
1064     c = aors_n (rp, s1p, s2p, s2size);                          \
1065     if (s1size-s2size != 0)                                     \
1066       c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
1067     return c;                                                   \
1068   }
1069 mp_limb_t
1070 refmpn_add (mp_ptr rp,
1071 	    mp_srcptr s1p, mp_size_t s1size,
1072 	    mp_srcptr s2p, mp_size_t s2size)
1073 {
1074   AORS (refmpn_add_n, refmpn_add_1);
1075 }
1076 mp_limb_t
1077 refmpn_sub (mp_ptr rp,
1078 	    mp_srcptr s1p, mp_size_t s1size,
1079 	    mp_srcptr s2p, mp_size_t s2size)
1080 {
1081   AORS (refmpn_sub_n, refmpn_sub_1);
1082 }
1083 
1084 
1085 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
1086 #define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
1087 
1088 #define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
1089 #define HIGHMASK  SHIFTHIGH(LOWMASK)
1090 
1091 #define LOWPART(x)   ((x) & LOWMASK)
1092 #define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
1093 
1094 /* Set *lo to the low limb of x*y and return the high limb, using full limbs not nails. */
1095 mp_limb_t
1096 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
1097 {
1098   mp_limb_t  hi, s;
1099 
1100   *lo = LOWPART(x) * LOWPART(y);
1101   hi = HIGHPART(x) * HIGHPART(y);
1102 
1103   s = LOWPART(x) * HIGHPART(y);
1104   hi += HIGHPART(s);
1105   s = SHIFTHIGH(LOWPART(s));
1106   *lo += s;
1107   hi += (*lo < s);
1108 
1109   s = HIGHPART(x) * LOWPART(y);
1110   hi += HIGHPART(s);
1111   s = SHIFTHIGH(LOWPART(s));
1112   *lo += s;
1113   hi += (*lo < s);
1114 
1115   return hi;
1116 }
1117 
1118 mp_limb_t
1119 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
1120 {
1121   return refmpn_umul_ppmm (lo, x, y);
1122 }
1123 
1124 mp_limb_t
1125 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
1126 	       mp_limb_t carry)
1127 {
1128   mp_size_t  i;
1129   mp_limb_t  hi, lo;
1130 
1131   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1132   ASSERT (size >= 1);
1133   ASSERT_MPN (sp, size);
1134   ASSERT_LIMB (multiplier);
1135   ASSERT_LIMB (carry);
1136 
1137   multiplier <<= GMP_NAIL_BITS;
1138   for (i = 0; i < size; i++)
1139     {
1140       hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
1141       lo >>= GMP_NAIL_BITS;
1142       ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
1143       rp[i] = lo;
1144       carry = hi;
1145     }
1146   return carry;
1147 }
1148 
1149 mp_limb_t
1150 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1151 {
1152   return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1153 }
1154 
1155 
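/* Multiply {src,size} by the msize-limb multiplier {mult,msize}, writing the
   low size+msize-1 limbs of the product to dst and returning the most
   significant limb.  */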
1156 mp_limb_t
1157 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1158 	      mp_srcptr mult, mp_size_t msize)
1159 {
1160   mp_ptr     src_copy;
1161   mp_limb_t  ret;
1162   mp_size_t  i;
1163 
1164   ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
1165   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1166   ASSERT (size >= msize);
1167   ASSERT_MPN (mult, msize);
1168 
1169   /* in case dst==src */
1170   src_copy = refmpn_malloc_limbs (size);
1171   refmpn_copyi (src_copy, src, size);
1172   src = src_copy;
1173 
1174   dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
1175   for (i = 1; i < msize-1; i++)
1176     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1177   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1178 
1179   free (src_copy);
1180   return ret;
1181 }
1182 
1183 mp_limb_t
1184 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1185 {
1186   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
1187 }
1188 mp_limb_t
1189 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1190 {
1191   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
1192 }
1193 mp_limb_t
1194 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1195 {
1196   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
1197 }
1198 mp_limb_t
1199 refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1200 {
1201   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
1202 }
1203 mp_limb_t
1204 refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1205 {
1206   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
1207 }
1208 
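/* Shared body for refmpn_addmul_1c and refmpn_submul_1c: form
   {sp,size}*multiplier+carry in a temporary, then add it to or subtract it
   from {rp,size}.  */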
1209 #define AORSMUL_1C(operation_n)                                 \
1210   {                                                             \
1211     mp_ptr     p;                                               \
1212     mp_limb_t  ret;                                             \
1213 								\
1214     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
1215 								\
1216     p = refmpn_malloc_limbs (size);                             \
1217     ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
1218     ret += operation_n (rp, rp, p, size);                       \
1219 								\
1220     free (p);                                                   \
1221     return ret;                                                 \
1222   }
1223 
1224 mp_limb_t
1225 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1226 		  mp_limb_t multiplier, mp_limb_t carry)
1227 {
1228   AORSMUL_1C (refmpn_add_n);
1229 }
1230 mp_limb_t
1231 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1232 		  mp_limb_t multiplier, mp_limb_t carry)
1233 {
1234   AORSMUL_1C (refmpn_sub_n);
1235 }
1236 
1237 
1238 mp_limb_t
1239 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1240 {
1241   return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1242 }
1243 mp_limb_t
1244 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1245 {
1246   return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1247 }
1248 
1249 
1250 mp_limb_t
1251 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1252 		 mp_srcptr mult, mp_size_t msize)
1253 {
1254   mp_ptr     src_copy;
1255   mp_limb_t  ret;
1256   mp_size_t  i;
1257 
1258   ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
1259   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1260   ASSERT (size >= msize);
1261   ASSERT_MPN (mult, msize);
1262 
1263   /* in case dst==src */
1264   src_copy = refmpn_malloc_limbs (size);
1265   refmpn_copyi (src_copy, src, size);
1266   src = src_copy;
1267 
1268   for (i = 0; i < msize-1; i++)
1269     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1270   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1271 
1272   free (src_copy);
1273   return ret;
1274 }
1275 
1276 mp_limb_t
1277 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1278 {
1279   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
1280 }
1281 mp_limb_t
1282 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1283 {
1284   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
1285 }
1286 mp_limb_t
1287 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1288 {
1289   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
1290 }
1291 mp_limb_t
1292 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1293 {
1294   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
1295 }
1296 mp_limb_t
1297 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1298 {
1299   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
1300 }
1301 mp_limb_t
1302 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1303 {
1304   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
1305 }
1306 mp_limb_t
1307 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1308 {
1309   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
1310 }
1311 
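/* Compute r1 = s1 + s2 and r2 = s1 - s2 in one call.  Bit 1 of the carry
   argument and of the return value is the addition carry, bit 0 the
   subtraction borrow.  */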
1312 mp_limb_t
1313 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
1314 		  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
1315 		  mp_limb_t carry)
1316 {
1317   mp_ptr p;
1318   mp_limb_t acy, scy;
1319 
1320   /* Destinations can't overlap. */
1321   ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
1322   ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
1323   ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
1324   ASSERT (size >= 1);
1325 
1326   /* in case r1p==s1p or r1p==s2p */
1327   p = refmpn_malloc_limbs (size);
1328 
1329   acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
1330   scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
1331   refmpn_copyi (r1p, p, size);
1332 
1333   free (p);
1334   return 2 * acy + scy;
1335 }
1336 
1337 mp_limb_t
1338 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
1339 		 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1340 {
1341   return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
1342 }
1343 
1344 
1345 /* Right shift hi,lo and return the low limb of the result.
1346    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1347 mp_limb_t
1348 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1349 {
1350   ASSERT (shift < GMP_NUMB_BITS);
1351   if (shift == 0)
1352     return lo;
1353   else
1354     return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1355 }
1356 
1357 /* Left shift hi,lo and return the high limb of the result.
1358    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1359 mp_limb_t
1360 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1361 {
1362   ASSERT (shift < GMP_NUMB_BITS);
1363   if (shift == 0)
1364     return hi;
1365   else
1366     return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1367 }
1368 
1369 
1370 mp_limb_t
1371 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1372 {
1373   mp_limb_t  ret;
1374   mp_size_t  i;
1375 
1376   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1377   ASSERT (size >= 1);
1378   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1379   ASSERT_MPN (sp, size);
1380 
1381   ret = rshift_make (sp[0], CNST_LIMB(0), shift);
1382 
1383   for (i = 0; i < size-1; i++)
1384     rp[i] = rshift_make (sp[i+1], sp[i], shift);
1385 
1386   rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
1387   return ret;
1388 }
1389 
1390 mp_limb_t
1391 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1392 {
1393   mp_limb_t  ret;
1394   mp_size_t  i;
1395 
1396   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1397   ASSERT (size >= 1);
1398   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1399   ASSERT_MPN (sp, size);
1400 
1401   ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
1402 
1403   for (i = size-2; i >= 0; i--)
1404     rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
1405 
1406   rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
1407   return ret;
1408 }
1409 
1410 void
1411 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1412 {
1413   mp_size_t i;
1414 
1415   /* We work downwards since mpn_lshiftc needs that. */
1416   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1417 
1418   for (i = size - 1; i >= 0; i--)
1419     rp[i] = (~sp[i]) & GMP_NUMB_MASK;
1420 }
1421 
1422 mp_limb_t
1423 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1424 {
1425   mp_limb_t res;
1426 
1427   /* No asserts here, refmpn_lshift will assert what we need. */
1428 
1429   res = refmpn_lshift (rp, sp, size, shift);
1430   refmpn_com (rp, rp, size);
1431   return res;
1432 }
1433 
1434 /* accepting shift==0 and doing a plain copyi or copyd in that case */
1435 mp_limb_t
1436 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1437 {
1438   if (shift == 0)
1439     {
1440       refmpn_copyi (rp, sp, size);
1441       return 0;
1442     }
1443   else
1444     {
1445       return refmpn_rshift (rp, sp, size, shift);
1446     }
1447 }
1448 mp_limb_t
1449 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1450 {
1451   if (shift == 0)
1452     {
1453       refmpn_copyd (rp, sp, size);
1454       return 0;
1455     }
1456   else
1457     {
1458       return refmpn_lshift (rp, sp, size, shift);
1459     }
1460 }
1461 
1462 /* accepting size==0 too */
1463 mp_limb_t
1464 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1465 			   unsigned shift)
1466 {
1467   return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
1468 }
1469 mp_limb_t
1470 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1471 			   unsigned shift)
1472 {
1473   return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
1474 }
1475 
1476 /* Divide h,l by d, return quotient, store remainder to *rp.
1477    Operates on full limbs, not nails.
1478    Must have h < d.
1479    __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
1480 mp_limb_t
1481 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
1482 {
1483   mp_limb_t  q, r;
1484   int  n;
1485 
1486   ASSERT (d != 0);
1487   ASSERT (h < d);
1488 
1489 #if 0
1490   udiv_qrnnd (q, r, h, l, d);
1491   *rp = r;
1492   return q;
1493 #endif
1494 
1495   n = refmpn_count_leading_zeros (d);
1496   d <<= n;
1497 
1498   if (n != 0)
1499     {
1500       h = (h << n) | (l >> (GMP_LIMB_BITS - n));
1501       l <<= n;
1502     }
1503 
1504   __udiv_qrnnd_c (q, r, h, l, d);
1505   r >>= n;
1506   *rp = r;
1507   return q;
1508 }
1509 
1510 mp_limb_t
1511 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
1512 {
1513   return refmpn_udiv_qrnnd (rp, h, l, d);
1514 }
1515 
1516 /* This little subroutine avoids some bad code generation from i386 gcc 3.0
1517    -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
1518 static mp_limb_t
1519 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1520 			     mp_limb_t divisor, mp_limb_t carry)
1521 {
1522   mp_size_t  i;
1523   mp_limb_t rem[1];
1524   for (i = size-1; i >= 0; i--)
1525     {
1526       rp[i] = refmpn_udiv_qrnnd (rem, carry,
1527 				 sp[i] << GMP_NAIL_BITS,
1528 				 divisor << GMP_NAIL_BITS);
1529       carry = *rem >> GMP_NAIL_BITS;
1530     }
1531   return carry;
1532 }
1533 
1534 mp_limb_t
1535 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1536 		  mp_limb_t divisor, mp_limb_t carry)
1537 {
1538   mp_ptr     sp_orig;
1539   mp_ptr     prod;
1540   mp_limb_t  carry_orig;
1541 
1542   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1543   ASSERT (size >= 0);
1544   ASSERT (carry < divisor);
1545   ASSERT_MPN (sp, size);
1546   ASSERT_LIMB (divisor);
1547   ASSERT_LIMB (carry);
1548 
1549   if (size == 0)
1550     return carry;
1551 
1552   sp_orig = refmpn_memdup_limbs (sp, size);
1553   prod = refmpn_malloc_limbs (size);
1554   carry_orig = carry;
1555 
1556   carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1557 
1558   /* check by multiplying back */
1559 #if 0
1560   printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1561 	  size, divisor, carry_orig, carry);
1562   mpn_trace("s",sp_orig,size);
1563   mpn_trace("r",rp,size);
1564   printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1565   mpn_trace("p",prod,size);
1566 #endif
1567   ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1568   ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1569   free (sp_orig);
1570   free (prod);
1571 
1572   return carry;
1573 }
1574 
1575 mp_limb_t
1576 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1577 {
1578   return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
1579 }
1580 
1581 
1582 mp_limb_t
1583 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1584 	       mp_limb_t carry)
1585 {
1586   mp_ptr  p = refmpn_malloc_limbs (size);
1587   carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
1588   free (p);
1589   return carry;
1590 }
1591 
1592 mp_limb_t
1593 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1594 {
1595   return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
1596 }
1597 
1598 mp_limb_t
1599 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1600 		     mp_limb_t inverse)
1601 {
1602   ASSERT (divisor & GMP_NUMB_HIGHBIT);
1603   ASSERT (inverse == refmpn_invert_limb (divisor));
1604   return refmpn_mod_1 (sp, size, divisor);
1605 }
1606 
1607 /* This implementation will be rather slow, but has the advantage of being
1608    in a different style than the libgmp versions.  */
1609 mp_limb_t
1610 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
1611 {
1612   ASSERT ((GMP_NUMB_BITS % 4) == 0);
1613   return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
1614 }
1615 
1616 
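/* divrem_1 with xsize extra low "fraction" limbs: the integer quotient of
   {sp,size} is stored at rp[xsize..xsize+size-1], then the remainder is
   extended with xsize zero limbs and divided again to give the fraction
   limbs rp[0..xsize-1].  The final remainder is returned.  */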
1617 mp_limb_t
1618 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
1619 		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1620 		  mp_limb_t carry)
1621 {
1622   mp_ptr  z;
1623 
1624   z = refmpn_malloc_limbs (xsize);
1625   refmpn_fill (z, xsize, CNST_LIMB(0));
1626 
1627   carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
1628   carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
1629 
1630   free (z);
1631   return carry;
1632 }
1633 
1634 mp_limb_t
1635 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
1636 		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1637 {
1638   return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
1639 }
1640 
1641 mp_limb_t
1642 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
1643 			mp_srcptr sp, mp_size_t size,
1644 			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
1645 {
1646   ASSERT (size >= 0);
1647   ASSERT (shift == refmpn_count_leading_zeros (divisor));
1648   ASSERT (inverse == refmpn_invert_limb (divisor << shift));
1649 
1650   return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
1651 }
1652 
1653 mp_limb_t
1654 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
1655 		 mp_ptr np, mp_size_t nn,
1656 		 mp_srcptr dp)
1657 {
1658   mp_ptr tp;
1659   mp_limb_t qh;
1660 
1661   tp = refmpn_malloc_limbs (nn + qxn);
1662   refmpn_zero (tp, qxn);
1663   refmpn_copyi (tp + qxn, np, nn);
1664   qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
1665   refmpn_copyi (np, tp, 2);
1666   free (tp);
1667   return qh;
1668 }
1669 
1670 /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
1671    paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note that -d-1 < d
1672    since d has the high bit set. */
1673 
1674 mp_limb_t
1675 refmpn_invert_limb (mp_limb_t d)
1676 {
1677   mp_limb_t r;
1678   ASSERT (d & GMP_LIMB_HIGHBIT);
1679   return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
1680 }
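
/* A hedged check sketch of the property above (hypothetical helper, not
   compiled in, and assuming GMP_NAIL_BITS == 0): v = floor((b*(b-d)-1)/d)
   is equivalent to (b+v)*d <= b^2-1 < (b+v+1)*d, so b^2-1 minus the
   two-limb product (b+v)*d must be less than d.  */
#if 0
static void
refmpn_invert_limb_check (mp_limb_t d)
{
  mp_limb_t  v, p1, p0;

  ASSERT (d & GMP_LIMB_HIGHBIT);
  v = refmpn_invert_limb (d);

  umul_ppmm (p1, p0, v, d);   /* p1:p0 = v*d */
  p1 += d;                    /* p1:p0 = (b+v)*d, at most b^2-1 for a correct v */

  /* b^2-1 minus p1:p0 is the bitwise complement ~p1:~p0, and must be < d */
  ASSERT (~p1 == 0 && ~p0 < d);
}
#endif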
1681 
1682 void
1683 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1684 {
1685   mp_ptr qp, tp;
1686   TMP_DECL;
1687   TMP_MARK;
1688 
1689   tp = TMP_ALLOC_LIMBS (2 * n);
1690   qp = TMP_ALLOC_LIMBS (n + 1);
1691 
1692   MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);
1693 
1694   refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
1695   refmpn_copyi (rp, qp, n);
1696 
1697   TMP_FREE;
1698 }
1699 
1700 void
1701 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1702 {
1703   mp_ptr tp;
1704   mp_limb_t binv;
1705   TMP_DECL;
1706   TMP_MARK;
1707 
1708   /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
1709      code.  To make up for it, we check that the inverse is correct using a
1710      multiply.  */
1711 
1712   tp = TMP_ALLOC_LIMBS (2 * n);
1713 
1714   MPN_ZERO (tp, n);
1715   tp[0] = 1;
1716   binvert_limb (binv, up[0]);
1717   mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
1718 
1719   refmpn_mul_n (tp, rp, up, n);
1720   ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
1721 
1722   TMP_FREE;
1723 }
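
/* Roughly the Newton iteration which the binvert_limb macro used above is
   built on, included only as an illustrative sketch (hypothetical helper,
   not compiled in): for odd d, d is its own inverse mod 8, and each step
   x = x*(2-d*x) doubles the number of correct low bits.  */
#if 0
static mp_limb_t
refmpn_binvert_limb_sketch (mp_limb_t d)
{
  mp_limb_t  inv;
  int        bits;

  ASSERT ((d & 1) != 0);

  inv = d;                       /* correct to 3 bits, since d*d == 1 mod 8 */
  for (bits = 3; bits < GMP_NUMB_BITS; bits *= 2)
    inv = (inv * (2 - d * inv)) & GMP_NUMB_MASK;

  ASSERT (((inv * d) & GMP_NUMB_MASK) == 1);
  return inv;
}
#endif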
1724 
1725 /* The aim is to produce a dst quotient and return a remainder c, satisfying
1726    c*b^n + src-i == 3*dst, where i is the incoming carry.
1727 
1728    Some value c==0, c==1 or c==2 will satisfy, so just try each.
1729 
1730    If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS == 1 mod 3, so a non-zero
1731    remainder r from the first division attempt would determine the correct
1732    c directly (as 3-r), but don't bother with that, since we can't
1733    guarantee anything about GMP_NUMB_BITS when using nails.
1734 
1735    If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
1736    complement negative, i.e. b^n+src-i, and the calculation produces c1
1737    satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
1738    means it's enough to just add any borrow back at the end.
1739 
1740    A borrow only occurs when src==0 or src==1, and, by the same reasoning
1741    as in mpn/generic/diveby3.c, the c1 that results in those cases will only
1742    be 0 or 1 respectively, so with 1 added the final return value is still
1743    in the prescribed range 0 to 2. */
1744 
1745 mp_limb_t
1746 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
1747 {
1748   mp_ptr     spcopy;
1749   mp_limb_t  c, cs;
1750 
1751   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1752   ASSERT (size >= 1);
1753   ASSERT (carry <= 2);
1754   ASSERT_MPN (sp, size);
1755 
1756   spcopy = refmpn_malloc_limbs (size);
1757   cs = refmpn_sub_1 (spcopy, sp, size, carry);
1758 
1759   for (c = 0; c <= 2; c++)
1760     if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
1761       goto done;
1762   ASSERT_FAIL (no value of c satisfies);
1763 
1764  done:
1765   c += cs;
1766   ASSERT (c <= 2);
1767 
1768   free (spcopy);
1769   return c;
1770 }
1771 
1772 mp_limb_t
1773 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1774 {
1775   return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
1776 }
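
/* A hedged check sketch of the identity described above (hypothetical
   helper, not compiled in): c*b^n + src - i == 3*q means that recomputing
   3*q + i with refmpn_mul_1c must reproduce src limb for limb and return
   c as the carry-out.  */
#if 0
static void
refmpn_divexact_by3c_check (mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr     qp = refmpn_malloc_limbs (size);
  mp_ptr     tp = refmpn_malloc_limbs (size);
  mp_limb_t  c;

  c = refmpn_divexact_by3c (qp, sp, size, carry);

  ASSERT (refmpn_mul_1c (tp, qp, size, CNST_LIMB(3), carry) == c);
  ASSERT (refmpn_cmp (tp, sp, size) == 0);

  free (qp);
  free (tp);
}
#endif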
1777 
1778 
1779 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
1780 void
1781 refmpn_mul_basecase (mp_ptr prodp,
1782 		     mp_srcptr up, mp_size_t usize,
1783 		     mp_srcptr vp, mp_size_t vsize)
1784 {
1785   mp_size_t i;
1786 
1787   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1788   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1789   ASSERT (usize >= vsize);
1790   ASSERT (vsize >= 1);
1791   ASSERT_MPN (up, usize);
1792   ASSERT_MPN (vp, vsize);
1793 
1794   prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
1795   for (i = 1; i < vsize; i++)
1796     prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
1797 }
1798 
1799 
1800 /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
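/* Stated explicitly, the loop below accumulates into rp[0..un-vn+2] the
   products up[j]*vp[i] for all i+j from vn-1 to un-1, each placed at offset
   (i+j)-(vn-1), with the top two limbs acting as guard limbs for the carries
   (i.e. the "middle" diagonals of the full product).  */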
1801 void
1802 refmpn_mulmid_basecase (mp_ptr rp,
1803 			mp_srcptr up, mp_size_t un,
1804 			mp_srcptr vp, mp_size_t vn)
1805 {
1806   mp_limb_t cy;
1807   mp_size_t i;
1808 
1809   ASSERT (un >= vn);
1810   ASSERT (vn >= 1);
1811   ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
1812   ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
1813   ASSERT_MPN (up, un);
1814   ASSERT_MPN (vp, vn);
1815 
1816   rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
1817   rp[un - vn + 2] = CNST_LIMB (0);
1818   for (i = 1; i < vn; i++)
1819     {
1820       cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
1821       cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
1822       cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
1823       ASSERT (cy == 0);
1824     }
1825 }
1826 
1827 void
1828 refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
1829 		      mp_ptr scratch)
1830 {
1831   refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1832 }
1833 
1834 void
1835 refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1836 {
1837   /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1838      off products near the middle product region boundary */
1839   refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1840 }
1841 
1842 void
1843 refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
1844 	       mp_srcptr vp, mp_size_t vn)
1845 {
1846   /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1847      off products near the middle product region boundary */
1848   refmpn_mulmid_basecase (rp, up, un, vp, vn);
1849 }
1850 
1851 
1852 
1853 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
1854 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
1855 #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
1856 #if WANT_FFT
1857 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
1858 #else
1859 #define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom6h here */
1860 #endif
1861 
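/* Compute {wp,un+vn} = {up,un} * {vp,vn} by slicing u into vn-limb blocks,
   multiplying each block by v with an mpn_toomNN routine one band below the
   block size's own range, and accumulating the partial products into wp at
   successive vn-limb offsets.  Any sub-vn tail is handled by a recursive
   call with the operands swapped, and small products go straight to
   refmpn_mul_basecase.  */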
1862 void
1863 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
1864 {
1865   mp_ptr tp, rp;
1866   mp_size_t tn;
1867 
1868   if (vn < TOOM3_THRESHOLD)
1869     {
1870       /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */
1871       if (vn != 0)
1872 	refmpn_mul_basecase (wp, up, un, vp, vn);
1873       else
1874 	MPN_ZERO (wp, un);
1875       return;
1876     }
1877 
1878   MPN_ZERO (wp, vn);
1879   rp = refmpn_malloc_limbs (2 * vn);
1880 
1881   if (vn < TOOM4_THRESHOLD)
1882     tn = mpn_toom22_mul_itch (vn, vn);
1883   else if (vn < TOOM6_THRESHOLD)
1884     tn = mpn_toom33_mul_itch (vn, vn);
1885   else if (vn < FFT_THRESHOLD)
1886     tn = mpn_toom44_mul_itch (vn, vn);
1887   else
1888     tn = mpn_toom6h_mul_itch (vn, vn);
1889   tp = refmpn_malloc_limbs (tn);
1890 
1891   while (un >= vn)
1892     {
1893       if (vn < TOOM4_THRESHOLD)
1894 	/* In the toom3 range, use mpn_toom22_mul.  */
1895 	mpn_toom22_mul (rp, up, vn, vp, vn, tp);
1896       else if (vn < TOOM6_THRESHOLD)
1897 	/* In the toom4 range, use mpn_toom33_mul.  */
1898 	mpn_toom33_mul (rp, up, vn, vp, vn, tp);
1899       else if (vn < FFT_THRESHOLD)
1900 	/* In the toom6 range, use mpn_toom44_mul.  */
1901 	mpn_toom44_mul (rp, up, vn, vp, vn, tp);
1902       else
1903 	/* For the largest operands, use mpn_toom6h_mul.  */
1904 	mpn_toom6h_mul (rp, up, vn, vp, vn, tp);
1905 
1906       ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
1907       wp += vn;
1908 
1909       up += vn;
1910       un -= vn;
1911     }
1912 
1913   free (tp);
1914 
1915   if (un != 0)
1916     {
1917       refmpn_mul (rp, vp, vn, up, un);
1918       ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
1919     }
1920   free (rp);
1921 }
1922 
1923 void
1924 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1925 {
1926   refmpn_mul (prodp, up, size, vp, size);
1927 }
1928 
1929 void
1930 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1931 {
1932   mp_ptr tp = refmpn_malloc_limbs (2*size);
1933   refmpn_mul (tp, up, size, vp, size);
1934   refmpn_copyi (prodp, tp, size);
1935   free (tp);
1936 }
1937 
1938 void
1939 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
1940 {
1941   refmpn_mul (dst, src, size, src, size);
1942 }
1943 
1944 void
1945 refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
1946 {
1947   refmpn_mullo_n (dst, src, src, size);
1948 }
1949 
1950 /* Allowing usize<vsize, usize==0 or vsize==0. */
1951 void
1952 refmpn_mul_any (mp_ptr prodp,
1953 		     mp_srcptr up, mp_size_t usize,
1954 		     mp_srcptr vp, mp_size_t vsize)
1955 {
1956   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1957   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1958   ASSERT (usize >= 0);
1959   ASSERT (vsize >= 0);
1960   ASSERT_MPN (up, usize);
1961   ASSERT_MPN (vp, vsize);
1962 
1963   if (usize == 0)
1964     {
1965       refmpn_fill (prodp, vsize, CNST_LIMB(0));
1966       return;
1967     }
1968 
1969   if (vsize == 0)
1970     {
1971       refmpn_fill (prodp, usize, CNST_LIMB(0));
1972       return;
1973     }
1974 
1975   if (usize >= vsize)
1976     refmpn_mul (prodp, up, usize, vp, vsize);
1977   else
1978     refmpn_mul (prodp, vp, vsize, up, usize);
1979 }
1980 
1981 
1982 mp_limb_t
1983 refmpn_gcd_11 (mp_limb_t x, mp_limb_t y)
1984 {
1985   /* The non-ref function also requires its input operands to be odd, but
1986      refmpn_gcd_1 below doesn't guarantee that. */
1987   ASSERT (x > 0);
1988   ASSERT (y > 0);
1989   do
1990     {
1991       while ((x & 1) == 0)  x >>= 1;
1992       while ((y & 1) == 0)  y >>= 1;
1993 
1994       if (x < y)
1995 	MP_LIMB_T_SWAP (x, y);
1996 
1997       x -= y;
1998     }
1999   while (x != 0);
2000 
2001   return y;
2002 }
2003 
2004 mp_double_limb_t
2005 refmpn_gcd_22 (mp_limb_t x1, mp_limb_t x0, mp_limb_t y1, mp_limb_t y0)
2006 {
2007   mp_double_limb_t g;
2008   mp_limb_t cy;
2009   ASSERT ((x0 & 1) != 0);
2010   ASSERT ((y0 & 1) != 0);
2011 
2012   do
2013     {
2014       while ((x0 & 1) == 0)
2015 	{
2016 	  x0 = (x1 << (GMP_NUMB_BITS - 1)) | (x0 >> 1);
2017 	  x1 >>= 1;
2018 	}
2019       while ((y0 & 1) == 0)
2020 	{
2021 	  y0 = (y1 << (GMP_NUMB_BITS - 1)) | (y0 >> 1);
2022 	  y1 >>= 1;
2023 	}
2024 
2025 
2026       if (x1 < y1 || (x1 == y1 && x0 < y0))
2027 	{
2028 	  mp_limb_t t;
2029 	  t = x1; x1 = y1; y1 = t;
2030 	  t = x0; x0 = y0; y0 = t;
2031 	}
2032 
2033       cy = (x0 < y0);
2034       x0 -= y0;
2035       x1 -= y1 + cy;
2036     }
2037   while ((x1 | x0) != 0);
2038 
2039   g.d1 = y1;
2040   g.d0 = y0;
2041   return g;
2042 }
2043 
2044 mp_limb_t
2045 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
2046 {
2047   mp_limb_t  x;
2048   int  twos;
2049 
2050   ASSERT (y != 0);
2051   ASSERT (! refmpn_zero_p (xp, xsize));
2052   ASSERT_MPN (xp, xsize);
2053   ASSERT_LIMB (y);
2054 
2055   x = refmpn_mod_1 (xp, xsize, y);
2056   if (x == 0)
2057     return y;
2058 
2059   twos = 0;
2060   while ((x & 1) == 0 && (y & 1) == 0)
2061     {
2062       x >>= 1;
2063       y >>= 1;
2064       twos++;
2065     }
2066 
2067   return refmpn_gcd_11 (x, y) << twos;
2068 }
2069 
2070 
2071 /* Based on the full limb x, not nails. */
2072 unsigned
2073 refmpn_count_leading_zeros (mp_limb_t x)
2074 {
2075   unsigned  n = 0;
2076 
2077   ASSERT (x != 0);
2078 
2079   while ((x & GMP_LIMB_HIGHBIT) == 0)
2080     {
2081       x <<= 1;
2082       n++;
2083     }
2084   return n;
2085 }
2086 
2087 /* Full limbs allowed, not limited to nails. */
2088 unsigned
2089 refmpn_count_trailing_zeros (mp_limb_t x)
2090 {
2091   unsigned  n = 0;
2092 
2093   ASSERT (x != 0);
2094   ASSERT_LIMB (x);
2095 
2096   while ((x & 1) == 0)
2097     {
2098       x >>= 1;
2099       n++;
2100     }
2101   return n;
2102 }
2103 
2104 /* Strip factors of two (low zero bits) from {p,size} by right shifting.
2105    The return value is the number of twos stripped.  */
2106 mp_size_t
2107 refmpn_strip_twos (mp_ptr p, mp_size_t size)
2108 {
2109   mp_size_t  limbs;
2110   unsigned   shift;
2111 
2112   ASSERT (size >= 1);
2113   ASSERT (! refmpn_zero_p (p, size));
2114   ASSERT_MPN (p, size);
2115 
2116   for (limbs = 0; p[0] == 0; limbs++)
2117     {
2118       refmpn_copyi (p, p+1, size-1);
2119       p[size-1] = 0;
2120     }
2121 
2122   shift = refmpn_count_trailing_zeros (p[0]);
2123   if (shift)
2124     refmpn_rshift (p, p, size, shift);
2125 
2126   return limbs*GMP_NUMB_BITS + shift;
2127 }
2128 
2129 mp_limb_t
2130 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
2131 {
2132   int       cmp;
2133 
2134   ASSERT (ysize >= 1);
2135   ASSERT (xsize >= ysize);
2136   ASSERT ((xp[0] & 1) != 0);
2137   ASSERT ((yp[0] & 1) != 0);
2138   /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
2139   ASSERT (yp[ysize-1] != 0);
2140   ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
2141   ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
2142   ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
2143   if (xsize == ysize)
2144     ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
2145   ASSERT_MPN (xp, xsize);
2146   ASSERT_MPN (yp, ysize);
2147 
2148   refmpn_strip_twos (xp, xsize);
2149   MPN_NORMALIZE (xp, xsize);
2150   MPN_NORMALIZE (yp, ysize);
2151 
2152   for (;;)
2153     {
2154       cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
2155       if (cmp == 0)
2156 	break;
2157       if (cmp < 0)
2158 	MPN_PTR_SWAP (xp,xsize, yp,ysize);
2159 
2160       ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
2161 
2162       refmpn_strip_twos (xp, xsize);
2163       MPN_NORMALIZE (xp, xsize);
2164     }
2165 
2166   refmpn_copyi (gp, xp, xsize);
2167   return xsize;
2168 }
2169 
2170 unsigned long
2171 ref_popc_limb (mp_limb_t src)
2172 {
2173   unsigned long  count;
2174   int  i;
2175 
2176   count = 0;
2177   for (i = 0; i < GMP_LIMB_BITS; i++)
2178     {
2179       count += (src & 1);
2180       src >>= 1;
2181     }
2182   return count;
2183 }
2184 
2185 unsigned long
2186 refmpn_popcount (mp_srcptr sp, mp_size_t size)
2187 {
2188   unsigned long  count = 0;
2189   mp_size_t  i;
2190 
2191   ASSERT (size >= 0);
2192   ASSERT_MPN (sp, size);
2193 
2194   for (i = 0; i < size; i++)
2195     count += ref_popc_limb (sp[i]);
2196   return count;
2197 }
2198 
2199 unsigned long
2200 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
2201 {
2202   mp_ptr  d;
2203   unsigned long  count;
2204 
2205   ASSERT (size >= 0);
2206   ASSERT_MPN (s1p, size);
2207   ASSERT_MPN (s2p, size);
2208 
2209   if (size == 0)
2210     return 0;
2211 
2212   d = refmpn_malloc_limbs (size);
2213   refmpn_xor_n (d, s1p, s2p, size);
2214   count = refmpn_popcount (d, size);
2215   free (d);
2216   return count;
2217 }
2218 
2219 
2220 /* set r to a%d */
2221 void
2222 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
2223 {
2224   mp_limb_t  D[2];
2225   int        n;
2226 
2227   ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
2228   ASSERT_MPN (a, 2);
2229   ASSERT_MPN (d, 2);
2230 
2231   D[1] = d[1], D[0] = d[0];
2232   r[1] = a[1], r[0] = a[0];
2233   n = 0;
2234 
2235   for (;;)
2236     {
2237       if (D[1] & GMP_NUMB_HIGHBIT)
2238 	break;
2239       if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
2240 	break;
2241       refmpn_lshift (D, D, (mp_size_t) 2, 1);
2242       n++;
2243       ASSERT (n <= GMP_NUMB_BITS);
2244     }
2245 
2246   while (n >= 0)
2247     {
2248       if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
2249 	ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
2250       refmpn_rshift (D, D, (mp_size_t) 2, 1);
2251       n--;
2252     }
2253 
2254   ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
2255 }
2256 
2257 
2258 
2259 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified; in
2260    particular, the trial quotient is allowed to be 2 too big. */
2261 mp_limb_t
2262 refmpn_sb_div_qr (mp_ptr qp,
2263 		  mp_ptr np, mp_size_t nsize,
2264 		  mp_srcptr dp, mp_size_t dsize)
2265 {
2266   mp_limb_t  retval = 0;
2267   mp_size_t  i;
2268   mp_limb_t  d1 = dp[dsize-1];
2269   mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);
2270 
2271   ASSERT (nsize >= dsize);
2272   /* ASSERT (dsize > 2); */
2273   ASSERT (dsize >= 2);
2274   ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
2275   ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
2276   ASSERT_MPN (np, nsize);
2277   ASSERT_MPN (dp, dsize);
2278 
2279   i = nsize-dsize;
2280   if (refmpn_cmp (np+i, dp, dsize) >= 0)
2281     {
2282       ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
2283       retval = 1;
2284     }
2285 
2286   for (i--; i >= 0; i--)
2287     {
2288       mp_limb_t  n0 = np[i+dsize];
2289       mp_limb_t  n1 = np[i+dsize-1];
2290       mp_limb_t  q, dummy_r;
2291 
2292       ASSERT (n0 <= d1);
2293       if (n0 == d1)
2294 	q = GMP_NUMB_MAX;
2295       else
2296 	q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
2297 			       d1 << GMP_NAIL_BITS);
2298 
2299       n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
2300       ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
2301       if (n0)
2302 	{
2303 	  q--;
2304 	  if (! refmpn_add_n (np+i, np+i, dp, dsize))
2305 	    {
2306 	      q--;
2307 	      ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
2308 	    }
2309 	}
2310       np[i+dsize] = 0;
2311 
2312       qp[i] = q;
2313     }
2314 
2315   /* remainder < divisor */
2316 #if 0		/* ASSERT triggers gcc 4.2.1 bug */
2317   ASSERT (refmpn_cmp (np, dp, dsize) < 0);
2318 #endif
2319 
2320   /* multiply back to original */
2321   {
2322     mp_ptr  mp = refmpn_malloc_limbs (nsize);
2323 
2324     refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
2325     if (retval)
2326       ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
2327     ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
2328     ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
2329 
2330     free (mp);
2331   }
2332 
2333   free (np_orig);
2334   return retval;
2335 }
2336 
2337 /* Divide via refmpn_divmod_1 for a one-limb divisor; otherwise normalize the
2338    divisor, call refmpn_sb_div_qr above, and shift the remainder back down. */
2339 void
2340 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
2341 		mp_ptr np, mp_size_t nsize,
2342 		mp_srcptr dp, mp_size_t dsize)
2343 {
2344   ASSERT (qxn == 0);
2345   ASSERT_MPN (np, nsize);
2346   ASSERT_MPN (dp, dsize);
2347   ASSERT (dsize > 0);
2348   ASSERT (dp[dsize-1] != 0);
2349 
2350   if (dsize == 1)
2351     {
2352       rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
2353       return;
2354     }
2355   else
2356     {
2357       mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
2358       mp_ptr  d2p = refmpn_malloc_limbs (dsize);
2359       int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
2360 
2361       n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
2362       ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
2363 
2364       refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
2365       refmpn_rshift_or_copy (rp, n2p, dsize, norm);
2366 
2367       /* ASSERT (refmpn_zero_p (n2p+dsize, nsize-dsize)); */
2368       free (n2p);
2369       free (d2p);
2370     }
2371 }
2372 
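/* Montgomery reduction (REDC): given {up,2n} and an odd modulus {mp,n},
   compute {rp,n} congruent to {up,2n} * B^-n mod {mp,n}, where
   B = 2^GMP_NUMB_BITS.  Each pass adds the multiple of the modulus which
   zeros the low limb of up, which requires invm == -1/mp[0] mod B (note the
   negation).  The carry from the final add is returned; any final
   conditional subtraction of the modulus is left to the caller.  */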
2373 mp_limb_t
2374 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
2375 {
2376   mp_size_t j;
2377   mp_limb_t cy;
2378 
2379   ASSERT_MPN (up, 2*n);
2380   /* ASSERT about directed overlap rp, up */
2381   /* ASSERT about overlap rp, mp */
2382   /* ASSERT about overlap up, mp */
2383 
2384   for (j = n - 1; j >= 0; j--)
2385     {
2386       up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
2387       up++;
2388     }
2389   cy = mpn_add_n (rp, up, up - n, n);
2390   return cy;
2391 }
2392 
2393 size_t
2394 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
2395 {
2396   unsigned char  *d;
2397   size_t  dsize;
2398 
2399   ASSERT (size >= 0);
2400   ASSERT (base >= 2);
2401   ASSERT (base < numberof (mp_bases));
2402   ASSERT (size == 0 || src[size-1] != 0);
2403   ASSERT_MPN (src, size);
2404 
2405   MPN_SIZEINBASE (dsize, src, size, base);
2406   ASSERT (dsize >= 1);
2407   ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));
2408 
2409   if (size == 0)
2410     {
2411       dst[0] = 0;
2412       return 1;
2413     }
2414 
2415   /* don't clobber input for power of 2 bases */
2416   if (POW2_P (base))
2417     src = refmpn_memdup_limbs (src, size);
2418 
2419   d = dst + dsize;
2420   do
2421     {
2422       d--;
2423       ASSERT (d >= dst);
2424       *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
2425       size -= (src[size-1] == 0);
2426     }
2427   while (size != 0);
2428 
2429   /* Move result back and decrement dsize if we didn't generate
2430      the maximum possible digits.  */
2431   if (d != dst)
2432     {
2433       size_t i;
2434       dsize -= d - dst;
2435       for (i = 0; i < dsize; i++)
2436 	dst[i] = d[i];
2437     }
2438 
2439   if (POW2_P (base))
2440     free (src);
2441 
2442   return dsize;
2443 }
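
/* An illustrative use (hypothetical, not compiled in): refmpn_get_str
   produces digit values rather than ASCII, most significant digit first,
   and clobbers the source operand for non-power-of-2 bases.  */
#if 0
static void
refmpn_get_str_example (void)
{
  mp_limb_t      n[1] = { CNST_LIMB(12345) };
  unsigned char  str[GMP_LIMB_BITS];   /* ample for one limb in base 10 */
  size_t         len;

  len = refmpn_get_str (str, 10, n, (mp_size_t) 1);
  ASSERT (len == 5);
  ASSERT (str[0] == 1 && str[1] == 2 && str[2] == 3
	  && str[3] == 4 && str[4] == 5);
}
#endif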
2444 
2445 
2446 mp_limb_t
2447 ref_bswap_limb (mp_limb_t src)
2448 {
2449   mp_limb_t  dst;
2450   int        i;
2451 
2452   dst = 0;
2453   for (i = 0; i < GMP_LIMB_BYTES; i++)
2454     {
2455       dst = (dst << 8) + (src & 0xFF);
2456       src >>= 8;
2457     }
2458   return dst;
2459 }
2460 
2461 
2462 /* These random functions are mostly for transitional purposes while adding
2463    nail support, since they're independent of the normal mpn routines.  They
2464    can probably be removed when those normal routines are reliable, though
2465    perhaps something independent would still be useful at times.  */
2466 
2467 #if GMP_LIMB_BITS == 32
2468 #define RAND_A  CNST_LIMB(0x29CF535)
2469 #endif
2470 #if GMP_LIMB_BITS == 64
2471 #define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
2472 #endif
2473 
2474 mp_limb_t  refmpn_random_seed;
2475 
2476 mp_limb_t
2477 refmpn_random_half (void)
2478 {
2479   refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
2480   return (refmpn_random_seed >> GMP_LIMB_BITS/2);
2481 }
2482 
2483 mp_limb_t
2484 refmpn_random_limb (void)
2485 {
2486   return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
2487 	   | refmpn_random_half ()) & GMP_NUMB_MASK;
2488 }
2489 
2490 void
2491 refmpn_random (mp_ptr ptr, mp_size_t size)
2492 {
2493   mp_size_t  i;
2494   if (GMP_NAIL_BITS == 0)
2495     {
2496       mpn_random (ptr, size);
2497       return;
2498     }
2499 
2500   for (i = 0; i < size; i++)
2501     ptr[i] = refmpn_random_limb ();
2502 }
2503 
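/* Like mpn_random2: generate a value made of alternating runs of ones and
   zeros, each run 1 to RUN_MODULUS bits long, starting at a random bit
   position in the high limb.  Long-run operands of this kind tend to expose
   carry-propagation bugs.  */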
2504 void
2505 refmpn_random2 (mp_ptr ptr, mp_size_t size)
2506 {
2507   mp_size_t  i;
2508   mp_limb_t  bit, mask, limb;
2509   int        run;
2510 
2511   if (GMP_NAIL_BITS == 0)
2512     {
2513       mpn_random2 (ptr, size);
2514       return;
2515     }
2516 
2517 #define RUN_MODULUS  32
2518 
2519   /* start with ones at a random pos in the high limb */
2520   bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
2521   mask = 0;
2522   run = 0;
2523 
2524   for (i = size-1; i >= 0; i--)
2525     {
2526       limb = 0;
2527       do
2528 	{
2529 	  if (run == 0)
2530 	    {
2531 	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
2532 	      mask = ~mask;
2533 	    }
2534 
2535 	  limb |= (bit & mask);
2536 	  bit >>= 1;
2537 	  run--;
2538 	}
2539       while (bit != 0);
2540 
2541       ptr[i] = limb;
2542       bit = GMP_NUMB_HIGHBIT;
2543     }
2544 }
2545 
2546 /* This is a simple bitwise algorithm working high to low across "s" and
2547    testing each time whether setting the bit would make s^2 exceed n.  */
2548 mp_size_t
2549 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
2550 {
2551   mp_ptr     tp, dp;
2552   mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
2553   unsigned   ibit;
2554   long       i;
2555   mp_limb_t  c;
2556 
2557   ASSERT (nsize >= 0);
2558 
2559   /* If n==0, then s=0 and r=0.  */
2560   if (nsize == 0)
2561     return 0;
2562 
2563   ASSERT (np[nsize - 1] != 0);
2564   ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
2565   ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
2566   ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
2567 
2568   /* root */
2569   ssize = (nsize+1)/2;
2570   refmpn_zero (sp, ssize);
2571 
2572   /* the remainder so far */
2573   dp = refmpn_memdup_limbs (np, nsize);
2574   dsize = nsize;
2575 
2576   /* temporary */
2577   talloc = 2*ssize + 1;
2578   tp = refmpn_malloc_limbs (talloc);
2579 
2580   for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
2581     {
2582       /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
2583 	 is added to it */
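      /* (That is, (s + 2^i)^2 - s^2 = 2*s*2^i + 2^(2*i).)  */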
2584 
2585       ilimbs = (i+1) / GMP_NUMB_BITS;
2586       ibit = (i+1) % GMP_NUMB_BITS;
2587       refmpn_zero (tp, ilimbs);
2588       c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
2589       tsize = ilimbs + ssize;
2590       tp[tsize] = c;
2591       tsize += (c != 0);
2592 
2593       ilimbs = (2*i) / GMP_NUMB_BITS;
2594       ibit = (2*i) % GMP_NUMB_BITS;
2595       if (ilimbs + 1 > tsize)
2596 	{
2597 	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
2598 	  tsize = ilimbs + 1;
2599 	}
2600       c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
2601 			CNST_LIMB(1) << ibit);
2602       ASSERT (tsize < talloc);
2603       tp[tsize] = c;
2604       tsize += (c != 0);
2605 
2606       if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
2607 	{
2608 	  /* set this bit in s and subtract from the remainder */
2609 	  refmpn_setbit (sp, i);
2610 
2611 	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
2612 	  dsize = refmpn_normalize (dp, dsize);
2613 	}
2614     }
2615 
2616   if (rp == NULL)
2617     {
2618       ret = ! refmpn_zero_p (dp, dsize);
2619     }
2620   else
2621     {
2622       ASSERT (dsize == 0 || dp[dsize-1] != 0);
2623       refmpn_copy (rp, dp, dsize);
2624       ret = dsize;
2625     }
2626 
2627   free (dp);
2628   free (tp);
2629   return ret;
2630 }
2631