1 /* Reference mpn functions, designed to be simple, portable and independent
2    of the normal gmp code.  Speed isn't a consideration.
3 
4 Copyright 1996-2009, 2011-2013 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library test suite.
7 
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12 
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
16 Public License for more details.
17 
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
20 
21 
22 /* Most routines have assertions representing what the mpn routines are
23    supposed to accept.  Many of these reference routines do sensible things
24    outside these ranges (eg. for size==0), but the assertions are present to
25    pick up bad parameters passed here that are about to be passed the same
26    to a real mpn routine being compared.  */
27 
28 /* always do assertion checking */
29 #define WANT_ASSERT  1
30 
31 #include <stdio.h>  /* for NULL */
32 #include <stdlib.h> /* for malloc */
33 
34 #include "gmp.h"
35 #include "gmp-impl.h"
36 #include "longlong.h"
37 
38 #include "tests.h"
39 
40 
41 
42 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
43    in bytes. */
44 int
45 byte_overlap_p (const void *v_xp, mp_size_t xsize,
46 		const void *v_yp, mp_size_t ysize)
47 {
48   const char *xp = (const char *) v_xp;
49   const char *yp = (const char *) v_yp;
50 
51   ASSERT (xsize >= 0);
52   ASSERT (ysize >= 0);
53 
54   /* no wraparounds */
55   ASSERT (xp+xsize >= xp);
56   ASSERT (yp+ysize >= yp);
57 
58   if (xp + xsize <= yp)
59     return 0;
60 
61   if (yp + ysize <= xp)
62     return 0;
63 
64   return 1;
65 }
66 
67 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
68 int
69 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
70 {
71   return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
72 			 yp, ysize * GMP_LIMB_BYTES);
73 }
74 
75 /* Check overlap for a routine defined to work low to high. */
76 int
77 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
78 {
79   return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
80 }
81 
82 /* Check overlap for a routine defined to work high to low. */
83 int
84 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
85 {
86   return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
87 }
88 
89 /* Check overlap for a standard routine requiring equal or separate. */
90 int
91 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
92 {
93   return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
94 }
95 int
96 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
97 			       mp_size_t size)
98 {
99   return (refmpn_overlap_fullonly_p (dst, src1, size)
100 	  && refmpn_overlap_fullonly_p (dst, src2, size));
101 }
102 
103 
104 mp_ptr
105 refmpn_malloc_limbs (mp_size_t size)
106 {
107   mp_ptr  p;
108   ASSERT (size >= 0);
109   if (size == 0)
110     size = 1;
111   p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
112   ASSERT (p != NULL);
113   return p;
114 }
115 
116 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
117  * memory allocated by refmpn_malloc_limbs_aligned. */
118 void
119 refmpn_free_limbs (mp_ptr p)
120 {
121   free (p);
122 }
123 
124 mp_ptr
125 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
126 {
127   mp_ptr  p;
128   p = refmpn_malloc_limbs (size);
129   refmpn_copyi (p, ptr, size);
130   return p;
131 }
132 
133 /* malloc n limbs on a boundary that is a multiple of m bytes */
134 mp_ptr
135 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
136 {
137   return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
138 }
139 
140 
141 void
142 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
143 {
144   mp_size_t  i;
145   ASSERT (size >= 0);
146   for (i = 0; i < size; i++)
147     ptr[i] = value;
148 }
149 
150 void
151 refmpn_zero (mp_ptr ptr, mp_size_t size)
152 {
153   refmpn_fill (ptr, size, CNST_LIMB(0));
154 }
155 
156 void
157 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
158 {
159   ASSERT (newsize >= oldsize);
160   refmpn_zero (ptr+oldsize, newsize-oldsize);
161 }
162 
163 int
164 refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
165 {
166   mp_size_t  i;
167   for (i = 0; i < size; i++)
168     if (ptr[i] != 0)
169       return 0;
170   return 1;
171 }
172 
173 mp_size_t
174 refmpn_normalize (mp_srcptr ptr, mp_size_t size)
175 {
176   ASSERT (size >= 0);
177   while (size > 0 && ptr[size-1] == 0)
178     size--;
179   return size;
180 }
181 
182 /* the highest one bit in x */
183 mp_limb_t
184 refmpn_msbone (mp_limb_t x)
185 {
186   mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
187 
188   while (n != 0)
189     {
190       if (x & n)
191 	break;
192       n >>= 1;
193     }
194   return n;
195 }
196 
197 /* a mask of the highest one bit and all bits below */
198 mp_limb_t
199 refmpn_msbone_mask (mp_limb_t x)
200 {
201   if (x == 0)
202     return 0;
203 
204   return (refmpn_msbone (x) << 1) - 1;
205 }
206 
207 /* How many digits in the given base will fit in a limb.
208    Notice that the product b is allowed to be equal to the limit
209    2^GMP_NUMB_BITS, this ensures the result for base==2 will be
210    GMP_NUMB_BITS (and similarly other powers of 2).  */
211 int
212 refmpn_chars_per_limb (int base)
213 {
214   mp_limb_t  limit[2], b[2];
215   int        chars_per_limb;
216 
217   ASSERT (base >= 2);
218 
219   limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
220   limit[1] = 1;
221   b[0] = 1;      /* b = 1 */
222   b[1] = 0;
223 
224   chars_per_limb = 0;
225   for (;;)
226     {
227       if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
228 	break;
229       if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
230 	break;
231       chars_per_limb++;
232     }
233   return chars_per_limb;
234 }
235 
236 /* The biggest value base**n which fits in GMP_NUMB_BITS. */
237 mp_limb_t
238 refmpn_big_base (int base)
239 {
240   int        chars_per_limb = refmpn_chars_per_limb (base);
241   int        i;
242   mp_limb_t  bb;
243 
244   ASSERT (base >= 2);
245   bb = 1;
246   for (i = 0; i < chars_per_limb; i++)
247     bb *= base;
248   return bb;
249 }
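/* For example, on a build with GMP_NUMB_BITS == 32 and base == 10 these give
   chars_per_limb == 9 and big_base == 1000000000, since 10^9 < 2^32 <= 10^10. */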
250 
251 
252 void
253 refmpn_setbit (mp_ptr ptr, unsigned long bit)
254 {
255   ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
256 }
257 
258 void
259 refmpn_clrbit (mp_ptr ptr, unsigned long bit)
260 {
261   ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
262 }
263 
264 #define REFMPN_TSTBIT(ptr,bit) \
265   (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
266 
267 int
268 refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
269 {
270   return REFMPN_TSTBIT (ptr, bit);
271 }
272 
273 unsigned long
274 refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
275 {
276   while (REFMPN_TSTBIT (ptr, bit) != 0)
277     bit++;
278   return bit;
279 }
280 
281 unsigned long
282 refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
283 {
284   while (REFMPN_TSTBIT (ptr, bit) == 0)
285     bit++;
286   return bit;
287 }
288 
289 void
290 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
291 {
292   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
293   refmpn_copyi (rp, sp, size);
294 }
295 
296 void
297 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
298 {
299   mp_size_t i;
300 
301   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
302   ASSERT (size >= 0);
303 
304   for (i = 0; i < size; i++)
305     rp[i] = sp[i];
306 }
307 
308 void
309 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
310 {
311   mp_size_t i;
312 
313   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
314   ASSERT (size >= 0);
315 
316   for (i = size-1; i >= 0; i--)
317     rp[i] = sp[i];
318 }
319 
320 /* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
321    zeros to wsize.  If x is longer, then copy just the high wsize limbs.  */
322 void
323 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
324 {
325   ASSERT (wsize >= 0);
326   ASSERT (xsize >= 0);
327 
328   /* high part of x if x bigger than w */
329   if (xsize > wsize)
330     {
331       xp += xsize - wsize;
332       xsize = wsize;
333     }
334 
335   refmpn_copy (wp + wsize-xsize, xp, xsize);
336   refmpn_zero (wp, wsize-xsize);
337 }
338 
339 int
340 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
341 {
342   mp_size_t  i;
343 
344   ASSERT (size >= 1);
345   ASSERT_MPN (xp, size);
346   ASSERT_MPN (yp, size);
347 
348   for (i = size-1; i >= 0; i--)
349     {
350       if (xp[i] > yp[i])  return 1;
351       if (xp[i] < yp[i])  return -1;
352     }
353   return 0;
354 }
355 
356 int
357 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
358 {
359   if (size == 0)
360     return 0;
361   else
362     return refmpn_cmp (xp, yp, size);
363 }
364 
365 int
366 refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
367 		     mp_srcptr yp, mp_size_t ysize)
368 {
369   int  opp, cmp;
370 
371   ASSERT_MPN (xp, xsize);
372   ASSERT_MPN (yp, ysize);
373 
374   opp = (xsize < ysize);
375   if (opp)
376     MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);
377 
378   if (! refmpn_zero_p (xp+ysize, xsize-ysize))
379     cmp = 1;
380   else
381     cmp = refmpn_cmp (xp, yp, ysize);
382 
383   return (opp ? -cmp : cmp);
384 }
385 
386 int
387 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
388 {
389   mp_size_t  i;
390   ASSERT (size >= 0);
391 
392   for (i = 0; i < size; i++)
393       if (xp[i] != yp[i])
394 	return 0;
395   return 1;
396 }
397 
398 
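/* Limb-wise logical operations: each routine below sets rp[i] to the indicated
   bitwise combination of s1p[i] and s2p[i] (complemented results are masked
   with GMP_NUMB_MASK). */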
399 #define LOGOPS(operation)                                               \
400   {                                                                     \
401     mp_size_t  i;                                                       \
402 									\
403     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
404     ASSERT (size >= 1);                                                 \
405     ASSERT_MPN (s1p, size);                                             \
406     ASSERT_MPN (s2p, size);                                             \
407 									\
408     for (i = 0; i < size; i++)                                          \
409       rp[i] = operation;                                                \
410   }
411 
412 void
413 refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
414 {
415   LOGOPS (s1p[i] & s2p[i]);
416 }
417 void
418 refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
419 {
420   LOGOPS (s1p[i] & ~s2p[i]);
421 }
422 void
423 refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
424 {
425   LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
426 }
427 void
428 refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
429 {
430   LOGOPS (s1p[i] | s2p[i]);
431 }
432 void
433 refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
434 {
435   LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
436 }
437 void
438 refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
439 {
440   LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
441 }
442 void
443 refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
444 {
445   LOGOPS (s1p[i] ^ s2p[i]);
446 }
447 void
448 refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
449 {
450   LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
451 }
452 
453 
454 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
455 void
456 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
457 		   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
458 {
459   *dl = ml - sl;
460   *dh = mh - sh - (ml < sl);
461 }
462 
463 
464 /* set *w to x+y, return 0 or 1 carry */
465 mp_limb_t
466 ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
467 {
468   mp_limb_t  sum, cy;
469 
470   ASSERT_LIMB (x);
471   ASSERT_LIMB (y);
472 
473   sum = x + y;
474 #if GMP_NAIL_BITS == 0
475   *w = sum;
476   cy = (sum < x);
477 #else
478   *w = sum & GMP_NUMB_MASK;
479   cy = (sum >> GMP_NUMB_BITS);
480 #endif
481   return cy;
482 }
483 
484 /* set *w to x-y, return 0 or 1 borrow */
485 mp_limb_t
486 ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
487 {
488   mp_limb_t  diff, cy;
489 
490   ASSERT_LIMB (x);
491   ASSERT_LIMB (y);
492 
493   diff = x - y;
494 #if GMP_NAIL_BITS == 0
495   *w = diff;
496   cy = (diff > x);
497 #else
498   *w = diff & GMP_NUMB_MASK;
499   cy = (diff >> GMP_NUMB_BITS) & 1;
500 #endif
501   return cy;
502 }
503 
504 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
505 mp_limb_t
506 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
507 {
508   mp_limb_t  r;
509 
510   ASSERT_LIMB (x);
511   ASSERT_LIMB (y);
512   ASSERT (c == 0 || c == 1);
513 
514   r = ref_addc_limb (w, x, y);
515   return r + ref_addc_limb (w, *w, c);
516 }
517 
518 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
519 mp_limb_t
520 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
521 {
522   mp_limb_t  r;
523 
524   ASSERT_LIMB (x);
525   ASSERT_LIMB (y);
526   ASSERT (c == 0 || c == 1);
527 
528   r = ref_subc_limb (w, x, y);
529   return r + ref_subc_limb (w, *w, c);
530 }
531 
532 
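/* {rp,size} = {sp,size} + n or {sp,size} - n, propagating the single limb n
   through the whole region; returns the carry (or borrow) out of the top limb. */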
533 #define AORS_1(operation)                               \
534   {                                                     \
535     mp_size_t  i;                                       \
536 							\
537     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
538     ASSERT (size >= 1);                                 \
539     ASSERT_MPN (sp, size);                              \
540     ASSERT_LIMB (n);                                    \
541 							\
542     for (i = 0; i < size; i++)                          \
543       n = operation (&rp[i], sp[i], n);                 \
544     return n;                                           \
545   }
546 
547 mp_limb_t
548 refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
549 {
550   AORS_1 (ref_addc_limb);
551 }
552 mp_limb_t
553 refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
554 {
555   AORS_1 (ref_subc_limb);
556 }
557 
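/* {rp,size} = {s1p,size} + {s2p,size} + carry, or the corresponding
   subtraction with borrow; returns the carry (borrow) out of the top limb. */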
558 #define AORS_NC(operation)                                              \
559   {                                                                     \
560     mp_size_t  i;                                                       \
561 									\
562     ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
563     ASSERT (carry == 0 || carry == 1);                                  \
564     ASSERT (size >= 1);                                                 \
565     ASSERT_MPN (s1p, size);                                             \
566     ASSERT_MPN (s2p, size);                                             \
567 									\
568     for (i = 0; i < size; i++)                                          \
569       carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
570     return carry;                                                       \
571   }
572 
573 mp_limb_t
574 refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
575 	       mp_limb_t carry)
576 {
577   AORS_NC (adc);
578 }
579 mp_limb_t
580 refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
581 	       mp_limb_t carry)
582 {
583   AORS_NC (sbb);
584 }
585 
586 
587 mp_limb_t
588 refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
589 {
590   return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
591 }
592 mp_limb_t
593 refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
594 {
595   return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
596 }
597 
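/* If cnd is non-zero do a plain add_n (sub_n), otherwise just copy s1p and
   return 0.  The library's mpn_cnd_* routines aim to do this without a
   data-dependent branch; that doesn't matter for a reference version. */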
598 mp_limb_t
599 refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
600 {
601   if (cnd != 0)
602     return refmpn_add_n (rp, s1p, s2p, size);
603   else
604     {
605       refmpn_copyi (rp, s1p, size);
606       return 0;
607     }
608 }
609 mp_limb_t
610 refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
611 {
612   if (cnd != 0)
613     return refmpn_sub_n (rp, s1p, s2p, size);
614   else
615     {
616       refmpn_copyi (rp, s1p, size);
617       return 0;
618     }
619 }
620 
621 
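/* The *_err variants compute the same sum or difference as add_nc/sub_nc, and
   additionally accumulate into {ep,2} (and {ep+2,2}, {ep+4,2} for the err2 and
   err3 forms) the sum of the limbs of each y operand, taken in reverse order,
   at the positions where a carry or borrow propagated out. */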
622 #define AORS_ERR1_N(operation)						\
623   {                                                                     \
624     mp_size_t  i;                                                       \
625     mp_limb_t carry2;							\
626 									\
627     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
628     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
629     ASSERT (! refmpn_overlap_p (rp, size, yp, size));			\
630     ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));			\
631     ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));			\
632     ASSERT (! refmpn_overlap_p (ep, 2, yp, size));			\
633     ASSERT (! refmpn_overlap_p (ep, 2, rp, size));			\
634 									\
635     ASSERT (carry == 0 || carry == 1);					\
636     ASSERT (size >= 1);							\
637     ASSERT_MPN (s1p, size);						\
638     ASSERT_MPN (s2p, size);						\
639     ASSERT_MPN (yp, size);						\
640 									\
641     ep[0] = ep[1] = CNST_LIMB(0);					\
642 									\
643     for (i = 0; i < size; i++)                                          \
644       {									\
645 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
646 	if (carry == 1)							\
647 	  {								\
648 	    carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);	\
649 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
650 	    ASSERT (carry2 == 0);					\
651 	  }								\
652       }									\
653     return carry;                                                       \
654   }
655 
656 mp_limb_t
657 refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
658 		   mp_ptr ep, mp_srcptr yp,
659 		   mp_size_t size, mp_limb_t carry)
660 {
661   AORS_ERR1_N (adc);
662 }
663 mp_limb_t
664 refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
665 		   mp_ptr ep, mp_srcptr yp,
666 		   mp_size_t size, mp_limb_t carry)
667 {
668   AORS_ERR1_N (sbb);
669 }
670 
671 
672 #define AORS_ERR2_N(operation)						\
673   {                                                                     \
674     mp_size_t  i;                                                       \
675     mp_limb_t carry2;							\
676 									\
677     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
678     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
679     ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
680     ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
681     ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));			\
682     ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));			\
683     ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));			\
684     ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));			\
685     ASSERT (! refmpn_overlap_p (ep, 4, rp, size));			\
686 									\
687     ASSERT (carry == 0 || carry == 1);					\
688     ASSERT (size >= 1);							\
689     ASSERT_MPN (s1p, size);						\
690     ASSERT_MPN (s2p, size);						\
691     ASSERT_MPN (y1p, size);						\
692     ASSERT_MPN (y2p, size);						\
693 									\
694     ep[0] = ep[1] = CNST_LIMB(0);					\
695     ep[2] = ep[3] = CNST_LIMB(0);					\
696 									\
697     for (i = 0; i < size; i++)                                          \
698       {									\
699 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
700 	if (carry == 1)							\
701 	  {								\
702 	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
703 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
704 	    ASSERT (carry2 == 0);					\
705 	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
706 	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
707 	    ASSERT (carry2 == 0);					\
708 	  }								\
709       }									\
710     return carry;                                                       \
711   }
712 
713 mp_limb_t
714 refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
715 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
716 		   mp_size_t size, mp_limb_t carry)
717 {
718   AORS_ERR2_N (adc);
719 }
720 mp_limb_t
721 refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
722 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
723 		   mp_size_t size, mp_limb_t carry)
724 {
725   AORS_ERR2_N (sbb);
726 }
727 
728 
729 #define AORS_ERR3_N(operation)						\
730   {                                                                     \
731     mp_size_t  i;                                                       \
732     mp_limb_t carry2;							\
733 									\
734     ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));			\
735     ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));			\
736     ASSERT (! refmpn_overlap_p (rp, size, y1p, size));			\
737     ASSERT (! refmpn_overlap_p (rp, size, y2p, size));			\
738     ASSERT (! refmpn_overlap_p (rp, size, y3p, size));			\
739     ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));			\
740     ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));			\
741     ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));			\
742     ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));			\
743     ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));			\
744     ASSERT (! refmpn_overlap_p (ep, 6, rp, size));			\
745 									\
746     ASSERT (carry == 0 || carry == 1);					\
747     ASSERT (size >= 1);							\
748     ASSERT_MPN (s1p, size);						\
749     ASSERT_MPN (s2p, size);						\
750     ASSERT_MPN (y1p, size);						\
751     ASSERT_MPN (y2p, size);						\
752     ASSERT_MPN (y3p, size);						\
753 									\
754     ep[0] = ep[1] = CNST_LIMB(0);					\
755     ep[2] = ep[3] = CNST_LIMB(0);					\
756     ep[4] = ep[5] = CNST_LIMB(0);					\
757 									\
758     for (i = 0; i < size; i++)                                          \
759       {									\
760 	carry = operation (&rp[i], s1p[i], s2p[i], carry);		\
761 	if (carry == 1)							\
762 	  {								\
763 	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);	\
764 	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);		\
765 	    ASSERT (carry2 == 0);					\
766 	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);	\
767 	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);		\
768 	    ASSERT (carry2 == 0);					\
769 	    carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);	\
770 	    carry2 = ref_addc_limb (&ep[5], ep[5], carry2);		\
771 	    ASSERT (carry2 == 0);					\
772 	  }								\
773       }									\
774     return carry;                                                       \
775   }
776 
777 mp_limb_t
778 refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
779 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
780 		   mp_size_t size, mp_limb_t carry)
781 {
782   AORS_ERR3_N (adc);
783 }
784 mp_limb_t
785 refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
786 		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
787 		   mp_size_t size, mp_limb_t carry)
788 {
789   AORS_ERR3_N (sbb);
790 }
791 
792 
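/* {rp,n} = {up,n} + ({vp,n} << s), returning the carry out (at most 2^s).
   Implemented with a temporary lshift followed by an add. */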
793 mp_limb_t
794 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
795 		 mp_size_t n, unsigned int s)
796 {
797   mp_limb_t cy;
798   mp_ptr tp;
799 
800   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
801   ASSERT (n >= 1);
802   ASSERT (0 < s && s < GMP_NUMB_BITS);
803   ASSERT_MPN (up, n);
804   ASSERT_MPN (vp, n);
805 
806   tp = refmpn_malloc_limbs (n);
807   cy  = refmpn_lshift (tp, vp, n, s);
808   cy += refmpn_add_n (rp, up, tp, n);
809   free (tp);
810   return cy;
811 }
812 mp_limb_t
813 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
814 {
815   return refmpn_addlsh_n (rp, up, vp, n, 1);
816 }
817 mp_limb_t
818 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
819 {
820   return refmpn_addlsh_n (rp, up, vp, n, 2);
821 }
822 mp_limb_t
823 refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
824 {
825   return refmpn_addlsh_n (rp, rp, vp, n, s);
826 }
827 mp_limb_t
828 refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
829 {
830   return refmpn_addlsh_n (rp, rp, vp, n, 1);
831 }
832 mp_limb_t
833 refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
834 {
835   return refmpn_addlsh_n (rp, rp, vp, n, 2);
836 }
837 mp_limb_t
838 refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
839 {
840   return refmpn_addlsh_n (rp, vp, rp, n, s);
841 }
842 mp_limb_t
843 refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
844 {
845   return refmpn_addlsh_n (rp, vp, rp, n, 1);
846 }
847 mp_limb_t
848 refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
849 {
850   return refmpn_addlsh_n (rp, vp, rp, n, 2);
851 }
852 mp_limb_t
853 refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
854 		  mp_size_t n, unsigned int s, mp_limb_t carry)
855 {
856   mp_limb_t cy;
857 
858   ASSERT (carry <= (CNST_LIMB(1) << s));
859 
860   cy = refmpn_addlsh_n (rp, up, vp, n, s);
861   cy += refmpn_add_1 (rp, rp, n, carry);
862   return cy;
863 }
864 mp_limb_t
865 refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
866 {
867   return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
868 }
869 mp_limb_t
870 refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
871 {
872   return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
873 }
874 
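/* {rp,n} = {up,n} - ({vp,n} << s), returning the borrow out of the top limb
   (up to 2^s, since it includes the bits shifted out of vp). */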
875 mp_limb_t
876 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
877 		 mp_size_t n, unsigned int s)
878 {
879   mp_limb_t cy;
880   mp_ptr tp;
881 
882   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
883   ASSERT (n >= 1);
884   ASSERT (0 < s && s < GMP_NUMB_BITS);
885   ASSERT_MPN (up, n);
886   ASSERT_MPN (vp, n);
887 
888   tp = refmpn_malloc_limbs (n);
889   cy  = mpn_lshift (tp, vp, n, s);
890   cy += mpn_sub_n (rp, up, tp, n);
891   free (tp);
892   return cy;
893 }
894 mp_limb_t
895 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
896 {
897   return refmpn_sublsh_n (rp, up, vp, n, 1);
898 }
899 mp_limb_t
900 refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
901 {
902   return refmpn_sublsh_n (rp, up, vp, n, 2);
903 }
904 mp_limb_t
905 refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
906 {
907   return refmpn_sublsh_n (rp, rp, vp, n, s);
908 }
909 mp_limb_t
910 refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
911 {
912   return refmpn_sublsh_n (rp, rp, vp, n, 1);
913 }
914 mp_limb_t
915 refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
916 {
917   return refmpn_sublsh_n (rp, rp, vp, n, 2);
918 }
919 mp_limb_t
920 refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
921 {
922   return refmpn_sublsh_n (rp, vp, rp, n, s);
923 }
924 mp_limb_t
925 refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
926 {
927   return refmpn_sublsh_n (rp, vp, rp, n, 1);
928 }
929 mp_limb_t
930 refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
931 {
932   return refmpn_sublsh_n (rp, vp, rp, n, 2);
933 }
934 mp_limb_t
935 refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
936 		  mp_size_t n, unsigned int s, mp_limb_t carry)
937 {
938   mp_limb_t cy;
939 
940   ASSERT (carry <= (CNST_LIMB(1) << s));
941 
942   cy = refmpn_sublsh_n (rp, up, vp, n, s);
943   cy += refmpn_sub_1 (rp, rp, n, carry);
944   return cy;
945 }
946 mp_limb_t
947 refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
948 {
949   return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
950 }
951 mp_limb_t
952 refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
953 {
954   return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
955 }
956 
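/* Reverse subtract with shift: {rp,n} = ({vp,n} << s) - {up,n}, returning the
   signed high word, i.e. the bits shifted out of vp minus the borrow. */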
957 mp_limb_signed_t
958 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
959 		 mp_size_t n, unsigned int s)
960 {
961   mp_limb_signed_t cy;
962   mp_ptr tp;
963 
964   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
965   ASSERT (n >= 1);
966   ASSERT (0 < s && s < GMP_NUMB_BITS);
967   ASSERT_MPN (up, n);
968   ASSERT_MPN (vp, n);
969 
970   tp = refmpn_malloc_limbs (n);
971   cy  = mpn_lshift (tp, vp, n, s);
972   cy -= mpn_sub_n (rp, tp, up, n);
973   free (tp);
974   return cy;
975 }
976 mp_limb_signed_t
977 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
978 {
979   return refmpn_rsblsh_n (rp, up, vp, n, 1);
980 }
981 mp_limb_signed_t
982 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
983 {
984   return refmpn_rsblsh_n (rp, up, vp, n, 2);
985 }
986 mp_limb_signed_t
987 refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
988 		  mp_size_t n, unsigned int s, mp_limb_signed_t carry)
989 {
990   mp_limb_signed_t cy;
991 
992   ASSERT (carry == -1 || (carry >> s) == 0);
993 
994   cy = refmpn_rsblsh_n (rp, up, vp, n, s);
995   if (carry > 0)
996     cy += refmpn_add_1 (rp, rp, n, carry);
997   else
998     cy -= refmpn_sub_1 (rp, rp, n, -carry);
999   return cy;
1000 }
1001 mp_limb_signed_t
1002 refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1003 {
1004   return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
1005 }
1006 mp_limb_signed_t
1007 refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
1008 {
1009   return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
1010 }
1011 
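/* {rp,n} = ({up,n} + {vp,n}) >> 1, with the addition's carry shifted back in
   at the top; returns the bit shifted out at the bottom.  rsh1sub is the same
   for the difference. */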
1012 mp_limb_t
1013 refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1014 {
1015   mp_limb_t cya, cys;
1016 
1017   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1018   ASSERT (n >= 1);
1019   ASSERT_MPN (up, n);
1020   ASSERT_MPN (vp, n);
1021 
1022   cya = mpn_add_n (rp, up, vp, n);
1023   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1024   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1025   return cys;
1026 }
1027 mp_limb_t
1028 refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1029 {
1030   mp_limb_t cya, cys;
1031 
1032   ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
1033   ASSERT (n >= 1);
1034   ASSERT_MPN (up, n);
1035   ASSERT_MPN (vp, n);
1036 
1037   cya = mpn_sub_n (rp, up, vp, n);
1038   cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
1039   rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
1040   return cys;
1041 }
1042 
1043 /* Twos complement, return borrow. */
1044 mp_limb_t
1045 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
1046 {
1047   mp_ptr     zeros;
1048   mp_limb_t  ret;
1049 
1050   ASSERT (size >= 1);
1051 
1052   zeros = refmpn_malloc_limbs (size);
1053   refmpn_fill (zeros, size, CNST_LIMB(0));
1054   ret = refmpn_sub_n (dst, zeros, src, size);
1055   free (zeros);
1056   return ret;
1057 }
1058 
1059 
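/* {rp,s1size} = {s1p,s1size} + {s2p,s2size} (or the difference), with
   s1size >= s2size >= 1; returns the carry or borrow out of the top limb. */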
1060 #define AORS(aors_n, aors_1)                                    \
1061   {                                                             \
1062     mp_limb_t  c;                                               \
1063     ASSERT (s1size >= s2size);                                  \
1064     ASSERT (s2size >= 1);                                       \
1065     c = aors_n (rp, s1p, s2p, s2size);                          \
1066     if (s1size-s2size != 0)                                     \
1067       c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
1068     return c;                                                   \
1069   }
1070 mp_limb_t
1071 refmpn_add (mp_ptr rp,
1072 	    mp_srcptr s1p, mp_size_t s1size,
1073 	    mp_srcptr s2p, mp_size_t s2size)
1074 {
1075   AORS (refmpn_add_n, refmpn_add_1);
1076 }
1077 mp_limb_t
1078 refmpn_sub (mp_ptr rp,
1079 	    mp_srcptr s1p, mp_size_t s1size,
1080 	    mp_srcptr s2p, mp_size_t s2size)
1081 {
1082   AORS (refmpn_sub_n, refmpn_sub_1);
1083 }
1084 
1085 
1086 #define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
1087 #define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)
1088 
1089 #define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
1090 #define HIGHMASK  SHIFTHIGH(LOWMASK)
1091 
1092 #define LOWPART(x)   ((x) & LOWMASK)
1093 #define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
1094 
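/* Schoolbook multiplication by half limbs: with b = 2^(GMP_LIMB_BITS/2),
   x = xh*b + xl and y = yh*b + yl give
   x*y = xh*yh*b^2 + (xh*yl + xl*yh)*b + xl*yl,
   and the two cross products are folded into hi:lo with carry checks. */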
1095 /* Set *lo to the low limb of x*y and return the high limb, using full limbs not nails. */
1096 mp_limb_t
1097 refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
1098 {
1099   mp_limb_t  hi, s;
1100 
1101   *lo = LOWPART(x) * LOWPART(y);
1102   hi = HIGHPART(x) * HIGHPART(y);
1103 
1104   s = LOWPART(x) * HIGHPART(y);
1105   hi += HIGHPART(s);
1106   s = SHIFTHIGH(LOWPART(s));
1107   *lo += s;
1108   hi += (*lo < s);
1109 
1110   s = HIGHPART(x) * LOWPART(y);
1111   hi += HIGHPART(s);
1112   s = SHIFTHIGH(LOWPART(s));
1113   *lo += s;
1114   hi += (*lo < s);
1115 
1116   return hi;
1117 }
1118 
1119 mp_limb_t
1120 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
1121 {
1122   return refmpn_umul_ppmm (lo, x, y);
1123 }
1124 
1125 mp_limb_t
1126 refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
1127 	       mp_limb_t carry)
1128 {
1129   mp_size_t  i;
1130   mp_limb_t  hi, lo;
1131 
1132   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1133   ASSERT (size >= 1);
1134   ASSERT_MPN (sp, size);
1135   ASSERT_LIMB (multiplier);
1136   ASSERT_LIMB (carry);
1137 
1138   multiplier <<= GMP_NAIL_BITS;
1139   for (i = 0; i < size; i++)
1140     {
1141       hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
1142       lo >>= GMP_NAIL_BITS;
1143       ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
1144       rp[i] = lo;
1145       carry = hi;
1146     }
1147   return carry;
1148 }
1149 
1150 mp_limb_t
1151 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1152 {
1153   return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1154 }
1155 
1156 
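/* {dst,size+msize-1} gets the low limbs of {src,size} * {mult,msize} and the
   most significant limb is returned, for msize >= 2 (the mul_2 .. mul_6
   wrappers below).  Done as one mul_1 pass then msize-1 addmul_1 passes. */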
1157 mp_limb_t
1158 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1159 	      mp_srcptr mult, mp_size_t msize)
1160 {
1161   mp_ptr     src_copy;
1162   mp_limb_t  ret;
1163   mp_size_t  i;
1164 
1165   ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
1166   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1167   ASSERT (size >= msize);
1168   ASSERT_MPN (mult, msize);
1169 
1170   /* in case dst==src */
1171   src_copy = refmpn_malloc_limbs (size);
1172   refmpn_copyi (src_copy, src, size);
1173   src = src_copy;
1174 
1175   dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
1176   for (i = 1; i < msize-1; i++)
1177     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1178   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1179 
1180   free (src_copy);
1181   return ret;
1182 }
1183 
1184 mp_limb_t
1185 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1186 {
1187   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
1188 }
1189 mp_limb_t
1190 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1191 {
1192   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
1193 }
1194 mp_limb_t
1195 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1196 {
1197   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
1198 }
1199 mp_limb_t
1200 refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1201 {
1202   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
1203 }
1204 mp_limb_t
1205 refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1206 {
1207   return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
1208 }
1209 
1210 #define AORSMUL_1C(operation_n)                                 \
1211   {                                                             \
1212     mp_ptr     p;                                               \
1213     mp_limb_t  ret;                                             \
1214 								\
1215     ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
1216 								\
1217     p = refmpn_malloc_limbs (size);                             \
1218     ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
1219     ret += operation_n (rp, rp, p, size);                       \
1220 								\
1221     free (p);                                                   \
1222     return ret;                                                 \
1223   }
1224 
1225 mp_limb_t
1226 refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1227 		  mp_limb_t multiplier, mp_limb_t carry)
1228 {
1229   AORSMUL_1C (refmpn_add_n);
1230 }
1231 mp_limb_t
1232 refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1233 		  mp_limb_t multiplier, mp_limb_t carry)
1234 {
1235   AORSMUL_1C (refmpn_sub_n);
1236 }
1237 
1238 
1239 mp_limb_t
1240 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1241 {
1242   return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1243 }
1244 mp_limb_t
1245 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1246 {
1247   return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1248 }
1249 
1250 
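/* Same shape as refmpn_mul_N but accumulating: {src,size} * {mult,msize} is
   added to the low {dst,size} limbs, the low size+msize-1 limbs of the result
   are stored at dst, and the most significant limb is returned. */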
1251 mp_limb_t
1252 refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1253 		 mp_srcptr mult, mp_size_t msize)
1254 {
1255   mp_ptr     src_copy;
1256   mp_limb_t  ret;
1257   mp_size_t  i;
1258 
1259   ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
1260   ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1261   ASSERT (size >= msize);
1262   ASSERT_MPN (mult, msize);
1263 
1264   /* in case dst==src */
1265   src_copy = refmpn_malloc_limbs (size);
1266   refmpn_copyi (src_copy, src, size);
1267   src = src_copy;
1268 
1269   for (i = 0; i < msize-1; i++)
1270     dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1271   ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1272 
1273   free (src_copy);
1274   return ret;
1275 }
1276 
1277 mp_limb_t
1278 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1279 {
1280   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
1281 }
1282 mp_limb_t
1283 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1284 {
1285   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
1286 }
1287 mp_limb_t
1288 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1289 {
1290   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
1291 }
1292 mp_limb_t
1293 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1294 {
1295   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
1296 }
1297 mp_limb_t
1298 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1299 {
1300   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
1301 }
1302 mp_limb_t
1303 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1304 {
1305   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
1306 }
1307 mp_limb_t
1308 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1309 {
1310   return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
1311 }
1312 
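/* Compute r1 = s1 + s2 and r2 = s1 - s2 together.  The carry argument packs
   the addition carry in bit 1 and the subtraction borrow in bit 0, and the
   return value is packed the same way. */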
1313 mp_limb_t
1314 refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
1315 		  mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
1316 		  mp_limb_t carry)
1317 {
1318   mp_ptr p;
1319   mp_limb_t acy, scy;
1320 
1321   /* Destinations can't overlap. */
1322   ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
1323   ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
1324   ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
1325   ASSERT (size >= 1);
1326 
1327   /* in case r1p==s1p or r1p==s2p */
1328   p = refmpn_malloc_limbs (size);
1329 
1330   acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
1331   scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
1332   refmpn_copyi (r1p, p, size);
1333 
1334   free (p);
1335   return 2 * acy + scy;
1336 }
1337 
1338 mp_limb_t
1339 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
1340 		 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1341 {
1342   return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
1343 }
1344 
1345 
1346 /* Right shift hi,lo and return the low limb of the result.
1347    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1348 mp_limb_t
1349 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1350 {
1351   ASSERT (shift < GMP_NUMB_BITS);
1352   if (shift == 0)
1353     return lo;
1354   else
1355     return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1356 }
1357 
1358 /* Left shift hi,lo and return the high limb of the result.
1359    Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1360 mp_limb_t
1361 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1362 {
1363   ASSERT (shift < GMP_NUMB_BITS);
1364   if (shift == 0)
1365     return hi;
1366   else
1367     return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1368 }
1369 
1370 
1371 mp_limb_t
1372 refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1373 {
1374   mp_limb_t  ret;
1375   mp_size_t  i;
1376 
1377   ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
1378   ASSERT (size >= 1);
1379   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1380   ASSERT_MPN (sp, size);
1381 
1382   ret = rshift_make (sp[0], CNST_LIMB(0), shift);
1383 
1384   for (i = 0; i < size-1; i++)
1385     rp[i] = rshift_make (sp[i+1], sp[i], shift);
1386 
1387   rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
1388   return ret;
1389 }
1390 
1391 mp_limb_t
1392 refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1393 {
1394   mp_limb_t  ret;
1395   mp_size_t  i;
1396 
1397   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1398   ASSERT (size >= 1);
1399   ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
1400   ASSERT_MPN (sp, size);
1401 
1402   ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);
1403 
1404   for (i = size-2; i >= 0; i--)
1405     rp[i+1] = lshift_make (sp[i+1], sp[i], shift);
1406 
1407   rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
1408   return ret;
1409 }
1410 
1411 void
1412 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1413 {
1414   mp_size_t i;
1415 
1416   /* We work downwards since mpn_lshiftc needs that. */
1417   ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1418 
1419   for (i = size - 1; i >= 0; i--)
1420     rp[i] = (~sp[i]) & GMP_NUMB_MASK;
1421 }
1422 
1423 mp_limb_t
1424 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1425 {
1426   mp_limb_t res;
1427 
1428   /* No asserts here, refmpn_lshift will assert what we need. */
1429 
1430   res = refmpn_lshift (rp, sp, size, shift);
1431   refmpn_com (rp, rp, size);
1432   return res;
1433 }
1434 
1435 /* accepting shift==0 and doing a plain copyi or copyd in that case */
1436 mp_limb_t
1437 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1438 {
1439   if (shift == 0)
1440     {
1441       refmpn_copyi (rp, sp, size);
1442       return 0;
1443     }
1444   else
1445     {
1446       return refmpn_rshift (rp, sp, size, shift);
1447     }
1448 }
1449 mp_limb_t
1450 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1451 {
1452   if (shift == 0)
1453     {
1454       refmpn_copyd (rp, sp, size);
1455       return 0;
1456     }
1457   else
1458     {
1459       return refmpn_lshift (rp, sp, size, shift);
1460     }
1461 }
1462 
1463 /* accepting size==0 too */
1464 mp_limb_t
1465 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1466 			   unsigned shift)
1467 {
1468   return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
1469 }
1470 mp_limb_t
1471 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1472 			   unsigned shift)
1473 {
1474   return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
1475 }
1476 
1477 /* Divide h,l by d, return quotient, store remainder to *rp.
1478    Operates on full limbs, not nails.
1479    Must have h < d.
1480    __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
1481 mp_limb_t
1482 refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
1483 {
1484   mp_limb_t  q, r;
1485   int  n;
1486 
1487   ASSERT (d != 0);
1488   ASSERT (h < d);
1489 
1490 #if 0
1491   udiv_qrnnd (q, r, h, l, d);
1492   *rp = r;
1493   return q;
1494 #endif
1495 
1496   n = refmpn_count_leading_zeros (d);
1497   d <<= n;
1498 
1499   if (n != 0)
1500     {
1501       h = (h << n) | (l >> (GMP_LIMB_BITS - n));
1502       l <<= n;
1503     }
1504 
1505   __udiv_qrnnd_c (q, r, h, l, d);
1506   r >>= n;
1507   *rp = r;
1508   return q;
1509 }
1510 
1511 mp_limb_t
1512 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
1513 {
1514   return refmpn_udiv_qrnnd (rp, h, l, d);
1515 }
1516 
1517 /* This little subroutine avoids some bad code generation from i386 gcc 3.0
1518    -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
1519 static mp_limb_t
1520 refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1521 			     mp_limb_t divisor, mp_limb_t carry)
1522 {
1523   mp_size_t  i;
1524   mp_limb_t rem[1];
1525   for (i = size-1; i >= 0; i--)
1526     {
1527       rp[i] = refmpn_udiv_qrnnd (rem, carry,
1528 				 sp[i] << GMP_NAIL_BITS,
1529 				 divisor << GMP_NAIL_BITS);
1530       carry = *rem >> GMP_NAIL_BITS;
1531     }
1532   return carry;
1533 }
1534 
1535 mp_limb_t
1536 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1537 		  mp_limb_t divisor, mp_limb_t carry)
1538 {
1539   mp_ptr     sp_orig;
1540   mp_ptr     prod;
1541   mp_limb_t  carry_orig;
1542 
1543   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1544   ASSERT (size >= 0);
1545   ASSERT (carry < divisor);
1546   ASSERT_MPN (sp, size);
1547   ASSERT_LIMB (divisor);
1548   ASSERT_LIMB (carry);
1549 
1550   if (size == 0)
1551     return carry;
1552 
1553   sp_orig = refmpn_memdup_limbs (sp, size);
1554   prod = refmpn_malloc_limbs (size);
1555   carry_orig = carry;
1556 
1557   carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1558 
1559   /* check by multiplying back */
1560 #if 0
1561   printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1562 	  size, divisor, carry_orig, carry);
1563   mpn_trace("s",sp_orig,size);
1564   mpn_trace("r",rp,size);
1565   printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1566   mpn_trace("p",prod,size);
1567 #endif
1568   ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1569   ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1570   free (sp_orig);
1571   free (prod);
1572 
1573   return carry;
1574 }
1575 
1576 mp_limb_t
1577 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1578 {
1579   return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
1580 }
1581 
1582 
1583 mp_limb_t
1584 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1585 	       mp_limb_t carry)
1586 {
1587   mp_ptr  p = refmpn_malloc_limbs (size);
1588   carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
1589   free (p);
1590   return carry;
1591 }
1592 
1593 mp_limb_t
1594 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1595 {
1596   return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
1597 }
1598 
1599 mp_limb_t
1600 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1601 		     mp_limb_t inverse)
1602 {
1603   ASSERT (divisor & GMP_NUMB_HIGHBIT);
1604   ASSERT (inverse == refmpn_invert_limb (divisor));
1605   return refmpn_mod_1 (sp, size, divisor);
1606 }
1607 
1608 /* This implementation will be rather slow, but has the advantage of being
1609    in a different style than the libgmp versions.  */
1610 mp_limb_t
1611 refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
1612 {
1613   ASSERT ((GMP_NUMB_BITS % 4) == 0);
1614   return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
1615 }
1616 
1617 
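/* divrem_1c develops xsize extra low "fraction" quotient limbs by continuing
   the division across xsize zero limbs appended below {sp,size}: the integer
   quotient goes to rp+xsize, the fraction limbs to rp, and the final
   remainder is returned.  */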
1618 mp_limb_t
1619 refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
1620 		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1621 		  mp_limb_t carry)
1622 {
1623   mp_ptr  z;
1624 
1625   z = refmpn_malloc_limbs (xsize);
1626   refmpn_fill (z, xsize, CNST_LIMB(0));
1627 
1628   carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
1629   carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);
1630 
1631   free (z);
1632   return carry;
1633 }
1634 
1635 mp_limb_t
1636 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
1637 		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1638 {
1639   return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
1640 }
1641 
1642 mp_limb_t
1643 refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
1644 			mp_srcptr sp, mp_size_t size,
1645 			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
1646 {
1647   ASSERT (size >= 0);
1648   ASSERT (shift == refmpn_count_leading_zeros (divisor));
1649   ASSERT (inverse == refmpn_invert_limb (divisor << shift));
1650 
1651   return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
1652 }
1653 
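/* Divide {np,nn}, extended by qxn zero fraction limbs below it, by the
   two-limb divisor {dp,2}; dp[1] must have its high bit set (as required by
   refmpn_sb_div_qr below).  The high quotient limb is returned, the rest of
   the quotient goes to {qp,nn+qxn-2}, and the two-limb remainder is left in
   np[0] and np[1].  */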
1654 mp_limb_t
1655 refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
1656 		 mp_ptr np, mp_size_t nn,
1657 		 mp_srcptr dp)
1658 {
1659   mp_ptr tp;
1660   mp_limb_t qh;
1661 
1662   tp = refmpn_malloc_limbs (nn + qxn);
1663   refmpn_zero (tp, qxn);
1664   refmpn_copyi (tp + qxn, np, nn);
1665   qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
1666   refmpn_copyi (np, tp, 2);
1667   free (tp);
1668   return qh;
1669 }
1670 
1671 /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
1672    paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note that -d-1 < d
1673    since d has the high bit set. */
1674 
1675 mp_limb_t
1676 refmpn_invert_limb (mp_limb_t d)
1677 {
1678   mp_limb_t r;
1679   ASSERT (d & GMP_LIMB_HIGHBIT);
1680   return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
1681 }
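/* Illustrative sketch only: the limb inverse computed above is the value the
   udiv_qrnnd_preinv macro from gmp-impl.h expects for a normalized divisor
   (high bit set).  The helper name and sample values are ours.  */
#if 0
static void
example_invert_limb_use (void)
{
  mp_limb_t  d  = GMP_LIMB_HIGHBIT | CNST_LIMB(123);  /* any d with the high bit set */
  mp_limb_t  di = refmpn_invert_limb (d);
  mp_limb_t  q, r, nh = d - 1, nl = 0;                /* sample numerator, nh < d */
  udiv_qrnnd_preinv (q, r, nh, nl, d, di);
  /* now q*d + r == nh*2^GMP_LIMB_BITS + nl, with 0 <= r < d */
}
#endif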
1682 
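/* Set {rp,n} to the low n limbs of floor((B^(2n)-1) / U), where U={up,n} and
   B=2^GMP_NUMB_BITS; for a normalized U (high bit set) this is the usual
   approximate reciprocal floor((B^(2n)-1)/U) - B^n.  The scratch argument is
   unused here.  */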
1683 void
1684 refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1685 {
1686   mp_ptr qp, tp;
1687   TMP_DECL;
1688   TMP_MARK;
1689 
1690   tp = TMP_ALLOC_LIMBS (2 * n);
1691   qp = TMP_ALLOC_LIMBS (n + 1);
1692 
1693   MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);
1694 
1695   refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
1696   refmpn_copyi (rp, qp, n);
1697 
1698   TMP_FREE;
1699 }
1700 
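/* Set {rp,n} to the inverse of {up,n} modulo B^n, ie. rp*up == 1 mod B^n
   (up[0] must be odd).  The scratch argument is unused here.  */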
1701 void
1702 refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
1703 {
1704   mp_ptr tp;
1705   mp_limb_t binv;
1706   TMP_DECL;
1707   TMP_MARK;
1708 
1709   /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
1710      code.  To make up for it, we check that the inverse is correct using a
1711      multiply.  */
1712 
1713   tp = TMP_ALLOC_LIMBS (2 * n);
1714 
1715   MPN_ZERO (tp, n);
1716   tp[0] = 1;
1717   binvert_limb (binv, up[0]);
1718   mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);
1719 
1720   refmpn_mul_n (tp, rp, up, n);
1721   ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));
1722 
1723   TMP_FREE;
1724 }
1725 
1726 /* The aim is to produce a dst quotient and return a remainder c, satisfying
1727    c*b^n + src-i == 3*dst, where i is the incoming carry.
1728 
1729    Some value c==0, c==1 or c==2 will satisfy, so just try each.
1730 
1731    If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
1732    remainder from the first division attempt determines the correct
1733    remainder (3-c), but don't bother with that, since we can't guarantee
1734    anything about GMP_NUMB_BITS when using nails.
1735 
1736    If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
1737    complement negative, ie. b^n+src-i, and the calculation produces c1
1738    satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
1739    means it's enough to just add any borrow back at the end.
1740 
1741    A borrow only occurs when src==0 or src==1, and, by the same reasoning as in
1742    mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
1743    or 1 respectively, so with 1 added the final return value is still in the
1744    prescribed range 0 to 2. */
1745 
1746 mp_limb_t
1747 refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
1748 {
1749   mp_ptr     spcopy;
1750   mp_limb_t  c, cs;
1751 
1752   ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1753   ASSERT (size >= 1);
1754   ASSERT (carry <= 2);
1755   ASSERT_MPN (sp, size);
1756 
1757   spcopy = refmpn_malloc_limbs (size);
1758   cs = refmpn_sub_1 (spcopy, sp, size, carry);
1759 
1760   for (c = 0; c <= 2; c++)
1761     if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
1762       goto done;
1763   ASSERT_FAIL (no value of c satisfies);
1764 
1765  done:
1766   c += cs;
1767   ASSERT (c <= 2);
1768 
1769   free (spcopy);
1770   return c;
1771 }
1772 
1773 mp_limb_t
1774 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1775 {
1776   return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
1777 }
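/* Illustrative only, not part of the reference code: dividing the single
   limb 21 by 3 exactly gives quotient 7 and zero returned remainder.  */
#if 0
static void
example_divexact_by3 (void)
{
  mp_limb_t  a[1] = { CNST_LIMB(21) }, q[1];
  ASSERT_ALWAYS (refmpn_divexact_by3 (q, a, (mp_size_t) 1) == 0);
  ASSERT_ALWAYS (q[0] == 7);
}
#endif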
1778 
1779 
1780 /* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
1781 void
1782 refmpn_mul_basecase (mp_ptr prodp,
1783 		     mp_srcptr up, mp_size_t usize,
1784 		     mp_srcptr vp, mp_size_t vsize)
1785 {
1786   mp_size_t i;
1787 
1788   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1789   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1790   ASSERT (usize >= vsize);
1791   ASSERT (vsize >= 1);
1792   ASSERT_MPN (up, usize);
1793   ASSERT_MPN (vp, vsize);
1794 
1795   prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
1796   for (i = 1; i < vsize; i++)
1797     prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
1798 }
1799 
1800 
1801 /* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
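/* The middle product: {rp, un-vn+3} is set to the sum of those partial
   products up[i]*vp[j] with vn-1 <= i+j <= un-1, scaled down by B^(vn-1),
   the two extra high limbs taking the carries out of the main un-vn+1
   limb region.  */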
1802 void
1803 refmpn_mulmid_basecase (mp_ptr rp,
1804 			mp_srcptr up, mp_size_t un,
1805 			mp_srcptr vp, mp_size_t vn)
1806 {
1807   mp_limb_t cy;
1808   mp_size_t i;
1809 
1810   ASSERT (un >= vn);
1811   ASSERT (vn >= 1);
1812   ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
1813   ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
1814   ASSERT_MPN (up, un);
1815   ASSERT_MPN (vp, vn);
1816 
1817   rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
1818   rp[un - vn + 2] = CNST_LIMB (0);
1819   for (i = 1; i < vn; i++)
1820     {
1821       cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
1822       cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
1823       cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
1824       ASSERT (cy == 0);
1825     }
1826 }
1827 
1828 void
1829 refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
1830 		      mp_ptr scratch)
1831 {
1832   refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1833 }
1834 
1835 void
1836 refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1837 {
1838   /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1839      off products near the middle product region boundary */
1840   refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1841 }
1842 
1843 void
1844 refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
1845 	       mp_srcptr vp, mp_size_t vn)
1846 {
1847   /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1848      off products near the middle product region boundary */
1849   refmpn_mulmid_basecase (rp, up, un, vp, vn);
1850 }
1851 
1852 
1853 
1854 #define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
1855 #define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
1856 #define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
1857 #if WANT_FFT
1858 #define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
1859 #else
1860 #define FFT_THRESHOLD MP_SIZE_T_MAX /* stop at toom44, don't use toom6h */
1861 #endif
1862 
1863 void
1864 refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
1865 {
1866   mp_ptr tp;
1867   mp_size_t tn;
1868 
1869   if (vn < TOOM3_THRESHOLD)
1870     {
1871       /* In the mpn_mul_basecase and toom2 range, use our own mul_basecase.  */
1872       if (vn != 0)
1873 	refmpn_mul_basecase (wp, up, un, vp, vn);
1874       else
1875 	MPN_ZERO (wp, un);
1876       return;
1877     }
1878 
1879   if (vn < TOOM4_THRESHOLD)
1880     {
1881       /* In the toom3 range, use mpn_toom22_mul.  */
1882       tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
1883       tp = refmpn_malloc_limbs (tn);
1884       mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1885     }
1886   else if (vn < TOOM6_THRESHOLD)
1887     {
1888       /* In the toom4 range, use mpn_toom33_mul.  */
1889       tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
1890       tp = refmpn_malloc_limbs (tn);
1891       mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1892     }
1893   else if (vn < FFT_THRESHOLD)
1894     {
1895       /* In the toom6 range, use mpn_toom44_mul.  */
1896       tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
1897       tp = refmpn_malloc_limbs (tn);
1898       mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1899     }
1900   else
1901     {
1902       /* Finally, for the largest operands, use mpn_toom6h_mul.  */
1903       tn = 2 * vn + mpn_toom6h_mul_itch (vn, vn);
1904       tp = refmpn_malloc_limbs (tn);
1905       mpn_toom6h_mul (tp, up, vn, vp, vn, tp + 2 * vn);
1906     }
1907 
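  /* Here tp holds {up,vn} * {vp,vn}.  When un > vn the high un-vn limbs of U
     still have to be multiplied by V and added in at position vn, since
     U*V = (U mod B^vn)*V + (U div B^vn)*V*B^vn.  */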
1908   if (un != vn)
1909     {
1910       if (un - vn < vn)
1911 	refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
1912       else
1913 	refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
1914 
1915       MPN_COPY (wp, tp, vn);
1916       ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn));
1917     }
1918   else
1919     {
1920       MPN_COPY (wp, tp, 2 * vn);
1921     }
1922 
1923   free (tp);
1924 }
1925 
1926 void
1927 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1928 {
1929   refmpn_mul (prodp, up, size, vp, size);
1930 }
1931 
1932 void
1933 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1934 {
1935   mp_ptr tp = refmpn_malloc_limbs (2*size);
1936   refmpn_mul (tp, up, size, vp, size);
1937   refmpn_copyi (prodp, tp, size);
1938   free (tp);
1939 }
1940 
1941 void
1942 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
1943 {
1944   refmpn_mul (dst, src, size, src, size);
1945 }
1946 
1947 /* Allowing usize<vsize, usize==0 or vsize==0. */
1948 void
1949 refmpn_mul_any (mp_ptr prodp,
1950 		     mp_srcptr up, mp_size_t usize,
1951 		     mp_srcptr vp, mp_size_t vsize)
1952 {
1953   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1954   ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1955   ASSERT (usize >= 0);
1956   ASSERT (vsize >= 0);
1957   ASSERT_MPN (up, usize);
1958   ASSERT_MPN (vp, vsize);
1959 
1960   if (usize == 0)
1961     {
1962       refmpn_fill (prodp, vsize, CNST_LIMB(0));
1963       return;
1964     }
1965 
1966   if (vsize == 0)
1967     {
1968       refmpn_fill (prodp, usize, CNST_LIMB(0));
1969       return;
1970     }
1971 
1972   if (usize >= vsize)
1973     refmpn_mul (prodp, up, usize, vp, vsize);
1974   else
1975     refmpn_mul (prodp, vp, vsize, up, usize);
1976 }
1977 
1978 
1979 mp_limb_t
1980 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
1981 {
1982   mp_limb_t  x;
1983   int  twos;
1984 
1985   ASSERT (y != 0);
1986   ASSERT (! refmpn_zero_p (xp, xsize));
1987   ASSERT_MPN (xp, xsize);
1988   ASSERT_LIMB (y);
1989 
1990   x = refmpn_mod_1 (xp, xsize, y);
1991   if (x == 0)
1992     return y;
1993 
1994   twos = 0;
1995   while ((x & 1) == 0 && (y & 1) == 0)
1996     {
1997       x >>= 1;
1998       y >>= 1;
1999       twos++;
2000     }
2001 
2002   for (;;)
2003     {
2004       while ((x & 1) == 0)  x >>= 1;
2005       while ((y & 1) == 0)  y >>= 1;
2006 
2007       if (x < y)
2008 	MP_LIMB_T_SWAP (x, y);
2009 
2010       x -= y;
2011       if (x == 0)
2012 	break;
2013     }
2014 
2015   return y << twos;
2016 }
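/* Illustrative only, not part of the reference code: gcd of a single-limb
   operand 12 and the limb 18 is 6.  */
#if 0
static void
example_gcd_1 (void)
{
  mp_limb_t  x[1] = { CNST_LIMB(12) };
  ASSERT_ALWAYS (refmpn_gcd_1 (x, (mp_size_t) 1, CNST_LIMB(18)) == 6);
}
#endif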
2017 
2018 
2019 /* Based on the full limb x, not nails. */
2020 unsigned
2021 refmpn_count_leading_zeros (mp_limb_t x)
2022 {
2023   unsigned  n = 0;
2024 
2025   ASSERT (x != 0);
2026 
2027   while ((x & GMP_LIMB_HIGHBIT) == 0)
2028     {
2029       x <<= 1;
2030       n++;
2031     }
2032   return n;
2033 }
2034 
2035 /* Full limbs allowed, not limited to nails. */
2036 unsigned
2037 refmpn_count_trailing_zeros (mp_limb_t x)
2038 {
2039   unsigned  n = 0;
2040 
2041   ASSERT (x != 0);
2042   ASSERT_LIMB (x);
2043 
2044   while ((x & 1) == 0)
2045     {
2046       x >>= 1;
2047       n++;
2048     }
2049   return n;
2050 }
2051 
2052 /* Strip factors of two (low zero bits) from {p,size} by right shifting.
2053    The return value is the number of twos stripped.  */
2054 mp_size_t
2055 refmpn_strip_twos (mp_ptr p, mp_size_t size)
2056 {
2057   mp_size_t  limbs;
2058   unsigned   shift;
2059 
2060   ASSERT (size >= 1);
2061   ASSERT (! refmpn_zero_p (p, size));
2062   ASSERT_MPN (p, size);
2063 
2064   for (limbs = 0; p[0] == 0; limbs++)
2065     {
2066       refmpn_copyi (p, p+1, size-1);
2067       p[size-1] = 0;
2068     }
2069 
2070   shift = refmpn_count_trailing_zeros (p[0]);
2071   if (shift)
2072     refmpn_rshift (p, p, size, shift);
2073 
2074   return limbs*GMP_NUMB_BITS + shift;
2075 }
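/* Illustrative only: stripping the single limb 12 = 0b1100 leaves 3 and
   returns 2 for the two low zero bits removed.  */
#if 0
static void
example_strip_twos (void)
{
  mp_limb_t  p[1] = { CNST_LIMB(12) };
  ASSERT_ALWAYS (refmpn_strip_twos (p, (mp_size_t) 1) == 2);
  ASSERT_ALWAYS (p[0] == 3);
}
#endif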
2076 
2077 mp_limb_t
2078 refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
2079 {
2080   int       cmp;
2081 
2082   ASSERT (ysize >= 1);
2083   ASSERT (xsize >= ysize);
2084   ASSERT ((xp[0] & 1) != 0);
2085   ASSERT ((yp[0] & 1) != 0);
2086   /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
2087   ASSERT (yp[ysize-1] != 0);
2088   ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
2089   ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
2090   ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
2091   if (xsize == ysize)
2092     ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
2093   ASSERT_MPN (xp, xsize);
2094   ASSERT_MPN (yp, ysize);
2095 
2096   refmpn_strip_twos (xp, xsize);
2097   MPN_NORMALIZE (xp, xsize);
2098   MPN_NORMALIZE (yp, ysize);
2099 
2100   for (;;)
2101     {
2102       cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
2103       if (cmp == 0)
2104 	break;
2105       if (cmp < 0)
2106 	MPN_PTR_SWAP (xp,xsize, yp,ysize);
2107 
2108       ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));
2109 
2110       refmpn_strip_twos (xp, xsize);
2111       MPN_NORMALIZE (xp, xsize);
2112     }
2113 
2114   refmpn_copyi (gp, xp, xsize);
2115   return xsize;
2116 }
2117 
2118 unsigned long
2119 ref_popc_limb (mp_limb_t src)
2120 {
2121   unsigned long  count;
2122   int  i;
2123 
2124   count = 0;
2125   for (i = 0; i < GMP_LIMB_BITS; i++)
2126     {
2127       count += (src & 1);
2128       src >>= 1;
2129     }
2130   return count;
2131 }
2132 
2133 unsigned long
2134 refmpn_popcount (mp_srcptr sp, mp_size_t size)
2135 {
2136   unsigned long  count = 0;
2137   mp_size_t  i;
2138 
2139   ASSERT (size >= 0);
2140   ASSERT_MPN (sp, size);
2141 
2142   for (i = 0; i < size; i++)
2143     count += ref_popc_limb (sp[i]);
2144   return count;
2145 }
2146 
2147 unsigned long
2148 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
2149 {
2150   mp_ptr  d;
2151   unsigned long  count;
2152 
2153   ASSERT (size >= 0);
2154   ASSERT_MPN (s1p, size);
2155   ASSERT_MPN (s2p, size);
2156 
2157   if (size == 0)
2158     return 0;
2159 
2160   d = refmpn_malloc_limbs (size);
2161   refmpn_xor_n (d, s1p, s2p, size);
2162   count = refmpn_popcount (d, size);
2163   free (d);
2164   return count;
2165 }
2166 
2167 
2168 /* set r to a%d */
2169 void
2170 refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
2171 {
2172   mp_limb_t  D[2];
2173   int        n;
2174 
2175   ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
2176   ASSERT_MPN (a, 2);
2177   ASSERT_MPN (d, 2);
2178 
2179   D[1] = d[1], D[0] = d[0];
2180   r[1] = a[1], r[0] = a[0];
2181   n = 0;
2182 
2183   for (;;)
2184     {
2185       if (D[1] & GMP_NUMB_HIGHBIT)
2186 	break;
2187       if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
2188 	break;
2189       refmpn_lshift (D, D, (mp_size_t) 2, 1);
2190       n++;
2191       ASSERT (n <= GMP_NUMB_BITS);
2192     }
2193 
2194   while (n >= 0)
2195     {
2196       if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
2197 	ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
2198       refmpn_rshift (D, D, (mp_size_t) 2, 1);
2199       n--;
2200     }
2201 
2202   ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
2203 }
2204 
2205 
2206 
2207 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
2208    particular the trial quotient is allowed to be 2 too big. */
2209 mp_limb_t
2210 refmpn_sb_div_qr (mp_ptr qp,
2211 		  mp_ptr np, mp_size_t nsize,
2212 		  mp_srcptr dp, mp_size_t dsize)
2213 {
2214   mp_limb_t  retval = 0;
2215   mp_size_t  i;
2216   mp_limb_t  d1 = dp[dsize-1];
2217   mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);
2218 
2219   ASSERT (nsize >= dsize);
2220   /* ASSERT (dsize > 2); */
2221   ASSERT (dsize >= 2);
2222   ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
2223   ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
2224   ASSERT_MPN (np, nsize);
2225   ASSERT_MPN (dp, dsize);
2226 
2227   i = nsize-dsize;
2228   if (refmpn_cmp (np+i, dp, dsize) >= 0)
2229     {
2230       ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
2231       retval = 1;
2232     }
2233 
2234   for (i--; i >= 0; i--)
2235     {
2236       mp_limb_t  n0 = np[i+dsize];
2237       mp_limb_t  n1 = np[i+dsize-1];
2238       mp_limb_t  q, dummy_r;
2239 
2240       ASSERT (n0 <= d1);
2241       if (n0 == d1)
2242 	q = GMP_NUMB_MAX;
2243       else
2244 	q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
2245 			       d1 << GMP_NAIL_BITS);
2246 
2247       n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
2248       ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
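      /* A borrow out of the submul means the trial quotient was 1 or 2 too
	 big; add the divisor back (at most twice), decrementing q each
	 time.  */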
2249       if (n0)
2250 	{
2251 	  q--;
2252 	  if (! refmpn_add_n (np+i, np+i, dp, dsize))
2253 	    {
2254 	      q--;
2255 	      ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
2256 	    }
2257 	}
2258       np[i+dsize] = 0;
2259 
2260       qp[i] = q;
2261     }
2262 
2263   /* remainder < divisor */
2264 #if 0		/* ASSERT triggers gcc 4.2.1 bug */
2265   ASSERT (refmpn_cmp (np, dp, dsize) < 0);
2266 #endif
2267 
2268   /* multiply back to original */
2269   {
2270     mp_ptr  mp = refmpn_malloc_limbs (nsize);
2271 
2272     refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
2273     if (retval)
2274       ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
2275     ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
2276     ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);
2277 
2278     free (mp);
2279   }
2280 
2281   free (np_orig);
2282   return retval;
2283 }
2284 
2285 /* Normalize the divisor so its high limb has the high bit set, divide with
2286    refmpn_sb_div_qr, then shift the remainder back down into {rp,dsize}. */
2287 void
2288 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
2289 		mp_ptr np, mp_size_t nsize,
2290 		mp_srcptr dp, mp_size_t dsize)
2291 {
2292   ASSERT (qxn == 0);
2293   ASSERT_MPN (np, nsize);
2294   ASSERT_MPN (dp, dsize);
2295   ASSERT (dsize > 0);
2296   ASSERT (dp[dsize-1] != 0);
2297 
2298   if (dsize == 1)
2299     {
2300       rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
2301       return;
2302     }
2303   else
2304     {
2305       mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
2306       mp_ptr  d2p = refmpn_malloc_limbs (dsize);
2307       int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
2308 
2309       n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
2310       ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
2311 
2312       refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
2313       refmpn_rshift_or_copy (rp, n2p, dsize, norm);
2314 
2315       /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
2316       free (n2p);
2317       free (d2p);
2318     }
2319 }
2320 
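/* Montgomery reduction (REDC): {rp,n} plus the returned carry times B^n is
   congruent to {up,2n} * B^(-n) modulo {mp,n}, where B=2^GMP_NUMB_BITS.
   invm is expected to be -1/mp[0] mod B, so that each addmul_1 below clears
   the current low limb; the caller subtracts {mp,n} as needed afterwards.  */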
2321 mp_limb_t
2322 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
2323 {
2324   mp_size_t j;
2325   mp_limb_t cy;
2326 
2327   ASSERT_MPN (up, 2*n);
2328   /* ASSERT about directed overlap rp, up */
2329   /* ASSERT about overlap rp, mp */
2330   /* ASSERT about overlap up, mp */
2331 
2332   for (j = n - 1; j >= 0; j--)
2333     {
2334       up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
2335       up++;
2336     }
2337   cy = mpn_add_n (rp, up, up - n, n);
2338   return cy;
2339 }
2340 
2341 size_t
2342 refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
2343 {
2344   unsigned char  *d;
2345   size_t  dsize;
2346 
2347   ASSERT (size >= 0);
2348   ASSERT (base >= 2);
2349   ASSERT (base < numberof (mp_bases));
2350   ASSERT (size == 0 || src[size-1] != 0);
2351   ASSERT_MPN (src, size);
2352 
2353   MPN_SIZEINBASE (dsize, src, size, base);
2354   ASSERT (dsize >= 1);
2355   ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));
2356 
2357   if (size == 0)
2358     {
2359       dst[0] = 0;
2360       return 1;
2361     }
2362 
2363   /* don't clobber input for power of 2 bases */
2364   if (POW2_P (base))
2365     src = refmpn_memdup_limbs (src, size);
2366 
2367   d = dst + dsize;
2368   do
2369     {
2370       d--;
2371       ASSERT (d >= dst);
2372       *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
2373       size -= (src[size-1] == 0);
2374     }
2375   while (size != 0);
2376 
2377   /* Move result back and decrement dsize if we didn't generate
2378      the maximum possible digits.  */
2379   if (d != dst)
2380     {
2381       size_t i;
2382       dsize -= d - dst;
2383       for (i = 0; i < dsize; i++)
2384 	dst[i] = d[i];
2385     }
2386 
2387   if (POW2_P (base))
2388     free (src);
2389 
2390   return dsize;
2391 }
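/* Illustrative only, not part of the reference code: the output digits are
   raw values 0..base-1, not ASCII, so converting the single limb 255 to
   base 10 yields the three bytes {2,5,5}.  */
#if 0
static void
example_get_str (void)
{
  mp_limb_t      n[1] = { CNST_LIMB(255) };
  unsigned char  digits[4];
  size_t         len = refmpn_get_str (digits, 10, n, (mp_size_t) 1);
  ASSERT_ALWAYS (len == 3);
  ASSERT_ALWAYS (digits[0] == 2 && digits[1] == 5 && digits[2] == 5);
}
#endif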
2392 
2393 
2394 mp_limb_t
2395 ref_bswap_limb (mp_limb_t src)
2396 {
2397   mp_limb_t  dst;
2398   int        i;
2399 
2400   dst = 0;
2401   for (i = 0; i < GMP_LIMB_BYTES; i++)
2402     {
2403       dst = (dst << 8) + (src & 0xFF);
2404       src >>= 8;
2405     }
2406   return dst;
2407 }
2408 
2409 
2410 /* These random functions are mostly for transitional purposes while adding
2411    nail support, since they're independent of the normal mpn routines.  They
2412    can probably be removed when those normal routines are reliable, though
2413    perhaps something independent would still be useful at times.  */
2414 
2415 #if GMP_LIMB_BITS == 32
2416 #define RAND_A  CNST_LIMB(0x29CF535)
2417 #endif
2418 #if GMP_LIMB_BITS == 64
2419 #define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
2420 #endif
2421 
2422 mp_limb_t  refmpn_random_seed;
2423 
2424 mp_limb_t
2425 refmpn_random_half (void)
2426 {
2427   refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
2428   return (refmpn_random_seed >> GMP_LIMB_BITS/2);
2429 }
2430 
2431 mp_limb_t
2432 refmpn_random_limb (void)
2433 {
2434   return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
2435 	   | refmpn_random_half ()) & GMP_NUMB_MASK;
2436 }
2437 
2438 void
2439 refmpn_random (mp_ptr ptr, mp_size_t size)
2440 {
2441   mp_size_t  i;
2442   if (GMP_NAIL_BITS == 0)
2443     {
2444       mpn_random (ptr, size);
2445       return;
2446     }
2447 
2448   for (i = 0; i < size; i++)
2449     ptr[i] = refmpn_random_limb ();
2450 }
2451 
2452 void
2453 refmpn_random2 (mp_ptr ptr, mp_size_t size)
2454 {
2455   mp_size_t  i;
2456   mp_limb_t  bit, mask, limb;
2457   int        run;
2458 
2459   if (GMP_NAIL_BITS == 0)
2460     {
2461       mpn_random2 (ptr, size);
2462       return;
2463     }
2464 
2465 #define RUN_MODULUS  32
2466 
2467   /* start with ones at a random pos in the high limb */
2468   bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
2469   mask = 0;
2470   run = 0;
2471 
2472   for (i = size-1; i >= 0; i--)
2473     {
2474       limb = 0;
2475       do
2476 	{
2477 	  if (run == 0)
2478 	    {
2479 	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
2480 	      mask = ~mask;
2481 	    }
2482 
2483 	  limb |= (bit & mask);
2484 	  bit >>= 1;
2485 	  run--;
2486 	}
2487       while (bit != 0);
2488 
2489       ptr[i] = limb;
2490       bit = GMP_NUMB_HIGHBIT;
2491     }
2492 }
2493 
2494 /* This is a simple bitwise algorithm working high to low across "s" and
2495    testing each time whether setting the bit would make s^2 exceed n.  */
2496 mp_size_t
2497 refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
2498 {
2499   mp_ptr     tp, dp;
2500   mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
2501   unsigned   ibit;
2502   long       i;
2503   mp_limb_t  c;
2504 
2505   ASSERT (nsize >= 0);
2506 
2507   /* If n==0, then s=0 and r=0.  */
2508   if (nsize == 0)
2509     return 0;
2510 
2511   ASSERT (np[nsize - 1] != 0);
2512   ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
2513   ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
2514   ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));
2515 
2516   /* root */
2517   ssize = (nsize+1)/2;
2518   refmpn_zero (sp, ssize);
2519 
2520   /* the remainder so far */
2521   dp = refmpn_memdup_limbs (np, nsize);
2522   dsize = nsize;
2523 
2524   /* temporary */
2525   talloc = 2*ssize + 1;
2526   tp = refmpn_malloc_limbs (talloc);
2527 
2528   for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
2529     {
2530       /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
2531 	 is added to it */
2532 
2533       ilimbs = (i+1) / GMP_NUMB_BITS;
2534       ibit = (i+1) % GMP_NUMB_BITS;
2535       refmpn_zero (tp, ilimbs);
2536       c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
2537       tsize = ilimbs + ssize;
2538       tp[tsize] = c;
2539       tsize += (c != 0);
2540 
2541       ilimbs = (2*i) / GMP_NUMB_BITS;
2542       ibit = (2*i) % GMP_NUMB_BITS;
2543       if (ilimbs + 1 > tsize)
2544 	{
2545 	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
2546 	  tsize = ilimbs + 1;
2547 	}
2548       c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
2549 			CNST_LIMB(1) << ibit);
2550       ASSERT (tsize < talloc);
2551       tp[tsize] = c;
2552       tsize += (c != 0);
2553 
2554       if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
2555 	{
2556 	  /* set this bit in s and subtract from the remainder */
2557 	  refmpn_setbit (sp, i);
2558 
2559 	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
2560 	  dsize = refmpn_normalize (dp, dsize);
2561 	}
2562     }
2563 
2564   if (rp == NULL)
2565     {
2566       ret = ! refmpn_zero_p (dp, dsize);
2567     }
2568   else
2569     {
2570       ASSERT (dsize == 0 || dp[dsize-1] != 0);
2571       refmpn_copy (rp, dp, dsize);
2572       ret = dsize;
2573     }
2574 
2575   free (dp);
2576   free (tp);
2577   return ret;
2578 }
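/* Illustrative only, not part of the reference code: the square root of the
   single limb 10 is 3 with a one-limb remainder of 1.  */
#if 0
static void
example_sqrtrem (void)
{
  mp_limb_t  n[1] = { CNST_LIMB(10) }, s[1], r[1];
  ASSERT_ALWAYS (refmpn_sqrtrem (s, r, n, (mp_size_t) 1) == 1);
  ASSERT_ALWAYS (s[0] == 3 && r[0] == 1);
}
#endif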
2579