1 /* Reference mpn functions, designed to be simple, portable and independent
2 of the normal gmp code. Speed isn't a consideration.
3
4 Copyright 1996-2009, 2011-2014 Free Software Foundation, Inc.
5
6 This file is part of the GNU MP Library test suite.
7
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
16 Public License for more details.
17
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
20
21
22 /* Most routines have assertions representing what the mpn routines are
23 supposed to accept. Many of these reference routines do sensible things
24 outside these ranges (eg. for size==0), but the assertions are present to
25 pick up bad parameters passed here that are about to be passed the same
26 to a real mpn routine being compared. */
27
28 /* always do assertion checking */
29 #define WANT_ASSERT 1
30
31 #include <stdio.h> /* for NULL */
32 #include <stdlib.h> /* for malloc */
33
34 #include "gmp.h"
35 #include "gmp-impl.h"
36 #include "longlong.h"
37
38 #include "tests.h"
39
40
41
42 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
43 in bytes. */
44 int
byte_overlap_p(const void * v_xp,mp_size_t xsize,const void * v_yp,mp_size_t ysize)45 byte_overlap_p (const void *v_xp, mp_size_t xsize,
46 const void *v_yp, mp_size_t ysize)
47 {
48 const char *xp = (const char *) v_xp;
49 const char *yp = (const char *) v_yp;
50
51 ASSERT (xsize >= 0);
52 ASSERT (ysize >= 0);
53
54 /* no wraparounds */
55 ASSERT (xp+xsize >= xp);
56 ASSERT (yp+ysize >= yp);
57
58 if (xp + xsize <= yp)
59 return 0;
60
61 if (yp + ysize <= xp)
62 return 0;
63
64 return 1;
65 }
66
67 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
68 int
refmpn_overlap_p(mp_srcptr xp,mp_size_t xsize,mp_srcptr yp,mp_size_t ysize)69 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
70 {
71 return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
72 yp, ysize * GMP_LIMB_BYTES);
73 }
74
75 /* Check overlap for a routine defined to work low to high. */
76 int
refmpn_overlap_low_to_high_p(mp_srcptr dst,mp_srcptr src,mp_size_t size)77 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
78 {
79 return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
80 }
81
82 /* Check overlap for a routine defined to work high to low. */
83 int
refmpn_overlap_high_to_low_p(mp_srcptr dst,mp_srcptr src,mp_size_t size)84 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
85 {
86 return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
87 }
88
89 /* Check overlap for a standard routine requiring equal or separate. */
90 int
refmpn_overlap_fullonly_p(mp_srcptr dst,mp_srcptr src,mp_size_t size)91 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
92 {
93 return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
94 }
95 int
refmpn_overlap_fullonly_two_p(mp_srcptr dst,mp_srcptr src1,mp_srcptr src2,mp_size_t size)96 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
97 mp_size_t size)
98 {
99 return (refmpn_overlap_fullonly_p (dst, src1, size)
100 && refmpn_overlap_fullonly_p (dst, src2, size));
101 }
102
103
104 mp_ptr
refmpn_malloc_limbs(mp_size_t size)105 refmpn_malloc_limbs (mp_size_t size)
106 {
107 mp_ptr p;
108 ASSERT (size >= 0);
109 if (size == 0)
110 size = 1;
111 p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
112 ASSERT (p != NULL);
113 return p;
114 }
115
116 /* Free limbs allocated by refmpn_malloc_limbs. NOTE: Can't free
117 * memory allocated by refmpn_malloc_limbs_aligned. */
118 void
refmpn_free_limbs(mp_ptr p)119 refmpn_free_limbs (mp_ptr p)
120 {
121 free (p);
122 }
123
124 mp_ptr
refmpn_memdup_limbs(mp_srcptr ptr,mp_size_t size)125 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
126 {
127 mp_ptr p;
128 p = refmpn_malloc_limbs (size);
129 refmpn_copyi (p, ptr, size);
130 return p;
131 }
132
133 /* malloc n limbs on a multiple of m bytes boundary */
134 mp_ptr
refmpn_malloc_limbs_aligned(mp_size_t n,size_t m)135 refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
136 {
137 return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
138 }
139
140
141 void
refmpn_fill(mp_ptr ptr,mp_size_t size,mp_limb_t value)142 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
143 {
144 mp_size_t i;
145 ASSERT (size >= 0);
146 for (i = 0; i < size; i++)
147 ptr[i] = value;
148 }
149
150 void
refmpn_zero(mp_ptr ptr,mp_size_t size)151 refmpn_zero (mp_ptr ptr, mp_size_t size)
152 {
153 refmpn_fill (ptr, size, CNST_LIMB(0));
154 }
155
156 void
refmpn_zero_extend(mp_ptr ptr,mp_size_t oldsize,mp_size_t newsize)157 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
158 {
159 ASSERT (newsize >= oldsize);
160 refmpn_zero (ptr+oldsize, newsize-oldsize);
161 }
162
163 int
refmpn_zero_p(mp_srcptr ptr,mp_size_t size)164 refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
165 {
166 mp_size_t i;
167 for (i = 0; i < size; i++)
168 if (ptr[i] != 0)
169 return 0;
170 return 1;
171 }
172
173 mp_size_t
refmpn_normalize(mp_srcptr ptr,mp_size_t size)174 refmpn_normalize (mp_srcptr ptr, mp_size_t size)
175 {
176 ASSERT (size >= 0);
177 while (size > 0 && ptr[size-1] == 0)
178 size--;
179 return size;
180 }
181
182 /* the highest one bit in x */
183 mp_limb_t
refmpn_msbone(mp_limb_t x)184 refmpn_msbone (mp_limb_t x)
185 {
186 mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
187
188 while (n != 0)
189 {
190 if (x & n)
191 break;
192 n >>= 1;
193 }
194 return n;
195 }
196
197 /* a mask of the highest one bit plus and all bits below */
198 mp_limb_t
refmpn_msbone_mask(mp_limb_t x)199 refmpn_msbone_mask (mp_limb_t x)
200 {
201 if (x == 0)
202 return 0;
203
204 return (refmpn_msbone (x) << 1) - 1;
205 }
206
/* How many digits in the given base will fit in a limb.
   Notice that the product b is allowed to be equal to the limit
   2^GMP_NUMB_BITS, this ensures the result for base==2 will be
   GMP_NUMB_BITS (and similarly other powers of 2). */
int
refmpn_chars_per_limb (int base)
{
  mp_limb_t limit[2], b[2];
  int chars_per_limb;

  ASSERT (base >= 2);

  limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
  limit[1] = 1;
  b[0] = 1;      /* b = 1 */
  b[1] = 0;

  /* Repeatedly multiply b by base in 2-limb arithmetic; each step that
     stays within limit means one more digit fits in a limb.  */
  chars_per_limb = 0;
  for (;;)
    {
      /* non-zero return is a carry out of the 2-limb b, ie. overflow */
      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
        break;
      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
        break;
      chars_per_limb++;
    }
  return chars_per_limb;
}

/* The biggest value base**n which fits in GMP_NUMB_BITS. */
mp_limb_t
refmpn_big_base (int base)
{
  int chars_per_limb = refmpn_chars_per_limb (base);
  int i;
  mp_limb_t bb;

  ASSERT (base >= 2);
  /* base^chars_per_limb; by definition of chars_per_limb this fits */
  bb = 1;
  for (i = 0; i < chars_per_limb; i++)
    bb *= base;
  return bb;
}
250
251
/* Set bit number BIT (counting from 0 at the least significant end,
   GMP_NUMB_BITS bits per limb) in ptr. */
void
refmpn_setbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
}

/* Clear bit number BIT in ptr. */
void
refmpn_clrbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
}

/* Non-zero (1) if bit number "bit" of ptr is set. */
#define REFMPN_TSTBIT(ptr,bit)  \
  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)
266
267 int
refmpn_tstbit(mp_srcptr ptr,unsigned long bit)268 refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
269 {
270 return REFMPN_TSTBIT (ptr, bit);
271 }
272
273 unsigned long
refmpn_scan0(mp_srcptr ptr,unsigned long bit)274 refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
275 {
276 while (REFMPN_TSTBIT (ptr, bit) != 0)
277 bit++;
278 return bit;
279 }
280
281 unsigned long
refmpn_scan1(mp_srcptr ptr,unsigned long bit)282 refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
283 {
284 while (REFMPN_TSTBIT (ptr, bit) == 0)
285 bit++;
286 return bit;
287 }
288
289 void
refmpn_copy(mp_ptr rp,mp_srcptr sp,mp_size_t size)290 refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
291 {
292 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
293 refmpn_copyi (rp, sp, size);
294 }
295
296 void
refmpn_copyi(mp_ptr rp,mp_srcptr sp,mp_size_t size)297 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
298 {
299 mp_size_t i;
300
301 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
302 ASSERT (size >= 0);
303
304 for (i = 0; i < size; i++)
305 rp[i] = sp[i];
306 }
307
308 void
refmpn_copyd(mp_ptr rp,mp_srcptr sp,mp_size_t size)309 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
310 {
311 mp_size_t i;
312
313 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
314 ASSERT (size >= 0);
315
316 for (i = size-1; i >= 0; i--)
317 rp[i] = sp[i];
318 }
319
320 /* Copy {xp,xsize} to {wp,wsize}. If x is shorter, then pad w with low
321 zeros to wsize. If x is longer, then copy just the high wsize limbs. */
322 void
refmpn_copy_extend(mp_ptr wp,mp_size_t wsize,mp_srcptr xp,mp_size_t xsize)323 refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
324 {
325 ASSERT (wsize >= 0);
326 ASSERT (xsize >= 0);
327
328 /* high part of x if x bigger than w */
329 if (xsize > wsize)
330 {
331 xp += xsize - wsize;
332 xsize = wsize;
333 }
334
335 refmpn_copy (wp + wsize-xsize, xp, xsize);
336 refmpn_zero (wp, wsize-xsize);
337 }
338
339 int
refmpn_cmp(mp_srcptr xp,mp_srcptr yp,mp_size_t size)340 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
341 {
342 mp_size_t i;
343
344 ASSERT (size >= 1);
345 ASSERT_MPN (xp, size);
346 ASSERT_MPN (yp, size);
347
348 for (i = size-1; i >= 0; i--)
349 {
350 if (xp[i] > yp[i]) return 1;
351 if (xp[i] < yp[i]) return -1;
352 }
353 return 0;
354 }
355
356 int
refmpn_cmp_allowzero(mp_srcptr xp,mp_srcptr yp,mp_size_t size)357 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
358 {
359 if (size == 0)
360 return 0;
361 else
362 return refmpn_cmp (xp, yp, size);
363 }
364
/* Compare {xp,xsize} and {yp,ysize}, which may differ in length.
   Return 1, 0 or -1 for x>y, x==y, x<y respectively. */
int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
		     mp_srcptr yp, mp_size_t ysize)
{
  int opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  /* make x the longer (or equal) operand, remembering whether the
     operands were exchanged so the result can be negated at the end */
  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  /* any non-zero limb in the high excess part of x makes x bigger */
  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}
385
386 int
refmpn_equal_anynail(mp_srcptr xp,mp_srcptr yp,mp_size_t size)387 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
388 {
389 mp_size_t i;
390 ASSERT (size >= 0);
391
392 for (i = 0; i < size; i++)
393 if (xp[i] != yp[i])
394 return 0;
395 return 1;
396 }
397
398
/* Shared body for the bitwise logical operations below: validate the
   operands, then store "operation" (an expression in s1p[i] and s2p[i])
   into each limb of {rp,size}.  XOR-ing with GMP_NUMB_MASK complements
   a value within the number bits. */
#define LOGOPS(operation)                                               \
  {                                                                     \
    mp_size_t i;                                                        \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      rp[i] = operation;                                                \
  }

/* rp = s1 AND s2 */
void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
/* rp = s1 AND (NOT s2) */
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
/* rp = NOT (s1 AND s2) */
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}
/* rp = s1 OR s2 */
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
/* rp = s1 OR (NOT s2) */
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}
/* rp = NOT (s1 OR s2) */
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}
/* rp = s1 XOR s2 */
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
/* rp = NOT (s1 XOR s2) */
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}
452
453
454 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
455 void
refmpn_sub_ddmmss(mp_limb_t * dh,mp_limb_t * dl,mp_limb_t mh,mp_limb_t ml,mp_limb_t sh,mp_limb_t sl)456 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
457 mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
458 {
459 *dl = ml - sl;
460 *dh = mh - sh - (ml < sl);
461 }
462
463
/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  *w = sum;
  /* unsigned wraparound indicates a carry out */
  cy = (sum < x);
#else
  /* with nails the carry lands in the bits above GMP_NUMB_BITS */
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}

/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  /* unsigned wraparound (result bigger than minuend) means a borrow */
  *w = diff;
  cy = (diff > x);
#else
  /* with nails the borrow shows up just above GMP_NUMB_BITS */
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}
503
504 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
505 mp_limb_t
adc(mp_limb_t * w,mp_limb_t x,mp_limb_t y,mp_limb_t c)506 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
507 {
508 mp_limb_t r;
509
510 ASSERT_LIMB (x);
511 ASSERT_LIMB (y);
512 ASSERT (c == 0 || c == 1);
513
514 r = ref_addc_limb (w, x, y);
515 return r + ref_addc_limb (w, *w, c);
516 }
517
518 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
519 mp_limb_t
sbb(mp_limb_t * w,mp_limb_t x,mp_limb_t y,mp_limb_t c)520 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
521 {
522 mp_limb_t r;
523
524 ASSERT_LIMB (x);
525 ASSERT_LIMB (y);
526 ASSERT (c == 0 || c == 1);
527
528 r = ref_subc_limb (w, x, y);
529 return r + ref_subc_limb (w, *w, c);
530 }
531
532
/* Shared body for refmpn_add_1 and refmpn_sub_1: apply "operation" (a
   limb add/sub primitive) across {sp,size} with n as the initial second
   operand, propagating the carry/borrow, and return the final one. */
#define AORS_1(operation)                                       \
  {                                                             \
    mp_size_t  i;                                               \
                                                                \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
    ASSERT (size >= 1);                                         \
    ASSERT_MPN (sp, size);                                      \
    ASSERT_LIMB (n);                                            \
                                                                \
    for (i = 0; i < size; i++)                                  \
      n = operation (&rp[i], sp[i], n);                         \
    return n;                                                   \
  }

/* {rp,size} = {sp,size} + n, returning the carry out */
mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_addc_limb);
}
/* {rp,size} = {sp,size} - n, returning the borrow out */
mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_subc_limb);
}
557
/* Shared body for refmpn_add_nc and refmpn_sub_nc: limb-by-limb
   "operation" with carry/borrow propagated from the given initial
   carry, returning the final carry/borrow out. */
#define AORS_NC(operation)                                              \
  {                                                                     \
    mp_size_t  i;                                                       \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
    return carry;                                                       \
  }

/* {rp,size} = {s1p,size} + {s2p,size} + carry, returning carry out */
mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
	       mp_limb_t carry)
{
  AORS_NC (adc);
}
/* {rp,size} = {s1p,size} - {s2p,size} - carry, returning borrow out */
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
	       mp_limb_t carry)
{
  AORS_NC (sbb);
}


/* {rp,size} = {s1p,size} + {s2p,size}, returning the carry out */
mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
/* {rp,size} = {s1p,size} - {s2p,size}, returning the borrow out */
mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
597
598 mp_limb_t
refmpn_cnd_add_n(mp_limb_t cnd,mp_ptr rp,mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)599 refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
600 {
601 if (cnd != 0)
602 return refmpn_add_n (rp, s1p, s2p, size);
603 else
604 {
605 refmpn_copyi (rp, s1p, size);
606 return 0;
607 }
608 }
609 mp_limb_t
refmpn_cnd_sub_n(mp_limb_t cnd,mp_ptr rp,mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)610 refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
611 {
612 if (cnd != 0)
613 return refmpn_sub_n (rp, s1p, s2p, size);
614 else
615 {
616 refmpn_copyi (rp, s1p, size);
617 return 0;
618 }
619 }
620
621
/* Shared body for refmpn_add_err1_n and refmpn_sub_err1_n: besides
   rp = s1 +/- s2 with the given initial carry, accumulate into the
   2-limb {ep,2} the sum of yp[size-1-i] over every position i which
   produced a carry/borrow out. */
#define AORS_ERR1_N(operation)                                          \
  {                                                                     \
    mp_size_t  i;                                                       \
    mp_limb_t carry2;                                                   \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                 \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                 \
    ASSERT (! refmpn_overlap_p (rp, size, yp, size));                   \
    ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 2, yp, size));                      \
    ASSERT (! refmpn_overlap_p (ep, 2, rp, size));                      \
                                                                        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
    ASSERT_MPN (yp, size);                                              \
                                                                        \
    ep[0] = ep[1] = CNST_LIMB(0);                                       \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      {                                                                 \
        carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
        if (carry == 1)                                                 \
          {                                                             \
            carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);   \
            carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
            ASSERT (carry2 == 0);                                       \
          }                                                             \
      }                                                                 \
    return carry;                                                       \
  }

/* Add with one error term, see AORS_ERR1_N above. */
mp_limb_t
refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr yp,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR1_N (adc);
}
/* Subtract with one error term, see AORS_ERR1_N above. */
mp_limb_t
refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr yp,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR1_N (sbb);
}
670
671
/* As AORS_ERR1_N, but with two independent error accumulators:
   {ep,2} sums y1p[size-1-i] and {ep+2,2} sums y2p[size-1-i] over the
   positions i which produced a carry/borrow out. */
#define AORS_ERR2_N(operation)                                          \
  {                                                                     \
    mp_size_t  i;                                                       \
    mp_limb_t carry2;                                                   \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                 \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                 \
    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));                  \
    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));                  \
    ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, rp, size));                      \
                                                                        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
    ASSERT_MPN (y1p, size);                                             \
    ASSERT_MPN (y2p, size);                                             \
                                                                        \
    ep[0] = ep[1] = CNST_LIMB(0);                                       \
    ep[2] = ep[3] = CNST_LIMB(0);                                       \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      {                                                                 \
        carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
        if (carry == 1)                                                 \
          {                                                             \
            carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);  \
            carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
            ASSERT (carry2 == 0);                                       \
            carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);  \
            carry2 = ref_addc_limb (&ep[3], ep[3], carry2);             \
            ASSERT (carry2 == 0);                                       \
          }                                                             \
      }                                                                 \
    return carry;                                                       \
  }

/* Add with two error terms, see AORS_ERR2_N above. */
mp_limb_t
refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR2_N (adc);
}
/* Subtract with two error terms, see AORS_ERR2_N above. */
mp_limb_t
refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR2_N (sbb);
}
727
728
/* As AORS_ERR1_N, but with three independent error accumulators:
   {ep,2}, {ep+2,2} and {ep+4,2} sum y1p, y2p and y3p limbs (taken in
   reverse order) at the positions which produced a carry/borrow out. */
#define AORS_ERR3_N(operation)                                          \
  {                                                                     \
    mp_size_t  i;                                                       \
    mp_limb_t carry2;                                                   \
                                                                        \
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                 \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                 \
    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));                  \
    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));                  \
    ASSERT (! refmpn_overlap_p (rp, size, y3p, size));                  \
    ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, rp, size));                      \
                                                                        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
    ASSERT_MPN (y1p, size);                                             \
    ASSERT_MPN (y2p, size);                                             \
    ASSERT_MPN (y3p, size);                                             \
                                                                        \
    ep[0] = ep[1] = CNST_LIMB(0);                                       \
    ep[2] = ep[3] = CNST_LIMB(0);                                       \
    ep[4] = ep[5] = CNST_LIMB(0);                                       \
                                                                        \
    for (i = 0; i < size; i++)                                          \
      {                                                                 \
        carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
        if (carry == 1)                                                 \
          {                                                             \
            carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);  \
            carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
            ASSERT (carry2 == 0);                                       \
            carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);  \
            carry2 = ref_addc_limb (&ep[3], ep[3], carry2);             \
            ASSERT (carry2 == 0);                                       \
            carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);  \
            carry2 = ref_addc_limb (&ep[5], ep[5], carry2);             \
            ASSERT (carry2 == 0);                                       \
          }                                                             \
      }                                                                 \
    return carry;                                                       \
  }

/* Add with three error terms, see AORS_ERR3_N above. */
mp_limb_t
refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR3_N (adc);
}
/* Subtract with three error terms, see AORS_ERR3_N above. */
mp_limb_t
refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR3_N (sbb);
}
791
792
793 mp_limb_t
refmpn_addlsh_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s)794 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
795 mp_size_t n, unsigned int s)
796 {
797 mp_limb_t cy;
798 mp_ptr tp;
799
800 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
801 ASSERT (n >= 1);
802 ASSERT (0 < s && s < GMP_NUMB_BITS);
803 ASSERT_MPN (up, n);
804 ASSERT_MPN (vp, n);
805
806 tp = refmpn_malloc_limbs (n);
807 cy = refmpn_lshift (tp, vp, n, s);
808 cy += refmpn_add_n (rp, up, tp, n);
809 free (tp);
810 return cy;
811 }
812 mp_limb_t
refmpn_addlsh1_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)813 refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
814 {
815 return refmpn_addlsh_n (rp, up, vp, n, 1);
816 }
817 mp_limb_t
refmpn_addlsh2_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)818 refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
819 {
820 return refmpn_addlsh_n (rp, up, vp, n, 2);
821 }
822 mp_limb_t
refmpn_addlsh_n_ip1(mp_ptr rp,mp_srcptr vp,mp_size_t n,unsigned int s)823 refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
824 {
825 return refmpn_addlsh_n (rp, rp, vp, n, s);
826 }
827 mp_limb_t
refmpn_addlsh1_n_ip1(mp_ptr rp,mp_srcptr vp,mp_size_t n)828 refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
829 {
830 return refmpn_addlsh_n (rp, rp, vp, n, 1);
831 }
832 mp_limb_t
refmpn_addlsh2_n_ip1(mp_ptr rp,mp_srcptr vp,mp_size_t n)833 refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
834 {
835 return refmpn_addlsh_n (rp, rp, vp, n, 2);
836 }
837 mp_limb_t
refmpn_addlsh_n_ip2(mp_ptr rp,mp_srcptr vp,mp_size_t n,unsigned int s)838 refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
839 {
840 return refmpn_addlsh_n (rp, vp, rp, n, s);
841 }
842 mp_limb_t
refmpn_addlsh1_n_ip2(mp_ptr rp,mp_srcptr vp,mp_size_t n)843 refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
844 {
845 return refmpn_addlsh_n (rp, vp, rp, n, 1);
846 }
847 mp_limb_t
refmpn_addlsh2_n_ip2(mp_ptr rp,mp_srcptr vp,mp_size_t n)848 refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
849 {
850 return refmpn_addlsh_n (rp, vp, rp, n, 2);
851 }
852 mp_limb_t
refmpn_addlsh_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s,mp_limb_t carry)853 refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
854 mp_size_t n, unsigned int s, mp_limb_t carry)
855 {
856 mp_limb_t cy;
857
858 ASSERT (carry <= (CNST_LIMB(1) << s));
859
860 cy = refmpn_addlsh_n (rp, up, vp, n, s);
861 cy += refmpn_add_1 (rp, rp, n, carry);
862 return cy;
863 }
864 mp_limb_t
refmpn_addlsh1_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,mp_limb_t carry)865 refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
866 {
867 return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
868 }
869 mp_limb_t
refmpn_addlsh2_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,mp_limb_t carry)870 refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
871 {
872 return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
873 }
874
875 mp_limb_t
refmpn_sublsh_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s)876 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
877 mp_size_t n, unsigned int s)
878 {
879 mp_limb_t cy;
880 mp_ptr tp;
881
882 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
883 ASSERT (n >= 1);
884 ASSERT (0 < s && s < GMP_NUMB_BITS);
885 ASSERT_MPN (up, n);
886 ASSERT_MPN (vp, n);
887
888 tp = refmpn_malloc_limbs (n);
889 cy = mpn_lshift (tp, vp, n, s);
890 cy += mpn_sub_n (rp, up, tp, n);
891 free (tp);
892 return cy;
893 }
894 mp_limb_t
refmpn_sublsh1_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)895 refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
896 {
897 return refmpn_sublsh_n (rp, up, vp, n, 1);
898 }
899 mp_limb_t
refmpn_sublsh2_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)900 refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
901 {
902 return refmpn_sublsh_n (rp, up, vp, n, 2);
903 }
904 mp_limb_t
refmpn_sublsh_n_ip1(mp_ptr rp,mp_srcptr vp,mp_size_t n,unsigned int s)905 refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
906 {
907 return refmpn_sublsh_n (rp, rp, vp, n, s);
908 }
909 mp_limb_t
refmpn_sublsh1_n_ip1(mp_ptr rp,mp_srcptr vp,mp_size_t n)910 refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
911 {
912 return refmpn_sublsh_n (rp, rp, vp, n, 1);
913 }
914 mp_limb_t
refmpn_sublsh2_n_ip1(mp_ptr rp,mp_srcptr vp,mp_size_t n)915 refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
916 {
917 return refmpn_sublsh_n (rp, rp, vp, n, 2);
918 }
919 mp_limb_t
refmpn_sublsh_n_ip2(mp_ptr rp,mp_srcptr vp,mp_size_t n,unsigned int s)920 refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
921 {
922 return refmpn_sublsh_n (rp, vp, rp, n, s);
923 }
924 mp_limb_t
refmpn_sublsh1_n_ip2(mp_ptr rp,mp_srcptr vp,mp_size_t n)925 refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
926 {
927 return refmpn_sublsh_n (rp, vp, rp, n, 1);
928 }
929 mp_limb_t
refmpn_sublsh2_n_ip2(mp_ptr rp,mp_srcptr vp,mp_size_t n)930 refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
931 {
932 return refmpn_sublsh_n (rp, vp, rp, n, 2);
933 }
934 mp_limb_t
refmpn_sublsh_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s,mp_limb_t carry)935 refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
936 mp_size_t n, unsigned int s, mp_limb_t carry)
937 {
938 mp_limb_t cy;
939
940 ASSERT (carry <= (CNST_LIMB(1) << s));
941
942 cy = refmpn_sublsh_n (rp, up, vp, n, s);
943 cy += refmpn_sub_1 (rp, rp, n, carry);
944 return cy;
945 }
946 mp_limb_t
refmpn_sublsh1_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,mp_limb_t carry)947 refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
948 {
949 return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
950 }
951 mp_limb_t
refmpn_sublsh2_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,mp_limb_t carry)952 refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
953 {
954 return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
955 }
956
957 mp_limb_signed_t
refmpn_rsblsh_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s)958 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
959 mp_size_t n, unsigned int s)
960 {
961 mp_limb_signed_t cy;
962 mp_ptr tp;
963
964 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
965 ASSERT (n >= 1);
966 ASSERT (0 < s && s < GMP_NUMB_BITS);
967 ASSERT_MPN (up, n);
968 ASSERT_MPN (vp, n);
969
970 tp = refmpn_malloc_limbs (n);
971 cy = mpn_lshift (tp, vp, n, s);
972 cy -= mpn_sub_n (rp, tp, up, n);
973 free (tp);
974 return cy;
975 }
976 mp_limb_signed_t
refmpn_rsblsh1_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)977 refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
978 {
979 return refmpn_rsblsh_n (rp, up, vp, n, 1);
980 }
981 mp_limb_signed_t
refmpn_rsblsh2_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)982 refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
983 {
984 return refmpn_rsblsh_n (rp, up, vp, n, 2);
985 }
986 mp_limb_signed_t
refmpn_rsblsh_nc(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s,mp_limb_signed_t carry)987 refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
988 mp_size_t n, unsigned int s, mp_limb_signed_t carry)
989 {
990 mp_limb_signed_t cy;
991
992 ASSERT (carry == -1 || (carry >> s) == 0);
993
994 cy = refmpn_rsblsh_n (rp, up, vp, n, s);
995 if (carry > 0)
996 cy += refmpn_add_1 (rp, rp, n, carry);
997 else
998 cy -= refmpn_sub_1 (rp, rp, n, -carry);
999 return cy;
1000 }
/* refmpn_rsblsh_nc with shift fixed at 1.  */
mp_limb_signed_t
refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
{
  return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
}
/* refmpn_rsblsh_nc with shift fixed at 2.  */
mp_limb_signed_t
refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
{
  return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
}
1011
/* {rp,n} = ({up,n} + {vp,n}) >> 1, the addition carry becoming the new
   high bit of rp[n-1].  Returns the bit shifted out at the low end.  */
mp_limb_t
refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_add_n (rp, up, vp, n);
  /* mpn_rshift returns the shifted-out bit at the top of a limb;
     bring it down to bit 0 for the return value */
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  /* re-insert the addition carry as the new most significant bit */
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  return cys;
}
/* {rp,n} = ({up,n} - {vp,n}) >> 1, the subtraction borrow becoming the
   new high bit of rp[n-1].  Returns the bit shifted out at the low end.  */
mp_limb_t
refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_sub_n (rp, up, vp, n);
  /* mpn_rshift returns the shifted-out bit at the top of a limb;
     bring it down to bit 0 for the return value */
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  /* re-insert the borrow as the new most significant bit */
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  return cys;
}
1042
1043 /* Twos complement, return borrow. */
1044 mp_limb_t
refmpn_neg(mp_ptr dst,mp_srcptr src,mp_size_t size)1045 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
1046 {
1047 mp_ptr zeros;
1048 mp_limb_t ret;
1049
1050 ASSERT (size >= 1);
1051
1052 zeros = refmpn_malloc_limbs (size);
1053 refmpn_fill (zeros, size, CNST_LIMB(0));
1054 ret = refmpn_sub_n (dst, zeros, src, size);
1055 free (zeros);
1056 return ret;
1057 }
1058
1059
/* Shared body for refmpn_add and refmpn_sub below.  Applies the _n
   routine to the common low s2size limbs, then propagates the
   carry/borrow through the remaining high limbs of s1 with the _1
   routine.  Expands inside a function with rp, s1p, s1size, s2p,
   s2size in scope and "returns" the final carry/borrow.  */
#define AORS(aors_n, aors_1)                                    \
  {                                                             \
    mp_limb_t  c;                                               \
    ASSERT (s1size >= s2size);                                  \
    ASSERT (s2size >= 1);                                       \
    c = aors_n (rp, s1p, s2p, s2size);                          \
    if (s1size-s2size != 0)                                     \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);     \
    return c;                                                   \
  }
/* {rp,s1size} = {s1p,s1size} + {s2p,s2size}, s1size >= s2size >= 1.
   Returns the carry out of the top limb.  */
mp_limb_t
refmpn_add (mp_ptr rp,
	    mp_srcptr s1p, mp_size_t s1size,
	    mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
/* {rp,s1size} = {s1p,s1size} - {s2p,s2size}, s1size >= s2size >= 1.
   Returns the borrow out of the top limb.  */
mp_limb_t
refmpn_sub (mp_ptr rp,
	    mp_srcptr s1p, mp_size_t s1size,
	    mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}
1084
1085
/* Half-limb helpers for the schoolbook double-limb multiply below.
   SHIFTHIGH/SHIFTLOW move a half-limb into/out of the top half of a
   limb; LOWPART/HIGHPART extract the halves, HIGHPART shifted down to
   the low half.  */
#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
#define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)

#define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
1094
/* Set return:*lo to x*y, using full limbs not nails.  Schoolbook
   multiply on half-limbs: four partial products, two of which
   straddle the limb boundary and are split between hi and *lo.  */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  hi, s;

  /* low*low and high*high land entirely in *lo and hi respectively */
  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  /* first cross product: top half into hi, bottom half into *lo,
     with carry from *lo propagated into hi */
  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  /* second cross product, same treatment */
  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  return hi;
}
1118
/* Same as refmpn_umul_ppmm but with the result-low pointer last.  */
mp_limb_t
refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
{
  return refmpn_umul_ppmm (lo, x, y);
}
1124
/* {rp,size} = {sp,size} * multiplier + carry, returning the new high
   carry limb.  Nail-aware: the multiply is done on full limbs with
   the operands pre-shifted.  */
mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
	       mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      /* with the multiplier shifted up, the nail bits of the product
	 appear at the bottom of lo; drop them */
      lo >>= GMP_NAIL_BITS;
      /* adding the carry can ripple into hi but never overflows it */
      ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}
1149
/* {rp,size} = {sp,size} * multiplier, returning the high carry limb.  */
mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
1155
1156
1157 mp_limb_t
refmpn_mul_N(mp_ptr dst,mp_srcptr src,mp_size_t size,mp_srcptr mult,mp_size_t msize)1158 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1159 mp_srcptr mult, mp_size_t msize)
1160 {
1161 mp_ptr src_copy;
1162 mp_limb_t ret;
1163 mp_size_t i;
1164
1165 ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
1166 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1167 ASSERT (size >= msize);
1168 ASSERT_MPN (mult, msize);
1169
1170 /* in case dst==src */
1171 src_copy = refmpn_malloc_limbs (size);
1172 refmpn_copyi (src_copy, src, size);
1173 src = src_copy;
1174
1175 dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
1176 for (i = 1; i < msize-1; i++)
1177 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1178 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1179
1180 free (src_copy);
1181 return ret;
1182 }
1183
/* Fixed-width multiplier wrappers: {rp,size+N} = {sp,size} * {mult,N},
   top limb returned rather than stored.  All delegate to refmpn_mul_N.  */
mp_limb_t
refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
}
mp_limb_t
refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
}
mp_limb_t
refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
}
mp_limb_t
refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
}
mp_limb_t
refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
}
1209
/* Shared body for refmpn_addmul_1c and refmpn_submul_1c below.
   Computes {sp,size}*multiplier+carry into a temporary, then adds or
   subtracts it from {rp,size} with operation_n.  Expands inside a
   function with rp, sp, size, multiplier, carry in scope; "returns"
   the combined high carry/borrow.  */
#define AORSMUL_1C(operation_n)                                 \
  {                                                             \
    mp_ptr     p;                                               \
    mp_limb_t  ret;                                             \
                                                                \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
                                                                \
    p = refmpn_malloc_limbs (size);                             \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
    ret += operation_n (rp, rp, p, size);                       \
                                                                \
    free (p);                                                   \
    return ret;                                                 \
  }
1224
/* {rp,size} += {sp,size} * multiplier + carry; returns the high carry.  */
mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
		  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}
/* {rp,size} -= {sp,size} * multiplier + carry; returns the high borrow.  */
mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
		  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}
1237
1238
/* {rp,size} += {sp,size} * multiplier; returns the high carry.  */
mp_limb_t
refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
/* {rp,size} -= {sp,size} * multiplier; returns the high borrow.  */
mp_limb_t
refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}
1249
1250
/* {dst,size+msize} += {src,size} * {mult,msize}, for the small fixed
   msize values used by the refmpn_addmul_2..8 wrappers below.  The
   carry out of the last row is returned rather than stored.  */
mp_limb_t
refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
		 mp_srcptr mult, mp_size_t msize)
{
  mp_ptr     src_copy;
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  ASSERT (size >= msize);
  ASSERT_MPN (mult, msize);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  for (i = 0; i < msize-1; i++)
    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  /* last row: carry returned, not stored */
  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);

  free (src_copy);
  return ret;
}
1276
/* Fixed-width addmul wrappers: {rp,size+N-1} += {sp,size} * {mult,N},
   carry out of the last row returned.  All delegate to refmpn_addmul_N.  */
mp_limb_t
refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
}
mp_limb_t
refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
}
mp_limb_t
refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
}
mp_limb_t
refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
}
mp_limb_t
refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
}
mp_limb_t
refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
}
mp_limb_t
refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
}
1312
/* Simultaneous {r1p,size} = s1+s2 and {r2p,size} = s1-s2.  The
   incoming carry bit for the add is bit 1 of `carry', the incoming
   borrow for the sub is bit 0.  Returns 2*add_carry + sub_borrow,
   packed the same way.  */
mp_limb_t
refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
		     mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
		     mp_limb_t carry)
{
  mp_ptr p;
  mp_limb_t acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* in case r1p==s1p or r1p==s2p */
  p = refmpn_malloc_limbs (size);

  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);

  free (p);
  return 2 * acy + scy;
}
1337
/* As refmpn_add_n_sub_nc but with no incoming carry or borrow.  */
mp_limb_t
refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
		    mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
}
1344
1345
1346 /* Right shift hi,lo and return the low limb of the result.
1347 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1348 mp_limb_t
rshift_make(mp_limb_t hi,mp_limb_t lo,unsigned shift)1349 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1350 {
1351 ASSERT (shift < GMP_NUMB_BITS);
1352 if (shift == 0)
1353 return lo;
1354 else
1355 return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1356 }
1357
1358 /* Left shift hi,lo and return the high limb of the result.
1359 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1360 mp_limb_t
lshift_make(mp_limb_t hi,mp_limb_t lo,unsigned shift)1361 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1362 {
1363 ASSERT (shift < GMP_NUMB_BITS);
1364 if (shift == 0)
1365 return hi;
1366 else
1367 return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1368 }
1369
1370
/* {rp,size} = {sp,size} >> shift, for 1 <= shift < GMP_NUMB_BITS.
   Returns the bits shifted out at the low end, aligned at the high
   end of the returned limb.  */
mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  /* bits falling off the bottom limb form the return value */
  ret = rshift_make (sp[0], CNST_LIMB(0), shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  /* top limb gets zeros shifted in; i == size-1 after the loop */
  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  return ret;
}
1390
/* {rp,size} = {sp,size} << shift, for 1 <= shift < GMP_NUMB_BITS.
   Returns the bits shifted out at the top, aligned at the low end of
   the returned limb.  Works high to low so rp may overlap sp that way.  */
mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  /* bits falling off the top limb form the return value */
  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  /* bottom limb gets zeros shifted in; i == -1 here, so this is rp[0] */
  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  return ret;
}
1410
1411 void
refmpn_com(mp_ptr rp,mp_srcptr sp,mp_size_t size)1412 refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1413 {
1414 mp_size_t i;
1415
1416 /* We work downwards since mpn_lshiftc needs that. */
1417 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
1418
1419 for (i = size - 1; i >= 0; i--)
1420 rp[i] = (~sp[i]) & GMP_NUMB_MASK;
1421 }
1422
1423 mp_limb_t
refmpn_lshiftc(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1424 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1425 {
1426 mp_limb_t res;
1427
1428 /* No asserts here, refmpn_lshift will assert what we need. */
1429
1430 res = refmpn_lshift (rp, sp, size, shift);
1431 refmpn_com (rp, rp, size);
1432 return res;
1433 }
1434
1435 /* accepting shift==0 and doing a plain copyi or copyd in that case */
1436 mp_limb_t
refmpn_rshift_or_copy(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1437 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1438 {
1439 if (shift == 0)
1440 {
1441 refmpn_copyi (rp, sp, size);
1442 return 0;
1443 }
1444 else
1445 {
1446 return refmpn_rshift (rp, sp, size, shift);
1447 }
1448 }
1449 mp_limb_t
refmpn_lshift_or_copy(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1450 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1451 {
1452 if (shift == 0)
1453 {
1454 refmpn_copyd (rp, sp, size);
1455 return 0;
1456 }
1457 else
1458 {
1459 return refmpn_lshift (rp, sp, size, shift);
1460 }
1461 }
1462
/* Shift-or-copy variants additionally accepting size==0 (no-op,
   returning 0).  */
mp_limb_t
refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
			   unsigned shift)
{
  return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
}
mp_limb_t
refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
			   unsigned shift)
{
  return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
}
1476
/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works.
   The divisor is normalized (shifted so its high bit is set) first,
   since __udiv_qrnnd_c requires that; the remainder is shifted back.  */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r;
  int  n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  /* alternative: use the longlong.h primitive directly */
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  n = refmpn_count_leading_zeros (d);
  d <<= n;

  /* shift the dividend up to match; guarded since a shift by
     GMP_LIMB_BITS when n==0 would be undefined */
  if (n != 0)
    {
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  r >>= n;   /* undo the normalization on the remainder */
  *rp = r;
  return q;
}
1510
/* Same as refmpn_udiv_qrnnd but with the remainder pointer last.  */
mp_limb_t
refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
{
  return refmpn_udiv_qrnnd (rp, h, l, d);
}
1516
/* This little subroutine avoids some bad code generation from i386 gcc 3.0
   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).

   Schoolbook division of {sp,size} by divisor with incoming remainder
   `carry', quotient to {rp,size}, final remainder returned.  Works on
   full limbs, so operands are shifted up past the nail bits.  */
static mp_limb_t
refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
			     mp_limb_t divisor, mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t rem[1];
  /* high limb to low, carrying the remainder down */
  for (i = size-1; i >= 0; i--)
    {
      rp[i] = refmpn_udiv_qrnnd (rem, carry,
				 sp[i] << GMP_NAIL_BITS,
				 divisor << GMP_NAIL_BITS);
      carry = *rem >> GMP_NAIL_BITS;
    }
  return carry;
}
1534
1535 mp_limb_t
refmpn_divmod_1c(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_limb_t divisor,mp_limb_t carry)1536 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1537 mp_limb_t divisor, mp_limb_t carry)
1538 {
1539 mp_ptr sp_orig;
1540 mp_ptr prod;
1541 mp_limb_t carry_orig;
1542
1543 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1544 ASSERT (size >= 0);
1545 ASSERT (carry < divisor);
1546 ASSERT_MPN (sp, size);
1547 ASSERT_LIMB (divisor);
1548 ASSERT_LIMB (carry);
1549
1550 if (size == 0)
1551 return carry;
1552
1553 sp_orig = refmpn_memdup_limbs (sp, size);
1554 prod = refmpn_malloc_limbs (size);
1555 carry_orig = carry;
1556
1557 carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1558
1559 /* check by multiplying back */
1560 #if 0
1561 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1562 size, divisor, carry_orig, carry);
1563 mpn_trace("s",sp_copy,size);
1564 mpn_trace("r",rp,size);
1565 printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1566 mpn_trace("p",prod,size);
1567 #endif
1568 ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1569 ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1570 free (sp_orig);
1571 free (prod);
1572
1573 return carry;
1574 }
1575
/* {rp,size} = {sp,size} / divisor, returning the remainder.  */
mp_limb_t
refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
}
1581
1582
/* Remainder of ({sp,size} with `carry' as high limb) / divisor.
   The quotient is computed into a scratch area and discarded.  */
mp_limb_t
refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
	       mp_limb_t carry)
{
  mp_ptr  p = refmpn_malloc_limbs (size);
  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  free (p);
  return carry;
}

/* Remainder of {sp,size} / divisor.  */
mp_limb_t
refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
}
1598
/* Same as refmpn_mod_1 but taking (and verifying) a precomputed
   inverse of the divisor, which must be normalized.  The inverse is
   only checked, not used, since speed doesn't matter here.  */
mp_limb_t
refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
		     mp_limb_t inverse)
{
  ASSERT (divisor & GMP_NUMB_HIGHBIT);
  ASSERT (inverse == refmpn_invert_limb (divisor));
  return refmpn_mod_1 (sp, size, divisor);
}
1607
/* Remainder of {p,n} modulo 2^(3*GMP_NUMB_BITS/4) - 1.
   This implementation will be rather slow, but has the advantage of
   being in a different style than the libgmp versions.  */
mp_limb_t
refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
{
  ASSERT ((GMP_NUMB_BITS % 4) == 0);
  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
}
1616
1617
/* Divide {sp,size} (with `carry' as high limb) by divisor, producing
   size integer quotient limbs at rp+xsize and xsize further fraction
   limbs at rp (computed by continuing the division with zero limbs).
   Returns the final remainder.  */
mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
		  mp_limb_t carry)
{
  mp_ptr  z;

  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, CNST_LIMB(0));

  /* integer part first, then extend with zero limbs for the fraction */
  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}
1634
/* As refmpn_divrem_1c but with no incoming high limb.  */
mp_limb_t
refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
		 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
}
1641
/* Same as refmpn_divrem_1 but taking (and verifying) the
   normalization shift and precomputed inverse of the divisor.  They
   are only checked, not used, since speed doesn't matter here.  */
mp_limb_t
refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
			mp_srcptr sp, mp_size_t size,
			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
{
  ASSERT (size >= 0);
  ASSERT (shift == refmpn_count_leading_zeros (divisor));
  ASSERT (inverse == refmpn_invert_limb (divisor << shift));

  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
}
1653
/* Divide {np,nn} (extended with qxn zero fraction limbs) by the
   2-limb divisor {dp,2}.  Quotient to {qp,nn-2+qxn}, high quotient
   limb returned; the 2-limb remainder is written back to np.  */
mp_limb_t
refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
		 mp_ptr np, mp_size_t nn,
		 mp_srcptr dp)
{
  mp_ptr tp;
  mp_limb_t qh;

  tp = refmpn_malloc_limbs (nn + qxn);
  /* low qxn limbs are the zero-extension for the fraction part */
  refmpn_zero (tp, qxn);
  refmpn_copyi (tp + qxn, np, nn);
  qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
  /* remainder is left in the low 2 limbs of tp */
  refmpn_copyi (np, tp, 2);
  free (tp);
  return qh;
}
1670
1671 /* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
1672 paper, figure 8.1 m', where b=2^GMP_LIMB_BITS. Note that -d-1 < d
1673 since d has the high bit set. */
1674
mp_limb_t
refmpn_invert_limb (mp_limb_t d)
{
  mp_limb_t r;
  ASSERT (d & GMP_LIMB_HIGHBIT);
  /* floor ((b*(b-d)-1) / d): the two-limb numerator is (-d-1, all-ones).
     The quotient fits a single limb since -d-1 < d (see comment above).  */
  return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
}
1682
/* Approximate reciprocal {rp,n} of {up,n}, computed by dividing the
   2n-limb all-ones value (B^2n - 1) by U and keeping the low n limbs
   of the quotient.  The scratch argument is accepted for interface
   compatibility with mpn_invert but not used here.  */
void
refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr qp, tp;
  TMP_DECL;
  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (2 * n);
  qp = TMP_ALLOC_LIMBS (n + 1);

  /* tp = B^2n - 1, i.e. 2n limbs of all ones */
  MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);

  refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
  /* quotient has n+1 limbs; the low n are the inverse */
  refmpn_copyi (rp, qp, n);

  TMP_FREE;
}
1700
/* {rp,n} = inverse of {up,n} modulo B^n (up[0] must be odd, as
   required by binvert_limb).  The scratch argument is accepted for
   interface compatibility but not used here.  */
void
refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr tp;
  mp_limb_t binv;
  TMP_DECL;
  TMP_MARK;

  /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
     code.  To make up for it, we check that the inverse is correct using a
     multiply.  */

  tp = TMP_ALLOC_LIMBS (2 * n);

  /* divide 1 by {up,n} mod B^n */
  MPN_ZERO (tp, n);
  tp[0] = 1;
  binvert_limb (binv, up[0]);
  mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);

  /* verify: up * rp == 1 mod B^n */
  refmpn_mul_n (tp, rp, up, n);
  ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));

  TMP_FREE;
}
1725
1726 /* The aim is to produce a dst quotient and return a remainder c, satisfying
1727 c*b^n + src-i == 3*dst, where i is the incoming carry.
1728
1729 Some value c==0, c==1 or c==2 will satisfy, so just try each.
1730
1731 If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
1732 remainder from the first division attempt determines the correct
1733 remainder (3-c), but don't bother with that, since we can't guarantee
1734 anything about GMP_NUMB_BITS when using nails.
1735
1736 If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
1737 complement negative, ie. b^n+a-i, and the calculation produces c1
1738 satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1. This
1739 means it's enough to just add any borrow back at the end.
1740
1741 A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
1742 mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
1743 or 1 respectively, so with 1 added the final return value is still in the
1744 prescribed range 0 to 2. */
1745
/* Exact division of {sp,size} minus incoming carry by 3; see the long
   comment above for why trying each remainder candidate works.  */
mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr     spcopy;
  mp_limb_t  c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  /* subtract the incoming carry; cs records any borrow */
  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  /* try each possible high remainder until the division is exact */
  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  /* add back any borrow from the initial subtraction (see comment above) */
  c += cs;
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}
1772
/* Exact division of {sp,size} by 3, with no incoming carry.  */
mp_limb_t
refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
}
1778
1779
/* The same as mpn/generic/mul_basecase.c, but using refmpn functions.
   {prodp,usize+vsize} = {up,usize} * {vp,vsize}, usize >= vsize >= 1,
   no overlap between product and operands.  */
void
refmpn_mul_basecase (mp_ptr prodp,
		     mp_srcptr up, mp_size_t usize,
		     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t i;

  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  /* first row initializes the product, subsequent rows accumulate */
  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}
1799
1800
/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions.
   Middle product: {rp, un-vn+3} gets the middle un-vn+1 limbs of
   {up,un} * {vp,vn} plus two limbs of carry accumulation, un >= vn >= 1.  */
void
refmpn_mulmid_basecase (mp_ptr rp,
			mp_srcptr up, mp_size_t un,
			mp_srcptr vp, mp_size_t vn)
{
  mp_limb_t cy;
  mp_size_t i;

  ASSERT (un >= vn);
  ASSERT (vn >= 1);
  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
  ASSERT_MPN (up, un);
  ASSERT_MPN (vp, vn);

  /* first diagonal initializes rp, with carry limb above and a zero
     second carry limb */
  rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
  rp[un - vn + 2] = CNST_LIMB (0);
  for (i = 1; i < vn; i++)
    {
      /* each later diagonal shifts down one position in up */
      cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
      cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
      cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
      ASSERT (cy == 0);
    }
}
1827
/* Middle-product wrappers, all delegating to the basecase.  The
   scratch argument of toom42_mulmid is accepted for interface
   compatibility but not used.  */
void
refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
		      mp_ptr scratch)
{
  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
}

void
refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
     off products near the middle product region boundary */
  refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
}

void
refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
	       mp_srcptr vp, mp_size_t vn)
{
  /* FIXME: this could be made faster by using refmpn_mul and then subtracting
     off products near the middle product region boundary */
  refmpn_mulmid_basecase (rp, up, un, vp, vn);
}
1851
1852
1853
/* Size thresholds used by refmpn_mul below when choosing which library
   toom/fft routine to build large products from.  Without FFT support
   the FFT threshold is pushed out of reach.  */
#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
#if WANT_FFT
#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
#else
#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
#endif
1862
/* {wp, un+vn} = {up,un} * {vp,vn}.  Small products use our own
   mul_basecase; larger ones are assembled from library toom routines
   applied to vn-sized slices of up, the partial products accumulated
   into wp.  Presumably un >= vn is expected, matching mpn_mul --
   TODO confirm (the recursion at the end swaps operands itself).  */
void
refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp, rp;
  mp_size_t tn;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and toom2 ranges, use our own mul_basecase. */
      if (vn != 0)
	refmpn_mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  MPN_ZERO (wp, vn);
  rp = refmpn_malloc_limbs (2 * vn);

  /* scratch space for whichever toom routine the slice size selects */
  if (vn < TOOM4_THRESHOLD)
    tn = mpn_toom22_mul_itch (vn, vn);
  else if (vn < TOOM6_THRESHOLD)
    tn = mpn_toom33_mul_itch (vn, vn);
  else if (vn < FFT_THRESHOLD)
    tn = mpn_toom44_mul_itch (vn, vn);
  else
    tn = mpn_toom6h_mul_itch (vn, vn);
  tp = refmpn_malloc_limbs (tn);

  /* multiply vn-limb slices of up by vp, accumulating into wp */
  while (un >= vn)
    {
      if (vn < TOOM4_THRESHOLD)
	/* In the toom3 range, use mpn_toom22_mul.  */
	mpn_toom22_mul (rp, up, vn, vp, vn, tp);
      else if (vn < TOOM6_THRESHOLD)
	/* In the toom4 range, use mpn_toom33_mul.  */
	mpn_toom33_mul (rp, up, vn, vp, vn, tp);
      else if (vn < FFT_THRESHOLD)
	/* In the toom6 range, use mpn_toom44_mul.  */
	mpn_toom44_mul (rp, up, vn, vp, vn, tp);
      else
	/* For the largest operands, use mpn_toom6h_mul.  */
	mpn_toom6h_mul (rp, up, vn, vp, vn, tp);

      ASSERT_NOCARRY (refmpn_add (wp, rp, 2 * vn, wp, vn));
      wp += vn;

      up += vn;
      un -= vn;
    }

  free (tp);

  /* leftover slice shorter than vn: recurse with operands swapped */
  if (un != 0)
    {
      refmpn_mul (rp, vp, vn, up, un);
      ASSERT_NOCARRY (refmpn_add (wp, rp, un + vn, wp, vn));
    }
  free (rp);
}
1923
/* {prodp,2*size} = {up,size} * {vp,size}.  */
void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  refmpn_mul (prodp, up, size, vp, size);
}

/* {prodp,size} = low size limbs of {up,size} * {vp,size}.  The full
   product is formed and truncated, simple rather than fast.  */
void
refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_ptr tp = refmpn_malloc_limbs (2*size);
  refmpn_mul (tp, up, size, vp, size);
  refmpn_copyi (prodp, tp, size);
  free (tp);
}

/* {dst,2*size} = {src,size}^2.  */
void
refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mul (dst, src, size, src, size);
}

/* {dst,size} = low size limbs of {src,size}^2.  */
void
refmpn_sqrlo (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mullo_n (dst, src, src, size);
}
1950
1951 /* Allowing usize<vsize, usize==0 or vsize==0. */
1952 void
refmpn_mul_any(mp_ptr prodp,mp_srcptr up,mp_size_t usize,mp_srcptr vp,mp_size_t vsize)1953 refmpn_mul_any (mp_ptr prodp,
1954 mp_srcptr up, mp_size_t usize,
1955 mp_srcptr vp, mp_size_t vsize)
1956 {
1957 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1958 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1959 ASSERT (usize >= 0);
1960 ASSERT (vsize >= 0);
1961 ASSERT_MPN (up, usize);
1962 ASSERT_MPN (vp, vsize);
1963
1964 if (usize == 0)
1965 {
1966 refmpn_fill (prodp, vsize, CNST_LIMB(0));
1967 return;
1968 }
1969
1970 if (vsize == 0)
1971 {
1972 refmpn_fill (prodp, usize, CNST_LIMB(0));
1973 return;
1974 }
1975
1976 if (usize >= vsize)
1977 refmpn_mul (prodp, up, usize, vp, vsize);
1978 else
1979 refmpn_mul (prodp, vp, vsize, up, usize);
1980 }
1981
1982
1983 mp_limb_t
refmpn_gcd_1(mp_srcptr xp,mp_size_t xsize,mp_limb_t y)1984 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
1985 {
1986 mp_limb_t x;
1987 int twos;
1988
1989 ASSERT (y != 0);
1990 ASSERT (! refmpn_zero_p (xp, xsize));
1991 ASSERT_MPN (xp, xsize);
1992 ASSERT_LIMB (y);
1993
1994 x = refmpn_mod_1 (xp, xsize, y);
1995 if (x == 0)
1996 return y;
1997
1998 twos = 0;
1999 while ((x & 1) == 0 && (y & 1) == 0)
2000 {
2001 x >>= 1;
2002 y >>= 1;
2003 twos++;
2004 }
2005
2006 for (;;)
2007 {
2008 while ((x & 1) == 0) x >>= 1;
2009 while ((y & 1) == 0) y >>= 1;
2010
2011 if (x < y)
2012 MP_LIMB_T_SWAP (x, y);
2013
2014 x -= y;
2015 if (x == 0)
2016 break;
2017 }
2018
2019 return y << twos;
2020 }
2021
2022
2023 /* Based on the full limb x, not nails. */
2024 unsigned
refmpn_count_leading_zeros(mp_limb_t x)2025 refmpn_count_leading_zeros (mp_limb_t x)
2026 {
2027 unsigned n = 0;
2028
2029 ASSERT (x != 0);
2030
2031 while ((x & GMP_LIMB_HIGHBIT) == 0)
2032 {
2033 x <<= 1;
2034 n++;
2035 }
2036 return n;
2037 }
2038
2039 /* Full limbs allowed, not limited to nails. */
2040 unsigned
refmpn_count_trailing_zeros(mp_limb_t x)2041 refmpn_count_trailing_zeros (mp_limb_t x)
2042 {
2043 unsigned n = 0;
2044
2045 ASSERT (x != 0);
2046 ASSERT_LIMB (x);
2047
2048 while ((x & 1) == 0)
2049 {
2050 x >>= 1;
2051 n++;
2052 }
2053 return n;
2054 }
2055
2056 /* Strip factors of two (low zero bits) from {p,size} by right shifting.
2057 The return value is the number of twos stripped. */
2058 mp_size_t
refmpn_strip_twos(mp_ptr p,mp_size_t size)2059 refmpn_strip_twos (mp_ptr p, mp_size_t size)
2060 {
2061 mp_size_t limbs;
2062 unsigned shift;
2063
2064 ASSERT (size >= 1);
2065 ASSERT (! refmpn_zero_p (p, size));
2066 ASSERT_MPN (p, size);
2067
2068 for (limbs = 0; p[0] == 0; limbs++)
2069 {
2070 refmpn_copyi (p, p+1, size-1);
2071 p[size-1] = 0;
2072 }
2073
2074 shift = refmpn_count_trailing_zeros (p[0]);
2075 if (shift)
2076 refmpn_rshift (p, p, size, shift);
2077
2078 return limbs*GMP_NUMB_BITS + shift;
2079 }
2080
/* Subtraction-based gcd of {xp,xsize} and {yp,ysize}.  Both operands must
   have an odd low limb and both are clobbered.  The gcd is written to gp
   and its limb size returned.  */
mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */ /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  /* repeatedly subtract the smaller from the larger until they're equal;
     the common value then is the gcd */
  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
	break;
      if (cmp < 0)
	/* make x the larger of the two (pointer and size swap only) */
	MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      /* strip factors of two from the difference: they can't be in the
	 gcd since y's low limb is odd */
      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;
}
2121
2122 unsigned long
ref_popc_limb(mp_limb_t src)2123 ref_popc_limb (mp_limb_t src)
2124 {
2125 unsigned long count;
2126 int i;
2127
2128 count = 0;
2129 for (i = 0; i < GMP_LIMB_BITS; i++)
2130 {
2131 count += (src & 1);
2132 src >>= 1;
2133 }
2134 return count;
2135 }
2136
2137 unsigned long
refmpn_popcount(mp_srcptr sp,mp_size_t size)2138 refmpn_popcount (mp_srcptr sp, mp_size_t size)
2139 {
2140 unsigned long count = 0;
2141 mp_size_t i;
2142
2143 ASSERT (size >= 0);
2144 ASSERT_MPN (sp, size);
2145
2146 for (i = 0; i < size; i++)
2147 count += ref_popc_limb (sp[i]);
2148 return count;
2149 }
2150
2151 unsigned long
refmpn_hamdist(mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)2152 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
2153 {
2154 mp_ptr d;
2155 unsigned long count;
2156
2157 ASSERT (size >= 0);
2158 ASSERT_MPN (s1p, size);
2159 ASSERT_MPN (s2p, size);
2160
2161 if (size == 0)
2162 return 0;
2163
2164 d = refmpn_malloc_limbs (size);
2165 refmpn_xor_n (d, s1p, s2p, size);
2166 count = refmpn_popcount (d, size);
2167 free (d);
2168 return count;
2169 }
2170
2171
/* set r to a%d, for two-limb operands, by binary shift-and-subtract
   long division.  d must be normalized enough that the loop terminates
   (n is asserted not to exceed GMP_NUMB_BITS shifts).  */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t D[2];       /* working copy of the divisor, shifted up/down */
  int n;                /* how many left shifts D has had */

  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  /* shift D up until either its top bit is set or it is >= the current
     remainder, counting the shifts in n */
  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
        break;
      refmpn_lshift (D, D, (mp_size_t) 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  /* for each shift position, subtract D whenever it fits, then bring D
     back down one bit; after n+1 rounds r < d */
  while (n >= 0)
    {
      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
      refmpn_rshift (D, D, (mp_size_t) 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
}
2208
2209
2210
/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big.

   Schoolbook division of {np,nsize} by {dp,dsize}: quotient goes to
   {qp,nsize-dsize} with the top quotient limb (0 or 1) returned, the
   remainder is left in the low dsize limbs of np.  The divisor must be
   normalized (high bit of its top limb set).  */
mp_limb_t
refmpn_sb_div_qr (mp_ptr qp,
                  mp_ptr np, mp_size_t nsize,
                  mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t retval = 0;
  mp_size_t i;
  mp_limb_t d1 = dp[dsize-1];
  /* keep a copy of the dividend for the multiply-back check at the end */
  mp_ptr np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  /* implicit top quotient limb: subtract the divisor once if the top
     dsize limbs of n aren't already smaller than it */
  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  /* one quotient limb per iteration, most significant first */
  for (i--; i >= 0; i--)
    {
      mp_limb_t n0 = np[i+dsize];
      mp_limb_t n1 = np[i+dsize-1];
      mp_limb_t q, dummy_r;

      /* trial quotient from the top two limbs of n and the top limb of d;
         by construction it's at most 2 too big */
      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = GMP_NUMB_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
                               d1 << GMP_NAIL_BITS);

      /* subtract q*d; a non-zero n0 here means q was too big and the
         subtraction borrowed */
      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          /* add back the divisor, at most twice, correcting q */
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
#if 0 /* ASSERT triggers gcc 4.2.1 bug */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
#endif

  /* multiply back to original */
  {
    mp_ptr mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}
2288
2289 /* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
2290 particular the trial quotient is allowed to be 2 too big. */
2291 void
refmpn_tdiv_qr(mp_ptr qp,mp_ptr rp,mp_size_t qxn,mp_ptr np,mp_size_t nsize,mp_srcptr dp,mp_size_t dsize)2292 refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
2293 mp_ptr np, mp_size_t nsize,
2294 mp_srcptr dp, mp_size_t dsize)
2295 {
2296 ASSERT (qxn == 0);
2297 ASSERT_MPN (np, nsize);
2298 ASSERT_MPN (dp, dsize);
2299 ASSERT (dsize > 0);
2300 ASSERT (dp[dsize-1] != 0);
2301
2302 if (dsize == 1)
2303 {
2304 rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
2305 return;
2306 }
2307 else
2308 {
2309 mp_ptr n2p = refmpn_malloc_limbs (nsize+1);
2310 mp_ptr d2p = refmpn_malloc_limbs (dsize);
2311 int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;
2312
2313 n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
2314 ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));
2315
2316 refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
2317 refmpn_rshift_or_copy (rp, n2p, dsize, norm);
2318
2319 /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
2320 free (n2p);
2321 free (d2p);
2322 }
2323 }
2324
2325 mp_limb_t
refmpn_redc_1(mp_ptr rp,mp_ptr up,mp_srcptr mp,mp_size_t n,mp_limb_t invm)2326 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
2327 {
2328 mp_size_t j;
2329 mp_limb_t cy;
2330
2331 ASSERT_MPN (up, 2*n);
2332 /* ASSERT about directed overlap rp, up */
2333 /* ASSERT about overlap rp, mp */
2334 /* ASSERT about overlap up, mp */
2335
2336 for (j = n - 1; j >= 0; j--)
2337 {
2338 up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
2339 up++;
2340 }
2341 cy = mpn_add_n (rp, up, up - n, n);
2342 return cy;
2343 }
2344
/* Convert {src,size} to digits in the given base, most significant digit
   first, stored at dst.  Digits are raw values 0..base-1, not ASCII.
   Returns the number of digits produced (at least 1).  src is clobbered,
   except for power-of-2 bases where a copy is taken first.  */
size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char *d;
  size_t dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  /* dsize is an upper bound on the digit count */
  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  /* generate digits least significant first, filling the buffer from the
     top end downwards */
  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
      /* drop the high limb of src once it reaches zero */
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* Move result back and decrement dsize if we didn't generate
     the maximum possible digits. */
  if (d != dst)
    {
      size_t i;
      dsize -= d - dst;
      for (i = 0; i < dsize; i++)
	dst[i] = d[i];
    }

  if (POW2_P (base))
    free (src);

  return dsize;
}
2396
2397
2398 mp_limb_t
ref_bswap_limb(mp_limb_t src)2399 ref_bswap_limb (mp_limb_t src)
2400 {
2401 mp_limb_t dst;
2402 int i;
2403
2404 dst = 0;
2405 for (i = 0; i < GMP_LIMB_BYTES; i++)
2406 {
2407 dst = (dst << 8) + (src & 0xFF);
2408 src >>= 8;
2409 }
2410 return dst;
2411 }
2412
2413
/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times. */

/* Multiplier for the linear congruential generator below, one value per
   supported limb size. */
#if GMP_LIMB_BITS == 32
#define RAND_A CNST_LIMB(0x29CF535)
#endif
#if GMP_LIMB_BITS == 64
#define RAND_A CNST_LIMB(0xBAECD515DAF0B49D)
#endif

/* Current LCG state, advanced by refmpn_random_half. */
mp_limb_t refmpn_random_seed;
2427
2428 mp_limb_t
refmpn_random_half(void)2429 refmpn_random_half (void)
2430 {
2431 refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
2432 return (refmpn_random_seed >> GMP_LIMB_BITS/2);
2433 }
2434
2435 mp_limb_t
refmpn_random_limb(void)2436 refmpn_random_limb (void)
2437 {
2438 return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
2439 | refmpn_random_half ()) & GMP_NUMB_MASK;
2440 }
2441
2442 void
refmpn_random(mp_ptr ptr,mp_size_t size)2443 refmpn_random (mp_ptr ptr, mp_size_t size)
2444 {
2445 mp_size_t i;
2446 if (GMP_NAIL_BITS == 0)
2447 {
2448 mpn_random (ptr, size);
2449 return;
2450 }
2451
2452 for (i = 0; i < size; i++)
2453 ptr[i] = refmpn_random_limb ();
2454 }
2455
/* Fill {ptr,size} with a random value consisting of long runs of ones
   and zeros, the nails-capable analogue of mpn_random2.  */
void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t bit, mask, limb;
  int run;             /* bits remaining in the current run */

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

/* maximum run length, in bits */
#define RUN_MODULUS 32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  /* walk bit by bit from the top of the number downwards, emitting the
     current run value (mask all-ones or all-zeros) at each position */
  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
	{
	  if (run == 0)
	    {
	      /* begin a new run of 1..RUN_MODULUS bits, alternating
		 between ones and zeros */
	      run = (refmpn_random_half () % RUN_MODULUS) + 1;
	      mask = ~mask;
	    }

	  limb |= (bit & mask);
	  bit >>= 1;
	  run--;
	}
      while (bit != 0);

      ptr[i] = limb;
      /* subsequent limbs are processed from their top bit */
      bit = GMP_NUMB_HIGHBIT;
    }
}
2497
/* This is a simple bitwise algorithm working high to low across "s" and
   testing each time whether setting the bit would make s^2 exceed n.
   The root goes to {sp,(nsize+1)/2}.  If rp is non-NULL the remainder is
   stored there and its limb size returned; if rp is NULL the return is
   simply non-zero iff the remainder is non-zero.  */
mp_size_t
refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
{
  mp_ptr tp, dp;
  mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs;
  unsigned ibit;
  long i;
  mp_limb_t c;

  ASSERT (nsize >= 0);

  /* If n==0, then s=0 and r=0.  */
  if (nsize == 0)
    return 0;

  ASSERT (np[nsize - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));

  /* root */
  ssize = (nsize+1)/2;
  refmpn_zero (sp, ssize);

  /* the remainder so far, i.e. n - s^2 */
  dp = refmpn_memdup_limbs (np, nsize);
  dsize = nsize;

  /* temporary */
  talloc = 2*ssize + 1;
  tp = refmpn_malloc_limbs (talloc);

  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
    {
      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
	 is added to it */

      /* 2*s*2^i: shift s up by i+1 bits, split into a limb part and a
	 bit part */
      ilimbs = (i+1) / GMP_NUMB_BITS;
      ibit = (i+1) % GMP_NUMB_BITS;
      refmpn_zero (tp, ilimbs);
      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
      tsize = ilimbs + ssize;
      tp[tsize] = c;
      tsize += (c != 0);

      /* add in 2^(2*i), zero-extending t first if that bit lies above it */
      ilimbs = (2*i) / GMP_NUMB_BITS;
      ibit = (2*i) % GMP_NUMB_BITS;
      if (ilimbs + 1 > tsize)
	{
	  refmpn_zero_extend (tp, tsize, ilimbs + 1);
	  tsize = ilimbs + 1;
	}
      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
			CNST_LIMB(1) << ibit);
      ASSERT (tsize < talloc);
      tp[tsize] = c;
      tsize += (c != 0);

      /* if the increase still fits under the remainder, accept the bit */
      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
	{
	  /* set this bit in s and subtract from the remainder */
	  refmpn_setbit (sp, i);

	  ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
	  dsize = refmpn_normalize (dp, dsize);
	}
    }

  if (rp == NULL)
    {
      ret = ! refmpn_zero_p (dp, dsize);
    }
  else
    {
      ASSERT (dsize == 0 || dp[dsize-1] != 0);
      refmpn_copy (rp, dp, dsize);
      ret = dsize;
    }

  free (dp);
  free (tp);
  return ret;
}
2583