1 /* Reference mpn functions, designed to be simple, portable and independent
2 of the normal gmp code. Speed isn't a consideration.
3
4 Copyright 1996-2009, 2011-2013 Free Software Foundation, Inc.
5
6 This file is part of the GNU MP Library test suite.
7
8 The GNU MP Library test suite is free software; you can redistribute it
9 and/or modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 3 of the License,
11 or (at your option) any later version.
12
13 The GNU MP Library test suite is distributed in the hope that it will be
14 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
16 Public License for more details.
17
18 You should have received a copy of the GNU General Public License along with
19 the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
20
21
22 /* Most routines have assertions representing what the mpn routines are
23 supposed to accept. Many of these reference routines do sensible things
24 outside these ranges (eg. for size==0), but the assertions are present to
25 pick up bad parameters passed here that are about to be passed the same
26 to a real mpn routine being compared. */
27
28 /* always do assertion checking */
29 #define WANT_ASSERT 1
30
31 #include <stdio.h> /* for NULL */
32 #include <stdlib.h> /* for malloc */
33
34 #include "gmp.h"
35 #include "gmp-impl.h"
36 #include "longlong.h"
37
38 #include "tests.h"
39
40
41
42 /* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
43 in bytes. */
44 int
byte_overlap_p(const void * v_xp,mp_size_t xsize,const void * v_yp,mp_size_t ysize)45 byte_overlap_p (const void *v_xp, mp_size_t xsize,
46 const void *v_yp, mp_size_t ysize)
47 {
48 const char *xp = (const char *) v_xp;
49 const char *yp = (const char *) v_yp;
50
51 ASSERT (xsize >= 0);
52 ASSERT (ysize >= 0);
53
54 /* no wraparounds */
55 ASSERT (xp+xsize >= xp);
56 ASSERT (yp+ysize >= yp);
57
58 if (xp + xsize <= yp)
59 return 0;
60
61 if (yp + ysize <= xp)
62 return 0;
63
64 return 1;
65 }
66
67 /* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
68 int
refmpn_overlap_p(mp_srcptr xp,mp_size_t xsize,mp_srcptr yp,mp_size_t ysize)69 refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
70 {
71 return byte_overlap_p (xp, xsize * GMP_LIMB_BYTES,
72 yp, ysize * GMP_LIMB_BYTES);
73 }
74
75 /* Check overlap for a routine defined to work low to high. */
76 int
refmpn_overlap_low_to_high_p(mp_srcptr dst,mp_srcptr src,mp_size_t size)77 refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
78 {
79 return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
80 }
81
82 /* Check overlap for a routine defined to work high to low. */
83 int
refmpn_overlap_high_to_low_p(mp_srcptr dst,mp_srcptr src,mp_size_t size)84 refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
85 {
86 return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
87 }
88
89 /* Check overlap for a standard routine requiring equal or separate. */
90 int
refmpn_overlap_fullonly_p(mp_srcptr dst,mp_srcptr src,mp_size_t size)91 refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
92 {
93 return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
94 }
95 int
refmpn_overlap_fullonly_two_p(mp_srcptr dst,mp_srcptr src1,mp_srcptr src2,mp_size_t size)96 refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
97 mp_size_t size)
98 {
99 return (refmpn_overlap_fullonly_p (dst, src1, size)
100 && refmpn_overlap_fullonly_p (dst, src2, size));
101 }
102
103
104 mp_ptr
refmpn_malloc_limbs(mp_size_t size)105 refmpn_malloc_limbs (mp_size_t size)
106 {
107 mp_ptr p;
108 ASSERT (size >= 0);
109 if (size == 0)
110 size = 1;
111 p = (mp_ptr) malloc ((size_t) (size * GMP_LIMB_BYTES));
112 ASSERT (p != NULL);
113 return p;
114 }
115
/* Free limbs allocated by refmpn_malloc_limbs.  NOTE: Can't free
 * memory allocated by refmpn_malloc_limbs_aligned, since that returns a
 * pointer advanced past the start of the malloc'ed block. */
void
refmpn_free_limbs (mp_ptr p)
{
  free (p);
}
123
124 mp_ptr
refmpn_memdup_limbs(mp_srcptr ptr,mp_size_t size)125 refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
126 {
127 mp_ptr p;
128 p = refmpn_malloc_limbs (size);
129 refmpn_copyi (p, ptr, size);
130 return p;
131 }
132
/* malloc n limbs on a multiple of m bytes boundary.
   Over-allocates by m-1 extra limbs so align_pointer has room to advance
   the pointer to the next m-byte boundary.  The result therefore cannot
   be passed to free or refmpn_free_limbs. */
mp_ptr
refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
{
  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
}
139
140
141 void
refmpn_fill(mp_ptr ptr,mp_size_t size,mp_limb_t value)142 refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
143 {
144 mp_size_t i;
145 ASSERT (size >= 0);
146 for (i = 0; i < size; i++)
147 ptr[i] = value;
148 }
149
150 void
refmpn_zero(mp_ptr ptr,mp_size_t size)151 refmpn_zero (mp_ptr ptr, mp_size_t size)
152 {
153 refmpn_fill (ptr, size, CNST_LIMB(0));
154 }
155
156 void
refmpn_zero_extend(mp_ptr ptr,mp_size_t oldsize,mp_size_t newsize)157 refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
158 {
159 ASSERT (newsize >= oldsize);
160 refmpn_zero (ptr+oldsize, newsize-oldsize);
161 }
162
163 int
refmpn_zero_p(mp_srcptr ptr,mp_size_t size)164 refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
165 {
166 mp_size_t i;
167 for (i = 0; i < size; i++)
168 if (ptr[i] != 0)
169 return 0;
170 return 1;
171 }
172
173 mp_size_t
refmpn_normalize(mp_srcptr ptr,mp_size_t size)174 refmpn_normalize (mp_srcptr ptr, mp_size_t size)
175 {
176 ASSERT (size >= 0);
177 while (size > 0 && ptr[size-1] == 0)
178 size--;
179 return size;
180 }
181
182 /* the highest one bit in x */
183 mp_limb_t
refmpn_msbone(mp_limb_t x)184 refmpn_msbone (mp_limb_t x)
185 {
186 mp_limb_t n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);
187
188 while (n != 0)
189 {
190 if (x & n)
191 break;
192 n >>= 1;
193 }
194 return n;
195 }
196
197 /* a mask of the highest one bit plus and all bits below */
198 mp_limb_t
refmpn_msbone_mask(mp_limb_t x)199 refmpn_msbone_mask (mp_limb_t x)
200 {
201 if (x == 0)
202 return 0;
203
204 return (refmpn_msbone (x) << 1) - 1;
205 }
206
/* How many digits in the given base will fit in a limb.
   Notice that the product b is allowed to be equal to the limit
   2^GMP_NUMB_BITS, this ensures the result for base==2 will be
   GMP_NUMB_BITS (and similarly other powers of 2). */
int
refmpn_chars_per_limb (int base)
{
  /* limit and b are 2-limb values so that base^k == 2^GMP_NUMB_BITS can be
     represented exactly; refmpn_mul_1/refmpn_cmp do the 2-limb arithmetic. */
  mp_limb_t limit[2], b[2];
  int chars_per_limb;

  ASSERT (base >= 2);

  limit[0] = 0; /* limit = 2^GMP_NUMB_BITS */
  limit[1] = 1;
  b[0] = 1; /* b = 1 */
  b[1] = 0;

  /* count how many times b can be multiplied by base while staying
     <= limit; overflow out of the 2-limb b also terminates */
  chars_per_limb = 0;
  for (;;)
    {
      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
	break;
      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
	break;
      chars_per_limb++;
    }
  return chars_per_limb;
}
235
236 /* The biggest value base**n which fits in GMP_NUMB_BITS. */
237 mp_limb_t
refmpn_big_base(int base)238 refmpn_big_base (int base)
239 {
240 int chars_per_limb = refmpn_chars_per_limb (base);
241 int i;
242 mp_limb_t bb;
243
244 ASSERT (base >= 2);
245 bb = 1;
246 for (i = 0; i < chars_per_limb; i++)
247 bb *= base;
248 return bb;
249 }
250
251
252 void
refmpn_setbit(mp_ptr ptr,unsigned long bit)253 refmpn_setbit (mp_ptr ptr, unsigned long bit)
254 {
255 ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
256 }
257
258 void
refmpn_clrbit(mp_ptr ptr,unsigned long bit)259 refmpn_clrbit (mp_ptr ptr, unsigned long bit)
260 {
261 ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
262 }
263
/* Non-zero iff bit "bit" (counting from the least significant) is set. */
#define REFMPN_TSTBIT(ptr,bit) \
  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)

/* Function form of REFMPN_TSTBIT: return 0 or 1. */
int
refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
{
  return REFMPN_TSTBIT (ptr, bit);
}
272
273 unsigned long
refmpn_scan0(mp_srcptr ptr,unsigned long bit)274 refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
275 {
276 while (REFMPN_TSTBIT (ptr, bit) != 0)
277 bit++;
278 return bit;
279 }
280
281 unsigned long
refmpn_scan1(mp_srcptr ptr,unsigned long bit)282 refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
283 {
284 while (REFMPN_TSTBIT (ptr, bit) == 0)
285 bit++;
286 return bit;
287 }
288
/* Copy {sp,size} to {rp,size}.  Operands must be coincident or separate
   (standard "fullonly" overlap rule); the copy itself is done low to high. */
void
refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  refmpn_copyi (rp, sp, size);
}
295
296 void
refmpn_copyi(mp_ptr rp,mp_srcptr sp,mp_size_t size)297 refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
298 {
299 mp_size_t i;
300
301 ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
302 ASSERT (size >= 0);
303
304 for (i = 0; i < size; i++)
305 rp[i] = sp[i];
306 }
307
308 void
refmpn_copyd(mp_ptr rp,mp_srcptr sp,mp_size_t size)309 refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
310 {
311 mp_size_t i;
312
313 ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
314 ASSERT (size >= 0);
315
316 for (i = size-1; i >= 0; i--)
317 rp[i] = sp[i];
318 }
319
/* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
   zeros to wsize.  If x is longer, then copy just the high wsize limbs. */
void
refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
{
  ASSERT (wsize >= 0);
  ASSERT (xsize >= 0);

  /* high part of x if x bigger than w */
  if (xsize > wsize)
    {
      xp += xsize - wsize;
      xsize = wsize;
    }

  /* here xsize <= wsize: copy x into the high limbs of w, zero the rest */
  refmpn_copy (wp + wsize-xsize, xp, xsize);
  refmpn_zero (wp, wsize-xsize);
}
338
339 int
refmpn_cmp(mp_srcptr xp,mp_srcptr yp,mp_size_t size)340 refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
341 {
342 mp_size_t i;
343
344 ASSERT (size >= 1);
345 ASSERT_MPN (xp, size);
346 ASSERT_MPN (yp, size);
347
348 for (i = size-1; i >= 0; i--)
349 {
350 if (xp[i] > yp[i]) return 1;
351 if (xp[i] < yp[i]) return -1;
352 }
353 return 0;
354 }
355
356 int
refmpn_cmp_allowzero(mp_srcptr xp,mp_srcptr yp,mp_size_t size)357 refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
358 {
359 if (size == 0)
360 return 0;
361 else
362 return refmpn_cmp (xp, yp, size);
363 }
364
/* Compare {xp,xsize} and {yp,ysize}, the sizes possibly differing;
   return 1, 0 or -1 for x greater, equal or less than y. */
int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
		     mp_srcptr yp, mp_size_t ysize)
{
  int opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  /* arrange x to be the longer operand; "opp" remembers whether the
     comparison sense was flipped by the swap */
  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  /* any non-zero limb in x's excess high part makes x bigger; otherwise
     compare the common low ysize limbs */
  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}
385
386 int
refmpn_equal_anynail(mp_srcptr xp,mp_srcptr yp,mp_size_t size)387 refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
388 {
389 mp_size_t i;
390 ASSERT (size >= 0);
391
392 for (i = 0; i < size; i++)
393 if (xp[i] != yp[i])
394 return 0;
395 return 1;
396 }
397
398
/* Shared body for the limb-wise logical operations below: store
   "operation" (an expression in s1p[i] and s2p[i]) into each rp[i].
   Operands must obey the standard coincident-or-separate overlap rule. */
#define LOGOPS(operation)                                       \
  {                                                             \
    mp_size_t  i;                                               \
								\
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (size >= 1);                                         \
    ASSERT_MPN (s1p, size);                                     \
    ASSERT_MPN (s2p, size);                                     \
								\
    for (i = 0; i < size; i++)                                  \
      rp[i] = operation;                                        \
  }
411
/* rp = s1 AND s2 */
void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}
/* rp = s1 AND NOT s2 */
void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}
/* rp = NOT (s1 AND s2); the XOR with GMP_NUMB_MASK complements within
   the numb bits only, keeping any nail bits zero */
void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}
/* rp = s1 OR s2 */
void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}
/* rp = s1 OR NOT s2 */
void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}
/* rp = NOT (s1 OR s2) */
void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}
/* rp = s1 XOR s2 */
void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}
/* rp = NOT (s1 XOR s2) */
void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}
452
453
454 /* set *dh,*dl to mh:ml - sh:sl, in full limbs */
455 void
refmpn_sub_ddmmss(mp_limb_t * dh,mp_limb_t * dl,mp_limb_t mh,mp_limb_t ml,mp_limb_t sh,mp_limb_t sl)456 refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
457 mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
458 {
459 *dl = ml - sl;
460 *dh = mh - sh - (ml < sl);
461 }
462
463
/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  /* full-limb case: wraparound (sum < x) signals the carry */
  *w = sum;
  cy = (sum < x);
#else
  /* nails case: the carry lands in the nail bits above GMP_NUMB_BITS */
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}
483
/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  /* full-limb case: wraparound (diff > x) signals the borrow */
  *w = diff;
  cy = (diff > x);
#else
  /* nails case: a borrow propagates into bit GMP_NUMB_BITS of the
     wrapped difference */
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}
503
504 /* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
505 mp_limb_t
adc(mp_limb_t * w,mp_limb_t x,mp_limb_t y,mp_limb_t c)506 adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
507 {
508 mp_limb_t r;
509
510 ASSERT_LIMB (x);
511 ASSERT_LIMB (y);
512 ASSERT (c == 0 || c == 1);
513
514 r = ref_addc_limb (w, x, y);
515 return r + ref_addc_limb (w, *w, c);
516 }
517
518 /* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
519 mp_limb_t
sbb(mp_limb_t * w,mp_limb_t x,mp_limb_t y,mp_limb_t c)520 sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
521 {
522 mp_limb_t r;
523
524 ASSERT_LIMB (x);
525 ASSERT_LIMB (y);
526 ASSERT (c == 0 || c == 1);
527
528 r = ref_subc_limb (w, x, y);
529 return r + ref_subc_limb (w, *w, c);
530 }
531
532
/* Shared body for refmpn_add_1/refmpn_sub_1: propagate the single limb n
   through {sp,size} using "operation" (ref_addc_limb or ref_subc_limb),
   returning the final carry/borrow. */
#define AORS_1(operation)                               \
  {                                                     \
    mp_size_t  i;                                       \
							\
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
    ASSERT (size >= 1);                                 \
    ASSERT_MPN (sp, size);                              \
    ASSERT_LIMB (n);                                    \
							\
    for (i = 0; i < size; i++)                          \
      n = operation (&rp[i], sp[i], n);                 \
    return n;                                           \
  }
546
/* rp = sp + n, returning the carry out of the high limb */
mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_addc_limb);
}
/* rp = sp - n, returning the borrow out of the high limb */
mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_subc_limb);
}
557
/* Shared body for refmpn_add_nc/refmpn_sub_nc: limb-wise add/subtract of
   {s1p,size} and {s2p,size} with an incoming carry, using "operation"
   (adc or sbb), returning the final carry/borrow. */
#define AORS_NC(operation)                                              \
  {                                                                     \
    mp_size_t  i;                                                       \
									\
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size));        \
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
									\
    for (i = 0; i < size; i++)                                          \
      carry = operation (&rp[i], s1p[i], s2p[i], carry);                \
    return carry;                                                       \
  }
572
/* rp = s1 + s2 + carry, returning the carry out of the high limb */
mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
	       mp_limb_t carry)
{
  AORS_NC (adc);
}
/* rp = s1 - s2 - carry, returning the borrow out of the high limb */
mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
	       mp_limb_t carry)
{
  AORS_NC (sbb);
}
585
586
/* rp = s1 + s2, returning the carry out of the high limb */
mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
/* rp = s1 - s2, returning the borrow out of the high limb */
mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
597
598 mp_limb_t
refmpn_cnd_add_n(mp_limb_t cnd,mp_ptr rp,mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)599 refmpn_cnd_add_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
600 {
601 if (cnd != 0)
602 return refmpn_add_n (rp, s1p, s2p, size);
603 else
604 {
605 refmpn_copyi (rp, s1p, size);
606 return 0;
607 }
608 }
609 mp_limb_t
refmpn_cnd_sub_n(mp_limb_t cnd,mp_ptr rp,mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)610 refmpn_cnd_sub_n (mp_limb_t cnd, mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
611 {
612 if (cnd != 0)
613 return refmpn_sub_n (rp, s1p, s2p, size);
614 else
615 {
616 refmpn_copyi (rp, s1p, size);
617 return 0;
618 }
619 }
620
621
/* Shared body for refmpn_add_err1_n/refmpn_sub_err1_n: rp = s1 +/- s2 with
   incoming carry, and additionally ep[0..1] accumulates (as a 2-limb value)
   the sum of yp[size-1-i] over every position i that produced a carry out.
   "operation" is adc or sbb. */
#define AORS_ERR1_N(operation)                                          \
  {                                                                     \
    mp_size_t  i;                                                       \
    mp_limb_t carry2;                                                   \
									\
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                 \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                 \
    ASSERT (! refmpn_overlap_p (rp, size, yp, size));                   \
    ASSERT (! refmpn_overlap_p (ep, 2, s1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 2, s2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 2, yp, size));                      \
    ASSERT (! refmpn_overlap_p (ep, 2, rp, size));                      \
									\
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
    ASSERT_MPN (yp, size);                                              \
									\
    ep[0] = ep[1] = CNST_LIMB(0);                                       \
									\
    for (i = 0; i < size; i++)                                          \
      {                                                                 \
	carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
	if (carry == 1)                                                 \
	  {                                                             \
	    /* y is read high-to-low as the main loop runs low-to-high */ \
	    carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]);   \
	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
	    ASSERT (carry2 == 0);                                       \
	  }                                                             \
      }                                                                 \
    return carry;                                                       \
  }
655
/* rp = s1 + s2 + carry; {ep,2} accumulates the y limbs at carry positions */
mp_limb_t
refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr yp,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR1_N (adc);
}
/* rp = s1 - s2 - carry; {ep,2} accumulates the y limbs at borrow positions */
mp_limb_t
refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr yp,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR1_N (sbb);
}
670
671
/* Like AORS_ERR1_N but with two error vectors: ep[0..1] accumulates the
   y1p limbs and ep[2..3] the y2p limbs, each read high-to-low at positions
   that produced a carry/borrow out. */
#define AORS_ERR2_N(operation)                                          \
  {                                                                     \
    mp_size_t  i;                                                       \
    mp_limb_t carry2;                                                   \
									\
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                 \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                 \
    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));                  \
    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));                  \
    ASSERT (! refmpn_overlap_p (ep, 4, s1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, s2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, y1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, y2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 4, rp, size));                      \
									\
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
    ASSERT_MPN (y1p, size);                                             \
    ASSERT_MPN (y2p, size);                                             \
									\
    ep[0] = ep[1] = CNST_LIMB(0);                                       \
    ep[2] = ep[3] = CNST_LIMB(0);                                       \
									\
    for (i = 0; i < size; i++)                                          \
      {                                                                 \
	carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
	if (carry == 1)                                                 \
	  {                                                             \
	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);  \
	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
	    ASSERT (carry2 == 0);                                       \
	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);  \
	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);             \
	    ASSERT (carry2 == 0);                                       \
	  }                                                             \
      }                                                                 \
    return carry;                                                       \
  }
712
/* rp = s1 + s2 + carry, accumulating two error terms (see AORS_ERR2_N) */
mp_limb_t
refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR2_N (adc);
}
/* rp = s1 - s2 - carry, accumulating two error terms (see AORS_ERR2_N) */
mp_limb_t
refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR2_N (sbb);
}
727
728
/* Like AORS_ERR1_N but with three error vectors: ep[0..1], ep[2..3] and
   ep[4..5] accumulate the y1p, y2p and y3p limbs respectively, each read
   high-to-low at positions that produced a carry/borrow out. */
#define AORS_ERR3_N(operation)                                          \
  {                                                                     \
    mp_size_t  i;                                                       \
    mp_limb_t carry2;                                                   \
									\
    ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size));                 \
    ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size));                 \
    ASSERT (! refmpn_overlap_p (rp, size, y1p, size));                  \
    ASSERT (! refmpn_overlap_p (rp, size, y2p, size));                  \
    ASSERT (! refmpn_overlap_p (rp, size, y3p, size));                  \
    ASSERT (! refmpn_overlap_p (ep, 6, s1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, s2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, y1p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, y2p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, y3p, size));                     \
    ASSERT (! refmpn_overlap_p (ep, 6, rp, size));                      \
									\
    ASSERT (carry == 0 || carry == 1);                                  \
    ASSERT (size >= 1);                                                 \
    ASSERT_MPN (s1p, size);                                             \
    ASSERT_MPN (s2p, size);                                             \
    ASSERT_MPN (y1p, size);                                             \
    ASSERT_MPN (y2p, size);                                             \
    ASSERT_MPN (y3p, size);                                             \
									\
    ep[0] = ep[1] = CNST_LIMB(0);                                       \
    ep[2] = ep[3] = CNST_LIMB(0);                                       \
    ep[4] = ep[5] = CNST_LIMB(0);                                       \
									\
    for (i = 0; i < size; i++)                                          \
      {                                                                 \
	carry = operation (&rp[i], s1p[i], s2p[i], carry);              \
	if (carry == 1)                                                 \
	  {                                                             \
	    carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]);  \
	    carry2 = ref_addc_limb (&ep[1], ep[1], carry2);             \
	    ASSERT (carry2 == 0);                                       \
	    carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]);  \
	    carry2 = ref_addc_limb (&ep[3], ep[3], carry2);             \
	    ASSERT (carry2 == 0);                                       \
	    carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]);  \
	    carry2 = ref_addc_limb (&ep[5], ep[5], carry2);             \
	    ASSERT (carry2 == 0);                                       \
	  }                                                             \
      }                                                                 \
    return carry;                                                       \
  }
776
/* rp = s1 + s2 + carry, accumulating three error terms (see AORS_ERR3_N) */
mp_limb_t
refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR3_N (adc);
}
/* rp = s1 - s2 - carry, accumulating three error terms (see AORS_ERR3_N) */
mp_limb_t
refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
		   mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
		   mp_size_t size, mp_limb_t carry)
{
  AORS_ERR3_N (sbb);
}
791
792
793 mp_limb_t
refmpn_addlsh_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s)794 refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
795 mp_size_t n, unsigned int s)
796 {
797 mp_limb_t cy;
798 mp_ptr tp;
799
800 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
801 ASSERT (n >= 1);
802 ASSERT (0 < s && s < GMP_NUMB_BITS);
803 ASSERT_MPN (up, n);
804 ASSERT_MPN (vp, n);
805
806 tp = refmpn_malloc_limbs (n);
807 cy = refmpn_lshift (tp, vp, n, s);
808 cy += refmpn_add_n (rp, up, tp, n);
809 free (tp);
810 return cy;
811 }
/* rp = up + (vp << 1) */
mp_limb_t
refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, up, vp, n, 1);
}
/* rp = up + (vp << 2) */
mp_limb_t
refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, up, vp, n, 2);
}
/* in-place: rp = rp + (vp << s) */
mp_limb_t
refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_addlsh_n (rp, rp, vp, n, s);
}
/* in-place: rp = rp + (vp << 1) */
mp_limb_t
refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, rp, vp, n, 1);
}
/* in-place: rp = rp + (vp << 2) */
mp_limb_t
refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, rp, vp, n, 2);
}
/* in-place on the shifted operand: rp = vp + (rp << s) */
mp_limb_t
refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_addlsh_n (rp, vp, rp, n, s);
}
/* in-place on the shifted operand: rp = vp + (rp << 1) */
mp_limb_t
refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, vp, rp, n, 1);
}
/* in-place on the shifted operand: rp = vp + (rp << 2) */
mp_limb_t
refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, vp, rp, n, 2);
}
/* rp = up + (vp << s) + carry, where carry may be as large as 2^s. */
mp_limb_t
refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
		  mp_size_t n, unsigned int s, mp_limb_t carry)
{
  mp_limb_t cy;

  ASSERT (carry <= (CNST_LIMB(1) << s));

  /* the two partial carries cannot overflow cy */
  cy = refmpn_addlsh_n (rp, up, vp, n, s);
  cy += refmpn_add_1 (rp, rp, n, carry);
  return cy;
}
/* rp = up + (vp << 1) + carry */
mp_limb_t
refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
}
/* rp = up + (vp << 2) + carry */
mp_limb_t
refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
}
874
875 mp_limb_t
refmpn_sublsh_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s)876 refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
877 mp_size_t n, unsigned int s)
878 {
879 mp_limb_t cy;
880 mp_ptr tp;
881
882 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
883 ASSERT (n >= 1);
884 ASSERT (0 < s && s < GMP_NUMB_BITS);
885 ASSERT_MPN (up, n);
886 ASSERT_MPN (vp, n);
887
888 tp = refmpn_malloc_limbs (n);
889 cy = mpn_lshift (tp, vp, n, s);
890 cy += mpn_sub_n (rp, up, tp, n);
891 free (tp);
892 return cy;
893 }
/* rp = up - (vp << 1) */
mp_limb_t
refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, up, vp, n, 1);
}
/* rp = up - (vp << 2) */
mp_limb_t
refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, up, vp, n, 2);
}
/* in-place: rp = rp - (vp << s) */
mp_limb_t
refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_sublsh_n (rp, rp, vp, n, s);
}
/* in-place: rp = rp - (vp << 1) */
mp_limb_t
refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, rp, vp, n, 1);
}
/* in-place: rp = rp - (vp << 2) */
mp_limb_t
refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, rp, vp, n, 2);
}
/* in-place on the shifted operand: rp = vp - (rp << s) */
mp_limb_t
refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
{
  return refmpn_sublsh_n (rp, vp, rp, n, s);
}
/* in-place on the shifted operand: rp = vp - (rp << 1) */
mp_limb_t
refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, vp, rp, n, 1);
}
/* in-place on the shifted operand: rp = vp - (rp << 2) */
mp_limb_t
refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, vp, rp, n, 2);
}
/* rp = up - (vp << s) - carry, where carry may be as large as 2^s. */
mp_limb_t
refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
		  mp_size_t n, unsigned int s, mp_limb_t carry)
{
  mp_limb_t cy;

  ASSERT (carry <= (CNST_LIMB(1) << s));

  /* the two partial borrows cannot overflow cy */
  cy = refmpn_sublsh_n (rp, up, vp, n, s);
  cy += refmpn_sub_1 (rp, rp, n, carry);
  return cy;
}
/* rp = up - (vp << 1) - carry */
mp_limb_t
refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
}
/* rp = up - (vp << 2) - carry */
mp_limb_t
refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
{
  return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
}
956
957 mp_limb_signed_t
refmpn_rsblsh_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,unsigned int s)958 refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
959 mp_size_t n, unsigned int s)
960 {
961 mp_limb_signed_t cy;
962 mp_ptr tp;
963
964 ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
965 ASSERT (n >= 1);
966 ASSERT (0 < s && s < GMP_NUMB_BITS);
967 ASSERT_MPN (up, n);
968 ASSERT_MPN (vp, n);
969
970 tp = refmpn_malloc_limbs (n);
971 cy = mpn_lshift (tp, vp, n, s);
972 cy -= mpn_sub_n (rp, tp, up, n);
973 free (tp);
974 return cy;
975 }
/* rp = (vp << 1) - up */
mp_limb_signed_t
refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_rsblsh_n (rp, up, vp, n, 1);
}
/* rp = (vp << 2) - up */
mp_limb_signed_t
refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_rsblsh_n (rp, up, vp, n, 2);
}
/* rp = (vp << s) - up + carry, where carry is -1 or 0 .. 2^s - 1. */
mp_limb_signed_t
refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
		  mp_size_t n, unsigned int s, mp_limb_signed_t carry)
{
  mp_limb_signed_t cy;

  ASSERT (carry == -1 || (carry >> s) == 0);

  /* apply the carry as a plain add or subtract of its magnitude */
  cy = refmpn_rsblsh_n (rp, up, vp, n, s);
  if (carry > 0)
    cy += refmpn_add_1 (rp, rp, n, carry);
  else
    cy -= refmpn_sub_1 (rp, rp, n, -carry);
  return cy;
}
/* rp = (vp << 1) - up + carry */
mp_limb_signed_t
refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
{
  return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
}
/* rp = (vp << 2) - up + carry */
mp_limb_signed_t
refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
{
  return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
}
1011
/* {rp,n} = ({up,n} + {vp,n}) >> 1, with the add's carry re-inserted as the
   top bit.  Return the bit shifted out at the low end.  */
mp_limb_t
refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_add_n (rp, up, vp, n);
  /* mpn_rshift returns the shifted-out bits at the limb's high end;
     move the single bit down to position 0.  */
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  /* carry from the add becomes the result's most significant bit */
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  return cys;
}
/* {rp,n} = ({up,n} - {vp,n}) >> 1, with the subtract's borrow re-inserted
   as the top bit.  Return the bit shifted out at the low end.  */
mp_limb_t
refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_sub_n (rp, up, vp, n);
  /* move the shifted-out bit from the limb's high end to position 0 */
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  /* borrow becomes the result's most significant bit */
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);
  return cys;
}
1042
1043 /* Twos complement, return borrow. */
1044 mp_limb_t
refmpn_neg(mp_ptr dst,mp_srcptr src,mp_size_t size)1045 refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
1046 {
1047 mp_ptr zeros;
1048 mp_limb_t ret;
1049
1050 ASSERT (size >= 1);
1051
1052 zeros = refmpn_malloc_limbs (size);
1053 refmpn_fill (zeros, size, CNST_LIMB(0));
1054 ret = refmpn_sub_n (dst, zeros, src, size);
1055 free (zeros);
1056 return ret;
1057 }
1058
1059
/* Shared body for refmpn_add and refmpn_sub: apply the size-n operation to
   the common low part, then propagate the carry/borrow through the high
   part of the longer operand with the single-limb variant.  */
#define AORS(aors_n, aors_1) \
  { \
    mp_limb_t  c; \
    ASSERT (s1size >= s2size); \
    ASSERT (s2size >= 1); \
    c = aors_n (rp, s1p, s2p, s2size); \
    if (s1size-s2size != 0) \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c); \
    return c; \
  }
/* {rp,s1size} = {s1p,s1size} + {s2p,s2size}; return carry out.  */
mp_limb_t
refmpn_add (mp_ptr rp,
	    mp_srcptr s1p, mp_size_t s1size,
	    mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}
/* {rp,s1size} = {s1p,s1size} - {s2p,s2size}; return borrow out.  */
mp_limb_t
refmpn_sub (mp_ptr rp,
	    mp_srcptr s1p, mp_size_t s1size,
	    mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}
1084
1085
/* Half-limb decomposition helpers for the portable umul below.
   SHIFTHIGH/SHIFTLOW move a half-limb between the low and high halves;
   LOWPART/HIGHPART extract the halves (HIGHPART result is in the low half).  */
#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
#define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)

#define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)
1094
/* Set return:*lo to x*y, using full limbs not nails.
   Schoolbook on half-limbs: lolo and hihi go straight to *lo and hi, and
   the two cross products are split, the top halves into hi and the bottom
   halves added into *lo with carry propagated into hi.  */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  hi, s;

  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  /* first cross product */
  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);   /* carry from the low-limb add */

  /* second cross product */
  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);   /* carry from the low-limb add */

  return hi;
}
1118
1119 mp_limb_t
refmpn_umul_ppmm_r(mp_limb_t x,mp_limb_t y,mp_limb_t * lo)1120 refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
1121 {
1122 return refmpn_umul_ppmm (lo, x, y);
1123 }
1124
/* {rp,size} = {sp,size} * multiplier + carry; return the high limb.
   Works limb by limb, doing the multiply on full limbs and shifting the
   nail bits back out afterwards.  */
mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
	       mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  /* pre-shift the multiplier so each product lands with the nail at the
     bottom of lo, where a single shift removes it */
  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      lo >>= GMP_NAIL_BITS;
      /* adding the carry-in can never overflow hi here */
      ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}
1149
1150 mp_limb_t
refmpn_mul_1(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_limb_t multiplier)1151 refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1152 {
1153 return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1154 }
1155
1156
1157 mp_limb_t
refmpn_mul_N(mp_ptr dst,mp_srcptr src,mp_size_t size,mp_srcptr mult,mp_size_t msize)1158 refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
1159 mp_srcptr mult, mp_size_t msize)
1160 {
1161 mp_ptr src_copy;
1162 mp_limb_t ret;
1163 mp_size_t i;
1164
1165 ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
1166 ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
1167 ASSERT (size >= msize);
1168 ASSERT_MPN (mult, msize);
1169
1170 /* in case dst==src */
1171 src_copy = refmpn_malloc_limbs (size);
1172 refmpn_copyi (src_copy, src, size);
1173 src = src_copy;
1174
1175 dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
1176 for (i = 1; i < msize-1; i++)
1177 dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1178 ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);
1179
1180 free (src_copy);
1181 return ret;
1182 }
1183
1184 mp_limb_t
refmpn_mul_2(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1185 refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1186 {
1187 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
1188 }
1189 mp_limb_t
refmpn_mul_3(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1190 refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1191 {
1192 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
1193 }
1194 mp_limb_t
refmpn_mul_4(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1195 refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1196 {
1197 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
1198 }
1199 mp_limb_t
refmpn_mul_5(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1200 refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1201 {
1202 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
1203 }
1204 mp_limb_t
refmpn_mul_6(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1205 refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1206 {
1207 return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
1208 }
1209
/* Shared body for refmpn_addmul_1c / refmpn_submul_1c: form the product
   {sp,size}*multiplier+carry in a temporary, then add it to or subtract it
   from {rp,size}.  The temporary allows rp==sp.  */
#define AORSMUL_1C(operation_n)                                 \
  {                                                             \
    mp_ptr     p;                                               \
    mp_limb_t  ret;                                             \
                                                                \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));          \
                                                                \
    p = refmpn_malloc_limbs (size);                             \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);       \
    ret += operation_n (rp, rp, p, size);                       \
                                                                \
    free (p);                                                   \
    return ret;                                                 \
  }
1224
/* {rp,size} += {sp,size} * multiplier + carry; return the high limb.  */
mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
		  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}
/* {rp,size} -= {sp,size} * multiplier + carry; return borrow plus the
   product's high limb.  */
mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
		  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}
1237
1238
1239 mp_limb_t
refmpn_addmul_1(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_limb_t multiplier)1240 refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1241 {
1242 return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1243 }
1244 mp_limb_t
refmpn_submul_1(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_limb_t multiplier)1245 refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
1246 {
1247 return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
1248 }
1249
1250
/* {dst,size+msize} += {src,size} * {mult,msize}; return the final carry
   limb.  dst may equal src (a copy is taken first), but must not otherwise
   overlap the sources.  */
mp_limb_t
refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
		 mp_srcptr mult, mp_size_t msize)
{
  mp_ptr     src_copy;
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  ASSERT (size >= msize);
  ASSERT_MPN (mult, msize);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  /* accumulate each row; the last row's carry is the return value */
  for (i = 0; i < msize-1; i++)
    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);

  free (src_copy);
  return ret;
}
1276
1277 mp_limb_t
refmpn_addmul_2(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1278 refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1279 {
1280 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
1281 }
1282 mp_limb_t
refmpn_addmul_3(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1283 refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1284 {
1285 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
1286 }
1287 mp_limb_t
refmpn_addmul_4(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1288 refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1289 {
1290 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
1291 }
1292 mp_limb_t
refmpn_addmul_5(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1293 refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1294 {
1295 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
1296 }
1297 mp_limb_t
refmpn_addmul_6(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1298 refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1299 {
1300 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
1301 }
1302 mp_limb_t
refmpn_addmul_7(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1303 refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1304 {
1305 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
1306 }
1307 mp_limb_t
refmpn_addmul_8(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_srcptr mult)1308 refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
1309 {
1310 return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
1311 }
1312
/* Simultaneous {r1p,size} = s1+s2 and {r2p,size} = s1-s2.
   carry bit 1 is the add's carry-in, bit 0 the sub's borrow-in; the return
   packs them the same way: 2*add_carry + sub_borrow.  */
mp_limb_t
refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
		     mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
		     mp_limb_t carry)
{
  mp_ptr p;
  mp_limb_t acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* in case r1p==s1p or r1p==s2p */
  p = refmpn_malloc_limbs (size);

  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);

  free (p);
  return 2 * acy + scy;
}
1337
1338 mp_limb_t
refmpn_add_n_sub_n(mp_ptr r1p,mp_ptr r2p,mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)1339 refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
1340 mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
1341 {
1342 return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
1343 }
1344
1345
1346 /* Right shift hi,lo and return the low limb of the result.
1347 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1348 mp_limb_t
rshift_make(mp_limb_t hi,mp_limb_t lo,unsigned shift)1349 rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1350 {
1351 ASSERT (shift < GMP_NUMB_BITS);
1352 if (shift == 0)
1353 return lo;
1354 else
1355 return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
1356 }
1357
1358 /* Left shift hi,lo and return the high limb of the result.
1359 Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
1360 mp_limb_t
lshift_make(mp_limb_t hi,mp_limb_t lo,unsigned shift)1361 lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
1362 {
1363 ASSERT (shift < GMP_NUMB_BITS);
1364 if (shift == 0)
1365 return hi;
1366 else
1367 return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
1368 }
1369
1370
/* {rp,size} = {sp,size} >> shift (1 <= shift < GMP_NUMB_BITS).
   Return the bits shifted out, placed at the high end of a limb.  */
mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  /* return value: the low limb's discarded bits, left-justified */
  ret = rshift_make (sp[0], CNST_LIMB(0), shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  /* top limb gets zeros shifted in */
  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  return ret;
}
1390
/* {rp,size} = {sp,size} << shift (1 <= shift < GMP_NUMB_BITS).
   Return the bits shifted out, placed at the low end of a limb.  */
mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  /* return value: the top limb's discarded bits, right-justified */
  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  /* after the loop i == -1, so this writes rp[0], with zeros shifted in */
  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  return ret;
}
1410
/* {rp,size} = one's complement of {sp,size}, masked to the numb bits.  */
void
refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t i;

  /* We work downwards since mpn_lshiftc needs that.  Don't change the
     direction: refmpn_lshiftc relies on it for its in-place use.  */
  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));

  for (i = size - 1; i >= 0; i--)
    rp[i] = (~sp[i]) & GMP_NUMB_MASK;
}
1422
1423 mp_limb_t
refmpn_lshiftc(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1424 refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1425 {
1426 mp_limb_t res;
1427
1428 /* No asserts here, refmpn_lshift will assert what we need. */
1429
1430 res = refmpn_lshift (rp, sp, size, shift);
1431 refmpn_com (rp, rp, size);
1432 return res;
1433 }
1434
1435 /* accepting shift==0 and doing a plain copyi or copyd in that case */
1436 mp_limb_t
refmpn_rshift_or_copy(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1437 refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1438 {
1439 if (shift == 0)
1440 {
1441 refmpn_copyi (rp, sp, size);
1442 return 0;
1443 }
1444 else
1445 {
1446 return refmpn_rshift (rp, sp, size, shift);
1447 }
1448 }
1449 mp_limb_t
refmpn_lshift_or_copy(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1450 refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
1451 {
1452 if (shift == 0)
1453 {
1454 refmpn_copyd (rp, sp, size);
1455 return 0;
1456 }
1457 else
1458 {
1459 return refmpn_lshift (rp, sp, size, shift);
1460 }
1461 }
1462
1463 /* accepting size==0 too */
1464 mp_limb_t
refmpn_rshift_or_copy_any(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1465 refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1466 unsigned shift)
1467 {
1468 return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
1469 }
1470 mp_limb_t
refmpn_lshift_or_copy_any(mp_ptr rp,mp_srcptr sp,mp_size_t size,unsigned shift)1471 refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1472 unsigned shift)
1473 {
1474 return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
1475 }
1476
/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works.  */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r;
  int  n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  /* direct longlong.h primitive, kept disabled as an alternative */
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  /* normalize so the divisor's high bit is set, as __udiv_qrnnd_c needs */
  n = refmpn_count_leading_zeros (d);
  d <<= n;

  if (n != 0)
    {
      /* shift the dividend by the same amount; n < GMP_LIMB_BITS because
	 d != 0, so the complementary shift below is well defined */
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  r >>= n;   /* un-normalize the remainder */
  *rp = r;
  return q;
}
1510
1511 mp_limb_t
refmpn_udiv_qrnnd_r(mp_limb_t h,mp_limb_t l,mp_limb_t d,mp_limb_t * rp)1512 refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
1513 {
1514 return refmpn_udiv_qrnnd (rp, h, l, d);
1515 }
1516
/* This little subroutine avoids some bad code generation from i386 gcc 3.0
   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).
   Plain schoolbook division from the top limb down; the nail shifts move
   each limb and the divisor to full-limb form for refmpn_udiv_qrnnd.  */
static mp_limb_t
refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
			     mp_limb_t divisor, mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t rem[1];
  for (i = size-1; i >= 0; i--)
    {
      rp[i] = refmpn_udiv_qrnnd (rem, carry,
				 sp[i] << GMP_NAIL_BITS,
				 divisor << GMP_NAIL_BITS);
      carry = *rem >> GMP_NAIL_BITS;
    }
  return carry;
}
1534
1535 mp_limb_t
refmpn_divmod_1c(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_limb_t divisor,mp_limb_t carry)1536 refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
1537 mp_limb_t divisor, mp_limb_t carry)
1538 {
1539 mp_ptr sp_orig;
1540 mp_ptr prod;
1541 mp_limb_t carry_orig;
1542
1543 ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
1544 ASSERT (size >= 0);
1545 ASSERT (carry < divisor);
1546 ASSERT_MPN (sp, size);
1547 ASSERT_LIMB (divisor);
1548 ASSERT_LIMB (carry);
1549
1550 if (size == 0)
1551 return carry;
1552
1553 sp_orig = refmpn_memdup_limbs (sp, size);
1554 prod = refmpn_malloc_limbs (size);
1555 carry_orig = carry;
1556
1557 carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);
1558
1559 /* check by multiplying back */
1560 #if 0
1561 printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
1562 size, divisor, carry_orig, carry);
1563 mpn_trace("s",sp_copy,size);
1564 mpn_trace("r",rp,size);
1565 printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
1566 mpn_trace("p",prod,size);
1567 #endif
1568 ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
1569 ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);
1570 free (sp_orig);
1571 free (prod);
1572
1573 return carry;
1574 }
1575
1576 mp_limb_t
refmpn_divmod_1(mp_ptr rp,mp_srcptr sp,mp_size_t size,mp_limb_t divisor)1577 refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1578 {
1579 return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
1580 }
1581
1582
1583 mp_limb_t
refmpn_mod_1c(mp_srcptr sp,mp_size_t size,mp_limb_t divisor,mp_limb_t carry)1584 refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1585 mp_limb_t carry)
1586 {
1587 mp_ptr p = refmpn_malloc_limbs (size);
1588 carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
1589 free (p);
1590 return carry;
1591 }
1592
1593 mp_limb_t
refmpn_mod_1(mp_srcptr sp,mp_size_t size,mp_limb_t divisor)1594 refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1595 {
1596 return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
1597 }
1598
1599 mp_limb_t
refmpn_preinv_mod_1(mp_srcptr sp,mp_size_t size,mp_limb_t divisor,mp_limb_t inverse)1600 refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
1601 mp_limb_t inverse)
1602 {
1603 ASSERT (divisor & GMP_NUMB_HIGHBIT);
1604 ASSERT (inverse == refmpn_invert_limb (divisor));
1605 return refmpn_mod_1 (sp, size, divisor);
1606 }
1607
/* This implementation will be rather slow, but has the advantage of being
   in a different style than the libgmp versions.
   Remainder of {p,n} modulo 2^(3*GMP_NUMB_BITS/4) - 1.  */
mp_limb_t
refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
{
  ASSERT ((GMP_NUMB_BITS % 4) == 0);
  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
}
1616
1617
/* Fraction-extending division: divide {sp,size} (plus carry) by divisor,
   storing the integer quotient at rp+xsize and xsize further fraction
   quotient limbs (dividing the remainder extended by zeros) at rp.
   Return the final remainder.  */
mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
		  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
		  mp_limb_t carry)
{
  mp_ptr  z;

  /* zero block standing in for the fraction digits of the dividend */
  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, CNST_LIMB(0));

  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}
1634
1635 mp_limb_t
refmpn_divrem_1(mp_ptr rp,mp_size_t xsize,mp_srcptr sp,mp_size_t size,mp_limb_t divisor)1636 refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
1637 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
1638 {
1639 return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
1640 }
1641
/* As refmpn_divrem_1, but the caller supplies the divisor's leading-zero
   count and a precomputed inverse of the normalized divisor; both are
   merely validated here.  */
mp_limb_t
refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
			mp_srcptr sp, mp_size_t size,
			mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
{
  ASSERT (size >= 0);
  ASSERT (shift == refmpn_count_leading_zeros (divisor));
  ASSERT (inverse == refmpn_invert_limb (divisor << shift));

  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
}
1653
/* Divide {np,nn} extended by qxn zero fraction limbs by the 2-limb divisor
   {dp,2}.  Quotient goes to {qp,...}, the high quotient limb is returned,
   and the 2-limb remainder is written back to the start of np.  */
mp_limb_t
refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
		 mp_ptr np, mp_size_t nn,
		 mp_srcptr dp)
{
  mp_ptr tp;
  mp_limb_t qh;

  /* build the extended dividend: qxn low zero limbs then {np,nn} */
  tp = refmpn_malloc_limbs (nn + qxn);
  refmpn_zero (tp, qxn);
  refmpn_copyi (tp + qxn, np, nn);
  qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
  /* remainder occupies the low 2 limbs of the scratch dividend */
  refmpn_copyi (np, tp, 2);
  free (tp);
  return qh;
}
1670
/* Inverse is floor((b*(b-d)-1) / d), per division by invariant integers
   paper, figure 8.1 m', where b=2^GMP_LIMB_BITS.  Note that -d-1 < d
   since d has the high bit set.  */
mp_limb_t
refmpn_invert_limb (mp_limb_t d)
{
  mp_limb_t r;
  ASSERT (d & GMP_LIMB_HIGHBIT);
  /* numerator is (b-d-1)*b + (b-1) = b*(b-d)-1 */
  return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
}
1682
/* {rp,n} = floor((b^(2n) - 1) / {up,n}) - b^n, the n-limb approximate
   reciprocal.  The scratch argument is unused by this reference version.  */
void
refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr qp, tp;
  TMP_DECL;
  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (2 * n);
  qp = TMP_ALLOC_LIMBS (n + 1);

  /* set {tp,2n} to b^(2n) - 1, ie. all ones */
  MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);

  /* quotient is n+1 limbs; the low n limbs are the wanted inverse */
  refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
  refmpn_copyi (rp, qp, n);

  TMP_FREE;
}
1700
/* {rp,n} = inverse of {up,n} modulo b^n (up must be odd), computed by
   dividing 1 by up with a library bdiv and verifying with a multiply.  */
void
refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr tp;
  mp_limb_t binv;
  TMP_DECL;
  TMP_MARK;

  /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
     code.  To make up for it, we check that the inverse is correct using a
     multiply.  */

  tp = TMP_ALLOC_LIMBS (2 * n);

  /* dividend is simply 1 */
  MPN_ZERO (tp, n);
  tp[0] = 1;
  binvert_limb (binv, up[0]);
  mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);

  /* verify: up * rp == 1 mod b^n */
  refmpn_mul_n (tp, rp, up, n);
  ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));

  TMP_FREE;
}
1725
/* The aim is to produce a dst quotient and return a remainder c, satisfying
   c*b^n + src-i == 3*dst, where i is the incoming carry.

   Some value c==0, c==1 or c==2 will satisfy, so just try each.

   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
   remainder from the first division attempt determines the correct
   remainder (3-c), but don't bother with that, since we can't guarantee
   anything about GMP_NUMB_BITS when using nails.

   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
   complement negative, ie. b^n+a-i, and the calculation produces c1
   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
   means it's enough to just add any borrow back at the end.

   A borrow only occurs when a==0 or a==1, and, by the same reasoning as in
   mpn/generic/diveby3.c, the c1 that results in those cases will only be 0
   or 1 respectively, so with 1 added the final return value is still in the
   prescribed range 0 to 2. */

mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr     spcopy;
  mp_limb_t  c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  /* subtract the carry-in; any borrow is fixed up below */
  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  /* try each possible out-carry until the division is exact */
  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  c += cs;   /* add back the borrow, per the comment above */
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}
1772
1773 mp_limb_t
refmpn_divexact_by3(mp_ptr rp,mp_srcptr sp,mp_size_t size)1774 refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1775 {
1776 return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
1777 }
1778
1779
/* The same as mpn/generic/mul_basecase.c, but using refmpn functions.
   {prodp,usize+vsize} = {up,usize} * {vp,vsize}, schoolbook: one mul_1 row
   then an addmul_1 row per remaining multiplier limb.  */
void
refmpn_mul_basecase (mp_ptr prodp,
		     mp_srcptr up, mp_size_t usize,
		     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t i;

  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}
1799
1800
/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions.
   Middle product: rp gets un-vn+3 limbs covering the "middle" diagonal
   terms of {up,un} * {vp,vn}, with two extra limbs of accumulated carry.  */
void
refmpn_mulmid_basecase (mp_ptr rp,
			mp_srcptr up, mp_size_t un,
			mp_srcptr vp, mp_size_t vn)
{
  mp_limb_t cy;
  mp_size_t i;

  ASSERT (un >= vn);
  ASSERT (vn >= 1);
  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
  ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
  ASSERT_MPN (up, un);
  ASSERT_MPN (vp, vn);

  /* first diagonal row, then one shifted addmul row per further v limb,
     folding each row's carry into the top two result limbs */
  rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
  rp[un - vn + 2] = CNST_LIMB (0);
  for (i = 1; i < vn; i++)
    {
      cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
      cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
      cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
      ASSERT (cy == 0);
    }
}
1827
1828 void
refmpn_toom42_mulmid(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n,mp_ptr scratch)1829 refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
1830 mp_ptr scratch)
1831 {
1832 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1833 }
1834
1835 void
refmpn_mulmid_n(mp_ptr rp,mp_srcptr up,mp_srcptr vp,mp_size_t n)1836 refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
1837 {
1838 /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1839 off products near the middle product region boundary */
1840 refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
1841 }
1842
1843 void
refmpn_mulmid(mp_ptr rp,mp_srcptr up,mp_size_t un,mp_srcptr vp,mp_size_t vn)1844 refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
1845 mp_srcptr vp, mp_size_t vn)
1846 {
1847 /* FIXME: this could be made faster by using refmpn_mul and then subtracting
1848 off products near the middle product region boundary */
1849 refmpn_mulmid_basecase (rp, up, un, vp, vn);
1850 }
1851
1852
1853
/* Size thresholds used by refmpn_mul below to pick which library toom
   routine to exercise; each is the larger of the mul and sqr tuning.  */
#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
#if WANT_FFT
#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
#else
#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
#endif
1862
/* {wp,un+vn} = {up,un} * {vp,vn}, un >= vn.
   Small vn uses our own mul_basecase.  Larger vn multiplies the low vn
   limbs of up by vp using a library toom routine one level below the size
   range being tested, then handles the remaining high part of up by
   recursion and an add.  */
void
refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;
  mp_size_t tn;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and toom2 range, use our own mul_basecase.  */
      if (vn != 0)
	refmpn_mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  /* Each branch computes {tp,2vn} = {up,vn} * {vp,vn} with scratch space
     following the product in the same allocation.  */
  if (vn < TOOM4_THRESHOLD)
    {
      /* In the toom3 range, use mpn_toom22_mul.  */
      tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }
  else if (vn < TOOM6_THRESHOLD)
    {
      /* In the toom4 range, use mpn_toom33_mul.  */
      tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }
  else if (vn < FFT_THRESHOLD)
    {
      /* In the toom6 range, use mpn_toom44_mul.  */
      tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom6h_mul.  */
      tn = 2 * vn + mpn_toom6h_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom6h_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      /* multiply the high part of up by vp, larger operand first */
      if (un - vn < vn)
	refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
	refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      /* combine: low vn limbs copied, high part of tp added in */
      MPN_COPY (wp, tp, vn);
      ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn));
    }
  else
    {
      MPN_COPY (wp, tp, 2 * vn);
    }

  free (tp);
}
1925
1926 void
refmpn_mul_n(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)1927 refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1928 {
1929 refmpn_mul (prodp, up, size, vp, size);
1930 }
1931
1932 void
refmpn_mullo_n(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)1933 refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
1934 {
1935 mp_ptr tp = refmpn_malloc_limbs (2*size);
1936 refmpn_mul (tp, up, size, vp, size);
1937 refmpn_copyi (prodp, tp, size);
1938 free (tp);
1939 }
1940
1941 void
refmpn_sqr(mp_ptr dst,mp_srcptr src,mp_size_t size)1942 refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
1943 {
1944 refmpn_mul (dst, src, size, src, size);
1945 }
1946
1947 /* Allowing usize<vsize, usize==0 or vsize==0. */
1948 void
refmpn_mul_any(mp_ptr prodp,mp_srcptr up,mp_size_t usize,mp_srcptr vp,mp_size_t vsize)1949 refmpn_mul_any (mp_ptr prodp,
1950 mp_srcptr up, mp_size_t usize,
1951 mp_srcptr vp, mp_size_t vsize)
1952 {
1953 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
1954 ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
1955 ASSERT (usize >= 0);
1956 ASSERT (vsize >= 0);
1957 ASSERT_MPN (up, usize);
1958 ASSERT_MPN (vp, vsize);
1959
1960 if (usize == 0)
1961 {
1962 refmpn_fill (prodp, vsize, CNST_LIMB(0));
1963 return;
1964 }
1965
1966 if (vsize == 0)
1967 {
1968 refmpn_fill (prodp, usize, CNST_LIMB(0));
1969 return;
1970 }
1971
1972 if (usize >= vsize)
1973 refmpn_mul (prodp, up, usize, vp, vsize);
1974 else
1975 refmpn_mul (prodp, vp, vsize, up, usize);
1976 }
1977
1978
1979 mp_limb_t
refmpn_gcd_1(mp_srcptr xp,mp_size_t xsize,mp_limb_t y)1980 refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
1981 {
1982 mp_limb_t x;
1983 int twos;
1984
1985 ASSERT (y != 0);
1986 ASSERT (! refmpn_zero_p (xp, xsize));
1987 ASSERT_MPN (xp, xsize);
1988 ASSERT_LIMB (y);
1989
1990 x = refmpn_mod_1 (xp, xsize, y);
1991 if (x == 0)
1992 return y;
1993
1994 twos = 0;
1995 while ((x & 1) == 0 && (y & 1) == 0)
1996 {
1997 x >>= 1;
1998 y >>= 1;
1999 twos++;
2000 }
2001
2002 for (;;)
2003 {
2004 while ((x & 1) == 0) x >>= 1;
2005 while ((y & 1) == 0) y >>= 1;
2006
2007 if (x < y)
2008 MP_LIMB_T_SWAP (x, y);
2009
2010 x -= y;
2011 if (x == 0)
2012 break;
2013 }
2014
2015 return y << twos;
2016 }
2017
2018
2019 /* Based on the full limb x, not nails. */
2020 unsigned
refmpn_count_leading_zeros(mp_limb_t x)2021 refmpn_count_leading_zeros (mp_limb_t x)
2022 {
2023 unsigned n = 0;
2024
2025 ASSERT (x != 0);
2026
2027 while ((x & GMP_LIMB_HIGHBIT) == 0)
2028 {
2029 x <<= 1;
2030 n++;
2031 }
2032 return n;
2033 }
2034
2035 /* Full limbs allowed, not limited to nails. */
2036 unsigned
refmpn_count_trailing_zeros(mp_limb_t x)2037 refmpn_count_trailing_zeros (mp_limb_t x)
2038 {
2039 unsigned n = 0;
2040
2041 ASSERT (x != 0);
2042 ASSERT_LIMB (x);
2043
2044 while ((x & 1) == 0)
2045 {
2046 x >>= 1;
2047 n++;
2048 }
2049 return n;
2050 }
2051
2052 /* Strip factors of two (low zero bits) from {p,size} by right shifting.
2053 The return value is the number of twos stripped. */
2054 mp_size_t
refmpn_strip_twos(mp_ptr p,mp_size_t size)2055 refmpn_strip_twos (mp_ptr p, mp_size_t size)
2056 {
2057 mp_size_t limbs;
2058 unsigned shift;
2059
2060 ASSERT (size >= 1);
2061 ASSERT (! refmpn_zero_p (p, size));
2062 ASSERT_MPN (p, size);
2063
2064 for (limbs = 0; p[0] == 0; limbs++)
2065 {
2066 refmpn_copyi (p, p+1, size-1);
2067 p[size-1] = 0;
2068 }
2069
2070 shift = refmpn_count_trailing_zeros (p[0]);
2071 if (shift)
2072 refmpn_rshift (p, p, size, shift);
2073
2074 return limbs*GMP_NUMB_BITS + shift;
2075 }
2076
/* Greatest common divisor of {xp,xsize} and {yp,ysize}, written to gp with
   the return value giving its size in limbs.  Both source operands are
   clobbered (x is subtracted into and stripped; the x/y pointers and sizes
   are swapped as needed).  Subtract-and-strip binary algorithm. */
mp_limb_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be odd */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  /* y is asserted odd above, so factors of two in x are not part of the
     gcd and can be stripped immediately. */
  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
	break;
      /* keep x as the larger operand */
      if (cmp < 0)
	MPN_PTR_SWAP (xp,xsize, yp,ysize);

      /* x > y here, so no borrow out of the subtraction */
      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      /* the difference of two odd values is even; strip those twos */
      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  /* x == y, that common value is the gcd */
  refmpn_copyi (gp, xp, xsize);
  return xsize;
}
2117
2118 unsigned long
ref_popc_limb(mp_limb_t src)2119 ref_popc_limb (mp_limb_t src)
2120 {
2121 unsigned long count;
2122 int i;
2123
2124 count = 0;
2125 for (i = 0; i < GMP_LIMB_BITS; i++)
2126 {
2127 count += (src & 1);
2128 src >>= 1;
2129 }
2130 return count;
2131 }
2132
2133 unsigned long
refmpn_popcount(mp_srcptr sp,mp_size_t size)2134 refmpn_popcount (mp_srcptr sp, mp_size_t size)
2135 {
2136 unsigned long count = 0;
2137 mp_size_t i;
2138
2139 ASSERT (size >= 0);
2140 ASSERT_MPN (sp, size);
2141
2142 for (i = 0; i < size; i++)
2143 count += ref_popc_limb (sp[i]);
2144 return count;
2145 }
2146
2147 unsigned long
refmpn_hamdist(mp_srcptr s1p,mp_srcptr s2p,mp_size_t size)2148 refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
2149 {
2150 mp_ptr d;
2151 unsigned long count;
2152
2153 ASSERT (size >= 0);
2154 ASSERT_MPN (s1p, size);
2155 ASSERT_MPN (s2p, size);
2156
2157 if (size == 0)
2158 return 0;
2159
2160 d = refmpn_malloc_limbs (size);
2161 refmpn_xor_n (d, s1p, s2p, size);
2162 count = refmpn_popcount (d, size);
2163 free (d);
2164 return count;
2165 }
2166
2167
/* set r to a%d, all operands two limbs.  Binary shift-and-subtract: the
   divisor is scaled up by powers of two, then stepped back down with a
   subtraction at each position where it fits. */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t D[2];
  int n;

  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  /* D is the working (shifted) divisor, r starts as the dividend */
  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  /* Scale D up while it is still below r and can shift without losing
     its high bit; n counts the shifts performed. */
  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
        break;
      refmpn_lshift (D, D, (mp_size_t) 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  /* Undo the shifts one at a time, subtracting D whenever it fits;
     plain binary long division, keeping only the remainder. */
  while (n >= 0)
    {
      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
      refmpn_rshift (D, D, (mp_size_t) 2, 1);
      n--;
    }

  /* remainder must end up less than the divisor */
  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
}
2204
2205
2206
/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big.

   Schoolbook division of {np,nsize} by the normalized divisor {dp,dsize}
   (high bit set, dsize>=2).  The quotient is {qp,nsize-dsize} plus the
   returned high limb (0 or 1); the remainder is left in {np,dsize}.  */
mp_limb_t
refmpn_sb_div_qr (mp_ptr qp,
                  mp_ptr np, mp_size_t nsize,
                  mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t retval = 0;
  mp_size_t i;
  mp_limb_t d1 = dp[dsize-1];
  mp_ptr np_orig = refmpn_memdup_limbs (np, nsize);  /* for the multiply-back check */

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  /* High quotient limb is 1 when the top dsize limbs of n cover d. */
  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  for (i--; i >= 0; i--)
    {
      mp_limb_t n0 = np[i+dsize];
      mp_limb_t n1 = np[i+dsize-1];
      mp_limb_t q, dummy_r;

      /* Trial quotient from the top two limbs of the running dividend and
         the divisor's top limb; may be up to 2 too big, fixed below. */
      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = GMP_NUMB_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
                               d1 << GMP_NAIL_BITS);

      /* subtract q*d; a non-zero n0 here means q was too big (borrow) */
      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          /* add the divisor back, at most twice */
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
            }
        }
      np[i+dsize] = 0;

      qp[i] = q;
    }

  /* remainder < divisor */
#if 0 /* ASSERT triggers gcc 4.2.1 bug */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
#endif

  /* multiply back to original, checking q*d + r == n */
  {
    mp_ptr mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}
2284
/* Truncating division of {np,nsize} by {dp,dsize}: quotient to qp, dsize
   remainder limbs to rp.  Uses refmpn_divmod_1 for a single limb divisor,
   otherwise normalizes the divisor and calls refmpn_sb_div_qr. */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  /* qxn (extra fraction limbs of quotient) is not supported here */
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);
  ASSERT (dsize > 0);
  ASSERT (dp[dsize-1] != 0);

  if (dsize == 1)
    {
      /* single limb divisor: use the simple routine directly */
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }
  else
    {
      /* Shift copies of n and d left so the divisor's high bit is set,
         as refmpn_sb_div_qr requires, then shift the remainder back. */
      mp_ptr n2p = refmpn_malloc_limbs (nsize+1);
      mp_ptr d2p = refmpn_malloc_limbs (dsize);
      int norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;

      n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
      ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

      refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
      /* remainder is the low dsize limbs, denormalized */
      refmpn_rshift_or_copy (rp, n2p, dsize, norm);

      /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
      free (n2p);
      free (d2p);
    }
}
2320
2321 mp_limb_t
refmpn_redc_1(mp_ptr rp,mp_ptr up,mp_srcptr mp,mp_size_t n,mp_limb_t invm)2322 refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
2323 {
2324 mp_size_t j;
2325 mp_limb_t cy;
2326
2327 ASSERT_MPN (up, 2*n);
2328 /* ASSERT about directed overlap rp, up */
2329 /* ASSERT about overlap rp, mp */
2330 /* ASSERT about overlap up, mp */
2331
2332 for (j = n - 1; j >= 0; j--)
2333 {
2334 up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
2335 up++;
2336 }
2337 cy = mpn_add_n (rp, up, up - n, n);
2338 return cy;
2339 }
2340
/* Convert {src,size} to digits in the given base, most significant digit
   first, stored as digit values (not ASCII) at dst.  Returns the number of
   digits produced.  {src,size} is clobbered by the repeated divisions
   (except for power of 2 bases, where a copy is taken). */
size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char *d;
  size_t dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  /* upper bound on the number of digits the value can need */
  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * GMP_LIMB_BYTES));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  /* Produce digits least significant first by repeated division by base,
     filling the buffer from the top end downwards. */
  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);  /* drop the high limb once it hits zero */
    }
  while (size != 0);

  /* Move result back and decrement dsize if we didn't generate
     the maximum possible digits. */
  if (d != dst)
    {
      size_t i;
      dsize -= d - dst;
      for (i = 0; i < dsize; i++)
        dst[i] = d[i];
    }

  if (POW2_P (base))
    free (src);

  return dsize;
}
2392
2393
2394 mp_limb_t
ref_bswap_limb(mp_limb_t src)2395 ref_bswap_limb (mp_limb_t src)
2396 {
2397 mp_limb_t dst;
2398 int i;
2399
2400 dst = 0;
2401 for (i = 0; i < GMP_LIMB_BYTES; i++)
2402 {
2403 dst = (dst << 8) + (src & 0xFF);
2404 src >>= 8;
2405 }
2406 return dst;
2407 }
2408
2409
2410 /* These random functions are mostly for transitional purposes while adding
2411 nail support, since they're independent of the normal mpn routines. They
2412 can probably be removed when those normal routines are reliable, though
2413 perhaps something independent would still be useful at times. */
2414
/* Multiplier for the linear congruential generator below, one value per
   supported limb size (no definition for other sizes). */
#if GMP_LIMB_BITS == 32
#define RAND_A CNST_LIMB(0x29CF535)
#endif
#if GMP_LIMB_BITS == 64
#define RAND_A CNST_LIMB(0xBAECD515DAF0B49D)
#endif

/* Current state of the generator, updated by refmpn_random_half. */
mp_limb_t refmpn_random_seed;
2423
2424 mp_limb_t
refmpn_random_half(void)2425 refmpn_random_half (void)
2426 {
2427 refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
2428 return (refmpn_random_seed >> GMP_LIMB_BITS/2);
2429 }
2430
2431 mp_limb_t
refmpn_random_limb(void)2432 refmpn_random_limb (void)
2433 {
2434 return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
2435 | refmpn_random_half ()) & GMP_NUMB_MASK;
2436 }
2437
2438 void
refmpn_random(mp_ptr ptr,mp_size_t size)2439 refmpn_random (mp_ptr ptr, mp_size_t size)
2440 {
2441 mp_size_t i;
2442 if (GMP_NAIL_BITS == 0)
2443 {
2444 mpn_random (ptr, size);
2445 return;
2446 }
2447
2448 for (i = 0; i < size; i++)
2449 ptr[i] = refmpn_random_limb ();
2450 }
2451
/* Fill {ptr,size} with a value made of alternating runs of one and zero
   bits (runs of 1 to RUN_MODULUS bits), starting with ones at a random
   position in the high limb. */
void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t bit, mask, limb;
  int run;

  /* without nails the normal mpn generator serves directly */
  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

#define RUN_MODULUS 32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  /* Walk a single bit position down through all the limbs, OR-ing it into
     the output whenever the current run is a run of ones.  mask is all
     ones or all zeros for the duration of each run. */
  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              /* start a new run of 1..RUN_MODULUS bits, flipping between
                 ones and zeros (mask starts 0, so the first run is ones) */
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;  /* continue from the top of the next limb */
    }
}
2493
/* This is a simple bitwise algorithm working high to low across "s" and
   testing each time whether setting the bit would make s^2 exceed n.

   Square root of {np,nsize}: the root goes to {sp,(nsize+1)/2} and, when
   rp is non-NULL, the remainder n-s^2 to rp with its size returned.  When
   rp is NULL the return is merely non-zero if there is a remainder. */
mp_size_t
refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
{
  mp_ptr tp, dp;
  mp_size_t ssize, talloc, tsize, dsize, ret, ilimbs;
  unsigned ibit;
  long i;
  mp_limb_t c;

  ASSERT (nsize >= 0);

  /* If n==0, then s=0 and r=0. */
  if (nsize == 0)
    return 0;

  ASSERT (np[nsize - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));

  /* root, built up bit by bit from zero */
  ssize = (nsize+1)/2;
  refmpn_zero (sp, ssize);

  /* the remainder so far, ie. n - s^2 */
  dp = refmpn_memdup_limbs (np, nsize);
  dsize = nsize;

  /* temporary */
  talloc = 2*ssize + 1;
  tp = refmpn_malloc_limbs (talloc);

  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
    {
      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
         is added to it */

      /* 2*s*2^i is s shifted up by i+1 bits */
      ilimbs = (i+1) / GMP_NUMB_BITS;
      ibit = (i+1) % GMP_NUMB_BITS;
      refmpn_zero (tp, ilimbs);
      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
      tsize = ilimbs + ssize;
      tp[tsize] = c;
      tsize += (c != 0);

      /* add in 2^(2*i), extending t first if that bit lies above it */
      ilimbs = (2*i) / GMP_NUMB_BITS;
      ibit = (2*i) % GMP_NUMB_BITS;
      if (ilimbs + 1 > tsize)
        {
          refmpn_zero_extend (tp, tsize, ilimbs + 1);
          tsize = ilimbs + 1;
        }
      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
                        CNST_LIMB(1) << ibit);
      ASSERT (tsize < talloc);
      tp[tsize] = c;
      tsize += (c != 0);

      /* the bit can be set if the increase t still fits under the
         remaining difference d */
      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
        {
          /* set this bit in s and subtract from the remainder */
          refmpn_setbit (sp, i);

          ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
          dsize = refmpn_normalize (dp, dsize);
        }
    }

  if (rp == NULL)
    {
      /* no remainder wanted; just report whether one exists */
      ret = ! refmpn_zero_p (dp, dsize);
    }
  else
    {
      ASSERT (dsize == 0 || dp[dsize-1] != 0);
      refmpn_copy (rp, dp, dsize);
      ret = dsize;
    }

  free (dp);
  free (tp);
  return ret;
}
2579