1 /* Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
2
3 This file is part of the GNU MP Library.
4
5 The GNU MP Library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or (at your
8 option) any later version.
9
10 The GNU MP Library is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
17 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 MA 02111-1307, USA. */
19
20 #include "../schpriv.h"
21 #include "gmp.h"
22 #include "gmp-impl.h"
23 #include "gmplonglong.h"
24
25 #if defined(__GNUC__)
26 # define USED_ONLY_SOMETIMES __attribute__((unused))
27 #else
28 # define USED_ONLY_SOMETIMES /* empty */
29 #endif
30
31 extern void *scheme_malloc_gmp(uintptr_t, void **mem_pool);
32 extern void scheme_free_gmp(void *, void **mem_pool);
33 THREAD_LOCAL_DECL(static void *gmp_mem_pool);
34 #define MALLOC(amt) scheme_malloc_gmp(amt, &gmp_mem_pool)
35 #define FREE(p, s) scheme_free_gmp(p, &gmp_mem_pool)
36
37 #define GMP_NAIL_BITS 0
38 #define GMP_LIMB_BITS BITS_PER_MP_LIMB
39 #define GMP_NUMB_BITS BITS_PER_MP_LIMB
40 #define GMP_LIMB_HIGHBIT ((mp_limb_t)1 << (BITS_PER_MP_LIMB - 1))
41 #define GMP_NUMB_HIGHBIT GMP_LIMB_HIGHBIT
42
43 #ifndef NULL
44 # define NULL 0L
45 #endif
46
47 #if GMP_NUMB_BITS == 32
48 # define MP_BASES_CHARS_PER_LIMB_10 9
49 # define MP_BASES_BIG_BASE_10 CNST_LIMB(0x3b9aca00)
50 # define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0x12e0be82)
51 # define MP_BASES_NORMALIZATION_STEPS_10 2
52 # define GMP_NUMB_MASK 0xFFFFFFFF
53 #else
54 # define MP_BASES_CHARS_PER_LIMB_10 19
55 # define MP_BASES_BIG_BASE_10 CNST_LIMB(0x8ac7230489e80000)
56 # define MP_BASES_BIG_BASE_INVERTED_10 CNST_LIMB(0xd83c94fb6d2ac34a)
57 # define MP_BASES_NORMALIZATION_STEPS_10 0
58 # define GMP_NUMB_MASK (~(mp_limb_t)0)
59 #endif
60
61 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
62 mpn_divrem_1 (qp, xsize, ap, size, d)
63
64 #define ABOVE_THRESHOLD(size,thresh) \
65 ((thresh) == 0 \
66 || ((thresh) != MP_SIZE_T_MAX \
67 && (size) >= (thresh)))
68 #define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
69
70 /* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
71 then that lowest one bit must have been the only bit set. n==0 will
72 return true though, so avoid that. */
73 #define POW2_P(n) (((n) & ((n) - 1)) == 0)
74
75 # define __GMP_ALLOCATE_FUNC_LIMBS(n) TMP_ALLOC(n * sizeof(mp_limb_t))
76 # define __GMP_FREE_FUNC_LIMBS(p, n) /* */
77
78 /* static const int mp_bits_per_limb = BITS_PER_MP_LIMB; */
79 /* static const int __gmp_0 = 0; */
80 /* static int __gmp_junk; */
81 /* static int gmp_errno = 0; */
82
83 #define SCHEME_BIGNUM_USE_FUEL(n) scheme_bignum_use_fuel(n)
84 extern void scheme_bignum_use_fuel(intptr_t n);
85
86 /* Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
87 There are no restrictions on the relative sizes of
88 the two arguments.
89 Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */
90
91 int
92 #if __STDC__
mpn_cmp(mp_srcptr op1_ptr,mp_srcptr op2_ptr,mp_size_t size)93 mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
94 #else
95 mpn_cmp (op1_ptr, op2_ptr, size)
96 mp_srcptr op1_ptr;
97 mp_srcptr op2_ptr;
98 mp_size_t size;
99 #endif
100 {
101 mp_size_t i;
102 mp_limb_t op1_word, op2_word;
103
104 for (i = size - 1; i >= 0; i--)
105 {
106 op1_word = op1_ptr[i];
107 op2_word = op2_ptr[i];
108 if (op1_word != op2_word)
109 goto diff;
110 }
111 return 0;
112 diff:
113 /* This can *not* be simplified to
114 op2_word - op2_word
115 since that expression might give signed overflow. */
116 return (op1_word > op2_word) ? 1 : -1;
117 }
118
119
120 /* mpn_sub_n -- Subtract two limb vectors of equal, non-zero length. */
121
122 mp_limb_t
123 #if __STDC__
mpn_sub_n(mp_ptr res_ptr,mp_srcptr s1_ptr,mp_srcptr s2_ptr,mp_size_t size)124 mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
125 #else
126 mpn_sub_n (res_ptr, s1_ptr, s2_ptr, size)
127 register mp_ptr res_ptr;
128 register mp_srcptr s1_ptr;
129 register mp_srcptr s2_ptr;
130 mp_size_t size;
131 #endif
132 {
133 register mp_limb_t x, y, cy;
134 register mp_size_t j;
135
136 /* The loop counter and index J goes from -SIZE to -1. This way
137 the loop becomes faster. */
138 j = -size;
139
140 /* Offset the base pointers to compensate for the negative indices. */
141 s1_ptr -= j;
142 s2_ptr -= j;
143 res_ptr -= j;
144
145 cy = 0;
146 do
147 {
148 y = s2_ptr[j];
149 x = s1_ptr[j];
150 y += cy; /* add previous carry to subtrahend */
151 cy = (y < cy); /* get out carry from that addition */
152 y = x - y; /* main subtract */
153 cy = (y > x) + cy; /* get out carry from the subtract, combine */
154 res_ptr[j] = y;
155 }
156 while (++j != 0);
157
158 return cy;
159 }
160
161 /* mpn_add_n -- Add two limb vectors of equal, non-zero length. */
162
163 mp_limb_t
164 #if __STDC__
mpn_add_n(mp_ptr res_ptr,mp_srcptr s1_ptr,mp_srcptr s2_ptr,mp_size_t size)165 mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
166 #else
167 mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
168 register mp_ptr res_ptr;
169 register mp_srcptr s1_ptr;
170 register mp_srcptr s2_ptr;
171 mp_size_t size;
172 #endif
173 {
174 register mp_limb_t x, y, cy;
175 register mp_size_t j;
176
177 /* The loop counter and index J goes from -SIZE to -1. This way
178 the loop becomes faster. */
179 j = -size;
180
181 /* Offset the base pointers to compensate for the negative indices. */
182 s1_ptr -= j;
183 s2_ptr -= j;
184 res_ptr -= j;
185
186 cy = 0;
187 do
188 {
189 y = s2_ptr[j];
190 x = s1_ptr[j];
191 y += cy; /* add previous carry to one addend */
192 cy = (y < cy); /* get out carry from that addition */
193 y = x + y; /* add other addend */
194 cy = (y < x) + cy; /* get out carry from that add, combine */
195 res_ptr[j] = y;
196 }
197 while (++j != 0);
198
199 return cy;
200 }
201
202
203 /* mpn_mul_n and helper function -- Multiply/square natural numbers. */
204
205 /* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
206 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
207 #define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1)
208
209 #if !defined (__alpha) && !defined (__mips)
/* For all other machines, we want to call mpn functions for the compound
   operations instead of open-coding them. */
212 #define USE_MORE_MPN
213 #endif
214
215 /*== Function declarations =================================================*/
216
217 static void evaluate3 _PROTO ((mp_ptr, mp_ptr, mp_ptr,
218 mp_ptr, mp_ptr, mp_ptr,
219 mp_srcptr, mp_srcptr, mp_srcptr,
220 mp_size_t, mp_size_t));
221 static void interpolate3 _PROTO ((mp_srcptr,
222 mp_ptr, mp_ptr, mp_ptr,
223 mp_srcptr,
224 mp_ptr, mp_ptr, mp_ptr,
225 mp_size_t, mp_size_t));
226 static mp_limb_t add2Times _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
227
228
229 /*-- mpn_kara_mul_n ---------------------------------------------------------------*/
230
231 /* Multiplies using 3 half-sized mults and so on recursively.
232 * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
233 * No overlap of p[...] with a[...] or b[...].
234 * ws is workspace.
235 */
236
/* Karatsuba multiplication: p[0..2*n-1] := a[0..n-1] * b[0..n-1],
   computed with three recursive half-size products.  No overlap of
   p[...] with a[...] or b[...].  ws is workspace.  The odd-n branch
   splits the operands into a low part of n3 = n - n2 limbs and a high
   part of n2 limbs; the even branch splits them in equal halves. */
void
#if __STDC__
mpn_kara_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
#else
mpn_kara_mul_n(p, a, b, n, ws)
     mp_ptr p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
     mp_ptr ws;
#endif
{
  mp_limb_t i, sign, w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;

  n2 = n >> 1;
  ASSERT (n2 > 0);

  SCHEME_BIGNUM_USE_FUEL(n);

  if (n & 1)
    {
      /* Odd length. */
      mp_size_t n1, n3, nm1;

      n3 = n - n2;

      /* Form |a_low - a_high| in p[0..n2], tracking the overall sign of
	 the cross product in `sign'. */
      sign = 0;
      w = a[n2];
      if (w != 0)
	w -= mpn_sub_n (p, a, a + n3, n2);
      else
	{
	  /* Top limb of the low part is zero: find the most significant
	     limb where the halves differ to decide which difference is
	     non-negative. */
	  i = n2;
	  do
	    {
	      --i;
	      w0 = a[i];
	      w1 = a[n3+i];
	    }
	  while (w0 == w1 && i != 0);
	  if (w0 < w1)
	    {
	      x = a + n3;
	      y = a;
	      sign = 1;
	    }
	  else
	    {
	      x = a;
	      y = a + n3;
	    }
	  mpn_sub_n (p, x, y, n2);
	}
      p[n2] = w;

      /* Likewise |b_low - b_high| in p[n3..n], folding its sign in. */
      w = b[n2];
      if (w != 0)
	w -= mpn_sub_n (p + n3, b, b + n3, n2);
      else
	{
	  i = n2;
	  do
	    {
	      --i;
	      w0 = b[i];
	      w1 = b[n3+i];
	    }
	  while (w0 == w1 && i != 0);
	  if (w0 < w1)
	    {
	      x = b + n3;
	      y = b;
	      sign ^= 1;
	    }
	  else
	    {
	      x = b;
	      y = b + n3;
	    }
	  mpn_sub_n (p + n3, x, y, n2);
	}
      p[n] = w;

      /* Three pointwise products, recursing or falling back to the
	 basecase according to KARATSUBA_MUL_THRESHOLD. */
      n1 = n + 1;
      if (n2 < KARATSUBA_MUL_THRESHOLD)
	{
	  if (n3 < KARATSUBA_MUL_THRESHOLD)
	    {
	      mpn_mul_basecase (ws, p, n3, p + n3, n3);
	      mpn_mul_basecase (p, a, n3, b, n3);
	    }
	  else
	    {
	      mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
	      mpn_kara_mul_n (p, a, b, n3, ws + n1);
	    }
	  mpn_mul_basecase (p + n1, a + n3, n2, b + n3, n2);
	}
      else
	{
	  mpn_kara_mul_n (ws, p, p + n3, n3, ws + n1);
	  mpn_kara_mul_n (p, a, b, n3, ws + n1);
	  mpn_kara_mul_n (p + n1, a + n3, b + n3, n2, ws + n1);
	}

      /* Interpolate: add or subtract the middle product depending on
	 the sign collected above. */
      if (sign)
	mpn_add_n (ws, p, ws, n1);
      else
	mpn_sub_n (ws, p, ws, n1);

      nm1 = n - 1;
      if (mpn_add_n (ws, p + n1, ws, nm1))
	{
	  /* Propagate the carry into the top two limbs of ws. */
	  mp_limb_t x = ws[nm1] + 1;
	  ws[nm1] = x;
	  if (x == 0)
	    ++ws[n];
	}
      if (mpn_add_n (p + n3, p + n3, ws, n1))
	{
	  /* Ripple the final carry up through p. */
	  mp_limb_t x;
	  i = n1 + n3;
	  do
	    {
	      x = p[i] + 1;
	      p[i] = x;
	      ++i;
	    } while (x == 0);
	}
    }
  else
    {
      /* Even length. */
      mp_limb_t t;

      /* Form |a_low - a_high| in p[0..n2-1], recording the sign. */
      i = n2;
      do
	{
	  --i;
	  w0 = a[i];
	  w1 = a[n2+i];
	}
      while (w0 == w1 && i != 0);
      sign = 0;
      if (w0 < w1)
	{
	  x = a + n2;
	  y = a;
	  sign = 1;
	}
      else
	{
	  x = a;
	  y = a + n2;
	}
      mpn_sub_n (p, x, y, n2);

      /* Likewise |b_low - b_high| in p[n2..n-1]. */
      i = n2;
      do
	{
	  --i;
	  w0 = b[i];
	  w1 = b[n2+i];
	}
      while (w0 == w1 && i != 0);
      if (w0 < w1)
	{
	  x = b + n2;
	  y = b;
	  sign ^= 1;
	}
      else
	{
	  x = b;
	  y = b + n2;
	}
      mpn_sub_n (p + n2, x, y, n2);

      /* Pointwise products. */
      if (n2 < KARATSUBA_MUL_THRESHOLD)
	{
	  mpn_mul_basecase (ws, p, n2, p + n2, n2);
	  mpn_mul_basecase (p, a, n2, b, n2);
	  mpn_mul_basecase (p + n, a + n2, n2, b + n2, n2);
	}
      else
	{
	  mpn_kara_mul_n (ws, p, p + n2, n2, ws + n);
	  mpn_kara_mul_n (p, a, b, n2, ws + n);
	  mpn_kara_mul_n (p + n, a + n2, b + n2, n2, ws + n);
	}

      /* Interpolate. */
      if (sign)
	w = mpn_add_n (ws, p, ws, n);
      else
	w = -mpn_sub_n (ws, p, ws, n);
      w += mpn_add_n (ws, p + n, ws, n);
      w += mpn_add_n (p + n2, p + n2, ws, n);
      /* TO DO: could put "if (w) { ... }" here.
       * Less work but badly predicted branch.
       * No measurable difference in speed on Alpha.
       */
      i = n + n2;
      t = p[i] + w;
      p[i] = t;
      if (t < w)
	{
	  /* Ripple the carry up through the remaining high limbs. */
	  do
	    {
	      ++i;
	      w = p[i] + 1;
	      p[i] = w;
	    }
	  while (w == 0);
	}
    }
}
457
/* Karatsuba squaring: p[0..2*n-1] := a[0..n-1]^2.  Mirrors
   mpn_kara_mul_n with both operands equal to a; the `sign' tracking is
   kept for structural symmetry with the multiply code (the squared
   middle term is the same either way, see the add/sub choice below).
   No overlap of p[...] with a[...].  ws is workspace. */
void
#if __STDC__
mpn_kara_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
#else
mpn_kara_sqr_n (p, a, n, ws)
     mp_ptr p;
     mp_srcptr a;
     mp_size_t n;
     mp_ptr ws;
#endif
{
  mp_limb_t i, sign, w, w0, w1;
  mp_size_t n2;
  mp_srcptr x, y;

  n2 = n >> 1;
  ASSERT (n2 > 0);

  SCHEME_BIGNUM_USE_FUEL(n);

  if (n & 1)
    {
      /* Odd length. */
      mp_size_t n1, n3, nm1;

      n3 = n - n2;

      /* Form |a_low - a_high| in p[0..n2]. */
      sign = 0;
      w = a[n2];
      if (w != 0)
	w -= mpn_sub_n (p, a, a + n3, n2);
      else
	{
	  /* Top limb of the low part is zero: locate the most
	     significant differing limb to pick the larger half. */
	  i = n2;
	  do
	    {
	      --i;
	      w0 = a[i];
	      w1 = a[n3+i];
	    }
	  while (w0 == w1 && i != 0);
	  if (w0 < w1)
	    {
	      x = a + n3;
	      y = a;
	      sign = 1;
	    }
	  else
	    {
	      x = a;
	      y = a + n3;
	    }
	  mpn_sub_n (p, x, y, n2);
	}
      p[n2] = w;

      /* Same difference again, into p[n3..n] -- both factors of the
	 middle product are |a_low - a_high| for a square. */
      w = a[n2];
      if (w != 0)
	w -= mpn_sub_n (p + n3, a, a + n3, n2);
      else
	{
	  i = n2;
	  do
	    {
	      --i;
	      w0 = a[i];
	      w1 = a[n3+i];
	    }
	  while (w0 == w1 && i != 0);
	  if (w0 < w1)
	    {
	      x = a + n3;
	      y = a;
	      sign ^= 1;
	    }
	  else
	    {
	      x = a;
	      y = a + n3;
	    }
	  mpn_sub_n (p + n3, x, y, n2);
	}
      p[n] = w;

      /* Three pointwise squares, recursing or using the basecase
	 according to KARATSUBA_SQR_THRESHOLD. */
      n1 = n + 1;
      if (n2 < KARATSUBA_SQR_THRESHOLD)
	{
	  if (n3 < KARATSUBA_SQR_THRESHOLD)
	    {
	      mpn_sqr_basecase (ws, p, n3);
	      mpn_sqr_basecase (p, a, n3);
	    }
	  else
	    {
	      mpn_kara_sqr_n (ws, p, n3, ws + n1);
	      mpn_kara_sqr_n (p, a, n3, ws + n1);
	    }
	  mpn_sqr_basecase (p + n1, a + n3, n2);
	}
      else
	{
	  mpn_kara_sqr_n (ws, p, n3, ws + n1);
	  mpn_kara_sqr_n (p, a, n3, ws + n1);
	  mpn_kara_sqr_n (p + n1, a + n3, n2, ws + n1);
	}

      /* Interpolate. */
      if (sign)
	mpn_add_n (ws, p, ws, n1);
      else
	mpn_sub_n (ws, p, ws, n1);

      nm1 = n - 1;
      if (mpn_add_n (ws, p + n1, ws, nm1))
	{
	  /* Propagate the carry into the top two limbs of ws. */
	  mp_limb_t x = ws[nm1] + 1;
	  ws[nm1] = x;
	  if (x == 0)
	    ++ws[n];
	}
      if (mpn_add_n (p + n3, p + n3, ws, n1))
	{
	  /* Ripple the final carry up through p. */
	  mp_limb_t x;
	  i = n1 + n3;
	  do
	    {
	      x = p[i] + 1;
	      p[i] = x;
	      ++i;
	    } while (x == 0);
	}
    }
  else
    {
      /* Even length. */
      mp_limb_t t;

      /* Form |a_low - a_high| in p[0..n2-1]. */
      i = n2;
      do
	{
	  --i;
	  w0 = a[i];
	  w1 = a[n2+i];
	}
      while (w0 == w1 && i != 0);
      sign = 0;
      if (w0 < w1)
	{
	  x = a + n2;
	  y = a;
	  sign = 1;
	}
      else
	{
	  x = a;
	  y = a + n2;
	}
      mpn_sub_n (p, x, y, n2);

      /* Same difference again, into p[n2..n-1]. */
      i = n2;
      do
	{
	  --i;
	  w0 = a[i];
	  w1 = a[n2+i];
	}
      while (w0 == w1 && i != 0);
      if (w0 < w1)
	{
	  x = a + n2;
	  y = a;
	  sign ^= 1;
	}
      else
	{
	  x = a;
	  y = a + n2;
	}
      mpn_sub_n (p + n2, x, y, n2);

      /* Pointwise products. */
      if (n2 < KARATSUBA_SQR_THRESHOLD)
	{
	  mpn_sqr_basecase (ws, p, n2);
	  mpn_sqr_basecase (p, a, n2);
	  mpn_sqr_basecase (p + n, a + n2, n2);
	}
      else
	{
	  mpn_kara_sqr_n (ws, p, n2, ws + n);
	  mpn_kara_sqr_n (p, a, n2, ws + n);
	  mpn_kara_sqr_n (p + n, a + n2, n2, ws + n);
	}

      /* Interpolate. */
      if (sign)
	w = mpn_add_n (ws, p, ws, n);
      else
	w = -mpn_sub_n (ws, p, ws, n);
      w += mpn_add_n (ws, p + n, ws, n);
      w += mpn_add_n (p + n2, p + n2, ws, n);
      /* TO DO: could put "if (w) { ... }" here.
       * Less work but badly predicted branch.
       * No measurable difference in speed on Alpha.
       */
      i = n + n2;
      t = p[i] + w;
      p[i] = t;
      if (t < w)
	{
	  /* Ripple the carry up through the remaining high limbs. */
	  do
	    {
	      ++i;
	      w = p[i] + 1;
	      p[i] = w;
	    }
	  while (w == 0);
	}
    }
}
677
678 /*-- add2Times -------------------------------------------------------------*/
679
680 /* z[] = x[] + 2 * y[]
681 Note that z and x might point to the same vectors. */
682 #ifdef USE_MORE_MPN
683 static inline mp_limb_t
684 #if __STDC__
add2Times(mp_ptr z,mp_srcptr x,mp_srcptr y,mp_size_t n)685 add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
686 #else
687 add2Times (z, x, y, n)
688 mp_ptr z;
689 mp_srcptr x;
690 mp_srcptr y;
691 mp_size_t n;
692 #endif
693 {
694 mp_ptr t;
695 mp_limb_t c;
696 TMP_DECL (marker);
697 TMP_MARK (marker);
698 t = (mp_ptr) TMP_ALLOC (n * BYTES_PER_MP_LIMB);
699 c = mpn_lshift (t, y, n, 1);
700 c += mpn_add_n (z, x, t, n);
701 TMP_FREE (marker);
702 return c;
703 }
704 #else
705
706 static mp_limb_t
707 #if __STDC__
add2Times(mp_ptr z,mp_srcptr x,mp_srcptr y,mp_size_t n)708 add2Times (mp_ptr z, mp_srcptr x, mp_srcptr y, mp_size_t n)
709 #else
710 add2Times (z, x, y, n)
711 mp_ptr z;
712 mp_srcptr x;
713 mp_srcptr y;
714 mp_size_t n;
715 #endif
716 {
717 mp_limb_t c, v, w;
718
719 ASSERT (n > 0);
720 v = *x; w = *y;
721 c = w >> (BITS_PER_MP_LIMB - 1);
722 w <<= 1;
723 v += w;
724 c += v < w;
725 *z = v;
726 ++x; ++y; ++z;
727 while (--n)
728 {
729 v = *x;
730 w = *y;
731 v += c;
732 c = v < c;
733 c += w >> (BITS_PER_MP_LIMB - 1);
734 w <<= 1;
735 v += w;
736 c += v < w;
737 *z = v;
738 ++x; ++y; ++z;
739 }
740
741 return c;
742 }
743 #endif
744
745 /*-- evaluate3 -------------------------------------------------------------*/
746
747 /* Evaluates:
748 * ph := 4*A+2*B+C
749 * p1 := A+B+C
750 * p2 := A+2*B+4*C
751 * where:
752 * ph[], p1[], p2[], A[] and B[] all have length len,
753 * C[] has length len2 with len-len2 = 0, 1 or 2.
754 * Returns top words (overflow) at pth, pt1 and pt2 respectively.
755 */
756 #ifdef USE_MORE_MPN
/* Evaluate the three non-trivial points for Toom-3 (mpn-based variant):
     ph := 4*A + 2*B + C
     p1 := A + B + C
     p2 := A + 2*B + 4*C
   ph[], p1[], p2[], A[] and B[] have length len; C[] has length len2
   with len - len2 = 0, 1 or 2.  The words that overflow the top limb
   are stored through pth, pt1 and pt2. */
static void
#if __STDC__
evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
	   mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t len, mp_size_t len2)
#else
evaluate3 (ph, p1, p2, pth, pt1, pt2,
	   A, B, C, len, len2)
     mp_ptr ph;
     mp_ptr p1;
     mp_ptr p2;
     mp_ptr pth;
     mp_ptr pt1;
     mp_ptr pt2;
     mp_srcptr A;
     mp_srcptr B;
     mp_srcptr C;
     mp_size_t len;
     mp_size_t len2;
#endif
{
  mp_limb_t c, d, e;

  ASSERT (len - len2 <= 2);

  /* p1 temporarily holds 2*B, reused for both ph and p2 below; e is the
     bit shifted out of its top limb. */
  e = mpn_lshift (p1, B, len, 1);

  /* ph := 4*A + 2*B + C, overflow accumulated in c. */
  c = mpn_lshift (ph, A, len, 2);
  c += e + mpn_add_n (ph, ph, p1, len);
  d = mpn_add_n (ph, ph, C, len2);
  if (len2 == len) c += d; else c += mpn_add_1 (ph + len2, ph + len2, len-len2, d);
  ASSERT (c < 7);
  *pth = c;

  /* p2 := A + 2*B + 4*C, overflow accumulated in c. */
  c = mpn_lshift (p2, C, len2, 2);
#if 1
  /* When C is shorter, place the shift-out in p2[len2] and clear the
     remaining top limb(s) before the full-length adds. */
  if (len2 != len) { p2[len-1] = 0; p2[len2] = c; c = 0; }
  c += e + mpn_add_n (p2, p2, p1, len);
#else
  d = mpn_add_n (p2, p2, p1, len2);
  c += d;
  if (len2 != len) c = mpn_add_1 (p2+len2, p1+len2, len-len2, c);
  c += e;
#endif
  c += mpn_add_n (p2, p2, A, len);
  ASSERT (c < 7);
  *pt2 = c;

  /* p1 := A + B + C (overwrites the 2*B scratch, which is no longer
     needed). */
  c = mpn_add_n (p1, A, B, len);
  d = mpn_add_n (p1, p1, C, len2);
  if (len2 == len) c += d;
  else c += mpn_add_1 (p1+len2, p1+len2, len-len2, d);
  ASSERT (c < 3);
  *pt1 = c;

}
812
813 #else
814
/* Evaluate the three non-trivial points for Toom-3 (open-coded variant):
     ph := 4*A + 2*B + C
     p1 := A + B + C
     p2 := A + 2*B + 4*C
   ph[], p1[], p2[], A[] and B[] have length l; C[] has length ls with
   l - ls = 0, 1 or 2 (C is padded with zero limbs above ls).  The limbs
   are combined in a single pass, with th/t1/t2 carrying the running
   overflow of each sum; the final overflows go through pth/pt1/pt2. */
static void
#if __STDC__
evaluate3 (mp_ptr ph, mp_ptr p1, mp_ptr p2, mp_ptr pth, mp_ptr pt1, mp_ptr pt2,
	   mp_srcptr A, mp_srcptr B, mp_srcptr C, mp_size_t l, mp_size_t ls)
#else
evaluate3 (ph, p1, p2, pth, pt1, pt2,
	   A, B, C, l, ls)
     mp_ptr ph;
     mp_ptr p1;
     mp_ptr p2;
     mp_ptr pth;
     mp_ptr pt1;
     mp_ptr pt2;
     mp_srcptr A;
     mp_srcptr B;
     mp_srcptr C;
     mp_size_t l;
     mp_size_t ls;
#endif
{
  mp_limb_t a,b,c, i, t, th,t1,t2, vh,v1,v2;

  ASSERT (l - ls <= 2);

  th = t1 = t2 = 0;
  for (i = 0; i < l; ++i)
    {
      a = *A;
      b = *B;
      c = i < ls ? *C : 0;   /* C is implicitly zero-extended to length l */

      /* vh := 4*a + 2*b + c plus incoming overflow th; the shifted-out
	 high bits of a and b are folded into the new th. */
      /* TO DO: choose one of the following alternatives. */
#if 0
      t = a << 2;
      vh = th + t;
      th = vh < t;
      th += a >> (BITS_PER_MP_LIMB - 2);
      t = b << 1;
      vh += t;
      th += vh < t;
      th += b >> (BITS_PER_MP_LIMB - 1);
      vh += c;
      th += vh < c;
#else
      vh = th + c;
      th = vh < c;
      t = b << 1;
      vh += t;
      th += vh < t;
      th += b >> (BITS_PER_MP_LIMB - 1);
      t = a << 2;
      vh += t;
      th += vh < t;
      th += a >> (BITS_PER_MP_LIMB - 2);
#endif

      /* v1 := a + b + c plus incoming overflow t1. */
      v1 = t1 + a;
      t1 = v1 < a;
      v1 += b;
      t1 += v1 < b;
      v1 += c;
      t1 += v1 < c;

      /* v2 := a + 2*b + 4*c plus incoming overflow t2. */
      v2 = t2 + a;
      t2 = v2 < a;
      t = b << 1;
      v2 += t;
      t2 += v2 < t;
      t2 += b >> (BITS_PER_MP_LIMB - 1);
      t = c << 2;
      v2 += t;
      t2 += v2 < t;
      t2 += c >> (BITS_PER_MP_LIMB - 2);

      *ph = vh;
      *p1 = v1;
      *p2 = v2;

      ++A; ++B; ++C;
      ++ph; ++p1; ++p2;
    }

  /* Top-word overflow bounds: 4+2+1 - 1 and 1+1+1 - 1 respectively. */
  ASSERT (th < 7);
  ASSERT (t1 < 3);
  ASSERT (t2 < 7);

  *pth = th;
  *pt1 = t1;
  *pt2 = t2;
}
905 #endif
906
907
908 /*-- interpolate3 ----------------------------------------------------------*/
909
910 /* Interpolates B, C, D (in-place) from:
911 * 16*A+8*B+4*C+2*D+E
912 * A+B+C+D+E
913 * A+2*B+4*C+8*D+16*E
914 * where:
915 * A[], B[], C[] and D[] all have length l,
916 * E[] has length ls with l-ls = 0, 2 or 4.
917 *
918 * Reads top words (from earlier overflow) from ptb, ptc and ptd,
919 * and returns new top words there.
920 */
921
922 #ifdef USE_MORE_MPN
/* Toom-3 interpolation (mpn-based variant).  Recovers B, C, D in place
   from the evaluated values
     b = 16*A + 8*B + 4*C + 2*D + E
     c =    A +   B +   C +   D + E
     d =    A + 2*B + 4*C + 8*D + 16*E
   where A[], B[], C[], D[] have length len and E[] has length len2 with
   len - len2 = 0, 2 or 4.  Top words (previous overflow) are read from
   ptb, ptc, ptd and the new top words are returned there. */
static void
#if __STDC__
interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
	      mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t len, mp_size_t len2)
#else
interpolate3 (A, B, C, D, E,
	      ptb, ptc, ptd, len, len2)
     mp_srcptr A;
     mp_ptr B;
     mp_ptr C;
     mp_ptr D;
     mp_srcptr E;
     mp_ptr ptb;
     mp_ptr ptc;
     mp_ptr ptd;
     mp_size_t len;
     mp_size_t len2;
#endif
{
  mp_ptr ws;
  mp_limb_t t, tb,tc,td;
  TMP_DECL (marker);
  TMP_MARK (marker);

  ASSERT (len - len2 == 0 || len - len2 == 2 || len - len2 == 4);

  /* Let x1, x2, x3 be the values to interpolate. We have:
   *         b = 16*a + 8*x1 + 4*x2 + 2*x3 +    e
   *         c =    a +   x1 +   x2 +   x3 +    e
   *         d =    a + 2*x1 + 4*x2 + 8*x3 + 16*e
   */

  ws = (mp_ptr) TMP_ALLOC (len * BYTES_PER_MP_LIMB);

  tb = *ptb; tc = *ptc; td = *ptd;


  /* b := b - 16*a -    e
   * c := c -    a -    e
   * d := d -    a - 16*e
   */

  t = mpn_lshift (ws, A, len, 4);
  tb -= t + mpn_sub_n (B, B, ws, len);
  t = mpn_sub_n (B, B, E, len2);
  if (len2 == len) tb -= t;
  else tb -= mpn_sub_1 (B+len2, B+len2, len-len2, t);

  tc -= mpn_sub_n (C, C, A, len);
  t = mpn_sub_n (C, C, E, len2);
  if (len2 == len) tc -= t;
  else tc -= mpn_sub_1 (C+len2, C+len2, len-len2, t);

  /* ws := a + 16*e, then subtract it from d in one pass. */
  t = mpn_lshift (ws, E, len2, 4);
  t += mpn_add_n (ws, ws, A, len2);
#if 1
  if (len2 != len) t = mpn_add_1 (ws+len2, A+len2, len-len2, t);
  td -= t + mpn_sub_n (D, D, ws, len);
#else
  t += mpn_sub_n (D, D, ws, len2);
  if (len2 != len) {
    t = mpn_sub_1 (D+len2, D+len2, len-len2, t);
    t += mpn_sub_n (D+len2, D+len2, A+len2, len-len2);
  } /* end if/else */
  td -= t;
#endif


  /* b, d := b + d, b - d */

#ifdef HAVE_MPN_ADD_SUB_N
  /* #error TO DO ... */
#else
  t = tb + td + mpn_add_n (ws, B, D, len);
  td = tb - td - mpn_sub_n (D, B, D, len);
  tb = t;
  MPN_COPY (B, ws, len);
#endif

  /* b := b-8*c */
  t = 8 * tc + mpn_lshift (ws, C, len, 3);
  tb -= t + mpn_sub_n (B, B, ws, len);

  /* c := 2*c - b */
  tc = 2 * tc + mpn_lshift (C, C, len, 1);
  tc -= tb + mpn_sub_n (C, C, B, len);

  /* d := d/3 */
  td = (td - mpn_divexact_by3 (D, D, len)) * INVERSE_3;

  /* b, d := b + d, b - d */
#ifdef HAVE_MPN_ADD_SUB_N
  /* #error TO DO ... */
#else
  t = tb + td + mpn_add_n (ws, B, D, len);
  td = tb - td - mpn_sub_n (D, B, D, len);
  tb = t;
  MPN_COPY (B, ws, len);
#endif

  /* Now:
   *	   b = 4*x1
   *	   c = 2*x2
   *	   d = 4*x3
   */

  /* Divide out the known small factors, shifting the top-word bits back
     into the high limbs.  The values must divide exactly. */
  ASSERT(!(*B & 3));
  mpn_rshift (B, B, len, 2);
  B[len-1] |= tb<<(BITS_PER_MP_LIMB-2);
  ASSERT((long)tb >= 0);
  tb >>= 2;

  ASSERT(!(*C & 1));
  mpn_rshift (C, C, len, 1);
  C[len-1] |= tc<<(BITS_PER_MP_LIMB-1);
  ASSERT((long)tc >= 0);
  tc >>= 1;

  ASSERT(!(*D & 3));
  mpn_rshift (D, D, len, 2);
  D[len-1] |= td<<(BITS_PER_MP_LIMB-2);
  ASSERT((long)td >= 0);
  td >>= 2;

#if WANT_ASSERT
  ASSERT (tb < 2);
  if (len == len2)
    {
      ASSERT (tc < 3);
      ASSERT (td < 2);
    }
  else
    {
      ASSERT (tc < 2);
      ASSERT (!td);
    }
#endif

  *ptb = tb;
  *ptc = tc;
  *ptd = td;

  TMP_FREE (marker);
}
1067
1068 #else
1069
/* Toom-3 interpolation (open-coded variant).  Recovers B, C, D in place
   from the evaluated values (see the mpn variant for the equations).
   A[], B[], C[], D[] have length l; E[] has length ls with l - ls = 0,
   2 or 4 (E is implicitly zero-extended).  This version streams through
   the limbs once, carrying signed per-sum corrections in sb/sc/sd, and
   staging the /4, /2, /4 right-shifts through ob/oc/od so each output
   limb is written one iteration late. */
static void
#if __STDC__
interpolate3 (mp_srcptr A, mp_ptr B, mp_ptr C, mp_ptr D, mp_srcptr E,
	      mp_ptr ptb, mp_ptr ptc, mp_ptr ptd, mp_size_t l, mp_size_t ls)
#else
interpolate3 (A, B, C, D, E,
	      ptb, ptc, ptd, l, ls)
     mp_srcptr A;
     mp_ptr B;
     mp_ptr C;
     mp_ptr D;
     mp_srcptr E;
     mp_ptr ptb;
     mp_ptr ptc;
     mp_ptr ptd;
     mp_size_t l;
     mp_size_t ls;
#endif
{
  mp_limb_t a,b,c,d,e,t, i, sb,sc,sd, ob,oc,od;
  const mp_limb_t maskOffHalf = (~(mp_limb_t) 0) << (BITS_PER_MP_LIMB >> 1);

#if WANT_ASSERT
  t = l - ls;
  ASSERT (t == 0 || t == 2 || t == 4);
#endif

  sb = sc = sd = 0;
  for (i = 0; i < l; ++i)
    {
      mp_limb_t tb, tc, td, tt;

      a = *A;
      b = *B;
      c = *C;
      d = *D;
      e = i < ls ? *E : 0;

      /* Let x1, x2, x3 be the values to interpolate. We have:
       *	 b = 16*a + 8*x1 + 4*x2 + 2*x3 +    e
       *	 c =    a +   x1 +   x2 +   x3 +    e
       *	 d =    a + 2*x1 + 4*x2 + 8*x3 + 16*e
       */

      /* b := b - 16*a -    e
       * c := c -    a -    e
       * d := d -    a - 16*e
       * tb/tc/td hold the (signed, two's-complement) high words of each
       * per-limb result.
       */
      t = a << 4;
      tb = -(a >> (BITS_PER_MP_LIMB - 4)) - (b < t);
      b -= t;
      tb -= b < e;
      b -= e;
      tc = -(c < a);
      c -= a;
      tc -= c < e;
      c -= e;
      td = -(d < a);
      d -= a;
      t = e << 4;
      td = td - (e >> (BITS_PER_MP_LIMB - 4)) - (d < t);
      d -= t;

      /* b, d := b + d, b - d */
      t = b + d;
      tt = tb + td + (t < b);
      td = tb - td - (b < d);
      d = b - d;
      b = t;
      tb = tt;

      /* b := b-8*c */
      t = c << 3;
      tb = tb - (tc << 3) - (c >> (BITS_PER_MP_LIMB - 3)) - (b < t);
      b -= t;

      /* c := 2*c - b */
      t = c << 1;
      tc = (tc << 1) + (c >> (BITS_PER_MP_LIMB - 1)) - tb - (t < b);
      c = t - b;

      /* d := d/3 (exact division via the modular inverse of 3) */
      d *= INVERSE_3;
      td = td - (d >> (BITS_PER_MP_LIMB - 1)) - (d*3 < d);
      td *= INVERSE_3;

      /* b, d := b + d, b - d */
      t = b + d;
      tt = tb + td + (t < b);
      td = tb - td - (b < d);
      d = b - d;
      b = t;
      tb = tt;

      /* Now:
       *	 b = 4*x1
       *	 c = 2*x2
       *	 d = 4*x3
       */

      /* Fold the running corrections in and recompute them from the
	 new high words.  sb has period 2. */
      b += sb;
      tb += b < sb;
      sb &= maskOffHalf;
      sb |= sb >> (BITS_PER_MP_LIMB >> 1);
      sb += tb;

      /* sc has period 1. */
      c += sc;
      tc += c < sc;
      /* TO DO: choose one of the following alternatives. */
#if 1
      sc = (mp_limb_t)((mp_limb_signed_t)sc >> (BITS_PER_MP_LIMB - 1));
      sc += tc;
#else
      sc = tc - ((mp_limb_signed_t)sc < 0L);
#endif

      /* sd has period 2. */
      d += sd;
      td += d < sd;
      sd &= maskOffHalf;
      sd |= sd >> (BITS_PER_MP_LIMB >> 1);
      sd += td;

      /* Write the previous limb, completed by the low bits shifted out
	 of this one (the /4, /2, /4 divisions are done on the fly). */
      if (i != 0)
	{
	  B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
	  C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
	  D[-1] = od | d << (BITS_PER_MP_LIMB - 2);
	}
      ob = b >> 2;
      oc = c >> 1;
      od = d >> 2;

      ++A; ++B; ++C; ++D; ++E;
    }

  /* Handle top words.  Apply the same linear combination to the stored
     overflow words, then flush the last staged limbs. */
  b = *ptb;
  c = *ptc;
  d = *ptd;

  t = b + d;
  d = b - d;
  b = t;
  b -= c << 3;
  c = (c << 1) - b;
  d *= INVERSE_3;
  t = b + d;
  d = b - d;
  b = t;

  b += sb;
  c += sc;
  d += sd;

  B[-1] = ob | b << (BITS_PER_MP_LIMB - 2);
  C[-1] = oc | c << (BITS_PER_MP_LIMB - 1);
  D[-1] = od | d << (BITS_PER_MP_LIMB - 2);

  b >>= 2;
  c >>= 1;
  d >>= 2;

#if WANT_ASSERT
  ASSERT (b < 2);
  if (l == ls)
    {
      ASSERT (c < 3);
      ASSERT (d < 2);
    }
  else
    {
      ASSERT (c < 2);
      ASSERT (!d);
    }
#endif

  *ptb = b;
  *ptc = c;
  *ptd = d;
}
1253 #endif
1254
1255
1256 /*-- mpn_toom3_mul_n --------------------------------------------------------------*/
1257
1258 /* Multiplies using 5 mults of one third size and so on recursively.
1259 * p[0..2*n-1] := product of a[0..n-1] and b[0..n-1].
1260 * No overlap of p[...] with a[...] or b[...].
1261 * ws is workspace.
1262 */
1263
1264 /* TO DO: If TOOM3_MUL_THRESHOLD is much bigger than KARATSUBA_MUL_THRESHOLD then the
1265 * recursion in mpn_toom3_mul_n() will always bottom out with mpn_kara_mul_n()
1266 * because the "n < KARATSUBA_MUL_THRESHOLD" test here will always be false.
1267 */
1268
1269 #define TOOM3_MUL_REC(p, a, b, n, ws) \
1270 do { \
1271 if (n < KARATSUBA_MUL_THRESHOLD) \
1272 mpn_mul_basecase (p, a, n, b, n); \
1273 else if (n < TOOM3_MUL_THRESHOLD) \
1274 mpn_kara_mul_n (p, a, b, n, ws); \
1275 else \
1276 mpn_toom3_mul_n (p, a, b, n, ws); \
1277 } while (0)
1278
/* Toom-3 multiplication: p[0..2*n-1] := a[0..n-1] * b[0..n-1], using
   five recursive multiplies of one-third size.  No overlap of p[...]
   with a[...] or b[...].  ws is workspace.  The five evaluation points
   are 0, 1/2, 1, 2 and infinity; the pointwise products land in the
   interleaved regions A, B, C, D, E of p and ws set up below. */
void
#if __STDC__
mpn_toom3_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n, mp_ptr ws)
#else
mpn_toom3_mul_n (p, a, b, n, ws)
     mp_ptr p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
     mp_ptr ws;
#endif
{
  mp_limb_t cB,cC,cD, dB,dC,dD, tB,tC,tD;
  mp_limb_t *A,*B,*C,*D,*E, *W;
  mp_size_t l,l2,l3,l4,l5,ls;

  SCHEME_BIGNUM_USE_FUEL(n);

  /* Break n words into chunks of size l, l and ls.
   * n = 3*k   => l = k,   ls = k
   * n = 3*k+1 => l = k+1, ls = k-1
   * n = 3*k+2 => l = k+1, ls = k
   */
  {
    mp_limb_t m;

    ASSERT (n >= TOOM3_MUL_THRESHOLD);
    l = ls = n / 3;
    m = n - l * 3;
    if (m != 0)
      ++l;
    if (m == 1)
      --ls;

    l2 = l * 2;
    l3 = l * 3;
    l4 = l * 4;
    l5 = l * 5;
    /* Coefficient regions: A, C, E live in p; B, D and the recursion
       workspace W live in ws. */
    A = p;
    B = ws;
    C = p + l2;
    D = ws + l2;
    E = p + l4;
    W = ws + l4;
  }

  /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/
  /* cB/cC/cD and dB/dC/dD receive the small top words of the evaluated
     operands. */
  evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
  evaluate3 (A + l, B + l, C + l, &dB, &dC, &dD, b, b + l, b + l2, l, ls);

  /** Second stage: pointwise multiplies. **/
  /* Each product of (X + top word) pairs: the limb parts are multiplied
     by the recursive call and the top-word cross terms are folded in
     with mpn_addmul_1 / add2Times. */
  TOOM3_MUL_REC(D, C, C + l, l, W);
  tD = cD*dD;
  if (cD) tD += mpn_addmul_1 (D + l, C + l, l, cD);
  if (dD) tD += mpn_addmul_1 (D + l, C, l, dD);
  ASSERT (tD < 49);
  TOOM3_MUL_REC(C, B, B + l, l, W);
  tC = cC*dC;
  /* TO DO: choose one of the following alternatives. */
#if 0
  if (cC) tC += mpn_addmul_1 (C + l, B + l, l, cC);
  if (dC) tC += mpn_addmul_1 (C + l, B, l, dC);
#else
  /* The top words at this point can only be 0, 1 or 2, so use add or
     add-twice instead of a general addmul. */
  if (cC)
    {
      if (cC == 1) tC += mpn_add_n (C + l, C + l, B + l, l);
      else tC += add2Times (C + l, C + l, B + l, l);
    }
  if (dC)
    {
      if (dC == 1) tC += mpn_add_n (C + l, C + l, B, l);
      else tC += add2Times (C + l, C + l, B, l);
    }
#endif
  ASSERT (tC < 9);
  TOOM3_MUL_REC(B, A, A + l, l, W);
  tB = cB*dB;
  if (cB) tB += mpn_addmul_1 (B + l, A + l, l, cB);
  if (dB) tB += mpn_addmul_1 (B + l, A, l, dB);
  ASSERT (tB < 49);
  TOOM3_MUL_REC(A, a, b, l, W);
  TOOM3_MUL_REC(E, a + l2, b + l2, ls, W);

  /** Third stage: interpolation. **/
  interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);

  /** Final stage: add up the coefficients. **/
  {
    /* mp_limb_t i, x, y; */
    tB += mpn_add_n (p + l, p + l, B, l2);
    tD += mpn_add_n (p + l3, p + l3, D, l2);
    mpn_incr_u (p + l3, tB);
    mpn_incr_u (p + l4, tC);
    mpn_incr_u (p + l5, tD);
  }
}
1375
1376 /*-- mpn_toom3_sqr_n --------------------------------------------------------------*/
1377
1378 /* Like previous function but for squaring */
1379
1380 #define TOOM3_SQR_REC(p, a, n, ws) \
1381 do { \
1382 if (n < KARATSUBA_SQR_THRESHOLD) \
1383 mpn_sqr_basecase (p, a, n); \
1384 else if (n < TOOM3_SQR_THRESHOLD) \
1385 mpn_kara_sqr_n (p, a, n, ws); \
1386 else \
1387 mpn_toom3_sqr_n (p, a, n, ws); \
1388 } while (0)
1389
1390 void
1391 #if __STDC__
mpn_toom3_sqr_n(mp_ptr p,mp_srcptr a,mp_size_t n,mp_ptr ws)1392 mpn_toom3_sqr_n (mp_ptr p, mp_srcptr a, mp_size_t n, mp_ptr ws)
1393 #else
1394 mpn_toom3_sqr_n (p, a, n, ws)
1395 mp_ptr p;
1396 mp_srcptr a;
1397 mp_size_t n;
1398 mp_ptr ws;
1399 #endif
1400 {
1401 mp_limb_t cB,cC,cD, tB,tC,tD;
1402 mp_limb_t *A,*B,*C,*D,*E, *W;
1403 mp_size_t l,l2,l3,l4,l5,ls;
1404
1405 SCHEME_BIGNUM_USE_FUEL(n);
1406
1407 /* Break n words into chunks of size l, l and ls.
1408 * n = 3*k => l = k, ls = k
1409 * n = 3*k+1 => l = k+1, ls = k-1
1410 * n = 3*k+2 => l = k+1, ls = k
1411 */
1412 {
1413 mp_limb_t m;
1414
1415 ASSERT (n >= TOOM3_MUL_THRESHOLD);
1416 l = ls = n / 3;
1417 m = n - l * 3;
1418 if (m != 0)
1419 ++l;
1420 if (m == 1)
1421 --ls;
1422
1423 l2 = l * 2;
1424 l3 = l * 3;
1425 l4 = l * 4;
1426 l5 = l * 5;
1427 A = p;
1428 B = ws;
1429 C = p + l2;
1430 D = ws + l2;
1431 E = p + l4;
1432 W = ws + l4;
1433 }
1434
1435 /** First stage: evaluation at points 0, 1/2, 1, 2, oo. **/
1436 evaluate3 (A, B, C, &cB, &cC, &cD, a, a + l, a + l2, l, ls);
1437
1438 /** Second stage: pointwise multiplies. **/
1439 TOOM3_SQR_REC(D, C, l, W);
1440 tD = cD*cD;
1441 if (cD) tD += mpn_addmul_1 (D + l, C, l, 2*cD);
1442 ASSERT (tD < 49);
1443 TOOM3_SQR_REC(C, B, l, W);
1444 tC = cC*cC;
1445 /* TO DO: choose one of the following alternatives. */
1446 #if 0
1447 if (cC) tC += mpn_addmul_1 (C + l, B, l, 2*cC);
1448 #else
1449 if (cC >= 1)
1450 {
1451 tC += add2Times (C + l, C + l, B, l);
1452 if (cC == 2)
1453 tC += add2Times (C + l, C + l, B, l);
1454 }
1455 #endif
1456 ASSERT (tC < 9);
1457 TOOM3_SQR_REC(B, A, l, W);
1458 tB = cB*cB;
1459 if (cB) tB += mpn_addmul_1 (B + l, A, l, 2*cB);
1460 ASSERT (tB < 49);
1461 TOOM3_SQR_REC(A, a, l, W);
1462 TOOM3_SQR_REC(E, a + l2, ls, W);
1463
1464 /** Third stage: interpolation. **/
1465 interpolate3 (A, B, C, D, E, &tB, &tC, &tD, l2, ls << 1);
1466
1467 /** Final stage: add up the coefficients. **/
1468 {
1469 /* mp_limb_t i, x, y; */
1470 tB += mpn_add_n (p + l, p + l, B, l2);
1471 tD += mpn_add_n (p + l3, p + l3, D, l2);
1472 mpn_incr_u (p + l3, tB);
1473 mpn_incr_u (p + l4, tC);
1474 mpn_incr_u (p + l5, tD);
1475 }
1476 }
1477
/* Multiply {a,n} by {b,n} and store the 2n-limb product at p, dispatching
   on the operand size to basecase, Karatsuba, Toom-3, or (when compiled
   in) FFT multiplication.  p must not overlap the inputs. */
void
#if __STDC__
mpn_mul_n (mp_ptr p, mp_srcptr a, mp_srcptr b, mp_size_t n)
#else
mpn_mul_n (p, a, b, n)
     mp_ptr p;
     mp_srcptr a;
     mp_srcptr b;
     mp_size_t n;
#endif
{
  if (n < KARATSUBA_MUL_THRESHOLD)
    mpn_mul_basecase (p, a, n, b, n);
  else if (n < TOOM3_MUL_THRESHOLD)
    {
      /* Allocate workspace of fixed size on stack: fast!  The size bound
	 is safe because n < TOOM3_MUL_THRESHOLD on this path. */
#if TUNE_PROGRAM_BUILD
      mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD_LIMIT-1) + 2 * BITS_PER_MP_LIMB];
#else
      mp_limb_t ws[2 * (TOOM3_MUL_THRESHOLD-1) + 2 * BITS_PER_MP_LIMB];
#endif
      mpn_kara_mul_n (p, a, b, n, ws);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (n < FFT_MUL_THRESHOLD)
#else
  else
#endif
    {
      /* Use workspace of unknown size in heap, as stack space may
       * be limited.  Since n is at least TOOM3_MUL_THRESHOLD, the
       * multiplication will take much longer than malloc()/free(). */
      mp_limb_t wsLen, *ws;
      TMP_DECL (marker);
      TMP_MARK (marker);

      /* Scratch for the Toom-3 recursion. */
      wsLen = 2 * n + 3 * BITS_PER_MP_LIMB;
      ws = (mp_ptr) TMP_ALLOC ((size_t) wsLen * sizeof (mp_limb_t));
      mpn_toom3_mul_n (p, a, b, n, ws);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
      /* Largest operands: Schoenhage-Strassen FFT multiplication. */
      mpn_mul_fft_full (p, a, n, b, n);
    }
#endif
}
1526
1527
1528 /* mpn_rshift -- Shift right a low-level natural-number integer. */
1529
1530 /* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
1531 and store the USIZE least significant limbs of the result at WP.
1532 The bits shifted out to the right are returned.
1533
1534 Argument constraints:
1535 1. 0 < CNT < BITS_PER_MP_LIMB
1536 2. If the result is to be written over the input, WP must be <= UP.
1537 */
1538
1539 mp_limb_t
1540 #if __STDC__
mpn_rshift(register mp_ptr wp,register mp_srcptr up,mp_size_t usize,register unsigned int cnt)1541 mpn_rshift (register mp_ptr wp,
1542 register mp_srcptr up, mp_size_t usize,
1543 register unsigned int cnt)
1544 #else
1545 mpn_rshift (wp, up, usize, cnt)
1546 register mp_ptr wp;
1547 register mp_srcptr up;
1548 mp_size_t usize;
1549 register unsigned int cnt;
1550 #endif
1551 {
1552 register mp_limb_t high_limb, low_limb;
1553 register unsigned sh_1, sh_2;
1554 register mp_size_t i;
1555 mp_limb_t retval;
1556
1557 #ifdef DEBUG
1558 if (usize == 0 || cnt == 0)
1559 abort ();
1560 #endif
1561
1562 sh_1 = cnt;
1563
1564 #if 0
1565 if (sh_1 == 0)
1566 {
1567 if (wp != up)
1568 {
1569 /* Copy from low end to high end, to allow specified input/output
1570 overlapping. */
1571 for (i = 0; i < usize; i++)
1572 wp[i] = up[i];
1573 }
1574 return usize;
1575 }
1576 #endif
1577
1578 wp -= 1;
1579 sh_2 = BITS_PER_MP_LIMB - sh_1;
1580 high_limb = up[0];
1581 retval = high_limb << sh_2;
1582 low_limb = high_limb;
1583
1584 for (i = 1; i < usize; i++)
1585 {
1586 high_limb = up[i];
1587 wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
1588 low_limb = high_limb;
1589 }
1590 wp[i] = low_limb >> sh_1;
1591
1592 return retval;
1593 }
1594
1595 /* mpn_lshift -- Shift left low level. */
1596
1597 /* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
1598 and store the USIZE least significant digits of the result at WP.
1599 Return the bits shifted out from the most significant digit.
1600
1601 Argument constraints:
1602 1. 0 < CNT < BITS_PER_MP_LIMB
1603 2. If the result is to be written over the input, WP must be >= UP.
1604 */
1605
1606 mp_limb_t
1607 #if __STDC__
mpn_lshift(register mp_ptr wp,register mp_srcptr up,mp_size_t usize,register unsigned int cnt)1608 mpn_lshift (register mp_ptr wp,
1609 register mp_srcptr up, mp_size_t usize,
1610 register unsigned int cnt)
1611 #else
1612 mpn_lshift (wp, up, usize, cnt)
1613 register mp_ptr wp;
1614 register mp_srcptr up;
1615 mp_size_t usize;
1616 register unsigned int cnt;
1617 #endif
1618 {
1619 register mp_limb_t high_limb, low_limb;
1620 register unsigned sh_1, sh_2;
1621 register mp_size_t i;
1622 mp_limb_t retval;
1623
1624 #ifdef DEBUG
1625 if (usize == 0 || cnt == 0)
1626 abort ();
1627 #endif
1628
1629 sh_1 = cnt;
1630 #if 0
1631 if (sh_1 == 0)
1632 {
1633 if (wp != up)
1634 {
1635 /* Copy from high end to low end, to allow specified input/output
1636 overlapping. */
1637 for (i = usize - 1; i >= 0; i--)
1638 wp[i] = up[i];
1639 }
1640 return 0;
1641 }
1642 #endif
1643
1644 wp += 1;
1645 sh_2 = BITS_PER_MP_LIMB - sh_1;
1646 i = usize - 1;
1647 low_limb = up[i];
1648 retval = low_limb >> sh_2;
1649 high_limb = low_limb;
1650 while (--i >= 0)
1651 {
1652 low_limb = up[i];
1653 wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
1654 high_limb = low_limb;
1655 }
1656 wp[i] = high_limb << sh_1;
1657
1658 return retval;
1659 }
1660
1661
1662
1663 /* Conversion of U {up,un} to a string in base b. Internally, we convert to
1664 base B = b^m, the largest power of b that fits a limb. Basic algorithms:
1665
1666 A) Divide U repeatedly by B, generating a quotient and remainder, until the
1667 quotient becomes zero. The remainders hold the converted digits. Digits
1668 come out from right to left. (Used in mpn_sb_get_str.)
1669
1670 B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
1671 Then develop digits by multiplying the fraction repeatedly by b. Digits
1672 come out from left to right. (Currently not used herein, except for in
1673 code for converting single limbs to individual digits.)
1674
1675 C) Compute B^1, B^2, B^4, ..., B^(2^s), for s such that B^(2^s) > sqrt(U).
1676 Then divide U by B^(2^k), generating an integer quotient and remainder.
1677 Recursively convert the quotient, then the remainder, using the
1678 precomputed powers. Digits come out from left to right. (Used in
1679 mpn_dc_get_str.)
1680
1681 When using algorithm C, algorithm B might be suitable for basecase code,
1682 since the required b^g power will be readily accessible.
1683
1684 Optimization ideas:
1685 1. The recursive function of (C) could avoid TMP allocation:
1686 a) Overwrite dividend with quotient and remainder, just as permitted by
1687 mpn_sb_divrem_mn.
1688 b) If TMP memory is anyway needed, pass it as a parameter, similarly to
1689 how we do it in Karatsuba multiplication.
1690 2. Store the powers of (C) in normalized form, with the normalization count.
1691 Quotients will usually need to be left-shifted before each divide, and
1692 remainders will either need to be left-shifted of right-shifted.
1693 3. When b is even, the powers will end up with lots of low zero limbs. Could
1694 save significant time in the mpn_tdiv_qr call by stripping these zeros.
1695 4. In the code for developing digits from a single limb, we could avoid using
1696 a full umul_ppmm except for the first (or first few) digits, provided base
1697 is even. Subsequent digits can be developed using plain multiplication.
1698 (This saves on register-starved machines (read x86) and on all machines
1699 that generate the upper product half using a separate instruction (alpha,
1700 powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
1701 5. Separate mpn_dc_get_str basecase code from code for small conversions. The
1702 former code will have the exact right power readily available in the
1703 powtab parameter for dividing the current number into a fraction. Convert
1704 that using algorithm B.
1705 6. Completely avoid division. Compute the inverses of the powers now in
1706 powtab instead of the actual powers.
1707
1708 Basic structure of (C):
1709 mpn_get_str:
1710 if POW2_P (n)
1711 ...
1712 else
1713 if (un < GET_STR_PRECOMPUTE_THRESHOLD)
1714 mpn_sb_get_str (str, base, up, un);
1715 else
1716 precompute_power_tables
1717 mpn_dc_get_str
1718
1719 mpn_dc_get_str:
1720 mpn_tdiv_qr
1721 if (qn < GET_STR_DC_THRESHOLD)
1722 mpn_sb_get_str
1723 else
1724 mpn_dc_get_str
1725 if (rn < GET_STR_DC_THRESHOLD)
1726 mpn_sb_get_str
1727 else
1728 mpn_dc_get_str
1729
1730
   The reason for the two threshold values is the cost of
   precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
   larger than GET_STR_DC_THRESHOLD.  Do you think I should change
   mpn_dc_get_str to instead look like the following?  */
1735
1736
/* The x86s and m68020 have a quotient and remainder "div" instruction and
   gcc recognises an adjacent "/" and "%" can be combined using that.
   Elsewhere "/" and "%" are either separate instructions, or separate
   libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
   A multiply and subtract should be faster than a "%" in those cases. */
#if HAVE_HOST_CPU_FAMILY_x86 \
  || HAVE_HOST_CPU_m68020 \
  || HAVE_HOST_CPU_m68030 \
  || HAVE_HOST_CPU_m68040 \
  || HAVE_HOST_CPU_m68060 \
  || HAVE_HOST_CPU_m68360 /* CPU32 */
/* Combined-div variant: let the compiler fuse "/" and "%". */
#define udiv_qrnd_unnorm(q,r,n,d) \
  do { \
    mp_limb_t __q = (n) / (d); \
    mp_limb_t __r = (n) % (d); \
    (q) = __q; \
    (r) = __r; \
  } while (0)
#else
/* Separate-div variant: derive the remainder by multiply-and-subtract. */
#define udiv_qrnd_unnorm(q,r,n,d) \
  do { \
    mp_limb_t __q = (n) / (d); \
    mp_limb_t __r = (n) - __q*(d); \
    (q) = __q; \
    (r) = __r; \
  } while (0)
#endif
1764
1765 /* When to stop divide-and-conquer and call the basecase mpn_get_str. */
1766 #ifndef GET_STR_DC_THRESHOLD
1767 #define GET_STR_DC_THRESHOLD 15
1768 #endif
1769 /* Whether to bother at all with precomputing powers of the base, or go
1770 to the basecase mpn_get_str directly. */
1771 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
1772 #define GET_STR_PRECOMPUTE_THRESHOLD 30
1773 #endif
1774
/* One entry of the precomputed power table used by mpn_dc_get_str:
   the value big_base^(2^k) together with bookkeeping for the split. */
struct powers
{
  size_t digits_in_base;  /* how many base-`base' digits this power spans */
  mp_ptr p;               /* limbs of the power itself */
  mp_size_t n; /* mpz_struct uses int for sizes, but not mpn! */
  int base;               /* the conversion base (small integer) */
};
typedef struct powers powers_t;
1783
1784 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
1785 the string in STR. Generate LEN characters, possibly padding with zeros to
1786 the left. If LEN is zero, generate as many characters as required.
1787 Return a pointer immediately after the last digit of the result string.
1788 Complexity is O(UN^2); intended for small conversions. */
static unsigned char *
mpn_sb_get_str (unsigned char *str, size_t len,
		mp_ptr up, mp_size_t un,
		const powers_t *powtab)
{
  mp_limb_t rl, ul;
  unsigned char *s;
  int base;
  size_t l;
  /* Allocate memory for largest possible string, given that we only get here
     for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
     base is 3.  7/11 is an approximation to 1/log2(3). */
#if TUNE_PROGRAM_BUILD
#define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * BITS_PER_MP_LIMB * 7 / 11)
#else
#define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * BITS_PER_MP_LIMB * 7 / 11)
#endif
  unsigned char buf[BUF_ALLOC];
#if TUNE_PROGRAM_BUILD
  mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
#else
  mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
#endif

  base = powtab->base;
  if (base == 10)
    {
      /* Special case code for base==10 so that the compiler has a chance to
	 optimize things. */

      /* Work on a copy of the operand; rp[0] holds each remainder. */
      MPN_COPY (rp + 1, up, un);

      /* Digits come out right-to-left, so fill buf from its high end. */
      s = buf + BUF_ALLOC;
      while (un > 1)
	{
	  int i;
	  mp_limb_t frac, digit;
	  /* Divide out one big_base (10^chars_per_limb) chunk. */
	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
					 MP_BASES_BIG_BASE_10,
					 MP_BASES_BIG_BASE_INVERTED_10,
					 MP_BASES_NORMALIZATION_STEPS_10);
	  un -= rp[un] == 0;  /* quotient may have shrunk by one limb */
	  /* Treat the remainder+1 as a fraction of one limb and develop
	     its decimal digits by repeated multiply-by-10 (algorithm B). */
	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
	  s -= MP_BASES_CHARS_PER_LIMB_10;
#if HAVE_HOST_CPU_FAMILY_x86
	  /* The code below turns out to be a bit slower for x86 using gcc.
	     Use plain code. */
	  i = MP_BASES_CHARS_PER_LIMB_10;
	  do
	    {
	      umul_ppmm (digit, frac, frac, 10);
	      *s++ = digit;
	    }
	  while (--i);
#else
	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
	     After a few umul_ppmm, we will have accumulated enough low zeros
	     to use a plain multiply. */
	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
	    {
	      umul_ppmm (digit, frac, frac, 10);
	      *s++ = digit;
	    }
	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
	    {
	      umul_ppmm (digit, frac, frac, 10);
	      *s++ = digit;
	    }
	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
	    {
	      umul_ppmm (digit, frac, frac, 10);
	      *s++ = digit;
	    }
	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
	    {
	      umul_ppmm (digit, frac, frac, 10);
	      *s++ = digit;
	    }
	  i = MP_BASES_CHARS_PER_LIMB_10 - (4-MP_BASES_NORMALIZATION_STEPS_10);
	  /* Enough low zero bits accumulated; switch to plain multiplies,
	     keeping the fraction in the low BITS_PER_MP_LIMB-4 bits. */
	  frac = (frac + 0xf) >> 4;
	  do
	    {
	      frac *= 10;
	      digit = frac >> (BITS_PER_MP_LIMB - 4);
	      *s++ = digit;
	      frac &= (~(mp_limb_t) 0) >> 4;
	    }
	  while (--i);
#endif
	  /* Step back to the start of the chunk just written. */
	  s -= MP_BASES_CHARS_PER_LIMB_10;
	}

      /* Final limb: most significant digits, developed by plain division
	 so no leading zeros are produced within this chunk. */
      ul = rp[1];
      while (ul != 0)
	{
	  udiv_qrnd_unnorm (ul, rl, ul, 10);
	  *--s = rl;
	}
    }
  else /* not base 10 */
    {
      unsigned chars_per_limb;
      mp_limb_t big_base;
      mp_limb_t big_base_inverted USED_ONLY_SOMETIMES;
      unsigned normalization_steps USED_ONLY_SOMETIMES;

      chars_per_limb = __mp_bases[base].chars_per_limb;
      big_base = __mp_bases[base].big_base;
      big_base_inverted = __mp_bases[base].big_base_inverted;
      count_leading_zeros (normalization_steps, big_base);

      /* Same structure as the base-10 branch above, but with runtime
	 parameters looked up from __mp_bases. */
      MPN_COPY (rp + 1, up, un);

      s = buf + BUF_ALLOC;
      while (un > 1)
	{
	  int i;
	  mp_limb_t frac;
	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
					 big_base, big_base_inverted,
					 normalization_steps);
	  un -= rp[un] == 0;
	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
	  s -= chars_per_limb;
	  i = chars_per_limb;
	  do
	    {
	      mp_limb_t digit;
	      umul_ppmm (digit, frac, frac, base);
	      *s++ = digit;
	    }
	  while (--i);
	  s -= chars_per_limb;
	}

      ul = rp[1];
      while (ul != 0)
	{
	  udiv_qrnd_unnorm (ul, rl, ul, base);
	  *--s = rl;
	}
    }

  /* l = number of digits developed in buf. */
  l = buf + BUF_ALLOC - s;
  /* Left-pad with zero digits up to the requested length... */
  while (l < len)
    {
      *str++ = 0;
      len--;
    }
  /* ...then copy the digits themselves out to str. */
  while (l != 0)
    {
      *str++ = *s++;
      l--;
    }
  return str;
}
1945
1946 /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
1947 the string in STR. Generate LEN characters, possibly padding with zeros to
1948 the left. If LEN is zero, generate as many characters as required.
1949 Return a pointer immediately after the last digit of the result string.
1950 This uses divide-and-conquer and is intended for large conversions. */
1951 static unsigned char *
mpn_dc_get_str(unsigned char * str,size_t len,mp_ptr up,mp_size_t un,const powers_t * powtab)1952 mpn_dc_get_str (unsigned char *str, size_t len,
1953 mp_ptr up, mp_size_t un,
1954 const powers_t *powtab)
1955 {
1956 if (un < GET_STR_DC_THRESHOLD)
1957 {
1958 if (un != 0)
1959 str = mpn_sb_get_str (str, len, up, un, powtab);
1960 else
1961 {
1962 while (len != 0)
1963 {
1964 *str++ = 0;
1965 len--;
1966 }
1967 }
1968 }
1969 else
1970 {
1971 mp_ptr pwp, qp, rp;
1972 mp_size_t pwn, qn;
1973
1974 pwp = powtab->p;
1975 pwn = powtab->n;
1976
1977 if (un < pwn || (un == pwn && mpn_cmp (up, pwp, un) < 0))
1978 {
1979 str = mpn_dc_get_str (str, len, up, un, powtab - 1);
1980 }
1981 else
1982 {
1983 TMP_DECL (marker);
1984 TMP_MARK (marker);
1985 qp = TMP_ALLOC_LIMBS (un - pwn + 1);
1986 rp = TMP_ALLOC_LIMBS (pwn);
1987
1988 mpn_tdiv_qr (qp, rp, 0L, up, un, pwp, pwn);
1989 qn = un - pwn; qn += qp[qn] != 0; /* quotient size */
1990 if (len != 0)
1991 len = len - powtab->digits_in_base;
1992 str = mpn_dc_get_str (str, len, qp, qn, powtab - 1);
1993 str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn, powtab - 1);
1994 TMP_FREE (marker);
1995 }
1996 }
1997 return str;
1998 }
1999
2000 /* There are no leading zeros on the digits generated at str, but that's not
2001 currently a documented feature. */
2002
/* Convert {up,un} to a digit string (digit values, not ASCII) in the
   given base, writing into str and returning the number of digits.
   Power-of-2 bases are extracted directly bit-by-bit; other bases go
   through the basecase or divide-and-conquer converters above. */
size_t
mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
{
  mp_ptr powtab_mem, powtab_mem_ptr;
  mp_limb_t big_base;
  size_t digits_in_base;
  powers_t powtab[30];
  int pi;
  mp_size_t n;
  mp_ptr p, t;
  size_t out_len;

  /* Special case zero, as the code below doesn't handle it. */
  if (un == 0)
    {
      str[0] = 0;
      return 1;
    }

  if (POW2_P (base))
    {
      /* The base is a power of 2.  Convert from most significant end. */
      mp_limb_t n1, n0;
      /* For power-of-2 bases, __mp_bases stores log2(base) in big_base. */
      int bits_per_digit = __mp_bases[base].big_base;
      int cnt;
      int bit_pos;
      mp_size_t i;
      unsigned char *s = str;

      n1 = up[un - 1];
      count_leading_zeros (cnt, n1);

      /* BIT_POS should be R when input ends in least significant nibble,
	 R + bits_per_digit * n when input ends in nth least significant
	 nibble. */

      {
	unsigned long bits;

	/* Count significant bits, round up to a whole number of digits,
	   and locate the first digit's position within the top limb. */
	bits = GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
	cnt = bits % bits_per_digit;
	if (cnt != 0)
	  bits += bits_per_digit - cnt;
	bit_pos = bits - (un - 1) * GMP_NUMB_BITS;
      }

      /* Fast loop for bit output. */
      i = un - 1;
      for (;;)
	{
	  /* Emit all whole digits contained in the current limb n1. */
	  bit_pos -= bits_per_digit;
	  while (bit_pos >= 0)
	    {
	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
	      bit_pos -= bits_per_digit;
	    }
	  i--;
	  if (i < 0)
	    break;
	  /* Digit straddles the limb boundary: take the low bits of n1
	     and the high bits of the next limb. */
	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
	  n1 = up[i];
	  bit_pos += GMP_NUMB_BITS;
	  *s++ = n0 | (n1 >> bit_pos);

	  /* Periodically account for work done (Racket scheduler hook). */
	  if (!(i & 0xFF)) SCHEME_BIGNUM_USE_FUEL(1);
	}

      *s = 0;

      return s - str;
    }

  /* General case.  The base is not a power of 2. */

  if (un < GET_STR_PRECOMPUTE_THRESHOLD)
    {
      /* Small operand: basecase conversion, no power table needed. */
      struct powers ptab[1];
      ptab[0].base = base;
      return mpn_sb_get_str (str, (size_t) 0, up, un, ptab) - str;
    }

  /* Allocate one large block for the powers of big_base.  With the current
     scheme, we need to allocate twice as much as would be possible if a
     minimal set of powers were generated. */
#define ALLOC_SIZE (2 * un + 30)
  {
    TMP_DECL (marker);
    TMP_MARK (marker);

    powtab_mem = __GMP_ALLOCATE_FUNC_LIMBS (ALLOC_SIZE);
    powtab_mem_ptr = powtab_mem;

    /* Compute a table of powers: big_base^1, big_base^2, big_base^4, ...,
       big_base^(2^k), for k such that the biggest power is between U and
       sqrt(U). */

    big_base = __mp_bases[base].big_base;
    digits_in_base = __mp_bases[base].chars_per_limb;

    powtab[0].base = base; /* FIXME: hack for getting base to mpn_sb_get_str */
    powtab[1].p = &big_base;
    powtab[1].n = 1;
    powtab[1].digits_in_base = digits_in_base;
    powtab[1].base = base;
    powtab[2].p = &big_base;
    powtab[2].n = 1;
    powtab[2].digits_in_base = digits_in_base;
    powtab[2].base = base;
    n = 1;
    pi = 2;
    p = &big_base;
    for (;;)
      {
	/* Square the previous power to get the next table entry. */
	++pi;
	t = powtab_mem_ptr;
	powtab_mem_ptr += 2 * n;
	mpn_sqr_n (t, p, n);
	n *= 2; n -= t[n - 1] == 0;  /* drop a possible high zero limb */
	digits_in_base *= 2;
	p = t;
	powtab[pi].p = p;
	powtab[pi].n = n;
	powtab[pi].digits_in_base = digits_in_base;
	powtab[pi].base = base;

	if (2 * n > un)
	  break;
      }
    ASSERT_ALWAYS (ALLOC_SIZE > powtab_mem_ptr - powtab_mem);

    /* Using our precomputed powers, now in powtab[], convert our number. */
    out_len = mpn_dc_get_str (str, 0, up, un, powtab + pi) - str;

    __GMP_FREE_FUNC_LIMBS (powtab_mem, ALLOC_SIZE);

    TMP_FREE(marker);
  }

  return out_len;
}
2143
2144
2145 /* When to switch to sub-quadratic code. This counts characters/digits in
2146 the input string, not limbs as most other *_THRESHOLD. */
2147 #ifndef SET_STR_THRESHOLD
2148 #define SET_STR_THRESHOLD 4000
2149 #endif
2150
/* Don't define this to anything but 1 for now.  In order to make other values
   work well, either the convert_blocks function should be generalized to handle
   larger blocks than chars_per_limb, or the basecase code should be broken out
   of the main function.  Also note that this must be a power of 2. */
2155 #ifndef SET_STR_BLOCK_SIZE
2156 #define SET_STR_BLOCK_SIZE 1 /* Must be a power of 2. */
2157 #endif
2158
2159
2160 /* This check interferes with expression based values of SET_STR_THRESHOLD
2161 used for tuning and measuring.
2162 #if SET_STR_BLOCK_SIZE >= SET_STR_THRESHOLD
2163 These values are silly.
2164 The sub-quadratic code would recurse to itself.
2165 #endif
2166 */
2167
2168 static mp_size_t
2169 convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base);
2170
/* Convert the big-endian digit string {str,str_len} (each byte a digit
   value in the given base, not ASCII) to a limb array at rp; return the
   number of limbs stored.  rp must be large enough for the result. */
mp_size_t
mpn_set_str (mp_ptr rp, const unsigned char *str, size_t str_len, int base)
{
  mp_size_t size;
  mp_limb_t big_base;
  int chars_per_limb;
  mp_limb_t res_digit;

  ASSERT (base >= 2);
  ASSERT (base < numberof (__mp_bases));
  ASSERT (str_len >= 1);

  big_base = __mp_bases[base].big_base;
  chars_per_limb = __mp_bases[base].chars_per_limb;

  size = 0;

  if (POW2_P (base))
    {
      /* The base is a power of 2.  Read the input string from least to most
	 significant character/digit. */

      const unsigned char *s;
      int next_bitpos;
      /* For power-of-2 bases, big_base holds log2(base). */
      int bits_per_indigit = big_base;

      res_digit = 0;
      next_bitpos = 0;

      for (s = str + str_len - 1; s >= str; s--)
	{
	  int inp_digit = *s;

	  /* Pack this digit's bits into the limb being accumulated. */
	  res_digit |= ((mp_limb_t) inp_digit << next_bitpos) & GMP_NUMB_MASK;
	  next_bitpos += bits_per_indigit;
	  if (next_bitpos >= GMP_NUMB_BITS)
	    {
	      /* Limb filled: store it, keep the overflowed bits. */
	      rp[size++] = res_digit;
	      next_bitpos -= GMP_NUMB_BITS;
	      res_digit = inp_digit >> (bits_per_indigit - next_bitpos);
	    }

	  /* Periodically account for work done (Racket scheduler hook). */
	  if (!((intptr_t)s & 0xFF)) SCHEME_BIGNUM_USE_FUEL(1);
	}

      if (res_digit != 0)
	rp[size++] = res_digit;
      return size;
    }
  else
    {
      /* General case.  The base is not a power of 2. */

      if (str_len < SET_STR_THRESHOLD)
	{
	  /* Quadratic algorithm: fold chars_per_limb digits at a time into
	     one limb, then fold that limb into the running result via a
	     multiply by big_base and an add. */
	  size_t i;
	  int j;
	  mp_limb_t cy_limb;

	  for (i = chars_per_limb; i < str_len; i += chars_per_limb)
	    {
	      res_digit = *str++;
	      if (base == 10)
		{ /* This is a common case.
		     Help the compiler to avoid multiplication. */
		  for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
		    res_digit = res_digit * 10 + *str++;
		}
	      else
		{
		  for (j = chars_per_limb - 1; j != 0; j--)
		    res_digit = res_digit * base + *str++;
		}

	      if (size == 0)
		{
		  /* First nonzero block starts the result. */
		  if (res_digit != 0)
		    {
		      rp[0] = res_digit;
		      size = 1;
		    }
		}
	      else
		{
#if HAVE_NATIVE_mpn_mul_1c
		  cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
#else
		  cy_limb = mpn_mul_1 (rp, rp, size, big_base);
		  cy_limb += mpn_add_1 (rp, rp, size, res_digit);
#endif
		  if (cy_limb != 0)
		    rp[size++] = cy_limb;
		}
	    }

	  /* Last, possibly partial, block: fewer than chars_per_limb
	     digits remain, so build the matching power of base in
	     big_base while accumulating. */
	  big_base = base;
	  res_digit = *str++;
	  if (base == 10)
	    { /* This is a common case.
		 Help the compiler to avoid multiplication. */
	      for (j = str_len - (i - MP_BASES_CHARS_PER_LIMB_10) - 1; j > 0; j--)
		{
		  res_digit = res_digit * 10 + *str++;
		  big_base *= 10;
		}
	    }
	  else
	    {
	      for (j = str_len - (i - chars_per_limb) - 1; j > 0; j--)
		{
		  res_digit = res_digit * base + *str++;
		  big_base *= base;
		}
	    }

	  if (size == 0)
	    {
	      if (res_digit != 0)
		{
		  rp[0] = res_digit;
		  size = 1;
		}
	    }
	  else
	    {
#if HAVE_NATIVE_mpn_mul_1c
	      cy_limb = mpn_mul_1c (rp, rp, size, big_base, res_digit);
#else
	      cy_limb = mpn_mul_1 (rp, rp, size, big_base);
	      cy_limb += mpn_add_1 (rp, rp, size, res_digit);
#endif
	      if (cy_limb != 0)
		rp[size++] = cy_limb;
	    }
	  return size;
	}
      else
	{
	  /* Sub-quadratic code. */

	  mp_ptr dp;
	  mp_size_t dsize;
	  mp_ptr xp, tp;
	  mp_size_t step;
	  mp_size_t i;
	  size_t alloc;
	  mp_size_t n;
	  mp_ptr pow_mem;
	  TMP_DECL (marker);
	  TMP_MARK (marker);

	  /* Upper bound on result limbs, doubled for scratch headroom. */
	  alloc = (str_len + chars_per_limb - 1) / chars_per_limb;
	  alloc = 2 * alloc;
	  dp = __GMP_ALLOCATE_FUNC_LIMBS (alloc);

	  /* Convert the digit string into independent one-limb blocks,
	     each representing chars_per_limb digits (base big_base). */
#if SET_STR_BLOCK_SIZE == 1
	  dsize = convert_blocks (dp, str, str_len, base);
#else
	  {
	    const unsigned char *s;
	    mp_ptr ddp = dp;

	    s = str + str_len;
	    while (s - str > SET_STR_BLOCK_SIZE * chars_per_limb)
	      {
		s -= SET_STR_BLOCK_SIZE * chars_per_limb;
		mpn_set_str (ddp, s, SET_STR_BLOCK_SIZE * chars_per_limb, base);
		ddp += SET_STR_BLOCK_SIZE;
	      }
	    ddp += mpn_set_str (ddp, str, s - str, base);
	    dsize = ddp - dp;
	  }
#endif

	  /* Allocate space for powers of big_base.  Could trim this in two
	     ways:
	     1. Only really need 2^ceil(log2(dsize)) bits for the largest
		power.
	     2. Only the variable to get the largest power need that much
		memory.  The other variable needs half as much.  Need just
		figure out which of xp and tp will hold the last one.
	     Net space savings would be in the range 1/4 to 5/8 of current
	     allocation, depending on how close to the next power of 2 that
	     dsize is. */
	  pow_mem = __GMP_ALLOCATE_FUNC_LIMBS (2 * alloc);
	  xp = pow_mem;
	  tp = pow_mem + alloc;

	  xp[0] = big_base;
	  n = 1;
	  step = 1;
#if SET_STR_BLOCK_SIZE != 1
	  for (i = SET_STR_BLOCK_SIZE; i > 1; i >>= 1)
	    {
	      mpn_sqr_n (tp, xp, n);
	      n = 2 * n;
	      n -= tp[n - 1] == 0;

	      step = 2 * step;
	      MP_PTR_SWAP (tp, xp);
	    }
#endif

	  /* Multiply every second limb block, each `step' limbs large by the
	     base power currently in xp[], then add this to the adjacent block.
	     We thereby convert from dsize blocks in base big_base, to dsize/2
	     blocks in base big_base^2, then to dsize/4 blocks in base
	     big_base^4, etc, etc. */

	  if (step < dsize)
	    {
	      for (;;)
		{
		  for (i = 0; i < dsize - step; i += 2 * step)
		    {
		      mp_ptr bp = dp + i;
		      mp_size_t m = dsize - i - step;
		      mp_size_t hi;
		      if (n >= m)
			{
			  /* Power at least as long as the topmost block;
			     the combined result may shrink dsize. */
			  mpn_mul (tp, xp, n, bp + step, m);
			  mpn_add (bp, tp, n + m, bp, n);
			  hi = i + n + m;
			  dsize = hi;
			  dsize -= dp[dsize - 1] == 0;
			}
		      else
			{
			  mpn_mul_n (tp, xp, bp + step, n);
			  mpn_add (bp, tp, n + n, bp, n);
			}
		    }

		  step = 2 * step;
		  if (! (step < dsize))
		    break;

		  /* Square xp[] to move to the next base power. */
		  mpn_sqr_n (tp, xp, n);
		  n = 2 * n;
		  n -= tp[n - 1] == 0;
		  MP_PTR_SWAP (tp, xp);
		}
	    }

	  MPN_NORMALIZE (dp, dsize);
	  MPN_COPY (rp, dp, dsize);
	  __GMP_FREE_FUNC_LIMBS (pow_mem, 2 * alloc);
	  __GMP_FREE_FUNC_LIMBS (dp, alloc);
	  TMP_FREE (marker);
	  return dsize;
	}
    }
}
2424
2425 static mp_size_t
convert_blocks(mp_ptr dp,const unsigned char * str,size_t str_len,int base)2426 convert_blocks (mp_ptr dp, const unsigned char *str, size_t str_len, int base)
2427 {
2428 int chars_per_limb;
2429 mp_size_t i;
2430 int j;
2431 int ds;
2432 mp_size_t dsize;
2433 mp_limb_t res_digit;
2434
2435 chars_per_limb = __mp_bases[base].chars_per_limb;
2436
2437 dsize = str_len / chars_per_limb;
2438 ds = str_len % chars_per_limb;
2439
2440 if (ds != 0)
2441 {
2442 res_digit = *str++;
2443 for (j = ds - 1; j != 0; j--)
2444 res_digit = res_digit * base + *str++;
2445 dp[dsize] = res_digit;
2446 }
2447
2448 if (base == 10)
2449 {
2450 for (i = dsize - 1; i >= 0; i--)
2451 {
2452 res_digit = *str++;
2453 for (j = MP_BASES_CHARS_PER_LIMB_10 - 1; j != 0; j--)
2454 res_digit = res_digit * 10 + *str++;
2455 dp[i] = res_digit;
2456 }
2457 }
2458 else
2459 {
2460 for (i = dsize - 1; i >= 0; i--)
2461 {
2462 res_digit = *str++;
2463 for (j = chars_per_limb - 1; j != 0; j--)
2464 res_digit = res_digit * base + *str++;
2465 dp[i] = res_digit;
2466 }
2467 }
2468
2469 return dsize + (ds != 0);
2470 }
2471
2472
2473 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2474 write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
2475 qxn is non-zero, generate that many fraction limbs and append them after the
2476 other quotient limbs, and update the remainder accordningly. The input
2477 operands are unaffected.
2478
2479 Preconditions:
2480 1. The most significant limb of of the divisor must be non-zero.
2481 2. No argument overlap is permitted. (??? relax this ???)
2482 3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
2483
2484 The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
2485 complexity of multiplication. */
2486
#ifndef BZ_THRESHOLD
#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
#endif

/* Extract the middle limb from ((h,,l) << cnt) */
#define SHL(h,l,cnt) \
  ((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))

/* Dispatch on the divisor size dn: dn==1 and dn==2 go to the dedicated
   single- and double-limb primitives; larger divisors are normalized
   (shifted so the top divisor bit is set) and handled either by repeated
   2*dn/dn Burnikel-Ziegler steps, schoolbook division, or — when the
   quotient is small relative to the divisor — an incremental scheme that
   only looks at the high limbs of both operands (see the long comment in
   the default case below).  */
void
#if __STDC__
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
             mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
#else
mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
     mp_ptr qp;
     mp_ptr rp;
     mp_size_t qxn;
     mp_srcptr np;
     mp_size_t nn;
     mp_srcptr dp;
     mp_size_t dn;
#endif
{
  /* FIXME:
     1. qxn
     2. pass allocated storage in additional parameter?
  */
#ifdef DEBUG
  if (qxn != 0)
    abort ();
#endif

  switch (dn)
    {
    case 0:
#if 0
      DIVIDE_BY_ZERO;
#endif
      return;

    case 1:
      {
        /* Single-limb divisor: delegate entirely.  */
        rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
        return;
      }

    case 2:
      {
        int cnt;
        mp_ptr n2p, d2p;
        mp_limb_t qhl, cy;
        TMP_DECL (marker);
        TMP_MARK (marker);
        /* mpn_divrem_2 requires a normalized divisor (top bit set), so
           shift both operands left by cnt first when needed.  */
        count_leading_zeros (cnt, dp[dn - 1]);
        if (cnt != 0)
          {
            d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
            mpn_lshift (d2p, dp, dn, cnt);
            n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
            cy = mpn_lshift (n2p, np, nn, cnt);
            n2p[nn] = cy;
            qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
            if (cy == 0)
              qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
          }
        else
          {
            d2p = (mp_ptr) dp;
            n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
            MPN_COPY (n2p, np, nn);
            qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
            qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
          }

        /* Undo the normalization shift on the remainder.  */
        if (cnt != 0)
          mpn_rshift (rp, n2p, dn, cnt);
        else
          MPN_COPY (rp, n2p, dn);
        TMP_FREE (marker);
        return;
      }

    default:
      {
        int adjust;
        TMP_DECL (marker);
        TMP_MARK (marker);
        adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
        if (nn + adjust >= 2 * dn)
          {
            /* Quotient has at least dn limbs: chop off 2*dn/dn divisions.  */
            mp_ptr n2p, d2p;
            mp_limb_t cy;
            int cnt;
            count_leading_zeros (cnt, dp[dn - 1]);

            qp[nn - dn] = 0; /* zero high quotient limb */
            if (cnt != 0) /* normalize divisor if needed */
              {
                d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
                mpn_lshift (d2p, dp, dn, cnt);
                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                cy = mpn_lshift (n2p, np, nn, cnt);
                n2p[nn] = cy;
                nn += adjust;
              }
            else
              {
                d2p = (mp_ptr) dp;
                n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
                MPN_COPY (n2p, np, nn);
                n2p[nn] = 0;
                nn += adjust;
              }

            if (dn == 2)
              mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
            else if (dn < BZ_THRESHOLD)
              mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
            else
              {
                /* Perform 2*dn / dn limb divisions as long as the limbs
                   in np last.  */
                mp_ptr q2p = qp + nn - 2 * dn;
                n2p += nn - 2 * dn;
                mpn_bz_divrem_n (q2p, n2p, d2p, dn);
                nn -= dn;
                while (nn >= 2 * dn)
                  {
                    mp_limb_t c;
                    q2p -= dn; n2p -= dn;
                    c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
                    ASSERT_ALWAYS (c == 0);
                    nn -= dn;
                  }

                if (nn != dn)
                  {
                    n2p -= nn - dn;
                    /* In theory, we could fall out to the cute code below
                       since we now have exactly the situation that code
                       is designed to handle.  We botch this badly and call
                       the basic mpn_sb_divrem_mn!  */
                    if (dn == 2)
                      mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
                    else
                      mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
                  }
              }


            if (cnt != 0)
              mpn_rshift (rp, n2p, dn, cnt);
            else
              MPN_COPY (rp, n2p, dn);
            TMP_FREE (marker);
            return;
          }

        /* When we come here, the numerator/partial remainder is less
           than twice the size of the denominator.  */

          {
            /* Problem:

               Divide a numerator N with nn limbs by a denominator D with dn
               limbs forming a quotient of nn-dn+1 limbs.  When qn is small
               compared to dn, conventional division algorithms perform poorly.
               We want an algorithm that has an expected running time that is
               dependent only on qn.  It is assumed that the most significant
               limb of the numerator is smaller than the most significant limb
               of the denominator.

               Algorithm (very informally stated):

               1) Divide the 2 x qn most significant limbs from the numerator
                  by the qn most significant limbs from the denominator.  Call
                  the result qest.  This is either the correct quotient, but
                  might be 1 or 2 too large.  Compute the remainder from the
                  division.  (This step is implemented by a mpn_divrem call.)

               2) Is the most significant limb from the remainder < p, where p
                  is the product of the most significant limb from the quotient
                  and the next(d).  (Next(d) denotes the next ignored limb from
                  the denominator.)  If it is, decrement qest, and adjust the
                  remainder accordingly.

               3) Is the remainder >= qest?  If it is, qest is the desired
                  quotient.  The algorithm terminates.

               4) Subtract qest x next(d) from the remainder.  If there is
                  borrow out, decrement qest, and adjust the remainder
                  accordingly.

               5) Skip one word from the denominator (i.e., let next(d) denote
                  the next less significant limb.  */

            mp_size_t qn;
            mp_ptr n2p, d2p;
            mp_ptr tp;
            mp_limb_t cy;
            mp_size_t in, rn;
            mp_limb_t quotient_too_large;
            int cnt;

            qn = nn - dn;
            qp[qn] = 0; /* zero high quotient limb */
            qn += adjust; /* qn cannot become bigger */

            if (qn == 0)
              {
                /* N < D: quotient is zero, remainder is N itself.  */
                MPN_COPY (rp, np, dn);
                TMP_FREE (marker);
                return;
              }

            in = dn - qn; /* (at least partially) ignored # of limbs in ops */
            /* Normalize denominator by shifting it to the left such that its
               most significant bit is set.  Then shift the numerator the same
               amount, to mathematically preserve quotient.  */
            count_leading_zeros (cnt, dp[dn - 1]);
            if (cnt != 0)
              {
                d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);

                mpn_lshift (d2p, dp + in, qn, cnt);
                d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);

                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
                cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
                if (adjust)
                  {
                    n2p[2 * qn] = cy;
                    n2p++;
                  }
                else
                  {
                    n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
                  }
              }
            else
              {
                d2p = (mp_ptr) dp + in;

                n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
                MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
                if (adjust)
                  {
                    n2p[2 * qn] = 0;
                    n2p++;
                  }
              }

            /* Get an approximate quotient using the extracted operands.  */
            if (qn == 1)
              {
                mp_limb_t q0, r0;
                mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
                /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
                   temps here.  This doesn't hurt code quality on any machines
                   so we do it unconditionally.  */
                gcc272bug_n1 = n2p[1];
                gcc272bug_n0 = n2p[0];
                gcc272bug_d0 = d2p[0];
                udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
                n2p[0] = r0;
                qp[0] = q0;
              }
            else if (qn == 2)
              mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
            else if (qn < BZ_THRESHOLD)
              mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
            else
              mpn_bz_divrem_n (qp, n2p, d2p, qn);

            rn = qn;
            /* Multiply the first ignored divisor limb by the most significant
               quotient limb.  If that product is > the partial remainder's
               most significant limb, we know the quotient is too large.  This
               test quickly catches most cases where the quotient is too large;
               it catches all cases where the quotient is 2 too large.  */
            {
              mp_limb_t dl, x;
              mp_limb_t h;
              mp_limb_t l USED_ONLY_SOMETIMES;

              if (in - 2 < 0)
                dl = 0;
              else
                dl = dp[in - 2];

              x = SHL (dp[in - 1], dl, cnt);
              umul_ppmm (h, l, x, qp[qn - 1]);

              if (n2p[qn - 1] < h)
                {
                  mp_limb_t cy;

                  mpn_decr_u (qp, (mp_limb_t) 1);
                  cy = mpn_add_n (n2p, n2p, d2p, qn);
                  if (cy)
                    {
                      /* The partial remainder is safely large.  */
                      n2p[qn] = cy;
                      ++rn;
                    }
                }
            }

            quotient_too_large = 0;
            if (cnt != 0)
              {
                mp_limb_t cy1, cy2;

                /* Append partially used numerator limb to partial remainder.  */
                cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
                n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);

                /* Update partial remainder with partially used divisor limb.  */
                cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
                if (qn != rn)
                  {
#ifdef DEBUG
                    if (n2p[qn] < cy2)
                      abort ();
#endif
                    n2p[qn] -= cy2;
                  }
                else
                  {
                    n2p[qn] = cy1 - cy2;

                    quotient_too_large = (cy1 < cy2);
                    ++rn;
                  }
                --in;
              }
            /* True: partial remainder now is neutral, i.e., it is not shifted up.  */

            tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);

            if (in < qn)
              {
                if (in == 0)
                  {
                    MPN_COPY (rp, n2p, rn);
#ifdef DEBUG
                    if (rn != dn)
                      abort ();
#endif
                    goto foo;
                  }
                mpn_mul (tp, qp, qn, dp, in);
              }
            else
              mpn_mul (tp, dp, in, qp, qn);

            cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
            MPN_COPY (rp + in, n2p, dn - in);
            quotient_too_large |= cy;
            cy = mpn_sub_n (rp, np, tp, in);
            cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
            quotient_too_large |= cy;
          foo:
            if (quotient_too_large)
              {
                /* Final fixup: decrement quotient and add D back once.  */
                mpn_decr_u (qp, (mp_limb_t) 1);
                mpn_add_n (rp, rp, dp, dn);
              }
          }
        TMP_FREE (marker);
        return;
      }
    }
}
2861
2862
/* Square roots table.  Generated by the following program:
#include "gmp.h"
main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);
mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}
   Entry k (for 64 <= k+64 < 256) is floor(sqrt(256*(k+64))); used by
   mpn_sqrtrem1 as an 8-bit initial approximation.  */
static const unsigned char approx_tab[192] =
  {
    128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
    143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
    156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
    169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
    181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
    192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
    202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
    212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
    221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
    230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
    239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
    247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
  };

/* Half the nail width; with GMP_NAIL_BITS == 0 (set above) this is 0.  */
#define HALF_NAIL (GMP_NAIL_BITS / 2)
2885
/* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized.
   Writes the square root of np[0] to sp[0], the remainder to rp[0]
   (if rp is non-NULL), and returns the remainder's limb count (0 or 1).
   Doubles the precision of the root each pass starting from the 8-bit
   table lookup, so GMP_LIMB_BITS must be a power of 2 (asserted).  */
static mp_size_t
mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
{
  mp_limb_t np0, s, r, q, u;
  int prec;

  ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);
  ASSERT (GMP_LIMB_BITS >= 16);

  /* first compute a 8-bit approximation from the high 8-bits of np[0] */
  np0 = np[0] << GMP_NAIL_BITS;
  q = np0 >> (GMP_LIMB_BITS - 8);
  /* 2^6 = 64 <= q < 256 = 2^8 */
  s = approx_tab[q - 64]; /* 128 <= s < 255 */
  r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s; /* normally r <= 2*s */
  if (r > 2 * s) /* the table can be one too small; fix s upward */
    {
      r -= 2 * s + 1;
      s++;
    }

  prec = 8;
  np0 <<= 2 * prec;
  while (2 * prec < GMP_LIMB_BITS)
    {
      /* invariant: s has prec bits, and r <= 2*s */
      r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));
      np0 <<= prec;
      u = 2 * s;
      q = r / u;
      u = r - q * u;
      s = (s << prec) + q; /* double the precision of the root */
      u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));
      q = q * q;
      r = u - q;
      if (u < q) /* remainder went negative: correct s downward */
        {
          r += 2 * s - 1;
          s --;
        }
      np0 <<= prec;
      prec = 2 * prec;
    }

  ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */

  /* normalize back, assuming GMP_NAIL_BITS is even */
  ASSERT (GMP_NAIL_BITS % 2 == 0);
  sp[0] = s >> HALF_NAIL;
  u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */
  r += u * ((sp[0] << (HALF_NAIL + 1)) + u);
  r = r >> GMP_NAIL_BITS;

  if (rp != NULL)
    rp[0] = r;
  return r != 0 ? 1 : 0;
}
2944
2945
/* Half a limb, in bits: the working precision of the two-limb sqrt below.  */
#define Prec (GMP_NUMB_BITS >> 1)

/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
   return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0].
   Takes the root of the high limb via mpn_sqrtrem1, then refines it
   by Prec bits using one division, correcting downward if needed.  */
static mp_limb_t
mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
{
  mp_limb_t qhl, q, u, np0;
  int cc;

  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);

  np0 = np[0];
  mpn_sqrtrem1 (sp, rp, np + 1);
  qhl = 0;
  while (rp[0] >= sp[0])
    {
      qhl++;
      rp[0] -= sp[0];
    }
  /* now rp[0] < sp[0] < 2^Prec */
  rp[0] = (rp[0] << Prec) + (np0 >> Prec);
  u = 2 * sp[0];
  q = rp[0] / u;
  u = rp[0] - q * u;
  q += (qhl & 1) << (Prec - 1);
  qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
  /* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */
  sp[0] = ((sp[0] + qhl) << Prec) + q;
  cc = u >> Prec;
  rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
  /* subtract q * q or qhl*2^(2*Prec) from rp */
  cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;
  /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
  if (cc < 0) /* root was one too large: decrement sp[0], add back 2*s+1 */
    {
      cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;
      cc += mpn_add_1 (rp, rp, 1, --sp[0]);
    }

  return cc;
}
2988
/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
   and in {np, n} the low n limbs of the remainder, returns the high
   limb of the remainder (which is 0 or 1).
   Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
   where B=2^GMP_NUMB_BITS.
   Divide-and-conquer: take the root S1 of the high half, divide the
   next limbs by 2*S1 to get the low part of the root, then correct
   the candidate root/remainder pair if the remainder went negative
   (Karatsuba square root scheme).  */
static mp_limb_t
mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
{
  mp_limb_t q;			/* carry out of {sp, n} */
  int c, b;			/* carry out of remainder */
  mp_size_t l, h;

  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);

  if (n == 1)
    c = mpn_sqrtrem2 (sp, np, np);
  else
    {
      l = n / 2;
      h = n - l;
      /* recurse on the high 2h limbs; {sp+l, h} gets the high root part */
      q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
      if (q != 0)
        mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
      /* low root part ~= remainder / (2 * high root part) */
      q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
      c = sp[0] & 1;
      mpn_rshift (sp, sp, l, 1);
      sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
      q >>= 1;
      if (c != 0)
        c = mpn_add_n (np + l, np + l, sp + l, h);
      /* subtract (low root part)^2 from the remainder */
      mpn_sqr_n (np + n, sp, l);
      b = q + mpn_sub_n (np, np, np + n, 2 * l);
      c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, b);
      q = mpn_add_1 (sp + l, sp + l, h, q);

      if (c < 0) /* remainder negative: decrement root, add back 2*S+1 */
        {
          c += mpn_addmul_1 (np, sp, n, 2) + 2 * q;
          c -= mpn_sub_1 (np, np, n, 1);
          q -= mpn_sub_1 (sp, sp, n, 1);
        }
    }

  return c;
}
3034
3035
/* Public entry point: square root of {np, nn} into sp, remainder into
   rp (or discarded when rp == NULL); returns the remainder's limb count.
   Normalizes the operand by an even left shift (2*c bits) and pads it
   to an even limb count 2*tn before calling mpn_dc_sqrtrem, then
   compensates the result: the shifted root is fixed up via
   R += 2*s0*S - s0^2 with s0 = S mod 2^k before shifting back.  */
mp_size_t
mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
{
  mp_limb_t *tp, s0[1], cc, high, rl;
  int c;
  mp_size_t rn, tn;
  TMP_DECL (marker);

  ASSERT (nn >= 0);

  /* If OP is zero, both results are zero.  */
  if (nn == 0)
    return 0;

  ASSERT (np[nn - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));

  high = np[nn - 1];
  if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
    return mpn_sqrtrem1 (sp, rp, np); /* already normalized single limb */
  count_leading_zeros (c, high);
  c -= GMP_NAIL_BITS;

  c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */

  TMP_MARK (marker);
  if (nn % 2 != 0 || c > 0)
    {
      /* Work on a shifted/padded copy, then undo the scaling below.  */
      tp = TMP_ALLOC_LIMBS (2 * tn);
      tp[0] = 0; /* needed only when 2*tn > nn, but saves a test */
      if (c != 0)
        mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
      else
        MPN_COPY (tp + 2 * tn - nn, np, nn);
      rl = mpn_dc_sqrtrem (sp, tp, tn);
      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
         thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
      c += (nn % 2) * GMP_NUMB_BITS / 2; /* c now represents k */
      s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1); /* S mod 2^k */
      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2*s0*S */
      cc = mpn_submul_1 (tp, s0, 1, s0[0]);
      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
      mpn_rshift (sp, sp, tn, c);
      tp[tn] = rl;
      if (rp == NULL)
        rp = tp;
      c = c << 1; /* remainder must shift right by 2k bits in total */
      if (c < GMP_NUMB_BITS)
        tn++;
      else
        {
          tp++;
          c -= GMP_NUMB_BITS;
        }
      if (c != 0)
        mpn_rshift (rp, tp, tn, c);
      else
        MPN_COPY_INCR (rp, tp, tn);
      rn = tn;
    }
  else
    {
      /* Operand already even-sized and normalized: compute in place.  */
      if (rp == NULL)
        rp = TMP_ALLOC_LIMBS (nn);
      if (rp != np)
        MPN_COPY (rp, np, nn);
      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
    }

  MPN_NORMALIZE (rp, rn);

  TMP_FREE (marker);
  return rn;
}
3113
3114 /* mpn_bz_divrem_n and auxiliary routines. */
3115
3116 /*
3117 [1] Fast Recursive Division, by Christoph Burnikel and Joachim Ziegler,
   Technical report MPI-I-98-1-022, October 1998.
3119 http://www.mpi-sb.mpg.de/~ziegler/TechRep.ps.gz
3120 */
3121
static mp_limb_t mpn_bz_div_3_halves_by_2
  _PROTO ((mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n));


/* mpn_bz_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
   div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
   i.e. n/2 >= KARATSUBA_MUL_THRESHOLD */
#ifndef BZ_THRESHOLD
#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
#endif

/* mpn_bz_divrem_n - Implements algorithm of page 8 in [1]: divides (np,2n)
   by (dp,n) and puts the quotient in (qp,n), the remainder in (np,n).
   Returns most significant limb of the quotient, which is 0 or 1.
   Requires that the most significant bit of the divisor is set.  */

mp_limb_t
#if __STDC__
mpn_bz_divrem_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
#else
mpn_bz_divrem_n (qp, np, dp, n)
     mp_ptr qp;
     mp_ptr np;
     mp_srcptr dp;
     mp_size_t n;
#endif
{
  mp_limb_t qhl, cc;

  if (n % 2 != 0)
    {
      /* Odd n: recurse on the even-sized (n-1) sub-problem, then account
         for the dropped low divisor limb dp[0] and finish with one
         schoolbook step to produce the low quotient limb.  */
      qhl = mpn_bz_divrem_n (qp + 1, np + 2, dp + 1, n - 1);
      cc = mpn_submul_1 (np + 1, qp + 1, n - 1, dp[0]);
      cc = mpn_sub_1 (np + n, np + n, 1, cc);
      if (qhl) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
      while (cc) /* over-estimated quotient: add divisor back */
        {
          qhl -= mpn_sub_1 (qp + 1, qp + 1, n - 1, (mp_limb_t) 1);
          cc -= mpn_add_n (np + 1, np + 1, dp, n);
        }
      qhl += mpn_add_1 (qp + 1, qp + 1, n - 1,
                        mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
    }
  else
    {
      /* Even n: two 3n/2-by-n divisions, high half then low half.  */
      mp_size_t n2 = n/2;
      qhl = mpn_bz_div_3_halves_by_2 (qp + n2, np + n2, dp, n2);
      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
                        mpn_bz_div_3_halves_by_2 (qp, np, dp, n2));
    }
  return qhl;
}
3174
3175
/* divides (np, 3n) by (dp, 2n) and puts the quotient in (qp, n),
   the remainder in (np, 2n).  Divides the high 2n limbs by the high n
   divisor limbs (recursively, or by schoolbook below the threshold),
   then subtracts quotient * (low n divisor limbs) and corrects any
   over-estimate by adding the divisor back.  */

static mp_limb_t
#if __STDC__
mpn_bz_div_3_halves_by_2 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n)
#else
mpn_bz_div_3_halves_by_2 (qp, np, dp, n)
     mp_ptr qp;
     mp_ptr np;
     mp_srcptr dp;
     mp_size_t n;
#endif
{
  mp_size_t twon = n + n;
  mp_limb_t qhl, cc;
  mp_ptr tmp;
  TMP_DECL (marker);

  TMP_MARK (marker);
  if (n < BZ_THRESHOLD)
    qhl = mpn_sb_divrem_mn (qp, np + n, twon, dp + n, n);
  else
    qhl = mpn_bz_divrem_n (qp, np + n, dp + n, n);
  /* subtract quotient times the low half of the divisor */
  tmp = (mp_ptr) TMP_ALLOC (twon * BYTES_PER_MP_LIMB);
  mpn_mul_n (tmp, qp, dp, n);
  cc = mpn_sub_n (np, np, tmp, twon);
  TMP_FREE (marker);
  if (qhl) cc += mpn_sub_n (np + n, np + n, dp, n);
  while (cc) /* over-estimated quotient: add divisor back */
    {
      qhl -= mpn_sub_1 (qp, qp, n, (mp_limb_t) 1);
      cc -= mpn_add_n (np, np, dp, twon);
    }
  return qhl;
}
3212
3213 /* mpn_sb_divrem_mn -- Divide natural numbers, producing both remainder and
3214 quotient. */
3215
3216 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
3217 the NSIZE-DSIZE least significant quotient limbs at QP
3218 and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
3219 non-zero, generate that many fraction bits and append them after the
3220 other quotient limbs.
3221 Return the most significant limb of the quotient, this is always 0 or 1.
3222
3223 Preconditions:
3224 0. NSIZE >= DSIZE.
3225 1. The most significant bit of the divisor must be set.
3226 2. QP must either not overlap with the input operands at all, or
3227 QP + DSIZE >= NP must hold true. (This means that it's
3228 possible to put the quotient in the high part of NUM, right after the
      remainder in NUM.)
3230 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
3231 4. DSIZE >= 2. */
3232
3233
3234 #define PREINVERT_VIABLE \
3235 (UDIV_TIME > 2 * UMUL_TIME + 6 /* && ! TARGET_REGISTER_STARVED */)
3236
3237 mp_limb_t
3238 #if __STDC__
mpn_sb_divrem_mn(mp_ptr qp,mp_ptr np,mp_size_t nsize,mp_srcptr dp,mp_size_t dsize)3239 mpn_sb_divrem_mn (mp_ptr qp,
3240 mp_ptr np, mp_size_t nsize,
3241 mp_srcptr dp, mp_size_t dsize)
3242 #else
3243 mpn_sb_divrem_mn (qp, np, nsize, dp, dsize)
3244 mp_ptr qp;
3245 mp_ptr np;
3246 mp_size_t nsize;
3247 mp_srcptr dp;
3248 mp_size_t dsize;
3249 #endif
3250 {
3251 mp_limb_t most_significant_q_limb = 0;
3252 mp_size_t i;
3253 mp_limb_t dx, d1, n0;
3254 mp_limb_t dxinv = 0;
3255 int have_preinv;
3256
3257 ASSERT_ALWAYS (dsize > 2);
3258
3259 np += nsize - dsize;
3260 dx = dp[dsize - 1];
3261 d1 = dp[dsize - 2];
3262 n0 = np[dsize - 1];
3263
3264 if (n0 >= dx)
3265 {
3266 if (n0 > dx || mpn_cmp (np, dp, dsize - 1) >= 0)
3267 {
3268 mpn_sub_n (np, np, dp, dsize);
3269 most_significant_q_limb = 1;
3270 }
3271 }
3272
3273 /* If multiplication is much faster than division, preinvert the
3274 most significant divisor limb before entering the loop. */
3275 if (PREINVERT_VIABLE)
3276 {
3277 have_preinv = 0;
3278 if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - dsize) > UDIV_TIME)
3279 {
3280 invert_limb (dxinv, dx);
3281 have_preinv = 1;
3282 }
3283 }
3284
3285 for (i = nsize - dsize - 1; i >= 0; i--)
3286 {
3287 mp_limb_t q;
3288 mp_limb_t nx;
3289 mp_limb_t cy_limb;
3290
3291 nx = np[dsize - 1];
3292 np--;
3293
3294 if (nx == dx)
3295 {
3296 /* This might over-estimate q, but it's probably not worth
3297 the extra code here to find out. */
3298 q = ~(mp_limb_t) 0;
3299
3300 #if 1
3301 cy_limb = mpn_submul_1 (np, dp, dsize, q);
3302 #else
3303 /* This should be faster on many machines */
3304 cy_limb = mpn_sub_n (np + 1, np + 1, dp, dsize);
3305 cy = mpn_add_n (np, np, dp, dsize);
3306 np[dsize] += cy;
3307 #endif
3308
3309 if (nx != cy_limb)
3310 {
3311 mpn_add_n (np, np, dp, dsize);
3312 q--;
3313 }
3314
3315 qp[i] = q;
3316 }
3317 else
3318 {
3319 mp_limb_t rx, r1, r0, p1, p0;
3320
3321 /* "workaround" avoids a problem with gcc 2.7.2.3 i386 register
3322 usage when np[dsize-1] is used in an asm statement like
3323 umul_ppmm in udiv_qrnnd_preinv. The symptom is seg faults due
3324 to registers being clobbered. gcc 2.95 i386 doesn't have the
3325 problem. */
3326 {
3327 mp_limb_t workaround = np[dsize - 1];
3328 if (PREINVERT_VIABLE && have_preinv)
3329 udiv_qrnnd_preinv (q, r1, nx, workaround, dx, dxinv);
3330 else
3331 udiv_qrnnd (q, r1, nx, workaround, dx);
3332 }
3333 umul_ppmm (p1, p0, d1, q);
3334
3335 r0 = np[dsize - 2];
3336 rx = 0;
3337 if (r1 < p1 || (r1 == p1 && r0 < p0))
3338 {
3339 p1 -= p0 < d1;
3340 p0 -= d1;
3341 q--;
3342 r1 += dx;
3343 rx = r1 < dx;
3344 }
3345
3346 p1 += r0 < p0; /* cannot carry! */
3347 rx -= r1 < p1; /* may become 11..1 if q is still too large */
3348 r1 -= p1;
3349 r0 -= p0;
3350
3351 cy_limb = mpn_submul_1 (np, dp, dsize - 2, q);
3352
3353 {
3354 mp_limb_t cy1, cy2;
3355 cy1 = r0 < cy_limb;
3356 r0 -= cy_limb;
3357 cy2 = r1 < cy1;
3358 r1 -= cy1;
3359 np[dsize - 1] = r1;
3360 np[dsize - 2] = r0;
3361 if (cy2 != rx)
3362 {
3363 mpn_add_n (np, np, dp, dsize);
3364 q--;
3365 }
3366 }
3367 qp[i] = q;
3368 }
3369 }
3370
3371 /* ______ ______ ______
3372 |__rx__|__r1__|__r0__| partial remainder
3373 ______ ______
3374 - |__p1__|__p0__| partial product to subtract
3375 ______ ______
3376 - |______|cylimb|
3377
3378 rx is -1, 0 or 1. If rx=1, then q is correct (it should match
3379 carry out). If rx=-1 then q is too large. If rx=0, then q might
3380 be too large, but it is most likely correct.
3381 */
3382
3383 return most_significant_q_limb;
3384 }
3385
3386 /* mpn_divrem_1(quot_ptr, qsize, dividend_ptr, dividend_size, divisor_limb) --
3387 Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
3388 Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
3389 Return the single-limb remainder.
3390 There are no constraints on the value of the divisor.
3391
3392 QUOT_PTR and DIVIDEND_PTR might point to the same limb.
3393 */
3394
3395 /* __gmpn_divmod_1_internal(quot_ptr,dividend_ptr,dividend_size,divisor_limb)
3396 Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
3397 Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
3398 Return the single-limb remainder.
3399 There are no constraints on the value of the divisor.
3400
3401 QUOT_PTR and DIVIDEND_PTR might point to the same limb. */
3402
#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif

#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif

/* Core single-limb division: picks between a preinverted (multiply-only)
   inner loop and plain udiv_qrnnd, each with a normalized and an
   unnormalized variant — four code paths with the same contract.
   See the comment above for the full contract.  */
static mp_limb_t
#if __STDC__
__gmpn_divmod_1_internal (mp_ptr quot_ptr,
                          mp_srcptr dividend_ptr, mp_size_t dividend_size,
                          mp_limb_t divisor_limb)
#else
__gmpn_divmod_1_internal (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
     mp_ptr quot_ptr;
     mp_srcptr dividend_ptr;
     mp_size_t dividend_size;
     mp_limb_t divisor_limb;
#endif
{
  mp_size_t i;
  mp_limb_t n1, n0, r;

  /* ??? Should this be handled at all?  Rely on callers?  */
  if (dividend_size == 0)
    return 0;

  /* If multiplication is much faster than division, and the
     dividend is large, pre-invert the divisor, and use
     only multiplications in the inner loop.  */

  /* This test should be read:
       Does it ever help to use udiv_qrnnd_preinv?
	 && Does what we save compensate for the inversion overhead?  */
  if (UDIV_TIME > (2 * UMUL_TIME + 6)
      && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME)
    {
      int normalization_steps;

      count_leading_zeros (normalization_steps, divisor_limb);
      if (normalization_steps != 0)
        {
          /* Preinverted path, divisor needs normalization: shift the
             divisor up and feed the dividend in shifted two-limb windows.  */
          mp_limb_t divisor_limb_inverted;

          divisor_limb <<= normalization_steps;
          invert_limb (divisor_limb_inverted, divisor_limb);

          n1 = dividend_ptr[dividend_size - 1];
          r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);

          /* Possible optimization:
	     if (r == 0
	     && divisor_limb > ((n1 << normalization_steps)
			     | (dividend_ptr[dividend_size - 2] >> ...)))
	     ...one division less... */

          for (i = dividend_size - 2; i >= 0; i--)
            {
              n0 = dividend_ptr[i];
              udiv_qrnnd_preinv (quot_ptr[i + 1], r, r,
                                 ((n1 << normalization_steps)
                                  | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
                                 divisor_limb, divisor_limb_inverted);
              n1 = n0;
            }
          udiv_qrnnd_preinv (quot_ptr[0], r, r,
                             n1 << normalization_steps,
                             divisor_limb, divisor_limb_inverted);
          return r >> normalization_steps;
        }
      else
        {
          /* Preinverted path, divisor already normalized.  */
          mp_limb_t divisor_limb_inverted;

          invert_limb (divisor_limb_inverted, divisor_limb);

          i = dividend_size - 1;
          r = dividend_ptr[i];

          if (r >= divisor_limb)
            r = 0; /* high limb enters the loop as part of the dividend */
          else
            {
              quot_ptr[i] = 0;
              i--;
            }

          for (; i >= 0; i--)
            {
              n0 = dividend_ptr[i];
              udiv_qrnnd_preinv (quot_ptr[i], r, r,
                                 n0, divisor_limb, divisor_limb_inverted);
            }
          return r;
        }
    }
  else
    {
      if (UDIV_NEEDS_NORMALIZATION)
        {
          int normalization_steps;

          count_leading_zeros (normalization_steps, divisor_limb);
          if (normalization_steps != 0)
            {
              /* Plain udiv_qrnnd path with normalization.  */
              divisor_limb <<= normalization_steps;

              n1 = dividend_ptr[dividend_size - 1];
              r = n1 >> (BITS_PER_MP_LIMB - normalization_steps);

              /* Possible optimization:
		 if (r == 0
		 && divisor_limb > ((n1 << normalization_steps)
				 | (dividend_ptr[dividend_size - 2] >> ...)))
		 ...one division less... */

              for (i = dividend_size - 2; i >= 0; i--)
                {
                  n0 = dividend_ptr[i];
                  udiv_qrnnd (quot_ptr[i + 1], r, r,
                              ((n1 << normalization_steps)
                               | (n0 >> (BITS_PER_MP_LIMB - normalization_steps))),
                              divisor_limb);
                  n1 = n0;
                }
              udiv_qrnnd (quot_ptr[0], r, r,
                          n1 << normalization_steps,
                          divisor_limb);
              return r >> normalization_steps;
            }
        }
      /* No normalization needed, either because udiv_qrnnd doesn't require
	 it, or because DIVISOR_LIMB is already normalized.  */

      i = dividend_size - 1;
      r = dividend_ptr[i];

      if (r >= divisor_limb)
        r = 0; /* high limb enters the loop as part of the dividend */
      else
        {
          quot_ptr[i] = 0;
          i--;
        }

      for (; i >= 0; i--)
        {
          n0 = dividend_ptr[i];
          udiv_qrnnd (quot_ptr[i], r, r, n0, divisor_limb);
        }
      return r;
    }
}
3557
3558
3559
/* Divide {np, nn} by the single limb d, storing the integer-part
   quotient at qp + qxn and then qxn additional fraction limbs at qp
   (developed by continuing the division with zero limbs).  Returns the
   final remainder.  */
mp_limb_t
#if __STDC__
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
              mp_srcptr np, mp_size_t nn,
              mp_limb_t d)
#else
mpn_divrem_1 (qp, qxn, np, nn, d)
     mp_ptr qp;
     mp_size_t qxn;
     mp_srcptr np;
     mp_size_t nn;
     mp_limb_t d;
#endif
{
  mp_limb_t rlimb;
  mp_size_t i;

  /* Develop integer part of quotient.  */
  rlimb = __gmpn_divmod_1_internal (qp + qxn, np, nn, d);

  /* Develop fraction part of quotient.  This is not as fast as it should;
     the preinvert stuff from __gmpn_divmod_1_internal ought to be used here
     too.  */
  if (UDIV_NEEDS_NORMALIZATION)
    {
      int normalization_steps;

      count_leading_zeros (normalization_steps, d);
      if (normalization_steps != 0)
        {
          /* Normalize once; each fraction step divides (rlimb, 0) by d.  */
          d <<= normalization_steps;
          rlimb <<= normalization_steps;

          for (i = qxn - 1; i >= 0; i--)
            udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);

          return rlimb >> normalization_steps;
        }
      else
	/* fall out */
	;
    }

  for (i = qxn - 1; i >= 0; i--)
    udiv_qrnnd (qp[i], rlimb, rlimb, 0, d);

  return rlimb;
}
3608
3609 /* mpn_divrem_2 -- Divide natural numbers, producing both remainder and
3610 quotient. The divisor is two limbs. */
3611
3612 /* Divide num (NP/NSIZE) by den (DP/2) and write
3613 the NSIZE-2 least significant quotient limbs at QP
3614 and the 2 long remainder at NP. If QEXTRA_LIMBS is
3615 non-zero, generate that many fraction bits and append them after the
3616 other quotient limbs.
3617 Return the most significant limb of the quotient, this is always 0 or 1.
3618
3619 Preconditions:
3620 0. NSIZE >= 2.
3621 1. The most significant bit of the divisor must be set.
3622 2. QP must either not overlap with the input operands at all, or
3623 QP + 2 >= NP must hold true. (This means that it's
3624 possible to put the quotient in the high part of NUM, right after the
      remainder in NUM.)
3626 3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. */
3627
mp_limb_t
#if __STDC__
mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
              mp_ptr np, mp_size_t nsize,
              mp_srcptr dp)
#else
mpn_divrem_2 (qp, qxn, np, nsize, dp)
     mp_ptr qp;
     mp_size_t qxn;
     mp_ptr np;
     mp_size_t nsize;
     mp_srcptr dp;
#endif
{
  mp_limb_t most_significant_q_limb = 0;
  mp_size_t i;
  mp_limb_t n1, n0, n2;
  mp_limb_t d1, d0;
  mp_limb_t d1inv = 0;
  int have_preinv;

  /* Point NP at the two most significant dividend limbs. */
  np += nsize - 2;
  d1 = dp[1];
  d0 = dp[0];
  n1 = np[1];
  n0 = np[0];

  /* If the top two dividend limbs are >= the divisor, the quotient's most
     significant limb is 1; subtract once so that n1:n0 < d1:d0. */
  if (n1 >= d1 && (n1 > d1 || n0 >= d0))
    {
      sub_ddmmss (n1, n0, n1, n0, d1, d0);
      most_significant_q_limb = 1;
    }

  /* If multiplication is much faster than division, preinvert the most
     significant divisor limb before entering the loop. */
  if (UDIV_TIME > 2 * UMUL_TIME + 6)
    {
      have_preinv = 0;
      if ((UDIV_TIME - (2 * UMUL_TIME + 6)) * (nsize - 2) > UDIV_TIME)
        {
          invert_limb (d1inv, d1);
          have_preinv = 1;
        }
    }

  /* One quotient limb per iteration.  The first nsize-2 iterations consume
     dividend limbs (np--); the final qxn iterations develop fraction limbs
     by shifting in zero limbs (np[0] = 0). */
  for (i = qxn + nsize - 2 - 1; i >= 0; i--)
    {
      mp_limb_t q;
      mp_limb_t r;

      if (i >= qxn)
        np--;
      else
        np[0] = 0;

      if (n1 == d1)
        {
          /* Q should be either 111..111 or 111..110.  Need special treatment
             of this rare case as normal division would give overflow.  */
          q = ~(mp_limb_t) 0;

          r = n0 + d1;
          if (r < d1)		/* Carry in the addition? */
            {
              add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
              qp[i] = q;
              continue;
            }
          n1 = d0 - (d0 != 0);
          n0 = -d0;
        }
      else
        {
          /* Estimate Q from the high divisor limb, then form q*d0 in n1:n0
             for the correction test below. */
          if (UDIV_TIME > 2 * UMUL_TIME + 6 && have_preinv)
            udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
          else
            udiv_qrnnd (q, r, n1, n0, d1);
          umul_ppmm (n1, n0, d0, q);
        }

      n2 = np[0];

    q_test:
      if (n1 > r || (n1 == r && n0 > n2))
        {
          /* The estimated Q was too large. */
          q--;

          sub_ddmmss (n1, n0, n1, n0, 0, d0);
          r += d1;
          if (r >= d1)		/* If not carry, test Q again. */
            goto q_test;
        }

      qp[i] = q;
      /* New two-limb partial remainder: r:n2 minus the q*d0 correction. */
      sub_ddmmss (n1, n0, r, n2, n1, n0);
    }
  /* Leave the final two-limb remainder over the dividend's low limbs. */
  np[1] = n1;
  np[0] = n0;

  return most_significant_q_limb;
}
3730
3731 /* mpn_mul_basecase -- Internal routine to multiply two natural numbers
3732 of length m and n. */
3733
3734 /* Handle simple cases with traditional multiplication.
3735
3736 This is the most critical code of multiplication. All multiplies rely on
3737 this, both small and huge. Small ones arrive here immediately, huge ones
3738 arrive here as this is the base case for Karatsuba's recursive algorithm. */
3739
3740 void
3741 #if __STDC__
mpn_mul_basecase(mp_ptr prodp,mp_srcptr up,mp_size_t usize,mp_srcptr vp,mp_size_t vsize)3742 mpn_mul_basecase (mp_ptr prodp,
3743 mp_srcptr up, mp_size_t usize,
3744 mp_srcptr vp, mp_size_t vsize)
3745 #else
3746 mpn_mul_basecase (prodp, up, usize, vp, vsize)
3747 mp_ptr prodp;
3748 mp_srcptr up;
3749 mp_size_t usize;
3750 mp_srcptr vp;
3751 mp_size_t vsize;
3752 #endif
3753 {
3754 /* We first multiply by the low order one or two limbs, as the result can
3755 be stored, not added, to PROD. We also avoid a loop for zeroing this
3756 way. */
3757 #if HAVE_NATIVE_mpn_mul_2
3758 if (vsize >= 2)
3759 {
3760 prodp[usize + 1] = mpn_mul_2 (prodp, up, usize, vp[0], vp[1]);
3761 prodp += 2, vp += 2, vsize -= 2;
3762 }
3763 else
3764 {
3765 prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
3766 return;
3767 }
3768 #else
3769 prodp[usize] = mpn_mul_1 (prodp, up, usize, vp[0]);
3770 prodp += 1, vp += 1, vsize -= 1;
3771 #endif
3772
3773 #if HAVE_NATIVE_mpn_addmul_2
3774 while (vsize >= 2)
3775 {
3776 prodp[usize + 1] = mpn_addmul_2 (prodp, up, usize, vp[0], vp[1]);
3777 prodp += 2, vp += 2, vsize -= 2;
3778 }
3779 if (vsize != 0)
3780 prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
3781 #else
3782 /* For each iteration in the loop, multiply U with one limb from V, and
3783 add the result to PROD. */
3784 while (vsize != 0)
3785 {
3786 prodp[usize] = mpn_addmul_1 (prodp, up, usize, vp[0]);
3787 prodp += 1, vp += 1, vsize -= 1;
3788 }
3789 #endif
3790 }
3791
3792
/* mpn_sqr_basecase -- Internal routine to square a natural number
   of length n. */
3795
3796
3797 #define SQR_KARATSUBA_THRESHOLD KARATSUBA_SQR_THRESHOLD
3798
/* Square {up,n} into {prodp,2n} the schoolbook way: form the off-diagonal
   cross products u[i]*u[j] (i < j) once in a scratch area, double them with
   a left shift, and add them to the diagonal squares u[i]^2. */
void
mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (prodp, 2*n, up, n));

  {
    /* N.B.! We need the superfluous indirection through argh to work around
       a reloader bug in GCC 2.7.*.  */
#if GMP_NAIL_BITS == 0
    mp_limb_t ul, argh;
    ul = up[0];
    umul_ppmm (argh, prodp[0], ul, ul);
    prodp[1] = argh;
#else
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (prodp[1], lpl, ul, ul << GMP_NAIL_BITS);
    prodp[0] = lpl >> GMP_NAIL_BITS;
#endif
  }
  if (n > 1)
    {
      mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
      mp_ptr tp = tarr;
      mp_limb_t cy;

      /* must fit 2*n limbs in tarr */
      ASSERT (n <= SQR_KARATSUBA_THRESHOLD);

      /* Build the off-diagonal triangle in tp: row i-1 contributes
         up[i-1] * {up+i, n-i} starting at limb offset 2*i-2. */
      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
        {
          mp_limb_t cy;
          cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
          tp[n + i - 2] = cy;
        }
      /* Diagonal squares u[i]^2 go directly into the product area
         (u[0]^2 was stored above). */
#if HAVE_NATIVE_mpn_sqr_diagonal
      mpn_sqr_diagonal (prodp + 2, up + 1, n - 1);
#else
      for (i = 1; i < n; i++)
        {
#if GMP_NAIL_BITS == 0
          mp_limb_t ul;
          ul = up[i];
          umul_ppmm (prodp[2 * i + 1], prodp[2 * i], ul, ul);
#else
          mp_limb_t ul, lpl;
          ul = up[i];
          umul_ppmm (prodp[2 * i + 1], lpl, ul, ul << GMP_NAIL_BITS);
          prodp[2 * i] = lpl >> GMP_NAIL_BITS;
#endif
        }
#endif
      {
        mp_limb_t cy;
        /* Double the triangle and add it in, offset by one limb since the
           lowest cross term carries limb weight 1, not 0. */
        cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
        cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2);
        prodp[2 * n - 1] += cy;
      }
    }
}
3864
#if 0

/* Older variant of mpn_sqr_basecase, disabled and kept for reference only
   (this whole section is compiled out by the "#if 0").  It uses the same
   triangle-plus-diagonal scheme as the live version above, but without the
   GMP_NAIL_BITS and HAVE_NATIVE_mpn_sqr_diagonal alternatives. */
void
#if __STDC__
mpn_sqr_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t n)
#else
mpn_sqr_basecase (prodp, up, n)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t n;
#endif
{
  mp_size_t i;

  {
    /* N.B.! We need the superfluous indirection through argh to work around
       a reloader bug in GCC 2.7.*.  */
    mp_limb_t x;
    mp_limb_t argh;
    x = up[0];
    umul_ppmm (argh, prodp[0], x, x);
    prodp[1] = argh;
  }
  if (n > 1)
    {
      mp_limb_t tarr[2 * KARATSUBA_SQR_THRESHOLD];
      mp_ptr tp = tarr;
      mp_limb_t cy;

      /* must fit 2*n limbs in tarr */
      ASSERT (n <= KARATSUBA_SQR_THRESHOLD);

      /* Off-diagonal cross products. */
      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
        {
          mp_limb_t cy;
          cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
          tp[n + i - 2] = cy;
        }
      /* Diagonal squares. */
      for (i = 1; i < n; i++)
        {
          mp_limb_t x;
          x = up[i];
          umul_ppmm (prodp[2 * i + 1], prodp[2 * i], x, x);
        }
      {
        mp_limb_t cy;
        /* Double the cross products and accumulate. */
        cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
        cy += mpn_add_n (prodp + 1, prodp + 1, tp, 2 * n - 2);
        prodp[2 * n - 1] += cy;
      }
    }
}

#endif
3921
3922 /* mpn_mul_1 -- Multiply a limb vector with a single limb and
3923 store the product in a second limb vector. */
3924
3925 mp_limb_t
mpn_mul_1(mp_ptr res_ptr,mp_srcptr s1_ptr,mp_size_t s1_size,mp_limb_t s2_limb)3926 mpn_mul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_size_t s1_size, mp_limb_t s2_limb)
3927 {
3928 register mp_limb_t cy_limb;
3929 register mp_size_t j;
3930 register mp_limb_t prod_high, prod_low;
3931
3932 SCHEME_BIGNUM_USE_FUEL(s1_size);
3933
3934 /* The loop counter and index J goes from -S1_SIZE to -1. This way
3935 the loop becomes faster. */
3936 j = -s1_size;
3937
3938 /* Offset the base pointers to compensate for the negative indices. */
3939 s1_ptr -= j;
3940 res_ptr -= j;
3941
3942 cy_limb = 0;
3943 do
3944 {
3945 umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
3946
3947 prod_low += cy_limb;
3948 cy_limb = (prod_low < cy_limb) + prod_high;
3949
3950 res_ptr[j] = prod_low;
3951 }
3952 while (++j != 0);
3953
3954 return cy_limb;
3955 }
3956
3957 /* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
3958 by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
3959 limb vector pointed to by RES_PTR. Return the most significant limb of
3960 the product, adjusted for carry-out from the addition. */
3961
3962 mp_limb_t
mpn_addmul_1(mp_ptr rp,mp_srcptr up,mp_size_t n,mp_limb_t vl)3963 mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
3964 {
3965 mp_limb_t ul, cl, hpl, lpl, rl;
3966
3967 SCHEME_BIGNUM_USE_FUEL(n);
3968
3969 cl = 0;
3970 do
3971 {
3972 ul = *up++;
3973 umul_ppmm (hpl, lpl, ul, vl);
3974
3975 lpl += cl;
3976 cl = (lpl < cl) + hpl;
3977
3978 rl = *rp;
3979 lpl = rl + lpl;
3980 cl += lpl < rl;
3981 *rp++ = lpl;
3982 }
3983 while (--n != 0);
3984
3985 return cl;
3986 }
3987
3988 /* mpn_submul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
3989 by S2_LIMB, subtract the S1_SIZE least significant limbs of the product
3990 from the limb vector pointed to by RES_PTR. Return the most significant
3991 limb of the product, adjusted for carry-out from the subtraction.
3992 */
3993
3994 mp_limb_t
mpn_submul_1(mp_ptr res_ptr,mp_srcptr s1_ptr,mp_size_t s1_size,mp_limb_t s2_limb)3995 mpn_submul_1 (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_size_t s1_size, mp_limb_t s2_limb)
3996 {
3997 register mp_limb_t cy_limb;
3998 register mp_size_t j;
3999 register mp_limb_t prod_high, prod_low;
4000 register mp_limb_t x;
4001
4002 SCHEME_BIGNUM_USE_FUEL(s1_size);
4003
4004 /* The loop counter and index J goes from -SIZE to -1. This way
4005 the loop becomes faster. */
4006 j = -s1_size;
4007
4008 /* Offset the base pointers to compensate for the negative indices. */
4009 res_ptr -= j;
4010 s1_ptr -= j;
4011
4012 cy_limb = 0;
4013 do
4014 {
4015 umul_ppmm (prod_high, prod_low, s1_ptr[j], s2_limb);
4016
4017 prod_low += cy_limb;
4018 cy_limb = (prod_low < cy_limb) + prod_high;
4019
4020 x = res_ptr[j];
4021 prod_low = x - prod_low;
4022 cy_limb += (prod_low > x);
4023 res_ptr[j] = prod_low;
4024 }
4025 while (++j != 0);
4026
4027 return cy_limb;
4028 }
4029
4030 /* mpn_divexact_by3 -- mpn division by 3, expecting no remainder. */
4031
4032
4033 /* Multiplicative inverse of 3, modulo 2^BITS_PER_MP_LIMB.
4034 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. */
4035 #define INVERSE_3 ((MP_LIMB_T_MAX / 3) * 2 + 1)
4036
4037
4038 /* The "c += ..."s are adding the high limb of 3*l to c. That high limb
4039 will be 0, 1 or 2. Doing two separate "+="s seems to turn out better
4040 code on gcc (as of 2.95.2 at least).
4041
4042 When a subtraction of a 0,1,2 carry value causes a borrow, that leaves a
4043 limb value of either 0xFF...FF or 0xFF...FE and the multiply by INVERSE_3
4044 gives 0x55...55 or 0xAA...AA respectively, producing a further borrow of
4045 only 0 or 1 respectively. Hence the carry out of each stage and for the
4046 return value is always only 0, 1 or 2. */
4047
4048 mp_limb_t
4049 #if __STDC__
mpn_divexact_by3c(mp_ptr dst,mp_srcptr src,mp_size_t size,mp_limb_t c)4050 mpn_divexact_by3c (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t c)
4051 #else
4052 mpn_divexact_by3c (dst, src, size, c)
4053 mp_ptr dst;
4054 mp_srcptr src;
4055 mp_size_t size;
4056 mp_limb_t c;
4057 #endif
4058 {
4059 mp_size_t i;
4060
4061 SCHEME_BIGNUM_USE_FUEL(size);
4062
4063 ASSERT (size >= 1);
4064
4065 i = 0;
4066 do
4067 {
4068 mp_limb_t l, s;
4069
4070 s = src[i];
4071 l = s - c;
4072 c = (l > s);
4073
4074 l *= INVERSE_3;
4075 dst[i] = l;
4076
4077 c += (l > MP_LIMB_T_MAX/3);
4078 c += (l > (MP_LIMB_T_MAX/3)*2);
4079 }
4080 while (++i < size);
4081
4082 return c;
4083 }
4084
4085 /* mpn_mul -- Multiply two natural numbers. */
4086
4087 /* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
4088 (pointed to by VP, with VN limbs), and store the result at PRODP. The
4089 result is UN + VN limbs. Return the most significant limb of the result.
4090
4091 NOTE: The space pointed to by PRODP is overwritten before finished with U
4092 and V, so overlap is an error.
4093
4094 Argument constraints:
4095 1. UN >= VN.
4096 2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
4097 the multiplier and the multiplicand. */
4098
/* Square {up,un} into {prodp,2*un}, dispatching on operand size to the
   basecase, Karatsuba, Toom-3 or (when compiled in) FFT routines. */
void
#if __STDC__
mpn_sqr_n (mp_ptr prodp,
           mp_srcptr up, mp_size_t un)
#else
mpn_sqr_n (prodp, up, un)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t un;
#endif
{
  if (un < KARATSUBA_SQR_THRESHOLD)
    { /* plain schoolbook multiplication */
      if (un == 0)
        return;
      mpn_sqr_basecase (prodp, up, un);
    }
  else if (un < TOOM3_SQR_THRESHOLD)
    { /* karatsuba multiplication */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
      mpn_kara_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else if (un < FFT_SQR_THRESHOLD)
#else
  else
#endif
    { /* toom3 multiplication */
      mp_ptr tspace;
      TMP_DECL (marker);
      TMP_MARK (marker);
      tspace = (mp_ptr) TMP_ALLOC (2 * (un + BITS_PER_MP_LIMB) * BYTES_PER_MP_LIMB);
      mpn_toom3_sqr_n (prodp, up, un, tspace);
      TMP_FREE (marker);
    }
#if WANT_FFT || TUNE_PROGRAM_BUILD
  else
    {
      /* schoenhage multiplication */
      mpn_mul_fft_full (prodp, up, un, up, un);
    }
#endif
}
4146
mp_limb_t
#if __STDC__
mpn_mul (mp_ptr prodp,
         mp_srcptr up, mp_size_t un,
         mp_srcptr vp, mp_size_t vn)
#else
mpn_mul (prodp, up, un, vp, vn)
     mp_ptr prodp;
     mp_srcptr up;
     mp_size_t un;
     mp_srcptr vp;
     mp_size_t vn;
#endif
{
  mp_size_t l;
  mp_limb_t c;

  /* Same operand twice is a square; the squaring code is faster. */
  if (up == vp && un == vn)
    {
      mpn_sqr_n (prodp, up, un);
      return prodp[2 * un - 1];
    }

  if (vn < KARATSUBA_MUL_THRESHOLD)
    { /* long multiplication */
      mpn_mul_basecase (prodp, up, un, vp, vn);
      return prodp[un + vn - 1];
    }

  /* Karatsuba range: multiply the low vn limbs of U by V first, then walk
     up U in vn-limb chunks, accumulating each chunk product.  L counts how
     many limbs above the current PRODP position already hold valid data;
     T carries overflow from the accumulating additions. */
  mpn_mul_n (prodp, up, vp, vn);
  if (un != vn)
    { mp_limb_t t;
      mp_ptr ws;
      TMP_DECL (marker);
      TMP_MARK (marker);

      prodp += vn;
      l = vn;
      up += vn;
      un -= vn;

      if (un < vn)
        {
          /* Swap u's and v's. */
          MPN_SRCPTR_SWAP (up,un, vp,vn);
        }

      /* Workspace for one chunk product; sized for whichever of the two
         loops below will use it. */
      ws = (mp_ptr) TMP_ALLOC (((vn >= KARATSUBA_MUL_THRESHOLD ? vn : un) + vn)
                               * BYTES_PER_MP_LIMB);

      t = 0;
      while (vn >= KARATSUBA_MUL_THRESHOLD)
        {
          mpn_mul_n (ws, up, vp, vn);
          if (l <= 2*vn)
            {
              /* Chunk product extends past the valid limbs: add the
                 overlap, then copy the tail (plus carry) above it. */
              t += mpn_add_n (prodp, prodp, ws, l);
              if (l != 2*vn)
                {
                  t = mpn_add_1 (prodp + l, ws + l, 2*vn - l, t);
                  l = 2*vn;
                }
            }
          else
            {
              /* Chunk product fits inside the valid limbs: add it and
                 propagate the carry up through the rest. */
              c = mpn_add_n (prodp, prodp, ws, 2*vn);
              t += mpn_add_1 (prodp + 2*vn, prodp + 2*vn, l - 2*vn, c);
            }
          prodp += vn;
          l -= vn;
          up += vn;
          un -= vn;
          if (un < vn)
            {
              /* Swap u's and v's. */
              MPN_SRCPTR_SWAP (up,un, vp,vn);
            }
        }

      /* The remaining chunk is below the Karatsuba threshold; finish it
         with basecase multiplication and the same accumulation scheme. */
      if (vn)
        {
          mpn_mul_basecase (ws, up, un, vp, vn);
          if (l <= un + vn)
            {
              t += mpn_add_n (prodp, prodp, ws, l);
              if (l != un + vn)
                t = mpn_add_1 (prodp + l, ws + l, un + vn - l, t);
            }
          else
            {
              c = mpn_add_n (prodp, prodp, ws, un + vn);
              t += mpn_add_1 (prodp + un + vn, prodp + un + vn, l - un - vn, c);
            }
        }

      TMP_FREE (marker);
    }
  return prodp[un + vn - 1];
}
4246
4247 /* mpn_divrem -- Divide natural numbers, producing both remainder and
4248 quotient. This is now just a middle layer for calling the new
4249 internal mpn_tdiv_qr. */
4250
/* Divide {np,nn} by {dp,dn}, with qxn extra fraction limbs.  The quotient
   (nn-dn+qxn limbs) goes to QP, the remainder (dn limbs) overwrites the low
   part of NP, and the most significant quotient limb is returned. */
mp_limb_t
#if __STDC__
mpn_divrem (mp_ptr qp, mp_size_t qxn,
            mp_ptr np, mp_size_t nn,
            mp_srcptr dp, mp_size_t dn)
#else
mpn_divrem (qp, qxn, np, nn, dp, dn)
     mp_ptr qp;
     mp_size_t qxn;
     mp_ptr np;
     mp_size_t nn;
     mp_srcptr dp;
     mp_size_t dn;
#endif
{
  SCHEME_BIGNUM_USE_FUEL(dn + nn);

  if (dn == 1)
    {
      /* Single-limb divisor: divide into a temporary via mpn_divrem_1,
         store the remainder limb at np[0], and copy the quotient out. */
      mp_limb_t ret;
      mp_ptr q2p;
      mp_size_t qn;
      TMP_DECL (marker);

      TMP_MARK (marker);
      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);

      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
      qn = nn + qxn - 1;
      MPN_COPY (qp, q2p, qn);
      ret = q2p[qn];

      TMP_FREE (marker);
      return ret;
    }
  else if (dn == 2)
    {
      /* Two-limb divisor has its own specialized routine. */
      return mpn_divrem_2 (qp, qxn, np, nn, dp);
    }
  else
    {
      /* General case: use mpn_tdiv_qr.  Fraction limbs (qxn != 0) are
         produced by appending qxn zero limbs below the numerator. */
      mp_ptr rp, q2p;
      mp_limb_t qhl;
      mp_size_t qn;
      TMP_DECL (marker);

      TMP_MARK (marker);
      if (qxn != 0)
        {
          mp_ptr n2p;
          n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
          MPN_ZERO (n2p, qxn);
          MPN_COPY (n2p + qxn, np, nn);
          q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
          rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
          mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
          MPN_COPY (np, rp, dn);
          qn = nn - dn + qxn;
          MPN_COPY (qp, q2p, qn);
          qhl = q2p[qn];
        }
      else
        {
          q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
          rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
          mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
          MPN_COPY (np, rp, dn);	/* overwrite np area with remainder */
          qn = nn - dn;
          MPN_COPY (qp, q2p, qn);
          qhl = q2p[qn];
        }
      TMP_FREE (marker);
      return qhl;
    }
}
4326
4327 /************************************************************************/
4328 /* GCD: */
4329
4330 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
4331 mpn_mod_1 (src, size, divisor)
4332
4333
4334
4335 /* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
4336 meaning the quotient size where that should happen, the quotient size
4337 being how many udiv divisions will be done.
4338
4339 The default is to use preinv always, CPUs where this doesn't suit have
4340 tuned thresholds. Note in particular that preinv should certainly be
4341 used if that's the only division available (USE_PREINV_ALWAYS). */
4342
4343 #ifndef MOD_1_NORM_THRESHOLD
4344 #define MOD_1_NORM_THRESHOLD 0
4345 #endif
4346 #ifndef MOD_1_UNNORM_THRESHOLD
4347 #define MOD_1_UNNORM_THRESHOLD 0
4348 #endif
4349
4350
4351 /* The comments in mpn/generic/divrem_1.c apply here too.
4352
4353 As noted in the algorithms section of the manual, the shifts in the loop
4354 for the unnorm case can be avoided by calculating r = a%(d*2^n), followed
4355 by a final (r*2^n)%(d*2^n). In fact if it happens that a%(d*2^n) can
4356 skip a division where (a*2^n)%(d*2^n) can't then there's the same number
4357 of divide steps, though how often that happens depends on the assumed
4358 distributions of dividend and divisor. In any case this idea is left to
4359 CPU specific implementations to consider. */
4360
/* Return {up,un} mod d (d != 0).  Chooses between plain udiv_qrnnd loops
   and preinverted loops per the MOD_1_*_THRESHOLD tunables, and between
   normalized and unnormalized divisor handling. */
mp_limb_t
mpn_mod_1 (mp_srcptr up, mp_size_t un, mp_limb_t d)
{
  mp_size_t i;
  mp_limb_t n1, n0, r;
  mp_limb_t dummy USED_ONLY_SOMETIMES;   /* quotient limb, discarded */

  ASSERT (un >= 0);
  ASSERT (d != 0);

  /* Botch: Should this be handled at all?  Rely on callers?
     But note un==0 is currently required by mpz/fdiv_r_ui.c and possibly
     other places.  */
  if (un == 0)
    return 0;

  d <<= GMP_NAIL_BITS;

  if ((d & GMP_LIMB_HIGHBIT) != 0)
    {
      /* High limb is initial remainder, possibly with one subtract of
         d to get r<d.  */
      r = up[un - 1] << GMP_NAIL_BITS;
      if (r >= d)
        r -= d;
      r >>= GMP_NAIL_BITS;
      un--;
      if (un == 0)
        return r;

      if (BELOW_THRESHOLD (un, MOD_1_NORM_THRESHOLD))
        {
        plain:
          /* Plain limb-by-limb udiv_qrnnd loop.  Also reached by the goto
             in the unnormalized branch below, when udiv_qrnnd does not
             require a normalized divisor. */
          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd (dummy, r, r, n0, d);
              r >>= GMP_NAIL_BITS;
            }
          return r;
        }
      else
        {
          /* Above the threshold: use a precomputed inverse of d. */
          mp_limb_t inv;
          invert_limb (inv, d);
          for (i = un - 1; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
              r >>= GMP_NAIL_BITS;
            }
          return r;
        }
    }
  else
    {
      int norm;

      /* Skip a division if high < divisor.  Having the test here before
         normalizing will still skip as often as possible.  */
      r = up[un - 1] << GMP_NAIL_BITS;
      if (r < d)
        {
          r >>= GMP_NAIL_BITS;
          un--;
          if (un == 0)
            return r;
        }
      else
        r = 0;

      /* If udiv_qrnnd doesn't need a normalized divisor, can use the simple
         code above. */
      if (! UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
        goto plain;

      /* Normalize: shift d so its high bit is set (norm >= 1 here since
         d's high bit was clear), and shift the dividend by the same
         amount, forming each numerator from overlapping limb pairs. */
      count_leading_zeros (norm, d);
      d <<= norm;

      n1 = up[un - 1] << GMP_NAIL_BITS;
      r = (r << norm) | (n1 >> (GMP_LIMB_BITS - norm));

      if (UDIV_NEEDS_NORMALIZATION
          && BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
        {
          for (i = un - 2; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd (dummy, r, r,
                          (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
                          d);
              r >>= GMP_NAIL_BITS;
              n1 = n0;
            }
          udiv_qrnnd (dummy, r, r, n1 << norm, d);
          r >>= GMP_NAIL_BITS;
          /* Undo the normalization shift to get the true remainder. */
          return r >> norm;
        }
      else
        {
          mp_limb_t inv;
          invert_limb (inv, d);

          for (i = un - 2; i >= 0; i--)
            {
              n0 = up[i] << GMP_NAIL_BITS;
              udiv_qrnnd_preinv (dummy, r, r,
                                 (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
                                 d, inv);
              r >>= GMP_NAIL_BITS;
              n1 = n0;
            }
          udiv_qrnnd_preinv (dummy, r, r, n1 << norm, d, inv);
          r >>= GMP_NAIL_BITS;
          return r >> norm;
        }
    }
}
4480
4481 /* Does not work for U == 0 or V == 0. It would be tough to make it work for
4482 V == 0 since gcd(x,0) = x, and U does not generally fit in an mp_limb_t.
4483
4484 The threshold for doing u%v when size==1 will vary by CPU according to
4485 the speed of a division and the code generated for the main loop. Any
4486 tuning for this is left to a CPU specific implementation. */
4487
/* GCD of {up,size} and the single limb vlimb (vlimb != 0, U != 0).
   Common factors of 2 are recorded in zero_bits and restored at the end;
   the core is a binary GCD on odd single-limb values. */
mp_limb_t
mpn_gcd_1 (mp_srcptr up, mp_size_t size, mp_limb_t vlimb)
{
  mp_limb_t ulimb;
  unsigned long zero_bits, u_low_zero_bits;

  ASSERT (size >= 1);
  ASSERT (vlimb != 0);
  /* ASSERT_MPN_NONZERO_P (up, size); */

  ulimb = up[0];

  /* Need vlimb odd for modexact, want it odd to get common zeros. */
  count_trailing_zeros (zero_bits, vlimb);
  vlimb >>= zero_bits;

  if (size > 1)
    {
      /* Must get common zeros before the mod reduction.  If ulimb==0 then
         vlimb already gives the common zeros.  */
      if (ulimb != 0)
        {
          count_trailing_zeros (u_low_zero_bits, ulimb);
          zero_bits = MIN (zero_bits, u_low_zero_bits);
        }

      /* Reduce U to a single limb: u mod v. */
      ulimb = MPN_MOD_OR_MODEXACT_1_ODD (up, size, vlimb);
      if (ulimb == 0)
        goto done;

      /* Jump into the binary loop at the point that strips low zero bits
         from ulimb. */
      goto strip_u_maybe;
    }

  /* size==1, so up[0]!=0 */
  count_trailing_zeros (u_low_zero_bits, ulimb);
  ulimb >>= u_low_zero_bits;
  zero_bits = MIN (zero_bits, u_low_zero_bits);

  /* make u bigger */
  if (vlimb > ulimb)
    MP_LIMB_T_SWAP (ulimb, vlimb);

  /* if u is much bigger than v, reduce using a division rather than
     chipping away at it bit-by-bit */
  if ((ulimb >> 16) > vlimb)
    {
      ulimb %= vlimb;
      if (ulimb == 0)
        goto done;
      goto strip_u_maybe;
    }

  /* Binary GCD: both values odd at each iteration's top; subtract the
     smaller from the larger, then strip the resulting low zero bits. */
  while (ulimb != vlimb)
    {
      ASSERT (ulimb & 1);
      ASSERT (vlimb & 1);

      if (ulimb > vlimb)
        {
          ulimb -= vlimb;
          do
            {
              ulimb >>= 1;
              ASSERT (ulimb != 0);
            strip_u_maybe:
              /* Entry point from above: test ulimb's low bit first,
                 shifting only while it is even. */
              ;
            }
          while ((ulimb & 1) == 0);
        }
      else /*  vlimb > ulimb.  */
        {
          vlimb -= ulimb;
          do
            {
              vlimb >>= 1;
              ASSERT (vlimb != 0);
            }
          while ((vlimb & 1) == 0);
        }
    }

 done:
  /* Restore the common power of 2 removed at the start. */
  return vlimb << zero_bits;
}
4572
4573
4574 /* Integer greatest common divisor of two unsigned integers, using
4575 the accelerated algorithm (see reference below).
4576
4577 mp_size_t mpn_gcd (up, usize, vp, vsize).
4578
4579 Preconditions [U = (up, usize) and V = (vp, vsize)]:
4580
4581 1. V is odd.
4582 2. numbits(U) >= numbits(V).
4583
4584 Both U and V are destroyed by the operation. The result is left at vp,
4585 and its size is returned.
4586
4587 Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
4588
4589 Funding for this work has been partially provided by Conselho Nacional
4590 de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
4591 301314194-2, and was done while I was a visiting reseacher in the Instituto
4592 de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
4593
4594 Refer to
4595 K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
4596 Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
4597
4598 /* If MIN (usize, vsize) >= GCD_ACCEL_THRESHOLD, then the accelerated
4599 algorithm is used, otherwise the binary algorithm is used. This may be
4600 adjusted for different architectures. */
4601 #ifndef GCD_ACCEL_THRESHOLD
4602 #define GCD_ACCEL_THRESHOLD 5
4603 #endif
4604
4605 /* When U and V differ in size by more than BMOD_THRESHOLD, the accelerated
4606 algorithm reduces using the bmod operation. Otherwise, the k-ary reduction
4607 is used. 0 <= BMOD_THRESHOLD < GMP_NUMB_BITS. */
4608 enum
4609 {
4610 BMOD_THRESHOLD = GMP_NUMB_BITS/2
4611 };
4612
4613
4614 /* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
4615 Both U and V must be odd. */
/* Use binary algorithm to compute V <-- GCD (V, U) for usize, vsize == 2.
   Both U and V must be odd.  Returns the size (1 or 2) of the result. */
static inline mp_size_t
gcd_2 (mp_ptr vp, mp_srcptr up)
{
  mp_limb_t u0, u1, v0, v1;
  mp_size_t vsize;

  u0 = up[0];
  u1 = up[1];
  v0 = vp[0];
  v1 = vp[1];

  /* Two-limb binary GCD steps: subtract the smaller from the larger and
     strip low zero bits, until the operands agree in one coordinate. */
  while (u1 != v1 && u0 != v0)
    {
      unsigned long int r;
      if (u1 > v1)
        {
          u1 -= v1 + (u0 < v0);		/* borrow from the low limb */
          u0 = (u0 - v0) & GMP_NUMB_MASK;
          /* u0 != 0 here (u0 != v0), so r < GMP_NUMB_BITS; shift the
             two-limb value right by r. */
          count_trailing_zeros (r, u0);
          u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
          u1 >>= r;
        }
      else  /* u1 < v1.  */
        {
          v1 -= u1 + (v0 < u0);
          v0 = (v0 - u0) & GMP_NUMB_MASK;
          count_trailing_zeros (r, v0);
          v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
          v1 >>= r;
        }
    }

  vp[0] = v0, vp[1] = v1, vsize = 1 + (v1 != 0);

  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
  if (u1 == v1 && u0 == v0)
    return vsize;

  /* One coordinate matches, so |U - V| fits in a single limb; finish with
     the single-limb GCD. */
  v0 = (u0 == v0) ? (u1 > v1) ? u1-v1 : v1-u1 : (u0 > v0) ? u0-v0 : v0-u0;
  vp[0] = mpn_gcd_1 (vp, vsize, v0);

  return 1;
}
4659
4660 /* The function find_a finds 0 < N < 2^GMP_NUMB_BITS such that there exists
4661 0 < |D| < 2^GMP_NUMB_BITS, and N == D * C mod 2^(2*GMP_NUMB_BITS).
4662 In the reference article, D was computed along with N, but it is better to
4663 compute D separately as D <-- N / C mod 2^(GMP_NUMB_BITS + 1), treating
4664 the result as a twos' complement signed integer.
4665
4666 Initialize N1 to C mod 2^(2*GMP_NUMB_BITS). According to the reference
4667 article, N2 should be initialized to 2^(2*GMP_NUMB_BITS), but we use
4668 2^(2*GMP_NUMB_BITS) - N1 to start the calculations within double
4669 precision. If N2 > N1 initially, the first iteration of the while loop
4670 will swap them. In all other situations, N1 >= N2 is maintained. */
4671
#if HAVE_NATIVE_mpn_gcd_finda
#define find_a(cp)  mpn_gcd_finda (cp)

#else
static
#if ! defined (__i386__)
inline			/* don't inline this for the x86 */
#endif
mp_limb_t
find_a (mp_srcptr cp)
{
  unsigned long int leading_zero_bits = 0;

  mp_limb_t n1_l = cp[0];	/* N1 == n1_h * 2^GMP_NUMB_BITS + n1_l.  */
  mp_limb_t n1_h = cp[1];

  mp_limb_t n2_l = (-n1_l & GMP_NUMB_MASK);	/* N2 == n2_h * 2^GMP_NUMB_BITS + n2_l.  */
  mp_limb_t n2_h = (~n1_h & GMP_NUMB_MASK);

  /* Main loop.  */
  while (n2_h != 0)		/* While N2 >= 2^GMP_NUMB_BITS.  */
    {
      /* N1 <-- N1 % N2.  */
      if (((GMP_NUMB_HIGHBIT >> leading_zero_bits) & n2_h) == 0)
        {
          /* Align N2's top bit with N1's by shifting N2 left i bits, then
             perform i shift-and-conditional-subtract steps to reduce N1. */
          unsigned long int i;
          count_leading_zeros (i, n2_h);
          i -= GMP_NAIL_BITS;
          i -= leading_zero_bits;
          leading_zero_bits += i;
          n2_h = ((n2_h << i) & GMP_NUMB_MASK) | (n2_l >> (GMP_NUMB_BITS - i));
          n2_l = (n2_l << i) & GMP_NUMB_MASK;
          do
            {
              /* Subtract N2 from N1 when N1 >= N2, then halve N2. */
              if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
                {
                  n1_h -= n2_h + (n1_l < n2_l);
                  n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
                }
              n2_l = (n2_l >> 1) | ((n2_h << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK);
              n2_h >>= 1;
              i -= 1;
            }
          while (i != 0);
        }
      /* Final conditional subtract completes N1 mod N2. */
      if (n1_h > n2_h || (n1_h == n2_h && n1_l >= n2_l))
        {
          n1_h -= n2_h + (n1_l < n2_l);
          n1_l = (n1_l - n2_l) & GMP_NUMB_MASK;
        }

      /* Exchange N1 and N2 for the next round (remainder becomes divisor). */
      MP_LIMB_T_SWAP (n1_h, n2_h);
      MP_LIMB_T_SWAP (n1_l, n2_l);
    }

  return n2_l;
}
#endif
4730
4731
4732 mp_size_t
mpn_gcd(mp_ptr gp,mp_ptr up,mp_size_t usize,mp_ptr vp,mp_size_t vsize)4733 mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
4734 {
4735 mp_ptr orig_vp = vp;
4736 mp_size_t orig_vsize = vsize;
4737 int binary_gcd_ctr; /* Number of times binary gcd will execute. */
4738 TMP_DECL (marker);
4739
4740 ASSERT (usize >= 1);
4741 ASSERT (vsize >= 1);
4742 ASSERT (usize >= vsize);
4743 ASSERT (vp[0] & 1);
4744 ASSERT (up[usize - 1] != 0);
4745 ASSERT (vp[vsize - 1] != 0);
4746 #if WANT_ASSERT
4747 if (usize == vsize)
4748 {
4749 int uzeros, vzeros;
4750 count_leading_zeros (uzeros, up[usize - 1]);
4751 count_leading_zeros (vzeros, vp[vsize - 1]);
4752 ASSERT (uzeros <= vzeros);
4753 }
4754 #endif
4755 ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
4756 ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, up, usize));
4757 ASSERT (MPN_SAME_OR_SEPARATE2_P (gp, vsize, vp, vsize));
4758
4759 TMP_MARK (marker);
4760
4761 /* Use accelerated algorithm if vsize is over GCD_ACCEL_THRESHOLD.
4762 Two EXTRA limbs for U and V are required for kary reduction. */
4763 if (vsize >= GCD_ACCEL_THRESHOLD)
4764 {
4765 unsigned long int vbitsize, d;
4766 mp_ptr orig_up = up;
4767 mp_size_t orig_usize = usize;
4768 mp_ptr anchor_up = (mp_ptr) TMP_ALLOC ((usize + 2) * BYTES_PER_MP_LIMB);
4769
4770 MPN_COPY (anchor_up, orig_up, usize);
4771 up = anchor_up;
4772
4773 count_leading_zeros (d, up[usize - 1]);
4774 d -= GMP_NAIL_BITS;
4775 d = usize * GMP_NUMB_BITS - d;
4776 count_leading_zeros (vbitsize, vp[vsize - 1]);
4777 vbitsize -= GMP_NAIL_BITS;
4778 vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
4779 ASSERT (d >= vbitsize);
4780 d = d - vbitsize + 1;
4781
4782 /* Use bmod reduction to quickly discover whether V divides U. */
4783 up[usize++] = 0; /* Insert leading zero. */
4784 mpn_bdivmod (up, up, usize, vp, vsize, d);
4785
4786 /* Now skip U/V mod 2^d and any low zero limbs. */
4787 d /= GMP_NUMB_BITS, up += d, usize -= d;
4788 while (usize != 0 && up[0] == 0)
4789 up++, usize--;
4790
4791 if (usize == 0) /* GCD == ORIG_V. */
4792 goto done;
4793
4794 vp = (mp_ptr) TMP_ALLOC ((vsize + 2) * BYTES_PER_MP_LIMB);
4795 MPN_COPY (vp, orig_vp, vsize);
4796
4797 do /* Main loop. */
4798 {
4799 /* mpn_com_n can't be used here because anchor_up and up may
4800 partially overlap */
4801 if ((up[usize - 1] & GMP_NUMB_HIGHBIT) != 0) /* U < 0; take twos' compl. */
4802 {
4803 mp_size_t i;
4804 anchor_up[0] = -up[0] & GMP_NUMB_MASK;
4805 for (i = 1; i < usize; i++)
4806 anchor_up[i] = (~up[i] & GMP_NUMB_MASK);
4807 up = anchor_up;
4808 }
4809
4810 MPN_NORMALIZE_NOT_ZERO (up, usize);
4811
4812 if ((up[0] & 1) == 0) /* Result even; remove twos. */
4813 {
4814 unsigned int r;
4815 count_trailing_zeros (r, up[0]);
4816 mpn_rshift (anchor_up, up, usize, r);
4817 usize -= (anchor_up[usize - 1] == 0);
4818 }
4819 else if (anchor_up != up)
4820 MPN_COPY_INCR (anchor_up, up, usize);
4821
4822 MPN_PTR_SWAP (anchor_up,usize, vp,vsize);
4823 up = anchor_up;
4824
4825 if (vsize <= 2) /* Kary can't handle < 2 limbs and */
4826 break; /* isn't efficient for == 2 limbs. */
4827
4828 d = vbitsize;
4829 count_leading_zeros (vbitsize, vp[vsize - 1]);
4830 vbitsize -= GMP_NAIL_BITS;
4831 vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
4832 d = d - vbitsize + 1;
4833
4834 if (d > BMOD_THRESHOLD) /* Bmod reduction. */
4835 {
4836 up[usize++] = 0;
4837 mpn_bdivmod (up, up, usize, vp, vsize, d);
4838 d /= GMP_NUMB_BITS, up += d, usize -= d;
4839 }
4840 else /* Kary reduction. */
4841 {
4842 mp_limb_t bp[2], cp[2];
4843
4844 /* C <-- V/U mod 2^(2*GMP_NUMB_BITS). */
4845 {
4846 mp_limb_t u_inv, hi, lo;
4847 modlimb_invert (u_inv, up[0]);
4848 cp[0] = (vp[0] * u_inv) & GMP_NUMB_MASK;
4849 umul_ppmm (hi, lo, cp[0], up[0] << GMP_NAIL_BITS);
4850 lo >>= GMP_NAIL_BITS;
4851 cp[1] = (vp[1] - hi - cp[0] * up[1]) * u_inv & GMP_NUMB_MASK;
4852 }
4853
4854 /* U <-- find_a (C) * U. */
4855 up[usize] = mpn_mul_1 (up, up, usize, find_a (cp));
4856 usize++;
4857
4858 /* B <-- A/C == U/V mod 2^(GMP_NUMB_BITS + 1).
4859 bp[0] <-- U/V mod 2^GMP_NUMB_BITS and
4860 bp[1] <-- ( (U - bp[0] * V)/2^GMP_NUMB_BITS ) / V mod 2
4861
4862 Like V/U above, but simplified because only the low bit of
4863 bp[1] is wanted. */
4864 {
4865 mp_limb_t v_inv, hi, lo;
4866 modlimb_invert (v_inv, vp[0]);
4867 bp[0] = (up[0] * v_inv) & GMP_NUMB_MASK;
4868 umul_ppmm (hi, lo, bp[0], vp[0] << GMP_NAIL_BITS);
4869 lo >>= GMP_NAIL_BITS;
4870 bp[1] = (up[1] + hi + (bp[0] & vp[1])) & 1;
4871 }
4872
4873 up[usize++] = 0;
4874 if (bp[1] != 0) /* B < 0: U <-- U + (-B) * V. */
4875 {
4876 mp_limb_t c = mpn_addmul_1 (up, vp, vsize, -bp[0] & GMP_NUMB_MASK);
4877 mpn_add_1 (up + vsize, up + vsize, usize - vsize, c);
4878 }
4879 else /* B >= 0: U <-- U - B * V. */
4880 {
4881 mp_limb_t b = mpn_submul_1 (up, vp, vsize, bp[0]);
4882 mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
4883 }
4884
4885 up += 2, usize -= 2; /* At least two low limbs are zero. */
4886 }
4887
4888 /* Must remove low zero limbs before complementing. */
4889 while (usize != 0 && up[0] == 0)
4890 up++, usize--;
4891 }
4892 while (usize != 0);
4893
4894 /* Compute GCD (ORIG_V, GCD (ORIG_U, V)). Binary will execute twice. */
4895 up = orig_up, usize = orig_usize;
4896 binary_gcd_ctr = 2;
4897 }
4898 else
4899 binary_gcd_ctr = 1;
4900
4901 /* Finish up with the binary algorithm. Executes once or twice. */
4902 for ( ; binary_gcd_ctr--; up = orig_vp, usize = orig_vsize)
4903 {
4904 if (usize > 2) /* First make U close to V in size. */
4905 {
4906 unsigned long int vbitsize, d;
4907 count_leading_zeros (d, up[usize - 1]);
4908 d -= GMP_NAIL_BITS;
4909 d = usize * GMP_NUMB_BITS - d;
4910 count_leading_zeros (vbitsize, vp[vsize - 1]);
4911 vbitsize -= GMP_NAIL_BITS;
4912 vbitsize = vsize * GMP_NUMB_BITS - vbitsize;
4913 d = d - vbitsize - 1;
4914 if (d != -(unsigned long int)1 && d > 2)
4915 {
4916 mpn_bdivmod (up, up, usize, vp, vsize, d); /* Result > 0. */
4917 d /= (unsigned long int)GMP_NUMB_BITS, up += d, usize -= d;
4918 }
4919 }
4920
4921 /* Start binary GCD. */
4922 do
4923 {
4924 mp_size_t zeros;
4925
4926 /* Make sure U is odd. */
4927 MPN_NORMALIZE (up, usize);
4928 while (up[0] == 0)
4929 up += 1, usize -= 1;
4930 if ((up[0] & 1) == 0)
4931 {
4932 unsigned int r;
4933 count_trailing_zeros (r, up[0]);
4934 mpn_rshift (up, up, usize, r);
4935 usize -= (up[usize - 1] == 0);
4936 }
4937
4938 /* Keep usize >= vsize. */
4939 if (usize < vsize)
4940 MPN_PTR_SWAP (up, usize, vp, vsize);
4941
4942 if (usize <= 2) /* Double precision. */
4943 {
4944 if (vsize == 1)
4945 vp[0] = mpn_gcd_1 (up, usize, vp[0]);
4946 else
4947 vsize = gcd_2 (vp, up);
4948 break; /* Binary GCD done. */
4949 }
4950
4951 /* Count number of low zero limbs of U - V. */
4952 for (zeros = 0; up[zeros] == vp[zeros] && ++zeros != vsize; )
4953 continue;
4954
4955 /* If U < V, swap U and V; in any case, subtract V from U. */
4956 if (zeros == vsize) /* Subtract done. */
4957 up += zeros, usize -= zeros;
4958 else if (usize == vsize)
4959 {
4960 mp_size_t size = vsize;
4961 do
4962 size--;
4963 while (up[size] == vp[size]);
4964 if (up[size] < vp[size]) /* usize == vsize. */
4965 MP_PTR_SWAP (up, vp);
4966 up += zeros, usize = size + 1 - zeros;
4967 mpn_sub_n (up, up, vp + zeros, usize);
4968 }
4969 else
4970 {
4971 mp_size_t size = vsize - zeros;
4972 up += zeros, usize -= zeros;
4973 if (mpn_sub_n (up, up, vp + zeros, size))
4974 {
4975 while (up[size] == 0) /* Propagate borrow. */
4976 up[size++] = -(mp_limb_t)1;
4977 up[size] -= 1;
4978 }
4979 }
4980 }
4981 while (usize); /* End binary GCD. */
4982 }
4983
4984 done:
4985 if (vp != gp)
4986 MPN_COPY_INCR (gp, vp, vsize);
4987 TMP_FREE (marker);
4988 return vsize;
4989 }
4990
4991
4992 /* q_high = mpn_bdivmod (qp, up, usize, vp, vsize, d).
4993
4994 Puts the low d/BITS_PER_MP_LIMB limbs of Q = U / V mod 2^d at qp, and
4995 returns the high d%BITS_PER_MP_LIMB bits of Q as the result.
4996
4997 Also, U - Q * V mod 2^(usize*BITS_PER_MP_LIMB) is placed at up. Since the
4998 low d/BITS_PER_MP_LIMB limbs of this difference are zero, the code allows
4999 the limb vectors at qp to overwrite the low limbs at up, provided qp <= up.
5000
5001 Preconditions:
5002 1. V is odd.
5003 2. usize * BITS_PER_MP_LIMB >= d.
5004 3. If Q and U overlap, qp <= up.
5005
5006 Ken Weber (kweber@mat.ufrgs.br, kweber@mcs.kent.edu)
5007
5008 Funding for this work has been partially provided by Conselho Nacional
5009 de Desenvolvimento Cienti'fico e Tecnolo'gico (CNPq) do Brazil, Grant
5010 301314194-2, and was done while I was a visiting reseacher in the Instituto
5011 de Matema'tica at Universidade Federal do Rio Grande do Sul (UFRGS).
5012
5013 References:
5014 T. Jebelean, An algorithm for exact division, Journal of Symbolic
5015 Computation, v. 15, 1993, pp. 169-180.
5016
5017 K. Weber, The accelerated integer GCD algorithm, ACM Transactions on
5018 Mathematical Software, v. 21 (March), 1995, pp. 111-122. */
5019
mp_limb_t
mpn_bdivmod (mp_ptr qp, mp_ptr up, mp_size_t usize,
	     mp_srcptr vp, mp_size_t vsize, unsigned long int d)
{
  mp_limb_t v_inv;

  /* Preconditions (see the block comment above): V odd, d bits available
     in U, and the only permitted overlap is qp <= up.  */
  ASSERT (usize >= 1);
  ASSERT (vsize >= 1);
  ASSERT (usize * GMP_NUMB_BITS >= d);
  ASSERT (! MPN_OVERLAP_P (up, usize, vp, vsize));
  ASSERT (! MPN_OVERLAP_P (qp, d/GMP_NUMB_BITS, vp, vsize));
  ASSERT (MPN_SAME_OR_INCR2_P (qp, d/GMP_NUMB_BITS, up, usize));
  /* ASSERT_MPN (up, usize); */
  /* ASSERT_MPN (vp, vsize); */

  /* 1/V mod 2^GMP_NUMB_BITS.  Exists because V is odd.  */
  modlimb_invert (v_inv, vp[0]);

  /* Fast code for two cases previously used by the accel part of mpn_gcd.
     (Could probably remove this now it's inlined there.) */
  if (usize == 2 && vsize == 2 &&
      (d == GMP_NUMB_BITS || d == 2*GMP_NUMB_BITS))
    {
      mp_limb_t hi;
      mp_limb_t lo USED_ONLY_SOMETIMES;   /* low product limb only needed for ASSERTs */
      /* One quotient limb: q = u0 / v0 mod B.  Subtracting q*V zeroes
	 up[0] exactly, so only the high limb needs adjusting.  */
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, lo, q, vp[0] << GMP_NAIL_BITS);
      up[0] = 0;
      up[1] -= hi + q*vp[1];
      qp[0] = q;
      if (d == 2*GMP_NUMB_BITS)
	{
	  /* Second quotient limb; remainder becomes 0 mod 2^(2*B).  */
	  q = (up[1] * v_inv) & GMP_NUMB_MASK;
	  up[1] = 0;
	  qp[1] = q;
	}
      return 0;
    }

  /* Main loop.  Each iteration emits one full quotient limb: q is chosen
     so that U - q*V has a zero low limb, then U is advanced one limb.  */
  while (d >= GMP_NUMB_BITS)
    {
      mp_limb_t q = (up[0] * v_inv) & GMP_NUMB_MASK;
      mp_limb_t b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);
      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);  /* propagate borrow */
      d -= GMP_NUMB_BITS;
      up += 1, usize -= 1;
      *qp++ = q;
    }

  /* Final partial limb: fewer than GMP_NUMB_BITS quotient bits remain.
     They are returned rather than stored at qp.  */
  if (d)
    {
      mp_limb_t b;
      mp_limb_t q = (up[0] * v_inv) & (((mp_limb_t)1<<d) - 1);
      if (q <= 1)
	{
	  if (q == 0)
	    return 0;		/* q == 0: nothing to subtract */
	  else
	    b = mpn_sub_n (up, up, vp, MIN (usize, vsize));  /* q == 1: plain subtract */
	}
      else
	b = mpn_submul_1 (up, vp, MIN (usize, vsize), q);

      if (usize > vsize)
	mpn_sub_1 (up + vsize, up + vsize, usize - vsize, b);
      return q;
    }

  return 0;
}
5092
5093
/* modlimb_invert_table[i] is the multiplicative inverse of 2*i+1 mod 256,
   ie. (modlimb_invert_table[i] * (2*i+1)) % 256 == 1.

   Only odd residues are invertible mod 256, hence 128 entries indexed by
   (limb >> 1) & 0x7F.  NOTE(review): presumably this is the 8-bit seed for
   the Newton iteration in the modlimb_invert macro -- see gmp-impl.h.  */

const unsigned char modlimb_invert_table[128] = {
  0x01, 0xAB, 0xCD, 0xB7, 0x39, 0xA3, 0xC5, 0xEF,
  0xF1, 0x1B, 0x3D, 0xA7, 0x29, 0x13, 0x35, 0xDF,
  0xE1, 0x8B, 0xAD, 0x97, 0x19, 0x83, 0xA5, 0xCF,
  0xD1, 0xFB, 0x1D, 0x87, 0x09, 0xF3, 0x15, 0xBF,
  0xC1, 0x6B, 0x8D, 0x77, 0xF9, 0x63, 0x85, 0xAF,
  0xB1, 0xDB, 0xFD, 0x67, 0xE9, 0xD3, 0xF5, 0x9F,
  0xA1, 0x4B, 0x6D, 0x57, 0xD9, 0x43, 0x65, 0x8F,
  0x91, 0xBB, 0xDD, 0x47, 0xC9, 0xB3, 0xD5, 0x7F,
  0x81, 0x2B, 0x4D, 0x37, 0xB9, 0x23, 0x45, 0x6F,
  0x71, 0x9B, 0xBD, 0x27, 0xA9, 0x93, 0xB5, 0x5F,
  0x61, 0x0B, 0x2D, 0x17, 0x99, 0x03, 0x25, 0x4F,
  0x51, 0x7B, 0x9D, 0x07, 0x89, 0x73, 0x95, 0x3F,
  0x41, 0xEB, 0x0D, 0xF7, 0x79, 0xE3, 0x05, 0x2F,
  0x31, 0x5B, 0x7D, 0xE7, 0x69, 0x53, 0x75, 0x1F,
  0x21, 0xCB, 0xED, 0xD7, 0x59, 0xC3, 0xE5, 0x0F,
  0x11, 0x3B, 0x5D, 0xC7, 0x49, 0x33, 0x55, 0xFF
};
5115
5116
5117 /************************************************************************/
5118
5119 /* __mp_bases -- Structure for conversion between internal binary
5120 format and strings in base 2..255. The fields are explained in
5121 gmp-impl.h. */
5122
#if BITS_PER_MP_LIMB == 32
/* Each entry is { chars_per_limb, chars_per_bit_exactly, big_base,
   big_base_inverted } -- the field semantics are documented in gmp-impl.h.
   For power-of-2 bases, big_base holds log2(base) instead of a big base.
   NOTE(review): this table appears machine-generated; do not hand-edit
   individual constants.  */
const struct bases __mp_bases[256] =
{
  /* 0 */ {0, 0.0, 0, 0},
  /* 1 */ {0, 1e38, 0, 0},
  /* 2 */ {32, 1.0000000000000000, 0x1, 0x0},
  /* 3 */ {20, 0.6309297535714575, 0xcfd41b91, 0x3b563c24},
  /* 4 */ {16, 0.5000000000000000, 0x2, 0x0},
  /* 5 */ {13, 0.4306765580733931, 0x48c27395, 0xc25c2684},
  /* 6 */ {12, 0.3868528072345416, 0x81bf1000, 0xf91bd1b6},
  /* 7 */ {11, 0.3562071871080222, 0x75db9c97, 0x1607a2cb},
  /* 8 */ {10, 0.3333333333333334, 0x3, 0x0},
  /* 9 */ {10, 0.3154648767857287, 0xcfd41b91, 0x3b563c24},
  /* 10 */ {9, 0.3010299956639811, 0x3b9aca00, 0x12e0be82},
  /* 11 */ {9, 0.2890648263178878, 0x8c8b6d2b, 0xd24cde04},
  /* 12 */ {8, 0.2789429456511298, 0x19a10000, 0x3fa39ab5},
  /* 13 */ {8, 0.2702381544273197, 0x309f1021, 0x50f8ac5f},
  /* 14 */ {8, 0.2626495350371936, 0x57f6c100, 0x74843b1e},
  /* 15 */ {8, 0.2559580248098155, 0x98c29b81, 0xad0326c2},
  /* 16 */ {8, 0.2500000000000000, 0x4, 0x0},
  /* 17 */ {7, 0.2446505421182260, 0x18754571, 0x4ef0b6bd},
  /* 18 */ {7, 0.2398124665681315, 0x247dbc80, 0xc0fc48a1},
  /* 19 */ {7, 0.2354089133666382, 0x3547667b, 0x33838942},
  /* 20 */ {7, 0.2313782131597592, 0x4c4b4000, 0xad7f29ab},
  /* 21 */ {7, 0.2276702486969530, 0x6b5a6e1d, 0x313c3d15},
  /* 22 */ {7, 0.2242438242175754, 0x94ace180, 0xb8cca9e0},
  /* 23 */ {7, 0.2210647294575037, 0xcaf18367, 0x42ed6de9},
  /* 24 */ {6, 0.2181042919855316, 0xb640000, 0x67980e0b},
  /* 25 */ {6, 0.2153382790366965, 0xe8d4a51, 0x19799812},
  /* 26 */ {6, 0.2127460535533632, 0x1269ae40, 0xbce85396},
  /* 27 */ {6, 0.2103099178571525, 0x17179149, 0x62c103a9},
  /* 28 */ {6, 0.2080145976765095, 0x1cb91000, 0x1d353d43},
  /* 29 */ {6, 0.2058468324604344, 0x23744899, 0xce1decea},
  /* 30 */ {6, 0.2037950470905062, 0x2b73a840, 0x790fc511},
  /* 31 */ {6, 0.2018490865820999, 0x34e63b41, 0x35b865a0},
  /* 32 */ {6, 0.2000000000000000, 0x5, 0x0},
  /* 33 */ {6, 0.1982398631705605, 0x4cfa3cc1, 0xa9aed1b3},
  /* 34 */ {6, 0.1965616322328226, 0x5c13d840, 0x63dfc229},
  /* 35 */ {6, 0.1949590218937863, 0x6d91b519, 0x2b0fee30},
  /* 36 */ {6, 0.1934264036172708, 0x81bf1000, 0xf91bd1b6},
  /* 37 */ {6, 0.1919587200065601, 0x98ede0c9, 0xac89c3a9},
  /* 38 */ {6, 0.1905514124267734, 0xb3773e40, 0x6d2c32fe},
  /* 39 */ {6, 0.1892003595168700, 0xd1bbc4d1, 0x387907c9},
  /* 40 */ {6, 0.1879018247091076, 0xf4240000, 0xc6f7a0b},
  /* 41 */ {5, 0.1866524112389434, 0x6e7d349, 0x28928154},
  /* 42 */ {5, 0.1854490234153689, 0x7ca30a0, 0x6e8629d},
  /* 43 */ {5, 0.1842888331487062, 0x8c32bbb, 0xd373dca0},
  /* 44 */ {5, 0.1831692509136336, 0x9d46c00, 0xa0b17895},
  /* 45 */ {5, 0.1820879004699383, 0xaffacfd, 0x746811a5},
  /* 46 */ {5, 0.1810425967800402, 0xc46bee0, 0x4da6500f},
  /* 47 */ {5, 0.1800313266566926, 0xdab86ef, 0x2ba23582},
  /* 48 */ {5, 0.1790522317510414, 0xf300000, 0xdb20a88},
  /* 49 */ {5, 0.1781035935540111, 0x10d63af1, 0xe68d5ce4},
  /* 50 */ {5, 0.1771838201355579, 0x12a05f20, 0xb7cdfd9d},
  /* 51 */ {5, 0.1762914343888821, 0x1490aae3, 0x8e583933},
  /* 52 */ {5, 0.1754250635819545, 0x16a97400, 0x697cc3ea},
  /* 53 */ {5, 0.1745834300480449, 0x18ed2825, 0x48a5ca6c},
  /* 54 */ {5, 0.1737653428714400, 0x1b5e4d60, 0x2b52db16},
  /* 55 */ {5, 0.1729696904450771, 0x1dff8297, 0x111586a6},
  /* 56 */ {5, 0.1721954337940981, 0x20d38000, 0xf31d2b36},
  /* 57 */ {5, 0.1714416005739134, 0x23dd1799, 0xc8d76d19},
  /* 58 */ {5, 0.1707072796637201, 0x271f35a0, 0xa2cb1eb4},
  /* 59 */ {5, 0.1699916162869140, 0x2a9ce10b, 0x807c3ec3},
  /* 60 */ {5, 0.1692938075987814, 0x2e593c00, 0x617ec8bf},
  /* 61 */ {5, 0.1686130986895011, 0x3257844d, 0x45746cbe},
  /* 62 */ {5, 0.1679487789570419, 0x369b13e0, 0x2c0aa273},
  /* 63 */ {5, 0.1673001788101741, 0x3b27613f, 0x14f90805},
  /* 64 */ {5, 0.1666666666666667, 0x6, 0x0},
  /* 65 */ {5, 0.1660476462159378, 0x4528a141, 0xd9cf0829},
  /* 66 */ {5, 0.1654425539190583, 0x4aa51420, 0xb6fc4841},
  /* 67 */ {5, 0.1648508567221604, 0x50794633, 0x973054cb},
  /* 68 */ {5, 0.1642720499620502, 0x56a94400, 0x7a1dbe4b},
  /* 69 */ {5, 0.1637056554452156, 0x5d393975, 0x5f7fcd7f},
  /* 70 */ {5, 0.1631512196835108, 0x642d7260, 0x47196c84},
  /* 71 */ {5, 0.1626083122716341, 0x6b8a5ae7, 0x30b43635},
  /* 72 */ {5, 0.1620765243931223, 0x73548000, 0x1c1fa5f6},
  /* 73 */ {5, 0.1615554674429964, 0x7b908fe9, 0x930634a},
  /* 74 */ {5, 0.1610447717564445, 0x84435aa0, 0xef7f4a3c},
  /* 75 */ {5, 0.1605440854340214, 0x8d71d25b, 0xcf5552d2},
  /* 76 */ {5, 0.1600530732548213, 0x97210c00, 0xb1a47c8e},
  /* 77 */ {5, 0.1595714156699382, 0xa1563f9d, 0x9634b43e},
  /* 78 */ {5, 0.1590988078692941, 0xac16c8e0, 0x7cd3817d},
  /* 79 */ {5, 0.1586349589155960, 0xb768278f, 0x65536761},
  /* 80 */ {5, 0.1581795909397823, 0xc3500000, 0x4f8b588e},
  /* 81 */ {5, 0.1577324383928644, 0xcfd41b91, 0x3b563c24},
  /* 82 */ {5, 0.1572932473495469, 0xdcfa6920, 0x28928154},
  /* 83 */ {5, 0.1568617748594410, 0xeac8fd83, 0x1721bfb0},
  /* 84 */ {5, 0.1564377883420716, 0xf9461400, 0x6e8629d},
  /* 85 */ {4, 0.1560210650222250, 0x31c84b1, 0x491cc17c},
  /* 86 */ {4, 0.1556113914024940, 0x342ab10, 0x3a11d83b},
  /* 87 */ {4, 0.1552085627701551, 0x36a2c21, 0x2be074cd},
  /* 88 */ {4, 0.1548123827357682, 0x3931000, 0x1e7a02e7},
  /* 89 */ {4, 0.1544226628011101, 0x3bd5ee1, 0x11d10edd},
  /* 90 */ {4, 0.1540392219542636, 0x3e92110, 0x5d92c68},
  /* 91 */ {4, 0.1536618862898642, 0x4165ef1, 0xf50dbfb2},
  /* 92 */ {4, 0.1532904886526781, 0x4452100, 0xdf9f1316},
  /* 93 */ {4, 0.1529248683028321, 0x4756fd1, 0xcb52a684},
  /* 94 */ {4, 0.1525648706011593, 0x4a75410, 0xb8163e97},
  /* 95 */ {4, 0.1522103467132434, 0x4dad681, 0xa5d8f269},
  /* 96 */ {4, 0.1518611533308632, 0x5100000, 0x948b0fcd},
  /* 97 */ {4, 0.1515171524096389, 0x546d981, 0x841e0215},
  /* 98 */ {4, 0.1511782109217764, 0x57f6c10, 0x74843b1e},
  /* 99 */ {4, 0.1508442006228941, 0x5b9c0d1, 0x65b11e6e},
  /* 100 */ {4, 0.1505149978319906, 0x5f5e100, 0x5798ee23},
  /* 101 */ {4, 0.1501904832236879, 0x633d5f1, 0x4a30b99b},
  /* 102 */ {4, 0.1498705416319474, 0x673a910, 0x3d6e4d94},
  /* 103 */ {4, 0.1495550618645152, 0x6b563e1, 0x314825b0},
  /* 104 */ {4, 0.1492439365274121, 0x6f91000, 0x25b55f2e},
  /* 105 */ {4, 0.1489370618588283, 0x73eb721, 0x1aadaccb},
  /* 106 */ {4, 0.1486343375718350, 0x7866310, 0x10294ba2},
  /* 107 */ {4, 0.1483356667053617, 0x7d01db1, 0x620f8f6},
  /* 108 */ {4, 0.1480409554829326, 0x81bf100, 0xf91bd1b6},
  /* 109 */ {4, 0.1477501131786861, 0x869e711, 0xe6d37b2a},
  /* 110 */ {4, 0.1474630519902391, 0x8ba0a10, 0xd55cff6e},
  /* 111 */ {4, 0.1471796869179852, 0x90c6441, 0xc4ad2db2},
  /* 112 */ {4, 0.1468999356504447, 0x9610000, 0xb4b985cf},
  /* 113 */ {4, 0.1466237184553111, 0x9b7e7c1, 0xa5782bef},
  /* 114 */ {4, 0.1463509580758620, 0xa112610, 0x96dfdd2a},
  /* 115 */ {4, 0.1460815796324244, 0xa6cc591, 0x88e7e509},
  /* 116 */ {4, 0.1458155105286054, 0xacad100, 0x7b8813d3},
  /* 117 */ {4, 0.1455526803620167, 0xb2b5331, 0x6eb8b595},
  /* 118 */ {4, 0.1452930208392428, 0xb8e5710, 0x627289db},
  /* 119 */ {4, 0.1450364656948130, 0xbf3e7a1, 0x56aebc07},
  /* 120 */ {4, 0.1447829506139581, 0xc5c1000, 0x4b66dc33},
  /* 121 */ {4, 0.1445324131589439, 0xcc6db61, 0x4094d8a3},
  /* 122 */ {4, 0.1442847926987864, 0xd345510, 0x3632f7a5},
  /* 123 */ {4, 0.1440400303421672, 0xda48871, 0x2c3bd1f0},
  /* 124 */ {4, 0.1437980688733775, 0xe178100, 0x22aa4d5f},
  /* 125 */ {4, 0.1435588526911310, 0xe8d4a51, 0x19799812},
  /* 126 */ {4, 0.1433223277500932, 0xf05f010, 0x10a523e5},
  /* 127 */ {4, 0.1430884415049874, 0xf817e01, 0x828a237},
  /* 128 */ {4, 0.1428571428571428, 0x7, 0x0},
  /* 129 */ {4, 0.1426283821033600, 0x10818201, 0xf04ec452},
  /* 130 */ {4, 0.1424021108869747, 0x11061010, 0xe136444a},
  /* 131 */ {4, 0.1421782821510107, 0x118db651, 0xd2af9589},
  /* 132 */ {4, 0.1419568500933153, 0x12188100, 0xc4b42a83},
  /* 133 */ {4, 0.1417377701235801, 0x12a67c71, 0xb73dccf5},
  /* 134 */ {4, 0.1415209988221527, 0x1337b510, 0xaa4698c5},
  /* 135 */ {4, 0.1413064939005528, 0x13cc3761, 0x9dc8f729},
  /* 136 */ {4, 0.1410942141636095, 0x14641000, 0x91bf9a30},
  /* 137 */ {4, 0.1408841194731412, 0x14ff4ba1, 0x86257887},
  /* 138 */ {4, 0.1406761707131039, 0x159df710, 0x7af5c98c},
  /* 139 */ {4, 0.1404703297561400, 0x16401f31, 0x702c01a0},
  /* 140 */ {4, 0.1402665594314587, 0x16e5d100, 0x65c3ceb1},
  /* 141 */ {4, 0.1400648234939879, 0x178f1991, 0x5bb91502},
  /* 142 */ {4, 0.1398650865947379, 0x183c0610, 0x5207ec23},
  /* 143 */ {4, 0.1396673142523192, 0x18eca3c1, 0x48ac9c19},
  /* 144 */ {4, 0.1394714728255649, 0x19a10000, 0x3fa39ab5},
  /* 145 */ {4, 0.1392775294872041, 0x1a592841, 0x36e98912},
  /* 146 */ {4, 0.1390854521985406, 0x1b152a10, 0x2e7b3140},
  /* 147 */ {4, 0.1388952096850913, 0x1bd51311, 0x2655840b},
  /* 148 */ {4, 0.1387067714131417, 0x1c98f100, 0x1e7596ea},
  /* 149 */ {4, 0.1385201075671774, 0x1d60d1b1, 0x16d8a20d},
  /* 150 */ {4, 0.1383351890281539, 0x1e2cc310, 0xf7bfe87},
  /* 151 */ {4, 0.1381519873525671, 0x1efcd321, 0x85d2492},
  /* 152 */ {4, 0.1379704747522905, 0x1fd11000, 0x179a9f4},
  /* 153 */ {4, 0.1377906240751463, 0x20a987e1, 0xf59e80eb},
  /* 154 */ {4, 0.1376124087861776, 0x21864910, 0xe8b768db},
  /* 155 */ {4, 0.1374358029495937, 0x226761f1, 0xdc39d6d5},
  /* 156 */ {4, 0.1372607812113589, 0x234ce100, 0xd021c5d1},
  /* 157 */ {4, 0.1370873187823978, 0x2436d4d1, 0xc46b5e37},
  /* 158 */ {4, 0.1369153914223921, 0x25254c10, 0xb912f39c},
  /* 159 */ {4, 0.1367449754241439, 0x26185581, 0xae150294},
  /* 160 */ {4, 0.1365760475984821, 0x27100000, 0xa36e2eb1},
  /* 161 */ {4, 0.1364085852596902, 0x280c5a81, 0x991b4094},
  /* 162 */ {4, 0.1362425662114337, 0x290d7410, 0x8f19241e},
  /* 163 */ {4, 0.1360779687331669, 0x2a135bd1, 0x8564e6b7},
  /* 164 */ {4, 0.1359147715670014, 0x2b1e2100, 0x7bfbb5b4},
  /* 165 */ {4, 0.1357529539050150, 0x2c2dd2f1, 0x72dadcc8},
  /* 166 */ {4, 0.1355924953769863, 0x2d428110, 0x69ffc498},
  /* 167 */ {4, 0.1354333760385373, 0x2e5c3ae1, 0x6167f154},
  /* 168 */ {4, 0.1352755763596663, 0x2f7b1000, 0x5911016e},
  /* 169 */ {4, 0.1351190772136599, 0x309f1021, 0x50f8ac5f},
  /* 170 */ {4, 0.1349638598663645, 0x31c84b10, 0x491cc17c},
  /* 171 */ {4, 0.1348099059658079, 0x32f6d0b1, 0x417b26d8},
  /* 172 */ {4, 0.1346571975321549, 0x342ab100, 0x3a11d83b},
  /* 173 */ {4, 0.1345057169479844, 0x3563fc11, 0x32dee622},
  /* 174 */ {4, 0.1343554469488779, 0x36a2c210, 0x2be074cd},
  /* 175 */ {4, 0.1342063706143054, 0x37e71341, 0x2514bb58},
  /* 176 */ {4, 0.1340584713587980, 0x39310000, 0x1e7a02e7},
  /* 177 */ {4, 0.1339117329233981, 0x3a8098c1, 0x180ea5d0},
  /* 178 */ {4, 0.1337661393673756, 0x3bd5ee10, 0x11d10edd},
  /* 179 */ {4, 0.1336216750601996, 0x3d311091, 0xbbfb88e},
  /* 180 */ {4, 0.1334783246737591, 0x3e921100, 0x5d92c68},
  /* 181 */ {4, 0.1333360731748201, 0x3ff90031, 0x1c024c},
  /* 182 */ {4, 0.1331949058177136, 0x4165ef10, 0xf50dbfb2},
  /* 183 */ {4, 0.1330548081372441, 0x42d8eea1, 0xea30efa3},
  /* 184 */ {4, 0.1329157659418126, 0x44521000, 0xdf9f1316},
  /* 185 */ {4, 0.1327777653067443, 0x45d16461, 0xd555c0c9},
  /* 186 */ {4, 0.1326407925678156, 0x4756fd10, 0xcb52a684},
  /* 187 */ {4, 0.1325048343149731, 0x48e2eb71, 0xc193881f},
  /* 188 */ {4, 0.1323698773862368, 0x4a754100, 0xb8163e97},
  /* 189 */ {4, 0.1322359088617821, 0x4c0e0f51, 0xaed8b724},
  /* 190 */ {4, 0.1321029160581950, 0x4dad6810, 0xa5d8f269},
  /* 191 */ {4, 0.1319708865228925, 0x4f535d01, 0x9d15039d},
  /* 192 */ {4, 0.1318398080287045, 0x51000000, 0x948b0fcd},
  /* 193 */ {4, 0.1317096685686114, 0x52b36301, 0x8c394d1d},
  /* 194 */ {4, 0.1315804563506306, 0x546d9810, 0x841e0215},
  /* 195 */ {4, 0.1314521597928493, 0x562eb151, 0x7c3784f8},
  /* 196 */ {4, 0.1313247675185968, 0x57f6c100, 0x74843b1e},
  /* 197 */ {4, 0.1311982683517524, 0x59c5d971, 0x6d02985d},
  /* 198 */ {4, 0.1310726513121843, 0x5b9c0d10, 0x65b11e6e},
  /* 199 */ {4, 0.1309479056113158, 0x5d796e61, 0x5e8e5c64},
  /* 200 */ {4, 0.1308240206478128, 0x5f5e1000, 0x5798ee23},
  /* 201 */ {4, 0.1307009860033912, 0x614a04a1, 0x50cf7bde},
  /* 202 */ {4, 0.1305787914387386, 0x633d5f10, 0x4a30b99b},
  /* 203 */ {4, 0.1304574268895465, 0x65383231, 0x43bb66bd},
  /* 204 */ {4, 0.1303368824626505, 0x673a9100, 0x3d6e4d94},
  /* 205 */ {4, 0.1302171484322746, 0x69448e91, 0x374842ee},
  /* 206 */ {4, 0.1300982152363760, 0x6b563e10, 0x314825b0},
  /* 207 */ {4, 0.1299800734730872, 0x6d6fb2c1, 0x2b6cde75},
  /* 208 */ {4, 0.1298627138972530, 0x6f910000, 0x25b55f2e},
  /* 209 */ {4, 0.1297461274170591, 0x71ba3941, 0x2020a2c5},
  /* 210 */ {4, 0.1296303050907487, 0x73eb7210, 0x1aadaccb},
  /* 211 */ {4, 0.1295152381234257, 0x7624be11, 0x155b891f},
  /* 212 */ {4, 0.1294009178639407, 0x78663100, 0x10294ba2},
  /* 213 */ {4, 0.1292873358018581, 0x7aafdeb1, 0xb160fe9},
  /* 214 */ {4, 0.1291744835645007, 0x7d01db10, 0x620f8f6},
  /* 215 */ {4, 0.1290623529140715, 0x7f5c3a21, 0x14930ef},
  /* 216 */ {4, 0.1289509357448472, 0x81bf1000, 0xf91bd1b6},
  /* 217 */ {4, 0.1288402240804449, 0x842a70e1, 0xefdcb0c7},
  /* 218 */ {4, 0.1287302100711567, 0x869e7110, 0xe6d37b2a},
  /* 219 */ {4, 0.1286208859913518, 0x891b24f1, 0xddfeb94a},
  /* 220 */ {4, 0.1285122442369443, 0x8ba0a100, 0xd55cff6e},
  /* 221 */ {4, 0.1284042773229231, 0x8e2ef9d1, 0xcceced50},
  /* 222 */ {4, 0.1282969778809442, 0x90c64410, 0xc4ad2db2},
  /* 223 */ {4, 0.1281903386569819, 0x93669481, 0xbc9c75f9},
  /* 224 */ {4, 0.1280843525090381, 0x96100000, 0xb4b985cf},
  /* 225 */ {4, 0.1279790124049077, 0x98c29b81, 0xad0326c2},
  /* 226 */ {4, 0.1278743114199984, 0x9b7e7c10, 0xa5782bef},
  /* 227 */ {4, 0.1277702427352035, 0x9e43b6d1, 0x9e1771a9},
  /* 228 */ {4, 0.1276667996348261, 0xa1126100, 0x96dfdd2a},
  /* 229 */ {4, 0.1275639755045533, 0xa3ea8ff1, 0x8fd05c41},
  /* 230 */ {4, 0.1274617638294791, 0xa6cc5910, 0x88e7e509},
  /* 231 */ {4, 0.1273601581921741, 0xa9b7d1e1, 0x8225759d},
  /* 232 */ {4, 0.1272591522708010, 0xacad1000, 0x7b8813d3},
  /* 233 */ {4, 0.1271587398372755, 0xafac2921, 0x750eccf9},
  /* 234 */ {4, 0.1270589147554692, 0xb2b53310, 0x6eb8b595},
  /* 235 */ {4, 0.1269596709794558, 0xb5c843b1, 0x6884e923},
  /* 236 */ {4, 0.1268610025517973, 0xb8e57100, 0x627289db},
  /* 237 */ {4, 0.1267629036018709, 0xbc0cd111, 0x5c80c07b},
  /* 238 */ {4, 0.1266653683442337, 0xbf3e7a10, 0x56aebc07},
  /* 239 */ {4, 0.1265683910770258, 0xc27a8241, 0x50fbb19b},
  /* 240 */ {4, 0.1264719661804097, 0xc5c10000, 0x4b66dc33},
  /* 241 */ {4, 0.1263760881150453, 0xc91209c1, 0x45ef7c7c},
  /* 242 */ {4, 0.1262807514205999, 0xcc6db610, 0x4094d8a3},
  /* 243 */ {4, 0.1261859507142915, 0xcfd41b91, 0x3b563c24},
  /* 244 */ {4, 0.1260916806894653, 0xd3455100, 0x3632f7a5},
  /* 245 */ {4, 0.1259979361142023, 0xd6c16d31, 0x312a60c3},
  /* 246 */ {4, 0.1259047118299582, 0xda488710, 0x2c3bd1f0},
  /* 247 */ {4, 0.1258120027502338, 0xdddab5a1, 0x2766aa45},
  /* 248 */ {4, 0.1257198038592741, 0xe1781000, 0x22aa4d5f},
  /* 249 */ {4, 0.1256281102107963, 0xe520ad61, 0x1e06233c},
  /* 250 */ {4, 0.1255369169267456, 0xe8d4a510, 0x19799812},
  /* 251 */ {4, 0.1254462191960791, 0xec940e71, 0x15041c33},
  /* 252 */ {4, 0.1253560122735751, 0xf05f0100, 0x10a523e5},
  /* 253 */ {4, 0.1252662914786691, 0xf4359451, 0xc5c2749},
  /* 254 */ {4, 0.1251770521943144, 0xf817e010, 0x828a237},
  /* 255 */ {4, 0.1250882898658681, 0xfc05fc01, 0x40a1423},
};
#endif
5384 #if BITS_PER_MP_LIMB == 64
5385 const struct bases __mp_bases[256] =
5386 {
5387 /* 0 */ {0, 0.0, 0, 0},
5388 /* 1 */ {0, 1e38, 0, 0},
5389 /* 2 */ {64, 1.0000000000000000, CNST_LIMB(0x1), CNST_LIMB(0x0)},
5390 /* 3 */ {40, 0.6309297535714574, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
5391 /* 4 */ {32, 0.5000000000000000, CNST_LIMB(0x2), CNST_LIMB(0x0)},
5392 /* 5 */ {27, 0.4306765580733931, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
5393 /* 6 */ {24, 0.3868528072345416, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
5394 /* 7 */ {22, 0.3562071871080222, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
5395 /* 8 */ {21, 0.3333333333333334, CNST_LIMB(0x3), CNST_LIMB(0x0)},
5396 /* 9 */ {20, 0.3154648767857287, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
5397 /* 10 */ {19, 0.3010299956639811, CNST_LIMB(0x8ac7230489e80000), CNST_LIMB(0xd83c94fb6d2ac34a)},
5398 /* 11 */ {18, 0.2890648263178878, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
5399 /* 12 */ {17, 0.2789429456511298, CNST_LIMB(0x1eca170c00000000), CNST_LIMB(0xa10c2bec5da8f8f)},
5400 /* 13 */ {17, 0.2702381544273197, CNST_LIMB(0x780c7372621bd74d), CNST_LIMB(0x10f4becafe412ec3)},
5401 /* 14 */ {16, 0.2626495350371936, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
5402 /* 15 */ {16, 0.2559580248098155, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
5403 /* 16 */ {16, 0.2500000000000000, CNST_LIMB(0x4), CNST_LIMB(0x0)},
5404 /* 17 */ {15, 0.2446505421182260, CNST_LIMB(0x27b95e997e21d9f1), CNST_LIMB(0x9c71e11bab279323)},
5405 /* 18 */ {15, 0.2398124665681315, CNST_LIMB(0x5da0e1e53c5c8000), CNST_LIMB(0x5dfaa697ec6f6a1c)},
5406 /* 19 */ {15, 0.2354089133666382, CNST_LIMB(0xd2ae3299c1c4aedb), CNST_LIMB(0x3711783f6be7e9ec)},
5407 /* 20 */ {14, 0.2313782131597592, CNST_LIMB(0x16bcc41e90000000), CNST_LIMB(0x6849b86a12b9b01e)},
5408 /* 21 */ {14, 0.2276702486969530, CNST_LIMB(0x2d04b7fdd9c0ef49), CNST_LIMB(0x6bf097ba5ca5e239)},
5409 /* 22 */ {14, 0.2242438242175754, CNST_LIMB(0x5658597bcaa24000), CNST_LIMB(0x7b8015c8d7af8f08)},
5410 /* 23 */ {14, 0.2210647294575037, CNST_LIMB(0xa0e2073737609371), CNST_LIMB(0x975a24b3a3151b38)},
5411 /* 24 */ {13, 0.2181042919855316, CNST_LIMB(0xc29e98000000000), CNST_LIMB(0x50bd367972689db1)},
5412 /* 25 */ {13, 0.2153382790366965, CNST_LIMB(0x14adf4b7320334b9), CNST_LIMB(0x8c240c4aecb13bb5)},
5413 /* 26 */ {13, 0.2127460535533632, CNST_LIMB(0x226ed36478bfa000), CNST_LIMB(0xdbd2e56854e118c9)},
5414 /* 27 */ {13, 0.2103099178571525, CNST_LIMB(0x383d9170b85ff80b), CNST_LIMB(0x2351ffcaa9c7c4ae)},
5415 /* 28 */ {13, 0.2080145976765095, CNST_LIMB(0x5a3c23e39c000000), CNST_LIMB(0x6b24188ca33b0636)},
5416 /* 29 */ {13, 0.2058468324604344, CNST_LIMB(0x8e65137388122bcd), CNST_LIMB(0xcc3dceaf2b8ba99d)},
5417 /* 30 */ {13, 0.2037950470905062, CNST_LIMB(0xdd41bb36d259e000), CNST_LIMB(0x2832e835c6c7d6b6)},
5418 /* 31 */ {12, 0.2018490865820999, CNST_LIMB(0xaee5720ee830681), CNST_LIMB(0x76b6aa272e1873c5)},
5419 /* 32 */ {12, 0.2000000000000000, CNST_LIMB(0x5), CNST_LIMB(0x0)},
5420 /* 33 */ {12, 0.1982398631705605, CNST_LIMB(0x172588ad4f5f0981), CNST_LIMB(0x61eaf5d402c7bf4f)},
5421 /* 34 */ {12, 0.1965616322328226, CNST_LIMB(0x211e44f7d02c1000), CNST_LIMB(0xeeb658123ffb27ec)},
5422 /* 35 */ {12, 0.1949590218937863, CNST_LIMB(0x2ee56725f06e5c71), CNST_LIMB(0x5d5e3762e6fdf509)},
5423 /* 36 */ {12, 0.1934264036172708, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
5424 /* 37 */ {12, 0.1919587200065601, CNST_LIMB(0x5b5b57f8a98a5dd1), CNST_LIMB(0x66ae7831762efb6f)},
5425 /* 38 */ {12, 0.1905514124267734, CNST_LIMB(0x7dcff8986ea31000), CNST_LIMB(0x47388865a00f544)},
5426 /* 39 */ {12, 0.1892003595168700, CNST_LIMB(0xabd4211662a6b2a1), CNST_LIMB(0x7d673c33a123b54c)},
5427 /* 40 */ {12, 0.1879018247091076, CNST_LIMB(0xe8d4a51000000000), CNST_LIMB(0x19799812dea11197)},
5428 /* 41 */ {11, 0.1866524112389434, CNST_LIMB(0x7a32956ad081b79), CNST_LIMB(0xc27e62e0686feae)},
5429 /* 42 */ {11, 0.1854490234153689, CNST_LIMB(0x9f49aaff0e86800), CNST_LIMB(0x9b6e7507064ce7c7)},
5430 /* 43 */ {11, 0.1842888331487062, CNST_LIMB(0xce583bb812d37b3), CNST_LIMB(0x3d9ac2bf66cfed94)},
5431 /* 44 */ {11, 0.1831692509136336, CNST_LIMB(0x109b79a654c00000), CNST_LIMB(0xed46bc50ce59712a)},
5432 /* 45 */ {11, 0.1820879004699383, CNST_LIMB(0x1543beff214c8b95), CNST_LIMB(0x813d97e2c89b8d46)},
5433 /* 46 */ {11, 0.1810425967800402, CNST_LIMB(0x1b149a79459a3800), CNST_LIMB(0x2e81751956af8083)},
5434 /* 47 */ {11, 0.1800313266566926, CNST_LIMB(0x224edfb5434a830f), CNST_LIMB(0xdd8e0a95e30c0988)},
5435 /* 48 */ {11, 0.1790522317510413, CNST_LIMB(0x2b3fb00000000000), CNST_LIMB(0x7ad4dd48a0b5b167)},
5436 /* 49 */ {11, 0.1781035935540111, CNST_LIMB(0x3642798750226111), CNST_LIMB(0x2df495ccaa57147b)},
5437 /* 50 */ {11, 0.1771838201355579, CNST_LIMB(0x43c33c1937564800), CNST_LIMB(0xe392010175ee5962)},
5438 /* 51 */ {11, 0.1762914343888821, CNST_LIMB(0x54411b2441c3cd8b), CNST_LIMB(0x84eaf11b2fe7738e)},
5439 /* 52 */ {11, 0.1754250635819545, CNST_LIMB(0x6851455acd400000), CNST_LIMB(0x3a1e3971e008995d)},
5440 /* 53 */ {11, 0.1745834300480449, CNST_LIMB(0x80a23b117c8feb6d), CNST_LIMB(0xfd7a462344ffce25)},
5441 /* 54 */ {11, 0.1737653428714400, CNST_LIMB(0x9dff7d32d5dc1800), CNST_LIMB(0x9eca40b40ebcef8a)},
5442 /* 55 */ {11, 0.1729696904450771, CNST_LIMB(0xc155af6faeffe6a7), CNST_LIMB(0x52fa161a4a48e43d)},
5443 /* 56 */ {11, 0.1721954337940981, CNST_LIMB(0xebb7392e00000000), CNST_LIMB(0x1607a2cbacf930c1)},
5444 /* 57 */ {10, 0.1714416005739134, CNST_LIMB(0x50633659656d971), CNST_LIMB(0x97a014f8e3be55f1)},
5445 /* 58 */ {10, 0.1707072796637201, CNST_LIMB(0x5fa8624c7fba400), CNST_LIMB(0x568df8b76cbf212c)},
5446 /* 59 */ {10, 0.1699916162869140, CNST_LIMB(0x717d9faa73c5679), CNST_LIMB(0x20ba7c4b4e6ef492)},
5447 /* 60 */ {10, 0.1692938075987814, CNST_LIMB(0x86430aac6100000), CNST_LIMB(0xe81ee46b9ef492f5)},
5448 /* 61 */ {10, 0.1686130986895011, CNST_LIMB(0x9e64d9944b57f29), CNST_LIMB(0x9dc0d10d51940416)},
5449 /* 62 */ {10, 0.1679487789570419, CNST_LIMB(0xba5ca5392cb0400), CNST_LIMB(0x5fa8ed2f450272a5)},
5450 /* 63 */ {10, 0.1673001788101741, CNST_LIMB(0xdab2ce1d022cd81), CNST_LIMB(0x2ba9eb8c5e04e641)},
5451 /* 64 */ {10, 0.1666666666666667, CNST_LIMB(0x6), CNST_LIMB(0x0)},
5452 /* 65 */ {10, 0.1660476462159378, CNST_LIMB(0x12aeed5fd3e2d281), CNST_LIMB(0xb67759cc00287bf1)},
5453 /* 66 */ {10, 0.1654425539190583, CNST_LIMB(0x15c3da1572d50400), CNST_LIMB(0x78621feeb7f4ed33)},
5454 /* 67 */ {10, 0.1648508567221604, CNST_LIMB(0x194c05534f75ee29), CNST_LIMB(0x43d55b5f72943bc0)},
5455 /* 68 */ {10, 0.1642720499620502, CNST_LIMB(0x1d56299ada100000), CNST_LIMB(0x173decb64d1d4409)},
5456 /* 69 */ {10, 0.1637056554452156, CNST_LIMB(0x21f2a089a4ff4f79), CNST_LIMB(0xe29fb54fd6b6074f)},
5457 /* 70 */ {10, 0.1631512196835108, CNST_LIMB(0x2733896c68d9a400), CNST_LIMB(0xa1f1f5c210d54e62)},
5458 /* 71 */ {10, 0.1626083122716341, CNST_LIMB(0x2d2cf2c33b533c71), CNST_LIMB(0x6aac7f9bfafd57b2)},
5459 /* 72 */ {10, 0.1620765243931223, CNST_LIMB(0x33f506e440000000), CNST_LIMB(0x3b563c2478b72ee2)},
5460 /* 73 */ {10, 0.1615554674429964, CNST_LIMB(0x3ba43bec1d062211), CNST_LIMB(0x12b536b574e92d1b)},
5461 /* 74 */ {10, 0.1610447717564444, CNST_LIMB(0x4455872d8fd4e400), CNST_LIMB(0xdf86c03020404fa5)},
5462 /* 75 */ {10, 0.1605440854340214, CNST_LIMB(0x4e2694539f2f6c59), CNST_LIMB(0xa34adf02234eea8e)},
5463 /* 76 */ {10, 0.1600530732548213, CNST_LIMB(0x5938006c18900000), CNST_LIMB(0x6f46eb8574eb59dd)},
5464 /* 77 */ {10, 0.1595714156699382, CNST_LIMB(0x65ad9912474aa649), CNST_LIMB(0x42459b481df47cec)},
5465 /* 78 */ {10, 0.1590988078692941, CNST_LIMB(0x73ae9ff4241ec400), CNST_LIMB(0x1b424b95d80ca505)},
5466 /* 79 */ {10, 0.1586349589155960, CNST_LIMB(0x836612ee9c4ce1e1), CNST_LIMB(0xf2c1b982203a0dac)},
5467 /* 80 */ {10, 0.1581795909397823, CNST_LIMB(0x9502f90000000000), CNST_LIMB(0xb7cdfd9d7bdbab7d)},
5468 /* 81 */ {10, 0.1577324383928644, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
5469 /* 82 */ {10, 0.1572932473495469, CNST_LIMB(0xbebf59a07dab4400), CNST_LIMB(0x57931eeaf85cf64f)},
5470 /* 83 */ {10, 0.1568617748594410, CNST_LIMB(0xd7540d4093bc3109), CNST_LIMB(0x305a944507c82f47)},
5471 /* 84 */ {10, 0.1564377883420716, CNST_LIMB(0xf2b96616f1900000), CNST_LIMB(0xe007ccc9c22781a)},
5472 /* 85 */ {9, 0.1560210650222250, CNST_LIMB(0x336de62af2bca35), CNST_LIMB(0x3e92c42e000eeed4)},
5473 /* 86 */ {9, 0.1556113914024940, CNST_LIMB(0x39235ec33d49600), CNST_LIMB(0x1ebe59130db2795e)},
5474 /* 87 */ {9, 0.1552085627701551, CNST_LIMB(0x3f674e539585a17), CNST_LIMB(0x268859e90f51b89)},
5475 /* 88 */ {9, 0.1548123827357682, CNST_LIMB(0x4645b6958000000), CNST_LIMB(0xd24cde0463108cfa)},
5476 /* 89 */ {9, 0.1544226628011101, CNST_LIMB(0x4dcb74afbc49c19), CNST_LIMB(0xa536009f37adc383)},
5477 /* 90 */ {9, 0.1540392219542636, CNST_LIMB(0x56064e1d18d9a00), CNST_LIMB(0x7cea06ce1c9ace10)},
5478 /* 91 */ {9, 0.1536618862898642, CNST_LIMB(0x5f04fe2cd8a39fb), CNST_LIMB(0x58db032e72e8ba43)},
5479 /* 92 */ {9, 0.1532904886526781, CNST_LIMB(0x68d74421f5c0000), CNST_LIMB(0x388cc17cae105447)},
5480 /* 93 */ {9, 0.1529248683028321, CNST_LIMB(0x738df1f6ab4827d), CNST_LIMB(0x1b92672857620ce0)},
5481 /* 94 */ {9, 0.1525648706011593, CNST_LIMB(0x7f3afbc9cfb5e00), CNST_LIMB(0x18c6a9575c2ade4)},
5482 /* 95 */ {9, 0.1522103467132434, CNST_LIMB(0x8bf187fba88f35f), CNST_LIMB(0xd44da7da8e44b24f)},
5483 /* 96 */ {9, 0.1518611533308632, CNST_LIMB(0x99c600000000000), CNST_LIMB(0xaa2f78f1b4cc6794)},
5484 /* 97 */ {9, 0.1515171524096389, CNST_LIMB(0xa8ce21eb6531361), CNST_LIMB(0x843c067d091ee4cc)},
5485 /* 98 */ {9, 0.1511782109217764, CNST_LIMB(0xb92112c1a0b6200), CNST_LIMB(0x62005e1e913356e3)},
5486 /* 99 */ {9, 0.1508442006228941, CNST_LIMB(0xcad7718b8747c43), CNST_LIMB(0x4316eed01dedd518)},
5487 /* 100 */ {9, 0.1505149978319906, CNST_LIMB(0xde0b6b3a7640000), CNST_LIMB(0x2725dd1d243aba0e)},
5488 /* 101 */ {9, 0.1501904832236879, CNST_LIMB(0xf2d8cf5fe6d74c5), CNST_LIMB(0xddd9057c24cb54f)},
5489 /* 102 */ {9, 0.1498705416319474, CNST_LIMB(0x1095d25bfa712600), CNST_LIMB(0xedeee175a736d2a1)},
5490 /* 103 */ {9, 0.1495550618645152, CNST_LIMB(0x121b7c4c3698faa7), CNST_LIMB(0xc4699f3df8b6b328)},
5491 /* 104 */ {9, 0.1492439365274121, CNST_LIMB(0x13c09e8d68000000), CNST_LIMB(0x9ebbe7d859cb5a7c)},
5492 /* 105 */ {9, 0.1489370618588283, CNST_LIMB(0x15876ccb0b709ca9), CNST_LIMB(0x7c828b9887eb2179)},
5493 /* 106 */ {9, 0.1486343375718350, CNST_LIMB(0x17723c2976da2a00), CNST_LIMB(0x5d652ab99001adcf)},
5494 /* 107 */ {9, 0.1483356667053617, CNST_LIMB(0x198384e9c259048b), CNST_LIMB(0x4114f1754e5d7b32)},
5495 /* 108 */ {9, 0.1480409554829326, CNST_LIMB(0x1bbde41dfeec0000), CNST_LIMB(0x274b7c902f7e0188)},
5496 /* 109 */ {9, 0.1477501131786861, CNST_LIMB(0x1e241d6e3337910d), CNST_LIMB(0xfc9e0fbb32e210c)},
5497 /* 110 */ {9, 0.1474630519902391, CNST_LIMB(0x20b91cee9901ee00), CNST_LIMB(0xf4afa3e594f8ea1f)},
5498 /* 111 */ {9, 0.1471796869179852, CNST_LIMB(0x237ff9079863dfef), CNST_LIMB(0xcd85c32e9e4437b0)},
5499 /* 112 */ {9, 0.1468999356504447, CNST_LIMB(0x267bf47000000000), CNST_LIMB(0xa9bbb147e0dd92a8)},
5500 /* 113 */ {9, 0.1466237184553111, CNST_LIMB(0x29b08039fbeda7f1), CNST_LIMB(0x8900447b70e8eb82)},
5501 /* 114 */ {9, 0.1463509580758620, CNST_LIMB(0x2d213df34f65f200), CNST_LIMB(0x6b0a92adaad5848a)},
5502 /* 115 */ {9, 0.1460815796324244, CNST_LIMB(0x30d201d957a7c2d3), CNST_LIMB(0x4f990ad8740f0ee5)},
5503 /* 116 */ {9, 0.1458155105286054, CNST_LIMB(0x34c6d52160f40000), CNST_LIMB(0x3670a9663a8d3610)},
5504 /* 117 */ {9, 0.1455526803620167, CNST_LIMB(0x3903f855d8f4c755), CNST_LIMB(0x1f5c44188057be3c)},
5505 /* 118 */ {9, 0.1452930208392428, CNST_LIMB(0x3d8de5c8ec59b600), CNST_LIMB(0xa2bea956c4e4977)},
5506 /* 119 */ {9, 0.1450364656948130, CNST_LIMB(0x4269541d1ff01337), CNST_LIMB(0xed68b23033c3637e)},
5507 /* 120 */ {9, 0.1447829506139581, CNST_LIMB(0x479b38e478000000), CNST_LIMB(0xc99cf624e50549c5)},
5508 /* 121 */ {9, 0.1445324131589439, CNST_LIMB(0x4d28cb56c33fa539), CNST_LIMB(0xa8adf7ae45e7577b)},
5509 /* 122 */ {9, 0.1442847926987864, CNST_LIMB(0x5317871fa13aba00), CNST_LIMB(0x8a5bc740b1c113e5)},
5510 /* 123 */ {9, 0.1440400303421672, CNST_LIMB(0x596d2f44de9fa71b), CNST_LIMB(0x6e6c7efb81cfbb9b)},
5511 /* 124 */ {9, 0.1437980688733775, CNST_LIMB(0x602fd125c47c0000), CNST_LIMB(0x54aba5c5cada5f10)},
5512 /* 125 */ {9, 0.1435588526911310, CNST_LIMB(0x6765c793fa10079d), CNST_LIMB(0x3ce9a36f23c0fc90)},
5513 /* 126 */ {9, 0.1433223277500932, CNST_LIMB(0x6f15be069b847e00), CNST_LIMB(0x26fb43de2c8cd2a8)},
5514 /* 127 */ {9, 0.1430884415049874, CNST_LIMB(0x7746b3e82a77047f), CNST_LIMB(0x12b94793db8486a1)},
5515 /* 128 */ {9, 0.1428571428571428, CNST_LIMB(0x7), CNST_LIMB(0x0)},
5516 /* 129 */ {9, 0.1426283821033600, CNST_LIMB(0x894953f7ea890481), CNST_LIMB(0xdd5deca404c0156d)},
5517 /* 130 */ {9, 0.1424021108869747, CNST_LIMB(0x932abffea4848200), CNST_LIMB(0xbd51373330291de0)},
5518 /* 131 */ {9, 0.1421782821510107, CNST_LIMB(0x9dacb687d3d6a163), CNST_LIMB(0x9fa4025d66f23085)},
5519 /* 132 */ {9, 0.1419568500933153, CNST_LIMB(0xa8d8102a44840000), CNST_LIMB(0x842530ee2db4949d)},
5520 /* 133 */ {9, 0.1417377701235801, CNST_LIMB(0xb4b60f9d140541e5), CNST_LIMB(0x6aa7f2766b03dc25)},
5521 /* 134 */ {9, 0.1415209988221527, CNST_LIMB(0xc15065d4856e4600), CNST_LIMB(0x53035ba7ebf32e8d)},
5522 /* 135 */ {9, 0.1413064939005528, CNST_LIMB(0xceb1363f396d23c7), CNST_LIMB(0x3d12091fc9fb4914)},
5523 /* 136 */ {9, 0.1410942141636095, CNST_LIMB(0xdce31b2488000000), CNST_LIMB(0x28b1cb81b1ef1849)},
5524 /* 137 */ {9, 0.1408841194731412, CNST_LIMB(0xebf12a24bca135c9), CNST_LIMB(0x15c35be67ae3e2c9)},
5525 /* 138 */ {9, 0.1406761707131039, CNST_LIMB(0xfbe6f8dbf88f4a00), CNST_LIMB(0x42a17bd09be1ff0)},
5526 /* 139 */ {8, 0.1404703297561400, CNST_LIMB(0x1ef156c084ce761), CNST_LIMB(0x8bf461f03cf0bbf)},
5527 /* 140 */ {8, 0.1402665594314587, CNST_LIMB(0x20c4e3b94a10000), CNST_LIMB(0xf3fbb43f68a32d05)},
5528 /* 141 */ {8, 0.1400648234939879, CNST_LIMB(0x22b0695a08ba421), CNST_LIMB(0xd84f44c48564dc19)},
5529 /* 142 */ {8, 0.1398650865947379, CNST_LIMB(0x24b4f35d7a4c100), CNST_LIMB(0xbe58ebcce7956abe)},
5530 /* 143 */ {8, 0.1396673142523192, CNST_LIMB(0x26d397284975781), CNST_LIMB(0xa5fac463c7c134b7)},
5531 /* 144 */ {8, 0.1394714728255649, CNST_LIMB(0x290d74100000000), CNST_LIMB(0x8f19241e28c7d757)},
5532 /* 145 */ {8, 0.1392775294872041, CNST_LIMB(0x2b63b3a37866081), CNST_LIMB(0x799a6d046c0ae1ae)},
5533 /* 146 */ {8, 0.1390854521985406, CNST_LIMB(0x2dd789f4d894100), CNST_LIMB(0x6566e37d746a9e40)},
5534 /* 147 */ {8, 0.1388952096850913, CNST_LIMB(0x306a35e51b58721), CNST_LIMB(0x526887dbfb5f788f)},
5535 /* 148 */ {8, 0.1387067714131417, CNST_LIMB(0x331d01712e10000), CNST_LIMB(0x408af3382b8efd3d)},
5536 /* 149 */ {8, 0.1385201075671774, CNST_LIMB(0x35f14200a827c61), CNST_LIMB(0x2fbb374806ec05f1)},
5537 /* 150 */ {8, 0.1383351890281539, CNST_LIMB(0x38e858b62216100), CNST_LIMB(0x1fe7c0f0afce87fe)},
5538 /* 151 */ {8, 0.1381519873525671, CNST_LIMB(0x3c03b2c13176a41), CNST_LIMB(0x11003d517540d32e)},
5539 /* 152 */ {8, 0.1379704747522905, CNST_LIMB(0x3f44c9b21000000), CNST_LIMB(0x2f5810f98eff0dc)},
5540 /* 153 */ {8, 0.1377906240751463, CNST_LIMB(0x42ad23cef3113c1), CNST_LIMB(0xeb72e35e7840d910)},
5541 /* 154 */ {8, 0.1376124087861776, CNST_LIMB(0x463e546b19a2100), CNST_LIMB(0xd27de19593dc3614)},
5542 /* 155 */ {8, 0.1374358029495937, CNST_LIMB(0x49f9fc3f96684e1), CNST_LIMB(0xbaf391fd3e5e6fc2)},
5543 /* 156 */ {8, 0.1372607812113589, CNST_LIMB(0x4de1c9c5dc10000), CNST_LIMB(0xa4bd38c55228c81d)},
5544 /* 157 */ {8, 0.1370873187823978, CNST_LIMB(0x51f77994116d2a1), CNST_LIMB(0x8fc5a8de8e1de782)},
5545 /* 158 */ {8, 0.1369153914223921, CNST_LIMB(0x563cd6bb3398100), CNST_LIMB(0x7bf9265bea9d3a3b)},
5546 /* 159 */ {8, 0.1367449754241439, CNST_LIMB(0x5ab3bb270beeb01), CNST_LIMB(0x69454b325983dccd)},
5547 /* 160 */ {8, 0.1365760475984821, CNST_LIMB(0x5f5e10000000000), CNST_LIMB(0x5798ee2308c39df9)},
5548 /* 161 */ {8, 0.1364085852596902, CNST_LIMB(0x643dce0ec16f501), CNST_LIMB(0x46e40ba0fa66a753)},
5549 /* 162 */ {8, 0.1362425662114337, CNST_LIMB(0x6954fe21e3e8100), CNST_LIMB(0x3717b0870b0db3a7)},
5550 /* 163 */ {8, 0.1360779687331669, CNST_LIMB(0x6ea5b9755f440a1), CNST_LIMB(0x2825e6775d11cdeb)},
5551 /* 164 */ {8, 0.1359147715670014, CNST_LIMB(0x74322a1c0410000), CNST_LIMB(0x1a01a1c09d1b4dac)},
5552 /* 165 */ {8, 0.1357529539050150, CNST_LIMB(0x79fc8b6ae8a46e1), CNST_LIMB(0xc9eb0a8bebc8f3e)},
5553 /* 166 */ {8, 0.1355924953769863, CNST_LIMB(0x80072a66d512100), CNST_LIMB(0xffe357ff59e6a004)},
5554 /* 167 */ {8, 0.1354333760385373, CNST_LIMB(0x86546633b42b9c1), CNST_LIMB(0xe7dfd1be05fa61a8)},
5555 /* 168 */ {8, 0.1352755763596663, CNST_LIMB(0x8ce6b0861000000), CNST_LIMB(0xd11ed6fc78f760e5)},
5556 /* 169 */ {8, 0.1351190772136599, CNST_LIMB(0x93c08e16a022441), CNST_LIMB(0xbb8db609dd29ebfe)},
5557 /* 170 */ {8, 0.1349638598663645, CNST_LIMB(0x9ae49717f026100), CNST_LIMB(0xa71aec8d1813d532)},
5558 /* 171 */ {8, 0.1348099059658079, CNST_LIMB(0xa25577ae24c1a61), CNST_LIMB(0x93b612a9f20fbc02)},
5559 /* 172 */ {8, 0.1346571975321549, CNST_LIMB(0xaa15f068e610000), CNST_LIMB(0x814fc7b19a67d317)},
5560 /* 173 */ {8, 0.1345057169479844, CNST_LIMB(0xb228d6bf7577921), CNST_LIMB(0x6fd9a03f2e0a4b7c)},
5561 /* 174 */ {8, 0.1343554469488779, CNST_LIMB(0xba91158ef5c4100), CNST_LIMB(0x5f4615a38d0d316e)},
5562 /* 175 */ {8, 0.1342063706143054, CNST_LIMB(0xc351ad9aec0b681), CNST_LIMB(0x4f8876863479a286)},
5563 /* 176 */ {8, 0.1340584713587980, CNST_LIMB(0xcc6db6100000000), CNST_LIMB(0x4094d8a3041b60eb)},
5564 /* 177 */ {8, 0.1339117329233981, CNST_LIMB(0xd5e85d09025c181), CNST_LIMB(0x32600b8ed883a09b)},
5565 /* 178 */ {8, 0.1337661393673756, CNST_LIMB(0xdfc4e816401c100), CNST_LIMB(0x24df8c6eb4b6d1f1)},
5566 /* 179 */ {8, 0.1336216750601996, CNST_LIMB(0xea06b4c72947221), CNST_LIMB(0x18097a8ee151acef)},
5567 /* 180 */ {8, 0.1334783246737591, CNST_LIMB(0xf4b139365210000), CNST_LIMB(0xbd48cc8ec1cd8e3)},
5568 /* 181 */ {8, 0.1333360731748201, CNST_LIMB(0xffc80497d520961), CNST_LIMB(0x3807a8d67485fb)},
5569 /* 182 */ {8, 0.1331949058177136, CNST_LIMB(0x10b4ebfca1dee100), CNST_LIMB(0xea5768860b62e8d8)},
5570 /* 183 */ {8, 0.1330548081372441, CNST_LIMB(0x117492de921fc141), CNST_LIMB(0xd54faf5b635c5005)},
5571 /* 184 */ {8, 0.1329157659418126, CNST_LIMB(0x123bb2ce41000000), CNST_LIMB(0xc14a56233a377926)},
5572 /* 185 */ {8, 0.1327777653067443, CNST_LIMB(0x130a8b6157bdecc1), CNST_LIMB(0xae39a88db7cd329f)},
5573 /* 186 */ {8, 0.1326407925678156, CNST_LIMB(0x13e15dede0e8a100), CNST_LIMB(0x9c10bde69efa7ab6)},
5574 /* 187 */ {8, 0.1325048343149731, CNST_LIMB(0x14c06d941c0ca7e1), CNST_LIMB(0x8ac36c42a2836497)},
5575 /* 188 */ {8, 0.1323698773862368, CNST_LIMB(0x15a7ff487a810000), CNST_LIMB(0x7a463c8b84f5ef67)},
5576 /* 189 */ {8, 0.1322359088617821, CNST_LIMB(0x169859ddc5c697a1), CNST_LIMB(0x6a8e5f5ad090fd4b)},
5577 /* 190 */ {8, 0.1321029160581950, CNST_LIMB(0x1791c60f6fed0100), CNST_LIMB(0x5b91a2943596fc56)},
5578 /* 191 */ {8, 0.1319708865228925, CNST_LIMB(0x18948e8c0e6fba01), CNST_LIMB(0x4d4667b1c468e8f0)},
5579 /* 192 */ {8, 0.1318398080287045, CNST_LIMB(0x19a1000000000000), CNST_LIMB(0x3fa39ab547994daf)},
5580 /* 193 */ {8, 0.1317096685686114, CNST_LIMB(0x1ab769203dafc601), CNST_LIMB(0x32a0a9b2faee1e2a)},
5581 /* 194 */ {8, 0.1315804563506306, CNST_LIMB(0x1bd81ab557f30100), CNST_LIMB(0x26357ceac0e96962)},
5582 /* 195 */ {8, 0.1314521597928493, CNST_LIMB(0x1d0367a69fed1ba1), CNST_LIMB(0x1a5a6f65caa5859e)},
5583 /* 196 */ {8, 0.1313247675185968, CNST_LIMB(0x1e39a5057d810000), CNST_LIMB(0xf08480f672b4e86)},
5584 /* 197 */ {8, 0.1311982683517524, CNST_LIMB(0x1f7b2a18f29ac3e1), CNST_LIMB(0x4383340615612ca)},
5585 /* 198 */ {8, 0.1310726513121843, CNST_LIMB(0x20c850694c2aa100), CNST_LIMB(0xf3c77969ee4be5a2)},
5586 /* 199 */ {8, 0.1309479056113158, CNST_LIMB(0x222173cc014980c1), CNST_LIMB(0xe00993cc187c5ec9)},
5587 /* 200 */ {8, 0.1308240206478128, CNST_LIMB(0x2386f26fc1000000), CNST_LIMB(0xcd2b297d889bc2b6)},
5588 /* 201 */ {8, 0.1307009860033912, CNST_LIMB(0x24f92ce8af296d41), CNST_LIMB(0xbb214d5064862b22)},
5589 /* 202 */ {8, 0.1305787914387386, CNST_LIMB(0x2678863cd0ece100), CNST_LIMB(0xa9e1a7ca7ea10e20)},
5590 /* 203 */ {8, 0.1304574268895465, CNST_LIMB(0x280563f0a9472d61), CNST_LIMB(0x99626e72b39ea0cf)},
5591 /* 204 */ {8, 0.1303368824626505, CNST_LIMB(0x29a02e1406210000), CNST_LIMB(0x899a5ba9c13fafd9)},
5592 /* 205 */ {8, 0.1302171484322746, CNST_LIMB(0x2b494f4efe6d2e21), CNST_LIMB(0x7a80a705391e96ff)},
5593 /* 206 */ {8, 0.1300982152363760, CNST_LIMB(0x2d0134ef21cbc100), CNST_LIMB(0x6c0cfe23de23042a)},
5594 /* 207 */ {8, 0.1299800734730872, CNST_LIMB(0x2ec84ef4da2ef581), CNST_LIMB(0x5e377df359c944dd)},
5595 /* 208 */ {8, 0.1298627138972530, CNST_LIMB(0x309f102100000000), CNST_LIMB(0x50f8ac5fc8f53985)},
5596 /* 209 */ {8, 0.1297461274170591, CNST_LIMB(0x3285ee02a1420281), CNST_LIMB(0x44497266278e35b7)},
5597 /* 210 */ {8, 0.1296303050907487, CNST_LIMB(0x347d6104fc324100), CNST_LIMB(0x382316831f7ee175)},
5598 /* 211 */ {8, 0.1295152381234257, CNST_LIMB(0x3685e47dade53d21), CNST_LIMB(0x2c7f377833b8946e)},
5599 /* 212 */ {8, 0.1294009178639407, CNST_LIMB(0x389ff6bb15610000), CNST_LIMB(0x2157c761ab4163ef)},
5600 /* 213 */ {8, 0.1292873358018581, CNST_LIMB(0x3acc1912ebb57661), CNST_LIMB(0x16a7071803cc49a9)},
5601 /* 214 */ {8, 0.1291744835645007, CNST_LIMB(0x3d0acff111946100), CNST_LIMB(0xc6781d80f8224fc)},
5602 /* 215 */ {8, 0.1290623529140715, CNST_LIMB(0x3f5ca2e692eaf841), CNST_LIMB(0x294092d370a900b)},
5603 /* 216 */ {8, 0.1289509357448472, CNST_LIMB(0x41c21cb8e1000000), CNST_LIMB(0xf24f62335024a295)},
5604 /* 217 */ {8, 0.1288402240804449, CNST_LIMB(0x443bcb714399a5c1), CNST_LIMB(0xe03b98f103fad6d2)},
5605 /* 218 */ {8, 0.1287302100711567, CNST_LIMB(0x46ca406c81af2100), CNST_LIMB(0xcee3d32cad2a9049)},
5606 /* 219 */ {8, 0.1286208859913518, CNST_LIMB(0x496e106ac22aaae1), CNST_LIMB(0xbe3f9df9277fdada)},
5607 /* 220 */ {8, 0.1285122442369443, CNST_LIMB(0x4c27d39fa5410000), CNST_LIMB(0xae46f0d94c05e933)},
5608 /* 221 */ {8, 0.1284042773229231, CNST_LIMB(0x4ef825c296e43ca1), CNST_LIMB(0x9ef2280fb437a33d)},
5609 /* 222 */ {8, 0.1282969778809442, CNST_LIMB(0x51dfa61f5ad88100), CNST_LIMB(0x9039ff426d3f284b)},
5610 /* 223 */ {8, 0.1281903386569819, CNST_LIMB(0x54def7a6d2f16901), CNST_LIMB(0x82178c6d6b51f8f4)},
5611 /* 224 */ {8, 0.1280843525090381, CNST_LIMB(0x57f6c10000000000), CNST_LIMB(0x74843b1ee4c1e053)},
5612 /* 225 */ {8, 0.1279790124049077, CNST_LIMB(0x5b27ac993df97701), CNST_LIMB(0x6779c7f90dc42f48)},
5613 /* 226 */ {8, 0.1278743114199984, CNST_LIMB(0x5e7268b9bbdf8100), CNST_LIMB(0x5af23c74f9ad9fe9)},
5614 /* 227 */ {8, 0.1277702427352035, CNST_LIMB(0x61d7a7932ff3d6a1), CNST_LIMB(0x4ee7eae2acdc617e)},
5615 /* 228 */ {8, 0.1276667996348261, CNST_LIMB(0x65581f53c8c10000), CNST_LIMB(0x43556aa2ac262a0b)},
5616 /* 229 */ {8, 0.1275639755045533, CNST_LIMB(0x68f48a385b8320e1), CNST_LIMB(0x3835949593b8ddd1)},
5617 /* 230 */ {8, 0.1274617638294791, CNST_LIMB(0x6cada69ed07c2100), CNST_LIMB(0x2d837fbe78458762)},
5618 /* 231 */ {8, 0.1273601581921741, CNST_LIMB(0x70843718cdbf27c1), CNST_LIMB(0x233a7e150a54a555)},
5619 /* 232 */ {8, 0.1272591522708010, CNST_LIMB(0x7479027ea1000000), CNST_LIMB(0x19561984a50ff8fe)},
5620 /* 233 */ {8, 0.1271587398372755, CNST_LIMB(0x788cd40268f39641), CNST_LIMB(0xfd211159fe3490f)},
5621 /* 234 */ {8, 0.1270589147554692, CNST_LIMB(0x7cc07b437ecf6100), CNST_LIMB(0x6aa563e655033e3)},
5622 /* 235 */ {8, 0.1269596709794558, CNST_LIMB(0x8114cc6220762061), CNST_LIMB(0xfbb614b3f2d3b14c)},
5623 /* 236 */ {8, 0.1268610025517973, CNST_LIMB(0x858aa0135be10000), CNST_LIMB(0xeac0f8837fb05773)},
5624 /* 237 */ {8, 0.1267629036018709, CNST_LIMB(0x8a22d3b53c54c321), CNST_LIMB(0xda6e4c10e8615ca5)},
5625 /* 238 */ {8, 0.1266653683442337, CNST_LIMB(0x8ede496339f34100), CNST_LIMB(0xcab755a8d01fa67f)},
5626 /* 239 */ {8, 0.1265683910770258, CNST_LIMB(0x93bde80aec3a1481), CNST_LIMB(0xbb95a9ae71aa3e0c)},
5627 /* 240 */ {8, 0.1264719661804097, CNST_LIMB(0x98c29b8100000000), CNST_LIMB(0xad0326c296b4f529)},
5628 /* 241 */ {8, 0.1263760881150453, CNST_LIMB(0x9ded549671832381), CNST_LIMB(0x9ef9f21eed31b7c1)},
5629 /* 242 */ {8, 0.1262807514205999, CNST_LIMB(0xa33f092e0b1ac100), CNST_LIMB(0x91747422be14b0b2)},
5630 /* 243 */ {8, 0.1261859507142915, CNST_LIMB(0xa8b8b452291fe821), CNST_LIMB(0x846d550e37b5063d)},
5631 /* 244 */ {8, 0.1260916806894653, CNST_LIMB(0xae5b564ac3a10000), CNST_LIMB(0x77df79e9a96c06f6)},
5632 /* 245 */ {8, 0.1259979361142023, CNST_LIMB(0xb427f4b3be74c361), CNST_LIMB(0x6bc6019636c7d0c2)},
5633 /* 246 */ {8, 0.1259047118299582, CNST_LIMB(0xba1f9a938041e100), CNST_LIMB(0x601c4205aebd9e47)},
5634 /* 247 */ {8, 0.1258120027502338, CNST_LIMB(0xc0435871d1110f41), CNST_LIMB(0x54ddc59756f05016)},
5635 /* 248 */ {8, 0.1257198038592741, CNST_LIMB(0xc694446f01000000), CNST_LIMB(0x4a0648979c838c18)},
5636 /* 249 */ {8, 0.1256281102107963, CNST_LIMB(0xcd137a5b57ac3ec1), CNST_LIMB(0x3f91b6e0bb3a053d)},
5637 /* 250 */ {8, 0.1255369169267456, CNST_LIMB(0xd3c21bcecceda100), CNST_LIMB(0x357c299a88ea76a5)},
5638 /* 251 */ {8, 0.1254462191960791, CNST_LIMB(0xdaa150410b788de1), CNST_LIMB(0x2bc1e517aecc56e3)},
5639 /* 252 */ {8, 0.1253560122735751, CNST_LIMB(0xe1b24521be010000), CNST_LIMB(0x225f56ceb3da9f5d)},
5640 /* 253 */ {8, 0.1252662914786691, CNST_LIMB(0xe8f62df12777c1a1), CNST_LIMB(0x1951136d53ad63ac)},
5641 /* 254 */ {8, 0.1251770521943144, CNST_LIMB(0xf06e445906fc0100), CNST_LIMB(0x1093d504b3cd7d93)},
5642 /* 255 */ {8, 0.1250882898658681, CNST_LIMB(0xf81bc845c81bf801), CNST_LIMB(0x824794d1ec1814f)},
5643 };
5644 #endif
5645
/* Assertion-failure hook for GMP's ASSERT machinery.
   In this embedding the stderr/abort diagnostic is compiled out
   (#if 0), so a failed assertion is silently ignored and the hook
   simply reports "handled" by returning 0. */
static int
#if __STDC__
__gmp_assert_fail (const char *filename, int linenum, const char *expr)
#else
__gmp_assert_fail (filename, linenum, expr)
     char *filename;
     int linenum;
     char *expr;
#endif
{
#if 0
  /* Disabled diagnostic: print "file:line: " when a location is
     available, then the failed expression, and abort. */
  if (filename != NULL && filename[0] != '\0') {
    fprintf (stderr, "%s:", filename);
    if (linenum != -1)
      fprintf (stderr, "%d: ", linenum);
  }
  fprintf (stderr, "GNU MP assertion failed: %s\n", expr);
  abort ();
#endif

  return 0;  /* diagnostic disabled above, so control does reach here */
}
5672
/* __clz_tab -- support for longlong.h's count_leading_zeros.
   __clz_tab[b] is the bit-length of the byte value b:
   __clz_tab[0] == 0, and for b > 0, __clz_tab[b] == 1 + floor(log2(b))
   (so 1 for 1, 2 for 2..3, 3 for 4..7, ..., 8 for 128..255). */

const
unsigned char __clz_tab[] =
{
  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
5687
5688
5689 /****************************************/
5690
5691 #ifndef HAVE_ALLOCA
5692
/* State for the fallback TMP_ALLOC implementation (used when alloca
   is unavailable): temporaries come from a per-thread stack of
   malloc'd chunks. */

typedef struct gmp_tmp_stack tmp_stack;  /* chunk header; struct presumably declared in gmp-impl.h -- TODO confirm */

/* High-water mark: the most temporary space ever in use at once. */
THREAD_LOCAL_DECL(static uintptr_t max_total_allocation);
/* Bytes currently reserved across all live chunks (headers excluded). */
THREAD_LOCAL_DECL(static uintptr_t current_total_allocation);

/* Sentinel bottom-of-stack chunk (set up in scheme_init_gmp_places)
   and the chunk currently being allocated from. */
THREAD_LOCAL_DECL(static tmp_stack gmp_tmp_xxx);
THREAD_LOCAL_DECL(static tmp_stack *gmp_tmp_current = 0);

/* The rounded size of the header of each allocation block. */
#define HSIZ ((sizeof (tmp_stack) + __TMP_ALIGN - 1) & -__TMP_ALIGN)
5703
/* Allocate a block of exactly <size> bytes.  This should only be called
   through the TMP_ALLOC macro, which takes care of rounding/alignment.
   Bump-allocates from the top chunk of the per-thread chunk stack,
   pushing a fresh malloc'd chunk when the top one is exhausted. */
void *
#if __STDC__
__gmp_tmp_alloc (unsigned long size)
#else
__gmp_tmp_alloc (size)
     unsigned long size;
#endif
{
  void *that;

  /* Not enough room left in the current chunk?  NOTE(review): the
     pointer difference is signed while <size> is unsigned, so the
     comparison promotes to unsigned; alloc_point never passes end,
     so the difference is non-negative and this is benign. */
  if (size > (char *) gmp_tmp_current->end - (char *) gmp_tmp_current->alloc_point)
    {
      void *chunk;
      tmp_stack *header;
      unsigned long chunk_size;
      unsigned long now;

      /* Allocate a chunk that makes the total current allocation somewhat
	 larger than the maximum allocation ever.  If size is very large, we
	 allocate that much.  */

      now = current_total_allocation + size;
      if (now > max_total_allocation)
	{
	  /* We need more temporary memory than ever before.  Increase
	     for future needs.  */
	  now = now * 3 / 2;
	  chunk_size = now - current_total_allocation + HSIZ;
	  current_total_allocation = now;
	  max_total_allocation = current_total_allocation;
	}
      else
	{
	  /* Grow back up to the established high-water mark in one step. */
	  chunk_size = max_total_allocation - current_total_allocation + HSIZ;
	  current_total_allocation = max_total_allocation;
	}

      /* Push the new chunk; its header lives in the first HSIZ bytes.
	 NOTE(review): the MALLOC result is used unchecked -- presumably
	 scheme_malloc_gmp never returns NULL; confirm. */
      chunk = MALLOC (chunk_size);
      header = (tmp_stack *) chunk;
      header->end = (char *) chunk + chunk_size;
      header->alloc_point = (char *) chunk + HSIZ;
      header->prev = gmp_tmp_current;
      gmp_tmp_current = header;
    }

  /* Bump-allocate from the (possibly new) top chunk. */
  that = gmp_tmp_current->alloc_point;
  gmp_tmp_current->alloc_point = (char *) that + size;
  return that;
}
5755
5756 /* Typically called at function entry. <mark> is assigned so that
5757 __gmp_tmp_free can later be used to reclaim all subsequently allocated
5758 storage. */
5759 void
5760 #if __STDC__
__gmp_tmp_mark(tmp_marker * mark)5761 __gmp_tmp_mark (tmp_marker *mark)
5762 #else
5763 __gmp_tmp_mark (mark)
5764 tmp_marker *mark;
5765 #endif
5766 {
5767 mark->which_chunk = gmp_tmp_current;
5768 mark->alloc_point = gmp_tmp_current->alloc_point;
5769 }
5770
5771 /* Free everything allocated since <mark> was assigned by __gmp_tmp_mark */
5772 void
5773 #if __STDC__
__gmp_tmp_free(tmp_marker * mark)5774 __gmp_tmp_free (tmp_marker *mark)
5775 #else
5776 __gmp_tmp_free (mark)
5777 tmp_marker *mark;
5778 #endif
5779 {
5780 while (mark->which_chunk != gmp_tmp_current)
5781 {
5782 tmp_stack *tmp;
5783
5784 tmp = gmp_tmp_current;
5785 gmp_tmp_current = tmp->prev;
5786 current_total_allocation -= (((char *) (tmp->end) - (char *) tmp) - HSIZ);
5787 FREE (tmp, (char *) tmp->end - (char *) tmp);
5788 }
5789 gmp_tmp_current->alloc_point = mark->alloc_point;
5790 }
5791
scheme_init_gmp_places()5792 void scheme_init_gmp_places() {
5793 gmp_tmp_xxx.end = &gmp_tmp_xxx;
5794 gmp_tmp_xxx.alloc_point = &gmp_tmp_xxx;
5795 gmp_tmp_xxx.prev = 0;
5796 gmp_tmp_current = &gmp_tmp_xxx;
5797 REGISTER_SO(gmp_mem_pool);
5798 }
5799
/* Initialize a saved GMP TLS record <s>: s[0] = current total,
   s[1] = max total, s[2] = current chunk, s[3..] = a tmp_marker,
   all set to the pristine "sentinel chunk only" state. */
void scheme_gmp_tls_init(intptr_t *s)
{
  tmp_marker *mk = (tmp_marker *)(s + 3);

  s[0] = 0;
  s[1] = 0;
  s[2] = (intptr_t)&gmp_tmp_xxx;
  mk->which_chunk = &gmp_tmp_xxx;
  mk->alloc_point = &gmp_tmp_xxx;
}
5808
/* Capture the live temporary-allocator state into the save record <s>
   (totals and current chunk) and return the current gmp memory pool. */
void *scheme_gmp_tls_load(intptr_t *s)
{
  uintptr_t cur_total = current_total_allocation;
  uintptr_t peak_total = max_total_allocation;

  s[0] = (intptr_t)cur_total;
  s[1] = (intptr_t)peak_total;
  s[2] = (intptr_t)gmp_tmp_current;
  return gmp_mem_pool;
}
5816
/* Install a previously saved allocator state from <s> and make <data>
   the current memory pool.  s[0] is zeroed after the restore
   (presumably so a stale record reads as empty -- confirm with
   callers in the scheduler). */
void scheme_gmp_tls_unload(intptr_t *s, void *data)
{
  gmp_tmp_current = (tmp_stack *)s[2];
  max_total_allocation = (uintptr_t)s[1];
  current_total_allocation = (uintptr_t)s[0];
  s[0] = 0;
  gmp_mem_pool = data;
}
5825
/* Stash the tmp_marker words of record <s> into save[0..1], then
   re-mark s's embedded marker at the current allocation point. */
void scheme_gmp_tls_snapshot(intptr_t *s, intptr_t *save)
{
  tmp_marker *mk = (tmp_marker *)(s + 3);

  save[0] = s[3];
  save[1] = s[4];
  __gmp_tmp_mark(mk);
}
5832
/* Undo a snapshot taken by scheme_gmp_tls_snapshot on record <s>.
   do_free: 0 = only put the saved marker words back into s;
            1 = also free temporaries allocated since the snapshot;
            2 = s is NOT the currently loaded state: swap it in (with
                its pool <data>), free, then swap the original state
                back.
   NOTE(review): for do_free == 2, <data> must be the pool that
   matches <s> -- confirm against callers. */
void scheme_gmp_tls_restore_snapshot(intptr_t *s, void *data, intptr_t *save, int do_free)
{
  /* silence gcc "may be used uninitialized in this function" warnings */
  intptr_t other[6] = {0,0,0,0,0,0};
  void *other_data;

  if (do_free == 2) {
    /* Park the live state in <other>, then make s's state current. */
    other_data = scheme_gmp_tls_load(other);
    scheme_gmp_tls_unload(s, data);
  } else
    other_data = NULL;

  if (do_free)
    /* Pop everything allocated since the marker embedded in s. */
    __gmp_tmp_free((tmp_marker *)(s + 3));

  if (save) {
    /* Re-install the marker words captured at snapshot time. */
    s[3] = save[0];
    s[4] = save[1];

  }

  if (do_free == 2) {
    /* Write the now-trimmed state back into s (result of the load is
       intentionally unused beyond refreshing s) and reinstate the
       original live state. */
    data = scheme_gmp_tls_load(s);
    scheme_gmp_tls_unload(other, other_data);
  }
}
5859
5860 #else
5861
/* alloca-based build: TMP_ALLOC uses the C stack, so there is no
   per-thread allocator state to initialize. */
void scheme_gmp_tls_init(intptr_t *s)
{
}
5865
/* alloca-based build: nothing to capture.
   NOTE(review): returns void here but void* in the non-alloca build
   -- confirm the prototype used by callers matches this definition. */
void scheme_gmp_tls_load(intptr_t *s)
{
}
5869
/* alloca-based build: nothing to restore.
   NOTE(review): the non-alloca variant takes an extra void *data
   parameter -- confirm the prototype used by callers matches this
   definition. */
void scheme_gmp_tls_unload(intptr_t *s)
{
}
5873
5874 #endif
5875