1 /* Include file for internal GNU MP types and definitions.
2
3 THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
5
6 Copyright 1991-2018 Free Software Foundation, Inc.
7
8 This file is part of the GNU MP Library.
9
10 The GNU MP Library is free software; you can redistribute it and/or modify
11 it under the terms of either:
12
13 * the GNU Lesser General Public License as published by the Free
14 Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
16
17 or
18
19 * the GNU General Public License as published by the Free Software
20 Foundation; either version 2 of the License, or (at your option) any
21 later version.
22
23 or both in parallel, as here.
24
25 The GNU MP Library is distributed in the hope that it will be useful, but
26 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28 for more details.
29
30 You should have received copies of the GNU General Public License and the
31 GNU Lesser General Public License along with the GNU MP Library. If not,
32 see https://www.gnu.org/licenses/. */
33
34
35 /* __GMP_DECLSPEC must be given on any global data that will be accessed
36 from outside libgmp, meaning from the test or development programs, or
37 from libgmpxx. Failing to do this will result in an incorrect address
38 being used for the accesses. On functions __GMP_DECLSPEC makes calls
39 from outside libgmp more efficient, but they'll still work fine without
40 it. */
41
42
43 #ifndef __GMP_IMPL_H__
44 #define __GMP_IMPL_H__
45
46 #if defined _CRAY
47 #include <intrinsics.h> /* for _popcnt */
48 #endif
49
50 /* For INT_MAX, etc. We used to avoid it because of a bug (on solaris,
51 gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
52 values (the ABI=64 values)), but it should be safe now.
53
54 On Cray vector systems, however, we need the system limits.h since sizes
55 of signed and unsigned types can differ there, depending on compiler
56 options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail. For
57 reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
58 short can be 24, 32, 46 or 64 bits, and different for ushort. */
59
60 #include <limits.h>
61
62 /* For fat.h and other fat binary stuff.
63 No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
64 declared this way are only used to set function pointers in __gmpn_cpuvec;
65 they're not called directly.  */
66 #define DECL_add_n(name) \
67 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
68 #define DECL_addlsh1_n(name) \
69 DECL_add_n (name)
70 #define DECL_addlsh2_n(name) \
71 DECL_add_n (name)
72 #define DECL_addmul_1(name) \
73 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
74 #define DECL_addmul_2(name) \
75 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
76 #define DECL_bdiv_dbm1c(name) \
77 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
78 #define DECL_cnd_add_n(name) \
79 __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
80 #define DECL_cnd_sub_n(name) \
81 __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
82 #define DECL_com(name) \
83 __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
84 #define DECL_copyd(name) \
85 __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
86 #define DECL_copyi(name) \
87 DECL_copyd (name)
88 #define DECL_divexact_1(name) \
89 __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
90 #define DECL_divexact_by3c(name) \
91 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
92 #define DECL_divrem_1(name) \
93 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
94 #define DECL_gcd_11(name) \
95 __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_limb_t)
96 #define DECL_lshift(name) \
97 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
98 #define DECL_lshiftc(name) \
99 DECL_lshift (name)
100 #define DECL_mod_1(name) \
101 __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
102 #define DECL_mod_1_1p(name) \
103 __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
104 #define DECL_mod_1_1p_cps(name) \
105 __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
106 #define DECL_mod_1s_2p(name) \
107 DECL_mod_1_1p (name)
108 #define DECL_mod_1s_2p_cps(name) \
109 DECL_mod_1_1p_cps (name)
110 #define DECL_mod_1s_4p(name) \
111 DECL_mod_1_1p (name)
112 #define DECL_mod_1s_4p_cps(name) \
113 DECL_mod_1_1p_cps (name)
114 #define DECL_mod_34lsub1(name) \
115 __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
116 #define DECL_modexact_1c_odd(name) \
117 __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
118 #define DECL_mul_1(name) \
119 DECL_addmul_1 (name)
120 #define DECL_mul_basecase(name) \
121 __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
122 #define DECL_mullo_basecase(name) \
123 __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
124 #define DECL_preinv_divrem_1(name) \
125 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
126 #define DECL_preinv_mod_1(name) \
127 __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
128 #define DECL_redc_1(name) \
129 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
130 #define DECL_redc_2(name) \
131 __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
132 #define DECL_rshift(name) \
133 DECL_lshift (name)
134 #define DECL_sqr_basecase(name) \
135 __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
136 #define DECL_sub_n(name) \
137 DECL_add_n (name)
138 #define DECL_sublsh1_n(name) \
139 DECL_add_n (name)
140 #define DECL_submul_1(name) \
141 DECL_addmul_1 (name)
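/* Illustrative sketch only (the real layout lives in fat.h and may differ):
   these DECL_ macros let every per-CPU variant and the corresponding entry
   in __gmpn_cpuvec be declared with one consistent prototype, e.g.

     struct example_cpuvec_t
     {
       DECL_add_n ((*add_n));
       DECL_addmul_1 ((*addmul_1));
     };
     DECL_add_n (__MPN(add_n_fictional_cpu));

   where "example_cpuvec_t" and "add_n_fictional_cpu" are made-up names.  */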
142
143 #if ! defined (__GMP_WITHIN_CONFIGURE)
144 #include "config.h"
145 #include "gmp.h"
146 #include "gmp-mparam.h"
147 #include "fib_table.h"
148 #include "fac_table.h"
149 #include "mp_bases.h"
150 #if WANT_FAT_BINARY
151 #include "fat.h"
152 #endif
153 #endif
154
155 #if HAVE_INTTYPES_H /* for uint_least32_t */
156 # include <inttypes.h>
157 #else
158 # if HAVE_STDINT_H
159 # include <stdint.h>
160 # endif
161 #endif
162
163 #ifdef __cplusplus
164 #include <cstring> /* for strlen */
165 #include <string> /* for std::string */
166 #endif
167
168
169 #ifndef WANT_TMP_DEBUG /* for TMP_ALLOC_LIMBS_2 and others */
170 #define WANT_TMP_DEBUG 0
171 #endif
172
173 /* The following tries to get a good version of alloca. The tests are
174 adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
175 Whether this succeeds is tested by GMP_FUNC_ALLOCA, and HAVE_ALLOCA will
176 be set up appropriately.
177
178 ifndef alloca - a cpp define might already exist.
179 glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
180 HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
181
182 GCC __builtin_alloca - preferred whenever available.
183
184 _AIX pragma - IBM compilers need a #pragma in "each module that needs to
185 use alloca". Pragma indented to protect pre-ANSI cpp's. _IBMR2 was
186 used in past versions of GMP, retained still in case it matters.
187
188 The autoconf manual says this pragma needs to be at the start of a C
189 file, apart from comments and preprocessor directives. Is that true?
190 xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
191 from gmp.h.
192 */
193
194 #ifndef alloca
195 # ifdef __GNUC__
196 # define alloca __builtin_alloca
197 # else
198 # ifdef __DECC
199 # define alloca(x) __ALLOCA(x)
200 # else
201 # ifdef _MSC_VER
202 # include <malloc.h>
203 # define alloca _alloca
204 # else
205 # if HAVE_ALLOCA_H
206 # include <alloca.h>
207 # else
208 # if defined (_AIX) || defined (_IBMR2)
209 #pragma alloca
210 # else
211 # if !defined (__NetBSD__)
212 char *alloca ();
213 # else
214 # include <stdlib.h>
215 # endif
216 # endif
217 # endif
218 # endif
219 # endif
220 # endif
221 #endif
222
223
224 /* if not provided by gmp-mparam.h */
225 #ifndef GMP_LIMB_BYTES
226 #define GMP_LIMB_BYTES SIZEOF_MP_LIMB_T
227 #endif
228 #ifndef GMP_LIMB_BITS
229 #define GMP_LIMB_BITS (8 * SIZEOF_MP_LIMB_T)
230 #endif
231
232 #define BITS_PER_ULONG (8 * SIZEOF_UNSIGNED_LONG)
233
234
235 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
236 #if HAVE_UINT_LEAST32_T
237 typedef uint_least32_t gmp_uint_least32_t;
238 #else
239 #if SIZEOF_UNSIGNED_SHORT >= 4
240 typedef unsigned short gmp_uint_least32_t;
241 #else
242 #if SIZEOF_UNSIGNED >= 4
243 typedef unsigned gmp_uint_least32_t;
244 #else
245 typedef unsigned long gmp_uint_least32_t;
246 #endif
247 #endif
248 #endif
249
250
251 /* gmp_intptr_t, for pointer to integer casts */
252 #if HAVE_INTPTR_T
253 typedef intptr_t gmp_intptr_t;
254 #else /* fallback */
255 typedef size_t gmp_intptr_t;
256 #endif
257
258
259 /* pre-inverse types for truncating division and modulo */
260 typedef struct {mp_limb_t inv32;} gmp_pi1_t;
261 typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
262
263
264 /* "const" basically means a function does nothing but examine its arguments
265 and give a return value; it doesn't read or write any memory (neither
266 global nor pointed to by arguments), and has no other side-effects. This
267 is more restrictive than "pure". See info node "(gcc)Function
268 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
269 this off when trying to write timing loops. */
270 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
271 #define ATTRIBUTE_CONST __attribute__ ((const))
272 #else
273 #define ATTRIBUTE_CONST
274 #endif
275
276 #if HAVE_ATTRIBUTE_NORETURN
277 #define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
278 #else
279 #define ATTRIBUTE_NORETURN
280 #endif
281
282 /* "malloc" means a function behaves like malloc in that the pointer it
283 returns doesn't alias anything. */
284 #if HAVE_ATTRIBUTE_MALLOC
285 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
286 #else
287 #define ATTRIBUTE_MALLOC
288 #endif
289
290
291 #if ! HAVE_STRCHR
292 #define strchr(s,c) index(s,c)
293 #endif
294
295 #if ! HAVE_MEMSET
296 #define memset(p, c, n) \
297 do { \
298 unsigned char *__memset__p = (unsigned char *) (p); \
299 int __i; \
300 ASSERT ((n) >= 0); \
301 for (__i = 0; __i < (n); __i++) \
302 __memset__p[__i] = (c); \
303 } while (0)
304 #endif
305
306 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
307 mode. Falling back to a memcpy will give maximum portability, since it
308 works no matter whether va_list is a pointer, struct or array. */
309 #if ! defined (va_copy) && defined (__va_copy)
310 #define va_copy(dst,src) __va_copy(dst,src)
311 #endif
312 #if ! defined (va_copy)
313 #define va_copy(dst,src) \
314 do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
315 #endif
316
317
318 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
319 (ie. ctlz, ctpop, cttz). */
320 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68 \
321 || HAVE_HOST_CPU_alphaev7
322 #define HAVE_HOST_CPU_alpha_CIX 1
323 #endif
324
325
326 #if defined (__cplusplus)
327 extern "C" {
328 #endif
329
330
331 /* Usage: TMP_DECL;
332 TMP_MARK;
333 ptr = TMP_ALLOC (bytes);
334 TMP_FREE;
335
336 Small allocations should use TMP_SALLOC, big allocations should use
337 TMP_BALLOC. Allocations that might be small or big should use TMP_ALLOC.
338
339 Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
340 TMP_SFREE.
341
342 TMP_DECL just declares a variable, but might be empty and so must be last
343 in a list of variables. TMP_MARK must be done before any TMP_ALLOC.
344 TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
345 TMP_MARK was made but no TMP_ALLOCs followed.  */
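/* A minimal usage sketch (illustrative only, not part of the library):

     static void
     example_func (mp_ptr rp, mp_srcptr up, mp_size_t n)
     {
       mp_ptr scratch;
       TMP_DECL;
       TMP_MARK;
       scratch = TMP_ALLOC_LIMBS (n);
       MPN_COPY (scratch, up, n);
       ... compute into rp using scratch ...
       TMP_FREE;
     }

   TMP_ALLOC_LIMBS and MPN_COPY are defined further down this file;
   "example_func" is of course a made-up name.  */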
346
347 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
348 __gmp_allocate_func doesn't already determine it. */
349 union tmp_align_t {
350 mp_limb_t l;
351 double d;
352 char *p;
353 };
354 #define __TMP_ALIGN sizeof (union tmp_align_t)
355
356 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
357 "a" must be an unsigned type.
358 This is designed for use with a compile-time constant "m".
359 The POW2 case is expected to be usual, and gcc 3.0 and up recognises
360 "(-(8*n))%8" or the like is always zero, which means the rounding up in
361 the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
362 #define ROUND_UP_MULTIPLE(a,m) \
363 (POW2_P(m) ? (a) + (-(a))%(m) \
364 : (a)+(m)-1 - (((a)+(m)-1) % (m)))
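/* Worked example: ROUND_UP_MULTIPLE (13, 8) takes the POW2_P branch and gives
   13 + (-13)%8 = 13 + 3 = 16 (the negation and "%" happen in the unsigned
   type of "a", so the remainder is 3, not -5); a value that is already a
   multiple, eg. ROUND_UP_MULTIPLE (16, 8), is unchanged.  */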
365
366 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
367 struct tmp_reentrant_t {
368 struct tmp_reentrant_t *next;
369 size_t size; /* bytes, including header */
370 };
371 __GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
372 __GMP_DECLSPEC void __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
373 #endif
374
375 #if WANT_TMP_ALLOCA
376 #define TMP_SDECL
377 #define TMP_DECL struct tmp_reentrant_t *__tmp_marker
378 #define TMP_SMARK
379 #define TMP_MARK __tmp_marker = 0
380 #define TMP_SALLOC(n) alloca(n)
381 #define TMP_BALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
382 /* The peculiar stack allocation limit here is chosen for efficient asm. */
383 #define TMP_ALLOC(n) \
384 (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
385 #define TMP_SFREE
386 #define TMP_FREE \
387 do { \
388 if (UNLIKELY (__tmp_marker != 0)) \
389 __gmp_tmp_reentrant_free (__tmp_marker); \
390 } while (0)
391 #endif
392
393 #if WANT_TMP_REENTRANT
394 #define TMP_SDECL TMP_DECL
395 #define TMP_DECL struct tmp_reentrant_t *__tmp_marker
396 #define TMP_SMARK TMP_MARK
397 #define TMP_MARK __tmp_marker = 0
398 #define TMP_SALLOC(n) TMP_ALLOC(n)
399 #define TMP_BALLOC(n) TMP_ALLOC(n)
400 #define TMP_ALLOC(n) __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
401 #define TMP_SFREE TMP_FREE
402 #define TMP_FREE __gmp_tmp_reentrant_free (__tmp_marker)
403 #endif
404
405 #if WANT_TMP_NOTREENTRANT
406 struct tmp_marker
407 {
408 struct tmp_stack *which_chunk;
409 void *alloc_point;
410 };
411 __GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
412 __GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
413 __GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
414 #define TMP_SDECL TMP_DECL
415 #define TMP_DECL struct tmp_marker __tmp_marker
416 #define TMP_SMARK TMP_MARK
417 #define TMP_MARK __gmp_tmp_mark (&__tmp_marker)
418 #define TMP_SALLOC(n) TMP_ALLOC(n)
419 #define TMP_BALLOC(n) TMP_ALLOC(n)
420 #define TMP_ALLOC(n) \
421 __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
422 #define TMP_SFREE TMP_FREE
423 #define TMP_FREE __gmp_tmp_free (&__tmp_marker)
424 #endif
425
426 #if WANT_TMP_DEBUG
427 /* See tal-debug.c for some comments. */
428 struct tmp_debug_t {
429 struct tmp_debug_entry_t *list;
430 const char *file;
431 int line;
432 };
433 struct tmp_debug_entry_t {
434 struct tmp_debug_entry_t *next;
435 void *block;
436 size_t size;
437 };
438 __GMP_DECLSPEC void __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
439 struct tmp_debug_t *,
440 const char *, const char *);
441 __GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
442 struct tmp_debug_t **, const char *,
443 size_t) ATTRIBUTE_MALLOC;
444 __GMP_DECLSPEC void __gmp_tmp_debug_free (const char *, int, int,
445 struct tmp_debug_t **,
446 const char *, const char *);
447 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
448 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
449 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
450 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
451 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
452 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
453 /* The marker variable is designed to provoke an uninitialized variable
454 warning from the compiler if TMP_FREE is used without a TMP_MARK.
455 __tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick
456 these things up too. */
457 #define TMP_DECL_NAME(marker, marker_name) \
458 int marker; \
459 int __tmp_marker_inscope; \
460 const char *__tmp_marker_name = marker_name; \
461 struct tmp_debug_t __tmp_marker_struct; \
462 /* don't demand NULL, just cast a zero */ \
463 struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
464 #define TMP_MARK_NAME(marker, marker_name) \
465 do { \
466 marker = 1; \
467 __tmp_marker_inscope = 1; \
468 __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
469 &__tmp_marker, &__tmp_marker_struct, \
470 __tmp_marker_name, marker_name); \
471 } while (0)
472 #define TMP_SALLOC(n) TMP_ALLOC(n)
473 #define TMP_BALLOC(n) TMP_ALLOC(n)
474 #define TMP_ALLOC(size) \
475 __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
476 __tmp_marker_inscope, \
477 &__tmp_marker, __tmp_marker_name, size)
478 #define TMP_FREE_NAME(marker, marker_name) \
479 do { \
480 __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
481 marker, &__tmp_marker, \
482 __tmp_marker_name, marker_name); \
483 } while (0)
484 #endif /* WANT_TMP_DEBUG */
485
486
487 /* Allocating various types. */
488 #define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
489 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
490 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
491 #define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
492 #define TMP_SALLOC_LIMBS(n) TMP_SALLOC_TYPE(n,mp_limb_t)
493 #define TMP_BALLOC_LIMBS(n) TMP_BALLOC_TYPE(n,mp_limb_t)
494 #define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
495 #define TMP_SALLOC_MP_PTRS(n) TMP_SALLOC_TYPE(n,mp_ptr)
496 #define TMP_BALLOC_MP_PTRS(n) TMP_BALLOC_TYPE(n,mp_ptr)
497
498 /* It's more efficient to allocate one block than many. This is certainly
499 true of the malloc methods, but it can even be true of alloca if that
500 involves copying a chunk of stack (various RISCs), or a call to a stack
501 bounds check (mingw). In any case, when debugging keep separate blocks
502 so a redzoning malloc debugger can protect each individually. */
503 #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
504 do { \
505 if (WANT_TMP_DEBUG) \
506 { \
507 (xp) = TMP_ALLOC_LIMBS (xsize); \
508 (yp) = TMP_ALLOC_LIMBS (ysize); \
509 } \
510 else \
511 { \
512 (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
513 (yp) = (xp) + (xsize); \
514 } \
515 } while (0)
516 #define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize) \
517 do { \
518 if (WANT_TMP_DEBUG) \
519 { \
520 (xp) = TMP_ALLOC_LIMBS (xsize); \
521 (yp) = TMP_ALLOC_LIMBS (ysize); \
522 (zp) = TMP_ALLOC_LIMBS (zsize); \
523 } \
524 else \
525 { \
526 (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize)); \
527 (yp) = (xp) + (xsize); \
528 (zp) = (yp) + (ysize); \
529 } \
530 } while (0)
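/* Illustrative use of the pair version (made-up variable names):

     mp_ptr t1, t2;
     TMP_DECL;
     TMP_MARK;
     TMP_ALLOC_LIMBS_2 (t1, n, t2, 2 * n);
     ...
     TMP_FREE;

   normally giving one underlying block of n + 2*n limbs, but two separate
   blocks under WANT_TMP_DEBUG so a redzoning malloc debugger can check each
   individually.  */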
531
532 /* From gmp.h, nicer names for internal use. */
533 #define CRAY_Pragma(str) __GMP_CRAY_Pragma(str)
534 #define MPN_CMP(result, xp, yp, size) __GMPN_CMP(result, xp, yp, size)
535 #define LIKELY(cond) __GMP_LIKELY(cond)
536 #define UNLIKELY(cond) __GMP_UNLIKELY(cond)
537
538 #define ABS(x) ((x) >= 0 ? (x) : -(x))
539 #define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
540 #define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
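/* NEG_CAST forms -(cast(x+1) - 1) rather than cast(-x), presumably so the
   most negative value of a signed type negates without signed overflow: for
   a 32-bit long, ABS_CAST (unsigned long, LONG_MIN) computes LONG_MIN+1
   (still representable), casts to 0x80000001, subtracts 1 and negates in the
   unsigned type, giving 0x80000000.  */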
541 #undef MIN
542 #define MIN(l,o) ((l) < (o) ? (l) : (o))
543 #undef MAX
544 #define MAX(h,i) ((h) > (i) ? (h) : (i))
545 #define numberof(x) (sizeof (x) / sizeof ((x)[0]))
546
547 /* Field access macros. */
548 #define SIZ(x) ((x)->_mp_size)
549 #define ABSIZ(x) ABS (SIZ (x))
550 #define PTR(x) ((x)->_mp_d)
551 #define EXP(x) ((x)->_mp_exp)
552 #define PREC(x) ((x)->_mp_prec)
553 #define ALLOC(x) ((x)->_mp_alloc)
554 #define NUM(x) mpq_numref(x)
555 #define DEN(x) mpq_denref(x)
556
557 /* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
558 then that lowest one bit must have been the only bit set. n==0 will
559 return true though, so avoid that. */
560 #define POW2_P(n) (((n) & ((n) - 1)) == 0)
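/* For example POW2_P(8) is (8 & 7) == 0, true; POW2_P(12) has 12 & 11 == 8,
   so false; and POW2_P(0) is (0 & ~0) == 0, true, hence the caveat above.  */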
561
562 /* This is intended for constant THRESHOLDs only, where the compiler
563 can completely fold the result. */
564 #define LOG2C(n) \
565 (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
566 ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
567 ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
568 ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
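/* In effect LOG2C(n) is the bit length of n (up to n = 0xffff): LOG2C(1)=1,
   LOG2C(7)=3, LOG2C(8)=4, LOG2C(0)=0.  With a constant n the whole sum of
   comparisons folds to a constant.  */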
569
570 #define MP_LIMB_T_MAX (~ (mp_limb_t) 0)
571
572 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
573 unsigned on a K&R compiler. In particular the HP-UX 10 bundled K&R cc
574 treats the plain decimal values in <limits.h> as signed. */
575 #define ULONG_HIGHBIT (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
576 #define UINT_HIGHBIT (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
577 #define USHRT_HIGHBIT (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
578 #define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
579
580 #if __GMP_MP_SIZE_T_INT
581 #define MP_SIZE_T_MAX INT_MAX
582 #define MP_SIZE_T_MIN INT_MIN
583 #else
584 #define MP_SIZE_T_MAX LONG_MAX
585 #define MP_SIZE_T_MIN LONG_MIN
586 #endif
587
588 /* mp_exp_t is the same as mp_size_t */
589 #define MP_EXP_T_MAX MP_SIZE_T_MAX
590 #define MP_EXP_T_MIN MP_SIZE_T_MIN
591
592 #define LONG_HIGHBIT LONG_MIN
593 #define INT_HIGHBIT INT_MIN
594 #define SHRT_HIGHBIT SHRT_MIN
595
596
597 #define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
598
599 #if GMP_NAIL_BITS == 0
600 #define GMP_NAIL_LOWBIT CNST_LIMB(0)
601 #else
602 #define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS)
603 #endif
604
605 #if GMP_NAIL_BITS != 0
606 /* Set various *_THRESHOLD values to be used for nails. Thus we avoid using
607 code that has not yet been qualified. */
608
609 #undef DC_DIV_QR_THRESHOLD
610 #define DC_DIV_QR_THRESHOLD 50
611
612 #undef DIVREM_1_NORM_THRESHOLD
613 #undef DIVREM_1_UNNORM_THRESHOLD
614 #undef MOD_1_NORM_THRESHOLD
615 #undef MOD_1_UNNORM_THRESHOLD
616 #undef USE_PREINV_DIVREM_1
617 #undef DIVREM_2_THRESHOLD
618 #undef DIVEXACT_1_THRESHOLD
619 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
620 #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
621 #define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
622 #define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
623 #define USE_PREINV_DIVREM_1 0 /* no preinv */
624 #define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* no preinv */
625
626 /* mpn/generic/mul_fft.c is not nails-capable. */
627 #undef MUL_FFT_THRESHOLD
628 #undef SQR_FFT_THRESHOLD
629 #define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
630 #define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
631 #endif
632
633 /* Swap macros. */
634
635 #define MP_LIMB_T_SWAP(x, y) \
636 do { \
637 mp_limb_t __mp_limb_t_swap__tmp = (x); \
638 (x) = (y); \
639 (y) = __mp_limb_t_swap__tmp; \
640 } while (0)
641 #define MP_SIZE_T_SWAP(x, y) \
642 do { \
643 mp_size_t __mp_size_t_swap__tmp = (x); \
644 (x) = (y); \
645 (y) = __mp_size_t_swap__tmp; \
646 } while (0)
647
648 #define MP_PTR_SWAP(x, y) \
649 do { \
650 mp_ptr __mp_ptr_swap__tmp = (x); \
651 (x) = (y); \
652 (y) = __mp_ptr_swap__tmp; \
653 } while (0)
654 #define MP_SRCPTR_SWAP(x, y) \
655 do { \
656 mp_srcptr __mp_srcptr_swap__tmp = (x); \
657 (x) = (y); \
658 (y) = __mp_srcptr_swap__tmp; \
659 } while (0)
660
661 #define MPN_PTR_SWAP(xp,xs, yp,ys) \
662 do { \
663 MP_PTR_SWAP (xp, yp); \
664 MP_SIZE_T_SWAP (xs, ys); \
665 } while(0)
666 #define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
667 do { \
668 MP_SRCPTR_SWAP (xp, yp); \
669 MP_SIZE_T_SWAP (xs, ys); \
670 } while(0)
671
672 #define MPZ_PTR_SWAP(x, y) \
673 do { \
674 mpz_ptr __mpz_ptr_swap__tmp = (x); \
675 (x) = (y); \
676 (y) = __mpz_ptr_swap__tmp; \
677 } while (0)
678 #define MPZ_SRCPTR_SWAP(x, y) \
679 do { \
680 mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
681 (x) = (y); \
682 (y) = __mpz_srcptr_swap__tmp; \
683 } while (0)
684
685 #define MPQ_PTR_SWAP(x, y) \
686 do { \
687 mpq_ptr __mpq_ptr_swap__tmp = (x); \
688 (x) = (y); \
689 (y) = __mpq_ptr_swap__tmp; \
690 } while (0)
691 #define MPQ_SRCPTR_SWAP(x, y) \
692 do { \
693 mpq_srcptr __mpq_srcptr_swap__tmp = (x); \
694 (x) = (y); \
695 (y) = __mpq_srcptr_swap__tmp; \
696 } while (0)
697
698
699 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
700 but current gcc (3.0) doesn't seem to support that. */
701 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
702 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
703 __GMP_DECLSPEC extern void (*__gmp_free_func) (void *, size_t);
704
705 __GMP_DECLSPEC void *__gmp_default_allocate (size_t);
706 __GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
707 __GMP_DECLSPEC void __gmp_default_free (void *, size_t);
708
709 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
710 ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
711 #define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
712
713 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
714 ((type *) (*__gmp_reallocate_func) \
715 (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
716 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
717 __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
718
719 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
720 #define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
721
722 #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
723 do { \
724 if ((oldsize) != (newsize)) \
725 (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
726 } while (0)
727
728 #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
729 do { \
730 if ((oldsize) != (newsize)) \
731 (ptr) = (type *) (*__gmp_reallocate_func) \
732 (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
733 } while (0)
734
735
736 /* Dummy for non-gcc; code involving it will go dead.  */
737 #if ! defined (__GNUC__) || __GNUC__ < 2
738 #define __builtin_constant_p(x) 0
739 #endif
740
741
742 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
743 stack usage is compatible. __attribute__ ((regparm (N))) helps by
744 putting leading parameters in registers, avoiding extra stack.
745
746 regparm cannot be used with calls going through the PLT, because the
747 binding code there may clobber the registers (%eax, %edx, %ecx) used for
748 the regparm parameters. Calls to local (ie. static) functions could
749 still use this, if we cared to differentiate locals and globals.
750
751 On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
752 -p or -pg profiling, since that version of gcc doesn't realize the
753 .mcount calls will clobber the parameter registers. Other systems are
754 ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
755 bother to try to detect this. regparm is only an optimization so we just
756 disable it when profiling (profiling being a slowdown anyway). */
757
758 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
759 && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
760 #define USE_LEADING_REGPARM 1
761 #else
762 #define USE_LEADING_REGPARM 0
763 #endif
764
765 /* Macros for altering parameter order according to regparm usage. */
766 #if USE_LEADING_REGPARM
767 #define REGPARM_2_1(a,b,x) x,a,b
768 #define REGPARM_3_1(a,b,c,x) x,a,b,c
769 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
770 #else
771 #define REGPARM_2_1(a,b,x) a,b,x
772 #define REGPARM_3_1(a,b,c,x) a,b,c,x
773 #define REGPARM_ATTR(n)
774 #endif
775
776
777 /* ASM_L gives a local label for a gcc asm block, for use when temporary
778 local labels like "1:" might not be available, which is the case for
779 instance on the x86s (the SCO assembler doesn't support them).
780
781 The label generated is made unique by including "%=" which is a unique
782 number for each insn. This ensures the same name can be used in multiple
783 asm blocks, perhaps via a macro. Since jumps between asm blocks are not
784 allowed there's no need for a label to be usable outside a single
785 block. */
786
787 #define ASM_L(name) LSYM_PREFIX "asm_%=_" #name
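/* Illustrative only, assuming LSYM_PREFIX expands to something like ".L":

     __asm__ ("jmp " ASM_L (skip) "\n\t"
              "nop\n"
              ASM_L (skip) ":" : : );

   Each expansion gets a distinct label such as ".Lasm_4_skip", courtesy of
   the "%=" insn number (the empty operand lists make this extended asm, so
   "%=" is substituted).  */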
788
789
790 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
791 #if 0
792 /* FIXME: Check that these actually improve things.
793 FIXME: Need a cld after each std.
794 FIXME: Can't have inputs in clobbered registers, must describe them as
795 dummy outputs, and add volatile. */
796 #define MPN_COPY_INCR(DST, SRC, N) \
797 __asm__ ("cld\n\trep\n\tmovsl" : : \
798 "D" (DST), "S" (SRC), "c" (N) : \
799 "cx", "di", "si", "memory")
800 #define MPN_COPY_DECR(DST, SRC, N) \
801 __asm__ ("std\n\trep\n\tmovsl" : : \
802 "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
803 "cx", "di", "si", "memory")
804 #endif
805 #endif
806
807
808 __GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
809 #define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
810
811 #define mpz_n_pow_ui __gmpz_n_pow_ui
812 __GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);
813
814
815 #define mpn_addmul_1c __MPN(addmul_1c)
816 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
817
818 #ifndef mpn_addmul_2 /* if not done with cpuvec in a fat binary */
819 #define mpn_addmul_2 __MPN(addmul_2)
820 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
821 #endif
822
823 #define mpn_addmul_3 __MPN(addmul_3)
824 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
825
826 #define mpn_addmul_4 __MPN(addmul_4)
827 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
828
829 #define mpn_addmul_5 __MPN(addmul_5)
830 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
831
832 #define mpn_addmul_6 __MPN(addmul_6)
833 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
834
835 #define mpn_addmul_7 __MPN(addmul_7)
836 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
837
838 #define mpn_addmul_8 __MPN(addmul_8)
839 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
840
841 /* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase. */
842 #define mpn_addmul_2s __MPN(addmul_2s)
843 __GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
844
845 /* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc with mpn_addlsh_n,
846 etc, when the former have no HAVE_NATIVE_ version but the latter do.
847 Similarly, override foo_ip1 functions with foo.  We then lie and say these
848 macros represent native functions, but leave a trace by defining the
849 HAVE_NATIVE_ symbol as 2 rather than 1.  */
850
851 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n
852 #define mpn_addlsh1_n(a,b,c,d) mpn_addlsh_n(a,b,c,d,1)
853 #define HAVE_NATIVE_mpn_addlsh1_n 2
854 #endif
855
856 #if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh1_nc
857 #define mpn_addlsh1_nc(a,b,c,d,x) mpn_addlsh_nc(a,b,c,d,1,x)
858 #define HAVE_NATIVE_mpn_addlsh1_nc 2
859 #endif
860
861 #if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
862 #define mpn_addlsh1_n_ip1(a,b,n) mpn_addlsh1_n(a,a,b,n)
863 #define HAVE_NATIVE_mpn_addlsh1_n_ip1 2
864 #endif
865
866 #if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
867 #define mpn_addlsh1_nc_ip1(a,b,n,c) mpn_addlsh1_nc(a,a,b,n,c)
868 #define HAVE_NATIVE_mpn_addlsh1_nc_ip1 2
869 #endif
870
871 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n
872 #define mpn_addlsh2_n(a,b,c,d) mpn_addlsh_n(a,b,c,d,2)
873 #define HAVE_NATIVE_mpn_addlsh2_n 2
874 #endif
875
876 #if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh2_nc
877 #define mpn_addlsh2_nc(a,b,c,d,x) mpn_addlsh_nc(a,b,c,d,2,x)
878 #define HAVE_NATIVE_mpn_addlsh2_nc 2
879 #endif
880
881 #if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
882 #define mpn_addlsh2_n_ip1(a,b,n) mpn_addlsh2_n(a,a,b,n)
883 #define HAVE_NATIVE_mpn_addlsh2_n_ip1 2
884 #endif
885
886 #if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
887 #define mpn_addlsh2_nc_ip1(a,b,n,c) mpn_addlsh2_nc(a,a,b,n,c)
888 #define HAVE_NATIVE_mpn_addlsh2_nc_ip1 2
889 #endif
890
891 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n
892 #define mpn_sublsh1_n(a,b,c,d) mpn_sublsh_n(a,b,c,d,1)
893 #define HAVE_NATIVE_mpn_sublsh1_n 2
894 #endif
895
896 #if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh1_nc
897 #define mpn_sublsh1_nc(a,b,c,d,x) mpn_sublsh_nc(a,b,c,d,1,x)
898 #define HAVE_NATIVE_mpn_sublsh1_nc 2
899 #endif
900
901 #if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
902 #define mpn_sublsh1_n_ip1(a,b,n) mpn_sublsh1_n(a,a,b,n)
903 #define HAVE_NATIVE_mpn_sublsh1_n_ip1 2
904 #endif
905
906 #if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
907 #define mpn_sublsh1_nc_ip1(a,b,n,c) mpn_sublsh1_nc(a,a,b,n,c)
908 #define HAVE_NATIVE_mpn_sublsh1_nc_ip1 2
909 #endif
910
911 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n
912 #define mpn_sublsh2_n(a,b,c,d) mpn_sublsh_n(a,b,c,d,2)
913 #define HAVE_NATIVE_mpn_sublsh2_n 2
914 #endif
915
916 #if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh2_nc
917 #define mpn_sublsh2_nc(a,b,c,d,x) mpn_sublsh_nc(a,b,c,d,2,x)
918 #define HAVE_NATIVE_mpn_sublsh2_nc 2
919 #endif
920
921 #if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
922 #define mpn_sublsh2_n_ip1(a,b,n) mpn_sublsh2_n(a,a,b,n)
923 #define HAVE_NATIVE_mpn_sublsh2_n_ip1 2
924 #endif
925
926 #if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
927 #define mpn_sublsh2_nc_ip1(a,b,n,c) mpn_sublsh2_nc(a,a,b,n,c)
928 #define HAVE_NATIVE_mpn_sublsh2_nc_ip1 2
929 #endif
930
931 #if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n
932 #define mpn_rsblsh1_n(a,b,c,d) mpn_rsblsh_n(a,b,c,d,1)
933 #define HAVE_NATIVE_mpn_rsblsh1_n 2
934 #endif
935
936 #if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc
937 #define mpn_rsblsh1_nc(a,b,c,d,x) mpn_rsblsh_nc(a,b,c,d,1,x)
938 #define HAVE_NATIVE_mpn_rsblsh1_nc 2
939 #endif
940
941 #if HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_rsblsh1_n_ip1
942 #define mpn_rsblsh1_n_ip1(a,b,n) mpn_rsblsh1_n(a,a,b,n)
943 #define HAVE_NATIVE_mpn_rsblsh1_n_ip1 2
944 #endif
945
946 #if HAVE_NATIVE_mpn_rsblsh1_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc_ip1
947 #define mpn_rsblsh1_nc_ip1(a,b,n,c) mpn_rsblsh1_nc(a,a,b,n,c)
948 #define HAVE_NATIVE_mpn_rsblsh1_nc_ip1 2
949 #endif
950
951 #if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n
952 #define mpn_rsblsh2_n(a,b,c,d) mpn_rsblsh_n(a,b,c,d,2)
953 #define HAVE_NATIVE_mpn_rsblsh2_n 2
954 #endif
955
956 #if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc
957 #define mpn_rsblsh2_nc(a,b,c,d,x) mpn_rsblsh_nc(a,b,c,d,2,x)
958 #define HAVE_NATIVE_mpn_rsblsh2_nc 2
959 #endif
960
961 #if HAVE_NATIVE_mpn_rsblsh2_n && ! HAVE_NATIVE_mpn_rsblsh2_n_ip1
962 #define mpn_rsblsh2_n_ip1(a,b,n) mpn_rsblsh2_n(a,a,b,n)
963 #define HAVE_NATIVE_mpn_rsblsh2_n_ip1 2
964 #endif
965
966 #if HAVE_NATIVE_mpn_rsblsh2_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc_ip1
967 #define mpn_rsblsh2_nc_ip1(a,b,n,c) mpn_rsblsh2_nc(a,a,b,n,c)
968 #define HAVE_NATIVE_mpn_rsblsh2_nc_ip1 2
969 #endif
970
971
972 #ifndef mpn_addlsh1_n
973 #define mpn_addlsh1_n __MPN(addlsh1_n)
974 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
975 #endif
976 #ifndef mpn_addlsh1_nc
977 #define mpn_addlsh1_nc __MPN(addlsh1_nc)
978 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
979 #endif
980 #ifndef mpn_addlsh1_n_ip1
981 #define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
982 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
983 #endif
984 #ifndef mpn_addlsh1_nc_ip1
985 #define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
986 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
987 #endif
988
989 #ifndef mpn_addlsh2_n
990 #define mpn_addlsh2_n __MPN(addlsh2_n)
991 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
992 #endif
993 #ifndef mpn_addlsh2_nc
994 #define mpn_addlsh2_nc __MPN(addlsh2_nc)
995 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
996 #endif
997 #ifndef mpn_addlsh2_n_ip1
998 #define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
999 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
1000 #endif
1001 #ifndef mpn_addlsh2_nc_ip1
1002 #define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
1003 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1004 #endif
1005
1006 #ifndef mpn_addlsh_n
1007 #define mpn_addlsh_n __MPN(addlsh_n)
1008 __GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
1009 #endif
1010 #ifndef mpn_addlsh_nc
1011 #define mpn_addlsh_nc __MPN(addlsh_nc)
1012 __GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
1013 #endif
1014 #ifndef mpn_addlsh_n_ip1
1015 #define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
1016 __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
1017 #endif
1018 #ifndef mpn_addlsh_nc_ip1
1019 #define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
1020 __GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
1021 #endif
1022
1023 #ifndef mpn_sublsh1_n
1024 #define mpn_sublsh1_n __MPN(sublsh1_n)
1025 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1026 #endif
1027 #ifndef mpn_sublsh1_nc
1028 #define mpn_sublsh1_nc __MPN(sublsh1_nc)
1029 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1030 #endif
1031 #ifndef mpn_sublsh1_n_ip1
1032 #define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
1033 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
1034 #endif
1035 #ifndef mpn_sublsh1_nc_ip1
1036 #define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
1037 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1038 #endif
1039
1040 #ifndef mpn_sublsh2_n
1041 #define mpn_sublsh2_n __MPN(sublsh2_n)
1042 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1043 #endif
1044 #ifndef mpn_sublsh2_nc
1045 #define mpn_sublsh2_nc __MPN(sublsh2_nc)
1046 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1047 #endif
1048 #ifndef mpn_sublsh2_n_ip1
1049 #define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
1050 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
1051 #endif
1052 #ifndef mpn_sublsh2_nc_ip1
1053 #define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
1054 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1055 #endif
1056
1057 #ifndef mpn_sublsh_n
1058 #define mpn_sublsh_n __MPN(sublsh_n)
1059 __GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
1060 #endif
1061 #ifndef mpn_sublsh_nc
1062 #define mpn_sublsh_nc __MPN(sublsh_nc)
1063 __GMP_DECLSPEC mp_limb_t mpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
1064 #endif
1065 #ifndef mpn_sublsh_n_ip1
1066 #define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
1067 __GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
1068 #endif
1069 #ifndef mpn_sublsh_nc_ip1
1070 #define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
1071 __GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
1072 #endif
1073
1074 #define mpn_rsblsh1_n __MPN(rsblsh1_n)
1075 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1076 #define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
1077 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1078
1079 #define mpn_rsblsh2_n __MPN(rsblsh2_n)
1080 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1081 #define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
1082 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1083
1084 #define mpn_rsblsh_n __MPN(rsblsh_n)
1085 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
1086 #define mpn_rsblsh_nc __MPN(rsblsh_nc)
1087 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
1088
1089 #define mpn_rsh1add_n __MPN(rsh1add_n)
1090 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1091 #define mpn_rsh1add_nc __MPN(rsh1add_nc)
1092 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1093
1094 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
1095 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1096 #define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
1097 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1098
1099 #ifndef mpn_lshiftc /* if not done with cpuvec in a fat binary */
1100 #define mpn_lshiftc __MPN(lshiftc)
1101 __GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
1102 #endif
1103
1104 #define mpn_add_err1_n __MPN(add_err1_n)
1105 __GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1106
1107 #define mpn_add_err2_n __MPN(add_err2_n)
1108 __GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1109
1110 #define mpn_add_err3_n __MPN(add_err3_n)
1111 __GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1112
1113 #define mpn_sub_err1_n __MPN(sub_err1_n)
1114 __GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1115
1116 #define mpn_sub_err2_n __MPN(sub_err2_n)
1117 __GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1118
1119 #define mpn_sub_err3_n __MPN(sub_err3_n)
1120 __GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1121
1122 #define mpn_add_n_sub_n __MPN(add_n_sub_n)
1123 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1124
1125 #define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
1126 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1127
1128 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
1129 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1130
1131 #define mpn_divrem_1c __MPN(divrem_1c)
1132 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1133
1134 #define mpn_dump __MPN(dump)
1135 __GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);
1136
1137 #define mpn_fib2_ui __MPN(fib2_ui)
1138 __GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
1139
1140 #define mpn_fib2m __MPN(fib2m)
1141 __GMP_DECLSPEC int mpn_fib2m (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1142
1143 #define mpn_strongfibo __MPN(strongfibo)
1144 __GMP_DECLSPEC int mpn_strongfibo (mp_srcptr, mp_size_t, mp_ptr);
1145
1146 /* Remap names of internal mpn functions. */
1147 #define __clz_tab __MPN(clz_tab)
1148 #define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
1149
1150 #define mpn_jacobi_base __MPN(jacobi_base)
1151 __GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;
1152
1153 #define mpn_jacobi_2 __MPN(jacobi_2)
1154 __GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);
1155
1156 #define mpn_jacobi_n __MPN(jacobi_n)
1157 __GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);
1158
1159 #define mpn_mod_1c __MPN(mod_1c)
1160 __GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
1161
1162 #define mpn_mul_1c __MPN(mul_1c)
1163 __GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1164
1165 #define mpn_mul_2 __MPN(mul_2)
1166 __GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1167
1168 #define mpn_mul_3 __MPN(mul_3)
1169 __GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1170
1171 #define mpn_mul_4 __MPN(mul_4)
1172 __GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1173
1174 #define mpn_mul_5 __MPN(mul_5)
1175 __GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1176
1177 #define mpn_mul_6 __MPN(mul_6)
1178 __GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1179
1180 #ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
1181 #define mpn_mul_basecase __MPN(mul_basecase)
1182 __GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1183 #endif
1184
1185 #define mpn_mullo_n __MPN(mullo_n)
1186 __GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1187
1188 #ifndef mpn_mullo_basecase /* if not done with cpuvec in a fat binary */
1189 #define mpn_mullo_basecase __MPN(mullo_basecase)
1190 __GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1191 #endif
1192
1193 #ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */
1194 #define mpn_sqr_basecase __MPN(sqr_basecase)
1195 __GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
1196 #endif
1197
1198 #define mpn_sqrlo __MPN(sqrlo)
1199 __GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);
1200
1201 #define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
1202 __GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);
1203
1204 #define mpn_mulmid_basecase __MPN(mulmid_basecase)
1205 __GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1206
1207 #define mpn_mulmid_n __MPN(mulmid_n)
1208 __GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1209
1210 #define mpn_mulmid __MPN(mulmid)
1211 __GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1212
1213 #define mpn_submul_1c __MPN(submul_1c)
1214 __GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1215
1216 #ifndef mpn_redc_1 /* if not done with cpuvec in a fat binary */
1217 #define mpn_redc_1 __MPN(redc_1)
1218 __GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1219 #endif
1220
1221 #ifndef mpn_redc_2 /* if not done with cpuvec in a fat binary */
1222 #define mpn_redc_2 __MPN(redc_2)
1223 __GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1224 #endif
1225
1226 #define mpn_redc_n __MPN(redc_n)
1227 __GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1228
1229
1230 #ifndef mpn_mod_1_1p_cps /* if not done with cpuvec in a fat binary */
1231 #define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
1232 __GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
1233 #endif
1234 #ifndef mpn_mod_1_1p /* if not done with cpuvec in a fat binary */
1235 #define mpn_mod_1_1p __MPN(mod_1_1p)
1236 __GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
1237 #endif
1238
1239 #ifndef mpn_mod_1s_2p_cps /* if not done with cpuvec in a fat binary */
1240 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
1241 __GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
1242 #endif
1243 #ifndef mpn_mod_1s_2p /* if not done with cpuvec in a fat binary */
1244 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
1245 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
1246 #endif
1247
1248 #ifndef mpn_mod_1s_3p_cps /* if not done with cpuvec in a fat binary */
1249 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
1250 __GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
1251 #endif
1252 #ifndef mpn_mod_1s_3p /* if not done with cpuvec in a fat binary */
1253 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
1254 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
1255 #endif
1256
1257 #ifndef mpn_mod_1s_4p_cps /* if not done with cpuvec in a fat binary */
1258 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
1259 __GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
1260 #endif
1261 #ifndef mpn_mod_1s_4p /* if not done with cpuvec in a fat binary */
1262 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
1263 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
1264 #endif
1265
1266 #define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
1267 __GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
1268 #define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
1269 __GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1270 #define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
1271 __GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
1272 static inline mp_size_t
1273 mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
1274 mp_size_t n, itch;
1275 n = rn >> 1;
1276 itch = rn + 4 +
1277 (an > n ? (bn > n ? rn : n) : 0);
1278 return itch;
1279 }
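/* For example rn = 10, an = bn = 6 gives n = 5, both operands above n, and
   an itch of rn + 4 + rn = 24 limbs; with an <= n it is just rn + 4.  */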
1280
1281 #define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
1282 __GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1283 #define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
1284 __GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
1285 static inline mp_size_t
1286 mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
1287 mp_size_t n, itch;
1288 n = rn >> 1;
1289 itch = rn + 3 +
1290 (an > n ? an : 0);
1291 return itch;
1292 }
1293
1294 typedef __gmp_randstate_struct *gmp_randstate_ptr;
1295 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
1296
1297 /* Pseudo-random number generator function pointers structure. */
1298 typedef struct {
1299 void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
1300 void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
1301 void (*randclear_fn) (gmp_randstate_t);
1302 void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
1303 } gmp_randfnptr_t;
1304
1305 /* Macro to obtain a void pointer to the function pointers structure. */
1306 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
1307
1308 /* Macro to obtain a pointer to the generator's state.
1309 When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
1310 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
1311
1312 /* Write a given number of random bits to rp. */
1313 #define _gmp_rand(rp, state, bits) \
1314 do { \
1315 gmp_randstate_ptr __rstate = (state); \
1316 (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
1317 (__rstate, rp, bits); \
1318 } while (0)
1319
1320 __GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);
1321
1322
1323 /* __gmp_rands is the global state for the old-style random functions, and
1324 is also used in the test programs (hence the __GMP_DECLSPEC).
1325
1326 There's no seeding here, so mpz_random etc will generate the same
1327 sequence every time. This is not unlike the C library random functions
1328 if you don't seed them, so perhaps it's acceptable. Digging up a seed
1329 from /dev/random or the like would work on many systems, but might
1330 encourage a false confidence, since it'd be pretty much impossible to do
1331 something that would work reliably everywhere. In any case the new style
1332 functions are recommended to applications which care about randomness, so
1333 the old functions aren't too important. */
1334
1335 __GMP_DECLSPEC extern char __gmp_rands_initialized;
1336 __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
1337
1338 #define RANDS \
1339 ((__gmp_rands_initialized ? 0 \
1340 : (__gmp_rands_initialized = 1, \
1341 __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
1342 __gmp_rands)
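/* Typical internal use (illustrative): fill n limbs with random bits from
   the shared state,

     _gmp_rand (xp, RANDS, n * GMP_NUMB_BITS);

   where xp points to space for n limbs; the first use of RANDS initializes
   __gmp_rands with __gmp_randinit_mt_noseed.  */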
1343
1344 /* This is used by the test programs to free memory.  */
1345 #define RANDS_CLEAR() \
1346 do { \
1347 if (__gmp_rands_initialized) \
1348 { \
1349 __gmp_rands_initialized = 0; \
1350 gmp_randclear (__gmp_rands); \
1351 } \
1352 } while (0)
1353
1354
1355 /* For a threshold between algorithms A and B, size>=thresh is where B
1356 should be used. Special value MP_SIZE_T_MAX means only ever use A, or
1357 value 0 means only ever use B. The tests for these special values will
1358 be compile-time constants, so the compiler should be able to eliminate
1359 the code for the unwanted algorithm. */
1360
1361 #if ! defined (__GNUC__) || __GNUC__ < 2
1362 #define ABOVE_THRESHOLD(size,thresh) \
1363 ((thresh) == 0 \
1364 || ((thresh) != MP_SIZE_T_MAX \
1365 && (size) >= (thresh)))
1366 #else
1367 #define ABOVE_THRESHOLD(size,thresh) \
1368 ((__builtin_constant_p (thresh) && (thresh) == 0) \
1369 || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX) \
1370 && (size) >= (thresh)))
1371 #endif
1372 #define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
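/* For instance (illustrative) a typical algorithm selection

     if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
       mpn_mul_basecase (rp, ap, n, bp, n);
     else
       ...

   reduces to a single branch whenever the threshold is the compile-time
   constant 0 or MP_SIZE_T_MAX.  */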
1373
1374 /* The minimal size supported for Toom22 also depends on the Toom32 and
1375 Toom42 implementations. */
1376 #define MPN_TOOM22_MUL_MINSIZE 6
1377 #define MPN_TOOM2_SQR_MINSIZE 4
1378
1379 #define MPN_TOOM33_MUL_MINSIZE 17
1380 #define MPN_TOOM3_SQR_MINSIZE 17
1381
1382 #define MPN_TOOM44_MUL_MINSIZE 30
1383 #define MPN_TOOM4_SQR_MINSIZE 30
1384
1385 #define MPN_TOOM6H_MUL_MINSIZE 46
1386 #define MPN_TOOM6_SQR_MINSIZE 46
1387
1388 #define MPN_TOOM8H_MUL_MINSIZE 86
1389 #define MPN_TOOM8_SQR_MINSIZE 86
1390
1391 #define MPN_TOOM32_MUL_MINSIZE 10
1392 #define MPN_TOOM42_MUL_MINSIZE 10
1393 #define MPN_TOOM43_MUL_MINSIZE 25
1394 #define MPN_TOOM53_MUL_MINSIZE 17
1395 #define MPN_TOOM54_MUL_MINSIZE 31
1396 #define MPN_TOOM63_MUL_MINSIZE 49
1397
1398 #define MPN_TOOM42_MULMID_MINSIZE 4
1399
1400 #define mpn_sqr_diagonal __MPN(sqr_diagonal)
1401 __GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);
1402
1403 #define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
1404 __GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1405
1406 #define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
1407 __GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);
1408
1409 enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
1410 #define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
1411 __GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);
1412
1413 enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
1414 #define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
1415 __GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
1416
1417 #define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
1418 __GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
1419
1420 #define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
1421 __GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
1422
1423 #define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
1424 __GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
1425
1426 #define mpn_toom_couple_handling __MPN(toom_couple_handling)
1427 __GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);
1428
1429 #define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
1430 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1431
1432 #define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
1433 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1434
1435 #define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
1436 __GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1437
1438 #define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
1439 __GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1440
1441 #define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
1442 __GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
1443
1444 #define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
1445 __GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
1446
1447 #define mpn_toom22_mul __MPN(toom22_mul)
1448 __GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1449
1450 #define mpn_toom32_mul __MPN(toom32_mul)
1451 __GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1452
1453 #define mpn_toom42_mul __MPN(toom42_mul)
1454 __GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1455
1456 #define mpn_toom52_mul __MPN(toom52_mul)
1457 __GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1458
1459 #define mpn_toom62_mul __MPN(toom62_mul)
1460 __GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1461
1462 #define mpn_toom2_sqr __MPN(toom2_sqr)
1463 __GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1464
1465 #define mpn_toom33_mul __MPN(toom33_mul)
1466 __GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1467
1468 #define mpn_toom43_mul __MPN(toom43_mul)
1469 __GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1470
1471 #define mpn_toom53_mul __MPN(toom53_mul)
1472 __GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1473
1474 #define mpn_toom54_mul __MPN(toom54_mul)
1475 __GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1476
1477 #define mpn_toom63_mul __MPN(toom63_mul)
1478 __GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1479
1480 #define mpn_toom3_sqr __MPN(toom3_sqr)
1481 __GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1482
1483 #define mpn_toom44_mul __MPN(toom44_mul)
1484 __GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1485
1486 #define mpn_toom4_sqr __MPN(toom4_sqr)
1487 __GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1488
1489 #define mpn_toom6h_mul __MPN(toom6h_mul)
1490 __GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1491
1492 #define mpn_toom6_sqr __MPN(toom6_sqr)
1493 __GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1494
1495 #define mpn_toom8h_mul __MPN(toom8h_mul)
1496 __GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1497
1498 #define mpn_toom8_sqr __MPN(toom8_sqr)
1499 __GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1500
1501 #define mpn_toom42_mulmid __MPN(toom42_mulmid)
1502 __GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
1503
1504 #define mpn_fft_best_k __MPN(fft_best_k)
1505 __GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
1506
1507 #define mpn_mul_fft __MPN(mul_fft)
1508 __GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
1509
1510 #define mpn_mul_fft_full __MPN(mul_fft_full)
1511 __GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1512
1513 #define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
1514 __GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1515
1516 #define mpn_fft_next_size __MPN(fft_next_size)
1517 __GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
1518
1519 #define mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
1520 __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
1521
1522 #define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
1523 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
1524
1525 #define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
1526 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
1527
1528 #define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
1529 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1530
1531 #define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
1532 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1533
1534 #define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
1535 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1536
1537 #define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
1538 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1539 #define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
1540 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
1541
1542 #define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
1543 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1544
1545 #define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
1546 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1547
1548 #define mpn_mu_div_qr __MPN(mu_div_qr)
1549 __GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1550 #define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1551 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1552
1553 #define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1554 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1555 #define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
1556 __GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1557
1558 #define mpn_mu_divappr_q __MPN(mu_divappr_q)
1559 __GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1560 #define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1561 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1562
1563 #define mpn_mu_div_q __MPN(mu_div_q)
1564 __GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1565 #define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
1566 __GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1567
1568 #define mpn_div_q __MPN(div_q)
1569 __GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1570
1571 #define mpn_invert __MPN(invert)
1572 __GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1573 #define mpn_invert_itch(n) mpn_invertappr_itch(n)
1574
1575 #define mpn_ni_invertappr __MPN(ni_invertappr)
1576 __GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1577 #define mpn_invertappr __MPN(invertappr)
1578 __GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1579 #define mpn_invertappr_itch(n) (2 * (n))
1580
1581 #define mpn_binvert __MPN(binvert)
1582 __GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1583 #define mpn_binvert_itch __MPN(binvert_itch)
1584 __GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST;
1585
1586 #define mpn_bdiv_q_1 __MPN(bdiv_q_1)
1587 __GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1588
1589 #define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
1590 __GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
1591
1592 #define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
1593 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1594
1595 #define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
1596 __GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1597
1598 #define mpn_sbpi1_bdiv_r __MPN(sbpi1_bdiv_r)
1599 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1600
1601 #define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
1602 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1603 #define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
1604 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST;
1605
1606 #define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
1607 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1608 #define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
1609 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1610
1611 #define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1612 __GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1613 #define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1614 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1615
1616 #define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1617 __GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1618 #define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1619 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1620
1621 #define mpn_bdiv_qr __MPN(bdiv_qr)
1622 __GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1623 #define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
1624 __GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1625
1626 #define mpn_bdiv_q __MPN(bdiv_q)
1627 __GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1628 #define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
1629 __GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1630
1631 #define mpn_divexact __MPN(divexact)
1632 __GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1633 #define mpn_divexact_itch __MPN(divexact_itch)
1634 __GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1635
1636 #ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */
1637 #define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1638 __GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1639 #endif
1640
1641 #define mpn_bdiv_dbm1(dst, src, size, divisor) \
1642 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1643
1644 #define mpn_powm __MPN(powm)
1645 __GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1646 #define mpn_powlo __MPN(powlo)
1647 __GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1648
1649 #define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr)
1650 __GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1651 #define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r)
1652 __GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1653
1654
1655 #ifndef DIVEXACT_BY3_METHOD
1656 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1657 #define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */
1658 #else
1659 #define DIVEXACT_BY3_METHOD 1
1660 #endif
1661 #endif
1662
1663 #if DIVEXACT_BY3_METHOD == 0
1664 #undef mpn_divexact_by3
1665 #define mpn_divexact_by3(dst,src,size) \
1666 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1667 /* override mpn_divexact_by3c defined in gmp.h */
1668 /*
1669 #undef mpn_divexact_by3c
1670 #define mpn_divexact_by3c(dst,src,size,cy) \
1671 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * cy))
1672 */
1673 #endif
1674
1675 #if GMP_NUMB_BITS % 4 == 0
1676 #define mpn_divexact_by5(dst,src,size) \
1677 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1678 #endif
1679
1680 #if GMP_NUMB_BITS % 3 == 0
1681 #define mpn_divexact_by7(dst,src,size) \
1682 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1683 #endif
1684
1685 #if GMP_NUMB_BITS % 6 == 0
1686 #define mpn_divexact_by9(dst,src,size) \
1687 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1688 #endif
1689
1690 #if GMP_NUMB_BITS % 10 == 0
1691 #define mpn_divexact_by11(dst,src,size) \
1692 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1693 #endif
1694
1695 #if GMP_NUMB_BITS % 12 == 0
1696 #define mpn_divexact_by13(dst,src,size) \
1697 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1698 #endif
1699
1700 #if GMP_NUMB_BITS % 4 == 0
1701 #define mpn_divexact_by15(dst,src,size) \
1702 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1703 #endif
1704
1705 #define mpz_divexact_gcd __gmpz_divexact_gcd
1706 __GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
1707
1708 #define mpz_prodlimbs __gmpz_prodlimbs
1709 __GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
1710
1711 #define mpz_oddfac_1 __gmpz_oddfac_1
1712 __GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
1713
1714 #define mpz_stronglucas __gmpz_stronglucas
1715 __GMP_DECLSPEC int mpz_stronglucas (mpz_srcptr, mpz_ptr, mpz_ptr);
1716
1717 #define mpz_lucas_mod __gmpz_lucas_mod
1718 __GMP_DECLSPEC int mpz_lucas_mod (mpz_ptr, mpz_ptr, long, mp_bitcnt_t, mpz_srcptr, mpz_ptr, mpz_ptr);
1719
1720 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1721 #ifdef _GMP_H_HAVE_FILE
1722 __GMP_DECLSPEC size_t mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
1723 #endif
1724
1725 #define mpn_divisible_p __MPN(divisible_p)
1726 __GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
1727
1728 #define mpn_rootrem __MPN(rootrem)
1729 __GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1730
1731 #define mpn_broot __MPN(broot)
1732 __GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1733
1734 #define mpn_broot_invm1 __MPN(broot_invm1)
1735 __GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1736
1737 #define mpn_brootinv __MPN(brootinv)
1738 __GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1739
1740 #define mpn_bsqrt __MPN(bsqrt)
1741 __GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1742
1743 #define mpn_bsqrtinv __MPN(bsqrtinv)
1744 __GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1745
1746 #if defined (_CRAY)
1747 #define MPN_COPY_INCR(dst, src, n) \
1748 do { \
1749 int __i; /* Faster on some Crays with plain int */ \
1750 _Pragma ("_CRI ivdep"); \
1751 for (__i = 0; __i < (n); __i++) \
1752 (dst)[__i] = (src)[__i]; \
1753 } while (0)
1754 #endif
1755
1756 /* used by test programs, hence __GMP_DECLSPEC */
1757 #ifndef mpn_copyi /* if not done with cpuvec in a fat binary */
1758 #define mpn_copyi __MPN(copyi)
1759 __GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
1760 #endif
1761
1762 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1763 #define MPN_COPY_INCR(dst, src, size) \
1764 do { \
1765 ASSERT ((size) >= 0); \
1766 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
1767 mpn_copyi (dst, src, size); \
1768 } while (0)
1769 #endif
1770
1771 /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
1772 #if ! defined (MPN_COPY_INCR)
1773 #define MPN_COPY_INCR(dst, src, n) \
1774 do { \
1775 ASSERT ((n) >= 0); \
1776 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
1777 if ((n) != 0) \
1778 { \
1779 mp_size_t __n = (n) - 1; \
1780 mp_ptr __dst = (dst); \
1781 mp_srcptr __src = (src); \
1782 mp_limb_t __x; \
1783 __x = *__src++; \
1784 if (__n != 0) \
1785 { \
1786 do \
1787 { \
1788 *__dst++ = __x; \
1789 __x = *__src++; \
1790 } \
1791 while (--__n); \
1792 } \
1793 *__dst++ = __x; \
1794 } \
1795 } while (0)
1796 #endif
1797
1798
1799 #if defined (_CRAY)
1800 #define MPN_COPY_DECR(dst, src, n) \
1801 do { \
1802 int __i; /* Faster on some Crays with plain int */ \
1803 _Pragma ("_CRI ivdep"); \
1804 for (__i = (n) - 1; __i >= 0; __i--) \
1805 (dst)[__i] = (src)[__i]; \
1806 } while (0)
1807 #endif
1808
1809 /* used by test programs, hence __GMP_DECLSPEC */
1810 #ifndef mpn_copyd /* if not done with cpuvec in a fat binary */
1811 #define mpn_copyd __MPN(copyd)
1812 __GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
1813 #endif
1814
1815 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1816 #define MPN_COPY_DECR(dst, src, size) \
1817 do { \
1818 ASSERT ((size) >= 0); \
1819 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
1820 mpn_copyd (dst, src, size); \
1821 } while (0)
1822 #endif
1823
1824 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1825 #if ! defined (MPN_COPY_DECR)
1826 #define MPN_COPY_DECR(dst, src, n) \
1827 do { \
1828 ASSERT ((n) >= 0); \
1829 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
1830 if ((n) != 0) \
1831 { \
1832 mp_size_t __n = (n) - 1; \
1833 mp_ptr __dst = (dst) + __n; \
1834 mp_srcptr __src = (src) + __n; \
1835 mp_limb_t __x; \
1836 __x = *__src--; \
1837 if (__n != 0) \
1838 { \
1839 do \
1840 { \
1841 *__dst-- = __x; \
1842 __x = *__src--; \
1843 } \
1844 while (--__n); \
1845 } \
1846 *__dst-- = __x; \
1847 } \
1848 } while (0)
1849 #endif
1850
1851
1852 #ifndef MPN_COPY
1853 #define MPN_COPY(d,s,n) \
1854 do { \
1855 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
1856 MPN_COPY_INCR (d, s, n); \
1857 } while (0)
1858 #endif
1859
1860
1861 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1862 #define MPN_REVERSE(dst, src, size) \
1863 do { \
1864 mp_ptr __dst = (dst); \
1865 mp_size_t __size = (size); \
1866 mp_srcptr __src = (src) + __size - 1; \
1867 mp_size_t __i; \
1868 ASSERT ((size) >= 0); \
1869 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
1870 CRAY_Pragma ("_CRI ivdep"); \
1871 for (__i = 0; __i < __size; __i++) \
1872 { \
1873 *__dst = *__src; \
1874 __dst++; \
1875 __src--; \
1876 } \
1877 } while (0)
1878
1879
1880 /* Zero n limbs at dst.
1881
1882 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
1883 ppc630 for instance this is optimal since it can sustain only 1 store per
1884 cycle.
1885
1886 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1887 "for" loop in the generic code below can become stu/bdnz. The do/while
1888 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1889 applies here as to __GMPN_COPY_INCR in gmp.h.
1890
1891 xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1892 this loop too.
1893
1894 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1895 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1896 trouble than it's worth to do the same, though perhaps a call to memset
1897 would be good when on a GNU system. */
1898
1899 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1900 #define MPN_FILL(dst, n, f) \
1901 do { \
1902 mp_ptr __dst = (dst) - 1; \
1903 mp_size_t __n = (n); \
1904 ASSERT (__n > 0); \
1905 do \
1906 *++__dst = (f); \
1907 while (--__n); \
1908 } while (0)
1909 #endif
1910
1911 #ifndef MPN_FILL
1912 #define MPN_FILL(dst, n, f) \
1913 do { \
1914 mp_ptr __dst = (dst); \
1915 mp_size_t __n = (n); \
1916 ASSERT (__n > 0); \
1917 do \
1918 *__dst++ = (f); \
1919 while (--__n); \
1920 } while (0)
1921 #endif
1922
1923 #define MPN_ZERO(dst, n) \
1924 do { \
1925 ASSERT ((n) >= 0); \
1926 if ((n) != 0) \
1927 MPN_FILL (dst, n, CNST_LIMB (0)); \
1928 } while (0)
1929
1930 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1931 start up and would need to strip a lot of zeros before it'd be faster
1932 than a simple cmpl loop. Here are some times in cycles for
1933 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1934 low zeros).
1935
1936 std cld
1937 P5 18 16
1938 P6 46 38
1939 K6 36 13
1940 K7 21 20
1941 */
1942 #ifndef MPN_NORMALIZE
1943 #define MPN_NORMALIZE(DST, NLIMBS) \
1944 do { \
1945 while ((NLIMBS) > 0) \
1946 { \
1947 if ((DST)[(NLIMBS) - 1] != 0) \
1948 break; \
1949 (NLIMBS)--; \
1950 } \
1951 } while (0)
1952 #endif
1953 #ifndef MPN_NORMALIZE_NOT_ZERO
1954 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
1955 do { \
1956 while (1) \
1957 { \
1958 ASSERT ((NLIMBS) >= 1); \
1959 if ((DST)[(NLIMBS) - 1] != 0) \
1960 break; \
1961 (NLIMBS)--; \
1962 } \
1963 } while (0)
1964 #endif
1965
1966 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1967 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1968 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1969 somewhere a non-zero limb. */
1970 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
1971 do { \
1972 ASSERT ((size) >= 1); \
1973 ASSERT ((low) == (ptr)[0]); \
1974 \
1975 while ((low) == 0) \
1976 { \
1977 (size)--; \
1978 ASSERT ((size) >= 1); \
1979 (ptr)++; \
1980 (low) = *(ptr); \
1981 } \
1982 } while (0)
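/* Illustrative use (hypothetical names np, nsize): remove low zero limbs
   from a known non-zero operand before further processing:

     mp_limb_t nlow = np[0];
     MPN_STRIP_LOW_ZEROS_NOT_ZERO (np, nsize, nlow);

   afterwards nlow equals the new np[0], which is non-zero.  */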
1983
1984 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1985 temporary variable; it will be automatically cleared out at function
1986 return. We use __x here to make it possible to accept both mpz_ptr and
1987 mpz_t arguments. */
1988 #define MPZ_TMP_INIT(X, NLIMBS) \
1989 do { \
1990 mpz_ptr __x = (X); \
1991 ASSERT ((NLIMBS) >= 1); \
1992 __x->_mp_alloc = (NLIMBS); \
1993 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
1994 } while (0)
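/* Illustrative use (hypothetical names), in a function that has an active
   TMP_DECL / TMP_MARK ... TMP_FREE region:

     mpz_t t;
     MPZ_TMP_INIT (t, ABSIZ (x) + ABSIZ (y) + 1);

   t can then be used like an ordinary mpz_t, but must not be passed to
   mpz_clear; its limbs are released by TMP_FREE.  */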
1995
1996 #if WANT_ASSERT
1997 static inline void *
1998 _mpz_newalloc (mpz_ptr z, mp_size_t n)
1999 {
2000 void * res = _mpz_realloc(z,n);
2001 /* If we are checking the code, force a random change to limbs. */
2002 ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
2003 return res;
2004 }
2005 #else
2006 #define _mpz_newalloc _mpz_realloc
2007 #endif
2008 /* Realloc for an mpz_t Z if it has less than N limbs.  */
2009 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
2010 ? (mp_ptr) _mpz_realloc(z,n) \
2011 : PTR(z))
2012 #define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
2013 ? (mp_ptr) _mpz_newalloc(z,n) \
2014 : PTR(z))
2015
2016 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
2017
2018
2019 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
2020 f1p.
2021
2022 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
2023 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
2024 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
2025
2026 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
2027 without good floating point. There's +2 for rounding up, and a further
2028 +2 since at the last step x limbs are doubled into a 2x+1 limb region
2029 whereas the actual F[2k] value might be only 2x-1 limbs.
2030
2031 Note that a division is done first, since on a 32-bit system it's at
2032 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
2033 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
2034 whatever a multiply of two 190Mbyte numbers takes.)
2035
2036 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
2037 worked into the multiplier. */
2038
2039 #define MPN_FIB2_SIZE(n) \
2040 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
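/* Worked example (illustrative, 64-bit nail-free limbs): for n = 1000 this
   gives 1000/32*23/64 + 4 = 31*23/64 + 4 = 11 + 4 = 15 limbs, comfortably
   above the roughly 1000*0.6942/64 ~= 11 limbs that F[1000] itself needs.  */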
2041
2042
2043 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
2044 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
2045
2046 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
2047 F[n] + 2*F[n-1] fits in a limb. */
2048
2049 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
2050 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
2051
2052 extern const mp_limb_t __gmp_oddfac_table[];
2053 extern const mp_limb_t __gmp_odd2fac_table[];
2054 extern const unsigned char __gmp_fac2cnt_table[];
2055 extern const mp_limb_t __gmp_limbroots_table[];
2056
2057 /* log_n_max (n) is the largest log, at most 8, with n^log <= GMP_NUMB_MAX, i.e. a limb can hold a product of log factors each at most n */
2058 static inline unsigned
2059 log_n_max (mp_limb_t n)
2060 {
2061 unsigned log;
2062 for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);
2063 return log;
2064 }
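/* For example (illustrative, 64-bit nail-free limbs): log_n_max (1000) is 6,
   since 1000^6 = 10^18 fits in a limb but 1000^7 = 10^21 does not.  */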
2065
2066 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
2067 typedef struct
2068 {
2069 unsigned long d; /* current index in s[] */
2070 unsigned long s0; /* number corresponding to s[0] */
2071 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */
2072 unsigned char s[SIEVESIZE + 1]; /* sieve table */
2073 } gmp_primesieve_t;
2074
2075 #define gmp_init_primesieve __gmp_init_primesieve
2076 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
2077
2078 #define gmp_nextprime __gmp_nextprime
2079 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
2080
2081 #define gmp_primesieve __gmp_primesieve
2082 __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
2083
2084
2085 #ifndef MUL_TOOM22_THRESHOLD
2086 #define MUL_TOOM22_THRESHOLD 30
2087 #endif
2088
2089 #ifndef MUL_TOOM33_THRESHOLD
2090 #define MUL_TOOM33_THRESHOLD 100
2091 #endif
2092
2093 #ifndef MUL_TOOM44_THRESHOLD
2094 #define MUL_TOOM44_THRESHOLD 300
2095 #endif
2096
2097 #ifndef MUL_TOOM6H_THRESHOLD
2098 #define MUL_TOOM6H_THRESHOLD 350
2099 #endif
2100
2101 #ifndef SQR_TOOM6_THRESHOLD
2102 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
2103 #endif
2104
2105 #ifndef MUL_TOOM8H_THRESHOLD
2106 #define MUL_TOOM8H_THRESHOLD 450
2107 #endif
2108
2109 #ifndef SQR_TOOM8_THRESHOLD
2110 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
2111 #endif
2112
2113 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
2114 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
2115 #endif
2116
2117 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
2118 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
2119 #endif
2120
2121 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
2122 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100
2123 #endif
2124
2125 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
2126 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
2127 #endif
2128
2129 #ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
2130 #define MUL_TOOM43_TO_TOOM54_THRESHOLD 150
2131 #endif
2132
2133 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
2134 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
2135 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
2136 separate hard limit will have been defined. Similarly for TOOM3. */
2137 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
2138 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD
2139 #endif
2140 #ifndef MUL_TOOM33_THRESHOLD_LIMIT
2141 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD
2142 #endif
2143 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
2144 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
2145 #endif
2146 #ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
2147 #define SQRLO_BASECASE_THRESHOLD_LIMIT SQRLO_BASECASE_THRESHOLD
2148 #endif
2149 #ifndef SQRLO_DC_THRESHOLD_LIMIT
2150 #define SQRLO_DC_THRESHOLD_LIMIT SQRLO_DC_THRESHOLD
2151 #endif
2152
2153 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
2154 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
2155 certainly always want it if there's a native assembler mpn_sqr_basecase.)
2156
2157 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
2158 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
2159 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less
2160 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
2161 should be used, and that may be never. */
2162
2163 #ifndef SQR_BASECASE_THRESHOLD
2164 #define SQR_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
2165 #endif
2166
2167 #ifndef SQR_TOOM2_THRESHOLD
2168 #define SQR_TOOM2_THRESHOLD 50
2169 #endif
2170
2171 #ifndef SQR_TOOM3_THRESHOLD
2172 #define SQR_TOOM3_THRESHOLD 120
2173 #endif
2174
2175 #ifndef SQR_TOOM4_THRESHOLD
2176 #define SQR_TOOM4_THRESHOLD 400
2177 #endif
2178
2179 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */
2180 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
2181 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
2182 #endif
2183
2184 #ifndef MULMID_TOOM42_THRESHOLD
2185 #define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD
2186 #endif
2187
2188 #ifndef MULLO_BASECASE_THRESHOLD
2189 #define MULLO_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
2190 #endif
2191
2192 #ifndef MULLO_DC_THRESHOLD
2193 #define MULLO_DC_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
2194 #endif
2195
2196 #ifndef MULLO_MUL_N_THRESHOLD
2197 #define MULLO_MUL_N_THRESHOLD (2*MUL_FFT_THRESHOLD)
2198 #endif
2199
2200 #ifndef SQRLO_BASECASE_THRESHOLD
2201 #define SQRLO_BASECASE_THRESHOLD 0 /* never use mpn_sqr_basecase */
2202 #endif
2203
2204 #ifndef SQRLO_DC_THRESHOLD
2205 #define SQRLO_DC_THRESHOLD (MULLO_DC_THRESHOLD)
2206 #endif
2207
2208 #ifndef SQRLO_SQR_THRESHOLD
2209 #define SQRLO_SQR_THRESHOLD (MULLO_MUL_N_THRESHOLD)
2210 #endif
2211
2212 #ifndef DC_DIV_QR_THRESHOLD
2213 #define DC_DIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
2214 #endif
2215
2216 #ifndef DC_DIVAPPR_Q_THRESHOLD
2217 #define DC_DIVAPPR_Q_THRESHOLD 200
2218 #endif
2219
2220 #ifndef DC_BDIV_QR_THRESHOLD
2221 #define DC_BDIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD)
2222 #endif
2223
2224 #ifndef DC_BDIV_Q_THRESHOLD
2225 #define DC_BDIV_Q_THRESHOLD 180
2226 #endif
2227
2228 #ifndef DIVEXACT_JEB_THRESHOLD
2229 #define DIVEXACT_JEB_THRESHOLD 25
2230 #endif
2231
2232 #ifndef INV_MULMOD_BNM1_THRESHOLD
2233 #define INV_MULMOD_BNM1_THRESHOLD (4*MULMOD_BNM1_THRESHOLD)
2234 #endif
2235
2236 #ifndef INV_APPR_THRESHOLD
2237 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD
2238 #endif
2239
2240 #ifndef INV_NEWTON_THRESHOLD
2241 #define INV_NEWTON_THRESHOLD 200
2242 #endif
2243
2244 #ifndef BINV_NEWTON_THRESHOLD
2245 #define BINV_NEWTON_THRESHOLD 300
2246 #endif
2247
2248 #ifndef MU_DIVAPPR_Q_THRESHOLD
2249 #define MU_DIVAPPR_Q_THRESHOLD 2000
2250 #endif
2251
2252 #ifndef MU_DIV_QR_THRESHOLD
2253 #define MU_DIV_QR_THRESHOLD 2000
2254 #endif
2255
2256 #ifndef MUPI_DIV_QR_THRESHOLD
2257 #define MUPI_DIV_QR_THRESHOLD 200
2258 #endif
2259
2260 #ifndef MU_BDIV_Q_THRESHOLD
2261 #define MU_BDIV_Q_THRESHOLD 2000
2262 #endif
2263
2264 #ifndef MU_BDIV_QR_THRESHOLD
2265 #define MU_BDIV_QR_THRESHOLD 2000
2266 #endif
2267
2268 #ifndef MULMOD_BNM1_THRESHOLD
2269 #define MULMOD_BNM1_THRESHOLD 16
2270 #endif
2271
2272 #ifndef SQRMOD_BNM1_THRESHOLD
2273 #define SQRMOD_BNM1_THRESHOLD 16
2274 #endif
2275
2276 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
2277 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2)
2278 #endif
2279
2280 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
2281
2282 #ifndef REDC_1_TO_REDC_2_THRESHOLD
2283 #define REDC_1_TO_REDC_2_THRESHOLD 15
2284 #endif
2285 #ifndef REDC_2_TO_REDC_N_THRESHOLD
2286 #define REDC_2_TO_REDC_N_THRESHOLD 100
2287 #endif
2288
2289 #else
2290
2291 #ifndef REDC_1_TO_REDC_N_THRESHOLD
2292 #define REDC_1_TO_REDC_N_THRESHOLD 100
2293 #endif
2294
2295 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
2296
2297
2298 /* First k to use for an FFT modF multiply. A modF FFT is an order
2299 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
2300 whereas k=4 is 1.33 which is faster than toom3 at 1.465. */
2301 #define FFT_FIRST_K 4
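/* (Rationale sketch: splitting into 2^k pieces turns one size-N modF product
   into 2^k pointwise products of size roughly N/2^(k-1), so the recurrence
   exponent is log(2^k)/log(2^(k-1)) = k/(k-1): 1.5 for k=3, 1.33 for k=4,
   1.25 for k=5, and so on.)  */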
2302
2303 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
2304 #ifndef MUL_FFT_MODF_THRESHOLD
2305 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3)
2306 #endif
2307 #ifndef SQR_FFT_MODF_THRESHOLD
2308 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
2309 #endif
2310
2311 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
2312 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
2313 NxN->2N multiply and not recursing into itself is an order
2314 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
2315 is the first better than toom3. */
2316 #ifndef MUL_FFT_THRESHOLD
2317 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
2318 #endif
2319 #ifndef SQR_FFT_THRESHOLD
2320 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
2321 #endif
2322
2323 /* Table of thresholds for successive modF FFT "k"s. The first entry is
2324 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
2325 etc. See mpn_fft_best_k(). */
2326 #ifndef MUL_FFT_TABLE
2327 #define MUL_FFT_TABLE \
2328 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
2329 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
2330 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
2331 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
2332 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
2333 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
2334 0 }
2335 #endif
2336 #ifndef SQR_FFT_TABLE
2337 #define SQR_FFT_TABLE \
2338 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
2339 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
2340 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
2341 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
2342 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
2343 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
2344 0 }
2345 #endif
2346
2347 struct fft_table_nk
2348 {
2349 gmp_uint_least32_t n:27;
2350 gmp_uint_least32_t k:5;
2351 };
2352
2353 #ifndef FFT_TABLE_ATTRS
2354 #define FFT_TABLE_ATTRS static const
2355 #endif
2356
2357 #define MPN_FFT_TABLE_SIZE 16
2358
2359
2360 #ifndef DC_DIV_QR_THRESHOLD
2361 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD)
2362 #endif
2363
2364 #ifndef GET_STR_DC_THRESHOLD
2365 #define GET_STR_DC_THRESHOLD 18
2366 #endif
2367
2368 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
2369 #define GET_STR_PRECOMPUTE_THRESHOLD 35
2370 #endif
2371
2372 #ifndef SET_STR_DC_THRESHOLD
2373 #define SET_STR_DC_THRESHOLD 750
2374 #endif
2375
2376 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
2377 #define SET_STR_PRECOMPUTE_THRESHOLD 2000
2378 #endif
2379
2380 #ifndef FAC_ODD_THRESHOLD
2381 #define FAC_ODD_THRESHOLD 35
2382 #endif
2383
2384 #ifndef FAC_DSC_THRESHOLD
2385 #define FAC_DSC_THRESHOLD 400
2386 #endif
2387
2388 /* Return non-zero if xp,xsize and yp,ysize overlap.
2389 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
2390 overlap. If both these are false, there's an overlap. */
2391 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
2392 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
2393 #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
2394 ( (char *) (xp) + (xsize) > (char *) (yp) \
2395 && (char *) (yp) + (ysize) > (char *) (xp))
2396
2397 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
2398 overlapping. Return zero if they're partially overlapping. */
2399 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
2400 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
2401 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
2402 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
2403
2404 /* Return non-zero if dst,dsize and src,ssize are either identical or
2405 overlapping in a way suitable for an incrementing/decrementing algorithm.
2406 Return zero if they're partially overlapping in an unsuitable fashion. */
2407 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
2408 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
2409 #define MPN_SAME_OR_INCR_P(dst, src, size) \
2410 MPN_SAME_OR_INCR2_P(dst, size, src, size)
2411 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
2412 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
2413 #define MPN_SAME_OR_DECR_P(dst, src, size) \
2414 MPN_SAME_OR_DECR2_P(dst, size, src, size)
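/* For example (illustrative): an incrementing copy reads src[i] before
   writing dst[i], so it is safe whenever MPN_SAME_OR_INCR_P (dst, src, n)
   holds; sliding a region down one limb (dst == src - 1) is fine that way,
   whereas sliding it up one limb needs a decrementing copy instead.  */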
2415
2416
2417 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
2418 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
2419 does it always. Generally assertions are meant for development, but
2420 might help when looking for a problem later too. */
2421
2422 #ifdef __LINE__
2423 #define ASSERT_LINE __LINE__
2424 #else
2425 #define ASSERT_LINE -1
2426 #endif
2427
2428 #ifdef __FILE__
2429 #define ASSERT_FILE __FILE__
2430 #else
2431 #define ASSERT_FILE ""
2432 #endif
2433
2434 __GMP_DECLSPEC void __gmp_assert_header (const char *, int);
2435 __GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
2436
2437 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
2438
2439 #define ASSERT_ALWAYS(expr) \
2440 do { \
2441 if (UNLIKELY (!(expr))) \
2442 ASSERT_FAIL (expr); \
2443 } while (0)
2444
2445 #if WANT_ASSERT
2446 #define ASSERT(expr) ASSERT_ALWAYS (expr)
2447 #else
2448 #define ASSERT(expr) do {} while (0)
2449 #endif
2450
2451
2452 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2453 that it's zero. In both cases if assertion checking is disabled the
2454 expression is still evaluated. These macros are meant for use with
2455 routines like mpn_add_n() where the return value represents a carry or
2456 whatever that should or shouldn't occur in some context. For example,
2457 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2458 #if WANT_ASSERT
2459 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
2460 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
2461 #else
2462 #define ASSERT_CARRY(expr) (expr)
2463 #define ASSERT_NOCARRY(expr) (expr)
2464 #endif
2465
2466
2467 /* ASSERT_CODE includes code when assertion checking is wanted. This is the
2468 same as writing "#if WANT_ASSERT", but more compact. */
2469 #if WANT_ASSERT
2470 #define ASSERT_CODE(expr) expr
2471 #else
2472 #define ASSERT_CODE(expr)
2473 #endif
2474
2475
2476 /* Test that an mpq_t is in fully canonical form. This can be used as
2477 protection on routines like mpq_equal which give wrong results on
2478 non-canonical inputs. */
2479 #if WANT_ASSERT
2480 #define ASSERT_MPQ_CANONICAL(q) \
2481 do { \
2482 ASSERT (q->_mp_den._mp_size > 0); \
2483 if (q->_mp_num._mp_size == 0) \
2484 { \
2485 /* zero should be 0/1 */ \
2486 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
2487 } \
2488 else \
2489 { \
2490 /* no common factors */ \
2491 mpz_t __g; \
2492 mpz_init (__g); \
2493 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
2494 ASSERT (mpz_cmp_ui (__g, 1) == 0); \
2495 mpz_clear (__g); \
2496 } \
2497 } while (0)
2498 #else
2499 #define ASSERT_MPQ_CANONICAL(q) do {} while (0)
2500 #endif
2501
2502 /* Check that the nail parts are zero. */
2503 #define ASSERT_ALWAYS_LIMB(limb) \
2504 do { \
2505 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
2506 ASSERT_ALWAYS (__nail == 0); \
2507 } while (0)
2508 #define ASSERT_ALWAYS_MPN(ptr, size) \
2509 do { \
2510 /* let whole loop go dead when no nails */ \
2511 if (GMP_NAIL_BITS != 0) \
2512 { \
2513 mp_size_t __i; \
2514 for (__i = 0; __i < (size); __i++) \
2515 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
2516 } \
2517 } while (0)
2518 #if WANT_ASSERT
2519 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
2520 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
2521 #else
2522 #define ASSERT_LIMB(limb) do {} while (0)
2523 #define ASSERT_MPN(ptr, size) do {} while (0)
2524 #endif
2525
2526
2527 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
2528 size==0 is allowed, and in that case {ptr,size} is considered to be zero. */
2529 #if WANT_ASSERT
2530 #define ASSERT_MPN_ZERO_P(ptr,size) \
2531 do { \
2532 mp_size_t __i; \
2533 ASSERT ((size) >= 0); \
2534 for (__i = 0; __i < (size); __i++) \
2535 ASSERT ((ptr)[__i] == 0); \
2536 } while (0)
2537 #define ASSERT_MPN_NONZERO_P(ptr,size) \
2538 do { \
2539 mp_size_t __i; \
2540 int __nonzero = 0; \
2541 ASSERT ((size) >= 0); \
2542 for (__i = 0; __i < (size); __i++) \
2543 if ((ptr)[__i] != 0) \
2544 { \
2545 __nonzero = 1; \
2546 break; \
2547 } \
2548 ASSERT (__nonzero); \
2549 } while (0)
2550 #else
2551 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
2552 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
2553 #endif
2554
2555
2556 #if ! HAVE_NATIVE_mpn_com
2557 #undef mpn_com
2558 #define mpn_com(d,s,n) \
2559 do { \
2560 mp_ptr __d = (d); \
2561 mp_srcptr __s = (s); \
2562 mp_size_t __n = (n); \
2563 ASSERT (__n >= 1); \
2564 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
2565 do \
2566 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
2567 while (--__n); \
2568 } while (0)
2569 #endif
2570
2571 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \
2572 do { \
2573 mp_srcptr __up = (up); \
2574 mp_srcptr __vp = (vp); \
2575 mp_ptr __rp = (rp); \
2576 mp_size_t __n = (n); \
2577 mp_limb_t __a, __b; \
2578 ASSERT (__n > 0); \
2579 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \
2580 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \
2581 __up += __n; \
2582 __vp += __n; \
2583 __rp += __n; \
2584 __n = -__n; \
2585 do { \
2586 __a = __up[__n]; \
2587 __b = __vp[__n]; \
2588 __rp[__n] = operation; \
2589 } while (++__n); \
2590 } while (0)
2591
2592
2593 #if ! HAVE_NATIVE_mpn_and_n
2594 #undef mpn_and_n
2595 #define mpn_and_n(rp, up, vp, n) \
2596 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2597 #endif
2598
2599 #if ! HAVE_NATIVE_mpn_andn_n
2600 #undef mpn_andn_n
2601 #define mpn_andn_n(rp, up, vp, n) \
2602 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2603 #endif
2604
2605 #if ! HAVE_NATIVE_mpn_nand_n
2606 #undef mpn_nand_n
2607 #define mpn_nand_n(rp, up, vp, n) \
2608 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2609 #endif
2610
2611 #if ! HAVE_NATIVE_mpn_ior_n
2612 #undef mpn_ior_n
2613 #define mpn_ior_n(rp, up, vp, n) \
2614 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2615 #endif
2616
2617 #if ! HAVE_NATIVE_mpn_iorn_n
2618 #undef mpn_iorn_n
2619 #define mpn_iorn_n(rp, up, vp, n) \
2620 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2621 #endif
2622
2623 #if ! HAVE_NATIVE_mpn_nior_n
2624 #undef mpn_nior_n
2625 #define mpn_nior_n(rp, up, vp, n) \
2626 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2627 #endif
2628
2629 #if ! HAVE_NATIVE_mpn_xor_n
2630 #undef mpn_xor_n
2631 #define mpn_xor_n(rp, up, vp, n) \
2632 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2633 #endif
2634
2635 #if ! HAVE_NATIVE_mpn_xnor_n
2636 #undef mpn_xnor_n
2637 #define mpn_xnor_n(rp, up, vp, n) \
2638 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
2639 #endif
2640
2641 #define mpn_trialdiv __MPN(trialdiv)
2642 __GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
2643
2644 #define mpn_remove __MPN(remove)
2645 __GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t);
2646
2647
2648 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2649 #if GMP_NAIL_BITS == 0
2650 #define ADDC_LIMB(cout, w, x, y) \
2651 do { \
2652 mp_limb_t __x = (x); \
2653 mp_limb_t __y = (y); \
2654 mp_limb_t __w = __x + __y; \
2655 (w) = __w; \
2656 (cout) = __w < __x; \
2657 } while (0)
2658 #else
2659 #define ADDC_LIMB(cout, w, x, y) \
2660 do { \
2661 mp_limb_t __w; \
2662 ASSERT_LIMB (x); \
2663 ASSERT_LIMB (y); \
2664 __w = (x) + (y); \
2665 (w) = __w & GMP_NUMB_MASK; \
2666 (cout) = __w >> GMP_NUMB_BITS; \
2667 } while (0)
2668 #endif
2669
2670 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2671 subtract. */
2672 #if GMP_NAIL_BITS == 0
2673 #define SUBC_LIMB(cout, w, x, y) \
2674 do { \
2675 mp_limb_t __x = (x); \
2676 mp_limb_t __y = (y); \
2677 mp_limb_t __w = __x - __y; \
2678 (w) = __w; \
2679 (cout) = __w > __x; \
2680 } while (0)
2681 #else
2682 #define SUBC_LIMB(cout, w, x, y) \
2683 do { \
2684 mp_limb_t __w = (x) - (y); \
2685 (w) = __w & GMP_NUMB_MASK; \
2686 (cout) = __w >> (GMP_LIMB_BITS-1); \
2687 } while (0)
2688 #endif
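/* Illustrative use (hypothetical names): a single limb addition with an
   explicit carry out, e.g. when accumulating into a result limb:

     mp_limb_t sum, carry;
     ADDC_LIMB (carry, sum, x, y);

   carry is then 0 or 1 and sum is the low limb of x + y.  */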
2689
2690
2691 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2692 expecting no carry (or borrow) from that.
2693
2694 The size parameter is only for the benefit of assertion checking. In a
2695 normal build it's unused and the carry/borrow is just propagated as far
2696 as it needs to go.
2697
2698 On random data, usually only one or two limbs of {ptr,size} get updated,
2699 so there's no need for any sophisticated looping, just something compact
2700 and sensible.
2701
2702 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2703 declaring their operand sizes, then remove the former. This is purely
2704 for the benefit of assertion checking. */
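/* Illustrative use (hypothetical names): propagate a final carry limb cy
   into the high part of a result, where it is known not to overflow:

     MPN_INCR_U (rp + an, rn - an, cy);

   In an assertion build this also checks that the carry really does stop
   within {rp+an, rn-an}.  */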
2705
2706 #if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \
2707 && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
2708 && ! WANT_ASSERT
2709 /* Better flags handling than the generic C gives on i386, saving a few
2710 bytes of code and maybe a cycle or two. */
2711
2712 #define MPN_IORD_U(ptr, incr, aors) \
2713 do { \
2714 mp_ptr __ptr_dummy; \
2715 if (__builtin_constant_p (incr) && (incr) == 0) \
2716 { \
2717 } \
2718 else if (__builtin_constant_p (incr) && (incr) == 1) \
2719 { \
2720 __asm__ __volatile__ \
2721 ("\n" ASM_L(top) ":\n" \
2722 "\t" aors "\t$1, (%0)\n" \
2723 "\tlea\t%c2(%0), %0\n" \
2724 "\tjc\t" ASM_L(top) \
2725 : "=r" (__ptr_dummy) \
2726 : "0" (ptr), "n" (sizeof(mp_limb_t)) \
2727 : "memory"); \
2728 } \
2729 else \
2730 { \
2731 __asm__ __volatile__ \
2732 ( aors "\t%2, (%0)\n" \
2733 "\tjnc\t" ASM_L(done) "\n" \
2734 ASM_L(top) ":\n" \
2735 "\t" aors "\t$1, %c3(%0)\n" \
2736 "\tlea\t%c3(%0), %0\n" \
2737 "\tjc\t" ASM_L(top) "\n" \
2738 ASM_L(done) ":\n" \
2739 : "=r" (__ptr_dummy) \
2740 : "0" (ptr), \
2741 "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \
2742 : "memory"); \
2743 } \
2744 } while (0)
2745
2746 #if GMP_LIMB_BITS == 32
2747 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
2748 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
2749 #endif
2750 #if GMP_LIMB_BITS == 64
2751 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq")
2752 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq")
2753 #endif
2754 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
2755 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
2756 #endif
2757
2758 #if GMP_NAIL_BITS == 0
2759 #ifndef mpn_incr_u
2760 #define mpn_incr_u(p,incr) \
2761 do { \
2762 mp_limb_t __x; \
2763 mp_ptr __p = (p); \
2764 if (__builtin_constant_p (incr) && (incr) == 1) \
2765 { \
2766 while (++(*(__p++)) == 0) \
2767 ; \
2768 } \
2769 else \
2770 { \
2771 __x = *__p + (incr); \
2772 *__p = __x; \
2773 if (__x < (incr)) \
2774 while (++(*(++__p)) == 0) \
2775 ; \
2776 } \
2777 } while (0)
2778 #endif
2779 #ifndef mpn_decr_u
2780 #define mpn_decr_u(p,incr) \
2781 do { \
2782 mp_limb_t __x; \
2783 mp_ptr __p = (p); \
2784 if (__builtin_constant_p (incr) && (incr) == 1) \
2785 { \
2786 while ((*(__p++))-- == 0) \
2787 ; \
2788 } \
2789 else \
2790 { \
2791 __x = *__p; \
2792 *__p = __x - (incr); \
2793 if (__x < (incr)) \
2794 while ((*(++__p))-- == 0) \
2795 ; \
2796 } \
2797 } while (0)
2798 #endif
2799 #endif
2800
2801 #if GMP_NAIL_BITS >= 1
2802 #ifndef mpn_incr_u
2803 #define mpn_incr_u(p,incr) \
2804 do { \
2805 mp_limb_t __x; \
2806 mp_ptr __p = (p); \
2807 if (__builtin_constant_p (incr) && (incr) == 1) \
2808 { \
2809 do \
2810 { \
2811 __x = (*__p + 1) & GMP_NUMB_MASK; \
2812 *__p++ = __x; \
2813 } \
2814 while (__x == 0); \
2815 } \
2816 else \
2817 { \
2818 __x = (*__p + (incr)); \
2819 *__p++ = __x & GMP_NUMB_MASK; \
2820 if (__x >> GMP_NUMB_BITS != 0) \
2821 { \
2822 do \
2823 { \
2824 __x = (*__p + 1) & GMP_NUMB_MASK; \
2825 *__p++ = __x; \
2826 } \
2827 while (__x == 0); \
2828 } \
2829 } \
2830 } while (0)
2831 #endif
2832 #ifndef mpn_decr_u
2833 #define mpn_decr_u(p,incr) \
2834 do { \
2835 mp_limb_t __x; \
2836 mp_ptr __p = (p); \
2837 if (__builtin_constant_p (incr) && (incr) == 1) \
2838 { \
2839 do \
2840 { \
2841 __x = *__p; \
2842 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2843 } \
2844 while (__x == 0); \
2845 } \
2846 else \
2847 { \
2848 __x = *__p - (incr); \
2849 *__p++ = __x & GMP_NUMB_MASK; \
2850 if (__x >> GMP_NUMB_BITS != 0) \
2851 { \
2852 do \
2853 { \
2854 __x = *__p; \
2855 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2856 } \
2857 while (__x == 0); \
2858 } \
2859 } \
2860 } while (0)
2861 #endif
2862 #endif
2863
2864 #ifndef MPN_INCR_U
2865 #if WANT_ASSERT
2866 #define MPN_INCR_U(ptr, size, n) \
2867 do { \
2868 ASSERT ((size) >= 1); \
2869 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
2870 } while (0)
2871 #else
2872 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
2873 #endif
2874 #endif
2875
2876 #ifndef MPN_DECR_U
2877 #if WANT_ASSERT
2878 #define MPN_DECR_U(ptr, size, n) \
2879 do { \
2880 ASSERT ((size) >= 1); \
2881 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
2882 } while (0)
2883 #else
2884 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
2885 #endif
2886 #endif
2887
2888
2889 /* Structure for conversion between internal binary format and strings. */
2890 struct bases
2891 {
2892 /* Number of digits in the conversion base that always fit in an mp_limb_t.
2893 For example, for base 10 on a machine where an mp_limb_t has 32 bits this
2894 is 9, since 10**9 is the largest power of 10 that fits into an mp_limb_t. */
2895 int chars_per_limb;
2896
2897 /* log(2)/log(conversion_base) */
2898 mp_limb_t logb2;
2899
2900 /* log(conversion_base)/log(2) */
2901 mp_limb_t log2b;
2902
2903 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2904 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
2905 i.e. the number of bits used to represent each digit in the base. */
2906 mp_limb_t big_base;
2907
2908 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2909 fixed-point number. Instead of dividing by big_base an application can
2910 choose to multiply by big_base_inverted. */
2911 mp_limb_t big_base_inverted;
2912 };
2913
2914 #define mp_bases __MPN(bases)
2915 __GMP_DECLSPEC extern const struct bases mp_bases[257];
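/* For instance (illustrative, nail-free limbs): mp_bases[10] has
   chars_per_limb 9 and big_base 10^9 on a 32-bit limb build, and
   chars_per_limb 19 and big_base 10^19 on a 64-bit limb build.  */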
2916
2917
2918 /* Compute the number of digits in base for nbits bits, making sure the result
2919 is never too small. The two variants of the macro implement the same
2920 function; the GT2 variant below works just for bases > 2. */
2921 #define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \
2922 do { \
2923 mp_limb_t _ph, _dummy; \
2924 size_t _nbits = (nbits); \
2925 umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \
2926 _ph += (_dummy + _nbits < _dummy); \
2927 res = _ph + 1; \
2928 } while (0)
2929 #define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \
2930 do { \
2931 mp_limb_t _ph, _dummy; \
2932 size_t _nbits = (nbits); \
2933 umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \
2934 res = _ph + 1; \
2935 } while (0)
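/* Worked example (illustrative): an operand of 64 significant bits is below
   2^64 and so has at most ceil(64*log10(2)) = 20 decimal digits;
   DIGITS_IN_BASE_FROM_BITS (res, 64, 10) accordingly yields at least 20,
   never less than the true digit count.  */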
2936
2937 /* For power of 2 bases this is exact. For other bases the result is either
2938 exact or one too big.
2939
2940 To be exact always it'd be necessary to examine all the limbs of the
2941 operand, since numbers like 100..000 and 99...999 generally differ only
2942 in the lowest limb. It'd be possible to examine just a couple of high
2943 limbs to increase the probability of being exact, but that doesn't seem
2944 worth bothering with. */
2945
2946 #define MPN_SIZEINBASE(result, ptr, size, base) \
2947 do { \
2948 int __lb_base, __cnt; \
2949 size_t __totbits; \
2950 \
2951 ASSERT ((size) >= 0); \
2952 ASSERT ((base) >= 2); \
2953 ASSERT ((base) < numberof (mp_bases)); \
2954 \
2955 /* Special case for X == 0. */ \
2956 if ((size) == 0) \
2957 (result) = 1; \
2958 else \
2959 { \
2960 /* Calculate the total number of significant bits of X. */ \
2961 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2962 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2963 \
2964 if (POW2_P (base)) \
2965 { \
2966 __lb_base = mp_bases[base].big_base; \
2967 (result) = (__totbits + __lb_base - 1) / __lb_base; \
2968 } \
2969 else \
2970 { \
2971 DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \
2972 } \
2973 } \
2974 } while (0)
2975
2976 #define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \
2977 do { \
2978 int __cnt; \
2979 mp_bitcnt_t __totbits; \
2980 ASSERT ((size) > 0); \
2981 ASSERT ((ptr)[(size)-1] != 0); \
2982 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2983 __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
2984 (result) = (__totbits + (base2exp)-1) / (base2exp); \
2985 } while (0)
2986
2987
2988 /* bit count to limb count, rounding up */
2989 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2990
2991 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2992 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2993 in both cases (LIMBS_PER_ULONG is also defined here.) */
2994 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2995
2996 #define LIMBS_PER_ULONG 1
2997 #define MPN_SET_UI(zp, zn, u) \
2998 (zp)[0] = (u); \
2999 (zn) = ((zp)[0] != 0);
3000 #define MPZ_FAKE_UI(z, zp, u) \
3001 (zp)[0] = (u); \
3002 PTR (z) = (zp); \
3003 SIZ (z) = ((zp)[0] != 0); \
3004 ASSERT_CODE (ALLOC (z) = 1);
3005
3006 #else /* need two limbs per ulong */
3007
3008 #define LIMBS_PER_ULONG 2
3009 #define MPN_SET_UI(zp, zn, u) \
3010 (zp)[0] = (u) & GMP_NUMB_MASK; \
3011 (zp)[1] = (u) >> GMP_NUMB_BITS; \
3012 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
3013 #define MPZ_FAKE_UI(z, zp, u) \
3014 (zp)[0] = (u) & GMP_NUMB_MASK; \
3015 (zp)[1] = (u) >> GMP_NUMB_BITS; \
3016 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
3017 PTR (z) = (zp); \
3018 ASSERT_CODE (ALLOC (z) = 2);
3019
3020 #endif
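/* Illustrative use (hypothetical names): pass an unsigned long u as a
   read-only operand to an mpz routine without heap allocation:

     mp_limb_t ulimbs[LIMBS_PER_ULONG];
     mpz_t uz;
     MPZ_FAKE_UI (uz, ulimbs, u);

   uz can then be read by routines such as mpz_cmp, but must not be written
   to or cleared.  */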
3021
3022
3023 #if HAVE_HOST_CPU_FAMILY_x86
3024 #define TARGET_REGISTER_STARVED 1
3025 #else
3026 #define TARGET_REGISTER_STARVED 0
3027 #endif
3028
3029
3030 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
3031 or 0 there into a limb 0xFF..FF or 0 respectively.
3032
3033 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
3034 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
3035 a little compile-time test and a fallback to a "? :" form. The latter is
3036 necessary for instance on Cray vector systems.
3037
3038 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
3039 to an arithmetic right shift anyway, but it's good to get the desired
3040 shift on past versions too (in particular since an important use of
3041 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
3042
3043 #define LIMB_HIGHBIT_TO_MASK(n) \
3044 (((mp_limb_signed_t) -1 >> 1) < 0 \
3045 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
3046 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
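/* Illustrative use (hypothetical names): build a branch-free conditional
   from a limb's top bit, e.g.

     mask = LIMB_HIGHBIT_TO_MASK (t);
     r += mask & d;

   adds d only when the top bit of t was set.  */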
3047
3048
3049 /* Use a library function for invert_limb, if available. */
3050 #define mpn_invert_limb __MPN(invert_limb)
3051 __GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
3052 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
3053 #define invert_limb(invxl,xl) \
3054 do { \
3055 (invxl) = mpn_invert_limb (xl); \
3056 } while (0)
3057 #endif
3058
3059 #ifndef invert_limb
3060 #define invert_limb(invxl,xl) \
3061 do { \
3062 mp_limb_t _dummy; \
3063 ASSERT ((xl) != 0); \
3064 udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \
3065 } while (0)
3066 #endif
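/* (The fallback computes the same quantity a native mpn_invert_limb is
   expected to return: with B = 2^GMP_LIMB_BITS and a normalized xl, the
   dividend ~xl * B + ~0 is (B-1-xl)*B + (B-1) = B^2 - 1 - xl*B, so the
   quotient is floor ((B^2 - 1) / xl) - B.)  */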
3067
3068 #define invert_pi1(dinv, d1, d0) \
3069 do { \
3070 mp_limb_t _v, _p, _t1, _t0, _mask; \
3071 invert_limb (_v, d1); \
3072 _p = (d1) * _v; \
3073 _p += (d0); \
3074 if (_p < (d0)) \
3075 { \
3076 _v--; \
3077 _mask = -(mp_limb_t) (_p >= (d1)); \
3078 _p -= (d1); \
3079 _v += _mask; \
3080 _p -= _mask & (d1); \
3081 } \
3082 umul_ppmm (_t1, _t0, d0, _v); \
3083 _p += _t1; \
3084 if (_p < _t1) \
3085 { \
3086 _v--; \
3087 if (UNLIKELY (_p >= (d1))) \
3088 { \
3089 if (_p > (d1) || _t0 >= (d0)) \
3090 _v--; \
3091 } \
3092 } \
3093 (dinv).inv32 = _v; \
3094 } while (0)
3095
3096
3097 /* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
3098 We write things strangely below, to help gcc. A more straightforward
3099 version:
3100 _r = (nl) - _qh * (d);
3101 _t = _r + (d);
3102 if (_r >= _ql)
3103 {
3104 _qh--;
3105 _r = _t;
3106 }
3107    For a critical path one operation shorter, one may want to use this form:
3108 _p = _qh * (d)
3109 _s = (nl) + (d);
3110 _r = (nl) - _p;
3111 _t = _s - _p;
3112 if (_r >= _ql)
3113 {
3114 _qh--;
3115 _r = _t;
3116 }
3117 */
3118 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
3119 do { \
3120 mp_limb_t _qh, _ql, _r, _mask; \
3121 umul_ppmm (_qh, _ql, (nh), (di)); \
3122 if (__builtin_constant_p (nl) && (nl) == 0) \
3123 { \
3124 _qh += (nh) + 1; \
3125 _r = - _qh * (d); \
3126 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3127 _qh += _mask; \
3128 _r += _mask & (d); \
3129 } \
3130 else \
3131 { \
3132 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
3133 _r = (nl) - _qh * (d); \
3134 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3135 _qh += _mask; \
3136 _r += _mask & (d); \
3137 if (UNLIKELY (_r >= (d))) \
3138 { \
3139 _r -= (d); \
3140 _qh++; \
3141 } \
3142 } \
3143 (r) = _r; \
3144 (q) = _qh; \
3145 } while (0)
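/* Illustrative use (a sketch only, not part of this header): to divide a
   two-limb value {nh,nl} by a normalized divisor d (high bit set, nh < d),
   compute the inverse once and then divide:

       mp_limb_t dinv, q, r;
       invert_limb (dinv, d);
       udiv_qrnnd_preinv (q, r, nh, nl, d, dinv);

   where nh, nl and d are hypothetical limb variables; afterwards
   q*d + r reconstructs the dividend, with 0 <= r < d.  */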
3146
3147 /* Dividing (NH, NL) by D, returning the remainder only. Unlike
3148 udiv_qrnnd_preinv, works also for the case NH == D, where the
3149 quotient doesn't quite fit in a single limb. */
3150 #define udiv_rnnd_preinv(r, nh, nl, d, di) \
3151 do { \
3152 mp_limb_t _qh, _ql, _r, _mask; \
3153 umul_ppmm (_qh, _ql, (nh), (di)); \
3154 if (__builtin_constant_p (nl) && (nl) == 0) \
3155 { \
3156 _r = ~(_qh + (nh)) * (d); \
3157 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3158 _r += _mask & (d); \
3159 } \
3160 else \
3161 { \
3162 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
3163 _r = (nl) - _qh * (d); \
3164 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3165 _r += _mask & (d); \
3166 if (UNLIKELY (_r >= (d))) \
3167 _r -= (d); \
3168 } \
3169 (r) = _r; \
3170 } while (0)
3171
3172 /* Compute the quotient and remainder for n / d. Requires d
3173    >= B^2 / 2 and n < d B. dinv is the inverse
3174
3175 floor ((B^3 - 1) / (d0 + d1 B)) - B.
3176
3177 NOTE: Output variables are updated multiple times. Only some inputs
3178 and outputs may overlap.
3179 */
3180 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
3181 do { \
3182 mp_limb_t _q0, _t1, _t0, _mask; \
3183 umul_ppmm ((q), _q0, (n2), (dinv)); \
3184 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
3185 \
3186 /* Compute the two most significant limbs of n - q'd */ \
3187 (r1) = (n1) - (d1) * (q); \
3188 sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
3189 umul_ppmm (_t1, _t0, (d0), (q)); \
3190 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
3191 (q)++; \
3192 \
3193 /* Conditionally adjust q and the remainders */ \
3194 _mask = - (mp_limb_t) ((r1) >= _q0); \
3195 (q) += _mask; \
3196 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
3197 if (UNLIKELY ((r1) >= (d1))) \
3198 { \
3199 if ((r1) > (d1) || (r0) >= (d0)) \
3200 { \
3201 (q)++; \
3202 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
3203 } \
3204 } \
3205 } while (0)
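/* Illustrative use (a sketch only): for a normalized two-limb divisor
   {d1,d0} (d1 with its high bit set), invert_pi1 computes the inverse that
   udiv_qr_3by2 uses to reduce a three-limb dividend {n2,n1,n0} whose two
   high limbs are below {d1,d0}:

       gmp_pi1_t dinv;            (gmp_pi1_t is defined earlier in this file)
       mp_limb_t q, r1, r0;
       invert_pi1 (dinv, d1, d0);
       udiv_qr_3by2 (q, r1, r0, n2, n1, n0, d1, d0, dinv.inv32);

   The limbs d1, d0, n2, n1, n0 are hypothetical values here.  */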
3206
3207 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
3208 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
3209 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
3210 #endif
3211
3212
3213 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
3214 plain mpn_divrem_1. The default is yes, since the few CISC chips where
3215 preinv is not good have defines saying so. */
3216 #ifndef USE_PREINV_DIVREM_1
3217 #define USE_PREINV_DIVREM_1 1
3218 #endif
3219
3220 #if USE_PREINV_DIVREM_1
3221 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
3222 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
3223 #else
3224 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
3225 mpn_divrem_1 (qp, xsize, ap, size, d)
3226 #endif
3227
3228 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
3229 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
3230 #endif
3231
3232 /* This selection may seem backwards. The reason mpn_mod_1 typically takes
3233 over for larger sizes is that it uses the mod_1_1 function. */
3234 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
3235 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
3236 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
3237 : mpn_mod_1 (src, size, divisor))
3238
3239
3240 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
3241 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
3242 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
3243 #endif
3244
3245
3246 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
3247 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
3248 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
3249 modexact are faster at all sizes, so the defaults are 0. Those CPUs
3250 where this is not right have a tuned threshold. */
3251 #ifndef DIVEXACT_1_THRESHOLD
3252 #define DIVEXACT_1_THRESHOLD 0
3253 #endif
3254 #ifndef BMOD_1_TO_MOD_1_THRESHOLD
3255 #define BMOD_1_TO_MOD_1_THRESHOLD 10
3256 #endif
3257
3258 #define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \
3259 do { \
3260 if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \
3261 ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \
3262 else \
3263 { \
3264 ASSERT (mpn_mod_1 (up, n, d) == 0); \
3265 mpn_divexact_1 (rp, up, n, d); \
3266 } \
3267 } while (0)
3268
3269 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
3270 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
3271 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3272 #endif
3273
3274 #if HAVE_NATIVE_mpn_modexact_1_odd
3275 #define mpn_modexact_1_odd __MPN(modexact_1_odd)
3276 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3277 #else
3278 #define mpn_modexact_1_odd(src,size,divisor) \
3279 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
3280 #endif
3281
3282 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
3283 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
3284 ? mpn_modexact_1_odd (src, size, divisor) \
3285 : mpn_mod_1 (src, size, divisor))
3286
3287 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
3288 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
3289 n must be odd (otherwise such an inverse doesn't exist).
3290
3291 This is not to be confused with invert_limb(), which is completely
3292 different.
3293
3294 The table lookup gives an inverse with the low 8 bits valid, and each
3295 multiply step doubles the number of bits. See Jebelean "An algorithm for
3296 exact division" end of section 4 (reference in gmp.texi).
3297
3298 Possible enhancement: Could use UHWtype until the last step, if half-size
3299 multiplies are faster (might help under _LONG_LONG_LIMB).
3300
3301 Alternative: As noted in Granlund and Montgomery "Division by Invariant
3302 Integers using Multiplication" (reference in gmp.texi), n itself gives a
3303 3-bit inverse immediately, and could be used instead of a table lookup.
3304 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
3305 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
3306
3307 #define binvert_limb_table __gmp_binvert_limb_table
3308 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128];
3309
3310 #define binvert_limb(inv,n) \
3311 do { \
3312 mp_limb_t __n = (n); \
3313 mp_limb_t __inv; \
3314 ASSERT ((__n & 1) == 1); \
3315 \
3316 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \
3317 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \
3318 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \
3319 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \
3320 \
3321 if (GMP_NUMB_BITS > 64) \
3322 { \
3323 int __invbits = 64; \
3324 do { \
3325 __inv = 2 * __inv - __inv * __inv * __n; \
3326 __invbits *= 2; \
3327 } while (__invbits < GMP_NUMB_BITS); \
3328 } \
3329 \
3330 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
3331 (inv) = __inv & GMP_NUMB_MASK; \
3332 } while (0)
3333 #define modlimb_invert binvert_limb /* backward compatibility */
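/* A small worked example (purely illustrative): for n = 7 the table lookup
   gives the 8-bit inverse 0xB7, since 7 * 0xB7 = 0x501 == 1 mod 2^8.  One
   step of 2*inv - inv*inv*n then produces a value whose low 16 bits are
   0x6DB7, and indeed 7 * 0x6DB7 = 0x30001 == 1 mod 2^16; each further step
   doubles the number of correct low bits in the same way.  */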
3334
3335 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
3336 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
3337 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
3338 we need to start from GMP_NUMB_MAX>>1. */
3339 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
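/* As a check, with GMP_NUMB_BITS == 32 this expands to 0xAAAAAAAB, and
   3 * 0xAAAAAAAB = 0x200000001 == 1 mod 2^32 as required.  */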
3340
3341 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
3342 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
3343 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
3344 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
3345
3346
3347 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
3348
3349 It's not clear whether this is the best way to do this calculation.
3350 Anything congruent to -a would be fine for the one limb congruence
3351 tests. */
3352
3353 #define NEG_MOD(r, a, d) \
3354 do { \
3355 ASSERT ((d) != 0); \
3356 ASSERT_LIMB (a); \
3357 ASSERT_LIMB (d); \
3358 \
3359 if ((a) <= (d)) \
3360 { \
3361 /* small a is reasonably likely */ \
3362 (r) = (d) - (a); \
3363 } \
3364 else \
3365 { \
3366 unsigned __twos; \
3367 mp_limb_t __dnorm; \
3368 count_leading_zeros (__twos, d); \
3369 __twos -= GMP_NAIL_BITS; \
3370 __dnorm = (d) << __twos; \
3371 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
3372 } \
3373 \
3374 ASSERT_LIMB (r); \
3375 } while (0)
3376
3377 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
3378 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
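/* For example LOW_ZEROS_MASK (0x28) is 0x7: 0x28 & -0x28 isolates the lowest
   set bit (0x8), and subtracting 1 turns it into a mask of the zeros below.  */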
3379
3380
3381 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
3382 to 0 if there's an even number. "n" should be an unsigned long and "p"
3383 an int. */
3384
3385 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3386 #define ULONG_PARITY(p, n) \
3387 do { \
3388 int __p; \
3389 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
3390 (p) = __p & 1; \
3391 } while (0)
3392 #endif
3393
3394 /* Cray intrinsic _popcnt. */
3395 #ifdef _CRAY
3396 #define ULONG_PARITY(p, n) \
3397 do { \
3398 (p) = _popcnt (n) & 1; \
3399 } while (0)
3400 #endif
3401
3402 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3403 && ! defined (NO_ASM) && defined (__ia64)
3404 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
3405 to a 64 bit unsigned long long for popcnt */
3406 #define ULONG_PARITY(p, n) \
3407 do { \
3408 unsigned long long __n = (unsigned long) (n); \
3409 int __p; \
3410 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
3411 (p) = __p & 1; \
3412 } while (0)
3413 #endif
3414
3415 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3416 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3417 #if __GMP_GNUC_PREREQ (3,1)
3418 #define __GMP_qm "=Qm"
3419 #define __GMP_q "=Q"
3420 #else
3421 #define __GMP_qm "=qm"
3422 #define __GMP_q "=q"
3423 #endif
3424 #define ULONG_PARITY(p, n) \
3425 do { \
3426 char __p; \
3427 unsigned long __n = (n); \
3428 __n ^= (__n >> 16); \
3429 __asm__ ("xorb %h1, %b1\n\t" \
3430 "setpo %0" \
3431 : __GMP_qm (__p), __GMP_q (__n) \
3432 : "1" (__n)); \
3433 (p) = __p; \
3434 } while (0)
3435 #endif
3436
3437 #if ! defined (ULONG_PARITY)
3438 #define ULONG_PARITY(p, n) \
3439 do { \
3440 unsigned long __n = (n); \
3441 int __p = 0; \
3442 do \
3443 { \
3444 __p ^= 0x96696996L >> (__n & 0x1F); \
3445 __n >>= 5; \
3446 } \
3447 while (__n != 0); \
3448 \
3449 (p) = __p & 1; \
3450 } while (0)
3451 #endif
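/* In the generic version above, 0x96696996 acts as a 32-entry parity table
   packed into a single word: bit k of that constant is the parity of k, so
   each loop iteration XORs in the parity of the next five bits of __n.  */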
3452
3453
3454 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
3455 version 3.1 at least) doesn't seem to know how to generate rlwimi for
3456 anything other than bit-fields, so use "asm". */
3457 #if defined (__GNUC__) && ! defined (NO_ASM) \
3458 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3459 #define BSWAP_LIMB(dst, src) \
3460 do { \
3461 mp_limb_t __bswapl_src = (src); \
3462 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
3463 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
3464 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
3465 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
3466     __asm__ ("rlwimi %0, %2, 8, 8, 15"      /* 3rd high */       \
3467 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
3468 (dst) = __tmp1 | __tmp2; /* whole */ \
3469 } while (0)
3470 #endif
3471
3472 /* bswap is available on i486 and up and is fast. A combination rorw $8 /
3473 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3474 kernel with xchgb instead of rorw), but this is not done here, because
3475 i386 means generic x86 and mixing word and dword operations will cause
3476 partial register stalls on P6 chips. */
3477 #if defined (__GNUC__) && ! defined (NO_ASM) \
3478 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
3479 && GMP_LIMB_BITS == 32
3480 #define BSWAP_LIMB(dst, src) \
3481 do { \
3482 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
3483 } while (0)
3484 #endif
3485
3486 #if defined (__GNUC__) && ! defined (NO_ASM) \
3487 && defined (__amd64__) && GMP_LIMB_BITS == 64
3488 #define BSWAP_LIMB(dst, src) \
3489 do { \
3490 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
3491 } while (0)
3492 #endif
3493
3494 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3495 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3496 #define BSWAP_LIMB(dst, src) \
3497 do { \
3498 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
3499 } while (0)
3500 #endif
3501
3502 /* As per glibc. */
3503 #if defined (__GNUC__) && ! defined (NO_ASM) \
3504 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3505 #define BSWAP_LIMB(dst, src) \
3506 do { \
3507 mp_limb_t __bswapl_src = (src); \
3508 __asm__ ("ror%.w %#8, %0\n\t" \
3509 "swap %0\n\t" \
3510 "ror%.w %#8, %0" \
3511 : "=d" (dst) \
3512 : "0" (__bswapl_src)); \
3513 } while (0)
3514 #endif
3515
3516 #if ! defined (BSWAP_LIMB)
3517 #if GMP_LIMB_BITS == 8
3518 #define BSWAP_LIMB(dst, src) \
3519 do { (dst) = (src); } while (0)
3520 #endif
3521 #if GMP_LIMB_BITS == 16
3522 #define BSWAP_LIMB(dst, src) \
3523 do { \
3524 (dst) = ((src) << 8) + ((src) >> 8); \
3525 } while (0)
3526 #endif
3527 #if GMP_LIMB_BITS == 32
3528 #define BSWAP_LIMB(dst, src) \
3529 do { \
3530 (dst) = \
3531 ((src) << 24) \
3532 + (((src) & 0xFF00) << 8) \
3533 + (((src) >> 8) & 0xFF00) \
3534 + ((src) >> 24); \
3535 } while (0)
3536 #endif
3537 #if GMP_LIMB_BITS == 64
3538 #define BSWAP_LIMB(dst, src) \
3539 do { \
3540 (dst) = \
3541 ((src) << 56) \
3542 + (((src) & 0xFF00) << 40) \
3543 + (((src) & 0xFF0000) << 24) \
3544 + (((src) & 0xFF000000) << 8) \
3545 + (((src) >> 8) & 0xFF000000) \
3546 + (((src) >> 24) & 0xFF0000) \
3547 + (((src) >> 40) & 0xFF00) \
3548 + ((src) >> 56); \
3549 } while (0)
3550 #endif
3551 #endif
3552
3553 #if ! defined (BSWAP_LIMB)
3554 #define BSWAP_LIMB(dst, src) \
3555 do { \
3556 mp_limb_t __bswapl_src = (src); \
3557 mp_limb_t __dstl = 0; \
3558 int __i; \
3559 for (__i = 0; __i < GMP_LIMB_BYTES; __i++) \
3560 { \
3561 __dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \
3562 __bswapl_src >>= 8; \
3563 } \
3564 (dst) = __dstl; \
3565 } while (0)
3566 #endif
3567
3568
3569 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3570 those we know are fast. */
3571 #if defined (__GNUC__) && ! defined (NO_ASM) \
3572 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3573 && (HAVE_HOST_CPU_powerpc604 \
3574 || HAVE_HOST_CPU_powerpc604e \
3575 || HAVE_HOST_CPU_powerpc750 \
3576 || HAVE_HOST_CPU_powerpc7400)
3577 #define BSWAP_LIMB_FETCH(limb, src) \
3578 do { \
3579 mp_srcptr __blf_src = (src); \
3580 mp_limb_t __limb; \
3581 __asm__ ("lwbrx %0, 0, %1" \
3582 : "=r" (__limb) \
3583 : "r" (__blf_src), \
3584 "m" (*__blf_src)); \
3585 (limb) = __limb; \
3586 } while (0)
3587 #endif
3588
3589 #if ! defined (BSWAP_LIMB_FETCH)
3590 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
3591 #endif
3592
3593
3594 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3595 know are fast. FIXME: Is this necessary? */
3596 #if defined (__GNUC__) && ! defined (NO_ASM) \
3597 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3598 && (HAVE_HOST_CPU_powerpc604 \
3599 || HAVE_HOST_CPU_powerpc604e \
3600 || HAVE_HOST_CPU_powerpc750 \
3601 || HAVE_HOST_CPU_powerpc7400)
3602 #define BSWAP_LIMB_STORE(dst, limb) \
3603 do { \
3604 mp_ptr __dst = (dst); \
3605 mp_limb_t __limb = (limb); \
3606 __asm__ ("stwbrx %1, 0, %2" \
3607 : "=m" (*__dst) \
3608 : "r" (__limb), \
3609 "r" (__dst)); \
3610 } while (0)
3611 #endif
3612
3613 #if ! defined (BSWAP_LIMB_STORE)
3614 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
3615 #endif
3616
3617
3618 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3619 #define MPN_BSWAP(dst, src, size) \
3620 do { \
3621 mp_ptr __dst = (dst); \
3622 mp_srcptr __src = (src); \
3623 mp_size_t __size = (size); \
3624 mp_size_t __i; \
3625 ASSERT ((size) >= 0); \
3626 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
3627 CRAY_Pragma ("_CRI ivdep"); \
3628 for (__i = 0; __i < __size; __i++) \
3629 { \
3630 BSWAP_LIMB_FETCH (*__dst, __src); \
3631 __dst++; \
3632 __src++; \
3633 } \
3634 } while (0)
3635
3636 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3637 #define MPN_BSWAP_REVERSE(dst, src, size) \
3638 do { \
3639 mp_ptr __dst = (dst); \
3640 mp_size_t __size = (size); \
3641 mp_srcptr __src = (src) + __size - 1; \
3642 mp_size_t __i; \
3643 ASSERT ((size) >= 0); \
3644 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
3645 CRAY_Pragma ("_CRI ivdep"); \
3646 for (__i = 0; __i < __size; __i++) \
3647 { \
3648 BSWAP_LIMB_FETCH (*__dst, __src); \
3649 __dst++; \
3650 __src--; \
3651 } \
3652 } while (0)
3653
3654
3655 /* No processor claiming to be SPARC v9 compliant seems to
3656 implement the POPC instruction. Disable pattern for now. */
3657 #if 0
3658 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3659 #define popc_limb(result, input) \
3660 do { \
3661 DItype __res; \
3662 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
3663 } while (0)
3664 #endif
3665 #endif
3666
3667 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3668 #define popc_limb(result, input) \
3669 do { \
3670 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
3671 } while (0)
3672 #endif
3673
3674 /* Cray intrinsic. */
3675 #ifdef _CRAY
3676 #define popc_limb(result, input) \
3677 do { \
3678 (result) = _popcnt (input); \
3679 } while (0)
3680 #endif
3681
3682 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3683 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3684 #define popc_limb(result, input) \
3685 do { \
3686 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
3687 } while (0)
3688 #endif
3689
3690 /* Cool population count of an mp_limb_t.
3691    You have to figure out how this works; we won't tell you!
3692
3693 The constants could also be expressed as:
3694 0x55... = [2^N / 3] = [(2^N-1)/3]
3695 0x33... = [2^N / 5] = [(2^N-1)/5]
3696 0x0f... = [2^N / 17] = [(2^N-1)/17]
3697 (N is GMP_LIMB_BITS, [] denotes truncation.) */
3698
3699 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3700 #define popc_limb(result, input) \
3701 do { \
3702 mp_limb_t __x = (input); \
3703 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3704 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3705 __x = ((__x >> 4) + __x); \
3706 (result) = __x & 0x0f; \
3707 } while (0)
3708 #endif
3709
3710 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3711 #define popc_limb(result, input) \
3712 do { \
3713 mp_limb_t __x = (input); \
3714 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3715 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3716 __x += (__x >> 4); \
3717 __x = ((__x >> 8) & MP_LIMB_T_MAX/4369)+(__x & MP_LIMB_T_MAX/4369); \
3718 (result) = __x; \
3719 } while (0)
3720 #endif
3721
3722 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3723 #define popc_limb(result, input) \
3724 do { \
3725 mp_limb_t __x = (input); \
3726 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3727 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3728 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3729 __x = ((__x >> 8) + __x); \
3730 __x = ((__x >> 16) + __x); \
3731 (result) = __x & 0xff; \
3732 } while (0)
3733 #endif
3734
3735 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3736 #define popc_limb(result, input) \
3737 do { \
3738 mp_limb_t __x = (input); \
3739 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3740 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3741 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3742 __x = ((__x >> 8) + __x); \
3743 __x = ((__x >> 16) + __x); \
3744 __x = ((__x >> 32) + __x); \
3745 (result) = __x & 0xff; \
3746 } while (0)
3747 #endif
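/* The versions above are all the usual bit-slice reduction: the first two
   steps leave, in each 2-bit and then each 4-bit field, the number of bits
   originally set there, and the remaining shift-and-add steps sum those
   field counts, masking at the end (or in the last step) to discard what
   the unmasked additions leave in the higher fields.  */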
3748
3749
3750 /* Define stuff for longlong.h. */
3751 #if HAVE_ATTRIBUTE_MODE
3752 typedef unsigned int UQItype __attribute__ ((mode (QI)));
3753 typedef int SItype __attribute__ ((mode (SI)));
3754 typedef unsigned int USItype __attribute__ ((mode (SI)));
3755 typedef int DItype __attribute__ ((mode (DI)));
3756 typedef unsigned int UDItype __attribute__ ((mode (DI)));
3757 #else
3758 typedef unsigned char UQItype;
3759 typedef long SItype;
3760 typedef unsigned long USItype;
3761 #if HAVE_LONG_LONG
3762 typedef long long int DItype;
3763 typedef unsigned long long int UDItype;
3764 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
3765 typedef long int DItype;
3766 typedef unsigned long int UDItype;
3767 #endif
3768 #endif
3769
3770 typedef mp_limb_t UWtype;
3771 typedef unsigned int UHWtype;
3772 #define W_TYPE_SIZE GMP_LIMB_BITS
3773
3774 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3775
3776 Bit field packing is "implementation defined" according to C99, which
3777 leaves us at the compiler's mercy here. For some systems packing is
3778 defined in the ABI (eg. x86). In any case so far it seems universal that
3779 little endian systems pack from low to high, and big endian from high to
3780 low within the given type.
3781
3782 Within the fields we rely on the integer endianness being the same as the
3783 float endianness, this is true everywhere we know of and it'd be a fairly
3784 strange system that did anything else. */
3785
3786 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3787 #define _GMP_IEEE_FLOATS 1
3788 union ieee_double_extract
3789 {
3790 struct
3791 {
3792 gmp_uint_least32_t manh:20;
3793 gmp_uint_least32_t exp:11;
3794 gmp_uint_least32_t sig:1;
3795 gmp_uint_least32_t manl:32;
3796 } s;
3797 double d;
3798 };
3799 #endif
3800
3801 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3802 #define _GMP_IEEE_FLOATS 1
3803 union ieee_double_extract
3804 {
3805 struct
3806 {
3807 gmp_uint_least32_t manl:32;
3808 gmp_uint_least32_t manh:20;
3809 gmp_uint_least32_t exp:11;
3810 gmp_uint_least32_t sig:1;
3811 } s;
3812 double d;
3813 };
3814 #endif
3815
3816 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3817 #define _GMP_IEEE_FLOATS 1
3818 union ieee_double_extract
3819 {
3820 struct
3821 {
3822 gmp_uint_least32_t sig:1;
3823 gmp_uint_least32_t exp:11;
3824 gmp_uint_least32_t manh:20;
3825 gmp_uint_least32_t manl:32;
3826 } s;
3827 double d;
3828 };
3829 #endif
3830
3831 #if HAVE_DOUBLE_VAX_D
3832 union double_extract
3833 {
3834 struct
3835 {
3836 gmp_uint_least32_t man3:7; /* highest 7 bits */
3837 gmp_uint_least32_t exp:8; /* excess-128 exponent */
3838 gmp_uint_least32_t sig:1;
3839 gmp_uint_least32_t man2:16;
3840 gmp_uint_least32_t man1:16;
3841 gmp_uint_least32_t man0:16; /* lowest 16 bits */
3842 } s;
3843 double d;
3844 };
3845 #endif
3846
3847 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3848 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
3849 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3850 /* Maximum number of limbs it will take to store any `double'.
3851 We assume doubles have 53 mantissa bits. */
3852 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
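/* For instance LIMBS_PER_DOUBLE is 3 with 32-bit limbs and 2 with 64-bit
   limbs (no nails): enough room for 53 mantissa bits even when they do not
   start on a limb boundary.  */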
3853
3854 __GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
3855
3856 #define mpn_get_d __gmpn_get_d
3857 __GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
3858
3859
3860 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3861 a_inf if x is an infinity. Both are considered unlikely values, for
3862 branch prediction. */
3863
3864 #if _GMP_IEEE_FLOATS
3865 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3866 do { \
3867 union ieee_double_extract u; \
3868 u.d = (x); \
3869 if (UNLIKELY (u.s.exp == 0x7FF)) \
3870 { \
3871 if (u.s.manl == 0 && u.s.manh == 0) \
3872 { a_inf; } \
3873 else \
3874 { a_nan; } \
3875 } \
3876 } while (0)
3877 #endif
3878
3879 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3880 /* no nans or infs in these formats */
3881 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3882 do { } while (0)
3883 #endif
3884
3885 #ifndef DOUBLE_NAN_INF_ACTION
3886 /* Unknown format, try something generic.
3887 NaN should be "unordered", so x!=x.
3888 Inf should be bigger than DBL_MAX. */
3889 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3890 do { \
3891 { \
3892 if (UNLIKELY ((x) != (x))) \
3893 { a_nan; } \
3894 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
3895 { a_inf; } \
3896 } \
3897 } while (0)
3898 #endif
3899
3900 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3901 in the coprocessor, which means a bigger exponent range than normal, and
3902 depending on the rounding mode, a bigger mantissa than normal. (See
3903 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
3904 "d" through memory to force any rounding and overflows to occur.
3905
3906 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3907 registers, where there's no such extra precision and no need for the
3908 FORCE_DOUBLE. We don't bother to detect this since the present uses for
3909 FORCE_DOUBLE are only in test programs and default generic C code.
3910
3911 Not quite sure that an "automatic volatile" will use memory, but it does
3912 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3913 apparently matching operands like "0" are only allowed on a register
3914 output. gcc 3.4 warns about this, though in fact it and past versions
3915 seem to put the operand through memory as hoped. */
3916
3917 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
3918 || defined (__amd64__))
3919 #define FORCE_DOUBLE(d) \
3920 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3921 #else
3922 #define FORCE_DOUBLE(d) do { } while (0)
3923 #endif
3924
3925
3926 __GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
3927
3928 __GMP_DECLSPEC extern int __gmp_junk;
3929 __GMP_DECLSPEC extern const int __gmp_0;
3930 __GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
3931 __GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
3932 __GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
3933 __GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
3934 #define GMP_ERROR(code) __gmp_exception (code)
3935 #define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
3936 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
3937
3938 #if defined _LONG_LONG_LIMB
3939 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3940 #else /* not _LONG_LONG_LIMB */
3941 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3942 #endif /* _LONG_LONG_LIMB */
3943
3944 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3945 #if GMP_NUMB_BITS == 2
3946 #define PP 0x3 /* 3 */
3947 #define PP_FIRST_OMITTED 5
3948 #endif
3949 #if GMP_NUMB_BITS == 4
3950 #define PP 0xF /* 3 x 5 */
3951 #define PP_FIRST_OMITTED 7
3952 #endif
3953 #if GMP_NUMB_BITS == 8
3954 #define PP 0x69 /* 3 x 5 x 7 */
3955 #define PP_FIRST_OMITTED 11
3956 #endif
3957 #if GMP_NUMB_BITS == 16
3958 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
3959 #define PP_FIRST_OMITTED 17
3960 #endif
3961 #if GMP_NUMB_BITS == 32
3962 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
3963 #define PP_INVERTED 0x53E5645CL
3964 #define PP_FIRST_OMITTED 31
3965 #endif
3966 #if GMP_NUMB_BITS == 64
3967 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
3968 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3969 #define PP_FIRST_OMITTED 59
3970 #endif
3971 #ifndef PP_FIRST_OMITTED
3972 #define PP_FIRST_OMITTED 3
3973 #endif
3974
3975 typedef struct
3976 {
3977 mp_limb_t d0, d1;
3978 } mp_double_limb_t;
3979
3980 #define mpn_gcd_22 __MPN (gcd_22)
3981 __GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
3982
3983 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3984 zero bit representing +1 and a one bit representing -1. Bits other than
3985 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
3986 used to ensure the expressions are "int"s even if a and/or b might be
3987 other types.
3988
3989 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3990 and their speed is important. Expressions are used rather than
3991 conditionals to accumulate sign changes, which effectively means XORs
3992 instead of conditional JUMPs. */
3993
3994 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3995 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3996
3997 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3998 #define JACOBI_U0(a) ((a) == 1)
3999
4000 /* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
4001 come up with a better name. */
4002
4003 /* (a/0), with a given by low and size;
4004 is 1 if a=+/-1, 0 otherwise */
4005 #define JACOBI_LS0(alow,asize) \
4006 (((asize) == 1 || (asize) == -1) && (alow) == 1)
4007
4008 /* (a/0), with a an mpz_t;
4009 fetch of low limb always valid, even if size is zero */
4010 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
4011
4012 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
4013 #define JACOBI_0U(b) ((b) == 1)
4014
4015 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
4016 #define JACOBI_0S(b) ((b) == 1 || (b) == -1)
4017
4018 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
4019 #define JACOBI_0LS(blow,bsize) \
4020 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
4021
4022 /* Convert a bit1 to +1 or -1. */
4023 #define JACOBI_BIT1_TO_PN(result_bit1) \
4024 (1 - ((int) (result_bit1) & 2))
4025
4026 /* (2/b), with b unsigned and odd;
4027 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
4028 hence obtained from (b>>1)^b */
4029 #define JACOBI_TWO_U_BIT1(b) \
4030 ((int) (((b) >> 1) ^ (b)))
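/* For example b = 7 gives (7>>1)^7 = 4, bit 1 clear, ie. +1, matching
   (2/7) = +1; and b = 3 gives (3>>1)^3 = 2, bit 1 set, ie. -1, matching
   (2/3) = -1.  */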
4031
4032 /* (2/b)^twos, with b unsigned and odd */
4033 #define JACOBI_TWOS_U_BIT1(twos, b) \
4034 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
4035
4036 /* (2/b)^twos, with b unsigned and odd */
4037 #define JACOBI_TWOS_U(twos, b) \
4038 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
4039
4040 /* (-1/b), with b odd (signed or unsigned);
4041 is (-1)^((b-1)/2) */
4042 #define JACOBI_N1B_BIT1(b) \
4043 ((int) (b))
4044
4045 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
4046 is (-1/b) if a<0, or +1 if a>=0 */
4047 #define JACOBI_ASGN_SU_BIT1(a, b) \
4048 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
4049
4050 /* (a/b) effect due to sign of b: signed/signed;
4051 is -1 if a and b both negative, +1 otherwise */
4052 #define JACOBI_BSGN_SS_BIT1(a, b) \
4053 ((((a)<0) & ((b)<0)) << 1)
4054
4055 /* (a/b) effect due to sign of b: signed/mpz;
4056 is -1 if a and b both negative, +1 otherwise */
4057 #define JACOBI_BSGN_SZ_BIT1(a, b) \
4058 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
4059
4060 /* (a/b) effect due to sign of b: mpz/signed;
4061 is -1 if a and b both negative, +1 otherwise */
4062 #define JACOBI_BSGN_ZS_BIT1(a, b) \
4063 JACOBI_BSGN_SZ_BIT1 (b, a)
4064
4065 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
4066 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
4067 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
4068 because this is used in a couple of places with only bit 1 of a or b
4069 valid. */
4070 #define JACOBI_RECIP_UU_BIT1(a, b) \
4071 ((int) ((a) & (b)))
4072
4073 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
4074 decrementing b_size. b_low should be b_ptr[0] on entry, and will be
4075 updated for the new b_ptr. result_bit1 is updated according to the
4076 factors of 2 stripped, as per (a/2). */
4077 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
4078 do { \
4079 ASSERT ((b_size) >= 1); \
4080 ASSERT ((b_low) == (b_ptr)[0]); \
4081 \
4082 while (UNLIKELY ((b_low) == 0)) \
4083 { \
4084 (b_size)--; \
4085 ASSERT ((b_size) >= 1); \
4086 (b_ptr)++; \
4087 (b_low) = *(b_ptr); \
4088 \
4089 ASSERT (((a) & 1) != 0); \
4090 if ((GMP_NUMB_BITS % 2) == 1) \
4091 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
4092 } \
4093 } while (0)
4094
4095 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
4096 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and
4097 unsigned. modexact_1_odd effectively calculates -a mod b, and
4098 result_bit1 is adjusted for the factor of -1.
4099
4100 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
4101 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
4102 factor to introduce into result_bit1, so for that case use mpn_mod_1
4103 unconditionally.
4104
4105 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
4106 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result,
4107 or not skip a divide step, or something. */
4108
4109 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
4110 do { \
4111 mp_srcptr __a_ptr = (a_ptr); \
4112 mp_size_t __a_size = (a_size); \
4113 mp_limb_t __b = (b); \
4114 \
4115 ASSERT (__a_size >= 1); \
4116 ASSERT (__b & 1); \
4117 \
4118 if ((GMP_NUMB_BITS % 2) != 0 \
4119 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
4120 { \
4121 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
4122 } \
4123 else \
4124 { \
4125 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
4126 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
4127 } \
4128 } while (0)
4129
4130 /* State for the Jacobi computation using Lehmer. */
4131 #define jacobi_table __gmp_jacobi_table
4132 __GMP_DECLSPEC extern const unsigned char jacobi_table[208];
4133
4134 /* Bit layout for the initial state. b must be odd.
4135
4136 3 2 1 0
4137 +--+--+--+--+
4138 |a1|a0|b1| s|
4139 +--+--+--+--+
4140
4141 */
4142 static inline unsigned
4143 mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
4144 {
4145 ASSERT (b & 1);
4146 ASSERT (s <= 1);
4147 return ((a & 3) << 2) + (b & 2) + s;
4148 }
4149
4150 static inline int
4151 mpn_jacobi_finish (unsigned bits)
4152 {
4153 /* (a, b) = (1,0) or (0,1) */
4154 ASSERT ( (bits & 14) == 0);
4155
4156 return 1-2*(bits & 1);
4157 }
4158
4159 static inline unsigned
4160 mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
4161 {
4162 /* FIXME: Could halve table size by not including the e bit in the
4163 * index, and instead xor when updating. Then the lookup would be
4164 * like
4165 *
4166 * bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
4167 */
4168
4169 ASSERT (bits < 26);
4170 ASSERT (denominator < 2);
4171 ASSERT (q < 4);
4172
4173 /* For almost all calls, denominator is constant and quite often q
4174 is constant too. So use addition rather than or, so the compiler
4175      can put the constant part into the offset of an indexed
4176 addressing instruction.
4177
4178 With constant denominator, the below table lookup is compiled to
4179
4180 C Constant q = 1, constant denominator = 1
4181 movzbl table+5(%eax,8), %eax
4182
4183 or
4184
4185 C q in %edx, constant denominator = 1
4186 movzbl table+4(%edx,%eax,8), %eax
4187
4188 One could maintain the state preshifted 3 bits, to save a shift
4189 here, but at least on x86, that's no real saving.
4190 */
4191 return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
4192 }
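/* A rough sketch of how the three helpers above fit together (the real
   callers are the mpn/mpz jacobi routines; the names a0, b0, den and q here
   are hypothetical):

       unsigned bits = mpn_jacobi_init (a0, b0, 0);
       ... for each reduction step, with quotient q and the current
           denominator selected by den (0 or 1):
       bits = mpn_jacobi_update (bits, den, q & 3);
       ... once the reduction has finished:
       int result = mpn_jacobi_finish (bits);

   giving +1 or -1 for the Jacobi symbol.  */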
4193
4194 /* Matrix multiplication */
4195 #define mpn_matrix22_mul __MPN(matrix22_mul)
4196 __GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
4197 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
4198 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
4199
4200 #ifndef MATRIX22_STRASSEN_THRESHOLD
4201 #define MATRIX22_STRASSEN_THRESHOLD 30
4202 #endif
4203
4204 /* HGCD definitions */
4205
4206 /* Extract one numb, shifting count bits left
4207 ________ ________
4208 |___xh___||___xl___|
4209 |____r____|
4210 >count <
4211
4212 The count includes any nail bits, so it should work fine if count
4213 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
4214 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
4215
4216 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
4217 those calls where the count high bits of xh may be non-zero.
4218 */
4219
4220 #define MPN_EXTRACT_NUMB(count, xh, xl) \
4221 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
4222 ((xl) >> (GMP_LIMB_BITS - (count))))
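/* A small example, with 32-bit limbs and no nails: count = 8,
   xh = 0x000000AB, xl = 0xCD000000 gives
   MPN_EXTRACT_NUMB (8, xh, xl) == 0x0000ABCD.  */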
4223
4224
4225 /* The non-negative matrix M = (u, u'; v,v') keeps track of the
4226 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
4227 than a, b. The determinant must always be one, so that M has an
4228 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
4229 bits. */
4230 struct hgcd_matrix1
4231 {
4232 mp_limb_t u[2][2];
4233 };
4234
4235 #define mpn_hgcd2 __MPN (hgcd2)
4236 __GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *);
4237
4238 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
4239 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
4240
4241 #define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
4242 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
4243
4244 #define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
4245 __GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
4246
4247 struct hgcd_matrix
4248 {
4249 mp_size_t alloc; /* for sanity checking only */
4250 mp_size_t n;
4251 mp_ptr p[2][2];
4252 };
4253
4254 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
4255
4256 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
4257 __GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
4258
4259 #define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
4260 __GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
4261
4262 #define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
4263 __GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
4264
4265 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
4266 __GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
4267
4268 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
4269 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
4270
4271 #define mpn_hgcd_step __MPN(hgcd_step)
4272 __GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4273
4274 #define mpn_hgcd_reduce __MPN(hgcd_reduce)
4275 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
4276
4277 #define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
4278 __GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
4279
4280 #define mpn_hgcd_itch __MPN (hgcd_itch)
4281 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST;
4282
4283 #define mpn_hgcd __MPN (hgcd)
4284 __GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4285
4286 #define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
4287 __GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST;
4288
4289 #define mpn_hgcd_appr __MPN (hgcd_appr)
4290 __GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4291
4292 #define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
4293 __GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
4294
4295 typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
4296
4297 /* Needs storage for the quotient */
4298 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
4299
4300 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
4301 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
4302
4303 struct gcdext_ctx
4304 {
4305 /* Result parameters. */
4306 mp_ptr gp;
4307 mp_size_t gn;
4308 mp_ptr up;
4309 mp_size_t *usize;
4310
4311 /* Cofactors updated in each step. */
4312 mp_size_t un;
4313 mp_ptr u0, u1, tp;
4314 };
4315
4316 #define mpn_gcdext_hook __MPN (gcdext_hook)
4317 gcd_subdiv_step_hook mpn_gcdext_hook;
4318
4319 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
4320
4321 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
4322 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
4323
4324 /* 4*(an + 1) + 4*(bn + 1) + an */
4325 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
4326
4327 #ifndef HGCD_THRESHOLD
4328 #define HGCD_THRESHOLD 400
4329 #endif
4330
4331 #ifndef HGCD_APPR_THRESHOLD
4332 #define HGCD_APPR_THRESHOLD 400
4333 #endif
4334
4335 #ifndef HGCD_REDUCE_THRESHOLD
4336 #define HGCD_REDUCE_THRESHOLD 1000
4337 #endif
4338
4339 #ifndef GCD_DC_THRESHOLD
4340 #define GCD_DC_THRESHOLD 1000
4341 #endif
4342
4343 #ifndef GCDEXT_DC_THRESHOLD
4344 #define GCDEXT_DC_THRESHOLD 600
4345 #endif
4346
4347 /* Definitions for mpn_set_str and mpn_get_str */
4348 struct powers
4349 {
4350 mp_ptr p; /* actual power value */
4351 mp_size_t n; /* # of limbs at p */
4352 mp_size_t shift; /* weight of lowest limb, in limb base B */
4353 size_t digits_in_base; /* number of corresponding digits */
4354 int base;
4355 };
4356 typedef struct powers powers_t;
4357 #define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */
4358 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
4359 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
4360
4361 #define mpn_compute_powtab __MPN(compute_powtab)
4362 __GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
4363 #define mpn_dc_set_str __MPN(dc_set_str)
4364 __GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
4365 #define mpn_bc_set_str __MPN(bc_set_str)
4366 __GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
4367
4368
4369 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
4370 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
4371 hence giving back the user's size in bits rounded up. Notice that
4372 converting prec->bits->prec gives an unchanged value. */
4373 #define __GMPF_BITS_TO_PREC(n) \
4374 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
4375 #define __GMPF_PREC_TO_BITS(n) \
4376 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
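/* For example with GMP_NUMB_BITS == 64, __GMPF_BITS_TO_PREC(100) is
   (100 + 127) / 64 = 3 limbs and __GMPF_PREC_TO_BITS(3) is 128 bits;
   applying __GMPF_BITS_TO_PREC to 128 gives 3 again, as claimed above.  */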
4377
4378 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
4379
4380 /* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
4381 down. */
4382 #define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \
4383 do { \
4384 mp_limb_t _ph, _dummy; \
4385 umul_ppmm (_ph, _dummy, \
4386 mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
4387 res = _ph; \
4388 } while (0)
4389
4390 /* Compute the number of limbs corresponding to ndigits base-b digits, rounding
4391 up. */
4392 #define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \
4393 do { \
4394 mp_limb_t _ph, _dummy; \
4395 umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \
4396 res = 8 * _ph / GMP_NUMB_BITS + 2; \
4397 } while (0)
4398
4399
4400 /* Set n to the number of significant digits of an mpf with the given
4401    _mp_prec field, in the given base. This is a rounded-up value, designed to ensure
4402 there's enough digits to reproduce all the guaranteed part of the value.
4403
4404 There are prec many limbs, but the high might be only "1" so forget it
4405 and just count prec-1 limbs into chars. +1 rounds that upwards, and a
4406 further +1 is because the limbs usually won't fall on digit boundaries.
4407
4408 FIXME: If base is a power of 2 and the bits per digit divides
4409 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for
4410 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
4411
4412 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
4413 do { \
4414 size_t rawn; \
4415 ASSERT (base >= 2 && base < numberof (mp_bases)); \
4416 DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \
4417 n = rawn + 2; \
4418 } while (0)
4419
4420
4421 /* Decimal point string, from the current C locale. Needs <langinfo.h> for
4422 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
4423 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
4424 their respective #if HAVE_FOO_H.
4425
4426 GLIBC recommends nl_langinfo because getting only one facet can be
4427 faster, apparently. */
4428
4429 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
4430 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
4431 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
4432 #endif
4433 /* RADIXCHAR is deprecated, still in unix98 or some such. */
4434 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
4435 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
4436 #endif
4437 /* localeconv is slower since it returns all locale stuff */
4438 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
4439 #define GMP_DECIMAL_POINT (localeconv()->decimal_point)
4440 #endif
4441 #if ! defined (GMP_DECIMAL_POINT)
4442 #define GMP_DECIMAL_POINT (".")
4443 #endif
4444
4445
4446 #define DOPRNT_CONV_FIXED 1
4447 #define DOPRNT_CONV_SCIENTIFIC 2
4448 #define DOPRNT_CONV_GENERAL 3
4449
4450 #define DOPRNT_JUSTIFY_NONE 0
4451 #define DOPRNT_JUSTIFY_LEFT 1
4452 #define DOPRNT_JUSTIFY_RIGHT 2
4453 #define DOPRNT_JUSTIFY_INTERNAL 3
4454
4455 #define DOPRNT_SHOWBASE_YES 1
4456 #define DOPRNT_SHOWBASE_NO 2
4457 #define DOPRNT_SHOWBASE_NONZERO 3
4458
4459 struct doprnt_params_t {
4460 int base; /* negative for upper case */
4461 int conv; /* choices above */
4462 const char *expfmt; /* exponent format */
4463 int exptimes4; /* exponent multiply by 4 */
4464 char fill; /* character */
4465 int justify; /* choices above */
4466 int prec; /* prec field, or -1 for all digits */
4467 int showbase; /* choices above */
4468 int showpoint; /* if radix point always shown */
4469 int showtrailing; /* if trailing zeros wanted */
4470 char sign; /* '+', ' ', or '\0' */
4471 int width; /* width field */
4472 };
4473
4474 #if _GMP_H_HAVE_VA_LIST
4475
4476 typedef int (*doprnt_format_t) (void *, const char *, va_list);
4477 typedef int (*doprnt_memory_t) (void *, const char *, size_t);
4478 typedef int (*doprnt_reps_t) (void *, int, int);
4479 typedef int (*doprnt_final_t) (void *);
4480
4481 struct doprnt_funs_t {
4482 doprnt_format_t format;
4483 doprnt_memory_t memory;
4484 doprnt_reps_t reps;
4485 doprnt_final_t final; /* NULL if not required */
4486 };
4487
4488 extern const struct doprnt_funs_t __gmp_fprintf_funs;
4489 extern const struct doprnt_funs_t __gmp_sprintf_funs;
4490 extern const struct doprnt_funs_t __gmp_snprintf_funs;
4491 extern const struct doprnt_funs_t __gmp_obstack_printf_funs;
4492 extern const struct doprnt_funs_t __gmp_ostream_funs;
4493
4494 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first
4495 "size" of these have been written. "alloc > size" is maintained, so
4496 there's room to store a '\0' at the end. "result" is where the
4497 application wants the final block pointer. */
4498 struct gmp_asprintf_t {
4499 char **result;
4500 char *buf;
4501 size_t size;
4502 size_t alloc;
4503 };
4504
4505 #define GMP_ASPRINTF_T_INIT(d, output) \
4506 do { \
4507 (d).result = (output); \
4508 (d).alloc = 256; \
4509 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
4510 (d).size = 0; \
4511 } while (0)
4512
4513 /* If a realloc is necessary, use twice the size actually required, so as to
4514 avoid repeated small reallocs. */
4515 #define GMP_ASPRINTF_T_NEED(d, n) \
4516 do { \
4517 size_t alloc, newsize, newalloc; \
4518 ASSERT ((d)->alloc >= (d)->size + 1); \
4519 \
4520 alloc = (d)->alloc; \
4521 newsize = (d)->size + (n); \
4522 if (alloc <= newsize) \
4523 { \
4524 newalloc = 2*newsize; \
4525 (d)->alloc = newalloc; \
4526 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
4527 alloc, newalloc, char); \
4528 } \
4529 } while (0)
4530
4531 __GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
4532 __GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
4533 __GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
4534
4535 /* buf is where to write the next output, and size is how much space is left
4536 there. If the application passed size==0 then that's what we'll have
4537 here, and nothing at all should be written. */
4538 struct gmp_snprintf_t {
4539 char *buf;
4540 size_t size;
4541 };
4542
4543 /* Add the bytes printed by the call to the total retval, or bail out on an
4544 error. */
4545 #define DOPRNT_ACCUMULATE(call) \
4546 do { \
4547 int __ret; \
4548 __ret = call; \
4549 if (__ret == -1) \
4550 goto error; \
4551 retval += __ret; \
4552 } while (0)
4553 #define DOPRNT_ACCUMULATE_FUN(fun, params) \
4554 do { \
4555 ASSERT ((fun) != NULL); \
4556 DOPRNT_ACCUMULATE ((*(fun)) params); \
4557 } while (0)
4558
4559 #define DOPRNT_FORMAT(fmt, ap) \
4560 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
4561 #define DOPRNT_MEMORY(ptr, len) \
4562 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
4563 #define DOPRNT_REPS(c, n) \
4564 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
4565
4566 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
4567
4568 #define DOPRNT_REPS_MAYBE(c, n) \
4569 do { \
4570 if ((n) != 0) \
4571 DOPRNT_REPS (c, n); \
4572 } while (0)
4573 #define DOPRNT_MEMORY_MAYBE(ptr, len) \
4574 do { \
4575 if ((len) != 0) \
4576 DOPRNT_MEMORY (ptr, len); \
4577 } while (0)
4578
4579 __GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
4580 __GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
4581
4582 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
4583 __GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
4584
4585 __GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
4586 #endif /* _GMP_H_HAVE_VA_LIST */
4587
4588
4589 typedef int (*gmp_doscan_scan_t) (void *, const char *, ...);
4590 typedef void *(*gmp_doscan_step_t) (void *, int);
4591 typedef int (*gmp_doscan_get_t) (void *);
4592 typedef int (*gmp_doscan_unget_t) (int, void *);
4593
4594 struct gmp_doscan_funs_t {
4595 gmp_doscan_scan_t scan;
4596 gmp_doscan_step_t step;
4597 gmp_doscan_get_t get;
4598 gmp_doscan_unget_t unget;
4599 };
4600 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs;
4601 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
4602
4603 #if _GMP_H_HAVE_VA_LIST
4604 __GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
4605 #endif
4606
4607
4608 /* For testing and debugging. */
4609 #define MPZ_CHECK_FORMAT(z) \
4610 do { \
4611 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
4612 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
4613 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
4614 } while (0)
4615
4616 #define MPQ_CHECK_FORMAT(q) \
4617 do { \
4618 MPZ_CHECK_FORMAT (mpq_numref (q)); \
4619 MPZ_CHECK_FORMAT (mpq_denref (q)); \
4620 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
4621 \
4622 if (SIZ(mpq_numref(q)) == 0) \
4623 { \
4624 /* should have zero as 0/1 */ \
4625 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
4626 && PTR(mpq_denref(q))[0] == 1); \
4627 } \
4628 else \
4629 { \
4630 /* should have no common factors */ \
4631 mpz_t g; \
4632 mpz_init (g); \
4633 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
4634 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
4635 mpz_clear (g); \
4636 } \
4637 } while (0)
4638
4639 #define MPF_CHECK_FORMAT(f) \
4640 do { \
4641 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
4642 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
4643 if (SIZ(f) == 0) \
4644 ASSERT_ALWAYS (EXP(f) == 0); \
4645 if (SIZ(f) != 0) \
4646 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
4647 } while (0)
4648
4649
4650 /* Enhancement: The "mod" and "gcd_1" functions below could have
4651 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
4652 function pointers, only actual functions. It probably doesn't make much
4653 difference to the gmp code, since hopefully we arrange calls so there's
4654 no great need for the compiler to move things around. */
4655
4656 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
4657 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
4658 in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4. Be sure to update
4659 those when changing here. */
4660 struct cpuvec_t {
4661 DECL_add_n ((*add_n));
4662 DECL_addlsh1_n ((*addlsh1_n));
4663 DECL_addlsh2_n ((*addlsh2_n));
4664 DECL_addmul_1 ((*addmul_1));
4665 DECL_addmul_2 ((*addmul_2));
4666 DECL_bdiv_dbm1c ((*bdiv_dbm1c));
4667 DECL_cnd_add_n ((*cnd_add_n));
4668 DECL_cnd_sub_n ((*cnd_sub_n));
4669 DECL_com ((*com));
4670 DECL_copyd ((*copyd));
4671 DECL_copyi ((*copyi));
4672 DECL_divexact_1 ((*divexact_1));
4673 DECL_divrem_1 ((*divrem_1));
4674 DECL_gcd_11 ((*gcd_11));
4675 DECL_lshift ((*lshift));
4676 DECL_lshiftc ((*lshiftc));
4677 DECL_mod_1 ((*mod_1));
4678 DECL_mod_1_1p ((*mod_1_1p));
4679 DECL_mod_1_1p_cps ((*mod_1_1p_cps));
4680 DECL_mod_1s_2p ((*mod_1s_2p));
4681 DECL_mod_1s_2p_cps ((*mod_1s_2p_cps));
4682 DECL_mod_1s_4p ((*mod_1s_4p));
4683 DECL_mod_1s_4p_cps ((*mod_1s_4p_cps));
4684 DECL_mod_34lsub1 ((*mod_34lsub1));
4685 DECL_modexact_1c_odd ((*modexact_1c_odd));
4686 DECL_mul_1 ((*mul_1));
4687 DECL_mul_basecase ((*mul_basecase));
4688 DECL_mullo_basecase ((*mullo_basecase));
4689 DECL_preinv_divrem_1 ((*preinv_divrem_1));
4690 DECL_preinv_mod_1 ((*preinv_mod_1));
4691 DECL_redc_1 ((*redc_1));
4692 DECL_redc_2 ((*redc_2));
4693 DECL_rshift ((*rshift));
4694 DECL_sqr_basecase ((*sqr_basecase));
4695 DECL_sub_n ((*sub_n));
4696 DECL_sublsh1_n ((*sublsh1_n));
4697 DECL_submul_1 ((*submul_1));
4698 mp_size_t mul_toom22_threshold;
4699 mp_size_t mul_toom33_threshold;
4700 mp_size_t sqr_toom2_threshold;
4701 mp_size_t sqr_toom3_threshold;
4702 mp_size_t bmod_1_to_mod_1_threshold;
4703 };
4704 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
4705 __GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
4706 #endif /* x86 fat binary */
4707
4708 __GMP_DECLSPEC void __gmpn_cpuvec_init (void);
4709
4710 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
4711 if that hasn't yet been done (to establish the right values). */
4712 #define CPUVEC_THRESHOLD(field) \
4713 ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
4714 __gmpn_cpuvec.field)
4715
4716
4717 #if HAVE_NATIVE_mpn_add_nc
4718 #define mpn_add_nc __MPN(add_nc)
4719 __GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
4720 #else
4721 static inline
4722 mp_limb_t
4723 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4724 {
4725 mp_limb_t co;
4726 co = mpn_add_n (rp, up, vp, n);
4727 co += mpn_add_1 (rp, rp, n, ci);
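  /* Note (added for clarity): the two carries cannot both be 1, since when
     mpn_add_n carries out the stored n-limb result is not all ones, so the
     subsequent mpn_add_1 of ci (0 or 1) cannot carry again; co is 0 or 1.  */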
4728 return co;
4729 }
4730 #endif
4731
4732 #if HAVE_NATIVE_mpn_sub_nc
4733 #define mpn_sub_nc __MPN(sub_nc)
4734 __GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
4735 #else
4736 static inline mp_limb_t
4737 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4738 {
4739 mp_limb_t co;
4740 co = mpn_sub_n (rp, up, vp, n);
4741 co += mpn_sub_1 (rp, rp, n, ci);
4742 return co;
4743 }
4744 #endif
4745
4746 #if TUNE_PROGRAM_BUILD
4747 /* Some extras wanted when recompiling some .c files for use by the tune
4748 program. Not part of a normal build.
4749
4750 It's necessary to keep these thresholds as #defines (just to an
4751 identically named variable), since various defaults are established based
4752 on #ifdef in the .c files. For some this is not so (the defaults are
4753 instead established above), but all are done this way for consistency. */
4754
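/* For reference, the per-file defaults mentioned above typically follow a
   pattern along the lines of (sketch, value illustrative only):

     #ifndef MUL_TOOM22_THRESHOLD
     #define MUL_TOOM22_THRESHOLD 30
     #endif

   so redirecting each name to a like-named variable here lets the tune
   program vary it at run time without touching those sources.  */
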
4755 #undef MUL_TOOM22_THRESHOLD
4756 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold
4757 extern mp_size_t mul_toom22_threshold;
4758
4759 #undef MUL_TOOM33_THRESHOLD
4760 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold
4761 extern mp_size_t mul_toom33_threshold;
4762
4763 #undef MUL_TOOM44_THRESHOLD
4764 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold
4765 extern mp_size_t mul_toom44_threshold;
4766
4767 #undef MUL_TOOM6H_THRESHOLD
4768 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
4769 extern mp_size_t mul_toom6h_threshold;
4770
4771 #undef MUL_TOOM8H_THRESHOLD
4772 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
4773 extern mp_size_t mul_toom8h_threshold;
4774
4775 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD
4776 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
4777 extern mp_size_t mul_toom32_to_toom43_threshold;
4778
4779 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD
4780 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
4781 extern mp_size_t mul_toom32_to_toom53_threshold;
4782
4783 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD
4784 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
4785 extern mp_size_t mul_toom42_to_toom53_threshold;
4786
4787 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD
4788 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
4789 extern mp_size_t mul_toom42_to_toom63_threshold;
4790
4791 #undef MUL_TOOM43_TO_TOOM54_THRESHOLD
4792 #define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold
4793 extern mp_size_t mul_toom43_to_toom54_threshold;
4794
4795 #undef MUL_FFT_THRESHOLD
4796 #define MUL_FFT_THRESHOLD mul_fft_threshold
4797 extern mp_size_t mul_fft_threshold;
4798
4799 #undef MUL_FFT_MODF_THRESHOLD
4800 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
4801 extern mp_size_t mul_fft_modf_threshold;
4802
4803 #undef MUL_FFT_TABLE
4804 #define MUL_FFT_TABLE { 0 }
4805
4806 #undef MUL_FFT_TABLE3
4807 #define MUL_FFT_TABLE3 { {0,0} }
4808
4809 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4810 remain as zero (always use it). */
4811 #if ! HAVE_NATIVE_mpn_sqr_basecase
4812 #undef SQR_BASECASE_THRESHOLD
4813 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
4814 extern mp_size_t sqr_basecase_threshold;
4815 #endif
4816
4817 #if TUNE_PROGRAM_BUILD_SQR
4818 #undef SQR_TOOM2_THRESHOLD
4819 #define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC
4820 #else
4821 #undef SQR_TOOM2_THRESHOLD
4822 #define SQR_TOOM2_THRESHOLD sqr_toom2_threshold
4823 extern mp_size_t sqr_toom2_threshold;
4824 #endif
4825
4826 #undef SQR_TOOM3_THRESHOLD
4827 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
4828 extern mp_size_t sqr_toom3_threshold;
4829
4830 #undef SQR_TOOM4_THRESHOLD
4831 #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
4832 extern mp_size_t sqr_toom4_threshold;
4833
4834 #undef SQR_TOOM6_THRESHOLD
4835 #define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
4836 extern mp_size_t sqr_toom6_threshold;
4837
4838 #undef SQR_TOOM8_THRESHOLD
4839 #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
4840 extern mp_size_t sqr_toom8_threshold;
4841
4842 #undef SQR_FFT_THRESHOLD
4843 #define SQR_FFT_THRESHOLD sqr_fft_threshold
4844 extern mp_size_t sqr_fft_threshold;
4845
4846 #undef SQR_FFT_MODF_THRESHOLD
4847 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
4848 extern mp_size_t sqr_fft_modf_threshold;
4849
4850 #undef SQR_FFT_TABLE
4851 #define SQR_FFT_TABLE { 0 }
4852
4853 #undef SQR_FFT_TABLE3
4854 #define SQR_FFT_TABLE3 { {0,0} }
4855
4856 #undef MULLO_BASECASE_THRESHOLD
4857 #define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
4858 extern mp_size_t mullo_basecase_threshold;
4859
4860 #undef MULLO_DC_THRESHOLD
4861 #define MULLO_DC_THRESHOLD mullo_dc_threshold
4862 extern mp_size_t mullo_dc_threshold;
4863
4864 #undef MULLO_MUL_N_THRESHOLD
4865 #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
4866 extern mp_size_t mullo_mul_n_threshold;
4867
4868 #undef SQRLO_BASECASE_THRESHOLD
4869 #define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold
4870 extern mp_size_t sqrlo_basecase_threshold;
4871
4872 #undef SQRLO_DC_THRESHOLD
4873 #define SQRLO_DC_THRESHOLD sqrlo_dc_threshold
4874 extern mp_size_t sqrlo_dc_threshold;
4875
4876 #undef SQRLO_SQR_THRESHOLD
4877 #define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold
4878 extern mp_size_t sqrlo_sqr_threshold;
4879
4880 #undef MULMID_TOOM42_THRESHOLD
4881 #define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
4882 extern mp_size_t mulmid_toom42_threshold;
4883
4884 #undef DIV_QR_2_PI2_THRESHOLD
4885 #define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold
4886 extern mp_size_t div_qr_2_pi2_threshold;
4887
4888 #undef DC_DIV_QR_THRESHOLD
4889 #define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
4890 extern mp_size_t dc_div_qr_threshold;
4891
4892 #undef DC_DIVAPPR_Q_THRESHOLD
4893 #define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
4894 extern mp_size_t dc_divappr_q_threshold;
4895
4896 #undef DC_BDIV_Q_THRESHOLD
4897 #define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
4898 extern mp_size_t dc_bdiv_q_threshold;
4899
4900 #undef DC_BDIV_QR_THRESHOLD
4901 #define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
4902 extern mp_size_t dc_bdiv_qr_threshold;
4903
4904 #undef MU_DIV_QR_THRESHOLD
4905 #define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
4906 extern mp_size_t mu_div_qr_threshold;
4907
4908 #undef MU_DIVAPPR_Q_THRESHOLD
4909 #define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
4910 extern mp_size_t mu_divappr_q_threshold;
4911
4912 #undef MUPI_DIV_QR_THRESHOLD
4913 #define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
4914 extern mp_size_t mupi_div_qr_threshold;
4915
4916 #undef MU_BDIV_QR_THRESHOLD
4917 #define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
4918 extern mp_size_t mu_bdiv_qr_threshold;
4919
4920 #undef MU_BDIV_Q_THRESHOLD
4921 #define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
4922 extern mp_size_t mu_bdiv_q_threshold;
4923
4924 #undef INV_MULMOD_BNM1_THRESHOLD
4925 #define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
4926 extern mp_size_t inv_mulmod_bnm1_threshold;
4927
4928 #undef INV_NEWTON_THRESHOLD
4929 #define INV_NEWTON_THRESHOLD inv_newton_threshold
4930 extern mp_size_t inv_newton_threshold;
4931
4932 #undef INV_APPR_THRESHOLD
4933 #define INV_APPR_THRESHOLD inv_appr_threshold
4934 extern mp_size_t inv_appr_threshold;
4935
4936 #undef BINV_NEWTON_THRESHOLD
4937 #define BINV_NEWTON_THRESHOLD binv_newton_threshold
4938 extern mp_size_t binv_newton_threshold;
4939
4940 #undef REDC_1_TO_REDC_2_THRESHOLD
4941 #define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
4942 extern mp_size_t redc_1_to_redc_2_threshold;
4943
4944 #undef REDC_2_TO_REDC_N_THRESHOLD
4945 #define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
4946 extern mp_size_t redc_2_to_redc_n_threshold;
4947
4948 #undef REDC_1_TO_REDC_N_THRESHOLD
4949 #define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
4950 extern mp_size_t redc_1_to_redc_n_threshold;
4951
4952 #undef MATRIX22_STRASSEN_THRESHOLD
4953 #define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
4954 extern mp_size_t matrix22_strassen_threshold;
4955
4956 typedef int hgcd2_func_t (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,
4957 struct hgcd_matrix1 *);
4958 extern hgcd2_func_t *hgcd2_func;
4959
4960 #undef HGCD_THRESHOLD
4961 #define HGCD_THRESHOLD hgcd_threshold
4962 extern mp_size_t hgcd_threshold;
4963
4964 #undef HGCD_APPR_THRESHOLD
4965 #define HGCD_APPR_THRESHOLD hgcd_appr_threshold
4966 extern mp_size_t hgcd_appr_threshold;
4967
4968 #undef HGCD_REDUCE_THRESHOLD
4969 #define HGCD_REDUCE_THRESHOLD hgcd_reduce_threshold
4970 extern mp_size_t hgcd_reduce_threshold;
4971
4972 #undef GCD_DC_THRESHOLD
4973 #define GCD_DC_THRESHOLD gcd_dc_threshold
4974 extern mp_size_t gcd_dc_threshold;
4975
4976 #undef GCDEXT_DC_THRESHOLD
4977 #define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
4978 extern mp_size_t gcdext_dc_threshold;
4979
4980 #undef DIV_QR_1N_PI1_METHOD
4981 #define DIV_QR_1N_PI1_METHOD div_qr_1n_pi1_method
4982 extern int div_qr_1n_pi1_method;
4983
4984 #undef DIV_QR_1_NORM_THRESHOLD
4985 #define DIV_QR_1_NORM_THRESHOLD div_qr_1_norm_threshold
4986 extern mp_size_t div_qr_1_norm_threshold;
4987
4988 #undef DIV_QR_1_UNNORM_THRESHOLD
4989 #define DIV_QR_1_UNNORM_THRESHOLD div_qr_1_unnorm_threshold
4990 extern mp_size_t div_qr_1_unnorm_threshold;
4991
4992 #undef DIVREM_1_NORM_THRESHOLD
4993 #define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
4994 extern mp_size_t divrem_1_norm_threshold;
4995
4996 #undef DIVREM_1_UNNORM_THRESHOLD
4997 #define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
4998 extern mp_size_t divrem_1_unnorm_threshold;
4999
5000 #undef MOD_1_NORM_THRESHOLD
5001 #define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
5002 extern mp_size_t mod_1_norm_threshold;
5003
5004 #undef MOD_1_UNNORM_THRESHOLD
5005 #define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
5006 extern mp_size_t mod_1_unnorm_threshold;
5007
5008 #undef MOD_1_1P_METHOD
5009 #define MOD_1_1P_METHOD mod_1_1p_method
5010 extern int mod_1_1p_method;
5011
5012 #undef MOD_1N_TO_MOD_1_1_THRESHOLD
5013 #define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
5014 extern mp_size_t mod_1n_to_mod_1_1_threshold;
5015
5016 #undef MOD_1U_TO_MOD_1_1_THRESHOLD
5017 #define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
5018 extern mp_size_t mod_1u_to_mod_1_1_threshold;
5019
5020 #undef MOD_1_1_TO_MOD_1_2_THRESHOLD
5021 #define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
5022 extern mp_size_t mod_1_1_to_mod_1_2_threshold;
5023
5024 #undef MOD_1_2_TO_MOD_1_4_THRESHOLD
5025 #define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
5026 extern mp_size_t mod_1_2_to_mod_1_4_threshold;
5027
5028 #undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
5029 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
5030 extern mp_size_t preinv_mod_1_to_mod_1_threshold;
5031
5032 #if ! UDIV_PREINV_ALWAYS
5033 #undef DIVREM_2_THRESHOLD
5034 #define DIVREM_2_THRESHOLD divrem_2_threshold
5035 extern mp_size_t divrem_2_threshold;
5036 #endif
5037
5038 #undef MULMOD_BNM1_THRESHOLD
5039 #define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
5040 extern mp_size_t mulmod_bnm1_threshold;
5041
5042 #undef SQRMOD_BNM1_THRESHOLD
5043 #define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
5044 extern mp_size_t sqrmod_bnm1_threshold;
5045
5046 #undef GET_STR_DC_THRESHOLD
5047 #define GET_STR_DC_THRESHOLD get_str_dc_threshold
5048 extern mp_size_t get_str_dc_threshold;
5049
5050 #undef GET_STR_PRECOMPUTE_THRESHOLD
5051 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
5052 extern mp_size_t get_str_precompute_threshold;
5053
5054 #undef SET_STR_DC_THRESHOLD
5055 #define SET_STR_DC_THRESHOLD set_str_dc_threshold
5056 extern mp_size_t set_str_dc_threshold;
5057
5058 #undef SET_STR_PRECOMPUTE_THRESHOLD
5059 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
5060 extern mp_size_t set_str_precompute_threshold;
5061
5062 #undef FAC_ODD_THRESHOLD
5063 #define FAC_ODD_THRESHOLD fac_odd_threshold
5064 extern mp_size_t fac_odd_threshold;
5065
5066 #undef FAC_DSC_THRESHOLD
5067 #define FAC_DSC_THRESHOLD fac_dsc_threshold
5068 extern mp_size_t fac_dsc_threshold;
5069
5070 #undef FFT_TABLE_ATTRS
5071 #define FFT_TABLE_ATTRS
5072 extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
5073 #define FFT_TABLE3_SIZE 2000 /* generous space for tuning */
5074 extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
5075
5076 /* Sizes the tune program tests up to, used in a couple of recompilations. */
5077 #undef MUL_TOOM22_THRESHOLD_LIMIT
5078 #undef MUL_TOOM33_THRESHOLD_LIMIT
5079 #undef MULLO_BASECASE_THRESHOLD_LIMIT
5080 #undef SQRLO_BASECASE_THRESHOLD_LIMIT
5081 #undef SQRLO_DC_THRESHOLD_LIMIT
5082 #undef SQR_TOOM3_THRESHOLD_LIMIT
5083 #define SQR_TOOM2_MAX_GENERIC 200
5084 #define MUL_TOOM22_THRESHOLD_LIMIT 700
5085 #define MUL_TOOM33_THRESHOLD_LIMIT 700
5086 #define SQR_TOOM3_THRESHOLD_LIMIT 400
5087 #define MUL_TOOM44_THRESHOLD_LIMIT 1000
5088 #define SQR_TOOM4_THRESHOLD_LIMIT 1000
5089 #define MUL_TOOM6H_THRESHOLD_LIMIT 1100
5090 #define SQR_TOOM6_THRESHOLD_LIMIT 1100
5091 #define MUL_TOOM8H_THRESHOLD_LIMIT 1200
5092 #define SQR_TOOM8_THRESHOLD_LIMIT 1200
5093 #define MULLO_BASECASE_THRESHOLD_LIMIT 200
5094 #define SQRLO_BASECASE_THRESHOLD_LIMIT 200
5095 #define SQRLO_DC_THRESHOLD_LIMIT 400
5096 #define GET_STR_THRESHOLD_LIMIT 150
5097 #define FAC_DSC_THRESHOLD_LIMIT 2048
5098
5099 #endif /* TUNE_PROGRAM_BUILD */
5100
5101 #if defined (__cplusplus)
5102 }
5103 #endif
5104
5105 /* FIXME: Make these itch functions less conservative. Also consider making
5106 them dependent on just 'an', and compute the allocation directly from 'an'
5107 instead of via n. */
5108
5109 /* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
5110    k is the smallest k such that
5111      ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
5112 which implies that
5113 k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
5114 = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
5115 */
5116 #define mpn_toom22_mul_itch(an, bn) \
5117 (2 * ((an) + GMP_NUMB_BITS))
5118 #define mpn_toom2_sqr_itch(an) \
5119 (2 * ((an) + GMP_NUMB_BITS))
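
/* Worked example (illustrative figures): with MUL_TOOM22_THRESHOLD = 30 and
   an = 1000, floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)) = floor (999/29) = 34,
   whose bitsize is 6, so the exact need is 2*(1000+6) = 2012 limbs.  The
   macros above instead allocate 2*(an + GMP_NUMB_BITS), a safe upper bound
   since the recursion depth k cannot exceed the bitsize of an, itself at
   most GMP_NUMB_BITS in practice.  */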
5120
5121 /* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
5122 We use 3an + C, so that we can use a smaller constant.
5123 */
5124 #define mpn_toom33_mul_itch(an, bn) \
5125 (3 * (an) + GMP_NUMB_BITS)
5126 #define mpn_toom3_sqr_itch(an) \
5127 (3 * (an) + GMP_NUMB_BITS)
5128
5129 /* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
5130 We use 3an + C, so that we can use a smaller constant.
5131 */
5132 #define mpn_toom44_mul_itch(an, bn) \
5133 (3 * (an) + GMP_NUMB_BITS)
5134 #define mpn_toom4_sqr_itch(an) \
5135 (3 * (an) + GMP_NUMB_BITS)
5136
5137 #define mpn_toom6_sqr_itch(n) \
5138 (((n) - SQR_TOOM6_THRESHOLD)*2 + \
5139 MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
5140 mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
5141
5142 #define MUL_TOOM6H_MIN \
5143 ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ? \
5144 MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
5145 #define mpn_toom6_mul_n_itch(n) \
5146 (((n) - MUL_TOOM6H_MIN)*2 + \
5147 MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6, \
5148 mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
5149
5150 static inline mp_size_t
5151 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
5152 mp_size_t estimatedN;
5153 estimatedN = (an + bn) / (size_t) 10 + 1;
5154 return mpn_toom6_mul_n_itch (estimatedN * 6);
5155 }
5156
5157 #define mpn_toom8_sqr_itch(n) \
5158 ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
5159 MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
5160 mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
5161
5162 #define MUL_TOOM8H_MIN \
5163 ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ? \
5164 MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
5165 #define mpn_toom8_mul_n_itch(n) \
5166 ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) + \
5167 MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6, \
5168 mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
5169
5170 static inline mp_size_t
5171 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
5172 mp_size_t estimatedN;
5173 estimatedN = (an + bn) / (size_t) 14 + 1;
5174 return mpn_toom8_mul_n_itch (estimatedN * 8);
5175 }
5176
5177 static inline mp_size_t
5178 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
5179 {
5180 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
5181 mp_size_t itch = 2 * n + 1;
5182
5183 return itch;
5184 }
5185
5186 static inline mp_size_t
5187 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
5188 {
5189 mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
5190 return 6 * n + 3;
5191 }
5192
5193 static inline mp_size_t
5194 mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
5195 {
5196 mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
5197
5198 return 6*n + 4;
5199 }
5200
5201 static inline mp_size_t
5202 mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
5203 {
5204 mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
5205 return 6*n + 4;
5206 }
5207
5208 static inline mp_size_t
5209 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
5210 {
5211 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
5212 return 10 * n + 10;
5213 }
5214
5215 static inline mp_size_t
5216 mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
5217 {
5218 mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
5219 return 10 * n + 10;
5220 }
5221
5222 static inline mp_size_t
5223 mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
5224 {
5225 mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
5226 return 9 * n + 3;
5227 }
5228
5229 static inline mp_size_t
5230 mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
5231 {
5232 mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
5233 return 9 * n + 3;
5234 }
5235
5236 /* let S(n) = space required for input size n,
5237 then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)). */
5238 #define mpn_toom42_mulmid_itch(n) \
5239 (3 * (n) + GMP_NUMB_BITS)
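
/* Rationale (added for clarity, assuming the recurrence as stated): unrolling
   it, the 3*floor(n/2^i) terms sum to less than 3n, and the "+1" terms add
   one per recursion level, roughly log2(n) <= GMP_NUMB_BITS of them, hence
   the 3*n + GMP_NUMB_BITS bound used above.  */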
5240
5241 #if 0
5242 #define mpn_fft_mul mpn_mul_fft_full
5243 #else
5244 #define mpn_fft_mul mpn_nussbaumer_mul
5245 #endif
5246
5247 #ifdef __cplusplus
5248
5249 /* A little helper for a null-terminated __gmp_allocate_func string.
5250 The destructor ensures it's freed even if an exception is thrown.
5251 The len field is needed by the destructor, and can be used by anyone else
5252 to avoid a second strlen pass over the data.
5253
5254 Since our input is a C string, using strlen is correct. Perhaps it'd be
5255 more C++-ish style to use std::char_traits<char>::length, but char_traits
5256 isn't available in gcc 2.95.4. */
5257
5258 class gmp_allocated_string {
5259 public:
5260 char *str;
5261 size_t len;
5262   gmp_allocated_string(char *arg)
5263 {
5264 str = arg;
5265 len = std::strlen (str);
5266 }
5267   ~gmp_allocated_string()
5268 {
5269 (*__gmp_free_func) (str, len+1);
5270 }
5271 };
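
/* Usage sketch (illustrative only): wrap a buffer obtained through the GMP
   allocation functions so it is released even if the stream insertion
   throws, e.g.

     gmp_allocated_string t (mpz_get_str (NULL, 10, z));
     o << t.str;

   where o is a std::ostream and z an mpz_srcptr.  */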
5272
5273 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
5274 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
5275 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
5276 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
5277 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
5278 extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
5279
5280 #endif /* __cplusplus */
5281
5282 #endif /* __GMP_IMPL_H__ */
5283