1 /* Include file for internal GNU MP types and definitions. 2 3 THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO 4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES. 5 6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 7 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 8 9 This file is part of the GNU MP Library. 10 11 The GNU MP Library is free software; you can redistribute it and/or modify 12 it under the terms of the GNU Lesser General Public License as published by 13 the Free Software Foundation; either version 3 of the License, or (at your 14 option) any later version. 15 16 The GNU MP Library is distributed in the hope that it will be useful, but 17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 19 License for more details. 20 21 You should have received a copy of the GNU Lesser General Public License 22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 23 24 25 /* __GMP_DECLSPEC must be given on any global data that will be accessed 26 from outside libgmp, meaning from the test or development programs, or 27 from libgmpxx. Failing to do this will result in an incorrect address 28 being used for the accesses. On functions __GMP_DECLSPEC makes calls 29 from outside libgmp more efficient, but they'll still work fine without 30 it. */ 31 32 33 #ifndef __GMP_IMPL_H__ 34 #define __GMP_IMPL_H__ 35 36 #if defined _CRAY 37 #include <intrinsics.h> /* for _popcnt */ 38 #endif 39 40 /* limits.h is not used in general, since it's an ANSI-ism, and since on 41 solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong 42 values (the ABI=64 values). 43 44 On Cray vector systems, however, we need the system limits.h since sizes 45 of signed and unsigned types can differ there, depending on compiler 46 options (eg. 
-hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort. */

#if defined _CRAY
#include <limits.h>
#endif

/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmp_cpuvec,
   they're not called directly. */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_gcd_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)

#if ! __GMP_WITHIN_CONFIGURE
#include "config.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif

#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif


#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* if not provided by gmp-mparam.h */
#ifndef BYTES_PER_MP_LIMB
#define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
#endif
#define GMP_LIMB_BYTES  BYTES_PER_MP_LIMB
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)


/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
/* Fallback chain: prefer the C99 type, else pick the smallest builtin
   unsigned type that configure found to be >= 4 bytes.  */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif


/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
/* NOTE(review): size_t as an intptr_t substitute assumes pointers fit in
   size_t on the fallback platforms — true for the targets GMP supports.  */
typedef size_t              gmp_intptr_t;
#endif


/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;


/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h. */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif

/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
   this off when trying to write timing loops.  */
#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
/* Byte-at-a-time fallback for systems without memset.  Only forms a
   statement (do/while), so it can't be used in an expression context.  */
#define memset(p, c, n)                 \
  do {                                  \
    ASSERT ((n) >= 0);                  \
    char *__memset__p = (p);            \
    int  __i;                           \
    for (__i = 0; __i < (n); __i++)     \
      __memset__p[__i] = (c);           \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).
*/
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
#define ROUND_UP_MULTIPLE(a,m)          \
  (POW2_P(m) ? (a) + (-(a))%(m)         \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))

#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
/* Singly-linked list of heap blocks, threaded through each block's header,
   so TMP_FREE can release everything allocated since TMP_MARK.  */
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK                __tmp_marker = 0
#define TMP_SALLOC(n)           alloca(n)
#define TMP_BALLOC(n)           __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
/* 65536 bytes is the crossover: below it use the stack (alloca), above it
   fall back to the reentrant heap allocator.  */
#define TMP_ALLOC(n)                                                    \
  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                                        \
  do {                                                                  \
    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL               TMP_DECL
#define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK               TMP_MARK
#define TMP_MARK                __tmp_marker = 0
#define TMP_SALLOC(n)           TMP_ALLOC(n)
#define TMP_BALLOC(n)           TMP_ALLOC(n)
#define TMP_ALLOC(n)            __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE               TMP_FREE
#define TMP_FREE                __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
__GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
#define TMP_SDECL               TMP_DECL
#define TMP_DECL                struct tmp_marker __tmp_marker
#define TMP_SMARK               TMP_MARK
#define TMP_MARK                __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)           TMP_ALLOC(n)
#define TMP_BALLOC(n)           TMP_ALLOC(n)
#define TMP_ALLOC(n)                                                    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE               TMP_FREE
#define TMP_FREE                __gmp_tmp_free (&__tmp_marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  char                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
                                                         struct tmp_debug_t *,
                                                         const char *, const char *));
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
                                                         struct tmp_debug_t **, const char *,
                                                         size_t)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
                                                         struct tmp_debug_t **,
                                                         const char *, const char *));
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int  marker;                                                  \
  int  __tmp_marker_inscope;                                    \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_SALLOC(n)           TMP_ALLOC(n)
#define TMP_BALLOC(n)           TMP_ALLOC(n)
#define TMP_ALLOC(size)                                                 \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
                         __tmp_marker_inscope,                          \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif /* WANT_TMP_DEBUG */


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).
In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)


/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x)   ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x)   ((x)->_mp_d)
#define LIMBS(x) ((x)->_mp_d)
#define EXP(x)   ((x)->_mp_exp)
#define PREC(x)  ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)

/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)


/* The "short" defines are a bit different because shorts are promoted to
   ints by ~ or >> etc.

   #ifndef's are used since on some systems (HP?) header files other than
   limits.h setup these defines.  We could forcibly #undef in that case, but
   there seems no need to worry about that.  */

#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

/* NOTE(review): DIVEXACT_1_THRESHOLD is #undef'd here but never redefined
   in this block — confirm that leaving it undefined is intentional.  */
#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1               0  /* no preinv */
#define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
#endif

/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)               \
  do {                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);    \
    (x) = (y);                          \
    (y) = __mp_ptr_swap__tmp;           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
  do {                                  \
    MP_PTR_SWAP (xp, yp);               \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
  do {                                  \
    MP_SRCPTR_SWAP (xp, yp);            \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)

#define MPZ_PTR_SWAP(x, y)              \
  do {                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);  \
    (x) = (y);                          \
    (y) = __mpz_ptr_swap__tmp;          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)


/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));

__GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
__GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)


/* Dummy for
non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif


__GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));


/* Optional assembly entrypoints: mpn_addmul_1c takes an additional carry-in
   limb; mpn_addmul_K (K=2..8) multiply by a K-limb multiplier passed as the
   final mp_srcptr argument.  */
#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
   returns the carry out (0, 1 or 2). */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
   returns the carry out (0, ..., 4). */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
   returns the carry out (0, ..., 2^k). */
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));

/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
   returns the borrow out (0, 1 or 2). */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
   returns the carry out (-1, 0, 1).
*/
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
   returns the borrow out (FIXME 0, 1, 2 or 3). */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
   returns the carry out (-1, ..., 3). */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
   returns the carry out (-1, 0, ..., 2^k-1). */
#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));

/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1). */
#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
   complement negative.  */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));

/* Remap names of internal mpn functions.  */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#endif

#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_sqr __MPN(sqr)
__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

/* Montgomery reduction (REDC): the trailing argument is the single
   inverse limb (redc_1) or a pointer to the inverse block (redc_2, redc_n).  */
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));


/* mod_1_1p .. mod_1s_4p: remainder modulo a single limb, using a small
   precomputed array filled in by the matching _cps function.  */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;

/* Multiplication mod B^n-1 (used by the FFT wrapper and inversion).  */
#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;

/* Scratch space (in limbs) needed by mpn_mulmod_bnm1 for an rn-limb result
   from operands of an and bn limbs.  */
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}

#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;

/* Scratch space (in limbs) needed by mpn_sqrmod_bnm1.  */
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}

typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure.  */
typedef struct {
  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
} gmp_randfnptr_t;

/* Macro to obtain a void pointer to the function pointers structure.  */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as a lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)

/* Write a given number of random bits to rp.  */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr __rstate = (state);                       \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
      (__rstate, rp, bits);                                     \
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;

/* Lazily initialize __gmp_rands (unseeded Mersenne Twister) on first use,
   then evaluate to it.  */
#define RANDS                                       \
  ((__gmp_rands_initialized ? 0                     \
    : (__gmp_rands_initialized = 1,                 \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)


/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.
*/

#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))

/* Minimum operand sizes accepted by the various Toom routines.  */
#define MPN_TOOM22_MUL_MINSIZE    4
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
#define MPN_TOOM53_MUL_MINSIZE   49 /* ??? */
#define MPN_TOOM63_MUL_MINSIZE   49

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

/* Interpolation helpers shared by the Toom multiplication routines.  */
#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));

/* Evaluation helpers: evaluate a split operand at +1, -1, +-2, etc.  */
#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));

/* The toomMN_mul / toomN_sqr workers themselves; the final mp_ptr is
   caller-provided scratch.  */
#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

/* FFT (Schoenhage-Strassen style) multiplication.  */
#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));

#define mpn_mul_fft_full __MPN(mul_fft_full)
__GMP_DECLSPEC void mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
__GMP_DECLSPEC void mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;

/* Schoolbook (sbpi1) and divide-and-conquer (dcpi1) division, taking a
   precomputed inverse limb or gmp_pi1_t.  */
#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));

#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));

#define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));

#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
#define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));

/* Block-wise ("mu") division; the _itch functions give the scratch needed.  */
#define mpn_mu_div_qr __MPN(mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
#define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));

#define mpn_mu_divappr_q __MPN(mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
#define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_mu_div_q __MPN(mu_div_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_div_q __MPN(div_q)
__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

/* Inversion of an n-limb operand (approximate and full variants).  */
#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invert_itch(n) mpn_invertappr_itch(n)

#define mpn_ni_invertappr __MPN(ni_invertappr)
__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr __MPN(invertappr)
__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr_itch(n) (3 * (n) + 2)

#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_binvert_itch __MPN(binvert_itch)
__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));

/* Two's-complement style exact ("bdiv") division routines.  */
#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));

#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
__GMP_DECLSPEC void mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));

#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
#define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));

#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
__GMP_DECLSPEC void mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_qr __MPN(bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_q __MPN(bdiv_q)
__GMP_DECLSPEC void mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_divexact __MPN(divexact)
__GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#define mpn_divexact_itch __MPN(divexact_itch)
__GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
/* Carry-less front end for mpn_bdiv_dbm1c.  */
#define mpn_bdiv_dbm1(dst, src, size, divisor) \
  mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))

#define mpn_powm __MPN(powm)
__GMP_DECLSPEC void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powlo __MPN(powlo)
__GMP_DECLSPEC void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
/* "_sec" variants are intended for use where side-channel resistance
   matters; note mpn_tabselect takes volatile pointers.  */
#define mpn_powm_sec __MPN(powm_sec)
__GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powm_sec_itch __MPN(powm_sec_itch)
__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
#define mpn_subcnd_n __MPN(subcnd_n)
__GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_tabselect __MPN(tabselect)
__GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
#define mpn_redc_1_sec __MPN(redc_1_sec)
__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#ifndef DIVEXACT_BY3_METHOD
#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
#define DIVEXACT_BY3_METHOD 0	/* default to using mpn_bdiv_dbm1c */
#else
#define DIVEXACT_BY3_METHOD 1
#endif
#endif

#if DIVEXACT_BY3_METHOD == 0
#undef mpn_divexact_by3
#define mpn_divexact_by3(dst,src,size) \
  (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
/* override mpn_divexact_by3c defined in gmp.h */
/*
#undef mpn_divexact_by3c
#define mpn_divexact_by3c(dst,src,size,cy) \
  (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
*/
#endif

/* Exact division by further small odd constants via mpn_bdiv_dbm1,
   available when the divisor divides 2^GMP_NUMB_BITS-1 (hence the
   GMP_NUMB_BITS divisibility conditions).  */
#if GMP_NUMB_BITS % 4 == 0
#define mpn_divexact_by5(dst,src,size) \
  (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
#endif

#if GMP_NUMB_BITS % 6 == 0
#define mpn_divexact_by7(dst,src,size) \
  (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
#endif

#if GMP_NUMB_BITS % 6 == 0
#define mpn_divexact_by9(dst,src,size) \
  (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
#endif

#if GMP_NUMB_BITS % 10 == 0
#define mpn_divexact_by11(dst,src,size) \
  (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
#endif

#if GMP_NUMB_BITS % 12 == 0
#define mpn_divexact_by13(dst,src,size) \
  (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
#endif

#if GMP_NUMB_BITS % 4 == 0
#define mpn_divexact_by15(dst,src,size) \
  (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
#endif

#define mpz_divexact_gcd __gmpz_divexact_gcd
__GMP_DECLSPEC void mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
__GMP_DECLSPEC size_t mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
#endif

#define mpn_divisible_p __MPN(divisible_p)
__GMP_DECLSPEC int mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __MPN(rootrem)
__GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));


#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                              \
  do {                                                          \
    int __i;         /* Faster on some Crays with plain int */  \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = 0; __i < (n); __i++)                             \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif

/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
#define mpn_copyi __MPN(copyi)
__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    mpn_copyi (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst);                           \
        mp_srcptr __src = (src);                        \
        mp_limb_t __x;                                  \
        __x = *__src++;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst++ = __x;                         \
                __x = *__src++;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst++ = __x;                                 \
      }                                                 \
  } while (0)
#endif


#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                              \
  do {                                                          \
    int __i;         /* Faster on some Crays with plain int */  \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = (n) - 1; __i >= 0; __i--)                        \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif

/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
#define mpn_copyd __MPN(copyd)
__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    mpn_copyd (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_DECR)
#define MPN_COPY_DECR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst) + __n;                     \
        mp_srcptr __src = (src) + __n;                  \
        mp_limb_t __x;                                  \
        __x = *__src--;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst-- = __x;                         \
                __x = *__src--;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst-- = __x;                                 \
      }                                                 \
  } while (0)
#endif


#ifndef MPN_COPY
#define MPN_COPY(d,s,n)                         \
  do {                                          \
    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
    MPN_COPY_INCR (d, s, n);                    \
  } while (0)
#endif


/* Set {dst,size} to the limbs of {src,size} in reverse order. */
#define MPN_REVERSE(dst, src, size)                     \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        *__dst = *__src;                                \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)


/* Zero n limbs at dst.

   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
   ppc630 for instance this is optimal since it can sustain only 1 store per
   cycle.

   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
   "for" loop in the generic code below can become stu/bdnz.  The do/while
   here helps it get to that.  The same caveat about plain -mpowerpc64 mode
   applies here as to __GMPN_COPY_INCR in gmp.h.

   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
   this loop too.

   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
   at a time.
   MPN_ZERO isn't all that important in GMP, so it might be more
   trouble than it's worth to do the same, though perhaps a call to memset
   would be good when on a GNU system.  */

#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst) - 1;               \
        mp_size_t __n = (n);                    \
        do                                      \
          *++__dst = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif

/* Generic fallback: a simple incrementing store loop.  */
#ifndef MPN_ZERO
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst);                   \
        mp_size_t __n = (n);                    \
        do                                      \
          *__dst++ = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif


/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
   start up and would need to strip a lot of zeros before it'd be faster
   than a simple cmpl loop.  Here are some times in cycles for
   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
   low zeros).

                std   cld
           P5    18    16
           P6    46    38
           K6    36    13
           K7    21    20
*/
/* Reduce NLIMBS until the most significant limb of DST is non-zero
   (or NLIMBS reaches 0).  */
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)      \
  do {                                  \
    while ((NLIMBS) > 0)                \
      {                                 \
        if ((DST)[(NLIMBS) - 1] != 0)   \
          break;                        \
        (NLIMBS)--;                     \
      }                                 \
  } while (0)
#endif
/* As MPN_NORMALIZE, but the caller guarantees a non-zero limb exists, so
   no NLIMBS>0 check is needed in the loop.  */
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    ASSERT ((NLIMBS) >= 1);                     \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        (NLIMBS)--;                             \
      }                                         \
  } while (0)
#endif

/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
   somewhere a non-zero limb.  */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT ((low) == (ptr)[0]);                         \
                                                        \
    while ((low) == 0)                                  \
      {                                                 \
        (size)--;                                       \
        ASSERT ((size) >= 1);                           \
        (ptr)++;                                        \
        (low) = *(ptr);                                 \
      }                                                 \
  } while (0)

/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments.  */
#define MPZ_TMP_INIT(X, NLIMBS)                 \
  do {                                          \
    mpz_ptr __x = (X);                          \
    ASSERT ((NLIMBS) >= 1);                     \
    __x->_mp_alloc = (NLIMBS);                  \
    __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS);      \
  } while (0)

/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
                          ? (mp_ptr) _mpz_realloc(z,n)  \
                          : PTR(z))

#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)


/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   f1p.

   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.

   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
   without good floating point.  There's +2 for rounding up, and a further
   +2 since at the last step x limbs are doubled into a 2x+1 limb region
   whereas the actual F[2k] value might be only 2x-1 limbs.

   Note that a division is done first, since on a 32-bit system it's at
   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
   whatever a multiply of two 190Mbyte numbers takes.)
   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
   worked into the multiplier.  */

#define MPN_FIB2_SIZE(n) \
  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)


/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
   -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).

   FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
   F[n] + 2*F[n-1] fits in a limb.  */

__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])

#define SIEVESIZE 512		/* FIXME: Allow gmp_init_primesieve to choose */
typedef struct
{
  unsigned long d;		   /* current index in s[] */
  unsigned long s0;		   /* number corresponding to s[0] */
  unsigned long sqrt_s0;	   /* misnomer for sqrt(s[SIEVESIZE-1]) */
  unsigned char s[SIEVESIZE + 1];  /* sieve table */
} gmp_primesieve_t;

#define gmp_init_primesieve __gmp_init_primesieve
__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);

#define gmp_nextprime __gmp_nextprime
__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);


/* Default algorithm-crossover thresholds (limb counts); configure/tune
   builds override these per CPU, hence every one is #ifndef-guarded.  */
#ifndef MUL_TOOM22_THRESHOLD
#define MUL_TOOM22_THRESHOLD             30
#endif

#ifndef MUL_TOOM33_THRESHOLD
#define MUL_TOOM33_THRESHOLD            100
#endif

#ifndef MUL_TOOM44_THRESHOLD
#define MUL_TOOM44_THRESHOLD            300
#endif

#ifndef MUL_TOOM6H_THRESHOLD
#define MUL_TOOM6H_THRESHOLD            350
#endif

#ifndef SQR_TOOM6_THRESHOLD
#define SQR_TOOM6_THRESHOLD  MUL_TOOM6H_THRESHOLD
#endif

#ifndef MUL_TOOM8H_THRESHOLD
#define MUL_TOOM8H_THRESHOLD            450
#endif

#ifndef SQR_TOOM8_THRESHOLD
#define SQR_TOOM8_THRESHOLD  MUL_TOOM8H_THRESHOLD
#endif

#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
#endif

#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD 1721 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110 1722 #endif 1723 1724 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD 1725 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100 1726 #endif 1727 1728 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD 1729 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110 1730 #endif 1731 1732 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a 1733 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat 1734 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a 1735 separate hard limit will have been defined. Similarly for TOOM3. */ 1736 #ifndef MUL_TOOM22_THRESHOLD_LIMIT 1737 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD 1738 #endif 1739 #ifndef MUL_TOOM33_THRESHOLD_LIMIT 1740 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD 1741 #endif 1742 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT 1743 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD 1744 #endif 1745 1746 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from 1747 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we 1748 certainly always want it if there's a native assembler mpn_sqr_basecase.) 1749 1750 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase 1751 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2 1752 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less 1753 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase 1754 should be used, and that may be never. 
*/ 1755 1756 #ifndef SQR_BASECASE_THRESHOLD 1757 #define SQR_BASECASE_THRESHOLD 0 1758 #endif 1759 1760 #ifndef SQR_TOOM2_THRESHOLD 1761 #define SQR_TOOM2_THRESHOLD 50 1762 #endif 1763 1764 #ifndef SQR_TOOM3_THRESHOLD 1765 #define SQR_TOOM3_THRESHOLD 120 1766 #endif 1767 1768 #ifndef SQR_TOOM4_THRESHOLD 1769 #define SQR_TOOM4_THRESHOLD 400 1770 #endif 1771 1772 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */ 1773 #ifndef SQR_TOOM3_THRESHOLD_LIMIT 1774 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD 1775 #endif 1776 1777 #ifndef DC_DIV_QR_THRESHOLD 1778 #define DC_DIV_QR_THRESHOLD 50 1779 #endif 1780 1781 #ifndef DC_DIVAPPR_Q_THRESHOLD 1782 #define DC_DIVAPPR_Q_THRESHOLD 200 1783 #endif 1784 1785 #ifndef DC_BDIV_QR_THRESHOLD 1786 #define DC_BDIV_QR_THRESHOLD 50 1787 #endif 1788 1789 #ifndef DC_BDIV_Q_THRESHOLD 1790 #define DC_BDIV_Q_THRESHOLD 180 1791 #endif 1792 1793 #ifndef DIVEXACT_JEB_THRESHOLD 1794 #define DIVEXACT_JEB_THRESHOLD 25 1795 #endif 1796 1797 #ifndef INV_MULMOD_BNM1_THRESHOLD 1798 #define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD) 1799 #endif 1800 1801 #ifndef INV_APPR_THRESHOLD 1802 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD 1803 #endif 1804 1805 #ifndef INV_NEWTON_THRESHOLD 1806 #define INV_NEWTON_THRESHOLD 200 1807 #endif 1808 1809 #ifndef BINV_NEWTON_THRESHOLD 1810 #define BINV_NEWTON_THRESHOLD 300 1811 #endif 1812 1813 #ifndef MU_DIVAPPR_Q_THRESHOLD 1814 #define MU_DIVAPPR_Q_THRESHOLD 2000 1815 #endif 1816 1817 #ifndef MU_DIV_QR_THRESHOLD 1818 #define MU_DIV_QR_THRESHOLD 2000 1819 #endif 1820 1821 #ifndef MUPI_DIV_QR_THRESHOLD 1822 #define MUPI_DIV_QR_THRESHOLD 200 1823 #endif 1824 1825 #ifndef MU_BDIV_Q_THRESHOLD 1826 #define MU_BDIV_Q_THRESHOLD 2000 1827 #endif 1828 1829 #ifndef MU_BDIV_QR_THRESHOLD 1830 #define MU_BDIV_QR_THRESHOLD 2000 1831 #endif 1832 1833 #ifndef MULMOD_BNM1_THRESHOLD 1834 #define MULMOD_BNM1_THRESHOLD 16 1835 #endif 1836 1837 #ifndef SQRMOD_BNM1_THRESHOLD 1838 #define SQRMOD_BNM1_THRESHOLD 
16 1839 #endif 1840 1841 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD 1842 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2) 1843 #endif 1844 1845 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 1846 1847 #ifndef REDC_1_TO_REDC_2_THRESHOLD 1848 #define REDC_1_TO_REDC_2_THRESHOLD 15 1849 #endif 1850 #ifndef REDC_2_TO_REDC_N_THRESHOLD 1851 #define REDC_2_TO_REDC_N_THRESHOLD 100 1852 #endif 1853 1854 #else 1855 1856 #ifndef REDC_1_TO_REDC_N_THRESHOLD 1857 #define REDC_1_TO_REDC_N_THRESHOLD 100 1858 #endif 1859 1860 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */ 1861 1862 1863 /* First k to use for an FFT modF multiply. A modF FFT is an order 1864 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba, 1865 whereas k=4 is 1.33 which is faster than toom3 at 1.485. */ 1866 #define FFT_FIRST_K 4 1867 1868 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */ 1869 #ifndef MUL_FFT_MODF_THRESHOLD 1870 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3) 1871 #endif 1872 #ifndef SQR_FFT_MODF_THRESHOLD 1873 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3) 1874 #endif 1875 1876 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This 1877 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an 1878 NxN->2N multiply and not recursing into itself is an order 1879 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which 1880 is the first better than toom3. */ 1881 #ifndef MUL_FFT_THRESHOLD 1882 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10) 1883 #endif 1884 #ifndef SQR_FFT_THRESHOLD 1885 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10) 1886 #endif 1887 1888 /* Table of thresholds for successive modF FFT "k"s. The first entry is 1889 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2, 1890 etc. See mpn_fft_best_k(). 
*/
#ifndef MUL_FFT_TABLE
#define MUL_FFT_TABLE                                           \
  { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */                       \
    MUL_TOOM33_THRESHOLD * 8,   /* k=6 */                       \
    MUL_TOOM33_THRESHOLD * 16,  /* k=7 */                       \
    MUL_TOOM33_THRESHOLD * 32,  /* k=8 */                       \
    MUL_TOOM33_THRESHOLD * 96,  /* k=9 */                       \
    MUL_TOOM33_THRESHOLD * 288, /* k=10 */                      \
    0 }   /* zero terminates the table */
#endif
#ifndef SQR_FFT_TABLE
#define SQR_FFT_TABLE                                           \
  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */                        \
    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */                        \
    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */                        \
    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */                        \
    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */                        \
    SQR_TOOM3_THRESHOLD * 288, /* k=10 */                       \
    0 }   /* zero terminates the table */
#endif

/* A size/order pair packed into a single word: presumably n is a size
   threshold and k the FFT order to use from that size up, matching the
   tables above -- TODO confirm against mpn_fft_best_k() users.  */
struct fft_table_nk
{
  unsigned int n:27;
  unsigned int k:5;
};

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16


/* NOTE(review): DC_DIV_QR_THRESHOLD already received a default (50) by an
   identical #ifndef guard earlier in this file, so this block is dead and
   the (3 * MUL_TOOM22_THRESHOLD) fallback can never take effect.  One of
   the two defaults should probably be removed.  */
#ifndef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD    (3 * MUL_TOOM22_THRESHOLD)
#endif

#ifndef GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD             18
#endif

#ifndef GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD     35
#endif

#ifndef SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD            750
#endif

#ifndef SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD   2000
#endif

/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap.
   MPN_OVERLAP_P measures sizes in limbs (pointer arithmetic on mp_ptr);
   MEM_OVERLAP_P measures them in bytes (hence the char * casts).  */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize)                             \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
  (   (char *) (xp) + (xsize) > (char *) (yp)   \
   && (char *) (yp) + (ysize) > (char *) (xp))

/* Return non-zero if xp,xsize and yp,ysize are either identical or not
   overlapping.
   Return zero if they're partially overlapping. */
/* Same-size convenience form of MPN_SAME_OR_SEPARATE2_P.  */
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)            \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
/* "Identical" means the same base pointer ((xp) == (yp)); two regions with
   equal contents at different addresses that intersect still count as a
   partial overlap.  */
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)                   \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))

/* Return non-zero if dst,dsize and src,ssize are either identical or
   overlapping in a way suitable for an incrementing/decrementing algorithm.
   Return zero if they're partially overlapping in an unsuitable fashion.

   INCR accepts dst <= src: an algorithm writing limbs from low address to
   high never clobbers source limbs it has yet to read.  DECR is the mirror
   image (dst >= src) for algorithms working from high address down.  */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size)      \
  MPN_SAME_OR_INCR2_P(dst, size, src, size)
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size)      \
  MPN_SAME_OR_DECR2_P(dst, size, src, size)


/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R).
*/

/* File and line for assertion messages.  The #ifdef guards allow ancient
   compilers lacking __LINE__/__FILE__; those fall back to -1 and "".  */
#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

/* __gmp_assert_fail reports the failed expression and does not return
   (ATTRIBUTE_NORETURN); __gmp_assert_header prints the file/line part.  */
__GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
__GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;

/* Report a failed assertion.  With a stringizing preprocessor the actual
   expression text is shown; in the K&R (!HAVE_STRINGIZE) case only the
   literal string "expr" can be produced.  */
#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

/* Check unconditionally, in every build.  */
#define ASSERT_ALWAYS(expr)     \
  do {                          \
    if (!(expr))                \
      ASSERT_FAIL (expr);       \
  } while (0)

/* Check only when WANT_ASSERT; in the disabled case the expression is NOT
   evaluated (the macro expands to an empty statement), so side-effecting
   expressions must use ASSERT_CARRY/ASSERT_NOCARRY below instead.  */
#if WANT_ASSERT
#define ASSERT(expr)   ASSERT_ALWAYS (expr)
#else
#define ASSERT(expr)   do {} while (0)
#endif


/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
   that it's zero.  In both cases if assertion checking is disabled the
   expression is still evaluated.  These macros are meant for use with
   routines like mpn_add_n() where the return value represents a carry or
   whatever that should or shouldn't occur in some context.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#if WANT_ASSERT
#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT_CARRY(expr)     (expr)
#define ASSERT_NOCARRY(expr)   (expr)
#endif


/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
   same as writing "#if WANT_ASSERT", but more compact. */
#if WANT_ASSERT
#define ASSERT_CODE(expr)  expr
#else
#define ASSERT_CODE(expr)
#endif


/* Test that an mpq_t is in fully canonical form.
This can be used as 2042 protection on routines like mpq_equal which give wrong results on 2043 non-canonical inputs. */ 2044 #if WANT_ASSERT 2045 #define ASSERT_MPQ_CANONICAL(q) \ 2046 do { \ 2047 ASSERT (q->_mp_den._mp_size > 0); \ 2048 if (q->_mp_num._mp_size == 0) \ 2049 { \ 2050 /* zero should be 0/1 */ \ 2051 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \ 2052 } \ 2053 else \ 2054 { \ 2055 /* no common factors */ \ 2056 mpz_t __g; \ 2057 mpz_init (__g); \ 2058 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \ 2059 ASSERT (mpz_cmp_ui (__g, 1) == 0); \ 2060 mpz_clear (__g); \ 2061 } \ 2062 } while (0) 2063 #else 2064 #define ASSERT_MPQ_CANONICAL(q) do {} while (0) 2065 #endif 2066 2067 /* Check that the nail parts are zero. */ 2068 #define ASSERT_ALWAYS_LIMB(limb) \ 2069 do { \ 2070 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \ 2071 ASSERT_ALWAYS (__nail == 0); \ 2072 } while (0) 2073 #define ASSERT_ALWAYS_MPN(ptr, size) \ 2074 do { \ 2075 /* let whole loop go dead when no nails */ \ 2076 if (GMP_NAIL_BITS != 0) \ 2077 { \ 2078 mp_size_t __i; \ 2079 for (__i = 0; __i < (size); __i++) \ 2080 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \ 2081 } \ 2082 } while (0) 2083 #if WANT_ASSERT 2084 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb) 2085 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size) 2086 #else 2087 #define ASSERT_LIMB(limb) do {} while (0) 2088 #define ASSERT_MPN(ptr, size) do {} while (0) 2089 #endif 2090 2091 2092 /* Assert that an mpn region {ptr,size} is zero, or non-zero. 2093 size==0 is allowed, and in that case {ptr,size} considered to be zero. 
*/ 2094 #if WANT_ASSERT 2095 #define ASSERT_MPN_ZERO_P(ptr,size) \ 2096 do { \ 2097 mp_size_t __i; \ 2098 ASSERT ((size) >= 0); \ 2099 for (__i = 0; __i < (size); __i++) \ 2100 ASSERT ((ptr)[__i] == 0); \ 2101 } while (0) 2102 #define ASSERT_MPN_NONZERO_P(ptr,size) \ 2103 do { \ 2104 mp_size_t __i; \ 2105 int __nonzero = 0; \ 2106 ASSERT ((size) >= 0); \ 2107 for (__i = 0; __i < (size); __i++) \ 2108 if ((ptr)[__i] != 0) \ 2109 { \ 2110 __nonzero = 1; \ 2111 break; \ 2112 } \ 2113 ASSERT (__nonzero); \ 2114 } while (0) 2115 #else 2116 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0) 2117 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0) 2118 #endif 2119 2120 2121 #if ! HAVE_NATIVE_mpn_com 2122 #undef mpn_com 2123 #define mpn_com(d,s,n) \ 2124 do { \ 2125 mp_ptr __d = (d); \ 2126 mp_srcptr __s = (s); \ 2127 mp_size_t __n = (n); \ 2128 ASSERT (__n >= 1); \ 2129 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \ 2130 do \ 2131 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \ 2132 while (--__n); \ 2133 } while (0) 2134 #endif 2135 2136 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \ 2137 do { \ 2138 mp_srcptr __up = (up); \ 2139 mp_srcptr __vp = (vp); \ 2140 mp_ptr __rp = (rp); \ 2141 mp_size_t __n = (n); \ 2142 mp_limb_t __a, __b; \ 2143 ASSERT (__n > 0); \ 2144 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \ 2145 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \ 2146 __up += __n; \ 2147 __vp += __n; \ 2148 __rp += __n; \ 2149 __n = -__n; \ 2150 do { \ 2151 __a = __up[__n]; \ 2152 __b = __vp[__n]; \ 2153 __rp[__n] = operation; \ 2154 } while (++__n); \ 2155 } while (0) 2156 2157 2158 #if ! HAVE_NATIVE_mpn_and_n 2159 #undef mpn_and_n 2160 #define mpn_and_n(rp, up, vp, n) \ 2161 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b) 2162 #endif 2163 2164 #if ! HAVE_NATIVE_mpn_andn_n 2165 #undef mpn_andn_n 2166 #define mpn_andn_n(rp, up, vp, n) \ 2167 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b) 2168 #endif 2169 2170 #if ! 
HAVE_NATIVE_mpn_nand_n 2171 #undef mpn_nand_n 2172 #define mpn_nand_n(rp, up, vp, n) \ 2173 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK) 2174 #endif 2175 2176 #if ! HAVE_NATIVE_mpn_ior_n 2177 #undef mpn_ior_n 2178 #define mpn_ior_n(rp, up, vp, n) \ 2179 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b) 2180 #endif 2181 2182 #if ! HAVE_NATIVE_mpn_iorn_n 2183 #undef mpn_iorn_n 2184 #define mpn_iorn_n(rp, up, vp, n) \ 2185 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK) 2186 #endif 2187 2188 #if ! HAVE_NATIVE_mpn_nior_n 2189 #undef mpn_nior_n 2190 #define mpn_nior_n(rp, up, vp, n) \ 2191 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK) 2192 #endif 2193 2194 #if ! HAVE_NATIVE_mpn_xor_n 2195 #undef mpn_xor_n 2196 #define mpn_xor_n(rp, up, vp, n) \ 2197 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b) 2198 #endif 2199 2200 #if ! HAVE_NATIVE_mpn_xnor_n 2201 #undef mpn_xnor_n 2202 #define mpn_xnor_n(rp, up, vp, n) \ 2203 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK) 2204 #endif 2205 2206 #define mpn_trialdiv __MPN(trialdiv) 2207 __GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *)); 2208 2209 #define mpn_remove __MPN(remove) 2210 __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t)); 2211 2212 2213 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. 
*/ 2214 #if GMP_NAIL_BITS == 0 2215 #define ADDC_LIMB(cout, w, x, y) \ 2216 do { \ 2217 mp_limb_t __x = (x); \ 2218 mp_limb_t __y = (y); \ 2219 mp_limb_t __w = __x + __y; \ 2220 (w) = __w; \ 2221 (cout) = __w < __x; \ 2222 } while (0) 2223 #else 2224 #define ADDC_LIMB(cout, w, x, y) \ 2225 do { \ 2226 mp_limb_t __w; \ 2227 ASSERT_LIMB (x); \ 2228 ASSERT_LIMB (y); \ 2229 __w = (x) + (y); \ 2230 (w) = __w & GMP_NUMB_MASK; \ 2231 (cout) = __w >> GMP_NUMB_BITS; \ 2232 } while (0) 2233 #endif 2234 2235 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that 2236 subtract. */ 2237 #if GMP_NAIL_BITS == 0 2238 #define SUBC_LIMB(cout, w, x, y) \ 2239 do { \ 2240 mp_limb_t __x = (x); \ 2241 mp_limb_t __y = (y); \ 2242 mp_limb_t __w = __x - __y; \ 2243 (w) = __w; \ 2244 (cout) = __w > __x; \ 2245 } while (0) 2246 #else 2247 #define SUBC_LIMB(cout, w, x, y) \ 2248 do { \ 2249 mp_limb_t __w = (x) - (y); \ 2250 (w) = __w & GMP_NUMB_MASK; \ 2251 (cout) = __w >> (GMP_LIMB_BITS-1); \ 2252 } while (0) 2253 #endif 2254 2255 2256 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both 2257 expecting no carry (or borrow) from that. 2258 2259 The size parameter is only for the benefit of assertion checking. In a 2260 normal build it's unused and the carry/borrow is just propagated as far 2261 as it needs to go. 2262 2263 On random data, usually only one or two limbs of {ptr,size} get updated, 2264 so there's no need for any sophisticated looping, just something compact 2265 and sensible. 2266 2267 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U, 2268 declaring their operand sizes, then remove the former. This is purely 2269 for the benefit of assertion checking. */ 2270 2271 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \ 2272 && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT 2273 /* Better flags handling than the generic C gives on i386, saving a few 2274 bytes of code and maybe a cycle or two. 
*/ 2275 2276 #define MPN_IORD_U(ptr, incr, aors) \ 2277 do { \ 2278 mp_ptr __ptr_dummy; \ 2279 if (__builtin_constant_p (incr) && (incr) == 1) \ 2280 { \ 2281 __asm__ __volatile__ \ 2282 ("\n" ASM_L(top) ":\n" \ 2283 "\t" aors " $1, (%0)\n" \ 2284 "\tleal 4(%0),%0\n" \ 2285 "\tjc " ASM_L(top) \ 2286 : "=r" (__ptr_dummy) \ 2287 : "0" (ptr) \ 2288 : "memory"); \ 2289 } \ 2290 else \ 2291 { \ 2292 __asm__ __volatile__ \ 2293 ( aors " %2,(%0)\n" \ 2294 "\tjnc " ASM_L(done) "\n" \ 2295 ASM_L(top) ":\n" \ 2296 "\t" aors " $1,4(%0)\n" \ 2297 "\tleal 4(%0),%0\n" \ 2298 "\tjc " ASM_L(top) "\n" \ 2299 ASM_L(done) ":\n" \ 2300 : "=r" (__ptr_dummy) \ 2301 : "0" (ptr), \ 2302 "ri" (incr) \ 2303 : "memory"); \ 2304 } \ 2305 } while (0) 2306 2307 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl") 2308 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl") 2309 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr) 2310 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr) 2311 #endif 2312 2313 #if GMP_NAIL_BITS == 0 2314 #ifndef mpn_incr_u 2315 #define mpn_incr_u(p,incr) \ 2316 do { \ 2317 mp_limb_t __x; \ 2318 mp_ptr __p = (p); \ 2319 if (__builtin_constant_p (incr) && (incr) == 1) \ 2320 { \ 2321 while (++(*(__p++)) == 0) \ 2322 ; \ 2323 } \ 2324 else \ 2325 { \ 2326 __x = *__p + (incr); \ 2327 *__p = __x; \ 2328 if (__x < (incr)) \ 2329 while (++(*(++__p)) == 0) \ 2330 ; \ 2331 } \ 2332 } while (0) 2333 #endif 2334 #ifndef mpn_decr_u 2335 #define mpn_decr_u(p,incr) \ 2336 do { \ 2337 mp_limb_t __x; \ 2338 mp_ptr __p = (p); \ 2339 if (__builtin_constant_p (incr) && (incr) == 1) \ 2340 { \ 2341 while ((*(__p++))-- == 0) \ 2342 ; \ 2343 } \ 2344 else \ 2345 { \ 2346 __x = *__p; \ 2347 *__p = __x - (incr); \ 2348 if (__x < (incr)) \ 2349 while ((*(++__p))-- == 0) \ 2350 ; \ 2351 } \ 2352 } while (0) 2353 #endif 2354 #endif 2355 2356 #if GMP_NAIL_BITS >= 1 2357 #ifndef mpn_incr_u 2358 #define mpn_incr_u(p,incr) \ 2359 do { \ 2360 mp_limb_t __x; \ 2361 
mp_ptr __p = (p); \ 2362 if (__builtin_constant_p (incr) && (incr) == 1) \ 2363 { \ 2364 do \ 2365 { \ 2366 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2367 *__p++ = __x; \ 2368 } \ 2369 while (__x == 0); \ 2370 } \ 2371 else \ 2372 { \ 2373 __x = (*__p + (incr)); \ 2374 *__p++ = __x & GMP_NUMB_MASK; \ 2375 if (__x >> GMP_NUMB_BITS != 0) \ 2376 { \ 2377 do \ 2378 { \ 2379 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2380 *__p++ = __x; \ 2381 } \ 2382 while (__x == 0); \ 2383 } \ 2384 } \ 2385 } while (0) 2386 #endif 2387 #ifndef mpn_decr_u 2388 #define mpn_decr_u(p,incr) \ 2389 do { \ 2390 mp_limb_t __x; \ 2391 mp_ptr __p = (p); \ 2392 if (__builtin_constant_p (incr) && (incr) == 1) \ 2393 { \ 2394 do \ 2395 { \ 2396 __x = *__p; \ 2397 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2398 } \ 2399 while (__x == 0); \ 2400 } \ 2401 else \ 2402 { \ 2403 __x = *__p - (incr); \ 2404 *__p++ = __x & GMP_NUMB_MASK; \ 2405 if (__x >> GMP_NUMB_BITS != 0) \ 2406 { \ 2407 do \ 2408 { \ 2409 __x = *__p; \ 2410 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2411 } \ 2412 while (__x == 0); \ 2413 } \ 2414 } \ 2415 } while (0) 2416 #endif 2417 #endif 2418 2419 #ifndef MPN_INCR_U 2420 #if WANT_ASSERT 2421 #define MPN_INCR_U(ptr, size, n) \ 2422 do { \ 2423 ASSERT ((size) >= 1); \ 2424 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \ 2425 } while (0) 2426 #else 2427 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n) 2428 #endif 2429 #endif 2430 2431 #ifndef MPN_DECR_U 2432 #if WANT_ASSERT 2433 #define MPN_DECR_U(ptr, size, n) \ 2434 do { \ 2435 ASSERT ((size) >= 1); \ 2436 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \ 2437 } while (0) 2438 #else 2439 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n) 2440 #endif 2441 #endif 2442 2443 2444 /* Structure for conversion between internal binary format and 2445 strings in base 2..36. */ 2446 struct bases 2447 { 2448 /* Number of digits in the conversion base that always fits in an mp_limb_t. 
2449 For example, for base 10 on a machine where a mp_limb_t has 32 bits this 2450 is 9, since 10**9 is the largest number that fits into a mp_limb_t. */ 2451 int chars_per_limb; 2452 2453 /* log(2)/log(conversion_base) */ 2454 double chars_per_bit_exactly; 2455 2456 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by 2457 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base), 2458 i.e. the number of bits used to represent each digit in the base. */ 2459 mp_limb_t big_base; 2460 2461 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a 2462 fixed-point number. Instead of dividing by big_base an application can 2463 choose to multiply by big_base_inverted. */ 2464 mp_limb_t big_base_inverted; 2465 }; 2466 2467 #define mp_bases __MPN(bases) 2468 __GMP_DECLSPEC extern const struct bases mp_bases[257]; 2469 2470 2471 /* For power of 2 bases this is exact. For other bases the result is either 2472 exact or one too big. 2473 2474 To be exact always it'd be necessary to examine all the limbs of the 2475 operand, since numbers like 100..000 and 99...999 generally differ only 2476 in the lowest limb. It'd be possible to examine just a couple of high 2477 limbs to increase the probability of being exact, but that doesn't seem 2478 worth bothering with. */ 2479 2480 #define MPN_SIZEINBASE(result, ptr, size, base) \ 2481 do { \ 2482 int __lb_base, __cnt; \ 2483 size_t __totbits; \ 2484 \ 2485 ASSERT ((size) >= 0); \ 2486 ASSERT ((base) >= 2); \ 2487 ASSERT ((base) < numberof (mp_bases)); \ 2488 \ 2489 /* Special case for X == 0. */ \ 2490 if ((size) == 0) \ 2491 (result) = 1; \ 2492 else \ 2493 { \ 2494 /* Calculate the total number of significant bits of X. 
*/ \ 2495 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2496 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\ 2497 \ 2498 if (POW2_P (base)) \ 2499 { \ 2500 __lb_base = mp_bases[base].big_base; \ 2501 (result) = (__totbits + __lb_base - 1) / __lb_base; \ 2502 } \ 2503 else \ 2504 (result) = (size_t) \ 2505 (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \ 2506 } \ 2507 } while (0) 2508 2509 /* eliminate mp_bases lookups for base==16 */ 2510 #define MPN_SIZEINBASE_16(result, ptr, size) \ 2511 do { \ 2512 int __cnt; \ 2513 mp_size_t __totbits; \ 2514 \ 2515 ASSERT ((size) >= 0); \ 2516 \ 2517 /* Special case for X == 0. */ \ 2518 if ((size) == 0) \ 2519 (result) = 1; \ 2520 else \ 2521 { \ 2522 /* Calculate the total number of significant bits of X. */ \ 2523 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2524 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\ 2525 (result) = (__totbits + 4 - 1) / 4; \ 2526 } \ 2527 } while (0) 2528 2529 /* bit count to limb count, rounding up */ 2530 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS) 2531 2532 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake 2533 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs 2534 in both cases (LIMBS_PER_ULONG is also defined here.) */ 2535 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */ 2536 2537 #define LIMBS_PER_ULONG 1 2538 #define MPN_SET_UI(zp, zn, u) \ 2539 (zp)[0] = (u); \ 2540 (zn) = ((zp)[0] != 0); 2541 #define MPZ_FAKE_UI(z, zp, u) \ 2542 (zp)[0] = (u); \ 2543 PTR (z) = (zp); \ 2544 SIZ (z) = ((zp)[0] != 0); \ 2545 ASSERT_CODE (ALLOC (z) = 1); 2546 2547 #else /* need two limbs per ulong */ 2548 2549 #define LIMBS_PER_ULONG 2 2550 #define MPN_SET_UI(zp, zn, u) \ 2551 (zp)[0] = (u) & GMP_NUMB_MASK; \ 2552 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 2553 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 
1 : 0); 2554 #define MPZ_FAKE_UI(z, zp, u) \ 2555 (zp)[0] = (u) & GMP_NUMB_MASK; \ 2556 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 2557 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \ 2558 PTR (z) = (zp); \ 2559 ASSERT_CODE (ALLOC (z) = 2); 2560 2561 #endif 2562 2563 2564 #if HAVE_HOST_CPU_FAMILY_x86 2565 #define TARGET_REGISTER_STARVED 1 2566 #else 2567 #define TARGET_REGISTER_STARVED 0 2568 #endif 2569 2570 2571 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1 2572 or 0 there into a limb 0xFF..FF or 0 respectively. 2573 2574 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1, 2575 but C99 doesn't guarantee signed right shifts are arithmetic, so we have 2576 a little compile-time test and a fallback to a "? :" form. The latter is 2577 necessary for instance on Cray vector systems. 2578 2579 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this 2580 to an arithmetic right shift anyway, but it's good to get the desired 2581 shift on past versions too (in particular since an important use of 2582 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */ 2583 2584 #define LIMB_HIGHBIT_TO_MASK(n) \ 2585 (((mp_limb_signed_t) -1 >> 1) < 0 \ 2586 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \ 2587 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0)) 2588 2589 2590 /* Use a library function for invert_limb, if available. */ 2591 #define mpn_invert_limb __MPN(invert_limb) 2592 __GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST; 2593 #if ! 
defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb 2594 #define invert_limb(invxl,xl) \ 2595 do { \ 2596 (invxl) = mpn_invert_limb (xl); \ 2597 } while (0) 2598 #endif 2599 2600 #ifndef invert_limb 2601 #define invert_limb(invxl,xl) \ 2602 do { \ 2603 mp_limb_t dummy; \ 2604 ASSERT ((xl) != 0); \ 2605 udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl); \ 2606 } while (0) 2607 #endif 2608 2609 #define invert_pi1(dinv, d1, d0) \ 2610 do { \ 2611 mp_limb_t v, p, t1, t0, mask; \ 2612 invert_limb (v, d1); \ 2613 p = d1 * v; \ 2614 p += d0; \ 2615 if (p < d0) \ 2616 { \ 2617 v--; \ 2618 mask = -(p >= d1); \ 2619 p -= d1; \ 2620 v += mask; \ 2621 p -= mask & d1; \ 2622 } \ 2623 umul_ppmm (t1, t0, d0, v); \ 2624 p += t1; \ 2625 if (p < t1) \ 2626 { \ 2627 v--; \ 2628 if (UNLIKELY (p >= d1)) \ 2629 { \ 2630 if (p > d1 || t0 >= d0) \ 2631 v--; \ 2632 } \ 2633 } \ 2634 (dinv).inv32 = v; \ 2635 } while (0) 2636 2637 2638 #ifndef udiv_qrnnd_preinv 2639 #define udiv_qrnnd_preinv udiv_qrnnd_preinv3 2640 #endif 2641 2642 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest 2643 limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS). 2644 If this would yield overflow, DI should be the largest possible number 2645 (i.e., only ones). For correct operation, the most significant bit of D 2646 has to be set. Put the quotient in Q and the remainder in R. 
*/ 2647 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \ 2648 do { \ 2649 mp_limb_t _q, _ql, _r; \ 2650 mp_limb_t _xh, _xl; \ 2651 ASSERT ((d) != 0); \ 2652 umul_ppmm (_q, _ql, (nh), (di)); \ 2653 _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \ 2654 umul_ppmm (_xh, _xl, _q, (d)); \ 2655 sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \ 2656 if (_xh != 0) \ 2657 { \ 2658 sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ 2659 _q += 1; \ 2660 if (_xh != 0) \ 2661 { \ 2662 _r -= (d); \ 2663 _q += 1; \ 2664 } \ 2665 } \ 2666 if (_r >= (d)) \ 2667 { \ 2668 _r -= (d); \ 2669 _q += 1; \ 2670 } \ 2671 (r) = _r; \ 2672 (q) = _q; \ 2673 } while (0) 2674 2675 /* Like udiv_qrnnd_preinv, but branch-free. */ 2676 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \ 2677 do { \ 2678 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \ 2679 mp_limb_t _xh, _xl; \ 2680 _n2 = (nh); \ 2681 _n10 = (nl); \ 2682 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \ 2683 _nadj = _n10 + (_nmask & (d)); \ 2684 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \ 2685 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \ 2686 _q1 = ~_xh; \ 2687 umul_ppmm (_xh, _xl, _q1, d); \ 2688 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \ 2689 _xh -= (d); /* xh = 0 or -1 */ \ 2690 (r) = _xl + ((d) & _xh); \ 2691 (q) = _xh - _q1; \ 2692 } while (0) 2693 2694 /* Like udiv_qrnnd_preinv2, but for for any value D. DNORM is D shifted left 2695 so that its most significant bit is set. LGUP is ceil(log2(D)). 
*/ 2696 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \ 2697 do { \ 2698 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \ 2699 mp_limb_t _xh, _xl; \ 2700 _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> (l - 1)); \ 2701 _n10 = (nl) << (GMP_LIMB_BITS - (lgup)); \ 2702 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \ 2703 _nadj = _n10 + (_nmask & (dnorm)); \ 2704 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \ 2705 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \ 2706 _q1 = ~_xh; \ 2707 umul_ppmm (_xh, _xl, _q1, d); \ 2708 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \ 2709 _xh -= (d); \ 2710 (r) = _xl + ((d) & _xh); \ 2711 (q) = _xh - _q1; \ 2712 } while (0) 2713 2714 /* udiv_qrnnd_preinv3 -- Based on work by Niels M�ller and Torbj�rn Granlund. 2715 2716 We write things strangely below, to help gcc. A more straightforward 2717 version: 2718 2719 _r = (nl) - _qh * (d); 2720 _t = _r + (d); 2721 if (_r >= _ql) 2722 { 2723 _qh--; 2724 _r = _t; 2725 } 2726 2727 For one operation shorter critical path, one may want to use this form: 2728 2729 _p = _qh * (d) 2730 _s = (nl) + (d); 2731 _r = (nl) - _p; 2732 _t = _s - _p; 2733 if (_r >= _ql) 2734 { 2735 _qh--; 2736 _r = _t; 2737 } 2738 */ 2739 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \ 2740 do { \ 2741 mp_limb_t _qh, _ql, _r; \ 2742 umul_ppmm (_qh, _ql, (nh), (di)); \ 2743 if (__builtin_constant_p (nl) && (nl) == 0) \ 2744 _qh += (nh) + 1; \ 2745 else \ 2746 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \ 2747 _r = (nl) - _qh * (d); \ 2748 if (_r > _ql) /* both > and >= should be OK */ \ 2749 { \ 2750 _r += (d); \ 2751 _qh--; \ 2752 } \ 2753 if (UNLIKELY (_r >= (d))) \ 2754 { \ 2755 _r -= (d); \ 2756 _qh++; \ 2757 } \ 2758 (r) = _r; \ 2759 (q) = _qh; \ 2760 } while (0) 2761 2762 /* Compute r = nh*B mod d, where di is the inverse of d. 
*/ 2763 #define udiv_rnd_preinv(r, nh, d, di) \ 2764 do { \ 2765 mp_limb_t _qh, _ql, _r; \ 2766 umul_ppmm (_qh, _ql, (nh), (di)); \ 2767 _qh += (nh) + 1; \ 2768 _r = - _qh * (d); \ 2769 if (_r > _ql) \ 2770 _r += (d); \ 2771 (r) = _r; \ 2772 } while (0) 2773 2774 /* Compute quotient the quotient and remainder for n / d. Requires d 2775 >= B^2 / 2 and n < d B. di is the inverse 2776 2777 floor ((B^3 - 1) / (d0 + d1 B)) - B. 2778 2779 NOTE: Output variables are updated multiple times. Only some inputs 2780 and outputs may overlap. 2781 */ 2782 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ 2783 do { \ 2784 mp_limb_t _q0, _t1, _t0, _mask; \ 2785 umul_ppmm ((q), _q0, (n2), (dinv)); \ 2786 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ 2787 \ 2788 /* Compute the two most significant limbs of n - q'd */ \ 2789 (r1) = (n1) - (d1) * (q); \ 2790 (r0) = (n0); \ 2791 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ 2792 umul_ppmm (_t1, _t0, (d0), (q)); \ 2793 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ 2794 (q)++; \ 2795 \ 2796 /* Conditionally adjust q and the remainders */ \ 2797 _mask = - (mp_limb_t) ((r1) >= _q0); \ 2798 (q) += _mask; \ 2799 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ 2800 if (UNLIKELY ((r1) >= (d1))) \ 2801 { \ 2802 if ((r1) > (d1) || (r0) >= (d0)) \ 2803 { \ 2804 (q)++; \ 2805 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ 2806 } \ 2807 } \ 2808 } while (0) 2809 2810 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */ 2811 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1) 2812 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)); 2813 #endif 2814 2815 2816 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the 2817 plain mpn_divrem_1. The default is yes, since the few CISC chips where 2818 preinv is not good have defines saying so. 
*/ 2819 #ifndef USE_PREINV_DIVREM_1 2820 #define USE_PREINV_DIVREM_1 1 2821 #endif 2822 2823 #if USE_PREINV_DIVREM_1 2824 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 2825 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift) 2826 #else 2827 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 2828 mpn_divrem_1 (qp, xsize, ap, size, d) 2829 #endif 2830 2831 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD 2832 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 2833 #endif 2834 2835 /* This selection may seem backwards. The reason mpn_mod_1 typically takes 2836 over for larger sizes is that it uses the mod_1_1 function. */ 2837 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \ 2838 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \ 2839 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \ 2840 : mpn_mod_1 (src, size, divisor)) 2841 2842 2843 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */ 2844 #define mpn_mod_34lsub1 __MPN(mod_34lsub1) 2845 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE; 2846 #endif 2847 2848 2849 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to 2850 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for 2851 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and 2852 modexact are faster at all sizes, so the defaults are 0. Those CPUs 2853 where this is not right have a tuned threshold. 
*/ 2854 #ifndef DIVEXACT_1_THRESHOLD 2855 #define DIVEXACT_1_THRESHOLD 0 2856 #endif 2857 #ifndef BMOD_1_TO_MOD_1_THRESHOLD 2858 #define BMOD_1_TO_MOD_1_THRESHOLD 10 2859 #endif 2860 2861 #ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */ 2862 #define mpn_divexact_1 __MPN(divexact_1) 2863 __GMP_DECLSPEC void mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)); 2864 #endif 2865 2866 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \ 2867 do { \ 2868 if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \ 2869 ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \ 2870 else \ 2871 { \ 2872 ASSERT (mpn_mod_1 (src, size, divisor) == 0); \ 2873 mpn_divexact_1 (dst, src, size, divisor); \ 2874 } \ 2875 } while (0) 2876 2877 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */ 2878 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd) 2879 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; 2880 #endif 2881 2882 #if HAVE_NATIVE_mpn_modexact_1_odd 2883 #define mpn_modexact_1_odd __MPN(modexact_1_odd) 2884 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE; 2885 #else 2886 #define mpn_modexact_1_odd(src,size,divisor) \ 2887 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0)) 2888 #endif 2889 2890 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \ 2891 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \ 2892 ? mpn_modexact_1_odd (src, size, divisor) \ 2893 : mpn_mod_1 (src, size, divisor)) 2894 2895 /* binvert_limb() sets inv to the multiplicative inverse of n modulo 2896 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS. 2897 n must be odd (otherwise such an inverse doesn't exist). 2898 2899 This is not to be confused with invert_limb(), which is completely 2900 different. 
2901 2902 The table lookup gives an inverse with the low 8 bits valid, and each 2903 multiply step doubles the number of bits. See Jebelean "An algorithm for 2904 exact division" end of section 4 (reference in gmp.texi). 2905 2906 Possible enhancement: Could use UHWtype until the last step, if half-size 2907 multiplies are faster (might help under _LONG_LONG_LIMB). 2908 2909 Alternative: As noted in Granlund and Montgomery "Division by Invariant 2910 Integers using Multiplication" (reference in gmp.texi), n itself gives a 2911 3-bit inverse immediately, and could be used instead of a table lookup. 2912 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into 2913 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */ 2914 2915 #define binvert_limb_table __gmp_binvert_limb_table 2916 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128]; 2917 2918 #define binvert_limb(inv,n) \ 2919 do { \ 2920 mp_limb_t __n = (n); \ 2921 mp_limb_t __inv; \ 2922 ASSERT ((__n & 1) == 1); \ 2923 \ 2924 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \ 2925 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \ 2926 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \ 2927 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \ 2928 \ 2929 if (GMP_NUMB_BITS > 64) \ 2930 { \ 2931 int __invbits = 64; \ 2932 do { \ 2933 __inv = 2 * __inv - __inv * __inv * __n; \ 2934 __invbits *= 2; \ 2935 } while (__invbits < GMP_NUMB_BITS); \ 2936 } \ 2937 \ 2938 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \ 2939 (inv) = __inv & GMP_NUMB_MASK; \ 2940 } while (0) 2941 #define modlimb_invert binvert_limb /* backward compatibility */ 2942 2943 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS. 2944 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. 2945 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd 2946 we need to start from GMP_NUMB_MAX>>1. 
*/ 2947 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1) 2948 2949 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3). 2950 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */ 2951 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1) 2952 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT) 2953 2954 2955 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs. 2956 2957 It's not clear whether this is the best way to do this calculation. 2958 Anything congruent to -a would be fine for the one limb congruence 2959 tests. */ 2960 2961 #define NEG_MOD(r, a, d) \ 2962 do { \ 2963 ASSERT ((d) != 0); \ 2964 ASSERT_LIMB (a); \ 2965 ASSERT_LIMB (d); \ 2966 \ 2967 if ((a) <= (d)) \ 2968 { \ 2969 /* small a is reasonably likely */ \ 2970 (r) = (d) - (a); \ 2971 } \ 2972 else \ 2973 { \ 2974 unsigned __twos; \ 2975 mp_limb_t __dnorm; \ 2976 count_leading_zeros (__twos, d); \ 2977 __twos -= GMP_NAIL_BITS; \ 2978 __dnorm = (d) << __twos; \ 2979 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \ 2980 } \ 2981 \ 2982 ASSERT_LIMB (r); \ 2983 } while (0) 2984 2985 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */ 2986 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1) 2987 2988 2989 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or 2990 to 0 if there's an even number. "n" should be an unsigned long and "p" 2991 an int. */ 2992 2993 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 2994 #define ULONG_PARITY(p, n) \ 2995 do { \ 2996 int __p; \ 2997 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \ 2998 (p) = __p & 1; \ 2999 } while (0) 3000 #endif 3001 3002 /* Cray intrinsic _popcnt. */ 3003 #ifdef _CRAY 3004 #define ULONG_PARITY(p, n) \ 3005 do { \ 3006 (p) = _popcnt (n) & 1; \ 3007 } while (0) 3008 #endif 3009 3010 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3011 && ! 
defined (NO_ASM) && defined (__ia64) 3012 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend 3013 to a 64 bit unsigned long long for popcnt */ 3014 #define ULONG_PARITY(p, n) \ 3015 do { \ 3016 unsigned long long __n = (unsigned long) (n); \ 3017 int __p; \ 3018 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \ 3019 (p) = __p & 1; \ 3020 } while (0) 3021 #endif 3022 3023 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3024 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86 3025 #if __GMP_GNUC_PREREQ (3,1) 3026 #define __GMP_qm "=Qm" 3027 #define __GMP_q "=Q" 3028 #else 3029 #define __GMP_qm "=qm" 3030 #define __GMP_q "=q" 3031 #endif 3032 #define ULONG_PARITY(p, n) \ 3033 do { \ 3034 char __p; \ 3035 unsigned long __n = (n); \ 3036 __n ^= (__n >> 16); \ 3037 __asm__ ("xorb %h1, %b1\n\t" \ 3038 "setpo %0" \ 3039 : __GMP_qm (__p), __GMP_q (__n) \ 3040 : "1" (__n)); \ 3041 (p) = __p; \ 3042 } while (0) 3043 #endif 3044 3045 #if ! defined (ULONG_PARITY) 3046 #define ULONG_PARITY(p, n) \ 3047 do { \ 3048 unsigned long __n = (n); \ 3049 int __p = 0; \ 3050 do \ 3051 { \ 3052 __p ^= 0x96696996L >> (__n & 0x1F); \ 3053 __n >>= 5; \ 3054 } \ 3055 while (__n != 0); \ 3056 \ 3057 (p) = __p & 1; \ 3058 } while (0) 3059 #endif 3060 3061 3062 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of 3063 version 3.1 at least) doesn't seem to know how to generate rlwimi for 3064 anything other than bit-fields, so use "asm". */ 3065 #if defined (__GNUC__) && ! 
defined (NO_ASM) \ 3066 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32 3067 #define BSWAP_LIMB(dst, src) \ 3068 do { \ 3069 mp_limb_t __bswapl_src = (src); \ 3070 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \ 3071 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \ 3072 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \ 3073 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \ 3074 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 3nd high */ \ 3075 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \ 3076 (dst) = __tmp1 | __tmp2; /* whole */ \ 3077 } while (0) 3078 #endif 3079 3080 /* bswap is available on i486 and up and is fast. A combination rorw $8 / 3081 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux 3082 kernel with xchgb instead of rorw), but this is not done here, because 3083 i386 means generic x86 and mixing word and dword operations will cause 3084 partial register stalls on P6 chips. */ 3085 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3086 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \ 3087 && GMP_LIMB_BITS == 32 3088 #define BSWAP_LIMB(dst, src) \ 3089 do { \ 3090 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \ 3091 } while (0) 3092 #endif 3093 3094 #if defined (__GNUC__) && ! defined (NO_ASM) \ 3095 && defined (__amd64__) && GMP_LIMB_BITS == 64 3096 #define BSWAP_LIMB(dst, src) \ 3097 do { \ 3098 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \ 3099 } while (0) 3100 #endif 3101 3102 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3103 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64 3104 #define BSWAP_LIMB(dst, src) \ 3105 do { \ 3106 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \ 3107 } while (0) 3108 #endif 3109 3110 /* As per glibc. */ 3111 #if defined (__GNUC__) && ! 
defined (NO_ASM) \ 3112 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32 3113 #define BSWAP_LIMB(dst, src) \ 3114 do { \ 3115 mp_limb_t __bswapl_src = (src); \ 3116 __asm__ ("ror%.w %#8, %0\n\t" \ 3117 "swap %0\n\t" \ 3118 "ror%.w %#8, %0" \ 3119 : "=d" (dst) \ 3120 : "0" (__bswapl_src)); \ 3121 } while (0) 3122 #endif 3123 3124 #if ! defined (BSWAP_LIMB) 3125 #if GMP_LIMB_BITS == 8 3126 #define BSWAP_LIMB(dst, src) \ 3127 do { (dst) = (src); } while (0) 3128 #endif 3129 #if GMP_LIMB_BITS == 16 3130 #define BSWAP_LIMB(dst, src) \ 3131 do { \ 3132 (dst) = ((src) << 8) + ((src) >> 8); \ 3133 } while (0) 3134 #endif 3135 #if GMP_LIMB_BITS == 32 3136 #define BSWAP_LIMB(dst, src) \ 3137 do { \ 3138 (dst) = \ 3139 ((src) << 24) \ 3140 + (((src) & 0xFF00) << 8) \ 3141 + (((src) >> 8) & 0xFF00) \ 3142 + ((src) >> 24); \ 3143 } while (0) 3144 #endif 3145 #if GMP_LIMB_BITS == 64 3146 #define BSWAP_LIMB(dst, src) \ 3147 do { \ 3148 (dst) = \ 3149 ((src) << 56) \ 3150 + (((src) & 0xFF00) << 40) \ 3151 + (((src) & 0xFF0000) << 24) \ 3152 + (((src) & 0xFF000000) << 8) \ 3153 + (((src) >> 8) & 0xFF000000) \ 3154 + (((src) >> 24) & 0xFF0000) \ 3155 + (((src) >> 40) & 0xFF00) \ 3156 + ((src) >> 56); \ 3157 } while (0) 3158 #endif 3159 #endif 3160 3161 #if ! defined (BSWAP_LIMB) 3162 #define BSWAP_LIMB(dst, src) \ 3163 do { \ 3164 mp_limb_t __bswapl_src = (src); \ 3165 mp_limb_t __dst = 0; \ 3166 int __i; \ 3167 for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \ 3168 { \ 3169 __dst = (__dst << 8) | (__bswapl_src & 0xFF); \ 3170 __bswapl_src >>= 8; \ 3171 } \ 3172 (dst) = __dst; \ 3173 } while (0) 3174 #endif 3175 3176 3177 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to 3178 those we know are fast. */ 3179 #if defined (__GNUC__) && ! 
defined (NO_ASM)						\
  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN		\
  && (HAVE_HOST_CPU_powerpc604					\
      || HAVE_HOST_CPU_powerpc604e				\
      || HAVE_HOST_CPU_powerpc750				\
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src)					\
  do {									\
    mp_srcptr  __blf_src = (src);					\
    mp_limb_t  __limb;							\
    __asm__ ("lwbrx %0, 0, %1"						\
	     : "=r" (__limb)						\
	     : "r" (__blf_src),						\
	       "m" (*__blf_src));					\
    (limb) = __limb;							\
  } while (0)
#endif

/* Generic fallback: plain load then in-register byte swap.  */
#if ! defined (BSWAP_LIMB_FETCH)
#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
#endif


/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
   know are fast.  FIXME: Is this necessary? */
#if defined (__GNUC__) && ! defined (NO_ASM)			\
  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN		\
  && (HAVE_HOST_CPU_powerpc604					\
      || HAVE_HOST_CPU_powerpc604e				\
      || HAVE_HOST_CPU_powerpc750				\
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb)					\
  do {									\
    mp_ptr     __dst = (dst);						\
    mp_limb_t  __limb = (limb);						\
    __asm__ ("stwbrx %1, 0, %2"						\
	     : "=m" (*__dst)						\
	     : "r" (__limb),						\
	       "r" (__dst));						\
  } while (0)
#endif

/* Generic fallback: in-register byte swap then plain store.  */
#if ! defined (BSWAP_LIMB_STORE)
#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
#endif


/* Byte swap limbs from {src,size} and store at {dst,size}. */
#define MPN_BSWAP(dst, src, size)					\
  do {									\
    mp_ptr     __dst = (dst);						\
    mp_srcptr  __src = (src);						\
    mp_size_t  __size = (size);						\
    mp_size_t  __i;							\
    ASSERT ((size) >= 0);						\
    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));			\
    CRAY_Pragma ("_CRI ivdep");						\
    for (__i = 0; __i < __size; __i++)					\
      {									\
	BSWAP_LIMB_FETCH (*__dst, __src);				\
	__dst++;							\
	__src++;							\
      }									\
  } while (0)

/* Byte swap limbs from {src,size} and store in reverse order at {dst,size}.
*/ 3245 #define MPN_BSWAP_REVERSE(dst, src, size) \ 3246 do { \ 3247 mp_ptr __dst = (dst); \ 3248 mp_size_t __size = (size); \ 3249 mp_srcptr __src = (src) + __size - 1; \ 3250 mp_size_t __i; \ 3251 ASSERT ((size) >= 0); \ 3252 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ 3253 CRAY_Pragma ("_CRI ivdep"); \ 3254 for (__i = 0; __i < __size; __i++) \ 3255 { \ 3256 BSWAP_LIMB_FETCH (*__dst, __src); \ 3257 __dst++; \ 3258 __src--; \ 3259 } \ 3260 } while (0) 3261 3262 3263 /* No processor claiming to be SPARC v9 compliant seems to 3264 implement the POPC instruction. Disable pattern for now. */ 3265 #if 0 3266 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64 3267 #define popc_limb(result, input) \ 3268 do { \ 3269 DItype __res; \ 3270 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \ 3271 } while (0) 3272 #endif 3273 #endif 3274 3275 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 3276 #define popc_limb(result, input) \ 3277 do { \ 3278 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \ 3279 } while (0) 3280 #endif 3281 3282 /* Cray intrinsic. */ 3283 #ifdef _CRAY 3284 #define popc_limb(result, input) \ 3285 do { \ 3286 (result) = _popcnt (input); \ 3287 } while (0) 3288 #endif 3289 3290 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3291 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64 3292 #define popc_limb(result, input) \ 3293 do { \ 3294 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \ 3295 } while (0) 3296 #endif 3297 3298 /* Cool population count of an mp_limb_t. 3299 You have to figure out how this works, We won't tell you! 3300 3301 The constants could also be expressed as: 3302 0x55... = [2^N / 3] = [(2^N-1)/3] 3303 0x33... = [2^N / 5] = [(2^N-1)/5] 3304 0x0f... = [2^N / 17] = [(2^N-1)/17] 3305 (N is GMP_LIMB_BITS, [] denotes truncation.) */ 3306 3307 #if ! 
defined (popc_limb) && GMP_LIMB_BITS == 8 3308 #define popc_limb(result, input) \ 3309 do { \ 3310 mp_limb_t __x = (input); \ 3311 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3312 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3313 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3314 (result) = __x & 0xff; \ 3315 } while (0) 3316 #endif 3317 3318 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16 3319 #define popc_limb(result, input) \ 3320 do { \ 3321 mp_limb_t __x = (input); \ 3322 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3323 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3324 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3325 __x = ((__x >> 8) + __x); \ 3326 (result) = __x & 0xff; \ 3327 } while (0) 3328 #endif 3329 3330 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32 3331 #define popc_limb(result, input) \ 3332 do { \ 3333 mp_limb_t __x = (input); \ 3334 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3335 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3336 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3337 __x = ((__x >> 8) + __x); \ 3338 __x = ((__x >> 16) + __x); \ 3339 (result) = __x & 0xff; \ 3340 } while (0) 3341 #endif 3342 3343 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64 3344 #define popc_limb(result, input) \ 3345 do { \ 3346 mp_limb_t __x = (input); \ 3347 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \ 3348 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \ 3349 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \ 3350 __x = ((__x >> 8) + __x); \ 3351 __x = ((__x >> 16) + __x); \ 3352 __x = ((__x >> 32) + __x); \ 3353 (result) = __x & 0xff; \ 3354 } while (0) 3355 #endif 3356 3357 3358 /* Define stuff for longlong.h. 
*/ 3359 #if HAVE_ATTRIBUTE_MODE 3360 typedef unsigned int UQItype __attribute__ ((mode (QI))); 3361 typedef int SItype __attribute__ ((mode (SI))); 3362 typedef unsigned int USItype __attribute__ ((mode (SI))); 3363 typedef int DItype __attribute__ ((mode (DI))); 3364 typedef unsigned int UDItype __attribute__ ((mode (DI))); 3365 #else 3366 typedef unsigned char UQItype; 3367 typedef long SItype; 3368 typedef unsigned long USItype; 3369 #if HAVE_LONG_LONG 3370 typedef long long int DItype; 3371 typedef unsigned long long int UDItype; 3372 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */ 3373 typedef long int DItype; 3374 typedef unsigned long int UDItype; 3375 #endif 3376 #endif 3377 3378 typedef mp_limb_t UWtype; 3379 typedef unsigned int UHWtype; 3380 #define W_TYPE_SIZE GMP_LIMB_BITS 3381 3382 /* Define ieee_double_extract and _GMP_IEEE_FLOATS. 3383 3384 Bit field packing is "implementation defined" according to C99, which 3385 leaves us at the compiler's mercy here. For some systems packing is 3386 defined in the ABI (eg. x86). In any case so far it seems universal that 3387 little endian systems pack from low to high, and big endian from high to 3388 low within the given type. 3389 3390 Within the fields we rely on the integer endianness being the same as the 3391 float endianness, this is true everywhere we know of and it'd be a fairly 3392 strange system that did anything else. 
*/ 3393 3394 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED 3395 #define _GMP_IEEE_FLOATS 1 3396 union ieee_double_extract 3397 { 3398 struct 3399 { 3400 gmp_uint_least32_t manh:20; 3401 gmp_uint_least32_t exp:11; 3402 gmp_uint_least32_t sig:1; 3403 gmp_uint_least32_t manl:32; 3404 } s; 3405 double d; 3406 }; 3407 #endif 3408 3409 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 3410 #define _GMP_IEEE_FLOATS 1 3411 union ieee_double_extract 3412 { 3413 struct 3414 { 3415 gmp_uint_least32_t manl:32; 3416 gmp_uint_least32_t manh:20; 3417 gmp_uint_least32_t exp:11; 3418 gmp_uint_least32_t sig:1; 3419 } s; 3420 double d; 3421 }; 3422 #endif 3423 3424 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN 3425 #define _GMP_IEEE_FLOATS 1 3426 union ieee_double_extract 3427 { 3428 struct 3429 { 3430 gmp_uint_least32_t sig:1; 3431 gmp_uint_least32_t exp:11; 3432 gmp_uint_least32_t manh:20; 3433 gmp_uint_least32_t manl:32; 3434 } s; 3435 double d; 3436 }; 3437 #endif 3438 3439 3440 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers 3441 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */ 3442 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2))) 3443 /* Maximum number of limbs it will take to store any `double'. 3444 We assume doubles have 53 mantissa bits. */ 3445 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1) 3446 3447 __GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double)); 3448 3449 #define mpn_get_d __gmpn_get_d 3450 __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE; 3451 3452 3453 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes 3454 a_inf if x is an infinity. Both are considered unlikely values, for 3455 branch prediction. 
*/ 3456 3457 #if _GMP_IEEE_FLOATS 3458 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3459 do { \ 3460 union ieee_double_extract u; \ 3461 u.d = (x); \ 3462 if (UNLIKELY (u.s.exp == 0x7FF)) \ 3463 { \ 3464 if (u.s.manl == 0 && u.s.manh == 0) \ 3465 { a_inf; } \ 3466 else \ 3467 { a_nan; } \ 3468 } \ 3469 } while (0) 3470 #endif 3471 3472 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP 3473 /* no nans or infs in these formats */ 3474 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3475 do { } while (0) 3476 #endif 3477 3478 #ifndef DOUBLE_NAN_INF_ACTION 3479 /* Unknown format, try something generic. 3480 NaN should be "unordered", so x!=x. 3481 Inf should be bigger than DBL_MAX. */ 3482 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3483 do { \ 3484 { \ 3485 if (UNLIKELY ((x) != (x))) \ 3486 { a_nan; } \ 3487 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \ 3488 { a_inf; } \ 3489 } \ 3490 } while (0) 3491 #endif 3492 3493 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles 3494 in the coprocessor, which means a bigger exponent range than normal, and 3495 depending on the rounding mode, a bigger mantissa than normal. (See 3496 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches 3497 "d" through memory to force any rounding and overflows to occur. 3498 3499 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm 3500 registers, where there's no such extra precision and no need for the 3501 FORCE_DOUBLE. We don't bother to detect this since the present uses for 3502 FORCE_DOUBLE are only in test programs and default generic C code. 3503 3504 Not quite sure that an "automatic volatile" will use memory, but it does 3505 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since 3506 apparently matching operands like "0" are only allowed on a register 3507 output. 
gcc 3.4 warns about this, though in fact it and past versions 3508 seem to put the operand through memory as hoped. */ 3509 3510 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \ 3511 || defined (__amd64__)) 3512 #define FORCE_DOUBLE(d) \ 3513 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0) 3514 #else 3515 #define FORCE_DOUBLE(d) do { } while (0) 3516 #endif 3517 3518 3519 __GMP_DECLSPEC extern int __gmp_junk; 3520 __GMP_DECLSPEC extern const int __gmp_0; 3521 __GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN; 3522 __GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN; 3523 __GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN; 3524 __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN; 3525 #define GMP_ERROR(code) __gmp_exception (code) 3526 #define DIVIDE_BY_ZERO __gmp_divide_by_zero () 3527 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative () 3528 3529 #if defined _LONG_LONG_LIMB 3530 #if __GMP_HAVE_TOKEN_PASTE 3531 #define CNST_LIMB(C) ((mp_limb_t) C##LL) 3532 #else 3533 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL) 3534 #endif 3535 #else /* not _LONG_LONG_LIMB */ 3536 #if __GMP_HAVE_TOKEN_PASTE 3537 #define CNST_LIMB(C) ((mp_limb_t) C##L) 3538 #else 3539 #define CNST_LIMB(C) ((mp_limb_t) C/**/L) 3540 #endif 3541 #endif /* _LONG_LONG_LIMB */ 3542 3543 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */ 3544 #if GMP_NUMB_BITS == 2 3545 #define PP 0x3 /* 3 */ 3546 #define PP_FIRST_OMITTED 5 3547 #endif 3548 #if GMP_NUMB_BITS == 4 3549 #define PP 0xF /* 3 x 5 */ 3550 #define PP_FIRST_OMITTED 7 3551 #endif 3552 #if GMP_NUMB_BITS == 8 3553 #define PP 0x69 /* 3 x 5 x 7 */ 3554 #define PP_FIRST_OMITTED 11 3555 #endif 3556 #if GMP_NUMB_BITS == 16 3557 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */ 3558 #define PP_FIRST_OMITTED 17 3559 #endif 3560 #if GMP_NUMB_BITS == 32 3561 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... 
x 29 */ 3562 #define PP_INVERTED 0x53E5645CL 3563 #define PP_FIRST_OMITTED 31 3564 #endif 3565 #if GMP_NUMB_BITS == 64 3566 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */ 3567 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B) 3568 #define PP_FIRST_OMITTED 59 3569 #endif 3570 #ifndef PP_FIRST_OMITTED 3571 #define PP_FIRST_OMITTED 3 3572 #endif 3573 3574 3575 3576 /* BIT1 means a result value in bit 1 (second least significant bit), with a 3577 zero bit representing +1 and a one bit representing -1. Bits other than 3578 bit 1 are garbage. These are meant to be kept in "int"s, and casts are 3579 used to ensure the expressions are "int"s even if a and/or b might be 3580 other types. 3581 3582 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base 3583 and their speed is important. Expressions are used rather than 3584 conditionals to accumulate sign changes, which effectively means XORs 3585 instead of conditional JUMPs. */ 3586 3587 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */ 3588 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1)) 3589 3590 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */ 3591 #define JACOBI_U0(a) ((a) == 1) 3592 3593 /* (a/0), with a given by low and size; 3594 is 1 if a=+/-1, 0 otherwise */ 3595 #define JACOBI_LS0(alow,asize) \ 3596 (((asize) == 1 || (asize) == -1) && (alow) == 1) 3597 3598 /* (a/0), with a an mpz_t; 3599 fetch of low limb always valid, even if size is zero */ 3600 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a)) 3601 3602 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */ 3603 #define JACOBI_0U(b) ((b) == 1) 3604 3605 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */ 3606 #define JACOBI_0S(b) ((b) == 1 || (b) == -1) 3607 3608 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */ 3609 #define JACOBI_0LS(blow,bsize) \ 3610 (((bsize) == 1 || (bsize) == -1) && (blow) == 1) 3611 3612 /* Convert a bit1 to +1 or -1. 
*/ 3613 #define JACOBI_BIT1_TO_PN(result_bit1) \ 3614 (1 - ((int) (result_bit1) & 2)) 3615 3616 /* (2/b), with b unsigned and odd; 3617 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and 3618 hence obtained from (b>>1)^b */ 3619 #define JACOBI_TWO_U_BIT1(b) \ 3620 ((int) (((b) >> 1) ^ (b))) 3621 3622 /* (2/b)^twos, with b unsigned and odd */ 3623 #define JACOBI_TWOS_U_BIT1(twos, b) \ 3624 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b)) 3625 3626 /* (2/b)^twos, with b unsigned and odd */ 3627 #define JACOBI_TWOS_U(twos, b) \ 3628 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b))) 3629 3630 /* (-1/b), with b odd (signed or unsigned); 3631 is (-1)^((b-1)/2) */ 3632 #define JACOBI_N1B_BIT1(b) \ 3633 ((int) (b)) 3634 3635 /* (a/b) effect due to sign of a: signed/unsigned, b odd; 3636 is (-1/b) if a<0, or +1 if a>=0 */ 3637 #define JACOBI_ASGN_SU_BIT1(a, b) \ 3638 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b)) 3639 3640 /* (a/b) effect due to sign of b: signed/signed; 3641 is -1 if a and b both negative, +1 otherwise */ 3642 #define JACOBI_BSGN_SS_BIT1(a, b) \ 3643 ((((a)<0) & ((b)<0)) << 1) 3644 3645 /* (a/b) effect due to sign of b: signed/mpz; 3646 is -1 if a and b both negative, +1 otherwise */ 3647 #define JACOBI_BSGN_SZ_BIT1(a, b) \ 3648 JACOBI_BSGN_SS_BIT1 (a, SIZ(b)) 3649 3650 /* (a/b) effect due to sign of b: mpz/signed; 3651 is -1 if a and b both negative, +1 otherwise */ 3652 #define JACOBI_BSGN_ZS_BIT1(a, b) \ 3653 JACOBI_BSGN_SZ_BIT1 (b, a) 3654 3655 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd; 3656 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if 3657 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd 3658 because this is used in a couple of places with only bit 1 of a or b 3659 valid. */ 3660 #define JACOBI_RECIP_UU_BIT1(a, b) \ 3661 ((int) ((a) & (b))) 3662 3663 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and 3664 decrementing b_size. 
b_low should be b_ptr[0] on entry, and will be 3665 updated for the new b_ptr. result_bit1 is updated according to the 3666 factors of 2 stripped, as per (a/2). */ 3667 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \ 3668 do { \ 3669 ASSERT ((b_size) >= 1); \ 3670 ASSERT ((b_low) == (b_ptr)[0]); \ 3671 \ 3672 while (UNLIKELY ((b_low) == 0)) \ 3673 { \ 3674 (b_size)--; \ 3675 ASSERT ((b_size) >= 1); \ 3676 (b_ptr)++; \ 3677 (b_low) = *(b_ptr); \ 3678 \ 3679 ASSERT (((a) & 1) != 0); \ 3680 if ((GMP_NUMB_BITS % 2) == 1) \ 3681 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \ 3682 } \ 3683 } while (0) 3684 3685 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or 3686 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and 3687 unsigned. modexact_1_odd effectively calculates -a mod b, and 3688 result_bit1 is adjusted for the factor of -1. 3689 3690 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and 3691 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what 3692 factor to introduce into result_bit1, so for that case use mpn_mod_1 3693 unconditionally. 3694 3695 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used 3696 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result, 3697 or not skip a divide step, or something. 
*/ 3698 3699 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \ 3700 do { \ 3701 mp_srcptr __a_ptr = (a_ptr); \ 3702 mp_size_t __a_size = (a_size); \ 3703 mp_limb_t __b = (b); \ 3704 \ 3705 ASSERT (__a_size >= 1); \ 3706 ASSERT (__b & 1); \ 3707 \ 3708 if ((GMP_NUMB_BITS % 2) != 0 \ 3709 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \ 3710 { \ 3711 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \ 3712 } \ 3713 else \ 3714 { \ 3715 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \ 3716 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \ 3717 } \ 3718 } while (0) 3719 3720 /* Matrix multiplication */ 3721 #define mpn_matrix22_mul __MPN(matrix22_mul) 3722 __GMP_DECLSPEC void mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); 3723 #define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen) 3724 __GMP_DECLSPEC void mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); 3725 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch) 3726 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t)); 3727 3728 #ifndef MATRIX22_STRASSEN_THRESHOLD 3729 #define MATRIX22_STRASSEN_THRESHOLD 30 3730 #endif 3731 3732 /* HGCD definitions */ 3733 3734 /* Extract one numb, shifting count bits left 3735 ________ ________ 3736 |___xh___||___xl___| 3737 |____r____| 3738 >count < 3739 3740 The count includes any nail bits, so it should work fine if count 3741 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of 3742 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS. 3743 3744 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for 3745 those calls where the count high bits of xh may be non-zero. 
3746 */ 3747 3748 #define MPN_EXTRACT_NUMB(count, xh, xl) \ 3749 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \ 3750 ((xl) >> (GMP_LIMB_BITS - (count)))) 3751 3752 3753 /* The matrix non-negative M = (u, u'; v,v') keeps track of the 3754 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller 3755 than a, b. The determinant must always be one, so that M has an 3756 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1 3757 bits. */ 3758 struct hgcd_matrix1 3759 { 3760 mp_limb_t u[2][2]; 3761 }; 3762 3763 #define mpn_hgcd2 __MPN (hgcd2) 3764 __GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *)); 3765 3766 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector) 3767 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t)); 3768 3769 #define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector) 3770 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t)); 3771 3772 struct hgcd_matrix 3773 { 3774 mp_size_t alloc; /* for sanity checking only */ 3775 mp_size_t n; 3776 mp_ptr p[2][2]; 3777 }; 3778 3779 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1)) 3780 3781 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init) 3782 __GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr)); 3783 3784 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul) 3785 __GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr)); 3786 3787 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust) 3788 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr)); 3789 3790 #define mpn_hgcd_itch __MPN (hgcd_itch) 3791 __GMP_DECLSPEC mp_size_t 
mpn_hgcd_itch __GMP_PROTO ((mp_size_t)); 3792 3793 #define mpn_hgcd __MPN (hgcd) 3794 __GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr)); 3795 3796 #define MPN_HGCD_LEHMER_ITCH(n) (n) 3797 3798 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer) 3799 __GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr)); 3800 3801 /* Needs storage for the quotient */ 3802 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n) 3803 3804 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step) 3805 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr)); 3806 3807 #define MPN_GCD_LEHMER_N_ITCH(n) (n) 3808 3809 #define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n) 3810 __GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr)); 3811 3812 #define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step) 3813 __GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr)); 3814 3815 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3) 3816 3817 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n) 3818 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr)); 3819 3820 /* 4*(an + 1) + 4*(bn + 1) + an */ 3821 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8) 3822 3823 #ifndef HGCD_THRESHOLD 3824 #define HGCD_THRESHOLD 400 3825 #endif 3826 3827 #ifndef GCD_DC_THRESHOLD 3828 #define GCD_DC_THRESHOLD 1000 3829 #endif 3830 3831 #ifndef GCDEXT_DC_THRESHOLD 3832 #define GCDEXT_DC_THRESHOLD 600 3833 #endif 3834 3835 /* Definitions for mpn_set_str and mpn_get_str */ 3836 struct powers 3837 { 3838 mp_ptr p; /* actual power value */ 3839 mp_size_t n; /* # of limbs at p */ 3840 mp_size_t shift; /* weight of lowest limb, in limb base B */ 3841 size_t 
digits_in_base; /* number of corresponding digits */ 3842 int base; 3843 }; 3844 typedef struct powers powers_t; 3845 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS) 3846 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS) 3847 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) 3848 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS) 3849 3850 #define mpn_dc_set_str __MPN(dc_set_str) 3851 __GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr)); 3852 #define mpn_bc_set_str __MPN(bc_set_str) 3853 __GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int)); 3854 #define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab) 3855 __GMP_DECLSPEC void mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int)); 3856 3857 3858 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole 3859 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb, 3860 hence giving back the user's size in bits rounded up. Notice that 3861 converting prec->bits->prec gives an unchanged value. */ 3862 #define __GMPF_BITS_TO_PREC(n) \ 3863 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS)) 3864 #define __GMPF_PREC_TO_BITS(n) \ 3865 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS) 3866 3867 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision; 3868 3869 3870 /* Set n to the number of significant digits an mpf of the given _mp_prec 3871 field, in the given base. This is a rounded up value, designed to ensure 3872 there's enough digits to reproduce all the guaranteed part of the value. 3873 3874 There are prec many limbs, but the high might be only "1" so forget it 3875 and just count prec-1 limbs into chars. +1 rounds that upwards, and a 3876 further +1 is because the limbs usually won't fall on digit boundaries. 
3877 3878 FIXME: If base is a power of 2 and the bits per digit divides 3879 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for 3880 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */ 3881 3882 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \ 3883 do { \ 3884 ASSERT (base >= 2 && base < numberof (mp_bases)); \ 3885 (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS) \ 3886 * mp_bases[(base)].chars_per_bit_exactly); \ 3887 } while (0) 3888 3889 3890 /* Decimal point string, from the current C locale. Needs <langinfo.h> for 3891 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get 3892 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under 3893 their respective #if HAVE_FOO_H. 3894 3895 GLIBC recommends nl_langinfo because getting only one facet can be 3896 faster, apparently. */ 3897 3898 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */ 3899 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT) 3900 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT)) 3901 #endif 3902 /* RADIXCHAR is deprecated, still in unix98 or some such. */ 3903 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT) 3904 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR)) 3905 #endif 3906 /* localeconv is slower since it returns all locale stuff */ 3907 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT) 3908 #define GMP_DECIMAL_POINT (localeconv()->decimal_point) 3909 #endif 3910 #if ! 
defined (GMP_DECIMAL_POINT) 3911 #define GMP_DECIMAL_POINT (".") 3912 #endif 3913 3914 3915 #define DOPRNT_CONV_FIXED 1 3916 #define DOPRNT_CONV_SCIENTIFIC 2 3917 #define DOPRNT_CONV_GENERAL 3 3918 3919 #define DOPRNT_JUSTIFY_NONE 0 3920 #define DOPRNT_JUSTIFY_LEFT 1 3921 #define DOPRNT_JUSTIFY_RIGHT 2 3922 #define DOPRNT_JUSTIFY_INTERNAL 3 3923 3924 #define DOPRNT_SHOWBASE_YES 1 3925 #define DOPRNT_SHOWBASE_NO 2 3926 #define DOPRNT_SHOWBASE_NONZERO 3 3927 3928 struct doprnt_params_t { 3929 int base; /* negative for upper case */ 3930 int conv; /* choices above */ 3931 const char *expfmt; /* exponent format */ 3932 int exptimes4; /* exponent multiply by 4 */ 3933 char fill; /* character */ 3934 int justify; /* choices above */ 3935 int prec; /* prec field, or -1 for all digits */ 3936 int showbase; /* choices above */ 3937 int showpoint; /* if radix point always shown */ 3938 int showtrailing; /* if trailing zeros wanted */ 3939 char sign; /* '+', ' ', or '\0' */ 3940 int width; /* width field */ 3941 }; 3942 3943 #if _GMP_H_HAVE_VA_LIST 3944 3945 __GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list)); 3946 __GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t)); 3947 __GMP_DECLSPEC typedef int (*doprnt_reps_t) __GMP_PROTO ((void *, int, int)); 3948 __GMP_DECLSPEC typedef int (*doprnt_final_t) __GMP_PROTO ((void *)); 3949 3950 struct doprnt_funs_t { 3951 doprnt_format_t format; 3952 doprnt_memory_t memory; 3953 doprnt_reps_t reps; 3954 doprnt_final_t final; /* NULL if not required */ 3955 }; 3956 3957 extern const struct doprnt_funs_t __gmp_fprintf_funs; 3958 extern const struct doprnt_funs_t __gmp_sprintf_funs; 3959 extern const struct doprnt_funs_t __gmp_snprintf_funs; 3960 extern const struct doprnt_funs_t __gmp_obstack_printf_funs; 3961 extern const struct doprnt_funs_t __gmp_ostream_funs; 3962 3963 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. 
The first 3964 "size" of these have been written. "alloc > size" is maintained, so 3965 there's room to store a '\0' at the end. "result" is where the 3966 application wants the final block pointer. */ 3967 struct gmp_asprintf_t { 3968 char **result; 3969 char *buf; 3970 size_t size; 3971 size_t alloc; 3972 }; 3973 3974 #define GMP_ASPRINTF_T_INIT(d, output) \ 3975 do { \ 3976 (d).result = (output); \ 3977 (d).alloc = 256; \ 3978 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \ 3979 (d).size = 0; \ 3980 } while (0) 3981 3982 /* If a realloc is necessary, use twice the size actually required, so as to 3983 avoid repeated small reallocs. */ 3984 #define GMP_ASPRINTF_T_NEED(d, n) \ 3985 do { \ 3986 size_t alloc, newsize, newalloc; \ 3987 ASSERT ((d)->alloc >= (d)->size + 1); \ 3988 \ 3989 alloc = (d)->alloc; \ 3990 newsize = (d)->size + (n); \ 3991 if (alloc <= newsize) \ 3992 { \ 3993 newalloc = 2*newsize; \ 3994 (d)->alloc = newalloc; \ 3995 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \ 3996 alloc, newalloc, char); \ 3997 } \ 3998 } while (0) 3999 4000 __GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t)); 4001 __GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int)); 4002 __GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *)); 4003 4004 /* buf is where to write the next output, and size is how much space is left 4005 there. If the application passed size==0 then that's what we'll have 4006 here, and nothing at all should be written. */ 4007 struct gmp_snprintf_t { 4008 char *buf; 4009 size_t size; 4010 }; 4011 4012 /* Add the bytes printed by the call to the total retval, or bail out on an 4013 error. 
*/ 4014 #define DOPRNT_ACCUMULATE(call) \ 4015 do { \ 4016 int __ret; \ 4017 __ret = call; \ 4018 if (__ret == -1) \ 4019 goto error; \ 4020 retval += __ret; \ 4021 } while (0) 4022 #define DOPRNT_ACCUMULATE_FUN(fun, params) \ 4023 do { \ 4024 ASSERT ((fun) != NULL); \ 4025 DOPRNT_ACCUMULATE ((*(fun)) params); \ 4026 } while (0) 4027 4028 #define DOPRNT_FORMAT(fmt, ap) \ 4029 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap)) 4030 #define DOPRNT_MEMORY(ptr, len) \ 4031 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len)) 4032 #define DOPRNT_REPS(c, n) \ 4033 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n)) 4034 4035 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str)) 4036 4037 #define DOPRNT_REPS_MAYBE(c, n) \ 4038 do { \ 4039 if ((n) != 0) \ 4040 DOPRNT_REPS (c, n); \ 4041 } while (0) 4042 #define DOPRNT_MEMORY_MAYBE(ptr, len) \ 4043 do { \ 4044 if ((len) != 0) \ 4045 DOPRNT_MEMORY (ptr, len); \ 4046 } while (0) 4047 4048 __GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list)); 4049 __GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *)); 4050 4051 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2 4052 __GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr)); 4053 4054 __GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list)); 4055 #endif /* _GMP_H_HAVE_VA_LIST */ 4056 4057 4058 typedef int (*gmp_doscan_scan_t) __GMP_PROTO ((void *, const char *, ...)); 4059 typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int)); 4060 typedef int (*gmp_doscan_get_t) __GMP_PROTO ((void *)); 4061 typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *)); 4062 4063 struct gmp_doscan_funs_t { 4064 gmp_doscan_scan_t scan; 4065 gmp_doscan_step_t step; 4066 gmp_doscan_get_t get; 4067 gmp_doscan_unget_t 
unget; 4068 }; 4069 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs; 4070 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs; 4071 4072 #if _GMP_H_HAVE_VA_LIST 4073 __GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list)); 4074 #endif 4075 4076 4077 /* For testing and debugging. */ 4078 #define MPZ_CHECK_FORMAT(z) \ 4079 do { \ 4080 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \ 4081 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \ 4082 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \ 4083 } while (0) 4084 4085 #define MPQ_CHECK_FORMAT(q) \ 4086 do { \ 4087 MPZ_CHECK_FORMAT (mpq_numref (q)); \ 4088 MPZ_CHECK_FORMAT (mpq_denref (q)); \ 4089 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \ 4090 \ 4091 if (SIZ(mpq_numref(q)) == 0) \ 4092 { \ 4093 /* should have zero as 0/1 */ \ 4094 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \ 4095 && PTR(mpq_denref(q))[0] == 1); \ 4096 } \ 4097 else \ 4098 { \ 4099 /* should have no common factors */ \ 4100 mpz_t g; \ 4101 mpz_init (g); \ 4102 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \ 4103 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \ 4104 mpz_clear (g); \ 4105 } \ 4106 } while (0) 4107 4108 #define MPF_CHECK_FORMAT(f) \ 4109 do { \ 4110 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \ 4111 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \ 4112 if (SIZ(f) == 0) \ 4113 ASSERT_ALWAYS (EXP(f) == 0); \ 4114 if (SIZ(f) != 0) \ 4115 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \ 4116 } while (0) 4117 4118 4119 #define MPZ_PROVOKE_REALLOC(z) \ 4120 do { ALLOC(z) = ABSIZ(z); } while (0) 4121 4122 4123 /* Enhancement: The "mod" and "gcd_1" functions below could have 4124 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on 4125 function pointers, only actual functions. It probably doesn't make much 4126 difference to the gmp code, since hopefully we arrange calls so there's 4127 no great need for the compiler to move things around. 
*/ 4128 4129 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64) 4130 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST 4131 in mpn/x86/x86-defs.m4. Be sure to update that when changing here. */ 4132 struct cpuvec_t { 4133 DECL_add_n ((*add_n)); 4134 DECL_addmul_1 ((*addmul_1)); 4135 DECL_copyd ((*copyd)); 4136 DECL_copyi ((*copyi)); 4137 DECL_divexact_1 ((*divexact_1)); 4138 DECL_divexact_by3c ((*divexact_by3c)); 4139 DECL_divrem_1 ((*divrem_1)); 4140 DECL_gcd_1 ((*gcd_1)); 4141 DECL_lshift ((*lshift)); 4142 DECL_mod_1 ((*mod_1)); 4143 DECL_mod_34lsub1 ((*mod_34lsub1)); 4144 DECL_modexact_1c_odd ((*modexact_1c_odd)); 4145 DECL_mul_1 ((*mul_1)); 4146 DECL_mul_basecase ((*mul_basecase)); 4147 DECL_preinv_divrem_1 ((*preinv_divrem_1)); 4148 DECL_preinv_mod_1 ((*preinv_mod_1)); 4149 DECL_rshift ((*rshift)); 4150 DECL_sqr_basecase ((*sqr_basecase)); 4151 DECL_sub_n ((*sub_n)); 4152 DECL_submul_1 ((*submul_1)); 4153 int initialized; 4154 mp_size_t mul_toom22_threshold; 4155 mp_size_t mul_toom33_threshold; 4156 mp_size_t sqr_toom2_threshold; 4157 mp_size_t sqr_toom3_threshold; 4158 }; 4159 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec; 4160 #endif /* x86 fat binary */ 4161 4162 __GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void)); 4163 4164 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init() 4165 if that hasn't yet been done (to establish the right values). */ 4166 #define CPUVEC_THRESHOLD(field) \ 4167 ((LIKELY (__gmpn_cpuvec.initialized) ? 
0 : (__gmpn_cpuvec_init (), 0)), \ 4168 __gmpn_cpuvec.field) 4169 4170 4171 #if HAVE_NATIVE_mpn_add_nc 4172 #define mpn_add_nc __MPN(add_nc) 4173 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); 4174 #else 4175 static inline 4176 mp_limb_t 4177 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4178 { 4179 mp_limb_t co; 4180 co = mpn_add_n (rp, up, vp, n); 4181 co += mpn_add_1 (rp, rp, n, ci); 4182 return co; 4183 } 4184 #endif 4185 4186 #if HAVE_NATIVE_mpn_sub_nc 4187 #define mpn_sub_nc __MPN(sub_nc) 4188 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t)); 4189 #else 4190 static inline mp_limb_t 4191 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4192 { 4193 mp_limb_t co; 4194 co = mpn_sub_n (rp, up, vp, n); 4195 co += mpn_sub_1 (rp, rp, n, ci); 4196 return co; 4197 } 4198 #endif 4199 4200 static inline int 4201 mpn_zero_p (mp_srcptr ap, mp_size_t n) 4202 { 4203 mp_size_t i; 4204 for (i = n - 1; i >= 0; i--) 4205 { 4206 if (ap[i] != 0) 4207 return 0; 4208 } 4209 return 1; 4210 } 4211 4212 #if TUNE_PROGRAM_BUILD 4213 /* Some extras wanted when recompiling some .c files for use by the tune 4214 program. Not part of a normal build. 4215 4216 It's necessary to keep these thresholds as #defines (just to an 4217 identically named variable), since various defaults are established based 4218 on #ifdef in the .c files. For some this is not so (the defaults are 4219 instead established above), but all are done this way for consistency. 
*/ 4220 4221 #undef MUL_TOOM22_THRESHOLD 4222 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold 4223 extern mp_size_t mul_toom22_threshold; 4224 4225 #undef MUL_TOOM33_THRESHOLD 4226 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold 4227 extern mp_size_t mul_toom33_threshold; 4228 4229 #undef MUL_TOOM44_THRESHOLD 4230 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold 4231 extern mp_size_t mul_toom44_threshold; 4232 4233 #undef MUL_TOOM6H_THRESHOLD 4234 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold 4235 extern mp_size_t mul_toom6h_threshold; 4236 4237 #undef MUL_TOOM8H_THRESHOLD 4238 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold 4239 extern mp_size_t mul_toom8h_threshold; 4240 4241 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD 4242 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold 4243 extern mp_size_t mul_toom32_to_toom43_threshold; 4244 4245 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD 4246 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold 4247 extern mp_size_t mul_toom32_to_toom53_threshold; 4248 4249 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD 4250 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold 4251 extern mp_size_t mul_toom42_to_toom53_threshold; 4252 4253 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD 4254 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold 4255 extern mp_size_t mul_toom42_to_toom63_threshold; 4256 4257 #undef MUL_FFT_THRESHOLD 4258 #define MUL_FFT_THRESHOLD mul_fft_threshold 4259 extern mp_size_t mul_fft_threshold; 4260 4261 #undef MUL_FFT_MODF_THRESHOLD 4262 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold 4263 extern mp_size_t mul_fft_modf_threshold; 4264 4265 #undef MUL_FFT_TABLE 4266 #define MUL_FFT_TABLE { 0 } 4267 4268 #undef MUL_FFT_TABLE3 4269 #define MUL_FFT_TABLE3 { {0,0} } 4270 4271 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should 4272 remain as zero (always use it). */ 4273 #if ! 
HAVE_NATIVE_mpn_sqr_basecase 4274 #undef SQR_BASECASE_THRESHOLD 4275 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold 4276 extern mp_size_t sqr_basecase_threshold; 4277 #endif 4278 4279 #if TUNE_PROGRAM_BUILD_SQR 4280 #undef SQR_TOOM2_THRESHOLD 4281 #define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC 4282 #else 4283 #undef SQR_TOOM2_THRESHOLD 4284 #define SQR_TOOM2_THRESHOLD sqr_toom2_threshold 4285 extern mp_size_t sqr_toom2_threshold; 4286 #endif 4287 4288 #undef SQR_TOOM3_THRESHOLD 4289 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold 4290 extern mp_size_t sqr_toom3_threshold; 4291 4292 #undef SQR_TOOM4_THRESHOLD 4293 #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold 4294 extern mp_size_t sqr_toom4_threshold; 4295 4296 #undef SQR_TOOM6_THRESHOLD 4297 #define SQR_TOOM6_THRESHOLD sqr_toom6_threshold 4298 extern mp_size_t sqr_toom6_threshold; 4299 4300 #undef SQR_TOOM8_THRESHOLD 4301 #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold 4302 extern mp_size_t sqr_toom8_threshold; 4303 4304 #undef SQR_FFT_THRESHOLD 4305 #define SQR_FFT_THRESHOLD sqr_fft_threshold 4306 extern mp_size_t sqr_fft_threshold; 4307 4308 #undef SQR_FFT_MODF_THRESHOLD 4309 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold 4310 extern mp_size_t sqr_fft_modf_threshold; 4311 4312 #undef SQR_FFT_TABLE 4313 #define SQR_FFT_TABLE { 0 } 4314 4315 #undef SQR_FFT_TABLE3 4316 #define SQR_FFT_TABLE3 { {0,0} } 4317 4318 #undef MULLO_BASECASE_THRESHOLD 4319 #define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold 4320 extern mp_size_t mullo_basecase_threshold; 4321 4322 #undef MULLO_DC_THRESHOLD 4323 #define MULLO_DC_THRESHOLD mullo_dc_threshold 4324 extern mp_size_t mullo_dc_threshold; 4325 4326 #undef MULLO_MUL_N_THRESHOLD 4327 #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold 4328 extern mp_size_t mullo_mul_n_threshold; 4329 4330 #undef DC_DIV_QR_THRESHOLD 4331 #define DC_DIV_QR_THRESHOLD dc_div_qr_threshold 4332 extern mp_size_t dc_div_qr_threshold; 4333 4334 #undef DC_DIVAPPR_Q_THRESHOLD 4335 
#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold 4336 extern mp_size_t dc_divappr_q_threshold; 4337 4338 #undef DC_BDIV_Q_THRESHOLD 4339 #define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold 4340 extern mp_size_t dc_bdiv_q_threshold; 4341 4342 #undef DC_BDIV_QR_THRESHOLD 4343 #define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold 4344 extern mp_size_t dc_bdiv_qr_threshold; 4345 4346 #undef MU_DIV_QR_THRESHOLD 4347 #define MU_DIV_QR_THRESHOLD mu_div_qr_threshold 4348 extern mp_size_t mu_div_qr_threshold; 4349 4350 #undef MU_DIVAPPR_Q_THRESHOLD 4351 #define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold 4352 extern mp_size_t mu_divappr_q_threshold; 4353 4354 #undef MUPI_DIV_QR_THRESHOLD 4355 #define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold 4356 extern mp_size_t mupi_div_qr_threshold; 4357 4358 #undef MU_BDIV_QR_THRESHOLD 4359 #define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold 4360 extern mp_size_t mu_bdiv_qr_threshold; 4361 4362 #undef MU_BDIV_Q_THRESHOLD 4363 #define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold 4364 extern mp_size_t mu_bdiv_q_threshold; 4365 4366 #undef INV_MULMOD_BNM1_THRESHOLD 4367 #define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold 4368 extern mp_size_t inv_mulmod_bnm1_threshold; 4369 4370 #undef INV_NEWTON_THRESHOLD 4371 #define INV_NEWTON_THRESHOLD inv_newton_threshold 4372 extern mp_size_t inv_newton_threshold; 4373 4374 #undef INV_APPR_THRESHOLD 4375 #define INV_APPR_THRESHOLD inv_appr_threshold 4376 extern mp_size_t inv_appr_threshold; 4377 4378 #undef BINV_NEWTON_THRESHOLD 4379 #define BINV_NEWTON_THRESHOLD binv_newton_threshold 4380 extern mp_size_t binv_newton_threshold; 4381 4382 #undef REDC_1_TO_REDC_2_THRESHOLD 4383 #define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold 4384 extern mp_size_t redc_1_to_redc_2_threshold; 4385 4386 #undef REDC_2_TO_REDC_N_THRESHOLD 4387 #define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold 4388 extern mp_size_t redc_2_to_redc_n_threshold; 4389 4390 #undef REDC_1_TO_REDC_N_THRESHOLD 4391 
#define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold 4392 extern mp_size_t redc_1_to_redc_n_threshold; 4393 4394 #undef MATRIX22_STRASSEN_THRESHOLD 4395 #define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold 4396 extern mp_size_t matrix22_strassen_threshold; 4397 4398 #undef HGCD_THRESHOLD 4399 #define HGCD_THRESHOLD hgcd_threshold 4400 extern mp_size_t hgcd_threshold; 4401 4402 #undef GCD_DC_THRESHOLD 4403 #define GCD_DC_THRESHOLD gcd_dc_threshold 4404 extern mp_size_t gcd_dc_threshold; 4405 4406 #undef GCDEXT_DC_THRESHOLD 4407 #define GCDEXT_DC_THRESHOLD gcdext_dc_threshold 4408 extern mp_size_t gcdext_dc_threshold; 4409 4410 #undef DIVREM_1_NORM_THRESHOLD 4411 #define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold 4412 extern mp_size_t divrem_1_norm_threshold; 4413 4414 #undef DIVREM_1_UNNORM_THRESHOLD 4415 #define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold 4416 extern mp_size_t divrem_1_unnorm_threshold; 4417 4418 #undef MOD_1_NORM_THRESHOLD 4419 #define MOD_1_NORM_THRESHOLD mod_1_norm_threshold 4420 extern mp_size_t mod_1_norm_threshold; 4421 4422 #undef MOD_1_UNNORM_THRESHOLD 4423 #define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold 4424 extern mp_size_t mod_1_unnorm_threshold; 4425 4426 #undef MOD_1N_TO_MOD_1_1_THRESHOLD 4427 #define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold 4428 extern mp_size_t mod_1n_to_mod_1_1_threshold; 4429 4430 #undef MOD_1U_TO_MOD_1_1_THRESHOLD 4431 #define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold 4432 extern mp_size_t mod_1u_to_mod_1_1_threshold; 4433 4434 #undef MOD_1_1_TO_MOD_1_2_THRESHOLD 4435 #define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold 4436 extern mp_size_t mod_1_1_to_mod_1_2_threshold; 4437 4438 #undef MOD_1_2_TO_MOD_1_4_THRESHOLD 4439 #define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold 4440 extern mp_size_t mod_1_2_to_mod_1_4_threshold; 4441 4442 #undef PREINV_MOD_1_TO_MOD_1_THRESHOLD 4443 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 
preinv_mod_1_to_mod_1_threshold 4444 extern mp_size_t preinv_mod_1_to_mod_1_threshold; 4445 4446 #if ! UDIV_PREINV_ALWAYS 4447 #undef DIVREM_2_THRESHOLD 4448 #define DIVREM_2_THRESHOLD divrem_2_threshold 4449 extern mp_size_t divrem_2_threshold; 4450 #endif 4451 4452 #undef MULMOD_BNM1_THRESHOLD 4453 #define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold 4454 extern mp_size_t mulmod_bnm1_threshold; 4455 4456 #undef SQRMOD_BNM1_THRESHOLD 4457 #define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold 4458 extern mp_size_t sqrmod_bnm1_threshold; 4459 4460 #undef GET_STR_DC_THRESHOLD 4461 #define GET_STR_DC_THRESHOLD get_str_dc_threshold 4462 extern mp_size_t get_str_dc_threshold; 4463 4464 #undef GET_STR_PRECOMPUTE_THRESHOLD 4465 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold 4466 extern mp_size_t get_str_precompute_threshold; 4467 4468 #undef SET_STR_DC_THRESHOLD 4469 #define SET_STR_DC_THRESHOLD set_str_dc_threshold 4470 extern mp_size_t set_str_dc_threshold; 4471 4472 #undef SET_STR_PRECOMPUTE_THRESHOLD 4473 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold 4474 extern mp_size_t set_str_precompute_threshold; 4475 4476 #undef FFT_TABLE_ATTRS 4477 #define FFT_TABLE_ATTRS 4478 extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE]; 4479 #define FFT_TABLE3_SIZE 2000 /* generous space for tuning */ 4480 extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE]; 4481 4482 /* Sizes the tune program tests up to, used in a couple of recompilations. 
*/ 4483 #undef MUL_TOOM22_THRESHOLD_LIMIT 4484 #undef MUL_TOOM33_THRESHOLD_LIMIT 4485 #undef MULLO_BASECASE_THRESHOLD_LIMIT 4486 #undef SQR_TOOM3_THRESHOLD_LIMIT 4487 #define SQR_TOOM2_MAX_GENERIC 200 4488 #define MUL_TOOM22_THRESHOLD_LIMIT 700 4489 #define MUL_TOOM33_THRESHOLD_LIMIT 700 4490 #define SQR_TOOM3_THRESHOLD_LIMIT 400 4491 #define MUL_TOOM44_THRESHOLD_LIMIT 1000 4492 #define SQR_TOOM4_THRESHOLD_LIMIT 1000 4493 #define MUL_TOOM6H_THRESHOLD_LIMIT 1100 4494 #define SQR_TOOM6_THRESHOLD_LIMIT 1100 4495 #define MUL_TOOM8H_THRESHOLD_LIMIT 1200 4496 #define SQR_TOOM8_THRESHOLD_LIMIT 1200 4497 #define MULLO_BASECASE_THRESHOLD_LIMIT 200 4498 #define GET_STR_THRESHOLD_LIMIT 150 4499 4500 #endif /* TUNE_PROGRAM_BUILD */ 4501 4502 #if defined (__cplusplus) 4503 } 4504 #endif 4505 4506 /* FIXME: Make these itch functions less conservative. Also consider making 4507 them dependent on just 'an', and compute the allocation directly from 'an' 4508 instead of via n. */ 4509 4510 /* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth. 4511 k is ths smallest k such that 4512 ceil(an/2^k) < MUL_TOOM22_THRESHOLD. 4513 which implies that 4514 k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)) 4515 = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)))) 4516 */ 4517 #define mpn_toom22_mul_itch(an, bn) \ 4518 (2 * ((an) + GMP_NUMB_BITS)) 4519 #define mpn_toom2_sqr_itch(an) \ 4520 (2 * ((an) + GMP_NUMB_BITS)) 4521 4522 /* Can probably be trimmed to 2 an + O(log an). 
*/ 4523 #define mpn_toom33_mul_itch(an, bn) \ 4524 ((5 * (an) >> 1) + GMP_NUMB_BITS) 4525 #define mpn_toom3_sqr_itch(an) \ 4526 ((5 * (an) >> 1) + GMP_NUMB_BITS) 4527 4528 #define mpn_toom44_mul_itch(an, bn) \ 4529 (3 * (an) + GMP_NUMB_BITS) 4530 #define mpn_toom4_sqr_itch(an) \ 4531 (3 * (an) + GMP_NUMB_BITS) 4532 4533 #define mpn_toom6_sqr_itch(n) \ 4534 ( ((n) - SQR_TOOM6_THRESHOLD)*2 + \ 4535 MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \ 4536 mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) ) 4537 4538 #define mpn_toom6_mul_n_itch(n) \ 4539 ( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \ 4540 MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \ 4541 mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) ) 4542 4543 static inline mp_size_t 4544 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) { 4545 mp_size_t estimatedN; 4546 estimatedN = (an + bn) / (size_t) 10 + 1; 4547 return mpn_toom6_mul_n_itch (estimatedN * 6); 4548 } 4549 4550 #define mpn_toom8_sqr_itch(n) \ 4551 ( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \ 4552 MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \ 4553 mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) ) 4554 4555 #define mpn_toom8_mul_n_itch(n) \ 4556 ( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) + \ 4557 MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \ 4558 mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) ) 4559 4560 static inline mp_size_t 4561 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) { 4562 mp_size_t estimatedN; 4563 estimatedN = (an + bn) / (size_t) 14 + 1; 4564 return mpn_toom8_mul_n_itch (estimatedN * 8); 4565 } 4566 4567 static inline mp_size_t 4568 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn) 4569 { 4570 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1); 4571 mp_size_t itch = 2 * n + 1; 4572 4573 return itch; 4574 } 4575 4576 static inline mp_size_t 4577 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn) 4578 { 4579 mp_size_t n = an >= 2 * bn ? 
(an + 3) >> 2 : (bn + 1) >> 1; 4580 return 6 * n + 3; 4581 } 4582 4583 static inline mp_size_t 4584 mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn) 4585 { 4586 mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3); 4587 4588 return 6*n + 4; 4589 } 4590 4591 static inline mp_size_t 4592 mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn) 4593 { 4594 mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1); 4595 return 6*n + 4; 4596 } 4597 4598 static inline mp_size_t 4599 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn) 4600 { 4601 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3); 4602 return 10 * n + 10; 4603 } 4604 4605 static inline mp_size_t 4606 mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn) 4607 { 4608 mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1); 4609 return 10 * n + 10; 4610 } 4611 4612 static inline mp_size_t 4613 mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn) 4614 { 4615 mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3); 4616 return 9 * n + 3; 4617 } 4618 4619 #if 0 4620 #define mpn_fft_mul mpn_mul_fft_full 4621 #else 4622 #define mpn_fft_mul mpn_nussbaumer_mul 4623 #endif 4624 4625 #ifdef __cplusplus 4626 4627 /* A little helper for a null-terminated __gmp_allocate_func string. 4628 The destructor ensures it's freed even if an exception is thrown. 4629 The len field is needed by the destructor, and can be used by anyone else 4630 to avoid a second strlen pass over the data. 4631 4632 Since our input is a C string, using strlen is correct. Perhaps it'd be 4633 more C++-ish style to use std::char_traits<char>::length, but char_traits 4634 isn't available in gcc 2.95.4. 
*/ 4635 4636 class gmp_allocated_string { 4637 public: 4638 char *str; 4639 size_t len; 4640 gmp_allocated_string(char *arg) 4641 { 4642 str = arg; 4643 len = std::strlen (str); 4644 } 4645 ~gmp_allocated_string() 4646 { 4647 (*__gmp_free_func) (str, len+1); 4648 } 4649 }; 4650 4651 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char); 4652 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &); 4653 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int); 4654 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o); 4655 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s); 4656 extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat; 4657 4658 #endif /* __cplusplus */ 4659 4660 #endif /* __GMP_IMPL_H__ */ 4661