1 
2 #ifndef NTL_ctools__H
3 #define NTL_ctools__H
4 
5 #include <NTL/config.h>
6 #include <NTL/mach_desc.h>
7 
8 #include <NTL/ALL_FEATURES.h>
9 
10 #include <NTL/PackageInfo.h>
11 
12 #if (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
13 #define NTL_GNUC_INTEL
14 #endif
15 
16 #if (!defined(NTL_HAVE_LL_TYPE) && defined(NTL_WINPACK) &&  (defined(_MSC_VER) || defined(NTL_GNUC_INTEL)))
17 // for the windows distribution,
18 //   we assume LL_TYPE works for MSVC++ (which is true for both x86 and ARM)
19 //   and for GNUC/Intel platforms (e.g., Code Blocks)
20 #define NTL_HAVE_LL_TYPE
21 #endif
22 
23 // Define the working C++ standard.
// Both NTL_STD_CXX14 and NTL_STD_CXX11 may be defined, in which case we take the highest one
25 
26 #if defined(NTL_STD_CXX14)
27 #define NTL_CXX_STANDARD (2014)
28 #elif defined(NTL_STD_CXX11)
29 #define NTL_CXX_STANDARD (2011)
30 #else
31 #define NTL_CXX_STANDARD (1998)
32 #endif
33 
34 // define some macros regarding noexcept declarations
35 
36 #if (NTL_CXX_STANDARD >= 2011)
37 
38 #define NTL_NOEXCEPT noexcept
39 
40 #ifdef NTL_EXCEPTIONS
41 #define NTL_FAKE_NOEXCEPT
42 #else
43 #define NTL_FAKE_NOEXCEPT noexcept
44 #endif
45 
46 #else
47 
48 #define NTL_NOEXCEPT
49 #define NTL_FAKE_NOEXCEPT
50 
51 #endif
52 
53 
54 /*
55  * Resolve double-word integer type.
56  *
57  * Unfortunately, there is no "standard" way to do this.
58  * On 32-bit machines, 'long long' usually works (but not
59  * on MSVC++ or BORLAND), and on 64-bit machines, there is
60  * no standard.  However, most compilers do offer *some*
61  * non-standard double-word type.
62  *
63  * Note that C99 creates a standard header <stdint.h>,
64  * but it is not clear how widely this is implemented,
 * and for example, older versions of GCC do not provide a type int128_t
66  * in <stdint.h> on 64-bit machines.
67  */
68 
69 
70 
71 #if (defined(NTL_UNSIGNED_LONG_LONG_TYPE))
72 
73 #define NTL_ULL_TYPE NTL_UNSIGNED_LONG_LONG_TYPE
74 
75 #elif (NTL_BITS_PER_LONG == 64 && defined(__GNUC__))
76 
77 #define NTL_ULL_TYPE __uint128_t
78 
79 #elif (NTL_BITS_PER_LONG == 32 && (defined(_MSC_VER) || defined(__BORLANDC__)))
80 
81 #define NTL_ULL_TYPE unsigned __int64
82 
83 #elif (NTL_BITS_PER_LONG == 64 && (defined(_MSC_VER) || defined(__BORLANDC__)))
84 
85 #define NTL_ULL_TYPE unsigned __int128
86 
87 #endif
88 
89 #if (!defined(NTL_ULL_TYPE))
90 
91 #define NTL_ULL_TYPE unsigned long long
92 
93 #endif
94 
95 
96 #ifdef NTL_HAVE_LL_TYPE
97 
98 typedef NTL_ULL_TYPE _ntl_ulonglong;
99 // typenames are more convenient than macros
100 
101 #else
102 
103 #undef NTL_ULL_TYPE
104 // prevent any use of these macros
105 
// Placeholder class standing in for the double-word type when
// NTL_HAVE_LL_TYPE is not defined.  Its only declared constructor is
// private, so client code cannot default-construct an object of this type.
class _ntl_ulonglong {
private:
   _ntl_ulonglong() { }
};
107 // cannot create variables of these types
108 
109 
110 #endif
111 
112 /********************************************************/
113 
114 
115 
116 // Define an unsigned type with at least 32 bits
117 // there is no truly portable way to do this, yet...
118 
119 
120 #if (NTL_BITS_PER_INT >= 32)
121 
122 typedef unsigned int _ntl_uint32; // 32-bit word
123 #define NTL_BITS_PER_INT32 NTL_BITS_PER_INT
124 
125 #else
126 
127 // NOTE: C++ standard guarantees longs are at least 32-bits wide,
// and this is also explicitly checked at build time
129 
130 typedef unsigned long _ntl_uint32; // 32-bit word
131 #define NTL_BITS_PER_INT32 NTL_BITS_PER_LONG
132 
133 #endif
134 
135 
136 
137 // The usual token pasting stuff...
138 
139 #define NTL_PASTE_TOKENS2(a,b) a ## b
140 #define NTL_PASTE_TOKENS(a,b) NTL_PASTE_TOKENS2(a,b)
141 
142 #define NTL_STRINGIFY(x) NTL_STRINGIFY_AUX(x)
143 #define NTL_STRINGIFY_AUX(x) #x
144 
145 
146 
147 
148 
149 
150 #define NTL_OVFBND (1L << (NTL_BITS_PER_LONG-4))
151 
152 /*
153  * NTL_OVFBND is the general bound used throughout NTL to keep various
154  * integer values comfortably bounded away from an integer overflow
155  * condition.  Do not change this value!
156  */
157 
158 
159 
160 
161 
162 #if ((NTL_BITS_PER_SIZE_T-1) < (NTL_BITS_PER_LONG-4))
163 #define NTL_OVFBND1 (1L << (NTL_BITS_PER_SIZE_T-1))
164 #else
165 #define NTL_OVFBND1 NTL_OVFBND
166 #endif
167 
168 /*
 * NTL_OVFBND1 is a smaller bound than NTL_OVFBND when size_t is
170  * narrower than long.  This prevents overflow on calls to malloc
171  * and realloc.
172  */
173 
174 
175 
176 
177 
178 
179 #define NTL_OVERFLOW(n, a, b) \
180    (((b) >= NTL_OVFBND) || (((long) (n)) > 0 && (((a) >= NTL_OVFBND) || \
181     (((long) (n)) >= (NTL_OVFBND-((long)(b))+((long)(a))-1)/((long)(a))))))
182 
183 /*
184  * NTL_OVERFLOW(n, a, b) returns 1 if n*a + b >= NTL_OVFBND,
185  * and returns 0 otherwise.  The value n is effectively treated as type long,
186  * while the values a and b may be *any* integral type.  It is assumed that
187  * n >= 0, a > 0, and b >= 0.  Care is taken to ensure that overflow does
188  * not occur. If a and b are constants, and n has no side effects,
 * a good optimizing compiler will translate this into a single test
190  * of the form n >= c, where c is a constant.
191  */
192 
193 
194 
195 
196 
197 
198 #define NTL_OVERFLOW1(n, a, b) \
199    (((b) >= NTL_OVFBND1) || (((long) (n)) > 0 && (((a) >= NTL_OVFBND1) || \
200     (((long) (n)) >= (NTL_OVFBND1-((long)(b))+((long)(a))-1)/((long)(a))))))
201 
202 /*
203  * NTL_OVERFLOW1 is the same as NTL_OVERFLOW, except that it uses the
204  * bound NTL_OVFBND1 instead of NTL_OVFBND.
205  */
206 
207 
208 
209 
210 #ifdef NTL_TEST_EXCEPTIONS
211 
212 extern unsigned long exception_counter;
213 
214 #define NTL_BASIC_MALLOC(n, a, b) \
215    (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
216     ((void *) malloc(((long)(n))*((long)(a)) + ((long)(b)))))
217 
218 #define NTL_MALLOC(n, a, b) \
219    (--exception_counter == 0 ? (void *) 0 : NTL_BASIC_MALLOC(n, a, b))
220 
221 #else
222 
223 #define NTL_MALLOC(n, a, b) \
224    (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
225     ((void *) malloc(((long)(n))*((long)(a)) + ((long)(b)))))
226 
227 
228 #endif
229 
230 /*
231  * NTL_MALLOC(n, a, b) returns 0 if a*n + b >= NTL_OVFBND1, and otherwise
232  * returns malloc(n*a + b).
233  * The programmer must ensure that the name "malloc" is visible
234  * at the point in the source code where this macro is expanded.
235  */
236 
237 
238 #ifdef NTL_TEST_EXCEPTIONS
239 
240 #define NTL_BASIC_SNS_MALLOC(n, a, b) \
241    (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
242     ((void *) NTL_SNS malloc(((long)(n))*((long)(a)) + ((long)(b)))))
243 
244 
245 #define NTL_SNS_MALLOC(n, a, b) \
246    (--exception_counter == 0 ? (void *) 0 : NTL_BASIC_SNS_MALLOC(n, a, b))
247 
248 
249 #else
250 
251 #define NTL_SNS_MALLOC(n, a, b) \
252    (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
253     ((void *) NTL_SNS malloc(((long)(n))*((long)(a)) + ((long)(b)))))
254 
255 #endif
256 
257 /*
258  * NTL_SNS_MALLOC is the same as NTL_MALLOC, except that the call
259  * to malloc is prefixed by NTL_SNS.
260  */
261 
262 
263 
264 
265 
266 
267 
268 
269 #define NTL_REALLOC(p, n, a, b) \
270    (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
271     ((void *) realloc((p), ((long)(n))*((long)(a)) + ((long)(b)))))
272 
273 /*
 * NTL_REALLOC(p, n, a, b) returns 0 if a*n + b >= NTL_OVFBND1, and otherwise
275  * returns realloc(p, n*a + b).
276  * The programmer must ensure that the name "realloc" is visible
277  * at the point in the source code where this macro is expanded.
278  */
279 
280 
281 
282 
283 
284 
285 #define NTL_SNS_REALLOC(p, n, a, b) \
286    (NTL_OVERFLOW1(n, a, b) ? ((void *) 0) : \
287     ((void *) NTL_SNS realloc((p), ((long)(n))*((long)(a)) + ((long)(b)))))
288 
289 /*
290  * NTL_SNS_REALLOC is the same as NTL_REALLOC, except that the call
291  * to realloc is prefixed by NTL_SNS.
292  */
293 
294 
295 
296 
297 
298 #define NTL_MAX_ALLOC_BLOCK (40000)
299 
300 /*
301  * NTL_MAX_ALLOC_BLOCK is the number of bytes that are allocated in
302  * a single block in a number of places throughout NTL (for
303  * vec_ZZ_p, ZZVec, vec_GF2X, and GF2XVec).
304  */
305 
306 
/*
 * NTL_ULONG_TO_LONG(a): the argument is parenthesized at every use so that
 * the casts apply to the value of the whole argument expression, not just
 * to its first operand (e.g. NTL_ULONG_TO_LONG(u + v)).
 * NOTE: the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define NTL_ULONG_TO_LONG(a) \
   ((((unsigned long) (a)) >> (NTL_BITS_PER_LONG-1)) ? \
    (((long) (((unsigned long) (a)) ^ ((unsigned long) NTL_MIN_LONG))) ^ \
       NTL_MIN_LONG) : \
    ((long) (a)))
312 
313 /*
314  * This macro converts from unsigned long to signed long.  It is portable
315  * among platforms for which a long has a 2's complement representation
316  * of the same width as an unsigned long.  While it avoids assumptions
317  * about the behavior of non-standard conversions,  a good optimizing
318  * compiler should turn it into the identity function.
319  */
320 
321 
/*
 * NTL_UINT_TO_INT(a): the argument is parenthesized at every use so that
 * the casts apply to the value of the whole argument expression, not just
 * to its first operand (e.g. NTL_UINT_TO_INT(x / y)).
 * NOTE: the argument is evaluated more than once, so it must not have
 * side effects.
 */
#define NTL_UINT_TO_INT(a) \
   ((((unsigned int) (a)) >> (NTL_BITS_PER_INT-1)) ? \
    (((int) (((unsigned int) (a)) ^ ((unsigned int) NTL_MIN_INT))) ^ \
       NTL_MIN_INT) : \
    ((int) (a)))
327 
328 /*
329  * This macro converts from unsigned int to signed int.  It is portable
330  * among platforms for which an int has a 2's complement representation
331  * of the same width as an unsigned int.  While it avoids assumptions
332  * about the behavior of non-standard conversions,  a good optimizing
333  * compiler should turn it into the identity function.
334  */
335 
336 
337 #ifdef NTL_THREADS
338 
339 #define NTL_THREAD_LOCAL thread_local
340 
341 #ifdef __GNUC__
342 #define NTL_CHEAP_THREAD_LOCAL __thread
343 #else
344 #define NTL_CHEAP_THREAD_LOCAL thread_local
345 #endif
346 
347 #else
348 
349 #define NTL_THREAD_LOCAL
350 #define NTL_CHEAP_THREAD_LOCAL
351 
352 #endif
353 
354 
355 #define NTL_RELEASE_THRESH (128)
356 
357 /*
358  * threshold for releasing scratch memory.
359  */
360 
361 
362 
/* NOTE(review): presumably returns a wall-clock time reading as a double;
   the unit and reference point are not visible in this header -- confirm
   against the implementation. */
double _ntl_GetWallTime();


long _ntl_IsFinite(double *p);
/* This forces a double into memory, and tests if it is "normal";
   that means, not NaN, not +/- infinity, not denormalized, etc.
   Forcing into memory is sometimes necessary on machines
   with "extended" double precision registers (e.g., Intel x86s)
   to force the standard IEEE format. */

void _ntl_ForceToMem(double *p);
/* This is a do-nothing routine that has the effect of forcing
   a double into memory (see comment above). */

/* NOTE(review): by its name and signature this looks like an ldexp
   variant taking a long exponent (x scaled by a power of two) --
   confirm against the implementation. */
double _ntl_ldexp(double x, long e);
378 
379 
// NTL_DEFINE_SWAP(T) defines an inline overload _ntl_swap(T&, T&) that
// exchanges the values of its two arguments through a temporary copy.
#define NTL_DEFINE_SWAP(T)\
inline void _ntl_swap(T& a, T& b)\
{\
   const T old_a = a;\
   a = b;\
   b = old_a;\
}

// plain integer types
NTL_DEFINE_SWAP(long)
NTL_DEFINE_SWAP(int)
NTL_DEFINE_SWAP(short)
NTL_DEFINE_SWAP(char)

// unsigned integer types
NTL_DEFINE_SWAP(unsigned long)
NTL_DEFINE_SWAP(unsigned int)
NTL_DEFINE_SWAP(unsigned short)
NTL_DEFINE_SWAP(unsigned char)

// floating-point types
NTL_DEFINE_SWAP(double)
NTL_DEFINE_SWAP(float)
398 
399 
// Pointer overload of _ntl_swap: exchanges two pointers of the same
// pointee type.  Only the pointers are exchanged, not the pointed-to objects.
template<class T>
void _ntl_swap(T*& a, T*& b)
{
   T* const old_a = a;
   a = b;
   b = old_a;
}
405 
406 /* These are convenience routines.  I don't want it to overload
407    the std library's swap function, nor do I want to rely on the latter,
408    as the C++ standard is kind of broken on the issue of where
409    swap is defined. And I also only want it defined for built-in types.
410  */
411 
412 
413 // The following do for "move" what the above does for swap
414 
// NTL_DEFINE_SCALAR_MOVE(T) defines an inline overload
// _ntl_scalar_move(T&) that returns the current value of its argument
// and leaves the argument set to zero.
#define NTL_DEFINE_SCALAR_MOVE(T)\
inline T _ntl_scalar_move(T& a)\
{\
   const T taken = a;\
   a = 0;\
   return taken;\
}

// plain integer types
NTL_DEFINE_SCALAR_MOVE(long)
NTL_DEFINE_SCALAR_MOVE(int)
NTL_DEFINE_SCALAR_MOVE(short)
NTL_DEFINE_SCALAR_MOVE(char)

// unsigned integer types
NTL_DEFINE_SCALAR_MOVE(unsigned long)
NTL_DEFINE_SCALAR_MOVE(unsigned int)
NTL_DEFINE_SCALAR_MOVE(unsigned short)
NTL_DEFINE_SCALAR_MOVE(unsigned char)

// floating-point types
NTL_DEFINE_SCALAR_MOVE(double)
NTL_DEFINE_SCALAR_MOVE(float)
433 
434 
// Pointer overload of _ntl_scalar_move: returns the pointer currently
// held in a and leaves a as a null pointer.
template<class T>
T* _ntl_scalar_move(T*& a)
{
   T* const taken = a;
   a = 0;
   return taken;
}
440 
441 
442 
443 
444 
445 // The following routine increments a pointer so that
446 // it is properly aligned.
447 // It is assumed that align > 0.
448 // If align is a constant power of 2, it compiles
449 // into a small handful of simple instructions.
450 
451 #if (NTL_BIG_POINTERS)
452 
453 #define NTL_UPTRINT_T unsigned long long
454 // DIRT: this should really be std::uintptr_t, defined
455 // in <cstdint>; however, that header is not widely available,
456 // and even if it were, std::uintptr_t is not guaranteed
457 // to be defined.  Of course, unsigned long long may not
458 // be defined in pre-C++11.
459 
460 #else
461 
462 #define NTL_UPTRINT_T unsigned long
463 
464 #endif
465 
466 
467 #ifdef NTL_HAVE_ALIGNED_ARRAY
468 
469 inline
_ntl_make_aligned(char * p,long align)470 char *_ntl_make_aligned(char *p, long align)
471 {
472    unsigned long r =  (unsigned long) (((NTL_UPTRINT_T) (p)) % ((NTL_UPTRINT_T) (align)));
473    return p + ((((unsigned long) (align)) - r) % ((unsigned long) (align)));
474 }
475 
476 #else
477 
478 
// Variant used when NTL_HAVE_ALIGNED_ARRAY is not defined:
// no adjustment is made and p is returned unchanged.
inline
char *_ntl_make_aligned(char *p, long align)
{
   (void) align;   // intentionally unused in this variant
   return p;
}
484 
485 
486 #endif
487 
488 
489 
490 
491 
492 // The following is for aligning small local arrays
493 // Equivalent to type x[n], but aligns to align bytes
494 // Only works for POD types
495 // NOTE: the gcc aligned attribute might work, but there is
496 // some chatter on the web that this was (at some point) buggy.
497 // Not clear what the current status is.
498 // Anyway, this is only intended for use with gcc on intel
499 // machines, so it should be OK.
500 
501 
// Declares a char buffer of (n)*sizeof(type)+(align) bytes and a pointer x
// of type "type *" obtained by passing the start of that buffer through
// _ntl_make_aligned.  The n and align arguments are parenthesized so that
// compound expressions such as k+1 size the buffer correctly.
// NOTE: the expansion consists of two declarations and already ends in a
// semicolon.
#define NTL_ALIGNED_LOCAL_ARRAY(align, x, type, n) \
   char x##__ntl_hidden_variable_storage[(n)*sizeof(type)+(align)]; \
   type *x = (type *) _ntl_make_aligned(&x##__ntl_hidden_variable_storage[0], (align));
505 
506 
507 #define NTL_AVX_BYTE_ALIGN (32)
508 #define NTL_AVX_DBL_ALIGN (NTL_AVX_BYTE_ALIGN/long(sizeof(double)))
509 
510 #define NTL_AVX_LOCAL_ARRAY(x, type, n) NTL_ALIGNED_LOCAL_ARRAY(NTL_AVX_BYTE_ALIGN, x, type, n)
511 
512 #define NTL_AVX512_BYTE_ALIGN (64)
513 
514 #define NTL_AVX512_LOCAL_ARRAY(x, type, n) NTL_ALIGNED_LOCAL_ARRAY(NTL_AVX512_BYTE_ALIGN, x, type, n)
515 
516 
517 #define NTL_DEFAULT_ALIGN (64)
518 // this should be big enough to satisfy any SIMD instructions,
519 // and it should also be as big as a cache line
520 
521 
522 
523 #ifdef NTL_HAVE_BUILTIN_CLZL
524 
525 inline long
_ntl_count_bits(unsigned long x)526 _ntl_count_bits(unsigned long x)
527 {
528    return x ? (NTL_BITS_PER_LONG - __builtin_clzl(x)) : 0;
529 }
530 
531 #else
532 
533 inline long
_ntl_count_bits(unsigned long x)534 _ntl_count_bits(unsigned long x)
535 {
536    if (!x) return 0;
537 
538    long res = NTL_BITS_PER_LONG;
539    while (x < (1UL << (NTL_BITS_PER_LONG-1))) {
540       x <<= 1;
541       res--;
542    }
543 
544    return res;
545 }
546 
547 #endif
548 
549 
550 
551 
552 #if (!defined(NTL_CLEAN_INT) && NTL_ARITH_RIGHT_SHIFT && (NTL_BITS_PER_LONG == (1 << (NTL_NUMBITS_BPL-1))))
553 
554 
555 
556 inline void
_ntl_bpl_divrem(long a,long & q,long & r)557 _ntl_bpl_divrem(long a, long& q, long& r)
558 {
559    q = a >> (NTL_NUMBITS_BPL-1);
560    r = a & (NTL_BITS_PER_LONG-1);
561 }
562 
563 #else
564 
565 inline void
_ntl_bpl_divrem(long a,long & q,long & r)566 _ntl_bpl_divrem(long a, long& q, long& r)
567 {
568    q = a / NTL_BITS_PER_LONG;
569    r = a % NTL_BITS_PER_LONG;
570    if (r < 0) {
571       q--;
572       r += NTL_BITS_PER_LONG;
573    }
574 }
575 
576 #endif
577 
578 inline void
_ntl_bpl_divrem(unsigned long a,long & q,long & r)579 _ntl_bpl_divrem(unsigned long a, long& q, long& r)
580 {
581    q = a / NTL_BITS_PER_LONG;
582    r = a % NTL_BITS_PER_LONG;
583 }
584 
585 
586 // vectors are grown by a factor of 1.5
// Returns n plus half of n (integer division), i.e. the grown capacity.
// No overflow check is performed here.
inline long _ntl_vec_grow(long n)
{
   const long half = n/2;
   return n + half;
}
589 
590 
// Compile-time predicate: _ntl_is_char_pointer<T>::value is true exactly
// when T is char* or const char*, and false for every other type.

// primary template: the general (false) case
template <class T>
struct _ntl_is_char_pointer { enum { value = false }; };

// explicit specialization for pointer to mutable char
template <>
struct _ntl_is_char_pointer<char*> { enum { value = true }; };

// explicit specialization for pointer to const char
template <>
struct _ntl_is_char_pointer<const char*> { enum { value = true }; };
608 
// Conditional type selector: _ntl_enable_if<cond, T> has a nested typedef
// "type" (equal to T, which defaults to void) only when cond is true.

// primary template: used when the condition is false; no nested type
template <bool, typename T = void>
struct _ntl_enable_if { };

// partial specialization for a true condition: exposes T as "type"
template <typename T>
struct _ntl_enable_if<true, T>
{
   typedef T type;
};
617 
618 
619 
620 
621 
622 #endif
623