1
2 #ifndef NTL_ctools__H
3 #define NTL_ctools__H
4
5 #include <NTL/config.h>
6 #include <NTL/mach_desc.h>
7
8 #include <NTL/ALL_FEATURES.h>
9
10 #include <NTL/PackageInfo.h>
11
// NTL_GNUC_INTEL is defined when a GNU-compatible compiler (__GNUC__)
// is targeting 32-bit or 64-bit x86.
#if defined(__GNUC__)
#if (defined(__i386__) || defined(__x86_64__))
#define NTL_GNUC_INTEL
#endif
#endif

// For the Windows distribution (NTL_WINPACK), we assume that LL_TYPE
// works for MSVC++ (which is true for both x86 and ARM) and for
// GNUC/Intel platforms (e.g., Code Blocks), unless the configuration
// has already defined NTL_HAVE_LL_TYPE.
#if (defined(NTL_WINPACK) && !defined(NTL_HAVE_LL_TYPE))
#if (defined(_MSC_VER) || defined(NTL_GNUC_INTEL))
#define NTL_HAVE_LL_TYPE
#endif
#endif
22
23 // Define the working C++ standard.
// Both NTL_STD_CXX14 and NTL_STD_CXX11 may be defined, in which case we take the highest one
25
// NTL_CXX_STANDARD is the year of the newest standard requested:
// 2014 if NTL_STD_CXX14 is defined, else 2011 if NTL_STD_CXX11 is
// defined, else 1998.
#ifdef NTL_STD_CXX14
#  define NTL_CXX_STANDARD (2014)
#else
#  ifdef NTL_STD_CXX11
#    define NTL_CXX_STANDARD (2011)
#  else
#    define NTL_CXX_STANDARD (1998)
#  endif
#endif
33
34 // define some macros regarding noexcept declarations
35
// NTL_NOEXCEPT expands to noexcept when the working standard is C++11
// or later, and to nothing otherwise.
// NTL_FAKE_NOEXCEPT additionally expands to nothing whenever
// NTL_EXCEPTIONS is defined.

#if (NTL_CXX_STANDARD < 2011)

#define NTL_NOEXCEPT
#define NTL_FAKE_NOEXCEPT

#else

#define NTL_NOEXCEPT noexcept

#ifndef NTL_EXCEPTIONS
#define NTL_FAKE_NOEXCEPT noexcept
#else
#define NTL_FAKE_NOEXCEPT
#endif

#endif
52
53
54 /*
55 * Resolve double-word integer type.
56 *
57 * Unfortunately, there is no "standard" way to do this.
58 * On 32-bit machines, 'long long' usually works (but not
59 * on MSVC++ or BORLAND), and on 64-bit machines, there is
60 * no standard. However, most compilers do offer *some*
61 * non-standard double-word type.
62 *
63 * Note that C99 creates a standard header <stdint.h>,
64 * but it is not clear how widely this is implemented,
 * and, for example, older versions of GCC do not provide a type int128_t
66 * in <stdint.h> on 64-bit machines.
67 */
68
69
70
// Select the unsigned double-word type NTL_ULL_TYPE.
// An explicit NTL_UNSIGNED_LONG_LONG_TYPE from the configuration wins;
// otherwise a compiler-specific type is picked based on the width of long.

#ifdef NTL_UNSIGNED_LONG_LONG_TYPE

#define NTL_ULL_TYPE NTL_UNSIGNED_LONG_LONG_TYPE

#elif (defined(__GNUC__) && NTL_BITS_PER_LONG == 64)

#define NTL_ULL_TYPE __uint128_t

#elif ((defined(_MSC_VER) || defined(__BORLANDC__)) && NTL_BITS_PER_LONG == 32)

#define NTL_ULL_TYPE unsigned __int64

#elif ((defined(_MSC_VER) || defined(__BORLANDC__)) && NTL_BITS_PER_LONG == 64)

#define NTL_ULL_TYPE unsigned __int128

#endif

// Fallback when none of the cases above applied.
#ifndef NTL_ULL_TYPE

#define NTL_ULL_TYPE unsigned long long

#endif
94
95
#ifndef NTL_HAVE_LL_TYPE

// No usable double-word type: remove the macro to prevent any use of it
#undef NTL_ULL_TYPE

// Placeholder type; the private constructor means that
// variables of this type cannot be created
class _ntl_ulonglong {
private:
   _ntl_ulonglong() { }
};

#else

// typenames are more convenient than macros
typedef NTL_ULL_TYPE _ntl_ulonglong;

#endif
111
112 /********************************************************/
113
114
115
116 // Define an unsigned type with at least 32 bits
117 // there is no truly portable way to do this, yet...
118
119
// _ntl_uint32 is unsigned int when int has at least 32 bits, and
// unsigned long otherwise.  NTL_BITS_PER_INT32 is the bit width of
// whichever type was chosen.

#if (NTL_BITS_PER_INT < 32)

// NOTE: C++ standard guarantees longs are at least 32-bits wide,
// and this is also explicitly checked at build time

typedef unsigned long _ntl_uint32; // 32-bit word
#define NTL_BITS_PER_INT32 NTL_BITS_PER_LONG

#else

typedef unsigned int _ntl_uint32; // 32-bit word
#define NTL_BITS_PER_INT32 NTL_BITS_PER_INT

#endif
134
135
136
137 // The usual token pasting stuff...
138
// NTL_PASTE_TOKENS(x,y) glues two tokens together and NTL_STRINGIFY(x)
// turns its argument into a string literal.  Each goes through a helper
// macro so that macro arguments are expanded before being pasted or
// stringified.

#define NTL_STRINGIFY_AUX(tok) #tok
#define NTL_STRINGIFY(tok) NTL_STRINGIFY_AUX(tok)

#define NTL_PASTE_TOKENS2(lhs,rhs) lhs ## rhs
#define NTL_PASTE_TOKENS(lhs,rhs) NTL_PASTE_TOKENS2(lhs,rhs)
144
145
146
147
148
149
/*
 * NTL_OVFBND is the general bound used throughout NTL to keep various
 * integer values comfortably bounded away from an integer overflow
 * condition.  Do not change this value!
 */

#define NTL_OVFBND (1L << (NTL_BITS_PER_LONG-4))


/*
 * NTL_OVFBND1 is a smaller bound than NTL_OVFBND when size_t is
 * sufficiently narrower than long (NTL_BITS_PER_SIZE_T-1 is less than
 * NTL_BITS_PER_LONG-4); otherwise it is the same as NTL_OVFBND.
 * This prevents overflow on calls to malloc and realloc.
 */

#if ((NTL_BITS_PER_SIZE_T-1) >= (NTL_BITS_PER_LONG-4))
#define NTL_OVFBND1 NTL_OVFBND
#else
#define NTL_OVFBND1 (1L << (NTL_BITS_PER_SIZE_T-1))
#endif
173
174
175
176
177
178
/*
 * NTL_OVERFLOW(n, a, b) returns 1 if n*a + b >= NTL_OVFBND,
 * and returns 0 otherwise.  The value n is effectively treated as type long,
 * while the values a and b may be *any* integral type.  It is assumed that
 * n >= 0, a > 0, and b >= 0.  Care is taken to ensure that overflow does
 * not occur: the product n*a is never formed; instead n is compared
 * against ceil((NTL_OVFBND - b)/a).  If a and b are constants, and n has
 * no side effects, a good optimizing compiler will translate this into
 * a single test of the form n >= c, where c is a constant.
 * Note that each argument may be evaluated more than once.
 */

#define NTL_OVERFLOW(n, a, b) \
   ( \
      ((b) >= NTL_OVFBND) || \
      ( \
         ((long) (n)) > 0 && \
         ( \
            ((a) >= NTL_OVFBND) || \
            (((long) (n)) >= (NTL_OVFBND-((long)(b))+((long)(a))-1)/((long)(a))) \
         ) \
      ) \
   )
192
193
194
195
196
197
/*
 * NTL_OVERFLOW1(n, a, b) is the same test as NTL_OVERFLOW, except that
 * it is made against the bound NTL_OVFBND1 instead of NTL_OVFBND:
 * it returns 1 if n*a + b >= NTL_OVFBND1, and 0 otherwise.
 * Note that each argument may be evaluated more than once.
 */

#define NTL_OVERFLOW1(n, a, b) \
   ( \
      ((b) >= NTL_OVFBND1) || \
      ( \
         ((long) (n)) > 0 && \
         ( \
            ((a) >= NTL_OVFBND1) || \
            (((long) (n)) >= (NTL_OVFBND1-((long)(b))+((long)(a))-1)/((long)(a))) \
         ) \
      ) \
   )
206
207
208
209
/*
 * NTL_MALLOC(n, a, b) returns 0 if a*n + b >= NTL_OVFBND1, and otherwise
 * returns malloc(n*a + b).
 * The programmer must ensure that the name "malloc" is visible
 * at the point in the source code where this macro is expanded.
 *
 * When NTL_TEST_EXCEPTIONS is defined, every expansion first decrements
 * exception_counter and returns 0, without attempting the allocation,
 * when the counter reaches zero; NTL_BASIC_MALLOC is the plain form
 * without the counter.
 */

#ifndef NTL_TEST_EXCEPTIONS

#define NTL_MALLOC(n, a, b) \
   (NTL_OVERFLOW1(n, a, b) \
      ? ((void *) 0) \
      : ((void *) malloc(((long)(n))*((long)(a)) + ((long)(b)))))

#else

extern unsigned long exception_counter;

#define NTL_BASIC_MALLOC(n, a, b) \
   (NTL_OVERFLOW1(n, a, b) \
      ? ((void *) 0) \
      : ((void *) malloc(((long)(n))*((long)(a)) + ((long)(b)))))

#define NTL_MALLOC(n, a, b) \
   (--exception_counter == 0 ? (void *) 0 : NTL_BASIC_MALLOC(n, a, b))

#endif
236
237
/*
 * NTL_SNS_MALLOC is the same as NTL_MALLOC, except that the call
 * to malloc is prefixed by NTL_SNS (a macro that is not defined in
 * this header).  As with NTL_MALLOC, the NTL_TEST_EXCEPTIONS variant
 * decrements exception_counter first and returns 0 when it reaches zero.
 */

#ifndef NTL_TEST_EXCEPTIONS

#define NTL_SNS_MALLOC(n, a, b) \
   (NTL_OVERFLOW1(n, a, b) \
      ? ((void *) 0) \
      : ((void *) NTL_SNS malloc(((long)(n))*((long)(a)) + ((long)(b)))))

#else

#define NTL_BASIC_SNS_MALLOC(n, a, b) \
   (NTL_OVERFLOW1(n, a, b) \
      ? ((void *) 0) \
      : ((void *) NTL_SNS malloc(((long)(n))*((long)(a)) + ((long)(b)))))

#define NTL_SNS_MALLOC(n, a, b) \
   (--exception_counter == 0 ? (void *) 0 : NTL_BASIC_SNS_MALLOC(n, a, b))

#endif
261
262
263
264
265
266
267
268
/*
 * NTL_REALLOC(p, n, a, b) returns 0 if a*n + b >= NTL_OVFBND1, and
 * otherwise returns realloc(p, n*a + b).  In the overflow case realloc
 * is not called, so p is left untouched.
 * The programmer must ensure that the name "realloc" is visible
 * at the point in the source code where this macro is expanded.
 */

#define NTL_REALLOC(p, n, a, b) \
   (NTL_OVERFLOW1(n, a, b) \
      ? ((void *) 0) \
      : ((void *) realloc((p), ((long)(n))*((long)(a)) + ((long)(b)))))
279
280
281
282
283
284
/*
 * NTL_SNS_REALLOC is the same as NTL_REALLOC, except that the call
 * to realloc is prefixed by NTL_SNS (a macro that is not defined in
 * this header).
 */

#define NTL_SNS_REALLOC(p, n, a, b) \
   (NTL_OVERFLOW1(n, a, b) \
      ? ((void *) 0) \
      : ((void *) NTL_SNS realloc((p), ((long)(n))*((long)(a)) + ((long)(b)))))
293
294
295
296
297
/*
 * NTL_MAX_ALLOC_BLOCK is the number of bytes that are allocated in
 * a single block in a number of places throughout NTL (for
 * vec_ZZ_p, ZZVec, vec_GF2X, and GF2XVec).
 */

#define NTL_MAX_ALLOC_BLOCK (40000)
305
306
/*
 * NTL_ULONG_TO_LONG(a) converts from unsigned long to signed long.
 * The argument is first converted to unsigned long.  If the top bit of
 * that value is clear, the result is a plain cast.  If it is set, the
 * top bit is cleared before casting (so the cast is always in range)
 * and then restored by XOR-ing with NTL_MIN_LONG.
 *
 * It is portable among platforms for which a long has a 2's complement
 * representation of the same width as an unsigned long.  While it avoids
 * assumptions about the behavior of non-standard conversions, a good
 * optimizing compiler should turn it into the identity function.
 *
 * The argument is fully parenthesized, so arbitrary expressions
 * (e.g., x - 1) are converted as a whole.  It is evaluated more than
 * once, so it must not have side effects.
 */

#define NTL_ULONG_TO_LONG(a) \
   ((((unsigned long) (a)) >> (NTL_BITS_PER_LONG-1)) ? \
    (((long) (((unsigned long) (a)) ^ ((unsigned long) NTL_MIN_LONG))) ^ \
       NTL_MIN_LONG) : \
    ((long) (a)))
320
321
/*
 * NTL_UINT_TO_INT(a) converts from unsigned int to signed int.
 * The argument is first converted to unsigned int.  If the top bit of
 * that value is clear, the result is a plain cast.  If it is set, the
 * top bit is cleared before casting (so the cast is always in range)
 * and then restored by XOR-ing with NTL_MIN_INT.
 *
 * It is portable among platforms for which an int has a 2's complement
 * representation of the same width as an unsigned int.  While it avoids
 * assumptions about the behavior of non-standard conversions, a good
 * optimizing compiler should turn it into the identity function.
 *
 * The argument is fully parenthesized, so arbitrary expressions
 * (e.g., w / 2) are converted as a whole.  It is evaluated more than
 * once, so it must not have side effects.
 */

#define NTL_UINT_TO_INT(a) \
   ((((unsigned int) (a)) >> (NTL_BITS_PER_INT-1)) ? \
    (((int) (((unsigned int) (a)) ^ ((unsigned int) NTL_MIN_INT))) ^ \
       NTL_MIN_INT) : \
    ((int) (a)))
335
336
// Thread-local storage qualifiers.
// Without NTL_THREADS both macros expand to nothing.
// With NTL_THREADS, NTL_THREAD_LOCAL is thread_local, and
// NTL_CHEAP_THREAD_LOCAL is the GNU __thread extension when __GNUC__
// is defined, and thread_local otherwise.

#ifndef NTL_THREADS

#define NTL_THREAD_LOCAL
#define NTL_CHEAP_THREAD_LOCAL

#else

#define NTL_THREAD_LOCAL thread_local

#if defined(__GNUC__)
#define NTL_CHEAP_THREAD_LOCAL __thread
#else
#define NTL_CHEAP_THREAD_LOCAL thread_local
#endif

#endif
353
354
/*
 * NTL_RELEASE_THRESH is the threshold for releasing scratch memory.
 */

#define NTL_RELEASE_THRESH (128)
360
361
362
363 double _ntl_GetWallTime();
364
365
366 long _ntl_IsFinite(double *p);
367 /* This forces a double into memory, and tests if it is "normal";
368 that means, not NaN, not +/- infinity, not denormalized, etc.
369 Forcing into memory is sometimes necessary on machines
370 with "extended" double precision registers (e.g., Intel x86s)
371 to force the standard IEEE format. */
372
373 void _ntl_ForceToMem(double *p);
/* This is a do-nothing routine that has the effect of forcing
   a double into memory (see comment above). */
376
377 double _ntl_ldexp(double x, long e);
378
379
// NTL_DEFINE_SWAP(T) defines an inline overload _ntl_swap(T&, T&)
// that exchanges the values of its two arguments via a temporary copy.
#define NTL_DEFINE_SWAP(T) \
inline void _ntl_swap(T& a, T& b) \
{ \
   T saved = a; \
   a = b; \
   b = saved; \
}

// signed integer types
NTL_DEFINE_SWAP(long)
NTL_DEFINE_SWAP(int)
NTL_DEFINE_SWAP(short)
NTL_DEFINE_SWAP(char)

// unsigned integer types
NTL_DEFINE_SWAP(unsigned long)
NTL_DEFINE_SWAP(unsigned int)
NTL_DEFINE_SWAP(unsigned short)
NTL_DEFINE_SWAP(unsigned char)

// floating point types
NTL_DEFINE_SWAP(double)
NTL_DEFINE_SWAP(float)


// Overload for raw pointers of any pointee type.
template<class T>
void _ntl_swap(T*& a, T*& b)
{
   T* saved = a;
   a = b;
   b = saved;
}
405
/* These are convenience routines. I don't want them to overload
407 the std library's swap function, nor do I want to rely on the latter,
408 as the C++ standard is kind of broken on the issue of where
409 swap is defined. And I also only want it defined for built-in types.
410 */
411
412
413 // The following do for "move" what the above does for swap
414
// NTL_DEFINE_SCALAR_MOVE(T) defines an inline overload
// _ntl_scalar_move(T&) that returns the current value of its argument
// and leaves the argument set to 0.
#define NTL_DEFINE_SCALAR_MOVE(T) \
inline T _ntl_scalar_move(T& a) \
{ \
   T taken = a; \
   a = 0; \
   return taken; \
}

// signed integer types
NTL_DEFINE_SCALAR_MOVE(long)
NTL_DEFINE_SCALAR_MOVE(int)
NTL_DEFINE_SCALAR_MOVE(short)
NTL_DEFINE_SCALAR_MOVE(char)

// unsigned integer types
NTL_DEFINE_SCALAR_MOVE(unsigned long)
NTL_DEFINE_SCALAR_MOVE(unsigned int)
NTL_DEFINE_SCALAR_MOVE(unsigned short)
NTL_DEFINE_SCALAR_MOVE(unsigned char)

// floating point types
NTL_DEFINE_SCALAR_MOVE(double)
NTL_DEFINE_SCALAR_MOVE(float)


// Overload for raw pointers: returns the pointer and leaves a null
// pointer behind.
template<class T>
T* _ntl_scalar_move(T*& a)
{
   T *taken = a;
   a = 0;
   return taken;
}
440
441
442
443
444
445 // The following routine increments a pointer so that
446 // it is properly aligned.
447 // It is assumed that align > 0.
448 // If align is a constant power of 2, it compiles
449 // into a small handful of simple instructions.
450
// NTL_UPTRINT_T is the unsigned integer type that pointers are cast to:
// unsigned long by default, and unsigned long long when
// NTL_BIG_POINTERS is set.

#if (!(NTL_BIG_POINTERS))

#define NTL_UPTRINT_T unsigned long

#else

// DIRT: this should really be std::uintptr_t, defined
// in <cstdint>; however, that header is not widely available,
// and even if it were, std::uintptr_t is not guaranteed
// to be defined.  Of course, unsigned long long may not
// be defined in pre-C++11.
#define NTL_UPTRINT_T unsigned long long

#endif
465
466
// _ntl_make_aligned(p, align) returns p advanced by the smallest
// non-negative number of bytes that makes its address a multiple of
// align (align > 0 is assumed).  When NTL_HAVE_ALIGNED_ARRAY is not
// defined, no adjustment is made and p is returned as is.

#ifndef NTL_HAVE_ALIGNED_ARRAY

inline
char *_ntl_make_aligned(char *p, long align)
{
   return p;
}

#else

inline
char *_ntl_make_aligned(char *p, long align)
{
   // offset of p past the previous multiple of align
   unsigned long excess = (unsigned long) (((NTL_UPTRINT_T) (p)) % ((NTL_UPTRINT_T) (align)));

   // bytes needed to reach the next multiple; the outer % maps
   // "already aligned" (excess == 0) to 0 rather than align
   unsigned long padding = (((unsigned long) (align)) - excess) % ((unsigned long) (align));

   return p + padding;
}

#endif
487
488
489
490
491
492 // The following is for aligning small local arrays
493 // Equivalent to type x[n], but aligns to align bytes
494 // Only works for POD types
495 // NOTE: the gcc aligned attribute might work, but there is
496 // some chatter on the web that this was (at some point) buggy.
497 // Not clear what the current status is.
498 // Anyway, this is only intended for use with gcc on intel
499 // machines, so it should be OK.
500
501
// NTL_ALIGNED_LOCAL_ARRAY(align, x, type, n) declares a local char buffer
// named x__ntl_hidden_variable_storage of (n)*sizeof(type)+(align) bytes,
// and a pointer "type *x" obtained by passing the start of that buffer
// and align to _ntl_make_aligned.
// The n and align arguments are parenthesized so that expressions such
// as "k+1" yield the intended buffer size.
// NOTE: the expansion consists of two declarations and already ends in
// a semicolon; the arguments may be evaluated more than once.

#define NTL_ALIGNED_LOCAL_ARRAY(align, x, type, n) \
   char x##__ntl_hidden_variable_storage[(n)*sizeof(type)+(align)]; \
   type *x = (type *) _ntl_make_aligned(&x##__ntl_hidden_variable_storage[0], (align));
505
506
// Alignment, in bytes and in doubles, used for AVX local arrays
#define NTL_AVX_BYTE_ALIGN (32)
#define NTL_AVX_DBL_ALIGN (NTL_AVX_BYTE_ALIGN/((long) sizeof(double)))

// Alignment, in bytes, used for AVX512 local arrays
#define NTL_AVX512_BYTE_ALIGN (64)

// Local arrays "type x[n]" aligned for AVX and AVX512, respectively
#define NTL_AVX_LOCAL_ARRAY(x, type, n) \
   NTL_ALIGNED_LOCAL_ARRAY(NTL_AVX_BYTE_ALIGN, x, type, n)

#define NTL_AVX512_LOCAL_ARRAY(x, type, n) \
   NTL_ALIGNED_LOCAL_ARRAY(NTL_AVX512_BYTE_ALIGN, x, type, n)


// this should be big enough to satisfy any SIMD instructions,
// and it should also be as big as a cache line
#define NTL_DEFAULT_ALIGN (64)
520
521
522
523 #ifdef NTL_HAVE_BUILTIN_CLZL
524
525 inline long
_ntl_count_bits(unsigned long x)526 _ntl_count_bits(unsigned long x)
527 {
528 return x ? (NTL_BITS_PER_LONG - __builtin_clzl(x)) : 0;
529 }
530
531 #else
532
533 inline long
_ntl_count_bits(unsigned long x)534 _ntl_count_bits(unsigned long x)
535 {
536 if (!x) return 0;
537
538 long res = NTL_BITS_PER_LONG;
539 while (x < (1UL << (NTL_BITS_PER_LONG-1))) {
540 x <<= 1;
541 res--;
542 }
543
544 return res;
545 }
546
547 #endif
548
549
550
551
552 #if (!defined(NTL_CLEAN_INT) && NTL_ARITH_RIGHT_SHIFT && (NTL_BITS_PER_LONG == (1 << (NTL_NUMBITS_BPL-1))))
553
554
555
556 inline void
_ntl_bpl_divrem(long a,long & q,long & r)557 _ntl_bpl_divrem(long a, long& q, long& r)
558 {
559 q = a >> (NTL_NUMBITS_BPL-1);
560 r = a & (NTL_BITS_PER_LONG-1);
561 }
562
563 #else
564
565 inline void
_ntl_bpl_divrem(long a,long & q,long & r)566 _ntl_bpl_divrem(long a, long& q, long& r)
567 {
568 q = a / NTL_BITS_PER_LONG;
569 r = a % NTL_BITS_PER_LONG;
570 if (r < 0) {
571 q--;
572 r += NTL_BITS_PER_LONG;
573 }
574 }
575
576 #endif
577
578 inline void
_ntl_bpl_divrem(unsigned long a,long & q,long & r)579 _ntl_bpl_divrem(unsigned long a, long& q, long& r)
580 {
581 q = a / NTL_BITS_PER_LONG;
582 r = a % NTL_BITS_PER_LONG;
583 }
584
585
586 // vectors are grown by a factor of 1.5
// Returns n + n/2 (vectors are grown by a factor of 1.5),
// where n/2 is an integer division.
inline long _ntl_vec_grow(long n)
{
   const long half = n/2;
   return n + half;
}
589
590
// Type trait: _ntl_is_char_pointer<T>::value is true exactly when T is
// char* or const char*, and false for every other type.

// primary template: not a char pointer
template <class T>
struct _ntl_is_char_pointer {
   enum { value = false };
};

// char*
template <>
struct _ntl_is_char_pointer<char*> {
   enum { value = true };
};

// const char*
template <>
struct _ntl_is_char_pointer<const char*> {
   enum { value = true };
};
608
// _ntl_enable_if<cond, T> has a member typedef "type" (equal to T,
// which defaults to void) only when cond is true; when cond is false
// the struct is empty.

// primary template: no member "type"
template <bool, typename T = void>
struct _ntl_enable_if { };

// specialization for a true condition
template <typename T>
struct _ntl_enable_if<true, T>
{
   typedef T type;
};
617
618
619
620
621
622 #endif
623