1 /*
2 Copyright (C) 2010, 2013 William Hart
3 Copyright (C) 2010 Brian Gladman
4
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11 this list of conditions and the following disclaimer.
12
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
21 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #ifndef HELPER_H
30 #define HELPER_H
31
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <limits.h>
36 #include <assert.h>
37 #include <sys/param.h> /* for BSD define */
38
39 #if !defined(BSD) && !defined(__MINGW64__) && !defined(__MINGW32__)
40 /* MinGW and FreeBSD have alloca, but not alloca.h */
41 #include <alloca.h>
42 #endif
43 #if defined(__MINGW32__)
44 #include <malloc.h> /* for alloca on MinGW */
45 #endif
46
47 #include "config.h"
48 #include "types_arch.h"
49 #include "tuning.h"
50
51 #ifdef __cplusplus
52 extern "C" {
53 #endif
54
55 #ifndef HAVE_ARCH_types
56
57 typedef unsigned long word_t;
58 typedef long sword_t;
59 typedef long len_t;
60 typedef long bits_t;
61
62 #define LEN_MAX LONG_MAX
63 #define BITS_MAX LONG_MAX
64
65 #define WORD_FMT "%l"
66 #define LEN_FMT "%ld"
67 #define BITS_FMT "%ld"
68
69 #if ULONG_MAX == 4294967295U /* 32 bit unsigned long */
70
71 typedef unsigned int dword_t __attribute__((mode(DI)));
72 #define WORD_BITS 32
73 #define WORD(x) (x##UL)
74
75 #else /* 64 bit unsigned long */
76
77 typedef unsigned int dword_t __attribute__((mode(TI)));
78 #define WORD_BITS 64
79 #define WORD(x) (x##UL)
80
81 #endif
82
83 #endif
84
85 #if WANT_ASSERT
86 #define ASSERT assert
87 #define ASSERT_ALWAYS assert
88 #else
89 #define ASSERT(xxx)
90 #define ASSERT_ALWAYS(xxx) xxx
91 #endif
92
93 #define BSDNT_ABS(x) \
94 ((x) < 0 ? (-x) : (x))
95
96 #define BSDNT_MIN(x, y) \
97 ((x) <= (y) ? (x) : (y))
98
99 #define BSDNT_MAX(x, y) \
100 ((x) >= (y) ? (x) : (y))
101
102
103 #define TYPED_SWAP(tt, a, b) \
104 do { \
105 tt __t = a; \
106 a = b; \
107 b = __t; \
108 } while (0)
109
110 #define BSDNT_SWAP(a, b) \
111 TYPED_SWAP(len_t, a, b)
112
113 typedef word_t * nn_t;
114 typedef const word_t * nn_src_t;
115
116 typedef word_t preinv1_t;
117
118 typedef word_t preinv2_t;
119
120 typedef word_t hensel_preinv1_t;
121
122 typedef struct mod_preinv1_t
123 {
124 word_t b1; /* B mod d */
125 word_t b2; /* B^2 mod d */
126 word_t b3; /* B^3 mod d */
127 } mod_preinv1_t;
128
129 #define TMP_INIT \
130 typedef struct __tmp_struct { \
131 void * block; \
132 struct __tmp_struct * next; \
133 } __tmp_t; \
134 __tmp_t * __tmp_root; \
135 __tmp_t * __t
136
137 #define TMP_START \
138 __tmp_root = NULL
139
140 #define TMP_ALLOC_BYTES(size) \
141 ((size) > 8192 ? \
142 (__t = (__tmp_t *) alloca(sizeof(__tmp_t)), \
143 __t->next = __tmp_root, \
144 __tmp_root = __t, \
145 __t->block = malloc(size)) : \
146 alloca(size))
147
148 #define TMP_ALLOC(size) \
149 TMP_ALLOC_BYTES(sizeof(word_t)*(size))
150
151 #define TMP_END \
152 while (__tmp_root) { \
153 free(__tmp_root->block); \
154 __tmp_root = __tmp_root->next; \
155 }
156
157 /*
158 Send the given error message to stderr.
159 */
160 void talker(const char * str);
161
162 #include "helper_arch.h"
163 #include "rand.h"
164
165 /**********************************************************************
166
167 Helper functions/macros
168
169 **********************************************************************/
170
171 #ifndef HAVE_ARCH_INTRINSICS
172
173 #define high_zero_bits __builtin_clzl
174 #define low_zero_bits __builtin_ctzl
175 #define popcount_bits __builtin_popcountl
176
177 #endif
178
179 #ifndef HAVE_ARCH_divapprox21_preinv1
180
181 /*
182 Given a double word u, a normalised divisor d and a precomputed
183 inverse dinv of d, computes an approximate quotient of u by d.
184 The quotient will be either correct, or up to 3 less than the
185 actual value.
186 */
187 #define divapprox21_preinv1(q, u_hi, u_lo, d, dinv) \
188 do { \
189 dword_t __q1 = (dword_t) u_hi * (dword_t) (dinv) \
190 + (((dword_t) u_hi) << WORD_BITS) + (dword_t) u_lo; \
191 const dword_t __q0 = (dword_t) u_lo * (dword_t) (dinv); \
192 __q1 += (dword_t) ((__q0) >> WORD_BITS); \
193 (q) = (__q1 >> WORD_BITS); \
194 } while (0)
195
196 #endif
197
198 #ifndef HAVE_ARCH_divrem21_preinv1
199
200 /*
201 Given a double word u, a normalised divisor d and a precomputed
202 inverse dinv of d, computes the quotient and remainder of u by d.
203 */
204 #define divrem21_preinv1(q, r, u_hi, u_lo, d, dinv) \
205 do { \
206 const dword_t __u = (((dword_t) u_hi) << WORD_BITS) + (dword_t) u_lo; \
207 dword_t __r, __q1 = (dword_t) u_hi * (dword_t) (dinv) + __u; \
208 const dword_t __q0 = (dword_t) u_lo * (dword_t) (dinv); \
209 __q1 += (dword_t) ((__q0) >> WORD_BITS); \
210 (q) = (__q1 >> WORD_BITS); \
211 __r = __u - (dword_t) (d) * (dword_t) (q); \
212 while ((word_t) (__r >> WORD_BITS) || ((word_t) __r >= (d))) \
213 { \
214 __r -= (dword_t) (d); \
215 (q)++; \
216 } \
217 (r) = (word_t) __r; \
218 } while (0)
219
220 #endif
221
222 #ifndef HAVE_ARCH_precompute_inverse1
223
224 /*
225 Precomputes an inverse of d. The value of the inverse is
226 2^WORD_BITS / (d + 1) - 2^(WORD_BITS). We assume d is
227 normalised, i.e. the most significant bit of d is set.
228 */
229 static inline
precompute_inverse1(word_t d)230 preinv1_t precompute_inverse1(word_t d)
231 {
232 ASSERT(d != 0);
233
234 d++;
235
236 if (d == 0)
237 return 0;
238
239 return (word_t) ((((dword_t) -d) << WORD_BITS) / (dword_t) d);
240 }
241
242 #endif
243
244 /*
245 Precomputes an inverse of a two limb value d. The value
246 of the inverse is 2^{3*WORD_BITS} / (d + 1) - 2^{WORD_BITS}.
247 We assume d is normalised, i.e. the most significant bit of d
248 is set.
249 */
250 preinv2_t precompute_inverse2(word_t d1, word_t d2);
251
252 #ifndef HAVE_ARCH_divapprox21_preinv2
253
254 #define divapprox21_preinv2(q, a_hi, a_lo, dinv) \
255 do { \
256 const dword_t __a = ((dword_t) (a_hi) << WORD_BITS) + (dword_t) (a_lo); \
257 dword_t __q2 = (dword_t) (a_hi) * (dword_t) (dinv); \
258 dword_t __q3 = (dword_t) (a_lo) * (dword_t) (dinv); \
259 __q2 += (__q3 >> WORD_BITS) + __a; \
260 (q) = (word_t) (__q2 >> WORD_BITS); \
261 } while (0)
262
263 #endif
264
265 #ifndef HAVE_ARCH_divrem21_preinv2
266
267 #define divrem21_preinv2(q, a_hi, a_lo, d1, d2, dinv) \
268 do { \
269 dword_t __a = ((dword_t) (a_hi) << WORD_BITS) + (dword_t) (a_lo); \
270 const dword_t __d = ((dword_t) (d1) << WORD_BITS) + (dword_t) (d2); \
271 dword_t __q2 = (dword_t) (a_hi) * (dword_t) (dinv); \
272 dword_t __q3 = (dword_t) (a_lo) * (dword_t) (dinv); \
273 q2 += (__q3 >> WORD_BITS) + __a; \
274 (q) = (word_t) (q2 >> WORD_BITS); \
275 __a -= (dword_t) (q) * (dword_t) (d1); \
276 __a -= (((dword_t) (q) * (dword_t) (d2)) >> WORD_BITS); \
277 if (__a > (dword_t) d1) __a -= (dword_t) d1, (q)++; \
278 (a_lo) = (word_t) __a; \
279 (a_hi) = (word_t) (__a >> WORD_BITS); \
280 } while (0)
281
282 #endif
283
284 #ifndef HAVE_ARCH_precompute_mod_inverse1
285
286 /*
287 Precomputes B, B^2, B^3 mod d. Requires that d is not zero.
288 */
289 static inline
precompute_mod_inverse1(mod_preinv1_t * inv,word_t d)290 void precompute_mod_inverse1(mod_preinv1_t * inv, word_t d)
291 {
292 dword_t u = (dword_t) 1;
293
294 ASSERT(d != 0);
295
296 u = (u << WORD_BITS) % (dword_t) d;
297 inv->b1 = (word_t) u;
298 u = (u << WORD_BITS) % (dword_t) d;
299 inv->b2 = (word_t) u;
300 u = (u << WORD_BITS) % (dword_t) d;
301 inv->b3 = (word_t) u;
302 }
303
304 #endif
305
306 #ifndef HAVE_ARCH_precompute_hensel_inverse1
307
308 /*
309 Precomputes a Hensel inverse of d, i.e. a value dinv such that
310 d * dinv = 1 mod B. The algorithm is via Hensel lifting.
311 Requires that d is odd.
312 */
313 static inline
precompute_hensel_inverse1(hensel_preinv1_t * inv,word_t d)314 void precompute_hensel_inverse1(hensel_preinv1_t * inv, word_t d)
315 {
316 word_t v = 1; /* initial solution modulo 2 */
317 word_t u;
318
319 ASSERT(d & (word_t) 1);
320
321 while ((u = d * v) != 1)
322 v += (1 - u) * v;
323
324 (*inv) = v;
325 }
326
327 #endif
328
329 /**********************************************************************
330
331 Printing functions
332
333 **********************************************************************/
334
335 /*
336 Print a word in hexacdecimal.
337 */
338 void printx_word(word_t a);
339
340 /*
341 Print a string containing format specifiers. This is identical to
342 printf with the exception of the additional format specifiers
343 %w : print a word_t
344 %m : print a len_t
345 %b : print a bits_t
346 */
347 void bsdnt_printf(const char * str, ...);
348
349 #ifdef __cplusplus
350 }
351 #endif
352
353 #endif
354