1 /*
2   Copyright (C) 2010, 2013 William Hart
3   Copyright (C) 2010 Brian Gladman
4 
5   All rights reserved.
6 
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions are met:
9 
10   1. Redistributions of source code must retain the above copyright notice,
11      this list of conditions and the following disclaimer.
12 
13   2. Redistributions in binary form must reproduce the above copyright
14      notice, this list of conditions and the following disclaimer in the
15 	 documentation and/or other materials provided with the distribution.
16 
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
18   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE
21   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 
29 #ifndef HELPER_H
30 #define HELPER_H
31 
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <stdio.h>
35 #include <limits.h>
36 #include <assert.h>
37 #include <sys/param.h> /* for BSD define */
38 
39 #if !defined(BSD) && !defined(__MINGW64__) && !defined(__MINGW32__)
40 /* MinGW and FreeBSD have alloca, but not alloca.h */
41 #include <alloca.h>
42 #endif
43 #if defined(__MINGW32__)
44 #include <malloc.h> /* for alloca on MinGW */
45 #endif
46 
47 #include "config.h"
48 #include "types_arch.h"
49 #include "tuning.h"
50 
51 #ifdef __cplusplus
52  extern "C" {
53 #endif
54 
55 #ifndef HAVE_ARCH_types
56 
57 typedef unsigned long word_t;
58 typedef long sword_t;
59 typedef long len_t;
60 typedef long bits_t;
61 
62 #define LEN_MAX LONG_MAX
63 #define BITS_MAX LONG_MAX
64 
65 #define WORD_FMT "%l"
66 #define LEN_FMT "%ld"
67 #define BITS_FMT "%ld"
68 
69 #if ULONG_MAX == 4294967295U /* 32 bit unsigned long */
70 
71 typedef unsigned int dword_t __attribute__((mode(DI)));
72 #define WORD_BITS 32
73 #define WORD(x) (x##UL)
74 
75 #else /* 64 bit unsigned long */
76 
77 typedef unsigned int dword_t __attribute__((mode(TI)));
78 #define WORD_BITS 64
79 #define WORD(x) (x##UL)
80 
81 #endif
82 
83 #endif
84 
85 #if WANT_ASSERT
86 #define ASSERT assert
87 #define ASSERT_ALWAYS assert
88 #else
89 #define ASSERT(xxx)
90 #define ASSERT_ALWAYS(xxx) xxx
91 #endif
92 
93 #define BSDNT_ABS(x) \
94    ((x) < 0 ? (-x) : (x))
95 
96 #define BSDNT_MIN(x, y) \
97    ((x) <= (y) ? (x) : (y))
98 
99 #define BSDNT_MAX(x, y) \
100    ((x) >= (y) ? (x) : (y))
101 
102 
103 #define TYPED_SWAP(tt, a, b) \
104    do {                      \
105       tt __t = a;            \
106       a = b;                 \
107       b = __t;               \
108    } while (0)
109 
110 #define BSDNT_SWAP(a, b) \
111    TYPED_SWAP(len_t, a, b)
112 
113 typedef word_t * nn_t;
114 typedef const word_t * nn_src_t;
115 
116 typedef word_t preinv1_t;
117 
118 typedef word_t preinv2_t;
119 
120 typedef word_t hensel_preinv1_t;
121 
122 typedef struct mod_preinv1_t
123 {
124    word_t b1; /* B   mod d */
125    word_t b2; /* B^2 mod d */
126    word_t b3; /* B^3 mod d */
127 } mod_preinv1_t;
128 
129 #define TMP_INIT \
130    typedef struct __tmp_struct { \
131       void * block; \
132       struct __tmp_struct * next; \
133    } __tmp_t; \
134    __tmp_t * __tmp_root; \
135    __tmp_t * __t
136 
137 #define TMP_START \
138    __tmp_root = NULL
139 
140 #define TMP_ALLOC_BYTES(size) \
141    ((size) > 8192 ? \
142       (__t = (__tmp_t *) alloca(sizeof(__tmp_t)), \
143        __t->next = __tmp_root, \
144        __tmp_root = __t, \
145        __t->block = malloc(size)) : \
146       alloca(size))
147 
148 #define TMP_ALLOC(size) \
149    TMP_ALLOC_BYTES(sizeof(word_t)*(size))
150 
151 #define TMP_END \
152    while (__tmp_root) { \
153       free(__tmp_root->block); \
154       __tmp_root = __tmp_root->next; \
155    }
156 
157 /*
158    Send the given error message to stderr.
159 */
160 void talker(const char * str);
161 
162 #include "helper_arch.h"
163 #include "rand.h"
164 
165 /**********************************************************************
166 
167     Helper functions/macros
168 
169 **********************************************************************/
170 
171 #ifndef HAVE_ARCH_INTRINSICS
172 
173 #define high_zero_bits __builtin_clzl
174 #define low_zero_bits __builtin_ctzl
175 #define popcount_bits __builtin_popcountl
176 
177 #endif
178 
179 #ifndef HAVE_ARCH_divapprox21_preinv1
180 
181 /*
182    Given a double word u, a normalised divisor d and a precomputed
183    inverse dinv of d, computes an approximate quotient of u by d.
184    The quotient will be either correct, or up to 3 less than the
185    actual value.
186 */
187 #define divapprox21_preinv1(q, u_hi, u_lo, d, dinv) \
188    do { \
189       dword_t __q1 = (dword_t) u_hi * (dword_t) (dinv) \
190                   + (((dword_t) u_hi) << WORD_BITS) + (dword_t) u_lo; \
191       const dword_t __q0 = (dword_t) u_lo * (dword_t) (dinv); \
192       __q1 += (dword_t) ((__q0) >> WORD_BITS); \
193       (q) = (__q1 >> WORD_BITS); \
194    } while (0)
195 
196 #endif
197 
198 #ifndef HAVE_ARCH_divrem21_preinv1
199 
200 /*
201    Given a double word u, a normalised divisor d and a precomputed
202    inverse dinv of d, computes the quotient and remainder of u by d.
203 */
204 #define divrem21_preinv1(q, r, u_hi, u_lo, d, dinv) \
205    do { \
206       const dword_t __u = (((dword_t) u_hi) << WORD_BITS) + (dword_t) u_lo; \
207       dword_t __r, __q1 = (dword_t) u_hi * (dword_t) (dinv) + __u; \
208       const dword_t __q0 = (dword_t) u_lo * (dword_t) (dinv); \
209       __q1 += (dword_t) ((__q0) >> WORD_BITS); \
210       (q) = (__q1 >> WORD_BITS); \
211       __r = __u - (dword_t) (d) * (dword_t) (q); \
212       while ((word_t) (__r >> WORD_BITS) || ((word_t) __r >= (d))) \
213       { \
214          __r -= (dword_t) (d); \
215          (q)++; \
216       } \
217       (r) = (word_t) __r; \
218    } while (0)
219 
220 #endif
221 
222 #ifndef HAVE_ARCH_precompute_inverse1
223 
224 /*
225    Precomputes an inverse of d. The value of the inverse is
226    2^WORD_BITS / (d + 1) - 2^(WORD_BITS). We assume d is
227    normalised, i.e. the most significant bit of d is set.
228 */
229 static inline
precompute_inverse1(word_t d)230 preinv1_t precompute_inverse1(word_t d)
231 {
232    ASSERT(d != 0);
233 
234    d++;
235 
236    if (d == 0)
237       return 0;
238 
239    return (word_t) ((((dword_t) -d) << WORD_BITS) / (dword_t) d);
240 }
241 
242 #endif
243 
244 /*
245    Precomputes an inverse of a two limb value d. The value
246    of the inverse is 2^{3*WORD_BITS} / (d + 1) - 2^{WORD_BITS}.
247    We assume d is normalised, i.e. the most significant bit of d
248    is set.
249 */
250 preinv2_t precompute_inverse2(word_t d1, word_t d2);
251 
252 #ifndef HAVE_ARCH_divapprox21_preinv2
253 
254 #define divapprox21_preinv2(q, a_hi, a_lo, dinv) \
255    do { \
256       const dword_t __a = ((dword_t) (a_hi) << WORD_BITS) + (dword_t) (a_lo); \
257       dword_t __q2 = (dword_t) (a_hi) * (dword_t) (dinv); \
258       dword_t __q3 = (dword_t) (a_lo) * (dword_t) (dinv); \
259       __q2 += (__q3 >> WORD_BITS) + __a; \
260       (q) = (word_t) (__q2 >> WORD_BITS); \
261    } while (0)
262 
263 #endif
264 
265 #ifndef HAVE_ARCH_divrem21_preinv2
266 
267 #define divrem21_preinv2(q, a_hi, a_lo, d1, d2, dinv) \
268    do { \
269       dword_t __a = ((dword_t) (a_hi) << WORD_BITS) + (dword_t) (a_lo); \
270       const dword_t __d = ((dword_t) (d1) << WORD_BITS) + (dword_t) (d2); \
271       dword_t __q2 = (dword_t) (a_hi) * (dword_t) (dinv); \
272       dword_t __q3 = (dword_t) (a_lo) * (dword_t) (dinv); \
273       q2 += (__q3 >> WORD_BITS) + __a; \
274       (q) = (word_t) (q2 >> WORD_BITS); \
275       __a -= (dword_t) (q) * (dword_t) (d1); \
276       __a -= (((dword_t) (q) * (dword_t) (d2)) >> WORD_BITS); \
277       if (__a > (dword_t) d1) __a -= (dword_t) d1, (q)++; \
278       (a_lo) = (word_t) __a; \
279       (a_hi) = (word_t) (__a >> WORD_BITS); \
280    } while (0)
281 
282 #endif
283 
284 #ifndef HAVE_ARCH_precompute_mod_inverse1
285 
286 /*
287    Precomputes B, B^2, B^3 mod d. Requires that d is not zero.
288 */
289 static inline
precompute_mod_inverse1(mod_preinv1_t * inv,word_t d)290 void precompute_mod_inverse1(mod_preinv1_t * inv, word_t d)
291 {
292    dword_t u = (dword_t) 1;
293 
294    ASSERT(d != 0);
295 
296    u = (u << WORD_BITS) % (dword_t) d;
297    inv->b1 = (word_t) u;
298    u = (u << WORD_BITS) % (dword_t) d;
299    inv->b2 = (word_t) u;
300    u = (u << WORD_BITS) % (dword_t) d;
301    inv->b3 = (word_t) u;
302 }
303 
304 #endif
305 
306 #ifndef HAVE_ARCH_precompute_hensel_inverse1
307 
308 /*
309    Precomputes a Hensel inverse of d, i.e. a value dinv such that
310    d * dinv = 1 mod B. The algorithm is via Hensel lifting.
311    Requires that d is odd.
312 */
313 static inline
precompute_hensel_inverse1(hensel_preinv1_t * inv,word_t d)314 void precompute_hensel_inverse1(hensel_preinv1_t * inv, word_t d)
315 {
316    word_t v = 1; /* initial solution modulo 2 */
317    word_t u;
318 
319    ASSERT(d & (word_t) 1);
320 
321    while ((u = d * v) != 1)
322       v += (1 - u) * v;
323 
324    (*inv) = v;
325 }
326 
327 #endif
328 
329 /**********************************************************************
330 
331     Printing functions
332 
333 **********************************************************************/
334 
335 /*
336    Print a word in hexacdecimal.
337 */
338 void printx_word(word_t a);
339 
340 /*
341    Print a string containing format specifiers. This is identical to
342    printf with the exception of the additional format specifiers
343    %w : print a word_t
344    %m : print a len_t
345    %b : print a bits_t
346 */
347 void bsdnt_printf(const char * str, ...);
348 
349 #ifdef __cplusplus
350  }
351 #endif
352 
353 #endif
354