xref: /freebsd/crypto/openssl/crypto/bn/bn_nist.c (revision 535af610)
1 /*
2  * Copyright 2002-2023 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include "bn_local.h"
11 #include "internal/cryptlib.h"
12 
/*
 * Number of BN_ULONG words needed to hold each NIST prime: the bit length
 * rounded up to a whole number of BN_BITS2-bit words.
 *
 * Every expansion is wrapped in parentheses so that it behaves as a single
 * operand wherever it is used (e.g. "x / BN_NIST_192_TOP" or
 * "x % BN_NIST_521_TOP"); existing uses evaluate exactly as before.
 */
#define BN_NIST_192_TOP ((192 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_224_TOP ((224 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_256_TOP ((256 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_384_TOP ((384 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_521_TOP ((521 + BN_BITS2 - 1) / BN_BITS2)
18 
19 /* pre-computed tables are "carry-less" values of modulus*(i+1) */
20 #if BN_BITS2 == 64
/*
 * P-192 modulus multiples for 64-bit words: row i holds the low
 * BN_NIST_192_TOP words of modulus*(i+1), least significant word first.
 * BN_nist_mod_192() subtracts row [carry - 1] to cancel an accumulated
 * carry, and row 0 is the modulus itself.
 */
21 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
22     {0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL},
23     {0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL},
24     {0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL}
25 };
26 
/*
 * The P-192 modulus squared, least significant word first.
 * BN_nist_mod_192() compares its input against this value and hands anything
 * not below it to BN_nnmod().
 */
27 static const BN_ULONG _nist_p_192_sqr[] = {
28     0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
29     0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
30 };
31 
/*
 * P-224 modulus multiples for 64-bit words: row i holds modulus*(i+1),
 * least significant word first.  BN_NIST_224_TOP words of 64 bits give 256
 * bits of room, so the second row can keep its 225th bit (see the
 * "carry-full" remark below).  BN_nist_mod_224() subtracts row [carry - 1]
 * or adds row [-carry - 1] depending on the sign of its carry.
 */
32 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
33     {0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
34      0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL},
35     {0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
36      0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL} /* this one is
37                                                     * "carry-full" */
38 };
39 
/*
 * The P-224 modulus squared, least significant word first.
 * BN_nist_mod_224() hands any input not below this value to BN_nnmod().
 */
40 static const BN_ULONG _nist_p_224_sqr[] = {
41     0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
42     0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
43     0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
44     0xFFFFFFFFFFFFFFFFULL
45 };
46 
/*
 * P-256 modulus multiples for 64-bit words: five rows, row i being the
 * "carry-less" value of modulus*(i+1), least significant word first.
 * BN_nist_mod_256() subtracts row [carry - 1] or adds row [-carry - 1]
 * depending on the sign of its carry; row 0 is the modulus itself.
 */
47 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
48     {0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
49      0x0000000000000000ULL, 0xFFFFFFFF00000001ULL},
50     {0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
51      0x0000000000000000ULL, 0xFFFFFFFE00000002ULL},
52     {0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
53      0x0000000000000000ULL, 0xFFFFFFFD00000003ULL},
54     {0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
55      0x0000000000000000ULL, 0xFFFFFFFC00000004ULL},
56     {0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
57      0x0000000000000000ULL, 0xFFFFFFFB00000005ULL},
58 };
59 
/*
 * Upper bound used by BN_nist_mod_256(): inputs not below this value are
 * handed to BN_nnmod().  As the name indicates, this is the square of the
 * P-256 modulus.
 */
60 static const BN_ULONG _nist_p_256_sqr[] = {
61     0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
62     0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
63     0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
64     0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
65 };
66 
/*
 * P-384 modulus multiples for 64-bit words: five rows, row i being the
 * "carry-less" value of modulus*(i+1), least significant word first.
 * Row 0 is the modulus itself and backs ossl_bignum_nist_p_384.
 */
67 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
68     {0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL, 0xFFFFFFFFFFFFFFFEULL,
69      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
70     {0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
71      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
72     {0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL, 0xFFFFFFFFFFFFFFFCULL,
73      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
74     {0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL, 0xFFFFFFFFFFFFFFFBULL,
75      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
76     {0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL, 0xFFFFFFFFFFFFFFFAULL,
77      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
78 };
79 
/*
 * Upper bound used by BN_nist_mod_384(): inputs not below this value are
 * handed to BN_nnmod().  As the name indicates, this is the square of the
 * P-384 modulus.
 */
80 static const BN_ULONG _nist_p_384_sqr[] = {
81     0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
82     0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
83     0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
84     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
85 };
86 
/*
 * The P-521 modulus for 64-bit words, least significant word first: eight
 * all-ones words plus nine more one bits, i.e. 521 consecutive one bits.
 * Unlike the other curves this is a single value, not a table of multiples.
 */
86 static const BN_ULONG _nist_p_521[] =
87     { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
88     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
89     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
90     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
91     0x00000000000001FFULL
92 };
93 
/* The P-521 modulus squared, least significant word first. */
94 static const BN_ULONG _nist_p_521_sqr[] = {
95     0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
96     0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
97     0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
98     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
99     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
100     0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
101 };
103 #elif BN_BITS2 == 32
/*
 * P-192 modulus multiples for 32-bit words: row i holds the low
 * BN_NIST_192_TOP words of modulus*(i+1), least significant word first.
 * BN_nist_mod_192() subtracts row [carry - 1] to cancel an accumulated
 * carry, and row 0 is the modulus itself.
 */
104 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
105     {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
106     {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
107     {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
108 };
109 
/*
 * The P-192 modulus squared, least significant word first.
 * BN_nist_mod_192() hands any input not below this value to BN_nnmod().
 */
110 static const BN_ULONG _nist_p_192_sqr[] = {
111     0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
112     0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
113 };
114 
/*
 * P-224 modulus multiples for 32-bit words: row i holds the low
 * BN_NIST_224_TOP words of modulus*(i+1), least significant word first.
 * BN_nist_mod_224() subtracts row [carry - 1] or adds row [-carry - 1]
 * depending on the sign of its carry; row 0 is the modulus itself.
 */
115 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
116     {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
117      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
118     {0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
119      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
120 };
121 
/*
 * Upper bound used by BN_nist_mod_224(): inputs not below this value are
 * handed to BN_nnmod().  As the name indicates, this is the square of the
 * P-224 modulus.
 */
122 static const BN_ULONG _nist_p_224_sqr[] = {
123     0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
124     0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
125     0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
126     0xFFFFFFFF, 0xFFFFFFFF
127 };
128 
/*
 * P-256 modulus multiples for 32-bit words: five rows, row i being the
 * "carry-less" value of modulus*(i+1), least significant word first.
 * BN_nist_mod_256() subtracts row [carry - 1] or adds row [-carry - 1]
 * depending on the sign of its carry; row 0 is the modulus itself.
 */
129 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
130     {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
131      0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF},
132     {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
133      0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE},
134     {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
135      0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD},
136     {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
137      0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC},
138     {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
139      0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB},
140 };
141 
/*
 * Upper bound used by BN_nist_mod_256(): inputs not below this value are
 * handed to BN_nnmod().  As the name indicates, this is the square of the
 * P-256 modulus.
 */
142 static const BN_ULONG _nist_p_256_sqr[] = {
143     0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
144     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
145     0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
146     0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
147 };
148 
/*
 * P-384 modulus multiples for 32-bit words: five rows, row i being the
 * "carry-less" value of modulus*(i+1), least significant word first.
 * Row 0 is the modulus itself and backs ossl_bignum_nist_p_384.
 */
149 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
150     {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
151      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
152     {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
153      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
154     {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
155      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
156     {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
157      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
158     {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
159      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
160 };
161 
/*
 * Upper bound used by BN_nist_mod_384(): inputs not below this value are
 * handed to BN_nnmod().  As the name indicates, this is the square of the
 * P-384 modulus.
 */
162 static const BN_ULONG _nist_p_384_sqr[] = {
163     0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
164     0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
165     0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
166     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
167 };
168 
/*
 * The P-521 modulus for 32-bit words, least significant word first: sixteen
 * all-ones words plus nine more one bits, i.e. 521 consecutive one bits.
 * Unlike the other curves this is a single value, not a table of multiples.
 */
169 static const BN_ULONG _nist_p_521[] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
170     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
171     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
172     0xFFFFFFFF, 0x000001FF
173 };
174 
/* The P-521 modulus squared, least significant word first. */
175 static const BN_ULONG _nist_p_521_sqr[] = {
176     0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
177     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
178     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
179     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
180     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
181     0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
182 };
183 #else
184 # error "unsupported BN_BITS2"
185 #endif
186 
187 static const BIGNUM ossl_bignum_nist_p_192 = {
188     (BN_ULONG *)_nist_p_192[0],
189     BN_NIST_192_TOP,
190     BN_NIST_192_TOP,
191     0,
192     BN_FLG_STATIC_DATA
193 };
194 
195 static const BIGNUM ossl_bignum_nist_p_224 = {
196     (BN_ULONG *)_nist_p_224[0],
197     BN_NIST_224_TOP,
198     BN_NIST_224_TOP,
199     0,
200     BN_FLG_STATIC_DATA
201 };
202 
203 static const BIGNUM ossl_bignum_nist_p_256 = {
204     (BN_ULONG *)_nist_p_256[0],
205     BN_NIST_256_TOP,
206     BN_NIST_256_TOP,
207     0,
208     BN_FLG_STATIC_DATA
209 };
210 
211 static const BIGNUM ossl_bignum_nist_p_384 = {
212     (BN_ULONG *)_nist_p_384[0],
213     BN_NIST_384_TOP,
214     BN_NIST_384_TOP,
215     0,
216     BN_FLG_STATIC_DATA
217 };
218 
219 static const BIGNUM ossl_bignum_nist_p_521 = {
220     (BN_ULONG *)_nist_p_521,
221     BN_NIST_521_TOP,
222     BN_NIST_521_TOP,
223     0,
224     BN_FLG_STATIC_DATA
225 };
226 
227 const BIGNUM *BN_get0_nist_prime_192(void)
228 {
229     return &ossl_bignum_nist_p_192;
230 }
231 
232 const BIGNUM *BN_get0_nist_prime_224(void)
233 {
234     return &ossl_bignum_nist_p_224;
235 }
236 
237 const BIGNUM *BN_get0_nist_prime_256(void)
238 {
239     return &ossl_bignum_nist_p_256;
240 }
241 
242 const BIGNUM *BN_get0_nist_prime_384(void)
243 {
244     return &ossl_bignum_nist_p_384;
245 }
246 
247 const BIGNUM *BN_get0_nist_prime_521(void)
248 {
249     return &ossl_bignum_nist_p_521;
250 }
251 
/*
 * nist_cp_bn_0(dst, src_in, top, max): copy the first `top` words of
 * `src_in` into `dst`, then zero-fill `dst` from index `top` up to (but not
 * including) index `max`.
 *
 * To prevent more recent compilers (specifically clang-14) from treating
 * this code as a violation of the strict aliasing conditions and omitting
 * it, this cannot be declared as a function.  Moreover, the dst parameter
 * cannot be cached in a local since this no longer references the union and
 * again falls foul of the strict aliasing criteria.  Refer to #18225 for the
 * initial diagnostics and llvm/llvm-project#55255 for the later discussions
 * with the LLVM developers.  The problem boils down to whether an array in
 * the union is converted to a pointer or used directly.
 *
 * This function was inlined regardless, so there is no space cost to be
 * paid for making it a macro.
 *
 * The src_in, top and max arguments are parenthesized at every use so that
 * an argument containing a low-precedence operator (for example a
 * conditional expression) cannot change the meaning of the loop conditions.
 * top and max are still evaluated once per loop iteration, so they must be
 * free of side effects.
 */
#define nist_cp_bn_0(dst, src_in, top, max) \
{                                           \
    int ii;                                 \
    const BN_ULONG *src = (src_in);         \
                                            \
    for (ii = 0; ii < (top); ii++)          \
        (dst)[ii] = src[ii];                \
    for (; ii < (max); ii++)                \
        (dst)[ii] = 0;                      \
}
275 
276 static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
277 {
278     int i;
279 
280     for (i = 0; i < top; i++)
281         dst[i] = src[i];
282 }
283 
/*
 * Word-shuffling helpers used by the nist_set_*() macros below, plus the
 * optional NIST_INT64 accumulator type used by the limb-wise code paths.
 *
 * In every copy helper a negative source index m means "store zero instead
 * of reading from[m]".  Several of these macros expand with their own
 * trailing ';', which is why the nist_set_*() macros list them without
 * separators.
 */
284 #if BN_BITS2 == 64
/* 64-bit words: whole-word copy / whole-word clear. */
285 # define bn_cp_64(to, n, from, m)        (to)[n] = (m>=0)?((from)[m]):0;
286 # define bn_64_set_0(to, n)              (to)[n] = (BN_ULONG)0;
287 /*
288  * two following macros are implemented under assumption that they
289  * are called in a sequence with *ascending* n, i.e. as they are...
290  */
/*
 * bn_cp_32_naked copies 32-bit half m of `from` into 32-bit half n of `to`:
 * an even n assigns the whole word (low half set, high half cleared), an odd
 * n ORs into the high half -- hence the ascending-n requirement above.
 * bn_32_set_0 zeroes the whole word for even n and only the high half for
 * odd n.
 */
291 # define bn_cp_32_naked(to, n, from, m)  (((n)&1)?(to[(n)/2]|=((m)&1)?(from[(m)/2]&BN_MASK2h):(from[(m)/2]<<32))\
292                                                 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
293 # define bn_32_set_0(to, n)              (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
294 # define bn_cp_32(to,n,from,m)           ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
/* NIST_INT64 is only provided here when L_ENDIAN is defined. */
295 # if defined(L_ENDIAN)
296 #  if defined(__arch64__)
297 #   define NIST_INT64 long
298 #  else
299 #   define NIST_INT64 long long
300 #  endif
301 # endif
302 #else
/* Other word sizes: a 64-bit copy/clear is done as two 32-bit operations. */
303 # define bn_cp_64(to, n, from, m) \
304         { \
305         bn_cp_32(to, (n)*2, from, (m)*2); \
306         bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
307         }
308 # define bn_64_set_0(to, n) \
309         { \
310         bn_32_set_0(to, (n)*2); \
311         bn_32_set_0(to, (n)*2+1); \
312         }
313 # define bn_cp_32(to, n, from, m)        (to)[n] = (m>=0)?((from)[m]):0;
314 # define bn_32_set_0(to, n)              (to)[n] = (BN_ULONG)0;
/* NIST_INT64 is only provided for non-GNU Windows compilers or with BN_LLONG. */
315 # if defined(_WIN32) && !defined(__GNUC__)
316 #  define NIST_INT64 __int64
317 # elif defined(BN_LLONG)
318 #  define NIST_INT64 long long
319 # endif
320 #endif                          /* BN_BITS2 != 64 */
321 
/*
 * Build a three-limb (64 bits each) value in `to` from 64-bit limbs of
 * `from`.  a1 selects the most significant limb and a3 the least.  Each
 * selector is reduced by 3 before indexing `from`, so selectors 3, 4, 5 pick
 * from[0..2], and a selector of 0 yields a negative index, which bn_cp_64
 * turns into a zero limb.
 */
322 #define nist_set_192(to, from, a1, a2, a3) \
323         { \
324         bn_cp_64(to, 0, from, (a3) - 3) \
325         bn_cp_64(to, 1, from, (a2) - 3) \
326         bn_cp_64(to, 2, from, (a1) - 3) \
327         }
328 
329 int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
330                     BN_CTX *ctx)
331 {
332     int top = a->top, i;
333     int carry;
334     register BN_ULONG *r_d, *a_d = a->d;
335     union {
336         BN_ULONG bn[BN_NIST_192_TOP];
337         unsigned int ui[BN_NIST_192_TOP * sizeof(BN_ULONG) /
338                         sizeof(unsigned int)];
339     } buf;
340     BN_ULONG c_d[BN_NIST_192_TOP], *res;
341     static const BIGNUM ossl_bignum_nist_p_192_sqr = {
342         (BN_ULONG *)_nist_p_192_sqr,
343         OSSL_NELEM(_nist_p_192_sqr),
344         OSSL_NELEM(_nist_p_192_sqr),
345         0, BN_FLG_STATIC_DATA
346     };
347 
348     field = &ossl_bignum_nist_p_192; /* just to make sure */
349 
350     if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_192_sqr) >= 0)
351         return BN_nnmod(r, a, field, ctx);
352 
353     i = BN_ucmp(field, a);
354     if (i == 0) {
355         BN_zero(r);
356         return 1;
357     } else if (i > 0)
358         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
359 
360     if (r != a) {
361         if (!bn_wexpand(r, BN_NIST_192_TOP))
362             return 0;
363         r_d = r->d;
364         nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
365     } else
366         r_d = a_d;
367 
368     nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
369                  BN_NIST_192_TOP);
370 
371 #if defined(NIST_INT64)
372     {
373         NIST_INT64 acc;         /* accumulator */
374         unsigned int *rp = (unsigned int *)r_d;
375         const unsigned int *bp = (const unsigned int *)buf.ui;
376 
377         acc = rp[0];
378         acc += bp[3 * 2 - 6];
379         acc += bp[5 * 2 - 6];
380         rp[0] = (unsigned int)acc;
381         acc >>= 32;
382 
383         acc += rp[1];
384         acc += bp[3 * 2 - 5];
385         acc += bp[5 * 2 - 5];
386         rp[1] = (unsigned int)acc;
387         acc >>= 32;
388 
389         acc += rp[2];
390         acc += bp[3 * 2 - 6];
391         acc += bp[4 * 2 - 6];
392         acc += bp[5 * 2 - 6];
393         rp[2] = (unsigned int)acc;
394         acc >>= 32;
395 
396         acc += rp[3];
397         acc += bp[3 * 2 - 5];
398         acc += bp[4 * 2 - 5];
399         acc += bp[5 * 2 - 5];
400         rp[3] = (unsigned int)acc;
401         acc >>= 32;
402 
403         acc += rp[4];
404         acc += bp[4 * 2 - 6];
405         acc += bp[5 * 2 - 6];
406         rp[4] = (unsigned int)acc;
407         acc >>= 32;
408 
409         acc += rp[5];
410         acc += bp[4 * 2 - 5];
411         acc += bp[5 * 2 - 5];
412         rp[5] = (unsigned int)acc;
413 
414         carry = (int)(acc >> 32);
415     }
416 #else
417     {
418         BN_ULONG t_d[BN_NIST_192_TOP];
419 
420         nist_set_192(t_d, buf.bn, 0, 3, 3);
421         carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
422         nist_set_192(t_d, buf.bn, 4, 4, 0);
423         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
424         nist_set_192(t_d, buf.bn, 5, 5, 5)
425             carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
426     }
427 #endif
428     if (carry > 0)
429         carry =
430             (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
431                               BN_NIST_192_TOP);
432     else
433         carry = 1;
434 
435     /*
436      * we need 'if (carry==0 || result>=modulus) result-=modulus;'
437      * as comparison implies subtraction, we can write
438      * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
439      * this is what happens below, but without explicit if:-) a.
440      */
441     res = (bn_sub_words(c_d, r_d, _nist_p_192[0], BN_NIST_192_TOP) && carry)
442         ? r_d
443         : c_d;
444     nist_cp_bn(r_d, res, BN_NIST_192_TOP);
445     r->top = BN_NIST_192_TOP;
446     bn_correct_top(r);
447 
448     return 1;
449 }
450 
451 typedef BN_ULONG (*bn_addsub_f) (BN_ULONG *, const BN_ULONG *,
452                                  const BN_ULONG *, int);
453 
/*
 * Build a seven-limb (32 bits each) value in `to` from 32-bit limbs of
 * `from`.  a1 selects the most significant limb and a7 the least.  Each
 * selector is reduced by 7 before indexing, so selectors 7..13 pick limbs
 * 0..6 of `from`, and a selector of 0 yields a negative index, which
 * bn_cp_32 turns into a zero limb.
 */
454 #define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
455         { \
456         bn_cp_32(to, 0, from, (a7) - 7) \
457         bn_cp_32(to, 1, from, (a6) - 7) \
458         bn_cp_32(to, 2, from, (a5) - 7) \
459         bn_cp_32(to, 3, from, (a4) - 7) \
460         bn_cp_32(to, 4, from, (a3) - 7) \
461         bn_cp_32(to, 5, from, (a2) - 7) \
462         bn_cp_32(to, 6, from, (a1) - 7) \
463         }
464 
/*
 * Reduce a modulo the NIST P-224 prime and store the result in r.
 *
 * The field argument is ignored: it is overwritten with the built-in P-224
 * constant.  Negative inputs, and inputs not below the square of the prime,
 * are handed to BN_nnmod().  An input equal to the prime gives zero and an
 * input below it is copied unchanged.  Everything else is reduced here by
 * adding and subtracting rearranged 32-bit limbs of the part above the low
 * 224 bits, followed by a correction with the pre-computed multiples of the
 * prime.
 *
 * r may be the same BIGNUM as a, in which case a is reduced in place.
 *
 * Returns 1 on success and 0 if r could not be expanded or copied into; on
 * the BN_nnmod() path that function's result is returned.
 */
465 int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
466                     BN_CTX *ctx)
467 {
468     int top = a->top, i;
469     int carry;
470     BN_ULONG *r_d, *a_d = a->d;
    /* Upper part of a, readable as BN_ULONG words or as unsigned int limbs. */
471     union {
472         BN_ULONG bn[BN_NIST_224_TOP];
473         unsigned int ui[BN_NIST_224_TOP * sizeof(BN_ULONG) /
474                         sizeof(unsigned int)];
475     } buf;
476     BN_ULONG c_d[BN_NIST_224_TOP], *res;
    /* Operation used for the final correction: subtract or add the modulus. */
477     bn_addsub_f adjust;
478     static const BIGNUM ossl_bignum_nist_p_224_sqr = {
479         (BN_ULONG *)_nist_p_224_sqr,
480         OSSL_NELEM(_nist_p_224_sqr),
481         OSSL_NELEM(_nist_p_224_sqr),
482         0, BN_FLG_STATIC_DATA
483     };
484 
485     field = &ossl_bignum_nist_p_224; /* just to make sure */
486 
    /* The fast path below only handles 0 <= a < p^2. */
487     if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_224_sqr) >= 0)
488         return BN_nnmod(r, a, field, ctx);
489 
    /* a == p reduces to zero; a < p is already reduced. */
490     i = BN_ucmp(field, a);
491     if (i == 0) {
492         BN_zero(r);
493         return 1;
494     } else if (i > 0)
495         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
496 
    /* Seed r with the low words of a (or work in place when r == a). */
497     if (r != a) {
498         if (!bn_wexpand(r, BN_NIST_224_TOP))
499             return 0;
500         r_d = r->d;
501         nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
502     } else
503         r_d = a_d;
504 
505 #if BN_BITS2==64
506     /* copy upper 256 bits of 448 bit number ... */
507     nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
508                  top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
509     /* ... and right shift by 32 to obtain upper 224 bits */
510     nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
511     /* truncate lower part to 224 bits too */
512     r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
513 #else
514     nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP,
515                  BN_NIST_224_TOP);
516 #endif
517 
518 #if defined(NIST_INT64) && BN_BITS2!=64
519     {
        /*
         * Limb-wise variant: add/subtract the selected high limbs for each
         * limb of the result, keep the low 32 bits and carry the rest
         * forward.  acc may go negative, so this relies on >> of a negative
         * value propagating the sign (implementation-defined in C).
         */
520         NIST_INT64 acc;         /* accumulator */
521         unsigned int *rp = (unsigned int *)r_d;
522         const unsigned int *bp = (const unsigned int *)buf.ui;
523 
524         acc = rp[0];
525         acc -= bp[7 - 7];
526         acc -= bp[11 - 7];
527         rp[0] = (unsigned int)acc;
528         acc >>= 32;
529 
530         acc += rp[1];
531         acc -= bp[8 - 7];
532         acc -= bp[12 - 7];
533         rp[1] = (unsigned int)acc;
534         acc >>= 32;
535 
536         acc += rp[2];
537         acc -= bp[9 - 7];
538         acc -= bp[13 - 7];
539         rp[2] = (unsigned int)acc;
540         acc >>= 32;
541 
542         acc += rp[3];
543         acc += bp[7 - 7];
544         acc += bp[11 - 7];
545         acc -= bp[10 - 7];
546         rp[3] = (unsigned int)acc;
547         acc >>= 32;
548 
549         acc += rp[4];
550         acc += bp[8 - 7];
551         acc += bp[12 - 7];
552         acc -= bp[11 - 7];
553         rp[4] = (unsigned int)acc;
554         acc >>= 32;
555 
556         acc += rp[5];
557         acc += bp[9 - 7];
558         acc += bp[13 - 7];
559         acc -= bp[12 - 7];
560         rp[5] = (unsigned int)acc;
561         acc >>= 32;
562 
563         acc += rp[6];
564         acc += bp[10 - 7];
565         acc -= bp[13 - 7];
566         rp[6] = (unsigned int)acc;
567 
568         carry = (int)(acc >> 32);
        /*
         * NOTE(review): the enclosing #if requires BN_BITS2!=64, so the
         * store below can never be compiled in.
         */
569 # if BN_BITS2==64
570         rp[7] = carry;
571 # endif
572     }
573 #else
574     {
        /*
         * Word-wise variant: two additions and two subtractions of
         * rearranged high limbs; carry is the net number of carries minus
         * borrows out of the top word.
         */
575         BN_ULONG t_d[BN_NIST_224_TOP];
576 
577         nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
578         carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
579         nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
580         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
581         nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
582         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
583         nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
584         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
585 
        /*
         * With 64-bit words the 224-bit value leaves 32 spare bits in the
         * top word, so the carry is read from those bits instead.
         */
586 # if BN_BITS2==64
587         carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
588 # endif
589     }
590 #endif
    /* Default correction: conditionally subtract the modulus. */
591     adjust = bn_sub_words;
592     if (carry > 0) {
        /* Cancel a positive carry with the matching multiple of the modulus. */
593         carry =
594             (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
595                               BN_NIST_224_TOP);
596 #if BN_BITS2==64
597         carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
598 #endif
599     } else if (carry < 0) {
600         /*
601          * it's a bit more complicated logic in this case. if bn_add_words
602          * yields no carry, then result has to be adjusted by unconditionally
603          * *adding* the modulus. but if it does, then result has to be
604          * compared to the modulus and conditionally adjusted by
605          * *subtracting* the latter.
606          */
607         carry =
608             (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
609                               BN_NIST_224_TOP);
610         adjust = carry ? bn_sub_words : bn_add_words;
611     } else
612         carry = 1;
613 
614     /* otherwise it's effectively same as in BN_nist_mod_192... */
615     res = ((*adjust) (c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP) && carry)
616         ? r_d
617         : c_d;
618     nist_cp_bn(r_d, res, BN_NIST_224_TOP);
619     r->top = BN_NIST_224_TOP;
620     bn_correct_top(r);
621 
622     return 1;
623 }
624 
/*
 * Build an eight-limb (32 bits each) value in `to` from 32-bit limbs of
 * `from`.  a1 selects the most significant limb and a8 the least.  Each
 * selector is reduced by 8 before indexing, so selectors 8..15 pick limbs
 * 0..7 of `from`, and a selector of 0 yields a negative index, which
 * bn_cp_32 turns into a zero limb.
 */
625 #define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
626         { \
627         bn_cp_32(to, 0, from, (a8) - 8) \
628         bn_cp_32(to, 1, from, (a7) - 8) \
629         bn_cp_32(to, 2, from, (a6) - 8) \
630         bn_cp_32(to, 3, from, (a5) - 8) \
631         bn_cp_32(to, 4, from, (a4) - 8) \
632         bn_cp_32(to, 5, from, (a3) - 8) \
633         bn_cp_32(to, 6, from, (a2) - 8) \
634         bn_cp_32(to, 7, from, (a1) - 8) \
635         }
636 
/*
 * Reduce a modulo the NIST P-256 prime and store the result in r.
 *
 * The field argument is ignored: it is overwritten with the built-in P-256
 * constant.  Negative inputs, and inputs not below the square of the prime,
 * are handed to BN_nnmod().  An input equal to the prime gives zero and an
 * input below it is copied unchanged.  Everything else is reduced here by
 * adding and subtracting rearranged 32-bit limbs of the part above the low
 * 256 bits, followed by a correction with the pre-computed multiples of the
 * prime.
 *
 * r may be the same BIGNUM as a, in which case a is reduced in place.
 *
 * Returns 1 on success and 0 if r could not be expanded or copied into; on
 * the BN_nnmod() path that function's result is returned.
 */
637 int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
638                     BN_CTX *ctx)
639 {
640     int i, top = a->top;
641     int carry = 0;
642     register BN_ULONG *a_d = a->d, *r_d;
    /* Upper part of a, readable as BN_ULONG words or as unsigned int limbs. */
643     union {
644         BN_ULONG bn[BN_NIST_256_TOP];
645         unsigned int ui[BN_NIST_256_TOP * sizeof(BN_ULONG) /
646                         sizeof(unsigned int)];
647     } buf;
648     BN_ULONG c_d[BN_NIST_256_TOP], *res;
    /* Operation used for the final correction: subtract or add the modulus. */
649     bn_addsub_f adjust;
650     static const BIGNUM ossl_bignum_nist_p_256_sqr = {
651         (BN_ULONG *)_nist_p_256_sqr,
652         OSSL_NELEM(_nist_p_256_sqr),
653         OSSL_NELEM(_nist_p_256_sqr),
654         0, BN_FLG_STATIC_DATA
655     };
656 
657     field = &ossl_bignum_nist_p_256; /* just to make sure */
658 
    /* The fast path below only handles 0 <= a < p^2. */
659     if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_256_sqr) >= 0)
660         return BN_nnmod(r, a, field, ctx);
661 
    /* a == p reduces to zero; a < p is already reduced. */
662     i = BN_ucmp(field, a);
663     if (i == 0) {
664         BN_zero(r);
665         return 1;
666     } else if (i > 0)
667         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
668 
    /* Seed r with the low 256 bits of a (or work in place when r == a). */
669     if (r != a) {
670         if (!bn_wexpand(r, BN_NIST_256_TOP))
671             return 0;
672         r_d = r->d;
673         nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
674     } else
675         r_d = a_d;
676 
    /* Stash the words above the low 256 bits, zero-padded, in buf. */
677     nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP,
678                  BN_NIST_256_TOP);
679 
680 #if defined(NIST_INT64)
681     {
        /*
         * Limb-wise variant: add/subtract the selected high limbs for each
         * limb of the result, keep the low 32 bits and carry the rest
         * forward.  acc may go negative, so this relies on >> of a negative
         * value propagating the sign (implementation-defined in C).
         */
682         NIST_INT64 acc;         /* accumulator */
683         unsigned int *rp = (unsigned int *)r_d;
684         const unsigned int *bp = (const unsigned int *)buf.ui;
685 
686         acc = rp[0];
687         acc += bp[8 - 8];
688         acc += bp[9 - 8];
689         acc -= bp[11 - 8];
690         acc -= bp[12 - 8];
691         acc -= bp[13 - 8];
692         acc -= bp[14 - 8];
693         rp[0] = (unsigned int)acc;
694         acc >>= 32;
695 
696         acc += rp[1];
697         acc += bp[9 - 8];
698         acc += bp[10 - 8];
699         acc -= bp[12 - 8];
700         acc -= bp[13 - 8];
701         acc -= bp[14 - 8];
702         acc -= bp[15 - 8];
703         rp[1] = (unsigned int)acc;
704         acc >>= 32;
705 
706         acc += rp[2];
707         acc += bp[10 - 8];
708         acc += bp[11 - 8];
709         acc -= bp[13 - 8];
710         acc -= bp[14 - 8];
711         acc -= bp[15 - 8];
712         rp[2] = (unsigned int)acc;
713         acc >>= 32;
714 
715         acc += rp[3];
716         acc += bp[11 - 8];
717         acc += bp[11 - 8];
718         acc += bp[12 - 8];
719         acc += bp[12 - 8];
720         acc += bp[13 - 8];
721         acc -= bp[15 - 8];
722         acc -= bp[8 - 8];
723         acc -= bp[9 - 8];
724         rp[3] = (unsigned int)acc;
725         acc >>= 32;
726 
727         acc += rp[4];
728         acc += bp[12 - 8];
729         acc += bp[12 - 8];
730         acc += bp[13 - 8];
731         acc += bp[13 - 8];
732         acc += bp[14 - 8];
733         acc -= bp[9 - 8];
734         acc -= bp[10 - 8];
735         rp[4] = (unsigned int)acc;
736         acc >>= 32;
737 
738         acc += rp[5];
739         acc += bp[13 - 8];
740         acc += bp[13 - 8];
741         acc += bp[14 - 8];
742         acc += bp[14 - 8];
743         acc += bp[15 - 8];
744         acc -= bp[10 - 8];
745         acc -= bp[11 - 8];
746         rp[5] = (unsigned int)acc;
747         acc >>= 32;
748 
749         acc += rp[6];
750         acc += bp[14 - 8];
751         acc += bp[14 - 8];
752         acc += bp[15 - 8];
753         acc += bp[15 - 8];
754         acc += bp[14 - 8];
755         acc += bp[13 - 8];
756         acc -= bp[8 - 8];
757         acc -= bp[9 - 8];
758         rp[6] = (unsigned int)acc;
759         acc >>= 32;
760 
761         acc += rp[7];
762         acc += bp[15 - 8];
763         acc += bp[15 - 8];
764         acc += bp[15 - 8];
765         acc += bp[8 - 8];
766         acc -= bp[10 - 8];
767         acc -= bp[11 - 8];
768         acc -= bp[12 - 8];
769         acc -= bp[13 - 8];
770         rp[7] = (unsigned int)acc;
771 
772         carry = (int)(acc >> 32);
773     }
774 #else
775     {
        /*
         * Word-wise variant: S1 and S2 are summed and doubled before being
         * added, S3 and S4 are added once, D1..D4 are subtracted.  carry
         * accumulates the carries minus the borrows out of the top word.
         */
776         BN_ULONG t_d[BN_NIST_256_TOP];
777 
778         /*
779          * S1
780          */
781         nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
782         /*
783          * S2
784          */
785         nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
786         carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
787         /* left shift */
788         {
789             register BN_ULONG *ap, t, c;
790             ap = t_d;
791             c = 0;
792             for (i = BN_NIST_256_TOP; i != 0; --i) {
793                 t = *ap;
794                 *(ap++) = ((t << 1) | c) & BN_MASK2;
795                 c = (t & BN_TBIT) ? 1 : 0;
796             }
797             carry <<= 1;
798             carry |= c;
799         }
800         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
801         /*
802          * S3
803          */
804         nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
805         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
806         /*
807          * S4
808          */
809         nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
810         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
811         /*
812          * D1
813          */
814         nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
815         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
816         /*
817          * D2
818          */
819         nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
820         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
821         /*
822          * D3
823          */
824         nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
825         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
826         /*
827          * D4
828          */
829         nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
830         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
831 
832     }
833 #endif
834     /* see BN_nist_mod_224 for explanation */
835     adjust = bn_sub_words;
836     if (carry > 0)
837         carry =
838             (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
839                               BN_NIST_256_TOP);
840     else if (carry < 0) {
841         carry =
842             (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
843                               BN_NIST_256_TOP);
844         adjust = carry ? bn_sub_words : bn_add_words;
845     } else
846         carry = 1;
847 
    /*
     * Compute the adjusted value into c_d and keep the unadjusted r_d only
     * when the adjustment reports a carry/borrow and carry is non-zero.
     */
848     res = ((*adjust) (c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP) && carry)
849         ? r_d
850         : c_d;
851     nist_cp_bn(r_d, res, BN_NIST_256_TOP);
852     r->top = BN_NIST_256_TOP;
853     bn_correct_top(r);
854 
855     return 1;
856 }
857 
/*
 * Build a twelve-limb (32 bits each) value in `to` from 32-bit limbs of
 * `from`.  a1 selects the most significant limb and a12 the least.  Each
 * selector is reduced by 12 before indexing, so selectors 12..23 pick limbs
 * 0..11 of `from`, and a selector of 0 yields a negative index, which
 * bn_cp_32 turns into a zero limb.
 */
858 #define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
859         { \
860         bn_cp_32(to, 0, from,  (a12) - 12) \
861         bn_cp_32(to, 1, from,  (a11) - 12) \
862         bn_cp_32(to, 2, from,  (a10) - 12) \
863         bn_cp_32(to, 3, from,  (a9) - 12)  \
864         bn_cp_32(to, 4, from,  (a8) - 12)  \
865         bn_cp_32(to, 5, from,  (a7) - 12)  \
866         bn_cp_32(to, 6, from,  (a6) - 12)  \
867         bn_cp_32(to, 7, from,  (a5) - 12)  \
868         bn_cp_32(to, 8, from,  (a4) - 12)  \
869         bn_cp_32(to, 9, from,  (a3) - 12)  \
870         bn_cp_32(to, 10, from, (a2) - 12)  \
871         bn_cp_32(to, 11, from, (a1) - 12)  \
872         }
873 
874 int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
875                     BN_CTX *ctx)
876 {
877     int i, top = a->top;
878     int carry = 0;
879     register BN_ULONG *r_d, *a_d = a->d;
880     union {
881         BN_ULONG bn[BN_NIST_384_TOP];
882         unsigned int ui[BN_NIST_384_TOP * sizeof(BN_ULONG) /
883                         sizeof(unsigned int)];
884     } buf;
885     BN_ULONG c_d[BN_NIST_384_TOP], *res;
886     bn_addsub_f adjust;
887     static const BIGNUM ossl_bignum_nist_p_384_sqr = {
888         (BN_ULONG *)_nist_p_384_sqr,
889         OSSL_NELEM(_nist_p_384_sqr),
890         OSSL_NELEM(_nist_p_384_sqr),
891         0, BN_FLG_STATIC_DATA
892     };
893 
894     field = &ossl_bignum_nist_p_384; /* just to make sure */
895 
896     if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_384_sqr) >= 0)
897         return BN_nnmod(r, a, field, ctx);
898 
899     i = BN_ucmp(field, a);
900     if (i == 0) {
901         BN_zero(r);
902         return 1;
903     } else if (i > 0)
904         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
905 
906     if (r != a) {
907         if (!bn_wexpand(r, BN_NIST_384_TOP))
908             return 0;
909         r_d = r->d;
910         nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
911     } else
912         r_d = a_d;
913 
914     nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP,
915                  BN_NIST_384_TOP);
916 
917 #if defined(NIST_INT64)
918     {
919         NIST_INT64 acc;         /* accumulator */
920         unsigned int *rp = (unsigned int *)r_d;
921         const unsigned int *bp = (const unsigned int *)buf.ui;
922 
923         acc = rp[0];
924         acc += bp[12 - 12];
925         acc += bp[21 - 12];
926         acc += bp[20 - 12];
927         acc -= bp[23 - 12];
928         rp[0] = (unsigned int)acc;
929         acc >>= 32;
930 
931         acc += rp[1];
932         acc += bp[13 - 12];
933         acc += bp[22 - 12];
934         acc += bp[23 - 12];
935         acc -= bp[12 - 12];
936         acc -= bp[20 - 12];
937         rp[1] = (unsigned int)acc;
938         acc >>= 32;
939 
940         acc += rp[2];
941         acc += bp[14 - 12];
942         acc += bp[23 - 12];
943         acc -= bp[13 - 12];
944         acc -= bp[21 - 12];
945         rp[2] = (unsigned int)acc;
946         acc >>= 32;
947 
948         acc += rp[3];
949         acc += bp[15 - 12];
950         acc += bp[12 - 12];
951         acc += bp[20 - 12];
952         acc += bp[21 - 12];
953         acc -= bp[14 - 12];
954         acc -= bp[22 - 12];
955         acc -= bp[23 - 12];
956         rp[3] = (unsigned int)acc;
957         acc >>= 32;
958 
959         acc += rp[4];
960         acc += bp[21 - 12];
961         acc += bp[21 - 12];
962         acc += bp[16 - 12];
963         acc += bp[13 - 12];
964         acc += bp[12 - 12];
965         acc += bp[20 - 12];
966         acc += bp[22 - 12];
967         acc -= bp[15 - 12];
968         acc -= bp[23 - 12];
969         acc -= bp[23 - 12];
970         rp[4] = (unsigned int)acc;
971         acc >>= 32;
972 
973         acc += rp[5];
974         acc += bp[22 - 12];
975         acc += bp[22 - 12];
976         acc += bp[17 - 12];
977         acc += bp[14 - 12];
978         acc += bp[13 - 12];
979         acc += bp[21 - 12];
980         acc += bp[23 - 12];
981         acc -= bp[16 - 12];
982         rp[5] = (unsigned int)acc;
983         acc >>= 32;
984 
985         acc += rp[6];
986         acc += bp[23 - 12];
987         acc += bp[23 - 12];
988         acc += bp[18 - 12];
989         acc += bp[15 - 12];
990         acc += bp[14 - 12];
991         acc += bp[22 - 12];
992         acc -= bp[17 - 12];
993         rp[6] = (unsigned int)acc;
994         acc >>= 32;
995 
996         acc += rp[7];
997         acc += bp[19 - 12];
998         acc += bp[16 - 12];
999         acc += bp[15 - 12];
1000         acc += bp[23 - 12];
1001         acc -= bp[18 - 12];
1002         rp[7] = (unsigned int)acc;
1003         acc >>= 32;
1004 
1005         acc += rp[8];
1006         acc += bp[20 - 12];
1007         acc += bp[17 - 12];
1008         acc += bp[16 - 12];
1009         acc -= bp[19 - 12];
1010         rp[8] = (unsigned int)acc;
1011         acc >>= 32;
1012 
1013         acc += rp[9];
1014         acc += bp[21 - 12];
1015         acc += bp[18 - 12];
1016         acc += bp[17 - 12];
1017         acc -= bp[20 - 12];
1018         rp[9] = (unsigned int)acc;
1019         acc >>= 32;
1020 
1021         acc += rp[10];
1022         acc += bp[22 - 12];
1023         acc += bp[19 - 12];
1024         acc += bp[18 - 12];
1025         acc -= bp[21 - 12];
1026         rp[10] = (unsigned int)acc;
1027         acc >>= 32;
1028 
1029         acc += rp[11];
1030         acc += bp[23 - 12];
1031         acc += bp[20 - 12];
1032         acc += bp[19 - 12];
1033         acc -= bp[22 - 12];
1034         rp[11] = (unsigned int)acc;
1035 
1036         carry = (int)(acc >> 32);
1037     }
1038 #else
1039     {
1040         BN_ULONG t_d[BN_NIST_384_TOP];
1041 
1042         /*
1043          * S1
1044          */
1045         nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23 - 4, 22 - 4, 21 - 4);
1046         /* left shift */
1047         {
1048             register BN_ULONG *ap, t, c;
1049             ap = t_d;
1050             c = 0;
1051             for (i = 3; i != 0; --i) {
1052                 t = *ap;
1053                 *(ap++) = ((t << 1) | c) & BN_MASK2;
1054                 c = (t & BN_TBIT) ? 1 : 0;
1055             }
1056             *ap = c;
1057         }
1058         carry =
1059             (int)bn_add_words(r_d + (128 / BN_BITS2), r_d + (128 / BN_BITS2),
1060                               t_d, BN_NIST_256_TOP);
1061         /*
1062          * S2
1063          */
1064         carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
1065         /*
1066          * S3
1067          */
1068         nist_set_384(t_d, buf.bn, 20, 19, 18, 17, 16, 15, 14, 13, 12, 23, 22,
1069                      21);
1070         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1071         /*
1072          * S4
1073          */
1074         nist_set_384(t_d, buf.bn, 19, 18, 17, 16, 15, 14, 13, 12, 20, 0, 23,
1075                      0);
1076         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1077         /*
1078          * S5
1079          */
1080         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 23, 22, 21, 20, 0, 0, 0, 0);
1081         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1082         /*
1083          * S6
1084          */
1085         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 23, 22, 21, 0, 0, 20);
1086         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1087         /*
1088          * D1
1089          */
1090         nist_set_384(t_d, buf.bn, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12,
1091                      23);
1092         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1093         /*
1094          * D2
1095          */
1096         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 22, 21, 20, 0);
1097         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1098         /*
1099          * D3
1100          */
1101         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 0, 0);
1102         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1103 
1104     }
1105 #endif
1106     /* see BN_nist_mod_224 for explanation */
1107     adjust = bn_sub_words;
1108     if (carry > 0)
1109         carry =
1110             (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
1111                               BN_NIST_384_TOP);
1112     else if (carry < 0) {
1113         carry =
1114             (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
1115                               BN_NIST_384_TOP);
1116         adjust = carry ? bn_sub_words : bn_add_words;
1117     } else
1118         carry = 1;
1119 
1120     res = ((*adjust) (c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP) && carry)
1121         ? r_d
1122         : c_d;
1123     nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1124     r->top = BN_NIST_384_TOP;
1125     bn_correct_top(r);
1126 
1127     return 1;
1128 }
1129 
/*
 * Shift amounts and mask used by BN_nist_mod_521 to split a value at
 * bit 521, which does not fall on a BN_ULONG boundary:
 *   RSHIFT   - bits of the 521-bit value that live in its top word;
 *   LSHIFT   - the remaining bits of that word;
 *   TOP_MASK - keeps only the low RSHIFT bits of a word.
 */
#define BN_NIST_521_RSHIFT      (521 % BN_BITS2)
#define BN_NIST_521_LSHIFT      (BN_BITS2 - BN_NIST_521_RSHIFT)
#define BN_NIST_521_TOP_MASK    ((BN_ULONG)BN_MASK2 >> BN_NIST_521_LSHIFT)
1133 
1134 int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
1135                     BN_CTX *ctx)
1136 {
1137     int top = a->top, i;
1138     BN_ULONG *r_d, *a_d = a->d, t_d[BN_NIST_521_TOP], val, tmp, *res;
1139     static const BIGNUM ossl_bignum_nist_p_521_sqr = {
1140         (BN_ULONG *)_nist_p_521_sqr,
1141         OSSL_NELEM(_nist_p_521_sqr),
1142         OSSL_NELEM(_nist_p_521_sqr),
1143         0, BN_FLG_STATIC_DATA
1144     };
1145 
1146     field = &ossl_bignum_nist_p_521; /* just to make sure */
1147 
1148     if (BN_is_negative(a) || BN_ucmp(a, &ossl_bignum_nist_p_521_sqr) >= 0)
1149         return BN_nnmod(r, a, field, ctx);
1150 
1151     i = BN_ucmp(field, a);
1152     if (i == 0) {
1153         BN_zero(r);
1154         return 1;
1155     } else if (i > 0)
1156         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1157 
1158     if (r != a) {
1159         if (!bn_wexpand(r, BN_NIST_521_TOP))
1160             return 0;
1161         r_d = r->d;
1162         nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
1163     } else
1164         r_d = a_d;
1165 
1166     /* upper 521 bits, copy ... */
1167     nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
1168                  top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
1169     /* ... and right shift */
1170     for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
1171 #if 0
1172         /*
1173          * MSC ARM compiler [version 2013, presumably even earlier,
1174          * much earlier] miscompiles this code, but not one in
1175          * #else section. See RT#3541.
1176          */
1177         tmp = val >> BN_NIST_521_RSHIFT;
1178         val = t_d[i + 1];
1179         t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2;
1180 #else
1181         t_d[i] = (val >> BN_NIST_521_RSHIFT |
1182                   (tmp = t_d[i + 1]) << BN_NIST_521_LSHIFT) & BN_MASK2;
1183         val = tmp;
1184 #endif
1185     }
1186     t_d[i] = val >> BN_NIST_521_RSHIFT;
1187     /* lower 521 bits */
1188     r_d[i] &= BN_NIST_521_TOP_MASK;
1189 
1190     bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
1191     res = bn_sub_words(t_d, r_d, _nist_p_521,
1192                        BN_NIST_521_TOP)
1193         ? r_d
1194         : t_d;
1195     nist_cp_bn(r_d, res, BN_NIST_521_TOP);
1196     r->top = BN_NIST_521_TOP;
1197     bn_correct_top(r);
1198 
1199     return 1;
1200 }
1201 
1202 int (*BN_nist_mod_func(const BIGNUM *p)) (BIGNUM *r, const BIGNUM *a,
1203                                           const BIGNUM *field, BN_CTX *ctx) {
1204     if (BN_ucmp(&ossl_bignum_nist_p_192, p) == 0)
1205         return BN_nist_mod_192;
1206     if (BN_ucmp(&ossl_bignum_nist_p_224, p) == 0)
1207         return BN_nist_mod_224;
1208     if (BN_ucmp(&ossl_bignum_nist_p_256, p) == 0)
1209         return BN_nist_mod_256;
1210     if (BN_ucmp(&ossl_bignum_nist_p_384, p) == 0)
1211         return BN_nist_mod_384;
1212     if (BN_ucmp(&ossl_bignum_nist_p_521, p) == 0)
1213         return BN_nist_mod_521;
1214     return 0;
1215 }
1216