xref: /freebsd/crypto/openssl/crypto/bn/bn_nist.c (revision 9768746b)
1 /*
2  * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the OpenSSL license (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include "bn_local.h"
11 #include "internal/cryptlib.h"
12 
/*
 * Number of BN_ULONG words needed to hold a value of the given bit length,
 * i.e. ceil(bits / BN_BITS2).
 *
 * The replacement lists are fully parenthesized so that each macro behaves
 * as a single value wherever it is used (e.g. `x / BN_NIST_192_TOP` or
 * `x % BN_NIST_256_TOP`), instead of re-associating with neighbouring
 * operators.
 */
#define BN_NIST_192_TOP ((192+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_224_TOP ((224+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_256_TOP ((256+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_384_TOP ((384+BN_BITS2-1)/BN_BITS2)
#define BN_NIST_521_TOP ((521+BN_BITS2-1)/BN_BITS2)
18 
19 /* pre-computed tables are "carry-less" values of modulus*(i+1) */
20 #if BN_BITS2 == 64
/*
 * P-192 modulus table, 64-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_192; BN_nist_mod_192() indexes the rows by carry - 1.
 */
21 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
22     {0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL},
23     {0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL},
24     {0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL}
25 };
26 
/*
 * Words of the square of the P-192 modulus.  BN_nist_mod_192() uses it as
 * the bound at or above which inputs are handed to BN_nnmod().
 */
27 static const BN_ULONG _nist_p_192_sqr[] = {
28     0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
29     0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
30 };
31 
/*
 * P-224 modulus table, 64-bit words.  Row 0 backs _bignum_nist_p_224;
 * BN_nist_mod_224() indexes the rows by carry - 1 (or -carry - 1).
 */
32 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
33     {0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
34      0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL},
35     {0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
36      0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL} /* this one is
37                                                     * "carry-full" */
38 };
39 
/*
 * Bound used by BN_nist_mod_224(): inputs at or above this value are handed
 * to BN_nnmod().  Named as the square of the P-224 modulus.
 */
40 static const BN_ULONG _nist_p_224_sqr[] = {
41     0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
42     0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
43     0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
44     0xFFFFFFFFFFFFFFFFULL
45 };
46 
/*
 * P-256 modulus table, 64-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_256; BN_nist_mod_256() indexes the rows by carry - 1
 * (or -carry - 1).
 */
47 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
48     {0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
49      0x0000000000000000ULL, 0xFFFFFFFF00000001ULL},
50     {0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
51      0x0000000000000000ULL, 0xFFFFFFFE00000002ULL},
52     {0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
53      0x0000000000000000ULL, 0xFFFFFFFD00000003ULL},
54     {0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
55      0x0000000000000000ULL, 0xFFFFFFFC00000004ULL},
56     {0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
57      0x0000000000000000ULL, 0xFFFFFFFB00000005ULL},
58 };
59 
/*
 * Bound used by BN_nist_mod_256(): inputs at or above this value are handed
 * to BN_nnmod().  Named as the square of the P-256 modulus.
 */
60 static const BN_ULONG _nist_p_256_sqr[] = {
61     0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
62     0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
63     0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
64     0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
65 };
66 
/*
 * P-384 modulus table, 64-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_384.
 */
67 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
68     {0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL, 0xFFFFFFFFFFFFFFFEULL,
69      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
70     {0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
71      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
72     {0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL, 0xFFFFFFFFFFFFFFFCULL,
73      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
74     {0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL, 0xFFFFFFFFFFFFFFFBULL,
75      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
76     {0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL, 0xFFFFFFFFFFFFFFFAULL,
77      0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL},
78 };
79 
/*
 * Bound used by BN_nist_mod_384(): inputs at or above this value are handed
 * to BN_nnmod().  Named as the square of the P-384 modulus.
 */
80 static const BN_ULONG _nist_p_384_sqr[] = {
81     0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
82     0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
83     0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
84     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
85 };
86 
/*
 * Words of the P-521 modulus, 64-bit words (a single value, not a table of
 * multiples).  Backs _bignum_nist_p_521.
 */
87 static const BN_ULONG _nist_p_521[] =
88     { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
89     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
90     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
91     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
92     0x00000000000001FFULL
93 };
94 
/*
 * Named as the square of the P-521 modulus; its use is outside this part of
 * the file (presumably the same BN_nnmod() bound as for the other primes).
 */
95 static const BN_ULONG _nist_p_521_sqr[] = {
96     0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
97     0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
98     0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
99     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
100     0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
101     0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
102 };
103 #elif BN_BITS2 == 32
/*
 * P-192 modulus table, 32-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_192; BN_nist_mod_192() indexes the rows by carry - 1.
 */
104 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
105     {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
106     {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
107     {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
108 };
109 
/*
 * Words of the square of the P-192 modulus.  BN_nist_mod_192() uses it as
 * the bound at or above which inputs are handed to BN_nnmod().
 */
110 static const BN_ULONG _nist_p_192_sqr[] = {
111     0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
112     0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
113 };
114 
/*
 * P-224 modulus table, 32-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_224; BN_nist_mod_224() indexes the rows by carry - 1
 * (or -carry - 1).
 */
115 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
116     {0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
117      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
118     {0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
119      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}
120 };
121 
/*
 * Bound used by BN_nist_mod_224(): inputs at or above this value are handed
 * to BN_nnmod().  Named as the square of the P-224 modulus.
 */
122 static const BN_ULONG _nist_p_224_sqr[] = {
123     0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
124     0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
125     0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
126     0xFFFFFFFF, 0xFFFFFFFF
127 };
128 
/*
 * P-256 modulus table, 32-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_256; BN_nist_mod_256() indexes the rows by carry - 1
 * (or -carry - 1).
 */
129 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
130     {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
131      0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF},
132     {0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
133      0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE},
134     {0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
135      0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD},
136     {0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
137      0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC},
138     {0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
139      0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB},
140 };
141 
/*
 * Bound used by BN_nist_mod_256(): inputs at or above this value are handed
 * to BN_nnmod().  Named as the square of the P-256 modulus.
 */
142 static const BN_ULONG _nist_p_256_sqr[] = {
143     0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
144     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
145     0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
146     0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
147 };
148 
/*
 * P-384 modulus table, 32-bit words.  Per the note preceding these tables,
 * row i is the "carry-less" value of modulus*(i+1).  Row 0 backs
 * _bignum_nist_p_384.
 */
149 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
150     {0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
151      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
152     {0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
153      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
154     {0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD, 0xFFFFFFFC, 0xFFFFFFFF,
155      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
156     {0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC, 0xFFFFFFFB, 0xFFFFFFFF,
157      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
158     {0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB, 0xFFFFFFFA, 0xFFFFFFFF,
159      0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
160 };
161 
/*
 * Bound used by BN_nist_mod_384(): inputs at or above this value are handed
 * to BN_nnmod().  Named as the square of the P-384 modulus.
 */
162 static const BN_ULONG _nist_p_384_sqr[] = {
163     0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
164     0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
165     0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
166     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
167 };
168 
/*
 * Words of the P-521 modulus, 32-bit words (a single value, not a table of
 * multiples).  Backs _bignum_nist_p_521.
 */
169 static const BN_ULONG _nist_p_521[] = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
170     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
171     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
172     0xFFFFFFFF, 0x000001FF
173 };
174 
/*
 * Named as the square of the P-521 modulus; its use is outside this part of
 * the file (presumably the same BN_nnmod() bound as for the other primes).
 */
175 static const BN_ULONG _nist_p_521_sqr[] = {
176     0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
177     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
178     0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
179     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
180     0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
181     0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
182 };
183 #else
184 # error "unsupported BN_BITS2"
185 #endif
186 
/*
 * Read-only BIGNUM for the P-192 prime, pointing at row 0 of _nist_p_192 and
 * flagged BN_FLG_STATIC_DATA.  The initializer is positional; the BIGNUM
 * layout is declared outside this file (presumably d, top, dmax, neg,
 * flags - confirm against bn_local.h).
 */
187 static const BIGNUM _bignum_nist_p_192 = {
188     (BN_ULONG *)_nist_p_192[0],
189     BN_NIST_192_TOP,
190     BN_NIST_192_TOP,
191     0,
192     BN_FLG_STATIC_DATA
193 };
194 
/*
 * Read-only BIGNUM for the P-224 prime, pointing at row 0 of _nist_p_224 and
 * flagged BN_FLG_STATIC_DATA.  The initializer is positional; the BIGNUM
 * layout is declared outside this file (presumably d, top, dmax, neg,
 * flags - confirm against bn_local.h).
 */
195 static const BIGNUM _bignum_nist_p_224 = {
196     (BN_ULONG *)_nist_p_224[0],
197     BN_NIST_224_TOP,
198     BN_NIST_224_TOP,
199     0,
200     BN_FLG_STATIC_DATA
201 };
202 
/*
 * Read-only BIGNUM for the P-256 prime, pointing at row 0 of _nist_p_256 and
 * flagged BN_FLG_STATIC_DATA.  The initializer is positional; the BIGNUM
 * layout is declared outside this file (presumably d, top, dmax, neg,
 * flags - confirm against bn_local.h).
 */
203 static const BIGNUM _bignum_nist_p_256 = {
204     (BN_ULONG *)_nist_p_256[0],
205     BN_NIST_256_TOP,
206     BN_NIST_256_TOP,
207     0,
208     BN_FLG_STATIC_DATA
209 };
210 
/*
 * Read-only BIGNUM for the P-384 prime, pointing at row 0 of _nist_p_384 and
 * flagged BN_FLG_STATIC_DATA.  The initializer is positional; the BIGNUM
 * layout is declared outside this file (presumably d, top, dmax, neg,
 * flags - confirm against bn_local.h).
 */
211 static const BIGNUM _bignum_nist_p_384 = {
212     (BN_ULONG *)_nist_p_384[0],
213     BN_NIST_384_TOP,
214     BN_NIST_384_TOP,
215     0,
216     BN_FLG_STATIC_DATA
217 };
218 
/*
 * Read-only BIGNUM for the P-521 prime, pointing at _nist_p_521 and flagged
 * BN_FLG_STATIC_DATA.  The initializer is positional; the BIGNUM layout is
 * declared outside this file (presumably d, top, dmax, neg, flags - confirm
 * against bn_local.h).
 */
219 static const BIGNUM _bignum_nist_p_521 = {
220     (BN_ULONG *)_nist_p_521,
221     BN_NIST_521_TOP,
222     BN_NIST_521_TOP,
223     0,
224     BN_FLG_STATIC_DATA
225 };
226 
227 const BIGNUM *BN_get0_nist_prime_192(void)
228 {
229     return &_bignum_nist_p_192;
230 }
231 
232 const BIGNUM *BN_get0_nist_prime_224(void)
233 {
234     return &_bignum_nist_p_224;
235 }
236 
237 const BIGNUM *BN_get0_nist_prime_256(void)
238 {
239     return &_bignum_nist_p_256;
240 }
241 
242 const BIGNUM *BN_get0_nist_prime_384(void)
243 {
244     return &_bignum_nist_p_384;
245 }
246 
247 const BIGNUM *BN_get0_nist_prime_521(void)
248 {
249     return &_bignum_nist_p_521;
250 }
251 
252 /*
253  * To avoid more recent compilers (specifically clang-14) from treating this
254  * code as a violation of the strict aliasing conditions and omitting it, this
255  * cannot be declared as a function.  Moreover, the dst parameter cannot be
256  * cached in a local since this no longer references the union and again falls
257  * foul of the strict aliasing criteria.  Refer to #18225 for the initial
258  * diagnostics and llvm/llvm-project#55255 for the later discussions with the
259  * LLVM developers.  The problem boils down to if an array in the union is
260  * converted to a pointer or if it is used directly.
261  *
262  * This function was inlined regardless, so there is no space cost to be
263  * paid for making it a macro.
264  */
/*
 * nist_cp_bn_0(dst, src_in, top, max): copy the first `top` words from
 * src_in into dst, then zero dst[top] .. dst[max - 1].  If top <= 0 nothing
 * is copied and dst[0] .. dst[max - 1] are all zeroed.
 *
 * Expands to a bare brace block (not an expression) that declares its own
 * locals `ii` and `src`; dst is indexed directly on every access rather
 * than being cached in a pointer.
 */
265 #define nist_cp_bn_0(dst, src_in, top, max) \
266 {                                           \
267     int ii;                                 \
268     const BN_ULONG *src = src_in;           \
269                                             \
270     for (ii = 0; ii < top; ii++)            \
271         (dst)[ii] = src[ii];                \
272     for (; ii < max; ii++)                  \
273         (dst)[ii] = 0;                      \
274 }
275 
276 static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
277 {
278     int i;
279 
280     for (i = 0; i < top; i++)
281         dst[i] = src[i];
282 }
283 
/*
 * Word-shuffling helpers used by the nist_set_*() macros.  "32" and "64"
 * refer to the size of the logical limb being moved, independent of
 * BN_BITS2.  A negative source index m means "store zero".
 *
 * Note that bn_cp_64, bn_64_set_0, bn_32_set_0 (and therefore bn_cp_32)
 * carry their own trailing semicolon or brace block, which is why the
 * nist_set_*() macros invoke them without one.
 *
 * NIST_INT64, when it ends up defined, names a 64-bit signed integer type
 * and selects the accumulator-based reduction code in the BN_nist_mod_*()
 * functions.
 */
284 #if BN_BITS2 == 64
/* One BN_ULONG is one 64-bit limb: plain word copy / clear. */
285 # define bn_cp_64(to, n, from, m)        (to)[n] = (m>=0)?((from)[m]):0;
286 # define bn_64_set_0(to, n)              (to)[n] = (BN_ULONG)0;
287 /*
288  * two following macros are implemented under assumption that they
289  * are called in a sequence with *ascending* n, i.e. as they are...
 *
 * For even n the destination word is assigned (its upper half ends up
 * zero); for odd n the value is OR-ed into / masked out of the word that
 * the preceding even n just wrote.
290  */
291 # define bn_cp_32_naked(to, n, from, m)  (((n)&1)?(to[(n)/2]|=((m)&1)?(from[(m)/2]&BN_MASK2h):(from[(m)/2]<<32))\
292                                                 :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
293 # define bn_32_set_0(to, n)              (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
294 # define bn_cp_32(to,n,from,m)           ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
/* The accumulator code path is only enabled on little-endian builds. */
295 # if defined(L_ENDIAN)
296 #  if defined(__arch64__)
297 #   define NIST_INT64 long
298 #  else
299 #   define NIST_INT64 long long
300 #  endif
301 # endif
302 #else
/* One BN_ULONG is one 32-bit limb: a 64-bit move is two 32-bit moves. */
303 # define bn_cp_64(to, n, from, m) \
304         { \
305         bn_cp_32(to, (n)*2, from, (m)*2); \
306         bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
307         }
308 # define bn_64_set_0(to, n) \
309         { \
310         bn_32_set_0(to, (n)*2); \
311         bn_32_set_0(to, (n)*2+1); \
312         }
313 # define bn_cp_32(to, n, from, m)        (to)[n] = (m>=0)?((from)[m]):0;
314 # define bn_32_set_0(to, n)              (to)[n] = (BN_ULONG)0;
315 # if defined(_WIN32) && !defined(__GNUC__)
316 #  define NIST_INT64 __int64
317 # elif defined(BN_LLONG)
318 #  define NIST_INT64 long long
319 # endif
320 #endif                          /* BN_BITS2 != 64 */
321 
/*
 * nist_set_192(to, from, a1, a2, a3): build a three-limb (64-bit limbs)
 * value in `to`.  a1 selects the most significant limb and a3 the least
 * significant; each selector is a limb number of the full input, so 3
 * is subtracted to index `from` (the upper half).  A selector of 0 gives a
 * negative index, which bn_cp_64 turns into a zero limb.
 */
322 #define nist_set_192(to, from, a1, a2, a3) \
323         { \
324         bn_cp_64(to, 0, from, (a3) - 3) \
325         bn_cp_64(to, 1, from, (a2) - 3) \
326         bn_cp_64(to, 2, from, (a1) - 3) \
327         }
328 
/*
 * Compute r = a mod p for the NIST P-192 prime.
 *
 *   r     - receives the result; may be the same BIGNUM as a.
 *   a     - value to reduce.
 *   field - ignored: it is overwritten with the built-in P-192 prime.
 *   ctx   - only used when the work is delegated to BN_nnmod().
 *
 * Negative inputs and inputs >= p^2 are delegated to BN_nnmod().  a == p
 * gives zero and a < p is copied through unchanged.  Otherwise the words of
 * a above the low BN_NIST_192_TOP are folded into the low part, followed by
 * a final conditional subtraction of the modulus.
 *
 * Returns 1 on success and 0 if r cannot be expanded; on the delegated
 * paths the result of BN_nnmod()/BN_copy() determines the return value.
 */
329 int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
330                     BN_CTX *ctx)
331 {
332     int top = a->top, i;
333     int carry;
334     register BN_ULONG *r_d, *a_d = a->d;
    /* Upper half of a, viewable either as BN_ULONG words or as unsigned ints */
335     union {
336         BN_ULONG bn[BN_NIST_192_TOP];
337         unsigned int ui[BN_NIST_192_TOP * sizeof(BN_ULONG) /
338                         sizeof(unsigned int)];
339     } buf;
340     BN_ULONG c_d[BN_NIST_192_TOP], *res;
341     PTR_SIZE_INT mask;
342     static const BIGNUM _bignum_nist_p_192_sqr = {
343         (BN_ULONG *)_nist_p_192_sqr,
344         OSSL_NELEM(_nist_p_192_sqr),
345         OSSL_NELEM(_nist_p_192_sqr),
346         0, BN_FLG_STATIC_DATA
347     };
348 
349     field = &_bignum_nist_p_192; /* just to make sure */
350 
    /* Outside [0, p^2): use the generic reduction instead */
351     if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_192_sqr) >= 0)
352         return BN_nnmod(r, a, field, ctx);
353 
    /* a == p reduces to zero; a < p is already reduced */
354     i = BN_ucmp(field, a);
355     if (i == 0) {
356         BN_zero(r);
357         return 1;
358     } else if (i > 0)
359         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
360 
    /* r_d starts out holding the low BN_NIST_192_TOP words of a */
361     if (r != a) {
362         if (!bn_wexpand(r, BN_NIST_192_TOP))
363             return 0;
364         r_d = r->d;
365         nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
366     } else
367         r_d = a_d;
368 
    /* buf receives the remaining (upper) words of a, zero-padded */
369     nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
370                  BN_NIST_192_TOP);
371 
372 #if defined(NIST_INT64)
    /*
     * Column-wise accumulation over 32-bit limbs: each group below sums one
     * output limb, stores its low 32 bits and carries the rest forward.
     */
373     {
374         NIST_INT64 acc;         /* accumulator */
375         unsigned int *rp = (unsigned int *)r_d;
376         const unsigned int *bp = (const unsigned int *)buf.ui;
377 
378         acc = rp[0];
379         acc += bp[3 * 2 - 6];
380         acc += bp[5 * 2 - 6];
381         rp[0] = (unsigned int)acc;
382         acc >>= 32;
383 
384         acc += rp[1];
385         acc += bp[3 * 2 - 5];
386         acc += bp[5 * 2 - 5];
387         rp[1] = (unsigned int)acc;
388         acc >>= 32;
389 
390         acc += rp[2];
391         acc += bp[3 * 2 - 6];
392         acc += bp[4 * 2 - 6];
393         acc += bp[5 * 2 - 6];
394         rp[2] = (unsigned int)acc;
395         acc >>= 32;
396 
397         acc += rp[3];
398         acc += bp[3 * 2 - 5];
399         acc += bp[4 * 2 - 5];
400         acc += bp[5 * 2 - 5];
401         rp[3] = (unsigned int)acc;
402         acc >>= 32;
403 
404         acc += rp[4];
405         acc += bp[4 * 2 - 6];
406         acc += bp[5 * 2 - 6];
407         rp[4] = (unsigned int)acc;
408         acc >>= 32;
409 
410         acc += rp[5];
411         acc += bp[4 * 2 - 5];
412         acc += bp[5 * 2 - 5];
413         rp[5] = (unsigned int)acc;
414 
415         carry = (int)(acc >> 32);
416     }
417 #else
    /*
     * Word-based variant: three rearrangements of the upper half are added
     * to r_d and the carries out of each addition are counted.
     */
418     {
419         BN_ULONG t_d[BN_NIST_192_TOP];
420 
421         nist_set_192(t_d, buf.bn, 0, 3, 3);
422         carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
423         nist_set_192(t_d, buf.bn, 4, 4, 0);
424         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
425         nist_set_192(t_d, buf.bn, 5, 5, 5)
426             carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
427     }
428 #endif
    /*
     * A positive carry is removed by subtracting the matching multiple of
     * the modulus (table row carry - 1); carry then holds the borrow of that
     * subtraction.  With no carry it is set to 1.
     */
429     if (carry > 0)
430         carry =
431             (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
432                               BN_NIST_192_TOP);
433     else
434         carry = 1;
435 
436     /*
437      * we need 'if (carry==0 || result>=modulus) result-=modulus;'
438      * as comparison implies subtraction, we can write
439      * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
440      * this is what happens below, but without explicit if:-) a.
441      */
442     mask =
443         0 - (PTR_SIZE_INT) bn_sub_words(c_d, r_d, _nist_p_192[0],
444                                         BN_NIST_192_TOP);
445     mask &= 0 - (PTR_SIZE_INT) carry;
    /* mask all-ones selects r_d, mask zero selects c_d (= r_d - modulus) */
446     res = c_d;
447     res = (BN_ULONG *)
448         (((PTR_SIZE_INT) res & ~mask) | ((PTR_SIZE_INT) r_d & mask));
449     nist_cp_bn(r_d, res, BN_NIST_192_TOP);
450     r->top = BN_NIST_192_TOP;
451     bn_correct_top(r);
452 
453     return 1;
454 }
455 
/*
 * Pointer to a word-array add or subtract routine with the signature shared
 * by bn_add_words() and bn_sub_words(); lets the final correction step in
 * the reduction functions pick one of the two at run time.
 */
456 typedef BN_ULONG (*bn_addsub_f) (BN_ULONG *, const BN_ULONG *,
457                                  const BN_ULONG *, int);
458 
/*
 * nist_set_224(to, from, a1..a7): build a seven-limb (32-bit limbs) value
 * in `to`.  a1 selects the most significant limb and a7 the least
 * significant; each selector is a limb number of the full input, so 7 is
 * subtracted to index `from` (the upper half).  A selector of 0 gives a
 * negative index, which bn_cp_32 turns into a zero limb.
 */
458 #define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
459         { \
460         bn_cp_32(to, 0, from, (a7) - 7) \
461         bn_cp_32(to, 1, from, (a6) - 7) \
462         bn_cp_32(to, 2, from, (a5) - 7) \
463         bn_cp_32(to, 3, from, (a4) - 7) \
464         bn_cp_32(to, 4, from, (a3) - 7) \
465         bn_cp_32(to, 5, from, (a2) - 7) \
466         bn_cp_32(to, 6, from, (a1) - 7) \
467         }
469 
/*
 * Compute r = a mod p for the NIST P-224 prime.
 *
 *   r     - receives the result; may be the same BIGNUM as a.
 *   a     - value to reduce.
 *   field - ignored: it is overwritten with the built-in P-224 prime.
 *   ctx   - only used when the work is delegated to BN_nnmod().
 *
 * Negative inputs and inputs at or above the _nist_p_224_sqr bound are
 * delegated to BN_nnmod().  a == p gives zero and a < p is copied through
 * unchanged.  Otherwise the upper part of a is folded into the low part with
 * additions and subtractions, and the signed carry that results is corrected
 * with a multiple of the modulus.
 *
 * With 64-bit words 224 bits do not fill a whole number of words, so the
 * upper part is realigned first and the carry is read from bit 32 upwards of
 * the top word.
 *
 * Returns 1 on success and 0 if r cannot be expanded; on the delegated
 * paths the result of BN_nnmod()/BN_copy() determines the return value.
 */
469 int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
470                     BN_CTX *ctx)
471 {
472     int top = a->top, i;
473     int carry;
474     BN_ULONG *r_d, *a_d = a->d;
475     union {
476         BN_ULONG bn[BN_NIST_224_TOP];
477         unsigned int ui[BN_NIST_224_TOP * sizeof(BN_ULONG) /
478                         sizeof(unsigned int)];
479     } buf;
480     BN_ULONG c_d[BN_NIST_224_TOP], *res;
481     PTR_SIZE_INT mask;
    /* Function pointer and integer views of the same value, for masking */
482     union {
483         bn_addsub_f f;
484         PTR_SIZE_INT p;
485     } u;
486     static const BIGNUM _bignum_nist_p_224_sqr = {
487         (BN_ULONG *)_nist_p_224_sqr,
488         OSSL_NELEM(_nist_p_224_sqr),
489         OSSL_NELEM(_nist_p_224_sqr),
490         0, BN_FLG_STATIC_DATA
491     };
492 
493     field = &_bignum_nist_p_224; /* just to make sure */
494 
    /* Negative or at/above the _sqr bound: use the generic reduction */
495     if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_224_sqr) >= 0)
496         return BN_nnmod(r, a, field, ctx);
497 
    /* a == p reduces to zero; a < p is already reduced */
498     i = BN_ucmp(field, a);
499     if (i == 0) {
500         BN_zero(r);
501         return 1;
502     } else if (i > 0)
503         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
504 
505     if (r != a) {
506         if (!bn_wexpand(r, BN_NIST_224_TOP))
507             return 0;
508         r_d = r->d;
509         nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
510     } else
511         r_d = a_d;
512 
513 #if BN_BITS2==64
514     /* copy upper 256 bits of 448 bit number ... */
515     nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
516                  top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
517     /* ... and right shift by 32 to obtain upper 224 bits */
518     nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
519     /* truncate lower part to 224 bits too */
520     r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
521 #else
522     nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP,
523                  BN_NIST_224_TOP);
524 #endif
525 
526 #if defined(NIST_INT64) && BN_BITS2!=64
    /*
     * Column-wise accumulation over 32-bit limbs with a signed 64-bit
     * accumulator; only compiled when BN_BITS2 is not 64.
     */
527     {
528         NIST_INT64 acc;         /* accumulator */
529         unsigned int *rp = (unsigned int *)r_d;
530         const unsigned int *bp = (const unsigned int *)buf.ui;
531 
532         acc = rp[0];
533         acc -= bp[7 - 7];
534         acc -= bp[11 - 7];
535         rp[0] = (unsigned int)acc;
536         acc >>= 32;
537 
538         acc += rp[1];
539         acc -= bp[8 - 7];
540         acc -= bp[12 - 7];
541         rp[1] = (unsigned int)acc;
542         acc >>= 32;
543 
544         acc += rp[2];
545         acc -= bp[9 - 7];
546         acc -= bp[13 - 7];
547         rp[2] = (unsigned int)acc;
548         acc >>= 32;
549 
550         acc += rp[3];
551         acc += bp[7 - 7];
552         acc += bp[11 - 7];
553         acc -= bp[10 - 7];
554         rp[3] = (unsigned int)acc;
555         acc >>= 32;
556 
557         acc += rp[4];
558         acc += bp[8 - 7];
559         acc += bp[12 - 7];
560         acc -= bp[11 - 7];
561         rp[4] = (unsigned int)acc;
562         acc >>= 32;
563 
564         acc += rp[5];
565         acc += bp[9 - 7];
566         acc += bp[13 - 7];
567         acc -= bp[12 - 7];
568         rp[5] = (unsigned int)acc;
569         acc >>= 32;
570 
571         acc += rp[6];
572         acc += bp[10 - 7];
573         acc -= bp[13 - 7];
574         rp[6] = (unsigned int)acc;
575 
576         carry = (int)(acc >> 32);
        /* Never compiled: the enclosing #if already requires BN_BITS2!=64 */
577 # if BN_BITS2==64
578         rp[7] = carry;
579 # endif
580     }
581 #else
    /*
     * Word-based variant: two rearrangements of the upper half are added
     * and two subtracted, with carries and borrows netted into `carry`.
     */
582     {
583         BN_ULONG t_d[BN_NIST_224_TOP];
584 
585         nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
586         carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
587         nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
588         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
589         nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
590         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
591         nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
592         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
593 
        /* With 64-bit words the carry is taken from the top word instead */
594 # if BN_BITS2==64
595         carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
596 # endif
597     }
598 #endif
    /* Default final step: trial subtraction of the modulus */
599     u.f = bn_sub_words;
600     if (carry > 0) {
601         carry =
602             (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
603                               BN_NIST_224_TOP);
604 #if BN_BITS2==64
605         carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
606 #endif
607     } else if (carry < 0) {
608         /*
609          * it's a bit more complicated logic in this case. if bn_add_words
610          * yields no carry, then result has to be adjusted by unconditionally
611          * *adding* the modulus. but if it does, then result has to be
612          * compared to the modulus and conditionally adjusted by
613          * *subtracting* the latter.
614          */
615         carry =
616             (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
617                               BN_NIST_224_TOP);
618         mask = 0 - (PTR_SIZE_INT) carry;
        /* Pick bn_sub_words if the addition carried, bn_add_words otherwise */
619         u.p = ((PTR_SIZE_INT) bn_sub_words & mask) |
620             ((PTR_SIZE_INT) bn_add_words & ~mask);
621     } else
622         carry = 1;
623 
624     /* otherwise it's effectively same as in BN_nist_mod_192... */
625     mask =
626         0 - (PTR_SIZE_INT) (*u.f) (c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP);
627     mask &= 0 - (PTR_SIZE_INT) carry;
    /* mask all-ones selects r_d, mask zero selects the adjusted copy c_d */
628     res = c_d;
629     res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
630                        ((PTR_SIZE_INT) r_d & mask));
631     nist_cp_bn(r_d, res, BN_NIST_224_TOP);
632     r->top = BN_NIST_224_TOP;
633     bn_correct_top(r);
634 
635     return 1;
636 }
638 
/*
 * nist_set_256(to, from, a1..a8): build an eight-limb (32-bit limbs) value
 * in `to`.  a1 selects the most significant limb and a8 the least
 * significant; each selector is a limb number of the full input, so 8 is
 * subtracted to index `from` (the upper half).  A selector of 0 gives a
 * negative index, which bn_cp_32 turns into a zero limb.
 */
638 #define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
639         { \
640         bn_cp_32(to, 0, from, (a8) - 8) \
641         bn_cp_32(to, 1, from, (a7) - 8) \
642         bn_cp_32(to, 2, from, (a6) - 8) \
643         bn_cp_32(to, 3, from, (a5) - 8) \
644         bn_cp_32(to, 4, from, (a4) - 8) \
645         bn_cp_32(to, 5, from, (a3) - 8) \
646         bn_cp_32(to, 6, from, (a2) - 8) \
647         bn_cp_32(to, 7, from, (a1) - 8) \
648         }
650 
/*
 * Compute r = a mod p for the NIST P-256 prime.
 *
 *   r     - receives the result; may be the same BIGNUM as a.
 *   a     - value to reduce.
 *   field - ignored: it is overwritten with the built-in P-256 prime.
 *   ctx   - only used when the work is delegated to BN_nnmod().
 *
 * Negative inputs and inputs at or above the _nist_p_256_sqr bound are
 * delegated to BN_nnmod().  a == p gives zero and a < p is copied through
 * unchanged.  Otherwise the upper half of a is folded into the low half with
 * additions and subtractions, and the signed carry that results is corrected
 * with a multiple of the modulus.
 *
 * Returns 1 on success and 0 if r cannot be expanded; on the delegated
 * paths the result of BN_nnmod()/BN_copy() determines the return value.
 */
650 int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
651                     BN_CTX *ctx)
652 {
653     int i, top = a->top;
654     int carry = 0;
655     register BN_ULONG *a_d = a->d, *r_d;
656     union {
657         BN_ULONG bn[BN_NIST_256_TOP];
658         unsigned int ui[BN_NIST_256_TOP * sizeof(BN_ULONG) /
659                         sizeof(unsigned int)];
660     } buf;
661     BN_ULONG c_d[BN_NIST_256_TOP], *res;
662     PTR_SIZE_INT mask;
    /* Function pointer and integer views of the same value, for masking */
663     union {
664         bn_addsub_f f;
665         PTR_SIZE_INT p;
666     } u;
667     static const BIGNUM _bignum_nist_p_256_sqr = {
668         (BN_ULONG *)_nist_p_256_sqr,
669         OSSL_NELEM(_nist_p_256_sqr),
670         OSSL_NELEM(_nist_p_256_sqr),
671         0, BN_FLG_STATIC_DATA
672     };
673 
674     field = &_bignum_nist_p_256; /* just to make sure */
675 
    /* Negative or at/above the _sqr bound: use the generic reduction */
676     if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_256_sqr) >= 0)
677         return BN_nnmod(r, a, field, ctx);
678 
    /* a == p reduces to zero; a < p is already reduced */
679     i = BN_ucmp(field, a);
680     if (i == 0) {
681         BN_zero(r);
682         return 1;
683     } else if (i > 0)
684         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
685 
686     if (r != a) {
687         if (!bn_wexpand(r, BN_NIST_256_TOP))
688             return 0;
689         r_d = r->d;
690         nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
691     } else
692         r_d = a_d;
693 
    /* buf receives the remaining (upper) words of a, zero-padded */
694     nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP, top - BN_NIST_256_TOP,
695                  BN_NIST_256_TOP);
696 
697 #if defined(NIST_INT64)
    /*
     * Column-wise accumulation over 32-bit limbs with a signed 64-bit
     * accumulator: each group below sums one output limb, stores its low
     * 32 bits and carries the rest forward.
     */
698     {
699         NIST_INT64 acc;         /* accumulator */
700         unsigned int *rp = (unsigned int *)r_d;
701         const unsigned int *bp = (const unsigned int *)buf.ui;
702 
703         acc = rp[0];
704         acc += bp[8 - 8];
705         acc += bp[9 - 8];
706         acc -= bp[11 - 8];
707         acc -= bp[12 - 8];
708         acc -= bp[13 - 8];
709         acc -= bp[14 - 8];
710         rp[0] = (unsigned int)acc;
711         acc >>= 32;
712 
713         acc += rp[1];
714         acc += bp[9 - 8];
715         acc += bp[10 - 8];
716         acc -= bp[12 - 8];
717         acc -= bp[13 - 8];
718         acc -= bp[14 - 8];
719         acc -= bp[15 - 8];
720         rp[1] = (unsigned int)acc;
721         acc >>= 32;
722 
723         acc += rp[2];
724         acc += bp[10 - 8];
725         acc += bp[11 - 8];
726         acc -= bp[13 - 8];
727         acc -= bp[14 - 8];
728         acc -= bp[15 - 8];
729         rp[2] = (unsigned int)acc;
730         acc >>= 32;
731 
732         acc += rp[3];
733         acc += bp[11 - 8];
734         acc += bp[11 - 8];
735         acc += bp[12 - 8];
736         acc += bp[12 - 8];
737         acc += bp[13 - 8];
738         acc -= bp[15 - 8];
739         acc -= bp[8 - 8];
740         acc -= bp[9 - 8];
741         rp[3] = (unsigned int)acc;
742         acc >>= 32;
743 
744         acc += rp[4];
745         acc += bp[12 - 8];
746         acc += bp[12 - 8];
747         acc += bp[13 - 8];
748         acc += bp[13 - 8];
749         acc += bp[14 - 8];
750         acc -= bp[9 - 8];
751         acc -= bp[10 - 8];
752         rp[4] = (unsigned int)acc;
753         acc >>= 32;
754 
755         acc += rp[5];
756         acc += bp[13 - 8];
757         acc += bp[13 - 8];
758         acc += bp[14 - 8];
759         acc += bp[14 - 8];
760         acc += bp[15 - 8];
761         acc -= bp[10 - 8];
762         acc -= bp[11 - 8];
763         rp[5] = (unsigned int)acc;
764         acc >>= 32;
765 
766         acc += rp[6];
767         acc += bp[14 - 8];
768         acc += bp[14 - 8];
769         acc += bp[15 - 8];
770         acc += bp[15 - 8];
771         acc += bp[14 - 8];
772         acc += bp[13 - 8];
773         acc -= bp[8 - 8];
774         acc -= bp[9 - 8];
775         rp[6] = (unsigned int)acc;
776         acc >>= 32;
777 
778         acc += rp[7];
779         acc += bp[15 - 8];
780         acc += bp[15 - 8];
781         acc += bp[15 - 8];
782         acc += bp[8 - 8];
783         acc -= bp[10 - 8];
784         acc -= bp[11 - 8];
785         acc -= bp[12 - 8];
786         acc -= bp[13 - 8];
787         rp[7] = (unsigned int)acc;
788 
789         carry = (int)(acc >> 32);
790     }
791 #else
    /*
     * Word-based variant: S1 and S2 are summed and doubled before being
     * added, S3 and S4 are added, D1..D4 are subtracted; carries and borrows
     * are netted into `carry`.  c_d doubles as scratch space for S2 here.
     */
792     {
793         BN_ULONG t_d[BN_NIST_256_TOP];
794 
795         /*
796          * S1
797          */
798         nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
799         /*
800          * S2
801          */
802         nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
803         carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
804         /* left shift */
805         {
806             register BN_ULONG *ap, t, c;
807             ap = t_d;
808             c = 0;
809             for (i = BN_NIST_256_TOP; i != 0; --i) {
810                 t = *ap;
811                 *(ap++) = ((t << 1) | c) & BN_MASK2;
812                 c = (t & BN_TBIT) ? 1 : 0;
813             }
814             carry <<= 1;
815             carry |= c;
816         }
817         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
818         /*
819          * S3
820          */
821         nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
822         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
823         /*
824          * S4
825          */
826         nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
827         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
828         /*
829          * D1
830          */
831         nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
832         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
833         /*
834          * D2
835          */
836         nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
837         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
838         /*
839          * D3
840          */
841         nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
842         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
843         /*
844          * D4
845          */
846         nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
847         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
848 
849     }
850 #endif
851     /* see BN_nist_mod_224 for explanation */
852     u.f = bn_sub_words;
853     if (carry > 0)
854         carry =
855             (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
856                               BN_NIST_256_TOP);
857     else if (carry < 0) {
858         carry =
859             (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
860                               BN_NIST_256_TOP);
861         mask = 0 - (PTR_SIZE_INT) carry;
        /* Pick bn_sub_words if the addition carried, bn_add_words otherwise */
862         u.p = ((PTR_SIZE_INT) bn_sub_words & mask) |
863             ((PTR_SIZE_INT) bn_add_words & ~mask);
864     } else
865         carry = 1;
866 
867     mask =
868         0 - (PTR_SIZE_INT) (*u.f) (c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP);
869     mask &= 0 - (PTR_SIZE_INT) carry;
    /* mask all-ones selects r_d, mask zero selects the adjusted copy c_d */
870     res = c_d;
871     res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
872                        ((PTR_SIZE_INT) r_d & mask));
873     nist_cp_bn(r_d, res, BN_NIST_256_TOP);
874     r->top = BN_NIST_256_TOP;
875     bn_correct_top(r);
876 
877     return 1;
878 }
880 
/*
 * nist_set_384(to, from, a1..a12): build a twelve-limb (32-bit limbs) value
 * in `to`.  a1 selects the most significant limb and a12 the least
 * significant; each selector is a limb number of the full input, so 12 is
 * subtracted to index `from` (the upper half).  A selector of 0 gives a
 * negative index, which bn_cp_32 turns into a zero limb.
 */
880 #define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
881         { \
882         bn_cp_32(to, 0, from,  (a12) - 12) \
883         bn_cp_32(to, 1, from,  (a11) - 12) \
884         bn_cp_32(to, 2, from,  (a10) - 12) \
885         bn_cp_32(to, 3, from,  (a9) - 12)  \
886         bn_cp_32(to, 4, from,  (a8) - 12)  \
887         bn_cp_32(to, 5, from,  (a7) - 12)  \
888         bn_cp_32(to, 6, from,  (a6) - 12)  \
889         bn_cp_32(to, 7, from,  (a5) - 12)  \
890         bn_cp_32(to, 8, from,  (a4) - 12)  \
891         bn_cp_32(to, 9, from,  (a3) - 12)  \
892         bn_cp_32(to, 10, from, (a2) - 12)  \
893         bn_cp_32(to, 11, from, (a1) - 12)  \
894         }
896 
/*
 * BN_nist_mod_384 - reduce |a| modulo the NIST P-384 prime and store the
 * result in |r|.
 *
 * |field| is not consulted: it is overwritten with the built-in P-384
 * modulus before use.  |ctx| is only handed on to BN_nnmod() on the
 * fallback path.
 *
 * Inputs that are negative, or not smaller than the square of the modulus,
 * are passed to the generic BN_nnmod().  An input equal to the modulus
 * gives zero, and an input below it is returned unchanged.  Everything else
 * is reduced here: the words above the low 384 bits are folded back into
 * the low words (terms S1..S6 are added, D1..D3 are subtracted) and the
 * result is then corrected by a small multiple of the modulus.
 *
 * Returns 1 on success and 0 if |r| cannot be expanded; on the BN_nnmod()
 * and BN_copy() paths the outcome of those calls decides the return value.
 */
897 int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
898                     BN_CTX *ctx)
899 {
900     int i, top = a->top;
901     int carry = 0;
902     register BN_ULONG *r_d, *a_d = a->d;
        /* The same storage viewed as BN_ULONG words and as unsigned ints. */
903     union {
904         BN_ULONG bn[BN_NIST_384_TOP];
905         unsigned int ui[BN_NIST_384_TOP * sizeof(BN_ULONG) /
906                         sizeof(unsigned int)];
907     } buf;
908     BN_ULONG c_d[BN_NIST_384_TOP], *res;
909     PTR_SIZE_INT mask;
        /* Lets the final add/sub routine be chosen with integer masking. */
910     union {
911         bn_addsub_f f;
912         PTR_SIZE_INT p;
913     } u;
914     static const BIGNUM _bignum_nist_p_384_sqr = {
915         (BN_ULONG *)_nist_p_384_sqr,
916         OSSL_NELEM(_nist_p_384_sqr),
917         OSSL_NELEM(_nist_p_384_sqr),
918         0, BN_FLG_STATIC_DATA
919     };
920 
921     field = &_bignum_nist_p_384; /* just to make sure */
922 
        /* Out of range for the fast path: use the generic reduction. */
923     if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_384_sqr) >= 0)
924         return BN_nnmod(r, a, field, ctx);
925 
        /* a == p gives zero; a < p is already reduced. */
926     i = BN_ucmp(field, a);
927     if (i == 0) {
928         BN_zero(r);
929         return 1;
930     } else if (i > 0)
931         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
932 
        /*
         * From here on r_d refers to the low BN_NIST_384_TOP words of the
         * input, either copied into |r| or, when r == a, used in place.
         */
933     if (r != a) {
934         if (!bn_wexpand(r, BN_NIST_384_TOP))
935             return 0;
936         r_d = r->d;
937         nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
938     } else
939         r_d = a_d;
940 
        /*
         * Save the words of |a| above the low 384 bits in buf.
         * NOTE(review): nist_cp_bn_0() is defined outside this excerpt;
         * presumably it zero-fills buf up to BN_NIST_384_TOP words - confirm.
         */
941     nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP, top - BN_NIST_384_TOP,
942                  BN_NIST_384_TOP);
943 
944 #if defined(NIST_INT64)
945     {
946         NIST_INT64 acc;         /* accumulator */
947         unsigned int *rp = (unsigned int *)r_d;
948         const unsigned int *bp = (const unsigned int *)buf.ui;
949 
            /*
             * Column-wise arithmetic on 32-bit words: rp[] views the low
             * words of the result and bp[] the saved high words as arrays
             * of unsigned int, and "k - 12" selects 32-bit word k of the
             * input.  Each group below forms one output word; "acc >>= 32"
             * then carries what is left into the next group.
             * NOTE(review): acc has to be a signed 64-bit type for the
             * subtractions and for the negative-carry handling further down
             * to work; NIST_INT64 is defined outside this function -
             * confirm.
             * NOTE(review): viewing BN_ULONG storage as unsigned int looks
             * dependent on word layout when BN_ULONG is wider than 32 bits -
             * confirm how NIST_INT64 is gated.
             */
950         acc = rp[0];
951         acc += bp[12 - 12];
952         acc += bp[21 - 12];
953         acc += bp[20 - 12];
954         acc -= bp[23 - 12];
955         rp[0] = (unsigned int)acc;
956         acc >>= 32;
957 
958         acc += rp[1];
959         acc += bp[13 - 12];
960         acc += bp[22 - 12];
961         acc += bp[23 - 12];
962         acc -= bp[12 - 12];
963         acc -= bp[20 - 12];
964         rp[1] = (unsigned int)acc;
965         acc >>= 32;
966 
967         acc += rp[2];
968         acc += bp[14 - 12];
969         acc += bp[23 - 12];
970         acc -= bp[13 - 12];
971         acc -= bp[21 - 12];
972         rp[2] = (unsigned int)acc;
973         acc >>= 32;
974 
975         acc += rp[3];
976         acc += bp[15 - 12];
977         acc += bp[12 - 12];
978         acc += bp[20 - 12];
979         acc += bp[21 - 12];
980         acc -= bp[14 - 12];
981         acc -= bp[22 - 12];
982         acc -= bp[23 - 12];
983         rp[3] = (unsigned int)acc;
984         acc >>= 32;
985 
            /* Word 21 is added twice and word 23 subtracted twice here. */
986         acc += rp[4];
987         acc += bp[21 - 12];
988         acc += bp[21 - 12];
989         acc += bp[16 - 12];
990         acc += bp[13 - 12];
991         acc += bp[12 - 12];
992         acc += bp[20 - 12];
993         acc += bp[22 - 12];
994         acc -= bp[15 - 12];
995         acc -= bp[23 - 12];
996         acc -= bp[23 - 12];
997         rp[4] = (unsigned int)acc;
998         acc >>= 32;
999 
             /* Word 22 is added twice here. */
1000         acc += rp[5];
1001         acc += bp[22 - 12];
1002         acc += bp[22 - 12];
1003         acc += bp[17 - 12];
1004         acc += bp[14 - 12];
1005         acc += bp[13 - 12];
1006         acc += bp[21 - 12];
1007         acc += bp[23 - 12];
1008         acc -= bp[16 - 12];
1009         rp[5] = (unsigned int)acc;
1010         acc >>= 32;
1011 
             /* Word 23 is added twice here. */
1012         acc += rp[6];
1013         acc += bp[23 - 12];
1014         acc += bp[23 - 12];
1015         acc += bp[18 - 12];
1016         acc += bp[15 - 12];
1017         acc += bp[14 - 12];
1018         acc += bp[22 - 12];
1019         acc -= bp[17 - 12];
1020         rp[6] = (unsigned int)acc;
1021         acc >>= 32;
1022 
1023         acc += rp[7];
1024         acc += bp[19 - 12];
1025         acc += bp[16 - 12];
1026         acc += bp[15 - 12];
1027         acc += bp[23 - 12];
1028         acc -= bp[18 - 12];
1029         rp[7] = (unsigned int)acc;
1030         acc >>= 32;
1031 
1032         acc += rp[8];
1033         acc += bp[20 - 12];
1034         acc += bp[17 - 12];
1035         acc += bp[16 - 12];
1036         acc -= bp[19 - 12];
1037         rp[8] = (unsigned int)acc;
1038         acc >>= 32;
1039 
1040         acc += rp[9];
1041         acc += bp[21 - 12];
1042         acc += bp[18 - 12];
1043         acc += bp[17 - 12];
1044         acc -= bp[20 - 12];
1045         rp[9] = (unsigned int)acc;
1046         acc >>= 32;
1047 
1048         acc += rp[10];
1049         acc += bp[22 - 12];
1050         acc += bp[19 - 12];
1051         acc += bp[18 - 12];
1052         acc -= bp[21 - 12];
1053         rp[10] = (unsigned int)acc;
1054         acc >>= 32;
1055 
1056         acc += rp[11];
1057         acc += bp[23 - 12];
1058         acc += bp[20 - 12];
1059         acc += bp[19 - 12];
1060         acc -= bp[22 - 12];
1061         rp[11] = (unsigned int)acc;
1062 
             /* What is left above the top word is the net carry. */
1063         carry = (int)(acc >> 32);
1064     }
1065 #else
1066     {
1067         BN_ULONG t_d[BN_NIST_384_TOP];
1068 
1069         /*
1070          * S1
1071          */
             /*
              * This term is built with the 256-bit helper, doubled by the
              * one-bit left shift below, and added at an offset of 128 bits
              * into the result.
              * NOTE(review): nist_set_256() is defined outside this excerpt;
              * the "- 4" presumably compensates for the offset that macro
              * subtracts from its selectors - confirm.
              */
1072         nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23 - 4, 22 - 4, 21 - 4);
1073         /* left shift */
1074         {
1075             register BN_ULONG *ap, t, c;
1076             ap = t_d;
1077             c = 0;
                 /* Shift three words left by one bit, rippling the top bit. */
1078             for (i = 3; i != 0; --i) {
1079                 t = *ap;
1080                 *(ap++) = ((t << 1) | c) & BN_MASK2;
1081                 c = (t & BN_TBIT) ? 1 : 0;
1082             }
                 /* The bit shifted out of the third word lands in the fourth. */
1083             *ap = c;
1084         }
1085         carry =
1086             (int)bn_add_words(r_d + (128 / BN_BITS2), r_d + (128 / BN_BITS2),
1087                               t_d, BN_NIST_256_TOP);
1088         /*
1089          * S2
1090          */
             /* The saved high words are added as they are. */
1091         carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
1092         /*
1093          * S3
1094          */
1095         nist_set_384(t_d, buf.bn, 20, 19, 18, 17, 16, 15, 14, 13, 12, 23, 22,
1096                      21);
1097         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1098         /*
1099          * S4
1100          */
1101         nist_set_384(t_d, buf.bn, 19, 18, 17, 16, 15, 14, 13, 12, 20, 0, 23,
1102                      0);
1103         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1104         /*
1105          * S5
1106          */
1107         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 23, 22, 21, 20, 0, 0, 0, 0);
1108         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1109         /*
1110          * S6
1111          */
1112         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 23, 22, 21, 0, 0, 20);
1113         carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1114         /*
1115          * D1
1116          */
             /* The D terms are subtracted; each borrow lowers the net carry. */
1117         nist_set_384(t_d, buf.bn, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12,
1118                      23);
1119         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1120         /*
1121          * D2
1122          */
1123         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 22, 21, 20, 0);
1124         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1125         /*
1126          * D3
1127          */
1128         nist_set_384(t_d, buf.bn, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 0, 0);
1129         carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1130 
1131     }
1132 #endif
1133     /* see BN_nist_mod_224 for explanation */
         /*
          * carry is the signed overflow of the folding above.  A positive
          * value is cancelled by subtracting the table entry
          * _nist_p_384[carry - 1], a negative one by adding
          * _nist_p_384[-carry - 1].  In the negative case the value returned
          * by that addition, turned into an all-ones or all-zero mask, picks
          * the routine for the last step: bn_sub_words when the addition
          * carried out, bn_add_words otherwise.
          */
1134     u.f = bn_sub_words;
1135     if (carry > 0)
1136         carry =
1137             (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
1138                               BN_NIST_384_TOP);
1139     else if (carry < 0) {
1140         carry =
1141             (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
1142                               BN_NIST_384_TOP);
1143         mask = 0 - (PTR_SIZE_INT) carry;
1144         u.p = ((PTR_SIZE_INT) bn_sub_words & mask) |
1145             ((PTR_SIZE_INT) bn_add_words & ~mask);
1146     } else
1147         carry = 1;
1148 
         /*
          * Last step: apply the chosen routine with the modulus itself,
          * writing to the scratch array c_d.  mask is all ones only when both
          * that call and the adjustment above returned non-zero; r_d is then
          * kept, otherwise c_d is taken.  The choice is made by masking the
          * two pointers rather than by branching.
          */
1149     mask =
1150         0 - (PTR_SIZE_INT) (*u.f) (c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP);
1151     mask &= 0 - (PTR_SIZE_INT) carry;
1152     res = c_d;
1153     res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
1154                        ((PTR_SIZE_INT) r_d & mask));
1155     nist_cp_bn(r_d, res, BN_NIST_384_TOP);
         /* Claim the full width, then let bn_correct_top() adjust r->top. */
1156     r->top = BN_NIST_384_TOP;
1157     bn_correct_top(r);
1158 
1159     return 1;
1160 }
1161 
/*
 * Shift counts and mask used by BN_nist_mod_521() to split a value at bit
 * 521, which does not fall on a word boundary:
 *   BN_NIST_521_RSHIFT   - 521 modulo BN_BITS2, i.e. how many of the low
 *                          521 bits sit in the top word;
 *   BN_NIST_521_LSHIFT   - the remaining bits of that word;
 *   BN_NIST_521_TOP_MASK - BN_MASK2 shifted right by LSHIFT, i.e. a mask
 *                          that keeps the low RSHIFT bits of a word
 *                          (assuming BN_MASK2 is the all-ones word).
 */
1162 #define BN_NIST_521_RSHIFT      (521%BN_BITS2)
1163 #define BN_NIST_521_LSHIFT      (BN_BITS2-BN_NIST_521_RSHIFT)
1164 #define BN_NIST_521_TOP_MASK    ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
1165 
/*
 * BN_nist_mod_521 - reduce |a| modulo the NIST P-521 prime and store the
 * result in |r|.
 *
 * |field| is not consulted: it is overwritten with the built-in P-521
 * modulus before use.  |ctx| is only handed on to BN_nnmod() on the
 * fallback path.
 *
 * Inputs that are negative, or not smaller than the square of the modulus,
 * are passed to the generic BN_nnmod().  An input equal to the modulus
 * gives zero, and an input below it is returned unchanged.  Otherwise the
 * part of |a| above bit 521 is added to the low 521 bits, followed by one
 * conditional subtraction of the modulus.
 *
 * Returns 1 on success and 0 if |r| cannot be expanded; on the BN_nnmod()
 * and BN_copy() paths the outcome of those calls decides the return value.
 */
1166 int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
1167                     BN_CTX *ctx)
1168 {
1169     int top = a->top, i;
1170     BN_ULONG *r_d, *a_d = a->d, t_d[BN_NIST_521_TOP], val, tmp, *res;
1171     PTR_SIZE_INT mask;
1172     static const BIGNUM _bignum_nist_p_521_sqr = {
1173         (BN_ULONG *)_nist_p_521_sqr,
1174         OSSL_NELEM(_nist_p_521_sqr),
1175         OSSL_NELEM(_nist_p_521_sqr),
1176         0, BN_FLG_STATIC_DATA
1177     };
1178 
1179     field = &_bignum_nist_p_521; /* just to make sure */
1180 
         /* Out of range for the fast path: use the generic reduction. */
1181     if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_521_sqr) >= 0)
1182         return BN_nnmod(r, a, field, ctx);
1183 
         /* a == p gives zero; a < p is already reduced. */
1184     i = BN_ucmp(field, a);
1185     if (i == 0) {
1186         BN_zero(r);
1187         return 1;
1188     } else if (i > 0)
1189         return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1190 
         /*
          * From here on r_d refers to the low BN_NIST_521_TOP words of the
          * input, either copied into |r| or, when r == a, used in place.
          */
1191     if (r != a) {
1192         if (!bn_wexpand(r, BN_NIST_521_TOP))
1193             return 0;
1194         r_d = r->d;
1195         nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
1196     } else
1197         r_d = a_d;
1198 
1199     /* upper 521 bits, copy ... */
         /*
          * The copy starts at the word that contains bit 521, so that word
          * is shared with the low part until the shift below.
          * NOTE(review): nist_cp_bn_0() is defined outside this excerpt;
          * presumably it zero-fills t_d up to BN_NIST_521_TOP words - confirm.
          */
1200     nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
1201                  top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
1202     /* ... and right shift */
1203     for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
1204 #if 0
1205         /*
1206          * MSC ARM compiler [version 2013, presumably even earlier,
1207          * much earlier] miscompiles this code, but not one in
1208          * #else section. See RT#3541.
1209          */
1210         tmp = val >> BN_NIST_521_RSHIFT;
1211         val = t_d[i + 1];
1212         t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2;
1213 #else
             /*
              * Each output word takes the high bits of the current word and
              * the low bits of the next; tmp carries the next word into the
              * following iteration.
              */
1214         t_d[i] = (val >> BN_NIST_521_RSHIFT |
1215                   (tmp = t_d[i + 1]) << BN_NIST_521_LSHIFT) & BN_MASK2;
1216         val = tmp;
1217 #endif
1218     }
         /* i is now BN_NIST_521_TOP - 1, the index of the top word. */
1219     t_d[i] = val >> BN_NIST_521_RSHIFT;
1220     /* lower 521 bits */
1221     r_d[i] &= BN_NIST_521_TOP_MASK;
1222 
         /*
          * Add the high part to the low part; the value returned by
          * bn_add_words() is not used.  Then subtract the modulus into t_d
          * as a trial.  mask becomes all ones when that subtraction returns
          * a borrow, in which case r_d is kept; otherwise the difference in
          * t_d is taken.  The choice is made by masking the two pointers
          * rather than by branching.
          */
1223     bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
1224     mask =
1225         0 - (PTR_SIZE_INT) bn_sub_words(t_d, r_d, _nist_p_521,
1226                                         BN_NIST_521_TOP);
1227     res = t_d;
1228     res = (BN_ULONG *)(((PTR_SIZE_INT) res & ~mask) |
1229                        ((PTR_SIZE_INT) r_d & mask));
1230     nist_cp_bn(r_d, res, BN_NIST_521_TOP);
         /* Claim the full width, then let bn_correct_top() adjust r->top. */
1231     r->top = BN_NIST_521_TOP;
1232     bn_correct_top(r);
1233 
1234     return 1;
1235 }
1236 
1237 int (*BN_nist_mod_func(const BIGNUM *p)) (BIGNUM *r, const BIGNUM *a,
1238                                           const BIGNUM *field, BN_CTX *ctx) {
1239     if (BN_ucmp(&_bignum_nist_p_192, p) == 0)
1240         return BN_nist_mod_192;
1241     if (BN_ucmp(&_bignum_nist_p_224, p) == 0)
1242         return BN_nist_mod_224;
1243     if (BN_ucmp(&_bignum_nist_p_256, p) == 0)
1244         return BN_nist_mod_256;
1245     if (BN_ucmp(&_bignum_nist_p_384, p) == 0)
1246         return BN_nist_mod_384;
1247     if (BN_ucmp(&_bignum_nist_p_521, p) == 0)
1248         return BN_nist_mod_521;
1249     return 0;
1250 }
1251