1 /* $OpenBSD: bn_nist.c,v 1.22 2022/07/31 14:38:38 jsing Exp $ */
2 /*
3  * Written by Nils Larsch for the OpenSSL project
4  */
5 /* ====================================================================
6  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * 3. All advertising materials mentioning features or use of this
21  *    software must display the following acknowledgment:
22  *    "This product includes software developed by the OpenSSL Project
23  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24  *
25  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26  *    endorse or promote products derived from this software without
27  *    prior written permission. For written permission, please contact
28  *    openssl-core@openssl.org.
29  *
30  * 5. Products derived from this software may not be called "OpenSSL"
31  *    nor may "OpenSSL" appear in their names without prior written
32  *    permission of the OpenSSL Project.
33  *
34  * 6. Redistributions of any form whatsoever must retain the following
35  *    acknowledgment:
36  *    "This product includes software developed by the OpenSSL Project
37  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38  *
39  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
43  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50  * OF THE POSSIBILITY OF SUCH DAMAGE.
51  * ====================================================================
52  *
53  * This product includes cryptographic software written by Eric Young
54  * (eay@cryptsoft.com).  This product includes software written by Tim
55  * Hudson (tjh@cryptsoft.com).
56  *
57  */
58 
59 #include <endian.h>
60 #include <stdint.h>
61 #include <string.h>
62 
63 #include "bn_lcl.h"
64 
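/*
 * Compile-time assertion: if the condition is false, _ctassert gets a
 * negative array size and the declaration fails to compile; the extern
 * declaration itself emits no object code.
 */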
65 #define CTASSERT(x)	extern char _ctassert[(x) ? 1 : -1] \
66 			    __attribute__((__unused__))
67 
68 #define BN_NIST_192_TOP	((192+BN_BITS2-1)/BN_BITS2)
69 #define BN_NIST_224_TOP	((224+BN_BITS2-1)/BN_BITS2)
70 #define BN_NIST_256_TOP	((256+BN_BITS2-1)/BN_BITS2)
71 #define BN_NIST_384_TOP	((384+BN_BITS2-1)/BN_BITS2)
72 #define BN_NIST_521_TOP	((521+BN_BITS2-1)/BN_BITS2)
73 
74 /* pre-computed tables are "carry-less" values of modulus*(i+1) */
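/*
 * For example, with p = 2^192 - 2^64 - 1 (P-192), doubling gives
 * 2*p = 2^193 - 2^65 - 2; dropping the carry out of bit 192 leaves
 * 2^192 - 2^65 - 2, which is exactly the second row of _nist_p_192 below.
 */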
75 #if BN_BITS2 == 64
76 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
77 	{0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL},
78 	{0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL},
79 	{0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL}
80 };
81 static const BN_ULONG _nist_p_192_sqr[] = {
82 	0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
83 	0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
84 };
85 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
86 	{
87 		0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
88 		0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL
89 	},
90 	{
91 		0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
92 		0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL
93 	} /* this one is "carry-full" */
94 };
95 static const BN_ULONG _nist_p_224_sqr[] = {
96 	0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
97 	0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
98 	0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
99 	0xFFFFFFFFFFFFFFFFULL
100 };
101 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
102 	{
103 		0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
104 		0x0000000000000000ULL, 0xFFFFFFFF00000001ULL
105 	},
106 	{
107 		0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
108 		0x0000000000000000ULL, 0xFFFFFFFE00000002ULL
109 	},
110 	{
111 		0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
112 		0x0000000000000000ULL, 0xFFFFFFFD00000003ULL
113 	},
114 	{
115 		0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
116 		0x0000000000000000ULL, 0xFFFFFFFC00000004ULL
117 	},
118 	{
119 		0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
120 		0x0000000000000000ULL, 0xFFFFFFFB00000005ULL
121 	},
122 };
123 static const BN_ULONG _nist_p_256_sqr[] = {
124 	0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
125 	0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
126 	0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
127 	0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
128 };
129 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
130 	{
131 		0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL,
132 		0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL,
133 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
134 	},
135 	{
136 		0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL,
137 		0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL,
138 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
139 	},
140 	{
141 		0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL,
142 		0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL,
143 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
144 	},
145 	{
146 		0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL,
147 		0xFFFFFFFFFFFFFFFBULL, 0xFFFFFFFFFFFFFFFFULL,
148 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
149 	},
150 	{
151 		0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL,
152 		0xFFFFFFFFFFFFFFFAULL, 0xFFFFFFFFFFFFFFFFULL,
153 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
154 	},
155 };
156 static const BN_ULONG _nist_p_384_sqr[] = {
157 	0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
158 	0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
159 	0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
160 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
161 };
162 static const BN_ULONG _nist_p_521[] = {
163 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
164 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
165 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0x00000000000001FFULL
166 };
167 static const BN_ULONG _nist_p_521_sqr[] = {
168 	0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
169 	0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
170 	0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
171 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
172 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
173 	0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
174 };
175 #elif BN_BITS2 == 32
176 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
177 	{
178 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
179 		0xFFFFFFFF, 0xFFFFFFFF
180 	},
181 	{
182 		0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF,
183 		0xFFFFFFFF, 0xFFFFFFFF
184 	},
185 	{
186 		0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF,
187 		0xFFFFFFFF, 0xFFFFFFFF
188 	}
189 };
190 static const BN_ULONG _nist_p_192_sqr[] = {
191 	0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
192 	0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
193 };
194 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
195 	{
196 		0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
197 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
198 	},
199 	{
200 		0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
201 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
202 	}
203 };
204 static const BN_ULONG _nist_p_224_sqr[] = {
205 	0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
206 	0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
207 	0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
208 	0xFFFFFFFF, 0xFFFFFFFF
209 };
210 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
211 	{
212 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
213 		0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF
214 	},
215 	{
216 		0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
217 		0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE
218 	},
219 	{
220 		0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
221 		0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD
222 	},
223 	{
224 		0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
225 		0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC
226 	},
227 	{
228 		0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
229 		0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB
230 	},
231 };
232 static const BN_ULONG _nist_p_256_sqr[] = {
233 	0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
234 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
235 	0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
236 	0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
237 };
238 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
239 	{
240 		0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF,
241 		0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
242 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
243 	},
244 	{
245 		0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE,
246 		0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
247 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
248 	},
249 	{
250 		0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD,
251 		0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
252 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
253 	},
254 	{
255 		0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC,
256 		0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
257 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
258 	},
259 	{
260 		0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB,
261 		0xFFFFFFFA, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
262 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
263 	},
264 };
265 static const BN_ULONG _nist_p_384_sqr[] = {
266 	0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
267 	0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
268 	0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
269 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
270 };
271 static const BN_ULONG _nist_p_521[] = {
272 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
273 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
274 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
275 	0xFFFFFFFF, 0x000001FF
276 };
277 static const BN_ULONG _nist_p_521_sqr[] = {
278 	0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
279 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
280 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
281 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
282 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
283 	0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
284 };
285 #else
286 #error "unsupported BN_BITS2"
287 #endif
288 
289 static const BIGNUM _bignum_nist_p_192 = {
290 	.d = (BN_ULONG *)_nist_p_192[0],
291 	.top = BN_NIST_192_TOP,
292 	.dmax = BN_NIST_192_TOP,
293 	.neg = 0,
294 	.flags = BN_FLG_STATIC_DATA,
295 };
296 
297 static const BIGNUM _bignum_nist_p_192_sqr = {
298 	.d = (BN_ULONG *)_nist_p_192_sqr,
299 	.top = sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]),
300 	.dmax = sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]),
301 	.neg = 0,
302 	.flags = BN_FLG_STATIC_DATA,
303 };
304 
305 static const BIGNUM _bignum_nist_p_224 = {
306 	.d = (BN_ULONG *)_nist_p_224[0],
307 	.top = BN_NIST_224_TOP,
308 	.dmax = BN_NIST_224_TOP,
309 	.neg = 0,
310 	.flags = BN_FLG_STATIC_DATA,
311 };
312 
313 static const BIGNUM _bignum_nist_p_224_sqr = {
314 	.d = (BN_ULONG *)_nist_p_224_sqr,
315 	.top = sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]),
316 	.dmax = sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]),
317 	.neg = 0,
318 	.flags = BN_FLG_STATIC_DATA,
319 };
320 
321 static const BIGNUM _bignum_nist_p_256 = {
322 	.d = (BN_ULONG *)_nist_p_256[0],
323 	.top = BN_NIST_256_TOP,
324 	.dmax = BN_NIST_256_TOP,
325 	.neg = 0,
326 	.flags = BN_FLG_STATIC_DATA,
327 };
328 
329 static const BIGNUM _bignum_nist_p_256_sqr = {
330 	.d = (BN_ULONG *)_nist_p_256_sqr,
331 	.top = sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]),
332 	.dmax = sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]),
333 	.neg = 0,
334 	.flags = BN_FLG_STATIC_DATA,
335 };
336 
337 static const BIGNUM _bignum_nist_p_384 = {
338 	.d = (BN_ULONG *)_nist_p_384[0],
339 	.top = BN_NIST_384_TOP,
340 	.dmax = BN_NIST_384_TOP,
341 	.neg = 0,
342 	.flags = BN_FLG_STATIC_DATA,
343 };
344 
345 static const BIGNUM _bignum_nist_p_384_sqr = {
346 	.d = (BN_ULONG *)_nist_p_384_sqr,
347 	.top = sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]),
348 	.dmax = sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]),
349 	.neg = 0,
350 	.flags = BN_FLG_STATIC_DATA,
351 };
352 
353 static const BIGNUM _bignum_nist_p_521 = {
354 	.d = (BN_ULONG *)_nist_p_521,
355 	.top = BN_NIST_521_TOP,
356 	.dmax = BN_NIST_521_TOP,
357 	.neg = 0,
358 	.flags = BN_FLG_STATIC_DATA,
359 };
360 
361 static const BIGNUM _bignum_nist_p_521_sqr = {
362 	.d = (BN_ULONG *)_nist_p_521_sqr,
363 	.top = sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]),
364 	.dmax = sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]),
365 	.neg = 0,
366 	.flags = BN_FLG_STATIC_DATA,
367 };
368 
369 const BIGNUM *
370 BN_get0_nist_prime_192(void)
371 {
372 	return &_bignum_nist_p_192;
373 }
374 
375 const BIGNUM *
376 BN_get0_nist_prime_224(void)
377 {
378 	return &_bignum_nist_p_224;
379 }
380 
381 const BIGNUM *
382 BN_get0_nist_prime_256(void)
383 {
384 	return &_bignum_nist_p_256;
385 }
386 
387 const BIGNUM *
388 BN_get0_nist_prime_384(void)
389 {
390 	return &_bignum_nist_p_384;
391 }
392 
393 const BIGNUM *
394 BN_get0_nist_prime_521(void)
395 {
396 	return &_bignum_nist_p_521;
397 }
398 
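/*
 * Example use of the getters above together with the matching reduction
 * routine. This is an illustrative sketch only: the helper name is made
 * up and the block is kept out of the build with #if 0.
 */
#if 0
static int
example_p256_reduce(BIGNUM *r, const BIGNUM *a)
{
	const BIGNUM *p256 = BN_get0_nist_prime_256();
	BN_CTX *ctx;
	int ret = 0;

	if ((ctx = BN_CTX_new()) == NULL)
		return 0;
	/* r = a mod p256, using the fast NIST reduction below. */
	if (!BN_nist_mod_256(r, a, p256, ctx))
		goto err;
	ret = 1;
 err:
	BN_CTX_free(ctx);
	return ret;
}
#endif
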
399 static void
400 nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
401 {
402 	int i;
403 
404 #ifdef BN_DEBUG
405 	OPENSSL_assert(top <= max);
406 #endif
407 	for (i = 0; i < top; i++)
408 		dst[i] = src[i];
409 	for (; i < max; i++)
410 		dst[i] = 0;
411 }
412 
413 static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
414 {
415 	int i;
416 
417 	for (i = 0; i < top; i++)
418 		dst[i] = src[i];
419 }
420 
421 #if BN_BITS2 == 64
422 #define bn_cp_64(to, n, from, m)	(to)[n] = (m>=0)?((from)[m]):0;
423 #define bn_64_set_0(to, n)		(to)[n] = (BN_ULONG)0;
424 /*
425  * The two following macros assume that they are called in sequence
426  * with *ascending* n, i.e. exactly as they are used below.
427  */
428 #define bn_cp_32_naked(to, n, from, m)	(((n)&1)?(to[(n)/2]|=((m)&1)?(from[(m)/2]&BN_MASK2h):(from[(m)/2]<<32))\
429 						:(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
430 #define bn_32_set_0(to, n)		(((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
431 #define bn_cp_32(to,n,from,m)		((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
432 # if BYTE_ORDER == LITTLE_ENDIAN
433 #  if defined(_LP64)
434 #   define NIST_INT64 long
435 #  else
436 #   define NIST_INT64 long long
437 #  endif
438 # endif
439 #else
440 #define bn_cp_64(to, n, from, m) \
441 	{ \
442 	bn_cp_32(to, (n)*2, from, (m)*2); \
443 	bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
444 	}
445 #define bn_64_set_0(to, n) \
446 	{ \
447 	bn_32_set_0(to, (n)*2); \
448 	bn_32_set_0(to, (n)*2+1); \
449 	}
450 #define bn_cp_32(to, n, from, m)	(to)[n] = (m>=0)?((from)[m]):0;
451 #define bn_32_set_0(to, n)		(to)[n] = (BN_ULONG)0;
452 # if defined(BN_LLONG)
453 #  define NIST_INT64 long long
454 # endif
455 #endif /* BN_BITS2 != 64 */
456 
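/*
 * Where NIST_INT64 is available it serves as a 64-bit *signed*
 * accumulator for the word-wise reductions below: additions and
 * subtractions of the shuffled 32-bit terms share one running sum,
 * and the final carry (which may be negative) is recovered with
 * acc >> 32.
 */
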
457 #define nist_set_192(to, from, a1, a2, a3) \
458 	{ \
459 	bn_cp_64(to, 0, from, (a3) - 3) \
460 	bn_cp_64(to, 1, from, (a2) - 3) \
461 	bn_cp_64(to, 2, from, (a1) - 3) \
462 	}
463 
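/*
 * For reference, the reduction below is the standard fast reduction for
 * p = 2^192 - 2^64 - 1: writing the input as 64-bit words
 * (a5,a4,a3,a2,a1,a0), most significant first, and using
 * 2^192 == 2^64 + 1 (mod p),
 *
 *	a == (a2,a1,a0) + (0,a3,a3) + (a4,a4,0) + (a5,a5,a5)	(mod p).
 *
 * bnbuf below ends up holding (a5,a4,a3) and r_d (a2,a1,a0).
 */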
464 int
465 BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
466 {
467 	BN_ULONG bnbuf[BN_NIST_192_TOP] = { 0 };
468 	BN_ULONG c_d[BN_NIST_192_TOP] = { 0 };
469 	BN_ULONG *a_d = a->d;
470 	BN_ULONG *r_d, *res;
471 	uintptr_t mask;
472 	int top = a->top;
473 	int carry, i;
474 
475 	field = &_bignum_nist_p_192; /* just to make sure */
476 
477 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_192_sqr) >= 0)
478 		return BN_nnmod(r, a, field, ctx);
479 
480 	i = BN_ucmp(field, a);
481 	if (i == 0) {
482 		BN_zero(r);
483 		return 1;
484 	} else if (i > 0)
485 		return (r == a) ? 1 : (BN_copy(r , a) != NULL);
486 
487 	if (r != a) {
488 		if (!bn_wexpand(r, BN_NIST_192_TOP))
489 			return 0;
490 		r_d = r->d;
491 		nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
492 	} else
493 		r_d = a_d;
494 
495 	nist_cp_bn_0(bnbuf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
496 	    BN_NIST_192_TOP);
497 
498 #if defined(NIST_INT64)
499 	{
500 		NIST_INT64 acc;	/* accumulator */
501 		unsigned int bbuf[BN_NIST_192_TOP *
502 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
503 		unsigned int rbuf[BN_NIST_192_TOP *
504 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
505 		const unsigned int *bp = bbuf;
506 		unsigned int *rp = rbuf;
507 
508 		CTASSERT(sizeof(bbuf) == sizeof(bnbuf));
509 		CTASSERT(sizeof(rbuf) == sizeof(bnbuf));
510 
511 		/*
512 		 * Avoid strict aliasing violations by copying from a BN_ULONG
513 		 * array to an unsigned int array, then copying back the
514 		 * result. Any sensible compiler will omit the copies, while
515 		 * avoiding undefined behaviour that would result from unsafe
516 		 * type punning via pointer type casting.
517 		 */
518 		memcpy(bbuf, bnbuf, sizeof(bbuf));
519 		memcpy(rbuf, r_d, sizeof(rbuf));
520 
521 		acc = rp[0];
522 		acc += bp[3 * 2 - 6];
523 		acc += bp[5 * 2 - 6];
524 		rp[0] = (unsigned int)acc;
525 		acc >>= 32;
526 
527 		acc += rp[1];
528 		acc += bp[3 * 2 - 5];
529 		acc += bp[5 * 2 - 5];
530 		rp[1] = (unsigned int)acc;
531 		acc >>= 32;
532 
533 		acc += rp[2];
534 		acc += bp[3 * 2 - 6];
535 		acc += bp[4 * 2 - 6];
536 		acc += bp[5 * 2 - 6];
537 		rp[2] = (unsigned int)acc;
538 		acc >>= 32;
539 
540 		acc += rp[3];
541 		acc += bp[3 * 2 - 5];
542 		acc += bp[4 * 2 - 5];
543 		acc += bp[5 * 2 - 5];
544 		rp[3] = (unsigned int)acc;
545 		acc >>= 32;
546 
547 		acc += rp[4];
548 		acc += bp[4 * 2 - 6];
549 		acc += bp[5 * 2 - 6];
550 		rp[4] = (unsigned int)acc;
551 		acc >>= 32;
552 
553 		acc += rp[5];
554 		acc += bp[4 * 2 - 5];
555 		acc += bp[5 * 2 - 5];
556 		rp[5] = (unsigned int)acc;
557 
558 		memcpy(r_d, rbuf, sizeof(rbuf));
559 
560 		carry = (int)(acc >> 32);
561 	}
562 #else
563 	{
564 		BN_ULONG t_d[BN_NIST_192_TOP] = {0};
565 
566 		nist_set_192(t_d, bnbuf, 0, 3, 3);
567 		carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
568 		nist_set_192(t_d, bnbuf, 4, 4, 0);
569 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
570 		nist_set_192(t_d, bnbuf, 5, 5, 5);
571 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
572 	}
573 #endif
574 	if (carry > 0)
575 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
576 		    BN_NIST_192_TOP);
577 	else
578 		carry = 1;
579 
580 	/*
581 	 * We need 'if (carry == 0 || result >= modulus) result -= modulus;'.
582 	 * Since the comparison implies a subtraction, we can instead write
583 	 * 'tmp = result - modulus; if (!carry || !borrow) result = tmp;',
584 	 * which is what happens below, just without an explicit 'if'.
585 	 */
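	/*
	 * bn_sub_words() returns the borrow, so mask ends up all-ones
	 * exactly when the subtraction borrowed and carry is non-zero;
	 * in that case the original r_d is kept, otherwise the reduced
	 * value in c_d is selected -- without a data-dependent branch.
	 */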
586 	mask = 0 - (uintptr_t)bn_sub_words(c_d, r_d, _nist_p_192[0],
587 	    BN_NIST_192_TOP);
588 	mask &= 0 - (uintptr_t)carry;
589 	res = c_d;
590 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
591 	nist_cp_bn(r_d, res, BN_NIST_192_TOP);
592 	r->top = BN_NIST_192_TOP;
593 	bn_correct_top(r);
594 
595 	return 1;
596 }
597 
598 typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *, const BN_ULONG *,
599     const BN_ULONG *, int);
600 
601 #define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
602 	{ \
603 	bn_cp_32(to, 0, from, (a7) - 7) \
604 	bn_cp_32(to, 1, from, (a6) - 7) \
605 	bn_cp_32(to, 2, from, (a5) - 7) \
606 	bn_cp_32(to, 3, from, (a4) - 7) \
607 	bn_cp_32(to, 4, from, (a3) - 7) \
608 	bn_cp_32(to, 5, from, (a2) - 7) \
609 	bn_cp_32(to, 6, from, (a1) - 7) \
610 	}
611 
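/*
 * For reference, the reduction below is the standard fast reduction for
 * p = 2^224 - 2^96 + 1: writing the input as 32-bit words
 * (a13,...,a1,a0), most significant first,
 *
 *	a == (a6,...,a0) + (a10,a9,a8,a7,0,0,0) + (0,a13,a12,a11,0,0,0)
 *	    - (a13,a12,a11,a10,a9,a8,a7) - (0,0,0,0,a13,a12,a11)	(mod p).
 *
 * bnbuf below ends up holding (a13,...,a7) and r_d (a6,...,a0); the
 * subtracted terms are why the intermediate carry can go negative here.
 */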
612 int
613 BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
614 {
615 	BN_ULONG bnbuf[BN_NIST_224_TOP] = { 0 };
616 	BN_ULONG c_d[BN_NIST_224_TOP] = { 0 };
617 	BN_ULONG *a_d = a->d;
618 	BN_ULONG *r_d, *res;
619 	bn_addsub_f addsubf;
620 	uintptr_t mask;
621 	int top = a->top;
622 	int carry, i;
623 
624 	field = &_bignum_nist_p_224; /* just to make sure */
625 
626 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_224_sqr) >= 0)
627 		return BN_nnmod(r, a, field, ctx);
628 
629 	i = BN_ucmp(field, a);
630 	if (i == 0) {
631 		BN_zero(r);
632 		return 1;
633 	} else if (i > 0)
634 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
635 
636 	if (r != a) {
637 		if (!bn_wexpand(r, BN_NIST_224_TOP))
638 			return 0;
639 		r_d = r->d;
640 		nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
641 	} else
642 		r_d = a_d;
643 
644 	memset(&bnbuf, 0, sizeof(bnbuf));
645 
646 #if BN_BITS2==64
647 	/* copy the upper 256 bits of the 448-bit number ... */
648 	nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
649 	    top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
650 	/* ... and right shift by 32 to obtain upper 224 bits */
651 	nist_set_224(bnbuf, c_d, 14, 13, 12, 11, 10, 9, 8);
652 	/* truncate lower part to 224 bits too */
653 	r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
654 #else
655 	nist_cp_bn_0(bnbuf, a_d + BN_NIST_224_TOP,
656 	    top - BN_NIST_224_TOP, BN_NIST_224_TOP);
657 #endif
658 
659 #if defined(NIST_INT64) && BN_BITS2!=64
660 	{
661 		NIST_INT64 acc;	/* accumulator */
662 		unsigned int bbuf[BN_NIST_224_TOP *
663 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
664 		unsigned int rbuf[BN_NIST_224_TOP *
665 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
666 		const unsigned int *bp = bbuf;
667 		unsigned int *rp = rbuf;
668 
669 		CTASSERT(sizeof(bbuf) == sizeof(bnbuf));
670 		CTASSERT(sizeof(rbuf) == sizeof(bnbuf));
671 
672 		/*
673 		 * Avoid strict aliasing violations by copying from a BN_ULONG
674 		 * array to an unsigned int array, then copying back the
675 		 * result. Any sensible compiler will omit the copies, while
676 		 * avoiding undefined behaviour that would result from unsafe
677 		 * type punning via pointer type casting.
678 		 */
679 		memcpy(bbuf, bnbuf, sizeof(bbuf));
680 		memcpy(rbuf, r_d, sizeof(rbuf));
681 
682 		acc = rp[0];
683 		acc -= bp[7 - 7];
684 		acc -= bp[11 - 7];
685 		rp[0] = (unsigned int)acc;
686 		acc >>= 32;
687 
688 		acc += rp[1];
689 		acc -= bp[8 - 7];
690 		acc -= bp[12 - 7];
691 		rp[1] = (unsigned int)acc;
692 		acc >>= 32;
693 
694 		acc += rp[2];
695 		acc -= bp[9 - 7];
696 		acc -= bp[13 - 7];
697 		rp[2] = (unsigned int)acc;
698 		acc >>= 32;
699 
700 		acc += rp[3];
701 		acc += bp[7 - 7];
702 		acc += bp[11 - 7];
703 		acc -= bp[10 - 7];
704 		rp[3] = (unsigned int)acc;
705 		acc >>= 32;
706 
707 		acc += rp[4];
708 		acc += bp[8 - 7];
709 		acc += bp[12 - 7];
710 		acc -= bp[11 - 7];
711 		rp[4] = (unsigned int)acc;
712 		acc >>= 32;
713 
714 		acc += rp[5];
715 		acc += bp[9 - 7];
716 		acc += bp[13 - 7];
717 		acc -= bp[12 - 7];
718 		rp[5] = (unsigned int)acc;
719 		acc >>= 32;
720 
721 		acc += rp[6];
722 		acc += bp[10 - 7];
723 		acc -= bp[13 - 7];
724 		rp[6] = (unsigned int)acc;
725 
726 		memcpy(r_d, rbuf, sizeof(rbuf));
727 
728 		carry = (int)(acc >> 32);
729 # if BN_BITS2==64
730 		rp[7] = carry;
731 # endif
732 	}
733 #else
734 	{
735 		BN_ULONG t_d[BN_NIST_224_TOP] = {0};
736 
737 		nist_set_224(t_d, bnbuf, 10, 9, 8, 7, 0, 0, 0);
738 		carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
739 		nist_set_224(t_d, bnbuf, 0, 13, 12, 11, 0, 0, 0);
740 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
741 		nist_set_224(t_d, bnbuf, 13, 12, 11, 10, 9, 8, 7);
742 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
743 		nist_set_224(t_d, bnbuf, 0, 0, 0, 0, 13, 12, 11);
744 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
745 
746 #if BN_BITS2==64
747 		carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
748 #endif
749 	}
750 #endif
751 	addsubf = bn_sub_words;
752 	if (carry > 0) {
753 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
754 		    BN_NIST_224_TOP);
755 #if BN_BITS2==64
756 		carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
757 #endif
758 	} else if (carry < 0) {
759 		/* The logic is a bit more involved in this case:
760 		 * if bn_add_words yields no carry, the result must
761 		 * be adjusted by unconditionally *adding* the
762 		 * modulus; but if it does carry, the result must
763 		 * be compared against the modulus and conditionally
764 		 * adjusted by *subtracting* it. */
765 		carry = (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
766 		    BN_NIST_224_TOP);
767 		if (carry == 0)
768 			addsubf = bn_add_words;
769 	} else
770 		carry = 1;
771 
772 	/* otherwise this is effectively the same as in BN_nist_mod_192... */
773 	mask = 0 - (uintptr_t)(*addsubf)(c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP);
774 	mask &= 0 - (uintptr_t)carry;
775 	res = c_d;
776 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
777 	nist_cp_bn(r_d, res, BN_NIST_224_TOP);
778 	r->top = BN_NIST_224_TOP;
779 	bn_correct_top(r);
780 
781 	return 1;
782 }
783 
784 #define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
785 	{ \
786 	bn_cp_32(to, 0, from, (a8) - 8) \
787 	bn_cp_32(to, 1, from, (a7) - 8) \
788 	bn_cp_32(to, 2, from, (a6) - 8) \
789 	bn_cp_32(to, 3, from, (a5) - 8) \
790 	bn_cp_32(to, 4, from, (a4) - 8) \
791 	bn_cp_32(to, 5, from, (a3) - 8) \
792 	bn_cp_32(to, 6, from, (a2) - 8) \
793 	bn_cp_32(to, 7, from, (a1) - 8) \
794 	}
795 
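/*
 * For reference, the reduction below is the standard fast reduction for
 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1: writing the input as 32-bit
 * words (a15,...,a1,a0), most significant first, and with
 * T = (a7,...,a0),
 *
 *	S1 = (a15,a14,a13,a12,a11,  0,  0,  0)
 *	S2 = (  0,a15,a14,a13,a12,  0,  0,  0)
 *	S3 = (a15,a14,  0,  0,  0,a10, a9, a8)
 *	S4 = ( a8,a13,a15,a14,a13,a11,a10, a9)
 *	D1 = (a10, a8,  0,  0,  0,a13,a12,a11)
 *	D2 = (a11, a9,  0,  0,a15,a14,a13,a12)
 *	D3 = (a12,  0,a10, a9, a8,a15,a14,a13)
 *	D4 = (a13,  0,a11,a10, a9,  0,a15,a14)
 *
 *	a == T + 2*S1 + 2*S2 + S3 + S4 - D1 - D2 - D3 - D4	(mod p).
 *
 * bnbuf below ends up holding (a15,...,a8) and r_d (a7,...,a0).
 */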
796 int
797 BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
798 {
799 	BN_ULONG bnbuf[BN_NIST_256_TOP] = { 0 };
800 	BN_ULONG c_d[BN_NIST_256_TOP] = { 0 };
801 	BN_ULONG *a_d = a->d;
802 	BN_ULONG *r_d, *res;
803 	bn_addsub_f addsubf;
804 	uintptr_t mask;
805 	int top = a->top;
806 	int carry, i;
807 
808 	field = &_bignum_nist_p_256; /* just to make sure */
809 
810 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_256_sqr) >= 0)
811 		return BN_nnmod(r, a, field, ctx);
812 
813 	i = BN_ucmp(field, a);
814 	if (i == 0) {
815 		BN_zero(r);
816 		return 1;
817 	} else if (i > 0)
818 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
819 
820 	if (r != a) {
821 		if (!bn_wexpand(r, BN_NIST_256_TOP))
822 			return 0;
823 		r_d = r->d;
824 		nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
825 	} else
826 		r_d = a_d;
827 
828 	nist_cp_bn_0(bnbuf, a_d + BN_NIST_256_TOP,
829 	    top - BN_NIST_256_TOP, BN_NIST_256_TOP);
830 
831 #if defined(NIST_INT64)
832 	{
833 		NIST_INT64 acc;	/* accumulator */
834 		unsigned int bbuf[BN_NIST_256_TOP *
835 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
836 		unsigned int rbuf[BN_NIST_256_TOP *
837 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
838 		const unsigned int *bp = bbuf;
839 		unsigned int *rp = rbuf;
840 
841 		CTASSERT(sizeof(bbuf) == sizeof(bnbuf));
842 		CTASSERT(sizeof(rbuf) == sizeof(bnbuf));
843 
844 		/*
845 		 * Avoid strict aliasing violations by copying from a BN_ULONG
846 		 * array to an unsigned int array, then copying back the
847 		 * result. Any sensible compiler will omit the copies, while
848 		 * avoiding undefined behaviour that would result from unsafe
849 		 * type punning via pointer type casting.
850 		 */
851 		memcpy(bbuf, bnbuf, sizeof(bbuf));
852 		memcpy(rbuf, r_d, sizeof(rbuf));
853 
854 		acc = rp[0];
855 		acc += bp[8 - 8];
856 		acc += bp[9 - 8];
857 		acc -= bp[11 - 8];
858 		acc -= bp[12 - 8];
859 		acc -= bp[13 - 8];
860 		acc -= bp[14 - 8];
861 		rp[0] = (unsigned int)acc;
862 		acc >>= 32;
863 
864 		acc += rp[1];
865 		acc += bp[9 - 8];
866 		acc += bp[10 - 8];
867 		acc -= bp[12 - 8];
868 		acc -= bp[13 - 8];
869 		acc -= bp[14 - 8];
870 		acc -= bp[15 - 8];
871 		rp[1] = (unsigned int)acc;
872 		acc >>= 32;
873 
874 		acc += rp[2];
875 		acc += bp[10 - 8];
876 		acc += bp[11 - 8];
877 		acc -= bp[13 - 8];
878 		acc -= bp[14 - 8];
879 		acc -= bp[15 - 8];
880 		rp[2] = (unsigned int)acc;
881 		acc >>= 32;
882 
883 		acc += rp[3];
884 		acc += bp[11 - 8];
885 		acc += bp[11 - 8];
886 		acc += bp[12 - 8];
887 		acc += bp[12 - 8];
888 		acc += bp[13 - 8];
889 		acc -= bp[15 - 8];
890 		acc -= bp[8 - 8];
891 		acc -= bp[9 - 8];
892 		rp[3] = (unsigned int)acc;
893 		acc >>= 32;
894 
895 		acc += rp[4];
896 		acc += bp[12 - 8];
897 		acc += bp[12 - 8];
898 		acc += bp[13 - 8];
899 		acc += bp[13 - 8];
900 		acc += bp[14 - 8];
901 		acc -= bp[9 - 8];
902 		acc -= bp[10 - 8];
903 		rp[4] = (unsigned int)acc;
904 		acc >>= 32;
905 
906 		acc += rp[5];
907 		acc += bp[13 - 8];
908 		acc += bp[13 - 8];
909 		acc += bp[14 - 8];
910 		acc += bp[14 - 8];
911 		acc += bp[15 - 8];
912 		acc -= bp[10 - 8];
913 		acc -= bp[11 - 8];
914 		rp[5] = (unsigned int)acc;
915 		acc >>= 32;
916 
917 		acc += rp[6];
918 		acc += bp[14 - 8];
919 		acc += bp[14 - 8];
920 		acc += bp[15 - 8];
921 		acc += bp[15 - 8];
922 		acc += bp[14 - 8];
923 		acc += bp[13 - 8];
924 		acc -= bp[8 - 8];
925 		acc -= bp[9 - 8];
926 		rp[6] = (unsigned int)acc;
927 		acc >>= 32;
928 
929 		acc += rp[7];
930 		acc += bp[15 - 8];
931 		acc += bp[15 - 8];
932 		acc += bp[15 - 8];
933 		acc += bp[8 - 8];
934 		acc -= bp[10 - 8];
935 		acc -= bp[11 - 8];
936 		acc -= bp[12 - 8];
937 		acc -= bp[13 - 8];
938 		rp[7] = (unsigned int)acc;
939 
940 		memcpy(r_d, rbuf, sizeof(rbuf));
941 
942 		carry = (int)(acc >> 32);
943 	}
944 #else
945 	{
946 		BN_ULONG t_d[BN_NIST_256_TOP] = {0};
947 
948 		/*S1*/
949 		nist_set_256(t_d, bnbuf, 15, 14, 13, 12, 11, 0, 0, 0);
950 		/*S2*/
951 		nist_set_256(c_d, bnbuf, 0, 15, 14, 13, 12, 0, 0, 0);
952 		carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
953 		/* left shift */
954 		{
955 			BN_ULONG *ap, t, c;
956 			ap = t_d;
957 			c = 0;
958 			for (i = BN_NIST_256_TOP; i != 0; --i) {
959 				t = *ap;
960 				*(ap++) = ((t << 1) | c) & BN_MASK2;
961 				c = (t & BN_TBIT) ? 1 : 0;
962 			}
963 			carry <<= 1;
964 			carry |= c;
965 		}
966 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
967 		/*S3*/
968 		nist_set_256(t_d, bnbuf, 15, 14, 0, 0, 0, 10, 9, 8);
969 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
970 		/*S4*/
971 		nist_set_256(t_d, bnbuf, 8, 13, 15, 14, 13, 11, 10, 9);
972 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
973 		/*D1*/
974 		nist_set_256(t_d, bnbuf, 10, 8, 0, 0, 0, 13, 12, 11);
975 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
976 		/*D2*/
977 		nist_set_256(t_d, bnbuf, 11, 9, 0, 0, 15, 14, 13, 12);
978 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
979 		/*D3*/
980 		nist_set_256(t_d, bnbuf, 12, 0, 10, 9, 8, 15, 14, 13);
981 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
982 		/*D4*/
983 		nist_set_256(t_d, bnbuf, 13, 0, 11, 10, 9, 0, 15, 14);
984 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
985 
986 	}
987 #endif
988 	/* see BN_nist_mod_224 for explanation */
989 	addsubf = bn_sub_words;
990 	if (carry > 0)
991 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
992 		    BN_NIST_256_TOP);
993 	else if (carry < 0) {
994 		carry = (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
995 		    BN_NIST_256_TOP);
996 		if (carry == 0)
997 			addsubf = bn_add_words;
998 	} else
999 		carry = 1;
1000 
1001 	mask = 0 - (uintptr_t)(*addsubf)(c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP);
1002 	mask &= 0 - (uintptr_t)carry;
1003 	res = c_d;
1004 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
1005 	nist_cp_bn(r_d, res, BN_NIST_256_TOP);
1006 	r->top = BN_NIST_256_TOP;
1007 	bn_correct_top(r);
1008 
1009 	return 1;
1010 }
1011 
1012 #define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
1013 	{ \
1014 	bn_cp_32(to, 0, from,  (a12) - 12) \
1015 	bn_cp_32(to, 1, from,  (a11) - 12) \
1016 	bn_cp_32(to, 2, from,  (a10) - 12) \
1017 	bn_cp_32(to, 3, from,  (a9) - 12)  \
1018 	bn_cp_32(to, 4, from,  (a8) - 12)  \
1019 	bn_cp_32(to, 5, from,  (a7) - 12)  \
1020 	bn_cp_32(to, 6, from,  (a6) - 12)  \
1021 	bn_cp_32(to, 7, from,  (a5) - 12)  \
1022 	bn_cp_32(to, 8, from,  (a4) - 12)  \
1023 	bn_cp_32(to, 9, from,  (a3) - 12)  \
1024 	bn_cp_32(to, 10, from, (a2) - 12)  \
1025 	bn_cp_32(to, 11, from, (a1) - 12)  \
1026 	}
1027 
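/*
 * For reference, the reduction below is the standard fast reduction for
 * p = 2^384 - 2^128 - 2^96 + 2^32 - 1: writing the input as 32-bit
 * words (a23,...,a1,a0), most significant first, and with
 * T = (a11,...,a0),
 *
 *	S1 = (  0,  0,  0,  0,  0,a23,a22,a21,  0,  0,  0,  0)
 *	S2 = (a23,a22,a21,a20,a19,a18,a17,a16,a15,a14,a13,a12)
 *	S3 = (a20,a19,a18,a17,a16,a15,a14,a13,a12,a23,a22,a21)
 *	S4 = (a19,a18,a17,a16,a15,a14,a13,a12,a20,  0,a23,  0)
 *	S5 = (  0,  0,  0,  0,a23,a22,a21,a20,  0,  0,  0,  0)
 *	S6 = (  0,  0,  0,  0,  0,  0,a23,a22,a21,  0,  0,a20)
 *	D1 = (a22,a21,a20,a19,a18,a17,a16,a15,a14,a13,a12,a23)
 *	D2 = (  0,  0,  0,  0,  0,  0,  0,a23,a22,a21,a20,  0)
 *	D3 = (  0,  0,  0,  0,  0,  0,  0,a23,a23,  0,  0,  0)
 *
 *	a == T + 2*S1 + S2 + S3 + S4 + S5 + S6 - D1 - D2 - D3	(mod p).
 *
 * bnbuf below ends up holding (a23,...,a12) and r_d (a11,...,a0).
 */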
1028 int
1029 BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
1030 {
1031 	BN_ULONG bnbuf[BN_NIST_384_TOP] = { 0 };
1032 	BN_ULONG c_d[BN_NIST_384_TOP] = { 0 };
1033 	BN_ULONG *a_d = a->d;
1034 	BN_ULONG *r_d, *res;
1035 	bn_addsub_f addsubf;
1036 	uintptr_t mask;
1037 	int top = a->top;
1038 	int carry, i;
1039 
1040 	field = &_bignum_nist_p_384; /* just to make sure */
1041 
1042 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_384_sqr) >= 0)
1043 		return BN_nnmod(r, a, field, ctx);
1044 
1045 	i = BN_ucmp(field, a);
1046 	if (i == 0) {
1047 		BN_zero(r);
1048 		return 1;
1049 	} else if (i > 0)
1050 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1051 
1052 	if (r != a) {
1053 		if (!bn_wexpand(r, BN_NIST_384_TOP))
1054 			return 0;
1055 		r_d = r->d;
1056 		nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
1057 	} else
1058 		r_d = a_d;
1059 
1060 	nist_cp_bn_0(bnbuf, a_d + BN_NIST_384_TOP,
1061 	    top - BN_NIST_384_TOP, BN_NIST_384_TOP);
1062 
1063 #if defined(NIST_INT64)
1064 	{
1065 		NIST_INT64 acc;	/* accumulator */
1066 		unsigned int bbuf[BN_NIST_384_TOP *
1067 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
1068 		unsigned int rbuf[BN_NIST_384_TOP *
1069 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
1070 		const unsigned int *bp = bbuf;
1071 		unsigned int *rp = rbuf;
1072 
1073 		CTASSERT(sizeof(bbuf) == sizeof(bnbuf));
1074 		CTASSERT(sizeof(rbuf) == sizeof(bnbuf));
1075 
1076 		/*
1077 		 * Avoid strict aliasing violations by copying from a BN_ULONG
1078 		 * array to an unsigned int array, then copying back the
1079 		 * result. Any sensible compiler will omit the copies, while
1080 		 * avoiding undefined behaviour that would result from unsafe
1081 		 * type punning via pointer type casting.
1082 		 */
1083 		memcpy(bbuf, bnbuf, sizeof(bbuf));
1084 		memcpy(rbuf, r_d, sizeof(rbuf));
1085 
1086 		acc = rp[0];
1087 		acc += bp[12 - 12];
1088 		acc += bp[21 - 12];
1089 		acc += bp[20 - 12];
1090 		acc -= bp[23 - 12];
1091 		rp[0] = (unsigned int)acc;
1092 		acc >>= 32;
1093 
1094 		acc += rp[1];
1095 		acc += bp[13 - 12];
1096 		acc += bp[22 - 12];
1097 		acc += bp[23 - 12];
1098 		acc -= bp[12 - 12];
1099 		acc -= bp[20 - 12];
1100 		rp[1] = (unsigned int)acc;
1101 		acc >>= 32;
1102 
1103 		acc += rp[2];
1104 		acc += bp[14 - 12];
1105 		acc += bp[23 - 12];
1106 		acc -= bp[13 - 12];
1107 		acc -= bp[21 - 12];
1108 		rp[2] = (unsigned int)acc;
1109 		acc >>= 32;
1110 
1111 		acc += rp[3];
1112 		acc += bp[15 - 12];
1113 		acc += bp[12 - 12];
1114 		acc += bp[20 - 12];
1115 		acc += bp[21 - 12];
1116 		acc -= bp[14 - 12];
1117 		acc -= bp[22 - 12];
1118 		acc -= bp[23 - 12];
1119 		rp[3] = (unsigned int)acc;
1120 		acc >>= 32;
1121 
1122 		acc += rp[4];
1123 		acc += bp[21 - 12];
1124 		acc += bp[21 - 12];
1125 		acc += bp[16 - 12];
1126 		acc += bp[13 - 12];
1127 		acc += bp[12 - 12];
1128 		acc += bp[20 - 12];
1129 		acc += bp[22 - 12];
1130 		acc -= bp[15 - 12];
1131 		acc -= bp[23 - 12];
1132 		acc -= bp[23 - 12];
1133 		rp[4] = (unsigned int)acc;
1134 		acc >>= 32;
1135 
1136 		acc += rp[5];
1137 		acc += bp[22 - 12];
1138 		acc += bp[22 - 12];
1139 		acc += bp[17 - 12];
1140 		acc += bp[14 - 12];
1141 		acc += bp[13 - 12];
1142 		acc += bp[21 - 12];
1143 		acc += bp[23 - 12];
1144 		acc -= bp[16 - 12];
1145 		rp[5] = (unsigned int)acc;
1146 		acc >>= 32;
1147 
1148 		acc += rp[6];
1149 		acc += bp[23 - 12];
1150 		acc += bp[23 - 12];
1151 		acc += bp[18 - 12];
1152 		acc += bp[15 - 12];
1153 		acc += bp[14 - 12];
1154 		acc += bp[22 - 12];
1155 		acc -= bp[17 - 12];
1156 		rp[6] = (unsigned int)acc;
1157 		acc >>= 32;
1158 
1159 		acc += rp[7];
1160 		acc += bp[19 - 12];
1161 		acc += bp[16 - 12];
1162 		acc += bp[15 - 12];
1163 		acc += bp[23 - 12];
1164 		acc -= bp[18 - 12];
1165 		rp[7] = (unsigned int)acc;
1166 		acc >>= 32;
1167 
1168 		acc += rp[8];
1169 		acc += bp[20 - 12];
1170 		acc += bp[17 - 12];
1171 		acc += bp[16 - 12];
1172 		acc -= bp[19 - 12];
1173 		rp[8] = (unsigned int)acc;
1174 		acc >>= 32;
1175 
1176 		acc += rp[9];
1177 		acc += bp[21 - 12];
1178 		acc += bp[18 - 12];
1179 		acc += bp[17 - 12];
1180 		acc -= bp[20 - 12];
1181 		rp[9] = (unsigned int)acc;
1182 		acc >>= 32;
1183 
1184 		acc += rp[10];
1185 		acc += bp[22 - 12];
1186 		acc += bp[19 - 12];
1187 		acc += bp[18 - 12];
1188 		acc -= bp[21 - 12];
1189 		rp[10] = (unsigned int)acc;
1190 		acc >>= 32;
1191 
1192 		acc += rp[11];
1193 		acc += bp[23 - 12];
1194 		acc += bp[20 - 12];
1195 		acc += bp[19 - 12];
1196 		acc -= bp[22 - 12];
1197 		rp[11] = (unsigned int)acc;
1198 
1199 		memcpy(r_d, rbuf, sizeof(rbuf));
1200 
1201 		carry = (int)(acc >> 32);
1202 	}
1203 #else
1204 	{
1205 		BN_ULONG t_d[BN_NIST_384_TOP] = {0};
1206 
1207 		/*S1*/
1208 		nist_set_256(t_d, bnbuf, 0, 0, 0, 0, 0, 23 - 4, 22 - 4,
1209 		    21 - 4);
1210 		/* left shift */
1211 		{
1212 			BN_ULONG *ap, t, c;
1213 			ap = t_d;
1214 			c = 0;
1215 			for (i = 3; i != 0; --i) {
1216 				t= *ap;
1217 				*(ap++) = ((t << 1)|c) & BN_MASK2;
1218 				c = (t & BN_TBIT) ? 1 : 0;
1219 			}
1220 			*ap = c;
1221 		}
1222 		carry = (int)bn_add_words(r_d + (128 / BN_BITS2),
1223 		    r_d + (128 / BN_BITS2), t_d, BN_NIST_256_TOP);
1224 		/*S2 */
1225 		carry += (int)bn_add_words(r_d, r_d, bnbuf, BN_NIST_384_TOP);
1226 		/*S3*/
1227 		nist_set_384(t_d, bnbuf, 20, 19, 18, 17, 16, 15, 14, 13, 12,
1228 		    23, 22, 21);
1229 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1230 		/*S4*/
1231 		nist_set_384(t_d, bnbuf, 19, 18, 17, 16, 15, 14, 13, 12, 20,
1232 		    0, 23, 0);
1233 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1234 		/*S5*/
1235 		nist_set_384(t_d, bnbuf, 0,0, 0,0, 23, 22, 21, 20, 0,0, 0, 0);
1236 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1237 		/*S6*/
1238 		nist_set_384(t_d, bnbuf, 0,0, 0,0, 0,0, 23, 22, 21, 0,0, 20);
1239 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1240 		/*D1*/
1241 		nist_set_384(t_d, bnbuf, 22, 21, 20, 19, 18, 17, 16, 15, 14,
1242 		    13, 12, 23);
1243 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1244 		/*D2*/
1245 		nist_set_384(t_d, bnbuf, 0,0, 0,0, 0,0, 0,23, 22, 21, 20, 0);
1246 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1247 		/*D3*/
1248 		nist_set_384(t_d, bnbuf, 0,0, 0,0, 0,0, 0,23, 23, 0,0, 0);
1249 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1250 
1251 	}
1252 #endif
1253 	/* see BN_nist_mod_224 for explanation */
1254 	addsubf = bn_sub_words;
1255 	if (carry > 0)
1256 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
1257 		    BN_NIST_384_TOP);
1258 	else if (carry < 0) {
1259 		carry = (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
1260 		    BN_NIST_384_TOP);
1261 		if (carry == 0)
1262 			addsubf = bn_add_words;
1263 	} else
1264 		carry = 1;
1265 
1266 	mask = 0 - (uintptr_t)(*addsubf)(c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP);
1267 	mask &= 0 - (uintptr_t)carry;
1268 	res = c_d;
1269 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
1270 	nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1271 	r->top = BN_NIST_384_TOP;
1272 	bn_correct_top(r);
1273 
1274 	return 1;
1275 }
1276 
1277 #define BN_NIST_521_RSHIFT	(521%BN_BITS2)
1278 #define BN_NIST_521_LSHIFT	(BN_BITS2-BN_NIST_521_RSHIFT)
1279 #define BN_NIST_521_TOP_MASK	((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
1280 
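/*
 * Since p = 2^521 - 1, we have 2^521 == 1 (mod p), so for any input a
 * below p^2,
 *
 *	a == (a mod 2^521) + (a >> 521)	(mod p),
 *
 * and the sum stays below 2*p, so a single masked subtraction of p
 * suffices afterwards. The code below extracts the two 521-bit halves
 * with the shift macros above, adds them and applies that subtraction.
 */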
1281 int
1282 BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
1283 {
1284 	BN_ULONG t_d[BN_NIST_521_TOP] = { 0 };
1285 	BN_ULONG *a_d = a->d;
1286 	BN_ULONG *r_d, *res;
1287 	BN_ULONG tmp, val;
1288 	uintptr_t mask;
1289 	int top = a->top;
1290 	int i;
1291 
1292 	field = &_bignum_nist_p_521; /* just to make sure */
1293 
1294 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_521_sqr) >= 0)
1295 		return BN_nnmod(r, a, field, ctx);
1296 
1297 	i = BN_ucmp(field, a);
1298 	if (i == 0) {
1299 		BN_zero(r);
1300 		return 1;
1301 	} else if (i > 0)
1302 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1303 
1304 	if (r != a) {
1305 		if (!bn_wexpand(r, BN_NIST_521_TOP))
1306 			return 0;
1307 		r_d = r->d;
1308 		nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
1309 	} else
1310 		r_d = a_d;
1311 
1312 	/* upper 521 bits, copy ... */
1313 	nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
1314 	    top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
1315 	/* ... and right shift */
1316 	for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
1317 		tmp = val >> BN_NIST_521_RSHIFT;
1318 		val = t_d[i + 1];
1319 		t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2;
1320 	}
1321 	t_d[i] = val >> BN_NIST_521_RSHIFT;
1322 	/* lower 521 bits */
1323 	r_d[i] &= BN_NIST_521_TOP_MASK;
1324 
1325 	bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
1326 	mask = 0 - (uintptr_t)bn_sub_words(t_d, r_d, _nist_p_521,
1327 	    BN_NIST_521_TOP);
1328 	res = t_d;
1329 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
1330 	nist_cp_bn(r_d, res, BN_NIST_521_TOP);
1331 	r->top = BN_NIST_521_TOP;
1332 	bn_correct_top(r);
1333 
1334 	return 1;
1335 }
1336