xref: /dragonfly/crypto/libressl/crypto/bn/bn_nist.c (revision 72c33676)
1 /* $OpenBSD: bn_nist.c,v 1.18 2016/07/18 01:04:52 bcook Exp $ */
2 /*
3  * Written by Nils Larsch for the OpenSSL project
4  */
5 /* ====================================================================
6  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  *
20  * 3. All advertising materials mentioning features or use of this
21  *    software must display the following acknowledgment:
22  *    "This product includes software developed by the OpenSSL Project
23  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
24  *
25  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
26  *    endorse or promote products derived from this software without
27  *    prior written permission. For written permission, please contact
28  *    openssl-core@openssl.org.
29  *
30  * 5. Products derived from this software may not be called "OpenSSL"
31  *    nor may "OpenSSL" appear in their names without prior written
32  *    permission of the OpenSSL Project.
33  *
34  * 6. Redistributions of any form whatsoever must retain the following
35  *    acknowledgment:
36  *    "This product includes software developed by the OpenSSL Project
37  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
38  *
39  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
40  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
41  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
43  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
46  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
48  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
49  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
50  * OF THE POSSIBILITY OF SUCH DAMAGE.
51  * ====================================================================
52  *
53  * This product includes cryptographic software written by Eric Young
54  * (eay@cryptsoft.com).  This product includes software written by Tim
55  * Hudson (tjh@cryptsoft.com).
56  *
57  */
58 
59 #include <machine/endian.h>
60 
61 #include <stdint.h>
62 #include <string.h>
63 
64 #include "bn_lcl.h"
65 
/*
 * Number of BN_ULONG words needed to hold an N-bit NIST prime, i.e. N
 * rounded up to a whole number of BN_BITS2-bit words.
 *
 * Each replacement list is fully parenthesized so that the macro keeps its
 * value when it appears next to an operator that binds at least as tightly
 * as the division (for example "x / BN_NIST_192_TOP" or
 * "x % BN_NIST_224_TOP").
 */
#define BN_NIST_192_TOP	((192 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_224_TOP	((224 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_256_TOP	((256 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_384_TOP	((384 + BN_BITS2 - 1) / BN_BITS2)
#define BN_NIST_521_TOP	((521 + BN_BITS2 - 1) / BN_BITS2)
71 
72 /* pre-computed tables are "carry-less" values of modulus*(i+1) */
/*
 * Layout notes:
 *  - every table is stored least-significant word first (the last word of
 *    _nist_p_521 is 0x1FF, the top nine bits of the 521-bit value);
 *  - row i of a two-dimensional table is what the BN_nist_mod_* routines
 *    subtract (or add back) when their accumulated carry equals i+1; row 0
 *    is also used as the modulus itself;
 *  - each *_sqr table is the bound at or above which the BN_nist_mod_*
 *    routines give up on the fast path and call BN_nnmod(); going by the
 *    name it holds the square of the modulus;
 *  - the same values are provided twice, as 64-bit and as 32-bit words,
 *    selected by BN_BITS2; any other word size is a compile-time error.
 */
73 #if BN_BITS2 == 64
/* P-192, multiples 1..3 */
74 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
75 	{0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL},
76 	{0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL},
77 	{0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL}
78 };
79 static const BN_ULONG _nist_p_192_sqr[] = {
80 	0x0000000000000001ULL, 0x0000000000000002ULL, 0x0000000000000001ULL,
81 	0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL
82 };
/* P-224, multiples 1..2; 224 bits occupy only the low half of the top word */
83 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
84 	{
85 		0x0000000000000001ULL, 0xFFFFFFFF00000000ULL,
86 		0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL
87 	},
88 	{
89 		0x0000000000000002ULL, 0xFFFFFFFE00000000ULL,
90 		0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFFULL
91 	} /* this one is "carry-full" */
92 };
93 static const BN_ULONG _nist_p_224_sqr[] = {
94 	0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
95 	0xFFFFFFFFFFFFFFFFULL, 0x0000000200000000ULL,
96 	0x0000000000000000ULL, 0xFFFFFFFFFFFFFFFEULL,
97 	0xFFFFFFFFFFFFFFFFULL
98 };
/* P-256, multiples 1..5 */
99 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
100 	{
101 		0xFFFFFFFFFFFFFFFFULL, 0x00000000FFFFFFFFULL,
102 		0x0000000000000000ULL, 0xFFFFFFFF00000001ULL
103 	},
104 	{
105 		0xFFFFFFFFFFFFFFFEULL, 0x00000001FFFFFFFFULL,
106 		0x0000000000000000ULL, 0xFFFFFFFE00000002ULL
107 	},
108 	{
109 		0xFFFFFFFFFFFFFFFDULL, 0x00000002FFFFFFFFULL,
110 		0x0000000000000000ULL, 0xFFFFFFFD00000003ULL
111 	},
112 	{
113 		0xFFFFFFFFFFFFFFFCULL, 0x00000003FFFFFFFFULL,
114 		0x0000000000000000ULL, 0xFFFFFFFC00000004ULL
115 	},
116 	{
117 		0xFFFFFFFFFFFFFFFBULL, 0x00000004FFFFFFFFULL,
118 		0x0000000000000000ULL, 0xFFFFFFFB00000005ULL
119 	},
120 };
121 static const BN_ULONG _nist_p_256_sqr[] = {
122 	0x0000000000000001ULL, 0xFFFFFFFE00000000ULL,
123 	0xFFFFFFFFFFFFFFFFULL, 0x00000001FFFFFFFEULL,
124 	0x00000001FFFFFFFEULL, 0x00000001FFFFFFFEULL,
125 	0xFFFFFFFE00000001ULL, 0xFFFFFFFE00000002ULL
126 };
/* P-384, multiples 1..5 */
127 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
128 	{
129 		0x00000000FFFFFFFFULL, 0xFFFFFFFF00000000ULL,
130 		0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL,
131 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
132 	},
133 	{
134 		0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL,
135 		0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFFULL,
136 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
137 	},
138 	{
139 		0x00000002FFFFFFFDULL, 0xFFFFFFFD00000000ULL,
140 		0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFFULL,
141 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
142 	},
143 	{
144 		0x00000003FFFFFFFCULL, 0xFFFFFFFC00000000ULL,
145 		0xFFFFFFFFFFFFFFFBULL, 0xFFFFFFFFFFFFFFFFULL,
146 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
147 	},
148 	{
149 		0x00000004FFFFFFFBULL, 0xFFFFFFFB00000000ULL,
150 		0xFFFFFFFFFFFFFFFAULL, 0xFFFFFFFFFFFFFFFFULL,
151 		0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
152 	},
153 };
154 static const BN_ULONG _nist_p_384_sqr[] = {
155 	0xFFFFFFFE00000001ULL, 0x0000000200000000ULL, 0xFFFFFFFE00000000ULL,
156 	0x0000000200000000ULL, 0x0000000000000001ULL, 0x0000000000000000ULL,
157 	0x00000001FFFFFFFEULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFFFFFFFFFDULL,
158 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL
159 };
/* P-521: a single copy of the modulus, no table of multiples */
160 static const BN_ULONG _nist_p_521[] = {
161 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
162 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
163 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0x00000000000001FFULL
164 };
165 static const BN_ULONG _nist_p_521_sqr[] = {
166 	0x0000000000000001ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
167 	0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL,
168 	0x0000000000000000ULL, 0x0000000000000000ULL, 0xFFFFFFFFFFFFFC00ULL,
169 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
170 	0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
171 	0xFFFFFFFFFFFFFFFFULL, 0x000000000003FFFFULL
172 };
173 #elif BN_BITS2 == 32
/* Same tables as above, expressed in 32-bit words. */
174 static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
175 	{
176 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0xFFFFFFFF,
177 		0xFFFFFFFF, 0xFFFFFFFF
178 	},
179 	{
180 		0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF,
181 		0xFFFFFFFF, 0xFFFFFFFF
182 	},
183 	{
184 		0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFC, 0xFFFFFFFF,
185 		0xFFFFFFFF, 0xFFFFFFFF
186 	}
187 };
188 static const BN_ULONG _nist_p_192_sqr[] = {
189 	0x00000001, 0x00000000, 0x00000002, 0x00000000, 0x00000001, 0x00000000,
190 	0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
191 };
192 static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
193 	{
194 		0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFF,
195 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
196 	},
197 	{
198 		0x00000002, 0x00000000, 0x00000000, 0xFFFFFFFE,
199 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
200 	}
201 };
202 static const BN_ULONG _nist_p_224_sqr[] = {
203 	0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
204 	0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000002,
205 	0x00000000, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFF,
206 	0xFFFFFFFF, 0xFFFFFFFF
207 };
208 static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
209 	{
210 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
211 		0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF
212 	},
213 	{
214 		0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000001,
215 		0x00000000, 0x00000000, 0x00000002, 0xFFFFFFFE
216 	},
217 	{
218 		0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000002,
219 		0x00000000, 0x00000000, 0x00000003, 0xFFFFFFFD
220 	},
221 	{
222 		0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000003,
223 		0x00000000, 0x00000000, 0x00000004, 0xFFFFFFFC
224 	},
225 	{
226 		0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000004,
227 		0x00000000, 0x00000000, 0x00000005, 0xFFFFFFFB
228 	},
229 };
230 static const BN_ULONG _nist_p_256_sqr[] = {
231 	0x00000001, 0x00000000, 0x00000000, 0xFFFFFFFE,
232 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000001,
233 	0xFFFFFFFE, 0x00000001, 0xFFFFFFFE, 0x00000001,
234 	0x00000001, 0xFFFFFFFE, 0x00000002, 0xFFFFFFFE
235 };
236 static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
237 	{
238 		0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF,
239 		0xFFFFFFFE, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
240 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
241 	},
242 	{
243 		0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE,
244 		0xFFFFFFFD, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
245 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
246 	},
247 	{
248 		0xFFFFFFFD, 0x00000002, 0x00000000, 0xFFFFFFFD,
249 		0xFFFFFFFC, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
250 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
251 	},
252 	{
253 		0xFFFFFFFC, 0x00000003, 0x00000000, 0xFFFFFFFC,
254 		0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
255 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
256 	},
257 	{
258 		0xFFFFFFFB, 0x00000004, 0x00000000, 0xFFFFFFFB,
259 		0xFFFFFFFA, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
260 		0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
261 	},
262 };
263 static const BN_ULONG _nist_p_384_sqr[] = {
264 	0x00000001, 0xFFFFFFFE, 0x00000000, 0x00000002, 0x00000000, 0xFFFFFFFE,
265 	0x00000000, 0x00000002, 0x00000001, 0x00000000, 0x00000000, 0x00000000,
266 	0xFFFFFFFE, 0x00000001, 0x00000000, 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFF,
267 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
268 };
269 static const BN_ULONG _nist_p_521[] = {
270 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
271 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
272 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
273 	0xFFFFFFFF, 0x000001FF
274 };
275 static const BN_ULONG _nist_p_521_sqr[] = {
276 	0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
277 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
278 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xFFFFFC00, 0xFFFFFFFF,
279 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
280 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
281 	0xFFFFFFFF, 0xFFFFFFFF, 0x0003FFFF
282 };
283 #else
284 #error "unsupported BN_BITS2"
285 #endif
286 
287 static const BIGNUM _bignum_nist_p_192 = {
288 	(BN_ULONG *)_nist_p_192[0],
289 	BN_NIST_192_TOP,
290 	BN_NIST_192_TOP,
291 	0,
292 	BN_FLG_STATIC_DATA
293 };
294 
295 static const BIGNUM _bignum_nist_p_224 = {
296 	(BN_ULONG *)_nist_p_224[0],
297 	BN_NIST_224_TOP,
298 	BN_NIST_224_TOP,
299 	0,
300 	BN_FLG_STATIC_DATA
301 };
302 
303 static const BIGNUM _bignum_nist_p_256 = {
304 	(BN_ULONG *)_nist_p_256[0],
305 	BN_NIST_256_TOP,
306 	BN_NIST_256_TOP,
307 	0,
308 	BN_FLG_STATIC_DATA
309 };
310 
311 static const BIGNUM _bignum_nist_p_384 = {
312 	(BN_ULONG *)_nist_p_384[0],
313 	BN_NIST_384_TOP,
314 	BN_NIST_384_TOP,
315 	0,
316 	BN_FLG_STATIC_DATA
317 };
318 
319 static const BIGNUM _bignum_nist_p_521 = {
320 	(BN_ULONG *)_nist_p_521,
321 	BN_NIST_521_TOP,
322 	BN_NIST_521_TOP,
323 	0,
324 	BN_FLG_STATIC_DATA
325 };
326 
327 
328 const BIGNUM *
329 BN_get0_nist_prime_192(void)
330 {
331 	return &_bignum_nist_p_192;
332 }
333 
334 const BIGNUM *
335 BN_get0_nist_prime_224(void)
336 {
337 	return &_bignum_nist_p_224;
338 }
339 
340 const BIGNUM *
341 BN_get0_nist_prime_256(void)
342 {
343 	return &_bignum_nist_p_256;
344 }
345 
346 const BIGNUM *
347 BN_get0_nist_prime_384(void)
348 {
349 	return &_bignum_nist_p_384;
350 }
351 
352 const BIGNUM *
353 BN_get0_nist_prime_521(void)
354 {
355 	return &_bignum_nist_p_521;
356 }
357 
358 static void
359 nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
360 {
361 	int i;
362 
363 #ifdef BN_DEBUG
364 	OPENSSL_assert(top <= max);
365 #endif
366 	for (i = 0; i < top; i++)
367 		dst[i] = src[i];
368 	for (; i < max; i++)
369 		dst[i] = 0;
370 }
371 
372 static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
373 {
374 	int i;
375 
376 	for (i = 0; i < top; i++)
377 		dst[i] = src[i];
378 }
379 
/*
 * Word/digit copy helpers used by the nist_set_* macros.
 *
 *   bn_cp_64(to, n, from, m)   64-bit lane n of `to` = lane m of `from`,
 *                              or 0 when m is negative
 *   bn_64_set_0(to, n)         64-bit lane n of `to` = 0
 *   bn_cp_32(to, n, from, m)   32-bit digit n of `to` = digit m of `from`,
 *                              or 0 when m is negative
 *   bn_32_set_0(to, n)         32-bit digit n of `to` = 0
 *
 * Every macro parameter is parenthesized at each use so that arguments
 * containing low-precedence operators are evaluated as a unit.
 *
 * The replacement lists deliberately keep their trailing ';' (or '{ }'):
 * the nist_set_* macros invoke them back to back without separators.
 *
 * NIST_INT64, when defined, names a signed 64-bit type that the
 * BN_nist_mod_* routines use as an accumulator over 32-bit digits.  With
 * 64-bit words it is only defined for little-endian targets.
 */
#if BN_BITS2 == 64
#define bn_cp_64(to, n, from, m)	(to)[n] = ((m) >= 0) ? ((from)[m]) : 0;
#define bn_64_set_0(to, n)		(to)[n] = (BN_ULONG)0;
/*
 * two following macros are implemented under assumption that they
 * are called in a sequence with *ascending* n, i.e. as they are...
 * (an even n assigns the whole word and so clears the upper half, the
 * following odd n ORs into / masks that upper half)
 */
#define bn_cp_32_naked(to, n, from, m) \
	(((n) & 1) \
	    ? ((to)[(n) / 2] |= ((m) & 1) ? ((from)[(m) / 2] & BN_MASK2h) \
					  : ((from)[(m) / 2] << 32)) \
	    : ((to)[(n) / 2] = ((m) & 1) ? ((from)[(m) / 2] >> 32) \
					 : ((from)[(m) / 2] & BN_MASK2l)))
#define bn_32_set_0(to, n) \
	(((n) & 1) ? ((to)[(n) / 2] &= BN_MASK2l) : ((to)[(n) / 2] = 0));
#define bn_cp_32(to, n, from, m) \
	((m) >= 0) ? bn_cp_32_naked(to, n, from, m) : bn_32_set_0(to, n)
# if BYTE_ORDER == LITTLE_ENDIAN
#  if defined(_LP64)
#   define NIST_INT64 long
#  else
#   define NIST_INT64 long long
#  endif
# endif
#else
#define bn_cp_64(to, n, from, m) \
	{ \
	bn_cp_32(to, (n)*2, from, (m)*2); \
	bn_cp_32(to, (n)*2+1, from, (m)*2+1); \
	}
#define bn_64_set_0(to, n) \
	{ \
	bn_32_set_0(to, (n)*2); \
	bn_32_set_0(to, (n)*2+1); \
	}
#define bn_cp_32(to, n, from, m)	(to)[n] = ((m) >= 0) ? ((from)[m]) : 0;
#define bn_32_set_0(to, n)		(to)[n] = (BN_ULONG)0;
# if defined(BN_LLONG)
#  define NIST_INT64 long long
# endif
#endif /* BN_BITS2 != 64 */
415 
/*
 * nist_set_192(dst, src, w2, w1, w0)
 *
 * Build a 192-bit value in dst out of 64-bit lanes of src.  The selectors
 * are listed most significant first: lane 2 of dst comes from w2, lane 1
 * from w1 and lane 0 from w0.  A selector k picks src lane (k - 3); a
 * selector that makes this index negative (such as 0) yields a zero lane,
 * as implemented by bn_cp_64.
 *
 * Expands to a braced block, so no trailing ';' is required at the call.
 */
#define nist_set_192(dst, src, w2, w1, w0) \
	{ \
	bn_cp_64(dst, 0, src, (w0) - 3) \
	bn_cp_64(dst, 1, src, (w1) - 3) \
	bn_cp_64(dst, 2, src, (w2) - 3) \
	}
422 
/*
 * BN_nist_mod_192 - reduce a modulo the built-in NIST P-192 modulus.
 *
 *   r      receives the result; may be the same object as a
 *   a      value to reduce
 *   field  not consulted: it is overwritten with &_bignum_nist_p_192
 *   ctx    only handed to BN_nnmod() on the generic fallback path
 *
 * Returns 1 on success.  Returns 0 if r cannot be expanded, and otherwise
 * propagates the result of BN_nnmod() / BN_copy() on the paths that use
 * them.
 *
 * The fast path is taken only for non-negative a below _nist_p_192_sqr.
 * It splits a into its low BN_NIST_192_TOP words (kept in r) and the
 * remaining high words (kept in buf), adds rearranged copies of the high
 * part to the low part, and then corrects the result using the table of
 * modulus multiples.
 */
423 int
424 BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
425 {
426 	int top = a->top, i;
427 	int carry;
428 	BN_ULONG *r_d, *a_d = a->d;
	/* high words of a, zero padded; .ui is the same storage seen as 32-bit digits */
429 	union {
430 		BN_ULONG bn[BN_NIST_192_TOP];
431 		unsigned int ui[BN_NIST_192_TOP *
432 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
433 	} buf;
434 	BN_ULONG c_d[BN_NIST_192_TOP], *res;
435 	uintptr_t mask;
436 	static const BIGNUM _bignum_nist_p_192_sqr = {
437 		(BN_ULONG *)_nist_p_192_sqr,
438 		sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]),
439 		sizeof(_nist_p_192_sqr) / sizeof(_nist_p_192_sqr[0]),
440 		0,
441 		BN_FLG_STATIC_DATA
442 	};
443 
444 	field = &_bignum_nist_p_192; /* just to make sure */
445 
	/* inputs outside [0, _nist_p_192_sqr) go through the generic reduction */
446 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_192_sqr) >= 0)
447 		return BN_nnmod(r, a, field, ctx);
448 
	/* a == modulus gives 0; a < modulus is already reduced and is just copied */
449 	i = BN_ucmp(field, a);
450 	if (i == 0) {
451 		BN_zero(r);
452 		return 1;
453 	} else if (i > 0)
454 		return (r == a) ? 1 : (BN_copy(r , a) != NULL);
455 
	/* r_d := low BN_NIST_192_TOP words of a (in place when r and a coincide) */
456 	if (r != a) {
457 		if (!bn_wexpand(r, BN_NIST_192_TOP))
458 			return 0;
459 		r_d = r->d;
460 		nist_cp_bn(r_d, a_d, BN_NIST_192_TOP);
461 	} else
462 		r_d = a_d;
463 
	/* buf := the words of a above the low part, zero padded to BN_NIST_192_TOP */
464 	nist_cp_bn_0(buf.bn, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP,
465 	    BN_NIST_192_TOP);
466 
467 #if defined(NIST_INT64)
	/*
	 * Column-wise addition over 32-bit digits.  bp[k * 2 - 6] and
	 * bp[k * 2 - 5] are the low and high 32-bit halves of 64-bit chunk k
	 * of a (k = 3..5).  After each column the low 32 bits are stored and
	 * the overflow stays in acc for the next column.
	 */
468 	{
469 		NIST_INT64 acc;	/* accumulator */
470 		unsigned int *rp = (unsigned int *)r_d;
471 		const unsigned int *bp = (const unsigned int *)buf.ui;
472 
473 		acc = rp[0];
474 		acc += bp[3 * 2 - 6];
475 		acc += bp[5 * 2 - 6];
476 		rp[0] = (unsigned int)acc;
477 		acc >>= 32;
478 
479 		acc += rp[1];
480 		acc += bp[3 * 2 - 5];
481 		acc += bp[5 * 2 - 5];
482 		rp[1] = (unsigned int)acc;
483 		acc >>= 32;
484 
485 		acc += rp[2];
486 		acc += bp[3 * 2 - 6];
487 		acc += bp[4 * 2 - 6];
488 		acc += bp[5 * 2 - 6];
489 		rp[2] = (unsigned int)acc;
490 		acc >>= 32;
491 
492 		acc += rp[3];
493 		acc += bp[3 * 2 - 5];
494 		acc += bp[4 * 2 - 5];
495 		acc += bp[5 * 2 - 5];
496 		rp[3] = (unsigned int)acc;
497 		acc >>= 32;
498 
499 		acc += rp[4];
500 		acc += bp[4 * 2 - 6];
501 		acc += bp[5 * 2 - 6];
502 		rp[4] = (unsigned int)acc;
503 		acc >>= 32;
504 
505 		acc += rp[5];
506 		acc += bp[4 * 2 - 5];
507 		acc += bp[5 * 2 - 5];
508 		rp[5] = (unsigned int)acc;
509 
		/* whatever did not fit into 192 bits */
510 		carry = (int)(acc >> 32);
511 	}
512 #else
	/*
	 * Same sums done as three whole-number additions: t_d is rebuilt from
	 * selected 64-bit chunks of the high part each time and added to r_d;
	 * carry counts the carries out of the top word.
	 */
513 	{
514 		BN_ULONG t_d[BN_NIST_192_TOP] = {0};
515 
516 		nist_set_192(t_d, buf.bn, 0, 3, 3);
517 		carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
518 		nist_set_192(t_d, buf.bn, 4, 4, 0);
519 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
		/* no ';' needed below: nist_set_192 expands to a braced block */
520 		nist_set_192(t_d, buf.bn, 5, 5, 5)
521 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
522 	}
523 #endif
	/*
	 * With a positive carry, subtract the table row for that carry count
	 * and keep the borrow of that subtraction in carry.  With no carry,
	 * carry is forced to 1 so that the borrow of the trial subtraction
	 * below decides the outcome on its own.
	 */
524 	if (carry > 0)
525 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_192[carry - 1],
526 		    BN_NIST_192_TOP);
527 	else
528 		carry = 1;
529 
530 	/*
531 	 * we need 'if (carry==0 || result>=modulus) result-=modulus;'
532 	 * as comparison implies subtraction, we can write
533 	 * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
534 	 * this is what happens below, but without explicit if:-) a.
535 	 */
	/*
	 * c_d := r_d - modulus.  mask is all ones only when that subtraction
	 * borrowed and carry is non-zero; res then points at r_d, otherwise
	 * at c_d.  The choice is made by masking the two pointers.
	 */
536 	mask = 0 - (uintptr_t)bn_sub_words(c_d, r_d, _nist_p_192[0],
537 	    BN_NIST_192_TOP);
538 	mask &= 0 - (uintptr_t)carry;
539 	res = c_d;
540 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
541 	nist_cp_bn(r_d, res, BN_NIST_192_TOP);
542 	r->top = BN_NIST_192_TOP;
543 	bn_correct_top(r);
544 
545 	return 1;
546 }
547 
548 typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *, const BN_ULONG *,
549     const BN_ULONG *, int);
550 
/*
 * nist_set_224(dst, src, d6, d5, d4, d3, d2, d1, d0)
 *
 * Build a 224-bit value in dst out of 32-bit digits of src.  The selectors
 * are listed most significant first: digit 6 of dst comes from d6, ...,
 * digit 0 from d0.  A selector k picks src digit (k - 7); a selector that
 * makes this index negative (such as 0) yields a zero digit, as implemented
 * by bn_cp_32.
 *
 * Digits are written in ascending order, which the 64-bit flavour of
 * bn_cp_32 relies on.  Expands to a braced block.
 */
#define nist_set_224(dst, src, d6, d5, d4, d3, d2, d1, d0) \
	{ \
	bn_cp_32(dst, 0, src, (d0) - 7) \
	bn_cp_32(dst, 1, src, (d1) - 7) \
	bn_cp_32(dst, 2, src, (d2) - 7) \
	bn_cp_32(dst, 3, src, (d3) - 7) \
	bn_cp_32(dst, 4, src, (d4) - 7) \
	bn_cp_32(dst, 5, src, (d5) - 7) \
	bn_cp_32(dst, 6, src, (d6) - 7) \
	}
561 
/*
 * BN_nist_mod_224 - reduce a modulo the built-in NIST P-224 modulus.
 *
 *   r      receives the result; may be the same object as a
 *   a      value to reduce
 *   field  not consulted: it is overwritten with &_bignum_nist_p_224
 *   ctx    only handed to BN_nnmod() on the generic fallback path
 *
 * Returns 1 on success.  Returns 0 if r cannot be expanded, and otherwise
 * propagates the result of BN_nnmod() / BN_copy() on the paths that use
 * them.
 *
 * The fast path is taken only for non-negative a below _nist_p_224_sqr.
 * Unlike the P-192 case the folded terms are both added and subtracted, so
 * the running carry may end up negative and the final correction has to be
 * able to add the modulus as well as subtract it.
 */
562 int
563 BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
564 {
565 	int top = a->top, i;
566 	int carry;
567 	BN_ULONG *r_d, *a_d = a->d;
	/* high 224 bits of a; .ui is the same storage seen as 32-bit digits */
568 	union {
569 		BN_ULONG bn[BN_NIST_224_TOP];
570 		unsigned int ui[BN_NIST_224_TOP *
571 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
572 	} buf;
573 	BN_ULONG c_d[BN_NIST_224_TOP], *res;
574 	uintptr_t mask;
	/* lets the final add/sub routine be selected by masking its address */
575 	union {
576 		bn_addsub_f f;
577 		uintptr_t p;
578 	} u;
579 	static const BIGNUM _bignum_nist_p_224_sqr = {
580 		(BN_ULONG *)_nist_p_224_sqr,
581 		sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]),
582 		sizeof(_nist_p_224_sqr) / sizeof(_nist_p_224_sqr[0]),
583 		0,
584 		BN_FLG_STATIC_DATA
585 	};
586 
587 	field = &_bignum_nist_p_224; /* just to make sure */
588 
	/* inputs outside [0, _nist_p_224_sqr) go through the generic reduction */
589 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_224_sqr) >= 0)
590 		return BN_nnmod(r, a, field, ctx);
591 
	/* a == modulus gives 0; a < modulus is already reduced and is just copied */
592 	i = BN_ucmp(field, a);
593 	if (i == 0) {
594 		BN_zero(r);
595 		return 1;
596 	} else if (i > 0)
597 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
598 
	/* r_d := low BN_NIST_224_TOP words of a (in place when r and a coincide) */
599 	if (r != a) {
600 		if (!bn_wexpand(r, BN_NIST_224_TOP))
601 			return 0;
602 		r_d = r->d;
603 		nist_cp_bn(r_d, a_d, BN_NIST_224_TOP);
604 	} else
605 		r_d = a_d;
606 
607 	memset(&buf, 0, sizeof(buf));
608 
	/*
	 * 224 is not a multiple of 64, so with 64-bit words the split between
	 * low and high part falls in the middle of word BN_NIST_224_TOP - 1.
	 */
609 #if BN_BITS2==64
610 	/* copy upper 256 bits of 448 bit number ... */
611 	nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP - 1),
612 	    top - (BN_NIST_224_TOP - 1), BN_NIST_224_TOP);
613 	/* ... and right shift by 32 to obtain upper 224 bits */
614 	nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
615 	/* truncate lower part to 224 bits too */
616 	r_d[BN_NIST_224_TOP - 1] &= BN_MASK2l;
617 #else
618 	nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP,
619 	    top - BN_NIST_224_TOP, BN_NIST_224_TOP);
620 #endif
621 
622 #if defined(NIST_INT64) && BN_BITS2!=64
	/*
	 * Column-wise signed accumulation over 32-bit digits; bp[k - 7] is
	 * 32-bit digit k of a (k = 7..13).  acc can go negative here.
	 * NOTE(review): "acc >>= 32" on a negative value relies on the
	 * compiler performing an arithmetic (sign-propagating) shift.
	 */
623 	{
624 		NIST_INT64 acc;	/* accumulator */
625 		unsigned int *rp = (unsigned int *)r_d;
626 		const unsigned int *bp = (const unsigned int *)buf.ui;
627 
628 		acc = rp[0];
629 		acc -= bp[7 - 7];
630 		acc -= bp[11 - 7];
631 		rp[0] = (unsigned int)acc;
632 		acc >>= 32;
633 
634 		acc += rp[1];
635 		acc -= bp[8 - 7];
636 		acc -= bp[12 - 7];
637 		rp[1] = (unsigned int)acc;
638 		acc >>= 32;
639 
640 		acc += rp[2];
641 		acc -= bp[9 - 7];
642 		acc -= bp[13 - 7];
643 		rp[2] = (unsigned int)acc;
644 		acc >>= 32;
645 
646 		acc += rp[3];
647 		acc += bp[7 - 7];
648 		acc += bp[11 - 7];
649 		acc -= bp[10 - 7];
650 		rp[3] = (unsigned int)acc;
651 		acc >>= 32;
652 
653 		acc += rp[4];
654 		acc += bp[8 - 7];
655 		acc += bp[12 - 7];
656 		acc -= bp[11 - 7];
657 		rp[4] = (unsigned int)acc;
658 		acc >>= 32;
659 
660 		acc += rp[5];
661 		acc += bp[9 - 7];
662 		acc += bp[13 - 7];
663 		acc -= bp[12 - 7];
664 		rp[5] = (unsigned int)acc;
665 		acc >>= 32;
666 
667 		acc += rp[6];
668 		acc += bp[10 - 7];
669 		acc -= bp[13 - 7];
670 		rp[6] = (unsigned int)acc;
671 
672 		carry = (int)(acc >> 32);
		/*
		 * NOTE(review): the nested test below can never be true, because
		 * the enclosing #if already requires BN_BITS2 != 64.
		 */
673 # if BN_BITS2==64
674 		rp[7] = carry;
675 # endif
676 	}
677 #else
	/*
	 * Whole-number variant: two additions and two subtractions of values
	 * rebuilt from selected 32-bit digits of the high part.
	 */
678 	{
679 		BN_ULONG t_d[BN_NIST_224_TOP] = {0};
680 
681 		nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
682 		carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
683 		nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
684 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
685 		nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
686 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
687 		nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
688 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
689 
		/*
		 * with 64-bit words the overflow past bit 223 sits in the upper
		 * half of the top word, so carry is read from there instead
		 */
690 #if BN_BITS2==64
691 		carry = (int)(r_d[BN_NIST_224_TOP - 1] >> 32);
692 #endif
693 	}
694 #endif
	/* default final operation: trial subtraction of the modulus */
695 	u.f = bn_sub_words;
696 	if (carry > 0) {
		/* remove carry * modulus; carry becomes the borrow of this step */
697 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_224[carry - 1],
698 		    BN_NIST_224_TOP);
699 #if BN_BITS2==64
		/* 64-bit words: carry := inverse of bit 32 of the top word */
700 		carry = (int)(~(r_d[BN_NIST_224_TOP - 1] >> 32)) & 1;
701 #endif
702 	} else if (carry < 0) {
703 		/* it's a bit more complicated logic in this case.
704 		 * if bn_add_words yields no carry, then result
705 		 * has to be adjusted by unconditionally *adding*
706 		 * the modulus. but if it does, then result has
707 		 * to be compared to the modulus and conditionally
708 		 * adjusted by *subtracting* the latter. */
709 		carry = (int)bn_add_words(r_d, r_d, _nist_p_224[-carry - 1],
710 		    BN_NIST_224_TOP);
		/* carry out => keep bn_sub_words; no carry out => use bn_add_words */
711 		mask = 0 - (uintptr_t)carry;
712 		u.p = ((uintptr_t)bn_sub_words & mask) |
713 		    ((uintptr_t)bn_add_words & ~mask);
714 	} else
715 		carry = 1;
716 
717 	/* otherwise it's effectively same as in BN_nist_mod_192... */
	/*
	 * c_d := r_d -/+ modulus via the selected routine.  mask is all ones
	 * only when that routine reported a borrow/carry and carry is
	 * non-zero; res then points at r_d, otherwise at c_d.
	 */
718 	mask = 0 - (uintptr_t)(*u.f)(c_d, r_d, _nist_p_224[0], BN_NIST_224_TOP);
719 	mask &= 0 - (uintptr_t)carry;
720 	res = c_d;
721 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
722 	nist_cp_bn(r_d, res, BN_NIST_224_TOP);
723 	r->top = BN_NIST_224_TOP;
724 	bn_correct_top(r);
725 
726 	return 1;
727 }
728 
/*
 * nist_set_256(dst, src, d7, d6, d5, d4, d3, d2, d1, d0)
 *
 * Build a 256-bit value in dst out of 32-bit digits of src.  The selectors
 * are listed most significant first: digit 7 of dst comes from d7, ...,
 * digit 0 from d0.  A selector k picks src digit (k - 8); a selector that
 * makes this index negative (such as 0) yields a zero digit, as implemented
 * by bn_cp_32.
 *
 * Digits are written in ascending order, which the 64-bit flavour of
 * bn_cp_32 relies on.  Expands to a braced block.
 */
#define nist_set_256(dst, src, d7, d6, d5, d4, d3, d2, d1, d0) \
	{ \
	bn_cp_32(dst, 0, src, (d0) - 8) \
	bn_cp_32(dst, 1, src, (d1) - 8) \
	bn_cp_32(dst, 2, src, (d2) - 8) \
	bn_cp_32(dst, 3, src, (d3) - 8) \
	bn_cp_32(dst, 4, src, (d4) - 8) \
	bn_cp_32(dst, 5, src, (d5) - 8) \
	bn_cp_32(dst, 6, src, (d6) - 8) \
	bn_cp_32(dst, 7, src, (d7) - 8) \
	}
740 
/*
 * BN_nist_mod_256 - reduce a modulo the built-in NIST P-256 modulus.
 *
 *   r      receives the result; may be the same object as a
 *   a      value to reduce
 *   field  not consulted: it is overwritten with &_bignum_nist_p_256
 *   ctx    only handed to BN_nnmod() on the generic fallback path
 *
 * Returns 1 on success.  Returns 0 if r cannot be expanded, and otherwise
 * propagates the result of BN_nnmod() / BN_copy() on the paths that use
 * them.
 *
 * The fast path is taken only for non-negative a below _nist_p_256_sqr.
 * The high half of a is folded into the low half as a signed combination
 * of rearranged 32-bit digits (terms S1..S4 added, D1..D4 subtracted in the
 * whole-number variant), after which the signed carry is cancelled with a
 * row of _nist_p_256 and a final conditional correction is applied.
 */
741 int
742 BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
743 {
744 	int i, top = a->top;
745 	int carry = 0;
746 	BN_ULONG *a_d = a->d, *r_d;
	/* high words of a, zero padded; .ui is the same storage seen as 32-bit digits */
747 	union {
748 		BN_ULONG bn[BN_NIST_256_TOP];
749 		unsigned int ui[BN_NIST_256_TOP *
750 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
751 	} buf;
752 	BN_ULONG c_d[BN_NIST_256_TOP] = {0}, *res;
753 	uintptr_t mask;
	/* lets the final add/sub routine be selected by masking its address */
754 	union {
755 		bn_addsub_f f;
756 		uintptr_t p;
757 	} u;
758 	static const BIGNUM _bignum_nist_p_256_sqr = {
759 		(BN_ULONG *)_nist_p_256_sqr,
760 		sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]),
761 		sizeof(_nist_p_256_sqr) / sizeof(_nist_p_256_sqr[0]),
762 		0,
763 		BN_FLG_STATIC_DATA
764 	};
765 
766 	field = &_bignum_nist_p_256; /* just to make sure */
767 
	/* inputs outside [0, _nist_p_256_sqr) go through the generic reduction */
768 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_256_sqr) >= 0)
769 		return BN_nnmod(r, a, field, ctx);
770 
	/* a == modulus gives 0; a < modulus is already reduced and is just copied */
771 	i = BN_ucmp(field, a);
772 	if (i == 0) {
773 		BN_zero(r);
774 		return 1;
775 	} else if (i > 0)
776 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
777 
	/* r_d := low BN_NIST_256_TOP words of a (in place when r and a coincide) */
778 	if (r != a) {
779 		if (!bn_wexpand(r, BN_NIST_256_TOP))
780 			return 0;
781 		r_d = r->d;
782 		nist_cp_bn(r_d, a_d, BN_NIST_256_TOP);
783 	} else
784 		r_d = a_d;
785 
	/* buf := the words of a above the low part, zero padded to BN_NIST_256_TOP */
786 	nist_cp_bn_0(buf.bn, a_d + BN_NIST_256_TOP,
787 	    top - BN_NIST_256_TOP, BN_NIST_256_TOP);
788 
789 #if defined(NIST_INT64)
	/*
	 * Column-wise signed accumulation over 32-bit digits; bp[k - 8] is
	 * 32-bit digit k of a (k = 8..15).  A digit that is added twice in a
	 * column appears twice.  acc can go negative here.
	 * NOTE(review): "acc >>= 32" on a negative value relies on the
	 * compiler performing an arithmetic (sign-propagating) shift.
	 */
790 	{
791 		NIST_INT64 acc;	/* accumulator */
792 		unsigned int *rp = (unsigned int *)r_d;
793 		const unsigned int *bp = (const unsigned int *)buf.ui;
794 
795 		acc = rp[0];
796 		acc += bp[8 - 8];
797 		acc += bp[9 - 8];
798 		acc -= bp[11 - 8];
799 		acc -= bp[12 - 8];
800 		acc -= bp[13 - 8];
801 		acc -= bp[14 - 8];
802 		rp[0] = (unsigned int)acc;
803 		acc >>= 32;
804 
805 		acc += rp[1];
806 		acc += bp[9 - 8];
807 		acc += bp[10 - 8];
808 		acc -= bp[12 - 8];
809 		acc -= bp[13 - 8];
810 		acc -= bp[14 - 8];
811 		acc -= bp[15 - 8];
812 		rp[1] = (unsigned int)acc;
813 		acc >>= 32;
814 
815 		acc += rp[2];
816 		acc += bp[10 - 8];
817 		acc += bp[11 - 8];
818 		acc -= bp[13 - 8];
819 		acc -= bp[14 - 8];
820 		acc -= bp[15 - 8];
821 		rp[2] = (unsigned int)acc;
822 		acc >>= 32;
823 
824 		acc += rp[3];
825 		acc += bp[11 - 8];
826 		acc += bp[11 - 8];
827 		acc += bp[12 - 8];
828 		acc += bp[12 - 8];
829 		acc += bp[13 - 8];
830 		acc -= bp[15 - 8];
831 		acc -= bp[8 - 8];
832 		acc -= bp[9 - 8];
833 		rp[3] = (unsigned int)acc;
834 		acc >>= 32;
835 
836 		acc += rp[4];
837 		acc += bp[12 - 8];
838 		acc += bp[12 - 8];
839 		acc += bp[13 - 8];
840 		acc += bp[13 - 8];
841 		acc += bp[14 - 8];
842 		acc -= bp[9 - 8];
843 		acc -= bp[10 - 8];
844 		rp[4] = (unsigned int)acc;
845 		acc >>= 32;
846 
847 		acc += rp[5];
848 		acc += bp[13 - 8];
849 		acc += bp[13 - 8];
850 		acc += bp[14 - 8];
851 		acc += bp[14 - 8];
852 		acc += bp[15 - 8];
853 		acc -= bp[10 - 8];
854 		acc -= bp[11 - 8];
855 		rp[5] = (unsigned int)acc;
856 		acc >>= 32;
857 
858 		acc += rp[6];
859 		acc += bp[14 - 8];
860 		acc += bp[14 - 8];
861 		acc += bp[15 - 8];
862 		acc += bp[15 - 8];
863 		acc += bp[14 - 8];
864 		acc += bp[13 - 8];
865 		acc -= bp[8 - 8];
866 		acc -= bp[9 - 8];
867 		rp[6] = (unsigned int)acc;
868 		acc >>= 32;
869 
870 		acc += rp[7];
871 		acc += bp[15 - 8];
872 		acc += bp[15 - 8];
873 		acc += bp[15 - 8];
874 		acc += bp[8 - 8];
875 		acc -= bp[10 - 8];
876 		acc -= bp[11 - 8];
877 		acc -= bp[12 - 8];
878 		acc -= bp[13 - 8];
879 		rp[7] = (unsigned int)acc;
880 
		/* signed overflow past 256 bits */
881 		carry = (int)(acc >> 32);
882 	}
883 #else
	/*
	 * Whole-number variant.  S1 and S2 are summed first and doubled once,
	 * then S3 and S4 are added and D1..D4 subtracted; carry collects the
	 * carries and borrows out of the top word.
	 */
884 	{
885 		BN_ULONG t_d[BN_NIST_256_TOP] = {0};
886 
887 		/*S1*/
888 		nist_set_256(t_d, buf.bn, 15, 14, 13, 12, 11, 0, 0, 0);
889 		/*S2*/
890 		nist_set_256(c_d, buf.bn, 0, 15, 14, 13, 12, 0, 0, 0);
891 		carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
892 		/* left shift */
		/*
		 * doubles t_d in place: the bit shifted out of each word moves
		 * into the next one; carry is doubled too and takes the bit
		 * shifted out of the top word
		 */
893 		{
894 			BN_ULONG *ap, t, c;
895 			ap = t_d;
896 			c = 0;
897 			for (i = BN_NIST_256_TOP; i != 0; --i) {
898 				t = *ap;
899 				*(ap++) = ((t << 1) | c) & BN_MASK2;
900 				c = (t & BN_TBIT) ? 1 : 0;
901 			}
902 			carry <<= 1;
903 			carry |= c;
904 		}
905 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
906 		/*S3*/
907 		nist_set_256(t_d, buf.bn, 15, 14, 0, 0, 0, 10, 9, 8);
908 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
909 		/*S4*/
910 		nist_set_256(t_d, buf.bn, 8, 13, 15, 14, 13, 11, 10, 9);
911 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
912 		/*D1*/
913 		nist_set_256(t_d, buf.bn, 10, 8, 0, 0, 0, 13, 12, 11);
914 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
915 		/*D2*/
916 		nist_set_256(t_d, buf.bn, 11, 9, 0, 0, 15, 14, 13, 12);
917 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
918 		/*D3*/
919 		nist_set_256(t_d, buf.bn, 12, 0, 10, 9, 8, 15, 14, 13);
920 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
921 		/*D4*/
922 		nist_set_256(t_d, buf.bn, 13, 0, 11, 10, 9, 0, 15, 14);
923 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
924 
925 	}
926 #endif
927 	/* see BN_nist_mod_224 for explanation */
	/*
	 * In short: a positive carry is cancelled by subtracting the matching
	 * row of _nist_p_256, a negative one by adding it.  In the negative
	 * case the final step below adds the modulus when the addition here
	 * produced no carry out, and subtracts it otherwise.
	 */
928 	u.f = bn_sub_words;
929 	if (carry > 0)
930 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_256[carry - 1],
931 		    BN_NIST_256_TOP);
932 	else if (carry < 0) {
933 		carry = (int)bn_add_words(r_d, r_d, _nist_p_256[-carry - 1],
934 		    BN_NIST_256_TOP);
935 		mask = 0 - (uintptr_t)carry;
936 		u.p = ((uintptr_t)bn_sub_words & mask) |
937 		    ((uintptr_t)bn_add_words & ~mask);
938 	} else
939 		carry = 1;
940 
	/*
	 * c_d := r_d -/+ modulus via the selected routine.  mask is all ones
	 * only when that routine reported a borrow/carry and carry is
	 * non-zero; res then points at r_d, otherwise at c_d.
	 */
941 	mask = 0 - (uintptr_t)(*u.f)(c_d, r_d, _nist_p_256[0], BN_NIST_256_TOP);
942 	mask &= 0 - (uintptr_t)carry;
943 	res = c_d;
944 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
945 	nist_cp_bn(r_d, res, BN_NIST_256_TOP);
946 	r->top = BN_NIST_256_TOP;
947 	bn_correct_top(r);
948 
949 	return 1;
950 }
951 
/*
 * nist_set_384(dst, src, d11, d10, ..., d1, d0)
 *
 * Build a 384-bit value in dst out of 32-bit digits of src.  The selectors
 * are listed most significant first: digit 11 of dst comes from d11, ...,
 * digit 0 from d0.  A selector k picks src digit (k - 12); a selector that
 * makes this index negative (such as 0) yields a zero digit, as implemented
 * by bn_cp_32.
 *
 * Digits are written in ascending order, which the 64-bit flavour of
 * bn_cp_32 relies on.  Expands to a braced block.
 */
#define nist_set_384(dst, src, d11, d10, d9, d8, d7, d6, d5, d4, d3, d2, d1, d0) \
	{ \
	bn_cp_32(dst, 0, src, (d0) - 12) \
	bn_cp_32(dst, 1, src, (d1) - 12) \
	bn_cp_32(dst, 2, src, (d2) - 12) \
	bn_cp_32(dst, 3, src, (d3) - 12) \
	bn_cp_32(dst, 4, src, (d4) - 12) \
	bn_cp_32(dst, 5, src, (d5) - 12) \
	bn_cp_32(dst, 6, src, (d6) - 12) \
	bn_cp_32(dst, 7, src, (d7) - 12) \
	bn_cp_32(dst, 8, src, (d8) - 12) \
	bn_cp_32(dst, 9, src, (d9) - 12) \
	bn_cp_32(dst, 10, src, (d10) - 12) \
	bn_cp_32(dst, 11, src, (d11) - 12) \
	}
967 
968 int
969 BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
970 {
971 	int i, top = a->top;
972 	int carry = 0;
973 	BN_ULONG *r_d, *a_d = a->d;
974 	union {
975 		BN_ULONG bn[BN_NIST_384_TOP];
976 		unsigned int ui[BN_NIST_384_TOP *
977 		    sizeof(BN_ULONG) / sizeof(unsigned int)];
978 	} buf;
979 	BN_ULONG c_d[BN_NIST_384_TOP], *res;
980 	uintptr_t mask;
981 	union {
982 		bn_addsub_f f;
983 		uintptr_t p;
984 	} u;
985 	static const BIGNUM _bignum_nist_p_384_sqr = {
986 		(BN_ULONG *)_nist_p_384_sqr,
987 		sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]),
988 		sizeof(_nist_p_384_sqr) / sizeof(_nist_p_384_sqr[0]),
989 		0,
990 		BN_FLG_STATIC_DATA
991 	};
992 
993 	field = &_bignum_nist_p_384; /* just to make sure */
994 
995 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_384_sqr) >= 0)
996 		return BN_nnmod(r, a, field, ctx);
997 
998 	i = BN_ucmp(field, a);
999 	if (i == 0) {
1000 		BN_zero(r);
1001 		return 1;
1002 	} else if (i > 0)
1003 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1004 
1005 	if (r != a) {
1006 		if (!bn_wexpand(r, BN_NIST_384_TOP))
1007 			return 0;
1008 		r_d = r->d;
1009 		nist_cp_bn(r_d, a_d, BN_NIST_384_TOP);
1010 	} else
1011 		r_d = a_d;
1012 
1013 	nist_cp_bn_0(buf.bn, a_d + BN_NIST_384_TOP,
1014 	    top - BN_NIST_384_TOP, BN_NIST_384_TOP);
1015 
1016 #if defined(NIST_INT64)
1017 	{
1018 		NIST_INT64 acc;	/* accumulator */
1019 		unsigned int *rp = (unsigned int *)r_d;
1020 		const unsigned int *bp = (const unsigned int *)buf.ui;
1021 
1022 		acc = rp[0];
1023 		acc += bp[12 - 12];
1024 		acc += bp[21 - 12];
1025 		acc += bp[20 - 12];
1026 		acc -= bp[23 - 12];
1027 		rp[0] = (unsigned int)acc;
1028 		acc >>= 32;
1029 
1030 		acc += rp[1];
1031 		acc += bp[13 - 12];
1032 		acc += bp[22 - 12];
1033 		acc += bp[23 - 12];
1034 		acc -= bp[12 - 12];
1035 		acc -= bp[20 - 12];
1036 		rp[1] = (unsigned int)acc;
1037 		acc >>= 32;
1038 
1039 		acc += rp[2];
1040 		acc += bp[14 - 12];
1041 		acc += bp[23 - 12];
1042 		acc -= bp[13 - 12];
1043 		acc -= bp[21 - 12];
1044 		rp[2] = (unsigned int)acc;
1045 		acc >>= 32;
1046 
1047 		acc += rp[3];
1048 		acc += bp[15 - 12];
1049 		acc += bp[12 - 12];
1050 		acc += bp[20 - 12];
1051 		acc += bp[21 - 12];
1052 		acc -= bp[14 - 12];
1053 		acc -= bp[22 - 12];
1054 		acc -= bp[23 - 12];
1055 		rp[3] = (unsigned int)acc;
1056 		acc >>= 32;
1057 
1058 		acc += rp[4];
1059 		acc += bp[21 - 12];
1060 		acc += bp[21 - 12];
1061 		acc += bp[16 - 12];
1062 		acc += bp[13 - 12];
1063 		acc += bp[12 - 12];
1064 		acc += bp[20 - 12];
1065 		acc += bp[22 - 12];
1066 		acc -= bp[15 - 12];
1067 		acc -= bp[23 - 12];
1068 		acc -= bp[23 - 12];
1069 		rp[4] = (unsigned int)acc;
1070 		acc >>= 32;
1071 
1072 		acc += rp[5];
1073 		acc += bp[22 - 12];
1074 		acc += bp[22 - 12];
1075 		acc += bp[17 - 12];
1076 		acc += bp[14 - 12];
1077 		acc += bp[13 - 12];
1078 		acc += bp[21 - 12];
1079 		acc += bp[23 - 12];
1080 		acc -= bp[16 - 12];
1081 		rp[5] = (unsigned int)acc;
1082 		acc >>= 32;
1083 
1084 		acc += rp[6];
1085 		acc += bp[23 - 12];
1086 		acc += bp[23 - 12];
1087 		acc += bp[18 - 12];
1088 		acc += bp[15 - 12];
1089 		acc += bp[14 - 12];
1090 		acc += bp[22 - 12];
1091 		acc -= bp[17 - 12];
1092 		rp[6] = (unsigned int)acc;
1093 		acc >>= 32;
1094 
1095 		acc += rp[7];
1096 		acc += bp[19 - 12];
1097 		acc += bp[16 - 12];
1098 		acc += bp[15 - 12];
1099 		acc += bp[23 - 12];
1100 		acc -= bp[18 - 12];
1101 		rp[7] = (unsigned int)acc;
1102 		acc >>= 32;
1103 
1104 		acc += rp[8];
1105 		acc += bp[20 - 12];
1106 		acc += bp[17 - 12];
1107 		acc += bp[16 - 12];
1108 		acc -= bp[19 - 12];
1109 		rp[8] = (unsigned int)acc;
1110 		acc >>= 32;
1111 
1112 		acc += rp[9];
1113 		acc += bp[21 - 12];
1114 		acc += bp[18 - 12];
1115 		acc += bp[17 - 12];
1116 		acc -= bp[20 - 12];
1117 		rp[9] = (unsigned int)acc;
1118 		acc >>= 32;
1119 
1120 		acc += rp[10];
1121 		acc += bp[22 - 12];
1122 		acc += bp[19 - 12];
1123 		acc += bp[18 - 12];
1124 		acc -= bp[21 - 12];
1125 		rp[10] = (unsigned int)acc;
1126 		acc >>= 32;
1127 
1128 		acc += rp[11];
1129 		acc += bp[23 - 12];
1130 		acc += bp[20 - 12];
1131 		acc += bp[19 - 12];
1132 		acc -= bp[22 - 12];
1133 		rp[11] = (unsigned int)acc;
1134 
1135 		carry = (int)(acc >> 32);
1136 	}
1137 #else
1138 	{
1139 		BN_ULONG t_d[BN_NIST_384_TOP] = {0};
1140 
1141 		/*S1*/
1142 		nist_set_256(t_d, buf.bn, 0, 0, 0, 0, 0, 23 - 4, 22 - 4,
1143 		    21 - 4);
1144 		/* left shift */
1145 		{
1146 			BN_ULONG *ap, t, c;
1147 			ap = t_d;
1148 			c = 0;
1149 			for (i = 3; i != 0; --i) {
1150 				t= *ap;
1151 				*(ap++) = ((t << 1)|c) & BN_MASK2;
1152 				c = (t & BN_TBIT) ? 1 : 0;
1153 			}
1154 			*ap = c;
1155 		}
1156 		carry = (int)bn_add_words(r_d + (128 / BN_BITS2),
1157 		    r_d + (128 / BN_BITS2), t_d, BN_NIST_256_TOP);
1158 		/*S2 */
1159 		carry += (int)bn_add_words(r_d, r_d, buf.bn, BN_NIST_384_TOP);
1160 		/*S3*/
1161 		nist_set_384(t_d, buf.bn, 20, 19, 18, 17, 16, 15, 14, 13, 12,
1162 		    23, 22, 21);
1163 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1164 		/*S4*/
1165 		nist_set_384(t_d, buf.bn, 19, 18, 17, 16, 15, 14, 13, 12, 20,
1166 		    0, 23, 0);
1167 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1168 		/*S5*/
1169 		nist_set_384(t_d, buf.bn, 0,0, 0,0, 23, 22, 21, 20, 0,0, 0, 0);
1170 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1171 		/*S6*/
1172 		nist_set_384(t_d, buf.bn, 0,0, 0,0, 0,0, 23, 22, 21, 0,0, 20);
1173 		carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1174 		/*D1*/
1175 		nist_set_384(t_d, buf.bn, 22, 21, 20, 19, 18, 17, 16, 15, 14,
1176 		    13, 12, 23);
1177 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1178 		/*D2*/
1179 		nist_set_384(t_d, buf.bn, 0,0, 0,0, 0,0, 0,23, 22, 21, 20, 0);
1180 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1181 		/*D3*/
1182 		nist_set_384(t_d, buf.bn, 0,0, 0,0, 0,0, 0,23, 23, 0,0, 0);
1183 		carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
1184 
1185 	}
1186 #endif
1187 	/* see BN_nist_mod_224 for explanation */
1188 	u.f = bn_sub_words;
1189 	if (carry > 0)
1190 		carry = (int)bn_sub_words(r_d, r_d, _nist_p_384[carry - 1],
1191 		    BN_NIST_384_TOP);
1192 	else if (carry < 0) {
1193 		carry = (int)bn_add_words(r_d, r_d, _nist_p_384[-carry - 1],
1194 		    BN_NIST_384_TOP);
1195 		mask = 0 - (uintptr_t)carry;
1196 		u.p = ((uintptr_t)bn_sub_words & mask) |
1197 		    ((uintptr_t)bn_add_words & ~mask);
1198 	} else
1199 		carry = 1;
1200 
1201 	mask = 0 - (uintptr_t)(*u.f)(c_d, r_d, _nist_p_384[0], BN_NIST_384_TOP);
1202 	mask &= 0 - (uintptr_t)carry;
1203 	res = c_d;
1204 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
1205 	nist_cp_bn(r_d, res, BN_NIST_384_TOP);
1206 	r->top = BN_NIST_384_TOP;
1207 	bn_correct_top(r);
1208 
1209 	return 1;
1210 }
1211 
/* Bit offset of bit 521 within its BN_ULONG word. */
#define BN_NIST_521_RSHIFT	(521 % BN_BITS2)
/* Complementary shift: the remaining bits of that word. */
#define BN_NIST_521_LSHIFT	(BN_BITS2 - BN_NIST_521_RSHIFT)
/* Mask keeping only the low BN_NIST_521_RSHIFT bits of a word. */
#define BN_NIST_521_TOP_MASK	((BN_ULONG)BN_MASK2 >> BN_NIST_521_LSHIFT)
1215 
1216 int
1217 BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, BN_CTX *ctx)
1218 {
1219 	int top = a->top, i;
1220 	BN_ULONG *r_d, *a_d = a->d, t_d[BN_NIST_521_TOP], val, tmp, *res;
1221 	uintptr_t mask;
1222 	static const BIGNUM _bignum_nist_p_521_sqr = {
1223 		(BN_ULONG *)_nist_p_521_sqr,
1224 		sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]),
1225 		sizeof(_nist_p_521_sqr) / sizeof(_nist_p_521_sqr[0]),
1226 		0,
1227 		BN_FLG_STATIC_DATA
1228 	};
1229 
1230 	field = &_bignum_nist_p_521; /* just to make sure */
1231 
1232 	if (BN_is_negative(a) || BN_ucmp(a, &_bignum_nist_p_521_sqr) >= 0)
1233 		return BN_nnmod(r, a, field, ctx);
1234 
1235 	i = BN_ucmp(field, a);
1236 	if (i == 0) {
1237 		BN_zero(r);
1238 		return 1;
1239 	} else if (i > 0)
1240 		return (r == a) ? 1 : (BN_copy(r, a) != NULL);
1241 
1242 	if (r != a) {
1243 		if (!bn_wexpand(r, BN_NIST_521_TOP))
1244 			return 0;
1245 		r_d = r->d;
1246 		nist_cp_bn(r_d, a_d, BN_NIST_521_TOP);
1247 	} else
1248 		r_d = a_d;
1249 
1250 	/* upper 521 bits, copy ... */
1251 	nist_cp_bn_0(t_d, a_d + (BN_NIST_521_TOP - 1),
1252 	    top - (BN_NIST_521_TOP - 1), BN_NIST_521_TOP);
1253 	/* ... and right shift */
1254 	for (val = t_d[0], i = 0; i < BN_NIST_521_TOP - 1; i++) {
1255 		tmp = val >> BN_NIST_521_RSHIFT;
1256 		val = t_d[i + 1];
1257 		t_d[i] = (tmp | val << BN_NIST_521_LSHIFT) & BN_MASK2;
1258 	}
1259 	t_d[i] = val >> BN_NIST_521_RSHIFT;
1260 	/* lower 521 bits */
1261 	r_d[i] &= BN_NIST_521_TOP_MASK;
1262 
1263 	bn_add_words(r_d, r_d, t_d, BN_NIST_521_TOP);
1264 	mask = 0 - (uintptr_t)bn_sub_words(t_d, r_d, _nist_p_521,
1265 	    BN_NIST_521_TOP);
1266 	res = t_d;
1267 	res = (BN_ULONG *)(((uintptr_t)res & ~mask) | ((uintptr_t)r_d & mask));
1268 	nist_cp_bn(r_d, res, BN_NIST_521_TOP);
1269 	r->top = BN_NIST_521_TOP;
1270 	bn_correct_top(r);
1271 
1272 	return 1;
1273 }
1274