xref: /openbsd/lib/libcrypto/bn/bn_exp.c (revision b4a69cdb)
1 /* $OpenBSD: bn_exp.c,v 1.56 2025/01/22 12:53:16 tb Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 #include <stdlib.h>
113 #include <string.h>
114 
115 #include <openssl/err.h>
116 
117 #include "bn_local.h"
118 #include "constant_time.h"
119 
120 /* maximum precomputation table size for *variable* sliding windows */
121 #define TABLE_SIZE	32
122 
123 /* Calculates r = a^p by successive squaring of a. Not constant time. */
124 int
BN_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,BN_CTX * ctx)125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
126 {
127 	BIGNUM *rr, *v;
128 	int i;
129 	int ret = 0;
130 
131 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
132 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133 		return -1;
134 	}
135 
136 	BN_CTX_start(ctx);
137 
138 	if ((v = BN_CTX_get(ctx)) == NULL)
139 		goto err;
140 
141 	rr = r;
142 	if (r == a || r == p)
143 		rr = BN_CTX_get(ctx);
144 	if (rr == NULL)
145 		goto err;
146 
147 	if (!BN_one(rr))
148 		goto err;
149 	if (BN_is_odd(p)) {
150 		if (!bn_copy(rr, a))
151 			goto err;
152 	}
153 
154 	if (!bn_copy(v, a))
155 		goto err;
156 
157 	for (i = 1; i < BN_num_bits(p); i++) {
158 		if (!BN_sqr(v, v, ctx))
159 			goto err;
160 		if (!BN_is_bit_set(p, i))
161 			continue;
162 		if (!BN_mul(rr, rr, v, ctx))
163 			goto err;
164 	}
165 
166 	if (!bn_copy(r, rr))
167 		goto err;
168 
169 	ret = 1;
170 
171  err:
172 	BN_CTX_end(ctx);
173 
174 	return ret;
175 }
176 LCRYPTO_ALIAS(BN_exp);
177 
178 /* The old fallback, simple version :-) */
179 int
BN_mod_exp_simple(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)180 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
181     BN_CTX *ctx)
182 {
183 	int i, j, bits, wstart, wend, window, wvalue;
184 	int start = 1;
185 	BIGNUM *d, *q;
186 	/* Table of variables obtained from 'ctx' */
187 	BIGNUM *val[TABLE_SIZE];
188 	int ret = 0;
189 
190 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
191 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
192 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
193 		return -1;
194 	}
195 
196 	if (r == m) {
197 		BNerror(BN_R_INVALID_ARGUMENT);
198 		return 0;
199 	}
200 
201 	bits = BN_num_bits(p);
202 	if (bits == 0) {
203 		/* x**0 mod 1 is still zero. */
204 		if (BN_abs_is_word(m, 1)) {
205 			ret = 1;
206 			BN_zero(r);
207 		} else
208 			ret = BN_one(r);
209 		return ret;
210 	}
211 
212 	BN_CTX_start(ctx);
213 	if ((d = BN_CTX_get(ctx)) == NULL)
214 		goto err;
215 	if ((q = BN_CTX_get(ctx)) == NULL)
216 		goto err;
217 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
218 		goto err;
219 
220 	if (!BN_nnmod(val[0], a, m, ctx))
221 		goto err;
222 	if (BN_is_zero(val[0])) {
223 		BN_zero(r);
224 		goto done;
225 	}
226 	if (!bn_copy(q, p))
227 		goto err;
228 
229 	window = BN_window_bits_for_exponent_size(bits);
230 	if (window > 1) {
231 		if (!BN_mod_mul(d, val[0], val[0], m, ctx))
232 			goto err;
233 		j = 1 << (window - 1);
234 		for (i = 1; i < j; i++) {
235 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
236 			    !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
237 				goto err;
238 		}
239 	}
240 
241 	start = 1;		/* This is used to avoid multiplication etc
242 				 * when there is only the value '1' in the
243 				 * buffer. */
244 	wvalue = 0;		/* The 'value' of the window */
245 	wstart = bits - 1;	/* The top bit of the window */
246 	wend = 0;		/* The bottom bit of the window */
247 
248 	if (!BN_one(r))
249 		goto err;
250 
251 	for (;;) {
252 		if (BN_is_bit_set(q, wstart) == 0) {
253 			if (!start)
254 				if (!BN_mod_mul(r, r, r, m, ctx))
255 					goto err;
256 			if (wstart == 0)
257 				break;
258 			wstart--;
259 			continue;
260 		}
261 		/* We now have wstart on a 'set' bit, we now need to work out
262 		 * how bit a window to do.  To do this we need to scan
263 		 * forward until the last set bit before the end of the
264 		 * window */
265 		j = wstart;
266 		wvalue = 1;
267 		wend = 0;
268 		for (i = 1; i < window; i++) {
269 			if (wstart - i < 0)
270 				break;
271 			if (BN_is_bit_set(q, wstart - i)) {
272 				wvalue <<= (i - wend);
273 				wvalue |= 1;
274 				wend = i;
275 			}
276 		}
277 
278 		/* wend is the size of the current window */
279 		j = wend + 1;
280 		/* add the 'bytes above' */
281 		if (!start)
282 			for (i = 0; i < j; i++) {
283 				if (!BN_mod_mul(r, r, r, m, ctx))
284 					goto err;
285 			}
286 
287 		/* wvalue will be an odd number < 2^window */
288 		if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
289 			goto err;
290 
291 		/* move the 'window' down further */
292 		wstart -= wend + 1;
293 		wvalue = 0;
294 		start = 0;
295 		if (wstart < 0)
296 			break;
297 	}
298 
299  done:
300 	ret = 1;
301 
302  err:
303 	BN_CTX_end(ctx);
304 
305 	return ret;
306 }
307 
308 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
309  * so that accessing any of these table values shows the same access pattern as far
310  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
311  * from/to that table. */
312 
313 static int
MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM * b,int top,unsigned char * buf,int idx,int window)314 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
315     int idx, int window)
316 {
317 	int i, j;
318 	int width = 1 << window;
319 	BN_ULONG *table = (BN_ULONG *)buf;
320 
321 	if (top > b->top)
322 		top = b->top; /* this works because 'buf' is explicitly zeroed */
323 
324 	for (i = 0, j = idx; i < top; i++, j += width) {
325 		table[j] = b->d[i];
326 	}
327 
328 	return 1;
329 }
330 
331 static int
MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM * b,int top,unsigned char * buf,int idx,int window)332 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
333     int window)
334 {
335 	int i, j;
336 	int width = 1 << window;
337 	volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
338 
339 	if (!bn_wexpand(b, top))
340 		return 0;
341 
342 	if (window <= 3) {
343 		for (i = 0; i < top; i++, table += width) {
344 		    BN_ULONG acc = 0;
345 
346 		    for (j = 0; j < width; j++) {
347 			acc |= table[j] &
348 			       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
349 		    }
350 
351 		    b->d[i] = acc;
352 		}
353 	} else {
354 		int xstride = 1 << (window - 2);
355 		BN_ULONG y0, y1, y2, y3;
356 
357 		i = idx >> (window - 2);        /* equivalent of idx / xstride */
358 		idx &= xstride - 1;             /* equivalent of idx % xstride */
359 
360 		y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
361 		y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
362 		y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
363 		y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
364 
365 		for (i = 0; i < top; i++, table += width) {
366 		    BN_ULONG acc = 0;
367 
368 		    for (j = 0; j < xstride; j++) {
369 			acc |= ( (table[j + 0 * xstride] & y0) |
370 				 (table[j + 1 * xstride] & y1) |
371 				 (table[j + 2 * xstride] & y2) |
372 				 (table[j + 3 * xstride] & y3) )
373 			       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
374 		    }
375 
376 		    b->d[i] = acc;
377 		}
378 	}
379 	b->top = top;
380 	bn_correct_top(b);
381 	return 1;
382 }
383 
384 /* Given a pointer value, compute the next address that is a cache line multiple. */
385 #define MOD_EXP_CTIME_ALIGN(x_) \
386 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
387 
388 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
389  * precomputation memory layout to limit data-dependency to a minimum
390  * to protect secret exponents (cf. the hyper-threading timing attacks
391  * pointed out by Colin Percival,
392  * http://www.daemonology.net/hyperthreading-considered-harmful/)
393  */
394 int
BN_mod_exp_mont_consttime(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)395 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
396     const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
397 {
398 	int i, bits, ret = 0, window, wvalue;
399 	int top;
400 	BN_MONT_CTX *mont = NULL;
401 	int numPowers;
402 	unsigned char *powerbufFree = NULL;
403 	int powerbufLen = 0;
404 	unsigned char *powerbuf = NULL;
405 	BIGNUM tmp, am;
406 
407 
408 	if (!BN_is_odd(m)) {
409 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
410 		return (0);
411 	}
412 
413 	top = m->top;
414 
415 	bits = BN_num_bits(p);
416 	if (bits == 0) {
417 		/* x**0 mod 1 is still zero. */
418 		if (BN_abs_is_word(m, 1)) {
419 			ret = 1;
420 			BN_zero(rr);
421 		} else
422 			ret = BN_one(rr);
423 		return ret;
424 	}
425 
426 	BN_CTX_start(ctx);
427 
428 	/*
429 	 * Allocate a Montgomery context if it was not supplied by the caller.
430 	 * If this is not done, things will break in the montgomery part.
431 	 */
432 	if (in_mont != NULL)
433 		mont = in_mont;
434 	else {
435 		if ((mont = BN_MONT_CTX_new()) == NULL)
436 			goto err;
437 		if (!BN_MONT_CTX_set(mont, m, ctx))
438 			goto err;
439 	}
440 
441 	/* Get the window size to use with size of p. */
442 	window = BN_window_bits_for_ctime_exponent_size(bits);
443 #if defined(OPENSSL_BN_ASM_MONT5)
444 	if (window == 6 && bits <= 1024)
445 		window = 5;	/* ~5% improvement of 2048-bit RSA sign */
446 #endif
447 
448 	/* Allocate a buffer large enough to hold all of the pre-computed
449 	 * powers of am, am itself and tmp.
450 	 */
451 	numPowers = 1 << window;
452 	powerbufLen = sizeof(m->d[0]) * (top * numPowers +
453 	    ((2*top) > numPowers ? (2*top) : numPowers));
454 	if ((powerbufFree = calloc(powerbufLen +
455 	    MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL)
456 		goto err;
457 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
458 
459 	/* lay down tmp and am right after powers table */
460 	tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
461 	am.d = tmp.d + top;
462 	tmp.top = am.top = 0;
463 	tmp.dmax = am.dmax = top;
464 	tmp.neg = am.neg = 0;
465 	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
466 
467 	/* prepare a^0 in Montgomery domain */
468 #if 1
469 	if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
470 		goto err;
471 #else
472 	tmp.d[0] = (0 - m - >d[0]) & BN_MASK2;	/* 2^(top*BN_BITS2) - m */
473 	for (i = 1; i < top; i++)
474 		tmp.d[i] = (~m->d[i]) & BN_MASK2;
475 	tmp.top = top;
476 #endif
477 
478 	/* prepare a^1 in Montgomery domain */
479 	if (!BN_nnmod(&am, a, m, ctx))
480 		goto err;
481 	if (!BN_to_montgomery(&am, &am, mont, ctx))
482 		goto err;
483 
484 #if defined(OPENSSL_BN_ASM_MONT5)
485 	/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
486 	 * specifically optimization of cache-timing attack countermeasures
487 	 * and pre-computation optimization. */
488 
489 	/* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
490 	 * 512-bit RSA is hardly relevant, we omit it to spare size... */
491 	if (window == 5 && top > 1) {
492 		void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
493 		    const void *table, const BN_ULONG *np,
494 		    const BN_ULONG *n0, int num, int power);
495 		void bn_scatter5(const BN_ULONG *inp, size_t num,
496 		    void *table, size_t power);
497 		void bn_gather5(BN_ULONG *out, size_t num,
498 		    void *table, size_t power);
499 
500 		BN_ULONG *np = mont->N.d, *n0 = mont->n0;
501 
502 		/* BN_to_montgomery can contaminate words above .top
503 		 * [in BN_DEBUG[_DEBUG] build]... */
504 		for (i = am.top; i < top; i++)
505 			am.d[i] = 0;
506 		for (i = tmp.top; i < top; i++)
507 			tmp.d[i] = 0;
508 
509 		bn_scatter5(tmp.d, top, powerbuf, 0);
510 		bn_scatter5(am.d, am.top, powerbuf, 1);
511 		bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
512 		bn_scatter5(tmp.d, top, powerbuf, 2);
513 
514 #if 0
515 		for (i = 3; i < 32; i++) {
516 			/* Calculate a^i = a^(i-1) * a */
517 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
518 			    n0, top, i - 1);
519 			bn_scatter5(tmp.d, top, powerbuf, i);
520 		}
521 #else
522 		/* same as above, but uses squaring for 1/2 of operations */
523 		for (i = 4; i < 32; i*=2) {
524 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
525 			bn_scatter5(tmp.d, top, powerbuf, i);
526 		}
527 		for (i = 3; i < 8; i += 2) {
528 			int j;
529 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
530 			    n0, top, i - 1);
531 			bn_scatter5(tmp.d, top, powerbuf, i);
532 			for (j = 2 * i; j < 32; j *= 2) {
533 				bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
534 				bn_scatter5(tmp.d, top, powerbuf, j);
535 			}
536 		}
537 		for (; i < 16; i += 2) {
538 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
539 			    n0, top, i - 1);
540 			bn_scatter5(tmp.d, top, powerbuf, i);
541 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
542 			bn_scatter5(tmp.d, top, powerbuf, 2*i);
543 		}
544 		for (; i < 32; i += 2) {
545 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
546 			    n0, top, i - 1);
547 			bn_scatter5(tmp.d, top, powerbuf, i);
548 		}
549 #endif
550 		bits--;
551 		for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
552 			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
553 		bn_gather5(tmp.d, top, powerbuf, wvalue);
554 
555 		/* Scan the exponent one window at a time starting from the most
556 		 * significant bits.
557 		 */
558 		while (bits >= 0) {
559 			for (wvalue = 0, i = 0; i < 5; i++, bits--)
560 				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
561 
562 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
563 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
564 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
565 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
566 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
567 			bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
568 		}
569 
570 		tmp.top = top;
571 		bn_correct_top(&tmp);
572 	} else
573 #endif
574 	{
575 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
576 		    window))
577 			goto err;
578 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1,
579 		    window))
580 			goto err;
581 
582 		/* If the window size is greater than 1, then calculate
583 		 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
584 		 * (even powers could instead be computed as (a^(i/2))^2
585 		 * to use the slight performance advantage of sqr over mul).
586 		 */
587 		if (window > 1) {
588 			if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
589 				goto err;
590 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
591 			    2, window))
592 				goto err;
593 			for (i = 3; i < numPowers; i++) {
594 				/* Calculate a^i = a^(i-1) * a */
595 				if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
596 				    mont, ctx))
597 					goto err;
598 				if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
599 				    powerbuf, i, window))
600 					goto err;
601 			}
602 		}
603 
604 		bits--;
605 		for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
606 			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
607 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
608 		    wvalue, window))
609 			goto err;
610 
611 		/* Scan the exponent one window at a time starting from the most
612 		 * significant bits.
613 		 */
614 		while (bits >= 0) {
615 			wvalue = 0; /* The 'value' of the window */
616 
617 			/* Scan the window, squaring the result as we go */
618 			for (i = 0; i < window; i++, bits--) {
619 				if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
620 				    mont, ctx))
621 					goto err;
622 				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
623 			}
624 
625 			/* Fetch the appropriate pre-computed value from the pre-buf */
626 			if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
627 			    wvalue, window))
628 				goto err;
629 
630 			/* Multiply the result into the intermediate result */
631 			if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
632 				goto err;
633 		}
634 	}
635 
636 	/* Convert the final result from montgomery to standard format */
637 	if (!BN_from_montgomery(rr, &tmp, mont, ctx))
638 		goto err;
639 	ret = 1;
640 
641 err:
642 	if ((in_mont == NULL) && (mont != NULL))
643 		BN_MONT_CTX_free(mont);
644 	freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
645 	BN_CTX_end(ctx);
646 	return (ret);
647 }
648 LCRYPTO_ALIAS(BN_mod_exp_mont_consttime);
649 
650 static int
BN_mod_exp_mont_internal(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont,int ct)651 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
652     BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
653 {
654 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
655 	int start = 1;
656 	BIGNUM *d, *r;
657 	const BIGNUM *aa;
658 	/* Table of variables obtained from 'ctx' */
659 	BIGNUM *val[TABLE_SIZE];
660 	BN_MONT_CTX *mont = NULL;
661 
662 	if (ct) {
663 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
664 	}
665 
666 
667 	if (!BN_is_odd(m)) {
668 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
669 		return (0);
670 	}
671 
672 	bits = BN_num_bits(p);
673 	if (bits == 0) {
674 		/* x**0 mod 1 is still zero. */
675 		if (BN_abs_is_word(m, 1)) {
676 			ret = 1;
677 			BN_zero(rr);
678 		} else
679 			ret = BN_one(rr);
680 		return ret;
681 	}
682 
683 	BN_CTX_start(ctx);
684 	if ((d = BN_CTX_get(ctx)) == NULL)
685 		goto err;
686 	if ((r = BN_CTX_get(ctx)) == NULL)
687 		goto err;
688 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
689 		goto err;
690 
691 	/* If this is not done, things will break in the montgomery
692 	 * part */
693 
694 	if (in_mont != NULL)
695 		mont = in_mont;
696 	else {
697 		if ((mont = BN_MONT_CTX_new()) == NULL)
698 			goto err;
699 		if (!BN_MONT_CTX_set(mont, m, ctx))
700 			goto err;
701 	}
702 
703 	if (!BN_nnmod(val[0], a,m, ctx))
704 		goto err;
705 	aa = val[0];
706 	if (BN_is_zero(aa)) {
707 		BN_zero(rr);
708 		ret = 1;
709 		goto err;
710 	}
711 	if (!BN_to_montgomery(val[0], aa, mont, ctx))
712 		goto err;
713 
714 	window = BN_window_bits_for_exponent_size(bits);
715 	if (window > 1) {
716 		if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
717 			goto err;
718 		j = 1 << (window - 1);
719 		for (i = 1; i < j; i++) {
720 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
721 			    !BN_mod_mul_montgomery(val[i], val[i - 1],
722 			    d, mont, ctx))
723 				goto err;
724 		}
725 	}
726 
727 	start = 1;		/* This is used to avoid multiplication etc
728 				 * when there is only the value '1' in the
729 				 * buffer. */
730 	wvalue = 0;		/* The 'value' of the window */
731 	wstart = bits - 1;	/* The top bit of the window */
732 	wend = 0;		/* The bottom bit of the window */
733 
734 	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
735 		goto err;
736 	for (;;) {
737 		if (BN_is_bit_set(p, wstart) == 0) {
738 			if (!start) {
739 				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
740 					goto err;
741 			}
742 			if (wstart == 0)
743 				break;
744 			wstart--;
745 			continue;
746 		}
747 		/* We now have wstart on a 'set' bit, we now need to work out
748 		 * how bit a window to do.  To do this we need to scan
749 		 * forward until the last set bit before the end of the
750 		 * window */
751 		j = wstart;
752 		wvalue = 1;
753 		wend = 0;
754 		for (i = 1; i < window; i++) {
755 			if (wstart - i < 0)
756 				break;
757 			if (BN_is_bit_set(p, wstart - i)) {
758 				wvalue <<= (i - wend);
759 				wvalue |= 1;
760 				wend = i;
761 			}
762 		}
763 
764 		/* wend is the size of the current window */
765 		j = wend + 1;
766 		/* add the 'bytes above' */
767 		if (!start)
768 			for (i = 0; i < j; i++) {
769 				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
770 					goto err;
771 			}
772 
773 		/* wvalue will be an odd number < 2^window */
774 		if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
775 			goto err;
776 
777 		/* move the 'window' down further */
778 		wstart -= wend + 1;
779 		wvalue = 0;
780 		start = 0;
781 		if (wstart < 0)
782 			break;
783 	}
784 	if (!BN_from_montgomery(rr, r,mont, ctx))
785 		goto err;
786 	ret = 1;
787 
788 err:
789 	if ((in_mont == NULL) && (mont != NULL))
790 		BN_MONT_CTX_free(mont);
791 	BN_CTX_end(ctx);
792 	return (ret);
793 }
794 
795 int
BN_mod_exp_mont(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)796 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
797     BN_CTX *ctx, BN_MONT_CTX *in_mont)
798 {
799 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
800 	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
801 }
802 LCRYPTO_ALIAS(BN_mod_exp_mont);
803 
804 int
BN_mod_exp_mont_ct(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)805 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
806     BN_CTX *ctx, BN_MONT_CTX *in_mont)
807 {
808 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
809 }
810 
811 int
BN_mod_exp_mont_nonct(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)812 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
813     BN_CTX *ctx, BN_MONT_CTX *in_mont)
814 {
815 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
816 }
817 
818 int
BN_mod_exp_mont_word(BIGNUM * rr,BN_ULONG a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)819 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
820     BN_CTX *ctx, BN_MONT_CTX *in_mont)
821 {
822 	BN_MONT_CTX *mont = NULL;
823 	int b, bits, ret = 0;
824 	int r_is_one;
825 	BN_ULONG w, next_w;
826 	BIGNUM *d, *r, *t;
827 	BIGNUM *swap_tmp;
828 
829 #define BN_MOD_MUL_WORD(r, w, m) \
830 		(BN_mul_word(r, (w)) && \
831 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
832 			(BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
833 		/* BN_MOD_MUL_WORD is only used with 'w' large,
834 		 * so the BN_ucmp test is probably more overhead
835 		 * than always using BN_mod (which uses bn_copy if
836 		 * a similar test returns true). */
837 		/* We can use BN_mod and do not need BN_nnmod because our
838 		 * accumulator is never negative (the result of BN_mod does
839 		 * not depend on the sign of the modulus).
840 		 */
841 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
842 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
843 
844 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
845 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
846 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
847 		return -1;
848 	}
849 
850 
851 	if (!BN_is_odd(m)) {
852 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
853 		return (0);
854 	}
855 	if (m->top == 1)
856 		a %= m->d[0]; /* make sure that 'a' is reduced */
857 
858 	bits = BN_num_bits(p);
859 	if (bits == 0) {
860 		/* x**0 mod 1 is still zero. */
861 		if (BN_abs_is_word(m, 1)) {
862 			ret = 1;
863 			BN_zero(rr);
864 		} else
865 			ret = BN_one(rr);
866 		return ret;
867 	}
868 	if (a == 0) {
869 		BN_zero(rr);
870 		ret = 1;
871 		return ret;
872 	}
873 
874 	BN_CTX_start(ctx);
875 	if ((d = BN_CTX_get(ctx)) == NULL)
876 		goto err;
877 	if ((r = BN_CTX_get(ctx)) == NULL)
878 		goto err;
879 	if ((t = BN_CTX_get(ctx)) == NULL)
880 		goto err;
881 
882 	if (in_mont != NULL)
883 		mont = in_mont;
884 	else {
885 		if ((mont = BN_MONT_CTX_new()) == NULL)
886 			goto err;
887 		if (!BN_MONT_CTX_set(mont, m, ctx))
888 			goto err;
889 	}
890 
891 	r_is_one = 1; /* except for Montgomery factor */
892 
893 	/* bits-1 >= 0 */
894 
895 	/* The result is accumulated in the product r*w. */
896 	w = a; /* bit 'bits-1' of 'p' is always set */
897 	for (b = bits - 2; b >= 0; b--) {
898 		/* First, square r*w. */
899 		next_w = w * w;
900 		if ((next_w / w) != w) /* overflow */
901 		{
902 			if (r_is_one) {
903 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
904 					goto err;
905 				r_is_one = 0;
906 			} else {
907 				if (!BN_MOD_MUL_WORD(r, w, m))
908 					goto err;
909 			}
910 			next_w = 1;
911 		}
912 		w = next_w;
913 		if (!r_is_one) {
914 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
915 				goto err;
916 		}
917 
918 		/* Second, multiply r*w by 'a' if exponent bit is set. */
919 		if (BN_is_bit_set(p, b)) {
920 			next_w = w * a;
921 			if ((next_w / a) != w) /* overflow */
922 			{
923 				if (r_is_one) {
924 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
925 						goto err;
926 					r_is_one = 0;
927 				} else {
928 					if (!BN_MOD_MUL_WORD(r, w, m))
929 						goto err;
930 				}
931 				next_w = a;
932 			}
933 			w = next_w;
934 		}
935 	}
936 
937 	/* Finally, set r:=r*w. */
938 	if (w != 1) {
939 		if (r_is_one) {
940 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
941 				goto err;
942 			r_is_one = 0;
943 		} else {
944 			if (!BN_MOD_MUL_WORD(r, w, m))
945 				goto err;
946 		}
947 	}
948 
949 	if (r_is_one) /* can happen only if a == 1*/
950 	{
951 		if (!BN_one(rr))
952 			goto err;
953 	} else {
954 		if (!BN_from_montgomery(rr, r, mont, ctx))
955 			goto err;
956 	}
957 	ret = 1;
958 
959 err:
960 	if ((in_mont == NULL) && (mont != NULL))
961 		BN_MONT_CTX_free(mont);
962 	BN_CTX_end(ctx);
963 	return (ret);
964 }
965 
966 int
BN_mod_exp_recp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)967 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
968     BN_CTX *ctx)
969 {
970 	int i, j, bits, wstart, wend, window, wvalue;
971 	int start = 1;
972 	BIGNUM *aa, *q;
973 	/* Table of variables obtained from 'ctx' */
974 	BIGNUM *val[TABLE_SIZE];
975 	BN_RECP_CTX *recp = NULL;
976 	int ret = 0;
977 
978 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
979 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
980 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
981 		return -1;
982 	}
983 
984 	bits = BN_num_bits(p);
985 	if (bits == 0) {
986 		/* x**0 mod 1 is still zero. */
987 		if (BN_abs_is_word(m, 1)) {
988 			ret = 1;
989 			BN_zero(r);
990 		} else
991 			ret = BN_one(r);
992 		return ret;
993 	}
994 
995 	BN_CTX_start(ctx);
996 	if ((aa = BN_CTX_get(ctx)) == NULL)
997 		goto err;
998 	if ((q = BN_CTX_get(ctx)) == NULL)
999 		goto err;
1000 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
1001 		goto err;
1002 
1003 	if ((recp = BN_RECP_CTX_create(m)) == NULL)
1004 		goto err;
1005 
1006 	if (!BN_nnmod(val[0], a, m, ctx))
1007 		goto err;
1008 	if (BN_is_zero(val[0])) {
1009 		BN_zero(r);
1010 		goto done;
1011 	}
1012 	if (!bn_copy(q, p))
1013 		goto err;
1014 
1015 	window = BN_window_bits_for_exponent_size(bits);
1016 	if (window > 1) {
1017 		if (!BN_mod_sqr_reciprocal(aa, val[0], recp, ctx))
1018 			goto err;
1019 		j = 1 << (window - 1);
1020 		for (i = 1; i < j; i++) {
1021 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1022 			    !BN_mod_mul_reciprocal(val[i], val[i - 1],
1023 			    aa, recp, ctx))
1024 				goto err;
1025 		}
1026 	}
1027 
1028 	start = 1;		/* This is used to avoid multiplication etc
1029 				 * when there is only the value '1' in the
1030 				 * buffer. */
1031 	wvalue = 0;		/* The 'value' of the window */
1032 	wstart = bits - 1;	/* The top bit of the window */
1033 	wend = 0;		/* The bottom bit of the window */
1034 
1035 	if (!BN_one(r))
1036 		goto err;
1037 
1038 	for (;;) {
1039 		if (BN_is_bit_set(q, wstart) == 0) {
1040 			if (!start)
1041 				if (!BN_mod_sqr_reciprocal(r, r, recp, ctx))
1042 					goto err;
1043 			if (wstart == 0)
1044 				break;
1045 			wstart--;
1046 			continue;
1047 		}
1048 		/* We now have wstart on a 'set' bit, we now need to work out
1049 		 * how bit a window to do.  To do this we need to scan
1050 		 * forward until the last set bit before the end of the
1051 		 * window */
1052 		j = wstart;
1053 		wvalue = 1;
1054 		wend = 0;
1055 		for (i = 1; i < window; i++) {
1056 			if (wstart - i < 0)
1057 				break;
1058 			if (BN_is_bit_set(q, wstart - i)) {
1059 				wvalue <<= (i - wend);
1060 				wvalue |= 1;
1061 				wend = i;
1062 			}
1063 		}
1064 
1065 		/* wend is the size of the current window */
1066 		j = wend + 1;
1067 		/* add the 'bytes above' */
1068 		if (!start)
1069 			for (i = 0; i < j; i++) {
1070 				if (!BN_mod_sqr_reciprocal(r, r, recp, ctx))
1071 					goto err;
1072 			}
1073 
1074 		/* wvalue will be an odd number < 2^window */
1075 		if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], recp, ctx))
1076 			goto err;
1077 
1078 		/* move the 'window' down further */
1079 		wstart -= wend + 1;
1080 		wvalue = 0;
1081 		start = 0;
1082 		if (wstart < 0)
1083 			break;
1084 	}
1085 
1086  done:
1087 	ret = 1;
1088 
1089  err:
1090 	BN_CTX_end(ctx);
1091 	BN_RECP_CTX_free(recp);
1092 
1093 	return ret;
1094 }
1095 
1096 static int
BN_mod_exp_internal(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,int ct)1097 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1098     BN_CTX *ctx, int ct)
1099 {
1100 	int ret;
1101 
1102 
1103 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
1104 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
1105 	 * exponentiation for the odd part), using appropriate exponent
1106 	 * reductions, and combine the results using the CRT.
1107 	 *
1108 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
1109 	 * exponentiation using the reciprocal-based quick remaindering
1110 	 * algorithm is used.
1111 	 *
1112 	 * (Timing obtained with expspeed.c [computations  a^p mod m
1113 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
1114 	 * 4096, 8192 bits], compared to the running time of the
1115 	 * standard algorithm:
1116 	 *
1117 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
1118 	 *                     55 .. 77 %  [UltraSparc processor, but
1119 	 *                                  debug-solaris-sparcv8-gcc conf.]
1120 	 *
1121 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
1122 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
1123 	 *
1124 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
1125 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
1126 	 * slower even than the standard algorithm!
1127 	 *
1128 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
1129 	 * should be obtained when the new Montgomery reduction code
1130 	 * has been integrated into OpenSSL.)
1131 	 */
1132 
1133 	if (BN_is_odd(m)) {
1134 		if (a->top == 1 && !a->neg && !ct) {
1135 			BN_ULONG A = a->d[0];
1136 			ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
1137 		} else
1138 			ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
1139 	} else	{
1140 		ret = BN_mod_exp_recp(r, a,p, m, ctx);
1141 	}
1142 
1143 	return (ret);
1144 }
1145 
1146 int
BN_mod_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)1147 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1148     BN_CTX *ctx)
1149 {
1150 	return BN_mod_exp_internal(r, a, p, m, ctx,
1151 	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
1152 }
1153 LCRYPTO_ALIAS(BN_mod_exp);
1154 
1155 int
BN_mod_exp_ct(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)1156 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1157     BN_CTX *ctx)
1158 {
1159 	return BN_mod_exp_internal(r, a, p, m, ctx, 1);
1160 }
1161 
1162 int
BN_mod_exp_nonct(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)1163 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1164     BN_CTX *ctx)
1165 {
1166 	return BN_mod_exp_internal(r, a, p, m, ctx, 0);
1167 }
1168 
1169 int
BN_mod_exp2_mont(BIGNUM * rr,const BIGNUM * a1,const BIGNUM * p1,const BIGNUM * a2,const BIGNUM * p2,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)1170 BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
1171     const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx,
1172     BN_MONT_CTX *in_mont)
1173 {
1174 	int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
1175 	int r_is_one = 1;
1176 	BIGNUM *d, *r;
1177 	const BIGNUM *a_mod_m;
1178 	/* Tables of variables obtained from 'ctx' */
1179 	BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
1180 	BN_MONT_CTX *mont = NULL;
1181 
1182 
1183 	if (!BN_is_odd(m)) {
1184 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
1185 		return (0);
1186 	}
1187 	bits1 = BN_num_bits(p1);
1188 	bits2 = BN_num_bits(p2);
1189 	if ((bits1 == 0) && (bits2 == 0)) {
1190 		ret = BN_one(rr);
1191 		return ret;
1192 	}
1193 
1194 	bits = (bits1 > bits2) ? bits1 : bits2;
1195 
1196 	BN_CTX_start(ctx);
1197 	if ((d = BN_CTX_get(ctx)) == NULL)
1198 		goto err;
1199 	if ((r = BN_CTX_get(ctx)) == NULL)
1200 		goto err;
1201 	if ((val1[0] = BN_CTX_get(ctx)) == NULL)
1202 		goto err;
1203 	if ((val2[0] = BN_CTX_get(ctx)) == NULL)
1204 		goto err;
1205 
1206 	if (in_mont != NULL)
1207 		mont = in_mont;
1208 	else {
1209 		if ((mont = BN_MONT_CTX_new()) == NULL)
1210 			goto err;
1211 		if (!BN_MONT_CTX_set(mont, m, ctx))
1212 			goto err;
1213 	}
1214 
1215 	window1 = BN_window_bits_for_exponent_size(bits1);
1216 	window2 = BN_window_bits_for_exponent_size(bits2);
1217 
1218 	/*
1219 	 * Build table for a1:   val1[i] := a1^(2*i + 1) mod m  for i = 0 .. 2^(window1-1)
1220 	 */
1221 	if (!BN_nnmod(val1[0], a1, m, ctx))
1222 		goto err;
1223 	a_mod_m = val1[0];
1224 	if (BN_is_zero(a_mod_m)) {
1225 		BN_zero(rr);
1226 		ret = 1;
1227 		goto err;
1228 	}
1229 
1230 	if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
1231 		goto err;
1232 	if (window1 > 1) {
1233 		if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
1234 			goto err;
1235 
1236 		j = 1 << (window1 - 1);
1237 		for (i = 1; i < j; i++) {
1238 			if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
1239 			    !BN_mod_mul_montgomery(val1[i], val1[i - 1],
1240 			    d, mont, ctx))
1241 				goto err;
1242 		}
1243 	}
1244 
1245 
1246 	/*
1247 	 * Build table for a2:   val2[i] := a2^(2*i + 1) mod m  for i = 0 .. 2^(window2-1)
1248 	 */
1249 	if (!BN_nnmod(val2[0], a2, m, ctx))
1250 		goto err;
1251 	a_mod_m = val2[0];
1252 	if (BN_is_zero(a_mod_m)) {
1253 		BN_zero(rr);
1254 		ret = 1;
1255 		goto err;
1256 	}
1257 	if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
1258 		goto err;
1259 	if (window2 > 1) {
1260 		if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
1261 			goto err;
1262 
1263 		j = 1 << (window2 - 1);
1264 		for (i = 1; i < j; i++) {
1265 			if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
1266 			    !BN_mod_mul_montgomery(val2[i], val2[i - 1],
1267 			    d, mont, ctx))
1268 				goto err;
1269 		}
1270 	}
1271 
1272 
1273 	/* Now compute the power product, using independent windows. */
1274 	r_is_one = 1;
1275 	wvalue1 = 0;  /* The 'value' of the first window */
1276 	wvalue2 = 0;  /* The 'value' of the second window */
1277 	wpos1 = 0;    /* If wvalue1 > 0, the bottom bit of the first window */
1278 	wpos2 = 0;    /* If wvalue2 > 0, the bottom bit of the second window */
1279 
1280 	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
1281 		goto err;
1282 	for (b = bits - 1; b >= 0; b--) {
1283 		if (!r_is_one) {
1284 			if (!BN_mod_mul_montgomery(r, r,r, mont, ctx))
1285 				goto err;
1286 		}
1287 
1288 		if (!wvalue1)
1289 			if (BN_is_bit_set(p1, b)) {
1290 			/* consider bits b-window1+1 .. b for this window */
1291 			i = b - window1 + 1;
1292 			while (!BN_is_bit_set(p1, i)) /* works for i<0 */
1293 				i++;
1294 			wpos1 = i;
1295 			wvalue1 = 1;
1296 			for (i = b - 1; i >= wpos1; i--) {
1297 				wvalue1 <<= 1;
1298 				if (BN_is_bit_set(p1, i))
1299 					wvalue1++;
1300 			}
1301 		}
1302 
1303 		if (!wvalue2)
1304 			if (BN_is_bit_set(p2, b)) {
1305 			/* consider bits b-window2+1 .. b for this window */
1306 			i = b - window2 + 1;
1307 			while (!BN_is_bit_set(p2, i))
1308 				i++;
1309 			wpos2 = i;
1310 			wvalue2 = 1;
1311 			for (i = b - 1; i >= wpos2; i--) {
1312 				wvalue2 <<= 1;
1313 				if (BN_is_bit_set(p2, i))
1314 					wvalue2++;
1315 			}
1316 		}
1317 
1318 		if (wvalue1 && b == wpos1) {
1319 			/* wvalue1 is odd and < 2^window1 */
1320 			if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1],
1321 			    mont, ctx))
1322 				goto err;
1323 			wvalue1 = 0;
1324 			r_is_one = 0;
1325 		}
1326 
1327 		if (wvalue2 && b == wpos2) {
1328 			/* wvalue2 is odd and < 2^window2 */
1329 			if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1],
1330 			    mont, ctx))
1331 				goto err;
1332 			wvalue2 = 0;
1333 			r_is_one = 0;
1334 		}
1335 	}
1336 	if (!BN_from_montgomery(rr, r,mont, ctx))
1337 		goto err;
1338 	ret = 1;
1339 
1340 err:
1341 	if ((in_mont == NULL) && (mont != NULL))
1342 		BN_MONT_CTX_free(mont);
1343 	BN_CTX_end(ctx);
1344 	return (ret);
1345 }
1346