xref: /dragonfly/crypto/libressl/crypto/bn/bn_exp.c (revision 6f5ec8b5)
1 /* $OpenBSD: bn_exp.c,v 1.32 2022/04/20 13:32:34 tb Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 #include <stdlib.h>
113 #include <string.h>
114 
115 #include <openssl/err.h>
116 
117 #include "bn_lcl.h"
118 #include "constant_time_locl.h"
119 
120 /* maximum precomputation table size for *variable* sliding windows */
121 #define TABLE_SIZE	32
122 
123 /* this one works - simple but works */
124 int
125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
126 {
127 	int i, bits, ret = 0;
128 	BIGNUM *v, *rr;
129 
130 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
131 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
132 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133 		return -1;
134 	}
135 
136 	BN_CTX_start(ctx);
137 	if ((r == a) || (r == p))
138 		rr = BN_CTX_get(ctx);
139 	else
140 		rr = r;
141 	v = BN_CTX_get(ctx);
142 	if (rr == NULL || v == NULL)
143 		goto err;
144 
145 	if (BN_copy(v, a) == NULL)
146 		goto err;
147 	bits = BN_num_bits(p);
148 
149 	if (BN_is_odd(p)) {
150 		if (BN_copy(rr, a) == NULL)
151 			goto err;
152 	} else {
153 		if (!BN_one(rr))
154 			goto err;
155 	}
156 
157 	for (i = 1; i < bits; i++) {
158 		if (!BN_sqr(v, v, ctx))
159 			goto err;
160 		if (BN_is_bit_set(p, i)) {
161 			if (!BN_mul(rr, rr, v, ctx))
162 				goto err;
163 		}
164 	}
165 	ret = 1;
166 
167 err:
168 	if (r != rr && rr != NULL)
169 		BN_copy(r, rr);
170 	BN_CTX_end(ctx);
171 	bn_check_top(r);
172 	return (ret);
173 }
174 
175 static int
176 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
177     BN_CTX *ctx, int ct)
178 {
179 	int ret;
180 
181 	bn_check_top(a);
182 	bn_check_top(p);
183 	bn_check_top(m);
184 
185 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
186 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
187 	 * exponentiation for the odd part), using appropriate exponent
188 	 * reductions, and combine the results using the CRT.
189 	 *
190 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
191 	 * exponentiation using the reciprocal-based quick remaindering
192 	 * algorithm is used.
193 	 *
194 	 * (Timing obtained with expspeed.c [computations  a^p mod m
195 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
196 	 * 4096, 8192 bits], compared to the running time of the
197 	 * standard algorithm:
198 	 *
199 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
200          *                     55 .. 77 %  [UltraSparc processor, but
201 	 *                                  debug-solaris-sparcv8-gcc conf.]
202 	 *
203 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
204 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
205 	 *
206 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
207 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
208 	 * slower even than the standard algorithm!
209 	 *
210 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
211 	 * should be obtained when the new Montgomery reduction code
212 	 * has been integrated into OpenSSL.)
213 	 */
214 
215 	if (BN_is_odd(m)) {
216 		if (a->top == 1 && !a->neg && !ct) {
217 			BN_ULONG A = a->d[0];
218 			ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
219 		} else
220 			ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
221 	} else	{
222 		ret = BN_mod_exp_recp(r, a,p, m, ctx);
223 	}
224 
225 	bn_check_top(r);
226 	return (ret);
227 }
228 
229 int
230 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
231     BN_CTX *ctx)
232 {
233 	return BN_mod_exp_internal(r, a, p, m, ctx,
234 	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
235 }
236 
237 int
238 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
239     BN_CTX *ctx)
240 {
241 	return BN_mod_exp_internal(r, a, p, m, ctx, 1);
242 }
243 
244 
245 int
246 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
247     BN_CTX *ctx)
248 {
249 	return BN_mod_exp_internal(r, a, p, m, ctx, 0);
250 }
251 
252 
253 int
254 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
255     BN_CTX *ctx)
256 {
257 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
258 	int start = 1;
259 	BIGNUM *aa;
260 	/* Table of variables obtained from 'ctx' */
261 	BIGNUM *val[TABLE_SIZE];
262 	BN_RECP_CTX recp;
263 
264 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
265 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
266 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
267 		return -1;
268 	}
269 
270 	bits = BN_num_bits(p);
271 	if (bits == 0) {
272 		/* x**0 mod 1 is still zero. */
273 		if (BN_is_one(m)) {
274 			ret = 1;
275 			BN_zero(r);
276 		} else
277 			ret = BN_one(r);
278 		return ret;
279 	}
280 
281 	BN_RECP_CTX_init(&recp);
282 
283 	BN_CTX_start(ctx);
284 	if ((aa = BN_CTX_get(ctx)) == NULL)
285 		goto err;
286 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
287 		goto err;
288 
289 	if (m->neg) {
290 		/* ignore sign of 'm' */
291 		if (!BN_copy(aa, m))
292 			goto err;
293 		aa->neg = 0;
294 		if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
295 			goto err;
296 	} else {
297 		if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
298 			goto err;
299 	}
300 
301 	if (!BN_nnmod(val[0], a, m, ctx))
302 		goto err;		/* 1 */
303 	if (BN_is_zero(val[0])) {
304 		BN_zero(r);
305 		ret = 1;
306 		goto err;
307 	}
308 
309 	window = BN_window_bits_for_exponent_size(bits);
310 	if (window > 1) {
311 		if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
312 			goto err;				/* 2 */
313 		j = 1 << (window - 1);
314 		for (i = 1; i < j; i++) {
315 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
316 			    !BN_mod_mul_reciprocal(val[i], val[i - 1],
317 			    aa, &recp, ctx))
318 				goto err;
319 		}
320 	}
321 
322 	start = 1;		/* This is used to avoid multiplication etc
323 				 * when there is only the value '1' in the
324 				 * buffer. */
325 	wvalue = 0;		/* The 'value' of the window */
326 	wstart = bits - 1;	/* The top bit of the window */
327 	wend = 0;		/* The bottom bit of the window */
328 
329 	if (!BN_one(r))
330 		goto err;
331 
332 	for (;;) {
333 		if (BN_is_bit_set(p, wstart) == 0) {
334 			if (!start)
335 				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
336 					goto err;
337 			if (wstart == 0)
338 				break;
339 			wstart--;
340 			continue;
341 		}
342 		/* We now have wstart on a 'set' bit, we now need to work out
343 		 * how bit a window to do.  To do this we need to scan
344 		 * forward until the last set bit before the end of the
345 		 * window */
346 		j = wstart;
347 		wvalue = 1;
348 		wend = 0;
349 		for (i = 1; i < window; i++) {
350 			if (wstart - i < 0)
351 				break;
352 			if (BN_is_bit_set(p, wstart - i)) {
353 				wvalue <<= (i - wend);
354 				wvalue |= 1;
355 				wend = i;
356 			}
357 		}
358 
359 		/* wend is the size of the current window */
360 		j = wend + 1;
361 		/* add the 'bytes above' */
362 		if (!start)
363 			for (i = 0; i < j; i++) {
364 				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
365 					goto err;
366 			}
367 
368 		/* wvalue will be an odd number < 2^window */
369 		if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx))
370 			goto err;
371 
372 		/* move the 'window' down further */
373 		wstart -= wend + 1;
374 		wvalue = 0;
375 		start = 0;
376 		if (wstart < 0)
377 			break;
378 	}
379 	ret = 1;
380 
381 err:
382 	BN_CTX_end(ctx);
383 	BN_RECP_CTX_free(&recp);
384 	bn_check_top(r);
385 	return (ret);
386 }
387 
388 static int
389 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
390     BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
391 {
392 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
393 	int start = 1;
394 	BIGNUM *d, *r;
395 	const BIGNUM *aa;
396 	/* Table of variables obtained from 'ctx' */
397 	BIGNUM *val[TABLE_SIZE];
398 	BN_MONT_CTX *mont = NULL;
399 
400 	if (ct) {
401 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
402 	}
403 
404 	bn_check_top(a);
405 	bn_check_top(p);
406 	bn_check_top(m);
407 
408 	if (!BN_is_odd(m)) {
409 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
410 		return (0);
411 	}
412 
413 	bits = BN_num_bits(p);
414 	if (bits == 0) {
415 		/* x**0 mod 1 is still zero. */
416 		if (BN_is_one(m)) {
417 			ret = 1;
418 			BN_zero(rr);
419 		} else
420 			ret = BN_one(rr);
421 		return ret;
422 	}
423 
424 	BN_CTX_start(ctx);
425 	if ((d = BN_CTX_get(ctx)) == NULL)
426 		goto err;
427 	if ((r = BN_CTX_get(ctx)) == NULL)
428 		goto err;
429 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
430 		goto err;
431 
432 	/* If this is not done, things will break in the montgomery
433 	 * part */
434 
435 	if (in_mont != NULL)
436 		mont = in_mont;
437 	else {
438 		if ((mont = BN_MONT_CTX_new()) == NULL)
439 			goto err;
440 		if (!BN_MONT_CTX_set(mont, m, ctx))
441 			goto err;
442 	}
443 
444 	if (a->neg || BN_ucmp(a, m) >= 0) {
445 		if (!BN_nnmod(val[0], a,m, ctx))
446 			goto err;
447 		aa = val[0];
448 	} else
449 		aa = a;
450 	if (BN_is_zero(aa)) {
451 		BN_zero(rr);
452 		ret = 1;
453 		goto err;
454 	}
455 	if (!BN_to_montgomery(val[0], aa, mont, ctx))
456 		goto err; /* 1 */
457 
458 	window = BN_window_bits_for_exponent_size(bits);
459 	if (window > 1) {
460 		if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
461 			goto err; /* 2 */
462 		j = 1 << (window - 1);
463 		for (i = 1; i < j; i++) {
464 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
465 			    !BN_mod_mul_montgomery(val[i], val[i - 1],
466 			    d, mont, ctx))
467 				goto err;
468 		}
469 	}
470 
471 	start = 1;		/* This is used to avoid multiplication etc
472 				 * when there is only the value '1' in the
473 				 * buffer. */
474 	wvalue = 0;		/* The 'value' of the window */
475 	wstart = bits - 1;	/* The top bit of the window */
476 	wend = 0;		/* The bottom bit of the window */
477 
478 	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
479 		goto err;
480 	for (;;) {
481 		if (BN_is_bit_set(p, wstart) == 0) {
482 			if (!start) {
483 				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
484 					goto err;
485 			}
486 			if (wstart == 0)
487 				break;
488 			wstart--;
489 			continue;
490 		}
491 		/* We now have wstart on a 'set' bit, we now need to work out
492 		 * how bit a window to do.  To do this we need to scan
493 		 * forward until the last set bit before the end of the
494 		 * window */
495 		j = wstart;
496 		wvalue = 1;
497 		wend = 0;
498 		for (i = 1; i < window; i++) {
499 			if (wstart - i < 0)
500 				break;
501 			if (BN_is_bit_set(p, wstart - i)) {
502 				wvalue <<= (i - wend);
503 				wvalue |= 1;
504 				wend = i;
505 			}
506 		}
507 
508 		/* wend is the size of the current window */
509 		j = wend + 1;
510 		/* add the 'bytes above' */
511 		if (!start)
512 			for (i = 0; i < j; i++) {
513 				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
514 					goto err;
515 			}
516 
517 		/* wvalue will be an odd number < 2^window */
518 		if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
519 			goto err;
520 
521 		/* move the 'window' down further */
522 		wstart -= wend + 1;
523 		wvalue = 0;
524 		start = 0;
525 		if (wstart < 0)
526 			break;
527 	}
528 	if (!BN_from_montgomery(rr, r,mont, ctx))
529 		goto err;
530 	ret = 1;
531 
532 err:
533 	if ((in_mont == NULL) && (mont != NULL))
534 		BN_MONT_CTX_free(mont);
535 	BN_CTX_end(ctx);
536 	bn_check_top(rr);
537 	return (ret);
538 }
539 
540 int
541 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
542     BN_CTX *ctx, BN_MONT_CTX *in_mont)
543 {
544 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
545 	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
546 }
547 
548 int
549 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
550     BN_CTX *ctx, BN_MONT_CTX *in_mont)
551 {
552 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
553 }
554 
555 int
556 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
557     BN_CTX *ctx, BN_MONT_CTX *in_mont)
558 {
559 	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
560 }
561 
562 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
563  * so that accessing any of these table values shows the same access pattern as far
564  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
565  * from/to that table. */
566 
567 static int
568 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
569     int idx, int window)
570 {
571 	int i, j;
572 	int width = 1 << window;
573 	BN_ULONG *table = (BN_ULONG *)buf;
574 
575 	if (top > b->top)
576 		top = b->top; /* this works because 'buf' is explicitly zeroed */
577 
578 	for (i = 0, j = idx; i < top; i++, j += width) {
579 		table[j] = b->d[i];
580 	}
581 
582 	return 1;
583 }
584 
585 static int
586 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
587     int window)
588 {
589 	int i, j;
590 	int width = 1 << window;
591 	volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
592 
593 	if (bn_wexpand(b, top) == NULL)
594 		return 0;
595 
596 	if (window <= 3) {
597 		for (i = 0; i < top; i++, table += width) {
598 		    BN_ULONG acc = 0;
599 
600 		    for (j = 0; j < width; j++) {
601 			acc |= table[j] &
602 			       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
603 		    }
604 
605 		    b->d[i] = acc;
606 		}
607 	} else {
608 		int xstride = 1 << (window - 2);
609 		BN_ULONG y0, y1, y2, y3;
610 
611 		i = idx >> (window - 2);        /* equivalent of idx / xstride */
612 		idx &= xstride - 1;             /* equivalent of idx % xstride */
613 
614 		y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
615 		y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
616 		y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
617 		y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
618 
619 		for (i = 0; i < top; i++, table += width) {
620 		    BN_ULONG acc = 0;
621 
622 		    for (j = 0; j < xstride; j++) {
623 			acc |= ( (table[j + 0 * xstride] & y0) |
624 				 (table[j + 1 * xstride] & y1) |
625 				 (table[j + 2 * xstride] & y2) |
626 				 (table[j + 3 * xstride] & y3) )
627 			       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
628 		    }
629 
630 		    b->d[i] = acc;
631 		}
632 	}
633 	b->top = top;
634 	bn_correct_top(b);
635 	return 1;
636 }
637 
638 /* Given a pointer value, compute the next address that is a cache line multiple. */
639 #define MOD_EXP_CTIME_ALIGN(x_) \
640 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
641 
642 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
643  * precomputation memory layout to limit data-dependency to a minimum
644  * to protect secret exponents (cf. the hyper-threading timing attacks
645  * pointed out by Colin Percival,
646  * http://www.daemonology.net/hyperthreading-considered-harmful/)
647  */
648 int
649 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
650     const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
651 {
652 	int i, bits, ret = 0, window, wvalue;
653 	int top;
654 	BN_MONT_CTX *mont = NULL;
655 	int numPowers;
656 	unsigned char *powerbufFree = NULL;
657 	int powerbufLen = 0;
658 	unsigned char *powerbuf = NULL;
659 	BIGNUM tmp, am;
660 
661 	bn_check_top(a);
662 	bn_check_top(p);
663 	bn_check_top(m);
664 
665 	if (!BN_is_odd(m)) {
666 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
667 		return (0);
668 	}
669 
670 	top = m->top;
671 
672 	bits = BN_num_bits(p);
673 	if (bits == 0) {
674 		/* x**0 mod 1 is still zero. */
675 		if (BN_is_one(m)) {
676 			ret = 1;
677 			BN_zero(rr);
678 		} else
679 			ret = BN_one(rr);
680 		return ret;
681 	}
682 
683 	BN_CTX_start(ctx);
684 
685 	/* Allocate a montgomery context if it was not supplied by the caller.
686 	 * If this is not done, things will break in the montgomery part.
687  	 */
688 	if (in_mont != NULL)
689 		mont = in_mont;
690 	else {
691 		if ((mont = BN_MONT_CTX_new()) == NULL)
692 			goto err;
693 		if (!BN_MONT_CTX_set(mont, m, ctx))
694 			goto err;
695 	}
696 
697 	/* Get the window size to use with size of p. */
698 	window = BN_window_bits_for_ctime_exponent_size(bits);
699 #if defined(OPENSSL_BN_ASM_MONT5)
700 	if (window == 6 && bits <= 1024)
701 		window = 5;	/* ~5% improvement of 2048-bit RSA sign */
702 #endif
703 
704 	/* Allocate a buffer large enough to hold all of the pre-computed
705 	 * powers of am, am itself and tmp.
706 	 */
707 	numPowers = 1 << window;
708 	powerbufLen = sizeof(m->d[0]) * (top * numPowers +
709 	    ((2*top) > numPowers ? (2*top) : numPowers));
710 	if ((powerbufFree = calloc(powerbufLen +
711 	    MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL)
712 		goto err;
713 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
714 
715 	/* lay down tmp and am right after powers table */
716 	tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
717 	am.d = tmp.d + top;
718 	tmp.top = am.top = 0;
719 	tmp.dmax = am.dmax = top;
720 	tmp.neg = am.neg = 0;
721 	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
722 
723 	/* prepare a^0 in Montgomery domain */
724 #if 1
725 	if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
726 		goto err;
727 #else
728 	tmp.d[0] = (0 - m - >d[0]) & BN_MASK2;	/* 2^(top*BN_BITS2) - m */
729 	for (i = 1; i < top; i++)
730 		tmp.d[i] = (~m->d[i]) & BN_MASK2;
731 	tmp.top = top;
732 #endif
733 
734 	/* prepare a^1 in Montgomery domain */
735 	if (a->neg || BN_ucmp(a, m) >= 0) {
736 		if (!BN_mod_ct(&am, a,m, ctx))
737 			goto err;
738 		if (!BN_to_montgomery(&am, &am, mont, ctx))
739 			goto err;
740 	} else if (!BN_to_montgomery(&am, a,mont, ctx))
741 		goto err;
742 
743 #if defined(OPENSSL_BN_ASM_MONT5)
744 	/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
745 	 * specifically optimization of cache-timing attack countermeasures
746 	 * and pre-computation optimization. */
747 
748 	/* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
749 	 * 512-bit RSA is hardly relevant, we omit it to spare size... */
750 	if (window == 5 && top > 1) {
751 		void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
752 		    const void *table, const BN_ULONG *np,
753 		    const BN_ULONG *n0, int num, int power);
754 		void bn_scatter5(const BN_ULONG *inp, size_t num,
755 		    void *table, size_t power);
756 		void bn_gather5(BN_ULONG *out, size_t num,
757 		    void *table, size_t power);
758 
759 		BN_ULONG *np = mont->N.d, *n0 = mont->n0;
760 
761 		/* BN_to_montgomery can contaminate words above .top
762 		 * [in BN_DEBUG[_DEBUG] build]... */
763 		for (i = am.top; i < top; i++)
764 			am.d[i] = 0;
765 		for (i = tmp.top; i < top; i++)
766 			tmp.d[i] = 0;
767 
768 		bn_scatter5(tmp.d, top, powerbuf, 0);
769 		bn_scatter5(am.d, am.top, powerbuf, 1);
770 		bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
771 		bn_scatter5(tmp.d, top, powerbuf, 2);
772 
773 #if 0
774 		for (i = 3; i < 32; i++) {
775 			/* Calculate a^i = a^(i-1) * a */
776 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
777 			    n0, top, i - 1);
778 			bn_scatter5(tmp.d, top, powerbuf, i);
779 		}
780 #else
781 		/* same as above, but uses squaring for 1/2 of operations */
782 		for (i = 4; i < 32; i*=2) {
783 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
784 			bn_scatter5(tmp.d, top, powerbuf, i);
785 		}
786 		for (i = 3; i < 8; i += 2) {
787 			int j;
788 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
789 			    n0, top, i - 1);
790 			bn_scatter5(tmp.d, top, powerbuf, i);
791 			for (j = 2 * i; j < 32; j *= 2) {
792 				bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
793 				bn_scatter5(tmp.d, top, powerbuf, j);
794 			}
795 		}
796 		for (; i < 16; i += 2) {
797 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
798 			    n0, top, i - 1);
799 			bn_scatter5(tmp.d, top, powerbuf, i);
800 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
801 			bn_scatter5(tmp.d, top, powerbuf, 2*i);
802 		}
803 		for (; i < 32; i += 2) {
804 			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
805 			    n0, top, i - 1);
806 			bn_scatter5(tmp.d, top, powerbuf, i);
807 		}
808 #endif
809 		bits--;
810 		for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
811 			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
812 		bn_gather5(tmp.d, top, powerbuf, wvalue);
813 
814 		/* Scan the exponent one window at a time starting from the most
815 		 * significant bits.
816 		 */
817 		while (bits >= 0) {
818 			for (wvalue = 0, i = 0; i < 5; i++, bits--)
819 				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
820 
821 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
822 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
823 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
824 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
825 			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
826 			bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
827 		}
828 
829 		tmp.top = top;
830 		bn_correct_top(&tmp);
831 	} else
832 #endif
833 	{
834 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
835 		    window))
836 			goto err;
837 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1,
838 		    window))
839 			goto err;
840 
841 		/* If the window size is greater than 1, then calculate
842 		 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
843 		 * (even powers could instead be computed as (a^(i/2))^2
844 		 * to use the slight performance advantage of sqr over mul).
845 		 */
846 		if (window > 1) {
847 			if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
848 				goto err;
849 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
850 			    2, window))
851 				goto err;
852 			for (i = 3; i < numPowers; i++) {
853 				/* Calculate a^i = a^(i-1) * a */
854 				if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
855 				    mont, ctx))
856 					goto err;
857 				if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
858 				    powerbuf, i, window))
859 					goto err;
860 			}
861 		}
862 
863 		bits--;
864 		for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
865 			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
866 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
867 		    wvalue, window))
868 			goto err;
869 
870 		/* Scan the exponent one window at a time starting from the most
871 		 * significant bits.
872 		 */
873 		while (bits >= 0) {
874 			wvalue = 0; /* The 'value' of the window */
875 
876 			/* Scan the window, squaring the result as we go */
877 			for (i = 0; i < window; i++, bits--) {
878 				if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
879 				    mont, ctx))
880 					goto err;
881 				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
882 			}
883 
884 			/* Fetch the appropriate pre-computed value from the pre-buf */
885 			if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
886 			    wvalue, window))
887 				goto err;
888 
889 			/* Multiply the result into the intermediate result */
890 			if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
891 				goto err;
892 		}
893 	}
894 
895 	/* Convert the final result from montgomery to standard format */
896 	if (!BN_from_montgomery(rr, &tmp, mont, ctx))
897 		goto err;
898 	ret = 1;
899 
900 err:
901 	if ((in_mont == NULL) && (mont != NULL))
902 		BN_MONT_CTX_free(mont);
903 	freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
904 	BN_CTX_end(ctx);
905 	return (ret);
906 }
907 
908 int
909 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
910     BN_CTX *ctx, BN_MONT_CTX *in_mont)
911 {
912 	BN_MONT_CTX *mont = NULL;
913 	int b, bits, ret = 0;
914 	int r_is_one;
915 	BN_ULONG w, next_w;
916 	BIGNUM *d, *r, *t;
917 	BIGNUM *swap_tmp;
918 
919 #define BN_MOD_MUL_WORD(r, w, m) \
920 		(BN_mul_word(r, (w)) && \
921 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
922 			(BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
923 		/* BN_MOD_MUL_WORD is only used with 'w' large,
924 		 * so the BN_ucmp test is probably more overhead
925 		 * than always using BN_mod (which uses BN_copy if
926 		 * a similar test returns true). */
927 		/* We can use BN_mod and do not need BN_nnmod because our
928 		 * accumulator is never negative (the result of BN_mod does
929 		 * not depend on the sign of the modulus).
930 		 */
931 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
932 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
933 
934 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
935 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
936 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
937 		return -1;
938 	}
939 
940 	bn_check_top(p);
941 	bn_check_top(m);
942 
943 	if (!BN_is_odd(m)) {
944 		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
945 		return (0);
946 	}
947 	if (m->top == 1)
948 		a %= m->d[0]; /* make sure that 'a' is reduced */
949 
950 	bits = BN_num_bits(p);
951 	if (bits == 0) {
952 		/* x**0 mod 1 is still zero. */
953 		if (BN_is_one(m)) {
954 			ret = 1;
955 			BN_zero(rr);
956 		} else
957 			ret = BN_one(rr);
958 		return ret;
959 	}
960 	if (a == 0) {
961 		BN_zero(rr);
962 		ret = 1;
963 		return ret;
964 	}
965 
966 	BN_CTX_start(ctx);
967 	if ((d = BN_CTX_get(ctx)) == NULL)
968 		goto err;
969 	if ((r = BN_CTX_get(ctx)) == NULL)
970 		goto err;
971 	if ((t = BN_CTX_get(ctx)) == NULL)
972 		goto err;
973 
974 	if (in_mont != NULL)
975 		mont = in_mont;
976 	else {
977 		if ((mont = BN_MONT_CTX_new()) == NULL)
978 			goto err;
979 		if (!BN_MONT_CTX_set(mont, m, ctx))
980 			goto err;
981 	}
982 
983 	r_is_one = 1; /* except for Montgomery factor */
984 
985 	/* bits-1 >= 0 */
986 
987 	/* The result is accumulated in the product r*w. */
988 	w = a; /* bit 'bits-1' of 'p' is always set */
989 	for (b = bits - 2; b >= 0; b--) {
990 		/* First, square r*w. */
991 		next_w = w * w;
992 		if ((next_w / w) != w) /* overflow */
993 		{
994 			if (r_is_one) {
995 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
996 					goto err;
997 				r_is_one = 0;
998 			} else {
999 				if (!BN_MOD_MUL_WORD(r, w, m))
1000 					goto err;
1001 			}
1002 			next_w = 1;
1003 		}
1004 		w = next_w;
1005 		if (!r_is_one) {
1006 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
1007 				goto err;
1008 		}
1009 
1010 		/* Second, multiply r*w by 'a' if exponent bit is set. */
1011 		if (BN_is_bit_set(p, b)) {
1012 			next_w = w * a;
1013 			if ((next_w / a) != w) /* overflow */
1014 			{
1015 				if (r_is_one) {
1016 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1017 						goto err;
1018 					r_is_one = 0;
1019 				} else {
1020 					if (!BN_MOD_MUL_WORD(r, w, m))
1021 						goto err;
1022 				}
1023 				next_w = a;
1024 			}
1025 			w = next_w;
1026 		}
1027 	}
1028 
1029 	/* Finally, set r:=r*w. */
1030 	if (w != 1) {
1031 		if (r_is_one) {
1032 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1033 				goto err;
1034 			r_is_one = 0;
1035 		} else {
1036 			if (!BN_MOD_MUL_WORD(r, w, m))
1037 				goto err;
1038 		}
1039 	}
1040 
1041 	if (r_is_one) /* can happen only if a == 1*/
1042 	{
1043 		if (!BN_one(rr))
1044 			goto err;
1045 	} else {
1046 		if (!BN_from_montgomery(rr, r, mont, ctx))
1047 			goto err;
1048 	}
1049 	ret = 1;
1050 
1051 err:
1052 	if ((in_mont == NULL) && (mont != NULL))
1053 		BN_MONT_CTX_free(mont);
1054 	BN_CTX_end(ctx);
1055 	bn_check_top(rr);
1056 	return (ret);
1057 }
1058 
1059 
1060 /* The old fallback, simple version :-) */
1061 int
1062 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1063     BN_CTX *ctx)
1064 {
1065 	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1066 	int start = 1;
1067 	BIGNUM *d;
1068 	/* Table of variables obtained from 'ctx' */
1069 	BIGNUM *val[TABLE_SIZE];
1070 
1071 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1072 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1073 		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1074 		return -1;
1075 	}
1076 
1077 	bits = BN_num_bits(p);
1078 	if (bits == 0) {
1079 		/* x**0 mod 1 is still zero. */
1080 		if (BN_is_one(m)) {
1081 			ret = 1;
1082 			BN_zero(r);
1083 		} else
1084 			ret = BN_one(r);
1085 		return ret;
1086 	}
1087 
1088 	BN_CTX_start(ctx);
1089 	if ((d = BN_CTX_get(ctx)) == NULL)
1090 		goto err;
1091 	if ((val[0] = BN_CTX_get(ctx)) == NULL)
1092 		goto err;
1093 
1094 	if (!BN_nnmod(val[0],a,m,ctx))
1095 		goto err;		/* 1 */
1096 	if (BN_is_zero(val[0])) {
1097 		BN_zero(r);
1098 		ret = 1;
1099 		goto err;
1100 	}
1101 
1102 	window = BN_window_bits_for_exponent_size(bits);
1103 	if (window > 1) {
1104 		if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1105 			goto err;				/* 2 */
1106 		j = 1 << (window - 1);
1107 		for (i = 1; i < j; i++) {
1108 			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1109 			    !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
1110 				goto err;
1111 		}
1112 	}
1113 
1114 	start = 1;		/* This is used to avoid multiplication etc
1115 				 * when there is only the value '1' in the
1116 				 * buffer. */
1117 	wvalue = 0;		/* The 'value' of the window */
1118 	wstart = bits - 1;	/* The top bit of the window */
1119 	wend = 0;		/* The bottom bit of the window */
1120 
1121 	if (!BN_one(r))
1122 		goto err;
1123 
1124 	for (;;) {
1125 		if (BN_is_bit_set(p, wstart) == 0) {
1126 			if (!start)
1127 				if (!BN_mod_mul(r, r, r, m, ctx))
1128 					goto err;
1129 			if (wstart == 0)
1130 				break;
1131 			wstart--;
1132 			continue;
1133 		}
1134 		/* We now have wstart on a 'set' bit, we now need to work out
1135 		 * how bit a window to do.  To do this we need to scan
1136 		 * forward until the last set bit before the end of the
1137 		 * window */
1138 		j = wstart;
1139 		wvalue = 1;
1140 		wend = 0;
1141 		for (i = 1; i < window; i++) {
1142 			if (wstart - i < 0)
1143 				break;
1144 			if (BN_is_bit_set(p, wstart - i)) {
1145 				wvalue <<= (i - wend);
1146 				wvalue |= 1;
1147 				wend = i;
1148 			}
1149 		}
1150 
1151 		/* wend is the size of the current window */
1152 		j = wend + 1;
1153 		/* add the 'bytes above' */
1154 		if (!start)
1155 			for (i = 0; i < j; i++) {
1156 				if (!BN_mod_mul(r, r, r, m, ctx))
1157 					goto err;
1158 			}
1159 
1160 		/* wvalue will be an odd number < 2^window */
1161 		if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1162 			goto err;
1163 
1164 		/* move the 'window' down further */
1165 		wstart -= wend + 1;
1166 		wvalue = 0;
1167 		start = 0;
1168 		if (wstart < 0)
1169 			break;
1170 	}
1171 	ret = 1;
1172 
1173 err:
1174 	BN_CTX_end(ctx);
1175 	bn_check_top(r);
1176 	return (ret);
1177 }
1178