xref: /freebsd/crypto/openssl/crypto/bn/bn_exp.c (revision 39beb93c)
1 /* crypto/bn/bn_exp.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 /* ====================================================================
59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60  *
61  * Redistribution and use in source and binary forms, with or without
62  * modification, are permitted provided that the following conditions
63  * are met:
64  *
65  * 1. Redistributions of source code must retain the above copyright
66  *    notice, this list of conditions and the following disclaimer.
67  *
68  * 2. Redistributions in binary form must reproduce the above copyright
69  *    notice, this list of conditions and the following disclaimer in
70  *    the documentation and/or other materials provided with the
71  *    distribution.
72  *
73  * 3. All advertising materials mentioning features or use of this
74  *    software must display the following acknowledgment:
75  *    "This product includes software developed by the OpenSSL Project
76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77  *
78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79  *    endorse or promote products derived from this software without
80  *    prior written permission. For written permission, please contact
81  *    openssl-core@openssl.org.
82  *
83  * 5. Products derived from this software may not be called "OpenSSL"
84  *    nor may "OpenSSL" appear in their names without prior written
85  *    permission of the OpenSSL Project.
86  *
87  * 6. Redistributions of any form whatsoever must retain the following
88  *    acknowledgment:
89  *    "This product includes software developed by the OpenSSL Project
90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103  * OF THE POSSIBILITY OF SUCH DAMAGE.
104  * ====================================================================
105  *
106  * This product includes cryptographic software written by Eric Young
107  * (eay@cryptsoft.com).  This product includes software written by Tim
108  * Hudson (tjh@cryptsoft.com).
109  *
110  */
111 
112 
113 #include "cryptlib.h"
114 #include "bn_lcl.h"
115 
116 /* maximum precomputation table size for *variable* sliding windows */
117 #define TABLE_SIZE	32
118 
119 /* this one works - simple but works */
120 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
121 	{
122 	int i,bits,ret=0;
123 	BIGNUM *v,*rr;
124 
125 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
126 		{
127 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
128 		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
129 		return -1;
130 		}
131 
132 	BN_CTX_start(ctx);
133 	if ((r == a) || (r == p))
134 		rr = BN_CTX_get(ctx);
135 	else
136 		rr = r;
137 	if ((v = BN_CTX_get(ctx)) == NULL) goto err;
138 
139 	if (BN_copy(v,a) == NULL) goto err;
140 	bits=BN_num_bits(p);
141 
142 	if (BN_is_odd(p))
143 		{ if (BN_copy(rr,a) == NULL) goto err; }
144 	else	{ if (!BN_one(rr)) goto err; }
145 
146 	for (i=1; i<bits; i++)
147 		{
148 		if (!BN_sqr(v,v,ctx)) goto err;
149 		if (BN_is_bit_set(p,i))
150 			{
151 			if (!BN_mul(rr,rr,v,ctx)) goto err;
152 			}
153 		}
154 	ret=1;
155 err:
156 	if (r != rr) BN_copy(r,rr);
157 	BN_CTX_end(ctx);
158 	bn_check_top(r);
159 	return(ret);
160 	}
161 
162 
163 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
164 	       BN_CTX *ctx)
165 	{
166 	int ret;
167 
168 	bn_check_top(a);
169 	bn_check_top(p);
170 	bn_check_top(m);
171 
172 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
173 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
174 	 * exponentiation for the odd part), using appropriate exponent
175 	 * reductions, and combine the results using the CRT.
176 	 *
177 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
178 	 * exponentiation using the reciprocal-based quick remaindering
179 	 * algorithm is used.
180 	 *
181 	 * (Timing obtained with expspeed.c [computations  a^p mod m
182 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
183 	 * 4096, 8192 bits], compared to the running time of the
184 	 * standard algorithm:
185 	 *
186 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
187          *                     55 .. 77 %  [UltraSparc processor, but
188 	 *                                  debug-solaris-sparcv8-gcc conf.]
189 	 *
190 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
191 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
192 	 *
193 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
194 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
195 	 * slower even than the standard algorithm!
196 	 *
197 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
198 	 * should be obtained when the new Montgomery reduction code
199 	 * has been integrated into OpenSSL.)
200 	 */
201 
202 #define MONT_MUL_MOD
203 #define MONT_EXP_WORD
204 #define RECP_MUL_MOD
205 
206 #ifdef MONT_MUL_MOD
207 	/* I have finally been able to take out this pre-condition of
208 	 * the top bit being set.  It was caused by an error in BN_div
209 	 * with negatives.  There was also another problem when for a^b%m
210 	 * a >= m.  eay 07-May-97 */
211 /*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
212 
213 	if (BN_is_odd(m))
214 		{
215 #  ifdef MONT_EXP_WORD
216 		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) == 0))
217 			{
218 			BN_ULONG A = a->d[0];
219 			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
220 			}
221 		else
222 #  endif
223 			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
224 		}
225 	else
226 #endif
227 #ifdef RECP_MUL_MOD
228 		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
229 #else
230 		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
231 #endif
232 
233 	bn_check_top(r);
234 	return(ret);
235 	}
236 
237 
238 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
239 		    const BIGNUM *m, BN_CTX *ctx)
240 	{
241 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
242 	int start=1;
243 	BIGNUM *aa;
244 	/* Table of variables obtained from 'ctx' */
245 	BIGNUM *val[TABLE_SIZE];
246 	BN_RECP_CTX recp;
247 
248 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
249 		{
250 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
251 		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
252 		return -1;
253 		}
254 
255 	bits=BN_num_bits(p);
256 
257 	if (bits == 0)
258 		{
259 		ret = BN_one(r);
260 		return ret;
261 		}
262 
263 	BN_CTX_start(ctx);
264 	aa = BN_CTX_get(ctx);
265 	val[0] = BN_CTX_get(ctx);
266 	if(!aa || !val[0]) goto err;
267 
268 	BN_RECP_CTX_init(&recp);
269 	if (m->neg)
270 		{
271 		/* ignore sign of 'm' */
272 		if (!BN_copy(aa, m)) goto err;
273 		aa->neg = 0;
274 		if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
275 		}
276 	else
277 		{
278 		if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
279 		}
280 
281 	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
282 	if (BN_is_zero(val[0]))
283 		{
284 		BN_zero(r);
285 		ret = 1;
286 		goto err;
287 		}
288 
289 	window = BN_window_bits_for_exponent_size(bits);
290 	if (window > 1)
291 		{
292 		if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
293 			goto err;				/* 2 */
294 		j=1<<(window-1);
295 		for (i=1; i<j; i++)
296 			{
297 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
298 					!BN_mod_mul_reciprocal(val[i],val[i-1],
299 						aa,&recp,ctx))
300 				goto err;
301 			}
302 		}
303 
304 	start=1;	/* This is used to avoid multiplication etc
305 			 * when there is only the value '1' in the
306 			 * buffer. */
307 	wvalue=0;	/* The 'value' of the window */
308 	wstart=bits-1;	/* The top bit of the window */
309 	wend=0;		/* The bottom bit of the window */
310 
311 	if (!BN_one(r)) goto err;
312 
313 	for (;;)
314 		{
315 		if (BN_is_bit_set(p,wstart) == 0)
316 			{
317 			if (!start)
318 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
319 				goto err;
320 			if (wstart == 0) break;
321 			wstart--;
322 			continue;
323 			}
324 		/* We now have wstart on a 'set' bit, we now need to work out
325 		 * how bit a window to do.  To do this we need to scan
326 		 * forward until the last set bit before the end of the
327 		 * window */
328 		j=wstart;
329 		wvalue=1;
330 		wend=0;
331 		for (i=1; i<window; i++)
332 			{
333 			if (wstart-i < 0) break;
334 			if (BN_is_bit_set(p,wstart-i))
335 				{
336 				wvalue<<=(i-wend);
337 				wvalue|=1;
338 				wend=i;
339 				}
340 			}
341 
342 		/* wend is the size of the current window */
343 		j=wend+1;
344 		/* add the 'bytes above' */
345 		if (!start)
346 			for (i=0; i<j; i++)
347 				{
348 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
349 					goto err;
350 				}
351 
352 		/* wvalue will be an odd number < 2^window */
353 		if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
354 			goto err;
355 
356 		/* move the 'window' down further */
357 		wstart-=wend+1;
358 		wvalue=0;
359 		start=0;
360 		if (wstart < 0) break;
361 		}
362 	ret=1;
363 err:
364 	BN_CTX_end(ctx);
365 	BN_RECP_CTX_free(&recp);
366 	bn_check_top(r);
367 	return(ret);
368 	}
369 
370 
371 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
372 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
373 	{
374 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
375 	int start=1;
376 	BIGNUM *d,*r;
377 	const BIGNUM *aa;
378 	/* Table of variables obtained from 'ctx' */
379 	BIGNUM *val[TABLE_SIZE];
380 	BN_MONT_CTX *mont=NULL;
381 
382 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
383 		{
384 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
385 		}
386 
387 	bn_check_top(a);
388 	bn_check_top(p);
389 	bn_check_top(m);
390 
391 	if (!BN_is_odd(m))
392 		{
393 		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
394 		return(0);
395 		}
396 	bits=BN_num_bits(p);
397 	if (bits == 0)
398 		{
399 		ret = BN_one(rr);
400 		return ret;
401 		}
402 
403 	BN_CTX_start(ctx);
404 	d = BN_CTX_get(ctx);
405 	r = BN_CTX_get(ctx);
406 	val[0] = BN_CTX_get(ctx);
407 	if (!d || !r || !val[0]) goto err;
408 
409 	/* If this is not done, things will break in the montgomery
410 	 * part */
411 
412 	if (in_mont != NULL)
413 		mont=in_mont;
414 	else
415 		{
416 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
417 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
418 		}
419 
420 	if (a->neg || BN_ucmp(a,m) >= 0)
421 		{
422 		if (!BN_nnmod(val[0],a,m,ctx))
423 			goto err;
424 		aa= val[0];
425 		}
426 	else
427 		aa=a;
428 	if (BN_is_zero(aa))
429 		{
430 		BN_zero(rr);
431 		ret = 1;
432 		goto err;
433 		}
434 	if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
435 
436 	window = BN_window_bits_for_exponent_size(bits);
437 	if (window > 1)
438 		{
439 		if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
440 		j=1<<(window-1);
441 		for (i=1; i<j; i++)
442 			{
443 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
444 					!BN_mod_mul_montgomery(val[i],val[i-1],
445 						d,mont,ctx))
446 				goto err;
447 			}
448 		}
449 
450 	start=1;	/* This is used to avoid multiplication etc
451 			 * when there is only the value '1' in the
452 			 * buffer. */
453 	wvalue=0;	/* The 'value' of the window */
454 	wstart=bits-1;	/* The top bit of the window */
455 	wend=0;		/* The bottom bit of the window */
456 
457 	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
458 	for (;;)
459 		{
460 		if (BN_is_bit_set(p,wstart) == 0)
461 			{
462 			if (!start)
463 				{
464 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
465 				goto err;
466 				}
467 			if (wstart == 0) break;
468 			wstart--;
469 			continue;
470 			}
471 		/* We now have wstart on a 'set' bit, we now need to work out
472 		 * how bit a window to do.  To do this we need to scan
473 		 * forward until the last set bit before the end of the
474 		 * window */
475 		j=wstart;
476 		wvalue=1;
477 		wend=0;
478 		for (i=1; i<window; i++)
479 			{
480 			if (wstart-i < 0) break;
481 			if (BN_is_bit_set(p,wstart-i))
482 				{
483 				wvalue<<=(i-wend);
484 				wvalue|=1;
485 				wend=i;
486 				}
487 			}
488 
489 		/* wend is the size of the current window */
490 		j=wend+1;
491 		/* add the 'bytes above' */
492 		if (!start)
493 			for (i=0; i<j; i++)
494 				{
495 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
496 					goto err;
497 				}
498 
499 		/* wvalue will be an odd number < 2^window */
500 		if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
501 			goto err;
502 
503 		/* move the 'window' down further */
504 		wstart-=wend+1;
505 		wvalue=0;
506 		start=0;
507 		if (wstart < 0) break;
508 		}
509 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
510 	ret=1;
511 err:
512 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
513 	BN_CTX_end(ctx);
514 	bn_check_top(rr);
515 	return(ret);
516 	}
517 
518 
519 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
520  * so that accessing any of these table values shows the same access pattern as far
521  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
522  * from/to that table. */
523 
524 static int MOD_EXP_CTIME_COPY_TO_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
525 	{
526 	size_t i, j;
527 
528 	if (bn_wexpand(b, top) == NULL)
529 		return 0;
530 	while (b->top < top)
531 		{
532 		b->d[b->top++] = 0;
533 		}
534 
535 	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
536 		{
537 		buf[j] = ((unsigned char*)b->d)[i];
538 		}
539 
540 	bn_correct_top(b);
541 	return 1;
542 	}
543 
544 static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
545 	{
546 	size_t i, j;
547 
548 	if (bn_wexpand(b, top) == NULL)
549 		return 0;
550 
551 	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
552 		{
553 		((unsigned char*)b->d)[i] = buf[j];
554 		}
555 
556 	b->top = top;
557 	bn_correct_top(b);
558 	return 1;
559 	}
560 
561 /* Given a pointer value, compute the next address that is a cache line multiple. */
562 #define MOD_EXP_CTIME_ALIGN(x_) \
563 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((BN_ULONG)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
564 
565 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
566  * precomputation memory layout to limit data-dependency to a minimum
567  * to protect secret exponents (cf. the hyper-threading timing attacks
568  * pointed out by Colin Percival,
569  * http://www.daemonology.net/hyperthreading-considered-harmful/)
570  */
571 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
572 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
573 	{
574 	int i,bits,ret=0,idx,window,wvalue;
575 	int top;
576  	BIGNUM *r;
577 	const BIGNUM *aa;
578 	BN_MONT_CTX *mont=NULL;
579 
580 	int numPowers;
581 	unsigned char *powerbufFree=NULL;
582 	int powerbufLen = 0;
583 	unsigned char *powerbuf=NULL;
584 	BIGNUM *computeTemp=NULL, *am=NULL;
585 
586 	bn_check_top(a);
587 	bn_check_top(p);
588 	bn_check_top(m);
589 
590 	top = m->top;
591 
592 	if (!(m->d[0] & 1))
593 		{
594 		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
595 		return(0);
596 		}
597 	bits=BN_num_bits(p);
598 	if (bits == 0)
599 		{
600 		ret = BN_one(rr);
601 		return ret;
602 		}
603 
604  	/* Initialize BIGNUM context and allocate intermediate result */
605 	BN_CTX_start(ctx);
606 	r = BN_CTX_get(ctx);
607 	if (r == NULL) goto err;
608 
609 	/* Allocate a montgomery context if it was not supplied by the caller.
610 	 * If this is not done, things will break in the montgomery part.
611  	 */
612 	if (in_mont != NULL)
613 		mont=in_mont;
614 	else
615 		{
616 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
617 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
618 		}
619 
620 	/* Get the window size to use with size of p. */
621 	window = BN_window_bits_for_ctime_exponent_size(bits);
622 
623 	/* Allocate a buffer large enough to hold all of the pre-computed
624 	 * powers of a.
625 	 */
626 	numPowers = 1 << window;
627 	powerbufLen = sizeof(m->d[0])*top*numPowers;
628 	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
629 		goto err;
630 
631 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
632 	memset(powerbuf, 0, powerbufLen);
633 
634  	/* Initialize the intermediate result. Do this early to save double conversion,
635 	 * once each for a^0 and intermediate result.
636 	 */
637  	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
638 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(r, top, powerbuf, 0, numPowers)) goto err;
639 
640 	/* Initialize computeTemp as a^1 with montgomery precalcs */
641 	computeTemp = BN_CTX_get(ctx);
642 	am = BN_CTX_get(ctx);
643 	if (computeTemp==NULL || am==NULL) goto err;
644 
645 	if (a->neg || BN_ucmp(a,m) >= 0)
646 		{
647 		if (!BN_mod(am,a,m,ctx))
648 			goto err;
649 		aa= am;
650 		}
651 	else
652 		aa=a;
653 	if (!BN_to_montgomery(am,aa,mont,ctx)) goto err;
654 	if (!BN_copy(computeTemp, am)) goto err;
655 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(am, top, powerbuf, 1, numPowers)) goto err;
656 
657 	/* If the window size is greater than 1, then calculate
658 	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
659 	 * (even powers could instead be computed as (a^(i/2))^2
660 	 * to use the slight performance advantage of sqr over mul).
661 	 */
662 	if (window > 1)
663 		{
664 		for (i=2; i<numPowers; i++)
665 			{
666 			/* Calculate a^i = a^(i-1) * a */
667 			if (!BN_mod_mul_montgomery(computeTemp,am,computeTemp,mont,ctx))
668 				goto err;
669 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(computeTemp, top, powerbuf, i, numPowers)) goto err;
670 			}
671 		}
672 
673  	/* Adjust the number of bits up to a multiple of the window size.
674  	 * If the exponent length is not a multiple of the window size, then
675  	 * this pads the most significant bits with zeros to normalize the
676  	 * scanning loop to there's no special cases.
677  	 *
678  	 * * NOTE: Making the window size a power of two less than the native
679 	 * * word size ensures that the padded bits won't go past the last
680  	 * * word in the internal BIGNUM structure. Going past the end will
681  	 * * still produce the correct result, but causes a different branch
682  	 * * to be taken in the BN_is_bit_set function.
683  	 */
684  	bits = ((bits+window-1)/window)*window;
685  	idx=bits-1;	/* The top bit of the window */
686 
687  	/* Scan the exponent one window at a time starting from the most
688  	 * significant bits.
689  	 */
690  	while (idx >= 0)
691   		{
692  		wvalue=0; /* The 'value' of the window */
693 
694  		/* Scan the window, squaring the result as we go */
695  		for (i=0; i<window; i++,idx--)
696  			{
697 			if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))	goto err;
698 			wvalue = (wvalue<<1)+BN_is_bit_set(p,idx);
699   			}
700 
701 		/* Fetch the appropriate pre-computed value from the pre-buf */
702 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(computeTemp, top, powerbuf, wvalue, numPowers)) goto err;
703 
704  		/* Multiply the result into the intermediate result */
705  		if (!BN_mod_mul_montgomery(r,r,computeTemp,mont,ctx)) goto err;
706   		}
707 
708  	/* Convert the final result from montgomery to standard format */
709 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
710 	ret=1;
711 err:
712 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
713 	if (powerbuf!=NULL)
714 		{
715 		OPENSSL_cleanse(powerbuf,powerbufLen);
716 		OPENSSL_free(powerbufFree);
717 		}
718  	if (am!=NULL) BN_clear(am);
719  	if (computeTemp!=NULL) BN_clear(computeTemp);
720 	BN_CTX_end(ctx);
721 	return(ret);
722 	}
723 
724 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
725                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
726 	{
727 	BN_MONT_CTX *mont = NULL;
728 	int b, bits, ret=0;
729 	int r_is_one;
730 	BN_ULONG w, next_w;
731 	BIGNUM *d, *r, *t;
732 	BIGNUM *swap_tmp;
733 #define BN_MOD_MUL_WORD(r, w, m) \
734 		(BN_mul_word(r, (w)) && \
735 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
736 			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
737 		/* BN_MOD_MUL_WORD is only used with 'w' large,
738 		 * so the BN_ucmp test is probably more overhead
739 		 * than always using BN_mod (which uses BN_copy if
740 		 * a similar test returns true). */
741 		/* We can use BN_mod and do not need BN_nnmod because our
742 		 * accumulator is never negative (the result of BN_mod does
743 		 * not depend on the sign of the modulus).
744 		 */
745 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
746 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
747 
748 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
749 		{
750 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
751 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
752 		return -1;
753 		}
754 
755 	bn_check_top(p);
756 	bn_check_top(m);
757 
758 	if (!BN_is_odd(m))
759 		{
760 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
761 		return(0);
762 		}
763 	if (m->top == 1)
764 		a %= m->d[0]; /* make sure that 'a' is reduced */
765 
766 	bits = BN_num_bits(p);
767 	if (bits == 0)
768 		{
769 		ret = BN_one(rr);
770 		return ret;
771 		}
772 	if (a == 0)
773 		{
774 		BN_zero(rr);
775 		ret = 1;
776 		return ret;
777 		}
778 
779 	BN_CTX_start(ctx);
780 	d = BN_CTX_get(ctx);
781 	r = BN_CTX_get(ctx);
782 	t = BN_CTX_get(ctx);
783 	if (d == NULL || r == NULL || t == NULL) goto err;
784 
785 	if (in_mont != NULL)
786 		mont=in_mont;
787 	else
788 		{
789 		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
790 		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
791 		}
792 
793 	r_is_one = 1; /* except for Montgomery factor */
794 
795 	/* bits-1 >= 0 */
796 
797 	/* The result is accumulated in the product r*w. */
798 	w = a; /* bit 'bits-1' of 'p' is always set */
799 	for (b = bits-2; b >= 0; b--)
800 		{
801 		/* First, square r*w. */
802 		next_w = w*w;
803 		if ((next_w/w) != w) /* overflow */
804 			{
805 			if (r_is_one)
806 				{
807 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
808 				r_is_one = 0;
809 				}
810 			else
811 				{
812 				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
813 				}
814 			next_w = 1;
815 			}
816 		w = next_w;
817 		if (!r_is_one)
818 			{
819 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
820 			}
821 
822 		/* Second, multiply r*w by 'a' if exponent bit is set. */
823 		if (BN_is_bit_set(p, b))
824 			{
825 			next_w = w*a;
826 			if ((next_w/a) != w) /* overflow */
827 				{
828 				if (r_is_one)
829 					{
830 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
831 					r_is_one = 0;
832 					}
833 				else
834 					{
835 					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
836 					}
837 				next_w = a;
838 				}
839 			w = next_w;
840 			}
841 		}
842 
843 	/* Finally, set r:=r*w. */
844 	if (w != 1)
845 		{
846 		if (r_is_one)
847 			{
848 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
849 			r_is_one = 0;
850 			}
851 		else
852 			{
853 			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
854 			}
855 		}
856 
857 	if (r_is_one) /* can happen only if a == 1*/
858 		{
859 		if (!BN_one(rr)) goto err;
860 		}
861 	else
862 		{
863 		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
864 		}
865 	ret = 1;
866 err:
867 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
868 	BN_CTX_end(ctx);
869 	bn_check_top(rr);
870 	return(ret);
871 	}
872 
873 
874 /* The old fallback, simple version :-) */
875 int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
876 		const BIGNUM *m, BN_CTX *ctx)
877 	{
878 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
879 	int start=1;
880 	BIGNUM *d;
881 	/* Table of variables obtained from 'ctx' */
882 	BIGNUM *val[TABLE_SIZE];
883 
884 	if (BN_get_flags(p, BN_FLG_EXP_CONSTTIME) != 0)
885 		{
886 		/* BN_FLG_EXP_CONSTTIME only supported by BN_mod_exp_mont() */
887 		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
888 		return -1;
889 		}
890 
891 	bits=BN_num_bits(p);
892 
893 	if (bits == 0)
894 		{
895 		ret = BN_one(r);
896 		return ret;
897 		}
898 
899 	BN_CTX_start(ctx);
900 	d = BN_CTX_get(ctx);
901 	val[0] = BN_CTX_get(ctx);
902 	if(!d || !val[0]) goto err;
903 
904 	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
905 	if (BN_is_zero(val[0]))
906 		{
907 		BN_zero(r);
908 		ret = 1;
909 		goto err;
910 		}
911 
912 	window = BN_window_bits_for_exponent_size(bits);
913 	if (window > 1)
914 		{
915 		if (!BN_mod_mul(d,val[0],val[0],m,ctx))
916 			goto err;				/* 2 */
917 		j=1<<(window-1);
918 		for (i=1; i<j; i++)
919 			{
920 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
921 					!BN_mod_mul(val[i],val[i-1],d,m,ctx))
922 				goto err;
923 			}
924 		}
925 
926 	start=1;	/* This is used to avoid multiplication etc
927 			 * when there is only the value '1' in the
928 			 * buffer. */
929 	wvalue=0;	/* The 'value' of the window */
930 	wstart=bits-1;	/* The top bit of the window */
931 	wend=0;		/* The bottom bit of the window */
932 
933 	if (!BN_one(r)) goto err;
934 
935 	for (;;)
936 		{
937 		if (BN_is_bit_set(p,wstart) == 0)
938 			{
939 			if (!start)
940 				if (!BN_mod_mul(r,r,r,m,ctx))
941 				goto err;
942 			if (wstart == 0) break;
943 			wstart--;
944 			continue;
945 			}
946 		/* We now have wstart on a 'set' bit, we now need to work out
947 		 * how bit a window to do.  To do this we need to scan
948 		 * forward until the last set bit before the end of the
949 		 * window */
950 		j=wstart;
951 		wvalue=1;
952 		wend=0;
953 		for (i=1; i<window; i++)
954 			{
955 			if (wstart-i < 0) break;
956 			if (BN_is_bit_set(p,wstart-i))
957 				{
958 				wvalue<<=(i-wend);
959 				wvalue|=1;
960 				wend=i;
961 				}
962 			}
963 
964 		/* wend is the size of the current window */
965 		j=wend+1;
966 		/* add the 'bytes above' */
967 		if (!start)
968 			for (i=0; i<j; i++)
969 				{
970 				if (!BN_mod_mul(r,r,r,m,ctx))
971 					goto err;
972 				}
973 
974 		/* wvalue will be an odd number < 2^window */
975 		if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
976 			goto err;
977 
978 		/* move the 'window' down further */
979 		wstart-=wend+1;
980 		wvalue=0;
981 		start=0;
982 		if (wstart < 0) break;
983 		}
984 	ret=1;
985 err:
986 	BN_CTX_end(ctx);
987 	bn_check_top(r);
988 	return(ret);
989 	}
990 
991