1 /* $OpenBSD: bn_exp.c,v 1.56 2025/01/22 12:53:16 tb Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to. The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 * must display the following acknowledgement:
33 * "This product includes cryptographic software written by
34 * Eric Young (eay@cryptsoft.com)"
35 * The word 'cryptographic' can be left out if the rouines from the library
36 * being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 * the apps directory (application code) you must include an acknowledgement:
39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed. i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58 /* ====================================================================
59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
60 *
61 * Redistribution and use in source and binary forms, with or without
62 * modification, are permitted provided that the following conditions
63 * are met:
64 *
65 * 1. Redistributions of source code must retain the above copyright
66 * notice, this list of conditions and the following disclaimer.
67 *
68 * 2. Redistributions in binary form must reproduce the above copyright
69 * notice, this list of conditions and the following disclaimer in
70 * the documentation and/or other materials provided with the
71 * distribution.
72 *
73 * 3. All advertising materials mentioning features or use of this
74 * software must display the following acknowledgment:
75 * "This product includes software developed by the OpenSSL Project
76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77 *
78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79 * endorse or promote products derived from this software without
80 * prior written permission. For written permission, please contact
81 * openssl-core@openssl.org.
82 *
83 * 5. Products derived from this software may not be called "OpenSSL"
84 * nor may "OpenSSL" appear in their names without prior written
85 * permission of the OpenSSL Project.
86 *
87 * 6. Redistributions of any form whatsoever must retain the following
88 * acknowledgment:
89 * "This product includes software developed by the OpenSSL Project
90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103 * OF THE POSSIBILITY OF SUCH DAMAGE.
104 * ====================================================================
105 *
106 * This product includes cryptographic software written by Eric Young
107 * (eay@cryptsoft.com). This product includes software written by Tim
108 * Hudson (tjh@cryptsoft.com).
109 *
110 */
111
112 #include <stdlib.h>
113 #include <string.h>
114
115 #include <openssl/err.h>
116
117 #include "bn_local.h"
118 #include "constant_time.h"
119
120 /* maximum precomputation table size for *variable* sliding windows */
121 #define TABLE_SIZE 32
122
123 /* Calculates r = a^p by successive squaring of a. Not constant time. */
124 int
BN_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,BN_CTX * ctx)125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
126 {
127 BIGNUM *rr, *v;
128 int i;
129 int ret = 0;
130
131 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
132 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133 return -1;
134 }
135
136 BN_CTX_start(ctx);
137
138 if ((v = BN_CTX_get(ctx)) == NULL)
139 goto err;
140
141 rr = r;
142 if (r == a || r == p)
143 rr = BN_CTX_get(ctx);
144 if (rr == NULL)
145 goto err;
146
147 if (!BN_one(rr))
148 goto err;
149 if (BN_is_odd(p)) {
150 if (!bn_copy(rr, a))
151 goto err;
152 }
153
154 if (!bn_copy(v, a))
155 goto err;
156
157 for (i = 1; i < BN_num_bits(p); i++) {
158 if (!BN_sqr(v, v, ctx))
159 goto err;
160 if (!BN_is_bit_set(p, i))
161 continue;
162 if (!BN_mul(rr, rr, v, ctx))
163 goto err;
164 }
165
166 if (!bn_copy(r, rr))
167 goto err;
168
169 ret = 1;
170
171 err:
172 BN_CTX_end(ctx);
173
174 return ret;
175 }
176 LCRYPTO_ALIAS(BN_exp);
177
178 /* The old fallback, simple version :-) */
179 int
BN_mod_exp_simple(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)180 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
181 BN_CTX *ctx)
182 {
183 int i, j, bits, wstart, wend, window, wvalue;
184 int start = 1;
185 BIGNUM *d, *q;
186 /* Table of variables obtained from 'ctx' */
187 BIGNUM *val[TABLE_SIZE];
188 int ret = 0;
189
190 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
191 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
192 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
193 return -1;
194 }
195
196 if (r == m) {
197 BNerror(BN_R_INVALID_ARGUMENT);
198 return 0;
199 }
200
201 bits = BN_num_bits(p);
202 if (bits == 0) {
203 /* x**0 mod 1 is still zero. */
204 if (BN_abs_is_word(m, 1)) {
205 ret = 1;
206 BN_zero(r);
207 } else
208 ret = BN_one(r);
209 return ret;
210 }
211
212 BN_CTX_start(ctx);
213 if ((d = BN_CTX_get(ctx)) == NULL)
214 goto err;
215 if ((q = BN_CTX_get(ctx)) == NULL)
216 goto err;
217 if ((val[0] = BN_CTX_get(ctx)) == NULL)
218 goto err;
219
220 if (!BN_nnmod(val[0], a, m, ctx))
221 goto err;
222 if (BN_is_zero(val[0])) {
223 BN_zero(r);
224 goto done;
225 }
226 if (!bn_copy(q, p))
227 goto err;
228
229 window = BN_window_bits_for_exponent_size(bits);
230 if (window > 1) {
231 if (!BN_mod_mul(d, val[0], val[0], m, ctx))
232 goto err;
233 j = 1 << (window - 1);
234 for (i = 1; i < j; i++) {
235 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
236 !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
237 goto err;
238 }
239 }
240
241 start = 1; /* This is used to avoid multiplication etc
242 * when there is only the value '1' in the
243 * buffer. */
244 wvalue = 0; /* The 'value' of the window */
245 wstart = bits - 1; /* The top bit of the window */
246 wend = 0; /* The bottom bit of the window */
247
248 if (!BN_one(r))
249 goto err;
250
251 for (;;) {
252 if (BN_is_bit_set(q, wstart) == 0) {
253 if (!start)
254 if (!BN_mod_mul(r, r, r, m, ctx))
255 goto err;
256 if (wstart == 0)
257 break;
258 wstart--;
259 continue;
260 }
261 /* We now have wstart on a 'set' bit, we now need to work out
262 * how bit a window to do. To do this we need to scan
263 * forward until the last set bit before the end of the
264 * window */
265 j = wstart;
266 wvalue = 1;
267 wend = 0;
268 for (i = 1; i < window; i++) {
269 if (wstart - i < 0)
270 break;
271 if (BN_is_bit_set(q, wstart - i)) {
272 wvalue <<= (i - wend);
273 wvalue |= 1;
274 wend = i;
275 }
276 }
277
278 /* wend is the size of the current window */
279 j = wend + 1;
280 /* add the 'bytes above' */
281 if (!start)
282 for (i = 0; i < j; i++) {
283 if (!BN_mod_mul(r, r, r, m, ctx))
284 goto err;
285 }
286
287 /* wvalue will be an odd number < 2^window */
288 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
289 goto err;
290
291 /* move the 'window' down further */
292 wstart -= wend + 1;
293 wvalue = 0;
294 start = 0;
295 if (wstart < 0)
296 break;
297 }
298
299 done:
300 ret = 1;
301
302 err:
303 BN_CTX_end(ctx);
304
305 return ret;
306 }
307
308 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
309 * so that accessing any of these table values shows the same access pattern as far
310 * as cache lines are concerned. The following functions are used to transfer a BIGNUM
311 * from/to that table. */
312
313 static int
MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM * b,int top,unsigned char * buf,int idx,int window)314 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
315 int idx, int window)
316 {
317 int i, j;
318 int width = 1 << window;
319 BN_ULONG *table = (BN_ULONG *)buf;
320
321 if (top > b->top)
322 top = b->top; /* this works because 'buf' is explicitly zeroed */
323
324 for (i = 0, j = idx; i < top; i++, j += width) {
325 table[j] = b->d[i];
326 }
327
328 return 1;
329 }
330
331 static int
MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM * b,int top,unsigned char * buf,int idx,int window)332 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
333 int window)
334 {
335 int i, j;
336 int width = 1 << window;
337 volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
338
339 if (!bn_wexpand(b, top))
340 return 0;
341
342 if (window <= 3) {
343 for (i = 0; i < top; i++, table += width) {
344 BN_ULONG acc = 0;
345
346 for (j = 0; j < width; j++) {
347 acc |= table[j] &
348 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
349 }
350
351 b->d[i] = acc;
352 }
353 } else {
354 int xstride = 1 << (window - 2);
355 BN_ULONG y0, y1, y2, y3;
356
357 i = idx >> (window - 2); /* equivalent of idx / xstride */
358 idx &= xstride - 1; /* equivalent of idx % xstride */
359
360 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
361 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
362 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
363 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
364
365 for (i = 0; i < top; i++, table += width) {
366 BN_ULONG acc = 0;
367
368 for (j = 0; j < xstride; j++) {
369 acc |= ( (table[j + 0 * xstride] & y0) |
370 (table[j + 1 * xstride] & y1) |
371 (table[j + 2 * xstride] & y2) |
372 (table[j + 3 * xstride] & y3) )
373 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
374 }
375
376 b->d[i] = acc;
377 }
378 }
379 b->top = top;
380 bn_correct_top(b);
381 return 1;
382 }
383
384 /* Given a pointer value, compute the next address that is a cache line multiple. */
385 #define MOD_EXP_CTIME_ALIGN(x_) \
386 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
387
388 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
389 * precomputation memory layout to limit data-dependency to a minimum
390 * to protect secret exponents (cf. the hyper-threading timing attacks
391 * pointed out by Colin Percival,
392 * http://www.daemonology.net/hyperthreading-considered-harmful/)
393 */
394 int
BN_mod_exp_mont_consttime(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)395 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
396 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
397 {
398 int i, bits, ret = 0, window, wvalue;
399 int top;
400 BN_MONT_CTX *mont = NULL;
401 int numPowers;
402 unsigned char *powerbufFree = NULL;
403 int powerbufLen = 0;
404 unsigned char *powerbuf = NULL;
405 BIGNUM tmp, am;
406
407
408 if (!BN_is_odd(m)) {
409 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
410 return (0);
411 }
412
413 top = m->top;
414
415 bits = BN_num_bits(p);
416 if (bits == 0) {
417 /* x**0 mod 1 is still zero. */
418 if (BN_abs_is_word(m, 1)) {
419 ret = 1;
420 BN_zero(rr);
421 } else
422 ret = BN_one(rr);
423 return ret;
424 }
425
426 BN_CTX_start(ctx);
427
428 /*
429 * Allocate a Montgomery context if it was not supplied by the caller.
430 * If this is not done, things will break in the montgomery part.
431 */
432 if (in_mont != NULL)
433 mont = in_mont;
434 else {
435 if ((mont = BN_MONT_CTX_new()) == NULL)
436 goto err;
437 if (!BN_MONT_CTX_set(mont, m, ctx))
438 goto err;
439 }
440
441 /* Get the window size to use with size of p. */
442 window = BN_window_bits_for_ctime_exponent_size(bits);
443 #if defined(OPENSSL_BN_ASM_MONT5)
444 if (window == 6 && bits <= 1024)
445 window = 5; /* ~5% improvement of 2048-bit RSA sign */
446 #endif
447
448 /* Allocate a buffer large enough to hold all of the pre-computed
449 * powers of am, am itself and tmp.
450 */
451 numPowers = 1 << window;
452 powerbufLen = sizeof(m->d[0]) * (top * numPowers +
453 ((2*top) > numPowers ? (2*top) : numPowers));
454 if ((powerbufFree = calloc(powerbufLen +
455 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL)
456 goto err;
457 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
458
459 /* lay down tmp and am right after powers table */
460 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
461 am.d = tmp.d + top;
462 tmp.top = am.top = 0;
463 tmp.dmax = am.dmax = top;
464 tmp.neg = am.neg = 0;
465 tmp.flags = am.flags = BN_FLG_STATIC_DATA;
466
467 /* prepare a^0 in Montgomery domain */
468 #if 1
469 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
470 goto err;
471 #else
472 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */
473 for (i = 1; i < top; i++)
474 tmp.d[i] = (~m->d[i]) & BN_MASK2;
475 tmp.top = top;
476 #endif
477
478 /* prepare a^1 in Montgomery domain */
479 if (!BN_nnmod(&am, a, m, ctx))
480 goto err;
481 if (!BN_to_montgomery(&am, &am, mont, ctx))
482 goto err;
483
484 #if defined(OPENSSL_BN_ASM_MONT5)
485 /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
486 * specifically optimization of cache-timing attack countermeasures
487 * and pre-computation optimization. */
488
489 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
490 * 512-bit RSA is hardly relevant, we omit it to spare size... */
491 if (window == 5 && top > 1) {
492 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
493 const void *table, const BN_ULONG *np,
494 const BN_ULONG *n0, int num, int power);
495 void bn_scatter5(const BN_ULONG *inp, size_t num,
496 void *table, size_t power);
497 void bn_gather5(BN_ULONG *out, size_t num,
498 void *table, size_t power);
499
500 BN_ULONG *np = mont->N.d, *n0 = mont->n0;
501
502 /* BN_to_montgomery can contaminate words above .top
503 * [in BN_DEBUG[_DEBUG] build]... */
504 for (i = am.top; i < top; i++)
505 am.d[i] = 0;
506 for (i = tmp.top; i < top; i++)
507 tmp.d[i] = 0;
508
509 bn_scatter5(tmp.d, top, powerbuf, 0);
510 bn_scatter5(am.d, am.top, powerbuf, 1);
511 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
512 bn_scatter5(tmp.d, top, powerbuf, 2);
513
514 #if 0
515 for (i = 3; i < 32; i++) {
516 /* Calculate a^i = a^(i-1) * a */
517 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
518 n0, top, i - 1);
519 bn_scatter5(tmp.d, top, powerbuf, i);
520 }
521 #else
522 /* same as above, but uses squaring for 1/2 of operations */
523 for (i = 4; i < 32; i*=2) {
524 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
525 bn_scatter5(tmp.d, top, powerbuf, i);
526 }
527 for (i = 3; i < 8; i += 2) {
528 int j;
529 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
530 n0, top, i - 1);
531 bn_scatter5(tmp.d, top, powerbuf, i);
532 for (j = 2 * i; j < 32; j *= 2) {
533 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
534 bn_scatter5(tmp.d, top, powerbuf, j);
535 }
536 }
537 for (; i < 16; i += 2) {
538 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
539 n0, top, i - 1);
540 bn_scatter5(tmp.d, top, powerbuf, i);
541 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
542 bn_scatter5(tmp.d, top, powerbuf, 2*i);
543 }
544 for (; i < 32; i += 2) {
545 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
546 n0, top, i - 1);
547 bn_scatter5(tmp.d, top, powerbuf, i);
548 }
549 #endif
550 bits--;
551 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
552 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
553 bn_gather5(tmp.d, top, powerbuf, wvalue);
554
555 /* Scan the exponent one window at a time starting from the most
556 * significant bits.
557 */
558 while (bits >= 0) {
559 for (wvalue = 0, i = 0; i < 5; i++, bits--)
560 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
561
562 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
563 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
564 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
565 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
566 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
567 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
568 }
569
570 tmp.top = top;
571 bn_correct_top(&tmp);
572 } else
573 #endif
574 {
575 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
576 window))
577 goto err;
578 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1,
579 window))
580 goto err;
581
582 /* If the window size is greater than 1, then calculate
583 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
584 * (even powers could instead be computed as (a^(i/2))^2
585 * to use the slight performance advantage of sqr over mul).
586 */
587 if (window > 1) {
588 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
589 goto err;
590 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
591 2, window))
592 goto err;
593 for (i = 3; i < numPowers; i++) {
594 /* Calculate a^i = a^(i-1) * a */
595 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
596 mont, ctx))
597 goto err;
598 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
599 powerbuf, i, window))
600 goto err;
601 }
602 }
603
604 bits--;
605 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
606 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
607 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
608 wvalue, window))
609 goto err;
610
611 /* Scan the exponent one window at a time starting from the most
612 * significant bits.
613 */
614 while (bits >= 0) {
615 wvalue = 0; /* The 'value' of the window */
616
617 /* Scan the window, squaring the result as we go */
618 for (i = 0; i < window; i++, bits--) {
619 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
620 mont, ctx))
621 goto err;
622 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
623 }
624
625 /* Fetch the appropriate pre-computed value from the pre-buf */
626 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
627 wvalue, window))
628 goto err;
629
630 /* Multiply the result into the intermediate result */
631 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
632 goto err;
633 }
634 }
635
636 /* Convert the final result from montgomery to standard format */
637 if (!BN_from_montgomery(rr, &tmp, mont, ctx))
638 goto err;
639 ret = 1;
640
641 err:
642 if ((in_mont == NULL) && (mont != NULL))
643 BN_MONT_CTX_free(mont);
644 freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
645 BN_CTX_end(ctx);
646 return (ret);
647 }
648 LCRYPTO_ALIAS(BN_mod_exp_mont_consttime);
649
650 static int
BN_mod_exp_mont_internal(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont,int ct)651 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
652 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
653 {
654 int i, j, bits, ret = 0, wstart, wend, window, wvalue;
655 int start = 1;
656 BIGNUM *d, *r;
657 const BIGNUM *aa;
658 /* Table of variables obtained from 'ctx' */
659 BIGNUM *val[TABLE_SIZE];
660 BN_MONT_CTX *mont = NULL;
661
662 if (ct) {
663 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
664 }
665
666
667 if (!BN_is_odd(m)) {
668 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
669 return (0);
670 }
671
672 bits = BN_num_bits(p);
673 if (bits == 0) {
674 /* x**0 mod 1 is still zero. */
675 if (BN_abs_is_word(m, 1)) {
676 ret = 1;
677 BN_zero(rr);
678 } else
679 ret = BN_one(rr);
680 return ret;
681 }
682
683 BN_CTX_start(ctx);
684 if ((d = BN_CTX_get(ctx)) == NULL)
685 goto err;
686 if ((r = BN_CTX_get(ctx)) == NULL)
687 goto err;
688 if ((val[0] = BN_CTX_get(ctx)) == NULL)
689 goto err;
690
691 /* If this is not done, things will break in the montgomery
692 * part */
693
694 if (in_mont != NULL)
695 mont = in_mont;
696 else {
697 if ((mont = BN_MONT_CTX_new()) == NULL)
698 goto err;
699 if (!BN_MONT_CTX_set(mont, m, ctx))
700 goto err;
701 }
702
703 if (!BN_nnmod(val[0], a,m, ctx))
704 goto err;
705 aa = val[0];
706 if (BN_is_zero(aa)) {
707 BN_zero(rr);
708 ret = 1;
709 goto err;
710 }
711 if (!BN_to_montgomery(val[0], aa, mont, ctx))
712 goto err;
713
714 window = BN_window_bits_for_exponent_size(bits);
715 if (window > 1) {
716 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
717 goto err;
718 j = 1 << (window - 1);
719 for (i = 1; i < j; i++) {
720 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
721 !BN_mod_mul_montgomery(val[i], val[i - 1],
722 d, mont, ctx))
723 goto err;
724 }
725 }
726
727 start = 1; /* This is used to avoid multiplication etc
728 * when there is only the value '1' in the
729 * buffer. */
730 wvalue = 0; /* The 'value' of the window */
731 wstart = bits - 1; /* The top bit of the window */
732 wend = 0; /* The bottom bit of the window */
733
734 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
735 goto err;
736 for (;;) {
737 if (BN_is_bit_set(p, wstart) == 0) {
738 if (!start) {
739 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
740 goto err;
741 }
742 if (wstart == 0)
743 break;
744 wstart--;
745 continue;
746 }
747 /* We now have wstart on a 'set' bit, we now need to work out
748 * how bit a window to do. To do this we need to scan
749 * forward until the last set bit before the end of the
750 * window */
751 j = wstart;
752 wvalue = 1;
753 wend = 0;
754 for (i = 1; i < window; i++) {
755 if (wstart - i < 0)
756 break;
757 if (BN_is_bit_set(p, wstart - i)) {
758 wvalue <<= (i - wend);
759 wvalue |= 1;
760 wend = i;
761 }
762 }
763
764 /* wend is the size of the current window */
765 j = wend + 1;
766 /* add the 'bytes above' */
767 if (!start)
768 for (i = 0; i < j; i++) {
769 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
770 goto err;
771 }
772
773 /* wvalue will be an odd number < 2^window */
774 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
775 goto err;
776
777 /* move the 'window' down further */
778 wstart -= wend + 1;
779 wvalue = 0;
780 start = 0;
781 if (wstart < 0)
782 break;
783 }
784 if (!BN_from_montgomery(rr, r,mont, ctx))
785 goto err;
786 ret = 1;
787
788 err:
789 if ((in_mont == NULL) && (mont != NULL))
790 BN_MONT_CTX_free(mont);
791 BN_CTX_end(ctx);
792 return (ret);
793 }
794
795 int
BN_mod_exp_mont(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)796 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
797 BN_CTX *ctx, BN_MONT_CTX *in_mont)
798 {
799 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
800 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
801 }
802 LCRYPTO_ALIAS(BN_mod_exp_mont);
803
804 int
BN_mod_exp_mont_ct(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)805 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
806 BN_CTX *ctx, BN_MONT_CTX *in_mont)
807 {
808 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
809 }
810
811 int
BN_mod_exp_mont_nonct(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)812 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
813 BN_CTX *ctx, BN_MONT_CTX *in_mont)
814 {
815 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
816 }
817
818 int
BN_mod_exp_mont_word(BIGNUM * rr,BN_ULONG a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)819 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
820 BN_CTX *ctx, BN_MONT_CTX *in_mont)
821 {
822 BN_MONT_CTX *mont = NULL;
823 int b, bits, ret = 0;
824 int r_is_one;
825 BN_ULONG w, next_w;
826 BIGNUM *d, *r, *t;
827 BIGNUM *swap_tmp;
828
829 #define BN_MOD_MUL_WORD(r, w, m) \
830 (BN_mul_word(r, (w)) && \
831 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
832 (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
833 /* BN_MOD_MUL_WORD is only used with 'w' large,
834 * so the BN_ucmp test is probably more overhead
835 * than always using BN_mod (which uses bn_copy if
836 * a similar test returns true). */
837 /* We can use BN_mod and do not need BN_nnmod because our
838 * accumulator is never negative (the result of BN_mod does
839 * not depend on the sign of the modulus).
840 */
841 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
842 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
843
844 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
845 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
846 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
847 return -1;
848 }
849
850
851 if (!BN_is_odd(m)) {
852 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
853 return (0);
854 }
855 if (m->top == 1)
856 a %= m->d[0]; /* make sure that 'a' is reduced */
857
858 bits = BN_num_bits(p);
859 if (bits == 0) {
860 /* x**0 mod 1 is still zero. */
861 if (BN_abs_is_word(m, 1)) {
862 ret = 1;
863 BN_zero(rr);
864 } else
865 ret = BN_one(rr);
866 return ret;
867 }
868 if (a == 0) {
869 BN_zero(rr);
870 ret = 1;
871 return ret;
872 }
873
874 BN_CTX_start(ctx);
875 if ((d = BN_CTX_get(ctx)) == NULL)
876 goto err;
877 if ((r = BN_CTX_get(ctx)) == NULL)
878 goto err;
879 if ((t = BN_CTX_get(ctx)) == NULL)
880 goto err;
881
882 if (in_mont != NULL)
883 mont = in_mont;
884 else {
885 if ((mont = BN_MONT_CTX_new()) == NULL)
886 goto err;
887 if (!BN_MONT_CTX_set(mont, m, ctx))
888 goto err;
889 }
890
891 r_is_one = 1; /* except for Montgomery factor */
892
893 /* bits-1 >= 0 */
894
895 /* The result is accumulated in the product r*w. */
896 w = a; /* bit 'bits-1' of 'p' is always set */
897 for (b = bits - 2; b >= 0; b--) {
898 /* First, square r*w. */
899 next_w = w * w;
900 if ((next_w / w) != w) /* overflow */
901 {
902 if (r_is_one) {
903 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
904 goto err;
905 r_is_one = 0;
906 } else {
907 if (!BN_MOD_MUL_WORD(r, w, m))
908 goto err;
909 }
910 next_w = 1;
911 }
912 w = next_w;
913 if (!r_is_one) {
914 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
915 goto err;
916 }
917
918 /* Second, multiply r*w by 'a' if exponent bit is set. */
919 if (BN_is_bit_set(p, b)) {
920 next_w = w * a;
921 if ((next_w / a) != w) /* overflow */
922 {
923 if (r_is_one) {
924 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
925 goto err;
926 r_is_one = 0;
927 } else {
928 if (!BN_MOD_MUL_WORD(r, w, m))
929 goto err;
930 }
931 next_w = a;
932 }
933 w = next_w;
934 }
935 }
936
937 /* Finally, set r:=r*w. */
938 if (w != 1) {
939 if (r_is_one) {
940 if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
941 goto err;
942 r_is_one = 0;
943 } else {
944 if (!BN_MOD_MUL_WORD(r, w, m))
945 goto err;
946 }
947 }
948
949 if (r_is_one) /* can happen only if a == 1*/
950 {
951 if (!BN_one(rr))
952 goto err;
953 } else {
954 if (!BN_from_montgomery(rr, r, mont, ctx))
955 goto err;
956 }
957 ret = 1;
958
959 err:
960 if ((in_mont == NULL) && (mont != NULL))
961 BN_MONT_CTX_free(mont);
962 BN_CTX_end(ctx);
963 return (ret);
964 }
965
966 int
BN_mod_exp_recp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)967 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
968 BN_CTX *ctx)
969 {
970 int i, j, bits, wstart, wend, window, wvalue;
971 int start = 1;
972 BIGNUM *aa, *q;
973 /* Table of variables obtained from 'ctx' */
974 BIGNUM *val[TABLE_SIZE];
975 BN_RECP_CTX *recp = NULL;
976 int ret = 0;
977
978 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
979 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
980 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
981 return -1;
982 }
983
984 bits = BN_num_bits(p);
985 if (bits == 0) {
986 /* x**0 mod 1 is still zero. */
987 if (BN_abs_is_word(m, 1)) {
988 ret = 1;
989 BN_zero(r);
990 } else
991 ret = BN_one(r);
992 return ret;
993 }
994
995 BN_CTX_start(ctx);
996 if ((aa = BN_CTX_get(ctx)) == NULL)
997 goto err;
998 if ((q = BN_CTX_get(ctx)) == NULL)
999 goto err;
1000 if ((val[0] = BN_CTX_get(ctx)) == NULL)
1001 goto err;
1002
1003 if ((recp = BN_RECP_CTX_create(m)) == NULL)
1004 goto err;
1005
1006 if (!BN_nnmod(val[0], a, m, ctx))
1007 goto err;
1008 if (BN_is_zero(val[0])) {
1009 BN_zero(r);
1010 goto done;
1011 }
1012 if (!bn_copy(q, p))
1013 goto err;
1014
1015 window = BN_window_bits_for_exponent_size(bits);
1016 if (window > 1) {
1017 if (!BN_mod_sqr_reciprocal(aa, val[0], recp, ctx))
1018 goto err;
1019 j = 1 << (window - 1);
1020 for (i = 1; i < j; i++) {
1021 if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1022 !BN_mod_mul_reciprocal(val[i], val[i - 1],
1023 aa, recp, ctx))
1024 goto err;
1025 }
1026 }
1027
1028 start = 1; /* This is used to avoid multiplication etc
1029 * when there is only the value '1' in the
1030 * buffer. */
1031 wvalue = 0; /* The 'value' of the window */
1032 wstart = bits - 1; /* The top bit of the window */
1033 wend = 0; /* The bottom bit of the window */
1034
1035 if (!BN_one(r))
1036 goto err;
1037
1038 for (;;) {
1039 if (BN_is_bit_set(q, wstart) == 0) {
1040 if (!start)
1041 if (!BN_mod_sqr_reciprocal(r, r, recp, ctx))
1042 goto err;
1043 if (wstart == 0)
1044 break;
1045 wstart--;
1046 continue;
1047 }
1048 /* We now have wstart on a 'set' bit, we now need to work out
1049 * how bit a window to do. To do this we need to scan
1050 * forward until the last set bit before the end of the
1051 * window */
1052 j = wstart;
1053 wvalue = 1;
1054 wend = 0;
1055 for (i = 1; i < window; i++) {
1056 if (wstart - i < 0)
1057 break;
1058 if (BN_is_bit_set(q, wstart - i)) {
1059 wvalue <<= (i - wend);
1060 wvalue |= 1;
1061 wend = i;
1062 }
1063 }
1064
1065 /* wend is the size of the current window */
1066 j = wend + 1;
1067 /* add the 'bytes above' */
1068 if (!start)
1069 for (i = 0; i < j; i++) {
1070 if (!BN_mod_sqr_reciprocal(r, r, recp, ctx))
1071 goto err;
1072 }
1073
1074 /* wvalue will be an odd number < 2^window */
1075 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], recp, ctx))
1076 goto err;
1077
1078 /* move the 'window' down further */
1079 wstart -= wend + 1;
1080 wvalue = 0;
1081 start = 0;
1082 if (wstart < 0)
1083 break;
1084 }
1085
1086 done:
1087 ret = 1;
1088
1089 err:
1090 BN_CTX_end(ctx);
1091 BN_RECP_CTX_free(recp);
1092
1093 return ret;
1094 }
1095
1096 static int
BN_mod_exp_internal(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,int ct)1097 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1098 BN_CTX *ctx, int ct)
1099 {
1100 int ret;
1101
1102
1103 /* For even modulus m = 2^k*m_odd, it might make sense to compute
1104 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
1105 * exponentiation for the odd part), using appropriate exponent
1106 * reductions, and combine the results using the CRT.
1107 *
1108 * For now, we use Montgomery only if the modulus is odd; otherwise,
1109 * exponentiation using the reciprocal-based quick remaindering
1110 * algorithm is used.
1111 *
1112 * (Timing obtained with expspeed.c [computations a^p mod m
1113 * where a, p, m are of the same length: 256, 512, 1024, 2048,
1114 * 4096, 8192 bits], compared to the running time of the
1115 * standard algorithm:
1116 *
1117 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
1118 * 55 .. 77 % [UltraSparc processor, but
1119 * debug-solaris-sparcv8-gcc conf.]
1120 *
1121 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
1122 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
1123 *
1124 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
1125 * at 2048 and more bits, but at 512 and 1024 bits, it was
1126 * slower even than the standard algorithm!
1127 *
1128 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
1129 * should be obtained when the new Montgomery reduction code
1130 * has been integrated into OpenSSL.)
1131 */
1132
1133 if (BN_is_odd(m)) {
1134 if (a->top == 1 && !a->neg && !ct) {
1135 BN_ULONG A = a->d[0];
1136 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
1137 } else
1138 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
1139 } else {
1140 ret = BN_mod_exp_recp(r, a,p, m, ctx);
1141 }
1142
1143 return (ret);
1144 }
1145
1146 int
BN_mod_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)1147 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1148 BN_CTX *ctx)
1149 {
1150 return BN_mod_exp_internal(r, a, p, m, ctx,
1151 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
1152 }
1153 LCRYPTO_ALIAS(BN_mod_exp);
1154
1155 int
BN_mod_exp_ct(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)1156 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1157 BN_CTX *ctx)
1158 {
1159 return BN_mod_exp_internal(r, a, p, m, ctx, 1);
1160 }
1161
1162 int
BN_mod_exp_nonct(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)1163 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1164 BN_CTX *ctx)
1165 {
1166 return BN_mod_exp_internal(r, a, p, m, ctx, 0);
1167 }
1168
1169 int
BN_mod_exp2_mont(BIGNUM * rr,const BIGNUM * a1,const BIGNUM * p1,const BIGNUM * a2,const BIGNUM * p2,const BIGNUM * m,BN_CTX * ctx,BN_MONT_CTX * in_mont)1170 BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
1171 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, BN_CTX *ctx,
1172 BN_MONT_CTX *in_mont)
1173 {
1174 int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
1175 int r_is_one = 1;
1176 BIGNUM *d, *r;
1177 const BIGNUM *a_mod_m;
1178 /* Tables of variables obtained from 'ctx' */
1179 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
1180 BN_MONT_CTX *mont = NULL;
1181
1182
1183 if (!BN_is_odd(m)) {
1184 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
1185 return (0);
1186 }
1187 bits1 = BN_num_bits(p1);
1188 bits2 = BN_num_bits(p2);
1189 if ((bits1 == 0) && (bits2 == 0)) {
1190 ret = BN_one(rr);
1191 return ret;
1192 }
1193
1194 bits = (bits1 > bits2) ? bits1 : bits2;
1195
1196 BN_CTX_start(ctx);
1197 if ((d = BN_CTX_get(ctx)) == NULL)
1198 goto err;
1199 if ((r = BN_CTX_get(ctx)) == NULL)
1200 goto err;
1201 if ((val1[0] = BN_CTX_get(ctx)) == NULL)
1202 goto err;
1203 if ((val2[0] = BN_CTX_get(ctx)) == NULL)
1204 goto err;
1205
1206 if (in_mont != NULL)
1207 mont = in_mont;
1208 else {
1209 if ((mont = BN_MONT_CTX_new()) == NULL)
1210 goto err;
1211 if (!BN_MONT_CTX_set(mont, m, ctx))
1212 goto err;
1213 }
1214
1215 window1 = BN_window_bits_for_exponent_size(bits1);
1216 window2 = BN_window_bits_for_exponent_size(bits2);
1217
1218 /*
1219 * Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
1220 */
1221 if (!BN_nnmod(val1[0], a1, m, ctx))
1222 goto err;
1223 a_mod_m = val1[0];
1224 if (BN_is_zero(a_mod_m)) {
1225 BN_zero(rr);
1226 ret = 1;
1227 goto err;
1228 }
1229
1230 if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
1231 goto err;
1232 if (window1 > 1) {
1233 if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
1234 goto err;
1235
1236 j = 1 << (window1 - 1);
1237 for (i = 1; i < j; i++) {
1238 if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
1239 !BN_mod_mul_montgomery(val1[i], val1[i - 1],
1240 d, mont, ctx))
1241 goto err;
1242 }
1243 }
1244
1245
1246 /*
1247 * Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
1248 */
1249 if (!BN_nnmod(val2[0], a2, m, ctx))
1250 goto err;
1251 a_mod_m = val2[0];
1252 if (BN_is_zero(a_mod_m)) {
1253 BN_zero(rr);
1254 ret = 1;
1255 goto err;
1256 }
1257 if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
1258 goto err;
1259 if (window2 > 1) {
1260 if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
1261 goto err;
1262
1263 j = 1 << (window2 - 1);
1264 for (i = 1; i < j; i++) {
1265 if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
1266 !BN_mod_mul_montgomery(val2[i], val2[i - 1],
1267 d, mont, ctx))
1268 goto err;
1269 }
1270 }
1271
1272
1273 /* Now compute the power product, using independent windows. */
1274 r_is_one = 1;
1275 wvalue1 = 0; /* The 'value' of the first window */
1276 wvalue2 = 0; /* The 'value' of the second window */
1277 wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
1278 wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
1279
1280 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
1281 goto err;
1282 for (b = bits - 1; b >= 0; b--) {
1283 if (!r_is_one) {
1284 if (!BN_mod_mul_montgomery(r, r,r, mont, ctx))
1285 goto err;
1286 }
1287
1288 if (!wvalue1)
1289 if (BN_is_bit_set(p1, b)) {
1290 /* consider bits b-window1+1 .. b for this window */
1291 i = b - window1 + 1;
1292 while (!BN_is_bit_set(p1, i)) /* works for i<0 */
1293 i++;
1294 wpos1 = i;
1295 wvalue1 = 1;
1296 for (i = b - 1; i >= wpos1; i--) {
1297 wvalue1 <<= 1;
1298 if (BN_is_bit_set(p1, i))
1299 wvalue1++;
1300 }
1301 }
1302
1303 if (!wvalue2)
1304 if (BN_is_bit_set(p2, b)) {
1305 /* consider bits b-window2+1 .. b for this window */
1306 i = b - window2 + 1;
1307 while (!BN_is_bit_set(p2, i))
1308 i++;
1309 wpos2 = i;
1310 wvalue2 = 1;
1311 for (i = b - 1; i >= wpos2; i--) {
1312 wvalue2 <<= 1;
1313 if (BN_is_bit_set(p2, i))
1314 wvalue2++;
1315 }
1316 }
1317
1318 if (wvalue1 && b == wpos1) {
1319 /* wvalue1 is odd and < 2^window1 */
1320 if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1],
1321 mont, ctx))
1322 goto err;
1323 wvalue1 = 0;
1324 r_is_one = 0;
1325 }
1326
1327 if (wvalue2 && b == wpos2) {
1328 /* wvalue2 is odd and < 2^window2 */
1329 if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1],
1330 mont, ctx))
1331 goto err;
1332 wvalue2 = 0;
1333 r_is_one = 0;
1334 }
1335 }
1336 if (!BN_from_montgomery(rr, r,mont, ctx))
1337 goto err;
1338 ret = 1;
1339
1340 err:
1341 if ((in_mont == NULL) && (mont != NULL))
1342 BN_MONT_CTX_free(mont);
1343 BN_CTX_end(ctx);
1344 return (ret);
1345 }
1346