1 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
2  * All rights reserved.
3  *
4  * This package is an SSL implementation written
5  * by Eric Young (eay@cryptsoft.com).
6  * The implementation was written so as to conform with Netscapes SSL.
7  *
8  * This library is free for commercial and non-commercial use as long as
9  * the following conditions are aheared to.  The following conditions
10  * apply to all code found in this distribution, be it the RC4, RSA,
11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
12  * included with this distribution is covered by the same copyright terms
13  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
14  *
15  * Copyright remains Eric Young's, and as such any Copyright notices in
16  * the code are not to be removed.
17  * If this package is used in a product, Eric Young should be given attribution
18  * as the author of the parts of the library used.
19  * This can be in the form of a textual message at program startup or
20  * in documentation (online or textual) provided with the package.
21  *
22  * Redistribution and use in source and binary forms, with or without
23  * modification, are permitted provided that the following conditions
24  * are met:
25  * 1. Redistributions of source code must retain the copyright
26  *    notice, this list of conditions and the following disclaimer.
27  * 2. Redistributions in binary form must reproduce the above copyright
28  *    notice, this list of conditions and the following disclaimer in the
29  *    documentation and/or other materials provided with the distribution.
30  * 3. All advertising materials mentioning features or use of this software
31  *    must display the following acknowledgement:
32  *    "This product includes cryptographic software written by
33  *     Eric Young (eay@cryptsoft.com)"
34  *    The word 'cryptographic' can be left out if the rouines from the library
35  *    being used are not cryptographic related :-).
36  * 4. If you include any Windows specific code (or a derivative thereof) from
37  *    the apps directory (application code) you must include an acknowledgement:
38  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
39  *
40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  * The licence and distribution terms for any publically available version or
53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
54  * copied and put under another distribution licence
55  * [including the GNU Public Licence.]
56  */
57 /* ====================================================================
58  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
59  *
60  * Redistribution and use in source and binary forms, with or without
61  * modification, are permitted provided that the following conditions
62  * are met:
63  *
64  * 1. Redistributions of source code must retain the above copyright
65  *    notice, this list of conditions and the following disclaimer.
66  *
67  * 2. Redistributions in binary form must reproduce the above copyright
68  *    notice, this list of conditions and the following disclaimer in
69  *    the documentation and/or other materials provided with the
70  *    distribution.
71  *
72  * 3. All advertising materials mentioning features or use of this
73  *    software must display the following acknowledgment:
74  *    "This product includes software developed by the OpenSSL Project
75  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
76  *
77  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
78  *    endorse or promote products derived from this software without
79  *    prior written permission. For written permission, please contact
80  *    openssl-core@openssl.org.
81  *
82  * 5. Products derived from this software may not be called "OpenSSL"
83  *    nor may "OpenSSL" appear in their names without prior written
84  *    permission of the OpenSSL Project.
85  *
86  * 6. Redistributions of any form whatsoever must retain the following
87  *    acknowledgment:
88  *    "This product includes software developed by the OpenSSL Project
89  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
90  *
91  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
92  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
94  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
95  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
96  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
97  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
98  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
100  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
101  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
102  * OF THE POSSIBILITY OF SUCH DAMAGE.
103  * ====================================================================
104  *
105  * This product includes cryptographic software written by Eric Young
106  * (eay@cryptsoft.com).  This product includes software written by Tim
107  * Hudson (tjh@cryptsoft.com). */
108 
109 #include <openssl/bn.h>
110 
111 #include <assert.h>
112 #include <stdlib.h>
113 #include <string.h>
114 
115 #include <openssl/cpu.h>
116 #include <openssl/err.h>
117 #include <openssl/mem.h>
118 
119 #include "internal.h"
120 #include "rsaz_exp.h"
121 
122 
BN_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,BN_CTX * ctx)123 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
124   int i, bits, ret = 0;
125   BIGNUM *v, *rr;
126 
127   BN_CTX_start(ctx);
128   if (r == a || r == p) {
129     rr = BN_CTX_get(ctx);
130   } else {
131     rr = r;
132   }
133 
134   v = BN_CTX_get(ctx);
135   if (rr == NULL || v == NULL) {
136     goto err;
137   }
138 
139   if (BN_copy(v, a) == NULL) {
140     goto err;
141   }
142   bits = BN_num_bits(p);
143 
144   if (BN_is_odd(p)) {
145     if (BN_copy(rr, a) == NULL) {
146       goto err;
147     }
148   } else {
149     if (!BN_one(rr)) {
150       goto err;
151     }
152   }
153 
154   for (i = 1; i < bits; i++) {
155     if (!BN_sqr(v, v, ctx)) {
156       goto err;
157     }
158     if (BN_is_bit_set(p, i)) {
159       if (!BN_mul(rr, rr, v, ctx)) {
160         goto err;
161       }
162     }
163   }
164 
165   if (r != rr && !BN_copy(r, rr)) {
166     goto err;
167   }
168   ret = 1;
169 
170 err:
171   BN_CTX_end(ctx);
172   return ret;
173 }
174 
175 typedef struct bn_recp_ctx_st {
176   BIGNUM N;   // the divisor
177   BIGNUM Nr;  // the reciprocal
178   int num_bits;
179   int shift;
180   int flags;
181 } BN_RECP_CTX;
182 
BN_RECP_CTX_init(BN_RECP_CTX * recp)183 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
184   BN_init(&recp->N);
185   BN_init(&recp->Nr);
186   recp->num_bits = 0;
187   recp->shift = 0;
188   recp->flags = 0;
189 }
190 
BN_RECP_CTX_free(BN_RECP_CTX * recp)191 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
192   if (recp == NULL) {
193     return;
194   }
195 
196   BN_free(&recp->N);
197   BN_free(&recp->Nr);
198 }
199 
BN_RECP_CTX_set(BN_RECP_CTX * recp,const BIGNUM * d,BN_CTX * ctx)200 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
201   if (!BN_copy(&(recp->N), d)) {
202     return 0;
203   }
204   BN_zero(&recp->Nr);
205   recp->num_bits = BN_num_bits(d);
206   recp->shift = 0;
207 
208   return 1;
209 }
210 
211 // len is the expected size of the result We actually calculate with an extra
212 // word of precision, so we can do faster division if the remainder is not
213 // required.
214 // r := 2^len / m
BN_reciprocal(BIGNUM * r,const BIGNUM * m,int len,BN_CTX * ctx)215 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
216   int ret = -1;
217   BIGNUM *t;
218 
219   BN_CTX_start(ctx);
220   t = BN_CTX_get(ctx);
221   if (t == NULL) {
222     goto err;
223   }
224 
225   if (!BN_set_bit(t, len)) {
226     goto err;
227   }
228 
229   if (!BN_div(r, NULL, t, m, ctx)) {
230     goto err;
231   }
232 
233   ret = len;
234 
235 err:
236   BN_CTX_end(ctx);
237   return ret;
238 }
239 
BN_div_recp(BIGNUM * dv,BIGNUM * rem,const BIGNUM * m,BN_RECP_CTX * recp,BN_CTX * ctx)240 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
241                        BN_RECP_CTX *recp, BN_CTX *ctx) {
242   int i, j, ret = 0;
243   BIGNUM *a, *b, *d, *r;
244 
245   BN_CTX_start(ctx);
246   a = BN_CTX_get(ctx);
247   b = BN_CTX_get(ctx);
248   if (dv != NULL) {
249     d = dv;
250   } else {
251     d = BN_CTX_get(ctx);
252   }
253 
254   if (rem != NULL) {
255     r = rem;
256   } else {
257     r = BN_CTX_get(ctx);
258   }
259 
260   if (a == NULL || b == NULL || d == NULL || r == NULL) {
261     goto err;
262   }
263 
264   if (BN_ucmp(m, &recp->N) < 0) {
265     BN_zero(d);
266     if (!BN_copy(r, m)) {
267       goto err;
268     }
269     BN_CTX_end(ctx);
270     return 1;
271   }
272 
273   // We want the remainder
274   // Given input of ABCDEF / ab
275   // we need multiply ABCDEF by 3 digests of the reciprocal of ab
276 
277   // i := max(BN_num_bits(m), 2*BN_num_bits(N))
278   i = BN_num_bits(m);
279   j = recp->num_bits << 1;
280   if (j > i) {
281     i = j;
282   }
283 
284   // Nr := round(2^i / N)
285   if (i != recp->shift) {
286     recp->shift =
287         BN_reciprocal(&(recp->Nr), &(recp->N), i,
288                       ctx);  // BN_reciprocal returns i, or -1 for an error
289   }
290 
291   if (recp->shift == -1) {
292     goto err;
293   }
294 
295   // d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
296   // BN_num_bits(N)))|
297   //    = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
298   // BN_num_bits(N)))|
299   //   <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
300   //    = |m/N|
301   if (!BN_rshift(a, m, recp->num_bits)) {
302     goto err;
303   }
304   if (!BN_mul(b, a, &(recp->Nr), ctx)) {
305     goto err;
306   }
307   if (!BN_rshift(d, b, i - recp->num_bits)) {
308     goto err;
309   }
310   d->neg = 0;
311 
312   if (!BN_mul(b, &(recp->N), d, ctx)) {
313     goto err;
314   }
315   if (!BN_usub(r, m, b)) {
316     goto err;
317   }
318   r->neg = 0;
319 
320   j = 0;
321   while (BN_ucmp(r, &(recp->N)) >= 0) {
322     if (j++ > 2) {
323       OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
324       goto err;
325     }
326     if (!BN_usub(r, r, &(recp->N))) {
327       goto err;
328     }
329     if (!BN_add_word(d, 1)) {
330       goto err;
331     }
332   }
333 
334   r->neg = BN_is_zero(r) ? 0 : m->neg;
335   d->neg = m->neg ^ recp->N.neg;
336   ret = 1;
337 
338 err:
339   BN_CTX_end(ctx);
340   return ret;
341 }
342 
BN_mod_mul_reciprocal(BIGNUM * r,const BIGNUM * x,const BIGNUM * y,BN_RECP_CTX * recp,BN_CTX * ctx)343 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
344                                  BN_RECP_CTX *recp, BN_CTX *ctx) {
345   int ret = 0;
346   BIGNUM *a;
347   const BIGNUM *ca;
348 
349   BN_CTX_start(ctx);
350   a = BN_CTX_get(ctx);
351   if (a == NULL) {
352     goto err;
353   }
354 
355   if (y != NULL) {
356     if (x == y) {
357       if (!BN_sqr(a, x, ctx)) {
358         goto err;
359       }
360     } else {
361       if (!BN_mul(a, x, y, ctx)) {
362         goto err;
363       }
364     }
365     ca = a;
366   } else {
367     ca = x;  // Just do the mod
368   }
369 
370   ret = BN_div_recp(NULL, r, ca, recp, ctx);
371 
372 err:
373   BN_CTX_end(ctx);
374   return ret;
375 }
376 
377 // BN_window_bits_for_exponent_size returns sliding window size for mod_exp with
378 // a |b| bit exponent.
379 //
380 // For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
381 // multiplications is a constant plus on average
382 //
383 //    2^(w-1) + (b-w)/(w+1);
384 //
385 // here 2^(w-1)  is for precomputing the table (we actually need entries only
386 // for windows that have the lowest bit set), and (b-w)/(w+1)  is an
387 // approximation for the expected number of w-bit windows, not counting the
388 // first one.
389 //
390 // Thus we should use
391 //
392 //    w >= 6  if        b > 671
393 //     w = 5  if  671 > b > 239
394 //     w = 4  if  239 > b >  79
395 //     w = 3  if   79 > b >  23
396 //    w <= 2  if   23 > b
397 //
398 // (with draws in between).  Very small exponents are often selected
399 // with low Hamming weight, so we use  w = 1  for b <= 23.
BN_window_bits_for_exponent_size(int b)400 static int BN_window_bits_for_exponent_size(int b) {
401   if (b > 671) {
402     return 6;
403   }
404   if (b > 239) {
405     return 5;
406   }
407   if (b > 79) {
408     return 4;
409   }
410   if (b > 23) {
411     return 3;
412   }
413   return 1;
414 }
415 
416 // TABLE_SIZE is the maximum precomputation table size for *variable* sliding
417 // windows. This must be 2^(max_window - 1), where max_window is the largest
418 // value returned from |BN_window_bits_for_exponent_size|.
419 #define TABLE_SIZE 32
420 
421 // TABLE_BITS_SMALL is the smallest value returned from
422 // |BN_window_bits_for_exponent_size| when |b| is at most |BN_BITS2| *
423 // |BN_SMALL_MAX_WORDS| words.
424 #define TABLE_BITS_SMALL 5
425 
426 // TABLE_SIZE_SMALL is the same as |TABLE_SIZE|, but when |b| is at most
427 // |BN_BITS2| * |BN_SMALL_MAX_WORDS|.
428 #define TABLE_SIZE_SMALL (1 << (TABLE_BITS_SMALL - 1))
429 
mod_exp_recp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)430 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
431                         const BIGNUM *m, BN_CTX *ctx) {
432   int i, j, ret = 0, wstart, window;
433   int start = 1;
434   BIGNUM *aa;
435   // Table of variables obtained from 'ctx'
436   BIGNUM *val[TABLE_SIZE];
437   BN_RECP_CTX recp;
438 
439   // This function is only called on even moduli.
440   assert(!BN_is_odd(m));
441 
442   int bits = BN_num_bits(p);
443   if (bits == 0) {
444     return BN_one(r);
445   }
446 
447   BN_CTX_start(ctx);
448   aa = BN_CTX_get(ctx);
449   val[0] = BN_CTX_get(ctx);
450   if (!aa || !val[0]) {
451     goto err;
452   }
453 
454   BN_RECP_CTX_init(&recp);
455   if (m->neg) {
456     // ignore sign of 'm'
457     if (!BN_copy(aa, m)) {
458       goto err;
459     }
460     aa->neg = 0;
461     if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
462       goto err;
463     }
464   } else {
465     if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
466       goto err;
467     }
468   }
469 
470   if (!BN_nnmod(val[0], a, m, ctx)) {
471     goto err;  // 1
472   }
473   if (BN_is_zero(val[0])) {
474     BN_zero(r);
475     ret = 1;
476     goto err;
477   }
478 
479   window = BN_window_bits_for_exponent_size(bits);
480   if (window > 1) {
481     if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
482       goto err;  // 2
483     }
484     j = 1 << (window - 1);
485     for (i = 1; i < j; i++) {
486       if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
487           !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
488         goto err;
489       }
490     }
491   }
492 
493   start = 1;  // This is used to avoid multiplication etc
494               // when there is only the value '1' in the
495               // buffer.
496   wstart = bits - 1;  // The top bit of the window
497 
498   if (!BN_one(r)) {
499     goto err;
500   }
501 
502   for (;;) {
503     int wvalue;  // The 'value' of the window
504     int wend;  // The bottom bit of the window
505 
506     if (!BN_is_bit_set(p, wstart)) {
507       if (!start) {
508         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
509           goto err;
510         }
511       }
512       if (wstart == 0) {
513         break;
514       }
515       wstart--;
516       continue;
517     }
518 
519     // We now have wstart on a 'set' bit, we now need to work out
520     // how bit a window to do.  To do this we need to scan
521     // forward until the last set bit before the end of the
522     // window
523     wvalue = 1;
524     wend = 0;
525     for (i = 1; i < window; i++) {
526       if (wstart - i < 0) {
527         break;
528       }
529       if (BN_is_bit_set(p, wstart - i)) {
530         wvalue <<= (i - wend);
531         wvalue |= 1;
532         wend = i;
533       }
534     }
535 
536     // wend is the size of the current window
537     j = wend + 1;
538     // add the 'bytes above'
539     if (!start) {
540       for (i = 0; i < j; i++) {
541         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
542           goto err;
543         }
544       }
545     }
546 
547     // wvalue will be an odd number < 2^window
548     if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
549       goto err;
550     }
551 
552     // move the 'window' down further
553     wstart -= wend + 1;
554     start = 0;
555     if (wstart < 0) {
556       break;
557     }
558   }
559   ret = 1;
560 
561 err:
562   BN_CTX_end(ctx);
563   BN_RECP_CTX_free(&recp);
564   return ret;
565 }
566 
BN_mod_exp(BIGNUM * r,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx)567 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
568                BN_CTX *ctx) {
569   if (m->neg) {
570     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
571     return 0;
572   }
573   if (a->neg || BN_ucmp(a, m) >= 0) {
574     if (!BN_nnmod(r, a, m, ctx)) {
575       return 0;
576     }
577     a = r;
578   }
579 
580   if (BN_is_odd(m)) {
581     return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
582   }
583 
584   return mod_exp_recp(r, a, p, m, ctx);
585 }
586 
BN_mod_exp_mont(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)587 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
588                     const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
589   if (!BN_is_odd(m)) {
590     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
591     return 0;
592   }
593   if (m->neg) {
594     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
595     return 0;
596   }
597   if (a->neg || BN_ucmp(a, m) >= 0) {
598     OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
599     return 0;
600   }
601 
602   int bits = BN_num_bits(p);
603   if (bits == 0) {
604     // x**0 mod 1 is still zero.
605     if (BN_abs_is_word(m, 1)) {
606       BN_zero(rr);
607       return 1;
608     }
609     return BN_one(rr);
610   }
611 
612   int ret = 0;
613   BIGNUM *val[TABLE_SIZE];
614   BN_MONT_CTX *new_mont = NULL;
615 
616   BN_CTX_start(ctx);
617   BIGNUM *r = BN_CTX_get(ctx);
618   val[0] = BN_CTX_get(ctx);
619   if (r == NULL || val[0] == NULL) {
620     goto err;
621   }
622 
623   // Allocate a montgomery context if it was not supplied by the caller.
624   if (mont == NULL) {
625     new_mont = BN_MONT_CTX_new_consttime(m, ctx);
626     if (new_mont == NULL) {
627       goto err;
628     }
629     mont = new_mont;
630   }
631 
632   // We exponentiate by looking at sliding windows of the exponent and
633   // precomputing powers of |a|. Windows may be shifted so they always end on a
634   // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1)
635   // for i = 0 to 2^(window-1), all in Montgomery form.
636   int window = BN_window_bits_for_exponent_size(bits);
637   if (!BN_to_montgomery(val[0], a, mont, ctx)) {
638     goto err;
639   }
640   if (window > 1) {
641     BIGNUM *d = BN_CTX_get(ctx);
642     if (d == NULL ||
643         !BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
644       goto err;
645     }
646     for (int i = 1; i < 1 << (window - 1); i++) {
647       val[i] = BN_CTX_get(ctx);
648       if (val[i] == NULL ||
649           !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
650         goto err;
651       }
652     }
653   }
654 
655   // |p| is non-zero, so at least one window is non-zero. To save some
656   // multiplications, defer initializing |r| until then.
657   int r_is_one = 1;
658   int wstart = bits - 1;  // The top bit of the window.
659   for (;;) {
660     if (!BN_is_bit_set(p, wstart)) {
661       if (!r_is_one && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
662         goto err;
663       }
664       if (wstart == 0) {
665         break;
666       }
667       wstart--;
668       continue;
669     }
670 
671     // We now have wstart on a set bit. Find the largest window we can use.
672     int wvalue = 1;
673     int wsize = 0;
674     for (int i = 1; i < window && i <= wstart; i++) {
675       if (BN_is_bit_set(p, wstart - i)) {
676         wvalue <<= (i - wsize);
677         wvalue |= 1;
678         wsize = i;
679       }
680     }
681 
682     // Shift |r| to the end of the window.
683     if (!r_is_one) {
684       for (int i = 0; i < wsize + 1; i++) {
685         if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
686           goto err;
687         }
688       }
689     }
690 
691     assert(wvalue & 1);
692     assert(wvalue < (1 << window));
693     if (r_is_one) {
694       if (!BN_copy(r, val[wvalue >> 1])) {
695         goto err;
696       }
697     } else if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
698       goto err;
699     }
700 
701     r_is_one = 0;
702     if (wstart == wsize) {
703       break;
704     }
705     wstart -= wsize + 1;
706   }
707 
708   // |p| is non-zero, so |r_is_one| must be cleared at some point.
709   assert(!r_is_one);
710 
711   if (!BN_from_montgomery(rr, r, mont, ctx)) {
712     goto err;
713   }
714   ret = 1;
715 
716 err:
717   BN_MONT_CTX_free(new_mont);
718   BN_CTX_end(ctx);
719   return ret;
720 }
721 
bn_mod_exp_mont_small(BN_ULONG * r,const BN_ULONG * a,size_t num,const BN_ULONG * p,size_t num_p,const BN_MONT_CTX * mont)722 void bn_mod_exp_mont_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
723                            const BN_ULONG *p, size_t num_p,
724                            const BN_MONT_CTX *mont) {
725   if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) {
726     abort();
727   }
728   assert(BN_is_odd(&mont->N));
729 
730   // Count the number of bits in |p|. Note this function treats |p| as public.
731   while (num_p != 0 && p[num_p - 1] == 0) {
732     num_p--;
733   }
734   if (num_p == 0) {
735     bn_from_montgomery_small(r, num, mont->RR.d, num, mont);
736     return;
737   }
738   unsigned bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2;
739   assert(bits != 0);
740 
741   // We exponentiate by looking at sliding windows of the exponent and
742   // precomputing powers of |a|. Windows may be shifted so they always end on a
743   // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) for
744   // i = 0 to 2^(window-1), all in Montgomery form.
745   unsigned window = BN_window_bits_for_exponent_size(bits);
746   if (window > TABLE_BITS_SMALL) {
747     window = TABLE_BITS_SMALL;  // Tolerate excessively large |p|.
748   }
749   BN_ULONG val[TABLE_SIZE_SMALL][BN_SMALL_MAX_WORDS];
750   OPENSSL_memcpy(val[0], a, num * sizeof(BN_ULONG));
751   if (window > 1) {
752     BN_ULONG d[BN_SMALL_MAX_WORDS];
753     bn_mod_mul_montgomery_small(d, val[0], val[0], num, mont);
754     for (unsigned i = 1; i < 1u << (window - 1); i++) {
755       bn_mod_mul_montgomery_small(val[i], val[i - 1], d, num, mont);
756     }
757   }
758 
759   // |p| is non-zero, so at least one window is non-zero. To save some
760   // multiplications, defer initializing |r| until then.
761   int r_is_one = 1;
762   unsigned wstart = bits - 1;  // The top bit of the window.
763   for (;;) {
764     if (!bn_is_bit_set_words(p, num_p, wstart)) {
765       if (!r_is_one) {
766         bn_mod_mul_montgomery_small(r, r, r, num, mont);
767       }
768       if (wstart == 0) {
769         break;
770       }
771       wstart--;
772       continue;
773     }
774 
775     // We now have wstart on a set bit. Find the largest window we can use.
776     unsigned wvalue = 1;
777     unsigned wsize = 0;
778     for (unsigned i = 1; i < window && i <= wstart; i++) {
779       if (bn_is_bit_set_words(p, num_p, wstart - i)) {
780         wvalue <<= (i - wsize);
781         wvalue |= 1;
782         wsize = i;
783       }
784     }
785 
786     // Shift |r| to the end of the window.
787     if (!r_is_one) {
788       for (unsigned i = 0; i < wsize + 1; i++) {
789         bn_mod_mul_montgomery_small(r, r, r, num, mont);
790       }
791     }
792 
793     assert(wvalue & 1);
794     assert(wvalue < (1u << window));
795     if (r_is_one) {
796       OPENSSL_memcpy(r, val[wvalue >> 1], num * sizeof(BN_ULONG));
797     } else {
798       bn_mod_mul_montgomery_small(r, r, val[wvalue >> 1], num, mont);
799     }
800     r_is_one = 0;
801     if (wstart == wsize) {
802       break;
803     }
804     wstart -= wsize + 1;
805   }
806 
807   // |p| is non-zero, so |r_is_one| must be cleared at some point.
808   assert(!r_is_one);
809   OPENSSL_cleanse(val, sizeof(val));
810 }
811 
bn_mod_inverse0_prime_mont_small(BN_ULONG * r,const BN_ULONG * a,size_t num,const BN_MONT_CTX * mont)812 void bn_mod_inverse0_prime_mont_small(BN_ULONG *r, const BN_ULONG *a,
813                                       size_t num, const BN_MONT_CTX *mont) {
814   if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) {
815     abort();
816   }
817 
818   // Per Fermat's Little Theorem, a^-1 = a^(p-2) (mod p) for p prime.
819   BN_ULONG p_minus_two[BN_SMALL_MAX_WORDS];
820   const BN_ULONG *p = mont->N.d;
821   OPENSSL_memcpy(p_minus_two, p, num * sizeof(BN_ULONG));
822   if (p_minus_two[0] >= 2) {
823     p_minus_two[0] -= 2;
824   } else {
825     p_minus_two[0] -= 2;
826     for (size_t i = 1; i < num; i++) {
827       if (p_minus_two[i]-- != 0) {
828         break;
829       }
830     }
831   }
832 
833   bn_mod_exp_mont_small(r, a, num, p_minus_two, num, mont);
834 }
835 
copy_to_prebuf(const BIGNUM * b,int top,BN_ULONG * table,int idx,int window)836 static void copy_to_prebuf(const BIGNUM *b, int top, BN_ULONG *table, int idx,
837                            int window) {
838   int ret = bn_copy_words(table + idx * top, top, b);
839   assert(ret);  // |b| is guaranteed to fit.
840   (void)ret;
841 }
842 
copy_from_prebuf(BIGNUM * b,int top,const BN_ULONG * table,int idx,int window)843 static int copy_from_prebuf(BIGNUM *b, int top, const BN_ULONG *table, int idx,
844                             int window) {
845   if (!bn_wexpand(b, top)) {
846     return 0;
847   }
848 
849   OPENSSL_memset(b->d, 0, sizeof(BN_ULONG) * top);
850   const int width = 1 << window;
851   for (int i = 0; i < width; i++, table += top) {
852     BN_ULONG mask = constant_time_eq_int(i, idx);
853     for (int j = 0; j < top; j++) {
854       b->d[j] |= table[j] & mask;
855     }
856   }
857 
858   b->width = top;
859   return 1;
860 }
861 
862 #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
863   (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
864 
865 // Window sizes optimized for fixed window size modular exponentiation
866 // algorithm (BN_mod_exp_mont_consttime).
867 //
868 // To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
869 // size of the window must not exceed
870 // log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
871 //
872 // Window size thresholds are defined for cache line sizes of 32 and 64, cache
873 // line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
874 // 7 should only be used on processors that have a 128 byte or greater cache
875 // line size.
876 #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
877 
878 #define BN_window_bits_for_ctime_exponent_size(b) \
879   ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
880 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
881 
882 #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
883 
884 #define BN_window_bits_for_ctime_exponent_size(b) \
885   ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
886 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
887 
888 #endif
889 
890 // Given a pointer value, compute the next address that is a cache line
891 // multiple.
892 #define MOD_EXP_CTIME_ALIGN(x_)          \
893   ((unsigned char *)(x_) +               \
894    (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
895     (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
896 
897 // This variant of |BN_mod_exp_mont| uses fixed windows and fixed memory access
898 // patterns to protect secret exponents (cf. the hyper-threading timing attacks
899 // pointed out by Colin Percival,
900 // http://www.daemonology.net/hyperthreading-considered-harmful/)
BN_mod_exp_mont_consttime(BIGNUM * rr,const BIGNUM * a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)901 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
902                               const BIGNUM *m, BN_CTX *ctx,
903                               const BN_MONT_CTX *mont) {
904   int i, ret = 0, window, wvalue;
905   BN_MONT_CTX *new_mont = NULL;
906 
907   int numPowers;
908   unsigned char *powerbufFree = NULL;
909   int powerbufLen = 0;
910   BN_ULONG *powerbuf = NULL;
911   BIGNUM tmp, am;
912 
913   if (!BN_is_odd(m)) {
914     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
915     return 0;
916   }
917   if (m->neg) {
918     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
919     return 0;
920   }
921   if (a->neg || BN_ucmp(a, m) >= 0) {
922     OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
923     return 0;
924   }
925 
926   // Use all bits stored in |p|, rather than |BN_num_bits|, so we do not leak
927   // whether the top bits are zero.
928   int max_bits = p->width * BN_BITS2;
929   int bits = max_bits;
930   if (bits == 0) {
931     // x**0 mod 1 is still zero.
932     if (BN_abs_is_word(m, 1)) {
933       BN_zero(rr);
934       return 1;
935     }
936     return BN_one(rr);
937   }
938 
939   // Allocate a montgomery context if it was not supplied by the caller.
940   if (mont == NULL) {
941     new_mont = BN_MONT_CTX_new_consttime(m, ctx);
942     if (new_mont == NULL) {
943       goto err;
944     }
945     mont = new_mont;
946   }
947 
948   // Use the width in |mont->N|, rather than the copy in |m|. The assembly
949   // implementation assumes it can use |top| to size R.
950   int top = mont->N.width;
951 
952 #if defined(OPENSSL_BN_ASM_MONT5) || defined(RSAZ_ENABLED)
953   // Share one large stack-allocated buffer between the RSAZ and non-RSAZ code
954   // paths. If we were to use separate static buffers for each then there is
955   // some chance that both large buffers would be allocated on the stack,
956   // causing the stack space requirement to be truly huge (~10KB).
957   alignas(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH) BN_ULONG
958     storage[MOD_EXP_CTIME_STORAGE_LEN];
959 #endif
960 #if defined(RSAZ_ENABLED)
961   // If the size of the operands allow it, perform the optimized RSAZ
962   // exponentiation. For further information see crypto/fipsmodule/bn/rsaz_exp.c
963   // and accompanying assembly modules.
964   if (a->width == 16 && p->width == 16 && BN_num_bits(m) == 1024 &&
965       rsaz_avx2_preferred()) {
966     if (!bn_wexpand(rr, 16)) {
967       goto err;
968     }
969     RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0],
970                            storage);
971     rr->width = 16;
972     rr->neg = 0;
973     ret = 1;
974     goto err;
975   }
976 #endif
977 
978   // Get the window size to use with size of p.
979   window = BN_window_bits_for_ctime_exponent_size(bits);
980 #if defined(OPENSSL_BN_ASM_MONT5)
981   if (window >= 5) {
982     window = 5;  // ~5% improvement for RSA2048 sign, and even for RSA4096
983     // reserve space for mont->N.d[] copy
984     powerbufLen += top * sizeof(mont->N.d[0]);
985   }
986 #endif
987 
988   // Allocate a buffer large enough to hold all of the pre-computed
989   // powers of am, am itself and tmp.
990   numPowers = 1 << window;
991   powerbufLen +=
992       sizeof(m->d[0]) *
993       (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
994 
995 #if defined(OPENSSL_BN_ASM_MONT5)
996   if ((size_t)powerbufLen <= sizeof(storage)) {
997     powerbuf = storage;
998   }
999   // |storage| is more than large enough to handle 1024-bit inputs.
1000   assert(powerbuf != NULL || top * BN_BITS2 > 1024);
1001 #endif
1002   if (powerbuf == NULL) {
1003     powerbufFree =
1004         OPENSSL_malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
1005     if (powerbufFree == NULL) {
1006       goto err;
1007     }
1008     powerbuf = (BN_ULONG *)MOD_EXP_CTIME_ALIGN(powerbufFree);
1009   }
1010   OPENSSL_memset(powerbuf, 0, powerbufLen);
1011 
1012   // lay down tmp and am right after powers table
1013   tmp.d = powerbuf + top * numPowers;
1014   am.d = tmp.d + top;
1015   tmp.width = am.width = 0;
1016   tmp.dmax = am.dmax = top;
1017   tmp.neg = am.neg = 0;
1018   tmp.flags = am.flags = BN_FLG_STATIC_DATA;
1019 
1020   if (!bn_one_to_montgomery(&tmp, mont, ctx)) {
1021     goto err;
1022   }
1023 
1024   // prepare a^1 in Montgomery domain
1025   assert(!a->neg);
1026   assert(BN_ucmp(a, m) < 0);
1027   if (!BN_to_montgomery(&am, a, mont, ctx)) {
1028     goto err;
1029   }
1030 
1031 #if defined(OPENSSL_BN_ASM_MONT5)
1032   // This optimization uses ideas from http://eprint.iacr.org/2011/239,
1033   // specifically optimization of cache-timing attack countermeasures
1034   // and pre-computation optimization.
1035 
1036   // Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
1037   // 512-bit RSA is hardly relevant, we omit it to spare size...
1038   if (window == 5 && top > 1) {
1039     const BN_ULONG *n0 = mont->n0;
1040     BN_ULONG *np;
1041 
1042     // BN_to_montgomery can contaminate words above .top
1043     // [in BN_DEBUG[_DEBUG] build]...
1044     for (i = am.width; i < top; i++) {
1045       am.d[i] = 0;
1046     }
1047     for (i = tmp.width; i < top; i++) {
1048       tmp.d[i] = 0;
1049     }
1050 
1051     // copy mont->N.d[] to improve cache locality
1052     for (np = am.d + top, i = 0; i < top; i++) {
1053       np[i] = mont->N.d[i];
1054     }
1055 
1056     bn_scatter5(tmp.d, top, powerbuf, 0);
1057     bn_scatter5(am.d, am.width, powerbuf, 1);
1058     bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
1059     bn_scatter5(tmp.d, top, powerbuf, 2);
1060 
1061     // same as above, but uses squaring for 1/2 of operations
1062     for (i = 4; i < 32; i *= 2) {
1063       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1064       bn_scatter5(tmp.d, top, powerbuf, i);
1065     }
1066     for (i = 3; i < 8; i += 2) {
1067       int j;
1068       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
1069       bn_scatter5(tmp.d, top, powerbuf, i);
1070       for (j = 2 * i; j < 32; j *= 2) {
1071         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1072         bn_scatter5(tmp.d, top, powerbuf, j);
1073       }
1074     }
1075     for (; i < 16; i += 2) {
1076       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
1077       bn_scatter5(tmp.d, top, powerbuf, i);
1078       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1079       bn_scatter5(tmp.d, top, powerbuf, 2 * i);
1080     }
1081     for (; i < 32; i += 2) {
1082       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
1083       bn_scatter5(tmp.d, top, powerbuf, i);
1084     }
1085 
1086     bits--;
1087     for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
1088       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1089     }
1090     bn_gather5(tmp.d, top, powerbuf, wvalue);
1091 
1092     // At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
1093     // that has not been read yet.)
1094     assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
1095 
1096     // Scan the exponent one window at a time starting from the most
1097     // significant bits.
1098     if (top & 7) {
1099       while (bits >= 0) {
1100         for (wvalue = 0, i = 0; i < 5; i++, bits--) {
1101           wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1102         }
1103 
1104         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1105         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1106         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1107         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1108         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
1109         bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
1110       }
1111     } else {
1112       const uint8_t *p_bytes = (const uint8_t *)p->d;
1113       assert(bits < max_bits);
1114       // |p = 0| has been handled as a special case, so |max_bits| is at least
1115       // one word.
1116       assert(max_bits >= 64);
1117 
1118       // If the first bit to be read lands in the last byte, unroll the first
1119       // iteration to avoid reading past the bounds of |p->d|. (After the first
1120       // iteration, we are guaranteed to be past the last byte.) Note |bits|
1121       // here is the top bit, inclusive.
1122       if (bits - 4 >= max_bits - 8) {
1123         // Read five bits from |bits-4| through |bits|, inclusive.
1124         wvalue = p_bytes[p->width * BN_BYTES - 1];
1125         wvalue >>= (bits - 4) & 7;
1126         wvalue &= 0x1f;
1127         bits -= 5;
1128         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
1129       }
1130       while (bits >= 0) {
1131         // Read five bits from |bits-4| through |bits|, inclusive.
1132         int first_bit = bits - 4;
1133         uint16_t val;
1134         OPENSSL_memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val));
1135         val >>= first_bit & 7;
1136         val &= 0x1f;
1137         bits -= 5;
1138         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, val);
1139       }
1140     }
1141 
1142     ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
1143     tmp.width = top;
1144     if (ret) {
1145       if (!BN_copy(rr, &tmp)) {
1146         ret = 0;
1147       }
1148       goto err;  // non-zero ret means it's not error
1149     }
1150   } else
1151 #endif
1152   {
1153     copy_to_prebuf(&tmp, top, powerbuf, 0, window);
1154     copy_to_prebuf(&am, top, powerbuf, 1, window);
1155 
1156     // If the window size is greater than 1, then calculate
1157     // val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
1158     // (even powers could instead be computed as (a^(i/2))^2
1159     // to use the slight performance advantage of sqr over mul).
1160     if (window > 1) {
1161       if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) {
1162         goto err;
1163       }
1164 
1165       copy_to_prebuf(&tmp, top, powerbuf, 2, window);
1166 
1167       for (i = 3; i < numPowers; i++) {
1168         // Calculate a^i = a^(i-1) * a
1169         if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) {
1170           goto err;
1171         }
1172 
1173         copy_to_prebuf(&tmp, top, powerbuf, i, window);
1174       }
1175     }
1176 
1177     bits--;
1178     for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
1179       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1180     }
1181     if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
1182       goto err;
1183     }
1184 
1185     // Scan the exponent one window at a time starting from the most
1186     // significant bits.
1187     while (bits >= 0) {
1188       wvalue = 0;  // The 'value' of the window
1189 
1190       // Scan the window, squaring the result as we go
1191       for (i = 0; i < window; i++, bits--) {
1192         if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
1193           goto err;
1194         }
1195         wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
1196       }
1197 
1198       // Fetch the appropriate pre-computed value from the pre-buf
1199       if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
1200         goto err;
1201       }
1202 
1203       // Multiply the result into the intermediate result
1204       if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
1205         goto err;
1206       }
1207     }
1208   }
1209 
1210   // Convert the final result from montgomery to standard format
1211   if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
1212     goto err;
1213   }
1214   ret = 1;
1215 
1216 err:
1217   BN_MONT_CTX_free(new_mont);
1218   if (powerbuf != NULL && powerbufFree == NULL) {
1219     OPENSSL_cleanse(powerbuf, powerbufLen);
1220   }
1221   OPENSSL_free(powerbufFree);
1222   return (ret);
1223 }
1224 
BN_mod_exp_mont_word(BIGNUM * rr,BN_ULONG a,const BIGNUM * p,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)1225 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
1226                          const BIGNUM *m, BN_CTX *ctx,
1227                          const BN_MONT_CTX *mont) {
1228   BIGNUM a_bignum;
1229   BN_init(&a_bignum);
1230 
1231   int ret = 0;
1232 
1233   // BN_mod_exp_mont requires reduced inputs.
1234   if (bn_minimal_width(m) == 1) {
1235     a %= m->d[0];
1236   }
1237 
1238   if (!BN_set_word(&a_bignum, a)) {
1239     OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
1240     goto err;
1241   }
1242 
1243   ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont);
1244 
1245 err:
1246   BN_free(&a_bignum);
1247 
1248   return ret;
1249 }
1250 
1251 #define TABLE_SIZE 32
1252 
BN_mod_exp2_mont(BIGNUM * rr,const BIGNUM * a1,const BIGNUM * p1,const BIGNUM * a2,const BIGNUM * p2,const BIGNUM * m,BN_CTX * ctx,const BN_MONT_CTX * mont)1253 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
1254                      const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
1255                      BN_CTX *ctx, const BN_MONT_CTX *mont) {
1256   BIGNUM tmp;
1257   BN_init(&tmp);
1258 
1259   int ret = 0;
1260   BN_MONT_CTX *new_mont = NULL;
1261 
1262   // Allocate a montgomery context if it was not supplied by the caller.
1263   if (mont == NULL) {
1264     new_mont = BN_MONT_CTX_new_for_modulus(m, ctx);
1265     if (new_mont == NULL) {
1266       goto err;
1267     }
1268     mont = new_mont;
1269   }
1270 
1271   // BN_mod_mul_montgomery removes one Montgomery factor, so passing one
1272   // Montgomery-encoded and one non-Montgomery-encoded value gives a
1273   // non-Montgomery-encoded result.
1274   if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) ||
1275       !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) ||
1276       !BN_to_montgomery(rr, rr, mont, ctx) ||
1277       !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) {
1278     goto err;
1279   }
1280 
1281   ret = 1;
1282 
1283 err:
1284   BN_MONT_CTX_free(new_mont);
1285   BN_free(&tmp);
1286 
1287   return ret;
1288 }
1289