1 /* tfm.c
2  *
3  * Copyright (C) 2006-2021 wolfSSL Inc.
4  *
5  * This file is part of wolfSSL.
6  *
7  * wolfSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * wolfSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20  */
21 
22 
23 
24 /*
25  * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
26  * http://math.libtomcrypt.com
27  */
28 
29 /**
30  *  Edited by Moises Guimaraes (moises@wolfssl.com)
31  *  to fit wolfSSL's needs.
32  */
33 
34 #ifdef HAVE_CONFIG_H
35     #include <config.h>
36 #endif
37 
38 /* in case user set USE_FAST_MATH there */
39 #include <wolfssl/wolfcrypt/settings.h>
40 
41 #ifdef USE_FAST_MATH
42 
43 #ifdef NO_INLINE
44     #include <wolfssl/wolfcrypt/misc.h>
45 #else
46     #define WOLFSSL_MISC_INCLUDED
47     #include <wolfcrypt/src/misc.c>
48 #endif
49 
50 #include <wolfssl/wolfcrypt/random.h>
51 #include <wolfssl/wolfcrypt/tfm.h>
52 #include <wolfcrypt/src/asm.c>  /* will define asm MACROS or C ones */
53 #include <wolfssl/wolfcrypt/wolfmath.h> /* common functions */
54 
55 #if defined(FREESCALE_LTC_TFM)
56     #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
57 #endif
58 #ifdef WOLFSSL_DEBUG_MATH
59     #include <stdio.h>
60 #endif
61 
62 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
63 #ifdef __cplusplus
64     extern "C" {
65 #endif
66 WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
67     mp_int* res);
68 WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
69     mp_int* res);
70 WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
71     mp_int* res);
72 WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
73     mp_int* res);
74 WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
75     mp_int* res);
76 #ifdef __cplusplus
77     } /* extern "C" */
78 #endif
79 #endif
80 
81 
82 #if !defined(WOLFSSL_SP_MATH) && !defined(WOLFSSL_SP_MATH_ALL)
83 /* math settings check */
CheckRunTimeSettings(void)84 word32 CheckRunTimeSettings(void)
85 {
86     return CTC_SETTINGS;
87 }
88 
89 /* math settings size check */
CheckRunTimeFastMath(void)90 word32 CheckRunTimeFastMath(void)
91 {
92     return FP_SIZE;
93 }
94 #endif
95 
96 
97 /* Functions */
98 
fp_add(fp_int * a,fp_int * b,fp_int * c)99 int fp_add(fp_int *a, fp_int *b, fp_int *c)
100 {
101   int sa, sb;
102   int ret = FP_OKAY;
103 
104   /* get sign of both inputs */
105   sa = a->sign;
106   sb = b->sign;
107 
108   /* handle two cases, not four */
109   if (sa == sb) {
110     /* both positive or both negative */
111     /* add their magnitudes, copy the sign */
112     c->sign = sa;
113     ret = s_fp_add (a, b, c);
114   } else {
115     /* one positive, the other negative */
116     /* subtract the one with the greater magnitude from */
117     /* the one of the lesser magnitude.  The result gets */
118     /* the sign of the one with the greater magnitude. */
119     if (fp_cmp_mag (a, b) == FP_LT) {
120       c->sign = sb;
121       s_fp_sub (b, a, c);
122     } else {
123       c->sign = sa;
124       s_fp_sub (a, b, c);
125     }
126   }
127 
128   return ret;
129 }
130 
131 /* unsigned addition */
s_fp_add(fp_int * a,fp_int * b,fp_int * c)132 int s_fp_add(fp_int *a, fp_int *b, fp_int *c)
133 {
134   int      x, y, oldused;
135   fp_word  t;
136 
137   y       = MAX(a->used, b->used);
138   oldused = MIN(c->used, FP_SIZE);   /* help static analysis w/ largest size */
139   c->used = y;
140 
141   t = 0;
142   for (x = 0; x < y; x++) {
143       t         += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
144       c->dp[x]   = (fp_digit)t;
145       t        >>= DIGIT_BIT;
146   }
147   if (t != 0) {
148      if (x == FP_SIZE)
149          return FP_VAL;
150      c->dp[c->used++] = (fp_digit)t;
151      ++x;
152   }
153 
154   c->used = x;
155 
156   /* zero any excess digits on the destination that we didn't write to */
157   for (; x < oldused; x++) {
158      c->dp[x] = 0;
159   }
160   fp_clamp(c);
161   return FP_OKAY;
162 }
163 
164 /* c = a - b */
fp_sub(fp_int * a,fp_int * b,fp_int * c)165 int fp_sub(fp_int *a, fp_int *b, fp_int *c)
166 {
167   int sa, sb;
168   int ret = FP_OKAY;
169 
170   sa = a->sign;
171   sb = b->sign;
172 
173   if (sa != sb) {
174     /* subtract a negative from a positive, OR */
175     /* subtract a positive from a negative. */
176     /* In either case, ADD their magnitudes, */
177     /* and use the sign of the first number. */
178     c->sign = sa;
179     ret = s_fp_add (a, b, c);
180   } else {
181     /* subtract a positive from a positive, OR */
182     /* subtract a negative from a negative. */
183     /* First, take the difference between their */
184     /* magnitudes, then... */
185     if (fp_cmp_mag (a, b) != FP_LT) {
186       /* Copy the sign from the first */
187       c->sign = sa;
188       /* The first has a larger or equal magnitude */
189       s_fp_sub (a, b, c);
190     } else {
191       /* The result has the *opposite* sign from */
192       /* the first number. */
193       c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
194       /* The second has a larger magnitude */
195       s_fp_sub (b, a, c);
196     }
197   }
198   return ret;
199 }
200 
201 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
s_fp_sub(fp_int * a,fp_int * b,fp_int * c)202 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
203 {
204   int      x, oldbused, oldused;
205   fp_word  t;
206 
207   oldused  = c->used;
208   oldbused = b->used;
209   c->used  = a->used;
210   t       = 0;
211   for (x = 0; x < oldbused; x++) {
212      t         = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
213      c->dp[x]  = (fp_digit)t;
214      t         = (t >> DIGIT_BIT)&1;
215   }
216   for (; x < a->used; x++) {
217      t         = ((fp_word)a->dp[x]) - t;
218      c->dp[x]  = (fp_digit)t;
219      t         = (t >> DIGIT_BIT)&1;
220    }
221 
222   /* zero any excess digits on the destination that we didn't write to */
223   for (; x < oldused; x++) {
224      c->dp[x] = 0;
225   }
226   fp_clamp(c);
227 }
228 
229 /* c = a * b */
fp_mul(fp_int * A,fp_int * B,fp_int * C)230 int fp_mul(fp_int *A, fp_int *B, fp_int *C)
231 {
232     int   ret = 0;
233     int   y, yy, oldused;
234 
235 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
236    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
237   ret = esp_mp_mul(A, B, C);
238   if(ret != -2) return ret;
239 #endif
240 
241     oldused = C->used;
242 
243     y  = MAX(A->used, B->used);
244     yy = MIN(A->used, B->used);
245 
246     /* fail if we are out of range */
247     if (y + yy >= FP_SIZE) {
248        ret = FP_VAL;
249        goto clean;
250     }
251 
252     /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
253        of the largest input.  We also want to avoid doing excess mults if the
254        inputs are not close to the next power of two.  That is, for example,
255        if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
256     */
257 
258 #if defined(TFM_MUL3) && FP_SIZE >= 6
259         if (y <= 3) {
260            ret = fp_mul_comba3(A,B,C);
261            goto clean;
262         }
263 #endif
264 #if defined(TFM_MUL4) && FP_SIZE >= 8
265         if (y == 4) {
266            ret = fp_mul_comba4(A,B,C);
267            goto clean;
268         }
269 #endif
270 #if defined(TFM_MUL6) && FP_SIZE >= 12
271         if (y <= 6) {
272            ret = fp_mul_comba6(A,B,C);
273            goto clean;
274         }
275 #endif
276 #if defined(TFM_MUL7) && FP_SIZE >= 14
277         if (y == 7) {
278            ret = fp_mul_comba7(A,B,C);
279            goto clean;
280         }
281 #endif
282 #if defined(TFM_MUL8) && FP_SIZE >= 16
283         if (y == 8) {
284            ret = fp_mul_comba8(A,B,C);
285            goto clean;
286         }
287 #endif
288 #if defined(TFM_MUL9) && FP_SIZE >= 18
289         if (y == 9) {
290            ret = fp_mul_comba9(A,B,C);
291            goto clean;
292         }
293 #endif
294 #if defined(TFM_MUL12) && FP_SIZE >= 24
295         if (y <= 12) {
296            ret = fp_mul_comba12(A,B,C);
297            goto clean;
298         }
299 #endif
300 #if defined(TFM_MUL17) && FP_SIZE >= 34
301         if (y <= 17) {
302            ret = fp_mul_comba17(A,B,C);
303            goto clean;
304         }
305 #endif
306 
307 #if defined(TFM_SMALL_SET) && FP_SIZE >= 32
308         if (y <= 16) {
309            ret = fp_mul_comba_small(A,B,C);
310            goto clean;
311         }
312 #endif
313 #if defined(TFM_MUL20) && FP_SIZE >= 40
314         if (y <= 20) {
315            ret = fp_mul_comba20(A,B,C);
316            goto clean;
317         }
318 #endif
319 #if defined(TFM_MUL24) && FP_SIZE >= 48
320         if (yy >= 16 && y <= 24) {
321            ret = fp_mul_comba24(A,B,C);
322            goto clean;
323         }
324 #endif
325 #if defined(TFM_MUL28) && FP_SIZE >= 56
326         if (yy >= 20 && y <= 28) {
327            ret = fp_mul_comba28(A,B,C);
328            goto clean;
329         }
330 #endif
331 #if defined(TFM_MUL32) && FP_SIZE >= 64
332         if (yy >= 24 && y <= 32) {
333            ret = fp_mul_comba32(A,B,C);
334            goto clean;
335         }
336 #endif
337 #if defined(TFM_MUL48) && FP_SIZE >= 96
338         if (yy >= 40 && y <= 48) {
339           ret = fp_mul_comba48(A,B,C);
340           goto clean;
341         }
342 #endif
343 #if defined(TFM_MUL64) && FP_SIZE >= 128
344         if (yy >= 56 && y <= 64) {
345            ret = fp_mul_comba64(A,B,C);
346            goto clean;
347         }
348 #endif
349         ret = fp_mul_comba(A,B,C);
350 
351 clean:
352     /* zero any excess digits on the destination that we didn't write to */
353     for (y = C->used; y >= 0 && y < oldused; y++) {
354         C->dp[y] = 0;
355     }
356 
357     return ret;
358 }
359 
fp_mul_2(fp_int * a,fp_int * b)360 int fp_mul_2(fp_int * a, fp_int * b)
361 {
362   int     x, oldused;
363 
364   /* Make sure value to double and result are in range. */
365   if ((a->used > (FP_SIZE-1)) || ((a->used == (FP_SIZE - 1)) &&
366               ((a->dp[FP_SIZE - 1] & ((fp_digit)1 << (DIGIT_BIT - 1))) != 0))) {
367     return FP_VAL;
368   }
369 
370   oldused = b->used;
371   b->used = a->used;
372 
373   {
374     fp_digit r, rr, *tmpa, *tmpb;
375 
376     /* alias for source */
377     tmpa = a->dp;
378 
379     /* alias for dest */
380     tmpb = b->dp;
381 
382     /* carry */
383     r = 0;
384     for (x = 0; x < a->used; x++) {
385 
386       /* get what will be the *next* carry bit from the
387        * MSB of the current digit
388        */
389       rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
390 
391       /* now shift up this digit, add in the carry [from the previous] */
392       *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
393 
394       /* copy the carry that would be from the source
395        * digit into the next iteration
396        */
397       r = rr;
398     }
399 
400     /* new leading digit? */
401     if (r != 0) {
402       /* add a MSB which is always 1 at this point */
403       *tmpb = 1;
404       ++(b->used);
405     }
406 
407     /* zero any excess digits on the destination that we didn't write to */
408     tmpb = b->dp + b->used;
409     for (x = b->used; x < oldused; x++) {
410       *tmpb++ = 0;
411     }
412   }
413   b->sign = a->sign;
414 
415   return FP_OKAY;
416 }
417 
418 /* c = a * b */
fp_mul_d(fp_int * a,fp_digit b,fp_int * c)419 int fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
420 {
421    fp_word  w;
422    int      x, oldused;
423 
424    oldused = c->used;
425    c->used = a->used;
426    c->sign = a->sign;
427    w       = 0;
428    for (x = 0; x < a->used; x++) {
429        w         = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
430        c->dp[x]  = (fp_digit)w;
431        w         = w >> DIGIT_BIT;
432    }
433    if (w != 0) {
434       if (a->used == FP_SIZE)
435           return FP_VAL;
436       c->dp[c->used++] = (fp_digit) w;
437       ++x;
438    }
439 
440    /* zero any excess digits on the destination that we didn't write to */
441    /* also checking FP_SIZE here for static analysis */
442    for (; x < oldused && x < FP_SIZE; x++) {
443       c->dp[x] = 0;
444    }
445 
446    fp_clamp(c);
447    return FP_OKAY;
448 }
449 
450 /* c = a * 2**d */
fp_mul_2d(fp_int * a,int b,fp_int * c)451 int fp_mul_2d(fp_int *a, int b, fp_int *c)
452 {
453    fp_digit carry, carrytmp, shift;
454    int x;
455 
456    /* copy it */
457    fp_copy(a, c);
458 
459    /* handle whole digits */
460    if (b >= DIGIT_BIT) {
461       int ret = fp_lshd(c, b/DIGIT_BIT);
462       if (ret != FP_OKAY)
463          return ret;
464    }
465    b %= DIGIT_BIT;
466 
467    /* shift the digits */
468    if (b != 0) {
469       carry = 0;
470       shift = DIGIT_BIT - b;
471       for (x = 0; x < c->used; x++) {
472           carrytmp = c->dp[x] >> shift;
473           c->dp[x] = (c->dp[x] << b) + carry;
474           carry = carrytmp;
475       }
476       /* store last carry if room */
477       if (carry && x < FP_SIZE) {
478          c->dp[c->used++] = carry;
479       }
480       if (x == FP_SIZE)
481          return FP_VAL;
482    }
483    fp_clamp(c);
484    return FP_OKAY;
485 }
486 
487 /* generic PxQ multiplier */
488 #if defined(HAVE_INTEL_MULX)
489 
fp_mul_comba_mulx(fp_int * A,fp_int * B,fp_int * C)490 WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
491 
492 {
493    int       ix, iy, iz, pa;
494    fp_int    *dst;
495 #ifndef WOLFSSL_SMALL_STACK
496    fp_int    tmp[1];
497 #else
498    fp_int    *tmp;
499 #endif
500    fp_digit  carry;
501 
502    /* Variables used but not seen by cppcheck. */
503    (void)ix; (void)iy; (void)iz;
504 
505 #ifdef WOLFSSL_SMALL_STACK
506    tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
507    if (tmp == NULL)
508        return FP_MEM;
509 #endif
510 
511    /* get size of output and trim */
512    pa = A->used + B->used;
513    if (pa >= FP_SIZE) {
514       pa = FP_SIZE-1;
515    }
516 
517    /* Always take branch to use tmp variable. This avoids a cache attack for
518     * determining if C equals A */
519    if (1) {
520       fp_init(tmp);
521       dst = tmp;
522    }
523 
524    TFM_INTEL_MUL_COMBA(A, B, carry, dst) ;
525 
526   dst->used = pa;
527   dst->sign = A->sign ^ B->sign;
528   fp_clamp(dst);
529   fp_copy(dst, C);
530 
531 #ifdef WOLFSSL_SMALL_STACK
532   XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
533 #endif
534 
535   return FP_OKAY;
536 }
537 #endif
538 
fp_mul_comba(fp_int * A,fp_int * B,fp_int * C)539 int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
540 {
541    int       ret = 0;
542    int       ix, iy, iz, tx, ty, pa;
543    fp_digit  c0, c1, c2, *tmpx, *tmpy;
544    fp_int    *dst;
545 #ifndef WOLFSSL_SMALL_STACK
546    fp_int    tmp[1];
547 #else
548    fp_int    *tmp;
549 #endif
550 
551    if (A->used + B->used >= FP_SIZE) return FP_VAL;
552 
553    IF_HAVE_INTEL_MULX(ret = fp_mul_comba_mulx(A, B, C), return ret) ;
554 
555 #ifdef WOLFSSL_SMALL_STACK
556    tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
557    if (tmp == NULL)
558        return FP_MEM;
559 #endif
560 
561    COMBA_START;
562    COMBA_CLEAR;
563 
564    /* get size of output and trim */
565    pa = A->used + B->used;
566    if (pa >= FP_SIZE) {
567       pa = FP_SIZE-1;
568    }
569 
570    /* Always take branch to use tmp variable. This avoids a cache attack for
571     * determining if C equals A */
572    if (1) {
573       fp_init(tmp);
574       dst = tmp;
575    }
576 
577    for (ix = 0; ix < pa; ix++) {
578       /* get offsets into the two bignums */
579       ty = MIN(ix, (B->used > 0 ? B->used - 1 : 0));
580       tx = ix - ty;
581 
582       /* setup temp aliases */
583       tmpx = A->dp + tx;
584       tmpy = B->dp + ty;
585 
586       /* this is the number of times the loop will iterate, essentially its
587          while (tx++ < a->used && ty-- >= 0) { ... }
588        */
589       iy = MIN(A->used-tx, ty+1);
590 
591       /* execute loop */
592       COMBA_FORWARD;
593       for (iz = 0; iz < iy; ++iz) {
594           fp_digit _tmpx = *tmpx++;
595           fp_digit _tmpy = *tmpy--;
596           MULADD(_tmpx, _tmpy);
597       }
598 
599       /* store term */
600       COMBA_STORE(dst->dp[ix]);
601   }
602   COMBA_FINI;
603 
604   dst->used = pa;
605   dst->sign = A->sign ^ B->sign;
606   fp_clamp(dst);
607   fp_copy(dst, C);
608 
609   /* Variables used but not seen by cppcheck. */
610   (void)c0; (void)c1; (void)c2;
611 
612 #ifdef WOLFSSL_SMALL_STACK
613   XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
614 #endif
615   return ret;
616 }
617 
618 /* a/b => cb + d == a */
fp_div(fp_int * a,fp_int * b,fp_int * c,fp_int * d)619 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
620 {
621   int     n, t, i, norm, neg;
622   int     ret;
623 #ifndef WOLFSSL_SMALL_STACK
624   fp_int  q[1], x[1], y[1], t1[1], t2[1];
625 #else
626   fp_int  *q, *x, *y, *t1, *t2;
627 #endif
628 
629   /* is divisor zero ? */
630   if (fp_iszero (b) == FP_YES) {
631     return FP_VAL;
632   }
633 
634   /* if a < b then q=0, r = a */
635   if (fp_cmp_mag (a, b) == FP_LT)
636   {
637     if (d != NULL) {
638       fp_copy (a, d);
639     }
640     if (c != NULL) {
641       fp_zero (c);
642     }
643     return FP_OKAY;
644   }
645 
646 #ifdef WOLFSSL_SMALL_STACK
647   q = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
648   if (q == NULL) {
649       return FP_MEM;
650   }
651   x = &q[1]; y = &q[2]; t1 = &q[3]; t2 = &q[4];
652 #endif
653 
654   fp_init(q);
655   /* qb + d = a, and b is an integer > 0, therefore q <= a */
656   q->used = a->used;
657 
658   fp_init(t1);
659   fp_init(t2);
660   fp_init_copy(x, a);
661   fp_init_copy(y, b);
662 
663   /* fix the sign */
664   neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
665   x->sign = y->sign = FP_ZPOS;
666 
667   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
668   norm = fp_count_bits(y) % DIGIT_BIT;
669   if (norm < (int)(DIGIT_BIT-1)) {
670     norm = (DIGIT_BIT-1) - norm;
671     ret = fp_mul_2d (x, norm, x);
672     if (ret != FP_OKAY) {
673     #ifdef WOLFSSL_SMALL_STACK
674       XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
675     #endif
676       return ret;
677     }
678     ret = fp_mul_2d (y, norm, y);
679     if (ret != FP_OKAY) {
680     #ifdef WOLFSSL_SMALL_STACK
681       XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
682     #endif
683       return ret;
684     }
685   } else {
686     norm = 0;
687   }
688 
689   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
690   n = x->used - 1;
691   t = y->used - 1;
692 
693   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
694   ret = fp_lshd (y, n - t); /* y = y*b**{n-t} */
695   if (ret != FP_OKAY) {
696   #ifdef WOLFSSL_SMALL_STACK
697     XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
698   #endif
699     return ret;
700   }
701 
702   while (fp_cmp (x, y) != FP_LT) {
703     ++(q->dp[n - t]);
704     ret = fp_sub (x, y, x);
705     if (ret != FP_OKAY) {
706     #ifdef WOLFSSL_SMALL_STACK
707       XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
708     #endif
709       return ret;
710     }
711   }
712 
713   /* reset y by shifting it back down */
714   fp_rshd (y, n - t);
715 
716   /* step 3. for i from n down to (t + 1) */
717   for (i = n; i >= (t + 1); i--) {
718     if (i > x->used) {
719       continue;
720     }
721 
722     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
723      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
724     if (x->dp[i] == y->dp[t]) {
725       q->dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
726     } else {
727       fp_word tmp;
728       tmp = ((fp_word) x->dp[i]) << ((fp_word) DIGIT_BIT);
729       tmp |= ((fp_word) x->dp[i - 1]);
730 #ifdef WOLFSSL_LINUXKM
731       /* Linux kernel macro for in-place 64 bit integer division. */
732       do_div(tmp, (fp_word)y->dp[t]);
733 #else
734       tmp /= ((fp_word)y->dp[t]);
735 #endif
736       q->dp[i - t - 1] = (fp_digit) (tmp);
737     }
738 
739     /* while (q{i-t-1} * (yt * b + y{t-1})) >
740              xi * b**2 + xi-1 * b + xi-2
741 
742        do q{i-t-1} -= 1;
743     */
744     q->dp[i - t - 1] = (q->dp[i - t - 1] + 1);
745     do {
746       q->dp[i - t - 1] = (q->dp[i - t - 1] - 1);
747 
748       /* find left hand */
749       fp_zero (t1);
750       t1->dp[0] = (t - 1 < 0) ? 0 : y->dp[t - 1];
751       t1->dp[1] = y->dp[t];
752       t1->used = 2;
753       ret = fp_mul_d (t1, q->dp[i - t - 1], t1);
754       if (ret != FP_OKAY) {
755       #ifdef WOLFSSL_SMALL_STACK
756         XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
757       #endif
758         return ret;
759       }
760 
761       /* find right hand */
762       t2->dp[0] = (i - 2 < 0) ? 0 : x->dp[i - 2];
763       t2->dp[1] = (i - 1 < 0) ? 0 : x->dp[i - 1];
764       t2->dp[2] = x->dp[i];
765       t2->used = 3;
766     } while (fp_cmp_mag(t1, t2) == FP_GT);
767 
768     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
769     ret = fp_mul_d (y, q->dp[i - t - 1], t1);
770     if (ret != FP_OKAY) {
771     #ifdef WOLFSSL_SMALL_STACK
772       XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
773     #endif
774       return ret;
775     }
776     ret = fp_lshd  (t1, i - t - 1);
777     if (ret != FP_OKAY) {
778     #ifdef WOLFSSL_SMALL_STACK
779       XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
780     #endif
781       return ret;
782     }
783     ret = fp_sub   (x, t1, x);
784     if (ret != FP_OKAY) {
785     #ifdef WOLFSSL_SMALL_STACK
786       XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
787     #endif
788       return ret;
789     }
790 
791     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
792     if (x->sign == FP_NEG) {
793       fp_copy (y, t1);
794       ret = fp_lshd (t1, i - t - 1);
795       if (ret != FP_OKAY) {
796       #ifdef WOLFSSL_SMALL_STACK
797         XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
798       #endif
799         return ret;
800       }
801       ret = fp_add (x, t1, x);
802       if (ret != FP_OKAY) {
803       #ifdef WOLFSSL_SMALL_STACK
804         XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
805       #endif
806         return ret;
807       }
808       q->dp[i - t - 1] = q->dp[i - t - 1] - 1;
809     }
810   }
811 
812   /* now q is the quotient and x is the remainder
813    * [which we have to normalize]
814    */
815 
816   /* get sign before writing to c */
817   x->sign = x->used == 0 ? FP_ZPOS : a->sign;
818 
819   if (c != NULL) {
820     fp_clamp (q);
821     fp_copy (q, c);
822     c->sign = neg;
823   }
824 
825   if (d != NULL) {
826     fp_div_2d (x, norm, x, NULL);
827 
828     /* zero any excess digits on the destination that we didn't write to */
829     for (i = b->used; i < x->used; i++) {
830         x->dp[i] = 0;
831     }
832     fp_clamp(x);
833     fp_copy (x, d);
834   }
835 
836 #ifdef WOLFSSL_SMALL_STACK
837   XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
838 #endif
839   return FP_OKAY;
840 }
841 
842 /* b = a/2 */
fp_div_2(fp_int * a,fp_int * b)843 void fp_div_2(fp_int * a, fp_int * b)
844 {
845   int     x, oldused;
846 
847   oldused = b->used;
848   b->used = a->used;
849   {
850     fp_digit r, rr, *tmpa, *tmpb;
851 
852     /* source alias */
853     tmpa = a->dp + b->used - 1;
854 
855     /* dest alias */
856     tmpb = b->dp + b->used - 1;
857 
858     /* carry */
859     r = 0;
860     for (x = b->used - 1; x >= 0; x--) {
861       /* get the carry for the next iteration */
862       rr = *tmpa & 1;
863 
864       /* shift the current digit, add in carry and store */
865       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
866 
867       /* forward carry to next iteration */
868       r = rr;
869     }
870 
871     /* zero any excess digits on the destination that we didn't write to */
872     tmpb = b->dp + b->used;
873     for (x = b->used; x < oldused; x++) {
874       *tmpb++ = 0;
875     }
876   }
877   b->sign = a->sign;
878   fp_clamp (b);
879 }
880 
881 /* c = a / 2 (mod b) - constant time (a < b and positive) */
fp_div_2_mod_ct(fp_int * a,fp_int * b,fp_int * c)882 int fp_div_2_mod_ct(fp_int *a, fp_int *b, fp_int *c)
883 {
884   fp_word  w = 0;
885   fp_digit mask;
886   int i;
887 
888   mask = 0 - (a->dp[0] & 1);
889   for (i = 0; i < b->used; i++) {
890       fp_digit mask_a = 0 - (i < a->used);
891 
892       w         += b->dp[i] & mask;
893       w         += a->dp[i] & mask_a;
894       c->dp[i]   = (fp_digit)w;
895       w        >>= DIGIT_BIT;
896   }
897   c->dp[i] = (fp_digit)w;
898   c->used = i + 1;
899   c->sign = FP_ZPOS;
900   fp_clamp(c);
901   fp_div_2(c, c);
902 
903   return FP_OKAY;
904 }
905 
906 /* c = a / 2**b */
fp_div_2d(fp_int * a,int b,fp_int * c,fp_int * d)907 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
908 {
909   int      D;
910 
911   /* if the shift count is <= 0 then we do no work */
912   if (b <= 0) {
913     fp_copy (a, c);
914     if (d != NULL) {
915       fp_zero (d);
916     }
917     return;
918   }
919 
920   /* get the remainder before a is changed in calculating c */
921   if (a == c && d != NULL) {
922     fp_mod_2d (a, b, d);
923   }
924 
925   /* copy */
926   fp_copy(a, c);
927 
928   /* shift by as many digits in the bit count */
929   if (b >= (int)DIGIT_BIT) {
930     fp_rshd (c, b / DIGIT_BIT);
931   }
932 
933   /* shift any bit count < DIGIT_BIT */
934   D = (b % DIGIT_BIT);
935   if (D != 0) {
936     fp_rshb(c, D);
937   }
938 
939   /* get the remainder if a is not changed in calculating c */
940   if (a != c && d != NULL) {
941     fp_mod_2d (a, b, d);
942   }
943 
944   fp_clamp (c);
945 }
946 
947 /* c = a mod b, 0 <= c < b  */
fp_mod(fp_int * a,fp_int * b,fp_int * c)948 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
949 {
950 #ifndef WOLFSSL_SMALL_STACK
951    fp_int t[1];
952 #else
953    fp_int *t;
954 #endif
955    int    err;
956 
957 #ifdef WOLFSSL_SMALL_STACK
958    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
959    if (t == NULL)
960        return FP_MEM;
961 #endif
962 
963    fp_init(t);
964    err = fp_div(a, b, NULL, t);
965    if (err == FP_OKAY) {
966       if (!fp_iszero(t) && (t->sign != b->sign)) {
967          err = fp_add(t, b, c);
968       } else {
969          fp_copy(t, c);
970      }
971   }
972 
973 #ifdef WOLFSSL_SMALL_STACK
974   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
975 #endif
976   return err;
977 }
978 
979 /* c = a mod 2**d */
fp_mod_2d(fp_int * a,int b,fp_int * c)980 void fp_mod_2d(fp_int *a, int b, fp_int *c)
981 {
982    int x;
983    int bmax;
984 
985    /* zero if count less than or equal to zero */
986    if (b <= 0) {
987       fp_zero(c);
988       return;
989    }
990 
991    /* get copy of input */
992    fp_copy(a, c);
993 
994    /* if 2**d is larger than we just return */
995    if (c->sign == FP_ZPOS && b >= (DIGIT_BIT * a->used)) {
996       return;
997    }
998 
999   bmax = (b + DIGIT_BIT - 1) / DIGIT_BIT;
1000   /* zero digits above the last digit of the modulus */
1001   for (x = bmax; x < c->used; x++) {
1002     c->dp[x] = 0;
1003   }
1004 
1005   if (c->sign == FP_NEG) {
1006      fp_digit carry = 0;
1007      /* negate value */
1008      for (x = 0; x < c->used; x++) {
1009          fp_digit next = c->dp[x] > 0;
1010          c->dp[x] = (fp_digit)0 - c->dp[x] - carry;
1011          carry |= next;
1012      }
1013      for (; x < bmax; x++) {
1014          c->dp[x] = (fp_digit)0 - carry;
1015      }
1016      c->used = bmax;
1017      c->sign = FP_ZPOS;
1018   }
1019 
1020   /* clear the digit that is not completely outside/inside the modulus */
1021   x = DIGIT_BIT - (b % DIGIT_BIT);
1022   if (x != DIGIT_BIT) {
1023      c->dp[bmax - 1] &= ~((fp_digit)0) >> x;
1024   }
1025 
1026   fp_clamp (c);
1027 }
1028 
fp_invmod_slow(fp_int * a,fp_int * b,fp_int * c)1029 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
1030 {
1031 #ifndef WOLFSSL_SMALL_STACK
1032   fp_int  x[1], y[1], u[1], v[1], A[1], B[1], C[1], D[1];
1033 #else
1034   fp_int  *x, *y, *u, *v, *A, *B, *C, *D;
1035 #endif
1036   int     err;
1037 
1038   /* b cannot be negative */
1039   if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
1040     return FP_VAL;
1041   }
1042   if (fp_iszero(a) == FP_YES) {
1043     return FP_VAL;
1044   }
1045 
1046 #ifdef WOLFSSL_SMALL_STACK
1047   x = (fp_int*)XMALLOC(sizeof(fp_int) * 8, NULL, DYNAMIC_TYPE_BIGINT);
1048   if (x == NULL) {
1049       return FP_MEM;
1050   }
1051   y = &x[1]; u = &x[2]; v = &x[3]; A = &x[4]; B = &x[5]; C = &x[6]; D = &x[7];
1052 #endif
1053 
1054   /* init temps */
1055   fp_init(x);    fp_init(y);
1056   fp_init(u);    fp_init(v);
1057   fp_init(A);    fp_init(B);
1058   fp_init(C);    fp_init(D);
1059 
1060   /* x = a, y = b */
1061   if ((err = fp_mod(a, b, x)) != FP_OKAY) {
1062   #ifdef WOLFSSL_SMALL_STACK
1063     XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1064   #endif
1065     return err;
1066   }
1067   fp_copy(b, y);
1068 
1069   /* 2. [modified] if x,y are both even then return an error! */
1070   if (fp_iseven(x) == FP_YES && fp_iseven(y) == FP_YES) {
1071   #ifdef WOLFSSL_SMALL_STACK
1072     XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1073   #endif
1074     return FP_VAL;
1075   }
1076 
1077   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1078   fp_copy (x, u);
1079   fp_copy (y, v);
1080   fp_set (A, 1);
1081   fp_set (D, 1);
1082 
1083 top:
1084   /* 4.  while u is even do */
1085   while (fp_iseven (u) == FP_YES) {
1086     /* 4.1 u = u/2 */
1087     fp_div_2 (u, u);
1088 
1089     /* 4.2 if A or B is odd then */
1090     if (fp_isodd (A) == FP_YES || fp_isodd (B) == FP_YES) {
1091       /* A = (A+y)/2, B = (B-x)/2 */
1092       err = fp_add (A, y, A);
1093       if (err != FP_OKAY) {
1094       #ifdef WOLFSSL_SMALL_STACK
1095         XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1096       #endif
1097         return err;
1098       }
1099       err = fp_sub (B, x, B);
1100       if (err != FP_OKAY) {
1101       #ifdef WOLFSSL_SMALL_STACK
1102         XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1103       #endif
1104         return err;
1105       }
1106     }
1107     /* A = A/2, B = B/2 */
1108     fp_div_2 (A, A);
1109     fp_div_2 (B, B);
1110   }
1111 
1112   /* 5.  while v is even do */
1113   while (fp_iseven (v) == FP_YES) {
1114     /* 5.1 v = v/2 */
1115     fp_div_2 (v, v);
1116 
1117     /* 5.2 if C or D is odd then */
1118     if (fp_isodd (C) == FP_YES || fp_isodd (D) == FP_YES) {
1119       /* C = (C+y)/2, D = (D-x)/2 */
1120       err = fp_add (C, y, C);
1121       if (err != FP_OKAY) {
1122       #ifdef WOLFSSL_SMALL_STACK
1123         XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1124       #endif
1125         return err;
1126       }
1127       err = fp_sub (D, x, D);
1128       if (err != FP_OKAY) {
1129       #ifdef WOLFSSL_SMALL_STACK
1130         XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1131       #endif
1132         return err;
1133       }
1134     }
1135     /* C = C/2, D = D/2 */
1136     fp_div_2 (C, C);
1137     fp_div_2 (D, D);
1138   }
1139 
1140   /* 6.  if u >= v then */
1141   if (fp_cmp (u, v) != FP_LT) {
1142     /* u = u - v, A = A - C, B = B - D */
1143     err = fp_sub (u, v, u);
1144     if (err != FP_OKAY) {
1145     #ifdef WOLFSSL_SMALL_STACK
1146       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1147     #endif
1148       return err;
1149     }
1150     err = fp_sub (A, C, A);
1151     if (err != FP_OKAY) {
1152     #ifdef WOLFSSL_SMALL_STACK
1153       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1154     #endif
1155       return err;
1156     }
1157     err = fp_sub (B, D, B);
1158     if (err != FP_OKAY) {
1159     #ifdef WOLFSSL_SMALL_STACK
1160       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1161     #endif
1162       return err;
1163     }
1164   } else {
1165     /* v - v - u, C = C - A, D = D - B */
1166     err = fp_sub (v, u, v);
1167     if (err != FP_OKAY) {
1168     #ifdef WOLFSSL_SMALL_STACK
1169       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1170     #endif
1171       return err;
1172     }
1173     err = fp_sub (C, A, C);
1174     if (err != FP_OKAY) {
1175     #ifdef WOLFSSL_SMALL_STACK
1176       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1177     #endif
1178       return err;
1179     }
1180     err = fp_sub (D, B, D);
1181     if (err != FP_OKAY) {
1182     #ifdef WOLFSSL_SMALL_STACK
1183       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1184     #endif
1185       return err;
1186     }
1187   }
1188 
1189   /* if not zero goto step 4 */
1190   if (fp_iszero (u) == FP_NO)
1191     goto top;
1192 
1193   /* now a = C, b = D, gcd == g*v */
1194 
1195   /* if v != 1 then there is no inverse */
1196   if (fp_cmp_d (v, 1) != FP_EQ) {
1197   #ifdef WOLFSSL_SMALL_STACK
1198     XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1199   #endif
1200     return FP_VAL;
1201   }
1202 
1203   /* if its too low */
1204   while (fp_cmp_d(C, 0) == FP_LT) {
1205     err = fp_add(C, b, C);
1206     if (err != FP_OKAY) {
1207     #ifdef WOLFSSL_SMALL_STACK
1208       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1209     #endif
1210       return err;
1211     }
1212   }
1213 
1214   /* too big */
1215   while (fp_cmp_mag(C, b) != FP_LT) {
1216     err = fp_sub(C, b, C);
1217     if (err != FP_OKAY) {
1218     #ifdef WOLFSSL_SMALL_STACK
1219       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1220     #endif
1221       return err;
1222     }
1223   }
1224 
1225   /* C is now the inverse */
1226   fp_copy(C, c);
1227 #ifdef WOLFSSL_SMALL_STACK
1228   XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1229 #endif
1230   return FP_OKAY;
1231 }
1232 
1233 /* c = 1/a (mod b) for odd b only */
fp_invmod(fp_int * a,fp_int * b,fp_int * c)1234 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
1235 {
1236 #ifndef WOLFSSL_SMALL_STACK
1237   fp_int  x[1], y[1], u[1], v[1], B[1], D[1];
1238 #else
1239   fp_int  *x, *y, *u, *v, *B, *D;
1240 #endif
1241   int     neg;
1242   int     err;
1243 
1244   if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
1245     return FP_VAL;
1246   }
1247 
1248   /* [modified] sanity check on "a" */
1249   if (fp_iszero(a) == FP_YES) {
1250     return FP_VAL; /* can not divide by 0 here */
1251   }
1252 
1253   /* 2. [modified] b must be odd   */
1254   if (fp_iseven(b) == FP_YES) {
1255     return fp_invmod_slow(a,b,c);
1256   }
1257 
1258 #ifdef WOLFSSL_SMALL_STACK
1259   x = (fp_int*)XMALLOC(sizeof(fp_int) * 6, NULL, DYNAMIC_TYPE_BIGINT);
1260   if (x == NULL) {
1261       return FP_MEM;
1262   }
1263   y = &x[1]; u = &x[2]; v = &x[3]; B = &x[4]; D = &x[5];
1264 #endif
1265 
1266   /* init all our temps */
1267   fp_init(x);  fp_init(y);
1268   fp_init(u);  fp_init(v);
1269   fp_init(B);  fp_init(D);
1270 
1271   if (fp_cmp(a, b) != MP_LT) {
1272     err = mp_mod(a, b, y);
1273     if (err != FP_OKAY) {
1274     #ifdef WOLFSSL_SMALL_STACK
1275       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1276     #endif
1277       return err;
1278     }
1279     a = y;
1280   }
1281 
1282   if (fp_iszero(a) == FP_YES) {
1283   #ifdef WOLFSSL_SMALL_STACK
1284     XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1285   #endif
1286     return FP_VAL;
1287   }
1288 
1289   /* x == modulus, y == value to invert */
1290   fp_copy(b, x);
1291 
1292   /* we need y = |a| */
1293   fp_abs(a, y);
1294 
1295   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1296   fp_copy(x, u);
1297   fp_copy(y, v);
1298   fp_set (D, 1);
1299 
1300 top:
1301   /* 4.  while u is even do */
1302   while (fp_iseven (u) == FP_YES) {
1303     /* 4.1 u = u/2 */
1304     fp_div_2 (u, u);
1305 
1306     /* 4.2 if B is odd then */
1307     if (fp_isodd (B) == FP_YES) {
1308       err = fp_sub (B, x, B);
1309       if (err != FP_OKAY) {
1310       #ifdef WOLFSSL_SMALL_STACK
1311         XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1312       #endif
1313         return err;
1314       }
1315     }
1316     /* B = B/2 */
1317     fp_div_2 (B, B);
1318   }
1319 
1320   /* 5.  while v is even do */
1321   while (fp_iseven (v) == FP_YES) {
1322     /* 5.1 v = v/2 */
1323     fp_div_2 (v, v);
1324 
1325     /* 5.2 if D is odd then */
1326     if (fp_isodd (D) == FP_YES) {
1327       /* D = (D-x)/2 */
1328       err = fp_sub (D, x, D);
1329       if (err != FP_OKAY) {
1330       #ifdef WOLFSSL_SMALL_STACK
1331         XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1332       #endif
1333         return err;
1334       }
1335     }
1336     /* D = D/2 */
1337     fp_div_2 (D, D);
1338   }
1339 
1340   /* 6.  if u >= v then */
1341   if (fp_cmp (u, v) != FP_LT) {
1342     /* u = u - v, B = B - D */
1343     err = fp_sub (u, v, u);
1344     if (err != FP_OKAY) {
1345     #ifdef WOLFSSL_SMALL_STACK
1346       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1347     #endif
1348       return err;
1349     }
1350     err = fp_sub (B, D, B);
1351     if (err != FP_OKAY) {
1352     #ifdef WOLFSSL_SMALL_STACK
1353       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1354     #endif
1355       return err;
1356     }
1357   } else {
1358     /* v - v - u, D = D - B */
1359     err = fp_sub (v, u, v);
1360     if (err != FP_OKAY) {
1361     #ifdef WOLFSSL_SMALL_STACK
1362       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1363     #endif
1364       return err;
1365     }
1366     err = fp_sub (D, B, D);
1367     if (err != FP_OKAY) {
1368     #ifdef WOLFSSL_SMALL_STACK
1369       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1370     #endif
1371       return err;
1372     }
1373   }
1374 
1375   /* if not zero goto step 4 */
1376   if (fp_iszero (u) == FP_NO) {
1377     goto top;
1378   }
1379 
1380   /* now a = C, b = D, gcd == g*v */
1381 
1382   /* if v != 1 then there is no inverse */
1383   if (fp_cmp_d (v, 1) != FP_EQ) {
1384   #ifdef WOLFSSL_SMALL_STACK
1385     XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1386   #endif
1387     return FP_VAL;
1388   }
1389 
1390   /* b is now the inverse */
1391   neg = a->sign;
1392   while (D->sign == FP_NEG) {
1393     err = fp_add (D, b, D);
1394     if (err != FP_OKAY) {
1395     #ifdef WOLFSSL_SMALL_STACK
1396       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1397     #endif
1398       return FP_OKAY;
1399     }
1400   }
1401   /* too big */
1402   while (fp_cmp_mag(D, b) != FP_LT) {
1403     err = fp_sub(D, b, D);
1404     if (err != FP_OKAY) {
1405     #ifdef WOLFSSL_SMALL_STACK
1406       XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1407     #endif
1408       return err;
1409     }
1410   }
1411   fp_copy (D, c);
1412   c->sign = neg;
1413 #ifdef WOLFSSL_SMALL_STACK
1414   XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1415 #endif
1416   return FP_OKAY;
1417 }
1418 
1419 #define CT_INV_MOD_PRE_CNT      8
1420 
1421 /* modulus (b) must be greater than 2 and a prime */
fp_invmod_mont_ct(fp_int * a,fp_int * b,fp_int * c,fp_digit mp)1422 int fp_invmod_mont_ct(fp_int *a, fp_int *b, fp_int *c, fp_digit mp)
1423 {
1424   int i, j, err = FP_OKAY;
1425 #ifndef WOLFSSL_SMALL_STACK
1426   fp_int t[1], e[1];
1427   fp_int pre[CT_INV_MOD_PRE_CNT];
1428 #else
1429   fp_int* t;
1430   fp_int* e;
1431   fp_int* pre;
1432 #endif
1433 
1434   if ((a->used * 2 > FP_SIZE) || (b->used * 2 > FP_SIZE)) {
1435     return FP_VAL;
1436   }
1437 
1438 #ifdef WOLFSSL_SMALL_STACK
1439   t = (fp_int*)XMALLOC(sizeof(fp_int) * (2 + CT_INV_MOD_PRE_CNT), NULL,
1440                                                            DYNAMIC_TYPE_BIGINT);
1441   if (t == NULL)
1442     return FP_MEM;
1443   e = t + 1;
1444   pre = t + 2;
1445 #endif
1446 
1447   fp_init(t);
1448   fp_init(e);
1449 
1450   fp_init(&pre[0]);
1451   fp_copy(a, &pre[0]);
1452   for (i = 1; i < CT_INV_MOD_PRE_CNT; i++) {
1453     fp_init(&pre[i]);
1454     err |= fp_sqr(&pre[i-1], &pre[i]);
1455     err |= fp_montgomery_reduce(&pre[i], b, mp);
1456     err |= fp_mul(&pre[i], a, &pre[i]);
1457     err |= fp_montgomery_reduce(&pre[i], b, mp);
1458   }
1459 
1460   fp_sub_d(b, 2, e);
1461   /* Highest bit is always set. */
1462   j = 1;
1463   for (i = fp_count_bits(e)-2; i >= 0; i--) {
1464       if (!fp_is_bit_set(e, i) || j == CT_INV_MOD_PRE_CNT)
1465           break;
1466       j++;
1467   }
1468   fp_copy(&pre[j-1], t);
1469   j = 0;
1470   for (; i >= 0; i--) {
1471     int set = fp_is_bit_set(e, i);
1472 
1473     if ((j == CT_INV_MOD_PRE_CNT) || (!set && j > 0)) {
1474       err |= fp_mul(t, &pre[j-1], t);
1475       err |= fp_montgomery_reduce(t, b, mp);
1476       j = 0;
1477     }
1478     err |= fp_sqr(t, t);
1479     err |= fp_montgomery_reduce(t, b, mp);
1480     j += set;
1481   }
1482   if (j > 0) {
1483     err |= fp_mul(t, &pre[j-1], c);
1484     err |= fp_montgomery_reduce(c, b, mp);
1485   }
1486   else
1487     fp_copy(t, c);
1488 
1489 #ifdef WOLFSSL_SMALL_STACK
1490   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1491 #endif
1492 
1493   return err;
1494 }
1495 
1496 /* d = a * b (mod c) */
fp_mulmod(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1497 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1498 {
1499   int err;
1500 #ifndef WOLFSSL_SMALL_STACK
1501    fp_int t[1];
1502 #else
1503    fp_int *t;
1504 #endif
1505 
1506 #ifdef WOLFSSL_SMALL_STACK
1507    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
1508    if (t == NULL)
1509        return FP_MEM;
1510 #endif
1511 
1512   fp_init(t);
1513   err = fp_mul(a, b, t);
1514   if (err == FP_OKAY) {
1515   #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
1516     if (d->size < FP_SIZE) {
1517       err = fp_mod(t, c, t);
1518       fp_copy(t, d);
1519     } else
1520   #endif
1521     {
1522       err = fp_mod(t, c, d);
1523     }
1524   }
1525 
1526 #ifdef WOLFSSL_SMALL_STACK
1527   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1528 #endif
1529   return err;
1530 }
1531 
1532 /* d = a - b (mod c) */
fp_submod(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1533 int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1534 {
1535   int err;
1536 #ifndef WOLFSSL_SMALL_STACK
1537    fp_int t[1];
1538 #else
1539    fp_int *t;
1540 #endif
1541 
1542 #ifdef WOLFSSL_SMALL_STACK
1543    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
1544    if (t == NULL)
1545        return FP_MEM;
1546 #endif
1547 
1548   fp_init(t);
1549   err = fp_sub(a, b, t);
1550   if (err == FP_OKAY) {
1551   #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
1552     if (d->size < FP_SIZE) {
1553       err = fp_mod(t, c, t);
1554       fp_copy(t, d);
1555     } else
1556   #endif
1557     {
1558       err = fp_mod(t, c, d);
1559     }
1560   }
1561 
1562 #ifdef WOLFSSL_SMALL_STACK
1563   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1564 #endif
1565   return err;
1566 }
1567 
1568 /* d = a + b (mod c) */
fp_addmod(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1569 int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1570 {
1571   int err;
1572 #ifndef WOLFSSL_SMALL_STACK
1573    fp_int t[1];
1574 #else
1575    fp_int *t;
1576 #endif
1577 
1578 #ifdef WOLFSSL_SMALL_STACK
1579    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
1580    if (t == NULL)
1581        return FP_MEM;
1582 #endif
1583 
1584   fp_init(t);
1585   err = fp_add(a, b, t);
1586   if (err == FP_OKAY) {
1587   #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
1588     if (d->size < FP_SIZE) {
1589       err = fp_mod(t, c, t);
1590       fp_copy(t, d);
1591     } else
1592   #endif
1593     {
1594       err = fp_mod(t, c, d);
1595     }
1596   }
1597 
1598 #ifdef WOLFSSL_SMALL_STACK
1599   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1600 #endif
1601   return err;
1602 }
1603 
1604 /* d = a - b (mod c) - constant time (a < c and b < c and all positive)
1605  * c and d must not be the same pointers.
1606  */
fp_submod_ct(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1607 int fp_submod_ct(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1608 {
1609   fp_sword w;
1610   fp_digit mask;
1611   int i;
1612 
1613   if (c->used + 1 > FP_SIZE) {
1614     return FP_VAL;
1615   }
1616   if (c == d) {
1617     return FP_VAL;
1618   }
1619 
1620   /* In constant time, subtract b from a putting result in d. */
1621   w = 0;
1622   for (i = 0; i < c->used; i++) {
1623     w         += a->dp[i];
1624     w         -= b->dp[i];
1625     d->dp[i]   = (fp_digit)w;
1626     w        >>= DIGIT_BIT;
1627   }
1628   w  += a->dp[i];
1629   w  -= b->dp[i];
1630   w >>= DIGIT_BIT;
1631   /* When w is negative then we need to add modulus to make result positive. */
1632   mask = (fp_digit)0 - (w < 0);
1633   /* Constant time, conditionally, add modulus to difference. */
1634   w = 0;
1635   for (i = 0; i < c->used; i++) {
1636     w         += d->dp[i];
1637     w         += c->dp[i] & mask;
1638     d->dp[i]   = (fp_digit)w;
1639     w        >>= DIGIT_BIT;
1640   }
1641   /* Result will always have digits equal to or less than those in modulus. */
1642   d->used = i;
1643   d->sign = FP_ZPOS;
1644   fp_clamp(d);
1645 
1646   return FP_OKAY;
1647 }
1648 
1649 /* d = a + b (mod c) - constant time (a < c and b < c and all positive)
1650  * c and d must not be the same pointers.
1651  */
fp_addmod_ct(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1652 int fp_addmod_ct(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1653 {
1654   fp_word  w;
1655   fp_sword s;
1656   fp_digit mask;
1657   int i;
1658 
1659   if (c == d) {
1660     return FP_VAL;
1661   }
1662 
1663   /* Add a to b into d. Do the subtract of modulus but don't store result.
1664    * When subtract result is negative, the overflow will be negative.
1665    * Only need to subtract mod when result is positive - overflow is positive.
1666    */
1667   w = 0;
1668   s = 0;
1669   for (i = 0; i < c->used; i++) {
1670     w         += a->dp[i];
1671     w         += b->dp[i];
1672     d->dp[i]   = (fp_digit)w;
1673     s         += (fp_digit)w;
1674     s         -= c->dp[i];
1675     w        >>= DIGIT_BIT;
1676     s        >>= DIGIT_BIT;
1677   }
1678   s += (fp_digit)w;
1679   /* s will be positive when subtracting modulus is needed. */
1680   mask = (fp_digit)0 - (s >= 0);
1681 
1682   /* Constant time, conditionally, subtract modulus from sum. */
1683   w = 0;
1684   for (i = 0; i < c->used; i++) {
1685     w        += c->dp[i] & mask;
1686     w         = d->dp[i] - w;
1687     d->dp[i]  = (fp_digit)w;
1688     w         = (w >> DIGIT_BIT)&1;
1689   }
1690   /* Result will always have digits equal to or less than those in modulus. */
1691   d->used = i;
1692   d->sign = FP_ZPOS;
1693   fp_clamp(d);
1694 
1695   return FP_OKAY;
1696 }
1697 
1698 #ifdef TFM_TIMING_RESISTANT
1699 
1700 #ifdef WC_RSA_NONBLOCK
1701 
1702 #ifdef WC_RSA_NONBLOCK_TIME
1703   /* User can override the check-time at build-time using the
1704    * FP_EXPTMOD_NB_CHECKTIME macro to define your own function */
1705   #ifndef FP_EXPTMOD_NB_CHECKTIME
1706     /* instruction count for each type of operation */
1707     /* array lookup is using TFM_EXPTMOD_NB_* states */
1708     static const word32 exptModNbInst[TFM_EXPTMOD_NB_COUNT] = {
1709     #ifdef TFM_PPC32
1710       #ifdef _DEBUG
1711         11098, 8701, 3971, 178394, 858093, 1040, 822, 178056, 181574, 90883, 184339, 236813
1712       #else
1713         7050,  2554, 3187, 43178,  200422, 384,  275, 43024,  43550,  30450, 46270,  61376
1714       #endif
1715     #elif defined(TFM_X86_64)
1716       #ifdef _DEBUG
1717         954, 2377, 858, 19027, 90840, 287, 407, 20140, 7874,  11385, 8005,  6151
1718       #else
1719         765, 1007, 771, 5216,  34993, 248, 193, 4975,  4201,  3947,  4275,  3811
1720       #endif
1721     #else /* software only fast math */
1722       #ifdef _DEBUG
1723         798, 2245, 802, 16657, 66920, 352, 186, 16997, 16145, 12789, 16742, 15006
1724       #else
1725         775, 1084, 783, 4692,  37510, 207, 183, 4374,  4392,  3097,  4442,  4079
1726       #endif
1727     #endif
1728     };
1729 
fp_exptmod_nb_checktime(exptModNb_t * nb)1730     static int fp_exptmod_nb_checktime(exptModNb_t* nb)
1731     {
1732       word32 totalInst;
1733 
1734       /* if no max time has been set then stop (do not block) */
1735       if (nb->maxBlockInst == 0 || nb->state >= TFM_EXPTMOD_NB_COUNT) {
1736         return TFM_EXPTMOD_NB_STOP;
1737       }
1738 
1739       /* if instruction table not set then use maxBlockInst as simple counter */
1740       if (exptModNbInst[nb->state] == 0) {
1741         if (++nb->totalInst < nb->maxBlockInst)
1742           return TFM_EXPTMOD_NB_CONTINUE;
1743 
1744         nb->totalInst = 0; /* reset counter */
1745         return TFM_EXPTMOD_NB_STOP;
1746       }
1747 
1748       /* get total instruction count including next operation */
1749       totalInst = nb->totalInst + exptModNbInst[nb->state];
1750       /* if the next operation can completed within the maximum then continue */
1751       if (totalInst <= nb->maxBlockInst) {
1752         return TFM_EXPTMOD_NB_CONTINUE;
1753       }
1754 
1755       return TFM_EXPTMOD_NB_STOP;
1756     }
1757     #define FP_EXPTMOD_NB_CHECKTIME(nb) fp_exptmod_nb_checktime((nb))
1758   #endif /* !FP_EXPTMOD_NB_CHECKTIME */
1759 #endif /* WC_RSA_NONBLOCK_TIME */
1760 
1761 /* non-blocking version of timing resistant fp_exptmod function */
1762 /* supports cache resistance */
fp_exptmod_nb(exptModNb_t * nb,fp_int * G,fp_int * X,fp_int * P,fp_int * Y)1763 int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y)
1764 {
1765   int err, ret = FP_WOULDBLOCK;
1766 
1767   if (nb == NULL)
1768     return FP_VAL;
1769 
1770 #ifdef WC_RSA_NONBLOCK_TIME
1771   nb->totalInst = 0;
1772   do {
1773     nb->totalInst += exptModNbInst[nb->state];
1774 #endif
1775 
1776   switch (nb->state) {
1777   case TFM_EXPTMOD_NB_INIT:
1778     /* now setup montgomery */
1779     if ((err = fp_montgomery_setup(P, &nb->mp)) != FP_OKAY) {
1780       nb->state = TFM_EXPTMOD_NB_INIT;
1781       return err;
1782     }
1783 
1784     /* init ints */
1785     fp_init(&nb->R[0]);
1786     fp_init(&nb->R[1]);
1787   #ifndef WC_NO_CACHE_RESISTANT
1788     fp_init(&nb->R[2]);
1789   #endif
1790     nb->state = TFM_EXPTMOD_NB_MONT;
1791     break;
1792 
1793   case TFM_EXPTMOD_NB_MONT:
1794     /* mod m -> R[0] */
1795     err = fp_montgomery_calc_normalization(&nb->R[0], P);
1796     if (err != FP_OKAY) {
1797       nb->state = TFM_EXPTMOD_NB_INIT;
1798       return err;
1799     }
1800 
1801     nb->state = TFM_EXPTMOD_NB_MONT_RED;
1802     break;
1803 
1804   case TFM_EXPTMOD_NB_MONT_RED:
1805     /* reduce G -> R[1] */
1806     if (fp_cmp_mag(P, G) != FP_GT) {
1807        /* G > P so we reduce it first */
1808        fp_mod(G, P, &nb->R[1]);
1809     } else {
1810        fp_copy(G, &nb->R[1]);
1811     }
1812 
1813     nb->state = TFM_EXPTMOD_NB_MONT_MUL;
1814     break;
1815 
1816   case TFM_EXPTMOD_NB_MONT_MUL:
1817     /* G (R[1]) * m (R[0]) */
1818     err = fp_mul(&nb->R[1], &nb->R[0], &nb->R[1]);
1819     if (err != FP_OKAY) {
1820       nb->state = TFM_EXPTMOD_NB_INIT;
1821       return err;
1822     }
1823 
1824     nb->state = TFM_EXPTMOD_NB_MONT_MOD;
1825     break;
1826 
1827   case TFM_EXPTMOD_NB_MONT_MOD:
1828     /* mod m */
1829     err = fp_div(&nb->R[1], P, NULL, &nb->R[1]);
1830     if (err != FP_OKAY) {
1831       nb->state = TFM_EXPTMOD_NB_INIT;
1832       return err;
1833     }
1834 
1835     nb->state = TFM_EXPTMOD_NB_MONT_MODCHK;
1836     break;
1837 
1838   case TFM_EXPTMOD_NB_MONT_MODCHK:
1839     /* m matches sign of (G * R mod m) */
1840     if (nb->R[1].sign != P->sign) {
1841        err = fp_add(&nb->R[1], P, &nb->R[1]);
1842        if (err != FP_OKAY) {
1843          nb->state = TFM_EXPTMOD_NB_INIT;
1844          return err;
1845        }
1846     }
1847 
1848     /* set initial mode and bit cnt */
1849     nb->bitcnt = 1;
1850     nb->buf    = 0;
1851     nb->digidx = X->used - 1;
1852 
1853     nb->state = TFM_EXPTMOD_NB_NEXT;
1854     break;
1855 
1856   case TFM_EXPTMOD_NB_NEXT:
1857     /* grab next digit as required */
1858     if (--nb->bitcnt == 0) {
1859       /* if nb->digidx == -1 we are out of digits so break */
1860       if (nb->digidx == -1) {
1861         nb->state = TFM_EXPTMOD_NB_RED;
1862         break;
1863       }
1864       /* read next digit and reset nb->bitcnt */
1865       nb->buf    = X->dp[nb->digidx--];
1866       nb->bitcnt = (int)DIGIT_BIT;
1867     }
1868 
1869     /* grab the next msb from the exponent */
1870     nb->y     = (int)(nb->buf >> (DIGIT_BIT - 1)) & 1;
1871     nb->buf <<= (fp_digit)1;
1872     nb->state = TFM_EXPTMOD_NB_MUL;
1873     FALL_THROUGH;
1874 
1875   case TFM_EXPTMOD_NB_MUL:
1876     fp_mul(&nb->R[0], &nb->R[1], &nb->R[nb->y^1]);
1877     nb->state = TFM_EXPTMOD_NB_MUL_RED;
1878     break;
1879 
1880   case TFM_EXPTMOD_NB_MUL_RED:
1881     err = fp_montgomery_reduce(&nb->R[nb->y^1], P, nb->mp);
1882     if (err != FP_OKAY) {
1883       nb->state = TFM_EXPTMOD_NB_INIT;
1884       return err;
1885     }
1886     nb->state = TFM_EXPTMOD_NB_SQR;
1887     break;
1888 
1889   case TFM_EXPTMOD_NB_SQR:
1890   #ifdef WC_NO_CACHE_RESISTANT
1891     err = fp_sqr(&nb->R[nb->y], &nb->R[nb->y]);
1892   #else
1893     fp_copy((fp_int*) ( ((wc_ptr_t)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
1894                         ((wc_ptr_t)&nb->R[1] & wc_off_on_addr[nb->y]) ),
1895             &nb->R[2]);
1896     err = fp_sqr(&nb->R[2], &nb->R[2]);
1897   #endif /* WC_NO_CACHE_RESISTANT */
1898     if (err != FP_OKAY) {
1899       nb->state = TFM_EXPTMOD_NB_INIT;
1900       return err;
1901     }
1902 
1903     nb->state = TFM_EXPTMOD_NB_SQR_RED;
1904     break;
1905 
1906   case TFM_EXPTMOD_NB_SQR_RED:
1907   #ifdef WC_NO_CACHE_RESISTANT
1908     err = fp_montgomery_reduce(&nb->R[nb->y], P, nb->mp);
1909   #else
1910     err = fp_montgomery_reduce(&nb->R[2], P, nb->mp);
1911     fp_copy(&nb->R[2],
1912             (fp_int*) ( ((wc_ptr_t)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
1913                         ((wc_ptr_t)&nb->R[1] & wc_off_on_addr[nb->y]) ) );
1914   #endif /* WC_NO_CACHE_RESISTANT */
1915     if (err != FP_OKAY) {
1916       nb->state = TFM_EXPTMOD_NB_INIT;
1917       return err;
1918     }
1919 
1920     nb->state = TFM_EXPTMOD_NB_NEXT;
1921     break;
1922 
1923   case TFM_EXPTMOD_NB_RED:
1924     /* final reduce */
1925     err = fp_montgomery_reduce(&nb->R[0], P, nb->mp);
1926     if (err != FP_OKAY) {
1927       nb->state = TFM_EXPTMOD_NB_INIT;
1928       return err;
1929     }
1930     fp_copy(&nb->R[0], Y);
1931 
1932     nb->state = TFM_EXPTMOD_NB_INIT;
1933     ret = FP_OKAY;
1934     break;
1935   } /* switch */
1936 
1937 #ifdef WC_RSA_NONBLOCK_TIME
1938   /* determine if maximum blocking time has been reached */
1939   } while (ret == FP_WOULDBLOCK &&
1940     FP_EXPTMOD_NB_CHECKTIME(nb) == TFM_EXPTMOD_NB_CONTINUE);
1941 #endif
1942 
1943   return ret;
1944 }
1945 
1946 #endif /* WC_RSA_NONBLOCK */
1947 
1948 
1949 /* timing resistant montgomery ladder based exptmod
1950    Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
1951    Cryptographic Hardware and Embedded Systems, CHES 2002
1952 */
_fp_exptmod_ct(fp_int * G,fp_int * X,int digits,fp_int * P,fp_int * Y)1953 static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P,
1954                           fp_int * Y)
1955 {
1956 #ifndef WOLFSSL_SMALL_STACK
1957 #ifdef WC_NO_CACHE_RESISTANT
1958   fp_int   R[2];
1959 #else
1960   fp_int   R[3];   /* need a temp for cache resistance */
1961 #endif
1962 #else
1963    fp_int  *R;
1964 #endif
1965   fp_digit buf, mp;
1966   int      err, bitcnt, digidx, y;
1967 
1968   /* now setup montgomery  */
1969   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
1970      return err;
1971   }
1972 
1973 #ifdef WOLFSSL_SMALL_STACK
1974 #ifndef WC_NO_CACHE_RESISTANT
1975    R = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
1976 #else
1977    R = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
1978 #endif
1979    if (R == NULL)
1980        return FP_MEM;
1981 #endif
1982   fp_init(&R[0]);
1983   fp_init(&R[1]);
1984 #ifndef WC_NO_CACHE_RESISTANT
1985   fp_init(&R[2]);
1986 #endif
1987 
1988   /* now we need R mod m */
1989   err = fp_montgomery_calc_normalization (&R[0], P);
1990   if (err != FP_OKAY) {
1991   #ifdef WOLFSSL_SMALL_STACK
1992     XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
1993   #endif
1994     return err;
1995   }
1996 
1997   /* now set R[0][1] to G * R mod m */
1998   if (fp_cmp_mag(P, G) != FP_GT) {
1999      /* G > P so we reduce it first */
2000      err = fp_mod(G, P, &R[1]);
2001      if (err != FP_OKAY) {
2002 #ifdef WOLFSSL_SMALL_STACK
2003          XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2004 #endif
2005          return err;
2006      }
2007   } else {
2008      fp_copy(G, &R[1]);
2009   }
2010   err = fp_mulmod (&R[1], &R[0], P, &R[1]);
2011   if (err != FP_OKAY) {
2012 #ifdef WOLFSSL_SMALL_STACK
2013       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2014 #endif
2015       return err;
2016   }
2017 
2018   /* for j = t-1 downto 0 do
2019         r_!k = R0*R1; r_k = r_k^2
2020   */
2021 
2022   /* set initial mode and bit cnt */
2023   bitcnt = 1;
2024   buf    = 0;
2025   digidx = digits - 1;
2026 
2027   for (;;) {
2028     /* grab next digit as required */
2029     if (--bitcnt == 0) {
2030       /* if digidx == -1 we are out of digits so break */
2031       if (digidx == -1) {
2032         break;
2033       }
2034       /* read next digit and reset bitcnt */
2035       buf    = X->dp[digidx--];
2036       bitcnt = (int)DIGIT_BIT;
2037     }
2038 
2039     /* grab the next msb from the exponent */
2040     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2041     buf <<= (fp_digit)1;
2042 
2043 #ifdef WC_NO_CACHE_RESISTANT
2044     /* do ops */
2045     err = fp_mul(&R[0], &R[1], &R[y^1]);
2046     if (err != FP_OKAY) {
2047     #ifdef WOLFSSL_SMALL_STACK
2048       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2049     #endif
2050       return err;
2051     }
2052     err = fp_montgomery_reduce(&R[y^1], P, mp);
2053     if (err != FP_OKAY) {
2054     #ifdef WOLFSSL_SMALL_STACK
2055       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2056     #endif
2057       return err;
2058     }
2059 
2060     err = fp_sqr(&R[y], &R[y]);
2061     if (err != FP_OKAY) {
2062     #ifdef WOLFSSL_SMALL_STACK
2063       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2064     #endif
2065       return err;
2066     }
2067     err = fp_montgomery_reduce(&R[y], P, mp);
2068     if (err != FP_OKAY) {
2069     #ifdef WOLFSSL_SMALL_STACK
2070       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2071     #endif
2072       return err;
2073     }
2074 #else
2075     /* do ops */
2076     err = fp_mul(&R[0], &R[1], &R[2]);
2077     if (err != FP_OKAY) {
2078     #ifdef WOLFSSL_SMALL_STACK
2079       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2080     #endif
2081       return err;
2082     }
2083     err = fp_montgomery_reduce(&R[2], P, mp);
2084     if (err != FP_OKAY) {
2085     #ifdef WOLFSSL_SMALL_STACK
2086       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2087     #endif
2088       return err;
2089     }
2090     /* instead of using R[y^1] for mul, which leaks key bit to cache monitor,
2091      * use R[2] as temp, make sure address calc is constant, keep
2092      * &R[0] and &R[1] in cache */
2093     fp_copy(&R[2],
2094             (fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y]) +
2095                         ((wc_ptr_t)&R[1] & wc_off_on_addr[y^1]) ) );
2096 
2097     /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
2098      * use R[2] as temp, make sure address calc is constant, keep
2099      * &R[0] and &R[1] in cache */
2100     fp_copy((fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y^1]) +
2101                         ((wc_ptr_t)&R[1] & wc_off_on_addr[y]) ),
2102             &R[2]);
2103     err = fp_sqr(&R[2], &R[2]);
2104     if (err != FP_OKAY) {
2105     #ifdef WOLFSSL_SMALL_STACK
2106       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2107     #endif
2108       return err;
2109     }
2110     err = fp_montgomery_reduce(&R[2], P, mp);
2111     if (err != FP_OKAY) {
2112     #ifdef WOLFSSL_SMALL_STACK
2113       XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2114     #endif
2115       return err;
2116     }
2117     fp_copy(&R[2],
2118             (fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y^1]) +
2119                         ((wc_ptr_t)&R[1] & wc_off_on_addr[y]) ) );
2120 #endif /* WC_NO_CACHE_RESISTANT */
2121   }
2122 
2123    err = fp_montgomery_reduce(&R[0], P, mp);
2124    fp_copy(&R[0], Y);
2125 #ifdef WOLFSSL_SMALL_STACK
2126    XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2127 #endif
2128    return err;
2129 }
2130 
2131 #endif /* TFM_TIMING_RESISTANT */
2132 
2133 /* y = g**x (mod b)
2134  * Some restrictions... x must be positive and < b
2135  */
_fp_exptmod_nct(fp_int * G,fp_int * X,fp_int * P,fp_int * Y)2136 static int _fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2137 {
2138   fp_int  *res;
2139   fp_digit buf, mp;
2140   int      err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
2141 #ifndef WOLFSSL_NO_MALLOC
2142   fp_int  *M;
2143 #else
2144   fp_int   M[(1 << 6) + 1];
2145 #endif
2146 
2147   /* find window size */
2148   x = fp_count_bits (X);
2149   if (x <= 21) {
2150     winsize = 1;
2151   } else if (x <= 36) {
2152     winsize = 3;
2153   } else if (x <= 140) {
2154     winsize = 4;
2155   } else if (x <= 450) {
2156     winsize = 5;
2157   } else {
2158     winsize = 6;
2159   }
2160 
2161   /* now setup montgomery  */
2162   if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
2163      return err;
2164   }
2165 
2166 #ifndef WOLFSSL_NO_MALLOC
2167   /* only allocate space for what's needed for window plus res */
2168   M = (fp_int*)XMALLOC(sizeof(fp_int)*((1 << winsize) + 1), NULL,
2169                                                            DYNAMIC_TYPE_BIGINT);
2170   if (M == NULL) {
2171      return FP_MEM;
2172   }
2173 #endif
2174   res = &M[(word32)(1 << winsize)];
2175 
2176   /* init M array */
2177   for(x = 0; x < (1 << winsize); x++)
2178     fp_init(&M[x]);
2179 
2180   /* setup result */
2181   fp_init(res);
2182 
2183   /* create M table
2184    *
2185    * The M table contains powers of the input base, e.g. M[x] = G^x mod P
2186    *
2187    * The first half of the table is not computed though except for M[0] and M[1]
2188    */
2189 
2190   /* now we need R mod m */
2191   err = fp_montgomery_calc_normalization (res, P);
2192   if (err != FP_OKAY) {
2193 #ifndef WOLFSSL_NO_MALLOC
2194     XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2195 #endif
2196     return err;
2197   }
2198 
2199   /* now set M[1] to G * R mod m */
2200   if (fp_cmp_mag(P, G) != FP_GT) {
2201      /* G > P so we reduce it first */
2202      fp_mod(G, P, &M[1]);
2203   } else {
2204      fp_copy(G, &M[1]);
2205   }
2206   fp_mulmod (&M[1], res, P, &M[1]);
2207 
2208   /* compute the value at M[1<<(winsize-1)] by
2209    * squaring M[1] (winsize-1) times */
2210   fp_copy (&M[1], &M[(word32)(1 << (winsize - 1))]);
2211   for (x = 0; x < (winsize - 1); x++) {
2212     err = fp_sqr (&M[(word32)(1 << (winsize - 1))],
2213                   &M[(word32)(1 << (winsize - 1))]);
2214     if (err != FP_OKAY) {
2215 #ifndef WOLFSSL_NO_MALLOC
2216       XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2217 #endif
2218       return err;
2219     }
2220     err = fp_montgomery_reduce_ex(&M[(word32)(1 << (winsize - 1))], P, mp, 0);
2221     if (err != FP_OKAY) {
2222 #ifndef WOLFSSL_NO_MALLOC
2223       XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2224 #endif
2225       return err;
2226     }
2227   }
2228 
2229   /* create upper table */
2230   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2231     err = fp_mul(&M[x - 1], &M[1], &M[x]);
2232     if (err != FP_OKAY) {
2233 #ifndef WOLFSSL_NO_MALLOC
2234       XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2235 #endif
2236       return err;
2237     }
2238     err = fp_montgomery_reduce_ex(&M[x], P, mp, 0);
2239     if (err != FP_OKAY) {
2240 #ifndef WOLFSSL_NO_MALLOC
2241       XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2242 #endif
2243       return err;
2244     }
2245   }
2246 
2247   /* set initial mode and bit cnt */
2248   mode   = 0;
2249   bitcnt = (x % DIGIT_BIT) + 1;
2250   buf    = 0;
2251   digidx = X->used - 1;
2252   bitcpy = 0;
2253   bitbuf = 0;
2254 
2255   for (;;) {
2256     /* grab next digit as required */
2257     if (--bitcnt == 0) {
2258       /* if digidx == -1 we are out of digits so break */
2259       if (digidx == -1) {
2260         break;
2261       }
2262       /* read next digit and reset bitcnt */
2263       buf    = X->dp[digidx--];
2264       bitcnt = (int)DIGIT_BIT;
2265     }
2266 
2267     /* grab the next msb from the exponent */
2268     y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2269     buf <<= (fp_digit)1;
2270 
2271     /* if the bit is zero and mode == 0 then we ignore it
2272      * These represent the leading zero bits before the first 1 bit
2273      * in the exponent.  Technically this opt is not required but it
2274      * does lower the # of trivial squaring/reductions used
2275      */
2276     if (mode == 0 && y == 0) {
2277       continue;
2278     }
2279 
2280     /* if the bit is zero and mode == 1 then we square */
2281     if (mode == 1 && y == 0) {
2282       err = fp_sqr(res, res);
2283       if (err != FP_OKAY) {
2284 #ifndef WOLFSSL_NO_MALLOC
2285         XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2286 #endif
2287         return err;
2288       }
2289       err = fp_montgomery_reduce_ex(res, P, mp, 0);
2290       if (err != FP_OKAY) {
2291 #ifndef WOLFSSL_NO_MALLOC
2292         XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2293 #endif
2294         return err;
2295       }
2296       continue;
2297     }
2298 
2299     /* else we add it to the window */
2300     bitbuf |= (y << (winsize - ++bitcpy));
2301     mode    = 2;
2302 
2303     if (bitcpy == winsize) {
2304       /* ok window is filled so square as required and multiply  */
2305       /* square first */
2306       for (x = 0; x < winsize; x++) {
2307         err = fp_sqr(res, res);
2308         if (err != FP_OKAY) {
2309 #ifndef WOLFSSL_NO_MALLOC
2310           XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2311 #endif
2312           return err;
2313         }
2314         err = fp_montgomery_reduce_ex(res, P, mp, 0);
2315         if (err != FP_OKAY) {
2316 #ifndef WOLFSSL_NO_MALLOC
2317           XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2318 #endif
2319           return err;
2320         }
2321       }
2322 
2323       /* then multiply */
2324       err = fp_mul(res, &M[bitbuf], res);
2325       if (err != FP_OKAY) {
2326 #ifndef WOLFSSL_NO_MALLOC
2327         XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2328 #endif
2329         return err;
2330       }
2331       err = fp_montgomery_reduce_ex(res, P, mp, 0);
2332       if (err != FP_OKAY) {
2333 #ifndef WOLFSSL_NO_MALLOC
2334         XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2335 #endif
2336         return err;
2337       }
2338 
2339       /* empty window and reset */
2340       bitcpy = 0;
2341       bitbuf = 0;
2342       mode   = 1;
2343     }
2344   }
2345 
2346   /* if bits remain then square/multiply */
2347   if (mode == 2 && bitcpy > 0) {
2348     /* square then multiply if the bit is set */
2349     for (x = 0; x < bitcpy; x++) {
2350       err = fp_sqr(res, res);
2351       if (err != FP_OKAY) {
2352 #ifndef WOLFSSL_NO_MALLOC
2353         XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2354 #endif
2355         return err;
2356       }
2357       err = fp_montgomery_reduce_ex(res, P, mp, 0);
2358       if (err != FP_OKAY) {
2359 #ifndef WOLFSSL_NO_MALLOC
2360         XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2361 #endif
2362         return err;
2363       }
2364 
2365       /* get next bit of the window */
2366       bitbuf <<= 1;
2367       if ((bitbuf & (1 << winsize)) != 0) {
2368         /* then multiply */
2369         err = fp_mul(res, &M[1], res);
2370         if (err != FP_OKAY) {
2371 #ifndef WOLFSSL_NO_MALLOC
2372           XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2373 #endif
2374           return err;
2375         }
2376         err = fp_montgomery_reduce_ex(res, P, mp, 0);
2377         if (err != FP_OKAY) {
2378 #ifndef WOLFSSL_NO_MALLOC
2379           XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2380 #endif
2381           return err;
2382         }
2383       }
2384     }
2385   }
2386 
2387   /* fixup result if Montgomery reduction is used
2388    * recall that any value in a Montgomery system is
2389    * actually multiplied by R mod n.  So we have
2390    * to reduce one more time to cancel out the factor
2391    * of R.
2392    */
2393   err = fp_montgomery_reduce_ex(res, P, mp, 0);
2394 
2395   /* swap res with Y */
2396   fp_copy (res, Y);
2397 
2398 #ifndef WOLFSSL_NO_MALLOC
2399   XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2400 #endif
2401   return err;
2402 }
2403 
2404 
2405 #ifdef TFM_TIMING_RESISTANT
2406 #if DIGIT_BIT <= 16
2407     #define WINSIZE    2
2408 #elif DIGIT_BIT <= 32
2409     #define WINSIZE    3
2410 #elif DIGIT_BIT <= 64
2411     #define WINSIZE    4
2412 #elif DIGIT_BIT <= 128
2413     #define WINSIZE    5
2414 #endif
2415 
2416 /* y = 2**x (mod b)
2417  * Some restrictions... x must be positive and < b
2418  */
_fp_exptmod_base_2(fp_int * X,int digits,fp_int * P,fp_int * Y)2419 static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
2420                               fp_int * Y)
2421 {
2422   fp_digit buf, mp;
2423   int      err, bitbuf, bitcpy, bitcnt, digidx, x, y;
2424 #ifdef WOLFSSL_SMALL_STACK
2425   fp_int  *res;
2426   fp_int  *tmp;
2427 #else
2428   fp_int   res[1];
2429   fp_int   tmp[1];
2430 #endif
2431 
2432 #ifdef WOLFSSL_SMALL_STACK
2433   res = (fp_int*)XMALLOC(2*sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
2434   if (res == NULL) {
2435      return FP_MEM;
2436   }
2437   tmp = &res[1];
2438 #endif
2439 
2440   /* now setup montgomery  */
2441   if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
2442 #ifdef WOLFSSL_SMALL_STACK
2443      XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2444 #endif
2445      return err;
2446   }
2447 
2448   /* setup result */
2449   fp_init(res);
2450   fp_init(tmp);
2451 
2452   err = fp_mul_2d(P, 1 << WINSIZE, tmp);
2453   if (err != FP_OKAY) {
2454   #ifdef WOLFSSL_SMALL_STACK
2455     XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2456   #endif
2457     return err;
2458   }
2459 
2460   /* now we need R mod m */
2461   err = fp_montgomery_calc_normalization(res, P);
2462   if (err != FP_OKAY) {
2463   #ifdef WOLFSSL_SMALL_STACK
2464     XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2465   #endif
2466     return err;
2467   }
2468 
2469   /* Get the top bits left over after taking WINSIZE bits starting at the
2470    * least-significant.
2471    */
2472   digidx = digits - 1;
2473   bitcpy = (digits * DIGIT_BIT) % WINSIZE;
2474   if (bitcpy > 0) {
2475       bitcnt = (int)DIGIT_BIT - bitcpy;
2476       buf    = X->dp[digidx--];
2477       bitbuf = (int)(buf >> bitcnt);
2478       /* Multiply montgomery representation of 1 by 2 ^ top */
2479       err = fp_mul_2d(res, bitbuf, res);
2480       if (err != FP_OKAY) {
2481       #ifdef WOLFSSL_SMALL_STACK
2482         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2483       #endif
2484         return err;
2485       }
2486       err = fp_add(res, tmp, res);
2487       if (err != FP_OKAY) {
2488       #ifdef WOLFSSL_SMALL_STACK
2489         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2490       #endif
2491         return err;
2492       }
2493       err = fp_mod(res, P, res);
2494       if (err != FP_OKAY) {
2495       #ifdef WOLFSSL_SMALL_STACK
2496         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2497       #endif
2498         return err;
2499       }
2500       /* Move out bits used */
2501       buf  <<= bitcpy;
2502       bitcnt++;
2503   }
2504   else {
2505       bitcnt = 1;
2506       buf    = 0;
2507   }
2508 
2509   /* empty window and reset  */
2510   bitbuf = 0;
2511   bitcpy = 0;
2512 
2513   for (;;) {
2514     /* grab next digit as required */
2515     if (--bitcnt == 0) {
2516       /* if digidx == -1 we are out of digits so break */
2517       if (digidx == -1) {
2518         break;
2519       }
2520       /* read next digit and reset bitcnt */
2521       buf    = X->dp[digidx--];
2522       bitcnt = (int)DIGIT_BIT;
2523     }
2524 
2525     /* grab the next msb from the exponent */
2526     y       = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2527     buf   <<= (fp_digit)1;
2528     /* add bit to the window */
2529     bitbuf |= (y << (WINSIZE - ++bitcpy));
2530 
2531     if (bitcpy == WINSIZE) {
2532       /* ok window is filled so square as required and multiply  */
2533       /* square first */
2534       for (x = 0; x < WINSIZE; x++) {
2535         err = fp_sqr(res, res);
2536         if (err != FP_OKAY) {
2537         #ifdef WOLFSSL_SMALL_STACK
2538           XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2539         #endif
2540           return err;
2541         }
2542         err = fp_montgomery_reduce(res, P, mp);
2543         if (err != FP_OKAY) {
2544         #ifdef WOLFSSL_SMALL_STACK
2545           XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2546         #endif
2547           return err;
2548         }
2549       }
2550 
2551       /* then multiply by 2^bitbuf */
2552       err = fp_mul_2d(res, bitbuf, res);
2553       if (err != FP_OKAY) {
2554       #ifdef WOLFSSL_SMALL_STACK
2555         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2556       #endif
2557         return err;
2558       }
2559       /* Add in value to make mod operation take same time */
2560       err = fp_add(res, tmp, res);
2561       if (err != FP_OKAY) {
2562       #ifdef WOLFSSL_SMALL_STACK
2563         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2564       #endif
2565         return err;
2566       }
2567       err = fp_mod(res, P, res);
2568       if (err != FP_OKAY) {
2569       #ifdef WOLFSSL_SMALL_STACK
2570         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2571       #endif
2572         return err;
2573       }
2574 
2575       /* empty window and reset */
2576       bitcpy = 0;
2577       bitbuf = 0;
2578     }
2579   }
2580 
2581   /* fixup result if Montgomery reduction is used
2582    * recall that any value in a Montgomery system is
2583    * actually multiplied by R mod n.  So we have
2584    * to reduce one more time to cancel out the factor
2585    * of R.
2586    */
2587   err = fp_montgomery_reduce(res, P, mp);
2588 
2589   /* swap res with Y */
2590   fp_copy(res, Y);
2591 
2592 #ifdef WOLFSSL_SMALL_STACK
2593   XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2594 #endif
2595   return err;
2596 }
2597 
2598 #undef WINSIZE
2599 #else
2600 #if DIGIT_BIT < 16
2601     #define WINSIZE    3
2602 #elif DIGIT_BIT < 32
2603     #define WINSIZE    4
2604 #elif DIGIT_BIT < 64
2605     #define WINSIZE    5
2606 #elif DIGIT_BIT < 128
2607     #define WINSIZE    6
2608 #elif DIGIT_BIT == 128
2609     #define WINSIZE    7
2610 #endif
2611 
2612 /* y = 2**x (mod b)
2613  * Some restrictions... x must be positive and < b
2614  */
_fp_exptmod_base_2(fp_int * X,int digits,fp_int * P,fp_int * Y)2615 static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
2616                               fp_int * Y)
2617 {
2618   fp_digit buf, mp;
2619   int      err, bitbuf, bitcpy, bitcnt, digidx, x, y;
2620 #ifdef WOLFSSL_SMALL_STACK
2621   fp_int  *res;
2622 #else
2623   fp_int   res[1];
2624 #endif
2625 
2626   /* now setup montgomery  */
2627   if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
2628     return err;
2629   }
2630 
2631 #ifdef WOLFSSL_SMALL_STACK
2632   res = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
2633   if (res == NULL) {
2634      return FP_MEM;
2635   }
2636 #endif
2637 
2638   /* setup result */
2639   fp_init(res);
2640 
2641   /* now we need R mod m */
2642   err = fp_montgomery_calc_normalization(res, P);
2643   if (err != FP_OKAY) {
2644   #ifdef WOLFSSL_SMALL_STACK
2645     XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2646   #endif
2647     return err;
2648   }
2649 
2650   /* Get the top bits left over after taking WINSIZE bits starting at the
2651    * least-significant.
2652    */
2653   digidx = digits - 1;
2654   bitcpy = (digits * DIGIT_BIT) % WINSIZE;
2655   if (bitcpy > 0) {
2656       bitcnt = (int)DIGIT_BIT - bitcpy;
2657       buf    = X->dp[digidx--];
2658       bitbuf = (int)(buf >> bitcnt);
2659       /* Multiply montgomery representation of 1 by 2 ^ top */
2660       err = fp_mul_2d(res, bitbuf, res);
2661       if (err != FP_OKAY) {
2662       #ifdef WOLFSSL_SMALL_STACK
2663         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2664       #endif
2665         return err;
2666       }
2667       err = fp_mod(res, P, res);
2668       if (err != FP_OKAY) {
2669       #ifdef WOLFSSL_SMALL_STACK
2670         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2671       #endif
2672         return err;
2673       }
2674       /* Move out bits used */
2675       buf  <<= bitcpy;
2676       bitcnt++;
2677   }
2678   else {
2679       bitcnt = 1;
2680       buf    = 0;
2681   }
2682 
2683   /* empty window and reset  */
2684   bitbuf = 0;
2685   bitcpy = 0;
2686 
2687   for (;;) {
2688     /* grab next digit as required */
2689     if (--bitcnt == 0) {
2690       /* if digidx == -1 we are out of digits so break */
2691       if (digidx == -1) {
2692         break;
2693       }
2694       /* read next digit and reset bitcnt */
2695       buf    = X->dp[digidx--];
2696       bitcnt = (int)DIGIT_BIT;
2697     }
2698 
2699     /* grab the next msb from the exponent */
2700     y       = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2701     buf   <<= (fp_digit)1;
2702     /* add bit to the window */
2703     bitbuf |= (y << (WINSIZE - ++bitcpy));
2704 
2705     if (bitcpy == WINSIZE) {
2706       /* ok window is filled so square as required and multiply  */
2707       /* square first */
2708       for (x = 0; x < WINSIZE; x++) {
2709         err = fp_sqr(res, res);
2710         if (err != FP_OKAY) {
2711         #ifdef WOLFSSL_SMALL_STACK
2712           XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2713         #endif
2714           return err;
2715         }
2716         err = fp_montgomery_reduce(res, P, mp);
2717         if (err != FP_OKAY) {
2718         #ifdef WOLFSSL_SMALL_STACK
2719           XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2720         #endif
2721           return err;
2722         }
2723       }
2724 
2725       /* then multiply by 2^bitbuf */
2726       err = fp_mul_2d(res, bitbuf, res);
2727       if (err != FP_OKAY) {
2728       #ifdef WOLFSSL_SMALL_STACK
2729         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2730       #endif
2731         return err;
2732       }
2733       err = fp_mod(res, P, res);
2734       if (err != FP_OKAY) {
2735       #ifdef WOLFSSL_SMALL_STACK
2736         XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2737       #endif
2738         return err;
2739       }
2740 
2741       /* empty window and reset */
2742       bitcpy = 0;
2743       bitbuf = 0;
2744     }
2745   }
2746 
2747   /* fixup result if Montgomery reduction is used
2748    * recall that any value in a Montgomery system is
2749    * actually multiplied by R mod n.  So we have
2750    * to reduce one more time to cancel out the factor
2751    * of R.
2752    */
2753   err = fp_montgomery_reduce(res, P, mp);
2754 
2755   /* swap res with Y */
2756   fp_copy(res, Y);
2757 
2758 #ifdef WOLFSSL_SMALL_STACK
2759   XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2760 #endif
2761   return err;
2762 }
2763 
2764 #undef WINSIZE
2765 #endif
2766 
2767 
fp_exptmod(fp_int * G,fp_int * X,fp_int * P,fp_int * Y)2768 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2769 {
2770 
2771 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2772    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2773    int x = fp_count_bits (X);
2774 #endif
2775 
2776    /* handle modulus of zero and prevent overflows */
2777    if (fp_iszero(P) || (P->used > (FP_SIZE/2))) {
2778       return FP_VAL;
2779    }
2780    if (fp_isone(P)) {
2781       fp_set(Y, 0);
2782       return FP_OKAY;
2783    }
2784    if (fp_iszero(X)) {
2785       fp_set(Y, 1);
2786       return FP_OKAY;
2787    }
2788    if (fp_iszero(G)) {
2789       fp_set(Y, 0);
2790       return FP_OKAY;
2791    }
2792 
2793 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2794    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2795    if(x > EPS_RSA_EXPT_XBTIS) {
2796       return esp_mp_exptmod(G, X, x, P, Y);
2797    }
2798 #endif
2799 
2800    if (X->sign == FP_NEG) {
2801 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
2802       int    err;
2803    #ifndef WOLFSSL_SMALL_STACK
2804       fp_int tmp[2];
2805    #else
2806       fp_int *tmp;
2807    #endif
2808 
2809    #ifdef WOLFSSL_SMALL_STACK
2810       tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
2811       if (tmp == NULL)
2812           return FP_MEM;
2813    #endif
2814 
2815       /* yes, copy G and invmod it */
2816       fp_init_copy(&tmp[0], G);
2817       fp_init_copy(&tmp[1], P);
2818       tmp[1].sign = FP_ZPOS;
2819       err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
2820       if (err == FP_OKAY) {
2821          fp_copy(X, &tmp[1]);
2822          tmp[1].sign = FP_ZPOS;
2823 #ifdef TFM_TIMING_RESISTANT
2824          err =  _fp_exptmod_ct(&tmp[0], &tmp[1], tmp[1].used, P, Y);
2825 #else
2826          err =  _fp_exptmod_nct(&tmp[0], &tmp[1], P, Y);
2827 #endif
2828          if ((err == 0) && (P->sign == FP_NEG)) {
2829             err = fp_add(Y, P, Y);
2830          }
2831       }
2832    #ifdef WOLFSSL_SMALL_STACK
2833       XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2834    #endif
2835       return err;
2836 #else
2837       return FP_VAL;
2838 #endif
2839    }
2840    else if (G->used == 1 && G->dp[0] == 2) {
2841       return _fp_exptmod_base_2(X, X->used, P, Y);
2842    }
2843    else {
2844       /* Positive exponent so just exptmod */
2845 #ifdef TFM_TIMING_RESISTANT
2846       return _fp_exptmod_ct(G, X, X->used, P, Y);
2847 #else
2848       return _fp_exptmod_nct(G, X, P, Y);
2849 #endif
2850    }
2851 }
2852 
fp_exptmod_ex(fp_int * G,fp_int * X,int digits,fp_int * P,fp_int * Y)2853 int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y)
2854 {
2855 
2856 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2857    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2858    int x = fp_count_bits (X);
2859 #endif
2860 
2861    if (fp_iszero(G)) {
2862       fp_set(Y, 0);
2863       return FP_OKAY;
2864    }
2865 
2866    /* prevent overflows */
2867    if (P->used > (FP_SIZE/2)) {
2868       return FP_VAL;
2869    }
2870 
2871 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2872    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2873    if(x > EPS_RSA_EXPT_XBTIS) {
2874       return esp_mp_exptmod(G, X, x, P, Y);
2875    }
2876 #endif
2877 
2878    if (X->sign == FP_NEG) {
2879 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
2880       int    err;
2881    #ifndef WOLFSSL_SMALL_STACK
2882       fp_int tmp[2];
2883    #else
2884       fp_int *tmp;
2885    #endif
2886 
2887    #ifdef WOLFSSL_SMALL_STACK
2888       tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2889       if (tmp == NULL)
2890           return FP_MEM;
2891    #endif
2892 
2893       /* yes, copy G and invmod it */
2894       fp_init_copy(&tmp[0], G);
2895       fp_init_copy(&tmp[1], P);
2896       tmp[1].sign = FP_ZPOS;
2897       err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
2898       if (err == FP_OKAY) {
2899          X->sign = FP_ZPOS;
2900 #ifdef TFM_TIMING_RESISTANT
2901          err =  _fp_exptmod_ct(&tmp[0], X, digits, P, Y);
2902 #else
2903          err =  _fp_exptmod_nct(&tmp[0], X, P, Y);
2904          (void)digits;
2905 #endif
2906          if (X != Y) {
2907             X->sign = FP_NEG;
2908          }
2909          if ((err == 0) && (P->sign == FP_NEG)) {
2910             err = fp_add(Y, P, Y);
2911          }
2912       }
2913    #ifdef WOLFSSL_SMALL_STACK
2914       XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
2915    #endif
2916       return err;
2917 #else
2918       return FP_VAL;
2919 #endif
2920    }
2921    else {
2922       /* Positive exponent so just exptmod */
2923 #ifdef TFM_TIMING_RESISTANT
2924       return _fp_exptmod_ct(G, X, digits, P, Y);
2925 #else
2926       return  _fp_exptmod_nct(G, X, P, Y);
2927 #endif
2928    }
2929 }
2930 
fp_exptmod_nct(fp_int * G,fp_int * X,fp_int * P,fp_int * Y)2931 int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2932 {
2933 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2934    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2935    int x = fp_count_bits (X);
2936 #endif
2937 
2938    if (fp_iszero(G)) {
2939       fp_set(G, 0);
2940       return FP_OKAY;
2941    }
2942 
2943    /* prevent overflows */
2944    if (P->used > (FP_SIZE/2)) {
2945       return FP_VAL;
2946    }
2947 
2948 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2949    !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2950    if(x > EPS_RSA_EXPT_XBTIS) {
2951       return esp_mp_exptmod(G, X, x, P, Y);
2952    }
2953 #endif
2954 
2955    if (X->sign == FP_NEG) {
2956 #ifndef POSITIVE_EXP_ONLY  /* reduce stack if assume no negatives */
2957       int    err;
2958    #ifndef WOLFSSL_SMALL_STACK
2959       fp_int tmp[2];
2960    #else
2961       fp_int *tmp;
2962    #endif
2963 
2964    #ifdef WOLFSSL_SMALL_STACK
2965       tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2966       if (tmp == NULL)
2967           return FP_MEM;
2968    #endif
2969 
2970       /* yes, copy G and invmod it */
2971       fp_init_copy(&tmp[0], G);
2972       fp_init_copy(&tmp[1], P);
2973       tmp[1].sign = FP_ZPOS;
2974       err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
2975       if (err == FP_OKAY) {
2976          X->sign = FP_ZPOS;
2977          err =  _fp_exptmod_nct(&tmp[0], X, P, Y);
2978          if (X != Y) {
2979             X->sign = FP_NEG;
2980          }
2981          if ((err == 0) && (P->sign == FP_NEG)) {
2982             err = fp_add(Y, P, Y);
2983          }
2984       }
2985    #ifdef WOLFSSL_SMALL_STACK
2986       XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
2987    #endif
2988       return err;
2989 #else
2990       return FP_VAL;
2991 #endif
2992    }
2993    else {
2994       /* Positive exponent so just exptmod */
2995       return  _fp_exptmod_nct(G, X, P, Y);
2996    }
2997 }
2998 
2999 /* computes a = 2**b */
fp_2expt(fp_int * a,int b)3000 void fp_2expt(fp_int *a, int b)
3001 {
3002    int     z;
3003 
3004    /* zero a as per default */
3005    fp_zero (a);
3006 
3007    if (b < 0) {
3008       return;
3009    }
3010 
3011    z = b / DIGIT_BIT;
3012    if (z >= FP_SIZE) {
3013       return;
3014    }
3015 
3016   /* set the used count of where the bit will go */
3017   a->used = z + 1;
3018 
3019   /* put the single bit in its place */
3020   a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
3021 }
3022 
3023 /* b = a*a  */
fp_sqr(fp_int * A,fp_int * B)3024 int fp_sqr(fp_int *A, fp_int *B)
3025 {
3026     int err;
3027     int y, oldused;
3028 
3029     oldused = B->used;
3030     y = A->used;
3031 
3032     /* error if we're out of range */
3033     if (y + y >= FP_SIZE) {
3034        err = FP_VAL;
3035        goto clean;
3036     }
3037 
3038 #if defined(TFM_SQR3) && FP_SIZE >= 6
3039         if (y <= 3) {
3040            err = fp_sqr_comba3(A,B);
3041            goto clean;
3042         }
3043 #endif
3044 #if defined(TFM_SQR4) && FP_SIZE >= 8
3045         if (y == 4) {
3046            err = fp_sqr_comba4(A,B);
3047            goto clean;
3048         }
3049 #endif
3050 #if defined(TFM_SQR6) && FP_SIZE >= 12
3051         if (y <= 6) {
3052            err = fp_sqr_comba6(A,B);
3053            goto clean;
3054         }
3055 #endif
3056 #if defined(TFM_SQR7) && FP_SIZE >= 14
3057         if (y == 7) {
3058            err = fp_sqr_comba7(A,B);
3059            goto clean;
3060         }
3061 #endif
3062 #if defined(TFM_SQR8) && FP_SIZE >= 16
3063         if (y == 8) {
3064            err = fp_sqr_comba8(A,B);
3065            goto clean;
3066         }
3067 #endif
3068 #if defined(TFM_SQR9) && FP_SIZE >= 18
3069         if (y == 9) {
3070            err = fp_sqr_comba9(A,B);
3071            goto clean;
3072         }
3073 #endif
3074 #if defined(TFM_SQR12) && FP_SIZE >= 24
3075         if (y <= 12) {
3076            err = fp_sqr_comba12(A,B);
3077            goto clean;
3078         }
3079 #endif
3080 #if defined(TFM_SQR17) && FP_SIZE >= 34
3081         if (y <= 17) {
3082            err = fp_sqr_comba17(A,B);
3083            goto clean;
3084         }
3085 #endif
3086 #if defined(TFM_SMALL_SET)
3087         if (y <= 16) {
3088            err = fp_sqr_comba_small(A,B);
3089            goto clean;
3090         }
3091 #endif
3092 #if defined(TFM_SQR20) && FP_SIZE >= 40
3093         if (y <= 20) {
3094            err = fp_sqr_comba20(A,B);
3095            goto clean;
3096         }
3097 #endif
3098 #if defined(TFM_SQR24) && FP_SIZE >= 48
3099         if (y <= 24) {
3100            err = fp_sqr_comba24(A,B);
3101            goto clean;
3102         }
3103 #endif
3104 #if defined(TFM_SQR28) && FP_SIZE >= 56
3105         if (y <= 28) {
3106            err = fp_sqr_comba28(A,B);
3107            goto clean;
3108         }
3109 #endif
3110 #if defined(TFM_SQR32) && FP_SIZE >= 64
3111         if (y <= 32) {
3112            err = fp_sqr_comba32(A,B);
3113            goto clean;
3114         }
3115 #endif
3116 #if defined(TFM_SQR48) && FP_SIZE >= 96
3117         if (y <= 48) {
3118            err = fp_sqr_comba48(A,B);
3119            goto clean;
3120         }
3121 #endif
3122 #if defined(TFM_SQR64) && FP_SIZE >= 128
3123         if (y <= 64) {
3124            err = fp_sqr_comba64(A,B);
3125            goto clean;
3126         }
3127 #endif
3128        err = fp_sqr_comba(A, B);
3129 
3130 clean:
3131   /* zero any excess digits on the destination that we didn't write to */
3132   for (y = B->used; y >= 0 && y < oldused; y++) {
3133     B->dp[y] = 0;
3134   }
3135 
3136   return err;
3137 }
3138 
3139 /* generic comba squarer */
fp_sqr_comba(fp_int * A,fp_int * B)3140 int fp_sqr_comba(fp_int *A, fp_int *B)
3141 {
3142   int       pa, ix, iz;
3143   fp_digit  c0, c1, c2;
3144 #ifdef TFM_ISO
3145   fp_word   tt;
3146 #endif
3147    fp_int    *dst;
3148 #ifndef WOLFSSL_SMALL_STACK
3149    fp_int    tmp[1];
3150 #else
3151    fp_int    *tmp;
3152 #endif
3153 
3154 #ifdef WOLFSSL_SMALL_STACK
3155    tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
3156    if (tmp == NULL)
3157        return FP_MEM;
3158 #endif
3159 
3160   /* get size of output and trim */
3161   pa = A->used + A->used;
3162   if (pa >= FP_SIZE) {
3163      pa = FP_SIZE-1;
3164   }
3165 
3166   /* number of output digits to produce */
3167   COMBA_START;
3168   COMBA_CLEAR;
3169 
3170   if (A == B) {
3171      fp_init(tmp);
3172      dst = tmp;
3173   } else {
3174      fp_zero(B);
3175      dst = B;
3176   }
3177 
3178   for (ix = 0; ix < pa; ix++) {
3179       int      tx, ty, iy;
3180       fp_digit *tmpy, *tmpx;
3181 
3182       /* get offsets into the two bignums */
3183       ty = MIN(A->used-1, ix);
3184       tx = ix - ty;
3185 
3186       /* setup temp aliases */
3187       tmpx = A->dp + tx;
3188       tmpy = A->dp + ty;
3189 
3190       /* this is the number of times the loop will iterate,
3191          while (tx++ < a->used && ty-- >= 0) { ... }
3192        */
3193       iy = MIN(A->used-tx, ty+1);
3194 
3195       /* now for squaring tx can never equal ty
3196        * we halve the distance since they approach
3197        * at a rate of 2x and we have to round because
3198        * odd cases need to be executed
3199        */
3200       iy = MIN(iy, (ty-tx+1)>>1);
3201 
3202       /* forward carries */
3203       COMBA_FORWARD;
3204 
3205       /* execute loop */
3206       for (iz = 0; iz < iy; iz++) {
3207           SQRADD2(*tmpx++, *tmpy--);
3208       }
3209 
3210       /* even columns have the square term in them */
3211       if ((ix&1) == 0) {
3212           /* TAO change COMBA_ADD back to SQRADD */
3213           SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
3214       }
3215 
3216       /* store it */
3217       COMBA_STORE(dst->dp[ix]);
3218   }
3219 
3220   COMBA_FINI;
3221 
3222   /* setup dest */
3223   dst->used = pa;
3224   fp_clamp (dst);
3225   if (dst != B) {
3226      fp_copy(dst, B);
3227   }
3228 
3229   /* Variables used but not seen by cppcheck. */
3230   (void)c0; (void)c1; (void)c2;
3231 #ifdef TFM_ISO
3232   (void)tt;
3233 #endif
3234 
3235 #ifdef WOLFSSL_SMALL_STACK
3236   XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
3237 #endif
3238   return FP_OKAY;
3239 }
3240 
fp_cmp(fp_int * a,fp_int * b)3241 int fp_cmp(fp_int *a, fp_int *b)
3242 {
3243    if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
3244       return FP_LT;
3245    } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
3246       return FP_GT;
3247    } else {
3248       /* compare digits */
3249       if (a->sign == FP_NEG) {
3250          /* if negative compare opposite direction */
3251          return fp_cmp_mag(b, a);
3252       } else {
3253          return fp_cmp_mag(a, b);
3254       }
3255    }
3256 }
3257 
3258 /* compare against a single digit */
fp_cmp_d(fp_int * a,fp_digit b)3259 int fp_cmp_d(fp_int *a, fp_digit b)
3260 {
3261   /* special case for zero*/
3262   if (a->used == 0 && b == 0)
3263     return FP_EQ;
3264 
3265   /* compare based on sign */
3266   if ((b && a->used == 0) || a->sign == FP_NEG) {
3267     return FP_LT;
3268   }
3269 
3270   /* compare based on magnitude */
3271   if (a->used > 1) {
3272     return FP_GT;
3273   }
3274 
3275   /* compare the only digit of a to b */
3276   if (a->dp[0] > b) {
3277     return FP_GT;
3278   } else if (a->dp[0] < b) {
3279     return FP_LT;
3280   } else {
3281     return FP_EQ;
3282   }
3283 
3284 }
3285 
fp_cmp_mag(fp_int * a,fp_int * b)3286 int fp_cmp_mag(fp_int *a, fp_int *b)
3287 {
3288    int x;
3289 
3290    if (a->used > b->used) {
3291       return FP_GT;
3292    } else if (a->used < b->used) {
3293       return FP_LT;
3294    } else {
3295       for (x = a->used - 1; x >= 0; x--) {
3296           if (a->dp[x] > b->dp[x]) {
3297              return FP_GT;
3298           } else if (a->dp[x] < b->dp[x]) {
3299              return FP_LT;
3300           }
3301       }
3302    }
3303    return FP_EQ;
3304 }
3305 
3306 
3307 /* sets up the montgomery reduction */
fp_montgomery_setup(fp_int * a,fp_digit * rho)3308 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
3309 {
3310   fp_digit x, b;
3311 
3312 /* fast inversion mod 2**k
3313  *
3314  * Based on the fact that
3315  *
3316  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
3317  *                    =>  2*X*A - X*X*A*A = 1
3318  *                    =>  2*(1) - (1)     = 1
3319  */
3320   b = a->dp[0];
3321 
3322   if ((b & 1) == 0) {
3323     return FP_VAL;
3324   }
3325 
3326   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3327   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
3328   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
3329   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
3330 #ifdef FP_64BIT
3331   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
3332 #endif
3333 
3334   /* rho = -1/m mod b */
3335   *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
3336 
3337   return FP_OKAY;
3338 }
3339 
3340 /* computes a = B**n mod b without division or multiplication useful for
3341  * normalizing numbers in a Montgomery system.
3342  */
fp_montgomery_calc_normalization(fp_int * a,fp_int * b)3343 int fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
3344 {
3345   int     x, bits;
3346 
3347   /* how many bits of last digit does b use */
3348   bits = fp_count_bits (b) % DIGIT_BIT;
3349   if (!bits) bits = DIGIT_BIT;
3350 
3351   /* compute A = B^(n-1) * 2^(bits-1) */
3352   if (b->used > 1) {
3353      fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
3354   } else {
3355      fp_set(a, 1);
3356      bits = 1;
3357   }
3358 
3359   /* now compute C = A * B mod b */
3360   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3361     int err = fp_mul_2 (a, a);
3362     if (err != FP_OKAY) {
3363       return err;
3364     }
3365     if (fp_cmp_mag (a, b) != FP_LT) {
3366       s_fp_sub (a, b, a);
3367     }
3368   }
3369   return FP_OKAY;
3370 }
3371 
3372 
3373 #ifdef TFM_SMALL_MONT_SET
3374     #include "fp_mont_small.i"
3375 #endif
3376 
3377 #ifdef HAVE_INTEL_MULX
innermul8_mulx(fp_digit * c_mulx,fp_digit * cy_mulx,fp_digit * tmpm,fp_digit mu)3378 static WC_INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
3379 {
3380     fp_digit cy = *cy_mulx ;
3381     INNERMUL8_MULX ;
3382     *cy_mulx = cy ;
3383 }
3384 
3385 /* computes x/R == x (mod N) via Montgomery Reduction */
fp_montgomery_reduce_mulx(fp_int * a,fp_int * m,fp_digit mp,int ct)3386 static int fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp, int ct)
3387 {
3388 #ifndef WOLFSSL_SMALL_STACK
3389    fp_digit c[FP_SIZE+1];
3390 #else
3391    fp_digit *c;
3392 #endif
3393    fp_digit *_c, *tmpm, mu = 0;
3394    int      oldused, x, y, pa;
3395 
3396    /* bail if too large */
3397    if (m->used > (FP_SIZE/2)) {
3398       (void)mu;                     /* shut up compiler */
3399       return FP_VAL;
3400    }
3401 
3402 #ifdef TFM_SMALL_MONT_SET
3403    if (m->used <= 16) {
3404       return fp_montgomery_reduce_small(a, m, mp);
3405    }
3406 #endif
3407 
3408 #ifdef WOLFSSL_SMALL_STACK
3409    /* only allocate space for what's needed for window plus res */
3410    c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
3411    if (c == NULL) {
3412       return FP_MEM;
3413    }
3414 #endif
3415 
3416    /* now zero the buff */
3417    XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
3418    pa = m->used;
3419 
3420    /* copy the input */
3421 #ifdef TFM_TIMING_RESISTANT
3422    if (a->used <= m->used) {
3423       oldused = m->used;
3424    }
3425    else {
3426       oldused = m->used * 2;
3427    }
3428 #else
3429    oldused = a->used;
3430 #endif
3431    for (x = 0; x < oldused; x++) {
3432        c[x] = a->dp[x];
3433    }
3434    MONT_START;
3435 
3436    for (x = 0; x < pa; x++) {
3437        fp_digit cy = 0;
3438        /* get Mu for this round */
3439        LOOP_START;
3440        _c   = c + x;
3441        tmpm = m->dp;
3442        y = 0;
3443         for (; y < (pa & ~7); y += 8) {
3444               innermul8_mulx(_c, &cy, tmpm, mu) ;
3445               _c   += 8;
3446               tmpm += 8;
3447            }
3448        for (; y < pa; y++) {
3449           INNERMUL;
3450           ++_c;
3451        }
3452        LOOP_END;
3453        while (cy) {
3454            PROPCARRY;
3455            ++_c;
3456        }
3457   }
3458 
3459   /* now copy out */
3460   _c   = c + pa;
3461   tmpm = a->dp;
3462   for (x = 0; x < pa+1; x++) {
3463      *tmpm++ = *_c++;
3464   }
3465 
3466   /* zero any excess digits on the destination that we didn't write to */
3467   for (; x < oldused; x++) {
3468      *tmpm++ = 0;
3469   }
3470 
3471   MONT_FINI;
3472 
3473   a->used = pa+1;
3474   fp_clamp(a);
3475 
3476 #ifndef WOLFSSL_MONT_RED_CT
3477   /* if A >= m then A = A - m */
3478   if (fp_cmp_mag (a, m) != FP_LT) {
3479     s_fp_sub (a, m, a);
3480   }
3481   (void)ct;
3482 #else
3483   if (ct) {
3484     fp_submod_ct(a, m, m, a);
3485   }
3486   else if (fp_cmp_mag (a, m) != FP_LT) {
3487     s_fp_sub (a, m, a);
3488   }
3489 #endif
3490 
3491 #ifdef WOLFSSL_SMALL_STACK
3492   XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
3493 #endif
3494   return FP_OKAY;
3495 }
3496 #endif
3497 
3498 /* computes x/R == x (mod N) via Montgomery Reduction */
fp_montgomery_reduce_ex(fp_int * a,fp_int * m,fp_digit mp,int ct)3499 int fp_montgomery_reduce_ex(fp_int *a, fp_int *m, fp_digit mp, int ct)
3500 {
3501 #ifndef WOLFSSL_SMALL_STACK
3502    fp_digit c[FP_SIZE+1];
3503 #else
3504    fp_digit *c;
3505 #endif
3506    fp_digit *_c, *tmpm, mu = 0;
3507    int      oldused, x, y, pa, err = 0;
3508 
3509    IF_HAVE_INTEL_MULX(err=fp_montgomery_reduce_mulx(a, m, mp, ct), return err) ;
3510    (void)err;
3511 
3512    /* bail if too large */
3513    if (m->used > (FP_SIZE/2)) {
3514       (void)mu;                     /* shut up compiler */
3515       return FP_VAL;
3516    }
3517 
3518 #ifdef TFM_SMALL_MONT_SET
3519    if (m->used <= 16) {
3520       return fp_montgomery_reduce_small(a, m, mp);
3521    }
3522 #endif
3523 
3524 #ifdef WOLFSSL_SMALL_STACK
3525    /* only allocate space for what's needed for window plus res */
3526    c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
3527    if (c == NULL) {
3528       return FP_MEM;
3529    }
3530 #endif
3531 
3532    /* now zero the buff */
3533    XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
3534    pa = m->used;
3535 
3536    /* copy the input */
3537 #ifdef TFM_TIMING_RESISTANT
3538    if (a->used <= m->used) {
3539       oldused = m->used;
3540    }
3541    else {
3542       oldused = m->used * 2;
3543    }
3544 #else
3545    oldused = a->used;
3546 #endif
3547    for (x = 0; x < oldused; x++) {
3548        c[x] = a->dp[x];
3549    }
3550    MONT_START;
3551 
3552    for (x = 0; x < pa; x++) {
3553        fp_digit cy = 0;
3554        /* get Mu for this round */
3555        LOOP_START;
3556        _c   = c + x;
3557        tmpm = m->dp;
3558        y = 0;
3559 #if defined(INNERMUL8)
3560         for (; y < (pa & ~7); y += 8) {
3561               INNERMUL8 ;
3562               _c   += 8;
3563               tmpm += 8;
3564            }
3565 #endif
3566        for (; y < pa; y++) {
3567           INNERMUL;
3568           ++_c;
3569        }
3570        LOOP_END;
3571        while (cy) {
3572            PROPCARRY;
3573            ++_c;
3574        }
3575   }
3576 
3577   /* now copy out */
3578   _c   = c + pa;
3579   tmpm = a->dp;
3580   for (x = 0; x < pa+1; x++) {
3581      *tmpm++ = *_c++;
3582   }
3583 
3584   /* zero any excess digits on the destination that we didn't write to */
3585   for (; x < oldused; x++) {
3586      *tmpm++ = 0;
3587   }
3588 
3589   MONT_FINI;
3590 
3591   a->used = pa+1;
3592   fp_clamp(a);
3593 
3594 #ifndef WOLFSSL_MONT_RED_CT
3595   /* if A >= m then A = A - m */
3596   if (fp_cmp_mag (a, m) != FP_LT) {
3597     s_fp_sub (a, m, a);
3598   }
3599   (void)ct;
3600 #else
3601   if (ct) {
3602     fp_submod_ct(a, m, m, a);
3603   }
3604   else if (fp_cmp_mag (a, m) != FP_LT) {
3605     s_fp_sub (a, m, a);
3606   }
3607 #endif
3608 
3609 #ifdef WOLFSSL_SMALL_STACK
3610   XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
3611 #endif
3612   return FP_OKAY;
3613 }
3614 
fp_montgomery_reduce(fp_int * a,fp_int * m,fp_digit mp)3615 int fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
3616 {
3617   return fp_montgomery_reduce_ex(a, m, mp, 1);
3618 }
3619 
fp_read_unsigned_bin(fp_int * a,const unsigned char * b,int c)3620 int fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
3621 {
3622 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
3623   const word32 maxC = (a->size * sizeof(fp_digit));
3624 #else
3625   const word32 maxC = (FP_SIZE * sizeof(fp_digit));
3626 #endif
3627 
3628   /* zero the int */
3629   fp_zero (a);
3630 
3631   /* if input b excess max, then truncate */
3632   if (c > 0 && (word32)c > maxC) {
3633      int excess = (c - maxC);
3634      c -= excess;
3635      b += excess;
3636   }
3637 
3638 /* Not both endian simultaneously */
3639 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
3640 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
3641 #endif
3642 
3643 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \
3644     (defined(FP_32BIT) || defined(FP_64BIT))
3645 #ifdef FP_32BIT
3646   /* If we know the endianness of this architecture, and we're using
3647      32-bit fp_digits, we can optimize this */
3648   {
3649      unsigned char *pd = (unsigned char *)a->dp;
3650 
3651      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
3652 #ifdef BIG_ENDIAN_ORDER
3653      {
3654        /* Use Duff's device to unroll the loop. */
3655        int idx = (c - 1) & ~3;
3656        switch (c % 4) {
3657        case 0:    do { pd[idx+0] = *b++; FALL_THROUGH;
3658        case 3:         pd[idx+1] = *b++; FALL_THROUGH;
3659        case 2:         pd[idx+2] = *b++; FALL_THROUGH;
3660        case 1:         pd[idx+3] = *b++;
3661                      idx -= 4;
3662                  } while ((c -= 4) > 0);
3663        }
3664      }
3665 #else
3666      /* read the bytes in one at a time. */
3667      for (c -= 1; c >= 0; c -= 1) {
3668        pd[c] = *b++;
3669      }
3670 #endif
3671   }
3672 #elif defined(FP_64BIT)
3673   /* If we know the endianness of this architecture, and we're using
3674      64-bit fp_digits, we can optimize this */
3675   {
3676      unsigned char *pd = (unsigned char *)a->dp;
3677 
3678      a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
3679 #ifdef BIG_ENDIAN_ORDER
3680      {
3681        /* Use Duff's device to unroll the loop. */
3682        int idx = (c - 1) & ~7;
3683        switch (c % 8) {
3684        case 0:    do { pd[idx+0] = *b++; FALL_THROUGH;
3685        case 7:         pd[idx+1] = *b++; FALL_THROUGH;
3686        case 6:         pd[idx+2] = *b++; FALL_THROUGH;
3687        case 5:         pd[idx+3] = *b++; FALL_THROUGH;
3688        case 4:         pd[idx+4] = *b++; FALL_THROUGH;
3689        case 3:         pd[idx+5] = *b++; FALL_THROUGH;
3690        case 2:         pd[idx+6] = *b++; FALL_THROUGH;
3691        case 1:         pd[idx+7] = *b++;
3692                      idx -= 8;
3693                  } while ((c -= 8) > 0);
3694        }
3695      }
3696 #else
3697      /* read the bytes in one at a time. */
3698      for (c -= 1; c >= 0; c -= 1) {
3699        pd[c] = *b++;
3700      }
3701 #endif
3702   }
3703 #endif
3704 #else
3705   /* read the bytes in one at a time - unknown number of bits in digit */
3706   for (; c > 0; c--) {
3707      int err = fp_mul_2d (a, 8, a);
3708      if (err != FP_OKAY) {
3709          return err;
3710      }
3711      a->dp[0] |= *b++;
3712 
3713      if (a->used == 0) {
3714          a->used = 1;
3715      }
3716   }
3717 #endif
3718   fp_clamp (a);
3719 
3720   return FP_OKAY;
3721 }
3722 
fp_to_unsigned_bin_at_pos(int x,fp_int * t,unsigned char * b)3723 int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
3724 {
3725 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
3726    int i;
3727    int j = 0;
3728    fp_digit n;
3729 
3730    for (i = 0; i < t->used-1; ) {
3731        b[x++] = (unsigned char)(t->dp[i] >> j);
3732        j += 8;
3733        i += j == DIGIT_BIT;
3734        j &= DIGIT_BIT - 1;
3735    }
3736    n = t->dp[i];
3737    while (n != 0) {
3738        b[x++] = (unsigned char)n;
3739        n >>= 8;
3740    }
3741    return x;
3742 #else
3743    while (fp_iszero (t) == FP_NO) {
3744       b[x++] = (unsigned char) (t->dp[0] & 255);
3745       fp_div_2d (t, 8, t, NULL);
3746   }
3747   return x;
3748 #endif
3749 }
3750 
fp_to_unsigned_bin(fp_int * a,unsigned char * b)3751 int fp_to_unsigned_bin(fp_int *a, unsigned char *b)
3752 {
3753   int     x;
3754 #ifndef WOLFSSL_SMALL_STACK
3755    fp_int t[1];
3756 #else
3757    fp_int *t;
3758 #endif
3759 
3760 #ifdef WOLFSSL_SMALL_STACK
3761    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
3762    if (t == NULL)
3763        return FP_MEM;
3764 #endif
3765 
3766   fp_init_copy(t, a);
3767 
3768   x = fp_to_unsigned_bin_at_pos(0, t, b);
3769   fp_reverse (b, x);
3770 
3771 #ifdef WOLFSSL_SMALL_STACK
3772   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
3773 #endif
3774   return FP_OKAY;
3775 }
3776 
fp_to_unsigned_bin_len(fp_int * a,unsigned char * b,int c)3777 int fp_to_unsigned_bin_len(fp_int *a, unsigned char *b, int c)
3778 {
3779 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
3780   int i = 0;
3781   int j = 0;
3782   int x;
3783 
3784   for (x=c-1; x >= 0 && i < a->used; x--) {
3785      b[x] = (unsigned char)(a->dp[i] >> j);
3786      j += 8;
3787      i += j == DIGIT_BIT;
3788      j &= DIGIT_BIT - 1;
3789   }
3790   for (; x >= 0; x--) {
3791      b[x] = 0;
3792   }
3793 
3794   return FP_OKAY;
3795 #else
3796   int     x;
3797 #ifndef WOLFSSL_SMALL_STACK
3798    fp_int t[1];
3799 #else
3800    fp_int *t;
3801 #endif
3802 
3803 #ifdef WOLFSSL_SMALL_STACK
3804    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
3805    if (t == NULL)
3806        return FP_MEM;
3807 #endif
3808 
3809   fp_init_copy(t, a);
3810 
3811   for (x = 0; x < c; x++) {
3812       b[x] = (unsigned char) (t->dp[0] & 255);
3813       fp_div_2d (t, 8, t, NULL);
3814   }
3815   fp_reverse (b, x);
3816 
3817 #ifdef WOLFSSL_SMALL_STACK
3818   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
3819 #endif
3820   return FP_OKAY;
3821 #endif
3822 }
3823 
fp_unsigned_bin_size(const fp_int * a)3824 int fp_unsigned_bin_size(const fp_int *a)
3825 {
3826   int     size = fp_count_bits (a);
3827   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3828 }
3829 
fp_set(fp_int * a,fp_digit b)3830 void fp_set(fp_int *a, fp_digit b)
3831 {
3832    fp_zero(a);
3833    a->dp[0] = b;
3834    a->used  = a->dp[0] ? 1 : 0;
3835 }
3836 
3837 
3838 #ifndef MP_SET_CHUNK_BITS
3839     #define MP_SET_CHUNK_BITS 4
3840 #endif
fp_set_int(fp_int * a,unsigned long b)3841 int fp_set_int(fp_int *a, unsigned long b)
3842 {
3843   int x;
3844 
3845   /* use direct fp_set if b is less than fp_digit max
3846    * If input max value of b down shift by 1 less than full range
3847    * fp_digit, then condition is always true. */
3848 #if ((ULONG_MAX >> (DIGIT_BIT-1)) > 0)
3849   if (b < FP_DIGIT_MAX)
3850 #endif
3851   {
3852     fp_set (a, (fp_digit)b);
3853     return FP_OKAY;
3854   }
3855 
3856   fp_zero (a);
3857 
3858   /* set chunk bits at a time */
3859   for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
3860     int err = fp_mul_2d (a, MP_SET_CHUNK_BITS, a);
3861     if (err != FP_OKAY)
3862         return err;
3863 
3864     /* OR in the top bits of the source */
3865     a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
3866                                   ((1 << MP_SET_CHUNK_BITS) - 1);
3867 
3868     /* shift the source up to the next chunk bits */
3869     b <<= MP_SET_CHUNK_BITS;
3870 
3871     /* ensure that digits are not clamped off */
3872     a->used += 1;
3873   }
3874 
3875   /* clamp digits */
3876   fp_clamp(a);
3877 
3878   return FP_OKAY;
3879 }
3880 
3881 /* check if a bit is set */
fp_is_bit_set(fp_int * a,fp_digit b)3882 int fp_is_bit_set (fp_int *a, fp_digit b)
3883 {
3884     fp_digit i;
3885 
3886     if (b > FP_MAX_BITS)
3887         return FP_VAL;
3888 
3889     i = b/DIGIT_BIT;
3890 
3891     if ((fp_digit)a->used < i)
3892         return 0;
3893 
3894     return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1);
3895 }
3896 
3897 /* set the b bit of a */
fp_set_bit(fp_int * a,fp_digit b)3898 int fp_set_bit (fp_int * a, fp_digit b)
3899 {
3900     fp_digit i;
3901 
3902     if (b > FP_MAX_BITS)
3903         return FP_VAL;
3904 
3905     i = b/DIGIT_BIT;
3906 
3907     /* set the used count of where the bit will go if required */
3908     if (a->used < (int)(i+1))
3909         a->used = (int)(i+1);
3910 
3911     /* put the single bit in its place */
3912     a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT);
3913 
3914     return MP_OKAY;
3915 }
3916 
fp_count_bits(const fp_int * a)3917 int fp_count_bits (const fp_int * a)
3918 {
3919   int     r;
3920   fp_digit q;
3921 
3922   /* shortcut */
3923   if (a->used == 0) {
3924     return 0;
3925   }
3926 
3927   /* get number of digits and add that */
3928   r = (a->used - 1) * DIGIT_BIT;
3929 
3930   /* take the last digit and count the bits in it */
3931   q = a->dp[a->used - 1];
3932   while (q > ((fp_digit) 0)) {
3933     ++r;
3934     q >>= ((fp_digit) 1);
3935   }
3936 
3937   return r;
3938 }
3939 
fp_leading_bit(fp_int * a)3940 int fp_leading_bit(fp_int *a)
3941 {
3942     int bit = 0;
3943 
3944     if (a->used != 0) {
3945         fp_digit q = a->dp[a->used - 1];
3946         int qSz = sizeof(fp_digit);
3947 
3948         while (qSz > 0) {
3949             if ((unsigned char)q != 0)
3950                 bit = (q & 0x80) != 0;
3951             q >>= 8;
3952             qSz--;
3953         }
3954     }
3955 
3956     return bit;
3957 }
3958 
fp_lshd(fp_int * a,int x)3959 int fp_lshd(fp_int *a, int x)
3960 {
3961     int y;
3962 
3963     if (a->used + x > FP_SIZE) return FP_VAL;
3964 
3965     y = a->used + x - 1;
3966 
3967     /* store new size */
3968     a->used = y + 1;
3969 
3970     /* move digits */
3971     for (; y >= x; y--) {
3972         a->dp[y] = a->dp[y-x];
3973     }
3974 
3975     /* zero lower digits */
3976     for (; y >= 0; y--) {
3977         a->dp[y] = 0;
3978     }
3979 
3980     /* clamp digits */
3981     fp_clamp(a);
3982     return FP_OKAY;
3983 }
3984 
3985 
3986 /* right shift by bit count */
fp_rshb(fp_int * c,int x)3987 void fp_rshb(fp_int *c, int x)
3988 {
3989     fp_digit *tmpc, mask, shift;
3990     fp_digit r, rr;
3991     fp_digit D = x;
3992 
3993     /* shifting by a negative number not supported, and shifting by
3994      * zero changes nothing.
3995      */
3996     if (x <= 0) return;
3997 
3998     /* shift digits first if needed */
3999     if (x >= DIGIT_BIT) {
4000         fp_rshd(c, x / DIGIT_BIT);
4001         /* recalculate number of bits to shift */
4002         D = x % DIGIT_BIT;
4003         /* check if any more shifting needed */
4004         if (D == 0) return;
4005 
4006     }
4007 
4008     /* zero shifted is always zero */
4009     if (fp_iszero(c)) return;
4010 
4011     /* mask */
4012     mask = (((fp_digit)1) << D) - 1;
4013 
4014     /* shift for lsb */
4015     shift = DIGIT_BIT - D;
4016 
4017     /* alias */
4018     tmpc = c->dp + (c->used - 1);
4019 
4020     /* carry */
4021     r = 0;
4022     for (x = c->used - 1; x >= 0; x--) {
4023       /* get the lower  bits of this word in a temp */
4024       rr = *tmpc & mask;
4025 
4026       /* shift the current word and mix in the carry bits from previous word */
4027       *tmpc = (*tmpc >> D) | (r << shift);
4028       --tmpc;
4029 
4030       /* set the carry to the carry bits of the current word found above */
4031       r = rr;
4032     }
4033 
4034     /* clamp digits */
4035     fp_clamp(c);
4036 }
4037 
4038 
fp_rshd(fp_int * a,int x)4039 void fp_rshd(fp_int *a, int x)
4040 {
4041   int y;
4042 
4043   /* too many digits just zero and return */
4044   if (x >= a->used) {
4045      fp_zero(a);
4046      return;
4047   }
4048 
4049    /* shift */
4050    for (y = 0; y < a->used - x; y++) {
4051       a->dp[y] = a->dp[y+x];
4052    }
4053 
4054    /* zero rest */
4055    for (; y < a->used; y++) {
4056       a->dp[y] = 0;
4057    }
4058 
4059    /* decrement count */
4060    a->used -= x;
4061    fp_clamp(a);
4062 }
4063 
4064 /* reverse an array, used for radix code */
fp_reverse(unsigned char * s,int len)4065 void fp_reverse (unsigned char *s, int len)
4066 {
4067   int     ix, iy;
4068   unsigned char t;
4069 
4070   ix = 0;
4071   iy = len - 1;
4072   while (ix < iy) {
4073     t     = s[ix];
4074     s[ix] = s[iy];
4075     s[iy] = t;
4076     ++ix;
4077     --iy;
4078   }
4079 }
4080 
4081 
4082 /* c = a - b */
fp_sub_d(fp_int * a,fp_digit b,fp_int * c)4083 int fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
4084 {
4085 #ifndef WOLFSSL_SMALL_STACK
4086    fp_int    tmp[1];
4087 #else
4088    fp_int    *tmp;
4089 #endif
4090    int       err = FP_OKAY;
4091 
4092 #ifdef WOLFSSL_SMALL_STACK
4093    tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4094    if (tmp == NULL)
4095        return FP_MEM;
4096 #endif
4097 
4098    fp_init(tmp);
4099    fp_set(tmp, b);
4100 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4101    if (c->size < FP_SIZE) {
4102      err = fp_sub(a, tmp, tmp);
4103      fp_copy(tmp, c);
4104    } else
4105 #endif
4106    {
4107      err = fp_sub(a, tmp, c);
4108    }
4109 
4110 #ifdef WOLFSSL_SMALL_STACK
4111    XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
4112 #endif
4113    return err;
4114 }
4115 
4116 
4117 /* wolfSSL callers from normal lib */
4118 
4119 /* init a new mp_int */
mp_init(mp_int * a)4120 int mp_init (mp_int * a)
4121 {
4122   if (a)
4123     fp_init(a);
4124   return MP_OKAY;
4125 }
4126 
fp_init(fp_int * a)4127 void fp_init(fp_int *a)
4128 {
4129 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4130     a->size = FP_SIZE;
4131 #endif
4132 #ifdef HAVE_WOLF_BIGINT
4133     wc_bigint_init(&a->raw);
4134 #endif
4135     fp_zero(a);
4136 }
4137 
fp_zero(fp_int * a)4138 void fp_zero(fp_int *a)
4139 {
4140     int size;
4141     a->used = 0;
4142     a->sign = FP_ZPOS;
4143 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4144     size = a->size;
4145 #else
4146     size = FP_SIZE;
4147 #endif
4148     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
4149 }
4150 
fp_clear(fp_int * a)4151 void fp_clear(fp_int *a)
4152 {
4153     int size;
4154     a->used = 0;
4155     a->sign = FP_ZPOS;
4156 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4157     size = a->size;
4158 #else
4159     size = FP_SIZE;
4160 #endif
4161     XMEMSET(a->dp, 0, size * sizeof(fp_digit));
4162     fp_free(a);
4163 }
4164 
fp_forcezero(mp_int * a)4165 void fp_forcezero (mp_int * a)
4166 {
4167     int size;
4168     a->used = 0;
4169     a->sign = FP_ZPOS;
4170 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4171     size = a->size;
4172 #else
4173     size = FP_SIZE;
4174 #endif
4175     ForceZero(a->dp, size * sizeof(fp_digit));
4176 #ifdef HAVE_WOLF_BIGINT
4177     wc_bigint_zero(&a->raw);
4178 #endif
4179     fp_free(a);
4180 }
4181 
mp_forcezero(mp_int * a)4182 void mp_forcezero (mp_int * a)
4183 {
4184     fp_forcezero(a);
4185 }
4186 
fp_free(fp_int * a)4187 void fp_free(fp_int* a)
4188 {
4189 #ifdef HAVE_WOLF_BIGINT
4190     wc_bigint_free(&a->raw);
4191 #else
4192     (void)a;
4193 #endif
4194 }
4195 
4196 
4197 /* clear one (frees)  */
mp_clear(mp_int * a)4198 void mp_clear (mp_int * a)
4199 {
4200     if (a == NULL)
4201         return;
4202     fp_clear(a);
4203 }
4204 
mp_free(mp_int * a)4205 void mp_free(mp_int* a)
4206 {
4207     fp_free(a);
4208 }
4209 
4210 /* handle up to 6 inits */
mp_init_multi(mp_int * a,mp_int * b,mp_int * c,mp_int * d,mp_int * e,mp_int * f)4211 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d,
4212                   mp_int* e, mp_int* f)
4213 {
4214     if (a)
4215         fp_init(a);
4216     if (b)
4217         fp_init(b);
4218     if (c)
4219         fp_init(c);
4220     if (d)
4221         fp_init(d);
4222     if (e)
4223         fp_init(e);
4224     if (f)
4225         fp_init(f);
4226 
4227     return MP_OKAY;
4228 }
4229 
4230 /* high level addition (handles signs) */
mp_add(mp_int * a,mp_int * b,mp_int * c)4231 int mp_add (mp_int * a, mp_int * b, mp_int * c)
4232 {
4233   return fp_add(a, b, c);
4234 }
4235 
4236 /* high level subtraction (handles signs) */
mp_sub(mp_int * a,mp_int * b,mp_int * c)4237 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
4238 {
4239   return fp_sub(a, b, c);
4240 }
4241 
4242 /* high level multiplication (handles sign) */
4243 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_mul(mp_int * a,mp_int * b,mp_int * c)4244 int wolfcrypt_mp_mul(mp_int * a, mp_int * b, mp_int * c)
4245 #else
4246 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
4247 #endif
4248 {
4249   return fp_mul(a, b, c);
4250 }
4251 
mp_mul_d(mp_int * a,mp_digit b,mp_int * c)4252 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
4253 {
4254   return fp_mul_d(a, b, c);
4255 }
4256 
4257 /* d = a * b (mod c) */
4258 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_mulmod(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4259 int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
4260 #else
4261 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
4262 #endif
4263 {
4264  #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
4265     !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
4266     int A = fp_count_bits (a);
4267     int B = fp_count_bits (b);
4268 
4269     if( A >= ESP_RSA_MULM_BITS && B >= ESP_RSA_MULM_BITS)
4270         return esp_mp_mulmod(a, b, c, d);
4271     else
4272  #endif
4273    return fp_mulmod(a, b, c, d);
4274 }
4275 
4276 /* d = a - b (mod c) */
mp_submod(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4277 int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4278 {
4279   return fp_submod(a, b, c, d);
4280 }
4281 
4282 /* d = a + b (mod c) */
mp_addmod(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4283 int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4284 {
4285   return fp_addmod(a, b, c, d);
4286 }
4287 
4288 /* d = a - b (mod c) - constant time (a < c and b < c) */
mp_submod_ct(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4289 int mp_submod_ct(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4290 {
4291   return fp_submod_ct(a, b, c, d);
4292 }
4293 
4294 /* d = a + b (mod c) - constant time (a < c and b < c) */
mp_addmod_ct(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4295 int mp_addmod_ct(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4296 {
4297   return fp_addmod_ct(a, b, c, d);
4298 }
4299 
4300 /* c = a mod b, 0 <= c < b */
4301 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_mod(mp_int * a,mp_int * b,mp_int * c)4302 int wolfcrypt_mp_mod (mp_int * a, mp_int * b, mp_int * c)
4303 #else
4304 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
4305 #endif
4306 {
4307   return fp_mod (a, b, c);
4308 }
4309 
4310 /* hac 14.61, pp608 */
4311 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_invmod(mp_int * a,mp_int * b,mp_int * c)4312 int wolfcrypt_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
4313 #else
4314 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
4315 #endif
4316 {
4317   return fp_invmod(a, b, c);
4318 }
4319 
4320 /* hac 14.61, pp608 */
mp_invmod_mont_ct(mp_int * a,mp_int * b,mp_int * c,mp_digit mp)4321 int mp_invmod_mont_ct (mp_int * a, mp_int * b, mp_int * c, mp_digit mp)
4322 {
4323   return fp_invmod_mont_ct(a, b, c, mp);
4324 }
4325 
4326 /* this is a shell function that calls either the normal or Montgomery
4327  * exptmod functions.  Originally the call to the montgomery code was
4328  * embedded in the normal function but that wasted a lot of stack space
4329  * for nothing (since 99% of the time the Montgomery code would be called)
4330  */
4331 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_exptmod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4332 int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4333 #else
4334 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4335 #endif
4336 {
4337   return fp_exptmod(G, X, P, Y);
4338 }
4339 
mp_exptmod_ex(mp_int * G,mp_int * X,int digits,mp_int * P,mp_int * Y)4340 int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
4341 {
4342   return fp_exptmod_ex(G, X, digits, P, Y);
4343 }
4344 
4345 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_exptmod_nct(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4346 int wolfcrypt_mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4347 #else
4348 int mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4349 #endif
4350 {
4351   return fp_exptmod_nct(G, X, P, Y);
4352 }
4353 
4354 
4355 /* compare two ints (signed)*/
mp_cmp(mp_int * a,mp_int * b)4356 int mp_cmp (mp_int * a, mp_int * b)
4357 {
4358   return fp_cmp(a, b);
4359 }
4360 
4361 /* compare a digit */
mp_cmp_d(mp_int * a,mp_digit b)4362 int mp_cmp_d(mp_int * a, mp_digit b)
4363 {
4364   return fp_cmp_d(a, b);
4365 }
4366 
4367 /* get the size for an unsigned equivalent */
mp_unsigned_bin_size(const mp_int * a)4368 int mp_unsigned_bin_size (const mp_int * a)
4369 {
4370   return fp_unsigned_bin_size(a);
4371 }
4372 
mp_to_unsigned_bin_at_pos(int x,fp_int * t,unsigned char * b)4373 int mp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
4374 {
4375   return fp_to_unsigned_bin_at_pos(x, t, b);
4376 }
4377 
4378 /* store in unsigned [big endian] format */
mp_to_unsigned_bin(mp_int * a,unsigned char * b)4379 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
4380 {
4381   return fp_to_unsigned_bin(a,b);
4382 }
4383 
mp_to_unsigned_bin_len(mp_int * a,unsigned char * b,int c)4384 int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
4385 {
4386   return fp_to_unsigned_bin_len(a, b, c);
4387 }
4388 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
mp_read_unsigned_bin(mp_int * a,const unsigned char * b,int c)4389 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
4390 {
4391   return fp_read_unsigned_bin(a, b, c);
4392 }
4393 
4394 
mp_sub_d(fp_int * a,fp_digit b,fp_int * c)4395 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
4396 {
4397   return fp_sub_d(a, b, c);
4398 }
4399 
mp_mul_2d(fp_int * a,int b,fp_int * c)4400 int mp_mul_2d(fp_int *a, int b, fp_int *c)
4401 {
4402   return fp_mul_2d(a, b, c);
4403 }
4404 
mp_2expt(fp_int * a,int b)4405 int mp_2expt(fp_int* a, int b)
4406 {
4407   fp_2expt(a, b);
4408   return MP_OKAY;
4409 }
4410 
mp_div(fp_int * a,fp_int * b,fp_int * c,fp_int * d)4411 int mp_div(fp_int * a, fp_int * b, fp_int * c, fp_int * d)
4412 {
4413   return fp_div(a, b, c, d);
4414 }
4415 
mp_div_2d(fp_int * a,int b,fp_int * c,fp_int * d)4416 int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
4417 {
4418   fp_div_2d(a, b, c, d);
4419   return MP_OKAY;
4420 }
4421 
fp_copy(const fp_int * a,fp_int * b)4422 void fp_copy(const fp_int *a, fp_int *b)
4423 {
4424     /* if source and destination are different */
4425     if (a != b) {
4426 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4427         /* verify a will fit in b */
4428         if (b->size >= a->used) {
4429             int x, oldused;
4430             oldused = b->used;
4431             b->used = a->used;
4432             b->sign = a->sign;
4433 
4434             XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
4435 
4436             /* zero any excess digits on the destination that we didn't write to */
4437             for (x = b->used; x >= 0 && x < oldused; x++) {
4438                 b->dp[x] = 0;
4439             }
4440         }
4441         else {
4442             /* TODO: Handle error case */
4443         }
4444 #else
4445         /* all dp's are same size, so do straight copy */
4446         b->used = a->used;
4447         b->sign = a->sign;
4448         XMEMCPY(b->dp, a->dp, FP_SIZE * sizeof(fp_digit));
4449 #endif
4450     }
4451 }
4452 
mp_init_copy(fp_int * a,fp_int * b)4453 int mp_init_copy(fp_int * a, fp_int * b)
4454 {
4455     fp_init_copy(a, b);
4456     return MP_OKAY;
4457 }
4458 
fp_init_copy(fp_int * a,fp_int * b)4459 void fp_init_copy(fp_int *a, fp_int* b)
4460 {
4461     if (a != b) {
4462         fp_init(a);
4463         fp_copy(b, a);
4464     }
4465 }
4466 
4467 /* fast math wrappers */
mp_copy(const fp_int * a,fp_int * b)4468 int mp_copy(const fp_int* a, fp_int* b)
4469 {
4470     fp_copy(a, b);
4471     return MP_OKAY;
4472 }
4473 
mp_isodd(mp_int * a)4474 int mp_isodd(mp_int* a)
4475 {
4476     return fp_isodd(a);
4477 }
4478 
mp_iszero(mp_int * a)4479 int mp_iszero(mp_int* a)
4480 {
4481     return fp_iszero(a);
4482 }
4483 
mp_count_bits(const mp_int * a)4484 int mp_count_bits (const mp_int* a)
4485 {
4486     return fp_count_bits(a);
4487 }
4488 
mp_leading_bit(mp_int * a)4489 int mp_leading_bit (mp_int* a)
4490 {
4491     return fp_leading_bit(a);
4492 }
4493 
mp_rshb(mp_int * a,int x)4494 void mp_rshb (mp_int* a, int x)
4495 {
4496     fp_rshb(a, x);
4497 }
4498 
mp_rshd(mp_int * a,int x)4499 void mp_rshd (mp_int* a, int x)
4500 {
4501     fp_rshd(a, x);
4502 }
4503 
mp_set_int(mp_int * a,unsigned long b)4504 int mp_set_int(mp_int *a, unsigned long b)
4505 {
4506     return fp_set_int(a, b);
4507 }
4508 
mp_is_bit_set(mp_int * a,mp_digit b)4509 int mp_is_bit_set (mp_int *a, mp_digit b)
4510 {
4511     return fp_is_bit_set(a, b);
4512 }
4513 
mp_set_bit(mp_int * a,mp_digit b)4514 int mp_set_bit(mp_int *a, mp_digit b)
4515 {
4516     return fp_set_bit(a, b);
4517 }
4518 
4519 #if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC) || !defined(NO_DH) || \
4520     !defined(NO_DSA) || !defined(NO_RSA)
4521 
4522 /* c = a * a (mod b) */
fp_sqrmod(fp_int * a,fp_int * b,fp_int * c)4523 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
4524 {
4525   int err;
4526 #ifndef WOLFSSL_SMALL_STACK
4527    fp_int t[1];
4528 #else
4529    fp_int *t;
4530 #endif
4531 
4532 #ifdef WOLFSSL_SMALL_STACK
4533    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4534    if (t == NULL)
4535        return FP_MEM;
4536 #endif
4537 
4538   fp_init(t);
4539   err = fp_sqr(a, t);
4540   if (err == FP_OKAY) {
4541   #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4542     if (c->size < FP_SIZE) {
4543       err = fp_mod(t, b, t);
4544       fp_copy(t, c);
4545     }
4546     else
4547   #endif
4548     {
4549       err = fp_mod(t, b, c);
4550     }
4551   }
4552 
4553 #ifdef WOLFSSL_SMALL_STACK
4554   XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4555 #endif
4556   return err;
4557 }
4558 
4559 /* fast math conversion */
mp_sqrmod(mp_int * a,mp_int * b,mp_int * c)4560 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
4561 {
4562     return fp_sqrmod(a, b, c);
4563 }
4564 
4565 /* fast math conversion */
mp_montgomery_calc_normalization(mp_int * a,mp_int * b)4566 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
4567 {
4568     return fp_montgomery_calc_normalization(a, b);
4569 }
4570 
4571 #endif /* WOLFSSL_KEY_GEN || HAVE_ECC */
4572 
fp_cond_swap_ct(mp_int * a,mp_int * b,int c,int m)4573 static int fp_cond_swap_ct (mp_int * a, mp_int * b, int c, int m)
4574 {
4575     int i;
4576     mp_digit mask = (mp_digit)0 - m;
4577 #ifndef WOLFSSL_SMALL_STACK
4578     fp_int  t[1];
4579 #else
4580     fp_int* t;
4581 #endif
4582 
4583 #ifdef WOLFSSL_SMALL_STACK
4584    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4585    if (t == NULL)
4586        return FP_MEM;
4587 #endif
4588 
4589     t->used = (a->used ^ b->used) & mask;
4590     for (i = 0; i < c; i++) {
4591         t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
4592     }
4593     a->used ^= t->used;
4594     for (i = 0; i < c; i++) {
4595         a->dp[i] ^= t->dp[i];
4596     }
4597     b->used ^= t->used;
4598     for (i = 0; i < c; i++) {
4599         b->dp[i] ^= t->dp[i];
4600     }
4601 
4602 #ifdef WOLFSSL_SMALL_STACK
4603     XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4604 #endif
4605     return FP_OKAY;
4606 }
4607 
4608 
4609 #if defined(WC_MP_TO_RADIX) || !defined(NO_DH) || !defined(NO_DSA) || \
4610     !defined(NO_RSA)
4611 
4612 #ifdef WOLFSSL_KEY_GEN
4613 /* swap the elements of two integers, for cases where you can't simply swap the
4614  * mp_int pointers around
4615  */
fp_exch(fp_int * a,fp_int * b)4616 static int fp_exch (fp_int * a, fp_int * b)
4617 {
4618 #ifndef WOLFSSL_SMALL_STACK
4619     fp_int  t[1];
4620 #else
4621     fp_int *t;
4622 #endif
4623 
4624 #ifdef WOLFSSL_SMALL_STACK
4625    t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4626    if (t == NULL)
4627        return FP_MEM;
4628 #endif
4629 
4630     *t = *a;
4631     *a = *b;
4632     *b = *t;
4633 
4634 #ifdef WOLFSSL_SMALL_STACK
4635     XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4636 #endif
4637     return FP_OKAY;
4638 }
4639 #endif
4640 
4641 static const int lnz[16] = {
4642    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4643 };
4644 
4645 /* Counts the number of lsbs which are zero before the first zero bit */
fp_cnt_lsb(fp_int * a)4646 int fp_cnt_lsb(fp_int *a)
4647 {
4648    int x;
4649    fp_digit q, qq;
4650 
4651    /* easy out */
4652    if (fp_iszero(a) == FP_YES) {
4653       return 0;
4654    }
4655 
4656    /* scan lower digits until non-zero */
4657    for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4658    q = a->dp[x];
4659    x *= DIGIT_BIT;
4660 
4661    /* now scan this digit until a 1 is found */
4662    if ((q & 1) == 0) {
4663       do {
4664          qq  = q & 15;
4665          x  += lnz[qq];
4666          q >>= 4;
4667       } while (qq == 0);
4668    }
4669    return x;
4670 }
4671 
4672 
s_is_power_of_two(fp_digit b,int * p)4673 static int s_is_power_of_two(fp_digit b, int *p)
4674 {
4675    int x;
4676 
4677    /* fast return if no power of two */
4678    if ((b==0) || (b & (b-1))) {
4679       return FP_NO;
4680    }
4681 
4682    for (x = 0; x < DIGIT_BIT; x++) {
4683       if (b == (((fp_digit)1)<<x)) {
4684          *p = x;
4685          return FP_YES;
4686       }
4687    }
4688    return FP_NO;
4689 }
4690 
4691 /* a/b => cb + d == a */
fp_div_d(fp_int * a,fp_digit b,fp_int * c,fp_digit * d)4692 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
4693 {
4694 #ifndef WOLFSSL_SMALL_STACK
4695   fp_int   q[1];
4696 #else
4697   fp_int   *q;
4698 #endif
4699   fp_word  w;
4700   fp_digit t;
4701   int      ix;
4702 
4703   /* cannot divide by zero */
4704   if (b == 0) {
4705      return FP_VAL;
4706   }
4707 
4708   /* quick outs */
4709   if (b == 1 || fp_iszero(a) == FP_YES) {
4710      if (d != NULL) {
4711         *d = 0;
4712      }
4713      if (c != NULL) {
4714         fp_copy(a, c);
4715      }
4716      return FP_OKAY;
4717   }
4718 
4719   /* power of two ? */
4720   if (s_is_power_of_two(b, &ix) == FP_YES) {
4721      if (d != NULL) {
4722         *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
4723      }
4724      if (c != NULL) {
4725         fp_div_2d(a, ix, c, NULL);
4726      }
4727      return FP_OKAY;
4728   }
4729 
4730 #ifdef WOLFSSL_SMALL_STACK
4731   q = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4732   if (q == NULL)
4733       return FP_MEM;
4734 #endif
4735 
4736   fp_init(q);
4737 
4738   if (c != NULL) {
4739     q->used = a->used;
4740     q->sign = a->sign;
4741   }
4742 
4743   w = 0;
4744   for (ix = a->used - 1; ix >= 0; ix--) {
4745      w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
4746 
4747      if (w >= b) {
4748 #ifdef WOLFSSL_LINUXKM
4749         t = (fp_digit)w;
4750         /* Linux kernel macro for in-place 64 bit integer division. */
4751         do_div(t, b);
4752 #else
4753         t = (fp_digit)(w / b);
4754 #endif
4755         w -= ((fp_word)t) * ((fp_word)b);
4756       } else {
4757         t = 0;
4758       }
4759       if (c != NULL)
4760         q->dp[ix] = (fp_digit)t;
4761   }
4762 
4763   if (d != NULL) {
4764      *d = (fp_digit)w;
4765   }
4766 
4767   if (c != NULL) {
4768      fp_clamp(q);
4769      fp_copy(q, c);
4770   }
4771 
4772 #ifdef WOLFSSL_SMALL_STACK
4773   XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
4774 #endif
4775   return FP_OKAY;
4776 }
4777 
4778 
4779 /* c = a mod b, 0 <= c < b  */
fp_mod_d(fp_int * a,fp_digit b,fp_digit * c)4780 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
4781 {
4782    return fp_div_d(a, b, NULL, c);
4783 }
4784 
mp_mod_d(fp_int * a,fp_digit b,fp_digit * c)4785 int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
4786 {
4787    return fp_mod_d(a, b, c);
4788 }
4789 
4790 #endif /* WC_MP_TO_RADIX || !NO_DH || !NO_DSA || !NO_RSA */
4791 
4792 
4793 #if !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) || \
4794     defined(WOLFSSL_KEY_GEN)
4795 
4796 static int  fp_isprime_ex(fp_int *a, int t, int* result);
4797 
4798 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_prime_is_prime(mp_int * a,int t,int * result)4799 int wolfcrypt_mp_prime_is_prime(mp_int* a, int t, int* result)
4800 #else
4801 int mp_prime_is_prime(mp_int* a, int t, int* result)
4802 #endif
4803 {
4804     return fp_isprime_ex(a, t, result);
4805 }
4806 
4807 /* Miller-Rabin test of "a" to the base of "b" as described in
4808  * HAC pp. 139 Algorithm 4.24
4809  *
4810  * Sets result to 0 if definitely composite or 1 if probably prime.
4811  * Randomly the chance of error is no more than 1/4 and often
4812  * very much lower.
4813  */
fp_prime_miller_rabin_ex(fp_int * a,fp_int * b,int * result,fp_int * n1,fp_int * y,fp_int * r)4814 static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result,
4815   fp_int *n1, fp_int *y, fp_int *r)
4816 {
4817   int s, j;
4818   int err;
4819 
4820   /* default */
4821   *result = FP_NO;
4822 
4823   /* ensure b > 1 */
4824   if (fp_cmp_d(b, 1) != FP_GT) {
4825      return FP_OKAY;
4826   }
4827 
4828   /* get n1 = a - 1 */
4829   fp_copy(a, n1);
4830   err = fp_sub_d(n1, 1, n1);
4831   if (err != FP_OKAY) {
4832      return err;
4833   }
4834 
4835   /* set 2**s * r = n1 */
4836   fp_copy(n1, r);
4837 
4838   /* count the number of least significant bits
4839    * which are zero
4840    */
4841   s = fp_cnt_lsb(r);
4842 
4843   /* now divide n - 1 by 2**s */
4844   fp_div_2d (r, s, r, NULL);
4845 
4846   /* compute y = b**r mod a */
4847   fp_zero(y);
4848 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
4849                                                      defined(WOLFSSL_HAVE_SP_DH)
4850 #ifndef WOLFSSL_SP_NO_2048
4851   if (fp_count_bits(a) == 1024 && fp_isodd(a))
4852       err = sp_ModExp_1024(b, r, a, y);
4853   else if (fp_count_bits(a) == 2048 && fp_isodd(a))
4854       err = sp_ModExp_2048(b, r, a, y);
4855   else
4856 #endif
4857 #ifndef WOLFSSL_SP_NO_3072
4858   if (fp_count_bits(a) == 1536 && fp_isodd(a))
4859       err = sp_ModExp_1536(b, r, a, y);
4860   else if (fp_count_bits(a) == 3072 && fp_isodd(a))
4861       err = sp_ModExp_3072(b, r, a, y);
4862   else
4863 #endif
4864 #ifdef WOLFSSL_SP_4096
4865   if (fp_count_bits(a) == 4096 && fp_isodd(a))
4866       err = sp_ModExp_4096(b, r, a, y);
4867   else
4868 #endif
4869 #endif
4870       err = fp_exptmod(b, r, a, y);
4871    if (err != FP_OKAY) {
4872        return err;
4873    }
4874 
4875   /* if y != 1 and y != n1 do */
4876   if (fp_cmp_d (y, 1) != FP_EQ && fp_cmp (y, n1) != FP_EQ) {
4877     j = 1;
4878     /* while j <= s-1 and y != n1 */
4879     while ((j <= (s - 1)) && fp_cmp (y, n1) != FP_EQ) {
4880       err = fp_sqrmod (y, a, y);
4881       if (err != FP_OKAY)
4882          return err;
4883 
4884       /* if y == 1 then composite */
4885       if (fp_cmp_d (y, 1) == FP_EQ) {
4886          return FP_OKAY;
4887       }
4888       ++j;
4889     }
4890 
4891     /* if y != n1 then composite */
4892     if (fp_cmp (y, n1) != FP_EQ) {
4893        return FP_OKAY;
4894     }
4895   }
4896 
4897   /* probably prime now */
4898   *result = FP_YES;
4899 
4900   return FP_OKAY;
4901 }
4902 
fp_prime_miller_rabin(fp_int * a,fp_int * b,int * result)4903 static int fp_prime_miller_rabin(fp_int * a, fp_int * b, int *result)
4904 {
4905   int err;
4906 #ifndef WOLFSSL_SMALL_STACK
4907   fp_int  n1[1], y[1], r[1];
4908 #else
4909   fp_int *n1, *y, *r;
4910 #endif
4911 
4912 #ifdef WOLFSSL_SMALL_STACK
4913   n1 = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
4914   if (n1 == NULL) {
4915       return FP_MEM;
4916   }
4917   y = &n1[1]; r = &n1[2];
4918 #endif
4919 
4920   fp_init(n1);
4921   fp_init(y);
4922   fp_init(r);
4923 
4924   err = fp_prime_miller_rabin_ex(a, b, result, n1, y, r);
4925 
4926   fp_clear(n1);
4927   fp_clear(y);
4928   fp_clear(r);
4929 
4930 #ifdef WOLFSSL_SMALL_STACK
4931   XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT);
4932 #endif
4933 
4934   return err;
4935 }
4936 
4937 
4938 /* a few primes */
4939 static const fp_digit primes[FP_PRIME_SIZE] = {
4940   0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4941   0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4942   0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4943   0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
4944   0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4945   0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4946   0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4947   0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4948 
4949   0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4950   0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4951   0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4952   0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4953   0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4954   0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4955   0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4956   0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4957 
4958   0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4959   0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4960   0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4961   0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4962   0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4963   0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4964   0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4965   0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4966 
4967   0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4968   0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4969   0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4970   0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4971   0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4972   0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4973   0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4974   0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4975 };
4976 
fp_isprime_ex(fp_int * a,int t,int * result)4977 int fp_isprime_ex(fp_int *a, int t, int* result)
4978 {
4979 #ifndef WOLFSSL_SMALL_STACK
4980    fp_int   b[1];
4981 #else
4982    fp_int   *b;
4983 #endif
4984    fp_digit d;
4985    int      r, res;
4986    int      err;
4987 
4988    if (t <= 0 || t > FP_PRIME_SIZE) {
4989      *result = FP_NO;
4990      return FP_VAL;
4991    }
4992 
4993    if (fp_isone(a)) {
4994        *result = FP_NO;
4995        return FP_OKAY;
4996    }
4997 
4998    /* check against primes table */
4999    for (r = 0; r < FP_PRIME_SIZE; r++) {
5000        if (fp_cmp_d(a, primes[r]) == FP_EQ) {
5001            *result = FP_YES;
5002            return FP_OKAY;
5003        }
5004    }
5005 
5006    /* do trial division */
5007    for (r = 0; r < FP_PRIME_SIZE; r++) {
5008        res = fp_mod_d(a, primes[r], &d);
5009        if (res != MP_OKAY || d == 0) {
5010            *result = FP_NO;
5011            return res;
5012        }
5013    }
5014 
5015 #ifdef WOLFSSL_SMALL_STACK
5016   b = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5017   if (b == NULL)
5018       return FP_MEM;
5019 #endif
5020    /* now do 't' miller rabins */
5021    fp_init(b);
5022    for (r = 0; r < t; r++) {
5023        fp_set(b, primes[r]);
5024        err = fp_prime_miller_rabin(a, b, &res);
5025        if ((err != FP_OKAY) || (res == FP_NO)) {
5026           *result = res;
5027        #ifdef WOLFSSL_SMALL_STACK
5028           XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5029        #endif
5030           return err;
5031        }
5032    }
5033    *result = FP_YES;
5034 #ifdef WOLFSSL_SMALL_STACK
5035    XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5036 #endif
5037    return FP_OKAY;
5038 }
5039 
5040 
5041 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_prime_is_prime_ex(mp_int * a,int t,int * result,WC_RNG * rng)5042 int wolfcrypt_mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng)
5043 #else
5044 int mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng)
5045 #endif
5046 {
5047     int ret = FP_YES;
5048     fp_digit d;
5049     int i;
5050 
5051     if (a == NULL || result == NULL || rng == NULL)
5052         return FP_VAL;
5053 
5054     if (fp_isone(a)) {
5055         *result = FP_NO;
5056         return FP_OKAY;
5057     }
5058 
5059     /* check against primes table */
5060     for (i = 0; i < FP_PRIME_SIZE; i++) {
5061         if (fp_cmp_d(a, primes[i]) == FP_EQ) {
5062             *result = FP_YES;
5063             return FP_OKAY;
5064         }
5065     }
5066 
5067     /* do trial division */
5068     for (i = 0; i < FP_PRIME_SIZE; i++) {
5069         if (fp_mod_d(a, primes[i], &d) == MP_OKAY) {
5070             if (d == 0) {
5071                 *result = FP_NO;
5072                 return FP_OKAY;
5073             }
5074         }
5075         else
5076             return FP_VAL;
5077     }
5078 
5079 #ifndef WC_NO_RNG
5080     /* now do a miller rabin with up to t random numbers, this should
5081      * give a (1/4)^t chance of a false prime. */
5082     {
5083     #ifndef WOLFSSL_SMALL_STACK
5084         fp_int b[1], c[1], n1[1], y[1], r[1];
5085         byte   base[FP_MAX_PRIME_SIZE];
5086     #else
5087         fp_int *b, *c, *n1, *y, *r;
5088         byte*  base;
5089     #endif
5090         word32 baseSz;
5091         int    err;
5092 
5093         baseSz = fp_count_bits(a);
5094         /* The base size is the number of bits / 8. One is added if the number
5095          * of bits isn't an even 8. */
5096         baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
5097 
5098     #ifndef WOLFSSL_SMALL_STACK
5099         if (baseSz > sizeof(base))
5100             return FP_MEM;
5101     #else
5102         base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5103         if (base == NULL)
5104             return FP_MEM;
5105 
5106         b = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
5107         if (b == NULL) {
5108             XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5109             return FP_MEM;
5110         }
5111         c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4];
5112     #endif
5113 
5114         fp_init(b);
5115         fp_init(c);
5116         fp_init(n1);
5117         fp_init(y);
5118         fp_init(r);
5119 
5120         err = fp_sub_d(a, 2, c);
5121         if (err != FP_OKAY) {
5122         #ifdef WOLFSSL_SMALL_STACK
5123            XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5124            XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5125         #endif
5126            return err;
5127         }
5128         while (t > 0) {
5129             if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
5130             #ifdef WOLFSSL_SMALL_STACK
5131                XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5132                XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5133             #endif
5134                return err;
5135             }
5136 
5137             err = fp_read_unsigned_bin(b, base, baseSz);
5138             if (err != FP_OKAY) {
5139             #ifdef WOLFSSL_SMALL_STACK
5140                XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5141                XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5142             #endif
5143                return err;
5144             }
5145             if (fp_cmp_d(b, 2) != FP_GT || fp_cmp(b, c) != FP_LT) {
5146                 continue;
5147             }
5148 
5149             err = fp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
5150             if (err != FP_OKAY) {
5151             #ifdef WOLFSSL_SMALL_STACK
5152                XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5153                XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5154             #endif
5155                return err;
5156             }
5157             if (ret == FP_NO)
5158                 break;
5159             fp_zero(b);
5160             t--;
5161         }
5162 
5163         fp_clear(n1);
5164         fp_clear(y);
5165         fp_clear(r);
5166         fp_clear(b);
5167         fp_clear(c);
5168      #ifdef WOLFSSL_SMALL_STACK
5169         XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5170         XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5171      #endif
5172     }
5173 #else
5174     (void)t;
5175 #endif /* !WC_NO_RNG */
5176 
5177     *result = ret;
5178     return FP_OKAY;
5179 }
5180 #endif /* !NO_RSA || !NO_DSA || !NO_DH || WOLFSSL_KEY_GEN */
5181 
5182 
mp_cond_swap_ct(mp_int * a,mp_int * b,int c,int m)5183 int mp_cond_swap_ct(mp_int * a, mp_int * b, int c, int m)
5184 {
5185     return fp_cond_swap_ct(a, b, c, m);
5186 }
5187 
5188 #ifdef WOLFSSL_KEY_GEN
5189 
5190 static int  fp_gcd(fp_int *a, fp_int *b, fp_int *c);
5191 static int  fp_lcm(fp_int *a, fp_int *b, fp_int *c);
5192 static int  fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
5193 
mp_gcd(fp_int * a,fp_int * b,fp_int * c)5194 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
5195 {
5196     return fp_gcd(a, b, c);
5197 }
5198 
5199 
mp_lcm(fp_int * a,fp_int * b,fp_int * c)5200 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
5201 {
5202     return fp_lcm(a, b, c);
5203 }
5204 
mp_rand_prime(mp_int * N,int len,WC_RNG * rng,void * heap)5205 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
5206 {
5207     int err;
5208 
5209     err = fp_randprime(N, len, rng, heap);
5210     switch(err) {
5211         case FP_VAL:
5212             return MP_VAL;
5213         case FP_MEM:
5214             return MP_MEM;
5215         default:
5216             break;
5217     }
5218 
5219     return MP_OKAY;
5220 }
5221 
mp_exch(mp_int * a,mp_int * b)5222 int mp_exch (mp_int * a, mp_int * b)
5223 {
5224     return fp_exch(a, b);
5225 }
5226 
5227 
5228 
fp_randprime(fp_int * N,int len,WC_RNG * rng,void * heap)5229 int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
5230 {
5231     static const int USE_BBS = 1;
5232     int   err, type;
5233     int   isPrime = FP_YES;
5234         /* Assume the candidate is probably prime and then test until
5235          * it is proven composite. */
5236     byte* buf;
5237 
5238     (void)heap;
5239 
5240     /* get type */
5241     if (len < 0) {
5242         type = USE_BBS;
5243         len = -len;
5244     } else {
5245         type = 0;
5246     }
5247 
5248     /* allow sizes between 2 and 512 bytes for a prime size */
5249     if (len < 2 || len > 512) {
5250         return FP_VAL;
5251     }
5252 
5253     /* allocate buffer to work with */
5254     buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER);
5255     if (buf == NULL) {
5256         return FP_MEM;
5257     }
5258     XMEMSET(buf, 0, len);
5259 
5260     do {
5261 #ifdef SHOW_GEN
5262         printf(".");
5263         fflush(stdout);
5264 #endif
5265         /* generate value */
5266         err = wc_RNG_GenerateBlock(rng, buf, len);
5267         if (err != 0) {
5268             XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
5269             return FP_VAL;
5270         }
5271 
5272         /* munge bits */
5273         buf[0]     |= 0x80 | 0x40;
5274         buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
5275 
5276         /* load value */
5277         err = fp_read_unsigned_bin(N, buf, len);
5278         if (err != 0) {
5279             XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
5280             return err;
5281         }
5282 
5283         /* test */
5284         /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
5285          * of a 1024-bit candidate being a false positive, when it is our
5286          * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
5287          * Using 8 because we've always used 8 */
5288         mp_prime_is_prime_ex(N, 8, &isPrime, rng);
5289     } while (isPrime == FP_NO);
5290 
5291     XMEMSET(buf, 0, len);
5292     XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
5293 
5294     return FP_OKAY;
5295 }
5296 
5297 /* c = [a, b] */
fp_lcm(fp_int * a,fp_int * b,fp_int * c)5298 int fp_lcm(fp_int *a, fp_int *b, fp_int *c)
5299 {
5300    int     err;
5301 #ifndef WOLFSSL_SMALL_STACK
5302    fp_int  t[2];
5303 #else
5304    fp_int  *t;
5305 #endif
5306 
5307    /* LCM of 0 and any number is undefined as 0 is not in the set of values
5308     * being used. */
5309    if (fp_iszero(a) == FP_YES || fp_iszero(b) == FP_YES) {
5310        return FP_VAL;
5311    }
5312 
5313 #ifdef WOLFSSL_SMALL_STACK
5314    t = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
5315    if (t == NULL) {
5316        return FP_MEM;
5317    }
5318 #endif
5319 
5320    fp_init(&t[0]);
5321    fp_init(&t[1]);
5322    err = fp_gcd(a, b, &t[0]);
5323    if (err == FP_OKAY) {
5324       if (fp_cmp_mag(a, b) == FP_GT) {
5325         err = fp_div(a, &t[0], &t[1], NULL);
5326         if (err == FP_OKAY)
5327           err = fp_mul(b, &t[1], c);
5328      } else {
5329         err = fp_div(b, &t[0], &t[1], NULL);
5330         if (err == FP_OKAY)
5331           err = fp_mul(a, &t[1], c);
5332      }
5333    }
5334 
5335 #ifdef WOLFSSL_SMALL_STACK
5336    XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5337 #endif
5338    return err;
5339 }
5340 
5341 
5342 
5343 /* c = (a, b) */
fp_gcd(fp_int * a,fp_int * b,fp_int * c)5344 int fp_gcd(fp_int *a, fp_int *b, fp_int *c)
5345 {
5346 #ifndef WOLFSSL_SMALL_STACK
5347    fp_int u[1], v[1], r[1];
5348 #else
5349    fp_int *u, *v, *r;
5350 #endif
5351 
5352    /* GCD of 0 and 0 is undefined as all integers divide 0. */
5353    if (fp_iszero(a) == FP_YES && fp_iszero(b) == FP_YES) {
5354        return FP_VAL;
5355    }
5356 
5357    /* either zero than gcd is the largest */
5358    if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) {
5359      fp_abs (b, c);
5360      return FP_OKAY;
5361    }
5362    if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) {
5363      fp_abs (a, c);
5364      return FP_OKAY;
5365    }
5366 
5367    /* optimized.  At this point if a == 0 then
5368     * b must equal zero too
5369     */
5370    if (fp_iszero (a) == FP_YES) {
5371      fp_zero(c);
5372      return FP_OKAY;
5373    }
5374 
5375 #ifdef WOLFSSL_SMALL_STACK
5376    u = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
5377    if (u == NULL) {
5378        return FP_MEM;
5379    }
5380    v = &u[1]; r = &u[2];
5381 #endif
5382 
5383    /* sort inputs */
5384    if (fp_cmp_mag(a, b) != FP_LT) {
5385       fp_init_copy(u, a);
5386       fp_init_copy(v, b);
5387    } else {
5388       fp_init_copy(u, b);
5389       fp_init_copy(v, a);
5390    }
5391 
5392    u->sign = FP_ZPOS;
5393    v->sign = FP_ZPOS;
5394 
5395    fp_init(r);
5396    while (fp_iszero(v) == FP_NO) {
5397       int err = fp_mod(u, v, r);
5398       if (err != MP_OKAY) {
5399 #ifdef WOLFSSL_SMALL_STACK
5400           XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
5401 #endif
5402         return err;
5403       }
5404       fp_copy(v, u);
5405       fp_copy(r, v);
5406    }
5407    fp_copy(u, c);
5408 
5409 #ifdef WOLFSSL_SMALL_STACK
5410    XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
5411 #endif
5412    return FP_OKAY;
5413 }
5414 
5415 #endif /* WOLFSSL_KEY_GEN */
5416 
5417 
5418 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \
5419     defined(WC_RSA_BLINDING) || !defined(NO_DSA) || \
5420     (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
5421 /* c = a + b */
fp_add_d(fp_int * a,fp_digit b,fp_int * c)5422 int fp_add_d(fp_int *a, fp_digit b, fp_int *c)
5423 {
5424 #ifndef WOLFSSL_SMALL_STACK
5425    fp_int  tmp[1];
5426 #else
5427    fp_int* tmp;
5428 #endif
5429    int     err;
5430 
5431 #ifdef WOLFSSL_SMALL_STACK
5432    tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5433    if (tmp == NULL)
5434        return FP_MEM;
5435 #endif
5436 
5437    fp_init(tmp);
5438    fp_set(tmp, b);
5439    err = fp_add(a, tmp, c);
5440 
5441 #ifdef WOLFSSL_SMALL_STACK
5442    XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
5443 #endif
5444    return err;
5445 }
5446 
5447 /* external compatibility */
mp_add_d(fp_int * a,fp_digit b,fp_int * c)5448 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
5449 {
5450     return fp_add_d(a, b, c);
5451 }
5452 
5453 #endif  /* HAVE_ECC || !NO_PWDBASED || OPENSSL_EXTRA || WC_RSA_BLINDING ||
5454   !NO_DSA || (!NO_RSA && !NO_RSA_BOUNDS_CHECK) */
5455 
5456 
5457 #if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
5458     defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
5459     defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
5460 
5461 /* chars used in radix conversions */
5462 static wcchar fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
5463                                     "abcdefghijklmnopqrstuvwxyz+/";
5464 #endif
5465 
5466 #if !defined(NO_DSA) || defined(HAVE_ECC)
5467 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
fp_read_radix_16(fp_int * a,const char * str)5468 static int fp_read_radix_16(fp_int *a, const char *str)
5469 {
5470   int     i, j, k, neg;
5471   int     ch;
5472 
5473   /* if the leading digit is a
5474    * minus set the sign to negative.
5475    */
5476   if (*str == '-') {
5477     ++str;
5478     neg = FP_NEG;
5479   } else {
5480     neg = FP_ZPOS;
5481   }
5482 
5483   j = 0;
5484   k = 0;
5485   for (i = (int)(XSTRLEN(str) - 1); i >= 0; i--) {
5486       ch = (int)HexCharToByte(str[i]);
5487       if (ch < 0) {
5488         return FP_VAL;
5489       }
5490 
5491       k += j == DIGIT_BIT;
5492       j &= DIGIT_BIT - 1;
5493       if (k >= FP_SIZE)
5494           return FP_VAL;
5495 
5496       a->dp[k] |= ((fp_digit)ch) << j;
5497       j += 4;
5498   }
5499 
5500   a->used = k + 1;
5501   fp_clamp(a);
5502   /* set the sign only if a != 0 */
5503   if (fp_iszero(a) != FP_YES) {
5504      a->sign = neg;
5505   }
5506   return FP_OKAY;
5507 }
5508 #endif
5509 
fp_read_radix(fp_int * a,const char * str,int radix)5510 static int fp_read_radix(fp_int *a, const char *str, int radix)
5511 {
5512   int     y, neg;
5513   char    ch;
5514 
5515   /* set the integer to the default of zero */
5516   fp_zero (a);
5517 
5518 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
5519   if (radix == 16)
5520       return fp_read_radix_16(a, str);
5521 #endif
5522 
5523   /* make sure the radix is ok */
5524   if (radix < 2 || radix > 64) {
5525     return FP_VAL;
5526   }
5527 
5528   /* if the leading digit is a
5529    * minus set the sign to negative.
5530    */
5531   if (*str == '-') {
5532     ++str;
5533     neg = FP_NEG;
5534   } else {
5535     neg = FP_ZPOS;
5536   }
5537 
5538   /* process each digit of the string */
5539   while (*str) {
5540     /* if the radix <= 36 the conversion is case insensitive
5541      * this allows numbers like 1AB and 1ab to represent the same  value
5542      * [e.g. in hex]
5543      */
5544     ch = (char)((radix <= 36) ? XTOUPPER((unsigned char)*str) : *str);
5545     for (y = 0; y < 64; y++) {
5546       if (ch == fp_s_rmap[y]) {
5547          break;
5548       }
5549     }
5550     if (y >= radix) {
5551       return FP_VAL;
5552     }
5553 
5554     /* if the char was found in the map
5555      * and is less than the given radix add it
5556      * to the number, otherwise exit the loop.
5557      */
5558     if (y < radix) {
5559       int ret = fp_mul_d (a, (fp_digit) radix, a);
5560       if (ret != FP_OKAY)
5561         return ret;
5562       ret = fp_add_d (a, (fp_digit) y, a);
5563       if (ret != FP_OKAY)
5564         return ret;
5565     } else {
5566       break;
5567     }
5568     ++str;
5569   }
5570 
5571   /* set the sign only if a != 0 */
5572   if (fp_iszero(a) != FP_YES) {
5573      a->sign = neg;
5574   }
5575   return FP_OKAY;
5576 }
5577 
5578 /* fast math conversion */
mp_read_radix(mp_int * a,const char * str,int radix)5579 int mp_read_radix(mp_int *a, const char *str, int radix)
5580 {
5581     return fp_read_radix(a, str, radix);
5582 }
5583 
5584 #endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
5585 
5586 #ifdef HAVE_ECC
5587 
5588 /* fast math conversion */
mp_sqr(fp_int * A,fp_int * B)5589 int mp_sqr(fp_int *A, fp_int *B)
5590 {
5591     return fp_sqr(A, B);
5592 }
5593 
5594 /* fast math conversion */
mp_montgomery_reduce(fp_int * a,fp_int * m,fp_digit mp)5595 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
5596 {
5597     return fp_montgomery_reduce(a, m, mp);
5598 }
5599 
mp_montgomery_reduce_ex(fp_int * a,fp_int * m,fp_digit mp,int ct)5600 int mp_montgomery_reduce_ex(fp_int *a, fp_int *m, fp_digit mp, int ct)
5601 {
5602     return fp_montgomery_reduce_ex(a, m, mp, ct);
5603 }
5604 
5605 
5606 /* fast math conversion */
mp_montgomery_setup(fp_int * a,fp_digit * rho)5607 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
5608 {
5609     return fp_montgomery_setup(a, rho);
5610 }
5611 
mp_div_2(fp_int * a,fp_int * b)5612 int mp_div_2(fp_int * a, fp_int * b)
5613 {
5614     fp_div_2(a, b);
5615     return MP_OKAY;
5616 }
5617 
5618 /* c = a / 2 (mod b) - constant time (a < b and positive) */
mp_div_2_mod_ct(mp_int * a,mp_int * b,mp_int * c)5619 int mp_div_2_mod_ct(mp_int *a, mp_int *b, mp_int *c)
5620 {
5621   return fp_div_2_mod_ct(a, b, c);
5622 }
5623 
5624 #ifdef HAVE_COMP_KEY
5625 
mp_cnt_lsb(fp_int * a)5626 int mp_cnt_lsb(fp_int* a)
5627 {
5628     return fp_cnt_lsb(a);
5629 }
5630 
5631 #endif /* HAVE_COMP_KEY */
5632 
5633 #endif /* HAVE_ECC */
5634 
5635 #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA) || \
5636     defined(WOLFSSL_KEY_GEN)
5637 /* fast math conversion */
mp_set(fp_int * a,fp_digit b)5638 int mp_set(fp_int *a, fp_digit b)
5639 {
5640     fp_set(a,b);
5641     return MP_OKAY;
5642 }
5643 #endif
5644 
5645 #ifdef WC_MP_TO_RADIX
5646 
5647 /* returns size of ASCII representation */
mp_radix_size(mp_int * a,int radix,int * size)5648 int mp_radix_size (mp_int *a, int radix, int *size)
5649 {
5650     int      res, digs;
5651     fp_digit d;
5652 #ifndef WOLFSSL_SMALL_STACK
5653     fp_int   t[1];
5654 #else
5655     fp_int   *t;
5656 #endif
5657 
5658     *size = 0;
5659 
5660     /* special case for binary */
5661     if (radix == 2) {
5662         *size = fp_count_bits(a);
5663         if (*size == 0)
5664           *size = 1;
5665         *size += (a->sign == FP_NEG ? 1 : 0) + 1; /* "-" sign + null term */
5666         return FP_OKAY;
5667     }
5668 
5669     /* make sure the radix is in range */
5670     if (radix < 2 || radix > 64) {
5671         return FP_VAL;
5672     }
5673 
5674     if (fp_iszero(a) == MP_YES) {
5675 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5676         if (radix == 16)
5677             *size = 3;
5678         else
5679 #endif
5680             *size = 2;
5681         return FP_OKAY;
5682     }
5683 
5684     /* digs is the digit count */
5685     digs = 0;
5686 
5687 #ifdef WOLFSSL_SMALL_STACK
5688     t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5689     if (t == NULL)
5690         return FP_MEM;
5691 #endif
5692 
5693     /* init a copy of the input */
5694     fp_init_copy (t, a);
5695 
5696     /* force temp to positive */
5697     t->sign = FP_ZPOS;
5698 
5699     /* fetch out all of the digits */
5700     while (fp_iszero (t) == FP_NO) {
5701         if ((res = fp_div_d (t, (mp_digit) radix, t, &d)) != FP_OKAY) {
5702             fp_zero (t);
5703         #ifdef WOLFSSL_SMALL_STACK
5704             XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5705         #endif
5706             return res;
5707         }
5708         ++digs;
5709     }
5710     fp_zero (t);
5711 
5712 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5713     /* For hexadecimal output, add zero padding when number of digits is odd */
5714     if ((digs & 1) && (radix == 16)) {
5715         ++digs;
5716     }
5717 #endif
5718 
5719     /* if it's negative add one for the sign */
5720     if (a->sign == FP_NEG) {
5721         ++digs;
5722     }
5723 
5724     /* return digs + 1, the 1 is for the NULL byte that would be required. */
5725     *size = digs + 1;
5726 #ifdef WOLFSSL_SMALL_STACK
5727     XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5728 #endif
5729     return FP_OKAY;
5730 }
5731 
5732 /* stores a bignum as a ASCII string in a given radix (2..64) */
mp_toradix(mp_int * a,char * str,int radix)5733 int mp_toradix (mp_int *a, char *str, int radix)
5734 {
5735     int      res, digs;
5736     fp_digit d;
5737     char     *_s = str;
5738 #ifndef WOLFSSL_SMALL_STACK
5739     fp_int   t[1];
5740 #else
5741     fp_int   *t;
5742 #endif
5743 
5744     /* check range of the radix */
5745     if (radix < 2 || radix > 64) {
5746         return FP_VAL;
5747     }
5748 
5749     /* quick out if its zero */
5750     if (fp_iszero(a) == FP_YES) {
5751 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5752         if (radix == 16)
5753             *str++ = '0';
5754 #endif
5755         *str++ = '0';
5756         *str = '\0';
5757         return FP_OKAY;
5758     }
5759 
5760 #ifdef WOLFSSL_SMALL_STACK
5761     t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5762     if (t == NULL)
5763         return FP_MEM;
5764 #endif
5765 
5766     /* init a copy of the input */
5767     fp_init_copy (t, a);
5768 
5769     /* if it is negative output a - */
5770     if (t->sign == FP_NEG) {
5771         ++_s;
5772         *str++ = '-';
5773         t->sign = FP_ZPOS;
5774     }
5775 
5776     digs = 0;
5777     while (fp_iszero (t) == FP_NO) {
5778         if ((res = fp_div_d (t, (fp_digit) radix, t, &d)) != FP_OKAY) {
5779             fp_zero (t);
5780         #ifdef WOLFSSL_SMALL_STACK
5781             XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5782         #endif
5783             return res;
5784         }
5785         *str++ = fp_s_rmap[d];
5786         ++digs;
5787     }
5788 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5789     /* For hexadecimal output, add zero padding when number of digits is odd */
5790     if ((digs & 1) && (radix == 16)) {
5791         *str++ = fp_s_rmap[0];
5792         ++digs;
5793     }
5794 #endif
5795     /* reverse the digits of the string.  In this case _s points
5796      * to the first digit [excluding the sign] of the number]
5797      */
5798     fp_reverse ((unsigned char *)_s, digs);
5799 
5800     /* append a NULL so the string is properly terminated */
5801     *str = '\0';
5802 
5803     fp_zero (t);
5804 #ifdef WOLFSSL_SMALL_STACK
5805     XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5806 #endif
5807     return FP_OKAY;
5808 }
5809 
5810 #ifdef WOLFSSL_DEBUG_MATH
mp_dump(const char * desc,mp_int * a,byte verbose)5811 void mp_dump(const char* desc, mp_int* a, byte verbose)
5812 {
5813   char buffer[FP_SIZE * sizeof(fp_digit) * 2];
5814   int size;
5815 
5816 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
5817   size = a->size;
5818 #else
5819   size = FP_SIZE;
5820 #endif
5821 
5822   printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
5823     desc, a, a->used, a->sign, size, (int)sizeof(fp_digit));
5824 
5825   mp_tohex(a, buffer);
5826   printf("  %s\n  ", buffer);
5827 
5828   if (verbose) {
5829     int i;
5830     for(i=0; i<size * (int)sizeof(fp_digit); i++) {
5831       printf("%x ", *(((byte*)a->dp) + i));
5832     }
5833     printf("\n");
5834   }
5835 }
5836 #endif /* WOLFSSL_DEBUG_MATH */
5837 
5838 #endif /* WC_MP_TO_RADIX */
5839 
5840 
mp_abs(mp_int * a,mp_int * b)5841 int mp_abs(mp_int* a, mp_int* b)
5842 {
5843   fp_abs(a, b);
5844   return FP_OKAY;
5845 }
5846 
5847 
mp_lshd(mp_int * a,int b)5848 int mp_lshd (mp_int * a, int b)
5849 {
5850   return fp_lshd(a, b);
5851 }
5852 
5853 #endif /* USE_FAST_MATH */
5854