1 /* tfm.c
2 *
3 * Copyright (C) 2006-2021 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22
23
24 /*
25 * Based on public domain TomsFastMath 0.10 by Tom St Denis, tomstdenis@iahu.ca,
26 * http://math.libtomcrypt.com
27 */
28
29 /**
30 * Edited by Moises Guimaraes (moises@wolfssl.com)
31 * to fit wolfSSL's needs.
32 */
33
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>
36 #endif
37
38 /* in case user set USE_FAST_MATH there */
39 #include <wolfssl/wolfcrypt/settings.h>
40
41 #ifdef USE_FAST_MATH
42
43 #ifdef NO_INLINE
44 #include <wolfssl/wolfcrypt/misc.h>
45 #else
46 #define WOLFSSL_MISC_INCLUDED
47 #include <wolfcrypt/src/misc.c>
48 #endif
49
50 #include <wolfssl/wolfcrypt/random.h>
51 #include <wolfssl/wolfcrypt/tfm.h>
52 #include <wolfcrypt/src/asm.c> /* will define asm MACROS or C ones */
53 #include <wolfssl/wolfcrypt/wolfmath.h> /* common functions */
54
55 #if defined(FREESCALE_LTC_TFM)
56 #include <wolfssl/wolfcrypt/port/nxp/ksdk_port.h>
57 #endif
58 #ifdef WOLFSSL_DEBUG_MATH
59 #include <stdio.h>
60 #endif
61
62 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
63 #ifdef __cplusplus
64 extern "C" {
65 #endif
66 WOLFSSL_LOCAL int sp_ModExp_1024(mp_int* base, mp_int* exp, mp_int* mod,
67 mp_int* res);
68 WOLFSSL_LOCAL int sp_ModExp_1536(mp_int* base, mp_int* exp, mp_int* mod,
69 mp_int* res);
70 WOLFSSL_LOCAL int sp_ModExp_2048(mp_int* base, mp_int* exp, mp_int* mod,
71 mp_int* res);
72 WOLFSSL_LOCAL int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod,
73 mp_int* res);
74 WOLFSSL_LOCAL int sp_ModExp_4096(mp_int* base, mp_int* exp, mp_int* mod,
75 mp_int* res);
76 #ifdef __cplusplus
77 } /* extern "C" */
78 #endif
79 #endif
80
81
82 #if !defined(WOLFSSL_SP_MATH) && !defined(WOLFSSL_SP_MATH_ALL)
83 /* math settings check */
CheckRunTimeSettings(void)84 word32 CheckRunTimeSettings(void)
85 {
86 return CTC_SETTINGS;
87 }
88
89 /* math settings size check */
CheckRunTimeFastMath(void)90 word32 CheckRunTimeFastMath(void)
91 {
92 return FP_SIZE;
93 }
94 #endif
95
96
97 /* Functions */
98
fp_add(fp_int * a,fp_int * b,fp_int * c)99 int fp_add(fp_int *a, fp_int *b, fp_int *c)
100 {
101 int sa, sb;
102 int ret = FP_OKAY;
103
104 /* get sign of both inputs */
105 sa = a->sign;
106 sb = b->sign;
107
108 /* handle two cases, not four */
109 if (sa == sb) {
110 /* both positive or both negative */
111 /* add their magnitudes, copy the sign */
112 c->sign = sa;
113 ret = s_fp_add (a, b, c);
114 } else {
115 /* one positive, the other negative */
116 /* subtract the one with the greater magnitude from */
117 /* the one of the lesser magnitude. The result gets */
118 /* the sign of the one with the greater magnitude. */
119 if (fp_cmp_mag (a, b) == FP_LT) {
120 c->sign = sb;
121 s_fp_sub (b, a, c);
122 } else {
123 c->sign = sa;
124 s_fp_sub (a, b, c);
125 }
126 }
127
128 return ret;
129 }
130
131 /* unsigned addition */
s_fp_add(fp_int * a,fp_int * b,fp_int * c)132 int s_fp_add(fp_int *a, fp_int *b, fp_int *c)
133 {
134 int x, y, oldused;
135 fp_word t;
136
137 y = MAX(a->used, b->used);
138 oldused = MIN(c->used, FP_SIZE); /* help static analysis w/ largest size */
139 c->used = y;
140
141 t = 0;
142 for (x = 0; x < y; x++) {
143 t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]);
144 c->dp[x] = (fp_digit)t;
145 t >>= DIGIT_BIT;
146 }
147 if (t != 0) {
148 if (x == FP_SIZE)
149 return FP_VAL;
150 c->dp[c->used++] = (fp_digit)t;
151 ++x;
152 }
153
154 c->used = x;
155
156 /* zero any excess digits on the destination that we didn't write to */
157 for (; x < oldused; x++) {
158 c->dp[x] = 0;
159 }
160 fp_clamp(c);
161 return FP_OKAY;
162 }
163
164 /* c = a - b */
fp_sub(fp_int * a,fp_int * b,fp_int * c)165 int fp_sub(fp_int *a, fp_int *b, fp_int *c)
166 {
167 int sa, sb;
168 int ret = FP_OKAY;
169
170 sa = a->sign;
171 sb = b->sign;
172
173 if (sa != sb) {
174 /* subtract a negative from a positive, OR */
175 /* subtract a positive from a negative. */
176 /* In either case, ADD their magnitudes, */
177 /* and use the sign of the first number. */
178 c->sign = sa;
179 ret = s_fp_add (a, b, c);
180 } else {
181 /* subtract a positive from a positive, OR */
182 /* subtract a negative from a negative. */
183 /* First, take the difference between their */
184 /* magnitudes, then... */
185 if (fp_cmp_mag (a, b) != FP_LT) {
186 /* Copy the sign from the first */
187 c->sign = sa;
188 /* The first has a larger or equal magnitude */
189 s_fp_sub (a, b, c);
190 } else {
191 /* The result has the *opposite* sign from */
192 /* the first number. */
193 c->sign = (sa == FP_ZPOS) ? FP_NEG : FP_ZPOS;
194 /* The second has a larger magnitude */
195 s_fp_sub (b, a, c);
196 }
197 }
198 return ret;
199 }
200
201 /* unsigned subtraction ||a|| >= ||b|| ALWAYS! */
s_fp_sub(fp_int * a,fp_int * b,fp_int * c)202 void s_fp_sub(fp_int *a, fp_int *b, fp_int *c)
203 {
204 int x, oldbused, oldused;
205 fp_word t;
206
207 oldused = c->used;
208 oldbused = b->used;
209 c->used = a->used;
210 t = 0;
211 for (x = 0; x < oldbused; x++) {
212 t = ((fp_word)a->dp[x]) - (((fp_word)b->dp[x]) + t);
213 c->dp[x] = (fp_digit)t;
214 t = (t >> DIGIT_BIT)&1;
215 }
216 for (; x < a->used; x++) {
217 t = ((fp_word)a->dp[x]) - t;
218 c->dp[x] = (fp_digit)t;
219 t = (t >> DIGIT_BIT)&1;
220 }
221
222 /* zero any excess digits on the destination that we didn't write to */
223 for (; x < oldused; x++) {
224 c->dp[x] = 0;
225 }
226 fp_clamp(c);
227 }
228
229 /* c = a * b */
fp_mul(fp_int * A,fp_int * B,fp_int * C)230 int fp_mul(fp_int *A, fp_int *B, fp_int *C)
231 {
232 int ret = 0;
233 int y, yy, oldused;
234
235 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
236 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
237 ret = esp_mp_mul(A, B, C);
238 if(ret != -2) return ret;
239 #endif
240
241 oldused = C->used;
242
243 y = MAX(A->used, B->used);
244 yy = MIN(A->used, B->used);
245
246 /* fail if we are out of range */
247 if (y + yy >= FP_SIZE) {
248 ret = FP_VAL;
249 goto clean;
250 }
251
252 /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size
253 of the largest input. We also want to avoid doing excess mults if the
254 inputs are not close to the next power of two. That is, for example,
255 if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications
256 */
257
258 #if defined(TFM_MUL3) && FP_SIZE >= 6
259 if (y <= 3) {
260 ret = fp_mul_comba3(A,B,C);
261 goto clean;
262 }
263 #endif
264 #if defined(TFM_MUL4) && FP_SIZE >= 8
265 if (y == 4) {
266 ret = fp_mul_comba4(A,B,C);
267 goto clean;
268 }
269 #endif
270 #if defined(TFM_MUL6) && FP_SIZE >= 12
271 if (y <= 6) {
272 ret = fp_mul_comba6(A,B,C);
273 goto clean;
274 }
275 #endif
276 #if defined(TFM_MUL7) && FP_SIZE >= 14
277 if (y == 7) {
278 ret = fp_mul_comba7(A,B,C);
279 goto clean;
280 }
281 #endif
282 #if defined(TFM_MUL8) && FP_SIZE >= 16
283 if (y == 8) {
284 ret = fp_mul_comba8(A,B,C);
285 goto clean;
286 }
287 #endif
288 #if defined(TFM_MUL9) && FP_SIZE >= 18
289 if (y == 9) {
290 ret = fp_mul_comba9(A,B,C);
291 goto clean;
292 }
293 #endif
294 #if defined(TFM_MUL12) && FP_SIZE >= 24
295 if (y <= 12) {
296 ret = fp_mul_comba12(A,B,C);
297 goto clean;
298 }
299 #endif
300 #if defined(TFM_MUL17) && FP_SIZE >= 34
301 if (y <= 17) {
302 ret = fp_mul_comba17(A,B,C);
303 goto clean;
304 }
305 #endif
306
307 #if defined(TFM_SMALL_SET) && FP_SIZE >= 32
308 if (y <= 16) {
309 ret = fp_mul_comba_small(A,B,C);
310 goto clean;
311 }
312 #endif
313 #if defined(TFM_MUL20) && FP_SIZE >= 40
314 if (y <= 20) {
315 ret = fp_mul_comba20(A,B,C);
316 goto clean;
317 }
318 #endif
319 #if defined(TFM_MUL24) && FP_SIZE >= 48
320 if (yy >= 16 && y <= 24) {
321 ret = fp_mul_comba24(A,B,C);
322 goto clean;
323 }
324 #endif
325 #if defined(TFM_MUL28) && FP_SIZE >= 56
326 if (yy >= 20 && y <= 28) {
327 ret = fp_mul_comba28(A,B,C);
328 goto clean;
329 }
330 #endif
331 #if defined(TFM_MUL32) && FP_SIZE >= 64
332 if (yy >= 24 && y <= 32) {
333 ret = fp_mul_comba32(A,B,C);
334 goto clean;
335 }
336 #endif
337 #if defined(TFM_MUL48) && FP_SIZE >= 96
338 if (yy >= 40 && y <= 48) {
339 ret = fp_mul_comba48(A,B,C);
340 goto clean;
341 }
342 #endif
343 #if defined(TFM_MUL64) && FP_SIZE >= 128
344 if (yy >= 56 && y <= 64) {
345 ret = fp_mul_comba64(A,B,C);
346 goto clean;
347 }
348 #endif
349 ret = fp_mul_comba(A,B,C);
350
351 clean:
352 /* zero any excess digits on the destination that we didn't write to */
353 for (y = C->used; y >= 0 && y < oldused; y++) {
354 C->dp[y] = 0;
355 }
356
357 return ret;
358 }
359
fp_mul_2(fp_int * a,fp_int * b)360 int fp_mul_2(fp_int * a, fp_int * b)
361 {
362 int x, oldused;
363
364 /* Make sure value to double and result are in range. */
365 if ((a->used > (FP_SIZE-1)) || ((a->used == (FP_SIZE - 1)) &&
366 ((a->dp[FP_SIZE - 1] & ((fp_digit)1 << (DIGIT_BIT - 1))) != 0))) {
367 return FP_VAL;
368 }
369
370 oldused = b->used;
371 b->used = a->used;
372
373 {
374 fp_digit r, rr, *tmpa, *tmpb;
375
376 /* alias for source */
377 tmpa = a->dp;
378
379 /* alias for dest */
380 tmpb = b->dp;
381
382 /* carry */
383 r = 0;
384 for (x = 0; x < a->used; x++) {
385
386 /* get what will be the *next* carry bit from the
387 * MSB of the current digit
388 */
389 rr = *tmpa >> ((fp_digit)(DIGIT_BIT - 1));
390
391 /* now shift up this digit, add in the carry [from the previous] */
392 *tmpb++ = ((*tmpa++ << ((fp_digit)1)) | r);
393
394 /* copy the carry that would be from the source
395 * digit into the next iteration
396 */
397 r = rr;
398 }
399
400 /* new leading digit? */
401 if (r != 0) {
402 /* add a MSB which is always 1 at this point */
403 *tmpb = 1;
404 ++(b->used);
405 }
406
407 /* zero any excess digits on the destination that we didn't write to */
408 tmpb = b->dp + b->used;
409 for (x = b->used; x < oldused; x++) {
410 *tmpb++ = 0;
411 }
412 }
413 b->sign = a->sign;
414
415 return FP_OKAY;
416 }
417
418 /* c = a * b */
fp_mul_d(fp_int * a,fp_digit b,fp_int * c)419 int fp_mul_d(fp_int *a, fp_digit b, fp_int *c)
420 {
421 fp_word w;
422 int x, oldused;
423
424 oldused = c->used;
425 c->used = a->used;
426 c->sign = a->sign;
427 w = 0;
428 for (x = 0; x < a->used; x++) {
429 w = ((fp_word)a->dp[x]) * ((fp_word)b) + w;
430 c->dp[x] = (fp_digit)w;
431 w = w >> DIGIT_BIT;
432 }
433 if (w != 0) {
434 if (a->used == FP_SIZE)
435 return FP_VAL;
436 c->dp[c->used++] = (fp_digit) w;
437 ++x;
438 }
439
440 /* zero any excess digits on the destination that we didn't write to */
441 /* also checking FP_SIZE here for static analysis */
442 for (; x < oldused && x < FP_SIZE; x++) {
443 c->dp[x] = 0;
444 }
445
446 fp_clamp(c);
447 return FP_OKAY;
448 }
449
450 /* c = a * 2**d */
fp_mul_2d(fp_int * a,int b,fp_int * c)451 int fp_mul_2d(fp_int *a, int b, fp_int *c)
452 {
453 fp_digit carry, carrytmp, shift;
454 int x;
455
456 /* copy it */
457 fp_copy(a, c);
458
459 /* handle whole digits */
460 if (b >= DIGIT_BIT) {
461 int ret = fp_lshd(c, b/DIGIT_BIT);
462 if (ret != FP_OKAY)
463 return ret;
464 }
465 b %= DIGIT_BIT;
466
467 /* shift the digits */
468 if (b != 0) {
469 carry = 0;
470 shift = DIGIT_BIT - b;
471 for (x = 0; x < c->used; x++) {
472 carrytmp = c->dp[x] >> shift;
473 c->dp[x] = (c->dp[x] << b) + carry;
474 carry = carrytmp;
475 }
476 /* store last carry if room */
477 if (carry && x < FP_SIZE) {
478 c->dp[c->used++] = carry;
479 }
480 if (x == FP_SIZE)
481 return FP_VAL;
482 }
483 fp_clamp(c);
484 return FP_OKAY;
485 }
486
487 /* generic PxQ multiplier */
488 #if defined(HAVE_INTEL_MULX)
489
fp_mul_comba_mulx(fp_int * A,fp_int * B,fp_int * C)490 WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C)
491
492 {
493 int ix, iy, iz, pa;
494 fp_int *dst;
495 #ifndef WOLFSSL_SMALL_STACK
496 fp_int tmp[1];
497 #else
498 fp_int *tmp;
499 #endif
500 fp_digit carry;
501
502 /* Variables used but not seen by cppcheck. */
503 (void)ix; (void)iy; (void)iz;
504
505 #ifdef WOLFSSL_SMALL_STACK
506 tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
507 if (tmp == NULL)
508 return FP_MEM;
509 #endif
510
511 /* get size of output and trim */
512 pa = A->used + B->used;
513 if (pa >= FP_SIZE) {
514 pa = FP_SIZE-1;
515 }
516
517 /* Always take branch to use tmp variable. This avoids a cache attack for
518 * determining if C equals A */
519 if (1) {
520 fp_init(tmp);
521 dst = tmp;
522 }
523
524 TFM_INTEL_MUL_COMBA(A, B, carry, dst) ;
525
526 dst->used = pa;
527 dst->sign = A->sign ^ B->sign;
528 fp_clamp(dst);
529 fp_copy(dst, C);
530
531 #ifdef WOLFSSL_SMALL_STACK
532 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
533 #endif
534
535 return FP_OKAY;
536 }
537 #endif
538
fp_mul_comba(fp_int * A,fp_int * B,fp_int * C)539 int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
540 {
541 int ret = 0;
542 int ix, iy, iz, tx, ty, pa;
543 fp_digit c0, c1, c2, *tmpx, *tmpy;
544 fp_int *dst;
545 #ifndef WOLFSSL_SMALL_STACK
546 fp_int tmp[1];
547 #else
548 fp_int *tmp;
549 #endif
550
551 if (A->used + B->used >= FP_SIZE) return FP_VAL;
552
553 IF_HAVE_INTEL_MULX(ret = fp_mul_comba_mulx(A, B, C), return ret) ;
554
555 #ifdef WOLFSSL_SMALL_STACK
556 tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
557 if (tmp == NULL)
558 return FP_MEM;
559 #endif
560
561 COMBA_START;
562 COMBA_CLEAR;
563
564 /* get size of output and trim */
565 pa = A->used + B->used;
566 if (pa >= FP_SIZE) {
567 pa = FP_SIZE-1;
568 }
569
570 /* Always take branch to use tmp variable. This avoids a cache attack for
571 * determining if C equals A */
572 if (1) {
573 fp_init(tmp);
574 dst = tmp;
575 }
576
577 for (ix = 0; ix < pa; ix++) {
578 /* get offsets into the two bignums */
579 ty = MIN(ix, (B->used > 0 ? B->used - 1 : 0));
580 tx = ix - ty;
581
582 /* setup temp aliases */
583 tmpx = A->dp + tx;
584 tmpy = B->dp + ty;
585
586 /* this is the number of times the loop will iterate, essentially its
587 while (tx++ < a->used && ty-- >= 0) { ... }
588 */
589 iy = MIN(A->used-tx, ty+1);
590
591 /* execute loop */
592 COMBA_FORWARD;
593 for (iz = 0; iz < iy; ++iz) {
594 fp_digit _tmpx = *tmpx++;
595 fp_digit _tmpy = *tmpy--;
596 MULADD(_tmpx, _tmpy);
597 }
598
599 /* store term */
600 COMBA_STORE(dst->dp[ix]);
601 }
602 COMBA_FINI;
603
604 dst->used = pa;
605 dst->sign = A->sign ^ B->sign;
606 fp_clamp(dst);
607 fp_copy(dst, C);
608
609 /* Variables used but not seen by cppcheck. */
610 (void)c0; (void)c1; (void)c2;
611
612 #ifdef WOLFSSL_SMALL_STACK
613 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
614 #endif
615 return ret;
616 }
617
618 /* a/b => cb + d == a */
fp_div(fp_int * a,fp_int * b,fp_int * c,fp_int * d)619 int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
620 {
621 int n, t, i, norm, neg;
622 int ret;
623 #ifndef WOLFSSL_SMALL_STACK
624 fp_int q[1], x[1], y[1], t1[1], t2[1];
625 #else
626 fp_int *q, *x, *y, *t1, *t2;
627 #endif
628
629 /* is divisor zero ? */
630 if (fp_iszero (b) == FP_YES) {
631 return FP_VAL;
632 }
633
634 /* if a < b then q=0, r = a */
635 if (fp_cmp_mag (a, b) == FP_LT)
636 {
637 if (d != NULL) {
638 fp_copy (a, d);
639 }
640 if (c != NULL) {
641 fp_zero (c);
642 }
643 return FP_OKAY;
644 }
645
646 #ifdef WOLFSSL_SMALL_STACK
647 q = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
648 if (q == NULL) {
649 return FP_MEM;
650 }
651 x = &q[1]; y = &q[2]; t1 = &q[3]; t2 = &q[4];
652 #endif
653
654 fp_init(q);
655 /* qb + d = a, and b is an integer > 0, therefore q <= a */
656 q->used = a->used;
657
658 fp_init(t1);
659 fp_init(t2);
660 fp_init_copy(x, a);
661 fp_init_copy(y, b);
662
663 /* fix the sign */
664 neg = (a->sign == b->sign) ? FP_ZPOS : FP_NEG;
665 x->sign = y->sign = FP_ZPOS;
666
667 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
668 norm = fp_count_bits(y) % DIGIT_BIT;
669 if (norm < (int)(DIGIT_BIT-1)) {
670 norm = (DIGIT_BIT-1) - norm;
671 ret = fp_mul_2d (x, norm, x);
672 if (ret != FP_OKAY) {
673 #ifdef WOLFSSL_SMALL_STACK
674 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
675 #endif
676 return ret;
677 }
678 ret = fp_mul_2d (y, norm, y);
679 if (ret != FP_OKAY) {
680 #ifdef WOLFSSL_SMALL_STACK
681 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
682 #endif
683 return ret;
684 }
685 } else {
686 norm = 0;
687 }
688
689 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
690 n = x->used - 1;
691 t = y->used - 1;
692
693 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
694 ret = fp_lshd (y, n - t); /* y = y*b**{n-t} */
695 if (ret != FP_OKAY) {
696 #ifdef WOLFSSL_SMALL_STACK
697 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
698 #endif
699 return ret;
700 }
701
702 while (fp_cmp (x, y) != FP_LT) {
703 ++(q->dp[n - t]);
704 ret = fp_sub (x, y, x);
705 if (ret != FP_OKAY) {
706 #ifdef WOLFSSL_SMALL_STACK
707 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
708 #endif
709 return ret;
710 }
711 }
712
713 /* reset y by shifting it back down */
714 fp_rshd (y, n - t);
715
716 /* step 3. for i from n down to (t + 1) */
717 for (i = n; i >= (t + 1); i--) {
718 if (i > x->used) {
719 continue;
720 }
721
722 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
723 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
724 if (x->dp[i] == y->dp[t]) {
725 q->dp[i - t - 1] = (fp_digit) ((((fp_word)1) << DIGIT_BIT) - 1);
726 } else {
727 fp_word tmp;
728 tmp = ((fp_word) x->dp[i]) << ((fp_word) DIGIT_BIT);
729 tmp |= ((fp_word) x->dp[i - 1]);
730 #ifdef WOLFSSL_LINUXKM
731 /* Linux kernel macro for in-place 64 bit integer division. */
732 do_div(tmp, (fp_word)y->dp[t]);
733 #else
734 tmp /= ((fp_word)y->dp[t]);
735 #endif
736 q->dp[i - t - 1] = (fp_digit) (tmp);
737 }
738
739 /* while (q{i-t-1} * (yt * b + y{t-1})) >
740 xi * b**2 + xi-1 * b + xi-2
741
742 do q{i-t-1} -= 1;
743 */
744 q->dp[i - t - 1] = (q->dp[i - t - 1] + 1);
745 do {
746 q->dp[i - t - 1] = (q->dp[i - t - 1] - 1);
747
748 /* find left hand */
749 fp_zero (t1);
750 t1->dp[0] = (t - 1 < 0) ? 0 : y->dp[t - 1];
751 t1->dp[1] = y->dp[t];
752 t1->used = 2;
753 ret = fp_mul_d (t1, q->dp[i - t - 1], t1);
754 if (ret != FP_OKAY) {
755 #ifdef WOLFSSL_SMALL_STACK
756 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
757 #endif
758 return ret;
759 }
760
761 /* find right hand */
762 t2->dp[0] = (i - 2 < 0) ? 0 : x->dp[i - 2];
763 t2->dp[1] = (i - 1 < 0) ? 0 : x->dp[i - 1];
764 t2->dp[2] = x->dp[i];
765 t2->used = 3;
766 } while (fp_cmp_mag(t1, t2) == FP_GT);
767
768 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
769 ret = fp_mul_d (y, q->dp[i - t - 1], t1);
770 if (ret != FP_OKAY) {
771 #ifdef WOLFSSL_SMALL_STACK
772 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
773 #endif
774 return ret;
775 }
776 ret = fp_lshd (t1, i - t - 1);
777 if (ret != FP_OKAY) {
778 #ifdef WOLFSSL_SMALL_STACK
779 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
780 #endif
781 return ret;
782 }
783 ret = fp_sub (x, t1, x);
784 if (ret != FP_OKAY) {
785 #ifdef WOLFSSL_SMALL_STACK
786 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
787 #endif
788 return ret;
789 }
790
791 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
792 if (x->sign == FP_NEG) {
793 fp_copy (y, t1);
794 ret = fp_lshd (t1, i - t - 1);
795 if (ret != FP_OKAY) {
796 #ifdef WOLFSSL_SMALL_STACK
797 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
798 #endif
799 return ret;
800 }
801 ret = fp_add (x, t1, x);
802 if (ret != FP_OKAY) {
803 #ifdef WOLFSSL_SMALL_STACK
804 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
805 #endif
806 return ret;
807 }
808 q->dp[i - t - 1] = q->dp[i - t - 1] - 1;
809 }
810 }
811
812 /* now q is the quotient and x is the remainder
813 * [which we have to normalize]
814 */
815
816 /* get sign before writing to c */
817 x->sign = x->used == 0 ? FP_ZPOS : a->sign;
818
819 if (c != NULL) {
820 fp_clamp (q);
821 fp_copy (q, c);
822 c->sign = neg;
823 }
824
825 if (d != NULL) {
826 fp_div_2d (x, norm, x, NULL);
827
828 /* zero any excess digits on the destination that we didn't write to */
829 for (i = b->used; i < x->used; i++) {
830 x->dp[i] = 0;
831 }
832 fp_clamp(x);
833 fp_copy (x, d);
834 }
835
836 #ifdef WOLFSSL_SMALL_STACK
837 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
838 #endif
839 return FP_OKAY;
840 }
841
842 /* b = a/2 */
fp_div_2(fp_int * a,fp_int * b)843 void fp_div_2(fp_int * a, fp_int * b)
844 {
845 int x, oldused;
846
847 oldused = b->used;
848 b->used = a->used;
849 {
850 fp_digit r, rr, *tmpa, *tmpb;
851
852 /* source alias */
853 tmpa = a->dp + b->used - 1;
854
855 /* dest alias */
856 tmpb = b->dp + b->used - 1;
857
858 /* carry */
859 r = 0;
860 for (x = b->used - 1; x >= 0; x--) {
861 /* get the carry for the next iteration */
862 rr = *tmpa & 1;
863
864 /* shift the current digit, add in carry and store */
865 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
866
867 /* forward carry to next iteration */
868 r = rr;
869 }
870
871 /* zero any excess digits on the destination that we didn't write to */
872 tmpb = b->dp + b->used;
873 for (x = b->used; x < oldused; x++) {
874 *tmpb++ = 0;
875 }
876 }
877 b->sign = a->sign;
878 fp_clamp (b);
879 }
880
881 /* c = a / 2 (mod b) - constant time (a < b and positive) */
fp_div_2_mod_ct(fp_int * a,fp_int * b,fp_int * c)882 int fp_div_2_mod_ct(fp_int *a, fp_int *b, fp_int *c)
883 {
884 fp_word w = 0;
885 fp_digit mask;
886 int i;
887
888 mask = 0 - (a->dp[0] & 1);
889 for (i = 0; i < b->used; i++) {
890 fp_digit mask_a = 0 - (i < a->used);
891
892 w += b->dp[i] & mask;
893 w += a->dp[i] & mask_a;
894 c->dp[i] = (fp_digit)w;
895 w >>= DIGIT_BIT;
896 }
897 c->dp[i] = (fp_digit)w;
898 c->used = i + 1;
899 c->sign = FP_ZPOS;
900 fp_clamp(c);
901 fp_div_2(c, c);
902
903 return FP_OKAY;
904 }
905
906 /* c = a / 2**b */
fp_div_2d(fp_int * a,int b,fp_int * c,fp_int * d)907 void fp_div_2d(fp_int *a, int b, fp_int *c, fp_int *d)
908 {
909 int D;
910
911 /* if the shift count is <= 0 then we do no work */
912 if (b <= 0) {
913 fp_copy (a, c);
914 if (d != NULL) {
915 fp_zero (d);
916 }
917 return;
918 }
919
920 /* get the remainder before a is changed in calculating c */
921 if (a == c && d != NULL) {
922 fp_mod_2d (a, b, d);
923 }
924
925 /* copy */
926 fp_copy(a, c);
927
928 /* shift by as many digits in the bit count */
929 if (b >= (int)DIGIT_BIT) {
930 fp_rshd (c, b / DIGIT_BIT);
931 }
932
933 /* shift any bit count < DIGIT_BIT */
934 D = (b % DIGIT_BIT);
935 if (D != 0) {
936 fp_rshb(c, D);
937 }
938
939 /* get the remainder if a is not changed in calculating c */
940 if (a != c && d != NULL) {
941 fp_mod_2d (a, b, d);
942 }
943
944 fp_clamp (c);
945 }
946
947 /* c = a mod b, 0 <= c < b */
fp_mod(fp_int * a,fp_int * b,fp_int * c)948 int fp_mod(fp_int *a, fp_int *b, fp_int *c)
949 {
950 #ifndef WOLFSSL_SMALL_STACK
951 fp_int t[1];
952 #else
953 fp_int *t;
954 #endif
955 int err;
956
957 #ifdef WOLFSSL_SMALL_STACK
958 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
959 if (t == NULL)
960 return FP_MEM;
961 #endif
962
963 fp_init(t);
964 err = fp_div(a, b, NULL, t);
965 if (err == FP_OKAY) {
966 if (!fp_iszero(t) && (t->sign != b->sign)) {
967 err = fp_add(t, b, c);
968 } else {
969 fp_copy(t, c);
970 }
971 }
972
973 #ifdef WOLFSSL_SMALL_STACK
974 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
975 #endif
976 return err;
977 }
978
979 /* c = a mod 2**d */
fp_mod_2d(fp_int * a,int b,fp_int * c)980 void fp_mod_2d(fp_int *a, int b, fp_int *c)
981 {
982 int x;
983 int bmax;
984
985 /* zero if count less than or equal to zero */
986 if (b <= 0) {
987 fp_zero(c);
988 return;
989 }
990
991 /* get copy of input */
992 fp_copy(a, c);
993
994 /* if 2**d is larger than we just return */
995 if (c->sign == FP_ZPOS && b >= (DIGIT_BIT * a->used)) {
996 return;
997 }
998
999 bmax = (b + DIGIT_BIT - 1) / DIGIT_BIT;
1000 /* zero digits above the last digit of the modulus */
1001 for (x = bmax; x < c->used; x++) {
1002 c->dp[x] = 0;
1003 }
1004
1005 if (c->sign == FP_NEG) {
1006 fp_digit carry = 0;
1007 /* negate value */
1008 for (x = 0; x < c->used; x++) {
1009 fp_digit next = c->dp[x] > 0;
1010 c->dp[x] = (fp_digit)0 - c->dp[x] - carry;
1011 carry |= next;
1012 }
1013 for (; x < bmax; x++) {
1014 c->dp[x] = (fp_digit)0 - carry;
1015 }
1016 c->used = bmax;
1017 c->sign = FP_ZPOS;
1018 }
1019
1020 /* clear the digit that is not completely outside/inside the modulus */
1021 x = DIGIT_BIT - (b % DIGIT_BIT);
1022 if (x != DIGIT_BIT) {
1023 c->dp[bmax - 1] &= ~((fp_digit)0) >> x;
1024 }
1025
1026 fp_clamp (c);
1027 }
1028
fp_invmod_slow(fp_int * a,fp_int * b,fp_int * c)1029 static int fp_invmod_slow (fp_int * a, fp_int * b, fp_int * c)
1030 {
1031 #ifndef WOLFSSL_SMALL_STACK
1032 fp_int x[1], y[1], u[1], v[1], A[1], B[1], C[1], D[1];
1033 #else
1034 fp_int *x, *y, *u, *v, *A, *B, *C, *D;
1035 #endif
1036 int err;
1037
1038 /* b cannot be negative */
1039 if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
1040 return FP_VAL;
1041 }
1042 if (fp_iszero(a) == FP_YES) {
1043 return FP_VAL;
1044 }
1045
1046 #ifdef WOLFSSL_SMALL_STACK
1047 x = (fp_int*)XMALLOC(sizeof(fp_int) * 8, NULL, DYNAMIC_TYPE_BIGINT);
1048 if (x == NULL) {
1049 return FP_MEM;
1050 }
1051 y = &x[1]; u = &x[2]; v = &x[3]; A = &x[4]; B = &x[5]; C = &x[6]; D = &x[7];
1052 #endif
1053
1054 /* init temps */
1055 fp_init(x); fp_init(y);
1056 fp_init(u); fp_init(v);
1057 fp_init(A); fp_init(B);
1058 fp_init(C); fp_init(D);
1059
1060 /* x = a, y = b */
1061 if ((err = fp_mod(a, b, x)) != FP_OKAY) {
1062 #ifdef WOLFSSL_SMALL_STACK
1063 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1064 #endif
1065 return err;
1066 }
1067 fp_copy(b, y);
1068
1069 /* 2. [modified] if x,y are both even then return an error! */
1070 if (fp_iseven(x) == FP_YES && fp_iseven(y) == FP_YES) {
1071 #ifdef WOLFSSL_SMALL_STACK
1072 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1073 #endif
1074 return FP_VAL;
1075 }
1076
1077 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1078 fp_copy (x, u);
1079 fp_copy (y, v);
1080 fp_set (A, 1);
1081 fp_set (D, 1);
1082
1083 top:
1084 /* 4. while u is even do */
1085 while (fp_iseven (u) == FP_YES) {
1086 /* 4.1 u = u/2 */
1087 fp_div_2 (u, u);
1088
1089 /* 4.2 if A or B is odd then */
1090 if (fp_isodd (A) == FP_YES || fp_isodd (B) == FP_YES) {
1091 /* A = (A+y)/2, B = (B-x)/2 */
1092 err = fp_add (A, y, A);
1093 if (err != FP_OKAY) {
1094 #ifdef WOLFSSL_SMALL_STACK
1095 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1096 #endif
1097 return err;
1098 }
1099 err = fp_sub (B, x, B);
1100 if (err != FP_OKAY) {
1101 #ifdef WOLFSSL_SMALL_STACK
1102 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1103 #endif
1104 return err;
1105 }
1106 }
1107 /* A = A/2, B = B/2 */
1108 fp_div_2 (A, A);
1109 fp_div_2 (B, B);
1110 }
1111
1112 /* 5. while v is even do */
1113 while (fp_iseven (v) == FP_YES) {
1114 /* 5.1 v = v/2 */
1115 fp_div_2 (v, v);
1116
1117 /* 5.2 if C or D is odd then */
1118 if (fp_isodd (C) == FP_YES || fp_isodd (D) == FP_YES) {
1119 /* C = (C+y)/2, D = (D-x)/2 */
1120 err = fp_add (C, y, C);
1121 if (err != FP_OKAY) {
1122 #ifdef WOLFSSL_SMALL_STACK
1123 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1124 #endif
1125 return err;
1126 }
1127 err = fp_sub (D, x, D);
1128 if (err != FP_OKAY) {
1129 #ifdef WOLFSSL_SMALL_STACK
1130 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1131 #endif
1132 return err;
1133 }
1134 }
1135 /* C = C/2, D = D/2 */
1136 fp_div_2 (C, C);
1137 fp_div_2 (D, D);
1138 }
1139
1140 /* 6. if u >= v then */
1141 if (fp_cmp (u, v) != FP_LT) {
1142 /* u = u - v, A = A - C, B = B - D */
1143 err = fp_sub (u, v, u);
1144 if (err != FP_OKAY) {
1145 #ifdef WOLFSSL_SMALL_STACK
1146 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1147 #endif
1148 return err;
1149 }
1150 err = fp_sub (A, C, A);
1151 if (err != FP_OKAY) {
1152 #ifdef WOLFSSL_SMALL_STACK
1153 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1154 #endif
1155 return err;
1156 }
1157 err = fp_sub (B, D, B);
1158 if (err != FP_OKAY) {
1159 #ifdef WOLFSSL_SMALL_STACK
1160 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1161 #endif
1162 return err;
1163 }
1164 } else {
1165 /* v - v - u, C = C - A, D = D - B */
1166 err = fp_sub (v, u, v);
1167 if (err != FP_OKAY) {
1168 #ifdef WOLFSSL_SMALL_STACK
1169 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1170 #endif
1171 return err;
1172 }
1173 err = fp_sub (C, A, C);
1174 if (err != FP_OKAY) {
1175 #ifdef WOLFSSL_SMALL_STACK
1176 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1177 #endif
1178 return err;
1179 }
1180 err = fp_sub (D, B, D);
1181 if (err != FP_OKAY) {
1182 #ifdef WOLFSSL_SMALL_STACK
1183 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1184 #endif
1185 return err;
1186 }
1187 }
1188
1189 /* if not zero goto step 4 */
1190 if (fp_iszero (u) == FP_NO)
1191 goto top;
1192
1193 /* now a = C, b = D, gcd == g*v */
1194
1195 /* if v != 1 then there is no inverse */
1196 if (fp_cmp_d (v, 1) != FP_EQ) {
1197 #ifdef WOLFSSL_SMALL_STACK
1198 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1199 #endif
1200 return FP_VAL;
1201 }
1202
1203 /* if its too low */
1204 while (fp_cmp_d(C, 0) == FP_LT) {
1205 err = fp_add(C, b, C);
1206 if (err != FP_OKAY) {
1207 #ifdef WOLFSSL_SMALL_STACK
1208 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1209 #endif
1210 return err;
1211 }
1212 }
1213
1214 /* too big */
1215 while (fp_cmp_mag(C, b) != FP_LT) {
1216 err = fp_sub(C, b, C);
1217 if (err != FP_OKAY) {
1218 #ifdef WOLFSSL_SMALL_STACK
1219 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1220 #endif
1221 return err;
1222 }
1223 }
1224
1225 /* C is now the inverse */
1226 fp_copy(C, c);
1227 #ifdef WOLFSSL_SMALL_STACK
1228 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1229 #endif
1230 return FP_OKAY;
1231 }
1232
1233 /* c = 1/a (mod b) for odd b only */
fp_invmod(fp_int * a,fp_int * b,fp_int * c)1234 int fp_invmod(fp_int *a, fp_int *b, fp_int *c)
1235 {
1236 #ifndef WOLFSSL_SMALL_STACK
1237 fp_int x[1], y[1], u[1], v[1], B[1], D[1];
1238 #else
1239 fp_int *x, *y, *u, *v, *B, *D;
1240 #endif
1241 int neg;
1242 int err;
1243
1244 if (b->sign == FP_NEG || fp_iszero(b) == FP_YES) {
1245 return FP_VAL;
1246 }
1247
1248 /* [modified] sanity check on "a" */
1249 if (fp_iszero(a) == FP_YES) {
1250 return FP_VAL; /* can not divide by 0 here */
1251 }
1252
1253 /* 2. [modified] b must be odd */
1254 if (fp_iseven(b) == FP_YES) {
1255 return fp_invmod_slow(a,b,c);
1256 }
1257
1258 #ifdef WOLFSSL_SMALL_STACK
1259 x = (fp_int*)XMALLOC(sizeof(fp_int) * 6, NULL, DYNAMIC_TYPE_BIGINT);
1260 if (x == NULL) {
1261 return FP_MEM;
1262 }
1263 y = &x[1]; u = &x[2]; v = &x[3]; B = &x[4]; D = &x[5];
1264 #endif
1265
1266 /* init all our temps */
1267 fp_init(x); fp_init(y);
1268 fp_init(u); fp_init(v);
1269 fp_init(B); fp_init(D);
1270
1271 if (fp_cmp(a, b) != MP_LT) {
1272 err = mp_mod(a, b, y);
1273 if (err != FP_OKAY) {
1274 #ifdef WOLFSSL_SMALL_STACK
1275 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1276 #endif
1277 return err;
1278 }
1279 a = y;
1280 }
1281
1282 if (fp_iszero(a) == FP_YES) {
1283 #ifdef WOLFSSL_SMALL_STACK
1284 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1285 #endif
1286 return FP_VAL;
1287 }
1288
1289 /* x == modulus, y == value to invert */
1290 fp_copy(b, x);
1291
1292 /* we need y = |a| */
1293 fp_abs(a, y);
1294
1295 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1296 fp_copy(x, u);
1297 fp_copy(y, v);
1298 fp_set (D, 1);
1299
1300 top:
1301 /* 4. while u is even do */
1302 while (fp_iseven (u) == FP_YES) {
1303 /* 4.1 u = u/2 */
1304 fp_div_2 (u, u);
1305
1306 /* 4.2 if B is odd then */
1307 if (fp_isodd (B) == FP_YES) {
1308 err = fp_sub (B, x, B);
1309 if (err != FP_OKAY) {
1310 #ifdef WOLFSSL_SMALL_STACK
1311 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1312 #endif
1313 return err;
1314 }
1315 }
1316 /* B = B/2 */
1317 fp_div_2 (B, B);
1318 }
1319
1320 /* 5. while v is even do */
1321 while (fp_iseven (v) == FP_YES) {
1322 /* 5.1 v = v/2 */
1323 fp_div_2 (v, v);
1324
1325 /* 5.2 if D is odd then */
1326 if (fp_isodd (D) == FP_YES) {
1327 /* D = (D-x)/2 */
1328 err = fp_sub (D, x, D);
1329 if (err != FP_OKAY) {
1330 #ifdef WOLFSSL_SMALL_STACK
1331 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1332 #endif
1333 return err;
1334 }
1335 }
1336 /* D = D/2 */
1337 fp_div_2 (D, D);
1338 }
1339
1340 /* 6. if u >= v then */
1341 if (fp_cmp (u, v) != FP_LT) {
1342 /* u = u - v, B = B - D */
1343 err = fp_sub (u, v, u);
1344 if (err != FP_OKAY) {
1345 #ifdef WOLFSSL_SMALL_STACK
1346 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1347 #endif
1348 return err;
1349 }
1350 err = fp_sub (B, D, B);
1351 if (err != FP_OKAY) {
1352 #ifdef WOLFSSL_SMALL_STACK
1353 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1354 #endif
1355 return err;
1356 }
1357 } else {
1358 /* v - v - u, D = D - B */
1359 err = fp_sub (v, u, v);
1360 if (err != FP_OKAY) {
1361 #ifdef WOLFSSL_SMALL_STACK
1362 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1363 #endif
1364 return err;
1365 }
1366 err = fp_sub (D, B, D);
1367 if (err != FP_OKAY) {
1368 #ifdef WOLFSSL_SMALL_STACK
1369 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1370 #endif
1371 return err;
1372 }
1373 }
1374
1375 /* if not zero goto step 4 */
1376 if (fp_iszero (u) == FP_NO) {
1377 goto top;
1378 }
1379
1380 /* now a = C, b = D, gcd == g*v */
1381
1382 /* if v != 1 then there is no inverse */
1383 if (fp_cmp_d (v, 1) != FP_EQ) {
1384 #ifdef WOLFSSL_SMALL_STACK
1385 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1386 #endif
1387 return FP_VAL;
1388 }
1389
1390 /* b is now the inverse */
1391 neg = a->sign;
1392 while (D->sign == FP_NEG) {
1393 err = fp_add (D, b, D);
1394 if (err != FP_OKAY) {
1395 #ifdef WOLFSSL_SMALL_STACK
1396 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1397 #endif
1398 return FP_OKAY;
1399 }
1400 }
1401 /* too big */
1402 while (fp_cmp_mag(D, b) != FP_LT) {
1403 err = fp_sub(D, b, D);
1404 if (err != FP_OKAY) {
1405 #ifdef WOLFSSL_SMALL_STACK
1406 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1407 #endif
1408 return err;
1409 }
1410 }
1411 fp_copy (D, c);
1412 c->sign = neg;
1413 #ifdef WOLFSSL_SMALL_STACK
1414 XFREE(x, NULL, DYNAMIC_TYPE_BIGINT);
1415 #endif
1416 return FP_OKAY;
1417 }
1418
1419 #define CT_INV_MOD_PRE_CNT 8
1420
1421 /* modulus (b) must be greater than 2 and a prime */
fp_invmod_mont_ct(fp_int * a,fp_int * b,fp_int * c,fp_digit mp)1422 int fp_invmod_mont_ct(fp_int *a, fp_int *b, fp_int *c, fp_digit mp)
1423 {
1424 int i, j, err = FP_OKAY;
1425 #ifndef WOLFSSL_SMALL_STACK
1426 fp_int t[1], e[1];
1427 fp_int pre[CT_INV_MOD_PRE_CNT];
1428 #else
1429 fp_int* t;
1430 fp_int* e;
1431 fp_int* pre;
1432 #endif
1433
1434 if ((a->used * 2 > FP_SIZE) || (b->used * 2 > FP_SIZE)) {
1435 return FP_VAL;
1436 }
1437
1438 #ifdef WOLFSSL_SMALL_STACK
1439 t = (fp_int*)XMALLOC(sizeof(fp_int) * (2 + CT_INV_MOD_PRE_CNT), NULL,
1440 DYNAMIC_TYPE_BIGINT);
1441 if (t == NULL)
1442 return FP_MEM;
1443 e = t + 1;
1444 pre = t + 2;
1445 #endif
1446
1447 fp_init(t);
1448 fp_init(e);
1449
1450 fp_init(&pre[0]);
1451 fp_copy(a, &pre[0]);
1452 for (i = 1; i < CT_INV_MOD_PRE_CNT; i++) {
1453 fp_init(&pre[i]);
1454 err |= fp_sqr(&pre[i-1], &pre[i]);
1455 err |= fp_montgomery_reduce(&pre[i], b, mp);
1456 err |= fp_mul(&pre[i], a, &pre[i]);
1457 err |= fp_montgomery_reduce(&pre[i], b, mp);
1458 }
1459
1460 fp_sub_d(b, 2, e);
1461 /* Highest bit is always set. */
1462 j = 1;
1463 for (i = fp_count_bits(e)-2; i >= 0; i--) {
1464 if (!fp_is_bit_set(e, i) || j == CT_INV_MOD_PRE_CNT)
1465 break;
1466 j++;
1467 }
1468 fp_copy(&pre[j-1], t);
1469 j = 0;
1470 for (; i >= 0; i--) {
1471 int set = fp_is_bit_set(e, i);
1472
1473 if ((j == CT_INV_MOD_PRE_CNT) || (!set && j > 0)) {
1474 err |= fp_mul(t, &pre[j-1], t);
1475 err |= fp_montgomery_reduce(t, b, mp);
1476 j = 0;
1477 }
1478 err |= fp_sqr(t, t);
1479 err |= fp_montgomery_reduce(t, b, mp);
1480 j += set;
1481 }
1482 if (j > 0) {
1483 err |= fp_mul(t, &pre[j-1], c);
1484 err |= fp_montgomery_reduce(c, b, mp);
1485 }
1486 else
1487 fp_copy(t, c);
1488
1489 #ifdef WOLFSSL_SMALL_STACK
1490 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1491 #endif
1492
1493 return err;
1494 }
1495
1496 /* d = a * b (mod c) */
fp_mulmod(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1497 int fp_mulmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1498 {
1499 int err;
1500 #ifndef WOLFSSL_SMALL_STACK
1501 fp_int t[1];
1502 #else
1503 fp_int *t;
1504 #endif
1505
1506 #ifdef WOLFSSL_SMALL_STACK
1507 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
1508 if (t == NULL)
1509 return FP_MEM;
1510 #endif
1511
1512 fp_init(t);
1513 err = fp_mul(a, b, t);
1514 if (err == FP_OKAY) {
1515 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
1516 if (d->size < FP_SIZE) {
1517 err = fp_mod(t, c, t);
1518 fp_copy(t, d);
1519 } else
1520 #endif
1521 {
1522 err = fp_mod(t, c, d);
1523 }
1524 }
1525
1526 #ifdef WOLFSSL_SMALL_STACK
1527 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1528 #endif
1529 return err;
1530 }
1531
1532 /* d = a - b (mod c) */
fp_submod(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1533 int fp_submod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1534 {
1535 int err;
1536 #ifndef WOLFSSL_SMALL_STACK
1537 fp_int t[1];
1538 #else
1539 fp_int *t;
1540 #endif
1541
1542 #ifdef WOLFSSL_SMALL_STACK
1543 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
1544 if (t == NULL)
1545 return FP_MEM;
1546 #endif
1547
1548 fp_init(t);
1549 err = fp_sub(a, b, t);
1550 if (err == FP_OKAY) {
1551 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
1552 if (d->size < FP_SIZE) {
1553 err = fp_mod(t, c, t);
1554 fp_copy(t, d);
1555 } else
1556 #endif
1557 {
1558 err = fp_mod(t, c, d);
1559 }
1560 }
1561
1562 #ifdef WOLFSSL_SMALL_STACK
1563 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1564 #endif
1565 return err;
1566 }
1567
1568 /* d = a + b (mod c) */
fp_addmod(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1569 int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1570 {
1571 int err;
1572 #ifndef WOLFSSL_SMALL_STACK
1573 fp_int t[1];
1574 #else
1575 fp_int *t;
1576 #endif
1577
1578 #ifdef WOLFSSL_SMALL_STACK
1579 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
1580 if (t == NULL)
1581 return FP_MEM;
1582 #endif
1583
1584 fp_init(t);
1585 err = fp_add(a, b, t);
1586 if (err == FP_OKAY) {
1587 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
1588 if (d->size < FP_SIZE) {
1589 err = fp_mod(t, c, t);
1590 fp_copy(t, d);
1591 } else
1592 #endif
1593 {
1594 err = fp_mod(t, c, d);
1595 }
1596 }
1597
1598 #ifdef WOLFSSL_SMALL_STACK
1599 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
1600 #endif
1601 return err;
1602 }
1603
1604 /* d = a - b (mod c) - constant time (a < c and b < c and all positive)
1605 * c and d must not be the same pointers.
1606 */
fp_submod_ct(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1607 int fp_submod_ct(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1608 {
1609 fp_sword w;
1610 fp_digit mask;
1611 int i;
1612
1613 if (c->used + 1 > FP_SIZE) {
1614 return FP_VAL;
1615 }
1616 if (c == d) {
1617 return FP_VAL;
1618 }
1619
1620 /* In constant time, subtract b from a putting result in d. */
1621 w = 0;
1622 for (i = 0; i < c->used; i++) {
1623 w += a->dp[i];
1624 w -= b->dp[i];
1625 d->dp[i] = (fp_digit)w;
1626 w >>= DIGIT_BIT;
1627 }
1628 w += a->dp[i];
1629 w -= b->dp[i];
1630 w >>= DIGIT_BIT;
1631 /* When w is negative then we need to add modulus to make result positive. */
1632 mask = (fp_digit)0 - (w < 0);
1633 /* Constant time, conditionally, add modulus to difference. */
1634 w = 0;
1635 for (i = 0; i < c->used; i++) {
1636 w += d->dp[i];
1637 w += c->dp[i] & mask;
1638 d->dp[i] = (fp_digit)w;
1639 w >>= DIGIT_BIT;
1640 }
1641 /* Result will always have digits equal to or less than those in modulus. */
1642 d->used = i;
1643 d->sign = FP_ZPOS;
1644 fp_clamp(d);
1645
1646 return FP_OKAY;
1647 }
1648
1649 /* d = a + b (mod c) - constant time (a < c and b < c and all positive)
1650 * c and d must not be the same pointers.
1651 */
fp_addmod_ct(fp_int * a,fp_int * b,fp_int * c,fp_int * d)1652 int fp_addmod_ct(fp_int *a, fp_int *b, fp_int *c, fp_int *d)
1653 {
1654 fp_word w;
1655 fp_sword s;
1656 fp_digit mask;
1657 int i;
1658
1659 if (c == d) {
1660 return FP_VAL;
1661 }
1662
1663 /* Add a to b into d. Do the subtract of modulus but don't store result.
1664 * When subtract result is negative, the overflow will be negative.
1665 * Only need to subtract mod when result is positive - overflow is positive.
1666 */
1667 w = 0;
1668 s = 0;
1669 for (i = 0; i < c->used; i++) {
1670 w += a->dp[i];
1671 w += b->dp[i];
1672 d->dp[i] = (fp_digit)w;
1673 s += (fp_digit)w;
1674 s -= c->dp[i];
1675 w >>= DIGIT_BIT;
1676 s >>= DIGIT_BIT;
1677 }
1678 s += (fp_digit)w;
1679 /* s will be positive when subtracting modulus is needed. */
1680 mask = (fp_digit)0 - (s >= 0);
1681
1682 /* Constant time, conditionally, subtract modulus from sum. */
1683 w = 0;
1684 for (i = 0; i < c->used; i++) {
1685 w += c->dp[i] & mask;
1686 w = d->dp[i] - w;
1687 d->dp[i] = (fp_digit)w;
1688 w = (w >> DIGIT_BIT)&1;
1689 }
1690 /* Result will always have digits equal to or less than those in modulus. */
1691 d->used = i;
1692 d->sign = FP_ZPOS;
1693 fp_clamp(d);
1694
1695 return FP_OKAY;
1696 }
1697
1698 #ifdef TFM_TIMING_RESISTANT
1699
1700 #ifdef WC_RSA_NONBLOCK
1701
1702 #ifdef WC_RSA_NONBLOCK_TIME
1703 /* User can override the check-time at build-time using the
1704 * FP_EXPTMOD_NB_CHECKTIME macro to define your own function */
1705 #ifndef FP_EXPTMOD_NB_CHECKTIME
1706 /* instruction count for each type of operation */
1707 /* array lookup is using TFM_EXPTMOD_NB_* states */
1708 static const word32 exptModNbInst[TFM_EXPTMOD_NB_COUNT] = {
1709 #ifdef TFM_PPC32
1710 #ifdef _DEBUG
1711 11098, 8701, 3971, 178394, 858093, 1040, 822, 178056, 181574, 90883, 184339, 236813
1712 #else
1713 7050, 2554, 3187, 43178, 200422, 384, 275, 43024, 43550, 30450, 46270, 61376
1714 #endif
1715 #elif defined(TFM_X86_64)
1716 #ifdef _DEBUG
1717 954, 2377, 858, 19027, 90840, 287, 407, 20140, 7874, 11385, 8005, 6151
1718 #else
1719 765, 1007, 771, 5216, 34993, 248, 193, 4975, 4201, 3947, 4275, 3811
1720 #endif
1721 #else /* software only fast math */
1722 #ifdef _DEBUG
1723 798, 2245, 802, 16657, 66920, 352, 186, 16997, 16145, 12789, 16742, 15006
1724 #else
1725 775, 1084, 783, 4692, 37510, 207, 183, 4374, 4392, 3097, 4442, 4079
1726 #endif
1727 #endif
1728 };
1729
fp_exptmod_nb_checktime(exptModNb_t * nb)1730 static int fp_exptmod_nb_checktime(exptModNb_t* nb)
1731 {
1732 word32 totalInst;
1733
1734 /* if no max time has been set then stop (do not block) */
1735 if (nb->maxBlockInst == 0 || nb->state >= TFM_EXPTMOD_NB_COUNT) {
1736 return TFM_EXPTMOD_NB_STOP;
1737 }
1738
1739 /* if instruction table not set then use maxBlockInst as simple counter */
1740 if (exptModNbInst[nb->state] == 0) {
1741 if (++nb->totalInst < nb->maxBlockInst)
1742 return TFM_EXPTMOD_NB_CONTINUE;
1743
1744 nb->totalInst = 0; /* reset counter */
1745 return TFM_EXPTMOD_NB_STOP;
1746 }
1747
1748 /* get total instruction count including next operation */
1749 totalInst = nb->totalInst + exptModNbInst[nb->state];
1750 /* if the next operation can completed within the maximum then continue */
1751 if (totalInst <= nb->maxBlockInst) {
1752 return TFM_EXPTMOD_NB_CONTINUE;
1753 }
1754
1755 return TFM_EXPTMOD_NB_STOP;
1756 }
1757 #define FP_EXPTMOD_NB_CHECKTIME(nb) fp_exptmod_nb_checktime((nb))
1758 #endif /* !FP_EXPTMOD_NB_CHECKTIME */
1759 #endif /* WC_RSA_NONBLOCK_TIME */
1760
1761 /* non-blocking version of timing resistant fp_exptmod function */
1762 /* supports cache resistance */
fp_exptmod_nb(exptModNb_t * nb,fp_int * G,fp_int * X,fp_int * P,fp_int * Y)1763 int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y)
1764 {
1765 int err, ret = FP_WOULDBLOCK;
1766
1767 if (nb == NULL)
1768 return FP_VAL;
1769
1770 #ifdef WC_RSA_NONBLOCK_TIME
1771 nb->totalInst = 0;
1772 do {
1773 nb->totalInst += exptModNbInst[nb->state];
1774 #endif
1775
1776 switch (nb->state) {
1777 case TFM_EXPTMOD_NB_INIT:
1778 /* now setup montgomery */
1779 if ((err = fp_montgomery_setup(P, &nb->mp)) != FP_OKAY) {
1780 nb->state = TFM_EXPTMOD_NB_INIT;
1781 return err;
1782 }
1783
1784 /* init ints */
1785 fp_init(&nb->R[0]);
1786 fp_init(&nb->R[1]);
1787 #ifndef WC_NO_CACHE_RESISTANT
1788 fp_init(&nb->R[2]);
1789 #endif
1790 nb->state = TFM_EXPTMOD_NB_MONT;
1791 break;
1792
1793 case TFM_EXPTMOD_NB_MONT:
1794 /* mod m -> R[0] */
1795 err = fp_montgomery_calc_normalization(&nb->R[0], P);
1796 if (err != FP_OKAY) {
1797 nb->state = TFM_EXPTMOD_NB_INIT;
1798 return err;
1799 }
1800
1801 nb->state = TFM_EXPTMOD_NB_MONT_RED;
1802 break;
1803
1804 case TFM_EXPTMOD_NB_MONT_RED:
1805 /* reduce G -> R[1] */
1806 if (fp_cmp_mag(P, G) != FP_GT) {
1807 /* G > P so we reduce it first */
1808 fp_mod(G, P, &nb->R[1]);
1809 } else {
1810 fp_copy(G, &nb->R[1]);
1811 }
1812
1813 nb->state = TFM_EXPTMOD_NB_MONT_MUL;
1814 break;
1815
1816 case TFM_EXPTMOD_NB_MONT_MUL:
1817 /* G (R[1]) * m (R[0]) */
1818 err = fp_mul(&nb->R[1], &nb->R[0], &nb->R[1]);
1819 if (err != FP_OKAY) {
1820 nb->state = TFM_EXPTMOD_NB_INIT;
1821 return err;
1822 }
1823
1824 nb->state = TFM_EXPTMOD_NB_MONT_MOD;
1825 break;
1826
1827 case TFM_EXPTMOD_NB_MONT_MOD:
1828 /* mod m */
1829 err = fp_div(&nb->R[1], P, NULL, &nb->R[1]);
1830 if (err != FP_OKAY) {
1831 nb->state = TFM_EXPTMOD_NB_INIT;
1832 return err;
1833 }
1834
1835 nb->state = TFM_EXPTMOD_NB_MONT_MODCHK;
1836 break;
1837
1838 case TFM_EXPTMOD_NB_MONT_MODCHK:
1839 /* m matches sign of (G * R mod m) */
1840 if (nb->R[1].sign != P->sign) {
1841 err = fp_add(&nb->R[1], P, &nb->R[1]);
1842 if (err != FP_OKAY) {
1843 nb->state = TFM_EXPTMOD_NB_INIT;
1844 return err;
1845 }
1846 }
1847
1848 /* set initial mode and bit cnt */
1849 nb->bitcnt = 1;
1850 nb->buf = 0;
1851 nb->digidx = X->used - 1;
1852
1853 nb->state = TFM_EXPTMOD_NB_NEXT;
1854 break;
1855
1856 case TFM_EXPTMOD_NB_NEXT:
1857 /* grab next digit as required */
1858 if (--nb->bitcnt == 0) {
1859 /* if nb->digidx == -1 we are out of digits so break */
1860 if (nb->digidx == -1) {
1861 nb->state = TFM_EXPTMOD_NB_RED;
1862 break;
1863 }
1864 /* read next digit and reset nb->bitcnt */
1865 nb->buf = X->dp[nb->digidx--];
1866 nb->bitcnt = (int)DIGIT_BIT;
1867 }
1868
1869 /* grab the next msb from the exponent */
1870 nb->y = (int)(nb->buf >> (DIGIT_BIT - 1)) & 1;
1871 nb->buf <<= (fp_digit)1;
1872 nb->state = TFM_EXPTMOD_NB_MUL;
1873 FALL_THROUGH;
1874
1875 case TFM_EXPTMOD_NB_MUL:
1876 fp_mul(&nb->R[0], &nb->R[1], &nb->R[nb->y^1]);
1877 nb->state = TFM_EXPTMOD_NB_MUL_RED;
1878 break;
1879
1880 case TFM_EXPTMOD_NB_MUL_RED:
1881 err = fp_montgomery_reduce(&nb->R[nb->y^1], P, nb->mp);
1882 if (err != FP_OKAY) {
1883 nb->state = TFM_EXPTMOD_NB_INIT;
1884 return err;
1885 }
1886 nb->state = TFM_EXPTMOD_NB_SQR;
1887 break;
1888
1889 case TFM_EXPTMOD_NB_SQR:
1890 #ifdef WC_NO_CACHE_RESISTANT
1891 err = fp_sqr(&nb->R[nb->y], &nb->R[nb->y]);
1892 #else
1893 fp_copy((fp_int*) ( ((wc_ptr_t)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
1894 ((wc_ptr_t)&nb->R[1] & wc_off_on_addr[nb->y]) ),
1895 &nb->R[2]);
1896 err = fp_sqr(&nb->R[2], &nb->R[2]);
1897 #endif /* WC_NO_CACHE_RESISTANT */
1898 if (err != FP_OKAY) {
1899 nb->state = TFM_EXPTMOD_NB_INIT;
1900 return err;
1901 }
1902
1903 nb->state = TFM_EXPTMOD_NB_SQR_RED;
1904 break;
1905
1906 case TFM_EXPTMOD_NB_SQR_RED:
1907 #ifdef WC_NO_CACHE_RESISTANT
1908 err = fp_montgomery_reduce(&nb->R[nb->y], P, nb->mp);
1909 #else
1910 err = fp_montgomery_reduce(&nb->R[2], P, nb->mp);
1911 fp_copy(&nb->R[2],
1912 (fp_int*) ( ((wc_ptr_t)&nb->R[0] & wc_off_on_addr[nb->y^1]) +
1913 ((wc_ptr_t)&nb->R[1] & wc_off_on_addr[nb->y]) ) );
1914 #endif /* WC_NO_CACHE_RESISTANT */
1915 if (err != FP_OKAY) {
1916 nb->state = TFM_EXPTMOD_NB_INIT;
1917 return err;
1918 }
1919
1920 nb->state = TFM_EXPTMOD_NB_NEXT;
1921 break;
1922
1923 case TFM_EXPTMOD_NB_RED:
1924 /* final reduce */
1925 err = fp_montgomery_reduce(&nb->R[0], P, nb->mp);
1926 if (err != FP_OKAY) {
1927 nb->state = TFM_EXPTMOD_NB_INIT;
1928 return err;
1929 }
1930 fp_copy(&nb->R[0], Y);
1931
1932 nb->state = TFM_EXPTMOD_NB_INIT;
1933 ret = FP_OKAY;
1934 break;
1935 } /* switch */
1936
1937 #ifdef WC_RSA_NONBLOCK_TIME
1938 /* determine if maximum blocking time has been reached */
1939 } while (ret == FP_WOULDBLOCK &&
1940 FP_EXPTMOD_NB_CHECKTIME(nb) == TFM_EXPTMOD_NB_CONTINUE);
1941 #endif
1942
1943 return ret;
1944 }
1945
1946 #endif /* WC_RSA_NONBLOCK */
1947
1948
1949 /* timing resistant montgomery ladder based exptmod
1950 Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
1951 Cryptographic Hardware and Embedded Systems, CHES 2002
1952 */
_fp_exptmod_ct(fp_int * G,fp_int * X,int digits,fp_int * P,fp_int * Y)1953 static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P,
1954 fp_int * Y)
1955 {
1956 #ifndef WOLFSSL_SMALL_STACK
1957 #ifdef WC_NO_CACHE_RESISTANT
1958 fp_int R[2];
1959 #else
1960 fp_int R[3]; /* need a temp for cache resistance */
1961 #endif
1962 #else
1963 fp_int *R;
1964 #endif
1965 fp_digit buf, mp;
1966 int err, bitcnt, digidx, y;
1967
1968 /* now setup montgomery */
1969 if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
1970 return err;
1971 }
1972
1973 #ifdef WOLFSSL_SMALL_STACK
1974 #ifndef WC_NO_CACHE_RESISTANT
1975 R = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
1976 #else
1977 R = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
1978 #endif
1979 if (R == NULL)
1980 return FP_MEM;
1981 #endif
1982 fp_init(&R[0]);
1983 fp_init(&R[1]);
1984 #ifndef WC_NO_CACHE_RESISTANT
1985 fp_init(&R[2]);
1986 #endif
1987
1988 /* now we need R mod m */
1989 err = fp_montgomery_calc_normalization (&R[0], P);
1990 if (err != FP_OKAY) {
1991 #ifdef WOLFSSL_SMALL_STACK
1992 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
1993 #endif
1994 return err;
1995 }
1996
1997 /* now set R[0][1] to G * R mod m */
1998 if (fp_cmp_mag(P, G) != FP_GT) {
1999 /* G > P so we reduce it first */
2000 err = fp_mod(G, P, &R[1]);
2001 if (err != FP_OKAY) {
2002 #ifdef WOLFSSL_SMALL_STACK
2003 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2004 #endif
2005 return err;
2006 }
2007 } else {
2008 fp_copy(G, &R[1]);
2009 }
2010 err = fp_mulmod (&R[1], &R[0], P, &R[1]);
2011 if (err != FP_OKAY) {
2012 #ifdef WOLFSSL_SMALL_STACK
2013 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2014 #endif
2015 return err;
2016 }
2017
2018 /* for j = t-1 downto 0 do
2019 r_!k = R0*R1; r_k = r_k^2
2020 */
2021
2022 /* set initial mode and bit cnt */
2023 bitcnt = 1;
2024 buf = 0;
2025 digidx = digits - 1;
2026
2027 for (;;) {
2028 /* grab next digit as required */
2029 if (--bitcnt == 0) {
2030 /* if digidx == -1 we are out of digits so break */
2031 if (digidx == -1) {
2032 break;
2033 }
2034 /* read next digit and reset bitcnt */
2035 buf = X->dp[digidx--];
2036 bitcnt = (int)DIGIT_BIT;
2037 }
2038
2039 /* grab the next msb from the exponent */
2040 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2041 buf <<= (fp_digit)1;
2042
2043 #ifdef WC_NO_CACHE_RESISTANT
2044 /* do ops */
2045 err = fp_mul(&R[0], &R[1], &R[y^1]);
2046 if (err != FP_OKAY) {
2047 #ifdef WOLFSSL_SMALL_STACK
2048 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2049 #endif
2050 return err;
2051 }
2052 err = fp_montgomery_reduce(&R[y^1], P, mp);
2053 if (err != FP_OKAY) {
2054 #ifdef WOLFSSL_SMALL_STACK
2055 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2056 #endif
2057 return err;
2058 }
2059
2060 err = fp_sqr(&R[y], &R[y]);
2061 if (err != FP_OKAY) {
2062 #ifdef WOLFSSL_SMALL_STACK
2063 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2064 #endif
2065 return err;
2066 }
2067 err = fp_montgomery_reduce(&R[y], P, mp);
2068 if (err != FP_OKAY) {
2069 #ifdef WOLFSSL_SMALL_STACK
2070 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2071 #endif
2072 return err;
2073 }
2074 #else
2075 /* do ops */
2076 err = fp_mul(&R[0], &R[1], &R[2]);
2077 if (err != FP_OKAY) {
2078 #ifdef WOLFSSL_SMALL_STACK
2079 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2080 #endif
2081 return err;
2082 }
2083 err = fp_montgomery_reduce(&R[2], P, mp);
2084 if (err != FP_OKAY) {
2085 #ifdef WOLFSSL_SMALL_STACK
2086 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2087 #endif
2088 return err;
2089 }
2090 /* instead of using R[y^1] for mul, which leaks key bit to cache monitor,
2091 * use R[2] as temp, make sure address calc is constant, keep
2092 * &R[0] and &R[1] in cache */
2093 fp_copy(&R[2],
2094 (fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y]) +
2095 ((wc_ptr_t)&R[1] & wc_off_on_addr[y^1]) ) );
2096
2097 /* instead of using R[y] for sqr, which leaks key bit to cache monitor,
2098 * use R[2] as temp, make sure address calc is constant, keep
2099 * &R[0] and &R[1] in cache */
2100 fp_copy((fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y^1]) +
2101 ((wc_ptr_t)&R[1] & wc_off_on_addr[y]) ),
2102 &R[2]);
2103 err = fp_sqr(&R[2], &R[2]);
2104 if (err != FP_OKAY) {
2105 #ifdef WOLFSSL_SMALL_STACK
2106 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2107 #endif
2108 return err;
2109 }
2110 err = fp_montgomery_reduce(&R[2], P, mp);
2111 if (err != FP_OKAY) {
2112 #ifdef WOLFSSL_SMALL_STACK
2113 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2114 #endif
2115 return err;
2116 }
2117 fp_copy(&R[2],
2118 (fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y^1]) +
2119 ((wc_ptr_t)&R[1] & wc_off_on_addr[y]) ) );
2120 #endif /* WC_NO_CACHE_RESISTANT */
2121 }
2122
2123 err = fp_montgomery_reduce(&R[0], P, mp);
2124 fp_copy(&R[0], Y);
2125 #ifdef WOLFSSL_SMALL_STACK
2126 XFREE(R, NULL, DYNAMIC_TYPE_BIGINT);
2127 #endif
2128 return err;
2129 }
2130
2131 #endif /* TFM_TIMING_RESISTANT */
2132
2133 /* y = g**x (mod b)
2134 * Some restrictions... x must be positive and < b
2135 */
_fp_exptmod_nct(fp_int * G,fp_int * X,fp_int * P,fp_int * Y)2136 static int _fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2137 {
2138 fp_int *res;
2139 fp_digit buf, mp;
2140 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
2141 #ifndef WOLFSSL_NO_MALLOC
2142 fp_int *M;
2143 #else
2144 fp_int M[(1 << 6) + 1];
2145 #endif
2146
2147 /* find window size */
2148 x = fp_count_bits (X);
2149 if (x <= 21) {
2150 winsize = 1;
2151 } else if (x <= 36) {
2152 winsize = 3;
2153 } else if (x <= 140) {
2154 winsize = 4;
2155 } else if (x <= 450) {
2156 winsize = 5;
2157 } else {
2158 winsize = 6;
2159 }
2160
2161 /* now setup montgomery */
2162 if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) {
2163 return err;
2164 }
2165
2166 #ifndef WOLFSSL_NO_MALLOC
2167 /* only allocate space for what's needed for window plus res */
2168 M = (fp_int*)XMALLOC(sizeof(fp_int)*((1 << winsize) + 1), NULL,
2169 DYNAMIC_TYPE_BIGINT);
2170 if (M == NULL) {
2171 return FP_MEM;
2172 }
2173 #endif
2174 res = &M[(word32)(1 << winsize)];
2175
2176 /* init M array */
2177 for(x = 0; x < (1 << winsize); x++)
2178 fp_init(&M[x]);
2179
2180 /* setup result */
2181 fp_init(res);
2182
2183 /* create M table
2184 *
2185 * The M table contains powers of the input base, e.g. M[x] = G^x mod P
2186 *
2187 * The first half of the table is not computed though except for M[0] and M[1]
2188 */
2189
2190 /* now we need R mod m */
2191 err = fp_montgomery_calc_normalization (res, P);
2192 if (err != FP_OKAY) {
2193 #ifndef WOLFSSL_NO_MALLOC
2194 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2195 #endif
2196 return err;
2197 }
2198
2199 /* now set M[1] to G * R mod m */
2200 if (fp_cmp_mag(P, G) != FP_GT) {
2201 /* G > P so we reduce it first */
2202 fp_mod(G, P, &M[1]);
2203 } else {
2204 fp_copy(G, &M[1]);
2205 }
2206 fp_mulmod (&M[1], res, P, &M[1]);
2207
2208 /* compute the value at M[1<<(winsize-1)] by
2209 * squaring M[1] (winsize-1) times */
2210 fp_copy (&M[1], &M[(word32)(1 << (winsize - 1))]);
2211 for (x = 0; x < (winsize - 1); x++) {
2212 err = fp_sqr (&M[(word32)(1 << (winsize - 1))],
2213 &M[(word32)(1 << (winsize - 1))]);
2214 if (err != FP_OKAY) {
2215 #ifndef WOLFSSL_NO_MALLOC
2216 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2217 #endif
2218 return err;
2219 }
2220 err = fp_montgomery_reduce_ex(&M[(word32)(1 << (winsize - 1))], P, mp, 0);
2221 if (err != FP_OKAY) {
2222 #ifndef WOLFSSL_NO_MALLOC
2223 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2224 #endif
2225 return err;
2226 }
2227 }
2228
2229 /* create upper table */
2230 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2231 err = fp_mul(&M[x - 1], &M[1], &M[x]);
2232 if (err != FP_OKAY) {
2233 #ifndef WOLFSSL_NO_MALLOC
2234 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2235 #endif
2236 return err;
2237 }
2238 err = fp_montgomery_reduce_ex(&M[x], P, mp, 0);
2239 if (err != FP_OKAY) {
2240 #ifndef WOLFSSL_NO_MALLOC
2241 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2242 #endif
2243 return err;
2244 }
2245 }
2246
2247 /* set initial mode and bit cnt */
2248 mode = 0;
2249 bitcnt = (x % DIGIT_BIT) + 1;
2250 buf = 0;
2251 digidx = X->used - 1;
2252 bitcpy = 0;
2253 bitbuf = 0;
2254
2255 for (;;) {
2256 /* grab next digit as required */
2257 if (--bitcnt == 0) {
2258 /* if digidx == -1 we are out of digits so break */
2259 if (digidx == -1) {
2260 break;
2261 }
2262 /* read next digit and reset bitcnt */
2263 buf = X->dp[digidx--];
2264 bitcnt = (int)DIGIT_BIT;
2265 }
2266
2267 /* grab the next msb from the exponent */
2268 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2269 buf <<= (fp_digit)1;
2270
2271 /* if the bit is zero and mode == 0 then we ignore it
2272 * These represent the leading zero bits before the first 1 bit
2273 * in the exponent. Technically this opt is not required but it
2274 * does lower the # of trivial squaring/reductions used
2275 */
2276 if (mode == 0 && y == 0) {
2277 continue;
2278 }
2279
2280 /* if the bit is zero and mode == 1 then we square */
2281 if (mode == 1 && y == 0) {
2282 err = fp_sqr(res, res);
2283 if (err != FP_OKAY) {
2284 #ifndef WOLFSSL_NO_MALLOC
2285 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2286 #endif
2287 return err;
2288 }
2289 err = fp_montgomery_reduce_ex(res, P, mp, 0);
2290 if (err != FP_OKAY) {
2291 #ifndef WOLFSSL_NO_MALLOC
2292 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2293 #endif
2294 return err;
2295 }
2296 continue;
2297 }
2298
2299 /* else we add it to the window */
2300 bitbuf |= (y << (winsize - ++bitcpy));
2301 mode = 2;
2302
2303 if (bitcpy == winsize) {
2304 /* ok window is filled so square as required and multiply */
2305 /* square first */
2306 for (x = 0; x < winsize; x++) {
2307 err = fp_sqr(res, res);
2308 if (err != FP_OKAY) {
2309 #ifndef WOLFSSL_NO_MALLOC
2310 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2311 #endif
2312 return err;
2313 }
2314 err = fp_montgomery_reduce_ex(res, P, mp, 0);
2315 if (err != FP_OKAY) {
2316 #ifndef WOLFSSL_NO_MALLOC
2317 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2318 #endif
2319 return err;
2320 }
2321 }
2322
2323 /* then multiply */
2324 err = fp_mul(res, &M[bitbuf], res);
2325 if (err != FP_OKAY) {
2326 #ifndef WOLFSSL_NO_MALLOC
2327 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2328 #endif
2329 return err;
2330 }
2331 err = fp_montgomery_reduce_ex(res, P, mp, 0);
2332 if (err != FP_OKAY) {
2333 #ifndef WOLFSSL_NO_MALLOC
2334 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2335 #endif
2336 return err;
2337 }
2338
2339 /* empty window and reset */
2340 bitcpy = 0;
2341 bitbuf = 0;
2342 mode = 1;
2343 }
2344 }
2345
2346 /* if bits remain then square/multiply */
2347 if (mode == 2 && bitcpy > 0) {
2348 /* square then multiply if the bit is set */
2349 for (x = 0; x < bitcpy; x++) {
2350 err = fp_sqr(res, res);
2351 if (err != FP_OKAY) {
2352 #ifndef WOLFSSL_NO_MALLOC
2353 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2354 #endif
2355 return err;
2356 }
2357 err = fp_montgomery_reduce_ex(res, P, mp, 0);
2358 if (err != FP_OKAY) {
2359 #ifndef WOLFSSL_NO_MALLOC
2360 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2361 #endif
2362 return err;
2363 }
2364
2365 /* get next bit of the window */
2366 bitbuf <<= 1;
2367 if ((bitbuf & (1 << winsize)) != 0) {
2368 /* then multiply */
2369 err = fp_mul(res, &M[1], res);
2370 if (err != FP_OKAY) {
2371 #ifndef WOLFSSL_NO_MALLOC
2372 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2373 #endif
2374 return err;
2375 }
2376 err = fp_montgomery_reduce_ex(res, P, mp, 0);
2377 if (err != FP_OKAY) {
2378 #ifndef WOLFSSL_NO_MALLOC
2379 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2380 #endif
2381 return err;
2382 }
2383 }
2384 }
2385 }
2386
2387 /* fixup result if Montgomery reduction is used
2388 * recall that any value in a Montgomery system is
2389 * actually multiplied by R mod n. So we have
2390 * to reduce one more time to cancel out the factor
2391 * of R.
2392 */
2393 err = fp_montgomery_reduce_ex(res, P, mp, 0);
2394
2395 /* swap res with Y */
2396 fp_copy (res, Y);
2397
2398 #ifndef WOLFSSL_NO_MALLOC
2399 XFREE(M, NULL, DYNAMIC_TYPE_BIGINT);
2400 #endif
2401 return err;
2402 }
2403
2404
2405 #ifdef TFM_TIMING_RESISTANT
2406 #if DIGIT_BIT <= 16
2407 #define WINSIZE 2
2408 #elif DIGIT_BIT <= 32
2409 #define WINSIZE 3
2410 #elif DIGIT_BIT <= 64
2411 #define WINSIZE 4
2412 #elif DIGIT_BIT <= 128
2413 #define WINSIZE 5
2414 #endif
2415
2416 /* y = 2**x (mod b)
2417 * Some restrictions... x must be positive and < b
2418 */
_fp_exptmod_base_2(fp_int * X,int digits,fp_int * P,fp_int * Y)2419 static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
2420 fp_int * Y)
2421 {
2422 fp_digit buf, mp;
2423 int err, bitbuf, bitcpy, bitcnt, digidx, x, y;
2424 #ifdef WOLFSSL_SMALL_STACK
2425 fp_int *res;
2426 fp_int *tmp;
2427 #else
2428 fp_int res[1];
2429 fp_int tmp[1];
2430 #endif
2431
2432 #ifdef WOLFSSL_SMALL_STACK
2433 res = (fp_int*)XMALLOC(2*sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
2434 if (res == NULL) {
2435 return FP_MEM;
2436 }
2437 tmp = &res[1];
2438 #endif
2439
2440 /* now setup montgomery */
2441 if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
2442 #ifdef WOLFSSL_SMALL_STACK
2443 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2444 #endif
2445 return err;
2446 }
2447
2448 /* setup result */
2449 fp_init(res);
2450 fp_init(tmp);
2451
2452 err = fp_mul_2d(P, 1 << WINSIZE, tmp);
2453 if (err != FP_OKAY) {
2454 #ifdef WOLFSSL_SMALL_STACK
2455 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2456 #endif
2457 return err;
2458 }
2459
2460 /* now we need R mod m */
2461 err = fp_montgomery_calc_normalization(res, P);
2462 if (err != FP_OKAY) {
2463 #ifdef WOLFSSL_SMALL_STACK
2464 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2465 #endif
2466 return err;
2467 }
2468
2469 /* Get the top bits left over after taking WINSIZE bits starting at the
2470 * least-significant.
2471 */
2472 digidx = digits - 1;
2473 bitcpy = (digits * DIGIT_BIT) % WINSIZE;
2474 if (bitcpy > 0) {
2475 bitcnt = (int)DIGIT_BIT - bitcpy;
2476 buf = X->dp[digidx--];
2477 bitbuf = (int)(buf >> bitcnt);
2478 /* Multiply montgomery representation of 1 by 2 ^ top */
2479 err = fp_mul_2d(res, bitbuf, res);
2480 if (err != FP_OKAY) {
2481 #ifdef WOLFSSL_SMALL_STACK
2482 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2483 #endif
2484 return err;
2485 }
2486 err = fp_add(res, tmp, res);
2487 if (err != FP_OKAY) {
2488 #ifdef WOLFSSL_SMALL_STACK
2489 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2490 #endif
2491 return err;
2492 }
2493 err = fp_mod(res, P, res);
2494 if (err != FP_OKAY) {
2495 #ifdef WOLFSSL_SMALL_STACK
2496 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2497 #endif
2498 return err;
2499 }
2500 /* Move out bits used */
2501 buf <<= bitcpy;
2502 bitcnt++;
2503 }
2504 else {
2505 bitcnt = 1;
2506 buf = 0;
2507 }
2508
2509 /* empty window and reset */
2510 bitbuf = 0;
2511 bitcpy = 0;
2512
2513 for (;;) {
2514 /* grab next digit as required */
2515 if (--bitcnt == 0) {
2516 /* if digidx == -1 we are out of digits so break */
2517 if (digidx == -1) {
2518 break;
2519 }
2520 /* read next digit and reset bitcnt */
2521 buf = X->dp[digidx--];
2522 bitcnt = (int)DIGIT_BIT;
2523 }
2524
2525 /* grab the next msb from the exponent */
2526 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2527 buf <<= (fp_digit)1;
2528 /* add bit to the window */
2529 bitbuf |= (y << (WINSIZE - ++bitcpy));
2530
2531 if (bitcpy == WINSIZE) {
2532 /* ok window is filled so square as required and multiply */
2533 /* square first */
2534 for (x = 0; x < WINSIZE; x++) {
2535 err = fp_sqr(res, res);
2536 if (err != FP_OKAY) {
2537 #ifdef WOLFSSL_SMALL_STACK
2538 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2539 #endif
2540 return err;
2541 }
2542 err = fp_montgomery_reduce(res, P, mp);
2543 if (err != FP_OKAY) {
2544 #ifdef WOLFSSL_SMALL_STACK
2545 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2546 #endif
2547 return err;
2548 }
2549 }
2550
2551 /* then multiply by 2^bitbuf */
2552 err = fp_mul_2d(res, bitbuf, res);
2553 if (err != FP_OKAY) {
2554 #ifdef WOLFSSL_SMALL_STACK
2555 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2556 #endif
2557 return err;
2558 }
2559 /* Add in value to make mod operation take same time */
2560 err = fp_add(res, tmp, res);
2561 if (err != FP_OKAY) {
2562 #ifdef WOLFSSL_SMALL_STACK
2563 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2564 #endif
2565 return err;
2566 }
2567 err = fp_mod(res, P, res);
2568 if (err != FP_OKAY) {
2569 #ifdef WOLFSSL_SMALL_STACK
2570 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2571 #endif
2572 return err;
2573 }
2574
2575 /* empty window and reset */
2576 bitcpy = 0;
2577 bitbuf = 0;
2578 }
2579 }
2580
2581 /* fixup result if Montgomery reduction is used
2582 * recall that any value in a Montgomery system is
2583 * actually multiplied by R mod n. So we have
2584 * to reduce one more time to cancel out the factor
2585 * of R.
2586 */
2587 err = fp_montgomery_reduce(res, P, mp);
2588
2589 /* swap res with Y */
2590 fp_copy(res, Y);
2591
2592 #ifdef WOLFSSL_SMALL_STACK
2593 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2594 #endif
2595 return err;
2596 }
2597
2598 #undef WINSIZE
2599 #else
2600 #if DIGIT_BIT < 16
2601 #define WINSIZE 3
2602 #elif DIGIT_BIT < 32
2603 #define WINSIZE 4
2604 #elif DIGIT_BIT < 64
2605 #define WINSIZE 5
2606 #elif DIGIT_BIT < 128
2607 #define WINSIZE 6
2608 #elif DIGIT_BIT == 128
2609 #define WINSIZE 7
2610 #endif
2611
2612 /* y = 2**x (mod b)
2613 * Some restrictions... x must be positive and < b
2614 */
_fp_exptmod_base_2(fp_int * X,int digits,fp_int * P,fp_int * Y)2615 static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
2616 fp_int * Y)
2617 {
2618 fp_digit buf, mp;
2619 int err, bitbuf, bitcpy, bitcnt, digidx, x, y;
2620 #ifdef WOLFSSL_SMALL_STACK
2621 fp_int *res;
2622 #else
2623 fp_int res[1];
2624 #endif
2625
2626 /* now setup montgomery */
2627 if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
2628 return err;
2629 }
2630
2631 #ifdef WOLFSSL_SMALL_STACK
2632 res = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
2633 if (res == NULL) {
2634 return FP_MEM;
2635 }
2636 #endif
2637
2638 /* setup result */
2639 fp_init(res);
2640
2641 /* now we need R mod m */
2642 err = fp_montgomery_calc_normalization(res, P);
2643 if (err != FP_OKAY) {
2644 #ifdef WOLFSSL_SMALL_STACK
2645 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2646 #endif
2647 return err;
2648 }
2649
2650 /* Get the top bits left over after taking WINSIZE bits starting at the
2651 * least-significant.
2652 */
2653 digidx = digits - 1;
2654 bitcpy = (digits * DIGIT_BIT) % WINSIZE;
2655 if (bitcpy > 0) {
2656 bitcnt = (int)DIGIT_BIT - bitcpy;
2657 buf = X->dp[digidx--];
2658 bitbuf = (int)(buf >> bitcnt);
2659 /* Multiply montgomery representation of 1 by 2 ^ top */
2660 err = fp_mul_2d(res, bitbuf, res);
2661 if (err != FP_OKAY) {
2662 #ifdef WOLFSSL_SMALL_STACK
2663 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2664 #endif
2665 return err;
2666 }
2667 err = fp_mod(res, P, res);
2668 if (err != FP_OKAY) {
2669 #ifdef WOLFSSL_SMALL_STACK
2670 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2671 #endif
2672 return err;
2673 }
2674 /* Move out bits used */
2675 buf <<= bitcpy;
2676 bitcnt++;
2677 }
2678 else {
2679 bitcnt = 1;
2680 buf = 0;
2681 }
2682
2683 /* empty window and reset */
2684 bitbuf = 0;
2685 bitcpy = 0;
2686
2687 for (;;) {
2688 /* grab next digit as required */
2689 if (--bitcnt == 0) {
2690 /* if digidx == -1 we are out of digits so break */
2691 if (digidx == -1) {
2692 break;
2693 }
2694 /* read next digit and reset bitcnt */
2695 buf = X->dp[digidx--];
2696 bitcnt = (int)DIGIT_BIT;
2697 }
2698
2699 /* grab the next msb from the exponent */
2700 y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
2701 buf <<= (fp_digit)1;
2702 /* add bit to the window */
2703 bitbuf |= (y << (WINSIZE - ++bitcpy));
2704
2705 if (bitcpy == WINSIZE) {
2706 /* ok window is filled so square as required and multiply */
2707 /* square first */
2708 for (x = 0; x < WINSIZE; x++) {
2709 err = fp_sqr(res, res);
2710 if (err != FP_OKAY) {
2711 #ifdef WOLFSSL_SMALL_STACK
2712 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2713 #endif
2714 return err;
2715 }
2716 err = fp_montgomery_reduce(res, P, mp);
2717 if (err != FP_OKAY) {
2718 #ifdef WOLFSSL_SMALL_STACK
2719 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2720 #endif
2721 return err;
2722 }
2723 }
2724
2725 /* then multiply by 2^bitbuf */
2726 err = fp_mul_2d(res, bitbuf, res);
2727 if (err != FP_OKAY) {
2728 #ifdef WOLFSSL_SMALL_STACK
2729 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2730 #endif
2731 return err;
2732 }
2733 err = fp_mod(res, P, res);
2734 if (err != FP_OKAY) {
2735 #ifdef WOLFSSL_SMALL_STACK
2736 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2737 #endif
2738 return err;
2739 }
2740
2741 /* empty window and reset */
2742 bitcpy = 0;
2743 bitbuf = 0;
2744 }
2745 }
2746
2747 /* fixup result if Montgomery reduction is used
2748 * recall that any value in a Montgomery system is
2749 * actually multiplied by R mod n. So we have
2750 * to reduce one more time to cancel out the factor
2751 * of R.
2752 */
2753 err = fp_montgomery_reduce(res, P, mp);
2754
2755 /* swap res with Y */
2756 fp_copy(res, Y);
2757
2758 #ifdef WOLFSSL_SMALL_STACK
2759 XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2760 #endif
2761 return err;
2762 }
2763
2764 #undef WINSIZE
2765 #endif
2766
2767
fp_exptmod(fp_int * G,fp_int * X,fp_int * P,fp_int * Y)2768 int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2769 {
2770
2771 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2772 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2773 int x = fp_count_bits (X);
2774 #endif
2775
2776 /* handle modulus of zero and prevent overflows */
2777 if (fp_iszero(P) || (P->used > (FP_SIZE/2))) {
2778 return FP_VAL;
2779 }
2780 if (fp_isone(P)) {
2781 fp_set(Y, 0);
2782 return FP_OKAY;
2783 }
2784 if (fp_iszero(X)) {
2785 fp_set(Y, 1);
2786 return FP_OKAY;
2787 }
2788 if (fp_iszero(G)) {
2789 fp_set(Y, 0);
2790 return FP_OKAY;
2791 }
2792
2793 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2794 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2795 if(x > EPS_RSA_EXPT_XBTIS) {
2796 return esp_mp_exptmod(G, X, x, P, Y);
2797 }
2798 #endif
2799
2800 if (X->sign == FP_NEG) {
2801 #ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */
2802 int err;
2803 #ifndef WOLFSSL_SMALL_STACK
2804 fp_int tmp[2];
2805 #else
2806 fp_int *tmp;
2807 #endif
2808
2809 #ifdef WOLFSSL_SMALL_STACK
2810 tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
2811 if (tmp == NULL)
2812 return FP_MEM;
2813 #endif
2814
2815 /* yes, copy G and invmod it */
2816 fp_init_copy(&tmp[0], G);
2817 fp_init_copy(&tmp[1], P);
2818 tmp[1].sign = FP_ZPOS;
2819 err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
2820 if (err == FP_OKAY) {
2821 fp_copy(X, &tmp[1]);
2822 tmp[1].sign = FP_ZPOS;
2823 #ifdef TFM_TIMING_RESISTANT
2824 err = _fp_exptmod_ct(&tmp[0], &tmp[1], tmp[1].used, P, Y);
2825 #else
2826 err = _fp_exptmod_nct(&tmp[0], &tmp[1], P, Y);
2827 #endif
2828 if ((err == 0) && (P->sign == FP_NEG)) {
2829 err = fp_add(Y, P, Y);
2830 }
2831 }
2832 #ifdef WOLFSSL_SMALL_STACK
2833 XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2834 #endif
2835 return err;
2836 #else
2837 return FP_VAL;
2838 #endif
2839 }
2840 else if (G->used == 1 && G->dp[0] == 2) {
2841 return _fp_exptmod_base_2(X, X->used, P, Y);
2842 }
2843 else {
2844 /* Positive exponent so just exptmod */
2845 #ifdef TFM_TIMING_RESISTANT
2846 return _fp_exptmod_ct(G, X, X->used, P, Y);
2847 #else
2848 return _fp_exptmod_nct(G, X, P, Y);
2849 #endif
2850 }
2851 }
2852
fp_exptmod_ex(fp_int * G,fp_int * X,int digits,fp_int * P,fp_int * Y)2853 int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y)
2854 {
2855
2856 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2857 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2858 int x = fp_count_bits (X);
2859 #endif
2860
2861 if (fp_iszero(G)) {
2862 fp_set(Y, 0);
2863 return FP_OKAY;
2864 }
2865
2866 /* prevent overflows */
2867 if (P->used > (FP_SIZE/2)) {
2868 return FP_VAL;
2869 }
2870
2871 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2872 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2873 if(x > EPS_RSA_EXPT_XBTIS) {
2874 return esp_mp_exptmod(G, X, x, P, Y);
2875 }
2876 #endif
2877
2878 if (X->sign == FP_NEG) {
2879 #ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */
2880 int err;
2881 #ifndef WOLFSSL_SMALL_STACK
2882 fp_int tmp[2];
2883 #else
2884 fp_int *tmp;
2885 #endif
2886
2887 #ifdef WOLFSSL_SMALL_STACK
2888 tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2889 if (tmp == NULL)
2890 return FP_MEM;
2891 #endif
2892
2893 /* yes, copy G and invmod it */
2894 fp_init_copy(&tmp[0], G);
2895 fp_init_copy(&tmp[1], P);
2896 tmp[1].sign = FP_ZPOS;
2897 err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
2898 if (err == FP_OKAY) {
2899 X->sign = FP_ZPOS;
2900 #ifdef TFM_TIMING_RESISTANT
2901 err = _fp_exptmod_ct(&tmp[0], X, digits, P, Y);
2902 #else
2903 err = _fp_exptmod_nct(&tmp[0], X, P, Y);
2904 (void)digits;
2905 #endif
2906 if (X != Y) {
2907 X->sign = FP_NEG;
2908 }
2909 if ((err == 0) && (P->sign == FP_NEG)) {
2910 err = fp_add(Y, P, Y);
2911 }
2912 }
2913 #ifdef WOLFSSL_SMALL_STACK
2914 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
2915 #endif
2916 return err;
2917 #else
2918 return FP_VAL;
2919 #endif
2920 }
2921 else {
2922 /* Positive exponent so just exptmod */
2923 #ifdef TFM_TIMING_RESISTANT
2924 return _fp_exptmod_ct(G, X, digits, P, Y);
2925 #else
2926 return _fp_exptmod_nct(G, X, P, Y);
2927 #endif
2928 }
2929 }
2930
fp_exptmod_nct(fp_int * G,fp_int * X,fp_int * P,fp_int * Y)2931 int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
2932 {
2933 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2934 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2935 int x = fp_count_bits (X);
2936 #endif
2937
2938 if (fp_iszero(G)) {
2939 fp_set(G, 0);
2940 return FP_OKAY;
2941 }
2942
2943 /* prevent overflows */
2944 if (P->used > (FP_SIZE/2)) {
2945 return FP_VAL;
2946 }
2947
2948 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
2949 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
2950 if(x > EPS_RSA_EXPT_XBTIS) {
2951 return esp_mp_exptmod(G, X, x, P, Y);
2952 }
2953 #endif
2954
2955 if (X->sign == FP_NEG) {
2956 #ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */
2957 int err;
2958 #ifndef WOLFSSL_SMALL_STACK
2959 fp_int tmp[2];
2960 #else
2961 fp_int *tmp;
2962 #endif
2963
2964 #ifdef WOLFSSL_SMALL_STACK
2965 tmp = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2966 if (tmp == NULL)
2967 return FP_MEM;
2968 #endif
2969
2970 /* yes, copy G and invmod it */
2971 fp_init_copy(&tmp[0], G);
2972 fp_init_copy(&tmp[1], P);
2973 tmp[1].sign = FP_ZPOS;
2974 err = fp_invmod(&tmp[0], &tmp[1], &tmp[0]);
2975 if (err == FP_OKAY) {
2976 X->sign = FP_ZPOS;
2977 err = _fp_exptmod_nct(&tmp[0], X, P, Y);
2978 if (X != Y) {
2979 X->sign = FP_NEG;
2980 }
2981 if ((err == 0) && (P->sign == FP_NEG)) {
2982 err = fp_add(Y, P, Y);
2983 }
2984 }
2985 #ifdef WOLFSSL_SMALL_STACK
2986 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
2987 #endif
2988 return err;
2989 #else
2990 return FP_VAL;
2991 #endif
2992 }
2993 else {
2994 /* Positive exponent so just exptmod */
2995 return _fp_exptmod_nct(G, X, P, Y);
2996 }
2997 }
2998
2999 /* computes a = 2**b */
fp_2expt(fp_int * a,int b)3000 void fp_2expt(fp_int *a, int b)
3001 {
3002 int z;
3003
3004 /* zero a as per default */
3005 fp_zero (a);
3006
3007 if (b < 0) {
3008 return;
3009 }
3010
3011 z = b / DIGIT_BIT;
3012 if (z >= FP_SIZE) {
3013 return;
3014 }
3015
3016 /* set the used count of where the bit will go */
3017 a->used = z + 1;
3018
3019 /* put the single bit in its place */
3020 a->dp[z] = ((fp_digit)1) << (b % DIGIT_BIT);
3021 }
3022
3023 /* b = a*a */
fp_sqr(fp_int * A,fp_int * B)3024 int fp_sqr(fp_int *A, fp_int *B)
3025 {
3026 int err;
3027 int y, oldused;
3028
3029 oldused = B->used;
3030 y = A->used;
3031
3032 /* error if we're out of range */
3033 if (y + y >= FP_SIZE) {
3034 err = FP_VAL;
3035 goto clean;
3036 }
3037
3038 #if defined(TFM_SQR3) && FP_SIZE >= 6
3039 if (y <= 3) {
3040 err = fp_sqr_comba3(A,B);
3041 goto clean;
3042 }
3043 #endif
3044 #if defined(TFM_SQR4) && FP_SIZE >= 8
3045 if (y == 4) {
3046 err = fp_sqr_comba4(A,B);
3047 goto clean;
3048 }
3049 #endif
3050 #if defined(TFM_SQR6) && FP_SIZE >= 12
3051 if (y <= 6) {
3052 err = fp_sqr_comba6(A,B);
3053 goto clean;
3054 }
3055 #endif
3056 #if defined(TFM_SQR7) && FP_SIZE >= 14
3057 if (y == 7) {
3058 err = fp_sqr_comba7(A,B);
3059 goto clean;
3060 }
3061 #endif
3062 #if defined(TFM_SQR8) && FP_SIZE >= 16
3063 if (y == 8) {
3064 err = fp_sqr_comba8(A,B);
3065 goto clean;
3066 }
3067 #endif
3068 #if defined(TFM_SQR9) && FP_SIZE >= 18
3069 if (y == 9) {
3070 err = fp_sqr_comba9(A,B);
3071 goto clean;
3072 }
3073 #endif
3074 #if defined(TFM_SQR12) && FP_SIZE >= 24
3075 if (y <= 12) {
3076 err = fp_sqr_comba12(A,B);
3077 goto clean;
3078 }
3079 #endif
3080 #if defined(TFM_SQR17) && FP_SIZE >= 34
3081 if (y <= 17) {
3082 err = fp_sqr_comba17(A,B);
3083 goto clean;
3084 }
3085 #endif
3086 #if defined(TFM_SMALL_SET)
3087 if (y <= 16) {
3088 err = fp_sqr_comba_small(A,B);
3089 goto clean;
3090 }
3091 #endif
3092 #if defined(TFM_SQR20) && FP_SIZE >= 40
3093 if (y <= 20) {
3094 err = fp_sqr_comba20(A,B);
3095 goto clean;
3096 }
3097 #endif
3098 #if defined(TFM_SQR24) && FP_SIZE >= 48
3099 if (y <= 24) {
3100 err = fp_sqr_comba24(A,B);
3101 goto clean;
3102 }
3103 #endif
3104 #if defined(TFM_SQR28) && FP_SIZE >= 56
3105 if (y <= 28) {
3106 err = fp_sqr_comba28(A,B);
3107 goto clean;
3108 }
3109 #endif
3110 #if defined(TFM_SQR32) && FP_SIZE >= 64
3111 if (y <= 32) {
3112 err = fp_sqr_comba32(A,B);
3113 goto clean;
3114 }
3115 #endif
3116 #if defined(TFM_SQR48) && FP_SIZE >= 96
3117 if (y <= 48) {
3118 err = fp_sqr_comba48(A,B);
3119 goto clean;
3120 }
3121 #endif
3122 #if defined(TFM_SQR64) && FP_SIZE >= 128
3123 if (y <= 64) {
3124 err = fp_sqr_comba64(A,B);
3125 goto clean;
3126 }
3127 #endif
3128 err = fp_sqr_comba(A, B);
3129
3130 clean:
3131 /* zero any excess digits on the destination that we didn't write to */
3132 for (y = B->used; y >= 0 && y < oldused; y++) {
3133 B->dp[y] = 0;
3134 }
3135
3136 return err;
3137 }
3138
3139 /* generic comba squarer */
fp_sqr_comba(fp_int * A,fp_int * B)3140 int fp_sqr_comba(fp_int *A, fp_int *B)
3141 {
3142 int pa, ix, iz;
3143 fp_digit c0, c1, c2;
3144 #ifdef TFM_ISO
3145 fp_word tt;
3146 #endif
3147 fp_int *dst;
3148 #ifndef WOLFSSL_SMALL_STACK
3149 fp_int tmp[1];
3150 #else
3151 fp_int *tmp;
3152 #endif
3153
3154 #ifdef WOLFSSL_SMALL_STACK
3155 tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
3156 if (tmp == NULL)
3157 return FP_MEM;
3158 #endif
3159
3160 /* get size of output and trim */
3161 pa = A->used + A->used;
3162 if (pa >= FP_SIZE) {
3163 pa = FP_SIZE-1;
3164 }
3165
3166 /* number of output digits to produce */
3167 COMBA_START;
3168 COMBA_CLEAR;
3169
3170 if (A == B) {
3171 fp_init(tmp);
3172 dst = tmp;
3173 } else {
3174 fp_zero(B);
3175 dst = B;
3176 }
3177
3178 for (ix = 0; ix < pa; ix++) {
3179 int tx, ty, iy;
3180 fp_digit *tmpy, *tmpx;
3181
3182 /* get offsets into the two bignums */
3183 ty = MIN(A->used-1, ix);
3184 tx = ix - ty;
3185
3186 /* setup temp aliases */
3187 tmpx = A->dp + tx;
3188 tmpy = A->dp + ty;
3189
3190 /* this is the number of times the loop will iterate,
3191 while (tx++ < a->used && ty-- >= 0) { ... }
3192 */
3193 iy = MIN(A->used-tx, ty+1);
3194
3195 /* now for squaring tx can never equal ty
3196 * we halve the distance since they approach
3197 * at a rate of 2x and we have to round because
3198 * odd cases need to be executed
3199 */
3200 iy = MIN(iy, (ty-tx+1)>>1);
3201
3202 /* forward carries */
3203 COMBA_FORWARD;
3204
3205 /* execute loop */
3206 for (iz = 0; iz < iy; iz++) {
3207 SQRADD2(*tmpx++, *tmpy--);
3208 }
3209
3210 /* even columns have the square term in them */
3211 if ((ix&1) == 0) {
3212 /* TAO change COMBA_ADD back to SQRADD */
3213 SQRADD(A->dp[ix>>1], A->dp[ix>>1]);
3214 }
3215
3216 /* store it */
3217 COMBA_STORE(dst->dp[ix]);
3218 }
3219
3220 COMBA_FINI;
3221
3222 /* setup dest */
3223 dst->used = pa;
3224 fp_clamp (dst);
3225 if (dst != B) {
3226 fp_copy(dst, B);
3227 }
3228
3229 /* Variables used but not seen by cppcheck. */
3230 (void)c0; (void)c1; (void)c2;
3231 #ifdef TFM_ISO
3232 (void)tt;
3233 #endif
3234
3235 #ifdef WOLFSSL_SMALL_STACK
3236 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
3237 #endif
3238 return FP_OKAY;
3239 }
3240
fp_cmp(fp_int * a,fp_int * b)3241 int fp_cmp(fp_int *a, fp_int *b)
3242 {
3243 if (a->sign == FP_NEG && b->sign == FP_ZPOS) {
3244 return FP_LT;
3245 } else if (a->sign == FP_ZPOS && b->sign == FP_NEG) {
3246 return FP_GT;
3247 } else {
3248 /* compare digits */
3249 if (a->sign == FP_NEG) {
3250 /* if negative compare opposite direction */
3251 return fp_cmp_mag(b, a);
3252 } else {
3253 return fp_cmp_mag(a, b);
3254 }
3255 }
3256 }
3257
3258 /* compare against a single digit */
fp_cmp_d(fp_int * a,fp_digit b)3259 int fp_cmp_d(fp_int *a, fp_digit b)
3260 {
3261 /* special case for zero*/
3262 if (a->used == 0 && b == 0)
3263 return FP_EQ;
3264
3265 /* compare based on sign */
3266 if ((b && a->used == 0) || a->sign == FP_NEG) {
3267 return FP_LT;
3268 }
3269
3270 /* compare based on magnitude */
3271 if (a->used > 1) {
3272 return FP_GT;
3273 }
3274
3275 /* compare the only digit of a to b */
3276 if (a->dp[0] > b) {
3277 return FP_GT;
3278 } else if (a->dp[0] < b) {
3279 return FP_LT;
3280 } else {
3281 return FP_EQ;
3282 }
3283
3284 }
3285
fp_cmp_mag(fp_int * a,fp_int * b)3286 int fp_cmp_mag(fp_int *a, fp_int *b)
3287 {
3288 int x;
3289
3290 if (a->used > b->used) {
3291 return FP_GT;
3292 } else if (a->used < b->used) {
3293 return FP_LT;
3294 } else {
3295 for (x = a->used - 1; x >= 0; x--) {
3296 if (a->dp[x] > b->dp[x]) {
3297 return FP_GT;
3298 } else if (a->dp[x] < b->dp[x]) {
3299 return FP_LT;
3300 }
3301 }
3302 }
3303 return FP_EQ;
3304 }
3305
3306
3307 /* sets up the montgomery reduction */
fp_montgomery_setup(fp_int * a,fp_digit * rho)3308 int fp_montgomery_setup(fp_int *a, fp_digit *rho)
3309 {
3310 fp_digit x, b;
3311
3312 /* fast inversion mod 2**k
3313 *
3314 * Based on the fact that
3315 *
3316 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3317 * => 2*X*A - X*X*A*A = 1
3318 * => 2*(1) - (1) = 1
3319 */
3320 b = a->dp[0];
3321
3322 if ((b & 1) == 0) {
3323 return FP_VAL;
3324 }
3325
3326 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3327 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
3328 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
3329 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
3330 #ifdef FP_64BIT
3331 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
3332 #endif
3333
3334 /* rho = -1/m mod b */
3335 *rho = (fp_digit) (((fp_word) 1 << ((fp_word) DIGIT_BIT)) - ((fp_word)x));
3336
3337 return FP_OKAY;
3338 }
3339
3340 /* computes a = B**n mod b without division or multiplication useful for
3341 * normalizing numbers in a Montgomery system.
3342 */
fp_montgomery_calc_normalization(fp_int * a,fp_int * b)3343 int fp_montgomery_calc_normalization(fp_int *a, fp_int *b)
3344 {
3345 int x, bits;
3346
3347 /* how many bits of last digit does b use */
3348 bits = fp_count_bits (b) % DIGIT_BIT;
3349 if (!bits) bits = DIGIT_BIT;
3350
3351 /* compute A = B^(n-1) * 2^(bits-1) */
3352 if (b->used > 1) {
3353 fp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1);
3354 } else {
3355 fp_set(a, 1);
3356 bits = 1;
3357 }
3358
3359 /* now compute C = A * B mod b */
3360 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
3361 int err = fp_mul_2 (a, a);
3362 if (err != FP_OKAY) {
3363 return err;
3364 }
3365 if (fp_cmp_mag (a, b) != FP_LT) {
3366 s_fp_sub (a, b, a);
3367 }
3368 }
3369 return FP_OKAY;
3370 }
3371
3372
3373 #ifdef TFM_SMALL_MONT_SET
3374 #include "fp_mont_small.i"
3375 #endif
3376
3377 #ifdef HAVE_INTEL_MULX
innermul8_mulx(fp_digit * c_mulx,fp_digit * cy_mulx,fp_digit * tmpm,fp_digit mu)3378 static WC_INLINE void innermul8_mulx(fp_digit *c_mulx, fp_digit *cy_mulx, fp_digit *tmpm, fp_digit mu)
3379 {
3380 fp_digit cy = *cy_mulx ;
3381 INNERMUL8_MULX ;
3382 *cy_mulx = cy ;
3383 }
3384
3385 /* computes x/R == x (mod N) via Montgomery Reduction */
fp_montgomery_reduce_mulx(fp_int * a,fp_int * m,fp_digit mp,int ct)3386 static int fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp, int ct)
3387 {
3388 #ifndef WOLFSSL_SMALL_STACK
3389 fp_digit c[FP_SIZE+1];
3390 #else
3391 fp_digit *c;
3392 #endif
3393 fp_digit *_c, *tmpm, mu = 0;
3394 int oldused, x, y, pa;
3395
3396 /* bail if too large */
3397 if (m->used > (FP_SIZE/2)) {
3398 (void)mu; /* shut up compiler */
3399 return FP_VAL;
3400 }
3401
3402 #ifdef TFM_SMALL_MONT_SET
3403 if (m->used <= 16) {
3404 return fp_montgomery_reduce_small(a, m, mp);
3405 }
3406 #endif
3407
3408 #ifdef WOLFSSL_SMALL_STACK
3409 /* only allocate space for what's needed for window plus res */
3410 c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
3411 if (c == NULL) {
3412 return FP_MEM;
3413 }
3414 #endif
3415
3416 /* now zero the buff */
3417 XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
3418 pa = m->used;
3419
3420 /* copy the input */
3421 #ifdef TFM_TIMING_RESISTANT
3422 if (a->used <= m->used) {
3423 oldused = m->used;
3424 }
3425 else {
3426 oldused = m->used * 2;
3427 }
3428 #else
3429 oldused = a->used;
3430 #endif
3431 for (x = 0; x < oldused; x++) {
3432 c[x] = a->dp[x];
3433 }
3434 MONT_START;
3435
3436 for (x = 0; x < pa; x++) {
3437 fp_digit cy = 0;
3438 /* get Mu for this round */
3439 LOOP_START;
3440 _c = c + x;
3441 tmpm = m->dp;
3442 y = 0;
3443 for (; y < (pa & ~7); y += 8) {
3444 innermul8_mulx(_c, &cy, tmpm, mu) ;
3445 _c += 8;
3446 tmpm += 8;
3447 }
3448 for (; y < pa; y++) {
3449 INNERMUL;
3450 ++_c;
3451 }
3452 LOOP_END;
3453 while (cy) {
3454 PROPCARRY;
3455 ++_c;
3456 }
3457 }
3458
3459 /* now copy out */
3460 _c = c + pa;
3461 tmpm = a->dp;
3462 for (x = 0; x < pa+1; x++) {
3463 *tmpm++ = *_c++;
3464 }
3465
3466 /* zero any excess digits on the destination that we didn't write to */
3467 for (; x < oldused; x++) {
3468 *tmpm++ = 0;
3469 }
3470
3471 MONT_FINI;
3472
3473 a->used = pa+1;
3474 fp_clamp(a);
3475
3476 #ifndef WOLFSSL_MONT_RED_CT
3477 /* if A >= m then A = A - m */
3478 if (fp_cmp_mag (a, m) != FP_LT) {
3479 s_fp_sub (a, m, a);
3480 }
3481 (void)ct;
3482 #else
3483 if (ct) {
3484 fp_submod_ct(a, m, m, a);
3485 }
3486 else if (fp_cmp_mag (a, m) != FP_LT) {
3487 s_fp_sub (a, m, a);
3488 }
3489 #endif
3490
3491 #ifdef WOLFSSL_SMALL_STACK
3492 XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
3493 #endif
3494 return FP_OKAY;
3495 }
3496 #endif
3497
3498 /* computes x/R == x (mod N) via Montgomery Reduction */
fp_montgomery_reduce_ex(fp_int * a,fp_int * m,fp_digit mp,int ct)3499 int fp_montgomery_reduce_ex(fp_int *a, fp_int *m, fp_digit mp, int ct)
3500 {
3501 #ifndef WOLFSSL_SMALL_STACK
3502 fp_digit c[FP_SIZE+1];
3503 #else
3504 fp_digit *c;
3505 #endif
3506 fp_digit *_c, *tmpm, mu = 0;
3507 int oldused, x, y, pa, err = 0;
3508
3509 IF_HAVE_INTEL_MULX(err=fp_montgomery_reduce_mulx(a, m, mp, ct), return err) ;
3510 (void)err;
3511
3512 /* bail if too large */
3513 if (m->used > (FP_SIZE/2)) {
3514 (void)mu; /* shut up compiler */
3515 return FP_VAL;
3516 }
3517
3518 #ifdef TFM_SMALL_MONT_SET
3519 if (m->used <= 16) {
3520 return fp_montgomery_reduce_small(a, m, mp);
3521 }
3522 #endif
3523
3524 #ifdef WOLFSSL_SMALL_STACK
3525 /* only allocate space for what's needed for window plus res */
3526 c = (fp_digit*)XMALLOC(sizeof(fp_digit)*(FP_SIZE + 1), NULL, DYNAMIC_TYPE_BIGINT);
3527 if (c == NULL) {
3528 return FP_MEM;
3529 }
3530 #endif
3531
3532 /* now zero the buff */
3533 XMEMSET(c, 0, sizeof(fp_digit)*(FP_SIZE + 1));
3534 pa = m->used;
3535
3536 /* copy the input */
3537 #ifdef TFM_TIMING_RESISTANT
3538 if (a->used <= m->used) {
3539 oldused = m->used;
3540 }
3541 else {
3542 oldused = m->used * 2;
3543 }
3544 #else
3545 oldused = a->used;
3546 #endif
3547 for (x = 0; x < oldused; x++) {
3548 c[x] = a->dp[x];
3549 }
3550 MONT_START;
3551
3552 for (x = 0; x < pa; x++) {
3553 fp_digit cy = 0;
3554 /* get Mu for this round */
3555 LOOP_START;
3556 _c = c + x;
3557 tmpm = m->dp;
3558 y = 0;
3559 #if defined(INNERMUL8)
3560 for (; y < (pa & ~7); y += 8) {
3561 INNERMUL8 ;
3562 _c += 8;
3563 tmpm += 8;
3564 }
3565 #endif
3566 for (; y < pa; y++) {
3567 INNERMUL;
3568 ++_c;
3569 }
3570 LOOP_END;
3571 while (cy) {
3572 PROPCARRY;
3573 ++_c;
3574 }
3575 }
3576
3577 /* now copy out */
3578 _c = c + pa;
3579 tmpm = a->dp;
3580 for (x = 0; x < pa+1; x++) {
3581 *tmpm++ = *_c++;
3582 }
3583
3584 /* zero any excess digits on the destination that we didn't write to */
3585 for (; x < oldused; x++) {
3586 *tmpm++ = 0;
3587 }
3588
3589 MONT_FINI;
3590
3591 a->used = pa+1;
3592 fp_clamp(a);
3593
3594 #ifndef WOLFSSL_MONT_RED_CT
3595 /* if A >= m then A = A - m */
3596 if (fp_cmp_mag (a, m) != FP_LT) {
3597 s_fp_sub (a, m, a);
3598 }
3599 (void)ct;
3600 #else
3601 if (ct) {
3602 fp_submod_ct(a, m, m, a);
3603 }
3604 else if (fp_cmp_mag (a, m) != FP_LT) {
3605 s_fp_sub (a, m, a);
3606 }
3607 #endif
3608
3609 #ifdef WOLFSSL_SMALL_STACK
3610 XFREE(c, NULL, DYNAMIC_TYPE_BIGINT);
3611 #endif
3612 return FP_OKAY;
3613 }
3614
fp_montgomery_reduce(fp_int * a,fp_int * m,fp_digit mp)3615 int fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
3616 {
3617 return fp_montgomery_reduce_ex(a, m, mp, 1);
3618 }
3619
fp_read_unsigned_bin(fp_int * a,const unsigned char * b,int c)3620 int fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c)
3621 {
3622 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
3623 const word32 maxC = (a->size * sizeof(fp_digit));
3624 #else
3625 const word32 maxC = (FP_SIZE * sizeof(fp_digit));
3626 #endif
3627
3628 /* zero the int */
3629 fp_zero (a);
3630
3631 /* if input b excess max, then truncate */
3632 if (c > 0 && (word32)c > maxC) {
3633 int excess = (c - maxC);
3634 c -= excess;
3635 b += excess;
3636 }
3637
3638 /* Not both endian simultaneously */
3639 #if defined(LITTLE_ENDIAN_ORDER) && defined(BIG_ENDIAN_ORDER)
3640 #error Both LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER defined.
3641 #endif
3642
3643 #if (defined(LITTLE_ENDIAN_ORDER) || defined(BIG_ENDIAN_ORDER)) && \
3644 (defined(FP_32BIT) || defined(FP_64BIT))
3645 #ifdef FP_32BIT
3646 /* If we know the endianness of this architecture, and we're using
3647 32-bit fp_digits, we can optimize this */
3648 {
3649 unsigned char *pd = (unsigned char *)a->dp;
3650
3651 a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
3652 #ifdef BIG_ENDIAN_ORDER
3653 {
3654 /* Use Duff's device to unroll the loop. */
3655 int idx = (c - 1) & ~3;
3656 switch (c % 4) {
3657 case 0: do { pd[idx+0] = *b++; FALL_THROUGH;
3658 case 3: pd[idx+1] = *b++; FALL_THROUGH;
3659 case 2: pd[idx+2] = *b++; FALL_THROUGH;
3660 case 1: pd[idx+3] = *b++;
3661 idx -= 4;
3662 } while ((c -= 4) > 0);
3663 }
3664 }
3665 #else
3666 /* read the bytes in one at a time. */
3667 for (c -= 1; c >= 0; c -= 1) {
3668 pd[c] = *b++;
3669 }
3670 #endif
3671 }
3672 #elif defined(FP_64BIT)
3673 /* If we know the endianness of this architecture, and we're using
3674 64-bit fp_digits, we can optimize this */
3675 {
3676 unsigned char *pd = (unsigned char *)a->dp;
3677
3678 a->used = (c + sizeof(fp_digit) - 1)/sizeof(fp_digit);
3679 #ifdef BIG_ENDIAN_ORDER
3680 {
3681 /* Use Duff's device to unroll the loop. */
3682 int idx = (c - 1) & ~7;
3683 switch (c % 8) {
3684 case 0: do { pd[idx+0] = *b++; FALL_THROUGH;
3685 case 7: pd[idx+1] = *b++; FALL_THROUGH;
3686 case 6: pd[idx+2] = *b++; FALL_THROUGH;
3687 case 5: pd[idx+3] = *b++; FALL_THROUGH;
3688 case 4: pd[idx+4] = *b++; FALL_THROUGH;
3689 case 3: pd[idx+5] = *b++; FALL_THROUGH;
3690 case 2: pd[idx+6] = *b++; FALL_THROUGH;
3691 case 1: pd[idx+7] = *b++;
3692 idx -= 8;
3693 } while ((c -= 8) > 0);
3694 }
3695 }
3696 #else
3697 /* read the bytes in one at a time. */
3698 for (c -= 1; c >= 0; c -= 1) {
3699 pd[c] = *b++;
3700 }
3701 #endif
3702 }
3703 #endif
3704 #else
3705 /* read the bytes in one at a time - unknown number of bits in digit */
3706 for (; c > 0; c--) {
3707 int err = fp_mul_2d (a, 8, a);
3708 if (err != FP_OKAY) {
3709 return err;
3710 }
3711 a->dp[0] |= *b++;
3712
3713 if (a->used == 0) {
3714 a->used = 1;
3715 }
3716 }
3717 #endif
3718 fp_clamp (a);
3719
3720 return FP_OKAY;
3721 }
3722
fp_to_unsigned_bin_at_pos(int x,fp_int * t,unsigned char * b)3723 int fp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
3724 {
3725 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
3726 int i;
3727 int j = 0;
3728 fp_digit n;
3729
3730 for (i = 0; i < t->used-1; ) {
3731 b[x++] = (unsigned char)(t->dp[i] >> j);
3732 j += 8;
3733 i += j == DIGIT_BIT;
3734 j &= DIGIT_BIT - 1;
3735 }
3736 n = t->dp[i];
3737 while (n != 0) {
3738 b[x++] = (unsigned char)n;
3739 n >>= 8;
3740 }
3741 return x;
3742 #else
3743 while (fp_iszero (t) == FP_NO) {
3744 b[x++] = (unsigned char) (t->dp[0] & 255);
3745 fp_div_2d (t, 8, t, NULL);
3746 }
3747 return x;
3748 #endif
3749 }
3750
fp_to_unsigned_bin(fp_int * a,unsigned char * b)3751 int fp_to_unsigned_bin(fp_int *a, unsigned char *b)
3752 {
3753 int x;
3754 #ifndef WOLFSSL_SMALL_STACK
3755 fp_int t[1];
3756 #else
3757 fp_int *t;
3758 #endif
3759
3760 #ifdef WOLFSSL_SMALL_STACK
3761 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
3762 if (t == NULL)
3763 return FP_MEM;
3764 #endif
3765
3766 fp_init_copy(t, a);
3767
3768 x = fp_to_unsigned_bin_at_pos(0, t, b);
3769 fp_reverse (b, x);
3770
3771 #ifdef WOLFSSL_SMALL_STACK
3772 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
3773 #endif
3774 return FP_OKAY;
3775 }
3776
fp_to_unsigned_bin_len(fp_int * a,unsigned char * b,int c)3777 int fp_to_unsigned_bin_len(fp_int *a, unsigned char *b, int c)
3778 {
3779 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
3780 int i = 0;
3781 int j = 0;
3782 int x;
3783
3784 for (x=c-1; x >= 0 && i < a->used; x--) {
3785 b[x] = (unsigned char)(a->dp[i] >> j);
3786 j += 8;
3787 i += j == DIGIT_BIT;
3788 j &= DIGIT_BIT - 1;
3789 }
3790 for (; x >= 0; x--) {
3791 b[x] = 0;
3792 }
3793
3794 return FP_OKAY;
3795 #else
3796 int x;
3797 #ifndef WOLFSSL_SMALL_STACK
3798 fp_int t[1];
3799 #else
3800 fp_int *t;
3801 #endif
3802
3803 #ifdef WOLFSSL_SMALL_STACK
3804 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
3805 if (t == NULL)
3806 return FP_MEM;
3807 #endif
3808
3809 fp_init_copy(t, a);
3810
3811 for (x = 0; x < c; x++) {
3812 b[x] = (unsigned char) (t->dp[0] & 255);
3813 fp_div_2d (t, 8, t, NULL);
3814 }
3815 fp_reverse (b, x);
3816
3817 #ifdef WOLFSSL_SMALL_STACK
3818 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
3819 #endif
3820 return FP_OKAY;
3821 #endif
3822 }
3823
fp_unsigned_bin_size(const fp_int * a)3824 int fp_unsigned_bin_size(const fp_int *a)
3825 {
3826 int size = fp_count_bits (a);
3827 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3828 }
3829
fp_set(fp_int * a,fp_digit b)3830 void fp_set(fp_int *a, fp_digit b)
3831 {
3832 fp_zero(a);
3833 a->dp[0] = b;
3834 a->used = a->dp[0] ? 1 : 0;
3835 }
3836
3837
3838 #ifndef MP_SET_CHUNK_BITS
3839 #define MP_SET_CHUNK_BITS 4
3840 #endif
fp_set_int(fp_int * a,unsigned long b)3841 int fp_set_int(fp_int *a, unsigned long b)
3842 {
3843 int x;
3844
3845 /* use direct fp_set if b is less than fp_digit max
3846 * If input max value of b down shift by 1 less than full range
3847 * fp_digit, then condition is always true. */
3848 #if ((ULONG_MAX >> (DIGIT_BIT-1)) > 0)
3849 if (b < FP_DIGIT_MAX)
3850 #endif
3851 {
3852 fp_set (a, (fp_digit)b);
3853 return FP_OKAY;
3854 }
3855
3856 fp_zero (a);
3857
3858 /* set chunk bits at a time */
3859 for (x = 0; x < (int)(sizeof(b) * 8) / MP_SET_CHUNK_BITS; x++) {
3860 int err = fp_mul_2d (a, MP_SET_CHUNK_BITS, a);
3861 if (err != FP_OKAY)
3862 return err;
3863
3864 /* OR in the top bits of the source */
3865 a->dp[0] |= (b >> ((sizeof(b) * 8) - MP_SET_CHUNK_BITS)) &
3866 ((1 << MP_SET_CHUNK_BITS) - 1);
3867
3868 /* shift the source up to the next chunk bits */
3869 b <<= MP_SET_CHUNK_BITS;
3870
3871 /* ensure that digits are not clamped off */
3872 a->used += 1;
3873 }
3874
3875 /* clamp digits */
3876 fp_clamp(a);
3877
3878 return FP_OKAY;
3879 }
3880
3881 /* check if a bit is set */
fp_is_bit_set(fp_int * a,fp_digit b)3882 int fp_is_bit_set (fp_int *a, fp_digit b)
3883 {
3884 fp_digit i;
3885
3886 if (b > FP_MAX_BITS)
3887 return FP_VAL;
3888
3889 i = b/DIGIT_BIT;
3890
3891 if ((fp_digit)a->used < i)
3892 return 0;
3893
3894 return (int)((a->dp[i] >> b%DIGIT_BIT) & (fp_digit)1);
3895 }
3896
3897 /* set the b bit of a */
fp_set_bit(fp_int * a,fp_digit b)3898 int fp_set_bit (fp_int * a, fp_digit b)
3899 {
3900 fp_digit i;
3901
3902 if (b > FP_MAX_BITS)
3903 return FP_VAL;
3904
3905 i = b/DIGIT_BIT;
3906
3907 /* set the used count of where the bit will go if required */
3908 if (a->used < (int)(i+1))
3909 a->used = (int)(i+1);
3910
3911 /* put the single bit in its place */
3912 a->dp[i] |= ((fp_digit)1) << (b % DIGIT_BIT);
3913
3914 return MP_OKAY;
3915 }
3916
fp_count_bits(const fp_int * a)3917 int fp_count_bits (const fp_int * a)
3918 {
3919 int r;
3920 fp_digit q;
3921
3922 /* shortcut */
3923 if (a->used == 0) {
3924 return 0;
3925 }
3926
3927 /* get number of digits and add that */
3928 r = (a->used - 1) * DIGIT_BIT;
3929
3930 /* take the last digit and count the bits in it */
3931 q = a->dp[a->used - 1];
3932 while (q > ((fp_digit) 0)) {
3933 ++r;
3934 q >>= ((fp_digit) 1);
3935 }
3936
3937 return r;
3938 }
3939
fp_leading_bit(fp_int * a)3940 int fp_leading_bit(fp_int *a)
3941 {
3942 int bit = 0;
3943
3944 if (a->used != 0) {
3945 fp_digit q = a->dp[a->used - 1];
3946 int qSz = sizeof(fp_digit);
3947
3948 while (qSz > 0) {
3949 if ((unsigned char)q != 0)
3950 bit = (q & 0x80) != 0;
3951 q >>= 8;
3952 qSz--;
3953 }
3954 }
3955
3956 return bit;
3957 }
3958
fp_lshd(fp_int * a,int x)3959 int fp_lshd(fp_int *a, int x)
3960 {
3961 int y;
3962
3963 if (a->used + x > FP_SIZE) return FP_VAL;
3964
3965 y = a->used + x - 1;
3966
3967 /* store new size */
3968 a->used = y + 1;
3969
3970 /* move digits */
3971 for (; y >= x; y--) {
3972 a->dp[y] = a->dp[y-x];
3973 }
3974
3975 /* zero lower digits */
3976 for (; y >= 0; y--) {
3977 a->dp[y] = 0;
3978 }
3979
3980 /* clamp digits */
3981 fp_clamp(a);
3982 return FP_OKAY;
3983 }
3984
3985
3986 /* right shift by bit count */
fp_rshb(fp_int * c,int x)3987 void fp_rshb(fp_int *c, int x)
3988 {
3989 fp_digit *tmpc, mask, shift;
3990 fp_digit r, rr;
3991 fp_digit D = x;
3992
3993 /* shifting by a negative number not supported, and shifting by
3994 * zero changes nothing.
3995 */
3996 if (x <= 0) return;
3997
3998 /* shift digits first if needed */
3999 if (x >= DIGIT_BIT) {
4000 fp_rshd(c, x / DIGIT_BIT);
4001 /* recalculate number of bits to shift */
4002 D = x % DIGIT_BIT;
4003 /* check if any more shifting needed */
4004 if (D == 0) return;
4005
4006 }
4007
4008 /* zero shifted is always zero */
4009 if (fp_iszero(c)) return;
4010
4011 /* mask */
4012 mask = (((fp_digit)1) << D) - 1;
4013
4014 /* shift for lsb */
4015 shift = DIGIT_BIT - D;
4016
4017 /* alias */
4018 tmpc = c->dp + (c->used - 1);
4019
4020 /* carry */
4021 r = 0;
4022 for (x = c->used - 1; x >= 0; x--) {
4023 /* get the lower bits of this word in a temp */
4024 rr = *tmpc & mask;
4025
4026 /* shift the current word and mix in the carry bits from previous word */
4027 *tmpc = (*tmpc >> D) | (r << shift);
4028 --tmpc;
4029
4030 /* set the carry to the carry bits of the current word found above */
4031 r = rr;
4032 }
4033
4034 /* clamp digits */
4035 fp_clamp(c);
4036 }
4037
4038
fp_rshd(fp_int * a,int x)4039 void fp_rshd(fp_int *a, int x)
4040 {
4041 int y;
4042
4043 /* too many digits just zero and return */
4044 if (x >= a->used) {
4045 fp_zero(a);
4046 return;
4047 }
4048
4049 /* shift */
4050 for (y = 0; y < a->used - x; y++) {
4051 a->dp[y] = a->dp[y+x];
4052 }
4053
4054 /* zero rest */
4055 for (; y < a->used; y++) {
4056 a->dp[y] = 0;
4057 }
4058
4059 /* decrement count */
4060 a->used -= x;
4061 fp_clamp(a);
4062 }
4063
4064 /* reverse an array, used for radix code */
fp_reverse(unsigned char * s,int len)4065 void fp_reverse (unsigned char *s, int len)
4066 {
4067 int ix, iy;
4068 unsigned char t;
4069
4070 ix = 0;
4071 iy = len - 1;
4072 while (ix < iy) {
4073 t = s[ix];
4074 s[ix] = s[iy];
4075 s[iy] = t;
4076 ++ix;
4077 --iy;
4078 }
4079 }
4080
4081
4082 /* c = a - b */
fp_sub_d(fp_int * a,fp_digit b,fp_int * c)4083 int fp_sub_d(fp_int *a, fp_digit b, fp_int *c)
4084 {
4085 #ifndef WOLFSSL_SMALL_STACK
4086 fp_int tmp[1];
4087 #else
4088 fp_int *tmp;
4089 #endif
4090 int err = FP_OKAY;
4091
4092 #ifdef WOLFSSL_SMALL_STACK
4093 tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4094 if (tmp == NULL)
4095 return FP_MEM;
4096 #endif
4097
4098 fp_init(tmp);
4099 fp_set(tmp, b);
4100 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4101 if (c->size < FP_SIZE) {
4102 err = fp_sub(a, tmp, tmp);
4103 fp_copy(tmp, c);
4104 } else
4105 #endif
4106 {
4107 err = fp_sub(a, tmp, c);
4108 }
4109
4110 #ifdef WOLFSSL_SMALL_STACK
4111 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
4112 #endif
4113 return err;
4114 }
4115
4116
4117 /* wolfSSL callers from normal lib */
4118
4119 /* init a new mp_int */
mp_init(mp_int * a)4120 int mp_init (mp_int * a)
4121 {
4122 if (a)
4123 fp_init(a);
4124 return MP_OKAY;
4125 }
4126
fp_init(fp_int * a)4127 void fp_init(fp_int *a)
4128 {
4129 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4130 a->size = FP_SIZE;
4131 #endif
4132 #ifdef HAVE_WOLF_BIGINT
4133 wc_bigint_init(&a->raw);
4134 #endif
4135 fp_zero(a);
4136 }
4137
fp_zero(fp_int * a)4138 void fp_zero(fp_int *a)
4139 {
4140 int size;
4141 a->used = 0;
4142 a->sign = FP_ZPOS;
4143 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4144 size = a->size;
4145 #else
4146 size = FP_SIZE;
4147 #endif
4148 XMEMSET(a->dp, 0, size * sizeof(fp_digit));
4149 }
4150
fp_clear(fp_int * a)4151 void fp_clear(fp_int *a)
4152 {
4153 int size;
4154 a->used = 0;
4155 a->sign = FP_ZPOS;
4156 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4157 size = a->size;
4158 #else
4159 size = FP_SIZE;
4160 #endif
4161 XMEMSET(a->dp, 0, size * sizeof(fp_digit));
4162 fp_free(a);
4163 }
4164
fp_forcezero(mp_int * a)4165 void fp_forcezero (mp_int * a)
4166 {
4167 int size;
4168 a->used = 0;
4169 a->sign = FP_ZPOS;
4170 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4171 size = a->size;
4172 #else
4173 size = FP_SIZE;
4174 #endif
4175 ForceZero(a->dp, size * sizeof(fp_digit));
4176 #ifdef HAVE_WOLF_BIGINT
4177 wc_bigint_zero(&a->raw);
4178 #endif
4179 fp_free(a);
4180 }
4181
mp_forcezero(mp_int * a)4182 void mp_forcezero (mp_int * a)
4183 {
4184 fp_forcezero(a);
4185 }
4186
fp_free(fp_int * a)4187 void fp_free(fp_int* a)
4188 {
4189 #ifdef HAVE_WOLF_BIGINT
4190 wc_bigint_free(&a->raw);
4191 #else
4192 (void)a;
4193 #endif
4194 }
4195
4196
4197 /* clear one (frees) */
mp_clear(mp_int * a)4198 void mp_clear (mp_int * a)
4199 {
4200 if (a == NULL)
4201 return;
4202 fp_clear(a);
4203 }
4204
mp_free(mp_int * a)4205 void mp_free(mp_int* a)
4206 {
4207 fp_free(a);
4208 }
4209
4210 /* handle up to 6 inits */
mp_init_multi(mp_int * a,mp_int * b,mp_int * c,mp_int * d,mp_int * e,mp_int * f)4211 int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d,
4212 mp_int* e, mp_int* f)
4213 {
4214 if (a)
4215 fp_init(a);
4216 if (b)
4217 fp_init(b);
4218 if (c)
4219 fp_init(c);
4220 if (d)
4221 fp_init(d);
4222 if (e)
4223 fp_init(e);
4224 if (f)
4225 fp_init(f);
4226
4227 return MP_OKAY;
4228 }
4229
4230 /* high level addition (handles signs) */
mp_add(mp_int * a,mp_int * b,mp_int * c)4231 int mp_add (mp_int * a, mp_int * b, mp_int * c)
4232 {
4233 return fp_add(a, b, c);
4234 }
4235
4236 /* high level subtraction (handles signs) */
mp_sub(mp_int * a,mp_int * b,mp_int * c)4237 int mp_sub (mp_int * a, mp_int * b, mp_int * c)
4238 {
4239 return fp_sub(a, b, c);
4240 }
4241
4242 /* high level multiplication (handles sign) */
4243 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_mul(mp_int * a,mp_int * b,mp_int * c)4244 int wolfcrypt_mp_mul(mp_int * a, mp_int * b, mp_int * c)
4245 #else
4246 int mp_mul (mp_int * a, mp_int * b, mp_int * c)
4247 #endif
4248 {
4249 return fp_mul(a, b, c);
4250 }
4251
mp_mul_d(mp_int * a,mp_digit b,mp_int * c)4252 int mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
4253 {
4254 return fp_mul_d(a, b, c);
4255 }
4256
4257 /* d = a * b (mod c) */
4258 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_mulmod(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4259 int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
4260 #else
4261 int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
4262 #endif
4263 {
4264 #if defined(WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI) && \
4265 !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI)
4266 int A = fp_count_bits (a);
4267 int B = fp_count_bits (b);
4268
4269 if( A >= ESP_RSA_MULM_BITS && B >= ESP_RSA_MULM_BITS)
4270 return esp_mp_mulmod(a, b, c, d);
4271 else
4272 #endif
4273 return fp_mulmod(a, b, c, d);
4274 }
4275
4276 /* d = a - b (mod c) */
mp_submod(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4277 int mp_submod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4278 {
4279 return fp_submod(a, b, c, d);
4280 }
4281
4282 /* d = a + b (mod c) */
mp_addmod(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4283 int mp_addmod(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4284 {
4285 return fp_addmod(a, b, c, d);
4286 }
4287
4288 /* d = a - b (mod c) - constant time (a < c and b < c) */
mp_submod_ct(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4289 int mp_submod_ct(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4290 {
4291 return fp_submod_ct(a, b, c, d);
4292 }
4293
4294 /* d = a + b (mod c) - constant time (a < c and b < c) */
mp_addmod_ct(mp_int * a,mp_int * b,mp_int * c,mp_int * d)4295 int mp_addmod_ct(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
4296 {
4297 return fp_addmod_ct(a, b, c, d);
4298 }
4299
4300 /* c = a mod b, 0 <= c < b */
4301 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_mod(mp_int * a,mp_int * b,mp_int * c)4302 int wolfcrypt_mp_mod (mp_int * a, mp_int * b, mp_int * c)
4303 #else
4304 int mp_mod (mp_int * a, mp_int * b, mp_int * c)
4305 #endif
4306 {
4307 return fp_mod (a, b, c);
4308 }
4309
4310 /* hac 14.61, pp608 */
4311 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_invmod(mp_int * a,mp_int * b,mp_int * c)4312 int wolfcrypt_mp_invmod (mp_int * a, mp_int * b, mp_int * c)
4313 #else
4314 int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
4315 #endif
4316 {
4317 return fp_invmod(a, b, c);
4318 }
4319
4320 /* hac 14.61, pp608 */
mp_invmod_mont_ct(mp_int * a,mp_int * b,mp_int * c,mp_digit mp)4321 int mp_invmod_mont_ct (mp_int * a, mp_int * b, mp_int * c, mp_digit mp)
4322 {
4323 return fp_invmod_mont_ct(a, b, c, mp);
4324 }
4325
4326 /* this is a shell function that calls either the normal or Montgomery
4327 * exptmod functions. Originally the call to the montgomery code was
4328 * embedded in the normal function but that wasted a lot of stack space
4329 * for nothing (since 99% of the time the Montgomery code would be called)
4330 */
4331 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_exptmod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4332 int wolfcrypt_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4333 #else
4334 int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4335 #endif
4336 {
4337 return fp_exptmod(G, X, P, Y);
4338 }
4339
mp_exptmod_ex(mp_int * G,mp_int * X,int digits,mp_int * P,mp_int * Y)4340 int mp_exptmod_ex (mp_int * G, mp_int * X, int digits, mp_int * P, mp_int * Y)
4341 {
4342 return fp_exptmod_ex(G, X, digits, P, Y);
4343 }
4344
4345 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_exptmod_nct(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4346 int wolfcrypt_mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4347 #else
4348 int mp_exptmod_nct (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
4349 #endif
4350 {
4351 return fp_exptmod_nct(G, X, P, Y);
4352 }
4353
4354
4355 /* compare two ints (signed)*/
mp_cmp(mp_int * a,mp_int * b)4356 int mp_cmp (mp_int * a, mp_int * b)
4357 {
4358 return fp_cmp(a, b);
4359 }
4360
4361 /* compare a digit */
mp_cmp_d(mp_int * a,mp_digit b)4362 int mp_cmp_d(mp_int * a, mp_digit b)
4363 {
4364 return fp_cmp_d(a, b);
4365 }
4366
4367 /* get the size for an unsigned equivalent */
mp_unsigned_bin_size(const mp_int * a)4368 int mp_unsigned_bin_size (const mp_int * a)
4369 {
4370 return fp_unsigned_bin_size(a);
4371 }
4372
mp_to_unsigned_bin_at_pos(int x,fp_int * t,unsigned char * b)4373 int mp_to_unsigned_bin_at_pos(int x, fp_int *t, unsigned char *b)
4374 {
4375 return fp_to_unsigned_bin_at_pos(x, t, b);
4376 }
4377
4378 /* store in unsigned [big endian] format */
mp_to_unsigned_bin(mp_int * a,unsigned char * b)4379 int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
4380 {
4381 return fp_to_unsigned_bin(a,b);
4382 }
4383
mp_to_unsigned_bin_len(mp_int * a,unsigned char * b,int c)4384 int mp_to_unsigned_bin_len(mp_int * a, unsigned char *b, int c)
4385 {
4386 return fp_to_unsigned_bin_len(a, b, c);
4387 }
4388 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
mp_read_unsigned_bin(mp_int * a,const unsigned char * b,int c)4389 int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
4390 {
4391 return fp_read_unsigned_bin(a, b, c);
4392 }
4393
4394
mp_sub_d(fp_int * a,fp_digit b,fp_int * c)4395 int mp_sub_d(fp_int *a, fp_digit b, fp_int *c)
4396 {
4397 return fp_sub_d(a, b, c);
4398 }
4399
mp_mul_2d(fp_int * a,int b,fp_int * c)4400 int mp_mul_2d(fp_int *a, int b, fp_int *c)
4401 {
4402 return fp_mul_2d(a, b, c);
4403 }
4404
mp_2expt(fp_int * a,int b)4405 int mp_2expt(fp_int* a, int b)
4406 {
4407 fp_2expt(a, b);
4408 return MP_OKAY;
4409 }
4410
mp_div(fp_int * a,fp_int * b,fp_int * c,fp_int * d)4411 int mp_div(fp_int * a, fp_int * b, fp_int * c, fp_int * d)
4412 {
4413 return fp_div(a, b, c, d);
4414 }
4415
mp_div_2d(fp_int * a,int b,fp_int * c,fp_int * d)4416 int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d)
4417 {
4418 fp_div_2d(a, b, c, d);
4419 return MP_OKAY;
4420 }
4421
fp_copy(const fp_int * a,fp_int * b)4422 void fp_copy(const fp_int *a, fp_int *b)
4423 {
4424 /* if source and destination are different */
4425 if (a != b) {
4426 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4427 /* verify a will fit in b */
4428 if (b->size >= a->used) {
4429 int x, oldused;
4430 oldused = b->used;
4431 b->used = a->used;
4432 b->sign = a->sign;
4433
4434 XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit));
4435
4436 /* zero any excess digits on the destination that we didn't write to */
4437 for (x = b->used; x >= 0 && x < oldused; x++) {
4438 b->dp[x] = 0;
4439 }
4440 }
4441 else {
4442 /* TODO: Handle error case */
4443 }
4444 #else
4445 /* all dp's are same size, so do straight copy */
4446 b->used = a->used;
4447 b->sign = a->sign;
4448 XMEMCPY(b->dp, a->dp, FP_SIZE * sizeof(fp_digit));
4449 #endif
4450 }
4451 }
4452
mp_init_copy(fp_int * a,fp_int * b)4453 int mp_init_copy(fp_int * a, fp_int * b)
4454 {
4455 fp_init_copy(a, b);
4456 return MP_OKAY;
4457 }
4458
fp_init_copy(fp_int * a,fp_int * b)4459 void fp_init_copy(fp_int *a, fp_int* b)
4460 {
4461 if (a != b) {
4462 fp_init(a);
4463 fp_copy(b, a);
4464 }
4465 }
4466
4467 /* fast math wrappers */
mp_copy(const fp_int * a,fp_int * b)4468 int mp_copy(const fp_int* a, fp_int* b)
4469 {
4470 fp_copy(a, b);
4471 return MP_OKAY;
4472 }
4473
mp_isodd(mp_int * a)4474 int mp_isodd(mp_int* a)
4475 {
4476 return fp_isodd(a);
4477 }
4478
mp_iszero(mp_int * a)4479 int mp_iszero(mp_int* a)
4480 {
4481 return fp_iszero(a);
4482 }
4483
mp_count_bits(const mp_int * a)4484 int mp_count_bits (const mp_int* a)
4485 {
4486 return fp_count_bits(a);
4487 }
4488
mp_leading_bit(mp_int * a)4489 int mp_leading_bit (mp_int* a)
4490 {
4491 return fp_leading_bit(a);
4492 }
4493
mp_rshb(mp_int * a,int x)4494 void mp_rshb (mp_int* a, int x)
4495 {
4496 fp_rshb(a, x);
4497 }
4498
mp_rshd(mp_int * a,int x)4499 void mp_rshd (mp_int* a, int x)
4500 {
4501 fp_rshd(a, x);
4502 }
4503
mp_set_int(mp_int * a,unsigned long b)4504 int mp_set_int(mp_int *a, unsigned long b)
4505 {
4506 return fp_set_int(a, b);
4507 }
4508
mp_is_bit_set(mp_int * a,mp_digit b)4509 int mp_is_bit_set (mp_int *a, mp_digit b)
4510 {
4511 return fp_is_bit_set(a, b);
4512 }
4513
mp_set_bit(mp_int * a,mp_digit b)4514 int mp_set_bit(mp_int *a, mp_digit b)
4515 {
4516 return fp_set_bit(a, b);
4517 }
4518
4519 #if defined(WOLFSSL_KEY_GEN) || defined (HAVE_ECC) || !defined(NO_DH) || \
4520 !defined(NO_DSA) || !defined(NO_RSA)
4521
4522 /* c = a * a (mod b) */
fp_sqrmod(fp_int * a,fp_int * b,fp_int * c)4523 int fp_sqrmod(fp_int *a, fp_int *b, fp_int *c)
4524 {
4525 int err;
4526 #ifndef WOLFSSL_SMALL_STACK
4527 fp_int t[1];
4528 #else
4529 fp_int *t;
4530 #endif
4531
4532 #ifdef WOLFSSL_SMALL_STACK
4533 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4534 if (t == NULL)
4535 return FP_MEM;
4536 #endif
4537
4538 fp_init(t);
4539 err = fp_sqr(a, t);
4540 if (err == FP_OKAY) {
4541 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
4542 if (c->size < FP_SIZE) {
4543 err = fp_mod(t, b, t);
4544 fp_copy(t, c);
4545 }
4546 else
4547 #endif
4548 {
4549 err = fp_mod(t, b, c);
4550 }
4551 }
4552
4553 #ifdef WOLFSSL_SMALL_STACK
4554 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4555 #endif
4556 return err;
4557 }
4558
4559 /* fast math conversion */
mp_sqrmod(mp_int * a,mp_int * b,mp_int * c)4560 int mp_sqrmod(mp_int *a, mp_int *b, mp_int *c)
4561 {
4562 return fp_sqrmod(a, b, c);
4563 }
4564
4565 /* fast math conversion */
mp_montgomery_calc_normalization(mp_int * a,mp_int * b)4566 int mp_montgomery_calc_normalization(mp_int *a, mp_int *b)
4567 {
4568 return fp_montgomery_calc_normalization(a, b);
4569 }
4570
4571 #endif /* WOLFSSL_KEY_GEN || HAVE_ECC */
4572
fp_cond_swap_ct(mp_int * a,mp_int * b,int c,int m)4573 static int fp_cond_swap_ct (mp_int * a, mp_int * b, int c, int m)
4574 {
4575 int i;
4576 mp_digit mask = (mp_digit)0 - m;
4577 #ifndef WOLFSSL_SMALL_STACK
4578 fp_int t[1];
4579 #else
4580 fp_int* t;
4581 #endif
4582
4583 #ifdef WOLFSSL_SMALL_STACK
4584 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4585 if (t == NULL)
4586 return FP_MEM;
4587 #endif
4588
4589 t->used = (a->used ^ b->used) & mask;
4590 for (i = 0; i < c; i++) {
4591 t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
4592 }
4593 a->used ^= t->used;
4594 for (i = 0; i < c; i++) {
4595 a->dp[i] ^= t->dp[i];
4596 }
4597 b->used ^= t->used;
4598 for (i = 0; i < c; i++) {
4599 b->dp[i] ^= t->dp[i];
4600 }
4601
4602 #ifdef WOLFSSL_SMALL_STACK
4603 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4604 #endif
4605 return FP_OKAY;
4606 }
4607
4608
4609 #if defined(WC_MP_TO_RADIX) || !defined(NO_DH) || !defined(NO_DSA) || \
4610 !defined(NO_RSA)
4611
4612 #ifdef WOLFSSL_KEY_GEN
4613 /* swap the elements of two integers, for cases where you can't simply swap the
4614 * mp_int pointers around
4615 */
fp_exch(fp_int * a,fp_int * b)4616 static int fp_exch (fp_int * a, fp_int * b)
4617 {
4618 #ifndef WOLFSSL_SMALL_STACK
4619 fp_int t[1];
4620 #else
4621 fp_int *t;
4622 #endif
4623
4624 #ifdef WOLFSSL_SMALL_STACK
4625 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4626 if (t == NULL)
4627 return FP_MEM;
4628 #endif
4629
4630 *t = *a;
4631 *a = *b;
4632 *b = *t;
4633
4634 #ifdef WOLFSSL_SMALL_STACK
4635 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4636 #endif
4637 return FP_OKAY;
4638 }
4639 #endif
4640
4641 static const int lnz[16] = {
4642 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4643 };
4644
4645 /* Counts the number of lsbs which are zero before the first zero bit */
fp_cnt_lsb(fp_int * a)4646 int fp_cnt_lsb(fp_int *a)
4647 {
4648 int x;
4649 fp_digit q, qq;
4650
4651 /* easy out */
4652 if (fp_iszero(a) == FP_YES) {
4653 return 0;
4654 }
4655
4656 /* scan lower digits until non-zero */
4657 for (x = 0; x < a->used && a->dp[x] == 0; x++) {}
4658 q = a->dp[x];
4659 x *= DIGIT_BIT;
4660
4661 /* now scan this digit until a 1 is found */
4662 if ((q & 1) == 0) {
4663 do {
4664 qq = q & 15;
4665 x += lnz[qq];
4666 q >>= 4;
4667 } while (qq == 0);
4668 }
4669 return x;
4670 }
4671
4672
s_is_power_of_two(fp_digit b,int * p)4673 static int s_is_power_of_two(fp_digit b, int *p)
4674 {
4675 int x;
4676
4677 /* fast return if no power of two */
4678 if ((b==0) || (b & (b-1))) {
4679 return FP_NO;
4680 }
4681
4682 for (x = 0; x < DIGIT_BIT; x++) {
4683 if (b == (((fp_digit)1)<<x)) {
4684 *p = x;
4685 return FP_YES;
4686 }
4687 }
4688 return FP_NO;
4689 }
4690
4691 /* a/b => cb + d == a */
fp_div_d(fp_int * a,fp_digit b,fp_int * c,fp_digit * d)4692 static int fp_div_d(fp_int *a, fp_digit b, fp_int *c, fp_digit *d)
4693 {
4694 #ifndef WOLFSSL_SMALL_STACK
4695 fp_int q[1];
4696 #else
4697 fp_int *q;
4698 #endif
4699 fp_word w;
4700 fp_digit t;
4701 int ix;
4702
4703 /* cannot divide by zero */
4704 if (b == 0) {
4705 return FP_VAL;
4706 }
4707
4708 /* quick outs */
4709 if (b == 1 || fp_iszero(a) == FP_YES) {
4710 if (d != NULL) {
4711 *d = 0;
4712 }
4713 if (c != NULL) {
4714 fp_copy(a, c);
4715 }
4716 return FP_OKAY;
4717 }
4718
4719 /* power of two ? */
4720 if (s_is_power_of_two(b, &ix) == FP_YES) {
4721 if (d != NULL) {
4722 *d = a->dp[0] & ((((fp_digit)1)<<ix) - 1);
4723 }
4724 if (c != NULL) {
4725 fp_div_2d(a, ix, c, NULL);
4726 }
4727 return FP_OKAY;
4728 }
4729
4730 #ifdef WOLFSSL_SMALL_STACK
4731 q = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
4732 if (q == NULL)
4733 return FP_MEM;
4734 #endif
4735
4736 fp_init(q);
4737
4738 if (c != NULL) {
4739 q->used = a->used;
4740 q->sign = a->sign;
4741 }
4742
4743 w = 0;
4744 for (ix = a->used - 1; ix >= 0; ix--) {
4745 w = (w << ((fp_word)DIGIT_BIT)) | ((fp_word)a->dp[ix]);
4746
4747 if (w >= b) {
4748 #ifdef WOLFSSL_LINUXKM
4749 t = (fp_digit)w;
4750 /* Linux kernel macro for in-place 64 bit integer division. */
4751 do_div(t, b);
4752 #else
4753 t = (fp_digit)(w / b);
4754 #endif
4755 w -= ((fp_word)t) * ((fp_word)b);
4756 } else {
4757 t = 0;
4758 }
4759 if (c != NULL)
4760 q->dp[ix] = (fp_digit)t;
4761 }
4762
4763 if (d != NULL) {
4764 *d = (fp_digit)w;
4765 }
4766
4767 if (c != NULL) {
4768 fp_clamp(q);
4769 fp_copy(q, c);
4770 }
4771
4772 #ifdef WOLFSSL_SMALL_STACK
4773 XFREE(q, NULL, DYNAMIC_TYPE_BIGINT);
4774 #endif
4775 return FP_OKAY;
4776 }
4777
4778
4779 /* c = a mod b, 0 <= c < b */
fp_mod_d(fp_int * a,fp_digit b,fp_digit * c)4780 static int fp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
4781 {
4782 return fp_div_d(a, b, NULL, c);
4783 }
4784
mp_mod_d(fp_int * a,fp_digit b,fp_digit * c)4785 int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c)
4786 {
4787 return fp_mod_d(a, b, c);
4788 }
4789
4790 #endif /* WC_MP_TO_RADIX || !NO_DH || !NO_DSA || !NO_RSA */
4791
4792
4793 #if !defined(NO_DH) || !defined(NO_DSA) || !defined(NO_RSA) || \
4794 defined(WOLFSSL_KEY_GEN)
4795
4796 static int fp_isprime_ex(fp_int *a, int t, int* result);
4797
4798 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_prime_is_prime(mp_int * a,int t,int * result)4799 int wolfcrypt_mp_prime_is_prime(mp_int* a, int t, int* result)
4800 #else
4801 int mp_prime_is_prime(mp_int* a, int t, int* result)
4802 #endif
4803 {
4804 return fp_isprime_ex(a, t, result);
4805 }
4806
4807 /* Miller-Rabin test of "a" to the base of "b" as described in
4808 * HAC pp. 139 Algorithm 4.24
4809 *
4810 * Sets result to 0 if definitely composite or 1 if probably prime.
4811 * Randomly the chance of error is no more than 1/4 and often
4812 * very much lower.
4813 */
fp_prime_miller_rabin_ex(fp_int * a,fp_int * b,int * result,fp_int * n1,fp_int * y,fp_int * r)4814 static int fp_prime_miller_rabin_ex(fp_int * a, fp_int * b, int *result,
4815 fp_int *n1, fp_int *y, fp_int *r)
4816 {
4817 int s, j;
4818 int err;
4819
4820 /* default */
4821 *result = FP_NO;
4822
4823 /* ensure b > 1 */
4824 if (fp_cmp_d(b, 1) != FP_GT) {
4825 return FP_OKAY;
4826 }
4827
4828 /* get n1 = a - 1 */
4829 fp_copy(a, n1);
4830 err = fp_sub_d(n1, 1, n1);
4831 if (err != FP_OKAY) {
4832 return err;
4833 }
4834
4835 /* set 2**s * r = n1 */
4836 fp_copy(n1, r);
4837
4838 /* count the number of least significant bits
4839 * which are zero
4840 */
4841 s = fp_cnt_lsb(r);
4842
4843 /* now divide n - 1 by 2**s */
4844 fp_div_2d (r, s, r, NULL);
4845
4846 /* compute y = b**r mod a */
4847 fp_zero(y);
4848 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
4849 defined(WOLFSSL_HAVE_SP_DH)
4850 #ifndef WOLFSSL_SP_NO_2048
4851 if (fp_count_bits(a) == 1024 && fp_isodd(a))
4852 err = sp_ModExp_1024(b, r, a, y);
4853 else if (fp_count_bits(a) == 2048 && fp_isodd(a))
4854 err = sp_ModExp_2048(b, r, a, y);
4855 else
4856 #endif
4857 #ifndef WOLFSSL_SP_NO_3072
4858 if (fp_count_bits(a) == 1536 && fp_isodd(a))
4859 err = sp_ModExp_1536(b, r, a, y);
4860 else if (fp_count_bits(a) == 3072 && fp_isodd(a))
4861 err = sp_ModExp_3072(b, r, a, y);
4862 else
4863 #endif
4864 #ifdef WOLFSSL_SP_4096
4865 if (fp_count_bits(a) == 4096 && fp_isodd(a))
4866 err = sp_ModExp_4096(b, r, a, y);
4867 else
4868 #endif
4869 #endif
4870 err = fp_exptmod(b, r, a, y);
4871 if (err != FP_OKAY) {
4872 return err;
4873 }
4874
4875 /* if y != 1 and y != n1 do */
4876 if (fp_cmp_d (y, 1) != FP_EQ && fp_cmp (y, n1) != FP_EQ) {
4877 j = 1;
4878 /* while j <= s-1 and y != n1 */
4879 while ((j <= (s - 1)) && fp_cmp (y, n1) != FP_EQ) {
4880 err = fp_sqrmod (y, a, y);
4881 if (err != FP_OKAY)
4882 return err;
4883
4884 /* if y == 1 then composite */
4885 if (fp_cmp_d (y, 1) == FP_EQ) {
4886 return FP_OKAY;
4887 }
4888 ++j;
4889 }
4890
4891 /* if y != n1 then composite */
4892 if (fp_cmp (y, n1) != FP_EQ) {
4893 return FP_OKAY;
4894 }
4895 }
4896
4897 /* probably prime now */
4898 *result = FP_YES;
4899
4900 return FP_OKAY;
4901 }
4902
fp_prime_miller_rabin(fp_int * a,fp_int * b,int * result)4903 static int fp_prime_miller_rabin(fp_int * a, fp_int * b, int *result)
4904 {
4905 int err;
4906 #ifndef WOLFSSL_SMALL_STACK
4907 fp_int n1[1], y[1], r[1];
4908 #else
4909 fp_int *n1, *y, *r;
4910 #endif
4911
4912 #ifdef WOLFSSL_SMALL_STACK
4913 n1 = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
4914 if (n1 == NULL) {
4915 return FP_MEM;
4916 }
4917 y = &n1[1]; r = &n1[2];
4918 #endif
4919
4920 fp_init(n1);
4921 fp_init(y);
4922 fp_init(r);
4923
4924 err = fp_prime_miller_rabin_ex(a, b, result, n1, y, r);
4925
4926 fp_clear(n1);
4927 fp_clear(y);
4928 fp_clear(r);
4929
4930 #ifdef WOLFSSL_SMALL_STACK
4931 XFREE(n1, NULL, DYNAMIC_TYPE_BIGINT);
4932 #endif
4933
4934 return err;
4935 }
4936
4937
4938 /* a few primes */
4939 static const fp_digit primes[FP_PRIME_SIZE] = {
4940 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4941 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4942 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4943 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
4944 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4945 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4946 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4947 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4948
4949 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4950 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4951 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4952 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4953 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4954 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4955 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4956 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4957
4958 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4959 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4960 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4961 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4962 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4963 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4964 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4965 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4966
4967 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4968 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4969 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4970 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4971 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4972 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4973 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4974 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4975 };
4976
fp_isprime_ex(fp_int * a,int t,int * result)4977 int fp_isprime_ex(fp_int *a, int t, int* result)
4978 {
4979 #ifndef WOLFSSL_SMALL_STACK
4980 fp_int b[1];
4981 #else
4982 fp_int *b;
4983 #endif
4984 fp_digit d;
4985 int r, res;
4986 int err;
4987
4988 if (t <= 0 || t > FP_PRIME_SIZE) {
4989 *result = FP_NO;
4990 return FP_VAL;
4991 }
4992
4993 if (fp_isone(a)) {
4994 *result = FP_NO;
4995 return FP_OKAY;
4996 }
4997
4998 /* check against primes table */
4999 for (r = 0; r < FP_PRIME_SIZE; r++) {
5000 if (fp_cmp_d(a, primes[r]) == FP_EQ) {
5001 *result = FP_YES;
5002 return FP_OKAY;
5003 }
5004 }
5005
5006 /* do trial division */
5007 for (r = 0; r < FP_PRIME_SIZE; r++) {
5008 res = fp_mod_d(a, primes[r], &d);
5009 if (res != MP_OKAY || d == 0) {
5010 *result = FP_NO;
5011 return res;
5012 }
5013 }
5014
5015 #ifdef WOLFSSL_SMALL_STACK
5016 b = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5017 if (b == NULL)
5018 return FP_MEM;
5019 #endif
5020 /* now do 't' miller rabins */
5021 fp_init(b);
5022 for (r = 0; r < t; r++) {
5023 fp_set(b, primes[r]);
5024 err = fp_prime_miller_rabin(a, b, &res);
5025 if ((err != FP_OKAY) || (res == FP_NO)) {
5026 *result = res;
5027 #ifdef WOLFSSL_SMALL_STACK
5028 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5029 #endif
5030 return err;
5031 }
5032 }
5033 *result = FP_YES;
5034 #ifdef WOLFSSL_SMALL_STACK
5035 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5036 #endif
5037 return FP_OKAY;
5038 }
5039
5040
5041 #if defined(FREESCALE_LTC_TFM)
wolfcrypt_mp_prime_is_prime_ex(mp_int * a,int t,int * result,WC_RNG * rng)5042 int wolfcrypt_mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng)
5043 #else
5044 int mp_prime_is_prime_ex(mp_int* a, int t, int* result, WC_RNG* rng)
5045 #endif
5046 {
5047 int ret = FP_YES;
5048 fp_digit d;
5049 int i;
5050
5051 if (a == NULL || result == NULL || rng == NULL)
5052 return FP_VAL;
5053
5054 if (fp_isone(a)) {
5055 *result = FP_NO;
5056 return FP_OKAY;
5057 }
5058
5059 /* check against primes table */
5060 for (i = 0; i < FP_PRIME_SIZE; i++) {
5061 if (fp_cmp_d(a, primes[i]) == FP_EQ) {
5062 *result = FP_YES;
5063 return FP_OKAY;
5064 }
5065 }
5066
5067 /* do trial division */
5068 for (i = 0; i < FP_PRIME_SIZE; i++) {
5069 if (fp_mod_d(a, primes[i], &d) == MP_OKAY) {
5070 if (d == 0) {
5071 *result = FP_NO;
5072 return FP_OKAY;
5073 }
5074 }
5075 else
5076 return FP_VAL;
5077 }
5078
5079 #ifndef WC_NO_RNG
5080 /* now do a miller rabin with up to t random numbers, this should
5081 * give a (1/4)^t chance of a false prime. */
5082 {
5083 #ifndef WOLFSSL_SMALL_STACK
5084 fp_int b[1], c[1], n1[1], y[1], r[1];
5085 byte base[FP_MAX_PRIME_SIZE];
5086 #else
5087 fp_int *b, *c, *n1, *y, *r;
5088 byte* base;
5089 #endif
5090 word32 baseSz;
5091 int err;
5092
5093 baseSz = fp_count_bits(a);
5094 /* The base size is the number of bits / 8. One is added if the number
5095 * of bits isn't an even 8. */
5096 baseSz = (baseSz / 8) + ((baseSz % 8) ? 1 : 0);
5097
5098 #ifndef WOLFSSL_SMALL_STACK
5099 if (baseSz > sizeof(base))
5100 return FP_MEM;
5101 #else
5102 base = (byte*)XMALLOC(baseSz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5103 if (base == NULL)
5104 return FP_MEM;
5105
5106 b = (fp_int*)XMALLOC(sizeof(fp_int) * 5, NULL, DYNAMIC_TYPE_BIGINT);
5107 if (b == NULL) {
5108 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5109 return FP_MEM;
5110 }
5111 c = &b[1]; n1 = &b[2]; y= &b[3]; r = &b[4];
5112 #endif
5113
5114 fp_init(b);
5115 fp_init(c);
5116 fp_init(n1);
5117 fp_init(y);
5118 fp_init(r);
5119
5120 err = fp_sub_d(a, 2, c);
5121 if (err != FP_OKAY) {
5122 #ifdef WOLFSSL_SMALL_STACK
5123 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5124 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5125 #endif
5126 return err;
5127 }
5128 while (t > 0) {
5129 if ((err = wc_RNG_GenerateBlock(rng, base, baseSz)) != 0) {
5130 #ifdef WOLFSSL_SMALL_STACK
5131 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5132 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5133 #endif
5134 return err;
5135 }
5136
5137 err = fp_read_unsigned_bin(b, base, baseSz);
5138 if (err != FP_OKAY) {
5139 #ifdef WOLFSSL_SMALL_STACK
5140 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5141 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5142 #endif
5143 return err;
5144 }
5145 if (fp_cmp_d(b, 2) != FP_GT || fp_cmp(b, c) != FP_LT) {
5146 continue;
5147 }
5148
5149 err = fp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
5150 if (err != FP_OKAY) {
5151 #ifdef WOLFSSL_SMALL_STACK
5152 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5153 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5154 #endif
5155 return err;
5156 }
5157 if (ret == FP_NO)
5158 break;
5159 fp_zero(b);
5160 t--;
5161 }
5162
5163 fp_clear(n1);
5164 fp_clear(y);
5165 fp_clear(r);
5166 fp_clear(b);
5167 fp_clear(c);
5168 #ifdef WOLFSSL_SMALL_STACK
5169 XFREE(b, NULL, DYNAMIC_TYPE_BIGINT);
5170 XFREE(base, NULL, DYNAMIC_TYPE_TMP_BUFFER);
5171 #endif
5172 }
5173 #else
5174 (void)t;
5175 #endif /* !WC_NO_RNG */
5176
5177 *result = ret;
5178 return FP_OKAY;
5179 }
5180 #endif /* !NO_RSA || !NO_DSA || !NO_DH || WOLFSSL_KEY_GEN */
5181
5182
mp_cond_swap_ct(mp_int * a,mp_int * b,int c,int m)5183 int mp_cond_swap_ct(mp_int * a, mp_int * b, int c, int m)
5184 {
5185 return fp_cond_swap_ct(a, b, c, m);
5186 }
5187
5188 #ifdef WOLFSSL_KEY_GEN
5189
5190 static int fp_gcd(fp_int *a, fp_int *b, fp_int *c);
5191 static int fp_lcm(fp_int *a, fp_int *b, fp_int *c);
5192 static int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap);
5193
mp_gcd(fp_int * a,fp_int * b,fp_int * c)5194 int mp_gcd(fp_int *a, fp_int *b, fp_int *c)
5195 {
5196 return fp_gcd(a, b, c);
5197 }
5198
5199
mp_lcm(fp_int * a,fp_int * b,fp_int * c)5200 int mp_lcm(fp_int *a, fp_int *b, fp_int *c)
5201 {
5202 return fp_lcm(a, b, c);
5203 }
5204
mp_rand_prime(mp_int * N,int len,WC_RNG * rng,void * heap)5205 int mp_rand_prime(mp_int* N, int len, WC_RNG* rng, void* heap)
5206 {
5207 int err;
5208
5209 err = fp_randprime(N, len, rng, heap);
5210 switch(err) {
5211 case FP_VAL:
5212 return MP_VAL;
5213 case FP_MEM:
5214 return MP_MEM;
5215 default:
5216 break;
5217 }
5218
5219 return MP_OKAY;
5220 }
5221
mp_exch(mp_int * a,mp_int * b)5222 int mp_exch (mp_int * a, mp_int * b)
5223 {
5224 return fp_exch(a, b);
5225 }
5226
5227
5228
fp_randprime(fp_int * N,int len,WC_RNG * rng,void * heap)5229 int fp_randprime(fp_int* N, int len, WC_RNG* rng, void* heap)
5230 {
5231 static const int USE_BBS = 1;
5232 int err, type;
5233 int isPrime = FP_YES;
5234 /* Assume the candidate is probably prime and then test until
5235 * it is proven composite. */
5236 byte* buf;
5237
5238 (void)heap;
5239
5240 /* get type */
5241 if (len < 0) {
5242 type = USE_BBS;
5243 len = -len;
5244 } else {
5245 type = 0;
5246 }
5247
5248 /* allow sizes between 2 and 512 bytes for a prime size */
5249 if (len < 2 || len > 512) {
5250 return FP_VAL;
5251 }
5252
5253 /* allocate buffer to work with */
5254 buf = (byte*)XMALLOC(len, heap, DYNAMIC_TYPE_TMP_BUFFER);
5255 if (buf == NULL) {
5256 return FP_MEM;
5257 }
5258 XMEMSET(buf, 0, len);
5259
5260 do {
5261 #ifdef SHOW_GEN
5262 printf(".");
5263 fflush(stdout);
5264 #endif
5265 /* generate value */
5266 err = wc_RNG_GenerateBlock(rng, buf, len);
5267 if (err != 0) {
5268 XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
5269 return FP_VAL;
5270 }
5271
5272 /* munge bits */
5273 buf[0] |= 0x80 | 0x40;
5274 buf[len-1] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
5275
5276 /* load value */
5277 err = fp_read_unsigned_bin(N, buf, len);
5278 if (err != 0) {
5279 XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
5280 return err;
5281 }
5282
5283 /* test */
5284 /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
5285 * of a 1024-bit candidate being a false positive, when it is our
5286 * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
5287 * Using 8 because we've always used 8 */
5288 mp_prime_is_prime_ex(N, 8, &isPrime, rng);
5289 } while (isPrime == FP_NO);
5290
5291 XMEMSET(buf, 0, len);
5292 XFREE(buf, heap, DYNAMIC_TYPE_TMP_BUFFER);
5293
5294 return FP_OKAY;
5295 }
5296
5297 /* c = [a, b] */
fp_lcm(fp_int * a,fp_int * b,fp_int * c)5298 int fp_lcm(fp_int *a, fp_int *b, fp_int *c)
5299 {
5300 int err;
5301 #ifndef WOLFSSL_SMALL_STACK
5302 fp_int t[2];
5303 #else
5304 fp_int *t;
5305 #endif
5306
5307 /* LCM of 0 and any number is undefined as 0 is not in the set of values
5308 * being used. */
5309 if (fp_iszero(a) == FP_YES || fp_iszero(b) == FP_YES) {
5310 return FP_VAL;
5311 }
5312
5313 #ifdef WOLFSSL_SMALL_STACK
5314 t = (fp_int*)XMALLOC(sizeof(fp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT);
5315 if (t == NULL) {
5316 return FP_MEM;
5317 }
5318 #endif
5319
5320 fp_init(&t[0]);
5321 fp_init(&t[1]);
5322 err = fp_gcd(a, b, &t[0]);
5323 if (err == FP_OKAY) {
5324 if (fp_cmp_mag(a, b) == FP_GT) {
5325 err = fp_div(a, &t[0], &t[1], NULL);
5326 if (err == FP_OKAY)
5327 err = fp_mul(b, &t[1], c);
5328 } else {
5329 err = fp_div(b, &t[0], &t[1], NULL);
5330 if (err == FP_OKAY)
5331 err = fp_mul(a, &t[1], c);
5332 }
5333 }
5334
5335 #ifdef WOLFSSL_SMALL_STACK
5336 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5337 #endif
5338 return err;
5339 }
5340
5341
5342
5343 /* c = (a, b) */
fp_gcd(fp_int * a,fp_int * b,fp_int * c)5344 int fp_gcd(fp_int *a, fp_int *b, fp_int *c)
5345 {
5346 #ifndef WOLFSSL_SMALL_STACK
5347 fp_int u[1], v[1], r[1];
5348 #else
5349 fp_int *u, *v, *r;
5350 #endif
5351
5352 /* GCD of 0 and 0 is undefined as all integers divide 0. */
5353 if (fp_iszero(a) == FP_YES && fp_iszero(b) == FP_YES) {
5354 return FP_VAL;
5355 }
5356
5357 /* either zero than gcd is the largest */
5358 if (fp_iszero (a) == FP_YES && fp_iszero (b) == FP_NO) {
5359 fp_abs (b, c);
5360 return FP_OKAY;
5361 }
5362 if (fp_iszero (a) == FP_NO && fp_iszero (b) == FP_YES) {
5363 fp_abs (a, c);
5364 return FP_OKAY;
5365 }
5366
5367 /* optimized. At this point if a == 0 then
5368 * b must equal zero too
5369 */
5370 if (fp_iszero (a) == FP_YES) {
5371 fp_zero(c);
5372 return FP_OKAY;
5373 }
5374
5375 #ifdef WOLFSSL_SMALL_STACK
5376 u = (fp_int*)XMALLOC(sizeof(fp_int) * 3, NULL, DYNAMIC_TYPE_BIGINT);
5377 if (u == NULL) {
5378 return FP_MEM;
5379 }
5380 v = &u[1]; r = &u[2];
5381 #endif
5382
5383 /* sort inputs */
5384 if (fp_cmp_mag(a, b) != FP_LT) {
5385 fp_init_copy(u, a);
5386 fp_init_copy(v, b);
5387 } else {
5388 fp_init_copy(u, b);
5389 fp_init_copy(v, a);
5390 }
5391
5392 u->sign = FP_ZPOS;
5393 v->sign = FP_ZPOS;
5394
5395 fp_init(r);
5396 while (fp_iszero(v) == FP_NO) {
5397 int err = fp_mod(u, v, r);
5398 if (err != MP_OKAY) {
5399 #ifdef WOLFSSL_SMALL_STACK
5400 XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
5401 #endif
5402 return err;
5403 }
5404 fp_copy(v, u);
5405 fp_copy(r, v);
5406 }
5407 fp_copy(u, c);
5408
5409 #ifdef WOLFSSL_SMALL_STACK
5410 XFREE(u, NULL, DYNAMIC_TYPE_BIGINT);
5411 #endif
5412 return FP_OKAY;
5413 }
5414
5415 #endif /* WOLFSSL_KEY_GEN */
5416
5417
5418 #if defined(HAVE_ECC) || !defined(NO_PWDBASED) || defined(OPENSSL_EXTRA) || \
5419 defined(WC_RSA_BLINDING) || !defined(NO_DSA) || \
5420 (!defined(NO_RSA) && !defined(NO_RSA_BOUNDS_CHECK))
5421 /* c = a + b */
fp_add_d(fp_int * a,fp_digit b,fp_int * c)5422 int fp_add_d(fp_int *a, fp_digit b, fp_int *c)
5423 {
5424 #ifndef WOLFSSL_SMALL_STACK
5425 fp_int tmp[1];
5426 #else
5427 fp_int* tmp;
5428 #endif
5429 int err;
5430
5431 #ifdef WOLFSSL_SMALL_STACK
5432 tmp = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5433 if (tmp == NULL)
5434 return FP_MEM;
5435 #endif
5436
5437 fp_init(tmp);
5438 fp_set(tmp, b);
5439 err = fp_add(a, tmp, c);
5440
5441 #ifdef WOLFSSL_SMALL_STACK
5442 XFREE(tmp, NULL, DYNAMIC_TYPE_BIGINT);
5443 #endif
5444 return err;
5445 }
5446
5447 /* external compatibility */
mp_add_d(fp_int * a,fp_digit b,fp_int * c)5448 int mp_add_d(fp_int *a, fp_digit b, fp_int *c)
5449 {
5450 return fp_add_d(a, b, c);
5451 }
5452
5453 #endif /* HAVE_ECC || !NO_PWDBASED || OPENSSL_EXTRA || WC_RSA_BLINDING ||
5454 !NO_DSA || (!NO_RSA && !NO_RSA_BOUNDS_CHECK) */
5455
5456
5457 #if !defined(NO_DSA) || defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || \
5458 defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) || \
5459 defined(DEBUG_WOLFSSL) || defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
5460
5461 /* chars used in radix conversions */
5462 static wcchar fp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
5463 "abcdefghijklmnopqrstuvwxyz+/";
5464 #endif
5465
5466 #if !defined(NO_DSA) || defined(HAVE_ECC)
5467 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
fp_read_radix_16(fp_int * a,const char * str)5468 static int fp_read_radix_16(fp_int *a, const char *str)
5469 {
5470 int i, j, k, neg;
5471 int ch;
5472
5473 /* if the leading digit is a
5474 * minus set the sign to negative.
5475 */
5476 if (*str == '-') {
5477 ++str;
5478 neg = FP_NEG;
5479 } else {
5480 neg = FP_ZPOS;
5481 }
5482
5483 j = 0;
5484 k = 0;
5485 for (i = (int)(XSTRLEN(str) - 1); i >= 0; i--) {
5486 ch = (int)HexCharToByte(str[i]);
5487 if (ch < 0) {
5488 return FP_VAL;
5489 }
5490
5491 k += j == DIGIT_BIT;
5492 j &= DIGIT_BIT - 1;
5493 if (k >= FP_SIZE)
5494 return FP_VAL;
5495
5496 a->dp[k] |= ((fp_digit)ch) << j;
5497 j += 4;
5498 }
5499
5500 a->used = k + 1;
5501 fp_clamp(a);
5502 /* set the sign only if a != 0 */
5503 if (fp_iszero(a) != FP_YES) {
5504 a->sign = neg;
5505 }
5506 return FP_OKAY;
5507 }
5508 #endif
5509
fp_read_radix(fp_int * a,const char * str,int radix)5510 static int fp_read_radix(fp_int *a, const char *str, int radix)
5511 {
5512 int y, neg;
5513 char ch;
5514
5515 /* set the integer to the default of zero */
5516 fp_zero (a);
5517
5518 #if DIGIT_BIT == 64 || DIGIT_BIT == 32
5519 if (radix == 16)
5520 return fp_read_radix_16(a, str);
5521 #endif
5522
5523 /* make sure the radix is ok */
5524 if (radix < 2 || radix > 64) {
5525 return FP_VAL;
5526 }
5527
5528 /* if the leading digit is a
5529 * minus set the sign to negative.
5530 */
5531 if (*str == '-') {
5532 ++str;
5533 neg = FP_NEG;
5534 } else {
5535 neg = FP_ZPOS;
5536 }
5537
5538 /* process each digit of the string */
5539 while (*str) {
5540 /* if the radix <= 36 the conversion is case insensitive
5541 * this allows numbers like 1AB and 1ab to represent the same value
5542 * [e.g. in hex]
5543 */
5544 ch = (char)((radix <= 36) ? XTOUPPER((unsigned char)*str) : *str);
5545 for (y = 0; y < 64; y++) {
5546 if (ch == fp_s_rmap[y]) {
5547 break;
5548 }
5549 }
5550 if (y >= radix) {
5551 return FP_VAL;
5552 }
5553
5554 /* if the char was found in the map
5555 * and is less than the given radix add it
5556 * to the number, otherwise exit the loop.
5557 */
5558 if (y < radix) {
5559 int ret = fp_mul_d (a, (fp_digit) radix, a);
5560 if (ret != FP_OKAY)
5561 return ret;
5562 ret = fp_add_d (a, (fp_digit) y, a);
5563 if (ret != FP_OKAY)
5564 return ret;
5565 } else {
5566 break;
5567 }
5568 ++str;
5569 }
5570
5571 /* set the sign only if a != 0 */
5572 if (fp_iszero(a) != FP_YES) {
5573 a->sign = neg;
5574 }
5575 return FP_OKAY;
5576 }
5577
5578 /* fast math conversion */
mp_read_radix(mp_int * a,const char * str,int radix)5579 int mp_read_radix(mp_int *a, const char *str, int radix)
5580 {
5581 return fp_read_radix(a, str, radix);
5582 }
5583
5584 #endif /* !defined(NO_DSA) || defined(HAVE_ECC) */
5585
5586 #ifdef HAVE_ECC
5587
5588 /* fast math conversion */
mp_sqr(fp_int * A,fp_int * B)5589 int mp_sqr(fp_int *A, fp_int *B)
5590 {
5591 return fp_sqr(A, B);
5592 }
5593
5594 /* fast math conversion */
mp_montgomery_reduce(fp_int * a,fp_int * m,fp_digit mp)5595 int mp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp)
5596 {
5597 return fp_montgomery_reduce(a, m, mp);
5598 }
5599
mp_montgomery_reduce_ex(fp_int * a,fp_int * m,fp_digit mp,int ct)5600 int mp_montgomery_reduce_ex(fp_int *a, fp_int *m, fp_digit mp, int ct)
5601 {
5602 return fp_montgomery_reduce_ex(a, m, mp, ct);
5603 }
5604
5605
5606 /* fast math conversion */
mp_montgomery_setup(fp_int * a,fp_digit * rho)5607 int mp_montgomery_setup(fp_int *a, fp_digit *rho)
5608 {
5609 return fp_montgomery_setup(a, rho);
5610 }
5611
mp_div_2(fp_int * a,fp_int * b)5612 int mp_div_2(fp_int * a, fp_int * b)
5613 {
5614 fp_div_2(a, b);
5615 return MP_OKAY;
5616 }
5617
5618 /* c = a / 2 (mod b) - constant time (a < b and positive) */
mp_div_2_mod_ct(mp_int * a,mp_int * b,mp_int * c)5619 int mp_div_2_mod_ct(mp_int *a, mp_int *b, mp_int *c)
5620 {
5621 return fp_div_2_mod_ct(a, b, c);
5622 }
5623
5624 #ifdef HAVE_COMP_KEY
5625
mp_cnt_lsb(fp_int * a)5626 int mp_cnt_lsb(fp_int* a)
5627 {
5628 return fp_cnt_lsb(a);
5629 }
5630
5631 #endif /* HAVE_COMP_KEY */
5632
5633 #endif /* HAVE_ECC */
5634
5635 #if defined(HAVE_ECC) || !defined(NO_RSA) || !defined(NO_DSA) || \
5636 defined(WOLFSSL_KEY_GEN)
5637 /* fast math conversion */
mp_set(fp_int * a,fp_digit b)5638 int mp_set(fp_int *a, fp_digit b)
5639 {
5640 fp_set(a,b);
5641 return MP_OKAY;
5642 }
5643 #endif
5644
5645 #ifdef WC_MP_TO_RADIX
5646
5647 /* returns size of ASCII representation */
mp_radix_size(mp_int * a,int radix,int * size)5648 int mp_radix_size (mp_int *a, int radix, int *size)
5649 {
5650 int res, digs;
5651 fp_digit d;
5652 #ifndef WOLFSSL_SMALL_STACK
5653 fp_int t[1];
5654 #else
5655 fp_int *t;
5656 #endif
5657
5658 *size = 0;
5659
5660 /* special case for binary */
5661 if (radix == 2) {
5662 *size = fp_count_bits(a);
5663 if (*size == 0)
5664 *size = 1;
5665 *size += (a->sign == FP_NEG ? 1 : 0) + 1; /* "-" sign + null term */
5666 return FP_OKAY;
5667 }
5668
5669 /* make sure the radix is in range */
5670 if (radix < 2 || radix > 64) {
5671 return FP_VAL;
5672 }
5673
5674 if (fp_iszero(a) == MP_YES) {
5675 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5676 if (radix == 16)
5677 *size = 3;
5678 else
5679 #endif
5680 *size = 2;
5681 return FP_OKAY;
5682 }
5683
5684 /* digs is the digit count */
5685 digs = 0;
5686
5687 #ifdef WOLFSSL_SMALL_STACK
5688 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5689 if (t == NULL)
5690 return FP_MEM;
5691 #endif
5692
5693 /* init a copy of the input */
5694 fp_init_copy (t, a);
5695
5696 /* force temp to positive */
5697 t->sign = FP_ZPOS;
5698
5699 /* fetch out all of the digits */
5700 while (fp_iszero (t) == FP_NO) {
5701 if ((res = fp_div_d (t, (mp_digit) radix, t, &d)) != FP_OKAY) {
5702 fp_zero (t);
5703 #ifdef WOLFSSL_SMALL_STACK
5704 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5705 #endif
5706 return res;
5707 }
5708 ++digs;
5709 }
5710 fp_zero (t);
5711
5712 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5713 /* For hexadecimal output, add zero padding when number of digits is odd */
5714 if ((digs & 1) && (radix == 16)) {
5715 ++digs;
5716 }
5717 #endif
5718
5719 /* if it's negative add one for the sign */
5720 if (a->sign == FP_NEG) {
5721 ++digs;
5722 }
5723
5724 /* return digs + 1, the 1 is for the NULL byte that would be required. */
5725 *size = digs + 1;
5726 #ifdef WOLFSSL_SMALL_STACK
5727 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5728 #endif
5729 return FP_OKAY;
5730 }
5731
5732 /* stores a bignum as a ASCII string in a given radix (2..64) */
mp_toradix(mp_int * a,char * str,int radix)5733 int mp_toradix (mp_int *a, char *str, int radix)
5734 {
5735 int res, digs;
5736 fp_digit d;
5737 char *_s = str;
5738 #ifndef WOLFSSL_SMALL_STACK
5739 fp_int t[1];
5740 #else
5741 fp_int *t;
5742 #endif
5743
5744 /* check range of the radix */
5745 if (radix < 2 || radix > 64) {
5746 return FP_VAL;
5747 }
5748
5749 /* quick out if its zero */
5750 if (fp_iszero(a) == FP_YES) {
5751 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5752 if (radix == 16)
5753 *str++ = '0';
5754 #endif
5755 *str++ = '0';
5756 *str = '\0';
5757 return FP_OKAY;
5758 }
5759
5760 #ifdef WOLFSSL_SMALL_STACK
5761 t = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_BIGINT);
5762 if (t == NULL)
5763 return FP_MEM;
5764 #endif
5765
5766 /* init a copy of the input */
5767 fp_init_copy (t, a);
5768
5769 /* if it is negative output a - */
5770 if (t->sign == FP_NEG) {
5771 ++_s;
5772 *str++ = '-';
5773 t->sign = FP_ZPOS;
5774 }
5775
5776 digs = 0;
5777 while (fp_iszero (t) == FP_NO) {
5778 if ((res = fp_div_d (t, (fp_digit) radix, t, &d)) != FP_OKAY) {
5779 fp_zero (t);
5780 #ifdef WOLFSSL_SMALL_STACK
5781 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5782 #endif
5783 return res;
5784 }
5785 *str++ = fp_s_rmap[d];
5786 ++digs;
5787 }
5788 #ifndef WC_DISABLE_RADIX_ZERO_PAD
5789 /* For hexadecimal output, add zero padding when number of digits is odd */
5790 if ((digs & 1) && (radix == 16)) {
5791 *str++ = fp_s_rmap[0];
5792 ++digs;
5793 }
5794 #endif
5795 /* reverse the digits of the string. In this case _s points
5796 * to the first digit [excluding the sign] of the number]
5797 */
5798 fp_reverse ((unsigned char *)_s, digs);
5799
5800 /* append a NULL so the string is properly terminated */
5801 *str = '\0';
5802
5803 fp_zero (t);
5804 #ifdef WOLFSSL_SMALL_STACK
5805 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5806 #endif
5807 return FP_OKAY;
5808 }
5809
5810 #ifdef WOLFSSL_DEBUG_MATH
mp_dump(const char * desc,mp_int * a,byte verbose)5811 void mp_dump(const char* desc, mp_int* a, byte verbose)
5812 {
5813 char buffer[FP_SIZE * sizeof(fp_digit) * 2];
5814 int size;
5815
5816 #if defined(ALT_ECC_SIZE) || defined(HAVE_WOLF_BIGINT)
5817 size = a->size;
5818 #else
5819 size = FP_SIZE;
5820 #endif
5821
5822 printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n",
5823 desc, a, a->used, a->sign, size, (int)sizeof(fp_digit));
5824
5825 mp_tohex(a, buffer);
5826 printf(" %s\n ", buffer);
5827
5828 if (verbose) {
5829 int i;
5830 for(i=0; i<size * (int)sizeof(fp_digit); i++) {
5831 printf("%x ", *(((byte*)a->dp) + i));
5832 }
5833 printf("\n");
5834 }
5835 }
5836 #endif /* WOLFSSL_DEBUG_MATH */
5837
5838 #endif /* WC_MP_TO_RADIX */
5839
5840
mp_abs(mp_int * a,mp_int * b)5841 int mp_abs(mp_int* a, mp_int* b)
5842 {
5843 fp_abs(a, b);
5844 return FP_OKAY;
5845 }
5846
5847
mp_lshd(mp_int * a,int b)5848 int mp_lshd (mp_int * a, int b)
5849 {
5850 return fp_lshd(a, b);
5851 }
5852
5853 #endif /* USE_FAST_MATH */
5854