1 /* fe_low_mem.c
2 *
3 * Copyright (C) 2006-2021 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22
23 /* Based from Daniel Beer's public domain work. */
24
25 #ifdef HAVE_CONFIG_H
26 #include <config.h>
27 #endif
28
29 #include <wolfssl/wolfcrypt/settings.h>
30
31 #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
32 #if defined(CURVE25519_SMALL) || defined(ED25519_SMALL) /* use slower code that takes less memory */
33
34 #include <wolfssl/wolfcrypt/fe_operations.h>
35
36 #ifdef NO_INLINE
37 #include <wolfssl/wolfcrypt/misc.h>
38 #else
39 #define WOLFSSL_MISC_INCLUDED
40 #include <wolfcrypt/src/misc.c>
41 #endif
42
fprime_copy(byte * x,const byte * a)43 void fprime_copy(byte *x, const byte *a)
44 {
45 int i;
46 for (i = 0; i < F25519_SIZE; i++)
47 x[i] = a[i];
48 }
49
50
lm_copy(byte * x,const byte * a)51 void lm_copy(byte* x, const byte* a)
52 {
53 int i;
54 for (i = 0; i < F25519_SIZE; i++)
55 x[i] = a[i];
56 }
57
58 #if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
59 (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
60 !defined(FREESCALE_LTC_ECC)
61 /* to be Complementary to fe_low_mem.c */
62 #else
fe_init(void)63 void fe_init(void)
64 {
65 }
66 #endif
67
68 #ifdef CURVE25519_SMALL
69
70 /* Double an X-coordinate */
xc_double(byte * x3,byte * z3,const byte * x1,const byte * z1)71 static void xc_double(byte *x3, byte *z3,
72 const byte *x1, const byte *z1)
73 {
74 /* Explicit formulas database: dbl-1987-m
75 *
76 * source 1987 Montgomery "Speeding the Pollard and elliptic
77 * curve methods of factorization", page 261, fourth display
78 * compute X3 = (X1^2-Z1^2)^2
79 * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
80 */
81 byte x1sq[F25519_SIZE];
82 byte z1sq[F25519_SIZE];
83 byte x1z1[F25519_SIZE];
84 byte a[F25519_SIZE];
85
86 fe_mul__distinct(x1sq, x1, x1);
87 fe_mul__distinct(z1sq, z1, z1);
88 fe_mul__distinct(x1z1, x1, z1);
89
90 lm_sub(a, x1sq, z1sq);
91 fe_mul__distinct(x3, a, a);
92
93 fe_mul_c(a, x1z1, 486662);
94 lm_add(a, x1sq, a);
95 lm_add(a, z1sq, a);
96 fe_mul__distinct(x1sq, x1z1, a);
97 fe_mul_c(z3, x1sq, 4);
98 }
99
100
101 /* Differential addition */
xc_diffadd(byte * x5,byte * z5,const byte * x1,const byte * z1,const byte * x2,const byte * z2,const byte * x3,const byte * z3)102 static void xc_diffadd(byte *x5, byte *z5,
103 const byte *x1, const byte *z1,
104 const byte *x2, const byte *z2,
105 const byte *x3, const byte *z3)
106 {
107 /* Explicit formulas database: dbl-1987-m3
108 *
109 * source 1987 Montgomery "Speeding the Pollard and elliptic curve
110 * methods of factorization", page 261, fifth display, plus
111 * common-subexpression elimination
112 * compute A = X2+Z2
113 * compute B = X2-Z2
114 * compute C = X3+Z3
115 * compute D = X3-Z3
116 * compute DA = D A
117 * compute CB = C B
118 * compute X5 = Z1(DA+CB)^2
119 * compute Z5 = X1(DA-CB)^2
120 */
121 byte da[F25519_SIZE];
122 byte cb[F25519_SIZE];
123 byte a[F25519_SIZE];
124 byte b[F25519_SIZE];
125
126 lm_add(a, x2, z2);
127 lm_sub(b, x3, z3); /* D */
128 fe_mul__distinct(da, a, b);
129
130 lm_sub(b, x2, z2);
131 lm_add(a, x3, z3); /* C */
132 fe_mul__distinct(cb, a, b);
133
134 lm_add(a, da, cb);
135 fe_mul__distinct(b, a, a);
136 fe_mul__distinct(x5, z1, b);
137
138 lm_sub(a, da, cb);
139 fe_mul__distinct(b, a, a);
140 fe_mul__distinct(z5, x1, b);
141 }
142
143 #ifndef FREESCALE_LTC_ECC
curve25519(byte * result,const byte * e,const byte * q)144 int curve25519(byte *result, const byte *e, const byte *q)
145 {
146 /* Current point: P_m */
147 byte xm[F25519_SIZE];
148 byte zm[F25519_SIZE] = {1};
149
150 /* Predecessor: P_(m-1) */
151 byte xm1[F25519_SIZE] = {1};
152 byte zm1[F25519_SIZE] = {0};
153
154 int i;
155
156 /* Note: bit 254 is assumed to be 1 */
157 lm_copy(xm, q);
158
159 for (i = 253; i >= 0; i--) {
160 const int bit = (e[i >> 3] >> (i & 7)) & 1;
161 byte xms[F25519_SIZE];
162 byte zms[F25519_SIZE];
163
164 /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
165 xc_diffadd(xm1, zm1, q, f25519_one, xm, zm, xm1, zm1);
166 xc_double(xm, zm, xm, zm);
167
168 /* Compute P_(2m+1) */
169 xc_diffadd(xms, zms, xm1, zm1, xm, zm, q, f25519_one);
170
171 /* Select:
172 * bit = 1 --> (P_(2m+1), P_(2m))
173 * bit = 0 --> (P_(2m), P_(2m-1))
174 */
175 fe_select(xm1, xm1, xm, bit);
176 fe_select(zm1, zm1, zm, bit);
177 fe_select(xm, xm, xms, bit);
178 fe_select(zm, zm, zms, bit);
179 }
180
181 /* Freeze out of projective coordinates */
182 fe_inv__distinct(zm1, zm);
183 fe_mul__distinct(result, zm1, xm);
184 fe_normalize(result);
185 return 0;
186 }
187 #endif /* !FREESCALE_LTC_ECC */
188 #endif /* CURVE25519_SMALL */
189
190
raw_add(byte * x,const byte * p)191 static void raw_add(byte *x, const byte *p)
192 {
193 word16 c = 0;
194 int i;
195
196 for (i = 0; i < F25519_SIZE; i++) {
197 c += ((word16)x[i]) + ((word16)p[i]);
198 x[i] = (byte)c;
199 c >>= 8;
200 }
201 }
202
203
raw_try_sub(byte * x,const byte * p)204 static void raw_try_sub(byte *x, const byte *p)
205 {
206 byte minusp[F25519_SIZE];
207 word16 c = 0;
208 int i;
209
210 for (i = 0; i < F25519_SIZE; i++) {
211 c = ((word16)x[i]) - ((word16)p[i]) - c;
212 minusp[i] = (byte)c;
213 c = (c >> 8) & 1;
214 }
215
216 fprime_select(x, minusp, x, (byte)c);
217 }
218
219
prime_msb(const byte * p)220 static int prime_msb(const byte *p)
221 {
222 int i;
223 byte x;
224 int shift = 1;
225 int z = F25519_SIZE - 1;
226
227 /*
228 Test for any hot bits.
229 As soon as one instance is encountered set shift to 0.
230 */
231 for (i = F25519_SIZE - 1; i >= 0; i--) {
232 shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
233 z -= shift;
234 }
235 x = p[z];
236 z <<= 3;
237 shift = 1;
238 for (i = 0; i < 8; i++) {
239 shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
240 z += shift;
241 }
242
243 return z - 1;
244 }
245
246
fprime_select(byte * dst,const byte * zero,const byte * one,byte condition)247 void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
248 {
249 const byte mask = -condition;
250 int i;
251
252 for (i = 0; i < F25519_SIZE; i++)
253 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
254 }
255
256
fprime_add(byte * r,const byte * a,const byte * modulus)257 void fprime_add(byte *r, const byte *a, const byte *modulus)
258 {
259 raw_add(r, a);
260 raw_try_sub(r, modulus);
261 }
262
263
fprime_sub(byte * r,const byte * a,const byte * modulus)264 void fprime_sub(byte *r, const byte *a, const byte *modulus)
265 {
266 raw_add(r, modulus);
267 raw_try_sub(r, a);
268 raw_try_sub(r, modulus);
269 }
270
271
fprime_mul(byte * r,const byte * a,const byte * b,const byte * modulus)272 void fprime_mul(byte *r, const byte *a, const byte *b,
273 const byte *modulus)
274 {
275 word16 c = 0;
276 int i,j;
277
278 XMEMSET(r, 0, F25519_SIZE);
279
280 for (i = prime_msb(modulus); i >= 0; i--) {
281 const byte bit = (b[i >> 3] >> (i & 7)) & 1;
282 byte plusa[F25519_SIZE];
283
284 for (j = 0; j < F25519_SIZE; j++) {
285 c |= ((word16)r[j]) << 1;
286 r[j] = (byte)c;
287 c >>= 8;
288 }
289 raw_try_sub(r, modulus);
290
291 fprime_copy(plusa, r);
292 fprime_add(plusa, a, modulus);
293
294 fprime_select(r, r, plusa, bit);
295 }
296 }
297
298
fe_load(byte * x,word32 c)299 void fe_load(byte *x, word32 c)
300 {
301 word32 i;
302
303 for (i = 0; i < sizeof(c); i++) {
304 x[i] = c;
305 c >>= 8;
306 }
307
308 for (; i < F25519_SIZE; i++)
309 x[i] = 0;
310 }
311
312
fe_normalize(byte * x)313 void fe_normalize(byte *x)
314 {
315 byte minusp[F25519_SIZE];
316 word16 c;
317 int i;
318
319 /* Reduce using 2^255 = 19 mod p */
320 c = (x[31] >> 7) * 19;
321 x[31] &= 127;
322
323 for (i = 0; i < F25519_SIZE; i++) {
324 c += x[i];
325 x[i] = (byte)c;
326 c >>= 8;
327 }
328
329 /* The number is now less than 2^255 + 18, and therefore less than
330 * 2p. Try subtracting p, and conditionally load the subtracted
331 * value if underflow did not occur.
332 */
333 c = 19;
334
335 for (i = 0; i + 1 < F25519_SIZE; i++) {
336 c += x[i];
337 minusp[i] = (byte)c;
338 c >>= 8;
339 }
340
341 c += ((word16)x[i]) - 128;
342 minusp[31] = (byte)c;
343
344 /* Load x-p if no underflow */
345 fe_select(x, minusp, x, (c >> 15) & 1);
346 }
347
348
fe_select(byte * dst,const byte * zero,const byte * one,byte condition)349 void fe_select(byte *dst,
350 const byte *zero, const byte *one,
351 byte condition)
352 {
353 const byte mask = -condition;
354 int i;
355
356 for (i = 0; i < F25519_SIZE; i++)
357 dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
358 }
359
360
lm_add(byte * r,const byte * a,const byte * b)361 void lm_add(byte* r, const byte* a, const byte* b)
362 {
363 word16 c = 0;
364 int i;
365
366 /* Add */
367 for (i = 0; i < F25519_SIZE; i++) {
368 c >>= 8;
369 c += ((word16)a[i]) + ((word16)b[i]);
370 r[i] = (byte)c;
371 }
372
373 /* Reduce with 2^255 = 19 mod p */
374 r[31] &= 127;
375 c = (c >> 7) * 19;
376
377 for (i = 0; i < F25519_SIZE; i++) {
378 c += r[i];
379 r[i] = (byte)c;
380 c >>= 8;
381 }
382 }
383
384
lm_sub(byte * r,const byte * a,const byte * b)385 void lm_sub(byte* r, const byte* a, const byte* b)
386 {
387 word32 c = 0;
388 int i;
389
390 /* Calculate a + 2p - b, to avoid underflow */
391 c = 218;
392 for (i = 0; i + 1 < F25519_SIZE; i++) {
393 c += 65280 + ((word32)a[i]) - ((word32)b[i]);
394 r[i] = c;
395 c >>= 8;
396 }
397
398 c += ((word32)a[31]) - ((word32)b[31]);
399 r[31] = c & 127;
400 c = (c >> 7) * 19;
401
402 for (i = 0; i < F25519_SIZE; i++) {
403 c += r[i];
404 r[i] = c;
405 c >>= 8;
406 }
407 }
408
409
lm_neg(byte * r,const byte * a)410 void lm_neg(byte* r, const byte* a)
411 {
412 word32 c = 0;
413 int i;
414
415 /* Calculate 2p - a, to avoid underflow */
416 c = 218;
417 for (i = 0; i + 1 < F25519_SIZE; i++) {
418 c += 65280 - ((word32)a[i]);
419 r[i] = c;
420 c >>= 8;
421 }
422
423 c -= ((word32)a[31]);
424 r[31] = c & 127;
425 c = (c >> 7) * 19;
426
427 for (i = 0; i < F25519_SIZE; i++) {
428 c += r[i];
429 r[i] = c;
430 c >>= 8;
431 }
432 }
433
434
fe_mul__distinct(byte * r,const byte * a,const byte * b)435 void fe_mul__distinct(byte *r, const byte *a, const byte *b)
436 {
437 word32 c = 0;
438 int i;
439
440 for (i = 0; i < F25519_SIZE; i++) {
441 int j;
442
443 c >>= 8;
444 for (j = 0; j <= i; j++)
445 c += ((word32)a[j]) * ((word32)b[i - j]);
446
447 for (; j < F25519_SIZE; j++)
448 c += ((word32)a[j]) *
449 ((word32)b[i + F25519_SIZE - j]) * 38;
450
451 r[i] = c;
452 }
453
454 r[31] &= 127;
455 c = (c >> 7) * 19;
456
457 for (i = 0; i < F25519_SIZE; i++) {
458 c += r[i];
459 r[i] = c;
460 c >>= 8;
461 }
462 }
463
464
lm_mul(byte * r,const byte * a,const byte * b)465 void lm_mul(byte *r, const byte* a, const byte *b)
466 {
467 byte tmp[F25519_SIZE];
468
469 fe_mul__distinct(tmp, a, b);
470 lm_copy(r, tmp);
471 }
472
473
fe_mul_c(byte * r,const byte * a,word32 b)474 void fe_mul_c(byte *r, const byte *a, word32 b)
475 {
476 word32 c = 0;
477 int i;
478
479 for (i = 0; i < F25519_SIZE; i++) {
480 c >>= 8;
481 c += b * ((word32)a[i]);
482 r[i] = c;
483 }
484
485 r[31] &= 127;
486 c >>= 7;
487 c *= 19;
488
489 for (i = 0; i < F25519_SIZE; i++) {
490 c += r[i];
491 r[i] = c;
492 c >>= 8;
493 }
494 }
495
496
fe_inv__distinct(byte * r,const byte * x)497 void fe_inv__distinct(byte *r, const byte *x)
498 {
499 byte s[F25519_SIZE];
500 int i;
501
502 /* This is a prime field, so by Fermat's little theorem:
503 *
504 * x^(p-1) = 1 mod p
505 *
506 * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
507 * inverse.
508 *
509 * This is a 255-bit binary number with the digits:
510 *
511 * 11111111... 01011
512 *
513 * We compute the result by the usual binary chain, but
514 * alternate between keeping the accumulator in r and s, so as
515 * to avoid copying temporaries.
516 */
517
518 /* 1 1 */
519 fe_mul__distinct(s, x, x);
520 fe_mul__distinct(r, s, x);
521
522 /* 1 x 248 */
523 for (i = 0; i < 248; i++) {
524 fe_mul__distinct(s, r, r);
525 fe_mul__distinct(r, s, x);
526 }
527
528 /* 0 */
529 fe_mul__distinct(s, r, r);
530
531 /* 1 */
532 fe_mul__distinct(r, s, s);
533 fe_mul__distinct(s, r, x);
534
535 /* 0 */
536 fe_mul__distinct(r, s, s);
537
538 /* 1 */
539 fe_mul__distinct(s, r, r);
540 fe_mul__distinct(r, s, x);
541
542 /* 1 */
543 fe_mul__distinct(s, r, r);
544 fe_mul__distinct(r, s, x);
545 }
546
547
lm_invert(byte * r,const byte * x)548 void lm_invert(byte *r, const byte *x)
549 {
550 byte tmp[F25519_SIZE];
551
552 fe_inv__distinct(tmp, x);
553 lm_copy(r, tmp);
554 }
555
556
557 /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
558 * storage.
559 */
exp2523(byte * r,const byte * x,byte * s)560 static void exp2523(byte *r, const byte *x, byte *s)
561 {
562 int i;
563
564 /* This number is a 252-bit number with the binary expansion:
565 *
566 * 111111... 01
567 */
568
569 /* 1 1 */
570 fe_mul__distinct(r, x, x);
571 fe_mul__distinct(s, r, x);
572
573 /* 1 x 248 */
574 for (i = 0; i < 248; i++) {
575 fe_mul__distinct(r, s, s);
576 fe_mul__distinct(s, r, x);
577 }
578
579 /* 0 */
580 fe_mul__distinct(r, s, s);
581
582 /* 1 */
583 fe_mul__distinct(s, r, r);
584 fe_mul__distinct(r, s, x);
585 }
586
587
fe_sqrt(byte * r,const byte * a)588 void fe_sqrt(byte *r, const byte *a)
589 {
590 byte v[F25519_SIZE];
591 byte i[F25519_SIZE];
592 byte x[F25519_SIZE];
593 byte y[F25519_SIZE];
594
595 /* v = (2a)^((p-5)/8) [x = 2a] */
596 fe_mul_c(x, a, 2);
597 exp2523(v, x, y);
598
599 /* i = 2av^2 - 1 */
600 fe_mul__distinct(y, v, v);
601 fe_mul__distinct(i, x, y);
602 fe_load(y, 1);
603 lm_sub(i, i, y);
604
605 /* r = avi */
606 fe_mul__distinct(x, v, a);
607 fe_mul__distinct(r, x, i);
608 }
609
610 #endif /* CURVE25519_SMALL || ED25519_SMALL */
611 #endif /* HAVE_CURVE25519 || HAVE_ED25519 */
612