1 /* fe_448.c
2 *
3 * Copyright (C) 2006-2021 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22 /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
23 * Small implementation based on Daniel Beer's curve25519 public domain work.
24 * Reworked for curve448 by Sean Parkinson.
25 */
26
27 #ifdef HAVE_CONFIG_H
28 #include <config.h>
29 #endif
30
31 #include <wolfssl/wolfcrypt/settings.h>
32
33 #if defined(HAVE_CURVE448) || defined(HAVE_ED448)
34
35 #include <wolfssl/wolfcrypt/fe_448.h>
36
37 #ifdef NO_INLINE
38 #include <wolfssl/wolfcrypt/misc.h>
39 #else
40 #define WOLFSSL_MISC_INCLUDED
41 #include <wolfcrypt/src/misc.c>
42 #endif
43
44 #if defined(CURVE448_SMALL) || defined(ED448_SMALL)
45
46 /* Initialize the field element operations.
47 */
fe448_init(void)48 void fe448_init(void)
49 {
50 }
51
52 /* Normalize the field element.
53 * Ensure result is in range: 0..2^448-2^224-2
54 *
55 * a [in] Field element in range 0..2^448-1.
56 */
fe448_norm(word8 * a)57 void fe448_norm(word8* a)
58 {
59 int i;
60 sword16 c = 0;
61 sword16 o = 0;
62
63 for (i = 0; i < 56; i++) {
64 c += a[i];
65 if ((i == 0) || (i == 28))
66 c += 1;
67 c >>= 8;
68 }
69
70 for (i = 0; i < 56; i++) {
71 if ((i == 0) || (i == 28)) o += c;
72 o += a[i];
73 a[i] = (word8)o;
74 o >>= 8;
75 }
76 }
77
78 /* Copy one field element into another: d = a.
79 *
80 * d [in] Destination field element.
81 * a [in] Source field element.
82 */
fe448_copy(word8 * d,const word8 * a)83 void fe448_copy(word8* d, const word8* a)
84 {
85 int i;
86 for (i = 0; i < 56; i++) {
87 d[i] = a[i];
88 }
89 }
90
91 /* Conditionally swap the elements.
92 * Constant time implementation.
93 *
94 * a [in] First field element.
95 * b [in] Second field element.
96 * c [in] Swap when 1. Valid values: 0, 1.
97 */
fe448_cswap(word8 * a,word8 * b,int c)98 static void fe448_cswap(word8* a, word8* b, int c)
99 {
100 int i;
101 word8 mask = -(word8)c;
102 word8 t[56];
103
104 for (i = 0; i < 56; i++)
105 t[i] = (a[i] ^ b[i]) & mask;
106 for (i = 0; i < 56; i++)
107 a[i] ^= t[i];
108 for (i = 0; i < 56; i++)
109 b[i] ^= t[i];
110 }
111
112 /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
113 *
114 * r [in] Field element to hold sum.
115 * a [in] Field element to add.
116 * b [in] Field element to add.
117 */
fe448_add(word8 * r,const word8 * a,const word8 * b)118 void fe448_add(word8* r, const word8* a, const word8* b)
119 {
120 int i;
121 sword16 c = 0;
122 sword16 o = 0;
123
124 for (i = 0; i < 56; i++) {
125 c += a[i];
126 c += b[i];
127 r[i] = (word8)c;
128 c >>= 8;
129 }
130
131 for (i = 0; i < 56; i++) {
132 if ((i == 0) || (i == 28)) o += c;
133 o += r[i];
134 r[i] = (word8)o;
135 o >>= 8;
136 }
137 }
138
139 /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
140 *
141 * r [in] Field element to hold difference.
142 * a [in] Field element to subtract from.
143 * b [in] Field element to subtract.
144 */
fe448_sub(word8 * r,const word8 * a,const word8 * b)145 void fe448_sub(word8* r, const word8* a, const word8* b)
146 {
147 int i;
148 sword16 c = 0;
149 sword16 o = 0;
150
151 for (i = 0; i < 56; i++) {
152 if (i == 28)
153 c += 0x1fc;
154 else
155 c += 0x1fe;
156 c += a[i];
157 c -= b[i];
158 r[i] = (word8)c;
159 c >>= 8;
160 }
161
162 for (i = 0; i < 56; i++) {
163 if ((i == 0) || (i == 28)) o += c;
164 o += r[i];
165 r[i] = (word8)o;
166 o >>= 8;
167 }
168 }
169
170 /* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
171 *
172 * r [in] Field element to hold result.
173 * a [in] Field element to multiply.
174 */
fe448_mul39081(word8 * r,const word8 * a)175 void fe448_mul39081(word8* r, const word8* a)
176 {
177 int i;
178 sword32 c = 0;
179 sword32 o = 0;
180
181 for (i = 0; i < 56; i++) {
182 c += a[i] * (sword32)39081;
183 r[i] = (word8)c;
184 c >>= 8;
185 }
186
187 for (i = 0; i < 56; i++) {
188 if ((i == 0) || (i == 28)) o += c;
189 o += r[i];
190 r[i] = (word8)o;
191 o >>= 8;
192 }
193 }
194
195 /* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
196 *
197 * r [in] Field element to hold result.
198 * a [in] Field element to multiply.
199 * b [in] Field element to multiply.
200 */
fe448_mul(word8 * r,const word8 * a,const word8 * b)201 void fe448_mul(word8* r, const word8* a, const word8* b)
202 {
203 int i, k;
204 sword32 c = 0;
205 sword16 o = 0, cc = 0;
206 word8 t[112];
207
208 for (k = 0; k < 56; k++) {
209 i = 0;
210 for (; i <= k; i++) {
211 c += (sword32)a[i] * b[k - i];
212 }
213 t[k] = (word8)c;
214 c >>= 8;
215 }
216 for (; k < 111; k++) {
217 i = k - 55;
218 for (; i < 56; i++) {
219 c += (sword32)a[i] * b[k - i];
220 }
221 t[k] = (word8)c;
222 c >>= 8;
223 }
224 t[k] = (word8)c;
225
226 for (i = 0; i < 28; i++) {
227 o += t[i];
228 o += t[i + 56];
229 o += t[i + 84];
230 r[i] = (word8)o;
231 o >>= 8;
232 }
233 for (i = 28; i < 56; i++) {
234 o += t[i];
235 o += t[i + 56];
236 o += t[i + 28];
237 o += t[i + 56];
238 r[i] = (word8)o;
239 o >>= 8;
240 }
241 for (i = 0; i < 56; i++) {
242 if ((i == 0) || (i == 28)) cc += o;
243 cc += r[i];
244 r[i] = (word8)cc;
245 cc >>= 8;
246 }
247 }
248
249 /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
250 *
251 * r [in] Field element to hold result.
252 * a [in] Field element to square.
253 */
fe448_sqr(word8 * r,const word8 * a)254 void fe448_sqr(word8* r, const word8* a)
255 {
256 int i, k;
257 sword32 c = 0;
258 sword32 p;
259 sword16 o = 0, cc = 0;
260 word8 t[112];
261
262 for (k = 0; k < 56; k++) {
263 i = 0;
264 for (; i <= k; i++) {
265 if (k - i < i)
266 break;
267 p = (sword32)a[i] * a[k - i];
268 if (k - i != i)
269 p *= 2;
270 c += p;
271 }
272 t[k] = (word8)c;
273 c >>= 8;
274 }
275 for (; k < 111; k++) {
276 i = k - 55;
277 for (; i < 56; i++) {
278 if (k - i < i)
279 break;
280 p = (sword32)a[i] * a[k - i];
281 if (k - i != i)
282 p *= 2;
283 c += p;
284 }
285 t[k] = (word8)c;
286 c >>= 8;
287 }
288 t[k] = (word8)c;
289
290 for (i = 0; i < 28; i++) {
291 o += t[i];
292 o += t[i + 56];
293 o += t[i + 84];
294 r[i] = (word8)o;
295 o >>= 8;
296 }
297 for (i = 28; i < 56; i++) {
298 o += t[i];
299 o += t[i + 56];
300 o += t[i + 28];
301 o += t[i + 56];
302 r[i] = (word8)o;
303 o >>= 8;
304 }
305 for (i = 0; i < 56; i++) {
306 if ((i == 0) || (i == 28)) cc += o;
307 cc += r[i];
308 r[i] = (word8)cc;
309 cc >>= 8;
310 }
311 fe448_norm(r);
312 }
313
314 /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
315 * Constant time implementation - using Fermat's little theorem:
316 * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
317 * For curve448: p - 2 = 2^448 - 2^224 - 3
318 *
319 * r [in] Field element to hold result.
320 * a [in] Field element to invert.
321 */
fe448_invert(word8 * r,const word8 * a)322 void fe448_invert(word8* r, const word8* a)
323 {
324 int i;
325 word8 t[56];
326
327 fe448_sqr(t, a);
328 fe448_mul(t, t, a);
329 for (i = 0; i < 221; i++) {
330 fe448_sqr(t, t);
331 fe448_mul(t, t, a);
332 }
333 fe448_sqr(t, t);
334 for (i = 0; i < 222; i++) {
335 fe448_sqr(t, t);
336 fe448_mul(t, t, a);
337 }
338 fe448_sqr(t, t);
339 fe448_sqr(t, t);
340 fe448_mul(r, t, a);
341 }
342
343 /* Scalar multiply the point by a number. r = n.a
344 * Uses Montgomery ladder and only requires the x-ordinate.
345 *
346 * r [in] Field element to hold result.
347 * n [in] Scalar as an array of bytes.
348 * a [in] Point to multiply - x-ordinate only.
349 */
curve448(byte * r,const byte * n,const byte * a)350 int curve448(byte* r, const byte* n, const byte* a)
351 {
352 word8 x1[56];
353 word8 x2[56] = {1};
354 word8 z2[56] = {0};
355 word8 x3[56];
356 word8 z3[56] = {1};
357 word8 t0[56];
358 word8 t1[56];
359 int i;
360 unsigned int swap;
361 unsigned int b;
362
363 fe448_copy(x1, a);
364 fe448_copy(x3, a);
365
366 swap = 0;
367 for (i = 447; i >= 0; --i) {
368 b = (n[i >> 3] >> (i & 7)) & 1;
369 swap ^= b;
370 fe448_cswap(x2, x3, swap);
371 fe448_cswap(z2, z3, swap);
372 swap = b;
373
374 /* Montgomery Ladder - double and add */
375 fe448_add(t0, x2, z2);
376 fe448_add(t1, x3, z3);
377 fe448_sub(x2, x2, z2);
378 fe448_sub(x3, x3, z3);
379 fe448_mul(t1, t1, x2);
380 fe448_mul(z3, x3, t0);
381 fe448_sqr(t0, t0);
382 fe448_sqr(x2, x2);
383 fe448_add(x3, z3, t1);
384 fe448_sqr(x3, x3);
385 fe448_sub(z3, z3, t1);
386 fe448_sqr(z3, z3);
387 fe448_mul(z3, z3, x1);
388 fe448_sub(t1, t0, x2);
389 fe448_mul(x2, t0, x2);
390 fe448_mul39081(z2, t1);
391 fe448_add(z2, t0, z2);
392 fe448_mul(z2, z2, t1);
393 }
394 fe448_cswap(x2, x3, swap);
395 fe448_cswap(z2, z3, swap);
396
397 fe448_invert(z2, z2);
398 fe448_mul(r, x2, z2);
399 fe448_norm(r);
400
401 return 0;
402 }
403
404 #ifdef HAVE_ED448
405 /* Check whether field element is not 0.
406 * Field element must have been normalized before call.
407 *
408 * a [in] Field element.
409 * returns 0 when zero, and any other value otherwise.
410 */
fe448_isnonzero(const word8 * a)411 int fe448_isnonzero(const word8* a)
412 {
413 int i;
414 byte c = 0;
415 for (i = 0; i < 56; i++)
416 c |= a[i];
417 return c;
418 }
419
420 /* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
421 * Add 0x200 to each element and subtract 2 from next.
422 * Top element overflow handled by subtracting 2 from index 0 and 28.
423 *
424 * r [in] Field element to hold result.
425 * a [in] Field element.
426 */
fe448_neg(word8 * r,const word8 * a)427 void fe448_neg(word8* r, const word8* a)
428 {
429 int i;
430 sword16 c = 0;
431 sword16 o = 0;
432
433 for (i = 0; i < 56; i++) {
434 if (i == 28)
435 c += 0x1fc;
436 else
437 c += 0x1fe;
438 c -= a[i];
439 r[i] = (word8)c;
440 c >>= 8;
441 }
442
443 for (i = 0; i < 56; i++) {
444 if ((i == 0) || (i == 28)) o += c;
445 o += r[i];
446 r[i] = (word8)o;
447 o >>= 8;
448 }
449 }
450
451 /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
452 * Used for calcualting y-ordinate from x-ordinate for Ed448.
453 *
454 * r [in] Field element to hold result.
455 * a [in] Field element to exponentiate.
456 */
fe448_pow_2_446_222_1(word8 * r,const word8 * a)457 void fe448_pow_2_446_222_1(word8* r, const word8* a)
458 {
459 int i;
460 word8 t[56];
461
462 fe448_sqr(t, a);
463 fe448_mul(t, t, a);
464 for (i = 0; i < 221; i++) {
465 fe448_sqr(t, t);
466 fe448_mul(t, t, a);
467 }
468 fe448_sqr(t, t);
469 for (i = 0; i < 221; i++) {
470 fe448_sqr(t, t);
471 fe448_mul(t, t, a);
472 }
473 fe448_sqr(t, t);
474 fe448_mul(r, t, a);
475 }
476
477 /* Constant time, conditional move of b into a.
478 * a is not changed if the condition is 0.
479 *
480 * a A field element.
481 * b A field element.
482 * c If 1 then copy and if 0 then don't copy.
483 */
fe448_cmov(word8 * a,const word8 * b,int c)484 void fe448_cmov(word8* a, const word8* b, int c)
485 {
486 int i;
487 word8 m = -(word8)c;
488 word8 t[56];
489
490 for (i = 0; i < 56; i++)
491 t[i] = m & (a[i] ^ b[i]);
492 for (i = 0; i < 56; i++)
493 a[i] ^= t[i];
494 }
495
496 #endif /* HAVE_ED448 */
497 #elif defined(CURVED448_128BIT)
498
499 /* Initialize the field element operations.
500 */
fe448_init(void)501 void fe448_init(void)
502 {
503 }
504
505 /* Convert the field element from a byte array to an array of 56-bits.
506 *
507 * r [in] Array to encode into.
508 * b [in] Byte array.
509 */
fe448_from_bytes(sword64 * r,const unsigned char * b)510 void fe448_from_bytes(sword64* r, const unsigned char* b)
511 {
512 r[ 0] = ((sword64) (b[ 0]) << 0)
513 | ((sword64) (b[ 1]) << 8)
514 | ((sword64) (b[ 2]) << 16)
515 | ((sword64) (b[ 3]) << 24)
516 | ((sword64) (b[ 4]) << 32)
517 | ((sword64) (b[ 5]) << 40)
518 | ((sword64) (b[ 6]) << 48);
519 r[ 1] = ((sword64) (b[ 7]) << 0)
520 | ((sword64) (b[ 8]) << 8)
521 | ((sword64) (b[ 9]) << 16)
522 | ((sword64) (b[10]) << 24)
523 | ((sword64) (b[11]) << 32)
524 | ((sword64) (b[12]) << 40)
525 | ((sword64) (b[13]) << 48);
526 r[ 2] = ((sword64) (b[14]) << 0)
527 | ((sword64) (b[15]) << 8)
528 | ((sword64) (b[16]) << 16)
529 | ((sword64) (b[17]) << 24)
530 | ((sword64) (b[18]) << 32)
531 | ((sword64) (b[19]) << 40)
532 | ((sword64) (b[20]) << 48);
533 r[ 3] = ((sword64) (b[21]) << 0)
534 | ((sword64) (b[22]) << 8)
535 | ((sword64) (b[23]) << 16)
536 | ((sword64) (b[24]) << 24)
537 | ((sword64) (b[25]) << 32)
538 | ((sword64) (b[26]) << 40)
539 | ((sword64) (b[27]) << 48);
540 r[ 4] = ((sword64) (b[28]) << 0)
541 | ((sword64) (b[29]) << 8)
542 | ((sword64) (b[30]) << 16)
543 | ((sword64) (b[31]) << 24)
544 | ((sword64) (b[32]) << 32)
545 | ((sword64) (b[33]) << 40)
546 | ((sword64) (b[34]) << 48);
547 r[ 5] = ((sword64) (b[35]) << 0)
548 | ((sword64) (b[36]) << 8)
549 | ((sword64) (b[37]) << 16)
550 | ((sword64) (b[38]) << 24)
551 | ((sword64) (b[39]) << 32)
552 | ((sword64) (b[40]) << 40)
553 | ((sword64) (b[41]) << 48);
554 r[ 6] = ((sword64) (b[42]) << 0)
555 | ((sword64) (b[43]) << 8)
556 | ((sword64) (b[44]) << 16)
557 | ((sword64) (b[45]) << 24)
558 | ((sword64) (b[46]) << 32)
559 | ((sword64) (b[47]) << 40)
560 | ((sword64) (b[48]) << 48);
561 r[ 7] = ((sword64) (b[49]) << 0)
562 | ((sword64) (b[50]) << 8)
563 | ((sword64) (b[51]) << 16)
564 | ((sword64) (b[52]) << 24)
565 | ((sword64) (b[53]) << 32)
566 | ((sword64) (b[54]) << 40)
567 | ((sword64) (b[55]) << 48);
568 }
569
570 /* Convert the field element to a byte array from an array of 56-bits.
571 *
572 * b [in] Byte array.
573 * a [in] Array to encode into.
574 */
fe448_to_bytes(unsigned char * b,const sword64 * a)575 void fe448_to_bytes(unsigned char* b, const sword64* a)
576 {
577 sword128 t;
578 /* Mod */
579 sword64 in0 = a[0];
580 sword64 in1 = a[1];
581 sword64 in2 = a[2];
582 sword64 in3 = a[3];
583 sword64 in4 = a[4];
584 sword64 in5 = a[5];
585 sword64 in6 = a[6];
586 sword64 in7 = a[7];
587 sword64 o = in7 >> 56;
588 in7 -= o << 56;
589 in0 += o;
590 in4 += o;
591 o = (in0 + 1) >> 56;
592 o = (o + in1) >> 56;
593 o = (o + in2) >> 56;
594 o = (o + in3) >> 56;
595 o = (o + in4 + 1) >> 56;
596 o = (o + in5) >> 56;
597 o = (o + in6) >> 56;
598 o = (o + in7) >> 56;
599 in0 += o;
600 in4 += o;
601 in7 -= o << 56;
602 o = (in0 >> 56); in1 += o; t = o << 56; in0 -= (sword64)t;
603 o = (in1 >> 56); in2 += o; t = o << 56; in1 -= (sword64)t;
604 o = (in2 >> 56); in3 += o; t = o << 56; in2 -= (sword64)t;
605 o = (in3 >> 56); in4 += o; t = o << 56; in3 -= (sword64)t;
606 o = (in4 >> 56); in5 += o; t = o << 56; in4 -= (sword64)t;
607 o = (in5 >> 56); in6 += o; t = o << 56; in5 -= (sword64)t;
608 o = (in6 >> 56); in7 += o; t = o << 56; in6 -= (sword64)t;
609 o = (in7 >> 56); in0 += o;
610 in4 += o; t = o << 56; in7 -= (sword64)t;
611
612 /* Output as bytes */
613 b[ 0] = (in0 >> 0);
614 b[ 1] = (in0 >> 8);
615 b[ 2] = (in0 >> 16);
616 b[ 3] = (in0 >> 24);
617 b[ 4] = (in0 >> 32);
618 b[ 5] = (in0 >> 40);
619 b[ 6] = (in0 >> 48);
620 b[ 7] = (in1 >> 0);
621 b[ 8] = (in1 >> 8);
622 b[ 9] = (in1 >> 16);
623 b[10] = (in1 >> 24);
624 b[11] = (in1 >> 32);
625 b[12] = (in1 >> 40);
626 b[13] = (in1 >> 48);
627 b[14] = (in2 >> 0);
628 b[15] = (in2 >> 8);
629 b[16] = (in2 >> 16);
630 b[17] = (in2 >> 24);
631 b[18] = (in2 >> 32);
632 b[19] = (in2 >> 40);
633 b[20] = (in2 >> 48);
634 b[21] = (in3 >> 0);
635 b[22] = (in3 >> 8);
636 b[23] = (in3 >> 16);
637 b[24] = (in3 >> 24);
638 b[25] = (in3 >> 32);
639 b[26] = (in3 >> 40);
640 b[27] = (in3 >> 48);
641 b[28] = (in4 >> 0);
642 b[29] = (in4 >> 8);
643 b[30] = (in4 >> 16);
644 b[31] = (in4 >> 24);
645 b[32] = (in4 >> 32);
646 b[33] = (in4 >> 40);
647 b[34] = (in4 >> 48);
648 b[35] = (in5 >> 0);
649 b[36] = (in5 >> 8);
650 b[37] = (in5 >> 16);
651 b[38] = (in5 >> 24);
652 b[39] = (in5 >> 32);
653 b[40] = (in5 >> 40);
654 b[41] = (in5 >> 48);
655 b[42] = (in6 >> 0);
656 b[43] = (in6 >> 8);
657 b[44] = (in6 >> 16);
658 b[45] = (in6 >> 24);
659 b[46] = (in6 >> 32);
660 b[47] = (in6 >> 40);
661 b[48] = (in6 >> 48);
662 b[49] = (in7 >> 0);
663 b[50] = (in7 >> 8);
664 b[51] = (in7 >> 16);
665 b[52] = (in7 >> 24);
666 b[53] = (in7 >> 32);
667 b[54] = (in7 >> 40);
668 b[55] = (in7 >> 48);
669 }
670
671 /* Set the field element to 0.
672 *
673 * a [in] Field element.
674 */
fe448_1(sword64 * a)675 void fe448_1(sword64* a)
676 {
677 a[0] = 1;
678 a[1] = 0;
679 a[2] = 0;
680 a[3] = 0;
681 a[4] = 0;
682 a[5] = 0;
683 a[6] = 0;
684 a[7] = 0;
685 }
686
687 /* Set the field element to 0.
688 *
689 * a [in] Field element.
690 */
fe448_0(sword64 * a)691 void fe448_0(sword64* a)
692 {
693 a[0] = 0;
694 a[1] = 0;
695 a[2] = 0;
696 a[3] = 0;
697 a[4] = 0;
698 a[5] = 0;
699 a[6] = 0;
700 a[7] = 0;
701 }
702
703 /* Copy one field element into another: d = a.
704 *
705 * d [in] Destination field element.
706 * a [in] Source field element.
707 */
fe448_copy(sword64 * d,const sword64 * a)708 void fe448_copy(sword64* d, const sword64* a)
709 {
710 d[0] = a[0];
711 d[1] = a[1];
712 d[2] = a[2];
713 d[3] = a[3];
714 d[4] = a[4];
715 d[5] = a[5];
716 d[6] = a[6];
717 d[7] = a[7];
718 }
719
720 /* Conditionally swap the elements.
721 * Constant time implementation.
722 *
723 * a [in] First field element.
724 * b [in] Second field element.
725 * c [in] Swap when 1. Valid values: 0, 1.
726 */
fe448_cswap(sword64 * a,sword64 * b,int c)727 static void fe448_cswap(sword64* a, sword64* b, int c)
728 {
729 sword64 mask = -(sword64)c;
730 sword64 t0 = (a[0] ^ b[0]) & mask;
731 sword64 t1 = (a[1] ^ b[1]) & mask;
732 sword64 t2 = (a[2] ^ b[2]) & mask;
733 sword64 t3 = (a[3] ^ b[3]) & mask;
734 sword64 t4 = (a[4] ^ b[4]) & mask;
735 sword64 t5 = (a[5] ^ b[5]) & mask;
736 sword64 t6 = (a[6] ^ b[6]) & mask;
737 sword64 t7 = (a[7] ^ b[7]) & mask;
738 a[0] ^= t0;
739 a[1] ^= t1;
740 a[2] ^= t2;
741 a[3] ^= t3;
742 a[4] ^= t4;
743 a[5] ^= t5;
744 a[6] ^= t6;
745 a[7] ^= t7;
746 b[0] ^= t0;
747 b[1] ^= t1;
748 b[2] ^= t2;
749 b[3] ^= t3;
750 b[4] ^= t4;
751 b[5] ^= t5;
752 b[6] ^= t6;
753 b[7] ^= t7;
754 }
755
756 /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
757 *
758 * r [in] Field element to hold sum.
759 * a [in] Field element to add.
760 * b [in] Field element to add.
761 */
fe448_add(sword64 * r,const sword64 * a,const sword64 * b)762 void fe448_add(sword64* r, const sword64* a, const sword64* b)
763 {
764 r[0] = a[0] + b[0];
765 r[1] = a[1] + b[1];
766 r[2] = a[2] + b[2];
767 r[3] = a[3] + b[3];
768 r[4] = a[4] + b[4];
769 r[5] = a[5] + b[5];
770 r[6] = a[6] + b[6];
771 r[7] = a[7] + b[7];
772 }
773
774 /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
775 *
776 * r [in] Field element to hold difference.
777 * a [in] Field element to subtract from.
778 * b [in] Field element to subtract.
779 */
fe448_sub(sword64 * r,const sword64 * a,const sword64 * b)780 void fe448_sub(sword64* r, const sword64* a, const sword64* b)
781 {
782 r[0] = a[0] - b[0];
783 r[1] = a[1] - b[1];
784 r[2] = a[2] - b[2];
785 r[3] = a[3] - b[3];
786 r[4] = a[4] - b[4];
787 r[5] = a[5] - b[5];
788 r[6] = a[6] - b[6];
789 r[7] = a[7] - b[7];
790 }
791
792 /* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
793 *
794 * r [in] Field element to hold result.
795 * a [in] Field element to multiply.
796 */
fe448_mul39081(sword64 * r,const sword64 * a)797 void fe448_mul39081(sword64* r, const sword64* a)
798 {
799 sword128 t;
800 sword64 o;
801 sword128 t0 = a[0] * (sword128)39081;
802 sword128 t1 = a[1] * (sword128)39081;
803 sword128 t2 = a[2] * (sword128)39081;
804 sword128 t3 = a[3] * (sword128)39081;
805 sword128 t4 = a[4] * (sword128)39081;
806 sword128 t5 = a[5] * (sword128)39081;
807 sword128 t6 = a[6] * (sword128)39081;
808 sword128 t7 = a[7] * (sword128)39081;
809 o = (sword64)(t0 >> 56); t1 += o; t = (sword128)o << 56; t0 -= t;
810 o = (sword64)(t1 >> 56); t2 += o; t = (sword128)o << 56; t1 -= t;
811 o = (sword64)(t2 >> 56); t3 += o; t = (sword128)o << 56; t2 -= t;
812 o = (sword64)(t3 >> 56); t4 += o; t = (sword128)o << 56; t3 -= t;
813 o = (sword64)(t4 >> 56); t5 += o; t = (sword128)o << 56; t4 -= t;
814 o = (sword64)(t5 >> 56); t6 += o; t = (sword128)o << 56; t5 -= t;
815 o = (sword64)(t6 >> 56); t7 += o; t = (sword128)o << 56; t6 -= t;
816 o = (sword64)(t7 >> 56); t0 += o;
817 t4 += o; t = (sword128)o << 56; t7 -= t;
818
819 /* Store */
820 r[0] = (sword64)t0;
821 r[1] = (sword64)t1;
822 r[2] = (sword64)t2;
823 r[3] = (sword64)t3;
824 r[4] = (sword64)t4;
825 r[5] = (sword64)t5;
826 r[6] = (sword64)t6;
827 r[7] = (sword64)t7;
828 }
829
830 /* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
831 *
832 * r [in] Field element to hold result.
833 * a [in] Field element to multiply.
834 * b [in] Field element to multiply.
835 */
fe448_mul(sword64 * r,const sword64 * a,const sword64 * b)836 void fe448_mul(sword64* r, const sword64* a, const sword64* b)
837 {
838 sword128 t;
839 sword64 o;
840 sword128 t0 = (sword128)a[ 0] * b[ 0];
841 sword128 t1 = (sword128)a[ 0] * b[ 1];
842 sword128 t101 = (sword128)a[ 1] * b[ 0];
843 sword128 t2 = (sword128)a[ 0] * b[ 2];
844 sword128 t102 = (sword128)a[ 1] * b[ 1];
845 sword128 t202 = (sword128)a[ 2] * b[ 0];
846 sword128 t3 = (sword128)a[ 0] * b[ 3];
847 sword128 t103 = (sword128)a[ 1] * b[ 2];
848 sword128 t203 = (sword128)a[ 2] * b[ 1];
849 sword128 t303 = (sword128)a[ 3] * b[ 0];
850 sword128 t4 = (sword128)a[ 0] * b[ 4];
851 sword128 t104 = (sword128)a[ 1] * b[ 3];
852 sword128 t204 = (sword128)a[ 2] * b[ 2];
853 sword128 t304 = (sword128)a[ 3] * b[ 1];
854 sword128 t404 = (sword128)a[ 4] * b[ 0];
855 sword128 t5 = (sword128)a[ 0] * b[ 5];
856 sword128 t105 = (sword128)a[ 1] * b[ 4];
857 sword128 t205 = (sword128)a[ 2] * b[ 3];
858 sword128 t305 = (sword128)a[ 3] * b[ 2];
859 sword128 t405 = (sword128)a[ 4] * b[ 1];
860 sword128 t505 = (sword128)a[ 5] * b[ 0];
861 sword128 t6 = (sword128)a[ 0] * b[ 6];
862 sword128 t106 = (sword128)a[ 1] * b[ 5];
863 sword128 t206 = (sword128)a[ 2] * b[ 4];
864 sword128 t306 = (sword128)a[ 3] * b[ 3];
865 sword128 t406 = (sword128)a[ 4] * b[ 2];
866 sword128 t506 = (sword128)a[ 5] * b[ 1];
867 sword128 t606 = (sword128)a[ 6] * b[ 0];
868 sword128 t7 = (sword128)a[ 0] * b[ 7];
869 sword128 t107 = (sword128)a[ 1] * b[ 6];
870 sword128 t207 = (sword128)a[ 2] * b[ 5];
871 sword128 t307 = (sword128)a[ 3] * b[ 4];
872 sword128 t407 = (sword128)a[ 4] * b[ 3];
873 sword128 t507 = (sword128)a[ 5] * b[ 2];
874 sword128 t607 = (sword128)a[ 6] * b[ 1];
875 sword128 t707 = (sword128)a[ 7] * b[ 0];
876 sword128 t8 = (sword128)a[ 1] * b[ 7];
877 sword128 t108 = (sword128)a[ 2] * b[ 6];
878 sword128 t208 = (sword128)a[ 3] * b[ 5];
879 sword128 t308 = (sword128)a[ 4] * b[ 4];
880 sword128 t408 = (sword128)a[ 5] * b[ 3];
881 sword128 t508 = (sword128)a[ 6] * b[ 2];
882 sword128 t608 = (sword128)a[ 7] * b[ 1];
883 sword128 t9 = (sword128)a[ 2] * b[ 7];
884 sword128 t109 = (sword128)a[ 3] * b[ 6];
885 sword128 t209 = (sword128)a[ 4] * b[ 5];
886 sword128 t309 = (sword128)a[ 5] * b[ 4];
887 sword128 t409 = (sword128)a[ 6] * b[ 3];
888 sword128 t509 = (sword128)a[ 7] * b[ 2];
889 sword128 t10 = (sword128)a[ 3] * b[ 7];
890 sword128 t110 = (sword128)a[ 4] * b[ 6];
891 sword128 t210 = (sword128)a[ 5] * b[ 5];
892 sword128 t310 = (sword128)a[ 6] * b[ 4];
893 sword128 t410 = (sword128)a[ 7] * b[ 3];
894 sword128 t11 = (sword128)a[ 4] * b[ 7];
895 sword128 t111 = (sword128)a[ 5] * b[ 6];
896 sword128 t211 = (sword128)a[ 6] * b[ 5];
897 sword128 t311 = (sword128)a[ 7] * b[ 4];
898 sword128 t12 = (sword128)a[ 5] * b[ 7];
899 sword128 t112 = (sword128)a[ 6] * b[ 6];
900 sword128 t212 = (sword128)a[ 7] * b[ 5];
901 sword128 t13 = (sword128)a[ 6] * b[ 7];
902 sword128 t113 = (sword128)a[ 7] * b[ 6];
903 sword128 t14 = (sword128)a[ 7] * b[ 7];
904 t1 += t101;
905 t2 += t102; t2 += t202;
906 t3 += t103; t3 += t203; t3 += t303;
907 t4 += t104; t4 += t204; t4 += t304; t4 += t404;
908 t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505;
909 t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506;
910 t6 += t606;
911 t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507;
912 t7 += t607;
913 t7 += t707;
914 t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508;
915 t8 += t608;
916 t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509;
917 t10 += t110; t10 += t210; t10 += t310; t10 += t410;
918 t11 += t111; t11 += t211; t11 += t311;
919 t12 += t112; t12 += t212;
920 t13 += t113;
921
922 /* Reduce */
923 t0 += t8 + t12;
924 t1 += t9 + t13;
925 t2 += t10 + t14;
926 t3 += t11;
927 t4 += t12 + t8 + t12;
928 t5 += t13 + t9 + t13;
929 t6 += t14 + t10 + t14;
930 t7 += t11;
931 o = t7 >> 56; t0 += o;
932 t4 += o; t = (sword128)o << 56; t7 -= t;
933 o = (sword64)(t0 >> 56); t1 += o; t = (sword128)o << 56; t0 -= t;
934 o = (sword64)(t1 >> 56); t2 += o; t = (sword128)o << 56; t1 -= t;
935 o = (sword64)(t2 >> 56); t3 += o; t = (sword128)o << 56; t2 -= t;
936 o = (sword64)(t3 >> 56); t4 += o; t = (sword128)o << 56; t3 -= t;
937 o = (sword64)(t4 >> 56); t5 += o; t = (sword128)o << 56; t4 -= t;
938 o = (sword64)(t5 >> 56); t6 += o; t = (sword128)o << 56; t5 -= t;
939 o = (sword64)(t6 >> 56); t7 += o; t = (sword128)o << 56; t6 -= t;
940 o = (sword64)(t7 >> 56); t0 += o;
941 t4 += o; t = (sword128)o << 56; t7 -= t;
942
943 /* Store */
944 r[0] = (sword64)t0;
945 r[1] = (sword64)t1;
946 r[2] = (sword64)t2;
947 r[3] = (sword64)t3;
948 r[4] = (sword64)t4;
949 r[5] = (sword64)t5;
950 r[6] = (sword64)t6;
951 r[7] = (sword64)t7;
952 }
953
954 /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
955 *
956 * r [in] Field element to hold result.
957 * a [in] Field element to square.
958 */
fe448_sqr(sword64 * r,const sword64 * a)959 void fe448_sqr(sword64* r, const sword64* a)
960 {
961 sword128 t;
962 sword64 o;
963 sword128 t0 = (sword128)a[ 0] * a[ 0];
964 sword128 t1 = 2 * (sword128)a[ 0] * a[ 1];
965 sword128 t2 = 2 * (sword128)a[ 0] * a[ 2];
966 sword128 t102 = (sword128)a[ 1] * a[ 1];
967 sword128 t3 = 2 * (sword128)a[ 0] * a[ 3];
968 sword128 t103 = 2 * (sword128)a[ 1] * a[ 2];
969 sword128 t4 = 2 * (sword128)a[ 0] * a[ 4];
970 sword128 t104 = 2 * (sword128)a[ 1] * a[ 3];
971 sword128 t204 = (sword128)a[ 2] * a[ 2];
972 sword128 t5 = 2 * (sword128)a[ 0] * a[ 5];
973 sword128 t105 = 2 * (sword128)a[ 1] * a[ 4];
974 sword128 t205 = 2 * (sword128)a[ 2] * a[ 3];
975 sword128 t6 = 2 * (sword128)a[ 0] * a[ 6];
976 sword128 t106 = 2 * (sword128)a[ 1] * a[ 5];
977 sword128 t206 = 2 * (sword128)a[ 2] * a[ 4];
978 sword128 t306 = (sword128)a[ 3] * a[ 3];
979 sword128 t7 = 2 * (sword128)a[ 0] * a[ 7];
980 sword128 t107 = 2 * (sword128)a[ 1] * a[ 6];
981 sword128 t207 = 2 * (sword128)a[ 2] * a[ 5];
982 sword128 t307 = 2 * (sword128)a[ 3] * a[ 4];
983 sword128 t8 = 2 * (sword128)a[ 1] * a[ 7];
984 sword128 t108 = 2 * (sword128)a[ 2] * a[ 6];
985 sword128 t208 = 2 * (sword128)a[ 3] * a[ 5];
986 sword128 t308 = (sword128)a[ 4] * a[ 4];
987 sword128 t9 = 2 * (sword128)a[ 2] * a[ 7];
988 sword128 t109 = 2 * (sword128)a[ 3] * a[ 6];
989 sword128 t209 = 2 * (sword128)a[ 4] * a[ 5];
990 sword128 t10 = 2 * (sword128)a[ 3] * a[ 7];
991 sword128 t110 = 2 * (sword128)a[ 4] * a[ 6];
992 sword128 t210 = (sword128)a[ 5] * a[ 5];
993 sword128 t11 = 2 * (sword128)a[ 4] * a[ 7];
994 sword128 t111 = 2 * (sword128)a[ 5] * a[ 6];
995 sword128 t12 = 2 * (sword128)a[ 5] * a[ 7];
996 sword128 t112 = (sword128)a[ 6] * a[ 6];
997 sword128 t13 = 2 * (sword128)a[ 6] * a[ 7];
998 sword128 t14 = (sword128)a[ 7] * a[ 7];
999 t2 += t102;
1000 t3 += t103;
1001 t4 += t104; t4 += t204;
1002 t5 += t105; t5 += t205;
1003 t6 += t106; t6 += t206; t6 += t306;
1004 t7 += t107; t7 += t207; t7 += t307;
1005 t8 += t108; t8 += t208; t8 += t308;
1006 t9 += t109; t9 += t209;
1007 t10 += t110; t10 += t210;
1008 t11 += t111;
1009 t12 += t112;
1010
1011 /* Reduce */
1012 t0 += t8 + t12;
1013 t1 += t9 + t13;
1014 t2 += t10 + t14;
1015 t3 += t11;
1016 t4 += t12 + t8 + t12;
1017 t5 += t13 + t9 + t13;
1018 t6 += t14 + t10 + t14;
1019 t7 += t11;
1020 o = t7 >> 56; t0 += o;
1021 t4 += o; t = (sword128)o << 56; t7 -= t;
1022 o = (sword64)(t0 >> 56); t1 += o; t = (sword128)o << 56; t0 -= t;
1023 o = (sword64)(t1 >> 56); t2 += o; t = (sword128)o << 56; t1 -= t;
1024 o = (sword64)(t2 >> 56); t3 += o; t = (sword128)o << 56; t2 -= t;
1025 o = (sword64)(t3 >> 56); t4 += o; t = (sword128)o << 56; t3 -= t;
1026 o = (sword64)(t4 >> 56); t5 += o; t = (sword128)o << 56; t4 -= t;
1027 o = (sword64)(t5 >> 56); t6 += o; t = (sword128)o << 56; t5 -= t;
1028 o = (sword64)(t6 >> 56); t7 += o; t = (sword128)o << 56; t6 -= t;
1029 o = (sword64)(t7 >> 56); t0 += o;
1030 t4 += o; t = (sword128)o << 56; t7 -= t;
1031
1032 /* Store */
1033 r[0] = (sword64)t0;
1034 r[1] = (sword64)t1;
1035 r[2] = (sword64)t2;
1036 r[3] = (sword64)t3;
1037 r[4] = (sword64)t4;
1038 r[5] = (sword64)t5;
1039 r[6] = (sword64)t6;
1040 r[7] = (sword64)t7;
1041 }
1042
1043 /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
1044 * Constant time implementation - using Fermat's little theorem:
1045 * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
1046 * For curve448: p - 2 = 2^448 - 2^224 - 3
1047 *
1048 * r [in] Field element to hold result.
1049 * a [in] Field element to invert.
1050 */
fe448_invert(sword64 * r,const sword64 * a)1051 void fe448_invert(sword64* r, const sword64* a)
1052 {
1053 sword64 t1[8];
1054 sword64 t2[8];
1055 sword64 t3[8];
1056 sword64 t4[8];
1057 int i;
1058
1059 fe448_sqr(t1, a);
1060 /* t1 = 2 */
1061 fe448_mul(t1, t1, a);
1062 /* t1 = 3 */
1063 fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1064 /* t2 = c */
1065 fe448_mul(t3, t2, a);
1066 /* t3 = d */
1067 fe448_mul(t1, t2, t1);
1068 /* t1 = f */
1069 fe448_sqr(t2, t1);
1070 /* t2 = 1e */
1071 fe448_mul(t4, t2, a);
1072 /* t4 = 1f */
1073 fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1074 /* t2 = 3e0 */
1075 fe448_mul(t1, t2, t4);
1076 /* t1 = 3ff */
1077 fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
1078 /* t2 = ffc00 */
1079 fe448_mul(t1, t2, t1);
1080 /* t1 = fffff */
1081 fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1082 /* t2 = 1ffffe0 */
1083 fe448_mul(t1, t2, t4);
1084 /* t1 = 1ffffff */
1085 fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
1086 /* t2 = 3fffffe000000 */
1087 fe448_mul(t1, t2, t1);
1088 /* t1 = 3ffffffffffff */
1089 fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1090 /* t2 = 7fffffffffffe0 */
1091 fe448_mul(t1, t2, t4);
1092 /* t1 = 7fffffffffffff */
1093 fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
1094 /* t2 = 3fffffffffffff80000000000000 */
1095 fe448_mul(t1, t2, t1);
1096 /* t1 = 3fffffffffffffffffffffffffff */
1097 fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
1098 /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
1099 fe448_mul(t1, t2, t1);
1100 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1101 fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1102 /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
1103 fe448_mul(t3, t3, t2);
1104 /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1105 fe448_mul(t1, t3, a);
1106 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1107 fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
1108 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
1109 fe448_mul(r, t3, t1);
1110 /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1111 }
1112
1113 /* Scalar multiply the point by a number. r = n.a
1114 * Uses Montgomery ladder and only requires the x-ordinate.
1115 *
1116 * r [in] Field element to hold result.
1117 * n [in] Scalar as an array of bytes.
1118 * a [in] Point to multiply - x-ordinate only.
1119 */
curve448(byte * r,const byte * n,const byte * a)1120 int curve448(byte* r, const byte* n, const byte* a)
1121 {
1122 sword64 x1[8];
1123 sword64 x2[8];
1124 sword64 z2[8];
1125 sword64 x3[8];
1126 sword64 z3[8];
1127 sword64 t0[8];
1128 sword64 t1[8];
1129 int i;
1130 unsigned int swap;
1131 unsigned int b;
1132
1133 fe448_from_bytes(x1, a);
1134 fe448_1(x2);
1135 fe448_0(z2);
1136 fe448_copy(x3, x1);
1137 fe448_1(z3);
1138
1139 swap = 0;
1140 for (i = 447; i >= 0; --i) {
1141 b = (n[i >> 3] >> (i & 7)) & 1;
1142 swap ^= b;
1143 fe448_cswap(x2, x3, swap);
1144 fe448_cswap(z2, z3, swap);
1145 swap = b;
1146
1147 /* Montgomery Ladder - double and add */
1148 fe448_add(t0, x2, z2);
1149 fe448_reduce(t0);
1150 fe448_add(t1, x3, z3);
1151 fe448_reduce(t1);
1152 fe448_sub(x2, x2, z2);
1153 fe448_sub(x3, x3, z3);
1154 fe448_mul(t1, t1, x2);
1155 fe448_mul(z3, x3, t0);
1156 fe448_sqr(t0, t0);
1157 fe448_sqr(x2, x2);
1158 fe448_add(x3, z3, t1);
1159 fe448_reduce(x3);
1160 fe448_sqr(x3, x3);
1161 fe448_sub(z3, z3, t1);
1162 fe448_sqr(z3, z3);
1163 fe448_mul(z3, z3, x1);
1164 fe448_sub(t1, t0, x2);
1165 fe448_mul(x2, t0, x2);
1166 fe448_mul39081(z2, t1);
1167 fe448_add(z2, t0, z2);
1168 fe448_mul(z2, z2, t1);
1169 }
1170 /* Last two bits are 0 - no final swap check required. */
1171
1172 fe448_invert(z2, z2);
1173 fe448_mul(x2, x2, z2);
1174 fe448_to_bytes(r, x2);
1175
1176 return 0;
1177 }
1178
1179 #ifdef HAVE_ED448
1180 /* Check whether field element is not 0.
1181 * Must convert to a normalized form before checking.
1182 *
1183 * a [in] Field element.
1184 * returns 0 when zero, and any other value otherwise.
1185 */
fe448_isnonzero(const sword64 * a)1186 int fe448_isnonzero(const sword64* a)
1187 {
1188 byte b[56];
1189 int i;
1190 byte c = 0;
1191 fe448_to_bytes(b, a);
1192 for (i = 0; i < 56; i++)
1193 c |= b[i];
1194 return c;
1195 }
1196
1197 /* Check whether field element is negative.
1198 * Must convert to a normalized form before checking.
1199 *
1200 * a [in] Field element.
1201 * returns 1 when negative, and 0 otherwise.
1202 */
fe448_isnegative(const sword64 * a)1203 int fe448_isnegative(const sword64* a)
1204 {
1205 byte b[56];
1206 fe448_to_bytes(b, a);
1207 return b[0] & 1;
1208 }
1209
1210 /* Negates the field element. r = -a
1211 *
1212 * r [in] Field element to hold result.
1213 * a [in] Field element.
1214 */
fe448_neg(sword64 * r,const sword64 * a)1215 void fe448_neg(sword64* r, const sword64* a)
1216 {
1217 r[0] = -a[0];
1218 r[1] = -a[1];
1219 r[2] = -a[2];
1220 r[3] = -a[3];
1221 r[4] = -a[4];
1222 r[5] = -a[5];
1223 r[6] = -a[6];
1224 r[7] = -a[7];
1225 }
1226
1227 /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
1228 * Used for calcualting y-ordinate from x-ordinate for Ed448.
1229 *
1230 * r [in] Field element to hold result.
1231 * a [in] Field element to exponentiate.
1232 */
fe448_pow_2_446_222_1(sword64 * r,const sword64 * a)1233 void fe448_pow_2_446_222_1(sword64* r, const sword64* a)
1234 {
1235 sword64 t1[8];
1236 sword64 t2[8];
1237 sword64 t3[8];
1238 sword64 t4[8];
1239 sword64 t5[8];
1240 int i;
1241
1242 fe448_sqr(t3, a);
1243 /* t3 = 2 */
1244 fe448_mul(t1, t3, a);
1245 /* t1 = 3 */
1246 fe448_sqr(t5, t1);
1247 /* t5 = 6 */
1248 fe448_mul(t5, t5, a);
1249 /* t5 = 7 */
1250 fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1251 /* t2 = c */
1252 fe448_mul(t3, t2, t3);
1253 /* t3 = e */
1254 fe448_mul(t1, t2, t1);
1255 /* t1 = f */
1256 fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
1257 /* t2 = 78 */
1258 fe448_mul(t5, t2, t5);
1259 /* t5 = 7f */
1260 fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1261 /* t2 = f0 */
1262 fe448_mul(t1, t2, t1);
1263 /* t1 = ff */
1264 fe448_mul(t3, t3, t2);
1265 /* t3 = fe */
1266 fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
1267 /* t2 = 7f80 */
1268 fe448_mul(t5, t2, t5);
1269 /* t5 = 7fff */
1270 fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
1271 /* t2 = ff00 */
1272 fe448_mul(t1, t2, t1);
1273 /* t1 = ffff */
1274 fe448_mul(t3, t3, t2);
1275 /* t3 = fffe */
1276 fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
1277 /* t2 = 3fff8000 */
1278 fe448_mul(t5, t2, t5);
1279 /* t5 = 3fffffff */
1280 fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
1281 /* t2 = ffff0000 */
1282 fe448_mul(t1, t2, t1);
1283 /* t1 = ffffffff */
1284 fe448_mul(t3, t3, t2);
1285 /* t3 = fffffffe */
1286 fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1287 /* t2 = ffffffff00000000 */
1288 fe448_mul(t2, t2, t1);
1289 /* t2 = ffffffffffffffff */
1290 fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1291 /* t1 = ffffffffffffffff0000000000000000 */
1292 fe448_mul(t1, t1, t2);
1293 /* t1 = ffffffffffffffffffffffffffffffff */
1294 fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1295 /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
1296 fe448_mul(t4, t1, t2);
1297 /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
1298 fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1299 /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
1300 fe448_mul(t3, t3, t2);
1301 /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1302 fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
1303 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
1304 fe448_mul(t1, t1, t4);
1305 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
1306 fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
1307 /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
1308 fe448_mul(r, t5, t1);
1309 /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1310 }
1311
1312 /* Constant time, conditional move of b into a.
1313 * a is not changed if the condition is 0.
1314 *
1315 * a A field element.
1316 * b A field element.
1317 * c If 1 then copy and if 0 then don't copy.
1318 */
fe448_cmov(sword64 * a,const sword64 * b,int c)1319 void fe448_cmov(sword64* a, const sword64* b, int c)
1320 {
1321 sword64 m = -(sword64)c;
1322 sword64 t0 = m & (a[0] ^ b[0]);
1323 sword64 t1 = m & (a[1] ^ b[1]);
1324 sword64 t2 = m & (a[2] ^ b[2]);
1325 sword64 t3 = m & (a[3] ^ b[3]);
1326 sword64 t4 = m & (a[4] ^ b[4]);
1327 sword64 t5 = m & (a[5] ^ b[5]);
1328 sword64 t6 = m & (a[6] ^ b[6]);
1329 sword64 t7 = m & (a[7] ^ b[7]);
1330
1331 a[0] ^= t0;
1332 a[1] ^= t1;
1333 a[2] ^= t2;
1334 a[3] ^= t3;
1335 a[4] ^= t4;
1336 a[5] ^= t5;
1337 a[6] ^= t6;
1338 a[7] ^= t7;
1339 }
1340
1341 #endif /* HAVE_ED448 */
1342 #else
1343
1344 /* Initialize the field element operations.
1345 */
fe448_init(void)1346 void fe448_init(void)
1347 {
1348 }
1349
1350 /* Convert the field element from a byte array to an array of 28-bits.
1351 *
1352 * r [in] Array to encode into.
1353 * b [in] Byte array.
1354 */
fe448_from_bytes(sword32 * r,const unsigned char * b)1355 void fe448_from_bytes(sword32* r, const unsigned char* b)
1356 {
1357 r[ 0] = (((sword32)((b[ 0] ) >> 0)) << 0)
1358 | (((sword32)((b[ 1] ) >> 0)) << 8)
1359 | (((sword32)((b[ 2] ) >> 0)) << 16)
1360 | ((((sword32)((b[ 3] & 0xf )) >> 0)) << 24);
1361 r[ 1] = (((sword32)((b[ 3] ) >> 4)) << 0)
1362 | (((sword32)((b[ 4] ) >> 0)) << 4)
1363 | (((sword32)((b[ 5] ) >> 0)) << 12)
1364 | (((sword32)((b[ 6] ) >> 0)) << 20);
1365 r[ 2] = (((sword32)((b[ 7] ) >> 0)) << 0)
1366 | (((sword32)((b[ 8] ) >> 0)) << 8)
1367 | (((sword32)((b[ 9] ) >> 0)) << 16)
1368 | ((((sword32)((b[10] & 0xf )) >> 0)) << 24);
1369 r[ 3] = (((sword32)((b[10] ) >> 4)) << 0)
1370 | (((sword32)((b[11] ) >> 0)) << 4)
1371 | (((sword32)((b[12] ) >> 0)) << 12)
1372 | (((sword32)((b[13] ) >> 0)) << 20);
1373 r[ 4] = (((sword32)((b[14] ) >> 0)) << 0)
1374 | (((sword32)((b[15] ) >> 0)) << 8)
1375 | (((sword32)((b[16] ) >> 0)) << 16)
1376 | ((((sword32)((b[17] & 0xf )) >> 0)) << 24);
1377 r[ 5] = (((sword32)((b[17] ) >> 4)) << 0)
1378 | (((sword32)((b[18] ) >> 0)) << 4)
1379 | (((sword32)((b[19] ) >> 0)) << 12)
1380 | (((sword32)((b[20] ) >> 0)) << 20);
1381 r[ 6] = (((sword32)((b[21] ) >> 0)) << 0)
1382 | (((sword32)((b[22] ) >> 0)) << 8)
1383 | (((sword32)((b[23] ) >> 0)) << 16)
1384 | ((((sword32)((b[24] & 0xf )) >> 0)) << 24);
1385 r[ 7] = (((sword32)((b[24] ) >> 4)) << 0)
1386 | (((sword32)((b[25] ) >> 0)) << 4)
1387 | (((sword32)((b[26] ) >> 0)) << 12)
1388 | (((sword32)((b[27] ) >> 0)) << 20);
1389 r[ 8] = (((sword32)((b[28] ) >> 0)) << 0)
1390 | (((sword32)((b[29] ) >> 0)) << 8)
1391 | (((sword32)((b[30] ) >> 0)) << 16)
1392 | ((((sword32)((b[31] & 0xf )) >> 0)) << 24);
1393 r[ 9] = (((sword32)((b[31] ) >> 4)) << 0)
1394 | (((sword32)((b[32] ) >> 0)) << 4)
1395 | (((sword32)((b[33] ) >> 0)) << 12)
1396 | (((sword32)((b[34] ) >> 0)) << 20);
1397 r[10] = (((sword32)((b[35] ) >> 0)) << 0)
1398 | (((sword32)((b[36] ) >> 0)) << 8)
1399 | (((sword32)((b[37] ) >> 0)) << 16)
1400 | ((((sword32)((b[38] & 0xf )) >> 0)) << 24);
1401 r[11] = (((sword32)((b[38] ) >> 4)) << 0)
1402 | (((sword32)((b[39] ) >> 0)) << 4)
1403 | (((sword32)((b[40] ) >> 0)) << 12)
1404 | (((sword32)((b[41] ) >> 0)) << 20);
1405 r[12] = (((sword32)((b[42] ) >> 0)) << 0)
1406 | (((sword32)((b[43] ) >> 0)) << 8)
1407 | (((sword32)((b[44] ) >> 0)) << 16)
1408 | ((((sword32)((b[45] & 0xf )) >> 0)) << 24);
1409 r[13] = (((sword32)((b[45] ) >> 4)) << 0)
1410 | (((sword32)((b[46] ) >> 0)) << 4)
1411 | (((sword32)((b[47] ) >> 0)) << 12)
1412 | (((sword32)((b[48] ) >> 0)) << 20);
1413 r[14] = (((sword32)((b[49] ) >> 0)) << 0)
1414 | (((sword32)((b[50] ) >> 0)) << 8)
1415 | (((sword32)((b[51] ) >> 0)) << 16)
1416 | ((((sword32)((b[52] & 0xf )) >> 0)) << 24);
1417 r[15] = (((sword32)((b[52] ) >> 4)) << 0)
1418 | (((sword32)((b[53] ) >> 0)) << 4)
1419 | (((sword32)((b[54] ) >> 0)) << 12)
1420 | (((sword32)((b[55] ) >> 0)) << 20);
1421 }
1422
1423 /* Convert the field element to a byte array from an array of 28-bits.
1424 *
1425 * b [in] Byte array.
1426 * a [in] Array to encode into.
1427 */
fe448_to_bytes(unsigned char * b,const sword32 * a)1428 void fe448_to_bytes(unsigned char* b, const sword32* a)
1429 {
1430 sword64 t;
1431 /* Mod */
1432 sword32 in0 = a[0];
1433 sword32 in1 = a[1];
1434 sword32 in2 = a[2];
1435 sword32 in3 = a[3];
1436 sword32 in4 = a[4];
1437 sword32 in5 = a[5];
1438 sword32 in6 = a[6];
1439 sword32 in7 = a[7];
1440 sword32 in8 = a[8];
1441 sword32 in9 = a[9];
1442 sword32 in10 = a[10];
1443 sword32 in11 = a[11];
1444 sword32 in12 = a[12];
1445 sword32 in13 = a[13];
1446 sword32 in14 = a[14];
1447 sword32 in15 = a[15];
1448 sword32 o = in15 >> 28;
1449 in15 -= o << 28;
1450 in0 += o;
1451 in8 += o;
1452 o = (in0 + 1) >> 28;
1453 o = (o + in1) >> 28;
1454 o = (o + in2) >> 28;
1455 o = (o + in3) >> 28;
1456 o = (o + in4) >> 28;
1457 o = (o + in5) >> 28;
1458 o = (o + in6) >> 28;
1459 o = (o + in7) >> 28;
1460 o = (o + in8 + 1) >> 28;
1461 o = (o + in9) >> 28;
1462 o = (o + in10) >> 28;
1463 o = (o + in11) >> 28;
1464 o = (o + in12) >> 28;
1465 o = (o + in13) >> 28;
1466 o = (o + in14) >> 28;
1467 o = (o + in15) >> 28;
1468 in0 += o;
1469 in8 += o;
1470 in15 -= o << 28;
1471 o = (in0 >> 28); in1 += o; t = o << 28; in0 -= (sword32)t;
1472 o = (in1 >> 28); in2 += o; t = o << 28; in1 -= (sword32)t;
1473 o = (in2 >> 28); in3 += o; t = o << 28; in2 -= (sword32)t;
1474 o = (in3 >> 28); in4 += o; t = o << 28; in3 -= (sword32)t;
1475 o = (in4 >> 28); in5 += o; t = o << 28; in4 -= (sword32)t;
1476 o = (in5 >> 28); in6 += o; t = o << 28; in5 -= (sword32)t;
1477 o = (in6 >> 28); in7 += o; t = o << 28; in6 -= (sword32)t;
1478 o = (in7 >> 28); in8 += o; t = o << 28; in7 -= (sword32)t;
1479 o = (in8 >> 28); in9 += o; t = o << 28; in8 -= (sword32)t;
1480 o = (in9 >> 28); in10 += o; t = o << 28; in9 -= (sword32)t;
1481 o = (in10 >> 28); in11 += o; t = o << 28; in10 -= (sword32)t;
1482 o = (in11 >> 28); in12 += o; t = o << 28; in11 -= (sword32)t;
1483 o = (in12 >> 28); in13 += o; t = o << 28; in12 -= (sword32)t;
1484 o = (in13 >> 28); in14 += o; t = o << 28; in13 -= (sword32)t;
1485 o = (in14 >> 28); in15 += o; t = o << 28; in14 -= (sword32)t;
1486 o = (in15 >> 28); in0 += o;
1487 in8 += o; t = o << 28; in15 -= (sword32)t;
1488
1489 /* Output as bytes */
1490 b[ 0] = (in0 >> 0);
1491 b[ 1] = (in0 >> 8);
1492 b[ 2] = (in0 >> 16);
1493 b[ 3] = (in0 >> 24) + ((in1 >> 0) << 4);
1494 b[ 4] = (in1 >> 4);
1495 b[ 5] = (in1 >> 12);
1496 b[ 6] = (in1 >> 20);
1497 b[ 7] = (in2 >> 0);
1498 b[ 8] = (in2 >> 8);
1499 b[ 9] = (in2 >> 16);
1500 b[10] = (in2 >> 24) + ((in3 >> 0) << 4);
1501 b[11] = (in3 >> 4);
1502 b[12] = (in3 >> 12);
1503 b[13] = (in3 >> 20);
1504 b[14] = (in4 >> 0);
1505 b[15] = (in4 >> 8);
1506 b[16] = (in4 >> 16);
1507 b[17] = (in4 >> 24) + ((in5 >> 0) << 4);
1508 b[18] = (in5 >> 4);
1509 b[19] = (in5 >> 12);
1510 b[20] = (in5 >> 20);
1511 b[21] = (in6 >> 0);
1512 b[22] = (in6 >> 8);
1513 b[23] = (in6 >> 16);
1514 b[24] = (in6 >> 24) + ((in7 >> 0) << 4);
1515 b[25] = (in7 >> 4);
1516 b[26] = (in7 >> 12);
1517 b[27] = (in7 >> 20);
1518 b[28] = (in8 >> 0);
1519 b[29] = (in8 >> 8);
1520 b[30] = (in8 >> 16);
1521 b[31] = (in8 >> 24) + ((in9 >> 0) << 4);
1522 b[32] = (in9 >> 4);
1523 b[33] = (in9 >> 12);
1524 b[34] = (in9 >> 20);
1525 b[35] = (in10 >> 0);
1526 b[36] = (in10 >> 8);
1527 b[37] = (in10 >> 16);
1528 b[38] = (in10 >> 24) + ((in11 >> 0) << 4);
1529 b[39] = (in11 >> 4);
1530 b[40] = (in11 >> 12);
1531 b[41] = (in11 >> 20);
1532 b[42] = (in12 >> 0);
1533 b[43] = (in12 >> 8);
1534 b[44] = (in12 >> 16);
1535 b[45] = (in12 >> 24) + ((in13 >> 0) << 4);
1536 b[46] = (in13 >> 4);
1537 b[47] = (in13 >> 12);
1538 b[48] = (in13 >> 20);
1539 b[49] = (in14 >> 0);
1540 b[50] = (in14 >> 8);
1541 b[51] = (in14 >> 16);
1542 b[52] = (in14 >> 24) + ((in15 >> 0) << 4);
1543 b[53] = (in15 >> 4);
1544 b[54] = (in15 >> 12);
1545 b[55] = (in15 >> 20);
1546 }
1547
1548 /* Set the field element to 0.
1549 *
1550 * a [in] Field element.
1551 */
fe448_1(sword32 * a)1552 void fe448_1(sword32* a)
1553 {
1554 a[0] = 1;
1555 a[1] = 0;
1556 a[2] = 0;
1557 a[3] = 0;
1558 a[4] = 0;
1559 a[5] = 0;
1560 a[6] = 0;
1561 a[7] = 0;
1562 a[8] = 0;
1563 a[9] = 0;
1564 a[10] = 0;
1565 a[11] = 0;
1566 a[12] = 0;
1567 a[13] = 0;
1568 a[14] = 0;
1569 a[15] = 0;
1570 }
1571
1572 /* Set the field element to 0.
1573 *
1574 * a [in] Field element.
1575 */
fe448_0(sword32 * a)1576 void fe448_0(sword32* a)
1577 {
1578 a[0] = 0;
1579 a[1] = 0;
1580 a[2] = 0;
1581 a[3] = 0;
1582 a[4] = 0;
1583 a[5] = 0;
1584 a[6] = 0;
1585 a[7] = 0;
1586 a[8] = 0;
1587 a[9] = 0;
1588 a[10] = 0;
1589 a[11] = 0;
1590 a[12] = 0;
1591 a[13] = 0;
1592 a[14] = 0;
1593 a[15] = 0;
1594 }
1595
1596 /* Copy one field element into another: d = a.
1597 *
1598 * d [in] Destination field element.
1599 * a [in] Source field element.
1600 */
fe448_copy(sword32 * d,const sword32 * a)1601 void fe448_copy(sword32* d, const sword32* a)
1602 {
1603 d[0] = a[0];
1604 d[1] = a[1];
1605 d[2] = a[2];
1606 d[3] = a[3];
1607 d[4] = a[4];
1608 d[5] = a[5];
1609 d[6] = a[6];
1610 d[7] = a[7];
1611 d[8] = a[8];
1612 d[9] = a[9];
1613 d[10] = a[10];
1614 d[11] = a[11];
1615 d[12] = a[12];
1616 d[13] = a[13];
1617 d[14] = a[14];
1618 d[15] = a[15];
1619 }
1620
1621 /* Conditionally swap the elements.
1622 * Constant time implementation.
1623 *
1624 * a [in] First field element.
1625 * b [in] Second field element.
1626 * c [in] Swap when 1. Valid values: 0, 1.
1627 */
fe448_cswap(sword32 * a,sword32 * b,int c)1628 static void fe448_cswap(sword32* a, sword32* b, int c)
1629 {
1630 sword32 mask = -(sword32)c;
1631 sword32 t0 = (a[0] ^ b[0]) & mask;
1632 sword32 t1 = (a[1] ^ b[1]) & mask;
1633 sword32 t2 = (a[2] ^ b[2]) & mask;
1634 sword32 t3 = (a[3] ^ b[3]) & mask;
1635 sword32 t4 = (a[4] ^ b[4]) & mask;
1636 sword32 t5 = (a[5] ^ b[5]) & mask;
1637 sword32 t6 = (a[6] ^ b[6]) & mask;
1638 sword32 t7 = (a[7] ^ b[7]) & mask;
1639 sword32 t8 = (a[8] ^ b[8]) & mask;
1640 sword32 t9 = (a[9] ^ b[9]) & mask;
1641 sword32 t10 = (a[10] ^ b[10]) & mask;
1642 sword32 t11 = (a[11] ^ b[11]) & mask;
1643 sword32 t12 = (a[12] ^ b[12]) & mask;
1644 sword32 t13 = (a[13] ^ b[13]) & mask;
1645 sword32 t14 = (a[14] ^ b[14]) & mask;
1646 sword32 t15 = (a[15] ^ b[15]) & mask;
1647 a[0] ^= t0;
1648 a[1] ^= t1;
1649 a[2] ^= t2;
1650 a[3] ^= t3;
1651 a[4] ^= t4;
1652 a[5] ^= t5;
1653 a[6] ^= t6;
1654 a[7] ^= t7;
1655 a[8] ^= t8;
1656 a[9] ^= t9;
1657 a[10] ^= t10;
1658 a[11] ^= t11;
1659 a[12] ^= t12;
1660 a[13] ^= t13;
1661 a[14] ^= t14;
1662 a[15] ^= t15;
1663 b[0] ^= t0;
1664 b[1] ^= t1;
1665 b[2] ^= t2;
1666 b[3] ^= t3;
1667 b[4] ^= t4;
1668 b[5] ^= t5;
1669 b[6] ^= t6;
1670 b[7] ^= t7;
1671 b[8] ^= t8;
1672 b[9] ^= t9;
1673 b[10] ^= t10;
1674 b[11] ^= t11;
1675 b[12] ^= t12;
1676 b[13] ^= t13;
1677 b[14] ^= t14;
1678 b[15] ^= t15;
1679 }
1680
1681 /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
1682 *
1683 * r [in] Field element to hold sum.
1684 * a [in] Field element to add.
1685 * b [in] Field element to add.
1686 */
fe448_add(sword32 * r,const sword32 * a,const sword32 * b)1687 void fe448_add(sword32* r, const sword32* a, const sword32* b)
1688 {
1689 r[0] = a[0] + b[0];
1690 r[1] = a[1] + b[1];
1691 r[2] = a[2] + b[2];
1692 r[3] = a[3] + b[3];
1693 r[4] = a[4] + b[4];
1694 r[5] = a[5] + b[5];
1695 r[6] = a[6] + b[6];
1696 r[7] = a[7] + b[7];
1697 r[8] = a[8] + b[8];
1698 r[9] = a[9] + b[9];
1699 r[10] = a[10] + b[10];
1700 r[11] = a[11] + b[11];
1701 r[12] = a[12] + b[12];
1702 r[13] = a[13] + b[13];
1703 r[14] = a[14] + b[14];
1704 r[15] = a[15] + b[15];
1705 }
1706
1707 /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
1708 *
1709 * r [in] Field element to hold difference.
1710 * a [in] Field element to subtract from.
1711 * b [in] Field element to subtract.
1712 */
fe448_sub(sword32 * r,const sword32 * a,const sword32 * b)1713 void fe448_sub(sword32* r, const sword32* a, const sword32* b)
1714 {
1715 r[0] = a[0] - b[0];
1716 r[1] = a[1] - b[1];
1717 r[2] = a[2] - b[2];
1718 r[3] = a[3] - b[3];
1719 r[4] = a[4] - b[4];
1720 r[5] = a[5] - b[5];
1721 r[6] = a[6] - b[6];
1722 r[7] = a[7] - b[7];
1723 r[8] = a[8] - b[8];
1724 r[9] = a[9] - b[9];
1725 r[10] = a[10] - b[10];
1726 r[11] = a[11] - b[11];
1727 r[12] = a[12] - b[12];
1728 r[13] = a[13] - b[13];
1729 r[14] = a[14] - b[14];
1730 r[15] = a[15] - b[15];
1731 }
1732
fe448_reduce(sword32 * a)1733 void fe448_reduce(sword32* a)
1734 {
1735 sword64 o;
1736
1737 o = a[0 ] >> 28; a[1 ] += (sword32)o; a[0 ] -= (sword32)(o << 28);
1738 o = a[1 ] >> 28; a[2 ] += (sword32)o; a[1 ] -= (sword32)(o << 28);
1739 o = a[2 ] >> 28; a[3 ] += (sword32)o; a[2 ] -= (sword32)(o << 28);
1740 o = a[3 ] >> 28; a[4 ] += (sword32)o; a[3 ] -= (sword32)(o << 28);
1741 o = a[4 ] >> 28; a[5 ] += (sword32)o; a[4 ] -= (sword32)(o << 28);
1742 o = a[5 ] >> 28; a[6 ] += (sword32)o; a[5 ] -= (sword32)(o << 28);
1743 o = a[6 ] >> 28; a[7 ] += (sword32)o; a[6 ] -= (sword32)(o << 28);
1744 o = a[7 ] >> 28; a[8 ] += (sword32)o; a[7 ] -= (sword32)(o << 28);
1745 o = a[8 ] >> 28; a[9 ] += (sword32)o; a[8 ] -= (sword32)(o << 28);
1746 o = a[9 ] >> 28; a[10] += (sword32)o; a[9 ] -= (sword32)(o << 28);
1747 o = a[10] >> 28; a[11] += (sword32)o; a[10] -= (sword32)(o << 28);
1748 o = a[11] >> 28; a[12] += (sword32)o; a[11] -= (sword32)(o << 28);
1749 o = a[12] >> 28; a[13] += (sword32)o; a[12] -= (sword32)(o << 28);
1750 o = a[13] >> 28; a[14] += (sword32)o; a[13] -= (sword32)(o << 28);
1751 o = a[14] >> 28; a[15] += (sword32)o; a[14] -= (sword32)(o << 28);
1752 o = a[15] >> 28; a[0] += (sword32)o;
1753 a[8] += (sword32)o; a[15] -= (sword32)(o << 28);
1754 }
1755 /* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
1756 *
1757 * r [in] Field element to hold result.
1758 * a [in] Field element to multiply.
1759 */
fe448_mul39081(sword32 * r,const sword32 * a)1760 void fe448_mul39081(sword32* r, const sword32* a)
1761 {
1762 sword64 t;
1763 sword32 o;
1764 sword64 t0 = a[0] * (sword64)39081;
1765 sword64 t1 = a[1] * (sword64)39081;
1766 sword64 t2 = a[2] * (sword64)39081;
1767 sword64 t3 = a[3] * (sword64)39081;
1768 sword64 t4 = a[4] * (sword64)39081;
1769 sword64 t5 = a[5] * (sword64)39081;
1770 sword64 t6 = a[6] * (sword64)39081;
1771 sword64 t7 = a[7] * (sword64)39081;
1772 sword64 t8 = a[8] * (sword64)39081;
1773 sword64 t9 = a[9] * (sword64)39081;
1774 sword64 t10 = a[10] * (sword64)39081;
1775 sword64 t11 = a[11] * (sword64)39081;
1776 sword64 t12 = a[12] * (sword64)39081;
1777 sword64 t13 = a[13] * (sword64)39081;
1778 sword64 t14 = a[14] * (sword64)39081;
1779 sword64 t15 = a[15] * (sword64)39081;
1780 o = (sword32)(t0 >> 28); t1 += o; t = (sword64)o << 28; t0 -= t;
1781 o = (sword32)(t1 >> 28); t2 += o; t = (sword64)o << 28; t1 -= t;
1782 o = (sword32)(t2 >> 28); t3 += o; t = (sword64)o << 28; t2 -= t;
1783 o = (sword32)(t3 >> 28); t4 += o; t = (sword64)o << 28; t3 -= t;
1784 o = (sword32)(t4 >> 28); t5 += o; t = (sword64)o << 28; t4 -= t;
1785 o = (sword32)(t5 >> 28); t6 += o; t = (sword64)o << 28; t5 -= t;
1786 o = (sword32)(t6 >> 28); t7 += o; t = (sword64)o << 28; t6 -= t;
1787 o = (sword32)(t7 >> 28); t8 += o; t = (sword64)o << 28; t7 -= t;
1788 o = (sword32)(t8 >> 28); t9 += o; t = (sword64)o << 28; t8 -= t;
1789 o = (sword32)(t9 >> 28); t10 += o; t = (sword64)o << 28; t9 -= t;
1790 o = (sword32)(t10 >> 28); t11 += o; t = (sword64)o << 28; t10 -= t;
1791 o = (sword32)(t11 >> 28); t12 += o; t = (sword64)o << 28; t11 -= t;
1792 o = (sword32)(t12 >> 28); t13 += o; t = (sword64)o << 28; t12 -= t;
1793 o = (sword32)(t13 >> 28); t14 += o; t = (sword64)o << 28; t13 -= t;
1794 o = (sword32)(t14 >> 28); t15 += o; t = (sword64)o << 28; t14 -= t;
1795 o = (sword32)(t15 >> 28); t0 += o;
1796 t8 += o; t = (sword64)o << 28; t15 -= t;
1797
1798 /* Store */
1799 r[0] = (sword32)t0;
1800 r[1] = (sword32)t1;
1801 r[2] = (sword32)t2;
1802 r[3] = (sword32)t3;
1803 r[4] = (sword32)t4;
1804 r[5] = (sword32)t5;
1805 r[6] = (sword32)t6;
1806 r[7] = (sword32)t7;
1807 r[8] = (sword32)t8;
1808 r[9] = (sword32)t9;
1809 r[10] = (sword32)t10;
1810 r[11] = (sword32)t11;
1811 r[12] = (sword32)t12;
1812 r[13] = (sword32)t13;
1813 r[14] = (sword32)t14;
1814 r[15] = (sword32)t15;
1815 }
1816
1817 /* Mulitply two field elements. r = a * b
1818 *
1819 * r [in] Field element to hold result.
1820 * a [in] Field element to multiply.
1821 * b [in] Field element to multiply.
1822 */
fe448_mul_8(sword32 * r,const sword32 * a,const sword32 * b)1823 static WC_INLINE void fe448_mul_8(sword32* r, const sword32* a, const sword32* b)
1824 {
1825 sword64 t;
1826 sword64 t0 = (sword64)a[ 0] * b[ 0];
1827 sword64 t1 = (sword64)a[ 0] * b[ 1];
1828 sword64 t101 = (sword64)a[ 1] * b[ 0];
1829 sword64 t2 = (sword64)a[ 0] * b[ 2];
1830 sword64 t102 = (sword64)a[ 1] * b[ 1];
1831 sword64 t202 = (sword64)a[ 2] * b[ 0];
1832 sword64 t3 = (sword64)a[ 0] * b[ 3];
1833 sword64 t103 = (sword64)a[ 1] * b[ 2];
1834 sword64 t203 = (sword64)a[ 2] * b[ 1];
1835 sword64 t303 = (sword64)a[ 3] * b[ 0];
1836 sword64 t4 = (sword64)a[ 0] * b[ 4];
1837 sword64 t104 = (sword64)a[ 1] * b[ 3];
1838 sword64 t204 = (sword64)a[ 2] * b[ 2];
1839 sword64 t304 = (sword64)a[ 3] * b[ 1];
1840 sword64 t404 = (sword64)a[ 4] * b[ 0];
1841 sword64 t5 = (sword64)a[ 0] * b[ 5];
1842 sword64 t105 = (sword64)a[ 1] * b[ 4];
1843 sword64 t205 = (sword64)a[ 2] * b[ 3];
1844 sword64 t305 = (sword64)a[ 3] * b[ 2];
1845 sword64 t405 = (sword64)a[ 4] * b[ 1];
1846 sword64 t505 = (sword64)a[ 5] * b[ 0];
1847 sword64 t6 = (sword64)a[ 0] * b[ 6];
1848 sword64 t106 = (sword64)a[ 1] * b[ 5];
1849 sword64 t206 = (sword64)a[ 2] * b[ 4];
1850 sword64 t306 = (sword64)a[ 3] * b[ 3];
1851 sword64 t406 = (sword64)a[ 4] * b[ 2];
1852 sword64 t506 = (sword64)a[ 5] * b[ 1];
1853 sword64 t606 = (sword64)a[ 6] * b[ 0];
1854 sword64 t7 = (sword64)a[ 0] * b[ 7];
1855 sword64 t107 = (sword64)a[ 1] * b[ 6];
1856 sword64 t207 = (sword64)a[ 2] * b[ 5];
1857 sword64 t307 = (sword64)a[ 3] * b[ 4];
1858 sword64 t407 = (sword64)a[ 4] * b[ 3];
1859 sword64 t507 = (sword64)a[ 5] * b[ 2];
1860 sword64 t607 = (sword64)a[ 6] * b[ 1];
1861 sword64 t707 = (sword64)a[ 7] * b[ 0];
1862 sword64 t8 = (sword64)a[ 1] * b[ 7];
1863 sword64 t108 = (sword64)a[ 2] * b[ 6];
1864 sword64 t208 = (sword64)a[ 3] * b[ 5];
1865 sword64 t308 = (sword64)a[ 4] * b[ 4];
1866 sword64 t408 = (sword64)a[ 5] * b[ 3];
1867 sword64 t508 = (sword64)a[ 6] * b[ 2];
1868 sword64 t608 = (sword64)a[ 7] * b[ 1];
1869 sword64 t9 = (sword64)a[ 2] * b[ 7];
1870 sword64 t109 = (sword64)a[ 3] * b[ 6];
1871 sword64 t209 = (sword64)a[ 4] * b[ 5];
1872 sword64 t309 = (sword64)a[ 5] * b[ 4];
1873 sword64 t409 = (sword64)a[ 6] * b[ 3];
1874 sword64 t509 = (sword64)a[ 7] * b[ 2];
1875 sword64 t10 = (sword64)a[ 3] * b[ 7];
1876 sword64 t110 = (sword64)a[ 4] * b[ 6];
1877 sword64 t210 = (sword64)a[ 5] * b[ 5];
1878 sword64 t310 = (sword64)a[ 6] * b[ 4];
1879 sword64 t410 = (sword64)a[ 7] * b[ 3];
1880 sword64 t11 = (sword64)a[ 4] * b[ 7];
1881 sword64 t111 = (sword64)a[ 5] * b[ 6];
1882 sword64 t211 = (sword64)a[ 6] * b[ 5];
1883 sword64 t311 = (sword64)a[ 7] * b[ 4];
1884 sword64 t12 = (sword64)a[ 5] * b[ 7];
1885 sword64 t112 = (sword64)a[ 6] * b[ 6];
1886 sword64 t212 = (sword64)a[ 7] * b[ 5];
1887 sword64 t13 = (sword64)a[ 6] * b[ 7];
1888 sword64 t113 = (sword64)a[ 7] * b[ 6];
1889 sword64 t14 = (sword64)a[ 7] * b[ 7];
1890 t1 += t101;
1891 t2 += t102; t2 += t202;
1892 t3 += t103; t3 += t203; t3 += t303;
1893 t4 += t104; t4 += t204; t4 += t304; t4 += t404;
1894 t5 += t105; t5 += t205; t5 += t305; t5 += t405; t5 += t505;
1895 t6 += t106; t6 += t206; t6 += t306; t6 += t406; t6 += t506;
1896 t6 += t606;
1897 t7 += t107; t7 += t207; t7 += t307; t7 += t407; t7 += t507;
1898 t7 += t607;
1899 t7 += t707;
1900 t8 += t108; t8 += t208; t8 += t308; t8 += t408; t8 += t508;
1901 t8 += t608;
1902 t9 += t109; t9 += t209; t9 += t309; t9 += t409; t9 += t509;
1903 t10 += t110; t10 += t210; t10 += t310; t10 += t410;
1904 t11 += t111; t11 += t211; t11 += t311;
1905 t12 += t112; t12 += t212;
1906 t13 += t113;
1907 sword64 o = t14 >> 28;
1908 sword64 t15 = o;
1909 t14 -= o << 28;
1910 o = (t0 >> 28); t1 += o; t = o << 28; t0 -= t;
1911 o = (t1 >> 28); t2 += o; t = o << 28; t1 -= t;
1912 o = (t2 >> 28); t3 += o; t = o << 28; t2 -= t;
1913 o = (t3 >> 28); t4 += o; t = o << 28; t3 -= t;
1914 o = (t4 >> 28); t5 += o; t = o << 28; t4 -= t;
1915 o = (t5 >> 28); t6 += o; t = o << 28; t5 -= t;
1916 o = (t6 >> 28); t7 += o; t = o << 28; t6 -= t;
1917 o = (t7 >> 28); t8 += o; t = o << 28; t7 -= t;
1918 o = (t8 >> 28); t9 += o; t = o << 28; t8 -= t;
1919 o = (t9 >> 28); t10 += o; t = o << 28; t9 -= t;
1920 o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
1921 o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
1922 o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
1923 o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
1924 o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
1925 o = (t15 >> 28); t0 += o;
1926 t8 += o; t = o << 28; t15 -= t;
1927
1928 /* Store */
1929 r[0] = (sword32)t0;
1930 r[1] = (sword32)t1;
1931 r[2] = (sword32)t2;
1932 r[3] = (sword32)t3;
1933 r[4] = (sword32)t4;
1934 r[5] = (sword32)t5;
1935 r[6] = (sword32)t6;
1936 r[7] = (sword32)t7;
1937 r[8] = (sword32)t8;
1938 r[9] = (sword32)t9;
1939 r[10] = (sword32)t10;
1940 r[11] = (sword32)t11;
1941 r[12] = (sword32)t12;
1942 r[13] = (sword32)t13;
1943 r[14] = (sword32)t14;
1944 r[15] = (sword32)t15;
1945 }
1946
1947 /* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
1948 *
1949 * r [in] Field element to hold result.
1950 * a [in] Field element to multiply.
1951 * b [in] Field element to multiply.
1952 */
fe448_mul(sword32 * r,const sword32 * a,const sword32 * b)1953 void fe448_mul(sword32* r, const sword32* a, const sword32* b)
1954 {
1955 sword32 r0[16];
1956 sword32 r1[16];
1957 sword32* a1 = r1;
1958 sword32 b1[8];
1959 sword32 r2[16];
1960 a1[0] = a[0] + a[8];
1961 a1[1] = a[1] + a[9];
1962 a1[2] = a[2] + a[10];
1963 a1[3] = a[3] + a[11];
1964 a1[4] = a[4] + a[12];
1965 a1[5] = a[5] + a[13];
1966 a1[6] = a[6] + a[14];
1967 a1[7] = a[7] + a[15];
1968 b1[0] = b[0] + b[8];
1969 b1[1] = b[1] + b[9];
1970 b1[2] = b[2] + b[10];
1971 b1[3] = b[3] + b[11];
1972 b1[4] = b[4] + b[12];
1973 b1[5] = b[5] + b[13];
1974 b1[6] = b[6] + b[14];
1975 b1[7] = b[7] + b[15];
1976 fe448_mul_8(r2, a + 8, b + 8);
1977 fe448_mul_8(r0, a, b);
1978 fe448_mul_8(r1, a1, b1);
1979 r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
1980 r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
1981 r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
1982 r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
1983 r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
1984 r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
1985 r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
1986 r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
1987 r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
1988 r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
1989 r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
1990 r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
1991 r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
1992 r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
1993 r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
1994 r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
1995 }
1996
1997 /* Square a field element. r = a * a
1998 *
1999 * r [in] Field element to hold result.
2000 * a [in] Field element to square.
2001 */
fe448_sqr_8(sword32 * r,const sword32 * a)2002 static WC_INLINE void fe448_sqr_8(sword32* r, const sword32* a)
2003 {
2004 sword64 t;
2005 sword64 t0 = (sword64)a[ 0] * a[ 0];
2006 sword64 t1 = 2 * (sword64)a[ 0] * a[ 1];
2007 sword64 t2 = 2 * (sword64)a[ 0] * a[ 2];
2008 sword64 t102 = (sword64)a[ 1] * a[ 1];
2009 sword64 t3 = 2 * (sword64)a[ 0] * a[ 3];
2010 sword64 t103 = 2 * (sword64)a[ 1] * a[ 2];
2011 sword64 t4 = 2 * (sword64)a[ 0] * a[ 4];
2012 sword64 t104 = 2 * (sword64)a[ 1] * a[ 3];
2013 sword64 t204 = (sword64)a[ 2] * a[ 2];
2014 sword64 t5 = 2 * (sword64)a[ 0] * a[ 5];
2015 sword64 t105 = 2 * (sword64)a[ 1] * a[ 4];
2016 sword64 t205 = 2 * (sword64)a[ 2] * a[ 3];
2017 sword64 t6 = 2 * (sword64)a[ 0] * a[ 6];
2018 sword64 t106 = 2 * (sword64)a[ 1] * a[ 5];
2019 sword64 t206 = 2 * (sword64)a[ 2] * a[ 4];
2020 sword64 t306 = (sword64)a[ 3] * a[ 3];
2021 sword64 t7 = 2 * (sword64)a[ 0] * a[ 7];
2022 sword64 t107 = 2 * (sword64)a[ 1] * a[ 6];
2023 sword64 t207 = 2 * (sword64)a[ 2] * a[ 5];
2024 sword64 t307 = 2 * (sword64)a[ 3] * a[ 4];
2025 sword64 t8 = 2 * (sword64)a[ 1] * a[ 7];
2026 sword64 t108 = 2 * (sword64)a[ 2] * a[ 6];
2027 sword64 t208 = 2 * (sword64)a[ 3] * a[ 5];
2028 sword64 t308 = (sword64)a[ 4] * a[ 4];
2029 sword64 t9 = 2 * (sword64)a[ 2] * a[ 7];
2030 sword64 t109 = 2 * (sword64)a[ 3] * a[ 6];
2031 sword64 t209 = 2 * (sword64)a[ 4] * a[ 5];
2032 sword64 t10 = 2 * (sword64)a[ 3] * a[ 7];
2033 sword64 t110 = 2 * (sword64)a[ 4] * a[ 6];
2034 sword64 t210 = (sword64)a[ 5] * a[ 5];
2035 sword64 t11 = 2 * (sword64)a[ 4] * a[ 7];
2036 sword64 t111 = 2 * (sword64)a[ 5] * a[ 6];
2037 sword64 t12 = 2 * (sword64)a[ 5] * a[ 7];
2038 sword64 t112 = (sword64)a[ 6] * a[ 6];
2039 sword64 t13 = 2 * (sword64)a[ 6] * a[ 7];
2040 sword64 t14 = (sword64)a[ 7] * a[ 7];
2041 t2 += t102;
2042 t3 += t103;
2043 t4 += t104; t4 += t204;
2044 t5 += t105; t5 += t205;
2045 t6 += t106; t6 += t206; t6 += t306;
2046 t7 += t107; t7 += t207; t7 += t307;
2047 t8 += t108; t8 += t208; t8 += t308;
2048 t9 += t109; t9 += t209;
2049 t10 += t110; t10 += t210;
2050 t11 += t111;
2051 t12 += t112;
2052 sword64 o = t14 >> 28;
2053 sword64 t15 = o;
2054 t14 -= o << 28;
2055 o = (t0 >> 28); t1 += o; t = o << 28; t0 -= t;
2056 o = (t1 >> 28); t2 += o; t = o << 28; t1 -= t;
2057 o = (t2 >> 28); t3 += o; t = o << 28; t2 -= t;
2058 o = (t3 >> 28); t4 += o; t = o << 28; t3 -= t;
2059 o = (t4 >> 28); t5 += o; t = o << 28; t4 -= t;
2060 o = (t5 >> 28); t6 += o; t = o << 28; t5 -= t;
2061 o = (t6 >> 28); t7 += o; t = o << 28; t6 -= t;
2062 o = (t7 >> 28); t8 += o; t = o << 28; t7 -= t;
2063 o = (t8 >> 28); t9 += o; t = o << 28; t8 -= t;
2064 o = (t9 >> 28); t10 += o; t = o << 28; t9 -= t;
2065 o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
2066 o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
2067 o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
2068 o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
2069 o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
2070 o = (t15 >> 28); t0 += o;
2071 t8 += o; t = o << 28; t15 -= t;
2072
2073 /* Store */
2074 r[0] = (sword32)t0;
2075 r[1] = (sword32)t1;
2076 r[2] = (sword32)t2;
2077 r[3] = (sword32)t3;
2078 r[4] = (sword32)t4;
2079 r[5] = (sword32)t5;
2080 r[6] = (sword32)t6;
2081 r[7] = (sword32)t7;
2082 r[8] = (sword32)t8;
2083 r[9] = (sword32)t9;
2084 r[10] = (sword32)t10;
2085 r[11] = (sword32)t11;
2086 r[12] = (sword32)t12;
2087 r[13] = (sword32)t13;
2088 r[14] = (sword32)t14;
2089 r[15] = (sword32)t15;
2090 }
2091
2092 /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
2093 *
2094 * r [in] Field element to hold result.
2095 * a [in] Field element to square.
2096 */
fe448_sqr(sword32 * r,const sword32 * a)2097 void fe448_sqr(sword32* r, const sword32* a)
2098 {
2099 sword32 r0[16];
2100 sword32 r1[16];
2101 sword32* a1 = r1;
2102 sword32 r2[16];
2103 a1[0] = a[0] + a[8];
2104 a1[1] = a[1] + a[9];
2105 a1[2] = a[2] + a[10];
2106 a1[3] = a[3] + a[11];
2107 a1[4] = a[4] + a[12];
2108 a1[5] = a[5] + a[13];
2109 a1[6] = a[6] + a[14];
2110 a1[7] = a[7] + a[15];
2111 fe448_sqr_8(r2, a + 8);
2112 fe448_sqr_8(r0, a);
2113 fe448_sqr_8(r1, a1);
2114 r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
2115 r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
2116 r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
2117 r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
2118 r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
2119 r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
2120 r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
2121 r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
2122 r[ 8] = r2[ 8] + r1[ 0] - r0[ 0] + r1[ 8];
2123 r[ 9] = r2[ 9] + r1[ 1] - r0[ 1] + r1[ 9];
2124 r[10] = r2[10] + r1[ 2] - r0[ 2] + r1[10];
2125 r[11] = r2[11] + r1[ 3] - r0[ 3] + r1[11];
2126 r[12] = r2[12] + r1[ 4] - r0[ 4] + r1[12];
2127 r[13] = r2[13] + r1[ 5] - r0[ 5] + r1[13];
2128 r[14] = r2[14] + r1[ 6] - r0[ 6] + r1[14];
2129 r[15] = r2[15] + r1[ 7] - r0[ 7] + r1[15];
2130 }
2131
2132 /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
2133 * Constant time implementation - using Fermat's little theorem:
2134 * a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
2135 * For curve448: p - 2 = 2^448 - 2^224 - 3
2136 *
2137 * r [in] Field element to hold result.
2138 * a [in] Field element to invert.
2139 */
fe448_invert(sword32 * r,const sword32 * a)2140 void fe448_invert(sword32* r, const sword32* a)
2141 {
2142 sword32 t1[16];
2143 sword32 t2[16];
2144 sword32 t3[16];
2145 sword32 t4[16];
2146 int i;
2147
2148 fe448_sqr(t1, a);
2149 /* t1 = 2 */
2150 fe448_mul(t1, t1, a);
2151 /* t1 = 3 */
2152 fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2153 /* t2 = c */
2154 fe448_mul(t3, t2, a);
2155 /* t3 = d */
2156 fe448_mul(t1, t2, t1);
2157 /* t1 = f */
2158 fe448_sqr(t2, t1);
2159 /* t2 = 1e */
2160 fe448_mul(t4, t2, a);
2161 /* t4 = 1f */
2162 fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2163 /* t2 = 3e0 */
2164 fe448_mul(t1, t2, t4);
2165 /* t1 = 3ff */
2166 fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
2167 /* t2 = ffc00 */
2168 fe448_mul(t1, t2, t1);
2169 /* t1 = fffff */
2170 fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2171 /* t2 = 1ffffe0 */
2172 fe448_mul(t1, t2, t4);
2173 /* t1 = 1ffffff */
2174 fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
2175 /* t2 = 3fffffe000000 */
2176 fe448_mul(t1, t2, t1);
2177 /* t1 = 3ffffffffffff */
2178 fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2179 /* t2 = 7fffffffffffe0 */
2180 fe448_mul(t1, t2, t4);
2181 /* t1 = 7fffffffffffff */
2182 fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
2183 /* t2 = 3fffffffffffff80000000000000 */
2184 fe448_mul(t1, t2, t1);
2185 /* t1 = 3fffffffffffffffffffffffffff */
2186 fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
2187 /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
2188 fe448_mul(t1, t2, t1);
2189 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2190 fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2191 /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
2192 fe448_mul(t3, t3, t2);
2193 /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2194 fe448_mul(t1, t3, a);
2195 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2196 fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
2197 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
2198 fe448_mul(r, t3, t1);
2199 /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2200 }
2201
2202 /* Scalar multiply the point by a number. r = n.a
2203 * Uses Montgomery ladder and only requires the x-ordinate.
2204 *
2205 * r [in] Field element to hold result.
2206 * n [in] Scalar as an array of bytes.
2207 * a [in] Point to multiply - x-ordinate only.
2208 */
curve448(byte * r,const byte * n,const byte * a)2209 int curve448(byte* r, const byte* n, const byte* a)
2210 {
2211 sword32 x1[16];
2212 sword32 x2[16];
2213 sword32 z2[16];
2214 sword32 x3[16];
2215 sword32 z3[16];
2216 sword32 t0[16];
2217 sword32 t1[16];
2218 int i;
2219 unsigned int swap;
2220 unsigned int b;
2221
2222 fe448_from_bytes(x1, a);
2223 fe448_1(x2);
2224 fe448_0(z2);
2225 fe448_copy(x3, x1);
2226 fe448_1(z3);
2227
2228 swap = 0;
2229 for (i = 447; i >= 0; --i) {
2230 b = (n[i >> 3] >> (i & 7)) & 1;
2231 swap ^= b;
2232 fe448_cswap(x2, x3, swap);
2233 fe448_cswap(z2, z3, swap);
2234 swap = b;
2235
2236 /* Montgomery Ladder - double and add */
2237 fe448_add(t0, x2, z2);
2238 fe448_reduce(t0);
2239 fe448_add(t1, x3, z3);
2240 fe448_reduce(t1);
2241 fe448_sub(x2, x2, z2);
2242 fe448_sub(x3, x3, z3);
2243 fe448_mul(t1, t1, x2);
2244 fe448_mul(z3, x3, t0);
2245 fe448_sqr(t0, t0);
2246 fe448_sqr(x2, x2);
2247 fe448_add(x3, z3, t1);
2248 fe448_reduce(x3);
2249 fe448_sqr(x3, x3);
2250 fe448_sub(z3, z3, t1);
2251 fe448_sqr(z3, z3);
2252 fe448_mul(z3, z3, x1);
2253 fe448_sub(t1, t0, x2);
2254 fe448_mul(x2, t0, x2);
2255 fe448_mul39081(z2, t1);
2256 fe448_add(z2, t0, z2);
2257 fe448_mul(z2, z2, t1);
2258 }
2259 /* Last two bits are 0 - no final swap check required. */
2260
2261 fe448_invert(z2, z2);
2262 fe448_mul(x2, x2, z2);
2263 fe448_to_bytes(r, x2);
2264
2265 return 0;
2266 }
2267
2268 #ifdef HAVE_ED448
2269 /* Check whether field element is not 0.
2270 * Must convert to a normalized form before checking.
2271 *
2272 * a [in] Field element.
2273 * returns 0 when zero, and any other value otherwise.
2274 */
fe448_isnonzero(const sword32 * a)2275 int fe448_isnonzero(const sword32* a)
2276 {
2277 byte b[56];
2278 int i;
2279 byte c = 0;
2280 fe448_to_bytes(b, a);
2281 for (i = 0; i < 56; i++)
2282 c |= b[i];
2283 return c;
2284 }
2285
2286 /* Check whether field element is negative.
2287 * Must convert to a normalized form before checking.
2288 *
2289 * a [in] Field element.
2290 * returns 1 when negative, and 0 otherwise.
2291 */
fe448_isnegative(const sword32 * a)2292 int fe448_isnegative(const sword32* a)
2293 {
2294 byte b[56];
2295 fe448_to_bytes(b, a);
2296 return b[0] & 1;
2297 }
2298
2299 /* Negates the field element. r = -a
2300 *
2301 * r [in] Field element to hold result.
2302 * a [in] Field element.
2303 */
fe448_neg(sword32 * r,const sword32 * a)2304 void fe448_neg(sword32* r, const sword32* a)
2305 {
2306 r[0] = -a[0];
2307 r[1] = -a[1];
2308 r[2] = -a[2];
2309 r[3] = -a[3];
2310 r[4] = -a[4];
2311 r[5] = -a[5];
2312 r[6] = -a[6];
2313 r[7] = -a[7];
2314 r[8] = -a[8];
2315 r[9] = -a[9];
2316 r[10] = -a[10];
2317 r[11] = -a[11];
2318 r[12] = -a[12];
2319 r[13] = -a[13];
2320 r[14] = -a[14];
2321 r[15] = -a[15];
2322 }
2323
2324 /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
2325 * Used for calcualting y-ordinate from x-ordinate for Ed448.
2326 *
2327 * r [in] Field element to hold result.
2328 * a [in] Field element to exponentiate.
2329 */
fe448_pow_2_446_222_1(sword32 * r,const sword32 * a)2330 void fe448_pow_2_446_222_1(sword32* r, const sword32* a)
2331 {
2332 sword32 t1[16];
2333 sword32 t2[16];
2334 sword32 t3[16];
2335 sword32 t4[16];
2336 sword32 t5[16];
2337 int i;
2338
2339 fe448_sqr(t3, a);
2340 /* t3 = 2 */
2341 fe448_mul(t1, t3, a);
2342 /* t1 = 3 */
2343 fe448_sqr(t5, t1);
2344 /* t5 = 6 */
2345 fe448_mul(t5, t5, a);
2346 /* t5 = 7 */
2347 fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2348 /* t2 = c */
2349 fe448_mul(t3, t2, t3);
2350 /* t3 = e */
2351 fe448_mul(t1, t2, t1);
2352 /* t1 = f */
2353 fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
2354 /* t2 = 78 */
2355 fe448_mul(t5, t2, t5);
2356 /* t5 = 7f */
2357 fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2358 /* t2 = f0 */
2359 fe448_mul(t1, t2, t1);
2360 /* t1 = ff */
2361 fe448_mul(t3, t3, t2);
2362 /* t3 = fe */
2363 fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
2364 /* t2 = 7f80 */
2365 fe448_mul(t5, t2, t5);
2366 /* t5 = 7fff */
2367 fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
2368 /* t2 = ff00 */
2369 fe448_mul(t1, t2, t1);
2370 /* t1 = ffff */
2371 fe448_mul(t3, t3, t2);
2372 /* t3 = fffe */
2373 fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
2374 /* t2 = 3fff8000 */
2375 fe448_mul(t5, t2, t5);
2376 /* t5 = 3fffffff */
2377 fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
2378 /* t2 = ffff0000 */
2379 fe448_mul(t1, t2, t1);
2380 /* t1 = ffffffff */
2381 fe448_mul(t3, t3, t2);
2382 /* t3 = fffffffe */
2383 fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2384 /* t2 = ffffffff00000000 */
2385 fe448_mul(t2, t2, t1);
2386 /* t2 = ffffffffffffffff */
2387 fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2388 /* t1 = ffffffffffffffff0000000000000000 */
2389 fe448_mul(t1, t1, t2);
2390 /* t1 = ffffffffffffffffffffffffffffffff */
2391 fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2392 /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
2393 fe448_mul(t4, t1, t2);
2394 /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
2395 fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2396 /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
2397 fe448_mul(t3, t3, t2);
2398 /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2399 fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
2400 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
2401 fe448_mul(t1, t1, t4);
2402 /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
2403 fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
2404 /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
2405 fe448_mul(r, t5, t1);
2406 /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2407 }
2408
2409 /* Constant time, conditional move of b into a.
2410 * a is not changed if the condition is 0.
2411 *
2412 * a A field element.
2413 * b A field element.
2414 * c If 1 then copy and if 0 then don't copy.
2415 */
fe448_cmov(sword32 * a,const sword32 * b,int c)2416 void fe448_cmov(sword32* a, const sword32* b, int c)
2417 {
2418 sword32 m = -(sword32)c;
2419 sword32 t0 = m & (a[0] ^ b[0]);
2420 sword32 t1 = m & (a[1] ^ b[1]);
2421 sword32 t2 = m & (a[2] ^ b[2]);
2422 sword32 t3 = m & (a[3] ^ b[3]);
2423 sword32 t4 = m & (a[4] ^ b[4]);
2424 sword32 t5 = m & (a[5] ^ b[5]);
2425 sword32 t6 = m & (a[6] ^ b[6]);
2426 sword32 t7 = m & (a[7] ^ b[7]);
2427 sword32 t8 = m & (a[8] ^ b[8]);
2428 sword32 t9 = m & (a[9] ^ b[9]);
2429 sword32 t10 = m & (a[10] ^ b[10]);
2430 sword32 t11 = m & (a[11] ^ b[11]);
2431 sword32 t12 = m & (a[12] ^ b[12]);
2432 sword32 t13 = m & (a[13] ^ b[13]);
2433 sword32 t14 = m & (a[14] ^ b[14]);
2434 sword32 t15 = m & (a[15] ^ b[15]);
2435
2436 a[0] ^= t0;
2437 a[1] ^= t1;
2438 a[2] ^= t2;
2439 a[3] ^= t3;
2440 a[4] ^= t4;
2441 a[5] ^= t5;
2442 a[6] ^= t6;
2443 a[7] ^= t7;
2444 a[8] ^= t8;
2445 a[9] ^= t9;
2446 a[10] ^= t10;
2447 a[11] ^= t11;
2448 a[12] ^= t12;
2449 a[13] ^= t13;
2450 a[14] ^= t14;
2451 a[15] ^= t15;
2452 }
2453
2454 #endif /* HAVE_ED448 */
2455 #endif
2456
2457 #endif /* HAVE_CURVE448 || HAVE_ED448 */
2458