1 /* fe_448.c
2  *
3  * Copyright (C) 2006-2021 wolfSSL Inc.
4  *
5  * This file is part of wolfSSL.
6  *
7  * wolfSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * wolfSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20  */
21 
22 /* Based On Daniel J Bernstein's curve25519 Public Domain ref10 work.
23  * Small implementation based on Daniel Beer's curve25519 public domain work.
24  * Reworked for curve448 by Sean Parkinson.
25  */
26 
27 #ifdef HAVE_CONFIG_H
28     #include <config.h>
29 #endif
30 
31 #include <wolfssl/wolfcrypt/settings.h>
32 
33 #if defined(HAVE_CURVE448) || defined(HAVE_ED448)
34 
35 #include <wolfssl/wolfcrypt/fe_448.h>
36 
37 #ifdef NO_INLINE
38     #include <wolfssl/wolfcrypt/misc.h>
39 #else
40     #define WOLFSSL_MISC_INCLUDED
41     #include <wolfcrypt/src/misc.c>
42 #endif
43 
44 #if defined(CURVE448_SMALL) || defined(ED448_SMALL)
45 
46 /* Initialize the field element operations.
47  */
fe448_init(void)48 void fe448_init(void)
49 {
50 }
51 
52 /* Normalize the field element.
53  * Ensure result is in range: 0..2^448-2^224-2
54  *
55  * a  [in]  Field element in range 0..2^448-1.
56  */
fe448_norm(word8 * a)57 void fe448_norm(word8* a)
58 {
59     int i;
60     sword16 c = 0;
61     sword16 o = 0;
62 
63     for (i = 0; i < 56; i++) {
64         c += a[i];
65         if ((i == 0) || (i == 28))
66             c += 1;
67         c >>= 8;
68     }
69 
70     for (i = 0; i < 56; i++) {
71         if ((i == 0) || (i == 28)) o += c;
72         o += a[i];
73         a[i] = (word8)o;
74         o >>= 8;
75     }
76 }
77 
78 /* Copy one field element into another: d = a.
79  *
80  * d  [in]  Destination field element.
81  * a  [in]  Source field element.
82  */
fe448_copy(word8 * d,const word8 * a)83 void fe448_copy(word8* d, const word8* a)
84 {
85     int i;
86     for (i = 0; i < 56; i++) {
87          d[i] = a[i];
88     }
89 }
90 
91 /* Conditionally swap the elements.
92  * Constant time implementation.
93  *
94  * a  [in]  First field element.
95  * b  [in]  Second field element.
96  * c  [in]  Swap when 1. Valid values: 0, 1.
97  */
fe448_cswap(word8 * a,word8 * b,int c)98 static void fe448_cswap(word8* a, word8* b, int c)
99 {
100     int i;
101     word8 mask = -(word8)c;
102     word8 t[56];
103 
104     for (i = 0; i < 56; i++)
105         t[i] = (a[i] ^ b[i]) & mask;
106     for (i = 0; i < 56; i++)
107         a[i] ^= t[i];
108     for (i = 0; i < 56; i++)
109         b[i] ^= t[i];
110 }
111 
112 /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
113  *
114  * r  [in]  Field element to hold sum.
115  * a  [in]  Field element to add.
116  * b  [in]  Field element to add.
117  */
fe448_add(word8 * r,const word8 * a,const word8 * b)118 void fe448_add(word8* r, const word8* a, const word8* b)
119 {
120     int i;
121     sword16 c = 0;
122     sword16 o = 0;
123 
124     for (i = 0; i < 56; i++) {
125         c += a[i];
126         c += b[i];
127         r[i] = (word8)c;
128         c >>= 8;
129     }
130 
131     for (i = 0; i < 56; i++) {
132         if ((i == 0) || (i == 28)) o += c;
133         o += r[i];
134         r[i] = (word8)o;
135         o >>= 8;
136     }
137 }
138 
139 /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
140  *
141  * r  [in]  Field element to hold difference.
142  * a  [in]  Field element to subtract from.
143  * b  [in]  Field element to subtract.
144  */
fe448_sub(word8 * r,const word8 * a,const word8 * b)145 void fe448_sub(word8* r, const word8* a, const word8* b)
146 {
147     int i;
148     sword16 c = 0;
149     sword16 o = 0;
150 
151     for (i = 0; i < 56; i++) {
152         if (i == 28)
153             c += 0x1fc;
154         else
155             c += 0x1fe;
156         c += a[i];
157         c -= b[i];
158         r[i] = (word8)c;
159         c >>= 8;
160     }
161 
162     for (i = 0; i < 56; i++) {
163         if ((i == 0) || (i == 28)) o += c;
164         o += r[i];
165         r[i] = (word8)o;
166         o >>= 8;
167     }
168 }
169 
170 /* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
171  *
172  * r  [in]  Field element to hold result.
173  * a  [in]  Field element to multiply.
174  */
fe448_mul39081(word8 * r,const word8 * a)175 void fe448_mul39081(word8* r, const word8* a)
176 {
177     int i;
178     sword32 c = 0;
179     sword32 o = 0;
180 
181     for (i = 0; i < 56; i++) {
182         c += a[i] * (sword32)39081;
183         r[i] = (word8)c;
184         c >>= 8;
185     }
186 
187     for (i = 0; i < 56; i++) {
188         if ((i == 0) || (i == 28)) o += c;
189         o += r[i];
190         r[i] = (word8)o;
191         o >>= 8;
192     }
193 }
194 
195 /* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
196  *
197  * r  [in]  Field element to hold result.
198  * a  [in]  Field element to multiply.
199  * b  [in]  Field element to multiply.
200  */
fe448_mul(word8 * r,const word8 * a,const word8 * b)201 void fe448_mul(word8* r, const word8* a, const word8* b)
202 {
203     int i, k;
204     sword32 c = 0;
205     sword16 o = 0, cc = 0;
206     word8 t[112];
207 
208     for (k = 0; k < 56; k++) {
209         i = 0;
210         for (; i <= k; i++) {
211             c += (sword32)a[i] * b[k - i];
212         }
213         t[k] = (word8)c;
214         c >>= 8;
215     }
216     for (; k < 111; k++) {
217         i = k - 55;
218         for (; i < 56; i++) {
219             c += (sword32)a[i] * b[k - i];
220         }
221         t[k] = (word8)c;
222         c >>= 8;
223     }
224     t[k] = (word8)c;
225 
226     for (i = 0; i < 28; i++) {
227         o += t[i];
228         o += t[i + 56];
229         o += t[i + 84];
230         r[i] = (word8)o;
231         o >>= 8;
232     }
233     for (i = 28; i < 56; i++) {
234         o += t[i];
235         o += t[i + 56];
236         o += t[i + 28];
237         o += t[i + 56];
238         r[i] = (word8)o;
239         o >>= 8;
240     }
241     for (i = 0; i < 56; i++) {
242         if ((i == 0) || (i == 28)) cc += o;
243         cc += r[i];
244         r[i] = (word8)cc;
245         cc >>= 8;
246     }
247 }
248 
249 /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
250  *
251  * r  [in]  Field element to hold result.
252  * a  [in]  Field element to square.
253  */
fe448_sqr(word8 * r,const word8 * a)254 void fe448_sqr(word8* r, const word8* a)
255 {
256     int i, k;
257     sword32 c = 0;
258     sword32 p;
259     sword16 o = 0, cc = 0;
260     word8 t[112];
261 
262     for (k = 0; k < 56; k++) {
263         i = 0;
264         for (; i <= k; i++) {
265             if (k - i < i)
266                 break;
267             p = (sword32)a[i] * a[k - i];
268             if (k - i != i)
269                 p *= 2;
270             c += p;
271         }
272         t[k] = (word8)c;
273         c >>= 8;
274     }
275     for (; k < 111; k++) {
276          i = k - 55;
277         for (; i < 56; i++) {
278             if (k - i < i)
279                 break;
280             p = (sword32)a[i] * a[k - i];
281             if (k - i != i)
282                 p *= 2;
283             c += p;
284         }
285         t[k] = (word8)c;
286         c >>= 8;
287     }
288     t[k] = (word8)c;
289 
290     for (i = 0; i < 28; i++) {
291         o += t[i];
292         o += t[i + 56];
293         o += t[i + 84];
294         r[i] = (word8)o;
295         o >>= 8;
296     }
297     for (i = 28; i < 56; i++) {
298         o += t[i];
299         o += t[i + 56];
300         o += t[i + 28];
301         o += t[i + 56];
302         r[i] = (word8)o;
303         o >>= 8;
304     }
305     for (i = 0; i < 56; i++) {
306         if ((i == 0) || (i == 28)) cc += o;
307         cc += r[i];
308         r[i] = (word8)cc;
309         cc >>= 8;
310     }
311     fe448_norm(r);
312 }
313 
314 /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
315  * Constant time implementation - using Fermat's little theorem:
316  *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
317  * For curve448: p - 2 = 2^448 - 2^224 - 3
318  *
319  * r  [in]  Field element to hold result.
320  * a  [in]  Field element to invert.
321  */
fe448_invert(word8 * r,const word8 * a)322 void fe448_invert(word8* r, const word8* a)
323 {
324     int i;
325     word8 t[56];
326 
327     fe448_sqr(t, a);
328     fe448_mul(t, t, a);
329     for (i = 0; i < 221; i++) {
330         fe448_sqr(t, t);
331         fe448_mul(t, t, a);
332     }
333     fe448_sqr(t, t);
334     for (i = 0; i < 222; i++) {
335         fe448_sqr(t, t);
336         fe448_mul(t, t, a);
337     }
338     fe448_sqr(t, t);
339     fe448_sqr(t, t);
340     fe448_mul(r, t, a);
341 }
342 
343 /* Scalar multiply the point by a number. r = n.a
344  * Uses Montgomery ladder and only requires the x-ordinate.
345  *
346  * r  [in]  Field element to hold result.
347  * n  [in]  Scalar as an array of bytes.
348  * a  [in]  Point to multiply - x-ordinate only.
349  */
curve448(byte * r,const byte * n,const byte * a)350 int curve448(byte* r, const byte* n, const byte* a)
351 {
352     word8 x1[56];
353     word8 x2[56] = {1};
354     word8 z2[56] = {0};
355     word8 x3[56];
356     word8 z3[56] = {1};
357     word8 t0[56];
358     word8 t1[56];
359     int i;
360     unsigned int swap;
361     unsigned int b;
362 
363     fe448_copy(x1, a);
364     fe448_copy(x3, a);
365 
366     swap = 0;
367     for (i = 447; i >= 0; --i) {
368         b = (n[i >> 3] >> (i & 7)) & 1;
369         swap ^= b;
370         fe448_cswap(x2, x3, swap);
371         fe448_cswap(z2, z3, swap);
372         swap = b;
373 
374         /* Montgomery Ladder - double and add */
375         fe448_add(t0, x2, z2);
376         fe448_add(t1, x3, z3);
377         fe448_sub(x2, x2, z2);
378         fe448_sub(x3, x3, z3);
379         fe448_mul(t1, t1, x2);
380         fe448_mul(z3, x3, t0);
381         fe448_sqr(t0, t0);
382         fe448_sqr(x2, x2);
383         fe448_add(x3, z3, t1);
384         fe448_sqr(x3, x3);
385         fe448_sub(z3, z3, t1);
386         fe448_sqr(z3, z3);
387         fe448_mul(z3, z3, x1);
388         fe448_sub(t1, t0, x2);
389         fe448_mul(x2, t0, x2);
390         fe448_mul39081(z2, t1);
391         fe448_add(z2, t0, z2);
392         fe448_mul(z2, z2, t1);
393     }
394     fe448_cswap(x2, x3, swap);
395     fe448_cswap(z2, z3, swap);
396 
397     fe448_invert(z2, z2);
398     fe448_mul(r, x2, z2);
399     fe448_norm(r);
400 
401     return 0;
402 }
403 
404 #ifdef HAVE_ED448
405 /* Check whether field element is not 0.
406  * Field element must have been normalized before call.
407  *
408  * a  [in]  Field element.
409  * returns 0 when zero, and any other value otherwise.
410  */
fe448_isnonzero(const word8 * a)411 int fe448_isnonzero(const word8* a)
412 {
413     int i;
414     byte c = 0;
415     for (i = 0; i < 56; i++)
416         c |= a[i];
417     return c;
418 }
419 
420 /* Negates the field element. r = -a mod (2^448 - 2^224 - 1)
421  * Add 0x200 to each element and subtract 2 from next.
422  * Top element overflow handled by subtracting 2 from index 0 and 28.
423  *
424  * r  [in]  Field element to hold result.
425  * a  [in]  Field element.
426  */
fe448_neg(word8 * r,const word8 * a)427 void fe448_neg(word8* r, const word8* a)
428 {
429     int i;
430     sword16 c = 0;
431     sword16 o = 0;
432 
433     for (i = 0; i < 56; i++) {
434         if (i == 28)
435             c += 0x1fc;
436         else
437             c += 0x1fe;
438         c -= a[i];
439         r[i] = (word8)c;
440         c >>= 8;
441     }
442 
443     for (i = 0; i < 56; i++) {
444         if ((i == 0) || (i == 28)) o += c;
445         o += r[i];
446         r[i] = (word8)o;
447         o >>= 8;
448     }
449 }
450 
451 /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
452  * Used for calcualting y-ordinate from x-ordinate for Ed448.
453  *
454  * r  [in]  Field element to hold result.
455  * a  [in]  Field element to exponentiate.
456  */
fe448_pow_2_446_222_1(word8 * r,const word8 * a)457 void fe448_pow_2_446_222_1(word8* r, const word8* a)
458 {
459     int i;
460     word8 t[56];
461 
462     fe448_sqr(t, a);
463     fe448_mul(t, t, a);
464     for (i = 0; i < 221; i++) {
465         fe448_sqr(t, t);
466         fe448_mul(t, t, a);
467     }
468     fe448_sqr(t, t);
469     for (i = 0; i < 221; i++) {
470         fe448_sqr(t, t);
471         fe448_mul(t, t, a);
472     }
473     fe448_sqr(t, t);
474     fe448_mul(r, t, a);
475 }
476 
477 /* Constant time, conditional move of b into a.
478  * a is not changed if the condition is 0.
479  *
480  * a  A field element.
481  * b  A field element.
482  * c  If 1 then copy and if 0 then don't copy.
483  */
fe448_cmov(word8 * a,const word8 * b,int c)484 void fe448_cmov(word8* a, const word8* b, int c)
485 {
486     int i;
487     word8 m = -(word8)c;
488     word8 t[56];
489 
490     for (i = 0; i < 56; i++)
491         t[i] = m & (a[i] ^ b[i]);
492     for (i = 0; i < 56; i++)
493         a[i] ^= t[i];
494 }
495 
496 #endif /* HAVE_ED448 */
497 #elif defined(CURVED448_128BIT)
498 
499 /* Initialize the field element operations.
500  */
fe448_init(void)501 void fe448_init(void)
502 {
503 }
504 
505 /* Convert the field element from a byte array to an array of 56-bits.
506  *
507  * r  [in]  Array to encode into.
508  * b  [in]  Byte array.
509  */
fe448_from_bytes(sword64 * r,const unsigned char * b)510 void fe448_from_bytes(sword64* r, const unsigned char* b)
511 {
512     r[ 0] =  ((sword64) (b[ 0]) <<  0)
513           |  ((sword64) (b[ 1]) <<  8)
514           |  ((sword64) (b[ 2]) << 16)
515           |  ((sword64) (b[ 3]) << 24)
516           |  ((sword64) (b[ 4]) << 32)
517           |  ((sword64) (b[ 5]) << 40)
518           |  ((sword64) (b[ 6]) << 48);
519     r[ 1] =  ((sword64) (b[ 7]) <<  0)
520           |  ((sword64) (b[ 8]) <<  8)
521           |  ((sword64) (b[ 9]) << 16)
522           |  ((sword64) (b[10]) << 24)
523           |  ((sword64) (b[11]) << 32)
524           |  ((sword64) (b[12]) << 40)
525           |  ((sword64) (b[13]) << 48);
526     r[ 2] =  ((sword64) (b[14]) <<  0)
527           |  ((sword64) (b[15]) <<  8)
528           |  ((sword64) (b[16]) << 16)
529           |  ((sword64) (b[17]) << 24)
530           |  ((sword64) (b[18]) << 32)
531           |  ((sword64) (b[19]) << 40)
532           |  ((sword64) (b[20]) << 48);
533     r[ 3] =  ((sword64) (b[21]) <<  0)
534           |  ((sword64) (b[22]) <<  8)
535           |  ((sword64) (b[23]) << 16)
536           |  ((sword64) (b[24]) << 24)
537           |  ((sword64) (b[25]) << 32)
538           |  ((sword64) (b[26]) << 40)
539           |  ((sword64) (b[27]) << 48);
540     r[ 4] =  ((sword64) (b[28]) <<  0)
541           |  ((sword64) (b[29]) <<  8)
542           |  ((sword64) (b[30]) << 16)
543           |  ((sword64) (b[31]) << 24)
544           |  ((sword64) (b[32]) << 32)
545           |  ((sword64) (b[33]) << 40)
546           |  ((sword64) (b[34]) << 48);
547     r[ 5] =  ((sword64) (b[35]) <<  0)
548           |  ((sword64) (b[36]) <<  8)
549           |  ((sword64) (b[37]) << 16)
550           |  ((sword64) (b[38]) << 24)
551           |  ((sword64) (b[39]) << 32)
552           |  ((sword64) (b[40]) << 40)
553           |  ((sword64) (b[41]) << 48);
554     r[ 6] =  ((sword64) (b[42]) <<  0)
555           |  ((sword64) (b[43]) <<  8)
556           |  ((sword64) (b[44]) << 16)
557           |  ((sword64) (b[45]) << 24)
558           |  ((sword64) (b[46]) << 32)
559           |  ((sword64) (b[47]) << 40)
560           |  ((sword64) (b[48]) << 48);
561     r[ 7] =  ((sword64) (b[49]) <<  0)
562           |  ((sword64) (b[50]) <<  8)
563           |  ((sword64) (b[51]) << 16)
564           |  ((sword64) (b[52]) << 24)
565           |  ((sword64) (b[53]) << 32)
566           |  ((sword64) (b[54]) << 40)
567           |  ((sword64) (b[55]) << 48);
568 }
569 
570 /* Convert the field element to a byte array from an array of 56-bits.
571  *
572  * b  [in]  Byte array.
573  * a  [in]  Array to encode into.
574  */
fe448_to_bytes(unsigned char * b,const sword64 * a)575 void fe448_to_bytes(unsigned char* b, const sword64* a)
576 {
577     sword128 t;
578     /* Mod */
579     sword64 in0 = a[0];
580     sword64 in1 = a[1];
581     sword64 in2 = a[2];
582     sword64 in3 = a[3];
583     sword64 in4 = a[4];
584     sword64 in5 = a[5];
585     sword64 in6 = a[6];
586     sword64 in7 = a[7];
587     sword64 o = in7 >> 56;
588     in7 -= o << 56;
589     in0 += o;
590     in4 += o;
591     o = (in0 + 1) >> 56;
592     o = (o + in1) >> 56;
593     o = (o + in2) >> 56;
594     o = (o + in3) >> 56;
595     o = (o + in4 + 1) >> 56;
596     o = (o + in5) >> 56;
597     o = (o + in6) >> 56;
598     o = (o + in7) >> 56;
599     in0 += o;
600     in4 += o;
601     in7 -= o << 56;
602     o = (in0  >> 56); in1  += o; t = o << 56; in0  -= (sword64)t;
603     o = (in1  >> 56); in2  += o; t = o << 56; in1  -= (sword64)t;
604     o = (in2  >> 56); in3  += o; t = o << 56; in2  -= (sword64)t;
605     o = (in3  >> 56); in4  += o; t = o << 56; in3  -= (sword64)t;
606     o = (in4  >> 56); in5  += o; t = o << 56; in4  -= (sword64)t;
607     o = (in5  >> 56); in6  += o; t = o << 56; in5  -= (sword64)t;
608     o = (in6  >> 56); in7  += o; t = o << 56; in6  -= (sword64)t;
609     o = (in7  >> 56); in0  += o;
610                     in4  += o; t = o << 56; in7  -= (sword64)t;
611 
612     /* Output as bytes */
613     b[ 0] = (in0  >>  0);
614     b[ 1] = (in0  >>  8);
615     b[ 2] = (in0  >> 16);
616     b[ 3] = (in0  >> 24);
617     b[ 4] = (in0  >> 32);
618     b[ 5] = (in0  >> 40);
619     b[ 6] = (in0  >> 48);
620     b[ 7] = (in1  >>  0);
621     b[ 8] = (in1  >>  8);
622     b[ 9] = (in1  >> 16);
623     b[10] = (in1  >> 24);
624     b[11] = (in1  >> 32);
625     b[12] = (in1  >> 40);
626     b[13] = (in1  >> 48);
627     b[14] = (in2  >>  0);
628     b[15] = (in2  >>  8);
629     b[16] = (in2  >> 16);
630     b[17] = (in2  >> 24);
631     b[18] = (in2  >> 32);
632     b[19] = (in2  >> 40);
633     b[20] = (in2  >> 48);
634     b[21] = (in3  >>  0);
635     b[22] = (in3  >>  8);
636     b[23] = (in3  >> 16);
637     b[24] = (in3  >> 24);
638     b[25] = (in3  >> 32);
639     b[26] = (in3  >> 40);
640     b[27] = (in3  >> 48);
641     b[28] = (in4  >>  0);
642     b[29] = (in4  >>  8);
643     b[30] = (in4  >> 16);
644     b[31] = (in4  >> 24);
645     b[32] = (in4  >> 32);
646     b[33] = (in4  >> 40);
647     b[34] = (in4  >> 48);
648     b[35] = (in5  >>  0);
649     b[36] = (in5  >>  8);
650     b[37] = (in5  >> 16);
651     b[38] = (in5  >> 24);
652     b[39] = (in5  >> 32);
653     b[40] = (in5  >> 40);
654     b[41] = (in5  >> 48);
655     b[42] = (in6  >>  0);
656     b[43] = (in6  >>  8);
657     b[44] = (in6  >> 16);
658     b[45] = (in6  >> 24);
659     b[46] = (in6  >> 32);
660     b[47] = (in6  >> 40);
661     b[48] = (in6  >> 48);
662     b[49] = (in7  >>  0);
663     b[50] = (in7  >>  8);
664     b[51] = (in7  >> 16);
665     b[52] = (in7  >> 24);
666     b[53] = (in7  >> 32);
667     b[54] = (in7  >> 40);
668     b[55] = (in7  >> 48);
669 }
670 
671 /* Set the field element to 0.
672  *
673  * a  [in]  Field element.
674  */
fe448_1(sword64 * a)675 void fe448_1(sword64* a)
676 {
677     a[0] = 1;
678     a[1] = 0;
679     a[2] = 0;
680     a[3] = 0;
681     a[4] = 0;
682     a[5] = 0;
683     a[6] = 0;
684     a[7] = 0;
685 }
686 
687 /* Set the field element to 0.
688  *
689  * a  [in]  Field element.
690  */
fe448_0(sword64 * a)691 void fe448_0(sword64* a)
692 {
693     a[0] = 0;
694     a[1] = 0;
695     a[2] = 0;
696     a[3] = 0;
697     a[4] = 0;
698     a[5] = 0;
699     a[6] = 0;
700     a[7] = 0;
701 }
702 
703 /* Copy one field element into another: d = a.
704  *
705  * d  [in]  Destination field element.
706  * a  [in]  Source field element.
707  */
fe448_copy(sword64 * d,const sword64 * a)708 void fe448_copy(sword64* d, const sword64* a)
709 {
710     d[0] = a[0];
711     d[1] = a[1];
712     d[2] = a[2];
713     d[3] = a[3];
714     d[4] = a[4];
715     d[5] = a[5];
716     d[6] = a[6];
717     d[7] = a[7];
718 }
719 
720 /* Conditionally swap the elements.
721  * Constant time implementation.
722  *
723  * a  [in]  First field element.
724  * b  [in]  Second field element.
725  * c  [in]  Swap when 1. Valid values: 0, 1.
726  */
fe448_cswap(sword64 * a,sword64 * b,int c)727 static void fe448_cswap(sword64* a, sword64* b, int c)
728 {
729     sword64 mask = -(sword64)c;
730     sword64 t0 = (a[0] ^ b[0]) & mask;
731     sword64 t1 = (a[1] ^ b[1]) & mask;
732     sword64 t2 = (a[2] ^ b[2]) & mask;
733     sword64 t3 = (a[3] ^ b[3]) & mask;
734     sword64 t4 = (a[4] ^ b[4]) & mask;
735     sword64 t5 = (a[5] ^ b[5]) & mask;
736     sword64 t6 = (a[6] ^ b[6]) & mask;
737     sword64 t7 = (a[7] ^ b[7]) & mask;
738     a[0] ^= t0;
739     a[1] ^= t1;
740     a[2] ^= t2;
741     a[3] ^= t3;
742     a[4] ^= t4;
743     a[5] ^= t5;
744     a[6] ^= t6;
745     a[7] ^= t7;
746     b[0] ^= t0;
747     b[1] ^= t1;
748     b[2] ^= t2;
749     b[3] ^= t3;
750     b[4] ^= t4;
751     b[5] ^= t5;
752     b[6] ^= t6;
753     b[7] ^= t7;
754 }
755 
756 /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
757  *
758  * r  [in]  Field element to hold sum.
759  * a  [in]  Field element to add.
760  * b  [in]  Field element to add.
761  */
fe448_add(sword64 * r,const sword64 * a,const sword64 * b)762 void fe448_add(sword64* r, const sword64* a, const sword64* b)
763 {
764     r[0] = a[0] + b[0];
765     r[1] = a[1] + b[1];
766     r[2] = a[2] + b[2];
767     r[3] = a[3] + b[3];
768     r[4] = a[4] + b[4];
769     r[5] = a[5] + b[5];
770     r[6] = a[6] + b[6];
771     r[7] = a[7] + b[7];
772 }
773 
774 /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
775  *
776  * r  [in]  Field element to hold difference.
777  * a  [in]  Field element to subtract from.
778  * b  [in]  Field element to subtract.
779  */
fe448_sub(sword64 * r,const sword64 * a,const sword64 * b)780 void fe448_sub(sword64* r, const sword64* a, const sword64* b)
781 {
782     r[0] = a[0] - b[0];
783     r[1] = a[1] - b[1];
784     r[2] = a[2] - b[2];
785     r[3] = a[3] - b[3];
786     r[4] = a[4] - b[4];
787     r[5] = a[5] - b[5];
788     r[6] = a[6] - b[6];
789     r[7] = a[7] - b[7];
790 }
791 
792 /* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
793  *
794  * r  [in]  Field element to hold result.
795  * a  [in]  Field element to multiply.
796  */
fe448_mul39081(sword64 * r,const sword64 * a)797 void fe448_mul39081(sword64* r, const sword64* a)
798 {
799     sword128 t;
800     sword64 o;
801     sword128 t0 = a[0] * (sword128)39081;
802     sword128 t1 = a[1] * (sword128)39081;
803     sword128 t2 = a[2] * (sword128)39081;
804     sword128 t3 = a[3] * (sword128)39081;
805     sword128 t4 = a[4] * (sword128)39081;
806     sword128 t5 = a[5] * (sword128)39081;
807     sword128 t6 = a[6] * (sword128)39081;
808     sword128 t7 = a[7] * (sword128)39081;
809     o = (sword64)(t0  >> 56); t1  += o; t = (sword128)o << 56; t0  -= t;
810     o = (sword64)(t1  >> 56); t2  += o; t = (sword128)o << 56; t1  -= t;
811     o = (sword64)(t2  >> 56); t3  += o; t = (sword128)o << 56; t2  -= t;
812     o = (sword64)(t3  >> 56); t4  += o; t = (sword128)o << 56; t3  -= t;
813     o = (sword64)(t4  >> 56); t5  += o; t = (sword128)o << 56; t4  -= t;
814     o = (sword64)(t5  >> 56); t6  += o; t = (sword128)o << 56; t5  -= t;
815     o = (sword64)(t6  >> 56); t7  += o; t = (sword128)o << 56; t6  -= t;
816     o = (sword64)(t7  >> 56); t0  += o;
817                    t4  += o; t = (sword128)o << 56; t7  -= t;
818 
819     /* Store */
820     r[0] = (sword64)t0;
821     r[1] = (sword64)t1;
822     r[2] = (sword64)t2;
823     r[3] = (sword64)t3;
824     r[4] = (sword64)t4;
825     r[5] = (sword64)t5;
826     r[6] = (sword64)t6;
827     r[7] = (sword64)t7;
828 }
829 
830 /* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
831  *
832  * r  [in]  Field element to hold result.
833  * a  [in]  Field element to multiply.
834  * b  [in]  Field element to multiply.
835  */
fe448_mul(sword64 * r,const sword64 * a,const sword64 * b)836 void fe448_mul(sword64* r, const sword64* a, const sword64* b)
837 {
838     sword128 t;
839     sword64 o;
840     sword128 t0   = (sword128)a[ 0] * b[ 0];
841     sword128 t1   = (sword128)a[ 0] * b[ 1];
842     sword128 t101 = (sword128)a[ 1] * b[ 0];
843     sword128 t2   = (sword128)a[ 0] * b[ 2];
844     sword128 t102 = (sword128)a[ 1] * b[ 1];
845     sword128 t202 = (sword128)a[ 2] * b[ 0];
846     sword128 t3   = (sword128)a[ 0] * b[ 3];
847     sword128 t103 = (sword128)a[ 1] * b[ 2];
848     sword128 t203 = (sword128)a[ 2] * b[ 1];
849     sword128 t303 = (sword128)a[ 3] * b[ 0];
850     sword128 t4   = (sword128)a[ 0] * b[ 4];
851     sword128 t104 = (sword128)a[ 1] * b[ 3];
852     sword128 t204 = (sword128)a[ 2] * b[ 2];
853     sword128 t304 = (sword128)a[ 3] * b[ 1];
854     sword128 t404 = (sword128)a[ 4] * b[ 0];
855     sword128 t5   = (sword128)a[ 0] * b[ 5];
856     sword128 t105 = (sword128)a[ 1] * b[ 4];
857     sword128 t205 = (sword128)a[ 2] * b[ 3];
858     sword128 t305 = (sword128)a[ 3] * b[ 2];
859     sword128 t405 = (sword128)a[ 4] * b[ 1];
860     sword128 t505 = (sword128)a[ 5] * b[ 0];
861     sword128 t6   = (sword128)a[ 0] * b[ 6];
862     sword128 t106 = (sword128)a[ 1] * b[ 5];
863     sword128 t206 = (sword128)a[ 2] * b[ 4];
864     sword128 t306 = (sword128)a[ 3] * b[ 3];
865     sword128 t406 = (sword128)a[ 4] * b[ 2];
866     sword128 t506 = (sword128)a[ 5] * b[ 1];
867     sword128 t606 = (sword128)a[ 6] * b[ 0];
868     sword128 t7   = (sword128)a[ 0] * b[ 7];
869     sword128 t107 = (sword128)a[ 1] * b[ 6];
870     sword128 t207 = (sword128)a[ 2] * b[ 5];
871     sword128 t307 = (sword128)a[ 3] * b[ 4];
872     sword128 t407 = (sword128)a[ 4] * b[ 3];
873     sword128 t507 = (sword128)a[ 5] * b[ 2];
874     sword128 t607 = (sword128)a[ 6] * b[ 1];
875     sword128 t707 = (sword128)a[ 7] * b[ 0];
876     sword128 t8   = (sword128)a[ 1] * b[ 7];
877     sword128 t108 = (sword128)a[ 2] * b[ 6];
878     sword128 t208 = (sword128)a[ 3] * b[ 5];
879     sword128 t308 = (sword128)a[ 4] * b[ 4];
880     sword128 t408 = (sword128)a[ 5] * b[ 3];
881     sword128 t508 = (sword128)a[ 6] * b[ 2];
882     sword128 t608 = (sword128)a[ 7] * b[ 1];
883     sword128 t9   = (sword128)a[ 2] * b[ 7];
884     sword128 t109 = (sword128)a[ 3] * b[ 6];
885     sword128 t209 = (sword128)a[ 4] * b[ 5];
886     sword128 t309 = (sword128)a[ 5] * b[ 4];
887     sword128 t409 = (sword128)a[ 6] * b[ 3];
888     sword128 t509 = (sword128)a[ 7] * b[ 2];
889     sword128 t10  = (sword128)a[ 3] * b[ 7];
890     sword128 t110 = (sword128)a[ 4] * b[ 6];
891     sword128 t210 = (sword128)a[ 5] * b[ 5];
892     sword128 t310 = (sword128)a[ 6] * b[ 4];
893     sword128 t410 = (sword128)a[ 7] * b[ 3];
894     sword128 t11  = (sword128)a[ 4] * b[ 7];
895     sword128 t111 = (sword128)a[ 5] * b[ 6];
896     sword128 t211 = (sword128)a[ 6] * b[ 5];
897     sword128 t311 = (sword128)a[ 7] * b[ 4];
898     sword128 t12  = (sword128)a[ 5] * b[ 7];
899     sword128 t112 = (sword128)a[ 6] * b[ 6];
900     sword128 t212 = (sword128)a[ 7] * b[ 5];
901     sword128 t13  = (sword128)a[ 6] * b[ 7];
902     sword128 t113 = (sword128)a[ 7] * b[ 6];
903     sword128 t14  = (sword128)a[ 7] * b[ 7];
904     t1  += t101;
905     t2  += t102; t2  += t202;
906     t3  += t103; t3  += t203; t3  += t303;
907     t4  += t104; t4  += t204; t4  += t304; t4  += t404;
908     t5  += t105; t5  += t205; t5  += t305; t5  += t405; t5  += t505;
909     t6  += t106; t6  += t206; t6  += t306; t6  += t406; t6  += t506;
910     t6  += t606;
911     t7  += t107; t7  += t207; t7  += t307; t7  += t407; t7  += t507;
912     t7  += t607;
913     t7  += t707;
914     t8  += t108; t8  += t208; t8  += t308; t8  += t408; t8  += t508;
915     t8  += t608;
916     t9  += t109; t9  += t209; t9  += t309; t9  += t409; t9  += t509;
917     t10 += t110; t10 += t210; t10 += t310; t10 += t410;
918     t11 += t111; t11 += t211; t11 += t311;
919     t12 += t112; t12 += t212;
920     t13 += t113;
921 
922     /* Reduce */
923     t0  += t8  + t12;
924     t1  += t9  + t13;
925     t2  += t10 + t14;
926     t3  += t11;
927     t4  += t12 + t8  + t12;
928     t5  += t13 + t9  + t13;
929     t6  += t14 + t10 + t14;
930     t7  +=       t11;
931     o = t7  >> 56; t0  += o;
932                    t4  += o; t = (sword128)o << 56; t7  -= t;
933     o = (sword64)(t0  >> 56); t1  += o; t = (sword128)o << 56; t0  -= t;
934     o = (sword64)(t1  >> 56); t2  += o; t = (sword128)o << 56; t1  -= t;
935     o = (sword64)(t2  >> 56); t3  += o; t = (sword128)o << 56; t2  -= t;
936     o = (sword64)(t3  >> 56); t4  += o; t = (sword128)o << 56; t3  -= t;
937     o = (sword64)(t4  >> 56); t5  += o; t = (sword128)o << 56; t4  -= t;
938     o = (sword64)(t5  >> 56); t6  += o; t = (sword128)o << 56; t5  -= t;
939     o = (sword64)(t6  >> 56); t7  += o; t = (sword128)o << 56; t6  -= t;
940     o = (sword64)(t7  >> 56); t0  += o;
941                    t4  += o; t = (sword128)o << 56; t7  -= t;
942 
943     /* Store */
944     r[0] = (sword64)t0;
945     r[1] = (sword64)t1;
946     r[2] = (sword64)t2;
947     r[3] = (sword64)t3;
948     r[4] = (sword64)t4;
949     r[5] = (sword64)t5;
950     r[6] = (sword64)t6;
951     r[7] = (sword64)t7;
952 }
953 
954 /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
955  *
956  * r  [in]  Field element to hold result.
957  * a  [in]  Field element to square.
958  */
fe448_sqr(sword64 * r,const sword64 * a)959 void fe448_sqr(sword64* r, const sword64* a)
960 {
961     sword128 t;
962     sword64 o;
963     sword128 t0   =     (sword128)a[ 0] * a[ 0];
964     sword128 t1   = 2 * (sword128)a[ 0] * a[ 1];
965     sword128 t2   = 2 * (sword128)a[ 0] * a[ 2];
966     sword128 t102 =     (sword128)a[ 1] * a[ 1];
967     sword128 t3   = 2 * (sword128)a[ 0] * a[ 3];
968     sword128 t103 = 2 * (sword128)a[ 1] * a[ 2];
969     sword128 t4   = 2 * (sword128)a[ 0] * a[ 4];
970     sword128 t104 = 2 * (sword128)a[ 1] * a[ 3];
971     sword128 t204 =     (sword128)a[ 2] * a[ 2];
972     sword128 t5   = 2 * (sword128)a[ 0] * a[ 5];
973     sword128 t105 = 2 * (sword128)a[ 1] * a[ 4];
974     sword128 t205 = 2 * (sword128)a[ 2] * a[ 3];
975     sword128 t6   = 2 * (sword128)a[ 0] * a[ 6];
976     sword128 t106 = 2 * (sword128)a[ 1] * a[ 5];
977     sword128 t206 = 2 * (sword128)a[ 2] * a[ 4];
978     sword128 t306 =     (sword128)a[ 3] * a[ 3];
979     sword128 t7   = 2 * (sword128)a[ 0] * a[ 7];
980     sword128 t107 = 2 * (sword128)a[ 1] * a[ 6];
981     sword128 t207 = 2 * (sword128)a[ 2] * a[ 5];
982     sword128 t307 = 2 * (sword128)a[ 3] * a[ 4];
983     sword128 t8   = 2 * (sword128)a[ 1] * a[ 7];
984     sword128 t108 = 2 * (sword128)a[ 2] * a[ 6];
985     sword128 t208 = 2 * (sword128)a[ 3] * a[ 5];
986     sword128 t308 =     (sword128)a[ 4] * a[ 4];
987     sword128 t9   = 2 * (sword128)a[ 2] * a[ 7];
988     sword128 t109 = 2 * (sword128)a[ 3] * a[ 6];
989     sword128 t209 = 2 * (sword128)a[ 4] * a[ 5];
990     sword128 t10  = 2 * (sword128)a[ 3] * a[ 7];
991     sword128 t110 = 2 * (sword128)a[ 4] * a[ 6];
992     sword128 t210 =     (sword128)a[ 5] * a[ 5];
993     sword128 t11  = 2 * (sword128)a[ 4] * a[ 7];
994     sword128 t111 = 2 * (sword128)a[ 5] * a[ 6];
995     sword128 t12  = 2 * (sword128)a[ 5] * a[ 7];
996     sword128 t112 =     (sword128)a[ 6] * a[ 6];
997     sword128 t13  = 2 * (sword128)a[ 6] * a[ 7];
998     sword128 t14  =     (sword128)a[ 7] * a[ 7];
999     t2  += t102;
1000     t3  += t103;
1001     t4  += t104; t4  += t204;
1002     t5  += t105; t5  += t205;
1003     t6  += t106; t6  += t206; t6  += t306;
1004     t7  += t107; t7  += t207; t7  += t307;
1005     t8  += t108; t8  += t208; t8  += t308;
1006     t9  += t109; t9  += t209;
1007     t10 += t110; t10 += t210;
1008     t11 += t111;
1009     t12 += t112;
1010 
1011     /* Reduce */
1012     t0  += t8  + t12;
1013     t1  += t9  + t13;
1014     t2  += t10 + t14;
1015     t3  += t11;
1016     t4  += t12 + t8  + t12;
1017     t5  += t13 + t9  + t13;
1018     t6  += t14 + t10 + t14;
1019     t7  +=       t11;
1020     o = t7  >> 56; t0  += o;
1021                    t4  += o; t = (sword128)o << 56; t7  -= t;
1022     o = (sword64)(t0  >> 56); t1  += o; t = (sword128)o << 56; t0  -= t;
1023     o = (sword64)(t1  >> 56); t2  += o; t = (sword128)o << 56; t1  -= t;
1024     o = (sword64)(t2  >> 56); t3  += o; t = (sword128)o << 56; t2  -= t;
1025     o = (sword64)(t3  >> 56); t4  += o; t = (sword128)o << 56; t3  -= t;
1026     o = (sword64)(t4  >> 56); t5  += o; t = (sword128)o << 56; t4  -= t;
1027     o = (sword64)(t5  >> 56); t6  += o; t = (sword128)o << 56; t5  -= t;
1028     o = (sword64)(t6  >> 56); t7  += o; t = (sword128)o << 56; t6  -= t;
1029     o = (sword64)(t7  >> 56); t0  += o;
1030                    t4  += o; t = (sword128)o << 56; t7  -= t;
1031 
1032     /* Store */
1033     r[0] = (sword64)t0;
1034     r[1] = (sword64)t1;
1035     r[2] = (sword64)t2;
1036     r[3] = (sword64)t3;
1037     r[4] = (sword64)t4;
1038     r[5] = (sword64)t5;
1039     r[6] = (sword64)t6;
1040     r[7] = (sword64)t7;
1041 }
1042 
1043 /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
1044  * Constant time implementation - using Fermat's little theorem:
1045  *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
1046  * For curve448: p - 2 = 2^448 - 2^224 - 3
1047  *
1048  * r  [in]  Field element to hold result.
1049  * a  [in]  Field element to invert.
1050  */
fe448_invert(sword64 * r,const sword64 * a)1051 void fe448_invert(sword64* r, const sword64* a)
1052 {
1053     sword64 t1[8];
1054     sword64 t2[8];
1055     sword64 t3[8];
1056     sword64 t4[8];
1057     int i;
1058 
1059     fe448_sqr(t1, a);
1060     /* t1 = 2 */
1061     fe448_mul(t1, t1, a);
1062     /* t1 = 3 */
1063     fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1064     /* t2 = c */
1065     fe448_mul(t3, t2, a);
1066     /* t3 = d */
1067     fe448_mul(t1, t2, t1);
1068     /* t1 = f */
1069     fe448_sqr(t2, t1);
1070     /* t2 = 1e */
1071     fe448_mul(t4, t2, a);
1072     /* t4 = 1f */
1073     fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1074     /* t2 = 3e0 */
1075     fe448_mul(t1, t2, t4);
1076     /* t1 = 3ff */
1077     fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
1078     /* t2 = ffc00 */
1079     fe448_mul(t1, t2, t1);
1080     /* t1 = fffff */
1081     fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1082     /* t2 = 1ffffe0 */
1083     fe448_mul(t1, t2, t4);
1084     /* t1 = 1ffffff */
1085     fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
1086     /* t2 = 3fffffe000000 */
1087     fe448_mul(t1, t2, t1);
1088     /* t1 = 3ffffffffffff */
1089     fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
1090     /* t2 = 7fffffffffffe0 */
1091     fe448_mul(t1, t2, t4);
1092     /* t1 = 7fffffffffffff */
1093     fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
1094     /* t2 = 3fffffffffffff80000000000000 */
1095     fe448_mul(t1, t2, t1);
1096     /* t1 = 3fffffffffffffffffffffffffff */
1097     fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
1098     /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
1099     fe448_mul(t1, t2, t1);
1100     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1101     fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1102     /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
1103     fe448_mul(t3, t3, t2);
1104     /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1105     fe448_mul(t1, t3, a);
1106     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1107     fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
1108     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
1109     fe448_mul(r, t3, t1);
1110     /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
1111 }
1112 
1113 /* Scalar multiply the point by a number. r = n.a
1114  * Uses Montgomery ladder and only requires the x-ordinate.
1115  *
1116  * r  [in]  Field element to hold result.
1117  * n  [in]  Scalar as an array of bytes.
1118  * a  [in]  Point to multiply - x-ordinate only.
1119  */
curve448(byte * r,const byte * n,const byte * a)1120 int curve448(byte* r, const byte* n, const byte* a)
1121 {
1122     sword64 x1[8];
1123     sword64 x2[8];
1124     sword64 z2[8];
1125     sword64 x3[8];
1126     sword64 z3[8];
1127     sword64 t0[8];
1128     sword64 t1[8];
1129     int i;
1130     unsigned int swap;
1131     unsigned int b;
1132 
1133     fe448_from_bytes(x1, a);
1134     fe448_1(x2);
1135     fe448_0(z2);
1136     fe448_copy(x3, x1);
1137     fe448_1(z3);
1138 
1139     swap = 0;
1140     for (i = 447; i >= 0; --i) {
1141         b = (n[i >> 3] >> (i & 7)) & 1;
1142         swap ^= b;
1143         fe448_cswap(x2, x3, swap);
1144         fe448_cswap(z2, z3, swap);
1145         swap = b;
1146 
1147         /* Montgomery Ladder - double and add */
1148         fe448_add(t0, x2, z2);
1149         fe448_reduce(t0);
1150         fe448_add(t1, x3, z3);
1151         fe448_reduce(t1);
1152         fe448_sub(x2, x2, z2);
1153         fe448_sub(x3, x3, z3);
1154         fe448_mul(t1, t1, x2);
1155         fe448_mul(z3, x3, t0);
1156         fe448_sqr(t0, t0);
1157         fe448_sqr(x2, x2);
1158         fe448_add(x3, z3, t1);
1159         fe448_reduce(x3);
1160         fe448_sqr(x3, x3);
1161         fe448_sub(z3, z3, t1);
1162         fe448_sqr(z3, z3);
1163         fe448_mul(z3, z3, x1);
1164         fe448_sub(t1, t0, x2);
1165         fe448_mul(x2, t0, x2);
1166         fe448_mul39081(z2, t1);
1167         fe448_add(z2, t0, z2);
1168         fe448_mul(z2, z2, t1);
1169     }
1170     /* Last two bits are 0 - no final swap check required. */
1171 
1172     fe448_invert(z2, z2);
1173     fe448_mul(x2, x2, z2);
1174     fe448_to_bytes(r, x2);
1175 
1176     return 0;
1177 }
1178 
1179 #ifdef HAVE_ED448
1180 /* Check whether field element is not 0.
1181  * Must convert to a normalized form before checking.
1182  *
1183  * a  [in]  Field element.
1184  * returns 0 when zero, and any other value otherwise.
1185  */
fe448_isnonzero(const sword64 * a)1186 int fe448_isnonzero(const sword64* a)
1187 {
1188     byte b[56];
1189     int i;
1190     byte c = 0;
1191     fe448_to_bytes(b, a);
1192     for (i = 0; i < 56; i++)
1193         c |= b[i];
1194     return c;
1195 }
1196 
1197 /* Check whether field element is negative.
1198  * Must convert to a normalized form before checking.
1199  *
1200  * a  [in]  Field element.
1201  * returns 1 when negative, and 0 otherwise.
1202  */
fe448_isnegative(const sword64 * a)1203 int fe448_isnegative(const sword64* a)
1204 {
1205     byte b[56];
1206     fe448_to_bytes(b, a);
1207     return b[0] & 1;
1208 }
1209 
1210 /* Negates the field element. r = -a
1211  *
1212  * r  [in]  Field element to hold result.
1213  * a  [in]  Field element.
1214  */
fe448_neg(sword64 * r,const sword64 * a)1215 void fe448_neg(sword64* r, const sword64* a)
1216 {
1217     r[0] = -a[0];
1218     r[1] = -a[1];
1219     r[2] = -a[2];
1220     r[3] = -a[3];
1221     r[4] = -a[4];
1222     r[5] = -a[5];
1223     r[6] = -a[6];
1224     r[7] = -a[7];
1225 }
1226 
1227 /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
1228  * Used for calcualting y-ordinate from x-ordinate for Ed448.
1229  *
1230  * r  [in]  Field element to hold result.
1231  * a  [in]  Field element to exponentiate.
1232  */
fe448_pow_2_446_222_1(sword64 * r,const sword64 * a)1233 void fe448_pow_2_446_222_1(sword64* r, const sword64* a)
1234 {
1235     sword64 t1[8];
1236     sword64 t2[8];
1237     sword64 t3[8];
1238     sword64 t4[8];
1239     sword64 t5[8];
1240     int i;
1241 
1242     fe448_sqr(t3, a);
1243     /* t3 = 2 */
1244     fe448_mul(t1, t3, a);
1245     /* t1 = 3 */
1246     fe448_sqr(t5, t1);
1247     /* t5 = 6 */
1248     fe448_mul(t5, t5, a);
1249     /* t5 = 7 */
1250     fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
1251     /* t2 = c */
1252     fe448_mul(t3, t2, t3);
1253     /* t3 = e */
1254     fe448_mul(t1, t2, t1);
1255     /* t1 = f */
1256     fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
1257     /* t2 = 78 */
1258     fe448_mul(t5, t2, t5);
1259     /* t5 = 7f */
1260     fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
1261     /* t2 = f0 */
1262     fe448_mul(t1, t2, t1);
1263     /* t1 = ff */
1264     fe448_mul(t3, t3, t2);
1265     /* t3 = fe */
1266     fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
1267     /* t2 = 7f80 */
1268     fe448_mul(t5, t2, t5);
1269     /* t5 = 7fff */
1270     fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
1271     /* t2 = ff00 */
1272     fe448_mul(t1, t2, t1);
1273     /* t1 = ffff */
1274     fe448_mul(t3, t3, t2);
1275     /* t3 = fffe */
1276     fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
1277     /* t2 = 3fff8000 */
1278     fe448_mul(t5, t2, t5);
1279     /* t5 = 3fffffff */
1280     fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
1281     /* t2 = ffff0000 */
1282     fe448_mul(t1, t2, t1);
1283     /* t1 = ffffffff */
1284     fe448_mul(t3, t3, t2);
1285     /* t3 = fffffffe */
1286     fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1287     /* t2 = ffffffff00000000 */
1288     fe448_mul(t2, t2, t1);
1289     /* t2 = ffffffffffffffff */
1290     fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1291     /* t1 = ffffffffffffffff0000000000000000 */
1292     fe448_mul(t1, t1, t2);
1293     /* t1 = ffffffffffffffffffffffffffffffff */
1294     fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
1295     /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
1296     fe448_mul(t4, t1, t2);
1297     /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
1298     fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
1299     /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
1300     fe448_mul(t3, t3, t2);
1301     /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
1302     fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
1303     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
1304     fe448_mul(t1, t1, t4);
1305     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
1306     fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
1307     /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
1308     fe448_mul(r, t5, t1);
1309     /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
1310 }
1311 
1312 /* Constant time, conditional move of b into a.
1313  * a is not changed if the condition is 0.
1314  *
1315  * a  A field element.
1316  * b  A field element.
1317  * c  If 1 then copy and if 0 then don't copy.
1318  */
fe448_cmov(sword64 * a,const sword64 * b,int c)1319 void fe448_cmov(sword64* a, const sword64* b, int c)
1320 {
1321     sword64 m = -(sword64)c;
1322     sword64 t0 = m & (a[0] ^ b[0]);
1323     sword64 t1 = m & (a[1] ^ b[1]);
1324     sword64 t2 = m & (a[2] ^ b[2]);
1325     sword64 t3 = m & (a[3] ^ b[3]);
1326     sword64 t4 = m & (a[4] ^ b[4]);
1327     sword64 t5 = m & (a[5] ^ b[5]);
1328     sword64 t6 = m & (a[6] ^ b[6]);
1329     sword64 t7 = m & (a[7] ^ b[7]);
1330 
1331     a[0] ^= t0;
1332     a[1] ^= t1;
1333     a[2] ^= t2;
1334     a[3] ^= t3;
1335     a[4] ^= t4;
1336     a[5] ^= t5;
1337     a[6] ^= t6;
1338     a[7] ^= t7;
1339 }
1340 
1341 #endif /* HAVE_ED448 */
1342 #else
1343 
1344 /* Initialize the field element operations.
1345  */
fe448_init(void)1346 void fe448_init(void)
1347 {
1348 }
1349 
1350 /* Convert the field element from a byte array to an array of 28-bits.
1351  *
1352  * r  [in]  Array to encode into.
1353  * b  [in]  Byte array.
1354  */
fe448_from_bytes(sword32 * r,const unsigned char * b)1355 void fe448_from_bytes(sword32* r, const unsigned char* b)
1356 {
1357     r[ 0] =  (((sword32)((b[ 0]        ) >>  0)) <<  0)
1358           |  (((sword32)((b[ 1]        ) >>  0)) <<  8)
1359           |  (((sword32)((b[ 2]        ) >>  0)) << 16)
1360           | ((((sword32)((b[ 3] & 0xf )) >>  0)) << 24);
1361     r[ 1] =  (((sword32)((b[ 3]        ) >>  4)) <<  0)
1362           |  (((sword32)((b[ 4]        ) >>  0)) <<  4)
1363           |  (((sword32)((b[ 5]        ) >>  0)) << 12)
1364           |  (((sword32)((b[ 6]        ) >>  0)) << 20);
1365     r[ 2] =  (((sword32)((b[ 7]        ) >>  0)) <<  0)
1366           |  (((sword32)((b[ 8]        ) >>  0)) <<  8)
1367           |  (((sword32)((b[ 9]        ) >>  0)) << 16)
1368           | ((((sword32)((b[10] & 0xf )) >>  0)) << 24);
1369     r[ 3] =  (((sword32)((b[10]        ) >>  4)) <<  0)
1370           |  (((sword32)((b[11]        ) >>  0)) <<  4)
1371           |  (((sword32)((b[12]        ) >>  0)) << 12)
1372           |  (((sword32)((b[13]        ) >>  0)) << 20);
1373     r[ 4] =  (((sword32)((b[14]        ) >>  0)) <<  0)
1374           |  (((sword32)((b[15]        ) >>  0)) <<  8)
1375           |  (((sword32)((b[16]        ) >>  0)) << 16)
1376           | ((((sword32)((b[17] & 0xf )) >>  0)) << 24);
1377     r[ 5] =  (((sword32)((b[17]        ) >>  4)) <<  0)
1378           |  (((sword32)((b[18]        ) >>  0)) <<  4)
1379           |  (((sword32)((b[19]        ) >>  0)) << 12)
1380           |  (((sword32)((b[20]        ) >>  0)) << 20);
1381     r[ 6] =  (((sword32)((b[21]        ) >>  0)) <<  0)
1382           |  (((sword32)((b[22]        ) >>  0)) <<  8)
1383           |  (((sword32)((b[23]        ) >>  0)) << 16)
1384           | ((((sword32)((b[24] & 0xf )) >>  0)) << 24);
1385     r[ 7] =  (((sword32)((b[24]        ) >>  4)) <<  0)
1386           |  (((sword32)((b[25]        ) >>  0)) <<  4)
1387           |  (((sword32)((b[26]        ) >>  0)) << 12)
1388           |  (((sword32)((b[27]        ) >>  0)) << 20);
1389     r[ 8] =  (((sword32)((b[28]        ) >>  0)) <<  0)
1390           |  (((sword32)((b[29]        ) >>  0)) <<  8)
1391           |  (((sword32)((b[30]        ) >>  0)) << 16)
1392           | ((((sword32)((b[31] & 0xf )) >>  0)) << 24);
1393     r[ 9] =  (((sword32)((b[31]        ) >>  4)) <<  0)
1394           |  (((sword32)((b[32]        ) >>  0)) <<  4)
1395           |  (((sword32)((b[33]        ) >>  0)) << 12)
1396           |  (((sword32)((b[34]        ) >>  0)) << 20);
1397     r[10] =  (((sword32)((b[35]        ) >>  0)) <<  0)
1398           |  (((sword32)((b[36]        ) >>  0)) <<  8)
1399           |  (((sword32)((b[37]        ) >>  0)) << 16)
1400           | ((((sword32)((b[38] & 0xf )) >>  0)) << 24);
1401     r[11] =  (((sword32)((b[38]        ) >>  4)) <<  0)
1402           |  (((sword32)((b[39]        ) >>  0)) <<  4)
1403           |  (((sword32)((b[40]        ) >>  0)) << 12)
1404           |  (((sword32)((b[41]        ) >>  0)) << 20);
1405     r[12] =  (((sword32)((b[42]        ) >>  0)) <<  0)
1406           |  (((sword32)((b[43]        ) >>  0)) <<  8)
1407           |  (((sword32)((b[44]        ) >>  0)) << 16)
1408           | ((((sword32)((b[45] & 0xf )) >>  0)) << 24);
1409     r[13] =  (((sword32)((b[45]        ) >>  4)) <<  0)
1410           |  (((sword32)((b[46]        ) >>  0)) <<  4)
1411           |  (((sword32)((b[47]        ) >>  0)) << 12)
1412           |  (((sword32)((b[48]        ) >>  0)) << 20);
1413     r[14] =  (((sword32)((b[49]        ) >>  0)) <<  0)
1414           |  (((sword32)((b[50]        ) >>  0)) <<  8)
1415           |  (((sword32)((b[51]        ) >>  0)) << 16)
1416           | ((((sword32)((b[52] & 0xf )) >>  0)) << 24);
1417     r[15] =  (((sword32)((b[52]        ) >>  4)) <<  0)
1418           |  (((sword32)((b[53]        ) >>  0)) <<  4)
1419           |  (((sword32)((b[54]        ) >>  0)) << 12)
1420           |  (((sword32)((b[55]        ) >>  0)) << 20);
1421 }
1422 
1423 /* Convert the field element to a byte array from an array of 28-bits.
1424  *
1425  * b  [in]  Byte array.
1426  * a  [in]  Array to encode into.
1427  */
fe448_to_bytes(unsigned char * b,const sword32 * a)1428 void fe448_to_bytes(unsigned char* b, const sword32* a)
1429 {
1430     sword64 t;
1431     /* Mod */
1432     sword32 in0 = a[0];
1433     sword32 in1 = a[1];
1434     sword32 in2 = a[2];
1435     sword32 in3 = a[3];
1436     sword32 in4 = a[4];
1437     sword32 in5 = a[5];
1438     sword32 in6 = a[6];
1439     sword32 in7 = a[7];
1440     sword32 in8 = a[8];
1441     sword32 in9 = a[9];
1442     sword32 in10 = a[10];
1443     sword32 in11 = a[11];
1444     sword32 in12 = a[12];
1445     sword32 in13 = a[13];
1446     sword32 in14 = a[14];
1447     sword32 in15 = a[15];
1448     sword32 o = in15 >> 28;
1449     in15 -= o << 28;
1450     in0 += o;
1451     in8 += o;
1452     o = (in0 + 1) >> 28;
1453     o = (o + in1) >> 28;
1454     o = (o + in2) >> 28;
1455     o = (o + in3) >> 28;
1456     o = (o + in4) >> 28;
1457     o = (o + in5) >> 28;
1458     o = (o + in6) >> 28;
1459     o = (o + in7) >> 28;
1460     o = (o + in8 + 1) >> 28;
1461     o = (o + in9) >> 28;
1462     o = (o + in10) >> 28;
1463     o = (o + in11) >> 28;
1464     o = (o + in12) >> 28;
1465     o = (o + in13) >> 28;
1466     o = (o + in14) >> 28;
1467     o = (o + in15) >> 28;
1468     in0 += o;
1469     in8 += o;
1470     in15 -= o << 28;
1471     o = (in0  >> 28); in1  += o; t = o << 28; in0  -= (sword32)t;
1472     o = (in1  >> 28); in2  += o; t = o << 28; in1  -= (sword32)t;
1473     o = (in2  >> 28); in3  += o; t = o << 28; in2  -= (sword32)t;
1474     o = (in3  >> 28); in4  += o; t = o << 28; in3  -= (sword32)t;
1475     o = (in4  >> 28); in5  += o; t = o << 28; in4  -= (sword32)t;
1476     o = (in5  >> 28); in6  += o; t = o << 28; in5  -= (sword32)t;
1477     o = (in6  >> 28); in7  += o; t = o << 28; in6  -= (sword32)t;
1478     o = (in7  >> 28); in8  += o; t = o << 28; in7  -= (sword32)t;
1479     o = (in8  >> 28); in9  += o; t = o << 28; in8  -= (sword32)t;
1480     o = (in9  >> 28); in10 += o; t = o << 28; in9  -= (sword32)t;
1481     o = (in10 >> 28); in11 += o; t = o << 28; in10 -= (sword32)t;
1482     o = (in11 >> 28); in12 += o; t = o << 28; in11 -= (sword32)t;
1483     o = (in12 >> 28); in13 += o; t = o << 28; in12 -= (sword32)t;
1484     o = (in13 >> 28); in14 += o; t = o << 28; in13 -= (sword32)t;
1485     o = (in14 >> 28); in15 += o; t = o << 28; in14 -= (sword32)t;
1486     o = (in15 >> 28); in0  += o;
1487                     in8  += o; t = o << 28; in15 -= (sword32)t;
1488 
1489     /* Output as bytes */
1490     b[ 0] = (in0  >>  0);
1491     b[ 1] = (in0  >>  8);
1492     b[ 2] = (in0  >> 16);
1493     b[ 3] = (in0  >> 24) + ((in1  >>  0) <<  4);
1494     b[ 4] = (in1  >>  4);
1495     b[ 5] = (in1  >> 12);
1496     b[ 6] = (in1  >> 20);
1497     b[ 7] = (in2  >>  0);
1498     b[ 8] = (in2  >>  8);
1499     b[ 9] = (in2  >> 16);
1500     b[10] = (in2  >> 24) + ((in3  >>  0) <<  4);
1501     b[11] = (in3  >>  4);
1502     b[12] = (in3  >> 12);
1503     b[13] = (in3  >> 20);
1504     b[14] = (in4  >>  0);
1505     b[15] = (in4  >>  8);
1506     b[16] = (in4  >> 16);
1507     b[17] = (in4  >> 24) + ((in5  >>  0) <<  4);
1508     b[18] = (in5  >>  4);
1509     b[19] = (in5  >> 12);
1510     b[20] = (in5  >> 20);
1511     b[21] = (in6  >>  0);
1512     b[22] = (in6  >>  8);
1513     b[23] = (in6  >> 16);
1514     b[24] = (in6  >> 24) + ((in7  >>  0) <<  4);
1515     b[25] = (in7  >>  4);
1516     b[26] = (in7  >> 12);
1517     b[27] = (in7  >> 20);
1518     b[28] = (in8  >>  0);
1519     b[29] = (in8  >>  8);
1520     b[30] = (in8  >> 16);
1521     b[31] = (in8  >> 24) + ((in9  >>  0) <<  4);
1522     b[32] = (in9  >>  4);
1523     b[33] = (in9  >> 12);
1524     b[34] = (in9  >> 20);
1525     b[35] = (in10 >>  0);
1526     b[36] = (in10 >>  8);
1527     b[37] = (in10 >> 16);
1528     b[38] = (in10 >> 24) + ((in11 >>  0) <<  4);
1529     b[39] = (in11 >>  4);
1530     b[40] = (in11 >> 12);
1531     b[41] = (in11 >> 20);
1532     b[42] = (in12 >>  0);
1533     b[43] = (in12 >>  8);
1534     b[44] = (in12 >> 16);
1535     b[45] = (in12 >> 24) + ((in13 >>  0) <<  4);
1536     b[46] = (in13 >>  4);
1537     b[47] = (in13 >> 12);
1538     b[48] = (in13 >> 20);
1539     b[49] = (in14 >>  0);
1540     b[50] = (in14 >>  8);
1541     b[51] = (in14 >> 16);
1542     b[52] = (in14 >> 24) + ((in15 >>  0) <<  4);
1543     b[53] = (in15 >>  4);
1544     b[54] = (in15 >> 12);
1545     b[55] = (in15 >> 20);
1546 }
1547 
1548 /* Set the field element to 0.
1549  *
1550  * a  [in]  Field element.
1551  */
fe448_1(sword32 * a)1552 void fe448_1(sword32* a)
1553 {
1554     a[0] = 1;
1555     a[1] = 0;
1556     a[2] = 0;
1557     a[3] = 0;
1558     a[4] = 0;
1559     a[5] = 0;
1560     a[6] = 0;
1561     a[7] = 0;
1562     a[8] = 0;
1563     a[9] = 0;
1564     a[10] = 0;
1565     a[11] = 0;
1566     a[12] = 0;
1567     a[13] = 0;
1568     a[14] = 0;
1569     a[15] = 0;
1570 }
1571 
1572 /* Set the field element to 0.
1573  *
1574  * a  [in]  Field element.
1575  */
fe448_0(sword32 * a)1576 void fe448_0(sword32* a)
1577 {
1578     a[0] = 0;
1579     a[1] = 0;
1580     a[2] = 0;
1581     a[3] = 0;
1582     a[4] = 0;
1583     a[5] = 0;
1584     a[6] = 0;
1585     a[7] = 0;
1586     a[8] = 0;
1587     a[9] = 0;
1588     a[10] = 0;
1589     a[11] = 0;
1590     a[12] = 0;
1591     a[13] = 0;
1592     a[14] = 0;
1593     a[15] = 0;
1594 }
1595 
1596 /* Copy one field element into another: d = a.
1597  *
1598  * d  [in]  Destination field element.
1599  * a  [in]  Source field element.
1600  */
fe448_copy(sword32 * d,const sword32 * a)1601 void fe448_copy(sword32* d, const sword32* a)
1602 {
1603     d[0] = a[0];
1604     d[1] = a[1];
1605     d[2] = a[2];
1606     d[3] = a[3];
1607     d[4] = a[4];
1608     d[5] = a[5];
1609     d[6] = a[6];
1610     d[7] = a[7];
1611     d[8] = a[8];
1612     d[9] = a[9];
1613     d[10] = a[10];
1614     d[11] = a[11];
1615     d[12] = a[12];
1616     d[13] = a[13];
1617     d[14] = a[14];
1618     d[15] = a[15];
1619 }
1620 
1621 /* Conditionally swap the elements.
1622  * Constant time implementation.
1623  *
1624  * a  [in]  First field element.
1625  * b  [in]  Second field element.
1626  * c  [in]  Swap when 1. Valid values: 0, 1.
1627  */
fe448_cswap(sword32 * a,sword32 * b,int c)1628 static void fe448_cswap(sword32* a, sword32* b, int c)
1629 {
1630     sword32 mask = -(sword32)c;
1631     sword32 t0 = (a[0] ^ b[0]) & mask;
1632     sword32 t1 = (a[1] ^ b[1]) & mask;
1633     sword32 t2 = (a[2] ^ b[2]) & mask;
1634     sword32 t3 = (a[3] ^ b[3]) & mask;
1635     sword32 t4 = (a[4] ^ b[4]) & mask;
1636     sword32 t5 = (a[5] ^ b[5]) & mask;
1637     sword32 t6 = (a[6] ^ b[6]) & mask;
1638     sword32 t7 = (a[7] ^ b[7]) & mask;
1639     sword32 t8 = (a[8] ^ b[8]) & mask;
1640     sword32 t9 = (a[9] ^ b[9]) & mask;
1641     sword32 t10 = (a[10] ^ b[10]) & mask;
1642     sword32 t11 = (a[11] ^ b[11]) & mask;
1643     sword32 t12 = (a[12] ^ b[12]) & mask;
1644     sword32 t13 = (a[13] ^ b[13]) & mask;
1645     sword32 t14 = (a[14] ^ b[14]) & mask;
1646     sword32 t15 = (a[15] ^ b[15]) & mask;
1647     a[0] ^= t0;
1648     a[1] ^= t1;
1649     a[2] ^= t2;
1650     a[3] ^= t3;
1651     a[4] ^= t4;
1652     a[5] ^= t5;
1653     a[6] ^= t6;
1654     a[7] ^= t7;
1655     a[8] ^= t8;
1656     a[9] ^= t9;
1657     a[10] ^= t10;
1658     a[11] ^= t11;
1659     a[12] ^= t12;
1660     a[13] ^= t13;
1661     a[14] ^= t14;
1662     a[15] ^= t15;
1663     b[0] ^= t0;
1664     b[1] ^= t1;
1665     b[2] ^= t2;
1666     b[3] ^= t3;
1667     b[4] ^= t4;
1668     b[5] ^= t5;
1669     b[6] ^= t6;
1670     b[7] ^= t7;
1671     b[8] ^= t8;
1672     b[9] ^= t9;
1673     b[10] ^= t10;
1674     b[11] ^= t11;
1675     b[12] ^= t12;
1676     b[13] ^= t13;
1677     b[14] ^= t14;
1678     b[15] ^= t15;
1679 }
1680 
1681 /* Add two field elements. r = (a + b) mod (2^448 - 2^224 - 1)
1682  *
1683  * r  [in]  Field element to hold sum.
1684  * a  [in]  Field element to add.
1685  * b  [in]  Field element to add.
1686  */
fe448_add(sword32 * r,const sword32 * a,const sword32 * b)1687 void fe448_add(sword32* r, const sword32* a, const sword32* b)
1688 {
1689     r[0] = a[0] + b[0];
1690     r[1] = a[1] + b[1];
1691     r[2] = a[2] + b[2];
1692     r[3] = a[3] + b[3];
1693     r[4] = a[4] + b[4];
1694     r[5] = a[5] + b[5];
1695     r[6] = a[6] + b[6];
1696     r[7] = a[7] + b[7];
1697     r[8] = a[8] + b[8];
1698     r[9] = a[9] + b[9];
1699     r[10] = a[10] + b[10];
1700     r[11] = a[11] + b[11];
1701     r[12] = a[12] + b[12];
1702     r[13] = a[13] + b[13];
1703     r[14] = a[14] + b[14];
1704     r[15] = a[15] + b[15];
1705 }
1706 
1707 /* Subtract a field element from another. r = (a - b) mod (2^448 - 2^224 - 1)
1708  *
1709  * r  [in]  Field element to hold difference.
1710  * a  [in]  Field element to subtract from.
1711  * b  [in]  Field element to subtract.
1712  */
fe448_sub(sword32 * r,const sword32 * a,const sword32 * b)1713 void fe448_sub(sword32* r, const sword32* a, const sword32* b)
1714 {
1715     r[0] = a[0] - b[0];
1716     r[1] = a[1] - b[1];
1717     r[2] = a[2] - b[2];
1718     r[3] = a[3] - b[3];
1719     r[4] = a[4] - b[4];
1720     r[5] = a[5] - b[5];
1721     r[6] = a[6] - b[6];
1722     r[7] = a[7] - b[7];
1723     r[8] = a[8] - b[8];
1724     r[9] = a[9] - b[9];
1725     r[10] = a[10] - b[10];
1726     r[11] = a[11] - b[11];
1727     r[12] = a[12] - b[12];
1728     r[13] = a[13] - b[13];
1729     r[14] = a[14] - b[14];
1730     r[15] = a[15] - b[15];
1731 }
1732 
fe448_reduce(sword32 * a)1733 void fe448_reduce(sword32* a)
1734 {
1735     sword64 o;
1736 
1737     o = a[0 ] >> 28; a[1 ] += (sword32)o; a[0 ] -= (sword32)(o << 28);
1738     o = a[1 ] >> 28; a[2 ] += (sword32)o; a[1 ] -= (sword32)(o << 28);
1739     o = a[2 ] >> 28; a[3 ] += (sword32)o; a[2 ] -= (sword32)(o << 28);
1740     o = a[3 ] >> 28; a[4 ] += (sword32)o; a[3 ] -= (sword32)(o << 28);
1741     o = a[4 ] >> 28; a[5 ] += (sword32)o; a[4 ] -= (sword32)(o << 28);
1742     o = a[5 ] >> 28; a[6 ] += (sword32)o; a[5 ] -= (sword32)(o << 28);
1743     o = a[6 ] >> 28; a[7 ] += (sword32)o; a[6 ] -= (sword32)(o << 28);
1744     o = a[7 ] >> 28; a[8 ] += (sword32)o; a[7 ] -= (sword32)(o << 28);
1745     o = a[8 ] >> 28; a[9 ] += (sword32)o; a[8 ] -= (sword32)(o << 28);
1746     o = a[9 ] >> 28; a[10] += (sword32)o; a[9 ] -= (sword32)(o << 28);
1747     o = a[10] >> 28; a[11] += (sword32)o; a[10] -= (sword32)(o << 28);
1748     o = a[11] >> 28; a[12] += (sword32)o; a[11] -= (sword32)(o << 28);
1749     o = a[12] >> 28; a[13] += (sword32)o; a[12] -= (sword32)(o << 28);
1750     o = a[13] >> 28; a[14] += (sword32)o; a[13] -= (sword32)(o << 28);
1751     o = a[14] >> 28; a[15] += (sword32)o; a[14] -= (sword32)(o << 28);
1752     o = a[15] >> 28; a[0]  += (sword32)o;
1753                      a[8]  += (sword32)o; a[15] -= (sword32)(o << 28);
1754 }
1755 /* Mulitply a field element by 39081. r = (39081 * a) mod (2^448 - 2^224 - 1)
1756  *
1757  * r  [in]  Field element to hold result.
1758  * a  [in]  Field element to multiply.
1759  */
fe448_mul39081(sword32 * r,const sword32 * a)1760 void fe448_mul39081(sword32* r, const sword32* a)
1761 {
1762     sword64 t;
1763     sword32 o;
1764     sword64 t0 = a[0] * (sword64)39081;
1765     sword64 t1 = a[1] * (sword64)39081;
1766     sword64 t2 = a[2] * (sword64)39081;
1767     sword64 t3 = a[3] * (sword64)39081;
1768     sword64 t4 = a[4] * (sword64)39081;
1769     sword64 t5 = a[5] * (sword64)39081;
1770     sword64 t6 = a[6] * (sword64)39081;
1771     sword64 t7 = a[7] * (sword64)39081;
1772     sword64 t8 = a[8] * (sword64)39081;
1773     sword64 t9 = a[9] * (sword64)39081;
1774     sword64 t10 = a[10] * (sword64)39081;
1775     sword64 t11 = a[11] * (sword64)39081;
1776     sword64 t12 = a[12] * (sword64)39081;
1777     sword64 t13 = a[13] * (sword64)39081;
1778     sword64 t14 = a[14] * (sword64)39081;
1779     sword64 t15 = a[15] * (sword64)39081;
1780     o = (sword32)(t0  >> 28); t1  += o; t = (sword64)o << 28; t0  -= t;
1781     o = (sword32)(t1  >> 28); t2  += o; t = (sword64)o << 28; t1  -= t;
1782     o = (sword32)(t2  >> 28); t3  += o; t = (sword64)o << 28; t2  -= t;
1783     o = (sword32)(t3  >> 28); t4  += o; t = (sword64)o << 28; t3  -= t;
1784     o = (sword32)(t4  >> 28); t5  += o; t = (sword64)o << 28; t4  -= t;
1785     o = (sword32)(t5  >> 28); t6  += o; t = (sword64)o << 28; t5  -= t;
1786     o = (sword32)(t6  >> 28); t7  += o; t = (sword64)o << 28; t6  -= t;
1787     o = (sword32)(t7  >> 28); t8  += o; t = (sword64)o << 28; t7  -= t;
1788     o = (sword32)(t8  >> 28); t9  += o; t = (sword64)o << 28; t8  -= t;
1789     o = (sword32)(t9  >> 28); t10 += o; t = (sword64)o << 28; t9  -= t;
1790     o = (sword32)(t10 >> 28); t11 += o; t = (sword64)o << 28; t10 -= t;
1791     o = (sword32)(t11 >> 28); t12 += o; t = (sword64)o << 28; t11 -= t;
1792     o = (sword32)(t12 >> 28); t13 += o; t = (sword64)o << 28; t12 -= t;
1793     o = (sword32)(t13 >> 28); t14 += o; t = (sword64)o << 28; t13 -= t;
1794     o = (sword32)(t14 >> 28); t15 += o; t = (sword64)o << 28; t14 -= t;
1795     o = (sword32)(t15 >> 28); t0  += o;
1796                    t8  += o; t = (sword64)o << 28; t15 -= t;
1797 
1798     /* Store */
1799     r[0] = (sword32)t0;
1800     r[1] = (sword32)t1;
1801     r[2] = (sword32)t2;
1802     r[3] = (sword32)t3;
1803     r[4] = (sword32)t4;
1804     r[5] = (sword32)t5;
1805     r[6] = (sword32)t6;
1806     r[7] = (sword32)t7;
1807     r[8] = (sword32)t8;
1808     r[9] = (sword32)t9;
1809     r[10] = (sword32)t10;
1810     r[11] = (sword32)t11;
1811     r[12] = (sword32)t12;
1812     r[13] = (sword32)t13;
1813     r[14] = (sword32)t14;
1814     r[15] = (sword32)t15;
1815 }
1816 
1817 /* Mulitply two field elements. r = a * b
1818  *
1819  * r  [in]  Field element to hold result.
1820  * a  [in]  Field element to multiply.
1821  * b  [in]  Field element to multiply.
1822  */
fe448_mul_8(sword32 * r,const sword32 * a,const sword32 * b)1823 static WC_INLINE void fe448_mul_8(sword32* r, const sword32* a, const sword32* b)
1824 {
1825     sword64 t;
1826     sword64 t0   = (sword64)a[ 0] * b[ 0];
1827     sword64 t1   = (sword64)a[ 0] * b[ 1];
1828     sword64 t101 = (sword64)a[ 1] * b[ 0];
1829     sword64 t2   = (sword64)a[ 0] * b[ 2];
1830     sword64 t102 = (sword64)a[ 1] * b[ 1];
1831     sword64 t202 = (sword64)a[ 2] * b[ 0];
1832     sword64 t3   = (sword64)a[ 0] * b[ 3];
1833     sword64 t103 = (sword64)a[ 1] * b[ 2];
1834     sword64 t203 = (sword64)a[ 2] * b[ 1];
1835     sword64 t303 = (sword64)a[ 3] * b[ 0];
1836     sword64 t4   = (sword64)a[ 0] * b[ 4];
1837     sword64 t104 = (sword64)a[ 1] * b[ 3];
1838     sword64 t204 = (sword64)a[ 2] * b[ 2];
1839     sword64 t304 = (sword64)a[ 3] * b[ 1];
1840     sword64 t404 = (sword64)a[ 4] * b[ 0];
1841     sword64 t5   = (sword64)a[ 0] * b[ 5];
1842     sword64 t105 = (sword64)a[ 1] * b[ 4];
1843     sword64 t205 = (sword64)a[ 2] * b[ 3];
1844     sword64 t305 = (sword64)a[ 3] * b[ 2];
1845     sword64 t405 = (sword64)a[ 4] * b[ 1];
1846     sword64 t505 = (sword64)a[ 5] * b[ 0];
1847     sword64 t6   = (sword64)a[ 0] * b[ 6];
1848     sword64 t106 = (sword64)a[ 1] * b[ 5];
1849     sword64 t206 = (sword64)a[ 2] * b[ 4];
1850     sword64 t306 = (sword64)a[ 3] * b[ 3];
1851     sword64 t406 = (sword64)a[ 4] * b[ 2];
1852     sword64 t506 = (sword64)a[ 5] * b[ 1];
1853     sword64 t606 = (sword64)a[ 6] * b[ 0];
1854     sword64 t7   = (sword64)a[ 0] * b[ 7];
1855     sword64 t107 = (sword64)a[ 1] * b[ 6];
1856     sword64 t207 = (sword64)a[ 2] * b[ 5];
1857     sword64 t307 = (sword64)a[ 3] * b[ 4];
1858     sword64 t407 = (sword64)a[ 4] * b[ 3];
1859     sword64 t507 = (sword64)a[ 5] * b[ 2];
1860     sword64 t607 = (sword64)a[ 6] * b[ 1];
1861     sword64 t707 = (sword64)a[ 7] * b[ 0];
1862     sword64 t8   = (sword64)a[ 1] * b[ 7];
1863     sword64 t108 = (sword64)a[ 2] * b[ 6];
1864     sword64 t208 = (sword64)a[ 3] * b[ 5];
1865     sword64 t308 = (sword64)a[ 4] * b[ 4];
1866     sword64 t408 = (sword64)a[ 5] * b[ 3];
1867     sword64 t508 = (sword64)a[ 6] * b[ 2];
1868     sword64 t608 = (sword64)a[ 7] * b[ 1];
1869     sword64 t9   = (sword64)a[ 2] * b[ 7];
1870     sword64 t109 = (sword64)a[ 3] * b[ 6];
1871     sword64 t209 = (sword64)a[ 4] * b[ 5];
1872     sword64 t309 = (sword64)a[ 5] * b[ 4];
1873     sword64 t409 = (sword64)a[ 6] * b[ 3];
1874     sword64 t509 = (sword64)a[ 7] * b[ 2];
1875     sword64 t10  = (sword64)a[ 3] * b[ 7];
1876     sword64 t110 = (sword64)a[ 4] * b[ 6];
1877     sword64 t210 = (sword64)a[ 5] * b[ 5];
1878     sword64 t310 = (sword64)a[ 6] * b[ 4];
1879     sword64 t410 = (sword64)a[ 7] * b[ 3];
1880     sword64 t11  = (sword64)a[ 4] * b[ 7];
1881     sword64 t111 = (sword64)a[ 5] * b[ 6];
1882     sword64 t211 = (sword64)a[ 6] * b[ 5];
1883     sword64 t311 = (sword64)a[ 7] * b[ 4];
1884     sword64 t12  = (sword64)a[ 5] * b[ 7];
1885     sword64 t112 = (sword64)a[ 6] * b[ 6];
1886     sword64 t212 = (sword64)a[ 7] * b[ 5];
1887     sword64 t13  = (sword64)a[ 6] * b[ 7];
1888     sword64 t113 = (sword64)a[ 7] * b[ 6];
1889     sword64 t14  = (sword64)a[ 7] * b[ 7];
1890     t1  += t101;
1891     t2  += t102; t2  += t202;
1892     t3  += t103; t3  += t203; t3  += t303;
1893     t4  += t104; t4  += t204; t4  += t304; t4  += t404;
1894     t5  += t105; t5  += t205; t5  += t305; t5  += t405; t5  += t505;
1895     t6  += t106; t6  += t206; t6  += t306; t6  += t406; t6  += t506;
1896     t6  += t606;
1897     t7  += t107; t7  += t207; t7  += t307; t7  += t407; t7  += t507;
1898     t7  += t607;
1899     t7  += t707;
1900     t8  += t108; t8  += t208; t8  += t308; t8  += t408; t8  += t508;
1901     t8  += t608;
1902     t9  += t109; t9  += t209; t9  += t309; t9  += t409; t9  += t509;
1903     t10 += t110; t10 += t210; t10 += t310; t10 += t410;
1904     t11 += t111; t11 += t211; t11 += t311;
1905     t12 += t112; t12 += t212;
1906     t13 += t113;
1907     sword64 o = t14 >> 28;
1908     sword64 t15 = o;
1909     t14 -= o << 28;
1910     o = (t0  >> 28); t1  += o; t = o << 28; t0  -= t;
1911     o = (t1  >> 28); t2  += o; t = o << 28; t1  -= t;
1912     o = (t2  >> 28); t3  += o; t = o << 28; t2  -= t;
1913     o = (t3  >> 28); t4  += o; t = o << 28; t3  -= t;
1914     o = (t4  >> 28); t5  += o; t = o << 28; t4  -= t;
1915     o = (t5  >> 28); t6  += o; t = o << 28; t5  -= t;
1916     o = (t6  >> 28); t7  += o; t = o << 28; t6  -= t;
1917     o = (t7  >> 28); t8  += o; t = o << 28; t7  -= t;
1918     o = (t8  >> 28); t9  += o; t = o << 28; t8  -= t;
1919     o = (t9  >> 28); t10 += o; t = o << 28; t9  -= t;
1920     o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
1921     o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
1922     o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
1923     o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
1924     o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
1925     o = (t15 >> 28); t0  += o;
1926                    t8  += o; t = o << 28; t15 -= t;
1927 
1928     /* Store */
1929     r[0] = (sword32)t0;
1930     r[1] = (sword32)t1;
1931     r[2] = (sword32)t2;
1932     r[3] = (sword32)t3;
1933     r[4] = (sword32)t4;
1934     r[5] = (sword32)t5;
1935     r[6] = (sword32)t6;
1936     r[7] = (sword32)t7;
1937     r[8] = (sword32)t8;
1938     r[9] = (sword32)t9;
1939     r[10] = (sword32)t10;
1940     r[11] = (sword32)t11;
1941     r[12] = (sword32)t12;
1942     r[13] = (sword32)t13;
1943     r[14] = (sword32)t14;
1944     r[15] = (sword32)t15;
1945 }
1946 
1947 /* Mulitply two field elements. r = (a * b) mod (2^448 - 2^224 - 1)
1948  *
1949  * r  [in]  Field element to hold result.
1950  * a  [in]  Field element to multiply.
1951  * b  [in]  Field element to multiply.
1952  */
fe448_mul(sword32 * r,const sword32 * a,const sword32 * b)1953 void fe448_mul(sword32* r, const sword32* a, const sword32* b)
1954 {
1955     sword32 r0[16];
1956     sword32 r1[16];
1957     sword32* a1 = r1;
1958     sword32 b1[8];
1959     sword32 r2[16];
1960     a1[0] = a[0] + a[8];
1961     a1[1] = a[1] + a[9];
1962     a1[2] = a[2] + a[10];
1963     a1[3] = a[3] + a[11];
1964     a1[4] = a[4] + a[12];
1965     a1[5] = a[5] + a[13];
1966     a1[6] = a[6] + a[14];
1967     a1[7] = a[7] + a[15];
1968     b1[0] = b[0] + b[8];
1969     b1[1] = b[1] + b[9];
1970     b1[2] = b[2] + b[10];
1971     b1[3] = b[3] + b[11];
1972     b1[4] = b[4] + b[12];
1973     b1[5] = b[5] + b[13];
1974     b1[6] = b[6] + b[14];
1975     b1[7] = b[7] + b[15];
1976     fe448_mul_8(r2, a + 8, b + 8);
1977     fe448_mul_8(r0, a, b);
1978     fe448_mul_8(r1, a1, b1);
1979     r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
1980     r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
1981     r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
1982     r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
1983     r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
1984     r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
1985     r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
1986     r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
1987     r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
1988     r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
1989     r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
1990     r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
1991     r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
1992     r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
1993     r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
1994     r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
1995 }
1996 
1997 /* Square a field element. r = a * a
1998  *
1999  * r  [in]  Field element to hold result.
2000  * a  [in]  Field element to square.
2001  */
fe448_sqr_8(sword32 * r,const sword32 * a)2002 static WC_INLINE void fe448_sqr_8(sword32* r, const sword32* a)
2003 {
2004     sword64 t;
2005     sword64 t0   =     (sword64)a[ 0] * a[ 0];
2006     sword64 t1   = 2 * (sword64)a[ 0] * a[ 1];
2007     sword64 t2   = 2 * (sword64)a[ 0] * a[ 2];
2008     sword64 t102 =     (sword64)a[ 1] * a[ 1];
2009     sword64 t3   = 2 * (sword64)a[ 0] * a[ 3];
2010     sword64 t103 = 2 * (sword64)a[ 1] * a[ 2];
2011     sword64 t4   = 2 * (sword64)a[ 0] * a[ 4];
2012     sword64 t104 = 2 * (sword64)a[ 1] * a[ 3];
2013     sword64 t204 =     (sword64)a[ 2] * a[ 2];
2014     sword64 t5   = 2 * (sword64)a[ 0] * a[ 5];
2015     sword64 t105 = 2 * (sword64)a[ 1] * a[ 4];
2016     sword64 t205 = 2 * (sword64)a[ 2] * a[ 3];
2017     sword64 t6   = 2 * (sword64)a[ 0] * a[ 6];
2018     sword64 t106 = 2 * (sword64)a[ 1] * a[ 5];
2019     sword64 t206 = 2 * (sword64)a[ 2] * a[ 4];
2020     sword64 t306 =     (sword64)a[ 3] * a[ 3];
2021     sword64 t7   = 2 * (sword64)a[ 0] * a[ 7];
2022     sword64 t107 = 2 * (sword64)a[ 1] * a[ 6];
2023     sword64 t207 = 2 * (sword64)a[ 2] * a[ 5];
2024     sword64 t307 = 2 * (sword64)a[ 3] * a[ 4];
2025     sword64 t8   = 2 * (sword64)a[ 1] * a[ 7];
2026     sword64 t108 = 2 * (sword64)a[ 2] * a[ 6];
2027     sword64 t208 = 2 * (sword64)a[ 3] * a[ 5];
2028     sword64 t308 =     (sword64)a[ 4] * a[ 4];
2029     sword64 t9   = 2 * (sword64)a[ 2] * a[ 7];
2030     sword64 t109 = 2 * (sword64)a[ 3] * a[ 6];
2031     sword64 t209 = 2 * (sword64)a[ 4] * a[ 5];
2032     sword64 t10  = 2 * (sword64)a[ 3] * a[ 7];
2033     sword64 t110 = 2 * (sword64)a[ 4] * a[ 6];
2034     sword64 t210 =     (sword64)a[ 5] * a[ 5];
2035     sword64 t11  = 2 * (sword64)a[ 4] * a[ 7];
2036     sword64 t111 = 2 * (sword64)a[ 5] * a[ 6];
2037     sword64 t12  = 2 * (sword64)a[ 5] * a[ 7];
2038     sword64 t112 =     (sword64)a[ 6] * a[ 6];
2039     sword64 t13  = 2 * (sword64)a[ 6] * a[ 7];
2040     sword64 t14  =     (sword64)a[ 7] * a[ 7];
2041     t2  += t102;
2042     t3  += t103;
2043     t4  += t104; t4  += t204;
2044     t5  += t105; t5  += t205;
2045     t6  += t106; t6  += t206; t6  += t306;
2046     t7  += t107; t7  += t207; t7  += t307;
2047     t8  += t108; t8  += t208; t8  += t308;
2048     t9  += t109; t9  += t209;
2049     t10 += t110; t10 += t210;
2050     t11 += t111;
2051     t12 += t112;
2052     sword64 o = t14 >> 28;
2053     sword64 t15 = o;
2054     t14 -= o << 28;
2055     o = (t0  >> 28); t1  += o; t = o << 28; t0  -= t;
2056     o = (t1  >> 28); t2  += o; t = o << 28; t1  -= t;
2057     o = (t2  >> 28); t3  += o; t = o << 28; t2  -= t;
2058     o = (t3  >> 28); t4  += o; t = o << 28; t3  -= t;
2059     o = (t4  >> 28); t5  += o; t = o << 28; t4  -= t;
2060     o = (t5  >> 28); t6  += o; t = o << 28; t5  -= t;
2061     o = (t6  >> 28); t7  += o; t = o << 28; t6  -= t;
2062     o = (t7  >> 28); t8  += o; t = o << 28; t7  -= t;
2063     o = (t8  >> 28); t9  += o; t = o << 28; t8  -= t;
2064     o = (t9  >> 28); t10 += o; t = o << 28; t9  -= t;
2065     o = (t10 >> 28); t11 += o; t = o << 28; t10 -= t;
2066     o = (t11 >> 28); t12 += o; t = o << 28; t11 -= t;
2067     o = (t12 >> 28); t13 += o; t = o << 28; t12 -= t;
2068     o = (t13 >> 28); t14 += o; t = o << 28; t13 -= t;
2069     o = (t14 >> 28); t15 += o; t = o << 28; t14 -= t;
2070     o = (t15 >> 28); t0  += o;
2071                    t8  += o; t = o << 28; t15 -= t;
2072 
2073     /* Store */
2074     r[0] = (sword32)t0;
2075     r[1] = (sword32)t1;
2076     r[2] = (sword32)t2;
2077     r[3] = (sword32)t3;
2078     r[4] = (sword32)t4;
2079     r[5] = (sword32)t5;
2080     r[6] = (sword32)t6;
2081     r[7] = (sword32)t7;
2082     r[8] = (sword32)t8;
2083     r[9] = (sword32)t9;
2084     r[10] = (sword32)t10;
2085     r[11] = (sword32)t11;
2086     r[12] = (sword32)t12;
2087     r[13] = (sword32)t13;
2088     r[14] = (sword32)t14;
2089     r[15] = (sword32)t15;
2090 }
2091 
2092 /* Square a field element. r = (a * a) mod (2^448 - 2^224 - 1)
2093  *
2094  * r  [in]  Field element to hold result.
2095  * a  [in]  Field element to square.
2096  */
fe448_sqr(sword32 * r,const sword32 * a)2097 void fe448_sqr(sword32* r, const sword32* a)
2098 {
2099     sword32 r0[16];
2100     sword32 r1[16];
2101     sword32* a1 = r1;
2102     sword32 r2[16];
2103     a1[0] = a[0] + a[8];
2104     a1[1] = a[1] + a[9];
2105     a1[2] = a[2] + a[10];
2106     a1[3] = a[3] + a[11];
2107     a1[4] = a[4] + a[12];
2108     a1[5] = a[5] + a[13];
2109     a1[6] = a[6] + a[14];
2110     a1[7] = a[7] + a[15];
2111     fe448_sqr_8(r2, a + 8);
2112     fe448_sqr_8(r0, a);
2113     fe448_sqr_8(r1, a1);
2114     r[ 0] = r0[ 0] + r2[ 0] + r1[ 8] - r0[ 8];
2115     r[ 1] = r0[ 1] + r2[ 1] + r1[ 9] - r0[ 9];
2116     r[ 2] = r0[ 2] + r2[ 2] + r1[10] - r0[10];
2117     r[ 3] = r0[ 3] + r2[ 3] + r1[11] - r0[11];
2118     r[ 4] = r0[ 4] + r2[ 4] + r1[12] - r0[12];
2119     r[ 5] = r0[ 5] + r2[ 5] + r1[13] - r0[13];
2120     r[ 6] = r0[ 6] + r2[ 6] + r1[14] - r0[14];
2121     r[ 7] = r0[ 7] + r2[ 7] + r1[15] - r0[15];
2122     r[ 8] = r2[ 8]          + r1[ 0] - r0[ 0] + r1[ 8];
2123     r[ 9] = r2[ 9]          + r1[ 1] - r0[ 1] + r1[ 9];
2124     r[10] = r2[10]          + r1[ 2] - r0[ 2] + r1[10];
2125     r[11] = r2[11]          + r1[ 3] - r0[ 3] + r1[11];
2126     r[12] = r2[12]          + r1[ 4] - r0[ 4] + r1[12];
2127     r[13] = r2[13]          + r1[ 5] - r0[ 5] + r1[13];
2128     r[14] = r2[14]          + r1[ 6] - r0[ 6] + r1[14];
2129     r[15] = r2[15]          + r1[ 7] - r0[ 7] + r1[15];
2130 }
2131 
2132 /* Invert the field element. (r * a) mod (2^448 - 2^224 - 1) = 1
2133  * Constant time implementation - using Fermat's little theorem:
2134  *   a^(p-1) mod p = 1 => a^(p-2) mod p = 1/a
2135  * For curve448: p - 2 = 2^448 - 2^224 - 3
2136  *
2137  * r  [in]  Field element to hold result.
2138  * a  [in]  Field element to invert.
2139  */
fe448_invert(sword32 * r,const sword32 * a)2140 void fe448_invert(sword32* r, const sword32* a)
2141 {
2142     sword32 t1[16];
2143     sword32 t2[16];
2144     sword32 t3[16];
2145     sword32 t4[16];
2146     int i;
2147 
2148     fe448_sqr(t1, a);
2149     /* t1 = 2 */
2150     fe448_mul(t1, t1, a);
2151     /* t1 = 3 */
2152     fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2153     /* t2 = c */
2154     fe448_mul(t3, t2, a);
2155     /* t3 = d */
2156     fe448_mul(t1, t2, t1);
2157     /* t1 = f */
2158     fe448_sqr(t2, t1);
2159     /* t2 = 1e */
2160     fe448_mul(t4, t2, a);
2161     /* t4 = 1f */
2162     fe448_sqr(t2, t4); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2163     /* t2 = 3e0 */
2164     fe448_mul(t1, t2, t4);
2165     /* t1 = 3ff */
2166     fe448_sqr(t2, t1); for (i = 1; i < 10; ++i) fe448_sqr(t2, t2);
2167     /* t2 = ffc00 */
2168     fe448_mul(t1, t2, t1);
2169     /* t1 = fffff */
2170     fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2171     /* t2 = 1ffffe0 */
2172     fe448_mul(t1, t2, t4);
2173     /* t1 = 1ffffff */
2174     fe448_sqr(t2, t1); for (i = 1; i < 25; ++i) fe448_sqr(t2, t2);
2175     /* t2 = 3fffffe000000 */
2176     fe448_mul(t1, t2, t1);
2177     /* t1 = 3ffffffffffff */
2178     fe448_sqr(t2, t1); for (i = 1; i < 5; ++i) fe448_sqr(t2, t2);
2179     /* t2 = 7fffffffffffe0 */
2180     fe448_mul(t1, t2, t4);
2181     /* t1 = 7fffffffffffff */
2182     fe448_sqr(t2, t1); for (i = 1; i < 55; ++i) fe448_sqr(t2, t2);
2183     /* t2 = 3fffffffffffff80000000000000 */
2184     fe448_mul(t1, t2, t1);
2185     /* t1 = 3fffffffffffffffffffffffffff */
2186     fe448_sqr(t2, t1); for (i = 1; i < 110; ++i) fe448_sqr(t2, t2);
2187     /* t2 = fffffffffffffffffffffffffffc000000000000000000000000000 */
2188     fe448_mul(t1, t2, t1);
2189     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2190     fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2191     /* t2 = fffffffffffffffffffffffffffffffffffffffffffffffffffffff0 */
2192     fe448_mul(t3, t3, t2);
2193     /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2194     fe448_mul(t1, t3, a);
2195     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2196     fe448_sqr(t1, t1); for (i = 1; i < 224; ++i) fe448_sqr(t1, t1);
2197     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000000000000000000000000000000000000000000000000000 */
2198     fe448_mul(r, t3, t1);
2199     /* r = fffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd */
2200 }
2201 
2202 /* Scalar multiply the point by a number. r = n.a
2203  * Uses Montgomery ladder and only requires the x-ordinate.
2204  *
2205  * r  [in]  Field element to hold result.
2206  * n  [in]  Scalar as an array of bytes.
2207  * a  [in]  Point to multiply - x-ordinate only.
2208  */
curve448(byte * r,const byte * n,const byte * a)2209 int curve448(byte* r, const byte* n, const byte* a)
2210 {
2211     sword32 x1[16];
2212     sword32 x2[16];
2213     sword32 z2[16];
2214     sword32 x3[16];
2215     sword32 z3[16];
2216     sword32 t0[16];
2217     sword32 t1[16];
2218     int i;
2219     unsigned int swap;
2220     unsigned int b;
2221 
2222     fe448_from_bytes(x1, a);
2223     fe448_1(x2);
2224     fe448_0(z2);
2225     fe448_copy(x3, x1);
2226     fe448_1(z3);
2227 
2228     swap = 0;
2229     for (i = 447; i >= 0; --i) {
2230         b = (n[i >> 3] >> (i & 7)) & 1;
2231         swap ^= b;
2232         fe448_cswap(x2, x3, swap);
2233         fe448_cswap(z2, z3, swap);
2234         swap = b;
2235 
2236         /* Montgomery Ladder - double and add */
2237         fe448_add(t0, x2, z2);
2238         fe448_reduce(t0);
2239         fe448_add(t1, x3, z3);
2240         fe448_reduce(t1);
2241         fe448_sub(x2, x2, z2);
2242         fe448_sub(x3, x3, z3);
2243         fe448_mul(t1, t1, x2);
2244         fe448_mul(z3, x3, t0);
2245         fe448_sqr(t0, t0);
2246         fe448_sqr(x2, x2);
2247         fe448_add(x3, z3, t1);
2248         fe448_reduce(x3);
2249         fe448_sqr(x3, x3);
2250         fe448_sub(z3, z3, t1);
2251         fe448_sqr(z3, z3);
2252         fe448_mul(z3, z3, x1);
2253         fe448_sub(t1, t0, x2);
2254         fe448_mul(x2, t0, x2);
2255         fe448_mul39081(z2, t1);
2256         fe448_add(z2, t0, z2);
2257         fe448_mul(z2, z2, t1);
2258     }
2259     /* Last two bits are 0 - no final swap check required. */
2260 
2261     fe448_invert(z2, z2);
2262     fe448_mul(x2, x2, z2);
2263     fe448_to_bytes(r, x2);
2264 
2265     return 0;
2266 }
2267 
2268 #ifdef HAVE_ED448
2269 /* Check whether field element is not 0.
2270  * Must convert to a normalized form before checking.
2271  *
2272  * a  [in]  Field element.
2273  * returns 0 when zero, and any other value otherwise.
2274  */
fe448_isnonzero(const sword32 * a)2275 int fe448_isnonzero(const sword32* a)
2276 {
2277     byte b[56];
2278     int i;
2279     byte c = 0;
2280     fe448_to_bytes(b, a);
2281     for (i = 0; i < 56; i++)
2282         c |= b[i];
2283     return c;
2284 }
2285 
2286 /* Check whether field element is negative.
2287  * Must convert to a normalized form before checking.
2288  *
2289  * a  [in]  Field element.
2290  * returns 1 when negative, and 0 otherwise.
2291  */
fe448_isnegative(const sword32 * a)2292 int fe448_isnegative(const sword32* a)
2293 {
2294     byte b[56];
2295     fe448_to_bytes(b, a);
2296     return b[0] & 1;
2297 }
2298 
2299 /* Negates the field element. r = -a
2300  *
2301  * r  [in]  Field element to hold result.
2302  * a  [in]  Field element.
2303  */
fe448_neg(sword32 * r,const sword32 * a)2304 void fe448_neg(sword32* r, const sword32* a)
2305 {
2306     r[0] = -a[0];
2307     r[1] = -a[1];
2308     r[2] = -a[2];
2309     r[3] = -a[3];
2310     r[4] = -a[4];
2311     r[5] = -a[5];
2312     r[6] = -a[6];
2313     r[7] = -a[7];
2314     r[8] = -a[8];
2315     r[9] = -a[9];
2316     r[10] = -a[10];
2317     r[11] = -a[11];
2318     r[12] = -a[12];
2319     r[13] = -a[13];
2320     r[14] = -a[14];
2321     r[15] = -a[15];
2322 }
2323 
2324 /* Raise field element to (p-3) / 4: 2^446 - 2^222 - 1
2325  * Used for calcualting y-ordinate from x-ordinate for Ed448.
2326  *
2327  * r  [in]  Field element to hold result.
2328  * a  [in]  Field element to exponentiate.
2329  */
fe448_pow_2_446_222_1(sword32 * r,const sword32 * a)2330 void fe448_pow_2_446_222_1(sword32* r, const sword32* a)
2331 {
2332     sword32 t1[16];
2333     sword32 t2[16];
2334     sword32 t3[16];
2335     sword32 t4[16];
2336     sword32 t5[16];
2337     int i;
2338 
2339     fe448_sqr(t3, a);
2340     /* t3 = 2 */
2341     fe448_mul(t1, t3, a);
2342     /* t1 = 3 */
2343     fe448_sqr(t5, t1);
2344     /* t5 = 6 */
2345     fe448_mul(t5, t5, a);
2346     /* t5 = 7 */
2347     fe448_sqr(t2, t1); for (i = 1; i < 2; ++i) fe448_sqr(t2, t2);
2348     /* t2 = c */
2349     fe448_mul(t3, t2, t3);
2350     /* t3 = e */
2351     fe448_mul(t1, t2, t1);
2352     /* t1 = f */
2353     fe448_sqr(t2, t1); for (i = 1; i < 3; ++i) fe448_sqr(t2, t2);
2354     /* t2 = 78 */
2355     fe448_mul(t5, t2, t5);
2356     /* t5 = 7f */
2357     fe448_sqr(t2, t1); for (i = 1; i < 4; ++i) fe448_sqr(t2, t2);
2358     /* t2 = f0 */
2359     fe448_mul(t1, t2, t1);
2360     /* t1 = ff */
2361     fe448_mul(t3, t3, t2);
2362     /* t3 = fe */
2363     fe448_sqr(t2, t1); for (i = 1; i < 7; ++i) fe448_sqr(t2, t2);
2364     /* t2 = 7f80 */
2365     fe448_mul(t5, t2, t5);
2366     /* t5 = 7fff */
2367     fe448_sqr(t2, t1); for (i = 1; i < 8; ++i) fe448_sqr(t2, t2);
2368     /* t2 = ff00 */
2369     fe448_mul(t1, t2, t1);
2370     /* t1 = ffff */
2371     fe448_mul(t3, t3, t2);
2372     /* t3 = fffe */
2373     fe448_sqr(t2, t5); for (i = 1; i < 15; ++i) fe448_sqr(t2, t2);
2374     /* t2 = 3fff8000 */
2375     fe448_mul(t5, t2, t5);
2376     /* t5 = 3fffffff */
2377     fe448_sqr(t2, t1); for (i = 1; i < 16; ++i) fe448_sqr(t2, t2);
2378     /* t2 = ffff0000 */
2379     fe448_mul(t1, t2, t1);
2380     /* t1 = ffffffff */
2381     fe448_mul(t3, t3, t2);
2382     /* t3 = fffffffe */
2383     fe448_sqr(t2, t1); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2384     /* t2 = ffffffff00000000 */
2385     fe448_mul(t2, t2, t1);
2386     /* t2 = ffffffffffffffff */
2387     fe448_sqr(t1, t2); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2388     /* t1 = ffffffffffffffff0000000000000000 */
2389     fe448_mul(t1, t1, t2);
2390     /* t1 = ffffffffffffffffffffffffffffffff */
2391     fe448_sqr(t1, t1); for (i = 1; i < 64; ++i) fe448_sqr(t1, t1);
2392     /* t1 = ffffffffffffffffffffffffffffffff0000000000000000 */
2393     fe448_mul(t4, t1, t2);
2394     /* t4 = ffffffffffffffffffffffffffffffffffffffffffffffff */
2395     fe448_sqr(t2, t4); for (i = 1; i < 32; ++i) fe448_sqr(t2, t2);
2396     /* t2 = ffffffffffffffffffffffffffffffffffffffffffffffff00000000 */
2397     fe448_mul(t3, t3, t2);
2398     /* t3 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe */
2399     fe448_sqr(t1, t3); for (i = 1; i < 192; ++i) fe448_sqr(t1, t1);
2400     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffe000000000000000000000000000000000000000000000000 */
2401     fe448_mul(t1, t1, t4);
2402     /* t1 = fffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffffffffffffffffffffffffffffffffffffffffffff */
2403     fe448_sqr(t1, t1); for (i = 1; i < 30; ++i) fe448_sqr(t1, t1);
2404     /* t1 = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffc0000000 */
2405     fe448_mul(r, t5, t1);
2406     /* r = 3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff */
2407 }
2408 
2409 /* Constant time, conditional move of b into a.
2410  * a is not changed if the condition is 0.
2411  *
2412  * a  A field element.
2413  * b  A field element.
2414  * c  If 1 then copy and if 0 then don't copy.
2415  */
fe448_cmov(sword32 * a,const sword32 * b,int c)2416 void fe448_cmov(sword32* a, const sword32* b, int c)
2417 {
2418     sword32 m = -(sword32)c;
2419     sword32 t0 = m & (a[0] ^ b[0]);
2420     sword32 t1 = m & (a[1] ^ b[1]);
2421     sword32 t2 = m & (a[2] ^ b[2]);
2422     sword32 t3 = m & (a[3] ^ b[3]);
2423     sword32 t4 = m & (a[4] ^ b[4]);
2424     sword32 t5 = m & (a[5] ^ b[5]);
2425     sword32 t6 = m & (a[6] ^ b[6]);
2426     sword32 t7 = m & (a[7] ^ b[7]);
2427     sword32 t8 = m & (a[8] ^ b[8]);
2428     sword32 t9 = m & (a[9] ^ b[9]);
2429     sword32 t10 = m & (a[10] ^ b[10]);
2430     sword32 t11 = m & (a[11] ^ b[11]);
2431     sword32 t12 = m & (a[12] ^ b[12]);
2432     sword32 t13 = m & (a[13] ^ b[13]);
2433     sword32 t14 = m & (a[14] ^ b[14]);
2434     sword32 t15 = m & (a[15] ^ b[15]);
2435 
2436     a[0] ^= t0;
2437     a[1] ^= t1;
2438     a[2] ^= t2;
2439     a[3] ^= t3;
2440     a[4] ^= t4;
2441     a[5] ^= t5;
2442     a[6] ^= t6;
2443     a[7] ^= t7;
2444     a[8] ^= t8;
2445     a[9] ^= t9;
2446     a[10] ^= t10;
2447     a[11] ^= t11;
2448     a[12] ^= t12;
2449     a[13] ^= t13;
2450     a[14] ^= t14;
2451     a[15] ^= t15;
2452 }
2453 
2454 #endif /* HAVE_ED448 */
2455 #endif
2456 
2457 #endif /* HAVE_CURVE448 || HAVE_ED448 */
2458