1 // The MIT License (MIT)
2 //
3 // Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
4 //
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
11 //
12 // The above copyright notice and this permission notice shall be included in all
13 // copies or substantial portions of the Software.
14 //
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 // SOFTWARE.
22 
23 /*
24  * Derived from machine-generated code via Fiat-Crypto:
25  * https://github.com/mit-plv/fiat-crypto and https://github.com/briansmith/ring
26  *
27  * The below captures notable changes:
28  *
29  *  1. Convert custom integer types to stdint.h types
30  */
31 
32 #ifdef FREEBL_NO_DEPEND
33 #include "../stubs.h"
34 #endif
35 
36 #include "ecl-priv.h"
37 
38 /* fe means field element. Here the field is \Z/(2^255-19). An element t,
39  * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
40  * t[3]+2^102 t[4]+...+2^230 t[9].
41  * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
42  * Multiplication and carrying produce fe from fe_loose.
43  */
44 typedef struct fe {
45     uint32_t v[10];
46 } fe;
47 
48 /* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
49  * Addition and subtraction produce fe_loose from (fe, fe).
50  */
51 typedef struct fe_loose {
52     uint32_t v[10];
53 } fe_loose;
54 
55 #define assert_fe(f)                                                         \
56     do {                                                                     \
57         for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
58             PORT_Assert(f[_assert_fe_i] <=                                   \
59                         ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u));     \
60         }                                                                    \
61     } while (0)
62 
63 #define assert_fe_loose(f)                                                   \
64     do {                                                                     \
65         for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
66             PORT_Assert(f[_assert_fe_i] <=                                   \
67                         ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u));     \
68         }                                                                    \
69     } while (0)
70 
71 /*
72  * The function fiat_25519_subborrowx_u26 is a subtraction with borrow.
73  * Postconditions:
74  *   out1 = (-arg1 + arg2 + -arg3) mod 2^26
75  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^26⌋
76  *
77  * Input Bounds:
78  *   arg1: [0x0 ~> 0x1]
79  *   arg2: [0x0 ~> 0x3ffffff]
80  *   arg3: [0x0 ~> 0x3ffffff]
81  * Output Bounds:
82  *   out1: [0x0 ~> 0x3ffffff]
83  *   out2: [0x0 ~> 0x1]
84  */
85 static void
fiat_25519_subborrowx_u26(uint32_t * out1,uint8_t * out2,uint8_t arg1,uint32_t arg2,uint32_t arg3)86 fiat_25519_subborrowx_u26(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
87 {
88     int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
89     int8_t x2 = (int8_t)(x1 >> 26);
90     uint32_t x3 = (x1 & UINT32_C(0x3ffffff));
91     *out1 = x3;
92     *out2 = (uint8_t)(0x0 - x2);
93 }
94 
95 /*
96  * The function fiat_25519_subborrowx_u25 is a subtraction with borrow.
97  * Postconditions:
98  *   out1 = (-arg1 + arg2 + -arg3) mod 2^25
99  *   out2 = -⌊(-arg1 + arg2 + -arg3) / 2^25⌋
100  *
101  * Input Bounds:
102  *   arg1: [0x0 ~> 0x1]
103  *   arg2: [0x0 ~> 0x1ffffff]
104  *   arg3: [0x0 ~> 0x1ffffff]
105  * Output Bounds:
106  *   out1: [0x0 ~> 0x1ffffff]
107  *   out2: [0x0 ~> 0x1]
108  */
109 static void
fiat_25519_subborrowx_u25(uint32_t * out1,uint8_t * out2,uint8_t arg1,uint32_t arg2,uint32_t arg3)110 fiat_25519_subborrowx_u25(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
111 {
112     int32_t x1 = ((int32_t)(arg2 - arg1) - (int32_t)arg3);
113     int8_t x2 = (int8_t)(x1 >> 25);
114     uint32_t x3 = (x1 & UINT32_C(0x1ffffff));
115     *out1 = x3;
116     *out2 = (uint8_t)(0x0 - x2);
117 }
118 
119 /*
120  * The function fiat_25519_addcarryx_u26 is an addition with carry.
121  * Postconditions:
122  *   out1 = (arg1 + arg2 + arg3) mod 2^26
123  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^26⌋
124  *
125  * Input Bounds:
126  *   arg1: [0x0 ~> 0x1]
127  *   arg2: [0x0 ~> 0x3ffffff]
128  *   arg3: [0x0 ~> 0x3ffffff]
129  * Output Bounds:
130  *   out1: [0x0 ~> 0x3ffffff]
131  *   out2: [0x0 ~> 0x1]
132  */
133 static void
fiat_25519_addcarryx_u26(uint32_t * out1,uint8_t * out2,uint8_t arg1,uint32_t arg2,uint32_t arg3)134 fiat_25519_addcarryx_u26(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
135 {
136     uint32_t x1 = ((arg1 + arg2) + arg3);
137     uint32_t x2 = (x1 & UINT32_C(0x3ffffff));
138     uint8_t x3 = (uint8_t)(x1 >> 26);
139     *out1 = x2;
140     *out2 = x3;
141 }
142 
143 /*
144  * The function fiat_25519_addcarryx_u25 is an addition with carry.
145  * Postconditions:
146  *   out1 = (arg1 + arg2 + arg3) mod 2^25
147  *   out2 = ⌊(arg1 + arg2 + arg3) / 2^25⌋
148  *
149  * Input Bounds:
150  *   arg1: [0x0 ~> 0x1]
151  *   arg2: [0x0 ~> 0x1ffffff]
152  *   arg3: [0x0 ~> 0x1ffffff]
153  * Output Bounds:
154  *   out1: [0x0 ~> 0x1ffffff]
155  *   out2: [0x0 ~> 0x1]
156  */
157 static void
fiat_25519_addcarryx_u25(uint32_t * out1,uint8_t * out2,uint8_t arg1,uint32_t arg2,uint32_t arg3)158 fiat_25519_addcarryx_u25(uint32_t *out1, uint8_t *out2, uint8_t arg1, uint32_t arg2, uint32_t arg3)
159 {
160     uint32_t x1 = ((arg1 + arg2) + arg3);
161     uint32_t x2 = (x1 & UINT32_C(0x1ffffff));
162     uint8_t x3 = (uint8_t)(x1 >> 25);
163     *out1 = x2;
164     *out2 = x3;
165 }
166 
167 /*
168  * The function fiat_25519_cmovznz_u32 is a single-word conditional move.
169  * Postconditions:
170  *   out1 = (if arg1 = 0 then arg2 else arg3)
171  *
172  * Input Bounds:
173  *   arg1: [0x0 ~> 0x1]
174  *   arg2: [0x0 ~> 0xffffffff]
175  *   arg3: [0x0 ~> 0xffffffff]
176  * Output Bounds:
177  *   out1: [0x0 ~> 0xffffffff]
178  */
179 static void
fiat_25519_cmovznz_u32(uint32_t * out1,uint8_t arg1,uint32_t arg2,uint32_t arg3)180 fiat_25519_cmovznz_u32(uint32_t *out1, uint8_t arg1, uint32_t arg2, uint32_t arg3)
181 {
182     uint8_t x1 = (!(!arg1));
183     uint32_t x2 = ((int8_t)(0x0 - x1) & UINT32_C(0xffffffff));
184     uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2));
185     *out1 = x3;
186 }
187 
188 /*
189  * The function fiat_25519_from_bytes deserializes a field element from bytes in little-endian order.
190  * Postconditions:
191  *   eval out1 mod m = bytes_eval arg1 mod m
192  *
193  * Input Bounds:
194  *   arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
195  * Output Bounds:
196  *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
197  */
198 static void
fiat_25519_from_bytes(uint32_t out1[10],const uint8_t arg1[32])199 fiat_25519_from_bytes(uint32_t out1[10], const uint8_t arg1[32])
200 {
201     uint32_t x1 = ((uint32_t)(arg1[31]) << 18);
202     uint32_t x2 = ((uint32_t)(arg1[30]) << 10);
203     uint32_t x3 = ((uint32_t)(arg1[29]) << 2);
204     uint32_t x4 = ((uint32_t)(arg1[28]) << 20);
205     uint32_t x5 = ((uint32_t)(arg1[27]) << 12);
206     uint32_t x6 = ((uint32_t)(arg1[26]) << 4);
207     uint32_t x7 = ((uint32_t)(arg1[25]) << 21);
208     uint32_t x8 = ((uint32_t)(arg1[24]) << 13);
209     uint32_t x9 = ((uint32_t)(arg1[23]) << 5);
210     uint32_t x10 = ((uint32_t)(arg1[22]) << 23);
211     uint32_t x11 = ((uint32_t)(arg1[21]) << 15);
212     uint32_t x12 = ((uint32_t)(arg1[20]) << 7);
213     uint32_t x13 = ((uint32_t)(arg1[19]) << 24);
214     uint32_t x14 = ((uint32_t)(arg1[18]) << 16);
215     uint32_t x15 = ((uint32_t)(arg1[17]) << 8);
216     uint8_t x16 = (arg1[16]);
217     uint32_t x17 = ((uint32_t)(arg1[15]) << 18);
218     uint32_t x18 = ((uint32_t)(arg1[14]) << 10);
219     uint32_t x19 = ((uint32_t)(arg1[13]) << 2);
220     uint32_t x20 = ((uint32_t)(arg1[12]) << 19);
221     uint32_t x21 = ((uint32_t)(arg1[11]) << 11);
222     uint32_t x22 = ((uint32_t)(arg1[10]) << 3);
223     uint32_t x23 = ((uint32_t)(arg1[9]) << 21);
224     uint32_t x24 = ((uint32_t)(arg1[8]) << 13);
225     uint32_t x25 = ((uint32_t)(arg1[7]) << 5);
226     uint32_t x26 = ((uint32_t)(arg1[6]) << 22);
227     uint32_t x27 = ((uint32_t)(arg1[5]) << 14);
228     uint32_t x28 = ((uint32_t)(arg1[4]) << 6);
229     uint32_t x29 = ((uint32_t)(arg1[3]) << 24);
230     uint32_t x30 = ((uint32_t)(arg1[2]) << 16);
231     uint32_t x31 = ((uint32_t)(arg1[1]) << 8);
232     uint8_t x32 = (arg1[0]);
233     uint32_t x33 = (x32 + (x31 + (x30 + x29)));
234     uint8_t x34 = (uint8_t)(x33 >> 26);
235     uint32_t x35 = (x33 & UINT32_C(0x3ffffff));
236     uint32_t x36 = (x3 + (x2 + x1));
237     uint32_t x37 = (x6 + (x5 + x4));
238     uint32_t x38 = (x9 + (x8 + x7));
239     uint32_t x39 = (x12 + (x11 + x10));
240     uint32_t x40 = (x16 + (x15 + (x14 + x13)));
241     uint32_t x41 = (x19 + (x18 + x17));
242     uint32_t x42 = (x22 + (x21 + x20));
243     uint32_t x43 = (x25 + (x24 + x23));
244     uint32_t x44 = (x28 + (x27 + x26));
245     uint32_t x45 = (x34 + x44);
246     uint8_t x46 = (uint8_t)(x45 >> 25);
247     uint32_t x47 = (x45 & UINT32_C(0x1ffffff));
248     uint32_t x48 = (x46 + x43);
249     uint8_t x49 = (uint8_t)(x48 >> 26);
250     uint32_t x50 = (x48 & UINT32_C(0x3ffffff));
251     uint32_t x51 = (x49 + x42);
252     uint8_t x52 = (uint8_t)(x51 >> 25);
253     uint32_t x53 = (x51 & UINT32_C(0x1ffffff));
254     uint32_t x54 = (x52 + x41);
255     uint32_t x55 = (x54 & UINT32_C(0x3ffffff));
256     uint8_t x56 = (uint8_t)(x40 >> 25);
257     uint32_t x57 = (x40 & UINT32_C(0x1ffffff));
258     uint32_t x58 = (x56 + x39);
259     uint8_t x59 = (uint8_t)(x58 >> 26);
260     uint32_t x60 = (x58 & UINT32_C(0x3ffffff));
261     uint32_t x61 = (x59 + x38);
262     uint8_t x62 = (uint8_t)(x61 >> 25);
263     uint32_t x63 = (x61 & UINT32_C(0x1ffffff));
264     uint32_t x64 = (x62 + x37);
265     uint8_t x65 = (uint8_t)(x64 >> 26);
266     uint32_t x66 = (x64 & UINT32_C(0x3ffffff));
267     uint32_t x67 = (x65 + x36);
268     out1[0] = x35;
269     out1[1] = x47;
270     out1[2] = x50;
271     out1[3] = x53;
272     out1[4] = x55;
273     out1[5] = x57;
274     out1[6] = x60;
275     out1[7] = x63;
276     out1[8] = x66;
277     out1[9] = x67;
278 }
279 
280 static void
fe_frombytes_strict(fe * h,const uint8_t s[32])281 fe_frombytes_strict(fe *h, const uint8_t s[32])
282 {
283     // |fiat_25519_from_bytes| requires the top-most bit be clear.
284     PORT_Assert((s[31] & 0x80) == 0);
285     fiat_25519_from_bytes(h->v, s);
286     assert_fe(h->v);
287 }
288 
289 static inline void
fe_frombytes(fe * h,const uint8_t * s)290 fe_frombytes(fe *h, const uint8_t *s)
291 {
292     uint8_t s_copy[32];
293     memcpy(s_copy, s, 32);
294     s_copy[31] &= 0x7f;
295     fe_frombytes_strict(h, s_copy);
296 }
297 
298 /*
299  * The function fiat_25519_to_bytes serializes a field element to bytes in little-endian order.
300  * Postconditions:
301  *   out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..31]
302  *
303  * Input Bounds:
304  *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
305  * Output Bounds:
306  *   out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x7f]]
307  */
308 static void
fiat_25519_to_bytes(uint8_t out1[32],const uint32_t arg1[10])309 fiat_25519_to_bytes(uint8_t out1[32], const uint32_t arg1[10])
310 {
311     uint32_t x1;
312     uint8_t x2;
313     fiat_25519_subborrowx_u26(&x1, &x2, 0x0, (arg1[0]), UINT32_C(0x3ffffed));
314     uint32_t x3;
315     uint8_t x4;
316     fiat_25519_subborrowx_u25(&x3, &x4, x2, (arg1[1]), UINT32_C(0x1ffffff));
317     uint32_t x5;
318     uint8_t x6;
319     fiat_25519_subborrowx_u26(&x5, &x6, x4, (arg1[2]), UINT32_C(0x3ffffff));
320     uint32_t x7;
321     uint8_t x8;
322     fiat_25519_subborrowx_u25(&x7, &x8, x6, (arg1[3]), UINT32_C(0x1ffffff));
323     uint32_t x9;
324     uint8_t x10;
325     fiat_25519_subborrowx_u26(&x9, &x10, x8, (arg1[4]), UINT32_C(0x3ffffff));
326     uint32_t x11;
327     uint8_t x12;
328     fiat_25519_subborrowx_u25(&x11, &x12, x10, (arg1[5]), UINT32_C(0x1ffffff));
329     uint32_t x13;
330     uint8_t x14;
331     fiat_25519_subborrowx_u26(&x13, &x14, x12, (arg1[6]), UINT32_C(0x3ffffff));
332     uint32_t x15;
333     uint8_t x16;
334     fiat_25519_subborrowx_u25(&x15, &x16, x14, (arg1[7]), UINT32_C(0x1ffffff));
335     uint32_t x17;
336     uint8_t x18;
337     fiat_25519_subborrowx_u26(&x17, &x18, x16, (arg1[8]), UINT32_C(0x3ffffff));
338     uint32_t x19;
339     uint8_t x20;
340     fiat_25519_subborrowx_u25(&x19, &x20, x18, (arg1[9]), UINT32_C(0x1ffffff));
341     uint32_t x21;
342     fiat_25519_cmovznz_u32(&x21, x20, 0x0, UINT32_C(0xffffffff));
343     uint32_t x22;
344     uint8_t x23;
345     fiat_25519_addcarryx_u26(&x22, &x23, 0x0, x1, (x21 & UINT32_C(0x3ffffed)));
346     uint32_t x24;
347     uint8_t x25;
348     fiat_25519_addcarryx_u25(&x24, &x25, x23, x3, (x21 & UINT32_C(0x1ffffff)));
349     uint32_t x26;
350     uint8_t x27;
351     fiat_25519_addcarryx_u26(&x26, &x27, x25, x5, (x21 & UINT32_C(0x3ffffff)));
352     uint32_t x28;
353     uint8_t x29;
354     fiat_25519_addcarryx_u25(&x28, &x29, x27, x7, (x21 & UINT32_C(0x1ffffff)));
355     uint32_t x30;
356     uint8_t x31;
357     fiat_25519_addcarryx_u26(&x30, &x31, x29, x9, (x21 & UINT32_C(0x3ffffff)));
358     uint32_t x32;
359     uint8_t x33;
360     fiat_25519_addcarryx_u25(&x32, &x33, x31, x11, (x21 & UINT32_C(0x1ffffff)));
361     uint32_t x34;
362     uint8_t x35;
363     fiat_25519_addcarryx_u26(&x34, &x35, x33, x13, (x21 & UINT32_C(0x3ffffff)));
364     uint32_t x36;
365     uint8_t x37;
366     fiat_25519_addcarryx_u25(&x36, &x37, x35, x15, (x21 & UINT32_C(0x1ffffff)));
367     uint32_t x38;
368     uint8_t x39;
369     fiat_25519_addcarryx_u26(&x38, &x39, x37, x17, (x21 & UINT32_C(0x3ffffff)));
370     uint32_t x40;
371     uint8_t x41;
372     fiat_25519_addcarryx_u25(&x40, &x41, x39, x19, (x21 & UINT32_C(0x1ffffff)));
373     uint32_t x42 = (x40 << 6);
374     uint32_t x43 = (x38 << 4);
375     uint32_t x44 = (x36 << 3);
376     uint32_t x45 = (x34 * (uint32_t)0x2);
377     uint32_t x46 = (x30 << 6);
378     uint32_t x47 = (x28 << 5);
379     uint32_t x48 = (x26 << 3);
380     uint32_t x49 = (x24 << 2);
381     uint32_t x50 = (x22 >> 8);
382     uint8_t x51 = (uint8_t)(x22 & UINT8_C(0xff));
383     uint32_t x52 = (x50 >> 8);
384     uint8_t x53 = (uint8_t)(x50 & UINT8_C(0xff));
385     uint8_t x54 = (uint8_t)(x52 >> 8);
386     uint8_t x55 = (uint8_t)(x52 & UINT8_C(0xff));
387     uint32_t x56 = (x54 + x49);
388     uint32_t x57 = (x56 >> 8);
389     uint8_t x58 = (uint8_t)(x56 & UINT8_C(0xff));
390     uint32_t x59 = (x57 >> 8);
391     uint8_t x60 = (uint8_t)(x57 & UINT8_C(0xff));
392     uint8_t x61 = (uint8_t)(x59 >> 8);
393     uint8_t x62 = (uint8_t)(x59 & UINT8_C(0xff));
394     uint32_t x63 = (x61 + x48);
395     uint32_t x64 = (x63 >> 8);
396     uint8_t x65 = (uint8_t)(x63 & UINT8_C(0xff));
397     uint32_t x66 = (x64 >> 8);
398     uint8_t x67 = (uint8_t)(x64 & UINT8_C(0xff));
399     uint8_t x68 = (uint8_t)(x66 >> 8);
400     uint8_t x69 = (uint8_t)(x66 & UINT8_C(0xff));
401     uint32_t x70 = (x68 + x47);
402     uint32_t x71 = (x70 >> 8);
403     uint8_t x72 = (uint8_t)(x70 & UINT8_C(0xff));
404     uint32_t x73 = (x71 >> 8);
405     uint8_t x74 = (uint8_t)(x71 & UINT8_C(0xff));
406     uint8_t x75 = (uint8_t)(x73 >> 8);
407     uint8_t x76 = (uint8_t)(x73 & UINT8_C(0xff));
408     uint32_t x77 = (x75 + x46);
409     uint32_t x78 = (x77 >> 8);
410     uint8_t x79 = (uint8_t)(x77 & UINT8_C(0xff));
411     uint32_t x80 = (x78 >> 8);
412     uint8_t x81 = (uint8_t)(x78 & UINT8_C(0xff));
413     uint8_t x82 = (uint8_t)(x80 >> 8);
414     uint8_t x83 = (uint8_t)(x80 & UINT8_C(0xff));
415     uint8_t x84 = (uint8_t)(x82 & UINT8_C(0xff));
416     uint32_t x85 = (x32 >> 8);
417     uint8_t x86 = (uint8_t)(x32 & UINT8_C(0xff));
418     uint32_t x87 = (x85 >> 8);
419     uint8_t x88 = (uint8_t)(x85 & UINT8_C(0xff));
420     uint8_t x89 = (uint8_t)(x87 >> 8);
421     uint8_t x90 = (uint8_t)(x87 & UINT8_C(0xff));
422     uint32_t x91 = (x89 + x45);
423     uint32_t x92 = (x91 >> 8);
424     uint8_t x93 = (uint8_t)(x91 & UINT8_C(0xff));
425     uint32_t x94 = (x92 >> 8);
426     uint8_t x95 = (uint8_t)(x92 & UINT8_C(0xff));
427     uint8_t x96 = (uint8_t)(x94 >> 8);
428     uint8_t x97 = (uint8_t)(x94 & UINT8_C(0xff));
429     uint32_t x98 = (x96 + x44);
430     uint32_t x99 = (x98 >> 8);
431     uint8_t x100 = (uint8_t)(x98 & UINT8_C(0xff));
432     uint32_t x101 = (x99 >> 8);
433     uint8_t x102 = (uint8_t)(x99 & UINT8_C(0xff));
434     uint8_t x103 = (uint8_t)(x101 >> 8);
435     uint8_t x104 = (uint8_t)(x101 & UINT8_C(0xff));
436     uint32_t x105 = (x103 + x43);
437     uint32_t x106 = (x105 >> 8);
438     uint8_t x107 = (uint8_t)(x105 & UINT8_C(0xff));
439     uint32_t x108 = (x106 >> 8);
440     uint8_t x109 = (uint8_t)(x106 & UINT8_C(0xff));
441     uint8_t x110 = (uint8_t)(x108 >> 8);
442     uint8_t x111 = (uint8_t)(x108 & UINT8_C(0xff));
443     uint32_t x112 = (x110 + x42);
444     uint32_t x113 = (x112 >> 8);
445     uint8_t x114 = (uint8_t)(x112 & UINT8_C(0xff));
446     uint32_t x115 = (x113 >> 8);
447     uint8_t x116 = (uint8_t)(x113 & UINT8_C(0xff));
448     uint8_t x117 = (uint8_t)(x115 >> 8);
449     uint8_t x118 = (uint8_t)(x115 & UINT8_C(0xff));
450     out1[0] = x51;
451     out1[1] = x53;
452     out1[2] = x55;
453     out1[3] = x58;
454     out1[4] = x60;
455     out1[5] = x62;
456     out1[6] = x65;
457     out1[7] = x67;
458     out1[8] = x69;
459     out1[9] = x72;
460     out1[10] = x74;
461     out1[11] = x76;
462     out1[12] = x79;
463     out1[13] = x81;
464     out1[14] = x83;
465     out1[15] = x84;
466     out1[16] = x86;
467     out1[17] = x88;
468     out1[18] = x90;
469     out1[19] = x93;
470     out1[20] = x95;
471     out1[21] = x97;
472     out1[22] = x100;
473     out1[23] = x102;
474     out1[24] = x104;
475     out1[25] = x107;
476     out1[26] = x109;
477     out1[27] = x111;
478     out1[28] = x114;
479     out1[29] = x116;
480     out1[30] = x118;
481     out1[31] = x117;
482 }
483 
484 static inline void
fe_tobytes(uint8_t s[32],const fe * f)485 fe_tobytes(uint8_t s[32], const fe *f)
486 {
487     assert_fe(f->v);
488     fiat_25519_to_bytes(s, f->v);
489 }
490 
491 /* h = f */
492 static inline void
fe_copy(fe * h,const fe * f)493 fe_copy(fe *h, const fe *f)
494 {
495     memmove(h, f, sizeof(fe));
496 }
497 
498 static inline void
fe_copy_lt(fe_loose * h,const fe * f)499 fe_copy_lt(fe_loose *h, const fe *f)
500 {
501     PORT_Assert(sizeof(fe) == sizeof(fe_loose));
502     memmove(h, f, sizeof(fe));
503 }
504 
505 /*
506  * h = 0
507  */
508 static inline void
fe_0(fe * h)509 fe_0(fe *h)
510 {
511     memset(h, 0, sizeof(fe));
512 }
513 
514 /*
515  * h = 1
516  */
517 static inline void
fe_1(fe * h)518 fe_1(fe *h)
519 {
520     memset(h, 0, sizeof(fe));
521     h->v[0] = 1;
522 }
523 /*
524  * The function fiat_25519_add adds two field elements.
525  * Postconditions:
526  *   eval out1 mod m = (eval arg1 + eval arg2) mod m
527  *
528  * Input Bounds:
529  *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
530  *   arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
531  * Output Bounds:
532  *   out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
533  */
534 static void
fiat_25519_add(uint32_t out1[10],const uint32_t arg1[10],const uint32_t arg2[10])535 fiat_25519_add(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
536 {
537     uint32_t x1 = ((arg1[0]) + (arg2[0]));
538     uint32_t x2 = ((arg1[1]) + (arg2[1]));
539     uint32_t x3 = ((arg1[2]) + (arg2[2]));
540     uint32_t x4 = ((arg1[3]) + (arg2[3]));
541     uint32_t x5 = ((arg1[4]) + (arg2[4]));
542     uint32_t x6 = ((arg1[5]) + (arg2[5]));
543     uint32_t x7 = ((arg1[6]) + (arg2[6]));
544     uint32_t x8 = ((arg1[7]) + (arg2[7]));
545     uint32_t x9 = ((arg1[8]) + (arg2[8]));
546     uint32_t x10 = ((arg1[9]) + (arg2[9]));
547     out1[0] = x1;
548     out1[1] = x2;
549     out1[2] = x3;
550     out1[3] = x4;
551     out1[4] = x5;
552     out1[5] = x6;
553     out1[6] = x7;
554     out1[7] = x8;
555     out1[8] = x9;
556     out1[9] = x10;
557 }
558 
559 /*
560  * Add two field elements.
561  * h = f + g
562  */
563 static inline void
fe_add(fe_loose * h,const fe * f,const fe * g)564 fe_add(fe_loose *h, const fe *f, const fe *g)
565 {
566     assert_fe(f->v);
567     assert_fe(g->v);
568     fiat_25519_add(h->v, f->v, g->v);
569     assert_fe_loose(h->v);
570 }
571 
572 /*
573  * The function fiat_25519_sub subtracts two field elements.
574  * Postconditions:
575  *   eval out1 mod m = (eval arg1 - eval arg2) mod m
576  *
577  * Input Bounds:
578  *   arg1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
579  *   arg2: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
580  * Output Bounds:
581  *   out1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
582  */
583 static void
fiat_25519_sub(uint32_t out1[10],const uint32_t arg1[10],const uint32_t arg2[10])584 fiat_25519_sub(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
585 {
586     uint32_t x1 = ((UINT32_C(0x7ffffda) + (arg1[0])) - (arg2[0]));
587     uint32_t x2 = ((UINT32_C(0x3fffffe) + (arg1[1])) - (arg2[1]));
588     uint32_t x3 = ((UINT32_C(0x7fffffe) + (arg1[2])) - (arg2[2]));
589     uint32_t x4 = ((UINT32_C(0x3fffffe) + (arg1[3])) - (arg2[3]));
590     uint32_t x5 = ((UINT32_C(0x7fffffe) + (arg1[4])) - (arg2[4]));
591     uint32_t x6 = ((UINT32_C(0x3fffffe) + (arg1[5])) - (arg2[5]));
592     uint32_t x7 = ((UINT32_C(0x7fffffe) + (arg1[6])) - (arg2[6]));
593     uint32_t x8 = ((UINT32_C(0x3fffffe) + (arg1[7])) - (arg2[7]));
594     uint32_t x9 = ((UINT32_C(0x7fffffe) + (arg1[8])) - (arg2[8]));
595     uint32_t x10 = ((UINT32_C(0x3fffffe) + (arg1[9])) - (arg2[9]));
596     out1[0] = x1;
597     out1[1] = x2;
598     out1[2] = x3;
599     out1[3] = x4;
600     out1[4] = x5;
601     out1[5] = x6;
602     out1[6] = x7;
603     out1[7] = x8;
604     out1[8] = x9;
605     out1[9] = x10;
606 }
607 
608 /*
609  * Subtract two field elements.
610  * h = f - g
611  */
612 static void
fe_sub(fe_loose * h,const fe * f,const fe * g)613 fe_sub(fe_loose *h, const fe *f, const fe *g)
614 {
615     assert_fe(f->v);
616     assert_fe(g->v);
617     fiat_25519_sub(h->v, f->v, g->v);
618     assert_fe_loose(h->v);
619 }
620 
621 /*
622  * The function fiat_25519_carry_mul multiplies two field elements and reduces the result.
623  * Postconditions:
624  *   eval out1 mod m = (eval arg1 * eval arg2) mod m
625  *
626  * Input Bounds:
627  *   arg1: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
628  *   arg2: [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999], [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
629  * Output Bounds:
630  *   out1: [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333], [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
631  */
632 static void
fiat_25519_carry_mul(uint32_t out1[10],const uint32_t arg1[10],const uint32_t arg2[10])633 fiat_25519_carry_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
634 {
635     uint64_t x1 = ((uint64_t)(arg1[9]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
636     uint64_t x2 = ((uint64_t)(arg1[9]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
637     uint64_t x3 = ((uint64_t)(arg1[9]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
638     uint64_t x4 = ((uint64_t)(arg1[9]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
639     uint64_t x5 = ((uint64_t)(arg1[9]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
640     uint64_t x6 = ((uint64_t)(arg1[9]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
641     uint64_t x7 = ((uint64_t)(arg1[9]) * ((arg2[3]) * ((uint32_t)0x2 * UINT8_C(0x13))));
642     uint64_t x8 = ((uint64_t)(arg1[9]) * ((arg2[2]) * (uint32_t)UINT8_C(0x13)));
643     uint64_t x9 = ((uint64_t)(arg1[9]) * ((arg2[1]) * ((uint32_t)0x2 * UINT8_C(0x13))));
644     uint64_t x10 = ((uint64_t)(arg1[8]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
645     uint64_t x11 = ((uint64_t)(arg1[8]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
646     uint64_t x12 = ((uint64_t)(arg1[8]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
647     uint64_t x13 = ((uint64_t)(arg1[8]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
648     uint64_t x14 = ((uint64_t)(arg1[8]) * ((arg2[5]) * (uint32_t)UINT8_C(0x13)));
649     uint64_t x15 = ((uint64_t)(arg1[8]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
650     uint64_t x16 = ((uint64_t)(arg1[8]) * ((arg2[3]) * (uint32_t)UINT8_C(0x13)));
651     uint64_t x17 = ((uint64_t)(arg1[8]) * ((arg2[2]) * (uint32_t)UINT8_C(0x13)));
652     uint64_t x18 = ((uint64_t)(arg1[7]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
653     uint64_t x19 = ((uint64_t)(arg1[7]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
654     uint64_t x20 = ((uint64_t)(arg1[7]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
655     uint64_t x21 = ((uint64_t)(arg1[7]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
656     uint64_t x22 = ((uint64_t)(arg1[7]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
657     uint64_t x23 = ((uint64_t)(arg1[7]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
658     uint64_t x24 = ((uint64_t)(arg1[7]) * ((arg2[3]) * ((uint32_t)0x2 * UINT8_C(0x13))));
659     uint64_t x25 = ((uint64_t)(arg1[6]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
660     uint64_t x26 = ((uint64_t)(arg1[6]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
661     uint64_t x27 = ((uint64_t)(arg1[6]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
662     uint64_t x28 = ((uint64_t)(arg1[6]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
663     uint64_t x29 = ((uint64_t)(arg1[6]) * ((arg2[5]) * (uint32_t)UINT8_C(0x13)));
664     uint64_t x30 = ((uint64_t)(arg1[6]) * ((arg2[4]) * (uint32_t)UINT8_C(0x13)));
665     uint64_t x31 = ((uint64_t)(arg1[5]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
666     uint64_t x32 = ((uint64_t)(arg1[5]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
667     uint64_t x33 = ((uint64_t)(arg1[5]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
668     uint64_t x34 = ((uint64_t)(arg1[5]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
669     uint64_t x35 = ((uint64_t)(arg1[5]) * ((arg2[5]) * ((uint32_t)0x2 * UINT8_C(0x13))));
670     uint64_t x36 = ((uint64_t)(arg1[4]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
671     uint64_t x37 = ((uint64_t)(arg1[4]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
672     uint64_t x38 = ((uint64_t)(arg1[4]) * ((arg2[7]) * (uint32_t)UINT8_C(0x13)));
673     uint64_t x39 = ((uint64_t)(arg1[4]) * ((arg2[6]) * (uint32_t)UINT8_C(0x13)));
674     uint64_t x40 = ((uint64_t)(arg1[3]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
675     uint64_t x41 = ((uint64_t)(arg1[3]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
676     uint64_t x42 = ((uint64_t)(arg1[3]) * ((arg2[7]) * ((uint32_t)0x2 * UINT8_C(0x13))));
677     uint64_t x43 = ((uint64_t)(arg1[2]) * ((arg2[9]) * (uint32_t)UINT8_C(0x13)));
678     uint64_t x44 = ((uint64_t)(arg1[2]) * ((arg2[8]) * (uint32_t)UINT8_C(0x13)));
679     uint64_t x45 = ((uint64_t)(arg1[1]) * ((arg2[9]) * ((uint32_t)0x2 * UINT8_C(0x13))));
680     uint64_t x46 = ((uint64_t)(arg1[9]) * (arg2[0]));
681     uint64_t x47 = ((uint64_t)(arg1[8]) * (arg2[1]));
682     uint64_t x48 = ((uint64_t)(arg1[8]) * (arg2[0]));
683     uint64_t x49 = ((uint64_t)(arg1[7]) * (arg2[2]));
684     uint64_t x50 = ((uint64_t)(arg1[7]) * ((arg2[1]) * (uint32_t)0x2));
685     uint64_t x51 = ((uint64_t)(arg1[7]) * (arg2[0]));
686     uint64_t x52 = ((uint64_t)(arg1[6]) * (arg2[3]));
687     uint64_t x53 = ((uint64_t)(arg1[6]) * (arg2[2]));
688     uint64_t x54 = ((uint64_t)(arg1[6]) * (arg2[1]));
689     uint64_t x55 = ((uint64_t)(arg1[6]) * (arg2[0]));
690     uint64_t x56 = ((uint64_t)(arg1[5]) * (arg2[4]));
691     uint64_t x57 = ((uint64_t)(arg1[5]) * ((arg2[3]) * (uint32_t)0x2));
692     uint64_t x58 = ((uint64_t)(arg1[5]) * (arg2[2]));
693     uint64_t x59 = ((uint64_t)(arg1[5]) * ((arg2[1]) * (uint32_t)0x2));
694     uint64_t x60 = ((uint64_t)(arg1[5]) * (arg2[0]));
695     uint64_t x61 = ((uint64_t)(arg1[4]) * (arg2[5]));
696     uint64_t x62 = ((uint64_t)(arg1[4]) * (arg2[4]));
697     uint64_t x63 = ((uint64_t)(arg1[4]) * (arg2[3]));
698     uint64_t x64 = ((uint64_t)(arg1[4]) * (arg2[2]));
699     uint64_t x65 = ((uint64_t)(arg1[4]) * (arg2[1]));
700     uint64_t x66 = ((uint64_t)(arg1[4]) * (arg2[0]));
701     uint64_t x67 = ((uint64_t)(arg1[3]) * (arg2[6]));
702     uint64_t x68 = ((uint64_t)(arg1[3]) * ((arg2[5]) * (uint32_t)0x2));
703     uint64_t x69 = ((uint64_t)(arg1[3]) * (arg2[4]));
704     uint64_t x70 = ((uint64_t)(arg1[3]) * ((arg2[3]) * (uint32_t)0x2));
705     uint64_t x71 = ((uint64_t)(arg1[3]) * (arg2[2]));
706     uint64_t x72 = ((uint64_t)(arg1[3]) * ((arg2[1]) * (uint32_t)0x2));
707     uint64_t x73 = ((uint64_t)(arg1[3]) * (arg2[0]));
708     uint64_t x74 = ((uint64_t)(arg1[2]) * (arg2[7]));
709     uint64_t x75 = ((uint64_t)(arg1[2]) * (arg2[6]));
710     uint64_t x76 = ((uint64_t)(arg1[2]) * (arg2[5]));
711     uint64_t x77 = ((uint64_t)(arg1[2]) * (arg2[4]));
712     uint64_t x78 = ((uint64_t)(arg1[2]) * (arg2[3]));
713     uint64_t x79 = ((uint64_t)(arg1[2]) * (arg2[2]));
714     uint64_t x80 = ((uint64_t)(arg1[2]) * (arg2[1]));
715     uint64_t x81 = ((uint64_t)(arg1[2]) * (arg2[0]));
716     uint64_t x82 = ((uint64_t)(arg1[1]) * (arg2[8]));
717     uint64_t x83 = ((uint64_t)(arg1[1]) * ((arg2[7]) * (uint32_t)0x2));
718     uint64_t x84 = ((uint64_t)(arg1[1]) * (arg2[6]));
719     uint64_t x85 = ((uint64_t)(arg1[1]) * ((arg2[5]) * (uint32_t)0x2));
720     uint64_t x86 = ((uint64_t)(arg1[1]) * (arg2[4]));
721     uint64_t x87 = ((uint64_t)(arg1[1]) * ((arg2[3]) * (uint32_t)0x2));
722     uint64_t x88 = ((uint64_t)(arg1[1]) * (arg2[2]));
723     uint64_t x89 = ((uint64_t)(arg1[1]) * ((arg2[1]) * (uint32_t)0x2));
724     uint64_t x90 = ((uint64_t)(arg1[1]) * (arg2[0]));
725     uint64_t x91 = ((uint64_t)(arg1[0]) * (arg2[9]));
726     uint64_t x92 = ((uint64_t)(arg1[0]) * (arg2[8]));
727     uint64_t x93 = ((uint64_t)(arg1[0]) * (arg2[7]));
728     uint64_t x94 = ((uint64_t)(arg1[0]) * (arg2[6]));
729     uint64_t x95 = ((uint64_t)(arg1[0]) * (arg2[5]));
730     uint64_t x96 = ((uint64_t)(arg1[0]) * (arg2[4]));
731     uint64_t x97 = ((uint64_t)(arg1[0]) * (arg2[3]));
732     uint64_t x98 = ((uint64_t)(arg1[0]) * (arg2[2]));
733     uint64_t x99 = ((uint64_t)(arg1[0]) * (arg2[1]));
734     uint64_t x100 = ((uint64_t)(arg1[0]) * (arg2[0]));
735     uint64_t x101 = (x100 + (x45 + (x44 + (x42 + (x39 + (x35 + (x30 + (x24 + (x17 + x9)))))))));
736     uint64_t x102 = (x101 >> 26);
737     uint32_t x103 = (uint32_t)(x101 & UINT32_C(0x3ffffff));
738     uint64_t x104 = (x91 + (x82 + (x74 + (x67 + (x61 + (x56 + (x52 + (x49 + (x47 + x46)))))))));
739     uint64_t x105 = (x92 + (x83 + (x75 + (x68 + (x62 + (x57 + (x53 + (x50 + (x48 + x1)))))))));
740     uint64_t x106 = (x93 + (x84 + (x76 + (x69 + (x63 + (x58 + (x54 + (x51 + (x10 + x2)))))))));
741     uint64_t x107 = (x94 + (x85 + (x77 + (x70 + (x64 + (x59 + (x55 + (x18 + (x11 + x3)))))))));
742     uint64_t x108 = (x95 + (x86 + (x78 + (x71 + (x65 + (x60 + (x25 + (x19 + (x12 + x4)))))))));
743     uint64_t x109 = (x96 + (x87 + (x79 + (x72 + (x66 + (x31 + (x26 + (x20 + (x13 + x5)))))))));
744     uint64_t x110 = (x97 + (x88 + (x80 + (x73 + (x36 + (x32 + (x27 + (x21 + (x14 + x6)))))))));
745     uint64_t x111 = (x98 + (x89 + (x81 + (x40 + (x37 + (x33 + (x28 + (x22 + (x15 + x7)))))))));
746     uint64_t x112 = (x99 + (x90 + (x43 + (x41 + (x38 + (x34 + (x29 + (x23 + (x16 + x8)))))))));
747     uint64_t x113 = (x102 + x112);
748     uint64_t x114 = (x113 >> 25);
749     uint32_t x115 = (uint32_t)(x113 & UINT32_C(0x1ffffff));
750     uint64_t x116 = (x114 + x111);
751     uint64_t x117 = (x116 >> 26);
752     uint32_t x118 = (uint32_t)(x116 & UINT32_C(0x3ffffff));
753     uint64_t x119 = (x117 + x110);
754     uint64_t x120 = (x119 >> 25);
755     uint32_t x121 = (uint32_t)(x119 & UINT32_C(0x1ffffff));
756     uint64_t x122 = (x120 + x109);
757     uint64_t x123 = (x122 >> 26);
758     uint32_t x124 = (uint32_t)(x122 & UINT32_C(0x3ffffff));
759     uint64_t x125 = (x123 + x108);
760     uint64_t x126 = (x125 >> 25);
761     uint32_t x127 = (uint32_t)(x125 & UINT32_C(0x1ffffff));
762     uint64_t x128 = (x126 + x107);
763     uint64_t x129 = (x128 >> 26);
764     uint32_t x130 = (uint32_t)(x128 & UINT32_C(0x3ffffff));
765     uint64_t x131 = (x129 + x106);
766     uint64_t x132 = (x131 >> 25);
767     uint32_t x133 = (uint32_t)(x131 & UINT32_C(0x1ffffff));
768     uint64_t x134 = (x132 + x105);
769     uint64_t x135 = (x134 >> 26);
770     uint32_t x136 = (uint32_t)(x134 & UINT32_C(0x3ffffff));
771     uint64_t x137 = (x135 + x104);
772     uint64_t x138 = (x137 >> 25);
773     uint32_t x139 = (uint32_t)(x137 & UINT32_C(0x1ffffff));
774     uint64_t x140 = (x138 * (uint64_t)UINT8_C(0x13));
775     uint64_t x141 = (x103 + x140);
776     uint32_t x142 = (uint32_t)(x141 >> 26);
777     uint32_t x143 = (uint32_t)(x141 & UINT32_C(0x3ffffff));
778     uint32_t x144 = (x142 + x115);
779     uint32_t x145 = (x144 >> 25);
780     uint32_t x146 = (x144 & UINT32_C(0x1ffffff));
781     uint32_t x147 = (x145 + x118);
782     out1[0] = x143;
783     out1[1] = x146;
784     out1[2] = x147;
785     out1[3] = x121;
786     out1[4] = x124;
787     out1[5] = x127;
788     out1[6] = x130;
789     out1[7] = x133;
790     out1[8] = x136;
791     out1[9] = x139;
792 }
793 
794 static void
fe_mul(uint32_t out1[10],const uint32_t arg1[10],const uint32_t arg2[10])795 fe_mul(uint32_t out1[10], const uint32_t arg1[10], const uint32_t arg2[10])
796 {
797     assert_fe_loose(arg1);
798     assert_fe_loose(arg2);
799     fiat_25519_carry_mul(out1, arg1, arg2);
800     assert_fe(out1);
801 }
802 
803 static void
fe_mul_ttt(fe * h,const fe * f,const fe * g)804 fe_mul_ttt(fe *h, const fe *f, const fe *g)
805 {
806     fe_mul(h->v, f->v, g->v);
807 }
808 
809 static void
fe_mul_tlt(fe * h,const fe_loose * f,const fe * g)810 fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
811 {
812     fe_mul(h->v, f->v, g->v);
813 }
814 
815 static void
fe_mul_tll(fe * h,const fe_loose * f,const fe_loose * g)816 fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
817 {
818     fe_mul(h->v, f->v, g->v);
819 }
820 
821 static void
fe_sq(uint32_t out[10],const uint32_t in1[10])822 fe_sq(uint32_t out[10], const uint32_t in1[10])
823 {
824     const uint32_t x17 = in1[9];
825     const uint32_t x18 = in1[8];
826     const uint32_t x16 = in1[7];
827     const uint32_t x14 = in1[6];
828     const uint32_t x12 = in1[5];
829     const uint32_t x10 = in1[4];
830     const uint32_t x8 = in1[3];
831     const uint32_t x6 = in1[2];
832     const uint32_t x4 = in1[1];
833     const uint32_t x2 = in1[0];
834     uint64_t x19 = ((uint64_t)x2 * x2);
835     uint64_t x20 = ((uint64_t)(0x2 * x2) * x4);
836     uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6)));
837     uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8)));
838     uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10));
839     uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12)));
840     uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12)));
841     uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16)));
842     uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12))))));
843     uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17)));
844     uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17)))));
845     uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17)));
846     uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17))))));
847     uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17)));
848     uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17)));
849     uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17)));
850     uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17));
851     uint64_t x36 = ((uint64_t)(0x2 * x18) * x17);
852     uint64_t x37 = ((uint64_t)(0x2 * x17) * x17);
853     uint64_t x38 = (x27 + (x37 << 0x4));
854     uint64_t x39 = (x38 + (x37 << 0x1));
855     uint64_t x40 = (x39 + x37);
856     uint64_t x41 = (x26 + (x36 << 0x4));
857     uint64_t x42 = (x41 + (x36 << 0x1));
858     uint64_t x43 = (x42 + x36);
859     uint64_t x44 = (x25 + (x35 << 0x4));
860     uint64_t x45 = (x44 + (x35 << 0x1));
861     uint64_t x46 = (x45 + x35);
862     uint64_t x47 = (x24 + (x34 << 0x4));
863     uint64_t x48 = (x47 + (x34 << 0x1));
864     uint64_t x49 = (x48 + x34);
865     uint64_t x50 = (x23 + (x33 << 0x4));
866     uint64_t x51 = (x50 + (x33 << 0x1));
867     uint64_t x52 = (x51 + x33);
868     uint64_t x53 = (x22 + (x32 << 0x4));
869     uint64_t x54 = (x53 + (x32 << 0x1));
870     uint64_t x55 = (x54 + x32);
871     uint64_t x56 = (x21 + (x31 << 0x4));
872     uint64_t x57 = (x56 + (x31 << 0x1));
873     uint64_t x58 = (x57 + x31);
874     uint64_t x59 = (x20 + (x30 << 0x4));
875     uint64_t x60 = (x59 + (x30 << 0x1));
876     uint64_t x61 = (x60 + x30);
877     uint64_t x62 = (x19 + (x29 << 0x4));
878     uint64_t x63 = (x62 + (x29 << 0x1));
879     uint64_t x64 = (x63 + x29);
880     uint64_t x65 = (x64 >> 0x1a);
881     uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
882     uint64_t x67 = (x65 + x61);
883     uint64_t x68 = (x67 >> 0x19);
884     uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
885     uint64_t x70 = (x68 + x58);
886     uint64_t x71 = (x70 >> 0x1a);
887     uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
888     uint64_t x73 = (x71 + x55);
889     uint64_t x74 = (x73 >> 0x19);
890     uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
891     uint64_t x76 = (x74 + x52);
892     uint64_t x77 = (x76 >> 0x1a);
893     uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
894     uint64_t x79 = (x77 + x49);
895     uint64_t x80 = (x79 >> 0x19);
896     uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
897     uint64_t x82 = (x80 + x46);
898     uint64_t x83 = (x82 >> 0x1a);
899     uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
900     uint64_t x85 = (x83 + x43);
901     uint64_t x86 = (x85 >> 0x19);
902     uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
903     uint64_t x88 = (x86 + x40);
904     uint64_t x89 = (x88 >> 0x1a);
905     uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
906     uint64_t x91 = (x89 + x28);
907     uint64_t x92 = (x91 >> 0x19);
908     uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
909     uint64_t x94 = (x66 + (0x13 * x92));
910     uint32_t x95 = (uint32_t)(x94 >> 0x1a);
911     uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
912     uint32_t x97 = (x95 + x69);
913     uint32_t x98 = (x97 >> 0x19);
914     uint32_t x99 = (x97 & 0x1ffffff);
915     out[0] = x96;
916     out[1] = x99;
917     out[2] = (x98 + x72);
918     out[3] = x75;
919     out[4] = x78;
920     out[5] = x81;
921     out[6] = x84;
922     out[7] = x87;
923     out[8] = x90;
924     out[9] = x93;
925 }
926 
927 static void
fe_sq_tl(fe * h,const fe_loose * f)928 fe_sq_tl(fe *h, const fe_loose *f)
929 {
930     fe_sq(h->v, f->v);
931 }
932 
933 static void
fe_sq_tt(fe * h,const fe * f)934 fe_sq_tt(fe *h, const fe *f)
935 {
936     fe_sq(h->v, f->v);
937 }
938 
939 static inline void
fe_loose_invert(fe * out,const fe_loose * z)940 fe_loose_invert(fe *out, const fe_loose *z)
941 {
942     fe t0, t1, t2, t3;
943     int i;
944 
945     fe_sq_tl(&t0, z);
946     fe_sq_tt(&t1, &t0);
947     for (i = 1; i < 2; ++i) {
948         fe_sq_tt(&t1, &t1);
949     }
950     fe_mul_tlt(&t1, z, &t1);
951     fe_mul_ttt(&t0, &t0, &t1);
952     fe_sq_tt(&t2, &t0);
953     fe_mul_ttt(&t1, &t1, &t2);
954     fe_sq_tt(&t2, &t1);
955     for (i = 1; i < 5; ++i) {
956         fe_sq_tt(&t2, &t2);
957     }
958     fe_mul_ttt(&t1, &t2, &t1);
959     fe_sq_tt(&t2, &t1);
960     for (i = 1; i < 10; ++i) {
961         fe_sq_tt(&t2, &t2);
962     }
963     fe_mul_ttt(&t2, &t2, &t1);
964     fe_sq_tt(&t3, &t2);
965     for (i = 1; i < 20; ++i) {
966         fe_sq_tt(&t3, &t3);
967     }
968     fe_mul_ttt(&t2, &t3, &t2);
969     fe_sq_tt(&t2, &t2);
970     for (i = 1; i < 10; ++i) {
971         fe_sq_tt(&t2, &t2);
972     }
973     fe_mul_ttt(&t1, &t2, &t1);
974     fe_sq_tt(&t2, &t1);
975     for (i = 1; i < 50; ++i) {
976         fe_sq_tt(&t2, &t2);
977     }
978     fe_mul_ttt(&t2, &t2, &t1);
979     fe_sq_tt(&t3, &t2);
980     for (i = 1; i < 100; ++i) {
981         fe_sq_tt(&t3, &t3);
982     }
983     fe_mul_ttt(&t2, &t3, &t2);
984     fe_sq_tt(&t2, &t2);
985     for (i = 1; i < 50; ++i) {
986         fe_sq_tt(&t2, &t2);
987     }
988     fe_mul_ttt(&t1, &t2, &t1);
989     fe_sq_tt(&t1, &t1);
990     for (i = 1; i < 5; ++i) {
991         fe_sq_tt(&t1, &t1);
992     }
993     fe_mul_ttt(out, &t1, &t0);
994 }
995 
996 static inline void
fe_invert(fe * out,const fe * z)997 fe_invert(fe *out, const fe *z)
998 {
999     fe_loose l;
1000     fe_copy_lt(&l, z);
1001     fe_loose_invert(out, &l);
1002 }
1003 
1004 /* Replace (f,g) with (g,f) if b == 1;
1005  * replace (f,g) with (f,g) if b == 0.
1006  *
1007  * Preconditions: b in {0,1}
1008  */
1009 static inline void
fe_cswap(fe * f,fe * g,unsigned int b)1010 fe_cswap(fe *f, fe *g, unsigned int b)
1011 {
1012     PORT_Assert(b < 2);
1013     unsigned int i;
1014     b = 0 - b;
1015     for (i = 0; i < 10; i++) {
1016         uint32_t x = f->v[i] ^ g->v[i];
1017         x &= b;
1018         f->v[i] ^= x;
1019         g->v[i] ^= x;
1020     }
1021 }
1022 
1023 /* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
1024 static inline void
fe_mul_121666(uint32_t out[10],const uint32_t in1[10])1025 fe_mul_121666(uint32_t out[10], const uint32_t in1[10])
1026 {
1027     const uint32_t x20 = in1[9];
1028     const uint32_t x21 = in1[8];
1029     const uint32_t x19 = in1[7];
1030     const uint32_t x17 = in1[6];
1031     const uint32_t x15 = in1[5];
1032     const uint32_t x13 = in1[4];
1033     const uint32_t x11 = in1[3];
1034     const uint32_t x9 = in1[2];
1035     const uint32_t x7 = in1[1];
1036     const uint32_t x5 = in1[0];
1037     const uint32_t x38 = 0;
1038     const uint32_t x39 = 0;
1039     const uint32_t x37 = 0;
1040     const uint32_t x35 = 0;
1041     const uint32_t x33 = 0;
1042     const uint32_t x31 = 0;
1043     const uint32_t x29 = 0;
1044     const uint32_t x27 = 0;
1045     const uint32_t x25 = 0;
1046     const uint32_t x23 = 121666;
1047     uint64_t x40 = ((uint64_t)x23 * x5);
1048     uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5));
1049     uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5));
1050     uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5));
1051     uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5));
1052     uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5));
1053     uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5));
1054     uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5));
1055     uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5));
1056     uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5));
1057     uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9));
1058     uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9));
1059     uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13));
1060     uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13));
1061     uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17));
1062     uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17));
1063     uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19))));
1064     uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21));
1065     uint64_t x58 = ((uint64_t)(0x2 * x38) * x20);
1066     uint64_t x59 = (x48 + (x58 << 0x4));
1067     uint64_t x60 = (x59 + (x58 << 0x1));
1068     uint64_t x61 = (x60 + x58);
1069     uint64_t x62 = (x47 + (x57 << 0x4));
1070     uint64_t x63 = (x62 + (x57 << 0x1));
1071     uint64_t x64 = (x63 + x57);
1072     uint64_t x65 = (x46 + (x56 << 0x4));
1073     uint64_t x66 = (x65 + (x56 << 0x1));
1074     uint64_t x67 = (x66 + x56);
1075     uint64_t x68 = (x45 + (x55 << 0x4));
1076     uint64_t x69 = (x68 + (x55 << 0x1));
1077     uint64_t x70 = (x69 + x55);
1078     uint64_t x71 = (x44 + (x54 << 0x4));
1079     uint64_t x72 = (x71 + (x54 << 0x1));
1080     uint64_t x73 = (x72 + x54);
1081     uint64_t x74 = (x43 + (x53 << 0x4));
1082     uint64_t x75 = (x74 + (x53 << 0x1));
1083     uint64_t x76 = (x75 + x53);
1084     uint64_t x77 = (x42 + (x52 << 0x4));
1085     uint64_t x78 = (x77 + (x52 << 0x1));
1086     uint64_t x79 = (x78 + x52);
1087     uint64_t x80 = (x41 + (x51 << 0x4));
1088     uint64_t x81 = (x80 + (x51 << 0x1));
1089     uint64_t x82 = (x81 + x51);
1090     uint64_t x83 = (x40 + (x50 << 0x4));
1091     uint64_t x84 = (x83 + (x50 << 0x1));
1092     uint64_t x85 = (x84 + x50);
1093     uint64_t x86 = (x85 >> 0x1a);
1094     uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
1095     uint64_t x88 = (x86 + x82);
1096     uint64_t x89 = (x88 >> 0x19);
1097     uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
1098     uint64_t x91 = (x89 + x79);
1099     uint64_t x92 = (x91 >> 0x1a);
1100     uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
1101     uint64_t x94 = (x92 + x76);
1102     uint64_t x95 = (x94 >> 0x19);
1103     uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
1104     uint64_t x97 = (x95 + x73);
1105     uint64_t x98 = (x97 >> 0x1a);
1106     uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
1107     uint64_t x100 = (x98 + x70);
1108     uint64_t x101 = (x100 >> 0x19);
1109     uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
1110     uint64_t x103 = (x101 + x67);
1111     uint64_t x104 = (x103 >> 0x1a);
1112     uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
1113     uint64_t x106 = (x104 + x64);
1114     uint64_t x107 = (x106 >> 0x19);
1115     uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
1116     uint64_t x109 = (x107 + x61);
1117     uint64_t x110 = (x109 >> 0x1a);
1118     uint32_t x111 = ((uint32_t)x109 & 0x3ffffff);
1119     uint64_t x112 = (x110 + x49);
1120     uint64_t x113 = (x112 >> 0x19);
1121     uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
1122     uint64_t x115 = (x87 + (0x13 * x113));
1123     uint32_t x116 = (uint32_t)(x115 >> 0x1a);
1124     uint32_t x117 = ((uint32_t)x115 & 0x3ffffff);
1125     uint32_t x118 = (x116 + x90);
1126     uint32_t x119 = (x118 >> 0x19);
1127     uint32_t x120 = (x118 & 0x1ffffff);
1128     out[0] = x117;
1129     out[1] = x120;
1130     out[2] = (x119 + x93);
1131     out[3] = x96;
1132     out[4] = x99;
1133     out[5] = x102;
1134     out[6] = x105;
1135     out[7] = x108;
1136     out[8] = x111;
1137     out[9] = x114;
1138 }
1139 
1140 static void
fe_mul_121666_tl(fe * h,const fe_loose * f)1141 fe_mul_121666_tl(fe *h, const fe_loose *f)
1142 {
1143     assert_fe_loose(f->v);
1144     fe_mul_121666(h->v, f->v);
1145     assert_fe(h->v);
1146 }
1147 
1148 SECStatus
ec_Curve25519_mul(PRUint8 * out,const PRUint8 * scalar,const PRUint8 * point)1149 ec_Curve25519_mul(PRUint8 *out, const PRUint8 *scalar, const PRUint8 *point)
1150 {
1151     fe x1, x2, z2, x3, z3, tmp0, tmp1;
1152     fe_loose x2l, z2l, x3l, tmp0l, tmp1l;
1153     unsigned int swap = 0;
1154     unsigned int b;
1155     int pos;
1156     uint8_t e[32];
1157 
1158     memcpy(e, scalar, 32);
1159     e[0] &= 0xF8;
1160     e[31] &= 0x7F;
1161     e[31] |= 0x40;
1162 
1163     fe_frombytes(&x1, point);
1164     fe_1(&x2);
1165     fe_0(&z2);
1166     fe_copy(&x3, &x1);
1167     fe_1(&z3);
1168 
1169     for (pos = 254; pos >= 0; --pos) {
1170         b = e[pos / 8] >> (pos & 7);
1171         b &= 1;
1172         swap ^= b;
1173         fe_cswap(&x2, &x3, swap);
1174         fe_cswap(&z2, &z3, swap);
1175         swap = b;
1176         fe_sub(&tmp0l, &x3, &z3);
1177         fe_sub(&tmp1l, &x2, &z2);
1178         fe_add(&x2l, &x2, &z2);
1179         fe_add(&z2l, &x3, &z3);
1180         fe_mul_tll(&z3, &tmp0l, &x2l);
1181         fe_mul_tll(&z2, &z2l, &tmp1l);
1182         fe_sq_tl(&tmp0, &tmp1l);
1183         fe_sq_tl(&tmp1, &x2l);
1184         fe_add(&x3l, &z3, &z2);
1185         fe_sub(&z2l, &z3, &z2);
1186         fe_mul_ttt(&x2, &tmp1, &tmp0);
1187         fe_sub(&tmp1l, &tmp1, &tmp0);
1188         fe_sq_tl(&z2, &z2l);
1189         fe_mul_121666_tl(&z3, &tmp1l);
1190         fe_sq_tl(&x3, &x3l);
1191         fe_add(&tmp0l, &tmp0, &z3);
1192         fe_mul_ttt(&z3, &x1, &z2);
1193         fe_mul_tll(&z2, &tmp1l, &tmp0l);
1194     }
1195 
1196     fe_cswap(&x2, &x3, swap);
1197     fe_cswap(&z2, &z3, swap);
1198 
1199     fe_invert(&z2, &z2);
1200     fe_mul_ttt(&x2, &x2, &z2);
1201     fe_tobytes(out, &x2);
1202 
1203     memset(x1.v, 0, sizeof(x1));
1204     memset(x2.v, 0, sizeof(x2));
1205     memset(z2.v, 0, sizeof(z2));
1206     memset(x3.v, 0, sizeof(x3));
1207     memset(z3.v, 0, sizeof(z3));
1208     memset(x2l.v, 0, sizeof(x2l));
1209     memset(z2l.v, 0, sizeof(z2l));
1210     memset(x3l.v, 0, sizeof(x3l));
1211     memset(e, 0, sizeof(e));
1212     return 0;
1213 }
1214