1 /*
2  * Copyright 2013 The Android Open Source Project
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *     * Redistributions of source code must retain the above copyright
7  *       notice, this list of conditions and the following disclaimer.
8  *     * Redistributions in binary form must reproduce the above copyright
9  *       notice, this list of conditions and the following disclaimer in the
10  *       documentation and/or other materials provided with the distribution.
11  *     * Neither the name of Google Inc. nor the names of its contributors may
12  *       be used to endorse or promote products derived from this software
13  *       without specific prior written permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18  * EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 // This is an implementation of the P256 elliptic curve group. It's written to
28 // be portable and still constant-time.
29 //
30 // WARNING: Implementing these functions in a constant-time manner is far from
31 //          obvious. Be careful when touching this code.
32 //
33 // See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.
34 
35 #include <assert.h>
36 #include <stdint.h>
37 #include <string.h>
38 #include <stdio.h>
39 
40 #include "p256/p256.h"
41 
42 const cryptonite_p256_int cryptonite_SECP256r1_n =  // curve order
43   {{P256_LITERAL(0xfc632551, 0xf3b9cac2), P256_LITERAL(0xa7179e84, 0xbce6faad),
44     P256_LITERAL(-1, -1), P256_LITERAL(0, -1)}};
45 
46 const cryptonite_p256_int cryptonite_SECP256r1_p =  // curve field size
47   {{P256_LITERAL(-1, -1), P256_LITERAL(-1, 0),
48     P256_LITERAL(0, 0), P256_LITERAL(1, -1) }};
49 
50 const cryptonite_p256_int cryptonite_SECP256r1_b =  // curve b
51   {{P256_LITERAL(0x27d2604b, 0x3bce3c3e), P256_LITERAL(0xcc53b0f6, 0x651d06b0),
52     P256_LITERAL(0x769886bc, 0xb3ebbd55), P256_LITERAL(0xaa3a93e7, 0x5ac635d8)}};
53 
cryptonite_p256_init(cryptonite_p256_int * a)54 void cryptonite_p256_init(cryptonite_p256_int* a) {
55   memset(a, 0, sizeof(*a));
56 }
57 
cryptonite_p256_clear(cryptonite_p256_int * a)58 void cryptonite_p256_clear(cryptonite_p256_int* a) { cryptonite_p256_init(a); }
59 
cryptonite_p256_get_bit(const cryptonite_p256_int * scalar,int bit)60 int cryptonite_p256_get_bit(const cryptonite_p256_int* scalar, int bit) {
61   return (P256_DIGIT(scalar, bit / P256_BITSPERDIGIT)
62               >> (bit & (P256_BITSPERDIGIT - 1))) & 1;
63 }
64 
cryptonite_p256_is_zero(const cryptonite_p256_int * a)65 int cryptonite_p256_is_zero(const cryptonite_p256_int* a) {
66   cryptonite_p256_digit result = 0;
67   int i = 0;
68   for (i = 0; i < P256_NDIGITS; ++i) result |= P256_DIGIT(a, i);
69   return result == 0;
70 }
71 
72 // top, c[] += a[] * b
73 // Returns new top
mulAdd(const cryptonite_p256_int * a,cryptonite_p256_digit b,cryptonite_p256_digit top,cryptonite_p256_digit * c)74 static cryptonite_p256_digit mulAdd(const cryptonite_p256_int* a,
75                          cryptonite_p256_digit b,
76                          cryptonite_p256_digit top,
77                          cryptonite_p256_digit* c) {
78   int i;
79   cryptonite_p256_ddigit carry = 0;
80 
81   for (i = 0; i < P256_NDIGITS; ++i) {
82     carry += *c;
83     carry += (cryptonite_p256_ddigit)P256_DIGIT(a, i) * b;
84     *c++ = (cryptonite_p256_digit)carry;
85     carry >>= P256_BITSPERDIGIT;
86   }
87   return top + (cryptonite_p256_digit)carry;
88 }
89 
90 // top, c[] -= top_a, a[]
subTop(cryptonite_p256_digit top_a,const cryptonite_p256_digit * a,cryptonite_p256_digit top_c,cryptonite_p256_digit * c)91 static cryptonite_p256_digit subTop(cryptonite_p256_digit top_a,
92                          const cryptonite_p256_digit* a,
93                          cryptonite_p256_digit top_c,
94                          cryptonite_p256_digit* c) {
95   int i;
96   cryptonite_p256_sddigit borrow = 0;
97 
98   for (i = 0; i < P256_NDIGITS; ++i) {
99     borrow += *c;
100     borrow -= *a++;
101     *c++ = (cryptonite_p256_digit)borrow;
102     borrow >>= P256_BITSPERDIGIT;
103   }
104   borrow += top_c;
105   borrow -= top_a;
106   top_c = (cryptonite_p256_digit)borrow;
107   assert((borrow >> P256_BITSPERDIGIT) == 0);
108   return top_c;
109 }
110 
111 // top, c[] -= MOD[] & mask (0 or -1)
112 // returns new top.
subM(const cryptonite_p256_int * MOD,cryptonite_p256_digit top,cryptonite_p256_digit * c,cryptonite_p256_digit mask)113 static cryptonite_p256_digit subM(const cryptonite_p256_int* MOD,
114                        cryptonite_p256_digit top,
115                        cryptonite_p256_digit* c,
116                        cryptonite_p256_digit mask) {
117   int i;
118   cryptonite_p256_sddigit borrow = 0;
119   for (i = 0; i < P256_NDIGITS; ++i) {
120     borrow += *c;
121     borrow -= P256_DIGIT(MOD, i) & mask;
122     *c++ = (cryptonite_p256_digit)borrow;
123     borrow >>= P256_BITSPERDIGIT;
124   }
125   return top + (cryptonite_p256_digit)borrow;
126 }
127 
128 // top, c[] += MOD[] & mask (0 or -1)
129 // returns new top.
addM(const cryptonite_p256_int * MOD,cryptonite_p256_digit top,cryptonite_p256_digit * c,cryptonite_p256_digit mask)130 static cryptonite_p256_digit addM(const cryptonite_p256_int* MOD,
131                        cryptonite_p256_digit top,
132                        cryptonite_p256_digit* c,
133                        cryptonite_p256_digit mask) {
134   int i;
135   cryptonite_p256_ddigit carry = 0;
136   for (i = 0; i < P256_NDIGITS; ++i) {
137     carry += *c;
138     carry += P256_DIGIT(MOD, i) & mask;
139     *c++ = (cryptonite_p256_digit)carry;
140     carry >>= P256_BITSPERDIGIT;
141   }
142   return top + (cryptonite_p256_digit)carry;
143 }
144 
145 // c = a * b mod MOD. c can be a and/or b.
cryptonite_p256_modmul(const cryptonite_p256_int * MOD,const cryptonite_p256_int * a,const cryptonite_p256_digit top_b,const cryptonite_p256_int * b,cryptonite_p256_int * c)146 void cryptonite_p256_modmul(const cryptonite_p256_int* MOD,
147                  const cryptonite_p256_int* a,
148                  const cryptonite_p256_digit top_b,
149                  const cryptonite_p256_int* b,
150                  cryptonite_p256_int* c) {
151   cryptonite_p256_digit tmp[P256_NDIGITS * 2 + 1] = { 0 };
152   cryptonite_p256_digit top = 0;
153   int i;
154 
155   // Multiply/add into tmp.
156   for (i = 0; i < P256_NDIGITS; ++i) {
157     if (i) tmp[i + P256_NDIGITS - 1] = top;
158     top = mulAdd(a, P256_DIGIT(b, i), 0, tmp + i);
159   }
160 
161   // Multiply/add top digit
162   tmp[i + P256_NDIGITS - 1] = top;
163   top = mulAdd(a, top_b, 0, tmp + i);
164 
165   // Reduce tmp, digit by digit.
166   for (; i >= 0; --i) {
167     cryptonite_p256_digit reducer[P256_NDIGITS] = { 0 };
168     cryptonite_p256_digit top_reducer;
169 
170     // top can be any value at this point.
171     // Guestimate reducer as top * MOD, since msw of MOD is -1.
172     top_reducer = mulAdd(MOD, top, 0, reducer);
173 #if P256_BITSPERDIGIT > 32
174     // Correction when msw of MOD has only high 32 bits set
175     top_reducer += mulAdd(MOD, top >> 32, 0, reducer);
176 #endif
177 
178     // Subtract reducer from top | tmp.
179     top = subTop(top_reducer, reducer, top, tmp + i);
180 
181     // top is now either 0 or 1. Make it 0, fixed-timing.
182     assert(top <= 1);
183 
184     top = subM(MOD, top, tmp + i, ~(top - 1));
185 
186     assert(top == 0);
187 
188     // We have now reduced the top digit off tmp. Fetch new top digit.
189     top = tmp[i + P256_NDIGITS - 1];
190   }
191 
192   // tmp might still be larger than MOD, yet same bit length.
193   // Make sure it is less, fixed-timing.
194   addM(MOD, 0, tmp, subM(MOD, 0, tmp, -1));
195 
196   memcpy(c, tmp, P256_NBYTES);
197 }
cryptonite_p256_is_odd(const cryptonite_p256_int * a)198 int cryptonite_p256_is_odd(const cryptonite_p256_int* a) { return P256_DIGIT(a, 0) & 1; }
cryptonite_p256_is_even(const cryptonite_p256_int * a)199 int cryptonite_p256_is_even(const cryptonite_p256_int* a) { return !(P256_DIGIT(a, 0) & 1); }
200 
cryptonite_p256_shl(const cryptonite_p256_int * a,int n,cryptonite_p256_int * b)201 cryptonite_p256_digit cryptonite_p256_shl(const cryptonite_p256_int* a, int n, cryptonite_p256_int* b) {
202   int i;
203   cryptonite_p256_digit top = P256_DIGIT(a, P256_NDIGITS - 1);
204 
205   n %= P256_BITSPERDIGIT;
206   for (i = P256_NDIGITS - 1; i > 0; --i) {
207     cryptonite_p256_digit accu = (P256_DIGIT(a, i) << n);
208     accu |= (P256_DIGIT(a, i - 1) >> (P256_BITSPERDIGIT - n));
209     P256_DIGIT(b, i) = accu;
210   }
211   P256_DIGIT(b, i) = (P256_DIGIT(a, i) << n);
212 
213   top = (cryptonite_p256_digit)((((cryptonite_p256_ddigit)top) << n) >> P256_BITSPERDIGIT);
214 
215   return top;
216 }
217 
cryptonite_p256_shr(const cryptonite_p256_int * a,int n,cryptonite_p256_int * b)218 void cryptonite_p256_shr(const cryptonite_p256_int* a, int n, cryptonite_p256_int* b) {
219   int i;
220 
221   n %= P256_BITSPERDIGIT;
222   for (i = 0; i < P256_NDIGITS - 1; ++i) {
223     cryptonite_p256_digit accu = (P256_DIGIT(a, i) >> n);
224     accu |= (P256_DIGIT(a, i + 1) << (P256_BITSPERDIGIT - n));
225     P256_DIGIT(b, i) = accu;
226   }
227   P256_DIGIT(b, i) = (P256_DIGIT(a, i) >> n);
228 }
229 
cryptonite_p256_shr1(const cryptonite_p256_int * a,int highbit,cryptonite_p256_int * b)230 static void cryptonite_p256_shr1(const cryptonite_p256_int* a, int highbit, cryptonite_p256_int* b) {
231   int i;
232 
233   for (i = 0; i < P256_NDIGITS - 1; ++i) {
234     cryptonite_p256_digit accu = (P256_DIGIT(a, i) >> 1);
235     accu |= (P256_DIGIT(a, i + 1) << (P256_BITSPERDIGIT - 1));
236     P256_DIGIT(b, i) = accu;
237   }
238   P256_DIGIT(b, i) = (P256_DIGIT(a, i) >> 1) |
239       (((cryptonite_p256_sdigit) highbit) << (P256_BITSPERDIGIT - 1));
240 }
241 
242 // Return -1, 0, 1 for a < b, a == b or a > b respectively.
cryptonite_p256_cmp(const cryptonite_p256_int * a,const cryptonite_p256_int * b)243 int cryptonite_p256_cmp(const cryptonite_p256_int* a, const cryptonite_p256_int* b) {
244   int i;
245   cryptonite_p256_sddigit borrow = 0;
246   cryptonite_p256_digit notzero = 0;
247 
248   for (i = 0; i < P256_NDIGITS; ++i) {
249     borrow += (cryptonite_p256_sddigit)P256_DIGIT(a, i) - P256_DIGIT(b, i);
250     // Track whether any result digit is ever not zero.
251     // Relies on !!(non-zero) evaluating to 1, e.g., !!(-1) evaluating to 1.
252     notzero |= !!((cryptonite_p256_digit)borrow);
253     borrow >>= P256_BITSPERDIGIT;
254   }
255   return (int)borrow | notzero;
256 }
257 
258 // c = a - b. Returns borrow: 0 or -1.
cryptonite_p256_sub(const cryptonite_p256_int * a,const cryptonite_p256_int * b,cryptonite_p256_int * c)259 int cryptonite_p256_sub(const cryptonite_p256_int* a, const cryptonite_p256_int* b, cryptonite_p256_int* c) {
260   int i;
261   cryptonite_p256_sddigit borrow = 0;
262 
263   for (i = 0; i < P256_NDIGITS; ++i) {
264     borrow += (cryptonite_p256_sddigit)P256_DIGIT(a, i) - P256_DIGIT(b, i);
265     if (c) P256_DIGIT(c, i) = (cryptonite_p256_digit)borrow;
266     borrow >>= P256_BITSPERDIGIT;
267   }
268   return (int)borrow;
269 }
270 
271 // c = a + b. Returns carry: 0 or 1.
cryptonite_p256_add(const cryptonite_p256_int * a,const cryptonite_p256_int * b,cryptonite_p256_int * c)272 int cryptonite_p256_add(const cryptonite_p256_int* a, const cryptonite_p256_int* b, cryptonite_p256_int* c) {
273   int i;
274   cryptonite_p256_ddigit carry = 0;
275 
276   for (i = 0; i < P256_NDIGITS; ++i) {
277     carry += (cryptonite_p256_ddigit)P256_DIGIT(a, i) + P256_DIGIT(b, i);
278     if (c) P256_DIGIT(c, i) = (cryptonite_p256_digit)carry;
279     carry >>= P256_BITSPERDIGIT;
280   }
281   return (int)carry;
282 }
283 
284 // b = a + d. Returns carry, 0 or 1.
cryptonite_p256_add_d(const cryptonite_p256_int * a,cryptonite_p256_digit d,cryptonite_p256_int * b)285 int cryptonite_p256_add_d(const cryptonite_p256_int* a, cryptonite_p256_digit d, cryptonite_p256_int* b) {
286   int i;
287   cryptonite_p256_ddigit carry = d;
288 
289   for (i = 0; i < P256_NDIGITS; ++i) {
290     carry += (cryptonite_p256_ddigit)P256_DIGIT(a, i);
291     if (b) P256_DIGIT(b, i) = (cryptonite_p256_digit)carry;
292     carry >>= P256_BITSPERDIGIT;
293   }
294   return (int)carry;
295 }
296 
297 // b = 1/a mod MOD, binary euclid.
cryptonite_p256_modinv_vartime(const cryptonite_p256_int * MOD,const cryptonite_p256_int * a,cryptonite_p256_int * b)298 void cryptonite_p256_modinv_vartime(const cryptonite_p256_int* MOD,
299                          const cryptonite_p256_int* a,
300                          cryptonite_p256_int* b) {
301   cryptonite_p256_int R = P256_ZERO;
302   cryptonite_p256_int S = P256_ONE;
303   cryptonite_p256_int U = *MOD;
304   cryptonite_p256_int V = *a;
305 
306   for (;;) {
307     if (cryptonite_p256_is_even(&U)) {
308       cryptonite_p256_shr1(&U, 0, &U);
309       if (cryptonite_p256_is_even(&R)) {
310         cryptonite_p256_shr1(&R, 0, &R);
311       } else {
312         // R = (R+MOD)/2
313         cryptonite_p256_shr1(&R, cryptonite_p256_add(&R, MOD, &R), &R);
314       }
315     } else if (cryptonite_p256_is_even(&V)) {
316       cryptonite_p256_shr1(&V, 0, &V);
317       if (cryptonite_p256_is_even(&S)) {
318         cryptonite_p256_shr1(&S, 0, &S);
319       } else {
320         // S = (S+MOD)/2
321         cryptonite_p256_shr1(&S, cryptonite_p256_add(&S, MOD, &S) , &S);
322       }
323     } else {  // U,V both odd.
324       if (!cryptonite_p256_sub(&V, &U, NULL)) {
325         cryptonite_p256_sub(&V, &U, &V);
326         if (cryptonite_p256_sub(&S, &R, &S)) cryptonite_p256_add(&S, MOD, &S);
327         if (cryptonite_p256_is_zero(&V)) break;  // done.
328       } else {
329         cryptonite_p256_sub(&U, &V, &U);
330         if (cryptonite_p256_sub(&R, &S, &R)) cryptonite_p256_add(&R, MOD, &R);
331       }
332     }
333   }
334 
335   cryptonite_p256_mod(MOD, &R, b);
336 }
337 
cryptonite_p256_mod(const cryptonite_p256_int * MOD,const cryptonite_p256_int * in,cryptonite_p256_int * out)338 void cryptonite_p256_mod(const cryptonite_p256_int* MOD,
339               const cryptonite_p256_int* in,
340               cryptonite_p256_int* out) {
341   if (out != in) *out = *in;
342   addM(MOD, 0, P256_DIGITS(out), subM(MOD, 0, P256_DIGITS(out), -1));
343 }
344 
345 // Verify y^2 == x^3 - 3x + b mod p
346 // and 0 < x < p and 0 < y < p
cryptonite_p256_is_valid_point(const cryptonite_p256_int * x,const cryptonite_p256_int * y)347 int cryptonite_p256_is_valid_point(const cryptonite_p256_int* x, const cryptonite_p256_int* y) {
348   cryptonite_p256_int y2, x3;
349 
350   if (cryptonite_p256_cmp(&cryptonite_SECP256r1_p, x) <= 0 ||
351       cryptonite_p256_cmp(&cryptonite_SECP256r1_p, y) <= 0 ||
352       cryptonite_p256_is_zero(x) ||
353       cryptonite_p256_is_zero(y)) return 0;
354 
355   cryptonite_p256_modmul(&cryptonite_SECP256r1_p, y, 0, y, &y2);  // y^2
356 
357   cryptonite_p256_modmul(&cryptonite_SECP256r1_p, x, 0, x, &x3);  // x^2
358   cryptonite_p256_modmul(&cryptonite_SECP256r1_p, x, 0, &x3, &x3);  // x^3
359   if (cryptonite_p256_sub(&x3, x, &x3)) cryptonite_p256_add(&x3, &cryptonite_SECP256r1_p, &x3);  // x^3 - x
360   if (cryptonite_p256_sub(&x3, x, &x3)) cryptonite_p256_add(&x3, &cryptonite_SECP256r1_p, &x3);  // x^3 - 2x
361   if (cryptonite_p256_sub(&x3, x, &x3)) cryptonite_p256_add(&x3, &cryptonite_SECP256r1_p, &x3);  // x^3 - 3x
362   if (cryptonite_p256_add(&x3, &cryptonite_SECP256r1_b, &x3))  // x^3 - 3x + b
363     cryptonite_p256_sub(&x3, &cryptonite_SECP256r1_p, &x3);
364 
365   return cryptonite_p256_cmp(&y2, &x3) == 0;
366 }
367 
cryptonite_p256_from_bin(const uint8_t src[P256_NBYTES],cryptonite_p256_int * dst)368 void cryptonite_p256_from_bin(const uint8_t src[P256_NBYTES], cryptonite_p256_int* dst) {
369   int i, n;
370   const uint8_t* p = &src[0];
371 
372   for (i = P256_NDIGITS - 1; i >= 0; --i) {
373     cryptonite_p256_digit dig = 0;
374     n = P256_BITSPERDIGIT;
375     while (n > 0) {
376       n -= 8;
377       dig |= ((cryptonite_p256_digit) *(p++)) << n;
378     }
379     P256_DIGIT(dst, i) = dig;
380   }
381 }
382 
cryptonite_p256_to_bin(const cryptonite_p256_int * src,uint8_t dst[P256_NBYTES])383 void cryptonite_p256_to_bin(const cryptonite_p256_int* src, uint8_t dst[P256_NBYTES])
384 {
385 	int i, n;
386 	uint8_t* p = &dst[0];
387 
388 	for (i = P256_NDIGITS -1; i >= 0; --i) {
389 		const cryptonite_p256_digit dig = P256_DIGIT(src, i);
390 		n = P256_BITSPERDIGIT;
391 		while (n > 0) {
392 			n -= 8;
393 			*(p++) = dig >> n;
394 		}
395 	}
396 }
397 
398 /*
399   "p256e" functions are not part of the original source
400 */
401 
402 #define MSB_COMPLEMENT(x) (((x) >> (P256_BITSPERDIGIT - 1)) - 1)
403 
404 // c = a + b mod MOD
cryptonite_p256e_modadd(const cryptonite_p256_int * MOD,const cryptonite_p256_int * a,const cryptonite_p256_int * b,cryptonite_p256_int * c)405 void cryptonite_p256e_modadd(const cryptonite_p256_int* MOD, const cryptonite_p256_int* a, const cryptonite_p256_int* b, cryptonite_p256_int* c) {
406   assert(c);  /* avoid repeated checks inside inlined cryptonite_p256_add */
407   cryptonite_p256_digit top = cryptonite_p256_add(a, b, c);
408   top = subM(MOD, top, P256_DIGITS(c), -1);
409   top = subM(MOD, top, P256_DIGITS(c), MSB_COMPLEMENT(top));
410   addM(MOD, 0, P256_DIGITS(c), top);
411 }
412 
413 // c = a - b mod MOD
cryptonite_p256e_modsub(const cryptonite_p256_int * MOD,const cryptonite_p256_int * a,const cryptonite_p256_int * b,cryptonite_p256_int * c)414 void cryptonite_p256e_modsub(const cryptonite_p256_int* MOD, const cryptonite_p256_int* a, const cryptonite_p256_int* b, cryptonite_p256_int* c) {
415   assert(c); /* avoid repeated checks inside inlined cryptonite_p256_sub */
416   cryptonite_p256_digit top = cryptonite_p256_sub(a, b, c);
417   top = addM(MOD, top, P256_DIGITS(c), ~MSB_COMPLEMENT(top));
418   top = subM(MOD, top, P256_DIGITS(c), MSB_COMPLEMENT(top));
419   addM(MOD, 0, P256_DIGITS(c), top);
420 }
421 
422 #define NTH_DOUBLE_THEN_ADD(i, a, nth, b, out)   \
423     cryptonite_p256e_montmul(a, a, out);         \
424     for (i = 1; i < nth; i++)                    \
425         cryptonite_p256e_montmul(out, out, out); \
426     cryptonite_p256e_montmul(out, b, out);
427 
428 const cryptonite_p256_int cryptonite_SECP256r1_r2 = // r^2 mod n
429   {{P256_LITERAL(0xBE79EEA2, 0x83244C95), P256_LITERAL(0x49BD6FA6, 0x4699799C),
430     P256_LITERAL(0x2B6BEC59, 0x2845B239), P256_LITERAL(0xF3D95620, 0x66E12D94)}};
431 
432 const cryptonite_p256_int cryptonite_SECP256r1_one = {{1}};
433 
434 // Montgomery multiplication, i.e. c = ab/r mod n with r = 2^256.
435 // Implementation is adapted from 'sc_montmul' in libdecaf.
cryptonite_p256e_montmul(const cryptonite_p256_int * a,const cryptonite_p256_int * b,cryptonite_p256_int * c)436 static void cryptonite_p256e_montmul(const cryptonite_p256_int* a, const cryptonite_p256_int* b, cryptonite_p256_int* c) {
437   int i, j, borrow;
438   cryptonite_p256_digit accum[P256_NDIGITS+1] = {0};
439   cryptonite_p256_digit hi_carry = 0;
440 
441   for (i=0; i<P256_NDIGITS; i++) {
442     cryptonite_p256_digit mand = P256_DIGIT(a, i);
443     const cryptonite_p256_digit *mier = P256_DIGITS(b);
444 
445     cryptonite_p256_ddigit chain = 0;
446     for (j=0; j<P256_NDIGITS; j++) {
447       chain += ((cryptonite_p256_ddigit)mand)*mier[j] + accum[j];
448       accum[j] = chain;
449       chain >>= P256_BITSPERDIGIT;
450     }
451     accum[j] = chain;
452 
453     mand = accum[0] * P256_MONTGOMERY_FACTOR;
454     chain = 0;
455     mier = P256_DIGITS(&cryptonite_SECP256r1_n);
456     for (j=0; j<P256_NDIGITS; j++) {
457       chain += (cryptonite_p256_ddigit)mand*mier[j] + accum[j];
458       if (j) accum[j-1] = chain;
459       chain >>= P256_BITSPERDIGIT;
460     }
461     chain += accum[j];
462     chain += hi_carry;
463     accum[j-1] = chain;
464     hi_carry = chain >> P256_BITSPERDIGIT;
465   }
466 
467   memcpy(P256_DIGITS(c), accum, sizeof(*c));
468   borrow = cryptonite_p256_sub(c, &cryptonite_SECP256r1_n, c);
469   addM(&cryptonite_SECP256r1_n, 0, P256_DIGITS(c), borrow + hi_carry);
470 }
471 
472 // b = 1/a mod n, using Fermat's little theorem.
cryptonite_p256e_scalar_invert(const cryptonite_p256_int * a,cryptonite_p256_int * b)473 void cryptonite_p256e_scalar_invert(const cryptonite_p256_int* a, cryptonite_p256_int* b) {
474   cryptonite_p256_int _1, _10, _11, _101, _111, _1010, _1111;
475   cryptonite_p256_int _10101, _101010, _101111, x6, x8, x16, x32;
476   int i;
477 
478   // Montgomerize
479   cryptonite_p256e_montmul(a, &cryptonite_SECP256r1_r2, &_1);
480 
481   // P-256 (secp256r1) Scalar Inversion
482   // <https://briansmith.org/ecc-inversion-addition-chains-01>
483   cryptonite_p256e_montmul(&_1     , &_1     , &_10);
484   cryptonite_p256e_montmul(&_10    , &_1     , &_11);
485   cryptonite_p256e_montmul(&_10    , &_11    , &_101);
486   cryptonite_p256e_montmul(&_10    , &_101   , &_111);
487   cryptonite_p256e_montmul(&_101   , &_101   , &_1010);
488   cryptonite_p256e_montmul(&_101   , &_1010  , &_1111);
489   NTH_DOUBLE_THEN_ADD(i, &_1010,  1   , &_1     , &_10101);
490   cryptonite_p256e_montmul(&_10101 , &_10101 , &_101010);
491   cryptonite_p256e_montmul(&_101   , &_101010, &_101111);
492   cryptonite_p256e_montmul(&_10101 , &_101010, &x6);
493   NTH_DOUBLE_THEN_ADD(i, &x6   ,  2   , &_11    , &x8);
494   NTH_DOUBLE_THEN_ADD(i, &x8   ,  8   , &x8     , &x16);
495   NTH_DOUBLE_THEN_ADD(i, &x16  , 16   , &x16    , &x32);
496 
497   NTH_DOUBLE_THEN_ADD(i, &x32  , 32+32, &x32    , b);
498   NTH_DOUBLE_THEN_ADD(i, b     ,    32, &x32    , b);
499   NTH_DOUBLE_THEN_ADD(i, b     ,     6, &_101111, b);
500   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 3, &_111   , b);
501   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 2, &_11    , b);
502   NTH_DOUBLE_THEN_ADD(i, b     , 1 + 4, &_1111  , b);
503   NTH_DOUBLE_THEN_ADD(i, b     ,     5, &_10101 , b);
504   NTH_DOUBLE_THEN_ADD(i, b     , 1 + 3, &_101   , b);
505   NTH_DOUBLE_THEN_ADD(i, b     ,     3, &_101   , b);
506   NTH_DOUBLE_THEN_ADD(i, b     ,     3, &_101   , b);
507   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 3, &_111   , b);
508   NTH_DOUBLE_THEN_ADD(i, b     , 3 + 6, &_101111, b);
509   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 4, &_1111  , b);
510   NTH_DOUBLE_THEN_ADD(i, b     , 1 + 1, &_1     , b);
511   NTH_DOUBLE_THEN_ADD(i, b     , 4 + 1, &_1     , b);
512   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 4, &_1111  , b);
513   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 3, &_111   , b);
514   NTH_DOUBLE_THEN_ADD(i, b     , 1 + 3, &_111   , b);
515   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 3, &_111   , b);
516   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 3, &_101   , b);
517   NTH_DOUBLE_THEN_ADD(i, b     , 1 + 2, &_11    , b);
518   NTH_DOUBLE_THEN_ADD(i, b     , 4 + 6, &_101111, b);
519   NTH_DOUBLE_THEN_ADD(i, b     ,     2, &_11    , b);
520   NTH_DOUBLE_THEN_ADD(i, b     , 3 + 2, &_11    , b);
521   NTH_DOUBLE_THEN_ADD(i, b     , 3 + 2, &_11    , b);
522   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 1, &_1     , b);
523   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 5, &_10101 , b);
524   NTH_DOUBLE_THEN_ADD(i, b     , 2 + 4, &_1111  , b);
525 
526   // Demontgomerize
527   cryptonite_p256e_montmul(b, &cryptonite_SECP256r1_one, b);
528 }
529