/*
 * Copyright 2013 The Android Open Source Project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Google Inc. nor the names of its contributors may
 *       be used to endorse or promote products derived from this software
 *       without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Google Inc. ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL Google Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// This is an implementation of the P256 elliptic curve group. It's written to
// be portable and still constant-time.
//
// WARNING: Implementing these functions in a constant-time manner is far from
//          obvious. Be careful when touching this code.
//
// See http://www.imperialviolet.org/2010/12/04/ecc.html ([1]) for background.

#include "p256/p256_gf.h"


/* Field element operations: */

/* felem_inv calculates |out| = |in|^{-1}
 *
 * Based on Fermat's Little Theorem:
 *   a^p = a (mod p)
 *   a^{p-1} = 1 (mod p)
 *   a^{p-2} = a^{-1} (mod p)
 */
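/* For P-256, p = 2^256 - 2^224 + 2^192 + 2^96 - 1, so the chain below
 * computes |in|^{p-2} = |in|^{2^256 - 2^224 + 2^192 + 2^96 - 3}; the same
 * exponent is noted at the final multiplication. */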
static void felem_inv(felem out, const felem in) {
  felem ftmp, ftmp2;
  /* each e_I will hold |in|^{2^I - 1} */
  felem e2, e4, e8, e16, e32, e64;
  unsigned i;

  felem_square(ftmp, in); /* 2^1 */
  felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */
  felem_assign(e2, ftmp);
  felem_square(ftmp, ftmp); /* 2^3 - 2^1 */
  felem_square(ftmp, ftmp); /* 2^4 - 2^2 */
  felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */
  felem_assign(e4, ftmp);
  felem_square(ftmp, ftmp); /* 2^5 - 2^1 */
  felem_square(ftmp, ftmp); /* 2^6 - 2^2 */
  felem_square(ftmp, ftmp); /* 2^7 - 2^3 */
  felem_square(ftmp, ftmp); /* 2^8 - 2^4 */
  felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */
  felem_assign(e8, ftmp);
  for (i = 0; i < 8; i++) {
    felem_square(ftmp, ftmp);
  } /* 2^16 - 2^8 */
  felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */
  felem_assign(e16, ftmp);
  for (i = 0; i < 16; i++) {
    felem_square(ftmp, ftmp);
  } /* 2^32 - 2^16 */
  felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
  felem_assign(e32, ftmp);
  for (i = 0; i < 32; i++) {
    felem_square(ftmp, ftmp);
  } /* 2^64 - 2^32 */
  felem_assign(e64, ftmp);
  felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */
  for (i = 0; i < 192; i++) {
    felem_square(ftmp, ftmp);
  } /* 2^256 - 2^224 + 2^192 */

  felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
  for (i = 0; i < 16; i++) {
    felem_square(ftmp2, ftmp2);
  } /* 2^80 - 2^16 */
  felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
  for (i = 0; i < 8; i++) {
    felem_square(ftmp2, ftmp2);
  } /* 2^88 - 2^8 */
  felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
  for (i = 0; i < 4; i++) {
    felem_square(ftmp2, ftmp2);
  } /* 2^92 - 2^4 */
  felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
  felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */
  felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */
  felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
  felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */
  felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */
  felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */

  felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
}


/* Group operations:
 *
 * Elements of the elliptic curve group are represented in Jacobian
 * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
 * Jacobian form. */
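/* For example, an affine point (x', y') corresponds to the Jacobian triple
 * (x', y', 1), and scaling (x, y, z) to (c^2*x, c^3*y, c*z) for any nonzero
 * c gives another representation of the same affine point. The point at
 * infinity is represented with z = 0. */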

/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
 *
 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l */
static void point_double(felem x_out, felem y_out, felem z_out, const felem x,
                         const felem y, const felem z) {
  felem delta, gamma, alpha, beta, tmp, tmp2;

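  /* Following the dbl-2009-l formulas: delta = z^2, gamma = y^2,
   * beta = x*gamma and alpha = 3*(x - delta)*(x + delta). */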
  felem_square(delta, z);
  felem_square(gamma, y);
  felem_mul(beta, x, gamma);

  felem_sum(tmp, x, delta);
  felem_diff(tmp2, x, delta);
  felem_mul(alpha, tmp, tmp2);
  felem_scalar_3(alpha);

  felem_sum(tmp, y, z);
  felem_square(tmp, tmp);
  felem_diff(tmp, tmp, gamma);
  felem_diff(z_out, tmp, delta);

  felem_scalar_4(beta);
  felem_square(x_out, alpha);
  felem_diff(x_out, x_out, beta);
  felem_diff(x_out, x_out, beta);

  felem_diff(tmp, beta, x_out);
  felem_mul(tmp, alpha, tmp);
  felem_square(tmp2, gamma);
  felem_scalar_8(tmp2);
  felem_diff(y_out, tmp, tmp2);
}

/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
 * (i.e. the second point is affine.)
 *
 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
 *
 * Note that this function does not handle P+P, infinity+P, or P+infinity
 * correctly. */
static void point_add_mixed(felem x_out, felem y_out, felem z_out,
                            const felem x1, const felem y1, const felem z1,
                            const felem x2, const felem y2) {
  felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
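  /* Following the add-2007-bl formulas, specialized for z2 = 1:
   * u2 = x2*z1^2, s2 = y2*z1^3, h = u2 - x1 and r = 2*(s2 - y1). */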

  felem_square(z1z1, z1);
  felem_sum(tmp, z1, z1);

  felem_mul(u2, x2, z1z1);
  felem_mul(z1z1z1, z1, z1z1);
  felem_mul(s2, y2, z1z1z1);
  felem_diff(h, u2, x1);
  felem_sum(i, h, h);
  felem_square(i, i);
  felem_mul(j, h, i);
  felem_diff(r, s2, y1);
  felem_sum(r, r, r);
  felem_mul(v, x1, i);

  felem_mul(z_out, tmp, h);
  felem_square(rr, r);
  felem_diff(x_out, rr, j);
  felem_diff(x_out, x_out, v);
  felem_diff(x_out, x_out, v);

  felem_diff(tmp, v, x_out);
  felem_mul(y_out, tmp, r);
  felem_mul(tmp, y1, j);
  felem_diff(y_out, y_out, tmp);
  felem_diff(y_out, y_out, tmp);
}

/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
 *
 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
 *
 * Note that this function does not handle P+P, infinity+P, or P+infinity
 * correctly. */
static void point_add(felem x_out, felem y_out, felem z_out, const felem x1,
                      const felem y1, const felem z1, const felem x2,
                      const felem y2, const felem z2) {
  felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;

  felem_square(z1z1, z1);
  felem_square(z2z2, z2);
  felem_mul(u1, x1, z2z2);

  felem_sum(tmp, z1, z2);
  felem_square(tmp, tmp);
  felem_diff(tmp, tmp, z1z1);
  felem_diff(tmp, tmp, z2z2);

  felem_mul(z2z2z2, z2, z2z2);
  felem_mul(s1, y1, z2z2z2);

  felem_mul(u2, x2, z1z1);
  felem_mul(z1z1z1, z1, z1z1);
  felem_mul(s2, y2, z1z1z1);
  felem_diff(h, u2, u1);
  felem_sum(i, h, h);
  felem_square(i, i);
  felem_mul(j, h, i);
  felem_diff(r, s2, s1);
  felem_sum(r, r, r);
  felem_mul(v, u1, i);

  felem_mul(z_out, tmp, h);
  felem_square(rr, r);
  felem_diff(x_out, rr, j);
  felem_diff(x_out, x_out, v);
  felem_diff(x_out, x_out, v);

  felem_diff(tmp, v, x_out);
  felem_mul(y_out, tmp, r);
  felem_mul(tmp, s1, j);
  felem_diff(y_out, y_out, tmp);
  felem_diff(y_out, y_out, tmp);
}

/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
 *                                                        {x2,y2,z2}.
 *
 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
 *
 * This function handles the case where {x1,y1,z1}={x2,y2,z2}. */
static void point_add_or_double_vartime(
    felem x_out, felem y_out, felem z_out, const felem x1, const felem y1,
    const felem z1, const felem x2, const felem y2, const felem z2) {
  felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
  char x_equal, y_equal;

  felem_square(z1z1, z1);
  felem_square(z2z2, z2);
  felem_mul(u1, x1, z2z2);

  felem_sum(tmp, z1, z2);
  felem_square(tmp, tmp);
  felem_diff(tmp, tmp, z1z1);
  felem_diff(tmp, tmp, z2z2);

  felem_mul(z2z2z2, z2, z2z2);
  felem_mul(s1, y1, z2z2z2);

  felem_mul(u2, x2, z1z1);
  felem_mul(z1z1z1, z1, z1z1);
  felem_mul(s2, y2, z1z1z1);
  felem_diff(h, u2, u1);
  x_equal = felem_is_zero_vartime(h);
  felem_sum(i, h, h);
  felem_square(i, i);
  felem_mul(j, h, i);
  felem_diff(r, s2, s1);
  y_equal = felem_is_zero_vartime(r);
  if (x_equal && y_equal) {
    point_double(x_out, y_out, z_out, x1, y1, z1);
    return;
  }
  felem_sum(r, r, r);
  felem_mul(v, u1, i);

  felem_mul(z_out, tmp, h);
  felem_square(rr, r);
  felem_diff(x_out, rr, j);
  felem_diff(x_out, x_out, v);
  felem_diff(x_out, x_out, v);

  felem_diff(tmp, v, x_out);
  felem_mul(y_out, tmp, r);
  felem_mul(tmp, s1, j);
  felem_diff(y_out, y_out, tmp);
  felem_diff(y_out, y_out, tmp);
}

/* copy_conditional sets out=in if mask = -1 in constant time.
 *
 * On entry: mask is either 0 or -1. */
static void copy_conditional(felem out, const felem in, limb mask) {
  int i;

  for (i = 0; i < NLIMBS; i++) {
    const limb tmp = mask & (in[i] ^ out[i]);
    out[i] ^= tmp;
  }
}
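
/* For example, with mask = -1 each iteration computes
 * out[i] ^= (in[i] ^ out[i]), i.e. out[i] = in[i]; with mask = 0, |out| is
 * left unchanged. The same loads, XORs and stores execute either way, so
 * the selection does not leak through timing. */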

/* select_affine_point sets {out_x,out_y} to the index'th entry of table.
 * On entry: index < 16, table[0] must be zero. */
static void select_affine_point(felem out_x, felem out_y, const limb* table,
                                limb index) {
  limb i, j;

  memset(out_x, 0, sizeof(felem));
  memset(out_y, 0, sizeof(felem));

  for (i = 1; i < 16; i++) {
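    /* mask = (i == index) ? -1 : 0, computed without branching: i ^ index
     * fits in four bits, the two shifts OR all of those bits into bit 0,
     * and the decrement maps 0 to all-ones and 1 to 0. */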
    limb mask = i ^ index;
    mask |= mask >> 2;
    mask |= mask >> 1;
    mask &= 1;
    mask--;
    for (j = 0; j < NLIMBS; j++, table++) {
      out_x[j] |= *table & mask;
    }
    for (j = 0; j < NLIMBS; j++, table++) {
      out_y[j] |= *table & mask;
    }
  }
}

/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
 * table. On entry: index < 16, table[0] must be zero. */
static void select_jacobian_point(felem out_x, felem out_y, felem out_z,
                                  const limb* table, limb index) {
  limb i, j;

  memset(out_x, 0, sizeof(felem));
  memset(out_y, 0, sizeof(felem));
  memset(out_z, 0, sizeof(felem));

  /* The implicit value at index 0 is all zero. We don't need to perform that
   * iteration of the loop because we already set out_* to zero. */
  table += 3 * NLIMBS;

  /* Hit all entries to obscure cache profiling; the mask is computed with
   * the same branch-free trick as in select_affine_point. */
  for (i = 1; i < 16; i++) {
    limb mask = i ^ index;
    mask |= mask >> 2;
    mask |= mask >> 1;
    mask &= 1;
    mask--;
    for (j = 0; j < NLIMBS; j++, table++) {
      out_x[j] |= *table & mask;
    }
    for (j = 0; j < NLIMBS; j++, table++) {
      out_y[j] |= *table & mask;
    }
    for (j = 0; j < NLIMBS; j++, table++) {
      out_z[j] |= *table & mask;
    }
  }
}

/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
 * number. Note that the value of scalar must be less than the order of the
 * group. */
static void scalar_base_mult(felem nx, felem ny, felem nz,
                             const cryptonite_p256_int* scalar) {
  int i, j;
  limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
  u32 table_offset;

  felem px, py;
  felem tx, ty, tz;

  memset(nx, 0, sizeof(felem));
  memset(ny, 0, sizeof(felem));
  memset(nz, 0, sizeof(felem));

  /* The loop adds bits at positions 0, 64, 128 and 192, followed by
   * positions 32, 96, 160 and 224 and does this 32 times. */
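  /* For example, in iteration i the j = 0 pass gathers bits 31-i, 95-i,
   * 159-i and 223-i of the scalar into a 4-bit index, and the j = 32 pass
   * gathers bits 63-i, 127-i, 191-i and 255-i; each pass selects from its
   * own precomputed table of multiples of G. */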
  for (i = 0; i < 32; i++) {
    if (i) {
      point_double(nx, ny, nz, nx, ny, nz);
    }
    table_offset = 0;
    for (j = 0; j <= 32; j += 32) {
      char bit0 = cryptonite_p256_get_bit(scalar, 31 - i + j);
      char bit1 = cryptonite_p256_get_bit(scalar, 95 - i + j);
      char bit2 = cryptonite_p256_get_bit(scalar, 159 - i + j);
      char bit3 = cryptonite_p256_get_bit(scalar, 223 - i + j);
      limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);

      select_affine_point(px, py, kPrecomputed + table_offset, index);
      table_offset += 30 * NLIMBS;
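      /* Each table holds 15 entries of 2 * NLIMBS limbs (index 0 selects
       * the implicit zero entry), so the table for the j = 32 pass starts
       * 30 * NLIMBS limbs further on. */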

      /* Since scalar is less than the order of the group, we know that
       * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
       * below. */
      point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
      /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
       * (a.k.a. the point at infinity). We handle that situation by
       * copying the point from the table. */
      copy_conditional(nx, px, n_is_infinity_mask);
      copy_conditional(ny, py, n_is_infinity_mask);
      copy_conditional(nz, kOne, n_is_infinity_mask);

      /* Equally, the result is also wrong if the point from the table is
       * zero, which happens when the index is zero. We handle that by
       * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0. */
      p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
      mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
      copy_conditional(nx, tx, mask);
      copy_conditional(ny, ty, mask);
      copy_conditional(nz, tz, mask);
      /* If p was not zero, then n is now non-zero. */
      n_is_infinity_mask &= ~p_is_noninfinite_mask;
    }
  }
}

/* point_to_affine converts a Jacobian point to an affine point, computing
 * x' = x*(z^-1)^2 and y' = y*(z^-1)^3. If the input is the point at infinity
 * then it returns (0, 0) in constant time. */
static void point_to_affine(felem x_out, felem y_out, const felem nx,
                            const felem ny, const felem nz) {
  felem z_inv, z_inv_sq;
  felem_inv(z_inv, nz);
  felem_square(z_inv_sq, z_inv);
  felem_mul(x_out, nx, z_inv_sq);
  felem_mul(z_inv, z_inv, z_inv_sq); /* z_inv is now z^-3 */
  felem_mul(y_out, ny, z_inv);
}

/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}. */
static void scalar_mult(felem nx, felem ny, felem nz, const felem x,
                        const felem y, const cryptonite_p256_int* scalar) {
  int i;
  felem px, py, pz, tx, ty, tz;
  felem precomp[16][3];
  limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;

  /* We precompute 0, 1, 2, ..., 15 times {x,y}. */
  memset(precomp, 0, sizeof(felem) * 3);
  memcpy(&precomp[1][0], x, sizeof(felem));
  memcpy(&precomp[1][1], y, sizeof(felem));
  memcpy(&precomp[1][2], kOne, sizeof(felem));

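  /* precomp[i] = i*{x,y}: each even entry doubles precomp[i/2], and each
   * odd entry adds {x,y} to the even entry before it. */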
  for (i = 2; i < 16; i += 2) {
    point_double(precomp[i][0], precomp[i][1], precomp[i][2],
                 precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);

    point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
                    precomp[i][0], precomp[i][1], precomp[i][2], x, y);
  }

  memset(nx, 0, sizeof(felem));
  memset(ny, 0, sizeof(felem));
  memset(nz, 0, sizeof(felem));
  n_is_infinity_mask = -1;

  /* We add in a window of four bits each iteration and do this 64 times. */
  for (i = 0; i < 256; i += 4) {
    if (i) {
      point_double(nx, ny, nz, nx, ny, nz);
      point_double(nx, ny, nz, nx, ny, nz);
      point_double(nx, ny, nz, nx, ny, nz);
      point_double(nx, ny, nz, nx, ny, nz);
    }

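    /* index is the i/4'th 4-bit window of the scalar, taken from the most
     * significant end, with the higher scalar bit in the higher bit of
     * index. */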
    index = (cryptonite_p256_get_bit(scalar, 255 - i - 0) << 3) |
            (cryptonite_p256_get_bit(scalar, 255 - i - 1) << 2) |
            (cryptonite_p256_get_bit(scalar, 255 - i - 2) << 1) |
            cryptonite_p256_get_bit(scalar, 255 - i - 3);

    /* See the comments in scalar_base_mult about handling infinities. */
    select_jacobian_point(px, py, pz, precomp[0][0], index);
    point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
    copy_conditional(nx, px, n_is_infinity_mask);
    copy_conditional(ny, py, n_is_infinity_mask);
    copy_conditional(nz, pz, n_is_infinity_mask);

    p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
    mask = p_is_noninfinite_mask & ~n_is_infinity_mask;

    copy_conditional(nx, tx, mask);
    copy_conditional(ny, ty, mask);
    copy_conditional(nz, tz, mask);
    n_is_infinity_mask &= ~p_is_noninfinite_mask;
  }
}

/* cryptonite_p256_base_point_mul sets {out_x,out_y} = nG, where n is < the
 * order of the group. */
void cryptonite_p256_base_point_mul(const cryptonite_p256_int* n,
                                    cryptonite_p256_int* out_x,
                                    cryptonite_p256_int* out_y) {
  felem x, y, z;

  scalar_base_mult(x, y, z, n);

  {
    felem x_affine, y_affine;

    point_to_affine(x_affine, y_affine, x, y, z);
    from_montgomery(out_x, x_affine);
    from_montgomery(out_y, y_affine);
  }
}
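
/* A minimal usage sketch, assuming the surrounding library's conversion
 * helpers (e.g. cryptonite_p256_from_bin) to load a scalar into a
 * cryptonite_p256_int:
 *
 *   cryptonite_p256_int n, x, y;
 *   cryptonite_p256_from_bin(scalar_bytes, &n);   // n must be < group order
 *   cryptonite_p256_base_point_mul(&n, &x, &y);
 */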

/* cryptonite_p256_points_mul_vartime sets {out_x,out_y} = n1*G + n2*{in_x,in_y}, where
 * n1 and n2 are < the order of the group.
 *
 * As indicated by the name, this function operates in variable time. This
 * is safe because it's used for signature verification, which doesn't deal
 * with secrets: in ECDSA verification both scalars are derived from public
 * values. */
void cryptonite_p256_points_mul_vartime(
    const cryptonite_p256_int* n1, const cryptonite_p256_int* n2, const cryptonite_p256_int* in_x,
    const cryptonite_p256_int* in_y, cryptonite_p256_int* out_x, cryptonite_p256_int* out_y) {
  felem x1, y1, z1, x2, y2, z2, px, py;

  /* If both scalars are zero, then the result is the point at infinity. */
  if (cryptonite_p256_is_zero(n1) != 0 && cryptonite_p256_is_zero(n2) != 0) {
    cryptonite_p256_clear(out_x);
    cryptonite_p256_clear(out_y);
    return;
  }

  to_montgomery(px, in_x);
  to_montgomery(py, in_y);
  scalar_base_mult(x1, y1, z1, n1);
  scalar_mult(x2, y2, z2, px, py, n2);

  if (cryptonite_p256_is_zero(n2) != 0) {
    /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
     * {x1,y1,z1}. */
  } else if (cryptonite_p256_is_zero(n1) != 0) {
    /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
     * {x2,y2,z2}. */
    memcpy(x1, x2, sizeof(x2));
    memcpy(y1, y2, sizeof(y2));
    memcpy(z1, z2, sizeof(z2));
  } else {
    /* point_add_or_double_vartime handles the case where
     * {x1,y1,z1} == {x2,y2,z2}. */
    point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
  }

  point_to_affine(px, py, x1, y1, z1);
  from_montgomery(out_x, px);
  from_montgomery(out_y, py);
}

/* This function is not part of the original source.
 * Add two points together. So far untested. Probably variable-time, as it
 * uses point_add_or_double_vartime. */
void cryptonite_p256e_point_add(
    const cryptonite_p256_int *in_x1, const cryptonite_p256_int *in_y1,
    const cryptonite_p256_int *in_x2, const cryptonite_p256_int *in_y2,
    cryptonite_p256_int *out_x, cryptonite_p256_int *out_y)
{
    felem x, y, z, px1, py1, px2, py2;

    to_montgomery(px1, in_x1);
    to_montgomery(py1, in_y1);
    to_montgomery(px2, in_x2);
    to_montgomery(py2, in_y2);

    point_add_or_double_vartime(x, y, z, px1, py1, kOne, px2, py2, kOne);

    point_to_affine(px1, py1, x, y, z);
    from_montgomery(out_x, px1);
    from_montgomery(out_y, py1);
}

/* This function is not part of the original source.
 * Negate a point, i.e. (out_x, out_y) = (in_x, -in_y). */
void cryptonite_p256e_point_negate(
    const cryptonite_p256_int *in_x, const cryptonite_p256_int *in_y,
    cryptonite_p256_int *out_x, cryptonite_p256_int *out_y)
{
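    /* On the curve, -(x, y) = (x, p - y), so out_y is computed as
     * p - in_y. */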
    memcpy(out_x, in_x, P256_NBYTES);
    cryptonite_p256_sub(&cryptonite_SECP256r1_p, in_y, out_y);
}

/* This function is not part of the original source.
 * cryptonite_p256e_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where
 * n is < the order of the group. */
void cryptonite_p256e_point_mul(const cryptonite_p256_int* n,
    const cryptonite_p256_int* in_x, const cryptonite_p256_int* in_y,
    cryptonite_p256_int* out_x, cryptonite_p256_int* out_y) {
  felem x, y, z, px, py;

  to_montgomery(px, in_x);
  to_montgomery(py, in_y);
  scalar_mult(x, y, z, px, py, n);
  point_to_affine(px, py, x, y, z);
  from_montgomery(out_x, px);
  from_montgomery(out_y, py);
}