1 /* crypto/ec/ecp_nistp224.c */
2 /*
3  * Written by Emilia Kasper (Google) for the OpenSSL project.
4  */
5 /* Copyright 2011 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  *
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *  Unless required by applicable law or agreed to in writing, software
15  *  distributed under the License is distributed on an "AS IS" BASIS,
16  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  *  See the License for the specific language governing permissions and
18  *  limitations under the License.
19  */
20 
21 /*
22  * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
23  *
24  * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
25  * and Adam Langley's public domain 64-bit C implementation of curve25519
26  */
27 
28 #include <openssl/opensslconf.h>
29 #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
30 
31 # ifndef OPENSSL_SYS_VMS
32 #  include <stdint.h>
33 # else
34 #  include <inttypes.h>
35 # endif
36 
37 # include <string.h>
38 # include <openssl/err.h>
39 # include "ec_lcl.h"
40 
41 # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
42   /* even with gcc, the typedef won't work for 32-bit platforms */
43 typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
44                                  * platforms */
45 # else
46 #  error "Need GCC 3.1 or later to define type uint128_t"
47 # endif
48 
49 typedef uint8_t u8;
50 typedef uint64_t u64;
51 
52 /******************************************************************************/
53 /*-
54  * INTERNAL REPRESENTATION OF FIELD ELEMENTS
55  *
56  * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
57  * using 64-bit coefficients called 'limbs',
58  * and sometimes (for multiplication results) as
59  * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
60  * using 128-bit coefficients called 'widelimbs'.
61  * A 4-limb representation is an 'felem';
62  * a 7-widelimb representation is a 'widefelem'.
63  * Even within felems, bits of adjacent limbs overlap, and we don't always
64  * reduce the representations: we ensure that inputs to each felem
65  * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
66  * and fit into a 128-bit word without overflow. The coefficients are then
67  * again partially reduced to obtain an felem satisfying a_i < 2^57.
68  * We only reduce to the unique minimal representation at the end of the
69  * computation.
70  */
71 
72 typedef uint64_t limb;
73 typedef uint128_t widelimb;
74 
75 typedef limb felem[4];
76 typedef widelimb widefelem[7];
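/*-
 * As an illustration of this representation: the prime p = 2^224 - 2^96 + 1
 * has the reduced 4-limb form
 *     a_0 = 1, a_1 = 0x00ffff0000000000, a_2 = a_3 = 0x00ffffffffffffff,
 * i.e., 1 + 2^56*(2^56 - 2^40) + 2^112*(2^56 - 1) + 2^168*(2^56 - 1) = p;
 * these are the limb values that felem_is_zero() below compares against.
 */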
77 
78 /*
 * Field element represented as a byte array. 28*8 = 224 bits is also the
80  * group order size for the elliptic curve, and we also use this type for
81  * scalars for point multiplication.
82  */
83 typedef u8 felem_bytearray[28];
84 
85 static const felem_bytearray nistp224_curve_params[5] = {
86     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
87      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
88      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
89     {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
90      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
91      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
92     {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
93      0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
94      0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
95     {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
96      0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
97      0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
98     {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
99      0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
100      0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
101 };
102 
103 /*-
104  * Precomputed multiples of the standard generator
105  * Points are given in coordinates (X, Y, Z) where Z normally is 1
106  * (0 for the point at infinity).
107  * For each field element, slice a_0 is word 0, etc.
108  *
109  * The table has 2 * 16 elements, starting with the following:
110  * index | bits    | point
111  * ------+---------+------------------------------
112  *     0 | 0 0 0 0 | 0G
113  *     1 | 0 0 0 1 | 1G
114  *     2 | 0 0 1 0 | 2^56G
115  *     3 | 0 0 1 1 | (2^56 + 1)G
116  *     4 | 0 1 0 0 | 2^112G
117  *     5 | 0 1 0 1 | (2^112 + 1)G
118  *     6 | 0 1 1 0 | (2^112 + 2^56)G
119  *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
120  *     8 | 1 0 0 0 | 2^168G
121  *     9 | 1 0 0 1 | (2^168 + 1)G
122  *    10 | 1 0 1 0 | (2^168 + 2^56)G
123  *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
124  *    12 | 1 1 0 0 | (2^168 + 2^112)G
125  *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
126  *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
127  *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
128  * followed by a copy of this with each element multiplied by 2^28.
129  *
130  * The reason for this is so that we can clock bits into four different
131  * locations when doing simple scalar multiplies against the base point,
132  * and then another four locations using the second 16 elements.
133  */
134 static const felem gmul[2][16][3] = { {{{0, 0, 0, 0},
135                                         {0, 0, 0, 0},
136                                         {0, 0, 0, 0}},
137                                        {{0x3280d6115c1d21, 0xc1d356c2112234,
138                                          0x7f321390b94a03, 0xb70e0cbd6bb4bf},
139                                         {0xd5819985007e34, 0x75a05a07476444,
140                                          0xfb4c22dfe6cd43, 0xbd376388b5f723},
141                                         {1, 0, 0, 0}},
142                                        {{0xfd9675666ebbe9, 0xbca7664d40ce5e,
143                                          0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144                                         {0x29e0b892dc9c43, 0xece8608436e662,
145                                          0xdc858f185310d0, 0x9812dd4eb8d321},
146                                         {1, 0, 0, 0}},
147                                        {{0x6d3e678d5d8eb8, 0x559eed1cb362f1,
148                                          0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
149                                         {0xf19f90ed50266d, 0xabf2b4bf65f9df,
150                                          0x313865468fafec, 0x5cb379ba910a17},
151                                         {1, 0, 0, 0}},
152                                        {{0x0641966cab26e3, 0x91fb2991fab0a0,
153                                          0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
154                                         {0x7510407766af5d, 0x84d929610d5450,
155                                          0x81d77aae82f706, 0x6916f6d4338c5b},
156                                         {1, 0, 0, 0}},
157                                        {{0xea95ac3b1f15c6, 0x086000905e82d4,
158                                          0xdd323ae4d1c8b1, 0x932b56be7685a3},
159                                         {0x9ef93dea25dbbf, 0x41665960f390f0,
160                                          0xfdec76dbe2a8a7, 0x523e80f019062a},
161                                         {1, 0, 0, 0}},
162                                        {{0x822fdd26732c73, 0xa01c83531b5d0f,
163                                          0x363f37347c1ba4, 0xc391b45c84725c},
164                                         {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec,
165                                          0xc393da7e222a7f, 0x1efb7890ede244},
166                                         {1, 0, 0, 0}},
167                                        {{0x4c9e90ca217da1, 0xd11beca79159bb,
168                                          0xff8d33c2c98b7c, 0x2610b39409f849},
169                                         {0x44d1352ac64da0, 0xcdbb7b2c46b4fb,
170                                          0x966c079b753c89, 0xfe67e4e820b112},
171                                         {1, 0, 0, 0}},
172                                        {{0xe28cae2df5312d, 0xc71b61d16f5c6e,
173                                          0x79b7619a3e7c4c, 0x05c73240899b47},
174                                         {0x9f7f6382c73e3a, 0x18615165c56bda,
175                                          0x641fab2116fd56, 0x72855882b08394},
176                                         {1, 0, 0, 0}},
177                                        {{0x0469182f161c09, 0x74a98ca8d00fb5,
178                                          0xb89da93489a3e0, 0x41c98768fb0c1d},
179                                         {0xe5ea05fb32da81, 0x3dce9ffbca6855,
180                                          0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
181                                         {1, 0, 0, 0}},
182                                        {{0xdab22b2333e87f, 0x4430137a5dd2f6,
183                                          0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
184                                         {0x764a7df0c8fda5, 0x185ba5c3fa2044,
185                                          0x9281d688bcbe50, 0xc40331df893881},
186                                         {1, 0, 0, 0}},
187                                        {{0xb89530796f0f60, 0xade92bd26909a3,
188                                          0x1a0c83fb4884da, 0x1765bf22a5a984},
189                                         {0x772a9ee75db09e, 0x23bc6c67cec16f,
190                                          0x4c1edba8b14e2f, 0xe2a215d9611369},
191                                         {1, 0, 0, 0}},
192                                        {{0x571e509fb5efb3, 0xade88696410552,
193                                          0xc8ae85fada74fe, 0x6c7e4be83bbde3},
194                                         {0xff9f51160f4652, 0xb47ce2495a6539,
195                                          0xa2946c53b582f4, 0x286d2db3ee9a60},
196                                         {1, 0, 0, 0}},
197                                        {{0x40bbd5081a44af, 0x0995183b13926c,
198                                          0xbcefba6f47f6d0, 0x215619e9cc0057},
199                                         {0x8bc94d3b0df45e, 0xf11c54a3694f6f,
200                                          0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
201                                         {1, 0, 0, 0}},
202                                        {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8,
203                                          0x1c29819435d2c6, 0xc813132f4c07e9},
204                                         {0x2891425503b11f, 0x08781030579fea,
205                                          0xf5426ba5cc9674, 0x1e28ebf18562bc},
206                                         {1, 0, 0, 0}},
207                                        {{0x9f31997cc864eb, 0x06cd91d28b5e4c,
208                                          0xff17036691a973, 0xf1aef351497c58},
209                                         {0xdd1f2d600564ff, 0xdead073b1402db,
210                                          0x74a684435bd693, 0xeea7471f962558},
211                                         {1, 0, 0, 0}}},
212 {{{0, 0, 0, 0},
213   {0, 0, 0, 0},
214   {0, 0, 0, 0}},
215  {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
216   {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
217   {1, 0, 0, 0}},
218  {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
219   {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
220   {1, 0, 0, 0}},
221  {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
222   {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
223   {1, 0, 0, 0}},
224  {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
225   {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
226   {1, 0, 0, 0}},
227  {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
228   {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
229   {1, 0, 0, 0}},
230  {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
231   {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
232   {1, 0, 0, 0}},
233  {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
234   {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
235   {1, 0, 0, 0}},
236  {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
237   {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
238   {1, 0, 0, 0}},
239  {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
240   {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
241   {1, 0, 0, 0}},
242  {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
243   {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
244   {1, 0, 0, 0}},
245  {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
246   {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
247   {1, 0, 0, 0}},
248  {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
249   {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
250   {1, 0, 0, 0}},
251  {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
252   {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
253   {1, 0, 0, 0}},
254  {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
255   {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
256   {1, 0, 0, 0}},
257  {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
258   {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
259   {1, 0, 0, 0}}}
260 };
261 
262 /* Precomputation for the group generator. */
263 typedef struct {
264     felem g_pre_comp[2][16][3];
265     int references;
266 } NISTP224_PRE_COMP;
267 
268 const EC_METHOD *EC_GFp_nistp224_method(void)
269 {
270     static const EC_METHOD ret = {
271         EC_FLAGS_DEFAULT_OCT,
272         NID_X9_62_prime_field,
273         ec_GFp_nistp224_group_init,
274         ec_GFp_simple_group_finish,
275         ec_GFp_simple_group_clear_finish,
276         ec_GFp_nist_group_copy,
277         ec_GFp_nistp224_group_set_curve,
278         ec_GFp_simple_group_get_curve,
279         ec_GFp_simple_group_get_degree,
280         ec_GFp_simple_group_check_discriminant,
281         ec_GFp_simple_point_init,
282         ec_GFp_simple_point_finish,
283         ec_GFp_simple_point_clear_finish,
284         ec_GFp_simple_point_copy,
285         ec_GFp_simple_point_set_to_infinity,
286         ec_GFp_simple_set_Jprojective_coordinates_GFp,
287         ec_GFp_simple_get_Jprojective_coordinates_GFp,
288         ec_GFp_simple_point_set_affine_coordinates,
289         ec_GFp_nistp224_point_get_affine_coordinates,
290         0 /* point_set_compressed_coordinates */ ,
291         0 /* point2oct */ ,
292         0 /* oct2point */ ,
293         ec_GFp_simple_add,
294         ec_GFp_simple_dbl,
295         ec_GFp_simple_invert,
296         ec_GFp_simple_is_at_infinity,
297         ec_GFp_simple_is_on_curve,
298         ec_GFp_simple_cmp,
299         ec_GFp_simple_make_affine,
300         ec_GFp_simple_points_make_affine,
301         ec_GFp_nistp224_points_mul,
302         ec_GFp_nistp224_precompute_mult,
303         ec_GFp_nistp224_have_precompute_mult,
304         ec_GFp_nist_field_mul,
305         ec_GFp_nist_field_sqr,
306         0 /* field_div */ ,
307         0 /* field_encode */ ,
308         0 /* field_decode */ ,
309         0                       /* field_set_to_one */
310     };
311 
312     return &ret;
313 }
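/*-
 * Usage sketch (illustration only): callers normally reach this method
 * through the generic EC API rather than directly, e.g.
 *
 *     EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp224r1);
 *
 * which, in builds where this 64-bit code is enabled, is expected to pick
 * EC_GFp_nistp224_method() for P-224; alternatively, EC_GROUP_new() can be
 * called with this method directly and the curve parameters then set with
 * EC_GROUP_set_curve_GFp().
 */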
314 
315 /*
316  * Helper functions to convert field elements to/from internal representation
317  */
318 static void bin28_to_felem(felem out, const u8 in[28])
319 {
320     out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff;
321     out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff;
322     out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff;
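    /*
     * The last limb is read at offset 20 and shifted down, rather than at
     * offset 21 and masked, so that the 8-byte load stays within the
     * 28-byte input
     */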
    out[3] = (*((const uint64_t *)(in + 20))) >> 8;
324 }
325 
326 static void felem_to_bin28(u8 out[28], const felem in)
327 {
328     unsigned i;
329     for (i = 0; i < 7; ++i) {
330         out[i] = in[0] >> (8 * i);
331         out[i + 7] = in[1] >> (8 * i);
332         out[i + 14] = in[2] >> (8 * i);
333         out[i + 21] = in[3] >> (8 * i);
334     }
335 }
336 
/*
 * Convert between the big-endian byte order used by BN_bn2bin/BN_bin2bn and
 * the little-endian byte order expected by the conversion functions above
 */
338 static void flip_endian(u8 *out, const u8 *in, unsigned len)
339 {
340     unsigned i;
341     for (i = 0; i < len; ++i)
342         out[i] = in[len - 1 - i];
343 }
344 
345 /* From OpenSSL BIGNUM to internal representation */
346 static int BN_to_felem(felem out, const BIGNUM *bn)
347 {
348     felem_bytearray b_in;
349     felem_bytearray b_out;
350     unsigned num_bytes;
351 
352     /* BN_bn2bin eats leading zeroes */
353     memset(b_out, 0, sizeof(b_out));
354     num_bytes = BN_num_bytes(bn);
355     if (num_bytes > sizeof(b_out)) {
356         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
357         return 0;
358     }
359     if (BN_is_negative(bn)) {
360         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
361         return 0;
362     }
363     num_bytes = BN_bn2bin(bn, b_in);
364     flip_endian(b_out, b_in, num_bytes);
365     bin28_to_felem(out, b_out);
366     return 1;
367 }
368 
369 /* From internal representation to OpenSSL BIGNUM */
370 static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
371 {
372     felem_bytearray b_in, b_out;
373     felem_to_bin28(b_in, in);
374     flip_endian(b_out, b_in, sizeof(b_out));
375     return BN_bin2bn(b_out, sizeof(b_out), out);
376 }
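/*-
 * Conversion round-trip sketch (illustration only, not used by the code
 * below): for any non-negative BIGNUM x of at most 224 bits,
 *
 *     felem f;
 *     if (BN_to_felem(f, x))
 *         felem_to_BN(y, f);
 *
 * leaves the BIGNUM y equal to x, since neither direction performs a
 * modular reduction.
 */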
377 
378 /******************************************************************************/
379 /*-
380  *                              FIELD OPERATIONS
381  *
382  * Field operations, using the internal representation of field elements.
383  * NB! These operations are specific to our point multiplication and cannot be
384  * expected to be correct in general - e.g., multiplication with a large scalar
385  * will cause an overflow.
386  *
387  */
388 
389 static void felem_one(felem out)
390 {
391     out[0] = 1;
392     out[1] = 0;
393     out[2] = 0;
394     out[3] = 0;
395 }
396 
397 static void felem_assign(felem out, const felem in)
398 {
399     out[0] = in[0];
400     out[1] = in[1];
401     out[2] = in[2];
402     out[3] = in[3];
403 }
404 
405 /* Sum two field elements: out += in */
406 static void felem_sum(felem out, const felem in)
407 {
408     out[0] += in[0];
409     out[1] += in[1];
410     out[2] += in[2];
411     out[3] += in[3];
412 }
413 
414 /* Get negative value: out = -in */
415 /* Assumes in[i] < 2^57 */
416 static void felem_neg(felem out, const felem in)
417 {
418     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
419     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
420     static const limb two58m42m2 = (((limb) 1) << 58) -
421         (((limb) 1) << 42) - (((limb) 1) << 2);
422 
    /*
     * Subtract in from a representation of 0 mod 2^224-2^96+1 (the constants
     * above represent 4*p), so that out == -in (mod p); in[i] < 2^57 keeps
     * each limb of the result positive
     */
424     out[0] = two58p2 - in[0];
425     out[1] = two58m42m2 - in[1];
426     out[2] = two58m2 - in[2];
427     out[3] = two58m2 - in[3];
428 }
429 
430 /* Subtract field elements: out -= in */
431 /* Assumes in[i] < 2^57 */
432 static void felem_diff(felem out, const felem in)
433 {
434     static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
435     static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
436     static const limb two58m42m2 = (((limb) 1) << 58) -
437         (((limb) 1) << 42) - (((limb) 1) << 2);
438 
439     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
440     out[0] += two58p2;
441     out[1] += two58m42m2;
442     out[2] += two58m2;
443     out[3] += two58m2;
444 
445     out[0] -= in[0];
446     out[1] -= in[1];
447     out[2] -= in[2];
448     out[3] -= in[3];
449 }
450 
451 /* Subtract in unreduced 128-bit mode: out -= in */
452 /* Assumes in[i] < 2^119 */
453 static void widefelem_diff(widefelem out, const widefelem in)
454 {
455     static const widelimb two120 = ((widelimb) 1) << 120;
456     static const widelimb two120m64 = (((widelimb) 1) << 120) -
457         (((widelimb) 1) << 64);
458     static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
459         (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
460 
461     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
462     out[0] += two120;
463     out[1] += two120m64;
464     out[2] += two120m64;
465     out[3] += two120;
466     out[4] += two120m104m64;
467     out[5] += two120m64;
468     out[6] += two120m64;
469 
470     out[0] -= in[0];
471     out[1] -= in[1];
472     out[2] -= in[2];
473     out[3] -= in[3];
474     out[4] -= in[4];
475     out[5] -= in[5];
476     out[6] -= in[6];
477 }
478 
479 /* Subtract in mixed mode: out128 -= in64 */
480 /* in[i] < 2^63 */
481 static void felem_diff_128_64(widefelem out, const felem in)
482 {
483     static const widelimb two64p8 = (((widelimb) 1) << 64) +
484         (((widelimb) 1) << 8);
485     static const widelimb two64m8 = (((widelimb) 1) << 64) -
486         (((widelimb) 1) << 8);
487     static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
488         (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
489 
490     /* Add 0 mod 2^224-2^96+1 to ensure out > in */
491     out[0] += two64p8;
492     out[1] += two64m48m8;
493     out[2] += two64m8;
494     out[3] += two64m8;
495 
496     out[0] -= in[0];
497     out[1] -= in[1];
498     out[2] -= in[2];
499     out[3] -= in[3];
500 }
501 
502 /*
 * Multiply a field element by a scalar: out = out * scalar. The scalars we
 * actually use are small, so results fit without overflow.
505  */
506 static void felem_scalar(felem out, const limb scalar)
507 {
508     out[0] *= scalar;
509     out[1] *= scalar;
510     out[2] *= scalar;
511     out[3] *= scalar;
512 }
513 
514 /*
 * Multiply an unreduced field element by a scalar: out = out * scalar. The
 * scalars we actually use are small, so results fit without overflow.
517  */
518 static void widefelem_scalar(widefelem out, const widelimb scalar)
519 {
520     out[0] *= scalar;
521     out[1] *= scalar;
522     out[2] *= scalar;
523     out[3] *= scalar;
524     out[4] *= scalar;
525     out[5] *= scalar;
526     out[6] *= scalar;
527 }
528 
529 /* Square a field element: out = in^2 */
530 static void felem_square(widefelem out, const felem in)
531 {
532     limb tmp0, tmp1, tmp2;
533     tmp0 = 2 * in[0];
534     tmp1 = 2 * in[1];
535     tmp2 = 2 * in[2];
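    /*
     * Schoolbook squaring: out[k] = sum_{i+j=k} in[i]*in[j]; each tmp value
     * folds the two equal cross terms in[i]*in[j] and in[j]*in[i] into a
     * single multiplication
     */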
536     out[0] = ((widelimb) in[0]) * in[0];
537     out[1] = ((widelimb) in[0]) * tmp1;
538     out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
539     out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
540     out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
541     out[5] = ((widelimb) in[3]) * tmp2;
542     out[6] = ((widelimb) in[3]) * in[3];
543 }
544 
545 /* Multiply two field elements: out = in1 * in2 */
546 static void felem_mul(widefelem out, const felem in1, const felem in2)
547 {
548     out[0] = ((widelimb) in1[0]) * in2[0];
549     out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
550     out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
551         ((widelimb) in1[2]) * in2[0];
552     out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
553         ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
554     out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
555         ((widelimb) in1[3]) * in2[1];
556     out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
557     out[6] = ((widelimb) in1[3]) * in2[3];
558 }
559 
560 /*-
561  * Reduce seven 128-bit coefficients to four 64-bit coefficients.
562  * Requires in[i] < 2^126,
563  * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
564 static void felem_reduce(felem out, const widefelem in)
565 {
566     static const widelimb two127p15 = (((widelimb) 1) << 127) +
567         (((widelimb) 1) << 15);
568     static const widelimb two127m71 = (((widelimb) 1) << 127) -
569         (((widelimb) 1) << 71);
570     static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
571         (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
572     widelimb output[5];
573 
574     /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
575     output[0] = in[0] + two127p15;
576     output[1] = in[1] + two127m71m55;
577     output[2] = in[2] + two127m71;
578     output[3] = in[3];
579     output[4] = in[4];
580 
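    /*-
     * The eliminations below use 2^224 == 2^96 - 1 (mod p): a coefficient t
     * at limb m >= 4 is replaced by (t >> 16) added to limb m-2 plus
     * (t & 0xffff) << 40 added to limb m-3 (together t * 2^96 at the right
     * weight), minus t subtracted from limb m-4.
     */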
581     /* Eliminate in[4], in[5], in[6] */
582     output[4] += in[6] >> 16;
583     output[3] += (in[6] & 0xffff) << 40;
584     output[2] -= in[6];
585 
586     output[3] += in[5] >> 16;
587     output[2] += (in[5] & 0xffff) << 40;
588     output[1] -= in[5];
589 
590     output[2] += output[4] >> 16;
591     output[1] += (output[4] & 0xffff) << 40;
592     output[0] -= output[4];
593 
594     /* Carry 2 -> 3 -> 4 */
595     output[3] += output[2] >> 56;
596     output[2] &= 0x00ffffffffffffff;
597 
598     output[4] = output[3] >> 56;
599     output[3] &= 0x00ffffffffffffff;
600 
601     /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
602 
603     /* Eliminate output[4] */
604     output[2] += output[4] >> 16;
605     /* output[2] < 2^56 + 2^56 = 2^57 */
606     output[1] += (output[4] & 0xffff) << 40;
607     output[0] -= output[4];
608 
609     /* Carry 0 -> 1 -> 2 -> 3 */
610     output[1] += output[0] >> 56;
611     out[0] = output[0] & 0x00ffffffffffffff;
612 
613     output[2] += output[1] >> 56;
614     /* output[2] < 2^57 + 2^72 */
615     out[1] = output[1] & 0x00ffffffffffffff;
616     output[3] += output[2] >> 56;
617     /* output[3] <= 2^56 + 2^16 */
618     out[2] = output[2] & 0x00ffffffffffffff;
619 
620     /*-
621      * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
622      * out[3] <= 2^56 + 2^16 (due to final carry),
623      * so out < 2*p
624      */
625     out[3] = output[3];
626 }
627 
628 static void felem_square_reduce(felem out, const felem in)
629 {
630     widefelem tmp;
631     felem_square(tmp, in);
632     felem_reduce(out, tmp);
633 }
634 
635 static void felem_mul_reduce(felem out, const felem in1, const felem in2)
636 {
637     widefelem tmp;
638     felem_mul(tmp, in1, in2);
639     felem_reduce(out, tmp);
640 }
641 
642 /*
643  * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
644  * call felem_reduce first)
645  */
646 static void felem_contract(felem out, const felem in)
647 {
648     static const int64_t two56 = ((limb) 1) << 56;
649     /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
650     /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
651     int64_t tmp[4], a;
652     tmp[0] = in[0];
653     tmp[1] = in[1];
654     tmp[2] = in[2];
655     tmp[3] = in[3];
656     /* Case 1: a = 1 iff in >= 2^224 */
657     a = (in[3] >> 56);
658     tmp[0] -= a;
659     tmp[1] += a << 40;
660     tmp[3] &= 0x00ffffffffffffff;
661     /*
662      * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
663      * and the lower part is non-zero
664      */
665     a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
666         (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
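    /*-
     * The first operand of the "|" equals 2^56 (cleared by the mask below)
     * exactly when the low 56 bits of in[3], all of in[2] and the top 16
     * bits of in[1] are ones; the second operand is all-ones exactly when
     * in[0] and the low 40 bits of in[1] are zero.  Hence a == 0 after
     * masking iff p <= in < 2^224.
     */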
667     a &= 0x00ffffffffffffff;
668     /* turn a into an all-one mask (if a = 0) or an all-zero mask */
669     a = (a - 1) >> 63;
670     /* subtract 2^224 - 2^96 + 1 if a is all-one */
671     tmp[3] &= a ^ 0xffffffffffffffff;
672     tmp[2] &= a ^ 0xffffffffffffffff;
673     tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
674     tmp[0] -= 1 & a;
675 
676     /*
677      * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
678      * non-zero, so we only need one step
679      */
680     a = tmp[0] >> 63;
681     tmp[0] += two56 & a;
682     tmp[1] -= 1 & a;
683 
684     /* carry 1 -> 2 -> 3 */
685     tmp[2] += tmp[1] >> 56;
686     tmp[1] &= 0x00ffffffffffffff;
687 
688     tmp[3] += tmp[2] >> 56;
689     tmp[2] &= 0x00ffffffffffffff;
690 
691     /* Now 0 <= out < p */
692     out[0] = tmp[0];
693     out[1] = tmp[1];
694     out[2] = tmp[2];
695     out[3] = tmp[3];
696 }
697 
698 /*
699  * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
700  * elements are reduced to in < 2^225, so we only need to check three cases:
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2 (i.e., 0, p, and 2*p)
702  */
703 static limb felem_is_zero(const felem in)
704 {
705     limb zero, two224m96p1, two225m97p2;
706 
707     zero = in[0] | in[1] | in[2] | in[3];
708     zero = (((int64_t) (zero) - 1) >> 63) & 1;
709     two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
710         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
711     two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
712     two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
713         | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
714     two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
715     return (zero | two224m96p1 | two225m97p2);
716 }
717 
718 static int felem_is_zero_int(const void *in)
719 {
720     return (int)(felem_is_zero(in) & ((limb) 1));
721 }
722 
/* Invert a field element: compute in^(p-2) = in^(2^224 - 2^96 - 1) */
/* (Fermat's little theorem); computation chain copied from djb's code */
725 static void felem_inv(felem out, const felem in)
726 {
727     felem ftmp, ftmp2, ftmp3, ftmp4;
728     widefelem tmp;
729     unsigned i;
730 
731     felem_square(tmp, in);
732     felem_reduce(ftmp, tmp);    /* 2 */
733     felem_mul(tmp, in, ftmp);
734     felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
735     felem_square(tmp, ftmp);
736     felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
737     felem_mul(tmp, in, ftmp);
738     felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
739     felem_square(tmp, ftmp);
740     felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
741     felem_square(tmp, ftmp2);
742     felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
743     felem_square(tmp, ftmp2);
744     felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
745     felem_mul(tmp, ftmp2, ftmp);
746     felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
747     felem_square(tmp, ftmp);
748     felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
749     for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
750         felem_square(tmp, ftmp2);
751         felem_reduce(ftmp2, tmp);
752     }
753     felem_mul(tmp, ftmp2, ftmp);
754     felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
755     felem_square(tmp, ftmp2);
756     felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
757     for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
758         felem_square(tmp, ftmp3);
759         felem_reduce(ftmp3, tmp);
760     }
761     felem_mul(tmp, ftmp3, ftmp2);
762     felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
763     felem_square(tmp, ftmp2);
764     felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
765     for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
766         felem_square(tmp, ftmp3);
767         felem_reduce(ftmp3, tmp);
768     }
769     felem_mul(tmp, ftmp3, ftmp2);
770     felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
771     felem_square(tmp, ftmp3);
772     felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
773     for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
774         felem_square(tmp, ftmp4);
775         felem_reduce(ftmp4, tmp);
776     }
777     felem_mul(tmp, ftmp3, ftmp4);
778     felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
779     felem_square(tmp, ftmp3);
780     felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
781     for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
782         felem_square(tmp, ftmp4);
783         felem_reduce(ftmp4, tmp);
784     }
785     felem_mul(tmp, ftmp2, ftmp4);
786     felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
787     for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
788         felem_square(tmp, ftmp2);
789         felem_reduce(ftmp2, tmp);
790     }
791     felem_mul(tmp, ftmp2, ftmp);
792     felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
793     felem_square(tmp, ftmp);
794     felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
795     felem_mul(tmp, ftmp, in);
796     felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
797     for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
798         felem_square(tmp, ftmp);
799         felem_reduce(ftmp, tmp);
800     }
801     felem_mul(tmp, ftmp, ftmp3);
802     felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
803 }
804 
805 /*
 * Copy in constant time: if icopy == 1, copy in to out; if icopy == 0, leave
 * out unchanged.
808  */
809 static void copy_conditional(felem out, const felem in, limb icopy)
810 {
811     unsigned i;
812     /*
813      * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
814      */
815     const limb copy = -icopy;
816     for (i = 0; i < 4; ++i) {
817         const limb tmp = copy & (in[i] ^ out[i]);
818         out[i] ^= tmp;
819     }
820 }
821 
822 /******************************************************************************/
823 /*-
824  *                       ELLIPTIC CURVE POINT OPERATIONS
825  *
826  * Points are represented in Jacobian projective coordinates:
827  * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
828  * or to the point at infinity if Z == 0.
829  *
830  */
831 
832 /*-
833  * Double an elliptic curve point:
834  * (X', Y', Z') = 2 * (X, Y, Z), where
835  * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
837  * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
838  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
839  * while x_out == y_in is not (maybe this works, but it's not tested).
840  */
841 static void
842 point_double(felem x_out, felem y_out, felem z_out,
843              const felem x_in, const felem y_in, const felem z_in)
844 {
845     widefelem tmp, tmp2;
846     felem delta, gamma, beta, alpha, ftmp, ftmp2;
847 
848     felem_assign(ftmp, x_in);
849     felem_assign(ftmp2, x_in);
850 
851     /* delta = z^2 */
852     felem_square(tmp, z_in);
853     felem_reduce(delta, tmp);
854 
855     /* gamma = y^2 */
856     felem_square(tmp, y_in);
857     felem_reduce(gamma, tmp);
858 
859     /* beta = x*gamma */
860     felem_mul(tmp, x_in, gamma);
861     felem_reduce(beta, tmp);
862 
863     /* alpha = 3*(x-delta)*(x+delta) */
864     felem_diff(ftmp, delta);
865     /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
866     felem_sum(ftmp2, delta);
867     /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
868     felem_scalar(ftmp2, 3);
869     /* ftmp2[i] < 3 * 2^58 < 2^60 */
870     felem_mul(tmp, ftmp, ftmp2);
871     /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
872     felem_reduce(alpha, tmp);
873 
874     /* x' = alpha^2 - 8*beta */
875     felem_square(tmp, alpha);
876     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
877     felem_assign(ftmp, beta);
878     felem_scalar(ftmp, 8);
879     /* ftmp[i] < 8 * 2^57 = 2^60 */
880     felem_diff_128_64(tmp, ftmp);
881     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
882     felem_reduce(x_out, tmp);
883 
884     /* z' = (y + z)^2 - gamma - delta */
885     felem_sum(delta, gamma);
886     /* delta[i] < 2^57 + 2^57 = 2^58 */
887     felem_assign(ftmp, y_in);
888     felem_sum(ftmp, z_in);
889     /* ftmp[i] < 2^57 + 2^57 = 2^58 */
890     felem_square(tmp, ftmp);
891     /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
892     felem_diff_128_64(tmp, delta);
893     /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
894     felem_reduce(z_out, tmp);
895 
896     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
897     felem_scalar(beta, 4);
898     /* beta[i] < 4 * 2^57 = 2^59 */
899     felem_diff(beta, x_out);
900     /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
901     felem_mul(tmp, alpha, beta);
902     /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
903     felem_square(tmp2, gamma);
904     /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
905     widefelem_scalar(tmp2, 8);
906     /* tmp2[i] < 8 * 2^116 = 2^119 */
907     widefelem_diff(tmp, tmp2);
908     /* tmp[i] < 2^119 + 2^120 < 2^121 */
909     felem_reduce(y_out, tmp);
910 }
911 
912 /*-
913  * Add two elliptic curve points:
914  * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
915  * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
916  * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
917  * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
918  *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
919  * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
920  *
921  * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
922  */
923 
924 /*
925  * This function is not entirely constant-time: it includes a branch for
 * checking whether the two input points are equal (while not equal to the
927  * point at infinity). This case never happens during single point
928  * multiplication, so there is no timing leak for ECDH or ECDSA signing.
929  */
930 static void point_add(felem x3, felem y3, felem z3,
931                       const felem x1, const felem y1, const felem z1,
932                       const int mixed, const felem x2, const felem y2,
933                       const felem z2)
934 {
935     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
936     widefelem tmp, tmp2;
937     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
938 
939     if (!mixed) {
940         /* ftmp2 = z2^2 */
941         felem_square(tmp, z2);
942         felem_reduce(ftmp2, tmp);
943 
944         /* ftmp4 = z2^3 */
945         felem_mul(tmp, ftmp2, z2);
946         felem_reduce(ftmp4, tmp);
947 
948         /* ftmp4 = z2^3*y1 */
949         felem_mul(tmp2, ftmp4, y1);
950         felem_reduce(ftmp4, tmp2);
951 
952         /* ftmp2 = z2^2*x1 */
953         felem_mul(tmp2, ftmp2, x1);
954         felem_reduce(ftmp2, tmp2);
955     } else {
956         /*
957          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
958          */
959 
960         /* ftmp4 = z2^3*y1 */
961         felem_assign(ftmp4, y1);
962 
963         /* ftmp2 = z2^2*x1 */
964         felem_assign(ftmp2, x1);
965     }
966 
967     /* ftmp = z1^2 */
968     felem_square(tmp, z1);
969     felem_reduce(ftmp, tmp);
970 
971     /* ftmp3 = z1^3 */
972     felem_mul(tmp, ftmp, z1);
973     felem_reduce(ftmp3, tmp);
974 
975     /* tmp = z1^3*y2 */
976     felem_mul(tmp, ftmp3, y2);
977     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
978 
979     /* ftmp3 = z1^3*y2 - z2^3*y1 */
980     felem_diff_128_64(tmp, ftmp4);
981     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
982     felem_reduce(ftmp3, tmp);
983 
984     /* tmp = z1^2*x2 */
985     felem_mul(tmp, ftmp, x2);
986     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
987 
988     /* ftmp = z1^2*x2 - z2^2*x1 */
989     felem_diff_128_64(tmp, ftmp2);
990     /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
991     felem_reduce(ftmp, tmp);
992 
993     /*
994      * the formulae are incorrect if the points are equal so we check for
995      * this and do doubling if this happens
996      */
997     x_equal = felem_is_zero(ftmp);
998     y_equal = felem_is_zero(ftmp3);
999     z1_is_zero = felem_is_zero(z1);
1000     z2_is_zero = felem_is_zero(z2);
1001     /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
1002     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1003         point_double(x3, y3, z3, x1, y1, z1);
1004         return;
1005     }
1006 
1007     /* ftmp5 = z1*z2 */
1008     if (!mixed) {
1009         felem_mul(tmp, z1, z2);
1010         felem_reduce(ftmp5, tmp);
1011     } else {
1012         /* special case z2 = 0 is handled later */
1013         felem_assign(ftmp5, z1);
1014     }
1015 
1016     /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017     felem_mul(tmp, ftmp, ftmp5);
1018     felem_reduce(z_out, tmp);
1019 
1020     /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021     felem_assign(ftmp5, ftmp);
1022     felem_square(tmp, ftmp);
1023     felem_reduce(ftmp, tmp);
1024 
1025     /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026     felem_mul(tmp, ftmp, ftmp5);
1027     felem_reduce(ftmp5, tmp);
1028 
1029     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030     felem_mul(tmp, ftmp2, ftmp);
1031     felem_reduce(ftmp2, tmp);
1032 
1033     /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034     felem_mul(tmp, ftmp4, ftmp5);
1035     /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1036 
1037     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038     felem_square(tmp2, ftmp3);
1039     /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1040 
1041     /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042     felem_diff_128_64(tmp2, ftmp5);
1043     /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1044 
1045     /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046     felem_assign(ftmp5, ftmp2);
1047     felem_scalar(ftmp5, 2);
1048     /* ftmp5[i] < 2 * 2^57 = 2^58 */
1049 
1050     /*-
1051      * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052      *  2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1053      */
1054     felem_diff_128_64(tmp2, ftmp5);
1055     /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056     felem_reduce(x_out, tmp2);
1057 
1058     /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059     felem_diff(ftmp2, x_out);
1060     /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1061 
1062     /*
1063      * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1064      */
1065     felem_mul(tmp2, ftmp3, ftmp2);
1066     /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1067 
1068     /*-
1069      * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070      *  z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1071      */
1072     widefelem_diff(tmp2, tmp);
1073     /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074     felem_reduce(y_out, tmp2);
1075 
1076     /*
1077      * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078      * the point at infinity, so we need to check for this separately
1079      */
1080 
1081     /*
1082      * if point 1 is at infinity, copy point 2 to output, and vice versa
1083      */
1084     copy_conditional(x_out, x2, z1_is_zero);
1085     copy_conditional(x_out, x1, z2_is_zero);
1086     copy_conditional(y_out, y2, z1_is_zero);
1087     copy_conditional(y_out, y1, z2_is_zero);
1088     copy_conditional(z_out, z2, z1_is_zero);
1089     copy_conditional(z_out, z1, z2_is_zero);
1090     felem_assign(x3, x_out);
1091     felem_assign(y3, y_out);
1092     felem_assign(z3, z_out);
1093 }
1094 
1095 /*
1096  * select_point selects the |idx|th point from a precomputation table and
1097  * copies it to out.
 * The pre_comp array argument must have at least |size| elements.
1099  */
1100 static void select_point(const u64 idx, unsigned int size,
1101                          const felem pre_comp[][3], felem out[3])
1102 {
1103     unsigned i, j;
1104     limb *outlimbs = &out[0][0];
1105     memset(outlimbs, 0, 3 * sizeof(felem));
1106 
1107     for (i = 0; i < size; i++) {
1108         const limb *inlimbs = &pre_comp[i][0][0];
1109         u64 mask = i ^ idx;
1110         mask |= mask >> 4;
1111         mask |= mask >> 2;
1112         mask |= mask >> 1;
1113         mask &= 1;
1114         mask--;
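        /* mask is now all-ones if i == idx, and all-zero otherwise */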
1115         for (j = 0; j < 4 * 3; j++)
1116             outlimbs[j] |= inlimbs[j] & mask;
1117     }
1118 }
1119 
1120 /* get_bit returns the |i|th bit in |in| */
1121 static char get_bit(const felem_bytearray in, unsigned i)
1122 {
1123     if (i >= 224)
1124         return 0;
1125     return (in[i >> 3] >> (i & 7)) & 1;
1126 }
1127 
1128 /*
1129  * Interleaved point multiplication using precomputed point multiples: The
1130  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132  * generator, using certain (large) precomputed multiples in g_pre_comp.
1133  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
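 * For the generator, round i (27 >= i >= 0) uses scalar bits
 * {i, i+56, i+112, i+168} to index g_pre_comp[0] and bits
 * {i+28, i+84, i+140, i+196} to index g_pre_comp[1], matching the layout
 * described for the gmul table above.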
1134  */
1135 static void batch_mul(felem x_out, felem y_out, felem z_out,
1136                       const felem_bytearray scalars[],
1137                       const unsigned num_points, const u8 *g_scalar,
1138                       const int mixed, const felem pre_comp[][17][3],
1139                       const felem g_pre_comp[2][16][3])
1140 {
1141     int i, skip;
1142     unsigned num;
1143     unsigned gen_mul = (g_scalar != NULL);
1144     felem nq[3], tmp[4];
1145     u64 bits;
1146     u8 sign, digit;
1147 
1148     /* set nq to the point at infinity */
1149     memset(nq, 0, 3 * sizeof(felem));
1150 
1151     /*
1152      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153      * of the generator (two in each of the last 28 rounds) and additions of
     * other points' multiples (every 5th round).
1155      */
1156     skip = 1;                   /* save two point operations in the first
1157                                  * round */
1158     for (i = (num_points ? 220 : 27); i >= 0; --i) {
1159         /* double */
1160         if (!skip)
1161             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1162 
1163         /* add multiples of the generator */
1164         if (gen_mul && (i <= 27)) {
1165             /* first, look 28 bits upwards */
1166             bits = get_bit(g_scalar, i + 196) << 3;
1167             bits |= get_bit(g_scalar, i + 140) << 2;
1168             bits |= get_bit(g_scalar, i + 84) << 1;
1169             bits |= get_bit(g_scalar, i + 28);
1170             /* select the point to add, in constant time */
1171             select_point(bits, 16, g_pre_comp[1], tmp);
1172 
1173             if (!skip) {
1174                 /* value 1 below is argument for "mixed" */
1175                 point_add(nq[0], nq[1], nq[2],
1176                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1177             } else {
1178                 memcpy(nq, tmp, 3 * sizeof(felem));
1179                 skip = 0;
1180             }
1181 
1182             /* second, look at the current position */
1183             bits = get_bit(g_scalar, i + 168) << 3;
1184             bits |= get_bit(g_scalar, i + 112) << 2;
1185             bits |= get_bit(g_scalar, i + 56) << 1;
1186             bits |= get_bit(g_scalar, i);
1187             /* select the point to add, in constant time */
1188             select_point(bits, 16, g_pre_comp[0], tmp);
1189             point_add(nq[0], nq[1], nq[2],
1190                       nq[0], nq[1], nq[2],
1191                       1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1192         }
1193 
1194         /* do other additions every 5 doublings */
1195         if (num_points && (i % 5 == 0)) {
1196             /* loop over all scalars */
1197             for (num = 0; num < num_points; ++num) {
1198                 bits = get_bit(scalars[num], i + 4) << 5;
1199                 bits |= get_bit(scalars[num], i + 3) << 4;
1200                 bits |= get_bit(scalars[num], i + 2) << 3;
1201                 bits |= get_bit(scalars[num], i + 1) << 2;
1202                 bits |= get_bit(scalars[num], i) << 1;
1203                 bits |= get_bit(scalars[num], i - 1);
1204                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1205 
1206                 /* select the point to add or subtract */
1207                 select_point(digit, 17, pre_comp[num], tmp);
1208                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1209                                             * point */
1210                 copy_conditional(tmp[1], tmp[3], sign);
1211 
1212                 if (!skip) {
1213                     point_add(nq[0], nq[1], nq[2],
1214                               nq[0], nq[1], nq[2],
1215                               mixed, tmp[0], tmp[1], tmp[2]);
1216                 } else {
1217                     memcpy(nq, tmp, 3 * sizeof(felem));
1218                     skip = 0;
1219                 }
1220             }
1221         }
1222     }
1223     felem_assign(x_out, nq[0]);
1224     felem_assign(y_out, nq[1]);
1225     felem_assign(z_out, nq[2]);
1226 }
1227 
1228 /******************************************************************************/
1229 /*
1230  * FUNCTIONS TO MANAGE PRECOMPUTATION
1231  */
1232 
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1234 {
1235     NISTP224_PRE_COMP *ret = NULL;
1236     ret = (NISTP224_PRE_COMP *) OPENSSL_malloc(sizeof(*ret));
1237     if (!ret) {
1238         ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239         return ret;
1240     }
1241     memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1242     ret->references = 1;
1243     return ret;
1244 }
1245 
1246 static void *nistp224_pre_comp_dup(void *src_)
1247 {
1248     NISTP224_PRE_COMP *src = src_;
1249 
1250     /* no need to actually copy, these objects never change! */
1251     CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1252 
1253     return src_;
1254 }
1255 
1256 static void nistp224_pre_comp_free(void *pre_)
1257 {
1258     int i;
1259     NISTP224_PRE_COMP *pre = pre_;
1260 
1261     if (!pre)
1262         return;
1263 
1264     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1265     if (i > 0)
1266         return;
1267 
1268     OPENSSL_free(pre);
1269 }
1270 
1271 static void nistp224_pre_comp_clear_free(void *pre_)
1272 {
1273     int i;
1274     NISTP224_PRE_COMP *pre = pre_;
1275 
1276     if (!pre)
1277         return;
1278 
1279     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1280     if (i > 0)
1281         return;
1282 
1283     OPENSSL_cleanse(pre, sizeof(*pre));
1284     OPENSSL_free(pre);
1285 }
1286 
1287 /******************************************************************************/
1288 /*
1289  * OPENSSL EC_METHOD FUNCTIONS
1290  */
1291 
1292 int ec_GFp_nistp224_group_init(EC_GROUP *group)
1293 {
1294     int ret;
1295     ret = ec_GFp_simple_group_init(group);
1296     group->a_is_minus3 = 1;
1297     return ret;
1298 }
1299 
1300 int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1301                                     const BIGNUM *a, const BIGNUM *b,
1302                                     BN_CTX *ctx)
1303 {
1304     int ret = 0;
1305     BN_CTX *new_ctx = NULL;
1306     BIGNUM *curve_p, *curve_a, *curve_b;
1307 
1308     if (ctx == NULL)
1309         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1310             return 0;
1311     BN_CTX_start(ctx);
1312     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1313         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1314         ((curve_b = BN_CTX_get(ctx)) == NULL))
1315         goto err;
1316     BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1317     BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1318     BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1319     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1320         ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1321               EC_R_WRONG_CURVE_PARAMETERS);
1322         goto err;
1323     }
1324     group->field_mod_func = BN_nist_mod_224;
1325     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1326  err:
1327     BN_CTX_end(ctx);
1328     if (new_ctx != NULL)
1329         BN_CTX_free(new_ctx);
1330     return ret;
1331 }
1332 
1333 /*
1334  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1335  * (X/Z^2, Y/Z^3)
1336  */
1337 int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1338                                                  const EC_POINT *point,
1339                                                  BIGNUM *x, BIGNUM *y,
1340                                                  BN_CTX *ctx)
1341 {
1342     felem z1, z2, x_in, y_in, x_out, y_out;
1343     widefelem tmp;
1344 
1345     if (EC_POINT_is_at_infinity(group, point)) {
1346         ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1347               EC_R_POINT_AT_INFINITY);
1348         return 0;
1349     }
1350     if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1351         (!BN_to_felem(z1, &point->Z)))
1352         return 0;
1353     felem_inv(z2, z1);
1354     felem_square(tmp, z2);
1355     felem_reduce(z1, tmp);
1356     felem_mul(tmp, x_in, z1);
1357     felem_reduce(x_in, tmp);
1358     felem_contract(x_out, x_in);
1359     if (x != NULL) {
1360         if (!felem_to_BN(x, x_out)) {
1361             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1362                   ERR_R_BN_LIB);
1363             return 0;
1364         }
1365     }
1366     felem_mul(tmp, z1, z2);
1367     felem_reduce(z1, tmp);
1368     felem_mul(tmp, y_in, z1);
1369     felem_reduce(y_in, tmp);
1370     felem_contract(y_out, y_in);
1371     if (y != NULL) {
1372         if (!felem_to_BN(y, y_out)) {
1373             ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1374                   ERR_R_BN_LIB);
1375             return 0;
1376         }
1377     }
1378     return 1;
1379 }
1380 
1381 static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1382                                felem tmp_felems[ /* num+1 */ ])
1383 {
1384     /*
1385      * Runs in constant time, unless an input is the point at infinity (which
1386      * normally shouldn't happen).
1387      */
1388     ec_GFp_nistp_points_make_affine_internal(num,
1389                                              points,
1390                                              sizeof(felem),
1391                                              tmp_felems,
1392                                              (void (*)(void *))felem_one,
1393                                              felem_is_zero_int,
1394                                              (void (*)(void *, const void *))
1395                                              felem_assign,
1396                                              (void (*)(void *, const void *))
                                             felem_square_reduce,
                                             (void (*)(void *, const void *,
                                                       const void *))
                                             felem_mul_reduce,
1404                                              (void (*)(void *, const void *))
1405                                              felem_inv,
1406                                              (void (*)(void *, const void *))
1407                                              felem_contract);
1408 }
1409 
1410 /*
1411  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
 * values. The result is stored in r (r can equal one of the inputs).
1413  */
1414 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1415                                const BIGNUM *scalar, size_t num,
1416                                const EC_POINT *points[],
1417                                const BIGNUM *scalars[], BN_CTX *ctx)
1418 {
1419     int ret = 0;
1420     int j;
1421     unsigned i;
1422     int mixed = 0;
1423     BN_CTX *new_ctx = NULL;
1424     BIGNUM *x, *y, *z, *tmp_scalar;
1425     felem_bytearray g_secret;
1426     felem_bytearray *secrets = NULL;
1427     felem(*pre_comp)[17][3] = NULL;
1428     felem *tmp_felems = NULL;
1429     felem_bytearray tmp;
1430     unsigned num_bytes;
1431     int have_pre_comp = 0;
1432     size_t num_points = num;
1433     felem x_in, y_in, z_in, x_out, y_out, z_out;
1434     NISTP224_PRE_COMP *pre = NULL;
1435     const felem(*g_pre_comp)[16][3] = NULL;
1436     EC_POINT *generator = NULL;
1437     const EC_POINT *p = NULL;
1438     const BIGNUM *p_scalar = NULL;
1439 
1440     if (ctx == NULL)
1441         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1442             return 0;
1443     BN_CTX_start(ctx);
1444     if (((x = BN_CTX_get(ctx)) == NULL) ||
1445         ((y = BN_CTX_get(ctx)) == NULL) ||
1446         ((z = BN_CTX_get(ctx)) == NULL) ||
1447         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1448         goto err;
1449 
1450     if (scalar != NULL) {
1451         pre = EC_EX_DATA_get_data(group->extra_data,
1452                                   nistp224_pre_comp_dup,
1453                                   nistp224_pre_comp_free,
1454                                   nistp224_pre_comp_clear_free);
1455         if (pre)
1456             /* we have precomputation, try to use it */
1457             g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1458         else
1459             /* try to use the standard precomputation */
1460             g_pre_comp = &gmul[0];
1461         generator = EC_POINT_new(group);
1462         if (generator == NULL)
1463             goto err;
        /* get the generator from the precomputation ([0][1] holds 1*G) */
1465         if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1466             !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1467             !felem_to_BN(z, g_pre_comp[0][1][2])) {
1468             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1469             goto err;
1470         }
1471         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1472                                                       generator, x, y, z,
1473                                                       ctx))
1474             goto err;
1475         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1476             /* precomputation matches generator */
1477             have_pre_comp = 1;
1478         else
1479             /*
1480              * we don't have valid precomputation: treat the generator as a
1481              * random point
1482              */
1483             num_points = num_points + 1;
1484     }
1485 
1486     if (num_points > 0) {
1487         if (num_points >= 3) {
1488             /*
1489              * unless we precompute multiples for just one or two points,
1490              * converting those into affine form is time well spent
1491              */
1492             mixed = 1;
1493         }
1494         secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1495         pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1496         if (mixed)
1497             tmp_felems =
1498                 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1499         if ((secrets == NULL) || (pre_comp == NULL)
1500             || (mixed && (tmp_felems == NULL))) {
1501             ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1502             goto err;
1503         }
1504 
1505         /*
1506          * we treat NULL scalars as 0, and NULL points as points at infinity,
1507          * i.e., they contribute nothing to the linear combination
1508          */
1509         memset(secrets, 0, num_points * sizeof(felem_bytearray));
1510         memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1511         for (i = 0; i < num_points; ++i) {
            if (i == num) {
                /* the generator */
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else {
                /* the i^th point */
                p = points[i];
                p_scalar = scalars[i];
            }
1523             if ((p_scalar != NULL) && (p != NULL)) {
1524                 /* reduce scalar to 0 <= scalar < 2^224 */
1525                 if ((BN_num_bits(p_scalar) > 224)
1526                     || (BN_is_negative(p_scalar))) {
1527                     /*
1528                      * this is an unusual input, and we don't guarantee
1529                      * constant-timeness
1530                      */
1531                     if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1532                         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1533                         goto err;
1534                     }
1535                     num_bytes = BN_bn2bin(tmp_scalar, tmp);
1536                 } else
1537                     num_bytes = BN_bn2bin(p_scalar, tmp);
1538                 flip_endian(secrets[i], tmp, num_bytes);
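                /* secrets[i] now holds the scalar, zero-padded, in little-endian order */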
1539                 /* precompute multiples */
1540                 if ((!BN_to_felem(x_out, &p->X)) ||
1541                     (!BN_to_felem(y_out, &p->Y)) ||
1542                     (!BN_to_felem(z_out, &p->Z)))
1543                     goto err;
1544                 felem_assign(pre_comp[i][1][0], x_out);
1545                 felem_assign(pre_comp[i][1][1], y_out);
1546                 felem_assign(pre_comp[i][1][2], z_out);
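                /*
                 * the remaining multiples 2..16 of p: even j by doubling
                 * entry j/2, odd j by adding p (entry 1) to entry j-1
                 */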
1547                 for (j = 2; j <= 16; ++j) {
1548                     if (j & 1) {
1549                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1550                                   pre_comp[i][j][2], pre_comp[i][1][0],
1551                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1552                                   pre_comp[i][j - 1][0],
1553                                   pre_comp[i][j - 1][1],
1554                                   pre_comp[i][j - 1][2]);
1555                     } else {
1556                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1557                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1558                                      pre_comp[i][j / 2][1],
1559                                      pre_comp[i][j / 2][2]);
1560                     }
1561                 }
1562             }
1563         }
1564         if (mixed)
1565             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1566     }
1567 
1568     /* the scalar for the generator */
1569     if ((scalar != NULL) && (have_pre_comp)) {
1570         memset(g_secret, 0, sizeof(g_secret));
1571         /* reduce scalar to 0 <= scalar < 2^224 */
1572         if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1573             /*
1574              * this is an unusual input, and we don't guarantee
1575              * constant-timeness
1576              */
1577             if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1578                 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1579                 goto err;
1580             }
1581             num_bytes = BN_bn2bin(tmp_scalar, tmp);
1582         } else
1583             num_bytes = BN_bn2bin(scalar, tmp);
1584         flip_endian(g_secret, tmp, num_bytes);
1585         /* do the multiplication with generator precomputation */
1586         batch_mul(x_out, y_out, z_out,
1587                   (const felem_bytearray(*))secrets, num_points,
1588                   g_secret,
1589                   mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1590     } else
1591         /* do the multiplication without generator precomputation */
1592         batch_mul(x_out, y_out, z_out,
1593                   (const felem_bytearray(*))secrets, num_points,
1594                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1595     /* reduce the output to its unique minimal representation */
1596     felem_contract(x_in, x_out);
1597     felem_contract(y_in, y_out);
1598     felem_contract(z_in, z_out);
1599     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1600         (!felem_to_BN(z, z_in))) {
1601         ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1602         goto err;
1603     }
1604     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1605 
1606  err:
1607     BN_CTX_end(ctx);
1608     if (generator != NULL)
1609         EC_POINT_free(generator);
1610     if (new_ctx != NULL)
1611         BN_CTX_free(new_ctx);
1612     if (secrets != NULL)
1613         OPENSSL_free(secrets);
1614     if (pre_comp != NULL)
1615         OPENSSL_free(pre_comp);
1616     if (tmp_felems != NULL)
1617         OPENSSL_free(tmp_felems);
1618     return ret;
1619 }
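
/*-
 * Illustrative usage (a sketch, not code from this file): applications
 * normally reach the function above through the generic EC_POINT_mul() /
 * EC_POINTs_mul() front ends, which dispatch via group->meth->mul.
 * Assuming a group built on EC_GFp_nistp224_method() and caller-supplied
 * g_scalar, P, p_scalar and ctx:
 *
 *     // r = g_scalar*generator + p_scalar*P on the P-224 group
 *     if (!EC_POINT_mul(group, r, g_scalar, P, p_scalar, ctx))
 *         goto err;
 *
 * A NULL point or scalar simply drops that term from the sum, matching the
 * NULL handling in the loop above.
 */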
1620 
1621 int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1622 {
1623     int ret = 0;
1624     NISTP224_PRE_COMP *pre = NULL;
1625     int i, j;
1626     BN_CTX *new_ctx = NULL;
1627     BIGNUM *x, *y;
1628     EC_POINT *generator = NULL;
1629     felem tmp_felems[32];
1630 
1631     /* throw away old precomputation */
1632     EC_EX_DATA_free_data(&group->extra_data, nistp224_pre_comp_dup,
1633                          nistp224_pre_comp_free,
1634                          nistp224_pre_comp_clear_free);
1635     if (ctx == NULL)
1636         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1637             return 0;
1638     BN_CTX_start(ctx);
1639     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
1640         goto err;
1641     /* get the generator */
1642     if (group->generator == NULL)
1643         goto err;
1644     generator = EC_POINT_new(group);
1645     if (generator == NULL)
1646         goto err;
1647     BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1648     BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1649     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
1650         goto err;
1651     if ((pre = nistp224_pre_comp_new()) == NULL)
1652         goto err;
1653     /*
1654      * if the generator is the standard one, use built-in precomputation
1655      */
1656     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1657         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1658         goto done;
1659     }
1660     if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
1661         (!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
1662         (!BN_to_felem(pre->g_pre_comp[0][1][2], &group->generator->Z)))
1663         goto err;
1664     /*
1665      * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1666      * 2^140*G, 2^196*G for the second one
1667      */
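    /*
     * each step below applies 28 doublings:
     * g_pre_comp[1][i] = 2^28*g_pre_comp[0][i], and, for i < 8,
     * g_pre_comp[0][2*i] = 2^56*g_pre_comp[0][i]
     */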
1668     for (i = 1; i <= 8; i <<= 1) {
1669         point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1670                      pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1671                      pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1672         for (j = 0; j < 27; ++j) {
1673             point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1674                          pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1675                          pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1676         }
1677         if (i == 8)
1678             break;
1679         point_double(pre->g_pre_comp[0][2 * i][0],
1680                      pre->g_pre_comp[0][2 * i][1],
1681                      pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1682                      pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1683         for (j = 0; j < 27; ++j) {
1684             point_double(pre->g_pre_comp[0][2 * i][0],
1685                          pre->g_pre_comp[0][2 * i][1],
1686                          pre->g_pre_comp[0][2 * i][2],
1687                          pre->g_pre_comp[0][2 * i][0],
1688                          pre->g_pre_comp[0][2 * i][1],
1689                          pre->g_pre_comp[0][2 * i][2]);
1690         }
1691     }
1692     for (i = 0; i < 2; i++) {
1693         /* g_pre_comp[i][0] is the point at infinity */
1694         memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1695         /* the remaining multiples */
1696         /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1697         point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1698                   pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1699                   pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1700                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1701                   pre->g_pre_comp[i][2][2]);
1702         /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1703         point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1704                   pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1705                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1706                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1707                   pre->g_pre_comp[i][2][2]);
1708         /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1709         point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1710                   pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1711                   pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1712                   0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1713                   pre->g_pre_comp[i][4][2]);
1714         /*
1715          * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1716          */
1717         point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1718                   pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1719                   pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1720                   0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1721                   pre->g_pre_comp[i][2][2]);
1722         for (j = 1; j < 8; ++j) {
1723             /* odd multiples: add G resp. 2^28*G */
1724             point_add(pre->g_pre_comp[i][2 * j + 1][0],
1725                       pre->g_pre_comp[i][2 * j + 1][1],
1726                       pre->g_pre_comp[i][2 * j + 1][2],
1727                       pre->g_pre_comp[i][2 * j][0],
1728                       pre->g_pre_comp[i][2 * j][1],
1729                       pre->g_pre_comp[i][2 * j][2], 0,
1730                       pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1731                       pre->g_pre_comp[i][1][2]);
1732         }
1733     }
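    /* convert the 31 table entries following g_pre_comp[0][0] to affine */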
1734     make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1735 
1736  done:
1737     if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
1738                              nistp224_pre_comp_free,
1739                              nistp224_pre_comp_clear_free))
1740         goto err;
1741     ret = 1;
1742     pre = NULL;
1743  err:
1744     BN_CTX_end(ctx);
1745     if (generator != NULL)
1746         EC_POINT_free(generator);
1747     if (new_ctx != NULL)
1748         BN_CTX_free(new_ctx);
1749     if (pre)
1750         nistp224_pre_comp_free(pre);
1751     return ret;
1752 }
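
/*-
 * Illustrative usage (a sketch, not code from this file): the
 * precomputation is normally requested through the generic front end,
 * which dispatches via group->meth->precompute_mult:
 *
 *     if (!EC_GROUP_precompute_mult(group, ctx))
 *         goto err;
 *     // later EC_POINT_mul() calls involving the generator can now use
 *     // the two 16-entry tables attached to the group's extra data
 */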
1753 
1754 int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1755 {
1756     if (EC_EX_DATA_get_data(group->extra_data, nistp224_pre_comp_dup,
1757                             nistp224_pre_comp_free,
1758                             nistp224_pre_comp_clear_free)
1759         != NULL)
1760         return 1;
1761     else
1762         return 0;
1763 }
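
/*-
 * Sketch: after a successful EC_GROUP_precompute_mult(), the generic
 * EC_GROUP_have_precompute_mult(group) wrapper reports 1 by calling the
 * function above through group->meth->have_precompute_mult.
 */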
1764 
1765 #else
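/* keep compilers happy: ISO C does not allow an empty translation unit */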
1766 static void *dummy = &dummy;
1767 #endif
1768