1 /* 2 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the OpenSSL license (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 /* Copyright 2011 Google Inc. 11 * 12 * Licensed under the Apache License, Version 2.0 (the "License"); 13 * 14 * you may not use this file except in compliance with the License. 15 * You may obtain a copy of the License at 16 * 17 * http://www.apache.org/licenses/LICENSE-2.0 18 * 19 * Unless required by applicable law or agreed to in writing, software 20 * distributed under the License is distributed on an "AS IS" BASIS, 21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 * See the License for the specific language governing permissions and 23 * limitations under the License. 24 */ 25 26 /* 27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication 28 * 29 * Inspired by Daniel J. 
Bernstein's public domain nistp224 implementation
 * and Adam Langley's public domain 64-bit C implementation of curve25519
 */

#include <openssl/opensslconf.h>
#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
NON_EMPTY_TRANSLATION_UNIT
#else

# include <stdint.h>
# include <string.h>
# include <openssl/err.h>
# include "ec_local.h"

# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
  /* even with gcc, the typedef won't work for 32-bit platforms */
typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
                                 * platforms */
# else
#  error "Your compiler doesn't appear to support 128-bit integer types"
# endif

typedef uint8_t u8;
typedef uint64_t u64;

/******************************************************************************/
/*-
 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
 *
 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
 * using 64-bit coefficients called 'limbs',
 * and sometimes (for multiplication results) as
 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
 * using 128-bit coefficients called 'widelimbs'.
 * A 4-limb representation is an 'felem';
 * a 7-widelimb representation is a 'widefelem'.
 * Even within felems, bits of adjacent limbs overlap, and we don't always
 * reduce the representations: we ensure that inputs to each felem
 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
 * and fit into a 128-bit word without overflow. The coefficients are then
 * again partially reduced to obtain an felem satisfying a_i < 2^57.
 * We only reduce to the unique minimal representation at the end of the
 * computation.
 */

typedef uint64_t limb;
/*
 * limb_aX is a limb with 1-byte alignment: it is used in bin28_to_felem()
 * for loads from byte offsets that are not 8-byte aligned.
 */
typedef uint64_t limb_aX __attribute((__aligned__(1)));
typedef uint128_t widelimb;

typedef limb felem[4];
typedef widelimb widefelem[7];

/*
 * Field element represented as a byte array. 28*8 = 224 bits is also the
 * group order size for the elliptic curve, and we also use this type for
 * scalars for point multiplication.
 */
typedef u8 felem_bytearray[28];

/*
 * The five big-endian byte arrays below are, in order: the field prime p,
 * the curve coefficients a and b, and the base-point coordinates x and y.
 */
static const felem_bytearray nistp224_curve_params[5] = {
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
    {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
    {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
     0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
     0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
    {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
     0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
     0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
    {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
     0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
     0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
};

/*-
 * Precomputed multiples of the standard generator
 * Points are given in coordinates (X, Y, Z) where Z normally is 1
 * (0 for the point at infinity).
 * For each field element, slice a_0 is word 0, etc.
 *
 * The table has 2 * 16 elements, starting with the following:
 * index | bits    | point
 * ------+---------+------------------------------
 *     0 | 0 0 0 0 | 0G
 *     1 | 0 0 0 1 | 1G
 *     2 | 0 0 1 0 | 2^56G
 *     3 | 0 0 1 1 | (2^56 + 1)G
 *     4 | 0 1 0 0 | 2^112G
 *     5 | 0 1 0 1 | (2^112 + 1)G
 *     6 | 0 1 1 0 | (2^112 + 2^56)G
 *     7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
 *     8 | 1 0 0 0 | 2^168G
 *     9 | 1 0 0 1 | (2^168 + 1)G
 *    10 | 1 0 1 0 | (2^168 + 2^56)G
 *    11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
 *    12 | 1 1 0 0 | (2^168 + 2^112)G
 *    13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
 *    14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
 *    15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
 * followed by a copy of this with each element multiplied by 2^28.
 *
 * The reason for this is so that we can clock bits into four different
 * locations when doing simple scalar multiplies against the base point,
 * and then another four locations using the second 16 elements.
 */
static const felem gmul[2][16][3] = {
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
      {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
      {1, 0, 0, 0}},
     {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
      {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
      {1, 0, 0, 0}},
     {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
      {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
      {1, 0, 0, 0}},
     {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
      {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
      {1, 0, 0, 0}},
     {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
      {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
      {1, 0, 0, 0}},
     {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
      {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
      {1, 0, 0, 0}},
     {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
      {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
      {1, 0, 0, 0}},
     {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
      {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
      {1, 0, 0, 0}},
     {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
      {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
      {1, 0, 0, 0}},
     {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
      {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
      {1, 0, 0, 0}},
     {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
      {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
      {1, 0, 0, 0}},
     {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
      {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
      {1, 0, 0, 0}},
     {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
      {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
      {1, 0, 0, 0}},
     {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
      {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
      {1, 0, 0, 0}},
     {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
      {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
      {1, 0, 0, 0}}},
    {{{0, 0, 0, 0},
      {0, 0, 0, 0},
      {0, 0, 0, 0}},
     {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
      {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
      {1, 0, 0, 0}},
     {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
      {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
      {1, 0, 0, 0}},
     {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
      {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
      {1, 0, 0, 0}},
     {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
      {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
      {1, 0, 0, 0}},
     {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
      {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
      {1, 0, 0, 0}},
     {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
      {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
      {1, 0, 0, 0}},
     {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
      {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
      {1, 0, 0, 0}},
     {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
      {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
      {1, 0, 0, 0}},
     {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
      {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
      {1, 0, 0, 0}},
     {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
      {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
      {1, 0, 0, 0}},
     {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
      {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
      {1, 0, 0, 0}},
     {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
      {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
      {1, 0, 0, 0}},
     {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
      {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
      {1, 0, 0, 0}},
     {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
      {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
      {1, 0, 0, 0}},
     {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
      {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
      {1, 0, 0, 0}}}
};

/* Precomputation for the group generator. */
struct nistp224_pre_comp_st {
    felem g_pre_comp[2][16][3];
    CRYPTO_REF_COUNT references;
    CRYPTO_RWLOCK *lock;
};

/*
 * Returns the EC_METHOD vtable for this optimized P-224 implementation.
 * The struct is a static singleton; callers must not modify or free it.
 */
const EC_METHOD *EC_GFp_nistp224_method(void)
{
    static const EC_METHOD ret = {
        EC_FLAGS_DEFAULT_OCT,
        NID_X9_62_prime_field,
        ec_GFp_nistp224_group_init,
        ec_GFp_simple_group_finish,
        ec_GFp_simple_group_clear_finish,
        ec_GFp_nist_group_copy,
        ec_GFp_nistp224_group_set_curve,
        ec_GFp_simple_group_get_curve,
        ec_GFp_simple_group_get_degree,
        ec_group_simple_order_bits,
        ec_GFp_simple_group_check_discriminant,
        ec_GFp_simple_point_init,
        ec_GFp_simple_point_finish,
        ec_GFp_simple_point_clear_finish,
        ec_GFp_simple_point_copy,
        ec_GFp_simple_point_set_to_infinity,
        ec_GFp_simple_set_Jprojective_coordinates_GFp,
        ec_GFp_simple_get_Jprojective_coordinates_GFp,
        ec_GFp_simple_point_set_affine_coordinates,
        ec_GFp_nistp224_point_get_affine_coordinates,
        0 /* point_set_compressed_coordinates */ ,
        0 /* point2oct */ ,
        0 /* oct2point */ ,
        ec_GFp_simple_add,
        ec_GFp_simple_dbl,
        ec_GFp_simple_invert,
        ec_GFp_simple_is_at_infinity,
        ec_GFp_simple_is_on_curve,
        ec_GFp_simple_cmp,
        ec_GFp_simple_make_affine,
        ec_GFp_simple_points_make_affine,
        ec_GFp_nistp224_points_mul,
        ec_GFp_nistp224_precompute_mult,
        ec_GFp_nistp224_have_precompute_mult,
        ec_GFp_nist_field_mul,
        ec_GFp_nist_field_sqr,
        0 /* field_div */ ,
        ec_GFp_simple_field_inv,
        0 /* field_encode */ ,
        0 /* field_decode */ ,
        0,                      /* field_set_to_one */
        ec_key_simple_priv2oct,
        ec_key_simple_oct2priv,
        0,                      /* set private */
        ec_key_simple_generate_key,
        ec_key_simple_check_key,
        ec_key_simple_generate_public_key,
        0,                      /* keycopy */
        0,                      /* keyfinish */
        ecdh_simple_compute_key,
        0,                      /* field_inverse_mod_ord */
        0,                      /* blind_coordinates */
        0,                      /* ladder_pre */
        0,                      /* ladder_step */
        0                       /* ladder_post */
    };

    return &ret;
}

/*
 * Helper functions to convert field elements to/from internal representation
 */

/*
 * Unpack 28 little-endian bytes into four 56-bit limbs. The limb_aX loads
 * read 8 bytes from unaligned offsets; out[3] shifts off the low byte that
 * overlaps out[2]'s slice.
 */
static void bin28_to_felem(felem out, const u8 in[28])
{
    out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
    out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
    out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
    out[3] = (*((const limb_aX *)(in + 20))) >> 8;
}

/*
 * Pack four 56-bit limbs into 28 little-endian bytes. Assumes the input is
 * fully reduced, i.e. each limb fits in 56 bits.
 */
static void felem_to_bin28(u8 out[28], const felem in)
{
    unsigned i;
    for (i = 0; i < 7; ++i) {
        out[i] = in[0] >> (8 * i);
        out[i + 7] = in[1] >> (8 * i);
        out[i + 14] = in[2] >> (8 * i);
        out[i + 21] = in[3] >> (8 * i);
    }
}

/* From OpenSSL BIGNUM to internal representation */
/* Returns 1 on success, 0 (with an EC error raised) if bn is negative or too big */
static int BN_to_felem(felem out, const BIGNUM *bn)
{
    felem_bytearray b_out;
    int num_bytes;

    if (BN_is_negative(bn)) {
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
        return 0;
    }
    /* BN_bn2lebinpad returns < 0 when bn does not fit in sizeof(b_out) bytes */
    num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
    if (num_bytes < 0) {
        ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
        return 0;
    }
    bin28_to_felem(out, b_out);
    return 1;
}

/* From internal representation to OpenSSL BIGNUM */
static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
{
    felem_bytearray b_out;
    felem_to_bin28(b_out, in);
    return BN_lebin2bn(b_out, sizeof(b_out), out);
}

/******************************************************************************/
/*-
 * FIELD OPERATIONS
 *
 * Field operations, using the internal representation of field elements.
 * NB! These operations are specific to our point multiplication and cannot be
 * expected to be correct in general - e.g., multiplication with a large scalar
 * will cause an overflow.
 *
 */

/* Set out to the multiplicative identity, 1, in reduced form */
static void felem_one(felem out)
{
    out[0] = 1;
    out[1] = 0;
    out[2] = 0;
    out[3] = 0;
}

/* Copy a field element: out = in */
static void felem_assign(felem out, const felem in)
{
    out[0] = in[0];
    out[1] = in[1];
    out[2] = in[2];
    out[3] = in[3];
}

/* Sum two field elements: out += in */
static void felem_sum(felem out, const felem in)
{
    out[0] += in[0];
    out[1] += in[1];
    out[2] += in[2];
    out[3] += in[3];
}

/* Subtract field elements: out -= in */
/* Assumes in[i] < 2^57 */
static void felem_diff(felem out, const felem in)
{
    static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
    static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
    static const limb two58m42m2 = (((limb) 1) << 58) -
        (((limb) 1) << 42) - (((limb) 1) << 2);

    /*
     * Add 0 mod 2^224-2^96+1 to ensure out > in, so the per-limb
     * subtractions below cannot underflow
     */
    out[0] += two58p2;
    out[1] += two58m42m2;
    out[2] += two58m2;
    out[3] += two58m2;

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
}

/* Subtract in unreduced 128-bit mode: out -= in */
/* Assumes in[i] < 2^119 */
static void widefelem_diff(widefelem out, const widefelem in)
{
    static const widelimb two120 = ((widelimb) 1) << 120;
    static const widelimb two120m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 64);
    static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
        (((widelimb) 1) << 104) - (((widelimb) 1) << 64);

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[0] += two120;
    out[1] += two120m64;
    out[2] += two120m64;
    out[3] += two120;
    out[4] += two120m104m64;
    out[5] += two120m64;
    out[6] += two120m64;

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
    out[4] -= in[4];
    out[5] -= in[5];
    out[6] -= in[6];
}

/* Subtract in mixed mode: out128 -= in64 */
/* in[i] < 2^63 */
static void felem_diff_128_64(widefelem out, const felem in)
{
    static const widelimb two64p8 = (((widelimb) 1) << 64) +
        (((widelimb) 1) << 8);
    static const widelimb two64m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 8);
    static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
        (((widelimb) 1) << 48) - (((widelimb) 1) << 8);

    /* Add 0 mod 2^224-2^96+1 to ensure out > in */
    out[0] += two64p8;
    out[1] += two64m48m8;
    out[2] += two64m8;
    out[3] += two64m8;

    out[0] -= in[0];
    out[1] -= in[1];
    out[2] -= in[2];
    out[3] -= in[3];
}

/*
 * Multiply a field element by a scalar: out = out * scalar The scalars we
 * actually use are small, so results fit without overflow
 */
static void felem_scalar(felem out, const limb scalar)
{
    out[0] *= scalar;
    out[1] *= scalar;
    out[2] *= scalar;
    out[3] *= scalar;
}

/*
 * Multiply an unreduced field element by a scalar: out = out * scalar The
 * scalars we actually use are small, so results fit without overflow
 */
static void widefelem_scalar(widefelem out, const widelimb scalar)
{
    out[0] *= scalar;
    out[1] *= scalar;
    out[2] *= scalar;
    out[3] *= scalar;
    out[4] *= scalar;
    out[5] *= scalar;
    out[6] *= scalar;
}

/* Square a field element: out = in^2 */
/* tmp0/tmp1/tmp2 pre-double limbs so each cross product is computed once */
static void felem_square(widefelem out, const felem in)
{
    limb tmp0, tmp1, tmp2;
    tmp0 = 2 * in[0];
    tmp1 = 2 * in[1];
    tmp2 = 2 * in[2];
    out[0] = ((widelimb) in[0]) * in[0];
    out[1] = ((widelimb) in[0]) * tmp1;
    out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
    out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
    out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
    out[5] = ((widelimb) in[3]) * tmp2;
    out[6] = ((widelimb) in[3]) * in[3];
}

/* Multiply two field elements: out = in1 * in2 */
/* Schoolbook multiplication; out[k] collects all products with i + j == k */
static void felem_mul(widefelem out, const felem in1, const felem in2)
{
    out[0] = ((widelimb) in1[0]) * in2[0];
    out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
    out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
        ((widelimb) in1[2]) * in2[0];
    out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
        ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
    out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
        ((widelimb) in1[3]) * in2[1];
    out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
    out[6] = ((widelimb) in1[3]) * in2[3];
}

/*-
 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
 * Requires in[i] < 2^126,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
static void felem_reduce(felem out, const widefelem in)
{
    static const widelimb two127p15 = (((widelimb) 1) << 127) +
        (((widelimb) 1) << 15);
    static const widelimb two127m71 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71);
    static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
        (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
    widelimb output[5];

    /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
    output[0] = in[0] + two127p15;
    output[1] = in[1] + two127m71m55;
    output[2] = in[2] + two127m71;
    output[3] = in[3];
    output[4] = in[4];

    /* Eliminate in[4], in[5], in[6] */
    output[4] += in[6] >> 16;
    output[3] += (in[6] & 0xffff) << 40;
    output[2] -= in[6];

    output[3] += in[5] >> 16;
    output[2] += (in[5] & 0xffff) << 40;
    output[1] -= in[5];

    output[2] += output[4] >> 16;
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 2 -> 3 -> 4 */
    output[3] += output[2] >> 56;
    output[2] &= 0x00ffffffffffffff;

    output[4] = output[3] >> 56;
    output[3] &= 0x00ffffffffffffff;

    /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */

    /* Eliminate output[4] */
    output[2] += output[4] >> 16;
    /* output[2] < 2^56 + 2^56 = 2^57 */
    output[1] += (output[4] & 0xffff) << 40;
    output[0] -= output[4];

    /* Carry 0 -> 1 -> 2 -> 3 */
    output[1] += output[0] >> 56;
    out[0] = output[0] & 0x00ffffffffffffff;

    output[2] += output[1] >> 56;
    /* output[2] < 2^57 + 2^72 */
    out[1] = output[1] & 0x00ffffffffffffff;
    output[3] += output[2] >> 56;
    /* output[3] <= 2^56 + 2^16 */
    out[2] = output[2] & 0x00ffffffffffffff;

    /*-
     * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
     * out[3] <= 2^56 + 2^16 (due to final carry),
     * so out < 2*p
     */
    out[3] = output[3];
}

/* out = in^2, reduced to the partially-reduced felem range */
static void felem_square_reduce(felem out, const felem in)
{
    widefelem tmp;
    felem_square(tmp, in);
    felem_reduce(out, tmp);
}

/* out = in1 * in2, reduced to the partially-reduced felem range */
static void felem_mul_reduce(felem out, const felem in1, const felem in2)
{
    widefelem tmp;
    felem_mul(tmp, in1, in2);
    felem_reduce(out, tmp);
}

/*
 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
 * call felem_reduce first)
 */
static void felem_contract(felem out, const felem in)
{
    static const int64_t two56 = ((limb) 1) << 56;
    /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
    /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
    int64_t tmp[4], a;
    tmp[0] = in[0];
    tmp[1] = in[1];
    tmp[2] = in[2];
    tmp[3] = in[3];
    /* Case 1: a = 1 iff in >= 2^224 */
    a = (in[3] >> 56);
    tmp[0] -= a;
    tmp[1] += a << 40;
    tmp[3] &= 0x00ffffffffffffff;
    /*
     * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
     * and the lower part is non-zero
     */
    a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
        (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
    a &= 0x00ffffffffffffff;
    /* turn a into an all-one mask (if a = 0) or an all-zero mask */
    a = (a - 1) >> 63;
    /* subtract 2^224 - 2^96 + 1 if a is all-one */
    tmp[3] &= a ^ 0xffffffffffffffff;
    tmp[2] &= a ^ 0xffffffffffffffff;
    tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
    tmp[0] -= 1 & a;

    /*
     * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
     * non-zero, so we only need one step
     */
    a = tmp[0] >> 63;
    tmp[0] += two56 & a;
    tmp[1] -= 1 & a;

    /* carry 1 -> 2 -> 3 */
    tmp[2] += tmp[1] >> 56;
    tmp[1] &= 0x00ffffffffffffff;

    tmp[3] += tmp[2] >> 56;
    tmp[2] &= 0x00ffffffffffffff;

    /* Now 0 <= out < p */
    out[0] = tmp[0];
    out[1] = tmp[1];
    out[2] = tmp[2];
    out[3] = tmp[3];
}

/*
 * Get negative value: out = -in
 * Requires in[i] < 2^63,
 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
 */
static void felem_neg(felem out, const felem in)
{
    /* 0 - in, computed as a subtraction from a zeroed widefelem */
    widefelem tmp = {0};
    felem_diff_128_64(tmp, in);
    felem_reduce(out, tmp);
}

/*
 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
 * elements are reduced to in < 2^225, so we only need to check three cases:
 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
 */
static limb felem_is_zero(const felem in)
{
    limb zero, two224m96p1, two225m97p2;

    /* Each comparison is done branch-free: OR the limb differences, then
     * map "all zero" to 1 via the sign of (x - 1). */
    zero = in[0] | in[1] | in[2] | in[3];
    zero = (((int64_t) (zero) - 1) >> 63) & 1;
    two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
    two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
    two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
        | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
    two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
    return (zero | two224m96p1 | two225m97p2);
}

/* void*-typed wrapper around felem_is_zero, returning an int 0/1 */
static int felem_is_zero_int(const void *in)
{
    return (int)(felem_is_zero(in) & ((limb) 1));
}

/* Invert a field element */
/* Computation chain copied from djb's code */
static void felem_inv(felem out, const felem in)
{
    felem ftmp, ftmp2, ftmp3, ftmp4;
    widefelem tmp;
    unsigned i;

    /*
     * Square-and-multiply addition chain computing in^(p-2) = in^-1;
     * the trailing comments track the exponent accumulated so far.
     */
    felem_square(tmp, in);
    felem_reduce(ftmp, tmp);    /* 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^2 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 2 */
    felem_mul(tmp, in, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^3 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^4 - 2 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^5 - 4 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^6 - 8 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^6 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^7 - 2 */
    for (i = 0; i < 5; ++i) {   /* 2^12 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);   /* 2^12 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^13 - 2 */
    for (i = 0; i < 11; ++i) {  /* 2^24 - 2^12 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp2, tmp);   /* 2^24 - 1 */
    felem_square(tmp, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^25 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^48 - 2^24 */
        felem_square(tmp, ftmp3);
        felem_reduce(ftmp3, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp2);
    felem_reduce(ftmp3, tmp);   /* 2^48 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^49 - 2 */
    for (i = 0; i < 47; ++i) {  /* 2^96 - 2^48 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp3, ftmp4);
    felem_reduce(ftmp3, tmp);   /* 2^96 - 1 */
    felem_square(tmp, ftmp3);
    felem_reduce(ftmp4, tmp);   /* 2^97 - 2 */
    for (i = 0; i < 23; ++i) {  /* 2^120 - 2^24 */
        felem_square(tmp, ftmp4);
        felem_reduce(ftmp4, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp4);
    felem_reduce(ftmp2, tmp);   /* 2^120 - 1 */
    for (i = 0; i < 6; ++i) {   /* 2^126 - 2^6 */
        felem_square(tmp, ftmp2);
        felem_reduce(ftmp2, tmp);
    }
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^126 - 1 */
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);    /* 2^127 - 2 */
    felem_mul(tmp, ftmp, in);
    felem_reduce(ftmp, tmp);    /* 2^127 - 1 */
    for (i = 0; i < 97; ++i) {  /* 2^224 - 2^97 */
        felem_square(tmp, ftmp);
        felem_reduce(ftmp, tmp);
    }
    felem_mul(tmp, ftmp, ftmp3);
    felem_reduce(out, tmp);     /* 2^224 - 2^96 - 1 */
}

/*
 * Copy in
constant time: if icopy == 1, copy in to out, if icopy == 0, copy
 * out to itself.
 */
static void copy_conditional(felem out, const felem in, limb icopy)
{
    unsigned i;
    /*
     * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
     */
    const limb copy = -icopy;
    for (i = 0; i < 4; ++i) {
        /* XOR-swap trick: tmp is 0 when copy is all-zero, so out is unchanged */
        const limb tmp = copy & (in[i] ^ out[i]);
        out[i] ^= tmp;
    }
}

/******************************************************************************/
/*-
 * ELLIPTIC CURVE POINT OPERATIONS
 *
 * Points are represented in Jacobian projective coordinates:
 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
 * or to the point at infinity if Z == 0.
 *
 */

/*-
 * Double an elliptic curve point:
 * (X', Y', Z') = 2 * (X, Y, Z), where
 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
 * while x_out == y_in is not (maybe this works, but it's not tested).
 */
static void
point_double(felem x_out, felem y_out, felem z_out,
             const felem x_in, const felem y_in, const felem z_in)
{
    widefelem tmp, tmp2;
    felem delta, gamma, beta, alpha, ftmp, ftmp2;

    felem_assign(ftmp, x_in);
    felem_assign(ftmp2, x_in);

    /* delta = z^2 */
    felem_square(tmp, z_in);
    felem_reduce(delta, tmp);

    /* gamma = y^2 */
    felem_square(tmp, y_in);
    felem_reduce(gamma, tmp);

    /* beta = x*gamma */
    felem_mul(tmp, x_in, gamma);
    felem_reduce(beta, tmp);

    /* alpha = 3*(x-delta)*(x+delta) */
    felem_diff(ftmp, delta);
    /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
    felem_sum(ftmp2, delta);
    /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
    felem_scalar(ftmp2, 3);
    /* ftmp2[i] < 3 * 2^58 < 2^60 */
    felem_mul(tmp, ftmp, ftmp2);
    /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
    felem_reduce(alpha, tmp);

    /* x' = alpha^2 - 8*beta */
    felem_square(tmp, alpha);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
    felem_assign(ftmp, beta);
    felem_scalar(ftmp, 8);
    /* ftmp[i] < 8 * 2^57 = 2^60 */
    felem_diff_128_64(tmp, ftmp);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(x_out, tmp);

    /* z' = (y + z)^2 - gamma - delta */
    felem_sum(delta, gamma);
    /* delta[i] < 2^57 + 2^57 = 2^58 */
    felem_assign(ftmp, y_in);
    felem_sum(ftmp, z_in);
    /* ftmp[i] < 2^57 + 2^57 = 2^58 */
    felem_square(tmp, ftmp);
    /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
    felem_diff_128_64(tmp, delta);
    /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
    felem_reduce(z_out, tmp);

    /* y' = alpha*(4*beta - x') - 8*gamma^2 */
    felem_scalar(beta, 4);
    /* beta[i] < 4 * 2^57 = 2^59 */
    felem_diff(beta, x_out);
    /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
    felem_mul(tmp, alpha, beta);
    /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
    felem_square(tmp2, gamma);
    /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
    widefelem_scalar(tmp2, 8);
    /* tmp2[i] < 8 * 2^116 = 2^119 */
    widefelem_diff(tmp, tmp2);
    /* tmp[i] < 2^119 + 2^120 < 2^121 */
    felem_reduce(y_out, tmp);
}

/*-
 * Add two elliptic curve points:
 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
 *        Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
 *
 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
 */

/*
 * This function is not entirely constant-time: it includes a branch for
 * checking whether the two input points are equal, (while not equal to the
 * point at infinity). This case never happens during single point
 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
 */
static void point_add(felem x3, felem y3, felem z3,
                      const felem x1, const felem y1, const felem z1,
                      const int mixed, const felem x2, const felem y2,
                      const felem z2)
{
    felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
    widefelem tmp, tmp2;
    limb z1_is_zero, z2_is_zero, x_equal, y_equal;
    limb points_equal;

    if (!mixed) {
        /* ftmp2 = z2^2 */
        felem_square(tmp, z2);
        felem_reduce(ftmp2, tmp);

        /* ftmp4 = z2^3 */
        felem_mul(tmp, ftmp2, z2);
        felem_reduce(ftmp4, tmp);

        /* ftmp4 = z2^3*y1 */
        felem_mul(tmp2, ftmp4, y1);
        felem_reduce(ftmp4, tmp2);

        /* ftmp2 = z2^2*x1 */
        felem_mul(tmp2, ftmp2, x1);
        felem_reduce(ftmp2, tmp2);
    } else {
        /*
         * We'll assume z2 = 1 (special case z2 = 0 is handled later)
         */

        /* ftmp4 = z2^3*y1 */
        felem_assign(ftmp4, y1);

        /* ftmp2 = z2^2*x1 */
        felem_assign(ftmp2, x1);
    }

    /* ftmp = z1^2 */
    felem_square(tmp, z1);
    felem_reduce(ftmp, tmp);

    /* ftmp3 = z1^3 */
    felem_mul(tmp, ftmp, z1);
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^3*y2 */
    felem_mul(tmp, ftmp3, y2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp3 = z1^3*y2 - z2^3*y1 */
    felem_diff_128_64(tmp, ftmp4);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp3, tmp);

    /* tmp = z1^2*x2 */
    felem_mul(tmp, ftmp, x2);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* ftmp = z1^2*x2 - z2^2*x1 */
    felem_diff_128_64(tmp, ftmp2);
    /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
    felem_reduce(ftmp, tmp);

    /*
     * The formulae are incorrect if the points are equal, in affine coordinates
     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
     * happens.
     *
     * We use bitwise operations to avoid potential side-channels introduced by
     * the short-circuiting behaviour of boolean operators.
     */
    x_equal = felem_is_zero(ftmp);
    y_equal = felem_is_zero(ftmp3);
    /*
     * The special case of either point being the point at infinity (z1 and/or
     * z2 are zero), is handled separately later on in this function, so we
     * avoid jumping to point_double here in those special cases.
     */
    z1_is_zero = felem_is_zero(z1);
    z2_is_zero = felem_is_zero(z2);

    /*
     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
     * specific implementation `felem_is_zero()` returns truth as `0x1`
     * (rather than `0xff..ff`).
     *
     * This implies that `~true` in this implementation becomes
     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
     * the if expression, we mask out only the last bit in the next
     * line.
     */
    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;

    if (points_equal) {
        /*
         * This is obviously not constant-time but, as mentioned before, this
         * case never happens during single point multiplication, so there is no
         * timing leak for ECDH or ECDSA signing.
         */
        point_double(x3, y3, z3, x1, y1, z1);
        return;
    }

    /* ftmp5 = z1*z2 */
    if (!mixed) {
        felem_mul(tmp, z1, z2);
        felem_reduce(ftmp5, tmp);
    } else {
        /* special case z2 = 0 is handled later */
        felem_assign(ftmp5, z1);
    }

    /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(z_out, tmp);

    /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp);
    felem_square(tmp, ftmp);
    felem_reduce(ftmp, tmp);

    /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp, ftmp5);
    felem_reduce(ftmp5, tmp);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_mul(tmp, ftmp2, ftmp);
    felem_reduce(ftmp2, tmp);

    /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
    felem_mul(tmp, ftmp4, ftmp5);
    /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
    felem_square(tmp2, ftmp3);
    /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */

    /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */

    /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
    felem_assign(ftmp5, ftmp2);
    felem_scalar(ftmp5, 2);
    /* ftmp5[i] < 2 * 2^57 = 2^58 */

    /*-
     * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
     *         2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
     */
    felem_diff_128_64(tmp2, ftmp5);
    /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
    felem_reduce(x_out, tmp2);

    /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
    felem_diff(ftmp2, x_out);
    /* ftmp2[i] < 2^57 +
2^58 + 2 < 2^59 */ 1061 1062 /* 1063 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) 1064 */ 1065 felem_mul(tmp2, ftmp3, ftmp2); 1066 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */ 1067 1068 /*- 1069 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) - 1070 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3 1071 */ 1072 widefelem_diff(tmp2, tmp); 1073 /* tmp2[i] < 2^118 + 2^120 < 2^121 */ 1074 felem_reduce(y_out, tmp2); 1075 1076 /* 1077 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is 1078 * the point at infinity, so we need to check for this separately 1079 */ 1080 1081 /* 1082 * if point 1 is at infinity, copy point 2 to output, and vice versa 1083 */ 1084 copy_conditional(x_out, x2, z1_is_zero); 1085 copy_conditional(x_out, x1, z2_is_zero); 1086 copy_conditional(y_out, y2, z1_is_zero); 1087 copy_conditional(y_out, y1, z2_is_zero); 1088 copy_conditional(z_out, z2, z1_is_zero); 1089 copy_conditional(z_out, z1, z2_is_zero); 1090 felem_assign(x3, x_out); 1091 felem_assign(y3, y_out); 1092 felem_assign(z3, z_out); 1093 } 1094 1095 /* 1096 * select_point selects the |idx|th point from a precomputation table and 1097 * copies it to out. 
 * The pre_comp array argument should be size of |size| argument
 */
static void select_point(const u64 idx, unsigned int size,
                         const felem pre_comp[][3], felem out[3])
{
    unsigned i, j;
    limb *outlimbs = &out[0][0];

    memset(out, 0, sizeof(*out) * 3);
    for (i = 0; i < size; i++) {
        const limb *inlimbs = &pre_comp[i][0][0];
        /*
         * Constant-time select: fold all bits of (i ^ idx) into bit 0, so
         * after the decrement mask is all-ones iff i == idx and zero
         * otherwise. Every table entry is read regardless of idx, so the
         * memory access pattern is independent of the (secret) index.
         */
        u64 mask = i ^ idx;
        mask |= mask >> 4;
        mask |= mask >> 2;
        mask |= mask >> 1;
        mask &= 1;
        mask--;
        /* accumulate the 4 limbs of each of the 3 coordinates */
        for (j = 0; j < 4 * 3; j++)
            outlimbs[j] |= inlimbs[j] & mask;
    }
}

/* get_bit returns the |i|th bit in |in| (little-endian), or 0 if i >= 224 */
static char get_bit(const felem_bytearray in, unsigned i)
{
    if (i >= 224)
        return 0;
    return (in[i >> 3] >> (i & 7)) & 1;
}

/*
 * Interleaved point multiplication using precomputed point multiples: The
 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
 * generator, using certain (large) precomputed multiples in g_pre_comp.
 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
 */
static void batch_mul(felem x_out, felem y_out, felem z_out,
                      const felem_bytearray scalars[],
                      const unsigned num_points, const u8 *g_scalar,
                      const int mixed, const felem pre_comp[][17][3],
                      const felem g_pre_comp[2][16][3])
{
    int i, skip;
    unsigned num;
    unsigned gen_mul = (g_scalar != NULL);
    felem nq[3], tmp[4];        /* tmp[3] is scratch for the negated Y */
    u64 bits;
    u8 sign, digit;

    /* set nq to the point at infinity */
    memset(nq, 0, sizeof(nq));

    /*
     * Loop over all scalars msb-to-lsb, interleaving additions of multiples
     * of the generator (two in each of the last 28 rounds) and additions of
     * other points multiples (every 5th round).
     *
     * i starts at 220 when there are arbitrary points (so i + 4 reaches the
     * top bit of a 224-bit scalar; get_bit returns 0 beyond bit 223) and at
     * 27 when only the generator is multiplied (28 comb rounds).
     */
    skip = 1;                   /* save two point operations in the first
                                 * round */
    for (i = (num_points ? 220 : 27); i >= 0; --i) {
        /* double */
        if (!skip)
            point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);

        /* add multiples of the generator */
        if (gen_mul && (i <= 27)) {
            /* first, look 28 bits upwards */
            bits = get_bit(g_scalar, i + 196) << 3;
            bits |= get_bit(g_scalar, i + 140) << 2;
            bits |= get_bit(g_scalar, i + 84) << 1;
            bits |= get_bit(g_scalar, i + 28);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[1], tmp);

            if (!skip) {
                /* value 1 below is argument for "mixed" */
                point_add(nq[0], nq[1], nq[2],
                          nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
            } else {
                /* first addition: just load the selected point */
                memcpy(nq, tmp, 3 * sizeof(felem));
                skip = 0;
            }

            /* second, look at the current position */
            bits = get_bit(g_scalar, i + 168) << 3;
            bits |= get_bit(g_scalar, i + 112) << 2;
            bits |= get_bit(g_scalar, i + 56) << 1;
            bits |= get_bit(g_scalar, i);
            /* select the point to add, in constant time */
            select_point(bits, 16, g_pre_comp[0], tmp);
            point_add(nq[0], nq[1], nq[2],
                      nq[0], nq[1], nq[2],
                      1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
        }

        /* do other additions every 5 doublings */
        if (num_points && (i % 5 == 0)) {
            /* loop over all scalars */
            for (num = 0; num < num_points; ++num) {
                /* gather a 6-bit signed window around position i */
                bits = get_bit(scalars[num], i + 4) << 5;
                bits |= get_bit(scalars[num], i + 3) << 4;
                bits |= get_bit(scalars[num], i + 2) << 3;
                bits |= get_bit(scalars[num], i + 1) << 2;
                bits |= get_bit(scalars[num], i) << 1;
                bits |= get_bit(scalars[num], i - 1);
                ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);

                /* select the point to add or subtract */
                select_point(digit, 17, pre_comp[num], tmp);
                felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
                                            * point */
                /* constant-time pick of Y or -Y according to sign */
                copy_conditional(tmp[1], tmp[3], sign);

                if (!skip) {
                    point_add(nq[0], nq[1], nq[2],
                              nq[0], nq[1], nq[2],
                              mixed, tmp[0], tmp[1], tmp[2]);
                } else {
                    memcpy(nq, tmp, 3 * sizeof(felem));
                    skip = 0;
                }
            }
        }
    }
    felem_assign(x_out, nq[0]);
    felem_assign(y_out, nq[1]);
    felem_assign(z_out, nq[2]);
}

/******************************************************************************/
/*
 * FUNCTIONS TO MANAGE PRECOMPUTATION
 */

/*
 * Allocate a fresh, zero-filled precomputation structure with its reference
 * count set to 1. Returns NULL (with an error raised) on allocation failure.
 */
static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
{
    NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));

    if (!ret) {
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
        return ret;
    }

    ret->references = 1;

    ret->lock = CRYPTO_THREAD_lock_new();
    if (ret->lock == NULL) {
        ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
        OPENSSL_free(ret);
        return NULL;
    }
    return ret;
}

/* Share |p| by bumping its reference count; returns |p| itself (or NULL). */
NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
{
    int i;
    if (p != NULL)
        CRYPTO_UP_REF(&p->references, &i, p->lock);
    return p;
}

/*
 * Drop one reference to |p|; frees the structure (and its lock) when the
 * count reaches zero. NULL is a no-op.
 */
void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
{
    int i;

    if (p == NULL)
        return;

    CRYPTO_DOWN_REF(&p->references, &i, p->lock);
    REF_PRINT_COUNT("EC_nistp224", x);
    if (i > 0)
        return;
    REF_ASSERT_ISNT(i < 0);

    CRYPTO_THREAD_lock_free(p->lock);
    OPENSSL_free(p);
}

/******************************************************************************/
/*
 * OPENSSL EC_METHOD FUNCTIONS
 */

/*
 * Standard group init, plus recording that a = -3 (mod p) for this curve so
 * the optimized doubling formulas apply.
 */
int ec_GFp_nistp224_group_init(EC_GROUP *group)
{
    int ret;
    ret = ec_GFp_simple_group_init(group);
    group->a_is_minus3 = 1;
    return ret;
}

/*
 * Set the curve parameters; only the exact NIST P-224 (p, a, b) values are
 * accepted — anything else fails with EC_R_WRONG_CURVE_PARAMETERS.
 */
int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
                                    const BIGNUM *a, const BIGNUM *b,
                                    BN_CTX *ctx)
{
    int ret = 0;
    BN_CTX *new_ctx = NULL;
    BIGNUM *curve_p, *curve_a, *curve_b;

    if (ctx ==
NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    curve_p = BN_CTX_get(ctx);
    curve_a = BN_CTX_get(ctx);
    curve_b = BN_CTX_get(ctx);
    /* only the last BN_CTX_get needs checking: it fails iff an earlier one did */
    if (curve_b == NULL)
        goto err;
    BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
    BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
    BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
    if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
        ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
              EC_R_WRONG_CURVE_PARAMETERS);
        goto err;
    }
    group->field_mod_func = BN_nist_mod_224;
    ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
 err:
    BN_CTX_end(ctx);
    BN_CTX_free(new_ctx);
    return ret;
}

/*
 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
 * (X/Z^2, Y/Z^3). Either of |x| and |y| may be NULL if that coordinate is
 * not wanted. Returns 0 on the point at infinity or on conversion failure.
 */
int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
                                                 const EC_POINT *point,
                                                 BIGNUM *x, BIGNUM *y,
                                                 BN_CTX *ctx)
{
    felem z1, z2, x_in, y_in, x_out, y_out;
    widefelem tmp;

    if (EC_POINT_is_at_infinity(group, point)) {
        ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
              EC_R_POINT_AT_INFINITY);
        return 0;
    }
    if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
        (!BN_to_felem(z1, point->Z)))
        return 0;
    /* z2 = 1/Z, then z1 = 1/Z^2, so x = X/Z^2 */
    felem_inv(z2, z1);
    felem_square(tmp, z2);
    felem_reduce(z1, tmp);
    felem_mul(tmp, x_in, z1);
    felem_reduce(x_in, tmp);
    felem_contract(x_out, x_in);
    if (x != NULL) {
        if (!felem_to_BN(x, x_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    /* z1 = (1/Z^2) * (1/Z) = 1/Z^3, so y = Y/Z^3 */
    felem_mul(tmp, z1, z2);
    felem_reduce(z1, tmp);
    felem_mul(tmp, y_in, z1);
    felem_reduce(y_in, tmp);
    felem_contract(y_out, y_in);
    if (y != NULL) {
        if (!felem_to_BN(y, y_out)) {
            ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
                  ERR_R_BN_LIB);
            return 0;
        }
    }
    return 1;
}

/*
 * Batch-convert |num| Jacobian points to affine form in place, using
 * |tmp_felems| (num+1 elements) as scratch for the shared-inversion trick.
 */
static void make_points_affine(size_t num, felem points[ /* num */ ][3],
                               felem tmp_felems[ /* num+1 */ ])
{
    /*
     * Runs in constant time, unless an input is the point at infinity (which
     * normally shouldn't happen).
     */
    ec_GFp_nistp_points_make_affine_internal(num,
                                             points,
                                             sizeof(felem),
                                             tmp_felems,
                                             (void (*)(void *))felem_one,
                                             felem_is_zero_int,
                                             (void (*)(void *, const void *))
                                             felem_assign,
                                             (void (*)(void *, const void *))
                                             felem_square_reduce,
                                             (void (*)(void *, const void *, const void *))
                                             felem_mul_reduce,
                                             (void (*)(void *, const void *))
                                             felem_inv,
                                             (void (*)(void *, const void *))
                                             felem_contract);
}

/*
 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
 * values. Result is stored in r (r can equal one of the inputs).
 */
int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
                               const BIGNUM *scalar, size_t num,
                               const EC_POINT *points[],
                               const BIGNUM *scalars[], BN_CTX *ctx)
{
    int ret = 0;
    int j;
    unsigned i;
    int mixed = 0;
    BIGNUM *x, *y, *z, *tmp_scalar;
    felem_bytearray g_secret;
    felem_bytearray *secrets = NULL;
    felem (*pre_comp)[17][3] = NULL;    /* per point: 0*P .. 16*P */
    felem *tmp_felems = NULL;
    int num_bytes;
    int have_pre_comp = 0;
    size_t num_points = num;
    felem x_in, y_in, z_in, x_out, y_out, z_out;
    NISTP224_PRE_COMP *pre = NULL;
    const felem(*g_pre_comp)[16][3] = NULL;
    EC_POINT *generator = NULL;
    const EC_POINT *p = NULL;
    const BIGNUM *p_scalar = NULL;

    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    z = BN_CTX_get(ctx);
    tmp_scalar = BN_CTX_get(ctx);
    /* only the last BN_CTX_get needs checking */
    if (tmp_scalar == NULL)
        goto err;

    if (scalar != NULL) {
        pre = group->pre_comp.nistp224;
        if (pre)
            /* we have precomputation, try to use it */
            g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
        else
            /* try to use the standard precomputation */
            g_pre_comp = &gmul[0];
        generator = EC_POINT_new(group);
        if (generator == NULL)
            goto err;
        /* get the generator from precomputation */
        if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
            !felem_to_BN(y, g_pre_comp[0][1][1]) ||
            !felem_to_BN(z, g_pre_comp[0][1][2])) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
            goto err;
        }
        if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
                                                      generator, x, y, z,
                                                      ctx))
            goto err;
        if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
            /* precomputation matches generator */
            have_pre_comp = 1;
        else
            /*
             * we don't have valid precomputation: treat the generator as a
             * random point
             */
            num_points = num_points + 1;
    }

    if (num_points > 0) {
        if (num_points >= 3) {
            /*
             * unless we precompute multiples for just one or two points,
             * converting those into affine form is time well spent
             */
            mixed = 1;
        }
        secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
        pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
        if (mixed)
            tmp_felems =
                OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
        if ((secrets == NULL) || (pre_comp == NULL)
            || (mixed && (tmp_felems == NULL))) {
            ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
            goto err;
        }

        /*
         * we treat NULL scalars as 0, and NULL points as points at infinity,
         * i.e., they contribute nothing to the linear combination
         */
        for (i = 0; i < num_points; ++i) {
            if (i == num) {
                /* the generator (only reached when precomp didn't match) */
                p = EC_GROUP_get0_generator(group);
                p_scalar = scalar;
            } else {
                /* the i^th point */
                p = points[i];
                p_scalar = scalars[i];
            }
            if ((p_scalar != NULL) && (p != NULL)) {
                /* reduce scalar to 0 <= scalar < 2^224 */
                if ((BN_num_bits(p_scalar) > 224)
                    || (BN_is_negative(p_scalar))) {
                    /*
                     * this is an unusual input, and we don't guarantee
                     * constant-timeness
                     */
                    if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
                        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                        goto err;
                    }
                    num_bytes = BN_bn2lebinpad(tmp_scalar,
                                               secrets[i], sizeof(secrets[i]));
                } else {
                    num_bytes = BN_bn2lebinpad(p_scalar,
                                               secrets[i], sizeof(secrets[i]));
                }
                if (num_bytes < 0) {
                    ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                    goto err;
                }
                /* precompute multiples */
                if ((!BN_to_felem(x_out, p->X)) ||
                    (!BN_to_felem(y_out, p->Y)) ||
                    (!BN_to_felem(z_out, p->Z)))
                    goto err;
                felem_assign(pre_comp[i][1][0], x_out);
                felem_assign(pre_comp[i][1][1], y_out);
                felem_assign(pre_comp[i][1][2], z_out);
                /* odd j: (j-1)*P + P; even j: double of (j/2)*P */
                for (j = 2; j <= 16; ++j) {
                    if (j & 1) {
                        point_add(pre_comp[i][j][0], pre_comp[i][j][1],
                                  pre_comp[i][j][2], pre_comp[i][1][0],
                                  pre_comp[i][1][1], pre_comp[i][1][2], 0,
                                  pre_comp[i][j - 1][0],
                                  pre_comp[i][j - 1][1],
                                  pre_comp[i][j - 1][2]);
                    } else {
                        point_double(pre_comp[i][j][0], pre_comp[i][j][1],
                                     pre_comp[i][j][2], pre_comp[i][j / 2][0],
                                     pre_comp[i][j / 2][1],
                                     pre_comp[i][j / 2][2]);
                    }
                }
            }
        }
        if (mixed)
            make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
    }

    /* the scalar for the generator */
    if ((scalar != NULL) && (have_pre_comp)) {
        memset(g_secret, 0, sizeof(g_secret));
        /* reduce scalar to 0 <= scalar < 2^224 */
        if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
            /*
             * this is an unusual input, and we don't guarantee
             * constant-timeness
             */
            if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
                ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
                goto err;
            }
            num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
        } else {
            num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
        }
        /*
         * NOTE(review): num_bytes is not checked here, unlike in the loop
         * above; presumably it cannot fail since the scalar fits in 224 bits
         * after reduction — confirm against BN_bn2lebinpad's contract.
         */
        /* do the multiplication with generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  g_secret,
                  mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
    } else {
        /* do the multiplication without generator precomputation */
        batch_mul(x_out, y_out, z_out,
                  (const felem_bytearray(*))secrets, num_points,
                  NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
    }
    /* reduce the output to its unique minimal representation */
    felem_contract(x_in, x_out);
    felem_contract(y_in, y_out);
    felem_contract(z_in, z_out);
    if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
        (!felem_to_BN(z, z_in))) {
        ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
        goto err;
    }
    ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);

 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    OPENSSL_free(secrets);
    OPENSSL_free(pre_comp);
    OPENSSL_free(tmp_felems);
    return ret;
}

/*
 * Build (or reuse) the per-group generator precomputation tables used by
 * ec_GFp_nistp224_points_mul. Returns 1 on success, 0 on error.
 */
int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
{
    int ret = 0;
    NISTP224_PRE_COMP *pre = NULL;
    int i, j;
    BN_CTX *new_ctx = NULL;
    BIGNUM *x, *y;
    EC_POINT *generator = NULL;
    felem tmp_felems[32];

    /* throw away old precomputation */
    EC_pre_comp_free(group);
    if (ctx == NULL)
        if ((ctx = new_ctx = BN_CTX_new()) == NULL)
            return 0;
    BN_CTX_start(ctx);
    x = BN_CTX_get(ctx);
    y = BN_CTX_get(ctx);
    if (y == NULL)
        goto err;
    /* get the generator */
    if (group->generator == NULL)
        goto err;
    generator = EC_POINT_new(group);
    if (generator == NULL)
        goto err;
    BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
    BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
    if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
        goto err;
    if ((pre = nistp224_pre_comp_new()) == NULL)
        goto err;
    /*
     * if the generator is the standard one, use built-in precomputation
     */
    if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
        memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
        goto done;
    }
    if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
        (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
        goto err;
    /*
     * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
     * 2^140*G, 2^196*G for the second one
     */
    for (i = 1; i <= 8; i <<= 1) {
        point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
                     pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
        /* 27 further doublings: table[1][i] = 2^28 * table[0][i] */
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
                         pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
                         pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        }
        if (i == 8)
            break;
        /* table[0][2i] = 2^28 * table[1][i] (one doubling plus 27 below) */
        point_double(pre->g_pre_comp[0][2 * i][0],
                     pre->g_pre_comp[0][2 * i][1],
                     pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
                     pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
        for (j = 0; j < 27; ++j) {
            point_double(pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2],
                         pre->g_pre_comp[0][2 * i][0],
                         pre->g_pre_comp[0][2 * i][1],
                         pre->g_pre_comp[0][2 * i][2]);
        }
    }
    /* fill in the remaining table entries as sums of the powers computed above */
    for (i = 0; i < 2; i++) {
        /* g_pre_comp[i][0] is the point at infinity */
        memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
        /* the remaining multiples */
        /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
        point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
                  pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
                  pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
        point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
                  pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
        point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
                  pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
                  pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
                  0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
                  pre->g_pre_comp[i][4][2]);
        /*
         * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
         */
        point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
                  pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
                  pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
                  0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
                  pre->g_pre_comp[i][2][2]);
        for (j = 1; j < 8; ++j) {
            /* odd multiples: add G resp. 2^28*G */
            point_add(pre->g_pre_comp[i][2 * j + 1][0],
                      pre->g_pre_comp[i][2 * j + 1][1],
                      pre->g_pre_comp[i][2 * j + 1][2],
                      pre->g_pre_comp[i][2 * j][0],
                      pre->g_pre_comp[i][2 * j][1],
                      pre->g_pre_comp[i][2 * j][2], 0,
                      pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
                      pre->g_pre_comp[i][1][2]);
        }
    }
    /* convert all 31 non-infinity entries to affine form in one batch */
    make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);

 done:
    /* hand ownership of |pre| to the group */
    SETPRECOMP(group, nistp224, pre);
    pre = NULL;
    ret = 1;
 err:
    BN_CTX_end(ctx);
    EC_POINT_free(generator);
    BN_CTX_free(new_ctx);
    EC_nistp224_pre_comp_free(pre);
    return ret;
}

/* Report whether this group carries nistp224 generator precomputation. */
int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
{
    return HAVEPRECOMP(group, nistp224);
}

#endif