1ebfedea0SLionel Sambuc /* crypto/ec/ecp_nistp521.c */
2ebfedea0SLionel Sambuc /*
3ebfedea0SLionel Sambuc  * Written by Adam Langley (Google) for the OpenSSL project
4ebfedea0SLionel Sambuc  */
5ebfedea0SLionel Sambuc /* Copyright 2011 Google Inc.
6ebfedea0SLionel Sambuc  *
7ebfedea0SLionel Sambuc  * Licensed under the Apache License, Version 2.0 (the "License");
8ebfedea0SLionel Sambuc  *
9ebfedea0SLionel Sambuc  * you may not use this file except in compliance with the License.
10ebfedea0SLionel Sambuc  * You may obtain a copy of the License at
11ebfedea0SLionel Sambuc  *
12ebfedea0SLionel Sambuc  *     http://www.apache.org/licenses/LICENSE-2.0
13ebfedea0SLionel Sambuc  *
14ebfedea0SLionel Sambuc  *  Unless required by applicable law or agreed to in writing, software
15ebfedea0SLionel Sambuc  *  distributed under the License is distributed on an "AS IS" BASIS,
16ebfedea0SLionel Sambuc  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17ebfedea0SLionel Sambuc  *  See the License for the specific language governing permissions and
18ebfedea0SLionel Sambuc  *  limitations under the License.
19ebfedea0SLionel Sambuc  */
20ebfedea0SLionel Sambuc 
21ebfedea0SLionel Sambuc /*
22ebfedea0SLionel Sambuc  * A 64-bit implementation of the NIST P-521 elliptic curve point multiplication
23ebfedea0SLionel Sambuc  *
24ebfedea0SLionel Sambuc  * OpenSSL integration was taken from Emilia Kasper's work in ecp_nistp224.c.
25ebfedea0SLionel Sambuc  * Otherwise based on Emilia's P224 work, which was inspired by my curve25519
26ebfedea0SLionel Sambuc  * work which got its smarts from Daniel J. Bernstein's work on the same.
27ebfedea0SLionel Sambuc  */
28ebfedea0SLionel Sambuc 
29ebfedea0SLionel Sambuc #include <openssl/opensslconf.h>
30ebfedea0SLionel Sambuc #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128
31ebfedea0SLionel Sambuc 
32ebfedea0SLionel Sambuc # ifndef OPENSSL_SYS_VMS
33ebfedea0SLionel Sambuc #  include <stdint.h>
34ebfedea0SLionel Sambuc # else
35ebfedea0SLionel Sambuc #  include <inttypes.h>
36ebfedea0SLionel Sambuc # endif
37ebfedea0SLionel Sambuc 
38ebfedea0SLionel Sambuc # include <string.h>
39ebfedea0SLionel Sambuc # include <openssl/err.h>
40ebfedea0SLionel Sambuc # include "ec_lcl.h"
41ebfedea0SLionel Sambuc 
42ebfedea0SLionel Sambuc # if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
43ebfedea0SLionel Sambuc   /* even with gcc, the typedef won't work for 32-bit platforms */
44*0a6a1f1dSLionel Sambuc typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
45*0a6a1f1dSLionel Sambuc                                  * platforms */
46ebfedea0SLionel Sambuc # else
47ebfedea0SLionel Sambuc #  error "Need GCC 3.1 or later to define type uint128_t"
48ebfedea0SLionel Sambuc # endif
49ebfedea0SLionel Sambuc 
50ebfedea0SLionel Sambuc typedef uint8_t u8;
51ebfedea0SLionel Sambuc typedef uint64_t u64;
52ebfedea0SLionel Sambuc typedef int64_t s64;
53ebfedea0SLionel Sambuc 
54*0a6a1f1dSLionel Sambuc /*
55*0a6a1f1dSLionel Sambuc  * The underlying field. P521 operates over GF(2^521-1). We can serialise an
56*0a6a1f1dSLionel Sambuc  * element of this field into 66 bytes where the most significant byte
57*0a6a1f1dSLionel Sambuc  * contains only a single bit. We call this an felem_bytearray.
58*0a6a1f1dSLionel Sambuc  */
59ebfedea0SLionel Sambuc 
60ebfedea0SLionel Sambuc typedef u8 felem_bytearray[66];
61ebfedea0SLionel Sambuc 
62*0a6a1f1dSLionel Sambuc /*
63*0a6a1f1dSLionel Sambuc  * These are the parameters of P521, taken from FIPS 186-3, section D.1.2.5.
64*0a6a1f1dSLionel Sambuc  * These values are big-endian.
65*0a6a1f1dSLionel Sambuc  */
66*0a6a1f1dSLionel Sambuc static const felem_bytearray nistp521_curve_params[5] = {
67ebfedea0SLionel Sambuc     {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* p */
68ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
69ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
70ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
71ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
72ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
73ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
74ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
75ebfedea0SLionel Sambuc      0xff, 0xff},
76ebfedea0SLionel Sambuc     {0x01, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* a = -3 */
77ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
78ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
79ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
80ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
81ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
82ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
83ebfedea0SLionel Sambuc      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
84ebfedea0SLionel Sambuc      0xff, 0xfc},
85ebfedea0SLionel Sambuc     {0x00, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, /* b */
86ebfedea0SLionel Sambuc      0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85,
87ebfedea0SLionel Sambuc      0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3,
88ebfedea0SLionel Sambuc      0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1,
89ebfedea0SLionel Sambuc      0x09, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e,
90ebfedea0SLionel Sambuc      0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1,
91ebfedea0SLionel Sambuc      0xbf, 0x07, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c,
92ebfedea0SLionel Sambuc      0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50,
93ebfedea0SLionel Sambuc      0x3f, 0x00},
94ebfedea0SLionel Sambuc     {0x00, 0xc6, 0x85, 0x8e, 0x06, 0xb7, 0x04, 0x04, /* x */
95ebfedea0SLionel Sambuc      0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95,
96ebfedea0SLionel Sambuc      0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x05, 0x3f,
97ebfedea0SLionel Sambuc      0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d,
98ebfedea0SLionel Sambuc      0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7,
99ebfedea0SLionel Sambuc      0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff,
100ebfedea0SLionel Sambuc      0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a,
101ebfedea0SLionel Sambuc      0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5,
102ebfedea0SLionel Sambuc      0xbd, 0x66},
103ebfedea0SLionel Sambuc     {0x01, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, /* y */
104ebfedea0SLionel Sambuc      0xc0, 0x04, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d,
105ebfedea0SLionel Sambuc      0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b,
106ebfedea0SLionel Sambuc      0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e,
107ebfedea0SLionel Sambuc      0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4,
108ebfedea0SLionel Sambuc      0x26, 0x40, 0xc5, 0x50, 0xb9, 0x01, 0x3f, 0xad,
109ebfedea0SLionel Sambuc      0x07, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72,
110ebfedea0SLionel Sambuc      0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1,
111ebfedea0SLionel Sambuc      0x66, 0x50}
112ebfedea0SLionel Sambuc };
113ebfedea0SLionel Sambuc 
114*0a6a1f1dSLionel Sambuc /*-
115*0a6a1f1dSLionel Sambuc  * The representation of field elements.
116ebfedea0SLionel Sambuc  * ------------------------------------
117ebfedea0SLionel Sambuc  *
118ebfedea0SLionel Sambuc  * We represent field elements with nine values. These values are either 64 or
119ebfedea0SLionel Sambuc  * 128 bits and the field element represented is:
120ebfedea0SLionel Sambuc  *   v[0]*2^0 + v[1]*2^58 + v[2]*2^116 + ... + v[8]*2^464  (mod p)
121ebfedea0SLionel Sambuc  * Each of the nine values is called a 'limb'. Since the limbs are spaced only
122ebfedea0SLionel Sambuc  * 58 bits apart, but are greater than 58 bits in length, the most significant
123ebfedea0SLionel Sambuc  * bits of each limb overlap with the least significant bits of the next.
124ebfedea0SLionel Sambuc  *
125ebfedea0SLionel Sambuc  * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
126ebfedea0SLionel Sambuc  * 'largefelem' */
127ebfedea0SLionel Sambuc 
128ebfedea0SLionel Sambuc # define NLIMBS 9
129ebfedea0SLionel Sambuc 
130ebfedea0SLionel Sambuc typedef uint64_t limb;
131ebfedea0SLionel Sambuc typedef limb felem[NLIMBS];
132ebfedea0SLionel Sambuc typedef uint128_t largefelem[NLIMBS];
133ebfedea0SLionel Sambuc 
134ebfedea0SLionel Sambuc static const limb bottom57bits = 0x1ffffffffffffff;
135ebfedea0SLionel Sambuc static const limb bottom58bits = 0x3ffffffffffffff;
136ebfedea0SLionel Sambuc 
137*0a6a1f1dSLionel Sambuc /*
138*0a6a1f1dSLionel Sambuc  * bin66_to_felem takes a little-endian byte array and converts it into felem
139*0a6a1f1dSLionel Sambuc  * form. This assumes that the CPU is little-endian.
140*0a6a1f1dSLionel Sambuc  */
bin66_to_felem(felem out,const u8 in[66])141ebfedea0SLionel Sambuc static void bin66_to_felem(felem out, const u8 in[66])
142ebfedea0SLionel Sambuc {
143ebfedea0SLionel Sambuc     out[0] = (*((limb *) & in[0])) & bottom58bits;
144ebfedea0SLionel Sambuc     out[1] = (*((limb *) & in[7]) >> 2) & bottom58bits;
145ebfedea0SLionel Sambuc     out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits;
146ebfedea0SLionel Sambuc     out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits;
147ebfedea0SLionel Sambuc     out[4] = (*((limb *) & in[29])) & bottom58bits;
148ebfedea0SLionel Sambuc     out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits;
149ebfedea0SLionel Sambuc     out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits;
150ebfedea0SLionel Sambuc     out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits;
151ebfedea0SLionel Sambuc     out[8] = (*((limb *) & in[58])) & bottom57bits;
152ebfedea0SLionel Sambuc }
153ebfedea0SLionel Sambuc 
154*0a6a1f1dSLionel Sambuc /*
155*0a6a1f1dSLionel Sambuc  * felem_to_bin66 takes an felem and serialises into a little endian, 66 byte
156*0a6a1f1dSLionel Sambuc  * array. This assumes that the CPU is little-endian.
157*0a6a1f1dSLionel Sambuc  */
felem_to_bin66(u8 out[66],const felem in)158ebfedea0SLionel Sambuc static void felem_to_bin66(u8 out[66], const felem in)
159ebfedea0SLionel Sambuc {
160ebfedea0SLionel Sambuc     memset(out, 0, 66);
161ebfedea0SLionel Sambuc     (*((limb *) & out[0])) = in[0];
162ebfedea0SLionel Sambuc     (*((limb *) & out[7])) |= in[1] << 2;
163ebfedea0SLionel Sambuc     (*((limb *) & out[14])) |= in[2] << 4;
164ebfedea0SLionel Sambuc     (*((limb *) & out[21])) |= in[3] << 6;
165ebfedea0SLionel Sambuc     (*((limb *) & out[29])) = in[4];
166ebfedea0SLionel Sambuc     (*((limb *) & out[36])) |= in[5] << 2;
167ebfedea0SLionel Sambuc     (*((limb *) & out[43])) |= in[6] << 4;
168ebfedea0SLionel Sambuc     (*((limb *) & out[50])) |= in[7] << 6;
169ebfedea0SLionel Sambuc     (*((limb *) & out[58])) = in[8];
170ebfedea0SLionel Sambuc }
171ebfedea0SLionel Sambuc 
172ebfedea0SLionel Sambuc /* To preserve endianness when using BN_bn2bin and BN_bin2bn */
flip_endian(u8 * out,const u8 * in,unsigned len)173ebfedea0SLionel Sambuc static void flip_endian(u8 *out, const u8 *in, unsigned len)
174ebfedea0SLionel Sambuc {
175ebfedea0SLionel Sambuc     unsigned i;
176ebfedea0SLionel Sambuc     for (i = 0; i < len; ++i)
177ebfedea0SLionel Sambuc         out[i] = in[len - 1 - i];
178ebfedea0SLionel Sambuc }
179ebfedea0SLionel Sambuc 
180ebfedea0SLionel Sambuc /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
BN_to_felem(felem out,const BIGNUM * bn)181ebfedea0SLionel Sambuc static int BN_to_felem(felem out, const BIGNUM *bn)
182ebfedea0SLionel Sambuc {
183ebfedea0SLionel Sambuc     felem_bytearray b_in;
184ebfedea0SLionel Sambuc     felem_bytearray b_out;
185ebfedea0SLionel Sambuc     unsigned num_bytes;
186ebfedea0SLionel Sambuc 
187ebfedea0SLionel Sambuc     /* BN_bn2bin eats leading zeroes */
188ebfedea0SLionel Sambuc     memset(b_out, 0, sizeof b_out);
189ebfedea0SLionel Sambuc     num_bytes = BN_num_bytes(bn);
190*0a6a1f1dSLionel Sambuc     if (num_bytes > sizeof b_out) {
191ebfedea0SLionel Sambuc         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
192ebfedea0SLionel Sambuc         return 0;
193ebfedea0SLionel Sambuc     }
194*0a6a1f1dSLionel Sambuc     if (BN_is_negative(bn)) {
195ebfedea0SLionel Sambuc         ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
196ebfedea0SLionel Sambuc         return 0;
197ebfedea0SLionel Sambuc     }
198ebfedea0SLionel Sambuc     num_bytes = BN_bn2bin(bn, b_in);
199ebfedea0SLionel Sambuc     flip_endian(b_out, b_in, num_bytes);
200ebfedea0SLionel Sambuc     bin66_to_felem(out, b_out);
201ebfedea0SLionel Sambuc     return 1;
202ebfedea0SLionel Sambuc }
203ebfedea0SLionel Sambuc 
204ebfedea0SLionel Sambuc /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
felem_to_BN(BIGNUM * out,const felem in)205ebfedea0SLionel Sambuc static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
206ebfedea0SLionel Sambuc {
207ebfedea0SLionel Sambuc     felem_bytearray b_in, b_out;
208ebfedea0SLionel Sambuc     felem_to_bin66(b_in, in);
209ebfedea0SLionel Sambuc     flip_endian(b_out, b_in, sizeof b_out);
210ebfedea0SLionel Sambuc     return BN_bin2bn(b_out, sizeof b_out, out);
211ebfedea0SLionel Sambuc }
212ebfedea0SLionel Sambuc 
213*0a6a1f1dSLionel Sambuc /*-
214*0a6a1f1dSLionel Sambuc  * Field operations
215*0a6a1f1dSLionel Sambuc  * ----------------
216*0a6a1f1dSLionel Sambuc  */
217ebfedea0SLionel Sambuc 
felem_one(felem out)218ebfedea0SLionel Sambuc static void felem_one(felem out)
219ebfedea0SLionel Sambuc {
220ebfedea0SLionel Sambuc     out[0] = 1;
221ebfedea0SLionel Sambuc     out[1] = 0;
222ebfedea0SLionel Sambuc     out[2] = 0;
223ebfedea0SLionel Sambuc     out[3] = 0;
224ebfedea0SLionel Sambuc     out[4] = 0;
225ebfedea0SLionel Sambuc     out[5] = 0;
226ebfedea0SLionel Sambuc     out[6] = 0;
227ebfedea0SLionel Sambuc     out[7] = 0;
228ebfedea0SLionel Sambuc     out[8] = 0;
229ebfedea0SLionel Sambuc }
230ebfedea0SLionel Sambuc 
felem_assign(felem out,const felem in)231ebfedea0SLionel Sambuc static void felem_assign(felem out, const felem in)
232ebfedea0SLionel Sambuc {
233ebfedea0SLionel Sambuc     out[0] = in[0];
234ebfedea0SLionel Sambuc     out[1] = in[1];
235ebfedea0SLionel Sambuc     out[2] = in[2];
236ebfedea0SLionel Sambuc     out[3] = in[3];
237ebfedea0SLionel Sambuc     out[4] = in[4];
238ebfedea0SLionel Sambuc     out[5] = in[5];
239ebfedea0SLionel Sambuc     out[6] = in[6];
240ebfedea0SLionel Sambuc     out[7] = in[7];
241ebfedea0SLionel Sambuc     out[8] = in[8];
242ebfedea0SLionel Sambuc }
243ebfedea0SLionel Sambuc 
244ebfedea0SLionel Sambuc /* felem_sum64 sets out = out + in. */
felem_sum64(felem out,const felem in)245ebfedea0SLionel Sambuc static void felem_sum64(felem out, const felem in)
246ebfedea0SLionel Sambuc {
247ebfedea0SLionel Sambuc     out[0] += in[0];
248ebfedea0SLionel Sambuc     out[1] += in[1];
249ebfedea0SLionel Sambuc     out[2] += in[2];
250ebfedea0SLionel Sambuc     out[3] += in[3];
251ebfedea0SLionel Sambuc     out[4] += in[4];
252ebfedea0SLionel Sambuc     out[5] += in[5];
253ebfedea0SLionel Sambuc     out[6] += in[6];
254ebfedea0SLionel Sambuc     out[7] += in[7];
255ebfedea0SLionel Sambuc     out[8] += in[8];
256ebfedea0SLionel Sambuc }
257ebfedea0SLionel Sambuc 
258ebfedea0SLionel Sambuc /* felem_scalar sets out = in * scalar */
felem_scalar(felem out,const felem in,limb scalar)259ebfedea0SLionel Sambuc static void felem_scalar(felem out, const felem in, limb scalar)
260ebfedea0SLionel Sambuc {
261ebfedea0SLionel Sambuc     out[0] = in[0] * scalar;
262ebfedea0SLionel Sambuc     out[1] = in[1] * scalar;
263ebfedea0SLionel Sambuc     out[2] = in[2] * scalar;
264ebfedea0SLionel Sambuc     out[3] = in[3] * scalar;
265ebfedea0SLionel Sambuc     out[4] = in[4] * scalar;
266ebfedea0SLionel Sambuc     out[5] = in[5] * scalar;
267ebfedea0SLionel Sambuc     out[6] = in[6] * scalar;
268ebfedea0SLionel Sambuc     out[7] = in[7] * scalar;
269ebfedea0SLionel Sambuc     out[8] = in[8] * scalar;
270ebfedea0SLionel Sambuc }
271ebfedea0SLionel Sambuc 
272ebfedea0SLionel Sambuc /* felem_scalar64 sets out = out * scalar */
felem_scalar64(felem out,limb scalar)273ebfedea0SLionel Sambuc static void felem_scalar64(felem out, limb scalar)
274ebfedea0SLionel Sambuc {
275ebfedea0SLionel Sambuc     out[0] *= scalar;
276ebfedea0SLionel Sambuc     out[1] *= scalar;
277ebfedea0SLionel Sambuc     out[2] *= scalar;
278ebfedea0SLionel Sambuc     out[3] *= scalar;
279ebfedea0SLionel Sambuc     out[4] *= scalar;
280ebfedea0SLionel Sambuc     out[5] *= scalar;
281ebfedea0SLionel Sambuc     out[6] *= scalar;
282ebfedea0SLionel Sambuc     out[7] *= scalar;
283ebfedea0SLionel Sambuc     out[8] *= scalar;
284ebfedea0SLionel Sambuc }
285ebfedea0SLionel Sambuc 
286ebfedea0SLionel Sambuc /* felem_scalar128 sets out = out * scalar */
felem_scalar128(largefelem out,limb scalar)287ebfedea0SLionel Sambuc static void felem_scalar128(largefelem out, limb scalar)
288ebfedea0SLionel Sambuc {
289ebfedea0SLionel Sambuc     out[0] *= scalar;
290ebfedea0SLionel Sambuc     out[1] *= scalar;
291ebfedea0SLionel Sambuc     out[2] *= scalar;
292ebfedea0SLionel Sambuc     out[3] *= scalar;
293ebfedea0SLionel Sambuc     out[4] *= scalar;
294ebfedea0SLionel Sambuc     out[5] *= scalar;
295ebfedea0SLionel Sambuc     out[6] *= scalar;
296ebfedea0SLionel Sambuc     out[7] *= scalar;
297ebfedea0SLionel Sambuc     out[8] *= scalar;
298ebfedea0SLionel Sambuc }
299ebfedea0SLionel Sambuc 
300*0a6a1f1dSLionel Sambuc /*-
301*0a6a1f1dSLionel Sambuc  * felem_neg sets |out| to |-in|
302ebfedea0SLionel Sambuc  * On entry:
303ebfedea0SLionel Sambuc  *   in[i] < 2^59 + 2^14
304ebfedea0SLionel Sambuc  * On exit:
305ebfedea0SLionel Sambuc  *   out[i] < 2^62
306ebfedea0SLionel Sambuc  */
felem_neg(felem out,const felem in)307ebfedea0SLionel Sambuc static void felem_neg(felem out, const felem in)
308ebfedea0SLionel Sambuc {
309ebfedea0SLionel Sambuc     /* In order to prevent underflow, we subtract from 0 mod p. */
310ebfedea0SLionel Sambuc     static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
311ebfedea0SLionel Sambuc     static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
312ebfedea0SLionel Sambuc 
313ebfedea0SLionel Sambuc     out[0] = two62m3 - in[0];
314ebfedea0SLionel Sambuc     out[1] = two62m2 - in[1];
315ebfedea0SLionel Sambuc     out[2] = two62m2 - in[2];
316ebfedea0SLionel Sambuc     out[3] = two62m2 - in[3];
317ebfedea0SLionel Sambuc     out[4] = two62m2 - in[4];
318ebfedea0SLionel Sambuc     out[5] = two62m2 - in[5];
319ebfedea0SLionel Sambuc     out[6] = two62m2 - in[6];
320ebfedea0SLionel Sambuc     out[7] = two62m2 - in[7];
321ebfedea0SLionel Sambuc     out[8] = two62m2 - in[8];
322ebfedea0SLionel Sambuc }
323ebfedea0SLionel Sambuc 
324*0a6a1f1dSLionel Sambuc /*-
325*0a6a1f1dSLionel Sambuc  * felem_diff64 subtracts |in| from |out|
326ebfedea0SLionel Sambuc  * On entry:
327ebfedea0SLionel Sambuc  *   in[i] < 2^59 + 2^14
328ebfedea0SLionel Sambuc  * On exit:
329ebfedea0SLionel Sambuc  *   out[i] < out[i] + 2^62
330ebfedea0SLionel Sambuc  */
felem_diff64(felem out,const felem in)331ebfedea0SLionel Sambuc static void felem_diff64(felem out, const felem in)
332ebfedea0SLionel Sambuc {
333*0a6a1f1dSLionel Sambuc     /*
334*0a6a1f1dSLionel Sambuc      * In order to prevent underflow, we add 0 mod p before subtracting.
335*0a6a1f1dSLionel Sambuc      */
336ebfedea0SLionel Sambuc     static const limb two62m3 = (((limb) 1) << 62) - (((limb) 1) << 5);
337ebfedea0SLionel Sambuc     static const limb two62m2 = (((limb) 1) << 62) - (((limb) 1) << 4);
338ebfedea0SLionel Sambuc 
339ebfedea0SLionel Sambuc     out[0] += two62m3 - in[0];
340ebfedea0SLionel Sambuc     out[1] += two62m2 - in[1];
341ebfedea0SLionel Sambuc     out[2] += two62m2 - in[2];
342ebfedea0SLionel Sambuc     out[3] += two62m2 - in[3];
343ebfedea0SLionel Sambuc     out[4] += two62m2 - in[4];
344ebfedea0SLionel Sambuc     out[5] += two62m2 - in[5];
345ebfedea0SLionel Sambuc     out[6] += two62m2 - in[6];
346ebfedea0SLionel Sambuc     out[7] += two62m2 - in[7];
347ebfedea0SLionel Sambuc     out[8] += two62m2 - in[8];
348ebfedea0SLionel Sambuc }
349ebfedea0SLionel Sambuc 
350*0a6a1f1dSLionel Sambuc /*-
351*0a6a1f1dSLionel Sambuc  * felem_diff_128_64 subtracts |in| from |out|
352ebfedea0SLionel Sambuc  * On entry:
353ebfedea0SLionel Sambuc  *   in[i] < 2^62 + 2^17
354ebfedea0SLionel Sambuc  * On exit:
355ebfedea0SLionel Sambuc  *   out[i] < out[i] + 2^63
356ebfedea0SLionel Sambuc  */
felem_diff_128_64(largefelem out,const felem in)357ebfedea0SLionel Sambuc static void felem_diff_128_64(largefelem out, const felem in)
358ebfedea0SLionel Sambuc {
359*0a6a1f1dSLionel Sambuc     /*
360*0a6a1f1dSLionel Sambuc      * In order to prevent underflow, we add 0 mod p before subtracting.
361*0a6a1f1dSLionel Sambuc      */
362ebfedea0SLionel Sambuc     static const limb two63m6 = (((limb) 1) << 62) - (((limb) 1) << 5);
363ebfedea0SLionel Sambuc     static const limb two63m5 = (((limb) 1) << 62) - (((limb) 1) << 4);
364ebfedea0SLionel Sambuc 
365ebfedea0SLionel Sambuc     out[0] += two63m6 - in[0];
366ebfedea0SLionel Sambuc     out[1] += two63m5 - in[1];
367ebfedea0SLionel Sambuc     out[2] += two63m5 - in[2];
368ebfedea0SLionel Sambuc     out[3] += two63m5 - in[3];
369ebfedea0SLionel Sambuc     out[4] += two63m5 - in[4];
370ebfedea0SLionel Sambuc     out[5] += two63m5 - in[5];
371ebfedea0SLionel Sambuc     out[6] += two63m5 - in[6];
372ebfedea0SLionel Sambuc     out[7] += two63m5 - in[7];
373ebfedea0SLionel Sambuc     out[8] += two63m5 - in[8];
374ebfedea0SLionel Sambuc }
375ebfedea0SLionel Sambuc 
376*0a6a1f1dSLionel Sambuc /*-
377*0a6a1f1dSLionel Sambuc  * felem_diff_128_64 subtracts |in| from |out|
378ebfedea0SLionel Sambuc  * On entry:
379ebfedea0SLionel Sambuc  *   in[i] < 2^126
380ebfedea0SLionel Sambuc  * On exit:
381ebfedea0SLionel Sambuc  *   out[i] < out[i] + 2^127 - 2^69
382ebfedea0SLionel Sambuc  */
felem_diff128(largefelem out,const largefelem in)383ebfedea0SLionel Sambuc static void felem_diff128(largefelem out, const largefelem in)
384ebfedea0SLionel Sambuc {
385*0a6a1f1dSLionel Sambuc     /*
386*0a6a1f1dSLionel Sambuc      * In order to prevent underflow, we add 0 mod p before subtracting.
387*0a6a1f1dSLionel Sambuc      */
388*0a6a1f1dSLionel Sambuc     static const uint128_t two127m70 =
389*0a6a1f1dSLionel Sambuc         (((uint128_t) 1) << 127) - (((uint128_t) 1) << 70);
390*0a6a1f1dSLionel Sambuc     static const uint128_t two127m69 =
391*0a6a1f1dSLionel Sambuc         (((uint128_t) 1) << 127) - (((uint128_t) 1) << 69);
392ebfedea0SLionel Sambuc 
393ebfedea0SLionel Sambuc     out[0] += (two127m70 - in[0]);
394ebfedea0SLionel Sambuc     out[1] += (two127m69 - in[1]);
395ebfedea0SLionel Sambuc     out[2] += (two127m69 - in[2]);
396ebfedea0SLionel Sambuc     out[3] += (two127m69 - in[3]);
397ebfedea0SLionel Sambuc     out[4] += (two127m69 - in[4]);
398ebfedea0SLionel Sambuc     out[5] += (two127m69 - in[5]);
399ebfedea0SLionel Sambuc     out[6] += (two127m69 - in[6]);
400ebfedea0SLionel Sambuc     out[7] += (two127m69 - in[7]);
401ebfedea0SLionel Sambuc     out[8] += (two127m69 - in[8]);
402ebfedea0SLionel Sambuc }
403ebfedea0SLionel Sambuc 
404*0a6a1f1dSLionel Sambuc /*-
405*0a6a1f1dSLionel Sambuc  * felem_square sets |out| = |in|^2
406ebfedea0SLionel Sambuc  * On entry:
407ebfedea0SLionel Sambuc  *   in[i] < 2^62
408ebfedea0SLionel Sambuc  * On exit:
409ebfedea0SLionel Sambuc  *   out[i] < 17 * max(in[i]) * max(in[i])
410ebfedea0SLionel Sambuc  */
felem_square(largefelem out,const felem in)411ebfedea0SLionel Sambuc static void felem_square(largefelem out, const felem in)
412ebfedea0SLionel Sambuc {
413ebfedea0SLionel Sambuc     felem inx2, inx4;
414ebfedea0SLionel Sambuc     felem_scalar(inx2, in, 2);
415ebfedea0SLionel Sambuc     felem_scalar(inx4, in, 4);
416ebfedea0SLionel Sambuc 
417*0a6a1f1dSLionel Sambuc     /*-
418*0a6a1f1dSLionel Sambuc      * We have many cases were we want to do
419ebfedea0SLionel Sambuc      *   in[x] * in[y] +
420ebfedea0SLionel Sambuc      *   in[y] * in[x]
421ebfedea0SLionel Sambuc      * This is obviously just
422ebfedea0SLionel Sambuc      *   2 * in[x] * in[y]
423ebfedea0SLionel Sambuc      * However, rather than do the doubling on the 128 bit result, we
424ebfedea0SLionel Sambuc      * double one of the inputs to the multiplication by reading from
425*0a6a1f1dSLionel Sambuc      * |inx2|
426*0a6a1f1dSLionel Sambuc      */
427ebfedea0SLionel Sambuc 
428ebfedea0SLionel Sambuc     out[0] = ((uint128_t) in[0]) * in[0];
429ebfedea0SLionel Sambuc     out[1] = ((uint128_t) in[0]) * inx2[1];
430*0a6a1f1dSLionel Sambuc     out[2] = ((uint128_t) in[0]) * inx2[2] + ((uint128_t) in[1]) * in[1];
431*0a6a1f1dSLionel Sambuc     out[3] = ((uint128_t) in[0]) * inx2[3] + ((uint128_t) in[1]) * inx2[2];
432ebfedea0SLionel Sambuc     out[4] = ((uint128_t) in[0]) * inx2[4] +
433*0a6a1f1dSLionel Sambuc         ((uint128_t) in[1]) * inx2[3] + ((uint128_t) in[2]) * in[2];
434ebfedea0SLionel Sambuc     out[5] = ((uint128_t) in[0]) * inx2[5] +
435*0a6a1f1dSLionel Sambuc         ((uint128_t) in[1]) * inx2[4] + ((uint128_t) in[2]) * inx2[3];
436ebfedea0SLionel Sambuc     out[6] = ((uint128_t) in[0]) * inx2[6] +
437ebfedea0SLionel Sambuc         ((uint128_t) in[1]) * inx2[5] +
438*0a6a1f1dSLionel Sambuc         ((uint128_t) in[2]) * inx2[4] + ((uint128_t) in[3]) * in[3];
439ebfedea0SLionel Sambuc     out[7] = ((uint128_t) in[0]) * inx2[7] +
440ebfedea0SLionel Sambuc         ((uint128_t) in[1]) * inx2[6] +
441*0a6a1f1dSLionel Sambuc         ((uint128_t) in[2]) * inx2[5] + ((uint128_t) in[3]) * inx2[4];
442ebfedea0SLionel Sambuc     out[8] = ((uint128_t) in[0]) * inx2[8] +
443ebfedea0SLionel Sambuc         ((uint128_t) in[1]) * inx2[7] +
444ebfedea0SLionel Sambuc         ((uint128_t) in[2]) * inx2[6] +
445*0a6a1f1dSLionel Sambuc         ((uint128_t) in[3]) * inx2[5] + ((uint128_t) in[4]) * in[4];
446ebfedea0SLionel Sambuc 
447*0a6a1f1dSLionel Sambuc     /*
448*0a6a1f1dSLionel Sambuc      * The remaining limbs fall above 2^521, with the first falling at 2^522.
449*0a6a1f1dSLionel Sambuc      * They correspond to locations one bit up from the limbs produced above
450*0a6a1f1dSLionel Sambuc      * so we would have to multiply by two to align them. Again, rather than
451*0a6a1f1dSLionel Sambuc      * operate on the 128-bit result, we double one of the inputs to the
452*0a6a1f1dSLionel Sambuc      * multiplication. If we want to double for both this reason, and the
453*0a6a1f1dSLionel Sambuc      * reason above, then we end up multiplying by four.
454*0a6a1f1dSLionel Sambuc      */
455ebfedea0SLionel Sambuc 
456ebfedea0SLionel Sambuc     /* 9 */
457ebfedea0SLionel Sambuc     out[0] += ((uint128_t) in[1]) * inx4[8] +
458ebfedea0SLionel Sambuc         ((uint128_t) in[2]) * inx4[7] +
459*0a6a1f1dSLionel Sambuc         ((uint128_t) in[3]) * inx4[6] + ((uint128_t) in[4]) * inx4[5];
460ebfedea0SLionel Sambuc 
461ebfedea0SLionel Sambuc     /* 10 */
462ebfedea0SLionel Sambuc     out[1] += ((uint128_t) in[2]) * inx4[8] +
463ebfedea0SLionel Sambuc         ((uint128_t) in[3]) * inx4[7] +
464*0a6a1f1dSLionel Sambuc         ((uint128_t) in[4]) * inx4[6] + ((uint128_t) in[5]) * inx2[5];
465ebfedea0SLionel Sambuc 
466ebfedea0SLionel Sambuc     /* 11 */
467ebfedea0SLionel Sambuc     out[2] += ((uint128_t) in[3]) * inx4[8] +
468*0a6a1f1dSLionel Sambuc         ((uint128_t) in[4]) * inx4[7] + ((uint128_t) in[5]) * inx4[6];
469ebfedea0SLionel Sambuc 
470ebfedea0SLionel Sambuc     /* 12 */
471ebfedea0SLionel Sambuc     out[3] += ((uint128_t) in[4]) * inx4[8] +
472*0a6a1f1dSLionel Sambuc         ((uint128_t) in[5]) * inx4[7] + ((uint128_t) in[6]) * inx2[6];
473ebfedea0SLionel Sambuc 
474ebfedea0SLionel Sambuc     /* 13 */
475*0a6a1f1dSLionel Sambuc     out[4] += ((uint128_t) in[5]) * inx4[8] + ((uint128_t) in[6]) * inx4[7];
476ebfedea0SLionel Sambuc 
477ebfedea0SLionel Sambuc     /* 14 */
478*0a6a1f1dSLionel Sambuc     out[5] += ((uint128_t) in[6]) * inx4[8] + ((uint128_t) in[7]) * inx2[7];
479ebfedea0SLionel Sambuc 
480ebfedea0SLionel Sambuc     /* 15 */
481ebfedea0SLionel Sambuc     out[6] += ((uint128_t) in[7]) * inx4[8];
482ebfedea0SLionel Sambuc 
483ebfedea0SLionel Sambuc     /* 16 */
484ebfedea0SLionel Sambuc     out[7] += ((uint128_t) in[8]) * inx2[8];
485ebfedea0SLionel Sambuc }
486ebfedea0SLionel Sambuc 
487*0a6a1f1dSLionel Sambuc /*-
488*0a6a1f1dSLionel Sambuc  * felem_mul sets |out| = |in1| * |in2|
489ebfedea0SLionel Sambuc  * On entry:
490ebfedea0SLionel Sambuc  *   in1[i] < 2^64
491ebfedea0SLionel Sambuc  *   in2[i] < 2^63
492ebfedea0SLionel Sambuc  * On exit:
493ebfedea0SLionel Sambuc  *   out[i] < 17 * max(in1[i]) * max(in2[i])
494ebfedea0SLionel Sambuc  */
felem_mul(largefelem out,const felem in1,const felem in2)495ebfedea0SLionel Sambuc static void felem_mul(largefelem out, const felem in1, const felem in2)
496ebfedea0SLionel Sambuc {
497ebfedea0SLionel Sambuc     felem in2x2;
498ebfedea0SLionel Sambuc     felem_scalar(in2x2, in2, 2);
499ebfedea0SLionel Sambuc 
500ebfedea0SLionel Sambuc     out[0] = ((uint128_t) in1[0]) * in2[0];
501ebfedea0SLionel Sambuc 
502*0a6a1f1dSLionel Sambuc     out[1] = ((uint128_t) in1[0]) * in2[1] + ((uint128_t) in1[1]) * in2[0];
503ebfedea0SLionel Sambuc 
504ebfedea0SLionel Sambuc     out[2] = ((uint128_t) in1[0]) * in2[2] +
505*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[1]) * in2[1] + ((uint128_t) in1[2]) * in2[0];
506ebfedea0SLionel Sambuc 
507ebfedea0SLionel Sambuc     out[3] = ((uint128_t) in1[0]) * in2[3] +
508ebfedea0SLionel Sambuc         ((uint128_t) in1[1]) * in2[2] +
509*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[2]) * in2[1] + ((uint128_t) in1[3]) * in2[0];
510ebfedea0SLionel Sambuc 
511ebfedea0SLionel Sambuc     out[4] = ((uint128_t) in1[0]) * in2[4] +
512ebfedea0SLionel Sambuc         ((uint128_t) in1[1]) * in2[3] +
513ebfedea0SLionel Sambuc         ((uint128_t) in1[2]) * in2[2] +
514*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[3]) * in2[1] + ((uint128_t) in1[4]) * in2[0];
515ebfedea0SLionel Sambuc 
516ebfedea0SLionel Sambuc     out[5] = ((uint128_t) in1[0]) * in2[5] +
517ebfedea0SLionel Sambuc         ((uint128_t) in1[1]) * in2[4] +
518ebfedea0SLionel Sambuc         ((uint128_t) in1[2]) * in2[3] +
519ebfedea0SLionel Sambuc         ((uint128_t) in1[3]) * in2[2] +
520*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[4]) * in2[1] + ((uint128_t) in1[5]) * in2[0];
521ebfedea0SLionel Sambuc 
522ebfedea0SLionel Sambuc     out[6] = ((uint128_t) in1[0]) * in2[6] +
523ebfedea0SLionel Sambuc         ((uint128_t) in1[1]) * in2[5] +
524ebfedea0SLionel Sambuc         ((uint128_t) in1[2]) * in2[4] +
525ebfedea0SLionel Sambuc         ((uint128_t) in1[3]) * in2[3] +
526ebfedea0SLionel Sambuc         ((uint128_t) in1[4]) * in2[2] +
527*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[5]) * in2[1] + ((uint128_t) in1[6]) * in2[0];
528ebfedea0SLionel Sambuc 
529ebfedea0SLionel Sambuc     out[7] = ((uint128_t) in1[0]) * in2[7] +
530ebfedea0SLionel Sambuc         ((uint128_t) in1[1]) * in2[6] +
531ebfedea0SLionel Sambuc         ((uint128_t) in1[2]) * in2[5] +
532ebfedea0SLionel Sambuc         ((uint128_t) in1[3]) * in2[4] +
533ebfedea0SLionel Sambuc         ((uint128_t) in1[4]) * in2[3] +
534ebfedea0SLionel Sambuc         ((uint128_t) in1[5]) * in2[2] +
535*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[6]) * in2[1] + ((uint128_t) in1[7]) * in2[0];
536ebfedea0SLionel Sambuc 
537ebfedea0SLionel Sambuc     out[8] = ((uint128_t) in1[0]) * in2[8] +
538ebfedea0SLionel Sambuc         ((uint128_t) in1[1]) * in2[7] +
539ebfedea0SLionel Sambuc         ((uint128_t) in1[2]) * in2[6] +
540ebfedea0SLionel Sambuc         ((uint128_t) in1[3]) * in2[5] +
541ebfedea0SLionel Sambuc         ((uint128_t) in1[4]) * in2[4] +
542ebfedea0SLionel Sambuc         ((uint128_t) in1[5]) * in2[3] +
543ebfedea0SLionel Sambuc         ((uint128_t) in1[6]) * in2[2] +
544*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2[1] + ((uint128_t) in1[8]) * in2[0];
545ebfedea0SLionel Sambuc 
546ebfedea0SLionel Sambuc     /* See comment in felem_square about the use of in2x2 here */
547ebfedea0SLionel Sambuc 
548ebfedea0SLionel Sambuc     out[0] += ((uint128_t) in1[1]) * in2x2[8] +
549ebfedea0SLionel Sambuc         ((uint128_t) in1[2]) * in2x2[7] +
550ebfedea0SLionel Sambuc         ((uint128_t) in1[3]) * in2x2[6] +
551ebfedea0SLionel Sambuc         ((uint128_t) in1[4]) * in2x2[5] +
552ebfedea0SLionel Sambuc         ((uint128_t) in1[5]) * in2x2[4] +
553ebfedea0SLionel Sambuc         ((uint128_t) in1[6]) * in2x2[3] +
554*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2x2[2] + ((uint128_t) in1[8]) * in2x2[1];
555ebfedea0SLionel Sambuc 
556ebfedea0SLionel Sambuc     out[1] += ((uint128_t) in1[2]) * in2x2[8] +
557ebfedea0SLionel Sambuc         ((uint128_t) in1[3]) * in2x2[7] +
558ebfedea0SLionel Sambuc         ((uint128_t) in1[4]) * in2x2[6] +
559ebfedea0SLionel Sambuc         ((uint128_t) in1[5]) * in2x2[5] +
560ebfedea0SLionel Sambuc         ((uint128_t) in1[6]) * in2x2[4] +
561*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2x2[3] + ((uint128_t) in1[8]) * in2x2[2];
562ebfedea0SLionel Sambuc 
563ebfedea0SLionel Sambuc     out[2] += ((uint128_t) in1[3]) * in2x2[8] +
564ebfedea0SLionel Sambuc         ((uint128_t) in1[4]) * in2x2[7] +
565ebfedea0SLionel Sambuc         ((uint128_t) in1[5]) * in2x2[6] +
566ebfedea0SLionel Sambuc         ((uint128_t) in1[6]) * in2x2[5] +
567*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2x2[4] + ((uint128_t) in1[8]) * in2x2[3];
568ebfedea0SLionel Sambuc 
569ebfedea0SLionel Sambuc     out[3] += ((uint128_t) in1[4]) * in2x2[8] +
570ebfedea0SLionel Sambuc         ((uint128_t) in1[5]) * in2x2[7] +
571ebfedea0SLionel Sambuc         ((uint128_t) in1[6]) * in2x2[6] +
572*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2x2[5] + ((uint128_t) in1[8]) * in2x2[4];
573ebfedea0SLionel Sambuc 
574ebfedea0SLionel Sambuc     out[4] += ((uint128_t) in1[5]) * in2x2[8] +
575ebfedea0SLionel Sambuc         ((uint128_t) in1[6]) * in2x2[7] +
576*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2x2[6] + ((uint128_t) in1[8]) * in2x2[5];
577ebfedea0SLionel Sambuc 
578ebfedea0SLionel Sambuc     out[5] += ((uint128_t) in1[6]) * in2x2[8] +
579*0a6a1f1dSLionel Sambuc         ((uint128_t) in1[7]) * in2x2[7] + ((uint128_t) in1[8]) * in2x2[6];
580ebfedea0SLionel Sambuc 
581ebfedea0SLionel Sambuc     out[6] += ((uint128_t) in1[7]) * in2x2[8] +
582ebfedea0SLionel Sambuc         ((uint128_t) in1[8]) * in2x2[7];
583ebfedea0SLionel Sambuc 
584ebfedea0SLionel Sambuc     out[7] += ((uint128_t) in1[8]) * in2x2[8];
585ebfedea0SLionel Sambuc }
586ebfedea0SLionel Sambuc 
587ebfedea0SLionel Sambuc static const limb bottom52bits = 0xfffffffffffff;
588ebfedea0SLionel Sambuc 
589*0a6a1f1dSLionel Sambuc /*-
590*0a6a1f1dSLionel Sambuc  * felem_reduce converts a largefelem to an felem.
591ebfedea0SLionel Sambuc  * On entry:
592ebfedea0SLionel Sambuc  *   in[i] < 2^128
593ebfedea0SLionel Sambuc  * On exit:
594ebfedea0SLionel Sambuc  *   out[i] < 2^59 + 2^14
595ebfedea0SLionel Sambuc  */
felem_reduce(felem out,const largefelem in)596ebfedea0SLionel Sambuc static void felem_reduce(felem out, const largefelem in)
597ebfedea0SLionel Sambuc {
598ebfedea0SLionel Sambuc     u64 overflow1, overflow2;
599ebfedea0SLionel Sambuc 
600ebfedea0SLionel Sambuc     out[0] = ((limb) in[0]) & bottom58bits;
601ebfedea0SLionel Sambuc     out[1] = ((limb) in[1]) & bottom58bits;
602ebfedea0SLionel Sambuc     out[2] = ((limb) in[2]) & bottom58bits;
603ebfedea0SLionel Sambuc     out[3] = ((limb) in[3]) & bottom58bits;
604ebfedea0SLionel Sambuc     out[4] = ((limb) in[4]) & bottom58bits;
605ebfedea0SLionel Sambuc     out[5] = ((limb) in[5]) & bottom58bits;
606ebfedea0SLionel Sambuc     out[6] = ((limb) in[6]) & bottom58bits;
607ebfedea0SLionel Sambuc     out[7] = ((limb) in[7]) & bottom58bits;
608ebfedea0SLionel Sambuc     out[8] = ((limb) in[8]) & bottom58bits;
609ebfedea0SLionel Sambuc 
610ebfedea0SLionel Sambuc     /* out[i] < 2^58 */
611ebfedea0SLionel Sambuc 
612ebfedea0SLionel Sambuc     out[1] += ((limb) in[0]) >> 58;
613ebfedea0SLionel Sambuc     out[1] += (((limb) (in[0] >> 64)) & bottom52bits) << 6;
614*0a6a1f1dSLionel Sambuc     /*-
615*0a6a1f1dSLionel Sambuc      * out[1] < 2^58 + 2^6 + 2^58
616*0a6a1f1dSLionel Sambuc      *        = 2^59 + 2^6
617*0a6a1f1dSLionel Sambuc      */
618ebfedea0SLionel Sambuc     out[2] += ((limb) (in[0] >> 64)) >> 52;
619ebfedea0SLionel Sambuc 
620ebfedea0SLionel Sambuc     out[2] += ((limb) in[1]) >> 58;
621ebfedea0SLionel Sambuc     out[2] += (((limb) (in[1] >> 64)) & bottom52bits) << 6;
622ebfedea0SLionel Sambuc     out[3] += ((limb) (in[1] >> 64)) >> 52;
623ebfedea0SLionel Sambuc 
624ebfedea0SLionel Sambuc     out[3] += ((limb) in[2]) >> 58;
625ebfedea0SLionel Sambuc     out[3] += (((limb) (in[2] >> 64)) & bottom52bits) << 6;
626ebfedea0SLionel Sambuc     out[4] += ((limb) (in[2] >> 64)) >> 52;
627ebfedea0SLionel Sambuc 
628ebfedea0SLionel Sambuc     out[4] += ((limb) in[3]) >> 58;
629ebfedea0SLionel Sambuc     out[4] += (((limb) (in[3] >> 64)) & bottom52bits) << 6;
630ebfedea0SLionel Sambuc     out[5] += ((limb) (in[3] >> 64)) >> 52;
631ebfedea0SLionel Sambuc 
632ebfedea0SLionel Sambuc     out[5] += ((limb) in[4]) >> 58;
633ebfedea0SLionel Sambuc     out[5] += (((limb) (in[4] >> 64)) & bottom52bits) << 6;
634ebfedea0SLionel Sambuc     out[6] += ((limb) (in[4] >> 64)) >> 52;
635ebfedea0SLionel Sambuc 
636ebfedea0SLionel Sambuc     out[6] += ((limb) in[5]) >> 58;
637ebfedea0SLionel Sambuc     out[6] += (((limb) (in[5] >> 64)) & bottom52bits) << 6;
638ebfedea0SLionel Sambuc     out[7] += ((limb) (in[5] >> 64)) >> 52;
639ebfedea0SLionel Sambuc 
640ebfedea0SLionel Sambuc     out[7] += ((limb) in[6]) >> 58;
641ebfedea0SLionel Sambuc     out[7] += (((limb) (in[6] >> 64)) & bottom52bits) << 6;
642ebfedea0SLionel Sambuc     out[8] += ((limb) (in[6] >> 64)) >> 52;
643ebfedea0SLionel Sambuc 
644ebfedea0SLionel Sambuc     out[8] += ((limb) in[7]) >> 58;
645ebfedea0SLionel Sambuc     out[8] += (((limb) (in[7] >> 64)) & bottom52bits) << 6;
646*0a6a1f1dSLionel Sambuc     /*-
647*0a6a1f1dSLionel Sambuc      * out[x > 1] < 2^58 + 2^6 + 2^58 + 2^12
648*0a6a1f1dSLionel Sambuc      *            < 2^59 + 2^13
649*0a6a1f1dSLionel Sambuc      */
650ebfedea0SLionel Sambuc     overflow1 = ((limb) (in[7] >> 64)) >> 52;
651ebfedea0SLionel Sambuc 
652ebfedea0SLionel Sambuc     overflow1 += ((limb) in[8]) >> 58;
653ebfedea0SLionel Sambuc     overflow1 += (((limb) (in[8] >> 64)) & bottom52bits) << 6;
654ebfedea0SLionel Sambuc     overflow2 = ((limb) (in[8] >> 64)) >> 52;
655ebfedea0SLionel Sambuc 
656ebfedea0SLionel Sambuc     overflow1 <<= 1;            /* overflow1 < 2^13 + 2^7 + 2^59 */
657ebfedea0SLionel Sambuc     overflow2 <<= 1;            /* overflow2 < 2^13 */
658ebfedea0SLionel Sambuc 
659ebfedea0SLionel Sambuc     out[0] += overflow1;        /* out[0] < 2^60 */
660ebfedea0SLionel Sambuc     out[1] += overflow2;        /* out[1] < 2^59 + 2^6 + 2^13 */
661ebfedea0SLionel Sambuc 
662*0a6a1f1dSLionel Sambuc     out[1] += out[0] >> 58;
663*0a6a1f1dSLionel Sambuc     out[0] &= bottom58bits;
664*0a6a1f1dSLionel Sambuc     /*-
665*0a6a1f1dSLionel Sambuc      * out[0] < 2^58
666ebfedea0SLionel Sambuc      * out[1] < 2^59 + 2^6 + 2^13 + 2^2
667*0a6a1f1dSLionel Sambuc      *        < 2^59 + 2^14
668*0a6a1f1dSLionel Sambuc      */
669ebfedea0SLionel Sambuc }
670ebfedea0SLionel Sambuc 
felem_square_reduce(felem out,const felem in)671ebfedea0SLionel Sambuc static void felem_square_reduce(felem out, const felem in)
672ebfedea0SLionel Sambuc {
673ebfedea0SLionel Sambuc     largefelem tmp;
674ebfedea0SLionel Sambuc     felem_square(tmp, in);
675ebfedea0SLionel Sambuc     felem_reduce(out, tmp);
676ebfedea0SLionel Sambuc }
677ebfedea0SLionel Sambuc 
felem_mul_reduce(felem out,const felem in1,const felem in2)678ebfedea0SLionel Sambuc static void felem_mul_reduce(felem out, const felem in1, const felem in2)
679ebfedea0SLionel Sambuc {
680ebfedea0SLionel Sambuc     largefelem tmp;
681ebfedea0SLionel Sambuc     felem_mul(tmp, in1, in2);
682ebfedea0SLionel Sambuc     felem_reduce(out, tmp);
683ebfedea0SLionel Sambuc }
684ebfedea0SLionel Sambuc 
685*0a6a1f1dSLionel Sambuc /*-
686*0a6a1f1dSLionel Sambuc  * felem_inv calculates |out| = |in|^{-1}
687ebfedea0SLionel Sambuc  *
688ebfedea0SLionel Sambuc  * Based on Fermat's Little Theorem:
689ebfedea0SLionel Sambuc  *   a^p = a (mod p)
690ebfedea0SLionel Sambuc  *   a^{p-1} = 1 (mod p)
691ebfedea0SLionel Sambuc  *   a^{p-2} = a^{-1} (mod p)
692ebfedea0SLionel Sambuc  */
felem_inv(felem out,const felem in)693ebfedea0SLionel Sambuc static void felem_inv(felem out, const felem in)
694ebfedea0SLionel Sambuc {
695ebfedea0SLionel Sambuc     felem ftmp, ftmp2, ftmp3, ftmp4;
696ebfedea0SLionel Sambuc     largefelem tmp;
697ebfedea0SLionel Sambuc     unsigned i;
698ebfedea0SLionel Sambuc 
699*0a6a1f1dSLionel Sambuc     felem_square(tmp, in);
700*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp, tmp);    /* 2^1 */
701*0a6a1f1dSLionel Sambuc     felem_mul(tmp, in, ftmp);
702*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp, tmp);    /* 2^2 - 2^0 */
703ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp);
704*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp);
705*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp, tmp);    /* 2^3 - 2^1 */
706*0a6a1f1dSLionel Sambuc     felem_mul(tmp, in, ftmp);
707*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp, tmp);    /* 2^3 - 2^0 */
708*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp);
709*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp, tmp);    /* 2^4 - 2^1 */
710ebfedea0SLionel Sambuc 
711*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp2);
712*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^3 - 2^1 */
713*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp3);
714*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^4 - 2^2 */
715*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
716*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^4 - 2^0 */
717ebfedea0SLionel Sambuc 
718ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
719*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp3);
720*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^5 - 2^1 */
721*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp3);
722*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^6 - 2^2 */
723*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp3);
724*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^7 - 2^3 */
725*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp3);
726*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^8 - 2^4 */
727ebfedea0SLionel Sambuc     felem_assign(ftmp4, ftmp3);
728*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp);
729*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp4, tmp);   /* 2^8 - 2^1 */
730*0a6a1f1dSLionel Sambuc     felem_square(tmp, ftmp4);
731*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp4, tmp);   /* 2^9 - 2^2 */
732*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
733*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^8 - 2^0 */
734ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
735ebfedea0SLionel Sambuc 
736*0a6a1f1dSLionel Sambuc     for (i = 0; i < 8; i++) {
737*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
738*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^16 - 2^8 */
739ebfedea0SLionel Sambuc     }
740*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
741*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^16 - 2^0 */
742ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
743ebfedea0SLionel Sambuc 
744*0a6a1f1dSLionel Sambuc     for (i = 0; i < 16; i++) {
745*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
746*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^32 - 2^16 */
747ebfedea0SLionel Sambuc     }
748*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
749*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^32 - 2^0 */
750ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
751ebfedea0SLionel Sambuc 
752*0a6a1f1dSLionel Sambuc     for (i = 0; i < 32; i++) {
753*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
754*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^64 - 2^32 */
755ebfedea0SLionel Sambuc     }
756*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
757*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^64 - 2^0 */
758ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
759ebfedea0SLionel Sambuc 
760*0a6a1f1dSLionel Sambuc     for (i = 0; i < 64; i++) {
761*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
762*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^128 - 2^64 */
763ebfedea0SLionel Sambuc     }
764*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
765*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^128 - 2^0 */
766ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
767ebfedea0SLionel Sambuc 
768*0a6a1f1dSLionel Sambuc     for (i = 0; i < 128; i++) {
769*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
770*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^256 - 2^128 */
771ebfedea0SLionel Sambuc     }
772*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
773*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^256 - 2^0 */
774ebfedea0SLionel Sambuc     felem_assign(ftmp2, ftmp3);
775ebfedea0SLionel Sambuc 
776*0a6a1f1dSLionel Sambuc     for (i = 0; i < 256; i++) {
777*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
778*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^512 - 2^256 */
779ebfedea0SLionel Sambuc     }
780*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp2);
781*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^512 - 2^0 */
782ebfedea0SLionel Sambuc 
783*0a6a1f1dSLionel Sambuc     for (i = 0; i < 9; i++) {
784*0a6a1f1dSLionel Sambuc         felem_square(tmp, ftmp3);
785*0a6a1f1dSLionel Sambuc         felem_reduce(ftmp3, tmp); /* 2^521 - 2^9 */
786ebfedea0SLionel Sambuc     }
787*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, ftmp4);
788*0a6a1f1dSLionel Sambuc     felem_reduce(ftmp3, tmp);   /* 2^512 - 2^2 */
789*0a6a1f1dSLionel Sambuc     felem_mul(tmp, ftmp3, in);
790*0a6a1f1dSLionel Sambuc     felem_reduce(out, tmp);     /* 2^512 - 3 */
791ebfedea0SLionel Sambuc }
792ebfedea0SLionel Sambuc 
793ebfedea0SLionel Sambuc /* This is 2^521-1, expressed as an felem */
794*0a6a1f1dSLionel Sambuc static const felem kPrime = {
795ebfedea0SLionel Sambuc     0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
796ebfedea0SLionel Sambuc     0x03ffffffffffffff, 0x03ffffffffffffff, 0x03ffffffffffffff,
797ebfedea0SLionel Sambuc     0x03ffffffffffffff, 0x03ffffffffffffff, 0x01ffffffffffffff
798ebfedea0SLionel Sambuc };
799ebfedea0SLionel Sambuc 
800*0a6a1f1dSLionel Sambuc /*-
801*0a6a1f1dSLionel Sambuc  * felem_is_zero returns a limb with all bits set if |in| == 0 (mod p) and 0
802ebfedea0SLionel Sambuc  * otherwise.
803ebfedea0SLionel Sambuc  * On entry:
804ebfedea0SLionel Sambuc  *   in[i] < 2^59 + 2^14
805ebfedea0SLionel Sambuc  */
felem_is_zero(const felem in)806ebfedea0SLionel Sambuc static limb felem_is_zero(const felem in)
807ebfedea0SLionel Sambuc {
808ebfedea0SLionel Sambuc     felem ftmp;
809ebfedea0SLionel Sambuc     limb is_zero, is_p;
810ebfedea0SLionel Sambuc     felem_assign(ftmp, in);
811ebfedea0SLionel Sambuc 
812*0a6a1f1dSLionel Sambuc     ftmp[0] += ftmp[8] >> 57;
813*0a6a1f1dSLionel Sambuc     ftmp[8] &= bottom57bits;
814ebfedea0SLionel Sambuc     /* ftmp[8] < 2^57 */
815*0a6a1f1dSLionel Sambuc     ftmp[1] += ftmp[0] >> 58;
816*0a6a1f1dSLionel Sambuc     ftmp[0] &= bottom58bits;
817*0a6a1f1dSLionel Sambuc     ftmp[2] += ftmp[1] >> 58;
818*0a6a1f1dSLionel Sambuc     ftmp[1] &= bottom58bits;
819*0a6a1f1dSLionel Sambuc     ftmp[3] += ftmp[2] >> 58;
820*0a6a1f1dSLionel Sambuc     ftmp[2] &= bottom58bits;
821*0a6a1f1dSLionel Sambuc     ftmp[4] += ftmp[3] >> 58;
822*0a6a1f1dSLionel Sambuc     ftmp[3] &= bottom58bits;
823*0a6a1f1dSLionel Sambuc     ftmp[5] += ftmp[4] >> 58;
824*0a6a1f1dSLionel Sambuc     ftmp[4] &= bottom58bits;
825*0a6a1f1dSLionel Sambuc     ftmp[6] += ftmp[5] >> 58;
826*0a6a1f1dSLionel Sambuc     ftmp[5] &= bottom58bits;
827*0a6a1f1dSLionel Sambuc     ftmp[7] += ftmp[6] >> 58;
828*0a6a1f1dSLionel Sambuc     ftmp[6] &= bottom58bits;
829*0a6a1f1dSLionel Sambuc     ftmp[8] += ftmp[7] >> 58;
830*0a6a1f1dSLionel Sambuc     ftmp[7] &= bottom58bits;
831ebfedea0SLionel Sambuc     /* ftmp[8] < 2^57 + 4 */
832ebfedea0SLionel Sambuc 
833*0a6a1f1dSLionel Sambuc     /*
834*0a6a1f1dSLionel Sambuc      * The ninth limb of 2*(2^521-1) is 0x03ffffffffffffff, which is greater
835*0a6a1f1dSLionel Sambuc      * than our bound for ftmp[8]. Therefore we only have to check if the
836*0a6a1f1dSLionel Sambuc      * zero is zero or 2^521-1.
837*0a6a1f1dSLionel Sambuc      */
838ebfedea0SLionel Sambuc 
839ebfedea0SLionel Sambuc     is_zero = 0;
840ebfedea0SLionel Sambuc     is_zero |= ftmp[0];
841ebfedea0SLionel Sambuc     is_zero |= ftmp[1];
842ebfedea0SLionel Sambuc     is_zero |= ftmp[2];
843ebfedea0SLionel Sambuc     is_zero |= ftmp[3];
844ebfedea0SLionel Sambuc     is_zero |= ftmp[4];
845ebfedea0SLionel Sambuc     is_zero |= ftmp[5];
846ebfedea0SLionel Sambuc     is_zero |= ftmp[6];
847ebfedea0SLionel Sambuc     is_zero |= ftmp[7];
848ebfedea0SLionel Sambuc     is_zero |= ftmp[8];
849ebfedea0SLionel Sambuc 
850ebfedea0SLionel Sambuc     is_zero--;
851*0a6a1f1dSLionel Sambuc     /*
852*0a6a1f1dSLionel Sambuc      * We know that ftmp[i] < 2^63, therefore the only way that the top bit
853*0a6a1f1dSLionel Sambuc      * can be set is if is_zero was 0 before the decrement.
854*0a6a1f1dSLionel Sambuc      */
855ebfedea0SLionel Sambuc     is_zero = ((s64) is_zero) >> 63;
856ebfedea0SLionel Sambuc 
857ebfedea0SLionel Sambuc     is_p = ftmp[0] ^ kPrime[0];
858ebfedea0SLionel Sambuc     is_p |= ftmp[1] ^ kPrime[1];
859ebfedea0SLionel Sambuc     is_p |= ftmp[2] ^ kPrime[2];
860ebfedea0SLionel Sambuc     is_p |= ftmp[3] ^ kPrime[3];
861ebfedea0SLionel Sambuc     is_p |= ftmp[4] ^ kPrime[4];
862ebfedea0SLionel Sambuc     is_p |= ftmp[5] ^ kPrime[5];
863ebfedea0SLionel Sambuc     is_p |= ftmp[6] ^ kPrime[6];
864ebfedea0SLionel Sambuc     is_p |= ftmp[7] ^ kPrime[7];
865ebfedea0SLionel Sambuc     is_p |= ftmp[8] ^ kPrime[8];
866ebfedea0SLionel Sambuc 
867ebfedea0SLionel Sambuc     is_p--;
868ebfedea0SLionel Sambuc     is_p = ((s64) is_p) >> 63;
869ebfedea0SLionel Sambuc 
870ebfedea0SLionel Sambuc     is_zero |= is_p;
871ebfedea0SLionel Sambuc     return is_zero;
872ebfedea0SLionel Sambuc }
873ebfedea0SLionel Sambuc 
felem_is_zero_int(const felem in)874ebfedea0SLionel Sambuc static int felem_is_zero_int(const felem in)
875ebfedea0SLionel Sambuc {
876ebfedea0SLionel Sambuc     return (int)(felem_is_zero(in) & ((limb) 1));
877ebfedea0SLionel Sambuc }
878ebfedea0SLionel Sambuc 
879*0a6a1f1dSLionel Sambuc /*-
880*0a6a1f1dSLionel Sambuc  * felem_contract converts |in| to its unique, minimal representation.
881ebfedea0SLionel Sambuc  * On entry:
882ebfedea0SLionel Sambuc  *   in[i] < 2^59 + 2^14
883ebfedea0SLionel Sambuc  */
felem_contract(felem out,const felem in)884ebfedea0SLionel Sambuc static void felem_contract(felem out, const felem in)
885ebfedea0SLionel Sambuc {
886ebfedea0SLionel Sambuc     limb is_p, is_greater, sign;
887ebfedea0SLionel Sambuc     static const limb two58 = ((limb) 1) << 58;
888ebfedea0SLionel Sambuc 
889ebfedea0SLionel Sambuc     felem_assign(out, in);
890ebfedea0SLionel Sambuc 
891*0a6a1f1dSLionel Sambuc     out[0] += out[8] >> 57;
892*0a6a1f1dSLionel Sambuc     out[8] &= bottom57bits;
893ebfedea0SLionel Sambuc     /* out[8] < 2^57 */
894*0a6a1f1dSLionel Sambuc     out[1] += out[0] >> 58;
895*0a6a1f1dSLionel Sambuc     out[0] &= bottom58bits;
896*0a6a1f1dSLionel Sambuc     out[2] += out[1] >> 58;
897*0a6a1f1dSLionel Sambuc     out[1] &= bottom58bits;
898*0a6a1f1dSLionel Sambuc     out[3] += out[2] >> 58;
899*0a6a1f1dSLionel Sambuc     out[2] &= bottom58bits;
900*0a6a1f1dSLionel Sambuc     out[4] += out[3] >> 58;
901*0a6a1f1dSLionel Sambuc     out[3] &= bottom58bits;
902*0a6a1f1dSLionel Sambuc     out[5] += out[4] >> 58;
903*0a6a1f1dSLionel Sambuc     out[4] &= bottom58bits;
904*0a6a1f1dSLionel Sambuc     out[6] += out[5] >> 58;
905*0a6a1f1dSLionel Sambuc     out[5] &= bottom58bits;
906*0a6a1f1dSLionel Sambuc     out[7] += out[6] >> 58;
907*0a6a1f1dSLionel Sambuc     out[6] &= bottom58bits;
908*0a6a1f1dSLionel Sambuc     out[8] += out[7] >> 58;
909*0a6a1f1dSLionel Sambuc     out[7] &= bottom58bits;
910ebfedea0SLionel Sambuc     /* out[8] < 2^57 + 4 */
911ebfedea0SLionel Sambuc 
912*0a6a1f1dSLionel Sambuc     /*
913*0a6a1f1dSLionel Sambuc      * If the value is greater than 2^521-1 then we have to subtract 2^521-1
914*0a6a1f1dSLionel Sambuc      * out. See the comments in felem_is_zero regarding why we don't test for
915*0a6a1f1dSLionel Sambuc      * other multiples of the prime.
916*0a6a1f1dSLionel Sambuc      */
917ebfedea0SLionel Sambuc 
918*0a6a1f1dSLionel Sambuc     /*
919*0a6a1f1dSLionel Sambuc      * First, if |out| is equal to 2^521-1, we subtract it out to get zero.
920*0a6a1f1dSLionel Sambuc      */
921ebfedea0SLionel Sambuc 
922ebfedea0SLionel Sambuc     is_p = out[0] ^ kPrime[0];
923ebfedea0SLionel Sambuc     is_p |= out[1] ^ kPrime[1];
924ebfedea0SLionel Sambuc     is_p |= out[2] ^ kPrime[2];
925ebfedea0SLionel Sambuc     is_p |= out[3] ^ kPrime[3];
926ebfedea0SLionel Sambuc     is_p |= out[4] ^ kPrime[4];
927ebfedea0SLionel Sambuc     is_p |= out[5] ^ kPrime[5];
928ebfedea0SLionel Sambuc     is_p |= out[6] ^ kPrime[6];
929ebfedea0SLionel Sambuc     is_p |= out[7] ^ kPrime[7];
930ebfedea0SLionel Sambuc     is_p |= out[8] ^ kPrime[8];
931ebfedea0SLionel Sambuc 
932ebfedea0SLionel Sambuc     is_p--;
933ebfedea0SLionel Sambuc     is_p &= is_p << 32;
934ebfedea0SLionel Sambuc     is_p &= is_p << 16;
935ebfedea0SLionel Sambuc     is_p &= is_p << 8;
936ebfedea0SLionel Sambuc     is_p &= is_p << 4;
937ebfedea0SLionel Sambuc     is_p &= is_p << 2;
938ebfedea0SLionel Sambuc     is_p &= is_p << 1;
939ebfedea0SLionel Sambuc     is_p = ((s64) is_p) >> 63;
940ebfedea0SLionel Sambuc     is_p = ~is_p;
941ebfedea0SLionel Sambuc 
942ebfedea0SLionel Sambuc     /* is_p is 0 iff |out| == 2^521-1 and all ones otherwise */
943ebfedea0SLionel Sambuc 
944ebfedea0SLionel Sambuc     out[0] &= is_p;
945ebfedea0SLionel Sambuc     out[1] &= is_p;
946ebfedea0SLionel Sambuc     out[2] &= is_p;
947ebfedea0SLionel Sambuc     out[3] &= is_p;
948ebfedea0SLionel Sambuc     out[4] &= is_p;
949ebfedea0SLionel Sambuc     out[5] &= is_p;
950ebfedea0SLionel Sambuc     out[6] &= is_p;
951ebfedea0SLionel Sambuc     out[7] &= is_p;
952ebfedea0SLionel Sambuc     out[8] &= is_p;
953ebfedea0SLionel Sambuc 
954*0a6a1f1dSLionel Sambuc     /*
955*0a6a1f1dSLionel Sambuc      * In order to test that |out| >= 2^521-1 we need only test if out[8] >>
956*0a6a1f1dSLionel Sambuc      * 57 is greater than zero as (2^521-1) + x >= 2^522
957*0a6a1f1dSLionel Sambuc      */
958ebfedea0SLionel Sambuc     is_greater = out[8] >> 57;
959ebfedea0SLionel Sambuc     is_greater |= is_greater << 32;
960ebfedea0SLionel Sambuc     is_greater |= is_greater << 16;
961ebfedea0SLionel Sambuc     is_greater |= is_greater << 8;
962ebfedea0SLionel Sambuc     is_greater |= is_greater << 4;
963ebfedea0SLionel Sambuc     is_greater |= is_greater << 2;
964ebfedea0SLionel Sambuc     is_greater |= is_greater << 1;
965ebfedea0SLionel Sambuc     is_greater = ((s64) is_greater) >> 63;
966ebfedea0SLionel Sambuc 
967ebfedea0SLionel Sambuc     out[0] -= kPrime[0] & is_greater;
968ebfedea0SLionel Sambuc     out[1] -= kPrime[1] & is_greater;
969ebfedea0SLionel Sambuc     out[2] -= kPrime[2] & is_greater;
970ebfedea0SLionel Sambuc     out[3] -= kPrime[3] & is_greater;
971ebfedea0SLionel Sambuc     out[4] -= kPrime[4] & is_greater;
972ebfedea0SLionel Sambuc     out[5] -= kPrime[5] & is_greater;
973ebfedea0SLionel Sambuc     out[6] -= kPrime[6] & is_greater;
974ebfedea0SLionel Sambuc     out[7] -= kPrime[7] & is_greater;
975ebfedea0SLionel Sambuc     out[8] -= kPrime[8] & is_greater;
976ebfedea0SLionel Sambuc 
977ebfedea0SLionel Sambuc     /* Eliminate negative coefficients */
978*0a6a1f1dSLionel Sambuc     sign = -(out[0] >> 63);
979*0a6a1f1dSLionel Sambuc     out[0] += (two58 & sign);
980*0a6a1f1dSLionel Sambuc     out[1] -= (1 & sign);
981*0a6a1f1dSLionel Sambuc     sign = -(out[1] >> 63);
982*0a6a1f1dSLionel Sambuc     out[1] += (two58 & sign);
983*0a6a1f1dSLionel Sambuc     out[2] -= (1 & sign);
984*0a6a1f1dSLionel Sambuc     sign = -(out[2] >> 63);
985*0a6a1f1dSLionel Sambuc     out[2] += (two58 & sign);
986*0a6a1f1dSLionel Sambuc     out[3] -= (1 & sign);
987*0a6a1f1dSLionel Sambuc     sign = -(out[3] >> 63);
988*0a6a1f1dSLionel Sambuc     out[3] += (two58 & sign);
989*0a6a1f1dSLionel Sambuc     out[4] -= (1 & sign);
990*0a6a1f1dSLionel Sambuc     sign = -(out[4] >> 63);
991*0a6a1f1dSLionel Sambuc     out[4] += (two58 & sign);
992*0a6a1f1dSLionel Sambuc     out[5] -= (1 & sign);
993*0a6a1f1dSLionel Sambuc     sign = -(out[0] >> 63);
994*0a6a1f1dSLionel Sambuc     out[5] += (two58 & sign);
995*0a6a1f1dSLionel Sambuc     out[6] -= (1 & sign);
996*0a6a1f1dSLionel Sambuc     sign = -(out[6] >> 63);
997*0a6a1f1dSLionel Sambuc     out[6] += (two58 & sign);
998*0a6a1f1dSLionel Sambuc     out[7] -= (1 & sign);
999*0a6a1f1dSLionel Sambuc     sign = -(out[7] >> 63);
1000*0a6a1f1dSLionel Sambuc     out[7] += (two58 & sign);
1001*0a6a1f1dSLionel Sambuc     out[8] -= (1 & sign);
1002*0a6a1f1dSLionel Sambuc     sign = -(out[5] >> 63);
1003*0a6a1f1dSLionel Sambuc     out[5] += (two58 & sign);
1004*0a6a1f1dSLionel Sambuc     out[6] -= (1 & sign);
1005*0a6a1f1dSLionel Sambuc     sign = -(out[6] >> 63);
1006*0a6a1f1dSLionel Sambuc     out[6] += (two58 & sign);
1007*0a6a1f1dSLionel Sambuc     out[7] -= (1 & sign);
1008*0a6a1f1dSLionel Sambuc     sign = -(out[7] >> 63);
1009*0a6a1f1dSLionel Sambuc     out[7] += (two58 & sign);
1010*0a6a1f1dSLionel Sambuc     out[8] -= (1 & sign);
1011ebfedea0SLionel Sambuc }
1012ebfedea0SLionel Sambuc 
1013*0a6a1f1dSLionel Sambuc /*-
1014*0a6a1f1dSLionel Sambuc  * Group operations
1015ebfedea0SLionel Sambuc  * ----------------
1016ebfedea0SLionel Sambuc  *
1017ebfedea0SLionel Sambuc  * Building on top of the field operations we have the operations on the
1018ebfedea0SLionel Sambuc  * elliptic curve group itself. Points on the curve are represented in Jacobian
1019ebfedea0SLionel Sambuc  * coordinates */
1020ebfedea0SLionel Sambuc 
1021*0a6a1f1dSLionel Sambuc /*-
1022*0a6a1f1dSLionel Sambuc  * point_double calcuates 2*(x_in, y_in, z_in)
1023ebfedea0SLionel Sambuc  *
1024ebfedea0SLionel Sambuc  * The method is taken from:
1025ebfedea0SLionel Sambuc  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
1026ebfedea0SLionel Sambuc  *
1027ebfedea0SLionel Sambuc  * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
1028ebfedea0SLionel Sambuc  * while x_out == y_in is not (maybe this works, but it's not tested). */
1029ebfedea0SLionel Sambuc static void
point_double(felem x_out,felem y_out,felem z_out,const felem x_in,const felem y_in,const felem z_in)1030ebfedea0SLionel Sambuc point_double(felem x_out, felem y_out, felem z_out,
1031ebfedea0SLionel Sambuc              const felem x_in, const felem y_in, const felem z_in)
1032ebfedea0SLionel Sambuc {
1033ebfedea0SLionel Sambuc     largefelem tmp, tmp2;
1034ebfedea0SLionel Sambuc     felem delta, gamma, beta, alpha, ftmp, ftmp2;
1035ebfedea0SLionel Sambuc 
1036ebfedea0SLionel Sambuc     felem_assign(ftmp, x_in);
1037ebfedea0SLionel Sambuc     felem_assign(ftmp2, x_in);
1038ebfedea0SLionel Sambuc 
1039ebfedea0SLionel Sambuc     /* delta = z^2 */
1040ebfedea0SLionel Sambuc     felem_square(tmp, z_in);
1041ebfedea0SLionel Sambuc     felem_reduce(delta, tmp);   /* delta[i] < 2^59 + 2^14 */
1042ebfedea0SLionel Sambuc 
1043ebfedea0SLionel Sambuc     /* gamma = y^2 */
1044ebfedea0SLionel Sambuc     felem_square(tmp, y_in);
1045ebfedea0SLionel Sambuc     felem_reduce(gamma, tmp);   /* gamma[i] < 2^59 + 2^14 */
1046ebfedea0SLionel Sambuc 
1047ebfedea0SLionel Sambuc     /* beta = x*gamma */
1048ebfedea0SLionel Sambuc     felem_mul(tmp, x_in, gamma);
1049ebfedea0SLionel Sambuc     felem_reduce(beta, tmp);    /* beta[i] < 2^59 + 2^14 */
1050ebfedea0SLionel Sambuc 
1051ebfedea0SLionel Sambuc     /* alpha = 3*(x-delta)*(x+delta) */
1052ebfedea0SLionel Sambuc     felem_diff64(ftmp, delta);
1053ebfedea0SLionel Sambuc     /* ftmp[i] < 2^61 */
1054ebfedea0SLionel Sambuc     felem_sum64(ftmp2, delta);
1055ebfedea0SLionel Sambuc     /* ftmp2[i] < 2^60 + 2^15 */
1056ebfedea0SLionel Sambuc     felem_scalar64(ftmp2, 3);
1057ebfedea0SLionel Sambuc     /* ftmp2[i] < 3*2^60 + 3*2^15 */
1058ebfedea0SLionel Sambuc     felem_mul(tmp, ftmp, ftmp2);
1059*0a6a1f1dSLionel Sambuc     /*-
1060*0a6a1f1dSLionel Sambuc      * tmp[i] < 17(3*2^121 + 3*2^76)
1061ebfedea0SLionel Sambuc      *        = 61*2^121 + 61*2^76
1062ebfedea0SLionel Sambuc      *        < 64*2^121 + 64*2^76
1063ebfedea0SLionel Sambuc      *        = 2^127 + 2^82
1064*0a6a1f1dSLionel Sambuc      *        < 2^128
1065*0a6a1f1dSLionel Sambuc      */
1066ebfedea0SLionel Sambuc     felem_reduce(alpha, tmp);
1067ebfedea0SLionel Sambuc 
1068ebfedea0SLionel Sambuc     /* x' = alpha^2 - 8*beta */
1069ebfedea0SLionel Sambuc     felem_square(tmp, alpha);
1070*0a6a1f1dSLionel Sambuc     /*
1071*0a6a1f1dSLionel Sambuc      * tmp[i] < 17*2^120 < 2^125
1072*0a6a1f1dSLionel Sambuc      */
1073ebfedea0SLionel Sambuc     felem_assign(ftmp, beta);
1074ebfedea0SLionel Sambuc     felem_scalar64(ftmp, 8);
1075ebfedea0SLionel Sambuc     /* ftmp[i] < 2^62 + 2^17 */
1076ebfedea0SLionel Sambuc     felem_diff_128_64(tmp, ftmp);
1077ebfedea0SLionel Sambuc     /* tmp[i] < 2^125 + 2^63 + 2^62 + 2^17 */
1078ebfedea0SLionel Sambuc     felem_reduce(x_out, tmp);
1079ebfedea0SLionel Sambuc 
1080ebfedea0SLionel Sambuc     /* z' = (y + z)^2 - gamma - delta */
1081ebfedea0SLionel Sambuc     felem_sum64(delta, gamma);
1082ebfedea0SLionel Sambuc     /* delta[i] < 2^60 + 2^15 */
1083ebfedea0SLionel Sambuc     felem_assign(ftmp, y_in);
1084ebfedea0SLionel Sambuc     felem_sum64(ftmp, z_in);
1085ebfedea0SLionel Sambuc     /* ftmp[i] < 2^60 + 2^15 */
1086ebfedea0SLionel Sambuc     felem_square(tmp, ftmp);
1087*0a6a1f1dSLionel Sambuc     /*
1088*0a6a1f1dSLionel Sambuc      * tmp[i] < 17(2^122) < 2^127
1089*0a6a1f1dSLionel Sambuc      */
1090ebfedea0SLionel Sambuc     felem_diff_128_64(tmp, delta);
1091ebfedea0SLionel Sambuc     /* tmp[i] < 2^127 + 2^63 */
1092ebfedea0SLionel Sambuc     felem_reduce(z_out, tmp);
1093ebfedea0SLionel Sambuc 
1094ebfedea0SLionel Sambuc     /* y' = alpha*(4*beta - x') - 8*gamma^2 */
1095ebfedea0SLionel Sambuc     felem_scalar64(beta, 4);
1096ebfedea0SLionel Sambuc     /* beta[i] < 2^61 + 2^16 */
1097ebfedea0SLionel Sambuc     felem_diff64(beta, x_out);
1098ebfedea0SLionel Sambuc     /* beta[i] < 2^61 + 2^60 + 2^16 */
1099ebfedea0SLionel Sambuc     felem_mul(tmp, alpha, beta);
1100*0a6a1f1dSLionel Sambuc     /*-
1101*0a6a1f1dSLionel Sambuc      * tmp[i] < 17*((2^59 + 2^14)(2^61 + 2^60 + 2^16))
1102ebfedea0SLionel Sambuc      *        = 17*(2^120 + 2^75 + 2^119 + 2^74 + 2^75 + 2^30)
1103ebfedea0SLionel Sambuc      *        = 17*(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1104*0a6a1f1dSLionel Sambuc      *        < 2^128
1105*0a6a1f1dSLionel Sambuc      */
1106ebfedea0SLionel Sambuc     felem_square(tmp2, gamma);
1107*0a6a1f1dSLionel Sambuc     /*-
1108*0a6a1f1dSLionel Sambuc      * tmp2[i] < 17*(2^59 + 2^14)^2
1109*0a6a1f1dSLionel Sambuc      *         = 17*(2^118 + 2^74 + 2^28)
1110*0a6a1f1dSLionel Sambuc      */
1111ebfedea0SLionel Sambuc     felem_scalar128(tmp2, 8);
1112*0a6a1f1dSLionel Sambuc     /*-
1113*0a6a1f1dSLionel Sambuc      * tmp2[i] < 8*17*(2^118 + 2^74 + 2^28)
1114ebfedea0SLionel Sambuc      *         = 2^125 + 2^121 + 2^81 + 2^77 + 2^35 + 2^31
1115*0a6a1f1dSLionel Sambuc      *         < 2^126
1116*0a6a1f1dSLionel Sambuc      */
1117ebfedea0SLionel Sambuc     felem_diff128(tmp, tmp2);
1118*0a6a1f1dSLionel Sambuc     /*-
1119*0a6a1f1dSLionel Sambuc      * tmp[i] < 2^127 - 2^69 + 17(2^120 + 2^119 + 2^76 + 2^74 + 2^30)
1120ebfedea0SLionel Sambuc      *        = 2^127 + 2^124 + 2^122 + 2^120 + 2^118 + 2^80 + 2^78 + 2^76 +
1121ebfedea0SLionel Sambuc      *          2^74 + 2^69 + 2^34 + 2^30
1122*0a6a1f1dSLionel Sambuc      *        < 2^128
1123*0a6a1f1dSLionel Sambuc      */
1124ebfedea0SLionel Sambuc     felem_reduce(y_out, tmp);
1125ebfedea0SLionel Sambuc }
1126ebfedea0SLionel Sambuc 
1127ebfedea0SLionel Sambuc /* copy_conditional copies in to out iff mask is all ones. */
copy_conditional(felem out,const felem in,limb mask)1128*0a6a1f1dSLionel Sambuc static void copy_conditional(felem out, const felem in, limb mask)
1129ebfedea0SLionel Sambuc {
1130ebfedea0SLionel Sambuc     unsigned i;
1131*0a6a1f1dSLionel Sambuc     for (i = 0; i < NLIMBS; ++i) {
1132ebfedea0SLionel Sambuc         const limb tmp = mask & (in[i] ^ out[i]);
1133ebfedea0SLionel Sambuc         out[i] ^= tmp;
1134ebfedea0SLionel Sambuc     }
1135ebfedea0SLionel Sambuc }
1136ebfedea0SLionel Sambuc 
1137*0a6a1f1dSLionel Sambuc /*-
1138*0a6a1f1dSLionel Sambuc  * point_add calcuates (x1, y1, z1) + (x2, y2, z2)
1139ebfedea0SLionel Sambuc  *
1140ebfedea0SLionel Sambuc  * The method is taken from
1141ebfedea0SLionel Sambuc  *   http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
1142ebfedea0SLionel Sambuc  * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
1143ebfedea0SLionel Sambuc  *
1144ebfedea0SLionel Sambuc  * This function includes a branch for checking whether the two input points
1145ebfedea0SLionel Sambuc  * are equal (while not equal to the point at infinity). This case never
1146ebfedea0SLionel Sambuc  * happens during single point multiplication, so there is no timing leak for
1147ebfedea0SLionel Sambuc  * ECDH or ECDSA signing. */
point_add(felem x3,felem y3,felem z3,const felem x1,const felem y1,const felem z1,const int mixed,const felem x2,const felem y2,const felem z2)1148ebfedea0SLionel Sambuc static void point_add(felem x3, felem y3, felem z3,
1149ebfedea0SLionel Sambuc                       const felem x1, const felem y1, const felem z1,
1150*0a6a1f1dSLionel Sambuc                       const int mixed, const felem x2, const felem y2,
1151*0a6a1f1dSLionel Sambuc                       const felem z2)
1152ebfedea0SLionel Sambuc {
1153ebfedea0SLionel Sambuc     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
1154ebfedea0SLionel Sambuc     largefelem tmp, tmp2;
1155ebfedea0SLionel Sambuc     limb x_equal, y_equal, z1_is_zero, z2_is_zero;
1156ebfedea0SLionel Sambuc 
1157ebfedea0SLionel Sambuc     z1_is_zero = felem_is_zero(z1);
1158ebfedea0SLionel Sambuc     z2_is_zero = felem_is_zero(z2);
1159ebfedea0SLionel Sambuc 
1160ebfedea0SLionel Sambuc     /* ftmp = z1z1 = z1**2 */
1161ebfedea0SLionel Sambuc     felem_square(tmp, z1);
1162ebfedea0SLionel Sambuc     felem_reduce(ftmp, tmp);
1163ebfedea0SLionel Sambuc 
1164*0a6a1f1dSLionel Sambuc     if (!mixed) {
1165ebfedea0SLionel Sambuc         /* ftmp2 = z2z2 = z2**2 */
1166ebfedea0SLionel Sambuc         felem_square(tmp, z2);
1167ebfedea0SLionel Sambuc         felem_reduce(ftmp2, tmp);
1168ebfedea0SLionel Sambuc 
1169ebfedea0SLionel Sambuc         /* u1 = ftmp3 = x1*z2z2 */
1170ebfedea0SLionel Sambuc         felem_mul(tmp, x1, ftmp2);
1171ebfedea0SLionel Sambuc         felem_reduce(ftmp3, tmp);
1172ebfedea0SLionel Sambuc 
1173ebfedea0SLionel Sambuc         /* ftmp5 = z1 + z2 */
1174ebfedea0SLionel Sambuc         felem_assign(ftmp5, z1);
1175ebfedea0SLionel Sambuc         felem_sum64(ftmp5, z2);
1176ebfedea0SLionel Sambuc         /* ftmp5[i] < 2^61 */
1177ebfedea0SLionel Sambuc 
1178ebfedea0SLionel Sambuc         /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
1179ebfedea0SLionel Sambuc         felem_square(tmp, ftmp5);
1180ebfedea0SLionel Sambuc         /* tmp[i] < 17*2^122 */
1181ebfedea0SLionel Sambuc         felem_diff_128_64(tmp, ftmp);
1182ebfedea0SLionel Sambuc         /* tmp[i] < 17*2^122 + 2^63 */
1183ebfedea0SLionel Sambuc         felem_diff_128_64(tmp, ftmp2);
1184ebfedea0SLionel Sambuc         /* tmp[i] < 17*2^122 + 2^64 */
1185ebfedea0SLionel Sambuc         felem_reduce(ftmp5, tmp);
1186ebfedea0SLionel Sambuc 
1187ebfedea0SLionel Sambuc         /* ftmp2 = z2 * z2z2 */
1188ebfedea0SLionel Sambuc         felem_mul(tmp, ftmp2, z2);
1189ebfedea0SLionel Sambuc         felem_reduce(ftmp2, tmp);
1190ebfedea0SLionel Sambuc 
1191ebfedea0SLionel Sambuc         /* s1 = ftmp6 = y1 * z2**3 */
1192ebfedea0SLionel Sambuc         felem_mul(tmp, y1, ftmp2);
1193ebfedea0SLionel Sambuc         felem_reduce(ftmp6, tmp);
1194*0a6a1f1dSLionel Sambuc     } else {
1195*0a6a1f1dSLionel Sambuc         /*
1196*0a6a1f1dSLionel Sambuc          * We'll assume z2 = 1 (special case z2 = 0 is handled later)
1197*0a6a1f1dSLionel Sambuc          */
1198ebfedea0SLionel Sambuc 
1199ebfedea0SLionel Sambuc         /* u1 = ftmp3 = x1*z2z2 */
1200ebfedea0SLionel Sambuc         felem_assign(ftmp3, x1);
1201ebfedea0SLionel Sambuc 
1202ebfedea0SLionel Sambuc         /* ftmp5 = 2*z1z2 */
1203ebfedea0SLionel Sambuc         felem_scalar(ftmp5, z1, 2);
1204ebfedea0SLionel Sambuc 
1205ebfedea0SLionel Sambuc         /* s1 = ftmp6 = y1 * z2**3 */
1206ebfedea0SLionel Sambuc         felem_assign(ftmp6, y1);
1207ebfedea0SLionel Sambuc     }
1208ebfedea0SLionel Sambuc 
1209ebfedea0SLionel Sambuc     /* u2 = x2*z1z1 */
1210ebfedea0SLionel Sambuc     felem_mul(tmp, x2, ftmp);
1211ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^120 */
1212ebfedea0SLionel Sambuc 
1213ebfedea0SLionel Sambuc     /* h = ftmp4 = u2 - u1 */
1214ebfedea0SLionel Sambuc     felem_diff_128_64(tmp, ftmp3);
1215ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^120 + 2^63 */
1216ebfedea0SLionel Sambuc     felem_reduce(ftmp4, tmp);
1217ebfedea0SLionel Sambuc 
1218ebfedea0SLionel Sambuc     x_equal = felem_is_zero(ftmp4);
1219ebfedea0SLionel Sambuc 
1220ebfedea0SLionel Sambuc     /* z_out = ftmp5 * h */
1221ebfedea0SLionel Sambuc     felem_mul(tmp, ftmp5, ftmp4);
1222ebfedea0SLionel Sambuc     felem_reduce(z_out, tmp);
1223ebfedea0SLionel Sambuc 
1224ebfedea0SLionel Sambuc     /* ftmp = z1 * z1z1 */
1225ebfedea0SLionel Sambuc     felem_mul(tmp, ftmp, z1);
1226ebfedea0SLionel Sambuc     felem_reduce(ftmp, tmp);
1227ebfedea0SLionel Sambuc 
1228ebfedea0SLionel Sambuc     /* s2 = tmp = y2 * z1**3 */
1229ebfedea0SLionel Sambuc     felem_mul(tmp, y2, ftmp);
1230ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^120 */
1231ebfedea0SLionel Sambuc 
1232ebfedea0SLionel Sambuc     /* r = ftmp5 = (s2 - s1)*2 */
1233ebfedea0SLionel Sambuc     felem_diff_128_64(tmp, ftmp6);
1234ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^120 + 2^63 */
1235ebfedea0SLionel Sambuc     felem_reduce(ftmp5, tmp);
1236ebfedea0SLionel Sambuc     y_equal = felem_is_zero(ftmp5);
1237ebfedea0SLionel Sambuc     felem_scalar64(ftmp5, 2);
1238ebfedea0SLionel Sambuc     /* ftmp5[i] < 2^61 */
1239ebfedea0SLionel Sambuc 
1240*0a6a1f1dSLionel Sambuc     if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
1241ebfedea0SLionel Sambuc         point_double(x3, y3, z3, x1, y1, z1);
1242ebfedea0SLionel Sambuc         return;
1243ebfedea0SLionel Sambuc     }
1244ebfedea0SLionel Sambuc 
1245ebfedea0SLionel Sambuc     /* I = ftmp = (2h)**2 */
1246ebfedea0SLionel Sambuc     felem_assign(ftmp, ftmp4);
1247ebfedea0SLionel Sambuc     felem_scalar64(ftmp, 2);
1248ebfedea0SLionel Sambuc     /* ftmp[i] < 2^61 */
1249ebfedea0SLionel Sambuc     felem_square(tmp, ftmp);
1250ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^122 */
1251ebfedea0SLionel Sambuc     felem_reduce(ftmp, tmp);
1252ebfedea0SLionel Sambuc 
1253ebfedea0SLionel Sambuc     /* J = ftmp2 = h * I */
1254ebfedea0SLionel Sambuc     felem_mul(tmp, ftmp4, ftmp);
1255ebfedea0SLionel Sambuc     felem_reduce(ftmp2, tmp);
1256ebfedea0SLionel Sambuc 
1257ebfedea0SLionel Sambuc     /* V = ftmp4 = U1 * I */
1258ebfedea0SLionel Sambuc     felem_mul(tmp, ftmp3, ftmp);
1259ebfedea0SLionel Sambuc     felem_reduce(ftmp4, tmp);
1260ebfedea0SLionel Sambuc 
1261ebfedea0SLionel Sambuc     /* x_out = r**2 - J - 2V */
1262ebfedea0SLionel Sambuc     felem_square(tmp, ftmp5);
1263ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^122 */
1264ebfedea0SLionel Sambuc     felem_diff_128_64(tmp, ftmp2);
1265ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^122 + 2^63 */
1266ebfedea0SLionel Sambuc     felem_assign(ftmp3, ftmp4);
1267ebfedea0SLionel Sambuc     felem_scalar64(ftmp4, 2);
1268ebfedea0SLionel Sambuc     /* ftmp4[i] < 2^61 */
1269ebfedea0SLionel Sambuc     felem_diff_128_64(tmp, ftmp4);
1270ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^122 + 2^64 */
1271ebfedea0SLionel Sambuc     felem_reduce(x_out, tmp);
1272ebfedea0SLionel Sambuc 
1273ebfedea0SLionel Sambuc     /* y_out = r(V-x_out) - 2 * s1 * J */
1274ebfedea0SLionel Sambuc     felem_diff64(ftmp3, x_out);
1275*0a6a1f1dSLionel Sambuc     /*
1276*0a6a1f1dSLionel Sambuc      * ftmp3[i] < 2^60 + 2^60 = 2^61
1277*0a6a1f1dSLionel Sambuc      */
1278ebfedea0SLionel Sambuc     felem_mul(tmp, ftmp5, ftmp3);
1279ebfedea0SLionel Sambuc     /* tmp[i] < 17*2^122 */
1280ebfedea0SLionel Sambuc     felem_mul(tmp2, ftmp6, ftmp2);
1281ebfedea0SLionel Sambuc     /* tmp2[i] < 17*2^120 */
1282ebfedea0SLionel Sambuc     felem_scalar128(tmp2, 2);
1283ebfedea0SLionel Sambuc     /* tmp2[i] < 17*2^121 */
1284ebfedea0SLionel Sambuc     felem_diff128(tmp, tmp2);
1285*0a6a1f1dSLionel Sambuc     /*-
1286*0a6a1f1dSLionel Sambuc      * tmp[i] < 2^127 - 2^69 + 17*2^122
1287ebfedea0SLionel Sambuc      *        = 2^126 - 2^122 - 2^6 - 2^2 - 1
1288*0a6a1f1dSLionel Sambuc      *        < 2^127
1289*0a6a1f1dSLionel Sambuc      */
1290ebfedea0SLionel Sambuc     felem_reduce(y_out, tmp);
1291ebfedea0SLionel Sambuc 
1292ebfedea0SLionel Sambuc     copy_conditional(x_out, x2, z1_is_zero);
1293ebfedea0SLionel Sambuc     copy_conditional(x_out, x1, z2_is_zero);
1294ebfedea0SLionel Sambuc     copy_conditional(y_out, y2, z1_is_zero);
1295ebfedea0SLionel Sambuc     copy_conditional(y_out, y1, z2_is_zero);
1296ebfedea0SLionel Sambuc     copy_conditional(z_out, z2, z1_is_zero);
1297ebfedea0SLionel Sambuc     copy_conditional(z_out, z1, z2_is_zero);
1298ebfedea0SLionel Sambuc     felem_assign(x3, x_out);
1299ebfedea0SLionel Sambuc     felem_assign(y3, y_out);
1300ebfedea0SLionel Sambuc     felem_assign(z3, z_out);
1301ebfedea0SLionel Sambuc }
1302ebfedea0SLionel Sambuc 
1303*0a6a1f1dSLionel Sambuc /*-
1304*0a6a1f1dSLionel Sambuc  * Base point pre computation
1305ebfedea0SLionel Sambuc  * --------------------------
1306ebfedea0SLionel Sambuc  *
1307ebfedea0SLionel Sambuc  * Two different sorts of precomputed tables are used in the following code.
1308ebfedea0SLionel Sambuc  * Each contain various points on the curve, where each point is three field
1309ebfedea0SLionel Sambuc  * elements (x, y, z).
1310ebfedea0SLionel Sambuc  *
1311ebfedea0SLionel Sambuc  * For the base point table, z is usually 1 (0 for the point at infinity).
1312ebfedea0SLionel Sambuc  * This table has 16 elements:
1313ebfedea0SLionel Sambuc  * index | bits    | point
1314ebfedea0SLionel Sambuc  * ------+---------+------------------------------
1315ebfedea0SLionel Sambuc  *     0 | 0 0 0 0 | 0G
1316ebfedea0SLionel Sambuc  *     1 | 0 0 0 1 | 1G
1317ebfedea0SLionel Sambuc  *     2 | 0 0 1 0 | 2^130G
1318ebfedea0SLionel Sambuc  *     3 | 0 0 1 1 | (2^130 + 1)G
1319ebfedea0SLionel Sambuc  *     4 | 0 1 0 0 | 2^260G
1320ebfedea0SLionel Sambuc  *     5 | 0 1 0 1 | (2^260 + 1)G
1321ebfedea0SLionel Sambuc  *     6 | 0 1 1 0 | (2^260 + 2^130)G
1322ebfedea0SLionel Sambuc  *     7 | 0 1 1 1 | (2^260 + 2^130 + 1)G
1323ebfedea0SLionel Sambuc  *     8 | 1 0 0 0 | 2^390G
1324ebfedea0SLionel Sambuc  *     9 | 1 0 0 1 | (2^390 + 1)G
1325ebfedea0SLionel Sambuc  *    10 | 1 0 1 0 | (2^390 + 2^130)G
1326ebfedea0SLionel Sambuc  *    11 | 1 0 1 1 | (2^390 + 2^130 + 1)G
1327ebfedea0SLionel Sambuc  *    12 | 1 1 0 0 | (2^390 + 2^260)G
1328ebfedea0SLionel Sambuc  *    13 | 1 1 0 1 | (2^390 + 2^260 + 1)G
1329ebfedea0SLionel Sambuc  *    14 | 1 1 1 0 | (2^390 + 2^260 + 2^130)G
1330ebfedea0SLionel Sambuc  *    15 | 1 1 1 1 | (2^390 + 2^260 + 2^130 + 1)G
1331ebfedea0SLionel Sambuc  *
1332ebfedea0SLionel Sambuc  * The reason for this is so that we can clock bits into four different
1333ebfedea0SLionel Sambuc  * locations when doing simple scalar multiplies against the base point.
1334ebfedea0SLionel Sambuc  *
1335ebfedea0SLionel Sambuc  * Tables for other points have table[i] = iG for i in 0 .. 16. */
1336ebfedea0SLionel Sambuc 
1337ebfedea0SLionel Sambuc /* gmul is the table of precomputed base points */
1338*0a6a1f1dSLionel Sambuc static const felem gmul[16][3] = { {{0, 0, 0, 0, 0, 0, 0, 0, 0},
1339ebfedea0SLionel Sambuc                                     {0, 0, 0, 0, 0, 0, 0, 0, 0},
1340ebfedea0SLionel Sambuc                                     {0, 0, 0, 0, 0, 0, 0, 0, 0}},
1341ebfedea0SLionel Sambuc {{0x017e7e31c2e5bd66, 0x022cf0615a90a6fe, 0x00127a2ffa8de334,
1342ebfedea0SLionel Sambuc   0x01dfbf9d64a3f877, 0x006b4d3dbaa14b5e, 0x014fed487e0a2bd8,
1343ebfedea0SLionel Sambuc   0x015b4429c6481390, 0x03a73678fb2d988e, 0x00c6858e06b70404},
1344ebfedea0SLionel Sambuc  {0x00be94769fd16650, 0x031c21a89cb09022, 0x039013fad0761353,
1345ebfedea0SLionel Sambuc   0x02657bd099031542, 0x03273e662c97ee72, 0x01e6d11a05ebef45,
1346ebfedea0SLionel Sambuc   0x03d1bd998f544495, 0x03001172297ed0b1, 0x011839296a789a3b},
1347ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1348ebfedea0SLionel Sambuc {{0x0373faacbc875bae, 0x00f325023721c671, 0x00f666fd3dbde5ad,
1349ebfedea0SLionel Sambuc   0x01a6932363f88ea7, 0x01fc6d9e13f9c47b, 0x03bcbffc2bbf734e,
1350ebfedea0SLionel Sambuc   0x013ee3c3647f3a92, 0x029409fefe75d07d, 0x00ef9199963d85e5},
1351ebfedea0SLionel Sambuc  {0x011173743ad5b178, 0x02499c7c21bf7d46, 0x035beaeabb8b1a58,
1352ebfedea0SLionel Sambuc   0x00f989c4752ea0a3, 0x0101e1de48a9c1a3, 0x01a20076be28ba6c,
1353ebfedea0SLionel Sambuc   0x02f8052e5eb2de95, 0x01bfe8f82dea117c, 0x0160074d3c36ddb7},
1354ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1355ebfedea0SLionel Sambuc {{0x012f3fc373393b3b, 0x03d3d6172f1419fa, 0x02adc943c0b86873,
1356ebfedea0SLionel Sambuc   0x00d475584177952b, 0x012a4d1673750ee2, 0x00512517a0f13b0c,
1357ebfedea0SLionel Sambuc   0x02b184671a7b1734, 0x0315b84236f1a50a, 0x00a4afc472edbdb9},
1358ebfedea0SLionel Sambuc  {0x00152a7077f385c4, 0x03044007d8d1c2ee, 0x0065829d61d52b52,
1359ebfedea0SLionel Sambuc   0x00494ff6b6631d0d, 0x00a11d94d5f06bcf, 0x02d2f89474d9282e,
1360ebfedea0SLionel Sambuc   0x0241c5727c06eeb9, 0x0386928710fbdb9d, 0x01f883f727b0dfbe},
1361ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1362ebfedea0SLionel Sambuc {{0x019b0c3c9185544d, 0x006243a37c9d97db, 0x02ee3cbe030a2ad2,
1363ebfedea0SLionel Sambuc   0x00cfdd946bb51e0d, 0x0271c00932606b91, 0x03f817d1ec68c561,
1364ebfedea0SLionel Sambuc   0x03f37009806a369c, 0x03c1f30baf184fd5, 0x01091022d6d2f065},
1365ebfedea0SLionel Sambuc  {0x0292c583514c45ed, 0x0316fca51f9a286c, 0x00300af507c1489a,
1366ebfedea0SLionel Sambuc   0x0295f69008298cf1, 0x02c0ed8274943d7b, 0x016509b9b47a431e,
1367ebfedea0SLionel Sambuc   0x02bc9de9634868ce, 0x005b34929bffcb09, 0x000c1a0121681524},
1368ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1369ebfedea0SLionel Sambuc {{0x0286abc0292fb9f2, 0x02665eee9805b3f7, 0x01ed7455f17f26d6,
1370ebfedea0SLionel Sambuc   0x0346355b83175d13, 0x006284944cd0a097, 0x0191895bcdec5e51,
1371ebfedea0SLionel Sambuc   0x02e288370afda7d9, 0x03b22312bfefa67a, 0x01d104d3fc0613fe},
1372ebfedea0SLionel Sambuc  {0x0092421a12f7e47f, 0x0077a83fa373c501, 0x03bd25c5f696bd0d,
1373ebfedea0SLionel Sambuc   0x035c41e4d5459761, 0x01ca0d1742b24f53, 0x00aaab27863a509c,
1374ebfedea0SLionel Sambuc   0x018b6de47df73917, 0x025c0b771705cd01, 0x01fd51d566d760a7},
1375ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1376ebfedea0SLionel Sambuc {{0x01dd92ff6b0d1dbd, 0x039c5e2e8f8afa69, 0x0261ed13242c3b27,
1377ebfedea0SLionel Sambuc   0x0382c6e67026e6a0, 0x01d60b10be2089f9, 0x03c15f3dce86723f,
1378ebfedea0SLionel Sambuc   0x03c764a32d2a062d, 0x017307eac0fad056, 0x018207c0b96c5256},
1379ebfedea0SLionel Sambuc  {0x0196a16d60e13154, 0x03e6ce74c0267030, 0x00ddbf2b4e52a5aa,
1380ebfedea0SLionel Sambuc   0x012738241bbf31c8, 0x00ebe8dc04685a28, 0x024c2ad6d380d4a2,
1381ebfedea0SLionel Sambuc   0x035ee062a6e62d0e, 0x0029ed74af7d3a0f, 0x00eef32aec142ebd},
1382ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1383ebfedea0SLionel Sambuc {{0x00c31ec398993b39, 0x03a9f45bcda68253, 0x00ac733c24c70890,
1384ebfedea0SLionel Sambuc   0x00872b111401ff01, 0x01d178c23195eafb, 0x03bca2c816b87f74,
1385ebfedea0SLionel Sambuc   0x0261a9af46fbad7a, 0x0324b2a8dd3d28f9, 0x00918121d8f24e23},
1386ebfedea0SLionel Sambuc  {0x032bc8c1ca983cd7, 0x00d869dfb08fc8c6, 0x01693cb61fce1516,
1387ebfedea0SLionel Sambuc   0x012a5ea68f4e88a8, 0x010869cab88d7ae3, 0x009081ad277ceee1,
1388ebfedea0SLionel Sambuc   0x033a77166d064cdc, 0x03955235a1fb3a95, 0x01251a4a9b25b65e},
1389ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1390ebfedea0SLionel Sambuc {{0x00148a3a1b27f40b, 0x0123186df1b31fdc, 0x00026e7beaad34ce,
1391ebfedea0SLionel Sambuc   0x01db446ac1d3dbba, 0x0299c1a33437eaec, 0x024540610183cbb7,
1392ebfedea0SLionel Sambuc   0x0173bb0e9ce92e46, 0x02b937e43921214b, 0x01ab0436a9bf01b5},
1393ebfedea0SLionel Sambuc  {0x0383381640d46948, 0x008dacbf0e7f330f, 0x03602122bcc3f318,
1394ebfedea0SLionel Sambuc   0x01ee596b200620d6, 0x03bd0585fda430b3, 0x014aed77fd123a83,
1395ebfedea0SLionel Sambuc   0x005ace749e52f742, 0x0390fe041da2b842, 0x0189a8ceb3299242},
1396ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1397ebfedea0SLionel Sambuc {{0x012a19d6b3282473, 0x00c0915918b423ce, 0x023a954eb94405ae,
1398ebfedea0SLionel Sambuc   0x00529f692be26158, 0x0289fa1b6fa4b2aa, 0x0198ae4ceea346ef,
1399ebfedea0SLionel Sambuc   0x0047d8cdfbdedd49, 0x00cc8c8953f0f6b8, 0x001424abbff49203},
1400ebfedea0SLionel Sambuc  {0x0256732a1115a03a, 0x0351bc38665c6733, 0x03f7b950fb4a6447,
1401ebfedea0SLionel Sambuc   0x000afffa94c22155, 0x025763d0a4dab540, 0x000511e92d4fc283,
1402ebfedea0SLionel Sambuc   0x030a7e9eda0ee96c, 0x004c3cd93a28bf0a, 0x017edb3a8719217f},
1403ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1404ebfedea0SLionel Sambuc {{0x011de5675a88e673, 0x031d7d0f5e567fbe, 0x0016b2062c970ae5,
1405ebfedea0SLionel Sambuc   0x03f4a2be49d90aa7, 0x03cef0bd13822866, 0x03f0923dcf774a6c,
1406ebfedea0SLionel Sambuc   0x0284bebc4f322f72, 0x016ab2645302bb2c, 0x01793f95dace0e2a},
1407ebfedea0SLionel Sambuc  {0x010646e13527a28f, 0x01ca1babd59dc5e7, 0x01afedfd9a5595df,
1408ebfedea0SLionel Sambuc   0x01f15785212ea6b1, 0x0324e5d64f6ae3f4, 0x02d680f526d00645,
1409ebfedea0SLionel Sambuc   0x0127920fadf627a7, 0x03b383f75df4f684, 0x0089e0057e783b0a},
1410ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1411ebfedea0SLionel Sambuc {{0x00f334b9eb3c26c6, 0x0298fdaa98568dce, 0x01c2d24843a82292,
1412ebfedea0SLionel Sambuc   0x020bcb24fa1b0711, 0x02cbdb3d2b1875e6, 0x0014907598f89422,
1413ebfedea0SLionel Sambuc   0x03abe3aa43b26664, 0x02cbf47f720bc168, 0x0133b5e73014b79b},
1414ebfedea0SLionel Sambuc  {0x034aab5dab05779d, 0x00cdc5d71fee9abb, 0x0399f16bd4bd9d30,
1415ebfedea0SLionel Sambuc   0x03582fa592d82647, 0x02be1cdfb775b0e9, 0x0034f7cea32e94cb,
1416ebfedea0SLionel Sambuc   0x0335a7f08f56f286, 0x03b707e9565d1c8b, 0x0015c946ea5b614f},
1417ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1418ebfedea0SLionel Sambuc {{0x024676f6cff72255, 0x00d14625cac96378, 0x00532b6008bc3767,
1419ebfedea0SLionel Sambuc   0x01fc16721b985322, 0x023355ea1b091668, 0x029de7afdc0317c3,
1420ebfedea0SLionel Sambuc   0x02fc8a7ca2da037c, 0x02de1217d74a6f30, 0x013f7173175b73bf},
1421ebfedea0SLionel Sambuc  {0x0344913f441490b5, 0x0200f9e272b61eca, 0x0258a246b1dd55d2,
1422ebfedea0SLionel Sambuc   0x03753db9ea496f36, 0x025e02937a09c5ef, 0x030cbd3d14012692,
1423ebfedea0SLionel Sambuc   0x01793a67e70dc72a, 0x03ec1d37048a662e, 0x006550f700c32a8d},
1424ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1425ebfedea0SLionel Sambuc {{0x00d3f48a347eba27, 0x008e636649b61bd8, 0x00d3b93716778fb3,
1426ebfedea0SLionel Sambuc   0x004d1915757bd209, 0x019d5311a3da44e0, 0x016d1afcbbe6aade,
1427ebfedea0SLionel Sambuc   0x0241bf5f73265616, 0x0384672e5d50d39b, 0x005009fee522b684},
1428ebfedea0SLionel Sambuc  {0x029b4fab064435fe, 0x018868ee095bbb07, 0x01ea3d6936cc92b8,
1429ebfedea0SLionel Sambuc   0x000608b00f78a2f3, 0x02db911073d1c20f, 0x018205938470100a,
1430ebfedea0SLionel Sambuc   0x01f1e4964cbe6ff2, 0x021a19a29eed4663, 0x01414485f42afa81},
1431ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1432ebfedea0SLionel Sambuc {{0x01612b3a17f63e34, 0x03813992885428e6, 0x022b3c215b5a9608,
1433ebfedea0SLionel Sambuc   0x029b4057e19f2fcb, 0x0384059a587af7e6, 0x02d6400ace6fe610,
1434ebfedea0SLionel Sambuc   0x029354d896e8e331, 0x00c047ee6dfba65e, 0x0037720542e9d49d},
1435ebfedea0SLionel Sambuc  {0x02ce9eed7c5e9278, 0x0374ed703e79643b, 0x01316c54c4072006,
1436ebfedea0SLionel Sambuc   0x005aaa09054b2ee8, 0x002824000c840d57, 0x03d4eba24771ed86,
1437ebfedea0SLionel Sambuc   0x0189c50aabc3bdae, 0x0338c01541e15510, 0x00466d56e38eed42},
1438ebfedea0SLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}},
1439ebfedea0SLionel Sambuc {{0x007efd8330ad8bd6, 0x02465ed48047710b, 0x0034c6606b215e0c,
1440ebfedea0SLionel Sambuc   0x016ae30c53cbf839, 0x01fa17bd37161216, 0x018ead4e61ce8ab9,
1441ebfedea0SLionel Sambuc   0x005482ed5f5dee46, 0x037543755bba1d7f, 0x005e5ac7e70a9d0f},
1442ebfedea0SLionel Sambuc  {0x0117e1bb2fdcb2a2, 0x03deea36249f40c4, 0x028d09b4a6246cb7,
1443ebfedea0SLionel Sambuc   0x03524b8855bcf756, 0x023d7d109d5ceb58, 0x0178e43e3223ef9c,
1444ebfedea0SLionel Sambuc   0x0154536a0c6e966a, 0x037964d1286ee9fe, 0x0199bcd90e125055},
1445*0a6a1f1dSLionel Sambuc  {1, 0, 0, 0, 0, 0, 0, 0, 0}}
1446*0a6a1f1dSLionel Sambuc };
1447ebfedea0SLionel Sambuc 
1448*0a6a1f1dSLionel Sambuc /*
1449*0a6a1f1dSLionel Sambuc  * select_point selects the |idx|th point from a precomputation table and
1450*0a6a1f1dSLionel Sambuc  * copies it to out.
1451*0a6a1f1dSLionel Sambuc  */
1452*0a6a1f1dSLionel Sambuc  /* pre_comp below is of the size provided in |size| */
select_point(const limb idx,unsigned int size,const felem pre_comp[][3],felem out[3])1453*0a6a1f1dSLionel Sambuc static void select_point(const limb idx, unsigned int size,
1454*0a6a1f1dSLionel Sambuc                          const felem pre_comp[][3], felem out[3])
1455ebfedea0SLionel Sambuc {
1456ebfedea0SLionel Sambuc     unsigned i, j;
1457ebfedea0SLionel Sambuc     limb *outlimbs = &out[0][0];
1458ebfedea0SLionel Sambuc     memset(outlimbs, 0, 3 * sizeof(felem));
1459ebfedea0SLionel Sambuc 
1460*0a6a1f1dSLionel Sambuc     for (i = 0; i < size; i++) {
1461ebfedea0SLionel Sambuc         const limb *inlimbs = &pre_comp[i][0][0];
1462ebfedea0SLionel Sambuc         limb mask = i ^ idx;
1463ebfedea0SLionel Sambuc         mask |= mask >> 4;
1464ebfedea0SLionel Sambuc         mask |= mask >> 2;
1465ebfedea0SLionel Sambuc         mask |= mask >> 1;
1466ebfedea0SLionel Sambuc         mask &= 1;
1467ebfedea0SLionel Sambuc         mask--;
1468ebfedea0SLionel Sambuc         for (j = 0; j < NLIMBS * 3; j++)
1469ebfedea0SLionel Sambuc             outlimbs[j] |= inlimbs[j] & mask;
1470ebfedea0SLionel Sambuc     }
1471ebfedea0SLionel Sambuc }
1472ebfedea0SLionel Sambuc 
1473ebfedea0SLionel Sambuc /* get_bit returns the |i|th bit in |in| */
get_bit(const felem_bytearray in,int i)1474ebfedea0SLionel Sambuc static char get_bit(const felem_bytearray in, int i)
1475ebfedea0SLionel Sambuc {
1476ebfedea0SLionel Sambuc     if (i < 0)
1477ebfedea0SLionel Sambuc         return 0;
1478ebfedea0SLionel Sambuc     return (in[i >> 3] >> (i & 7)) & 1;
1479ebfedea0SLionel Sambuc }
1480ebfedea0SLionel Sambuc 
1481*0a6a1f1dSLionel Sambuc /*
1482*0a6a1f1dSLionel Sambuc  * Interleaved point multiplication using precomputed point multiples: The
1483*0a6a1f1dSLionel Sambuc  * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1484*0a6a1f1dSLionel Sambuc  * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1485*0a6a1f1dSLionel Sambuc  * generator, using certain (large) precomputed multiples in g_pre_comp.
1486*0a6a1f1dSLionel Sambuc  * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1487*0a6a1f1dSLionel Sambuc  */
batch_mul(felem x_out,felem y_out,felem z_out,const felem_bytearray scalars[],const unsigned num_points,const u8 * g_scalar,const int mixed,const felem pre_comp[][17][3],const felem g_pre_comp[16][3])1488ebfedea0SLionel Sambuc static void batch_mul(felem x_out, felem y_out, felem z_out,
1489*0a6a1f1dSLionel Sambuc                       const felem_bytearray scalars[],
1490*0a6a1f1dSLionel Sambuc                       const unsigned num_points, const u8 *g_scalar,
1491*0a6a1f1dSLionel Sambuc                       const int mixed, const felem pre_comp[][17][3],
1492*0a6a1f1dSLionel Sambuc                       const felem g_pre_comp[16][3])
1493ebfedea0SLionel Sambuc {
1494ebfedea0SLionel Sambuc     int i, skip;
1495ebfedea0SLionel Sambuc     unsigned num, gen_mul = (g_scalar != NULL);
1496ebfedea0SLionel Sambuc     felem nq[3], tmp[4];
1497ebfedea0SLionel Sambuc     limb bits;
1498ebfedea0SLionel Sambuc     u8 sign, digit;
1499ebfedea0SLionel Sambuc 
1500ebfedea0SLionel Sambuc     /* set nq to the point at infinity */
1501ebfedea0SLionel Sambuc     memset(nq, 0, 3 * sizeof(felem));
1502ebfedea0SLionel Sambuc 
1503*0a6a1f1dSLionel Sambuc     /*
1504*0a6a1f1dSLionel Sambuc      * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1505*0a6a1f1dSLionel Sambuc      * of the generator (last quarter of rounds) and additions of other
1506*0a6a1f1dSLionel Sambuc      * points multiples (every 5th round).
1507ebfedea0SLionel Sambuc      */
1508*0a6a1f1dSLionel Sambuc     skip = 1;                   /* save two point operations in the first
1509*0a6a1f1dSLionel Sambuc                                  * round */
1510*0a6a1f1dSLionel Sambuc     for (i = (num_points ? 520 : 130); i >= 0; --i) {
1511ebfedea0SLionel Sambuc         /* double */
1512ebfedea0SLionel Sambuc         if (!skip)
1513ebfedea0SLionel Sambuc             point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1514ebfedea0SLionel Sambuc 
1515ebfedea0SLionel Sambuc         /* add multiples of the generator */
1516*0a6a1f1dSLionel Sambuc         if (gen_mul && (i <= 130)) {
1517ebfedea0SLionel Sambuc             bits = get_bit(g_scalar, i + 390) << 3;
1518*0a6a1f1dSLionel Sambuc             if (i < 130) {
1519ebfedea0SLionel Sambuc                 bits |= get_bit(g_scalar, i + 260) << 2;
1520ebfedea0SLionel Sambuc                 bits |= get_bit(g_scalar, i + 130) << 1;
1521ebfedea0SLionel Sambuc                 bits |= get_bit(g_scalar, i);
1522ebfedea0SLionel Sambuc             }
1523ebfedea0SLionel Sambuc             /* select the point to add, in constant time */
1524ebfedea0SLionel Sambuc             select_point(bits, 16, g_pre_comp, tmp);
1525*0a6a1f1dSLionel Sambuc             if (!skip) {
1526*0a6a1f1dSLionel Sambuc                 /* The 1 argument below is for "mixed" */
1527ebfedea0SLionel Sambuc                 point_add(nq[0], nq[1], nq[2],
1528*0a6a1f1dSLionel Sambuc                           nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1529*0a6a1f1dSLionel Sambuc             } else {
1530ebfedea0SLionel Sambuc                 memcpy(nq, tmp, 3 * sizeof(felem));
1531ebfedea0SLionel Sambuc                 skip = 0;
1532ebfedea0SLionel Sambuc             }
1533ebfedea0SLionel Sambuc         }
1534ebfedea0SLionel Sambuc 
1535ebfedea0SLionel Sambuc         /* do other additions every 5 doublings */
1536*0a6a1f1dSLionel Sambuc         if (num_points && (i % 5 == 0)) {
1537ebfedea0SLionel Sambuc             /* loop over all scalars */
1538*0a6a1f1dSLionel Sambuc             for (num = 0; num < num_points; ++num) {
1539ebfedea0SLionel Sambuc                 bits = get_bit(scalars[num], i + 4) << 5;
1540ebfedea0SLionel Sambuc                 bits |= get_bit(scalars[num], i + 3) << 4;
1541ebfedea0SLionel Sambuc                 bits |= get_bit(scalars[num], i + 2) << 3;
1542ebfedea0SLionel Sambuc                 bits |= get_bit(scalars[num], i + 1) << 2;
1543ebfedea0SLionel Sambuc                 bits |= get_bit(scalars[num], i) << 1;
1544ebfedea0SLionel Sambuc                 bits |= get_bit(scalars[num], i - 1);
1545ebfedea0SLionel Sambuc                 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1546ebfedea0SLionel Sambuc 
1547*0a6a1f1dSLionel Sambuc                 /*
1548*0a6a1f1dSLionel Sambuc                  * select the point to add or subtract, in constant time
1549*0a6a1f1dSLionel Sambuc                  */
1550ebfedea0SLionel Sambuc                 select_point(digit, 17, pre_comp[num], tmp);
1551*0a6a1f1dSLionel Sambuc                 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1552*0a6a1f1dSLionel Sambuc                                             * point */
1553ebfedea0SLionel Sambuc                 copy_conditional(tmp[1], tmp[3], (-(limb) sign));
1554ebfedea0SLionel Sambuc 
1555*0a6a1f1dSLionel Sambuc                 if (!skip) {
1556ebfedea0SLionel Sambuc                     point_add(nq[0], nq[1], nq[2],
1557ebfedea0SLionel Sambuc                               nq[0], nq[1], nq[2],
1558ebfedea0SLionel Sambuc                               mixed, tmp[0], tmp[1], tmp[2]);
1559*0a6a1f1dSLionel Sambuc                 } else {
1560ebfedea0SLionel Sambuc                     memcpy(nq, tmp, 3 * sizeof(felem));
1561ebfedea0SLionel Sambuc                     skip = 0;
1562ebfedea0SLionel Sambuc                 }
1563ebfedea0SLionel Sambuc             }
1564ebfedea0SLionel Sambuc         }
1565ebfedea0SLionel Sambuc     }
1566ebfedea0SLionel Sambuc     felem_assign(x_out, nq[0]);
1567ebfedea0SLionel Sambuc     felem_assign(y_out, nq[1]);
1568ebfedea0SLionel Sambuc     felem_assign(z_out, nq[2]);
1569ebfedea0SLionel Sambuc }
1570ebfedea0SLionel Sambuc 
1571ebfedea0SLionel Sambuc /* Precomputation for the group generator. */
1572ebfedea0SLionel Sambuc typedef struct {
1573ebfedea0SLionel Sambuc     felem g_pre_comp[16][3];
1574ebfedea0SLionel Sambuc     int references;
1575ebfedea0SLionel Sambuc } NISTP521_PRE_COMP;
1576ebfedea0SLionel Sambuc 
EC_GFp_nistp521_method(void)1577ebfedea0SLionel Sambuc const EC_METHOD *EC_GFp_nistp521_method(void)
1578ebfedea0SLionel Sambuc {
1579ebfedea0SLionel Sambuc     static const EC_METHOD ret = {
1580ebfedea0SLionel Sambuc         EC_FLAGS_DEFAULT_OCT,
1581ebfedea0SLionel Sambuc         NID_X9_62_prime_field,
1582ebfedea0SLionel Sambuc         ec_GFp_nistp521_group_init,
1583ebfedea0SLionel Sambuc         ec_GFp_simple_group_finish,
1584ebfedea0SLionel Sambuc         ec_GFp_simple_group_clear_finish,
1585ebfedea0SLionel Sambuc         ec_GFp_nist_group_copy,
1586ebfedea0SLionel Sambuc         ec_GFp_nistp521_group_set_curve,
1587ebfedea0SLionel Sambuc         ec_GFp_simple_group_get_curve,
1588ebfedea0SLionel Sambuc         ec_GFp_simple_group_get_degree,
1589ebfedea0SLionel Sambuc         ec_GFp_simple_group_check_discriminant,
1590ebfedea0SLionel Sambuc         ec_GFp_simple_point_init,
1591ebfedea0SLionel Sambuc         ec_GFp_simple_point_finish,
1592ebfedea0SLionel Sambuc         ec_GFp_simple_point_clear_finish,
1593ebfedea0SLionel Sambuc         ec_GFp_simple_point_copy,
1594ebfedea0SLionel Sambuc         ec_GFp_simple_point_set_to_infinity,
1595ebfedea0SLionel Sambuc         ec_GFp_simple_set_Jprojective_coordinates_GFp,
1596ebfedea0SLionel Sambuc         ec_GFp_simple_get_Jprojective_coordinates_GFp,
1597ebfedea0SLionel Sambuc         ec_GFp_simple_point_set_affine_coordinates,
1598ebfedea0SLionel Sambuc         ec_GFp_nistp521_point_get_affine_coordinates,
1599ebfedea0SLionel Sambuc         0 /* point_set_compressed_coordinates */ ,
1600ebfedea0SLionel Sambuc         0 /* point2oct */ ,
1601ebfedea0SLionel Sambuc         0 /* oct2point */ ,
1602ebfedea0SLionel Sambuc         ec_GFp_simple_add,
1603ebfedea0SLionel Sambuc         ec_GFp_simple_dbl,
1604ebfedea0SLionel Sambuc         ec_GFp_simple_invert,
1605ebfedea0SLionel Sambuc         ec_GFp_simple_is_at_infinity,
1606ebfedea0SLionel Sambuc         ec_GFp_simple_is_on_curve,
1607ebfedea0SLionel Sambuc         ec_GFp_simple_cmp,
1608ebfedea0SLionel Sambuc         ec_GFp_simple_make_affine,
1609ebfedea0SLionel Sambuc         ec_GFp_simple_points_make_affine,
1610ebfedea0SLionel Sambuc         ec_GFp_nistp521_points_mul,
1611ebfedea0SLionel Sambuc         ec_GFp_nistp521_precompute_mult,
1612ebfedea0SLionel Sambuc         ec_GFp_nistp521_have_precompute_mult,
1613ebfedea0SLionel Sambuc         ec_GFp_nist_field_mul,
1614ebfedea0SLionel Sambuc         ec_GFp_nist_field_sqr,
1615ebfedea0SLionel Sambuc         0 /* field_div */ ,
1616ebfedea0SLionel Sambuc         0 /* field_encode */ ,
1617ebfedea0SLionel Sambuc         0 /* field_decode */ ,
1618*0a6a1f1dSLionel Sambuc         0                       /* field_set_to_one */
1619*0a6a1f1dSLionel Sambuc     };
1620ebfedea0SLionel Sambuc 
1621ebfedea0SLionel Sambuc     return &ret;
1622ebfedea0SLionel Sambuc }
1623ebfedea0SLionel Sambuc 
1624ebfedea0SLionel Sambuc /******************************************************************************/
1625*0a6a1f1dSLionel Sambuc /*
1626*0a6a1f1dSLionel Sambuc  * FUNCTIONS TO MANAGE PRECOMPUTATION
1627ebfedea0SLionel Sambuc  */
1628ebfedea0SLionel Sambuc 
nistp521_pre_comp_new()1629ebfedea0SLionel Sambuc static NISTP521_PRE_COMP *nistp521_pre_comp_new()
1630ebfedea0SLionel Sambuc {
1631ebfedea0SLionel Sambuc     NISTP521_PRE_COMP *ret = NULL;
1632ebfedea0SLionel Sambuc     ret = (NISTP521_PRE_COMP *) OPENSSL_malloc(sizeof(NISTP521_PRE_COMP));
1633*0a6a1f1dSLionel Sambuc     if (!ret) {
1634ebfedea0SLionel Sambuc         ECerr(EC_F_NISTP521_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1635ebfedea0SLionel Sambuc         return ret;
1636ebfedea0SLionel Sambuc     }
1637ebfedea0SLionel Sambuc     memset(ret->g_pre_comp, 0, sizeof(ret->g_pre_comp));
1638ebfedea0SLionel Sambuc     ret->references = 1;
1639ebfedea0SLionel Sambuc     return ret;
1640ebfedea0SLionel Sambuc }
1641ebfedea0SLionel Sambuc 
nistp521_pre_comp_dup(void * src_)1642ebfedea0SLionel Sambuc static void *nistp521_pre_comp_dup(void *src_)
1643ebfedea0SLionel Sambuc {
1644ebfedea0SLionel Sambuc     NISTP521_PRE_COMP *src = src_;
1645ebfedea0SLionel Sambuc 
1646ebfedea0SLionel Sambuc     /* no need to actually copy, these objects never change! */
1647ebfedea0SLionel Sambuc     CRYPTO_add(&src->references, 1, CRYPTO_LOCK_EC_PRE_COMP);
1648ebfedea0SLionel Sambuc 
1649ebfedea0SLionel Sambuc     return src_;
1650ebfedea0SLionel Sambuc }
1651ebfedea0SLionel Sambuc 
nistp521_pre_comp_free(void * pre_)1652ebfedea0SLionel Sambuc static void nistp521_pre_comp_free(void *pre_)
1653ebfedea0SLionel Sambuc {
1654ebfedea0SLionel Sambuc     int i;
1655ebfedea0SLionel Sambuc     NISTP521_PRE_COMP *pre = pre_;
1656ebfedea0SLionel Sambuc 
1657ebfedea0SLionel Sambuc     if (!pre)
1658ebfedea0SLionel Sambuc         return;
1659ebfedea0SLionel Sambuc 
1660ebfedea0SLionel Sambuc     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1661ebfedea0SLionel Sambuc     if (i > 0)
1662ebfedea0SLionel Sambuc         return;
1663ebfedea0SLionel Sambuc 
1664ebfedea0SLionel Sambuc     OPENSSL_free(pre);
1665ebfedea0SLionel Sambuc }
1666ebfedea0SLionel Sambuc 
nistp521_pre_comp_clear_free(void * pre_)1667ebfedea0SLionel Sambuc static void nistp521_pre_comp_clear_free(void *pre_)
1668ebfedea0SLionel Sambuc {
1669ebfedea0SLionel Sambuc     int i;
1670ebfedea0SLionel Sambuc     NISTP521_PRE_COMP *pre = pre_;
1671ebfedea0SLionel Sambuc 
1672ebfedea0SLionel Sambuc     if (!pre)
1673ebfedea0SLionel Sambuc         return;
1674ebfedea0SLionel Sambuc 
1675ebfedea0SLionel Sambuc     i = CRYPTO_add(&pre->references, -1, CRYPTO_LOCK_EC_PRE_COMP);
1676ebfedea0SLionel Sambuc     if (i > 0)
1677ebfedea0SLionel Sambuc         return;
1678ebfedea0SLionel Sambuc 
1679ebfedea0SLionel Sambuc     OPENSSL_cleanse(pre, sizeof(*pre));
1680ebfedea0SLionel Sambuc     OPENSSL_free(pre);
1681ebfedea0SLionel Sambuc }
1682ebfedea0SLionel Sambuc 
1683ebfedea0SLionel Sambuc /******************************************************************************/
1684*0a6a1f1dSLionel Sambuc /*
1685*0a6a1f1dSLionel Sambuc  * OPENSSL EC_METHOD FUNCTIONS
1686ebfedea0SLionel Sambuc  */
1687ebfedea0SLionel Sambuc 
ec_GFp_nistp521_group_init(EC_GROUP * group)1688ebfedea0SLionel Sambuc int ec_GFp_nistp521_group_init(EC_GROUP *group)
1689ebfedea0SLionel Sambuc {
1690ebfedea0SLionel Sambuc     int ret;
1691ebfedea0SLionel Sambuc     ret = ec_GFp_simple_group_init(group);
1692ebfedea0SLionel Sambuc     group->a_is_minus3 = 1;
1693ebfedea0SLionel Sambuc     return ret;
1694ebfedea0SLionel Sambuc }
1695ebfedea0SLionel Sambuc 
ec_GFp_nistp521_group_set_curve(EC_GROUP * group,const BIGNUM * p,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)1696ebfedea0SLionel Sambuc int ec_GFp_nistp521_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1697*0a6a1f1dSLionel Sambuc                                     const BIGNUM *a, const BIGNUM *b,
1698*0a6a1f1dSLionel Sambuc                                     BN_CTX *ctx)
1699ebfedea0SLionel Sambuc {
1700ebfedea0SLionel Sambuc     int ret = 0;
1701ebfedea0SLionel Sambuc     BN_CTX *new_ctx = NULL;
1702ebfedea0SLionel Sambuc     BIGNUM *curve_p, *curve_a, *curve_b;
1703ebfedea0SLionel Sambuc 
1704ebfedea0SLionel Sambuc     if (ctx == NULL)
1705*0a6a1f1dSLionel Sambuc         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1706*0a6a1f1dSLionel Sambuc             return 0;
1707ebfedea0SLionel Sambuc     BN_CTX_start(ctx);
1708ebfedea0SLionel Sambuc     if (((curve_p = BN_CTX_get(ctx)) == NULL) ||
1709ebfedea0SLionel Sambuc         ((curve_a = BN_CTX_get(ctx)) == NULL) ||
1710*0a6a1f1dSLionel Sambuc         ((curve_b = BN_CTX_get(ctx)) == NULL))
1711*0a6a1f1dSLionel Sambuc         goto err;
1712ebfedea0SLionel Sambuc     BN_bin2bn(nistp521_curve_params[0], sizeof(felem_bytearray), curve_p);
1713ebfedea0SLionel Sambuc     BN_bin2bn(nistp521_curve_params[1], sizeof(felem_bytearray), curve_a);
1714ebfedea0SLionel Sambuc     BN_bin2bn(nistp521_curve_params[2], sizeof(felem_bytearray), curve_b);
1715*0a6a1f1dSLionel Sambuc     if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1716ebfedea0SLionel Sambuc         ECerr(EC_F_EC_GFP_NISTP521_GROUP_SET_CURVE,
1717ebfedea0SLionel Sambuc               EC_R_WRONG_CURVE_PARAMETERS);
1718ebfedea0SLionel Sambuc         goto err;
1719ebfedea0SLionel Sambuc     }
1720ebfedea0SLionel Sambuc     group->field_mod_func = BN_nist_mod_521;
1721ebfedea0SLionel Sambuc     ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1722ebfedea0SLionel Sambuc  err:
1723ebfedea0SLionel Sambuc     BN_CTX_end(ctx);
1724ebfedea0SLionel Sambuc     if (new_ctx != NULL)
1725ebfedea0SLionel Sambuc         BN_CTX_free(new_ctx);
1726ebfedea0SLionel Sambuc     return ret;
1727ebfedea0SLionel Sambuc }
1728ebfedea0SLionel Sambuc 
1729*0a6a1f1dSLionel Sambuc /*
1730*0a6a1f1dSLionel Sambuc  * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1731*0a6a1f1dSLionel Sambuc  * (X/Z^2, Y/Z^3)
1732*0a6a1f1dSLionel Sambuc  */
ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP * group,const EC_POINT * point,BIGNUM * x,BIGNUM * y,BN_CTX * ctx)1733ebfedea0SLionel Sambuc int ec_GFp_nistp521_point_get_affine_coordinates(const EC_GROUP *group,
1734*0a6a1f1dSLionel Sambuc                                                  const EC_POINT *point,
1735*0a6a1f1dSLionel Sambuc                                                  BIGNUM *x, BIGNUM *y,
1736*0a6a1f1dSLionel Sambuc                                                  BN_CTX *ctx)
1737ebfedea0SLionel Sambuc {
1738ebfedea0SLionel Sambuc     felem z1, z2, x_in, y_in, x_out, y_out;
1739ebfedea0SLionel Sambuc     largefelem tmp;
1740ebfedea0SLionel Sambuc 
1741*0a6a1f1dSLionel Sambuc     if (EC_POINT_is_at_infinity(group, point)) {
1742ebfedea0SLionel Sambuc         ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1743ebfedea0SLionel Sambuc               EC_R_POINT_AT_INFINITY);
1744ebfedea0SLionel Sambuc         return 0;
1745ebfedea0SLionel Sambuc     }
1746ebfedea0SLionel Sambuc     if ((!BN_to_felem(x_in, &point->X)) || (!BN_to_felem(y_in, &point->Y)) ||
1747*0a6a1f1dSLionel Sambuc         (!BN_to_felem(z1, &point->Z)))
1748*0a6a1f1dSLionel Sambuc         return 0;
1749ebfedea0SLionel Sambuc     felem_inv(z2, z1);
1750*0a6a1f1dSLionel Sambuc     felem_square(tmp, z2);
1751*0a6a1f1dSLionel Sambuc     felem_reduce(z1, tmp);
1752*0a6a1f1dSLionel Sambuc     felem_mul(tmp, x_in, z1);
1753*0a6a1f1dSLionel Sambuc     felem_reduce(x_in, tmp);
1754ebfedea0SLionel Sambuc     felem_contract(x_out, x_in);
1755*0a6a1f1dSLionel Sambuc     if (x != NULL) {
1756*0a6a1f1dSLionel Sambuc         if (!felem_to_BN(x, x_out)) {
1757*0a6a1f1dSLionel Sambuc             ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1758*0a6a1f1dSLionel Sambuc                   ERR_R_BN_LIB);
1759ebfedea0SLionel Sambuc             return 0;
1760ebfedea0SLionel Sambuc         }
1761ebfedea0SLionel Sambuc     }
1762*0a6a1f1dSLionel Sambuc     felem_mul(tmp, z1, z2);
1763*0a6a1f1dSLionel Sambuc     felem_reduce(z1, tmp);
1764*0a6a1f1dSLionel Sambuc     felem_mul(tmp, y_in, z1);
1765*0a6a1f1dSLionel Sambuc     felem_reduce(y_in, tmp);
1766ebfedea0SLionel Sambuc     felem_contract(y_out, y_in);
1767*0a6a1f1dSLionel Sambuc     if (y != NULL) {
1768*0a6a1f1dSLionel Sambuc         if (!felem_to_BN(y, y_out)) {
1769*0a6a1f1dSLionel Sambuc             ECerr(EC_F_EC_GFP_NISTP521_POINT_GET_AFFINE_COORDINATES,
1770*0a6a1f1dSLionel Sambuc                   ERR_R_BN_LIB);
1771ebfedea0SLionel Sambuc             return 0;
1772ebfedea0SLionel Sambuc         }
1773ebfedea0SLionel Sambuc     }
1774ebfedea0SLionel Sambuc     return 1;
1775ebfedea0SLionel Sambuc }
1776ebfedea0SLionel Sambuc 
1777*0a6a1f1dSLionel Sambuc /* points below is of size |num|, and tmp_felems is of size |num+1/ */
make_points_affine(size_t num,felem points[][3],felem tmp_felems[])1778*0a6a1f1dSLionel Sambuc static void make_points_affine(size_t num, felem points[][3],
1779*0a6a1f1dSLionel Sambuc                                felem tmp_felems[])
1780ebfedea0SLionel Sambuc {
1781*0a6a1f1dSLionel Sambuc     /*
1782*0a6a1f1dSLionel Sambuc      * Runs in constant time, unless an input is the point at infinity (which
1783*0a6a1f1dSLionel Sambuc      * normally shouldn't happen).
1784*0a6a1f1dSLionel Sambuc      */
1785*0a6a1f1dSLionel Sambuc     ec_GFp_nistp_points_make_affine_internal(num,
1786ebfedea0SLionel Sambuc                                              points,
1787ebfedea0SLionel Sambuc                                              sizeof(felem),
1788ebfedea0SLionel Sambuc                                              tmp_felems,
1789ebfedea0SLionel Sambuc                                              (void (*)(void *))felem_one,
1790*0a6a1f1dSLionel Sambuc                                              (int (*)(const void *))
1791*0a6a1f1dSLionel Sambuc                                              felem_is_zero_int,
1792*0a6a1f1dSLionel Sambuc                                              (void (*)(void *, const void *))
1793*0a6a1f1dSLionel Sambuc                                              felem_assign,
1794*0a6a1f1dSLionel Sambuc                                              (void (*)(void *, const void *))
1795*0a6a1f1dSLionel Sambuc                                              felem_square_reduce, (void (*)
1796*0a6a1f1dSLionel Sambuc                                                                    (void *,
1797*0a6a1f1dSLionel Sambuc                                                                     const void
1798*0a6a1f1dSLionel Sambuc                                                                     *,
1799*0a6a1f1dSLionel Sambuc                                                                     const void
1800*0a6a1f1dSLionel Sambuc                                                                     *))
1801*0a6a1f1dSLionel Sambuc                                              felem_mul_reduce,
1802*0a6a1f1dSLionel Sambuc                                              (void (*)(void *, const void *))
1803*0a6a1f1dSLionel Sambuc                                              felem_inv,
1804*0a6a1f1dSLionel Sambuc                                              (void (*)(void *, const void *))
1805*0a6a1f1dSLionel Sambuc                                              felem_contract);
1806ebfedea0SLionel Sambuc }
1807ebfedea0SLionel Sambuc 
1808*0a6a1f1dSLionel Sambuc /*
1809*0a6a1f1dSLionel Sambuc  * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1810*0a6a1f1dSLionel Sambuc  * values Result is stored in r (r can equal one of the inputs).
1811*0a6a1f1dSLionel Sambuc  */
ec_GFp_nistp521_points_mul(const EC_GROUP * group,EC_POINT * r,const BIGNUM * scalar,size_t num,const EC_POINT * points[],const BIGNUM * scalars[],BN_CTX * ctx)1812ebfedea0SLionel Sambuc int ec_GFp_nistp521_points_mul(const EC_GROUP *group, EC_POINT *r,
1813*0a6a1f1dSLionel Sambuc                                const BIGNUM *scalar, size_t num,
1814*0a6a1f1dSLionel Sambuc                                const EC_POINT *points[],
1815ebfedea0SLionel Sambuc                                const BIGNUM *scalars[], BN_CTX *ctx)
1816ebfedea0SLionel Sambuc {
1817ebfedea0SLionel Sambuc     int ret = 0;
1818ebfedea0SLionel Sambuc     int j;
1819ebfedea0SLionel Sambuc     int mixed = 0;
1820ebfedea0SLionel Sambuc     BN_CTX *new_ctx = NULL;
1821ebfedea0SLionel Sambuc     BIGNUM *x, *y, *z, *tmp_scalar;
1822ebfedea0SLionel Sambuc     felem_bytearray g_secret;
1823ebfedea0SLionel Sambuc     felem_bytearray *secrets = NULL;
1824ebfedea0SLionel Sambuc     felem(*pre_comp)[17][3] = NULL;
1825ebfedea0SLionel Sambuc     felem *tmp_felems = NULL;
1826ebfedea0SLionel Sambuc     felem_bytearray tmp;
1827ebfedea0SLionel Sambuc     unsigned i, num_bytes;
1828ebfedea0SLionel Sambuc     int have_pre_comp = 0;
1829ebfedea0SLionel Sambuc     size_t num_points = num;
1830ebfedea0SLionel Sambuc     felem x_in, y_in, z_in, x_out, y_out, z_out;
1831ebfedea0SLionel Sambuc     NISTP521_PRE_COMP *pre = NULL;
1832ebfedea0SLionel Sambuc     felem(*g_pre_comp)[3] = NULL;
1833ebfedea0SLionel Sambuc     EC_POINT *generator = NULL;
1834ebfedea0SLionel Sambuc     const EC_POINT *p = NULL;
1835ebfedea0SLionel Sambuc     const BIGNUM *p_scalar = NULL;
1836ebfedea0SLionel Sambuc 
1837ebfedea0SLionel Sambuc     if (ctx == NULL)
1838*0a6a1f1dSLionel Sambuc         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1839*0a6a1f1dSLionel Sambuc             return 0;
1840ebfedea0SLionel Sambuc     BN_CTX_start(ctx);
1841ebfedea0SLionel Sambuc     if (((x = BN_CTX_get(ctx)) == NULL) ||
1842ebfedea0SLionel Sambuc         ((y = BN_CTX_get(ctx)) == NULL) ||
1843ebfedea0SLionel Sambuc         ((z = BN_CTX_get(ctx)) == NULL) ||
1844ebfedea0SLionel Sambuc         ((tmp_scalar = BN_CTX_get(ctx)) == NULL))
1845ebfedea0SLionel Sambuc         goto err;
1846ebfedea0SLionel Sambuc 
1847*0a6a1f1dSLionel Sambuc     if (scalar != NULL) {
1848ebfedea0SLionel Sambuc         pre = EC_EX_DATA_get_data(group->extra_data,
1849*0a6a1f1dSLionel Sambuc                                   nistp521_pre_comp_dup,
1850*0a6a1f1dSLionel Sambuc                                   nistp521_pre_comp_free,
1851ebfedea0SLionel Sambuc                                   nistp521_pre_comp_clear_free);
1852ebfedea0SLionel Sambuc         if (pre)
1853ebfedea0SLionel Sambuc             /* we have precomputation, try to use it */
1854ebfedea0SLionel Sambuc             g_pre_comp = &pre->g_pre_comp[0];
1855ebfedea0SLionel Sambuc         else
1856ebfedea0SLionel Sambuc             /* try to use the standard precomputation */
1857ebfedea0SLionel Sambuc             g_pre_comp = (felem(*)[3]) gmul;
1858ebfedea0SLionel Sambuc         generator = EC_POINT_new(group);
1859ebfedea0SLionel Sambuc         if (generator == NULL)
1860ebfedea0SLionel Sambuc             goto err;
1861ebfedea0SLionel Sambuc         /* get the generator from precomputation */
1862ebfedea0SLionel Sambuc         if (!felem_to_BN(x, g_pre_comp[1][0]) ||
1863ebfedea0SLionel Sambuc             !felem_to_BN(y, g_pre_comp[1][1]) ||
1864*0a6a1f1dSLionel Sambuc             !felem_to_BN(z, g_pre_comp[1][2])) {
1865ebfedea0SLionel Sambuc             ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1866ebfedea0SLionel Sambuc             goto err;
1867ebfedea0SLionel Sambuc         }
1868ebfedea0SLionel Sambuc         if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1869*0a6a1f1dSLionel Sambuc                                                       generator, x, y, z,
1870*0a6a1f1dSLionel Sambuc                                                       ctx))
1871ebfedea0SLionel Sambuc             goto err;
1872ebfedea0SLionel Sambuc         if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1873ebfedea0SLionel Sambuc             /* precomputation matches generator */
1874ebfedea0SLionel Sambuc             have_pre_comp = 1;
1875ebfedea0SLionel Sambuc         else
1876*0a6a1f1dSLionel Sambuc             /*
1877*0a6a1f1dSLionel Sambuc              * we don't have valid precomputation: treat the generator as a
1878*0a6a1f1dSLionel Sambuc              * random point
1879*0a6a1f1dSLionel Sambuc              */
1880ebfedea0SLionel Sambuc             num_points++;
1881ebfedea0SLionel Sambuc     }
1882ebfedea0SLionel Sambuc 
1883*0a6a1f1dSLionel Sambuc     if (num_points > 0) {
1884*0a6a1f1dSLionel Sambuc         if (num_points >= 2) {
1885*0a6a1f1dSLionel Sambuc             /*
1886*0a6a1f1dSLionel Sambuc              * unless we precompute multiples for just one point, converting
1887*0a6a1f1dSLionel Sambuc              * those into affine form is time well spent
1888*0a6a1f1dSLionel Sambuc              */
1889ebfedea0SLionel Sambuc             mixed = 1;
1890ebfedea0SLionel Sambuc         }
1891ebfedea0SLionel Sambuc         secrets = OPENSSL_malloc(num_points * sizeof(felem_bytearray));
1892ebfedea0SLionel Sambuc         pre_comp = OPENSSL_malloc(num_points * 17 * 3 * sizeof(felem));
1893ebfedea0SLionel Sambuc         if (mixed)
1894*0a6a1f1dSLionel Sambuc             tmp_felems =
1895*0a6a1f1dSLionel Sambuc                 OPENSSL_malloc((num_points * 17 + 1) * sizeof(felem));
1896*0a6a1f1dSLionel Sambuc         if ((secrets == NULL) || (pre_comp == NULL)
1897*0a6a1f1dSLionel Sambuc             || (mixed && (tmp_felems == NULL))) {
1898ebfedea0SLionel Sambuc             ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1899ebfedea0SLionel Sambuc             goto err;
1900ebfedea0SLionel Sambuc         }
1901ebfedea0SLionel Sambuc 
1902*0a6a1f1dSLionel Sambuc         /*
1903*0a6a1f1dSLionel Sambuc          * we treat NULL scalars as 0, and NULL points as points at infinity,
1904*0a6a1f1dSLionel Sambuc          * i.e., they contribute nothing to the linear combination
1905*0a6a1f1dSLionel Sambuc          */
1906ebfedea0SLionel Sambuc         memset(secrets, 0, num_points * sizeof(felem_bytearray));
1907ebfedea0SLionel Sambuc         memset(pre_comp, 0, num_points * 17 * 3 * sizeof(felem));
1908*0a6a1f1dSLionel Sambuc         for (i = 0; i < num_points; ++i) {
1909ebfedea0SLionel Sambuc             if (i == num)
1910*0a6a1f1dSLionel Sambuc                 /*
1911*0a6a1f1dSLionel Sambuc                  * we didn't have a valid precomputation, so we pick the
1912*0a6a1f1dSLionel Sambuc                  * generator
1913*0a6a1f1dSLionel Sambuc                  */
1914ebfedea0SLionel Sambuc             {
1915ebfedea0SLionel Sambuc                 p = EC_GROUP_get0_generator(group);
1916ebfedea0SLionel Sambuc                 p_scalar = scalar;
1917*0a6a1f1dSLionel Sambuc             } else
1918ebfedea0SLionel Sambuc                 /* the i^th point */
1919ebfedea0SLionel Sambuc             {
1920ebfedea0SLionel Sambuc                 p = points[i];
1921ebfedea0SLionel Sambuc                 p_scalar = scalars[i];
1922ebfedea0SLionel Sambuc             }
1923*0a6a1f1dSLionel Sambuc             if ((p_scalar != NULL) && (p != NULL)) {
1924ebfedea0SLionel Sambuc                 /* reduce scalar to 0 <= scalar < 2^521 */
1925*0a6a1f1dSLionel Sambuc                 if ((BN_num_bits(p_scalar) > 521)
1926*0a6a1f1dSLionel Sambuc                     || (BN_is_negative(p_scalar))) {
1927*0a6a1f1dSLionel Sambuc                     /*
1928*0a6a1f1dSLionel Sambuc                      * this is an unusual input, and we don't guarantee
1929*0a6a1f1dSLionel Sambuc                      * constant-timeness
1930*0a6a1f1dSLionel Sambuc                      */
1931*0a6a1f1dSLionel Sambuc                     if (!BN_nnmod(tmp_scalar, p_scalar, &group->order, ctx)) {
1932ebfedea0SLionel Sambuc                         ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1933ebfedea0SLionel Sambuc                         goto err;
1934ebfedea0SLionel Sambuc                     }
1935ebfedea0SLionel Sambuc                     num_bytes = BN_bn2bin(tmp_scalar, tmp);
1936*0a6a1f1dSLionel Sambuc                 } else
1937ebfedea0SLionel Sambuc                     num_bytes = BN_bn2bin(p_scalar, tmp);
1938ebfedea0SLionel Sambuc                 flip_endian(secrets[i], tmp, num_bytes);
1939ebfedea0SLionel Sambuc                 /* precompute multiples */
1940ebfedea0SLionel Sambuc                 if ((!BN_to_felem(x_out, &p->X)) ||
1941ebfedea0SLionel Sambuc                     (!BN_to_felem(y_out, &p->Y)) ||
1942*0a6a1f1dSLionel Sambuc                     (!BN_to_felem(z_out, &p->Z)))
1943*0a6a1f1dSLionel Sambuc                     goto err;
1944ebfedea0SLionel Sambuc                 memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
1945ebfedea0SLionel Sambuc                 memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
1946ebfedea0SLionel Sambuc                 memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
1947*0a6a1f1dSLionel Sambuc                 for (j = 2; j <= 16; ++j) {
1948*0a6a1f1dSLionel Sambuc                     if (j & 1) {
1949*0a6a1f1dSLionel Sambuc                         point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1950*0a6a1f1dSLionel Sambuc                                   pre_comp[i][j][2], pre_comp[i][1][0],
1951*0a6a1f1dSLionel Sambuc                                   pre_comp[i][1][1], pre_comp[i][1][2], 0,
1952*0a6a1f1dSLionel Sambuc                                   pre_comp[i][j - 1][0],
1953*0a6a1f1dSLionel Sambuc                                   pre_comp[i][j - 1][1],
1954*0a6a1f1dSLionel Sambuc                                   pre_comp[i][j - 1][2]);
1955*0a6a1f1dSLionel Sambuc                     } else {
1956*0a6a1f1dSLionel Sambuc                         point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1957*0a6a1f1dSLionel Sambuc                                      pre_comp[i][j][2], pre_comp[i][j / 2][0],
1958*0a6a1f1dSLionel Sambuc                                      pre_comp[i][j / 2][1],
1959*0a6a1f1dSLionel Sambuc                                      pre_comp[i][j / 2][2]);
1960ebfedea0SLionel Sambuc                     }
1961ebfedea0SLionel Sambuc                 }
1962ebfedea0SLionel Sambuc             }
1963ebfedea0SLionel Sambuc         }
1964ebfedea0SLionel Sambuc         if (mixed)
1965ebfedea0SLionel Sambuc             make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1966ebfedea0SLionel Sambuc     }
1967ebfedea0SLionel Sambuc 
1968ebfedea0SLionel Sambuc     /* the scalar for the generator */
1969*0a6a1f1dSLionel Sambuc     if ((scalar != NULL) && (have_pre_comp)) {
1970ebfedea0SLionel Sambuc         memset(g_secret, 0, sizeof(g_secret));
1971ebfedea0SLionel Sambuc         /* reduce scalar to 0 <= scalar < 2^521 */
1972*0a6a1f1dSLionel Sambuc         if ((BN_num_bits(scalar) > 521) || (BN_is_negative(scalar))) {
1973*0a6a1f1dSLionel Sambuc             /*
1974*0a6a1f1dSLionel Sambuc              * this is an unusual input, and we don't guarantee
1975*0a6a1f1dSLionel Sambuc              * constant-timeness
1976*0a6a1f1dSLionel Sambuc              */
1977*0a6a1f1dSLionel Sambuc             if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
1978ebfedea0SLionel Sambuc                 ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
1979ebfedea0SLionel Sambuc                 goto err;
1980ebfedea0SLionel Sambuc             }
1981ebfedea0SLionel Sambuc             num_bytes = BN_bn2bin(tmp_scalar, tmp);
1982*0a6a1f1dSLionel Sambuc         } else
1983ebfedea0SLionel Sambuc             num_bytes = BN_bn2bin(scalar, tmp);
1984ebfedea0SLionel Sambuc         flip_endian(g_secret, tmp, num_bytes);
1985ebfedea0SLionel Sambuc         /* do the multiplication with generator precomputation */
1986ebfedea0SLionel Sambuc         batch_mul(x_out, y_out, z_out,
1987ebfedea0SLionel Sambuc                   (const felem_bytearray(*))secrets, num_points,
1988ebfedea0SLionel Sambuc                   g_secret,
1989ebfedea0SLionel Sambuc                   mixed, (const felem(*)[17][3])pre_comp,
1990ebfedea0SLionel Sambuc                   (const felem(*)[3])g_pre_comp);
1991*0a6a1f1dSLionel Sambuc     } else
1992ebfedea0SLionel Sambuc         /* do the multiplication without generator precomputation */
1993ebfedea0SLionel Sambuc         batch_mul(x_out, y_out, z_out,
1994ebfedea0SLionel Sambuc                   (const felem_bytearray(*))secrets, num_points,
1995ebfedea0SLionel Sambuc                   NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1996ebfedea0SLionel Sambuc     /* reduce the output to its unique minimal representation */
1997ebfedea0SLionel Sambuc     felem_contract(x_in, x_out);
1998ebfedea0SLionel Sambuc     felem_contract(y_in, y_out);
1999ebfedea0SLionel Sambuc     felem_contract(z_in, z_out);
2000ebfedea0SLionel Sambuc     if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
2001*0a6a1f1dSLionel Sambuc         (!felem_to_BN(z, z_in))) {
2002ebfedea0SLionel Sambuc         ECerr(EC_F_EC_GFP_NISTP521_POINTS_MUL, ERR_R_BN_LIB);
2003ebfedea0SLionel Sambuc         goto err;
2004ebfedea0SLionel Sambuc     }
2005ebfedea0SLionel Sambuc     ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
2006ebfedea0SLionel Sambuc 
2007ebfedea0SLionel Sambuc  err:
2008ebfedea0SLionel Sambuc     BN_CTX_end(ctx);
2009ebfedea0SLionel Sambuc     if (generator != NULL)
2010ebfedea0SLionel Sambuc         EC_POINT_free(generator);
2011ebfedea0SLionel Sambuc     if (new_ctx != NULL)
2012ebfedea0SLionel Sambuc         BN_CTX_free(new_ctx);
2013ebfedea0SLionel Sambuc     if (secrets != NULL)
2014ebfedea0SLionel Sambuc         OPENSSL_free(secrets);
2015ebfedea0SLionel Sambuc     if (pre_comp != NULL)
2016ebfedea0SLionel Sambuc         OPENSSL_free(pre_comp);
2017ebfedea0SLionel Sambuc     if (tmp_felems != NULL)
2018ebfedea0SLionel Sambuc         OPENSSL_free(tmp_felems);
2019ebfedea0SLionel Sambuc     return ret;
2020ebfedea0SLionel Sambuc }
2021ebfedea0SLionel Sambuc 
ec_GFp_nistp521_precompute_mult(EC_GROUP * group,BN_CTX * ctx)2022ebfedea0SLionel Sambuc int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
2023ebfedea0SLionel Sambuc {
2024ebfedea0SLionel Sambuc     int ret = 0;
2025ebfedea0SLionel Sambuc     NISTP521_PRE_COMP *pre = NULL;
2026ebfedea0SLionel Sambuc     int i, j;
2027ebfedea0SLionel Sambuc     BN_CTX *new_ctx = NULL;
2028ebfedea0SLionel Sambuc     BIGNUM *x, *y;
2029ebfedea0SLionel Sambuc     EC_POINT *generator = NULL;
2030ebfedea0SLionel Sambuc     felem tmp_felems[16];
2031ebfedea0SLionel Sambuc 
2032ebfedea0SLionel Sambuc     /* throw away old precomputation */
2033ebfedea0SLionel Sambuc     EC_EX_DATA_free_data(&group->extra_data, nistp521_pre_comp_dup,
2034*0a6a1f1dSLionel Sambuc                          nistp521_pre_comp_free,
2035*0a6a1f1dSLionel Sambuc                          nistp521_pre_comp_clear_free);
2036ebfedea0SLionel Sambuc     if (ctx == NULL)
2037*0a6a1f1dSLionel Sambuc         if ((ctx = new_ctx = BN_CTX_new()) == NULL)
2038*0a6a1f1dSLionel Sambuc             return 0;
2039ebfedea0SLionel Sambuc     BN_CTX_start(ctx);
2040*0a6a1f1dSLionel Sambuc     if (((x = BN_CTX_get(ctx)) == NULL) || ((y = BN_CTX_get(ctx)) == NULL))
2041ebfedea0SLionel Sambuc         goto err;
2042ebfedea0SLionel Sambuc     /* get the generator */
2043*0a6a1f1dSLionel Sambuc     if (group->generator == NULL)
2044*0a6a1f1dSLionel Sambuc         goto err;
2045ebfedea0SLionel Sambuc     generator = EC_POINT_new(group);
2046ebfedea0SLionel Sambuc     if (generator == NULL)
2047ebfedea0SLionel Sambuc         goto err;
2048ebfedea0SLionel Sambuc     BN_bin2bn(nistp521_curve_params[3], sizeof(felem_bytearray), x);
2049ebfedea0SLionel Sambuc     BN_bin2bn(nistp521_curve_params[4], sizeof(felem_bytearray), y);
2050ebfedea0SLionel Sambuc     if (!EC_POINT_set_affine_coordinates_GFp(group, generator, x, y, ctx))
2051ebfedea0SLionel Sambuc         goto err;
2052ebfedea0SLionel Sambuc     if ((pre = nistp521_pre_comp_new()) == NULL)
2053ebfedea0SLionel Sambuc         goto err;
2054*0a6a1f1dSLionel Sambuc     /*
2055*0a6a1f1dSLionel Sambuc      * if the generator is the standard one, use built-in precomputation
2056*0a6a1f1dSLionel Sambuc      */
2057*0a6a1f1dSLionel Sambuc     if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
2058ebfedea0SLionel Sambuc         memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
2059ebfedea0SLionel Sambuc         ret = 1;
2060ebfedea0SLionel Sambuc         goto err;
2061ebfedea0SLionel Sambuc     }
2062ebfedea0SLionel Sambuc     if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
2063ebfedea0SLionel Sambuc         (!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
2064ebfedea0SLionel Sambuc         (!BN_to_felem(pre->g_pre_comp[1][2], &group->generator->Z)))
2065ebfedea0SLionel Sambuc         goto err;
2066ebfedea0SLionel Sambuc     /* compute 2^130*G, 2^260*G, 2^390*G */
2067*0a6a1f1dSLionel Sambuc     for (i = 1; i <= 4; i <<= 1) {
2068ebfedea0SLionel Sambuc         point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1],
2069ebfedea0SLionel Sambuc                      pre->g_pre_comp[2 * i][2], pre->g_pre_comp[i][0],
2070ebfedea0SLionel Sambuc                      pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
2071*0a6a1f1dSLionel Sambuc         for (j = 0; j < 129; ++j) {
2072ebfedea0SLionel Sambuc             point_double(pre->g_pre_comp[2 * i][0],
2073ebfedea0SLionel Sambuc                          pre->g_pre_comp[2 * i][1],
2074ebfedea0SLionel Sambuc                          pre->g_pre_comp[2 * i][2],
2075ebfedea0SLionel Sambuc                          pre->g_pre_comp[2 * i][0],
2076ebfedea0SLionel Sambuc                          pre->g_pre_comp[2 * i][1],
2077ebfedea0SLionel Sambuc                          pre->g_pre_comp[2 * i][2]);
2078ebfedea0SLionel Sambuc         }
2079ebfedea0SLionel Sambuc     }
2080ebfedea0SLionel Sambuc     /* g_pre_comp[0] is the point at infinity */
2081ebfedea0SLionel Sambuc     memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
2082ebfedea0SLionel Sambuc     /* the remaining multiples */
2083ebfedea0SLionel Sambuc     /* 2^130*G + 2^260*G */
2084ebfedea0SLionel Sambuc     point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1],
2085ebfedea0SLionel Sambuc               pre->g_pre_comp[6][2], pre->g_pre_comp[4][0],
2086ebfedea0SLionel Sambuc               pre->g_pre_comp[4][1], pre->g_pre_comp[4][2],
2087ebfedea0SLionel Sambuc               0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2088ebfedea0SLionel Sambuc               pre->g_pre_comp[2][2]);
2089ebfedea0SLionel Sambuc     /* 2^130*G + 2^390*G */
2090ebfedea0SLionel Sambuc     point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1],
2091ebfedea0SLionel Sambuc               pre->g_pre_comp[10][2], pre->g_pre_comp[8][0],
2092ebfedea0SLionel Sambuc               pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2093ebfedea0SLionel Sambuc               0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2094ebfedea0SLionel Sambuc               pre->g_pre_comp[2][2]);
2095ebfedea0SLionel Sambuc     /* 2^260*G + 2^390*G */
2096ebfedea0SLionel Sambuc     point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1],
2097ebfedea0SLionel Sambuc               pre->g_pre_comp[12][2], pre->g_pre_comp[8][0],
2098ebfedea0SLionel Sambuc               pre->g_pre_comp[8][1], pre->g_pre_comp[8][2],
2099ebfedea0SLionel Sambuc               0, pre->g_pre_comp[4][0], pre->g_pre_comp[4][1],
2100ebfedea0SLionel Sambuc               pre->g_pre_comp[4][2]);
2101ebfedea0SLionel Sambuc     /* 2^130*G + 2^260*G + 2^390*G */
2102ebfedea0SLionel Sambuc     point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1],
2103ebfedea0SLionel Sambuc               pre->g_pre_comp[14][2], pre->g_pre_comp[12][0],
2104ebfedea0SLionel Sambuc               pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
2105ebfedea0SLionel Sambuc               0, pre->g_pre_comp[2][0], pre->g_pre_comp[2][1],
2106ebfedea0SLionel Sambuc               pre->g_pre_comp[2][2]);
2107*0a6a1f1dSLionel Sambuc     for (i = 1; i < 8; ++i) {
2108ebfedea0SLionel Sambuc         /* odd multiples: add G */
2109*0a6a1f1dSLionel Sambuc         point_add(pre->g_pre_comp[2 * i + 1][0],
2110*0a6a1f1dSLionel Sambuc                   pre->g_pre_comp[2 * i + 1][1],
2111ebfedea0SLionel Sambuc                   pre->g_pre_comp[2 * i + 1][2], pre->g_pre_comp[2 * i][0],
2112*0a6a1f1dSLionel Sambuc                   pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
2113*0a6a1f1dSLionel Sambuc                   pre->g_pre_comp[1][0], pre->g_pre_comp[1][1],
2114ebfedea0SLionel Sambuc                   pre->g_pre_comp[1][2]);
2115ebfedea0SLionel Sambuc     }
2116ebfedea0SLionel Sambuc     make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
2117ebfedea0SLionel Sambuc 
2118ebfedea0SLionel Sambuc     if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
2119*0a6a1f1dSLionel Sambuc                              nistp521_pre_comp_free,
2120*0a6a1f1dSLionel Sambuc                              nistp521_pre_comp_clear_free))
2121ebfedea0SLionel Sambuc         goto err;
2122ebfedea0SLionel Sambuc     ret = 1;
2123ebfedea0SLionel Sambuc     pre = NULL;
2124ebfedea0SLionel Sambuc  err:
2125ebfedea0SLionel Sambuc     BN_CTX_end(ctx);
2126ebfedea0SLionel Sambuc     if (generator != NULL)
2127ebfedea0SLionel Sambuc         EC_POINT_free(generator);
2128ebfedea0SLionel Sambuc     if (new_ctx != NULL)
2129ebfedea0SLionel Sambuc         BN_CTX_free(new_ctx);
2130ebfedea0SLionel Sambuc     if (pre)
2131ebfedea0SLionel Sambuc         nistp521_pre_comp_free(pre);
2132ebfedea0SLionel Sambuc     return ret;
2133ebfedea0SLionel Sambuc }
2134ebfedea0SLionel Sambuc 
ec_GFp_nistp521_have_precompute_mult(const EC_GROUP * group)2135ebfedea0SLionel Sambuc int ec_GFp_nistp521_have_precompute_mult(const EC_GROUP *group)
2136ebfedea0SLionel Sambuc {
2137ebfedea0SLionel Sambuc     if (EC_EX_DATA_get_data(group->extra_data, nistp521_pre_comp_dup,
2138*0a6a1f1dSLionel Sambuc                             nistp521_pre_comp_free,
2139*0a6a1f1dSLionel Sambuc                             nistp521_pre_comp_clear_free)
2140ebfedea0SLionel Sambuc         != NULL)
2141ebfedea0SLionel Sambuc         return 1;
2142ebfedea0SLionel Sambuc     else
2143ebfedea0SLionel Sambuc         return 0;
2144ebfedea0SLionel Sambuc }
2145ebfedea0SLionel Sambuc 
2146ebfedea0SLionel Sambuc #else
2147ebfedea0SLionel Sambuc static void *dummy = &dummy;
2148ebfedea0SLionel Sambuc #endif
2149