1 /* ecc-secp256r1.c
2
3 Compile time constant (but machine dependent) tables.
4
5 Copyright (C) 2013, 2014 Niels Möller
6
7 This file is part of GNU Nettle.
8
9 GNU Nettle is free software: you can redistribute it and/or
10 modify it under the terms of either:
11
12 * the GNU Lesser General Public License as published by the Free
13 Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15
16 or
17
18 * the GNU General Public License as published by the Free
19 Software Foundation; either version 2 of the License, or (at your
20 option) any later version.
21
22 or both in parallel, as here.
23
24 GNU Nettle is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 General Public License for more details.
28
29 You should have received copies of the GNU General Public License and
30 the GNU Lesser General Public License along with this program. If
31 not, see http://www.gnu.org/licenses/.
32 */
33
34 /* Development of Nettle's ECC support was funded by the .SE Internet Fund. */
35
36 #if HAVE_CONFIG_H
37 # include "config.h"
38 #endif
39
40 #include <assert.h>
41
42 #include "ecc.h"
43 #include "ecc-internal.h"
44
45 #if HAVE_NATIVE_ecc_secp256r1_redc
46 # define USE_REDC 1
47 #else
48 # define USE_REDC (ECC_REDC_SIZE != 0)
49 #endif
50
51 #include "ecc-secp256r1.h"
52
53 #if HAVE_NATIVE_ecc_secp256r1_redc
54 # define ecc_secp256r1_redc _nettle_ecc_secp256r1_redc
55 void
56 ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp);
57 #else /* !HAVE_NATIVE_ecc_secp256r1_redc */
58 # if ECC_REDC_SIZE > 0
59 # define ecc_secp256r1_redc ecc_pp1_redc
60 # elif ECC_REDC_SIZE == 0
61 # define ecc_secp256r1_redc NULL
62 # else
63 # error Configuration error
64 # endif
65 #endif /* !HAVE_NATIVE_ecc_secp256r1_redc */
66
67 #if ECC_BMODP_SIZE < ECC_LIMB_SIZE
68 #define ecc_secp256r1_modp ecc_mod
69 #define ecc_secp256r1_modq ecc_mod
70 #elif GMP_NUMB_BITS == 64
71
72 static void
ecc_secp256r1_modp(const struct ecc_modulo * p,mp_limb_t * rp,mp_limb_t * xp)73 ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
74 {
75 mp_limb_t u1, u0;
76 mp_size_t n;
77
78 n = 2*p->size;
79 u1 = xp[--n];
80 u0 = xp[n-1];
81
82 /* This is not particularly fast, but should work well with assembly implementation. */
83 for (; n >= p->size; n--)
84 {
85 mp_limb_t q2, q1, q0, t, cy;
86
87 /* <q2, q1, q0> = v * u1 + <u1,u0>, with v = 2^32 - 1:
88
89 +---+---+
90 | u1| u0|
91 +---+---+
92 |-u1|
93 +-+-+-+
94 | u1|
95 +---+-+-+-+-+
96 | q2| q1| q0|
97 +---+---+---+
98 */
99 q1 = u1 - (u1 > u0);
100 q0 = u0 - u1;
101 t = u1 << 32;
102 q0 += t;
103 t = (u1 >> 32) + (q0 < t) + 1;
104 q1 += t;
105 q2 = q1 < t;
106
107 /* Compute candidate remainder */
108 u1 = u0 + (q1 << 32) - q1;
109 t = -(mp_limb_t) (u1 > q0);
110 u1 -= t & 0xffffffff;
111 q1 += t;
112 q2 += t + (q1 < t);
113
114 assert (q2 < 2);
115
116 /*
117 n-1 n-2 n-3 n-4
118 +---+---+---+---+
119 | u1| u0| u low |
120 +---+---+---+---+
121 - | q1(2^96-1)|
122 +-------+---+
123 |q2(2^.)|
124 +-------+
125
126 We multiply by two low limbs of p, 2^96 - 1, so we could use
127 shifts rather than mul.
128 */
129 t = mpn_submul_1 (xp + n - 4, p->m, 2, q1);
130 t += mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, p->m, 1);
131 t += (-q2) & 0xffffffff;
132
133 u0 = xp[n-2];
134 cy = (u0 < t);
135 u0 -= t;
136 t = (u1 < cy);
137 u1 -= cy;
138
139 cy = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, p->m, 2);
140 u0 += cy;
141 u1 += (u0 < cy);
142 u1 -= (-t) & 0xffffffff;
143 }
144 rp[0] = xp[0];
145 rp[1] = xp[1];
146 rp[2] = u0;
147 rp[3] = u1;
148 }
149
150 static void
ecc_secp256r1_modq(const struct ecc_modulo * q,mp_limb_t * rp,mp_limb_t * xp)151 ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
152 {
153 mp_limb_t u2, u1, u0;
154 mp_size_t n;
155
156 n = 2*q->size;
157 u2 = xp[--n];
158 u1 = xp[n-1];
159
160 /* This is not particularly fast, but should work well with assembly implementation. */
161 for (; n >= q->size; n--)
162 {
163 mp_limb_t q2, q1, q0, t, c1, c0;
164
165 u0 = xp[n-2];
166
167 /* <q2, q1, q0> = v * u2 + <u2,u1>, same method as above.
168
169 +---+---+
170 | u2| u1|
171 +---+---+
172 |-u2|
173 +-+-+-+
174 | u2|
175 +---+-+-+-+-+
176 | q2| q1| q0|
177 +---+---+---+
178 */
179 q1 = u2 - (u2 > u1);
180 q0 = u1 - u2;
181 t = u2 << 32;
182 q0 += t;
183 t = (u2 >> 32) + (q0 < t) + 1;
184 q1 += t;
185 q2 = q1 < t;
186
187 /* Compute candidate remainder, <u1, u0> - <q2, q1> * (2^128 - 2^96 + 2^64 - 1)
188 <u1, u0> + 2^64 q2 + (2^96 - 2^64 + 1) q1 (mod 2^128)
189
190 +---+---+
191 | u1| u0|
192 +---+---+
193 | q2| q1|
194 +---+---+
195 |-q1|
196 +-+-+-+
197 | q1|
198 --+-+-+-+---+
199 | u2| u1|
200 +---+---+
201 */
202 u2 = u1 + q2 - q1;
203 u1 = u0 + q1;
204 u2 += (u1 < q1);
205 u2 += (q1 << 32);
206
207 t = -(mp_limb_t) (u2 >= q0);
208 q1 += t;
209 q2 += t + (q1 < t);
210 u1 += t;
211 u2 += (t << 32) + (u1 < t);
212
213 assert (q2 < 2);
214
215 c0 = mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, q->m, 1);
216 c0 += (-q2) & q->m[1];
217 t = mpn_submul_1 (xp + n - 4, q->m, 2, q1);
218 c0 += t;
219 c1 = c0 < t;
220
221 /* Construct underflow condition. */
222 c1 += (u1 < c0);
223 t = - (mp_limb_t) (u2 < c1);
224
225 u1 -= c0;
226 u2 -= c1;
227
228 /* Conditional add of p */
229 u1 += t;
230 u2 += (t<<32) + (u1 < t);
231
232 t = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, q->m, 2);
233 u1 += t;
234 u2 += (u1 < t);
235 }
236 rp[0] = xp[0];
237 rp[1] = xp[1];
238 rp[2] = u1;
239 rp[3] = u2;
240 }
241
242 #else
243 #error Unsupported parameters
244 #endif
245
246 #define ECC_SECP256R1_INV_ITCH (4*ECC_LIMB_SIZE)
247
248 static void
ecc_secp256r1_inv(const struct ecc_modulo * p,mp_limb_t * rp,const mp_limb_t * ap,mp_limb_t * scratch)249 ecc_secp256r1_inv (const struct ecc_modulo *p,
250 mp_limb_t *rp, const mp_limb_t *ap,
251 mp_limb_t *scratch)
252 {
253 #define a5m1 scratch
254 #define t0 (scratch + ECC_LIMB_SIZE)
255 #define a15m1 t0
256 #define a32m1 a5m1
257 #define tp (scratch + 2*ECC_LIMB_SIZE)
258 /*
259 Addition chain for p - 2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3
260
261 2^5 - 1 = 1 + 2 (2^4 - 1) = 1 + 2 (2^2+1)(2 + 1) 4 S + 3 M
262 2^{15} - 1 = (2^5 - 1) (1 + 2^5 (1 + 2^5) 10 S + 2 M
263 2^{16} - 1 = 1 + 2 (2^{15} - 1) S + M
264 2^{32} - 1 = (2^{16} + 1) (2^{16} - 1) 16 S + M
265 2^{64} - 2^{32} + 1 = 2^{32} (2^{32} - 1) + 1 32 S + M
266 2^{192} - 2^{160} + 2^{128} + 2^{32} - 1
267 = 2^{128} (2^{64} - 2^{32} + 1) + 2^{32} - 1 128 S + M
268 2^{224} - 2^{192} + 2^{160} + 2^{64} - 1
269 = 2^{32} (...) + 2^{32} - 1 32 S + M
270 2^{239} - 2^{207} + 2^{175} + 2^{79} - 1
271 = 2^{15} (...) + 2^{15} - 1 15 S + M
272 2^{254} - 2^{222} + 2^{190} + 2^{94} - 1
273 = 2^{15} (...) + 2^{15} - 1 15 S + M
274 p - 2 = 2^2 (...) + 1 2 S M
275 ---------------
276 255 S + 13 M
277 */
278 ecc_mod_sqr (p, rp, ap, tp); /* a^2 */
279 ecc_mod_mul (p, rp, rp, ap, tp); /* a^3 */
280 ecc_mod_pow_2kp1 (p, t0, rp, 2, tp); /* a^{2^4 - 1} */
281 ecc_mod_sqr (p, rp, t0, tp); /* a^{2^5 - 2} */
282 ecc_mod_mul (p, a5m1, rp, ap, tp); /* a^{2^5 - 1}, a5m1 */
283
284 ecc_mod_pow_2kp1 (p, rp, a5m1, 5, tp); /* a^{2^{10} - 1, a5m1*/
285 ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, tp); /* a^{2^{15} - 1}, a5m1 a15m1 */
286 ecc_mod_sqr (p, rp, a15m1, tp); /* a^{2^{16} - 2}, a15m1 */
287 ecc_mod_mul (p, rp, rp, ap, tp); /* a^{2^{16} - 1}, a15m1 */
288 ecc_mod_pow_2kp1 (p, a32m1, rp, 16, tp); /* a^{2^{32} - 1}, a15m1, a32m1 */
289
290 ecc_mod_pow_2k_mul (p, rp, a32m1, 32, ap, tp);/* a^{2^{64} - 2^{32} + 1 */
291 ecc_mod_pow_2k_mul (p, rp, rp, 128, a32m1, tp); /* a^{2^{192} - 2^{160} + 2^{128} + 2^{32} - 1} */
292 ecc_mod_pow_2k_mul (p, rp, rp, 32, a32m1, tp);/* a^{2^{224} - 2^{192} + 2^{160} + 2^{64} - 1} */
293 ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{239} - 2^{207} + 2^{175} + 2^{79} - 1} */
294 ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{254} - 2^{222} + 2^{190} + 2^{94} - 1} */
295 ecc_mod_pow_2k_mul (p, rp, rp, 2, ap, tp); /* a^{2^{256} - 2^{224} + 2^{192} + 2^{96} - 3} */
296 }
297
298 const struct ecc_curve _nettle_secp_256r1 =
299 {
300 {
301 256,
302 ECC_LIMB_SIZE,
303 ECC_BMODP_SIZE,
304 ECC_REDC_SIZE,
305 ECC_SECP256R1_INV_ITCH,
306 0,
307
308 ecc_p,
309 ecc_Bmodp,
310 ecc_Bmodp_shifted,
311 ecc_redc_ppm1,
312 ecc_pp1h,
313
314 ecc_secp256r1_modp,
315 USE_REDC ? ecc_secp256r1_redc : ecc_secp256r1_modp,
316 ecc_secp256r1_inv,
317 NULL,
318 },
319 {
320 256,
321 ECC_LIMB_SIZE,
322 ECC_BMODQ_SIZE,
323 0,
324 ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
325 0,
326
327 ecc_q,
328 ecc_Bmodq,
329 ecc_Bmodq_shifted,
330 NULL,
331 ecc_qp1h,
332
333 ecc_secp256r1_modq,
334 ecc_secp256r1_modq,
335 ecc_mod_inv,
336 NULL,
337 },
338
339 USE_REDC,
340 ECC_PIPPENGER_K,
341 ECC_PIPPENGER_C,
342
343 ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE),
344 ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE),
345 ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
346 ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
347 ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
348 ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP256R1_INV_ITCH),
349
350 ecc_add_jja,
351 ecc_add_jjj,
352 ecc_dup_jj,
353 ecc_mul_a,
354 ecc_mul_g,
355 ecc_j_to_a,
356
357 ecc_b,
358 ecc_unit,
359 ecc_table
360 };
361
nettle_get_secp_256r1(void)362 const struct ecc_curve *nettle_get_secp_256r1(void)
363 {
364 return &_nettle_secp_256r1;
365 }
366