1 /* ecc-secp256r1.c
2 
3    Compile time constant (but machine dependent) tables.
4 
5    Copyright (C) 2013, 2014 Niels Möller
6 
7    This file is part of GNU Nettle.
8 
9    GNU Nettle is free software: you can redistribute it and/or
10    modify it under the terms of either:
11 
12      * the GNU Lesser General Public License as published by the Free
13        Software Foundation; either version 3 of the License, or (at your
14        option) any later version.
15 
16    or
17 
18      * the GNU General Public License as published by the Free
19        Software Foundation; either version 2 of the License, or (at your
20        option) any later version.
21 
22    or both in parallel, as here.
23 
24    GNU Nettle is distributed in the hope that it will be useful,
25    but WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    General Public License for more details.
28 
29    You should have received copies of the GNU General Public License and
30    the GNU Lesser General Public License along with this program.  If
31    not, see http://www.gnu.org/licenses/.
32 */
33 
34 /* Development of Nettle's ECC support was funded by the .SE Internet Fund. */
35 
36 #if HAVE_CONFIG_H
37 # include "config.h"
38 #endif
39 
40 #include <assert.h>
41 
42 #include "ecc.h"
43 #include "ecc-internal.h"
44 
45 #if HAVE_NATIVE_ecc_secp256r1_redc
46 # define USE_REDC 1
47 #else
48 # define USE_REDC (ECC_REDC_SIZE != 0)
49 #endif
50 
51 #include "ecc-secp256r1.h"
52 
53 #if HAVE_NATIVE_ecc_secp256r1_redc
54 # define ecc_secp256r1_redc _nettle_ecc_secp256r1_redc
55 void
56 ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp);
57 #else /* !HAVE_NATIVE_ecc_secp256r1_redc */
58 # if ECC_REDC_SIZE > 0
59 #   define ecc_secp256r1_redc ecc_pp1_redc
60 # elif ECC_REDC_SIZE == 0
61 #   define ecc_secp256r1_redc NULL
62 # else
63 #  error Configuration error
64 # endif
65 #endif /* !HAVE_NATIVE_ecc_secp256r1_redc */
66 
67 #if ECC_BMODP_SIZE < ECC_LIMB_SIZE
68 #define ecc_secp256r1_modp ecc_mod
69 #define ecc_secp256r1_modq ecc_mod
70 #elif GMP_NUMB_BITS == 64
71 
72 static void
ecc_secp256r1_modp(const struct ecc_modulo * p,mp_limb_t * rp,mp_limb_t * xp)73 ecc_secp256r1_modp (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
74 {
75   mp_limb_t u1, u0;
76   mp_size_t n;
77 
78   n = 2*p->size;
79   u1 = xp[--n];
80   u0 = xp[n-1];
81 
82   /* This is not particularly fast, but should work well with assembly implementation. */
83   for (; n >= p->size; n--)
84     {
85       mp_limb_t q2, q1, q0, t, cy;
86 
87       /* <q2, q1, q0> = v * u1 + <u1,u0>, with v = 2^32 - 1:
88 
89 	   +---+---+
90 	   | u1| u0|
91 	   +---+---+
92 	       |-u1|
93 	     +-+-+-+
94 	     | u1|
95        +---+-+-+-+-+
96        | q2| q1| q0|
97        +---+---+---+
98       */
99       q1 = u1 - (u1 > u0);
100       q0 = u0 - u1;
101       t = u1 << 32;
102       q0 += t;
103       t = (u1 >> 32) + (q0 < t) + 1;
104       q1 += t;
105       q2 = q1 < t;
106 
107       /* Compute candidate remainder */
108       u1 = u0 + (q1 << 32) - q1;
109       t = -(mp_limb_t) (u1 > q0);
110       u1 -= t & 0xffffffff;
111       q1 += t;
112       q2 += t + (q1 < t);
113 
114       assert (q2 < 2);
115 
116       /*
117 	 n-1 n-2 n-3 n-4
118 	+---+---+---+---+
119 	| u1| u0| u low |
120 	+---+---+---+---+
121 	  - | q1(2^96-1)|
122 	    +-------+---+
123 	    |q2(2^.)|
124 	    +-------+
125 
126 	 We multiply by two low limbs of p, 2^96 - 1, so we could use
127 	 shifts rather than mul.
128       */
129       t = mpn_submul_1 (xp + n - 4, p->m, 2, q1);
130       t += mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, p->m, 1);
131       t += (-q2) & 0xffffffff;
132 
133       u0 = xp[n-2];
134       cy = (u0 < t);
135       u0 -= t;
136       t = (u1 < cy);
137       u1 -= cy;
138 
139       cy = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, p->m, 2);
140       u0 += cy;
141       u1 += (u0 < cy);
142       u1 -= (-t) & 0xffffffff;
143     }
144   rp[0] = xp[0];
145   rp[1] = xp[1];
146   rp[2] = u0;
147   rp[3] = u1;
148 }
149 
150 static void
ecc_secp256r1_modq(const struct ecc_modulo * q,mp_limb_t * rp,mp_limb_t * xp)151 ecc_secp256r1_modq (const struct ecc_modulo *q, mp_limb_t *rp, mp_limb_t *xp)
152 {
153   mp_limb_t u2, u1, u0;
154   mp_size_t n;
155 
156   n = 2*q->size;
157   u2 = xp[--n];
158   u1 = xp[n-1];
159 
160   /* This is not particularly fast, but should work well with assembly implementation. */
161   for (; n >= q->size; n--)
162     {
163       mp_limb_t q2, q1, q0, t, c1, c0;
164 
165       u0 = xp[n-2];
166 
167       /* <q2, q1, q0> = v * u2 + <u2,u1>, same method as above.
168 
169 	   +---+---+
170 	   | u2| u1|
171 	   +---+---+
172 	       |-u2|
173 	     +-+-+-+
174 	     | u2|
175        +---+-+-+-+-+
176        | q2| q1| q0|
177        +---+---+---+
178       */
179       q1 = u2 - (u2 > u1);
180       q0 = u1 - u2;
181       t = u2 << 32;
182       q0 += t;
183       t = (u2 >> 32) + (q0 < t) + 1;
184       q1 += t;
185       q2 = q1 < t;
186 
187       /* Compute candidate remainder, <u1, u0> - <q2, q1> * (2^128 - 2^96 + 2^64 - 1)
188 	 <u1, u0> + 2^64 q2 + (2^96 - 2^64 + 1) q1 (mod 2^128)
189 
190 	   +---+---+
191 	   | u1| u0|
192 	   +---+---+
193 	   | q2| q1|
194 	   +---+---+
195 	   |-q1|
196 	 +-+-+-+
197 	 | q1|
198        --+-+-+-+---+
199 	   | u2| u1|
200 	   +---+---+
201       */
202       u2 = u1 + q2 - q1;
203       u1 = u0 + q1;
204       u2 += (u1 < q1);
205       u2 += (q1 << 32);
206 
207       t = -(mp_limb_t) (u2 >= q0);
208       q1 += t;
209       q2 += t + (q1 < t);
210       u1 += t;
211       u2 += (t << 32) + (u1 < t);
212 
213       assert (q2 < 2);
214 
215       c0 = mpn_cnd_sub_n (q2, xp + n - 3, xp + n - 3, q->m, 1);
216       c0 += (-q2) & q->m[1];
217       t = mpn_submul_1 (xp + n - 4, q->m, 2, q1);
218       c0 += t;
219       c1 = c0 < t;
220 
221       /* Construct underflow condition. */
222       c1 += (u1 < c0);
223       t = - (mp_limb_t) (u2 < c1);
224 
225       u1 -= c0;
226       u2 -= c1;
227 
228       /* Conditional add of p */
229       u1 += t;
230       u2 += (t<<32) + (u1 < t);
231 
232       t = mpn_cnd_add_n (t, xp + n - 4, xp + n - 4, q->m, 2);
233       u1 += t;
234       u2 += (u1 < t);
235     }
236   rp[0] = xp[0];
237   rp[1] = xp[1];
238   rp[2] = u1;
239   rp[3] = u2;
240 }
241 
242 #else
243 #error Unsupported parameters
244 #endif
245 
246 #define ECC_SECP256R1_INV_ITCH (4*ECC_LIMB_SIZE)
247 
248 static void
ecc_secp256r1_inv(const struct ecc_modulo * p,mp_limb_t * rp,const mp_limb_t * ap,mp_limb_t * scratch)249 ecc_secp256r1_inv (const struct ecc_modulo *p,
250 		   mp_limb_t *rp, const mp_limb_t *ap,
251 		   mp_limb_t *scratch)
252 {
253 #define a5m1 scratch
254 #define t0 (scratch + ECC_LIMB_SIZE)
255 #define a15m1 t0
256 #define a32m1 a5m1
257 #define tp (scratch + 2*ECC_LIMB_SIZE)
258 /*
259    Addition chain for p - 2 = 2^{256} - 2^{224} + 2^{192} + 2^{96} - 3
260 
261     2^5 - 1 = 1 + 2 (2^4 - 1) = 1 + 2 (2^2+1)(2 + 1)    4 S + 3 M
262     2^{15} - 1 = (2^5 - 1) (1 + 2^5 (1 + 2^5)          10 S + 2 M
263     2^{16} - 1 = 1 + 2 (2^{15} - 1)                       S +   M
264     2^{32} - 1 = (2^{16} + 1) (2^{16} - 1)             16 S +   M
265     2^{64} - 2^{32} + 1 = 2^{32} (2^{32} - 1) + 1      32 S +   M
266     2^{192} - 2^{160} + 2^{128} + 2^{32} - 1
267         = 2^{128} (2^{64} - 2^{32} + 1) + 2^{32} - 1  128 S +   M
268     2^{224} - 2^{192} + 2^{160} + 2^{64} - 1
269         = 2^{32} (...) + 2^{32} - 1                    32 S +   M
270     2^{239} - 2^{207} + 2^{175} + 2^{79} - 1
271         = 2^{15} (...) + 2^{15} - 1                    15 S +   M
272     2^{254} - 2^{222} + 2^{190} + 2^{94} - 1
273         = 2^{15} (...) + 2^{15} - 1                    15 S +   M
274     p - 2 = 2^2 (...) + 1                               2 S     M
275                                                    ---------------
276 						      255 S + 13 M
277  */
278   ecc_mod_sqr (p, rp, ap, tp);			/* a^2 */
279   ecc_mod_mul (p, rp, rp, ap, tp);		/* a^3 */
280   ecc_mod_pow_2kp1 (p, t0, rp, 2, tp);		/* a^{2^4 - 1} */
281   ecc_mod_sqr (p, rp, t0, tp);			/* a^{2^5 - 2} */
282   ecc_mod_mul (p, a5m1, rp, ap, tp);		/* a^{2^5 - 1}, a5m1 */
283 
284   ecc_mod_pow_2kp1 (p, rp, a5m1, 5, tp);	/* a^{2^{10} - 1, a5m1*/
285   ecc_mod_pow_2k_mul (p, a15m1, rp, 5, a5m1, tp); /* a^{2^{15} - 1}, a5m1 a15m1 */
286   ecc_mod_sqr (p, rp, a15m1, tp);		/* a^{2^{16} - 2}, a15m1 */
287   ecc_mod_mul (p, rp, rp, ap, tp);		/* a^{2^{16} - 1}, a15m1 */
288   ecc_mod_pow_2kp1 (p, a32m1, rp, 16, tp);	/* a^{2^{32} - 1}, a15m1, a32m1 */
289 
290   ecc_mod_pow_2k_mul (p, rp, a32m1, 32, ap, tp);/* a^{2^{64} - 2^{32} + 1 */
291   ecc_mod_pow_2k_mul (p, rp, rp, 128, a32m1, tp); /* a^{2^{192} - 2^{160} + 2^{128} + 2^{32} - 1} */
292   ecc_mod_pow_2k_mul (p, rp, rp, 32, a32m1, tp);/* a^{2^{224} - 2^{192} + 2^{160} + 2^{64} - 1} */
293   ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{239} - 2^{207} + 2^{175} + 2^{79} - 1} */
294   ecc_mod_pow_2k_mul (p, rp, rp, 15, a15m1, tp);/* a^{2^{254} - 2^{222} + 2^{190} + 2^{94} - 1} */
295   ecc_mod_pow_2k_mul (p, rp, rp, 2, ap, tp); 	/* a^{2^{256} - 2^{224} + 2^{192} + 2^{96} - 3} */
296 }
297 
298 const struct ecc_curve _nettle_secp_256r1 =
299 {
300   {
301     256,
302     ECC_LIMB_SIZE,
303     ECC_BMODP_SIZE,
304     ECC_REDC_SIZE,
305     ECC_SECP256R1_INV_ITCH,
306     0,
307 
308     ecc_p,
309     ecc_Bmodp,
310     ecc_Bmodp_shifted,
311     ecc_redc_ppm1,
312     ecc_pp1h,
313 
314     ecc_secp256r1_modp,
315     USE_REDC ? ecc_secp256r1_redc : ecc_secp256r1_modp,
316     ecc_secp256r1_inv,
317     NULL,
318   },
319   {
320     256,
321     ECC_LIMB_SIZE,
322     ECC_BMODQ_SIZE,
323     0,
324     ECC_MOD_INV_ITCH (ECC_LIMB_SIZE),
325     0,
326 
327     ecc_q,
328     ecc_Bmodq,
329     ecc_Bmodq_shifted,
330     NULL,
331     ecc_qp1h,
332 
333     ecc_secp256r1_modq,
334     ecc_secp256r1_modq,
335     ecc_mod_inv,
336     NULL,
337   },
338 
339   USE_REDC,
340   ECC_PIPPENGER_K,
341   ECC_PIPPENGER_C,
342 
343   ECC_ADD_JJA_ITCH (ECC_LIMB_SIZE),
344   ECC_ADD_JJJ_ITCH (ECC_LIMB_SIZE),
345   ECC_DUP_JJ_ITCH (ECC_LIMB_SIZE),
346   ECC_MUL_A_ITCH (ECC_LIMB_SIZE),
347   ECC_MUL_G_ITCH (ECC_LIMB_SIZE),
348   ECC_J_TO_A_ITCH(ECC_LIMB_SIZE, ECC_SECP256R1_INV_ITCH),
349 
350   ecc_add_jja,
351   ecc_add_jjj,
352   ecc_dup_jj,
353   ecc_mul_a,
354   ecc_mul_g,
355   ecc_j_to_a,
356 
357   ecc_b,
358   ecc_unit,
359   ecc_table
360 };
361 
nettle_get_secp_256r1(void)362 const struct ecc_curve *nettle_get_secp_256r1(void)
363 {
364   return &_nettle_secp_256r1;
365 }
366