1 /*
2  * Copyright (c) 2007, 2011, Oracle and/or its affiliates. All rights reserved.
3  * Use is subject to license terms.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this library; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /* *********************************************************************
25  *
26  * The Original Code is the elliptic curve math library for prime field curves.
27  *
28  * The Initial Developer of the Original Code is
29  * Sun Microsystems, Inc.
30  * Portions created by the Initial Developer are Copyright (C) 2003
31  * the Initial Developer. All Rights Reserved.
32  *
33  * Contributor(s):
34  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
35  *
36  *********************************************************************** */
37 
38 #include "ecp.h"
39 #include "mpi.h"
40 #include "mplogic.h"
41 #include "mpi-priv.h"
42 #ifndef _KERNEL
43 #include <stdlib.h>
44 #endif
45 
46 #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
47 
48 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
49  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
50  * Implementation of the NIST Elliptic Curves over Prime Fields. */
51 mp_err
ec_GFp_nistp192_mod(const mp_int * a,mp_int * r,const GFMethod * meth)52 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
53 {
54         mp_err res = MP_OKAY;
55         mp_size a_used = MP_USED(a);
56         mp_digit r3;
57 #ifndef MPI_AMD64_ADD
58         mp_digit carry;
59 #endif
60 #ifdef ECL_THIRTY_TWO_BIT
61         mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
62         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
63 #else
64         mp_digit a5 = 0, a4 = 0, a3 = 0;
65         mp_digit r0, r1, r2;
66 #endif
67 
68         /* reduction not needed if a is not larger than field size */
69         if (a_used < ECP192_DIGITS) {
70                 if (a == r) {
71                         return MP_OKAY;
72                 }
73                 return mp_copy(a, r);
74         }
75 
76         /* for polynomials larger than twice the field size, use regular
77          * reduction */
78         if (a_used > ECP192_DIGITS*2) {
79                 MP_CHECKOK(mp_mod(a, &meth->irr, r));
80         } else {
81                 /* copy out upper words of a */
82 
83 #ifdef ECL_THIRTY_TWO_BIT
84 
85                 /* in all the math below,
86                  * nXb is most signifiant, nXa is least significant */
87                 switch (a_used) {
88                 case 12:
89                         a5b = MP_DIGIT(a, 11);
90                 case 11:
91                         a5a = MP_DIGIT(a, 10);
92                 case 10:
93                         a4b = MP_DIGIT(a, 9);
94                 case 9:
95                         a4a = MP_DIGIT(a, 8);
96                 case 8:
97                         a3b = MP_DIGIT(a, 7);
98                 case 7:
99                         a3a = MP_DIGIT(a, 6);
100                 }
101 
102 
103                 r2b= MP_DIGIT(a, 5);
104                 r2a= MP_DIGIT(a, 4);
105                 r1b = MP_DIGIT(a, 3);
106                 r1a = MP_DIGIT(a, 2);
107                 r0b = MP_DIGIT(a, 1);
108                 r0a = MP_DIGIT(a, 0);
109 
110                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
111                 MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
112                 MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
113                 MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
114                 MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
115                 MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
116                 MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
117                 r3 = carry; carry = 0;
118                 MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
119                 MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
120                 MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
121                 MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
122                 MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
123                 MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
124                 r3 += carry;
125                 MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
126                 MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
127                 MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
128                 MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
129                 r3 += carry;
130 
131                 /* reduce out the carry */
132                 while (r3) {
133                         MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
134                         MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
135                         MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
136                         MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
137                         MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
138                         MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
139                         r3 = carry;
140                 }
141 
142                 /* check for final reduction */
143                 /*
144                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
145                  * 0xffffffffffffffff. That means we can only be over and need
146                  * one more reduction
147                  *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
148                  *     and
149                  *     r1 == 0xffffffffffffffffff   or
150                  *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
151                  * In all cases, we subtract the field (or add the 2's
152                  * complement value (1,1,0)).  (r0, r1, r2)
153                  */
154                 if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
155                         && (r1b == 0xffffffff) ) &&
156                            ((r1a == 0xffffffff) ||
157                             (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
158                                         (r0b == 0xffffffff)) ) {
159                         /* do a quick subtract */
160                         MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
161                         r0b += carry;
162                         r1a = r1b = r2a = r2b = 0;
163                 }
164 
165                 /* set the lower words of r */
166                 if (a != r) {
167                         MP_CHECKOK(s_mp_pad(r, 6));
168                 }
169                 MP_DIGIT(r, 5) = r2b;
170                 MP_DIGIT(r, 4) = r2a;
171                 MP_DIGIT(r, 3) = r1b;
172                 MP_DIGIT(r, 2) = r1a;
173                 MP_DIGIT(r, 1) = r0b;
174                 MP_DIGIT(r, 0) = r0a;
175                 MP_USED(r) = 6;
176 #else
177                 switch (a_used) {
178                 case 6:
179                         a5 = MP_DIGIT(a, 5);
180                 case 5:
181                         a4 = MP_DIGIT(a, 4);
182                 case 4:
183                         a3 = MP_DIGIT(a, 3);
184                 }
185 
186                 r2 = MP_DIGIT(a, 2);
187                 r1 = MP_DIGIT(a, 1);
188                 r0 = MP_DIGIT(a, 0);
189 
190                 /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
191 #ifndef MPI_AMD64_ADD
192                 MP_ADD_CARRY_ZERO(r0, a3, r0, carry);
193                 MP_ADD_CARRY(r1, a3, r1, carry, carry);
194                 MP_ADD_CARRY(r2, a4, r2, carry, carry);
195                 r3 = carry;
196                 MP_ADD_CARRY_ZERO(r0, a5, r0, carry);
197                 MP_ADD_CARRY(r1, a5, r1, carry, carry);
198                 MP_ADD_CARRY(r2, a5, r2, carry, carry);
199                 r3 += carry;
200                 MP_ADD_CARRY_ZERO(r1, a4, r1, carry);
201                 MP_ADD_CARRY(r2,  0, r2, carry, carry);
202                 r3 += carry;
203 
204 #else
205                 r2 = MP_DIGIT(a, 2);
206                 r1 = MP_DIGIT(a, 1);
207                 r0 = MP_DIGIT(a, 0);
208 
209                 /* set the lower words of r */
210                 __asm__ (
211                 "xorq   %3,%3           \n\t"
212                 "addq   %4,%0           \n\t"
213                 "adcq   %4,%1           \n\t"
214                 "adcq   %5,%2           \n\t"
215                 "adcq   $0,%3           \n\t"
216                 "addq   %6,%0           \n\t"
217                 "adcq   %6,%1           \n\t"
218                 "adcq   %6,%2           \n\t"
219                 "adcq   $0,%3           \n\t"
220                 "addq   %5,%1           \n\t"
221                 "adcq   $0,%2           \n\t"
222                 "adcq   $0,%3           \n\t"
223                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
224                   "=r"(a4), "=r"(a5)
225                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
226                   "4" (a3), "5" (a4), "6"(a5)
227                 : "%cc" );
228 #endif
229 
230                 /* reduce out the carry */
231                 while (r3) {
232 #ifndef MPI_AMD64_ADD
233                         MP_ADD_CARRY_ZERO(r0, r3, r0, carry);
234                         MP_ADD_CARRY(r1, r3, r1, carry, carry);
235                         MP_ADD_CARRY(r2,  0, r2, carry, carry);
236                         r3 = carry;
237 #else
238                         a3=r3;
239                         __asm__ (
240                         "xorq   %3,%3           \n\t"
241                         "addq   %4,%0           \n\t"
242                         "adcq   %4,%1           \n\t"
243                         "adcq   $0,%2           \n\t"
244                         "adcq   $0,%3           \n\t"
245                         : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
246                         : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
247                         : "%cc" );
248 #endif
249                 }
250 
251                 /* check for final reduction */
252                 /*
253                  * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
254                  * 0xffffffffffffffff. That means we can only be over and need
255                  * one more reduction
256                  *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
257                  *     and
258                  *     r1 == 0xffffffffffffffffff   or
259                  *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
260                  * In all cases, we subtract the field (or add the 2's
261                  * complement value (1,1,0)).  (r0, r1, r2)
262                  */
263                 if (r3 || ((r2 == MP_DIGIT_MAX) &&
264                       ((r1 == MP_DIGIT_MAX) ||
265                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
266                         /* do a quick subtract */
267                         r0++;
268                         r1 = r2 = 0;
269                 }
270                 /* set the lower words of r */
271                 if (a != r) {
272                         MP_CHECKOK(s_mp_pad(r, 3));
273                 }
274                 MP_DIGIT(r, 2) = r2;
275                 MP_DIGIT(r, 1) = r1;
276                 MP_DIGIT(r, 0) = r0;
277                 MP_USED(r) = 3;
278 #endif
279         }
280 
281   CLEANUP:
282         return res;
283 }
284 
285 #ifndef ECL_THIRTY_TWO_BIT
286 /* Compute the sum of 192 bit curves. Do the work in-line since the
287  * number of words are so small, we don't want to overhead of mp function
288  * calls.  Uses optimized modular reduction for p192.
289  */
290 mp_err
ec_GFp_nistp192_add(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)291 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
292                         const GFMethod *meth)
293 {
294         mp_err res = MP_OKAY;
295         mp_digit a0 = 0, a1 = 0, a2 = 0;
296         mp_digit r0 = 0, r1 = 0, r2 = 0;
297         mp_digit carry;
298 
299         switch(MP_USED(a)) {
300         case 3:
301                 a2 = MP_DIGIT(a,2);
302         case 2:
303                 a1 = MP_DIGIT(a,1);
304         case 1:
305                 a0 = MP_DIGIT(a,0);
306         }
307         switch(MP_USED(b)) {
308         case 3:
309                 r2 = MP_DIGIT(b,2);
310         case 2:
311                 r1 = MP_DIGIT(b,1);
312         case 1:
313                 r0 = MP_DIGIT(b,0);
314         }
315 
316 #ifndef MPI_AMD64_ADD
317         MP_ADD_CARRY_ZERO(a0, r0, r0, carry);
318         MP_ADD_CARRY(a1, r1, r1, carry, carry);
319         MP_ADD_CARRY(a2, r2, r2, carry, carry);
320 #else
321         __asm__ (
322                 "xorq   %3,%3           \n\t"
323                 "addq   %4,%0           \n\t"
324                 "adcq   %5,%1           \n\t"
325                 "adcq   %6,%2           \n\t"
326                 "adcq   $0,%3           \n\t"
327                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
328                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
329                   "1" (r1), "2" (r2)
330                 : "%cc" );
331 #endif
332 
333         /* Do quick 'subract' if we've gone over
334          * (add the 2's complement of the curve field) */
335         if (carry || ((r2 == MP_DIGIT_MAX) &&
336                       ((r1 == MP_DIGIT_MAX) ||
337                         ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
338 #ifndef MPI_AMD64_ADD
339                 MP_ADD_CARRY_ZERO(r0, 1, r0, carry);
340                 MP_ADD_CARRY(r1, 1, r1, carry, carry);
341                 MP_ADD_CARRY(r2, 0, r2, carry, carry);
342 #else
343                 __asm__ (
344                         "addq   $1,%0           \n\t"
345                         "adcq   $1,%1           \n\t"
346                         "adcq   $0,%2           \n\t"
347                         : "=r"(r0), "=r"(r1), "=r"(r2)
348                         : "0" (r0), "1" (r1), "2" (r2)
349                         : "%cc" );
350 #endif
351         }
352 
353 
354         MP_CHECKOK(s_mp_pad(r, 3));
355         MP_DIGIT(r, 2) = r2;
356         MP_DIGIT(r, 1) = r1;
357         MP_DIGIT(r, 0) = r0;
358         MP_SIGN(r) = MP_ZPOS;
359         MP_USED(r) = 3;
360         s_mp_clamp(r);
361 
362 
363   CLEANUP:
364         return res;
365 }
366 
367 /* Compute the diff of 192 bit curves. Do the work in-line since the
368  * number of words are so small, we don't want to overhead of mp function
369  * calls.  Uses optimized modular reduction for p192.
370  */
371 mp_err
ec_GFp_nistp192_sub(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)372 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
373                         const GFMethod *meth)
374 {
375         mp_err res = MP_OKAY;
376         mp_digit b0 = 0, b1 = 0, b2 = 0;
377         mp_digit r0 = 0, r1 = 0, r2 = 0;
378         mp_digit borrow;
379 
380         switch(MP_USED(a)) {
381         case 3:
382                 r2 = MP_DIGIT(a,2);
383         case 2:
384                 r1 = MP_DIGIT(a,1);
385         case 1:
386                 r0 = MP_DIGIT(a,0);
387         }
388 
389         switch(MP_USED(b)) {
390         case 3:
391                 b2 = MP_DIGIT(b,2);
392         case 2:
393                 b1 = MP_DIGIT(b,1);
394         case 1:
395                 b0 = MP_DIGIT(b,0);
396         }
397 
398 #ifndef MPI_AMD64_ADD
399         MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
400         MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
401         MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
402 #else
403         __asm__ (
404                 "xorq   %3,%3           \n\t"
405                 "subq   %4,%0           \n\t"
406                 "sbbq   %5,%1           \n\t"
407                 "sbbq   %6,%2           \n\t"
408                 "adcq   $0,%3           \n\t"
409                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
410                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
411                   "1" (r1), "2" (r2)
412                 : "%cc" );
413 #endif
414 
415         /* Do quick 'add' if we've gone under 0
416          * (subtract the 2's complement of the curve field) */
417         if (borrow) {
418 #ifndef MPI_AMD64_ADD
419                 MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
420                 MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
421                 MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
422 #else
423                 __asm__ (
424                         "subq   $1,%0           \n\t"
425                         "sbbq   $1,%1           \n\t"
426                         "sbbq   $0,%2           \n\t"
427                         : "=r"(r0), "=r"(r1), "=r"(r2)
428                         : "0" (r0), "1" (r1), "2" (r2)
429                         : "%cc" );
430 #endif
431         }
432 
433         MP_CHECKOK(s_mp_pad(r, 3));
434         MP_DIGIT(r, 2) = r2;
435         MP_DIGIT(r, 1) = r1;
436         MP_DIGIT(r, 0) = r0;
437         MP_SIGN(r) = MP_ZPOS;
438         MP_USED(r) = 3;
439         s_mp_clamp(r);
440 
441   CLEANUP:
442         return res;
443 }
444 
445 #endif
446 
447 /* Compute the square of polynomial a, reduce modulo p192. Store the
448  * result in r.  r could be a.  Uses optimized modular reduction for p192.
449  */
450 mp_err
ec_GFp_nistp192_sqr(const mp_int * a,mp_int * r,const GFMethod * meth)451 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
452 {
453         mp_err res = MP_OKAY;
454 
455         MP_CHECKOK(mp_sqr(a, r));
456         MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
457   CLEANUP:
458         return res;
459 }
460 
461 /* Compute the product of two polynomials a and b, reduce modulo p192.
462  * Store the result in r.  r could be a or b; a could be b.  Uses
463  * optimized modular reduction for p192. */
464 mp_err
ec_GFp_nistp192_mul(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)465 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
466                                         const GFMethod *meth)
467 {
468         mp_err res = MP_OKAY;
469 
470         MP_CHECKOK(mp_mul(a, b, r));
471         MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
472   CLEANUP:
473         return res;
474 }
475 
476 /* Divides two field elements. If a is NULL, then returns the inverse of
477  * b. */
478 mp_err
ec_GFp_nistp192_div(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)479 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
480                    const GFMethod *meth)
481 {
482         mp_err res = MP_OKAY;
483         mp_int t;
484 
485         /* If a is NULL, then return the inverse of b, otherwise return a/b. */
486         if (a == NULL) {
487                 return  mp_invmod(b, &meth->irr, r);
488         } else {
489                 /* MPI doesn't support divmod, so we implement it using invmod and
490                  * mulmod. */
491                 MP_CHECKOK(mp_init(&t, FLAG(b)));
492                 MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
493                 MP_CHECKOK(mp_mul(a, &t, r));
494                 MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
495           CLEANUP:
496                 mp_clear(&t);
497                 return res;
498         }
499 }
500 
501 /* Wire in fast field arithmetic and precomputation of base point for
502  * named curves. */
503 mp_err
ec_group_set_gfp192(ECGroup * group,ECCurveName name)504 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
505 {
506         if (name == ECCurve_NIST_P192) {
507                 group->meth->field_mod = &ec_GFp_nistp192_mod;
508                 group->meth->field_mul = &ec_GFp_nistp192_mul;
509                 group->meth->field_sqr = &ec_GFp_nistp192_sqr;
510                 group->meth->field_div = &ec_GFp_nistp192_div;
511 #ifndef ECL_THIRTY_TWO_BIT
512                 group->meth->field_add = &ec_GFp_nistp192_add;
513                 group->meth->field_sub = &ec_GFp_nistp192_sub;
514 #endif
515         }
516         return MP_OKAY;
517 }
518