1/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
2
3#ifndef _UECC_CURVE_SPECIFIC_H_
4#define _UECC_CURVE_SPECIFIC_H_
5
/* Byte length used for each curve: the 160/192/224/256-bit curves take
   20/24/28/32 bytes respectively. */
#define num_bytes_secp160r1 20
#define num_bytes_secp192r1 24
#define num_bytes_secp224r1 28
#define num_bytes_secp256r1 32
#define num_bytes_secp256k1 32

/* The per-curve word counts and the BYTES_TO_WORDS_* literal builders depend
   on the configured word size (uECC_WORD_SIZE, in bytes). */
#if (uECC_WORD_SIZE == 1)

/* 1-byte words: the word count equals the byte count. */
#define num_words_secp160r1 20
#define num_words_secp192r1 24
#define num_words_secp224r1 28
#define num_words_secp256r1 32
#define num_words_secp256k1 32

/* Each byte token becomes its own 0x.. literal, in the order given. */
#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) \
    0x##a, 0x##b, 0x##c, 0x##d, 0x##e, 0x##f, 0x##g, 0x##h
#define BYTES_TO_WORDS_4(a, b, c, d) 0x##a, 0x##b, 0x##c, 0x##d

#elif (uECC_WORD_SIZE == 4)

/* 4-byte words: byte count divided by four. */
#define num_words_secp160r1 5
#define num_words_secp192r1 6
#define num_words_secp224r1 7
#define num_words_secp256r1 8
#define num_words_secp256k1 8

/* Bytes are supplied least-significant first, so they are pasted in reverse
   order to form each 32-bit hex literal (a is the low byte of the first word). */
#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##d##c##b##a, 0x##h##g##f##e
#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a

#elif (uECC_WORD_SIZE == 8)

/* 8-byte words: byte count divided by eight, rounded up (secp160r1 and
   secp224r1 do not fill their top word). */
#define num_words_secp160r1 3
#define num_words_secp192r1 3
#define num_words_secp224r1 4
#define num_words_secp256r1 4
#define num_words_secp256k1 4

/* Same reverse pasting, producing one unsigned long long literal per call.
   BYTES_TO_WORDS_4 yields a whole word whose upper four bytes are zero. */
#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##h##g##f##e##d##c##b##a##ull
#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a##ull

#endif /* uECC_WORD_SIZE */
47
48#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \
49    uECC_SUPPORTS_secp224r1 || uECC_SUPPORTS_secp256r1
50static void double_jacobian_default(uECC_word_t * X1,
51                                    uECC_word_t * Y1,
52                                    uECC_word_t * Z1,
53                                    uECC_Curve curve) {
54    /* t1 = X, t2 = Y, t3 = Z */
55    uECC_word_t t4[uECC_MAX_WORDS];
56    uECC_word_t t5[uECC_MAX_WORDS];
57    wordcount_t num_words = curve->num_words;
58
59    if (uECC_vli_isZero(Z1, num_words)) {
60        return;
61    }
62
63    uECC_vli_modSquare_fast(t4, Y1, curve);   /* t4 = y1^2 */
64    uECC_vli_modMult_fast(t5, X1, t4, curve); /* t5 = x1*y1^2 = A */
65    uECC_vli_modSquare_fast(t4, t4, curve);   /* t4 = y1^4 */
66    uECC_vli_modMult_fast(Y1, Y1, Z1, curve); /* t2 = y1*z1 = z3 */
67    uECC_vli_modSquare_fast(Z1, Z1, curve);   /* t3 = z1^2 */
68
69    uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = x1 + z1^2 */
70    uECC_vli_modAdd(Z1, Z1, Z1, curve->p, num_words); /* t3 = 2*z1^2 */
71    uECC_vli_modSub(Z1, X1, Z1, curve->p, num_words); /* t3 = x1 - z1^2 */
72    uECC_vli_modMult_fast(X1, X1, Z1, curve);                /* t1 = x1^2 - z1^4 */
73
74    uECC_vli_modAdd(Z1, X1, X1, curve->p, num_words); /* t3 = 2*(x1^2 - z1^4) */
75    uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = 3*(x1^2 - z1^4) */
76    if (uECC_vli_testBit(X1, 0)) {
77        uECC_word_t l_carry = uECC_vli_add(X1, X1, curve->p, num_words);
78        uECC_vli_rshift1(X1, num_words);
79        X1[num_words - 1] |= l_carry << (uECC_WORD_BITS - 1);
80    } else {
81        uECC_vli_rshift1(X1, num_words);
82    }
83    /* t1 = 3/2*(x1^2 - z1^4) = B */
84
85    uECC_vli_modSquare_fast(Z1, X1, curve);                  /* t3 = B^2 */
86    uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - A */
87    uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - 2A = x3 */
88    uECC_vli_modSub(t5, t5, Z1, curve->p, num_words); /* t5 = A - x3 */
89    uECC_vli_modMult_fast(X1, X1, t5, curve);                /* t1 = B * (A - x3) */
90    uECC_vli_modSub(t4, X1, t4, curve->p, num_words); /* t4 = B * (A - x3) - y1^4 = y3 */
91
92    uECC_vli_set(X1, Z1, num_words);
93    uECC_vli_set(Z1, Y1, num_words);
94    uECC_vli_set(Y1, t4, num_words);
95}
96
97/* Computes result = x^3 + ax + b. result must not overlap x. */
98static void x_side_default(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve) {
99    uECC_word_t _3[uECC_MAX_WORDS] = {3}; /* -a = 3 */
100    wordcount_t num_words = curve->num_words;
101
102    uECC_vli_modSquare_fast(result, x, curve);                             /* r = x^2 */
103    uECC_vli_modSub(result, result, _3, curve->p, num_words);       /* r = x^2 - 3 */
104    uECC_vli_modMult_fast(result, result, x, curve);                       /* r = x^3 - 3x */
105    uECC_vli_modAdd(result, result, curve->b, curve->p, num_words); /* r = x^3 - 3x + b */
106}
107#endif /* uECC_SUPPORTS_secp... */
108
109#if uECC_SUPPORT_COMPRESSED_POINT
110#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \
111    uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1
112/* Compute a = sqrt(a) (mod curve_p). */
113static void mod_sqrt_default(uECC_word_t *a, uECC_Curve curve) {
114    bitcount_t i;
115    uECC_word_t p1[uECC_MAX_WORDS] = {1};
116    uECC_word_t l_result[uECC_MAX_WORDS] = {1};
117    wordcount_t num_words = curve->num_words;
118
119    /* When curve->p == 3 (mod 4), we can compute
120       sqrt(a) = a^((curve->p + 1) / 4) (mod curve->p). */
121    uECC_vli_add(p1, curve->p, p1, num_words); /* p1 = curve_p + 1 */
122    for (i = uECC_vli_numBits(p1, num_words) - 1; i > 1; --i) {
123        uECC_vli_modSquare_fast(l_result, l_result, curve);
124        if (uECC_vli_testBit(p1, i)) {
125            uECC_vli_modMult_fast(l_result, l_result, a, curve);
126        }
127    }
128    uECC_vli_set(a, l_result, num_words);
129}
130#endif /* uECC_SUPPORTS_secp... */
131#endif /* uECC_SUPPORT_COMPRESSED_POINT */
132
133#if uECC_SUPPORTS_secp160r1
134
#if (uECC_OPTIMIZATION_LEVEL > 0)
/* Forward declaration: the curve table below stores a pointer to the fast
   reduction routine, which is defined further down. */
static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp160r1. Multi-byte constants are written
   least-significant byte first and packed into words by BYTES_TO_WORDS_*.
   NOTE(review): the field names mentioned below follow struct uECC_Curve_t,
   which is declared outside this section -- confirm against that declaration. */
static const struct uECC_Curve_t curve_secp160r1 = {
    num_words_secp160r1,
    num_bytes_secp160r1,
    161, /* num_n_bits */
    /* presumably p, the field prime: these bytes encode 2^160 - 2^31 - 1 */
    { BYTES_TO_WORDS_8(FF, FF, FF, 7F, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
    /* presumably n, the group order: bit 160 is set (161 bits), so it needs
       more bytes than the other constants */
    { BYTES_TO_WORDS_8(57, 22, 75, CA, D3, AE, 27, F9),
        BYTES_TO_WORDS_8(C8, F4, 01, 00, 00, 00, 00, 00),
        BYTES_TO_WORDS_8(00, 00, 00, 00, 01, 00, 00, 00) },
    /* presumably G, the base point: first coordinate, then second coordinate */
    { BYTES_TO_WORDS_8(82, FC, CB, 13, B9, 8B, C3, 68),
        BYTES_TO_WORDS_8(89, 69, 64, 46, 28, 73, F5, 8E),
        BYTES_TO_WORDS_4(68, B5, 96, 4A),

        BYTES_TO_WORDS_8(32, FB, C5, 7A, 37, 51, 23, 04),
        BYTES_TO_WORDS_8(12, C9, DC, 59, 7D, 94, 68, 31),
        BYTES_TO_WORDS_4(55, 28, A6, 23) },
    /* presumably b, the curve coefficient read by x_side_default via curve->b */
    { BYTES_TO_WORDS_8(45, FA, 65, C5, AD, D4, D4, 81),
        BYTES_TO_WORDS_8(9F, F8, AC, 65, 8B, 7A, BD, 54),
        BYTES_TO_WORDS_4(FC, BE, 97, 1C) },
    /* Curve-specific operations; two of them are only present when the
       matching build option is enabled. */
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp160r1
#endif
};
168
169uECC_Curve uECC_secp160r1(void) { return &curve_secp160r1; }
170
171#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1)
172/* Computes result = product % curve_p
173    see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354
174
175    Note that this only works if log2(omega) < log2(p) / 2 */
176static void omega_mult_secp160r1(uECC_word_t *result, const uECC_word_t *right);
177#if uECC_WORD_SIZE == 8
178static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
179    uECC_word_t tmp[2 * num_words_secp160r1];
180    uECC_word_t copy;
181
182    uECC_vli_clear(tmp, num_words_secp160r1);
183    uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);
184
185    omega_mult_secp160r1(tmp, product + num_words_secp160r1 - 1); /* (Rq, q) = q * c */
186
187    product[num_words_secp160r1 - 1] &= 0xffffffff;
188    copy = tmp[num_words_secp160r1 - 1];
189    tmp[num_words_secp160r1 - 1] &= 0xffffffff;
190    uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
191    uECC_vli_clear(product, num_words_secp160r1);
192    tmp[num_words_secp160r1 - 1] = copy;
193    omega_mult_secp160r1(product, tmp + num_words_secp160r1 - 1); /* Rq*c */
194    uECC_vli_add(result, result, product, num_words_secp160r1); /* (C1, r) = r + Rq*c */
195
196    while (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) {
197        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
198    }
199}
200
201static void omega_mult_secp160r1(uint64_t *result, const uint64_t *right) {
202    uint32_t carry;
203    unsigned i;
204
205    /* Multiply by (2^31 + 1). */
206    carry = 0;
207    for (i = 0; i < num_words_secp160r1; ++i) {
208        uint64_t tmp = (right[i] >> 32) | (right[i + 1] << 32);
209        result[i] = (tmp << 31) + tmp + carry;
210        carry = (tmp >> 33) + (result[i] < tmp || (carry && result[i] == tmp));
211    }
212    result[i] = carry;
213}
214#else
215static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
216    uECC_word_t tmp[2 * num_words_secp160r1];
217    uECC_word_t carry;
218
219    uECC_vli_clear(tmp, num_words_secp160r1);
220    uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);
221
222    omega_mult_secp160r1(tmp, product + num_words_secp160r1); /* (Rq, q) = q * c */
223
224    carry = uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
225    uECC_vli_clear(product, num_words_secp160r1);
226    omega_mult_secp160r1(product, tmp + num_words_secp160r1); /* Rq*c */
227    carry += uECC_vli_add(result, result, product, num_words_secp160r1); /* (C1, r) = r + Rq*c */
228
229    while (carry > 0) {
230        --carry;
231        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
232    }
233    if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) {
234        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
235    }
236}
237#endif
238
239#if uECC_WORD_SIZE == 1
240static void omega_mult_secp160r1(uint8_t *result, const uint8_t *right) {
241    uint8_t carry;
242    uint8_t i;
243
244    /* Multiply by (2^31 + 1). */
245    uECC_vli_set(result + 4, right, num_words_secp160r1); /* 2^32 */
246    uECC_vli_rshift1(result + 4, num_words_secp160r1); /* 2^31 */
247    result[3] = right[0] << 7; /* get last bit from shift */
248
249    carry = uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
250    for (i = num_words_secp160r1; carry; ++i) {
251        uint16_t sum = (uint16_t)result[i] + carry;
252        result[i] = (uint8_t)sum;
253        carry = sum >> 8;
254    }
255}
256#elif uECC_WORD_SIZE == 4
257static void omega_mult_secp160r1(uint32_t *result, const uint32_t *right) {
258    uint32_t carry;
259    unsigned i;
260
261    /* Multiply by (2^31 + 1). */
262    uECC_vli_set(result + 1, right, num_words_secp160r1); /* 2^32 */
263    uECC_vli_rshift1(result + 1, num_words_secp160r1); /* 2^31 */
264    result[0] = right[0] << 31; /* get last bit from shift */
265
266    carry = uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
267    for (i = num_words_secp160r1; carry; ++i) {
268        uint64_t sum = (uint64_t)result[i] + carry;
269        result[i] = (uint32_t)sum;
270        carry = sum >> 32;
271    }
272}
273#endif /* uECC_WORD_SIZE */
274#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) */
275
276#endif /* uECC_SUPPORTS_secp160r1 */
277
278#if uECC_SUPPORTS_secp192r1
279
#if (uECC_OPTIMIZATION_LEVEL > 0)
/* Forward declaration: the curve table below stores a pointer to the fast
   reduction routine, which is defined further down. */
static void vli_mmod_fast_secp192r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp192r1. Multi-byte constants are written
   least-significant byte first and packed into words by BYTES_TO_WORDS_*.
   NOTE(review): the field names mentioned below follow struct uECC_Curve_t,
   which is declared outside this section -- confirm against that declaration. */
static const struct uECC_Curve_t curve_secp192r1 = {
    num_words_secp192r1,
    num_bytes_secp192r1,
    192, /* num_n_bits */
    /* presumably p, the field prime: these bytes encode 2^192 - 2^64 - 1 */
    { BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
    /* presumably n, the group order */
    { BYTES_TO_WORDS_8(31, 28, D2, B4, B1, C9, 6B, 14),
        BYTES_TO_WORDS_8(36, F8, DE, 99, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
    /* presumably G, the base point: first coordinate, then second coordinate */
    { BYTES_TO_WORDS_8(12, 10, FF, 82, FD, 0A, FF, F4),
        BYTES_TO_WORDS_8(00, 88, A1, 43, EB, 20, BF, 7C),
        BYTES_TO_WORDS_8(F6, 90, 30, B0, 0E, A8, 8D, 18),

        BYTES_TO_WORDS_8(11, 48, 79, 1E, A1, 77, F9, 73),
        BYTES_TO_WORDS_8(D5, CD, 24, 6B, ED, 11, 10, 63),
        BYTES_TO_WORDS_8(78, DA, C8, FF, 95, 2B, 19, 07) },
    /* presumably b, the curve coefficient read by x_side_default via curve->b */
    { BYTES_TO_WORDS_8(B1, B9, 46, C1, EC, DE, B8, FE),
        BYTES_TO_WORDS_8(49, 30, 24, 72, AB, E9, A7, 0F),
        BYTES_TO_WORDS_8(E7, 80, 9C, E5, 19, 05, 21, 64) },
    /* Curve-specific operations; two of them are only present when the
       matching build option is enabled. */
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp192r1
#endif
};
313
314uECC_Curve uECC_secp192r1(void) { return &curve_secp192r1; }
315
316#if (uECC_OPTIMIZATION_LEVEL > 0)
317/* Computes result = product % curve_p.
318   See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf */
319#if uECC_WORD_SIZE == 1
320static void vli_mmod_fast_secp192r1(uint8_t *result, uint8_t *product) {
321    uint8_t tmp[num_words_secp192r1];
322    uint8_t carry;
323
324    uECC_vli_set(result, product, num_words_secp192r1);
325
326    uECC_vli_set(tmp, &product[24], num_words_secp192r1);
327    carry = uECC_vli_add(result, result, tmp, num_words_secp192r1);
328
329    tmp[0] = tmp[1] = tmp[2] = tmp[3] = tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
330    tmp[8] = product[24]; tmp[9] = product[25]; tmp[10] = product[26]; tmp[11] = product[27];
331    tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
332    tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
333    tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
334    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
335
336    tmp[0] = tmp[8] = product[40];
337    tmp[1] = tmp[9] = product[41];
338    tmp[2] = tmp[10] = product[42];
339    tmp[3] = tmp[11] = product[43];
340    tmp[4] = tmp[12] = product[44];
341    tmp[5] = tmp[13] = product[45];
342    tmp[6] = tmp[14] = product[46];
343    tmp[7] = tmp[15] = product[47];
344    tmp[16] = tmp[17] = tmp[18] = tmp[19] = tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
345    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
346
347    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
348        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
349    }
350}
351#elif uECC_WORD_SIZE == 4
352static void vli_mmod_fast_secp192r1(uint32_t *result, uint32_t *product) {
353    uint32_t tmp[num_words_secp192r1];
354    int carry;
355
356    uECC_vli_set(result, product, num_words_secp192r1);
357
358    uECC_vli_set(tmp, &product[6], num_words_secp192r1);
359    carry = uECC_vli_add(result, result, tmp, num_words_secp192r1);
360
361    tmp[0] = tmp[1] = 0;
362    tmp[2] = product[6];
363    tmp[3] = product[7];
364    tmp[4] = product[8];
365    tmp[5] = product[9];
366    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
367
368    tmp[0] = tmp[2] = product[10];
369    tmp[1] = tmp[3] = product[11];
370    tmp[4] = tmp[5] = 0;
371    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
372
373    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
374        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
375    }
376}
377#else
378static void vli_mmod_fast_secp192r1(uint64_t *result, uint64_t *product) {
379    uint64_t tmp[num_words_secp192r1];
380    int carry;
381
382    uECC_vli_set(result, product, num_words_secp192r1);
383
384    uECC_vli_set(tmp, &product[3], num_words_secp192r1);
385    carry = (int)uECC_vli_add(result, result, tmp, num_words_secp192r1);
386
387    tmp[0] = 0;
388    tmp[1] = product[3];
389    tmp[2] = product[4];
390    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
391
392    tmp[0] = tmp[1] = product[5];
393    tmp[2] = 0;
394    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
395
396    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
397        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
398    }
399}
400#endif /* uECC_WORD_SIZE */
401#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */
402
403#endif /* uECC_SUPPORTS_secp192r1 */
404
405#if uECC_SUPPORTS_secp224r1
406
/* Forward declarations: the curve table below stores pointers to these
   routines, which are defined further down. */
#if uECC_SUPPORT_COMPRESSED_POINT
static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve);
#endif
#if (uECC_OPTIMIZATION_LEVEL > 0)
static void vli_mmod_fast_secp224r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp224r1. Multi-byte constants are written
   least-significant byte first and packed into words by BYTES_TO_WORDS_*.
   Unlike the other curves in this file it uses its own square-root routine
   instead of mod_sqrt_default.
   NOTE(review): the field names mentioned below follow struct uECC_Curve_t,
   which is declared outside this section -- confirm against that declaration. */
static const struct uECC_Curve_t curve_secp224r1 = {
    num_words_secp224r1,
    num_bytes_secp224r1,
    224, /* num_n_bits */
    /* presumably p, the field prime: these bytes encode 2^224 - 2^96 + 1 */
    { BYTES_TO_WORDS_8(01, 00, 00, 00, 00, 00, 00, 00),
        BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
    /* presumably n, the group order */
    { BYTES_TO_WORDS_8(3D, 2A, 5C, 5C, 45, 29, DD, 13),
        BYTES_TO_WORDS_8(3E, F0, B8, E0, A2, 16, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
    /* presumably G, the base point: first coordinate, then second coordinate */
    { BYTES_TO_WORDS_8(21, 1D, 5C, 11, D6, 80, 32, 34),
        BYTES_TO_WORDS_8(22, 11, C2, 56, D3, C1, 03, 4A),
        BYTES_TO_WORDS_8(B9, 90, 13, 32, 7F, BF, B4, 6B),
        BYTES_TO_WORDS_4(BD, 0C, 0E, B7),

        BYTES_TO_WORDS_8(34, 7E, 00, 85, 99, 81, D5, 44),
        BYTES_TO_WORDS_8(64, 47, 07, 5A, A0, 75, 43, CD),
        BYTES_TO_WORDS_8(E6, DF, 22, 4C, FB, 23, F7, B5),
        BYTES_TO_WORDS_4(88, 63, 37, BD) },
    /* presumably b, the curve coefficient read by x_side_default via curve->b */
    { BYTES_TO_WORDS_8(B4, FF, 55, 23, 43, 39, 0B, 27),
        BYTES_TO_WORDS_8(BA, D8, BF, D7, B7, B0, 44, 50),
        BYTES_TO_WORDS_8(56, 32, 41, F5, AB, B3, 04, 0C),
        BYTES_TO_WORDS_4(85, 0A, 05, B4) },
    /* Curve-specific operations; two of them are only present when the
       matching build option is enabled. */
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_secp224r1,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp224r1
#endif
};
448
449uECC_Curve uECC_secp224r1(void) { return &curve_secp224r1; }
450
451
452#if uECC_SUPPORT_COMPRESSED_POINT
/* Routine 3.2.4 RS;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
/* One step of the secp224r1 square-root computation, all modulo
   curve_secp224r1.p:
       d1 = d0^2 + f0
       e1 = 2 * d0 * e0
       f1 = 4 * d0^2 * f0
   The RSS routine in this file calls this with each output aliasing the
   matching input (d1 == d0, e1 == e0, f1 == f0), so the statement order below
   matters: d0 is read for the last time before d1 is written, and f0 is read
   for the last time by the statement that first writes f1. */
static void mod_sqrt_secp224r1_rs(uECC_word_t *d1,
                                  uECC_word_t *e1,
                                  uECC_word_t *f1,
                                  const uECC_word_t *d0,
                                  const uECC_word_t *e0,
                                  const uECC_word_t *f0) {
    /* Holds d0^2, which is needed by both d1 and f1. */
    uECC_word_t t[num_words_secp224r1];

    uECC_vli_modSquare_fast(t, d0, &curve_secp224r1);                    /* t <-- d0 ^ 2 */
    uECC_vli_modMult_fast(e1, d0, e0, &curve_secp224r1);                 /* e1 <-- d0 * e0 */
    uECC_vli_modAdd(d1, t, f0, curve_secp224r1.p, num_words_secp224r1);  /* d1 <-- t  + f0 */
    uECC_vli_modAdd(e1, e1, e1, curve_secp224r1.p, num_words_secp224r1); /* e1 <-- e1 + e1 */
    uECC_vli_modMult_fast(f1, t, f0, &curve_secp224r1);                  /* f1 <-- t  * f0 */
    /* Two doublings give the factor of four. */
    uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- f1 + f1 */
    uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- f1 + f1 */
}
470
471/* Routine 3.2.5 RSS;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
472static void mod_sqrt_secp224r1_rss(uECC_word_t *d1,
473                                   uECC_word_t *e1,
474                                   uECC_word_t *f1,
475                                   const uECC_word_t *d0,
476                                   const uECC_word_t *e0,
477                                   const uECC_word_t *f0,
478                                   const bitcount_t j) {
479    bitcount_t i;
480
481    uECC_vli_set(d1, d0, num_words_secp224r1); /* d1 <-- d0 */
482    uECC_vli_set(e1, e0, num_words_secp224r1); /* e1 <-- e0 */
483    uECC_vli_set(f1, f0, num_words_secp224r1); /* f1 <-- f0 */
484    for (i = 1; i <= j; i++) {
485        mod_sqrt_secp224r1_rs(d1, e1, f1, d1, e1, f1); /* RS (d1,e1,f1,d1,e1,f1) */
486    }
487}
488
/* Routine 3.2.6 RM;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
/* Combines the pairs (d0, e0) and (d1, e1), all modulo curve_secp224r1.p:
       d2 = d0*d1 - c*e0*e1
       e2 = d1*e0 + d0*e1
       f2 = -c * e2^2
   The RP routine in this file calls this with d2 aliasing d0 and e2 aliasing
   e0. For that reason d2 is only written by the very last statement, and e2
   is first written by the statement that reads e0 for the last time. */
static void mod_sqrt_secp224r1_rm(uECC_word_t *d2,
                                  uECC_word_t *e2,
                                  uECC_word_t *f2,
                                  const uECC_word_t *c,
                                  const uECC_word_t *d0,
                                  const uECC_word_t *e0,
                                  const uECC_word_t *d1,
                                  const uECC_word_t *e1) {
    uECC_word_t t1[num_words_secp224r1];
    uECC_word_t t2[num_words_secp224r1]; /* accumulates d2 until the final copy */

    uECC_vli_modMult_fast(t1, e0, e1, &curve_secp224r1); /* t1 <-- e0 * e1 */
    uECC_vli_modMult_fast(t1, t1, c, &curve_secp224r1);  /* t1 <-- t1 * c */
    /* t1 <-- p  - t1 */
    uECC_vli_modSub(t1, curve_secp224r1.p, t1, curve_secp224r1.p, num_words_secp224r1);
    uECC_vli_modMult_fast(t2, d0, d1, &curve_secp224r1);                 /* t2 <-- d0 * d1 */
    uECC_vli_modAdd(t2, t2, t1, curve_secp224r1.p, num_words_secp224r1); /* t2 <-- t2 + t1 */
    uECC_vli_modMult_fast(t1, d0, e1, &curve_secp224r1);                 /* t1 <-- d0 * e1 */
    uECC_vli_modMult_fast(e2, d1, e0, &curve_secp224r1);                 /* e2 <-- d1 * e0 */
    uECC_vli_modAdd(e2, e2, t1, curve_secp224r1.p, num_words_secp224r1); /* e2 <-- e2 + t1 */
    uECC_vli_modSquare_fast(f2, e2, &curve_secp224r1);                   /* f2 <-- e2^2 */
    uECC_vli_modMult_fast(f2, f2, c, &curve_secp224r1);                  /* f2 <-- f2 * c */
    /* f2 <-- p  - f2 */
    uECC_vli_modSub(f2, curve_secp224r1.p, f2, curve_secp224r1.p, num_words_secp224r1);
    uECC_vli_set(d2, t2, num_words_secp224r1); /* d2 <-- t2 */
}
516
517/* Routine 3.2.7 RP;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
518static void mod_sqrt_secp224r1_rp(uECC_word_t *d1,
519                                  uECC_word_t *e1,
520                                  uECC_word_t *f1,
521                                  const uECC_word_t *c,
522                                  const uECC_word_t *r) {
523    wordcount_t i;
524    wordcount_t pow2i = 1;
525    uECC_word_t d0[num_words_secp224r1];
526    uECC_word_t e0[num_words_secp224r1] = {1}; /* e0 <-- 1 */
527    uECC_word_t f0[num_words_secp224r1];
528
529    uECC_vli_set(d0, r, num_words_secp224r1); /* d0 <-- r */
530    /* f0 <-- p  - c */
531    uECC_vli_modSub(f0, curve_secp224r1.p, c, curve_secp224r1.p, num_words_secp224r1);
532    for (i = 0; i <= 6; i++) {
533        mod_sqrt_secp224r1_rss(d1, e1, f1, d0, e0, f0, pow2i); /* RSS (d1,e1,f1,d0,e0,f0,2^i) */
534        mod_sqrt_secp224r1_rm(d1, e1, f1, c, d1, e1, d0, e0);  /* RM (d1,e1,f1,c,d1,e1,d0,e0) */
535        uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */
536        uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */
537        uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */
538        pow2i *= 2;
539    }
540}
541
542/* Compute a = sqrt(a) (mod curve_p). */
543/* Routine 3.2.8 mp_mod_sqrt_224; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
544static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve) {
545    bitcount_t i;
546    uECC_word_t e1[num_words_secp224r1];
547    uECC_word_t f1[num_words_secp224r1];
548    uECC_word_t d0[num_words_secp224r1];
549    uECC_word_t e0[num_words_secp224r1];
550    uECC_word_t f0[num_words_secp224r1];
551    uECC_word_t d1[num_words_secp224r1];
552
553    /* s = a; using constant instead of random value */
554    mod_sqrt_secp224r1_rp(d0, e0, f0, a, a);           /* RP (d0, e0, f0, c, s) */
555    mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0);     /* RS (d1, e1, f1, d0, e0, f0) */
556    for (i = 1; i <= 95; i++) {
557        uECC_vli_set(d0, d1, num_words_secp224r1);          /* d0 <-- d1 */
558        uECC_vli_set(e0, e1, num_words_secp224r1);          /* e0 <-- e1 */
559        uECC_vli_set(f0, f1, num_words_secp224r1);          /* f0 <-- f1 */
560        mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0); /* RS (d1, e1, f1, d0, e0, f0) */
561        if (uECC_vli_isZero(d1, num_words_secp224r1)) {     /* if d1 == 0 */
562                break;
563        }
564    }
565    uECC_vli_modInv(f1, e0, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- 1 / e0 */
566    uECC_vli_modMult_fast(a, d0, f1, &curve_secp224r1);              /* a  <-- d0 / e0 */
567}
568#endif /* uECC_SUPPORT_COMPRESSED_POINT */
569
570#if (uECC_OPTIMIZATION_LEVEL > 0)
571/* Computes result = product % curve_p
572   from http://www.nsa.gov/ia/_files/nist-routines.pdf */
573#if uECC_WORD_SIZE == 1
574static void vli_mmod_fast_secp224r1(uint8_t *result, uint8_t *product) {
575    uint8_t tmp[num_words_secp224r1];
576    int8_t carry;
577
578    /* t */
579    uECC_vli_set(result, product, num_words_secp224r1);
580
581    /* s1 */
582    tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
583    tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
584    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
585    tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
586    tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
587    tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
588    tmp[24] = product[40]; tmp[25] = product[41]; tmp[26] = product[42]; tmp[27] = product[43];
589    carry = uECC_vli_add(result, result, tmp, num_words_secp224r1);
590
591    /* s2 */
592    tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
593    tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
594    tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
595    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
596    carry += uECC_vli_add(result, result, tmp, num_words_secp224r1);
597
598    /* d1 */
599    tmp[0]  = product[28]; tmp[1]  = product[29]; tmp[2]  = product[30]; tmp[3]  = product[31];
600    tmp[4]  = product[32]; tmp[5]  = product[33]; tmp[6]  = product[34]; tmp[7]  = product[35];
601    tmp[8]  = product[36]; tmp[9]  = product[37]; tmp[10] = product[38]; tmp[11] = product[39];
602    tmp[12] = product[40]; tmp[13] = product[41]; tmp[14] = product[42]; tmp[15] = product[43];
603    tmp[16] = product[44]; tmp[17] = product[45]; tmp[18] = product[46]; tmp[19] = product[47];
604    tmp[20] = product[48]; tmp[21] = product[49]; tmp[22] = product[50]; tmp[23] = product[51];
605    tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
606    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
607
608    /* d2 */
609    tmp[0]  = product[44]; tmp[1]  = product[45]; tmp[2]  = product[46]; tmp[3]  = product[47];
610    tmp[4]  = product[48]; tmp[5]  = product[49]; tmp[6]  = product[50]; tmp[7]  = product[51];
611    tmp[8]  = product[52]; tmp[9]  = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
612    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
613    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
614    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
615    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
616    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
617
618    if (carry < 0) {
619        do {
620            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
621        } while (carry < 0);
622    } else {
623        while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
624            carry -= uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
625        }
626    }
627}
628#elif uECC_WORD_SIZE == 4
629static void vli_mmod_fast_secp224r1(uint32_t *result, uint32_t *product)
630{
631    uint32_t tmp[num_words_secp224r1];
632    int carry;
633
634    /* t */
635    uECC_vli_set(result, product, num_words_secp224r1);
636
637    /* s1 */
638    tmp[0] = tmp[1] = tmp[2] = 0;
639    tmp[3] = product[7];
640    tmp[4] = product[8];
641    tmp[5] = product[9];
642    tmp[6] = product[10];
643    carry = uECC_vli_add(result, result, tmp, num_words_secp224r1);
644
645    /* s2 */
646    tmp[3] = product[11];
647    tmp[4] = product[12];
648    tmp[5] = product[13];
649    tmp[6] = 0;
650    carry += uECC_vli_add(result, result, tmp, num_words_secp224r1);
651
652    /* d1 */
653    tmp[0] = product[7];
654    tmp[1] = product[8];
655    tmp[2] = product[9];
656    tmp[3] = product[10];
657    tmp[4] = product[11];
658    tmp[5] = product[12];
659    tmp[6] = product[13];
660    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
661
662    /* d2 */
663    tmp[0] = product[11];
664    tmp[1] = product[12];
665    tmp[2] = product[13];
666    tmp[3] = tmp[4] = tmp[5] = tmp[6] = 0;
667    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
668
669    if (carry < 0) {
670        do {
671            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
672        } while (carry < 0);
673    } else {
674        while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
675            carry -= uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
676        }
677    }
678}
679#else
680static void vli_mmod_fast_secp224r1(uint64_t *result, uint64_t *product)
681{
682    uint64_t tmp[num_words_secp224r1];
683    int carry = 0;
684
685    /* t */
686    uECC_vli_set(result, product, num_words_secp224r1);
687    result[num_words_secp224r1 - 1] &= 0xffffffff;
688
689    /* s1 */
690    tmp[0] = 0;
691    tmp[1] = product[3] & 0xffffffff00000000ull;
692    tmp[2] = product[4];
693    tmp[3] = product[5] & 0xffffffff;
694    uECC_vli_add(result, result, tmp, num_words_secp224r1);
695
696    /* s2 */
697    tmp[1] = product[5] & 0xffffffff00000000ull;
698    tmp[2] = product[6];
699    tmp[3] = 0;
700    uECC_vli_add(result, result, tmp, num_words_secp224r1);
701
702    /* d1 */
703    tmp[0] = (product[3] >> 32) | (product[4] << 32);
704    tmp[1] = (product[4] >> 32) | (product[5] << 32);
705    tmp[2] = (product[5] >> 32) | (product[6] << 32);
706    tmp[3] = product[6] >> 32;
707    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
708
709    /* d2 */
710    tmp[0] = (product[5] >> 32) | (product[6] << 32);
711    tmp[1] = product[6] >> 32;
712    tmp[2] = tmp[3] = 0;
713    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
714
715    if (carry < 0) {
716        do {
717            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
718        } while (carry < 0);
719    } else {
720        while (uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
721            uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
722        }
723    }
724}
725#endif /* uECC_WORD_SIZE */
726#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */
727
728#endif /* uECC_SUPPORTS_secp224r1 */
729
730#if uECC_SUPPORTS_secp256r1
731
#if (uECC_OPTIMIZATION_LEVEL > 0)
/* Forward declaration: the curve table below stores a pointer to the fast
   reduction routine, which is defined further down. */
static void vli_mmod_fast_secp256r1(uECC_word_t *result, uECC_word_t *product);
#endif

/* Parameter table for secp256r1. Multi-byte constants are written
   least-significant byte first and packed into words by BYTES_TO_WORDS_*.
   NOTE(review): the field names mentioned below follow struct uECC_Curve_t,
   which is declared outside this section -- confirm against that declaration. */
static const struct uECC_Curve_t curve_secp256r1 = {
    num_words_secp256r1,
    num_bytes_secp256r1,
    256, /* num_n_bits */
    /* presumably p, the field prime: these bytes encode
       2^256 - 2^224 + 2^192 + 2^96 - 1 */
    { BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, 00, 00, 00, 00),
        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
        BYTES_TO_WORDS_8(01, 00, 00, 00, FF, FF, FF, FF) },
    /* presumably n, the group order */
    { BYTES_TO_WORDS_8(51, 25, 63, FC, C2, CA, B9, F3),
        BYTES_TO_WORDS_8(84, 9E, 17, A7, AD, FA, E6, BC),
        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
        BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF) },
    /* presumably G, the base point: first coordinate, then second coordinate */
    { BYTES_TO_WORDS_8(96, C2, 98, D8, 45, 39, A1, F4),
        BYTES_TO_WORDS_8(A0, 33, EB, 2D, 81, 7D, 03, 77),
        BYTES_TO_WORDS_8(F2, 40, A4, 63, E5, E6, BC, F8),
        BYTES_TO_WORDS_8(47, 42, 2C, E1, F2, D1, 17, 6B),

        BYTES_TO_WORDS_8(F5, 51, BF, 37, 68, 40, B6, CB),
        BYTES_TO_WORDS_8(CE, 5E, 31, 6B, 57, 33, CE, 2B),
        BYTES_TO_WORDS_8(16, 9E, 0F, 7C, 4A, EB, E7, 8E),
        BYTES_TO_WORDS_8(9B, 7F, 1A, FE, E2, 42, E3, 4F) },
    /* presumably b, the curve coefficient read by x_side_default via curve->b */
    { BYTES_TO_WORDS_8(4B, 60, D2, 27, 3E, 3C, CE, 3B),
        BYTES_TO_WORDS_8(F6, B0, 53, CC, B0, 06, 1D, 65),
        BYTES_TO_WORDS_8(BC, 86, 98, 76, 55, BD, EB, B3),
        BYTES_TO_WORDS_8(E7, 93, 3A, AA, D8, 35, C6, 5A) },
    /* Curve-specific operations; two of them are only present when the
       matching build option is enabled. */
    &double_jacobian_default,
#if uECC_SUPPORT_COMPRESSED_POINT
    &mod_sqrt_default,
#endif
    &x_side_default,
#if (uECC_OPTIMIZATION_LEVEL > 0)
    &vli_mmod_fast_secp256r1
#endif
};
770
771uECC_Curve uECC_secp256r1(void) { return &curve_secp256r1; }
772
773
774#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1)
775/* Computes result = product % curve_p
776   from http://www.nsa.gov/ia/_files/nist-routines.pdf */
777#if uECC_WORD_SIZE == 1
778static void vli_mmod_fast_secp256r1(uint8_t *result, uint8_t *product) {
779    uint8_t tmp[num_words_secp256r1];
780    int8_t carry;
781
782    /* t */
783    uECC_vli_set(result, product, num_words_secp256r1);
784
785    /* s1 */
786    tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
787    tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
788    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
789    tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
790    tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
791    tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
792    tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
793    tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
794    carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
795    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
796
797    /* s2 */
798    tmp[12] = product[48]; tmp[13] = product[49]; tmp[14] = product[50]; tmp[15] = product[51];
799    tmp[16] = product[52]; tmp[17] = product[53]; tmp[18] = product[54]; tmp[19] = product[55];
800    tmp[20] = product[56]; tmp[21] = product[57]; tmp[22] = product[58]; tmp[23] = product[59];
801    tmp[24] = product[60]; tmp[25] = product[61]; tmp[26] = product[62]; tmp[27] = product[63];
802    tmp[28] = tmp[29] = tmp[30] = tmp[31] = 0;
803    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
804    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
805
806    /* s3 */
807    tmp[0] = product[32]; tmp[1] = product[33]; tmp[2] = product[34]; tmp[3] = product[35];
808    tmp[4] = product[36]; tmp[5] = product[37]; tmp[6] = product[38]; tmp[7] = product[39];
809    tmp[8] = product[40]; tmp[9] = product[41]; tmp[10] = product[42]; tmp[11] = product[43];
810    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
811    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
812    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
813    tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
814    tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
815    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
816
817    /* s4 */
818    tmp[0] = product[36]; tmp[1] = product[37]; tmp[2] = product[38]; tmp[3] = product[39];
819    tmp[4] = product[40]; tmp[5] = product[41]; tmp[6] = product[42]; tmp[7] = product[43];
820    tmp[8] = product[44]; tmp[9] = product[45]; tmp[10] = product[46]; tmp[11] = product[47];
821    tmp[12] = product[52]; tmp[13] = product[53]; tmp[14] = product[54]; tmp[15] = product[55];
822    tmp[16] = product[56]; tmp[17] = product[57]; tmp[18] = product[58]; tmp[19] = product[59];
823    tmp[20] = product[60]; tmp[21] = product[61]; tmp[22] = product[62]; tmp[23] = product[63];
824    tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
825    tmp[28] = product[32]; tmp[29] = product[33]; tmp[30] = product[34]; tmp[31] = product[35];
826    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
827
828    /* d1 */
829    tmp[0] = product[44]; tmp[1] = product[45]; tmp[2] = product[46]; tmp[3] = product[47];
830    tmp[4] = product[48]; tmp[5] = product[49]; tmp[6] = product[50]; tmp[7] = product[51];
831    tmp[8] = product[52]; tmp[9] = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
832    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
833    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
834    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
835    tmp[24] = product[32]; tmp[25] = product[33]; tmp[26] = product[34]; tmp[27] = product[35];
836    tmp[28] = product[40]; tmp[29] = product[41]; tmp[30] = product[42]; tmp[31] = product[43];
837    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
838
839    /* d2 */
840    tmp[0] = product[48]; tmp[1] = product[49]; tmp[2] = product[50]; tmp[3] = product[51];
841    tmp[4] = product[52]; tmp[5] = product[53]; tmp[6] = product[54]; tmp[7] = product[55];
842    tmp[8] = product[56]; tmp[9] = product[57]; tmp[10] = product[58]; tmp[11] = product[59];
843    tmp[12] = product[60]; tmp[13] = product[61]; tmp[14] = product[62]; tmp[15] = product[63];
844    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
845    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
846    tmp[24] = product[36]; tmp[25] = product[37]; tmp[26] = product[38]; tmp[27] = product[39];
847    tmp[28] = product[44]; tmp[29] = product[45]; tmp[30] = product[46]; tmp[31] = product[47];
848    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
849
850    /* d3 */
851    tmp[0] = product[52]; tmp[1] = product[53]; tmp[2] = product[54]; tmp[3] = product[55];
852    tmp[4] = product[56]; tmp[5] = product[57]; tmp[6] = product[58]; tmp[7] = product[59];
853    tmp[8] = product[60]; tmp[9] = product[61]; tmp[10] = product[62]; tmp[11] = product[63];
854    tmp[12] = product[32]; tmp[13] = product[33]; tmp[14] = product[34]; tmp[15] = product[35];
855    tmp[16] = product[36]; tmp[17] = product[37]; tmp[18] = product[38]; tmp[19] = product[39];
856    tmp[20] = product[40]; tmp[21] = product[41]; tmp[22] = product[42]; tmp[23] = product[43];
857    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
858    tmp[28] = product[48]; tmp[29] = product[49]; tmp[30] = product[50]; tmp[31] = product[51];
859    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
860
861    /* d4 */
862    tmp[0] = product[56]; tmp[1] = product[57]; tmp[2] = product[58]; tmp[3] = product[59];
863    tmp[4] = product[60]; tmp[5] = product[61]; tmp[6] = product[62]; tmp[7] = product[63];
864    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
865    tmp[12] = product[36]; tmp[13] = product[37]; tmp[14] = product[38]; tmp[15] = product[39];
866    tmp[16] = product[40]; tmp[17] = product[41]; tmp[18] = product[42]; tmp[19] = product[43];
867    tmp[20] = product[44]; tmp[21] = product[45]; tmp[22] = product[46]; tmp[23] = product[47];
868    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
869    tmp[28] = product[52]; tmp[29] = product[53]; tmp[30] = product[54]; tmp[31] = product[55];
870    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
871
872    if (carry < 0) {
873        do {
874            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
875        } while (carry < 0);
876    } else {
877        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
878            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
879        }
880    }
881}
882#elif uECC_WORD_SIZE == 4
883static void vli_mmod_fast_secp256r1(uint32_t *result, uint32_t *product) {
884    uint32_t tmp[num_words_secp256r1];
885    int carry;
886
887    /* t */
888    uECC_vli_set(result, product, num_words_secp256r1);
889
890    /* s1 */
891    tmp[0] = tmp[1] = tmp[2] = 0;
892    tmp[3] = product[11];
893    tmp[4] = product[12];
894    tmp[5] = product[13];
895    tmp[6] = product[14];
896    tmp[7] = product[15];
897    carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
898    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
899
900    /* s2 */
901    tmp[3] = product[12];
902    tmp[4] = product[13];
903    tmp[5] = product[14];
904    tmp[6] = product[15];
905    tmp[7] = 0;
906    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
907    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
908
909    /* s3 */
910    tmp[0] = product[8];
911    tmp[1] = product[9];
912    tmp[2] = product[10];
913    tmp[3] = tmp[4] = tmp[5] = 0;
914    tmp[6] = product[14];
915    tmp[7] = product[15];
916    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
917
918    /* s4 */
919    tmp[0] = product[9];
920    tmp[1] = product[10];
921    tmp[2] = product[11];
922    tmp[3] = product[13];
923    tmp[4] = product[14];
924    tmp[5] = product[15];
925    tmp[6] = product[13];
926    tmp[7] = product[8];
927    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
928
929    /* d1 */
930    tmp[0] = product[11];
931    tmp[1] = product[12];
932    tmp[2] = product[13];
933    tmp[3] = tmp[4] = tmp[5] = 0;
934    tmp[6] = product[8];
935    tmp[7] = product[10];
936    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
937
938    /* d2 */
939    tmp[0] = product[12];
940    tmp[1] = product[13];
941    tmp[2] = product[14];
942    tmp[3] = product[15];
943    tmp[4] = tmp[5] = 0;
944    tmp[6] = product[9];
945    tmp[7] = product[11];
946    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
947
948    /* d3 */
949    tmp[0] = product[13];
950    tmp[1] = product[14];
951    tmp[2] = product[15];
952    tmp[3] = product[8];
953    tmp[4] = product[9];
954    tmp[5] = product[10];
955    tmp[6] = 0;
956    tmp[7] = product[12];
957    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
958
959    /* d4 */
960    tmp[0] = product[14];
961    tmp[1] = product[15];
962    tmp[2] = 0;
963    tmp[3] = product[9];
964    tmp[4] = product[10];
965    tmp[5] = product[11];
966    tmp[6] = 0;
967    tmp[7] = product[13];
968    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
969
970    if (carry < 0) {
971        do {
972            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
973        } while (carry < 0);
974    } else {
975        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
976            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
977        }
978    }
979}
980#else
981static void vli_mmod_fast_secp256r1(uint64_t *result, uint64_t *product) {
982    uint64_t tmp[num_words_secp256r1];
983    int carry;
984
985    /* t */
986    uECC_vli_set(result, product, num_words_secp256r1);
987
988    /* s1 */
989    tmp[0] = 0;
990    tmp[1] = product[5] & 0xffffffff00000000ull;
991    tmp[2] = product[6];
992    tmp[3] = product[7];
993    carry = (int)uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
994    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
995
996    /* s2 */
997    tmp[1] = product[6] << 32;
998    tmp[2] = (product[6] >> 32) | (product[7] << 32);
999    tmp[3] = product[7] >> 32;
1000    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
1001    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
1002
1003    /* s3 */
1004    tmp[0] = product[4];
1005    tmp[1] = product[5] & 0xffffffff;
1006    tmp[2] = 0;
1007    tmp[3] = product[7];
1008    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
1009
1010    /* s4 */
1011    tmp[0] = (product[4] >> 32) | (product[5] << 32);
1012    tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
1013    tmp[2] = product[7];
1014    tmp[3] = (product[6] >> 32) | (product[4] << 32);
1015    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
1016
1017    /* d1 */
1018    tmp[0] = (product[5] >> 32) | (product[6] << 32);
1019    tmp[1] = (product[6] >> 32);
1020    tmp[2] = 0;
1021    tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
1022    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
1023
1024    /* d2 */
1025    tmp[0] = product[6];
1026    tmp[1] = product[7];
1027    tmp[2] = 0;
1028    tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
1029    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
1030
1031    /* d3 */
1032    tmp[0] = (product[6] >> 32) | (product[7] << 32);
1033    tmp[1] = (product[7] >> 32) | (product[4] << 32);
1034    tmp[2] = (product[4] >> 32) | (product[5] << 32);
1035    tmp[3] = (product[6] << 32);
1036    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
1037
1038    /* d4 */
1039    tmp[0] = product[7];
1040    tmp[1] = product[4] & 0xffffffff00000000ull;
1041    tmp[2] = product[5];
1042    tmp[3] = product[6] & 0xffffffff00000000ull;
1043    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
1044
1045    if (carry < 0) {
1046        do {
1047            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
1048        } while (carry < 0);
1049    } else {
1050        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
1051            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
1052        }
1053    }
1054}
1055#endif /* uECC_WORD_SIZE */
1056#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) */
1057
1058#endif /* uECC_SUPPORTS_secp256r1 */
1059
1060#if uECC_SUPPORTS_secp256k1
1061
1062static void double_jacobian_secp256k1(uECC_word_t * X1,
1063                                      uECC_word_t * Y1,
1064                                      uECC_word_t * Z1,
1065                                      uECC_Curve curve);
1066static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve);
1067#if (uECC_OPTIMIZATION_LEVEL > 0)
1068static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product);
1069#endif
1070
1071static const struct uECC_Curve_t curve_secp256k1 = {
1072    num_words_secp256k1,
1073    num_bytes_secp256k1,
1074    256, /* num_n_bits */
1075    { BYTES_TO_WORDS_8(2F, FC, FF, FF, FE, FF, FF, FF),
1076        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
1077        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
1078        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
1079    { BYTES_TO_WORDS_8(41, 41, 36, D0, 8C, 5E, D2, BF),
1080        BYTES_TO_WORDS_8(3B, A0, 48, AF, E6, DC, AE, BA),
1081        BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF),
1082        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
1083    { BYTES_TO_WORDS_8(98, 17, F8, 16, 5B, 81, F2, 59),
1084        BYTES_TO_WORDS_8(D9, 28, CE, 2D, DB, FC, 9B, 02),
1085        BYTES_TO_WORDS_8(07, 0B, 87, CE, 95, 62, A0, 55),
1086        BYTES_TO_WORDS_8(AC, BB, DC, F9, 7E, 66, BE, 79),
1087
1088        BYTES_TO_WORDS_8(B8, D4, 10, FB, 8F, D0, 47, 9C),
1089        BYTES_TO_WORDS_8(19, 54, 85, A6, 48, B4, 17, FD),
1090        BYTES_TO_WORDS_8(A8, 08, 11, 0E, FC, FB, A4, 5D),
1091        BYTES_TO_WORDS_8(65, C4, A3, 26, 77, DA, 3A, 48) },
1092    { BYTES_TO_WORDS_8(07, 00, 00, 00, 00, 00, 00, 00),
1093        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
1094        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
1095        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00) },
1096    &double_jacobian_secp256k1,
1097#if uECC_SUPPORT_COMPRESSED_POINT
1098    &mod_sqrt_default,
1099#endif
1100    &x_side_secp256k1,
1101#if (uECC_OPTIMIZATION_LEVEL > 0)
1102    &vli_mmod_fast_secp256k1
1103#endif
1104};
1105
1106uECC_Curve uECC_secp256k1(void) { return &curve_secp256k1; }
1107
1108
1109/* Double in place */
1110static void double_jacobian_secp256k1(uECC_word_t * X1,
1111                                      uECC_word_t * Y1,
1112                                      uECC_word_t * Z1,
1113                                      uECC_Curve curve) {
1114    /* t1 = X, t2 = Y, t3 = Z */
1115    uECC_word_t t4[num_words_secp256k1];
1116    uECC_word_t t5[num_words_secp256k1];
1117
1118    if (uECC_vli_isZero(Z1, num_words_secp256k1)) {
1119        return;
1120    }
1121
1122    uECC_vli_modSquare_fast(t5, Y1, curve);   /* t5 = y1^2 */
1123    uECC_vli_modMult_fast(t4, X1, t5, curve); /* t4 = x1*y1^2 = A */
1124    uECC_vli_modSquare_fast(X1, X1, curve);   /* t1 = x1^2 */
1125    uECC_vli_modSquare_fast(t5, t5, curve);   /* t5 = y1^4 */
1126    uECC_vli_modMult_fast(Z1, Y1, Z1, curve); /* t3 = y1*z1 = z3 */
1127
1128    uECC_vli_modAdd(Y1, X1, X1, curve->p, num_words_secp256k1); /* t2 = 2*x1^2 */
1129    uECC_vli_modAdd(Y1, Y1, X1, curve->p, num_words_secp256k1); /* t2 = 3*x1^2 */
1130    if (uECC_vli_testBit(Y1, 0)) {
1131        uECC_word_t carry = uECC_vli_add(Y1, Y1, curve->p, num_words_secp256k1);
1132        uECC_vli_rshift1(Y1, num_words_secp256k1);
1133        Y1[num_words_secp256k1 - 1] |= carry << (uECC_WORD_BITS - 1);
1134    } else {
1135        uECC_vli_rshift1(Y1, num_words_secp256k1);
1136    }
1137    /* t2 = 3/2*(x1^2) = B */
1138
1139    uECC_vli_modSquare_fast(X1, Y1, curve);                     /* t1 = B^2 */
1140    uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - A */
1141    uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - 2A = x3 */
1142
1143    uECC_vli_modSub(t4, t4, X1, curve->p, num_words_secp256k1); /* t4 = A - x3 */
1144    uECC_vli_modMult_fast(Y1, Y1, t4, curve);                   /* t2 = B * (A - x3) */
1145    uECC_vli_modSub(Y1, Y1, t5, curve->p, num_words_secp256k1); /* t2 = B * (A - x3) - y1^4 = y3 */
1146}
1147
1148/* Computes result = x^3 + b. result must not overlap x. */
1149static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve) {
1150    uECC_vli_modSquare_fast(result, x, curve);                                /* r = x^2 */
1151    uECC_vli_modMult_fast(result, result, x, curve);                          /* r = x^3 */
1152    uECC_vli_modAdd(result, result, curve->b, curve->p, num_words_secp256k1); /* r = x^3 + b */
1153}
1154
1155#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256k1)
1156static void omega_mult_secp256k1(uECC_word_t *result, const uECC_word_t *right);
1157static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) {
1158    uECC_word_t tmp[2 * num_words_secp256k1];
1159    uECC_word_t carry;
1160
1161    uECC_vli_clear(tmp, num_words_secp256k1);
1162    uECC_vli_clear(tmp + num_words_secp256k1, num_words_secp256k1);
1163
1164    omega_mult_secp256k1(tmp, product + num_words_secp256k1); /* (Rq, q) = q * c */
1165
1166    carry = uECC_vli_add(result, product, tmp, num_words_secp256k1); /* (C, r) = r + q       */
1167    uECC_vli_clear(product, num_words_secp256k1);
1168    omega_mult_secp256k1(product, tmp + num_words_secp256k1); /* Rq*c */
1169    carry += uECC_vli_add(result, result, product, num_words_secp256k1); /* (C1, r) = r + Rq*c */
1170
1171    while (carry > 0) {
1172        --carry;
1173        uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1);
1174    }
1175    if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, num_words_secp256k1) > 0) {
1176        uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1);
1177    }
1178}
1179
1180#if uECC_WORD_SIZE == 1
1181static void omega_mult_secp256k1(uint8_t * result, const uint8_t * right) {
1182    /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1183    uECC_word_t r0 = 0;
1184    uECC_word_t r1 = 0;
1185    uECC_word_t r2 = 0;
1186    wordcount_t k;
1187
1188    /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1189    muladd(0xD1, right[0], &r0, &r1, &r2);
1190    result[0] = r0;
1191    r0 = r1;
1192    r1 = r2;
1193    /* r2 is still 0 */
1194
1195    for (k = 1; k < num_words_secp256k1; ++k) {
1196        muladd(0x03, right[k - 1], &r0, &r1, &r2);
1197        muladd(0xD1, right[k], &r0, &r1, &r2);
1198        result[k] = r0;
1199        r0 = r1;
1200        r1 = r2;
1201        r2 = 0;
1202    }
1203    muladd(0x03, right[num_words_secp256k1 - 1], &r0, &r1, &r2);
1204    result[num_words_secp256k1] = r0;
1205    result[num_words_secp256k1 + 1] = r1;
1206    /* add the 2^32 multiple */
1207    result[4 + num_words_secp256k1] =
1208        uECC_vli_add(result + 4, result + 4, right, num_words_secp256k1);
1209}
1210#elif uECC_WORD_SIZE == 4
1211static void omega_mult_secp256k1(uint32_t * result, const uint32_t * right) {
1212    /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1213    uint32_t carry = 0;
1214    wordcount_t k;
1215
1216    for (k = 0; k < num_words_secp256k1; ++k) {
1217        uint64_t p = (uint64_t)0x3D1 * right[k] + carry;
1218        result[k] = (uint32_t) p;
1219        carry = p >> 32;
1220    }
1221    result[num_words_secp256k1] = carry;
1222    /* add the 2^32 multiple */
1223    result[1 + num_words_secp256k1] =
1224        uECC_vli_add(result + 1, result + 1, right, num_words_secp256k1);
1225}
1226#else
1227static void omega_mult_secp256k1(uint64_t * result, const uint64_t * right) {
1228    uECC_word_t r0 = 0;
1229    uECC_word_t r1 = 0;
1230    uECC_word_t r2 = 0;
1231    wordcount_t k;
1232
1233    /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
1234    for (k = 0; k < num_words_secp256k1; ++k) {
1235        muladd(0x1000003D1ull, right[k], &r0, &r1, &r2);
1236        result[k] = r0;
1237        r0 = r1;
1238        r1 = r2;
1239        r2 = 0;
1240    }
1241    result[num_words_secp256k1] = r0;
1242}
1243#endif /* uECC_WORD_SIZE */
#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256k1) */
1245
1246#endif /* uECC_SUPPORTS_secp256k1 */
1247
1248#endif /* _UECC_CURVE_SPECIFIC_H_ */
1249