1 /* sp_cdsp_signed.c
2  *
3  * Copyright (C) 2006-2021 wolfSSL Inc.
4  *
5  * This file is part of wolfSSL.
6  *
7  * wolfSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * wolfSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20  */
21 
22 /* from wolfcrypt/src/sp_c32.c */
23 
24 #ifdef HAVE_CONFIG_H
25     #include <config.h>
26 #endif
27 
28 #include <wolfssl/wolfcrypt/settings.h>
29 #include <wolfssl/wolfcrypt/error-crypt.h>
30 #include <wolfssl/wolfcrypt/cpuid.h>
31 #ifdef NO_INLINE
32     #include <wolfssl/wolfcrypt/misc.h>
33 #else
34     #define WOLFSSL_MISC_INCLUDED
35     #include <wolfcrypt/src/misc.c>
36 #endif
37 
38 #if defined(WOLFSSL_HAVE_SP_ECC)
39 #ifdef WOLFSSL_DSP
40 
41 #include <wolfssl/wolfcrypt/sp.h>
42 #include "remote.h"
43 #include "hexagon_protos.h"
44 #include "hexagon_types.h"
45 
#if (!defined(WC_NO_CACHE_RESISTANT) || defined(WOLFSSL_SP_SMALL)) && \
             (defined(WOLFSSL_HAVE_SP_ECC) || !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Masks for an address, used to obfuscate which of two addresses will be
 * used. Entry 0 has no bits set and entry 1 has every bit set.
 */
static const size_t addr_mask[2] = { 0, (size_t)-1 };
#endif
51 
52 #ifdef WOLFSSL_HAVE_SP_ECC
53 #ifndef WOLFSSL_SP_NO_256
54 
/* Point structure to use.
 *
 * Each ordinate has room for 2 * 10 digits and is aligned on a 128 byte
 * boundary.
 */
typedef struct sp_point {
    /* X ordinate. */
    sp_digit x[2 * 10] __attribute__((aligned(128)));
    /* Y ordinate. */
    sp_digit y[2 * 10] __attribute__((aligned(128)));
    /* Z ordinate. */
    sp_digit z[2 * 10] __attribute__((aligned(128)));
    /* Infinity flag; the base point is initialized with 0 here.
     * NOTE(review): presumably non-zero marks the point at infinity - confirm
     * against the code that reads this field. */
    int infinity;
} sp_point;
62 
/* The modulus (prime) of the curve P256.
 * Held as ten digits of 26 bits, least significant digit first.
 */
static const sp_digit p256_mod[10] __attribute__((aligned(128))) = {
    0x3ffffff,0x3ffffff,0x3ffffff,0x003ffff,0x0000000,0x0000000,0x0000000,
    0x0000400,0x3ff0000,0x03fffff
};
#ifndef WOLFSSL_SP_SMALL
/* The Montgomery normalizer for modulus of the curve P256.
 * Only compiled when WOLFSSL_SP_SMALL is not defined.
 */
static const sp_digit p256_norm_mod[10] __attribute__((aligned(128))) = {
    0x0000001,0x0000000,0x0000000,0x3fc0000,0x3ffffff,0x3ffffff,0x3ffffff,
    0x3fffbff,0x000ffff,0x0000000
};
#endif /* WOLFSSL_SP_SMALL */
/* The Montgomery multiplier for modulus of the curve P256.
 * sp_256_mont_reduce_10() takes a separate code path when it is passed a
 * multiplier equal to 1.
 */
static const sp_digit p256_mp_mod __attribute__((aligned(128))) = 0x000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P256.
 * Held as ten digits of 26 bits, least significant digit first.
 */
static const sp_digit p256_order[10] __attribute__((aligned(128))) = {
    0x0632551,0x272b0bf,0x1e84f3b,0x2b69c5e,0x3bce6fa,0x3ffffff,0x3ffffff,
    0x00003ff,0x3ff0000,0x03fffff
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P256.
 * Only compiled for builds with signing or verification.
 */
static const sp_digit p256_norm_order[10] __attribute__((aligned(128))) = {
    0x39cdaaf,0x18d4f40,0x217b0c4,0x14963a1,0x0431905,0x0000000,0x0000000,
    0x3fffc00,0x000ffff,0x0000000
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P256.
 * Only compiled for builds with signing or verification.
 */
static const sp_digit p256_mp_order __attribute__((aligned(128))) = 0x200bc4f;
#endif
/* The base point of curve P256.
 *
 * Each ordinate is given as ten digits of 26 bits, least significant digit
 * first, followed by ten zero digits that fill the 2 * 10 digit arrays of
 * sp_point. The Z ordinate is one and the infinity flag is clear.
 */
static const sp_point p256_base __attribute__((aligned(128))) = {
    /* X ordinate */
    {
        0x098c296,0x04e5176,0x33a0f4a,0x204b7ac,0x277037d,0x0e9103c,0x3ce6e56,
        0x1091fe2,0x1f2e12c,0x01ac5f4, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Y ordinate */
    {
        0x3bf51f5,0x1901a0d,0x1ececbb,0x15dacc5,0x22bce33,0x303e785,0x27eb4a7,
        0x1fe6e3b,0x2e2fe1a,0x013f8d0, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* Z ordinate */
    {
        0x0000001,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,0x0000000,
        0x0000000,0x0000000,0x0000000, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    },
    /* infinity */
    0
};
116 
/* Obtain the storage to use for a point.
 *
 * When WOLFSSL_SP_SMALL or WOLFSSL_SMALL_STACK is defined the point is
 * allocated with XMALLOC and sp is ignored. Otherwise no allocation is made:
 * the caller supplied sp is handed back through p and heap is ignored.
 *
 * heap  Heap hint passed to XMALLOC.
 * sp    Caller supplied point storage.
 * p     Receives the address of the point to use.
 * returns MEMORY_E when p is NULL, when the allocation fails or, in the
 * non-allocating build, when sp is NULL; MP_OKAY otherwise.
 */
static int sp_ecc_point_new_ex(void* heap, sp_point* sp, sp_point** p)
{
    if (p == NULL) {
        return MEMORY_E;
    }

#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
    (void)sp;
    *p = (sp_point*)XMALLOC(sizeof(sp_point), heap, DYNAMIC_TYPE_ECC);
    return (*p != NULL) ? MP_OKAY : MEMORY_E;
#else
    (void)heap;
    if (sp == NULL) {
        /* Nothing to hand back; *p is left untouched. */
        return MEMORY_E;
    }
    *p = sp;
    return MP_OKAY;
#endif
}
142 
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* Allocate memory for point and return error.
 * The sp argument is not used by this variant; NULL is passed in its place.
 * p must be an lvalue as its address is taken.
 */
#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), NULL, &(p))
#else
/* Set pointer to data and return no error.
 * Both sp and p must be lvalues as their addresses are taken.
 */
#define sp_ecc_point_new(heap, sp, p) sp_ecc_point_new_ex((heap), &(sp), &(p))
#endif
150 
151 
/* Release a point set up with sp_ecc_point_new().
 *
 * When WOLFSSL_SP_SMALL or WOLFSSL_SMALL_STACK is defined the point was
 * allocated and is freed here. Otherwise the point is caller owned storage and
 * is only cleared on request. A NULL point is ignored in both builds.
 *
 * p      Point to release. May be NULL.
 * clear  Non-zero to zeroize the point data first.
 * heap   Heap hint passed to XFREE.
 */
static void sp_ecc_point_free(sp_point* p, int clear, void* heap)
{
#if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
/* If valid pointer then clear point data if requested and free data. */
    if (p != NULL) {
        if (clear != 0) {
            XMEMSET(p, 0, sizeof(*p));
        }
        XFREE(p, heap, DYNAMIC_TYPE_ECC);
    }
#else
/* Clear point data if requested. The NULL check matches the allocating build
 * so that clearing a point that was never set up does not dereference NULL. */
    if ((p != NULL) && (clear != 0)) {
        XMEMSET(p, 0, sizeof(*p));
    }
#endif
    (void)heap;
}
170 
171 /* Multiply a number by Montgomery normalizer mod modulus (prime).
172  *
173  * r  The resulting Montgomery form number.
174  * a  The number to convert.
175  * m  The modulus (prime).
176  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
177  */
sp_256_mod_mul_norm_10(sp_digit * r,const sp_digit * a,const sp_digit * m)178 static int sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
179 {
180 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
181     int64_t* td;
182 #else
183     int64_t td[8];
184     int64_t a32d[8];
185 #endif
186     int64_t* t;
187     int64_t* a32;
188     int64_t o;
189     int err = MP_OKAY;
190 
191     (void)m;
192 
193 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
194     td = (int64_t*)XMALLOC(sizeof(int64_t) * 2 * 8, NULL, DYNAMIC_TYPE_ECC);
195     if (td == NULL) {
196         err = MEMORY_E;
197     }
198 #endif
199 
200     if (err == MP_OKAY) {
201 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
202         t = td;
203         a32 = td + 8;
204 #else
205         t = td;
206         a32 = a32d;
207 #endif
208 
209         a32[0] = a[0];
210         a32[0] |= a[1] << 26U;
211         a32[0] &= 0xffffffffL;
212         a32[1] = (sp_digit)(a[1] >> 6);
213         a32[1] |= a[2] << 20U;
214         a32[1] &= 0xffffffffL;
215         a32[2] = (sp_digit)(a[2] >> 12);
216         a32[2] |= a[3] << 14U;
217         a32[2] &= 0xffffffffL;
218         a32[3] = (sp_digit)(a[3] >> 18);
219         a32[3] |= a[4] << 8U;
220         a32[3] &= 0xffffffffL;
221         a32[4] = (sp_digit)(a[4] >> 24);
222         a32[4] |= a[5] << 2U;
223         a32[4] |= a[6] << 28U;
224         a32[4] &= 0xffffffffL;
225         a32[5] = (sp_digit)(a[6] >> 4);
226         a32[5] |= a[7] << 22U;
227         a32[5] &= 0xffffffffL;
228         a32[6] = (sp_digit)(a[7] >> 10);
229         a32[6] |= a[8] << 16U;
230         a32[6] &= 0xffffffffL;
231         a32[7] = (sp_digit)(a[8] >> 16);
232         a32[7] |= a[9] << 10U;
233         a32[7] &= 0xffffffffL;
234 
235         /*  1  1  0 -1 -1 -1 -1  0 */
236         t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6];
237         /*  0  1  1  0 -1 -1 -1 -1 */
238         t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7];
239         /*  0  0  1  1  0 -1 -1 -1 */
240         t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7];
241         /* -1 -1  0  2  2  1  0 -1 */
242         t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7];
243         /*  0 -1 -1  0  2  2  1  0 */
244         t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6];
245         /*  0  0 -1 -1  0  2  2  1 */
246         t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7];
247         /* -1 -1  0  0  0  1  3  2 */
248         t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7];
249         /*  1  0 -1 -1 -1 -1  0  3 */
250         t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7];
251 
252         t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
253         t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
254         t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
255         t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
256         t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
257         t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
258         t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
259         o     = t[7] >> 32U; t[7] &= 0xffffffffL;
260         t[0] += o;
261         t[3] -= o;
262         t[6] -= o;
263         t[7] += o;
264         t[1] += t[0] >> 32U; t[0] &= 0xffffffffL;
265         t[2] += t[1] >> 32U; t[1] &= 0xffffffffL;
266         t[3] += t[2] >> 32U; t[2] &= 0xffffffffL;
267         t[4] += t[3] >> 32U; t[3] &= 0xffffffffL;
268         t[5] += t[4] >> 32U; t[4] &= 0xffffffffL;
269         t[6] += t[5] >> 32U; t[5] &= 0xffffffffL;
270         t[7] += t[6] >> 32U; t[6] &= 0xffffffffL;
271 
272         r[0] = (sp_digit)(t[0]) & 0x3ffffffL;
273         r[1] = (sp_digit)(t[0] >> 26U);
274         r[1] |= t[1] << 6U;
275         r[1] &= 0x3ffffffL;
276         r[2] = (sp_digit)(t[1] >> 20U);
277         r[2] |= t[2] << 12U;
278         r[2] &= 0x3ffffffL;
279         r[3] = (sp_digit)(t[2] >> 14U);
280         r[3] |= t[3] << 18U;
281         r[3] &= 0x3ffffffL;
282         r[4] = (sp_digit)(t[3] >> 8U);
283         r[4] |= t[4] << 24U;
284         r[4] &= 0x3ffffffL;
285         r[5] = (sp_digit)(t[4] >> 2U) & 0x3ffffffL;
286         r[6] = (sp_digit)(t[4] >> 28U);
287         r[6] |= t[5] << 4U;
288         r[6] &= 0x3ffffffL;
289         r[7] = (sp_digit)(t[5] >> 22U);
290         r[7] |= t[6] << 10U;
291         r[7] &= 0x3ffffffL;
292         r[8] = (sp_digit)(t[6] >> 16U);
293         r[8] |= t[7] << 16U;
294         r[8] &= 0x3ffffffL;
295         r[9] = (sp_digit)(t[7] >> 10U);
296     }
297 
298 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
299     if (td != NULL) {
300         XFREE(td, NULL, DYNAMIC_TYPE_ECC);
301     }
302 #endif
303 
304     return err;
305 }
306 
307 
308 /* Compare a with b in constant time.
309  *
310  * a  A single precision integer.
311  * b  A single precision integer.
312  * return -ve, 0 or +ve if a is less than, equal to or greater than b
313  * respectively.
314  */
sp_256_cmp_10(const sp_digit * a,const sp_digit * b)315 static sp_digit sp_256_cmp_10(const sp_digit* a, const sp_digit* b)
316 {
317     sp_digit r = 0;
318 #ifdef WOLFSSL_SP_SMALL
319     int i;
320 
321     for (i=9; i>=0; i--) {
322         r |= (a[i] - b[i]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
323     }
324 #else
325     r |= (a[ 9] - b[ 9]) & (0 - (sp_digit)1);
326     r |= (a[ 8] - b[ 8]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
327     r |= (a[ 7] - b[ 7]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
328     r |= (a[ 6] - b[ 6]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
329     r |= (a[ 5] - b[ 5]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
330     r |= (a[ 4] - b[ 4]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
331     r |= (a[ 3] - b[ 3]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
332     r |= (a[ 2] - b[ 2]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
333     r |= (a[ 1] - b[ 1]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
334     r |= (a[ 0] - b[ 0]) & (0 - ((r == 0) ? (sp_digit)1 : (sp_digit)0));
335 #endif /* WOLFSSL_SP_SMALL */
336 
337     return r;
338 }
339 
340 /* Normalize the values in each word to 26.
341  *
342  * a  Array of sp_digit to normalize.
343  */
sp_256_norm_10(sp_digit * a)344 static void sp_256_norm_10(sp_digit* a)
345 {
346 #ifdef WOLFSSL_SP_SMALL
347     int i;
348     for (i = 0; i < 9; i++) {
349         a[i+1] += a[i] >> 26;
350         a[i] &= 0x3ffffff;
351     }
352 #else
353     a[1] += a[0] >> 26; a[0] = Q6_R_and_RR(a[0], 0x3ffffff);
354     a[2] += a[1] >> 26; a[1] = Q6_R_and_RR(a[1], 0x3ffffff);
355     a[3] += a[2] >> 26; a[2] = Q6_R_and_RR(a[2], 0x3ffffff);
356     a[4] += a[3] >> 26; a[3] = Q6_R_and_RR(a[3], 0x3ffffff);
357     a[5] += a[4] >> 26; a[4] = Q6_R_and_RR(a[4], 0x3ffffff);
358     a[6] += a[5] >> 26; a[5] = Q6_R_and_RR(a[5], 0x3ffffff);
359     a[7] += a[6] >> 26; a[6] = Q6_R_and_RR(a[6], 0x3ffffff);
360     a[8] += a[7] >> 26; a[7] = Q6_R_and_RR(a[7], 0x3ffffff);
361     a[9] += a[8] >> 26; a[8] = Q6_R_and_RR(a[8], 0x3ffffff);
362 #endif
363 }
364 
365 /* Conditionally subtract b from a using the mask m.
366  * m is -1 to subtract and 0 when not.
367  *
368  * r  A single precision number representing condition subtract result.
369  * a  A single precision number to subtract from.
370  * b  A single precision number to subtract.
371  * m  Mask value to apply.
372  */
sp_256_cond_sub_10(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit m)373 static void sp_256_cond_sub_10(sp_digit* r, const sp_digit* a,
374         const sp_digit* b, const sp_digit m)
375 {
376 #ifdef WOLFSSL_SP_SMALL
377     int i;
378 
379     for (i = 0; i < 10; i++) {
380         r[i] = a[i] - (b[i] & m);
381     }
382 #else
383     r[ 0] = Q6_R_sub_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
384     r[ 1] = Q6_R_sub_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
385     r[ 2] = Q6_R_sub_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
386     r[ 3] = Q6_R_sub_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
387     r[ 4] = Q6_R_sub_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
388     r[ 5] = Q6_R_sub_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
389     r[ 6] = Q6_R_sub_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
390     r[ 7] = Q6_R_sub_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
391     r[ 8] = Q6_R_sub_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
392     r[ 9] = Q6_R_sub_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
393 #endif /* WOLFSSL_SP_SMALL */
394 }
395 
/* sp_256_mont_reduce_order_10 is an alias of sp_256_mont_reduce_10; there is
 * no separate implementation. */
#define sp_256_mont_reduce_order_10         sp_256_mont_reduce_10
397 
398 /* Mul a by scalar b and add into r. (r += a * b)
399  *
400  * r  A single precision integer.
401  * a  A single precision integer.
402  * b  A scalar.
403  */
sp_256_mul_add_10(sp_digit * r,const sp_digit * a,const sp_digit b)404 SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a,
405         const sp_digit b)
406 {
407 #ifdef WOLFSSL_SP_SMALL
408     int64_t tb = b;
409     int64_t t = 0;
410     int i;
411 
412     for (i = 0; i < 10; i++) {
413         t += (tb * a[i]) + r[i];
414         r[i] = t & 0x3ffffff;
415         t >>= 26;
416     }
417     r[10] += t;
418 #else
419     int64_t tb = b;
420     int64_t t[10];
421 
422     t[ 0] = Q6_P_mpy_RR(tb, a[ 0]);
423     t[ 1] = Q6_P_mpy_RR(tb, a[ 1]);
424     t[ 2] = Q6_P_mpy_RR(tb, a[ 2]);
425     t[ 3] = Q6_P_mpy_RR(tb, a[ 3]);
426     t[ 4] = Q6_P_mpy_RR(tb, a[ 4]);
427     t[ 5] = Q6_P_mpy_RR(tb, a[ 5]);
428     t[ 6] = Q6_P_mpy_RR(tb, a[ 6]);
429     t[ 7] = Q6_P_mpy_RR(tb, a[ 7]);
430     t[ 8] = Q6_P_mpy_RR(tb, a[ 8]);
431     t[ 9] = Q6_P_mpy_RR(tb, a[ 9]);
432     r[ 0] +=                 (t[ 0] & 0x3ffffff);
433     r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff);
434     r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff);
435     r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff);
436     r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff);
437     r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff);
438     r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff);
439     r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff);
440     r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff);
441     r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff);
442     r[10] +=  t[ 9] >> 26;
443 #endif /* WOLFSSL_SP_SMALL */
444 }
445 
446 /* Shift the result in the high 256 bits down to the bottom.
447  *
448  * r  A single precision number.
449  * a  A single precision number.
450  */
sp_256_mont_shift_10(sp_digit * r,const sp_digit * a)451 static void sp_256_mont_shift_10(sp_digit* r, const sp_digit* a)
452 {
453 #ifdef WOLFSSL_SP_SMALL
454     int i;
455     sp_digit n, s;
456 
457     s = a[10];
458     n = a[9] >> 22;
459     for (i = 0; i < 9; i++) {
460         n += (s & 0x3ffffff) << 4;
461         r[i] = n & 0x3ffffff;
462         n >>= 26;
463         s = a[11 + i] + (s >> 26);
464     }
465     n += s << 4;
466     r[9] = n;
467 #else
468     sp_digit n, s;
469 
470     s = a[10]; n = a[9] >> 22;
471     n += (s & 0x3ffffff) << 4; r[ 0] = Q6_R_and_RR(n, 0x3ffffff);
472     n >>= 26; s = a[11] + (s >> 26);
473     n += (s & 0x3ffffff) << 4; r[ 1] = Q6_R_and_RR(n, 0x3ffffff);
474     n >>= 26; s = a[12] + (s >> 26);
475     n += (s & 0x3ffffff) << 4; r[ 2] = Q6_R_and_RR(n, 0x3ffffff);
476     n >>= 26; s = a[13] + (s >> 26);
477     n += (s & 0x3ffffff) << 4; r[ 3] = Q6_R_and_RR(n, 0x3ffffff);
478     n >>= 26; s = a[14] + (s >> 26);
479     n += (s & 0x3ffffff) << 4; r[ 4] = Q6_R_and_RR(n, 0x3ffffff);
480     n >>= 26; s = a[15] + (s >> 26);
481     n += (s & 0x3ffffff) << 4; r[ 5] = Q6_R_and_RR(n, 0x3ffffff);
482     n >>= 26; s = a[16] + (s >> 26);
483     n += (s & 0x3ffffff) << 4; r[ 6] = Q6_R_and_RR(n, 0x3ffffff);
484     n >>= 26; s = a[17] + (s >> 26);
485     n += (s & 0x3ffffff) << 4; r[ 7] = Q6_R_and_RR(n, 0x3ffffff);
486     n >>= 26; s = a[18] + (s >> 26);
487     n += (s & 0x3ffffff) << 4; r[ 8] = Q6_R_and_RR(n, 0x3ffffff);
488     n >>= 26; s = a[19] + (s >> 26);
489     n += s << 4;              r[ 9] = n;
490 #endif /* WOLFSSL_SP_SMALL */
491     XMEMSET(&r[10], 0, sizeof(*r) * 10U);
492 }
493 
494 
/* Reduce the number back to 256 bits using Montgomery reduction.
 *
 * For each of the ten low digits a multiple of the modulus is added in with
 * sp_256_mul_add_10() and the excess above 26 bits is carried into the next
 * digit. The high part is then shifted down, the modulus is conditionally
 * subtracted once and the digits are normalized. Digits a[0..19] are read and
 * written.
 *
 * a   A single precision number to reduce in place.
 * m   The single precision number representing the modulus.
 * mp  The digit representing the negative inverse of m mod 2^n.
 */
static void sp_256_mont_reduce_10(sp_digit* a, const sp_digit* m, sp_digit mp)
{
    sp_digit mu;


    /* unrolled for loops due to unexpected behavior with -O optimizations */
    if (mp != 1) {
        /* General multiplier: add mu * m at each digit position. */
        mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+0, m, mu);
        a[0+1] += a[0] >> 26;

        mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+1, m, mu);
        a[1+1] += a[1] >> 26;

        mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+2, m, mu);
        a[2+1] += a[2] >> 26;

        mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+3, m, mu);
        a[3+1] += a[3] >> 26;

        mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+4, m, mu);
        a[4+1] += a[4] >> 26;

        mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+5, m, mu);
        a[5+1] += a[5] >> 26;

        mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+6, m, mu);
        a[6+1] += a[6] >> 26;

        mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+7, m, mu);
        a[7+1] += a[7] >> 26;

        mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+8, m, mu);
        a[8+1] += a[8] >> 26;

        /* The top digit holds 22 bits (9 * 26 + 22 = 256), hence the
         * narrower mask on mu. */
        mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL;
        sp_256_mul_add_10(a+9, m, mu);
        a[9+1] += a[9] >> 26;
        a[9] &= 0x3ffffff;
    }
    else {
        /* mp is 1: the same steps, but p256_mod is used as the modulus
         * instead of the m argument. */
        mu = Q6_P_mpy_RR(a[0], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+0, p256_mod, mu);
        a[0+1] += a[0] >> 26;

        mu = Q6_P_mpy_RR(a[1], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+1, p256_mod, mu);
        a[1+1] += a[1] >> 26;

        mu = Q6_P_mpy_RR(a[2], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+2, p256_mod, mu);
        a[2+1] += a[2] >> 26;

        mu = Q6_P_mpy_RR(a[3], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+3, p256_mod, mu);
        a[3+1] += a[3] >> 26;

        mu = Q6_P_mpy_RR(a[4], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+4, p256_mod, mu);
        a[4+1] += a[4] >> 26;

        mu = Q6_P_mpy_RR(a[5], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+5, p256_mod, mu);
        a[5+1] += a[5] >> 26;

        mu = Q6_P_mpy_RR(a[6], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+6, p256_mod, mu);
        a[6+1] += a[6] >> 26;

        mu = Q6_P_mpy_RR(a[7], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+7, p256_mod, mu);
        a[7+1] += a[7] >> 26;

        mu = Q6_P_mpy_RR(a[8], mp) & 0x3ffffff;
        sp_256_mul_add_10(a+8, p256_mod, mu);
        a[8+1] += a[8] >> 26;

        /* Top digit: 22 bit mask, as in the branch above. */
        mu = Q6_P_mpy_RR(a[9], mp) & 0x3fffffL;
        sp_256_mul_add_10(a+9, p256_mod, mu);
        a[9+1] += a[9] >> 26;
        a[9] &= 0x3ffffff;
    }


    /* Move the high half down to a[0..9]; a[10..19] are zeroed. */
    sp_256_mont_shift_10(a, a);
    /* Subtract m once when anything is set above bit 21 of the top digit. */
    sp_256_cond_sub_10(a, a, m, 0 - (((a[9] >> 22) > 0) ?
            (sp_digit)1 : (sp_digit)0));
    sp_256_norm_10(a);
}
598 
599 /* Multiply a and b into r. (r = a * b)
600  *
601  * r  A single precision integer.
602  * a  A single precision integer.
603  * b  A single precision integer.
604  */
sp_256_mul_10(sp_digit * r,const sp_digit * a,const sp_digit * b)605 SP_NOINLINE static void sp_256_mul_10(sp_digit* r, const sp_digit* a,
606     const sp_digit* b)
607 {
608 #if 1
609     int64_t t0   = Q6_P_mpy_RR(a[0], b[0]);
610     int64_t t1   = Q6_P_mpy_RR(a[0], b[1])
611                  + Q6_P_mpy_RR(a[1], b[0]);
612     int64_t t2   = Q6_P_mpy_RR(a[0], b[2])
613                  + Q6_P_mpy_RR(a[1], b[1])
614                  + Q6_P_mpy_RR(a[2], b[0]);
615     int64_t t3   = Q6_P_mpy_RR(a[0], b[3])
616                  + Q6_P_mpy_RR(a[1], b[2])
617                  + Q6_P_mpy_RR(a[2], b[1])
618                  + Q6_P_mpy_RR(a[3], b[0]);
619     int64_t t4   = Q6_P_mpy_RR(a[0], b[4])
620                  + Q6_P_mpy_RR(a[1], b[3])
621                  + Q6_P_mpy_RR(a[2], b[2])
622                  + Q6_P_mpy_RR(a[3], b[1])
623                  + Q6_P_mpy_RR(a[4], b[0]);
624     int64_t t5   = Q6_P_mpy_RR(a[0], b[5])
625                  + Q6_P_mpy_RR(a[1], b[4])
626                  + Q6_P_mpy_RR(a[2], b[3])
627                  + Q6_P_mpy_RR(a[3], b[2])
628                  + Q6_P_mpy_RR(a[4], b[1])
629                  + Q6_P_mpy_RR(a[5], b[0]);
630     int64_t t6   = Q6_P_mpy_RR(a[0], b[6])
631                  + Q6_P_mpy_RR(a[1], b[5])
632                  + Q6_P_mpy_RR(a[2], b[4])
633                  + Q6_P_mpy_RR(a[3], b[3])
634                  + Q6_P_mpy_RR(a[4], b[2])
635                  + Q6_P_mpy_RR(a[5], b[1])
636                  + Q6_P_mpy_RR(a[6], b[0]);
637     int64_t t7   = Q6_P_mpy_RR(a[0], b[7])
638                  + Q6_P_mpy_RR(a[1], b[6])
639                  + Q6_P_mpy_RR(a[2], b[5])
640                  + Q6_P_mpy_RR(a[3], b[4])
641                  + Q6_P_mpy_RR(a[4], b[3])
642                  + Q6_P_mpy_RR(a[5], b[2])
643                  + Q6_P_mpy_RR(a[6], b[1])
644                  + Q6_P_mpy_RR(a[7], b[0]);
645     int64_t t8   = Q6_P_mpy_RR(a[0], b[8])
646                  + Q6_P_mpy_RR(a[1], b[7])
647                  + Q6_P_mpy_RR(a[2], b[6])
648                  + Q6_P_mpy_RR(a[3], b[5])
649                  + Q6_P_mpy_RR(a[4], b[4])
650                  + Q6_P_mpy_RR(a[5], b[3])
651                  + Q6_P_mpy_RR(a[6], b[2])
652                  + Q6_P_mpy_RR(a[7], b[1])
653                  + Q6_P_mpy_RR(a[8], b[0]);
654     int64_t t9   = Q6_P_mpy_RR(a[0], b[9])
655                  + Q6_P_mpy_RR(a[1], b[8])
656                  + Q6_P_mpy_RR(a[2], b[7])
657                  + Q6_P_mpy_RR(a[3], b[6])
658                  + Q6_P_mpy_RR(a[4], b[5])
659                  + Q6_P_mpy_RR(a[5], b[4])
660                  + Q6_P_mpy_RR(a[6], b[3])
661                  + Q6_P_mpy_RR(a[7], b[2])
662                  + Q6_P_mpy_RR(a[8], b[1])
663                  + Q6_P_mpy_RR(a[9], b[0]);
664     int64_t t10  = Q6_P_mpy_RR(a[1], b[9])
665                  + Q6_P_mpy_RR(a[2], b[8])
666                  + Q6_P_mpy_RR(a[3], b[7])
667                  + Q6_P_mpy_RR(a[4], b[6])
668                  + Q6_P_mpy_RR(a[5], b[5])
669                  + Q6_P_mpy_RR(a[6], b[4])
670                  + Q6_P_mpy_RR(a[7], b[3])
671                  + Q6_P_mpy_RR(a[8], b[2])
672                  + Q6_P_mpy_RR(a[9], b[1]);
673     int64_t t11  = Q6_P_mpy_RR(a[2], b[9])
674                  + Q6_P_mpy_RR(a[3], b[8])
675                  + Q6_P_mpy_RR(a[4], b[7])
676                  + Q6_P_mpy_RR(a[5], b[6])
677                  + Q6_P_mpy_RR(a[6], b[5])
678                  + Q6_P_mpy_RR(a[7], b[4])
679                  + Q6_P_mpy_RR(a[8], b[3])
680                  + Q6_P_mpy_RR(a[9], b[2]);
681     int64_t t12  = Q6_P_mpy_RR(a[3], b[9])
682                  + Q6_P_mpy_RR(a[4], b[8])
683                  + Q6_P_mpy_RR(a[5], b[7])
684                  + Q6_P_mpy_RR(a[6], b[6])
685                  + Q6_P_mpy_RR(a[7], b[5])
686                  + Q6_P_mpy_RR(a[8], b[4])
687                  + Q6_P_mpy_RR(a[9], b[3]);
688     int64_t t13  = Q6_P_mpy_RR(a[4], b[9])
689                  + Q6_P_mpy_RR(a[5], b[8])
690                  + Q6_P_mpy_RR(a[6], b[7])
691                  + Q6_P_mpy_RR(a[7], b[6])
692                  + Q6_P_mpy_RR(a[8], b[5])
693                  + Q6_P_mpy_RR(a[9], b[4]);
694     int64_t t14  = Q6_P_mpy_RR(a[5], b[9])
695                  + Q6_P_mpy_RR(a[6], b[8])
696                  + Q6_P_mpy_RR(a[7], b[7])
697                  + Q6_P_mpy_RR(a[8], b[6])
698                  + Q6_P_mpy_RR(a[9], b[5]);
699     int64_t t15  = Q6_P_mpy_RR(a[6], b[9])
700                  + Q6_P_mpy_RR(a[7], b[8])
701                  + Q6_P_mpy_RR(a[8], b[7])
702                  + Q6_P_mpy_RR(a[9], b[6]);
703     int64_t t16  = Q6_P_mpy_RR(a[7], b[9])
704                  + Q6_P_mpy_RR(a[8], b[8])
705                  + Q6_P_mpy_RR(a[9], b[7]);
706     int64_t t17  = Q6_P_mpy_RR(a[8], b[9])
707                  + Q6_P_mpy_RR(a[9], b[8]);
708     int64_t t18  = Q6_P_mpy_RR(a[9], b[9]);
709 
710 
711     t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
712     t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
713     t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
714     t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
715     t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
716     t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
717     t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
718     t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
719     t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
720     t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
721     t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
722     t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
723     t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
724     t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
725     t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
726     t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
727     t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
728     t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
729     r[19] = (sp_digit)(t18 >> 26);
730                        r[18] = t18 & 0x3ffffff;
731 #endif
732 #if 0
733     /* Testing speeds with using HVX_Vectors */
734     {
735     int64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15, t16, t17, t18;
736     HVX_Vector av, splat;
737     HVX_Vector vlow, vhi;
738 
739     av = Q6_V_vzero();
740     vlow = Q6_V_vzero();
741     vhi = Q6_V_vzero();
742 
743     XMEMCPY((byte*)&av, (byte*)a, 40);
744 
745     splat = Q6_V_vsplat_R(b[0]);
746     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
747     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
748 
749     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
750     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
751     unsigned int* loi = (unsigned int*)&vlow;
752     int* hii = (int*)&vhi;
753 
754     /* a[0] * b[0] */
755     t0 = loi[0] | ((int64_t)hii[0] << 31);
756 
757     /* a[1] * b[0] */
758     t1 = loi[1] | ((int64_t)hii[1] << 31);
759 
760     /* a[2] * b[0] */
761     t2 = loi[2] | ((int64_t)hii[2] << 31);
762 
763     /* a[3] * b[0] */
764     t3 = loi[3] | ((int64_t)hii[3] << 31);
765 
766     /* a[4] * b[0] */
767     t4 = loi[4] | ((int64_t)hii[4] << 31);
768 
769     /* a[5] * b[0] */
770     t5 = loi[5] | ((int64_t)hii[5] << 31);
771 
772     /* a[6] * b[0] */
773     t6 = loi[6] | ((int64_t)hii[6] << 31);
774 
775     /* a[7] * b[0] */
776     t7 = loi[7] | ((int64_t)hii[7] << 31);
777 
778     /* a[8] * b[0] */
779     t8 = loi[8] | ((int64_t)hii[8] << 31);
780 
781     /* a[9] * b[0] */
782     t9 = loi[9] | ((int64_t)hii[9] << 31);
783 
784     /* a[*] * b[1] */
785     splat = Q6_V_vsplat_R(b[1]);
786     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
787     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
788     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
789     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
790     loi = (unsigned int*)&vlow;
791     hii = (int*)&vhi;
792 
793     /* a[0] * b[1] */
794     t1 += (loi[0] | ((int64_t)hii[0] << 31));
795 
796     /* a[1] * b[1] */
797     t2 += (loi[1] | ((int64_t)hii[1] << 31));
798 
799     /* a[2] * b[1] */
800     t3 += (loi[2] | ((int64_t)hii[2] << 31));
801 
802     /* a[3] * b[1] */
803     t4 += (loi[3] | ((int64_t)hii[3] << 31));
804 
805     /* a[4] * b[1] */
806     t5 += (loi[4] | ((int64_t)hii[4] << 31));
807 
808     /* a[5] * b[1] */
809     t6 += (loi[5] | ((int64_t)hii[5] << 31));
810 
811     /* a[6] * b[1] */
812     t7 += (loi[6] | ((int64_t)hii[6] << 31));
813 
814     /* a[7] * b[1] */
815     t8 += (loi[7] | ((int64_t)hii[7] << 31));
816 
817     /* a[8] * b[1] */
818     t9 += (loi[8] | ((int64_t)hii[8] << 31));
819 
820     /* a[9] * b[1] */
821     t10 = (loi[9] | ((int64_t)hii[9] << 31));
822 
823     /* a[*] * b[2] */
824     splat = Q6_V_vsplat_R(b[2]);
825     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
826     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
827     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
828     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
829     loi = (unsigned int*)&vlow;
830     hii = (int*)&vhi;
831 
832 
833     /* a[0] * b[2] */
834     t2 += (loi[0] | ((int64_t)hii[0] << 31));
835 
836     /* a[1] * b[2] */
837     t3 += (loi[1] | ((int64_t)hii[1] << 31));
838 
839     /* a[2] * b[2] */
840     t4 += (loi[2] | ((int64_t)hii[2] << 31));
841 
842     /* a[3] * b[2] */
843     t5 += (loi[3] | ((int64_t)hii[3] << 31));
844 
845     /* a[4] * b[2] */
846     t6 += (loi[4] | ((int64_t)hii[4] << 31));
847 
848     /* a[5] * b[2] */
849     t7 += (loi[5] | ((int64_t)hii[5] << 31));
850 
851     /* a[6] * b[2] */
852     t8 += (loi[6] | ((int64_t)hii[6] << 31));
853 
854     /* a[7] * b[2] */
855     t9 += (loi[7] | ((int64_t)hii[7] << 31));
856 
857     /* a[8] * b[2] */
858     t10 += (loi[8] | ((int64_t)hii[8] << 31));
859 
860     /* a[9] * b[2] */
861     t11 = (loi[9] | ((int64_t)hii[9] << 31));
862 
863 
864     /* a[*] * b[3] */
865     splat = Q6_V_vsplat_R(b[3]);
866     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
867     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
868     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
869     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
870     loi = (unsigned int*)&vlow;
871     hii = (int*)&vhi;
872 
873 
874     /* a[0] * b[3] */
875     t3 += (loi[0] | ((int64_t)hii[0] << 31));
876 
877     /* a[1] * b[3] */
878     t4 += (loi[1] | ((int64_t)hii[1] << 31));
879 
880     /* a[2] * b[3] */
881     t5 += (loi[2] | ((int64_t)hii[2] << 31));
882 
883     /* a[3] * b[3] */
884     t6 += (loi[3] | ((int64_t)hii[3] << 31));
885 
886     /* a[4] * b[3] */
887     t7 += (loi[4] | ((int64_t)hii[4] << 31));
888 
889     /* a[5] * b[3] */
890     t8 += (loi[5] | ((int64_t)hii[5] << 31));
891 
892     /* a[6] * b[3] */
893     t9 += (loi[6] | ((int64_t)hii[6] << 31));
894 
895     /* a[7] * b[3] */
896     t10 += (loi[7] | ((int64_t)hii[7] << 31));
897 
898     /* a[8] * b[3] */
899     t11 += (loi[8] | ((int64_t)hii[8] << 31));
900 
901     /* a[9] * b[3] */
902     t12 = (loi[9] | ((int64_t)hii[9] << 31));
903 
904 
905     /* a[*] * b[4] */
906     splat = Q6_V_vsplat_R(b[4]);
907     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
908     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
909     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
910     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
911     loi = (unsigned int*)&vlow;
912     hii = (int*)&vhi;
913 
914 
915     /* a[0] * b[4] */
916     t4 += (loi[0] | ((int64_t)hii[0] << 31));
917 
918     /* a[1] * b[4] */
919     t5 += (loi[1] | ((int64_t)hii[1] << 31));
920 
921     /* a[2] * b[4] */
922     t6 += (loi[2] | ((int64_t)hii[2] << 31));
923 
924     /* a[3] * b[4] */
925     t7 += (loi[3] | ((int64_t)hii[3] << 31));
926 
927     /* a[4] * b[4] */
928     t8 += (loi[4] | ((int64_t)hii[4] << 31));
929 
930     /* a[5] * b[4] */
931     t9 += (loi[5] | ((int64_t)hii[5] << 31));
932 
933     /* a[6] * b[4] */
934     t10 += (loi[6] | ((int64_t)hii[6] << 31));
935 
936     /* a[7] * b[4] */
937     t11 += (loi[7] | ((int64_t)hii[7] << 31));
938 
939     /* a[8] * b[4] */
940     t12 += (loi[8] | ((int64_t)hii[8] << 31));
941 
942     /* a[9] * b[4] */
943     t13 = (loi[9] | ((int64_t)hii[9] << 31));
944 
945 
946     /* a[*] * b[5] */
947     splat = Q6_V_vsplat_R(b[5]);
948     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
949     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
950     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
951     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
952     loi = (unsigned int*)&vlow;
953     hii = (int*)&vhi;
954 
955 
956     /* a[0] * b[5] */
957     t5 += (loi[0] | ((int64_t)hii[0] << 31));
958 
959     /* a[1] * b[5] */
960     t6 += (loi[1] | ((int64_t)hii[1] << 31));
961 
962     /* a[2] * b[5] */
963     t7 += (loi[2] | ((int64_t)hii[2] << 31));
964 
965     /* a[3] * b[5] */
966     t8 += (loi[3] | ((int64_t)hii[3] << 31));
967 
968     /* a[4] * b[5] */
969     t9 += (loi[4] | ((int64_t)hii[4] << 31));
970 
971     /* a[5] * b[5] */
972     t10 += (loi[5] | ((int64_t)hii[5] << 31));
973 
974     /* a[6] * b[5] */
975     t11 += (loi[6] | ((int64_t)hii[6] << 31));
976 
977     /* a[7] * b[5] */
978     t12 += (loi[7] | ((int64_t)hii[7] << 31));
979 
980     /* a[8] * b[5] */
981     t13 += (loi[8] | ((int64_t)hii[8] << 31));
982 
983     /* a[9] * b[5] */
984     t14 = (loi[9] | ((int64_t)hii[9] << 31));
985 
986 
987     /* a[*] * b[6] */
988     splat = Q6_V_vsplat_R(b[6]);
989     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
990     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
991     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
992     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
993     loi = (unsigned int*)&vlow;
994     hii = (int*)&vhi;
995 
996 
997     /* a[0] * b[6] */
998     t6 += (loi[0] | ((int64_t)hii[0] << 31));
999 
1000     /* a[1] * b[6] */
1001     t7 += (loi[1] | ((int64_t)hii[1] << 31));
1002 
1003     /* a[2] * b[6] */
1004     t8 += (loi[2] | ((int64_t)hii[2] << 31));
1005 
1006     /* a[3] * b[6] */
1007     t9 += (loi[3] | ((int64_t)hii[3] << 31));
1008 
1009     /* a[4] * b[6] */
1010     t10 += (loi[4] | ((int64_t)hii[4] << 31));
1011 
1012     /* a[5] * b[6] */
1013     t11 += (loi[5] | ((int64_t)hii[5] << 31));
1014 
1015     /* a[6] * b[6] */
1016     t12 += (loi[6] | ((int64_t)hii[6] << 31));
1017 
1018     /* a[7] * b[6] */
1019     t13 += (loi[7] | ((int64_t)hii[7] << 31));
1020 
1021     /* a[8] * b[6] */
1022     t14 += (loi[8] | ((int64_t)hii[8] << 31));
1023 
1024     /* a[9] * b[6] */
1025     t15 = (loi[9] | ((int64_t)hii[9] << 31));
1026 
1027 
1028 
1029     /* a[*] * b[7] */
1030     splat = Q6_V_vsplat_R(b[7]);
1031     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
1032     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
1033     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
1034     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
1035     loi = (unsigned int*)&vlow;
1036     hii = (int*)&vhi;
1037 
1038 
1039     /* a[0] * b[7] */
1040     t7 += (loi[0] | ((int64_t)hii[0] << 31));
1041 
1042     /* a[1] * b[7] */
1043     t8 += (loi[1] | ((int64_t)hii[1] << 31));
1044 
1045     /* a[2] * b[7] */
1046     t9 += (loi[2] | ((int64_t)hii[2] << 31));
1047 
1048     /* a[3] * b[7] */
1049     t10 += (loi[3] | ((int64_t)hii[3] << 31));
1050 
1051     /* a[4] * b[7] */
1052     t11 += (loi[4] | ((int64_t)hii[4] << 31));
1053 
1054     /* a[5] * b[7] */
1055     t12 += (loi[5] | ((int64_t)hii[5] << 31));
1056 
1057     /* a[6] * b[7] */
1058     t13 += (loi[6] | ((int64_t)hii[6] << 31));
1059 
1060     /* a[7] * b[7] */
1061     t14 += (loi[7] | ((int64_t)hii[7] << 31));
1062 
1063     /* a[8] * b[7] */
1064     t15 += (loi[8] | ((int64_t)hii[8] << 31));
1065 
1066     /* a[9] * b[7] */
1067     t16 = (loi[9] | ((int64_t)hii[9] << 31));
1068 
1069 
1070     /* a[*] * b[8] */
1071     splat = Q6_V_vsplat_R(b[8]);
1072     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
1073     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
1074     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
1075     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
1076     loi = (unsigned int*)&vlow;
1077     hii = (int*)&vhi;
1078 
1079 
1080     /* a[0] * b[8] */
1081     t8 += (loi[0] | ((int64_t)hii[0] << 31));
1082 
1083     /* a[1] * b[8] */
1084     t9 += (loi[1] | ((int64_t)hii[1] << 31));
1085 
1086     /* a[2] * b[8] */
1087     t10 += (loi[2] | ((int64_t)hii[2] << 31));
1088 
1089     /* a[3] * b[8] */
1090     t11 += (loi[3] | ((int64_t)hii[3] << 31));
1091 
1092     /* a[4] * b[8] */
1093     t12 += (loi[4] | ((int64_t)hii[4] << 31));
1094 
1095     /* a[5] * b[8] */
1096     t13 += (loi[5] | ((int64_t)hii[5] << 31));
1097 
1098     /* a[6] * b[8] */
1099     t14 += (loi[6] | ((int64_t)hii[6] << 31));
1100 
1101     /* a[7] * b[8] */
1102     t15 += (loi[7] | ((int64_t)hii[7] << 31));
1103 
1104     /* a[8] * b[8] */
1105     t16 += (loi[8] | ((int64_t)hii[8] << 31));
1106 
1107     /* a[9] * b[8] */
1108     t17 = (loi[9] | ((int64_t)hii[9] << 31));
1109 
1110 
1111     /* a[*] * b[9] */
1112     splat = Q6_V_vsplat_R(b[9]);
1113     vlow = Q6_Vw_vmpyieo_VhVh(av, splat);
1114     vlow = Q6_Vw_vmpyieacc_VwVwVuh(vlow, av, splat);
1115     vhi = Q6_Vw_vmpye_VwVuh(av, splat);
1116     vhi = Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(vhi, av, splat);
1117     loi = (unsigned int*)&vlow;
1118     hii = (int*)&vhi;
1119 
1120 
1121     /* a[0] * b[9] */
1122     t9 += (loi[0] | ((int64_t)hii[0] << 31));
1123 
1124     /* a[1] * b[9] */
1125     t10 += (loi[1] | ((int64_t)hii[1] << 31));
1126 
1127     /* a[2] * b[9] */
1128     t11 += (loi[2] | ((int64_t)hii[2] << 31));
1129 
1130     /* a[3] * b[9] */
1131     t12 += (loi[3] | ((int64_t)hii[3] << 31));
1132 
1133     /* a[4] * b[9] */
1134     t13 += (loi[4] | ((int64_t)hii[4] << 31));
1135 
1136     /* a[5] * b[9] */
1137     t14 += (loi[5] | ((int64_t)hii[5] << 31));
1138 
1139     /* a[6] * b[9] */
1140     t15 += (loi[6] | ((int64_t)hii[6] << 31));
1141 
1142     /* a[7] * b[9] */
1143     t16 += (loi[7] | ((int64_t)hii[7] << 31));
1144 
1145     /* a[8] * b[9] */
1146     t17 += (loi[8] | ((int64_t)hii[8] << 31));
1147 
1148     /* a[9] * b[9] */
1149     t18 = (loi[9] | ((int64_t)hii[9] << 31));
1150 
1151         t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
1152         t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
1153         t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
1154         t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
1155         t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
1156         t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
1157         t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
1158         t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
1159         t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
1160         t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
1161         t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
1162         t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
1163         t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
1164         t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
1165         t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
1166         t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
1167         t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
1168         t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
1169         r[19] = (sp_digit)(t18 >> 26);
1170                            r[18] = t18 & 0x3ffffff;
1171     }
1172 #endif
1173 }
1174 
1175 
/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * The product is formed by sp_256_mul_10() and then reduced in place by
 * sp_256_mont_reduce_10().
 *
 * r   Result of multiplication. The intermediate product occupies 2 * 10
 *     digits of r, so r must have room for that many.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_256_mont_mul_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
        const sp_digit* m, sp_digit mp)
{
    /* Double-width product into r. */
    sp_256_mul_10(r, a, b);
    /* Montgomery reduction of the product, in place. */
    sp_256_mont_reduce_10(r, m, mp);
}
1191 
1192 
1193 /* Square a and put result in r. (r = a * a)
1194  *
1195  * r  A single precision integer.
1196  * a  A single precision integer.
1197  */
sp_256_sqr_10(sp_digit * r,const sp_digit * a)1198 SP_NOINLINE static void sp_256_sqr_10(sp_digit* r, const sp_digit* a)
1199 {
1200     int64_t t0   = Q6_P_mpy_RR(a[0], a[0]);
1201     int64_t t1   = Q6_P_mpy_RR(a[0], a[1]) * 2;
1202     int64_t t2   = Q6_P_mpy_RR(a[0], a[2]) * 2
1203                  + Q6_P_mpy_RR(a[1], a[1]);
1204     int64_t t3   = (Q6_P_mpy_RR(a[0], a[3])
1205                  + Q6_P_mpy_RR(a[1], a[2])) * 2;
1206     int64_t t4   = (Q6_P_mpy_RR(a[ 0], a[ 4])
1207                  + Q6_P_mpy_RR(a[ 1], a[ 3])) * 2
1208                  + Q6_P_mpy_RR(a[ 2], a[ 2]);
1209     int64_t t5   = (Q6_P_mpy_RR(a[ 0], a[ 5])
1210                  + Q6_P_mpy_RR(a[ 1], a[ 4])
1211                  + Q6_P_mpy_RR(a[ 2], a[ 3])) * 2;
1212     int64_t t6   = (Q6_P_mpy_RR(a[ 0], a[ 6])
1213                  + Q6_P_mpy_RR(a[ 1], a[ 5])
1214                  + Q6_P_mpy_RR(a[ 2], a[ 4])) * 2
1215                  + Q6_P_mpy_RR(a[ 3], a[ 3]);
1216     int64_t t7   = (Q6_P_mpy_RR(a[ 0], a[ 7])
1217                  + Q6_P_mpy_RR(a[ 1], a[ 6])
1218                  + Q6_P_mpy_RR(a[ 2], a[ 5])
1219                  + Q6_P_mpy_RR(a[ 3], a[ 4])) * 2;
1220     int64_t t8   = (Q6_P_mpy_RR(a[ 0], a[ 8])
1221                  + Q6_P_mpy_RR(a[ 1], a[ 7])
1222                  + Q6_P_mpy_RR(a[ 2], a[ 6])
1223                  + Q6_P_mpy_RR(a[ 3], a[ 5])) * 2
1224                  + Q6_P_mpy_RR(a[ 4], a[ 4]);
1225     int64_t t9   = (Q6_P_mpy_RR(a[ 0], a[ 9])
1226                  + Q6_P_mpy_RR(a[ 1], a[ 8])
1227                  + Q6_P_mpy_RR(a[ 2], a[ 7])
1228                  + Q6_P_mpy_RR(a[ 3], a[ 6])
1229                  + Q6_P_mpy_RR(a[ 4], a[ 5])) * 2;
1230     int64_t t10  = (Q6_P_mpy_RR(a[ 1], a[ 9])
1231                  + Q6_P_mpy_RR(a[ 2], a[ 8])
1232                  + Q6_P_mpy_RR(a[ 3], a[ 7])
1233                  + Q6_P_mpy_RR(a[ 4], a[ 6])) * 2
1234                  + Q6_P_mpy_RR(a[ 5], a[ 5]);
1235     int64_t t11  = (Q6_P_mpy_RR(a[ 2], a[ 9])
1236                  + Q6_P_mpy_RR(a[ 3], a[ 8])
1237                  + Q6_P_mpy_RR(a[ 4], a[ 7])
1238                  + Q6_P_mpy_RR(a[ 5], a[ 6])) * 2;
1239     int64_t t12  = (Q6_P_mpy_RR(a[ 3], a[ 9])
1240                  + Q6_P_mpy_RR(a[ 4], a[ 8])
1241                  + Q6_P_mpy_RR(a[ 5], a[ 7])) * 2
1242                  + Q6_P_mpy_RR(a[ 6], a[ 6]);
1243     int64_t t13  = (Q6_P_mpy_RR(a[ 4], a[ 9])
1244                  + Q6_P_mpy_RR(a[ 5], a[ 8])
1245                  + Q6_P_mpy_RR(a[ 6], a[ 7])) * 2;
1246     int64_t t14  = (Q6_P_mpy_RR(a[ 5], a[ 9])
1247                  + Q6_P_mpy_RR(a[ 6], a[ 8])) * 2
1248                  + Q6_P_mpy_RR(a[ 7], a[ 7]);
1249     int64_t t15  =( Q6_P_mpy_RR(a[ 6], a[ 9])
1250                  + Q6_P_mpy_RR(a[ 7], a[ 8])) * 2;
1251     int64_t t16  = Q6_P_mpy_RR(a[ 7], a[ 9]) * 2
1252                  + Q6_P_mpy_RR(a[ 8], a[ 8]);
1253     int64_t t17  = Q6_P_mpy_RR(a[ 8], a[ 9]) * 2;
1254     int64_t t18  = Q6_P_mpy_RR(a[ 9], a[ 9]);
1255 
1256     t1   += t0  >> 26; r[ 0] = t0  & 0x3ffffff;
1257     t2   += t1  >> 26; r[ 1] = t1  & 0x3ffffff;
1258     t3   += t2  >> 26; r[ 2] = t2  & 0x3ffffff;
1259     t4   += t3  >> 26; r[ 3] = t3  & 0x3ffffff;
1260     t5   += t4  >> 26; r[ 4] = t4  & 0x3ffffff;
1261     t6   += t5  >> 26; r[ 5] = t5  & 0x3ffffff;
1262     t7   += t6  >> 26; r[ 6] = t6  & 0x3ffffff;
1263     t8   += t7  >> 26; r[ 7] = t7  & 0x3ffffff;
1264     t9   += t8  >> 26; r[ 8] = t8  & 0x3ffffff;
1265     t10  += t9  >> 26; r[ 9] = t9  & 0x3ffffff;
1266     t11  += t10 >> 26; r[10] = t10 & 0x3ffffff;
1267     t12  += t11 >> 26; r[11] = t11 & 0x3ffffff;
1268     t13  += t12 >> 26; r[12] = t12 & 0x3ffffff;
1269     t14  += t13 >> 26; r[13] = t13 & 0x3ffffff;
1270     t15  += t14 >> 26; r[14] = t14 & 0x3ffffff;
1271     t16  += t15 >> 26; r[15] = t15 & 0x3ffffff;
1272     t17  += t16 >> 26; r[16] = t16 & 0x3ffffff;
1273     t18  += t17 >> 26; r[17] = t17 & 0x3ffffff;
1274     r[19] = (sp_digit)(t18 >> 26);
1275                        r[18] = t18 & 0x3ffffff;
1276 }
1277 
1278 
/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The square is formed by sp_256_sqr_10() and then reduced in place by
 * sp_256_mont_reduce_10().
 *
 * r   Result of squaring. The intermediate square occupies 2 * 10 digits of
 *     r, so r must have room for that many.
 * a   Number to square in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_256_mont_sqr_10(sp_digit* r, const sp_digit* a, const sp_digit* m,
        sp_digit mp)
{
    /* Double-width square into r. */
    sp_256_sqr_10(r, a);
    /* Montgomery reduction of the square, in place. */
    sp_256_mont_reduce_10(r, m, mp);
}
1292 
1293 #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
1294 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
1295  *
1296  * r   Result of squaring.
1297  * a   Number to square in Montgomery form.
1298  * n   Number of times to square.
1299  * m   Modulus (prime).
1300  * mp  Montgomery mulitplier.
1301  */
sp_256_mont_sqr_n_10(sp_digit * r,const sp_digit * a,int n,const sp_digit * m,sp_digit mp)1302 static void sp_256_mont_sqr_n_10(sp_digit* r, const sp_digit* a, int n,
1303         const sp_digit* m, sp_digit mp)
1304 {
1305     sp_256_mont_sqr_10(r, a, m, mp);
1306     for (; n > 1; n--) {
1307         sp_256_mont_sqr_10(r, r, m, mp);
1308     }
1309 }
1310 
1311 #endif /* !WOLFSSL_SP_SMALL || HAVE_COMP_KEY */
#ifdef WOLFSSL_SP_SMALL
/* Modulus of the P256 curve minus 2, stored as eight 32-bit words with the
 * least significant word first. sp_256_mont_inv_10() reads it one bit at a
 * time as the exponent in the small build. */
static const uint32_t p256_mod_2[8] = {
    0xfffffffdU,0xffffffffU,0xffffffffU,0x00000000U,0x00000000U,0x00000000U,
    0x00000001U,0xffffffffU
};
#endif /* WOLFSSL_SP_SMALL */
1319 
1320 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
1321  * P256 curve. (r = 1 / a mod m)
1322  *
1323  * r   Inverse result.
1324  * a   Number to invert.
1325  * td  Temporary data.
1326  */
sp_256_mont_inv_10(sp_digit * r,const sp_digit * a,sp_digit * td)1327 static void sp_256_mont_inv_10(sp_digit* r, const sp_digit* a, sp_digit* td)
1328 {
1329 #ifdef WOLFSSL_SP_SMALL
1330     sp_digit* t = td;
1331     int i;
1332 
1333     XMEMCPY(t, a, sizeof(sp_digit) * 10);
1334     for (i=254; i>=0; i--) {
1335         sp_256_mont_sqr_10(t, t, p256_mod, p256_mp_mod);
1336         if (p256_mod_2[i / 32] & ((sp_digit)1 << (i % 32)))
1337             sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
1338     }
1339     XMEMCPY(r, t, sizeof(sp_digit) * 10);
1340 #else
1341     sp_digit* t = td;
1342     sp_digit* t2 = td + Q6_P_mpy_RR(2, 10);
1343     sp_digit* t3 = td + Q6_P_mpy_RR(4, 10);
1344 
1345     /* t = a^2 */
1346     sp_256_mont_sqr_10(t, a, p256_mod, p256_mp_mod);
1347     /* t = a^3 = t * a */
1348     sp_256_mont_mul_10(t, t, a, p256_mod, p256_mp_mod);
1349     /* t2= a^c = t ^ 2 ^ 2 */
1350     sp_256_mont_sqr_n_10(t2, t, 2, p256_mod, p256_mp_mod);
1351     /* t3= a^d = t2 * a */
1352     sp_256_mont_mul_10(t3, t2, a, p256_mod, p256_mp_mod);
1353     /* t = a^f = t2 * t */
1354     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
1355     /* t2= a^f0 = t ^ 2 ^ 4 */
1356     sp_256_mont_sqr_n_10(t2, t, 4, p256_mod, p256_mp_mod);
1357     /* t3= a^fd = t2 * t3 */
1358     sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
1359     /* t = a^ff = t2 * t */
1360     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
1361     /* t2= a^ff00 = t ^ 2 ^ 8 */
1362     sp_256_mont_sqr_n_10(t2, t, 8, p256_mod, p256_mp_mod);
1363     /* t3= a^fffd = t2 * t3 */
1364     sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
1365     /* t = a^ffff = t2 * t */
1366     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
1367     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
1368     sp_256_mont_sqr_n_10(t2, t, 16, p256_mod, p256_mp_mod);
1369     /* t3= a^fffffffd = t2 * t3 */
1370     sp_256_mont_mul_10(t3, t2, t3, p256_mod, p256_mp_mod);
1371     /* t = a^ffffffff = t2 * t */
1372     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
1373     /* t = a^ffffffff00000000 = t ^ 2 ^ 32  */
1374     sp_256_mont_sqr_n_10(t2, t, 32, p256_mod, p256_mp_mod);
1375     /* t2= a^ffffffffffffffff = t2 * t */
1376     sp_256_mont_mul_10(t, t2, t, p256_mod, p256_mp_mod);
1377     /* t2= a^ffffffff00000001 = t2 * a */
1378     sp_256_mont_mul_10(t2, t2, a, p256_mod, p256_mp_mod);
1379     /* t2= a^ffffffff000000010000000000000000000000000000000000000000
1380      *   = t2 ^ 2 ^ 160 */
1381     sp_256_mont_sqr_n_10(t2, t2, 160, p256_mod, p256_mp_mod);
1382     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff
1383      *   = t2 * t */
1384     sp_256_mont_mul_10(t2, t2, t, p256_mod, p256_mp_mod);
1385     /* t2= a^ffffffff00000001000000000000000000000000ffffffffffffffff00000000
1386      *   = t2 ^ 2 ^ 32 */
1387     sp_256_mont_sqr_n_10(t2, t2, 32, p256_mod, p256_mp_mod);
1388     /* r = a^ffffffff00000001000000000000000000000000fffffffffffffffffffffffd
1389      *   = t2 * t3 */
1390     sp_256_mont_mul_10(r, t2, t3, p256_mod, p256_mp_mod);
1391 #endif /* WOLFSSL_SP_SMALL */
1392 }
1393 
1394 
1395 /* Map the Montgomery form projective co-ordinate point to an affine point.
1396  *
1397  * r  Resulting affine co-ordinate point.
1398  * p  Montgomery form projective co-ordinate point.
1399  * t  Temporary ordinate data.
1400  */
sp_256_map_10(sp_point * r,const sp_point * p,sp_digit * t)1401 static void sp_256_map_10(sp_point* r, const sp_point* p, sp_digit* t)
1402 {
1403     sp_digit* t1 = t;
1404     sp_digit* t2 = t + Q6_P_mpy_RR(2, 10);
1405     int32_t n;
1406 
1407     sp_256_mont_inv_10(t1, p->z, t + 2*10);
1408 
1409     sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
1410     sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
1411 
1412     /* x /= z^2 */
1413     sp_256_mont_mul_10(r->x, p->x, t2, p256_mod, p256_mp_mod);
1414     XMEMSET(r->x + 10, 0, sizeof(r->x) / 2U);
1415     sp_256_mont_reduce_10(r->x, p256_mod, p256_mp_mod);
1416     /* Reduce x to less than modulus */
1417     n = sp_256_cmp_10(r->x, p256_mod);
1418     sp_256_cond_sub_10(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
1419                 (sp_digit)1 : (sp_digit)0));
1420     sp_256_norm_10(r->x);
1421 
1422     /* y /= z^3 */
1423     sp_256_mont_mul_10(r->y, p->y, t1, p256_mod, p256_mp_mod);
1424     XMEMSET(r->y + 10, 0, sizeof(r->y) / 2U);
1425     sp_256_mont_reduce_10(r->y, p256_mod, p256_mp_mod);
1426     /* Reduce y to less than modulus */
1427     n = sp_256_cmp_10(r->y, p256_mod);
1428     sp_256_cond_sub_10(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
1429                 (sp_digit)1 : (sp_digit)0));
1430     sp_256_norm_10(r->y);
1431 
1432     XMEMSET(r->z, 0, sizeof(r->z));
1433     r->z[0] = 1;
1434 
1435 }
1436 
1437 
/* Add b to a into r. (r = a + b)
 *
 * The ten words are added pairwise and independently; no carry moves from
 * one word to the next here. Always returns 0.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static int sp_256_add_10(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Compiled-out intrinsic form of the same ten additions. */
#if 0
    r[ 0] = Q6_R_add_RR(a[0], b[0]);
    r[ 1] = Q6_R_add_RR(a[1], b[1]);
    r[ 2] = Q6_R_add_RR(a[2], b[2]);
    r[ 3] = Q6_R_add_RR(a[3], b[3]);
    r[ 4] = Q6_R_add_RR(a[4], b[4]);
    r[ 5] = Q6_R_add_RR(a[5], b[5]);
    r[ 6] = Q6_R_add_RR(a[6], b[6]);
    r[ 7] = Q6_R_add_RR(a[7], b[7]);
    r[ 8] = Q6_R_add_RR(a[8], b[8]);
    r[ 9] = Q6_R_add_RR(a[9], b[9]);
#endif
    /* Active form: hand-scheduled assembly. Words are addressed by byte
     * offset (#0 to #36 in steps of 4). Loads for later words are interleaved
     * with the add and the store of earlier ones; the store of a word always
     * comes after the loads of that word from a and b, so r may be the same
     * buffer as a or b. Each { } pair groups instructions together
     * (Hexagon packet syntax). */
#if 1
    __asm__ __volatile__ (
        /* word 0 loads */
        "{ r1 = memw(%[a]+#0)  \n"
        "  r2 = memw(%[b]+#0) }\n"
        /* word 1 loads, word 0 add */
        "{ r3 = memw(%[a]+#4)  \n"
        "  r19 = add(r1,r2)    \n"
        "  r4 = memw(%[b]+#4) }\n"
        "{ r5 = memw(%[a]+#8)  \n"
        "  r20 = add(r3,r4)    \n"
        "  r6 = memw(%[b]+#8) }\n"
        "{ memw(%[r]+#0) = r19 }\n"
        "{ r7 = memw(%[a]+#12)  \n"
        "  r21 = add(r5,r6)    \n"
        "  r8 = memw(%[b]+#12) }\n"
        "{ memw(%[r]+#4) = r20 }\n"
        "{ r9 = memw(%[a]+#16)  \n"
        "  r22 = add(r7,r8)     \n"
        "  r10 = memw(%[b]+#16) }\n"
        "{ memw(%[r]+#8) = r21 }\n"
        "{ r11 = memw(%[a]+#20)  \n"
        "  r23 = add(r9,r10)     \n"
        "  r12 = memw(%[b]+#20) }\n"
        "{ memw(%[r]+#12) = r22 }\n"
        "{ r13 = memw(%[a]+#24)  \n"
        "  r24 = add(r11,r12)     \n"
        "  r14 = memw(%[b]+#24) }\n"
        "{ memw(%[r]+#16) = r23 }\n"
        "{ r15 = memw(%[a]+#28)  \n"
        "  r25 = add(r13,r14)     \n"
        "  r16 = memw(%[b]+#28) }\n"
        "{ memw(%[r]+#20) = r24 }\n"
        "{ r17 = memw(%[a]+#32)  \n"
        "  r26 = add(r15,r16)     \n"
        "  r18 = memw(%[b]+#32) }\n"
        "{ memw(%[r]+#24) = r25 }\n"
        /* word 9 loads reuse r5/r6; word 8 sum reuses r19 */
        "{ r5 = memw(%[a]+#36)  \n"
        "  r19 = add(r17,r18)    \n"
        "  r6 = memw(%[b]+#36) }\n"
        "{ memw(%[r]+#28) = r26 }\n"
        "{ r20 = add(r5,r6)      \n"
            "  memw(%[r]+#32) = r19 }\n"
        "{ memw(%[r]+#36) = r20 }\n"
        : [r] "+r" (r)
        : [a] "r"(a), [b] "r"(b)
        /* r1-r26 are scratch; "memory" because r is written through. */
        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
    );
#endif
    return 0;
}
1508 
1509 
1510 /* Add two Montgomery form numbers (r = a + b % m).
1511  *
1512  * r   Result of addition.
1513  * a   First number to add in Montgomery form.
1514  * b   Second number to add in Montgomery form.
1515  * m   Modulus (prime).
1516  */
sp_256_mont_add_10(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)1517 static void sp_256_mont_add_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
1518         const sp_digit* m)
1519 {
1520     (void)sp_256_add_10(r, a, b);
1521     sp_256_norm_10(r);
1522     sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
1523                 (sp_digit)1 : (sp_digit)0));
1524     sp_256_norm_10(r);
1525 }
1526 
1527 
1528 /* Double a Montgomery form number (r = a + a % m).
1529  *
1530  * r   Result of doubling.
1531  * a   Number to double in Montgomery form.
1532  * m   Modulus (prime).
1533  */
sp_256_mont_dbl_10(sp_digit * r,const sp_digit * a,const sp_digit * m)1534 static void sp_256_mont_dbl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
1535 {
1536     (void)sp_256_add_10(r, a, a);
1537     sp_256_norm_10(r);
1538     sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
1539                 (sp_digit)1 : (sp_digit)0));
1540     sp_256_norm_10(r);
1541 }
1542 
1543 
1544 /* Triple a Montgomery form number (r = a + a + a % m).
1545  *
1546  * r   Result of Tripling.
1547  * a   Number to triple in Montgomery form.
1548  * m   Modulus (prime).
1549  */
sp_256_mont_tpl_10(sp_digit * r,const sp_digit * a,const sp_digit * m)1550 static void sp_256_mont_tpl_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
1551 {
1552     (void)sp_256_add_10(r, a, a);
1553     sp_256_norm_10(r);
1554     sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
1555                 (sp_digit)1 : (sp_digit)0));
1556     sp_256_norm_10(r);
1557     (void)sp_256_add_10(r, r, a);
1558     sp_256_norm_10(r);
1559     sp_256_cond_sub_10(r, r, m, 0 - (((r[9] >> 22) > 0) ?
1560                 (sp_digit)1 : (sp_digit)0));
1561     sp_256_norm_10(r);
1562 }
1563 
/* Sub b from a into r. (r = a - b)
 *
 * The ten words are subtracted pairwise and independently; no borrow moves
 * from one word to the next here. Always returns 0.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static int sp_256_sub_10(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Compiled-out intrinsic form of the same ten subtractions. */
#if 0
    r[ 0] = Q6_R_sub_RR(a[0], b[0]);
    r[ 1] = Q6_R_sub_RR(a[1], b[1]);
    r[ 2] = Q6_R_sub_RR(a[2], b[2]);
    r[ 3] = Q6_R_sub_RR(a[3], b[3]);
    r[ 4] = Q6_R_sub_RR(a[4], b[4]);
    r[ 5] = Q6_R_sub_RR(a[5], b[5]);
    r[ 6] = Q6_R_sub_RR(a[6], b[6]);
    r[ 7] = Q6_R_sub_RR(a[7], b[7]);
    r[ 8] = Q6_R_sub_RR(a[8], b[8]);
    r[ 9] = Q6_R_sub_RR(a[9], b[9]);
#endif
    /* Active form: hand-scheduled assembly. Words are addressed by byte
     * offset (#0 to #36 in steps of 4). Loads for later words are interleaved
     * with the subtract and the store of earlier ones; the store of a word
     * always comes after the loads of that word from a and b, so r may be the
     * same buffer as a or b. Each { } pair groups instructions together
     * (Hexagon packet syntax). */
#if 1
    __asm__ __volatile__ (
        /* word 0 loads */
        "{ r1 = memw(%[a]+#0)  \n"
        "  r2 = memw(%[b]+#0) }\n"
        /* word 1 loads, word 0 subtract */
        "{ r3 = memw(%[a]+#4)  \n"
        "  r19 = sub(r1,r2)    \n"
        "  r4 = memw(%[b]+#4) }\n"
        "{ r5 = memw(%[a]+#8)  \n"
        "  r20 = sub(r3,r4)    \n"
        "  r6 = memw(%[b]+#8) }\n"
        "{ memw(%[r]+#0) = r19 }\n"
        "{ r7 = memw(%[a]+#12)  \n"
        "  r21 = sub(r5,r6)    \n"
        "  r8 = memw(%[b]+#12) }\n"
        "{ memw(%[r]+#4) = r20 }\n"
        "{ r9 = memw(%[a]+#16)  \n"
        "  r22 = sub(r7,r8)     \n"
        "  r10 = memw(%[b]+#16) }\n"
        "{ memw(%[r]+#8) = r21 }\n"
        "{ r11 = memw(%[a]+#20)  \n"
        "  r23 = sub(r9,r10)     \n"
        "  r12 = memw(%[b]+#20) }\n"
        "{ memw(%[r]+#12) = r22 }\n"
        "{ r13 = memw(%[a]+#24)  \n"
        "  r24 = sub(r11,r12)     \n"
        "  r14 = memw(%[b]+#24) }\n"
        "{ memw(%[r]+#16) = r23 }\n"
        "{ r15 = memw(%[a]+#28)  \n"
        "  r25 = sub(r13,r14)     \n"
        "  r16 = memw(%[b]+#28) }\n"
        "{ memw(%[r]+#20) = r24 }\n"
        "{ r17 = memw(%[a]+#32)  \n"
        "  r26 = sub(r15,r16)     \n"
        "  r18 = memw(%[b]+#32) }\n"
        "{ memw(%[r]+#24) = r25 }\n"
        /* word 9 loads reuse r5/r6; word 8 difference reuses r19 */
        "{ r5 = memw(%[a]+#36)  \n"
        "  r19 = sub(r17,r18)    \n"
        "  r6 = memw(%[b]+#36) }\n"
        "{ memw(%[r]+#28) = r26 }\n"
        "{ r20 = sub(r5,r6)      \n"
            "  memw(%[r]+#32) = r19 }\n"
        "{ memw(%[r]+#36) = r20 }\n"
        : [r] "+r" (r)
        : [a] "r"(a), [b] "r"(b)
        /* r1-r26 are scratch; "memory" because r is written through. */
        : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26"
    );
#endif
    return 0;
}
1634 
1635 /* Conditionally add a and b using the mask m.
1636  * m is -1 to add and 0 when not.
1637  *
1638  * r  A single precision number representing conditional add result.
1639  * a  A single precision number to add with.
1640  * b  A single precision number to add.
1641  * m  Mask value to apply.
1642  */
sp_256_cond_add_10(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit m)1643 static void sp_256_cond_add_10(sp_digit* r, const sp_digit* a,
1644         const sp_digit* b, const sp_digit m)
1645 {
1646 #ifdef WOLFSSL_SP_SMALL
1647     int i;
1648 
1649     for (i = 0; i < 10; i++) {
1650         r[i] = a[i] + (b[i] & m);
1651     }
1652 #else
1653     r[ 0] = Q6_R_add_RR(a[ 0], Q6_R_and_RR(b[ 0], m));
1654     r[ 1] = Q6_R_add_RR(a[ 1], Q6_R_and_RR(b[ 1], m));
1655     r[ 2] = Q6_R_add_RR(a[ 2], Q6_R_and_RR(b[ 2], m));
1656     r[ 3] = Q6_R_add_RR(a[ 3], Q6_R_and_RR(b[ 3], m));
1657     r[ 4] = Q6_R_add_RR(a[ 4], Q6_R_and_RR(b[ 4], m));
1658     r[ 5] = Q6_R_add_RR(a[ 5], Q6_R_and_RR(b[ 5], m));
1659     r[ 6] = Q6_R_add_RR(a[ 6], Q6_R_and_RR(b[ 6], m));
1660     r[ 7] = Q6_R_add_RR(a[ 7], Q6_R_and_RR(b[ 7], m));
1661     r[ 8] = Q6_R_add_RR(a[ 8], Q6_R_and_RR(b[ 8], m));
1662     r[ 9] = Q6_R_add_RR(a[ 9], Q6_R_and_RR(b[ 9], m));
1663 #endif /* WOLFSSL_SP_SMALL */
1664 }
1665 
1666 
1667 /* Subtract two Montgomery form numbers (r = a - b % m).
1668  *
1669  * r   Result of subtration.
1670  * a   Number to subtract from in Montgomery form.
1671  * b   Number to subtract with in Montgomery form.
1672  * m   Modulus (prime).
1673  */
sp_256_mont_sub_10(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)1674 static void sp_256_mont_sub_10(sp_digit* r, const sp_digit* a, const sp_digit* b,
1675         const sp_digit* m)
1676 {
1677     (void)sp_256_sub_10(r, a, b);
1678     sp_256_cond_add_10(r, r, m, r[9] >> 22);
1679     sp_256_norm_10(r);
1680 }
1681 
1682 
1683 /* Shift number left one bit.
1684  * Bottom bit is lost.
1685  *
1686  * r  Result of shift.
1687  * a  Number to shift.
1688  */
sp_256_rshift1_10(sp_digit * r,sp_digit * a)1689 SP_NOINLINE static void sp_256_rshift1_10(sp_digit* r, sp_digit* a)
1690 {
1691 #ifdef WOLFSSL_SP_SMALL
1692     int i;
1693 
1694     for (i=0; i<9; i++) {
1695         r[i] = ((a[i] >> 1) | (a[i + 1] << 25)) & 0x3ffffff;
1696     }
1697 #else
1698     r[0] = ((a[0] >> 1) | Q6_R_and_RR((a[1] << 25), 0x3ffffff));
1699     r[1] = ((a[1] >> 1) | Q6_R_and_RR((a[2] << 25), 0x3ffffff));
1700     r[2] = ((a[2] >> 1) | Q6_R_and_RR((a[3] << 25), 0x3ffffff));
1701     r[3] = ((a[3] >> 1) | Q6_R_and_RR((a[4] << 25), 0x3ffffff));
1702     r[4] = ((a[4] >> 1) | Q6_R_and_RR((a[5] << 25), 0x3ffffff));
1703     r[5] = ((a[5] >> 1) | Q6_R_and_RR((a[6] << 25), 0x3ffffff));
1704     r[6] = ((a[6] >> 1) | Q6_R_and_RR((a[7] << 25), 0x3ffffff));
1705     r[7] = ((a[7] >> 1) | Q6_R_and_RR((a[8] << 25), 0x3ffffff));
1706     r[8] = ((a[8] >> 1) | Q6_R_and_RR((a[9] << 25), 0x3ffffff));
1707 #endif
1708     r[9] = a[9] >> 1;
1709 }
1710 
1711 
1712 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
1713  *
1714  * r  Result of division by 2.
1715  * a  Number to divide.
1716  * m  Modulus (prime).
1717  */
sp_256_div2_10(sp_digit * r,const sp_digit * a,const sp_digit * m)1718 static void sp_256_div2_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
1719 {
1720     sp_256_cond_add_10(r, a, m, 0 - (a[0] & 1));
1721     sp_256_norm_10(r);
1722     sp_256_rshift1_10(r, r);
1723 }
1724 
1725 
1726 /* Double the Montgomery form projective point p.
1727  *
1728  * r  Result of doubling point.
1729  * p  Point to double.
1730  * t  Temporary ordinate data.
1731  */
sp_256_proj_point_dbl_10(sp_point * r,const sp_point * p,sp_digit * t)1732 static void sp_256_proj_point_dbl_10(sp_point* r, const sp_point* p, sp_digit* t)
1733 {
1734     sp_point* rp[2];
1735     sp_digit* t1 = t;
1736     sp_digit* t2 = t + 2*10;
1737     sp_digit* x;
1738     sp_digit* y;
1739     sp_digit* z;
1740     int i;
1741 
1742     /* When infinity don't double point passed in - constant time. */
1743     rp[0] = r;
1744 
1745     /*lint allow cast to different type of pointer*/
1746     rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
1747     XMEMSET(rp[1], 0, sizeof(sp_point));
1748     x = rp[p->infinity]->x;
1749     y = rp[p->infinity]->y;
1750     z = rp[p->infinity]->z;
1751     /* Put point to double into result - good for infinity. */
1752     if (r != p) {
1753         for (i=0; i<10; i++) {
1754             r->x[i] = p->x[i];
1755         }
1756         for (i=0; i<10; i++) {
1757             r->y[i] = p->y[i];
1758         }
1759         for (i=0; i<10; i++) {
1760             r->z[i] = p->z[i];
1761         }
1762         r->infinity = p->infinity;
1763     }
1764 
1765     /* T1 = Z * Z */
1766     sp_256_mont_sqr_10(t1, z, p256_mod, p256_mp_mod);
1767     /* Z = Y * Z */
1768     sp_256_mont_mul_10(z, y, z, p256_mod, p256_mp_mod);
1769     /* Z = 2Z */
1770     sp_256_mont_dbl_10(z, z, p256_mod);
1771     /* T2 = X - T1 */
1772     sp_256_mont_sub_10(t2, x, t1, p256_mod);
1773     /* T1 = X + T1 */
1774     sp_256_mont_add_10(t1, x, t1, p256_mod);
1775     /* T2 = T1 * T2 */
1776     sp_256_mont_mul_10(t2, t1, t2, p256_mod, p256_mp_mod);
1777     /* T1 = 3T2 */
1778     sp_256_mont_tpl_10(t1, t2, p256_mod);
1779     /* Y = 2Y */
1780     sp_256_mont_dbl_10(y, y, p256_mod);
1781     /* Y = Y * Y */
1782     sp_256_mont_sqr_10(y, y, p256_mod, p256_mp_mod);
1783     /* T2 = Y * Y */
1784     sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
1785     /* T2 = T2/2 */
1786     sp_256_div2_10(t2, t2, p256_mod);
1787     /* Y = Y * X */
1788     sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
1789     /* X = T1 * T1 */
1790     sp_256_mont_mul_10(x, t1, t1, p256_mod, p256_mp_mod);
1791     /* X = X - Y */
1792     sp_256_mont_sub_10(x, x, y, p256_mod);
1793     /* X = X - Y */
1794     sp_256_mont_sub_10(x, x, y, p256_mod);
1795     /* Y = Y - X */
1796     sp_256_mont_sub_10(y, y, x, p256_mod);
1797     /* Y = Y * T1 */
1798     sp_256_mont_mul_10(y, y, t1, p256_mod, p256_mp_mod);
1799     /* Y = Y - T2 */
1800     sp_256_mont_sub_10(y, y, t2, p256_mod);
1801 
1802 }
1803 
1804 
1805 /* Compare two numbers to determine if they are equal.
1806  * Constant time implementation.
1807  *
1808  * a  First number to compare.
1809  * b  Second number to compare.
1810  * returns 1 when equal and 0 otherwise.
1811  */
sp_256_cmp_equal_10(const sp_digit * a,const sp_digit * b)1812 static int sp_256_cmp_equal_10(const sp_digit* a, const sp_digit* b)
1813 {
1814     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) |
1815             (a[4] ^ b[4]) | (a[5] ^ b[5]) | (a[6] ^ b[6]) | (a[7] ^ b[7]) |
1816             (a[8] ^ b[8]) | (a[9] ^ b[9])) == 0;
1817 }
1818 
1819 /* Add two Montgomery form projective points.
1820  *
1821  * r  Result of addition.
1822  * p  First point to add.
1823  * q  Second point to add.
1824  * t  Temporary ordinate data.
1825  */
sp_256_proj_point_add_10(sp_point * r,const sp_point * p,const sp_point * q,sp_digit * t)1826 static void sp_256_proj_point_add_10(sp_point* r, const sp_point* p, const sp_point* q,
1827         sp_digit* t)
1828 {
1829     const sp_point* ap[2];
1830     sp_point* rp[2];
1831     sp_digit* t1 = t;
1832     sp_digit* t2 = t + 2*10;
1833     sp_digit* t3 = t + 4*10;
1834     sp_digit* t4 = t + 6*10;
1835     sp_digit* t5 = t + 8*10;
1836     sp_digit* x;
1837     sp_digit* y;
1838     sp_digit* z;
1839     int i;
1840 
1841     /* Ensure only the first point is the same as the result. */
1842     if (q == r) {
1843         const sp_point* a = p;
1844         p = q;
1845         q = a;
1846     }
1847 
1848     /* Check double */
1849     (void)sp_256_sub_10(t1, p256_mod, q->y);
1850     sp_256_norm_10(t1);
1851     if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
1852         (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
1853         sp_256_proj_point_dbl_10(r, p, t);
1854     }
1855     else {
1856         rp[0] = r;
1857 
1858         /*lint allow cast to different type of pointer*/
1859         rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
1860         XMEMSET(rp[1], 0, sizeof(sp_point));
1861         x = rp[p->infinity | q->infinity]->x;
1862         y = rp[p->infinity | q->infinity]->y;
1863         z = rp[p->infinity | q->infinity]->z;
1864 
1865         ap[0] = p;
1866         ap[1] = q;
1867         for (i=0; i<10; i++) {
1868             r->x[i] = ap[p->infinity]->x[i];
1869         }
1870         for (i=0; i<10; i++) {
1871             r->y[i] = ap[p->infinity]->y[i];
1872         }
1873         for (i=0; i<10; i++) {
1874             r->z[i] = ap[p->infinity]->z[i];
1875         }
1876         r->infinity = ap[p->infinity]->infinity;
1877 
1878         /* U1 = X1*Z2^2 */
1879         sp_256_mont_sqr_10(t1, q->z, p256_mod, p256_mp_mod);
1880         sp_256_mont_mul_10(t3, t1, q->z, p256_mod, p256_mp_mod);
1881         sp_256_mont_mul_10(t1, t1, x, p256_mod, p256_mp_mod);
1882         /* U2 = X2*Z1^2 */
1883         sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
1884         sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
1885         sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
1886         /* S1 = Y1*Z2^3 */
1887         sp_256_mont_mul_10(t3, t3, y, p256_mod, p256_mp_mod);
1888         /* S2 = Y2*Z1^3 */
1889         sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
1890         /* H = U2 - U1 */
1891         sp_256_mont_sub_10(t2, t2, t1, p256_mod);
1892         /* R = S2 - S1 */
1893         sp_256_mont_sub_10(t4, t4, t3, p256_mod);
1894         /* Z3 = H*Z1*Z2 */
1895         sp_256_mont_mul_10(z, z, q->z, p256_mod, p256_mp_mod);
1896         sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
1897         /* X3 = R^2 - H^3 - 2*U1*H^2 */
1898         sp_256_mont_sqr_10(x, t4, p256_mod, p256_mp_mod);
1899         sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
1900         sp_256_mont_mul_10(y, t1, t5, p256_mod, p256_mp_mod);
1901         sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
1902         sp_256_mont_sub_10(x, x, t5, p256_mod);
1903         sp_256_mont_dbl_10(t1, y, p256_mod);
1904         sp_256_mont_sub_10(x, x, t1, p256_mod);
1905         /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
1906         sp_256_mont_sub_10(y, y, x, p256_mod);
1907         sp_256_mont_mul_10(y, y, t4, p256_mod, p256_mp_mod);
1908         sp_256_mont_mul_10(t5, t5, t3, p256_mod, p256_mp_mod);
1909         sp_256_mont_sub_10(y, y, t5, p256_mod);
1910     }
1911 }
1912 
1913 #ifdef WOLFSSL_SP_SMALL
1914 /* Multiply the point by the scalar and return the result.
1915  * If map is true then convert result to affine co-ordinates.
1916  *
1917  * r     Resulting point.
1918  * g     Point to multiply.
1919  * k     Scalar to multiply by.
1920  * map   Indicates whether to convert result to affine.
1921  * heap  Heap to use for allocation.
1922  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
1923  */
sp_256_ecc_mulmod_10(sp_point * r,const sp_point * g,const sp_digit * k,int map,void * heap)1924 static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
1925         int map, void* heap)
1926 {
1927     sp_point* td;
1928     sp_point* t[3];
1929     sp_digit* tmp;
1930     sp_digit n;
1931     int i;
1932     int c, y;
1933     int err = MP_OKAY;
1934 
1935     (void)heap;
1936 
1937     td = (sp_point*)XMALLOC(sizeof(sp_point) * 3, heap, DYNAMIC_TYPE_ECC);
1938     if (td == NULL)
1939         err = MEMORY_E;
1940     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
1941                                                               DYNAMIC_TYPE_ECC);
1942     if (tmp == NULL)
1943         err = MEMORY_E;
1944 
1945     if (err == MP_OKAY) {
1946         XMEMSET(td, 0, sizeof(*td) * 3);
1947 
1948         t[0] = &td[0];
1949         t[1] = &td[1];
1950         t[2] = &td[2];
1951 
1952         /* t[0] = {0, 0, 1} * norm */
1953         t[0]->infinity = 1;
1954         /* t[1] = {g->x, g->y, g->z} * norm */
1955         err = sp_256_mod_mul_norm_10(t[1]->x, g->x, p256_mod);
1956     }
1957     if (err == MP_OKAY)
1958         err = sp_256_mod_mul_norm_10(t[1]->y, g->y, p256_mod);
1959     if (err == MP_OKAY)
1960         err = sp_256_mod_mul_norm_10(t[1]->z, g->z, p256_mod);
1961 
1962     if (err == MP_OKAY) {
1963         i = 9;
1964         c = 22;
1965         n = k[i--] << (26 - c);
1966         for (; ; c--) {
1967             if (c == 0) {
1968                 if (i == -1)
1969                     break;
1970 
1971                 n = k[i--];
1972                 c = 26;
1973             }
1974 
1975             y = (n >> 25) & 1;
1976             n <<= 1;
1977 
1978             sp_256_proj_point_add_10(t[y^1], t[0], t[1], tmp);
1979 
1980             XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) +
1981                                   ((size_t)t[1] & addr_mask[y])),
1982                     sizeof(sp_point));
1983             sp_256_proj_point_dbl_10(t[2], t[2], tmp);
1984             XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) +
1985                             ((size_t)t[1] & addr_mask[y])), t[2],
1986                     sizeof(sp_point));
1987         }
1988 
1989         if (map != 0) {
1990             sp_256_map_10(r, t[0], tmp);
1991         }
1992         else {
1993             XMEMCPY(r, t[0], sizeof(sp_point));
1994         }
1995     }
1996 
1997     if (tmp != NULL) {
1998         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
1999         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
2000     }
2001     if (td != NULL) {
2002         XMEMSET(td, 0, sizeof(sp_point) * 3);
2003         XFREE(td, NULL, DYNAMIC_TYPE_ECC);
2004     }
2005 
2006     return err;
2007 }
2008 
2009 #elif !defined(WC_NO_CACHE_RESISTANT)
2010 /* Multiply the point by the scalar and return the result.
2011  * If map is true then convert result to affine co-ordinates.
2012  *
2013  * r     Resulting point.
2014  * g     Point to multiply.
2015  * k     Scalar to multiply by.
2016  * map   Indicates whether to convert result to affine.
2017  * heap  Heap to use for allocation.
2018  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
2019  */
sp_256_ecc_mulmod_10(sp_point * r,const sp_point * g,const sp_digit * k,int map,void * heap)2020 static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
2021         int map, void* heap)
2022 {
2023 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
2024     sp_point td[3];
2025     sp_digit tmpd[2 * 10 * 5];
2026 #endif
2027     sp_point* t;
2028     sp_digit* tmp;
2029     sp_digit n;
2030     int i;
2031     int c, y;
2032     int err = MP_OKAY;
2033 
2034     (void)heap;
2035 
2036 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
2037     sp_point td[3];
2038     t = (sp_point*)XMALLOC(sizeof(*td) * 3, heap, DYNAMIC_TYPE_ECC);
2039     if (t == NULL)
2040         err = MEMORY_E;
2041     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
2042                              DYNAMIC_TYPE_ECC);
2043     if (tmp == NULL)
2044         err = MEMORY_E;
2045 #else
2046     t = td;
2047     tmp = tmpd;
2048 #endif
2049 
2050     if (err == MP_OKAY) {
2051         t[0] = &td[0];
2052         t[1] = &td[1];
2053         t[2] = &td[2];
2054 
2055         /* t[0] = {0, 0, 1} * norm */
2056         XMEMSET(&t[0], 0, sizeof(t[0]));
2057         t[0].infinity = 1;
2058         /* t[1] = {g->x, g->y, g->z} * norm */
2059         err = sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
2060     }
2061     if (err == MP_OKAY)
2062         err = sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
2063     if (err == MP_OKAY)
2064         err = sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
2065 
2066     if (err == MP_OKAY) {
2067         i = 9;
2068         c = 22;
2069         n = k[i--] << (26 - c);
2070         for (; ; c--) {
2071             if (c == 0) {
2072                 if (i == -1)
2073                     break;
2074 
2075                 n = k[i--];
2076                 c = 26;
2077             }
2078 
2079             y = (n >> 25) & 1;
2080             n <<= 1;
2081 
2082             sp_256_proj_point_add_10(&t[y^1], &t[0], &t[1], tmp);
2083 
2084             XMEMCPY(&t[2], (void*)(((size_t)&t[0] & addr_mask[y^1]) +
2085                                  ((size_t)&t[1] & addr_mask[y])), sizeof(t[2]));
2086             sp_256_proj_point_dbl_10(&t[2], &t[2], tmp);
2087             XMEMCPY((void*)(((size_t)&t[0] & addr_mask[y^1]) +
2088                           ((size_t)&t[1] & addr_mask[y])), &t[2], sizeof(t[2]));
2089         }
2090 
2091         if (map != 0) {
2092             sp_256_map_10(r, &t[0], tmp);
2093         }
2094         else {
2095             XMEMCPY(r, &t[0], sizeof(sp_point));
2096         }
2097     }
2098 
2099 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
2100     if (tmp != NULL) {
2101         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
2102         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
2103     }
2104     if (t != NULL) {
2105         XMEMSET(t, 0, sizeof(sp_point) * 3);
2106         XFREE(t, heap, DYNAMIC_TYPE_ECC);
2107     }
2108 #else
2109     ForceZero(tmpd, sizeof(tmpd));
2110     ForceZero(td, sizeof(td));
2111 #endif
2112 
2113     return err;
2114 }
2115 
2116 #else
2117 /* A table entry for pre-computed points. */
2118 typedef struct sp_table_entry {
2119     sp_digit x[10] __attribute__((aligned(128)));
2120     sp_digit y[10] __attribute__((aligned(128)));
2121 } sp_table_entry;
2122 
2123 /* Multiply the point by the scalar and return the result.
2124  * If map is true then convert result to affine co-ordinates.
2125  *
2126  * r     Resulting point.
2127  * g     Point to multiply.
2128  * k     Scalar to multiply by.
2129  * map   Indicates whether to convert result to affine.
2130  * heap  Heap to use for allocation.
2131  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
2132  */
sp_256_ecc_mulmod_fast_10(sp_point * r,const sp_point * g,const sp_digit * k,int map,void * heap)2133 static int sp_256_ecc_mulmod_fast_10(sp_point* r, const sp_point* g, const sp_digit* k,
2134         int map, void* heap)
2135 {
2136 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
2137     sp_point td[16];
2138     sp_point rtd;
2139     sp_digit tmpd[2 * 10 * 5];
2140 #endif
2141     sp_point* t;
2142     sp_point* rt;
2143     sp_digit* tmp;
2144     sp_digit n;
2145     int i;
2146     int c, y;
2147     int err;
2148 
2149     (void)heap;
2150 
2151     err = sp_ecc_point_new(heap, rtd, rt);
2152 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
2153     t = (sp_point*)XMALLOC(sizeof(sp_point) * 16, heap, DYNAMIC_TYPE_ECC);
2154     if (t == NULL)
2155         err = MEMORY_E;
2156     tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
2157                              DYNAMIC_TYPE_ECC);
2158     if (tmp == NULL)
2159         err = MEMORY_E;
2160 #else
2161     t = td;
2162     tmp = tmpd;
2163 #endif
2164 
2165     if (err == MP_OKAY) {
2166         /* t[0] = {0, 0, 1} * norm */
2167         XMEMSET(&t[0], 0, sizeof(t[0]));
2168         t[0].infinity = 1;
2169         /* t[1] = {g->x, g->y, g->z} * norm */
2170         (void)sp_256_mod_mul_norm_10(t[1].x, g->x, p256_mod);
2171         (void)sp_256_mod_mul_norm_10(t[1].y, g->y, p256_mod);
2172         (void)sp_256_mod_mul_norm_10(t[1].z, g->z, p256_mod);
2173         t[1].infinity = 0;
2174         sp_256_proj_point_dbl_10(&t[ 2], &t[ 1], tmp);
2175         t[ 2].infinity = 0;
2176         sp_256_proj_point_add_10(&t[ 3], &t[ 2], &t[ 1], tmp);
2177         t[ 3].infinity = 0;
2178         sp_256_proj_point_dbl_10(&t[ 4], &t[ 2], tmp);
2179         t[ 4].infinity = 0;
2180         sp_256_proj_point_add_10(&t[ 5], &t[ 3], &t[ 2], tmp);
2181         t[ 5].infinity = 0;
2182         sp_256_proj_point_dbl_10(&t[ 6], &t[ 3], tmp);
2183         t[ 6].infinity = 0;
2184         sp_256_proj_point_add_10(&t[ 7], &t[ 4], &t[ 3], tmp);
2185         t[ 7].infinity = 0;
2186         sp_256_proj_point_dbl_10(&t[ 8], &t[ 4], tmp);
2187         t[ 8].infinity = 0;
2188         sp_256_proj_point_add_10(&t[ 9], &t[ 5], &t[ 4], tmp);
2189         t[ 9].infinity = 0;
2190         sp_256_proj_point_dbl_10(&t[10], &t[ 5], tmp);
2191         t[10].infinity = 0;
2192         sp_256_proj_point_add_10(&t[11], &t[ 6], &t[ 5], tmp);
2193         t[11].infinity = 0;
2194         sp_256_proj_point_dbl_10(&t[12], &t[ 6], tmp);
2195         t[12].infinity = 0;
2196         sp_256_proj_point_add_10(&t[13], &t[ 7], &t[ 6], tmp);
2197         t[13].infinity = 0;
2198         sp_256_proj_point_dbl_10(&t[14], &t[ 7], tmp);
2199         t[14].infinity = 0;
2200         sp_256_proj_point_add_10(&t[15], &t[ 8], &t[ 7], tmp);
2201         t[15].infinity = 0;
2202 
2203         i = 8;
2204         n = k[i+1] << 6;
2205         c = 18;
2206         y = n >> 24;
2207         XMEMCPY(rt, &t[y], sizeof(sp_point));
2208         n <<= 8;
2209         for (; i>=0 || c>=4; ) {
2210             if (c < 4) {
2211                 n |= k[i--] << (6 - c);
2212                 c += 26;
2213             }
2214             y = (n >> 28) & 0xf;
2215             n <<= 4;
2216             c -= 4;
2217 
2218             sp_256_proj_point_dbl_10(rt, rt, tmp);
2219             sp_256_proj_point_dbl_10(rt, rt, tmp);
2220             sp_256_proj_point_dbl_10(rt, rt, tmp);
2221             sp_256_proj_point_dbl_10(rt, rt, tmp);
2222 
2223             sp_256_proj_point_add_10(rt, rt, &t[y], tmp);
2224         }
2225 
2226         if (map != 0) {
2227             sp_256_map_10(r, rt, tmp);
2228         }
2229         else {
2230             XMEMCPY(r, rt, sizeof(sp_point));
2231         }
2232     }
2233 
2234 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
2235     if (tmp != NULL) {
2236         XMEMSET(tmp, 0, sizeof(sp_digit) * 2 * 10 * 5);
2237         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
2238     }
2239     if (t != NULL) {
2240         XMEMSET(t, 0, sizeof(sp_point) * 16);
2241         XFREE(t, heap, DYNAMIC_TYPE_ECC);
2242     }
2243 #else
2244     ForceZero(tmpd, sizeof(tmpd));
2245     ForceZero(td, sizeof(td));
2246 #endif
2247     sp_ecc_point_free(rt, 1, heap);
2248 
2249     return err;
2250 }
2251 
2252 #ifdef FP_ECC
2253 /* Double the Montgomery form projective point p a number of times.
2254  *
2255  * r  Result of repeated doubling of point.
2256  * p  Point to double.
2257  * n  Number of times to double
2258  * t  Temporary ordinate data.
2259  */
sp_256_proj_point_dbl_n_10(sp_point * r,const sp_point * p,int n,sp_digit * t)2260 static void sp_256_proj_point_dbl_n_10(sp_point* r, const sp_point* p, int n,
2261         sp_digit* t)
2262 {
2263     sp_point* rp[2];
2264     sp_digit* w = t;
2265     sp_digit* a = t + 2*10;
2266     sp_digit* b = t + 4*10;
2267     sp_digit* t1 = t + 6*10;
2268     sp_digit* t2 = t + 8*10;
2269     sp_digit* x;
2270     sp_digit* y;
2271     sp_digit* z;
2272     int i;
2273 
2274     rp[0] = r;
2275 
2276     /*lint allow cast to different type of pointer*/
2277     rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
2278     XMEMSET(rp[1], 0, sizeof(sp_point));
2279     x = rp[p->infinity]->x;
2280     y = rp[p->infinity]->y;
2281     z = rp[p->infinity]->z;
2282     if (r != p) {
2283         for (i=0; i<10; i++) {
2284             r->x[i] = p->x[i];
2285         }
2286         for (i=0; i<10; i++) {
2287             r->y[i] = p->y[i];
2288         }
2289         for (i=0; i<10; i++) {
2290             r->z[i] = p->z[i];
2291         }
2292         r->infinity = p->infinity;
2293     }
2294 
2295     /* Y = 2*Y */
2296     sp_256_mont_dbl_10(y, y, p256_mod);
2297     /* W = Z^4 */
2298     sp_256_mont_sqr_10(w, z, p256_mod, p256_mp_mod);
2299     sp_256_mont_sqr_10(w, w, p256_mod, p256_mp_mod);
2300     while (n-- > 0) {
2301         /* A = 3*(X^2 - W) */
2302         sp_256_mont_sqr_10(t1, x, p256_mod, p256_mp_mod);
2303         sp_256_mont_sub_10(t1, t1, w, p256_mod);
2304         sp_256_mont_tpl_10(a, t1, p256_mod);
2305         /* B = X*Y^2 */
2306         sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
2307         sp_256_mont_mul_10(b, t2, x, p256_mod, p256_mp_mod);
2308         /* X = A^2 - 2B */
2309         sp_256_mont_sqr_10(x, a, p256_mod, p256_mp_mod);
2310         sp_256_mont_dbl_10(t1, b, p256_mod);
2311         sp_256_mont_sub_10(x, x, t1, p256_mod);
2312         /* Z = Z*Y */
2313         sp_256_mont_mul_10(z, z, y, p256_mod, p256_mp_mod);
2314         /* t2 = Y^4 */
2315         sp_256_mont_sqr_10(t2, t2, p256_mod, p256_mp_mod);
2316         if (n != 0) {
2317             /* W = W*Y^4 */
2318             sp_256_mont_mul_10(w, w, t2, p256_mod, p256_mp_mod);
2319         }
2320         /* y = 2*A*(B - X) - Y^4 */
2321         sp_256_mont_sub_10(y, b, x, p256_mod);
2322         sp_256_mont_mul_10(y, y, a, p256_mod, p256_mp_mod);
2323         sp_256_mont_dbl_10(y, y, p256_mod);
2324         sp_256_mont_sub_10(y, y, t2, p256_mod);
2325     }
2326     /* Y = Y/2 */
2327     sp_256_div2_10(y, y, p256_mod);
2328 }
2329 
2330 #endif /* FP_ECC */
2331 
2332 
2333 /* Add two Montgomery form projective points. The second point has a q value of
2334  * one.
2335  * Only the first point can be the same pointer as the result point.
2336  *
2337  * r  Result of addition.
2338  * p  First point to add.
2339  * q  Second point to add.
2340  * t  Temporary ordinate data.
2341  */
sp_256_proj_point_add_qz1_10(sp_point * r,const sp_point * p,const sp_point * q,sp_digit * t)2342 static void sp_256_proj_point_add_qz1_10(sp_point* r, const sp_point* p,
2343         const sp_point* q, sp_digit* t)
2344 {
2345     const sp_point* ap[2];
2346     sp_point* rp[2];
2347     sp_digit* t1 = t;
2348     sp_digit* t2 = t + 2*10;
2349     sp_digit* t3 = t + 4*10;
2350     sp_digit* t4 = t + 6*10;
2351     sp_digit* t5 = t + 8*10;
2352     sp_digit* x;
2353     sp_digit* y;
2354     sp_digit* z;
2355     int i;
2356 
2357     /* Check double */
2358     (void)sp_256_sub_10(t1, p256_mod, q->y);
2359     sp_256_norm_10(t1);
2360     if ((sp_256_cmp_equal_10(p->x, q->x) & sp_256_cmp_equal_10(p->z, q->z) &
2361         (sp_256_cmp_equal_10(p->y, q->y) | sp_256_cmp_equal_10(p->y, t1))) != 0) {
2362         sp_256_proj_point_dbl_10(r, p, t);
2363     }
2364     else {
2365         rp[0] = r;
2366 
2367         /*lint allow cast to different type of pointer*/
2368         rp[1] = (sp_point*)t; /*lint !e9087 !e740*/
2369         XMEMSET(rp[1], 0, sizeof(sp_point));
2370         x = rp[p->infinity | q->infinity]->x;
2371         y = rp[p->infinity | q->infinity]->y;
2372         z = rp[p->infinity | q->infinity]->z;
2373 
2374         ap[0] = p;
2375         ap[1] = q;
2376         for (i=0; i<10; i++) {
2377             r->x[i] = ap[p->infinity]->x[i];
2378         }
2379         for (i=0; i<10; i++) {
2380             r->y[i] = ap[p->infinity]->y[i];
2381         }
2382         for (i=0; i<10; i++) {
2383             r->z[i] = ap[p->infinity]->z[i];
2384         }
2385         r->infinity = ap[p->infinity]->infinity;
2386 
2387         /* U2 = X2*Z1^2 */
2388         sp_256_mont_sqr_10(t2, z, p256_mod, p256_mp_mod);
2389         sp_256_mont_mul_10(t4, t2, z, p256_mod, p256_mp_mod);
2390         sp_256_mont_mul_10(t2, t2, q->x, p256_mod, p256_mp_mod);
2391         /* S2 = Y2*Z1^3 */
2392         sp_256_mont_mul_10(t4, t4, q->y, p256_mod, p256_mp_mod);
2393         /* H = U2 - X1 */
2394         sp_256_mont_sub_10(t2, t2, x, p256_mod);
2395         /* R = S2 - Y1 */
2396         sp_256_mont_sub_10(t4, t4, y, p256_mod);
2397         /* Z3 = H*Z1 */
2398         sp_256_mont_mul_10(z, z, t2, p256_mod, p256_mp_mod);
2399         /* X3 = R^2 - H^3 - 2*X1*H^2 */
2400         sp_256_mont_sqr_10(t1, t4, p256_mod, p256_mp_mod);
2401         sp_256_mont_sqr_10(t5, t2, p256_mod, p256_mp_mod);
2402         sp_256_mont_mul_10(t3, x, t5, p256_mod, p256_mp_mod);
2403         sp_256_mont_mul_10(t5, t5, t2, p256_mod, p256_mp_mod);
2404         sp_256_mont_sub_10(x, t1, t5, p256_mod);
2405         sp_256_mont_dbl_10(t1, t3, p256_mod);
2406         sp_256_mont_sub_10(x, x, t1, p256_mod);
2407         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
2408         sp_256_mont_sub_10(t3, t3, x, p256_mod);
2409         sp_256_mont_mul_10(t3, t3, t4, p256_mod, p256_mp_mod);
2410         sp_256_mont_mul_10(t5, t5, y, p256_mod, p256_mp_mod);
2411         sp_256_mont_sub_10(y, t3, t5, p256_mod);
2412     }
2413 }
2414 
2415 #ifdef FP_ECC
2416 /* Convert the projective point to affine.
2417  * Ordinates are in Montgomery form.
2418  *
2419  * a  Point to convert.
2420  * t  Temporary data.
2421  */
sp_256_proj_to_affine_10(sp_point * a,sp_digit * t)2422 static void sp_256_proj_to_affine_10(sp_point* a, sp_digit* t)
2423 {
2424     sp_digit* t1 = t;
2425     sp_digit* t2 = t + 2 * 10;
2426     sp_digit* tmp = t + 4 * 10;
2427 
2428     sp_256_mont_inv_10(t1, a->z, tmp);
2429 
2430     sp_256_mont_sqr_10(t2, t1, p256_mod, p256_mp_mod);
2431     sp_256_mont_mul_10(t1, t2, t1, p256_mod, p256_mp_mod);
2432 
2433     sp_256_mont_mul_10(a->x, a->x, t2, p256_mod, p256_mp_mod);
2434     sp_256_mont_mul_10(a->y, a->y, t1, p256_mod, p256_mp_mod);
2435     XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
2436 }
2437 
2438 
2439 /* Generate the pre-computed table of points for the base point.
2440  *
2441  * a      The base point.
2442  * table  Place to store generated point data.
2443  * tmp    Temporary data.
2444  * heap  Heap to use for allocation.
2445  */
sp_256_gen_stripe_table_10(const sp_point * a,sp_table_entry * table,sp_digit * tmp,void * heap)2446 static int sp_256_gen_stripe_table_10(const sp_point* a,
2447         sp_table_entry* table, sp_digit* tmp, void* heap)
2448 {
2449 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
2450     sp_point td, s1d, s2d;
2451 #endif
2452     sp_point* t;
2453     sp_point* s1 = NULL;
2454     sp_point* s2 = NULL;
2455     int i, j;
2456     int err;
2457 
2458     (void)heap;
2459 
2460     err = sp_ecc_point_new(heap, td, t);
2461     if (err == MP_OKAY) {
2462         err = sp_ecc_point_new(heap, s1d, s1);
2463     }
2464     if (err == MP_OKAY) {
2465         err = sp_ecc_point_new(heap, s2d, s2);
2466     }
2467 
2468     if (err == MP_OKAY) {
2469         err = sp_256_mod_mul_norm_10(t->x, a->x, p256_mod);
2470     }
2471     if (err == MP_OKAY) {
2472         err = sp_256_mod_mul_norm_10(t->y, a->y, p256_mod);
2473     }
2474     if (err == MP_OKAY) {
2475         err = sp_256_mod_mul_norm_10(t->z, a->z, p256_mod);
2476     }
2477     if (err == MP_OKAY) {
2478         t->infinity = 0;
2479         sp_256_proj_to_affine_10(t, tmp);
2480 
2481         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
2482         s1->infinity = 0;
2483         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
2484         s2->infinity = 0;
2485 
2486         /* table[0] = {0, 0, infinity} */
2487         XMEMSET(&table[0], 0, sizeof(sp_table_entry));
2488         /* table[1] = Affine version of 'a' in Montgomery form */
2489         XMEMCPY(table[1].x, t->x, sizeof(table->x));
2490         XMEMCPY(table[1].y, t->y, sizeof(table->y));
2491 
2492         for (i=1; i<8; i++) {
2493             sp_256_proj_point_dbl_n_10(t, t, 32, tmp);
2494             sp_256_proj_to_affine_10(t, tmp);
2495             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
2496             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
2497         }
2498 
2499         for (i=1; i<8; i++) {
2500             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
2501             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
2502             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
2503                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
2504                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
2505                 sp_256_proj_point_add_qz1_10(t, s1, s2, tmp);
2506                 sp_256_proj_to_affine_10(t, tmp);
2507                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
2508                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
2509             }
2510         }
2511     }
2512 
2513     sp_ecc_point_free(s2, 0, heap);
2514     sp_ecc_point_free(s1, 0, heap);
2515     sp_ecc_point_free( t, 0, heap);
2516 
2517     return err;
2518 }
2519 
2520 #endif /* FP_ECC */
2521 /* Multiply the point by the scalar and return the result.
2522  * If map is true then convert result to affine co-ordinates.
2523  *
2524  * r     Resulting point.
2525  * k     Scalar to multiply by.
2526  * map   Indicates whether to convert result to affine.
2527  * heap  Heap to use for allocation.
2528  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
2529  */
sp_256_ecc_mulmod_stripe_10(sp_point * r,const sp_point * g,const sp_table_entry * table,const sp_digit * k,int map,void * heap)2530 static int sp_256_ecc_mulmod_stripe_10(sp_point* r, const sp_point* g,
2531         const sp_table_entry* table, const sp_digit* k, int map, void* heap)
2532 {
2533 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
2534     sp_point rtd;
2535     sp_point pd;
2536     sp_digit td[2 * 10 * 5];
2537 #endif
2538     sp_point* rt;
2539     sp_point* p = NULL;
2540     sp_digit* t;
2541     int i, j;
2542     int y, x;
2543     int err;
2544 
2545     (void)g;
2546     (void)heap;
2547 
2548     err = sp_ecc_point_new(heap, rtd, rt);
2549     if (err == MP_OKAY) {
2550         err = sp_ecc_point_new(heap, pd, p);
2551     }
2552 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
2553     t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, heap,
2554                            DYNAMIC_TYPE_ECC);
2555     if (t == NULL) {
2556         err = MEMORY_E;
2557     }
2558 #else
2559     t = td;
2560 #endif
2561 
2562     if (err == MP_OKAY) {
2563         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
2564         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
2565 
2566         y = 0;
2567         for (j=0,x=31; j<8; j++,x+=32) {
2568             y |= ((k[x / 26] >> (x % 26)) & 1) << j;
2569         }
2570         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
2571         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
2572         rt->infinity = !y;
2573         for (i=30; i>=0; i--) {
2574             y = 0;
2575             for (j=0,x=i; j<8; j++,x+=32) {
2576                 y |= ((k[x / 26] >> (x % 26)) & 1) << j;
2577             }
2578 
2579             sp_256_proj_point_dbl_10(rt, rt, t);
2580             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
2581             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
2582             p->infinity = !y;
2583             sp_256_proj_point_add_qz1_10(rt, rt, p, t);
2584         }
2585 
2586         if (map != 0) {
2587             sp_256_map_10(r, rt, t);
2588         }
2589         else {
2590             XMEMCPY(r, rt, sizeof(sp_point));
2591         }
2592     }
2593 
2594 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
2595     if (t != NULL) {
2596         XFREE(t, heap, DYNAMIC_TYPE_ECC);
2597     }
2598 #endif
2599     sp_ecc_point_free(p, 0, heap);
2600     sp_ecc_point_free(rt, 0, heap);
2601 
2602     return err;
2603 }
2604 
2605 #ifdef FP_ECC
/* Number of slots in the point cache (size of the sp_cache array).
 * Defaults to 16 unless the build has already defined FP_ENTRIES.
 */
#ifndef FP_ENTRIES
    #define FP_ENTRIES 16
#endif
2609 
/* One slot of the point cache: the x/y ordinates identifying a point together
 * with the table associated with that point.
 */
typedef struct sp_cache_t {
    /* X ordinate of the cached point; compared with g->x on lookup. */
    sp_digit x[10] __attribute__((aligned(128)));
    /* Y ordinate of the cached point; compared with g->y on lookup. */
    sp_digit y[10] __attribute__((aligned(128)));
    /* Table for the cached point; passed to sp_256_gen_stripe_table_10() and
     * sp_256_ecc_mulmod_stripe_10(). */
    sp_table_entry table[256] __attribute__((aligned(128)));
    /* Use count: set to 1 when the point is stored, incremented on each hit. */
    uint32_t cnt;
    /* Non-zero when this slot holds a point. */
    int set;
} sp_cache_t;
2617 
/* Cache slots searched and filled by sp_ecc_get_cache(). */
static THREAD_LS_T sp_cache_t sp_cache[FP_ENTRIES];
/* Index of the slot most recently returned by sp_ecc_get_cache(); -1 until
 * the first lookup. */
static THREAD_LS_T int sp_cache_last = -1;
/* Set to 1 by sp_ecc_get_cache() once the 'set' flag of every slot has been
 * cleared. */
static THREAD_LS_T int sp_cache_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Set to 1 after wc_InitMutex() has been called on sp_cache_lock. */
    static volatile int initCacheMutex = 0;
    /* Mutex held around cache lookups when HAVE_THREAD_LS is not defined. */
    static wolfSSL_Mutex sp_cache_lock;
#endif
2626 
/* Get the cache entry for a point, storing the point in the cache when it is
 * not already present.
 *
 * A hit increments the use count of the matching entry. On a miss the point
 * is stored in an unused entry or, when every entry is in use, in the entry
 * with the lowest use count; the use count of that entry is reset to 1.
 * The index of the returned entry is remembered in sp_cache_last.
 *
 * g      Point to look up. Only the x and y ordinates are compared.
 * cache  Receives a pointer to the cache entry for the point.
 */
static void sp_ecc_get_cache(const sp_point* g, sp_cache_t** cache)
{
    int i, j;
    uint32_t least;

    /* First call: mark every entry as unused. */
    if (sp_cache_inited == 0) {
        for (i=0; i<FP_ENTRIES; i++) {
            sp_cache[i].set = 0;
        }
        sp_cache_inited = 1;
    }

    /* Compare point with those in cache. */
    for (i=0; i<FP_ENTRIES; i++) {
        if (!sp_cache[i].set)
            continue;

        /* Bitwise AND: both ordinate comparisons are always evaluated. */
        if (sp_256_cmp_equal_10(g->x, sp_cache[i].x) &
                           sp_256_cmp_equal_10(g->y, sp_cache[i].y)) {
            sp_cache[i].cnt++;
            break;
        }
    }

    /* No match. */
    if (i == FP_ENTRIES) {
        /* Find empty entry, starting after the most recently used one. */
        i = (sp_cache_last + 1) % FP_ENTRIES;
        for (; i != sp_cache_last; i=(i+1)%FP_ENTRIES) {
            if (!sp_cache[i].set) {
                break;
            }
        }

        /* Evict least used. */
        if (i == sp_cache_last) {
            /* The scan is seeded with entry 0, so the index must start there
             * too; otherwise the most recently used entry would be evicted
             * whenever entry 0 has the lowest count.
             */
            i = 0;
            least = sp_cache[0].cnt;
            for (j=1; j<FP_ENTRIES; j++) {
                if (sp_cache[j].cnt < least) {
                    i = j;
                    least = sp_cache[i].cnt;
                }
            }
        }

        /* Store the point in the chosen entry and restart its use count. */
        XMEMCPY(sp_cache[i].x, g->x, sizeof(sp_cache[i].x));
        XMEMCPY(sp_cache[i].y, g->y, sizeof(sp_cache[i].y));
        sp_cache[i].set = 1;
        sp_cache[i].cnt = 1;
    }

    *cache = &sp_cache[i];
    sp_cache_last = i;
}
2681 #endif /* FP_ECC */
2682 
2683 /* Multiply the base point of P256 by the scalar and return the result.
2684  * If map is true then convert result to affine co-ordinates.
2685  *
2686  * r     Resulting point.
2687  * g     Point to multiply.
2688  * k     Scalar to multiply by.
2689  * map   Indicates whether to convert result to affine.
2690  * heap  Heap to use for allocation.
2691  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
2692  */
sp_256_ecc_mulmod_10(sp_point * r,const sp_point * g,const sp_digit * k,int map,void * heap)2693 static int sp_256_ecc_mulmod_10(sp_point* r, const sp_point* g, const sp_digit* k,
2694         int map, void* heap)
2695 {
2696 #ifndef FP_ECC
2697     return sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
2698 #else
2699     sp_digit tmp[2 * 10 * 5];
2700     sp_cache_t* cache;
2701     int err = MP_OKAY;
2702 
2703 #ifndef HAVE_THREAD_LS
2704     if (initCacheMutex == 0) {
2705          wc_InitMutex(&sp_cache_lock);
2706          initCacheMutex = 1;
2707     }
2708     if (wc_LockMutex(&sp_cache_lock) != 0)
2709        err = BAD_MUTEX_E;
2710 #endif /* HAVE_THREAD_LS */
2711 
2712     if (err == MP_OKAY) {
2713         sp_ecc_get_cache(g, &cache);
2714         if (cache->cnt == 2)
2715             sp_256_gen_stripe_table_10(g, cache->table, tmp, heap);
2716 
2717 #ifndef HAVE_THREAD_LS
2718         wc_UnLockMutex(&sp_cache_lock);
2719 #endif /* HAVE_THREAD_LS */
2720 
2721         if (cache->cnt < 2) {
2722             err = sp_256_ecc_mulmod_fast_10(r, g, k, map, heap);
2723         }
2724         else {
2725             err = sp_256_ecc_mulmod_stripe_10(r, g, cache->table, k,
2726                     map, heap);
2727         }
2728     }
2729 
2730     return err;
2731 #endif
2732 }
2733 
2734 #endif
2735 
2736 #ifdef WOLFSSL_SP_SMALL
2737 /* Multiply the base point of P256 by the scalar and return the result.
2738  * If map is true then convert result to affine co-ordinates.
2739  *
2740  * r     Resulting point.
2741  * k     Scalar to multiply by.
2742  * map   Indicates whether to convert result to affine.
2743  * heap  Heap to use for allocation.
2744  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
2745  */
sp_256_ecc_mulmod_base_10(sp_point * r,const sp_digit * k,int map,void * heap)2746 static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k,
2747         int map, void* heap)
2748 {
2749     /* No pre-computed values. */
2750     return sp_256_ecc_mulmod_10(r, &p256_base, k, map, heap);
2751 }
2752 
2753 #else
2754 static const sp_table_entry p256_table[256] = {
2755     /* 0 */
2756     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
2757       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
2758     /* 1 */
2759     { { 0x0a9143c,0x1cc3506,0x360179e,0x3f17fb6,0x075ba95,0x1d88944,
2760         0x3b732b7,0x15719e7,0x376a537,0x0062417 },
2761       { 0x295560a,0x094d5f3,0x245cddf,0x392e867,0x18b4ab8,0x3487cc9,
2762         0x288688d,0x176174b,0x3182588,0x0215c7f } },
2763     /* 2 */
2764     { { 0x147519a,0x2218090,0x32f0202,0x2b09acd,0x0d0981e,0x1e17af2,
2765         0x14a7caa,0x163a6a7,0x10ddbdf,0x03654f1 },
2766       { 0x1590f8f,0x0d8733f,0x09179d6,0x1ad139b,0x372e962,0x0bad933,
2767         0x1961102,0x223cdff,0x37e9eb2,0x0218fae } },
2768     /* 3 */
2769     { { 0x0db6485,0x1ad88d7,0x2f97785,0x288bc28,0x3808f0e,0x3df8c02,
2770         0x28d9544,0x20280f9,0x055b5ff,0x00001d8 },
2771       { 0x38d2010,0x13ae6e0,0x308a763,0x2ecc90d,0x254014f,0x10a9981,
2772         0x247d398,0x0fb8383,0x3613437,0x020c21d } },
2773     /* 4 */
2774     { { 0x2a0d2bb,0x08bf145,0x34994f9,0x1b06988,0x30d5cc1,0x1f18b22,
2775         0x01cf3a5,0x199fe49,0x161fd1b,0x00bd79a },
2776       { 0x1a01797,0x171c2fd,0x21925c1,0x1358255,0x23d20b4,0x1c7f6d4,
2777         0x111b370,0x03dec12,0x1168d6f,0x03d923e } },
2778     /* 5 */
2779     { { 0x137bbbc,0x19a11f8,0x0bec9e5,0x27a29a8,0x3e43446,0x275cd18,
2780         0x0427617,0x00056c7,0x285133d,0x016af80 },
2781       { 0x04c7dab,0x2a0df30,0x0c0792a,0x1310c98,0x3573d9f,0x239b30d,
2782         0x1315627,0x1ce0c32,0x25b6b6f,0x0252edc } },
2783     /* 6 */
2784     { { 0x20f141c,0x26d23dc,0x3c74bbf,0x334b7d6,0x06199b3,0x0441171,
2785         0x3f61294,0x313bf70,0x3cb2f7d,0x03375ae },
2786       { 0x2f436fd,0x19c02fa,0x26becca,0x1b6e64c,0x26f647f,0x053c948,
2787         0x0fa7920,0x397d830,0x2bd4bda,0x028d86f } },
2788     /* 7 */
2789     { { 0x17c13c7,0x2895616,0x03e128a,0x17d42df,0x1c38d63,0x0f02747,
2790         0x039aecf,0x0a4b01c,0x209c4b5,0x02e84b2 },
2791       { 0x1f91dfd,0x023e916,0x07fb9e4,0x19b3ba8,0x13af43b,0x35e02ca,
2792         0x0eb0899,0x3bd2c7b,0x19d701f,0x014faee } },
2793     /* 8 */
2794     { { 0x0e63d34,0x1fb8c6c,0x0fab4fe,0x1caa795,0x0f46005,0x179ed69,
2795         0x093334d,0x120c701,0x39206d5,0x021627e },
2796       { 0x183553a,0x03d7319,0x09e5aa7,0x12b8959,0x2087909,0x0011194,
2797         0x1045071,0x0713f32,0x16d0254,0x03aec1a } },
2798     /* 9 */
2799     { { 0x01647c5,0x1b2856b,0x1799461,0x11f133d,0x0b8127d,0x1937eeb,
2800         0x266aa37,0x1f68f71,0x0cbd1b2,0x03aca08 },
2801       { 0x287e008,0x1be361a,0x38f3940,0x276488d,0x2d87dfa,0x0333b2c,
2802         0x2d2e428,0x368755b,0x09b55a7,0x007ca0a } },
2803     /* 10 */
2804     { { 0x389da99,0x2a8300e,0x0022abb,0x27ae0a1,0x0a6f2d7,0x207017a,
2805         0x047862b,0x1358c9e,0x35905e5,0x00cde92 },
2806       { 0x1f7794a,0x1d40348,0x3f613c6,0x2ddf5b5,0x0207005,0x133f5ba,
2807         0x1a37810,0x3ef5829,0x0d5f4c2,0x0035978 } },
2808     /* 11 */
2809     { { 0x1275d38,0x026efad,0x2358d9d,0x1142f82,0x14268a7,0x1cfac99,
2810         0x362ff49,0x288cbc1,0x24252f4,0x0308f68 },
2811       { 0x394520c,0x06e13c2,0x178e5da,0x18ec16f,0x1096667,0x134a7a8,
2812         0x0dcb869,0x33fc4e9,0x38cc790,0x006778e } },
2813     /* 12 */
2814     { { 0x2c5fe04,0x29c5b09,0x1bdb183,0x02ceee8,0x03b28de,0x132dc4b,
2815         0x32c586a,0x32ff5d0,0x3d491fc,0x038d372 },
2816       { 0x2a58403,0x2351aea,0x3a53b40,0x21a0ba5,0x39a6974,0x1aaaa2b,
2817         0x3901273,0x03dfe78,0x3447b4e,0x039d907 } },
2818     /* 13 */
2819     { { 0x364ba59,0x14e5077,0x02fc7d7,0x3b02c09,0x1d33f10,0x0560616,
2820         0x06dfc6a,0x15efd3c,0x357052a,0x01284b7 },
2821       { 0x039dbd0,0x18ce3e5,0x3e1fbfa,0x352f794,0x0d3c24b,0x07c6cc5,
2822         0x1e4ffa2,0x3a91bf5,0x293bb5b,0x01abd6a } },
2823     /* 14 */
2824     { { 0x0c91999,0x02da644,0x0491da1,0x100a960,0x00a24b4,0x2330824,
2825         0x0094b4b,0x1004cf8,0x35a66a4,0x017f8d1 },
2826       { 0x13e7b4b,0x232af7e,0x391ab0f,0x069f08f,0x3292b50,0x3479898,
2827         0x2889aec,0x2a4590b,0x308ecfe,0x02d5138 } },
2828     /* 15 */
2829     { { 0x2ddfdce,0x231ba45,0x39e6647,0x19be245,0x12c3291,0x35399f8,
2830         0x0d6e764,0x3082d3a,0x2bda6b0,0x0382dac },
2831       { 0x37efb57,0x04b7cae,0x00070d3,0x379e431,0x01aac0d,0x1e6f251,
2832         0x0336ad6,0x0ddd3e4,0x3de25a6,0x01c7008 } },
2833     /* 16 */
2834     { { 0x3e20925,0x230912f,0x286762a,0x30e3f73,0x391c19a,0x34e1c18,
2835         0x16a5d5d,0x093d96a,0x3d421d3,0x0187561 },
2836       { 0x37173ea,0x19ce8a8,0x0b65e87,0x0214dde,0x2238480,0x16ead0f,
2837         0x38441e0,0x3bef843,0x2124621,0x03e847f } },
2838     /* 17 */
2839     { { 0x0b19ffd,0x247cacb,0x3c231c8,0x16ec648,0x201ba8d,0x2b172a3,
2840         0x103d678,0x2fb72db,0x04c1f13,0x0161bac },
2841       { 0x3e8ed09,0x171b949,0x2de20c3,0x0f06067,0x21e81a3,0x1b194be,
2842         0x0fd6c05,0x13c449e,0x0087086,0x006756b } },
2843     /* 18 */
2844     { { 0x09a4e1f,0x27d604c,0x00741e9,0x06fa49c,0x0ab7de7,0x3f4a348,
2845         0x25ef0be,0x158fc9a,0x33f7f9c,0x039f001 },
2846       { 0x2f59f76,0x3598e83,0x30501f6,0x15083f2,0x0669b3b,0x29980b5,
2847         0x0c1f7a7,0x0f02b02,0x0fec65b,0x0382141 } },
2848     /* 19 */
2849     { { 0x031b3ca,0x23da368,0x2d66f09,0x27b9b69,0x06d1cab,0x13c91ba,
2850         0x3d81fa9,0x25ad16f,0x0825b09,0x01e3c06 },
2851       { 0x225787f,0x3bf790e,0x2c9bb7e,0x0347732,0x28016f8,0x0d6ff0d,
2852         0x2a4877b,0x1d1e833,0x3b87e94,0x010e9dc } },
2853     /* 20 */
2854     { { 0x2b533d5,0x1ddcd34,0x1dc0625,0x3da86f7,0x3673b8a,0x1e7b0a4,
2855         0x3e7c9aa,0x19ac55d,0x251c3b2,0x02edb79 },
2856       { 0x25259b3,0x24c0ead,0x3480e7e,0x34f40e9,0x3d6a0af,0x2cf3f09,
2857         0x2c83d19,0x2e66f16,0x19a5d18,0x0182d18 } },
2858     /* 21 */
2859     { { 0x2e5aa1c,0x28e3846,0x3658bd6,0x0ad279c,0x1b8b765,0x397e1fb,
2860         0x130014e,0x3ff342c,0x3b2aeeb,0x02743c9 },
2861       { 0x2730a55,0x0918c5e,0x083aca9,0x0bf76ef,0x19c955b,0x300669c,
2862         0x01dfe0a,0x312341f,0x26d356e,0x0091295 } },
2863     /* 22 */
2864     { { 0x2cf1f96,0x00e52ba,0x271c6db,0x2a40930,0x19f2122,0x0b2f4ee,
2865         0x26ac1b8,0x3bda498,0x0873581,0x0117963 },
2866       { 0x38f9dbc,0x3d1e768,0x2040d3f,0x11ba222,0x3a8aaf1,0x1b82fb5,
2867         0x1adfb24,0x2de9251,0x21cc1e4,0x0301038 } },
2868     /* 23 */
2869     { { 0x38117b6,0x2bc001b,0x1433847,0x3fdce8d,0x3651969,0x3651d7a,
2870         0x2b35761,0x1bb1d20,0x097682c,0x00737d7 },
2871       { 0x1f04839,0x1dd6d04,0x16987db,0x3d12378,0x17dbeac,0x1c2cc86,
2872         0x121dd1b,0x3fcf6ca,0x1f8a92d,0x00119d5 } },
2873     /* 24 */
2874     { { 0x0e8ffcd,0x2b174af,0x1a82cc8,0x22cbf98,0x30d53c4,0x080b5b1,
2875         0x3161727,0x297cfdb,0x2113b83,0x0011b97 },
2876       { 0x0007f01,0x23fd936,0x3183e7b,0x0496bd0,0x07fb1ef,0x178680f,
2877         0x1c5ea63,0x0016c11,0x2c3303d,0x01b8041 } },
2878     /* 25 */
2879     { { 0x0dd73b1,0x1cd6122,0x10d948c,0x23e657b,0x3767070,0x15a8aad,
2880         0x385ea8c,0x33c7ce0,0x0ede901,0x0110965 },
2881       { 0x2d4b65b,0x2a8b244,0x0c37f8f,0x0ee5b24,0x394c234,0x3a5e347,
2882         0x26e4a15,0x39a3b4c,0x2514c2e,0x029e5be } },
2883     /* 26 */
2884     { { 0x23addd7,0x3ed8120,0x13b3359,0x20f959a,0x09e2a61,0x32fcf20,
2885         0x05b78e3,0x19ba7e2,0x1a9c697,0x0392b4b },
2886       { 0x2048a61,0x3dfd0a3,0x19a0357,0x233024b,0x3082d19,0x00fb63b,
2887         0x3a1af4c,0x1450ff0,0x046c37b,0x0317a50 } },
2888     /* 27 */
2889     { { 0x3e75f9e,0x294e30a,0x3a78476,0x3a32c48,0x36fd1a9,0x0427012,
2890         0x1e4df0b,0x11d1f61,0x1afdb46,0x018ca0f },
2891       { 0x2f2df15,0x0a33dee,0x27f4ce7,0x1542b66,0x3e592c4,0x20d2f30,
2892         0x3226ade,0x2a4e3ea,0x1ab1981,0x01a2f46 } },
2893     /* 28 */
2894     { { 0x087d659,0x3ab5446,0x305ac08,0x3d2cd64,0x33374d5,0x3f9d3f8,
2895         0x186981c,0x37f5a5a,0x2f53c6f,0x01254a4 },
2896       { 0x2cec896,0x1e32786,0x04844a8,0x043b16d,0x3d964b2,0x1935829,
2897         0x16f7e26,0x1a0dd9a,0x30d2603,0x003b1d4 } },
2898     /* 29 */
2899     { { 0x12687bb,0x04e816b,0x21fa2da,0x1abccb8,0x3a1f83b,0x375181e,
2900         0x0f5ef51,0x0fc2ce4,0x3a66486,0x003d881 },
2901       { 0x3138233,0x1f8eec3,0x2718bd6,0x1b09caa,0x2dd66b9,0x1bb222b,
2902         0x1004072,0x1b73e3b,0x07208ed,0x03fc36c } },
2903     /* 30 */
2904     { { 0x095d553,0x3e84053,0x0a8a749,0x3f575a0,0x3a44052,0x3ced59b,
2905         0x3b4317f,0x03a8c60,0x13c8874,0x00c4ed4 },
2906       { 0x0d11549,0x0b8ab02,0x221cb40,0x02ed37b,0x2071ee1,0x1fc8c83,
2907         0x3987dd4,0x27e049a,0x0f986f1,0x00b4eaf } },
2908     /* 31 */
2909     { { 0x15581a2,0x2214060,0x11af4c2,0x1598c88,0x19a0a6d,0x32acba6,
2910         0x3a7a0f0,0x2337c66,0x210ded9,0x0300dbe },
2911       { 0x1fbd009,0x3822eb0,0x181629a,0x2401b45,0x30b68b1,0x2e78363,
2912         0x2b32779,0x006530b,0x2c4b6d4,0x029aca8 } },
2913     /* 32 */
2914     { { 0x13549cf,0x0f943db,0x265ed43,0x1bfeb35,0x06f3369,0x3847f2d,
2915         0x1bfdacc,0x26181a5,0x252af7c,0x02043b8 },
2916       { 0x159bb2c,0x143f85c,0x357b654,0x2f9d62c,0x2f7dfbe,0x1a7fa9c,
2917         0x057e74d,0x05d14ac,0x17a9273,0x035215c } },
2918     /* 33 */
2919     { { 0x0cb5a98,0x106a2bc,0x10bf117,0x24c7cc4,0x3d3da8f,0x2ce0ab7,
2920         0x14e2cba,0x1813866,0x1a72f9a,0x01a9811 },
2921       { 0x2b2411d,0x3034fe8,0x16e0170,0x0f9443a,0x0be0eb8,0x2196cf3,
2922         0x0c9f738,0x15e40ef,0x0faf9e1,0x034f917 } },
2923     /* 34 */
2924     { { 0x03f7669,0x3da6efa,0x3d6bce1,0x209ca1d,0x109f8ae,0x09109e3,
2925         0x08ae543,0x3067255,0x1dee3c2,0x0081dd5 },
2926       { 0x3ef1945,0x358765b,0x28c387b,0x3bec4b4,0x218813c,0x0b7d92a,
2927         0x3cd1d67,0x2c0367e,0x2e57154,0x0123717 } },
2928     /* 35 */
2929     { { 0x3e5a199,0x1e42ffd,0x0bb7123,0x33e6273,0x1e0efb8,0x294671e,
2930         0x3a2bfe0,0x3d11709,0x2eddff6,0x03cbec2 },
2931       { 0x0b5025f,0x0255d7c,0x1f2241c,0x35d03ea,0x0550543,0x202fef4,
2932         0x23c8ad3,0x354963e,0x015db28,0x0284fa4 } },
2933     /* 36 */
2934     { { 0x2b65cbc,0x1e8d428,0x0226f9f,0x1c8a919,0x10b04b9,0x08fc1e8,
2935         0x1ce241e,0x149bc99,0x2b01497,0x00afc35 },
2936       { 0x3216fb7,0x1374fd2,0x226ad3d,0x19fef76,0x0f7d7b8,0x1c21417,
2937         0x37b83f6,0x3a27eba,0x25a162f,0x010aa52 } },
2938     /* 37 */
2939     { { 0x2adf191,0x1ab42fa,0x28d7584,0x2409689,0x20f8a48,0x253707d,
2940         0x2030504,0x378f7a1,0x169c65e,0x00b0b76 },
2941       { 0x3849c17,0x085c764,0x10dd6d0,0x2e87689,0x1460488,0x30e9521,
2942         0x10c7063,0x1b6f120,0x21f42c5,0x03d0dfe } },
2943     /* 38 */
2944     { { 0x20f7dab,0x035c512,0x29ac6aa,0x24c5ddb,0x20f0497,0x17ce5e1,
2945         0x00a050f,0x1eaa14b,0x3335470,0x02abd16 },
2946       { 0x18d364a,0x0df0cf0,0x316585e,0x018f925,0x0d40b9b,0x17b1511,
2947         0x1716811,0x1caf3d0,0x10df4f2,0x0337d8c } },
2948     /* 39 */
2949     { { 0x2a8b7ef,0x0f188e3,0x2287747,0x06216f0,0x008e935,0x2f6a38d,
2950         0x1567722,0x0bfc906,0x0bada9e,0x03c3402 },
2951       { 0x014d3b1,0x099c749,0x2a76291,0x216c067,0x3b37549,0x14ef2f6,
2952         0x21b96d4,0x1ee2d71,0x2f5ca88,0x016f570 } },
2953     /* 40 */
2954     { { 0x09a3154,0x3d1a7bd,0x2e9aef0,0x255b8ac,0x03e85a5,0x2a492a7,
2955         0x2aec1ea,0x11c6516,0x3c8a09e,0x02a84b7 },
2956       { 0x1f69f1d,0x09c89d3,0x1e7326f,0x0b28bfd,0x0e0e4c8,0x1ea7751,
2957         0x18ce73b,0x2a406e7,0x273e48c,0x01b00db } },
2958     /* 41 */
2959     { { 0x36e3138,0x2b84a83,0x345a5cf,0x00096b4,0x16966ef,0x159caf1,
2960         0x13c64b4,0x2f89226,0x25896af,0x00a4bfd },
2961       { 0x2213402,0x1435117,0x09fed52,0x09d0e4b,0x0f6580e,0x2871cba,
2962         0x3b397fd,0x1c9d825,0x090311b,0x0191383 } },
2963     /* 42 */
2964     { { 0x07153f0,0x1087869,0x18c9e1e,0x1e64810,0x2b86c3b,0x0175d9c,
2965         0x3dce877,0x269de4e,0x393cab7,0x03c96b9 },
2966       { 0x1869d0c,0x06528db,0x02641f3,0x209261b,0x29d55c8,0x25ba517,
2967         0x3b5ea30,0x028f927,0x25313db,0x00e6e39 } },
2968     /* 43 */
2969     { { 0x2fd2e59,0x150802d,0x098f377,0x19a4957,0x135e2c0,0x38a95ce,
2970         0x1ab21a0,0x36c1b67,0x32f0f19,0x00e448b },
2971       { 0x3cad53c,0x3387800,0x17e3cfb,0x03f9970,0x3225b2c,0x2a84e1d,
2972         0x3af1d29,0x3fe35ca,0x2f8ce80,0x0237a02 } },
2973     /* 44 */
2974     { { 0x07bbb76,0x3aa3648,0x2758afb,0x1f085e0,0x1921c7e,0x3010dac,
2975         0x22b74b1,0x230137e,0x1062e36,0x021c652 },
2976       { 0x3993df5,0x24a2ee8,0x126ab5f,0x2d7cecf,0x0639d75,0x16d5414,
2977         0x1aa78a8,0x3f78404,0x26a5b74,0x03f0c57 } },
2978     /* 45 */
2979     { { 0x0d6ecfa,0x3f506ba,0x3f86561,0x3d86bb1,0x15f8c44,0x2491d07,
2980         0x052a7b4,0x2422261,0x3adee38,0x039b529 },
2981       { 0x193c75d,0x14bb451,0x1162605,0x293749c,0x370a70d,0x2e8b1f6,
2982         0x2ede937,0x2b95f4a,0x39a9be2,0x00d77eb } },
2983     /* 46 */
2984     { { 0x2736636,0x15bf36a,0x2b7e6b9,0x25eb8b2,0x209f51d,0x3cd2659,
2985         0x10bf410,0x034afec,0x3d71c83,0x0076971 },
2986       { 0x0ce6825,0x07920cf,0x3c3b5c4,0x23fe55c,0x015ad11,0x08c0dae,
2987         0x0552c7f,0x2e75a8a,0x0fddbf4,0x01c1df0 } },
2988     /* 47 */
2989     { { 0x2b9661c,0x0ffe351,0x3d71bf6,0x1ac34b3,0x3a1dfd3,0x211fe3d,
2990         0x33e140a,0x3f9100d,0x32ee50e,0x014ea18 },
2991       { 0x16d8051,0x1bfda1a,0x068a097,0x2571d3d,0x1daec0c,0x39389af,
2992         0x194dc35,0x3f3058a,0x36d34e1,0x000a329 } },
2993     /* 48 */
2994     { { 0x09877ee,0x351f73f,0x0002d11,0x0420074,0x2c8b362,0x130982d,
2995         0x02c1175,0x3c11b40,0x0d86962,0x001305f },
2996       { 0x0daddf5,0x2f4252c,0x15c06d9,0x1d49339,0x1bea235,0x0b680ed,
2997         0x3356e67,0x1d1d198,0x1e9fed9,0x03dee93 } },
2998     /* 49 */
2999     { { 0x3e1263f,0x2fe8d3a,0x3ce6d0d,0x0d5c6b9,0x3557637,0x0a9bd48,
3000         0x0405538,0x0710749,0x2005213,0x038c7e5 },
3001       { 0x26b6ec6,0x2e485ba,0x3c44d1b,0x0b9cf0b,0x037a1d1,0x27428a5,
3002         0x0e7eac8,0x351ef04,0x259ce34,0x02a8e98 } },
3003     /* 50 */
3004     { { 0x2f3dcd3,0x3e77d4d,0x3360fbc,0x1434afd,0x36ceded,0x3d413d6,
3005         0x1710fad,0x36bb924,0x1627e79,0x008e637 },
3006       { 0x109569e,0x1c168db,0x3769cf4,0x2ed4527,0x0ea0619,0x17d80d3,
3007         0x1c03773,0x18843fe,0x1b21c04,0x015c5fd } },
3008     /* 51 */
3009     { { 0x1dd895e,0x08a7248,0x04519fe,0x001030a,0x18e5185,0x358dfb3,
3010         0x13d2391,0x0a37be8,0x0560e3c,0x019828b },
3011       { 0x27fcbd0,0x2a22bb5,0x30969cc,0x1e03aa7,0x1c84724,0x0ba4ad3,
3012         0x32f4817,0x0914cca,0x14c4f52,0x01893b9 } },
3013     /* 52 */
3014     { { 0x097eccc,0x1273936,0x00aa095,0x364fe62,0x04d49d1,0x10e9f08,
3015         0x3c24230,0x3ef01c8,0x2fb92bd,0x013ce4a },
3016       { 0x1e44fd9,0x27e3e9f,0x2156696,0x3915ecc,0x0b66cfb,0x1a3af0f,
3017         0x2fa8033,0x0e6736c,0x177ccdb,0x0228f9e } },
3018     /* 53 */
3019     { { 0x2c4b125,0x06207c1,0x0a8cdde,0x003db8f,0x1ae34e3,0x31e84fa,
3020         0x2999de5,0x11013bd,0x02370c2,0x00e2234 },
3021       { 0x0f91081,0x200d591,0x1504762,0x1857c05,0x23d9fcf,0x0cb34db,
3022         0x27edc86,0x08cd860,0x2471810,0x029798b } },
3023     /* 54 */
3024     { { 0x3acd6c8,0x097b8cb,0x3c661a8,0x15152f2,0x1699c63,0x237e64c,
3025         0x23edf79,0x16b7033,0x0e6466a,0x00b11da },
3026       { 0x0a64bc9,0x1bfe324,0x1f5cb34,0x08391de,0x0630a60,0x3017a21,
3027         0x09d064b,0x14a8365,0x041f9e6,0x01ed799 } },
3028     /* 55 */
3029     { { 0x128444a,0x2508b07,0x2a39216,0x362f84d,0x2e996c5,0x2c31ff3,
3030         0x07afe5f,0x1d1288e,0x3cb0c8d,0x02e2bdc },
3031       { 0x38b86fd,0x3a0ea8c,0x1cff5fd,0x1629629,0x3fee3f1,0x02b250c,
3032         0x2e8f6f2,0x0225727,0x15f7f3f,0x0280d8e } },
3033     /* 56 */
3034     { { 0x10f7770,0x0f1aee8,0x0e248c7,0x20684a8,0x3a6f16d,0x06f0ae7,
3035         0x0df6825,0x2d4cc40,0x301875f,0x012f8da },
3036       { 0x3b56dbb,0x1821ba7,0x24f8922,0x22c1f9e,0x0306fef,0x1b54bc8,
3037         0x2ccc056,0x00303ba,0x2871bdc,0x0232f26 } },
3038     /* 57 */
3039     { { 0x0dac4ab,0x0625730,0x3112e13,0x101c4bf,0x3a874a4,0x2873b95,
3040         0x32ae7c6,0x0d7e18c,0x13e0c08,0x01139d5 },
3041       { 0x334002d,0x00fffdd,0x025c6d5,0x22c2cd1,0x19d35cb,0x3a1ce2d,
3042         0x3702760,0x3f06257,0x03a5eb8,0x011c29a } },
3043     /* 58 */
3044     { { 0x0513482,0x1d87724,0x276a81b,0x0a807a4,0x3028720,0x339cc20,
3045         0x2441ee0,0x31bbf36,0x290c63d,0x0059041 },
3046       { 0x106a2ed,0x0d2819b,0x100bf50,0x114626c,0x1dd4d77,0x2e08632,
3047         0x14ae72a,0x2ed3f64,0x1fd7abc,0x035cd1e } },
3048     /* 59 */
3049     { { 0x2d4c6e5,0x3bec596,0x104d7ed,0x23d6c1b,0x0262cf0,0x15d72c5,
3050         0x2d5bb18,0x199ac4b,0x1e30771,0x020591a },
3051       { 0x21e291e,0x2e75e55,0x1661d7a,0x08b0778,0x3eb9daf,0x0d78144,
3052         0x1827eb1,0x0fe73d2,0x123f0dd,0x0028db7 } },
3053     /* 60 */
3054     { { 0x1d5533c,0x34cb1d0,0x228f098,0x27a1a11,0x17c5f5a,0x0d26f44,
3055         0x2228ade,0x2c460e6,0x3d6fdba,0x038cc77 },
3056       { 0x3cc6ed8,0x02ada1a,0x260e510,0x2f7bde8,0x37160c3,0x33a1435,
3057         0x23d9a7b,0x0ce2641,0x02a492e,0x034ed1e } },
3058     /* 61 */
3059     { { 0x3821f90,0x26dba3c,0x3aada14,0x3b59bad,0x292edd9,0x2804c45,
3060         0x3669531,0x296f42e,0x35a4c86,0x01ca049 },
3061       { 0x3ff47e5,0x2163df4,0x2441503,0x2f18405,0x15e1616,0x37f66ec,
3062         0x30f11a7,0x141658a,0x27ece14,0x00b018b } },
3063     /* 62 */
3064     { { 0x159ac2e,0x3e65bc0,0x2713a76,0x0db2f6c,0x3281e77,0x2391811,
3065         0x16d2880,0x1fbc4ab,0x1f92c4e,0x00a0a8d },
3066       { 0x0ce5cd2,0x152c7b0,0x02299c3,0x3244de7,0x2cf99ef,0x3a0b047,
3067         0x2caf383,0x0aaf664,0x113554d,0x031c735 } },
3068     /* 63 */
3069     { { 0x1b578f4,0x177a702,0x3a7a488,0x1638ebf,0x31884e2,0x2460bc7,
3070         0x36b1b75,0x3ce8e3d,0x340cf47,0x03143d9 },
3071       { 0x34b68ea,0x12b7ccd,0x1fe2a9c,0x08da659,0x0a406f3,0x1694c14,
3072         0x06a2228,0x16370be,0x3a72129,0x02e7b2c } },
3073     /* 64 */
3074     { { 0x0f8b16a,0x21043bd,0x266a56f,0x3fb11ec,0x197241a,0x36721f0,
3075         0x006b8e6,0x2ac6c29,0x202cd42,0x0200fcf },
3076       { 0x0dbec69,0x0c26a01,0x105f7f0,0x3dceeeb,0x3a83b85,0x363865f,
3077         0x097273a,0x2b70718,0x00e5067,0x03025d1 } },
3078     /* 65 */
3079     { { 0x379ab34,0x295bcb0,0x38d1846,0x22e1077,0x3a8ee06,0x1db1a3b,
3080         0x3144591,0x07cc080,0x2d5915f,0x03c6bcc },
3081       { 0x175bd50,0x0dd4c57,0x27bc99c,0x2ebdcbd,0x3837cff,0x235dc8f,
3082         0x13a4184,0x0722c18,0x130e2d4,0x008f43c } },
3083     /* 66 */
3084     { { 0x01500d9,0x2adbb7d,0x2da8857,0x397f2fa,0x10d890a,0x25c9654,
3085         0x3e86488,0x3eb754b,0x1d6c0a3,0x02c0a23 },
3086       { 0x10bcb08,0x083cc19,0x2e16853,0x04da575,0x271af63,0x2626a9d,
3087         0x3520a7b,0x32348c7,0x24ff408,0x03ff4dc } },
3088     /* 67 */
3089     { { 0x058e6cb,0x1a3992d,0x1d28539,0x080c5e9,0x2992dad,0x2a9d7d5,
3090         0x14ae0b7,0x09b7ce0,0x34ad78c,0x03d5643 },
3091       { 0x30ba55a,0x092f4f3,0x0bae0fc,0x12831de,0x20fc472,0x20ed9d2,
3092         0x29864f6,0x1288073,0x254f6f7,0x00635b6 } },
3093     /* 68 */
3094     { { 0x1be5a2b,0x0f88975,0x33c6ed9,0x20d64d3,0x06fe799,0x0989bff,
3095         0x1409262,0x085a90c,0x0d97990,0x0142eed },
3096       { 0x17ec63e,0x06471b9,0x0db2378,0x1006077,0x265422c,0x08db83d,
3097         0x28099b0,0x1270d06,0x11801fe,0x00ac400 } },
3098     /* 69 */
3099     { { 0x3391593,0x22d7166,0x30fcfc6,0x2896609,0x3c385f5,0x066b72e,
3100         0x04f3aad,0x2b831c5,0x19983fb,0x0375562 },
3101       { 0x0b82ff4,0x222e39d,0x34c993b,0x101c79c,0x2d2e03c,0x0f00c8a,
3102         0x3a9eaf4,0x1810669,0x151149d,0x039b931 } },
3103     /* 70 */
3104     { { 0x29af288,0x1956ec7,0x293155f,0x193deb6,0x1647e1a,0x2ca0839,
3105         0x297e4bc,0x15bfd0d,0x1b107ed,0x0147803 },
3106       { 0x31c327e,0x05a6e1d,0x02ad43d,0x02d2a5b,0x129cdb2,0x37ad1de,
3107         0x3d51f53,0x245df01,0x2414982,0x0388bd0 } },
3108     /* 71 */
3109     { { 0x35f1abb,0x17a3d18,0x0874cd4,0x2d5a14e,0x17edc0c,0x16a00d3,
3110         0x072c1fb,0x1232725,0x33d52dc,0x03dc24d },
3111       { 0x0af30d6,0x259aeea,0x369c401,0x12bc4de,0x295bf5f,0x0d8711f,
3112         0x26162a9,0x16c44e5,0x288e727,0x02f54b4 } },
3113     /* 72 */
3114     { { 0x05fa877,0x1571ea7,0x3d48ab1,0x1c9f4e8,0x017dad6,0x0f46276,
3115         0x343f9e7,0x1de990f,0x0e4c8aa,0x028343e },
3116       { 0x094f92d,0x3abf633,0x1b3a0bb,0x2f83137,0x0d818c8,0x20bae85,
3117         0x0c65f8b,0x1a8008b,0x0c7946d,0x0295b1e } },
3118     /* 73 */
3119     { { 0x1d09529,0x08e46c3,0x1fcf296,0x298f6b7,0x1803e0e,0x2d6fd20,
3120         0x37351f5,0x0d9e8b1,0x1f8731a,0x0362fbf },
3121       { 0x00157f4,0x06750bf,0x2650ab9,0x35ffb23,0x2f51cae,0x0b522c2,
3122         0x39cb400,0x191e337,0x0a5ce9f,0x021529a } },
3123     /* 74 */
3124     { { 0x3506ea5,0x17d9ed8,0x0d66dc3,0x22693f8,0x19286c4,0x3a57353,
3125         0x101d3bf,0x1aa54fc,0x20b9884,0x0172b3a },
3126       { 0x0eac44d,0x37d8327,0x1c3aa90,0x3d0d534,0x23db29a,0x3576eaf,
3127         0x1d3de8a,0x3bea423,0x11235e4,0x039260b } },
3128     /* 75 */
3129     { { 0x34cd55e,0x01288b0,0x1132231,0x2cc9a03,0x358695b,0x3e87650,
3130         0x345afa1,0x01267ec,0x3f616b2,0x02011ad },
3131       { 0x0e7d098,0x0d6078e,0x0b70b53,0x237d1bc,0x0d7f61e,0x132de31,
3132         0x1ea9ea4,0x2bd54c3,0x27b9082,0x03ac5f2 } },
3133     /* 76 */
3134     { { 0x2a145b9,0x06d661d,0x31ec175,0x03f06f1,0x3a5cf6b,0x249c56e,
3135         0x2035653,0x384c74f,0x0bafab5,0x0025ec0 },
3136       { 0x25f69e1,0x1b23a55,0x1199aa6,0x16ad6f9,0x077e8f7,0x293f661,
3137         0x33ba11d,0x3327980,0x07bafdb,0x03e571d } },
3138     /* 77 */
3139     { { 0x2bae45e,0x3c074ef,0x2955558,0x3c312f1,0x2a8ebe9,0x2f193f1,
3140         0x3705b1d,0x360deba,0x01e566e,0x00d4498 },
3141       { 0x21161cd,0x1bc787e,0x2f87933,0x3553197,0x1328ab8,0x093c879,
3142         0x17eee27,0x2adad1d,0x1236068,0x003be5c } },
3143     /* 78 */
3144     { { 0x0ca4226,0x2633dd5,0x2c8e025,0x0e3e190,0x05eede1,0x1a385e4,
3145         0x163f744,0x2f25522,0x1333b4f,0x03f05b6 },
3146       { 0x3c800ca,0x1becc79,0x2daabe9,0x0c499e2,0x1138063,0x3fcfa2d,
3147         0x2244976,0x1e85cf5,0x2f1b95d,0x0053292 } },
3148     /* 79 */
3149     { { 0x12f81d5,0x1dc6eaf,0x11967a4,0x1a407df,0x31a5f9d,0x2b67241,
3150         0x18bef7c,0x08c7762,0x063f59c,0x01015ec },
3151       { 0x1c05c0a,0x360bfa2,0x1f85bff,0x1bc7703,0x3e4911c,0x0d685b6,
3152         0x2fccaea,0x02c4cef,0x164f133,0x0070ed7 } },
3153     /* 80 */
3154     { { 0x0ec21fe,0x052ffa0,0x3e825fe,0x1ab0956,0x3f6ce11,0x3d29759,
3155         0x3c5a072,0x18ebe62,0x148db7e,0x03eb49c },
3156       { 0x1ab05b3,0x02dab0a,0x1ae690c,0x0f13894,0x137a9a8,0x0aab79f,
3157         0x3dc875c,0x06a1029,0x1e39f0e,0x01dce1f } },
3158     /* 81 */
3159     { { 0x16c0dd7,0x3b31269,0x2c741e9,0x3611821,0x2a5cffc,0x1416bb3,
3160         0x3a1408f,0x311fa3d,0x1c0bef0,0x02cdee1 },
3161       { 0x00e6a8f,0x1adb933,0x0f23359,0x2fdace2,0x2fd6d4b,0x0e73bd3,
3162         0x2453fac,0x0a356ae,0x2c8f9f6,0x02704d6 } },
3163     /* 82 */
3164     { { 0x0e35743,0x28c80a1,0x0def32a,0x2c6168f,0x1320d6a,0x37c6606,
3165         0x21b1761,0x2147ee0,0x21fc433,0x015c84d },
3166       { 0x1fc9168,0x36cda9c,0x003c1f0,0x1cd7971,0x15f98ba,0x1ef363d,
3167         0x0ca87e3,0x046f7d9,0x3c9e6bb,0x0372eb0 } },
3168     /* 83 */
3169     { { 0x118cbe2,0x3665a11,0x304ef01,0x062727a,0x3d242fc,0x11ffbaf,
3170         0x3663c7e,0x1a189c9,0x09e2d62,0x02e3072 },
3171       { 0x0e1d569,0x162f772,0x0cd051a,0x322df62,0x3563809,0x047cc7a,
3172         0x027fd9f,0x08b509b,0x3da2f94,0x01748ee } },
3173     /* 84 */
3174     { { 0x1c8f8be,0x31ca525,0x22bf0a1,0x200efcd,0x02961c4,0x3d8f52b,
3175         0x018403d,0x3a40279,0x1cb91ec,0x030427e },
3176       { 0x0945705,0x0257416,0x05c0c2d,0x25b77ae,0x3b9083d,0x2901126,
3177         0x292b8d7,0x07b8611,0x04f2eee,0x026f0cd } },
3178     /* 85 */
3179     { { 0x2913074,0x2b8d590,0x02b10d5,0x09d2295,0x255491b,0x0c41cca,
3180         0x1ca665b,0x133051a,0x1525f1a,0x00a5647 },
3181       { 0x04f983f,0x3d6daee,0x04e1e76,0x1067d7e,0x1be7eef,0x02ea862,
3182         0x00d4968,0x0ccb048,0x11f18ef,0x018dd95 } },
3183     /* 86 */
3184     { { 0x22976cc,0x17c5395,0x2c38bda,0x3983bc4,0x222bca3,0x332a614,
3185         0x3a30646,0x261eaef,0x1c808e2,0x02f6de7 },
3186       { 0x306a772,0x32d7272,0x2dcefd2,0x2abf94d,0x038f475,0x30ad76e,
3187         0x23e0227,0x3052b0a,0x001add3,0x023ba18 } },
3188     /* 87 */
3189     { { 0x0ade873,0x25a6069,0x248ccbe,0x13713ee,0x17ee9aa,0x28152e9,
3190         0x2e28995,0x2a92cb3,0x17a6f77,0x024b947 },
3191       { 0x190a34d,0x2ebea1c,0x1ed1948,0x16fdaf4,0x0d698f7,0x32bc451,
3192         0x0ee6e30,0x2aaab40,0x06f0a56,0x01460be } },
3193     /* 88 */
3194     { { 0x24cc99c,0x1884b1e,0x1ca1fba,0x1a0f9b6,0x2ff609b,0x2b26316,
3195         0x3b27cb5,0x29bc976,0x35d4073,0x024772a },
3196       { 0x3575a70,0x1b30f57,0x07fa01b,0x0e5be36,0x20cb361,0x26605cd,
3197         0x1d4e8c8,0x13cac59,0x2db9797,0x005e833 } },
3198     /* 89 */
3199     { { 0x36c8d3a,0x1878a81,0x124b388,0x0e4843e,0x1701aad,0x0ea0d76,
3200         0x10eae41,0x37d0653,0x36c7f4c,0x00ba338 },
3201       { 0x37a862b,0x1cf6ac0,0x08fa912,0x2dd8393,0x101ba9b,0x0eebcb7,
3202         0x2453883,0x1a3cfe5,0x2cb34f6,0x03d3331 } },
3203     /* 90 */
3204     { { 0x1f79687,0x3d4973c,0x281544e,0x2564bbe,0x17c5954,0x171e34a,
3205         0x231741a,0x3cf2784,0x0889a0d,0x02b036d },
3206       { 0x301747f,0x3f1c477,0x1f1386b,0x163bc5f,0x1592b93,0x332daed,
3207         0x080e4f5,0x1d28b96,0x26194c9,0x0256992 } },
3208     /* 91 */
3209     { { 0x15a4c93,0x07bf6b0,0x114172c,0x1ce0961,0x140269b,0x1b2c2eb,
3210         0x0dfb1c1,0x019ddaa,0x0ba2921,0x008c795 },
3211       { 0x2e6d2dc,0x37e45e2,0x2918a70,0x0fce444,0x34d6aa6,0x396dc88,
3212         0x27726b5,0x0c787d8,0x032d8a7,0x02ac2f8 } },
3213     /* 92 */
3214     { { 0x1131f2d,0x2b43a63,0x3101097,0x38cec13,0x0637f09,0x17a69d2,
3215         0x086196d,0x299e46b,0x0802cf6,0x03c6f32 },
3216       { 0x0daacb4,0x1a4503a,0x100925c,0x15583d9,0x23c4e40,0x1de4de9,
3217         0x1cc8fc4,0x2c9c564,0x0695aeb,0x02145a5 } },
3218     /* 93 */
3219     { { 0x1dcf593,0x17050fc,0x3e3bde3,0x0a6c062,0x178202b,0x2f7674f,
3220         0x0dadc29,0x15763a7,0x1d2daad,0x023d9f6 },
3221       { 0x081ea5f,0x045959d,0x190c841,0x3a78d31,0x0e7d2dd,0x1414fea,
3222         0x1d43f40,0x22d77ff,0x2b9c072,0x03e115c } },
3223     /* 94 */
3224     { { 0x3af71c9,0x29e9c65,0x25655e1,0x111e9cd,0x3a14494,0x3875418,
3225         0x34ae070,0x0b06686,0x310616b,0x03b7b89 },
3226       { 0x1734121,0x00d3d44,0x29f0b2f,0x1552897,0x31cac6e,0x1030bb3,
3227         0x0148f3a,0x35fd237,0x29b44eb,0x027f49f } },
3228     /* 95 */
3229     { { 0x2e2cb16,0x1d962bd,0x19b63cc,0x0b3f964,0x3e3eb7d,0x1a35560,
3230         0x0c58161,0x3ce1d6a,0x3b6958f,0x029030b },
3231       { 0x2dcc158,0x3b1583f,0x30568c9,0x31957c8,0x27ad804,0x28c1f84,
3232         0x3967049,0x37b3f64,0x3b87dc6,0x0266f26 } },
3233     /* 96 */
3234     { { 0x27dafc6,0x2548764,0x0d1984a,0x1a57027,0x252c1fb,0x24d9b77,
3235         0x1581a0f,0x1f99276,0x10ba16d,0x026af88 },
3236       { 0x0915220,0x2be1292,0x16c6480,0x1a93760,0x2fa7317,0x1a07296,
3237         0x1539871,0x112c31f,0x25787f3,0x01e2070 } },
3238     /* 97 */
3239     { { 0x0bcf3ff,0x266d478,0x34f6933,0x31449fd,0x00d02cb,0x340765a,
3240         0x3465a2d,0x225023e,0x319a30e,0x00579b8 },
3241       { 0x20e05f4,0x35b834f,0x0404646,0x3710d62,0x3fad7bd,0x13e1434,
3242         0x21c7d1c,0x1cb3af9,0x2cf1911,0x003957e } },
3243     /* 98 */
3244     { { 0x0787564,0x36601be,0x1ce67e9,0x084c7a1,0x21a3317,0x2067a35,
3245         0x0158cab,0x195ddac,0x1766fe9,0x035cf42 },
3246       { 0x2b7206e,0x20d0947,0x3b42424,0x03f1862,0x0a51929,0x38c2948,
3247         0x0bb8595,0x2942d77,0x3748f15,0x0249428 } },
3248     /* 99 */
3249     { { 0x2577410,0x3c23e2f,0x28c6caf,0x00d41de,0x0fd408a,0x30298e9,
3250         0x363289e,0x2302fc7,0x082c1cc,0x01dd050 },
3251       { 0x30991cd,0x103e9ba,0x029605a,0x19927f7,0x0c1ca08,0x0c93f50,
3252         0x28a3c7b,0x082e4e9,0x34d12eb,0x0232c13 } },
3253     /* 100 */
3254     { { 0x106171c,0x0b4155a,0x0c3fb1c,0x336c090,0x19073e9,0x2241a10,
3255         0x0e6b4fd,0x0ed476e,0x1ef4712,0x039390a },
3256       { 0x0ec36f4,0x3754f0e,0x2a270b8,0x007fd2d,0x0f9d2dc,0x1e6a692,
3257         0x066e078,0x1954974,0x2ff3c6e,0x00def28 } },
3258     /* 101 */
3259     { { 0x3562470,0x0b8f1f7,0x0ac94cd,0x28b0259,0x244f272,0x031e4ef,
3260         0x2d5df98,0x2c8a9f1,0x2dc3002,0x016644f },
3261       { 0x350592a,0x0e6a0d5,0x1e027a1,0x2039e0f,0x399e01d,0x2817593,
3262         0x0c0375e,0x3889b3e,0x24ab013,0x010de1b } },
3263     /* 102 */
3264     { { 0x256b5a6,0x0ac3b67,0x28f9ff3,0x29b67f1,0x30750d9,0x25e11a9,
3265         0x15e8455,0x279ebb0,0x298b7e7,0x0218e32 },
3266       { 0x2fc24b2,0x2b82582,0x28f22f5,0x2bd36b3,0x305398e,0x3b2e9e3,
3267         0x365dd0a,0x29bc0ed,0x36a7b3a,0x007b374 } },
3268     /* 103 */
3269     { { 0x05ff2f3,0x2b3589b,0x29785d3,0x300a1ce,0x0a2d516,0x0844355,
3270         0x14c9fad,0x3ccb6b6,0x385d459,0x0361743 },
3271       { 0x0b11da3,0x002e344,0x18c49f7,0x0c29e0c,0x1d2c22c,0x08237b3,
3272         0x2988f49,0x0f18955,0x1c3b4ed,0x02813c6 } },
3273     /* 104 */
3274     { { 0x17f93bd,0x249323b,0x11f6087,0x174e4bd,0x3cb64ac,0x086dc6b,
3275         0x2e330a8,0x142c1f2,0x2ea5c09,0x024acbb },
3276       { 0x1b6e235,0x3132521,0x00f085a,0x2a4a4db,0x1ab2ca4,0x0142224,
3277         0x3aa6b3e,0x09db203,0x2215834,0x007b9e0 } },
3278     /* 105 */
3279     { { 0x23e79f7,0x28b8039,0x1906a60,0x2cbce67,0x1f590e7,0x181f027,
3280         0x21054a6,0x3854240,0x2d857a6,0x03cfcb3 },
3281       { 0x10d9b55,0x1443cfc,0x2648200,0x2b36190,0x09d2fcf,0x22f439f,
3282         0x231aa7e,0x3884395,0x0543da3,0x003d5a9 } },
3283     /* 106 */
3284     { { 0x043e0df,0x06ffe84,0x3e6d5b2,0x3327001,0x26c74b6,0x12a145e,
3285         0x256ec0d,0x3898c69,0x3411969,0x02f63c5 },
3286       { 0x2b7494a,0x2eee1af,0x38388a9,0x1bd17ce,0x21567d4,0x13969e6,
3287         0x3a12a7a,0x3e8277d,0x03530cc,0x00b4687 } },
3288     /* 107 */
3289     { { 0x06508da,0x38e04d4,0x15a7192,0x312875e,0x3336180,0x2a6512c,
3290         0x1b59497,0x2e91b37,0x25eb91f,0x02841e9 },
3291       { 0x394d639,0x0747143,0x37d7e6d,0x1d62962,0x08b4af3,0x34df287,
3292         0x3c5584b,0x26bc869,0x20af87a,0x0060f5d } },
3293     /* 108 */
3294     { { 0x1de59a4,0x1a5c443,0x2f8729d,0x01c3a2f,0x0f1ad8d,0x3cbaf9e,
3295         0x1b49634,0x35d508a,0x39dc269,0x0075105 },
3296       { 0x390d30e,0x37033e0,0x110cb32,0x14c37a0,0x20a3b27,0x2f00ce6,
3297         0x2f1dc52,0x34988c6,0x0c29606,0x01dc7e7 } },
3298     /* 109 */
3299     { { 0x1040739,0x24f9de1,0x2939999,0x2e6009a,0x244539d,0x17e3f09,
3300         0x00f6f2f,0x1c63b3d,0x2310362,0x019109e },
3301       { 0x1428aa8,0x3cb61e1,0x09a84f4,0x0ffafed,0x07b7adc,0x08f406b,
3302         0x1b2c6df,0x035b480,0x3496ae9,0x012766d } },
3303     /* 110 */
3304     { { 0x35d1099,0x2362f10,0x1a08cc7,0x13a3a34,0x12adbcd,0x32da290,
3305         0x02e2a02,0x151140b,0x01b3f60,0x0240df6 },
3306       { 0x34c7b61,0x2eb09c1,0x172e7cd,0x2ad5eff,0x2fe2031,0x25b54d4,
3307         0x0cec965,0x18e7187,0x26a7cc0,0x00230f7 } },
3308     /* 111 */
3309     { { 0x2d552ab,0x374083d,0x01f120f,0x2601736,0x156baff,0x04d44a4,
3310         0x3b7c3e9,0x1acbc1b,0x0424579,0x031a425 },
3311       { 0x1231bd1,0x0eba710,0x020517b,0x21d7316,0x21eac6e,0x275a848,
3312         0x0837abf,0x0eb0082,0x302cafe,0x00fe8f6 } },
3313     /* 112 */
3314     { { 0x1058880,0x28f9941,0x03f2d75,0x3bd90e5,0x17da365,0x2ac9249,
3315         0x07861cf,0x023fd05,0x1b0fdb8,0x031712f },
3316       { 0x272b56b,0x04f8d2c,0x043a735,0x25446e4,0x1c8327e,0x221125a,
3317         0x0ce37df,0x2dad7f6,0x39446c2,0x00b55b6 } },
3318     /* 113 */
3319     { { 0x346ac6b,0x05e0bff,0x2425246,0x0981e8b,0x1d19f79,0x2692378,
3320         0x3ea3c40,0x2e90beb,0x19de503,0x003d5af },
3321       { 0x05cda49,0x353b44d,0x299d137,0x3f205bc,0x2821158,0x3ad0d00,
3322         0x06a54aa,0x2d7c79f,0x39d1173,0x01000ee } },
3323     /* 114 */
3324     { { 0x0803387,0x3a06268,0x14043b8,0x3d4e72f,0x1ece115,0x0a1dfc8,
3325         0x17208dd,0x0be790a,0x122a07f,0x014dd95 },
3326       { 0x0a4182d,0x202886a,0x1f79a49,0x1e8c867,0x0a2bbd0,0x28668b5,
3327         0x0d0a2e1,0x115259d,0x3586c5d,0x01e815b } },
3328     /* 115 */
3329     { { 0x18a2a47,0x2c95627,0x2773646,0x1230f7c,0x15b5829,0x2fc354e,
3330         0x2c000ea,0x099d547,0x2f17a1a,0x01df520 },
3331       { 0x3853948,0x06f6561,0x3feeb8a,0x2f5b3ef,0x3a6f817,0x01a0791,
3332         0x2ec0578,0x2c392ad,0x12b2b38,0x0104540 } },
3333     /* 116 */
3334     { { 0x1e28ced,0x0fc3d1b,0x2c473c7,0x1826c4f,0x21d5da7,0x39718e4,
3335         0x38ce9e6,0x0251986,0x172fbea,0x0337c11 },
3336       { 0x053c3b0,0x0f162db,0x043c1cb,0x04111ee,0x297fe3c,0x32e5e03,
3337         0x2b8ae12,0x0c427ec,0x1da9738,0x03b9c0f } },
3338     /* 117 */
3339     { { 0x357e43a,0x054503f,0x11b8345,0x34ec6e0,0x2d44660,0x3d0ae61,
3340         0x3b5dff8,0x33884ac,0x09da162,0x00a82b6 },
3341       { 0x3c277ba,0x129a51a,0x027664e,0x1530507,0x0c788c9,0x2afd89d,
3342         0x1aa64cc,0x1196450,0x367ac2b,0x0358b42 } },
3343     /* 118 */
3344     { { 0x0054ac4,0x1761ecb,0x378839c,0x167c9f7,0x2570058,0x0604a35,
3345         0x37cbf3b,0x0909bb7,0x3f2991c,0x02ce688 },
3346       { 0x0b16ae5,0x212857c,0x351b952,0x2c684db,0x30c6a05,0x09c01e0,
3347         0x23c137f,0x1331475,0x092c067,0x0013b40 } },
3348     /* 119 */
3349     { { 0x2e90393,0x0617466,0x24e61f4,0x0a528f5,0x03047b4,0x2153f05,
3350         0x0001a69,0x30e1eb8,0x3c10177,0x0282a47 },
3351       { 0x22c831e,0x28fc06b,0x3e16ff0,0x208adc9,0x0bb76ae,0x28c1d6d,
3352         0x12c8a15,0x031063c,0x1889ed2,0x002133e } },
3353     /* 120 */
3354     { { 0x0a6becf,0x14277bf,0x3328d98,0x201f7fe,0x12fceae,0x1de3a2e,
3355         0x0a15c44,0x3ddf976,0x1b273ab,0x0355e55 },
3356       { 0x1b5d4f1,0x369e78c,0x3a1c210,0x12cf3e9,0x3aa52f0,0x309f082,
3357         0x112089d,0x107c753,0x24202d1,0x023853a } },
3358     /* 121 */
3359     { { 0x2897042,0x140d17c,0x2c4aeed,0x07d0d00,0x18d0533,0x22f7ec8,
3360         0x19c194c,0x3456323,0x2372aa4,0x0165f86 },
3361       { 0x30bd68c,0x1fb06b3,0x0945032,0x372ac09,0x06d4be0,0x27f8fa1,
3362         0x1c8d7ac,0x137a96e,0x236199b,0x0328fc0 } },
3363     /* 122 */
3364     { { 0x170bd20,0x2842d58,0x1de7592,0x3c5b4fd,0x20ea897,0x12cab78,
3365         0x363ff14,0x01f928c,0x17e309c,0x02f79ff },
3366       { 0x0f5432c,0x2edb4ae,0x044b516,0x32f810d,0x2210dc1,0x23e56d6,
3367         0x301e6ff,0x34660f6,0x10e0a7d,0x02d88eb } },
3368     /* 123 */
3369     { { 0x0c7b65b,0x2f59d58,0x2289a75,0x2408e92,0x1ab8c55,0x1ec99e5,
3370         0x220fd0d,0x04defe0,0x24658ec,0x035aa8b },
3371       { 0x138bb85,0x2f002d4,0x295c10a,0x08760ce,0x28c31d1,0x1c0a8cb,
3372         0x0ff00b1,0x144eac9,0x2e02dcc,0x0044598 } },
3373     /* 124 */
3374     { { 0x3b42b87,0x050057b,0x0dff781,0x1c06db1,0x1bd9f5d,0x1f5f04a,
3375         0x2cccd7a,0x143e19b,0x1cb94b7,0x036cfb8 },
3376       { 0x34837cf,0x3cf6c3c,0x0d4fb26,0x22ee55e,0x1e7eed1,0x315995f,
3377         0x2cdf937,0x1a96574,0x0425220,0x0221a99 } },
3378     /* 125 */
3379     { { 0x1b569ea,0x0d33ed9,0x19c13c2,0x107dc84,0x2200111,0x0569867,
3380         0x2dc85da,0x05ef22e,0x0eb018a,0x029c33d },
3381       { 0x04a6a65,0x3e5eba3,0x378f224,0x09c04d0,0x036e5cf,0x3df8258,
3382         0x3a609e4,0x1eddef8,0x2abd174,0x02a91dc } },
3383     /* 126 */
3384     { { 0x2a60cc0,0x1d84c5e,0x115f676,0x1840da0,0x2c79163,0x2f06ed6,
3385         0x198bb4b,0x3e5d37b,0x1dc30fa,0x018469b },
3386       { 0x15ee47a,0x1e32f30,0x16a530e,0x2093836,0x02e8962,0x3767b62,
3387         0x335adf3,0x27220db,0x2f81642,0x0173ffe } },
3388     /* 127 */
3389     { { 0x37a99cd,0x1533fe6,0x05a1c0d,0x27610f1,0x17bf3b9,0x0b1ce78,
3390         0x0a908f6,0x265300e,0x3237dc1,0x01b969a },
3391       { 0x3a5db77,0x2d15382,0x0d63ef8,0x1feb3d8,0x0b7b880,0x19820de,
3392         0x11c0c67,0x2af3396,0x38d242d,0x0120688 } },
3393     /* 128 */
3394     { { 0x1d0b34a,0x05ef00d,0x00a7e34,0x1ae0c9f,0x1440b38,0x300d8b4,
3395         0x37262da,0x3e50e3e,0x14ce0cd,0x00b1044 },
3396       { 0x195a0b1,0x173bc6b,0x03622ba,0x2a19f55,0x1c09b37,0x07921b2,
3397         0x16cdd20,0x24a5c9b,0x2bf42ff,0x00811de } },
3398     /* 129 */
3399     { { 0x0d65dbf,0x145cf06,0x1ad82f7,0x038ce7b,0x077bf94,0x33c4007,
3400         0x22d26bd,0x25ad9c0,0x09ac773,0x02b1990 },
3401       { 0x2261cc3,0x2ecdbf1,0x3e908b0,0x3246439,0x0213f7b,0x1179b04,
3402         0x01cebaa,0x0be1595,0x175cc12,0x033a39a } },
3403     /* 130 */
3404     { { 0x00a67d2,0x086d06f,0x248a0f1,0x0291134,0x362d476,0x166d1cd,
3405         0x044f1d6,0x2d2a038,0x365250b,0x0023f78 },
3406       { 0x08bf287,0x3b0f6a1,0x1d6eace,0x20b4cda,0x2c2a621,0x0912520,
3407         0x02dfdc9,0x1b35cd6,0x3d2565d,0x00bdf8b } },
3408     /* 131 */
3409     { { 0x3770fa7,0x2e4b6f0,0x03f9ae4,0x170de41,0x1095e8d,0x1dd845c,
3410         0x334e9d1,0x00ab953,0x12e9077,0x03196fa },
3411       { 0x2fd0a40,0x228c0fd,0x384b275,0x38ef339,0x3e7d822,0x3e5d9ef,
3412         0x24f5854,0x0ece9eb,0x247d119,0x012ffe3 } },
3413     /* 132 */
3414     { { 0x0ff1480,0x07487c0,0x1b16cd4,0x1f41d53,0x22ab8fb,0x2f83cfa,
3415         0x01d2efb,0x259f6b2,0x2e65772,0x00f9392 },
3416       { 0x05303e6,0x23cdb4f,0x23977e1,0x12e4898,0x03bd999,0x0c930f0,
3417         0x170e261,0x180a27b,0x2fd58ec,0x014e22b } },
3418     /* 133 */
3419     { { 0x25d7713,0x0c5fad7,0x09daad1,0x3b9d779,0x109b985,0x1d3ec98,
3420         0x35bc4fc,0x2f838cb,0x0d14f75,0x0173e42 },
3421       { 0x2657b12,0x10d4423,0x19e6760,0x296e5bb,0x2bfd421,0x25c3330,
3422         0x29f51f8,0x0338838,0x24060f0,0x029a62e } },
3423     /* 134 */
3424     { { 0x3748fec,0x2c5a1bb,0x2cf973d,0x289fa74,0x3e6e755,0x38997bf,
3425         0x0b6544c,0x2b6358c,0x38a7aeb,0x02c50bb },
3426       { 0x3d5770a,0x06be7c5,0x012fad3,0x19cb2cd,0x266af3b,0x3ccd677,
3427         0x160d1bd,0x141d5af,0x2965851,0x034625a } },
3428     /* 135 */
3429     { { 0x3c41c08,0x255eacc,0x22e1ec5,0x2b151a3,0x087de94,0x311cbdb,
3430         0x016b73a,0x368e462,0x20b7981,0x0099ec3 },
3431       { 0x262b988,0x1539763,0x21e76e5,0x15445b4,0x1d8ddc7,0x34a9be6,
3432         0x10faf03,0x24e4d18,0x07aa111,0x02d538a } },
3433     /* 136 */
3434     { { 0x38a876b,0x048ad45,0x04b40a0,0x3fc2144,0x251ff96,0x13ca7dd,
3435         0x0b31ab1,0x3539814,0x28b5f87,0x0212aec },
3436       { 0x270790a,0x350e7e0,0x346bd5e,0x276178f,0x22d6cb5,0x3078884,
3437         0x355c1b6,0x15901d7,0x3671765,0x03950db } },
3438     /* 137 */
3439     { { 0x286e8d5,0x2409788,0x13be53f,0x2d21911,0x0353c95,0x10238e8,
3440         0x32f5bde,0x3a67b60,0x28b5b9c,0x001013d },
3441       { 0x381e8e5,0x0cef7a9,0x2f5bcad,0x06058f0,0x33cdf50,0x04672a8,
3442         0x1769600,0x31c055d,0x3df0ac1,0x00e9098 } },
3443     /* 138 */
3444     { { 0x2eb596d,0x197b326,0x12b4c29,0x39c08f2,0x101ea03,0x3804e58,
3445         0x04b4b62,0x28d9d1c,0x13f905e,0x0032a3f },
3446       { 0x11b2b61,0x08e9095,0x0d06925,0x270e43f,0x21eb7a8,0x0e4a98f,
3447         0x31d2be0,0x030cf9f,0x2644ddb,0x025b728 } },
3448     /* 139 */
3449     { { 0x07510af,0x2ed0e8e,0x2a01203,0x2a2a68d,0x0846fea,0x3e540de,
3450         0x3a57702,0x1677348,0x2123aad,0x010d8f8 },
3451       { 0x0246a47,0x0e871d0,0x124dca4,0x34b9577,0x2b362b8,0x363ebe5,
3452         0x3086045,0x26313e6,0x15cd8bb,0x0210384 } },
3453     /* 140 */
3454     { { 0x023e8a7,0x0817884,0x3a0bf12,0x3376371,0x3c808a8,0x18e9777,
3455         0x12a2721,0x35b538a,0x2bd30de,0x017835a },
3456       { 0x0fc0f64,0x1c8709f,0x2d8807a,0x0743957,0x242eec0,0x347e76c,
3457         0x27bef91,0x289689a,0x0f42945,0x01f7a92 } },
3458     /* 141 */
3459     { { 0x1060a81,0x3dbc739,0x1615abd,0x1cbe3e5,0x3e79f9c,0x1ab09a2,
3460         0x136c540,0x05b473f,0x2beebfd,0x02af0a8 },
3461       { 0x3e2eac7,0x19be474,0x04668ac,0x18f4b74,0x36f10ba,0x0a0b4c6,
3462         0x10e3770,0x3bf059e,0x3946c7e,0x013a8d4 } },
3463     /* 142 */
3464     { { 0x266309d,0x28be354,0x1a3eed8,0x3020651,0x10a51c6,0x1e31770,
3465         0x0af45a5,0x3ff0f3b,0x2891c94,0x00e9db9 },
3466       { 0x17b0d0f,0x33a291f,0x0a5f9aa,0x25a3d61,0x2963ace,0x39a5fef,
3467         0x230c724,0x1919146,0x10a465e,0x02084a8 } },
3468     /* 143 */
3469     { { 0x3ab8caa,0x31870f3,0x2390ef7,0x2103850,0x218eb8e,0x3a5ccf2,
3470         0x1dff677,0x2c59334,0x371599c,0x02a9f2a },
3471       { 0x0837bd1,0x3249cef,0x35d702f,0x3430dab,0x1c06407,0x108f692,
3472         0x221292f,0x05f0c5d,0x073fe06,0x01038e0 } },
3473     /* 144 */
3474     { { 0x3bf9b7c,0x2020929,0x30d0f4f,0x080fef8,0x3365d23,0x1f3e738,
3475         0x3e53209,0x1549afe,0x300b305,0x038d811 },
3476       { 0x0c6c2c7,0x2e6445b,0x3ee64dc,0x022e932,0x0726837,0x0deb67b,
3477         0x1ed4346,0x3857f73,0x277a3de,0x01950b5 } },
3478     /* 145 */
3479     { { 0x36c377a,0x0adb41e,0x08be3f3,0x11e40d1,0x36cb038,0x036a2bd,
3480         0x3dd3a82,0x1bc875b,0x2ee09bb,0x02994d2 },
3481       { 0x035facf,0x05e0344,0x07e630a,0x0ce772d,0x335e55a,0x111fce4,
3482         0x250fe1c,0x3bc89ba,0x32fdc9a,0x03cf2d9 } },
3483     /* 146 */
3484     { { 0x355fd83,0x1c67f8e,0x1d10eb3,0x1b21d77,0x0e0d7a4,0x173a9e1,
3485         0x2c9fa90,0x1c39cce,0x22eaae8,0x01f2bea },
3486       { 0x153b338,0x0534107,0x26c69b8,0x283be1f,0x3e0acc0,0x059cac3,
3487         0x13d1081,0x148bbee,0x3c1b9bd,0x002aac4 } },
3488     /* 147 */
3489     { { 0x2681297,0x3389e34,0x146addc,0x2c6d425,0x2cb350e,0x1986abc,
3490         0x0431737,0x04ba4b7,0x2028470,0x012e469 },
3491       { 0x2f8ddcf,0x3c4255c,0x1af4dcf,0x07a6a44,0x208ebf6,0x0dc90c3,
3492         0x34360ac,0x072ad23,0x0537232,0x01254d3 } },
3493     /* 148 */
3494     { { 0x07b7e9d,0x3df5c7c,0x116f83d,0x28c4f35,0x3a478ef,0x3011fb8,
3495         0x2f264b6,0x317b9e3,0x04fd65a,0x032bd1b },
3496       { 0x2aa8266,0x3431de4,0x04bba04,0x19a44da,0x0edf454,0x392c5ac,
3497         0x265168a,0x1dc3d5b,0x25704c6,0x00533a7 } },
3498     /* 149 */
3499     { { 0x25e8f91,0x1178fa5,0x2492994,0x2eb2c3c,0x0d3aca1,0x0322828,
3500         0x1cc70f9,0x269c74c,0x0a53e4c,0x006edc2 },
3501       { 0x18bdd7a,0x2a79a55,0x26b1d5c,0x0200628,0x0734a05,0x3273c7b,
3502         0x13aa714,0x0040ac2,0x2f2da30,0x03e7449 } },
3503     /* 150 */
3504     { { 0x3f9563e,0x2f29eab,0x14a0749,0x3fad264,0x1dd077a,0x3d7c59c,
3505         0x3a0311b,0x331a789,0x0b9729e,0x0201ebf },
3506       { 0x1b08b77,0x2a4cdf2,0x3e387f8,0x21510f1,0x286c3a7,0x1dbf62e,
3507         0x3afa594,0x3363217,0x0d16568,0x01d46b7 } },
3508     /* 151 */
3509     { { 0x0715c0d,0x28e2d04,0x17f78ae,0x1c63dda,0x1d113ea,0x0fefc1b,
3510         0x1eab149,0x1d0fd99,0x0682537,0x00a7b11 },
3511       { 0x10bebbc,0x11c672d,0x14223d9,0x2ff9141,0x1399ee5,0x34b7b6c,
3512         0x0d5b3a8,0x01df643,0x0e392a4,0x03fe4dc } },
3513     /* 152 */
3514     { { 0x2b75b65,0x0b5a6f1,0x11c559a,0x3549999,0x24188f8,0x37a75f4,
3515         0x29f33e3,0x34068a2,0x38ba2a9,0x025dd91 },
3516       { 0x29af2c7,0x0988b64,0x0923885,0x1b539a4,0x1334f5d,0x226947a,
3517         0x2cc7e5a,0x20beb39,0x13fac2f,0x01d298c } },
3518     /* 153 */
3519     { { 0x35f079c,0x137f76d,0x2fbbb2f,0x254638d,0x185b07c,0x1f34db7,
3520         0x2cfcf0e,0x218f46d,0x2150ff4,0x02add6f },
3521       { 0x33fc9b7,0x0d9f005,0x0fd081b,0x0834965,0x2b90a74,0x102448d,
3522         0x3dbf03c,0x167d857,0x02e0b44,0x013afab } },
3523     /* 154 */
3524     { { 0x09f2c53,0x317f9d7,0x1411eb6,0x0463aba,0x0d25220,0x256b176,
3525         0x087633f,0x2bff322,0x07b2c1b,0x037e662 },
3526       { 0x10aaecb,0x23bb4a1,0x2272bb7,0x06c075a,0x09d4918,0x0736f2b,
3527         0x0dd511b,0x101625e,0x0a7779f,0x009ec10 } },
3528     /* 155 */
3529     { { 0x33b2eb2,0x0176dfd,0x2118904,0x022386c,0x2e0df85,0x2588c9f,
3530         0x1b71525,0x28fd540,0x137e4cf,0x02ce4f7 },
3531       { 0x3d75165,0x0c39ecf,0x3554a12,0x30af34c,0x2d66344,0x3ded408,
3532         0x36f1be0,0x0d065b0,0x012d046,0x0025623 } },
3533     /* 156 */
3534     { { 0x2601c3b,0x1824fc0,0x335fe08,0x3e33d70,0x0fb0252,0x252bfca,
3535         0x1cf2808,0x1922e55,0x1a9db9f,0x020721e },
3536       { 0x2f56c51,0x39a1f31,0x218c040,0x1a4fc5d,0x3fed471,0x0164d4e,
3537         0x388a419,0x06f1113,0x0f55fc1,0x03e8352 } },
3538     /* 157 */
3539     { { 0x1608e4d,0x3872778,0x022cbc6,0x044d60a,0x3010dda,0x15fb0b5,
3540         0x37ddc11,0x19f5bda,0x156b6a3,0x023a838 },
3541       { 0x383b3b4,0x1380bc8,0x353ca35,0x250fc07,0x169966b,0x3780f29,
3542         0x36632b2,0x2d6b13f,0x124fa00,0x00fd6ae } },
3543     /* 158 */
3544     { { 0x1739efb,0x2ec3656,0x2c0d337,0x3d39faf,0x1c751b0,0x04699f4,
3545         0x252dd64,0x095b8b6,0x0872b74,0x022f1da },
3546       { 0x2d3d253,0x38edca0,0x379fa5b,0x287d635,0x3a9f679,0x059d9ee,
3547         0x0ac168e,0x3cd3e87,0x19060fc,0x02ce1bc } },
3548     /* 159 */
3549     { { 0x3edcfc2,0x0f04d4b,0x2f0d31f,0x1898be2,0x25396bf,0x15ca230,
3550         0x02b4eae,0x2713668,0x0f71b06,0x0132d18 },
3551       { 0x38095ea,0x1ed34d6,0x3603ae6,0x165bf01,0x192bbf8,0x1852859,
3552         0x075f66b,0x1488f85,0x10895ef,0x014b035 } },
3553     /* 160 */
3554     { { 0x1339848,0x3084385,0x0c8d231,0x3a1c1de,0x0e87a28,0x255b85c,
3555         0x1de6616,0x2702e74,0x1382bb0,0x012b0f2 },
3556       { 0x198987d,0x381545a,0x34d619b,0x312b827,0x18b2376,0x28fe4cf,
3557         0x20b7651,0x017d077,0x0c7e397,0x00e0365 } },
3558     /* 161 */
3559     { { 0x1542e75,0x0d56aa0,0x39b701a,0x287b806,0x396c724,0x0935c21,
3560         0x3a29776,0x0debdac,0x171de26,0x00b38f8 },
3561       { 0x1d5bc1a,0x3fad27d,0x22b5cfe,0x1f89ddf,0x0a65560,0x144dd5b,
3562         0x2aac2f9,0x139353f,0x0520b62,0x00b9b36 } },
3563     /* 162 */
3564     { { 0x031c31d,0x16552e3,0x1a0c368,0x0016fc8,0x168533d,0x171e7b2,
3565         0x17626e7,0x275502f,0x14742c6,0x03285dd },
3566       { 0x2d2dbb2,0x3b6bffd,0x1d18cc6,0x2f45d2a,0x0fd0d8c,0x2915e3a,
3567         0x1e8793a,0x0b39a1d,0x3139cab,0x02a5da9 } },
3568     /* 163 */
3569     { { 0x3fb353d,0x147c6e4,0x3a720a6,0x22d5ff3,0x1d75cab,0x06c54a0,
3570         0x08cfa73,0x12666aa,0x3170a1f,0x021c829 },
3571       { 0x13e1b90,0x3a34dda,0x1fc38c3,0x02c5bdb,0x2d345dc,0x14aa1d0,
3572         0x28d00ab,0x224f23a,0x329c769,0x025c67b } },
3573     /* 164 */
3574     { { 0x0e35909,0x3bb6356,0x0116820,0x370cf77,0x29366d8,0x3881409,
3575         0x3999d06,0x013075f,0x176e157,0x02941ca },
3576       { 0x0e70b2e,0x28dfab1,0x2a8a002,0x15da242,0x084dcf6,0x116ca97,
3577         0x31bf186,0x1dc9735,0x09df7b7,0x0264e27 } },
3578     /* 165 */
3579     { { 0x2da7a4b,0x3023c9e,0x1366238,0x00ff4e2,0x03abe9d,0x19bd44b,
3580         0x272e897,0x20b91ad,0x2aa202c,0x02a2201 },
3581       { 0x380184e,0x08112b4,0x0b85660,0x31049aa,0x3a8cb78,0x36113c5,
3582         0x1670c0a,0x373f9e7,0x3fb4738,0x00010ef } },
3583     /* 166 */
3584     { { 0x2d5192e,0x26d770d,0x32af8d5,0x34d1642,0x1acf885,0x05805e0,
3585         0x166d0a1,0x1219a0d,0x301ba6c,0x014bcfb },
3586       { 0x2dcb64d,0x19cca83,0x379f398,0x08e01a0,0x10a482c,0x0103cc2,
3587         0x0be5fa7,0x1f9d45b,0x1899ef2,0x00ca5af } },
3588     /* 167 */
3589     { { 0x14d81d7,0x2aea251,0x1b3c476,0x3bd47ae,0x29eade7,0x0715e61,
3590         0x1a21cd8,0x1c7a586,0x2bfaee5,0x00ee43f },
3591       { 0x096f7cb,0x0c08f95,0x1bc4939,0x361fed4,0x255be41,0x26fad73,
3592         0x31dd489,0x02c600f,0x29d9f81,0x01ba201 } },
3593     /* 168 */
3594     { { 0x03ea1db,0x1eac46d,0x1292ce3,0x2a54967,0x20a7ff1,0x3e13c61,
3595         0x1b02218,0x2b44e14,0x3eadefa,0x029c88a },
3596       { 0x30a9144,0x31e3b0a,0x19c5a2a,0x147cbe9,0x05a0240,0x051f38e,
3597         0x11eca56,0x31a4247,0x123bc2a,0x02fa535 } },
3598     /* 169 */
3599     { { 0x3226ce7,0x1251782,0x0b7072f,0x11e59fa,0x2b8afd7,0x169b18f,
3600         0x2a46f18,0x31d9bb7,0x2fe9be8,0x01de0b7 },
3601       { 0x1b38626,0x34aa90f,0x3ad1760,0x21ddbd9,0x3460ae7,0x1126736,
3602         0x1b86fc5,0x0b92cd0,0x167a289,0x000e0e1 } },
3603     /* 170 */
3604     { { 0x1ec1a0f,0x36bbf5e,0x1c972d8,0x3f73ace,0x13bbcd6,0x23d86a5,
3605         0x175ffc5,0x2d083d5,0x2c4adf7,0x036f661 },
3606       { 0x1f39eb7,0x2a20505,0x176c81a,0x3d6e636,0x16ee2fc,0x3cbdc5f,
3607         0x25475dc,0x2ef4151,0x3c46860,0x0238934 } },
3608     /* 171 */
3609     { { 0x2587390,0x3639526,0x0588749,0x13c32fb,0x212bb19,0x09660f1,
3610         0x207da4b,0x2bf211b,0x1c4407b,0x01506a6 },
3611       { 0x24c8842,0x105a498,0x05ffdb2,0x0ab61b0,0x26044c1,0x3dff3d8,
3612         0x1d14b44,0x0d74716,0x049f57d,0x030024b } },
3613     /* 172 */
3614     { { 0x32e61ef,0x31d70f7,0x35cad3c,0x320b86c,0x07e8841,0x027ca7d,
3615         0x2d30d19,0x2513718,0x2347286,0x01d7901 },
3616       { 0x3c237d0,0x107f16e,0x01c9e7d,0x3c3b13c,0x0c9537b,0x20af54d,
3617         0x051a162,0x2161a47,0x258c784,0x016df2d } },
3618     /* 173 */
3619     { { 0x228ead1,0x29c2122,0x07f6964,0x023f4ed,0x1802dc5,0x19f96ce,
3620         0x24bfd17,0x25e866b,0x2ba8df0,0x01eb84f },
3621       { 0x2dd384e,0x05bbe3a,0x3f06fd2,0x366dacb,0x30361a2,0x2f36d7c,
3622         0x0b98784,0x38ff481,0x074e2a8,0x01e1f60 } },
3623     /* 174 */
3624     { { 0x17fbb1c,0x0975add,0x1debc5e,0x2cb2880,0x3e47bdd,0x3488cff,
3625         0x15e9a36,0x2121129,0x0199ef2,0x017088a },
3626       { 0x0315250,0x352a162,0x17c1773,0x0ae09c2,0x321b21a,0x3bd74cf,
3627         0x3c4ea1d,0x3cac2ad,0x3abbaf0,0x039174d } },
3628     /* 175 */
3629     { { 0x0511c8a,0x3c78d0a,0x2cd3d2d,0x322f729,0x3ebb229,0x09f0e69,
3630         0x0a71a76,0x2e74d5e,0x12284df,0x03b5ef0 },
3631       { 0x3dea561,0x0a9b7e4,0x0ed1cf2,0x237523c,0x05443f1,0x2eb48fa,
3632         0x3861405,0x1b49f62,0x0c945ca,0x02ab25f } },
3633     /* 176 */
3634     { { 0x16bd00a,0x13a9d28,0x3cc1eb5,0x2b7d702,0x2d839e9,0x3e6ff01,
3635         0x2bb7f11,0x3713824,0x3b31163,0x00c63e5 },
3636       { 0x30d7138,0x0316fb0,0x0220ecc,0x08eaf0c,0x244e8df,0x0088d81,
3637         0x37972fb,0x3fd34ae,0x2a19a84,0x03e907e } },
3638     /* 177 */
3639     { { 0x2642269,0x0b65d29,0x03bd440,0x33a6ede,0x3c81814,0x2507982,
3640         0x0d38e47,0x3a788e6,0x32c1d26,0x00e2eda },
3641       { 0x2577f87,0x392895a,0x3e1cc64,0x14f7047,0x08b52d2,0x08a01ca,
3642         0x336abf6,0x00697fc,0x105ce76,0x0253742 } },
3643     /* 178 */
3644     { { 0x293f92a,0x33df737,0x3315156,0x32e26d7,0x0a01333,0x26579d4,
3645         0x004df9c,0x0aba409,0x067d25c,0x02481de },
3646       { 0x3f39d44,0x1c78042,0x13d7e24,0x0825aed,0x35f2c90,0x3270f63,
3647         0x04b7b35,0x3ad4531,0x28bd29b,0x0207a10 } },
3648     /* 179 */
3649     { { 0x077199f,0x270aeb1,0x0dd96dd,0x3b9ad7b,0x28cb8ee,0x3903f43,
3650         0x37db3fe,0x292c62b,0x362dbbf,0x006e52a },
3651       { 0x247f143,0x0362cf3,0x216344f,0x3f18fd1,0x351e623,0x31664e0,
3652         0x0f270fc,0x243bbc6,0x2280555,0x001a8e3 } },
3653     /* 180 */
3654     { { 0x3355b49,0x2c04e6c,0x399b2e5,0x182d3af,0x020e265,0x09a7cf7,
3655         0x0ffa6bd,0x353e302,0x02083d9,0x029ecdb },
3656       { 0x33e8830,0x0570e86,0x1c0b64d,0x386a27e,0x0d5fcea,0x0b45a4c,
3657         0x2ee4a2e,0x0a8833f,0x2b4a282,0x02f9531 } },
3658     /* 181 */
3659     { { 0x191167c,0x36cf7e3,0x225ed6c,0x1e79e99,0x0517c3f,0x11ab1fd,
3660         0x05648f3,0x08aedc4,0x1abeae0,0x02fcc29 },
3661       { 0x3828a68,0x1e16fa4,0x30368e7,0x0c9fcfb,0x25161c3,0x24851ac,
3662         0x1b5feb5,0x344eb84,0x0de2732,0x0347208 } },
3663     /* 182 */
3664     { { 0x038b363,0x384d1e4,0x2519043,0x151ac17,0x158c11f,0x009b2b4,
3665         0x257abe6,0x2368d3f,0x3ed68a1,0x02df45e },
3666       { 0x29c2559,0x2962478,0x3d8444c,0x1d96fff,0x04f7a03,0x1391a52,
3667         0x0de4af7,0x3319126,0x15e6412,0x00e65ff } },
3668     /* 183 */
3669     { { 0x3d61507,0x1d1a0a2,0x0d2af20,0x354d299,0x329e132,0x2a28578,
3670         0x2ddfb08,0x04fa3ff,0x1293c6c,0x003bae2 },
3671       { 0x3e259f8,0x1a68fa9,0x3e67e9b,0x39b44f9,0x1ce1db7,0x347e9a1,
3672         0x3318f6a,0x2dbbc9d,0x2f8c922,0x008a245 } },
3673     /* 184 */
3674     { { 0x212ab5b,0x2b896c2,0x0136959,0x07e55ef,0x0cc1117,0x05b8ac3,
3675         0x18429ed,0x025fa01,0x11d6e93,0x03b016b },
3676       { 0x03f3708,0x2e96fab,0x1d77157,0x0d4c2d6,0x131baf9,0x0608d39,
3677         0x3552371,0x06cdd1e,0x1567ff1,0x01f4c50 } },
3678     /* 185 */
3679     { { 0x2dfefab,0x270173d,0x37077bd,0x1a372cd,0x1be2f22,0x28e2ee5,
3680         0x3ead973,0x35e8f94,0x2fc9bc1,0x03a7399 },
3681       { 0x36a02a1,0x2855d9b,0x00ed75a,0x37d8398,0x138c087,0x233706e,
3682         0x147f346,0x01947e2,0x3017228,0x0365942 } },
3683     /* 186 */
3684     { { 0x2057e60,0x2d31296,0x25e4504,0x2fa37bc,0x1cbccc3,0x1f0732f,
3685         0x3532081,0x2de8a98,0x19a804e,0x005359a },
3686       { 0x31f411a,0x2a10576,0x369c2c8,0x02fe035,0x109fbaf,0x30bddeb,
3687         0x1eef901,0x1662ad3,0x0410d43,0x01bd31a } },
3688     /* 187 */
3689     { { 0x2c24a96,0x1b7d3a5,0x19a3872,0x217f2f6,0x2534dbc,0x2cab8c2,
3690         0x066ef28,0x26aecf1,0x0fd6118,0x01310d4 },
3691       { 0x055b8da,0x1fdc5be,0x38a1296,0x25118f0,0x341a423,0x2ba4cd0,
3692         0x3e1413e,0x062d70d,0x2425a31,0x029c9b4 } },
3693     /* 188 */
3694     { { 0x08c1086,0x1acfba5,0x22e1dae,0x0f72f4e,0x3f1de50,0x0f408bc,
3695         0x35ed3f0,0x3ce48fc,0x282cc6c,0x004d8e7 },
3696       { 0x1afaa86,0x24e3ef3,0x22589ac,0x3ec9952,0x1f45bc5,0x14144ca,
3697         0x23b26e4,0x0d68c65,0x1e1c1a3,0x032a4d9 } },
3698     /* 189 */
3699     { { 0x03b2d20,0x16b1d53,0x241b361,0x05e4138,0x1742a54,0x32741c7,
3700         0x0521c4c,0x1ca96c2,0x034970b,0x02738a7 },
3701       { 0x13e0ad6,0x207dcdb,0x034c8cc,0x27bcbe1,0x18060da,0x33a18b6,
3702         0x2d1d1a6,0x2be60d7,0x3d7ab42,0x012312a } },
3703     /* 190 */
3704     { { 0x0c7485a,0x06c3310,0x0dbfd22,0x2ef949d,0x0ead455,0x098f4ba,
3705         0x3c76989,0x0cf2d24,0x032f67b,0x01e005f },
3706       { 0x30cb5ee,0x0d5da64,0x0ed2b9d,0x2503102,0x1c0d14e,0x1cbc693,
3707         0x37bf552,0x07013e2,0x054de5c,0x014f341 } },
3708     /* 191 */
3709     { { 0x128ccac,0x1617e97,0x346ebcd,0x158016d,0x25f823e,0x34048ea,
3710         0x39f0a1c,0x3ea3df1,0x1c1d3d7,0x03ba919 },
3711       { 0x151803b,0x01967c1,0x2f70781,0x27df39a,0x06c0b59,0x24a239c,
3712         0x15a7702,0x2464d06,0x2a47ae6,0x006db90 } },
3713     /* 192 */
3714     { { 0x27d04c3,0x024df3d,0x38112e8,0x38a27ba,0x01e312b,0x0965358,
3715         0x35d8879,0x2f4f55a,0x214187f,0x0008936 },
3716       { 0x05fe36f,0x2ee18c3,0x1f5f87a,0x1813bd4,0x0580f3c,0x0ed0a7b,
3717         0x0fb1bfb,0x3fcce59,0x2f042bf,0x01820e3 } },
3718     /* 193 */
3719     { { 0x20bbe99,0x32cbc9f,0x39ee432,0x3cc12a8,0x37bda44,0x3ea4e40,
3720         0x097c7a9,0x0590d7d,0x2022d33,0x018dbac },
3721       { 0x3ae00aa,0x3439864,0x2d2ffcf,0x3f8c6b9,0x0875a00,0x3e4e407,
3722         0x3658a29,0x22eb3d0,0x2b63921,0x022113b } },
3723     /* 194 */
3724     { { 0x33bae58,0x05c749a,0x1f3e114,0x1c45f8e,0x27db3df,0x06a3ab6,
3725         0x37bc7f8,0x1e27b34,0x3dc51fb,0x009eea0 },
3726       { 0x3f54de5,0x3d0e7fe,0x1a71a7d,0x02ed7f8,0x0727703,0x2ca5e92,
3727         0x2e8e35d,0x292ad0b,0x13487f3,0x02b6d8b } },
3728     /* 195 */
3729     { { 0x175df2a,0x05a28a8,0x32e99b1,0x13d8630,0x2082aa0,0x11ac245,
3730         0x24f2e71,0x322cb27,0x17675e7,0x02e643f },
3731       { 0x1f37313,0x2765ad3,0x0789082,0x1e742d0,0x11c2055,0x2021dc4,
3732         0x09ae4a7,0x346359b,0x2f94d10,0x0205c1f } },
3733     /* 196 */
3734     { { 0x3d6ff96,0x1f2ac80,0x336097d,0x3f03610,0x35b851b,0x010b6d2,
3735         0x0823c4d,0x2a9709a,0x2ead5a8,0x00de4b6 },
3736       { 0x01afa0b,0x0621965,0x3671528,0x1050b60,0x3f3e9e7,0x2f93829,
3737         0x0825275,0x006e85f,0x35e94b0,0x016af58 } },
3738     /* 197 */
3739     { { 0x2c4927c,0x3ea1382,0x0f23727,0x0d69f23,0x3e38860,0x2b72837,
3740         0x3cd5ea4,0x2d84292,0x321846a,0x016656f },
3741       { 0x29dfa33,0x3e182e0,0x018be90,0x2ba563f,0x2caafe2,0x218c0d9,
3742         0x3baf447,0x1047a6c,0x0a2d483,0x01130cb } },
3743     /* 198 */
3744     { { 0x00ed80c,0x2a5fc79,0x0a82a74,0x2c4c74b,0x15f938c,0x30b5ab6,
3745         0x32124b7,0x295314f,0x2fb8082,0x007c858 },
3746       { 0x20b173e,0x19f315c,0x12f97e4,0x198217c,0x040e8a6,0x3275977,
3747         0x2bc20e4,0x01f2633,0x02bc3e9,0x023c750 } },
3748     /* 199 */
3749     { { 0x3c4058a,0x24be73e,0x16704f5,0x2d8a4bd,0x3b15e14,0x3076315,
3750         0x1cfe37b,0x36fe715,0x343926e,0x02c6603 },
3751       { 0x2c76b09,0x0cf824c,0x3f7898c,0x274cec1,0x11df527,0x18eed18,
3752         0x08ead48,0x23915bc,0x19b3744,0x00a0a2b } },
3753     /* 200 */
3754     { { 0x0cf4ac5,0x1c8b131,0x0afb696,0x0ff7799,0x2f5ac1a,0x022420c,
3755         0x11baa2e,0x2ce4015,0x1275a14,0x0125cfc },
3756       { 0x22eac5d,0x360cd4c,0x3568e59,0x3d42f66,0x35e07ee,0x09620e4,
3757         0x36720fa,0x22b1eac,0x2d0db16,0x01b6b23 } },
3758     /* 201 */
3759     { { 0x1a835ef,0x1516bbb,0x2d51f7b,0x3487443,0x14aa113,0x0dd06c2,
3760         0x1a65e01,0x379300d,0x35920b9,0x012c8fb },
3761       { 0x04c7341,0x2eda00f,0x3c37e82,0x1b4fd62,0x0d45770,0x1478fba,
3762         0x127863a,0x26939cd,0x134ddf4,0x01375c5 } },
3763     /* 202 */
3764     { { 0x1476cd9,0x1119ca5,0x325bbf9,0x0bf8c69,0x0648d07,0x312d9f8,
3765         0x01c8b8f,0x136ec51,0x0002f4a,0x03f4c5c },
3766       { 0x195d0e1,0x10ffd22,0x29aa1cb,0x3443bdc,0x276e695,0x05e6260,
3767         0x15f9764,0x3cd9783,0x18c9569,0x0053eb1 } },
3768     /* 203 */
3769     { { 0x312ae18,0x280197c,0x3fc9ad9,0x303f324,0x251958d,0x29f4a11,
3770         0x2142408,0x3694366,0x25136ab,0x03b5f1d },
3771       { 0x1d4abbc,0x1c3c689,0x13ea462,0x3cfc684,0x39b5dd8,0x2d4654b,
3772         0x09b0755,0x27d4f18,0x3f74d2e,0x03fbf2d } },
3773     /* 204 */
3774     { { 0x2119185,0x2525eae,0x1ba4bd0,0x0c2ab11,0x1d54e8c,0x294845e,
3775         0x2479dea,0x3602d24,0x17e87e0,0x0060069 },
3776       { 0x0afffb0,0x34fe37f,0x1240073,0x02eb895,0x06cf33c,0x2d7f7ef,
3777         0x1d763b5,0x04191e0,0x11e1ead,0x027e3f0 } },
3778     /* 205 */
3779     { { 0x269544c,0x0e85c57,0x3813158,0x19fc12d,0x20eaf85,0x1e2930c,
3780         0x22a8fd2,0x1a6a478,0x09d3d3a,0x02a74e0 },
3781       { 0x1a2da3b,0x30b0b16,0x0847936,0x3d86257,0x138ccbc,0x0f5421a,
3782         0x25244e6,0x23bdd79,0x1aee117,0x00c01ae } },
3783     /* 206 */
3784     { { 0x1eead28,0x07cac32,0x1fbc0bb,0x17627d3,0x17eef63,0x0b3a24e,
3785         0x0757fdb,0x3dd841d,0x3d745f8,0x002ae17 },
3786       { 0x25b4549,0x29f24cf,0x2f21ecd,0x1725e48,0x04be2bb,0x10ee010,
3787         0x1a1274b,0x10b0898,0x27511e9,0x02c48b5 } },
3788     /* 207 */
3789     { { 0x2a5ae7a,0x181ef99,0x0be33be,0x3e9dab7,0x101e703,0x3adb971,
3790         0x1043014,0x2ebb2be,0x1c1097d,0x027d667 },
3791       { 0x3f250ed,0x16dc603,0x20dc6d7,0x1d0d268,0x38eb915,0x02c89e8,
3792         0x1605a41,0x12de109,0x0e08a29,0x01f554a } },
3793     /* 208 */
3794     { { 0x0c26def,0x163d988,0x2d1ef0f,0x3a960ac,0x1025585,0x0738e20,
3795         0x27d79b0,0x05cc3ef,0x201303f,0x00a333a },
3796       { 0x1644ba5,0x2af345e,0x30b8d1d,0x3a01bff,0x31fc643,0x1acf85e,
3797         0x0a76fc6,0x04efe98,0x348a1d0,0x03062eb } },
3798     /* 209 */
3799     { { 0x1c4216d,0x18e3217,0x02ac34e,0x19c8185,0x200c010,0x17d4192,
3800         0x13a1719,0x165af51,0x09db7a9,0x0277be0 },
3801       { 0x3ab8d2c,0x2190b99,0x22b641e,0x0cd88de,0x3b42404,0x1310862,
3802         0x106a6d6,0x23395f5,0x0b06880,0x000d5fe } },
3803     /* 210 */
3804     { { 0x0d2cc88,0x36f9913,0x339d8e9,0x237c2e3,0x0cc61c2,0x34c2832,
3805         0x309874c,0x2621d28,0x2dd1b48,0x0392806 },
3806       { 0x17cd8f9,0x07bab3d,0x0c482ed,0x0faf565,0x31b767d,0x2f4bde1,
3807         0x295c717,0x330c29c,0x179ce10,0x0119b5f } },
3808     /* 211 */
3809     { { 0x1ada2c7,0x0c624a7,0x227d47d,0x30e3e6a,0x14fa0a6,0x0829678,
3810         0x24fd288,0x2b46a43,0x122451e,0x0319ca9 },
3811       { 0x186b655,0x01f3217,0x0af1306,0x0efe6b5,0x2f0235d,0x1c45ca9,
3812         0x2086805,0x1d44e66,0x0faf2a6,0x0178f59 } },
3813     /* 212 */
3814     { { 0x33b4416,0x10431e6,0x2d99aa6,0x217aac9,0x0cd8fcf,0x2d95a9d,
3815         0x3ff74ad,0x10bf17a,0x295eb8e,0x01b229e },
3816       { 0x02a63bd,0x182e9ec,0x004710c,0x00e2e3c,0x06b2f23,0x04b642c,
3817         0x2c37383,0x32a4631,0x022ad82,0x00d22b9 } },
3818     /* 213 */
3819     { { 0x0cda2fb,0x1d198d7,0x26d27f4,0x286381c,0x022acca,0x24ac7c8,
3820         0x2df7824,0x0b4ba16,0x1e0d9ef,0x03041d3 },
3821       { 0x29a65b3,0x0f3912b,0x151bfcf,0x2b0175c,0x0fd71e4,0x39aa5e2,
3822         0x311f50c,0x13ff351,0x3dbc9e5,0x03eeb7e } },
3823     /* 214 */
3824     { { 0x0a99363,0x0fc7348,0x2775171,0x23db3c8,0x2b91565,0x134d66c,
3825         0x0175cd2,0x1bf365a,0x2b48371,0x02dfe5d },
3826       { 0x16dbf74,0x2389357,0x2f36575,0x3f5c70e,0x38d23ba,0x090f7f8,
3827         0x3477600,0x3201523,0x32ecafc,0x03d3506 } },
3828     /* 215 */
3829     { { 0x1abd48d,0x073ca3f,0x38a451f,0x0d8cb01,0x1ce81be,0x05c51ba,
3830         0x0e29741,0x03c41ab,0x0eae016,0x0060209 },
3831       { 0x2e58358,0x1da62d9,0x2358038,0x14b39b2,0x1635687,0x39079b1,
3832         0x380e345,0x1b49608,0x23983cf,0x019f97d } },
3833     /* 216 */
3834     { { 0x34899ef,0x332e373,0x04c0f89,0x3c27aed,0x1949015,0x09663b2,
3835         0x2f9276b,0x07f1951,0x09a04c1,0x027fbde },
3836       { 0x3d2a071,0x19fb3d4,0x1b096d3,0x1fe9146,0x3b10e1a,0x0478bbb,
3837         0x2b3fb06,0x1388329,0x181a99c,0x02f2030 } },
3838     /* 217 */
3839     { { 0x1eb82e6,0x14dbe39,0x3920972,0x31fd5b2,0x21a484f,0x02d7697,
3840         0x0e21715,0x37c431e,0x2629f8c,0x01249c3 },
3841       { 0x26b50ad,0x26deefa,0x0ffc1a3,0x30688e2,0x39a0284,0x041c65e,
3842         0x03eb178,0x0bdfd50,0x2f96137,0x034bb94 } },
3843     /* 218 */
3844     { { 0x0e0362a,0x334a162,0x194dd37,0x29e3e97,0x2442fa8,0x10d2949,
3845         0x3836e5a,0x2dccebf,0x0bee5ab,0x037ed1e },
3846       { 0x33eede6,0x3c739d9,0x2f04a91,0x350ad6c,0x3a5390a,0x14c368b,
3847         0x26f7bf5,0x11ce979,0x0b408df,0x0366850 } },
3848     /* 219 */
3849     { { 0x28ea498,0x0886d5b,0x2e090e0,0x0a4d58f,0x2623478,0x0d74ab7,
3850         0x2b83913,0x12c6b81,0x18d623f,0x01d8301 },
3851       { 0x198aa79,0x26d6330,0x3a7f0b8,0x34bc1ea,0x2f74890,0x378955a,
3852         0x204110f,0x0102538,0x02d8f19,0x01c5066 } },
3853     /* 220 */
3854     { { 0x14b0f45,0x2838cd3,0x14e16f0,0x0e0e4aa,0x2d9280b,0x0f18757,
3855         0x3324c6b,0x1391ceb,0x1ce89d5,0x00ebe74 },
3856       { 0x0930371,0x3de6048,0x3097fd8,0x1308705,0x3eda266,0x3108c26,
3857         0x1545dcd,0x1f7583a,0x1c37395,0x02c7e05 } },
3858     /* 221 */
3859     { { 0x1fec44a,0x2a9e3a2,0x0caf84f,0x11cf2a9,0x0c8c2ae,0x06da989,
3860         0x1c807dc,0x3c149a4,0x1141543,0x02906bb },
3861       { 0x15ffe04,0x0d4e65f,0x2e20424,0x37d896d,0x18bacb2,0x1e05ddd,
3862         0x1660be8,0x183be17,0x1dd86fb,0x035ba70 } },
3863     /* 222 */
3864     { { 0x2853264,0x0ba5fb1,0x0a0b3aa,0x2df88c1,0x2771533,0x23aba6f,
3865         0x112bb7b,0x3e3086e,0x210ae9b,0x027271b },
3866       { 0x030b74c,0x0269678,0x1e90a23,0x135a98c,0x24ed749,0x126de7c,
3867         0x344b23a,0x186da27,0x19640fa,0x0159af5 } },
3868     /* 223 */
3869     { { 0x18061f3,0x3004630,0x3c70066,0x34df20f,0x1190b25,0x1c9cc91,
3870         0x1fc8e02,0x0d17bc1,0x390f525,0x033cb1c },
3871       { 0x0eb30cf,0x2f3ad04,0x303aa09,0x2e835dd,0x1cfd2eb,0x143fc95,
3872         0x02c43a1,0x025e7a1,0x3558aa2,0x000bd45 } },
3873     /* 224 */
3874     { { 0x1db7d07,0x3bde52b,0x1500396,0x1089115,0x20b4fc7,0x1e2a8f3,
3875         0x3f8eacc,0x365f7eb,0x1a5e8d4,0x0053a6b },
3876       { 0x37079e2,0x120284b,0x000edaa,0x33792c2,0x145baa3,0x20e055f,
3877         0x365e2d7,0x26ba005,0x3ab8e9d,0x0282b53 } },
3878     /* 225 */
3879     { { 0x2653618,0x2dd8852,0x2a5f0bf,0x0f0c7aa,0x2187281,0x1252757,
3880         0x13e7374,0x3b47855,0x0b86e56,0x02f354c },
3881       { 0x2e9c47b,0x2fa14cc,0x19ab169,0x3fad401,0x0dc2776,0x24afeed,
3882         0x3a97611,0x0d07736,0x3cf6979,0x02424a0 } },
3883     /* 226 */
3884     { { 0x2e81a13,0x000c91d,0x123967b,0x265885c,0x29bee1a,0x0cb8675,
3885         0x2d361bd,0x1526823,0x3c9ace1,0x00d7bad },
3886       { 0x24e5bdc,0x02b969f,0x2c6e128,0x34edb3b,0x12dcd2c,0x3899af0,
3887         0x24224c6,0x3a1914b,0x0f4448a,0x026a2cb } },
3888     /* 227 */
3889     { { 0x1d03b59,0x1c6fc82,0x32abf64,0x28ed96b,0x1c90e62,0x2f57bb2,
3890         0x3ff168e,0x04de7fd,0x0f4d449,0x01af6d8 },
3891       { 0x255bc30,0x2bfaf22,0x3fe0dad,0x0584025,0x1c79ead,0x3078ef7,
3892         0x2197414,0x022a50b,0x0fd94ba,0x0007b0f } },
3893     /* 228 */
3894     { { 0x09485c2,0x09dfaf7,0x10c7ba6,0x1e48bec,0x248cc9a,0x028a362,
3895         0x21d60f7,0x193d93d,0x1c04754,0x0346b2c },
3896       { 0x2f36612,0x240ac49,0x0d8bd26,0x13b8186,0x259c3a4,0x020d5fb,
3897         0x38a8133,0x09b0937,0x39d4056,0x01f7341 } },
3898     /* 229 */
3899     { { 0x05a4b48,0x1f534fc,0x07725ce,0x148dc8c,0x2adcd29,0x04aa456,
3900         0x0f79718,0x066e346,0x189377d,0x002fd4d },
3901       { 0x068ea73,0x336569b,0x184d35e,0x32a08e9,0x3c7f3bb,0x11ce9c8,
3902         0x3674c6f,0x21bf27e,0x0d9e166,0x034a2f9 } },
3903     /* 230 */
3904     { { 0x0fa8e4b,0x2e6418e,0x18fc5d2,0x1ba24ff,0x0559f18,0x0dbedbf,
3905         0x2de2aa4,0x22338e9,0x3aa510f,0x035d801 },
3906       { 0x23a4988,0x02aad94,0x02732d1,0x111d374,0x0b455cf,0x0d01c9e,
3907         0x067082a,0x2ec05fd,0x368b303,0x03cad4b } },
3908     /* 231 */
3909     { { 0x035b4ca,0x1fabea6,0x1cbc0d5,0x3f2ed9a,0x02d2232,0x1990c66,
3910         0x2eb680c,0x3b4ea3b,0x18ecc5a,0x03636fa },
3911       { 0x1a02709,0x26f8ff1,0x1fa8cba,0x397d6e8,0x230be68,0x043aa14,
3912         0x3d43cdf,0x25c17fa,0x3a3ee55,0x0380564 } },
3913     /* 232 */
3914     { { 0x275a0a6,0x16bd43a,0x0033d3e,0x2b15e16,0x2512226,0x005d901,
3915         0x26d50fd,0x3bc19bf,0x3b1aeb8,0x02bfb01 },
3916       { 0x0bb0a31,0x26559e0,0x1aae7fb,0x330dcc2,0x16f1af3,0x06afce2,
3917         0x13a15a0,0x2ff7645,0x3546e2d,0x029c6e4 } },
3918     /* 233 */
3919     { { 0x0f593d2,0x384b806,0x122bbf8,0x0a281e0,0x1d1a904,0x2e93cab,
3920         0x0505db0,0x08f6454,0x05c6285,0x014e880 },
3921       { 0x3f2b935,0x22d8e79,0x161a07c,0x16b060a,0x02bff97,0x146328b,
3922         0x3ceea77,0x238f61a,0x19b3d58,0x02fd1f4 } },
3923     /* 234 */
3924     { { 0x17665d5,0x259e9f7,0x0de5672,0x15cbcbd,0x34e3030,0x035240f,
3925         0x0005ae8,0x286d851,0x07f39c9,0x000070b },
3926       { 0x1efc6d6,0x2a0051a,0x2724143,0x2a9ef1e,0x0c810bd,0x1e05429,
3927         0x25670ba,0x2e66d7d,0x0e786ff,0x03f6b7e } },
3928     /* 235 */
3929     { { 0x3c00785,0x232e23f,0x2b67fd3,0x244ed23,0x077fa75,0x3cda3ef,
3930         0x14d055b,0x0f25011,0x24d5aa4,0x00ea0e3 },
3931       { 0x297bb9a,0x198ca4f,0x14d9561,0x18d1076,0x39eb933,0x2b6caa0,
3932         0x1591a60,0x0768d45,0x257873e,0x00f36e0 } },
3933     /* 236 */
3934     { { 0x1e77eab,0x0502a5f,0x0109137,0x0350592,0x3f7e1c5,0x3ac7437,
3935         0x2dcad2c,0x1fee9d8,0x089f1f5,0x0169833 },
3936       { 0x0d45673,0x0d8e090,0x065580b,0x065644f,0x11b82be,0x3592dd0,
3937         0x3284b8d,0x23f0015,0x16fdbfd,0x0248bfd } },
3938     /* 237 */
3939     { { 0x1a129a1,0x1977bb2,0x0e041b2,0x15f30a1,0x0a5b1ce,0x3afef8f,
3940         0x380c46c,0x3358810,0x27df6c5,0x01ca466 },
3941       { 0x3b90f9a,0x3d14ea3,0x031b298,0x02e2390,0x2d719c0,0x25bc615,
3942         0x2c0e777,0x0226b8c,0x3803624,0x0179e45 } },
3943     /* 238 */
3944     { { 0x363cdfb,0x1bb155f,0x24fd5c1,0x1c7c72b,0x28e6a35,0x18165f2,
3945         0x226bea5,0x0beaff3,0x371e24c,0x0138294 },
3946       { 0x1765357,0x29034e9,0x22b4276,0x11035ce,0x23c89af,0x074468c,
3947         0x3370ae4,0x013bae3,0x018d566,0x03d7fde } },
3948     /* 239 */
3949     { { 0x209df21,0x0f8ff86,0x0e47fbf,0x23b99ba,0x126d5d2,0x2722405,
3950         0x16bd0a2,0x1799082,0x0e9533f,0x039077c },
3951       { 0x3ba9e3f,0x3f6902c,0x1895305,0x3ac9813,0x3f2340c,0x3c0d9f1,
3952         0x26e1927,0x0557c21,0x16eac4f,0x023b75f } },
3953     /* 240 */
3954     { { 0x3fc8ff3,0x0770382,0x342fc9a,0x0afa4db,0x314efd8,0x328e07b,
3955         0x016f7cc,0x3ba599c,0x1caed8a,0x0050cb0 },
3956       { 0x0b23c26,0x2120a5c,0x3273ec6,0x1cc1cd6,0x2a64fe8,0x2bbc3d6,
3957         0x09f6e5e,0x34b1b8e,0x00b5ac8,0x032bbd2 } },
3958     /* 241 */
3959     { { 0x1315922,0x1725e1d,0x0ca5524,0x1c4c18f,0x3d82951,0x193bcb2,
3960         0x0e60d0b,0x388dbcf,0x37e8efa,0x0342e85 },
3961       { 0x1b3af60,0x26ba3ec,0x220e53a,0x394f4b6,0x01a796a,0x3e7bbca,
3962         0x163605d,0x2b85807,0x17c1c54,0x03cc725 } },
3963     /* 242 */
3964     { { 0x1cc4597,0x1635492,0x2028c0f,0x2c2eb82,0x2dc5015,0x0d2a052,
3965         0x05fc557,0x1f0ebbf,0x0cb96e1,0x0004d01 },
3966       { 0x1a824bf,0x3896172,0x2ed7b29,0x178007a,0x0d59318,0x07bda2b,
3967         0x2ee6826,0x0f9b235,0x04b9193,0x01bcddf } },
3968     /* 243 */
3969     { { 0x0333fd2,0x0eeb46a,0x15b89f9,0x00968aa,0x2a89302,0x2bdd6b3,
3970         0x1e5037e,0x2541884,0x24ed2d0,0x01b6e8f },
3971       { 0x04399cd,0x3be6334,0x3adea48,0x1bb9adc,0x31811c6,0x05fb2bc,
3972         0x360752c,0x3d29dcb,0x3423bec,0x03c4f3c } },
3973     /* 244 */
3974     { { 0x119e2eb,0x2e7b02a,0x0f68cee,0x257d8b0,0x183a9a1,0x2ae88a6,
3975         0x3a3bb67,0x2eb4f3e,0x1a9274b,0x0320fea },
3976       { 0x2fa1ce0,0x346c2d8,0x2fbf0d7,0x3d4d063,0x0e58b60,0x09c1bc1,
3977         0x28ef9e5,0x09a0efe,0x0f45d70,0x02d275c } },
3978     /* 245 */
3979     { { 0x2d5513b,0x31d443e,0x1e2d914,0x3b2c5d4,0x105f32e,0x27ee756,
3980         0x050418d,0x3c73db6,0x1bb0c30,0x01673eb },
3981       { 0x1cb7fd6,0x1eb08d5,0x26a3e16,0x2e20810,0x0249367,0x029e219,
3982         0x2ec58c9,0x12d9fab,0x362354a,0x016eafc } },
3983     /* 246 */
3984     { { 0x2424865,0x260747b,0x177f37c,0x1e3cb95,0x08b0028,0x2783016,
3985         0x2970f1b,0x323c1c0,0x2a79026,0x0186231 },
3986       { 0x0f244da,0x26866f4,0x087306f,0x173ec20,0x31ecced,0x3c84d8d,
3987         0x070f9b9,0x2e764d5,0x075df50,0x0264ff9 } },
3988     /* 247 */
3989     { { 0x32c3609,0x0c737e6,0x14ea68e,0x300b11b,0x184eb19,0x29dd440,
3990         0x09ec1a9,0x185adeb,0x0664c80,0x0207dd9 },
3991       { 0x1fbe978,0x30a969d,0x33561d7,0x34fc60e,0x36743fe,0x00774af,
3992         0x0d1f045,0x018360e,0x12a5fe9,0x01592a0 } },
3993     /* 248 */
3994     { { 0x2817d1d,0x2993d3e,0x2e0f7a5,0x112faa0,0x255f968,0x355fe6a,
3995         0x3f5a0fc,0x075b2d7,0x3cf00e5,0x0089afc },
3996       { 0x32833cf,0x06a7e4b,0x09a8d6d,0x1693d3e,0x320a0a3,0x3cfdfdd,
3997         0x136c498,0x1e0d845,0x347ff25,0x01a1de7 } },
3998     /* 249 */
3999     { { 0x3043d08,0x030705c,0x20fa79b,0x1d07f00,0x0a54467,0x29b49b4,
4000         0x367e289,0x0b82f4d,0x0d1eb09,0x025ef2c },
4001       { 0x32ed3c3,0x1baaa3c,0x3c482ab,0x146ca06,0x3c8a4f1,0x3e85e3c,
4002         0x1bf4f3b,0x1195534,0x3e80a78,0x02a1cbf } },
4003     /* 250 */
4004     { { 0x32b2086,0x2de4d68,0x3486b1a,0x03a0583,0x2e1eb71,0x2dab9af,
4005         0x10cd913,0x28daa6f,0x3fcb732,0x000a04a },
4006       { 0x3605318,0x3f5f2b3,0x2d1da63,0x143f7f5,0x1646e5d,0x040b586,
4007         0x1683982,0x25abe87,0x0c9fe53,0x001ce47 } },
4008     /* 251 */
4009     { { 0x380d02b,0x055fc22,0x3f7fc50,0x3458a1d,0x26b8333,0x23550ab,
4010         0x0a1af87,0x0a821eb,0x2dc7e6d,0x00d574a },
4011       { 0x07386e1,0x3ccd68a,0x3275b41,0x253e390,0x2fd272a,0x1e6627a,
4012         0x2ca2cde,0x0e9e4a1,0x1e37c2a,0x00f70ac } },
4013     /* 252 */
4014     { { 0x0581352,0x2748701,0x02bed68,0x094dd9e,0x30a00c8,0x3fb5c07,
4015         0x3bd5909,0x211ac80,0x1103ccd,0x0311e1a },
4016       { 0x0c768ed,0x29dc209,0x36575db,0x009a107,0x272feea,0x2b33383,
4017         0x313ed56,0x134c9cc,0x168d5bb,0x033310a } },
4018     /* 253 */
4019     { { 0x17620b9,0x143784f,0x256a94e,0x229664a,0x1d89a5c,0x1d521f2,
4020         0x0076406,0x1c73f70,0x342aa48,0x03851fa },
4021       { 0x0f3ae46,0x2ad3bab,0x0fbe274,0x3ed40d4,0x2fd4936,0x232103a,
4022         0x2afe474,0x25b8f7c,0x047080e,0x008e6b0 } },
4023     /* 254 */
4024     { { 0x3fee8d4,0x347cd4a,0x0fec481,0x33fe9ec,0x0ce80b5,0x33a6bcf,
4025         0x1c4c9e2,0x3967441,0x1a3f5f7,0x03157e8 },
4026       { 0x257c227,0x1bc53a0,0x200b318,0x0fcd0af,0x2c5b165,0x2a413ec,
4027         0x2fc998a,0x2da6426,0x19cd4f4,0x0025336 } },
4028     /* 255 */
4029     { { 0x303beba,0x2072135,0x32918a9,0x140cb3a,0x08631d1,0x0ef527b,
4030         0x05f2c9e,0x2b4ce91,0x0b642ab,0x02e428c },
4031       { 0x0a5abf9,0x15013ed,0x3603b46,0x30dd76d,0x3004750,0x28d7627,
4032         0x1a42ccc,0x093ddbe,0x39a1b79,0x00067e2 } },
4033 };
4034 
4035 /* Multiply the base point of P256 by the scalar and return the result.
4036  * If map is true then convert result to affine co-ordinates.
4037  *
4038  * r     Resulting point.
4039  * k     Scalar to multiply by.
4040  * map   Indicates whether to convert result to affine.
4041  * heap  Heap to use for allocation.
4042  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
4043  */
sp_256_ecc_mulmod_base_10(sp_point * r,const sp_digit * k,int map,void * heap)4044 static int sp_256_ecc_mulmod_base_10(sp_point* r, const sp_digit* k,
4045         int map, void* heap)
4046 {
4047     return sp_256_ecc_mulmod_stripe_10(r, &p256_base, p256_table,
4048                                       k, map, heap);
4049 }
4050 
4051 #endif
4052 
4053 
4054 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
4055 #endif
4056 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
4057 /* Multiply a by scalar b into r. (r = a * b)
4058  *
4059  * r  A single precision integer.
4060  * a  A single precision integer.
4061  * b  A scalar.
4062  */
sp_256_mul_d_10(sp_digit * r,const sp_digit * a,sp_digit b)4063 SP_NOINLINE static void sp_256_mul_d_10(sp_digit* r, const sp_digit* a,
4064     sp_digit b)
4065 {
4066 #ifdef WOLFSSL_SP_SMALL
4067     int64_t tb = b;
4068     int64_t t = 0;
4069     int i;
4070 
4071     for (i = 0; i < 10; i++) {
4072         t += tb * a[i];
4073         r[i] = t & 0x3ffffff;
4074         t >>= 26;
4075     }
4076     r[10] = (sp_digit)t;
4077 #else
4078     int64_t tb = b;
4079     int64_t t[10];
4080 
4081     t[ 0] = Q6_P_mpy_RR(tb, a[0]);
4082     t[ 1] = Q6_P_mpy_RR(tb, a[1]);
4083     t[ 2] = Q6_P_mpy_RR(tb, a[2]);
4084     t[ 3] = Q6_P_mpy_RR(tb, a[3]);
4085     t[ 4] = Q6_P_mpy_RR(tb, a[4]);
4086     t[ 5] = Q6_P_mpy_RR(tb, a[5]);
4087     t[ 6] = Q6_P_mpy_RR(tb, a[6]);
4088     t[ 7] = Q6_P_mpy_RR(tb, a[7]);
4089     t[ 8] = Q6_P_mpy_RR(tb, a[8]);
4090     t[ 9] = Q6_P_mpy_RR(tb, a[9]);
4091     r[ 0] =                           Q6_R_and_RR(t[ 0], 0x3ffffff);
4092     r[ 1] = (sp_digit)(t[ 0] >> 26) + Q6_R_and_RR(t[ 1], 0x3ffffff);
4093     r[ 2] = (sp_digit)(t[ 1] >> 26) + Q6_R_and_RR(t[ 2], 0x3ffffff);
4094     r[ 3] = (sp_digit)(t[ 2] >> 26) + Q6_R_and_RR(t[ 3], 0x3ffffff);
4095     r[ 4] = (sp_digit)(t[ 3] >> 26) + Q6_R_and_RR(t[ 4], 0x3ffffff);
4096     r[ 5] = (sp_digit)(t[ 4] >> 26) + Q6_R_and_RR(t[ 5], 0x3ffffff);
4097     r[ 6] = (sp_digit)(t[ 5] >> 26) + Q6_R_and_RR(t[ 6], 0x3ffffff);
4098     r[ 7] = (sp_digit)(t[ 6] >> 26) + Q6_R_and_RR(t[ 7], 0x3ffffff);
4099     r[ 8] = (sp_digit)(t[ 7] >> 26) + Q6_R_and_RR(t[ 8], 0x3ffffff);
4100     r[ 9] = (sp_digit)(t[ 8] >> 26) + Q6_R_and_RR(t[ 9], 0x3ffffff);
4101     r[10] = (sp_digit)(t[ 9] >> 26);
4102 #endif /* WOLFSSL_SP_SMALL */
4103 }
4104 
#ifdef WOLFSSL_SP_DIV_32
/* Divide a two digit value by a single digit using 32-bit divisions only.
 * (returns an estimate of ((d1 << 26) + d0) / dv)
 *
 * A shortened divisor (the top bits of dv, plus one) is used to build the
 * quotient in two steps; the estimate is then corrected twice against the
 * full divisor.
 *
 * d1  Upper digit of the dividend.
 * d0  Lower digit of the dividend.
 * dv  Divisor.
 * returns the quotient estimate.
 */
static WC_INLINE sp_digit sp_256_div_word_10(sp_digit d1, sp_digit d0,
    sp_digit dv)
{
    sp_digit d, r, t, dvs;
    int64_t t0, t1;

    /* dvs is the shortened divisor: at most 14 bits of dv, rounded up. */
    dvs = (dv >> 12) + 1;
    /* All 26 bits from d1 and top 5 bits from d0. */
    d = (d1 << 5) | (d0 >> 21);
    r = d / dvs;
    d -= r * dvs;
    /* Up to 17 bits in r */
    /* Next 9 bits from d0. */
    d <<= 9;
    r <<= 9;
    d |= (d0 >> 12) & ((1 << 9) - 1);
    t = d / dvs;
    d -= t * dvs;
    r += t;
    /* Up to 26 bits in r */

    /* Handle rounding error with dvs - top part.
     * The remainder is taken against the full divisor dv. */
    t0 = ((int64_t)d1 << 26) + d0;
    t1 = (int64_t)r * dv;
    t1 = t0 - t1;
    t = (sp_digit)(t1 >> 12) / dvs;
    r += t;

    /* Handle rounding error with dvs - bottom 32 bits */
    t1 = (sp_digit)t0 - (r * dv);
    t = (sp_digit)t1 / dv;
    r += t;

    return r;
}
#endif /* WOLFSSL_SP_DIV_32 */
4143 
4144 /* Divide d in a and put remainder into r (m*d + r = a)
4145  * m is not calculated as it is not needed at this time.
4146  *
4147  * a  Number to be divided.
4148  * d  Number to divide with.
4149  * m  Multiplier result.
4150  * r  Remainder from the division.
4151  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
4152  */
sp_256_div_10(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)4153 static int sp_256_div_10(const sp_digit* a, const sp_digit* d, sp_digit* m,
4154         sp_digit* r)
4155 {
4156     int i;
4157 #ifndef WOLFSSL_SP_DIV_32
4158     int64_t d1;
4159 #endif
4160     sp_digit dv, r1;
4161 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4162     sp_digit* td;
4163 #else
4164     sp_digit t1d[20], t2d[10 + 1];
4165 #endif
4166     sp_digit* t1;
4167     sp_digit* t2;
4168     int err = MP_OKAY;
4169 
4170     (void)m;
4171 
4172 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4173     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (3 * 10 + 1), NULL,
4174                                                        DYNAMIC_TYPE_TMP_BUFFER);
4175     if (td == NULL) {
4176         err = MEMORY_E;
4177     }
4178 #endif
4179 
4180     if (err == MP_OKAY) {
4181 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4182         t1 = td;
4183         t2 = td + 2 * 10;
4184 #else
4185         t1 = t1d;
4186         t2 = t2d;
4187 #endif
4188 
4189         dv = d[9];
4190         XMEMCPY(t1, a, sizeof(*t1) * 2U * 10U);
4191         for (i=9; i>=0; i--) {
4192             t1[10 + i] += t1[10 + i - 1] >> 26;
4193             t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
4194 #ifndef WOLFSSL_SP_DIV_32
4195             d1 = t1[10 + i];
4196             d1 <<= 26;
4197             d1 += t1[10 + i - 1];
4198             r1 = (sp_digit)(d1 / dv);
4199 #else
4200             r1 = sp_256_div_word_10(t1[10 + i], t1[10 + i - 1], dv);
4201 #endif
4202 
4203             sp_256_mul_d_10(t2, d, r1);
4204             (void)sp_256_sub_10(&t1[i], &t1[i], t2);
4205             t1[10 + i] -= t2[10];
4206             t1[10 + i] += t1[10 + i - 1] >> 26;
4207             t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
4208             r1 = (((-t1[10 + i]) << 26) - t1[10 + i - 1]) / dv;
4209             r1++;
4210             sp_256_mul_d_10(t2, d, r1);
4211             (void)sp_256_add_10(&t1[i], &t1[i], t2);
4212             t1[10 + i] += t1[10 + i - 1] >> 26;
4213             t1[10 + i - 1] = Q6_R_and_RR(t1[10 + i - 1], 0x3ffffff);
4214         }
4215         t1[10 - 1] += t1[10 - 2] >> 26;
4216         t1[10 - 2] &= 0x3ffffff;
4217         d1 = t1[10 - 1];
4218         r1 = (sp_digit)(d1 / dv);
4219 
4220         sp_256_mul_d_10(t2, d, r1);
4221         (void)sp_256_sub_10(t1, t1, t2);
4222         XMEMCPY(r, t1, sizeof(*r) * 2U * 10U);
4223         for (i=0; i<8; i++) {
4224             r[i+1] += r[i] >> 26;
4225             r[i] &= 0x3ffffff;
4226         }
4227         sp_256_cond_add_10(r, r, d, 0 - ((r[9] < 0) ?
4228                     (sp_digit)1 : (sp_digit)0));
4229     }
4230 
4231 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4232     if (td != NULL) {
4233         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4234     }
4235 #endif
4236 
4237     return err;
4238 }
4239 
4240 /* Reduce a modulo m into r. (r = a mod m)
4241  *
4242  * r  A single precision number that is the reduced result.
4243  * a  A single precision number that is to be reduced.
4244  * m  A single precision number that is the modulus to reduce with.
4245  * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise.
4246  */
sp_256_mod_10(sp_digit * r,const sp_digit * a,const sp_digit * m)4247 static int sp_256_mod_10(sp_digit* r, const sp_digit* a, const sp_digit* m)
4248 {
4249     return sp_256_div_10(a, m, NULL, r);
4250 }
4251 
4252 #endif
4253 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
#ifndef WOLFSSL_SP_SMALL
/* Low 128 bits of (order - 2) of the P256 curve.
 * Four 32-bit words, least significant word first. */
static const uint32_t p256_order_low[4] = {
    0xfc63254fU,
    0xf3b9cac2U,
    0xa7179e84U,
    0xbce6faadU
};
#else
/* (order - 2) of the P256 curve.
 * Eight 32-bit words, least significant word first. */
static const uint32_t p256_order_2[8] = {
    0xfc63254fU,
    0xf3b9cac2U,
    0xa7179e84U,
    0xbce6faadU,
    0xffffffffU,
    0xffffffffU,
    0x00000000U,
    0xffffffffU
};
#endif /* !WOLFSSL_SP_SMALL */
4266 
4267 /* Multiply two number mod the order of P256 curve. (r = a * b mod order)
4268  *
4269  * r  Result of the multiplication.
4270  * a  First operand of the multiplication.
4271  * b  Second operand of the multiplication.
4272  */
sp_256_mont_mul_order_10(sp_digit * r,const sp_digit * a,const sp_digit * b)4273 static void sp_256_mont_mul_order_10(sp_digit* r, const sp_digit* a, const sp_digit* b)
4274 {
4275     sp_256_mul_10(r, a, b);
4276     sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
4277 }
4278 
4279 /* Square number mod the order of P256 curve. (r = a * a mod order)
4280  *
4281  * r  Result of the squaring.
4282  * a  Number to square.
4283  */
sp_256_mont_sqr_order_10(sp_digit * r,const sp_digit * a)4284 static void sp_256_mont_sqr_order_10(sp_digit* r, const sp_digit* a)
4285 {
4286     sp_256_sqr_10(r, a);
4287     sp_256_mont_reduce_order_10(r, p256_order, p256_mp_order);
4288 }
4289 
4290 #ifndef WOLFSSL_SP_SMALL
4291 /* Square number mod the order of P256 curve a number of times.
4292  * (r = a ^ n mod order)
4293  *
4294  * r  Result of the squaring.
4295  * a  Number to square.
4296  */
sp_256_mont_sqr_n_order_10(sp_digit * r,const sp_digit * a,int n)4297 static void sp_256_mont_sqr_n_order_10(sp_digit* r, const sp_digit* a, int n)
4298 {
4299     int i;
4300 
4301     sp_256_mont_sqr_order_10(r, a);
4302     for (i=1; i<n; i++) {
4303         sp_256_mont_sqr_order_10(r, r);
4304     }
4305 }
4306 #endif /* !WOLFSSL_SP_SMALL */
4307 
4308 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
4309  * (r = 1 / a mod order)
4310  *
4311  * r   Inverse result.
4312  * a   Number to invert.
4313  * td  Temporary data.
4314  */
sp_256_mont_inv_order_10(sp_digit * r,const sp_digit * a,sp_digit * td)4315 static void sp_256_mont_inv_order_10(sp_digit* r, const sp_digit* a,
4316         sp_digit* td)
4317 {
4318 #ifdef WOLFSSL_SP_SMALL
4319     sp_digit* t = td;
4320     int i;
4321 
4322     XMEMCPY(t, a, sizeof(sp_digit) * 10);
4323     for (i=254; i>=0; i--) {
4324         sp_256_mont_sqr_order_10(t, t);
4325         if ((p256_order_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
4326             sp_256_mont_mul_order_10(t, t, a);
4327         }
4328     }
4329     XMEMCPY(r, t, sizeof(sp_digit) * 10U);
4330 #else
4331     sp_digit* t = td;
4332     sp_digit* t2 = td + 2 * 10;
4333     sp_digit* t3 = td + 4 * 10;
4334     int i;
4335 
4336 
4337     /* t = a^2 */
4338     sp_256_mont_sqr_order_10(t, a);
4339     /* t = a^3 = t * a */
4340     sp_256_mont_mul_order_10(t, t, a);
4341     /* t2= a^c = t ^ 2 ^ 2 */
4342     sp_256_mont_sqr_n_order_10(t2, t, 2);
4343     /* t3= a^f = t2 * t */
4344     sp_256_mont_mul_order_10(t3, t2, t);
4345     /* t2= a^f0 = t3 ^ 2 ^ 4 */
4346     sp_256_mont_sqr_n_order_10(t2, t3, 4);
4347     /* t = a^ff = t2 * t3 */
4348     sp_256_mont_mul_order_10(t, t2, t3);
4349     /* t3= a^ff00 = t ^ 2 ^ 8 */
4350     sp_256_mont_sqr_n_order_10(t2, t, 8);
4351     /* t = a^ffff = t2 * t */
4352     sp_256_mont_mul_order_10(t, t2, t);
4353     /* t2= a^ffff0000 = t ^ 2 ^ 16 */
4354     sp_256_mont_sqr_n_order_10(t2, t, 16);
4355     /* t = a^ffffffff = t2 * t */
4356     sp_256_mont_mul_order_10(t, t2, t);
4357     /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
4358     sp_256_mont_sqr_n_order_10(t2, t, 64);
4359     /* t2= a^ffffffff00000000ffffffff = t2 * t */
4360     sp_256_mont_mul_order_10(t2, t2, t);
4361     /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
4362     sp_256_mont_sqr_n_order_10(t2, t2, 32);
4363     /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
4364     sp_256_mont_mul_order_10(t2, t2, t);
4365     /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
4366 
4367     for (i=127; i>=112; i--) {
4368         sp_256_mont_sqr_order_10(t2, t2);
4369         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
4370             sp_256_mont_mul_order_10(t2, t2, a);
4371         }
4372     }
4373     /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
4374     sp_256_mont_sqr_n_order_10(t2, t2, 4);
4375     sp_256_mont_mul_order_10(t2, t2, t3);
4376     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
4377     for (i=107; i>=64; i--) {
4378         sp_256_mont_sqr_order_10(t2, t2);
4379         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
4380             sp_256_mont_mul_order_10(t2, t2, a);
4381         }
4382     }
4383     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
4384     sp_256_mont_sqr_n_order_10(t2, t2, 4);
4385     sp_256_mont_mul_order_10(t2, t2, t3);
4386     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
4387     for (i=59; i>=32; i--) {
4388         sp_256_mont_sqr_order_10(t2, t2);
4389         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
4390             sp_256_mont_mul_order_10(t2, t2, a);
4391         }
4392     }
4393     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
4394     sp_256_mont_sqr_n_order_10(t2, t2, 4);
4395     sp_256_mont_mul_order_10(t2, t2, t3);
4396     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
4397     for (i=27; i>=0; i--) {
4398         sp_256_mont_sqr_order_10(t2, t2);
4399         if (((sp_digit)p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
4400             sp_256_mont_mul_order_10(t2, t2, a);
4401         }
4402     }
4403     /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
4404     sp_256_mont_sqr_n_order_10(t2, t2, 4);
4405     /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
4406     sp_256_mont_mul_order_10(r, t2, t3);
4407 #endif /* WOLFSSL_SP_SMALL */
4408 }
4409 
4410 #endif /* HAVE_ECC_SIGN || HAVE_ECC_VERIFY */
4411 
4412 #ifdef HAVE_ECC_VERIFY
4413 
4414 
4415 /* Verify the signature values with the hash and public key.
4416  *   e = Truncate(hash, 256)
4417  *   u1 = e/s mod order
4418  *   u2 = r/s mod order
4419  *   r == (u1.G + u2.Q)->x mod order
4420  * Optimization: Leave point in projective form.
4421  *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
4422  *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
4423  * The hash is truncated to the first 256 bits.
4424  *
4425  * hash     Hash to sign.
4426  * hashLen  Length of the hash data.
4427  * rng      Random number generator.
4428  * priv     Private part of key - scalar.
4429  * rm       First part of result as an mp_int.
4430  * sm       Sirst part of result as an mp_int.
4431  * heap     Heap to use for allocation.
4432  * returns RNG failures, MEMORY_E when memory allocation fails and
4433  * MP_OKAY on success.
4434  */
wolfSSL_DSP_ECC_Verify_256(remote_handle64 h,int32 * u1,int hashLen,int32 * r,int rSz,int32 * s,int sSz,int32 * x,int xSz,int32 * y,int ySz,int32 * z,int zSz,int * res)4435 int wolfSSL_DSP_ECC_Verify_256(remote_handle64 h, int32 *u1, int hashLen, int32* r, int rSz, int32* s, int sSz,
4436 	int32* x, int xSz, int32* y, int ySz, int32* z, int zSz, int* res)
4437 {
4438 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4439     sp_digit* d = NULL;
4440 #else
4441     sp_digit u2d[2*10] __attribute__((aligned(128)));
4442     sp_digit tmpd[2*10 * 5] __attribute__((aligned(128)));
4443     sp_point p1d;
4444     sp_point p2d;
4445 #endif
4446     sp_digit* u2 = NULL;
4447     sp_digit* tmp = NULL;
4448     sp_point* p1;
4449     sp_point* p2 = NULL;
4450     sp_digit carry;
4451     int32_t c;
4452     int err;
4453     void* heap = NULL;
4454 
4455     (void)h;
4456     (void)hashLen;
4457 
4458     err = sp_ecc_point_new(heap, p1d, p1);
4459     if (err == MP_OKAY) {
4460         err = sp_ecc_point_new(heap, p2d, p2);
4461     }
4462 
4463     if (err == MP_OKAY) {
4464         u2 = u2d;
4465         tmp = tmpd;
4466 
4467 	XMEMCPY(u2, r, 40);
4468 	XMEMCPY(p2->x, x, 40);
4469 	XMEMCPY(p2->y, y, 40);
4470 	XMEMCPY(p2->z, z, 40);
4471 
4472             sp_256_mul_10(s, s, p256_norm_order);
4473         err = sp_256_mod_10(s, s, p256_order);
4474     }
4475     if (err == MP_OKAY) {
4476         sp_256_norm_10(s);
4477         {
4478 
4479             sp_256_mont_inv_order_10(s, s, tmp);
4480             sp_256_mont_mul_order_10(u1, u1, s);
4481             sp_256_mont_mul_order_10(u2, u2, s);
4482         }
4483 
4484             err = sp_256_ecc_mulmod_base_10(p1, u1, 0, heap);
4485     }
4486     if (err == MP_OKAY) {
4487             err = sp_256_ecc_mulmod_10(p2, p2, u2, 0, heap);
4488     }
4489 
4490     if (err == MP_OKAY) {
4491             sp_256_proj_point_add_10(p1, p1, p2, tmp);
4492 
4493         /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
4494         /* Reload r and convert to Montgomery form. */
4495 	XMEMCPY(u2, r, 40);
4496         err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
4497     }
4498 
4499     if (err == MP_OKAY) {
4500         /* u1 = r.z'.z' mod prime */
4501         sp_256_mont_sqr_10(p1->z, p1->z, p256_mod, p256_mp_mod);
4502         sp_256_mont_mul_10(u1, u2, p1->z, p256_mod, p256_mp_mod);
4503         *res = (int)(sp_256_cmp_10(p1->x, u1) == 0);
4504         if (*res == 0) {
4505             /* Reload r and add order. */
4506 	    XMEMCPY(u2, r, 40);
4507             carry = sp_256_add_10(u2, u2, p256_order);
4508             /* Carry means result is greater than mod and is not valid. */
4509             if (carry == 0) {
4510                 sp_256_norm_10(u2);
4511 
4512                 /* Compare with mod and if greater or equal then not valid. */
4513                 c = sp_256_cmp_10(u2, p256_mod);
4514                 if (c < 0) {
4515                     /* Convert to Montogomery form */
4516                     err = sp_256_mod_mul_norm_10(u2, u2, p256_mod);
4517                     if (err == MP_OKAY) {
4518                         /* u1 = (r + 1*order).z'.z' mod prime */
4519                         sp_256_mont_mul_10(u1, u2, p1->z, p256_mod,
4520                                                                   p256_mp_mod);
4521                         *res = (int)(sp_256_cmp_10(p1->x, u2) == 0);
4522                     }
4523                 }
4524             }
4525         }
4526     }
4527 
4528 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4529     if (d != NULL)
4530         XFREE(d, heap, DYNAMIC_TYPE_ECC);
4531 #endif
4532     sp_ecc_point_free(p1, 0, heap);
4533     sp_ecc_point_free(p2, 0, heap);
4534 
4535     return err;
4536 }
4537 
/** Free the Fixed Point cache.
 *
 * The body is empty in this file: nothing is released.
 */
void wc_ecc_fp_free(void)
{
    /* Intentionally empty. */
    return;
}
4542 
4543 
wolfSSL_open(const char * uri,remote_handle64 * handle)4544 AEEResult wolfSSL_open(const char *uri, remote_handle64 *handle)
4545 {
4546   /* can be any value or ignored, rpc layer doesn't care
4547    * also ok
4548    * *handle = 0;
4549    * *handle = 0xdeadc0de;
4550    */
4551    *handle = (remote_handle64)malloc(1);
4552    return 0;
4553 }
4554 
wolfSSL_close(remote_handle64 handle)4555 AEEResult wolfSSL_close(remote_handle64 handle)
4556 {
4557    if (handle)
4558       free((void*)handle);
4559    return 0;
4560 }
4561 #endif /* HAVE_ECC_VERIFY */
4562 
4563 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
4564 /* Add two projective EC points together.
4565  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
4566  *
4567  * pX   First EC point's X ordinate.
4568  * pY   First EC point's Y ordinate.
4569  * pZ   First EC point's Z ordinate.
4570  * qX   Second EC point's X ordinate.
4571  * qY   Second EC point's Y ordinate.
4572  * qZ   Second EC point's Z ordinate.
4573  * rX   Resultant EC point's X ordinate.
4574  * rY   Resultant EC point's Y ordinate.
4575  * rZ   Resultant EC point's Z ordinate.
4576  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
4577  */
sp_ecc_proj_add_point_256(mp_int * pX,mp_int * pY,mp_int * pZ,mp_int * qX,mp_int * qY,mp_int * qZ,mp_int * rX,mp_int * rY,mp_int * rZ)4578 int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
4579                               mp_int* qX, mp_int* qY, mp_int* qZ,
4580                               mp_int* rX, mp_int* rY, mp_int* rZ)
4581 {
4582 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
4583     sp_digit tmpd[2 * 10 * 5];
4584     sp_point pd;
4585     sp_point qd;
4586 #endif
4587     sp_digit* tmp;
4588     sp_point* p;
4589     sp_point* q = NULL;
4590     int err;
4591 
4592     err = sp_ecc_point_new(NULL, pd, p);
4593     if (err == MP_OKAY) {
4594         err = sp_ecc_point_new(NULL, qd, q);
4595     }
4596 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4597     if (err == MP_OKAY) {
4598         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 5, NULL,
4599                                                               DYNAMIC_TYPE_ECC);
4600         if (tmp == NULL) {
4601             err = MEMORY_E;
4602         }
4603     }
4604 #else
4605     tmp = tmpd;
4606 #endif
4607 
4608     if (err == MP_OKAY) {
4609         sp_256_from_mp(p->x, 10, pX);
4610         sp_256_from_mp(p->y, 10, pY);
4611         sp_256_from_mp(p->z, 10, pZ);
4612         sp_256_from_mp(q->x, 10, qX);
4613         sp_256_from_mp(q->y, 10, qY);
4614         sp_256_from_mp(q->z, 10, qZ);
4615 
4616             sp_256_proj_point_add_10(p, p, q, tmp);
4617     }
4618 
4619     if (err == MP_OKAY) {
4620         err = sp_256_to_mp(p->x, rX);
4621     }
4622     if (err == MP_OKAY) {
4623         err = sp_256_to_mp(p->y, rY);
4624     }
4625     if (err == MP_OKAY) {
4626         err = sp_256_to_mp(p->z, rZ);
4627     }
4628 
4629 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4630     if (tmp != NULL) {
4631         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
4632     }
4633 #endif
4634     sp_ecc_point_free(q, 0, NULL);
4635     sp_ecc_point_free(p, 0, NULL);
4636 
4637     return err;
4638 }
4639 
4640 
4641 /* Double a projective EC point.
4642  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
4643  *
4644  * pX   EC point's X ordinate.
4645  * pY   EC point's Y ordinate.
4646  * pZ   EC point's Z ordinate.
4647  * rX   Resultant EC point's X ordinate.
4648  * rY   Resultant EC point's Y ordinate.
4649  * rZ   Resultant EC point's Z ordinate.
4650  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
4651  */
sp_ecc_proj_dbl_point_256(mp_int * pX,mp_int * pY,mp_int * pZ,mp_int * rX,mp_int * rY,mp_int * rZ)4652 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
4653                               mp_int* rX, mp_int* rY, mp_int* rZ)
4654 {
4655 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
4656     sp_digit tmpd[2 * 10 * 2];
4657     sp_point pd;
4658 #endif
4659     sp_digit* tmp;
4660     sp_point* p;
4661     int err;
4662 
4663     err = sp_ecc_point_new(NULL, pd, p);
4664 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4665     if (err == MP_OKAY) {
4666         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 2, NULL,
4667                                                               DYNAMIC_TYPE_ECC);
4668         if (tmp == NULL) {
4669             err = MEMORY_E;
4670         }
4671     }
4672 #else
4673     tmp = tmpd;
4674 #endif
4675 
4676     if (err == MP_OKAY) {
4677         sp_256_from_mp(p->x, 10, pX);
4678         sp_256_from_mp(p->y, 10, pY);
4679         sp_256_from_mp(p->z, 10, pZ);
4680 
4681             sp_256_proj_point_dbl_10(p, p, tmp);
4682     }
4683 
4684     if (err == MP_OKAY) {
4685         err = sp_256_to_mp(p->x, rX);
4686     }
4687     if (err == MP_OKAY) {
4688         err = sp_256_to_mp(p->y, rY);
4689     }
4690     if (err == MP_OKAY) {
4691         err = sp_256_to_mp(p->z, rZ);
4692     }
4693 
4694 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4695     if (tmp != NULL) {
4696         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
4697     }
4698 #endif
4699     sp_ecc_point_free(p, 0, NULL);
4700 
4701     return err;
4702 }
4703 
4704 /* Map a projective EC point to affine in place.
4705  * pZ will be one.
4706  *
4707  * pX   EC point's X ordinate.
4708  * pY   EC point's Y ordinate.
4709  * pZ   EC point's Z ordinate.
4710  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
4711  */
sp_ecc_map_256(mp_int * pX,mp_int * pY,mp_int * pZ)4712 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
4713 {
4714 #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK)
4715     sp_digit tmpd[2 * 10 * 4];
4716     sp_point pd;
4717 #endif
4718     sp_digit* tmp;
4719     sp_point* p;
4720     int err;
4721 
4722     err = sp_ecc_point_new(NULL, pd, p);
4723 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4724     if (err == MP_OKAY) {
4725         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 10 * 4, NULL,
4726                                                               DYNAMIC_TYPE_ECC);
4727         if (tmp == NULL) {
4728             err = MEMORY_E;
4729         }
4730     }
4731 #else
4732     tmp = tmpd;
4733 #endif
4734     if (err == MP_OKAY) {
4735         sp_256_from_mp(p->x, 10, pX);
4736         sp_256_from_mp(p->y, 10, pY);
4737         sp_256_from_mp(p->z, 10, pZ);
4738 
4739         sp_256_map_10(p, p, tmp);
4740     }
4741 
4742     if (err == MP_OKAY) {
4743         err = sp_256_to_mp(p->x, pX);
4744     }
4745     if (err == MP_OKAY) {
4746         err = sp_256_to_mp(p->y, pY);
4747     }
4748     if (err == MP_OKAY) {
4749         err = sp_256_to_mp(p->z, pZ);
4750     }
4751 
4752 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4753     if (tmp != NULL) {
4754         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
4755     }
4756 #endif
4757     sp_ecc_point_free(p, 0, NULL);
4758 
4759     return err;
4760 }
4761 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
4762 #ifdef HAVE_COMP_KEY
4763 /* Find the square root of a number mod the prime of the curve.
4764  *
4765  * y  The number to operate on and the result.
4766  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
4767  */
sp_256_mont_sqrt_10(sp_digit * y)4768 static int sp_256_mont_sqrt_10(sp_digit* y)
4769 {
4770 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4771     sp_digit* d;
4772 #else
4773     sp_digit t1d[2 * 10];
4774     sp_digit t2d[2 * 10];
4775 #endif
4776     sp_digit* t1;
4777     sp_digit* t2;
4778     int err = MP_OKAY;
4779 
4780 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4781     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
4782     if (d == NULL) {
4783         err = MEMORY_E;
4784     }
4785 #endif
4786 
4787     if (err == MP_OKAY) {
4788 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4789         t1 = d + 0 * 10;
4790         t2 = d + 2 * 10;
4791 #else
4792         t1 = t1d;
4793         t2 = t2d;
4794 #endif
4795 
4796         {
4797             /* t2 = y ^ 0x2 */
4798             sp_256_mont_sqr_10(t2, y, p256_mod, p256_mp_mod);
4799             /* t1 = y ^ 0x3 */
4800             sp_256_mont_mul_10(t1, t2, y, p256_mod, p256_mp_mod);
4801             /* t2 = y ^ 0xc */
4802             sp_256_mont_sqr_n_10(t2, t1, 2, p256_mod, p256_mp_mod);
4803             /* t1 = y ^ 0xf */
4804             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
4805             /* t2 = y ^ 0xf0 */
4806             sp_256_mont_sqr_n_10(t2, t1, 4, p256_mod, p256_mp_mod);
4807             /* t1 = y ^ 0xff */
4808             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
4809             /* t2 = y ^ 0xff00 */
4810             sp_256_mont_sqr_n_10(t2, t1, 8, p256_mod, p256_mp_mod);
4811             /* t1 = y ^ 0xffff */
4812             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
4813             /* t2 = y ^ 0xffff0000 */
4814             sp_256_mont_sqr_n_10(t2, t1, 16, p256_mod, p256_mp_mod);
4815             /* t1 = y ^ 0xffffffff */
4816             sp_256_mont_mul_10(t1, t1, t2, p256_mod, p256_mp_mod);
4817             /* t1 = y ^ 0xffffffff00000000 */
4818             sp_256_mont_sqr_n_10(t1, t1, 32, p256_mod, p256_mp_mod);
4819             /* t1 = y ^ 0xffffffff00000001 */
4820             sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
4821             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
4822             sp_256_mont_sqr_n_10(t1, t1, 96, p256_mod, p256_mp_mod);
4823             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
4824             sp_256_mont_mul_10(t1, t1, y, p256_mod, p256_mp_mod);
4825             sp_256_mont_sqr_n_10(y, t1, 94, p256_mod, p256_mp_mod);
4826         }
4827     }
4828 
4829 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4830     if (d != NULL) {
4831         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
4832     }
4833 #endif
4834 
4835     return err;
4836 }
4837 
4838 /* Uncompress the point given the X ordinate.
4839  *
4840  * xm    X ordinate.
4841  * odd   Whether the Y ordinate is odd.
4842  * ym    Calculated Y ordinate.
4843  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
4844  */
sp_ecc_uncompress_256(mp_int * xm,int odd,mp_int * ym)4845 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
4846 {
4847 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4848     sp_digit* d;
4849 #else
4850     sp_digit xd[2 * 10];
4851     sp_digit yd[2 * 10];
4852 #endif
4853     sp_digit* x = NULL;
4854     sp_digit* y = NULL;
4855     int err = MP_OKAY;
4856 
4857 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4858     d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 10, NULL, DYNAMIC_TYPE_ECC);
4859     if (d == NULL) {
4860         err = MEMORY_E;
4861     }
4862 #endif
4863 
4864     if (err == MP_OKAY) {
4865 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4866         x = d + 0 * 10;
4867         y = d + 2 * 10;
4868 #else
4869         x = xd;
4870         y = yd;
4871 #endif
4872 
4873         sp_256_from_mp(x, 10, xm);
4874         err = sp_256_mod_mul_norm_10(x, x, p256_mod);
4875     }
4876     if (err == MP_OKAY) {
4877         /* y = x^3 */
4878         {
4879             sp_256_mont_sqr_10(y, x, p256_mod, p256_mp_mod);
4880             sp_256_mont_mul_10(y, y, x, p256_mod, p256_mp_mod);
4881         }
4882         /* y = x^3 - 3x */
4883         sp_256_mont_sub_10(y, y, x, p256_mod);
4884         sp_256_mont_sub_10(y, y, x, p256_mod);
4885         sp_256_mont_sub_10(y, y, x, p256_mod);
4886         /* y = x^3 - 3x + b */
4887         err = sp_256_mod_mul_norm_10(x, p256_b, p256_mod);
4888     }
4889     if (err == MP_OKAY) {
4890         sp_256_mont_add_10(y, y, x, p256_mod);
4891         /* y = sqrt(x^3 - 3x + b) */
4892         err = sp_256_mont_sqrt_10(y);
4893     }
4894     if (err == MP_OKAY) {
4895         XMEMSET(y + 10, 0, 10U * sizeof(sp_digit));
4896         sp_256_mont_reduce_10(y, p256_mod, p256_mp_mod);
4897         if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
4898             sp_256_mont_sub_10(y, p256_mod, y, p256_mod);
4899         }
4900 
4901         err = sp_256_to_mp(y, ym);
4902     }
4903 
4904 #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)
4905     if (d != NULL) {
4906         XFREE(d, NULL, DYNAMIC_TYPE_ECC);
4907     }
4908 #endif
4909 
4910     return err;
4911 }
4912 #endif
4913 #endif /* !WOLFSSL_SP_NO_256 */
4914 #endif /* WOLFSSL_HAVE_SP_ECC */
4915 #endif /* WOLFSSL_DSP */
4916 #endif /* WOLFSSL_HAVE_SP_ECC */
4917 
4918