1 /* sp.c
2  *
3  * Copyright (C) 2006-2021 wolfSSL Inc.
4  *
5  * This file is part of wolfSSL.
6  *
7  * wolfSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * wolfSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
20  */
21 
22 /* Implementation by Sean Parkinson. */
23 
24 #ifdef HAVE_CONFIG_H
25     #include <config.h>
26 #endif
27 
28 #include <wolfssl/wolfcrypt/settings.h>
29 
30 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH) || \
31     defined(WOLFSSL_HAVE_SP_ECC)
32 
33 #include <wolfssl/wolfcrypt/error-crypt.h>
34 #include <wolfssl/wolfcrypt/cpuid.h>
35 #ifdef NO_INLINE
36     #include <wolfssl/wolfcrypt/misc.h>
37 #else
38     #define WOLFSSL_MISC_INCLUDED
39     #include <wolfcrypt/src/misc.c>
40 #endif
41 
42 #ifdef RSA_LOW_MEM
43 #ifndef WOLFSSL_SP_SMALL
44 #define WOLFSSL_SP_SMALL
45 #endif
46 #endif
47 
48 #include <wolfssl/wolfcrypt/sp.h>
49 
50 #ifdef __IAR_SYSTEMS_ICC__
51 #define __asm__        asm
52 #define __volatile__   volatile
53 #endif /* __IAR_SYSTEMS_ICC__ */
54 #ifdef __KEIL__
55 #define __asm__        __asm
56 #define __volatile__   volatile
57 #endif
58 
59 #ifdef WOLFSSL_SP_ARM_CORTEX_M_ASM
/* Debug helper: print a multi-word number to stderr in hexadecimal, starting
 * with the highest-indexed word and ending with word 0.
 *
 * var    Array of words to print.
 * name   String literal used as the label; printed in front of "=0x".
 * total  Not used by this implementation.
 * words  Number of words of var to print.
 * bits   Not used by this implementation.
 *
 * SP_PRINT_FMT is not defined here; it must expand to a printf format string
 * for a single word. Macro arguments are parenthesized so that expression
 * arguments expand with the intended precedence.
 */
#define SP_PRINT_NUM(var, name, total, words, bits)     \
    do {                                                \
        int ii;                                         \
        fprintf(stderr, name "=0x");                    \
        for (ii = (words) - 1; ii >= 0; ii--)           \
            fprintf(stderr, SP_PRINT_FMT, (var)[ii]);   \
        fprintf(stderr, "\n");                          \
    } while (0)

/* Debug helper: print a single value to stderr in hexadecimal.
 *
 * var   Value to print; formatted with SP_PRINT_FMT.
 * name  String literal used as the label; printed in front of "=0x".
 */
#define SP_PRINT_VAL(var, name)                         \
    fprintf(stderr, name "=0x" SP_PRINT_FMT "\n", (var))
71 #if defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH)
72 #ifndef WOLFSSL_SP_NO_2048
73 /* Read big endian unsigned byte array into r.
74  *
75  * r  A single precision integer.
76  * size  Maximum number of bytes to convert
77  * a  Byte array.
78  * n  Number of bytes in array to read.
79  */
sp_2048_from_bin(sp_digit * r,int size,const byte * a,int n)80 static void sp_2048_from_bin(sp_digit* r, int size, const byte* a, int n)
81 {
82     int i;
83     int j = 0;
84     word32 s = 0;
85 
86     r[0] = 0;
87     for (i = n-1; i >= 0; i--) {
88         r[j] |= (((sp_digit)a[i]) << s);
89         if (s >= 24U) {
90             r[j] &= 0xffffffff;
91             s = 32U - s;
92             if (j + 1 >= size) {
93                 break;
94             }
95             r[++j] = (sp_digit)a[i] >> s;
96             s = 8U - s;
97         }
98         else {
99             s += 8U;
100         }
101     }
102 
103     for (j++; j < size; j++) {
104         r[j] = 0;
105     }
106 }
107 
108 /* Convert an mp_int to an array of sp_digit.
109  *
110  * r  A single precision integer.
111  * size  Maximum number of bytes to convert
112  * a  A multi-precision integer.
113  */
sp_2048_from_mp(sp_digit * r,int size,const mp_int * a)114 static void sp_2048_from_mp(sp_digit* r, int size, const mp_int* a)
115 {
116 #if DIGIT_BIT == 32
117     int j;
118 
119     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
120 
121     for (j = a->used; j < size; j++) {
122         r[j] = 0;
123     }
124 #elif DIGIT_BIT > 32
125     int i;
126     int j = 0;
127     word32 s = 0;
128 
129     r[0] = 0;
130     for (i = 0; i < a->used && j < size; i++) {
131         r[j] |= ((sp_digit)a->dp[i] << s);
132         r[j] &= 0xffffffff;
133         s = 32U - s;
134         if (j + 1 >= size) {
135             break;
136         }
137         /* lint allow cast of mismatch word32 and mp_digit */
138         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
139         while ((s + 32U) <= (word32)DIGIT_BIT) {
140             s += 32U;
141             r[j] &= 0xffffffff;
142             if (j + 1 >= size) {
143                 break;
144             }
145             if (s < (word32)DIGIT_BIT) {
146                 /* lint allow cast of mismatch word32 and mp_digit */
147                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
148             }
149             else {
150                 r[++j] = (sp_digit)0;
151             }
152         }
153         s = (word32)DIGIT_BIT - s;
154     }
155 
156     for (j++; j < size; j++) {
157         r[j] = 0;
158     }
159 #else
160     int i;
161     int j = 0;
162     int s = 0;
163 
164     r[0] = 0;
165     for (i = 0; i < a->used && j < size; i++) {
166         r[j] |= ((sp_digit)a->dp[i]) << s;
167         if (s + DIGIT_BIT >= 32) {
168             r[j] &= 0xffffffff;
169             if (j + 1 >= size) {
170                 break;
171             }
172             s = 32 - s;
173             if (s == DIGIT_BIT) {
174                 r[++j] = 0;
175                 s = 0;
176             }
177             else {
178                 r[++j] = a->dp[i] >> s;
179                 s = DIGIT_BIT - s;
180             }
181         }
182         else {
183             s += DIGIT_BIT;
184         }
185     }
186 
187     for (j++; j < size; j++) {
188         r[j] = 0;
189     }
190 #endif
191 }
192 
193 /* Write r as big endian to byte array.
194  * Fixed length number of bytes written: 256
195  *
196  * r  A single precision integer.
197  * a  Byte array.
198  */
sp_2048_to_bin_64(sp_digit * r,byte * a)199 static void sp_2048_to_bin_64(sp_digit* r, byte* a)
200 {
201     int i;
202     int j;
203     int s = 0;
204     int b;
205 
206     j = 2048 / 8 - 1;
207     a[j] = 0;
208     for (i=0; i<64 && j>=0; i++) {
209         b = 0;
210         /* lint allow cast of mismatch sp_digit and int */
211         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
212         b += 8 - s;
213         if (j < 0) {
214             break;
215         }
216         while (b < 32) {
217             a[j--] = (byte)(r[i] >> b);
218             b += 8;
219             if (j < 0) {
220                 break;
221             }
222         }
223         s = 8 - (b - 32);
224         if (j >= 0) {
225             a[j] = 0;
226         }
227         if (s != 0) {
228             j++;
229         }
230     }
231 }
232 
/* Normalize the values in each word to 32 bits.
 *
 * The macro expands to nothing, so invoking it has no effect.
 * NOTE(review): presumably no work is needed because every word already
 * holds a full 32-bit value in this implementation - confirm.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_2048_norm_64(a)
246 
247 #ifndef WOLFSSL_SP_SMALL
248 /* Multiply a and b into r. (r = a * b)
249  *
250  * r  A single precision integer.
251  * a  A single precision integer.
252  * b  A single precision integer.
253  */
sp_2048_mul_8(sp_digit * r,const sp_digit * a,const sp_digit * b)254 SP_NOINLINE static void sp_2048_mul_8(sp_digit* r, const sp_digit* a,
255         const sp_digit* b)
256 {
257     sp_digit tmp_arr[8];
258     sp_digit* tmp = tmp_arr;
259 
260     __asm__ __volatile__ (
261         /* A[0] * B[0] */
262         "ldr	r6, [%[a], #0]\n\t"
263         "ldr	r8, [%[b], #0]\n\t"
264         "umull	r3, r4, r6, r8\n\t"
265         "mov	r5, #0\n\t"
266         "str	r3, [%[tmp], #0]\n\t"
267         "mov	r3, #0\n\t"
268         /* A[0] * B[1] */
269         "ldr	r8, [%[b], #4]\n\t"
270         "umull	r6, r8, r6, r8\n\t"
271         "adds	r4, r4, r6\n\t"
272         "adc	r5, r5, r8\n\t"
273         /* A[1] * B[0] */
274         "ldr	r6, [%[a], #4]\n\t"
275         "ldr	r8, [%[b], #0]\n\t"
276         "umull	r6, r8, r6, r8\n\t"
277         "adds	r4, r4, r6\n\t"
278         "adcs 	r5, r5, r8\n\t"
279         "adc	r3, r3, #0\n\t"
280         "str	r4, [%[tmp], #4]\n\t"
281         "mov	r4, #0\n\t"
282         /* A[0] * B[2] */
283         "ldr	r6, [%[a], #0]\n\t"
284         "ldr	r8, [%[b], #8]\n\t"
285         "umull	r6, r8, r6, r8\n\t"
286         "adds	r5, r5, r6\n\t"
287         "adcs 	r3, r3, r8\n\t"
288         "adc	r4, r4, #0\n\t"
289         /* A[1] * B[1] */
290         "ldr	r6, [%[a], #4]\n\t"
291         "ldr	r8, [%[b], #4]\n\t"
292         "umull	r6, r8, r6, r8\n\t"
293         "adds	r5, r5, r6\n\t"
294         "adcs 	r3, r3, r8\n\t"
295         "adc	r4, r4, #0\n\t"
296         /* A[2] * B[0] */
297         "ldr	r6, [%[a], #8]\n\t"
298         "ldr	r8, [%[b], #0]\n\t"
299         "umull	r6, r8, r6, r8\n\t"
300         "adds	r5, r5, r6\n\t"
301         "adcs 	r3, r3, r8\n\t"
302         "adc	r4, r4, #0\n\t"
303         "str	r5, [%[tmp], #8]\n\t"
304         "mov	r5, #0\n\t"
305         /* A[0] * B[3] */
306         "ldr	r6, [%[a], #0]\n\t"
307         "ldr	r8, [%[b], #12]\n\t"
308         "umull	r6, r8, r6, r8\n\t"
309         "adds	r3, r3, r6\n\t"
310         "adcs 	r4, r4, r8\n\t"
311         "adc	r5, r5, #0\n\t"
312         /* A[1] * B[2] */
313         "ldr	r6, [%[a], #4]\n\t"
314         "ldr	r8, [%[b], #8]\n\t"
315         "umull	r6, r8, r6, r8\n\t"
316         "adds	r3, r3, r6\n\t"
317         "adcs 	r4, r4, r8\n\t"
318         "adc	r5, r5, #0\n\t"
319         /* A[2] * B[1] */
320         "ldr	r6, [%[a], #8]\n\t"
321         "ldr	r8, [%[b], #4]\n\t"
322         "umull	r6, r8, r6, r8\n\t"
323         "adds	r3, r3, r6\n\t"
324         "adcs 	r4, r4, r8\n\t"
325         "adc	r5, r5, #0\n\t"
326         /* A[3] * B[0] */
327         "ldr	r6, [%[a], #12]\n\t"
328         "ldr	r8, [%[b], #0]\n\t"
329         "umull	r6, r8, r6, r8\n\t"
330         "adds	r3, r3, r6\n\t"
331         "adcs 	r4, r4, r8\n\t"
332         "adc	r5, r5, #0\n\t"
333         "str	r3, [%[tmp], #12]\n\t"
334         "mov	r3, #0\n\t"
335         /* A[0] * B[4] */
336         "ldr	r6, [%[a], #0]\n\t"
337         "ldr	r8, [%[b], #16]\n\t"
338         "umull	r6, r8, r6, r8\n\t"
339         "adds	r4, r4, r6\n\t"
340         "adcs 	r5, r5, r8\n\t"
341         "adc	r3, r3, #0\n\t"
342         /* A[1] * B[3] */
343         "ldr	r6, [%[a], #4]\n\t"
344         "ldr	r8, [%[b], #12]\n\t"
345         "umull	r6, r8, r6, r8\n\t"
346         "adds	r4, r4, r6\n\t"
347         "adcs 	r5, r5, r8\n\t"
348         "adc	r3, r3, #0\n\t"
349         /* A[2] * B[2] */
350         "ldr	r6, [%[a], #8]\n\t"
351         "ldr	r8, [%[b], #8]\n\t"
352         "umull	r6, r8, r6, r8\n\t"
353         "adds	r4, r4, r6\n\t"
354         "adcs 	r5, r5, r8\n\t"
355         "adc	r3, r3, #0\n\t"
356         /* A[3] * B[1] */
357         "ldr	r6, [%[a], #12]\n\t"
358         "ldr	r8, [%[b], #4]\n\t"
359         "umull	r6, r8, r6, r8\n\t"
360         "adds	r4, r4, r6\n\t"
361         "adcs 	r5, r5, r8\n\t"
362         "adc	r3, r3, #0\n\t"
363         /* A[4] * B[0] */
364         "ldr	r6, [%[a], #16]\n\t"
365         "ldr	r8, [%[b], #0]\n\t"
366         "umull	r6, r8, r6, r8\n\t"
367         "adds	r4, r4, r6\n\t"
368         "adcs 	r5, r5, r8\n\t"
369         "adc	r3, r3, #0\n\t"
370         "str	r4, [%[tmp], #16]\n\t"
371         "mov	r4, #0\n\t"
372         /* A[0] * B[5] */
373         "ldr	r6, [%[a], #0]\n\t"
374         "ldr	r8, [%[b], #20]\n\t"
375         "umull	r6, r8, r6, r8\n\t"
376         "adds	r5, r5, r6\n\t"
377         "adcs 	r3, r3, r8\n\t"
378         "adc	r4, r4, #0\n\t"
379         /* A[1] * B[4] */
380         "ldr	r6, [%[a], #4]\n\t"
381         "ldr	r8, [%[b], #16]\n\t"
382         "umull	r6, r8, r6, r8\n\t"
383         "adds	r5, r5, r6\n\t"
384         "adcs 	r3, r3, r8\n\t"
385         "adc	r4, r4, #0\n\t"
386         /* A[2] * B[3] */
387         "ldr	r6, [%[a], #8]\n\t"
388         "ldr	r8, [%[b], #12]\n\t"
389         "umull	r6, r8, r6, r8\n\t"
390         "adds	r5, r5, r6\n\t"
391         "adcs 	r3, r3, r8\n\t"
392         "adc	r4, r4, #0\n\t"
393         /* A[3] * B[2] */
394         "ldr	r6, [%[a], #12]\n\t"
395         "ldr	r8, [%[b], #8]\n\t"
396         "umull	r6, r8, r6, r8\n\t"
397         "adds	r5, r5, r6\n\t"
398         "adcs 	r3, r3, r8\n\t"
399         "adc	r4, r4, #0\n\t"
400         /* A[4] * B[1] */
401         "ldr	r6, [%[a], #16]\n\t"
402         "ldr	r8, [%[b], #4]\n\t"
403         "umull	r6, r8, r6, r8\n\t"
404         "adds	r5, r5, r6\n\t"
405         "adcs 	r3, r3, r8\n\t"
406         "adc	r4, r4, #0\n\t"
407         /* A[5] * B[0] */
408         "ldr	r6, [%[a], #20]\n\t"
409         "ldr	r8, [%[b], #0]\n\t"
410         "umull	r6, r8, r6, r8\n\t"
411         "adds	r5, r5, r6\n\t"
412         "adcs 	r3, r3, r8\n\t"
413         "adc	r4, r4, #0\n\t"
414         "str	r5, [%[tmp], #20]\n\t"
415         "mov	r5, #0\n\t"
416         /* A[0] * B[6] */
417         "ldr	r6, [%[a], #0]\n\t"
418         "ldr	r8, [%[b], #24]\n\t"
419         "umull	r6, r8, r6, r8\n\t"
420         "adds	r3, r3, r6\n\t"
421         "adcs 	r4, r4, r8\n\t"
422         "adc	r5, r5, #0\n\t"
423         /* A[1] * B[5] */
424         "ldr	r6, [%[a], #4]\n\t"
425         "ldr	r8, [%[b], #20]\n\t"
426         "umull	r6, r8, r6, r8\n\t"
427         "adds	r3, r3, r6\n\t"
428         "adcs 	r4, r4, r8\n\t"
429         "adc	r5, r5, #0\n\t"
430         /* A[2] * B[4] */
431         "ldr	r6, [%[a], #8]\n\t"
432         "ldr	r8, [%[b], #16]\n\t"
433         "umull	r6, r8, r6, r8\n\t"
434         "adds	r3, r3, r6\n\t"
435         "adcs 	r4, r4, r8\n\t"
436         "adc	r5, r5, #0\n\t"
437         /* A[3] * B[3] */
438         "ldr	r6, [%[a], #12]\n\t"
439         "ldr	r8, [%[b], #12]\n\t"
440         "umull	r6, r8, r6, r8\n\t"
441         "adds	r3, r3, r6\n\t"
442         "adcs 	r4, r4, r8\n\t"
443         "adc	r5, r5, #0\n\t"
444         /* A[4] * B[2] */
445         "ldr	r6, [%[a], #16]\n\t"
446         "ldr	r8, [%[b], #8]\n\t"
447         "umull	r6, r8, r6, r8\n\t"
448         "adds	r3, r3, r6\n\t"
449         "adcs 	r4, r4, r8\n\t"
450         "adc	r5, r5, #0\n\t"
451         /* A[5] * B[1] */
452         "ldr	r6, [%[a], #20]\n\t"
453         "ldr	r8, [%[b], #4]\n\t"
454         "umull	r6, r8, r6, r8\n\t"
455         "adds	r3, r3, r6\n\t"
456         "adcs 	r4, r4, r8\n\t"
457         "adc	r5, r5, #0\n\t"
458         /* A[6] * B[0] */
459         "ldr	r6, [%[a], #24]\n\t"
460         "ldr	r8, [%[b], #0]\n\t"
461         "umull	r6, r8, r6, r8\n\t"
462         "adds	r3, r3, r6\n\t"
463         "adcs 	r4, r4, r8\n\t"
464         "adc	r5, r5, #0\n\t"
465         "str	r3, [%[tmp], #24]\n\t"
466         "mov	r3, #0\n\t"
467         /* A[0] * B[7] */
468         "ldr	r6, [%[a], #0]\n\t"
469         "ldr	r8, [%[b], #28]\n\t"
470         "umull	r6, r8, r6, r8\n\t"
471         "adds	r4, r4, r6\n\t"
472         "adcs 	r5, r5, r8\n\t"
473         "adc	r3, r3, #0\n\t"
474         /* A[1] * B[6] */
475         "ldr	r6, [%[a], #4]\n\t"
476         "ldr	r8, [%[b], #24]\n\t"
477         "umull	r6, r8, r6, r8\n\t"
478         "adds	r4, r4, r6\n\t"
479         "adcs 	r5, r5, r8\n\t"
480         "adc	r3, r3, #0\n\t"
481         /* A[2] * B[5] */
482         "ldr	r6, [%[a], #8]\n\t"
483         "ldr	r8, [%[b], #20]\n\t"
484         "umull	r6, r8, r6, r8\n\t"
485         "adds	r4, r4, r6\n\t"
486         "adcs 	r5, r5, r8\n\t"
487         "adc	r3, r3, #0\n\t"
488         /* A[3] * B[4] */
489         "ldr	r6, [%[a], #12]\n\t"
490         "ldr	r8, [%[b], #16]\n\t"
491         "umull	r6, r8, r6, r8\n\t"
492         "adds	r4, r4, r6\n\t"
493         "adcs 	r5, r5, r8\n\t"
494         "adc	r3, r3, #0\n\t"
495         /* A[4] * B[3] */
496         "ldr	r6, [%[a], #16]\n\t"
497         "ldr	r8, [%[b], #12]\n\t"
498         "umull	r6, r8, r6, r8\n\t"
499         "adds	r4, r4, r6\n\t"
500         "adcs 	r5, r5, r8\n\t"
501         "adc	r3, r3, #0\n\t"
502         /* A[5] * B[2] */
503         "ldr	r6, [%[a], #20]\n\t"
504         "ldr	r8, [%[b], #8]\n\t"
505         "umull	r6, r8, r6, r8\n\t"
506         "adds	r4, r4, r6\n\t"
507         "adcs 	r5, r5, r8\n\t"
508         "adc	r3, r3, #0\n\t"
509         /* A[6] * B[1] */
510         "ldr	r6, [%[a], #24]\n\t"
511         "ldr	r8, [%[b], #4]\n\t"
512         "umull	r6, r8, r6, r8\n\t"
513         "adds	r4, r4, r6\n\t"
514         "adcs 	r5, r5, r8\n\t"
515         "adc	r3, r3, #0\n\t"
516         /* A[7] * B[0] */
517         "ldr	r6, [%[a], #28]\n\t"
518         "ldr	r8, [%[b], #0]\n\t"
519         "umull	r6, r8, r6, r8\n\t"
520         "adds	r4, r4, r6\n\t"
521         "adcs 	r5, r5, r8\n\t"
522         "adc	r3, r3, #0\n\t"
523         "str	r4, [%[tmp], #28]\n\t"
524         "mov	r4, #0\n\t"
525         /* A[1] * B[7] */
526         "ldr	r6, [%[a], #4]\n\t"
527         "ldr	r8, [%[b], #28]\n\t"
528         "umull	r6, r8, r6, r8\n\t"
529         "adds	r5, r5, r6\n\t"
530         "adcs 	r3, r3, r8\n\t"
531         "adc	r4, r4, #0\n\t"
532         /* A[2] * B[6] */
533         "ldr	r6, [%[a], #8]\n\t"
534         "ldr	r8, [%[b], #24]\n\t"
535         "umull	r6, r8, r6, r8\n\t"
536         "adds	r5, r5, r6\n\t"
537         "adcs 	r3, r3, r8\n\t"
538         "adc	r4, r4, #0\n\t"
539         /* A[3] * B[5] */
540         "ldr	r6, [%[a], #12]\n\t"
541         "ldr	r8, [%[b], #20]\n\t"
542         "umull	r6, r8, r6, r8\n\t"
543         "adds	r5, r5, r6\n\t"
544         "adcs 	r3, r3, r8\n\t"
545         "adc	r4, r4, #0\n\t"
546         /* A[4] * B[4] */
547         "ldr	r6, [%[a], #16]\n\t"
548         "ldr	r8, [%[b], #16]\n\t"
549         "umull	r6, r8, r6, r8\n\t"
550         "adds	r5, r5, r6\n\t"
551         "adcs 	r3, r3, r8\n\t"
552         "adc	r4, r4, #0\n\t"
553         /* A[5] * B[3] */
554         "ldr	r6, [%[a], #20]\n\t"
555         "ldr	r8, [%[b], #12]\n\t"
556         "umull	r6, r8, r6, r8\n\t"
557         "adds	r5, r5, r6\n\t"
558         "adcs 	r3, r3, r8\n\t"
559         "adc	r4, r4, #0\n\t"
560         /* A[6] * B[2] */
561         "ldr	r6, [%[a], #24]\n\t"
562         "ldr	r8, [%[b], #8]\n\t"
563         "umull	r6, r8, r6, r8\n\t"
564         "adds	r5, r5, r6\n\t"
565         "adcs 	r3, r3, r8\n\t"
566         "adc	r4, r4, #0\n\t"
567         /* A[7] * B[1] */
568         "ldr	r6, [%[a], #28]\n\t"
569         "ldr	r8, [%[b], #4]\n\t"
570         "umull	r6, r8, r6, r8\n\t"
571         "adds	r5, r5, r6\n\t"
572         "adcs 	r3, r3, r8\n\t"
573         "adc	r4, r4, #0\n\t"
574         "str	r5, [%[r], #32]\n\t"
575         "mov	r5, #0\n\t"
576         /* A[2] * B[7] */
577         "ldr	r6, [%[a], #8]\n\t"
578         "ldr	r8, [%[b], #28]\n\t"
579         "umull	r6, r8, r6, r8\n\t"
580         "adds	r3, r3, r6\n\t"
581         "adcs 	r4, r4, r8\n\t"
582         "adc	r5, r5, #0\n\t"
583         /* A[3] * B[6] */
584         "ldr	r6, [%[a], #12]\n\t"
585         "ldr	r8, [%[b], #24]\n\t"
586         "umull	r6, r8, r6, r8\n\t"
587         "adds	r3, r3, r6\n\t"
588         "adcs 	r4, r4, r8\n\t"
589         "adc	r5, r5, #0\n\t"
590         /* A[4] * B[5] */
591         "ldr	r6, [%[a], #16]\n\t"
592         "ldr	r8, [%[b], #20]\n\t"
593         "umull	r6, r8, r6, r8\n\t"
594         "adds	r3, r3, r6\n\t"
595         "adcs 	r4, r4, r8\n\t"
596         "adc	r5, r5, #0\n\t"
597         /* A[5] * B[4] */
598         "ldr	r6, [%[a], #20]\n\t"
599         "ldr	r8, [%[b], #16]\n\t"
600         "umull	r6, r8, r6, r8\n\t"
601         "adds	r3, r3, r6\n\t"
602         "adcs 	r4, r4, r8\n\t"
603         "adc	r5, r5, #0\n\t"
604         /* A[6] * B[3] */
605         "ldr	r6, [%[a], #24]\n\t"
606         "ldr	r8, [%[b], #12]\n\t"
607         "umull	r6, r8, r6, r8\n\t"
608         "adds	r3, r3, r6\n\t"
609         "adcs 	r4, r4, r8\n\t"
610         "adc	r5, r5, #0\n\t"
611         /* A[7] * B[2] */
612         "ldr	r6, [%[a], #28]\n\t"
613         "ldr	r8, [%[b], #8]\n\t"
614         "umull	r6, r8, r6, r8\n\t"
615         "adds	r3, r3, r6\n\t"
616         "adcs 	r4, r4, r8\n\t"
617         "adc	r5, r5, #0\n\t"
618         "str	r3, [%[r], #36]\n\t"
619         "mov	r3, #0\n\t"
620         /* A[3] * B[7] */
621         "ldr	r6, [%[a], #12]\n\t"
622         "ldr	r8, [%[b], #28]\n\t"
623         "umull	r6, r8, r6, r8\n\t"
624         "adds	r4, r4, r6\n\t"
625         "adcs 	r5, r5, r8\n\t"
626         "adc	r3, r3, #0\n\t"
627         /* A[4] * B[6] */
628         "ldr	r6, [%[a], #16]\n\t"
629         "ldr	r8, [%[b], #24]\n\t"
630         "umull	r6, r8, r6, r8\n\t"
631         "adds	r4, r4, r6\n\t"
632         "adcs 	r5, r5, r8\n\t"
633         "adc	r3, r3, #0\n\t"
634         /* A[5] * B[5] */
635         "ldr	r6, [%[a], #20]\n\t"
636         "ldr	r8, [%[b], #20]\n\t"
637         "umull	r6, r8, r6, r8\n\t"
638         "adds	r4, r4, r6\n\t"
639         "adcs 	r5, r5, r8\n\t"
640         "adc	r3, r3, #0\n\t"
641         /* A[6] * B[4] */
642         "ldr	r6, [%[a], #24]\n\t"
643         "ldr	r8, [%[b], #16]\n\t"
644         "umull	r6, r8, r6, r8\n\t"
645         "adds	r4, r4, r6\n\t"
646         "adcs 	r5, r5, r8\n\t"
647         "adc	r3, r3, #0\n\t"
648         /* A[7] * B[3] */
649         "ldr	r6, [%[a], #28]\n\t"
650         "ldr	r8, [%[b], #12]\n\t"
651         "umull	r6, r8, r6, r8\n\t"
652         "adds	r4, r4, r6\n\t"
653         "adcs 	r5, r5, r8\n\t"
654         "adc	r3, r3, #0\n\t"
655         "str	r4, [%[r], #40]\n\t"
656         "mov	r4, #0\n\t"
657         /* A[4] * B[7] */
658         "ldr	r6, [%[a], #16]\n\t"
659         "ldr	r8, [%[b], #28]\n\t"
660         "umull	r6, r8, r6, r8\n\t"
661         "adds	r5, r5, r6\n\t"
662         "adcs 	r3, r3, r8\n\t"
663         "adc	r4, r4, #0\n\t"
664         /* A[5] * B[6] */
665         "ldr	r6, [%[a], #20]\n\t"
666         "ldr	r8, [%[b], #24]\n\t"
667         "umull	r6, r8, r6, r8\n\t"
668         "adds	r5, r5, r6\n\t"
669         "adcs 	r3, r3, r8\n\t"
670         "adc	r4, r4, #0\n\t"
671         /* A[6] * B[5] */
672         "ldr	r6, [%[a], #24]\n\t"
673         "ldr	r8, [%[b], #20]\n\t"
674         "umull	r6, r8, r6, r8\n\t"
675         "adds	r5, r5, r6\n\t"
676         "adcs 	r3, r3, r8\n\t"
677         "adc	r4, r4, #0\n\t"
678         /* A[7] * B[4] */
679         "ldr	r6, [%[a], #28]\n\t"
680         "ldr	r8, [%[b], #16]\n\t"
681         "umull	r6, r8, r6, r8\n\t"
682         "adds	r5, r5, r6\n\t"
683         "adcs 	r3, r3, r8\n\t"
684         "adc	r4, r4, #0\n\t"
685         "str	r5, [%[r], #44]\n\t"
686         "mov	r5, #0\n\t"
687         /* A[5] * B[7] */
688         "ldr	r6, [%[a], #20]\n\t"
689         "ldr	r8, [%[b], #28]\n\t"
690         "umull	r6, r8, r6, r8\n\t"
691         "adds	r3, r3, r6\n\t"
692         "adcs 	r4, r4, r8\n\t"
693         "adc	r5, r5, #0\n\t"
694         /* A[6] * B[6] */
695         "ldr	r6, [%[a], #24]\n\t"
696         "ldr	r8, [%[b], #24]\n\t"
697         "umull	r6, r8, r6, r8\n\t"
698         "adds	r3, r3, r6\n\t"
699         "adcs 	r4, r4, r8\n\t"
700         "adc	r5, r5, #0\n\t"
701         /* A[7] * B[5] */
702         "ldr	r6, [%[a], #28]\n\t"
703         "ldr	r8, [%[b], #20]\n\t"
704         "umull	r6, r8, r6, r8\n\t"
705         "adds	r3, r3, r6\n\t"
706         "adcs 	r4, r4, r8\n\t"
707         "adc	r5, r5, #0\n\t"
708         "str	r3, [%[r], #48]\n\t"
709         "mov	r3, #0\n\t"
710         /* A[6] * B[7] */
711         "ldr	r6, [%[a], #24]\n\t"
712         "ldr	r8, [%[b], #28]\n\t"
713         "umull	r6, r8, r6, r8\n\t"
714         "adds	r4, r4, r6\n\t"
715         "adcs 	r5, r5, r8\n\t"
716         "adc	r3, r3, #0\n\t"
717         /* A[7] * B[6] */
718         "ldr	r6, [%[a], #28]\n\t"
719         "ldr	r8, [%[b], #24]\n\t"
720         "umull	r6, r8, r6, r8\n\t"
721         "adds	r4, r4, r6\n\t"
722         "adcs 	r5, r5, r8\n\t"
723         "adc	r3, r3, #0\n\t"
724         "str	r4, [%[r], #52]\n\t"
725         "mov	r4, #0\n\t"
726         /* A[7] * B[7] */
727         "ldr	r6, [%[a], #28]\n\t"
728         "ldr	r8, [%[b], #28]\n\t"
729         "umull	r6, r8, r6, r8\n\t"
730         "adds	r5, r5, r6\n\t"
731         "adc	r3, r3, r8\n\t"
732         "str	r5, [%[r], #56]\n\t"
733         "str	r3, [%[r], #60]\n\t"
734         /* Transfer tmp to r */
735         "ldr	r3, [%[tmp], #0]\n\t"
736         "ldr	r4, [%[tmp], #4]\n\t"
737         "ldr	r5, [%[tmp], #8]\n\t"
738         "ldr	r6, [%[tmp], #12]\n\t"
739         "str	r3, [%[r], #0]\n\t"
740         "str	r4, [%[r], #4]\n\t"
741         "str	r5, [%[r], #8]\n\t"
742         "str	r6, [%[r], #12]\n\t"
743         "ldr	r3, [%[tmp], #16]\n\t"
744         "ldr	r4, [%[tmp], #20]\n\t"
745         "ldr	r5, [%[tmp], #24]\n\t"
746         "ldr	r6, [%[tmp], #28]\n\t"
747         "str	r3, [%[r], #16]\n\t"
748         "str	r4, [%[r], #20]\n\t"
749         "str	r5, [%[r], #24]\n\t"
750         "str	r6, [%[r], #28]\n\t"
751         :
752         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
753         : "memory", "r3", "r4", "r5", "r6", "r8"
754     );
755 }
756 
757 /* Square a and put result in r. (r = a * a)
758  *
759  * r  A single precision integer.
760  * a  A single precision integer.
761  */
sp_2048_sqr_8(sp_digit * r,const sp_digit * a)762 SP_NOINLINE static void sp_2048_sqr_8(sp_digit* r, const sp_digit* a)
763 {
764     sp_digit tmp_arr[8];
765     sp_digit* tmp = tmp_arr;
766     __asm__ __volatile__ (
767         /* A[0] * A[0] */
768         "ldr	r6, [%[a], #0]\n\t"
769         "umull	r3, r4, r6, r6\n\t"
770         "mov	r5, #0\n\t"
771         "str	r3, [%[tmp], #0]\n\t"
772         "mov	r3, #0\n\t"
773         /* A[0] * A[1] */
774         "ldr	r8, [%[a], #4]\n\t"
775         "umull	r6, r8, r6, r8\n\t"
776         "adds	r4, r4, r6\n\t"
777         "adc	r5, r5, r8\n\t"
778         "adds	r4, r4, r6\n\t"
779         "adcs 	r5, r5, r8\n\t"
780         "adc	r3, r3, #0\n\t"
781         "str	r4, [%[tmp], #4]\n\t"
782         "mov	r4, #0\n\t"
783         /* A[0] * A[2] */
784         "ldr	r6, [%[a], #0]\n\t"
785         "ldr	r8, [%[a], #8]\n\t"
786         "umull	r6, r8, r6, r8\n\t"
787         "adds	r5, r5, r6\n\t"
788         "adc	r3, r3, r8\n\t"
789         "adds	r5, r5, r6\n\t"
790         "adcs 	r3, r3, r8\n\t"
791         "adc	r4, r4, #0\n\t"
792         /* A[1] * A[1] */
793         "ldr	r6, [%[a], #4]\n\t"
794         "umull	r6, r8, r6, r6\n\t"
795         "adds	r5, r5, r6\n\t"
796         "adcs	r3, r3, r8\n\t"
797         "adc	r4, r4, #0\n\t"
798         "str	r5, [%[tmp], #8]\n\t"
799         "mov	r5, #0\n\t"
800         /* A[0] * A[3] */
801         "ldr	r6, [%[a], #0]\n\t"
802         "ldr	r8, [%[a], #12]\n\t"
803         "umull	r9, r10, r6, r8\n\t"
804         "mov	r11, #0\n\t"
805         /* A[1] * A[2] */
806         "ldr	r6, [%[a], #4]\n\t"
807         "ldr	r8, [%[a], #8]\n\t"
808         "umull	r6, r8, r6, r8\n\t"
809         "adds	r9, r9, r6\n\t"
810         "adcs 	r10, r10, r8\n\t"
811         "adc	r11, r11, #0\n\t"
812         "adds	r9, r9, r9\n\t"
813         "adcs	r10, r10, r10\n\t"
814         "adc	r11, r11, r11\n\t"
815         "adds	r3, r3, r9\n\t"
816         "adcs	r4, r4, r10\n\t"
817         "adc	r5, r5, r11\n\t"
818         "str	r3, [%[tmp], #12]\n\t"
819         "mov	r3, #0\n\t"
820         /* A[0] * A[4] */
821         "ldr	r6, [%[a], #0]\n\t"
822         "ldr	r8, [%[a], #16]\n\t"
823         "umull	r9, r10, r6, r8\n\t"
824         "mov	r11, #0\n\t"
825         /* A[1] * A[3] */
826         "ldr	r6, [%[a], #4]\n\t"
827         "ldr	r8, [%[a], #12]\n\t"
828         "umull	r6, r8, r6, r8\n\t"
829         "adds	r9, r9, r6\n\t"
830         "adcs 	r10, r10, r8\n\t"
831         "adc	r11, r11, #0\n\t"
832         /* A[2] * A[2] */
833         "ldr	r6, [%[a], #8]\n\t"
834         "umull	r6, r8, r6, r6\n\t"
835         "adds	r4, r4, r6\n\t"
836         "adcs	r5, r5, r8\n\t"
837         "adc	r3, r3, #0\n\t"
838         "adds	r9, r9, r9\n\t"
839         "adcs	r10, r10, r10\n\t"
840         "adc	r11, r11, r11\n\t"
841         "adds	r4, r4, r9\n\t"
842         "adcs	r5, r5, r10\n\t"
843         "adc	r3, r3, r11\n\t"
844         "str	r4, [%[tmp], #16]\n\t"
845         "mov	r4, #0\n\t"
846         /* A[0] * A[5] */
847         "ldr	r6, [%[a], #0]\n\t"
848         "ldr	r8, [%[a], #20]\n\t"
849         "umull	r9, r10, r6, r8\n\t"
850         "mov	r11, #0\n\t"
851         /* A[1] * A[4] */
852         "ldr	r6, [%[a], #4]\n\t"
853         "ldr	r8, [%[a], #16]\n\t"
854         "umull	r6, r8, r6, r8\n\t"
855         "adds	r9, r9, r6\n\t"
856         "adcs 	r10, r10, r8\n\t"
857         "adc	r11, r11, #0\n\t"
858         /* A[2] * A[3] */
859         "ldr	r6, [%[a], #8]\n\t"
860         "ldr	r8, [%[a], #12]\n\t"
861         "umull	r6, r8, r6, r8\n\t"
862         "adds	r9, r9, r6\n\t"
863         "adcs 	r10, r10, r8\n\t"
864         "adc	r11, r11, #0\n\t"
865         "adds	r9, r9, r9\n\t"
866         "adcs	r10, r10, r10\n\t"
867         "adc	r11, r11, r11\n\t"
868         "adds	r5, r5, r9\n\t"
869         "adcs	r3, r3, r10\n\t"
870         "adc	r4, r4, r11\n\t"
871         "str	r5, [%[tmp], #20]\n\t"
872         "mov	r5, #0\n\t"
873         /* A[0] * A[6] */
874         "ldr	r6, [%[a], #0]\n\t"
875         "ldr	r8, [%[a], #24]\n\t"
876         "umull	r9, r10, r6, r8\n\t"
877         "mov	r11, #0\n\t"
878         /* A[1] * A[5] */
879         "ldr	r6, [%[a], #4]\n\t"
880         "ldr	r8, [%[a], #20]\n\t"
881         "umull	r6, r8, r6, r8\n\t"
882         "adds	r9, r9, r6\n\t"
883         "adcs 	r10, r10, r8\n\t"
884         "adc	r11, r11, #0\n\t"
885         /* A[2] * A[4] */
886         "ldr	r6, [%[a], #8]\n\t"
887         "ldr	r8, [%[a], #16]\n\t"
888         "umull	r6, r8, r6, r8\n\t"
889         "adds	r9, r9, r6\n\t"
890         "adcs 	r10, r10, r8\n\t"
891         "adc	r11, r11, #0\n\t"
892         /* A[3] * A[3] */
893         "ldr	r6, [%[a], #12]\n\t"
894         "umull	r6, r8, r6, r6\n\t"
895         "adds	r3, r3, r6\n\t"
896         "adcs	r4, r4, r8\n\t"
897         "adc	r5, r5, #0\n\t"
898         "adds	r9, r9, r9\n\t"
899         "adcs	r10, r10, r10\n\t"
900         "adc	r11, r11, r11\n\t"
901         "adds	r3, r3, r9\n\t"
902         "adcs	r4, r4, r10\n\t"
903         "adc	r5, r5, r11\n\t"
904         "str	r3, [%[tmp], #24]\n\t"
905         "mov	r3, #0\n\t"
906         /* A[0] * A[7] */
907         "ldr	r6, [%[a], #0]\n\t"
908         "ldr	r8, [%[a], #28]\n\t"
909         "umull	r9, r10, r6, r8\n\t"
910         "mov	r11, #0\n\t"
911         /* A[1] * A[6] */
912         "ldr	r6, [%[a], #4]\n\t"
913         "ldr	r8, [%[a], #24]\n\t"
914         "umull	r6, r8, r6, r8\n\t"
915         "adds	r9, r9, r6\n\t"
916         "adcs 	r10, r10, r8\n\t"
917         "adc	r11, r11, #0\n\t"
918         /* A[2] * A[5] */
919         "ldr	r6, [%[a], #8]\n\t"
920         "ldr	r8, [%[a], #20]\n\t"
921         "umull	r6, r8, r6, r8\n\t"
922         "adds	r9, r9, r6\n\t"
923         "adcs 	r10, r10, r8\n\t"
924         "adc	r11, r11, #0\n\t"
925         /* A[3] * A[4] */
926         "ldr	r6, [%[a], #12]\n\t"
927         "ldr	r8, [%[a], #16]\n\t"
928         "umull	r6, r8, r6, r8\n\t"
929         "adds	r9, r9, r6\n\t"
930         "adcs 	r10, r10, r8\n\t"
931         "adc	r11, r11, #0\n\t"
932         "adds	r9, r9, r9\n\t"
933         "adcs	r10, r10, r10\n\t"
934         "adc	r11, r11, r11\n\t"
935         "adds	r4, r4, r9\n\t"
936         "adcs	r5, r5, r10\n\t"
937         "adc	r3, r3, r11\n\t"
938         "str	r4, [%[tmp], #28]\n\t"
939         "mov	r4, #0\n\t"
940         /* A[1] * A[7] */
941         "ldr	r6, [%[a], #4]\n\t"
942         "ldr	r8, [%[a], #28]\n\t"
943         "umull	r9, r10, r6, r8\n\t"
944         "mov	r11, #0\n\t"
945         /* A[2] * A[6] */
946         "ldr	r6, [%[a], #8]\n\t"
947         "ldr	r8, [%[a], #24]\n\t"
948         "umull	r6, r8, r6, r8\n\t"
949         "adds	r9, r9, r6\n\t"
950         "adcs 	r10, r10, r8\n\t"
951         "adc	r11, r11, #0\n\t"
952         /* A[3] * A[5] */
953         "ldr	r6, [%[a], #12]\n\t"
954         "ldr	r8, [%[a], #20]\n\t"
955         "umull	r6, r8, r6, r8\n\t"
956         "adds	r9, r9, r6\n\t"
957         "adcs 	r10, r10, r8\n\t"
958         "adc	r11, r11, #0\n\t"
959         /* A[4] * A[4] */
960         "ldr	r6, [%[a], #16]\n\t"
961         "umull	r6, r8, r6, r6\n\t"
962         "adds	r5, r5, r6\n\t"
963         "adcs	r3, r3, r8\n\t"
964         "adc	r4, r4, #0\n\t"
965         "adds	r9, r9, r9\n\t"
966         "adcs	r10, r10, r10\n\t"
967         "adc	r11, r11, r11\n\t"
968         "adds	r5, r5, r9\n\t"
969         "adcs	r3, r3, r10\n\t"
970         "adc	r4, r4, r11\n\t"
971         "str	r5, [%[r], #32]\n\t"
972         "mov	r5, #0\n\t"
973         /* A[2] * A[7] */
974         "ldr	r6, [%[a], #8]\n\t"
975         "ldr	r8, [%[a], #28]\n\t"
976         "umull	r9, r10, r6, r8\n\t"
977         "mov	r11, #0\n\t"
978         /* A[3] * A[6] */
979         "ldr	r6, [%[a], #12]\n\t"
980         "ldr	r8, [%[a], #24]\n\t"
981         "umull	r6, r8, r6, r8\n\t"
982         "adds	r9, r9, r6\n\t"
983         "adcs 	r10, r10, r8\n\t"
984         "adc	r11, r11, #0\n\t"
985         /* A[4] * A[5] */
986         "ldr	r6, [%[a], #16]\n\t"
987         "ldr	r8, [%[a], #20]\n\t"
988         "umull	r6, r8, r6, r8\n\t"
989         "adds	r9, r9, r6\n\t"
990         "adcs 	r10, r10, r8\n\t"
991         "adc	r11, r11, #0\n\t"
992         "adds	r9, r9, r9\n\t"
993         "adcs	r10, r10, r10\n\t"
994         "adc	r11, r11, r11\n\t"
995         "adds	r3, r3, r9\n\t"
996         "adcs	r4, r4, r10\n\t"
997         "adc	r5, r5, r11\n\t"
998         "str	r3, [%[r], #36]\n\t"
999         "mov	r3, #0\n\t"
1000         /* A[3] * A[7] */
1001         "ldr	r6, [%[a], #12]\n\t"
1002         "ldr	r8, [%[a], #28]\n\t"
1003         "umull	r9, r10, r6, r8\n\t"
1004         "mov	r11, #0\n\t"
1005         /* A[4] * A[6] */
1006         "ldr	r6, [%[a], #16]\n\t"
1007         "ldr	r8, [%[a], #24]\n\t"
1008         "umull	r6, r8, r6, r8\n\t"
1009         "adds	r9, r9, r6\n\t"
1010         "adcs 	r10, r10, r8\n\t"
1011         "adc	r11, r11, #0\n\t"
1012         /* A[5] * A[5] */
1013         "ldr	r6, [%[a], #20]\n\t"
1014         "umull	r6, r8, r6, r6\n\t"
1015         "adds	r4, r4, r6\n\t"
1016         "adcs	r5, r5, r8\n\t"
1017         "adc	r3, r3, #0\n\t"
1018         "adds	r9, r9, r9\n\t"
1019         "adcs	r10, r10, r10\n\t"
1020         "adc	r11, r11, r11\n\t"
1021         "adds	r4, r4, r9\n\t"
1022         "adcs	r5, r5, r10\n\t"
1023         "adc	r3, r3, r11\n\t"
1024         "str	r4, [%[r], #40]\n\t"
1025         "mov	r4, #0\n\t"
1026         /* A[4] * A[7] */
1027         "ldr	r6, [%[a], #16]\n\t"
1028         "ldr	r8, [%[a], #28]\n\t"
1029         "umull	r6, r8, r6, r8\n\t"
1030         "adds	r5, r5, r6\n\t"
1031         "adcs 	r3, r3, r8\n\t"
1032         "adc	r4, r4, #0\n\t"
1033         "adds	r5, r5, r6\n\t"
1034         "adcs 	r3, r3, r8\n\t"
1035         "adc	r4, r4, #0\n\t"
1036         /* A[5] * A[6] */
1037         "ldr	r6, [%[a], #20]\n\t"
1038         "ldr	r8, [%[a], #24]\n\t"
1039         "umull	r6, r8, r6, r8\n\t"
1040         "adds	r5, r5, r6\n\t"
1041         "adcs 	r3, r3, r8\n\t"
1042         "adc	r4, r4, #0\n\t"
1043         "adds	r5, r5, r6\n\t"
1044         "adcs 	r3, r3, r8\n\t"
1045         "adc	r4, r4, #0\n\t"
1046         "str	r5, [%[r], #44]\n\t"
1047         "mov	r5, #0\n\t"
1048         /* A[5] * A[7] */
1049         "ldr	r6, [%[a], #20]\n\t"
1050         "ldr	r8, [%[a], #28]\n\t"
1051         "umull	r6, r8, r6, r8\n\t"
1052         "adds	r3, r3, r6\n\t"
1053         "adcs 	r4, r4, r8\n\t"
1054         "adc	r5, r5, #0\n\t"
1055         "adds	r3, r3, r6\n\t"
1056         "adcs 	r4, r4, r8\n\t"
1057         "adc	r5, r5, #0\n\t"
1058         /* A[6] * A[6] */
1059         "ldr	r6, [%[a], #24]\n\t"
1060         "umull	r6, r8, r6, r6\n\t"
1061         "adds	r3, r3, r6\n\t"
1062         "adcs	r4, r4, r8\n\t"
1063         "adc	r5, r5, #0\n\t"
1064         "str	r3, [%[r], #48]\n\t"
1065         "mov	r3, #0\n\t"
1066         /* A[6] * A[7] */
1067         "ldr	r6, [%[a], #24]\n\t"
1068         "ldr	r8, [%[a], #28]\n\t"
1069         "umull	r6, r8, r6, r8\n\t"
1070         "adds	r4, r4, r6\n\t"
1071         "adcs 	r5, r5, r8\n\t"
1072         "adc	r3, r3, #0\n\t"
1073         "adds	r4, r4, r6\n\t"
1074         "adcs 	r5, r5, r8\n\t"
1075         "adc	r3, r3, #0\n\t"
1076         "str	r4, [%[r], #52]\n\t"
1077         "mov	r4, #0\n\t"
1078         /* A[7] * A[7] */
1079         "ldr	r6, [%[a], #28]\n\t"
1080         "umull	r6, r8, r6, r6\n\t"
1081         "adds	r5, r5, r6\n\t"
1082         "adc	r3, r3, r8\n\t"
1083         "str	r5, [%[r], #56]\n\t"
1084         "str	r3, [%[r], #60]\n\t"
1085         /* Transfer tmp to r */
1086         "ldr	r3, [%[tmp], #0]\n\t"
1087         "ldr	r4, [%[tmp], #4]\n\t"
1088         "ldr	r5, [%[tmp], #8]\n\t"
1089         "ldr	r6, [%[tmp], #12]\n\t"
1090         "str	r3, [%[r], #0]\n\t"
1091         "str	r4, [%[r], #4]\n\t"
1092         "str	r5, [%[r], #8]\n\t"
1093         "str	r6, [%[r], #12]\n\t"
1094         "ldr	r3, [%[tmp], #16]\n\t"
1095         "ldr	r4, [%[tmp], #20]\n\t"
1096         "ldr	r5, [%[tmp], #24]\n\t"
1097         "ldr	r6, [%[tmp], #28]\n\t"
1098         "str	r3, [%[r], #16]\n\t"
1099         "str	r4, [%[r], #20]\n\t"
1100         "str	r5, [%[r], #24]\n\t"
1101         "str	r6, [%[r], #28]\n\t"
1102         :
1103         : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
1104         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
1105     );
1106 }
1107 
1108 /* Add b to a into r. (r = a + b)
1109  *
1110  * r  A single precision integer.
1111  * a  A single precision integer.
1112  * b  A single precision integer.
1113  */
sp_2048_add_8(sp_digit * r,const sp_digit * a,const sp_digit * b)1114 SP_NOINLINE static sp_digit sp_2048_add_8(sp_digit* r, const sp_digit* a,
1115         const sp_digit* b)
1116 {
1117     sp_digit c = 0;
1118 
1119     __asm__ __volatile__ (
1120         "ldm	%[a]!, {r4, r5}\n\t"
1121         "ldm	%[b]!, {r6, r8}\n\t"
1122         "adds	r4, r4, r6\n\t"
1123         "adcs	r5, r5, r8\n\t"
1124         "stm	%[r]!, {r4, r5}\n\t"
1125         "ldm	%[a]!, {r4, r5}\n\t"
1126         "ldm	%[b]!, {r6, r8}\n\t"
1127         "adcs	r4, r4, r6\n\t"
1128         "adcs	r5, r5, r8\n\t"
1129         "stm	%[r]!, {r4, r5}\n\t"
1130         "ldm	%[a]!, {r4, r5}\n\t"
1131         "ldm	%[b]!, {r6, r8}\n\t"
1132         "adcs	r4, r4, r6\n\t"
1133         "adcs	r5, r5, r8\n\t"
1134         "stm	%[r]!, {r4, r5}\n\t"
1135         "ldm	%[a]!, {r4, r5}\n\t"
1136         "ldm	%[b]!, {r6, r8}\n\t"
1137         "adcs	r4, r4, r6\n\t"
1138         "adcs	r5, r5, r8\n\t"
1139         "stm	%[r]!, {r4, r5}\n\t"
1140         "mov	%[c], #0\n\t"
1141         "adc	%[c], %[c], %[c]\n\t"
1142         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
1143         :
1144         : "memory", "r4", "r5", "r6", "r8"
1145     );
1146 
1147     return c;
1148 }
1149 
1150 /* Sub b from a into r. (r = a - b)
1151  *
1152  * r  A single precision integer.
1153  * a  A single precision integer.
1154  * b  A single precision integer.
1155  */
sp_2048_sub_in_place_16(sp_digit * a,const sp_digit * b)1156 SP_NOINLINE static sp_digit sp_2048_sub_in_place_16(sp_digit* a,
1157         const sp_digit* b)
1158 {
1159     sp_digit c = 0;
1160 
1161     __asm__ __volatile__ (
1162         "ldm	%[a], {r3, r4}\n\t"
1163         "ldm	%[b]!, {r5, r6}\n\t"
1164         "subs	r3, r3, r5\n\t"
1165         "sbcs	r4, r4, r6\n\t"
1166         "stm	%[a]!, {r3, r4}\n\t"
1167         "ldm	%[a], {r3, r4}\n\t"
1168         "ldm	%[b]!, {r5, r6}\n\t"
1169         "sbcs	r3, r3, r5\n\t"
1170         "sbcs	r4, r4, r6\n\t"
1171         "stm	%[a]!, {r3, r4}\n\t"
1172         "ldm	%[a], {r3, r4}\n\t"
1173         "ldm	%[b]!, {r5, r6}\n\t"
1174         "sbcs	r3, r3, r5\n\t"
1175         "sbcs	r4, r4, r6\n\t"
1176         "stm	%[a]!, {r3, r4}\n\t"
1177         "ldm	%[a], {r3, r4}\n\t"
1178         "ldm	%[b]!, {r5, r6}\n\t"
1179         "sbcs	r3, r3, r5\n\t"
1180         "sbcs	r4, r4, r6\n\t"
1181         "stm	%[a]!, {r3, r4}\n\t"
1182         "ldm	%[a], {r3, r4}\n\t"
1183         "ldm	%[b]!, {r5, r6}\n\t"
1184         "sbcs	r3, r3, r5\n\t"
1185         "sbcs	r4, r4, r6\n\t"
1186         "stm	%[a]!, {r3, r4}\n\t"
1187         "ldm	%[a], {r3, r4}\n\t"
1188         "ldm	%[b]!, {r5, r6}\n\t"
1189         "sbcs	r3, r3, r5\n\t"
1190         "sbcs	r4, r4, r6\n\t"
1191         "stm	%[a]!, {r3, r4}\n\t"
1192         "ldm	%[a], {r3, r4}\n\t"
1193         "ldm	%[b]!, {r5, r6}\n\t"
1194         "sbcs	r3, r3, r5\n\t"
1195         "sbcs	r4, r4, r6\n\t"
1196         "stm	%[a]!, {r3, r4}\n\t"
1197         "ldm	%[a], {r3, r4}\n\t"
1198         "ldm	%[b]!, {r5, r6}\n\t"
1199         "sbcs	r3, r3, r5\n\t"
1200         "sbcs	r4, r4, r6\n\t"
1201         "stm	%[a]!, {r3, r4}\n\t"
1202         "sbc	%[c], %[c], %[c]\n\t"
1203         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
1204         :
1205         : "memory", "r3", "r4", "r5", "r6"
1206     );
1207 
1208     return c;
1209 }
1210 
1211 /* Add b to a into r. (r = a + b)
1212  *
1213  * r  A single precision integer.
1214  * a  A single precision integer.
1215  * b  A single precision integer.
1216  */
sp_2048_add_16(sp_digit * r,const sp_digit * a,const sp_digit * b)1217 SP_NOINLINE static sp_digit sp_2048_add_16(sp_digit* r, const sp_digit* a,
1218         const sp_digit* b)
1219 {
1220     sp_digit c = 0;
1221 
1222     __asm__ __volatile__ (
1223         "ldm	%[a]!, {r4, r5}\n\t"
1224         "ldm	%[b]!, {r6, r8}\n\t"
1225         "adds	r4, r4, r6\n\t"
1226         "adcs	r5, r5, r8\n\t"
1227         "stm	%[r]!, {r4, r5}\n\t"
1228         "ldm	%[a]!, {r4, r5}\n\t"
1229         "ldm	%[b]!, {r6, r8}\n\t"
1230         "adcs	r4, r4, r6\n\t"
1231         "adcs	r5, r5, r8\n\t"
1232         "stm	%[r]!, {r4, r5}\n\t"
1233         "ldm	%[a]!, {r4, r5}\n\t"
1234         "ldm	%[b]!, {r6, r8}\n\t"
1235         "adcs	r4, r4, r6\n\t"
1236         "adcs	r5, r5, r8\n\t"
1237         "stm	%[r]!, {r4, r5}\n\t"
1238         "ldm	%[a]!, {r4, r5}\n\t"
1239         "ldm	%[b]!, {r6, r8}\n\t"
1240         "adcs	r4, r4, r6\n\t"
1241         "adcs	r5, r5, r8\n\t"
1242         "stm	%[r]!, {r4, r5}\n\t"
1243         "ldm	%[a]!, {r4, r5}\n\t"
1244         "ldm	%[b]!, {r6, r8}\n\t"
1245         "adcs	r4, r4, r6\n\t"
1246         "adcs	r5, r5, r8\n\t"
1247         "stm	%[r]!, {r4, r5}\n\t"
1248         "ldm	%[a]!, {r4, r5}\n\t"
1249         "ldm	%[b]!, {r6, r8}\n\t"
1250         "adcs	r4, r4, r6\n\t"
1251         "adcs	r5, r5, r8\n\t"
1252         "stm	%[r]!, {r4, r5}\n\t"
1253         "ldm	%[a]!, {r4, r5}\n\t"
1254         "ldm	%[b]!, {r6, r8}\n\t"
1255         "adcs	r4, r4, r6\n\t"
1256         "adcs	r5, r5, r8\n\t"
1257         "stm	%[r]!, {r4, r5}\n\t"
1258         "ldm	%[a]!, {r4, r5}\n\t"
1259         "ldm	%[b]!, {r6, r8}\n\t"
1260         "adcs	r4, r4, r6\n\t"
1261         "adcs	r5, r5, r8\n\t"
1262         "stm	%[r]!, {r4, r5}\n\t"
1263         "mov	%[c], #0\n\t"
1264         "adc	%[c], %[c], %[c]\n\t"
1265         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
1266         :
1267         : "memory", "r4", "r5", "r6", "r8"
1268     );
1269 
1270     return c;
1271 }
1272 
1273 /* AND m into each word of a and store in r.
1274  *
1275  * r  A single precision integer.
1276  * a  A single precision integer.
1277  * m  Mask to AND against each digit.
1278  */
sp_2048_mask_8(sp_digit * r,const sp_digit * a,sp_digit m)1279 static void sp_2048_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
1280 {
1281 #ifdef WOLFSSL_SP_SMALL
1282     int i;
1283 
1284     for (i=0; i<8; i++) {
1285         r[i] = a[i] & m;
1286     }
1287 #else
1288     r[0] = a[0] & m;
1289     r[1] = a[1] & m;
1290     r[2] = a[2] & m;
1291     r[3] = a[3] & m;
1292     r[4] = a[4] & m;
1293     r[5] = a[5] & m;
1294     r[6] = a[6] & m;
1295     r[7] = a[7] & m;
1296 #endif
1297 }
1298 
1299 /* Multiply a and b into r. (r = a * b)
1300  *
1301  * r  A single precision integer.
1302  * a  A single precision integer.
1303  * b  A single precision integer.
1304  */
sp_2048_mul_16(sp_digit * r,const sp_digit * a,const sp_digit * b)1305 SP_NOINLINE static void sp_2048_mul_16(sp_digit* r, const sp_digit* a,
1306         const sp_digit* b)
1307 {
1308     sp_digit* z0 = r;
1309     sp_digit z1[16];
1310     sp_digit a1[8];
1311     sp_digit b1[8];
1312     sp_digit z2[16];
1313     sp_digit u;
1314     sp_digit ca;
1315     sp_digit cb;
1316 
1317     ca = sp_2048_add_8(a1, a, &a[8]);
1318     cb = sp_2048_add_8(b1, b, &b[8]);
1319     u  = ca & cb;
1320     sp_2048_mul_8(z1, a1, b1);
1321     sp_2048_mul_8(z2, &a[8], &b[8]);
1322     sp_2048_mul_8(z0, a, b);
1323     sp_2048_mask_8(r + 16, a1, 0 - cb);
1324     sp_2048_mask_8(b1, b1, 0 - ca);
1325     u += sp_2048_add_8(r + 16, r + 16, b1);
1326     u += sp_2048_sub_in_place_16(z1, z2);
1327     u += sp_2048_sub_in_place_16(z1, z0);
1328     u += sp_2048_add_16(r + 8, r + 8, z1);
1329     r[24] = u;
1330     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
1331     (void)sp_2048_add_16(r + 16, r + 16, z2);
1332 }
1333 
1334 /* Square a and put result in r. (r = a * a)
1335  *
1336  * r  A single precision integer.
1337  * a  A single precision integer.
1338  */
sp_2048_sqr_16(sp_digit * r,const sp_digit * a)1339 SP_NOINLINE static void sp_2048_sqr_16(sp_digit* r, const sp_digit* a)
1340 {
1341     sp_digit* z0 = r;
1342     sp_digit z2[16];
1343     sp_digit z1[16];
1344     sp_digit a1[8];
1345     sp_digit u;
1346 
1347     u = sp_2048_add_8(a1, a, &a[8]);
1348     sp_2048_sqr_8(z1, a1);
1349     sp_2048_sqr_8(z2, &a[8]);
1350     sp_2048_sqr_8(z0, a);
1351     sp_2048_mask_8(r + 16, a1, 0 - u);
1352     u += sp_2048_add_8(r + 16, r + 16, r + 16);
1353     u += sp_2048_sub_in_place_16(z1, z2);
1354     u += sp_2048_sub_in_place_16(z1, z0);
1355     u += sp_2048_add_16(r + 8, r + 8, z1);
1356     r[24] = u;
1357     XMEMSET(r + 24 + 1, 0, sizeof(sp_digit) * (8 - 1));
1358     (void)sp_2048_add_16(r + 16, r + 16, z2);
1359 }
1360 
1361 /* Sub b from a into r. (r = a - b)
1362  *
1363  * r  A single precision integer.
1364  * a  A single precision integer.
1365  * b  A single precision integer.
1366  */
sp_2048_sub_in_place_32(sp_digit * a,const sp_digit * b)1367 SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
1368         const sp_digit* b)
1369 {
1370     sp_digit c = 0;
1371 
1372     __asm__ __volatile__ (
1373         "ldm	%[a], {r3, r4}\n\t"
1374         "ldm	%[b]!, {r5, r6}\n\t"
1375         "subs	r3, r3, r5\n\t"
1376         "sbcs	r4, r4, r6\n\t"
1377         "stm	%[a]!, {r3, r4}\n\t"
1378         "ldm	%[a], {r3, r4}\n\t"
1379         "ldm	%[b]!, {r5, r6}\n\t"
1380         "sbcs	r3, r3, r5\n\t"
1381         "sbcs	r4, r4, r6\n\t"
1382         "stm	%[a]!, {r3, r4}\n\t"
1383         "ldm	%[a], {r3, r4}\n\t"
1384         "ldm	%[b]!, {r5, r6}\n\t"
1385         "sbcs	r3, r3, r5\n\t"
1386         "sbcs	r4, r4, r6\n\t"
1387         "stm	%[a]!, {r3, r4}\n\t"
1388         "ldm	%[a], {r3, r4}\n\t"
1389         "ldm	%[b]!, {r5, r6}\n\t"
1390         "sbcs	r3, r3, r5\n\t"
1391         "sbcs	r4, r4, r6\n\t"
1392         "stm	%[a]!, {r3, r4}\n\t"
1393         "ldm	%[a], {r3, r4}\n\t"
1394         "ldm	%[b]!, {r5, r6}\n\t"
1395         "sbcs	r3, r3, r5\n\t"
1396         "sbcs	r4, r4, r6\n\t"
1397         "stm	%[a]!, {r3, r4}\n\t"
1398         "ldm	%[a], {r3, r4}\n\t"
1399         "ldm	%[b]!, {r5, r6}\n\t"
1400         "sbcs	r3, r3, r5\n\t"
1401         "sbcs	r4, r4, r6\n\t"
1402         "stm	%[a]!, {r3, r4}\n\t"
1403         "ldm	%[a], {r3, r4}\n\t"
1404         "ldm	%[b]!, {r5, r6}\n\t"
1405         "sbcs	r3, r3, r5\n\t"
1406         "sbcs	r4, r4, r6\n\t"
1407         "stm	%[a]!, {r3, r4}\n\t"
1408         "ldm	%[a], {r3, r4}\n\t"
1409         "ldm	%[b]!, {r5, r6}\n\t"
1410         "sbcs	r3, r3, r5\n\t"
1411         "sbcs	r4, r4, r6\n\t"
1412         "stm	%[a]!, {r3, r4}\n\t"
1413         "ldm	%[a], {r3, r4}\n\t"
1414         "ldm	%[b]!, {r5, r6}\n\t"
1415         "sbcs	r3, r3, r5\n\t"
1416         "sbcs	r4, r4, r6\n\t"
1417         "stm	%[a]!, {r3, r4}\n\t"
1418         "ldm	%[a], {r3, r4}\n\t"
1419         "ldm	%[b]!, {r5, r6}\n\t"
1420         "sbcs	r3, r3, r5\n\t"
1421         "sbcs	r4, r4, r6\n\t"
1422         "stm	%[a]!, {r3, r4}\n\t"
1423         "ldm	%[a], {r3, r4}\n\t"
1424         "ldm	%[b]!, {r5, r6}\n\t"
1425         "sbcs	r3, r3, r5\n\t"
1426         "sbcs	r4, r4, r6\n\t"
1427         "stm	%[a]!, {r3, r4}\n\t"
1428         "ldm	%[a], {r3, r4}\n\t"
1429         "ldm	%[b]!, {r5, r6}\n\t"
1430         "sbcs	r3, r3, r5\n\t"
1431         "sbcs	r4, r4, r6\n\t"
1432         "stm	%[a]!, {r3, r4}\n\t"
1433         "ldm	%[a], {r3, r4}\n\t"
1434         "ldm	%[b]!, {r5, r6}\n\t"
1435         "sbcs	r3, r3, r5\n\t"
1436         "sbcs	r4, r4, r6\n\t"
1437         "stm	%[a]!, {r3, r4}\n\t"
1438         "ldm	%[a], {r3, r4}\n\t"
1439         "ldm	%[b]!, {r5, r6}\n\t"
1440         "sbcs	r3, r3, r5\n\t"
1441         "sbcs	r4, r4, r6\n\t"
1442         "stm	%[a]!, {r3, r4}\n\t"
1443         "ldm	%[a], {r3, r4}\n\t"
1444         "ldm	%[b]!, {r5, r6}\n\t"
1445         "sbcs	r3, r3, r5\n\t"
1446         "sbcs	r4, r4, r6\n\t"
1447         "stm	%[a]!, {r3, r4}\n\t"
1448         "ldm	%[a], {r3, r4}\n\t"
1449         "ldm	%[b]!, {r5, r6}\n\t"
1450         "sbcs	r3, r3, r5\n\t"
1451         "sbcs	r4, r4, r6\n\t"
1452         "stm	%[a]!, {r3, r4}\n\t"
1453         "sbc	%[c], %[c], %[c]\n\t"
1454         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
1455         :
1456         : "memory", "r3", "r4", "r5", "r6"
1457     );
1458 
1459     return c;
1460 }
1461 
1462 /* Add b to a into r. (r = a + b)
1463  *
1464  * r  A single precision integer.
1465  * a  A single precision integer.
1466  * b  A single precision integer.
1467  */
sp_2048_add_32(sp_digit * r,const sp_digit * a,const sp_digit * b)1468 SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
1469         const sp_digit* b)
1470 {
1471     sp_digit c = 0;
1472 
1473     __asm__ __volatile__ (
1474         "ldm	%[a]!, {r4, r5}\n\t"
1475         "ldm	%[b]!, {r6, r8}\n\t"
1476         "adds	r4, r4, r6\n\t"
1477         "adcs	r5, r5, r8\n\t"
1478         "stm	%[r]!, {r4, r5}\n\t"
1479         "ldm	%[a]!, {r4, r5}\n\t"
1480         "ldm	%[b]!, {r6, r8}\n\t"
1481         "adcs	r4, r4, r6\n\t"
1482         "adcs	r5, r5, r8\n\t"
1483         "stm	%[r]!, {r4, r5}\n\t"
1484         "ldm	%[a]!, {r4, r5}\n\t"
1485         "ldm	%[b]!, {r6, r8}\n\t"
1486         "adcs	r4, r4, r6\n\t"
1487         "adcs	r5, r5, r8\n\t"
1488         "stm	%[r]!, {r4, r5}\n\t"
1489         "ldm	%[a]!, {r4, r5}\n\t"
1490         "ldm	%[b]!, {r6, r8}\n\t"
1491         "adcs	r4, r4, r6\n\t"
1492         "adcs	r5, r5, r8\n\t"
1493         "stm	%[r]!, {r4, r5}\n\t"
1494         "ldm	%[a]!, {r4, r5}\n\t"
1495         "ldm	%[b]!, {r6, r8}\n\t"
1496         "adcs	r4, r4, r6\n\t"
1497         "adcs	r5, r5, r8\n\t"
1498         "stm	%[r]!, {r4, r5}\n\t"
1499         "ldm	%[a]!, {r4, r5}\n\t"
1500         "ldm	%[b]!, {r6, r8}\n\t"
1501         "adcs	r4, r4, r6\n\t"
1502         "adcs	r5, r5, r8\n\t"
1503         "stm	%[r]!, {r4, r5}\n\t"
1504         "ldm	%[a]!, {r4, r5}\n\t"
1505         "ldm	%[b]!, {r6, r8}\n\t"
1506         "adcs	r4, r4, r6\n\t"
1507         "adcs	r5, r5, r8\n\t"
1508         "stm	%[r]!, {r4, r5}\n\t"
1509         "ldm	%[a]!, {r4, r5}\n\t"
1510         "ldm	%[b]!, {r6, r8}\n\t"
1511         "adcs	r4, r4, r6\n\t"
1512         "adcs	r5, r5, r8\n\t"
1513         "stm	%[r]!, {r4, r5}\n\t"
1514         "ldm	%[a]!, {r4, r5}\n\t"
1515         "ldm	%[b]!, {r6, r8}\n\t"
1516         "adcs	r4, r4, r6\n\t"
1517         "adcs	r5, r5, r8\n\t"
1518         "stm	%[r]!, {r4, r5}\n\t"
1519         "ldm	%[a]!, {r4, r5}\n\t"
1520         "ldm	%[b]!, {r6, r8}\n\t"
1521         "adcs	r4, r4, r6\n\t"
1522         "adcs	r5, r5, r8\n\t"
1523         "stm	%[r]!, {r4, r5}\n\t"
1524         "ldm	%[a]!, {r4, r5}\n\t"
1525         "ldm	%[b]!, {r6, r8}\n\t"
1526         "adcs	r4, r4, r6\n\t"
1527         "adcs	r5, r5, r8\n\t"
1528         "stm	%[r]!, {r4, r5}\n\t"
1529         "ldm	%[a]!, {r4, r5}\n\t"
1530         "ldm	%[b]!, {r6, r8}\n\t"
1531         "adcs	r4, r4, r6\n\t"
1532         "adcs	r5, r5, r8\n\t"
1533         "stm	%[r]!, {r4, r5}\n\t"
1534         "ldm	%[a]!, {r4, r5}\n\t"
1535         "ldm	%[b]!, {r6, r8}\n\t"
1536         "adcs	r4, r4, r6\n\t"
1537         "adcs	r5, r5, r8\n\t"
1538         "stm	%[r]!, {r4, r5}\n\t"
1539         "ldm	%[a]!, {r4, r5}\n\t"
1540         "ldm	%[b]!, {r6, r8}\n\t"
1541         "adcs	r4, r4, r6\n\t"
1542         "adcs	r5, r5, r8\n\t"
1543         "stm	%[r]!, {r4, r5}\n\t"
1544         "ldm	%[a]!, {r4, r5}\n\t"
1545         "ldm	%[b]!, {r6, r8}\n\t"
1546         "adcs	r4, r4, r6\n\t"
1547         "adcs	r5, r5, r8\n\t"
1548         "stm	%[r]!, {r4, r5}\n\t"
1549         "ldm	%[a]!, {r4, r5}\n\t"
1550         "ldm	%[b]!, {r6, r8}\n\t"
1551         "adcs	r4, r4, r6\n\t"
1552         "adcs	r5, r5, r8\n\t"
1553         "stm	%[r]!, {r4, r5}\n\t"
1554         "mov	%[c], #0\n\t"
1555         "adc	%[c], %[c], %[c]\n\t"
1556         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
1557         :
1558         : "memory", "r4", "r5", "r6", "r8"
1559     );
1560 
1561     return c;
1562 }
1563 
1564 /* AND m into each word of a and store in r.
1565  *
1566  * r  A single precision integer.
1567  * a  A single precision integer.
1568  * m  Mask to AND against each digit.
1569  */
sp_2048_mask_16(sp_digit * r,const sp_digit * a,sp_digit m)1570 static void sp_2048_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
1571 {
1572 #ifdef WOLFSSL_SP_SMALL
1573     int i;
1574 
1575     for (i=0; i<16; i++) {
1576         r[i] = a[i] & m;
1577     }
1578 #else
1579     int i;
1580 
1581     for (i = 0; i < 16; i += 8) {
1582         r[i+0] = a[i+0] & m;
1583         r[i+1] = a[i+1] & m;
1584         r[i+2] = a[i+2] & m;
1585         r[i+3] = a[i+3] & m;
1586         r[i+4] = a[i+4] & m;
1587         r[i+5] = a[i+5] & m;
1588         r[i+6] = a[i+6] & m;
1589         r[i+7] = a[i+7] & m;
1590     }
1591 #endif
1592 }
1593 
1594 /* Multiply a and b into r. (r = a * b)
1595  *
1596  * r  A single precision integer.
1597  * a  A single precision integer.
1598  * b  A single precision integer.
1599  */
sp_2048_mul_32(sp_digit * r,const sp_digit * a,const sp_digit * b)1600 SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
1601         const sp_digit* b)
1602 {
1603     sp_digit* z0 = r;
1604     sp_digit z1[32];
1605     sp_digit a1[16];
1606     sp_digit b1[16];
1607     sp_digit z2[32];
1608     sp_digit u;
1609     sp_digit ca;
1610     sp_digit cb;
1611 
1612     ca = sp_2048_add_16(a1, a, &a[16]);
1613     cb = sp_2048_add_16(b1, b, &b[16]);
1614     u  = ca & cb;
1615     sp_2048_mul_16(z1, a1, b1);
1616     sp_2048_mul_16(z2, &a[16], &b[16]);
1617     sp_2048_mul_16(z0, a, b);
1618     sp_2048_mask_16(r + 32, a1, 0 - cb);
1619     sp_2048_mask_16(b1, b1, 0 - ca);
1620     u += sp_2048_add_16(r + 32, r + 32, b1);
1621     u += sp_2048_sub_in_place_32(z1, z2);
1622     u += sp_2048_sub_in_place_32(z1, z0);
1623     u += sp_2048_add_32(r + 16, r + 16, z1);
1624     r[48] = u;
1625     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
1626     (void)sp_2048_add_32(r + 32, r + 32, z2);
1627 }
1628 
1629 /* Square a and put result in r. (r = a * a)
1630  *
1631  * r  A single precision integer.
1632  * a  A single precision integer.
1633  */
sp_2048_sqr_32(sp_digit * r,const sp_digit * a)1634 SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
1635 {
1636     sp_digit* z0 = r;
1637     sp_digit z2[32];
1638     sp_digit z1[32];
1639     sp_digit a1[16];
1640     sp_digit u;
1641 
1642     u = sp_2048_add_16(a1, a, &a[16]);
1643     sp_2048_sqr_16(z1, a1);
1644     sp_2048_sqr_16(z2, &a[16]);
1645     sp_2048_sqr_16(z0, a);
1646     sp_2048_mask_16(r + 32, a1, 0 - u);
1647     u += sp_2048_add_16(r + 32, r + 32, r + 32);
1648     u += sp_2048_sub_in_place_32(z1, z2);
1649     u += sp_2048_sub_in_place_32(z1, z0);
1650     u += sp_2048_add_32(r + 16, r + 16, z1);
1651     r[48] = u;
1652     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
1653     (void)sp_2048_add_32(r + 32, r + 32, z2);
1654 }
1655 
1656 /* Sub b from a into r. (r = a - b)
1657  *
1658  * r  A single precision integer.
1659  * a  A single precision integer.
1660  * b  A single precision integer.
1661  */
sp_2048_sub_in_place_64(sp_digit * a,const sp_digit * b)1662 SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
1663         const sp_digit* b)
1664 {
1665     sp_digit c = 0;
1666 
1667     __asm__ __volatile__ (
1668         "ldm	%[a], {r3, r4}\n\t"
1669         "ldm	%[b]!, {r5, r6}\n\t"
1670         "subs	r3, r3, r5\n\t"
1671         "sbcs	r4, r4, r6\n\t"
1672         "stm	%[a]!, {r3, r4}\n\t"
1673         "ldm	%[a], {r3, r4}\n\t"
1674         "ldm	%[b]!, {r5, r6}\n\t"
1675         "sbcs	r3, r3, r5\n\t"
1676         "sbcs	r4, r4, r6\n\t"
1677         "stm	%[a]!, {r3, r4}\n\t"
1678         "ldm	%[a], {r3, r4}\n\t"
1679         "ldm	%[b]!, {r5, r6}\n\t"
1680         "sbcs	r3, r3, r5\n\t"
1681         "sbcs	r4, r4, r6\n\t"
1682         "stm	%[a]!, {r3, r4}\n\t"
1683         "ldm	%[a], {r3, r4}\n\t"
1684         "ldm	%[b]!, {r5, r6}\n\t"
1685         "sbcs	r3, r3, r5\n\t"
1686         "sbcs	r4, r4, r6\n\t"
1687         "stm	%[a]!, {r3, r4}\n\t"
1688         "ldm	%[a], {r3, r4}\n\t"
1689         "ldm	%[b]!, {r5, r6}\n\t"
1690         "sbcs	r3, r3, r5\n\t"
1691         "sbcs	r4, r4, r6\n\t"
1692         "stm	%[a]!, {r3, r4}\n\t"
1693         "ldm	%[a], {r3, r4}\n\t"
1694         "ldm	%[b]!, {r5, r6}\n\t"
1695         "sbcs	r3, r3, r5\n\t"
1696         "sbcs	r4, r4, r6\n\t"
1697         "stm	%[a]!, {r3, r4}\n\t"
1698         "ldm	%[a], {r3, r4}\n\t"
1699         "ldm	%[b]!, {r5, r6}\n\t"
1700         "sbcs	r3, r3, r5\n\t"
1701         "sbcs	r4, r4, r6\n\t"
1702         "stm	%[a]!, {r3, r4}\n\t"
1703         "ldm	%[a], {r3, r4}\n\t"
1704         "ldm	%[b]!, {r5, r6}\n\t"
1705         "sbcs	r3, r3, r5\n\t"
1706         "sbcs	r4, r4, r6\n\t"
1707         "stm	%[a]!, {r3, r4}\n\t"
1708         "ldm	%[a], {r3, r4}\n\t"
1709         "ldm	%[b]!, {r5, r6}\n\t"
1710         "sbcs	r3, r3, r5\n\t"
1711         "sbcs	r4, r4, r6\n\t"
1712         "stm	%[a]!, {r3, r4}\n\t"
1713         "ldm	%[a], {r3, r4}\n\t"
1714         "ldm	%[b]!, {r5, r6}\n\t"
1715         "sbcs	r3, r3, r5\n\t"
1716         "sbcs	r4, r4, r6\n\t"
1717         "stm	%[a]!, {r3, r4}\n\t"
1718         "ldm	%[a], {r3, r4}\n\t"
1719         "ldm	%[b]!, {r5, r6}\n\t"
1720         "sbcs	r3, r3, r5\n\t"
1721         "sbcs	r4, r4, r6\n\t"
1722         "stm	%[a]!, {r3, r4}\n\t"
1723         "ldm	%[a], {r3, r4}\n\t"
1724         "ldm	%[b]!, {r5, r6}\n\t"
1725         "sbcs	r3, r3, r5\n\t"
1726         "sbcs	r4, r4, r6\n\t"
1727         "stm	%[a]!, {r3, r4}\n\t"
1728         "ldm	%[a], {r3, r4}\n\t"
1729         "ldm	%[b]!, {r5, r6}\n\t"
1730         "sbcs	r3, r3, r5\n\t"
1731         "sbcs	r4, r4, r6\n\t"
1732         "stm	%[a]!, {r3, r4}\n\t"
1733         "ldm	%[a], {r3, r4}\n\t"
1734         "ldm	%[b]!, {r5, r6}\n\t"
1735         "sbcs	r3, r3, r5\n\t"
1736         "sbcs	r4, r4, r6\n\t"
1737         "stm	%[a]!, {r3, r4}\n\t"
1738         "ldm	%[a], {r3, r4}\n\t"
1739         "ldm	%[b]!, {r5, r6}\n\t"
1740         "sbcs	r3, r3, r5\n\t"
1741         "sbcs	r4, r4, r6\n\t"
1742         "stm	%[a]!, {r3, r4}\n\t"
1743         "ldm	%[a], {r3, r4}\n\t"
1744         "ldm	%[b]!, {r5, r6}\n\t"
1745         "sbcs	r3, r3, r5\n\t"
1746         "sbcs	r4, r4, r6\n\t"
1747         "stm	%[a]!, {r3, r4}\n\t"
1748         "ldm	%[a], {r3, r4}\n\t"
1749         "ldm	%[b]!, {r5, r6}\n\t"
1750         "sbcs	r3, r3, r5\n\t"
1751         "sbcs	r4, r4, r6\n\t"
1752         "stm	%[a]!, {r3, r4}\n\t"
1753         "ldm	%[a], {r3, r4}\n\t"
1754         "ldm	%[b]!, {r5, r6}\n\t"
1755         "sbcs	r3, r3, r5\n\t"
1756         "sbcs	r4, r4, r6\n\t"
1757         "stm	%[a]!, {r3, r4}\n\t"
1758         "ldm	%[a], {r3, r4}\n\t"
1759         "ldm	%[b]!, {r5, r6}\n\t"
1760         "sbcs	r3, r3, r5\n\t"
1761         "sbcs	r4, r4, r6\n\t"
1762         "stm	%[a]!, {r3, r4}\n\t"
1763         "ldm	%[a], {r3, r4}\n\t"
1764         "ldm	%[b]!, {r5, r6}\n\t"
1765         "sbcs	r3, r3, r5\n\t"
1766         "sbcs	r4, r4, r6\n\t"
1767         "stm	%[a]!, {r3, r4}\n\t"
1768         "ldm	%[a], {r3, r4}\n\t"
1769         "ldm	%[b]!, {r5, r6}\n\t"
1770         "sbcs	r3, r3, r5\n\t"
1771         "sbcs	r4, r4, r6\n\t"
1772         "stm	%[a]!, {r3, r4}\n\t"
1773         "ldm	%[a], {r3, r4}\n\t"
1774         "ldm	%[b]!, {r5, r6}\n\t"
1775         "sbcs	r3, r3, r5\n\t"
1776         "sbcs	r4, r4, r6\n\t"
1777         "stm	%[a]!, {r3, r4}\n\t"
1778         "ldm	%[a], {r3, r4}\n\t"
1779         "ldm	%[b]!, {r5, r6}\n\t"
1780         "sbcs	r3, r3, r5\n\t"
1781         "sbcs	r4, r4, r6\n\t"
1782         "stm	%[a]!, {r3, r4}\n\t"
1783         "ldm	%[a], {r3, r4}\n\t"
1784         "ldm	%[b]!, {r5, r6}\n\t"
1785         "sbcs	r3, r3, r5\n\t"
1786         "sbcs	r4, r4, r6\n\t"
1787         "stm	%[a]!, {r3, r4}\n\t"
1788         "ldm	%[a], {r3, r4}\n\t"
1789         "ldm	%[b]!, {r5, r6}\n\t"
1790         "sbcs	r3, r3, r5\n\t"
1791         "sbcs	r4, r4, r6\n\t"
1792         "stm	%[a]!, {r3, r4}\n\t"
1793         "ldm	%[a], {r3, r4}\n\t"
1794         "ldm	%[b]!, {r5, r6}\n\t"
1795         "sbcs	r3, r3, r5\n\t"
1796         "sbcs	r4, r4, r6\n\t"
1797         "stm	%[a]!, {r3, r4}\n\t"
1798         "ldm	%[a], {r3, r4}\n\t"
1799         "ldm	%[b]!, {r5, r6}\n\t"
1800         "sbcs	r3, r3, r5\n\t"
1801         "sbcs	r4, r4, r6\n\t"
1802         "stm	%[a]!, {r3, r4}\n\t"
1803         "ldm	%[a], {r3, r4}\n\t"
1804         "ldm	%[b]!, {r5, r6}\n\t"
1805         "sbcs	r3, r3, r5\n\t"
1806         "sbcs	r4, r4, r6\n\t"
1807         "stm	%[a]!, {r3, r4}\n\t"
1808         "ldm	%[a], {r3, r4}\n\t"
1809         "ldm	%[b]!, {r5, r6}\n\t"
1810         "sbcs	r3, r3, r5\n\t"
1811         "sbcs	r4, r4, r6\n\t"
1812         "stm	%[a]!, {r3, r4}\n\t"
1813         "ldm	%[a], {r3, r4}\n\t"
1814         "ldm	%[b]!, {r5, r6}\n\t"
1815         "sbcs	r3, r3, r5\n\t"
1816         "sbcs	r4, r4, r6\n\t"
1817         "stm	%[a]!, {r3, r4}\n\t"
1818         "ldm	%[a], {r3, r4}\n\t"
1819         "ldm	%[b]!, {r5, r6}\n\t"
1820         "sbcs	r3, r3, r5\n\t"
1821         "sbcs	r4, r4, r6\n\t"
1822         "stm	%[a]!, {r3, r4}\n\t"
1823         "ldm	%[a], {r3, r4}\n\t"
1824         "ldm	%[b]!, {r5, r6}\n\t"
1825         "sbcs	r3, r3, r5\n\t"
1826         "sbcs	r4, r4, r6\n\t"
1827         "stm	%[a]!, {r3, r4}\n\t"
1828         "sbc	%[c], %[c], %[c]\n\t"
1829         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
1830         :
1831         : "memory", "r3", "r4", "r5", "r6"
1832     );
1833 
1834     return c;
1835 }
1836 
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: 32 groups of two words (64 words in total) are loaded,
 * added and stored, with the carry flag chained from one group to the next.
 * All three pointers are advanced by the ldm/stm write-back as the code runs.
 *
 * r  A single precision integer. Receives the 64-word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top word: 0 or 1.
 */
SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* First pair of words: plain add starts the carry chain. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* Remaining 31 pairs: add with the carry from the previous pair. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* c = 0 + 0 + carry flag, i.e. the carry out of the last word. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
2018 
2019 /* AND m into each word of a and store in r.
2020  *
2021  * r  A single precision integer.
2022  * a  A single precision integer.
2023  * m  Mask to AND against each digit.
2024  */
sp_2048_mask_32(sp_digit * r,const sp_digit * a,sp_digit m)2025 static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
2026 {
2027 #ifdef WOLFSSL_SP_SMALL
2028     int i;
2029 
2030     for (i=0; i<32; i++) {
2031         r[i] = a[i] & m;
2032     }
2033 #else
2034     int i;
2035 
2036     for (i = 0; i < 32; i += 8) {
2037         r[i+0] = a[i+0] & m;
2038         r[i+1] = a[i+1] & m;
2039         r[i+2] = a[i+2] & m;
2040         r[i+3] = a[i+3] & m;
2041         r[i+4] = a[i+4] & m;
2042         r[i+5] = a[i+5] & m;
2043         r[i+6] = a[i+6] & m;
2044         r[i+7] = a[i+7] & m;
2045     }
2046 #endif
2047 }
2048 
2049 /* Multiply a and b into r. (r = a * b)
2050  *
2051  * r  A single precision integer.
2052  * a  A single precision integer.
2053  * b  A single precision integer.
2054  */
sp_2048_mul_64(sp_digit * r,const sp_digit * a,const sp_digit * b)2055 SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
2056         const sp_digit* b)
2057 {
2058     sp_digit* z0 = r;
2059     sp_digit z1[64];
2060     sp_digit a1[32];
2061     sp_digit b1[32];
2062     sp_digit z2[64];
2063     sp_digit u;
2064     sp_digit ca;
2065     sp_digit cb;
2066 
2067     ca = sp_2048_add_32(a1, a, &a[32]);
2068     cb = sp_2048_add_32(b1, b, &b[32]);
2069     u  = ca & cb;
2070     sp_2048_mul_32(z1, a1, b1);
2071     sp_2048_mul_32(z2, &a[32], &b[32]);
2072     sp_2048_mul_32(z0, a, b);
2073     sp_2048_mask_32(r + 64, a1, 0 - cb);
2074     sp_2048_mask_32(b1, b1, 0 - ca);
2075     u += sp_2048_add_32(r + 64, r + 64, b1);
2076     u += sp_2048_sub_in_place_64(z1, z2);
2077     u += sp_2048_sub_in_place_64(z1, z0);
2078     u += sp_2048_add_64(r + 32, r + 32, z1);
2079     r[96] = u;
2080     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
2081     (void)sp_2048_add_64(r + 64, r + 64, z2);
2082 }
2083 
2084 /* Square a and put result in r. (r = a * a)
2085  *
2086  * r  A single precision integer.
2087  * a  A single precision integer.
2088  */
sp_2048_sqr_64(sp_digit * r,const sp_digit * a)2089 SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
2090 {
2091     sp_digit* z0 = r;
2092     sp_digit z2[64];
2093     sp_digit z1[64];
2094     sp_digit a1[32];
2095     sp_digit u;
2096 
2097     u = sp_2048_add_32(a1, a, &a[32]);
2098     sp_2048_sqr_32(z1, a1);
2099     sp_2048_sqr_32(z2, &a[32]);
2100     sp_2048_sqr_32(z0, a);
2101     sp_2048_mask_32(r + 64, a1, 0 - u);
2102     u += sp_2048_add_32(r + 64, r + 64, r + 64);
2103     u += sp_2048_sub_in_place_64(z1, z2);
2104     u += sp_2048_sub_in_place_64(z1, z0);
2105     u += sp_2048_add_64(r + 32, r + 32, z1);
2106     r[96] = u;
2107     XMEMSET(r + 96 + 1, 0, sizeof(sp_digit) * (32 - 1));
2108     (void)sp_2048_add_64(r + 64, r + 64, z2);
2109 }
2110 
2111 #endif /* !WOLFSSL_SP_SMALL */
2112 #ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Size-optimised loop: one word is added per iteration until 256 bytes
 * (64 words) of a have been consumed. The carry is kept in c between
 * iterations because the loop's cmp overwrites the flags.
 *
 * r  A single precision integer. Receives the 64-word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top word: 0 or 1.
 */
SP_NOINLINE static sp_digit sp_2048_add_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r6 = end address (a + 256 bytes); r8 = 0 - 1 = all ones. */
        "mov	r6, %[a]\n\t"
        "mov	r8, #0\n\t"
        "add	r6, r6, #256\n\t"
        "sub	r8, r8, #1\n\t"
        "\n1:\n\t"
        /* c + 0xffffffff sets the carry flag exactly when c is 1. */
        "adds	%[c], %[c], r8\n\t"
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "adcs	r4, r4, r5\n\t"
        "str	r4, [%[r]]\n\t"
        /* Save the new carry flag in c as 0 or 1. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        /* Step all three pointers on by one word. */
        "add	%[a], %[a], #4\n\t"
        "add	%[b], %[b], #4\n\t"
        "add	%[r], %[r], #4\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "bne	1b\n\t"
#else
        "bne.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
2153 
2154 #endif /* WOLFSSL_SP_SMALL */
2155 #ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Size-optimised loop: two words are subtracted per iteration until
 * 256 bytes (64 words) of a have been processed. The borrow is kept in c
 * between iterations because the loop's cmp overwrites the flags.
 *
 * a  A single precision integer. Updated in place.
 * b  A single precision integer.
 * returns 0 when there was no borrow out of the top word and all ones (-1)
 * when there was.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_64(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;
    __asm__ __volatile__ (
        /* r8 = end address (a + 256 bytes). */
        "mov	r8, %[a]\n\t"
        "add	r8, r8, #256\n\t"
        "\n1:\n\t"
        /* 0 - c re-creates the borrow in the carry flag: the flag is set
         * (no borrow) when c is 0 and clear (borrow) when c is -1. */
        "mov	r5, #0\n\t"
        "subs	r5, r5, %[c]\n\t"
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "ldr	r6, [%[b], #4]\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "str	r3, [%[a]]\n\t"
        "str	r4, [%[a], #4]\n\t"
        /* c - c - borrow: 0 without a borrow, -1 with one. */
        "sbc	%[c], %[c], %[c]\n\t"
        "add	%[a], %[a], #8\n\t"
        "add	%[b], %[b], #8\n\t"
        "cmp	%[a], r8\n\t"
#ifdef __GNUC__
        "bne	1b\n\t"
#else
        "bne.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r8"
    );

    return c;
}
2195 
2196 #endif /* WOLFSSL_SP_SMALL */
2197 #ifdef WOLFSSL_SP_SMALL
2198 /* Multiply a and b into r. (r = a * b)
2199  *
2200  * r  A single precision integer.
2201  * a  A single precision integer.
2202  * b  A single precision integer.
2203  */
sp_2048_mul_64(sp_digit * r,const sp_digit * a,const sp_digit * b)2204 SP_NOINLINE static void sp_2048_mul_64(sp_digit* r, const sp_digit* a,
2205         const sp_digit* b)
2206 {
2207     sp_digit tmp_arr[64 * 2];
2208     sp_digit* tmp = tmp_arr;
2209     __asm__ __volatile__ (
2210         "mov	r3, #0\n\t"
2211         "mov	r4, #0\n\t"
2212         "mov	r9, r3\n\t"
2213         "mov	r12, %[r]\n\t"
2214         "mov	r10, %[a]\n\t"
2215         "mov	r11, %[b]\n\t"
2216         "mov	r6, #1\n\t"
2217         "lsl	r6, r6, #8\n\t"
2218         "add	r6, r6, r10\n\t"
2219         "mov	r14, r6\n\t"
2220         "\n1:\n\t"
2221         "mov	%[r], #0\n\t"
2222         "mov	r5, #0\n\t"
2223         "mov	r6, #252\n\t"
2224         "mov	%[a], r9\n\t"
2225         "subs	%[a], %[a], r6\n\t"
2226         "sbc	r6, r6, r6\n\t"
2227         "mvn	r6, r6\n\t"
2228         "and	%[a], %[a], r6\n\t"
2229         "mov	%[b], r9\n\t"
2230         "sub	%[b], %[b], %[a]\n\t"
2231         "add	%[a], %[a], r10\n\t"
2232         "add	%[b], %[b], r11\n\t"
2233         "\n2:\n\t"
2234         /* Multiply Start */
2235         "ldr	r6, [%[a]]\n\t"
2236         "ldr	r8, [%[b]]\n\t"
2237         "umull	r6, r8, r6, r8\n\t"
2238         "adds	r3, r3, r6\n\t"
2239         "adcs 	r4, r4, r8\n\t"
2240         "adc	r5, r5, %[r]\n\t"
2241         /* Multiply Done */
2242         "add	%[a], %[a], #4\n\t"
2243         "sub	%[b], %[b], #4\n\t"
2244         "cmp	%[a], r14\n\t"
2245 #ifdef __GNUC__
2246         "beq	3f\n\t"
2247 #else
2248         "beq.n	3f\n\t"
2249 #endif /* __GNUC__ */
2250         "mov	r6, r9\n\t"
2251         "add	r6, r6, r10\n\t"
2252         "cmp	%[a], r6\n\t"
2253 #ifdef __GNUC__
2254         "ble	2b\n\t"
2255 #else
2256         "ble.n	2b\n\t"
2257 #endif /* __GNUC__ */
2258         "\n3:\n\t"
2259         "mov	%[r], r12\n\t"
2260         "mov	r8, r9\n\t"
2261         "str	r3, [%[r], r8]\n\t"
2262         "mov	r3, r4\n\t"
2263         "mov	r4, r5\n\t"
2264         "add	r8, r8, #4\n\t"
2265         "mov	r9, r8\n\t"
2266         "mov	r6, #1\n\t"
2267         "lsl	r6, r6, #8\n\t"
2268         "add	r6, r6, #248\n\t"
2269         "cmp	r8, r6\n\t"
2270 #ifdef __GNUC__
2271         "ble	1b\n\t"
2272 #else
2273         "ble.n	1b\n\t"
2274 #endif /* __GNUC__ */
2275         "str	r3, [%[r], r8]\n\t"
2276         "mov	%[a], r10\n\t"
2277         "mov	%[b], r11\n\t"
2278         :
2279         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
2280         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
2281     );
2282 
2283     XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
2284 }
2285 
2286 /* Square a and put result in r. (r = a * a)
2287  *
2288  * r  A single precision integer.
2289  * a  A single precision integer.
2290  */
sp_2048_sqr_64(sp_digit * r,const sp_digit * a)2291 SP_NOINLINE static void sp_2048_sqr_64(sp_digit* r, const sp_digit* a)
2292 {
2293     __asm__ __volatile__ (
2294         "mov	r3, #0\n\t"
2295         "mov	r4, #0\n\t"
2296         "mov	r5, #0\n\t"
2297         "mov	r9, r3\n\t"
2298         "mov	r12, %[r]\n\t"
2299         "mov	r6, #2\n\t"
2300         "lsl	r6, r6, #8\n\t"
2301         "neg	r6, r6\n\t"
2302         "add	sp, sp, r6\n\t"
2303         "mov	r11, sp\n\t"
2304         "mov	r10, %[a]\n\t"
2305         "\n1:\n\t"
2306         "mov	%[r], #0\n\t"
2307         "mov	r6, #252\n\t"
2308         "mov	%[a], r9\n\t"
2309         "subs	%[a], %[a], r6\n\t"
2310         "sbc	r6, r6, r6\n\t"
2311         "mvn	r6, r6\n\t"
2312         "and	%[a], %[a], r6\n\t"
2313         "mov	r2, r9\n\t"
2314         "sub	r2, r2, %[a]\n\t"
2315         "add	%[a], %[a], r10\n\t"
2316         "add	r2, r2, r10\n\t"
2317         "\n2:\n\t"
2318         "cmp	r2, %[a]\n\t"
2319 #ifdef __GNUC__
2320         "beq	4f\n\t"
2321 #else
2322         "beq.n	4f\n\t"
2323 #endif /* __GNUC__ */
2324         /* Multiply * 2: Start */
2325         "ldr	r6, [%[a]]\n\t"
2326         "ldr	r8, [r2]\n\t"
2327         "umull	r6, r8, r6, r8\n\t"
2328         "adds	r3, r3, r6\n\t"
2329         "adcs 	r4, r4, r8\n\t"
2330         "adc	r5, r5, %[r]\n\t"
2331         "adds	r3, r3, r6\n\t"
2332         "adcs 	r4, r4, r8\n\t"
2333         "adc	r5, r5, %[r]\n\t"
2334         /* Multiply * 2: Done */
2335 #ifdef __GNUC__
2336         "bal	5f\n\t"
2337 #else
2338         "bal.n	5f\n\t"
2339 #endif /* __GNUC__ */
2340         "\n4:\n\t"
2341         /* Square: Start */
2342         "ldr	r6, [%[a]]\n\t"
2343         "umull	r6, r8, r6, r6\n\t"
2344         "adds	r3, r3, r6\n\t"
2345         "adcs	r4, r4, r8\n\t"
2346         "adc	r5, r5, %[r]\n\t"
2347         /* Square: Done */
2348         "\n5:\n\t"
2349         "add	%[a], %[a], #4\n\t"
2350         "sub	r2, r2, #4\n\t"
2351         "mov	r6, #1\n\t"
2352         "lsl	r6, r6, #8\n\t"
2353         "add	r6, r6, r10\n\t"
2354         "cmp	%[a], r6\n\t"
2355 #ifdef __GNUC__
2356         "beq	3f\n\t"
2357 #else
2358         "beq.n	3f\n\t"
2359 #endif /* __GNUC__ */
2360         "cmp	%[a], r2\n\t"
2361 #ifdef __GNUC__
2362         "bgt	3f\n\t"
2363 #else
2364         "bgt.n	3f\n\t"
2365 #endif /* __GNUC__ */
2366         "mov	r8, r9\n\t"
2367         "add	r8, r8, r10\n\t"
2368         "cmp	%[a], r8\n\t"
2369 #ifdef __GNUC__
2370         "ble	2b\n\t"
2371 #else
2372         "ble.n	2b\n\t"
2373 #endif /* __GNUC__ */
2374         "\n3:\n\t"
2375         "mov	%[r], r11\n\t"
2376         "mov	r8, r9\n\t"
2377         "str	r3, [%[r], r8]\n\t"
2378         "mov	r3, r4\n\t"
2379         "mov	r4, r5\n\t"
2380         "mov	r5, #0\n\t"
2381         "add	r8, r8, #4\n\t"
2382         "mov	r9, r8\n\t"
2383         "mov	r6, #1\n\t"
2384         "lsl	r6, r6, #8\n\t"
2385         "add	r6, r6, #248\n\t"
2386         "cmp	r8, r6\n\t"
2387 #ifdef __GNUC__
2388         "ble	1b\n\t"
2389 #else
2390         "ble.n	1b\n\t"
2391 #endif /* __GNUC__ */
2392         "mov	%[a], r10\n\t"
2393         "str	r3, [%[r], r8]\n\t"
2394         "mov	%[r], r12\n\t"
2395         "mov	%[a], r11\n\t"
2396         "mov	r3, #1\n\t"
2397         "lsl	r3, r3, #8\n\t"
2398         "add	r3, r3, #252\n\t"
2399         "\n4:\n\t"
2400         "ldr	r6, [%[a], r3]\n\t"
2401         "str	r6, [%[r], r3]\n\t"
2402         "subs	r3, r3, #4\n\t"
2403 #ifdef __GNUC__
2404         "bge	4b\n\t"
2405 #else
2406         "bge.n	4b\n\t"
2407 #endif /* __GNUC__ */
2408         "mov	r6, #2\n\t"
2409         "lsl	r6, r6, #8\n\t"
2410         "add	sp, sp, r6\n\t"
2411         :
2412         : [r] "r" (r), [a] "r" (a)
2413         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
2414     );
2415 }
2416 
2417 #endif /* WOLFSSL_SP_SMALL */
2418 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
2419 #ifdef WOLFSSL_SP_SMALL
2420 /* AND m into each word of a and store in r.
2421  *
2422  * r  A single precision integer.
2423  * a  A single precision integer.
2424  * m  Mask to AND against each digit.
2425  */
sp_2048_mask_32(sp_digit * r,const sp_digit * a,sp_digit m)2426 static void sp_2048_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
2427 {
2428     int i;
2429 
2430     for (i=0; i<32; i++) {
2431         r[i] = a[i] & m;
2432     }
2433 }
2434 
2435 #endif /* WOLFSSL_SP_SMALL */
2436 #ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Size-optimised loop: one word is added per iteration until 128 bytes
 * (32 words) of a have been consumed. The carry is kept in c between
 * iterations because the loop's cmp overwrites the flags.
 *
 * r  A single precision integer. Receives the 32-word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top word: 0 or 1.
 */
SP_NOINLINE static sp_digit sp_2048_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r6 = end address (a + 128 bytes); r8 = 0 - 1 = all ones. */
        "mov	r6, %[a]\n\t"
        "mov	r8, #0\n\t"
        "add	r6, r6, #128\n\t"
        "sub	r8, r8, #1\n\t"
        "\n1:\n\t"
        /* c + 0xffffffff sets the carry flag exactly when c is 1. */
        "adds	%[c], %[c], r8\n\t"
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "adcs	r4, r4, r5\n\t"
        "str	r4, [%[r]]\n\t"
        /* Save the new carry flag in c as 0 or 1. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        /* Step all three pointers on by one word. */
        "add	%[a], %[a], #4\n\t"
        "add	%[b], %[b], #4\n\t"
        "add	%[r], %[r], #4\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "bne	1b\n\t"
#else
        "bne.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
2477 
2478 #endif /* WOLFSSL_SP_SMALL */
2479 #ifdef WOLFSSL_SP_SMALL
/* Sub b from a into a. (a -= b)
 *
 * Size-optimised loop: two words are subtracted per iteration until
 * 128 bytes (32 words) of a have been processed. The borrow is kept in c
 * between iterations because the loop's cmp overwrites the flags.
 *
 * a  A single precision integer. Updated in place.
 * b  A single precision integer.
 * returns 0 when there was no borrow out of the top word and all ones (-1)
 * when there was.
 */
SP_NOINLINE static sp_digit sp_2048_sub_in_place_32(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;
    __asm__ __volatile__ (
        /* r8 = end address (a + 128 bytes). */
        "mov	r8, %[a]\n\t"
        "add	r8, r8, #128\n\t"
        "\n1:\n\t"
        /* 0 - c re-creates the borrow in the carry flag: the flag is set
         * (no borrow) when c is 0 and clear (borrow) when c is -1. */
        "mov	r5, #0\n\t"
        "subs	r5, r5, %[c]\n\t"
        "ldr	r3, [%[a]]\n\t"
        "ldr	r4, [%[a], #4]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "ldr	r6, [%[b], #4]\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "str	r3, [%[a]]\n\t"
        "str	r4, [%[a], #4]\n\t"
        /* c - c - borrow: 0 without a borrow, -1 with one. */
        "sbc	%[c], %[c], %[c]\n\t"
        "add	%[a], %[a], #8\n\t"
        "add	%[b], %[b], #8\n\t"
        "cmp	%[a], r8\n\t"
#ifdef __GNUC__
        "bne	1b\n\t"
#else
        "bne.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6", "r8"
    );

    return c;
}
2519 
2520 #endif /* WOLFSSL_SP_SMALL */
2521 #ifdef WOLFSSL_SP_SMALL
2522 /* Multiply a and b into r. (r = a * b)
2523  *
2524  * r  A single precision integer.
2525  * a  A single precision integer.
2526  * b  A single precision integer.
2527  */
sp_2048_mul_32(sp_digit * r,const sp_digit * a,const sp_digit * b)2528 SP_NOINLINE static void sp_2048_mul_32(sp_digit* r, const sp_digit* a,
2529         const sp_digit* b)
2530 {
2531     sp_digit tmp_arr[32 * 2];
2532     sp_digit* tmp = tmp_arr;
2533     __asm__ __volatile__ (
2534         "mov	r3, #0\n\t"
2535         "mov	r4, #0\n\t"
2536         "mov	r9, r3\n\t"
2537         "mov	r12, %[r]\n\t"
2538         "mov	r10, %[a]\n\t"
2539         "mov	r11, %[b]\n\t"
2540         "mov	r6, #128\n\t"
2541         "add	r6, r6, r10\n\t"
2542         "mov	r14, r6\n\t"
2543         "\n1:\n\t"
2544         "mov	%[r], #0\n\t"
2545         "mov	r5, #0\n\t"
2546         "mov	r6, #124\n\t"
2547         "mov	%[a], r9\n\t"
2548         "subs	%[a], %[a], r6\n\t"
2549         "sbc	r6, r6, r6\n\t"
2550         "mvn	r6, r6\n\t"
2551         "and	%[a], %[a], r6\n\t"
2552         "mov	%[b], r9\n\t"
2553         "sub	%[b], %[b], %[a]\n\t"
2554         "add	%[a], %[a], r10\n\t"
2555         "add	%[b], %[b], r11\n\t"
2556         "\n2:\n\t"
2557         /* Multiply Start */
2558         "ldr	r6, [%[a]]\n\t"
2559         "ldr	r8, [%[b]]\n\t"
2560         "umull	r6, r8, r6, r8\n\t"
2561         "adds	r3, r3, r6\n\t"
2562         "adcs 	r4, r4, r8\n\t"
2563         "adc	r5, r5, %[r]\n\t"
2564         /* Multiply Done */
2565         "add	%[a], %[a], #4\n\t"
2566         "sub	%[b], %[b], #4\n\t"
2567         "cmp	%[a], r14\n\t"
2568 #ifdef __GNUC__
2569         "beq	3f\n\t"
2570 #else
2571         "beq.n	3f\n\t"
2572 #endif /* __GNUC__ */
2573         "mov	r6, r9\n\t"
2574         "add	r6, r6, r10\n\t"
2575         "cmp	%[a], r6\n\t"
2576 #ifdef __GNUC__
2577         "ble	2b\n\t"
2578 #else
2579         "ble.n	2b\n\t"
2580 #endif /* __GNUC__ */
2581         "\n3:\n\t"
2582         "mov	%[r], r12\n\t"
2583         "mov	r8, r9\n\t"
2584         "str	r3, [%[r], r8]\n\t"
2585         "mov	r3, r4\n\t"
2586         "mov	r4, r5\n\t"
2587         "add	r8, r8, #4\n\t"
2588         "mov	r9, r8\n\t"
2589         "mov	r6, #248\n\t"
2590         "cmp	r8, r6\n\t"
2591 #ifdef __GNUC__
2592         "ble	1b\n\t"
2593 #else
2594         "ble.n	1b\n\t"
2595 #endif /* __GNUC__ */
2596         "str	r3, [%[r], r8]\n\t"
2597         "mov	%[a], r10\n\t"
2598         "mov	%[b], r11\n\t"
2599         :
2600         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
2601         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
2602     );
2603 
2604     XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
2605 }
2606 
2607 /* Square a and put result in r. (r = a * a)
2608  *
2609  * r  A single precision integer.
2610  * a  A single precision integer.
2611  */
sp_2048_sqr_32(sp_digit * r,const sp_digit * a)2612 SP_NOINLINE static void sp_2048_sqr_32(sp_digit* r, const sp_digit* a)
2613 {
2614     __asm__ __volatile__ (
2615         "mov	r3, #0\n\t"
2616         "mov	r4, #0\n\t"
2617         "mov	r5, #0\n\t"
2618         "mov	r9, r3\n\t"
2619         "mov	r12, %[r]\n\t"
2620         "mov	r6, #1\n\t"
2621         "lsl	r6, r6, #8\n\t"
2622         "neg	r6, r6\n\t"
2623         "add	sp, sp, r6\n\t"
2624         "mov	r11, sp\n\t"
2625         "mov	r10, %[a]\n\t"
2626         "\n1:\n\t"
2627         "mov	%[r], #0\n\t"
2628         "mov	r6, #124\n\t"
2629         "mov	%[a], r9\n\t"
2630         "subs	%[a], %[a], r6\n\t"
2631         "sbc	r6, r6, r6\n\t"
2632         "mvn	r6, r6\n\t"
2633         "and	%[a], %[a], r6\n\t"
2634         "mov	r2, r9\n\t"
2635         "sub	r2, r2, %[a]\n\t"
2636         "add	%[a], %[a], r10\n\t"
2637         "add	r2, r2, r10\n\t"
2638         "\n2:\n\t"
2639         "cmp	r2, %[a]\n\t"
2640 #ifdef __GNUC__
2641         "beq	4f\n\t"
2642 #else
2643         "beq.n	4f\n\t"
2644 #endif /* __GNUC__ */
2645         /* Multiply * 2: Start */
2646         "ldr	r6, [%[a]]\n\t"
2647         "ldr	r8, [r2]\n\t"
2648         "umull	r6, r8, r6, r8\n\t"
2649         "adds	r3, r3, r6\n\t"
2650         "adcs 	r4, r4, r8\n\t"
2651         "adc	r5, r5, %[r]\n\t"
2652         "adds	r3, r3, r6\n\t"
2653         "adcs 	r4, r4, r8\n\t"
2654         "adc	r5, r5, %[r]\n\t"
2655         /* Multiply * 2: Done */
2656 #ifdef __GNUC__
2657         "bal	5f\n\t"
2658 #else
2659         "bal.n	5f\n\t"
2660 #endif /* __GNUC__ */
2661         "\n4:\n\t"
2662         /* Square: Start */
2663         "ldr	r6, [%[a]]\n\t"
2664         "umull	r6, r8, r6, r6\n\t"
2665         "adds	r3, r3, r6\n\t"
2666         "adcs	r4, r4, r8\n\t"
2667         "adc	r5, r5, %[r]\n\t"
2668         /* Square: Done */
2669         "\n5:\n\t"
2670         "add	%[a], %[a], #4\n\t"
2671         "sub	r2, r2, #4\n\t"
2672         "mov	r6, #128\n\t"
2673         "add	r6, r6, r10\n\t"
2674         "cmp	%[a], r6\n\t"
2675 #ifdef __GNUC__
2676         "beq	3f\n\t"
2677 #else
2678         "beq.n	3f\n\t"
2679 #endif /* __GNUC__ */
2680         "cmp	%[a], r2\n\t"
2681 #ifdef __GNUC__
2682         "bgt	3f\n\t"
2683 #else
2684         "bgt.n	3f\n\t"
2685 #endif /* __GNUC__ */
2686         "mov	r8, r9\n\t"
2687         "add	r8, r8, r10\n\t"
2688         "cmp	%[a], r8\n\t"
2689 #ifdef __GNUC__
2690         "ble	2b\n\t"
2691 #else
2692         "ble.n	2b\n\t"
2693 #endif /* __GNUC__ */
2694         "\n3:\n\t"
2695         "mov	%[r], r11\n\t"
2696         "mov	r8, r9\n\t"
2697         "str	r3, [%[r], r8]\n\t"
2698         "mov	r3, r4\n\t"
2699         "mov	r4, r5\n\t"
2700         "mov	r5, #0\n\t"
2701         "add	r8, r8, #4\n\t"
2702         "mov	r9, r8\n\t"
2703         "mov	r6, #248\n\t"
2704         "cmp	r8, r6\n\t"
2705 #ifdef __GNUC__
2706         "ble	1b\n\t"
2707 #else
2708         "ble.n	1b\n\t"
2709 #endif /* __GNUC__ */
2710         "mov	%[a], r10\n\t"
2711         "str	r3, [%[r], r8]\n\t"
2712         "mov	%[r], r12\n\t"
2713         "mov	%[a], r11\n\t"
2714         "mov	r3, #252\n\t"
2715         "\n4:\n\t"
2716         "ldr	r6, [%[a], r3]\n\t"
2717         "str	r6, [%[r], r3]\n\t"
2718         "subs	r3, r3, #4\n\t"
2719 #ifdef __GNUC__
2720         "bge	4b\n\t"
2721 #else
2722         "bge.n	4b\n\t"
2723 #endif /* __GNUC__ */
2724         "mov	r6, #1\n\t"
2725         "lsl	r6, r6, #8\n\t"
2726         "add	sp, sp, r6\n\t"
2727         :
2728         : [r] "r" (r), [a] "r" (a)
2729         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
2730     );
2731 }
2732 
2733 #endif /* WOLFSSL_SP_SMALL */
2734 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
2735 
2736 /* Caclulate the bottom digit of -1/a mod 2^n.
2737  *
2738  * a    A single precision number.
2739  * rho  Bottom word of inverse.
2740  */
sp_2048_mont_setup(const sp_digit * a,sp_digit * rho)2741 static void sp_2048_mont_setup(const sp_digit* a, sp_digit* rho)
2742 {
2743     sp_digit x;
2744     sp_digit b;
2745 
2746     b = a[0];
2747     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
2748     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
2749     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
2750     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
2751 
2752     /* rho = -1/m mod b */
2753     *rho = (sp_digit)0 - x;
2754 }
2755 
/* Mul a by digit b into r. (r = a * b)
 *
 * Each of the 64 words of a is multiplied by b and added to the running
 * carry held in r3:r4:r5. One result word is stored per input word and the
 * remaining carry is stored as a final extra word, so 65 words of r are
 * written. Both r and a are advanced by post-indexed accesses as the loop
 * runs.
 *
 * r  A single precision integer. Receives the 65-word product.
 * a  A single precision integer.
 * b  A single precision digit.
 */
SP_NOINLINE static void sp_2048_mul_d_64(sp_digit* r, const sp_digit* a,
        sp_digit b)
{
    __asm__ __volatile__ (
        /* r9 = end address (a + 256 bytes). */
        "add	r9, %[a], #256\n\t"
        /* A[0] * B */
        "ldr	r6, [%[a]], #4\n\t"
        "umull	r5, r3, r6, %[b]\n\t"
        "mov	r4, #0\n\t"
        "str	r5, [%[r]], #4\n\t"
        /* A[0] * B - Done */
        "\n1:\n\t"
        "mov	r5, #0\n\t"
        /* A[] * B */
        "ldr	r6, [%[a]], #4\n\t"
        "umull	r6, r8, r6, %[b]\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, #0\n\t"
        /* A[] * B - Done */
        /* Store the low word and shift the carry words down by one. */
        "str	r3, [%[r]], #4\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "cmp	%[a], r9\n\t"
#ifdef __GNUC__
        "blt	1b\n\t"
#else
        "blt.n	1b\n\t"
#endif /* __GNUC__ */
        /* Final carry word. */
        "str	r3, [%[r]]\n\t"
        : [r] "+r" (r), [a] "+r" (a)
        : [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
    );
}
2797 
2798 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
2799 /* r = 2^n mod m where n is the number of bits to reduce by.
2800  * Given m must be 2048 bits, just need to subtract.
2801  *
2802  * r  A single precision number.
2803  * m  A single precision number.
2804  */
sp_2048_mont_norm_32(sp_digit * r,const sp_digit * m)2805 static void sp_2048_mont_norm_32(sp_digit* r, const sp_digit* m)
2806 {
2807     XMEMSET(r, 0, sizeof(sp_digit) * 32);
2808 
2809     /* r = 2^n mod m */
2810     sp_2048_sub_in_place_32(r, m);
2811 }
2812 
/* Conditionally subtract b from a using the mask m.
 * m is -1 to subtract and 0 to leave the value unchanged (r = a).
 *
 * Every word of b is ANDed with m before it is subtracted, so the same
 * instructions run for both values of m. The borrow is kept in c between
 * iterations because the loop's cmp overwrites the flags.
 *
 * r  A single precision number representing condition subtract result.
 * a  A single precision number to subtract from.
 * b  A single precision number to subtract.
 * m  Mask value to apply.
 * returns 0 when there was no borrow out of the top word and all ones (-1)
 * when there was.
 */
SP_NOINLINE static sp_digit sp_2048_cond_sub_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b, sp_digit m)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r9 = length in bytes (128 = 32 words); r8 = byte offset. */
        "mov	r5, #128\n\t"
        "mov	r9, r5\n\t"
        "mov	r8, #0\n\t"
        "\n1:\n\t"
        /* r6 = b word, or 0 when the mask is 0. */
        "ldr	r6, [%[b], r8]\n\t"
        "and	r6, r6, %[m]\n\t"
        /* 0 - c re-creates the borrow in the carry flag: the flag is set
         * (no borrow) when c is 0 and clear (borrow) when c is -1. */
        "mov	r5, #0\n\t"
        "subs	r5, r5, %[c]\n\t"
        "ldr	r5, [%[a], r8]\n\t"
        "sbcs	r5, r5, r6\n\t"
        /* c - c - borrow: 0 without a borrow, -1 with one. */
        "sbcs	%[c], %[c], %[c]\n\t"
        "str	r5, [%[r], r8]\n\t"
        "add	r8, r8, #4\n\t"
        "cmp	r8, r9\n\t"
#ifdef __GNUC__
        "blt	1b\n\t"
#else
        "blt.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c)
        : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
        : "memory", "r5", "r6", "r8", "r9"
    );

    return c;
}
2853 
2854 /* Reduce the number back to 2048 bits using Montgomery reduction.
2855  *
2856  * a   A single precision number to reduce in place.
2857  * m   The single precision number representing the modulus.
2858  * mp  The digit representing the negative inverse of m mod 2^n.
2859  */
sp_2048_mont_reduce_32(sp_digit * a,const sp_digit * m,sp_digit mp)2860 SP_NOINLINE static void sp_2048_mont_reduce_32(sp_digit* a, const sp_digit* m,
2861         sp_digit mp)
2862 {
2863     sp_digit ca = 0;
2864 
2865     __asm__ __volatile__ (
2866         "mov	r9, %[mp]\n\t"
2867         "mov	r12, %[m]\n\t"
2868         "mov	r10, %[a]\n\t"
2869         "mov	r4, #0\n\t"
2870         "add	r11, r10, #128\n\t"
2871         "\n1:\n\t"
2872         /* mu = a[i] * mp */
2873         "mov	%[mp], r9\n\t"
2874         "ldr	%[a], [r10]\n\t"
2875         "mul	%[mp], %[mp], %[a]\n\t"
2876         "mov	%[m], r12\n\t"
2877         "add	r14, r10, #120\n\t"
2878         "\n2:\n\t"
2879         /* a[i+j] += m[j] * mu */
2880         "ldr	%[a], [r10]\n\t"
2881         "mov	r5, #0\n\t"
2882         /* Multiply m[j] and mu - Start */
2883         "ldr	r8, [%[m]], #4\n\t"
2884         "umull	r6, r8, %[mp], r8\n\t"
2885         "adds	%[a], %[a], r6\n\t"
2886         "adc	r5, r5, r8\n\t"
2887         /* Multiply m[j] and mu - Done */
2888         "adds	r4, r4, %[a]\n\t"
2889         "adc	r5, r5, #0\n\t"
2890         "str	r4, [r10], #4\n\t"
2891         /* a[i+j+1] += m[j+1] * mu */
2892         "ldr	%[a], [r10]\n\t"
2893         "mov	r4, #0\n\t"
2894         /* Multiply m[j] and mu - Start */
2895         "ldr	r8, [%[m]], #4\n\t"
2896         "umull	r6, r8, %[mp], r8\n\t"
2897         "adds	%[a], %[a], r6\n\t"
2898         "adc	r4, r4, r8\n\t"
2899         /* Multiply m[j] and mu - Done */
2900         "adds	r5, r5, %[a]\n\t"
2901         "adc	r4, r4, #0\n\t"
2902         "str	r5, [r10], #4\n\t"
2903         "cmp	r10, r14\n\t"
2904 #ifdef __GNUC__
2905         "blt	2b\n\t"
2906 #else
2907         "blt.n	2b\n\t"
2908 #endif /* __GNUC__ */
2909         /* a[i+30] += m[30] * mu */
2910         "ldr	%[a], [r10]\n\t"
2911         "mov	r5, #0\n\t"
2912         /* Multiply m[j] and mu - Start */
2913         "ldr	r8, [%[m]], #4\n\t"
2914         "umull	r6, r8, %[mp], r8\n\t"
2915         "adds	%[a], %[a], r6\n\t"
2916         "adc	r5, r5, r8\n\t"
2917         /* Multiply m[j] and mu - Done */
2918         "adds	r4, r4, %[a]\n\t"
2919         "adc	r5, r5, #0\n\t"
2920         "str	r4, [r10], #4\n\t"
2921         /* a[i+31] += m[31] * mu */
2922         "mov	r4, %[ca]\n\t"
2923         "mov	%[ca], #0\n\t"
2924         /* Multiply m[31] and mu - Start */
2925         "ldr	r8, [%[m]]\n\t"
2926         "umull	r6, r8, %[mp], r8\n\t"
2927         "adds	r5, r5, r6\n\t"
2928         "adcs 	r4, r4, r8\n\t"
2929         "adc	%[ca], %[ca], #0\n\t"
2930         /* Multiply m[31] and mu - Done */
2931         "ldr	r6, [r10]\n\t"
2932         "ldr	r8, [r10, #4]\n\t"
2933         "adds	r6, r6, r5\n\t"
2934         "adcs	r8, r8, r4\n\t"
2935         "adc	%[ca], %[ca], #0\n\t"
2936         "str	r6, [r10]\n\t"
2937         "str	r8, [r10, #4]\n\t"
2938         /* Next word in a */
2939         "sub	r10, r10, #120\n\t"
2940         "cmp	r10, r11\n\t"
2941 #ifdef __GNUC__
2942         "blt	1b\n\t"
2943 #else
2944         "blt.n	1b\n\t"
2945 #endif /* __GNUC__ */
2946         "mov	%[a], r10\n\t"
2947         "mov	%[m], r12\n\t"
2948         : [ca] "+r" (ca), [a] "+r" (a)
2949         : [m] "r" (m), [mp] "r" (mp)
2950         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
2951     );
2952 
2953     sp_2048_cond_sub_32(a - 32, a, m, (sp_digit)0 - ca);
2954 }
2955 
/* Multiply two Montgomery form numbers mod the modulus.
 * (r = a * b mod m)
 *
 * The product is written to r by sp_2048_mul_32 and then reduced in place by
 * sp_2048_mont_reduce_32, which treats r as a 64 digit buffer.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier, passed on to sp_2048_mont_reduce_32.
 */
static void sp_2048_mont_mul_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b, const sp_digit* m, sp_digit mp)
{
    sp_2048_mul_32(r, a, b);
    sp_2048_mont_reduce_32(r, m, mp);
}
2971 
/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The square is written to r by sp_2048_sqr_32 and then reduced in place by
 * sp_2048_mont_reduce_32, which treats r as a 64 digit buffer.
 *
 * r   Result of squaring.
 * a   Number to square in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier, passed on to sp_2048_mont_reduce_32.
 */
static void sp_2048_mont_sqr_32(sp_digit* r, const sp_digit* a,
        const sp_digit* m, sp_digit mp)
{
    sp_2048_sqr_32(r, a);
    sp_2048_mont_reduce_32(r, m, mp);
}
2985 
2986 /* Mul a by digit b into r. (r = a * b)
2987  *
2988  * r  A single precision integer.
2989  * a  A single precision integer.
2990  * b  A single precision digit.
2991  */
sp_2048_mul_d_32(sp_digit * r,const sp_digit * a,sp_digit b)2992 SP_NOINLINE static void sp_2048_mul_d_32(sp_digit* r, const sp_digit* a,
2993         sp_digit b)
2994 {
2995     __asm__ __volatile__ (
2996         "add	r9, %[a], #128\n\t"
2997         /* A[0] * B */
2998         "ldr	r6, [%[a]], #4\n\t"
2999         "umull	r5, r3, r6, %[b]\n\t"
3000         "mov	r4, #0\n\t"
3001         "str	r5, [%[r]], #4\n\t"
3002         /* A[0] * B - Done */
3003         "\n1:\n\t"
3004         "mov	r5, #0\n\t"
3005         /* A[] * B */
3006         "ldr	r6, [%[a]], #4\n\t"
3007         "umull	r6, r8, r6, %[b]\n\t"
3008         "adds	r3, r3, r6\n\t"
3009         "adcs 	r4, r4, r8\n\t"
3010         "adc	r5, r5, #0\n\t"
3011         /* A[] * B - Done */
3012         "str	r3, [%[r]], #4\n\t"
3013         "mov	r3, r4\n\t"
3014         "mov	r4, r5\n\t"
3015         "cmp	%[a], r9\n\t"
3016 #ifdef __GNUC__
3017         "blt	1b\n\t"
3018 #else
3019         "blt.n	1b\n\t"
3020 #endif /* __GNUC__ */
3021         "str	r3, [%[r]]\n\t"
3022         : [r] "+r" (r), [a] "+r" (a)
3023         : [b] "r" (b)
3024         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
3025     );
3026 }
3027 
3028 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
3029  *
3030  * d1   The high order half of the number to divide.
3031  * d0   The low order half of the number to divide.
3032  * div  The dividend.
3033  * returns the result of the division.
3034  *
3035  * Note that this is an approximate div. It may give an answer 1 larger.
3036  */
div_2048_word_32(sp_digit d1,sp_digit d0,sp_digit div)3037 SP_NOINLINE static sp_digit div_2048_word_32(sp_digit d1, sp_digit d0,
3038         sp_digit div)
3039 {
3040     sp_digit r = 0;
3041 
3042     __asm__ __volatile__ (
3043         "lsr	r6, %[div], #16\n\t"
3044         "add	r6, r6, #1\n\t"
3045         "udiv	r4, %[d1], r6\n\t"
3046         "lsl	r8, r4, #16\n\t"
3047         "umull	r4, r5, %[div], r8\n\t"
3048         "subs	%[d0], %[d0], r4\n\t"
3049         "sbc	%[d1], %[d1], r5\n\t"
3050         "udiv	r5, %[d1], r6\n\t"
3051         "lsl	r4, r5, #16\n\t"
3052         "add	r8, r8, r4\n\t"
3053         "umull	r4, r5, %[div], r4\n\t"
3054         "subs	%[d0], %[d0], r4\n\t"
3055         "sbc	%[d1], %[d1], r5\n\t"
3056         "lsl	r4, %[d1], #16\n\t"
3057         "orr	r4, r4, %[d0], lsr #16\n\t"
3058         "udiv	r4, r4, r6\n\t"
3059         "add	r8, r8, r4\n\t"
3060         "umull	r4, r5, %[div], r4\n\t"
3061         "subs	%[d0], %[d0], r4\n\t"
3062         "sbc	%[d1], %[d1], r5\n\t"
3063         "lsl	r4, %[d1], #16\n\t"
3064         "orr	r4, r4, %[d0], lsr #16\n\t"
3065         "udiv	r4, r4, r6\n\t"
3066         "add	r8, r8, r4\n\t"
3067         "umull	r4, r5, %[div], r4\n\t"
3068         "subs	%[d0], %[d0], r4\n\t"
3069         "sbc	%[d1], %[d1], r5\n\t"
3070         "udiv	r4, %[d0], %[div]\n\t"
3071         "add	r8, r8, r4\n\t"
3072         "mov	%[r], r8\n\t"
3073         : [r] "+r" (r)
3074         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
3075         : "r4", "r5", "r6", "r8"
3076     );
3077     return r;
3078 }
3079 
3080 /* Compare a with b in constant time.
3081  *
3082  * a  A single precision integer.
3083  * b  A single precision integer.
3084  * return -ve, 0 or +ve if a is less than, equal to or greater than b
3085  * respectively.
3086  */
sp_2048_cmp_32(const sp_digit * a,const sp_digit * b)3087 SP_NOINLINE static sp_int32 sp_2048_cmp_32(const sp_digit* a, const sp_digit* b)
3088 {
3089     sp_digit r = 0;
3090 
3091 
3092     __asm__ __volatile__ (
3093         "mov	r3, #0\n\t"
3094         "mvn	r3, r3\n\t"
3095         "mov	r6, #124\n\t"
3096         "\n1:\n\t"
3097         "ldr	r8, [%[a], r6]\n\t"
3098         "ldr	r5, [%[b], r6]\n\t"
3099         "and	r8, r8, r3\n\t"
3100         "and	r5, r5, r3\n\t"
3101         "mov	r4, r8\n\t"
3102         "subs	r8, r8, r5\n\t"
3103         "sbc	r8, r8, r8\n\t"
3104         "add	%[r], %[r], r8\n\t"
3105         "mvn	r8, r8\n\t"
3106         "and	r3, r3, r8\n\t"
3107         "subs	r5, r5, r4\n\t"
3108         "sbc	r8, r8, r8\n\t"
3109         "sub	%[r], %[r], r8\n\t"
3110         "mvn	r8, r8\n\t"
3111         "and	r3, r3, r8\n\t"
3112         "sub	r6, r6, #4\n\t"
3113         "cmp	r6, #0\n\t"
3114 #ifdef __GNUC__
3115         "bge	1b\n\t"
3116 #else
3117         "bge.n	1b\n\t"
3118 #endif /* __GNUC__ */
3119         : [r] "+r" (r)
3120         : [a] "r" (a), [b] "r" (b)
3121         : "r3", "r4", "r5", "r6", "r8"
3122     );
3123 
3124     return r;
3125 }
3126 
3127 /* Divide d in a and put remainder into r (m*d + r = a)
3128  * m is not calculated as it is not needed at this time.
3129  *
3130  * a  Number to be divided.
3131  * d  Number to divide with.
3132  * m  Multiplier result.
3133  * r  Remainder from the division.
3134  * returns MP_OKAY indicating success.
3135  */
sp_2048_div_32(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)3136 static WC_INLINE int sp_2048_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
3137         sp_digit* r)
3138 {
3139     sp_digit t1[64], t2[33];
3140     sp_digit div, r1;
3141     int i;
3142 
3143     (void)m;
3144 
3145     div = d[31];
3146     XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
3147     for (i=31; i>=0; i--) {
3148         sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
3149         r1 = div_2048_word_32(hi, t1[32 + i - 1], div);
3150 
3151         sp_2048_mul_d_32(t2, d, r1);
3152         t1[32 + i] += sp_2048_sub_in_place_32(&t1[i], t2);
3153         t1[32 + i] -= t2[32];
3154         sp_2048_mask_32(t2, d, t1[32 + i]);
3155         t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
3156         sp_2048_mask_32(t2, d, t1[32 + i]);
3157         t1[32 + i] += sp_2048_add_32(&t1[i], &t1[i], t2);
3158     }
3159 
3160     r1 = sp_2048_cmp_32(t1, d) >= 0;
3161     sp_2048_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
3162 
3163     return MP_OKAY;
3164 }
3165 
/* Reduce a modulo m into r. (r = a mod m)
 *
 * Thin wrapper around sp_2048_div_32 that asks only for the remainder; NULL
 * is passed for the unused multiplier result.
 *
 * r  A single precision number that is the reduced result.
 * a  A single precision number that is to be reduced.
 * m  A single precision number that is the modulus to reduce with.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_2048_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    return sp_2048_div_32(a, m, NULL, r);
}
3177 
3178 #ifdef WOLFSSL_SP_SMALL
3179 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
3180  *
3181  * r     A single precision number that is the result of the operation.
3182  * a     A single precision number being exponentiated.
3183  * e     A single precision number that is the exponent.
3184  * bits  The number of bits in the exponent.
3185  * m     A single precision number that is the modulus.
3186  * returns  0 on success.
3187  * returns  MEMORY_E on dynamic memory allocation failure.
3188  * returns  MP_VAL when base is even or exponent is 0.
3189  */
sp_2048_mod_exp_32(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)3190 static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
3191         int bits, const sp_digit* m, int reduceA)
3192 {
3193 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
3194     sp_digit* td = NULL;
3195 #else
3196     sp_digit td[16 * 64];
3197 #endif
3198     sp_digit* t[16];
3199     sp_digit* norm = NULL;
3200     sp_digit mp = 1;
3201     sp_digit n;
3202     sp_digit mask;
3203     int i;
3204     int c;
3205     byte y;
3206     int err = MP_OKAY;
3207 
3208     if ((m[0] & 1) == 0) {
3209         err = MP_VAL;
3210     }
3211     else if (bits == 0) {
3212         err = MP_VAL;
3213     }
3214 
3215 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
3216     if (err == MP_OKAY) {
3217         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 64), NULL,
3218                                 DYNAMIC_TYPE_TMP_BUFFER);
3219         if (td == NULL)
3220             err = MEMORY_E;
3221     }
3222 #endif
3223 
3224     if (err == MP_OKAY) {
3225         norm = td;
3226         for (i=0; i<16; i++) {
3227             t[i] = td + i * 64;
3228         }
3229 
3230         sp_2048_mont_setup(m, &mp);
3231         sp_2048_mont_norm_32(norm, m);
3232 
3233         XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
3234         if (reduceA != 0) {
3235             err = sp_2048_mod_32(t[1] + 32, a, m);
3236             if (err == MP_OKAY) {
3237                 err = sp_2048_mod_32(t[1], t[1], m);
3238             }
3239         }
3240         else {
3241             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
3242             err = sp_2048_mod_32(t[1], t[1], m);
3243         }
3244     }
3245 
3246     if (err == MP_OKAY) {
3247         sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
3248         sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
3249         sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
3250         sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
3251         sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
3252         sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
3253         sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
3254         sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
3255         sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
3256         sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
3257         sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
3258         sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
3259         sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
3260         sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
3261 
3262         i = (bits - 1) / 32;
3263         n = e[i--];
3264         c = bits & 31;
3265         if (c == 0) {
3266             c = 32;
3267         }
3268         c -= bits % 4;
3269         if (c == 32) {
3270             c = 28;
3271         }
3272         if (c < 0) {
3273             /* Number of bits in top word is less than number needed. */
3274             c = -c;
3275             y = (byte)(n << c);
3276             n = e[i--];
3277             y |= (byte)(n >> (64 - c));
3278             n <<= c;
3279             c = 64 - c;
3280         }
3281         else {
3282             y = (byte)(n >> c);
3283             n <<= 32 - c;
3284         }
3285         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
3286         for (; i>=0 || c>=4; ) {
3287             if (c == 0) {
3288                 n = e[i--];
3289                 y = (byte)(n >> 28);
3290                 n <<= 4;
3291                 c = 28;
3292             }
3293             else if (c < 4) {
3294                 y = (byte)(n >> 28);
3295                 n = e[i--];
3296                 c = 4 - c;
3297                 y |= (byte)(n >> (32 - c));
3298                 n <<= c;
3299                 c = 32 - c;
3300             }
3301             else {
3302                 y = (byte)((n >> 28) & 0xf);
3303                 n <<= 4;
3304                 c -= 4;
3305             }
3306 
3307             sp_2048_mont_sqr_32(r, r, m, mp);
3308             sp_2048_mont_sqr_32(r, r, m, mp);
3309             sp_2048_mont_sqr_32(r, r, m, mp);
3310             sp_2048_mont_sqr_32(r, r, m, mp);
3311 
3312             sp_2048_mont_mul_32(r, r, t[y], m, mp);
3313         }
3314 
3315         XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
3316         sp_2048_mont_reduce_32(r, m, mp);
3317 
3318         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
3319         sp_2048_cond_sub_32(r, r, m, mask);
3320     }
3321 
3322 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
3323     if (td != NULL)
3324         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
3325 #endif
3326 
3327     return err;
3328 }
3329 #else
3330 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
3331  *
3332  * r     A single precision number that is the result of the operation.
3333  * a     A single precision number being exponentiated.
3334  * e     A single precision number that is the exponent.
3335  * bits  The number of bits in the exponent.
3336  * m     A single precision number that is the modulus.
3337  * returns  0 on success.
3338  * returns  MEMORY_E on dynamic memory allocation failure.
3339  * returns  MP_VAL when base is even or exponent is 0.
3340  */
sp_2048_mod_exp_32(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)3341 static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e,
3342         int bits, const sp_digit* m, int reduceA)
3343 {
3344 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
3345     sp_digit* td = NULL;
3346 #else
3347     sp_digit td[32 * 64];
3348 #endif
3349     sp_digit* t[32];
3350     sp_digit* norm = NULL;
3351     sp_digit mp = 1;
3352     sp_digit n;
3353     sp_digit mask;
3354     int i;
3355     int c;
3356     byte y;
3357     int err = MP_OKAY;
3358 
3359     if ((m[0] & 1) == 0) {
3360         err = MP_VAL;
3361     }
3362     else if (bits == 0) {
3363         err = MP_VAL;
3364     }
3365 
3366 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
3367     if (err == MP_OKAY) {
3368         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 64), NULL,
3369                                 DYNAMIC_TYPE_TMP_BUFFER);
3370         if (td == NULL)
3371             err = MEMORY_E;
3372     }
3373 #endif
3374 
3375     if (err == MP_OKAY) {
3376         norm = td;
3377         for (i=0; i<32; i++) {
3378             t[i] = td + i * 64;
3379         }
3380 
3381         sp_2048_mont_setup(m, &mp);
3382         sp_2048_mont_norm_32(norm, m);
3383 
3384         XMEMSET(t[1], 0, sizeof(sp_digit) * 32U);
3385         if (reduceA != 0) {
3386             err = sp_2048_mod_32(t[1] + 32, a, m);
3387             if (err == MP_OKAY) {
3388                 err = sp_2048_mod_32(t[1], t[1], m);
3389             }
3390         }
3391         else {
3392             XMEMCPY(t[1] + 32, a, sizeof(sp_digit) * 32);
3393             err = sp_2048_mod_32(t[1], t[1], m);
3394         }
3395     }
3396 
3397     if (err == MP_OKAY) {
3398         sp_2048_mont_sqr_32(t[ 2], t[ 1], m, mp);
3399         sp_2048_mont_mul_32(t[ 3], t[ 2], t[ 1], m, mp);
3400         sp_2048_mont_sqr_32(t[ 4], t[ 2], m, mp);
3401         sp_2048_mont_mul_32(t[ 5], t[ 3], t[ 2], m, mp);
3402         sp_2048_mont_sqr_32(t[ 6], t[ 3], m, mp);
3403         sp_2048_mont_mul_32(t[ 7], t[ 4], t[ 3], m, mp);
3404         sp_2048_mont_sqr_32(t[ 8], t[ 4], m, mp);
3405         sp_2048_mont_mul_32(t[ 9], t[ 5], t[ 4], m, mp);
3406         sp_2048_mont_sqr_32(t[10], t[ 5], m, mp);
3407         sp_2048_mont_mul_32(t[11], t[ 6], t[ 5], m, mp);
3408         sp_2048_mont_sqr_32(t[12], t[ 6], m, mp);
3409         sp_2048_mont_mul_32(t[13], t[ 7], t[ 6], m, mp);
3410         sp_2048_mont_sqr_32(t[14], t[ 7], m, mp);
3411         sp_2048_mont_mul_32(t[15], t[ 8], t[ 7], m, mp);
3412         sp_2048_mont_sqr_32(t[16], t[ 8], m, mp);
3413         sp_2048_mont_mul_32(t[17], t[ 9], t[ 8], m, mp);
3414         sp_2048_mont_sqr_32(t[18], t[ 9], m, mp);
3415         sp_2048_mont_mul_32(t[19], t[10], t[ 9], m, mp);
3416         sp_2048_mont_sqr_32(t[20], t[10], m, mp);
3417         sp_2048_mont_mul_32(t[21], t[11], t[10], m, mp);
3418         sp_2048_mont_sqr_32(t[22], t[11], m, mp);
3419         sp_2048_mont_mul_32(t[23], t[12], t[11], m, mp);
3420         sp_2048_mont_sqr_32(t[24], t[12], m, mp);
3421         sp_2048_mont_mul_32(t[25], t[13], t[12], m, mp);
3422         sp_2048_mont_sqr_32(t[26], t[13], m, mp);
3423         sp_2048_mont_mul_32(t[27], t[14], t[13], m, mp);
3424         sp_2048_mont_sqr_32(t[28], t[14], m, mp);
3425         sp_2048_mont_mul_32(t[29], t[15], t[14], m, mp);
3426         sp_2048_mont_sqr_32(t[30], t[15], m, mp);
3427         sp_2048_mont_mul_32(t[31], t[16], t[15], m, mp);
3428 
3429         i = (bits - 1) / 32;
3430         n = e[i--];
3431         c = bits & 31;
3432         if (c == 0) {
3433             c = 32;
3434         }
3435         c -= bits % 5;
3436         if (c == 32) {
3437             c = 27;
3438         }
3439         if (c < 0) {
3440             /* Number of bits in top word is less than number needed. */
3441             c = -c;
3442             y = (byte)(n << c);
3443             n = e[i--];
3444             y |= (byte)(n >> (64 - c));
3445             n <<= c;
3446             c = 64 - c;
3447         }
3448         else {
3449             y = (byte)(n >> c);
3450             n <<= 32 - c;
3451         }
3452         XMEMCPY(r, t[y], sizeof(sp_digit) * 32);
3453         for (; i>=0 || c>=5; ) {
3454             if (c == 0) {
3455                 n = e[i--];
3456                 y = (byte)(n >> 27);
3457                 n <<= 5;
3458                 c = 27;
3459             }
3460             else if (c < 5) {
3461                 y = (byte)(n >> 27);
3462                 n = e[i--];
3463                 c = 5 - c;
3464                 y |= (byte)(n >> (32 - c));
3465                 n <<= c;
3466                 c = 32 - c;
3467             }
3468             else {
3469                 y = (byte)((n >> 27) & 0x1f);
3470                 n <<= 5;
3471                 c -= 5;
3472             }
3473 
3474             sp_2048_mont_sqr_32(r, r, m, mp);
3475             sp_2048_mont_sqr_32(r, r, m, mp);
3476             sp_2048_mont_sqr_32(r, r, m, mp);
3477             sp_2048_mont_sqr_32(r, r, m, mp);
3478             sp_2048_mont_sqr_32(r, r, m, mp);
3479 
3480             sp_2048_mont_mul_32(r, r, t[y], m, mp);
3481         }
3482 
3483         XMEMSET(&r[32], 0, sizeof(sp_digit) * 32U);
3484         sp_2048_mont_reduce_32(r, m, mp);
3485 
3486         mask = 0 - (sp_2048_cmp_32(r, m) >= 0);
3487         sp_2048_cond_sub_32(r, r, m, mask);
3488     }
3489 
3490 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
3491     if (td != NULL)
3492         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
3493 #endif
3494 
3495     return err;
3496 }
3497 #endif /* WOLFSSL_SP_SMALL */
3498 
3499 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
3500 
3501 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Calculate the Montgomery normalizer for a 64 digit modulus.
 *
 * r is cleared to zero and m is then subtracted from it in place, so r ends
 * up holding the low 64 digits of (0 - m), i.e. 2^n - m where n is the number
 * of bits in 64 digits.
 * NOTE(review): 2^n - m only equals 2^n mod m when m has its top bit set
 * (m is a full 2048 bits); that is not checked here - confirm the callers
 * guarantee it.
 *
 * r  A single precision number (64 digits) that receives the result.
 * m  A single precision number (64 digits) that is the modulus.
 */
static void sp_2048_mont_norm_64(sp_digit* r, const sp_digit* m)
{
    XMEMSET(r, 0, sizeof(sp_digit) * 64);

    /* r = 2^n mod m */
    sp_2048_sub_in_place_64(r, m);
}
3515 
3516 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
3517 /* Conditionally subtract b from a using the mask m.
3518  * m is -1 to subtract and 0 when not copying.
3519  *
3520  * r  A single precision number representing condition subtract result.
3521  * a  A single precision number to subtract from.
3522  * b  A single precision number to subtract.
3523  * m  Mask value to apply.
3524  */
sp_2048_cond_sub_64(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)3525 SP_NOINLINE static sp_digit sp_2048_cond_sub_64(sp_digit* r, const sp_digit* a,
3526         const sp_digit* b, sp_digit m)
3527 {
3528     sp_digit c = 0;
3529 
3530     __asm__ __volatile__ (
3531         "mov	r5, #1\n\t"
3532         "lsl	r5, r5, #8\n\t"
3533         "mov	r9, r5\n\t"
3534         "mov	r8, #0\n\t"
3535         "\n1:\n\t"
3536         "ldr	r6, [%[b], r8]\n\t"
3537         "and	r6, r6, %[m]\n\t"
3538         "mov	r5, #0\n\t"
3539         "subs	r5, r5, %[c]\n\t"
3540         "ldr	r5, [%[a], r8]\n\t"
3541         "sbcs	r5, r5, r6\n\t"
3542         "sbcs	%[c], %[c], %[c]\n\t"
3543         "str	r5, [%[r], r8]\n\t"
3544         "add	r8, r8, #4\n\t"
3545         "cmp	r8, r9\n\t"
3546 #ifdef __GNUC__
3547         "blt	1b\n\t"
3548 #else
3549         "blt.n	1b\n\t"
3550 #endif /* __GNUC__ */
3551         : [c] "+r" (c)
3552         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
3553         : "memory", "r5", "r6", "r8", "r9"
3554     );
3555 
3556     return c;
3557 }
3558 
3559 /* Reduce the number back to 2048 bits using Montgomery reduction.
3560  *
3561  * a   A single precision number to reduce in place.
3562  * m   The single precision number representing the modulus.
3563  * mp  The digit representing the negative inverse of m mod 2^n.
3564  */
sp_2048_mont_reduce_64(sp_digit * a,const sp_digit * m,sp_digit mp)3565 SP_NOINLINE static void sp_2048_mont_reduce_64(sp_digit* a, const sp_digit* m,
3566         sp_digit mp)
3567 {
3568     sp_digit ca = 0;
3569 
3570     __asm__ __volatile__ (
3571         "mov	r9, %[mp]\n\t"
3572         "mov	r12, %[m]\n\t"
3573         "mov	r10, %[a]\n\t"
3574         "mov	r4, #0\n\t"
3575         "add	r11, r10, #256\n\t"
3576         "\n1:\n\t"
3577         /* mu = a[i] * mp */
3578         "mov	%[mp], r9\n\t"
3579         "ldr	%[a], [r10]\n\t"
3580         "mul	%[mp], %[mp], %[a]\n\t"
3581         "mov	%[m], r12\n\t"
3582         "add	r14, r10, #248\n\t"
3583         "\n2:\n\t"
3584         /* a[i+j] += m[j] * mu */
3585         "ldr	%[a], [r10]\n\t"
3586         "mov	r5, #0\n\t"
3587         /* Multiply m[j] and mu - Start */
3588         "ldr	r8, [%[m]], #4\n\t"
3589         "umull	r6, r8, %[mp], r8\n\t"
3590         "adds	%[a], %[a], r6\n\t"
3591         "adc	r5, r5, r8\n\t"
3592         /* Multiply m[j] and mu - Done */
3593         "adds	r4, r4, %[a]\n\t"
3594         "adc	r5, r5, #0\n\t"
3595         "str	r4, [r10], #4\n\t"
3596         /* a[i+j+1] += m[j+1] * mu */
3597         "ldr	%[a], [r10]\n\t"
3598         "mov	r4, #0\n\t"
3599         /* Multiply m[j] and mu - Start */
3600         "ldr	r8, [%[m]], #4\n\t"
3601         "umull	r6, r8, %[mp], r8\n\t"
3602         "adds	%[a], %[a], r6\n\t"
3603         "adc	r4, r4, r8\n\t"
3604         /* Multiply m[j] and mu - Done */
3605         "adds	r5, r5, %[a]\n\t"
3606         "adc	r4, r4, #0\n\t"
3607         "str	r5, [r10], #4\n\t"
3608         "cmp	r10, r14\n\t"
3609 #ifdef __GNUC__
3610         "blt	2b\n\t"
3611 #else
3612         "blt.n	2b\n\t"
3613 #endif /* __GNUC__ */
3614         /* a[i+62] += m[62] * mu */
3615         "ldr	%[a], [r10]\n\t"
3616         "mov	r5, #0\n\t"
3617         /* Multiply m[j] and mu - Start */
3618         "ldr	r8, [%[m]], #4\n\t"
3619         "umull	r6, r8, %[mp], r8\n\t"
3620         "adds	%[a], %[a], r6\n\t"
3621         "adc	r5, r5, r8\n\t"
3622         /* Multiply m[j] and mu - Done */
3623         "adds	r4, r4, %[a]\n\t"
3624         "adc	r5, r5, #0\n\t"
3625         "str	r4, [r10], #4\n\t"
3626         /* a[i+63] += m[63] * mu */
3627         "mov	r4, %[ca]\n\t"
3628         "mov	%[ca], #0\n\t"
3629         /* Multiply m[63] and mu - Start */
3630         "ldr	r8, [%[m]]\n\t"
3631         "umull	r6, r8, %[mp], r8\n\t"
3632         "adds	r5, r5, r6\n\t"
3633         "adcs 	r4, r4, r8\n\t"
3634         "adc	%[ca], %[ca], #0\n\t"
3635         /* Multiply m[63] and mu - Done */
3636         "ldr	r6, [r10]\n\t"
3637         "ldr	r8, [r10, #4]\n\t"
3638         "adds	r6, r6, r5\n\t"
3639         "adcs	r8, r8, r4\n\t"
3640         "adc	%[ca], %[ca], #0\n\t"
3641         "str	r6, [r10]\n\t"
3642         "str	r8, [r10, #4]\n\t"
3643         /* Next word in a */
3644         "sub	r10, r10, #248\n\t"
3645         "cmp	r10, r11\n\t"
3646 #ifdef __GNUC__
3647         "blt	1b\n\t"
3648 #else
3649         "blt.n	1b\n\t"
3650 #endif /* __GNUC__ */
3651         "mov	%[a], r10\n\t"
3652         "mov	%[m], r12\n\t"
3653         : [ca] "+r" (ca), [a] "+r" (a)
3654         : [m] "r" (m), [mp] "r" (mp)
3655         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
3656     );
3657 
3658     sp_2048_cond_sub_64(a - 64, a, m, (sp_digit)0 - ca);
3659 }
3660 
/* Multiply two Montgomery form numbers mod the modulus.
 * (r = a * b mod m)
 *
 * The product is written to r by sp_2048_mul_64 and then reduced in place by
 * sp_2048_mont_reduce_64, which treats r as a 128 digit buffer.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier, passed on to sp_2048_mont_reduce_64.
 */
static void sp_2048_mont_mul_64(sp_digit* r, const sp_digit* a,
        const sp_digit* b, const sp_digit* m, sp_digit mp)
{
    sp_2048_mul_64(r, a, b);
    sp_2048_mont_reduce_64(r, m, mp);
}
3676 
/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The square is written to r by sp_2048_sqr_64 and then reduced in place by
 * sp_2048_mont_reduce_64, which treats r as a 128 digit buffer.
 *
 * r   Result of squaring.
 * a   Number to square in Montgomery form.
 * m   Modulus.
 * mp  Montgomery multiplier, passed on to sp_2048_mont_reduce_64.
 */
static void sp_2048_mont_sqr_64(sp_digit* r, const sp_digit* a,
        const sp_digit* m, sp_digit mp)
{
    sp_2048_sqr_64(r, a);
    sp_2048_mont_reduce_64(r, m, mp);
}
3690 
3691 #ifdef WOLFSSL_SP_SMALL
3692 /* Sub b from a into r. (r = a - b)
3693  *
3694  * r  A single precision integer.
3695  * a  A single precision integer.
3696  * b  A single precision integer.
3697  */
sp_2048_sub_64(sp_digit * r,const sp_digit * a,const sp_digit * b)3698 SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a,
3699         const sp_digit* b)
3700 {
3701     sp_digit c = 0;
3702 
3703     __asm__ __volatile__ (
3704         "mov	r6, %[a]\n\t"
3705         "mov	r5, #1\n\t"
3706         "lsl	r5, r5, #8\n\t"
3707         "add	r6, r6, r5\n\t"
3708         "\n1:\n\t"
3709         "mov	r5, #0\n\t"
3710         "subs	r5, r5, %[c]\n\t"
3711         "ldr	r4, [%[a]]\n\t"
3712         "ldr	r5, [%[b]]\n\t"
3713         "sbcs	r4, r4, r5\n\t"
3714         "str	r4, [%[r]]\n\t"
3715         "sbc	%[c], %[c], %[c]\n\t"
3716         "add	%[a], %[a], #4\n\t"
3717         "add	%[b], %[b], #4\n\t"
3718         "add	%[r], %[r], #4\n\t"
3719         "cmp	%[a], r6\n\t"
3720 #ifdef __GNUC__
3721         "bne	1b\n\t"
3722 #else
3723         "bne.n	1b\n\t"
3724 #endif /* __GNUC__ */
3725         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
3726         :
3727         : "memory", "r4", "r5", "r6"
3728     );
3729 
3730     return c;
3731 }
3732 
3733 #else
3734 /* Sub b from a into r. (r = a - b)
3735  *
3736  * r  A single precision integer.
3737  * a  A single precision integer.
3738  * b  A single precision integer.
3739  */
sp_2048_sub_64(sp_digit * r,const sp_digit * a,const sp_digit * b)3740 SP_NOINLINE static sp_digit sp_2048_sub_64(sp_digit* r, const sp_digit* a,
3741         const sp_digit* b)
3742 {
3743     sp_digit c = 0;
3744 
3745     __asm__ __volatile__ (
3746         "ldr	r4, [%[a], #0]\n\t"
3747         "ldr	r5, [%[a], #4]\n\t"
3748         "ldr	r6, [%[b], #0]\n\t"
3749         "ldr	r8, [%[b], #4]\n\t"
3750         "subs	r4, r4, r6\n\t"
3751         "sbcs	r5, r5, r8\n\t"
3752         "str	r4, [%[r], #0]\n\t"
3753         "str	r5, [%[r], #4]\n\t"
3754         "ldr	r4, [%[a], #8]\n\t"
3755         "ldr	r5, [%[a], #12]\n\t"
3756         "ldr	r6, [%[b], #8]\n\t"
3757         "ldr	r8, [%[b], #12]\n\t"
3758         "sbcs	r4, r4, r6\n\t"
3759         "sbcs	r5, r5, r8\n\t"
3760         "str	r4, [%[r], #8]\n\t"
3761         "str	r5, [%[r], #12]\n\t"
3762         "ldr	r4, [%[a], #16]\n\t"
3763         "ldr	r5, [%[a], #20]\n\t"
3764         "ldr	r6, [%[b], #16]\n\t"
3765         "ldr	r8, [%[b], #20]\n\t"
3766         "sbcs	r4, r4, r6\n\t"
3767         "sbcs	r5, r5, r8\n\t"
3768         "str	r4, [%[r], #16]\n\t"
3769         "str	r5, [%[r], #20]\n\t"
3770         "ldr	r4, [%[a], #24]\n\t"
3771         "ldr	r5, [%[a], #28]\n\t"
3772         "ldr	r6, [%[b], #24]\n\t"
3773         "ldr	r8, [%[b], #28]\n\t"
3774         "sbcs	r4, r4, r6\n\t"
3775         "sbcs	r5, r5, r8\n\t"
3776         "str	r4, [%[r], #24]\n\t"
3777         "str	r5, [%[r], #28]\n\t"
3778         "ldr	r4, [%[a], #32]\n\t"
3779         "ldr	r5, [%[a], #36]\n\t"
3780         "ldr	r6, [%[b], #32]\n\t"
3781         "ldr	r8, [%[b], #36]\n\t"
3782         "sbcs	r4, r4, r6\n\t"
3783         "sbcs	r5, r5, r8\n\t"
3784         "str	r4, [%[r], #32]\n\t"
3785         "str	r5, [%[r], #36]\n\t"
3786         "ldr	r4, [%[a], #40]\n\t"
3787         "ldr	r5, [%[a], #44]\n\t"
3788         "ldr	r6, [%[b], #40]\n\t"
3789         "ldr	r8, [%[b], #44]\n\t"
3790         "sbcs	r4, r4, r6\n\t"
3791         "sbcs	r5, r5, r8\n\t"
3792         "str	r4, [%[r], #40]\n\t"
3793         "str	r5, [%[r], #44]\n\t"
3794         "ldr	r4, [%[a], #48]\n\t"
3795         "ldr	r5, [%[a], #52]\n\t"
3796         "ldr	r6, [%[b], #48]\n\t"
3797         "ldr	r8, [%[b], #52]\n\t"
3798         "sbcs	r4, r4, r6\n\t"
3799         "sbcs	r5, r5, r8\n\t"
3800         "str	r4, [%[r], #48]\n\t"
3801         "str	r5, [%[r], #52]\n\t"
3802         "ldr	r4, [%[a], #56]\n\t"
3803         "ldr	r5, [%[a], #60]\n\t"
3804         "ldr	r6, [%[b], #56]\n\t"
3805         "ldr	r8, [%[b], #60]\n\t"
3806         "sbcs	r4, r4, r6\n\t"
3807         "sbcs	r5, r5, r8\n\t"
3808         "str	r4, [%[r], #56]\n\t"
3809         "str	r5, [%[r], #60]\n\t"
3810         "ldr	r4, [%[a], #64]\n\t"
3811         "ldr	r5, [%[a], #68]\n\t"
3812         "ldr	r6, [%[b], #64]\n\t"
3813         "ldr	r8, [%[b], #68]\n\t"
3814         "sbcs	r4, r4, r6\n\t"
3815         "sbcs	r5, r5, r8\n\t"
3816         "str	r4, [%[r], #64]\n\t"
3817         "str	r5, [%[r], #68]\n\t"
3818         "ldr	r4, [%[a], #72]\n\t"
3819         "ldr	r5, [%[a], #76]\n\t"
3820         "ldr	r6, [%[b], #72]\n\t"
3821         "ldr	r8, [%[b], #76]\n\t"
3822         "sbcs	r4, r4, r6\n\t"
3823         "sbcs	r5, r5, r8\n\t"
3824         "str	r4, [%[r], #72]\n\t"
3825         "str	r5, [%[r], #76]\n\t"
3826         "ldr	r4, [%[a], #80]\n\t"
3827         "ldr	r5, [%[a], #84]\n\t"
3828         "ldr	r6, [%[b], #80]\n\t"
3829         "ldr	r8, [%[b], #84]\n\t"
3830         "sbcs	r4, r4, r6\n\t"
3831         "sbcs	r5, r5, r8\n\t"
3832         "str	r4, [%[r], #80]\n\t"
3833         "str	r5, [%[r], #84]\n\t"
3834         "ldr	r4, [%[a], #88]\n\t"
3835         "ldr	r5, [%[a], #92]\n\t"
3836         "ldr	r6, [%[b], #88]\n\t"
3837         "ldr	r8, [%[b], #92]\n\t"
3838         "sbcs	r4, r4, r6\n\t"
3839         "sbcs	r5, r5, r8\n\t"
3840         "str	r4, [%[r], #88]\n\t"
3841         "str	r5, [%[r], #92]\n\t"
3842         "ldr	r4, [%[a], #96]\n\t"
3843         "ldr	r5, [%[a], #100]\n\t"
3844         "ldr	r6, [%[b], #96]\n\t"
3845         "ldr	r8, [%[b], #100]\n\t"
3846         "sbcs	r4, r4, r6\n\t"
3847         "sbcs	r5, r5, r8\n\t"
3848         "str	r4, [%[r], #96]\n\t"
3849         "str	r5, [%[r], #100]\n\t"
3850         "ldr	r4, [%[a], #104]\n\t"
3851         "ldr	r5, [%[a], #108]\n\t"
3852         "ldr	r6, [%[b], #104]\n\t"
3853         "ldr	r8, [%[b], #108]\n\t"
3854         "sbcs	r4, r4, r6\n\t"
3855         "sbcs	r5, r5, r8\n\t"
3856         "str	r4, [%[r], #104]\n\t"
3857         "str	r5, [%[r], #108]\n\t"
3858         "ldr	r4, [%[a], #112]\n\t"
3859         "ldr	r5, [%[a], #116]\n\t"
3860         "ldr	r6, [%[b], #112]\n\t"
3861         "ldr	r8, [%[b], #116]\n\t"
3862         "sbcs	r4, r4, r6\n\t"
3863         "sbcs	r5, r5, r8\n\t"
3864         "str	r4, [%[r], #112]\n\t"
3865         "str	r5, [%[r], #116]\n\t"
3866         "ldr	r4, [%[a], #120]\n\t"
3867         "ldr	r5, [%[a], #124]\n\t"
3868         "ldr	r6, [%[b], #120]\n\t"
3869         "ldr	r8, [%[b], #124]\n\t"
3870         "sbcs	r4, r4, r6\n\t"
3871         "sbcs	r5, r5, r8\n\t"
3872         "str	r4, [%[r], #120]\n\t"
3873         "str	r5, [%[r], #124]\n\t"
3874         "sbc	%[c], %[c], %[c]\n\t"
3875         "add	%[a], %[a], #0x80\n\t"
3876         "add	%[b], %[b], #0x80\n\t"
3877         "add	%[r], %[r], #0x80\n\t"
3878         "mov	r6, #0\n\t"
3879         "sub	r6, r6, %[c]\n\t"
3880         "ldr	r4, [%[a], #0]\n\t"
3881         "ldr	r5, [%[a], #4]\n\t"
3882         "ldr	r6, [%[b], #0]\n\t"
3883         "ldr	r8, [%[b], #4]\n\t"
3884         "sbcs	r4, r4, r6\n\t"
3885         "sbcs	r5, r5, r8\n\t"
3886         "str	r4, [%[r], #0]\n\t"
3887         "str	r5, [%[r], #4]\n\t"
3888         "ldr	r4, [%[a], #8]\n\t"
3889         "ldr	r5, [%[a], #12]\n\t"
3890         "ldr	r6, [%[b], #8]\n\t"
3891         "ldr	r8, [%[b], #12]\n\t"
3892         "sbcs	r4, r4, r6\n\t"
3893         "sbcs	r5, r5, r8\n\t"
3894         "str	r4, [%[r], #8]\n\t"
3895         "str	r5, [%[r], #12]\n\t"
3896         "ldr	r4, [%[a], #16]\n\t"
3897         "ldr	r5, [%[a], #20]\n\t"
3898         "ldr	r6, [%[b], #16]\n\t"
3899         "ldr	r8, [%[b], #20]\n\t"
3900         "sbcs	r4, r4, r6\n\t"
3901         "sbcs	r5, r5, r8\n\t"
3902         "str	r4, [%[r], #16]\n\t"
3903         "str	r5, [%[r], #20]\n\t"
3904         "ldr	r4, [%[a], #24]\n\t"
3905         "ldr	r5, [%[a], #28]\n\t"
3906         "ldr	r6, [%[b], #24]\n\t"
3907         "ldr	r8, [%[b], #28]\n\t"
3908         "sbcs	r4, r4, r6\n\t"
3909         "sbcs	r5, r5, r8\n\t"
3910         "str	r4, [%[r], #24]\n\t"
3911         "str	r5, [%[r], #28]\n\t"
3912         "ldr	r4, [%[a], #32]\n\t"
3913         "ldr	r5, [%[a], #36]\n\t"
3914         "ldr	r6, [%[b], #32]\n\t"
3915         "ldr	r8, [%[b], #36]\n\t"
3916         "sbcs	r4, r4, r6\n\t"
3917         "sbcs	r5, r5, r8\n\t"
3918         "str	r4, [%[r], #32]\n\t"
3919         "str	r5, [%[r], #36]\n\t"
3920         "ldr	r4, [%[a], #40]\n\t"
3921         "ldr	r5, [%[a], #44]\n\t"
3922         "ldr	r6, [%[b], #40]\n\t"
3923         "ldr	r8, [%[b], #44]\n\t"
3924         "sbcs	r4, r4, r6\n\t"
3925         "sbcs	r5, r5, r8\n\t"
3926         "str	r4, [%[r], #40]\n\t"
3927         "str	r5, [%[r], #44]\n\t"
3928         "ldr	r4, [%[a], #48]\n\t"
3929         "ldr	r5, [%[a], #52]\n\t"
3930         "ldr	r6, [%[b], #48]\n\t"
3931         "ldr	r8, [%[b], #52]\n\t"
3932         "sbcs	r4, r4, r6\n\t"
3933         "sbcs	r5, r5, r8\n\t"
3934         "str	r4, [%[r], #48]\n\t"
3935         "str	r5, [%[r], #52]\n\t"
3936         "ldr	r4, [%[a], #56]\n\t"
3937         "ldr	r5, [%[a], #60]\n\t"
3938         "ldr	r6, [%[b], #56]\n\t"
3939         "ldr	r8, [%[b], #60]\n\t"
3940         "sbcs	r4, r4, r6\n\t"
3941         "sbcs	r5, r5, r8\n\t"
3942         "str	r4, [%[r], #56]\n\t"
3943         "str	r5, [%[r], #60]\n\t"
3944         "ldr	r4, [%[a], #64]\n\t"
3945         "ldr	r5, [%[a], #68]\n\t"
3946         "ldr	r6, [%[b], #64]\n\t"
3947         "ldr	r8, [%[b], #68]\n\t"
3948         "sbcs	r4, r4, r6\n\t"
3949         "sbcs	r5, r5, r8\n\t"
3950         "str	r4, [%[r], #64]\n\t"
3951         "str	r5, [%[r], #68]\n\t"
3952         "ldr	r4, [%[a], #72]\n\t"
3953         "ldr	r5, [%[a], #76]\n\t"
3954         "ldr	r6, [%[b], #72]\n\t"
3955         "ldr	r8, [%[b], #76]\n\t"
3956         "sbcs	r4, r4, r6\n\t"
3957         "sbcs	r5, r5, r8\n\t"
3958         "str	r4, [%[r], #72]\n\t"
3959         "str	r5, [%[r], #76]\n\t"
3960         "ldr	r4, [%[a], #80]\n\t"
3961         "ldr	r5, [%[a], #84]\n\t"
3962         "ldr	r6, [%[b], #80]\n\t"
3963         "ldr	r8, [%[b], #84]\n\t"
3964         "sbcs	r4, r4, r6\n\t"
3965         "sbcs	r5, r5, r8\n\t"
3966         "str	r4, [%[r], #80]\n\t"
3967         "str	r5, [%[r], #84]\n\t"
3968         "ldr	r4, [%[a], #88]\n\t"
3969         "ldr	r5, [%[a], #92]\n\t"
3970         "ldr	r6, [%[b], #88]\n\t"
3971         "ldr	r8, [%[b], #92]\n\t"
3972         "sbcs	r4, r4, r6\n\t"
3973         "sbcs	r5, r5, r8\n\t"
3974         "str	r4, [%[r], #88]\n\t"
3975         "str	r5, [%[r], #92]\n\t"
3976         "ldr	r4, [%[a], #96]\n\t"
3977         "ldr	r5, [%[a], #100]\n\t"
3978         "ldr	r6, [%[b], #96]\n\t"
3979         "ldr	r8, [%[b], #100]\n\t"
3980         "sbcs	r4, r4, r6\n\t"
3981         "sbcs	r5, r5, r8\n\t"
3982         "str	r4, [%[r], #96]\n\t"
3983         "str	r5, [%[r], #100]\n\t"
3984         "ldr	r4, [%[a], #104]\n\t"
3985         "ldr	r5, [%[a], #108]\n\t"
3986         "ldr	r6, [%[b], #104]\n\t"
3987         "ldr	r8, [%[b], #108]\n\t"
3988         "sbcs	r4, r4, r6\n\t"
3989         "sbcs	r5, r5, r8\n\t"
3990         "str	r4, [%[r], #104]\n\t"
3991         "str	r5, [%[r], #108]\n\t"
3992         "ldr	r4, [%[a], #112]\n\t"
3993         "ldr	r5, [%[a], #116]\n\t"
3994         "ldr	r6, [%[b], #112]\n\t"
3995         "ldr	r8, [%[b], #116]\n\t"
3996         "sbcs	r4, r4, r6\n\t"
3997         "sbcs	r5, r5, r8\n\t"
3998         "str	r4, [%[r], #112]\n\t"
3999         "str	r5, [%[r], #116]\n\t"
4000         "ldr	r4, [%[a], #120]\n\t"
4001         "ldr	r5, [%[a], #124]\n\t"
4002         "ldr	r6, [%[b], #120]\n\t"
4003         "ldr	r8, [%[b], #124]\n\t"
4004         "sbcs	r4, r4, r6\n\t"
4005         "sbcs	r5, r5, r8\n\t"
4006         "str	r4, [%[r], #120]\n\t"
4007         "str	r5, [%[r], #124]\n\t"
4008         "sbc	%[c], %[c], %[c]\n\t"
4009         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
4010         :
4011         : "memory", "r4", "r5", "r6", "r8"
4012     );
4013 
4014     return c;
4015 }
4016 
4017 #endif /* WOLFSSL_SP_SMALL */
4018 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
4019  *
4020  * d1   The high order half of the number to divide.
4021  * d0   The low order half of the number to divide.
4022  * div  The dividend.
4023  * returns the result of the division.
4024  *
4025  * Note that this is an approximate div. It may give an answer 1 larger.
4026  */
div_2048_word_64(sp_digit d1,sp_digit d0,sp_digit div)4027 SP_NOINLINE static sp_digit div_2048_word_64(sp_digit d1, sp_digit d0,
4028         sp_digit div)
4029 {
4030     sp_digit r = 0;
4031 
4032     __asm__ __volatile__ (
4033         "lsr	r6, %[div], #16\n\t"
4034         "add	r6, r6, #1\n\t"
4035         "udiv	r4, %[d1], r6\n\t"
4036         "lsl	r8, r4, #16\n\t"
4037         "umull	r4, r5, %[div], r8\n\t"
4038         "subs	%[d0], %[d0], r4\n\t"
4039         "sbc	%[d1], %[d1], r5\n\t"
4040         "udiv	r5, %[d1], r6\n\t"
4041         "lsl	r4, r5, #16\n\t"
4042         "add	r8, r8, r4\n\t"
4043         "umull	r4, r5, %[div], r4\n\t"
4044         "subs	%[d0], %[d0], r4\n\t"
4045         "sbc	%[d1], %[d1], r5\n\t"
4046         "lsl	r4, %[d1], #16\n\t"
4047         "orr	r4, r4, %[d0], lsr #16\n\t"
4048         "udiv	r4, r4, r6\n\t"
4049         "add	r8, r8, r4\n\t"
4050         "umull	r4, r5, %[div], r4\n\t"
4051         "subs	%[d0], %[d0], r4\n\t"
4052         "sbc	%[d1], %[d1], r5\n\t"
4053         "lsl	r4, %[d1], #16\n\t"
4054         "orr	r4, r4, %[d0], lsr #16\n\t"
4055         "udiv	r4, r4, r6\n\t"
4056         "add	r8, r8, r4\n\t"
4057         "umull	r4, r5, %[div], r4\n\t"
4058         "subs	%[d0], %[d0], r4\n\t"
4059         "sbc	%[d1], %[d1], r5\n\t"
4060         "udiv	r4, %[d0], %[div]\n\t"
4061         "add	r8, r8, r4\n\t"
4062         "mov	%[r], r8\n\t"
4063         : [r] "+r" (r)
4064         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
4065         : "r4", "r5", "r6", "r8"
4066     );
4067     return r;
4068 }
4069 
4070 /* Divide d in a and put remainder into r (m*d + r = a)
4071  * m is not calculated as it is not needed at this time.
4072  *
4073  * a  Number to be divided.
4074  * d  Number to divide with.
4075  * m  Multiplier result.
4076  * r  Remainder from the division.
4077  * returns MP_OKAY indicating success.
4078  */
sp_2048_div_64_cond(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)4079 static WC_INLINE int sp_2048_div_64_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
4080         sp_digit* r)
4081 {
4082     sp_digit t1[128], t2[65];
4083     sp_digit div, r1;
4084     int i;
4085 
4086     (void)m;
4087 
4088     div = d[63];
4089     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
4090     for (i=63; i>=0; i--) {
4091         sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
4092         r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
4093 
4094         sp_2048_mul_d_64(t2, d, r1);
4095         t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
4096         t1[64 + i] -= t2[64];
4097         if (t1[64 + i] != 0) {
4098             t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
4099             if (t1[64 + i] != 0)
4100                 t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], d);
4101         }
4102     }
4103 
4104     for (i = 63; i > 0; i--) {
4105         if (t1[i] != d[i])
4106             break;
4107     }
4108     if (t1[i] >= d[i]) {
4109         sp_2048_sub_64(r, t1, d);
4110     }
4111     else {
4112         XMEMCPY(r, t1, sizeof(*t1) * 64);
4113     }
4114 
4115     return MP_OKAY;
4116 }
4117 
4118 /* Reduce a modulo m into r. (r = a mod m)
4119  *
4120  * r  A single precision number that is the reduced result.
4121  * a  A single precision number that is to be reduced.
4122  * m  A single precision number that is the modulus to reduce with.
4123  * returns MP_OKAY indicating success.
4124  */
sp_2048_mod_64_cond(sp_digit * r,const sp_digit * a,const sp_digit * m)4125 static WC_INLINE int sp_2048_mod_64_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
4126 {
4127     return sp_2048_div_64_cond(a, m, NULL, r);
4128 }
4129 
4130 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
4131 /* AND m into each word of a and store in r.
4132  *
4133  * r  A single precision integer.
4134  * a  A single precision integer.
4135  * m  Mask to AND against each digit.
4136  */
sp_2048_mask_64(sp_digit * r,const sp_digit * a,sp_digit m)4137 static void sp_2048_mask_64(sp_digit* r, const sp_digit* a, sp_digit m)
4138 {
4139 #ifdef WOLFSSL_SP_SMALL
4140     int i;
4141 
4142     for (i=0; i<64; i++) {
4143         r[i] = a[i] & m;
4144     }
4145 #else
4146     int i;
4147 
4148     for (i = 0; i < 64; i += 8) {
4149         r[i+0] = a[i+0] & m;
4150         r[i+1] = a[i+1] & m;
4151         r[i+2] = a[i+2] & m;
4152         r[i+3] = a[i+3] & m;
4153         r[i+4] = a[i+4] & m;
4154         r[i+5] = a[i+5] & m;
4155         r[i+6] = a[i+6] & m;
4156         r[i+7] = a[i+7] & m;
4157     }
4158 #endif
4159 }
4160 
4161 /* Compare a with b in constant time.
4162  *
4163  * a  A single precision integer.
4164  * b  A single precision integer.
4165  * return -ve, 0 or +ve if a is less than, equal to or greater than b
4166  * respectively.
4167  */
sp_2048_cmp_64(const sp_digit * a,const sp_digit * b)4168 SP_NOINLINE static sp_int32 sp_2048_cmp_64(const sp_digit* a, const sp_digit* b)
4169 {
4170     sp_digit r = 0;
4171 
4172 
4173     __asm__ __volatile__ (
4174         "mov	r3, #0\n\t"
4175         "mvn	r3, r3\n\t"
4176         "mov	r6, #252\n\t"
4177         "\n1:\n\t"
4178         "ldr	r8, [%[a], r6]\n\t"
4179         "ldr	r5, [%[b], r6]\n\t"
4180         "and	r8, r8, r3\n\t"
4181         "and	r5, r5, r3\n\t"
4182         "mov	r4, r8\n\t"
4183         "subs	r8, r8, r5\n\t"
4184         "sbc	r8, r8, r8\n\t"
4185         "add	%[r], %[r], r8\n\t"
4186         "mvn	r8, r8\n\t"
4187         "and	r3, r3, r8\n\t"
4188         "subs	r5, r5, r4\n\t"
4189         "sbc	r8, r8, r8\n\t"
4190         "sub	%[r], %[r], r8\n\t"
4191         "mvn	r8, r8\n\t"
4192         "and	r3, r3, r8\n\t"
4193         "sub	r6, r6, #4\n\t"
4194         "cmp	r6, #0\n\t"
4195 #ifdef __GNUC__
4196         "bge	1b\n\t"
4197 #else
4198         "bge.n	1b\n\t"
4199 #endif /* __GNUC__ */
4200         : [r] "+r" (r)
4201         : [a] "r" (a), [b] "r" (b)
4202         : "r3", "r4", "r5", "r6", "r8"
4203     );
4204 
4205     return r;
4206 }
4207 
4208 /* Divide d in a and put remainder into r (m*d + r = a)
4209  * m is not calculated as it is not needed at this time.
4210  *
4211  * a  Number to be divided.
4212  * d  Number to divide with.
4213  * m  Multiplier result.
4214  * r  Remainder from the division.
4215  * returns MP_OKAY indicating success.
4216  */
sp_2048_div_64(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)4217 static WC_INLINE int sp_2048_div_64(const sp_digit* a, const sp_digit* d, sp_digit* m,
4218         sp_digit* r)
4219 {
4220     sp_digit t1[128], t2[65];
4221     sp_digit div, r1;
4222     int i;
4223 
4224     (void)m;
4225 
4226     div = d[63];
4227     XMEMCPY(t1, a, sizeof(*t1) * 2 * 64);
4228     for (i=63; i>=0; i--) {
4229         sp_digit hi = t1[64 + i] - (t1[64 + i] == div);
4230         r1 = div_2048_word_64(hi, t1[64 + i - 1], div);
4231 
4232         sp_2048_mul_d_64(t2, d, r1);
4233         t1[64 + i] += sp_2048_sub_in_place_64(&t1[i], t2);
4234         t1[64 + i] -= t2[64];
4235         sp_2048_mask_64(t2, d, t1[64 + i]);
4236         t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
4237         sp_2048_mask_64(t2, d, t1[64 + i]);
4238         t1[64 + i] += sp_2048_add_64(&t1[i], &t1[i], t2);
4239     }
4240 
4241     r1 = sp_2048_cmp_64(t1, d) >= 0;
4242     sp_2048_cond_sub_64(r, t1, d, (sp_digit)0 - r1);
4243 
4244     return MP_OKAY;
4245 }
4246 
4247 /* Reduce a modulo m into r. (r = a mod m)
4248  *
4249  * r  A single precision number that is the reduced result.
4250  * a  A single precision number that is to be reduced.
4251  * m  A single precision number that is the modulus to reduce with.
4252  * returns MP_OKAY indicating success.
4253  */
sp_2048_mod_64(sp_digit * r,const sp_digit * a,const sp_digit * m)4254 static WC_INLINE int sp_2048_mod_64(sp_digit* r, const sp_digit* a, const sp_digit* m)
4255 {
4256     return sp_2048_div_64(a, m, NULL, r);
4257 }
4258 
4259 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
4260                                                      defined(WOLFSSL_HAVE_SP_DH)
4261 #ifdef WOLFSSL_SP_SMALL
4262 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
4263  *
4264  * r     A single precision number that is the result of the operation.
4265  * a     A single precision number being exponentiated.
4266  * e     A single precision number that is the exponent.
4267  * bits  The number of bits in the exponent.
4268  * m     A single precision number that is the modulus.
4269  * returns  0 on success.
4270  * returns  MEMORY_E on dynamic memory allocation failure.
4271  * returns  MP_VAL when base is even or exponent is 0.
4272  */
sp_2048_mod_exp_64(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)4273 static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
4274         int bits, const sp_digit* m, int reduceA)
4275 {
4276 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4277     sp_digit* td = NULL;
4278 #else
4279     sp_digit td[8 * 128];
4280 #endif
4281     sp_digit* t[8];
4282     sp_digit* norm = NULL;
4283     sp_digit mp = 1;
4284     sp_digit n;
4285     sp_digit mask;
4286     int i;
4287     int c;
4288     byte y;
4289     int err = MP_OKAY;
4290 
4291     if ((m[0] & 1) == 0) {
4292         err = MP_VAL;
4293     }
4294     else if (bits == 0) {
4295         err = MP_VAL;
4296     }
4297 
4298 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4299     if (err == MP_OKAY) {
4300         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 128), NULL,
4301                                 DYNAMIC_TYPE_TMP_BUFFER);
4302         if (td == NULL)
4303             err = MEMORY_E;
4304     }
4305 #endif
4306 
4307     if (err == MP_OKAY) {
4308         norm = td;
4309         for (i=0; i<8; i++) {
4310             t[i] = td + i * 128;
4311         }
4312 
4313         sp_2048_mont_setup(m, &mp);
4314         sp_2048_mont_norm_64(norm, m);
4315 
4316         XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
4317         if (reduceA != 0) {
4318             err = sp_2048_mod_64(t[1] + 64, a, m);
4319             if (err == MP_OKAY) {
4320                 err = sp_2048_mod_64(t[1], t[1], m);
4321             }
4322         }
4323         else {
4324             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
4325             err = sp_2048_mod_64(t[1], t[1], m);
4326         }
4327     }
4328 
4329     if (err == MP_OKAY) {
4330         sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
4331         sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
4332         sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
4333         sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
4334         sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
4335         sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
4336 
4337         i = (bits - 1) / 32;
4338         n = e[i--];
4339         c = bits & 31;
4340         if (c == 0) {
4341             c = 32;
4342         }
4343         c -= bits % 3;
4344         if (c == 32) {
4345             c = 29;
4346         }
4347         if (c < 0) {
4348             /* Number of bits in top word is less than number needed. */
4349             c = -c;
4350             y = (byte)(n << c);
4351             n = e[i--];
4352             y |= (byte)(n >> (64 - c));
4353             n <<= c;
4354             c = 64 - c;
4355         }
4356         else {
4357             y = (byte)(n >> c);
4358             n <<= 32 - c;
4359         }
4360         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
4361         for (; i>=0 || c>=3; ) {
4362             if (c == 0) {
4363                 n = e[i--];
4364                 y = (byte)(n >> 29);
4365                 n <<= 3;
4366                 c = 29;
4367             }
4368             else if (c < 3) {
4369                 y = (byte)(n >> 29);
4370                 n = e[i--];
4371                 c = 3 - c;
4372                 y |= (byte)(n >> (32 - c));
4373                 n <<= c;
4374                 c = 32 - c;
4375             }
4376             else {
4377                 y = (byte)((n >> 29) & 0x7);
4378                 n <<= 3;
4379                 c -= 3;
4380             }
4381 
4382             sp_2048_mont_sqr_64(r, r, m, mp);
4383             sp_2048_mont_sqr_64(r, r, m, mp);
4384             sp_2048_mont_sqr_64(r, r, m, mp);
4385 
4386             sp_2048_mont_mul_64(r, r, t[y], m, mp);
4387         }
4388 
4389         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
4390         sp_2048_mont_reduce_64(r, m, mp);
4391 
4392         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
4393         sp_2048_cond_sub_64(r, r, m, mask);
4394     }
4395 
4396 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4397     if (td != NULL)
4398         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4399 #endif
4400 
4401     return err;
4402 }
4403 #else
4404 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
4405  *
4406  * r     A single precision number that is the result of the operation.
4407  * a     A single precision number being exponentiated.
4408  * e     A single precision number that is the exponent.
4409  * bits  The number of bits in the exponent.
4410  * m     A single precision number that is the modulus.
4411  * returns  0 on success.
4412  * returns  MEMORY_E on dynamic memory allocation failure.
4413  * returns  MP_VAL when base is even or exponent is 0.
4414  */
sp_2048_mod_exp_64(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)4415 static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e,
4416         int bits, const sp_digit* m, int reduceA)
4417 {
4418 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4419     sp_digit* td = NULL;
4420 #else
4421     sp_digit td[16 * 128];
4422 #endif
4423     sp_digit* t[16];
4424     sp_digit* norm = NULL;
4425     sp_digit mp = 1;
4426     sp_digit n;
4427     sp_digit mask;
4428     int i;
4429     int c;
4430     byte y;
4431     int err = MP_OKAY;
4432 
4433     if ((m[0] & 1) == 0) {
4434         err = MP_VAL;
4435     }
4436     else if (bits == 0) {
4437         err = MP_VAL;
4438     }
4439 
4440 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4441     if (err == MP_OKAY) {
4442         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 128), NULL,
4443                                 DYNAMIC_TYPE_TMP_BUFFER);
4444         if (td == NULL)
4445             err = MEMORY_E;
4446     }
4447 #endif
4448 
4449     if (err == MP_OKAY) {
4450         norm = td;
4451         for (i=0; i<16; i++) {
4452             t[i] = td + i * 128;
4453         }
4454 
4455         sp_2048_mont_setup(m, &mp);
4456         sp_2048_mont_norm_64(norm, m);
4457 
4458         XMEMSET(t[1], 0, sizeof(sp_digit) * 64U);
4459         if (reduceA != 0) {
4460             err = sp_2048_mod_64(t[1] + 64, a, m);
4461             if (err == MP_OKAY) {
4462                 err = sp_2048_mod_64(t[1], t[1], m);
4463             }
4464         }
4465         else {
4466             XMEMCPY(t[1] + 64, a, sizeof(sp_digit) * 64);
4467             err = sp_2048_mod_64(t[1], t[1], m);
4468         }
4469     }
4470 
4471     if (err == MP_OKAY) {
4472         sp_2048_mont_sqr_64(t[ 2], t[ 1], m, mp);
4473         sp_2048_mont_mul_64(t[ 3], t[ 2], t[ 1], m, mp);
4474         sp_2048_mont_sqr_64(t[ 4], t[ 2], m, mp);
4475         sp_2048_mont_mul_64(t[ 5], t[ 3], t[ 2], m, mp);
4476         sp_2048_mont_sqr_64(t[ 6], t[ 3], m, mp);
4477         sp_2048_mont_mul_64(t[ 7], t[ 4], t[ 3], m, mp);
4478         sp_2048_mont_sqr_64(t[ 8], t[ 4], m, mp);
4479         sp_2048_mont_mul_64(t[ 9], t[ 5], t[ 4], m, mp);
4480         sp_2048_mont_sqr_64(t[10], t[ 5], m, mp);
4481         sp_2048_mont_mul_64(t[11], t[ 6], t[ 5], m, mp);
4482         sp_2048_mont_sqr_64(t[12], t[ 6], m, mp);
4483         sp_2048_mont_mul_64(t[13], t[ 7], t[ 6], m, mp);
4484         sp_2048_mont_sqr_64(t[14], t[ 7], m, mp);
4485         sp_2048_mont_mul_64(t[15], t[ 8], t[ 7], m, mp);
4486 
4487         i = (bits - 1) / 32;
4488         n = e[i--];
4489         c = bits & 31;
4490         if (c == 0) {
4491             c = 32;
4492         }
4493         c -= bits % 4;
4494         if (c == 32) {
4495             c = 28;
4496         }
4497         if (c < 0) {
4498             /* Number of bits in top word is less than number needed. */
4499             c = -c;
4500             y = (byte)(n << c);
4501             n = e[i--];
4502             y |= (byte)(n >> (64 - c));
4503             n <<= c;
4504             c = 64 - c;
4505         }
4506         else {
4507             y = (byte)(n >> c);
4508             n <<= 32 - c;
4509         }
4510         XMEMCPY(r, t[y], sizeof(sp_digit) * 64);
4511         for (; i>=0 || c>=4; ) {
4512             if (c == 0) {
4513                 n = e[i--];
4514                 y = (byte)(n >> 28);
4515                 n <<= 4;
4516                 c = 28;
4517             }
4518             else if (c < 4) {
4519                 y = (byte)(n >> 28);
4520                 n = e[i--];
4521                 c = 4 - c;
4522                 y |= (byte)(n >> (32 - c));
4523                 n <<= c;
4524                 c = 32 - c;
4525             }
4526             else {
4527                 y = (byte)((n >> 28) & 0xf);
4528                 n <<= 4;
4529                 c -= 4;
4530             }
4531 
4532             sp_2048_mont_sqr_64(r, r, m, mp);
4533             sp_2048_mont_sqr_64(r, r, m, mp);
4534             sp_2048_mont_sqr_64(r, r, m, mp);
4535             sp_2048_mont_sqr_64(r, r, m, mp);
4536 
4537             sp_2048_mont_mul_64(r, r, t[y], m, mp);
4538         }
4539 
4540         XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
4541         sp_2048_mont_reduce_64(r, m, mp);
4542 
4543         mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
4544         sp_2048_cond_sub_64(r, r, m, mask);
4545     }
4546 
4547 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4548     if (td != NULL)
4549         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
4550 #endif
4551 
4552     return err;
4553 }
4554 #endif /* WOLFSSL_SP_SMALL */
4555 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
4556 
4557 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
4558 #ifdef WOLFSSL_HAVE_SP_RSA
4559 /* RSA public key operation.
4560  *
4561  * in      Array of bytes representing the number to exponentiate, base.
4562  * inLen   Number of bytes in base.
4563  * em      Public exponent.
4564  * mm      Modulus.
4565  * out     Buffer to hold big-endian bytes of exponentiation result.
4566  *         Must be at least 256 bytes long.
4567  * outLen  Number of bytes in result.
4568  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
4569  * an array is too long and MEMORY_E when dynamic memory allocation fails.
4570  */
sp_RsaPublic_2048(const byte * in,word32 inLen,const mp_int * em,const mp_int * mm,byte * out,word32 * outLen)4571 int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em,
4572     const mp_int* mm, byte* out, word32* outLen)
4573 {
4574 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4575     sp_digit* a = NULL;
4576 #else
4577     sp_digit a[64 * 5];
4578 #endif
4579     sp_digit* m = NULL;
4580     sp_digit* r = NULL;
4581     sp_digit *ah = NULL;
4582     sp_digit e[1] = {0};
4583     int err = MP_OKAY;
4584 
4585     if (*outLen < 256) {
4586         err = MP_TO_E;
4587     }
4588     else if (mp_count_bits(em) > 32 || inLen > 256 ||
4589                                                      mp_count_bits(mm) != 2048) {
4590         err = MP_READ_E;
4591     }
4592     else if (mp_iseven(mm)) {
4593         err = MP_VAL;
4594     }
4595 
4596 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4597     if (err == MP_OKAY) {
4598         a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 5, NULL,
4599                                                               DYNAMIC_TYPE_RSA);
4600         if (a == NULL)
4601             err = MEMORY_E;
4602     }
4603 #endif
4604 
4605     if (err == MP_OKAY) {
4606         r = a + 64 * 2;
4607         m = r + 64 * 2;
4608         ah = a + 64;
4609 
4610         sp_2048_from_bin(ah, 64, in, inLen);
4611 #if DIGIT_BIT >= 32
4612         e[0] = em->dp[0];
4613 #else
4614         e[0] = em->dp[0];
4615         if (em->used > 1) {
4616             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
4617         }
4618 #endif
4619         if (e[0] == 0) {
4620             err = MP_EXPTMOD_E;
4621         }
4622     }
4623     if (err == MP_OKAY) {
4624         sp_2048_from_mp(m, 64, mm);
4625 
4626         if (e[0] == 0x3) {
4627             if (err == MP_OKAY) {
4628                 sp_2048_sqr_64(r, ah);
4629                 err = sp_2048_mod_64_cond(r, r, m);
4630             }
4631             if (err == MP_OKAY) {
4632                 sp_2048_mul_64(r, ah, r);
4633                 err = sp_2048_mod_64_cond(r, r, m);
4634             }
4635         }
4636         else {
4637             int i;
4638             sp_digit mp;
4639 
4640             sp_2048_mont_setup(m, &mp);
4641 
4642             /* Convert to Montgomery form. */
4643             XMEMSET(a, 0, sizeof(sp_digit) * 64);
4644             err = sp_2048_mod_64_cond(a, a, m);
4645 
4646             if (err == MP_OKAY) {
4647                 for (i = 31; i >= 0; i--) {
4648                     if (e[0] >> i) {
4649                         break;
4650                     }
4651                 }
4652 
4653                 XMEMCPY(r, a, sizeof(sp_digit) * 64);
4654                 for (i--; i>=0; i--) {
4655                     sp_2048_mont_sqr_64(r, r, m, mp);
4656                     if (((e[0] >> i) & 1) == 1) {
4657                         sp_2048_mont_mul_64(r, r, a, m, mp);
4658                     }
4659                 }
4660                 XMEMSET(&r[64], 0, sizeof(sp_digit) * 64);
4661                 sp_2048_mont_reduce_64(r, m, mp);
4662 
4663                 for (i = 63; i > 0; i--) {
4664                     if (r[i] != m[i]) {
4665                         break;
4666                     }
4667                 }
4668                 if (r[i] >= m[i]) {
4669                     sp_2048_sub_in_place_64(r, m);
4670                 }
4671             }
4672         }
4673     }
4674 
4675     if (err == MP_OKAY) {
4676         sp_2048_to_bin_64(r, out);
4677         *outLen = 256;
4678     }
4679 
4680 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4681     if (a != NULL)
4682         XFREE(a, NULL, DYNAMIC_TYPE_RSA);
4683 #endif
4684 
4685     return err;
4686 }
4687 
4688 #ifndef WOLFSSL_RSA_PUBLIC_ONLY
4689 /* Conditionally add a and b using the mask m.
4690  * m is -1 to add and 0 when not.
4691  *
4692  * r  A single precision number representing conditional add result.
4693  * a  A single precision number to add with.
4694  * b  A single precision number to add.
4695  * m  Mask value to apply.
4696  */
sp_2048_cond_add_32(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)4697 SP_NOINLINE static sp_digit sp_2048_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
4698         sp_digit m)
4699 {
4700     sp_digit c = 0;
4701 
4702     __asm__ __volatile__ (
4703         "mov	r5, #128\n\t"
4704         "mov	r9, r5\n\t"
4705         "mov	r8, #0\n\t"
4706         "\n1:\n\t"
4707         "ldr	r6, [%[b], r8]\n\t"
4708         "and	r6, r6, %[m]\n\t"
4709         "adds	r5, %[c], #-1\n\t"
4710         "ldr	r5, [%[a], r8]\n\t"
4711         "adcs	r5, r5, r6\n\t"
4712         "mov	%[c], #0\n\t"
4713         "adcs	%[c], %[c], %[c]\n\t"
4714         "str	r5, [%[r], r8]\n\t"
4715         "add	r8, r8, #4\n\t"
4716         "cmp	r8, r9\n\t"
4717 #ifdef __GNUC__
4718         "blt	1b\n\t"
4719 #else
4720         "blt.n	1b\n\t"
4721 #endif /* __GNUC__ */
4722         : [c] "+r" (c)
4723         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
4724         : "memory", "r5", "r6", "r8", "r9"
4725     );
4726 
4727     return c;
4728 }
4729 
4730 /* RSA private key operation.
4731  *
4732  * in      Array of bytes representing the number to exponentiate, base.
4733  * inLen   Number of bytes in base.
4734  * dm      Private exponent.
4735  * pm      First prime.
4736  * qm      Second prime.
4737  * dpm     First prime's CRT exponent.
4738  * dqm     Second prime's CRT exponent.
4739  * qim     Inverse of second prime mod p.
4740  * mm      Modulus.
4741  * out     Buffer to hold big-endian bytes of exponentiation result.
4742  *         Must be at least 256 bytes long.
4743  * outLen  Number of bytes in result.
4744  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
4745  * an array is too long and MEMORY_E when dynamic memory allocation fails.
4746  */
sp_RsaPrivate_2048(const byte * in,word32 inLen,const mp_int * dm,const mp_int * pm,const mp_int * qm,const mp_int * dpm,const mp_int * dqm,const mp_int * qim,const mp_int * mm,byte * out,word32 * outLen)4747 int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm,
4748     const mp_int* pm, const mp_int* qm, const mp_int* dpm, const mp_int* dqm,
4749     const mp_int* qim, const mp_int* mm, byte* out, word32* outLen)
4750 {
4751 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
4752 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4753     sp_digit* d = NULL;
4754 #else
4755     sp_digit  d[64 * 4];
4756 #endif
4757     sp_digit* a = NULL;
4758     sp_digit* m = NULL;
4759     sp_digit* r = NULL;
4760     int err = MP_OKAY;
4761 
4762     (void)pm;
4763     (void)qm;
4764     (void)dpm;
4765     (void)dqm;
4766     (void)qim;
4767 
4768     if (*outLen < 256U) {
4769         err = MP_TO_E;
4770     }
4771     if (err == MP_OKAY) {
4772         if (mp_count_bits(dm) > 2048) {
4773            err = MP_READ_E;
4774         }
4775         else if (inLen > 256) {
4776             err = MP_READ_E;
4777         }
4778         else if (mp_count_bits(mm) != 2048) {
4779             err = MP_READ_E;
4780         }
4781         else if (mp_iseven(mm)) {
4782             err = MP_VAL;
4783         }
4784     }
4785 
4786 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4787     if (err == MP_OKAY) {
4788         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 4, NULL,
4789                                                               DYNAMIC_TYPE_RSA);
4790         if (d == NULL)
4791             err = MEMORY_E;
4792     }
4793 #endif
4794 
4795     if (err == MP_OKAY) {
4796         a = d + 64;
4797         m = a + 128;
4798         r = a;
4799 
4800         sp_2048_from_bin(a, 64, in, inLen);
4801         sp_2048_from_mp(d, 64, dm);
4802         sp_2048_from_mp(m, 64, mm);
4803         err = sp_2048_mod_exp_64(r, a, d, 2048, m, 0);
4804     }
4805 
4806     if (err == MP_OKAY) {
4807         sp_2048_to_bin_64(r, out);
4808         *outLen = 256;
4809     }
4810 
4811 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4812     if (d != NULL)
4813 #endif
4814     {
4815         /* only "a" and "r" are sensitive and need zeroized (same pointer) */
4816         if (a != NULL)
4817             ForceZero(a, sizeof(sp_digit) * 64);
4818 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4819         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
4820 #endif
4821     }
4822 
4823     return err;
4824 #else
4825 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4826     sp_digit* a = NULL;
4827 #else
4828     sp_digit a[32 * 11];
4829 #endif
4830     sp_digit* p = NULL;
4831     sp_digit* q = NULL;
4832     sp_digit* dp = NULL;
4833     sp_digit* tmpa = NULL;
4834     sp_digit* tmpb = NULL;
4835     sp_digit* r = NULL;
4836     sp_digit* qi = NULL;
4837     sp_digit* dq = NULL;
4838     sp_digit c;
4839     int err = MP_OKAY;
4840 
4841     (void)dm;
4842     (void)mm;
4843 
4844     if (*outLen < 256) {
4845         err = MP_TO_E;
4846     }
4847     else if (inLen > 256 || mp_count_bits(mm) != 2048) {
4848         err = MP_READ_E;
4849     }
4850     else if (mp_iseven(mm)) {
4851         err = MP_VAL;
4852     }
4853 
4854 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4855     if (err == MP_OKAY) {
4856         a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 11, NULL,
4857                                                               DYNAMIC_TYPE_RSA);
4858         if (a == NULL)
4859             err = MEMORY_E;
4860     }
4861 #endif
4862 
4863     if (err == MP_OKAY) {
4864         p = a + 64 * 2;
4865         q = p + 32;
4866         qi = dq = dp = q + 32;
4867         tmpa = qi + 32;
4868         tmpb = tmpa + 64;
4869         r = a;
4870 
4871         sp_2048_from_bin(a, 64, in, inLen);
4872         sp_2048_from_mp(p, 32, pm);
4873         sp_2048_from_mp(q, 32, qm);
4874         sp_2048_from_mp(dp, 32, dpm);
4875 
4876         err = sp_2048_mod_exp_32(tmpa, a, dp, 1024, p, 1);
4877     }
4878     if (err == MP_OKAY) {
4879         sp_2048_from_mp(dq, 32, dqm);
4880         err = sp_2048_mod_exp_32(tmpb, a, dq, 1024, q, 1);
4881     }
4882 
4883     if (err == MP_OKAY) {
4884         c = sp_2048_sub_in_place_32(tmpa, tmpb);
4885         c += sp_2048_cond_add_32(tmpa, tmpa, p, c);
4886         sp_2048_cond_add_32(tmpa, tmpa, p, c);
4887 
4888         sp_2048_from_mp(qi, 32, qim);
4889         sp_2048_mul_32(tmpa, tmpa, qi);
4890         err = sp_2048_mod_32(tmpa, tmpa, p);
4891     }
4892 
4893     if (err == MP_OKAY) {
4894         sp_2048_mul_32(tmpa, q, tmpa);
4895         XMEMSET(&tmpb[32], 0, sizeof(sp_digit) * 32);
4896         sp_2048_add_64(r, tmpb, tmpa);
4897 
4898         sp_2048_to_bin_64(r, out);
4899         *outLen = 256;
4900     }
4901 
4902 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4903     if (a != NULL)
4904 #endif
4905     {
4906         ForceZero(a, sizeof(sp_digit) * 32 * 11);
4907     #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4908         XFREE(a, NULL, DYNAMIC_TYPE_RSA);
4909     #endif
4910     }
4911 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
4912     return err;
4913 }
4914 #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
4915 #endif /* WOLFSSL_HAVE_SP_RSA */
4916 #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
4917                                               !defined(WOLFSSL_RSA_PUBLIC_ONLY))
4918 /* Convert an array of sp_digit to an mp_int.
4919  *
4920  * a  A single precision integer.
4921  * r  A multi-precision integer.
4922  */
sp_2048_to_mp(const sp_digit * a,mp_int * r)4923 static int sp_2048_to_mp(const sp_digit* a, mp_int* r)
4924 {
4925     int err;
4926 
4927     err = mp_grow(r, (2048 + DIGIT_BIT - 1) / DIGIT_BIT);
4928     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
4929 #if DIGIT_BIT == 32
4930         XMEMCPY(r->dp, a, sizeof(sp_digit) * 64);
4931         r->used = 64;
4932         mp_clamp(r);
4933 #elif DIGIT_BIT < 32
4934         int i;
4935         int j = 0;
4936         int s = 0;
4937 
4938         r->dp[0] = 0;
4939         for (i = 0; i < 64; i++) {
4940             r->dp[j] |= (mp_digit)(a[i] << s);
4941             r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
4942             s = DIGIT_BIT - s;
4943             r->dp[++j] = (mp_digit)(a[i] >> s);
4944             while (s + DIGIT_BIT <= 32) {
4945                 s += DIGIT_BIT;
4946                 r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
4947                 if (s == SP_WORD_SIZE) {
4948                     r->dp[j] = 0;
4949                 }
4950                 else {
4951                     r->dp[j] = (mp_digit)(a[i] >> s);
4952                 }
4953             }
4954             s = 32 - s;
4955         }
4956         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
4957         mp_clamp(r);
4958 #else
4959         int i;
4960         int j = 0;
4961         int s = 0;
4962 
4963         r->dp[0] = 0;
4964         for (i = 0; i < 64; i++) {
4965             r->dp[j] |= ((mp_digit)a[i]) << s;
4966             if (s + 32 >= DIGIT_BIT) {
4967     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
4968                 r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
4969     #endif
4970                 s = DIGIT_BIT - s;
4971                 r->dp[++j] = a[i] >> s;
4972                 s = 32 - s;
4973             }
4974             else {
4975                 s += 32;
4976             }
4977         }
4978         r->used = (2048 + DIGIT_BIT - 1) / DIGIT_BIT;
4979         mp_clamp(r);
4980 #endif
4981     }
4982 
4983     return err;
4984 }
4985 
4986 /* Perform the modular exponentiation for Diffie-Hellman.
4987  *
4988  * base  Base. MP integer.
4989  * exp   Exponent. MP integer.
4990  * mod   Modulus. MP integer.
4991  * res   Result. MP integer.
4992  * returns 0 on success, MP_READ_E if there are too many bytes in an array
4993  * and MEMORY_E if memory allocation fails.
4994  */
sp_ModExp_2048(const mp_int * base,const mp_int * exp,const mp_int * mod,mp_int * res)4995 int sp_ModExp_2048(const mp_int* base, const mp_int* exp, const mp_int* mod,
4996     mp_int* res)
4997 {
4998     int err = MP_OKAY;
4999     sp_digit b[128];
5000     sp_digit e[64];
5001     sp_digit m[64];
5002     sp_digit* r = b;
5003     int expBits = mp_count_bits(exp);
5004 
5005     if (mp_count_bits(base) > 2048) {
5006         err = MP_READ_E;
5007     }
5008     else if (expBits > 2048) {
5009         err = MP_READ_E;
5010     }
5011     else if (mp_count_bits(mod) != 2048) {
5012         err = MP_READ_E;
5013     }
5014     else if (mp_iseven(mod)) {
5015         err = MP_VAL;
5016     }
5017 
5018     if (err == MP_OKAY) {
5019         sp_2048_from_mp(b, 64, base);
5020         sp_2048_from_mp(e, 64, exp);
5021         sp_2048_from_mp(m, 64, mod);
5022 
5023         err = sp_2048_mod_exp_64(r, b, e, expBits, m, 0);
5024     }
5025 
5026     if (err == MP_OKAY) {
5027         err = sp_2048_to_mp(r, res);
5028     }
5029 
5030     XMEMSET(e, 0, sizeof(e));
5031 
5032     return err;
5033 }
5034 
5035 #ifdef WOLFSSL_HAVE_SP_DH
5036 
5037 #ifdef HAVE_FFDHE_2048
/* Shift the 64 word number a left by n bits and store the 65 word result.
 *
 * Words are processed from the most significant down, and each source word
 * is loaded before the result word at the same index is stored.
 * Each carried-in part is computed as (word >> 1) >> (31 - n).
 *
 * r  Result of the shift. Must have space for 65 words.
 * a  Number to shift. 64 words.
 * n  Number of bits to shift by.
 *    NOTE(review): the (31 - n) shift count implies n is expected to be in
 *    the range 0..31 - confirm against callers.
 */
static void sp_2048_lshift_64(sp_digit* r, sp_digit* a, byte n)
{
    __asm__ __volatile__ (
        /* r6 = 31 - n: used to extract the bits carried into the next word. */
        "mov r6, #31\n\t"
        "sub r6, r6, %[n]\n\t"
        /* Point at words 48..63. */
        "add       %[a], %[a], #192\n\t"
        "add       %[r], %[r], #192\n\t"
        "ldr r3, [%[a], #60]\n\t"
        "lsr r4, r3, #1\n\t"
        "lsl r3, r3, %[n]\n\t"
        "lsr r4, r4, r6\n\t"
        "ldr       r2, [%[a], #56]\n\t"
        "str       r4, [%[r], #64]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #52]\n\t"
        "str       r3, [%[r], #60]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #48]\n\t"
        "str       r2, [%[r], #56]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #44]\n\t"
        "str       r4, [%[r], #52]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #40]\n\t"
        "str       r3, [%[r], #48]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #36]\n\t"
        "str       r2, [%[r], #44]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #32]\n\t"
        "str       r4, [%[r], #40]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #28]\n\t"
        "str       r3, [%[r], #36]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #24]\n\t"
        "str       r2, [%[r], #32]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #20]\n\t"
        "str       r4, [%[r], #28]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #16]\n\t"
        "str       r3, [%[r], #24]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #12]\n\t"
        "str       r2, [%[r], #20]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #8]\n\t"
        "str       r4, [%[r], #16]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #4]\n\t"
        "str       r3, [%[r], #12]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #0]\n\t"
        "str       r2, [%[r], #8]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        /* Point at words 32..47. */
        "sub     %[a], %[a], #64\n\t"
        "sub     %[r], %[r], #64\n\t"
        "ldr       r2, [%[a], #60]\n\t"
        "str       r4, [%[r], #68]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #56]\n\t"
        "str       r3, [%[r], #64]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #52]\n\t"
        "str       r2, [%[r], #60]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #48]\n\t"
        "str       r4, [%[r], #56]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #44]\n\t"
        "str       r3, [%[r], #52]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #40]\n\t"
        "str       r2, [%[r], #48]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #36]\n\t"
        "str       r4, [%[r], #44]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #32]\n\t"
        "str       r3, [%[r], #40]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #28]\n\t"
        "str       r2, [%[r], #36]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #24]\n\t"
        "str       r4, [%[r], #32]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #20]\n\t"
        "str       r3, [%[r], #28]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #16]\n\t"
        "str       r2, [%[r], #24]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #12]\n\t"
        "str       r4, [%[r], #20]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #8]\n\t"
        "str       r3, [%[r], #16]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #4]\n\t"
        "str       r2, [%[r], #12]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #0]\n\t"
        "str       r4, [%[r], #8]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        /* Point at words 16..31. */
        "sub     %[a], %[a], #64\n\t"
        "sub     %[r], %[r], #64\n\t"
        "ldr       r4, [%[a], #60]\n\t"
        "str       r3, [%[r], #68]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #56]\n\t"
        "str       r2, [%[r], #64]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #52]\n\t"
        "str       r4, [%[r], #60]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #48]\n\t"
        "str       r3, [%[r], #56]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #44]\n\t"
        "str       r2, [%[r], #52]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #40]\n\t"
        "str       r4, [%[r], #48]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #36]\n\t"
        "str       r3, [%[r], #44]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #32]\n\t"
        "str       r2, [%[r], #40]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #28]\n\t"
        "str       r4, [%[r], #36]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #24]\n\t"
        "str       r3, [%[r], #32]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #20]\n\t"
        "str       r2, [%[r], #28]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #16]\n\t"
        "str       r4, [%[r], #24]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #12]\n\t"
        "str       r3, [%[r], #20]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #8]\n\t"
        "str       r2, [%[r], #16]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #4]\n\t"
        "str       r4, [%[r], #12]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #0]\n\t"
        "str       r3, [%[r], #8]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        /* Point at words 0..15. */
        "sub     %[a], %[a], #64\n\t"
        "sub     %[r], %[r], #64\n\t"
        "ldr       r3, [%[a], #60]\n\t"
        "str       r2, [%[r], #68]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #56]\n\t"
        "str       r4, [%[r], #64]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #52]\n\t"
        "str       r3, [%[r], #60]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #48]\n\t"
        "str       r2, [%[r], #56]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #44]\n\t"
        "str       r4, [%[r], #52]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #40]\n\t"
        "str       r3, [%[r], #48]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #36]\n\t"
        "str       r2, [%[r], #44]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #32]\n\t"
        "str       r4, [%[r], #40]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #28]\n\t"
        "str       r3, [%[r], #36]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #24]\n\t"
        "str       r2, [%[r], #32]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #20]\n\t"
        "str       r4, [%[r], #28]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #16]\n\t"
        "str       r3, [%[r], #24]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #12]\n\t"
        "str       r2, [%[r], #20]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "ldr       r2, [%[a], #8]\n\t"
        "str       r4, [%[r], #16]\n\t"
        "lsr       r5, r2, #1\n\t"
        "lsl       r2, r2, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r3, r3, r5\n\t"
        "ldr       r4, [%[a], #4]\n\t"
        "str       r3, [%[r], #12]\n\t"
        "lsr       r5, r4, #1\n\t"
        "lsl       r4, r4, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r2, r2, r5\n\t"
        "ldr       r3, [%[a], #0]\n\t"
        "str       r2, [%[r], #8]\n\t"
        "lsr       r5, r3, #1\n\t"
        "lsl       r3, r3, %[n]\n\t"
        "lsr       r5, r5, r6\n\t"
        "orr       r4, r4, r5\n\t"
        "str r3, [%[r]]\n\t"
        "str r4, [%[r], #4]\n\t"
        /* r and a are modified by the add/sub instructions above, so they
         * must be read-write operands: as input-only operands the compiler
         * may give both the same register when the function is inlined into
         * a call with r == a. */
        : [r] "+r" (r), [a] "+r" (a)
        : [n] "r" (n)
        /* "cc" is listed conservatively: in non-unified Thumb syntax these
         * mnemonics assemble to flag-setting encodings. */
        : "memory", "r2", "r3", "r4", "r5", "r6", "cc"
    );
}
5440 
5441 /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
5442  *
5443  * r     A single precision number that is the result of the operation.
5444  * e     A single precision number that is the exponent.
5445  * bits  The number of bits in the exponent.
5446  * m     A single precision number that is the modulus.
5447  * returns  0 on success.
5448  * returns  MEMORY_E on dynamic memory allocation failure.
5449  * returns  MP_VAL when base is even.
5450  */
static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits,
        const sp_digit* m)
{
    /* Modular exponentiation with the fixed base 2, processing the exponent
     * in 5-bit windows from the most significant end.
     *
     * r     Result. Words r[64..127] are also written, so it must hold at
     *       least 128 words.
     * e     Exponent as an array of 32-bit words, least significant first.
     * bits  Number of bits in the exponent.
     * m     Modulus (64 words). Must be odd.
     * returns MP_OKAY on success, MP_VAL when the modulus is even and
     * MEMORY_E when the temporary buffer cannot be allocated.
     */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* td = NULL;
#else
    sp_digit td[193];
#endif
    sp_digit* norm = NULL;
    sp_digit* tmp = NULL;
    sp_digit mp = 1;
    sp_digit n;
    sp_digit o;
    sp_digit mask;
    int i;
    int c;
    byte y;
    int err = MP_OKAY;

    if ((m[0] & 1) == 0) {
        err = MP_VAL;
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 193, NULL,
                                DYNAMIC_TYPE_TMP_BUFFER);
        if (td == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        /* td layout: norm at word 0, tmp (65 words used) at word 128. */
        norm = td;
        tmp = td + 128;

        sp_2048_mont_setup(m, &mp);
        /* NOTE(review): norm is presumably the Montgomery form of 1 -
         * confirm against sp_2048_mont_norm_64. */
        sp_2048_mont_norm_64(norm, m);

        /* Start at the top exponent word; c counts the bits of n that are
         * still unused once the first (bits % 5)-bit window is taken. */
        i = (bits - 1) / 32;
        n = e[i--];
        c = bits & 31;
        if (c == 0) {
            c = 32;
        }
        c -= bits % 5;
        if (c == 32) {
            /* Full top word and bits % 5 == 0: take a whole 5-bit window. */
            c = 27;
        }
        if (c < 0) {
            /* Number of bits in top word is less than number needed.
             * Words are 32 bits wide, so the remainder of the window comes
             * from the top of the next word. */
            c = -c;
            y = (byte)(n << c);
            n = e[i--];
            y |= (byte)(n >> (32 - c));
            n <<= c;
            c = 32 - c;
        }
        else if (c == 0) {
            /* All bits in top word used - avoid shifting by the word size.
             * The loop below loads the next word when c is 0. */
            y = (byte)n;
        }
        else {
            y = (byte)(n >> c);
            n <<= 32 - c;
        }
        sp_2048_lshift_64(r, norm, y);
        for (; i>=0 || c>=5; ) {
            if (c == 0) {
                /* Current word exhausted: window is top 5 bits of next. */
                n = e[i--];
                y = (byte)(n >> 27);
                n <<= 5;
                c = 27;
            }
            else if (c < 5) {
                /* Window straddles two exponent words. */
                y = (byte)(n >> 27);
                n = e[i--];
                c = 5 - c;
                y |= (byte)(n >> (32 - c));
                n <<= c;
                c = 32 - c;
            }
            else {
                y = (byte)((n >> 27) & 0x1f);
                n <<= 5;
                c -= 5;
            }

            /* Five squarings: r = r^(2^5). */
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);
            sp_2048_mont_sqr_64(r, r, m, mp);

            /* Multiply by 2^y with a shift, then fold the overflow word
             * r[64] back in as r[64] * norm and conditionally subtract m
             * when that addition carries. */
            sp_2048_lshift_64(r, r, y);
            sp_2048_mul_d_64(tmp, norm, r[64]);
            r[64] = 0;
            o = sp_2048_add_64(r, r, tmp);
            sp_2048_cond_sub_64(r, r, m, (sp_digit)0 - o);
        }

        /* Clear the upper half and reduce to leave Montgomery form. */
        XMEMSET(&r[64], 0, sizeof(sp_digit) * 64U);
        sp_2048_mont_reduce_64(r, m, mp);

        /* Final conditional subtract so that r < m. */
        mask = 0 - (sp_2048_cmp_64(r, m) >= 0);
        sp_2048_cond_sub_64(r, r, m, mask);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL)
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif

    return err;
}
5562 #endif /* HAVE_FFDHE_2048 */
5563 
5564 /* Perform the modular exponentiation for Diffie-Hellman.
5565  *
5566  * base     Base.
5567  * exp      Array of bytes that is the exponent.
5568  * expLen   Length of data, in bytes, in exponent.
5569  * mod      Modulus.
5570  * out      Buffer to hold big-endian bytes of exponentiation result.
5571  *          Must be at least 256 bytes long.
5572  * outLen   Length, in bytes, of exponentiation result.
5573  * returns 0 on success, MP_READ_E if there are too many bytes in an array
5574  * and MEMORY_E if memory allocation fails.
5575  */
sp_DhExp_2048(const mp_int * base,const byte * exp,word32 expLen,const mp_int * mod,byte * out,word32 * outLen)5576 int sp_DhExp_2048(const mp_int* base, const byte* exp, word32 expLen,
5577     const mp_int* mod, byte* out, word32* outLen)
5578 {
5579     int err = MP_OKAY;
5580     sp_digit b[128];
5581     sp_digit e[64];
5582     sp_digit m[64];
5583     sp_digit* r = b;
5584     word32 i;
5585 
5586     if (mp_count_bits(base) > 2048) {
5587         err = MP_READ_E;
5588     }
5589     else if (expLen > 256) {
5590         err = MP_READ_E;
5591     }
5592     else if (mp_count_bits(mod) != 2048) {
5593         err = MP_READ_E;
5594     }
5595     else if (mp_iseven(mod)) {
5596         err = MP_VAL;
5597     }
5598 
5599     if (err == MP_OKAY) {
5600         sp_2048_from_mp(b, 64, base);
5601         sp_2048_from_bin(e, 64, exp, expLen);
5602         sp_2048_from_mp(m, 64, mod);
5603 
5604     #ifdef HAVE_FFDHE_2048
5605         if (base->used == 1 && base->dp[0] == 2 && m[63] == (sp_digit)-1)
5606             err = sp_2048_mod_exp_2_64(r, e, expLen * 8, m);
5607         else
5608     #endif
5609             err = sp_2048_mod_exp_64(r, b, e, expLen * 8, m, 0);
5610 
5611     }
5612 
5613     if (err == MP_OKAY) {
5614         sp_2048_to_bin_64(r, out);
5615         *outLen = 256;
5616         for (i=0; i<256 && out[i] == 0; i++) {
5617             /* Search for first non-zero. */
5618         }
5619         *outLen -= i;
5620         XMEMMOVE(out, out + i, *outLen);
5621 
5622     }
5623 
5624     XMEMSET(e, 0, sizeof(e));
5625 
5626     return err;
5627 }
5628 #endif /* WOLFSSL_HAVE_SP_DH */
5629 
5630 /* Perform the modular exponentiation for Diffie-Hellman.
5631  *
5632  * base  Base. MP integer.
5633  * exp   Exponent. MP integer.
5634  * mod   Modulus. MP integer.
5635  * res   Result. MP integer.
5636  * returns 0 on success, MP_READ_E if there are too many bytes in an array
5637  * and MEMORY_E if memory allocation fails.
5638  */
sp_ModExp_1024(const mp_int * base,const mp_int * exp,const mp_int * mod,mp_int * res)5639 int sp_ModExp_1024(const mp_int* base, const mp_int* exp, const mp_int* mod,
5640     mp_int* res)
5641 {
5642     int err = MP_OKAY;
5643     sp_digit b[64];
5644     sp_digit e[32];
5645     sp_digit m[32];
5646     sp_digit* r = b;
5647     int expBits = mp_count_bits(exp);
5648 
5649     if (mp_count_bits(base) > 1024) {
5650         err = MP_READ_E;
5651     }
5652     else if (expBits > 1024) {
5653         err = MP_READ_E;
5654     }
5655     else if (mp_count_bits(mod) != 1024) {
5656         err = MP_READ_E;
5657     }
5658     else if (mp_iseven(mod)) {
5659         err = MP_VAL;
5660     }
5661 
5662     if (err == MP_OKAY) {
5663         sp_2048_from_mp(b, 32, base);
5664         sp_2048_from_mp(e, 32, exp);
5665         sp_2048_from_mp(m, 32, mod);
5666 
5667         err = sp_2048_mod_exp_32(r, b, e, expBits, m, 0);
5668     }
5669 
5670     if (err == MP_OKAY) {
5671         XMEMSET(r + 32, 0, sizeof(*r) * 32U);
5672         err = sp_2048_to_mp(r, res);
5673         res->used = mod->used;
5674         mp_clamp(res);
5675     }
5676 
5677     XMEMSET(e, 0, sizeof(e));
5678 
5679     return err;
5680 }
5681 
5682 #endif /* WOLFSSL_HAVE_SP_DH | (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) */
5683 
5684 #endif /* !WOLFSSL_SP_NO_2048 */
5685 
5686 #ifndef WOLFSSL_SP_NO_3072
5687 /* Read big endian unsigned byte array into r.
5688  *
5689  * r  A single precision integer.
5690  * size  Maximum number of bytes to convert
5691  * a  Byte array.
5692  * n  Number of bytes in array to read.
5693  */
sp_3072_from_bin(sp_digit * r,int size,const byte * a,int n)5694 static void sp_3072_from_bin(sp_digit* r, int size, const byte* a, int n)
5695 {
5696     int i;
5697     int j = 0;
5698     word32 s = 0;
5699 
5700     r[0] = 0;
5701     for (i = n-1; i >= 0; i--) {
5702         r[j] |= (((sp_digit)a[i]) << s);
5703         if (s >= 24U) {
5704             r[j] &= 0xffffffff;
5705             s = 32U - s;
5706             if (j + 1 >= size) {
5707                 break;
5708             }
5709             r[++j] = (sp_digit)a[i] >> s;
5710             s = 8U - s;
5711         }
5712         else {
5713             s += 8U;
5714         }
5715     }
5716 
5717     for (j++; j < size; j++) {
5718         r[j] = 0;
5719     }
5720 }
5721 
/* Convert an mp_int to an array of sp_digit.
 *
 * Each sp_digit receives 32 bits of the value, least significant word
 * first. Words of r not filled from a are set to zero. One of three
 * implementations is compiled depending on DIGIT_BIT, the number of bits
 * in each mp_int digit.
 *
 * r  A single precision integer.
 * size  Number of words available in r.
 * a  A multi-precision integer.
 */
static void sp_3072_from_mp(sp_digit* r, int size, const mp_int* a)
{
#if DIGIT_BIT == 32
    /* Digits are the same width as words: copy and zero pad. */
    int j;

    /* NOTE(review): a->used is not checked against size here - callers
     * are presumably expected to have bounded the bit length already. */
    XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);

    for (j = a->used; j < size; j++) {
        r[j] = 0;
    }
#elif DIGIT_BIT > 32
    /* Digits are wider than words: split each digit across words.
     * s is the bit offset at which the next piece is taken/placed. */
    int i;
    int j = 0;
    word32 s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        /* Finish the current word with the low bits of this digit. */
        r[j] |= ((sp_digit)a->dp[i] << s);
        r[j] &= 0xffffffff;
        s = 32U - s;
        if (j + 1 >= size) {
            break;
        }
        /* lint allow cast of mismatch word32 and mp_digit */
        r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
        /* Emit further whole words while the digit has 32 more bits. */
        while ((s + 32U) <= (word32)DIGIT_BIT) {
            s += 32U;
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            if (s < (word32)DIGIT_BIT) {
                /* lint allow cast of mismatch word32 and mp_digit */
                r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
            }
            else {
                r[++j] = (sp_digit)0;
            }
        }
        /* Number of bits of this digit already in the partial word. */
        s = (word32)DIGIT_BIT - s;
    }

    /* Zero the remaining words. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
#else
    /* Digits are narrower than words: pack several digits into a word.
     * s is the bit position in the current word for the next digit. */
    int i;
    int j = 0;
    int s = 0;

    r[0] = 0;
    for (i = 0; i < a->used && j < size; i++) {
        r[j] |= ((sp_digit)a->dp[i]) << s;
        if (s + DIGIT_BIT >= 32) {
            /* This digit fills the word: mask and carry any spare bits
             * into the next word. */
            r[j] &= 0xffffffff;
            if (j + 1 >= size) {
                break;
            }
            s = 32 - s;
            if (s == DIGIT_BIT) {
                /* Digit ended exactly on the word boundary. */
                r[++j] = 0;
                s = 0;
            }
            else {
                r[++j] = a->dp[i] >> s;
                s = DIGIT_BIT - s;
            }
        }
        else {
            s += DIGIT_BIT;
        }
    }

    /* Zero the remaining words. */
    for (j++; j < size; j++) {
        r[j] = 0;
    }
#endif
}
5806 
/* Write r as big endian to byte array.
 * Fixed length number of bytes written: 384
 *
 * The least significant word r[0] ends up in the last bytes of a.
 *
 * r  A single precision integer (96 words).
 * a  Byte array. Must hold at least 384 bytes.
 */
static void sp_3072_to_bin_96(sp_digit* r, byte* a)
{
    int i;
    int j;
    int s = 0;
    int b;

    /* j indexes the output from its last byte backwards. */
    j = 3072 / 8 - 1;
    a[j] = 0;
    for (i=0; i<96 && j>=0; i++) {
        /* b counts the bits of r[i] that have been written so far. */
        b = 0;
        /* lint allow cast of mismatch sp_digit and int */
        a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
        b += 8 - s;
        if (j < 0) {
            break;
        }
        /* Write the remaining bytes of this word. */
        while (b < 32) {
            a[j--] = (byte)(r[i] >> b);
            b += 8;
            if (j < 0) {
                break;
            }
        }
        /* s is the shift applied to the next word when it is OR-ed into the
         * byte at a[j]; that byte is cleared first and, when s is non-zero,
         * j steps back onto the byte written last. */
        s = 8 - (b - 32);
        if (j >= 0) {
            a[j] = 0;
        }
        if (s != 0) {
            j++;
        }
    }
}
5846 
#if (defined(WOLFSSL_HAVE_SP_RSA) && (!defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(WOLFSSL_SP_SMALL))) || defined(WOLFSSL_HAVE_SP_DH)
/* Normalize the values in each word to 32 bits.
 *
 * The macro expands to nothing in this implementation.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_3072_norm_96(a)

#endif /* (WOLFSSL_HAVE_SP_RSA && (!WOLFSSL_RSA_PUBLIC_ONLY || !WOLFSSL_SP_SMALL)) || WOLFSSL_HAVE_SP_DH */
/* Normalize the values in each word to 32 bits.
 *
 * Unconditional definition; identical to the conditional one above, and
 * it likewise expands to nothing.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_3072_norm_96(a)
5860 
5861 #ifndef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * The 24-word product is accumulated one output word (column) at a time
 * into a local buffer and copied to r once complete.
 *
 * r  A single precision integer. Receives 24 words.
 * a  A single precision integer. 12 words.
 * b  A single precision integer. 12 words.
 */
SP_NOINLINE static void sp_3072_mul_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    /* Product is built here and copied out at the end. */
    sp_digit tmp_arr[12 * 2];
    sp_digit* tmp = tmp_arr;
    /* Register use in the assembly:
     *   r3, r4, r5  three word column accumulator (low, middle, high)
     *   r9          byte offset of the output word being computed
     *   r10, r11    saved a and b; r12 saved output pointer (tmp)
     *   r14         address one past the last word of a (a + 48 bytes)
     * NOTE(review): %[r], %[a] and %[b] are declared as inputs but are
     * written by the assembly; all three are restored before it ends.
     */
    __asm__ __volatile__ (
        "mov	r3, #0\n\t"
        "mov	r4, #0\n\t"
        "mov	r9, r3\n\t"
        "mov	r12, %[r]\n\t"
        "mov	r10, %[a]\n\t"
        "mov	r11, %[b]\n\t"
        "mov	r6, #48\n\t"
        "add	r6, r6, r10\n\t"
        "mov	r14, r6\n\t"
        /* 1: start of a new output word. */
        "\n1:\n\t"
        /* %[r] holds zero for propagating the carry into r5. */
        "mov	%[r], #0\n\t"
        "mov	r5, #0\n\t"
        /* Offset into a is max(r9 - 44, 0); offset into b is r9 minus it. */
        "mov	r6, #44\n\t"
        "mov	%[a], r9\n\t"
        "subs	%[a], %[a], r6\n\t"
        "sbc	r6, r6, r6\n\t"
        "mvn	r6, r6\n\t"
        "and	%[a], %[a], r6\n\t"
        "mov	%[b], r9\n\t"
        "sub	%[b], %[b], %[a]\n\t"
        "add	%[a], %[a], r10\n\t"
        "add	%[b], %[b], r11\n\t"
        /* 2: add one word product into the accumulator. */
        "\n2:\n\t"
        /* Multiply Start */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r8, [%[b]]\n\t"
        "umull	r6, r8, r6, r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Multiply Done */
        /* Step up through a and down through b. */
        "add	%[a], %[a], #4\n\t"
        "sub	%[b], %[b], #4\n\t"
        /* Stop when the end of a is reached ... */
        "cmp	%[a], r14\n\t"
#ifdef __GNUC__
        "beq	3f\n\t"
#else
        "beq.n	3f\n\t"
#endif /* __GNUC__ */
        /* ... or when a has passed the word at offset r9. */
        "mov	r6, r9\n\t"
        "add	r6, r6, r10\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "ble	2b\n\t"
#else
        "ble.n	2b\n\t"
#endif /* __GNUC__ */
        /* 3: store the output word and shift the accumulator down. */
        "\n3:\n\t"
        "mov	%[r], r12\n\t"
        "mov	r8, r9\n\t"
        "str	r3, [%[r], r8]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "add	r8, r8, #4\n\t"
        "mov	r9, r8\n\t"
        /* Loop while the new offset is at most 88 (output word 22). */
        "mov	r6, #88\n\t"
        "cmp	r8, r6\n\t"
#ifdef __GNUC__
        "ble	1b\n\t"
#else
        "ble.n	1b\n\t"
#endif /* __GNUC__ */
        /* Last output word (offset 92) is what remains in r3. */
        "str	r3, [%[r], r8]\n\t"
        /* Restore the input pointers. */
        "mov	%[a], r10\n\t"
        "mov	%[b], r11\n\t"
        :
        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
}
5946 
/* Square a and put result in r. (r = a * a)
 *
 * The 24-word result is accumulated one output word (column) at a time
 * into a 96 byte area reserved on the stack by the assembly, then copied
 * to r.
 *
 * r  A single precision integer. Receives 24 words.
 * a  A single precision integer. 12 words.
 */
SP_NOINLINE static void sp_3072_sqr_12(sp_digit* r, const sp_digit* a)
{
    /* Register use in the assembly:
     *   r3, r4, r5  three word column accumulator (low, middle, high)
     *   r9          byte offset of the output word being computed
     *   r10         saved a; r11 temporary result on the stack
     *   r12         saved r
     *   %[a], r2    ascending and descending pointers into a
     * NOTE(review): %[r] and %[a] are declared as inputs but are written by
     * the assembly; %[r] is restored, %[a] is left pointing at the stack
     * area. The stack pointer is moved down by 96 bytes and restored before
     * the assembly ends.
     */
    __asm__ __volatile__ (
        "mov	r3, #0\n\t"
        "mov	r4, #0\n\t"
        "mov	r5, #0\n\t"
        "mov	r9, r3\n\t"
        "mov	r12, %[r]\n\t"
        /* Reserve 96 bytes of stack for the temporary result. */
        "mov	r6, #96\n\t"
        "neg	r6, r6\n\t"
        "add	sp, sp, r6\n\t"
        "mov	r11, sp\n\t"
        "mov	r10, %[a]\n\t"
        /* 1: start of a new output word. */
        "\n1:\n\t"
        /* %[r] holds zero for propagating the carry into r5. */
        "mov	%[r], #0\n\t"
        /* Ascending offset is max(r9 - 44, 0); descending is r9 minus it. */
        "mov	r6, #44\n\t"
        "mov	%[a], r9\n\t"
        "subs	%[a], %[a], r6\n\t"
        "sbc	r6, r6, r6\n\t"
        "mvn	r6, r6\n\t"
        "and	%[a], %[a], r6\n\t"
        "mov	r2, r9\n\t"
        "sub	r2, r2, %[a]\n\t"
        "add	%[a], %[a], r10\n\t"
        "add	r2, r2, r10\n\t"
        /* 2: pointers equal means a squared word, otherwise a product that
         * is added twice. */
        "\n2:\n\t"
        "cmp	r2, %[a]\n\t"
#ifdef __GNUC__
        "beq	4f\n\t"
#else
        "beq.n	4f\n\t"
#endif /* __GNUC__ */
        /* Multiply * 2: Start */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r8, [r2]\n\t"
        "umull	r6, r8, r6, r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Multiply * 2: Done */
#ifdef __GNUC__
        "bal	5f\n\t"
#else
        "bal.n	5f\n\t"
#endif /* __GNUC__ */
        "\n4:\n\t"
        /* Square: Start */
        "ldr	r6, [%[a]]\n\t"
        "umull	r6, r8, r6, r6\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Square: Done */
        /* 5: advance both pointers and test for the end of the column. */
        "\n5:\n\t"
        "add	%[a], %[a], #4\n\t"
        "sub	r2, r2, #4\n\t"
        /* Done when the ascending pointer reaches the end of a ... */
        "mov	r6, #48\n\t"
        "add	r6, r6, r10\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "beq	3f\n\t"
#else
        "beq.n	3f\n\t"
#endif /* __GNUC__ */
        /* ... or has passed the descending pointer ... */
        "cmp	%[a], r2\n\t"
#ifdef __GNUC__
        "bgt	3f\n\t"
#else
        "bgt.n	3f\n\t"
#endif /* __GNUC__ */
        /* ... or has passed the word at offset r9. */
        "mov	r8, r9\n\t"
        "add	r8, r8, r10\n\t"
        "cmp	%[a], r8\n\t"
#ifdef __GNUC__
        "ble	2b\n\t"
#else
        "ble.n	2b\n\t"
#endif /* __GNUC__ */
        /* 3: store the output word to the stack area and shift the
         * accumulator down. */
        "\n3:\n\t"
        "mov	%[r], r11\n\t"
        "mov	r8, r9\n\t"
        "str	r3, [%[r], r8]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "mov	r5, #0\n\t"
        "add	r8, r8, #4\n\t"
        "mov	r9, r8\n\t"
        /* Loop while the new offset is at most 88 (output word 22). */
        "mov	r6, #88\n\t"
        "cmp	r8, r6\n\t"
#ifdef __GNUC__
        "ble	1b\n\t"
#else
        "ble.n	1b\n\t"
#endif /* __GNUC__ */
        "mov	%[a], r10\n\t"
        /* Last output word (offset 92) is what remains in r3. */
        "str	r3, [%[r], r8]\n\t"
        /* Copy the 24 words from the stack area to r, highest first. */
        "mov	%[r], r12\n\t"
        "mov	%[a], r11\n\t"
        "mov	r3, #92\n\t"
        "\n4:\n\t"
        "ldr	r6, [%[a], r3]\n\t"
        "str	r6, [%[r], r3]\n\t"
        "subs	r3, r3, #4\n\t"
#ifdef __GNUC__
        "bge	4b\n\t"
#else
        "bge.n	4b\n\t"
#endif /* __GNUC__ */
        /* Release the stack area. */
        "mov	r6, #96\n\t"
        "add	sp, sp, r6\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
    );
}
6070 
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: six steps that each add two words with carry.
 *
 * r  A single precision integer. Receives 12 words.
 * a  A single precision integer. 12 words.
 * b  A single precision integer. 12 words.
 * returns the carry out of the most significant word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_3072_add_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    /* r, a and b are advanced by the load/store multiple instructions and
     * so are read-write operands. */
    __asm__ __volatile__ (
        /* First pair starts the carry chain with adds. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* c = 0 + 0 + carry flag. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
6122 
/* Sub b from a in place. (a = a - b)
 *
 * Fully unrolled: twelve steps that each subtract two words with borrow.
 *
 * a  A single precision integer. 24 words. Updated with the difference.
 * b  A single precision integer. 24 words.
 * returns 0 when there is no borrow out of the most significant word and
 * all bits set ((sp_digit)-1) when there is.
 */
SP_NOINLINE static sp_digit sp_3072_sub_in_place_24(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    /* Each step loads two words of a without moving the pointer, subtracts
     * and stores back with write-back so that a advances. */
    __asm__ __volatile__ (
        /* First pair starts the borrow chain with subs. */
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "subs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        /* c - c - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6"
    );

    return c;
}
6203 
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: twelve steps that each add two words with carry.
 *
 * r  A single precision integer. Receives 24 words.
 * a  A single precision integer. 24 words.
 * b  A single precision integer. 24 words.
 * returns the carry out of the most significant word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_3072_add_24(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    /* r, a and b are advanced by the load/store multiple instructions and
     * so are read-write operands. */
    __asm__ __volatile__ (
        /* First pair starts the carry chain with adds. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* c = 0 + 0 + carry flag. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
6285 
6286 /* AND m into each word of a and store in r.
6287  *
6288  * r  A single precision integer.
6289  * a  A single precision integer.
6290  * m  Mask to AND against each digit.
6291  */
sp_3072_mask_12(sp_digit * r,const sp_digit * a,sp_digit m)6292 static void sp_3072_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
6293 {
6294 #ifdef WOLFSSL_SP_SMALL
6295     int i;
6296 
6297     for (i=0; i<12; i++) {
6298         r[i] = a[i] & m;
6299     }
6300 #else
6301     r[0] = a[0] & m;
6302     r[1] = a[1] & m;
6303     r[2] = a[2] & m;
6304     r[3] = a[3] & m;
6305     r[4] = a[4] & m;
6306     r[5] = a[5] & m;
6307     r[6] = a[6] & m;
6308     r[7] = a[7] & m;
6309     r[8] = a[8] & m;
6310     r[9] = a[9] & m;
6311     r[10] = a[10] & m;
6312     r[11] = a[11] & m;
6313 #endif
6314 }
6315 
6316 /* Multiply a and b into r. (r = a * b)
6317  *
6318  * r  A single precision integer.
6319  * a  A single precision integer.
6320  * b  A single precision integer.
6321  */
sp_3072_mul_24(sp_digit * r,const sp_digit * a,const sp_digit * b)6322 SP_NOINLINE static void sp_3072_mul_24(sp_digit* r, const sp_digit* a,
6323         const sp_digit* b)
6324 {
6325     sp_digit* z0 = r;
6326     sp_digit z1[24];
6327     sp_digit a1[12];
6328     sp_digit b1[12];
6329     sp_digit z2[24];
6330     sp_digit u;
6331     sp_digit ca;
6332     sp_digit cb;
6333 
6334     ca = sp_3072_add_12(a1, a, &a[12]);
6335     cb = sp_3072_add_12(b1, b, &b[12]);
6336     u  = ca & cb;
6337     sp_3072_mul_12(z1, a1, b1);
6338     sp_3072_mul_12(z2, &a[12], &b[12]);
6339     sp_3072_mul_12(z0, a, b);
6340     sp_3072_mask_12(r + 24, a1, 0 - cb);
6341     sp_3072_mask_12(b1, b1, 0 - ca);
6342     u += sp_3072_add_12(r + 24, r + 24, b1);
6343     u += sp_3072_sub_in_place_24(z1, z2);
6344     u += sp_3072_sub_in_place_24(z1, z0);
6345     u += sp_3072_add_24(r + 12, r + 12, z1);
6346     r[36] = u;
6347     XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
6348     (void)sp_3072_add_24(r + 24, r + 24, z2);
6349 }
6350 
6351 /* Square a and put result in r. (r = a * a)
6352  *
6353  * r  A single precision integer.
6354  * a  A single precision integer.
6355  */
sp_3072_sqr_24(sp_digit * r,const sp_digit * a)6356 SP_NOINLINE static void sp_3072_sqr_24(sp_digit* r, const sp_digit* a)
6357 {
6358     sp_digit* z0 = r;
6359     sp_digit z2[24];
6360     sp_digit z1[24];
6361     sp_digit a1[12];
6362     sp_digit u;
6363 
6364     u = sp_3072_add_12(a1, a, &a[12]);
6365     sp_3072_sqr_12(z1, a1);
6366     sp_3072_sqr_12(z2, &a[12]);
6367     sp_3072_sqr_12(z0, a);
6368     sp_3072_mask_12(r + 24, a1, 0 - u);
6369     u += sp_3072_add_12(r + 24, r + 24, r + 24);
6370     u += sp_3072_sub_in_place_24(z1, z2);
6371     u += sp_3072_sub_in_place_24(z1, z0);
6372     u += sp_3072_add_24(r + 12, r + 12, z1);
6373     r[36] = u;
6374     XMEMSET(r + 36 + 1, 0, sizeof(sp_digit) * (12 - 1));
6375     (void)sp_3072_add_24(r + 24, r + 24, z2);
6376 }
6377 
/* Sub b from a in place. (a = a - b)
 *
 * Fully unrolled: twenty-four steps that each subtract two words with
 * borrow.
 *
 * a  A single precision integer. 48 words. Updated with the difference.
 * b  A single precision integer. 48 words.
 * returns 0 when there is no borrow out of the most significant word and
 * all bits set ((sp_digit)-1) when there is.
 */
SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    /* Each step loads two words of a without moving the pointer, subtracts
     * and stores back with write-back so that a advances. */
    __asm__ __volatile__ (
        /* First pair starts the borrow chain with subs. */
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "subs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        /* c - c - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6"
    );

    return c;
}
6518 
6519 /* Add b to a into r. (r = a + b)
6520  *
6521  * r  A single precision integer.
6522  * a  A single precision integer.
6523  * b  A single precision integer.
6524  */
sp_3072_add_48(sp_digit * r,const sp_digit * a,const sp_digit * b)6525 SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
6526         const sp_digit* b)
6527 {
6528     sp_digit c = 0;
6529 
6530     __asm__ __volatile__ (
6531         "ldm	%[a]!, {r4, r5}\n\t"
6532         "ldm	%[b]!, {r6, r8}\n\t"
6533         "adds	r4, r4, r6\n\t"
6534         "adcs	r5, r5, r8\n\t"
6535         "stm	%[r]!, {r4, r5}\n\t"
6536         "ldm	%[a]!, {r4, r5}\n\t"
6537         "ldm	%[b]!, {r6, r8}\n\t"
6538         "adcs	r4, r4, r6\n\t"
6539         "adcs	r5, r5, r8\n\t"
6540         "stm	%[r]!, {r4, r5}\n\t"
6541         "ldm	%[a]!, {r4, r5}\n\t"
6542         "ldm	%[b]!, {r6, r8}\n\t"
6543         "adcs	r4, r4, r6\n\t"
6544         "adcs	r5, r5, r8\n\t"
6545         "stm	%[r]!, {r4, r5}\n\t"
6546         "ldm	%[a]!, {r4, r5}\n\t"
6547         "ldm	%[b]!, {r6, r8}\n\t"
6548         "adcs	r4, r4, r6\n\t"
6549         "adcs	r5, r5, r8\n\t"
6550         "stm	%[r]!, {r4, r5}\n\t"
6551         "ldm	%[a]!, {r4, r5}\n\t"
6552         "ldm	%[b]!, {r6, r8}\n\t"
6553         "adcs	r4, r4, r6\n\t"
6554         "adcs	r5, r5, r8\n\t"
6555         "stm	%[r]!, {r4, r5}\n\t"
6556         "ldm	%[a]!, {r4, r5}\n\t"
6557         "ldm	%[b]!, {r6, r8}\n\t"
6558         "adcs	r4, r4, r6\n\t"
6559         "adcs	r5, r5, r8\n\t"
6560         "stm	%[r]!, {r4, r5}\n\t"
6561         "ldm	%[a]!, {r4, r5}\n\t"
6562         "ldm	%[b]!, {r6, r8}\n\t"
6563         "adcs	r4, r4, r6\n\t"
6564         "adcs	r5, r5, r8\n\t"
6565         "stm	%[r]!, {r4, r5}\n\t"
6566         "ldm	%[a]!, {r4, r5}\n\t"
6567         "ldm	%[b]!, {r6, r8}\n\t"
6568         "adcs	r4, r4, r6\n\t"
6569         "adcs	r5, r5, r8\n\t"
6570         "stm	%[r]!, {r4, r5}\n\t"
6571         "ldm	%[a]!, {r4, r5}\n\t"
6572         "ldm	%[b]!, {r6, r8}\n\t"
6573         "adcs	r4, r4, r6\n\t"
6574         "adcs	r5, r5, r8\n\t"
6575         "stm	%[r]!, {r4, r5}\n\t"
6576         "ldm	%[a]!, {r4, r5}\n\t"
6577         "ldm	%[b]!, {r6, r8}\n\t"
6578         "adcs	r4, r4, r6\n\t"
6579         "adcs	r5, r5, r8\n\t"
6580         "stm	%[r]!, {r4, r5}\n\t"
6581         "ldm	%[a]!, {r4, r5}\n\t"
6582         "ldm	%[b]!, {r6, r8}\n\t"
6583         "adcs	r4, r4, r6\n\t"
6584         "adcs	r5, r5, r8\n\t"
6585         "stm	%[r]!, {r4, r5}\n\t"
6586         "ldm	%[a]!, {r4, r5}\n\t"
6587         "ldm	%[b]!, {r6, r8}\n\t"
6588         "adcs	r4, r4, r6\n\t"
6589         "adcs	r5, r5, r8\n\t"
6590         "stm	%[r]!, {r4, r5}\n\t"
6591         "ldm	%[a]!, {r4, r5}\n\t"
6592         "ldm	%[b]!, {r6, r8}\n\t"
6593         "adcs	r4, r4, r6\n\t"
6594         "adcs	r5, r5, r8\n\t"
6595         "stm	%[r]!, {r4, r5}\n\t"
6596         "ldm	%[a]!, {r4, r5}\n\t"
6597         "ldm	%[b]!, {r6, r8}\n\t"
6598         "adcs	r4, r4, r6\n\t"
6599         "adcs	r5, r5, r8\n\t"
6600         "stm	%[r]!, {r4, r5}\n\t"
6601         "ldm	%[a]!, {r4, r5}\n\t"
6602         "ldm	%[b]!, {r6, r8}\n\t"
6603         "adcs	r4, r4, r6\n\t"
6604         "adcs	r5, r5, r8\n\t"
6605         "stm	%[r]!, {r4, r5}\n\t"
6606         "ldm	%[a]!, {r4, r5}\n\t"
6607         "ldm	%[b]!, {r6, r8}\n\t"
6608         "adcs	r4, r4, r6\n\t"
6609         "adcs	r5, r5, r8\n\t"
6610         "stm	%[r]!, {r4, r5}\n\t"
6611         "ldm	%[a]!, {r4, r5}\n\t"
6612         "ldm	%[b]!, {r6, r8}\n\t"
6613         "adcs	r4, r4, r6\n\t"
6614         "adcs	r5, r5, r8\n\t"
6615         "stm	%[r]!, {r4, r5}\n\t"
6616         "ldm	%[a]!, {r4, r5}\n\t"
6617         "ldm	%[b]!, {r6, r8}\n\t"
6618         "adcs	r4, r4, r6\n\t"
6619         "adcs	r5, r5, r8\n\t"
6620         "stm	%[r]!, {r4, r5}\n\t"
6621         "ldm	%[a]!, {r4, r5}\n\t"
6622         "ldm	%[b]!, {r6, r8}\n\t"
6623         "adcs	r4, r4, r6\n\t"
6624         "adcs	r5, r5, r8\n\t"
6625         "stm	%[r]!, {r4, r5}\n\t"
6626         "ldm	%[a]!, {r4, r5}\n\t"
6627         "ldm	%[b]!, {r6, r8}\n\t"
6628         "adcs	r4, r4, r6\n\t"
6629         "adcs	r5, r5, r8\n\t"
6630         "stm	%[r]!, {r4, r5}\n\t"
6631         "ldm	%[a]!, {r4, r5}\n\t"
6632         "ldm	%[b]!, {r6, r8}\n\t"
6633         "adcs	r4, r4, r6\n\t"
6634         "adcs	r5, r5, r8\n\t"
6635         "stm	%[r]!, {r4, r5}\n\t"
6636         "ldm	%[a]!, {r4, r5}\n\t"
6637         "ldm	%[b]!, {r6, r8}\n\t"
6638         "adcs	r4, r4, r6\n\t"
6639         "adcs	r5, r5, r8\n\t"
6640         "stm	%[r]!, {r4, r5}\n\t"
6641         "ldm	%[a]!, {r4, r5}\n\t"
6642         "ldm	%[b]!, {r6, r8}\n\t"
6643         "adcs	r4, r4, r6\n\t"
6644         "adcs	r5, r5, r8\n\t"
6645         "stm	%[r]!, {r4, r5}\n\t"
6646         "ldm	%[a]!, {r4, r5}\n\t"
6647         "ldm	%[b]!, {r6, r8}\n\t"
6648         "adcs	r4, r4, r6\n\t"
6649         "adcs	r5, r5, r8\n\t"
6650         "stm	%[r]!, {r4, r5}\n\t"
6651         "mov	%[c], #0\n\t"
6652         "adc	%[c], %[c], %[c]\n\t"
6653         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
6654         :
6655         : "memory", "r4", "r5", "r6", "r8"
6656     );
6657 
6658     return c;
6659 }
6660 
6661 /* AND m into each word of a and store in r.
6662  *
6663  * r  A single precision integer.
6664  * a  A single precision integer.
6665  * m  Mask to AND against each digit.
6666  */
sp_3072_mask_24(sp_digit * r,const sp_digit * a,sp_digit m)6667 static void sp_3072_mask_24(sp_digit* r, const sp_digit* a, sp_digit m)
6668 {
6669 #ifdef WOLFSSL_SP_SMALL
6670     int i;
6671 
6672     for (i=0; i<24; i++) {
6673         r[i] = a[i] & m;
6674     }
6675 #else
6676     int i;
6677 
6678     for (i = 0; i < 24; i += 8) {
6679         r[i+0] = a[i+0] & m;
6680         r[i+1] = a[i+1] & m;
6681         r[i+2] = a[i+2] & m;
6682         r[i+3] = a[i+3] & m;
6683         r[i+4] = a[i+4] & m;
6684         r[i+5] = a[i+5] & m;
6685         r[i+6] = a[i+6] & m;
6686         r[i+7] = a[i+7] & m;
6687     }
6688 #endif
6689 }
6690 
6691 /* Multiply a and b into r. (r = a * b)
6692  *
6693  * r  A single precision integer.
6694  * a  A single precision integer.
6695  * b  A single precision integer.
6696  */
sp_3072_mul_48(sp_digit * r,const sp_digit * a,const sp_digit * b)6697 SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
6698         const sp_digit* b)
6699 {
6700     sp_digit* z0 = r;
6701     sp_digit z1[48];
6702     sp_digit a1[24];
6703     sp_digit b1[24];
6704     sp_digit z2[48];
6705     sp_digit u;
6706     sp_digit ca;
6707     sp_digit cb;
6708 
6709     ca = sp_3072_add_24(a1, a, &a[24]);
6710     cb = sp_3072_add_24(b1, b, &b[24]);
6711     u  = ca & cb;
6712     sp_3072_mul_24(z1, a1, b1);
6713     sp_3072_mul_24(z2, &a[24], &b[24]);
6714     sp_3072_mul_24(z0, a, b);
6715     sp_3072_mask_24(r + 48, a1, 0 - cb);
6716     sp_3072_mask_24(b1, b1, 0 - ca);
6717     u += sp_3072_add_24(r + 48, r + 48, b1);
6718     u += sp_3072_sub_in_place_48(z1, z2);
6719     u += sp_3072_sub_in_place_48(z1, z0);
6720     u += sp_3072_add_48(r + 24, r + 24, z1);
6721     r[72] = u;
6722     XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
6723     (void)sp_3072_add_48(r + 48, r + 48, z2);
6724 }
6725 
6726 /* Square a and put result in r. (r = a * a)
6727  *
6728  * r  A single precision integer.
6729  * a  A single precision integer.
6730  */
sp_3072_sqr_48(sp_digit * r,const sp_digit * a)6731 SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
6732 {
6733     sp_digit* z0 = r;
6734     sp_digit z2[48];
6735     sp_digit z1[48];
6736     sp_digit a1[24];
6737     sp_digit u;
6738 
6739     u = sp_3072_add_24(a1, a, &a[24]);
6740     sp_3072_sqr_24(z1, a1);
6741     sp_3072_sqr_24(z2, &a[24]);
6742     sp_3072_sqr_24(z0, a);
6743     sp_3072_mask_24(r + 48, a1, 0 - u);
6744     u += sp_3072_add_24(r + 48, r + 48, r + 48);
6745     u += sp_3072_sub_in_place_48(z1, z2);
6746     u += sp_3072_sub_in_place_48(z1, z0);
6747     u += sp_3072_add_48(r + 24, r + 24, z1);
6748     r[72] = u;
6749     XMEMSET(r + 72 + 1, 0, sizeof(sp_digit) * (24 - 1));
6750     (void)sp_3072_add_48(r + 48, r + 48, z2);
6751 }
6752 
6753 /* Sub b from a into r. (r = a - b)
6754  *
6755  * r  A single precision integer.
6756  * a  A single precision integer.
6757  * b  A single precision integer.
6758  */
sp_3072_sub_in_place_96(sp_digit * a,const sp_digit * b)6759 SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
6760         const sp_digit* b)
6761 {
6762     sp_digit c = 0;
6763 
6764     __asm__ __volatile__ (
6765         "ldm	%[a], {r3, r4}\n\t"
6766         "ldm	%[b]!, {r5, r6}\n\t"
6767         "subs	r3, r3, r5\n\t"
6768         "sbcs	r4, r4, r6\n\t"
6769         "stm	%[a]!, {r3, r4}\n\t"
6770         "ldm	%[a], {r3, r4}\n\t"
6771         "ldm	%[b]!, {r5, r6}\n\t"
6772         "sbcs	r3, r3, r5\n\t"
6773         "sbcs	r4, r4, r6\n\t"
6774         "stm	%[a]!, {r3, r4}\n\t"
6775         "ldm	%[a], {r3, r4}\n\t"
6776         "ldm	%[b]!, {r5, r6}\n\t"
6777         "sbcs	r3, r3, r5\n\t"
6778         "sbcs	r4, r4, r6\n\t"
6779         "stm	%[a]!, {r3, r4}\n\t"
6780         "ldm	%[a], {r3, r4}\n\t"
6781         "ldm	%[b]!, {r5, r6}\n\t"
6782         "sbcs	r3, r3, r5\n\t"
6783         "sbcs	r4, r4, r6\n\t"
6784         "stm	%[a]!, {r3, r4}\n\t"
6785         "ldm	%[a], {r3, r4}\n\t"
6786         "ldm	%[b]!, {r5, r6}\n\t"
6787         "sbcs	r3, r3, r5\n\t"
6788         "sbcs	r4, r4, r6\n\t"
6789         "stm	%[a]!, {r3, r4}\n\t"
6790         "ldm	%[a], {r3, r4}\n\t"
6791         "ldm	%[b]!, {r5, r6}\n\t"
6792         "sbcs	r3, r3, r5\n\t"
6793         "sbcs	r4, r4, r6\n\t"
6794         "stm	%[a]!, {r3, r4}\n\t"
6795         "ldm	%[a], {r3, r4}\n\t"
6796         "ldm	%[b]!, {r5, r6}\n\t"
6797         "sbcs	r3, r3, r5\n\t"
6798         "sbcs	r4, r4, r6\n\t"
6799         "stm	%[a]!, {r3, r4}\n\t"
6800         "ldm	%[a], {r3, r4}\n\t"
6801         "ldm	%[b]!, {r5, r6}\n\t"
6802         "sbcs	r3, r3, r5\n\t"
6803         "sbcs	r4, r4, r6\n\t"
6804         "stm	%[a]!, {r3, r4}\n\t"
6805         "ldm	%[a], {r3, r4}\n\t"
6806         "ldm	%[b]!, {r5, r6}\n\t"
6807         "sbcs	r3, r3, r5\n\t"
6808         "sbcs	r4, r4, r6\n\t"
6809         "stm	%[a]!, {r3, r4}\n\t"
6810         "ldm	%[a], {r3, r4}\n\t"
6811         "ldm	%[b]!, {r5, r6}\n\t"
6812         "sbcs	r3, r3, r5\n\t"
6813         "sbcs	r4, r4, r6\n\t"
6814         "stm	%[a]!, {r3, r4}\n\t"
6815         "ldm	%[a], {r3, r4}\n\t"
6816         "ldm	%[b]!, {r5, r6}\n\t"
6817         "sbcs	r3, r3, r5\n\t"
6818         "sbcs	r4, r4, r6\n\t"
6819         "stm	%[a]!, {r3, r4}\n\t"
6820         "ldm	%[a], {r3, r4}\n\t"
6821         "ldm	%[b]!, {r5, r6}\n\t"
6822         "sbcs	r3, r3, r5\n\t"
6823         "sbcs	r4, r4, r6\n\t"
6824         "stm	%[a]!, {r3, r4}\n\t"
6825         "ldm	%[a], {r3, r4}\n\t"
6826         "ldm	%[b]!, {r5, r6}\n\t"
6827         "sbcs	r3, r3, r5\n\t"
6828         "sbcs	r4, r4, r6\n\t"
6829         "stm	%[a]!, {r3, r4}\n\t"
6830         "ldm	%[a], {r3, r4}\n\t"
6831         "ldm	%[b]!, {r5, r6}\n\t"
6832         "sbcs	r3, r3, r5\n\t"
6833         "sbcs	r4, r4, r6\n\t"
6834         "stm	%[a]!, {r3, r4}\n\t"
6835         "ldm	%[a], {r3, r4}\n\t"
6836         "ldm	%[b]!, {r5, r6}\n\t"
6837         "sbcs	r3, r3, r5\n\t"
6838         "sbcs	r4, r4, r6\n\t"
6839         "stm	%[a]!, {r3, r4}\n\t"
6840         "ldm	%[a], {r3, r4}\n\t"
6841         "ldm	%[b]!, {r5, r6}\n\t"
6842         "sbcs	r3, r3, r5\n\t"
6843         "sbcs	r4, r4, r6\n\t"
6844         "stm	%[a]!, {r3, r4}\n\t"
6845         "ldm	%[a], {r3, r4}\n\t"
6846         "ldm	%[b]!, {r5, r6}\n\t"
6847         "sbcs	r3, r3, r5\n\t"
6848         "sbcs	r4, r4, r6\n\t"
6849         "stm	%[a]!, {r3, r4}\n\t"
6850         "ldm	%[a], {r3, r4}\n\t"
6851         "ldm	%[b]!, {r5, r6}\n\t"
6852         "sbcs	r3, r3, r5\n\t"
6853         "sbcs	r4, r4, r6\n\t"
6854         "stm	%[a]!, {r3, r4}\n\t"
6855         "ldm	%[a], {r3, r4}\n\t"
6856         "ldm	%[b]!, {r5, r6}\n\t"
6857         "sbcs	r3, r3, r5\n\t"
6858         "sbcs	r4, r4, r6\n\t"
6859         "stm	%[a]!, {r3, r4}\n\t"
6860         "ldm	%[a], {r3, r4}\n\t"
6861         "ldm	%[b]!, {r5, r6}\n\t"
6862         "sbcs	r3, r3, r5\n\t"
6863         "sbcs	r4, r4, r6\n\t"
6864         "stm	%[a]!, {r3, r4}\n\t"
6865         "ldm	%[a], {r3, r4}\n\t"
6866         "ldm	%[b]!, {r5, r6}\n\t"
6867         "sbcs	r3, r3, r5\n\t"
6868         "sbcs	r4, r4, r6\n\t"
6869         "stm	%[a]!, {r3, r4}\n\t"
6870         "ldm	%[a], {r3, r4}\n\t"
6871         "ldm	%[b]!, {r5, r6}\n\t"
6872         "sbcs	r3, r3, r5\n\t"
6873         "sbcs	r4, r4, r6\n\t"
6874         "stm	%[a]!, {r3, r4}\n\t"
6875         "ldm	%[a], {r3, r4}\n\t"
6876         "ldm	%[b]!, {r5, r6}\n\t"
6877         "sbcs	r3, r3, r5\n\t"
6878         "sbcs	r4, r4, r6\n\t"
6879         "stm	%[a]!, {r3, r4}\n\t"
6880         "ldm	%[a], {r3, r4}\n\t"
6881         "ldm	%[b]!, {r5, r6}\n\t"
6882         "sbcs	r3, r3, r5\n\t"
6883         "sbcs	r4, r4, r6\n\t"
6884         "stm	%[a]!, {r3, r4}\n\t"
6885         "ldm	%[a], {r3, r4}\n\t"
6886         "ldm	%[b]!, {r5, r6}\n\t"
6887         "sbcs	r3, r3, r5\n\t"
6888         "sbcs	r4, r4, r6\n\t"
6889         "stm	%[a]!, {r3, r4}\n\t"
6890         "ldm	%[a], {r3, r4}\n\t"
6891         "ldm	%[b]!, {r5, r6}\n\t"
6892         "sbcs	r3, r3, r5\n\t"
6893         "sbcs	r4, r4, r6\n\t"
6894         "stm	%[a]!, {r3, r4}\n\t"
6895         "ldm	%[a], {r3, r4}\n\t"
6896         "ldm	%[b]!, {r5, r6}\n\t"
6897         "sbcs	r3, r3, r5\n\t"
6898         "sbcs	r4, r4, r6\n\t"
6899         "stm	%[a]!, {r3, r4}\n\t"
6900         "ldm	%[a], {r3, r4}\n\t"
6901         "ldm	%[b]!, {r5, r6}\n\t"
6902         "sbcs	r3, r3, r5\n\t"
6903         "sbcs	r4, r4, r6\n\t"
6904         "stm	%[a]!, {r3, r4}\n\t"
6905         "ldm	%[a], {r3, r4}\n\t"
6906         "ldm	%[b]!, {r5, r6}\n\t"
6907         "sbcs	r3, r3, r5\n\t"
6908         "sbcs	r4, r4, r6\n\t"
6909         "stm	%[a]!, {r3, r4}\n\t"
6910         "ldm	%[a], {r3, r4}\n\t"
6911         "ldm	%[b]!, {r5, r6}\n\t"
6912         "sbcs	r3, r3, r5\n\t"
6913         "sbcs	r4, r4, r6\n\t"
6914         "stm	%[a]!, {r3, r4}\n\t"
6915         "ldm	%[a], {r3, r4}\n\t"
6916         "ldm	%[b]!, {r5, r6}\n\t"
6917         "sbcs	r3, r3, r5\n\t"
6918         "sbcs	r4, r4, r6\n\t"
6919         "stm	%[a]!, {r3, r4}\n\t"
6920         "ldm	%[a], {r3, r4}\n\t"
6921         "ldm	%[b]!, {r5, r6}\n\t"
6922         "sbcs	r3, r3, r5\n\t"
6923         "sbcs	r4, r4, r6\n\t"
6924         "stm	%[a]!, {r3, r4}\n\t"
6925         "ldm	%[a], {r3, r4}\n\t"
6926         "ldm	%[b]!, {r5, r6}\n\t"
6927         "sbcs	r3, r3, r5\n\t"
6928         "sbcs	r4, r4, r6\n\t"
6929         "stm	%[a]!, {r3, r4}\n\t"
6930         "ldm	%[a], {r3, r4}\n\t"
6931         "ldm	%[b]!, {r5, r6}\n\t"
6932         "sbcs	r3, r3, r5\n\t"
6933         "sbcs	r4, r4, r6\n\t"
6934         "stm	%[a]!, {r3, r4}\n\t"
6935         "ldm	%[a], {r3, r4}\n\t"
6936         "ldm	%[b]!, {r5, r6}\n\t"
6937         "sbcs	r3, r3, r5\n\t"
6938         "sbcs	r4, r4, r6\n\t"
6939         "stm	%[a]!, {r3, r4}\n\t"
6940         "ldm	%[a], {r3, r4}\n\t"
6941         "ldm	%[b]!, {r5, r6}\n\t"
6942         "sbcs	r3, r3, r5\n\t"
6943         "sbcs	r4, r4, r6\n\t"
6944         "stm	%[a]!, {r3, r4}\n\t"
6945         "ldm	%[a], {r3, r4}\n\t"
6946         "ldm	%[b]!, {r5, r6}\n\t"
6947         "sbcs	r3, r3, r5\n\t"
6948         "sbcs	r4, r4, r6\n\t"
6949         "stm	%[a]!, {r3, r4}\n\t"
6950         "ldm	%[a], {r3, r4}\n\t"
6951         "ldm	%[b]!, {r5, r6}\n\t"
6952         "sbcs	r3, r3, r5\n\t"
6953         "sbcs	r4, r4, r6\n\t"
6954         "stm	%[a]!, {r3, r4}\n\t"
6955         "ldm	%[a], {r3, r4}\n\t"
6956         "ldm	%[b]!, {r5, r6}\n\t"
6957         "sbcs	r3, r3, r5\n\t"
6958         "sbcs	r4, r4, r6\n\t"
6959         "stm	%[a]!, {r3, r4}\n\t"
6960         "ldm	%[a], {r3, r4}\n\t"
6961         "ldm	%[b]!, {r5, r6}\n\t"
6962         "sbcs	r3, r3, r5\n\t"
6963         "sbcs	r4, r4, r6\n\t"
6964         "stm	%[a]!, {r3, r4}\n\t"
6965         "ldm	%[a], {r3, r4}\n\t"
6966         "ldm	%[b]!, {r5, r6}\n\t"
6967         "sbcs	r3, r3, r5\n\t"
6968         "sbcs	r4, r4, r6\n\t"
6969         "stm	%[a]!, {r3, r4}\n\t"
6970         "ldm	%[a], {r3, r4}\n\t"
6971         "ldm	%[b]!, {r5, r6}\n\t"
6972         "sbcs	r3, r3, r5\n\t"
6973         "sbcs	r4, r4, r6\n\t"
6974         "stm	%[a]!, {r3, r4}\n\t"
6975         "ldm	%[a], {r3, r4}\n\t"
6976         "ldm	%[b]!, {r5, r6}\n\t"
6977         "sbcs	r3, r3, r5\n\t"
6978         "sbcs	r4, r4, r6\n\t"
6979         "stm	%[a]!, {r3, r4}\n\t"
6980         "ldm	%[a], {r3, r4}\n\t"
6981         "ldm	%[b]!, {r5, r6}\n\t"
6982         "sbcs	r3, r3, r5\n\t"
6983         "sbcs	r4, r4, r6\n\t"
6984         "stm	%[a]!, {r3, r4}\n\t"
6985         "ldm	%[a], {r3, r4}\n\t"
6986         "ldm	%[b]!, {r5, r6}\n\t"
6987         "sbcs	r3, r3, r5\n\t"
6988         "sbcs	r4, r4, r6\n\t"
6989         "stm	%[a]!, {r3, r4}\n\t"
6990         "ldm	%[a], {r3, r4}\n\t"
6991         "ldm	%[b]!, {r5, r6}\n\t"
6992         "sbcs	r3, r3, r5\n\t"
6993         "sbcs	r4, r4, r6\n\t"
6994         "stm	%[a]!, {r3, r4}\n\t"
6995         "ldm	%[a], {r3, r4}\n\t"
6996         "ldm	%[b]!, {r5, r6}\n\t"
6997         "sbcs	r3, r3, r5\n\t"
6998         "sbcs	r4, r4, r6\n\t"
6999         "stm	%[a]!, {r3, r4}\n\t"
7000         "ldm	%[a], {r3, r4}\n\t"
7001         "ldm	%[b]!, {r5, r6}\n\t"
7002         "sbcs	r3, r3, r5\n\t"
7003         "sbcs	r4, r4, r6\n\t"
7004         "stm	%[a]!, {r3, r4}\n\t"
7005         "sbc	%[c], %[c], %[c]\n\t"
7006         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
7007         :
7008         : "memory", "r3", "r4", "r5", "r6"
7009     );
7010 
7011     return c;
7012 }
7013 
7014 /* Add b to a into r. (r = a + b)
7015  *
7016  * r  A single precision integer.
7017  * a  A single precision integer.
7018  * b  A single precision integer.
7019  */
sp_3072_add_96(sp_digit * r,const sp_digit * a,const sp_digit * b)7020 SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
7021         const sp_digit* b)
7022 {
7023     sp_digit c = 0;
7024 
7025     __asm__ __volatile__ (
7026         "ldm	%[a]!, {r4, r5}\n\t"
7027         "ldm	%[b]!, {r6, r8}\n\t"
7028         "adds	r4, r4, r6\n\t"
7029         "adcs	r5, r5, r8\n\t"
7030         "stm	%[r]!, {r4, r5}\n\t"
7031         "ldm	%[a]!, {r4, r5}\n\t"
7032         "ldm	%[b]!, {r6, r8}\n\t"
7033         "adcs	r4, r4, r6\n\t"
7034         "adcs	r5, r5, r8\n\t"
7035         "stm	%[r]!, {r4, r5}\n\t"
7036         "ldm	%[a]!, {r4, r5}\n\t"
7037         "ldm	%[b]!, {r6, r8}\n\t"
7038         "adcs	r4, r4, r6\n\t"
7039         "adcs	r5, r5, r8\n\t"
7040         "stm	%[r]!, {r4, r5}\n\t"
7041         "ldm	%[a]!, {r4, r5}\n\t"
7042         "ldm	%[b]!, {r6, r8}\n\t"
7043         "adcs	r4, r4, r6\n\t"
7044         "adcs	r5, r5, r8\n\t"
7045         "stm	%[r]!, {r4, r5}\n\t"
7046         "ldm	%[a]!, {r4, r5}\n\t"
7047         "ldm	%[b]!, {r6, r8}\n\t"
7048         "adcs	r4, r4, r6\n\t"
7049         "adcs	r5, r5, r8\n\t"
7050         "stm	%[r]!, {r4, r5}\n\t"
7051         "ldm	%[a]!, {r4, r5}\n\t"
7052         "ldm	%[b]!, {r6, r8}\n\t"
7053         "adcs	r4, r4, r6\n\t"
7054         "adcs	r5, r5, r8\n\t"
7055         "stm	%[r]!, {r4, r5}\n\t"
7056         "ldm	%[a]!, {r4, r5}\n\t"
7057         "ldm	%[b]!, {r6, r8}\n\t"
7058         "adcs	r4, r4, r6\n\t"
7059         "adcs	r5, r5, r8\n\t"
7060         "stm	%[r]!, {r4, r5}\n\t"
7061         "ldm	%[a]!, {r4, r5}\n\t"
7062         "ldm	%[b]!, {r6, r8}\n\t"
7063         "adcs	r4, r4, r6\n\t"
7064         "adcs	r5, r5, r8\n\t"
7065         "stm	%[r]!, {r4, r5}\n\t"
7066         "ldm	%[a]!, {r4, r5}\n\t"
7067         "ldm	%[b]!, {r6, r8}\n\t"
7068         "adcs	r4, r4, r6\n\t"
7069         "adcs	r5, r5, r8\n\t"
7070         "stm	%[r]!, {r4, r5}\n\t"
7071         "ldm	%[a]!, {r4, r5}\n\t"
7072         "ldm	%[b]!, {r6, r8}\n\t"
7073         "adcs	r4, r4, r6\n\t"
7074         "adcs	r5, r5, r8\n\t"
7075         "stm	%[r]!, {r4, r5}\n\t"
7076         "ldm	%[a]!, {r4, r5}\n\t"
7077         "ldm	%[b]!, {r6, r8}\n\t"
7078         "adcs	r4, r4, r6\n\t"
7079         "adcs	r5, r5, r8\n\t"
7080         "stm	%[r]!, {r4, r5}\n\t"
7081         "ldm	%[a]!, {r4, r5}\n\t"
7082         "ldm	%[b]!, {r6, r8}\n\t"
7083         "adcs	r4, r4, r6\n\t"
7084         "adcs	r5, r5, r8\n\t"
7085         "stm	%[r]!, {r4, r5}\n\t"
7086         "ldm	%[a]!, {r4, r5}\n\t"
7087         "ldm	%[b]!, {r6, r8}\n\t"
7088         "adcs	r4, r4, r6\n\t"
7089         "adcs	r5, r5, r8\n\t"
7090         "stm	%[r]!, {r4, r5}\n\t"
7091         "ldm	%[a]!, {r4, r5}\n\t"
7092         "ldm	%[b]!, {r6, r8}\n\t"
7093         "adcs	r4, r4, r6\n\t"
7094         "adcs	r5, r5, r8\n\t"
7095         "stm	%[r]!, {r4, r5}\n\t"
7096         "ldm	%[a]!, {r4, r5}\n\t"
7097         "ldm	%[b]!, {r6, r8}\n\t"
7098         "adcs	r4, r4, r6\n\t"
7099         "adcs	r5, r5, r8\n\t"
7100         "stm	%[r]!, {r4, r5}\n\t"
7101         "ldm	%[a]!, {r4, r5}\n\t"
7102         "ldm	%[b]!, {r6, r8}\n\t"
7103         "adcs	r4, r4, r6\n\t"
7104         "adcs	r5, r5, r8\n\t"
7105         "stm	%[r]!, {r4, r5}\n\t"
7106         "ldm	%[a]!, {r4, r5}\n\t"
7107         "ldm	%[b]!, {r6, r8}\n\t"
7108         "adcs	r4, r4, r6\n\t"
7109         "adcs	r5, r5, r8\n\t"
7110         "stm	%[r]!, {r4, r5}\n\t"
7111         "ldm	%[a]!, {r4, r5}\n\t"
7112         "ldm	%[b]!, {r6, r8}\n\t"
7113         "adcs	r4, r4, r6\n\t"
7114         "adcs	r5, r5, r8\n\t"
7115         "stm	%[r]!, {r4, r5}\n\t"
7116         "ldm	%[a]!, {r4, r5}\n\t"
7117         "ldm	%[b]!, {r6, r8}\n\t"
7118         "adcs	r4, r4, r6\n\t"
7119         "adcs	r5, r5, r8\n\t"
7120         "stm	%[r]!, {r4, r5}\n\t"
7121         "ldm	%[a]!, {r4, r5}\n\t"
7122         "ldm	%[b]!, {r6, r8}\n\t"
7123         "adcs	r4, r4, r6\n\t"
7124         "adcs	r5, r5, r8\n\t"
7125         "stm	%[r]!, {r4, r5}\n\t"
7126         "ldm	%[a]!, {r4, r5}\n\t"
7127         "ldm	%[b]!, {r6, r8}\n\t"
7128         "adcs	r4, r4, r6\n\t"
7129         "adcs	r5, r5, r8\n\t"
7130         "stm	%[r]!, {r4, r5}\n\t"
7131         "ldm	%[a]!, {r4, r5}\n\t"
7132         "ldm	%[b]!, {r6, r8}\n\t"
7133         "adcs	r4, r4, r6\n\t"
7134         "adcs	r5, r5, r8\n\t"
7135         "stm	%[r]!, {r4, r5}\n\t"
7136         "ldm	%[a]!, {r4, r5}\n\t"
7137         "ldm	%[b]!, {r6, r8}\n\t"
7138         "adcs	r4, r4, r6\n\t"
7139         "adcs	r5, r5, r8\n\t"
7140         "stm	%[r]!, {r4, r5}\n\t"
7141         "ldm	%[a]!, {r4, r5}\n\t"
7142         "ldm	%[b]!, {r6, r8}\n\t"
7143         "adcs	r4, r4, r6\n\t"
7144         "adcs	r5, r5, r8\n\t"
7145         "stm	%[r]!, {r4, r5}\n\t"
7146         "ldm	%[a]!, {r4, r5}\n\t"
7147         "ldm	%[b]!, {r6, r8}\n\t"
7148         "adcs	r4, r4, r6\n\t"
7149         "adcs	r5, r5, r8\n\t"
7150         "stm	%[r]!, {r4, r5}\n\t"
7151         "ldm	%[a]!, {r4, r5}\n\t"
7152         "ldm	%[b]!, {r6, r8}\n\t"
7153         "adcs	r4, r4, r6\n\t"
7154         "adcs	r5, r5, r8\n\t"
7155         "stm	%[r]!, {r4, r5}\n\t"
7156         "ldm	%[a]!, {r4, r5}\n\t"
7157         "ldm	%[b]!, {r6, r8}\n\t"
7158         "adcs	r4, r4, r6\n\t"
7159         "adcs	r5, r5, r8\n\t"
7160         "stm	%[r]!, {r4, r5}\n\t"
7161         "ldm	%[a]!, {r4, r5}\n\t"
7162         "ldm	%[b]!, {r6, r8}\n\t"
7163         "adcs	r4, r4, r6\n\t"
7164         "adcs	r5, r5, r8\n\t"
7165         "stm	%[r]!, {r4, r5}\n\t"
7166         "ldm	%[a]!, {r4, r5}\n\t"
7167         "ldm	%[b]!, {r6, r8}\n\t"
7168         "adcs	r4, r4, r6\n\t"
7169         "adcs	r5, r5, r8\n\t"
7170         "stm	%[r]!, {r4, r5}\n\t"
7171         "ldm	%[a]!, {r4, r5}\n\t"
7172         "ldm	%[b]!, {r6, r8}\n\t"
7173         "adcs	r4, r4, r6\n\t"
7174         "adcs	r5, r5, r8\n\t"
7175         "stm	%[r]!, {r4, r5}\n\t"
7176         "ldm	%[a]!, {r4, r5}\n\t"
7177         "ldm	%[b]!, {r6, r8}\n\t"
7178         "adcs	r4, r4, r6\n\t"
7179         "adcs	r5, r5, r8\n\t"
7180         "stm	%[r]!, {r4, r5}\n\t"
7181         "ldm	%[a]!, {r4, r5}\n\t"
7182         "ldm	%[b]!, {r6, r8}\n\t"
7183         "adcs	r4, r4, r6\n\t"
7184         "adcs	r5, r5, r8\n\t"
7185         "stm	%[r]!, {r4, r5}\n\t"
7186         "ldm	%[a]!, {r4, r5}\n\t"
7187         "ldm	%[b]!, {r6, r8}\n\t"
7188         "adcs	r4, r4, r6\n\t"
7189         "adcs	r5, r5, r8\n\t"
7190         "stm	%[r]!, {r4, r5}\n\t"
7191         "ldm	%[a]!, {r4, r5}\n\t"
7192         "ldm	%[b]!, {r6, r8}\n\t"
7193         "adcs	r4, r4, r6\n\t"
7194         "adcs	r5, r5, r8\n\t"
7195         "stm	%[r]!, {r4, r5}\n\t"
7196         "ldm	%[a]!, {r4, r5}\n\t"
7197         "ldm	%[b]!, {r6, r8}\n\t"
7198         "adcs	r4, r4, r6\n\t"
7199         "adcs	r5, r5, r8\n\t"
7200         "stm	%[r]!, {r4, r5}\n\t"
7201         "ldm	%[a]!, {r4, r5}\n\t"
7202         "ldm	%[b]!, {r6, r8}\n\t"
7203         "adcs	r4, r4, r6\n\t"
7204         "adcs	r5, r5, r8\n\t"
7205         "stm	%[r]!, {r4, r5}\n\t"
7206         "ldm	%[a]!, {r4, r5}\n\t"
7207         "ldm	%[b]!, {r6, r8}\n\t"
7208         "adcs	r4, r4, r6\n\t"
7209         "adcs	r5, r5, r8\n\t"
7210         "stm	%[r]!, {r4, r5}\n\t"
7211         "ldm	%[a]!, {r4, r5}\n\t"
7212         "ldm	%[b]!, {r6, r8}\n\t"
7213         "adcs	r4, r4, r6\n\t"
7214         "adcs	r5, r5, r8\n\t"
7215         "stm	%[r]!, {r4, r5}\n\t"
7216         "ldm	%[a]!, {r4, r5}\n\t"
7217         "ldm	%[b]!, {r6, r8}\n\t"
7218         "adcs	r4, r4, r6\n\t"
7219         "adcs	r5, r5, r8\n\t"
7220         "stm	%[r]!, {r4, r5}\n\t"
7221         "ldm	%[a]!, {r4, r5}\n\t"
7222         "ldm	%[b]!, {r6, r8}\n\t"
7223         "adcs	r4, r4, r6\n\t"
7224         "adcs	r5, r5, r8\n\t"
7225         "stm	%[r]!, {r4, r5}\n\t"
7226         "ldm	%[a]!, {r4, r5}\n\t"
7227         "ldm	%[b]!, {r6, r8}\n\t"
7228         "adcs	r4, r4, r6\n\t"
7229         "adcs	r5, r5, r8\n\t"
7230         "stm	%[r]!, {r4, r5}\n\t"
7231         "ldm	%[a]!, {r4, r5}\n\t"
7232         "ldm	%[b]!, {r6, r8}\n\t"
7233         "adcs	r4, r4, r6\n\t"
7234         "adcs	r5, r5, r8\n\t"
7235         "stm	%[r]!, {r4, r5}\n\t"
7236         "ldm	%[a]!, {r4, r5}\n\t"
7237         "ldm	%[b]!, {r6, r8}\n\t"
7238         "adcs	r4, r4, r6\n\t"
7239         "adcs	r5, r5, r8\n\t"
7240         "stm	%[r]!, {r4, r5}\n\t"
7241         "ldm	%[a]!, {r4, r5}\n\t"
7242         "ldm	%[b]!, {r6, r8}\n\t"
7243         "adcs	r4, r4, r6\n\t"
7244         "adcs	r5, r5, r8\n\t"
7245         "stm	%[r]!, {r4, r5}\n\t"
7246         "ldm	%[a]!, {r4, r5}\n\t"
7247         "ldm	%[b]!, {r6, r8}\n\t"
7248         "adcs	r4, r4, r6\n\t"
7249         "adcs	r5, r5, r8\n\t"
7250         "stm	%[r]!, {r4, r5}\n\t"
7251         "ldm	%[a]!, {r4, r5}\n\t"
7252         "ldm	%[b]!, {r6, r8}\n\t"
7253         "adcs	r4, r4, r6\n\t"
7254         "adcs	r5, r5, r8\n\t"
7255         "stm	%[r]!, {r4, r5}\n\t"
7256         "ldm	%[a]!, {r4, r5}\n\t"
7257         "ldm	%[b]!, {r6, r8}\n\t"
7258         "adcs	r4, r4, r6\n\t"
7259         "adcs	r5, r5, r8\n\t"
7260         "stm	%[r]!, {r4, r5}\n\t"
7261         "ldm	%[a]!, {r4, r5}\n\t"
7262         "ldm	%[b]!, {r6, r8}\n\t"
7263         "adcs	r4, r4, r6\n\t"
7264         "adcs	r5, r5, r8\n\t"
7265         "stm	%[r]!, {r4, r5}\n\t"
7266         "mov	%[c], #0\n\t"
7267         "adc	%[c], %[c], %[c]\n\t"
7268         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
7269         :
7270         : "memory", "r4", "r5", "r6", "r8"
7271     );
7272 
7273     return c;
7274 }
7275 
7276 /* AND m into each word of a and store in r.
7277  *
7278  * r  A single precision integer.
7279  * a  A single precision integer.
7280  * m  Mask to AND against each digit.
7281  */
sp_3072_mask_48(sp_digit * r,const sp_digit * a,sp_digit m)7282 static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
7283 {
7284 #ifdef WOLFSSL_SP_SMALL
7285     int i;
7286 
7287     for (i=0; i<48; i++) {
7288         r[i] = a[i] & m;
7289     }
7290 #else
7291     int i;
7292 
7293     for (i = 0; i < 48; i += 8) {
7294         r[i+0] = a[i+0] & m;
7295         r[i+1] = a[i+1] & m;
7296         r[i+2] = a[i+2] & m;
7297         r[i+3] = a[i+3] & m;
7298         r[i+4] = a[i+4] & m;
7299         r[i+5] = a[i+5] & m;
7300         r[i+6] = a[i+6] & m;
7301         r[i+7] = a[i+7] & m;
7302     }
7303 #endif
7304 }
7305 
7306 /* Multiply a and b into r. (r = a * b)
7307  *
7308  * r  A single precision integer.
7309  * a  A single precision integer.
7310  * b  A single precision integer.
7311  */
sp_3072_mul_96(sp_digit * r,const sp_digit * a,const sp_digit * b)7312 SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
7313         const sp_digit* b)
7314 {
7315     sp_digit* z0 = r;
7316     sp_digit z1[96];
7317     sp_digit a1[48];
7318     sp_digit b1[48];
7319     sp_digit z2[96];
7320     sp_digit u;
7321     sp_digit ca;
7322     sp_digit cb;
7323 
7324     ca = sp_3072_add_48(a1, a, &a[48]);
7325     cb = sp_3072_add_48(b1, b, &b[48]);
7326     u  = ca & cb;
7327     sp_3072_mul_48(z1, a1, b1);
7328     sp_3072_mul_48(z2, &a[48], &b[48]);
7329     sp_3072_mul_48(z0, a, b);
7330     sp_3072_mask_48(r + 96, a1, 0 - cb);
7331     sp_3072_mask_48(b1, b1, 0 - ca);
7332     u += sp_3072_add_48(r + 96, r + 96, b1);
7333     u += sp_3072_sub_in_place_96(z1, z2);
7334     u += sp_3072_sub_in_place_96(z1, z0);
7335     u += sp_3072_add_96(r + 48, r + 48, z1);
7336     r[144] = u;
7337     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
7338     (void)sp_3072_add_96(r + 96, r + 96, z2);
7339 }
7340 
7341 /* Square a and put result in r. (r = a * a)
7342  *
7343  * r  A single precision integer.
7344  * a  A single precision integer.
7345  */
sp_3072_sqr_96(sp_digit * r,const sp_digit * a)7346 SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
7347 {
7348     sp_digit* z0 = r;
7349     sp_digit z2[96];
7350     sp_digit z1[96];
7351     sp_digit a1[48];
7352     sp_digit u;
7353 
7354     u = sp_3072_add_48(a1, a, &a[48]);
7355     sp_3072_sqr_48(z1, a1);
7356     sp_3072_sqr_48(z2, &a[48]);
7357     sp_3072_sqr_48(z0, a);
7358     sp_3072_mask_48(r + 96, a1, 0 - u);
7359     u += sp_3072_add_48(r + 96, r + 96, r + 96);
7360     u += sp_3072_sub_in_place_96(z1, z2);
7361     u += sp_3072_sub_in_place_96(z1, z0);
7362     u += sp_3072_add_96(r + 48, r + 48, z1);
7363     r[144] = u;
7364     XMEMSET(r + 144 + 1, 0, sizeof(sp_digit) * (48 - 1));
7365     (void)sp_3072_add_96(r + 96, r + 96, z2);
7366 }
7367 
7368 #endif /* !WOLFSSL_SP_SMALL */
#ifdef WOLFSSL_SP_SMALL
/* Add two 96-word numbers, one word per loop iteration. (r = a + b)
 *
 * r  Receives the 96-word sum.
 * a  First operand (96 words).
 * b  Second operand (96 words).
 *
 * Returns the carry out of the most significant word: 0 or 1.
 */
SP_NOINLINE static sp_digit sp_3072_add_96(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit carry = 0;

    __asm__ __volatile__ (
        /* r6 = a + 384 bytes (loop end), r8 = 0 - 1 (all ones). */
        "mov	r6, %[a]\n\t"
        "mov	r8, #0\n\t"
        "add	r6, r6, #384\n\t"
        "sub	r8, r8, #1\n\t"
        "\n1:\n\t"
        /* Adding all ones to c carries out exactly when c is 1, which puts
         * the carry saved by the previous iteration back into the flag. */
        "adds	%[c], %[c], r8\n\t"
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "adcs	r4, r4, r5\n\t"
        "str	r4, [%[r]]\n\t"
        /* Save the carry flag as 0 or 1 before "cmp" changes the flags. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        /* Step every pointer on by one 4-byte word. */
        "add	%[a], %[a], #4\n\t"
        "add	%[b], %[b], #4\n\t"
        "add	%[r], %[r], #4\n\t"
        "cmp	%[a], r6\n\t"
#ifndef __GNUC__
        "bne.n	1b\n\t"
#else
        "bne	1b\n\t"
#endif /* !__GNUC__ */
        : [c] "+r" (carry), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return carry;
}

#endif /* WOLFSSL_SP_SMALL */
7412 #ifdef WOLFSSL_SP_SMALL
7413 /* Sub b from a into a. (a -= b)
7414  *
7415  * a  A single precision integer.
7416  * b  A single precision integer.
      *
      * Two words are subtracted per loop pass until a has advanced by 384 bytes.
      * The borrow is kept in c between passes as 0 (no borrow) or all ones
      * (borrow) and is turned back into the carry flag at the top of each pass.
      * returns the final borrow: 0, or all ones when the subtraction borrowed.
7417  */
sp_3072_sub_in_place_96(sp_digit * a,const sp_digit * b)7418 SP_NOINLINE static sp_digit sp_3072_sub_in_place_96(sp_digit* a,
7419         const sp_digit* b)
7420 {
7421     sp_digit c = 0;
7422     __asm__ __volatile__ (
7423         "mov	r8, %[a]\n\t"
7424         "add	r8, r8, #384\n\t"  /* r8 = a + 384: loop end address */
7425         "\n1:\n\t"
7426         "mov	r5, #0\n\t"
7427         "subs	r5, r5, %[c]\n\t"  /* 0 - c: C flag cleared (borrow pending) iff c != 0 */
7428         "ldr	r3, [%[a]]\n\t"
7429         "ldr	r4, [%[a], #4]\n\t"
7430         "ldr	r5, [%[b]]\n\t"
7431         "ldr	r6, [%[b], #4]\n\t"
7432         "sbcs	r3, r3, r5\n\t"  /* low word of the pair, with borrow in */
7433         "sbcs	r4, r4, r6\n\t"  /* high word of the pair */
7434         "str	r3, [%[a]]\n\t"
7435         "str	r4, [%[a], #4]\n\t"
7436         "sbc	%[c], %[c], %[c]\n\t"  /* c = 0 when no borrow, all ones on borrow */
7437         "add	%[a], %[a], #8\n\t"
7438         "add	%[b], %[b], #8\n\t"
7439         "cmp	%[a], r8\n\t"
7440 #ifdef __GNUC__
7441         "bne	1b\n\t"
7442 #else
7443         "bne.n	1b\n\t"
7444 #endif /* __GNUC__ */
         /* a and b are read-write operands because the loop advances them. */
7445         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
7446         :
7447         : "memory", "r3", "r4", "r5", "r6", "r8"
7448     );
7449 
7450     return c;
7451 }
7452 
7453 #endif /* WOLFSSL_SP_SMALL */
7454 #ifdef WOLFSSL_SP_SMALL
7455 /* Multiply a and b into r. (r = a * b)
7456  *
7457  * r  A single precision integer.
7458  * a  A single precision integer.
7459  * b  A single precision integer.
      *
      * The product is built one output word at a time: for each result byte
      * offset k (held in r9) every a word / b word pair whose byte offsets sum
      * to k is multiplied and accumulated into the three-word accumulator
      * r3:r4:r5; the low word is then stored and the accumulator is shifted
      * down one word.
      * The 192 result words are assembled in tmp_arr and copied to r at the
      * end - presumably so that r may alias a or b (TODO confirm with callers).
      *
      * NOTE(review): %[r], %[a] and %[b] are listed as input operands but are
      * written inside the asm; they are reloaded from r12/r10/r11 before the
      * asm ends.
7460  */
sp_3072_mul_96(sp_digit * r,const sp_digit * a,const sp_digit * b)7461 SP_NOINLINE static void sp_3072_mul_96(sp_digit* r, const sp_digit* a,
7462         const sp_digit* b)
7463 {
7464     sp_digit tmp_arr[96 * 2];
7465     sp_digit* tmp = tmp_arr;
7466     __asm__ __volatile__ (
7467         "mov	r3, #0\n\t"
7468         "mov	r4, #0\n\t"
7469         "mov	r9, r3\n\t"  /* r9 = k = 0 (byte offset of current result word) */
7470         "mov	r12, %[r]\n\t"  /* r12 = tmp */
7471         "mov	r10, %[a]\n\t"  /* r10 = a */
7472         "mov	r11, %[b]\n\t"  /* r11 = b */
7473         "mov	r6, #1\n\t"
7474         "lsl	r6, r6, #8\n\t"
7475         "add	r6, r6, #128\n\t"  /* r6 = 384 */
7476         "add	r6, r6, r10\n\t"
7477         "mov	r14, r6\n\t"  /* r14 = a + 384: end of a */
7478         "\n1:\n\t"
7479         "mov	%[r], #0\n\t"  /* %[r] doubles as a zero register in the inner loop */
7480         "mov	r5, #0\n\t"
7481         "mov	r6, #1\n\t"
7482         "lsl	r6, r6, #8\n\t"
7483         "add	r6, r6, #124\n\t"  /* r6 = 380: byte offset of the last word */
7484         "mov	%[a], r9\n\t"
7485         "subs	%[a], %[a], r6\n\t"  /* k - 380, C flag clear when k < 380 */
7486         "sbc	r6, r6, r6\n\t"
7487         "mvn	r6, r6\n\t"  /* r6 = all ones when k >= 380, else 0 */
7488         "and	%[a], %[a], r6\n\t"  /* starting a offset = max(k - 380, 0) */
7489         "mov	%[b], r9\n\t"
7490         "sub	%[b], %[b], %[a]\n\t"  /* starting b offset = k - a offset */
7491         "add	%[a], %[a], r10\n\t"
7492         "add	%[b], %[b], r11\n\t"
7493         "\n2:\n\t"
7494         /* Multiply Start */
7495         "ldr	r6, [%[a]]\n\t"
7496         "ldr	r8, [%[b]]\n\t"
7497         "umull	r6, r8, r6, r8\n\t"
7498         "adds	r3, r3, r6\n\t"
7499         "adcs 	r4, r4, r8\n\t"
7500         "adc	r5, r5, %[r]\n\t"  /* r5 += carry (%[r] is 0 here) */
7501         /* Multiply Done */
7502         "add	%[a], %[a], #4\n\t"  /* a walks up ... */
7503         "sub	%[b], %[b], #4\n\t"  /* ... while b walks down */
7504         "cmp	%[a], r14\n\t"  /* column done when a reaches its end */
7505 #ifdef __GNUC__
7506         "beq	3f\n\t"
7507 #else
7508         "beq.n	3f\n\t"
7509 #endif /* __GNUC__ */
7510         "mov	r6, r9\n\t"
7511         "add	r6, r6, r10\n\t"
7512         "cmp	%[a], r6\n\t"  /* keep going while a <= a base + k */
7513 #ifdef __GNUC__
7514         "ble	2b\n\t"
7515 #else
7516         "ble.n	2b\n\t"
7517 #endif /* __GNUC__ */
7518         "\n3:\n\t"
7519         "mov	%[r], r12\n\t"
7520         "mov	r8, r9\n\t"
7521         "str	r3, [%[r], r8]\n\t"  /* tmp word at offset k = low accumulator word */
7522         "mov	r3, r4\n\t"  /* shift the accumulator down one word */
7523         "mov	r4, r5\n\t"
7524         "add	r8, r8, #4\n\t"
7525         "mov	r9, r8\n\t"  /* k += 4 */
7526         "mov	r6, #2\n\t"
7527         "lsl	r6, r6, #8\n\t"
7528         "add	r6, r6, #248\n\t"  /* r6 = 760: last offset handled by the loop */
7529         "cmp	r8, r6\n\t"
7530 #ifdef __GNUC__
7531         "ble	1b\n\t"
7532 #else
7533         "ble.n	1b\n\t"
7534 #endif /* __GNUC__ */
7535         "str	r3, [%[r], r8]\n\t"  /* top word, stored at offset 764 */
7536         "mov	%[a], r10\n\t"  /* put back the original a and b */
7537         "mov	%[b], r11\n\t"
7538         :
7539         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
7540         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
7541     );
7542 
7543     XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
7544 }
7545 
7546 /* Square a and put result in r. (r = a * a)
7547  *
7548  * r  A single precision integer.
7549  * a  A single precision integer.
      *
      * Works one result word at a time: for result byte offset k (held in r9)
      * an ascending pointer (%[a]) and a descending pointer (r2) walk towards
      * each other over a. Each pair of distinct words is multiplied and added
      * into the accumulator r3:r4:r5 twice; when both pointers meet on the same
      * word it is squared and added once. The column ends when the ascending
      * pointer passes the descending one or reaches the end of a.
      * The result is written to a 768 byte scratch area reserved below sp
      * inside the asm, then copied to r before sp is restored.
      *
      * NOTE(review): %[r] and %[a] are listed as input operands but are written
      * inside the asm; %[r] is reloaded from r12 but %[a] is left pointing at
      * the scratch area. sp is also moved without the compiler's knowledge.
      * Confirm this is acceptable for the supported toolchains.
7550  */
sp_3072_sqr_96(sp_digit * r,const sp_digit * a)7551 SP_NOINLINE static void sp_3072_sqr_96(sp_digit* r, const sp_digit* a)
7552 {
7553     __asm__ __volatile__ (
7554         "mov	r3, #0\n\t"
7555         "mov	r4, #0\n\t"
7556         "mov	r5, #0\n\t"
7557         "mov	r9, r3\n\t"  /* r9 = k = 0 (byte offset of current result word) */
7558         "mov	r12, %[r]\n\t"  /* r12 = r */
7559         "mov	r6, #3\n\t"
7560         "lsl	r6, r6, #8\n\t"
7561         "neg	r6, r6\n\t"  /* r6 = -768 */
7562         "add	sp, sp, r6\n\t"  /* reserve 768 bytes of scratch below sp */
7563         "mov	r11, sp\n\t"  /* r11 = scratch base */
7564         "mov	r10, %[a]\n\t"  /* r10 = a */
7565         "\n1:\n\t"
7566         "mov	%[r], #0\n\t"  /* %[r] doubles as a zero register in the inner loop */
7567         "mov	r6, #1\n\t"
7568         "lsl	r6, r6, #8\n\t"
7569         "add	r6, r6, #124\n\t"  /* r6 = 380: byte offset of the last word */
7570         "mov	%[a], r9\n\t"
7571         "subs	%[a], %[a], r6\n\t"  /* k - 380, C flag clear when k < 380 */
7572         "sbc	r6, r6, r6\n\t"
7573         "mvn	r6, r6\n\t"  /* r6 = all ones when k >= 380, else 0 */
7574         "and	%[a], %[a], r6\n\t"  /* low offset = max(k - 380, 0) */
7575         "mov	r2, r9\n\t"
7576         "sub	r2, r2, %[a]\n\t"  /* high offset = k - low offset */
7577         "add	%[a], %[a], r10\n\t"  /* %[a] = ascending pointer */
7578         "add	r2, r2, r10\n\t"  /* r2 = descending pointer */
7579         "\n2:\n\t"
7580         "cmp	r2, %[a]\n\t"  /* same word: take the square path */
7581 #ifdef __GNUC__
7582         "beq	4f\n\t"
7583 #else
7584         "beq.n	4f\n\t"
7585 #endif /* __GNUC__ */
7586         /* Multiply * 2: Start */
7587         "ldr	r6, [%[a]]\n\t"
7588         "ldr	r8, [r2]\n\t"
7589         "umull	r6, r8, r6, r8\n\t"
7590         "adds	r3, r3, r6\n\t"
7591         "adcs 	r4, r4, r8\n\t"
7592         "adc	r5, r5, %[r]\n\t"
7593         "adds	r3, r3, r6\n\t"  /* same product added a second time */
7594         "adcs 	r4, r4, r8\n\t"
7595         "adc	r5, r5, %[r]\n\t"
7596         /* Multiply * 2: Done */
7597 #ifdef __GNUC__
7598         "bal	5f\n\t"
7599 #else
7600         "bal.n	5f\n\t"
7601 #endif /* __GNUC__ */
7602         "\n4:\n\t"
7603         /* Square: Start */
7604         "ldr	r6, [%[a]]\n\t"
7605         "umull	r6, r8, r6, r6\n\t"
7606         "adds	r3, r3, r6\n\t"
7607         "adcs	r4, r4, r8\n\t"
7608         "adc	r5, r5, %[r]\n\t"
7609         /* Square: Done */
7610         "\n5:\n\t"
7611         "add	%[a], %[a], #4\n\t"
7612         "sub	r2, r2, #4\n\t"
7613         "mov	r6, #1\n\t"
7614         "lsl	r6, r6, #8\n\t"
7615         "add	r6, r6, #128\n\t"  /* r6 = 384 */
7616         "add	r6, r6, r10\n\t"
7617         "cmp	%[a], r6\n\t"  /* column done when the end of a is reached */
7618 #ifdef __GNUC__
7619         "beq	3f\n\t"
7620 #else
7621         "beq.n	3f\n\t"
7622 #endif /* __GNUC__ */
7623         "cmp	%[a], r2\n\t"  /* column done once the pointers have crossed */
7624 #ifdef __GNUC__
7625         "bgt	3f\n\t"
7626 #else
7627         "bgt.n	3f\n\t"
7628 #endif /* __GNUC__ */
7629         "mov	r8, r9\n\t"
7630         "add	r8, r8, r10\n\t"
7631         "cmp	%[a], r8\n\t"  /* keep going while %[a] <= a base + k */
7632 #ifdef __GNUC__
7633         "ble	2b\n\t"
7634 #else
7635         "ble.n	2b\n\t"
7636 #endif /* __GNUC__ */
7637         "\n3:\n\t"
7638         "mov	%[r], r11\n\t"
7639         "mov	r8, r9\n\t"
7640         "str	r3, [%[r], r8]\n\t"  /* scratch word at offset k = low accumulator word */
7641         "mov	r3, r4\n\t"  /* shift the accumulator down one word */
7642         "mov	r4, r5\n\t"
7643         "mov	r5, #0\n\t"
7644         "add	r8, r8, #4\n\t"
7645         "mov	r9, r8\n\t"  /* k += 4 */
7646         "mov	r6, #2\n\t"
7647         "lsl	r6, r6, #8\n\t"
7648         "add	r6, r6, #248\n\t"  /* r6 = 760: last offset handled by the loop */
7649         "cmp	r8, r6\n\t"
7650 #ifdef __GNUC__
7651         "ble	1b\n\t"
7652 #else
7653         "ble.n	1b\n\t"
7654 #endif /* __GNUC__ */
7655         "mov	%[a], r10\n\t"  /* overwritten again two instructions below */
7656         "str	r3, [%[r], r8]\n\t"  /* top word, stored at offset 764 */
7657         "mov	%[r], r12\n\t"  /* %[r] = caller's r */
7658         "mov	%[a], r11\n\t"  /* %[a] = scratch base, source of the copy */
7659         "mov	r3, #2\n\t"
7660         "lsl	r3, r3, #8\n\t"
7661         "add	r3, r3, #252\n\t"  /* r3 = 764: copy from the top word down */
7662         "\n4:\n\t"
7663         "ldr	r6, [%[a], r3]\n\t"
7664         "str	r6, [%[r], r3]\n\t"
7665         "subs	r3, r3, #4\n\t"
7666 #ifdef __GNUC__
7667         "bge	4b\n\t"
7668 #else
7669         "bge.n	4b\n\t"
7670 #endif /* __GNUC__ */
7671         "mov	r6, #3\n\t"
7672         "lsl	r6, r6, #8\n\t"
7673         "add	sp, sp, r6\n\t"  /* release the 768 byte scratch area */
7674         :
7675         : [r] "r" (r), [a] "r" (a)
7676         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
7677     );
7678 }
7679 
7680 #endif /* WOLFSSL_SP_SMALL */
7681 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
7682 #ifdef WOLFSSL_SP_SMALL
7683 /* AND m into each word of a and store in r.
7684  *
7685  * r  A single precision integer.
7686  * a  A single precision integer.
7687  * m  Mask to AND against each digit.
7688  */
sp_3072_mask_48(sp_digit * r,const sp_digit * a,sp_digit m)7689 static void sp_3072_mask_48(sp_digit* r, const sp_digit* a, sp_digit m)
7690 {
7691     int i;
7692 
7693     for (i=0; i<48; i++) {
7694         r[i] = a[i] & m;
7695     }
7696 }
7697 
7698 #endif /* WOLFSSL_SP_SMALL */
7699 #ifdef WOLFSSL_SP_SMALL
7700 /* Add b to a into r. (r = a + b)
7701  *
7702  * r  A single precision integer.
7703  * a  A single precision integer.
7704  * b  A single precision integer.
      *
      * One word is added per loop pass until a has advanced by 192 bytes.
      * The carry is kept in c between passes because the cmp at the bottom of
      * the loop overwrites the flags.
      * returns the carry out of the final word (0 or 1).
7705  */
sp_3072_add_48(sp_digit * r,const sp_digit * a,const sp_digit * b)7706 SP_NOINLINE static sp_digit sp_3072_add_48(sp_digit* r, const sp_digit* a,
7707         const sp_digit* b)
7708 {
7709     sp_digit c = 0;
7710 
7711     __asm__ __volatile__ (
7712         "mov	r6, %[a]\n\t"
7713         "mov	r8, #0\n\t"
7714         "add	r6, r6, #192\n\t"  /* r6 = a + 192: loop end address */
7715         "sub	r8, r8, #1\n\t"  /* r8 = 0xffffffff */
7716         "\n1:\n\t"
7717         "adds	%[c], %[c], r8\n\t"  /* c + 0xffffffff: C flag set iff c != 0 */
7718         "ldr	r4, [%[a]]\n\t"
7719         "ldr	r5, [%[b]]\n\t"
7720         "adcs	r4, r4, r5\n\t"  /* r4 = a word + b word + carry in */
7721         "str	r4, [%[r]]\n\t"
7722         "mov	%[c], #0\n\t"
7723         "adc	%[c], %[c], %[c]\n\t"  /* c = carry out (0 or 1) */
7724         "add	%[a], %[a], #4\n\t"
7725         "add	%[b], %[b], #4\n\t"
7726         "add	%[r], %[r], #4\n\t"
7727         "cmp	%[a], r6\n\t"  /* more words left while a != end */
7728 #ifdef __GNUC__
7729         "bne	1b\n\t"
7730 #else
7731         "bne.n	1b\n\t"
7732 #endif /* __GNUC__ */
         /* r, a and b are read-write operands because the loop advances them. */
7733         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
7734         :
7735         : "memory", "r4", "r5", "r6", "r8"
7736     );
7737 
7738     return c;
7739 }
7740 
7741 #endif /* WOLFSSL_SP_SMALL */
7742 #ifdef WOLFSSL_SP_SMALL
7743 /* Sub b from a into a. (a -= b)
7744  *
7745  * a  A single precision integer.
7746  * b  A single precision integer.
      *
      * Two words are subtracted per loop pass until a has advanced by 192 bytes.
      * The borrow is kept in c between passes as 0 (no borrow) or all ones
      * (borrow) and is turned back into the carry flag at the top of each pass.
      * returns the final borrow: 0, or all ones when the subtraction borrowed.
7747  */
sp_3072_sub_in_place_48(sp_digit * a,const sp_digit * b)7748 SP_NOINLINE static sp_digit sp_3072_sub_in_place_48(sp_digit* a,
7749         const sp_digit* b)
7750 {
7751     sp_digit c = 0;
7752     __asm__ __volatile__ (
7753         "mov	r8, %[a]\n\t"
7754         "add	r8, r8, #192\n\t"  /* r8 = a + 192: loop end address */
7755         "\n1:\n\t"
7756         "mov	r5, #0\n\t"
7757         "subs	r5, r5, %[c]\n\t"  /* 0 - c: C flag cleared (borrow pending) iff c != 0 */
7758         "ldr	r3, [%[a]]\n\t"
7759         "ldr	r4, [%[a], #4]\n\t"
7760         "ldr	r5, [%[b]]\n\t"
7761         "ldr	r6, [%[b], #4]\n\t"
7762         "sbcs	r3, r3, r5\n\t"  /* low word of the pair, with borrow in */
7763         "sbcs	r4, r4, r6\n\t"  /* high word of the pair */
7764         "str	r3, [%[a]]\n\t"
7765         "str	r4, [%[a], #4]\n\t"
7766         "sbc	%[c], %[c], %[c]\n\t"  /* c = 0 when no borrow, all ones on borrow */
7767         "add	%[a], %[a], #8\n\t"
7768         "add	%[b], %[b], #8\n\t"
7769         "cmp	%[a], r8\n\t"
7770 #ifdef __GNUC__
7771         "bne	1b\n\t"
7772 #else
7773         "bne.n	1b\n\t"
7774 #endif /* __GNUC__ */
         /* a and b are read-write operands because the loop advances them. */
7775         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
7776         :
7777         : "memory", "r3", "r4", "r5", "r6", "r8"
7778     );
7779 
7780     return c;
7781 }
7782 
7783 #endif /* WOLFSSL_SP_SMALL */
7784 #ifdef WOLFSSL_SP_SMALL
7785 /* Multiply a and b into r. (r = a * b)
7786  *
7787  * r  A single precision integer.
7788  * a  A single precision integer.
7789  * b  A single precision integer.
      *
      * The product is built one output word at a time: for each result byte
      * offset k (held in r9) every a word / b word pair whose byte offsets sum
      * to k is multiplied and accumulated into the three-word accumulator
      * r3:r4:r5; the low word is then stored and the accumulator is shifted
      * down one word.
      * The 96 result words are assembled in tmp_arr and copied to r at the
      * end - presumably so that r may alias a or b (TODO confirm with callers).
      *
      * NOTE(review): %[r], %[a] and %[b] are listed as input operands but are
      * written inside the asm; they are reloaded from r12/r10/r11 before the
      * asm ends.
7790  */
sp_3072_mul_48(sp_digit * r,const sp_digit * a,const sp_digit * b)7791 SP_NOINLINE static void sp_3072_mul_48(sp_digit* r, const sp_digit* a,
7792         const sp_digit* b)
7793 {
7794     sp_digit tmp_arr[48 * 2];
7795     sp_digit* tmp = tmp_arr;
7796     __asm__ __volatile__ (
7797         "mov	r3, #0\n\t"
7798         "mov	r4, #0\n\t"
7799         "mov	r9, r3\n\t"  /* r9 = k = 0 (byte offset of current result word) */
7800         "mov	r12, %[r]\n\t"  /* r12 = tmp */
7801         "mov	r10, %[a]\n\t"  /* r10 = a */
7802         "mov	r11, %[b]\n\t"  /* r11 = b */
7803         "mov	r6, #192\n\t"
7804         "add	r6, r6, r10\n\t"
7805         "mov	r14, r6\n\t"  /* r14 = a + 192: end of a */
7806         "\n1:\n\t"
7807         "mov	%[r], #0\n\t"  /* %[r] doubles as a zero register in the inner loop */
7808         "mov	r5, #0\n\t"
7809         "mov	r6, #188\n\t"  /* byte offset of the last word */
7810         "mov	%[a], r9\n\t"
7811         "subs	%[a], %[a], r6\n\t"  /* k - 188, C flag clear when k < 188 */
7812         "sbc	r6, r6, r6\n\t"
7813         "mvn	r6, r6\n\t"  /* r6 = all ones when k >= 188, else 0 */
7814         "and	%[a], %[a], r6\n\t"  /* starting a offset = max(k - 188, 0) */
7815         "mov	%[b], r9\n\t"
7816         "sub	%[b], %[b], %[a]\n\t"  /* starting b offset = k - a offset */
7817         "add	%[a], %[a], r10\n\t"
7818         "add	%[b], %[b], r11\n\t"
7819         "\n2:\n\t"
7820         /* Multiply Start */
7821         "ldr	r6, [%[a]]\n\t"
7822         "ldr	r8, [%[b]]\n\t"
7823         "umull	r6, r8, r6, r8\n\t"
7824         "adds	r3, r3, r6\n\t"
7825         "adcs 	r4, r4, r8\n\t"
7826         "adc	r5, r5, %[r]\n\t"  /* r5 += carry (%[r] is 0 here) */
7827         /* Multiply Done */
7828         "add	%[a], %[a], #4\n\t"  /* a walks up ... */
7829         "sub	%[b], %[b], #4\n\t"  /* ... while b walks down */
7830         "cmp	%[a], r14\n\t"  /* column done when a reaches its end */
7831 #ifdef __GNUC__
7832         "beq	3f\n\t"
7833 #else
7834         "beq.n	3f\n\t"
7835 #endif /* __GNUC__ */
7836         "mov	r6, r9\n\t"
7837         "add	r6, r6, r10\n\t"
7838         "cmp	%[a], r6\n\t"  /* keep going while a <= a base + k */
7839 #ifdef __GNUC__
7840         "ble	2b\n\t"
7841 #else
7842         "ble.n	2b\n\t"
7843 #endif /* __GNUC__ */
7844         "\n3:\n\t"
7845         "mov	%[r], r12\n\t"
7846         "mov	r8, r9\n\t"
7847         "str	r3, [%[r], r8]\n\t"  /* tmp word at offset k = low accumulator word */
7848         "mov	r3, r4\n\t"  /* shift the accumulator down one word */
7849         "mov	r4, r5\n\t"
7850         "add	r8, r8, #4\n\t"
7851         "mov	r9, r8\n\t"  /* k += 4 */
7852         "mov	r6, #1\n\t"
7853         "lsl	r6, r6, #8\n\t"
7854         "add	r6, r6, #120\n\t"  /* r6 = 376: last offset handled by the loop */
7855         "cmp	r8, r6\n\t"
7856 #ifdef __GNUC__
7857         "ble	1b\n\t"
7858 #else
7859         "ble.n	1b\n\t"
7860 #endif /* __GNUC__ */
7861         "str	r3, [%[r], r8]\n\t"  /* top word, stored at offset 380 */
7862         "mov	%[a], r10\n\t"  /* put back the original a and b */
7863         "mov	%[b], r11\n\t"
7864         :
7865         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
7866         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
7867     );
7868 
7869     XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
7870 }
7871 
7872 /* Square a and put result in r. (r = a * a)
7873  *
7874  * r  A single precision integer.
7875  * a  A single precision integer.
      *
      * Works one result word at a time: for result byte offset k (held in r9)
      * an ascending pointer (%[a]) and a descending pointer (r2) walk towards
      * each other over a. Each pair of distinct words is multiplied and added
      * into the accumulator r3:r4:r5 twice; when both pointers meet on the same
      * word it is squared and added once. The column ends when the ascending
      * pointer passes the descending one or reaches the end of a.
      * The result is written to a 384 byte scratch area reserved below sp
      * inside the asm, then copied to r before sp is restored.
      *
      * NOTE(review): %[r] and %[a] are listed as input operands but are written
      * inside the asm; %[r] is reloaded from r12 but %[a] is left pointing at
      * the scratch area. sp is also moved without the compiler's knowledge.
      * Confirm this is acceptable for the supported toolchains.
7876  */
sp_3072_sqr_48(sp_digit * r,const sp_digit * a)7877 SP_NOINLINE static void sp_3072_sqr_48(sp_digit* r, const sp_digit* a)
7878 {
7879     __asm__ __volatile__ (
7880         "mov	r3, #0\n\t"
7881         "mov	r4, #0\n\t"
7882         "mov	r5, #0\n\t"
7883         "mov	r9, r3\n\t"  /* r9 = k = 0 (byte offset of current result word) */
7884         "mov	r12, %[r]\n\t"  /* r12 = r */
7885         "mov	r6, #1\n\t"
7886         "lsl	r6, r6, #8\n\t"
7887         "add	r6, r6, #128\n\t"
7888         "neg	r6, r6\n\t"  /* r6 = -384 */
7889         "add	sp, sp, r6\n\t"  /* reserve 384 bytes of scratch below sp */
7890         "mov	r11, sp\n\t"  /* r11 = scratch base */
7891         "mov	r10, %[a]\n\t"  /* r10 = a */
7892         "\n1:\n\t"
7893         "mov	%[r], #0\n\t"  /* %[r] doubles as a zero register in the inner loop */
7894         "mov	r6, #188\n\t"  /* byte offset of the last word */
7895         "mov	%[a], r9\n\t"
7896         "subs	%[a], %[a], r6\n\t"  /* k - 188, C flag clear when k < 188 */
7897         "sbc	r6, r6, r6\n\t"
7898         "mvn	r6, r6\n\t"  /* r6 = all ones when k >= 188, else 0 */
7899         "and	%[a], %[a], r6\n\t"  /* low offset = max(k - 188, 0) */
7900         "mov	r2, r9\n\t"
7901         "sub	r2, r2, %[a]\n\t"  /* high offset = k - low offset */
7902         "add	%[a], %[a], r10\n\t"  /* %[a] = ascending pointer */
7903         "add	r2, r2, r10\n\t"  /* r2 = descending pointer */
7904         "\n2:\n\t"
7905         "cmp	r2, %[a]\n\t"  /* same word: take the square path */
7906 #ifdef __GNUC__
7907         "beq	4f\n\t"
7908 #else
7909         "beq.n	4f\n\t"
7910 #endif /* __GNUC__ */
7911         /* Multiply * 2: Start */
7912         "ldr	r6, [%[a]]\n\t"
7913         "ldr	r8, [r2]\n\t"
7914         "umull	r6, r8, r6, r8\n\t"
7915         "adds	r3, r3, r6\n\t"
7916         "adcs 	r4, r4, r8\n\t"
7917         "adc	r5, r5, %[r]\n\t"
7918         "adds	r3, r3, r6\n\t"  /* same product added a second time */
7919         "adcs 	r4, r4, r8\n\t"
7920         "adc	r5, r5, %[r]\n\t"
7921         /* Multiply * 2: Done */
7922 #ifdef __GNUC__
7923         "bal	5f\n\t"
7924 #else
7925         "bal.n	5f\n\t"
7926 #endif /* __GNUC__ */
7927         "\n4:\n\t"
7928         /* Square: Start */
7929         "ldr	r6, [%[a]]\n\t"
7930         "umull	r6, r8, r6, r6\n\t"
7931         "adds	r3, r3, r6\n\t"
7932         "adcs	r4, r4, r8\n\t"
7933         "adc	r5, r5, %[r]\n\t"
7934         /* Square: Done */
7935         "\n5:\n\t"
7936         "add	%[a], %[a], #4\n\t"
7937         "sub	r2, r2, #4\n\t"
7938         "mov	r6, #192\n\t"
7939         "add	r6, r6, r10\n\t"
7940         "cmp	%[a], r6\n\t"  /* column done when the end of a is reached */
7941 #ifdef __GNUC__
7942         "beq	3f\n\t"
7943 #else
7944         "beq.n	3f\n\t"
7945 #endif /* __GNUC__ */
7946         "cmp	%[a], r2\n\t"  /* column done once the pointers have crossed */
7947 #ifdef __GNUC__
7948         "bgt	3f\n\t"
7949 #else
7950         "bgt.n	3f\n\t"
7951 #endif /* __GNUC__ */
7952         "mov	r8, r9\n\t"
7953         "add	r8, r8, r10\n\t"
7954         "cmp	%[a], r8\n\t"  /* keep going while %[a] <= a base + k */
7955 #ifdef __GNUC__
7956         "ble	2b\n\t"
7957 #else
7958         "ble.n	2b\n\t"
7959 #endif /* __GNUC__ */
7960         "\n3:\n\t"
7961         "mov	%[r], r11\n\t"
7962         "mov	r8, r9\n\t"
7963         "str	r3, [%[r], r8]\n\t"  /* scratch word at offset k = low accumulator word */
7964         "mov	r3, r4\n\t"  /* shift the accumulator down one word */
7965         "mov	r4, r5\n\t"
7966         "mov	r5, #0\n\t"
7967         "add	r8, r8, #4\n\t"
7968         "mov	r9, r8\n\t"  /* k += 4 */
7969         "mov	r6, #1\n\t"
7970         "lsl	r6, r6, #8\n\t"
7971         "add	r6, r6, #120\n\t"  /* r6 = 376: last offset handled by the loop */
7972         "cmp	r8, r6\n\t"
7973 #ifdef __GNUC__
7974         "ble	1b\n\t"
7975 #else
7976         "ble.n	1b\n\t"
7977 #endif /* __GNUC__ */
7978         "mov	%[a], r10\n\t"  /* overwritten again two instructions below */
7979         "str	r3, [%[r], r8]\n\t"  /* top word, stored at offset 380 */
7980         "mov	%[r], r12\n\t"  /* %[r] = caller's r */
7981         "mov	%[a], r11\n\t"  /* %[a] = scratch base, source of the copy */
7982         "mov	r3, #1\n\t"
7983         "lsl	r3, r3, #8\n\t"
7984         "add	r3, r3, #124\n\t"  /* r3 = 380: copy from the top word down */
7985         "\n4:\n\t"
7986         "ldr	r6, [%[a], r3]\n\t"
7987         "str	r6, [%[r], r3]\n\t"
7988         "subs	r3, r3, #4\n\t"
7989 #ifdef __GNUC__
7990         "bge	4b\n\t"
7991 #else
7992         "bge.n	4b\n\t"
7993 #endif /* __GNUC__ */
7994         "mov	r6, #1\n\t"
7995         "lsl	r6, r6, #8\n\t"
7996         "add	r6, r6, #128\n\t"
7997         "add	sp, sp, r6\n\t"  /* release the 384 byte scratch area */
7998         :
7999         : [r] "r" (r), [a] "r" (a)
8000         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
8001     );
8002 }
8003 
8004 #endif /* WOLFSSL_SP_SMALL */
8005 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
8006 
8007 /* Caclulate the bottom digit of -1/a mod 2^n.
8008  *
8009  * a    A single precision number.
8010  * rho  Bottom word of inverse.
8011  */
sp_3072_mont_setup(const sp_digit * a,sp_digit * rho)8012 static void sp_3072_mont_setup(const sp_digit* a, sp_digit* rho)
8013 {
8014     sp_digit x;
8015     sp_digit b;
8016 
8017     b = a[0];
8018     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
8019     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
8020     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
8021     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
8022 
8023     /* rho = -1/m mod b */
8024     *rho = (sp_digit)0 - x;
8025 }
8026 
8027 /* Mul a by digit b into r. (r = a * b)
8028  *
8029  * r  A single precision integer.
8030  * a  A single precision integer.
8031  * b  A single precision digit.
      *
      * 384 bytes of a are consumed, one word per step; r3:r4:r5 is a running
      * accumulator whose low word is stored after each product is added.
      * r receives one word more than is read from a: the final str writes the
      * remaining high word.
8032  */
sp_3072_mul_d_96(sp_digit * r,const sp_digit * a,sp_digit b)8033 SP_NOINLINE static void sp_3072_mul_d_96(sp_digit* r, const sp_digit* a,
8034         sp_digit b)
8035 {
8036     __asm__ __volatile__ (
8037         "add	r9, %[a], #384\n\t"  /* r9 = a + 384: loop end address */
8038         /* A[0] * B */
8039         "ldr	r6, [%[a]], #4\n\t"
8040         "umull	r5, r3, r6, %[b]\n\t"  /* r5 = low word, r3 = high word */
8041         "mov	r4, #0\n\t"
8042         "str	r5, [%[r]], #4\n\t"
8043         /* A[0] * B - Done */
8044         "\n1:\n\t"
8045         "mov	r5, #0\n\t"
8046         /* A[] * B */
8047         "ldr	r6, [%[a]], #4\n\t"  /* post-indexed: a advances one word */
8048         "umull	r6, r8, r6, %[b]\n\t"
8049         "adds	r3, r3, r6\n\t"
8050         "adcs 	r4, r4, r8\n\t"
8051         "adc	r5, r5, #0\n\t"
8052         /* A[] * B - Done */
8053         "str	r3, [%[r]], #4\n\t"  /* emit the low accumulator word */
8054         "mov	r3, r4\n\t"  /* shift the accumulator down one word */
8055         "mov	r4, r5\n\t"
8056         "cmp	%[a], r9\n\t"
8057 #ifdef __GNUC__
8058         "blt	1b\n\t"
8059 #else
8060         "blt.n	1b\n\t"
8061 #endif /* __GNUC__ */
8062         "str	r3, [%[r]]\n\t"  /* final high word */
8063         : [r] "+r" (r), [a] "+r" (a)
8064         : [b] "r" (b)
8065         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
8066     );
8067 }
8068 
8069 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
8070 /* r = 2^n mod m where n is the number of bits to reduce by.
8071  * Given m must be 3072 bits, just need to subtract.
8072  *
8073  * r  A single precision number.
8074  * m  A single precision number.
8075  */
sp_3072_mont_norm_48(sp_digit * r,const sp_digit * m)8076 static void sp_3072_mont_norm_48(sp_digit* r, const sp_digit* m)
8077 {
8078     XMEMSET(r, 0, sizeof(sp_digit) * 48);
8079 
8080     /* r = 2^n mod m */
8081     sp_3072_sub_in_place_48(r, m);
8082 }
8083 
8084 /* Conditionally subtract b from a using the mask m.
8085  * m is -1 to subtract and 0 when not copying.
      * Every word of b is ANDed with m before being subtracted, so with m = 0
      * the words of a are written to r unchanged, and the same instructions
      * run in both cases.
8086  *
8087  * r  A single precision number representing condition subtract result.
8088  * a  A single precision number to subtract from.
8089  * b  A single precision number to subtract.
8090  * m  Mask value to apply.
      * returns the final borrow: 0, or all ones when the subtraction borrowed.
8091  */
sp_3072_cond_sub_48(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)8092 SP_NOINLINE static sp_digit sp_3072_cond_sub_48(sp_digit* r, const sp_digit* a,
8093         const sp_digit* b, sp_digit m)
8094 {
8095     sp_digit c = 0;
8096 
8097     __asm__ __volatile__ (
8098         "mov	r5, #192\n\t"
8099         "mov	r9, r5\n\t"  /* r9 = 192: byte length to process */
8100         "mov	r8, #0\n\t"  /* r8 = current byte offset */
8101         "\n1:\n\t"
8102         "ldr	r6, [%[b], r8]\n\t"
8103         "and	r6, r6, %[m]\n\t"  /* b word, or 0 when the mask is 0 */
8104         "mov	r5, #0\n\t"
8105         "subs	r5, r5, %[c]\n\t"  /* 0 - c: C flag cleared (borrow pending) iff c != 0 */
8106         "ldr	r5, [%[a], r8]\n\t"
8107         "sbcs	r5, r5, r6\n\t"
8108         "sbcs	%[c], %[c], %[c]\n\t"  /* c = 0 when no borrow, all ones on borrow */
8109         "str	r5, [%[r], r8]\n\t"
8110         "add	r8, r8, #4\n\t"
8111         "cmp	r8, r9\n\t"
8112 #ifdef __GNUC__
8113         "blt	1b\n\t"
8114 #else
8115         "blt.n	1b\n\t"
8116 #endif /* __GNUC__ */
8117         : [c] "+r" (c)
8118         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
8119         : "memory", "r5", "r6", "r8", "r9"
8120     );
8121 
8122     return c;
8123 }
8124 
8125 /* Reduce the number back to 3072 bits using Montgomery reduction.
8126  *
8127  * a   A single precision number to reduce in place.
8128  * m   The single precision number representing the modulus.
8129  * mp  The digit representing the negative inverse of m mod 2^n.
      *
      * The outer loop runs once per word over the low 192 bytes of a. Each
      * pass computes mu = a[i] * mp (low word only) and adds m * mu into the
      * 48 words starting at a[i]; words a[i+47] and a[i+48] are both updated,
      * with the carry out of a[i+48] kept in ca for the next pass.
      * The asm leaves a pointing 48 words past its starting value, so the
      * closing call reads the upper half and writes the (conditionally
      * reduced) result to the first 48 words of the caller's buffer.
      *
      * NOTE(review): %[m] and %[mp] are listed as input operands but are
      * written inside the asm; %[m] is reloaded from r12 at the end, %[mp] is
      * left holding the last mu.
8130  */
sp_3072_mont_reduce_48(sp_digit * a,const sp_digit * m,sp_digit mp)8131 SP_NOINLINE static void sp_3072_mont_reduce_48(sp_digit* a, const sp_digit* m,
8132         sp_digit mp)
8133 {
8134     sp_digit ca = 0;
8135 
8136     __asm__ __volatile__ (
8137         "mov	r9, %[mp]\n\t"  /* r9 = mp */
8138         "mov	r12, %[m]\n\t"  /* r12 = m */
8139         "mov	r10, %[a]\n\t"  /* r10 = pointer to a[i] */
8140         "mov	r4, #0\n\t"
8141         "add	r11, r10, #192\n\t"  /* r11 = a + 192: outer loop end address */
8142         "\n1:\n\t"
8143         /* mu = a[i] * mp */
8144         "mov	%[mp], r9\n\t"
8145         "ldr	%[a], [r10]\n\t"
8146         "mul	%[mp], %[mp], %[a]\n\t"
8147         "mov	%[m], r12\n\t"
8148         "add	r14, r10, #184\n\t"  /* r14 = end of the 46 words done by the paired loop */
8149         "\n2:\n\t"
8150         /* a[i+j] += m[j] * mu */
8151         "ldr	%[a], [r10]\n\t"
8152         "mov	r5, #0\n\t"
8153         /* Multiply m[j] and mu - Start */
8154         "ldr	r8, [%[m]], #4\n\t"
8155         "umull	r6, r8, %[mp], r8\n\t"
8156         "adds	%[a], %[a], r6\n\t"
8157         "adc	r5, r5, r8\n\t"
8158         /* Multiply m[j] and mu - Done */
8159         "adds	r4, r4, %[a]\n\t"  /* add the carry word from the previous step */
8160         "adc	r5, r5, #0\n\t"
8161         "str	r4, [r10], #4\n\t"
8162         /* a[i+j+1] += m[j+1] * mu */
8163         "ldr	%[a], [r10]\n\t"
8164         "mov	r4, #0\n\t"
8165         /* Multiply m[j] and mu - Start */
8166         "ldr	r8, [%[m]], #4\n\t"
8167         "umull	r6, r8, %[mp], r8\n\t"
8168         "adds	%[a], %[a], r6\n\t"
8169         "adc	r4, r4, r8\n\t"
8170         /* Multiply m[j] and mu - Done */
8171         "adds	r5, r5, %[a]\n\t"
8172         "adc	r4, r4, #0\n\t"
8173         "str	r5, [r10], #4\n\t"
8174         "cmp	r10, r14\n\t"
8175 #ifdef __GNUC__
8176         "blt	2b\n\t"
8177 #else
8178         "blt.n	2b\n\t"
8179 #endif /* __GNUC__ */
8180         /* a[i+46] += m[46] * mu */
8181         "ldr	%[a], [r10]\n\t"
8182         "mov	r5, #0\n\t"
8183         /* Multiply m[j] and mu - Start */
8184         "ldr	r8, [%[m]], #4\n\t"
8185         "umull	r6, r8, %[mp], r8\n\t"
8186         "adds	%[a], %[a], r6\n\t"
8187         "adc	r5, r5, r8\n\t"
8188         /* Multiply m[j] and mu - Done */
8189         "adds	r4, r4, %[a]\n\t"
8190         "adc	r5, r5, #0\n\t"
8191         "str	r4, [r10], #4\n\t"
8192         /* a[i+47] += m[47] * mu */
8193         "mov	r4, %[ca]\n\t"  /* bring in the carry saved by the previous pass */
8194         "mov	%[ca], #0\n\t"
8195         /* Multiply m[47] and mu - Start */
8196         "ldr	r8, [%[m]]\n\t"
8197         "umull	r6, r8, %[mp], r8\n\t"
8198         "adds	r5, r5, r6\n\t"
8199         "adcs 	r4, r4, r8\n\t"
8200         "adc	%[ca], %[ca], #0\n\t"
8201         /* Multiply m[47] and mu - Done */
8202         "ldr	r6, [r10]\n\t"
8203         "ldr	r8, [r10, #4]\n\t"
8204         "adds	r6, r6, r5\n\t"
8205         "adcs	r8, r8, r4\n\t"
8206         "adc	%[ca], %[ca], #0\n\t"  /* ca = carry out of a[i+48] */
8207         "str	r6, [r10]\n\t"
8208         "str	r8, [r10, #4]\n\t"
8209         /* Next word in a */
8210         "sub	r10, r10, #184\n\t"  /* back from a[i+47] to a[i+1] */
8211         "cmp	r10, r11\n\t"
8212 #ifdef __GNUC__
8213         "blt	1b\n\t"
8214 #else
8215         "blt.n	1b\n\t"
8216 #endif /* __GNUC__ */
8217         "mov	%[a], r10\n\t"  /* a now points 48 words past its starting value */
8218         "mov	%[m], r12\n\t"
8219         : [ca] "+r" (ca), [a] "+r" (a)
8220         : [m] "r" (m), [mp] "r" (mp)
8221         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
8222     );
8223 
     /* Subtract m when the reduction carried out (mask is all ones iff ca == 1);
      * a - 48 is the start of the caller's buffer. */
8224     sp_3072_cond_sub_48(a - 48, a, m, (sp_digit)0 - ca);
8225 }
8226 
8227 /* Multiply two Montgomery form numbers mod the modulus (prime).
8228  * (r = a * b mod m)
8229  *
8230  * r   Result of multiplication.
8231  * a   First number to multiply in Montgomery form.
8232  * b   Second number to multiply in Montgomery form.
8233  * m   Modulus (prime).
8234  * mp  Montgomery mulitplier.
8235  */
sp_3072_mont_mul_48(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m,sp_digit mp)8236 static void sp_3072_mont_mul_48(sp_digit* r, const sp_digit* a,
8237         const sp_digit* b, const sp_digit* m, sp_digit mp)
8238 {
8239     sp_3072_mul_48(r, a, b);
8240     sp_3072_mont_reduce_48(r, m, mp);
8241 }
8242 
8243 /* Square the Montgomery form number. (r = a * a mod m)
8244  *
8245  * r   Result of squaring.
8246  * a   Number to square in Montgomery form.
8247  * m   Modulus (prime).
8248  * mp  Montgomery mulitplier.
8249  */
sp_3072_mont_sqr_48(sp_digit * r,const sp_digit * a,const sp_digit * m,sp_digit mp)8250 static void sp_3072_mont_sqr_48(sp_digit* r, const sp_digit* a,
8251         const sp_digit* m, sp_digit mp)
8252 {
8253     sp_3072_sqr_48(r, a);
8254     sp_3072_mont_reduce_48(r, m, mp);
8255 }
8256 
8257 /* Mul a by digit b into r. (r = a * b)
8258  *
8259  * r  A single precision integer.
8260  * a  A single precision integer.
8261  * b  A single precision digit.
      *
      * 192 bytes of a are consumed, one word per step; r3:r4:r5 is a running
      * accumulator whose low word is stored after each product is added.
      * r receives one word more than is read from a: the final str writes the
      * remaining high word.
8262  */
sp_3072_mul_d_48(sp_digit * r,const sp_digit * a,sp_digit b)8263 SP_NOINLINE static void sp_3072_mul_d_48(sp_digit* r, const sp_digit* a,
8264         sp_digit b)
8265 {
8266     __asm__ __volatile__ (
8267         "add	r9, %[a], #192\n\t"  /* r9 = a + 192: loop end address */
8268         /* A[0] * B */
8269         "ldr	r6, [%[a]], #4\n\t"
8270         "umull	r5, r3, r6, %[b]\n\t"  /* r5 = low word, r3 = high word */
8271         "mov	r4, #0\n\t"
8272         "str	r5, [%[r]], #4\n\t"
8273         /* A[0] * B - Done */
8274         "\n1:\n\t"
8275         "mov	r5, #0\n\t"
8276         /* A[] * B */
8277         "ldr	r6, [%[a]], #4\n\t"  /* post-indexed: a advances one word */
8278         "umull	r6, r8, r6, %[b]\n\t"
8279         "adds	r3, r3, r6\n\t"
8280         "adcs 	r4, r4, r8\n\t"
8281         "adc	r5, r5, #0\n\t"
8282         /* A[] * B - Done */
8283         "str	r3, [%[r]], #4\n\t"  /* emit the low accumulator word */
8284         "mov	r3, r4\n\t"  /* shift the accumulator down one word */
8285         "mov	r4, r5\n\t"
8286         "cmp	%[a], r9\n\t"
8287 #ifdef __GNUC__
8288         "blt	1b\n\t"
8289 #else
8290         "blt.n	1b\n\t"
8291 #endif /* __GNUC__ */
8292         "str	r3, [%[r]]\n\t"  /* final high word */
8293         : [r] "+r" (r), [a] "+r" (a)
8294         : [b] "r" (b)
8295         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
8296     );
8297 }
8298 
8299 /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
8300  *
8301  * d1   The high order half of the number to divide.
8302  * d0   The low order half of the number to divide.
8303  * div  The divisor (the value (d1|d0) is divided by).
8304  * returns the result of the division.
8305  *
8306  * Note that this is an approximate div. It may give an answer 1 larger.
      *
      * The quotient is accumulated in r8 from a series of partial quotients.
      * Each of the first four steps divides by r6 = (div >> 16) + 1, adds the
      * partial quotient to r8 and subtracts div times that partial quotient
      * from the running remainder d1:d0. A final udiv of d0 by div supplies the
      * last correction.
      *
      * NOTE(review): %[d0] and %[d1] are listed as input operands but are
      * written inside the asm.
8307  */
div_3072_word_48(sp_digit d1,sp_digit d0,sp_digit div)8308 SP_NOINLINE static sp_digit div_3072_word_48(sp_digit d1, sp_digit d0,
8309         sp_digit div)
8310 {
8311     sp_digit r = 0;
8312 
8313     __asm__ __volatile__ (
8314         "lsr	r6, %[div], #16\n\t"
8315         "add	r6, r6, #1\n\t"  /* r6 = (div >> 16) + 1 */
8316         "udiv	r4, %[d1], r6\n\t"  /* step 1: partial quotient from d1 ... */
8317         "lsl	r8, r4, #16\n\t"  /* ... placed in the upper 16 bits of r8 */
8318         "umull	r4, r5, %[div], r8\n\t"
8319         "subs	%[d0], %[d0], r4\n\t"  /* d1:d0 -= div * partial quotient */
8320         "sbc	%[d1], %[d1], r5\n\t"
8321         "udiv	r5, %[d1], r6\n\t"  /* step 2: same again on the reduced d1 */
8322         "lsl	r4, r5, #16\n\t"
8323         "add	r8, r8, r4\n\t"
8324         "umull	r4, r5, %[div], r4\n\t"
8325         "subs	%[d0], %[d0], r4\n\t"
8326         "sbc	%[d1], %[d1], r5\n\t"
8327         "lsl	r4, %[d1], #16\n\t"
8328         "orr	r4, r4, %[d0], lsr #16\n\t"  /* step 3: middle 32 bits of d1:d0 */
8329         "udiv	r4, r4, r6\n\t"
8330         "add	r8, r8, r4\n\t"
8331         "umull	r4, r5, %[div], r4\n\t"
8332         "subs	%[d0], %[d0], r4\n\t"
8333         "sbc	%[d1], %[d1], r5\n\t"
8334         "lsl	r4, %[d1], #16\n\t"
8335         "orr	r4, r4, %[d0], lsr #16\n\t"  /* step 4: middle 32 bits again */
8336         "udiv	r4, r4, r6\n\t"
8337         "add	r8, r8, r4\n\t"
8338         "umull	r4, r5, %[div], r4\n\t"
8339         "subs	%[d0], %[d0], r4\n\t"
8340         "sbc	%[d1], %[d1], r5\n\t"
8341         "udiv	r4, %[d0], %[div]\n\t"  /* final correction from the low word */
8342         "add	r8, r8, r4\n\t"
8343         "mov	%[r], r8\n\t"
8344         : [r] "+r" (r)
8345         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
8346         : "r4", "r5", "r6", "r8"
8347     );
8348     return r;
8349 }
8350 
8351 /* Compare a with b in constant time.
8352  *
8353  * a  A single precision integer.
8354  * b  A single precision integer.
8355  * return -ve, 0 or +ve if a is less than, equal to or greater than b
8356  * respectively.
      *
      * Words are visited from byte offset 188 down to 0 and every word is
      * always processed. r3 starts as all ones and is cleared at the first
      * word pair that differs, so later (lower) words are masked to zero and
      * can no longer change the result.
8357  */
sp_3072_cmp_48(const sp_digit * a,const sp_digit * b)8358 SP_NOINLINE static sp_int32 sp_3072_cmp_48(const sp_digit* a, const sp_digit* b)
8359 {
8360     sp_digit r = 0;
8361 
8362 
8363     __asm__ __volatile__ (
8364         "mov	r3, #0\n\t"
8365         "mvn	r3, r3\n\t"  /* r3 = all ones: nothing decided yet */
8366         "mov	r6, #188\n\t"  /* start at the highest word */
8367         "\n1:\n\t"
8368         "ldr	r8, [%[a], r6]\n\t"
8369         "ldr	r5, [%[b], r6]\n\t"
8370         "and	r8, r8, r3\n\t"  /* both words become 0 once a difference was seen */
8371         "and	r5, r5, r3\n\t"
8372         "mov	r4, r8\n\t"
8373         "subs	r8, r8, r5\n\t"
8374         "sbc	r8, r8, r8\n\t"  /* r8 = all ones when a word < b word, else 0 */
8375         "add	%[r], %[r], r8\n\t"  /* r -= 1 when a word < b word */
8376         "mvn	r8, r8\n\t"
8377         "and	r3, r3, r8\n\t"  /* clear the mask when a word < b word */
8378         "subs	r5, r5, r4\n\t"
8379         "sbc	r8, r8, r8\n\t"  /* r8 = all ones when b word < a word, else 0 */
8380         "sub	%[r], %[r], r8\n\t"  /* r += 1 when a word > b word */
8381         "mvn	r8, r8\n\t"
8382         "and	r3, r3, r8\n\t"  /* clear the mask when a word > b word */
8383         "sub	r6, r6, #4\n\t"
8384         "cmp	r6, #0\n\t"
8385 #ifdef __GNUC__
8386         "bge	1b\n\t"
8387 #else
8388         "bge.n	1b\n\t"
8389 #endif /* __GNUC__ */
8390         : [r] "+r" (r)
8391         : [a] "r" (a), [b] "r" (b)
8392         : "r3", "r4", "r5", "r6", "r8"
8393     );
8394 
8395     return r;
8396 }
8397 
8398 /* Divide d in a and put remainder into r (m*d + r = a)
8399  * m is not calculated as it is not needed at this time.
8400  *
8401  * a  Number to be divided.
8402  * d  Number to divide with.
8403  * m  Multiplier result.
8404  * r  Remainder from the division.
8405  * returns MP_OKAY indicating success.
8406  */
sp_3072_div_48(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)8407 static WC_INLINE int sp_3072_div_48(const sp_digit* a, const sp_digit* d, sp_digit* m,
8408         sp_digit* r)
8409 {
8410     sp_digit t1[96], t2[49];
8411     sp_digit div, r1;
8412     int i;
8413 
8414     (void)m;
8415 
8416     div = d[47];
8417     XMEMCPY(t1, a, sizeof(*t1) * 2 * 48);
8418     for (i=47; i>=0; i--) {
8419         sp_digit hi = t1[48 + i] - (t1[48 + i] == div);
8420         r1 = div_3072_word_48(hi, t1[48 + i - 1], div);
8421 
8422         sp_3072_mul_d_48(t2, d, r1);
8423         t1[48 + i] += sp_3072_sub_in_place_48(&t1[i], t2);
8424         t1[48 + i] -= t2[48];
8425         sp_3072_mask_48(t2, d, t1[48 + i]);
8426         t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
8427         sp_3072_mask_48(t2, d, t1[48 + i]);
8428         t1[48 + i] += sp_3072_add_48(&t1[i], &t1[i], t2);
8429     }
8430 
8431     r1 = sp_3072_cmp_48(t1, d) >= 0;
8432     sp_3072_cond_sub_48(r, t1, d, (sp_digit)0 - r1);
8433 
8434     return MP_OKAY;
8435 }
8436 
8437 /* Reduce a modulo m into r. (r = a mod m)
8438  *
8439  * r  A single precision number that is the reduced result.
8440  * a  A single precision number that is to be reduced.
8441  * m  A single precision number that is the modulus to reduce with.
8442  * returns MP_OKAY indicating success.
8443  */
sp_3072_mod_48(sp_digit * r,const sp_digit * a,const sp_digit * m)8444 static WC_INLINE int sp_3072_mod_48(sp_digit* r, const sp_digit* a, const sp_digit* m)
8445 {
8446     return sp_3072_div_48(a, m, NULL, r);
8447 }
8448 
8449 #ifdef WOLFSSL_SP_SMALL
8450 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
8451  *
8452  * r     A single precision number that is the result of the operation.
8453  * a     A single precision number being exponentiated.
8454  * e     A single precision number that is the exponent.
8455  * bits  The number of bits in the exponent.
8456  * m     A single precision number that is the modulus.
8457  * returns  0 on success.
8458  * returns  MEMORY_E on dynamic memory allocation failure.
8459  * returns  MP_VAL when base is even or exponent is 0.
8460  */
sp_3072_mod_exp_48(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)8461 static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
8462         int bits, const sp_digit* m, int reduceA)
8463 {
8464 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8465     sp_digit* td = NULL;
8466 #else
8467     sp_digit td[16 * 96];
8468 #endif
8469     sp_digit* t[16];
8470     sp_digit* norm = NULL;
8471     sp_digit mp = 1;
8472     sp_digit n;
8473     sp_digit mask;
8474     int i;
8475     int c;
8476     byte y;
8477     int err = MP_OKAY;
8478 
8479     if ((m[0] & 1) == 0) {
8480         err = MP_VAL;
8481     }
8482     else if (bits == 0) {
8483         err = MP_VAL;
8484     }
8485 
8486 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8487     if (err == MP_OKAY) {
8488         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 96), NULL,
8489                                 DYNAMIC_TYPE_TMP_BUFFER);
8490         if (td == NULL)
8491             err = MEMORY_E;
8492     }
8493 #endif
8494 
8495     if (err == MP_OKAY) {
8496         norm = td;
8497         for (i=0; i<16; i++) {
8498             t[i] = td + i * 96;
8499         }
8500 
8501         sp_3072_mont_setup(m, &mp);
8502         sp_3072_mont_norm_48(norm, m);
8503 
8504         XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
8505         if (reduceA != 0) {
8506             err = sp_3072_mod_48(t[1] + 48, a, m);
8507             if (err == MP_OKAY) {
8508                 err = sp_3072_mod_48(t[1], t[1], m);
8509             }
8510         }
8511         else {
8512             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
8513             err = sp_3072_mod_48(t[1], t[1], m);
8514         }
8515     }
8516 
8517     if (err == MP_OKAY) {
8518         sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
8519         sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
8520         sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
8521         sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
8522         sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
8523         sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
8524         sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
8525         sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
8526         sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
8527         sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
8528         sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
8529         sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
8530         sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
8531         sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
8532 
8533         i = (bits - 1) / 32;
8534         n = e[i--];
8535         c = bits & 31;
8536         if (c == 0) {
8537             c = 32;
8538         }
8539         c -= bits % 4;
8540         if (c == 32) {
8541             c = 28;
8542         }
8543         if (c < 0) {
8544             /* Number of bits in top word is less than number needed. */
8545             c = -c;
8546             y = (byte)(n << c);
8547             n = e[i--];
8548             y |= (byte)(n >> (64 - c));
8549             n <<= c;
8550             c = 64 - c;
8551         }
8552         else {
8553             y = (byte)(n >> c);
8554             n <<= 32 - c;
8555         }
8556         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
8557         for (; i>=0 || c>=4; ) {
8558             if (c == 0) {
8559                 n = e[i--];
8560                 y = (byte)(n >> 28);
8561                 n <<= 4;
8562                 c = 28;
8563             }
8564             else if (c < 4) {
8565                 y = (byte)(n >> 28);
8566                 n = e[i--];
8567                 c = 4 - c;
8568                 y |= (byte)(n >> (32 - c));
8569                 n <<= c;
8570                 c = 32 - c;
8571             }
8572             else {
8573                 y = (byte)((n >> 28) & 0xf);
8574                 n <<= 4;
8575                 c -= 4;
8576             }
8577 
8578             sp_3072_mont_sqr_48(r, r, m, mp);
8579             sp_3072_mont_sqr_48(r, r, m, mp);
8580             sp_3072_mont_sqr_48(r, r, m, mp);
8581             sp_3072_mont_sqr_48(r, r, m, mp);
8582 
8583             sp_3072_mont_mul_48(r, r, t[y], m, mp);
8584         }
8585 
8586         XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
8587         sp_3072_mont_reduce_48(r, m, mp);
8588 
8589         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
8590         sp_3072_cond_sub_48(r, r, m, mask);
8591     }
8592 
8593 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8594     if (td != NULL)
8595         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
8596 #endif
8597 
8598     return err;
8599 }
8600 #else
8601 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
8602  *
8603  * r     A single precision number that is the result of the operation.
8604  * a     A single precision number being exponentiated.
8605  * e     A single precision number that is the exponent.
8606  * bits  The number of bits in the exponent.
8607  * m     A single precision number that is the modulus.
8608  * returns  0 on success.
8609  * returns  MEMORY_E on dynamic memory allocation failure.
8610  * returns  MP_VAL when base is even or exponent is 0.
8611  */
sp_3072_mod_exp_48(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)8612 static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e,
8613         int bits, const sp_digit* m, int reduceA)
8614 {
8615 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8616     sp_digit* td = NULL;
8617 #else
8618     sp_digit td[32 * 96];
8619 #endif
8620     sp_digit* t[32];
8621     sp_digit* norm = NULL;
8622     sp_digit mp = 1;
8623     sp_digit n;
8624     sp_digit mask;
8625     int i;
8626     int c;
8627     byte y;
8628     int err = MP_OKAY;
8629 
8630     if ((m[0] & 1) == 0) {
8631         err = MP_VAL;
8632     }
8633     else if (bits == 0) {
8634         err = MP_VAL;
8635     }
8636 
8637 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8638     if (err == MP_OKAY) {
8639         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (32 * 96), NULL,
8640                                 DYNAMIC_TYPE_TMP_BUFFER);
8641         if (td == NULL)
8642             err = MEMORY_E;
8643     }
8644 #endif
8645 
8646     if (err == MP_OKAY) {
8647         norm = td;
8648         for (i=0; i<32; i++) {
8649             t[i] = td + i * 96;
8650         }
8651 
8652         sp_3072_mont_setup(m, &mp);
8653         sp_3072_mont_norm_48(norm, m);
8654 
8655         XMEMSET(t[1], 0, sizeof(sp_digit) * 48U);
8656         if (reduceA != 0) {
8657             err = sp_3072_mod_48(t[1] + 48, a, m);
8658             if (err == MP_OKAY) {
8659                 err = sp_3072_mod_48(t[1], t[1], m);
8660             }
8661         }
8662         else {
8663             XMEMCPY(t[1] + 48, a, sizeof(sp_digit) * 48);
8664             err = sp_3072_mod_48(t[1], t[1], m);
8665         }
8666     }
8667 
8668     if (err == MP_OKAY) {
8669         sp_3072_mont_sqr_48(t[ 2], t[ 1], m, mp);
8670         sp_3072_mont_mul_48(t[ 3], t[ 2], t[ 1], m, mp);
8671         sp_3072_mont_sqr_48(t[ 4], t[ 2], m, mp);
8672         sp_3072_mont_mul_48(t[ 5], t[ 3], t[ 2], m, mp);
8673         sp_3072_mont_sqr_48(t[ 6], t[ 3], m, mp);
8674         sp_3072_mont_mul_48(t[ 7], t[ 4], t[ 3], m, mp);
8675         sp_3072_mont_sqr_48(t[ 8], t[ 4], m, mp);
8676         sp_3072_mont_mul_48(t[ 9], t[ 5], t[ 4], m, mp);
8677         sp_3072_mont_sqr_48(t[10], t[ 5], m, mp);
8678         sp_3072_mont_mul_48(t[11], t[ 6], t[ 5], m, mp);
8679         sp_3072_mont_sqr_48(t[12], t[ 6], m, mp);
8680         sp_3072_mont_mul_48(t[13], t[ 7], t[ 6], m, mp);
8681         sp_3072_mont_sqr_48(t[14], t[ 7], m, mp);
8682         sp_3072_mont_mul_48(t[15], t[ 8], t[ 7], m, mp);
8683         sp_3072_mont_sqr_48(t[16], t[ 8], m, mp);
8684         sp_3072_mont_mul_48(t[17], t[ 9], t[ 8], m, mp);
8685         sp_3072_mont_sqr_48(t[18], t[ 9], m, mp);
8686         sp_3072_mont_mul_48(t[19], t[10], t[ 9], m, mp);
8687         sp_3072_mont_sqr_48(t[20], t[10], m, mp);
8688         sp_3072_mont_mul_48(t[21], t[11], t[10], m, mp);
8689         sp_3072_mont_sqr_48(t[22], t[11], m, mp);
8690         sp_3072_mont_mul_48(t[23], t[12], t[11], m, mp);
8691         sp_3072_mont_sqr_48(t[24], t[12], m, mp);
8692         sp_3072_mont_mul_48(t[25], t[13], t[12], m, mp);
8693         sp_3072_mont_sqr_48(t[26], t[13], m, mp);
8694         sp_3072_mont_mul_48(t[27], t[14], t[13], m, mp);
8695         sp_3072_mont_sqr_48(t[28], t[14], m, mp);
8696         sp_3072_mont_mul_48(t[29], t[15], t[14], m, mp);
8697         sp_3072_mont_sqr_48(t[30], t[15], m, mp);
8698         sp_3072_mont_mul_48(t[31], t[16], t[15], m, mp);
8699 
8700         i = (bits - 1) / 32;
8701         n = e[i--];
8702         c = bits & 31;
8703         if (c == 0) {
8704             c = 32;
8705         }
8706         c -= bits % 5;
8707         if (c == 32) {
8708             c = 27;
8709         }
8710         if (c < 0) {
8711             /* Number of bits in top word is less than number needed. */
8712             c = -c;
8713             y = (byte)(n << c);
8714             n = e[i--];
8715             y |= (byte)(n >> (64 - c));
8716             n <<= c;
8717             c = 64 - c;
8718         }
8719         else {
8720             y = (byte)(n >> c);
8721             n <<= 32 - c;
8722         }
8723         XMEMCPY(r, t[y], sizeof(sp_digit) * 48);
8724         for (; i>=0 || c>=5; ) {
8725             if (c == 0) {
8726                 n = e[i--];
8727                 y = (byte)(n >> 27);
8728                 n <<= 5;
8729                 c = 27;
8730             }
8731             else if (c < 5) {
8732                 y = (byte)(n >> 27);
8733                 n = e[i--];
8734                 c = 5 - c;
8735                 y |= (byte)(n >> (32 - c));
8736                 n <<= c;
8737                 c = 32 - c;
8738             }
8739             else {
8740                 y = (byte)((n >> 27) & 0x1f);
8741                 n <<= 5;
8742                 c -= 5;
8743             }
8744 
8745             sp_3072_mont_sqr_48(r, r, m, mp);
8746             sp_3072_mont_sqr_48(r, r, m, mp);
8747             sp_3072_mont_sqr_48(r, r, m, mp);
8748             sp_3072_mont_sqr_48(r, r, m, mp);
8749             sp_3072_mont_sqr_48(r, r, m, mp);
8750 
8751             sp_3072_mont_mul_48(r, r, t[y], m, mp);
8752         }
8753 
8754         XMEMSET(&r[48], 0, sizeof(sp_digit) * 48U);
8755         sp_3072_mont_reduce_48(r, m, mp);
8756 
8757         mask = 0 - (sp_3072_cmp_48(r, m) >= 0);
8758         sp_3072_cond_sub_48(r, r, m, mask);
8759     }
8760 
8761 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
8762     if (td != NULL)
8763         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
8764 #endif
8765 
8766     return err;
8767 }
8768 #endif /* WOLFSSL_SP_SMALL */
8769 
8770 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
8771 
8772 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
8773 /* r = 2^n mod m where n is the number of bits to reduce by.
8774  * Given m must be 3072 bits, just need to subtract.
8775  *
8776  * r  A single precision number.
8777  * m  A single precision number.
8778  */
sp_3072_mont_norm_96(sp_digit * r,const sp_digit * m)8779 static void sp_3072_mont_norm_96(sp_digit* r, const sp_digit* m)
8780 {
8781     XMEMSET(r, 0, sizeof(sp_digit) * 96);
8782 
8783     /* r = 2^n mod m */
8784     sp_3072_sub_in_place_96(r, m);
8785 }
8786 
8787 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
8788 /* Conditionally subtract b from a using the mask m.
8789  * m is -1 to subtract and 0 when not copying.
8790  *
8791  * r  A single precision number representing condition subtract result.
8792  * a  A single precision number to subtract from.
8793  * b  A single precision number to subtract.
8794  * m  Mask value to apply.
8795  */
sp_3072_cond_sub_96(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)8796 SP_NOINLINE static sp_digit sp_3072_cond_sub_96(sp_digit* r, const sp_digit* a,
8797         const sp_digit* b, sp_digit m)
8798 {
8799     sp_digit c = 0;
8800 
8801     __asm__ __volatile__ (
8802         "mov	r5, #1\n\t"
8803         "lsl	r5, r5, #8\n\t"
8804         "add	r5, r5, #128\n\t"
8805         "mov	r9, r5\n\t"
8806         "mov	r8, #0\n\t"
8807         "\n1:\n\t"
8808         "ldr	r6, [%[b], r8]\n\t"
8809         "and	r6, r6, %[m]\n\t"
8810         "mov	r5, #0\n\t"
8811         "subs	r5, r5, %[c]\n\t"
8812         "ldr	r5, [%[a], r8]\n\t"
8813         "sbcs	r5, r5, r6\n\t"
8814         "sbcs	%[c], %[c], %[c]\n\t"
8815         "str	r5, [%[r], r8]\n\t"
8816         "add	r8, r8, #4\n\t"
8817         "cmp	r8, r9\n\t"
8818 #ifdef __GNUC__
8819         "blt	1b\n\t"
8820 #else
8821         "blt.n	1b\n\t"
8822 #endif /* __GNUC__ */
8823         : [c] "+r" (c)
8824         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
8825         : "memory", "r5", "r6", "r8", "r9"
8826     );
8827 
8828     return c;
8829 }
8830 
8831 /* Reduce the number back to 3072 bits using Montgomery reduction.
8832  *
8833  * a   A single precision number to reduce in place.
8834  * m   The single precision number representing the modulus.
8835  * mp  The digit representing the negative inverse of m mod 2^n.
8836  */
sp_3072_mont_reduce_96(sp_digit * a,const sp_digit * m,sp_digit mp)8837 SP_NOINLINE static void sp_3072_mont_reduce_96(sp_digit* a, const sp_digit* m,
8838         sp_digit mp)
8839 {
8840     sp_digit ca = 0;
8841 
8842     __asm__ __volatile__ (
8843         "mov	r9, %[mp]\n\t"
8844         "mov	r12, %[m]\n\t"
8845         "mov	r10, %[a]\n\t"
8846         "mov	r4, #0\n\t"
8847         "add	r11, r10, #384\n\t"
8848         "\n1:\n\t"
8849         /* mu = a[i] * mp */
8850         "mov	%[mp], r9\n\t"
8851         "ldr	%[a], [r10]\n\t"
8852         "mul	%[mp], %[mp], %[a]\n\t"
8853         "mov	%[m], r12\n\t"
8854         "add	r14, r10, #376\n\t"
8855         "\n2:\n\t"
8856         /* a[i+j] += m[j] * mu */
8857         "ldr	%[a], [r10]\n\t"
8858         "mov	r5, #0\n\t"
8859         /* Multiply m[j] and mu - Start */
8860         "ldr	r8, [%[m]], #4\n\t"
8861         "umull	r6, r8, %[mp], r8\n\t"
8862         "adds	%[a], %[a], r6\n\t"
8863         "adc	r5, r5, r8\n\t"
8864         /* Multiply m[j] and mu - Done */
8865         "adds	r4, r4, %[a]\n\t"
8866         "adc	r5, r5, #0\n\t"
8867         "str	r4, [r10], #4\n\t"
8868         /* a[i+j+1] += m[j+1] * mu */
8869         "ldr	%[a], [r10]\n\t"
8870         "mov	r4, #0\n\t"
8871         /* Multiply m[j] and mu - Start */
8872         "ldr	r8, [%[m]], #4\n\t"
8873         "umull	r6, r8, %[mp], r8\n\t"
8874         "adds	%[a], %[a], r6\n\t"
8875         "adc	r4, r4, r8\n\t"
8876         /* Multiply m[j] and mu - Done */
8877         "adds	r5, r5, %[a]\n\t"
8878         "adc	r4, r4, #0\n\t"
8879         "str	r5, [r10], #4\n\t"
8880         "cmp	r10, r14\n\t"
8881 #ifdef __GNUC__
8882         "blt	2b\n\t"
8883 #else
8884         "blt.n	2b\n\t"
8885 #endif /* __GNUC__ */
8886         /* a[i+94] += m[94] * mu */
8887         "ldr	%[a], [r10]\n\t"
8888         "mov	r5, #0\n\t"
8889         /* Multiply m[j] and mu - Start */
8890         "ldr	r8, [%[m]], #4\n\t"
8891         "umull	r6, r8, %[mp], r8\n\t"
8892         "adds	%[a], %[a], r6\n\t"
8893         "adc	r5, r5, r8\n\t"
8894         /* Multiply m[j] and mu - Done */
8895         "adds	r4, r4, %[a]\n\t"
8896         "adc	r5, r5, #0\n\t"
8897         "str	r4, [r10], #4\n\t"
8898         /* a[i+95] += m[95] * mu */
8899         "mov	r4, %[ca]\n\t"
8900         "mov	%[ca], #0\n\t"
8901         /* Multiply m[95] and mu - Start */
8902         "ldr	r8, [%[m]]\n\t"
8903         "umull	r6, r8, %[mp], r8\n\t"
8904         "adds	r5, r5, r6\n\t"
8905         "adcs 	r4, r4, r8\n\t"
8906         "adc	%[ca], %[ca], #0\n\t"
8907         /* Multiply m[95] and mu - Done */
8908         "ldr	r6, [r10]\n\t"
8909         "ldr	r8, [r10, #4]\n\t"
8910         "adds	r6, r6, r5\n\t"
8911         "adcs	r8, r8, r4\n\t"
8912         "adc	%[ca], %[ca], #0\n\t"
8913         "str	r6, [r10]\n\t"
8914         "str	r8, [r10, #4]\n\t"
8915         /* Next word in a */
8916         "sub	r10, r10, #376\n\t"
8917         "cmp	r10, r11\n\t"
8918 #ifdef __GNUC__
8919         "blt	1b\n\t"
8920 #else
8921         "blt.n	1b\n\t"
8922 #endif /* __GNUC__ */
8923         "mov	%[a], r10\n\t"
8924         "mov	%[m], r12\n\t"
8925         : [ca] "+r" (ca), [a] "+r" (a)
8926         : [m] "r" (m), [mp] "r" (mp)
8927         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
8928     );
8929 
8930     sp_3072_cond_sub_96(a - 96, a, m, (sp_digit)0 - ca);
8931 }
8932 
8933 /* Multiply two Montgomery form numbers mod the modulus (prime).
8934  * (r = a * b mod m)
8935  *
8936  * r   Result of multiplication.
8937  * a   First number to multiply in Montgomery form.
8938  * b   Second number to multiply in Montgomery form.
8939  * m   Modulus (prime).
8940  * mp  Montgomery mulitplier.
8941  */
sp_3072_mont_mul_96(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m,sp_digit mp)8942 static void sp_3072_mont_mul_96(sp_digit* r, const sp_digit* a,
8943         const sp_digit* b, const sp_digit* m, sp_digit mp)
8944 {
8945     sp_3072_mul_96(r, a, b);
8946     sp_3072_mont_reduce_96(r, m, mp);
8947 }
8948 
8949 /* Square the Montgomery form number. (r = a * a mod m)
8950  *
8951  * r   Result of squaring.
8952  * a   Number to square in Montgomery form.
8953  * m   Modulus (prime).
8954  * mp  Montgomery mulitplier.
8955  */
sp_3072_mont_sqr_96(sp_digit * r,const sp_digit * a,const sp_digit * m,sp_digit mp)8956 static void sp_3072_mont_sqr_96(sp_digit* r, const sp_digit* a,
8957         const sp_digit* m, sp_digit mp)
8958 {
8959     sp_3072_sqr_96(r, a);
8960     sp_3072_mont_reduce_96(r, m, mp);
8961 }
8962 
8963 #ifdef WOLFSSL_SP_SMALL
8964 /* Sub b from a into r. (r = a - b)
8965  *
8966  * r  A single precision integer.
8967  * a  A single precision integer.
8968  * b  A single precision integer.
8969  */
sp_3072_sub_96(sp_digit * r,const sp_digit * a,const sp_digit * b)8970 SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a,
8971         const sp_digit* b)
8972 {
8973     sp_digit c = 0;
8974 
8975     __asm__ __volatile__ (
8976         "mov	r6, %[a]\n\t"
8977         "mov	r5, #1\n\t"
8978         "lsl	r5, r5, #8\n\t"
8979         "add	r5, r5, #128\n\t"
8980         "add	r6, r6, r5\n\t"
8981         "\n1:\n\t"
8982         "mov	r5, #0\n\t"
8983         "subs	r5, r5, %[c]\n\t"
8984         "ldr	r4, [%[a]]\n\t"
8985         "ldr	r5, [%[b]]\n\t"
8986         "sbcs	r4, r4, r5\n\t"
8987         "str	r4, [%[r]]\n\t"
8988         "sbc	%[c], %[c], %[c]\n\t"
8989         "add	%[a], %[a], #4\n\t"
8990         "add	%[b], %[b], #4\n\t"
8991         "add	%[r], %[r], #4\n\t"
8992         "cmp	%[a], r6\n\t"
8993 #ifdef __GNUC__
8994         "bne	1b\n\t"
8995 #else
8996         "bne.n	1b\n\t"
8997 #endif /* __GNUC__ */
8998         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
8999         :
9000         : "memory", "r4", "r5", "r6"
9001     );
9002 
9003     return c;
9004 }
9005 
9006 #else
9007 /* Sub b from a into r. (r = a - b)
9008  *
9009  * r  A single precision integer.
9010  * a  A single precision integer.
9011  * b  A single precision integer.
9012  */
sp_3072_sub_96(sp_digit * r,const sp_digit * a,const sp_digit * b)9013 SP_NOINLINE static sp_digit sp_3072_sub_96(sp_digit* r, const sp_digit* a,
9014         const sp_digit* b)
9015 {
9016     sp_digit c = 0;
9017 
9018     __asm__ __volatile__ (
9019         "ldr	r4, [%[a], #0]\n\t"
9020         "ldr	r5, [%[a], #4]\n\t"
9021         "ldr	r6, [%[b], #0]\n\t"
9022         "ldr	r8, [%[b], #4]\n\t"
9023         "subs	r4, r4, r6\n\t"
9024         "sbcs	r5, r5, r8\n\t"
9025         "str	r4, [%[r], #0]\n\t"
9026         "str	r5, [%[r], #4]\n\t"
9027         "ldr	r4, [%[a], #8]\n\t"
9028         "ldr	r5, [%[a], #12]\n\t"
9029         "ldr	r6, [%[b], #8]\n\t"
9030         "ldr	r8, [%[b], #12]\n\t"
9031         "sbcs	r4, r4, r6\n\t"
9032         "sbcs	r5, r5, r8\n\t"
9033         "str	r4, [%[r], #8]\n\t"
9034         "str	r5, [%[r], #12]\n\t"
9035         "ldr	r4, [%[a], #16]\n\t"
9036         "ldr	r5, [%[a], #20]\n\t"
9037         "ldr	r6, [%[b], #16]\n\t"
9038         "ldr	r8, [%[b], #20]\n\t"
9039         "sbcs	r4, r4, r6\n\t"
9040         "sbcs	r5, r5, r8\n\t"
9041         "str	r4, [%[r], #16]\n\t"
9042         "str	r5, [%[r], #20]\n\t"
9043         "ldr	r4, [%[a], #24]\n\t"
9044         "ldr	r5, [%[a], #28]\n\t"
9045         "ldr	r6, [%[b], #24]\n\t"
9046         "ldr	r8, [%[b], #28]\n\t"
9047         "sbcs	r4, r4, r6\n\t"
9048         "sbcs	r5, r5, r8\n\t"
9049         "str	r4, [%[r], #24]\n\t"
9050         "str	r5, [%[r], #28]\n\t"
9051         "ldr	r4, [%[a], #32]\n\t"
9052         "ldr	r5, [%[a], #36]\n\t"
9053         "ldr	r6, [%[b], #32]\n\t"
9054         "ldr	r8, [%[b], #36]\n\t"
9055         "sbcs	r4, r4, r6\n\t"
9056         "sbcs	r5, r5, r8\n\t"
9057         "str	r4, [%[r], #32]\n\t"
9058         "str	r5, [%[r], #36]\n\t"
9059         "ldr	r4, [%[a], #40]\n\t"
9060         "ldr	r5, [%[a], #44]\n\t"
9061         "ldr	r6, [%[b], #40]\n\t"
9062         "ldr	r8, [%[b], #44]\n\t"
9063         "sbcs	r4, r4, r6\n\t"
9064         "sbcs	r5, r5, r8\n\t"
9065         "str	r4, [%[r], #40]\n\t"
9066         "str	r5, [%[r], #44]\n\t"
9067         "ldr	r4, [%[a], #48]\n\t"
9068         "ldr	r5, [%[a], #52]\n\t"
9069         "ldr	r6, [%[b], #48]\n\t"
9070         "ldr	r8, [%[b], #52]\n\t"
9071         "sbcs	r4, r4, r6\n\t"
9072         "sbcs	r5, r5, r8\n\t"
9073         "str	r4, [%[r], #48]\n\t"
9074         "str	r5, [%[r], #52]\n\t"
9075         "ldr	r4, [%[a], #56]\n\t"
9076         "ldr	r5, [%[a], #60]\n\t"
9077         "ldr	r6, [%[b], #56]\n\t"
9078         "ldr	r8, [%[b], #60]\n\t"
9079         "sbcs	r4, r4, r6\n\t"
9080         "sbcs	r5, r5, r8\n\t"
9081         "str	r4, [%[r], #56]\n\t"
9082         "str	r5, [%[r], #60]\n\t"
9083         "ldr	r4, [%[a], #64]\n\t"
9084         "ldr	r5, [%[a], #68]\n\t"
9085         "ldr	r6, [%[b], #64]\n\t"
9086         "ldr	r8, [%[b], #68]\n\t"
9087         "sbcs	r4, r4, r6\n\t"
9088         "sbcs	r5, r5, r8\n\t"
9089         "str	r4, [%[r], #64]\n\t"
9090         "str	r5, [%[r], #68]\n\t"
9091         "ldr	r4, [%[a], #72]\n\t"
9092         "ldr	r5, [%[a], #76]\n\t"
9093         "ldr	r6, [%[b], #72]\n\t"
9094         "ldr	r8, [%[b], #76]\n\t"
9095         "sbcs	r4, r4, r6\n\t"
9096         "sbcs	r5, r5, r8\n\t"
9097         "str	r4, [%[r], #72]\n\t"
9098         "str	r5, [%[r], #76]\n\t"
9099         "ldr	r4, [%[a], #80]\n\t"
9100         "ldr	r5, [%[a], #84]\n\t"
9101         "ldr	r6, [%[b], #80]\n\t"
9102         "ldr	r8, [%[b], #84]\n\t"
9103         "sbcs	r4, r4, r6\n\t"
9104         "sbcs	r5, r5, r8\n\t"
9105         "str	r4, [%[r], #80]\n\t"
9106         "str	r5, [%[r], #84]\n\t"
9107         "ldr	r4, [%[a], #88]\n\t"
9108         "ldr	r5, [%[a], #92]\n\t"
9109         "ldr	r6, [%[b], #88]\n\t"
9110         "ldr	r8, [%[b], #92]\n\t"
9111         "sbcs	r4, r4, r6\n\t"
9112         "sbcs	r5, r5, r8\n\t"
9113         "str	r4, [%[r], #88]\n\t"
9114         "str	r5, [%[r], #92]\n\t"
9115         "ldr	r4, [%[a], #96]\n\t"
9116         "ldr	r5, [%[a], #100]\n\t"
9117         "ldr	r6, [%[b], #96]\n\t"
9118         "ldr	r8, [%[b], #100]\n\t"
9119         "sbcs	r4, r4, r6\n\t"
9120         "sbcs	r5, r5, r8\n\t"
9121         "str	r4, [%[r], #96]\n\t"
9122         "str	r5, [%[r], #100]\n\t"
9123         "ldr	r4, [%[a], #104]\n\t"
9124         "ldr	r5, [%[a], #108]\n\t"
9125         "ldr	r6, [%[b], #104]\n\t"
9126         "ldr	r8, [%[b], #108]\n\t"
9127         "sbcs	r4, r4, r6\n\t"
9128         "sbcs	r5, r5, r8\n\t"
9129         "str	r4, [%[r], #104]\n\t"
9130         "str	r5, [%[r], #108]\n\t"
9131         "ldr	r4, [%[a], #112]\n\t"
9132         "ldr	r5, [%[a], #116]\n\t"
9133         "ldr	r6, [%[b], #112]\n\t"
9134         "ldr	r8, [%[b], #116]\n\t"
9135         "sbcs	r4, r4, r6\n\t"
9136         "sbcs	r5, r5, r8\n\t"
9137         "str	r4, [%[r], #112]\n\t"
9138         "str	r5, [%[r], #116]\n\t"
9139         "ldr	r4, [%[a], #120]\n\t"
9140         "ldr	r5, [%[a], #124]\n\t"
9141         "ldr	r6, [%[b], #120]\n\t"
9142         "ldr	r8, [%[b], #124]\n\t"
9143         "sbcs	r4, r4, r6\n\t"
9144         "sbcs	r5, r5, r8\n\t"
9145         "str	r4, [%[r], #120]\n\t"
9146         "str	r5, [%[r], #124]\n\t"
9147         "sbc	%[c], %[c], %[c]\n\t"
9148         "add	%[a], %[a], #0x80\n\t"
9149         "add	%[b], %[b], #0x80\n\t"
9150         "add	%[r], %[r], #0x80\n\t"
9151         "mov	r6, #0\n\t"
9152         "sub	r6, r6, %[c]\n\t"
9153         "ldr	r4, [%[a], #0]\n\t"
9154         "ldr	r5, [%[a], #4]\n\t"
9155         "ldr	r6, [%[b], #0]\n\t"
9156         "ldr	r8, [%[b], #4]\n\t"
9157         "sbcs	r4, r4, r6\n\t"
9158         "sbcs	r5, r5, r8\n\t"
9159         "str	r4, [%[r], #0]\n\t"
9160         "str	r5, [%[r], #4]\n\t"
9161         "ldr	r4, [%[a], #8]\n\t"
9162         "ldr	r5, [%[a], #12]\n\t"
9163         "ldr	r6, [%[b], #8]\n\t"
9164         "ldr	r8, [%[b], #12]\n\t"
9165         "sbcs	r4, r4, r6\n\t"
9166         "sbcs	r5, r5, r8\n\t"
9167         "str	r4, [%[r], #8]\n\t"
9168         "str	r5, [%[r], #12]\n\t"
9169         "ldr	r4, [%[a], #16]\n\t"
9170         "ldr	r5, [%[a], #20]\n\t"
9171         "ldr	r6, [%[b], #16]\n\t"
9172         "ldr	r8, [%[b], #20]\n\t"
9173         "sbcs	r4, r4, r6\n\t"
9174         "sbcs	r5, r5, r8\n\t"
9175         "str	r4, [%[r], #16]\n\t"
9176         "str	r5, [%[r], #20]\n\t"
9177         "ldr	r4, [%[a], #24]\n\t"
9178         "ldr	r5, [%[a], #28]\n\t"
9179         "ldr	r6, [%[b], #24]\n\t"
9180         "ldr	r8, [%[b], #28]\n\t"
9181         "sbcs	r4, r4, r6\n\t"
9182         "sbcs	r5, r5, r8\n\t"
9183         "str	r4, [%[r], #24]\n\t"
9184         "str	r5, [%[r], #28]\n\t"
9185         "ldr	r4, [%[a], #32]\n\t"
9186         "ldr	r5, [%[a], #36]\n\t"
9187         "ldr	r6, [%[b], #32]\n\t"
9188         "ldr	r8, [%[b], #36]\n\t"
9189         "sbcs	r4, r4, r6\n\t"
9190         "sbcs	r5, r5, r8\n\t"
9191         "str	r4, [%[r], #32]\n\t"
9192         "str	r5, [%[r], #36]\n\t"
9193         "ldr	r4, [%[a], #40]\n\t"
9194         "ldr	r5, [%[a], #44]\n\t"
9195         "ldr	r6, [%[b], #40]\n\t"
9196         "ldr	r8, [%[b], #44]\n\t"
9197         "sbcs	r4, r4, r6\n\t"
9198         "sbcs	r5, r5, r8\n\t"
9199         "str	r4, [%[r], #40]\n\t"
9200         "str	r5, [%[r], #44]\n\t"
9201         "ldr	r4, [%[a], #48]\n\t"
9202         "ldr	r5, [%[a], #52]\n\t"
9203         "ldr	r6, [%[b], #48]\n\t"
9204         "ldr	r8, [%[b], #52]\n\t"
9205         "sbcs	r4, r4, r6\n\t"
9206         "sbcs	r5, r5, r8\n\t"
9207         "str	r4, [%[r], #48]\n\t"
9208         "str	r5, [%[r], #52]\n\t"
9209         "ldr	r4, [%[a], #56]\n\t"
9210         "ldr	r5, [%[a], #60]\n\t"
9211         "ldr	r6, [%[b], #56]\n\t"
9212         "ldr	r8, [%[b], #60]\n\t"
9213         "sbcs	r4, r4, r6\n\t"
9214         "sbcs	r5, r5, r8\n\t"
9215         "str	r4, [%[r], #56]\n\t"
9216         "str	r5, [%[r], #60]\n\t"
9217         "ldr	r4, [%[a], #64]\n\t"
9218         "ldr	r5, [%[a], #68]\n\t"
9219         "ldr	r6, [%[b], #64]\n\t"
9220         "ldr	r8, [%[b], #68]\n\t"
9221         "sbcs	r4, r4, r6\n\t"
9222         "sbcs	r5, r5, r8\n\t"
9223         "str	r4, [%[r], #64]\n\t"
9224         "str	r5, [%[r], #68]\n\t"
9225         "ldr	r4, [%[a], #72]\n\t"
9226         "ldr	r5, [%[a], #76]\n\t"
9227         "ldr	r6, [%[b], #72]\n\t"
9228         "ldr	r8, [%[b], #76]\n\t"
9229         "sbcs	r4, r4, r6\n\t"
9230         "sbcs	r5, r5, r8\n\t"
9231         "str	r4, [%[r], #72]\n\t"
9232         "str	r5, [%[r], #76]\n\t"
9233         "ldr	r4, [%[a], #80]\n\t"
9234         "ldr	r5, [%[a], #84]\n\t"
9235         "ldr	r6, [%[b], #80]\n\t"
9236         "ldr	r8, [%[b], #84]\n\t"
9237         "sbcs	r4, r4, r6\n\t"
9238         "sbcs	r5, r5, r8\n\t"
9239         "str	r4, [%[r], #80]\n\t"
9240         "str	r5, [%[r], #84]\n\t"
9241         "ldr	r4, [%[a], #88]\n\t"
9242         "ldr	r5, [%[a], #92]\n\t"
9243         "ldr	r6, [%[b], #88]\n\t"
9244         "ldr	r8, [%[b], #92]\n\t"
9245         "sbcs	r4, r4, r6\n\t"
9246         "sbcs	r5, r5, r8\n\t"
9247         "str	r4, [%[r], #88]\n\t"
9248         "str	r5, [%[r], #92]\n\t"
9249         "ldr	r4, [%[a], #96]\n\t"
9250         "ldr	r5, [%[a], #100]\n\t"
9251         "ldr	r6, [%[b], #96]\n\t"
9252         "ldr	r8, [%[b], #100]\n\t"
9253         "sbcs	r4, r4, r6\n\t"
9254         "sbcs	r5, r5, r8\n\t"
9255         "str	r4, [%[r], #96]\n\t"
9256         "str	r5, [%[r], #100]\n\t"
9257         "ldr	r4, [%[a], #104]\n\t"
9258         "ldr	r5, [%[a], #108]\n\t"
9259         "ldr	r6, [%[b], #104]\n\t"
9260         "ldr	r8, [%[b], #108]\n\t"
9261         "sbcs	r4, r4, r6\n\t"
9262         "sbcs	r5, r5, r8\n\t"
9263         "str	r4, [%[r], #104]\n\t"
9264         "str	r5, [%[r], #108]\n\t"
9265         "ldr	r4, [%[a], #112]\n\t"
9266         "ldr	r5, [%[a], #116]\n\t"
9267         "ldr	r6, [%[b], #112]\n\t"
9268         "ldr	r8, [%[b], #116]\n\t"
9269         "sbcs	r4, r4, r6\n\t"
9270         "sbcs	r5, r5, r8\n\t"
9271         "str	r4, [%[r], #112]\n\t"
9272         "str	r5, [%[r], #116]\n\t"
9273         "ldr	r4, [%[a], #120]\n\t"
9274         "ldr	r5, [%[a], #124]\n\t"
9275         "ldr	r6, [%[b], #120]\n\t"
9276         "ldr	r8, [%[b], #124]\n\t"
9277         "sbcs	r4, r4, r6\n\t"
9278         "sbcs	r5, r5, r8\n\t"
9279         "str	r4, [%[r], #120]\n\t"
9280         "str	r5, [%[r], #124]\n\t"
9281         "sbc	%[c], %[c], %[c]\n\t"
9282         "add	%[a], %[a], #0x80\n\t"
9283         "add	%[b], %[b], #0x80\n\t"
9284         "add	%[r], %[r], #0x80\n\t"
9285         "mov	r6, #0\n\t"
9286         "sub	r6, r6, %[c]\n\t"
9287         "ldr	r4, [%[a], #0]\n\t"
9288         "ldr	r5, [%[a], #4]\n\t"
9289         "ldr	r6, [%[b], #0]\n\t"
9290         "ldr	r8, [%[b], #4]\n\t"
9291         "sbcs	r4, r4, r6\n\t"
9292         "sbcs	r5, r5, r8\n\t"
9293         "str	r4, [%[r], #0]\n\t"
9294         "str	r5, [%[r], #4]\n\t"
9295         "ldr	r4, [%[a], #8]\n\t"
9296         "ldr	r5, [%[a], #12]\n\t"
9297         "ldr	r6, [%[b], #8]\n\t"
9298         "ldr	r8, [%[b], #12]\n\t"
9299         "sbcs	r4, r4, r6\n\t"
9300         "sbcs	r5, r5, r8\n\t"
9301         "str	r4, [%[r], #8]\n\t"
9302         "str	r5, [%[r], #12]\n\t"
9303         "ldr	r4, [%[a], #16]\n\t"
9304         "ldr	r5, [%[a], #20]\n\t"
9305         "ldr	r6, [%[b], #16]\n\t"
9306         "ldr	r8, [%[b], #20]\n\t"
9307         "sbcs	r4, r4, r6\n\t"
9308         "sbcs	r5, r5, r8\n\t"
9309         "str	r4, [%[r], #16]\n\t"
9310         "str	r5, [%[r], #20]\n\t"
9311         "ldr	r4, [%[a], #24]\n\t"
9312         "ldr	r5, [%[a], #28]\n\t"
9313         "ldr	r6, [%[b], #24]\n\t"
9314         "ldr	r8, [%[b], #28]\n\t"
9315         "sbcs	r4, r4, r6\n\t"
9316         "sbcs	r5, r5, r8\n\t"
9317         "str	r4, [%[r], #24]\n\t"
9318         "str	r5, [%[r], #28]\n\t"
9319         "ldr	r4, [%[a], #32]\n\t"
9320         "ldr	r5, [%[a], #36]\n\t"
9321         "ldr	r6, [%[b], #32]\n\t"
9322         "ldr	r8, [%[b], #36]\n\t"
9323         "sbcs	r4, r4, r6\n\t"
9324         "sbcs	r5, r5, r8\n\t"
9325         "str	r4, [%[r], #32]\n\t"
9326         "str	r5, [%[r], #36]\n\t"
9327         "ldr	r4, [%[a], #40]\n\t"
9328         "ldr	r5, [%[a], #44]\n\t"
9329         "ldr	r6, [%[b], #40]\n\t"
9330         "ldr	r8, [%[b], #44]\n\t"
9331         "sbcs	r4, r4, r6\n\t"
9332         "sbcs	r5, r5, r8\n\t"
9333         "str	r4, [%[r], #40]\n\t"
9334         "str	r5, [%[r], #44]\n\t"
9335         "ldr	r4, [%[a], #48]\n\t"
9336         "ldr	r5, [%[a], #52]\n\t"
9337         "ldr	r6, [%[b], #48]\n\t"
9338         "ldr	r8, [%[b], #52]\n\t"
9339         "sbcs	r4, r4, r6\n\t"
9340         "sbcs	r5, r5, r8\n\t"
9341         "str	r4, [%[r], #48]\n\t"
9342         "str	r5, [%[r], #52]\n\t"
9343         "ldr	r4, [%[a], #56]\n\t"
9344         "ldr	r5, [%[a], #60]\n\t"
9345         "ldr	r6, [%[b], #56]\n\t"
9346         "ldr	r8, [%[b], #60]\n\t"
9347         "sbcs	r4, r4, r6\n\t"
9348         "sbcs	r5, r5, r8\n\t"
9349         "str	r4, [%[r], #56]\n\t"
9350         "str	r5, [%[r], #60]\n\t"
9351         "ldr	r4, [%[a], #64]\n\t"
9352         "ldr	r5, [%[a], #68]\n\t"
9353         "ldr	r6, [%[b], #64]\n\t"
9354         "ldr	r8, [%[b], #68]\n\t"
9355         "sbcs	r4, r4, r6\n\t"
9356         "sbcs	r5, r5, r8\n\t"
9357         "str	r4, [%[r], #64]\n\t"
9358         "str	r5, [%[r], #68]\n\t"
9359         "ldr	r4, [%[a], #72]\n\t"
9360         "ldr	r5, [%[a], #76]\n\t"
9361         "ldr	r6, [%[b], #72]\n\t"
9362         "ldr	r8, [%[b], #76]\n\t"
9363         "sbcs	r4, r4, r6\n\t"
9364         "sbcs	r5, r5, r8\n\t"
9365         "str	r4, [%[r], #72]\n\t"
9366         "str	r5, [%[r], #76]\n\t"
9367         "ldr	r4, [%[a], #80]\n\t"
9368         "ldr	r5, [%[a], #84]\n\t"
9369         "ldr	r6, [%[b], #80]\n\t"
9370         "ldr	r8, [%[b], #84]\n\t"
9371         "sbcs	r4, r4, r6\n\t"
9372         "sbcs	r5, r5, r8\n\t"
9373         "str	r4, [%[r], #80]\n\t"
9374         "str	r5, [%[r], #84]\n\t"
9375         "ldr	r4, [%[a], #88]\n\t"
9376         "ldr	r5, [%[a], #92]\n\t"
9377         "ldr	r6, [%[b], #88]\n\t"
9378         "ldr	r8, [%[b], #92]\n\t"
9379         "sbcs	r4, r4, r6\n\t"
9380         "sbcs	r5, r5, r8\n\t"
9381         "str	r4, [%[r], #88]\n\t"
9382         "str	r5, [%[r], #92]\n\t"
9383         "ldr	r4, [%[a], #96]\n\t"
9384         "ldr	r5, [%[a], #100]\n\t"
9385         "ldr	r6, [%[b], #96]\n\t"
9386         "ldr	r8, [%[b], #100]\n\t"
9387         "sbcs	r4, r4, r6\n\t"
9388         "sbcs	r5, r5, r8\n\t"
9389         "str	r4, [%[r], #96]\n\t"
9390         "str	r5, [%[r], #100]\n\t"
9391         "ldr	r4, [%[a], #104]\n\t"
9392         "ldr	r5, [%[a], #108]\n\t"
9393         "ldr	r6, [%[b], #104]\n\t"
9394         "ldr	r8, [%[b], #108]\n\t"
9395         "sbcs	r4, r4, r6\n\t"
9396         "sbcs	r5, r5, r8\n\t"
9397         "str	r4, [%[r], #104]\n\t"
9398         "str	r5, [%[r], #108]\n\t"
9399         "ldr	r4, [%[a], #112]\n\t"
9400         "ldr	r5, [%[a], #116]\n\t"
9401         "ldr	r6, [%[b], #112]\n\t"
9402         "ldr	r8, [%[b], #116]\n\t"
9403         "sbcs	r4, r4, r6\n\t"
9404         "sbcs	r5, r5, r8\n\t"
9405         "str	r4, [%[r], #112]\n\t"
9406         "str	r5, [%[r], #116]\n\t"
9407         "ldr	r4, [%[a], #120]\n\t"
9408         "ldr	r5, [%[a], #124]\n\t"
9409         "ldr	r6, [%[b], #120]\n\t"
9410         "ldr	r8, [%[b], #124]\n\t"
9411         "sbcs	r4, r4, r6\n\t"
9412         "sbcs	r5, r5, r8\n\t"
9413         "str	r4, [%[r], #120]\n\t"
9414         "str	r5, [%[r], #124]\n\t"
9415         "sbc	%[c], %[c], %[c]\n\t"
9416         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
9417         :
9418         : "memory", "r4", "r5", "r6", "r8"
9419     );
9420 
9421     return c;
9422 }
9423 
9424 #endif /* WOLFSSL_SP_SMALL */
9425 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
9426  *
9427  * d1   The high order half of the number to divide.
9428  * d0   The low order half of the number to divide.
9429  * div  The dividend.
9430  * returns the result of the division.
9431  *
9432  * Note that this is an approximate div. It may give an answer 1 larger.
9433  */
div_3072_word_96(sp_digit d1,sp_digit d0,sp_digit div)9434 SP_NOINLINE static sp_digit div_3072_word_96(sp_digit d1, sp_digit d0,
9435         sp_digit div)
9436 {
9437     sp_digit r = 0;
9438 
9439     __asm__ __volatile__ (
9440         "lsr	r6, %[div], #16\n\t"
9441         "add	r6, r6, #1\n\t"
9442         "udiv	r4, %[d1], r6\n\t"
9443         "lsl	r8, r4, #16\n\t"
9444         "umull	r4, r5, %[div], r8\n\t"
9445         "subs	%[d0], %[d0], r4\n\t"
9446         "sbc	%[d1], %[d1], r5\n\t"
9447         "udiv	r5, %[d1], r6\n\t"
9448         "lsl	r4, r5, #16\n\t"
9449         "add	r8, r8, r4\n\t"
9450         "umull	r4, r5, %[div], r4\n\t"
9451         "subs	%[d0], %[d0], r4\n\t"
9452         "sbc	%[d1], %[d1], r5\n\t"
9453         "lsl	r4, %[d1], #16\n\t"
9454         "orr	r4, r4, %[d0], lsr #16\n\t"
9455         "udiv	r4, r4, r6\n\t"
9456         "add	r8, r8, r4\n\t"
9457         "umull	r4, r5, %[div], r4\n\t"
9458         "subs	%[d0], %[d0], r4\n\t"
9459         "sbc	%[d1], %[d1], r5\n\t"
9460         "lsl	r4, %[d1], #16\n\t"
9461         "orr	r4, r4, %[d0], lsr #16\n\t"
9462         "udiv	r4, r4, r6\n\t"
9463         "add	r8, r8, r4\n\t"
9464         "umull	r4, r5, %[div], r4\n\t"
9465         "subs	%[d0], %[d0], r4\n\t"
9466         "sbc	%[d1], %[d1], r5\n\t"
9467         "udiv	r4, %[d0], %[div]\n\t"
9468         "add	r8, r8, r4\n\t"
9469         "mov	%[r], r8\n\t"
9470         : [r] "+r" (r)
9471         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
9472         : "r4", "r5", "r6", "r8"
9473     );
9474     return r;
9475 }
9476 
9477 /* Divide d in a and put remainder into r (m*d + r = a)
9478  * m is not calculated as it is not needed at this time.
9479  *
9480  * a  Number to be divided.
9481  * d  Number to divide with.
9482  * m  Multiplier result.
9483  * r  Remainder from the division.
9484  * returns MP_OKAY indicating success.
9485  */
sp_3072_div_96_cond(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)9486 static WC_INLINE int sp_3072_div_96_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
9487         sp_digit* r)
9488 {
9489     sp_digit t1[192], t2[97];
9490     sp_digit div, r1;
9491     int i;
9492 
9493     (void)m;
9494 
9495     div = d[95];
9496     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
9497     for (i=95; i>=0; i--) {
9498         sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
9499         r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
9500 
9501         sp_3072_mul_d_96(t2, d, r1);
9502         t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
9503         t1[96 + i] -= t2[96];
9504         if (t1[96 + i] != 0) {
9505             t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
9506             if (t1[96 + i] != 0)
9507                 t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], d);
9508         }
9509     }
9510 
9511     for (i = 95; i > 0; i--) {
9512         if (t1[i] != d[i])
9513             break;
9514     }
9515     if (t1[i] >= d[i]) {
9516         sp_3072_sub_96(r, t1, d);
9517     }
9518     else {
9519         XMEMCPY(r, t1, sizeof(*t1) * 96);
9520     }
9521 
9522     return MP_OKAY;
9523 }
9524 
9525 /* Reduce a modulo m into r. (r = a mod m)
9526  *
9527  * r  A single precision number that is the reduced result.
9528  * a  A single precision number that is to be reduced.
9529  * m  A single precision number that is the modulus to reduce with.
9530  * returns MP_OKAY indicating success.
9531  */
sp_3072_mod_96_cond(sp_digit * r,const sp_digit * a,const sp_digit * m)9532 static WC_INLINE int sp_3072_mod_96_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
9533 {
9534     return sp_3072_div_96_cond(a, m, NULL, r);
9535 }
9536 
9537 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
9538 /* AND m into each word of a and store in r.
9539  *
9540  * r  A single precision integer.
9541  * a  A single precision integer.
9542  * m  Mask to AND against each digit.
9543  */
sp_3072_mask_96(sp_digit * r,const sp_digit * a,sp_digit m)9544 static void sp_3072_mask_96(sp_digit* r, const sp_digit* a, sp_digit m)
9545 {
9546 #ifdef WOLFSSL_SP_SMALL
9547     int i;
9548 
9549     for (i=0; i<96; i++) {
9550         r[i] = a[i] & m;
9551     }
9552 #else
9553     int i;
9554 
9555     for (i = 0; i < 96; i += 8) {
9556         r[i+0] = a[i+0] & m;
9557         r[i+1] = a[i+1] & m;
9558         r[i+2] = a[i+2] & m;
9559         r[i+3] = a[i+3] & m;
9560         r[i+4] = a[i+4] & m;
9561         r[i+5] = a[i+5] & m;
9562         r[i+6] = a[i+6] & m;
9563         r[i+7] = a[i+7] & m;
9564     }
9565 #endif
9566 }
9567 
9568 /* Compare a with b in constant time.
9569  *
9570  * a  A single precision integer.
9571  * b  A single precision integer.
9572  * return -ve, 0 or +ve if a is less than, equal to or greater than b
9573  * respectively.
9574  */
sp_3072_cmp_96(const sp_digit * a,const sp_digit * b)9575 SP_NOINLINE static sp_int32 sp_3072_cmp_96(const sp_digit* a, const sp_digit* b)
9576 {
9577     sp_digit r = 0;
9578 
9579 
9580     __asm__ __volatile__ (
9581         "mov	r3, #0\n\t"
9582         "mvn	r3, r3\n\t"
9583         "mov	r6, #1\n\t"
9584         "lsl	r6, r6, #8\n\t"
9585         "add	r6, r6, #124\n\t"
9586         "\n1:\n\t"
9587         "ldr	r8, [%[a], r6]\n\t"
9588         "ldr	r5, [%[b], r6]\n\t"
9589         "and	r8, r8, r3\n\t"
9590         "and	r5, r5, r3\n\t"
9591         "mov	r4, r8\n\t"
9592         "subs	r8, r8, r5\n\t"
9593         "sbc	r8, r8, r8\n\t"
9594         "add	%[r], %[r], r8\n\t"
9595         "mvn	r8, r8\n\t"
9596         "and	r3, r3, r8\n\t"
9597         "subs	r5, r5, r4\n\t"
9598         "sbc	r8, r8, r8\n\t"
9599         "sub	%[r], %[r], r8\n\t"
9600         "mvn	r8, r8\n\t"
9601         "and	r3, r3, r8\n\t"
9602         "sub	r6, r6, #4\n\t"
9603         "cmp	r6, #0\n\t"
9604 #ifdef __GNUC__
9605         "bge	1b\n\t"
9606 #else
9607         "bge.n	1b\n\t"
9608 #endif /* __GNUC__ */
9609         : [r] "+r" (r)
9610         : [a] "r" (a), [b] "r" (b)
9611         : "r3", "r4", "r5", "r6", "r8"
9612     );
9613 
9614     return r;
9615 }
9616 
9617 /* Divide d in a and put remainder into r (m*d + r = a)
9618  * m is not calculated as it is not needed at this time.
9619  *
9620  * a  Number to be divided.
9621  * d  Number to divide with.
9622  * m  Multiplier result.
9623  * r  Remainder from the division.
9624  * returns MP_OKAY indicating success.
9625  */
sp_3072_div_96(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)9626 static WC_INLINE int sp_3072_div_96(const sp_digit* a, const sp_digit* d, sp_digit* m,
9627         sp_digit* r)
9628 {
9629     sp_digit t1[192], t2[97];
9630     sp_digit div, r1;
9631     int i;
9632 
9633     (void)m;
9634 
9635     div = d[95];
9636     XMEMCPY(t1, a, sizeof(*t1) * 2 * 96);
9637     for (i=95; i>=0; i--) {
9638         sp_digit hi = t1[96 + i] - (t1[96 + i] == div);
9639         r1 = div_3072_word_96(hi, t1[96 + i - 1], div);
9640 
9641         sp_3072_mul_d_96(t2, d, r1);
9642         t1[96 + i] += sp_3072_sub_in_place_96(&t1[i], t2);
9643         t1[96 + i] -= t2[96];
9644         sp_3072_mask_96(t2, d, t1[96 + i]);
9645         t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
9646         sp_3072_mask_96(t2, d, t1[96 + i]);
9647         t1[96 + i] += sp_3072_add_96(&t1[i], &t1[i], t2);
9648     }
9649 
9650     r1 = sp_3072_cmp_96(t1, d) >= 0;
9651     sp_3072_cond_sub_96(r, t1, d, (sp_digit)0 - r1);
9652 
9653     return MP_OKAY;
9654 }
9655 
9656 /* Reduce a modulo m into r. (r = a mod m)
9657  *
9658  * r  A single precision number that is the reduced result.
9659  * a  A single precision number that is to be reduced.
9660  * m  A single precision number that is the modulus to reduce with.
9661  * returns MP_OKAY indicating success.
9662  */
sp_3072_mod_96(sp_digit * r,const sp_digit * a,const sp_digit * m)9663 static WC_INLINE int sp_3072_mod_96(sp_digit* r, const sp_digit* a, const sp_digit* m)
9664 {
9665     return sp_3072_div_96(a, m, NULL, r);
9666 }
9667 
9668 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
9669                                                      defined(WOLFSSL_HAVE_SP_DH)
9670 #ifdef WOLFSSL_SP_SMALL
9671 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
9672  *
9673  * r     A single precision number that is the result of the operation.
9674  * a     A single precision number being exponentiated.
9675  * e     A single precision number that is the exponent.
9676  * bits  The number of bits in the exponent.
9677  * m     A single precision number that is the modulus.
9678  * returns  0 on success.
9679  * returns  MEMORY_E on dynamic memory allocation failure.
9680  * returns  MP_VAL when base is even or exponent is 0.
9681  */
sp_3072_mod_exp_96(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)9682 static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
9683         int bits, const sp_digit* m, int reduceA)
9684 {
9685 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9686     sp_digit* td = NULL;
9687 #else
9688     sp_digit td[8 * 192];
9689 #endif
9690     sp_digit* t[8];
9691     sp_digit* norm = NULL;
9692     sp_digit mp = 1;
9693     sp_digit n;
9694     sp_digit mask;
9695     int i;
9696     int c;
9697     byte y;
9698     int err = MP_OKAY;
9699 
9700     if ((m[0] & 1) == 0) {
9701         err = MP_VAL;
9702     }
9703     else if (bits == 0) {
9704         err = MP_VAL;
9705     }
9706 
9707 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9708     if (err == MP_OKAY) {
9709         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 192), NULL,
9710                                 DYNAMIC_TYPE_TMP_BUFFER);
9711         if (td == NULL)
9712             err = MEMORY_E;
9713     }
9714 #endif
9715 
9716     if (err == MP_OKAY) {
9717         norm = td;
9718         for (i=0; i<8; i++) {
9719             t[i] = td + i * 192;
9720         }
9721 
9722         sp_3072_mont_setup(m, &mp);
9723         sp_3072_mont_norm_96(norm, m);
9724 
9725         XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
9726         if (reduceA != 0) {
9727             err = sp_3072_mod_96(t[1] + 96, a, m);
9728             if (err == MP_OKAY) {
9729                 err = sp_3072_mod_96(t[1], t[1], m);
9730             }
9731         }
9732         else {
9733             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
9734             err = sp_3072_mod_96(t[1], t[1], m);
9735         }
9736     }
9737 
9738     if (err == MP_OKAY) {
9739         sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
9740         sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
9741         sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
9742         sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
9743         sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
9744         sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
9745 
9746         i = (bits - 1) / 32;
9747         n = e[i--];
9748         c = bits & 31;
9749         if (c == 0) {
9750             c = 32;
9751         }
9752         c -= bits % 3;
9753         if (c == 32) {
9754             c = 29;
9755         }
9756         if (c < 0) {
9757             /* Number of bits in top word is less than number needed. */
9758             c = -c;
9759             y = (byte)(n << c);
9760             n = e[i--];
9761             y |= (byte)(n >> (64 - c));
9762             n <<= c;
9763             c = 64 - c;
9764         }
9765         else {
9766             y = (byte)(n >> c);
9767             n <<= 32 - c;
9768         }
9769         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
9770         for (; i>=0 || c>=3; ) {
9771             if (c == 0) {
9772                 n = e[i--];
9773                 y = (byte)(n >> 29);
9774                 n <<= 3;
9775                 c = 29;
9776             }
9777             else if (c < 3) {
9778                 y = (byte)(n >> 29);
9779                 n = e[i--];
9780                 c = 3 - c;
9781                 y |= (byte)(n >> (32 - c));
9782                 n <<= c;
9783                 c = 32 - c;
9784             }
9785             else {
9786                 y = (byte)((n >> 29) & 0x7);
9787                 n <<= 3;
9788                 c -= 3;
9789             }
9790 
9791             sp_3072_mont_sqr_96(r, r, m, mp);
9792             sp_3072_mont_sqr_96(r, r, m, mp);
9793             sp_3072_mont_sqr_96(r, r, m, mp);
9794 
9795             sp_3072_mont_mul_96(r, r, t[y], m, mp);
9796         }
9797 
9798         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
9799         sp_3072_mont_reduce_96(r, m, mp);
9800 
9801         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
9802         sp_3072_cond_sub_96(r, r, m, mask);
9803     }
9804 
9805 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9806     if (td != NULL)
9807         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
9808 #endif
9809 
9810     return err;
9811 }
9812 #else
9813 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
9814  *
9815  * r     A single precision number that is the result of the operation.
9816  * a     A single precision number being exponentiated.
9817  * e     A single precision number that is the exponent.
9818  * bits  The number of bits in the exponent.
9819  * m     A single precision number that is the modulus.
9820  * returns  0 on success.
9821  * returns  MEMORY_E on dynamic memory allocation failure.
9822  * returns  MP_VAL when base is even or exponent is 0.
9823  */
sp_3072_mod_exp_96(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)9824 static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e,
9825         int bits, const sp_digit* m, int reduceA)
9826 {
9827 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9828     sp_digit* td = NULL;
9829 #else
9830     sp_digit td[16 * 192];
9831 #endif
9832     sp_digit* t[16];
9833     sp_digit* norm = NULL;
9834     sp_digit mp = 1;
9835     sp_digit n;
9836     sp_digit mask;
9837     int i;
9838     int c;
9839     byte y;
9840     int err = MP_OKAY;
9841 
9842     if ((m[0] & 1) == 0) {
9843         err = MP_VAL;
9844     }
9845     else if (bits == 0) {
9846         err = MP_VAL;
9847     }
9848 
9849 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9850     if (err == MP_OKAY) {
9851         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 192), NULL,
9852                                 DYNAMIC_TYPE_TMP_BUFFER);
9853         if (td == NULL)
9854             err = MEMORY_E;
9855     }
9856 #endif
9857 
9858     if (err == MP_OKAY) {
9859         norm = td;
9860         for (i=0; i<16; i++) {
9861             t[i] = td + i * 192;
9862         }
9863 
9864         sp_3072_mont_setup(m, &mp);
9865         sp_3072_mont_norm_96(norm, m);
9866 
9867         XMEMSET(t[1], 0, sizeof(sp_digit) * 96U);
9868         if (reduceA != 0) {
9869             err = sp_3072_mod_96(t[1] + 96, a, m);
9870             if (err == MP_OKAY) {
9871                 err = sp_3072_mod_96(t[1], t[1], m);
9872             }
9873         }
9874         else {
9875             XMEMCPY(t[1] + 96, a, sizeof(sp_digit) * 96);
9876             err = sp_3072_mod_96(t[1], t[1], m);
9877         }
9878     }
9879 
9880     if (err == MP_OKAY) {
9881         sp_3072_mont_sqr_96(t[ 2], t[ 1], m, mp);
9882         sp_3072_mont_mul_96(t[ 3], t[ 2], t[ 1], m, mp);
9883         sp_3072_mont_sqr_96(t[ 4], t[ 2], m, mp);
9884         sp_3072_mont_mul_96(t[ 5], t[ 3], t[ 2], m, mp);
9885         sp_3072_mont_sqr_96(t[ 6], t[ 3], m, mp);
9886         sp_3072_mont_mul_96(t[ 7], t[ 4], t[ 3], m, mp);
9887         sp_3072_mont_sqr_96(t[ 8], t[ 4], m, mp);
9888         sp_3072_mont_mul_96(t[ 9], t[ 5], t[ 4], m, mp);
9889         sp_3072_mont_sqr_96(t[10], t[ 5], m, mp);
9890         sp_3072_mont_mul_96(t[11], t[ 6], t[ 5], m, mp);
9891         sp_3072_mont_sqr_96(t[12], t[ 6], m, mp);
9892         sp_3072_mont_mul_96(t[13], t[ 7], t[ 6], m, mp);
9893         sp_3072_mont_sqr_96(t[14], t[ 7], m, mp);
9894         sp_3072_mont_mul_96(t[15], t[ 8], t[ 7], m, mp);
9895 
9896         i = (bits - 1) / 32;
9897         n = e[i--];
9898         c = bits & 31;
9899         if (c == 0) {
9900             c = 32;
9901         }
9902         c -= bits % 4;
9903         if (c == 32) {
9904             c = 28;
9905         }
9906         if (c < 0) {
9907             /* Number of bits in top word is less than number needed. */
9908             c = -c;
9909             y = (byte)(n << c);
9910             n = e[i--];
9911             y |= (byte)(n >> (64 - c));
9912             n <<= c;
9913             c = 64 - c;
9914         }
9915         else {
9916             y = (byte)(n >> c);
9917             n <<= 32 - c;
9918         }
9919         XMEMCPY(r, t[y], sizeof(sp_digit) * 96);
9920         for (; i>=0 || c>=4; ) {
9921             if (c == 0) {
9922                 n = e[i--];
9923                 y = (byte)(n >> 28);
9924                 n <<= 4;
9925                 c = 28;
9926             }
9927             else if (c < 4) {
9928                 y = (byte)(n >> 28);
9929                 n = e[i--];
9930                 c = 4 - c;
9931                 y |= (byte)(n >> (32 - c));
9932                 n <<= c;
9933                 c = 32 - c;
9934             }
9935             else {
9936                 y = (byte)((n >> 28) & 0xf);
9937                 n <<= 4;
9938                 c -= 4;
9939             }
9940 
9941             sp_3072_mont_sqr_96(r, r, m, mp);
9942             sp_3072_mont_sqr_96(r, r, m, mp);
9943             sp_3072_mont_sqr_96(r, r, m, mp);
9944             sp_3072_mont_sqr_96(r, r, m, mp);
9945 
9946             sp_3072_mont_mul_96(r, r, t[y], m, mp);
9947         }
9948 
9949         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
9950         sp_3072_mont_reduce_96(r, m, mp);
9951 
9952         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
9953         sp_3072_cond_sub_96(r, r, m, mask);
9954     }
9955 
9956 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9957     if (td != NULL)
9958         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
9959 #endif
9960 
9961     return err;
9962 }
9963 #endif /* WOLFSSL_SP_SMALL */
9964 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
9965 
9966 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
9967 #ifdef WOLFSSL_HAVE_SP_RSA
9968 /* RSA public key operation.
9969  *
9970  * in      Array of bytes representing the number to exponentiate, base.
9971  * inLen   Number of bytes in base.
9972  * em      Public exponent.
9973  * mm      Modulus.
9974  * out     Buffer to hold big-endian bytes of exponentiation result.
9975  *         Must be at least 384 bytes long.
9976  * outLen  Number of bytes in result.
9977  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
9978  * an array is too long and MEMORY_E when dynamic memory allocation fails.
9979  */
sp_RsaPublic_3072(const byte * in,word32 inLen,const mp_int * em,const mp_int * mm,byte * out,word32 * outLen)9980 int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em,
9981     const mp_int* mm, byte* out, word32* outLen)
9982 {
9983 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9984     sp_digit* a = NULL;
9985 #else
9986     sp_digit a[96 * 5];
9987 #endif
9988     sp_digit* m = NULL;
9989     sp_digit* r = NULL;
9990     sp_digit *ah = NULL;
9991     sp_digit e[1] = {0};
9992     int err = MP_OKAY;
9993 
9994     if (*outLen < 384) {
9995         err = MP_TO_E;
9996     }
9997     else if (mp_count_bits(em) > 32 || inLen > 384 ||
9998                                                      mp_count_bits(mm) != 3072) {
9999         err = MP_READ_E;
10000     }
10001     else if (mp_iseven(mm)) {
10002         err = MP_VAL;
10003     }
10004 
10005 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10006     if (err == MP_OKAY) {
10007         a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 5, NULL,
10008                                                               DYNAMIC_TYPE_RSA);
10009         if (a == NULL)
10010             err = MEMORY_E;
10011     }
10012 #endif
10013 
10014     if (err == MP_OKAY) {
10015         r = a + 96 * 2;
10016         m = r + 96 * 2;
10017         ah = a + 96;
10018 
10019         sp_3072_from_bin(ah, 96, in, inLen);
10020 #if DIGIT_BIT >= 32
10021         e[0] = em->dp[0];
10022 #else
10023         e[0] = em->dp[0];
10024         if (em->used > 1) {
10025             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
10026         }
10027 #endif
10028         if (e[0] == 0) {
10029             err = MP_EXPTMOD_E;
10030         }
10031     }
10032     if (err == MP_OKAY) {
10033         sp_3072_from_mp(m, 96, mm);
10034 
10035         if (e[0] == 0x3) {
10036             if (err == MP_OKAY) {
10037                 sp_3072_sqr_96(r, ah);
10038                 err = sp_3072_mod_96_cond(r, r, m);
10039             }
10040             if (err == MP_OKAY) {
10041                 sp_3072_mul_96(r, ah, r);
10042                 err = sp_3072_mod_96_cond(r, r, m);
10043             }
10044         }
10045         else {
10046             int i;
10047             sp_digit mp;
10048 
10049             sp_3072_mont_setup(m, &mp);
10050 
10051             /* Convert to Montgomery form. */
10052             XMEMSET(a, 0, sizeof(sp_digit) * 96);
10053             err = sp_3072_mod_96_cond(a, a, m);
10054 
10055             if (err == MP_OKAY) {
10056                 for (i = 31; i >= 0; i--) {
10057                     if (e[0] >> i) {
10058                         break;
10059                     }
10060                 }
10061 
10062                 XMEMCPY(r, a, sizeof(sp_digit) * 96);
10063                 for (i--; i>=0; i--) {
10064                     sp_3072_mont_sqr_96(r, r, m, mp);
10065                     if (((e[0] >> i) & 1) == 1) {
10066                         sp_3072_mont_mul_96(r, r, a, m, mp);
10067                     }
10068                 }
10069                 XMEMSET(&r[96], 0, sizeof(sp_digit) * 96);
10070                 sp_3072_mont_reduce_96(r, m, mp);
10071 
10072                 for (i = 95; i > 0; i--) {
10073                     if (r[i] != m[i]) {
10074                         break;
10075                     }
10076                 }
10077                 if (r[i] >= m[i]) {
10078                     sp_3072_sub_in_place_96(r, m);
10079                 }
10080             }
10081         }
10082     }
10083 
10084     if (err == MP_OKAY) {
10085         sp_3072_to_bin_96(r, out);
10086         *outLen = 384;
10087     }
10088 
10089 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10090     if (a != NULL)
10091         XFREE(a, NULL, DYNAMIC_TYPE_RSA);
10092 #endif
10093 
10094     return err;
10095 }
10096 
10097 #ifndef WOLFSSL_RSA_PUBLIC_ONLY
10098 /* Conditionally add a and b using the mask m.
10099  * m is -1 to add and 0 when not.
10100  *
10101  * r  A single precision number representing conditional add result.
10102  * a  A single precision number to add with.
10103  * b  A single precision number to add.
10104  * m  Mask value to apply.
10105  */
sp_3072_cond_add_48(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)10106 SP_NOINLINE static sp_digit sp_3072_cond_add_48(sp_digit* r, const sp_digit* a, const sp_digit* b,
10107         sp_digit m)
10108 {
10109     sp_digit c = 0;
10110 
10111     __asm__ __volatile__ (
10112         "mov	r5, #192\n\t"
10113         "mov	r9, r5\n\t"
10114         "mov	r8, #0\n\t"
10115         "\n1:\n\t"
10116         "ldr	r6, [%[b], r8]\n\t"
10117         "and	r6, r6, %[m]\n\t"
10118         "adds	r5, %[c], #-1\n\t"
10119         "ldr	r5, [%[a], r8]\n\t"
10120         "adcs	r5, r5, r6\n\t"
10121         "mov	%[c], #0\n\t"
10122         "adcs	%[c], %[c], %[c]\n\t"
10123         "str	r5, [%[r], r8]\n\t"
10124         "add	r8, r8, #4\n\t"
10125         "cmp	r8, r9\n\t"
10126 #ifdef __GNUC__
10127         "blt	1b\n\t"
10128 #else
10129         "blt.n	1b\n\t"
10130 #endif /* __GNUC__ */
10131         : [c] "+r" (c)
10132         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
10133         : "memory", "r5", "r6", "r8", "r9"
10134     );
10135 
10136     return c;
10137 }
10138 
10139 /* RSA private key operation.
10140  *
10141  * in      Array of bytes representing the number to exponentiate, base.
10142  * inLen   Number of bytes in base.
10143  * dm      Private exponent.
10144  * pm      First prime.
10145  * qm      Second prime.
10146  * dpm     First prime's CRT exponent.
10147  * dqm     Second prime's CRT exponent.
10148  * qim     Inverse of second prime mod p.
10149  * mm      Modulus.
10150  * out     Buffer to hold big-endian bytes of exponentiation result.
10151  *         Must be at least 384 bytes long.
10152  * outLen  Number of bytes in result.
10153  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
10154  * an array is too long and MEMORY_E when dynamic memory allocation fails.
10155  */
sp_RsaPrivate_3072(const byte * in,word32 inLen,const mp_int * dm,const mp_int * pm,const mp_int * qm,const mp_int * dpm,const mp_int * dqm,const mp_int * qim,const mp_int * mm,byte * out,word32 * outLen)10156 int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm,
10157     const mp_int* pm, const mp_int* qm, const mp_int* dpm, const mp_int* dqm,
10158     const mp_int* qim, const mp_int* mm, byte* out, word32* outLen)
10159 {
10160 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
10161 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10162     sp_digit* d = NULL;
10163 #else
10164     sp_digit  d[96 * 4];
10165 #endif
10166     sp_digit* a = NULL;
10167     sp_digit* m = NULL;
10168     sp_digit* r = NULL;
10169     int err = MP_OKAY;
10170 
10171     (void)pm;
10172     (void)qm;
10173     (void)dpm;
10174     (void)dqm;
10175     (void)qim;
10176 
10177     if (*outLen < 384U) {
10178         err = MP_TO_E;
10179     }
10180     if (err == MP_OKAY) {
10181         if (mp_count_bits(dm) > 3072) {
10182            err = MP_READ_E;
10183         }
10184         else if (inLen > 384) {
10185             err = MP_READ_E;
10186         }
10187         else if (mp_count_bits(mm) != 3072) {
10188             err = MP_READ_E;
10189         }
10190         else if (mp_iseven(mm)) {
10191             err = MP_VAL;
10192         }
10193     }
10194 
10195 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10196     if (err == MP_OKAY) {
10197         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 96 * 4, NULL,
10198                                                               DYNAMIC_TYPE_RSA);
10199         if (d == NULL)
10200             err = MEMORY_E;
10201     }
10202 #endif
10203 
10204     if (err == MP_OKAY) {
10205         a = d + 96;
10206         m = a + 192;
10207         r = a;
10208 
10209         sp_3072_from_bin(a, 96, in, inLen);
10210         sp_3072_from_mp(d, 96, dm);
10211         sp_3072_from_mp(m, 96, mm);
10212         err = sp_3072_mod_exp_96(r, a, d, 3072, m, 0);
10213     }
10214 
10215     if (err == MP_OKAY) {
10216         sp_3072_to_bin_96(r, out);
10217         *outLen = 384;
10218     }
10219 
10220 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10221     if (d != NULL)
10222 #endif
10223     {
10224         /* only "a" and "r" are sensitive and need zeroized (same pointer) */
10225         if (a != NULL)
10226             ForceZero(a, sizeof(sp_digit) * 96);
10227 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10228         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
10229 #endif
10230     }
10231 
10232     return err;
10233 #else
10234 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10235     sp_digit* a = NULL;
10236 #else
10237     sp_digit a[48 * 11];
10238 #endif
10239     sp_digit* p = NULL;
10240     sp_digit* q = NULL;
10241     sp_digit* dp = NULL;
10242     sp_digit* tmpa = NULL;
10243     sp_digit* tmpb = NULL;
10244     sp_digit* r = NULL;
10245     sp_digit* qi = NULL;
10246     sp_digit* dq = NULL;
10247     sp_digit c;
10248     int err = MP_OKAY;
10249 
10250     (void)dm;
10251     (void)mm;
10252 
10253     if (*outLen < 384) {
10254         err = MP_TO_E;
10255     }
10256     else if (inLen > 384 || mp_count_bits(mm) != 3072) {
10257         err = MP_READ_E;
10258     }
10259     else if (mp_iseven(mm)) {
10260         err = MP_VAL;
10261     }
10262 
10263 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10264     if (err == MP_OKAY) {
10265         a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 48 * 11, NULL,
10266                                                               DYNAMIC_TYPE_RSA);
10267         if (a == NULL)
10268             err = MEMORY_E;
10269     }
10270 #endif
10271 
10272     if (err == MP_OKAY) {
10273         p = a + 96 * 2;
10274         q = p + 48;
10275         qi = dq = dp = q + 48;
10276         tmpa = qi + 48;
10277         tmpb = tmpa + 96;
10278         r = a;
10279 
10280         sp_3072_from_bin(a, 96, in, inLen);
10281         sp_3072_from_mp(p, 48, pm);
10282         sp_3072_from_mp(q, 48, qm);
10283         sp_3072_from_mp(dp, 48, dpm);
10284 
10285         err = sp_3072_mod_exp_48(tmpa, a, dp, 1536, p, 1);
10286     }
10287     if (err == MP_OKAY) {
10288         sp_3072_from_mp(dq, 48, dqm);
10289         err = sp_3072_mod_exp_48(tmpb, a, dq, 1536, q, 1);
10290     }
10291 
10292     if (err == MP_OKAY) {
10293         c = sp_3072_sub_in_place_48(tmpa, tmpb);
10294         c += sp_3072_cond_add_48(tmpa, tmpa, p, c);
10295         sp_3072_cond_add_48(tmpa, tmpa, p, c);
10296 
10297         sp_3072_from_mp(qi, 48, qim);
10298         sp_3072_mul_48(tmpa, tmpa, qi);
10299         err = sp_3072_mod_48(tmpa, tmpa, p);
10300     }
10301 
10302     if (err == MP_OKAY) {
10303         sp_3072_mul_48(tmpa, q, tmpa);
10304         XMEMSET(&tmpb[48], 0, sizeof(sp_digit) * 48);
10305         sp_3072_add_96(r, tmpb, tmpa);
10306 
10307         sp_3072_to_bin_96(r, out);
10308         *outLen = 384;
10309     }
10310 
10311 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10312     if (a != NULL)
10313 #endif
10314     {
10315         ForceZero(a, sizeof(sp_digit) * 48 * 11);
10316     #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10317         XFREE(a, NULL, DYNAMIC_TYPE_RSA);
10318     #endif
10319     }
10320 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
10321     return err;
10322 }
10323 #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
10324 #endif /* WOLFSSL_HAVE_SP_RSA */
10325 #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
10326                                               !defined(WOLFSSL_RSA_PUBLIC_ONLY))
10327 /* Convert an array of sp_digit to an mp_int.
10328  *
10329  * a  A single precision integer.
10330  * r  A multi-precision integer.
10331  */
sp_3072_to_mp(const sp_digit * a,mp_int * r)10332 static int sp_3072_to_mp(const sp_digit* a, mp_int* r)
10333 {
10334     int err;
10335 
10336     err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT);
10337     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
10338 #if DIGIT_BIT == 32
10339         XMEMCPY(r->dp, a, sizeof(sp_digit) * 96);
10340         r->used = 96;
10341         mp_clamp(r);
10342 #elif DIGIT_BIT < 32
10343         int i;
10344         int j = 0;
10345         int s = 0;
10346 
10347         r->dp[0] = 0;
10348         for (i = 0; i < 96; i++) {
10349             r->dp[j] |= (mp_digit)(a[i] << s);
10350             r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
10351             s = DIGIT_BIT - s;
10352             r->dp[++j] = (mp_digit)(a[i] >> s);
10353             while (s + DIGIT_BIT <= 32) {
10354                 s += DIGIT_BIT;
10355                 r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
10356                 if (s == SP_WORD_SIZE) {
10357                     r->dp[j] = 0;
10358                 }
10359                 else {
10360                     r->dp[j] = (mp_digit)(a[i] >> s);
10361                 }
10362             }
10363             s = 32 - s;
10364         }
10365         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
10366         mp_clamp(r);
10367 #else
10368         int i;
10369         int j = 0;
10370         int s = 0;
10371 
10372         r->dp[0] = 0;
10373         for (i = 0; i < 96; i++) {
10374             r->dp[j] |= ((mp_digit)a[i]) << s;
10375             if (s + 32 >= DIGIT_BIT) {
10376     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
10377                 r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
10378     #endif
10379                 s = DIGIT_BIT - s;
10380                 r->dp[++j] = a[i] >> s;
10381                 s = 32 - s;
10382             }
10383             else {
10384                 s += 32;
10385             }
10386         }
10387         r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT;
10388         mp_clamp(r);
10389 #endif
10390     }
10391 
10392     return err;
10393 }
10394 
10395 /* Perform the modular exponentiation for Diffie-Hellman.
10396  *
10397  * base  Base. MP integer.
10398  * exp   Exponent. MP integer.
10399  * mod   Modulus. MP integer.
10400  * res   Result. MP integer.
10401  * returns 0 on success, MP_READ_E if there are too many bytes in an array
10402  * and MEMORY_E if memory allocation fails.
10403  */
sp_ModExp_3072(const mp_int * base,const mp_int * exp,const mp_int * mod,mp_int * res)10404 int sp_ModExp_3072(const mp_int* base, const mp_int* exp, const mp_int* mod,
10405     mp_int* res)
10406 {
10407     int err = MP_OKAY;
10408     sp_digit b[192];
10409     sp_digit e[96];
10410     sp_digit m[96];
10411     sp_digit* r = b;
10412     int expBits = mp_count_bits(exp);
10413 
10414     if (mp_count_bits(base) > 3072) {
10415         err = MP_READ_E;
10416     }
10417     else if (expBits > 3072) {
10418         err = MP_READ_E;
10419     }
10420     else if (mp_count_bits(mod) != 3072) {
10421         err = MP_READ_E;
10422     }
10423     else if (mp_iseven(mod)) {
10424         err = MP_VAL;
10425     }
10426 
10427     if (err == MP_OKAY) {
10428         sp_3072_from_mp(b, 96, base);
10429         sp_3072_from_mp(e, 96, exp);
10430         sp_3072_from_mp(m, 96, mod);
10431 
10432         err = sp_3072_mod_exp_96(r, b, e, expBits, m, 0);
10433     }
10434 
10435     if (err == MP_OKAY) {
10436         err = sp_3072_to_mp(r, res);
10437     }
10438 
10439     XMEMSET(e, 0, sizeof(e));
10440 
10441     return err;
10442 }
10443 
10444 #ifdef WOLFSSL_HAVE_SP_DH
10445 
10446 #ifdef HAVE_FFDHE_3072
sp_3072_lshift_96(sp_digit * r,sp_digit * a,byte n)10447 static void sp_3072_lshift_96(sp_digit* r, sp_digit* a, byte n)
10448 {
10449     __asm__ __volatile__ (
10450         "mov r6, #31\n\t"
10451         "sub r6, r6, %[n]\n\t"
10452         "add       %[a], %[a], #320\n\t"
10453         "add       %[r], %[r], #320\n\t"
10454         "ldr r3, [%[a], #60]\n\t"
10455         "lsr r4, r3, #1\n\t"
10456         "lsl r3, r3, %[n]\n\t"
10457         "lsr r4, r4, r6\n\t"
10458         "ldr       r2, [%[a], #56]\n\t"
10459         "str       r4, [%[r], #64]\n\t"
10460         "lsr       r5, r2, #1\n\t"
10461         "lsl       r2, r2, %[n]\n\t"
10462         "lsr       r5, r5, r6\n\t"
10463         "orr       r3, r3, r5\n\t"
10464         "ldr       r4, [%[a], #52]\n\t"
10465         "str       r3, [%[r], #60]\n\t"
10466         "lsr       r5, r4, #1\n\t"
10467         "lsl       r4, r4, %[n]\n\t"
10468         "lsr       r5, r5, r6\n\t"
10469         "orr       r2, r2, r5\n\t"
10470         "ldr       r3, [%[a], #48]\n\t"
10471         "str       r2, [%[r], #56]\n\t"
10472         "lsr       r5, r3, #1\n\t"
10473         "lsl       r3, r3, %[n]\n\t"
10474         "lsr       r5, r5, r6\n\t"
10475         "orr       r4, r4, r5\n\t"
10476         "ldr       r2, [%[a], #44]\n\t"
10477         "str       r4, [%[r], #52]\n\t"
10478         "lsr       r5, r2, #1\n\t"
10479         "lsl       r2, r2, %[n]\n\t"
10480         "lsr       r5, r5, r6\n\t"
10481         "orr       r3, r3, r5\n\t"
10482         "ldr       r4, [%[a], #40]\n\t"
10483         "str       r3, [%[r], #48]\n\t"
10484         "lsr       r5, r4, #1\n\t"
10485         "lsl       r4, r4, %[n]\n\t"
10486         "lsr       r5, r5, r6\n\t"
10487         "orr       r2, r2, r5\n\t"
10488         "ldr       r3, [%[a], #36]\n\t"
10489         "str       r2, [%[r], #44]\n\t"
10490         "lsr       r5, r3, #1\n\t"
10491         "lsl       r3, r3, %[n]\n\t"
10492         "lsr       r5, r5, r6\n\t"
10493         "orr       r4, r4, r5\n\t"
10494         "ldr       r2, [%[a], #32]\n\t"
10495         "str       r4, [%[r], #40]\n\t"
10496         "lsr       r5, r2, #1\n\t"
10497         "lsl       r2, r2, %[n]\n\t"
10498         "lsr       r5, r5, r6\n\t"
10499         "orr       r3, r3, r5\n\t"
10500         "ldr       r4, [%[a], #28]\n\t"
10501         "str       r3, [%[r], #36]\n\t"
10502         "lsr       r5, r4, #1\n\t"
10503         "lsl       r4, r4, %[n]\n\t"
10504         "lsr       r5, r5, r6\n\t"
10505         "orr       r2, r2, r5\n\t"
10506         "ldr       r3, [%[a], #24]\n\t"
10507         "str       r2, [%[r], #32]\n\t"
10508         "lsr       r5, r3, #1\n\t"
10509         "lsl       r3, r3, %[n]\n\t"
10510         "lsr       r5, r5, r6\n\t"
10511         "orr       r4, r4, r5\n\t"
10512         "ldr       r2, [%[a], #20]\n\t"
10513         "str       r4, [%[r], #28]\n\t"
10514         "lsr       r5, r2, #1\n\t"
10515         "lsl       r2, r2, %[n]\n\t"
10516         "lsr       r5, r5, r6\n\t"
10517         "orr       r3, r3, r5\n\t"
10518         "ldr       r4, [%[a], #16]\n\t"
10519         "str       r3, [%[r], #24]\n\t"
10520         "lsr       r5, r4, #1\n\t"
10521         "lsl       r4, r4, %[n]\n\t"
10522         "lsr       r5, r5, r6\n\t"
10523         "orr       r2, r2, r5\n\t"
10524         "ldr       r3, [%[a], #12]\n\t"
10525         "str       r2, [%[r], #20]\n\t"
10526         "lsr       r5, r3, #1\n\t"
10527         "lsl       r3, r3, %[n]\n\t"
10528         "lsr       r5, r5, r6\n\t"
10529         "orr       r4, r4, r5\n\t"
10530         "ldr       r2, [%[a], #8]\n\t"
10531         "str       r4, [%[r], #16]\n\t"
10532         "lsr       r5, r2, #1\n\t"
10533         "lsl       r2, r2, %[n]\n\t"
10534         "lsr       r5, r5, r6\n\t"
10535         "orr       r3, r3, r5\n\t"
10536         "ldr       r4, [%[a], #4]\n\t"
10537         "str       r3, [%[r], #12]\n\t"
10538         "lsr       r5, r4, #1\n\t"
10539         "lsl       r4, r4, %[n]\n\t"
10540         "lsr       r5, r5, r6\n\t"
10541         "orr       r2, r2, r5\n\t"
10542         "ldr       r3, [%[a], #0]\n\t"
10543         "str       r2, [%[r], #8]\n\t"
10544         "lsr       r5, r3, #1\n\t"
10545         "lsl       r3, r3, %[n]\n\t"
10546         "lsr       r5, r5, r6\n\t"
10547         "orr       r4, r4, r5\n\t"
10548         "sub     %[a], %[a], #64\n\t"
10549         "sub     %[r], %[r], #64\n\t"
10550         "ldr       r2, [%[a], #60]\n\t"
10551         "str       r4, [%[r], #68]\n\t"
10552         "lsr       r5, r2, #1\n\t"
10553         "lsl       r2, r2, %[n]\n\t"
10554         "lsr       r5, r5, r6\n\t"
10555         "orr       r3, r3, r5\n\t"
10556         "ldr       r4, [%[a], #56]\n\t"
10557         "str       r3, [%[r], #64]\n\t"
10558         "lsr       r5, r4, #1\n\t"
10559         "lsl       r4, r4, %[n]\n\t"
10560         "lsr       r5, r5, r6\n\t"
10561         "orr       r2, r2, r5\n\t"
10562         "ldr       r3, [%[a], #52]\n\t"
10563         "str       r2, [%[r], #60]\n\t"
10564         "lsr       r5, r3, #1\n\t"
10565         "lsl       r3, r3, %[n]\n\t"
10566         "lsr       r5, r5, r6\n\t"
10567         "orr       r4, r4, r5\n\t"
10568         "ldr       r2, [%[a], #48]\n\t"
10569         "str       r4, [%[r], #56]\n\t"
10570         "lsr       r5, r2, #1\n\t"
10571         "lsl       r2, r2, %[n]\n\t"
10572         "lsr       r5, r5, r6\n\t"
10573         "orr       r3, r3, r5\n\t"
10574         "ldr       r4, [%[a], #44]\n\t"
10575         "str       r3, [%[r], #52]\n\t"
10576         "lsr       r5, r4, #1\n\t"
10577         "lsl       r4, r4, %[n]\n\t"
10578         "lsr       r5, r5, r6\n\t"
10579         "orr       r2, r2, r5\n\t"
10580         "ldr       r3, [%[a], #40]\n\t"
10581         "str       r2, [%[r], #48]\n\t"
10582         "lsr       r5, r3, #1\n\t"
10583         "lsl       r3, r3, %[n]\n\t"
10584         "lsr       r5, r5, r6\n\t"
10585         "orr       r4, r4, r5\n\t"
10586         "ldr       r2, [%[a], #36]\n\t"
10587         "str       r4, [%[r], #44]\n\t"
10588         "lsr       r5, r2, #1\n\t"
10589         "lsl       r2, r2, %[n]\n\t"
10590         "lsr       r5, r5, r6\n\t"
10591         "orr       r3, r3, r5\n\t"
10592         "ldr       r4, [%[a], #32]\n\t"
10593         "str       r3, [%[r], #40]\n\t"
10594         "lsr       r5, r4, #1\n\t"
10595         "lsl       r4, r4, %[n]\n\t"
10596         "lsr       r5, r5, r6\n\t"
10597         "orr       r2, r2, r5\n\t"
10598         "ldr       r3, [%[a], #28]\n\t"
10599         "str       r2, [%[r], #36]\n\t"
10600         "lsr       r5, r3, #1\n\t"
10601         "lsl       r3, r3, %[n]\n\t"
10602         "lsr       r5, r5, r6\n\t"
10603         "orr       r4, r4, r5\n\t"
10604         "ldr       r2, [%[a], #24]\n\t"
10605         "str       r4, [%[r], #32]\n\t"
10606         "lsr       r5, r2, #1\n\t"
10607         "lsl       r2, r2, %[n]\n\t"
10608         "lsr       r5, r5, r6\n\t"
10609         "orr       r3, r3, r5\n\t"
10610         "ldr       r4, [%[a], #20]\n\t"
10611         "str       r3, [%[r], #28]\n\t"
10612         "lsr       r5, r4, #1\n\t"
10613         "lsl       r4, r4, %[n]\n\t"
10614         "lsr       r5, r5, r6\n\t"
10615         "orr       r2, r2, r5\n\t"
10616         "ldr       r3, [%[a], #16]\n\t"
10617         "str       r2, [%[r], #24]\n\t"
10618         "lsr       r5, r3, #1\n\t"
10619         "lsl       r3, r3, %[n]\n\t"
10620         "lsr       r5, r5, r6\n\t"
10621         "orr       r4, r4, r5\n\t"
10622         "ldr       r2, [%[a], #12]\n\t"
10623         "str       r4, [%[r], #20]\n\t"
10624         "lsr       r5, r2, #1\n\t"
10625         "lsl       r2, r2, %[n]\n\t"
10626         "lsr       r5, r5, r6\n\t"
10627         "orr       r3, r3, r5\n\t"
10628         "ldr       r4, [%[a], #8]\n\t"
10629         "str       r3, [%[r], #16]\n\t"
10630         "lsr       r5, r4, #1\n\t"
10631         "lsl       r4, r4, %[n]\n\t"
10632         "lsr       r5, r5, r6\n\t"
10633         "orr       r2, r2, r5\n\t"
10634         "ldr       r3, [%[a], #4]\n\t"
10635         "str       r2, [%[r], #12]\n\t"
10636         "lsr       r5, r3, #1\n\t"
10637         "lsl       r3, r3, %[n]\n\t"
10638         "lsr       r5, r5, r6\n\t"
10639         "orr       r4, r4, r5\n\t"
10640         "ldr       r2, [%[a], #0]\n\t"
10641         "str       r4, [%[r], #8]\n\t"
10642         "lsr       r5, r2, #1\n\t"
10643         "lsl       r2, r2, %[n]\n\t"
10644         "lsr       r5, r5, r6\n\t"
10645         "orr       r3, r3, r5\n\t"
10646         "sub     %[a], %[a], #64\n\t"
10647         "sub     %[r], %[r], #64\n\t"
10648         "ldr       r4, [%[a], #60]\n\t"
10649         "str       r3, [%[r], #68]\n\t"
10650         "lsr       r5, r4, #1\n\t"
10651         "lsl       r4, r4, %[n]\n\t"
10652         "lsr       r5, r5, r6\n\t"
10653         "orr       r2, r2, r5\n\t"
10654         "ldr       r3, [%[a], #56]\n\t"
10655         "str       r2, [%[r], #64]\n\t"
10656         "lsr       r5, r3, #1\n\t"
10657         "lsl       r3, r3, %[n]\n\t"
10658         "lsr       r5, r5, r6\n\t"
10659         "orr       r4, r4, r5\n\t"
10660         "ldr       r2, [%[a], #52]\n\t"
10661         "str       r4, [%[r], #60]\n\t"
10662         "lsr       r5, r2, #1\n\t"
10663         "lsl       r2, r2, %[n]\n\t"
10664         "lsr       r5, r5, r6\n\t"
10665         "orr       r3, r3, r5\n\t"
10666         "ldr       r4, [%[a], #48]\n\t"
10667         "str       r3, [%[r], #56]\n\t"
10668         "lsr       r5, r4, #1\n\t"
10669         "lsl       r4, r4, %[n]\n\t"
10670         "lsr       r5, r5, r6\n\t"
10671         "orr       r2, r2, r5\n\t"
10672         "ldr       r3, [%[a], #44]\n\t"
10673         "str       r2, [%[r], #52]\n\t"
10674         "lsr       r5, r3, #1\n\t"
10675         "lsl       r3, r3, %[n]\n\t"
10676         "lsr       r5, r5, r6\n\t"
10677         "orr       r4, r4, r5\n\t"
10678         "ldr       r2, [%[a], #40]\n\t"
10679         "str       r4, [%[r], #48]\n\t"
10680         "lsr       r5, r2, #1\n\t"
10681         "lsl       r2, r2, %[n]\n\t"
10682         "lsr       r5, r5, r6\n\t"
10683         "orr       r3, r3, r5\n\t"
10684         "ldr       r4, [%[a], #36]\n\t"
10685         "str       r3, [%[r], #44]\n\t"
10686         "lsr       r5, r4, #1\n\t"
10687         "lsl       r4, r4, %[n]\n\t"
10688         "lsr       r5, r5, r6\n\t"
10689         "orr       r2, r2, r5\n\t"
10690         "ldr       r3, [%[a], #32]\n\t"
10691         "str       r2, [%[r], #40]\n\t"
10692         "lsr       r5, r3, #1\n\t"
10693         "lsl       r3, r3, %[n]\n\t"
10694         "lsr       r5, r5, r6\n\t"
10695         "orr       r4, r4, r5\n\t"
10696         "ldr       r2, [%[a], #28]\n\t"
10697         "str       r4, [%[r], #36]\n\t"
10698         "lsr       r5, r2, #1\n\t"
10699         "lsl       r2, r2, %[n]\n\t"
10700         "lsr       r5, r5, r6\n\t"
10701         "orr       r3, r3, r5\n\t"
10702         "ldr       r4, [%[a], #24]\n\t"
10703         "str       r3, [%[r], #32]\n\t"
10704         "lsr       r5, r4, #1\n\t"
10705         "lsl       r4, r4, %[n]\n\t"
10706         "lsr       r5, r5, r6\n\t"
10707         "orr       r2, r2, r5\n\t"
10708         "ldr       r3, [%[a], #20]\n\t"
10709         "str       r2, [%[r], #28]\n\t"
10710         "lsr       r5, r3, #1\n\t"
10711         "lsl       r3, r3, %[n]\n\t"
10712         "lsr       r5, r5, r6\n\t"
10713         "orr       r4, r4, r5\n\t"
10714         "ldr       r2, [%[a], #16]\n\t"
10715         "str       r4, [%[r], #24]\n\t"
10716         "lsr       r5, r2, #1\n\t"
10717         "lsl       r2, r2, %[n]\n\t"
10718         "lsr       r5, r5, r6\n\t"
10719         "orr       r3, r3, r5\n\t"
10720         "ldr       r4, [%[a], #12]\n\t"
10721         "str       r3, [%[r], #20]\n\t"
10722         "lsr       r5, r4, #1\n\t"
10723         "lsl       r4, r4, %[n]\n\t"
10724         "lsr       r5, r5, r6\n\t"
10725         "orr       r2, r2, r5\n\t"
10726         "ldr       r3, [%[a], #8]\n\t"
10727         "str       r2, [%[r], #16]\n\t"
10728         "lsr       r5, r3, #1\n\t"
10729         "lsl       r3, r3, %[n]\n\t"
10730         "lsr       r5, r5, r6\n\t"
10731         "orr       r4, r4, r5\n\t"
10732         "ldr       r2, [%[a], #4]\n\t"
10733         "str       r4, [%[r], #12]\n\t"
10734         "lsr       r5, r2, #1\n\t"
10735         "lsl       r2, r2, %[n]\n\t"
10736         "lsr       r5, r5, r6\n\t"
10737         "orr       r3, r3, r5\n\t"
10738         "ldr       r4, [%[a], #0]\n\t"
10739         "str       r3, [%[r], #8]\n\t"
10740         "lsr       r5, r4, #1\n\t"
10741         "lsl       r4, r4, %[n]\n\t"
10742         "lsr       r5, r5, r6\n\t"
10743         "orr       r2, r2, r5\n\t"
10744         "sub     %[a], %[a], #64\n\t"
10745         "sub     %[r], %[r], #64\n\t"
10746         "ldr       r3, [%[a], #60]\n\t"
10747         "str       r2, [%[r], #68]\n\t"
10748         "lsr       r5, r3, #1\n\t"
10749         "lsl       r3, r3, %[n]\n\t"
10750         "lsr       r5, r5, r6\n\t"
10751         "orr       r4, r4, r5\n\t"
10752         "ldr       r2, [%[a], #56]\n\t"
10753         "str       r4, [%[r], #64]\n\t"
10754         "lsr       r5, r2, #1\n\t"
10755         "lsl       r2, r2, %[n]\n\t"
10756         "lsr       r5, r5, r6\n\t"
10757         "orr       r3, r3, r5\n\t"
10758         "ldr       r4, [%[a], #52]\n\t"
10759         "str       r3, [%[r], #60]\n\t"
10760         "lsr       r5, r4, #1\n\t"
10761         "lsl       r4, r4, %[n]\n\t"
10762         "lsr       r5, r5, r6\n\t"
10763         "orr       r2, r2, r5\n\t"
10764         "ldr       r3, [%[a], #48]\n\t"
10765         "str       r2, [%[r], #56]\n\t"
10766         "lsr       r5, r3, #1\n\t"
10767         "lsl       r3, r3, %[n]\n\t"
10768         "lsr       r5, r5, r6\n\t"
10769         "orr       r4, r4, r5\n\t"
10770         "ldr       r2, [%[a], #44]\n\t"
10771         "str       r4, [%[r], #52]\n\t"
10772         "lsr       r5, r2, #1\n\t"
10773         "lsl       r2, r2, %[n]\n\t"
10774         "lsr       r5, r5, r6\n\t"
10775         "orr       r3, r3, r5\n\t"
10776         "ldr       r4, [%[a], #40]\n\t"
10777         "str       r3, [%[r], #48]\n\t"
10778         "lsr       r5, r4, #1\n\t"
10779         "lsl       r4, r4, %[n]\n\t"
10780         "lsr       r5, r5, r6\n\t"
10781         "orr       r2, r2, r5\n\t"
10782         "ldr       r3, [%[a], #36]\n\t"
10783         "str       r2, [%[r], #44]\n\t"
10784         "lsr       r5, r3, #1\n\t"
10785         "lsl       r3, r3, %[n]\n\t"
10786         "lsr       r5, r5, r6\n\t"
10787         "orr       r4, r4, r5\n\t"
10788         "ldr       r2, [%[a], #32]\n\t"
10789         "str       r4, [%[r], #40]\n\t"
10790         "lsr       r5, r2, #1\n\t"
10791         "lsl       r2, r2, %[n]\n\t"
10792         "lsr       r5, r5, r6\n\t"
10793         "orr       r3, r3, r5\n\t"
10794         "ldr       r4, [%[a], #28]\n\t"
10795         "str       r3, [%[r], #36]\n\t"
10796         "lsr       r5, r4, #1\n\t"
10797         "lsl       r4, r4, %[n]\n\t"
10798         "lsr       r5, r5, r6\n\t"
10799         "orr       r2, r2, r5\n\t"
10800         "ldr       r3, [%[a], #24]\n\t"
10801         "str       r2, [%[r], #32]\n\t"
10802         "lsr       r5, r3, #1\n\t"
10803         "lsl       r3, r3, %[n]\n\t"
10804         "lsr       r5, r5, r6\n\t"
10805         "orr       r4, r4, r5\n\t"
10806         "ldr       r2, [%[a], #20]\n\t"
10807         "str       r4, [%[r], #28]\n\t"
10808         "lsr       r5, r2, #1\n\t"
10809         "lsl       r2, r2, %[n]\n\t"
10810         "lsr       r5, r5, r6\n\t"
10811         "orr       r3, r3, r5\n\t"
10812         "ldr       r4, [%[a], #16]\n\t"
10813         "str       r3, [%[r], #24]\n\t"
10814         "lsr       r5, r4, #1\n\t"
10815         "lsl       r4, r4, %[n]\n\t"
10816         "lsr       r5, r5, r6\n\t"
10817         "orr       r2, r2, r5\n\t"
10818         "ldr       r3, [%[a], #12]\n\t"
10819         "str       r2, [%[r], #20]\n\t"
10820         "lsr       r5, r3, #1\n\t"
10821         "lsl       r3, r3, %[n]\n\t"
10822         "lsr       r5, r5, r6\n\t"
10823         "orr       r4, r4, r5\n\t"
10824         "ldr       r2, [%[a], #8]\n\t"
10825         "str       r4, [%[r], #16]\n\t"
10826         "lsr       r5, r2, #1\n\t"
10827         "lsl       r2, r2, %[n]\n\t"
10828         "lsr       r5, r5, r6\n\t"
10829         "orr       r3, r3, r5\n\t"
10830         "ldr       r4, [%[a], #4]\n\t"
10831         "str       r3, [%[r], #12]\n\t"
10832         "lsr       r5, r4, #1\n\t"
10833         "lsl       r4, r4, %[n]\n\t"
10834         "lsr       r5, r5, r6\n\t"
10835         "orr       r2, r2, r5\n\t"
10836         "ldr       r3, [%[a], #0]\n\t"
10837         "str       r2, [%[r], #8]\n\t"
10838         "lsr       r5, r3, #1\n\t"
10839         "lsl       r3, r3, %[n]\n\t"
10840         "lsr       r5, r5, r6\n\t"
10841         "orr       r4, r4, r5\n\t"
10842         "sub     %[a], %[a], #64\n\t"
10843         "sub     %[r], %[r], #64\n\t"
10844         "ldr       r2, [%[a], #60]\n\t"
10845         "str       r4, [%[r], #68]\n\t"
10846         "lsr       r5, r2, #1\n\t"
10847         "lsl       r2, r2, %[n]\n\t"
10848         "lsr       r5, r5, r6\n\t"
10849         "orr       r3, r3, r5\n\t"
10850         "ldr       r4, [%[a], #56]\n\t"
10851         "str       r3, [%[r], #64]\n\t"
10852         "lsr       r5, r4, #1\n\t"
10853         "lsl       r4, r4, %[n]\n\t"
10854         "lsr       r5, r5, r6\n\t"
10855         "orr       r2, r2, r5\n\t"
10856         "ldr       r3, [%[a], #52]\n\t"
10857         "str       r2, [%[r], #60]\n\t"
10858         "lsr       r5, r3, #1\n\t"
10859         "lsl       r3, r3, %[n]\n\t"
10860         "lsr       r5, r5, r6\n\t"
10861         "orr       r4, r4, r5\n\t"
10862         "ldr       r2, [%[a], #48]\n\t"
10863         "str       r4, [%[r], #56]\n\t"
10864         "lsr       r5, r2, #1\n\t"
10865         "lsl       r2, r2, %[n]\n\t"
10866         "lsr       r5, r5, r6\n\t"
10867         "orr       r3, r3, r5\n\t"
10868         "ldr       r4, [%[a], #44]\n\t"
10869         "str       r3, [%[r], #52]\n\t"
10870         "lsr       r5, r4, #1\n\t"
10871         "lsl       r4, r4, %[n]\n\t"
10872         "lsr       r5, r5, r6\n\t"
10873         "orr       r2, r2, r5\n\t"
10874         "ldr       r3, [%[a], #40]\n\t"
10875         "str       r2, [%[r], #48]\n\t"
10876         "lsr       r5, r3, #1\n\t"
10877         "lsl       r3, r3, %[n]\n\t"
10878         "lsr       r5, r5, r6\n\t"
10879         "orr       r4, r4, r5\n\t"
10880         "ldr       r2, [%[a], #36]\n\t"
10881         "str       r4, [%[r], #44]\n\t"
10882         "lsr       r5, r2, #1\n\t"
10883         "lsl       r2, r2, %[n]\n\t"
10884         "lsr       r5, r5, r6\n\t"
10885         "orr       r3, r3, r5\n\t"
10886         "ldr       r4, [%[a], #32]\n\t"
10887         "str       r3, [%[r], #40]\n\t"
10888         "lsr       r5, r4, #1\n\t"
10889         "lsl       r4, r4, %[n]\n\t"
10890         "lsr       r5, r5, r6\n\t"
10891         "orr       r2, r2, r5\n\t"
10892         "ldr       r3, [%[a], #28]\n\t"
10893         "str       r2, [%[r], #36]\n\t"
10894         "lsr       r5, r3, #1\n\t"
10895         "lsl       r3, r3, %[n]\n\t"
10896         "lsr       r5, r5, r6\n\t"
10897         "orr       r4, r4, r5\n\t"
10898         "ldr       r2, [%[a], #24]\n\t"
10899         "str       r4, [%[r], #32]\n\t"
10900         "lsr       r5, r2, #1\n\t"
10901         "lsl       r2, r2, %[n]\n\t"
10902         "lsr       r5, r5, r6\n\t"
10903         "orr       r3, r3, r5\n\t"
10904         "ldr       r4, [%[a], #20]\n\t"
10905         "str       r3, [%[r], #28]\n\t"
10906         "lsr       r5, r4, #1\n\t"
10907         "lsl       r4, r4, %[n]\n\t"
10908         "lsr       r5, r5, r6\n\t"
10909         "orr       r2, r2, r5\n\t"
10910         "ldr       r3, [%[a], #16]\n\t"
10911         "str       r2, [%[r], #24]\n\t"
10912         "lsr       r5, r3, #1\n\t"
10913         "lsl       r3, r3, %[n]\n\t"
10914         "lsr       r5, r5, r6\n\t"
10915         "orr       r4, r4, r5\n\t"
10916         "ldr       r2, [%[a], #12]\n\t"
10917         "str       r4, [%[r], #20]\n\t"
10918         "lsr       r5, r2, #1\n\t"
10919         "lsl       r2, r2, %[n]\n\t"
10920         "lsr       r5, r5, r6\n\t"
10921         "orr       r3, r3, r5\n\t"
10922         "ldr       r4, [%[a], #8]\n\t"
10923         "str       r3, [%[r], #16]\n\t"
10924         "lsr       r5, r4, #1\n\t"
10925         "lsl       r4, r4, %[n]\n\t"
10926         "lsr       r5, r5, r6\n\t"
10927         "orr       r2, r2, r5\n\t"
10928         "ldr       r3, [%[a], #4]\n\t"
10929         "str       r2, [%[r], #12]\n\t"
10930         "lsr       r5, r3, #1\n\t"
10931         "lsl       r3, r3, %[n]\n\t"
10932         "lsr       r5, r5, r6\n\t"
10933         "orr       r4, r4, r5\n\t"
10934         "ldr       r2, [%[a], #0]\n\t"
10935         "str       r4, [%[r], #8]\n\t"
10936         "lsr       r5, r2, #1\n\t"
10937         "lsl       r2, r2, %[n]\n\t"
10938         "lsr       r5, r5, r6\n\t"
10939         "orr       r3, r3, r5\n\t"
10940         "sub     %[a], %[a], #64\n\t"
10941         "sub     %[r], %[r], #64\n\t"
10942         "ldr       r4, [%[a], #60]\n\t"
10943         "str       r3, [%[r], #68]\n\t"
10944         "lsr       r5, r4, #1\n\t"
10945         "lsl       r4, r4, %[n]\n\t"
10946         "lsr       r5, r5, r6\n\t"
10947         "orr       r2, r2, r5\n\t"
10948         "ldr       r3, [%[a], #56]\n\t"
10949         "str       r2, [%[r], #64]\n\t"
10950         "lsr       r5, r3, #1\n\t"
10951         "lsl       r3, r3, %[n]\n\t"
10952         "lsr       r5, r5, r6\n\t"
10953         "orr       r4, r4, r5\n\t"
10954         "ldr       r2, [%[a], #52]\n\t"
10955         "str       r4, [%[r], #60]\n\t"
10956         "lsr       r5, r2, #1\n\t"
10957         "lsl       r2, r2, %[n]\n\t"
10958         "lsr       r5, r5, r6\n\t"
10959         "orr       r3, r3, r5\n\t"
10960         "ldr       r4, [%[a], #48]\n\t"
10961         "str       r3, [%[r], #56]\n\t"
10962         "lsr       r5, r4, #1\n\t"
10963         "lsl       r4, r4, %[n]\n\t"
10964         "lsr       r5, r5, r6\n\t"
10965         "orr       r2, r2, r5\n\t"
10966         "ldr       r3, [%[a], #44]\n\t"
10967         "str       r2, [%[r], #52]\n\t"
10968         "lsr       r5, r3, #1\n\t"
10969         "lsl       r3, r3, %[n]\n\t"
10970         "lsr       r5, r5, r6\n\t"
10971         "orr       r4, r4, r5\n\t"
10972         "ldr       r2, [%[a], #40]\n\t"
10973         "str       r4, [%[r], #48]\n\t"
10974         "lsr       r5, r2, #1\n\t"
10975         "lsl       r2, r2, %[n]\n\t"
10976         "lsr       r5, r5, r6\n\t"
10977         "orr       r3, r3, r5\n\t"
10978         "ldr       r4, [%[a], #36]\n\t"
10979         "str       r3, [%[r], #44]\n\t"
10980         "lsr       r5, r4, #1\n\t"
10981         "lsl       r4, r4, %[n]\n\t"
10982         "lsr       r5, r5, r6\n\t"
10983         "orr       r2, r2, r5\n\t"
10984         "ldr       r3, [%[a], #32]\n\t"
10985         "str       r2, [%[r], #40]\n\t"
10986         "lsr       r5, r3, #1\n\t"
10987         "lsl       r3, r3, %[n]\n\t"
10988         "lsr       r5, r5, r6\n\t"
10989         "orr       r4, r4, r5\n\t"
10990         "ldr       r2, [%[a], #28]\n\t"
10991         "str       r4, [%[r], #36]\n\t"
10992         "lsr       r5, r2, #1\n\t"
10993         "lsl       r2, r2, %[n]\n\t"
10994         "lsr       r5, r5, r6\n\t"
10995         "orr       r3, r3, r5\n\t"
10996         "ldr       r4, [%[a], #24]\n\t"
10997         "str       r3, [%[r], #32]\n\t"
10998         "lsr       r5, r4, #1\n\t"
10999         "lsl       r4, r4, %[n]\n\t"
11000         "lsr       r5, r5, r6\n\t"
11001         "orr       r2, r2, r5\n\t"
11002         "ldr       r3, [%[a], #20]\n\t"
11003         "str       r2, [%[r], #28]\n\t"
11004         "lsr       r5, r3, #1\n\t"
11005         "lsl       r3, r3, %[n]\n\t"
11006         "lsr       r5, r5, r6\n\t"
11007         "orr       r4, r4, r5\n\t"
11008         "ldr       r2, [%[a], #16]\n\t"
11009         "str       r4, [%[r], #24]\n\t"
11010         "lsr       r5, r2, #1\n\t"
11011         "lsl       r2, r2, %[n]\n\t"
11012         "lsr       r5, r5, r6\n\t"
11013         "orr       r3, r3, r5\n\t"
11014         "ldr       r4, [%[a], #12]\n\t"
11015         "str       r3, [%[r], #20]\n\t"
11016         "lsr       r5, r4, #1\n\t"
11017         "lsl       r4, r4, %[n]\n\t"
11018         "lsr       r5, r5, r6\n\t"
11019         "orr       r2, r2, r5\n\t"
11020         "ldr       r3, [%[a], #8]\n\t"
11021         "str       r2, [%[r], #16]\n\t"
11022         "lsr       r5, r3, #1\n\t"
11023         "lsl       r3, r3, %[n]\n\t"
11024         "lsr       r5, r5, r6\n\t"
11025         "orr       r4, r4, r5\n\t"
11026         "ldr       r2, [%[a], #4]\n\t"
11027         "str       r4, [%[r], #12]\n\t"
11028         "lsr       r5, r2, #1\n\t"
11029         "lsl       r2, r2, %[n]\n\t"
11030         "lsr       r5, r5, r6\n\t"
11031         "orr       r3, r3, r5\n\t"
11032         "ldr       r4, [%[a], #0]\n\t"
11033         "str       r3, [%[r], #8]\n\t"
11034         "lsr       r5, r4, #1\n\t"
11035         "lsl       r4, r4, %[n]\n\t"
11036         "lsr       r5, r5, r6\n\t"
11037         "orr       r2, r2, r5\n\t"
11038         "str r4, [%[r]]\n\t"
11039         "str r2, [%[r], #4]\n\t"
11040         :
11041         : [r] "r" (r), [a] "r" (a), [n] "r" (n)
11042         : "memory", "r2", "r3", "r4", "r5", "r6"
11043     );
11044 }
11045 
11046 /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
11047  *
11048  * r     A single precision number that is the result of the operation.
11049  * e     A single precision number that is the exponent.
11050  * bits  The number of bits in the exponent.
11051  * m     A single precision number that is the modulus.
11052  * returns  0 on success.
11053  * returns  MEMORY_E on dynamic memory allocation failure.
11054  * returns  MP_VAL when base is even.
11055  */
sp_3072_mod_exp_2_96(sp_digit * r,const sp_digit * e,int bits,const sp_digit * m)11056 static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits,
11057         const sp_digit* m)
11058 {
11059 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11060     sp_digit* td = NULL;
11061 #else
11062     sp_digit td[289];
11063 #endif
11064     sp_digit* norm = NULL;
11065     sp_digit* tmp = NULL;
11066     sp_digit mp = 1;
11067     sp_digit n;
11068     sp_digit o;
11069     sp_digit mask;
11070     int i;
11071     int c;
11072     byte y;
11073     int err = MP_OKAY;
11074 
11075     if ((m[0] & 1) == 0) {
11076         err = MP_VAL;
11077     }
11078 
11079 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11080     if (err == MP_OKAY) {
11081         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 289, NULL,
11082                                 DYNAMIC_TYPE_TMP_BUFFER);
11083         if (td == NULL)
11084             err = MEMORY_E;
11085     }
11086 #endif
11087 
11088     if (err == MP_OKAY) {
11089         norm = td;
11090         tmp = td + 192;
11091 
11092         sp_3072_mont_setup(m, &mp);
11093         sp_3072_mont_norm_96(norm, m);
11094 
11095         i = (bits - 1) / 32;
11096         n = e[i--];
11097         c = bits & 31;
11098         if (c == 0) {
11099             c = 32;
11100         }
11101         c -= bits % 5;
11102         if (c == 32) {
11103             c = 27;
11104         }
11105         if (c < 0) {
11106             /* Number of bits in top word is less than number needed. */
11107             c = -c;
11108             y = (byte)(n << c);
11109             n = e[i--];
11110             y |= (byte)(n >> (64 - c));
11111             n <<= c;
11112             c = 64 - c;
11113         }
11114         else {
11115             y = (byte)(n >> c);
11116             n <<= 32 - c;
11117         }
11118         sp_3072_lshift_96(r, norm, y);
11119         for (; i>=0 || c>=5; ) {
11120             if (c == 0) {
11121                 n = e[i--];
11122                 y = (byte)(n >> 27);
11123                 n <<= 5;
11124                 c = 27;
11125             }
11126             else if (c < 5) {
11127                 y = (byte)(n >> 27);
11128                 n = e[i--];
11129                 c = 5 - c;
11130                 y |= (byte)(n >> (32 - c));
11131                 n <<= c;
11132                 c = 32 - c;
11133             }
11134             else {
11135                 y = (byte)((n >> 27) & 0x1f);
11136                 n <<= 5;
11137                 c -= 5;
11138             }
11139 
11140             sp_3072_mont_sqr_96(r, r, m, mp);
11141             sp_3072_mont_sqr_96(r, r, m, mp);
11142             sp_3072_mont_sqr_96(r, r, m, mp);
11143             sp_3072_mont_sqr_96(r, r, m, mp);
11144             sp_3072_mont_sqr_96(r, r, m, mp);
11145 
11146             sp_3072_lshift_96(r, r, y);
11147             sp_3072_mul_d_96(tmp, norm, r[96]);
11148             r[96] = 0;
11149             o = sp_3072_add_96(r, r, tmp);
11150             sp_3072_cond_sub_96(r, r, m, (sp_digit)0 - o);
11151         }
11152 
11153         XMEMSET(&r[96], 0, sizeof(sp_digit) * 96U);
11154         sp_3072_mont_reduce_96(r, m, mp);
11155 
11156         mask = 0 - (sp_3072_cmp_96(r, m) >= 0);
11157         sp_3072_cond_sub_96(r, r, m, mask);
11158     }
11159 
11160 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11161     if (td != NULL)
11162         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
11163 #endif
11164 
11165     return err;
11166 }
11167 #endif /* HAVE_FFDHE_3072 */
11168 
11169 /* Perform the modular exponentiation for Diffie-Hellman.
11170  *
11171  * base     Base.
11172  * exp      Array of bytes that is the exponent.
11173  * expLen   Length of data, in bytes, in exponent.
11174  * mod      Modulus.
11175  * out      Buffer to hold big-endian bytes of exponentiation result.
11176  *          Must be at least 384 bytes long.
11177  * outLen   Length, in bytes, of exponentiation result.
11178  * returns 0 on success, MP_READ_E if there are too many bytes in an array
11179  * and MEMORY_E if memory allocation fails.
11180  */
sp_DhExp_3072(const mp_int * base,const byte * exp,word32 expLen,const mp_int * mod,byte * out,word32 * outLen)11181 int sp_DhExp_3072(const mp_int* base, const byte* exp, word32 expLen,
11182     const mp_int* mod, byte* out, word32* outLen)
11183 {
11184     int err = MP_OKAY;
11185     sp_digit b[192];
11186     sp_digit e[96];
11187     sp_digit m[96];
11188     sp_digit* r = b;
11189     word32 i;
11190 
11191     if (mp_count_bits(base) > 3072) {
11192         err = MP_READ_E;
11193     }
11194     else if (expLen > 384) {
11195         err = MP_READ_E;
11196     }
11197     else if (mp_count_bits(mod) != 3072) {
11198         err = MP_READ_E;
11199     }
11200     else if (mp_iseven(mod)) {
11201         err = MP_VAL;
11202     }
11203 
11204     if (err == MP_OKAY) {
11205         sp_3072_from_mp(b, 96, base);
11206         sp_3072_from_bin(e, 96, exp, expLen);
11207         sp_3072_from_mp(m, 96, mod);
11208 
11209     #ifdef HAVE_FFDHE_3072
11210         if (base->used == 1 && base->dp[0] == 2 && m[95] == (sp_digit)-1)
11211             err = sp_3072_mod_exp_2_96(r, e, expLen * 8, m);
11212         else
11213     #endif
11214             err = sp_3072_mod_exp_96(r, b, e, expLen * 8, m, 0);
11215 
11216     }
11217 
11218     if (err == MP_OKAY) {
11219         sp_3072_to_bin_96(r, out);
11220         *outLen = 384;
11221         for (i=0; i<384 && out[i] == 0; i++) {
11222             /* Search for first non-zero. */
11223         }
11224         *outLen -= i;
11225         XMEMMOVE(out, out + i, *outLen);
11226 
11227     }
11228 
11229     XMEMSET(e, 0, sizeof(e));
11230 
11231     return err;
11232 }
11233 #endif /* WOLFSSL_HAVE_SP_DH */
11234 
11235 /* Perform the modular exponentiation for Diffie-Hellman.
11236  *
11237  * base  Base. MP integer.
11238  * exp   Exponent. MP integer.
11239  * mod   Modulus. MP integer.
11240  * res   Result. MP integer.
11241  * returns 0 on success, MP_READ_E if there are too many bytes in an array
11242  * and MEMORY_E if memory allocation fails.
11243  */
sp_ModExp_1536(const mp_int * base,const mp_int * exp,const mp_int * mod,mp_int * res)11244 int sp_ModExp_1536(const mp_int* base, const mp_int* exp, const mp_int* mod,
11245     mp_int* res)
11246 {
11247     int err = MP_OKAY;
11248     sp_digit b[96];
11249     sp_digit e[48];
11250     sp_digit m[48];
11251     sp_digit* r = b;
11252     int expBits = mp_count_bits(exp);
11253 
11254     if (mp_count_bits(base) > 1536) {
11255         err = MP_READ_E;
11256     }
11257     else if (expBits > 1536) {
11258         err = MP_READ_E;
11259     }
11260     else if (mp_count_bits(mod) != 1536) {
11261         err = MP_READ_E;
11262     }
11263     else if (mp_iseven(mod)) {
11264         err = MP_VAL;
11265     }
11266 
11267     if (err == MP_OKAY) {
11268         sp_3072_from_mp(b, 48, base);
11269         sp_3072_from_mp(e, 48, exp);
11270         sp_3072_from_mp(m, 48, mod);
11271 
11272         err = sp_3072_mod_exp_48(r, b, e, expBits, m, 0);
11273     }
11274 
11275     if (err == MP_OKAY) {
11276         XMEMSET(r + 48, 0, sizeof(*r) * 48U);
11277         err = sp_3072_to_mp(r, res);
11278         res->used = mod->used;
11279         mp_clamp(res);
11280     }
11281 
11282     XMEMSET(e, 0, sizeof(e));
11283 
11284     return err;
11285 }
11286 
11287 #endif /* WOLFSSL_HAVE_SP_DH | (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) */
11288 
11289 #endif /* !WOLFSSL_SP_NO_3072 */
11290 
11291 #ifdef WOLFSSL_SP_4096
11292 /* Read big endian unsigned byte array into r.
11293  *
11294  * r  A single precision integer.
11295  * size  Maximum number of bytes to convert
11296  * a  Byte array.
11297  * n  Number of bytes in array to read.
11298  */
sp_4096_from_bin(sp_digit * r,int size,const byte * a,int n)11299 static void sp_4096_from_bin(sp_digit* r, int size, const byte* a, int n)
11300 {
11301     int i;
11302     int j = 0;
11303     word32 s = 0;
11304 
11305     r[0] = 0;
11306     for (i = n-1; i >= 0; i--) {
11307         r[j] |= (((sp_digit)a[i]) << s);
11308         if (s >= 24U) {
11309             r[j] &= 0xffffffff;
11310             s = 32U - s;
11311             if (j + 1 >= size) {
11312                 break;
11313             }
11314             r[++j] = (sp_digit)a[i] >> s;
11315             s = 8U - s;
11316         }
11317         else {
11318             s += 8U;
11319         }
11320     }
11321 
11322     for (j++; j < size; j++) {
11323         r[j] = 0;
11324     }
11325 }
11326 
11327 /* Convert an mp_int to an array of sp_digit.
11328  *
11329  * r  A single precision integer.
11330  * size  Maximum number of bytes to convert
11331  * a  A multi-precision integer.
11332  */
sp_4096_from_mp(sp_digit * r,int size,const mp_int * a)11333 static void sp_4096_from_mp(sp_digit* r, int size, const mp_int* a)
11334 {
11335 #if DIGIT_BIT == 32
11336     int j;
11337 
11338     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
11339 
11340     for (j = a->used; j < size; j++) {
11341         r[j] = 0;
11342     }
11343 #elif DIGIT_BIT > 32
11344     int i;
11345     int j = 0;
11346     word32 s = 0;
11347 
11348     r[0] = 0;
11349     for (i = 0; i < a->used && j < size; i++) {
11350         r[j] |= ((sp_digit)a->dp[i] << s);
11351         r[j] &= 0xffffffff;
11352         s = 32U - s;
11353         if (j + 1 >= size) {
11354             break;
11355         }
11356         /* lint allow cast of mismatch word32 and mp_digit */
11357         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
11358         while ((s + 32U) <= (word32)DIGIT_BIT) {
11359             s += 32U;
11360             r[j] &= 0xffffffff;
11361             if (j + 1 >= size) {
11362                 break;
11363             }
11364             if (s < (word32)DIGIT_BIT) {
11365                 /* lint allow cast of mismatch word32 and mp_digit */
11366                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
11367             }
11368             else {
11369                 r[++j] = (sp_digit)0;
11370             }
11371         }
11372         s = (word32)DIGIT_BIT - s;
11373     }
11374 
11375     for (j++; j < size; j++) {
11376         r[j] = 0;
11377     }
11378 #else
11379     int i;
11380     int j = 0;
11381     int s = 0;
11382 
11383     r[0] = 0;
11384     for (i = 0; i < a->used && j < size; i++) {
11385         r[j] |= ((sp_digit)a->dp[i]) << s;
11386         if (s + DIGIT_BIT >= 32) {
11387             r[j] &= 0xffffffff;
11388             if (j + 1 >= size) {
11389                 break;
11390             }
11391             s = 32 - s;
11392             if (s == DIGIT_BIT) {
11393                 r[++j] = 0;
11394                 s = 0;
11395             }
11396             else {
11397                 r[++j] = a->dp[i] >> s;
11398                 s = DIGIT_BIT - s;
11399             }
11400         }
11401         else {
11402             s += DIGIT_BIT;
11403         }
11404     }
11405 
11406     for (j++; j < size; j++) {
11407         r[j] = 0;
11408     }
11409 #endif
11410 }
11411 
11412 /* Write r as big endian to byte array.
11413  * Fixed length number of bytes written: 512
11414  *
11415  * r  A single precision integer.
11416  * a  Byte array.
11417  */
sp_4096_to_bin_128(sp_digit * r,byte * a)11418 static void sp_4096_to_bin_128(sp_digit* r, byte* a)
11419 {
11420     int i;
11421     int j;
11422     int s = 0;
11423     int b;
11424 
11425     j = 4096 / 8 - 1;
11426     a[j] = 0;
11427     for (i=0; i<128 && j>=0; i++) {
11428         b = 0;
11429         /* lint allow cast of mismatch sp_digit and int */
11430         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
11431         b += 8 - s;
11432         if (j < 0) {
11433             break;
11434         }
11435         while (b < 32) {
11436             a[j--] = (byte)(r[i] >> b);
11437             b += 8;
11438             if (j < 0) {
11439                 break;
11440             }
11441         }
11442         s = 8 - (b - 32);
11443         if (j >= 0) {
11444             a[j] = 0;
11445         }
11446         if (s != 0) {
11447             j++;
11448         }
11449     }
11450 }
11451 
#if (defined(WOLFSSL_HAVE_SP_RSA) && (!defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(WOLFSSL_SP_SMALL))) || defined(WOLFSSL_HAVE_SP_DH)
/* Normalize the values in each word to 32 bits.
 *
 * The macro expands to nothing, so using it generates no code.
 * NOTE(review): presumably no work is needed because every digit already
 * holds a full 32-bit value - confirm against the code generator.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_4096_norm_128(a)

#endif /* (WOLFSSL_HAVE_SP_RSA && (!WOLFSSL_RSA_PUBLIC_ONLY || !WOLFSSL_SP_SMALL)) || WOLFSSL_HAVE_SP_DH */
/* Normalize the values in each word to 32 bits.
 *
 * Unconditional definition that expands to nothing. It is token-for-token
 * the same as the conditional definition above, so defining it again when
 * that one is compiled in is a benign redefinition.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_4096_norm_128(a)
11465 
11466 #ifndef WOLFSSL_SP_SMALL
11467 /* Sub b from a into r. (r = a - b)
11468  *
11469  * r  A single precision integer.
11470  * a  A single precision integer.
11471  * b  A single precision integer.
11472  */
sp_4096_sub_in_place_128(sp_digit * a,const sp_digit * b)11473 SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
11474         const sp_digit* b)
11475 {
11476     sp_digit c = 0;
11477 
11478     __asm__ __volatile__ (
11479         "ldm	%[a], {r3, r4}\n\t"
11480         "ldm	%[b]!, {r5, r6}\n\t"
11481         "subs	r3, r3, r5\n\t"
11482         "sbcs	r4, r4, r6\n\t"
11483         "stm	%[a]!, {r3, r4}\n\t"
11484         "ldm	%[a], {r3, r4}\n\t"
11485         "ldm	%[b]!, {r5, r6}\n\t"
11486         "sbcs	r3, r3, r5\n\t"
11487         "sbcs	r4, r4, r6\n\t"
11488         "stm	%[a]!, {r3, r4}\n\t"
11489         "ldm	%[a], {r3, r4}\n\t"
11490         "ldm	%[b]!, {r5, r6}\n\t"
11491         "sbcs	r3, r3, r5\n\t"
11492         "sbcs	r4, r4, r6\n\t"
11493         "stm	%[a]!, {r3, r4}\n\t"
11494         "ldm	%[a], {r3, r4}\n\t"
11495         "ldm	%[b]!, {r5, r6}\n\t"
11496         "sbcs	r3, r3, r5\n\t"
11497         "sbcs	r4, r4, r6\n\t"
11498         "stm	%[a]!, {r3, r4}\n\t"
11499         "ldm	%[a], {r3, r4}\n\t"
11500         "ldm	%[b]!, {r5, r6}\n\t"
11501         "sbcs	r3, r3, r5\n\t"
11502         "sbcs	r4, r4, r6\n\t"
11503         "stm	%[a]!, {r3, r4}\n\t"
11504         "ldm	%[a], {r3, r4}\n\t"
11505         "ldm	%[b]!, {r5, r6}\n\t"
11506         "sbcs	r3, r3, r5\n\t"
11507         "sbcs	r4, r4, r6\n\t"
11508         "stm	%[a]!, {r3, r4}\n\t"
11509         "ldm	%[a], {r3, r4}\n\t"
11510         "ldm	%[b]!, {r5, r6}\n\t"
11511         "sbcs	r3, r3, r5\n\t"
11512         "sbcs	r4, r4, r6\n\t"
11513         "stm	%[a]!, {r3, r4}\n\t"
11514         "ldm	%[a], {r3, r4}\n\t"
11515         "ldm	%[b]!, {r5, r6}\n\t"
11516         "sbcs	r3, r3, r5\n\t"
11517         "sbcs	r4, r4, r6\n\t"
11518         "stm	%[a]!, {r3, r4}\n\t"
11519         "ldm	%[a], {r3, r4}\n\t"
11520         "ldm	%[b]!, {r5, r6}\n\t"
11521         "sbcs	r3, r3, r5\n\t"
11522         "sbcs	r4, r4, r6\n\t"
11523         "stm	%[a]!, {r3, r4}\n\t"
11524         "ldm	%[a], {r3, r4}\n\t"
11525         "ldm	%[b]!, {r5, r6}\n\t"
11526         "sbcs	r3, r3, r5\n\t"
11527         "sbcs	r4, r4, r6\n\t"
11528         "stm	%[a]!, {r3, r4}\n\t"
11529         "ldm	%[a], {r3, r4}\n\t"
11530         "ldm	%[b]!, {r5, r6}\n\t"
11531         "sbcs	r3, r3, r5\n\t"
11532         "sbcs	r4, r4, r6\n\t"
11533         "stm	%[a]!, {r3, r4}\n\t"
11534         "ldm	%[a], {r3, r4}\n\t"
11535         "ldm	%[b]!, {r5, r6}\n\t"
11536         "sbcs	r3, r3, r5\n\t"
11537         "sbcs	r4, r4, r6\n\t"
11538         "stm	%[a]!, {r3, r4}\n\t"
11539         "ldm	%[a], {r3, r4}\n\t"
11540         "ldm	%[b]!, {r5, r6}\n\t"
11541         "sbcs	r3, r3, r5\n\t"
11542         "sbcs	r4, r4, r6\n\t"
11543         "stm	%[a]!, {r3, r4}\n\t"
11544         "ldm	%[a], {r3, r4}\n\t"
11545         "ldm	%[b]!, {r5, r6}\n\t"
11546         "sbcs	r3, r3, r5\n\t"
11547         "sbcs	r4, r4, r6\n\t"
11548         "stm	%[a]!, {r3, r4}\n\t"
11549         "ldm	%[a], {r3, r4}\n\t"
11550         "ldm	%[b]!, {r5, r6}\n\t"
11551         "sbcs	r3, r3, r5\n\t"
11552         "sbcs	r4, r4, r6\n\t"
11553         "stm	%[a]!, {r3, r4}\n\t"
11554         "ldm	%[a], {r3, r4}\n\t"
11555         "ldm	%[b]!, {r5, r6}\n\t"
11556         "sbcs	r3, r3, r5\n\t"
11557         "sbcs	r4, r4, r6\n\t"
11558         "stm	%[a]!, {r3, r4}\n\t"
11559         "ldm	%[a], {r3, r4}\n\t"
11560         "ldm	%[b]!, {r5, r6}\n\t"
11561         "sbcs	r3, r3, r5\n\t"
11562         "sbcs	r4, r4, r6\n\t"
11563         "stm	%[a]!, {r3, r4}\n\t"
11564         "ldm	%[a], {r3, r4}\n\t"
11565         "ldm	%[b]!, {r5, r6}\n\t"
11566         "sbcs	r3, r3, r5\n\t"
11567         "sbcs	r4, r4, r6\n\t"
11568         "stm	%[a]!, {r3, r4}\n\t"
11569         "ldm	%[a], {r3, r4}\n\t"
11570         "ldm	%[b]!, {r5, r6}\n\t"
11571         "sbcs	r3, r3, r5\n\t"
11572         "sbcs	r4, r4, r6\n\t"
11573         "stm	%[a]!, {r3, r4}\n\t"
11574         "ldm	%[a], {r3, r4}\n\t"
11575         "ldm	%[b]!, {r5, r6}\n\t"
11576         "sbcs	r3, r3, r5\n\t"
11577         "sbcs	r4, r4, r6\n\t"
11578         "stm	%[a]!, {r3, r4}\n\t"
11579         "ldm	%[a], {r3, r4}\n\t"
11580         "ldm	%[b]!, {r5, r6}\n\t"
11581         "sbcs	r3, r3, r5\n\t"
11582         "sbcs	r4, r4, r6\n\t"
11583         "stm	%[a]!, {r3, r4}\n\t"
11584         "ldm	%[a], {r3, r4}\n\t"
11585         "ldm	%[b]!, {r5, r6}\n\t"
11586         "sbcs	r3, r3, r5\n\t"
11587         "sbcs	r4, r4, r6\n\t"
11588         "stm	%[a]!, {r3, r4}\n\t"
11589         "ldm	%[a], {r3, r4}\n\t"
11590         "ldm	%[b]!, {r5, r6}\n\t"
11591         "sbcs	r3, r3, r5\n\t"
11592         "sbcs	r4, r4, r6\n\t"
11593         "stm	%[a]!, {r3, r4}\n\t"
11594         "ldm	%[a], {r3, r4}\n\t"
11595         "ldm	%[b]!, {r5, r6}\n\t"
11596         "sbcs	r3, r3, r5\n\t"
11597         "sbcs	r4, r4, r6\n\t"
11598         "stm	%[a]!, {r3, r4}\n\t"
11599         "ldm	%[a], {r3, r4}\n\t"
11600         "ldm	%[b]!, {r5, r6}\n\t"
11601         "sbcs	r3, r3, r5\n\t"
11602         "sbcs	r4, r4, r6\n\t"
11603         "stm	%[a]!, {r3, r4}\n\t"
11604         "ldm	%[a], {r3, r4}\n\t"
11605         "ldm	%[b]!, {r5, r6}\n\t"
11606         "sbcs	r3, r3, r5\n\t"
11607         "sbcs	r4, r4, r6\n\t"
11608         "stm	%[a]!, {r3, r4}\n\t"
11609         "ldm	%[a], {r3, r4}\n\t"
11610         "ldm	%[b]!, {r5, r6}\n\t"
11611         "sbcs	r3, r3, r5\n\t"
11612         "sbcs	r4, r4, r6\n\t"
11613         "stm	%[a]!, {r3, r4}\n\t"
11614         "ldm	%[a], {r3, r4}\n\t"
11615         "ldm	%[b]!, {r5, r6}\n\t"
11616         "sbcs	r3, r3, r5\n\t"
11617         "sbcs	r4, r4, r6\n\t"
11618         "stm	%[a]!, {r3, r4}\n\t"
11619         "ldm	%[a], {r3, r4}\n\t"
11620         "ldm	%[b]!, {r5, r6}\n\t"
11621         "sbcs	r3, r3, r5\n\t"
11622         "sbcs	r4, r4, r6\n\t"
11623         "stm	%[a]!, {r3, r4}\n\t"
11624         "ldm	%[a], {r3, r4}\n\t"
11625         "ldm	%[b]!, {r5, r6}\n\t"
11626         "sbcs	r3, r3, r5\n\t"
11627         "sbcs	r4, r4, r6\n\t"
11628         "stm	%[a]!, {r3, r4}\n\t"
11629         "ldm	%[a], {r3, r4}\n\t"
11630         "ldm	%[b]!, {r5, r6}\n\t"
11631         "sbcs	r3, r3, r5\n\t"
11632         "sbcs	r4, r4, r6\n\t"
11633         "stm	%[a]!, {r3, r4}\n\t"
11634         "ldm	%[a], {r3, r4}\n\t"
11635         "ldm	%[b]!, {r5, r6}\n\t"
11636         "sbcs	r3, r3, r5\n\t"
11637         "sbcs	r4, r4, r6\n\t"
11638         "stm	%[a]!, {r3, r4}\n\t"
11639         "ldm	%[a], {r3, r4}\n\t"
11640         "ldm	%[b]!, {r5, r6}\n\t"
11641         "sbcs	r3, r3, r5\n\t"
11642         "sbcs	r4, r4, r6\n\t"
11643         "stm	%[a]!, {r3, r4}\n\t"
11644         "ldm	%[a], {r3, r4}\n\t"
11645         "ldm	%[b]!, {r5, r6}\n\t"
11646         "sbcs	r3, r3, r5\n\t"
11647         "sbcs	r4, r4, r6\n\t"
11648         "stm	%[a]!, {r3, r4}\n\t"
11649         "ldm	%[a], {r3, r4}\n\t"
11650         "ldm	%[b]!, {r5, r6}\n\t"
11651         "sbcs	r3, r3, r5\n\t"
11652         "sbcs	r4, r4, r6\n\t"
11653         "stm	%[a]!, {r3, r4}\n\t"
11654         "ldm	%[a], {r3, r4}\n\t"
11655         "ldm	%[b]!, {r5, r6}\n\t"
11656         "sbcs	r3, r3, r5\n\t"
11657         "sbcs	r4, r4, r6\n\t"
11658         "stm	%[a]!, {r3, r4}\n\t"
11659         "ldm	%[a], {r3, r4}\n\t"
11660         "ldm	%[b]!, {r5, r6}\n\t"
11661         "sbcs	r3, r3, r5\n\t"
11662         "sbcs	r4, r4, r6\n\t"
11663         "stm	%[a]!, {r3, r4}\n\t"
11664         "ldm	%[a], {r3, r4}\n\t"
11665         "ldm	%[b]!, {r5, r6}\n\t"
11666         "sbcs	r3, r3, r5\n\t"
11667         "sbcs	r4, r4, r6\n\t"
11668         "stm	%[a]!, {r3, r4}\n\t"
11669         "ldm	%[a], {r3, r4}\n\t"
11670         "ldm	%[b]!, {r5, r6}\n\t"
11671         "sbcs	r3, r3, r5\n\t"
11672         "sbcs	r4, r4, r6\n\t"
11673         "stm	%[a]!, {r3, r4}\n\t"
11674         "ldm	%[a], {r3, r4}\n\t"
11675         "ldm	%[b]!, {r5, r6}\n\t"
11676         "sbcs	r3, r3, r5\n\t"
11677         "sbcs	r4, r4, r6\n\t"
11678         "stm	%[a]!, {r3, r4}\n\t"
11679         "ldm	%[a], {r3, r4}\n\t"
11680         "ldm	%[b]!, {r5, r6}\n\t"
11681         "sbcs	r3, r3, r5\n\t"
11682         "sbcs	r4, r4, r6\n\t"
11683         "stm	%[a]!, {r3, r4}\n\t"
11684         "ldm	%[a], {r3, r4}\n\t"
11685         "ldm	%[b]!, {r5, r6}\n\t"
11686         "sbcs	r3, r3, r5\n\t"
11687         "sbcs	r4, r4, r6\n\t"
11688         "stm	%[a]!, {r3, r4}\n\t"
11689         "ldm	%[a], {r3, r4}\n\t"
11690         "ldm	%[b]!, {r5, r6}\n\t"
11691         "sbcs	r3, r3, r5\n\t"
11692         "sbcs	r4, r4, r6\n\t"
11693         "stm	%[a]!, {r3, r4}\n\t"
11694         "ldm	%[a], {r3, r4}\n\t"
11695         "ldm	%[b]!, {r5, r6}\n\t"
11696         "sbcs	r3, r3, r5\n\t"
11697         "sbcs	r4, r4, r6\n\t"
11698         "stm	%[a]!, {r3, r4}\n\t"
11699         "ldm	%[a], {r3, r4}\n\t"
11700         "ldm	%[b]!, {r5, r6}\n\t"
11701         "sbcs	r3, r3, r5\n\t"
11702         "sbcs	r4, r4, r6\n\t"
11703         "stm	%[a]!, {r3, r4}\n\t"
11704         "ldm	%[a], {r3, r4}\n\t"
11705         "ldm	%[b]!, {r5, r6}\n\t"
11706         "sbcs	r3, r3, r5\n\t"
11707         "sbcs	r4, r4, r6\n\t"
11708         "stm	%[a]!, {r3, r4}\n\t"
11709         "ldm	%[a], {r3, r4}\n\t"
11710         "ldm	%[b]!, {r5, r6}\n\t"
11711         "sbcs	r3, r3, r5\n\t"
11712         "sbcs	r4, r4, r6\n\t"
11713         "stm	%[a]!, {r3, r4}\n\t"
11714         "ldm	%[a], {r3, r4}\n\t"
11715         "ldm	%[b]!, {r5, r6}\n\t"
11716         "sbcs	r3, r3, r5\n\t"
11717         "sbcs	r4, r4, r6\n\t"
11718         "stm	%[a]!, {r3, r4}\n\t"
11719         "ldm	%[a], {r3, r4}\n\t"
11720         "ldm	%[b]!, {r5, r6}\n\t"
11721         "sbcs	r3, r3, r5\n\t"
11722         "sbcs	r4, r4, r6\n\t"
11723         "stm	%[a]!, {r3, r4}\n\t"
11724         "ldm	%[a], {r3, r4}\n\t"
11725         "ldm	%[b]!, {r5, r6}\n\t"
11726         "sbcs	r3, r3, r5\n\t"
11727         "sbcs	r4, r4, r6\n\t"
11728         "stm	%[a]!, {r3, r4}\n\t"
11729         "ldm	%[a], {r3, r4}\n\t"
11730         "ldm	%[b]!, {r5, r6}\n\t"
11731         "sbcs	r3, r3, r5\n\t"
11732         "sbcs	r4, r4, r6\n\t"
11733         "stm	%[a]!, {r3, r4}\n\t"
11734         "ldm	%[a], {r3, r4}\n\t"
11735         "ldm	%[b]!, {r5, r6}\n\t"
11736         "sbcs	r3, r3, r5\n\t"
11737         "sbcs	r4, r4, r6\n\t"
11738         "stm	%[a]!, {r3, r4}\n\t"
11739         "ldm	%[a], {r3, r4}\n\t"
11740         "ldm	%[b]!, {r5, r6}\n\t"
11741         "sbcs	r3, r3, r5\n\t"
11742         "sbcs	r4, r4, r6\n\t"
11743         "stm	%[a]!, {r3, r4}\n\t"
11744         "ldm	%[a], {r3, r4}\n\t"
11745         "ldm	%[b]!, {r5, r6}\n\t"
11746         "sbcs	r3, r3, r5\n\t"
11747         "sbcs	r4, r4, r6\n\t"
11748         "stm	%[a]!, {r3, r4}\n\t"
11749         "ldm	%[a], {r3, r4}\n\t"
11750         "ldm	%[b]!, {r5, r6}\n\t"
11751         "sbcs	r3, r3, r5\n\t"
11752         "sbcs	r4, r4, r6\n\t"
11753         "stm	%[a]!, {r3, r4}\n\t"
11754         "ldm	%[a], {r3, r4}\n\t"
11755         "ldm	%[b]!, {r5, r6}\n\t"
11756         "sbcs	r3, r3, r5\n\t"
11757         "sbcs	r4, r4, r6\n\t"
11758         "stm	%[a]!, {r3, r4}\n\t"
11759         "ldm	%[a], {r3, r4}\n\t"
11760         "ldm	%[b]!, {r5, r6}\n\t"
11761         "sbcs	r3, r3, r5\n\t"
11762         "sbcs	r4, r4, r6\n\t"
11763         "stm	%[a]!, {r3, r4}\n\t"
11764         "ldm	%[a], {r3, r4}\n\t"
11765         "ldm	%[b]!, {r5, r6}\n\t"
11766         "sbcs	r3, r3, r5\n\t"
11767         "sbcs	r4, r4, r6\n\t"
11768         "stm	%[a]!, {r3, r4}\n\t"
11769         "ldm	%[a], {r3, r4}\n\t"
11770         "ldm	%[b]!, {r5, r6}\n\t"
11771         "sbcs	r3, r3, r5\n\t"
11772         "sbcs	r4, r4, r6\n\t"
11773         "stm	%[a]!, {r3, r4}\n\t"
11774         "ldm	%[a], {r3, r4}\n\t"
11775         "ldm	%[b]!, {r5, r6}\n\t"
11776         "sbcs	r3, r3, r5\n\t"
11777         "sbcs	r4, r4, r6\n\t"
11778         "stm	%[a]!, {r3, r4}\n\t"
11779         "ldm	%[a], {r3, r4}\n\t"
11780         "ldm	%[b]!, {r5, r6}\n\t"
11781         "sbcs	r3, r3, r5\n\t"
11782         "sbcs	r4, r4, r6\n\t"
11783         "stm	%[a]!, {r3, r4}\n\t"
11784         "ldm	%[a], {r3, r4}\n\t"
11785         "ldm	%[b]!, {r5, r6}\n\t"
11786         "sbcs	r3, r3, r5\n\t"
11787         "sbcs	r4, r4, r6\n\t"
11788         "stm	%[a]!, {r3, r4}\n\t"
11789         "ldm	%[a], {r3, r4}\n\t"
11790         "ldm	%[b]!, {r5, r6}\n\t"
11791         "sbcs	r3, r3, r5\n\t"
11792         "sbcs	r4, r4, r6\n\t"
11793         "stm	%[a]!, {r3, r4}\n\t"
11794         "ldm	%[a], {r3, r4}\n\t"
11795         "ldm	%[b]!, {r5, r6}\n\t"
11796         "sbcs	r3, r3, r5\n\t"
11797         "sbcs	r4, r4, r6\n\t"
11798         "stm	%[a]!, {r3, r4}\n\t"
11799         "sbc	%[c], %[c], %[c]\n\t"
11800         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
11801         :
11802         : "memory", "r3", "r4", "r5", "r6"
11803     );
11804 
11805     return c;
11806 }
11807 
11808 /* Add b to a into r. (r = a + b)
11809  *
11810  * r  A single precision integer.
11811  * a  A single precision integer.
11812  * b  A single precision integer.
11813  */
sp_4096_add_128(sp_digit * r,const sp_digit * a,const sp_digit * b)11814 SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
11815         const sp_digit* b)
11816 {
11817     sp_digit c = 0;
11818 
11819     __asm__ __volatile__ (
11820         "ldm	%[a]!, {r4, r5}\n\t"
11821         "ldm	%[b]!, {r6, r8}\n\t"
11822         "adds	r4, r4, r6\n\t"
11823         "adcs	r5, r5, r8\n\t"
11824         "stm	%[r]!, {r4, r5}\n\t"
11825         "ldm	%[a]!, {r4, r5}\n\t"
11826         "ldm	%[b]!, {r6, r8}\n\t"
11827         "adcs	r4, r4, r6\n\t"
11828         "adcs	r5, r5, r8\n\t"
11829         "stm	%[r]!, {r4, r5}\n\t"
11830         "ldm	%[a]!, {r4, r5}\n\t"
11831         "ldm	%[b]!, {r6, r8}\n\t"
11832         "adcs	r4, r4, r6\n\t"
11833         "adcs	r5, r5, r8\n\t"
11834         "stm	%[r]!, {r4, r5}\n\t"
11835         "ldm	%[a]!, {r4, r5}\n\t"
11836         "ldm	%[b]!, {r6, r8}\n\t"
11837         "adcs	r4, r4, r6\n\t"
11838         "adcs	r5, r5, r8\n\t"
11839         "stm	%[r]!, {r4, r5}\n\t"
11840         "ldm	%[a]!, {r4, r5}\n\t"
11841         "ldm	%[b]!, {r6, r8}\n\t"
11842         "adcs	r4, r4, r6\n\t"
11843         "adcs	r5, r5, r8\n\t"
11844         "stm	%[r]!, {r4, r5}\n\t"
11845         "ldm	%[a]!, {r4, r5}\n\t"
11846         "ldm	%[b]!, {r6, r8}\n\t"
11847         "adcs	r4, r4, r6\n\t"
11848         "adcs	r5, r5, r8\n\t"
11849         "stm	%[r]!, {r4, r5}\n\t"
11850         "ldm	%[a]!, {r4, r5}\n\t"
11851         "ldm	%[b]!, {r6, r8}\n\t"
11852         "adcs	r4, r4, r6\n\t"
11853         "adcs	r5, r5, r8\n\t"
11854         "stm	%[r]!, {r4, r5}\n\t"
11855         "ldm	%[a]!, {r4, r5}\n\t"
11856         "ldm	%[b]!, {r6, r8}\n\t"
11857         "adcs	r4, r4, r6\n\t"
11858         "adcs	r5, r5, r8\n\t"
11859         "stm	%[r]!, {r4, r5}\n\t"
11860         "ldm	%[a]!, {r4, r5}\n\t"
11861         "ldm	%[b]!, {r6, r8}\n\t"
11862         "adcs	r4, r4, r6\n\t"
11863         "adcs	r5, r5, r8\n\t"
11864         "stm	%[r]!, {r4, r5}\n\t"
11865         "ldm	%[a]!, {r4, r5}\n\t"
11866         "ldm	%[b]!, {r6, r8}\n\t"
11867         "adcs	r4, r4, r6\n\t"
11868         "adcs	r5, r5, r8\n\t"
11869         "stm	%[r]!, {r4, r5}\n\t"
11870         "ldm	%[a]!, {r4, r5}\n\t"
11871         "ldm	%[b]!, {r6, r8}\n\t"
11872         "adcs	r4, r4, r6\n\t"
11873         "adcs	r5, r5, r8\n\t"
11874         "stm	%[r]!, {r4, r5}\n\t"
11875         "ldm	%[a]!, {r4, r5}\n\t"
11876         "ldm	%[b]!, {r6, r8}\n\t"
11877         "adcs	r4, r4, r6\n\t"
11878         "adcs	r5, r5, r8\n\t"
11879         "stm	%[r]!, {r4, r5}\n\t"
11880         "ldm	%[a]!, {r4, r5}\n\t"
11881         "ldm	%[b]!, {r6, r8}\n\t"
11882         "adcs	r4, r4, r6\n\t"
11883         "adcs	r5, r5, r8\n\t"
11884         "stm	%[r]!, {r4, r5}\n\t"
11885         "ldm	%[a]!, {r4, r5}\n\t"
11886         "ldm	%[b]!, {r6, r8}\n\t"
11887         "adcs	r4, r4, r6\n\t"
11888         "adcs	r5, r5, r8\n\t"
11889         "stm	%[r]!, {r4, r5}\n\t"
11890         "ldm	%[a]!, {r4, r5}\n\t"
11891         "ldm	%[b]!, {r6, r8}\n\t"
11892         "adcs	r4, r4, r6\n\t"
11893         "adcs	r5, r5, r8\n\t"
11894         "stm	%[r]!, {r4, r5}\n\t"
11895         "ldm	%[a]!, {r4, r5}\n\t"
11896         "ldm	%[b]!, {r6, r8}\n\t"
11897         "adcs	r4, r4, r6\n\t"
11898         "adcs	r5, r5, r8\n\t"
11899         "stm	%[r]!, {r4, r5}\n\t"
11900         "ldm	%[a]!, {r4, r5}\n\t"
11901         "ldm	%[b]!, {r6, r8}\n\t"
11902         "adcs	r4, r4, r6\n\t"
11903         "adcs	r5, r5, r8\n\t"
11904         "stm	%[r]!, {r4, r5}\n\t"
11905         "ldm	%[a]!, {r4, r5}\n\t"
11906         "ldm	%[b]!, {r6, r8}\n\t"
11907         "adcs	r4, r4, r6\n\t"
11908         "adcs	r5, r5, r8\n\t"
11909         "stm	%[r]!, {r4, r5}\n\t"
11910         "ldm	%[a]!, {r4, r5}\n\t"
11911         "ldm	%[b]!, {r6, r8}\n\t"
11912         "adcs	r4, r4, r6\n\t"
11913         "adcs	r5, r5, r8\n\t"
11914         "stm	%[r]!, {r4, r5}\n\t"
11915         "ldm	%[a]!, {r4, r5}\n\t"
11916         "ldm	%[b]!, {r6, r8}\n\t"
11917         "adcs	r4, r4, r6\n\t"
11918         "adcs	r5, r5, r8\n\t"
11919         "stm	%[r]!, {r4, r5}\n\t"
11920         "ldm	%[a]!, {r4, r5}\n\t"
11921         "ldm	%[b]!, {r6, r8}\n\t"
11922         "adcs	r4, r4, r6\n\t"
11923         "adcs	r5, r5, r8\n\t"
11924         "stm	%[r]!, {r4, r5}\n\t"
11925         "ldm	%[a]!, {r4, r5}\n\t"
11926         "ldm	%[b]!, {r6, r8}\n\t"
11927         "adcs	r4, r4, r6\n\t"
11928         "adcs	r5, r5, r8\n\t"
11929         "stm	%[r]!, {r4, r5}\n\t"
11930         "ldm	%[a]!, {r4, r5}\n\t"
11931         "ldm	%[b]!, {r6, r8}\n\t"
11932         "adcs	r4, r4, r6\n\t"
11933         "adcs	r5, r5, r8\n\t"
11934         "stm	%[r]!, {r4, r5}\n\t"
11935         "ldm	%[a]!, {r4, r5}\n\t"
11936         "ldm	%[b]!, {r6, r8}\n\t"
11937         "adcs	r4, r4, r6\n\t"
11938         "adcs	r5, r5, r8\n\t"
11939         "stm	%[r]!, {r4, r5}\n\t"
11940         "ldm	%[a]!, {r4, r5}\n\t"
11941         "ldm	%[b]!, {r6, r8}\n\t"
11942         "adcs	r4, r4, r6\n\t"
11943         "adcs	r5, r5, r8\n\t"
11944         "stm	%[r]!, {r4, r5}\n\t"
11945         "ldm	%[a]!, {r4, r5}\n\t"
11946         "ldm	%[b]!, {r6, r8}\n\t"
11947         "adcs	r4, r4, r6\n\t"
11948         "adcs	r5, r5, r8\n\t"
11949         "stm	%[r]!, {r4, r5}\n\t"
11950         "ldm	%[a]!, {r4, r5}\n\t"
11951         "ldm	%[b]!, {r6, r8}\n\t"
11952         "adcs	r4, r4, r6\n\t"
11953         "adcs	r5, r5, r8\n\t"
11954         "stm	%[r]!, {r4, r5}\n\t"
11955         "ldm	%[a]!, {r4, r5}\n\t"
11956         "ldm	%[b]!, {r6, r8}\n\t"
11957         "adcs	r4, r4, r6\n\t"
11958         "adcs	r5, r5, r8\n\t"
11959         "stm	%[r]!, {r4, r5}\n\t"
11960         "ldm	%[a]!, {r4, r5}\n\t"
11961         "ldm	%[b]!, {r6, r8}\n\t"
11962         "adcs	r4, r4, r6\n\t"
11963         "adcs	r5, r5, r8\n\t"
11964         "stm	%[r]!, {r4, r5}\n\t"
11965         "ldm	%[a]!, {r4, r5}\n\t"
11966         "ldm	%[b]!, {r6, r8}\n\t"
11967         "adcs	r4, r4, r6\n\t"
11968         "adcs	r5, r5, r8\n\t"
11969         "stm	%[r]!, {r4, r5}\n\t"
11970         "ldm	%[a]!, {r4, r5}\n\t"
11971         "ldm	%[b]!, {r6, r8}\n\t"
11972         "adcs	r4, r4, r6\n\t"
11973         "adcs	r5, r5, r8\n\t"
11974         "stm	%[r]!, {r4, r5}\n\t"
11975         "ldm	%[a]!, {r4, r5}\n\t"
11976         "ldm	%[b]!, {r6, r8}\n\t"
11977         "adcs	r4, r4, r6\n\t"
11978         "adcs	r5, r5, r8\n\t"
11979         "stm	%[r]!, {r4, r5}\n\t"
11980         "ldm	%[a]!, {r4, r5}\n\t"
11981         "ldm	%[b]!, {r6, r8}\n\t"
11982         "adcs	r4, r4, r6\n\t"
11983         "adcs	r5, r5, r8\n\t"
11984         "stm	%[r]!, {r4, r5}\n\t"
11985         "ldm	%[a]!, {r4, r5}\n\t"
11986         "ldm	%[b]!, {r6, r8}\n\t"
11987         "adcs	r4, r4, r6\n\t"
11988         "adcs	r5, r5, r8\n\t"
11989         "stm	%[r]!, {r4, r5}\n\t"
11990         "ldm	%[a]!, {r4, r5}\n\t"
11991         "ldm	%[b]!, {r6, r8}\n\t"
11992         "adcs	r4, r4, r6\n\t"
11993         "adcs	r5, r5, r8\n\t"
11994         "stm	%[r]!, {r4, r5}\n\t"
11995         "ldm	%[a]!, {r4, r5}\n\t"
11996         "ldm	%[b]!, {r6, r8}\n\t"
11997         "adcs	r4, r4, r6\n\t"
11998         "adcs	r5, r5, r8\n\t"
11999         "stm	%[r]!, {r4, r5}\n\t"
12000         "ldm	%[a]!, {r4, r5}\n\t"
12001         "ldm	%[b]!, {r6, r8}\n\t"
12002         "adcs	r4, r4, r6\n\t"
12003         "adcs	r5, r5, r8\n\t"
12004         "stm	%[r]!, {r4, r5}\n\t"
12005         "ldm	%[a]!, {r4, r5}\n\t"
12006         "ldm	%[b]!, {r6, r8}\n\t"
12007         "adcs	r4, r4, r6\n\t"
12008         "adcs	r5, r5, r8\n\t"
12009         "stm	%[r]!, {r4, r5}\n\t"
12010         "ldm	%[a]!, {r4, r5}\n\t"
12011         "ldm	%[b]!, {r6, r8}\n\t"
12012         "adcs	r4, r4, r6\n\t"
12013         "adcs	r5, r5, r8\n\t"
12014         "stm	%[r]!, {r4, r5}\n\t"
12015         "ldm	%[a]!, {r4, r5}\n\t"
12016         "ldm	%[b]!, {r6, r8}\n\t"
12017         "adcs	r4, r4, r6\n\t"
12018         "adcs	r5, r5, r8\n\t"
12019         "stm	%[r]!, {r4, r5}\n\t"
12020         "ldm	%[a]!, {r4, r5}\n\t"
12021         "ldm	%[b]!, {r6, r8}\n\t"
12022         "adcs	r4, r4, r6\n\t"
12023         "adcs	r5, r5, r8\n\t"
12024         "stm	%[r]!, {r4, r5}\n\t"
12025         "ldm	%[a]!, {r4, r5}\n\t"
12026         "ldm	%[b]!, {r6, r8}\n\t"
12027         "adcs	r4, r4, r6\n\t"
12028         "adcs	r5, r5, r8\n\t"
12029         "stm	%[r]!, {r4, r5}\n\t"
12030         "ldm	%[a]!, {r4, r5}\n\t"
12031         "ldm	%[b]!, {r6, r8}\n\t"
12032         "adcs	r4, r4, r6\n\t"
12033         "adcs	r5, r5, r8\n\t"
12034         "stm	%[r]!, {r4, r5}\n\t"
12035         "ldm	%[a]!, {r4, r5}\n\t"
12036         "ldm	%[b]!, {r6, r8}\n\t"
12037         "adcs	r4, r4, r6\n\t"
12038         "adcs	r5, r5, r8\n\t"
12039         "stm	%[r]!, {r4, r5}\n\t"
12040         "ldm	%[a]!, {r4, r5}\n\t"
12041         "ldm	%[b]!, {r6, r8}\n\t"
12042         "adcs	r4, r4, r6\n\t"
12043         "adcs	r5, r5, r8\n\t"
12044         "stm	%[r]!, {r4, r5}\n\t"
12045         "ldm	%[a]!, {r4, r5}\n\t"
12046         "ldm	%[b]!, {r6, r8}\n\t"
12047         "adcs	r4, r4, r6\n\t"
12048         "adcs	r5, r5, r8\n\t"
12049         "stm	%[r]!, {r4, r5}\n\t"
12050         "ldm	%[a]!, {r4, r5}\n\t"
12051         "ldm	%[b]!, {r6, r8}\n\t"
12052         "adcs	r4, r4, r6\n\t"
12053         "adcs	r5, r5, r8\n\t"
12054         "stm	%[r]!, {r4, r5}\n\t"
12055         "ldm	%[a]!, {r4, r5}\n\t"
12056         "ldm	%[b]!, {r6, r8}\n\t"
12057         "adcs	r4, r4, r6\n\t"
12058         "adcs	r5, r5, r8\n\t"
12059         "stm	%[r]!, {r4, r5}\n\t"
12060         "ldm	%[a]!, {r4, r5}\n\t"
12061         "ldm	%[b]!, {r6, r8}\n\t"
12062         "adcs	r4, r4, r6\n\t"
12063         "adcs	r5, r5, r8\n\t"
12064         "stm	%[r]!, {r4, r5}\n\t"
12065         "ldm	%[a]!, {r4, r5}\n\t"
12066         "ldm	%[b]!, {r6, r8}\n\t"
12067         "adcs	r4, r4, r6\n\t"
12068         "adcs	r5, r5, r8\n\t"
12069         "stm	%[r]!, {r4, r5}\n\t"
12070         "ldm	%[a]!, {r4, r5}\n\t"
12071         "ldm	%[b]!, {r6, r8}\n\t"
12072         "adcs	r4, r4, r6\n\t"
12073         "adcs	r5, r5, r8\n\t"
12074         "stm	%[r]!, {r4, r5}\n\t"
12075         "ldm	%[a]!, {r4, r5}\n\t"
12076         "ldm	%[b]!, {r6, r8}\n\t"
12077         "adcs	r4, r4, r6\n\t"
12078         "adcs	r5, r5, r8\n\t"
12079         "stm	%[r]!, {r4, r5}\n\t"
12080         "ldm	%[a]!, {r4, r5}\n\t"
12081         "ldm	%[b]!, {r6, r8}\n\t"
12082         "adcs	r4, r4, r6\n\t"
12083         "adcs	r5, r5, r8\n\t"
12084         "stm	%[r]!, {r4, r5}\n\t"
12085         "ldm	%[a]!, {r4, r5}\n\t"
12086         "ldm	%[b]!, {r6, r8}\n\t"
12087         "adcs	r4, r4, r6\n\t"
12088         "adcs	r5, r5, r8\n\t"
12089         "stm	%[r]!, {r4, r5}\n\t"
12090         "ldm	%[a]!, {r4, r5}\n\t"
12091         "ldm	%[b]!, {r6, r8}\n\t"
12092         "adcs	r4, r4, r6\n\t"
12093         "adcs	r5, r5, r8\n\t"
12094         "stm	%[r]!, {r4, r5}\n\t"
12095         "ldm	%[a]!, {r4, r5}\n\t"
12096         "ldm	%[b]!, {r6, r8}\n\t"
12097         "adcs	r4, r4, r6\n\t"
12098         "adcs	r5, r5, r8\n\t"
12099         "stm	%[r]!, {r4, r5}\n\t"
12100         "ldm	%[a]!, {r4, r5}\n\t"
12101         "ldm	%[b]!, {r6, r8}\n\t"
12102         "adcs	r4, r4, r6\n\t"
12103         "adcs	r5, r5, r8\n\t"
12104         "stm	%[r]!, {r4, r5}\n\t"
12105         "ldm	%[a]!, {r4, r5}\n\t"
12106         "ldm	%[b]!, {r6, r8}\n\t"
12107         "adcs	r4, r4, r6\n\t"
12108         "adcs	r5, r5, r8\n\t"
12109         "stm	%[r]!, {r4, r5}\n\t"
12110         "ldm	%[a]!, {r4, r5}\n\t"
12111         "ldm	%[b]!, {r6, r8}\n\t"
12112         "adcs	r4, r4, r6\n\t"
12113         "adcs	r5, r5, r8\n\t"
12114         "stm	%[r]!, {r4, r5}\n\t"
12115         "ldm	%[a]!, {r4, r5}\n\t"
12116         "ldm	%[b]!, {r6, r8}\n\t"
12117         "adcs	r4, r4, r6\n\t"
12118         "adcs	r5, r5, r8\n\t"
12119         "stm	%[r]!, {r4, r5}\n\t"
12120         "ldm	%[a]!, {r4, r5}\n\t"
12121         "ldm	%[b]!, {r6, r8}\n\t"
12122         "adcs	r4, r4, r6\n\t"
12123         "adcs	r5, r5, r8\n\t"
12124         "stm	%[r]!, {r4, r5}\n\t"
12125         "ldm	%[a]!, {r4, r5}\n\t"
12126         "ldm	%[b]!, {r6, r8}\n\t"
12127         "adcs	r4, r4, r6\n\t"
12128         "adcs	r5, r5, r8\n\t"
12129         "stm	%[r]!, {r4, r5}\n\t"
12130         "ldm	%[a]!, {r4, r5}\n\t"
12131         "ldm	%[b]!, {r6, r8}\n\t"
12132         "adcs	r4, r4, r6\n\t"
12133         "adcs	r5, r5, r8\n\t"
12134         "stm	%[r]!, {r4, r5}\n\t"
12135         "ldm	%[a]!, {r4, r5}\n\t"
12136         "ldm	%[b]!, {r6, r8}\n\t"
12137         "adcs	r4, r4, r6\n\t"
12138         "adcs	r5, r5, r8\n\t"
12139         "stm	%[r]!, {r4, r5}\n\t"
12140         "mov	%[c], #0\n\t"
12141         "adc	%[c], %[c], %[c]\n\t"
12142         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
12143         :
12144         : "memory", "r4", "r5", "r6", "r8"
12145     );
12146 
12147     return c;
12148 }
12149 
12150 /* Multiply a and b into r. (r = a * b)
12151  *
12152  * r  A single precision integer.
12153  * a  A single precision integer.
12154  * b  A single precision integer.
12155  */
sp_4096_mul_128(sp_digit * r,const sp_digit * a,const sp_digit * b)12156 SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
12157         const sp_digit* b)
12158 {
12159     sp_digit* z0 = r;
12160     sp_digit z1[128];
12161     sp_digit a1[64];
12162     sp_digit b1[64];
12163     sp_digit z2[128];
12164     sp_digit u;
12165     sp_digit ca;
12166     sp_digit cb;
12167 
12168     ca = sp_2048_add_64(a1, a, &a[64]);
12169     cb = sp_2048_add_64(b1, b, &b[64]);
12170     u  = ca & cb;
12171     sp_2048_mul_64(z1, a1, b1);
12172     sp_2048_mul_64(z2, &a[64], &b[64]);
12173     sp_2048_mul_64(z0, a, b);
12174     sp_2048_mask_64(r + 128, a1, 0 - cb);
12175     sp_2048_mask_64(b1, b1, 0 - ca);
12176     u += sp_2048_add_64(r + 128, r + 128, b1);
12177     u += sp_4096_sub_in_place_128(z1, z2);
12178     u += sp_4096_sub_in_place_128(z1, z0);
12179     u += sp_4096_add_128(r + 64, r + 64, z1);
12180     r[192] = u;
12181     XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
12182     (void)sp_4096_add_128(r + 128, r + 128, z2);
12183 }
12184 
12185 /* Square a and put result in r. (r = a * a)
12186  *
12187  * r  A single precision integer.
12188  * a  A single precision integer.
12189  */
sp_4096_sqr_128(sp_digit * r,const sp_digit * a)12190 SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
12191 {
12192     sp_digit* z0 = r;
12193     sp_digit z2[128];
12194     sp_digit z1[128];
12195     sp_digit a1[64];
12196     sp_digit u;
12197 
12198     u = sp_2048_add_64(a1, a, &a[64]);
12199     sp_2048_sqr_64(z1, a1);
12200     sp_2048_sqr_64(z2, &a[64]);
12201     sp_2048_sqr_64(z0, a);
12202     sp_2048_mask_64(r + 128, a1, 0 - u);
12203     u += sp_2048_add_64(r + 128, r + 128, r + 128);
12204     u += sp_4096_sub_in_place_128(z1, z2);
12205     u += sp_4096_sub_in_place_128(z1, z0);
12206     u += sp_4096_add_128(r + 64, r + 64, z1);
12207     r[192] = u;
12208     XMEMSET(r + 192 + 1, 0, sizeof(sp_digit) * (64 - 1));
12209     (void)sp_4096_add_128(r + 128, r + 128, z2);
12210 }
12211 
12212 #endif /* !WOLFSSL_SP_SMALL */
12213 #ifdef WOLFSSL_SP_SMALL
12214 /* Add b to a into r. (r = a + b)
12215  *
12216  * r  A single precision integer.
12217  * a  A single precision integer.
12218  * b  A single precision integer.
12219  */
sp_4096_add_128(sp_digit * r,const sp_digit * a,const sp_digit * b)12220 SP_NOINLINE static sp_digit sp_4096_add_128(sp_digit* r, const sp_digit* a,
12221         const sp_digit* b)
12222 {
12223     sp_digit c = 0;
12224 
12225     __asm__ __volatile__ (
12226         "mov	r6, %[a]\n\t"
12227         "mov	r8, #0\n\t"
12228         "add	r6, r6, #512\n\t"
12229         "sub	r8, r8, #1\n\t"
12230         "\n1:\n\t"
12231         "adds	%[c], %[c], r8\n\t"
12232         "ldr	r4, [%[a]]\n\t"
12233         "ldr	r5, [%[b]]\n\t"
12234         "adcs	r4, r4, r5\n\t"
12235         "str	r4, [%[r]]\n\t"
12236         "mov	%[c], #0\n\t"
12237         "adc	%[c], %[c], %[c]\n\t"
12238         "add	%[a], %[a], #4\n\t"
12239         "add	%[b], %[b], #4\n\t"
12240         "add	%[r], %[r], #4\n\t"
12241         "cmp	%[a], r6\n\t"
12242 #ifdef __GNUC__
12243         "bne	1b\n\t"
12244 #else
12245         "bne.n	1b\n\t"
12246 #endif /* __GNUC__ */
12247         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
12248         :
12249         : "memory", "r4", "r5", "r6", "r8"
12250     );
12251 
12252     return c;
12253 }
12254 
12255 #endif /* WOLFSSL_SP_SMALL */
12256 #ifdef WOLFSSL_SP_SMALL
12257 /* Sub b from a into a. (a -= b)
12258  *
12259  * a  A single precision integer.
12260  * b  A single precision integer.
12261  */
sp_4096_sub_in_place_128(sp_digit * a,const sp_digit * b)12262 SP_NOINLINE static sp_digit sp_4096_sub_in_place_128(sp_digit* a,
12263         const sp_digit* b)
12264 {
12265     sp_digit c = 0;
12266     __asm__ __volatile__ (
12267         "mov	r8, %[a]\n\t"
12268         "add	r8, r8, #512\n\t"
12269         "\n1:\n\t"
12270         "mov	r5, #0\n\t"
12271         "subs	r5, r5, %[c]\n\t"
12272         "ldr	r3, [%[a]]\n\t"
12273         "ldr	r4, [%[a], #4]\n\t"
12274         "ldr	r5, [%[b]]\n\t"
12275         "ldr	r6, [%[b], #4]\n\t"
12276         "sbcs	r3, r3, r5\n\t"
12277         "sbcs	r4, r4, r6\n\t"
12278         "str	r3, [%[a]]\n\t"
12279         "str	r4, [%[a], #4]\n\t"
12280         "sbc	%[c], %[c], %[c]\n\t"
12281         "add	%[a], %[a], #8\n\t"
12282         "add	%[b], %[b], #8\n\t"
12283         "cmp	%[a], r8\n\t"
12284 #ifdef __GNUC__
12285         "bne	1b\n\t"
12286 #else
12287         "bne.n	1b\n\t"
12288 #endif /* __GNUC__ */
12289         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
12290         :
12291         : "memory", "r3", "r4", "r5", "r6", "r8"
12292     );
12293 
12294     return c;
12295 }
12296 
12297 #endif /* WOLFSSL_SP_SMALL */
12298 #ifdef WOLFSSL_SP_SMALL
12299 /* Multiply a and b into r. (r = a * b)
12300  *
12301  * r  A single precision integer.
12302  * a  A single precision integer.
12303  * b  A single precision integer.
12304  */
sp_4096_mul_128(sp_digit * r,const sp_digit * a,const sp_digit * b)12305 SP_NOINLINE static void sp_4096_mul_128(sp_digit* r, const sp_digit* a,
12306         const sp_digit* b)
12307 {
12308     sp_digit tmp_arr[128 * 2];
12309     sp_digit* tmp = tmp_arr;
12310     __asm__ __volatile__ (
12311         "mov	r3, #0\n\t"
12312         "mov	r4, #0\n\t"
12313         "mov	r9, r3\n\t"
12314         "mov	r12, %[r]\n\t"
12315         "mov	r10, %[a]\n\t"
12316         "mov	r11, %[b]\n\t"
12317         "mov	r6, #2\n\t"
12318         "lsl	r6, r6, #8\n\t"
12319         "add	r6, r6, r10\n\t"
12320         "mov	r14, r6\n\t"
12321         "\n1:\n\t"
12322         "mov	%[r], #0\n\t"
12323         "mov	r5, #0\n\t"
12324         "mov	r6, #1\n\t"
12325         "lsl	r6, r6, #8\n\t"
12326         "add	r6, r6, #252\n\t"
12327         "mov	%[a], r9\n\t"
12328         "subs	%[a], %[a], r6\n\t"
12329         "sbc	r6, r6, r6\n\t"
12330         "mvn	r6, r6\n\t"
12331         "and	%[a], %[a], r6\n\t"
12332         "mov	%[b], r9\n\t"
12333         "sub	%[b], %[b], %[a]\n\t"
12334         "add	%[a], %[a], r10\n\t"
12335         "add	%[b], %[b], r11\n\t"
12336         "\n2:\n\t"
12337         /* Multiply Start */
12338         "ldr	r6, [%[a]]\n\t"
12339         "ldr	r8, [%[b]]\n\t"
12340         "umull	r6, r8, r6, r8\n\t"
12341         "adds	r3, r3, r6\n\t"
12342         "adcs 	r4, r4, r8\n\t"
12343         "adc	r5, r5, %[r]\n\t"
12344         /* Multiply Done */
12345         "add	%[a], %[a], #4\n\t"
12346         "sub	%[b], %[b], #4\n\t"
12347         "cmp	%[a], r14\n\t"
12348 #ifdef __GNUC__
12349         "beq	3f\n\t"
12350 #else
12351         "beq.n	3f\n\t"
12352 #endif /* __GNUC__ */
12353         "mov	r6, r9\n\t"
12354         "add	r6, r6, r10\n\t"
12355         "cmp	%[a], r6\n\t"
12356 #ifdef __GNUC__
12357         "ble	2b\n\t"
12358 #else
12359         "ble.n	2b\n\t"
12360 #endif /* __GNUC__ */
12361         "\n3:\n\t"
12362         "mov	%[r], r12\n\t"
12363         "mov	r8, r9\n\t"
12364         "str	r3, [%[r], r8]\n\t"
12365         "mov	r3, r4\n\t"
12366         "mov	r4, r5\n\t"
12367         "add	r8, r8, #4\n\t"
12368         "mov	r9, r8\n\t"
12369         "mov	r6, #3\n\t"
12370         "lsl	r6, r6, #8\n\t"
12371         "add	r6, r6, #248\n\t"
12372         "cmp	r8, r6\n\t"
12373 #ifdef __GNUC__
12374         "ble	1b\n\t"
12375 #else
12376         "ble.n	1b\n\t"
12377 #endif /* __GNUC__ */
12378         "str	r3, [%[r], r8]\n\t"
12379         "mov	%[a], r10\n\t"
12380         "mov	%[b], r11\n\t"
12381         :
12382         : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
12383         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
12384     );
12385 
12386     XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
12387 }
12388 
12389 /* Square a and put result in r. (r = a * a)
12390  *
12391  * r  A single precision integer.
12392  * a  A single precision integer.
12393  */
sp_4096_sqr_128(sp_digit * r,const sp_digit * a)12394 SP_NOINLINE static void sp_4096_sqr_128(sp_digit* r, const sp_digit* a)
12395 {
12396     __asm__ __volatile__ (
12397         "mov	r3, #0\n\t"
12398         "mov	r4, #0\n\t"
12399         "mov	r5, #0\n\t"
12400         "mov	r9, r3\n\t"
12401         "mov	r12, %[r]\n\t"
12402         "mov	r6, #4\n\t"
12403         "lsl	r6, r6, #8\n\t"
12404         "neg	r6, r6\n\t"
12405         "add	sp, sp, r6\n\t"
12406         "mov	r11, sp\n\t"
12407         "mov	r10, %[a]\n\t"
12408         "\n1:\n\t"
12409         "mov	%[r], #0\n\t"
12410         "mov	r6, #1\n\t"
12411         "lsl	r6, r6, #8\n\t"
12412         "add	r6, r6, #252\n\t"
12413         "mov	%[a], r9\n\t"
12414         "subs	%[a], %[a], r6\n\t"
12415         "sbc	r6, r6, r6\n\t"
12416         "mvn	r6, r6\n\t"
12417         "and	%[a], %[a], r6\n\t"
12418         "mov	r2, r9\n\t"
12419         "sub	r2, r2, %[a]\n\t"
12420         "add	%[a], %[a], r10\n\t"
12421         "add	r2, r2, r10\n\t"
12422         "\n2:\n\t"
12423         "cmp	r2, %[a]\n\t"
12424 #ifdef __GNUC__
12425         "beq	4f\n\t"
12426 #else
12427         "beq.n	4f\n\t"
12428 #endif /* __GNUC__ */
12429         /* Multiply * 2: Start */
12430         "ldr	r6, [%[a]]\n\t"
12431         "ldr	r8, [r2]\n\t"
12432         "umull	r6, r8, r6, r8\n\t"
12433         "adds	r3, r3, r6\n\t"
12434         "adcs 	r4, r4, r8\n\t"
12435         "adc	r5, r5, %[r]\n\t"
12436         "adds	r3, r3, r6\n\t"
12437         "adcs 	r4, r4, r8\n\t"
12438         "adc	r5, r5, %[r]\n\t"
12439         /* Multiply * 2: Done */
12440 #ifdef __GNUC__
12441         "bal	5f\n\t"
12442 #else
12443         "bal.n	5f\n\t"
12444 #endif /* __GNUC__ */
12445         "\n4:\n\t"
12446         /* Square: Start */
12447         "ldr	r6, [%[a]]\n\t"
12448         "umull	r6, r8, r6, r6\n\t"
12449         "adds	r3, r3, r6\n\t"
12450         "adcs	r4, r4, r8\n\t"
12451         "adc	r5, r5, %[r]\n\t"
12452         /* Square: Done */
12453         "\n5:\n\t"
12454         "add	%[a], %[a], #4\n\t"
12455         "sub	r2, r2, #4\n\t"
12456         "mov	r6, #2\n\t"
12457         "lsl	r6, r6, #8\n\t"
12458         "add	r6, r6, r10\n\t"
12459         "cmp	%[a], r6\n\t"
12460 #ifdef __GNUC__
12461         "beq	3f\n\t"
12462 #else
12463         "beq.n	3f\n\t"
12464 #endif /* __GNUC__ */
12465         "cmp	%[a], r2\n\t"
12466 #ifdef __GNUC__
12467         "bgt	3f\n\t"
12468 #else
12469         "bgt.n	3f\n\t"
12470 #endif /* __GNUC__ */
12471         "mov	r8, r9\n\t"
12472         "add	r8, r8, r10\n\t"
12473         "cmp	%[a], r8\n\t"
12474 #ifdef __GNUC__
12475         "ble	2b\n\t"
12476 #else
12477         "ble.n	2b\n\t"
12478 #endif /* __GNUC__ */
12479         "\n3:\n\t"
12480         "mov	%[r], r11\n\t"
12481         "mov	r8, r9\n\t"
12482         "str	r3, [%[r], r8]\n\t"
12483         "mov	r3, r4\n\t"
12484         "mov	r4, r5\n\t"
12485         "mov	r5, #0\n\t"
12486         "add	r8, r8, #4\n\t"
12487         "mov	r9, r8\n\t"
12488         "mov	r6, #3\n\t"
12489         "lsl	r6, r6, #8\n\t"
12490         "add	r6, r6, #248\n\t"
12491         "cmp	r8, r6\n\t"
12492 #ifdef __GNUC__
12493         "ble	1b\n\t"
12494 #else
12495         "ble.n	1b\n\t"
12496 #endif /* __GNUC__ */
12497         "mov	%[a], r10\n\t"
12498         "str	r3, [%[r], r8]\n\t"
12499         "mov	%[r], r12\n\t"
12500         "mov	%[a], r11\n\t"
12501         "mov	r3, #3\n\t"
12502         "lsl	r3, r3, #8\n\t"
12503         "add	r3, r3, #252\n\t"
12504         "\n4:\n\t"
12505         "ldr	r6, [%[a], r3]\n\t"
12506         "str	r6, [%[r], r3]\n\t"
12507         "subs	r3, r3, #4\n\t"
12508 #ifdef __GNUC__
12509         "bge	4b\n\t"
12510 #else
12511         "bge.n	4b\n\t"
12512 #endif /* __GNUC__ */
12513         "mov	r6, #4\n\t"
12514         "lsl	r6, r6, #8\n\t"
12515         "add	sp, sp, r6\n\t"
12516         :
12517         : [r] "r" (r), [a] "r" (a)
12518         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
12519     );
12520 }
12521 
12522 #endif /* WOLFSSL_SP_SMALL */
12523 /* Caclulate the bottom digit of -1/a mod 2^n.
12524  *
12525  * a    A single precision number.
12526  * rho  Bottom word of inverse.
12527  */
sp_4096_mont_setup(const sp_digit * a,sp_digit * rho)12528 static void sp_4096_mont_setup(const sp_digit* a, sp_digit* rho)
12529 {
12530     sp_digit x;
12531     sp_digit b;
12532 
12533     b = a[0];
12534     x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
12535     x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
12536     x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
12537     x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
12538 
12539     /* rho = -1/m mod b */
12540     *rho = (sp_digit)0 - x;
12541 }
12542 
12543 /* Mul a by digit b into r. (r = a * b)
12544  *
12545  * r  A single precision integer.
12546  * a  A single precision integer.
12547  * b  A single precision digit.
12548  */
sp_4096_mul_d_128(sp_digit * r,const sp_digit * a,sp_digit b)12549 SP_NOINLINE static void sp_4096_mul_d_128(sp_digit* r, const sp_digit* a,
12550         sp_digit b)
12551 {
12552     __asm__ __volatile__ (
12553         "add	r9, %[a], #512\n\t"
12554         /* A[0] * B */
12555         "ldr	r6, [%[a]], #4\n\t"
12556         "umull	r5, r3, r6, %[b]\n\t"
12557         "mov	r4, #0\n\t"
12558         "str	r5, [%[r]], #4\n\t"
12559         /* A[0] * B - Done */
12560         "\n1:\n\t"
12561         "mov	r5, #0\n\t"
12562         /* A[] * B */
12563         "ldr	r6, [%[a]], #4\n\t"
12564         "umull	r6, r8, r6, %[b]\n\t"
12565         "adds	r3, r3, r6\n\t"
12566         "adcs 	r4, r4, r8\n\t"
12567         "adc	r5, r5, #0\n\t"
12568         /* A[] * B - Done */
12569         "str	r3, [%[r]], #4\n\t"
12570         "mov	r3, r4\n\t"
12571         "mov	r4, r5\n\t"
12572         "cmp	%[a], r9\n\t"
12573 #ifdef __GNUC__
12574         "blt	1b\n\t"
12575 #else
12576         "blt.n	1b\n\t"
12577 #endif /* __GNUC__ */
12578         "str	r3, [%[r]]\n\t"
12579         : [r] "+r" (r), [a] "+r" (a)
12580         : [b] "r" (b)
12581         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
12582     );
12583 }
12584 
12585 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
12586 /* r = 2^n mod m where n is the number of bits to reduce by.
12587  * Given m must be 4096 bits, just need to subtract.
12588  *
12589  * r  A single precision number.
12590  * m  A single precision number.
12591  */
sp_4096_mont_norm_128(sp_digit * r,const sp_digit * m)12592 static void sp_4096_mont_norm_128(sp_digit* r, const sp_digit* m)
12593 {
12594     XMEMSET(r, 0, sizeof(sp_digit) * 128);
12595 
12596     /* r = 2^n mod m */
12597     sp_4096_sub_in_place_128(r, m);
12598 }
12599 
12600 #endif /* (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) | WOLFSSL_HAVE_SP_DH */
12601 /* Conditionally subtract b from a using the mask m.
12602  * m is -1 to subtract and 0 when not copying.
12603  *
12604  * r  A single precision number representing condition subtract result.
12605  * a  A single precision number to subtract from.
12606  * b  A single precision number to subtract.
12607  * m  Mask value to apply.
12608  */
sp_4096_cond_sub_128(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)12609 SP_NOINLINE static sp_digit sp_4096_cond_sub_128(sp_digit* r, const sp_digit* a,
12610         const sp_digit* b, sp_digit m)
12611 {
12612     sp_digit c = 0;
12613 
12614     __asm__ __volatile__ (
12615         "mov	r5, #2\n\t"
12616         "lsl	r5, r5, #8\n\t"
12617         "mov	r9, r5\n\t"
12618         "mov	r8, #0\n\t"
12619         "\n1:\n\t"
12620         "ldr	r6, [%[b], r8]\n\t"
12621         "and	r6, r6, %[m]\n\t"
12622         "mov	r5, #0\n\t"
12623         "subs	r5, r5, %[c]\n\t"
12624         "ldr	r5, [%[a], r8]\n\t"
12625         "sbcs	r5, r5, r6\n\t"
12626         "sbcs	%[c], %[c], %[c]\n\t"
12627         "str	r5, [%[r], r8]\n\t"
12628         "add	r8, r8, #4\n\t"
12629         "cmp	r8, r9\n\t"
12630 #ifdef __GNUC__
12631         "blt	1b\n\t"
12632 #else
12633         "blt.n	1b\n\t"
12634 #endif /* __GNUC__ */
12635         : [c] "+r" (c)
12636         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
12637         : "memory", "r5", "r6", "r8", "r9"
12638     );
12639 
12640     return c;
12641 }
12642 
12643 /* Reduce the number back to 4096 bits using Montgomery reduction.
12644  *
12645  * a   A single precision number to reduce in place.
12646  * m   The single precision number representing the modulus.
12647  * mp  The digit representing the negative inverse of m mod 2^n.
12648  */
sp_4096_mont_reduce_128(sp_digit * a,const sp_digit * m,sp_digit mp)12649 SP_NOINLINE static void sp_4096_mont_reduce_128(sp_digit* a, const sp_digit* m,
12650         sp_digit mp)
12651 {
12652     sp_digit ca = 0;
12653 
12654     __asm__ __volatile__ (
12655         "mov	r9, %[mp]\n\t"
12656         "mov	r12, %[m]\n\t"
12657         "mov	r10, %[a]\n\t"
12658         "mov	r4, #0\n\t"
12659         "add	r11, r10, #512\n\t"
12660         "\n1:\n\t"
12661         /* mu = a[i] * mp */
12662         "mov	%[mp], r9\n\t"
12663         "ldr	%[a], [r10]\n\t"
12664         "mul	%[mp], %[mp], %[a]\n\t"
12665         "mov	%[m], r12\n\t"
12666         "add	r14, r10, #504\n\t"
12667         "\n2:\n\t"
12668         /* a[i+j] += m[j] * mu */
12669         "ldr	%[a], [r10]\n\t"
12670         "mov	r5, #0\n\t"
12671         /* Multiply m[j] and mu - Start */
12672         "ldr	r8, [%[m]], #4\n\t"
12673         "umull	r6, r8, %[mp], r8\n\t"
12674         "adds	%[a], %[a], r6\n\t"
12675         "adc	r5, r5, r8\n\t"
12676         /* Multiply m[j] and mu - Done */
12677         "adds	r4, r4, %[a]\n\t"
12678         "adc	r5, r5, #0\n\t"
12679         "str	r4, [r10], #4\n\t"
12680         /* a[i+j+1] += m[j+1] * mu */
12681         "ldr	%[a], [r10]\n\t"
12682         "mov	r4, #0\n\t"
12683         /* Multiply m[j] and mu - Start */
12684         "ldr	r8, [%[m]], #4\n\t"
12685         "umull	r6, r8, %[mp], r8\n\t"
12686         "adds	%[a], %[a], r6\n\t"
12687         "adc	r4, r4, r8\n\t"
12688         /* Multiply m[j] and mu - Done */
12689         "adds	r5, r5, %[a]\n\t"
12690         "adc	r4, r4, #0\n\t"
12691         "str	r5, [r10], #4\n\t"
12692         "cmp	r10, r14\n\t"
12693 #ifdef __GNUC__
12694         "blt	2b\n\t"
12695 #else
12696         "blt.n	2b\n\t"
12697 #endif /* __GNUC__ */
12698         /* a[i+126] += m[126] * mu */
12699         "ldr	%[a], [r10]\n\t"
12700         "mov	r5, #0\n\t"
12701         /* Multiply m[j] and mu - Start */
12702         "ldr	r8, [%[m]], #4\n\t"
12703         "umull	r6, r8, %[mp], r8\n\t"
12704         "adds	%[a], %[a], r6\n\t"
12705         "adc	r5, r5, r8\n\t"
12706         /* Multiply m[j] and mu - Done */
12707         "adds	r4, r4, %[a]\n\t"
12708         "adc	r5, r5, #0\n\t"
12709         "str	r4, [r10], #4\n\t"
12710         /* a[i+127] += m[127] * mu */
12711         "mov	r4, %[ca]\n\t"
12712         "mov	%[ca], #0\n\t"
12713         /* Multiply m[127] and mu - Start */
12714         "ldr	r8, [%[m]]\n\t"
12715         "umull	r6, r8, %[mp], r8\n\t"
12716         "adds	r5, r5, r6\n\t"
12717         "adcs 	r4, r4, r8\n\t"
12718         "adc	%[ca], %[ca], #0\n\t"
12719         /* Multiply m[127] and mu - Done */
12720         "ldr	r6, [r10]\n\t"
12721         "ldr	r8, [r10, #4]\n\t"
12722         "adds	r6, r6, r5\n\t"
12723         "adcs	r8, r8, r4\n\t"
12724         "adc	%[ca], %[ca], #0\n\t"
12725         "str	r6, [r10]\n\t"
12726         "str	r8, [r10, #4]\n\t"
12727         /* Next word in a */
12728         "sub	r10, r10, #504\n\t"
12729         "cmp	r10, r11\n\t"
12730 #ifdef __GNUC__
12731         "blt	1b\n\t"
12732 #else
12733         "blt.n	1b\n\t"
12734 #endif /* __GNUC__ */
12735         "mov	%[a], r10\n\t"
12736         "mov	%[m], r12\n\t"
12737         : [ca] "+r" (ca), [a] "+r" (a)
12738         : [m] "r" (m), [mp] "r" (mp)
12739         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
12740     );
12741 
12742     sp_4096_cond_sub_128(a - 128, a, m, (sp_digit)0 - ca);
12743 }
12744 
12745 /* Multiply two Montgomery form numbers mod the modulus (prime).
12746  * (r = a * b mod m)
12747  *
12748  * r   Result of multiplication.
12749  * a   First number to multiply in Montgomery form.
12750  * b   Second number to multiply in Montgomery form.
12751  * m   Modulus (prime).
12752  * mp  Montgomery mulitplier.
12753  */
sp_4096_mont_mul_128(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m,sp_digit mp)12754 static void sp_4096_mont_mul_128(sp_digit* r, const sp_digit* a,
12755         const sp_digit* b, const sp_digit* m, sp_digit mp)
12756 {
12757     sp_4096_mul_128(r, a, b);
12758     sp_4096_mont_reduce_128(r, m, mp);
12759 }
12760 
12761 /* Square the Montgomery form number. (r = a * a mod m)
12762  *
12763  * r   Result of squaring.
12764  * a   Number to square in Montgomery form.
12765  * m   Modulus (prime).
12766  * mp  Montgomery mulitplier.
12767  */
sp_4096_mont_sqr_128(sp_digit * r,const sp_digit * a,const sp_digit * m,sp_digit mp)12768 static void sp_4096_mont_sqr_128(sp_digit* r, const sp_digit* a,
12769         const sp_digit* m, sp_digit mp)
12770 {
12771     sp_4096_sqr_128(r, a);
12772     sp_4096_mont_reduce_128(r, m, mp);
12773 }
12774 
12775 #ifdef WOLFSSL_SP_SMALL
12776 /* Sub b from a into r. (r = a - b)
12777  *
12778  * r  A single precision integer.
12779  * a  A single precision integer.
12780  * b  A single precision integer.
12781  */
sp_4096_sub_128(sp_digit * r,const sp_digit * a,const sp_digit * b)12782 SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a,
12783         const sp_digit* b)
12784 {
12785     sp_digit c = 0;
12786 
12787     __asm__ __volatile__ (
12788         "mov	r6, %[a]\n\t"
12789         "mov	r5, #2\n\t"
12790         "lsl	r5, r5, #8\n\t"
12791         "add	r6, r6, r5\n\t"
12792         "\n1:\n\t"
12793         "mov	r5, #0\n\t"
12794         "subs	r5, r5, %[c]\n\t"
12795         "ldr	r4, [%[a]]\n\t"
12796         "ldr	r5, [%[b]]\n\t"
12797         "sbcs	r4, r4, r5\n\t"
12798         "str	r4, [%[r]]\n\t"
12799         "sbc	%[c], %[c], %[c]\n\t"
12800         "add	%[a], %[a], #4\n\t"
12801         "add	%[b], %[b], #4\n\t"
12802         "add	%[r], %[r], #4\n\t"
12803         "cmp	%[a], r6\n\t"
12804 #ifdef __GNUC__
12805         "bne	1b\n\t"
12806 #else
12807         "bne.n	1b\n\t"
12808 #endif /* __GNUC__ */
12809         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
12810         :
12811         : "memory", "r4", "r5", "r6"
12812     );
12813 
12814     return c;
12815 }
12816 
12817 #else
12818 /* Sub b from a into r. (r = a - b)
12819  *
12820  * r  A single precision integer.
12821  * a  A single precision integer.
12822  * b  A single precision integer.
12823  */
sp_4096_sub_128(sp_digit * r,const sp_digit * a,const sp_digit * b)12824 SP_NOINLINE static sp_digit sp_4096_sub_128(sp_digit* r, const sp_digit* a,
12825         const sp_digit* b)
12826 {
12827     sp_digit c = 0;
12828 
12829     __asm__ __volatile__ (
12830         "ldr	r4, [%[a], #0]\n\t"
12831         "ldr	r5, [%[a], #4]\n\t"
12832         "ldr	r6, [%[b], #0]\n\t"
12833         "ldr	r8, [%[b], #4]\n\t"
12834         "subs	r4, r4, r6\n\t"
12835         "sbcs	r5, r5, r8\n\t"
12836         "str	r4, [%[r], #0]\n\t"
12837         "str	r5, [%[r], #4]\n\t"
12838         "ldr	r4, [%[a], #8]\n\t"
12839         "ldr	r5, [%[a], #12]\n\t"
12840         "ldr	r6, [%[b], #8]\n\t"
12841         "ldr	r8, [%[b], #12]\n\t"
12842         "sbcs	r4, r4, r6\n\t"
12843         "sbcs	r5, r5, r8\n\t"
12844         "str	r4, [%[r], #8]\n\t"
12845         "str	r5, [%[r], #12]\n\t"
12846         "ldr	r4, [%[a], #16]\n\t"
12847         "ldr	r5, [%[a], #20]\n\t"
12848         "ldr	r6, [%[b], #16]\n\t"
12849         "ldr	r8, [%[b], #20]\n\t"
12850         "sbcs	r4, r4, r6\n\t"
12851         "sbcs	r5, r5, r8\n\t"
12852         "str	r4, [%[r], #16]\n\t"
12853         "str	r5, [%[r], #20]\n\t"
12854         "ldr	r4, [%[a], #24]\n\t"
12855         "ldr	r5, [%[a], #28]\n\t"
12856         "ldr	r6, [%[b], #24]\n\t"
12857         "ldr	r8, [%[b], #28]\n\t"
12858         "sbcs	r4, r4, r6\n\t"
12859         "sbcs	r5, r5, r8\n\t"
12860         "str	r4, [%[r], #24]\n\t"
12861         "str	r5, [%[r], #28]\n\t"
12862         "ldr	r4, [%[a], #32]\n\t"
12863         "ldr	r5, [%[a], #36]\n\t"
12864         "ldr	r6, [%[b], #32]\n\t"
12865         "ldr	r8, [%[b], #36]\n\t"
12866         "sbcs	r4, r4, r6\n\t"
12867         "sbcs	r5, r5, r8\n\t"
12868         "str	r4, [%[r], #32]\n\t"
12869         "str	r5, [%[r], #36]\n\t"
12870         "ldr	r4, [%[a], #40]\n\t"
12871         "ldr	r5, [%[a], #44]\n\t"
12872         "ldr	r6, [%[b], #40]\n\t"
12873         "ldr	r8, [%[b], #44]\n\t"
12874         "sbcs	r4, r4, r6\n\t"
12875         "sbcs	r5, r5, r8\n\t"
12876         "str	r4, [%[r], #40]\n\t"
12877         "str	r5, [%[r], #44]\n\t"
12878         "ldr	r4, [%[a], #48]\n\t"
12879         "ldr	r5, [%[a], #52]\n\t"
12880         "ldr	r6, [%[b], #48]\n\t"
12881         "ldr	r8, [%[b], #52]\n\t"
12882         "sbcs	r4, r4, r6\n\t"
12883         "sbcs	r5, r5, r8\n\t"
12884         "str	r4, [%[r], #48]\n\t"
12885         "str	r5, [%[r], #52]\n\t"
12886         "ldr	r4, [%[a], #56]\n\t"
12887         "ldr	r5, [%[a], #60]\n\t"
12888         "ldr	r6, [%[b], #56]\n\t"
12889         "ldr	r8, [%[b], #60]\n\t"
12890         "sbcs	r4, r4, r6\n\t"
12891         "sbcs	r5, r5, r8\n\t"
12892         "str	r4, [%[r], #56]\n\t"
12893         "str	r5, [%[r], #60]\n\t"
12894         "ldr	r4, [%[a], #64]\n\t"
12895         "ldr	r5, [%[a], #68]\n\t"
12896         "ldr	r6, [%[b], #64]\n\t"
12897         "ldr	r8, [%[b], #68]\n\t"
12898         "sbcs	r4, r4, r6\n\t"
12899         "sbcs	r5, r5, r8\n\t"
12900         "str	r4, [%[r], #64]\n\t"
12901         "str	r5, [%[r], #68]\n\t"
12902         "ldr	r4, [%[a], #72]\n\t"
12903         "ldr	r5, [%[a], #76]\n\t"
12904         "ldr	r6, [%[b], #72]\n\t"
12905         "ldr	r8, [%[b], #76]\n\t"
12906         "sbcs	r4, r4, r6\n\t"
12907         "sbcs	r5, r5, r8\n\t"
12908         "str	r4, [%[r], #72]\n\t"
12909         "str	r5, [%[r], #76]\n\t"
12910         "ldr	r4, [%[a], #80]\n\t"
12911         "ldr	r5, [%[a], #84]\n\t"
12912         "ldr	r6, [%[b], #80]\n\t"
12913         "ldr	r8, [%[b], #84]\n\t"
12914         "sbcs	r4, r4, r6\n\t"
12915         "sbcs	r5, r5, r8\n\t"
12916         "str	r4, [%[r], #80]\n\t"
12917         "str	r5, [%[r], #84]\n\t"
12918         "ldr	r4, [%[a], #88]\n\t"
12919         "ldr	r5, [%[a], #92]\n\t"
12920         "ldr	r6, [%[b], #88]\n\t"
12921         "ldr	r8, [%[b], #92]\n\t"
12922         "sbcs	r4, r4, r6\n\t"
12923         "sbcs	r5, r5, r8\n\t"
12924         "str	r4, [%[r], #88]\n\t"
12925         "str	r5, [%[r], #92]\n\t"
12926         "ldr	r4, [%[a], #96]\n\t"
12927         "ldr	r5, [%[a], #100]\n\t"
12928         "ldr	r6, [%[b], #96]\n\t"
12929         "ldr	r8, [%[b], #100]\n\t"
12930         "sbcs	r4, r4, r6\n\t"
12931         "sbcs	r5, r5, r8\n\t"
12932         "str	r4, [%[r], #96]\n\t"
12933         "str	r5, [%[r], #100]\n\t"
12934         "ldr	r4, [%[a], #104]\n\t"
12935         "ldr	r5, [%[a], #108]\n\t"
12936         "ldr	r6, [%[b], #104]\n\t"
12937         "ldr	r8, [%[b], #108]\n\t"
12938         "sbcs	r4, r4, r6\n\t"
12939         "sbcs	r5, r5, r8\n\t"
12940         "str	r4, [%[r], #104]\n\t"
12941         "str	r5, [%[r], #108]\n\t"
12942         "ldr	r4, [%[a], #112]\n\t"
12943         "ldr	r5, [%[a], #116]\n\t"
12944         "ldr	r6, [%[b], #112]\n\t"
12945         "ldr	r8, [%[b], #116]\n\t"
12946         "sbcs	r4, r4, r6\n\t"
12947         "sbcs	r5, r5, r8\n\t"
12948         "str	r4, [%[r], #112]\n\t"
12949         "str	r5, [%[r], #116]\n\t"
12950         "ldr	r4, [%[a], #120]\n\t"
12951         "ldr	r5, [%[a], #124]\n\t"
12952         "ldr	r6, [%[b], #120]\n\t"
12953         "ldr	r8, [%[b], #124]\n\t"
12954         "sbcs	r4, r4, r6\n\t"
12955         "sbcs	r5, r5, r8\n\t"
12956         "str	r4, [%[r], #120]\n\t"
12957         "str	r5, [%[r], #124]\n\t"
12958         "sbc	%[c], %[c], %[c]\n\t"
12959         "add	%[a], %[a], #0x80\n\t"
12960         "add	%[b], %[b], #0x80\n\t"
12961         "add	%[r], %[r], #0x80\n\t"
12962         "mov	r6, #0\n\t"
12963         "sub	r6, r6, %[c]\n\t"
12964         "ldr	r4, [%[a], #0]\n\t"
12965         "ldr	r5, [%[a], #4]\n\t"
12966         "ldr	r6, [%[b], #0]\n\t"
12967         "ldr	r8, [%[b], #4]\n\t"
12968         "sbcs	r4, r4, r6\n\t"
12969         "sbcs	r5, r5, r8\n\t"
12970         "str	r4, [%[r], #0]\n\t"
12971         "str	r5, [%[r], #4]\n\t"
12972         "ldr	r4, [%[a], #8]\n\t"
12973         "ldr	r5, [%[a], #12]\n\t"
12974         "ldr	r6, [%[b], #8]\n\t"
12975         "ldr	r8, [%[b], #12]\n\t"
12976         "sbcs	r4, r4, r6\n\t"
12977         "sbcs	r5, r5, r8\n\t"
12978         "str	r4, [%[r], #8]\n\t"
12979         "str	r5, [%[r], #12]\n\t"
12980         "ldr	r4, [%[a], #16]\n\t"
12981         "ldr	r5, [%[a], #20]\n\t"
12982         "ldr	r6, [%[b], #16]\n\t"
12983         "ldr	r8, [%[b], #20]\n\t"
12984         "sbcs	r4, r4, r6\n\t"
12985         "sbcs	r5, r5, r8\n\t"
12986         "str	r4, [%[r], #16]\n\t"
12987         "str	r5, [%[r], #20]\n\t"
12988         "ldr	r4, [%[a], #24]\n\t"
12989         "ldr	r5, [%[a], #28]\n\t"
12990         "ldr	r6, [%[b], #24]\n\t"
12991         "ldr	r8, [%[b], #28]\n\t"
12992         "sbcs	r4, r4, r6\n\t"
12993         "sbcs	r5, r5, r8\n\t"
12994         "str	r4, [%[r], #24]\n\t"
12995         "str	r5, [%[r], #28]\n\t"
12996         "ldr	r4, [%[a], #32]\n\t"
12997         "ldr	r5, [%[a], #36]\n\t"
12998         "ldr	r6, [%[b], #32]\n\t"
12999         "ldr	r8, [%[b], #36]\n\t"
13000         "sbcs	r4, r4, r6\n\t"
13001         "sbcs	r5, r5, r8\n\t"
13002         "str	r4, [%[r], #32]\n\t"
13003         "str	r5, [%[r], #36]\n\t"
13004         "ldr	r4, [%[a], #40]\n\t"
13005         "ldr	r5, [%[a], #44]\n\t"
13006         "ldr	r6, [%[b], #40]\n\t"
13007         "ldr	r8, [%[b], #44]\n\t"
13008         "sbcs	r4, r4, r6\n\t"
13009         "sbcs	r5, r5, r8\n\t"
13010         "str	r4, [%[r], #40]\n\t"
13011         "str	r5, [%[r], #44]\n\t"
13012         "ldr	r4, [%[a], #48]\n\t"
13013         "ldr	r5, [%[a], #52]\n\t"
13014         "ldr	r6, [%[b], #48]\n\t"
13015         "ldr	r8, [%[b], #52]\n\t"
13016         "sbcs	r4, r4, r6\n\t"
13017         "sbcs	r5, r5, r8\n\t"
13018         "str	r4, [%[r], #48]\n\t"
13019         "str	r5, [%[r], #52]\n\t"
13020         "ldr	r4, [%[a], #56]\n\t"
13021         "ldr	r5, [%[a], #60]\n\t"
13022         "ldr	r6, [%[b], #56]\n\t"
13023         "ldr	r8, [%[b], #60]\n\t"
13024         "sbcs	r4, r4, r6\n\t"
13025         "sbcs	r5, r5, r8\n\t"
13026         "str	r4, [%[r], #56]\n\t"
13027         "str	r5, [%[r], #60]\n\t"
13028         "ldr	r4, [%[a], #64]\n\t"
13029         "ldr	r5, [%[a], #68]\n\t"
13030         "ldr	r6, [%[b], #64]\n\t"
13031         "ldr	r8, [%[b], #68]\n\t"
13032         "sbcs	r4, r4, r6\n\t"
13033         "sbcs	r5, r5, r8\n\t"
13034         "str	r4, [%[r], #64]\n\t"
13035         "str	r5, [%[r], #68]\n\t"
13036         "ldr	r4, [%[a], #72]\n\t"
13037         "ldr	r5, [%[a], #76]\n\t"
13038         "ldr	r6, [%[b], #72]\n\t"
13039         "ldr	r8, [%[b], #76]\n\t"
13040         "sbcs	r4, r4, r6\n\t"
13041         "sbcs	r5, r5, r8\n\t"
13042         "str	r4, [%[r], #72]\n\t"
13043         "str	r5, [%[r], #76]\n\t"
13044         "ldr	r4, [%[a], #80]\n\t"
13045         "ldr	r5, [%[a], #84]\n\t"
13046         "ldr	r6, [%[b], #80]\n\t"
13047         "ldr	r8, [%[b], #84]\n\t"
13048         "sbcs	r4, r4, r6\n\t"
13049         "sbcs	r5, r5, r8\n\t"
13050         "str	r4, [%[r], #80]\n\t"
13051         "str	r5, [%[r], #84]\n\t"
13052         "ldr	r4, [%[a], #88]\n\t"
13053         "ldr	r5, [%[a], #92]\n\t"
13054         "ldr	r6, [%[b], #88]\n\t"
13055         "ldr	r8, [%[b], #92]\n\t"
13056         "sbcs	r4, r4, r6\n\t"
13057         "sbcs	r5, r5, r8\n\t"
13058         "str	r4, [%[r], #88]\n\t"
13059         "str	r5, [%[r], #92]\n\t"
13060         "ldr	r4, [%[a], #96]\n\t"
13061         "ldr	r5, [%[a], #100]\n\t"
13062         "ldr	r6, [%[b], #96]\n\t"
13063         "ldr	r8, [%[b], #100]\n\t"
13064         "sbcs	r4, r4, r6\n\t"
13065         "sbcs	r5, r5, r8\n\t"
13066         "str	r4, [%[r], #96]\n\t"
13067         "str	r5, [%[r], #100]\n\t"
13068         "ldr	r4, [%[a], #104]\n\t"
13069         "ldr	r5, [%[a], #108]\n\t"
13070         "ldr	r6, [%[b], #104]\n\t"
13071         "ldr	r8, [%[b], #108]\n\t"
13072         "sbcs	r4, r4, r6\n\t"
13073         "sbcs	r5, r5, r8\n\t"
13074         "str	r4, [%[r], #104]\n\t"
13075         "str	r5, [%[r], #108]\n\t"
13076         "ldr	r4, [%[a], #112]\n\t"
13077         "ldr	r5, [%[a], #116]\n\t"
13078         "ldr	r6, [%[b], #112]\n\t"
13079         "ldr	r8, [%[b], #116]\n\t"
13080         "sbcs	r4, r4, r6\n\t"
13081         "sbcs	r5, r5, r8\n\t"
13082         "str	r4, [%[r], #112]\n\t"
13083         "str	r5, [%[r], #116]\n\t"
13084         "ldr	r4, [%[a], #120]\n\t"
13085         "ldr	r5, [%[a], #124]\n\t"
13086         "ldr	r6, [%[b], #120]\n\t"
13087         "ldr	r8, [%[b], #124]\n\t"
13088         "sbcs	r4, r4, r6\n\t"
13089         "sbcs	r5, r5, r8\n\t"
13090         "str	r4, [%[r], #120]\n\t"
13091         "str	r5, [%[r], #124]\n\t"
13092         "sbc	%[c], %[c], %[c]\n\t"
13093         "add	%[a], %[a], #0x80\n\t"
13094         "add	%[b], %[b], #0x80\n\t"
13095         "add	%[r], %[r], #0x80\n\t"
13096         "mov	r6, #0\n\t"
13097         "sub	r6, r6, %[c]\n\t"
13098         "ldr	r4, [%[a], #0]\n\t"
13099         "ldr	r5, [%[a], #4]\n\t"
13100         "ldr	r6, [%[b], #0]\n\t"
13101         "ldr	r8, [%[b], #4]\n\t"
13102         "sbcs	r4, r4, r6\n\t"
13103         "sbcs	r5, r5, r8\n\t"
13104         "str	r4, [%[r], #0]\n\t"
13105         "str	r5, [%[r], #4]\n\t"
13106         "ldr	r4, [%[a], #8]\n\t"
13107         "ldr	r5, [%[a], #12]\n\t"
13108         "ldr	r6, [%[b], #8]\n\t"
13109         "ldr	r8, [%[b], #12]\n\t"
13110         "sbcs	r4, r4, r6\n\t"
13111         "sbcs	r5, r5, r8\n\t"
13112         "str	r4, [%[r], #8]\n\t"
13113         "str	r5, [%[r], #12]\n\t"
13114         "ldr	r4, [%[a], #16]\n\t"
13115         "ldr	r5, [%[a], #20]\n\t"
13116         "ldr	r6, [%[b], #16]\n\t"
13117         "ldr	r8, [%[b], #20]\n\t"
13118         "sbcs	r4, r4, r6\n\t"
13119         "sbcs	r5, r5, r8\n\t"
13120         "str	r4, [%[r], #16]\n\t"
13121         "str	r5, [%[r], #20]\n\t"
13122         "ldr	r4, [%[a], #24]\n\t"
13123         "ldr	r5, [%[a], #28]\n\t"
13124         "ldr	r6, [%[b], #24]\n\t"
13125         "ldr	r8, [%[b], #28]\n\t"
13126         "sbcs	r4, r4, r6\n\t"
13127         "sbcs	r5, r5, r8\n\t"
13128         "str	r4, [%[r], #24]\n\t"
13129         "str	r5, [%[r], #28]\n\t"
13130         "ldr	r4, [%[a], #32]\n\t"
13131         "ldr	r5, [%[a], #36]\n\t"
13132         "ldr	r6, [%[b], #32]\n\t"
13133         "ldr	r8, [%[b], #36]\n\t"
13134         "sbcs	r4, r4, r6\n\t"
13135         "sbcs	r5, r5, r8\n\t"
13136         "str	r4, [%[r], #32]\n\t"
13137         "str	r5, [%[r], #36]\n\t"
13138         "ldr	r4, [%[a], #40]\n\t"
13139         "ldr	r5, [%[a], #44]\n\t"
13140         "ldr	r6, [%[b], #40]\n\t"
13141         "ldr	r8, [%[b], #44]\n\t"
13142         "sbcs	r4, r4, r6\n\t"
13143         "sbcs	r5, r5, r8\n\t"
13144         "str	r4, [%[r], #40]\n\t"
13145         "str	r5, [%[r], #44]\n\t"
13146         "ldr	r4, [%[a], #48]\n\t"
13147         "ldr	r5, [%[a], #52]\n\t"
13148         "ldr	r6, [%[b], #48]\n\t"
13149         "ldr	r8, [%[b], #52]\n\t"
13150         "sbcs	r4, r4, r6\n\t"
13151         "sbcs	r5, r5, r8\n\t"
13152         "str	r4, [%[r], #48]\n\t"
13153         "str	r5, [%[r], #52]\n\t"
13154         "ldr	r4, [%[a], #56]\n\t"
13155         "ldr	r5, [%[a], #60]\n\t"
13156         "ldr	r6, [%[b], #56]\n\t"
13157         "ldr	r8, [%[b], #60]\n\t"
13158         "sbcs	r4, r4, r6\n\t"
13159         "sbcs	r5, r5, r8\n\t"
13160         "str	r4, [%[r], #56]\n\t"
13161         "str	r5, [%[r], #60]\n\t"
13162         "ldr	r4, [%[a], #64]\n\t"
13163         "ldr	r5, [%[a], #68]\n\t"
13164         "ldr	r6, [%[b], #64]\n\t"
13165         "ldr	r8, [%[b], #68]\n\t"
13166         "sbcs	r4, r4, r6\n\t"
13167         "sbcs	r5, r5, r8\n\t"
13168         "str	r4, [%[r], #64]\n\t"
13169         "str	r5, [%[r], #68]\n\t"
13170         "ldr	r4, [%[a], #72]\n\t"
13171         "ldr	r5, [%[a], #76]\n\t"
13172         "ldr	r6, [%[b], #72]\n\t"
13173         "ldr	r8, [%[b], #76]\n\t"
13174         "sbcs	r4, r4, r6\n\t"
13175         "sbcs	r5, r5, r8\n\t"
13176         "str	r4, [%[r], #72]\n\t"
13177         "str	r5, [%[r], #76]\n\t"
13178         "ldr	r4, [%[a], #80]\n\t"
13179         "ldr	r5, [%[a], #84]\n\t"
13180         "ldr	r6, [%[b], #80]\n\t"
13181         "ldr	r8, [%[b], #84]\n\t"
13182         "sbcs	r4, r4, r6\n\t"
13183         "sbcs	r5, r5, r8\n\t"
13184         "str	r4, [%[r], #80]\n\t"
13185         "str	r5, [%[r], #84]\n\t"
13186         "ldr	r4, [%[a], #88]\n\t"
13187         "ldr	r5, [%[a], #92]\n\t"
13188         "ldr	r6, [%[b], #88]\n\t"
13189         "ldr	r8, [%[b], #92]\n\t"
13190         "sbcs	r4, r4, r6\n\t"
13191         "sbcs	r5, r5, r8\n\t"
13192         "str	r4, [%[r], #88]\n\t"
13193         "str	r5, [%[r], #92]\n\t"
13194         "ldr	r4, [%[a], #96]\n\t"
13195         "ldr	r5, [%[a], #100]\n\t"
13196         "ldr	r6, [%[b], #96]\n\t"
13197         "ldr	r8, [%[b], #100]\n\t"
13198         "sbcs	r4, r4, r6\n\t"
13199         "sbcs	r5, r5, r8\n\t"
13200         "str	r4, [%[r], #96]\n\t"
13201         "str	r5, [%[r], #100]\n\t"
13202         "ldr	r4, [%[a], #104]\n\t"
13203         "ldr	r5, [%[a], #108]\n\t"
13204         "ldr	r6, [%[b], #104]\n\t"
13205         "ldr	r8, [%[b], #108]\n\t"
13206         "sbcs	r4, r4, r6\n\t"
13207         "sbcs	r5, r5, r8\n\t"
13208         "str	r4, [%[r], #104]\n\t"
13209         "str	r5, [%[r], #108]\n\t"
13210         "ldr	r4, [%[a], #112]\n\t"
13211         "ldr	r5, [%[a], #116]\n\t"
13212         "ldr	r6, [%[b], #112]\n\t"
13213         "ldr	r8, [%[b], #116]\n\t"
13214         "sbcs	r4, r4, r6\n\t"
13215         "sbcs	r5, r5, r8\n\t"
13216         "str	r4, [%[r], #112]\n\t"
13217         "str	r5, [%[r], #116]\n\t"
13218         "ldr	r4, [%[a], #120]\n\t"
13219         "ldr	r5, [%[a], #124]\n\t"
13220         "ldr	r6, [%[b], #120]\n\t"
13221         "ldr	r8, [%[b], #124]\n\t"
13222         "sbcs	r4, r4, r6\n\t"
13223         "sbcs	r5, r5, r8\n\t"
13224         "str	r4, [%[r], #120]\n\t"
13225         "str	r5, [%[r], #124]\n\t"
13226         "sbc	%[c], %[c], %[c]\n\t"
13227         "add	%[a], %[a], #0x80\n\t"
13228         "add	%[b], %[b], #0x80\n\t"
13229         "add	%[r], %[r], #0x80\n\t"
13230         "mov	r6, #0\n\t"
13231         "sub	r6, r6, %[c]\n\t"
13232         "ldr	r4, [%[a], #0]\n\t"
13233         "ldr	r5, [%[a], #4]\n\t"
13234         "ldr	r6, [%[b], #0]\n\t"
13235         "ldr	r8, [%[b], #4]\n\t"
13236         "sbcs	r4, r4, r6\n\t"
13237         "sbcs	r5, r5, r8\n\t"
13238         "str	r4, [%[r], #0]\n\t"
13239         "str	r5, [%[r], #4]\n\t"
13240         "ldr	r4, [%[a], #8]\n\t"
13241         "ldr	r5, [%[a], #12]\n\t"
13242         "ldr	r6, [%[b], #8]\n\t"
13243         "ldr	r8, [%[b], #12]\n\t"
13244         "sbcs	r4, r4, r6\n\t"
13245         "sbcs	r5, r5, r8\n\t"
13246         "str	r4, [%[r], #8]\n\t"
13247         "str	r5, [%[r], #12]\n\t"
13248         "ldr	r4, [%[a], #16]\n\t"
13249         "ldr	r5, [%[a], #20]\n\t"
13250         "ldr	r6, [%[b], #16]\n\t"
13251         "ldr	r8, [%[b], #20]\n\t"
13252         "sbcs	r4, r4, r6\n\t"
13253         "sbcs	r5, r5, r8\n\t"
13254         "str	r4, [%[r], #16]\n\t"
13255         "str	r5, [%[r], #20]\n\t"
13256         "ldr	r4, [%[a], #24]\n\t"
13257         "ldr	r5, [%[a], #28]\n\t"
13258         "ldr	r6, [%[b], #24]\n\t"
13259         "ldr	r8, [%[b], #28]\n\t"
13260         "sbcs	r4, r4, r6\n\t"
13261         "sbcs	r5, r5, r8\n\t"
13262         "str	r4, [%[r], #24]\n\t"
13263         "str	r5, [%[r], #28]\n\t"
13264         "ldr	r4, [%[a], #32]\n\t"
13265         "ldr	r5, [%[a], #36]\n\t"
13266         "ldr	r6, [%[b], #32]\n\t"
13267         "ldr	r8, [%[b], #36]\n\t"
13268         "sbcs	r4, r4, r6\n\t"
13269         "sbcs	r5, r5, r8\n\t"
13270         "str	r4, [%[r], #32]\n\t"
13271         "str	r5, [%[r], #36]\n\t"
13272         "ldr	r4, [%[a], #40]\n\t"
13273         "ldr	r5, [%[a], #44]\n\t"
13274         "ldr	r6, [%[b], #40]\n\t"
13275         "ldr	r8, [%[b], #44]\n\t"
13276         "sbcs	r4, r4, r6\n\t"
13277         "sbcs	r5, r5, r8\n\t"
13278         "str	r4, [%[r], #40]\n\t"
13279         "str	r5, [%[r], #44]\n\t"
13280         "ldr	r4, [%[a], #48]\n\t"
13281         "ldr	r5, [%[a], #52]\n\t"
13282         "ldr	r6, [%[b], #48]\n\t"
13283         "ldr	r8, [%[b], #52]\n\t"
13284         "sbcs	r4, r4, r6\n\t"
13285         "sbcs	r5, r5, r8\n\t"
13286         "str	r4, [%[r], #48]\n\t"
13287         "str	r5, [%[r], #52]\n\t"
13288         "ldr	r4, [%[a], #56]\n\t"
13289         "ldr	r5, [%[a], #60]\n\t"
13290         "ldr	r6, [%[b], #56]\n\t"
13291         "ldr	r8, [%[b], #60]\n\t"
13292         "sbcs	r4, r4, r6\n\t"
13293         "sbcs	r5, r5, r8\n\t"
13294         "str	r4, [%[r], #56]\n\t"
13295         "str	r5, [%[r], #60]\n\t"
13296         "ldr	r4, [%[a], #64]\n\t"
13297         "ldr	r5, [%[a], #68]\n\t"
13298         "ldr	r6, [%[b], #64]\n\t"
13299         "ldr	r8, [%[b], #68]\n\t"
13300         "sbcs	r4, r4, r6\n\t"
13301         "sbcs	r5, r5, r8\n\t"
13302         "str	r4, [%[r], #64]\n\t"
13303         "str	r5, [%[r], #68]\n\t"
13304         "ldr	r4, [%[a], #72]\n\t"
13305         "ldr	r5, [%[a], #76]\n\t"
13306         "ldr	r6, [%[b], #72]\n\t"
13307         "ldr	r8, [%[b], #76]\n\t"
13308         "sbcs	r4, r4, r6\n\t"
13309         "sbcs	r5, r5, r8\n\t"
13310         "str	r4, [%[r], #72]\n\t"
13311         "str	r5, [%[r], #76]\n\t"
13312         "ldr	r4, [%[a], #80]\n\t"
13313         "ldr	r5, [%[a], #84]\n\t"
13314         "ldr	r6, [%[b], #80]\n\t"
13315         "ldr	r8, [%[b], #84]\n\t"
13316         "sbcs	r4, r4, r6\n\t"
13317         "sbcs	r5, r5, r8\n\t"
13318         "str	r4, [%[r], #80]\n\t"
13319         "str	r5, [%[r], #84]\n\t"
13320         "ldr	r4, [%[a], #88]\n\t"
13321         "ldr	r5, [%[a], #92]\n\t"
13322         "ldr	r6, [%[b], #88]\n\t"
13323         "ldr	r8, [%[b], #92]\n\t"
13324         "sbcs	r4, r4, r6\n\t"
13325         "sbcs	r5, r5, r8\n\t"
13326         "str	r4, [%[r], #88]\n\t"
13327         "str	r5, [%[r], #92]\n\t"
13328         "ldr	r4, [%[a], #96]\n\t"
13329         "ldr	r5, [%[a], #100]\n\t"
13330         "ldr	r6, [%[b], #96]\n\t"
13331         "ldr	r8, [%[b], #100]\n\t"
13332         "sbcs	r4, r4, r6\n\t"
13333         "sbcs	r5, r5, r8\n\t"
13334         "str	r4, [%[r], #96]\n\t"
13335         "str	r5, [%[r], #100]\n\t"
13336         "ldr	r4, [%[a], #104]\n\t"
13337         "ldr	r5, [%[a], #108]\n\t"
13338         "ldr	r6, [%[b], #104]\n\t"
13339         "ldr	r8, [%[b], #108]\n\t"
13340         "sbcs	r4, r4, r6\n\t"
13341         "sbcs	r5, r5, r8\n\t"
13342         "str	r4, [%[r], #104]\n\t"
13343         "str	r5, [%[r], #108]\n\t"
13344         "ldr	r4, [%[a], #112]\n\t"
13345         "ldr	r5, [%[a], #116]\n\t"
13346         "ldr	r6, [%[b], #112]\n\t"
13347         "ldr	r8, [%[b], #116]\n\t"
13348         "sbcs	r4, r4, r6\n\t"
13349         "sbcs	r5, r5, r8\n\t"
13350         "str	r4, [%[r], #112]\n\t"
13351         "str	r5, [%[r], #116]\n\t"
13352         "ldr	r4, [%[a], #120]\n\t"
13353         "ldr	r5, [%[a], #124]\n\t"
13354         "ldr	r6, [%[b], #120]\n\t"
13355         "ldr	r8, [%[b], #124]\n\t"
13356         "sbcs	r4, r4, r6\n\t"
13357         "sbcs	r5, r5, r8\n\t"
13358         "str	r4, [%[r], #120]\n\t"
13359         "str	r5, [%[r], #124]\n\t"
13360         "sbc	%[c], %[c], %[c]\n\t"
13361         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
13362         :
13363         : "memory", "r4", "r5", "r6", "r8"
13364     );
13365 
13366     return c;
13367 }
13368 
13369 #endif /* WOLFSSL_SP_SMALL */
13370 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
13371  *
13372  * d1   The high order half of the number to divide.
13373  * d0   The low order half of the number to divide.
13374  * div  The dividend.
13375  * returns the result of the division.
13376  *
13377  * Note that this is an approximate div. It may give an answer 1 larger.
13378  */
div_4096_word_128(sp_digit d1,sp_digit d0,sp_digit div)13379 SP_NOINLINE static sp_digit div_4096_word_128(sp_digit d1, sp_digit d0,
13380         sp_digit div)
13381 {
13382     sp_digit r = 0;
13383 
13384     __asm__ __volatile__ (
13385         "lsr	r6, %[div], #16\n\t"
13386         "add	r6, r6, #1\n\t"
13387         "udiv	r4, %[d1], r6\n\t"
13388         "lsl	r8, r4, #16\n\t"
13389         "umull	r4, r5, %[div], r8\n\t"
13390         "subs	%[d0], %[d0], r4\n\t"
13391         "sbc	%[d1], %[d1], r5\n\t"
13392         "udiv	r5, %[d1], r6\n\t"
13393         "lsl	r4, r5, #16\n\t"
13394         "add	r8, r8, r4\n\t"
13395         "umull	r4, r5, %[div], r4\n\t"
13396         "subs	%[d0], %[d0], r4\n\t"
13397         "sbc	%[d1], %[d1], r5\n\t"
13398         "lsl	r4, %[d1], #16\n\t"
13399         "orr	r4, r4, %[d0], lsr #16\n\t"
13400         "udiv	r4, r4, r6\n\t"
13401         "add	r8, r8, r4\n\t"
13402         "umull	r4, r5, %[div], r4\n\t"
13403         "subs	%[d0], %[d0], r4\n\t"
13404         "sbc	%[d1], %[d1], r5\n\t"
13405         "lsl	r4, %[d1], #16\n\t"
13406         "orr	r4, r4, %[d0], lsr #16\n\t"
13407         "udiv	r4, r4, r6\n\t"
13408         "add	r8, r8, r4\n\t"
13409         "umull	r4, r5, %[div], r4\n\t"
13410         "subs	%[d0], %[d0], r4\n\t"
13411         "sbc	%[d1], %[d1], r5\n\t"
13412         "udiv	r4, %[d0], %[div]\n\t"
13413         "add	r8, r8, r4\n\t"
13414         "mov	%[r], r8\n\t"
13415         : [r] "+r" (r)
13416         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
13417         : "r4", "r5", "r6", "r8"
13418     );
13419     return r;
13420 }
13421 
13422 /* Divide d in a and put remainder into r (m*d + r = a)
13423  * m is not calculated as it is not needed at this time.
13424  *
13425  * a  Number to be divided.
13426  * d  Number to divide with.
13427  * m  Multiplier result.
13428  * r  Remainder from the division.
13429  * returns MP_OKAY indicating success.
13430  */
sp_4096_div_128_cond(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)13431 static WC_INLINE int sp_4096_div_128_cond(const sp_digit* a, const sp_digit* d, sp_digit* m,
13432         sp_digit* r)
13433 {
13434     sp_digit t1[256], t2[129];
13435     sp_digit div, r1;
13436     int i;
13437 
13438     (void)m;
13439 
13440     div = d[127];
13441     XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
13442     for (i=127; i>=0; i--) {
13443         sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
13444         r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
13445 
13446         sp_4096_mul_d_128(t2, d, r1);
13447         t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
13448         t1[128 + i] -= t2[128];
13449         if (t1[128 + i] != 0) {
13450             t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
13451             if (t1[128 + i] != 0)
13452                 t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], d);
13453         }
13454     }
13455 
13456     for (i = 127; i > 0; i--) {
13457         if (t1[i] != d[i])
13458             break;
13459     }
13460     if (t1[i] >= d[i]) {
13461         sp_4096_sub_128(r, t1, d);
13462     }
13463     else {
13464         XMEMCPY(r, t1, sizeof(*t1) * 128);
13465     }
13466 
13467     return MP_OKAY;
13468 }
13469 
13470 /* Reduce a modulo m into r. (r = a mod m)
13471  *
13472  * r  A single precision number that is the reduced result.
13473  * a  A single precision number that is to be reduced.
13474  * m  A single precision number that is the modulus to reduce with.
13475  * returns MP_OKAY indicating success.
13476  */
sp_4096_mod_128_cond(sp_digit * r,const sp_digit * a,const sp_digit * m)13477 static WC_INLINE int sp_4096_mod_128_cond(sp_digit* r, const sp_digit* a, const sp_digit* m)
13478 {
13479     return sp_4096_div_128_cond(a, m, NULL, r);
13480 }
13481 
13482 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
13483 /* AND m into each word of a and store in r.
13484  *
13485  * r  A single precision integer.
13486  * a  A single precision integer.
13487  * m  Mask to AND against each digit.
13488  */
sp_4096_mask_128(sp_digit * r,const sp_digit * a,sp_digit m)13489 static void sp_4096_mask_128(sp_digit* r, const sp_digit* a, sp_digit m)
13490 {
13491 #ifdef WOLFSSL_SP_SMALL
13492     int i;
13493 
13494     for (i=0; i<128; i++) {
13495         r[i] = a[i] & m;
13496     }
13497 #else
13498     int i;
13499 
13500     for (i = 0; i < 128; i += 8) {
13501         r[i+0] = a[i+0] & m;
13502         r[i+1] = a[i+1] & m;
13503         r[i+2] = a[i+2] & m;
13504         r[i+3] = a[i+3] & m;
13505         r[i+4] = a[i+4] & m;
13506         r[i+5] = a[i+5] & m;
13507         r[i+6] = a[i+6] & m;
13508         r[i+7] = a[i+7] & m;
13509     }
13510 #endif
13511 }
13512 
13513 /* Compare a with b in constant time.
13514  *
13515  * a  A single precision integer.
13516  * b  A single precision integer.
13517  * return -ve, 0 or +ve if a is less than, equal to or greater than b
13518  * respectively.
13519  */
sp_4096_cmp_128(const sp_digit * a,const sp_digit * b)13520 SP_NOINLINE static sp_int32 sp_4096_cmp_128(const sp_digit* a, const sp_digit* b)
13521 {
13522     sp_digit r = 0;
13523 
13524 
13525     __asm__ __volatile__ (
13526         "mov	r3, #0\n\t"
13527         "mvn	r3, r3\n\t"
13528         "mov	r6, #1\n\t"
13529         "lsl	r6, r6, #8\n\t"
13530         "add	r6, r6, #252\n\t"
13531         "\n1:\n\t"
13532         "ldr	r8, [%[a], r6]\n\t"
13533         "ldr	r5, [%[b], r6]\n\t"
13534         "and	r8, r8, r3\n\t"
13535         "and	r5, r5, r3\n\t"
13536         "mov	r4, r8\n\t"
13537         "subs	r8, r8, r5\n\t"
13538         "sbc	r8, r8, r8\n\t"
13539         "add	%[r], %[r], r8\n\t"
13540         "mvn	r8, r8\n\t"
13541         "and	r3, r3, r8\n\t"
13542         "subs	r5, r5, r4\n\t"
13543         "sbc	r8, r8, r8\n\t"
13544         "sub	%[r], %[r], r8\n\t"
13545         "mvn	r8, r8\n\t"
13546         "and	r3, r3, r8\n\t"
13547         "sub	r6, r6, #4\n\t"
13548         "cmp	r6, #0\n\t"
13549 #ifdef __GNUC__
13550         "bge	1b\n\t"
13551 #else
13552         "bge.n	1b\n\t"
13553 #endif /* __GNUC__ */
13554         : [r] "+r" (r)
13555         : [a] "r" (a), [b] "r" (b)
13556         : "r3", "r4", "r5", "r6", "r8"
13557     );
13558 
13559     return r;
13560 }
13561 
13562 /* Divide d in a and put remainder into r (m*d + r = a)
13563  * m is not calculated as it is not needed at this time.
13564  *
13565  * a  Number to be divided.
13566  * d  Number to divide with.
13567  * m  Multiplier result.
13568  * r  Remainder from the division.
13569  * returns MP_OKAY indicating success.
13570  */
sp_4096_div_128(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)13571 static WC_INLINE int sp_4096_div_128(const sp_digit* a, const sp_digit* d, sp_digit* m,
13572         sp_digit* r)
13573 {
13574     sp_digit t1[256], t2[129];
13575     sp_digit div, r1;
13576     int i;
13577 
13578     (void)m;
13579 
13580     div = d[127];
13581     XMEMCPY(t1, a, sizeof(*t1) * 2 * 128);
13582     for (i=127; i>=0; i--) {
13583         sp_digit hi = t1[128 + i] - (t1[128 + i] == div);
13584         r1 = div_4096_word_128(hi, t1[128 + i - 1], div);
13585 
13586         sp_4096_mul_d_128(t2, d, r1);
13587         t1[128 + i] += sp_4096_sub_in_place_128(&t1[i], t2);
13588         t1[128 + i] -= t2[128];
13589         sp_4096_mask_128(t2, d, t1[128 + i]);
13590         t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
13591         sp_4096_mask_128(t2, d, t1[128 + i]);
13592         t1[128 + i] += sp_4096_add_128(&t1[i], &t1[i], t2);
13593     }
13594 
13595     r1 = sp_4096_cmp_128(t1, d) >= 0;
13596     sp_4096_cond_sub_128(r, t1, d, (sp_digit)0 - r1);
13597 
13598     return MP_OKAY;
13599 }
13600 
13601 /* Reduce a modulo m into r. (r = a mod m)
13602  *
13603  * r  A single precision number that is the reduced result.
13604  * a  A single precision number that is to be reduced.
13605  * m  A single precision number that is the modulus to reduce with.
13606  * returns MP_OKAY indicating success.
13607  */
sp_4096_mod_128(sp_digit * r,const sp_digit * a,const sp_digit * m)13608 static WC_INLINE int sp_4096_mod_128(sp_digit* r, const sp_digit* a, const sp_digit* m)
13609 {
13610     return sp_4096_div_128(a, m, NULL, r);
13611 }
13612 
13613 #if (defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
13614                                                      defined(WOLFSSL_HAVE_SP_DH)
13615 #ifdef WOLFSSL_SP_SMALL
13616 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
13617  *
13618  * r     A single precision number that is the result of the operation.
13619  * a     A single precision number being exponentiated.
13620  * e     A single precision number that is the exponent.
13621  * bits  The number of bits in the exponent.
13622  * m     A single precision number that is the modulus.
13623  * returns  0 on success.
13624  * returns  MEMORY_E on dynamic memory allocation failure.
 * returns  MP_VAL when modulus is even or the exponent bit count is 0.
13626  */
sp_4096_mod_exp_128(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)13627 static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
13628         int bits, const sp_digit* m, int reduceA)
13629 {
13630 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13631     sp_digit* td = NULL;
13632 #else
13633     sp_digit td[8 * 256];
13634 #endif
13635     sp_digit* t[8];
13636     sp_digit* norm = NULL;
13637     sp_digit mp = 1;
13638     sp_digit n;
13639     sp_digit mask;
13640     int i;
13641     int c;
13642     byte y;
13643     int err = MP_OKAY;
13644 
13645     if ((m[0] & 1) == 0) {
13646         err = MP_VAL;
13647     }
13648     else if (bits == 0) {
13649         err = MP_VAL;
13650     }
13651 
13652 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13653     if (err == MP_OKAY) {
13654         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (8 * 256), NULL,
13655                                 DYNAMIC_TYPE_TMP_BUFFER);
13656         if (td == NULL)
13657             err = MEMORY_E;
13658     }
13659 #endif
13660 
13661     if (err == MP_OKAY) {
13662         norm = td;
13663         for (i=0; i<8; i++) {
13664             t[i] = td + i * 256;
13665         }
13666 
13667         sp_4096_mont_setup(m, &mp);
13668         sp_4096_mont_norm_128(norm, m);
13669 
13670         XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
13671         if (reduceA != 0) {
13672             err = sp_4096_mod_128(t[1] + 128, a, m);
13673             if (err == MP_OKAY) {
13674                 err = sp_4096_mod_128(t[1], t[1], m);
13675             }
13676         }
13677         else {
13678             XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
13679             err = sp_4096_mod_128(t[1], t[1], m);
13680         }
13681     }
13682 
13683     if (err == MP_OKAY) {
13684         sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
13685         sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
13686         sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
13687         sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
13688         sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
13689         sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
13690 
13691         i = (bits - 1) / 32;
13692         n = e[i--];
13693         c = bits & 31;
13694         if (c == 0) {
13695             c = 32;
13696         }
13697         c -= bits % 3;
13698         if (c == 32) {
13699             c = 29;
13700         }
13701         if (c < 0) {
13702             /* Number of bits in top word is less than number needed. */
13703             c = -c;
13704             y = (byte)(n << c);
13705             n = e[i--];
13706             y |= (byte)(n >> (64 - c));
13707             n <<= c;
13708             c = 64 - c;
13709         }
13710         else {
13711             y = (byte)(n >> c);
13712             n <<= 32 - c;
13713         }
13714         XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
13715         for (; i>=0 || c>=3; ) {
13716             if (c == 0) {
13717                 n = e[i--];
13718                 y = (byte)(n >> 29);
13719                 n <<= 3;
13720                 c = 29;
13721             }
13722             else if (c < 3) {
13723                 y = (byte)(n >> 29);
13724                 n = e[i--];
13725                 c = 3 - c;
13726                 y |= (byte)(n >> (32 - c));
13727                 n <<= c;
13728                 c = 32 - c;
13729             }
13730             else {
13731                 y = (byte)((n >> 29) & 0x7);
13732                 n <<= 3;
13733                 c -= 3;
13734             }
13735 
13736             sp_4096_mont_sqr_128(r, r, m, mp);
13737             sp_4096_mont_sqr_128(r, r, m, mp);
13738             sp_4096_mont_sqr_128(r, r, m, mp);
13739 
13740             sp_4096_mont_mul_128(r, r, t[y], m, mp);
13741         }
13742 
13743         XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
13744         sp_4096_mont_reduce_128(r, m, mp);
13745 
13746         mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
13747         sp_4096_cond_sub_128(r, r, m, mask);
13748     }
13749 
13750 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13751     if (td != NULL)
13752         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
13753 #endif
13754 
13755     return err;
13756 }
13757 #else
13758 /* Modular exponentiate a to the e mod m. (r = a^e mod m)
13759  *
13760  * r     A single precision number that is the result of the operation.
13761  * a     A single precision number being exponentiated.
13762  * e     A single precision number that is the exponent.
13763  * bits  The number of bits in the exponent.
13764  * m     A single precision number that is the modulus.
13765  * returns  0 on success.
13766  * returns  MEMORY_E on dynamic memory allocation failure.
13767  * returns  MP_VAL when base is even or exponent is 0.
13768  */
sp_4096_mod_exp_128(sp_digit * r,const sp_digit * a,const sp_digit * e,int bits,const sp_digit * m,int reduceA)13769 static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e,
13770         int bits, const sp_digit* m, int reduceA)
13771 {
13772 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13773     sp_digit* td = NULL;
13774 #else
13775     sp_digit td[16 * 256];
13776 #endif
13777     sp_digit* t[16];
13778     sp_digit* norm = NULL;
13779     sp_digit mp = 1;
13780     sp_digit n;
13781     sp_digit mask;
13782     int i;
13783     int c;
13784     byte y;
13785     int err = MP_OKAY;
13786 
13787     if ((m[0] & 1) == 0) {
13788         err = MP_VAL;
13789     }
13790     else if (bits == 0) {
13791         err = MP_VAL;
13792     }
13793 
13794 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13795     if (err == MP_OKAY) {
13796         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * (16 * 256), NULL,
13797                                 DYNAMIC_TYPE_TMP_BUFFER);
13798         if (td == NULL)
13799             err = MEMORY_E;
13800     }
13801 #endif
13802 
13803     if (err == MP_OKAY) {
13804         norm = td;
13805         for (i=0; i<16; i++) {
13806             t[i] = td + i * 256;
13807         }
13808 
13809         sp_4096_mont_setup(m, &mp);
13810         sp_4096_mont_norm_128(norm, m);
13811 
13812         XMEMSET(t[1], 0, sizeof(sp_digit) * 128U);
13813         if (reduceA != 0) {
13814             err = sp_4096_mod_128(t[1] + 128, a, m);
13815             if (err == MP_OKAY) {
13816                 err = sp_4096_mod_128(t[1], t[1], m);
13817             }
13818         }
13819         else {
13820             XMEMCPY(t[1] + 128, a, sizeof(sp_digit) * 128);
13821             err = sp_4096_mod_128(t[1], t[1], m);
13822         }
13823     }
13824 
13825     if (err == MP_OKAY) {
13826         sp_4096_mont_sqr_128(t[ 2], t[ 1], m, mp);
13827         sp_4096_mont_mul_128(t[ 3], t[ 2], t[ 1], m, mp);
13828         sp_4096_mont_sqr_128(t[ 4], t[ 2], m, mp);
13829         sp_4096_mont_mul_128(t[ 5], t[ 3], t[ 2], m, mp);
13830         sp_4096_mont_sqr_128(t[ 6], t[ 3], m, mp);
13831         sp_4096_mont_mul_128(t[ 7], t[ 4], t[ 3], m, mp);
13832         sp_4096_mont_sqr_128(t[ 8], t[ 4], m, mp);
13833         sp_4096_mont_mul_128(t[ 9], t[ 5], t[ 4], m, mp);
13834         sp_4096_mont_sqr_128(t[10], t[ 5], m, mp);
13835         sp_4096_mont_mul_128(t[11], t[ 6], t[ 5], m, mp);
13836         sp_4096_mont_sqr_128(t[12], t[ 6], m, mp);
13837         sp_4096_mont_mul_128(t[13], t[ 7], t[ 6], m, mp);
13838         sp_4096_mont_sqr_128(t[14], t[ 7], m, mp);
13839         sp_4096_mont_mul_128(t[15], t[ 8], t[ 7], m, mp);
13840 
13841         i = (bits - 1) / 32;
13842         n = e[i--];
13843         c = bits & 31;
13844         if (c == 0) {
13845             c = 32;
13846         }
13847         c -= bits % 4;
13848         if (c == 32) {
13849             c = 28;
13850         }
13851         if (c < 0) {
13852             /* Number of bits in top word is less than number needed. */
13853             c = -c;
13854             y = (byte)(n << c);
13855             n = e[i--];
13856             y |= (byte)(n >> (64 - c));
13857             n <<= c;
13858             c = 64 - c;
13859         }
13860         else {
13861             y = (byte)(n >> c);
13862             n <<= 32 - c;
13863         }
13864         XMEMCPY(r, t[y], sizeof(sp_digit) * 128);
13865         for (; i>=0 || c>=4; ) {
13866             if (c == 0) {
13867                 n = e[i--];
13868                 y = (byte)(n >> 28);
13869                 n <<= 4;
13870                 c = 28;
13871             }
13872             else if (c < 4) {
13873                 y = (byte)(n >> 28);
13874                 n = e[i--];
13875                 c = 4 - c;
13876                 y |= (byte)(n >> (32 - c));
13877                 n <<= c;
13878                 c = 32 - c;
13879             }
13880             else {
13881                 y = (byte)((n >> 28) & 0xf);
13882                 n <<= 4;
13883                 c -= 4;
13884             }
13885 
13886             sp_4096_mont_sqr_128(r, r, m, mp);
13887             sp_4096_mont_sqr_128(r, r, m, mp);
13888             sp_4096_mont_sqr_128(r, r, m, mp);
13889             sp_4096_mont_sqr_128(r, r, m, mp);
13890 
13891             sp_4096_mont_mul_128(r, r, t[y], m, mp);
13892         }
13893 
13894         XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
13895         sp_4096_mont_reduce_128(r, m, mp);
13896 
13897         mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
13898         sp_4096_cond_sub_128(r, r, m, mask);
13899     }
13900 
13901 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13902     if (td != NULL)
13903         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
13904 #endif
13905 
13906     return err;
13907 }
13908 #endif /* WOLFSSL_SP_SMALL */
13909 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
13910 
13911 #endif /* (WOLFSSL_HAVE_SP_RSA && !WOLFSSL_RSA_PUBLIC_ONLY) || WOLFSSL_HAVE_SP_DH */
13912 #ifdef WOLFSSL_HAVE_SP_RSA
13913 /* RSA public key operation.
13914  *
13915  * in      Array of bytes representing the number to exponentiate, base.
13916  * inLen   Number of bytes in base.
13917  * em      Public exponent.
13918  * mm      Modulus.
13919  * out     Buffer to hold big-endian bytes of exponentiation result.
13920  *         Must be at least 512 bytes long.
13921  * outLen  Number of bytes in result.
13922  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
13923  * an array is too long and MEMORY_E when dynamic memory allocation fails.
13924  */
sp_RsaPublic_4096(const byte * in,word32 inLen,const mp_int * em,const mp_int * mm,byte * out,word32 * outLen)13925 int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em,
13926     const mp_int* mm, byte* out, word32* outLen)
13927 {
13928 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13929     sp_digit* a = NULL;
13930 #else
13931     sp_digit a[128 * 5];
13932 #endif
13933     sp_digit* m = NULL;
13934     sp_digit* r = NULL;
13935     sp_digit *ah = NULL;
13936     sp_digit e[1] = {0};
13937     int err = MP_OKAY;
13938 
13939     if (*outLen < 512) {
13940         err = MP_TO_E;
13941     }
13942     else if (mp_count_bits(em) > 32 || inLen > 512 ||
13943                                                      mp_count_bits(mm) != 4096) {
13944         err = MP_READ_E;
13945     }
13946     else if (mp_iseven(mm)) {
13947         err = MP_VAL;
13948     }
13949 
13950 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
13951     if (err == MP_OKAY) {
13952         a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 5, NULL,
13953                                                               DYNAMIC_TYPE_RSA);
13954         if (a == NULL)
13955             err = MEMORY_E;
13956     }
13957 #endif
13958 
13959     if (err == MP_OKAY) {
13960         r = a + 128 * 2;
13961         m = r + 128 * 2;
13962         ah = a + 128;
13963 
13964         sp_4096_from_bin(ah, 128, in, inLen);
13965 #if DIGIT_BIT >= 32
13966         e[0] = em->dp[0];
13967 #else
13968         e[0] = em->dp[0];
13969         if (em->used > 1) {
13970             e[0] |= ((sp_digit)em->dp[1]) << DIGIT_BIT;
13971         }
13972 #endif
13973         if (e[0] == 0) {
13974             err = MP_EXPTMOD_E;
13975         }
13976     }
13977     if (err == MP_OKAY) {
13978         sp_4096_from_mp(m, 128, mm);
13979 
13980         if (e[0] == 0x3) {
13981             if (err == MP_OKAY) {
13982                 sp_4096_sqr_128(r, ah);
13983                 err = sp_4096_mod_128_cond(r, r, m);
13984             }
13985             if (err == MP_OKAY) {
13986                 sp_4096_mul_128(r, ah, r);
13987                 err = sp_4096_mod_128_cond(r, r, m);
13988             }
13989         }
13990         else {
13991             int i;
13992             sp_digit mp;
13993 
13994             sp_4096_mont_setup(m, &mp);
13995 
13996             /* Convert to Montgomery form. */
13997             XMEMSET(a, 0, sizeof(sp_digit) * 128);
13998             err = sp_4096_mod_128_cond(a, a, m);
13999 
14000             if (err == MP_OKAY) {
14001                 for (i = 31; i >= 0; i--) {
14002                     if (e[0] >> i) {
14003                         break;
14004                     }
14005                 }
14006 
14007                 XMEMCPY(r, a, sizeof(sp_digit) * 128);
14008                 for (i--; i>=0; i--) {
14009                     sp_4096_mont_sqr_128(r, r, m, mp);
14010                     if (((e[0] >> i) & 1) == 1) {
14011                         sp_4096_mont_mul_128(r, r, a, m, mp);
14012                     }
14013                 }
14014                 XMEMSET(&r[128], 0, sizeof(sp_digit) * 128);
14015                 sp_4096_mont_reduce_128(r, m, mp);
14016 
14017                 for (i = 127; i > 0; i--) {
14018                     if (r[i] != m[i]) {
14019                         break;
14020                     }
14021                 }
14022                 if (r[i] >= m[i]) {
14023                     sp_4096_sub_in_place_128(r, m);
14024                 }
14025             }
14026         }
14027     }
14028 
14029     if (err == MP_OKAY) {
14030         sp_4096_to_bin_128(r, out);
14031         *outLen = 512;
14032     }
14033 
14034 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14035     if (a != NULL)
14036         XFREE(a, NULL, DYNAMIC_TYPE_RSA);
14037 #endif
14038 
14039     return err;
14040 }
14041 
14042 #ifndef WOLFSSL_RSA_PUBLIC_ONLY
14043 /* Conditionally add a and b using the mask m.
14044  * m is -1 to add and 0 when not.
14045  *
14046  * r  A single precision number representing conditional add result.
14047  * a  A single precision number to add with.
14048  * b  A single precision number to add.
14049  * m  Mask value to apply.
14050  */
sp_4096_cond_add_64(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)14051 SP_NOINLINE static sp_digit sp_4096_cond_add_64(sp_digit* r, const sp_digit* a, const sp_digit* b,
14052         sp_digit m)
14053 {
14054     sp_digit c = 0;
14055 
14056     __asm__ __volatile__ (
14057         "mov	r5, #1\n\t"
14058         "lsl	r5, r5, #8\n\t"
14059         "mov	r9, r5\n\t"
14060         "mov	r8, #0\n\t"
14061         "\n1:\n\t"
14062         "ldr	r6, [%[b], r8]\n\t"
14063         "and	r6, r6, %[m]\n\t"
14064         "adds	r5, %[c], #-1\n\t"
14065         "ldr	r5, [%[a], r8]\n\t"
14066         "adcs	r5, r5, r6\n\t"
14067         "mov	%[c], #0\n\t"
14068         "adcs	%[c], %[c], %[c]\n\t"
14069         "str	r5, [%[r], r8]\n\t"
14070         "add	r8, r8, #4\n\t"
14071         "cmp	r8, r9\n\t"
14072 #ifdef __GNUC__
14073         "blt	1b\n\t"
14074 #else
14075         "blt.n	1b\n\t"
14076 #endif /* __GNUC__ */
14077         : [c] "+r" (c)
14078         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
14079         : "memory", "r5", "r6", "r8", "r9"
14080     );
14081 
14082     return c;
14083 }
14084 
14085 /* RSA private key operation.
14086  *
14087  * in      Array of bytes representing the number to exponentiate, base.
14088  * inLen   Number of bytes in base.
14089  * dm      Private exponent.
14090  * pm      First prime.
14091  * qm      Second prime.
14092  * dpm     First prime's CRT exponent.
14093  * dqm     Second prime's CRT exponent.
14094  * qim     Inverse of second prime mod p.
14095  * mm      Modulus.
14096  * out     Buffer to hold big-endian bytes of exponentiation result.
14097  *         Must be at least 512 bytes long.
14098  * outLen  Number of bytes in result.
14099  * returns 0 on success, MP_TO_E when the outLen is too small, MP_READ_E when
14100  * an array is too long and MEMORY_E when dynamic memory allocation fails.
14101  */
sp_RsaPrivate_4096(const byte * in,word32 inLen,const mp_int * dm,const mp_int * pm,const mp_int * qm,const mp_int * dpm,const mp_int * dqm,const mp_int * qim,const mp_int * mm,byte * out,word32 * outLen)14102 int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm,
14103     const mp_int* pm, const mp_int* qm, const mp_int* dpm, const mp_int* dqm,
14104     const mp_int* qim, const mp_int* mm, byte* out, word32* outLen)
14105 {
14106 #if defined(SP_RSA_PRIVATE_EXP_D) || defined(RSA_LOW_MEM)
14107 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14108     sp_digit* d = NULL;
14109 #else
14110     sp_digit  d[128 * 4];
14111 #endif
14112     sp_digit* a = NULL;
14113     sp_digit* m = NULL;
14114     sp_digit* r = NULL;
14115     int err = MP_OKAY;
14116 
14117     (void)pm;
14118     (void)qm;
14119     (void)dpm;
14120     (void)dqm;
14121     (void)qim;
14122 
14123     if (*outLen < 512U) {
14124         err = MP_TO_E;
14125     }
14126     if (err == MP_OKAY) {
14127         if (mp_count_bits(dm) > 4096) {
14128            err = MP_READ_E;
14129         }
14130         else if (inLen > 512) {
14131             err = MP_READ_E;
14132         }
14133         else if (mp_count_bits(mm) != 4096) {
14134             err = MP_READ_E;
14135         }
14136         else if (mp_iseven(mm)) {
14137             err = MP_VAL;
14138         }
14139     }
14140 
14141 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14142     if (err == MP_OKAY) {
14143         d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 128 * 4, NULL,
14144                                                               DYNAMIC_TYPE_RSA);
14145         if (d == NULL)
14146             err = MEMORY_E;
14147     }
14148 #endif
14149 
14150     if (err == MP_OKAY) {
14151         a = d + 128;
14152         m = a + 256;
14153         r = a;
14154 
14155         sp_4096_from_bin(a, 128, in, inLen);
14156         sp_4096_from_mp(d, 128, dm);
14157         sp_4096_from_mp(m, 128, mm);
14158         err = sp_4096_mod_exp_128(r, a, d, 4096, m, 0);
14159     }
14160 
14161     if (err == MP_OKAY) {
14162         sp_4096_to_bin_128(r, out);
14163         *outLen = 512;
14164     }
14165 
14166 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14167     if (d != NULL)
14168 #endif
14169     {
14170         /* only "a" and "r" are sensitive and need zeroized (same pointer) */
14171         if (a != NULL)
14172             ForceZero(a, sizeof(sp_digit) * 128);
14173 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14174         XFREE(d, NULL, DYNAMIC_TYPE_RSA);
14175 #endif
14176     }
14177 
14178     return err;
14179 #else
14180 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14181     sp_digit* a = NULL;
14182 #else
14183     sp_digit a[64 * 11];
14184 #endif
14185     sp_digit* p = NULL;
14186     sp_digit* q = NULL;
14187     sp_digit* dp = NULL;
14188     sp_digit* tmpa = NULL;
14189     sp_digit* tmpb = NULL;
14190     sp_digit* r = NULL;
14191     sp_digit* qi = NULL;
14192     sp_digit* dq = NULL;
14193     sp_digit c;
14194     int err = MP_OKAY;
14195 
14196     (void)dm;
14197     (void)mm;
14198 
14199     if (*outLen < 512) {
14200         err = MP_TO_E;
14201     }
14202     else if (inLen > 512 || mp_count_bits(mm) != 4096) {
14203         err = MP_READ_E;
14204     }
14205     else if (mp_iseven(mm)) {
14206         err = MP_VAL;
14207     }
14208 
14209 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14210     if (err == MP_OKAY) {
14211         a = (sp_digit*)XMALLOC(sizeof(sp_digit) * 64 * 11, NULL,
14212                                                               DYNAMIC_TYPE_RSA);
14213         if (a == NULL)
14214             err = MEMORY_E;
14215     }
14216 #endif
14217 
14218     if (err == MP_OKAY) {
14219         p = a + 128 * 2;
14220         q = p + 64;
14221         qi = dq = dp = q + 64;
14222         tmpa = qi + 64;
14223         tmpb = tmpa + 128;
14224         r = a;
14225 
14226         sp_4096_from_bin(a, 128, in, inLen);
14227         sp_4096_from_mp(p, 64, pm);
14228         sp_4096_from_mp(q, 64, qm);
14229         sp_4096_from_mp(dp, 64, dpm);
14230 
14231         err = sp_2048_mod_exp_64(tmpa, a, dp, 2048, p, 1);
14232     }
14233     if (err == MP_OKAY) {
14234         sp_4096_from_mp(dq, 64, dqm);
14235         err = sp_2048_mod_exp_64(tmpb, a, dq, 2048, q, 1);
14236     }
14237 
14238     if (err == MP_OKAY) {
14239         c = sp_2048_sub_in_place_64(tmpa, tmpb);
14240         c += sp_4096_cond_add_64(tmpa, tmpa, p, c);
14241         sp_4096_cond_add_64(tmpa, tmpa, p, c);
14242 
14243         sp_2048_from_mp(qi, 64, qim);
14244         sp_2048_mul_64(tmpa, tmpa, qi);
14245         err = sp_2048_mod_64(tmpa, tmpa, p);
14246     }
14247 
14248     if (err == MP_OKAY) {
14249         sp_2048_mul_64(tmpa, q, tmpa);
14250         XMEMSET(&tmpb[64], 0, sizeof(sp_digit) * 64);
14251         sp_4096_add_128(r, tmpb, tmpa);
14252 
14253         sp_4096_to_bin_128(r, out);
14254         *outLen = 512;
14255     }
14256 
14257 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14258     if (a != NULL)
14259 #endif
14260     {
14261         ForceZero(a, sizeof(sp_digit) * 64 * 11);
14262     #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
14263         XFREE(a, NULL, DYNAMIC_TYPE_RSA);
14264     #endif
14265     }
14266 #endif /* SP_RSA_PRIVATE_EXP_D || RSA_LOW_MEM */
14267     return err;
14268 }
14269 #endif /* WOLFSSL_RSA_PUBLIC_ONLY */
14270 #endif /* WOLFSSL_HAVE_SP_RSA */
14271 #if defined(WOLFSSL_HAVE_SP_DH) || (defined(WOLFSSL_HAVE_SP_RSA) && \
14272                                               !defined(WOLFSSL_RSA_PUBLIC_ONLY))
14273 /* Convert an array of sp_digit to an mp_int.
14274  *
14275  * a  A single precision integer.
14276  * r  A multi-precision integer.
14277  */
sp_4096_to_mp(const sp_digit * a,mp_int * r)14278 static int sp_4096_to_mp(const sp_digit* a, mp_int* r)
14279 {
14280     int err;
14281 
14282     err = mp_grow(r, (4096 + DIGIT_BIT - 1) / DIGIT_BIT);
14283     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
14284 #if DIGIT_BIT == 32
14285         XMEMCPY(r->dp, a, sizeof(sp_digit) * 128);
14286         r->used = 128;
14287         mp_clamp(r);
14288 #elif DIGIT_BIT < 32
14289         int i;
14290         int j = 0;
14291         int s = 0;
14292 
14293         r->dp[0] = 0;
14294         for (i = 0; i < 128; i++) {
14295             r->dp[j] |= (mp_digit)(a[i] << s);
14296             r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
14297             s = DIGIT_BIT - s;
14298             r->dp[++j] = (mp_digit)(a[i] >> s);
14299             while (s + DIGIT_BIT <= 32) {
14300                 s += DIGIT_BIT;
14301                 r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
14302                 if (s == SP_WORD_SIZE) {
14303                     r->dp[j] = 0;
14304                 }
14305                 else {
14306                     r->dp[j] = (mp_digit)(a[i] >> s);
14307                 }
14308             }
14309             s = 32 - s;
14310         }
14311         r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
14312         mp_clamp(r);
14313 #else
14314         int i;
14315         int j = 0;
14316         int s = 0;
14317 
14318         r->dp[0] = 0;
14319         for (i = 0; i < 128; i++) {
14320             r->dp[j] |= ((mp_digit)a[i]) << s;
14321             if (s + 32 >= DIGIT_BIT) {
14322     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
14323                 r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
14324     #endif
14325                 s = DIGIT_BIT - s;
14326                 r->dp[++j] = a[i] >> s;
14327                 s = 32 - s;
14328             }
14329             else {
14330                 s += 32;
14331             }
14332         }
14333         r->used = (4096 + DIGIT_BIT - 1) / DIGIT_BIT;
14334         mp_clamp(r);
14335 #endif
14336     }
14337 
14338     return err;
14339 }
14340 
14341 /* Perform the modular exponentiation for Diffie-Hellman.
14342  *
14343  * base  Base. MP integer.
14344  * exp   Exponent. MP integer.
14345  * mod   Modulus. MP integer.
14346  * res   Result. MP integer.
14347  * returns 0 on success, MP_READ_E if there are too many bytes in an array
14348  * and MEMORY_E if memory allocation fails.
14349  */
sp_ModExp_4096(const mp_int * base,const mp_int * exp,const mp_int * mod,mp_int * res)14350 int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
14351     mp_int* res)
14352 {
14353     int err = MP_OKAY;
14354     sp_digit b[256];
14355     sp_digit e[128];
14356     sp_digit m[128];
14357     sp_digit* r = b;
14358     int expBits = mp_count_bits(exp);
14359 
14360     if (mp_count_bits(base) > 4096) {
14361         err = MP_READ_E;
14362     }
14363     else if (expBits > 4096) {
14364         err = MP_READ_E;
14365     }
14366     else if (mp_count_bits(mod) != 4096) {
14367         err = MP_READ_E;
14368     }
14369     else if (mp_iseven(mod)) {
14370         err = MP_VAL;
14371     }
14372 
14373     if (err == MP_OKAY) {
14374         sp_4096_from_mp(b, 128, base);
14375         sp_4096_from_mp(e, 128, exp);
14376         sp_4096_from_mp(m, 128, mod);
14377 
14378         err = sp_4096_mod_exp_128(r, b, e, expBits, m, 0);
14379     }
14380 
14381     if (err == MP_OKAY) {
14382         err = sp_4096_to_mp(r, res);
14383     }
14384 
14385     XMEMSET(e, 0, sizeof(e));
14386 
14387     return err;
14388 }
14389 
14390 #ifdef WOLFSSL_HAVE_SP_DH
14391 
14392 #ifdef HAVE_FFDHE_4096
sp_4096_lshift_128(sp_digit * r,sp_digit * a,byte n)14393 static void sp_4096_lshift_128(sp_digit* r, sp_digit* a, byte n)
14394 {
14395     __asm__ __volatile__ (
14396         "mov r6, #31\n\t"
14397         "sub r6, r6, %[n]\n\t"
14398         "add       %[a], %[a], #448\n\t"
14399         "add       %[r], %[r], #448\n\t"
14400         "ldr r3, [%[a], #60]\n\t"
14401         "lsr r4, r3, #1\n\t"
14402         "lsl r3, r3, %[n]\n\t"
14403         "lsr r4, r4, r6\n\t"
14404         "ldr       r2, [%[a], #56]\n\t"
14405         "str       r4, [%[r], #64]\n\t"
14406         "lsr       r5, r2, #1\n\t"
14407         "lsl       r2, r2, %[n]\n\t"
14408         "lsr       r5, r5, r6\n\t"
14409         "orr       r3, r3, r5\n\t"
14410         "ldr       r4, [%[a], #52]\n\t"
14411         "str       r3, [%[r], #60]\n\t"
14412         "lsr       r5, r4, #1\n\t"
14413         "lsl       r4, r4, %[n]\n\t"
14414         "lsr       r5, r5, r6\n\t"
14415         "orr       r2, r2, r5\n\t"
14416         "ldr       r3, [%[a], #48]\n\t"
14417         "str       r2, [%[r], #56]\n\t"
14418         "lsr       r5, r3, #1\n\t"
14419         "lsl       r3, r3, %[n]\n\t"
14420         "lsr       r5, r5, r6\n\t"
14421         "orr       r4, r4, r5\n\t"
14422         "ldr       r2, [%[a], #44]\n\t"
14423         "str       r4, [%[r], #52]\n\t"
14424         "lsr       r5, r2, #1\n\t"
14425         "lsl       r2, r2, %[n]\n\t"
14426         "lsr       r5, r5, r6\n\t"
14427         "orr       r3, r3, r5\n\t"
14428         "ldr       r4, [%[a], #40]\n\t"
14429         "str       r3, [%[r], #48]\n\t"
14430         "lsr       r5, r4, #1\n\t"
14431         "lsl       r4, r4, %[n]\n\t"
14432         "lsr       r5, r5, r6\n\t"
14433         "orr       r2, r2, r5\n\t"
14434         "ldr       r3, [%[a], #36]\n\t"
14435         "str       r2, [%[r], #44]\n\t"
14436         "lsr       r5, r3, #1\n\t"
14437         "lsl       r3, r3, %[n]\n\t"
14438         "lsr       r5, r5, r6\n\t"
14439         "orr       r4, r4, r5\n\t"
14440         "ldr       r2, [%[a], #32]\n\t"
14441         "str       r4, [%[r], #40]\n\t"
14442         "lsr       r5, r2, #1\n\t"
14443         "lsl       r2, r2, %[n]\n\t"
14444         "lsr       r5, r5, r6\n\t"
14445         "orr       r3, r3, r5\n\t"
14446         "ldr       r4, [%[a], #28]\n\t"
14447         "str       r3, [%[r], #36]\n\t"
14448         "lsr       r5, r4, #1\n\t"
14449         "lsl       r4, r4, %[n]\n\t"
14450         "lsr       r5, r5, r6\n\t"
14451         "orr       r2, r2, r5\n\t"
14452         "ldr       r3, [%[a], #24]\n\t"
14453         "str       r2, [%[r], #32]\n\t"
14454         "lsr       r5, r3, #1\n\t"
14455         "lsl       r3, r3, %[n]\n\t"
14456         "lsr       r5, r5, r6\n\t"
14457         "orr       r4, r4, r5\n\t"
14458         "ldr       r2, [%[a], #20]\n\t"
14459         "str       r4, [%[r], #28]\n\t"
14460         "lsr       r5, r2, #1\n\t"
14461         "lsl       r2, r2, %[n]\n\t"
14462         "lsr       r5, r5, r6\n\t"
14463         "orr       r3, r3, r5\n\t"
14464         "ldr       r4, [%[a], #16]\n\t"
14465         "str       r3, [%[r], #24]\n\t"
14466         "lsr       r5, r4, #1\n\t"
14467         "lsl       r4, r4, %[n]\n\t"
14468         "lsr       r5, r5, r6\n\t"
14469         "orr       r2, r2, r5\n\t"
14470         "ldr       r3, [%[a], #12]\n\t"
14471         "str       r2, [%[r], #20]\n\t"
14472         "lsr       r5, r3, #1\n\t"
14473         "lsl       r3, r3, %[n]\n\t"
14474         "lsr       r5, r5, r6\n\t"
14475         "orr       r4, r4, r5\n\t"
14476         "ldr       r2, [%[a], #8]\n\t"
14477         "str       r4, [%[r], #16]\n\t"
14478         "lsr       r5, r2, #1\n\t"
14479         "lsl       r2, r2, %[n]\n\t"
14480         "lsr       r5, r5, r6\n\t"
14481         "orr       r3, r3, r5\n\t"
14482         "ldr       r4, [%[a], #4]\n\t"
14483         "str       r3, [%[r], #12]\n\t"
14484         "lsr       r5, r4, #1\n\t"
14485         "lsl       r4, r4, %[n]\n\t"
14486         "lsr       r5, r5, r6\n\t"
14487         "orr       r2, r2, r5\n\t"
14488         "ldr       r3, [%[a], #0]\n\t"
14489         "str       r2, [%[r], #8]\n\t"
14490         "lsr       r5, r3, #1\n\t"
14491         "lsl       r3, r3, %[n]\n\t"
14492         "lsr       r5, r5, r6\n\t"
14493         "orr       r4, r4, r5\n\t"
14494         "sub     %[a], %[a], #64\n\t"
14495         "sub     %[r], %[r], #64\n\t"
14496         "ldr       r2, [%[a], #60]\n\t"
14497         "str       r4, [%[r], #68]\n\t"
14498         "lsr       r5, r2, #1\n\t"
14499         "lsl       r2, r2, %[n]\n\t"
14500         "lsr       r5, r5, r6\n\t"
14501         "orr       r3, r3, r5\n\t"
14502         "ldr       r4, [%[a], #56]\n\t"
14503         "str       r3, [%[r], #64]\n\t"
14504         "lsr       r5, r4, #1\n\t"
14505         "lsl       r4, r4, %[n]\n\t"
14506         "lsr       r5, r5, r6\n\t"
14507         "orr       r2, r2, r5\n\t"
14508         "ldr       r3, [%[a], #52]\n\t"
14509         "str       r2, [%[r], #60]\n\t"
14510         "lsr       r5, r3, #1\n\t"
14511         "lsl       r3, r3, %[n]\n\t"
14512         "lsr       r5, r5, r6\n\t"
14513         "orr       r4, r4, r5\n\t"
14514         "ldr       r2, [%[a], #48]\n\t"
14515         "str       r4, [%[r], #56]\n\t"
14516         "lsr       r5, r2, #1\n\t"
14517         "lsl       r2, r2, %[n]\n\t"
14518         "lsr       r5, r5, r6\n\t"
14519         "orr       r3, r3, r5\n\t"
14520         "ldr       r4, [%[a], #44]\n\t"
14521         "str       r3, [%[r], #52]\n\t"
14522         "lsr       r5, r4, #1\n\t"
14523         "lsl       r4, r4, %[n]\n\t"
14524         "lsr       r5, r5, r6\n\t"
14525         "orr       r2, r2, r5\n\t"
14526         "ldr       r3, [%[a], #40]\n\t"
14527         "str       r2, [%[r], #48]\n\t"
14528         "lsr       r5, r3, #1\n\t"
14529         "lsl       r3, r3, %[n]\n\t"
14530         "lsr       r5, r5, r6\n\t"
14531         "orr       r4, r4, r5\n\t"
14532         "ldr       r2, [%[a], #36]\n\t"
14533         "str       r4, [%[r], #44]\n\t"
14534         "lsr       r5, r2, #1\n\t"
14535         "lsl       r2, r2, %[n]\n\t"
14536         "lsr       r5, r5, r6\n\t"
14537         "orr       r3, r3, r5\n\t"
14538         "ldr       r4, [%[a], #32]\n\t"
14539         "str       r3, [%[r], #40]\n\t"
14540         "lsr       r5, r4, #1\n\t"
14541         "lsl       r4, r4, %[n]\n\t"
14542         "lsr       r5, r5, r6\n\t"
14543         "orr       r2, r2, r5\n\t"
14544         "ldr       r3, [%[a], #28]\n\t"
14545         "str       r2, [%[r], #36]\n\t"
14546         "lsr       r5, r3, #1\n\t"
14547         "lsl       r3, r3, %[n]\n\t"
14548         "lsr       r5, r5, r6\n\t"
14549         "orr       r4, r4, r5\n\t"
14550         "ldr       r2, [%[a], #24]\n\t"
14551         "str       r4, [%[r], #32]\n\t"
14552         "lsr       r5, r2, #1\n\t"
14553         "lsl       r2, r2, %[n]\n\t"
14554         "lsr       r5, r5, r6\n\t"
14555         "orr       r3, r3, r5\n\t"
14556         "ldr       r4, [%[a], #20]\n\t"
14557         "str       r3, [%[r], #28]\n\t"
14558         "lsr       r5, r4, #1\n\t"
14559         "lsl       r4, r4, %[n]\n\t"
14560         "lsr       r5, r5, r6\n\t"
14561         "orr       r2, r2, r5\n\t"
14562         "ldr       r3, [%[a], #16]\n\t"
14563         "str       r2, [%[r], #24]\n\t"
14564         "lsr       r5, r3, #1\n\t"
14565         "lsl       r3, r3, %[n]\n\t"
14566         "lsr       r5, r5, r6\n\t"
14567         "orr       r4, r4, r5\n\t"
14568         "ldr       r2, [%[a], #12]\n\t"
14569         "str       r4, [%[r], #20]\n\t"
14570         "lsr       r5, r2, #1\n\t"
14571         "lsl       r2, r2, %[n]\n\t"
14572         "lsr       r5, r5, r6\n\t"
14573         "orr       r3, r3, r5\n\t"
14574         "ldr       r4, [%[a], #8]\n\t"
14575         "str       r3, [%[r], #16]\n\t"
14576         "lsr       r5, r4, #1\n\t"
14577         "lsl       r4, r4, %[n]\n\t"
14578         "lsr       r5, r5, r6\n\t"
14579         "orr       r2, r2, r5\n\t"
14580         "ldr       r3, [%[a], #4]\n\t"
14581         "str       r2, [%[r], #12]\n\t"
14582         "lsr       r5, r3, #1\n\t"
14583         "lsl       r3, r3, %[n]\n\t"
14584         "lsr       r5, r5, r6\n\t"
14585         "orr       r4, r4, r5\n\t"
14586         "ldr       r2, [%[a], #0]\n\t"
14587         "str       r4, [%[r], #8]\n\t"
14588         "lsr       r5, r2, #1\n\t"
14589         "lsl       r2, r2, %[n]\n\t"
14590         "lsr       r5, r5, r6\n\t"
14591         "orr       r3, r3, r5\n\t"
14592         "sub     %[a], %[a], #64\n\t"
14593         "sub     %[r], %[r], #64\n\t"
14594         "ldr       r4, [%[a], #60]\n\t"
14595         "str       r3, [%[r], #68]\n\t"
14596         "lsr       r5, r4, #1\n\t"
14597         "lsl       r4, r4, %[n]\n\t"
14598         "lsr       r5, r5, r6\n\t"
14599         "orr       r2, r2, r5\n\t"
14600         "ldr       r3, [%[a], #56]\n\t"
14601         "str       r2, [%[r], #64]\n\t"
14602         "lsr       r5, r3, #1\n\t"
14603         "lsl       r3, r3, %[n]\n\t"
14604         "lsr       r5, r5, r6\n\t"
14605         "orr       r4, r4, r5\n\t"
14606         "ldr       r2, [%[a], #52]\n\t"
14607         "str       r4, [%[r], #60]\n\t"
14608         "lsr       r5, r2, #1\n\t"
14609         "lsl       r2, r2, %[n]\n\t"
14610         "lsr       r5, r5, r6\n\t"
14611         "orr       r3, r3, r5\n\t"
14612         "ldr       r4, [%[a], #48]\n\t"
14613         "str       r3, [%[r], #56]\n\t"
14614         "lsr       r5, r4, #1\n\t"
14615         "lsl       r4, r4, %[n]\n\t"
14616         "lsr       r5, r5, r6\n\t"
14617         "orr       r2, r2, r5\n\t"
14618         "ldr       r3, [%[a], #44]\n\t"
14619         "str       r2, [%[r], #52]\n\t"
14620         "lsr       r5, r3, #1\n\t"
14621         "lsl       r3, r3, %[n]\n\t"
14622         "lsr       r5, r5, r6\n\t"
14623         "orr       r4, r4, r5\n\t"
14624         "ldr       r2, [%[a], #40]\n\t"
14625         "str       r4, [%[r], #48]\n\t"
14626         "lsr       r5, r2, #1\n\t"
14627         "lsl       r2, r2, %[n]\n\t"
14628         "lsr       r5, r5, r6\n\t"
14629         "orr       r3, r3, r5\n\t"
14630         "ldr       r4, [%[a], #36]\n\t"
14631         "str       r3, [%[r], #44]\n\t"
14632         "lsr       r5, r4, #1\n\t"
14633         "lsl       r4, r4, %[n]\n\t"
14634         "lsr       r5, r5, r6\n\t"
14635         "orr       r2, r2, r5\n\t"
14636         "ldr       r3, [%[a], #32]\n\t"
14637         "str       r2, [%[r], #40]\n\t"
14638         "lsr       r5, r3, #1\n\t"
14639         "lsl       r3, r3, %[n]\n\t"
14640         "lsr       r5, r5, r6\n\t"
14641         "orr       r4, r4, r5\n\t"
14642         "ldr       r2, [%[a], #28]\n\t"
14643         "str       r4, [%[r], #36]\n\t"
14644         "lsr       r5, r2, #1\n\t"
14645         "lsl       r2, r2, %[n]\n\t"
14646         "lsr       r5, r5, r6\n\t"
14647         "orr       r3, r3, r5\n\t"
14648         "ldr       r4, [%[a], #24]\n\t"
14649         "str       r3, [%[r], #32]\n\t"
14650         "lsr       r5, r4, #1\n\t"
14651         "lsl       r4, r4, %[n]\n\t"
14652         "lsr       r5, r5, r6\n\t"
14653         "orr       r2, r2, r5\n\t"
14654         "ldr       r3, [%[a], #20]\n\t"
14655         "str       r2, [%[r], #28]\n\t"
14656         "lsr       r5, r3, #1\n\t"
14657         "lsl       r3, r3, %[n]\n\t"
14658         "lsr       r5, r5, r6\n\t"
14659         "orr       r4, r4, r5\n\t"
14660         "ldr       r2, [%[a], #16]\n\t"
14661         "str       r4, [%[r], #24]\n\t"
14662         "lsr       r5, r2, #1\n\t"
14663         "lsl       r2, r2, %[n]\n\t"
14664         "lsr       r5, r5, r6\n\t"
14665         "orr       r3, r3, r5\n\t"
14666         "ldr       r4, [%[a], #12]\n\t"
14667         "str       r3, [%[r], #20]\n\t"
14668         "lsr       r5, r4, #1\n\t"
14669         "lsl       r4, r4, %[n]\n\t"
14670         "lsr       r5, r5, r6\n\t"
14671         "orr       r2, r2, r5\n\t"
14672         "ldr       r3, [%[a], #8]\n\t"
14673         "str       r2, [%[r], #16]\n\t"
14674         "lsr       r5, r3, #1\n\t"
14675         "lsl       r3, r3, %[n]\n\t"
14676         "lsr       r5, r5, r6\n\t"
14677         "orr       r4, r4, r5\n\t"
14678         "ldr       r2, [%[a], #4]\n\t"
14679         "str       r4, [%[r], #12]\n\t"
14680         "lsr       r5, r2, #1\n\t"
14681         "lsl       r2, r2, %[n]\n\t"
14682         "lsr       r5, r5, r6\n\t"
14683         "orr       r3, r3, r5\n\t"
14684         "ldr       r4, [%[a], #0]\n\t"
14685         "str       r3, [%[r], #8]\n\t"
14686         "lsr       r5, r4, #1\n\t"
14687         "lsl       r4, r4, %[n]\n\t"
14688         "lsr       r5, r5, r6\n\t"
14689         "orr       r2, r2, r5\n\t"
14690         "sub     %[a], %[a], #64\n\t"
14691         "sub     %[r], %[r], #64\n\t"
14692         "ldr       r3, [%[a], #60]\n\t"
14693         "str       r2, [%[r], #68]\n\t"
14694         "lsr       r5, r3, #1\n\t"
14695         "lsl       r3, r3, %[n]\n\t"
14696         "lsr       r5, r5, r6\n\t"
14697         "orr       r4, r4, r5\n\t"
14698         "ldr       r2, [%[a], #56]\n\t"
14699         "str       r4, [%[r], #64]\n\t"
14700         "lsr       r5, r2, #1\n\t"
14701         "lsl       r2, r2, %[n]\n\t"
14702         "lsr       r5, r5, r6\n\t"
14703         "orr       r3, r3, r5\n\t"
14704         "ldr       r4, [%[a], #52]\n\t"
14705         "str       r3, [%[r], #60]\n\t"
14706         "lsr       r5, r4, #1\n\t"
14707         "lsl       r4, r4, %[n]\n\t"
14708         "lsr       r5, r5, r6\n\t"
14709         "orr       r2, r2, r5\n\t"
14710         "ldr       r3, [%[a], #48]\n\t"
14711         "str       r2, [%[r], #56]\n\t"
14712         "lsr       r5, r3, #1\n\t"
14713         "lsl       r3, r3, %[n]\n\t"
14714         "lsr       r5, r5, r6\n\t"
14715         "orr       r4, r4, r5\n\t"
14716         "ldr       r2, [%[a], #44]\n\t"
14717         "str       r4, [%[r], #52]\n\t"
14718         "lsr       r5, r2, #1\n\t"
14719         "lsl       r2, r2, %[n]\n\t"
14720         "lsr       r5, r5, r6\n\t"
14721         "orr       r3, r3, r5\n\t"
14722         "ldr       r4, [%[a], #40]\n\t"
14723         "str       r3, [%[r], #48]\n\t"
14724         "lsr       r5, r4, #1\n\t"
14725         "lsl       r4, r4, %[n]\n\t"
14726         "lsr       r5, r5, r6\n\t"
14727         "orr       r2, r2, r5\n\t"
14728         "ldr       r3, [%[a], #36]\n\t"
14729         "str       r2, [%[r], #44]\n\t"
14730         "lsr       r5, r3, #1\n\t"
14731         "lsl       r3, r3, %[n]\n\t"
14732         "lsr       r5, r5, r6\n\t"
14733         "orr       r4, r4, r5\n\t"
14734         "ldr       r2, [%[a], #32]\n\t"
14735         "str       r4, [%[r], #40]\n\t"
14736         "lsr       r5, r2, #1\n\t"
14737         "lsl       r2, r2, %[n]\n\t"
14738         "lsr       r5, r5, r6\n\t"
14739         "orr       r3, r3, r5\n\t"
14740         "ldr       r4, [%[a], #28]\n\t"
14741         "str       r3, [%[r], #36]\n\t"
14742         "lsr       r5, r4, #1\n\t"
14743         "lsl       r4, r4, %[n]\n\t"
14744         "lsr       r5, r5, r6\n\t"
14745         "orr       r2, r2, r5\n\t"
14746         "ldr       r3, [%[a], #24]\n\t"
14747         "str       r2, [%[r], #32]\n\t"
14748         "lsr       r5, r3, #1\n\t"
14749         "lsl       r3, r3, %[n]\n\t"
14750         "lsr       r5, r5, r6\n\t"
14751         "orr       r4, r4, r5\n\t"
14752         "ldr       r2, [%[a], #20]\n\t"
14753         "str       r4, [%[r], #28]\n\t"
14754         "lsr       r5, r2, #1\n\t"
14755         "lsl       r2, r2, %[n]\n\t"
14756         "lsr       r5, r5, r6\n\t"
14757         "orr       r3, r3, r5\n\t"
14758         "ldr       r4, [%[a], #16]\n\t"
14759         "str       r3, [%[r], #24]\n\t"
14760         "lsr       r5, r4, #1\n\t"
14761         "lsl       r4, r4, %[n]\n\t"
14762         "lsr       r5, r5, r6\n\t"
14763         "orr       r2, r2, r5\n\t"
14764         "ldr       r3, [%[a], #12]\n\t"
14765         "str       r2, [%[r], #20]\n\t"
14766         "lsr       r5, r3, #1\n\t"
14767         "lsl       r3, r3, %[n]\n\t"
14768         "lsr       r5, r5, r6\n\t"
14769         "orr       r4, r4, r5\n\t"
14770         "ldr       r2, [%[a], #8]\n\t"
14771         "str       r4, [%[r], #16]\n\t"
14772         "lsr       r5, r2, #1\n\t"
14773         "lsl       r2, r2, %[n]\n\t"
14774         "lsr       r5, r5, r6\n\t"
14775         "orr       r3, r3, r5\n\t"
14776         "ldr       r4, [%[a], #4]\n\t"
14777         "str       r3, [%[r], #12]\n\t"
14778         "lsr       r5, r4, #1\n\t"
14779         "lsl       r4, r4, %[n]\n\t"
14780         "lsr       r5, r5, r6\n\t"
14781         "orr       r2, r2, r5\n\t"
14782         "ldr       r3, [%[a], #0]\n\t"
14783         "str       r2, [%[r], #8]\n\t"
14784         "lsr       r5, r3, #1\n\t"
14785         "lsl       r3, r3, %[n]\n\t"
14786         "lsr       r5, r5, r6\n\t"
14787         "orr       r4, r4, r5\n\t"
14788         "sub     %[a], %[a], #64\n\t"
14789         "sub     %[r], %[r], #64\n\t"
14790         "ldr       r2, [%[a], #60]\n\t"
14791         "str       r4, [%[r], #68]\n\t"
14792         "lsr       r5, r2, #1\n\t"
14793         "lsl       r2, r2, %[n]\n\t"
14794         "lsr       r5, r5, r6\n\t"
14795         "orr       r3, r3, r5\n\t"
14796         "ldr       r4, [%[a], #56]\n\t"
14797         "str       r3, [%[r], #64]\n\t"
14798         "lsr       r5, r4, #1\n\t"
14799         "lsl       r4, r4, %[n]\n\t"
14800         "lsr       r5, r5, r6\n\t"
14801         "orr       r2, r2, r5\n\t"
14802         "ldr       r3, [%[a], #52]\n\t"
14803         "str       r2, [%[r], #60]\n\t"
14804         "lsr       r5, r3, #1\n\t"
14805         "lsl       r3, r3, %[n]\n\t"
14806         "lsr       r5, r5, r6\n\t"
14807         "orr       r4, r4, r5\n\t"
14808         "ldr       r2, [%[a], #48]\n\t"
14809         "str       r4, [%[r], #56]\n\t"
14810         "lsr       r5, r2, #1\n\t"
14811         "lsl       r2, r2, %[n]\n\t"
14812         "lsr       r5, r5, r6\n\t"
14813         "orr       r3, r3, r5\n\t"
14814         "ldr       r4, [%[a], #44]\n\t"
14815         "str       r3, [%[r], #52]\n\t"
14816         "lsr       r5, r4, #1\n\t"
14817         "lsl       r4, r4, %[n]\n\t"
14818         "lsr       r5, r5, r6\n\t"
14819         "orr       r2, r2, r5\n\t"
14820         "ldr       r3, [%[a], #40]\n\t"
14821         "str       r2, [%[r], #48]\n\t"
14822         "lsr       r5, r3, #1\n\t"
14823         "lsl       r3, r3, %[n]\n\t"
14824         "lsr       r5, r5, r6\n\t"
14825         "orr       r4, r4, r5\n\t"
14826         "ldr       r2, [%[a], #36]\n\t"
14827         "str       r4, [%[r], #44]\n\t"
14828         "lsr       r5, r2, #1\n\t"
14829         "lsl       r2, r2, %[n]\n\t"
14830         "lsr       r5, r5, r6\n\t"
14831         "orr       r3, r3, r5\n\t"
14832         "ldr       r4, [%[a], #32]\n\t"
14833         "str       r3, [%[r], #40]\n\t"
14834         "lsr       r5, r4, #1\n\t"
14835         "lsl       r4, r4, %[n]\n\t"
14836         "lsr       r5, r5, r6\n\t"
14837         "orr       r2, r2, r5\n\t"
14838         "ldr       r3, [%[a], #28]\n\t"
14839         "str       r2, [%[r], #36]\n\t"
14840         "lsr       r5, r3, #1\n\t"
14841         "lsl       r3, r3, %[n]\n\t"
14842         "lsr       r5, r5, r6\n\t"
14843         "orr       r4, r4, r5\n\t"
14844         "ldr       r2, [%[a], #24]\n\t"
14845         "str       r4, [%[r], #32]\n\t"
14846         "lsr       r5, r2, #1\n\t"
14847         "lsl       r2, r2, %[n]\n\t"
14848         "lsr       r5, r5, r6\n\t"
14849         "orr       r3, r3, r5\n\t"
14850         "ldr       r4, [%[a], #20]\n\t"
14851         "str       r3, [%[r], #28]\n\t"
14852         "lsr       r5, r4, #1\n\t"
14853         "lsl       r4, r4, %[n]\n\t"
14854         "lsr       r5, r5, r6\n\t"
14855         "orr       r2, r2, r5\n\t"
14856         "ldr       r3, [%[a], #16]\n\t"
14857         "str       r2, [%[r], #24]\n\t"
14858         "lsr       r5, r3, #1\n\t"
14859         "lsl       r3, r3, %[n]\n\t"
14860         "lsr       r5, r5, r6\n\t"
14861         "orr       r4, r4, r5\n\t"
14862         "ldr       r2, [%[a], #12]\n\t"
14863         "str       r4, [%[r], #20]\n\t"
14864         "lsr       r5, r2, #1\n\t"
14865         "lsl       r2, r2, %[n]\n\t"
14866         "lsr       r5, r5, r6\n\t"
14867         "orr       r3, r3, r5\n\t"
14868         "ldr       r4, [%[a], #8]\n\t"
14869         "str       r3, [%[r], #16]\n\t"
14870         "lsr       r5, r4, #1\n\t"
14871         "lsl       r4, r4, %[n]\n\t"
14872         "lsr       r5, r5, r6\n\t"
14873         "orr       r2, r2, r5\n\t"
14874         "ldr       r3, [%[a], #4]\n\t"
14875         "str       r2, [%[r], #12]\n\t"
14876         "lsr       r5, r3, #1\n\t"
14877         "lsl       r3, r3, %[n]\n\t"
14878         "lsr       r5, r5, r6\n\t"
14879         "orr       r4, r4, r5\n\t"
14880         "ldr       r2, [%[a], #0]\n\t"
14881         "str       r4, [%[r], #8]\n\t"
14882         "lsr       r5, r2, #1\n\t"
14883         "lsl       r2, r2, %[n]\n\t"
14884         "lsr       r5, r5, r6\n\t"
14885         "orr       r3, r3, r5\n\t"
14886         "sub     %[a], %[a], #64\n\t"
14887         "sub     %[r], %[r], #64\n\t"
14888         "ldr       r4, [%[a], #60]\n\t"
14889         "str       r3, [%[r], #68]\n\t"
14890         "lsr       r5, r4, #1\n\t"
14891         "lsl       r4, r4, %[n]\n\t"
14892         "lsr       r5, r5, r6\n\t"
14893         "orr       r2, r2, r5\n\t"
14894         "ldr       r3, [%[a], #56]\n\t"
14895         "str       r2, [%[r], #64]\n\t"
14896         "lsr       r5, r3, #1\n\t"
14897         "lsl       r3, r3, %[n]\n\t"
14898         "lsr       r5, r5, r6\n\t"
14899         "orr       r4, r4, r5\n\t"
14900         "ldr       r2, [%[a], #52]\n\t"
14901         "str       r4, [%[r], #60]\n\t"
14902         "lsr       r5, r2, #1\n\t"
14903         "lsl       r2, r2, %[n]\n\t"
14904         "lsr       r5, r5, r6\n\t"
14905         "orr       r3, r3, r5\n\t"
14906         "ldr       r4, [%[a], #48]\n\t"
14907         "str       r3, [%[r], #56]\n\t"
14908         "lsr       r5, r4, #1\n\t"
14909         "lsl       r4, r4, %[n]\n\t"
14910         "lsr       r5, r5, r6\n\t"
14911         "orr       r2, r2, r5\n\t"
14912         "ldr       r3, [%[a], #44]\n\t"
14913         "str       r2, [%[r], #52]\n\t"
14914         "lsr       r5, r3, #1\n\t"
14915         "lsl       r3, r3, %[n]\n\t"
14916         "lsr       r5, r5, r6\n\t"
14917         "orr       r4, r4, r5\n\t"
14918         "ldr       r2, [%[a], #40]\n\t"
14919         "str       r4, [%[r], #48]\n\t"
14920         "lsr       r5, r2, #1\n\t"
14921         "lsl       r2, r2, %[n]\n\t"
14922         "lsr       r5, r5, r6\n\t"
14923         "orr       r3, r3, r5\n\t"
14924         "ldr       r4, [%[a], #36]\n\t"
14925         "str       r3, [%[r], #44]\n\t"
14926         "lsr       r5, r4, #1\n\t"
14927         "lsl       r4, r4, %[n]\n\t"
14928         "lsr       r5, r5, r6\n\t"
14929         "orr       r2, r2, r5\n\t"
14930         "ldr       r3, [%[a], #32]\n\t"
14931         "str       r2, [%[r], #40]\n\t"
14932         "lsr       r5, r3, #1\n\t"
14933         "lsl       r3, r3, %[n]\n\t"
14934         "lsr       r5, r5, r6\n\t"
14935         "orr       r4, r4, r5\n\t"
14936         "ldr       r2, [%[a], #28]\n\t"
14937         "str       r4, [%[r], #36]\n\t"
14938         "lsr       r5, r2, #1\n\t"
14939         "lsl       r2, r2, %[n]\n\t"
14940         "lsr       r5, r5, r6\n\t"
14941         "orr       r3, r3, r5\n\t"
14942         "ldr       r4, [%[a], #24]\n\t"
14943         "str       r3, [%[r], #32]\n\t"
14944         "lsr       r5, r4, #1\n\t"
14945         "lsl       r4, r4, %[n]\n\t"
14946         "lsr       r5, r5, r6\n\t"
14947         "orr       r2, r2, r5\n\t"
14948         "ldr       r3, [%[a], #20]\n\t"
14949         "str       r2, [%[r], #28]\n\t"
14950         "lsr       r5, r3, #1\n\t"
14951         "lsl       r3, r3, %[n]\n\t"
14952         "lsr       r5, r5, r6\n\t"
14953         "orr       r4, r4, r5\n\t"
14954         "ldr       r2, [%[a], #16]\n\t"
14955         "str       r4, [%[r], #24]\n\t"
14956         "lsr       r5, r2, #1\n\t"
14957         "lsl       r2, r2, %[n]\n\t"
14958         "lsr       r5, r5, r6\n\t"
14959         "orr       r3, r3, r5\n\t"
14960         "ldr       r4, [%[a], #12]\n\t"
14961         "str       r3, [%[r], #20]\n\t"
14962         "lsr       r5, r4, #1\n\t"
14963         "lsl       r4, r4, %[n]\n\t"
14964         "lsr       r5, r5, r6\n\t"
14965         "orr       r2, r2, r5\n\t"
14966         "ldr       r3, [%[a], #8]\n\t"
14967         "str       r2, [%[r], #16]\n\t"
14968         "lsr       r5, r3, #1\n\t"
14969         "lsl       r3, r3, %[n]\n\t"
14970         "lsr       r5, r5, r6\n\t"
14971         "orr       r4, r4, r5\n\t"
14972         "ldr       r2, [%[a], #4]\n\t"
14973         "str       r4, [%[r], #12]\n\t"
14974         "lsr       r5, r2, #1\n\t"
14975         "lsl       r2, r2, %[n]\n\t"
14976         "lsr       r5, r5, r6\n\t"
14977         "orr       r3, r3, r5\n\t"
14978         "ldr       r4, [%[a], #0]\n\t"
14979         "str       r3, [%[r], #8]\n\t"
14980         "lsr       r5, r4, #1\n\t"
14981         "lsl       r4, r4, %[n]\n\t"
14982         "lsr       r5, r5, r6\n\t"
14983         "orr       r2, r2, r5\n\t"
14984         "sub     %[a], %[a], #64\n\t"
14985         "sub     %[r], %[r], #64\n\t"
14986         "ldr       r3, [%[a], #60]\n\t"
14987         "str       r2, [%[r], #68]\n\t"
14988         "lsr       r5, r3, #1\n\t"
14989         "lsl       r3, r3, %[n]\n\t"
14990         "lsr       r5, r5, r6\n\t"
14991         "orr       r4, r4, r5\n\t"
14992         "ldr       r2, [%[a], #56]\n\t"
14993         "str       r4, [%[r], #64]\n\t"
14994         "lsr       r5, r2, #1\n\t"
14995         "lsl       r2, r2, %[n]\n\t"
14996         "lsr       r5, r5, r6\n\t"
14997         "orr       r3, r3, r5\n\t"
14998         "ldr       r4, [%[a], #52]\n\t"
14999         "str       r3, [%[r], #60]\n\t"
15000         "lsr       r5, r4, #1\n\t"
15001         "lsl       r4, r4, %[n]\n\t"
15002         "lsr       r5, r5, r6\n\t"
15003         "orr       r2, r2, r5\n\t"
15004         "ldr       r3, [%[a], #48]\n\t"
15005         "str       r2, [%[r], #56]\n\t"
15006         "lsr       r5, r3, #1\n\t"
15007         "lsl       r3, r3, %[n]\n\t"
15008         "lsr       r5, r5, r6\n\t"
15009         "orr       r4, r4, r5\n\t"
15010         "ldr       r2, [%[a], #44]\n\t"
15011         "str       r4, [%[r], #52]\n\t"
15012         "lsr       r5, r2, #1\n\t"
15013         "lsl       r2, r2, %[n]\n\t"
15014         "lsr       r5, r5, r6\n\t"
15015         "orr       r3, r3, r5\n\t"
15016         "ldr       r4, [%[a], #40]\n\t"
15017         "str       r3, [%[r], #48]\n\t"
15018         "lsr       r5, r4, #1\n\t"
15019         "lsl       r4, r4, %[n]\n\t"
15020         "lsr       r5, r5, r6\n\t"
15021         "orr       r2, r2, r5\n\t"
15022         "ldr       r3, [%[a], #36]\n\t"
15023         "str       r2, [%[r], #44]\n\t"
15024         "lsr       r5, r3, #1\n\t"
15025         "lsl       r3, r3, %[n]\n\t"
15026         "lsr       r5, r5, r6\n\t"
15027         "orr       r4, r4, r5\n\t"
15028         "ldr       r2, [%[a], #32]\n\t"
15029         "str       r4, [%[r], #40]\n\t"
15030         "lsr       r5, r2, #1\n\t"
15031         "lsl       r2, r2, %[n]\n\t"
15032         "lsr       r5, r5, r6\n\t"
15033         "orr       r3, r3, r5\n\t"
15034         "ldr       r4, [%[a], #28]\n\t"
15035         "str       r3, [%[r], #36]\n\t"
15036         "lsr       r5, r4, #1\n\t"
15037         "lsl       r4, r4, %[n]\n\t"
15038         "lsr       r5, r5, r6\n\t"
15039         "orr       r2, r2, r5\n\t"
15040         "ldr       r3, [%[a], #24]\n\t"
15041         "str       r2, [%[r], #32]\n\t"
15042         "lsr       r5, r3, #1\n\t"
15043         "lsl       r3, r3, %[n]\n\t"
15044         "lsr       r5, r5, r6\n\t"
15045         "orr       r4, r4, r5\n\t"
15046         "ldr       r2, [%[a], #20]\n\t"
15047         "str       r4, [%[r], #28]\n\t"
15048         "lsr       r5, r2, #1\n\t"
15049         "lsl       r2, r2, %[n]\n\t"
15050         "lsr       r5, r5, r6\n\t"
15051         "orr       r3, r3, r5\n\t"
15052         "ldr       r4, [%[a], #16]\n\t"
15053         "str       r3, [%[r], #24]\n\t"
15054         "lsr       r5, r4, #1\n\t"
15055         "lsl       r4, r4, %[n]\n\t"
15056         "lsr       r5, r5, r6\n\t"
15057         "orr       r2, r2, r5\n\t"
15058         "ldr       r3, [%[a], #12]\n\t"
15059         "str       r2, [%[r], #20]\n\t"
15060         "lsr       r5, r3, #1\n\t"
15061         "lsl       r3, r3, %[n]\n\t"
15062         "lsr       r5, r5, r6\n\t"
15063         "orr       r4, r4, r5\n\t"
15064         "ldr       r2, [%[a], #8]\n\t"
15065         "str       r4, [%[r], #16]\n\t"
15066         "lsr       r5, r2, #1\n\t"
15067         "lsl       r2, r2, %[n]\n\t"
15068         "lsr       r5, r5, r6\n\t"
15069         "orr       r3, r3, r5\n\t"
15070         "ldr       r4, [%[a], #4]\n\t"
15071         "str       r3, [%[r], #12]\n\t"
15072         "lsr       r5, r4, #1\n\t"
15073         "lsl       r4, r4, %[n]\n\t"
15074         "lsr       r5, r5, r6\n\t"
15075         "orr       r2, r2, r5\n\t"
15076         "ldr       r3, [%[a], #0]\n\t"
15077         "str       r2, [%[r], #8]\n\t"
15078         "lsr       r5, r3, #1\n\t"
15079         "lsl       r3, r3, %[n]\n\t"
15080         "lsr       r5, r5, r6\n\t"
15081         "orr       r4, r4, r5\n\t"
15082         "sub     %[a], %[a], #64\n\t"
15083         "sub     %[r], %[r], #64\n\t"
15084         "ldr       r2, [%[a], #60]\n\t"
15085         "str       r4, [%[r], #68]\n\t"
15086         "lsr       r5, r2, #1\n\t"
15087         "lsl       r2, r2, %[n]\n\t"
15088         "lsr       r5, r5, r6\n\t"
15089         "orr       r3, r3, r5\n\t"
15090         "ldr       r4, [%[a], #56]\n\t"
15091         "str       r3, [%[r], #64]\n\t"
15092         "lsr       r5, r4, #1\n\t"
15093         "lsl       r4, r4, %[n]\n\t"
15094         "lsr       r5, r5, r6\n\t"
15095         "orr       r2, r2, r5\n\t"
15096         "ldr       r3, [%[a], #52]\n\t"
15097         "str       r2, [%[r], #60]\n\t"
15098         "lsr       r5, r3, #1\n\t"
15099         "lsl       r3, r3, %[n]\n\t"
15100         "lsr       r5, r5, r6\n\t"
15101         "orr       r4, r4, r5\n\t"
15102         "ldr       r2, [%[a], #48]\n\t"
15103         "str       r4, [%[r], #56]\n\t"
15104         "lsr       r5, r2, #1\n\t"
15105         "lsl       r2, r2, %[n]\n\t"
15106         "lsr       r5, r5, r6\n\t"
15107         "orr       r3, r3, r5\n\t"
15108         "ldr       r4, [%[a], #44]\n\t"
15109         "str       r3, [%[r], #52]\n\t"
15110         "lsr       r5, r4, #1\n\t"
15111         "lsl       r4, r4, %[n]\n\t"
15112         "lsr       r5, r5, r6\n\t"
15113         "orr       r2, r2, r5\n\t"
15114         "ldr       r3, [%[a], #40]\n\t"
15115         "str       r2, [%[r], #48]\n\t"
15116         "lsr       r5, r3, #1\n\t"
15117         "lsl       r3, r3, %[n]\n\t"
15118         "lsr       r5, r5, r6\n\t"
15119         "orr       r4, r4, r5\n\t"
15120         "ldr       r2, [%[a], #36]\n\t"
15121         "str       r4, [%[r], #44]\n\t"
15122         "lsr       r5, r2, #1\n\t"
15123         "lsl       r2, r2, %[n]\n\t"
15124         "lsr       r5, r5, r6\n\t"
15125         "orr       r3, r3, r5\n\t"
15126         "ldr       r4, [%[a], #32]\n\t"
15127         "str       r3, [%[r], #40]\n\t"
15128         "lsr       r5, r4, #1\n\t"
15129         "lsl       r4, r4, %[n]\n\t"
15130         "lsr       r5, r5, r6\n\t"
15131         "orr       r2, r2, r5\n\t"
15132         "ldr       r3, [%[a], #28]\n\t"
15133         "str       r2, [%[r], #36]\n\t"
15134         "lsr       r5, r3, #1\n\t"
15135         "lsl       r3, r3, %[n]\n\t"
15136         "lsr       r5, r5, r6\n\t"
15137         "orr       r4, r4, r5\n\t"
15138         "ldr       r2, [%[a], #24]\n\t"
15139         "str       r4, [%[r], #32]\n\t"
15140         "lsr       r5, r2, #1\n\t"
15141         "lsl       r2, r2, %[n]\n\t"
15142         "lsr       r5, r5, r6\n\t"
15143         "orr       r3, r3, r5\n\t"
15144         "ldr       r4, [%[a], #20]\n\t"
15145         "str       r3, [%[r], #28]\n\t"
15146         "lsr       r5, r4, #1\n\t"
15147         "lsl       r4, r4, %[n]\n\t"
15148         "lsr       r5, r5, r6\n\t"
15149         "orr       r2, r2, r5\n\t"
15150         "ldr       r3, [%[a], #16]\n\t"
15151         "str       r2, [%[r], #24]\n\t"
15152         "lsr       r5, r3, #1\n\t"
15153         "lsl       r3, r3, %[n]\n\t"
15154         "lsr       r5, r5, r6\n\t"
15155         "orr       r4, r4, r5\n\t"
15156         "ldr       r2, [%[a], #12]\n\t"
15157         "str       r4, [%[r], #20]\n\t"
15158         "lsr       r5, r2, #1\n\t"
15159         "lsl       r2, r2, %[n]\n\t"
15160         "lsr       r5, r5, r6\n\t"
15161         "orr       r3, r3, r5\n\t"
15162         "ldr       r4, [%[a], #8]\n\t"
15163         "str       r3, [%[r], #16]\n\t"
15164         "lsr       r5, r4, #1\n\t"
15165         "lsl       r4, r4, %[n]\n\t"
15166         "lsr       r5, r5, r6\n\t"
15167         "orr       r2, r2, r5\n\t"
15168         "ldr       r3, [%[a], #4]\n\t"
15169         "str       r2, [%[r], #12]\n\t"
15170         "lsr       r5, r3, #1\n\t"
15171         "lsl       r3, r3, %[n]\n\t"
15172         "lsr       r5, r5, r6\n\t"
15173         "orr       r4, r4, r5\n\t"
15174         "ldr       r2, [%[a], #0]\n\t"
15175         "str       r4, [%[r], #8]\n\t"
15176         "lsr       r5, r2, #1\n\t"
15177         "lsl       r2, r2, %[n]\n\t"
15178         "lsr       r5, r5, r6\n\t"
15179         "orr       r3, r3, r5\n\t"
15180         "str r2, [%[r]]\n\t"
15181         "str r3, [%[r], #4]\n\t"
15182         :
15183         : [r] "r" (r), [a] "r" (a), [n] "r" (n)
15184         : "memory", "r2", "r3", "r4", "r5", "r6"
15185     );
15186 }
15187 
15188 /* Modular exponentiate 2 to the e mod m. (r = 2^e mod m)
15189  *
15190  * r     A single precision number that is the result of the operation.
15191  * e     A single precision number that is the exponent.
15192  * bits  The number of bits in the exponent.
15193  * m     A single precision number that is the modulus.
15194  * returns  0 on success.
15195  * returns  MEMORY_E on dynamic memory allocation failure.
15196  * returns  MP_VAL when base is even.
15197  */
sp_4096_mod_exp_2_128(sp_digit * r,const sp_digit * e,int bits,const sp_digit * m)15198 static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits,
15199         const sp_digit* m)
15200 {
15201 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15202     sp_digit* td = NULL;
15203 #else
15204     sp_digit td[385];
15205 #endif
15206     sp_digit* norm = NULL;
15207     sp_digit* tmp = NULL;
15208     sp_digit mp = 1;
15209     sp_digit n;
15210     sp_digit o;
15211     sp_digit mask;
15212     int i;
15213     int c;
15214     byte y;
15215     int err = MP_OKAY;
15216 
15217     if ((m[0] & 1) == 0) {
15218         err = MP_VAL;
15219     }
15220 
15221 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15222     if (err == MP_OKAY) {
15223         td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 385, NULL,
15224                                 DYNAMIC_TYPE_TMP_BUFFER);
15225         if (td == NULL)
15226             err = MEMORY_E;
15227     }
15228 #endif
15229 
15230     if (err == MP_OKAY) {
15231         norm = td;
15232         tmp = td + 256;
15233 
15234         sp_4096_mont_setup(m, &mp);
15235         sp_4096_mont_norm_128(norm, m);
15236 
15237         i = (bits - 1) / 32;
15238         n = e[i--];
15239         c = bits & 31;
15240         if (c == 0) {
15241             c = 32;
15242         }
15243         c -= bits % 5;
15244         if (c == 32) {
15245             c = 27;
15246         }
15247         if (c < 0) {
15248             /* Number of bits in top word is less than number needed. */
15249             c = -c;
15250             y = (byte)(n << c);
15251             n = e[i--];
15252             y |= (byte)(n >> (64 - c));
15253             n <<= c;
15254             c = 64 - c;
15255         }
15256         else {
15257             y = (byte)(n >> c);
15258             n <<= 32 - c;
15259         }
15260         sp_4096_lshift_128(r, norm, y);
15261         for (; i>=0 || c>=5; ) {
15262             if (c == 0) {
15263                 n = e[i--];
15264                 y = (byte)(n >> 27);
15265                 n <<= 5;
15266                 c = 27;
15267             }
15268             else if (c < 5) {
15269                 y = (byte)(n >> 27);
15270                 n = e[i--];
15271                 c = 5 - c;
15272                 y |= (byte)(n >> (32 - c));
15273                 n <<= c;
15274                 c = 32 - c;
15275             }
15276             else {
15277                 y = (byte)((n >> 27) & 0x1f);
15278                 n <<= 5;
15279                 c -= 5;
15280             }
15281 
15282             sp_4096_mont_sqr_128(r, r, m, mp);
15283             sp_4096_mont_sqr_128(r, r, m, mp);
15284             sp_4096_mont_sqr_128(r, r, m, mp);
15285             sp_4096_mont_sqr_128(r, r, m, mp);
15286             sp_4096_mont_sqr_128(r, r, m, mp);
15287 
15288             sp_4096_lshift_128(r, r, y);
15289             sp_4096_mul_d_128(tmp, norm, r[128]);
15290             r[128] = 0;
15291             o = sp_4096_add_128(r, r, tmp);
15292             sp_4096_cond_sub_128(r, r, m, (sp_digit)0 - o);
15293         }
15294 
15295         XMEMSET(&r[128], 0, sizeof(sp_digit) * 128U);
15296         sp_4096_mont_reduce_128(r, m, mp);
15297 
15298         mask = 0 - (sp_4096_cmp_128(r, m) >= 0);
15299         sp_4096_cond_sub_128(r, r, m, mask);
15300     }
15301 
15302 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
15303     if (td != NULL)
15304         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
15305 #endif
15306 
15307     return err;
15308 }
15309 #endif /* HAVE_FFDHE_4096 */
15310 
15311 /* Perform the modular exponentiation for Diffie-Hellman.
15312  *
15313  * base     Base.
15314  * exp      Array of bytes that is the exponent.
15315  * expLen   Length of data, in bytes, in exponent.
15316  * mod      Modulus.
15317  * out      Buffer to hold big-endian bytes of exponentiation result.
15318  *          Must be at least 512 bytes long.
15319  * outLen   Length, in bytes, of exponentiation result.
15320  * returns 0 on success, MP_READ_E if there are too many bytes in an array
15321  * and MEMORY_E if memory allocation fails.
15322  */
sp_DhExp_4096(const mp_int * base,const byte * exp,word32 expLen,const mp_int * mod,byte * out,word32 * outLen)15323 int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
15324     const mp_int* mod, byte* out, word32* outLen)
15325 {
15326     int err = MP_OKAY;
15327     sp_digit b[256];
15328     sp_digit e[128];
15329     sp_digit m[128];
15330     sp_digit* r = b;
15331     word32 i;
15332 
15333     if (mp_count_bits(base) > 4096) {
15334         err = MP_READ_E;
15335     }
15336     else if (expLen > 512) {
15337         err = MP_READ_E;
15338     }
15339     else if (mp_count_bits(mod) != 4096) {
15340         err = MP_READ_E;
15341     }
15342     else if (mp_iseven(mod)) {
15343         err = MP_VAL;
15344     }
15345 
15346     if (err == MP_OKAY) {
15347         sp_4096_from_mp(b, 128, base);
15348         sp_4096_from_bin(e, 128, exp, expLen);
15349         sp_4096_from_mp(m, 128, mod);
15350 
15351     #ifdef HAVE_FFDHE_4096
15352         if (base->used == 1 && base->dp[0] == 2 && m[127] == (sp_digit)-1)
15353             err = sp_4096_mod_exp_2_128(r, e, expLen * 8, m);
15354         else
15355     #endif
15356             err = sp_4096_mod_exp_128(r, b, e, expLen * 8, m, 0);
15357 
15358     }
15359 
15360     if (err == MP_OKAY) {
15361         sp_4096_to_bin_128(r, out);
15362         *outLen = 512;
15363         for (i=0; i<512 && out[i] == 0; i++) {
15364             /* Search for first non-zero. */
15365         }
15366         *outLen -= i;
15367         XMEMMOVE(out, out + i, *outLen);
15368 
15369     }
15370 
15371     XMEMSET(e, 0, sizeof(e));
15372 
15373     return err;
15374 }
15375 #endif /* WOLFSSL_HAVE_SP_DH */
15376 
15377 #endif /* WOLFSSL_HAVE_SP_DH | (WOLFSSL_HAVE_SP_RSA & !WOLFSSL_RSA_PUBLIC_ONLY) */
15378 
15379 #endif /* WOLFSSL_SP_4096 */
15380 
15381 #endif /* WOLFSSL_HAVE_SP_RSA | WOLFSSL_HAVE_SP_DH */
15382 #ifdef WOLFSSL_HAVE_SP_ECC
15383 #ifndef WOLFSSL_SP_NO_256
15384 
/* Point structure to use.
 *
 * Holds a point on the P256 curve as three ordinates (X, Y, Z) and a flag
 * for the point at infinity. Each ordinate array is 2 * 8 digits long while
 * the 256-bit curve constants in this file occupy 8 digits.
 * NOTE(review): the extra 8 digits are presumably working space for
 * double-width intermediate results - confirm against the point routines.
 */
typedef struct sp_point_256 {
    /* X ordinate of point. */
    sp_digit x[2 * 8];
    /* Y ordinate of point. */
    sp_digit y[2 * 8];
    /* Z ordinate of point. */
    sp_digit z[2 * 8];
    /* Indicates point is at infinity. */
    int infinity;
} sp_point_256;
15396 
/* The modulus (prime) of the curve P256.
 * Stored as eight 32-bit words, least significant word first.
 */
static const sp_digit p256_mod[8] = {
    0xffffffff,0xffffffff,0xffffffff,0x00000000,0x00000000,0x00000000,
    0x00000001,0xffffffff
};
/* The Montgomery normalizer for modulus of the curve P256.
 * Equal to 2^256 minus the modulus, least significant word first.
 */
static const sp_digit p256_norm_mod[8] = {
    0x00000001,0x00000000,0x00000000,0xffffffff,0xffffffff,0xffffffff,
    0xfffffffe,0x00000000
};
/* The Montgomery multiplier for modulus of the curve P256.
 * The lowest word of the modulus is 0xffffffff, so the negated inverse
 * modulo 2^32 is 1.
 */
static const sp_digit p256_mp_mod = 0x00000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P256.
 * Stored as eight 32-bit words, least significant word first.
 */
static const sp_digit p256_order[8] = {
    0xfc632551,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
    0x00000000,0xffffffff
};
#endif
/* The order of the curve P256 minus 2.
 * NOTE(review): presumably the exponent used when inverting modulo the
 * order - confirm against the users of this constant.
 */
static const sp_digit p256_order2[8] = {
    0xfc63254f,0xf3b9cac2,0xa7179e84,0xbce6faad,0xffffffff,0xffffffff,
    0x00000000,0xffffffff
};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P256.
 * Equal to 2^256 minus the order, least significant word first.
 */
static const sp_digit p256_norm_order[8] = {
    0x039cdaaf,0x0c46353d,0x58e8617b,0x43190552,0x00000000,0x00000000,
    0xffffffff,0x00000000
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P256. */
static const sp_digit p256_mp_order = 0xee00bc4f;
#endif
/* The base point of curve P256.
 *
 * Each ordinate holds its 256-bit value in the first eight words, least
 * significant word first; the remaining eight words are zero. Z is the
 * plain value 1 and the point is not at infinity.
 */
static const sp_point_256 p256_base = {
    /* X ordinate */
    {
        0xd898c296,0xf4a13945,0x2deb33a0,0x77037d81,0x63a440f2,0xf8bce6e5,
        0xe12c4247,0x6b17d1f2,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0
    },
    /* Y ordinate */
    {
        0x37bf51f5,0xcbb64068,0x6b315ece,0x2bce3357,0x7c0f9e16,0x8ee7eb4a,
        0xfe1a7f9b,0x4fe342e2,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0
    },
    /* Z ordinate */
    {
        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0
    },
    /* infinity */
    0
};
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The b coefficient of the curve P256 equation (y^2 = x^3 - 3x + b).
 * Stored as eight 32-bit words, least significant word first.
 */
static const sp_digit p256_b[8] = {
    0x27d2604b,0x3bce3c3e,0xcc53b0f6,0x651d06b0,0x769886bc,0xb3ebbd55,
    0xaa3a93e7,0x5ac635d8
};
#endif
15465 
15466 /* Multiply a and b into r. (r = a * b)
15467  *
15468  * r  A single precision integer.
15469  * a  A single precision integer.
15470  * b  A single precision integer.
15471  */
sp_256_mul_8(sp_digit * r,const sp_digit * a,const sp_digit * b)15472 SP_NOINLINE static void sp_256_mul_8(sp_digit* r, const sp_digit* a,
15473         const sp_digit* b)
15474 {
15475     sp_digit tmp_arr[8];
15476     sp_digit* tmp = tmp_arr;
15477 
15478     __asm__ __volatile__ (
15479         /* A[0] * B[0] */
15480         "ldr	r6, [%[a], #0]\n\t"
15481         "ldr	r8, [%[b], #0]\n\t"
15482         "umull	r3, r4, r6, r8\n\t"
15483         "mov	r5, #0\n\t"
15484         "str	r3, [%[tmp], #0]\n\t"
15485         "mov	r3, #0\n\t"
15486         /* A[0] * B[1] */
15487         "ldr	r8, [%[b], #4]\n\t"
15488         "umull	r6, r8, r6, r8\n\t"
15489         "adds	r4, r4, r6\n\t"
15490         "adc	r5, r5, r8\n\t"
15491         /* A[1] * B[0] */
15492         "ldr	r6, [%[a], #4]\n\t"
15493         "ldr	r8, [%[b], #0]\n\t"
15494         "umull	r6, r8, r6, r8\n\t"
15495         "adds	r4, r4, r6\n\t"
15496         "adcs 	r5, r5, r8\n\t"
15497         "adc	r3, r3, #0\n\t"
15498         "str	r4, [%[tmp], #4]\n\t"
15499         "mov	r4, #0\n\t"
15500         /* A[0] * B[2] */
15501         "ldr	r6, [%[a], #0]\n\t"
15502         "ldr	r8, [%[b], #8]\n\t"
15503         "umull	r6, r8, r6, r8\n\t"
15504         "adds	r5, r5, r6\n\t"
15505         "adcs 	r3, r3, r8\n\t"
15506         "adc	r4, r4, #0\n\t"
15507         /* A[1] * B[1] */
15508         "ldr	r6, [%[a], #4]\n\t"
15509         "ldr	r8, [%[b], #4]\n\t"
15510         "umull	r6, r8, r6, r8\n\t"
15511         "adds	r5, r5, r6\n\t"
15512         "adcs 	r3, r3, r8\n\t"
15513         "adc	r4, r4, #0\n\t"
15514         /* A[2] * B[0] */
15515         "ldr	r6, [%[a], #8]\n\t"
15516         "ldr	r8, [%[b], #0]\n\t"
15517         "umull	r6, r8, r6, r8\n\t"
15518         "adds	r5, r5, r6\n\t"
15519         "adcs 	r3, r3, r8\n\t"
15520         "adc	r4, r4, #0\n\t"
15521         "str	r5, [%[tmp], #8]\n\t"
15522         "mov	r5, #0\n\t"
15523         /* A[0] * B[3] */
15524         "ldr	r6, [%[a], #0]\n\t"
15525         "ldr	r8, [%[b], #12]\n\t"
15526         "umull	r6, r8, r6, r8\n\t"
15527         "adds	r3, r3, r6\n\t"
15528         "adcs 	r4, r4, r8\n\t"
15529         "adc	r5, r5, #0\n\t"
15530         /* A[1] * B[2] */
15531         "ldr	r6, [%[a], #4]\n\t"
15532         "ldr	r8, [%[b], #8]\n\t"
15533         "umull	r6, r8, r6, r8\n\t"
15534         "adds	r3, r3, r6\n\t"
15535         "adcs 	r4, r4, r8\n\t"
15536         "adc	r5, r5, #0\n\t"
15537         /* A[2] * B[1] */
15538         "ldr	r6, [%[a], #8]\n\t"
15539         "ldr	r8, [%[b], #4]\n\t"
15540         "umull	r6, r8, r6, r8\n\t"
15541         "adds	r3, r3, r6\n\t"
15542         "adcs 	r4, r4, r8\n\t"
15543         "adc	r5, r5, #0\n\t"
15544         /* A[3] * B[0] */
15545         "ldr	r6, [%[a], #12]\n\t"
15546         "ldr	r8, [%[b], #0]\n\t"
15547         "umull	r6, r8, r6, r8\n\t"
15548         "adds	r3, r3, r6\n\t"
15549         "adcs 	r4, r4, r8\n\t"
15550         "adc	r5, r5, #0\n\t"
15551         "str	r3, [%[tmp], #12]\n\t"
15552         "mov	r3, #0\n\t"
15553         /* A[0] * B[4] */
15554         "ldr	r6, [%[a], #0]\n\t"
15555         "ldr	r8, [%[b], #16]\n\t"
15556         "umull	r6, r8, r6, r8\n\t"
15557         "adds	r4, r4, r6\n\t"
15558         "adcs 	r5, r5, r8\n\t"
15559         "adc	r3, r3, #0\n\t"
15560         /* A[1] * B[3] */
15561         "ldr	r6, [%[a], #4]\n\t"
15562         "ldr	r8, [%[b], #12]\n\t"
15563         "umull	r6, r8, r6, r8\n\t"
15564         "adds	r4, r4, r6\n\t"
15565         "adcs 	r5, r5, r8\n\t"
15566         "adc	r3, r3, #0\n\t"
15567         /* A[2] * B[2] */
15568         "ldr	r6, [%[a], #8]\n\t"
15569         "ldr	r8, [%[b], #8]\n\t"
15570         "umull	r6, r8, r6, r8\n\t"
15571         "adds	r4, r4, r6\n\t"
15572         "adcs 	r5, r5, r8\n\t"
15573         "adc	r3, r3, #0\n\t"
15574         /* A[3] * B[1] */
15575         "ldr	r6, [%[a], #12]\n\t"
15576         "ldr	r8, [%[b], #4]\n\t"
15577         "umull	r6, r8, r6, r8\n\t"
15578         "adds	r4, r4, r6\n\t"
15579         "adcs 	r5, r5, r8\n\t"
15580         "adc	r3, r3, #0\n\t"
15581         /* A[4] * B[0] */
15582         "ldr	r6, [%[a], #16]\n\t"
15583         "ldr	r8, [%[b], #0]\n\t"
15584         "umull	r6, r8, r6, r8\n\t"
15585         "adds	r4, r4, r6\n\t"
15586         "adcs 	r5, r5, r8\n\t"
15587         "adc	r3, r3, #0\n\t"
15588         "str	r4, [%[tmp], #16]\n\t"
15589         "mov	r4, #0\n\t"
15590         /* A[0] * B[5] */
15591         "ldr	r6, [%[a], #0]\n\t"
15592         "ldr	r8, [%[b], #20]\n\t"
15593         "umull	r6, r8, r6, r8\n\t"
15594         "adds	r5, r5, r6\n\t"
15595         "adcs 	r3, r3, r8\n\t"
15596         "adc	r4, r4, #0\n\t"
15597         /* A[1] * B[4] */
15598         "ldr	r6, [%[a], #4]\n\t"
15599         "ldr	r8, [%[b], #16]\n\t"
15600         "umull	r6, r8, r6, r8\n\t"
15601         "adds	r5, r5, r6\n\t"
15602         "adcs 	r3, r3, r8\n\t"
15603         "adc	r4, r4, #0\n\t"
15604         /* A[2] * B[3] */
15605         "ldr	r6, [%[a], #8]\n\t"
15606         "ldr	r8, [%[b], #12]\n\t"
15607         "umull	r6, r8, r6, r8\n\t"
15608         "adds	r5, r5, r6\n\t"
15609         "adcs 	r3, r3, r8\n\t"
15610         "adc	r4, r4, #0\n\t"
15611         /* A[3] * B[2] */
15612         "ldr	r6, [%[a], #12]\n\t"
15613         "ldr	r8, [%[b], #8]\n\t"
15614         "umull	r6, r8, r6, r8\n\t"
15615         "adds	r5, r5, r6\n\t"
15616         "adcs 	r3, r3, r8\n\t"
15617         "adc	r4, r4, #0\n\t"
15618         /* A[4] * B[1] */
15619         "ldr	r6, [%[a], #16]\n\t"
15620         "ldr	r8, [%[b], #4]\n\t"
15621         "umull	r6, r8, r6, r8\n\t"
15622         "adds	r5, r5, r6\n\t"
15623         "adcs 	r3, r3, r8\n\t"
15624         "adc	r4, r4, #0\n\t"
15625         /* A[5] * B[0] */
15626         "ldr	r6, [%[a], #20]\n\t"
15627         "ldr	r8, [%[b], #0]\n\t"
15628         "umull	r6, r8, r6, r8\n\t"
15629         "adds	r5, r5, r6\n\t"
15630         "adcs 	r3, r3, r8\n\t"
15631         "adc	r4, r4, #0\n\t"
15632         "str	r5, [%[tmp], #20]\n\t"
15633         "mov	r5, #0\n\t"
15634         /* A[0] * B[6] */
15635         "ldr	r6, [%[a], #0]\n\t"
15636         "ldr	r8, [%[b], #24]\n\t"
15637         "umull	r6, r8, r6, r8\n\t"
15638         "adds	r3, r3, r6\n\t"
15639         "adcs 	r4, r4, r8\n\t"
15640         "adc	r5, r5, #0\n\t"
15641         /* A[1] * B[5] */
15642         "ldr	r6, [%[a], #4]\n\t"
15643         "ldr	r8, [%[b], #20]\n\t"
15644         "umull	r6, r8, r6, r8\n\t"
15645         "adds	r3, r3, r6\n\t"
15646         "adcs 	r4, r4, r8\n\t"
15647         "adc	r5, r5, #0\n\t"
15648         /* A[2] * B[4] */
15649         "ldr	r6, [%[a], #8]\n\t"
15650         "ldr	r8, [%[b], #16]\n\t"
15651         "umull	r6, r8, r6, r8\n\t"
15652         "adds	r3, r3, r6\n\t"
15653         "adcs 	r4, r4, r8\n\t"
15654         "adc	r5, r5, #0\n\t"
15655         /* A[3] * B[3] */
15656         "ldr	r6, [%[a], #12]\n\t"
15657         "ldr	r8, [%[b], #12]\n\t"
15658         "umull	r6, r8, r6, r8\n\t"
15659         "adds	r3, r3, r6\n\t"
15660         "adcs 	r4, r4, r8\n\t"
15661         "adc	r5, r5, #0\n\t"
15662         /* A[4] * B[2] */
15663         "ldr	r6, [%[a], #16]\n\t"
15664         "ldr	r8, [%[b], #8]\n\t"
15665         "umull	r6, r8, r6, r8\n\t"
15666         "adds	r3, r3, r6\n\t"
15667         "adcs 	r4, r4, r8\n\t"
15668         "adc	r5, r5, #0\n\t"
15669         /* A[5] * B[1] */
15670         "ldr	r6, [%[a], #20]\n\t"
15671         "ldr	r8, [%[b], #4]\n\t"
15672         "umull	r6, r8, r6, r8\n\t"
15673         "adds	r3, r3, r6\n\t"
15674         "adcs 	r4, r4, r8\n\t"
15675         "adc	r5, r5, #0\n\t"
15676         /* A[6] * B[0] */
15677         "ldr	r6, [%[a], #24]\n\t"
15678         "ldr	r8, [%[b], #0]\n\t"
15679         "umull	r6, r8, r6, r8\n\t"
15680         "adds	r3, r3, r6\n\t"
15681         "adcs 	r4, r4, r8\n\t"
15682         "adc	r5, r5, #0\n\t"
15683         "str	r3, [%[tmp], #24]\n\t"
15684         "mov	r3, #0\n\t"
15685         /* A[0] * B[7] */
15686         "ldr	r6, [%[a], #0]\n\t"
15687         "ldr	r8, [%[b], #28]\n\t"
15688         "umull	r6, r8, r6, r8\n\t"
15689         "adds	r4, r4, r6\n\t"
15690         "adcs 	r5, r5, r8\n\t"
15691         "adc	r3, r3, #0\n\t"
15692         /* A[1] * B[6] */
15693         "ldr	r6, [%[a], #4]\n\t"
15694         "ldr	r8, [%[b], #24]\n\t"
15695         "umull	r6, r8, r6, r8\n\t"
15696         "adds	r4, r4, r6\n\t"
15697         "adcs 	r5, r5, r8\n\t"
15698         "adc	r3, r3, #0\n\t"
15699         /* A[2] * B[5] */
15700         "ldr	r6, [%[a], #8]\n\t"
15701         "ldr	r8, [%[b], #20]\n\t"
15702         "umull	r6, r8, r6, r8\n\t"
15703         "adds	r4, r4, r6\n\t"
15704         "adcs 	r5, r5, r8\n\t"
15705         "adc	r3, r3, #0\n\t"
15706         /* A[3] * B[4] */
15707         "ldr	r6, [%[a], #12]\n\t"
15708         "ldr	r8, [%[b], #16]\n\t"
15709         "umull	r6, r8, r6, r8\n\t"
15710         "adds	r4, r4, r6\n\t"
15711         "adcs 	r5, r5, r8\n\t"
15712         "adc	r3, r3, #0\n\t"
15713         /* A[4] * B[3] */
15714         "ldr	r6, [%[a], #16]\n\t"
15715         "ldr	r8, [%[b], #12]\n\t"
15716         "umull	r6, r8, r6, r8\n\t"
15717         "adds	r4, r4, r6\n\t"
15718         "adcs 	r5, r5, r8\n\t"
15719         "adc	r3, r3, #0\n\t"
15720         /* A[5] * B[2] */
15721         "ldr	r6, [%[a], #20]\n\t"
15722         "ldr	r8, [%[b], #8]\n\t"
15723         "umull	r6, r8, r6, r8\n\t"
15724         "adds	r4, r4, r6\n\t"
15725         "adcs 	r5, r5, r8\n\t"
15726         "adc	r3, r3, #0\n\t"
15727         /* A[6] * B[1] */
15728         "ldr	r6, [%[a], #24]\n\t"
15729         "ldr	r8, [%[b], #4]\n\t"
15730         "umull	r6, r8, r6, r8\n\t"
15731         "adds	r4, r4, r6\n\t"
15732         "adcs 	r5, r5, r8\n\t"
15733         "adc	r3, r3, #0\n\t"
15734         /* A[7] * B[0] */
15735         "ldr	r6, [%[a], #28]\n\t"
15736         "ldr	r8, [%[b], #0]\n\t"
15737         "umull	r6, r8, r6, r8\n\t"
15738         "adds	r4, r4, r6\n\t"
15739         "adcs 	r5, r5, r8\n\t"
15740         "adc	r3, r3, #0\n\t"
15741         "str	r4, [%[tmp], #28]\n\t"
15742         "mov	r4, #0\n\t"
15743         /* A[1] * B[7] */
15744         "ldr	r6, [%[a], #4]\n\t"
15745         "ldr	r8, [%[b], #28]\n\t"
15746         "umull	r6, r8, r6, r8\n\t"
15747         "adds	r5, r5, r6\n\t"
15748         "adcs 	r3, r3, r8\n\t"
15749         "adc	r4, r4, #0\n\t"
15750         /* A[2] * B[6] */
15751         "ldr	r6, [%[a], #8]\n\t"
15752         "ldr	r8, [%[b], #24]\n\t"
15753         "umull	r6, r8, r6, r8\n\t"
15754         "adds	r5, r5, r6\n\t"
15755         "adcs 	r3, r3, r8\n\t"
15756         "adc	r4, r4, #0\n\t"
15757         /* A[3] * B[5] */
15758         "ldr	r6, [%[a], #12]\n\t"
15759         "ldr	r8, [%[b], #20]\n\t"
15760         "umull	r6, r8, r6, r8\n\t"
15761         "adds	r5, r5, r6\n\t"
15762         "adcs 	r3, r3, r8\n\t"
15763         "adc	r4, r4, #0\n\t"
15764         /* A[4] * B[4] */
15765         "ldr	r6, [%[a], #16]\n\t"
15766         "ldr	r8, [%[b], #16]\n\t"
15767         "umull	r6, r8, r6, r8\n\t"
15768         "adds	r5, r5, r6\n\t"
15769         "adcs 	r3, r3, r8\n\t"
15770         "adc	r4, r4, #0\n\t"
15771         /* A[5] * B[3] */
15772         "ldr	r6, [%[a], #20]\n\t"
15773         "ldr	r8, [%[b], #12]\n\t"
15774         "umull	r6, r8, r6, r8\n\t"
15775         "adds	r5, r5, r6\n\t"
15776         "adcs 	r3, r3, r8\n\t"
15777         "adc	r4, r4, #0\n\t"
15778         /* A[6] * B[2] */
15779         "ldr	r6, [%[a], #24]\n\t"
15780         "ldr	r8, [%[b], #8]\n\t"
15781         "umull	r6, r8, r6, r8\n\t"
15782         "adds	r5, r5, r6\n\t"
15783         "adcs 	r3, r3, r8\n\t"
15784         "adc	r4, r4, #0\n\t"
15785         /* A[7] * B[1] */
15786         "ldr	r6, [%[a], #28]\n\t"
15787         "ldr	r8, [%[b], #4]\n\t"
15788         "umull	r6, r8, r6, r8\n\t"
15789         "adds	r5, r5, r6\n\t"
15790         "adcs 	r3, r3, r8\n\t"
15791         "adc	r4, r4, #0\n\t"
15792         "str	r5, [%[r], #32]\n\t"
15793         "mov	r5, #0\n\t"
15794         /* A[2] * B[7] */
15795         "ldr	r6, [%[a], #8]\n\t"
15796         "ldr	r8, [%[b], #28]\n\t"
15797         "umull	r6, r8, r6, r8\n\t"
15798         "adds	r3, r3, r6\n\t"
15799         "adcs 	r4, r4, r8\n\t"
15800         "adc	r5, r5, #0\n\t"
15801         /* A[3] * B[6] */
15802         "ldr	r6, [%[a], #12]\n\t"
15803         "ldr	r8, [%[b], #24]\n\t"
15804         "umull	r6, r8, r6, r8\n\t"
15805         "adds	r3, r3, r6\n\t"
15806         "adcs 	r4, r4, r8\n\t"
15807         "adc	r5, r5, #0\n\t"
15808         /* A[4] * B[5] */
15809         "ldr	r6, [%[a], #16]\n\t"
15810         "ldr	r8, [%[b], #20]\n\t"
15811         "umull	r6, r8, r6, r8\n\t"
15812         "adds	r3, r3, r6\n\t"
15813         "adcs 	r4, r4, r8\n\t"
15814         "adc	r5, r5, #0\n\t"
15815         /* A[5] * B[4] */
15816         "ldr	r6, [%[a], #20]\n\t"
15817         "ldr	r8, [%[b], #16]\n\t"
15818         "umull	r6, r8, r6, r8\n\t"
15819         "adds	r3, r3, r6\n\t"
15820         "adcs 	r4, r4, r8\n\t"
15821         "adc	r5, r5, #0\n\t"
15822         /* A[6] * B[3] */
15823         "ldr	r6, [%[a], #24]\n\t"
15824         "ldr	r8, [%[b], #12]\n\t"
15825         "umull	r6, r8, r6, r8\n\t"
15826         "adds	r3, r3, r6\n\t"
15827         "adcs 	r4, r4, r8\n\t"
15828         "adc	r5, r5, #0\n\t"
15829         /* A[7] * B[2] */
15830         "ldr	r6, [%[a], #28]\n\t"
15831         "ldr	r8, [%[b], #8]\n\t"
15832         "umull	r6, r8, r6, r8\n\t"
15833         "adds	r3, r3, r6\n\t"
15834         "adcs 	r4, r4, r8\n\t"
15835         "adc	r5, r5, #0\n\t"
15836         "str	r3, [%[r], #36]\n\t"
15837         "mov	r3, #0\n\t"
15838         /* A[3] * B[7] */
15839         "ldr	r6, [%[a], #12]\n\t"
15840         "ldr	r8, [%[b], #28]\n\t"
15841         "umull	r6, r8, r6, r8\n\t"
15842         "adds	r4, r4, r6\n\t"
15843         "adcs 	r5, r5, r8\n\t"
15844         "adc	r3, r3, #0\n\t"
15845         /* A[4] * B[6] */
15846         "ldr	r6, [%[a], #16]\n\t"
15847         "ldr	r8, [%[b], #24]\n\t"
15848         "umull	r6, r8, r6, r8\n\t"
15849         "adds	r4, r4, r6\n\t"
15850         "adcs 	r5, r5, r8\n\t"
15851         "adc	r3, r3, #0\n\t"
15852         /* A[5] * B[5] */
15853         "ldr	r6, [%[a], #20]\n\t"
15854         "ldr	r8, [%[b], #20]\n\t"
15855         "umull	r6, r8, r6, r8\n\t"
15856         "adds	r4, r4, r6\n\t"
15857         "adcs 	r5, r5, r8\n\t"
15858         "adc	r3, r3, #0\n\t"
15859         /* A[6] * B[4] */
15860         "ldr	r6, [%[a], #24]\n\t"
15861         "ldr	r8, [%[b], #16]\n\t"
15862         "umull	r6, r8, r6, r8\n\t"
15863         "adds	r4, r4, r6\n\t"
15864         "adcs 	r5, r5, r8\n\t"
15865         "adc	r3, r3, #0\n\t"
15866         /* A[7] * B[3] */
15867         "ldr	r6, [%[a], #28]\n\t"
15868         "ldr	r8, [%[b], #12]\n\t"
15869         "umull	r6, r8, r6, r8\n\t"
15870         "adds	r4, r4, r6\n\t"
15871         "adcs 	r5, r5, r8\n\t"
15872         "adc	r3, r3, #0\n\t"
15873         "str	r4, [%[r], #40]\n\t"
15874         "mov	r4, #0\n\t"
15875         /* A[4] * B[7] */
15876         "ldr	r6, [%[a], #16]\n\t"
15877         "ldr	r8, [%[b], #28]\n\t"
15878         "umull	r6, r8, r6, r8\n\t"
15879         "adds	r5, r5, r6\n\t"
15880         "adcs 	r3, r3, r8\n\t"
15881         "adc	r4, r4, #0\n\t"
15882         /* A[5] * B[6] */
15883         "ldr	r6, [%[a], #20]\n\t"
15884         "ldr	r8, [%[b], #24]\n\t"
15885         "umull	r6, r8, r6, r8\n\t"
15886         "adds	r5, r5, r6\n\t"
15887         "adcs 	r3, r3, r8\n\t"
15888         "adc	r4, r4, #0\n\t"
15889         /* A[6] * B[5] */
15890         "ldr	r6, [%[a], #24]\n\t"
15891         "ldr	r8, [%[b], #20]\n\t"
15892         "umull	r6, r8, r6, r8\n\t"
15893         "adds	r5, r5, r6\n\t"
15894         "adcs 	r3, r3, r8\n\t"
15895         "adc	r4, r4, #0\n\t"
15896         /* A[7] * B[4] */
15897         "ldr	r6, [%[a], #28]\n\t"
15898         "ldr	r8, [%[b], #16]\n\t"
15899         "umull	r6, r8, r6, r8\n\t"
15900         "adds	r5, r5, r6\n\t"
15901         "adcs 	r3, r3, r8\n\t"
15902         "adc	r4, r4, #0\n\t"
15903         "str	r5, [%[r], #44]\n\t"
15904         "mov	r5, #0\n\t"
15905         /* A[5] * B[7] */
15906         "ldr	r6, [%[a], #20]\n\t"
15907         "ldr	r8, [%[b], #28]\n\t"
15908         "umull	r6, r8, r6, r8\n\t"
15909         "adds	r3, r3, r6\n\t"
15910         "adcs 	r4, r4, r8\n\t"
15911         "adc	r5, r5, #0\n\t"
15912         /* A[6] * B[6] */
15913         "ldr	r6, [%[a], #24]\n\t"
15914         "ldr	r8, [%[b], #24]\n\t"
15915         "umull	r6, r8, r6, r8\n\t"
15916         "adds	r3, r3, r6\n\t"
15917         "adcs 	r4, r4, r8\n\t"
15918         "adc	r5, r5, #0\n\t"
15919         /* A[7] * B[5] */
15920         "ldr	r6, [%[a], #28]\n\t"
15921         "ldr	r8, [%[b], #20]\n\t"
15922         "umull	r6, r8, r6, r8\n\t"
15923         "adds	r3, r3, r6\n\t"
15924         "adcs 	r4, r4, r8\n\t"
15925         "adc	r5, r5, #0\n\t"
15926         "str	r3, [%[r], #48]\n\t"
15927         "mov	r3, #0\n\t"
15928         /* A[6] * B[7] */
15929         "ldr	r6, [%[a], #24]\n\t"
15930         "ldr	r8, [%[b], #28]\n\t"
15931         "umull	r6, r8, r6, r8\n\t"
15932         "adds	r4, r4, r6\n\t"
15933         "adcs 	r5, r5, r8\n\t"
15934         "adc	r3, r3, #0\n\t"
15935         /* A[7] * B[6] */
15936         "ldr	r6, [%[a], #28]\n\t"
15937         "ldr	r8, [%[b], #24]\n\t"
15938         "umull	r6, r8, r6, r8\n\t"
15939         "adds	r4, r4, r6\n\t"
15940         "adcs 	r5, r5, r8\n\t"
15941         "adc	r3, r3, #0\n\t"
15942         "str	r4, [%[r], #52]\n\t"
15943         "mov	r4, #0\n\t"
15944         /* A[7] * B[7] */
15945         "ldr	r6, [%[a], #28]\n\t"
15946         "ldr	r8, [%[b], #28]\n\t"
15947         "umull	r6, r8, r6, r8\n\t"
15948         "adds	r5, r5, r6\n\t"
15949         "adc	r3, r3, r8\n\t"
15950         "str	r5, [%[r], #56]\n\t"
15951         "str	r3, [%[r], #60]\n\t"
15952         /* Transfer tmp to r */
15953         "ldr	r3, [%[tmp], #0]\n\t"
15954         "ldr	r4, [%[tmp], #4]\n\t"
15955         "ldr	r5, [%[tmp], #8]\n\t"
15956         "ldr	r6, [%[tmp], #12]\n\t"
15957         "str	r3, [%[r], #0]\n\t"
15958         "str	r4, [%[r], #4]\n\t"
15959         "str	r5, [%[r], #8]\n\t"
15960         "str	r6, [%[r], #12]\n\t"
15961         "ldr	r3, [%[tmp], #16]\n\t"
15962         "ldr	r4, [%[tmp], #20]\n\t"
15963         "ldr	r5, [%[tmp], #24]\n\t"
15964         "ldr	r6, [%[tmp], #28]\n\t"
15965         "str	r3, [%[r], #16]\n\t"
15966         "str	r4, [%[r], #20]\n\t"
15967         "str	r5, [%[r], #24]\n\t"
15968         "str	r6, [%[r], #28]\n\t"
15969         :
15970         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [tmp] "r" (tmp)
15971         : "memory", "r3", "r4", "r5", "r6", "r8"
15972     );
15973 }
15974 
15975 /* Square a and put result in r. (r = a * a)
15976  *
15977  * r  A single precision integer.
15978  * a  A single precision integer.
15979  */
sp_256_sqr_8(sp_digit * r,const sp_digit * a)15980 SP_NOINLINE static void sp_256_sqr_8(sp_digit* r, const sp_digit* a)
15981 {
15982     sp_digit tmp_arr[8];
15983     sp_digit* tmp = tmp_arr;
15984     __asm__ __volatile__ (
15985         /* A[0] * A[0] */
15986         "ldr	r6, [%[a], #0]\n\t"
15987         "umull	r3, r4, r6, r6\n\t"
15988         "mov	r5, #0\n\t"
15989         "str	r3, [%[tmp], #0]\n\t"
15990         "mov	r3, #0\n\t"
15991         /* A[0] * A[1] */
15992         "ldr	r8, [%[a], #4]\n\t"
15993         "umull	r6, r8, r6, r8\n\t"
15994         "adds	r4, r4, r6\n\t"
15995         "adc	r5, r5, r8\n\t"
15996         "adds	r4, r4, r6\n\t"
15997         "adcs 	r5, r5, r8\n\t"
15998         "adc	r3, r3, #0\n\t"
15999         "str	r4, [%[tmp], #4]\n\t"
16000         "mov	r4, #0\n\t"
16001         /* A[0] * A[2] */
16002         "ldr	r6, [%[a], #0]\n\t"
16003         "ldr	r8, [%[a], #8]\n\t"
16004         "umull	r6, r8, r6, r8\n\t"
16005         "adds	r5, r5, r6\n\t"
16006         "adc	r3, r3, r8\n\t"
16007         "adds	r5, r5, r6\n\t"
16008         "adcs 	r3, r3, r8\n\t"
16009         "adc	r4, r4, #0\n\t"
16010         /* A[1] * A[1] */
16011         "ldr	r6, [%[a], #4]\n\t"
16012         "umull	r6, r8, r6, r6\n\t"
16013         "adds	r5, r5, r6\n\t"
16014         "adcs	r3, r3, r8\n\t"
16015         "adc	r4, r4, #0\n\t"
16016         "str	r5, [%[tmp], #8]\n\t"
16017         "mov	r5, #0\n\t"
16018         /* A[0] * A[3] */
16019         "ldr	r6, [%[a], #0]\n\t"
16020         "ldr	r8, [%[a], #12]\n\t"
16021         "umull	r9, r10, r6, r8\n\t"
16022         "mov	r11, #0\n\t"
16023         /* A[1] * A[2] */
16024         "ldr	r6, [%[a], #4]\n\t"
16025         "ldr	r8, [%[a], #8]\n\t"
16026         "umull	r6, r8, r6, r8\n\t"
16027         "adds	r9, r9, r6\n\t"
16028         "adcs 	r10, r10, r8\n\t"
16029         "adc	r11, r11, #0\n\t"
16030         "adds	r9, r9, r9\n\t"
16031         "adcs	r10, r10, r10\n\t"
16032         "adc	r11, r11, r11\n\t"
16033         "adds	r3, r3, r9\n\t"
16034         "adcs	r4, r4, r10\n\t"
16035         "adc	r5, r5, r11\n\t"
16036         "str	r3, [%[tmp], #12]\n\t"
16037         "mov	r3, #0\n\t"
16038         /* A[0] * A[4] */
16039         "ldr	r6, [%[a], #0]\n\t"
16040         "ldr	r8, [%[a], #16]\n\t"
16041         "umull	r9, r10, r6, r8\n\t"
16042         "mov	r11, #0\n\t"
16043         /* A[1] * A[3] */
16044         "ldr	r6, [%[a], #4]\n\t"
16045         "ldr	r8, [%[a], #12]\n\t"
16046         "umull	r6, r8, r6, r8\n\t"
16047         "adds	r9, r9, r6\n\t"
16048         "adcs 	r10, r10, r8\n\t"
16049         "adc	r11, r11, #0\n\t"
16050         /* A[2] * A[2] */
16051         "ldr	r6, [%[a], #8]\n\t"
16052         "umull	r6, r8, r6, r6\n\t"
16053         "adds	r4, r4, r6\n\t"
16054         "adcs	r5, r5, r8\n\t"
16055         "adc	r3, r3, #0\n\t"
16056         "adds	r9, r9, r9\n\t"
16057         "adcs	r10, r10, r10\n\t"
16058         "adc	r11, r11, r11\n\t"
16059         "adds	r4, r4, r9\n\t"
16060         "adcs	r5, r5, r10\n\t"
16061         "adc	r3, r3, r11\n\t"
16062         "str	r4, [%[tmp], #16]\n\t"
16063         "mov	r4, #0\n\t"
16064         /* A[0] * A[5] */
16065         "ldr	r6, [%[a], #0]\n\t"
16066         "ldr	r8, [%[a], #20]\n\t"
16067         "umull	r9, r10, r6, r8\n\t"
16068         "mov	r11, #0\n\t"
16069         /* A[1] * A[4] */
16070         "ldr	r6, [%[a], #4]\n\t"
16071         "ldr	r8, [%[a], #16]\n\t"
16072         "umull	r6, r8, r6, r8\n\t"
16073         "adds	r9, r9, r6\n\t"
16074         "adcs 	r10, r10, r8\n\t"
16075         "adc	r11, r11, #0\n\t"
16076         /* A[2] * A[3] */
16077         "ldr	r6, [%[a], #8]\n\t"
16078         "ldr	r8, [%[a], #12]\n\t"
16079         "umull	r6, r8, r6, r8\n\t"
16080         "adds	r9, r9, r6\n\t"
16081         "adcs 	r10, r10, r8\n\t"
16082         "adc	r11, r11, #0\n\t"
16083         "adds	r9, r9, r9\n\t"
16084         "adcs	r10, r10, r10\n\t"
16085         "adc	r11, r11, r11\n\t"
16086         "adds	r5, r5, r9\n\t"
16087         "adcs	r3, r3, r10\n\t"
16088         "adc	r4, r4, r11\n\t"
16089         "str	r5, [%[tmp], #20]\n\t"
16090         "mov	r5, #0\n\t"
16091         /* A[0] * A[6] */
16092         "ldr	r6, [%[a], #0]\n\t"
16093         "ldr	r8, [%[a], #24]\n\t"
16094         "umull	r9, r10, r6, r8\n\t"
16095         "mov	r11, #0\n\t"
16096         /* A[1] * A[5] */
16097         "ldr	r6, [%[a], #4]\n\t"
16098         "ldr	r8, [%[a], #20]\n\t"
16099         "umull	r6, r8, r6, r8\n\t"
16100         "adds	r9, r9, r6\n\t"
16101         "adcs 	r10, r10, r8\n\t"
16102         "adc	r11, r11, #0\n\t"
16103         /* A[2] * A[4] */
16104         "ldr	r6, [%[a], #8]\n\t"
16105         "ldr	r8, [%[a], #16]\n\t"
16106         "umull	r6, r8, r6, r8\n\t"
16107         "adds	r9, r9, r6\n\t"
16108         "adcs 	r10, r10, r8\n\t"
16109         "adc	r11, r11, #0\n\t"
16110         /* A[3] * A[3] */
16111         "ldr	r6, [%[a], #12]\n\t"
16112         "umull	r6, r8, r6, r6\n\t"
16113         "adds	r3, r3, r6\n\t"
16114         "adcs	r4, r4, r8\n\t"
16115         "adc	r5, r5, #0\n\t"
16116         "adds	r9, r9, r9\n\t"
16117         "adcs	r10, r10, r10\n\t"
16118         "adc	r11, r11, r11\n\t"
16119         "adds	r3, r3, r9\n\t"
16120         "adcs	r4, r4, r10\n\t"
16121         "adc	r5, r5, r11\n\t"
16122         "str	r3, [%[tmp], #24]\n\t"
16123         "mov	r3, #0\n\t"
16124         /* A[0] * A[7] */
16125         "ldr	r6, [%[a], #0]\n\t"
16126         "ldr	r8, [%[a], #28]\n\t"
16127         "umull	r9, r10, r6, r8\n\t"
16128         "mov	r11, #0\n\t"
16129         /* A[1] * A[6] */
16130         "ldr	r6, [%[a], #4]\n\t"
16131         "ldr	r8, [%[a], #24]\n\t"
16132         "umull	r6, r8, r6, r8\n\t"
16133         "adds	r9, r9, r6\n\t"
16134         "adcs 	r10, r10, r8\n\t"
16135         "adc	r11, r11, #0\n\t"
16136         /* A[2] * A[5] */
16137         "ldr	r6, [%[a], #8]\n\t"
16138         "ldr	r8, [%[a], #20]\n\t"
16139         "umull	r6, r8, r6, r8\n\t"
16140         "adds	r9, r9, r6\n\t"
16141         "adcs 	r10, r10, r8\n\t"
16142         "adc	r11, r11, #0\n\t"
16143         /* A[3] * A[4] */
16144         "ldr	r6, [%[a], #12]\n\t"
16145         "ldr	r8, [%[a], #16]\n\t"
16146         "umull	r6, r8, r6, r8\n\t"
16147         "adds	r9, r9, r6\n\t"
16148         "adcs 	r10, r10, r8\n\t"
16149         "adc	r11, r11, #0\n\t"
16150         "adds	r9, r9, r9\n\t"
16151         "adcs	r10, r10, r10\n\t"
16152         "adc	r11, r11, r11\n\t"
16153         "adds	r4, r4, r9\n\t"
16154         "adcs	r5, r5, r10\n\t"
16155         "adc	r3, r3, r11\n\t"
16156         "str	r4, [%[tmp], #28]\n\t"
16157         "mov	r4, #0\n\t"
16158         /* A[1] * A[7] */
16159         "ldr	r6, [%[a], #4]\n\t"
16160         "ldr	r8, [%[a], #28]\n\t"
16161         "umull	r9, r10, r6, r8\n\t"
16162         "mov	r11, #0\n\t"
16163         /* A[2] * A[6] */
16164         "ldr	r6, [%[a], #8]\n\t"
16165         "ldr	r8, [%[a], #24]\n\t"
16166         "umull	r6, r8, r6, r8\n\t"
16167         "adds	r9, r9, r6\n\t"
16168         "adcs 	r10, r10, r8\n\t"
16169         "adc	r11, r11, #0\n\t"
16170         /* A[3] * A[5] */
16171         "ldr	r6, [%[a], #12]\n\t"
16172         "ldr	r8, [%[a], #20]\n\t"
16173         "umull	r6, r8, r6, r8\n\t"
16174         "adds	r9, r9, r6\n\t"
16175         "adcs 	r10, r10, r8\n\t"
16176         "adc	r11, r11, #0\n\t"
16177         /* A[4] * A[4] */
16178         "ldr	r6, [%[a], #16]\n\t"
16179         "umull	r6, r8, r6, r6\n\t"
16180         "adds	r5, r5, r6\n\t"
16181         "adcs	r3, r3, r8\n\t"
16182         "adc	r4, r4, #0\n\t"
16183         "adds	r9, r9, r9\n\t"
16184         "adcs	r10, r10, r10\n\t"
16185         "adc	r11, r11, r11\n\t"
16186         "adds	r5, r5, r9\n\t"
16187         "adcs	r3, r3, r10\n\t"
16188         "adc	r4, r4, r11\n\t"
16189         "str	r5, [%[r], #32]\n\t"
16190         "mov	r5, #0\n\t"
16191         /* A[2] * A[7] */
16192         "ldr	r6, [%[a], #8]\n\t"
16193         "ldr	r8, [%[a], #28]\n\t"
16194         "umull	r9, r10, r6, r8\n\t"
16195         "mov	r11, #0\n\t"
16196         /* A[3] * A[6] */
16197         "ldr	r6, [%[a], #12]\n\t"
16198         "ldr	r8, [%[a], #24]\n\t"
16199         "umull	r6, r8, r6, r8\n\t"
16200         "adds	r9, r9, r6\n\t"
16201         "adcs 	r10, r10, r8\n\t"
16202         "adc	r11, r11, #0\n\t"
16203         /* A[4] * A[5] */
16204         "ldr	r6, [%[a], #16]\n\t"
16205         "ldr	r8, [%[a], #20]\n\t"
16206         "umull	r6, r8, r6, r8\n\t"
16207         "adds	r9, r9, r6\n\t"
16208         "adcs 	r10, r10, r8\n\t"
16209         "adc	r11, r11, #0\n\t"
16210         "adds	r9, r9, r9\n\t"
16211         "adcs	r10, r10, r10\n\t"
16212         "adc	r11, r11, r11\n\t"
16213         "adds	r3, r3, r9\n\t"
16214         "adcs	r4, r4, r10\n\t"
16215         "adc	r5, r5, r11\n\t"
16216         "str	r3, [%[r], #36]\n\t"
16217         "mov	r3, #0\n\t"
16218         /* A[3] * A[7] */
16219         "ldr	r6, [%[a], #12]\n\t"
16220         "ldr	r8, [%[a], #28]\n\t"
16221         "umull	r9, r10, r6, r8\n\t"
16222         "mov	r11, #0\n\t"
16223         /* A[4] * A[6] */
16224         "ldr	r6, [%[a], #16]\n\t"
16225         "ldr	r8, [%[a], #24]\n\t"
16226         "umull	r6, r8, r6, r8\n\t"
16227         "adds	r9, r9, r6\n\t"
16228         "adcs 	r10, r10, r8\n\t"
16229         "adc	r11, r11, #0\n\t"
16230         /* A[5] * A[5] */
16231         "ldr	r6, [%[a], #20]\n\t"
16232         "umull	r6, r8, r6, r6\n\t"
16233         "adds	r4, r4, r6\n\t"
16234         "adcs	r5, r5, r8\n\t"
16235         "adc	r3, r3, #0\n\t"
16236         "adds	r9, r9, r9\n\t"
16237         "adcs	r10, r10, r10\n\t"
16238         "adc	r11, r11, r11\n\t"
16239         "adds	r4, r4, r9\n\t"
16240         "adcs	r5, r5, r10\n\t"
16241         "adc	r3, r3, r11\n\t"
16242         "str	r4, [%[r], #40]\n\t"
16243         "mov	r4, #0\n\t"
16244         /* A[4] * A[7] */
16245         "ldr	r6, [%[a], #16]\n\t"
16246         "ldr	r8, [%[a], #28]\n\t"
16247         "umull	r6, r8, r6, r8\n\t"
16248         "adds	r5, r5, r6\n\t"
16249         "adcs 	r3, r3, r8\n\t"
16250         "adc	r4, r4, #0\n\t"
16251         "adds	r5, r5, r6\n\t"
16252         "adcs 	r3, r3, r8\n\t"
16253         "adc	r4, r4, #0\n\t"
16254         /* A[5] * A[6] */
16255         "ldr	r6, [%[a], #20]\n\t"
16256         "ldr	r8, [%[a], #24]\n\t"
16257         "umull	r6, r8, r6, r8\n\t"
16258         "adds	r5, r5, r6\n\t"
16259         "adcs 	r3, r3, r8\n\t"
16260         "adc	r4, r4, #0\n\t"
16261         "adds	r5, r5, r6\n\t"
16262         "adcs 	r3, r3, r8\n\t"
16263         "adc	r4, r4, #0\n\t"
16264         "str	r5, [%[r], #44]\n\t"
16265         "mov	r5, #0\n\t"
16266         /* A[5] * A[7] */
16267         "ldr	r6, [%[a], #20]\n\t"
16268         "ldr	r8, [%[a], #28]\n\t"
16269         "umull	r6, r8, r6, r8\n\t"
16270         "adds	r3, r3, r6\n\t"
16271         "adcs 	r4, r4, r8\n\t"
16272         "adc	r5, r5, #0\n\t"
16273         "adds	r3, r3, r6\n\t"
16274         "adcs 	r4, r4, r8\n\t"
16275         "adc	r5, r5, #0\n\t"
16276         /* A[6] * A[6] */
16277         "ldr	r6, [%[a], #24]\n\t"
16278         "umull	r6, r8, r6, r6\n\t"
16279         "adds	r3, r3, r6\n\t"
16280         "adcs	r4, r4, r8\n\t"
16281         "adc	r5, r5, #0\n\t"
16282         "str	r3, [%[r], #48]\n\t"
16283         "mov	r3, #0\n\t"
16284         /* A[6] * A[7] */
16285         "ldr	r6, [%[a], #24]\n\t"
16286         "ldr	r8, [%[a], #28]\n\t"
16287         "umull	r6, r8, r6, r8\n\t"
16288         "adds	r4, r4, r6\n\t"
16289         "adcs 	r5, r5, r8\n\t"
16290         "adc	r3, r3, #0\n\t"
16291         "adds	r4, r4, r6\n\t"
16292         "adcs 	r5, r5, r8\n\t"
16293         "adc	r3, r3, #0\n\t"
16294         "str	r4, [%[r], #52]\n\t"
16295         "mov	r4, #0\n\t"
16296         /* A[7] * A[7] */
16297         "ldr	r6, [%[a], #28]\n\t"
16298         "umull	r6, r8, r6, r6\n\t"
16299         "adds	r5, r5, r6\n\t"
16300         "adc	r3, r3, r8\n\t"
16301         "str	r5, [%[r], #56]\n\t"
16302         "str	r3, [%[r], #60]\n\t"
16303         /* Transfer tmp to r */
16304         "ldr	r3, [%[tmp], #0]\n\t"
16305         "ldr	r4, [%[tmp], #4]\n\t"
16306         "ldr	r5, [%[tmp], #8]\n\t"
16307         "ldr	r6, [%[tmp], #12]\n\t"
16308         "str	r3, [%[r], #0]\n\t"
16309         "str	r4, [%[r], #4]\n\t"
16310         "str	r5, [%[r], #8]\n\t"
16311         "str	r6, [%[r], #12]\n\t"
16312         "ldr	r3, [%[tmp], #16]\n\t"
16313         "ldr	r4, [%[tmp], #20]\n\t"
16314         "ldr	r5, [%[tmp], #24]\n\t"
16315         "ldr	r6, [%[tmp], #28]\n\t"
16316         "str	r3, [%[r], #16]\n\t"
16317         "str	r4, [%[r], #20]\n\t"
16318         "str	r5, [%[r], #24]\n\t"
16319         "str	r6, [%[r], #28]\n\t"
16320         :
16321         : [r] "r" (r), [a] "r" (a), [tmp] "r" (tmp)
16322         : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11"
16323     );
16324 }
16325 
16326 #ifdef WOLFSSL_SP_SMALL
16327 /* Add b to a into r. (r = a + b)
16328  *
16329  * r  A single precision integer.
16330  * a  A single precision integer.
16331  * b  A single precision integer.
16332  */
sp_256_add_8(sp_digit * r,const sp_digit * a,const sp_digit * b)16333 SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
16334         const sp_digit* b)
16335 {
16336     sp_digit c = 0;
16337 
16338     __asm__ __volatile__ (
16339         "mov	r6, %[a]\n\t"
16340         "mov	r8, #0\n\t"
16341         "add	r6, r6, #32\n\t"
16342         "sub	r8, r8, #1\n\t"
16343         "\n1:\n\t"
16344         "adds	%[c], %[c], r8\n\t"
16345         "ldr	r4, [%[a]]\n\t"
16346         "ldr	r5, [%[b]]\n\t"
16347         "adcs	r4, r4, r5\n\t"
16348         "str	r4, [%[r]]\n\t"
16349         "mov	%[c], #0\n\t"
16350         "adc	%[c], %[c], %[c]\n\t"
16351         "add	%[a], %[a], #4\n\t"
16352         "add	%[b], %[b], #4\n\t"
16353         "add	%[r], %[r], #4\n\t"
16354         "cmp	%[a], r6\n\t"
16355 #ifdef __GNUC__
16356         "bne	1b\n\t"
16357 #else
16358         "bne.n	1b\n\t"
16359 #endif /* __GNUC__ */
16360         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
16361         :
16362         : "memory", "r4", "r5", "r6", "r8"
16363     );
16364 
16365     return c;
16366 }
16367 
16368 #else
16369 /* Add b to a into r. (r = a + b)
16370  *
16371  * r  A single precision integer.
16372  * a  A single precision integer.
16373  * b  A single precision integer.
16374  */
sp_256_add_8(sp_digit * r,const sp_digit * a,const sp_digit * b)16375 SP_NOINLINE static sp_digit sp_256_add_8(sp_digit* r, const sp_digit* a,
16376         const sp_digit* b)
16377 {
16378     sp_digit c = 0;
16379 
16380     __asm__ __volatile__ (
16381         "ldm	%[a]!, {r4, r5}\n\t"
16382         "ldm	%[b]!, {r6, r8}\n\t"
16383         "adds	r4, r4, r6\n\t"
16384         "adcs	r5, r5, r8\n\t"
16385         "stm	%[r]!, {r4, r5}\n\t"
16386         "ldm	%[a]!, {r4, r5}\n\t"
16387         "ldm	%[b]!, {r6, r8}\n\t"
16388         "adcs	r4, r4, r6\n\t"
16389         "adcs	r5, r5, r8\n\t"
16390         "stm	%[r]!, {r4, r5}\n\t"
16391         "ldm	%[a]!, {r4, r5}\n\t"
16392         "ldm	%[b]!, {r6, r8}\n\t"
16393         "adcs	r4, r4, r6\n\t"
16394         "adcs	r5, r5, r8\n\t"
16395         "stm	%[r]!, {r4, r5}\n\t"
16396         "ldm	%[a]!, {r4, r5}\n\t"
16397         "ldm	%[b]!, {r6, r8}\n\t"
16398         "adcs	r4, r4, r6\n\t"
16399         "adcs	r5, r5, r8\n\t"
16400         "stm	%[r]!, {r4, r5}\n\t"
16401         "mov	%[c], #0\n\t"
16402         "adc	%[c], %[c], %[c]\n\t"
16403         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
16404         :
16405         : "memory", "r4", "r5", "r6", "r8"
16406     );
16407 
16408     return c;
16409 }
16410 
16411 #endif /* WOLFSSL_SP_SMALL */
16412 #ifdef WOLFSSL_SP_SMALL
16413 /* Sub b from a into r. (r = a - b)
16414  *
16415  * r  A single precision integer.
16416  * a  A single precision integer.
16417  * b  A single precision integer.
16418  */
sp_256_sub_8(sp_digit * r,const sp_digit * a,const sp_digit * b)16419 SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
16420         const sp_digit* b)
16421 {
16422     sp_digit c = 0;
16423 
16424     __asm__ __volatile__ (
16425         "mov	r6, %[a]\n\t"
16426         "add	r6, r6, #32\n\t"
16427         "\n1:\n\t"
16428         "mov	r5, #0\n\t"
16429         "subs	r5, r5, %[c]\n\t"
16430         "ldr	r4, [%[a]]\n\t"
16431         "ldr	r5, [%[b]]\n\t"
16432         "sbcs	r4, r4, r5\n\t"
16433         "str	r4, [%[r]]\n\t"
16434         "sbc	%[c], %[c], %[c]\n\t"
16435         "add	%[a], %[a], #4\n\t"
16436         "add	%[b], %[b], #4\n\t"
16437         "add	%[r], %[r], #4\n\t"
16438         "cmp	%[a], r6\n\t"
16439 #ifdef __GNUC__
16440         "bne	1b\n\t"
16441 #else
16442         "bne.n	1b\n\t"
16443 #endif /* __GNUC__ */
16444         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
16445         :
16446         : "memory", "r4", "r5", "r6"
16447     );
16448 
16449     return c;
16450 }
16451 
16452 #else
16453 /* Sub b from a into r. (r = a - b)
16454  *
16455  * r  A single precision integer.
16456  * a  A single precision integer.
16457  * b  A single precision integer.
16458  */
sp_256_sub_8(sp_digit * r,const sp_digit * a,const sp_digit * b)16459 SP_NOINLINE static sp_digit sp_256_sub_8(sp_digit* r, const sp_digit* a,
16460         const sp_digit* b)
16461 {
16462     sp_digit c = 0;
16463 
16464     __asm__ __volatile__ (
16465         "ldr	r4, [%[a], #0]\n\t"
16466         "ldr	r5, [%[a], #4]\n\t"
16467         "ldr	r6, [%[b], #0]\n\t"
16468         "ldr	r8, [%[b], #4]\n\t"
16469         "subs	r4, r4, r6\n\t"
16470         "sbcs	r5, r5, r8\n\t"
16471         "str	r4, [%[r], #0]\n\t"
16472         "str	r5, [%[r], #4]\n\t"
16473         "ldr	r4, [%[a], #8]\n\t"
16474         "ldr	r5, [%[a], #12]\n\t"
16475         "ldr	r6, [%[b], #8]\n\t"
16476         "ldr	r8, [%[b], #12]\n\t"
16477         "sbcs	r4, r4, r6\n\t"
16478         "sbcs	r5, r5, r8\n\t"
16479         "str	r4, [%[r], #8]\n\t"
16480         "str	r5, [%[r], #12]\n\t"
16481         "ldr	r4, [%[a], #16]\n\t"
16482         "ldr	r5, [%[a], #20]\n\t"
16483         "ldr	r6, [%[b], #16]\n\t"
16484         "ldr	r8, [%[b], #20]\n\t"
16485         "sbcs	r4, r4, r6\n\t"
16486         "sbcs	r5, r5, r8\n\t"
16487         "str	r4, [%[r], #16]\n\t"
16488         "str	r5, [%[r], #20]\n\t"
16489         "ldr	r4, [%[a], #24]\n\t"
16490         "ldr	r5, [%[a], #28]\n\t"
16491         "ldr	r6, [%[b], #24]\n\t"
16492         "ldr	r8, [%[b], #28]\n\t"
16493         "sbcs	r4, r4, r6\n\t"
16494         "sbcs	r5, r5, r8\n\t"
16495         "str	r4, [%[r], #24]\n\t"
16496         "str	r5, [%[r], #28]\n\t"
16497         "sbc	%[c], %[c], %[c]\n\t"
16498         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
16499         :
16500         : "memory", "r4", "r5", "r6", "r8"
16501     );
16502 
16503     return c;
16504 }
16505 
16506 #endif /* WOLFSSL_SP_SMALL */
16507 /* Multiply a number by Montgomery normalizer mod modulus (prime).
16508  *
16509  * r  The resulting Montgomery form number.
16510  * a  The number to convert.
16511  * m  The modulus (prime).
16512  */
sp_256_mod_mul_norm_8(sp_digit * r,const sp_digit * a,const sp_digit * m)16513 static int sp_256_mod_mul_norm_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
16514 {
16515    (void)m;
16516 
16517     __asm__ __volatile__ (
16518         "sub   sp, sp, #24\n\t"
16519         "ldr r2, [%[a], #0]\n\t"
16520         "ldr r3, [%[a], #4]\n\t"
16521         "ldr r4, [%[a], #8]\n\t"
16522         "ldr r5, [%[a], #12]\n\t"
16523         "ldr r6, [%[a], #16]\n\t"
16524         "ldr r8, [%[a], #20]\n\t"
16525         "ldr r9, [%[a], #24]\n\t"
16526         "ldr r10, [%[a], #28]\n\t"
16527         /* Clear overflow and underflow */
16528         "mov   r14, #0\n\t"
16529         "mov   r12, #0\n\t"
16530         /* t[0] =  1  1  0 -1 -1 -1 -1  0 */
16531         "adds  r11, r2, r3\n\t"
16532         "adc   r14, r14, #0\n\t"
16533         "subs    r11, r11, r5\n\t"
16534         "sbc     r12, r12, #0\n\t"
16535         "subs    r11, r11, r6\n\t"
16536         "sbc     r12, r12, #0\n\t"
16537         "subs    r11, r11, r8\n\t"
16538         "sbc     r12, r12, #0\n\t"
16539         "subs    r11, r11, r9\n\t"
16540         "sbc     r12, r12, #0\n\t"
16541         /* Store t[0] */
16542         "str       r11, [sp, #0]\n\t"
16543         "neg       r12, r12\n\t"
16544         "mov       r11, #0\n\t"
16545         /* t[1] =  0  1  1  0 -1 -1 -1 -1 */
16546         "adds  r14, r14, r3\n\t"
16547         "adc   r11, r11, #0\n\t"
16548         "adds  r14, r14, r4\n\t"
16549         "adc   r11, r11, #0\n\t"
16550         "subs      r14, r14, r12\n\t"
16551         "mov       r12, #0\n\t"
16552         "sbc       r12, r12, #0\n\t"
16553         "subs    r14, r14, r6\n\t"
16554         "sbc     r12, r12, #0\n\t"
16555         "subs    r14, r14, r8\n\t"
16556         "sbc     r12, r12, #0\n\t"
16557         "subs    r14, r14, r9\n\t"
16558         "sbc     r12, r12, #0\n\t"
16559         "subs    r14, r14, r10\n\t"
16560         "sbc     r12, r12, #0\n\t"
16561         /* Store t[1] */
16562         "str       r14, [sp, #4]\n\t"
16563         "neg       r12, r12\n\t"
16564         "mov       r14, #0\n\t"
16565         /* t[2] =  0  0  1  1  0 -1 -1 -1 */
16566         "adds  r11, r11, r4\n\t"
16567         "adc   r14, r14, #0\n\t"
16568         "adds  r11, r11, r5\n\t"
16569         "adc   r14, r14, #0\n\t"
16570         "subs      r11, r11, r12\n\t"
16571         "mov       r12, #0\n\t"
16572         "sbc       r12, r12, #0\n\t"
16573         "subs    r11, r11, r8\n\t"
16574         "sbc     r12, r12, #0\n\t"
16575         "subs    r11, r11, r9\n\t"
16576         "sbc     r12, r12, #0\n\t"
16577         "subs    r11, r11, r10\n\t"
16578         "sbc     r12, r12, #0\n\t"
16579         /* Store t[2] */
16580         "str       r11, [sp, #8]\n\t"
16581         "neg       r12, r12\n\t"
16582         "mov       r11, #0\n\t"
16583         /* t[3] = -1 -1  0  2  2  1  0 -1 */
16584         "adds  r14, r14, r5\n\t"
16585         "adc   r11, r11, #0\n\t"
16586         "adds  r14, r14, r5\n\t"
16587         "adc   r11, r11, #0\n\t"
16588         "adds  r14, r14, r6\n\t"
16589         "adc   r11, r11, #0\n\t"
16590         "adds  r14, r14, r6\n\t"
16591         "adc   r11, r11, #0\n\t"
16592         "adds  r14, r14, r8\n\t"
16593         "adc   r11, r11, #0\n\t"
16594         "subs      r14, r14, r12\n\t"
16595         "mov       r12, #0\n\t"
16596         "sbc       r12, r12, #0\n\t"
16597         "subs    r14, r14, r2\n\t"
16598         "sbc     r12, r12, #0\n\t"
16599         "subs    r14, r14, r3\n\t"
16600         "sbc     r12, r12, #0\n\t"
16601         "subs    r14, r14, r10\n\t"
16602         "sbc     r12, r12, #0\n\t"
16603         /* Store t[3] */
16604         "str       r14, [sp, #12]\n\t"
16605         "neg       r12, r12\n\t"
16606         "mov       r14, #0\n\t"
16607         /* t[4] =  0 -1 -1  0  2  2  1  0 */
16608         "adds  r11, r11, r6\n\t"
16609         "adc   r14, r14, #0\n\t"
16610         "adds  r11, r11, r6\n\t"
16611         "adc   r14, r14, #0\n\t"
16612         "adds  r11, r11, r8\n\t"
16613         "adc   r14, r14, #0\n\t"
16614         "adds  r11, r11, r8\n\t"
16615         "adc   r14, r14, #0\n\t"
16616         "adds  r11, r11, r9\n\t"
16617         "adc   r14, r14, #0\n\t"
16618         "subs      r11, r11, r12\n\t"
16619         "mov       r12, #0\n\t"
16620         "sbc       r12, r12, #0\n\t"
16621         "subs    r11, r11, r3\n\t"
16622         "sbc     r12, r12, #0\n\t"
16623         "subs    r11, r11, r4\n\t"
16624         "sbc     r12, r12, #0\n\t"
16625         /* Store t[4] */
16626         "str       r11, [sp, #16]\n\t"
16627         "neg       r12, r12\n\t"
16628         "mov       r11, #0\n\t"
16629         /* t[5] =  0  0 -1 -1  0  2  2  1 */
16630         "adds  r14, r14, r8\n\t"
16631         "adc   r11, r11, #0\n\t"
16632         "adds  r14, r14, r8\n\t"
16633         "adc   r11, r11, #0\n\t"
16634         "adds  r14, r14, r9\n\t"
16635         "adc   r11, r11, #0\n\t"
16636         "adds  r14, r14, r9\n\t"
16637         "adc   r11, r11, #0\n\t"
16638         "adds  r14, r14, r10\n\t"
16639         "adc   r11, r11, #0\n\t"
16640         "subs      r14, r14, r12\n\t"
16641         "mov       r12, #0\n\t"
16642         "sbc       r12, r12, #0\n\t"
16643         "subs    r14, r14, r4\n\t"
16644         "sbc     r12, r12, #0\n\t"
16645         "subs    r14, r14, r5\n\t"
16646         "sbc     r12, r12, #0\n\t"
16647         /* Store t[5] */
16648         "str       r14, [sp, #20]\n\t"
16649         "neg       r12, r12\n\t"
16650         "mov       r14, #0\n\t"
16651         /* t[6] = -1 -1  0  0  0  1  3  2 */
16652         "adds  r11, r11, r8\n\t"
16653         "adc   r14, r14, #0\n\t"
16654         "adds  r11, r11, r9\n\t"
16655         "adc   r14, r14, #0\n\t"
16656         "adds  r11, r11, r9\n\t"
16657         "adc   r14, r14, #0\n\t"
16658         "adds  r11, r11, r9\n\t"
16659         "adc   r14, r14, #0\n\t"
16660         "adds  r11, r11, r10\n\t"
16661         "adc   r14, r14, #0\n\t"
16662         "adds  r11, r11, r10\n\t"
16663         "adc   r14, r14, #0\n\t"
16664         "subs      r11, r11, r12\n\t"
16665         "mov       r12, #0\n\t"
16666         "sbc       r12, r12, #0\n\t"
16667         "subs    r11, r11, r2\n\t"
16668         "sbc     r12, r12, #0\n\t"
16669         "subs    r11, r11, r3\n\t"
16670         "sbc     r12, r12, #0\n\t"
16671         /* Store t[6] */
16672         "mov       r9, r11\n\t"
16673         "neg       r12, r12\n\t"
16674         "mov       r11, #0\n\t"
16675         /* t[7] =  1  0 -1 -1 -1 -1  0  3 */
16676         "adds  r14, r14, r2\n\t"
16677         "adc   r11, r11, #0\n\t"
16678         "adds  r14, r14, r10\n\t"
16679         "adc   r11, r11, #0\n\t"
16680         "adds  r14, r14, r10\n\t"
16681         "adc   r11, r11, #0\n\t"
16682         "adds  r14, r14, r10\n\t"
16683         "adc   r11, r11, #0\n\t"
16684         "subs      r14, r14, r12\n\t"
16685         "mov       r12, #0\n\t"
16686         "sbc       r12, r12, #0\n\t"
16687         "subs    r14, r14, r4\n\t"
16688         "sbc     r12, r12, #0\n\t"
16689         "subs    r14, r14, r5\n\t"
16690         "sbc     r12, r12, #0\n\t"
16691         "subs    r14, r14, r6\n\t"
16692         "sbc     r12, r12, #0\n\t"
16693         "subs    r14, r14, r8\n\t"
16694         "sbc     r12, r12, #0\n\t"
16695         /* Store t[7] */
16696         /* Load intermediate */
16697         "ldr r2, [sp, #0]\n\t"
16698         "ldr r3, [sp, #4]\n\t"
16699         "ldr r4, [sp, #8]\n\t"
16700         "ldr r5, [sp, #12]\n\t"
16701         "ldr r6, [sp, #16]\n\t"
16702         "ldr r8, [sp, #20]\n\t"
16703         "neg   r12, r12\n\t"
16704         /* Add overflow */
16705         /* Subtract underflow - add neg underflow */
16706         "adds  r2, r2, r11\n\t"
16707         "adcs  r3, r3, #0\n\t"
16708         "adcs  r4, r4, #0\n\t"
16709         "adds  r5, r5, r12\n\t"
16710         "adcs  r6, r6, #0\n\t"
16711         "adcs  r8, r8, #0\n\t"
16712         "adcs  r9, r9, r12\n\t"
16713         "adc   r14, r14, r11\n\t"
16714         /* Subtract overflow */
16715         /* Add underflow - subtract neg underflow */
16716         "subs  r2, r2, r12\n\t"
16717         "sbcs  r3, r3, #0\n\t"
16718         "sbcs  r4, r4, #0\n\t"
16719         "subs  r5, r5, r11\n\t"
16720         "sbcs  r6, r6, #0\n\t"
16721         "sbcs  r8, r8, #0\n\t"
16722         "sbcs  r9, r9, r11\n\t"
16723         "sbc   r14, r14, r12\n\t"
16724         /* Store result */
16725         "str r2, [%[r], #0]\n\t"
16726         "str r3, [%[r], #4]\n\t"
16727         "str r4, [%[r], #8]\n\t"
16728         "str r5, [%[r], #12]\n\t"
16729         "str r6, [%[r], #16]\n\t"
16730         "str r8, [%[r], #20]\n\t"
16731         "str r9, [%[r], #24]\n\t"
16732         "str r14, [%[r], #28]\n\t"
16733         "add   sp, sp, #24\n\t"
16734         :
16735         : [r] "r" (r), [a] "r" (a)
16736         : "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r12"
16737     );
16738 
16739     return MP_OKAY;
16740 }
16741 
16742 /* Convert an mp_int to an array of sp_digit.
16743  *
16744  * r  A single precision integer.
16745  * size  Maximum number of bytes to convert
16746  * a  A multi-precision integer.
16747  */
sp_256_from_mp(sp_digit * r,int size,const mp_int * a)16748 static void sp_256_from_mp(sp_digit* r, int size, const mp_int* a)
16749 {
16750 #if DIGIT_BIT == 32
16751     int j;
16752 
16753     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
16754 
16755     for (j = a->used; j < size; j++) {
16756         r[j] = 0;
16757     }
16758 #elif DIGIT_BIT > 32
16759     int i;
16760     int j = 0;
16761     word32 s = 0;
16762 
16763     r[0] = 0;
16764     for (i = 0; i < a->used && j < size; i++) {
16765         r[j] |= ((sp_digit)a->dp[i] << s);
16766         r[j] &= 0xffffffff;
16767         s = 32U - s;
16768         if (j + 1 >= size) {
16769             break;
16770         }
16771         /* lint allow cast of mismatch word32 and mp_digit */
16772         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
16773         while ((s + 32U) <= (word32)DIGIT_BIT) {
16774             s += 32U;
16775             r[j] &= 0xffffffff;
16776             if (j + 1 >= size) {
16777                 break;
16778             }
16779             if (s < (word32)DIGIT_BIT) {
16780                 /* lint allow cast of mismatch word32 and mp_digit */
16781                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
16782             }
16783             else {
16784                 r[++j] = (sp_digit)0;
16785             }
16786         }
16787         s = (word32)DIGIT_BIT - s;
16788     }
16789 
16790     for (j++; j < size; j++) {
16791         r[j] = 0;
16792     }
16793 #else
16794     int i;
16795     int j = 0;
16796     int s = 0;
16797 
16798     r[0] = 0;
16799     for (i = 0; i < a->used && j < size; i++) {
16800         r[j] |= ((sp_digit)a->dp[i]) << s;
16801         if (s + DIGIT_BIT >= 32) {
16802             r[j] &= 0xffffffff;
16803             if (j + 1 >= size) {
16804                 break;
16805             }
16806             s = 32 - s;
16807             if (s == DIGIT_BIT) {
16808                 r[++j] = 0;
16809                 s = 0;
16810             }
16811             else {
16812                 r[++j] = a->dp[i] >> s;
16813                 s = DIGIT_BIT - s;
16814             }
16815         }
16816         else {
16817             s += DIGIT_BIT;
16818         }
16819     }
16820 
16821     for (j++; j < size; j++) {
16822         r[j] = 0;
16823     }
16824 #endif
16825 }
16826 
16827 /* Convert a point of type ecc_point to type sp_point_256.
16828  *
16829  * p   Point of type sp_point_256 (result).
16830  * pm  Point of type ecc_point.
16831  */
sp_256_point_from_ecc_point_8(sp_point_256 * p,const ecc_point * pm)16832 static void sp_256_point_from_ecc_point_8(sp_point_256* p,
16833         const ecc_point* pm)
16834 {
16835     XMEMSET(p->x, 0, sizeof(p->x));
16836     XMEMSET(p->y, 0, sizeof(p->y));
16837     XMEMSET(p->z, 0, sizeof(p->z));
16838     sp_256_from_mp(p->x, 8, pm->x);
16839     sp_256_from_mp(p->y, 8, pm->y);
16840     sp_256_from_mp(p->z, 8, pm->z);
16841     p->infinity = 0;
16842 }
16843 
16844 /* Convert an array of sp_digit to an mp_int.
16845  *
16846  * a  A single precision integer.
16847  * r  A multi-precision integer.
16848  */
sp_256_to_mp(const sp_digit * a,mp_int * r)16849 static int sp_256_to_mp(const sp_digit* a, mp_int* r)
16850 {
16851     int err;
16852 
16853     err = mp_grow(r, (256 + DIGIT_BIT - 1) / DIGIT_BIT);
16854     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
16855 #if DIGIT_BIT == 32
16856         XMEMCPY(r->dp, a, sizeof(sp_digit) * 8);
16857         r->used = 8;
16858         mp_clamp(r);
16859 #elif DIGIT_BIT < 32
16860         int i;
16861         int j = 0;
16862         int s = 0;
16863 
16864         r->dp[0] = 0;
16865         for (i = 0; i < 8; i++) {
16866             r->dp[j] |= (mp_digit)(a[i] << s);
16867             r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
16868             s = DIGIT_BIT - s;
16869             r->dp[++j] = (mp_digit)(a[i] >> s);
16870             while (s + DIGIT_BIT <= 32) {
16871                 s += DIGIT_BIT;
16872                 r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
16873                 if (s == SP_WORD_SIZE) {
16874                     r->dp[j] = 0;
16875                 }
16876                 else {
16877                     r->dp[j] = (mp_digit)(a[i] >> s);
16878                 }
16879             }
16880             s = 32 - s;
16881         }
16882         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
16883         mp_clamp(r);
16884 #else
16885         int i;
16886         int j = 0;
16887         int s = 0;
16888 
16889         r->dp[0] = 0;
16890         for (i = 0; i < 8; i++) {
16891             r->dp[j] |= ((mp_digit)a[i]) << s;
16892             if (s + 32 >= DIGIT_BIT) {
16893     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
16894                 r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
16895     #endif
16896                 s = DIGIT_BIT - s;
16897                 r->dp[++j] = a[i] >> s;
16898                 s = 32 - s;
16899             }
16900             else {
16901                 s += 32;
16902             }
16903         }
16904         r->used = (256 + DIGIT_BIT - 1) / DIGIT_BIT;
16905         mp_clamp(r);
16906 #endif
16907     }
16908 
16909     return err;
16910 }
16911 
16912 /* Convert a point of type sp_point_256 to type ecc_point.
16913  *
16914  * p   Point of type sp_point_256.
16915  * pm  Point of type ecc_point (result).
16916  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
16917  * MP_OKAY.
16918  */
sp_256_point_to_ecc_point_8(const sp_point_256 * p,ecc_point * pm)16919 static int sp_256_point_to_ecc_point_8(const sp_point_256* p, ecc_point* pm)
16920 {
16921     int err;
16922 
16923     err = sp_256_to_mp(p->x, pm->x);
16924     if (err == MP_OKAY) {
16925         err = sp_256_to_mp(p->y, pm->y);
16926     }
16927     if (err == MP_OKAY) {
16928         err = sp_256_to_mp(p->z, pm->z);
16929     }
16930 
16931     return err;
16932 }
16933 
16934 /* Multiply two Montgomery form numbers mod the modulus (prime).
16935  * (r = a * b mod m)
16936  *
16937  * r   Result of multiplication.
16938  * a   First number to multiply in Montgomery form.
16939  * b   Second number to multiply in Montgomery form.
16940  * m   Modulus (prime).
16941  * mp  Montgomery mulitplier.
16942  */
sp_256_mont_mul_8(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m,sp_digit mp)16943 SP_NOINLINE static void sp_256_mont_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
16944         const sp_digit* m, sp_digit mp)
16945 {
16946     (void)mp;
16947     (void)m;
16948 
16949     __asm__ __volatile__ (
16950         "sub   sp, sp, #68\n\t"
16951         "mov   r5, #0\n\t"
16952         /*  A[0] * B[0] */
16953         "ldr	r6, [%[a], #0]\n\t"
16954         "ldr	r8, [%[b], #0]\n\t"
16955         "umull	r9, r10, r6, r8\n\t"
16956         "str r9, [sp, #0]\n\t"
16957         /*  A[0] * B[1] */
16958         "ldr	r6, [%[a], #0]\n\t"
16959         "ldr	r8, [%[b], #4]\n\t"
16960         "umull	r3, r4, r6, r8\n\t"
16961         "adds	r10, r3, r10\n\t"
16962         "adc	r11, r4, #0\n\t"
16963         /*  A[1] * B[0] */
16964         "ldr	r6, [%[a], #4]\n\t"
16965         "ldr	r8, [%[b], #0]\n\t"
16966         "umull	r3, r4, r6, r8\n\t"
16967         "adds	r10, r3, r10\n\t"
16968         "adcs	r11, r4, r11\n\t"
16969         "adc	r14, r5, #0\n\t"
16970         "str r10, [sp, #4]\n\t"
16971         /*  A[0] * B[2] */
16972         "ldr	r6, [%[a], #0]\n\t"
16973         "ldr	r8, [%[b], #8]\n\t"
16974         "umull	r3, r4, r6, r8\n\t"
16975         "adds	r11, r3, r11\n\t"
16976         "adc	r14, r4, r14\n\t"
16977         /*  A[1] * B[1] */
16978         "ldr	r6, [%[a], #4]\n\t"
16979         "ldr	r8, [%[b], #4]\n\t"
16980         "umull	r3, r4, r6, r8\n\t"
16981         "adds	r11, r3, r11\n\t"
16982         "adcs	r14, r4, r14\n\t"
16983         "adc	r9, r5, #0\n\t"
16984         /*  A[2] * B[0] */
16985         "ldr	r6, [%[a], #8]\n\t"
16986         "ldr	r8, [%[b], #0]\n\t"
16987         "umull	r3, r4, r6, r8\n\t"
16988         "adds	r11, r3, r11\n\t"
16989         "adcs	r14, r4, r14\n\t"
16990         "adc	r9, r5, r9\n\t"
16991         "str r11, [sp, #8]\n\t"
16992         /*  A[0] * B[3] */
16993         "ldr	r6, [%[a], #0]\n\t"
16994         "ldr	r8, [%[b], #12]\n\t"
16995         "umull	r3, r4, r6, r8\n\t"
16996         "adds	r14, r3, r14\n\t"
16997         "adcs	r9, r4, r9\n\t"
16998         "adc	r10, r5, #0\n\t"
16999         /*  A[1] * B[2] */
17000         "ldr	r6, [%[a], #4]\n\t"
17001         "ldr	r8, [%[b], #8]\n\t"
17002         "umull	r3, r4, r6, r8\n\t"
17003         "adds	r14, r3, r14\n\t"
17004         "adcs	r9, r4, r9\n\t"
17005         "adc	r10, r5, r10\n\t"
17006         /*  A[2] * B[1] */
17007         "ldr	r6, [%[a], #8]\n\t"
17008         "ldr	r8, [%[b], #4]\n\t"
17009         "umull	r3, r4, r6, r8\n\t"
17010         "adds	r14, r3, r14\n\t"
17011         "adcs	r9, r4, r9\n\t"
17012         "adc	r10, r5, r10\n\t"
17013         /*  A[3] * B[0] */
17014         "ldr	r6, [%[a], #12]\n\t"
17015         "ldr	r8, [%[b], #0]\n\t"
17016         "umull	r3, r4, r6, r8\n\t"
17017         "adds	r14, r3, r14\n\t"
17018         "adcs	r9, r4, r9\n\t"
17019         "adc	r10, r5, r10\n\t"
17020         "str r14, [sp, #12]\n\t"
17021         /*  A[0] * B[4] */
17022         "ldr	r6, [%[a], #0]\n\t"
17023         "ldr	r8, [%[b], #16]\n\t"
17024         "umull	r3, r4, r6, r8\n\t"
17025         "adds	r9, r3, r9\n\t"
17026         "adcs	r10, r4, r10\n\t"
17027         "adc	r11, r5, #0\n\t"
17028         /*  A[1] * B[3] */
17029         "ldr	r6, [%[a], #4]\n\t"
17030         "ldr	r8, [%[b], #12]\n\t"
17031         "umull	r3, r4, r6, r8\n\t"
17032         "adds	r9, r3, r9\n\t"
17033         "adcs	r10, r4, r10\n\t"
17034         "adc	r11, r5, r11\n\t"
17035         /*  A[2] * B[2] */
17036         "ldr	r6, [%[a], #8]\n\t"
17037         "ldr	r8, [%[b], #8]\n\t"
17038         "umull	r3, r4, r6, r8\n\t"
17039         "adds	r9, r3, r9\n\t"
17040         "adcs	r10, r4, r10\n\t"
17041         "adc	r11, r5, r11\n\t"
17042         /*  A[3] * B[1] */
17043         "ldr	r6, [%[a], #12]\n\t"
17044         "ldr	r8, [%[b], #4]\n\t"
17045         "umull	r3, r4, r6, r8\n\t"
17046         "adds	r9, r3, r9\n\t"
17047         "adcs	r10, r4, r10\n\t"
17048         "adc	r11, r5, r11\n\t"
17049         /*  A[4] * B[0] */
17050         "ldr	r6, [%[a], #16]\n\t"
17051         "ldr	r8, [%[b], #0]\n\t"
17052         "umull	r3, r4, r6, r8\n\t"
17053         "adds	r9, r3, r9\n\t"
17054         "adcs	r10, r4, r10\n\t"
17055         "adc	r11, r5, r11\n\t"
17056         "str r9, [sp, #16]\n\t"
17057         /*  A[0] * B[5] */
17058         "ldr	r6, [%[a], #0]\n\t"
17059         "ldr	r8, [%[b], #20]\n\t"
17060         "umull	r3, r4, r6, r8\n\t"
17061         "adds	r10, r3, r10\n\t"
17062         "adcs	r11, r4, r11\n\t"
17063         "adc	r14, r5, #0\n\t"
17064         /*  A[1] * B[4] */
17065         "ldr	r6, [%[a], #4]\n\t"
17066         "ldr	r8, [%[b], #16]\n\t"
17067         "umull	r3, r4, r6, r8\n\t"
17068         "adds	r10, r3, r10\n\t"
17069         "adcs	r11, r4, r11\n\t"
17070         "adc	r14, r5, r14\n\t"
17071         /*  A[2] * B[3] */
17072         "ldr	r6, [%[a], #8]\n\t"
17073         "ldr	r8, [%[b], #12]\n\t"
17074         "umull	r3, r4, r6, r8\n\t"
17075         "adds	r10, r3, r10\n\t"
17076         "adcs	r11, r4, r11\n\t"
17077         "adc	r14, r5, r14\n\t"
17078         /*  A[3] * B[2] */
17079         "ldr	r6, [%[a], #12]\n\t"
17080         "ldr	r8, [%[b], #8]\n\t"
17081         "umull	r3, r4, r6, r8\n\t"
17082         "adds	r10, r3, r10\n\t"
17083         "adcs	r11, r4, r11\n\t"
17084         "adc	r14, r5, r14\n\t"
17085         /*  A[4] * B[1] */
17086         "ldr	r6, [%[a], #16]\n\t"
17087         "ldr	r8, [%[b], #4]\n\t"
17088         "umull	r3, r4, r6, r8\n\t"
17089         "adds	r10, r3, r10\n\t"
17090         "adcs	r11, r4, r11\n\t"
17091         "adc	r14, r5, r14\n\t"
17092         /*  A[5] * B[0] */
17093         "ldr	r6, [%[a], #20]\n\t"
17094         "ldr	r8, [%[b], #0]\n\t"
17095         "umull	r3, r4, r6, r8\n\t"
17096         "adds	r10, r3, r10\n\t"
17097         "adcs	r11, r4, r11\n\t"
17098         "adc	r14, r5, r14\n\t"
17099         "str r10, [sp, #20]\n\t"
17100         /*  A[0] * B[6] */
17101         "ldr	r6, [%[a], #0]\n\t"
17102         "ldr	r8, [%[b], #24]\n\t"
17103         "umull	r3, r4, r6, r8\n\t"
17104         "adds	r11, r3, r11\n\t"
17105         "adcs	r14, r4, r14\n\t"
17106         "adc	r9, r5, #0\n\t"
17107         /*  A[1] * B[5] */
17108         "ldr	r6, [%[a], #4]\n\t"
17109         "ldr	r8, [%[b], #20]\n\t"
17110         "umull	r3, r4, r6, r8\n\t"
17111         "adds	r11, r3, r11\n\t"
17112         "adcs	r14, r4, r14\n\t"
17113         "adc	r9, r5, r9\n\t"
17114         /*  A[2] * B[4] */
17115         "ldr	r6, [%[a], #8]\n\t"
17116         "ldr	r8, [%[b], #16]\n\t"
17117         "umull	r3, r4, r6, r8\n\t"
17118         "adds	r11, r3, r11\n\t"
17119         "adcs	r14, r4, r14\n\t"
17120         "adc	r9, r5, r9\n\t"
17121         /*  A[3] * B[3] */
17122         "ldr	r6, [%[a], #12]\n\t"
17123         "ldr	r8, [%[b], #12]\n\t"
17124         "umull	r3, r4, r6, r8\n\t"
17125         "adds	r11, r3, r11\n\t"
17126         "adcs	r14, r4, r14\n\t"
17127         "adc	r9, r5, r9\n\t"
17128         /*  A[4] * B[2] */
17129         "ldr	r6, [%[a], #16]\n\t"
17130         "ldr	r8, [%[b], #8]\n\t"
17131         "umull	r3, r4, r6, r8\n\t"
17132         "adds	r11, r3, r11\n\t"
17133         "adcs	r14, r4, r14\n\t"
17134         "adc	r9, r5, r9\n\t"
17135         /*  A[5] * B[1] */
17136         "ldr	r6, [%[a], #20]\n\t"
17137         "ldr	r8, [%[b], #4]\n\t"
17138         "umull	r3, r4, r6, r8\n\t"
17139         "adds	r11, r3, r11\n\t"
17140         "adcs	r14, r4, r14\n\t"
17141         "adc	r9, r5, r9\n\t"
17142         /*  A[6] * B[0] */
17143         "ldr	r6, [%[a], #24]\n\t"
17144         "ldr	r8, [%[b], #0]\n\t"
17145         "umull	r3, r4, r6, r8\n\t"
17146         "adds	r11, r3, r11\n\t"
17147         "adcs	r14, r4, r14\n\t"
17148         "adc	r9, r5, r9\n\t"
17149         "str r11, [sp, #24]\n\t"
17150         /*  A[0] * B[7] */
17151         "ldr	r6, [%[a], #0]\n\t"
17152         "ldr	r8, [%[b], #28]\n\t"
17153         "umull	r3, r4, r6, r8\n\t"
17154         "adds	r14, r3, r14\n\t"
17155         "adcs	r9, r4, r9\n\t"
17156         "adc	r10, r5, #0\n\t"
17157         /*  A[1] * B[6] */
17158         "ldr	r6, [%[a], #4]\n\t"
17159         "ldr	r8, [%[b], #24]\n\t"
17160         "umull	r3, r4, r6, r8\n\t"
17161         "adds	r14, r3, r14\n\t"
17162         "adcs	r9, r4, r9\n\t"
17163         "adc	r10, r5, r10\n\t"
17164         /*  A[2] * B[5] */
17165         "ldr	r6, [%[a], #8]\n\t"
17166         "ldr	r8, [%[b], #20]\n\t"
17167         "umull	r3, r4, r6, r8\n\t"
17168         "adds	r14, r3, r14\n\t"
17169         "adcs	r9, r4, r9\n\t"
17170         "adc	r10, r5, r10\n\t"
17171         /*  A[3] * B[4] */
17172         "ldr	r6, [%[a], #12]\n\t"
17173         "ldr	r8, [%[b], #16]\n\t"
17174         "umull	r3, r4, r6, r8\n\t"
17175         "adds	r14, r3, r14\n\t"
17176         "adcs	r9, r4, r9\n\t"
17177         "adc	r10, r5, r10\n\t"
17178         /*  A[4] * B[3] */
17179         "ldr	r6, [%[a], #16]\n\t"
17180         "ldr	r8, [%[b], #12]\n\t"
17181         "umull	r3, r4, r6, r8\n\t"
17182         "adds	r14, r3, r14\n\t"
17183         "adcs	r9, r4, r9\n\t"
17184         "adc	r10, r5, r10\n\t"
17185         /*  A[5] * B[2] */
17186         "ldr	r6, [%[a], #20]\n\t"
17187         "ldr	r8, [%[b], #8]\n\t"
17188         "umull	r3, r4, r6, r8\n\t"
17189         "adds	r14, r3, r14\n\t"
17190         "adcs	r9, r4, r9\n\t"
17191         "adc	r10, r5, r10\n\t"
17192         /*  A[6] * B[1] */
17193         "ldr	r6, [%[a], #24]\n\t"
17194         "ldr	r8, [%[b], #4]\n\t"
17195         "umull	r3, r4, r6, r8\n\t"
17196         "adds	r14, r3, r14\n\t"
17197         "adcs	r9, r4, r9\n\t"
17198         "adc	r10, r5, r10\n\t"
17199         /*  A[7] * B[0] */
17200         "ldr	r6, [%[a], #28]\n\t"
17201         "ldr	r8, [%[b], #0]\n\t"
17202         "umull	r3, r4, r6, r8\n\t"
17203         "adds	r14, r3, r14\n\t"
17204         "adcs	r9, r4, r9\n\t"
17205         "adc	r10, r5, r10\n\t"
17206         "str r14, [sp, #28]\n\t"
17207         /*  A[1] * B[7] */
17208         "ldr	r6, [%[a], #4]\n\t"
17209         "ldr	r8, [%[b], #28]\n\t"
17210         "umull	r3, r4, r6, r8\n\t"
17211         "adds	r9, r3, r9\n\t"
17212         "adcs	r10, r4, r10\n\t"
17213         "adc	r11, r5, #0\n\t"
17214         /*  A[2] * B[6] */
17215         "ldr	r6, [%[a], #8]\n\t"
17216         "ldr	r8, [%[b], #24]\n\t"
17217         "umull	r3, r4, r6, r8\n\t"
17218         "adds	r9, r3, r9\n\t"
17219         "adcs	r10, r4, r10\n\t"
17220         "adc	r11, r5, r11\n\t"
17221         /*  A[3] * B[5] */
17222         "ldr	r6, [%[a], #12]\n\t"
17223         "ldr	r8, [%[b], #20]\n\t"
17224         "umull	r3, r4, r6, r8\n\t"
17225         "adds	r9, r3, r9\n\t"
17226         "adcs	r10, r4, r10\n\t"
17227         "adc	r11, r5, r11\n\t"
17228         /*  A[4] * B[4] */
17229         "ldr	r6, [%[a], #16]\n\t"
17230         "ldr	r8, [%[b], #16]\n\t"
17231         "umull	r3, r4, r6, r8\n\t"
17232         "adds	r9, r3, r9\n\t"
17233         "adcs	r10, r4, r10\n\t"
17234         "adc	r11, r5, r11\n\t"
17235         /*  A[5] * B[3] */
17236         "ldr	r6, [%[a], #20]\n\t"
17237         "ldr	r8, [%[b], #12]\n\t"
17238         "umull	r3, r4, r6, r8\n\t"
17239         "adds	r9, r3, r9\n\t"
17240         "adcs	r10, r4, r10\n\t"
17241         "adc	r11, r5, r11\n\t"
17242         /*  A[6] * B[2] */
17243         "ldr	r6, [%[a], #24]\n\t"
17244         "ldr	r8, [%[b], #8]\n\t"
17245         "umull	r3, r4, r6, r8\n\t"
17246         "adds	r9, r3, r9\n\t"
17247         "adcs	r10, r4, r10\n\t"
17248         "adc	r11, r5, r11\n\t"
17249         /*  A[7] * B[1] */
17250         "ldr	r6, [%[a], #28]\n\t"
17251         "ldr	r8, [%[b], #4]\n\t"
17252         "umull	r3, r4, r6, r8\n\t"
17253         "adds	r9, r3, r9\n\t"
17254         "adcs	r10, r4, r10\n\t"
17255         "adc	r11, r5, r11\n\t"
17256         "str r9, [sp, #32]\n\t"
17257         /*  A[2] * B[7] */
17258         "ldr	r6, [%[a], #8]\n\t"
17259         "ldr	r8, [%[b], #28]\n\t"
17260         "umull	r3, r4, r6, r8\n\t"
17261         "adds	r10, r3, r10\n\t"
17262         "adcs	r11, r4, r11\n\t"
17263         "adc	r14, r5, #0\n\t"
17264         /*  A[3] * B[6] */
17265         "ldr	r6, [%[a], #12]\n\t"
17266         "ldr	r8, [%[b], #24]\n\t"
17267         "umull	r3, r4, r6, r8\n\t"
17268         "adds	r10, r3, r10\n\t"
17269         "adcs	r11, r4, r11\n\t"
17270         "adc	r14, r5, r14\n\t"
17271         /*  A[4] * B[5] */
17272         "ldr	r6, [%[a], #16]\n\t"
17273         "ldr	r8, [%[b], #20]\n\t"
17274         "umull	r3, r4, r6, r8\n\t"
17275         "adds	r10, r3, r10\n\t"
17276         "adcs	r11, r4, r11\n\t"
17277         "adc	r14, r5, r14\n\t"
17278         /*  A[5] * B[4] */
17279         "ldr	r6, [%[a], #20]\n\t"
17280         "ldr	r8, [%[b], #16]\n\t"
17281         "umull	r3, r4, r6, r8\n\t"
17282         "adds	r10, r3, r10\n\t"
17283         "adcs	r11, r4, r11\n\t"
17284         "adc	r14, r5, r14\n\t"
17285         /*  A[6] * B[3] */
17286         "ldr	r6, [%[a], #24]\n\t"
17287         "ldr	r8, [%[b], #12]\n\t"
17288         "umull	r3, r4, r6, r8\n\t"
17289         "adds	r10, r3, r10\n\t"
17290         "adcs	r11, r4, r11\n\t"
17291         "adc	r14, r5, r14\n\t"
17292         /*  A[7] * B[2] */
17293         "ldr	r6, [%[a], #28]\n\t"
17294         "ldr	r8, [%[b], #8]\n\t"
17295         "umull	r3, r4, r6, r8\n\t"
17296         "adds	r10, r3, r10\n\t"
17297         "adcs	r11, r4, r11\n\t"
17298         "adc	r14, r5, r14\n\t"
17299         "str r10, [sp, #36]\n\t"
17300         /*  A[3] * B[7] */
17301         "ldr	r6, [%[a], #12]\n\t"
17302         "ldr	r8, [%[b], #28]\n\t"
17303         "umull	r3, r4, r6, r8\n\t"
17304         "adds	r11, r3, r11\n\t"
17305         "adcs	r14, r4, r14\n\t"
17306         "adc	r9, r5, #0\n\t"
17307         /*  A[4] * B[6] */
17308         "ldr	r6, [%[a], #16]\n\t"
17309         "ldr	r8, [%[b], #24]\n\t"
17310         "umull	r3, r4, r6, r8\n\t"
17311         "adds	r11, r3, r11\n\t"
17312         "adcs	r14, r4, r14\n\t"
17313         "adc	r9, r5, r9\n\t"
17314         /*  A[5] * B[5] */
17315         "ldr	r6, [%[a], #20]\n\t"
17316         "ldr	r8, [%[b], #20]\n\t"
17317         "umull	r3, r4, r6, r8\n\t"
17318         "adds	r11, r3, r11\n\t"
17319         "adcs	r14, r4, r14\n\t"
17320         "adc	r9, r5, r9\n\t"
17321         /*  A[6] * B[4] */
17322         "ldr	r6, [%[a], #24]\n\t"
17323         "ldr	r8, [%[b], #16]\n\t"
17324         "umull	r3, r4, r6, r8\n\t"
17325         "adds	r11, r3, r11\n\t"
17326         "adcs	r14, r4, r14\n\t"
17327         "adc	r9, r5, r9\n\t"
17328         /*  A[7] * B[3] */
17329         "ldr	r6, [%[a], #28]\n\t"
17330         "ldr	r8, [%[b], #12]\n\t"
17331         "umull	r3, r4, r6, r8\n\t"
17332         "adds	r11, r3, r11\n\t"
17333         "adcs	r14, r4, r14\n\t"
17334         "adc	r9, r5, r9\n\t"
17335         "str r11, [sp, #40]\n\t"
17336         /*  A[4] * B[7] */
17337         "ldr	r6, [%[a], #16]\n\t"
17338         "ldr	r8, [%[b], #28]\n\t"
17339         "umull	r3, r4, r6, r8\n\t"
17340         "adds	r14, r3, r14\n\t"
17341         "adcs	r9, r4, r9\n\t"
17342         "adc	r10, r5, #0\n\t"
17343         /*  A[5] * B[6] */
17344         "ldr	r6, [%[a], #20]\n\t"
17345         "ldr	r8, [%[b], #24]\n\t"
17346         "umull	r3, r4, r6, r8\n\t"
17347         "adds	r14, r3, r14\n\t"
17348         "adcs	r9, r4, r9\n\t"
17349         "adc	r10, r5, r10\n\t"
17350         /*  A[6] * B[5] */
17351         "ldr	r6, [%[a], #24]\n\t"
17352         "ldr	r8, [%[b], #20]\n\t"
17353         "umull	r3, r4, r6, r8\n\t"
17354         "adds	r14, r3, r14\n\t"
17355         "adcs	r9, r4, r9\n\t"
17356         "adc	r10, r5, r10\n\t"
17357         /*  A[7] * B[4] */
17358         "ldr	r6, [%[a], #28]\n\t"
17359         "ldr	r8, [%[b], #16]\n\t"
17360         "umull	r3, r4, r6, r8\n\t"
17361         "adds	r14, r3, r14\n\t"
17362         "adcs	r9, r4, r9\n\t"
17363         "adc	r10, r5, r10\n\t"
17364         "str r14, [sp, #44]\n\t"
17365         /*  A[5] * B[7] */
17366         "ldr	r6, [%[a], #20]\n\t"
17367         "ldr	r8, [%[b], #28]\n\t"
17368         "umull	r3, r4, r6, r8\n\t"
17369         "adds	r9, r3, r9\n\t"
17370         "adcs	r10, r4, r10\n\t"
17371         "adc	r11, r5, #0\n\t"
17372         /*  A[6] * B[6] */
17373         "ldr	r6, [%[a], #24]\n\t"
17374         "ldr	r8, [%[b], #24]\n\t"
17375         "umull	r3, r4, r6, r8\n\t"
17376         "adds	r9, r3, r9\n\t"
17377         "adcs	r10, r4, r10\n\t"
17378         "adc	r11, r5, r11\n\t"
17379         /*  A[7] * B[5] */
17380         "ldr	r6, [%[a], #28]\n\t"
17381         "ldr	r8, [%[b], #20]\n\t"
17382         "umull	r3, r4, r6, r8\n\t"
17383         "adds	r9, r3, r9\n\t"
17384         "adcs	r10, r4, r10\n\t"
17385         "adc	r11, r5, r11\n\t"
17386         /*  A[6] * B[7] */
17387         "ldr	r6, [%[a], #24]\n\t"
17388         "ldr	r8, [%[b], #28]\n\t"
17389         "umull	r3, r4, r6, r8\n\t"
17390         "adds	r10, r3, r10\n\t"
17391         "adcs	r11, r4, r11\n\t"
17392         "adc	r14, r5, #0\n\t"
17393         /*  A[7] * B[6] */
17394         "ldr	r6, [%[a], #28]\n\t"
17395         "ldr	r8, [%[b], #24]\n\t"
17396         "umull	r3, r4, r6, r8\n\t"
17397         "adds	r10, r3, r10\n\t"
17398         "adcs	r11, r4, r11\n\t"
17399         "adc	r14, r5, r14\n\t"
17400         /*  A[7] * B[7] */
17401         "ldr	r6, [%[a], #28]\n\t"
17402         "ldr	r8, [%[b], #28]\n\t"
17403         "umull	r3, r4, r6, r8\n\t"
17404         "adds	r11, r3, r11\n\t"
17405         "adc	r14, r4, r14\n\t"
17406         "str r9, [sp, #48]\n\t"
17407         "str r10, [sp, #52]\n\t"
17408         "str r11, [sp, #56]\n\t"
17409         "str r14, [sp, #60]\n\t"
17410         /* Start Reduction */
17411         "ldr r4, [sp, #0]\n\t"
17412         "ldr r5, [sp, #4]\n\t"
17413         "ldr r6, [sp, #8]\n\t"
17414         "ldr r8, [sp, #12]\n\t"
17415         "ldr r9, [sp, #16]\n\t"
17416         "ldr r10, [sp, #20]\n\t"
17417         "ldr r11, [sp, #24]\n\t"
17418         "ldr r14, [sp, #28]\n\t"
17419         /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */
17420         /*    - a[0] << 224 */
17421         /*   + (a[0]-a[1] * 2) << (6 * 32) */
17422         "adds  r11, r11, r4\n\t"
17423         "adc   r14, r14, r5\n\t"
17424         "adds  r11, r11, r4\n\t"
17425         "adc   r14, r14, r5\n\t"
17426         /*   - a[0] << (7 * 32) */
17427         "sub   r14, r14, r4\n\t"
17428         /*   + a[0]-a[4] << (3 * 32) */
17429         "mov   %[a], r8\n\t"
17430         "mov   %[b], r9\n\t"
17431         "adds  r8, r8, r4\n\t"
17432         "adcs  r9, r9, r5\n\t"
17433         "adcs  r10, r10, r6\n\t"
17434         "adcs  r11, r11, %[a]\n\t"
17435         "adc   r14, r14, %[b]\n\t"
17436         "str r4, [sp, #0]\n\t"
17437         "str r5, [sp, #4]\n\t"
17438         "str r6, [sp, #8]\n\t"
17439         "str r8, [sp, #12]\n\t"
17440         "str r9, [sp, #16]\n\t"
17441         "str r10, [sp, #20]\n\t"
17442         /* a += mu * m */
17443         /*   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */
17444         "mov   %[a], #0\n\t"
17445         /* a[6] +=        t[0] + t[3] */
17446         "ldr   r3, [sp, #24]\n\t"
17447         "adds  r3, r3, r4\n\t"
17448         "adc   %[b], %[a], #0\n\t"
17449         "adds  r3, r3, r8\n\t"
17450         "adc   %[b], %[b], #0\n\t"
17451         "str   r11, [sp, #24]\n\t"
17452         /* a[7] +=        t[1] + t[4] */
17453         "ldr   r3, [sp, #28]\n\t"
17454         "adds  r3, r3, %[b]\n\t"
17455         "adc   %[b], %[a], #0\n\t"
17456         "adds  r3, r3, r5\n\t"
17457         "adc   %[b], %[b], #0\n\t"
17458         "adds  r3, r3, r9\n\t"
17459         "adc   %[b], %[b], #0\n\t"
17460         "str   r14, [sp, #28]\n\t"
17461         "str   r3, [sp, #64]\n\t"
17462         /* a[8] += t[0] + t[2] + t[5] */
17463         "ldr   r3, [sp, #32]\n\t"
17464         "adds  r3, r3, %[b]\n\t"
17465         "adc   %[b], %[a], #0\n\t"
17466         "adds  r3, r3, r4\n\t"
17467         "adc   %[b], %[b], #0\n\t"
17468         "adds  r3, r3, r6\n\t"
17469         "adc   %[b], %[b], #0\n\t"
17470         "adds  r3, r3, r10\n\t"
17471         "adc   %[b], %[b], #0\n\t"
17472         "str   r3, [sp, #32]\n\t"
17473         /* a[9]  += t[1] + t[3] + t[6] */
17474         /* a[10] += t[2] + t[4] + t[7] */
17475         "ldr   r3, [sp, #36]\n\t"
17476         "ldr   r4, [sp, #40]\n\t"
17477         "adds  r3, r3, %[b]\n\t"
17478         "adcs  r4, r4, #0\n\t"
17479         "adc   %[b], %[a], #0\n\t"
17480         "adds  r3, r3, r5\n\t"
17481         "adcs  r4, r4, r6\n\t"
17482         "adc   %[b], %[b], #0\n\t"
17483         "adds  r3, r3, r8\n\t"
17484         "adcs  r4, r4, r9\n\t"
17485         "adc   %[b], %[b], #0\n\t"
17486         "adds  r3, r3, r11\n\t"
17487         "adcs  r4, r4, r14\n\t"
17488         "adc   %[b], %[b], #0\n\t"
17489         "str   r3, [sp, #36]\n\t"
17490         "str   r4, [sp, #40]\n\t"
17491         /* a[11] += t[3] + t[5] */
17492         /* a[12] += t[4] + t[6] */
17493         /* a[13] += t[5] + t[7] */
17494         /* a[14] += t[6] */
17495         "ldr   r3, [sp, #44]\n\t"
17496         "ldr   r4, [sp, #48]\n\t"
17497         "ldr   r5, [sp, #52]\n\t"
17498         "ldr   r6, [sp, #56]\n\t"
17499         "adds  r3, r3, %[b]\n\t"
17500         "adcs  r4, r4, #0\n\t"
17501         "adcs  r5, r5, #0\n\t"
17502         "adcs  r6, r6, #0\n\t"
17503         "adc   %[b], %[a], #0\n\t"
17504         "adds  r3, r3, r8\n\t"
17505         "adcs  r4, r4, r9\n\t"
17506         "adcs  r5, r5, r10\n\t"
17507         "adcs  r6, r6, r11\n\t"
17508         "adc   %[b], %[b], #0\n\t"
17509         "adds  r3, r3, r10\n\t"
17510         "adcs  r4, r4, r11\n\t"
17511         "adcs  r5, r5, r14\n\t"
17512         "adcs  r6, r6, #0\n\t"
17513         "adc   %[b], %[b], #0\n\t"
17514         "str   r3, [sp, #44]\n\t"
17515         "str   r4, [sp, #48]\n\t"
17516         "str   r5, [sp, #52]\n\t"
17517         "str   r6, [sp, #56]\n\t"
17518         /* a[15] += t[7] */
17519         "ldr   r3, [sp, #60]\n\t"
17520         "adds  r3, r3, %[b]\n\t"
17521         "adc   %[b], %[a], #0\n\t"
17522         "adds  r3, r3, r14\n\t"
17523         "adc   %[b], %[b], #0\n\t"
17524         "str   r3, [sp, #60]\n\t"
17525         "ldr   r3, [sp, #64]\n\t"
17526         "ldr   r4, [sp, #32]\n\t"
17527         "ldr   r5, [sp, #36]\n\t"
17528         "ldr   r6, [sp, #40]\n\t"
17529         "ldr   r9, [sp, #0]\n\t"
17530         "ldr   r10, [sp, #4]\n\t"
17531         "ldr   r11, [sp, #8]\n\t"
17532         "ldr   r14, [sp, #12]\n\t"
17533         "subs  r3, r3, r9\n\t"
17534         "sbcs  r4, r4, r10\n\t"
17535         "sbcs  r5, r5, r11\n\t"
17536         "sbcs  r6, r6, r14\n\t"
17537         "str   r4, [sp, #32]\n\t"
17538         "str   r5, [sp, #36]\n\t"
17539         "str   r6, [sp, #40]\n\t"
17540         "ldr   r3, [sp, #44]\n\t"
17541         "ldr   r4, [sp, #48]\n\t"
17542         "ldr   r5, [sp, #52]\n\t"
17543         "ldr   r6, [sp, #56]\n\t"
17544         "ldr   r8, [sp, #60]\n\t"
17545         "ldr   r9, [sp, #16]\n\t"
17546         "ldr   r10, [sp, #20]\n\t"
17547         "ldr   r11, [sp, #24]\n\t"
17548         "ldr   r14, [sp, #28]\n\t"
17549         "sbcs  r3, r3, r9\n\t"
17550         "sbcs  r4, r4, r10\n\t"
17551         "sbcs  r5, r5, r11\n\t"
17552         "sbcs  r6, r6, r14\n\t"
17553         "sbc   r8, r8, #0\n\t"
17554         "str   r3, [sp, #44]\n\t"
17555         "str   r4, [sp, #48]\n\t"
17556         "str   r5, [sp, #52]\n\t"
17557         "str   r6, [sp, #56]\n\t"
17558         "str   r8, [sp, #60]\n\t"
17559         /* mask m and sub from result if overflow */
17560         "sub   %[b], %[a], %[b]\n\t"
17561         "and   %[a], %[b], #1\n\t"
17562         "ldr       r3, [sp, #32]\n\t"
17563         "ldr       r4, [sp, #36]\n\t"
17564         "ldr       r5, [sp, #40]\n\t"
17565         "ldr       r6, [sp, #44]\n\t"
17566         "ldr       r8, [sp, #48]\n\t"
17567         "ldr       r9, [sp, #52]\n\t"
17568         "ldr       r10, [sp, #56]\n\t"
17569         "ldr       r11, [sp, #60]\n\t"
17570         "subs      r3, r3, %[b]\n\t"
17571         "sbcs      r4, r4, %[b]\n\t"
17572         "sbcs      r5, r5, %[b]\n\t"
17573         "sbcs      r6, r6, #0\n\t"
17574         "sbcs      r8, r8, #0\n\t"
17575         "sbcs      r9, r9, #0\n\t"
17576         "sbcs      r10, r10, %[a]\n\t"
17577         "sbc       r11, r11, %[b]\n\t"
17578         "str       r3, [%[r], #0]\n\t"
17579         "str       r4, [%[r], #4]\n\t"
17580         "str       r5, [%[r], #8]\n\t"
17581         "str       r6, [%[r], #12]\n\t"
17582         "str       r8, [%[r], #16]\n\t"
17583         "str       r9, [%[r], #20]\n\t"
17584         "str       r10, [%[r], #24]\n\t"
17585         "str       r11, [%[r], #28]\n\t"
17586         "add   sp, sp, #68\n\t"
17587         : [a] "+r" (a), [b] "+r" (b)
17588         : [r] "r" (r)
17589         : "memory", "r9", "r10", "r11", "r14", "r3", "r4", "r5", "r6", "r8"
17590     );
17591 }
17592 
17593 /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
17594  *
17595  * r   Result of squaring.
17596  * a   Number to square in Montgomery form.
17597  * m   Modulus (prime).
17598  * mp  Montgomery multiplier.
17599  */
sp_256_mont_sqr_8(sp_digit * r,const sp_digit * a,const sp_digit * m,sp_digit mp)17600 SP_NOINLINE static void sp_256_mont_sqr_8(sp_digit* r, const sp_digit* a, const sp_digit* m,
17601         sp_digit mp)
17602 {
17603     (void)mp;
17604     (void)m;
17605 
17606     __asm__ __volatile__ (
17607         "sub   sp, sp, #68\n\t"
17608         "mov   r5, #0\n\t"
17609         /*  A[0] * A[1] */
17610         "ldr       r6, [%[a], #0]\n\t"
17611         "ldr       r8, [%[a], #4]\n\t"
17612         "umull     r10, r11, r6, r8\n\t"
17613         "str r10, [sp, #4]\n\t"
17614         /*  A[0] * A[2] */
17615         "ldr       r6, [%[a], #0]\n\t"
17616         "ldr       r8, [%[a], #8]\n\t"
17617         "umull     r3, r4, r6, r8\n\t"
17618         "adds  r11, r3, r11\n\t"
17619         "adc r14, r4, #0\n\t"
17620         "str r11, [sp, #8]\n\t"
17621         /*  A[0] * A[3] */
17622         "ldr       r6, [%[a], #0]\n\t"
17623         "ldr       r8, [%[a], #12]\n\t"
17624         "umull     r3, r4, r6, r8\n\t"
17625         "adds  r14, r3, r14\n\t"
17626         "adc r9, r4, #0\n\t"
17627         /*  A[1] * A[2] */
17628         "ldr       r6, [%[a], #4]\n\t"
17629         "ldr       r8, [%[a], #8]\n\t"
17630         "umull     r3, r4, r6, r8\n\t"
17631         "adds  r14, r3, r14\n\t"
17632         "adcs r9, r4, r9\n\t"
17633         "adc   r10, r5, #0\n\t"
17634         "str r14, [sp, #12]\n\t"
17635         /*  A[0] * A[4] */
17636         "ldr       r6, [%[a], #0]\n\t"
17637         "ldr       r8, [%[a], #16]\n\t"
17638         "umull     r3, r4, r6, r8\n\t"
17639         "adds  r9, r3, r9\n\t"
17640         "adc r10, r4, r10\n\t"
17641         /*  A[1] * A[3] */
17642         "ldr       r6, [%[a], #4]\n\t"
17643         "ldr       r8, [%[a], #12]\n\t"
17644         "umull     r3, r4, r6, r8\n\t"
17645         "adds  r9, r3, r9\n\t"
17646         "adcs r10, r4, r10\n\t"
17647         "adc   r11, r5, #0\n\t"
17648         "str r9, [sp, #16]\n\t"
17649         /*  A[0] * A[5] */
17650         "ldr       r6, [%[a], #0]\n\t"
17651         "ldr       r8, [%[a], #20]\n\t"
17652         "umull     r3, r4, r6, r8\n\t"
17653         "adds  r10, r3, r10\n\t"
17654         "adc r11, r4, r11\n\t"
17655         /*  A[1] * A[4] */
17656         "ldr       r6, [%[a], #4]\n\t"
17657         "ldr       r8, [%[a], #16]\n\t"
17658         "umull     r3, r4, r6, r8\n\t"
17659         "adds  r10, r3, r10\n\t"
17660         "adcs r11, r4, r11\n\t"
17661         "adc   r14, r5, #0\n\t"
17662         /*  A[2] * A[3] */
17663         "ldr       r6, [%[a], #8]\n\t"
17664         "ldr       r8, [%[a], #12]\n\t"
17665         "umull     r3, r4, r6, r8\n\t"
17666         "adds  r10, r3, r10\n\t"
17667         "adcs r11, r4, r11\n\t"
17668         "adc   r14, r5, r14\n\t"
17669         "str r10, [sp, #20]\n\t"
17670         /*  A[0] * A[6] */
17671         "ldr       r6, [%[a], #0]\n\t"
17672         "ldr       r8, [%[a], #24]\n\t"
17673         "umull     r3, r4, r6, r8\n\t"
17674         "adds  r11, r3, r11\n\t"
17675         "adcs r14, r4, r14\n\t"
17676         "adc   r9, r5, #0\n\t"
17677         /*  A[1] * A[5] */
17678         "ldr       r6, [%[a], #4]\n\t"
17679         "ldr       r8, [%[a], #20]\n\t"
17680         "umull     r3, r4, r6, r8\n\t"
17681         "adds  r11, r3, r11\n\t"
17682         "adcs r14, r4, r14\n\t"
17683         "adc   r9, r5, r9\n\t"
17684         /*  A[2] * A[4] */
17685         "ldr       r6, [%[a], #8]\n\t"
17686         "ldr       r8, [%[a], #16]\n\t"
17687         "umull     r3, r4, r6, r8\n\t"
17688         "adds  r11, r3, r11\n\t"
17689         "adcs r14, r4, r14\n\t"
17690         "adc   r9, r5, r9\n\t"
17691         "str r11, [sp, #24]\n\t"
17692         /*  A[0] * A[7] */
17693         "ldr       r6, [%[a], #0]\n\t"
17694         "ldr       r8, [%[a], #28]\n\t"
17695         "umull     r3, r4, r6, r8\n\t"
17696         "adds  r14, r3, r14\n\t"
17697         "adcs r9, r4, r9\n\t"
17698         "adc   r10, r5, #0\n\t"
17699         /*  A[1] * A[6] */
17700         "ldr       r6, [%[a], #4]\n\t"
17701         "ldr       r8, [%[a], #24]\n\t"
17702         "umull     r3, r4, r6, r8\n\t"
17703         "adds  r14, r3, r14\n\t"
17704         "adcs r9, r4, r9\n\t"
17705         "adc   r10, r5, r10\n\t"
17706         /*  A[2] * A[5] */
17707         "ldr       r6, [%[a], #8]\n\t"
17708         "ldr       r8, [%[a], #20]\n\t"
17709         "umull     r3, r4, r6, r8\n\t"
17710         "adds  r14, r3, r14\n\t"
17711         "adcs r9, r4, r9\n\t"
17712         "adc   r10, r5, r10\n\t"
17713         /*  A[3] * A[4] */
17714         "ldr       r6, [%[a], #12]\n\t"
17715         "ldr       r8, [%[a], #16]\n\t"
17716         "umull     r3, r4, r6, r8\n\t"
17717         "adds  r14, r3, r14\n\t"
17718         "adcs r9, r4, r9\n\t"
17719         "adc   r10, r5, r10\n\t"
17720         "str r14, [sp, #28]\n\t"
17721         /*  A[1] * A[7] */
17722         "ldr       r6, [%[a], #4]\n\t"
17723         "ldr       r8, [%[a], #28]\n\t"
17724         "umull     r3, r4, r6, r8\n\t"
17725         "adds  r9, r3, r9\n\t"
17726         "adcs r10, r4, r10\n\t"
17727         "adc   r11, r5, #0\n\t"
17728         /*  A[2] * A[6] */
17729         "ldr       r6, [%[a], #8]\n\t"
17730         "ldr       r8, [%[a], #24]\n\t"
17731         "umull     r3, r4, r6, r8\n\t"
17732         "adds  r9, r3, r9\n\t"
17733         "adcs r10, r4, r10\n\t"
17734         "adc   r11, r5, r11\n\t"
17735         /*  A[3] * A[5] */
17736         "ldr       r6, [%[a], #12]\n\t"
17737         "ldr       r8, [%[a], #20]\n\t"
17738         "umull     r3, r4, r6, r8\n\t"
17739         "adds  r9, r3, r9\n\t"
17740         "adcs r10, r4, r10\n\t"
17741         "adc   r11, r5, r11\n\t"
17742         "str r9, [sp, #32]\n\t"
17743         /*  A[2] * A[7] */
17744         "ldr       r6, [%[a], #8]\n\t"
17745         "ldr       r8, [%[a], #28]\n\t"
17746         "umull     r3, r4, r6, r8\n\t"
17747         "adds  r10, r3, r10\n\t"
17748         "adcs r11, r4, r11\n\t"
17749         "adc   r14, r5, #0\n\t"
17750         /*  A[3] * A[6] */
17751         "ldr       r6, [%[a], #12]\n\t"
17752         "ldr       r8, [%[a], #24]\n\t"
17753         "umull     r3, r4, r6, r8\n\t"
17754         "adds  r10, r3, r10\n\t"
17755         "adcs r11, r4, r11\n\t"
17756         "adc   r14, r5, r14\n\t"
17757         /*  A[4] * A[5] */
17758         "ldr       r6, [%[a], #16]\n\t"
17759         "ldr       r8, [%[a], #20]\n\t"
17760         "umull     r3, r4, r6, r8\n\t"
17761         "adds  r10, r3, r10\n\t"
17762         "adcs r11, r4, r11\n\t"
17763         "adc   r14, r5, r14\n\t"
17764         "str r10, [sp, #36]\n\t"
17765         /*  A[3] * A[7] */
17766         "ldr       r6, [%[a], #12]\n\t"
17767         "ldr       r8, [%[a], #28]\n\t"
17768         "umull     r3, r4, r6, r8\n\t"
17769         "adds  r11, r3, r11\n\t"
17770         "adcs r14, r4, r14\n\t"
17771         "adc   r9, r5, #0\n\t"
17772         /*  A[4] * A[6] */
17773         "ldr       r6, [%[a], #16]\n\t"
17774         "ldr       r8, [%[a], #24]\n\t"
17775         "umull     r3, r4, r6, r8\n\t"
17776         "adds  r11, r3, r11\n\t"
17777         "adcs r14, r4, r14\n\t"
17778         "adc   r9, r5, r9\n\t"
17779         "str r11, [sp, #40]\n\t"
17780         /*  A[4] * A[7] */
17781         "ldr       r6, [%[a], #16]\n\t"
17782         "ldr       r8, [%[a], #28]\n\t"
17783         "umull     r3, r4, r6, r8\n\t"
17784         "adds  r14, r3, r14\n\t"
17785         "adcs r9, r4, r9\n\t"
17786         "adc   r10, r5, #0\n\t"
17787         /*  A[5] * A[6] */
17788         "ldr       r6, [%[a], #20]\n\t"
17789         "ldr       r8, [%[a], #24]\n\t"
17790         "umull     r3, r4, r6, r8\n\t"
17791         "adds  r14, r3, r14\n\t"
17792         "adcs r9, r4, r9\n\t"
17793         "adc   r10, r5, r10\n\t"
17794         "str r14, [sp, #44]\n\t"
17795         /*  A[5] * A[7] */
17796         "ldr       r6, [%[a], #20]\n\t"
17797         "ldr       r8, [%[a], #28]\n\t"
17798         "umull     r3, r4, r6, r8\n\t"
17799         "adds  r9, r3, r9\n\t"
17800         "adcs r10, r4, r10\n\t"
17801         "adc   r11, r5, #0\n\t"
17802         "str r9, [sp, #48]\n\t"
17803         /*  A[6] * A[7] */
17804         "ldr       r6, [%[a], #24]\n\t"
17805         "ldr       r8, [%[a], #28]\n\t"
17806         "umull     r3, r4, r6, r8\n\t"
17807         "adds  r10, r3, r10\n\t"
17808         "adc r11, r4, r11\n\t"
17809         "str r10, [sp, #52]\n\t"
17810         "str   r11, [sp, #56]\n\t"
17811         /*  Double */
17812         "ldr       r4, [sp, #4]\n\t"
17813         "ldr       r6, [sp, #8]\n\t"
17814         "ldr       r8, [sp, #12]\n\t"
17815         "ldr       r9, [sp, #16]\n\t"
17816         "ldr       r10, [sp, #20]\n\t"
17817         "ldr       r11, [sp, #24]\n\t"
17818         "ldr       r14, [sp, #28]\n\t"
17819         "ldr       r12, [sp, #32]\n\t"
17820         "ldr       r3, [sp, #36]\n\t"
17821         "adds        r4, r4, r4\n\t"
17822         "adcs      r6, r6, r6\n\t"
17823         "adcs      r8, r8, r8\n\t"
17824         "adcs      r9, r9, r9\n\t"
17825         "adcs      r10, r10, r10\n\t"
17826         "adcs      r11, r11, r11\n\t"
17827         "adcs      r14, r14, r14\n\t"
17828         "adcs      r12, r12, r12\n\t"
17829         "adcs      r3, r3, r3\n\t"
17830         "str       r4, [sp, #4]\n\t"
17831         "str       r6, [sp, #8]\n\t"
17832         "str       r8, [sp, #12]\n\t"
17833         "str       r9, [sp, #16]\n\t"
17834         "str       r10, [sp, #20]\n\t"
17835         "str       r11, [sp, #24]\n\t"
17836         "str       r14, [sp, #28]\n\t"
17837         "str       r12, [sp, #32]\n\t"
17838         "str       r3, [sp, #36]\n\t"
17839         "ldr       r4, [sp, #40]\n\t"
17840         "ldr       r6, [sp, #44]\n\t"
17841         "ldr       r8, [sp, #48]\n\t"
17842         "ldr       r9, [sp, #52]\n\t"
17843         "ldr       r10, [sp, #56]\n\t"
17844         "adcs        r4, r4, r4\n\t"
17845         "adcs      r6, r6, r6\n\t"
17846         "adcs      r8, r8, r8\n\t"
17847         "adcs      r9, r9, r9\n\t"
17848         "adcs      r10, r10, r10\n\t"
17849         "str       r4, [sp, #40]\n\t"
17850         "str       r6, [sp, #44]\n\t"
17851         "str       r8, [sp, #48]\n\t"
17852         "str       r9, [sp, #52]\n\t"
17853         "str       r10, [sp, #56]\n\t"
17854         "adc   r11, r5, #0\n\t"
17855         "str   r11, [sp, #60]\n\t"
17856         "ldr       r4, [sp, #4]\n\t"
17857         "ldr       r5, [sp, #8]\n\t"
17858         "ldr       r12, [sp, #12]\n\t"
17859         /*  A[0] * A[0] */
17860         "ldr       r6, [%[a], #0]\n\t"
17861         "umull     r9, r10, r6, r6\n\t"
17862         /*  A[1] * A[1] */
17863         "ldr       r6, [%[a], #4]\n\t"
17864         "umull     r11, r14, r6, r6\n\t"
17865         "adds      r10, r10, r4\n\t"
17866         "adcs      r11, r11, r5\n\t"
17867         "adcs      r14, r14, r12\n\t"
17868         "str       r9, [sp, #0]\n\t"
17869         "str       r10, [sp, #4]\n\t"
17870         "str       r11, [sp, #8]\n\t"
17871         "str       r14, [sp, #12]\n\t"
17872         "ldr       r3, [sp, #16]\n\t"
17873         "ldr       r4, [sp, #20]\n\t"
17874         "ldr       r5, [sp, #24]\n\t"
17875         "ldr       r12, [sp, #28]\n\t"
17876         /*  A[2] * A[2] */
17877         "ldr       r6, [%[a], #8]\n\t"
17878         "umull     r9, r10, r6, r6\n\t"
17879         /*  A[3] * A[3] */
17880         "ldr       r6, [%[a], #12]\n\t"
17881         "umull     r11, r14, r6, r6\n\t"
17882         "adcs      r9, r9, r3\n\t"
17883         "adcs      r10, r10, r4\n\t"
17884         "adcs      r11, r11, r5\n\t"
17885         "adcs      r14, r14, r12\n\t"
17886         "str       r9, [sp, #16]\n\t"
17887         "str       r10, [sp, #20]\n\t"
17888         "str       r11, [sp, #24]\n\t"
17889         "str       r14, [sp, #28]\n\t"
17890         "ldr       r3, [sp, #32]\n\t"
17891         "ldr       r4, [sp, #36]\n\t"
17892         "ldr       r5, [sp, #40]\n\t"
17893         "ldr       r12, [sp, #44]\n\t"
17894         /*  A[4] * A[4] */
17895         "ldr       r6, [%[a], #16]\n\t"
17896         "umull     r9, r10, r6, r6\n\t"
17897         /*  A[5] * A[5] */
17898         "ldr       r6, [%[a], #20]\n\t"
17899         "umull     r11, r14, r6, r6\n\t"
17900         "adcs      r9, r9, r3\n\t"
17901         "adcs      r10, r10, r4\n\t"
17902         "adcs      r11, r11, r5\n\t"
17903         "adcs      r14, r14, r12\n\t"
17904         "str       r9, [sp, #32]\n\t"
17905         "str       r10, [sp, #36]\n\t"
17906         "str       r11, [sp, #40]\n\t"
17907         "str       r14, [sp, #44]\n\t"
17908         "ldr       r3, [sp, #48]\n\t"
17909         "ldr       r4, [sp, #52]\n\t"
17910         "ldr       r5, [sp, #56]\n\t"
17911         "ldr       r12, [sp, #60]\n\t"
17912         /*  A[6] * A[6] */
17913         "ldr       r6, [%[a], #24]\n\t"
17914         "umull     r9, r10, r6, r6\n\t"
17915         /*  A[7] * A[7] */
17916         "ldr       r6, [%[a], #28]\n\t"
17917         "umull     r11, r14, r6, r6\n\t"
17918         "adcs      r9, r9, r3\n\t"
17919         "adcs      r10, r10, r4\n\t"
17920         "adcs      r11, r11, r5\n\t"
17921         "adc       r14, r14, r12\n\t"
17922         "str       r9, [sp, #48]\n\t"
17923         "str       r10, [sp, #52]\n\t"
17924         "str       r11, [sp, #56]\n\t"
17925         "str       r14, [sp, #60]\n\t"
17926         /* Start Reduction */
17927         "ldr r4, [sp, #0]\n\t"
17928         "ldr r5, [sp, #4]\n\t"
17929         "ldr r6, [sp, #8]\n\t"
17930         "ldr r8, [sp, #12]\n\t"
17931         "ldr r9, [sp, #16]\n\t"
17932         "ldr r10, [sp, #20]\n\t"
17933         "ldr r11, [sp, #24]\n\t"
17934         "ldr r14, [sp, #28]\n\t"
17935         /* mu = a[0]-a[7] + a[0]-a[4] << 96 + (a[0]-a[1] * 2) << 192 */
17936         /*    - a[0] << 224 */
17937         /*   + (a[0]-a[1] * 2) << (6 * 32) */
17938         "adds  r11, r11, r4\n\t"
17939         "adc   r14, r14, r5\n\t"
17940         "adds  r11, r11, r4\n\t"
17941         "adc   r14, r14, r5\n\t"
17942         /*   - a[0] << (7 * 32) */
17943         "sub   r14, r14, r4\n\t"
17944         /*   + a[0]-a[4] << (3 * 32) */
17945         "mov   %[a], r8\n\t"
17946         "mov   r12, r9\n\t"
17947         "adds  r8, r8, r4\n\t"
17948         "adcs  r9, r9, r5\n\t"
17949         "adcs  r10, r10, r6\n\t"
17950         "adcs  r11, r11, %[a]\n\t"
17951         "adc   r14, r14, r12\n\t"
17952         "str r4, [sp, #0]\n\t"
17953         "str r5, [sp, #4]\n\t"
17954         "str r6, [sp, #8]\n\t"
17955         "str r8, [sp, #12]\n\t"
17956         "str r9, [sp, #16]\n\t"
17957         "str r10, [sp, #20]\n\t"
17958         /* a += mu * m */
17959         /*   += mu * ((1 << 256) - (1 << 224) + (1 << 192) + (1 << 96) - 1) */
17960         "mov   %[a], #0\n\t"
17961         /* a[6] +=        t[0] + t[3] */
17962         "ldr   r3, [sp, #24]\n\t"
17963         "adds  r3, r3, r4\n\t"
17964         "adc   r12, %[a], #0\n\t"
17965         "adds  r3, r3, r8\n\t"
17966         "adc   r12, r12, #0\n\t"
17967         "str   r11, [sp, #24]\n\t"
17968         /* a[7] +=        t[1] + t[4] */
17969         "ldr   r3, [sp, #28]\n\t"
17970         "adds  r3, r3, r12\n\t"
17971         "adc   r12, %[a], #0\n\t"
17972         "adds  r3, r3, r5\n\t"
17973         "adc   r12, r12, #0\n\t"
17974         "adds  r3, r3, r9\n\t"
17975         "adc   r12, r12, #0\n\t"
17976         "str   r14, [sp, #28]\n\t"
17977         "str   r3, [sp, #64]\n\t"
17978         /* a[8] += t[0] + t[2] + t[5] */
17979         "ldr   r3, [sp, #32]\n\t"
17980         "adds  r3, r3, r12\n\t"
17981         "adc   r12, %[a], #0\n\t"
17982         "adds  r3, r3, r4\n\t"
17983         "adc   r12, r12, #0\n\t"
17984         "adds  r3, r3, r6\n\t"
17985         "adc   r12, r12, #0\n\t"
17986         "adds  r3, r3, r10\n\t"
17987         "adc   r12, r12, #0\n\t"
17988         "str   r3, [sp, #32]\n\t"
17989         /* a[9]  += t[1] + t[3] + t[6] */
17990         /* a[10] += t[2] + t[4] + t[7] */
17991         "ldr   r3, [sp, #36]\n\t"
17992         "ldr   r4, [sp, #40]\n\t"
17993         "adds  r3, r3, r12\n\t"
17994         "adcs  r4, r4, #0\n\t"
17995         "adc   r12, %[a], #0\n\t"
17996         "adds  r3, r3, r5\n\t"
17997         "adcs  r4, r4, r6\n\t"
17998         "adc   r12, r12, #0\n\t"
17999         "adds  r3, r3, r8\n\t"
18000         "adcs  r4, r4, r9\n\t"
18001         "adc   r12, r12, #0\n\t"
18002         "adds  r3, r3, r11\n\t"
18003         "adcs  r4, r4, r14\n\t"
18004         "adc   r12, r12, #0\n\t"
18005         "str   r3, [sp, #36]\n\t"
18006         "str   r4, [sp, #40]\n\t"
18007         /* a[11] += t[3] + t[5] */
18008         /* a[12] += t[4] + t[6] */
18009         /* a[13] += t[5] + t[7] */
18010         /* a[14] += t[6] */
18011         "ldr   r3, [sp, #44]\n\t"
18012         "ldr   r4, [sp, #48]\n\t"
18013         "ldr   r5, [sp, #52]\n\t"
18014         "ldr   r6, [sp, #56]\n\t"
18015         "adds  r3, r3, r12\n\t"
18016         "adcs  r4, r4, #0\n\t"
18017         "adcs  r5, r5, #0\n\t"
18018         "adcs  r6, r6, #0\n\t"
18019         "adc   r12, %[a], #0\n\t"
18020         "adds  r3, r3, r8\n\t"
18021         "adcs  r4, r4, r9\n\t"
18022         "adcs  r5, r5, r10\n\t"
18023         "adcs  r6, r6, r11\n\t"
18024         "adc   r12, r12, #0\n\t"
18025         "adds  r3, r3, r10\n\t"
18026         "adcs  r4, r4, r11\n\t"
18027         "adcs  r5, r5, r14\n\t"
18028         "adcs  r6, r6, #0\n\t"
18029         "adc   r12, r12, #0\n\t"
18030         "str   r3, [sp, #44]\n\t"
18031         "str   r4, [sp, #48]\n\t"
18032         "str   r5, [sp, #52]\n\t"
18033         "str   r6, [sp, #56]\n\t"
18034         /* a[15] += t[7] */
18035         "ldr   r3, [sp, #60]\n\t"
18036         "adds  r3, r3, r12\n\t"
18037         "adc   r12, %[a], #0\n\t"
18038         "adds  r3, r3, r14\n\t"
18039         "adc   r12, r12, #0\n\t"
18040         "str   r3, [sp, #60]\n\t"
18041         "ldr   r3, [sp, #64]\n\t"
18042         "ldr   r4, [sp, #32]\n\t"
18043         "ldr   r5, [sp, #36]\n\t"
18044         "ldr   r6, [sp, #40]\n\t"
18045         "ldr   r9, [sp, #0]\n\t"
18046         "ldr   r10, [sp, #4]\n\t"
18047         "ldr   r11, [sp, #8]\n\t"
18048         "ldr   r14, [sp, #12]\n\t"
18049         "subs  r3, r3, r9\n\t"
18050         "sbcs  r4, r4, r10\n\t"
18051         "sbcs  r5, r5, r11\n\t"
18052         "sbcs  r6, r6, r14\n\t"
18053         "str   r4, [sp, #32]\n\t"
18054         "str   r5, [sp, #36]\n\t"
18055         "str   r6, [sp, #40]\n\t"
18056         "ldr   r3, [sp, #44]\n\t"
18057         "ldr   r4, [sp, #48]\n\t"
18058         "ldr   r5, [sp, #52]\n\t"
18059         "ldr   r6, [sp, #56]\n\t"
18060         "ldr   r8, [sp, #60]\n\t"
18061         "ldr   r9, [sp, #16]\n\t"
18062         "ldr   r10, [sp, #20]\n\t"
18063         "ldr   r11, [sp, #24]\n\t"
18064         "ldr   r14, [sp, #28]\n\t"
18065         "sbcs  r3, r3, r9\n\t"
18066         "sbcs  r4, r4, r10\n\t"
18067         "sbcs  r5, r5, r11\n\t"
18068         "sbcs  r6, r6, r14\n\t"
18069         "sbc   r8, r8, #0\n\t"
18070         "str   r3, [sp, #44]\n\t"
18071         "str   r4, [sp, #48]\n\t"
18072         "str   r5, [sp, #52]\n\t"
18073         "str   r6, [sp, #56]\n\t"
18074         "str   r8, [sp, #60]\n\t"
18075         /* mask m and sub from result if overflow */
18076         "sub   r12, %[a], r12\n\t"
18077         "and   %[a], r12, #1\n\t"
18078         "ldr       r3, [sp, #32]\n\t"
18079         "ldr       r4, [sp, #36]\n\t"
18080         "ldr       r5, [sp, #40]\n\t"
18081         "ldr       r6, [sp, #44]\n\t"
18082         "ldr       r8, [sp, #48]\n\t"
18083         "ldr       r9, [sp, #52]\n\t"
18084         "ldr       r10, [sp, #56]\n\t"
18085         "ldr       r11, [sp, #60]\n\t"
18086         "subs      r3, r3, r12\n\t"
18087         "sbcs      r4, r4, r12\n\t"
18088         "sbcs      r5, r5, r12\n\t"
18089         "sbcs      r6, r6, #0\n\t"
18090         "sbcs      r8, r8, #0\n\t"
18091         "sbcs      r9, r9, #0\n\t"
18092         "sbcs      r10, r10, %[a]\n\t"
18093         "sbc       r11, r11, r12\n\t"
18094         "str       r3, [%[r], #0]\n\t"
18095         "str       r4, [%[r], #4]\n\t"
18096         "str       r5, [%[r], #8]\n\t"
18097         "str       r6, [%[r], #12]\n\t"
18098         "str       r8, [%[r], #16]\n\t"
18099         "str       r9, [%[r], #20]\n\t"
18100         "str       r10, [%[r], #24]\n\t"
18101         "str       r11, [%[r], #28]\n\t"
18102         "add   sp, sp, #68\n\t"
18103         : [a] "+r" (a)
18104         : [r] "r" (r)
18105         : "memory", "r9", "r10", "r11", "r14", "r3", "r4", "r5", "r6", "r8", "r12"
18106     );
18107 }
18108 
18109 #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
18110 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
18111  *
18112  * r   Result of squaring.
18113  * a   Number to square in Montgomery form.
18114  * n   Number of times to square.
18115  * m   Modulus (prime).
18116  * mp  Montgomery mulitplier.
18117  */
sp_256_mont_sqr_n_8(sp_digit * r,const sp_digit * a,int n,const sp_digit * m,sp_digit mp)18118 static void sp_256_mont_sqr_n_8(sp_digit* r, const sp_digit* a, int n,
18119         const sp_digit* m, sp_digit mp)
18120 {
18121     sp_256_mont_sqr_8(r, a, m, mp);
18122     for (; n > 1; n--) {
18123         sp_256_mont_sqr_8(r, r, m, mp);
18124     }
18125 }
18126 
18127 #endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */
18128 #ifdef WOLFSSL_SP_SMALL
/* The P256 prime modulus minus 2, least significant word first. */
static const uint32_t p256_mod_minus_2[8] = {
    0xfffffffdU,    /* bits   0 -  31 */
    0xffffffffU,    /* bits  32 -  63 */
    0xffffffffU,    /* bits  64 -  95 */
    0x00000000U,    /* bits  96 - 127 */
    0x00000000U,    /* bits 128 - 159 */
    0x00000000U,    /* bits 160 - 191 */
    0x00000001U,    /* bits 192 - 223 */
    0xffffffffU     /* bits 224 - 255 */
};
#endif /* WOLFSSL_SP_SMALL */
18135 
18136 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
18137  * P256 curve. (r = 1 / a mod m)
18138  *
18139  * r   Inverse result.
18140  * a   Number to invert.
18141  * td  Temporary data.
18142  */
sp_256_mont_inv_8(sp_digit * r,const sp_digit * a,sp_digit * td)18143 static void sp_256_mont_inv_8(sp_digit* r, const sp_digit* a, sp_digit* td)
18144 {
18145 #ifdef WOLFSSL_SP_SMALL
18146     sp_digit* t = td;
18147     int i;
18148 
18149     XMEMCPY(t, a, sizeof(sp_digit) * 8);
18150     for (i=254; i>=0; i--) {
18151         sp_256_mont_sqr_8(t, t, p256_mod, p256_mp_mod);
18152         if (p256_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
18153             sp_256_mont_mul_8(t, t, a, p256_mod, p256_mp_mod);
18154     }
18155     XMEMCPY(r, t, sizeof(sp_digit) * 8);
18156 #else
18157     sp_digit* t1 = td;
18158     sp_digit* t2 = td + 2 * 8;
18159     sp_digit* t3 = td + 4 * 8;
18160     /* 0x2 */
18161     sp_256_mont_sqr_8(t1, a, p256_mod, p256_mp_mod);
18162     /* 0x3 */
18163     sp_256_mont_mul_8(t2, t1, a, p256_mod, p256_mp_mod);
18164     /* 0xc */
18165     sp_256_mont_sqr_n_8(t1, t2, 2, p256_mod, p256_mp_mod);
18166     /* 0xd */
18167     sp_256_mont_mul_8(t3, t1, a, p256_mod, p256_mp_mod);
18168     /* 0xf */
18169     sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
18170     /* 0xf0 */
18171     sp_256_mont_sqr_n_8(t1, t2, 4, p256_mod, p256_mp_mod);
18172     /* 0xfd */
18173     sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
18174     /* 0xff */
18175     sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
18176     /* 0xff00 */
18177     sp_256_mont_sqr_n_8(t1, t2, 8, p256_mod, p256_mp_mod);
18178     /* 0xfffd */
18179     sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
18180     /* 0xffff */
18181     sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
18182     /* 0xffff0000 */
18183     sp_256_mont_sqr_n_8(t1, t2, 16, p256_mod, p256_mp_mod);
18184     /* 0xfffffffd */
18185     sp_256_mont_mul_8(t3, t3, t1, p256_mod, p256_mp_mod);
18186     /* 0xffffffff */
18187     sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
18188     /* 0xffffffff00000000 */
18189     sp_256_mont_sqr_n_8(t1, t2, 32, p256_mod, p256_mp_mod);
18190     /* 0xffffffffffffffff */
18191     sp_256_mont_mul_8(t2, t2, t1, p256_mod, p256_mp_mod);
18192     /* 0xffffffff00000001 */
18193     sp_256_mont_mul_8(r, t1, a, p256_mod, p256_mp_mod);
18194     /* 0xffffffff000000010000000000000000000000000000000000000000 */
18195     sp_256_mont_sqr_n_8(r, r, 160, p256_mod, p256_mp_mod);
18196     /* 0xffffffff00000001000000000000000000000000ffffffffffffffff */
18197     sp_256_mont_mul_8(r, r, t2, p256_mod, p256_mp_mod);
18198     /* 0xffffffff00000001000000000000000000000000ffffffffffffffff00000000 */
18199     sp_256_mont_sqr_n_8(r, r, 32, p256_mod, p256_mp_mod);
18200     /* 0xffffffff00000001000000000000000000000000fffffffffffffffffffffffd */
18201     sp_256_mont_mul_8(r, r, t3, p256_mod, p256_mp_mod);
18202 #endif /* WOLFSSL_SP_SMALL */
18203 }
18204 
18205 /* Compare a with b in constant time.
18206  *
18207  * a  A single precision integer.
18208  * b  A single precision integer.
18209  * return -ve, 0 or +ve if a is less than, equal to or greater than b
18210  * respectively.
18211  */
sp_256_cmp_8(const sp_digit * a,const sp_digit * b)18212 SP_NOINLINE static sp_int32 sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
18213 {
18214     sp_digit r = 0;
18215 
18216 
18217     __asm__ __volatile__ (
18218         "mov	r3, #0\n\t"
18219         "mvn	r3, r3\n\t"
18220         "mov	r6, #28\n\t"
18221         "\n1:\n\t"
18222         "ldr	r8, [%[a], r6]\n\t"
18223         "ldr	r5, [%[b], r6]\n\t"
18224         "and	r8, r8, r3\n\t"
18225         "and	r5, r5, r3\n\t"
18226         "mov	r4, r8\n\t"
18227         "subs	r8, r8, r5\n\t"
18228         "sbc	r8, r8, r8\n\t"
18229         "add	%[r], %[r], r8\n\t"
18230         "mvn	r8, r8\n\t"
18231         "and	r3, r3, r8\n\t"
18232         "subs	r5, r5, r4\n\t"
18233         "sbc	r8, r8, r8\n\t"
18234         "sub	%[r], %[r], r8\n\t"
18235         "mvn	r8, r8\n\t"
18236         "and	r3, r3, r8\n\t"
18237         "sub	r6, r6, #4\n\t"
18238         "cmp	r6, #0\n\t"
18239 #ifdef __GNUC__
18240         "bge	1b\n\t"
18241 #else
18242         "bge.n	1b\n\t"
18243 #endif /* __GNUC__ */
18244         : [r] "+r" (r)
18245         : [a] "r" (a), [b] "r" (b)
18246         : "r3", "r4", "r5", "r6", "r8"
18247     );
18248 
18249     return r;
18250 }
18251 
/* Normalize the values in each word to 32 bits.
 *
 * This macro expands to nothing here, so invoking it has no effect.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_256_norm_8(a)
18257 
18258 /* Conditionally subtract b from a using the mask m.
18259  * m is -1 to subtract and 0 when not copying.
18260  *
18261  * r  A single precision number representing condition subtract result.
18262  * a  A single precision number to subtract from.
18263  * b  A single precision number to subtract.
18264  * m  Mask value to apply.
18265  */
sp_256_cond_sub_8(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)18266 SP_NOINLINE static sp_digit sp_256_cond_sub_8(sp_digit* r, const sp_digit* a,
18267         const sp_digit* b, sp_digit m)
18268 {
18269     sp_digit c = 0;
18270 
18271     __asm__ __volatile__ (
18272         "mov	r5, #32\n\t"
18273         "mov	r9, r5\n\t"
18274         "mov	r8, #0\n\t"
18275         "\n1:\n\t"
18276         "ldr	r6, [%[b], r8]\n\t"
18277         "and	r6, r6, %[m]\n\t"
18278         "mov	r5, #0\n\t"
18279         "subs	r5, r5, %[c]\n\t"
18280         "ldr	r5, [%[a], r8]\n\t"
18281         "sbcs	r5, r5, r6\n\t"
18282         "sbcs	%[c], %[c], %[c]\n\t"
18283         "str	r5, [%[r], r8]\n\t"
18284         "add	r8, r8, #4\n\t"
18285         "cmp	r8, r9\n\t"
18286 #ifdef __GNUC__
18287         "blt	1b\n\t"
18288 #else
18289         "blt.n	1b\n\t"
18290 #endif /* __GNUC__ */
18291         : [c] "+r" (c)
18292         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
18293         : "memory", "r5", "r6", "r8", "r9"
18294     );
18295 
18296     return c;
18297 }
18298 
18299 /* Reduce the number back to 256 bits using Montgomery reduction.
18300  *
18301  * a   A single precision number to reduce in place.
18302  * m   The single precision number representing the modulus.
18303  * mp  The digit representing the negative inverse of m mod 2^n.
18304  */
sp_256_mont_reduce_8(sp_digit * a,const sp_digit * m,sp_digit mp)18305 SP_NOINLINE static void sp_256_mont_reduce_8(sp_digit* a, const sp_digit* m,
18306         sp_digit mp)
18307 {
18308     (void)mp;
18309     (void)m;
18310 
18311     __asm__ __volatile__ (
18312         "mov	r2, #0\n\t"
18313         "mov	r1, #0\n\t"
18314         /* i = 0 */
18315         "mov	r9, r2\n\t"
18316         "\n1:\n\t"
18317         "mov	r4, #0\n\t"
18318         /* mu = a[i] * 1 (mp) = a[i] */
18319         "ldr	r3, [%[a]]\n\t"
18320         /* a[i] += -1 * mu = -1 * a[i] => a[i] = 0 no carry */
18321         /* a[i+1] += -1 * mu */
18322         "ldr	r6, [%[a], #4]\n\t"
18323         "mov	r5, #0\n\t"
18324         "adds	r4, r4, r6\n\t"
18325         "adc	r5, r5, r2\n\t"
18326         "str	r4, [%[a], #4]\n\t"
18327         /* a[i+2] += -1 * mu */
18328         "ldr	r6, [%[a], #8]\n\t"
18329         "mov	r4, #0\n\t"
18330         "adds	r5, r5, r6\n\t"
18331         "adc	r4, r4, r2\n\t"
18332         "str	r5, [%[a], #8]\n\t"
18333         /* a[i+3] += 0 * mu */
18334         "ldr	r6, [%[a], #12]\n\t"
18335         "mov	r5, #0\n\t"
18336         "adds	r4, r4, r3\n\t"
18337         "adc	r5, r5, r2\n\t"
18338         "adds	r4, r4, r6\n\t"
18339         "adc	r5, r5, r2\n\t"
18340         "str	r4, [%[a], #12]\n\t"
18341         /* a[i+4] += 0 * mu */
18342         "ldr	r6, [%[a], #16]\n\t"
18343         "mov	r4, #0\n\t"
18344         "adds	r5, r5, r6\n\t"
18345         "adc	r4, r4, r2\n\t"
18346         "str	r5, [%[a], #16]\n\t"
18347         /* a[i+5] += 0 * mu */
18348         "ldr	r6, [%[a], #20]\n\t"
18349         "mov	r5, #0\n\t"
18350         "adds	r4, r4, r6\n\t"
18351         "adc	r5, r5, r2\n\t"
18352         "str	r4, [%[a], #20]\n\t"
18353         /* a[i+6] += 1 * mu */
18354         "ldr	r6, [%[a], #24]\n\t"
18355         "mov	r4, #0\n\t"
18356         "adds	r5, r5, r3\n\t"
18357         "adc	r4, r4, r2\n\t"
18358         "adds	r5, r5, r6\n\t"
18359         "adc	r4, r4, r2\n\t"
18360         "str	r5, [%[a], #24]\n\t"
18361         /* a[i+7] += -1 * mu */
18362         "ldr	r6, [%[a], #28]\n\t"
18363         "ldr	r8, [%[a], #32]\n\t"
18364         "adds	r5, r1, r3\n\t"
18365         "mov	r1, #0\n\t"
18366         "adc	r1, r1, r2\n\t"
18367         "subs	r4, r4, r3\n\t"
18368         "sbcs	r5, r5, r2\n\t"
18369         "sbc	r1, r1, r2\n\t"
18370         "adds	r4, r4, r6\n\t"
18371         "adcs	r5, r5, r8\n\t"
18372         "adc	r1, r1, r2\n\t"
18373         "str	r4, [%[a],  #28]\n\t"
18374         "str	r5, [%[a], #32]\n\t"
18375         /* i += 1 */
18376         "add	r9, r9, #1\n\t"
18377         "add	%[a], %[a], #4\n\t"
18378         "mov	r6, #8\n\t"
18379         "cmp	r9, r6\n\t"
18380 #ifdef __GNUC__
18381         "blt	1b\n\t"
18382 #else
18383         "blt.n	1b\n\t"
18384 #endif /* __GNUC__ */
18385         "sub	%[a], %[a], #32\n\t"
18386         "mov	r3, r1\n\t"
18387         "sub	r1, r1, #1\n\t"
18388         "mvn	r1, r1\n\t"
18389         "ldr	r4, [%[a],#32]\n\t"
18390         "ldr	r5, [%[a],#36]\n\t"
18391         "ldr	r6, [%[a],#40]\n\t"
18392         "ldr	r8, [%[a],#44]\n\t"
18393         "ldr	r9, [%[a],#48]\n\t"
18394         "ldr	r10, [%[a],#52]\n\t"
18395         "ldr	r11, [%[a],#56]\n\t"
18396         "ldr	r14, [%[a],#60]\n\t"
18397         "subs	r4, r4, r1\n\t"
18398         "sbcs	r5, r5, r1\n\t"
18399         "sbcs	r6, r6, r1\n\t"
18400         "sbcs	r8, r8, r2\n\t"
18401         "sbcs	r9, r9, r2\n\t"
18402         "sbcs	r10, r10, r2\n\t"
18403         "sbcs	r11, r11, r3\n\t"
18404         "sbc	r14, r14, r1\n\t"
18405         "str	r4, [%[a],#0]\n\t"
18406         "str	r5, [%[a],#4]\n\t"
18407         "str	r6, [%[a],#8]\n\t"
18408         "str	r8, [%[a],#12]\n\t"
18409         "str	r9, [%[a],#16]\n\t"
18410         "str	r10, [%[a],#20]\n\t"
18411         "str	r11, [%[a],#24]\n\t"
18412         "str	r14, [%[a],#28]\n\t"
18413         : [a] "+r" (a)
18414         :
18415         : "memory", "r1", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14"
18416     );
18417 
18418 
18419     (void)m;
18420     (void)mp;
18421 }
18422 
18423 /* Reduce the number back to 256 bits using Montgomery reduction.
18424  *
18425  * a   A single precision number to reduce in place.
18426  * m   The single precision number representing the modulus.
18427  * mp  The digit representing the negative inverse of m mod 2^n.
18428  */
sp_256_mont_reduce_order_8(sp_digit * a,const sp_digit * m,sp_digit mp)18429 SP_NOINLINE static void sp_256_mont_reduce_order_8(sp_digit* a, const sp_digit* m,
18430         sp_digit mp)
18431 {
18432     sp_digit ca = 0;
18433 
18434     __asm__ __volatile__ (
18435         "mov	r9, %[mp]\n\t"
18436         "mov	r12, %[m]\n\t"
18437         "mov	r10, %[a]\n\t"
18438         "mov	r4, #0\n\t"
18439         "add	r11, r10, #32\n\t"
18440         "\n1:\n\t"
18441         /* mu = a[i] * mp */
18442         "mov	%[mp], r9\n\t"
18443         "ldr	%[a], [r10]\n\t"
18444         "mul	%[mp], %[mp], %[a]\n\t"
18445         "mov	%[m], r12\n\t"
18446         "add	r14, r10, #24\n\t"
18447         "\n2:\n\t"
18448         /* a[i+j] += m[j] * mu */
18449         "ldr	%[a], [r10]\n\t"
18450         "mov	r5, #0\n\t"
18451         /* Multiply m[j] and mu - Start */
18452         "ldr	r8, [%[m]], #4\n\t"
18453         "umull	r6, r8, %[mp], r8\n\t"
18454         "adds	%[a], %[a], r6\n\t"
18455         "adc	r5, r5, r8\n\t"
18456         /* Multiply m[j] and mu - Done */
18457         "adds	r4, r4, %[a]\n\t"
18458         "adc	r5, r5, #0\n\t"
18459         "str	r4, [r10], #4\n\t"
18460         /* a[i+j+1] += m[j+1] * mu */
18461         "ldr	%[a], [r10]\n\t"
18462         "mov	r4, #0\n\t"
18463         /* Multiply m[j] and mu - Start */
18464         "ldr	r8, [%[m]], #4\n\t"
18465         "umull	r6, r8, %[mp], r8\n\t"
18466         "adds	%[a], %[a], r6\n\t"
18467         "adc	r4, r4, r8\n\t"
18468         /* Multiply m[j] and mu - Done */
18469         "adds	r5, r5, %[a]\n\t"
18470         "adc	r4, r4, #0\n\t"
18471         "str	r5, [r10], #4\n\t"
18472         "cmp	r10, r14\n\t"
18473 #ifdef __GNUC__
18474         "blt	2b\n\t"
18475 #else
18476         "blt.n	2b\n\t"
18477 #endif /* __GNUC__ */
18478         /* a[i+6] += m[6] * mu */
18479         "ldr	%[a], [r10]\n\t"
18480         "mov	r5, #0\n\t"
18481         /* Multiply m[j] and mu - Start */
18482         "ldr	r8, [%[m]], #4\n\t"
18483         "umull	r6, r8, %[mp], r8\n\t"
18484         "adds	%[a], %[a], r6\n\t"
18485         "adc	r5, r5, r8\n\t"
18486         /* Multiply m[j] and mu - Done */
18487         "adds	r4, r4, %[a]\n\t"
18488         "adc	r5, r5, #0\n\t"
18489         "str	r4, [r10], #4\n\t"
18490         /* a[i+7] += m[7] * mu */
18491         "mov	r4, %[ca]\n\t"
18492         "mov	%[ca], #0\n\t"
18493         /* Multiply m[7] and mu - Start */
18494         "ldr	r8, [%[m]]\n\t"
18495         "umull	r6, r8, %[mp], r8\n\t"
18496         "adds	r5, r5, r6\n\t"
18497         "adcs 	r4, r4, r8\n\t"
18498         "adc	%[ca], %[ca], #0\n\t"
18499         /* Multiply m[7] and mu - Done */
18500         "ldr	r6, [r10]\n\t"
18501         "ldr	r8, [r10, #4]\n\t"
18502         "adds	r6, r6, r5\n\t"
18503         "adcs	r8, r8, r4\n\t"
18504         "adc	%[ca], %[ca], #0\n\t"
18505         "str	r6, [r10]\n\t"
18506         "str	r8, [r10, #4]\n\t"
18507         /* Next word in a */
18508         "sub	r10, r10, #24\n\t"
18509         "cmp	r10, r11\n\t"
18510 #ifdef __GNUC__
18511         "blt	1b\n\t"
18512 #else
18513         "blt.n	1b\n\t"
18514 #endif /* __GNUC__ */
18515         "mov	%[a], r10\n\t"
18516         "mov	%[m], r12\n\t"
18517         : [ca] "+r" (ca), [a] "+r" (a)
18518         : [m] "r" (m), [mp] "r" (mp)
18519         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
18520     );
18521 
18522     sp_256_cond_sub_8(a - 8, a, m, (sp_digit)0 - ca);
18523 }
18524 
18525 /* Map the Montgomery form projective coordinate point to an affine point.
18526  *
18527  * r  Resulting affine coordinate point.
18528  * p  Montgomery form projective coordinate point.
18529  * t  Temporary ordinate data.
18530  */
sp_256_map_8(sp_point_256 * r,const sp_point_256 * p,sp_digit * t)18531 static void sp_256_map_8(sp_point_256* r, const sp_point_256* p,
18532     sp_digit* t)
18533 {
18534     sp_digit* t1 = t;
18535     sp_digit* t2 = t + 2*8;
18536     sp_int32 n;
18537 
18538     sp_256_mont_inv_8(t1, p->z, t + 2*8);
18539 
18540     sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
18541     sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
18542 
18543     /* x /= z^2 */
18544     sp_256_mont_mul_8(r->x, p->x, t2, p256_mod, p256_mp_mod);
18545     XMEMSET(r->x + 8, 0, sizeof(r->x) / 2U);
18546     sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod);
18547     /* Reduce x to less than modulus */
18548     n = sp_256_cmp_8(r->x, p256_mod);
18549     sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ?
18550                 (sp_digit)1 : (sp_digit)0));
18551     sp_256_norm_8(r->x);
18552 
18553     /* y /= z^3 */
18554     sp_256_mont_mul_8(r->y, p->y, t1, p256_mod, p256_mp_mod);
18555     XMEMSET(r->y + 8, 0, sizeof(r->y) / 2U);
18556     sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod);
18557     /* Reduce y to less than modulus */
18558     n = sp_256_cmp_8(r->y, p256_mod);
18559     sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ?
18560                 (sp_digit)1 : (sp_digit)0));
18561     sp_256_norm_8(r->y);
18562 
18563     XMEMSET(r->z, 0, sizeof(r->z));
18564     r->z[0] = 1;
18565 
18566 }
18567 
18568 /* Add two Montgomery form numbers (r = a + b % m).
18569  *
18570  * r   Result of addition.
18571  * a   First number to add in Montgomery form.
18572  * b   Second number to add in Montgomery form.
18573  * m   Modulus (prime).
18574  */
sp_256_mont_add_8(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)18575 SP_NOINLINE static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
18576         const sp_digit* m)
18577 {
18578     (void)m;
18579 
18580     __asm__ __volatile__ (
18581         "mov   r12, #0\n\t"
18582         "ldr       r4, [%[a],#0]\n\t"
18583         "ldr       r5, [%[a],#4]\n\t"
18584         "ldr       r6, [%[a],#8]\n\t"
18585         "ldr       r8, [%[a],#12]\n\t"
18586         "ldr       r9, [%[b],#0]\n\t"
18587         "ldr       r10, [%[b],#4]\n\t"
18588         "ldr       r11, [%[b],#8]\n\t"
18589         "ldr       r14, [%[b],#12]\n\t"
18590         "adds    r4, r4, r9\n\t"
18591         "adcs    r5, r5, r10\n\t"
18592         "adcs    r6, r6, r11\n\t"
18593         "adcs    r8, r8, r14\n\t"
18594         "str       r4, [%[r],#0]\n\t"
18595         "str       r5, [%[r],#4]\n\t"
18596         "str       r6, [%[r],#8]\n\t"
18597         "str       r8, [%[r],#12]\n\t"
18598         "ldr       r4, [%[a],#16]\n\t"
18599         "ldr       r5, [%[a],#20]\n\t"
18600         "ldr       r6, [%[a],#24]\n\t"
18601         "ldr       r8, [%[a],#28]\n\t"
18602         "ldr       r9, [%[b],#16]\n\t"
18603         "ldr       r10, [%[b],#20]\n\t"
18604         "ldr       r11, [%[b],#24]\n\t"
18605         "ldr       r14, [%[b],#28]\n\t"
18606         "adcs    r4, r4, r9\n\t"
18607         "adcs    r5, r5, r10\n\t"
18608         "adcs    r6, r6, r11\n\t"
18609         "adcs    r8, r8, r14\n\t"
18610         "adc   r3, r12, #0\n\t"
18611         "sub   r3, r12, r3\n\t"
18612         "and   r12, r3, #1\n\t"
18613         "ldr   r9, [%[r],#0]\n\t"
18614         "ldr   r10, [%[r],#4]\n\t"
18615         "ldr   r11, [%[r],#8]\n\t"
18616         "ldr   r14, [%[r],#12]\n\t"
18617         "subs  r9, r9, r3\n\t"
18618         "sbcs  r10, r10, r3\n\t"
18619         "sbcs  r11, r11, r3\n\t"
18620         "sbcs  r14, r14, #0\n\t"
18621         "sbcs  r4, r4, #0\n\t"
18622         "sbcs  r5, r5, #0\n\t"
18623         "sbcs  r6, r6, r12\n\t"
18624         "sbc   r8, r8, r3\n\t"
18625         "str   r9, [%[r],#0]\n\t"
18626         "str   r10, [%[r],#4]\n\t"
18627         "str   r11, [%[r],#8]\n\t"
18628         "str   r14, [%[r],#12]\n\t"
18629         "str   r4, [%[r],#16]\n\t"
18630         "str   r5, [%[r],#20]\n\t"
18631         "str   r6, [%[r],#24]\n\t"
18632         "str   r8, [%[r],#28]\n\t"
18633         :
18634         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
18635         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r3", "r12"
18636     );
18637 }
18638 
18639 /* Double a Montgomery form number (r = a + a % m).
18640  *
18641  * r   Result of doubling.
18642  * a   Number to double in Montgomery form.
18643  * m   Modulus (prime).
18644  */
sp_256_mont_dbl_8(sp_digit * r,const sp_digit * a,const sp_digit * m)18645 SP_NOINLINE static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
18646 {
18647     (void)m;
18648 
18649     __asm__ __volatile__ (
18650         "mov   r12, #0\n\t"
18651         "ldr        r4, [%[a],#0]\n\t"
18652         "ldr        r5, [%[a],#4]\n\t"
18653         "ldr        r6, [%[a],#8]\n\t"
18654         "ldr        r8, [%[a],#12]\n\t"
18655         "ldr        r9, [%[a],#16]\n\t"
18656         "ldr        r10, [%[a],#20]\n\t"
18657         "ldr        r11, [%[a],#24]\n\t"
18658         "ldr        r14, [%[a],#28]\n\t"
18659         "adds      r4, r4, r4\n\t"
18660         "adcs      r5, r5, r5\n\t"
18661         "adcs      r6, r6, r6\n\t"
18662         "adcs      r8, r8, r8\n\t"
18663         "adcs      r9, r9, r9\n\t"
18664         "adcs      r10, r10, r10\n\t"
18665         "adcs      r11, r11, r11\n\t"
18666         "adcs      r14, r14, r14\n\t"
18667         "adc   r3, r12, #0\n\t"
18668         "sub   r3, r12, r3\n\t"
18669         "and   r12, r3, #1\n\t"
18670         "subs  r4, r4, r3\n\t"
18671         "sbcs  r5, r5, r3\n\t"
18672         "sbcs  r6, r6, r3\n\t"
18673         "sbcs  r8, r8, #0\n\t"
18674         "sbcs  r9, r9, #0\n\t"
18675         "sbcs  r10, r10, #0\n\t"
18676         "sbcs  r11, r11, r12\n\t"
18677         "sbc   r14, r14, r3\n\t"
18678         "str   r4, [%[r],#0]\n\t"
18679         "str   r5, [%[r],#4]\n\t"
18680         "str   r6, [%[r],#8]\n\t"
18681         "str   r8, [%[r],#12]\n\t"
18682         "str   r9, [%[r],#16]\n\t"
18683         "str   r10, [%[r],#20]\n\t"
18684         "str   r11, [%[r],#24]\n\t"
18685         "str   r14, [%[r],#28]\n\t"
18686         :
18687         : [r] "r" (r), [a] "r" (a)
18688         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r3", "r12"
18689     );
18690 }
18691 
18692 /* Triple a Montgomery form number (r = a + a + a % m).
18693  *
18694  * r   Result of Tripling.
18695  * a   Number to triple in Montgomery form.
18696  * m   Modulus (prime).
18697  */
sp_256_mont_tpl_8(sp_digit * r,const sp_digit * a,const sp_digit * m)18698 SP_NOINLINE static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
18699 {
18700     (void)m;
18701 
18702     __asm__ __volatile__ (
18703         "ldr	r2, [%[a],#0]\n\t"
18704         "ldr	r3, [%[a],#4]\n\t"
18705         "ldr	r4, [%[a],#8]\n\t"
18706         "ldr	r5, [%[a],#12]\n\t"
18707         "ldr	r6, [%[a],#16]\n\t"
18708         "ldr	r8, [%[a],#20]\n\t"
18709         "ldr	r9, [%[a],#24]\n\t"
18710         "ldr	r10, [%[a],#28]\n\t"
18711         "adds	r2, r2, r2\n\t"
18712         "adcs	r3, r3, r3\n\t"
18713         "adcs	r4, r4, r4\n\t"
18714         "adcs	r5, r5, r5\n\t"
18715         "adcs	r6, r6, r6\n\t"
18716         "adcs	r8, r8, r8\n\t"
18717         "adcs	r9, r9, r9\n\t"
18718         "adcs	r10, r10, r10\n\t"
18719         "mov	r11, #0\n\t"
18720         "mov	r14, #0\n\t"
18721         "adc	r11, r11, r11\n\t"
18722         "mov	r12, r11\n\t"
18723         "sub	r11, r11, #1\n\t"
18724         "mvn	r11, r11\n\t"
18725         "subs	r2, r2, r11\n\t"
18726         "sbcs	r3, r3, r11\n\t"
18727         "sbcs	r4, r4, r11\n\t"
18728         "sbcs	r5, r5, r14\n\t"
18729         "sbcs	r6, r6, r14\n\t"
18730         "sbcs	r8, r8, r14\n\t"
18731         "sbcs	r9, r9, r12\n\t"
18732         "sbc	r10, r10, r11\n\t"
18733         "ldr	r12, [%[a],#0]\n\t"
18734         "ldr	r14, [%[a],#4]\n\t"
18735         "adds	r2, r2, r12\n\t"
18736         "adcs	r3, r3, r14\n\t"
18737         "ldr	r12, [%[a],#8]\n\t"
18738         "ldr	r14, [%[a],#12]\n\t"
18739         "adcs	r4, r4, r12\n\t"
18740         "adcs	r5, r5, r14\n\t"
18741         "ldr	r12, [%[a],#16]\n\t"
18742         "ldr	r14, [%[a],#20]\n\t"
18743         "adcs	r6, r6, r12\n\t"
18744         "adcs	r8, r8, r14\n\t"
18745         "ldr	r12, [%[a],#24]\n\t"
18746         "ldr	r14, [%[a],#28]\n\t"
18747         "adcs	r9, r9, r12\n\t"
18748         "adcs	r10, r10, r14\n\t"
18749         "mov	r11, #0\n\t"
18750         "mov	r14, #0\n\t"
18751         "adc	r11, r11, r11\n\t"
18752         "mov	r12, r11\n\t"
18753         "sub	r11, r11, #1\n\t"
18754         "mvn	r11, r11\n\t"
18755         "subs	r2, r2, r11\n\t"
18756         "str	r2, [%[r],#0]\n\t"
18757         "sbcs	r3, r3, r11\n\t"
18758         "str	r3, [%[r],#4]\n\t"
18759         "sbcs	r4, r4, r11\n\t"
18760         "str	r4, [%[r],#8]\n\t"
18761         "sbcs	r5, r5, r14\n\t"
18762         "str	r5, [%[r],#12]\n\t"
18763         "sbcs	r6, r6, r14\n\t"
18764         "str	r6, [%[r],#16]\n\t"
18765         "sbcs	r8, r8, r14\n\t"
18766         "str	r8, [%[r],#20]\n\t"
18767         "sbcs	r9, r9, r12\n\t"
18768         "str	r9, [%[r],#24]\n\t"
18769         "sbc	r10, r10, r11\n\t"
18770         "str	r10, [%[r],#28]\n\t"
18771         :
18772         : [r] "r" (r), [a] "r" (a)
18773         : "memory", "r11", "r12", "r14", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10"
18774     );
18775 }
18776 
18777 /* Subtract two Montgomery form numbers (r = a - b % m).
18778  *
18779  * r   Result of subtration.
18780  * a   Number to subtract from in Montgomery form.
18781  * b   Number to subtract with in Montgomery form.
18782  * m   Modulus (prime).
18783  */
sp_256_mont_sub_8(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)18784 SP_NOINLINE static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
18785         const sp_digit* m)
18786 {
18787     (void)m;
18788 
18789     __asm__ __volatile__ (
18790         "mov   r12, #0\n\t"
18791         "ldr       r4, [%[a],#0]\n\t"
18792         "ldr       r5, [%[a],#4]\n\t"
18793         "ldr       r6, [%[a],#8]\n\t"
18794         "ldr       r8, [%[a],#12]\n\t"
18795         "ldr       r9, [%[b],#0]\n\t"
18796         "ldr       r10, [%[b],#4]\n\t"
18797         "ldr       r11, [%[b],#8]\n\t"
18798         "ldr       r14, [%[b],#12]\n\t"
18799         "subs    r4, r4, r9\n\t"
18800         "sbcs    r5, r5, r10\n\t"
18801         "sbcs    r6, r6, r11\n\t"
18802         "sbcs    r8, r8, r14\n\t"
18803         "str       r4, [%[r],#0]\n\t"
18804         "str       r5, [%[r],#4]\n\t"
18805         "str       r6, [%[r],#8]\n\t"
18806         "str       r8, [%[r],#12]\n\t"
18807         "ldr       r4, [%[a],#16]\n\t"
18808         "ldr       r5, [%[a],#20]\n\t"
18809         "ldr       r6, [%[a],#24]\n\t"
18810         "ldr       r8, [%[a],#28]\n\t"
18811         "ldr       r9, [%[b],#16]\n\t"
18812         "ldr       r10, [%[b],#20]\n\t"
18813         "ldr       r11, [%[b],#24]\n\t"
18814         "ldr       r14, [%[b],#28]\n\t"
18815         "sbcs    r4, r4, r9\n\t"
18816         "sbcs    r5, r5, r10\n\t"
18817         "sbcs    r6, r6, r11\n\t"
18818         "sbcs    r8, r8, r14\n\t"
18819         "sbc   r3, r12, #0\n\t"
18820         "and   r12, r3, #1\n\t"
18821         "ldr   r9, [%[r],#0]\n\t"
18822         "ldr   r10, [%[r],#4]\n\t"
18823         "ldr   r11, [%[r],#8]\n\t"
18824         "ldr   r14, [%[r],#12]\n\t"
18825         "adds  r9, r9, r3\n\t"
18826         "adcs  r10, r10, r3\n\t"
18827         "adcs  r11, r11, r3\n\t"
18828         "adcs  r14, r14, #0\n\t"
18829         "adcs  r4, r4, #0\n\t"
18830         "adcs  r5, r5, #0\n\t"
18831         "adcs  r6, r6, r12\n\t"
18832         "adc   r8, r8, r3\n\t"
18833         "str   r9, [%[r],#0]\n\t"
18834         "str   r10, [%[r],#4]\n\t"
18835         "str   r11, [%[r],#8]\n\t"
18836         "str   r14, [%[r],#12]\n\t"
18837         "str   r4, [%[r],#16]\n\t"
18838         "str   r5, [%[r],#20]\n\t"
18839         "str   r6, [%[r],#24]\n\t"
18840         "str   r8, [%[r],#28]\n\t"
18841         :
18842         : [r] "r" (r), [a] "r" (a), [b] "r" (b)
18843         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r14", "r3", "r12"
18844     );
18845 }
18846 
18847 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
18848  *
18849  * r  Result of division by 2.
18850  * a  Number to divide.
18851  * m  Modulus (prime).
18852  */
sp_256_div2_8(sp_digit * r,const sp_digit * a,const sp_digit * m)18853 SP_NOINLINE static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
18854 {
18855     __asm__ __volatile__ (
18856         "ldr	r8, [%[a], #0]\n\t"
18857         "lsl	r8, r8, #31\n\t"
18858         "lsr	r8, r8, #31\n\t"
18859         "mov	r5, #0\n\t"
18860         "sub	r5, r5, r8\n\t"
18861         "mov	r8, #0\n\t"
18862         "lsl	r6, r5, #31\n\t"
18863         "lsr	r6, r6, #31\n\t"
18864         "ldr	r3, [%[a], #0]\n\t"
18865         "ldr	r4, [%[a], #4]\n\t"
18866         "adds	r3, r3, r5\n\t"
18867         "adcs	r4, r4, r5\n\t"
18868         "str	r3, [%[r], #0]\n\t"
18869         "str	r4, [%[r], #4]\n\t"
18870         "ldr	r3, [%[a], #8]\n\t"
18871         "ldr	r4, [%[a], #12]\n\t"
18872         "adcs	r3, r3, r5\n\t"
18873         "adcs	r4, r4, r8\n\t"
18874         "str	r3, [%[r], #8]\n\t"
18875         "str	r4, [%[r], #12]\n\t"
18876         "ldr	r3, [%[a], #16]\n\t"
18877         "ldr	r4, [%[a], #20]\n\t"
18878         "adcs	r3, r3, r8\n\t"
18879         "adcs	r4, r4, r8\n\t"
18880         "str	r3, [%[r], #16]\n\t"
18881         "str	r4, [%[r], #20]\n\t"
18882         "ldr	r3, [%[a], #24]\n\t"
18883         "ldr	r4, [%[a], #28]\n\t"
18884         "adcs	r3, r3, r6\n\t"
18885         "adcs	r4, r4, r5\n\t"
18886         "adc	r8, r8, r8\n\t"
18887         "lsl	r8, r8, #31\n\t"
18888         "lsr	r5, r3, #1\n\t"
18889         "lsl	r3, r3, #31\n\t"
18890         "lsr	r6, r4, #1\n\t"
18891         "lsl	r4, r4, #31\n\t"
18892         "orr	r5, r5, r4\n\t"
18893         "orr	r6, r6, r8\n\t"
18894         "mov	r8, r3\n\t"
18895         "str	r5, [%[r], #24]\n\t"
18896         "str	r6, [%[r], #28]\n\t"
18897         "ldr	r3, [%[a], #16]\n\t"
18898         "ldr	r4, [%[a], #20]\n\t"
18899         "lsr	r5, r3, #1\n\t"
18900         "lsl	r3, r3, #31\n\t"
18901         "lsr	r6, r4, #1\n\t"
18902         "lsl	r4, r4, #31\n\t"
18903         "orr	r5, r5, r4\n\t"
18904         "orr	r6, r6, r8\n\t"
18905         "mov	r8, r3\n\t"
18906         "str	r5, [%[r], #16]\n\t"
18907         "str	r6, [%[r], #20]\n\t"
18908         "ldr	r3, [%[a], #8]\n\t"
18909         "ldr	r4, [%[a], #12]\n\t"
18910         "lsr	r5, r3, #1\n\t"
18911         "lsl	r3, r3, #31\n\t"
18912         "lsr	r6, r4, #1\n\t"
18913         "lsl	r4, r4, #31\n\t"
18914         "orr	r5, r5, r4\n\t"
18915         "orr	r6, r6, r8\n\t"
18916         "mov	r8, r3\n\t"
18917         "str	r5, [%[r], #8]\n\t"
18918         "str	r6, [%[r], #12]\n\t"
18919         "ldr	r3, [%[r], #0]\n\t"
18920         "ldr	r4, [%[r], #4]\n\t"
18921         "lsr	r5, r3, #1\n\t"
18922         "lsr	r6, r4, #1\n\t"
18923         "lsl	r4, r4, #31\n\t"
18924         "orr	r5, r5, r4\n\t"
18925         "orr	r6, r6, r8\n\t"
18926         "str	r5, [%[r], #0]\n\t"
18927         "str	r6, [%[r], #4]\n\t"
18928         :
18929         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
18930         : "memory", "r3", "r4", "r5", "r6", "r8"
18931     );
18932 }
18933 
18934 /* Double the Montgomery form projective point p.
18935  *
18936  * r  Result of doubling point.
18937  * p  Point to double.
18938  * t  Temporary ordinate data.
18939  */
#ifdef WOLFSSL_SP_NONBLOCK
/* Progress record for the non-blocking point double.
 *
 * state   Next step to run: 0 is the set-up step, 19 means finished.
 * t1, t2  Temporaries carved out of the caller's scratch buffer.
 * x, y, z Ordinates of the result point.
 */
typedef struct sp_256_proj_point_dbl_8_ctx {
    int state;
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_256_proj_point_dbl_8_ctx;

/* Double the Montgomery form projective point p, one field operation per
 * call.
 *
 * The same arguments must be passed on every call until MP_OKAY is returned.
 * The context's state must be 0 on the first call.
 *
 * sp_ctx  Context whose data area holds a sp_256_proj_point_dbl_8_ctx.
 * r       Result of doubling point.
 * p       Point to double.
 * t       Temporary ordinate data.
 * returns FP_WOULDBLOCK while steps remain and MP_OKAY once complete.
 */
static int sp_256_proj_point_dbl_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, const sp_point_256* p, sp_digit* t)
{
    /* Compile-time guard: the array size goes negative (a compile error)
     * unless the private context is smaller than the generic one. */
    typedef char ctx_size_test[sizeof(sp_256_proj_point_dbl_8_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    sp_256_proj_point_dbl_8_ctx* ctx = (sp_256_proj_point_dbl_8_ctx*)sp_ctx->data;
    int advance = 1;

    (void)sizeof(ctx_size_test);

    /* Each recognised step performs one operation; the state is moved on by
     * one after the switch. */
    switch (ctx->state) {
    case 0:
        /* Set up temporaries and result ordinates. */
        ctx->t1 = t;
        ctx->t2 = t + 2*8;
        ctx->x = r->x;
        ctx->y = r->y;
        ctx->z = r->z;
        /* Result inherits the infinity flag of the input. */
        if (r != p) {
            r->infinity = p->infinity;
        }
        break;
    case 1:     /* T1 = Z * Z */
        sp_256_mont_sqr_8(ctx->t1, p->z, p256_mod, p256_mp_mod);
        break;
    case 2:     /* Z = Y * Z */
        sp_256_mont_mul_8(ctx->z, p->y, p->z, p256_mod, p256_mp_mod);
        break;
    case 3:     /* Z = 2Z */
        sp_256_mont_dbl_8(ctx->z, ctx->z, p256_mod);
        break;
    case 4:     /* T2 = X - T1 */
        sp_256_mont_sub_8(ctx->t2, p->x, ctx->t1, p256_mod);
        break;
    case 5:     /* T1 = X + T1 */
        sp_256_mont_add_8(ctx->t1, p->x, ctx->t1, p256_mod);
        break;
    case 6:     /* T2 = T1 * T2 */
        sp_256_mont_mul_8(ctx->t2, ctx->t1, ctx->t2, p256_mod, p256_mp_mod);
        break;
    case 7:     /* T1 = 3T2 */
        sp_256_mont_tpl_8(ctx->t1, ctx->t2, p256_mod);
        break;
    case 8:     /* Y = 2Y */
        sp_256_mont_dbl_8(ctx->y, p->y, p256_mod);
        break;
    case 9:     /* Y = Y * Y */
        sp_256_mont_sqr_8(ctx->y, ctx->y, p256_mod, p256_mp_mod);
        break;
    case 10:    /* T2 = Y * Y */
        sp_256_mont_sqr_8(ctx->t2, ctx->y, p256_mod, p256_mp_mod);
        break;
    case 11:    /* T2 = T2/2 */
        sp_256_div2_8(ctx->t2, ctx->t2, p256_mod);
        break;
    case 12:    /* Y = Y * X */
        sp_256_mont_mul_8(ctx->y, ctx->y, p->x, p256_mod, p256_mp_mod);
        break;
    case 13:    /* X = T1 * T1 */
        sp_256_mont_sqr_8(ctx->x, ctx->t1, p256_mod, p256_mp_mod);
        break;
    case 14:    /* X = X - Y */
        sp_256_mont_sub_8(ctx->x, ctx->x, ctx->y, p256_mod);
        break;
    case 15:    /* X = X - Y */
        sp_256_mont_sub_8(ctx->x, ctx->x, ctx->y, p256_mod);
        break;
    case 16:    /* Y = Y - X */
        sp_256_mont_sub_8(ctx->y, ctx->y, ctx->x, p256_mod);
        break;
    case 17:    /* Y = Y * T1 */
        sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t1, p256_mod, p256_mp_mod);
        break;
    case 18:    /* Y = Y - T2 */
        sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t2, p256_mod);
        break;
    default:
        /* Already finished (19) or an unrecognised state: leave it alone. */
        advance = 0;
        break;
    }

    if (advance) {
        ctx->state++;
    }

    /* Only the finished state reports success. */
    return (ctx->state == 19) ? MP_OKAY : FP_WOULDBLOCK;
}
#endif /* WOLFSSL_SP_NONBLOCK */
19074 
static void sp_256_proj_point_dbl_8(sp_point_256* r, const sp_point_256* p, sp_digit* t)
{
    /* Two temporaries taken from the scratch buffer, 2*8 digits apart. */
    sp_digit* const ta = t;
    sp_digit* const tb = t + 2*8;
    /* Ordinates of the result; these alias p's ordinates when r == p. */
    sp_digit* const rx = r->x;
    sp_digit* const ry = r->y;
    sp_digit* const rz = r->z;

    /* Result inherits the infinity flag of the input. */
    if (r != p) {
        r->infinity = p->infinity;
    }

    /* The order below matters when r == p: each input ordinate is consumed
     * before the matching result ordinate is overwritten. */
    sp_256_mont_sqr_8(ta, p->z, p256_mod, p256_mp_mod);       /* TA = Z * Z   */
    sp_256_mont_mul_8(rz, p->y, p->z, p256_mod, p256_mp_mod); /* Z  = Y * Z   */
    sp_256_mont_dbl_8(rz, rz, p256_mod);                      /* Z  = 2Z      */
    sp_256_mont_sub_8(tb, p->x, ta, p256_mod);                /* TB = X - TA  */
    sp_256_mont_add_8(ta, p->x, ta, p256_mod);                /* TA = X + TA  */
    sp_256_mont_mul_8(tb, ta, tb, p256_mod, p256_mp_mod);     /* TB = TA * TB */
    sp_256_mont_tpl_8(ta, tb, p256_mod);                      /* TA = 3TB     */
    sp_256_mont_dbl_8(ry, p->y, p256_mod);                    /* Y  = 2Y      */
    sp_256_mont_sqr_8(ry, ry, p256_mod, p256_mp_mod);         /* Y  = Y * Y   */
    sp_256_mont_sqr_8(tb, ry, p256_mod, p256_mp_mod);         /* TB = Y * Y   */
    sp_256_div2_8(tb, tb, p256_mod);                          /* TB = TB / 2  */
    sp_256_mont_mul_8(ry, ry, p->x, p256_mod, p256_mp_mod);   /* Y  = Y * X   */
    sp_256_mont_sqr_8(rx, ta, p256_mod, p256_mp_mod);         /* X  = TA * TA */
    sp_256_mont_sub_8(rx, rx, ry, p256_mod);                  /* X  = X - Y   */
    sp_256_mont_sub_8(rx, rx, ry, p256_mod);                  /* X  = X - Y   */
    sp_256_mont_sub_8(ry, ry, rx, p256_mod);                  /* Y  = Y - X   */
    sp_256_mont_mul_8(ry, ry, ta, p256_mod, p256_mp_mod);     /* Y  = Y * TA  */
    sp_256_mont_sub_8(ry, ry, tb, p256_mod);                  /* Y  = Y - TB  */
}
19128 
19129 /* Compare two numbers to determine if they are equal.
19130  * Constant time implementation.
19131  *
19132  * a  First number to compare.
19133  * b  Second number to compare.
19134  * returns 1 when equal and 0 otherwise.
19135  */
sp_256_cmp_equal_8(const sp_digit * a,const sp_digit * b)19136 static int sp_256_cmp_equal_8(const sp_digit* a, const sp_digit* b)
19137 {
19138     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
19139             (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
19140             (a[6] ^ b[6]) | (a[7] ^ b[7])) == 0;
19141 }
19142 
19143 /* Add two Montgomery form projective points.
19144  *
19145  * r  Result of addition.
19146  * p  First point to add.
19147  * q  Second point to add.
19148  * t  Temporary ordinate data.
19149  */
19150 
#ifdef WOLFSSL_SP_NONBLOCK
/* Progress record for the non-blocking point addition.
 *
 * state    Next step to run: 0 is the set-up step, 27 means finished.
 * dbl_ctx  Nested context used when the addition turns into a doubling.
 * ap, rp   Candidate input points / result points, selected by index.
 * t1..t5   Temporaries carved out of the caller's scratch buffer.
 * x, y, z  Ordinates the result is computed into.
 */
typedef struct sp_256_proj_point_add_8_ctx {
    int state;
    sp_256_proj_point_dbl_8_ctx dbl_ctx;
    const sp_point_256* ap[2];
    sp_point_256* rp[2];
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* t3;
    sp_digit* t4;
    sp_digit* t5;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_256_proj_point_add_8_ctx;

/* Add two Montgomery form projective points, one field operation per call.
 *
 * The same arguments must be passed on every call until MP_OKAY is returned.
 * The context's state must be 0 on the first call.
 *
 * sp_ctx  Context whose data area holds a sp_256_proj_point_add_8_ctx.
 * r       Result of addition.
 * p       First point to add.
 * q       Second point to add.
 * t       Temporary ordinate data.
 * returns FP_WOULDBLOCK while steps remain and MP_OKAY once complete.
 */
static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r,
    const sp_point_256* p, const sp_point_256* q, sp_digit* t)
{
    /* Compile-time guard: the array size goes negative (a compile error)
     * unless the private context is smaller than the generic one. */
    typedef char ctx_size_test[sizeof(sp_256_proj_point_add_8_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    sp_256_proj_point_add_8_ctx* ctx = (sp_256_proj_point_add_8_ctx*)sp_ctx->data;
    int err = FP_WOULDBLOCK;
    int advance = 1;

    (void)sizeof(ctx_size_test);

    /* Ensure only the first point is the same as the result. */
    if (q == r) {
        const sp_point_256* swap = p;
        p = q;
        q = swap;
    }

    /* Unless a step clears 'advance', the state is moved on by one after
     * the switch. */
    switch (ctx->state) {
    case 0: /* INIT */
        ctx->t1 = t;
        ctx->t2 = t + 2*8;
        ctx->t3 = t + 4*8;
        ctx->t4 = t + 6*8;
        ctx->t5 = t + 8*8;
        break;
    case 1:
    {
        int same_x;
        int same_y;
        int same_z;
        int neg_y;

        /* Check double: t1 = modulus - q.y, so that p.y can be matched
         * against both q.y and its negation. */
        (void)sp_256_sub_8(ctx->t1, p256_mod, q->y);
        sp_256_norm_8(ctx->t1);
        same_x = sp_256_cmp_equal_8(p->x, q->x);
        same_z = sp_256_cmp_equal_8(p->z, q->z);
        same_y = sp_256_cmp_equal_8(p->y, q->y);
        neg_y = sp_256_cmp_equal_8(p->y, ctx->t1);
        if ((same_x & same_z & (same_y | neg_y)) != 0) {
            /* Hand over to the doubling state machine, starting afresh. */
            XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
            ctx->state = 2;
        }
        else {
            ctx->state = 3;
        }
        advance = 0;
        break;
    }
    case 2:
        /* One doubling step per call; jump to done when it completes. */
        err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t);
        if (err == MP_OKAY) {
            ctx->state = 27; /* done */
        }
        advance = 0;
        break;
    case 3:
    {
        const sp_point_256* src;
        sp_point_256* dst;
        int i;

        ctx->rp[0] = r;

        /*lint allow cast to different type of pointer*/
        ctx->rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
        XMEMSET(ctx->rp[1], 0, sizeof(sp_point_256));
        /* If either input is infinity, compute into the scratch point so
         * that r keeps the value copied into it below. */
        dst = ctx->rp[p->infinity | q->infinity];
        ctx->x = dst->x;
        ctx->y = dst->y;
        ctx->z = dst->z;

        ctx->ap[0] = p;
        ctx->ap[1] = q;
        /* r starts as p, or as q when p is infinity. */
        src = ctx->ap[p->infinity];
        for (i = 0; i < 8; i++) {
            r->x[i] = src->x[i];
            r->y[i] = src->y[i];
            r->z[i] = src->z[i];
        }
        r->infinity = src->infinity;
        break;
    }
    case 4:     /* U1 = X1*Z2^2 */
        sp_256_mont_sqr_8(ctx->t1, q->z, p256_mod, p256_mp_mod);
        break;
    case 5:
        sp_256_mont_mul_8(ctx->t3, ctx->t1, q->z, p256_mod, p256_mp_mod);
        break;
    case 6:
        sp_256_mont_mul_8(ctx->t1, ctx->t1, ctx->x, p256_mod, p256_mp_mod);
        break;
    case 7:     /* U2 = X2*Z1^2 */
        sp_256_mont_sqr_8(ctx->t2, ctx->z, p256_mod, p256_mp_mod);
        break;
    case 8:
        sp_256_mont_mul_8(ctx->t4, ctx->t2, ctx->z, p256_mod, p256_mp_mod);
        break;
    case 9:
        sp_256_mont_mul_8(ctx->t2, ctx->t2, q->x, p256_mod, p256_mp_mod);
        break;
    case 10:    /* S1 = Y1*Z2^3 */
        sp_256_mont_mul_8(ctx->t3, ctx->t3, ctx->y, p256_mod, p256_mp_mod);
        break;
    case 11:    /* S2 = Y2*Z1^3 */
        sp_256_mont_mul_8(ctx->t4, ctx->t4, q->y, p256_mod, p256_mp_mod);
        break;
    case 12:    /* H = U2 - U1 */
        sp_256_mont_sub_8(ctx->t2, ctx->t2, ctx->t1, p256_mod);
        break;
    case 13:    /* R = S2 - S1 */
        sp_256_mont_sub_8(ctx->t4, ctx->t4, ctx->t3, p256_mod);
        break;
    case 14:    /* Z3 = H*Z1*Z2 */
        sp_256_mont_mul_8(ctx->z, ctx->z, q->z, p256_mod, p256_mp_mod);
        break;
    case 15:
        sp_256_mont_mul_8(ctx->z, ctx->z, ctx->t2, p256_mod, p256_mp_mod);
        break;
    case 16:    /* X3 = R^2 - H^3 - 2*U1*H^2 */
        sp_256_mont_sqr_8(ctx->x, ctx->t4, p256_mod, p256_mp_mod);
        break;
    case 17:
        sp_256_mont_sqr_8(ctx->t5, ctx->t2, p256_mod, p256_mp_mod);
        break;
    case 18:
        sp_256_mont_mul_8(ctx->y, ctx->t1, ctx->t5, p256_mod, p256_mp_mod);
        break;
    case 19:
        sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t2, p256_mod, p256_mp_mod);
        break;
    case 20:
        sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t5, p256_mod);
        break;
    case 21:
        sp_256_mont_dbl_8(ctx->t1, ctx->y, p256_mod);
        break;
    case 22:
        sp_256_mont_sub_8(ctx->x, ctx->x, ctx->t1, p256_mod);
        break;
    case 23:    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
        sp_256_mont_sub_8(ctx->y, ctx->y, ctx->x, p256_mod);
        break;
    case 24:
        sp_256_mont_mul_8(ctx->y, ctx->y, ctx->t4, p256_mod, p256_mp_mod);
        break;
    case 25:
        sp_256_mont_mul_8(ctx->t5, ctx->t5, ctx->t3, p256_mod, p256_mp_mod);
        break;
    case 26:
        sp_256_mont_sub_8(ctx->y, ctx->y, ctx->t5, p256_mod);
        break;
    default:
        /* Already finished (27) or an unrecognised state: leave it alone. */
        advance = 0;
        break;
    }

    if (advance) {
        ctx->state++;
    }
    /* Only the finished state reports success; otherwise err is still
     * FP_WOULDBLOCK or whatever the nested doubling step returned. */
    if (ctx->state == 27) {
        err = MP_OKAY;
    }
    return err;
}
#endif /* WOLFSSL_SP_NONBLOCK */
19352 
static void sp_256_proj_point_add_8(sp_point_256* r,
        const sp_point_256* p, const sp_point_256* q, sp_digit* t)
{
    /* Five temporaries taken from the scratch buffer, 2*8 digits apart. */
    sp_digit* const ta = t;
    sp_digit* const tb = t + 2*8;
    sp_digit* const tc = t + 4*8;
    sp_digit* const td = t + 6*8;
    sp_digit* const te = t + 8*8;
    const sp_point_256* in[2];
    sp_point_256* out[2];
    const sp_point_256* src;
    sp_point_256* dst;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
    int same_x;
    int same_y;
    int same_z;
    int neg_y;
    int i;

    /* Ensure only the first point is the same as the result. */
    if (q == r) {
        const sp_point_256* swap = p;
        p = q;
        q = swap;
    }

    /* Check double: ta = modulus - q.y, so that p.y can be matched against
     * both q.y and its negation. */
    (void)sp_256_sub_8(ta, p256_mod, q->y);
    sp_256_norm_8(ta);
    same_x = sp_256_cmp_equal_8(p->x, q->x);
    same_z = sp_256_cmp_equal_8(p->z, q->z);
    same_y = sp_256_cmp_equal_8(p->y, q->y);
    neg_y = sp_256_cmp_equal_8(p->y, ta);
    if ((same_x & same_z & (same_y | neg_y)) != 0) {
        sp_256_proj_point_dbl_8(r, p, t);
        return;
    }

    out[0] = r;

    /*lint allow cast to different type of pointer*/
    out[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
    XMEMSET(out[1], 0, sizeof(sp_point_256));
    /* If either input is infinity, compute into the scratch point so that
     * r keeps the value copied into it below. */
    dst = out[p->infinity | q->infinity];
    x = dst->x;
    y = dst->y;
    z = dst->z;

    in[0] = p;
    in[1] = q;
    /* r starts as p, or as q when p is infinity. */
    src = in[p->infinity];
    for (i = 0; i < 8; i++) {
        r->x[i] = src->x[i];
        r->y[i] = src->y[i];
        r->z[i] = src->z[i];
    }
    r->infinity = src->infinity;

    /* U1 = X1*Z2^2 (TA); TC picks up Z2^3 on the way. */
    sp_256_mont_sqr_8(ta, q->z, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(tc, ta, q->z, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(ta, ta, x, p256_mod, p256_mp_mod);
    /* U2 = X2*Z1^2 (TB); TD picks up Z1^3 on the way. */
    sp_256_mont_sqr_8(tb, z, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(td, tb, z, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(tb, tb, q->x, p256_mod, p256_mp_mod);
    /* S1 = Y1*Z2^3 (TC) */
    sp_256_mont_mul_8(tc, tc, y, p256_mod, p256_mp_mod);
    /* S2 = Y2*Z1^3 (TD) */
    sp_256_mont_mul_8(td, td, q->y, p256_mod, p256_mp_mod);
    /* H = U2 - U1 (TB) */
    sp_256_mont_sub_8(tb, tb, ta, p256_mod);
    /* R = S2 - S1 (TD) */
    sp_256_mont_sub_8(td, td, tc, p256_mod);
    /* Z3 = H*Z1*Z2 */
    sp_256_mont_mul_8(z, z, q->z, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(z, z, tb, p256_mod, p256_mp_mod);
    /* X3 = R^2 - H^3 - 2*U1*H^2 */
    sp_256_mont_sqr_8(x, td, p256_mod, p256_mp_mod);
    sp_256_mont_sqr_8(te, tb, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(y, ta, te, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(te, te, tb, p256_mod, p256_mp_mod);
    sp_256_mont_sub_8(x, x, te, p256_mod);
    sp_256_mont_dbl_8(ta, y, p256_mod);
    sp_256_mont_sub_8(x, x, ta, p256_mod);
    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
    sp_256_mont_sub_8(y, y, x, p256_mod);
    sp_256_mont_mul_8(y, y, td, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(te, te, tc, p256_mod, p256_mp_mod);
    sp_256_mont_sub_8(y, y, te, p256_mod);
}
19439 
19440 #ifndef WC_NO_CACHE_RESISTANT
19441 /* Touch each possible point that could be being copied.
19442  *
19443  * r      Point to copy into.
19444  * table  Table - start of the entires to access
19445  * idx    Index of entry to retrieve.
19446  */
sp_256_get_point_16_8(sp_point_256 * r,const sp_point_256 * table,int idx)19447 static void sp_256_get_point_16_8(sp_point_256* r, const sp_point_256* table,
19448     int idx)
19449 {
19450     int i;
19451     sp_digit mask;
19452 
19453     r->x[0] = 0;
19454     r->x[1] = 0;
19455     r->x[2] = 0;
19456     r->x[3] = 0;
19457     r->x[4] = 0;
19458     r->x[5] = 0;
19459     r->x[6] = 0;
19460     r->x[7] = 0;
19461     r->y[0] = 0;
19462     r->y[1] = 0;
19463     r->y[2] = 0;
19464     r->y[3] = 0;
19465     r->y[4] = 0;
19466     r->y[5] = 0;
19467     r->y[6] = 0;
19468     r->y[7] = 0;
19469     r->z[0] = 0;
19470     r->z[1] = 0;
19471     r->z[2] = 0;
19472     r->z[3] = 0;
19473     r->z[4] = 0;
19474     r->z[5] = 0;
19475     r->z[6] = 0;
19476     r->z[7] = 0;
19477     for (i = 1; i < 16; i++) {
19478         mask = 0 - (i == idx);
19479         r->x[0] |= mask & table[i].x[0];
19480         r->x[1] |= mask & table[i].x[1];
19481         r->x[2] |= mask & table[i].x[2];
19482         r->x[3] |= mask & table[i].x[3];
19483         r->x[4] |= mask & table[i].x[4];
19484         r->x[5] |= mask & table[i].x[5];
19485         r->x[6] |= mask & table[i].x[6];
19486         r->x[7] |= mask & table[i].x[7];
19487         r->y[0] |= mask & table[i].y[0];
19488         r->y[1] |= mask & table[i].y[1];
19489         r->y[2] |= mask & table[i].y[2];
19490         r->y[3] |= mask & table[i].y[3];
19491         r->y[4] |= mask & table[i].y[4];
19492         r->y[5] |= mask & table[i].y[5];
19493         r->y[6] |= mask & table[i].y[6];
19494         r->y[7] |= mask & table[i].y[7];
19495         r->z[0] |= mask & table[i].z[0];
19496         r->z[1] |= mask & table[i].z[1];
19497         r->z[2] |= mask & table[i].z[2];
19498         r->z[3] |= mask & table[i].z[3];
19499         r->z[4] |= mask & table[i].z[4];
19500         r->z[5] |= mask & table[i].z[5];
19501         r->z[6] |= mask & table[i].z[6];
19502         r->z[7] |= mask & table[i].z[7];
19503     }
19504 }
19505 #endif /* !WC_NO_CACHE_RESISTANT */
19506 /* Multiply the point by the scalar and return the result.
19507  * If map is true then convert result to affine coordinates.
19508  *
19509  * Fast implementation that generates a pre-computation table.
19510  * 4 bits of window (no sliding!).
19511  * Uses add and double for calculating table.
19512  * 256 doubles.
19513  * 76 adds.
19514  *
19515  * r     Resulting point.
19516  * g     Point to multiply.
19517  * k     Scalar to multiply by.
19518  * map   Indicates whether to convert result to affine.
19519  * ct    Constant time required.
19520  * heap  Heap to use for allocation.
19521  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
19522  */
static int sp_256_ecc_mulmod_fast_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
        int map, int ct, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256* t = NULL;
    sp_digit* tmp = NULL;
#else
    sp_point_256 t[16 + 1];
    sp_digit tmp[2 * 8 * 5];
#endif
    sp_point_256* rt = NULL;
#ifndef WC_NO_CACHE_RESISTANT
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256* p = NULL;
#else
    sp_point_256 p[1];
#endif
#endif /* !WC_NO_CACHE_RESISTANT */
    sp_digit bits;   /* Scalar bits not yet consumed, kept at the top. */
    int word;        /* Index of the next scalar word to load. */
    int avail;       /* Number of unconsumed bits held in 'bits'. */
    int win;         /* Current 4-bit window value. */
    int idx;         /* Table index while building the multiples. */
    int err = MP_OKAY;

    /* Constant time used for cache attack resistance implementation. */
    (void)ct;
    (void)heap;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* 16 table entries plus one slot for the running result. */
    t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * (16 + 1),
        heap, DYNAMIC_TYPE_ECC);
    if (t == NULL) {
        err = MEMORY_E;
    }
    #ifndef WC_NO_CACHE_RESISTANT
    if (err == MP_OKAY) {
        p = (sp_point_256*)XMALLOC(sizeof(sp_point_256),
            heap, DYNAMIC_TYPE_ECC);
        if (p == NULL) {
            err = MEMORY_E;
        }
    }
    #endif
    if (err == MP_OKAY) {
        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
                                DYNAMIC_TYPE_ECC);
        if (tmp == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        /* The running result lives in the slot after the table. */
        rt = t + 16;

        /* t[0] = {0, 0, 1} * norm */
        XMEMSET(&t[0], 0, sizeof(t[0]));
        t[0].infinity = 1;
        /* t[1] = {g->x, g->y, g->z} * norm */
        (void)sp_256_mod_mul_norm_8(t[1].x, g->x, p256_mod);
        (void)sp_256_mod_mul_norm_8(t[1].y, g->y, p256_mod);
        (void)sp_256_mod_mul_norm_8(t[1].z, g->z, p256_mod);
        t[1].infinity = 0;
        /* t[2..15]: even entries double t[idx/2], odd entries add the two
         * neighbouring halves t[idx/2 + 1] and t[idx/2]. */
        for (idx = 2; idx < 16; idx++) {
            if ((idx & 1) == 0) {
                sp_256_proj_point_dbl_8(&t[idx], &t[idx / 2], tmp);
            }
            else {
                sp_256_proj_point_add_8(&t[idx], &t[idx / 2 + 1],
                    &t[idx / 2], tmp);
            }
            t[idx].infinity = 0;
        }

        /* Seed the result with the entry picked by the top 4 bits of k[7]. */
        word = 6;
        bits = k[word + 1];
        avail = 28;
        win = (int)(bits >> 28);
    #ifndef WC_NO_CACHE_RESISTANT
        if (ct) {
            sp_256_get_point_16_8(rt, t, win);
            rt->infinity = !win;
        }
        else
    #endif
        {
            XMEMCPY(rt, &t[win], sizeof(sp_point_256));
        }
        bits <<= 4;

        /* Consume the remaining windows, most significant first. */
        while ((word >= 0) || (avail >= 4)) {
            if (avail < 4) {
                /* All held bits consumed - bring in the next word down. */
                bits |= k[word--];
                avail += 32;
            }
            win = (bits >> 28) & 0xf;
            bits <<= 4;
            avail -= 4;

            /* rt = 16 * rt */
            sp_256_proj_point_dbl_8(rt, rt, tmp);
            sp_256_proj_point_dbl_8(rt, rt, tmp);
            sp_256_proj_point_dbl_8(rt, rt, tmp);
            sp_256_proj_point_dbl_8(rt, rt, tmp);

            /* rt += t[win] */
    #ifndef WC_NO_CACHE_RESISTANT
            if (ct) {
                sp_256_get_point_16_8(p, t, win);
                p->infinity = !win;
                sp_256_proj_point_add_8(rt, rt, p, tmp);
            }
            else
    #endif
            {
                sp_256_proj_point_add_8(rt, rt, &t[win], tmp);
            }
        }

        if (map != 0) {
            sp_256_map_8(r, rt, tmp);
        }
        else {
            XMEMCPY(r, rt, sizeof(sp_point_256));
        }
    }

    /* Scrub every temporary before it is released or goes out of scope. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (tmp != NULL)
#endif
    {
        ForceZero(tmp, sizeof(sp_digit) * 2 * 8 * 5);
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
    #endif
    }
#ifndef WC_NO_CACHE_RESISTANT
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (p != NULL)
    #endif
        {
            ForceZero(p, sizeof(sp_point_256));
        #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
            XFREE(p, heap, DYNAMIC_TYPE_ECC);
        #endif
        }
#endif /* !WC_NO_CACHE_RESISTANT */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL)
#endif
    {
        ForceZero(t, sizeof(sp_point_256) * (16 + 1));
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(t, heap, DYNAMIC_TYPE_ECC);
    #endif
    }

    return err;
}
19694 
19695 #ifdef FP_ECC
/* Double the Montgomery form projective point p a number of times.
 *
 * The point is updated in place; there is no separate result parameter.
 * Y is doubled once before the iterations and halved once after them, and
 * W = Z^4 is carried from one iteration to the next instead of being
 * recomputed from Z.
 *
 * When WOLFSSL_SP_SMALL is not defined the loop runs n-1 times and the final
 * doubling is written out after it without the W update. When it is defined
 * the loop runs n times and skips the W update on the last pass.
 *
 * p  Point to double. Holds the result on return.
 * n  Number of times to double
 * t  Temporary ordinate data. Used at digit offsets 0, 16, 32, 48 and 64.
 */
static void sp_256_proj_point_dbl_n_8(sp_point_256* p, int n,
    sp_digit* t)
{
    sp_digit* w = t;
    sp_digit* a = t + 2*8;
    sp_digit* b = t + 4*8;
    sp_digit* t1 = t + 6*8;
    sp_digit* t2 = t + 8*8;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;

    x = p->x;
    y = p->y;
    z = p->z;

    /* Y = 2*Y */
    sp_256_mont_dbl_8(y, y, p256_mod);
    /* W = Z^4 */
    sp_256_mont_sqr_8(w, z, p256_mod, p256_mp_mod);
    sp_256_mont_sqr_8(w, w, p256_mod, p256_mp_mod);

#ifndef WOLFSSL_SP_SMALL
    /* All but the last doubling; the last one follows the loop. */
    while (--n > 0)
#else
    /* Every doubling, including the last one. */
    while (--n >= 0)
#endif
    {
        /* A = 3*(X^2 - W) */
        sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
        sp_256_mont_sub_8(t1, t1, w, p256_mod);
        sp_256_mont_tpl_8(a, t1, p256_mod);
        /* B = X*Y^2 */
        sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
        /* X = A^2 - 2B */
        sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
        sp_256_mont_dbl_8(t2, b, p256_mod);
        sp_256_mont_sub_8(x, x, t2, p256_mod);
        /* Z = Z*Y */
        sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
        /* t1 = Y^4 (t1 held Y^2 from the B step) */
        sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
#ifdef WOLFSSL_SP_SMALL
        /* W is not needed after the final doubling. */
        if (n != 0)
#endif
        {
            /* W = W*Y^4 */
            sp_256_mont_mul_8(w, w, t1, p256_mod, p256_mp_mod);
        }
        /* y = 2*A*(B - X) - Y^4 */
        sp_256_mont_sub_8(y, b, x, p256_mod);
        sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
        sp_256_mont_dbl_8(y, y, p256_mod);
        sp_256_mont_sub_8(y, y, t1, p256_mod);
    }
#ifndef WOLFSSL_SP_SMALL
    /* Final doubling: same steps as the loop body minus the W update. */
    /* A = 3*(X^2 - W) */
    sp_256_mont_sqr_8(t1, x, p256_mod, p256_mp_mod);
    sp_256_mont_sub_8(t1, t1, w, p256_mod);
    sp_256_mont_tpl_8(a, t1, p256_mod);
    /* B = X*Y^2 */
    sp_256_mont_sqr_8(t1, y, p256_mod, p256_mp_mod);
    sp_256_mont_mul_8(b, t1, x, p256_mod, p256_mp_mod);
    /* X = A^2 - 2B */
    sp_256_mont_sqr_8(x, a, p256_mod, p256_mp_mod);
    sp_256_mont_dbl_8(t2, b, p256_mod);
    sp_256_mont_sub_8(x, x, t2, p256_mod);
    /* Z = Z*Y */
    sp_256_mont_mul_8(z, z, y, p256_mod, p256_mp_mod);
    /* t1 = Y^4 (t1 held Y^2 from the B step) */
    sp_256_mont_sqr_8(t1, t1, p256_mod, p256_mp_mod);
    /* y = 2*A*(B - X) - Y^4 */
    sp_256_mont_sub_8(y, b, x, p256_mod);
    sp_256_mont_mul_8(y, y, a, p256_mod, p256_mp_mod);
    sp_256_mont_dbl_8(y, y, p256_mod);
    sp_256_mont_sub_8(y, y, t1, p256_mod);
#endif
    /* Y = Y/2 */
    sp_256_div2_8(y, y, p256_mod);
}
19784 
19785 /* Convert the projective point to affine.
19786  * Ordinates are in Montgomery form.
19787  *
19788  * a  Point to convert.
19789  * t  Temporary data.
19790  */
sp_256_proj_to_affine_8(sp_point_256 * a,sp_digit * t)19791 static void sp_256_proj_to_affine_8(sp_point_256* a, sp_digit* t)
19792 {
19793     sp_digit* t1 = t;
19794     sp_digit* t2 = t + 2 * 8;
19795     sp_digit* tmp = t + 4 * 8;
19796 
19797     sp_256_mont_inv_8(t1, a->z, tmp);
19798 
19799     sp_256_mont_sqr_8(t2, t1, p256_mod, p256_mp_mod);
19800     sp_256_mont_mul_8(t1, t2, t1, p256_mod, p256_mp_mod);
19801 
19802     sp_256_mont_mul_8(a->x, a->x, t2, p256_mod, p256_mp_mod);
19803     sp_256_mont_mul_8(a->y, a->y, t1, p256_mod, p256_mp_mod);
19804     XMEMCPY(a->z, p256_norm_mod, sizeof(p256_norm_mod));
19805 }
19806 
19807 #endif /* FP_ECC */
/* A table entry for pre-computed points.
 *
 * Only X and Y are held. The table generators in this file store affine
 * ordinates in Montgomery form, and the code that reads an entry supplies
 * Z (p256_norm_mod) and the infinity flag itself.
 */
typedef struct sp_table_entry_256 {
    /* X ordinate - 8 digits. */
    sp_digit x[8];
    /* Y ordinate - 8 digits. */
    sp_digit y[8];
} sp_table_entry_256;
19813 
19814 #ifdef FP_ECC
19815 #endif /* FP_ECC */
/* Add two Montgomery form projective points. The second point has a Z
 * ordinate of one.
 * Only the first point can be the same pointer as the result point.
 *
 * If p and q have equal X and Z, and Y equal or negated, the work is handed
 * to sp_256_proj_point_dbl_8(). Otherwise the sum is computed without
 * branching on the infinity flags: r is first loaded with p (or with q when
 * p is infinity), and the arithmetic writes either to r or, when either
 * input is infinity, to a dummy point overlaid on t.
 *
 * r  Result of addition.
 * p  First point to add.
 * q  Second point to add.
 * t  Temporary ordinate data. Also used as storage for the dummy point.
 */
static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p,
        const sp_point_256* q, sp_digit* t)
{
    const sp_point_256* ap[2];
    sp_point_256* rp[2];
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*8;
    sp_digit* t3 = t + 4*8;
    sp_digit* t4 = t + 6*8;
    sp_digit* t5 = t + 8*8;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
    int i;

    /* Check double */
    /* t1 = modulus - q->y, for comparing p->y against the negated q->y. */
    (void)sp_256_sub_8(t1, p256_mod, q->y);
    sp_256_norm_8(t1);
    if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) &
        (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) {
        sp_256_proj_point_dbl_8(r, p, t);
    }
    else {
        /* rp[0] is the real result, rp[1] a throw-away point. */
        rp[0] = r;

        /*lint allow cast to different type of pointer*/
        rp[1] = (sp_point_256*)t; /*lint !e9087 !e740*/
        XMEMSET(rp[1], 0, sizeof(sp_point_256));
        /* Work on the throw-away point when either input is infinity. */
        x = rp[p->infinity | q->infinity]->x;
        y = rp[p->infinity | q->infinity]->y;
        z = rp[p->infinity | q->infinity]->z;

        /* r = p, or q when p is infinity. */
        ap[0] = p;
        ap[1] = q;
        for (i=0; i<8; i++) {
            r->x[i] = ap[p->infinity]->x[i];
        }
        for (i=0; i<8; i++) {
            r->y[i] = ap[p->infinity]->y[i];
        }
        for (i=0; i<8; i++) {
            r->z[i] = ap[p->infinity]->z[i];
        }
        r->infinity = ap[p->infinity]->infinity;

        /* U2 = X2*Z1^2 */
        sp_256_mont_sqr_8(t2, z, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(t4, t2, z, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(t2, t2, q->x, p256_mod, p256_mp_mod);
        /* S2 = Y2*Z1^3 */
        sp_256_mont_mul_8(t4, t4, q->y, p256_mod, p256_mp_mod);
        /* H = U2 - X1 */
        sp_256_mont_sub_8(t2, t2, x, p256_mod);
        /* R = S2 - Y1 */
        sp_256_mont_sub_8(t4, t4, y, p256_mod);
        /* Z3 = H*Z1 */
        sp_256_mont_mul_8(z, z, t2, p256_mod, p256_mp_mod);
        /* X3 = R^2 - H^3 - 2*X1*H^2 */
        sp_256_mont_sqr_8(t1, t4, p256_mod, p256_mp_mod);
        sp_256_mont_sqr_8(t5, t2, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(t3, x, t5, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(t5, t5, t2, p256_mod, p256_mp_mod);
        sp_256_mont_sub_8(x, t1, t5, p256_mod);
        sp_256_mont_dbl_8(t1, t3, p256_mod);
        sp_256_mont_sub_8(x, x, t1, p256_mod);
        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
        sp_256_mont_sub_8(t3, t3, x, p256_mod);
        sp_256_mont_mul_8(t3, t3, t4, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(t5, t5, y, p256_mod, p256_mp_mod);
        sp_256_mont_sub_8(y, t3, t5, p256_mod);
    }
}
19897 
19898 #ifdef WOLFSSL_SP_SMALL
19899 #ifdef FP_ECC
19900 /* Generate the pre-computed table of points for the base point.
19901  *
19902  * width = 4
19903  * 16 entries
19904  * 64 bits between
19905  *
19906  * a      The base point.
19907  * table  Place to store generated point data.
19908  * tmp    Temporary data.
19909  * heap  Heap to use for allocation.
19910  */
sp_256_gen_stripe_table_8(const sp_point_256 * a,sp_table_entry_256 * table,sp_digit * tmp,void * heap)19911 static int sp_256_gen_stripe_table_8(const sp_point_256* a,
19912         sp_table_entry_256* table, sp_digit* tmp, void* heap)
19913 {
19914 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
19915     sp_point_256* t = NULL;
19916 #else
19917     sp_point_256 t[3];
19918 #endif
19919     sp_point_256* s1 = NULL;
19920     sp_point_256* s2 = NULL;
19921     int i;
19922     int j;
19923     int err = MP_OKAY;
19924 
19925     (void)heap;
19926 
19927 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
19928     t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap,
19929                                      DYNAMIC_TYPE_ECC);
19930     if (t == NULL)
19931         err = MEMORY_E;
19932 #endif
19933 
19934     if (err == MP_OKAY) {
19935         s1 = t + 1;
19936         s2 = t + 2;
19937 
19938         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
19939     }
19940     if (err == MP_OKAY) {
19941         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
19942     }
19943     if (err == MP_OKAY) {
19944         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
19945     }
19946     if (err == MP_OKAY) {
19947         t->infinity = 0;
19948         sp_256_proj_to_affine_8(t, tmp);
19949 
19950         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
19951         s1->infinity = 0;
19952         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
19953         s2->infinity = 0;
19954 
19955         /* table[0] = {0, 0, infinity} */
19956         XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
19957         /* table[1] = Affine version of 'a' in Montgomery form */
19958         XMEMCPY(table[1].x, t->x, sizeof(table->x));
19959         XMEMCPY(table[1].y, t->y, sizeof(table->y));
19960 
19961         for (i=1; i<4; i++) {
19962             sp_256_proj_point_dbl_n_8(t, 64, tmp);
19963             sp_256_proj_to_affine_8(t, tmp);
19964             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
19965             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
19966         }
19967 
19968         for (i=1; i<4; i++) {
19969             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
19970             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
19971             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
19972                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
19973                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
19974                 sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
19975                 sp_256_proj_to_affine_8(t, tmp);
19976                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
19977                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
19978             }
19979         }
19980     }
19981 
19982 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
19983     if (t != NULL)
19984         XFREE(t, heap, DYNAMIC_TYPE_ECC);
19985 #endif
19986 
19987     return err;
19988 }
19989 
19990 #endif /* FP_ECC */
19991 #ifndef WC_NO_CACHE_RESISTANT
19992 /* Touch each possible entry that could be being copied.
19993  *
19994  * r      Point to copy into.
19995  * table  Table - start of the entires to access
19996  * idx    Index of entry to retrieve.
19997  */
sp_256_get_entry_16_8(sp_point_256 * r,const sp_table_entry_256 * table,int idx)19998 static void sp_256_get_entry_16_8(sp_point_256* r,
19999     const sp_table_entry_256* table, int idx)
20000 {
20001     int i;
20002     sp_digit mask;
20003 
20004     r->x[0] = 0;
20005     r->x[1] = 0;
20006     r->x[2] = 0;
20007     r->x[3] = 0;
20008     r->x[4] = 0;
20009     r->x[5] = 0;
20010     r->x[6] = 0;
20011     r->x[7] = 0;
20012     r->y[0] = 0;
20013     r->y[1] = 0;
20014     r->y[2] = 0;
20015     r->y[3] = 0;
20016     r->y[4] = 0;
20017     r->y[5] = 0;
20018     r->y[6] = 0;
20019     r->y[7] = 0;
20020     for (i = 1; i < 16; i++) {
20021         mask = 0 - (i == idx);
20022         r->x[0] |= mask & table[i].x[0];
20023         r->x[1] |= mask & table[i].x[1];
20024         r->x[2] |= mask & table[i].x[2];
20025         r->x[3] |= mask & table[i].x[3];
20026         r->x[4] |= mask & table[i].x[4];
20027         r->x[5] |= mask & table[i].x[5];
20028         r->x[6] |= mask & table[i].x[6];
20029         r->x[7] |= mask & table[i].x[7];
20030         r->y[0] |= mask & table[i].y[0];
20031         r->y[1] |= mask & table[i].y[1];
20032         r->y[2] |= mask & table[i].y[2];
20033         r->y[3] |= mask & table[i].y[3];
20034         r->y[4] |= mask & table[i].y[4];
20035         r->y[5] |= mask & table[i].y[5];
20036         r->y[6] |= mask & table[i].y[6];
20037         r->y[7] |= mask & table[i].y[7];
20038     }
20039 }
20040 #endif /* !WC_NO_CACHE_RESISTANT */
20041 /* Multiply the point by the scalar and return the result.
20042  * If map is true then convert result to affine coordinates.
20043  *
20044  * Stripe implementation.
20045  * Pre-generated: 2^0, 2^64, ...
20046  * Pre-generated: products of all combinations of above.
20047  * 4 doubles and adds (with qz=1)
20048  *
20049  * r      Resulting point.
20050  * k      Scalar to multiply by.
20051  * table  Pre-computed table.
20052  * map    Indicates whether to convert result to affine.
20053  * ct     Constant time required.
20054  * heap   Heap to use for allocation.
20055  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20056  */
sp_256_ecc_mulmod_stripe_8(sp_point_256 * r,const sp_point_256 * g,const sp_table_entry_256 * table,const sp_digit * k,int map,int ct,void * heap)20057 static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
20058         const sp_table_entry_256* table, const sp_digit* k, int map,
20059         int ct, void* heap)
20060 {
20061 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20062     sp_point_256* rt = NULL;
20063     sp_digit* t = NULL;
20064 #else
20065     sp_point_256 rt[2];
20066     sp_digit t[2 * 8 * 5];
20067 #endif
20068     sp_point_256* p = NULL;
20069     int i;
20070     int j;
20071     int y;
20072     int x;
20073     int err = MP_OKAY;
20074 
20075     (void)g;
20076     /* Constant time used for cache attack resistance implementation. */
20077     (void)ct;
20078     (void)heap;
20079 
20080 
20081 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20082     rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap,
20083                                       DYNAMIC_TYPE_ECC);
20084     if (rt == NULL)
20085         err = MEMORY_E;
20086     if (err == MP_OKAY) {
20087         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
20088                                DYNAMIC_TYPE_ECC);
20089         if (t == NULL)
20090             err = MEMORY_E;
20091     }
20092 #endif
20093 
20094     if (err == MP_OKAY) {
20095         p = rt + 1;
20096 
20097         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
20098         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
20099 
20100         y = 0;
20101         x = 63;
20102         for (j=0; j<4; j++) {
20103             y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
20104             x += 64;
20105         }
20106     #ifndef WC_NO_CACHE_RESISTANT
20107         if (ct) {
20108             sp_256_get_entry_16_8(rt, table, y);
20109         } else
20110     #endif
20111         {
20112             XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
20113             XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
20114         }
20115         rt->infinity = !y;
20116         for (i=62; i>=0; i--) {
20117             y = 0;
20118             x = i;
20119             for (j=0; j<4; j++) {
20120                 y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
20121                 x += 64;
20122             }
20123 
20124             sp_256_proj_point_dbl_8(rt, rt, t);
20125         #ifndef WC_NO_CACHE_RESISTANT
20126             if (ct) {
20127                 sp_256_get_entry_16_8(p, table, y);
20128             }
20129             else
20130         #endif
20131             {
20132                 XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
20133                 XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
20134             }
20135             p->infinity = !y;
20136             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
20137         }
20138 
20139         if (map != 0) {
20140             sp_256_map_8(r, rt, t);
20141         }
20142         else {
20143             XMEMCPY(r, rt, sizeof(sp_point_256));
20144         }
20145     }
20146 
20147 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20148     if (t != NULL)
20149         XFREE(t, heap, DYNAMIC_TYPE_ECC);
20150     if (rt != NULL)
20151         XFREE(rt, heap, DYNAMIC_TYPE_ECC);
20152 #endif
20153 
20154     return err;
20155 }
20156 
20157 #ifdef FP_ECC
/* Number of points the cache can hold, unless already defined. */
#ifndef FP_ENTRIES
    #define FP_ENTRIES 16
#endif

/* Cache entry - holds precomputation tables for a point. */
typedef struct sp_cache_256_t {
    /* X ordinate of point that table was generated from. */
    sp_digit x[8];
    /* Y ordinate of point that table was generated from. */
    sp_digit y[8];
    /* Precomputation table for point. */
    sp_table_entry_256 table[16];
    /* Number of times the point has been looked up: sp_ecc_get_cache_256()
     * sets it to 1 when the entry is created, increments it on every match
     * and evicts by lowest value. This is not a count of table entries. */
    uint32_t cnt;
    /* Point and table set in entry. */
    int set;
} sp_cache_256_t;
20175 
/* Cache of tables. */
static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
/* Index of the entry most recently handed out by sp_ecc_get_cache_256().
 * It is -1 until the first lookup. */
static THREAD_LS_T int sp_cache_256_last = -1;
/* Cache has been initialized. */
static THREAD_LS_T int sp_cache_256_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_256_lock has been passed to wc_InitMutex(). */
    static volatile int initCacheMutex_256 = 0;
    /* Lock taken around cache lookup and table generation. */
    static wolfSSL_Mutex sp_cache_256_lock;
#endif
20187 
20188 /* Get the cache entry for the point.
20189  *
20190  * g      [in]   Point scalar multipling.
20191  * cache  [out]  Cache table to use.
20192  */
sp_ecc_get_cache_256(const sp_point_256 * g,sp_cache_256_t ** cache)20193 static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
20194 {
20195     int i;
20196     int j;
20197     uint32_t least;
20198 
20199     if (sp_cache_256_inited == 0) {
20200         for (i=0; i<FP_ENTRIES; i++) {
20201             sp_cache_256[i].set = 0;
20202         }
20203         sp_cache_256_inited = 1;
20204     }
20205 
20206     /* Compare point with those in cache. */
20207     for (i=0; i<FP_ENTRIES; i++) {
20208         if (!sp_cache_256[i].set)
20209             continue;
20210 
20211         if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
20212                            sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
20213             sp_cache_256[i].cnt++;
20214             break;
20215         }
20216     }
20217 
20218     /* No match. */
20219     if (i == FP_ENTRIES) {
20220         /* Find empty entry. */
20221         i = (sp_cache_256_last + 1) % FP_ENTRIES;
20222         for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
20223             if (!sp_cache_256[i].set) {
20224                 break;
20225             }
20226         }
20227 
20228         /* Evict least used. */
20229         if (i == sp_cache_256_last) {
20230             least = sp_cache_256[0].cnt;
20231             for (j=1; j<FP_ENTRIES; j++) {
20232                 if (sp_cache_256[j].cnt < least) {
20233                     i = j;
20234                     least = sp_cache_256[i].cnt;
20235                 }
20236             }
20237         }
20238 
20239         XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
20240         XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
20241         sp_cache_256[i].set = 1;
20242         sp_cache_256[i].cnt = 1;
20243     }
20244 
20245     *cache = &sp_cache_256[i];
20246     sp_cache_256_last = i;
20247 }
20248 #endif /* FP_ECC */
20249 
20250 /* Multiply the base point of P256 by the scalar and return the result.
20251  * If map is true then convert result to affine coordinates.
20252  *
20253  * r     Resulting point.
20254  * g     Point to multiply.
20255  * k     Scalar to multiply by.
20256  * map   Indicates whether to convert result to affine.
20257  * ct    Constant time required.
20258  * heap  Heap to use for allocation.
20259  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20260  */
sp_256_ecc_mulmod_8(sp_point_256 * r,const sp_point_256 * g,const sp_digit * k,int map,int ct,void * heap)20261 static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
20262         int map, int ct, void* heap)
20263 {
20264 #ifndef FP_ECC
20265     return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
20266 #else
20267     sp_digit tmp[2 * 8 * 5];
20268     sp_cache_256_t* cache;
20269     int err = MP_OKAY;
20270 
20271 #ifndef HAVE_THREAD_LS
20272     if (initCacheMutex_256 == 0) {
20273          wc_InitMutex(&sp_cache_256_lock);
20274          initCacheMutex_256 = 1;
20275     }
20276     if (wc_LockMutex(&sp_cache_256_lock) != 0)
20277        err = BAD_MUTEX_E;
20278 #endif /* HAVE_THREAD_LS */
20279 
20280     if (err == MP_OKAY) {
20281         sp_ecc_get_cache_256(g, &cache);
20282         if (cache->cnt == 2)
20283             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
20284 
20285 #ifndef HAVE_THREAD_LS
20286         wc_UnLockMutex(&sp_cache_256_lock);
20287 #endif /* HAVE_THREAD_LS */
20288 
20289         if (cache->cnt < 2) {
20290             err = sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
20291         }
20292         else {
20293             err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
20294                     map, ct, heap);
20295         }
20296     }
20297 
20298     return err;
20299 #endif
20300 }
20301 
20302 #else
20303 #ifdef FP_ECC
20304 /* Generate the pre-computed table of points for the base point.
20305  *
20306  * width = 8
20307  * 256 entries
20308  * 32 bits between
20309  *
20310  * a      The base point.
20311  * table  Place to store generated point data.
20312  * tmp    Temporary data.
20313  * heap  Heap to use for allocation.
20314  */
sp_256_gen_stripe_table_8(const sp_point_256 * a,sp_table_entry_256 * table,sp_digit * tmp,void * heap)20315 static int sp_256_gen_stripe_table_8(const sp_point_256* a,
20316         sp_table_entry_256* table, sp_digit* tmp, void* heap)
20317 {
20318 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20319     sp_point_256* t = NULL;
20320 #else
20321     sp_point_256 t[3];
20322 #endif
20323     sp_point_256* s1 = NULL;
20324     sp_point_256* s2 = NULL;
20325     int i;
20326     int j;
20327     int err = MP_OKAY;
20328 
20329     (void)heap;
20330 
20331 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20332     t = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 3, heap,
20333                                      DYNAMIC_TYPE_ECC);
20334     if (t == NULL)
20335         err = MEMORY_E;
20336 #endif
20337 
20338     if (err == MP_OKAY) {
20339         s1 = t + 1;
20340         s2 = t + 2;
20341 
20342         err = sp_256_mod_mul_norm_8(t->x, a->x, p256_mod);
20343     }
20344     if (err == MP_OKAY) {
20345         err = sp_256_mod_mul_norm_8(t->y, a->y, p256_mod);
20346     }
20347     if (err == MP_OKAY) {
20348         err = sp_256_mod_mul_norm_8(t->z, a->z, p256_mod);
20349     }
20350     if (err == MP_OKAY) {
20351         t->infinity = 0;
20352         sp_256_proj_to_affine_8(t, tmp);
20353 
20354         XMEMCPY(s1->z, p256_norm_mod, sizeof(p256_norm_mod));
20355         s1->infinity = 0;
20356         XMEMCPY(s2->z, p256_norm_mod, sizeof(p256_norm_mod));
20357         s2->infinity = 0;
20358 
20359         /* table[0] = {0, 0, infinity} */
20360         XMEMSET(&table[0], 0, sizeof(sp_table_entry_256));
20361         /* table[1] = Affine version of 'a' in Montgomery form */
20362         XMEMCPY(table[1].x, t->x, sizeof(table->x));
20363         XMEMCPY(table[1].y, t->y, sizeof(table->y));
20364 
20365         for (i=1; i<8; i++) {
20366             sp_256_proj_point_dbl_n_8(t, 32, tmp);
20367             sp_256_proj_to_affine_8(t, tmp);
20368             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
20369             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
20370         }
20371 
20372         for (i=1; i<8; i++) {
20373             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
20374             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
20375             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
20376                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
20377                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
20378                 sp_256_proj_point_add_qz1_8(t, s1, s2, tmp);
20379                 sp_256_proj_to_affine_8(t, tmp);
20380                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
20381                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
20382             }
20383         }
20384     }
20385 
20386 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20387     if (t != NULL)
20388         XFREE(t, heap, DYNAMIC_TYPE_ECC);
20389 #endif
20390 
20391     return err;
20392 }
20393 
20394 #endif /* FP_ECC */
20395 #ifndef WC_NO_CACHE_RESISTANT
20396 /* Touch each possible entry that could be being copied.
20397  *
20398  * r      Point to copy into.
20399  * table  Table - start of the entires to access
20400  * idx    Index of entry to retrieve.
20401  */
sp_256_get_entry_256_8(sp_point_256 * r,const sp_table_entry_256 * table,int idx)20402 static void sp_256_get_entry_256_8(sp_point_256* r,
20403     const sp_table_entry_256* table, int idx)
20404 {
20405     int i;
20406     sp_digit mask;
20407 
20408     r->x[0] = 0;
20409     r->x[1] = 0;
20410     r->x[2] = 0;
20411     r->x[3] = 0;
20412     r->x[4] = 0;
20413     r->x[5] = 0;
20414     r->x[6] = 0;
20415     r->x[7] = 0;
20416     r->y[0] = 0;
20417     r->y[1] = 0;
20418     r->y[2] = 0;
20419     r->y[3] = 0;
20420     r->y[4] = 0;
20421     r->y[5] = 0;
20422     r->y[6] = 0;
20423     r->y[7] = 0;
20424     for (i = 1; i < 256; i++) {
20425         mask = 0 - (i == idx);
20426         r->x[0] |= mask & table[i].x[0];
20427         r->x[1] |= mask & table[i].x[1];
20428         r->x[2] |= mask & table[i].x[2];
20429         r->x[3] |= mask & table[i].x[3];
20430         r->x[4] |= mask & table[i].x[4];
20431         r->x[5] |= mask & table[i].x[5];
20432         r->x[6] |= mask & table[i].x[6];
20433         r->x[7] |= mask & table[i].x[7];
20434         r->y[0] |= mask & table[i].y[0];
20435         r->y[1] |= mask & table[i].y[1];
20436         r->y[2] |= mask & table[i].y[2];
20437         r->y[3] |= mask & table[i].y[3];
20438         r->y[4] |= mask & table[i].y[4];
20439         r->y[5] |= mask & table[i].y[5];
20440         r->y[6] |= mask & table[i].y[6];
20441         r->y[7] |= mask & table[i].y[7];
20442     }
20443 }
20444 #endif /* !WC_NO_CACHE_RESISTANT */
20445 /* Multiply the point by the scalar and return the result.
20446  * If map is true then convert result to affine coordinates.
20447  *
20448  * Stripe implementation.
20449  * Pre-generated: 2^0, 2^32, ...
20450  * Pre-generated: products of all combinations of above.
20451  * 8 doubles and adds (with qz=1)
20452  *
20453  * r      Resulting point.
20454  * k      Scalar to multiply by.
20455  * table  Pre-computed table.
20456  * map    Indicates whether to convert result to affine.
20457  * ct     Constant time required.
20458  * heap   Heap to use for allocation.
20459  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20460  */
sp_256_ecc_mulmod_stripe_8(sp_point_256 * r,const sp_point_256 * g,const sp_table_entry_256 * table,const sp_digit * k,int map,int ct,void * heap)20461 static int sp_256_ecc_mulmod_stripe_8(sp_point_256* r, const sp_point_256* g,
20462         const sp_table_entry_256* table, const sp_digit* k, int map,
20463         int ct, void* heap)
20464 {
20465 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20466     sp_point_256* rt = NULL;
20467     sp_digit* t = NULL;
20468 #else
20469     sp_point_256 rt[2];
20470     sp_digit t[2 * 8 * 5];
20471 #endif
20472     sp_point_256* p = NULL;
20473     int i;
20474     int j;
20475     int y;
20476     int x;
20477     int err = MP_OKAY;
20478 
20479     (void)g;
20480     /* Constant time used for cache attack resistance implementation. */
20481     (void)ct;
20482     (void)heap;
20483 
20484 
20485 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20486     rt = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap,
20487                                       DYNAMIC_TYPE_ECC);
20488     if (rt == NULL)
20489         err = MEMORY_E;
20490     if (err == MP_OKAY) {
20491         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, heap,
20492                                DYNAMIC_TYPE_ECC);
20493         if (t == NULL)
20494             err = MEMORY_E;
20495     }
20496 #endif
20497 
20498     if (err == MP_OKAY) {
20499         p = rt + 1;
20500 
20501         XMEMCPY(p->z, p256_norm_mod, sizeof(p256_norm_mod));
20502         XMEMCPY(rt->z, p256_norm_mod, sizeof(p256_norm_mod));
20503 
20504         y = 0;
20505         x = 31;
20506         for (j=0; j<8; j++) {
20507             y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
20508             x += 32;
20509         }
20510     #ifndef WC_NO_CACHE_RESISTANT
20511         if (ct) {
20512             sp_256_get_entry_256_8(rt, table, y);
20513         } else
20514     #endif
20515         {
20516             XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
20517             XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
20518         }
20519         rt->infinity = !y;
20520         for (i=30; i>=0; i--) {
20521             y = 0;
20522             x = i;
20523             for (j=0; j<8; j++) {
20524                 y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
20525                 x += 32;
20526             }
20527 
20528             sp_256_proj_point_dbl_8(rt, rt, t);
20529         #ifndef WC_NO_CACHE_RESISTANT
20530             if (ct) {
20531                 sp_256_get_entry_256_8(p, table, y);
20532             }
20533             else
20534         #endif
20535             {
20536                 XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
20537                 XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
20538             }
20539             p->infinity = !y;
20540             sp_256_proj_point_add_qz1_8(rt, rt, p, t);
20541         }
20542 
20543         if (map != 0) {
20544             sp_256_map_8(r, rt, t);
20545         }
20546         else {
20547             XMEMCPY(r, rt, sizeof(sp_point_256));
20548         }
20549     }
20550 
20551 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20552     if (t != NULL)
20553         XFREE(t, heap, DYNAMIC_TYPE_ECC);
20554     if (rt != NULL)
20555         XFREE(rt, heap, DYNAMIC_TYPE_ECC);
20556 #endif
20557 
20558     return err;
20559 }
20560 
20561 #ifdef FP_ECC
20562 #ifndef FP_ENTRIES
20563     #define FP_ENTRIES 16
20564 #endif
20565 
/* Cache entry - holds the precomputation table for one point together with
 * the bookkeeping used to match and evict entries.
 */
typedef struct sp_cache_256_t {
    /* X ordinate of point that table was generated from. */
    sp_digit x[8];
    /* Y ordinate of point that table was generated from. */
    sp_digit y[8];
    /* Precomputation table for point. */
    sp_table_entry_256 table[256];
    /* Number of times this point has been looked up: set to 1 when the entry
     * is claimed and incremented on each later match. Used to choose an
     * entry to evict and to decide when the table is generated. */
    uint32_t cnt;
    /* Non-zero when the entry holds a point, zero when it is free. */
    int set;
} sp_cache_256_t;
20579 
/* Cache of tables. */
static THREAD_LS_T sp_cache_256_t sp_cache_256[FP_ENTRIES];
/* Index of the entry most recently returned by sp_ecc_get_cache_256();
 * -1 until the first lookup. */
static THREAD_LS_T int sp_cache_256_last = -1;
/* Set to 1 once sp_ecc_get_cache_256() has marked every entry as free. */
static THREAD_LS_T int sp_cache_256_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Set to 1 once sp_cache_256_lock has been initialized. */
    static volatile int initCacheMutex_256 = 0;
    /* Lock taken around cache lookup and table generation. */
    static wolfSSL_Mutex sp_cache_256_lock;
#endif
20591 
20592 /* Get the cache entry for the point.
20593  *
20594  * g      [in]   Point scalar multipling.
20595  * cache  [out]  Cache table to use.
20596  */
sp_ecc_get_cache_256(const sp_point_256 * g,sp_cache_256_t ** cache)20597 static void sp_ecc_get_cache_256(const sp_point_256* g, sp_cache_256_t** cache)
20598 {
20599     int i;
20600     int j;
20601     uint32_t least;
20602 
20603     if (sp_cache_256_inited == 0) {
20604         for (i=0; i<FP_ENTRIES; i++) {
20605             sp_cache_256[i].set = 0;
20606         }
20607         sp_cache_256_inited = 1;
20608     }
20609 
20610     /* Compare point with those in cache. */
20611     for (i=0; i<FP_ENTRIES; i++) {
20612         if (!sp_cache_256[i].set)
20613             continue;
20614 
20615         if (sp_256_cmp_equal_8(g->x, sp_cache_256[i].x) &
20616                            sp_256_cmp_equal_8(g->y, sp_cache_256[i].y)) {
20617             sp_cache_256[i].cnt++;
20618             break;
20619         }
20620     }
20621 
20622     /* No match. */
20623     if (i == FP_ENTRIES) {
20624         /* Find empty entry. */
20625         i = (sp_cache_256_last + 1) % FP_ENTRIES;
20626         for (; i != sp_cache_256_last; i=(i+1)%FP_ENTRIES) {
20627             if (!sp_cache_256[i].set) {
20628                 break;
20629             }
20630         }
20631 
20632         /* Evict least used. */
20633         if (i == sp_cache_256_last) {
20634             least = sp_cache_256[0].cnt;
20635             for (j=1; j<FP_ENTRIES; j++) {
20636                 if (sp_cache_256[j].cnt < least) {
20637                     i = j;
20638                     least = sp_cache_256[i].cnt;
20639                 }
20640             }
20641         }
20642 
20643         XMEMCPY(sp_cache_256[i].x, g->x, sizeof(sp_cache_256[i].x));
20644         XMEMCPY(sp_cache_256[i].y, g->y, sizeof(sp_cache_256[i].y));
20645         sp_cache_256[i].set = 1;
20646         sp_cache_256[i].cnt = 1;
20647     }
20648 
20649     *cache = &sp_cache_256[i];
20650     sp_cache_256_last = i;
20651 }
20652 #endif /* FP_ECC */
20653 
20654 /* Multiply the base point of P256 by the scalar and return the result.
20655  * If map is true then convert result to affine coordinates.
20656  *
20657  * r     Resulting point.
20658  * g     Point to multiply.
20659  * k     Scalar to multiply by.
20660  * map   Indicates whether to convert result to affine.
20661  * ct    Constant time required.
20662  * heap  Heap to use for allocation.
20663  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20664  */
sp_256_ecc_mulmod_8(sp_point_256 * r,const sp_point_256 * g,const sp_digit * k,int map,int ct,void * heap)20665 static int sp_256_ecc_mulmod_8(sp_point_256* r, const sp_point_256* g, const sp_digit* k,
20666         int map, int ct, void* heap)
20667 {
20668 #ifndef FP_ECC
20669     return sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
20670 #else
20671     sp_digit tmp[2 * 8 * 5];
20672     sp_cache_256_t* cache;
20673     int err = MP_OKAY;
20674 
20675 #ifndef HAVE_THREAD_LS
20676     if (initCacheMutex_256 == 0) {
20677          wc_InitMutex(&sp_cache_256_lock);
20678          initCacheMutex_256 = 1;
20679     }
20680     if (wc_LockMutex(&sp_cache_256_lock) != 0)
20681        err = BAD_MUTEX_E;
20682 #endif /* HAVE_THREAD_LS */
20683 
20684     if (err == MP_OKAY) {
20685         sp_ecc_get_cache_256(g, &cache);
20686         if (cache->cnt == 2)
20687             sp_256_gen_stripe_table_8(g, cache->table, tmp, heap);
20688 
20689 #ifndef HAVE_THREAD_LS
20690         wc_UnLockMutex(&sp_cache_256_lock);
20691 #endif /* HAVE_THREAD_LS */
20692 
20693         if (cache->cnt < 2) {
20694             err = sp_256_ecc_mulmod_fast_8(r, g, k, map, ct, heap);
20695         }
20696         else {
20697             err = sp_256_ecc_mulmod_stripe_8(r, g, cache->table, k,
20698                     map, ct, heap);
20699         }
20700     }
20701 
20702     return err;
20703 #endif
20704 }
20705 
20706 #endif /* WOLFSSL_SP_SMALL */
20707 /* Multiply the point by the scalar and return the result.
20708  * If map is true then convert result to affine coordinates.
20709  *
20710  * km    Scalar to multiply by.
20711  * p     Point to multiply.
20712  * r     Resulting point.
20713  * map   Indicates whether to convert result to affine.
20714  * heap  Heap to use for allocation.
20715  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20716  */
sp_ecc_mulmod_256(const mp_int * km,const ecc_point * gm,ecc_point * r,int map,void * heap)20717 int sp_ecc_mulmod_256(const mp_int* km, const ecc_point* gm, ecc_point* r,
20718         int map, void* heap)
20719 {
20720 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20721     sp_point_256* point = NULL;
20722     sp_digit* k = NULL;
20723 #else
20724     sp_point_256 point[1];
20725     sp_digit k[8];
20726 #endif
20727     int err = MP_OKAY;
20728 
20729 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20730     point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap,
20731                                          DYNAMIC_TYPE_ECC);
20732     if (point == NULL)
20733         err = MEMORY_E;
20734     if (err == MP_OKAY) {
20735         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
20736                                DYNAMIC_TYPE_ECC);
20737         if (k == NULL)
20738             err = MEMORY_E;
20739     }
20740 #endif
20741 
20742     if (err == MP_OKAY) {
20743         sp_256_from_mp(k, 8, km);
20744         sp_256_point_from_ecc_point_8(point, gm);
20745 
20746             err = sp_256_ecc_mulmod_8(point, point, k, map, 1, heap);
20747     }
20748     if (err == MP_OKAY) {
20749         err = sp_256_point_to_ecc_point_8(point, r);
20750     }
20751 
20752 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20753     if (k != NULL)
20754         XFREE(k, heap, DYNAMIC_TYPE_ECC);
20755     if (point != NULL)
20756         XFREE(point, heap, DYNAMIC_TYPE_ECC);
20757 #endif
20758 
20759     return err;
20760 }
20761 
20762 /* Multiply the point by the scalar, add point a and return the result.
20763  * If map is true then convert result to affine coordinates.
20764  *
20765  * km      Scalar to multiply by.
20766  * p       Point to multiply.
20767  * am      Point to add to scalar mulitply result.
20768  * inMont  Point to add is in montgomery form.
20769  * r       Resulting point.
20770  * map     Indicates whether to convert result to affine.
20771  * heap    Heap to use for allocation.
20772  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20773  */
sp_ecc_mulmod_add_256(const mp_int * km,const ecc_point * gm,const ecc_point * am,int inMont,ecc_point * r,int map,void * heap)20774 int sp_ecc_mulmod_add_256(const mp_int* km, const ecc_point* gm,
20775     const ecc_point* am, int inMont, ecc_point* r, int map, void* heap)
20776 {
20777 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20778     sp_point_256* point = NULL;
20779     sp_digit* k = NULL;
20780 #else
20781     sp_point_256 point[2];
20782     sp_digit k[8 + 8 * 2 * 5];
20783 #endif
20784     sp_point_256* addP = NULL;
20785     sp_digit* tmp = NULL;
20786     int err = MP_OKAY;
20787 
20788 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20789     point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap,
20790                                          DYNAMIC_TYPE_ECC);
20791     if (point == NULL)
20792         err = MEMORY_E;
20793     if (err == MP_OKAY) {
20794         k = (sp_digit*)XMALLOC(
20795             sizeof(sp_digit) * (8 + 8 * 2 * 5), heap,
20796             DYNAMIC_TYPE_ECC);
20797         if (k == NULL)
20798             err = MEMORY_E;
20799     }
20800 #endif
20801 
20802     if (err == MP_OKAY) {
20803         addP = point + 1;
20804         tmp = k + 8;
20805 
20806         sp_256_from_mp(k, 8, km);
20807         sp_256_point_from_ecc_point_8(point, gm);
20808         sp_256_point_from_ecc_point_8(addP, am);
20809     }
20810     if ((err == MP_OKAY) && (!inMont)) {
20811         err = sp_256_mod_mul_norm_8(addP->x, addP->x, p256_mod);
20812     }
20813     if ((err == MP_OKAY) && (!inMont)) {
20814         err = sp_256_mod_mul_norm_8(addP->y, addP->y, p256_mod);
20815     }
20816     if ((err == MP_OKAY) && (!inMont)) {
20817         err = sp_256_mod_mul_norm_8(addP->z, addP->z, p256_mod);
20818     }
20819     if (err == MP_OKAY) {
20820             err = sp_256_ecc_mulmod_8(point, point, k, 0, 0, heap);
20821     }
20822     if (err == MP_OKAY) {
20823             sp_256_proj_point_add_8(point, point, addP, tmp);
20824 
20825         if (map) {
20826                 sp_256_map_8(point, point, tmp);
20827         }
20828 
20829         err = sp_256_point_to_ecc_point_8(point, r);
20830     }
20831 
20832 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
20833     if (k != NULL)
20834         XFREE(k, heap, DYNAMIC_TYPE_ECC);
20835     if (point != NULL)
20836         XFREE(point, heap, DYNAMIC_TYPE_ECC);
20837 #endif
20838 
20839     return err;
20840 }
20841 
20842 #ifdef WOLFSSL_SP_SMALL
/* Striping precomputation table for the P256 base point.
 * 4 points combined into a table of 16 points.
 * Distance of 64 between points.
 *
 * Each entry is an x ordinate followed by a y ordinate, 8 words each.
 * Entry 0 is all zeros.
 */
static const sp_table_entry_256 p256_table[16] = {
    /* 0 */
    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
    /* 1 */
    { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
        0xa53755c6,0x18905f76 },
      { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
        0x25885d85,0x8571ff18 } },
    /* 2 */
    { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
        0xfd1b667f,0x2f5e6961 },
      { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
        0x8d6f0f7b,0xf648f916 } },
    /* 3 */
    { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
        0x133d0015,0x5abe0285 },
      { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
        0x6b6f7383,0x94bb725b } },
    /* 4 */
    { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
        0x21d324f6,0x61d587d4 },
      { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
        0x4621efbe,0xfa11fe12 } },
    /* 5 */
    { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
        0x1f13bedc,0x586eb04c },
      { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
        0x70864f11,0x19d5ac08 } },
    /* 6 */
    { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
        0xc3b266b1,0xbb6de651 },
      { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
        0x5d18b99b,0x60b4619a } },
    /* 7 */
    { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
        0xaeebffcd,0x9d0f27b2 },
      { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
        0x356ec48d,0x244a566d } },
    /* 8 */
    { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
        0xcd42ab1b,0x803f3e02 },
      { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
        0x5067adc1,0xc097440e } },
    /* 9 */
    { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
        0x915f1f30,0xf1af32d5 },
      { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
        0xe2d41c8b,0x23d0f130 } },
    /* 10 */
    { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
        0x7990216a,0x50bbb4d9 },
      { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
        0x01fe49c3,0x2b100118 } },
    /* 11 */
    { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
        0x83fbae0c,0xdd558999 },
      { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
        0x149d6041,0xe6e4c551 } },
    /* 12 */
    { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
        0xdb7e63af,0xfad27148 },
      { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
        0x9f0e1a84,0x77387de3 } },
    /* 13 */
    { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
        0xbef0c47e,0xb37b85c0 },
      { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
        0xf9f628d5,0x9c135ac8 } },
    /* 14 */
    { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
        0x91ece900,0xc109f9cb },
      { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
        0x2eee1ee1,0x9bc3344f } },
    /* 15 */
    { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
        0x5f1a4cc1,0x29591d52 },
      { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
        0x18ef332c,0x6376551f } },
};
20927 
20928 /* Multiply the base point of P256 by the scalar and return the result.
20929  * If map is true then convert result to affine coordinates.
20930  *
20931  * Stripe implementation.
20932  * Pre-generated: 2^0, 2^64, ...
20933  * Pre-generated: products of all combinations of above.
20934  * 4 doubles and adds (with qz=1)
20935  *
20936  * r     Resulting point.
20937  * k     Scalar to multiply by.
20938  * map   Indicates whether to convert result to affine.
20939  * ct    Constant time required.
20940  * heap  Heap to use for allocation.
20941  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
20942  */
sp_256_ecc_mulmod_base_8(sp_point_256 * r,const sp_digit * k,int map,int ct,void * heap)20943 static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
20944         int map, int ct, void* heap)
20945 {
20946     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
20947                                       k, map, ct, heap);
20948 }
20949 
20950 #else
20951 /* Striping precomputation table.
20952  * 8 points combined into a table of 256 points.
20953  * Distance of 32 between points.
20954  */
20955 static const sp_table_entry_256 p256_table[256] = {
20956     /* 0 */
20957     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
20958       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
20959     /* 1 */
20960     { { 0x18a9143c,0x79e730d4,0x5fedb601,0x75ba95fc,0x77622510,0x79fb732b,
20961         0xa53755c6,0x18905f76 },
20962       { 0xce95560a,0xddf25357,0xba19e45c,0x8b4ab8e4,0xdd21f325,0xd2e88688,
20963         0x25885d85,0x8571ff18 } },
20964     /* 2 */
20965     { { 0x4147519a,0x20288602,0x26b372f0,0xd0981eac,0xa785ebc8,0xa9d4a7ca,
20966         0xdbdf58e9,0xd953c50d },
20967       { 0xfd590f8f,0x9d6361cc,0x44e6c917,0x72e9626b,0x22eb64cf,0x7fd96110,
20968         0x9eb288f3,0x863ebb7e } },
20969     /* 3 */
20970     { { 0x5cdb6485,0x7856b623,0x2f0a2f97,0x808f0ea2,0x4f7e300b,0x3e68d954,
20971         0xb5ff80a0,0x00076055 },
20972       { 0x838d2010,0x7634eb9b,0x3243708a,0x54014fbb,0x842a6606,0xe0e47d39,
20973         0x34373ee0,0x83087761 } },
20974     /* 4 */
20975     { { 0x16a0d2bb,0x4f922fc5,0x1a623499,0x0d5cc16c,0x57c62c8b,0x9241cf3a,
20976         0xfd1b667f,0x2f5e6961 },
20977       { 0xf5a01797,0x5c15c70b,0x60956192,0x3d20b44d,0x071fdb52,0x04911b37,
20978         0x8d6f0f7b,0xf648f916 } },
20979     /* 5 */
20980     { { 0xe137bbbc,0x9e566847,0x8a6a0bec,0xe434469e,0x79d73463,0xb1c42761,
20981         0x133d0015,0x5abe0285 },
20982       { 0xc04c7dab,0x92aa837c,0x43260c07,0x573d9f4c,0x78e6cc37,0x0c931562,
20983         0x6b6f7383,0x94bb725b } },
20984     /* 6 */
20985     { { 0x720f141c,0xbbf9b48f,0x2df5bc74,0x6199b3cd,0x411045c4,0xdc3f6129,
20986         0x2f7dc4ef,0xcdd6bbcb },
20987       { 0xeaf436fd,0xcca6700b,0xb99326be,0x6f647f6d,0x014f2522,0x0c0fa792,
20988         0x4bdae5f6,0xa361bebd } },
20989     /* 7 */
20990     { { 0x597c13c7,0x28aa2558,0x50b7c3e1,0xc38d635f,0xf3c09d1d,0x07039aec,
20991         0xc4b5292c,0xba12ca09 },
20992       { 0x59f91dfd,0x9e408fa4,0xceea07fb,0x3af43b66,0x9d780b29,0x1eceb089,
20993         0x701fef4b,0x53ebb99d } },
20994     /* 8 */
20995     { { 0xb0e63d34,0x4fe7ee31,0xa9e54fab,0xf4600572,0xd5e7b5a4,0xc0493334,
20996         0x06d54831,0x8589fb92 },
20997       { 0x6583553a,0xaa70f5cc,0xe25649e5,0x0879094a,0x10044652,0xcc904507,
20998         0x02541c4f,0xebb0696d } },
20999     /* 9 */
21000     { { 0xac1647c5,0x4616ca15,0xc4cf5799,0xb8127d47,0x764dfbac,0xdc666aa3,
21001         0xd1b27da3,0xeb2820cb },
21002       { 0x6a87e008,0x9406f8d8,0x922378f3,0xd87dfa9d,0x80ccecb2,0x56ed2e42,
21003         0x55a7da1d,0x1f28289b } },
21004     /* 10 */
21005     { { 0x3b89da99,0xabbaa0c0,0xb8284022,0xa6f2d79e,0xb81c05e8,0x27847862,
21006         0x05e54d63,0x337a4b59 },
21007       { 0x21f7794a,0x3c67500d,0x7d6d7f61,0x207005b7,0x04cfd6e8,0x0a5a3781,
21008         0xf4c2fbd6,0x0d65e0d5 } },
21009     /* 11 */
21010     { { 0xb5275d38,0xd9d09bbe,0x0be0a358,0x4268a745,0x973eb265,0xf0762ff4,
21011         0x52f4a232,0xc23da242 },
21012       { 0x0b94520c,0x5da1b84f,0xb05bd78e,0x09666763,0x94d29ea1,0x3a4dcb86,
21013         0xc790cff1,0x19de3b8c } },
21014     /* 12 */
21015     { { 0x26c5fe04,0x183a716c,0x3bba1bdb,0x3b28de0b,0xa4cb712c,0x7432c586,
21016         0x91fccbfd,0xe34dcbd4 },
21017       { 0xaaa58403,0xb408d46b,0x82e97a53,0x9a697486,0x36aaa8af,0x9e390127,
21018         0x7b4e0f7f,0xe7641f44 } },
21019     /* 13 */
21020     { { 0xdf64ba59,0x7d753941,0x0b0242fc,0xd33f10ec,0xa1581859,0x4f06dfc6,
21021         0x052a57bf,0x4a12df57 },
21022       { 0x9439dbd0,0xbfa6338f,0xbde53e1f,0xd3c24bd4,0x21f1b314,0xfd5e4ffa,
21023         0xbb5bea46,0x6af5aa93 } },
21024     /* 14 */
21025     { { 0x10c91999,0xda10b699,0x2a580491,0x0a24b440,0xb8cc2090,0x3e0094b4,
21026         0x66a44013,0x5fe3475a },
21027       { 0xf93e7b4b,0xb0f8cabd,0x7c23f91a,0x292b501a,0xcd1e6263,0x42e889ae,
21028         0xecfea916,0xb544e308 } },
21029     /* 15 */
21030     { { 0x16ddfdce,0x6478c6e9,0xf89179e6,0x2c329166,0x4d4e67e1,0x4e8d6e76,
21031         0xa6b0c20b,0xe0b6b2bd },
21032       { 0xbb7efb57,0x0d312df2,0x790c4007,0x1aac0dde,0x679bc944,0xf90336ad,
21033         0x25a63774,0x71c023de } },
21034     /* 16 */
21035     { { 0xbfe20925,0x62a8c244,0x8fdce867,0x91c19ac3,0xdd387063,0x5a96a5d5,
21036         0x21d324f6,0x61d587d4 },
21037       { 0xa37173ea,0xe87673a2,0x53778b65,0x23848008,0x05bab43e,0x10f8441e,
21038         0x4621efbe,0xfa11fe12 } },
21039     /* 17 */
21040     { { 0x2cb19ffd,0x1c891f2b,0xb1923c23,0x01ba8d5b,0x8ac5ca8e,0xb6d03d67,
21041         0x1f13bedc,0x586eb04c },
21042       { 0x27e8ed09,0x0c35c6e5,0x1819ede2,0x1e81a33c,0x56c652fa,0x278fd6c0,
21043         0x70864f11,0x19d5ac08 } },
21044     /* 18 */
21045     { { 0x309a4e1f,0x1e99f581,0xe9270074,0xab7de71b,0xefd28d20,0x26a5ef0b,
21046         0x7f9c563f,0xe7c0073f },
21047       { 0x0ef59f76,0x1f6d663a,0x20fcb050,0x669b3b54,0x7a6602d4,0xc08c1f7a,
21048         0xc65b3c0a,0xe08504fe } },
21049     /* 19 */
21050     { { 0xa031b3ca,0xf098f68d,0xe6da6d66,0x6d1cab9e,0x94f246e8,0x5bfd81fa,
21051         0x5b0996b4,0x78f01882 },
21052       { 0x3a25787f,0xb7eefde4,0x1dccac9b,0x8016f80d,0xb35bfc36,0x0cea4877,
21053         0x7e94747a,0x43a773b8 } },
21054     /* 20 */
21055     { { 0xd2b533d5,0x62577734,0xa1bdddc0,0x673b8af6,0xa79ec293,0x577e7c9a,
21056         0xc3b266b1,0xbb6de651 },
21057       { 0xb65259b3,0xe7e9303a,0xd03a7480,0xd6a0afd3,0x9b3cfc27,0xc5ac83d1,
21058         0x5d18b99b,0x60b4619a } },
21059     /* 21 */
21060     { { 0x1ae5aa1c,0xbd6a38e1,0x49e73658,0xb8b7652b,0xee5f87ed,0x0b130014,
21061         0xaeebffcd,0x9d0f27b2 },
21062       { 0x7a730a55,0xca924631,0xddbbc83a,0x9c955b2f,0xac019a71,0x07c1dfe0,
21063         0x356ec48d,0x244a566d } },
21064     /* 22 */
21065     { { 0xeacf1f96,0x6db0394a,0x024c271c,0x9f2122a9,0x82cbd3b9,0x2626ac1b,
21066         0x3581ef69,0x45e58c87 },
21067       { 0xa38f9dbc,0xd3ff479d,0xe888a040,0xa8aaf146,0x46e0bed7,0x945adfb2,
21068         0xc1e4b7a4,0xc040e21c } },
21069     /* 23 */
21070     { { 0x6f8117b6,0x847af000,0x73a35433,0x651969ff,0x1d9475eb,0x482b3576,
21071         0x682c6ec7,0x1cdf5c97 },
21072       { 0x11f04839,0x7db775b4,0x48de1698,0x7dbeacf4,0xb70b3219,0xb2921dd1,
21073         0xa92dff3d,0x046755f8 } },
21074     /* 24 */
21075     { { 0xbce8ffcd,0xcc8ac5d2,0x2fe61a82,0x0d53c48b,0x7202d6c7,0xf6f16172,
21076         0x3b83a5f3,0x046e5e11 },
21077       { 0xd8007f01,0xe7b8ff64,0x5af43183,0x7fb1ef12,0x35e1a03c,0x045c5ea6,
21078         0x303d005b,0x6e0106c3 } },
21079     /* 25 */
21080     { { 0x88dd73b1,0x48c73584,0x995ed0d9,0x7670708f,0xc56a2ab7,0x38385ea8,
21081         0xe901cf1f,0x442594ed },
21082       { 0x12d4b65b,0xf8faa2c9,0x96c90c37,0x94c2343b,0x5e978d1f,0xd326e4a1,
21083         0x4c2ee68e,0xa796fa51 } },
21084     /* 26 */
21085     { { 0x823addd7,0x359fb604,0xe56693b3,0x9e2a6183,0x3cbf3c80,0xf885b78e,
21086         0xc69766e9,0xe4ad2da9 },
21087       { 0x8e048a61,0x357f7f42,0xc092d9a0,0x082d198c,0xc03ed8ef,0xfc3a1af4,
21088         0xc37b5143,0xc5e94046 } },
21089     /* 27 */
21090     { { 0x2be75f9e,0x476a538c,0xcb123a78,0x6fd1a9e8,0xb109c04b,0xd85e4df0,
21091         0xdb464747,0x63283daf },
21092       { 0xbaf2df15,0xce728cf7,0x0ad9a7f4,0xe592c455,0xe834bcc3,0xfab226ad,
21093         0x1981a938,0x68bd19ab } },
21094     /* 28 */
21095     { { 0x1887d659,0xc08ead51,0xb359305a,0x3374d5f4,0xcfe74fe3,0x96986981,
21096         0x3c6fdfd6,0x495292f5 },
21097       { 0x1acec896,0x4a878c9e,0xec5b4484,0xd964b210,0x664d60a7,0x6696f7e2,
21098         0x26036837,0x0ec7530d } },
21099     /* 29 */
21100     { { 0xad2687bb,0x2da13a05,0xf32e21fa,0xa1f83b6a,0x1dd4607b,0x390f5ef5,
21101         0x64863f0b,0x0f6207a6 },
21102       { 0x0f138233,0xbd67e3bb,0x272aa718,0xdd66b96c,0x26ec88ae,0x8ed00407,
21103         0x08ed6dcf,0xff0db072 } },
21104     /* 30 */
21105     { { 0x4c95d553,0x749fa101,0x5d680a8a,0xa44052fd,0xff3b566f,0x183b4317,
21106         0x88740ea3,0x313b513c },
21107       { 0x08d11549,0xb402e2ac,0xb4dee21c,0x071ee10b,0x47f2320e,0x26b987dd,
21108         0x86f19f81,0x2d3abcf9 } },
21109     /* 31 */
21110     { { 0x815581a2,0x4c288501,0x632211af,0x9a0a6d56,0x0cab2e99,0x19ba7a0f,
21111         0xded98cdf,0xc036fa10 },
21112       { 0xc1fbd009,0x29ae08ba,0x06d15816,0x0b68b190,0x9b9e0d8f,0xc2eb3277,
21113         0xb6d40194,0xa6b2a2c4 } },
21114     /* 32 */
21115     { { 0x6d3549cf,0xd433e50f,0xfacd665e,0x6f33696f,0xce11fcb4,0x695bfdac,
21116         0xaf7c9860,0x810ee252 },
21117       { 0x7159bb2c,0x65450fe1,0x758b357b,0xf7dfbebe,0xd69fea72,0x2b057e74,
21118         0x92731745,0xd485717a } },
21119     /* 33 */
21120     { { 0xf0cb5a98,0x11741a8a,0x1f3110bf,0xd3da8f93,0xab382adf,0x1994e2cb,
21121         0x2f9a604e,0x6a6045a7 },
21122       { 0xa2b2411d,0x170c0d3f,0x510e96e0,0xbe0eb83e,0x8865b3cc,0x3bcc9f73,
21123         0xf9e15790,0xd3e45cfa } },
21124     /* 34 */
21125     { { 0xe83f7669,0xce1f69bb,0x72877d6b,0x09f8ae82,0x3244278d,0x9548ae54,
21126         0xe3c2c19c,0x207755de },
21127       { 0x6fef1945,0x87bd61d9,0xb12d28c3,0x18813cef,0x72df64aa,0x9fbcd1d6,
21128         0x7154b00d,0x48dc5ee5 } },
21129     /* 35 */
21130     { { 0xf7e5a199,0x123790bf,0x989ccbb7,0xe0efb8cf,0x0a519c79,0xc27a2bfe,
21131         0xdff6f445,0xf2fb0aed },
21132       { 0xf0b5025f,0x41c09575,0x40fa9f22,0x550543d7,0x380bfbd0,0x8fa3c8ad,
21133         0xdb28d525,0xa13e9015 } },
21134     /* 36 */
21135     { { 0xa2b65cbc,0xf9f7a350,0x2a464226,0x0b04b972,0xe23f07a1,0x265ce241,
21136         0x1497526f,0x2bf0d6b0 },
21137       { 0x4b216fb7,0xd3d4dd3f,0xfbdda26a,0xf7d7b867,0x6708505c,0xaeb7b83f,
21138         0x162fe89f,0x42a94a5a } },
21139     /* 37 */
21140     { { 0xeaadf191,0x5846ad0b,0x25a268d7,0x0f8a4890,0x494dc1f6,0xe8603050,
21141         0xc65ede3d,0x2c2dd969 },
21142       { 0x93849c17,0x6d02171d,0x1da250dd,0x460488ba,0x3c3a5485,0x4810c706,
21143         0x42c56dbc,0xf437fa1f } },
21144     /* 38 */
21145     { { 0x4a0f7dab,0x6aa0d714,0x1776e9ac,0x0f049793,0xf5f39786,0x52c0a050,
21146         0x54707aa8,0xaaf45b33 },
21147       { 0xc18d364a,0x85e37c33,0x3e497165,0xd40b9b06,0x15ec5444,0xf4171681,
21148         0xf4f272bc,0xcdf6310d } },
21149     /* 39 */
21150     { { 0x8ea8b7ef,0x7473c623,0x85bc2287,0x08e93518,0x2bda8e34,0x41956772,
21151         0xda9e2ff2,0xf0d008ba },
21152       { 0x2414d3b1,0x2912671d,0xb019ea76,0xb3754985,0x453bcbdb,0x5c61b96d,
21153         0xca887b8b,0x5bd5c2f5 } },
21154     /* 40 */
21155     { { 0xf49a3154,0xef0f469e,0x6e2b2e9a,0x3e85a595,0xaa924a9c,0x45aaec1e,
21156         0xa09e4719,0xaa12dfc8 },
21157       { 0x4df69f1d,0x26f27227,0xa2ff5e73,0xe0e4c82c,0xb7a9dd44,0xb9d8ce73,
21158         0xe48ca901,0x6c036e73 } },
21159     /* 41 */
21160     { { 0x0f6e3138,0x5cfae12a,0x25ad345a,0x6966ef00,0x45672bc5,0x8993c64b,
21161         0x96afbe24,0x292ff658 },
21162       { 0x5e213402,0xd5250d44,0x4392c9fe,0xf6580e27,0xda1c72e8,0x097b397f,
21163         0x311b7276,0x644e0c90 } },
21164     /* 42 */
21165     { { 0xa47153f0,0xe1e421e1,0x920418c9,0xb86c3b79,0x705d7672,0x93bdce87,
21166         0xcab79a77,0xf25ae793 },
21167       { 0x6d869d0c,0x1f3194a3,0x4986c264,0x9d55c882,0x096e945e,0x49fb5ea3,
21168         0x13db0a3e,0x39b8e653 } },
21169     /* 43 */
21170     { { 0xb6fd2e59,0x37754200,0x9255c98f,0x35e2c066,0x0e2a5739,0xd9dab21a,
21171         0x0f19db06,0x39122f2f },
21172       { 0x03cad53c,0xcfbce1e0,0xe65c17e3,0x225b2c0f,0x9aa13877,0x72baf1d2,
21173         0xce80ff8d,0x8de80af8 } },
21174     /* 44 */
21175     { { 0x207bbb76,0xafbea8d9,0x21782758,0x921c7e7c,0x1c0436b1,0xdfa2b74b,
21176         0x2e368c04,0x87194906 },
21177       { 0xa3993df5,0xb5f928bb,0xf3b3d26a,0x639d75b5,0x85b55050,0x011aa78a,
21178         0x5b74fde1,0xfc315e6a } },
21179     /* 45 */
21180     { { 0xe8d6ecfa,0x561fd41a,0x1aec7f86,0x5f8c44f6,0x4924741d,0x98452a7b,
21181         0xee389088,0xe6d4a7ad },
21182       { 0x4593c75d,0x60552ed1,0xdd271162,0x70a70da4,0x7ba2c7db,0xd2aede93,
21183         0x9be2ae57,0x35dfaf9a } },
21184     /* 46 */
21185     { { 0xaa736636,0x6b956fcd,0xae2cab7e,0x09f51d97,0x0f349966,0xfb10bf41,
21186         0x1c830d2b,0x1da5c7d7 },
21187       { 0x3cce6825,0x5c41e483,0xf9573c3b,0x15ad118f,0xf23036b8,0xa28552c7,
21188         0xdbf4b9d6,0x7077c0fd } },
21189     /* 47 */
21190     { { 0x46b9661c,0xbf63ff8d,0x0d2cfd71,0xa1dfd36b,0xa847f8f7,0x0373e140,
21191         0xe50efe44,0x53a8632e },
21192       { 0x696d8051,0x0976ff68,0xc74f468a,0xdaec0c95,0x5e4e26bd,0x62994dc3,
21193         0x34e1fcc1,0x028ca76d } },
21194     /* 48 */
21195     { { 0xfc9877ee,0xd11d47dc,0x801d0002,0xc8b36210,0x54c260b6,0xd002c117,
21196         0x6962f046,0x04c17cd8 },
21197       { 0xb0daddf5,0x6d9bd094,0x24ce55c0,0xbea23575,0x72da03b5,0x663356e6,
21198         0xfed97474,0xf7ba4de9 } },
21199     /* 49 */
21200     { { 0xebe1263f,0xd0dbfa34,0x71ae7ce6,0x55763735,0x82a6f523,0xd2440553,
21201         0x52131c41,0xe31f9600 },
21202       { 0xea6b6ec6,0xd1bb9216,0x73c2fc44,0x37a1d12e,0x89d0a294,0xc10e7eac,
21203         0xce34d47b,0xaa3a6259 } },
21204     /* 50 */
21205     { { 0x36f3dcd3,0xfbcf9df5,0xd2bf7360,0x6ceded50,0xdf504f5b,0x491710fa,
21206         0x7e79daee,0x2398dd62 },
21207       { 0x6d09569e,0xcf4705a3,0x5149f769,0xea0619bb,0x35f6034c,0xff9c0377,
21208         0x1c046210,0x5717f5b2 } },
21209     /* 51 */
21210     { { 0x21dd895e,0x9fe229c9,0x40c28451,0x8e518500,0x1d637ecd,0xfa13d239,
21211         0x0e3c28de,0x660a2c56 },
21212       { 0xd67fcbd0,0x9cca88ae,0x0ea9f096,0xc8472478,0x72e92b4d,0x32b2f481,
21213         0x4f522453,0x624ee54c } },
21214     /* 52 */
21215     { { 0xd897eccc,0x09549ce4,0x3f9880aa,0x4d49d1d9,0x043a7c20,0x723c2423,
21216         0x92bdfbc0,0x4f392afb },
21217       { 0x7de44fd9,0x6969f8fa,0x57b32156,0xb66cfbe4,0x368ebc3c,0xdb2fa803,
21218         0xccdb399c,0x8a3e7977 } },
21219     /* 53 */
21220     { { 0x06c4b125,0xdde1881f,0xf6e3ca8c,0xae34e300,0x5c7a13e9,0xef6999de,
21221         0x70c24404,0x3888d023 },
21222       { 0x44f91081,0x76280356,0x5f015504,0x3d9fcf61,0x632cd36e,0x1827edc8,
21223         0x18102336,0xa5e62e47 } },
21224     /* 54 */
21225     { { 0x2facd6c8,0x1a825ee3,0x54bcbc66,0x699c6354,0x98df9931,0x0ce3edf7,
21226         0x466a5adc,0x2c4768e6 },
21227       { 0x90a64bc9,0xb346ff8c,0xe4779f5c,0x630a6020,0xbc05e884,0xd949d064,
21228         0xf9e652a0,0x7b5e6441 } },
21229     /* 55 */
21230     { { 0x1d28444a,0x2169422c,0xbe136a39,0xe996c5d8,0xfb0c7fce,0x2387afe5,
21231         0x0c8d744a,0xb8af73cb },
21232       { 0x338b86fd,0x5fde83aa,0xa58a5cff,0xfee3f158,0x20ac9433,0xc9ee8f6f,
21233         0x7f3f0895,0xa036395f } },
21234     /* 56 */
21235     { { 0xa10f7770,0x8c73c6bb,0xa12a0e24,0xa6f16d81,0x51bc2b9f,0x100df682,
21236         0x875fb533,0x4be36b01 },
21237       { 0x9fb56dbb,0x9226086e,0x07e7a4f8,0x306fef8b,0x66d52f20,0xeeaccc05,
21238         0x1bdc00c0,0x8cbc9a87 } },
21239     /* 57 */
21240     { { 0xc0dac4ab,0xe131895c,0x712ff112,0xa874a440,0x6a1cee57,0x6332ae7c,
21241         0x0c0835f8,0x44e7553e },
21242       { 0x7734002d,0x6d503fff,0x0b34425c,0x9d35cb8b,0x0e8738b5,0x95f70276,
21243         0x5eb8fc18,0x470a683a } },
21244     /* 58 */
21245     { { 0x90513482,0x81b761dc,0x01e9276a,0x0287202a,0x0ce73083,0xcda441ee,
21246         0xc63dc6ef,0x16410690 },
21247       { 0x6d06a2ed,0xf5034a06,0x189b100b,0xdd4d7745,0xab8218c9,0xd914ae72,
21248         0x7abcbb4f,0xd73479fd } },
21249     /* 59 */
21250     { { 0x5ad4c6e5,0x7edefb16,0x5b06d04d,0x262cf08f,0x8575cb14,0x12ed5bb1,
21251         0x0771666b,0x816469e3 },
21252       { 0x561e291e,0xd7ab9d79,0xc1de1661,0xeb9daf22,0x135e0513,0xf49827eb,
21253         0xf0dd3f9c,0x0a36dd23 } },
21254     /* 60 */
21255     { { 0x41d5533c,0x098d32c7,0x8684628f,0x7c5f5a9e,0xe349bd11,0x39a228ad,
21256         0xfdbab118,0xe331dfd6 },
21257       { 0x6bcc6ed8,0x5100ab68,0xef7a260e,0x7160c3bd,0xbce850d7,0x9063d9a7,
21258         0x492e3389,0xd3b4782a } },
21259     /* 61 */
21260     { { 0xf3821f90,0xa149b6e8,0x66eb7aad,0x92edd9ed,0x1a013116,0x0bb66953,
21261         0x4c86a5bd,0x7281275a },
21262       { 0xd3ff47e5,0x503858f7,0x61016441,0x5e1616bc,0x7dfd9bb1,0x62b0f11a,
21263         0xce145059,0x2c062e7e } },
21264     /* 62 */
21265     { { 0x0159ac2e,0xa76f996f,0xcbdb2713,0x281e7736,0x08e46047,0x2ad6d288,
21266         0x2c4e7ef1,0x282a35f9 },
21267       { 0xc0ce5cd2,0x9c354b1e,0x1379c229,0xcf99efc9,0x3e82c11e,0x992caf38,
21268         0x554d2abd,0xc71cd513 } },
21269     /* 63 */
21270     { { 0x09b578f4,0x4885de9c,0xe3affa7a,0x1884e258,0x59182f1f,0x8f76b1b7,
21271         0xcf47f3a3,0xc50f6740 },
21272       { 0x374b68ea,0xa9c4adf3,0x69965fe2,0xa406f323,0x85a53050,0x2f86a222,
21273         0x212958dc,0xb9ecb3a7 } },
21274     /* 64 */
21275     { { 0xf4f8b16a,0x56f8410e,0xc47b266a,0x97241afe,0x6d9c87c1,0x0a406b8e,
21276         0xcd42ab1b,0x803f3e02 },
21277       { 0x04dbec69,0x7f0309a8,0x3bbad05f,0xa83b85f7,0xad8e197f,0xc6097273,
21278         0x5067adc1,0xc097440e } },
21279     /* 65 */
21280     { { 0xc379ab34,0x846a56f2,0x841df8d1,0xa8ee068b,0x176c68ef,0x20314459,
21281         0x915f1f30,0xf1af32d5 },
21282       { 0x5d75bd50,0x99c37531,0xf72f67bc,0x837cffba,0x48d7723f,0x0613a418,
21283         0xe2d41c8b,0x23d0f130 } },
21284     /* 66 */
21285     { { 0xf41500d9,0x857ab6ed,0xfcbeada8,0x0d890ae5,0x89725951,0x52fe8648,
21286         0xc0a3fadd,0xb0288dd6 },
21287       { 0x650bcb08,0x85320f30,0x695d6e16,0x71af6313,0xb989aa76,0x31f520a7,
21288         0xf408c8d2,0xffd3724f } },
21289     /* 67 */
21290     { { 0xb458e6cb,0x53968e64,0x317a5d28,0x992dad20,0x7aa75f56,0x3814ae0b,
21291         0xd78c26df,0xf5590f4a },
21292       { 0xcf0ba55a,0x0fc24bd3,0x0c778bae,0x0fc4724a,0x683b674a,0x1ce9864f,
21293         0xf6f74a20,0x18d6da54 } },
21294     /* 68 */
21295     { { 0xd5be5a2b,0xed93e225,0x5934f3c6,0x6fe79983,0x22626ffc,0x43140926,
21296         0x7990216a,0x50bbb4d9 },
21297       { 0xe57ec63e,0x378191c6,0x181dcdb2,0x65422c40,0x0236e0f6,0x41a8099b,
21298         0x01fe49c3,0x2b100118 } },
21299     /* 69 */
21300     { { 0x9b391593,0xfc68b5c5,0x598270fc,0xc385f5a2,0xd19adcbb,0x7144f3aa,
21301         0x83fbae0c,0xdd558999 },
21302       { 0x74b82ff4,0x93b88b8e,0x71e734c9,0xd2e03c40,0x43c0322a,0x9a7a9eaf,
21303         0x149d6041,0xe6e4c551 } },
21304     /* 70 */
21305     { { 0x1e9af288,0x55f655bb,0xf7ada931,0x647e1a64,0xcb2820e5,0x43697e4b,
21306         0x07ed56ff,0x51e00db1 },
21307       { 0x771c327e,0x43d169b8,0x4a96c2ad,0x29cdb20b,0x3deb4779,0xc07d51f5,
21308         0x49829177,0xe22f4241 } },
21309     /* 71 */
21310     { { 0x635f1abb,0xcd45e8f4,0x68538874,0x7edc0cb5,0xb5a8034d,0xc9472c1f,
21311         0x52dc48c9,0xf709373d },
21312       { 0xa8af30d6,0x401966bb,0xf137b69c,0x95bf5f4a,0x9361c47e,0x3966162a,
21313         0xe7275b11,0xbd52d288 } },
21314     /* 72 */
21315     { { 0x9c5fa877,0xab155c7a,0x7d3a3d48,0x17dad672,0x73d189d8,0x43f43f9e,
21316         0xc8aa77a6,0xa0d0f8e4 },
21317       { 0xcc94f92d,0x0bbeafd8,0x0c4ddb3a,0xd818c8be,0xb82eba14,0x22cc65f8,
21318         0x946d6a00,0xa56c78c7 } },
21319     /* 73 */
21320     { { 0x0dd09529,0x2962391b,0x3daddfcf,0x803e0ea6,0x5b5bf481,0x2c77351f,
21321         0x731a367a,0xd8befdf8 },
21322       { 0xfc0157f4,0xab919d42,0xfec8e650,0xf51caed7,0x02d48b0a,0xcdf9cb40,
21323         0xce9f6478,0x854a68a5 } },
21324     /* 74 */
21325     { { 0x63506ea5,0xdc35f67b,0xa4fe0d66,0x9286c489,0xfe95cd4d,0x3f101d3b,
21326         0x98846a95,0x5cacea0b },
21327       { 0x9ceac44d,0xa90df60c,0x354d1c3a,0x3db29af4,0xad5dbabe,0x08dd3de8,
21328         0x35e4efa9,0xe4982d12 } },
21329     /* 75 */
21330     { { 0xc34cd55e,0x23104a22,0x2680d132,0x58695bb3,0x1fa1d943,0xfb345afa,
21331         0x16b20499,0x8046b7f6 },
21332       { 0x38e7d098,0xb533581e,0xf46f0b70,0xd7f61e8d,0x44cb78c4,0x30dea9ea,
21333         0x9082af55,0xeb17ca7b } },
21334     /* 76 */
21335     { { 0x76a145b9,0x1751b598,0xc1bc71ec,0xa5cf6b0f,0x392715bb,0xd3e03565,
21336         0xfab5e131,0x097b00ba },
21337       { 0x565f69e1,0xaa66c8e9,0xb5be5199,0x77e8f75a,0xda4fd984,0x6033ba11,
21338         0xafdbcc9e,0xf95c747b } },
21339     /* 77 */
21340     { { 0xbebae45e,0x558f01d3,0xc4bc6955,0xa8ebe9f0,0xdbc64fc6,0xaeb705b1,
21341         0x566ed837,0x3512601e },
21342       { 0xfa1161cd,0x9336f1e1,0x4c65ef87,0x328ab8d5,0x724f21e5,0x4757eee2,
21343         0x6068ab6b,0x0ef97123 } },
21344     /* 78 */
21345     { { 0x54ca4226,0x02598cf7,0xf8642c8e,0x5eede138,0x468e1790,0x48963f74,
21346         0x3b4fbc95,0xfc16d933 },
21347       { 0xe7c800ca,0xbe96fb31,0x2678adaa,0x13806331,0x6ff3e8b5,0x3d624497,
21348         0xb95d7a17,0x14ca4af1 } },
21349     /* 79 */
21350     { { 0xbd2f81d5,0x7a4771ba,0x01f7d196,0x1a5f9d69,0xcad9c907,0xd898bef7,
21351         0xf59c231d,0x4057b063 },
21352       { 0x89c05c0a,0xbffd82fe,0x1dc0df85,0xe4911c6f,0xa35a16db,0x3befccae,
21353         0xf1330b13,0x1c3b5d64 } },
21354     /* 80 */
21355     { { 0x80ec21fe,0x5fe14bfe,0xc255be82,0xf6ce116a,0x2f4a5d67,0x98bc5a07,
21356         0xdb7e63af,0xfad27148 },
21357       { 0x29ab05b3,0x90c0b6ac,0x4e251ae6,0x37a9a83c,0xc2aade7d,0x0a7dc875,
21358         0x9f0e1a84,0x77387de3 } },
21359     /* 81 */
21360     { { 0xa56c0dd7,0x1e9ecc49,0x46086c74,0xa5cffcd8,0xf505aece,0x8f7a1408,
21361         0xbef0c47e,0xb37b85c0 },
21362       { 0xcc0e6a8f,0x3596b6e4,0x6b388f23,0xfd6d4bbf,0xc39cef4e,0xaba453fa,
21363         0xf9f628d5,0x9c135ac8 } },
21364     /* 82 */
21365     { { 0x84e35743,0x32aa3202,0x85a3cdef,0x320d6ab1,0x1df19819,0xb821b176,
21366         0xc433851f,0x5721361f },
21367       { 0x71fc9168,0x1f0db36a,0x5e5c403c,0x5f98ba73,0x37bcd8f5,0xf64ca87e,
21368         0xe6bb11bd,0xdcbac3c9 } },
21369     /* 83 */
21370     { { 0x4518cbe2,0xf01d9968,0x9c9eb04e,0xd242fc18,0xe47feebf,0x727663c7,
21371         0x2d626862,0xb8c1c89e },
21372       { 0xc8e1d569,0x51a58bdd,0xb7d88cd0,0x563809c8,0xf11f31eb,0x26c27fd9,
21373         0x2f9422d4,0x5d23bbda } },
21374     /* 84 */
21375     { { 0x95c8f8be,0x0a1c7294,0x3bf362bf,0x2961c480,0xdf63d4ac,0x9e418403,
21376         0x91ece900,0xc109f9cb },
21377       { 0x58945705,0xc2d095d0,0xddeb85c0,0xb9083d96,0x7a40449b,0x84692b8d,
21378         0x2eee1ee1,0x9bc3344f } },
21379     /* 85 */
21380     { { 0x42913074,0x0d5ae356,0x48a542b1,0x55491b27,0xb310732a,0x469ca665,
21381         0x5f1a4cc1,0x29591d52 },
21382       { 0xb84f983f,0xe76f5b6b,0x9f5f84e1,0xbe7eef41,0x80baa189,0x1200d496,
21383         0x18ef332c,0x6376551f } },
21384     /* 86 */
21385     { { 0x562976cc,0xbda5f14e,0x0ef12c38,0x22bca3e6,0x6cca9852,0xbbfa3064,
21386         0x08e2987a,0xbdb79dc8 },
21387       { 0xcb06a772,0xfd2cb5c9,0xfe536dce,0x38f475aa,0x7c2b5db8,0xc2a3e022,
21388         0xadd3c14a,0x8ee86001 } },
21389     /* 87 */
21390     { { 0xa4ade873,0xcbe96981,0xc4fba48c,0x7ee9aa4d,0x5a054ba5,0x2cee2899,
21391         0x6f77aa4b,0x92e51d7a },
21392       { 0x7190a34d,0x948bafa8,0xf6bd1ed1,0xd698f75b,0x0caf1144,0xd00ee6e3,
21393         0x0a56aaaa,0x5182f86f } },
21394     /* 88 */
21395     { { 0x7a4cc99c,0xfba6212c,0x3e6d9ca1,0xff609b68,0x5ac98c5a,0x5dbb27cb,
21396         0x4073a6f2,0x91dcab5d },
21397       { 0x5f575a70,0x01b6cc3d,0x6f8d87fa,0x0cb36139,0x89981736,0x165d4e8c,
21398         0x97974f2b,0x17a0cedb } },
21399     /* 89 */
21400     { { 0x076c8d3a,0x38861e2a,0x210f924b,0x701aad39,0x13a835d9,0x94d0eae4,
21401         0x7f4cdf41,0x2e8ce36c },
21402       { 0x037a862b,0x91273dab,0x60e4c8fa,0x01ba9bb7,0x33baf2dd,0xf9645388,
21403         0x34f668f3,0xf4ccc6cb } },
21404     /* 90 */
21405     { { 0xf1f79687,0x44ef525c,0x92efa815,0x7c595495,0xa5c78d29,0xe1231741,
21406         0x9a0df3c9,0xac0db488 },
21407       { 0xdf01747f,0x86bfc711,0xef17df13,0x592b9358,0x5ccb6bb5,0xe5880e4f,
21408         0x94c974a2,0x95a64a61 } },
21409     /* 91 */
21410     { { 0xc15a4c93,0x72c1efda,0x82585141,0x40269b73,0x16cb0bad,0x6a8dfb1c,
21411         0x29210677,0x231e54ba },
21412       { 0x8ae6d2dc,0xa70df917,0x39112918,0x4d6aa63f,0x5e5b7223,0xf627726b,
21413         0xd8a731e1,0xab0be032 } },
21414     /* 92 */
21415     { { 0x8d131f2d,0x097ad0e9,0x3b04f101,0x637f09e3,0xd5e9a748,0x1ac86196,
21416         0x2cf6a679,0xf1bcc880 },
21417       { 0xe8daacb4,0x25c69140,0x60f65009,0x3c4e4055,0x477937a6,0x591cc8fc,
21418         0x5aebb271,0x85169469 } },
21419     /* 93 */
21420     { { 0xf1dcf593,0xde35c143,0xb018be3b,0x78202b29,0x9bdd9d3d,0xe9cdadc2,
21421         0xdaad55d8,0x8f67d9d2 },
21422       { 0x7481ea5f,0x84111656,0xe34c590c,0xe7d2dde9,0x05053fa8,0xffdd43f4,
21423         0xc0728b5d,0xf84572b9 } },
21424     /* 94 */
21425     { { 0x97af71c9,0x5e1a7a71,0x7a736565,0xa1449444,0x0e1d5063,0xa1b4ae07,
21426         0x616b2c19,0xedee2710 },
21427       { 0x11734121,0xb2f034f5,0x4a25e9f0,0x1cac6e55,0xa40c2ecf,0x8dc148f3,
21428         0x44ebd7f4,0x9fd27e9b } },
21429     /* 95 */
21430     { { 0xf6e2cb16,0x3cc7658a,0xfe5919b6,0xe3eb7d2c,0x168d5583,0x5a8c5816,
21431         0x958ff387,0xa40c2fb6 },
21432       { 0xfedcc158,0x8c9ec560,0x55f23056,0x7ad804c6,0x9a307e12,0xd9396704,
21433         0x7dc6decf,0x99bc9bb8 } },
21434     /* 96 */
21435     { { 0x927dafc6,0x84a9521d,0x5c09cd19,0x52c1fb69,0xf9366dde,0x9d9581a0,
21436         0xa16d7e64,0x9abe210b },
21437       { 0x48915220,0x480af84a,0x4dd816c6,0xfa73176a,0x1681ca5a,0xc7d53987,
21438         0x87f344b0,0x7881c257 } },
21439     /* 97 */
21440     { { 0xe0bcf3ff,0x93399b51,0x127f74f6,0x0d02cbc5,0xdd01d968,0x8fb465a2,
21441         0xa30e8940,0x15e6e319 },
21442       { 0x3e0e05f4,0x646d6e0d,0x43588404,0xfad7bddc,0xc4f850d3,0xbe61c7d1,
21443         0x191172ce,0x0e55facf } },
21444     /* 98 */
21445     { { 0xf8787564,0x7e9d9806,0x31e85ce6,0x1a331721,0xb819e8d6,0x6b0158ca,
21446         0x6fe96577,0xd73d0976 },
21447       { 0x1eb7206e,0x42483425,0xc618bb42,0xa519290f,0x5e30a520,0x5dcbb859,
21448         0x8f15a50b,0x9250a374 } },
21449     /* 99 */
21450     { { 0xbe577410,0xcaff08f8,0x5077a8c6,0xfd408a03,0xec0a63a4,0xf1f63289,
21451         0xc1cc8c0b,0x77414082 },
21452       { 0xeb0991cd,0x05a40fa6,0x49fdc296,0xc1ca0866,0xb324fd40,0x3a68a3c7,
21453         0x12eb20b9,0x8cb04f4d } },
21454     /* 100 */
21455     { { 0x6906171c,0xb1c2d055,0xb0240c3f,0x9073e9cd,0xd8906841,0xdb8e6b4f,
21456         0x47123b51,0xe4e429ef },
21457       { 0x38ec36f4,0x0b8dd53c,0xff4b6a27,0xf9d2dc01,0x879a9a48,0x5d066e07,
21458         0x3c6e6552,0x37bca2ff } },
21459     /* 101 */
21460     { { 0xdf562470,0x4cd2e3c7,0xc0964ac9,0x44f272a2,0x80c793be,0x7c6d5df9,
21461         0x3002b22a,0x59913edc },
21462       { 0x5750592a,0x7a139a83,0xe783de02,0x99e01d80,0xea05d64f,0xcf8c0375,
21463         0xb013e226,0x43786e4a } },
21464     /* 102 */
21465     { { 0x9e56b5a6,0xff32b0ed,0xd9fc68f9,0x0750d9a6,0x597846a7,0xec15e845,
21466         0xb7e79e7a,0x8638ca98 },
21467       { 0x0afc24b2,0x2f5ae096,0x4dace8f2,0x05398eaf,0xaecba78f,0x3b765dd0,
21468         0x7b3aa6f0,0x1ecdd36a } },
21469     /* 103 */
21470     { { 0x6c5ff2f3,0x5d3acd62,0x2873a978,0xa2d516c0,0xd2110d54,0xad94c9fa,
21471         0xd459f32d,0xd85d0f85 },
21472       { 0x10b11da3,0x9f700b8d,0xa78318c4,0xd2c22c30,0x9208decd,0x556988f4,
21473         0xb4ed3c62,0xa04f19c3 } },
21474     /* 104 */
21475     { { 0xed7f93bd,0x087924c8,0x392f51f6,0xcb64ac5d,0x821b71af,0x7cae330a,
21476         0x5c0950b0,0x92b2eeea },
21477       { 0x85b6e235,0x85ac4c94,0x2936c0f0,0xab2ca4a9,0xe0508891,0x80faa6b3,
21478         0x5834276c,0x1ee78221 } },
21479     /* 105 */
21480     { { 0xe63e79f7,0xa60a2e00,0xf399d906,0xf590e7b2,0x6607c09d,0x9021054a,
21481         0x57a6e150,0xf3f2ced8 },
21482       { 0xf10d9b55,0x200510f3,0xd8642648,0x9d2fcfac,0xe8bd0e7c,0xe5631aa7,
21483         0x3da3e210,0x0f56a454 } },
21484     /* 106 */
21485     { { 0x1043e0df,0x5b21bffa,0x9c007e6d,0x6c74b6cc,0xd4a8517a,0x1a656ec0,
21486         0x1969e263,0xbd8f1741 },
21487       { 0xbeb7494a,0x8a9bbb86,0x45f3b838,0x1567d46f,0xa4e5a79a,0xdf7a12a7,
21488         0x30ccfa09,0x2d1a1c35 } },
21489     /* 107 */
21490     { { 0x506508da,0x192e3813,0xa1d795a7,0x336180c4,0x7a9944b3,0xcddb5949,
21491         0xb91fba46,0xa107a65e },
21492       { 0x0f94d639,0xe6d1d1c5,0x8a58b7d7,0x8b4af375,0xbd37ca1c,0x1a7c5584,
21493         0xf87a9af2,0x183d760a } },
21494     /* 108 */
21495     { { 0x0dde59a4,0x29d69711,0x0e8bef87,0xf1ad8d07,0x4f2ebe78,0x229b4963,
21496         0xc269d754,0x1d44179d },
21497       { 0x8390d30e,0xb32dc0cf,0x0de8110c,0x0a3b2753,0x2bc0339a,0x31af1dc5,
21498         0x9606d262,0x771f9cc2 } },
21499     /* 109 */
21500     { { 0x85040739,0x99993e77,0x8026a939,0x44539db9,0xf5f8fc26,0xcf40f6f2,
21501         0x0362718e,0x64427a31 },
21502       { 0x85428aa8,0x4f4f2d87,0xebfb49a8,0x7b7adc3f,0xf23d01ac,0x201b2c6d,
21503         0x6ae90d6d,0x49d9b749 } },
21504     /* 110 */
21505     { { 0x435d1099,0xcc78d8bc,0x8e8d1a08,0x2adbcd4e,0x2cb68a41,0x02c2e2a0,
21506         0x3f605445,0x9037d81b },
21507       { 0x074c7b61,0x7cdbac27,0x57bfd72e,0xfe2031ab,0x596d5352,0x61ccec96,
21508         0x7cc0639c,0x08c3de6a } },
21509     /* 111 */
21510     { { 0xf6d552ab,0x20fdd020,0x05cd81f1,0x56baff98,0x91351291,0x06fb7c3e,
21511         0x45796b2f,0xc6909442 },
21512       { 0x41231bd1,0x17b3ae9c,0x5cc58205,0x1eac6e87,0xf9d6a122,0x208837ab,
21513         0xcafe3ac0,0x3fa3db02 } },
21514     /* 112 */
21515     { { 0x05058880,0xd75a3e65,0x643943f2,0x7da365ef,0xfab24925,0x4147861c,
21516         0xfdb808ff,0xc5c4bdb0 },
21517       { 0xb272b56b,0x73513e34,0x11b9043a,0xc8327e95,0xf8844969,0xfd8ce37d,
21518         0x46c2b6b5,0x2d56db94 } },
21519     /* 113 */
21520     { { 0xff46ac6b,0x2461782f,0x07a2e425,0xd19f7926,0x09a48de1,0xfafea3c4,
21521         0xe503ba42,0x0f56bd9d },
21522       { 0x345cda49,0x137d4ed1,0x816f299d,0x821158fc,0xaeb43402,0xe7c6a54a,
21523         0x1173b5f1,0x4003bb9d } },
21524     /* 114 */
21525     { { 0xa0803387,0x3b8e8189,0x39cbd404,0xece115f5,0xd2877f21,0x4297208d,
21526         0xa07f2f9e,0x53765522 },
21527       { 0xa8a4182d,0xa4980a21,0x3219df79,0xa2bbd07a,0x1a19a2d4,0x674d0a2e,
21528         0x6c5d4549,0x7a056f58 } },
21529     /* 115 */
21530     { { 0x9d8a2a47,0x646b2558,0xc3df2773,0x5b582948,0xabf0d539,0x51ec000e,
21531         0x7a1a2675,0x77d482f1 },
21532       { 0x87853948,0xb8a1bd95,0x6cfbffee,0xa6f817bd,0x80681e47,0xab6ec057,
21533         0x2b38b0e4,0x4115012b } },
21534     /* 116 */
21535     { { 0x6de28ced,0x3c73f0f4,0x9b13ec47,0x1d5da760,0x6e5c6392,0x61b8ce9e,
21536         0xfbea0946,0xcdf04572 },
21537       { 0x6c53c3b0,0x1cb3c58b,0x447b843c,0x97fe3c10,0x2cb9780e,0xfb2b8ae1,
21538         0x97383109,0xee703dda } },
21539     /* 117 */
21540     { { 0xff57e43a,0x34515140,0xb1b811b8,0xd44660d3,0x8f42b986,0x2b3b5dff,
21541         0xa162ce21,0x2a0ad89d },
21542       { 0x6bc277ba,0x64e4a694,0xc141c276,0xc788c954,0xcabf6274,0x141aa64c,
21543         0xac2b4659,0xd62d0b67 } },
21544     /* 118 */
21545     { { 0x2c054ac4,0x39c5d87b,0xf27df788,0x57005859,0xb18128d6,0xedf7cbf3,
21546         0x991c2426,0xb39a23f2 },
21547       { 0xf0b16ae5,0x95284a15,0xa136f51b,0x0c6a05b1,0xf2700783,0x1d63c137,
21548         0xc0674cc5,0x04ed0092 } },
21549     /* 119 */
21550     { { 0x9ae90393,0x1f4185d1,0x4a3d64e6,0x3047b429,0x9854fc14,0xae0001a6,
21551         0x0177c387,0xa0a91fc1 },
21552       { 0xae2c831e,0xff0a3f01,0x2b727e16,0xbb76ae82,0x5a3075b4,0x8f12c8a1,
21553         0x9ed20c41,0x084cf988 } },
21554     /* 120 */
21555     { { 0xfca6becf,0xd98509de,0x7dffb328,0x2fceae80,0x4778e8b9,0x5d8a15c4,
21556         0x73abf77e,0xd57955b2 },
21557       { 0x31b5d4f1,0x210da79e,0x3cfa7a1c,0xaa52f04b,0xdc27c20b,0xd4d12089,
21558         0x02d141f1,0x8e14ea42 } },
21559     /* 121 */
21560     { { 0xf2897042,0xeed50345,0x43402c4a,0x8d05331f,0xc8bdfb21,0xc8d9c194,
21561         0x2aa4d158,0x597e1a37 },
21562       { 0xcf0bd68c,0x0327ec1a,0xab024945,0x6d4be0dc,0xc9fe3e84,0x5b9c8d7a,
21563         0x199b4dea,0xca3f0236 } },
21564     /* 122 */
21565     { { 0x6170bd20,0x592a10b5,0x6d3f5de7,0x0ea897f1,0x44b2ade2,0xa3363ff1,
21566         0x309c07e4,0xbde7fd7e },
21567       { 0xb8f5432c,0x516bb6d2,0xe043444b,0x210dc1cb,0xf8f95b5a,0x3db01e6f,
21568         0x0a7dd198,0xb623ad0e } },
21569     /* 123 */
21570     { { 0x60c7b65b,0xa75bd675,0x23a4a289,0xab8c5590,0xd7b26795,0xf8220fd0,
21571         0x58ec137b,0xd6aa2e46 },
21572       { 0x5138bb85,0x10abc00b,0xd833a95c,0x8c31d121,0x1702a32e,0xb24ff00b,
21573         0x2dcc513a,0x111662e0 } },
21574     /* 124 */
21575     { { 0xefb42b87,0x78114015,0x1b6c4dff,0xbd9f5d70,0xa7d7c129,0x66ecccd7,
21576         0x94b750f8,0xdb3ee1cb },
21577       { 0xf34837cf,0xb26f3db0,0xb9578d4f,0xe7eed18b,0x7c56657d,0x5d2cdf93,
21578         0x52206a59,0x886a6442 } },
21579     /* 125 */
21580     { { 0x65b569ea,0x3c234cfb,0xf72119c1,0x20011141,0xa15a619e,0x8badc85d,
21581         0x018a17bc,0xa70cf4eb },
21582       { 0x8c4a6a65,0x224f97ae,0x0134378f,0x36e5cf27,0x4f7e0960,0xbe3a609e,
21583         0xd1747b77,0xaa4772ab } },
21584     /* 126 */
21585     { { 0x7aa60cc0,0x67676131,0x0368115f,0xc7916361,0xbbc1bb5a,0xded98bb4,
21586         0x30faf974,0x611a6ddc },
21587       { 0xc15ee47a,0x30e78cbc,0x4e0d96a5,0x2e896282,0x3dd9ed88,0x36f35adf,
21588         0x16429c88,0x5cfffaf8 } },
21589     /* 127 */
21590     { { 0x9b7a99cd,0xc0d54cff,0x843c45a1,0x7bf3b99d,0x62c739e1,0x038a908f,
21591         0x7dc1994c,0x6e5a6b23 },
21592       { 0x0ba5db77,0xef8b454e,0xacf60d63,0xb7b8807f,0x76608378,0xe591c0c6,
21593         0x242dabcc,0x481a238d } },
21594     /* 128 */
21595     { { 0x35d0b34a,0xe3417bc0,0x8327c0a7,0x440b386b,0xac0362d1,0x8fb7262d,
21596         0xe0cdf943,0x2c41114c },
21597       { 0xad95a0b1,0x2ba5cef1,0x67d54362,0xc09b37a8,0x01e486c9,0x26d6cdd2,
21598         0x42ff9297,0x20477abf } },
21599     /* 129 */
21600     { { 0x18d65dbf,0x2f75173c,0x339edad8,0x77bf940e,0xdcf1001c,0x7022d26b,
21601         0xc77396b6,0xac66409a },
21602       { 0xc6261cc3,0x8b0bb36f,0x190e7e90,0x213f7bc9,0xa45e6c10,0x6541ceba,
21603         0xcc122f85,0xce8e6975 } },
21604     /* 130 */
21605     { { 0xbc0a67d2,0x0f121b41,0x444d248a,0x62d4760a,0x659b4737,0x0e044f1d,
21606         0x250bb4a8,0x08fde365 },
21607       { 0x848bf287,0xaceec3da,0xd3369d6e,0xc2a62182,0x92449482,0x3582dfdc,
21608         0x565d6cd7,0x2f7e2fd2 } },
21609     /* 131 */
21610     { { 0xc3770fa7,0xae4b92db,0x379043f9,0x095e8d5c,0x17761171,0x54f34e9d,
21611         0x907702ae,0xc65be92e },
21612       { 0xf6fd0a40,0x2758a303,0xbcce784b,0xe7d822e3,0x4f9767bf,0x7ae4f585,
21613         0xd1193b3a,0x4bff8e47 } },
21614     /* 132 */
21615     { { 0x00ff1480,0xcd41d21f,0x0754db16,0x2ab8fb7d,0xbbe0f3ea,0xac81d2ef,
21616         0x5772967d,0x3e4e4ae6 },
21617       { 0x3c5303e6,0x7e18f36d,0x92262397,0x3bd9994b,0x1324c3c0,0x9ed70e26,
21618         0x58ec6028,0x5388aefd } },
21619     /* 133 */
21620     { { 0x5e5d7713,0xad1317eb,0x75de49da,0x09b985ee,0xc74fb261,0x32f5bc4f,
21621         0x4f75be0e,0x5cf908d1 },
21622       { 0x8e657b12,0x76043510,0xb96ed9e6,0xbfd421a5,0x8970ccc2,0x0e29f51f,
21623         0x60f00ce2,0xa698ba40 } },
21624     /* 134 */
21625     { { 0xef748fec,0x73db1686,0x7e9d2cf9,0xe6e755a2,0xce265eff,0x630b6544,
21626         0x7aebad8d,0xb142ef8a },
21627       { 0x17d5770a,0xad31af9f,0x2cb3412f,0x66af3b67,0xdf3359de,0x6bd60d1b,
21628         0x58515075,0xd1896a96 } },
21629     /* 135 */
21630     { { 0x33c41c08,0xec5957ab,0x5468e2e1,0x87de94ac,0xac472f6c,0x18816b73,
21631         0x7981da39,0x267b0e0b },
21632       { 0x8e62b988,0x6e554e5d,0x116d21e7,0xd8ddc755,0x3d2a6f99,0x4610faf0,
21633         0xa1119393,0xb54e287a } },
21634     /* 136 */
21635     { { 0x178a876b,0x0a0122b5,0x085104b4,0x51ff96ff,0x14f29f76,0x050b31ab,
21636         0x5f87d4e6,0x84abb28b },
21637       { 0x8270790a,0xd5ed439f,0x85e3f46b,0x2d6cb59d,0x6c1e2212,0x75f55c1b,
21638         0x17655640,0xe5436f67 } },
21639     /* 137 */
21640     { { 0x2286e8d5,0x53f9025e,0x864453be,0x353c95b4,0xe408e3a0,0xd832f5bd,
21641         0x5b9ce99e,0x0404f68b },
21642       { 0xa781e8e5,0xcad33bde,0x163c2f5b,0x3cdf5018,0x0119caa3,0x57576960,
21643         0x0ac1c701,0x3a4263df } },
21644     /* 138 */
21645     { { 0x9aeb596d,0xc2965ecc,0x023c92b4,0x01ea03e7,0x2e013961,0x4704b4b6,
21646         0x905ea367,0x0ca8fd3f },
21647       { 0x551b2b61,0x92523a42,0x390fcd06,0x1eb7a89c,0x0392a63e,0xe7f1d2be,
21648         0x4ddb0c33,0x96dca264 } },
21649     /* 139 */
21650     { { 0x387510af,0x203bb43a,0xa9a36a01,0x846feaa8,0x2f950378,0xd23a5770,
21651         0x3aad59dc,0x4363e212 },
21652       { 0x40246a47,0xca43a1c7,0xe55dd24d,0xb362b8d2,0x5d8faf96,0xf9b08604,
21653         0xd8bb98c4,0x840e115c } },
21654     /* 140 */
21655     { { 0x1023e8a7,0xf12205e2,0xd8dc7a0b,0xc808a8cd,0x163a5ddf,0xe292a272,
21656         0x30ded6d4,0x5e0d6abd },
21657       { 0x7cfc0f64,0x07a721c2,0x0e55ed88,0x42eec01d,0x1d1f9db2,0x26a7bef9,
21658         0x2945a25a,0x7dea48f4 } },
21659     /* 141 */
21660     { { 0xe5060a81,0xabdf6f1c,0xf8f95615,0xe79f9c72,0x06ac268b,0xcfd36c54,
21661         0xebfd16d1,0xabc2a2be },
21662       { 0xd3e2eac7,0x8ac66f91,0xd2dd0466,0x6f10ba63,0x0282d31b,0x6790e377,
21663         0x6c7eefc1,0x4ea35394 } },
21664     /* 142 */
21665     { { 0x5266309d,0xed8a2f8d,0x81945a3e,0x0a51c6c0,0x578c5dc1,0xcecaf45a,
21666         0x1c94ffc3,0x3a76e689 },
21667       { 0x7d7b0d0f,0x9aace8a4,0x8f584a5f,0x963ace96,0x4e697fbe,0x51a30c72,
21668         0x465e6464,0x8212a10a } },
21669     /* 143 */
21670     { { 0xcfab8caa,0xef7c61c3,0x0e142390,0x18eb8e84,0x7e9733ca,0xcd1dff67,
21671         0x599cb164,0xaa7cab71 },
21672       { 0xbc837bd1,0x02fc9273,0xc36af5d7,0xc06407d0,0xf423da49,0x17621292,
21673         0xfe0617c3,0x40e38073 } },
21674     /* 144 */
21675     { { 0xa7bf9b7c,0xf4f80824,0x3fbe30d0,0x365d2320,0x97cf9ce3,0xbfbe5320,
21676         0xb3055526,0xe3604700 },
21677       { 0x6cc6c2c7,0x4dcb9911,0xba4cbee6,0x72683708,0x637ad9ec,0xdcded434,
21678         0xa3dee15f,0x6542d677 } },
21679     /* 145 */
21680     { { 0x7b6c377a,0x3f32b6d0,0x903448be,0x6cb03847,0x20da8af7,0xd6fdd3a8,
21681         0x09bb6f21,0xa6534aee },
21682       { 0x1035facf,0x30a1780d,0x9dcb47e6,0x35e55a33,0xc447f393,0x6ea50fe1,
21683         0xdc9aef22,0xf3cb672f } },
21684     /* 146 */
21685     { { 0x3b55fd83,0xeb3719fe,0x875ddd10,0xe0d7a46c,0x05cea784,0x33ac9fa9,
21686         0xaae870e7,0x7cafaa2e },
21687       { 0x1d53b338,0x9b814d04,0xef87e6c6,0xe0acc0a0,0x11672b0f,0xfb93d108,
21688         0xb9bd522e,0x0aab13c1 } },
21689     /* 147 */
21690     { { 0xd2681297,0xddcce278,0xb509546a,0xcb350eb1,0x7661aaf2,0x2dc43173,
21691         0x847012e9,0x4b91a602 },
21692       { 0x72f8ddcf,0xdcff1095,0x9a911af4,0x08ebf61e,0xc372430e,0x48f4360a,
21693         0x72321cab,0x49534c53 } },
21694     /* 148 */
21695     { { 0xf07b7e9d,0x83df7d71,0x13cd516f,0xa478efa3,0x6c047ee3,0x78ef264b,
21696         0xd65ac5ee,0xcaf46c4f },
21697       { 0x92aa8266,0xa04d0c77,0x913684bb,0xedf45466,0xae4b16b0,0x56e65168,
21698         0x04c6770f,0x14ce9e57 } },
21699     /* 149 */
21700     { { 0x965e8f91,0x99445e3e,0xcb0f2492,0xd3aca1ba,0x90c8a0a0,0xd31cc70f,
21701         0x3e4c9a71,0x1bb708a5 },
21702       { 0x558bdd7a,0xd5ca9e69,0x018a26b1,0x734a0508,0x4c9cf1ec,0xb093aa71,
21703         0xda300102,0xf9d126f2 } },
21704     /* 150 */
21705     { { 0xaff9563e,0x749bca7a,0xb49914a0,0xdd077afe,0xbf5f1671,0xe27a0311,
21706         0x729ecc69,0x807afcb9 },
21707       { 0xc9b08b77,0x7f8a9337,0x443c7e38,0x86c3a785,0x476fd8ba,0x85fafa59,
21708         0x6568cd8c,0x751adcd1 } },
21709     /* 151 */
21710     { { 0x10715c0d,0x8aea38b4,0x8f7697f7,0xd113ea71,0x93fbf06d,0x665eab14,
21711         0x2537743f,0x29ec4468 },
21712       { 0xb50bebbc,0x3d94719c,0xe4505422,0x399ee5bf,0x8d2dedb1,0x90cd5b3a,
21713         0x92a4077d,0xff9370e3 } },
21714     /* 152 */
21715     { { 0xc6b75b65,0x59a2d69b,0x266651c5,0x4188f8d5,0x3de9d7d2,0x28a9f33e,
21716         0xa2a9d01a,0x9776478b },
21717       { 0x929af2c7,0x8852622d,0x4e690923,0x334f5d6d,0xa89a51e9,0xce6cc7e5,
21718         0xac2f82fa,0x74a6313f } },
21719     /* 153 */
21720     { { 0xb75f079c,0xb2f4dfdd,0x18e36fbb,0x85b07c95,0xe7cd36dd,0x1b6cfcf0,
21721         0x0ff4863d,0xab75be15 },
21722       { 0x173fc9b7,0x81b367c0,0xd2594fd0,0xb90a7420,0xc4091236,0x15fdbf03,
21723         0x0b4459f6,0x4ebeac2e } },
21724     /* 154 */
21725     { { 0x5c9f2c53,0xeb6c5fe7,0x8eae9411,0xd2522011,0xf95ac5d8,0xc8887633,
21726         0x2c1baffc,0xdf99887b },
21727       { 0x850aaecb,0xbb78eed2,0x01d6a272,0x9d49181b,0xb1cdbcac,0x978dd511,
21728         0x779f4058,0x27b040a7 } },
21729     /* 155 */
21730     { { 0xf73b2eb2,0x90405db7,0x8e1b2118,0xe0df8508,0x5962327e,0x501b7152,
21731         0xe4cfa3f5,0xb393dd37 },
21732       { 0x3fd75165,0xa1230e7b,0xbcd33554,0xd66344c2,0x0f7b5022,0x6c36f1be,
21733         0xd0463419,0x09588c12 } },
21734     /* 156 */
21735     { { 0x02601c3b,0xe086093f,0xcf5c335f,0xfb0252f8,0x894aff28,0x955cf280,
21736         0xdb9f648b,0x81c879a9 },
21737       { 0xc6f56c51,0x040e687c,0x3f17618c,0xfed47169,0x9059353b,0x44f88a41,
21738         0x5fc11bc4,0xfa0d48f5 } },
21739     /* 157 */
21740     { { 0xe1608e4d,0xbc6e1c9d,0x3582822c,0x010dda11,0x157ec2d7,0xf6b7ddc1,
21741         0xb6a367d6,0x8ea0e156 },
21742       { 0x2383b3b4,0xa354e02f,0x3f01f53c,0x69966b94,0x2de03ca5,0x4ff6632b,
21743         0xfa00b5ac,0x3f5ab924 } },
21744     /* 158 */
21745     { { 0x59739efb,0x337bb0d9,0xe7ebec0d,0xc751b0f4,0x411a67d1,0x2da52dd6,
21746         0x2b74256e,0x8bc76887 },
21747       { 0x82d3d253,0xa5be3b72,0xf58d779f,0xa9f679a1,0xe16767bb,0xa1cac168,
21748         0x60fcf34f,0xb386f190 } },
21749     /* 159 */
21750     { { 0x2fedcfc2,0x31f3c135,0x62f8af0d,0x5396bf62,0xe57288c2,0x9a02b4ea,
21751         0x1b069c4d,0x4cb460f7 },
21752       { 0x5b8095ea,0xae67b4d3,0x6fc07603,0x92bbf859,0xb614a165,0xe1475f66,
21753         0x95ef5223,0x52c0d508 } },
21754     /* 160 */
21755     { { 0x15339848,0x231c210e,0x70778c8d,0xe87a28e8,0x6956e170,0x9d1de661,
21756         0x2bb09c0b,0x4ac3c938 },
21757       { 0x6998987d,0x19be0551,0xae09f4d6,0x8b2376c4,0x1a3f933d,0x1de0b765,
21758         0xe39705f4,0x380d94c7 } },
21759     /* 161 */
21760     { { 0x81542e75,0x01a355aa,0xee01b9b7,0x96c724a1,0x624d7087,0x6b3a2977,
21761         0xde2637af,0x2ce3e171 },
21762       { 0xf5d5bc1a,0xcfefeb49,0x2777e2b5,0xa655607e,0x9513756c,0x4feaac2f,
21763         0x0b624e4d,0x2e6cd852 } },
21764     /* 162 */
21765     { { 0x8c31c31d,0x3685954b,0x5bf21a0c,0x68533d00,0x75c79ec9,0x0bd7626e,
21766         0x42c69d54,0xca177547 },
21767       { 0xf6d2dbb2,0xcc6edaff,0x174a9d18,0xfd0d8cbd,0xaa4578e8,0x875e8793,
21768         0x9cab2ce6,0xa976a713 } },
21769     /* 163 */
21770     { { 0x93fb353d,0x0a651f1b,0x57fcfa72,0xd75cab8b,0x31b15281,0xaa88cfa7,
21771         0x0a1f4999,0x8720a717 },
21772       { 0x693e1b90,0x8c3e8d37,0x16f6dfc3,0xd345dc0b,0xb52a8742,0x8ea8d00a,
21773         0xc769893c,0x9719ef29 } },
21774     /* 164 */
21775     { { 0x58e35909,0x820eed8d,0x33ddc116,0x9366d8dc,0x6e205026,0xd7f999d0,
21776         0xe15704c1,0xa5072976 },
21777       { 0xc4e70b2e,0x002a37ea,0x6890aa8a,0x84dcf657,0x645b2a5c,0xcd71bf18,
21778         0xf7b77725,0x99389c9d } },
21779     /* 165 */
21780     { { 0x7ada7a4b,0x238c08f2,0xfd389366,0x3abe9d03,0x766f512c,0x6b672e89,
21781         0x202c82e4,0xa88806aa },
21782       { 0xd380184e,0x6602044a,0x126a8b85,0xa8cb78c4,0xad844f17,0x79d670c0,
21783         0x4738dcfe,0x0043bffb } },
21784     /* 166 */
21785     { { 0x36d5192e,0x8d59b5dc,0x4590b2af,0xacf885d3,0x11601781,0x83566d0a,
21786         0xba6c4866,0x52f3ef01 },
21787       { 0x0edcb64d,0x3986732a,0x8068379f,0x0a482c23,0x7040f309,0x16cbe5fa,
21788         0x9ef27e75,0x3296bd89 } },
21789     /* 167 */
21790     { { 0x454d81d7,0x476aba89,0x51eb9b3c,0x9eade7ef,0x81c57986,0x619a21cd,
21791         0xaee571e9,0x3b90febf },
21792       { 0x5496f7cb,0x9393023e,0x7fb51bc4,0x55be41d8,0x99beb5ce,0x03f1dd48,
21793         0x9f810b18,0x6e88069d } },
21794     /* 168 */
21795     { { 0xb43ea1db,0xce37ab11,0x5259d292,0x0a7ff1a9,0x8f84f186,0x851b0221,
21796         0xdefaad13,0xa7222bea },
21797       { 0x2b0a9144,0xa2ac78ec,0xf2fa59c5,0x5a024051,0x6147ce38,0x91d1eca5,
21798         0xbc2ac690,0xbe94d523 } },
21799     /* 169 */
21800     { { 0x0b226ce7,0x72f4945e,0x967e8b70,0xb8afd747,0x85a6c63e,0xedea46f1,
21801         0x9be8c766,0x7782defe },
21802       { 0x3db38626,0x760d2aa4,0x76f67ad1,0x460ae787,0x54499cdb,0x341b86fc,
21803         0xa2892e4b,0x03838567 } },
21804     /* 170 */
21805     { { 0x79ec1a0f,0x2d8daefd,0xceb39c97,0x3bbcd6fd,0x58f61a95,0xf5575ffc,
21806         0xadf7b420,0xdbd986c4 },
21807       { 0x15f39eb7,0x81aa8814,0xb98d976c,0x6ee2fcf5,0xcf2f717d,0x5465475d,
21808         0x6860bbd0,0x8e24d3c4 } },
21809     /* 171 */
21810     { { 0x9a587390,0x749d8e54,0x0cbec588,0x12bb194f,0xb25983c6,0x46e07da4,
21811         0x407bafc8,0x541a99c4 },
21812       { 0x624c8842,0xdb241692,0xd86c05ff,0x6044c12a,0x4f7fcf62,0xc59d14b4,
21813         0xf57d35d1,0xc0092c49 } },
21814     /* 172 */
21815     { { 0xdf2e61ef,0xd3cc75c3,0x2e1b35ca,0x7e8841c8,0x909f29f4,0xc62d30d1,
21816         0x7286944d,0x75e40634 },
21817       { 0xbbc237d0,0xe7d41fc5,0xec4f01c9,0xc9537bf0,0x282bd534,0x91c51a16,
21818         0xc7848586,0x5b7cb658 } },
21819     /* 173 */
21820     { { 0x8a28ead1,0x964a7084,0xfd3b47f6,0x802dc508,0x767e5b39,0x9ae4bfd1,
21821         0x8df097a1,0x7ae13eba },
21822       { 0xeadd384e,0xfd216ef8,0xb6b2ff06,0x0361a2d9,0x4bcdb5f3,0x204b9878,
21823         0xe2a8e3fd,0x787d8074 } },
21824     /* 174 */
21825     { { 0x757fbb1c,0xc5e25d6b,0xca201deb,0xe47bddb2,0x6d2233ff,0x4a55e9a3,
21826         0x9ef28484,0x5c222819 },
21827       { 0x88315250,0x773d4a85,0x827097c1,0x21b21a2b,0xdef5d33f,0xab7c4ea1,
21828         0xbaf0f2b0,0xe45d37ab } },
21829     /* 175 */
21830     { { 0x28511c8a,0xd2df1e34,0xbdca6cd3,0xebb229c8,0x627c39a7,0x578a71a7,
21831         0x84dfb9d3,0xed7bc122 },
21832       { 0x93dea561,0xcf22a6df,0xd48f0ed1,0x5443f18d,0x5bad23e8,0xd8b86140,
21833         0x45ca6d27,0xaac97cc9 } },
21834     /* 176 */
21835     { { 0xa16bd00a,0xeb54ea74,0xf5c0bcc1,0xd839e9ad,0x1f9bfc06,0x092bb7f1,
21836         0x1163dc4e,0x318f97b3 },
21837       { 0xc30d7138,0xecc0c5be,0xabc30220,0x44e8df23,0xb0223606,0x2bb7972f,
21838         0x9a84ff4d,0xfa41faa1 } },
21839     /* 177 */
21840     { { 0xa6642269,0x4402d974,0x9bb783bd,0xc81814ce,0x7941e60b,0x398d38e4,
21841         0x1d26e9e2,0x38bb6b2c },
21842       { 0x6a577f87,0xc64e4a25,0xdc11fe1c,0x8b52d253,0x62280728,0xff336abf,
21843         0xce7601a5,0x94dd0905 } },
21844     /* 178 */
21845     { { 0xde93f92a,0x156cf7dc,0x89b5f315,0xa01333cb,0xc995e750,0x02404df9,
21846         0xd25c2ae9,0x92077867 },
21847       { 0x0bf39d44,0xe2471e01,0x96bb53d7,0x5f2c9020,0x5c9c3d8f,0x4c44b7b3,
21848         0xd29beb51,0x81e8428b } },
21849     /* 179 */
21850     { { 0xc477199f,0x6dd9c2ba,0x6b5ecdd9,0x8cb8eeee,0xee40fd0e,0x8af7db3f,
21851         0xdbbfa4b1,0x1b94ab62 },
21852       { 0xce47f143,0x44f0d8b3,0x63f46163,0x51e623fc,0xcc599383,0xf18f270f,
21853         0x055590ee,0x06a38e28 } },
21854     /* 180 */
21855     { { 0xb3355b49,0x2e5b0139,0xb4ebf99b,0x20e26560,0xd269f3dc,0xc08ffa6b,
21856         0x83d9d4f8,0xa7b36c20 },
21857       { 0x1b3e8830,0x64d15c3a,0xa89f9c0b,0xd5fceae1,0xe2d16930,0xcfeee4a2,
21858         0xa2822a20,0xbe54c6b4 } },
21859     /* 181 */
21860     { { 0x8d91167c,0xd6cdb3df,0xe7a6625e,0x517c3f79,0x346ac7f4,0x7105648f,
21861         0xeae022bb,0xbf30a5ab },
21862       { 0x93828a68,0x8e7785be,0x7f3ef036,0x5161c332,0x592146b2,0xe11b5feb,
21863         0x2732d13a,0xd1c820de } },
21864     /* 182 */
21865     { { 0x9038b363,0x043e1347,0x6b05e519,0x58c11f54,0x6026cad1,0x4fe57abe,
21866         0x68a18da3,0xb7d17bed },
21867       { 0xe29c2559,0x44ca5891,0x5bfffd84,0x4f7a0376,0x74e46948,0x498de4af,
21868         0x6412cc64,0x3997fd5e } },
21869     /* 183 */
21870     { { 0x8bd61507,0xf2074682,0x34a64d2a,0x29e132d5,0x8a8a15e3,0xffeddfb0,
21871         0x3c6c13e8,0x0eeb8929 },
21872       { 0xa7e259f8,0xe9b69a3e,0xd13e7e67,0xce1db7e6,0xad1fa685,0x277318f6,
21873         0xc922b6ef,0x228916f8 } },
21874     /* 184 */
21875     { { 0x0a12ab5b,0x959ae25b,0x957bc136,0xcc11171f,0xd16e2b0c,0x8058429e,
21876         0x6e93097e,0xec05ad1d },
21877       { 0xac3f3708,0x157ba5be,0x30b59d77,0x31baf935,0x118234e5,0x47b55237,
21878         0x7ff11b37,0x7d314156 } },
21879     /* 185 */
21880     { { 0xf6dfefab,0x7bd9c05c,0xdcb37707,0xbe2f2268,0x3a38bb95,0xe53ead97,
21881         0x9bc1d7a3,0xe9ce66fc },
21882       { 0x6f6a02a1,0x75aa1576,0x60e600ed,0x38c087df,0x68cdc1b9,0xf8947f34,
21883         0x72280651,0xd9650b01 } },
21884     /* 186 */
21885     { { 0x5a057e60,0x504b4c4a,0x8def25e4,0xcbccc3be,0x17c1ccbd,0xa6353208,
21886         0x804eb7a2,0x14d6699a },
21887       { 0xdb1f411a,0x2c8a8415,0xf80d769c,0x09fbaf0b,0x1c2f77ad,0xb4deef90,
21888         0x0d43598a,0x6f4c6841 } },
21889     /* 187 */
21890     { { 0x96c24a96,0x8726df4e,0xfcbd99a3,0x534dbc85,0x8b2ae30a,0x3c466ef2,
21891         0x61189abb,0x4c4350fd },
21892       { 0xf855b8da,0x2967f716,0x463c38a1,0x41a42394,0xeae93343,0xc37e1413,
21893         0x5a3118b5,0xa726d242 } },
21894     /* 188 */
21895     { { 0x948c1086,0xdae6b3ee,0xcbd3a2e1,0xf1de503d,0x03d022f3,0x3f35ed3f,
21896         0xcc6cf392,0x13639e82 },
21897       { 0xcdafaa86,0x9ac938fb,0x2654a258,0xf45bc5fb,0x45051329,0x1963b26e,
21898         0xc1a335a3,0xca9365e1 } },
21899     /* 189 */
21900     { { 0x4c3b2d20,0x3615ac75,0x904e241b,0x742a5417,0xcc9d071d,0xb08521c4,
21901         0x970b72a5,0x9ce29c34 },
21902       { 0x6d3e0ad6,0x8cc81f73,0xf2f8434c,0x8060da9e,0x6ce862d9,0x35ed1d1a,
21903         0xab42af98,0x48c4abd7 } },
21904     /* 190 */
21905     { { 0x40c7485a,0xd221b0cc,0xe5274dbf,0xead455bb,0x9263d2e8,0x493c7698,
21906         0xf67b33cb,0x78017c32 },
21907       { 0x930cb5ee,0xb9d35769,0x0c408ed2,0xc0d14e94,0x272f1a4d,0xf8b7bf55,
21908         0xde5c1c04,0x53cd0454 } },
21909     /* 191 */
21910     { { 0x5d28ccac,0xbcd585fa,0x005b746e,0x5f823e56,0xcd0123aa,0x7c79f0a1,
21911         0xd3d7fa8f,0xeea465c1 },
21912       { 0x0551803b,0x7810659f,0x7ce6af70,0x6c0b599f,0x29288e70,0x4195a770,
21913         0x7ae69193,0x1b6e42a4 } },
21914     /* 192 */
21915     { { 0xf67d04c3,0x2e80937c,0x89eeb811,0x1e312be2,0x92594d60,0x56b5d887,
21916         0x187fbd3d,0x0224da14 },
21917       { 0x0c5fe36f,0x87abb863,0x4ef51f5f,0x580f3c60,0xb3b429ec,0x964fb1bf,
21918         0x42bfff33,0x60838ef0 } },
21919     /* 193 */
21920     { { 0x7e0bbe99,0x432cb2f2,0x04aa39ee,0x7bda44f3,0x9fa93903,0x5f497c7a,
21921         0x2d331643,0x636eb202 },
21922       { 0x93ae00aa,0xfcfd0e61,0x31ae6d2f,0x875a00fe,0x9f93901c,0xf43658a2,
21923         0x39218bac,0x8844eeb6 } },
21924     /* 194 */
21925     { { 0x6b3bae58,0x114171d2,0x17e39f3e,0x7db3df71,0x81a8eada,0xcd37bc7f,
21926         0x51fb789e,0x27ba83dc },
21927       { 0xfbf54de5,0xa7df439f,0xb5fe1a71,0x7277030b,0xdb297a48,0x42ee8e35,
21928         0x87f3a4ab,0xadb62d34 } },
21929     /* 195 */
21930     { { 0xa175df2a,0x9b1168a2,0x618c32e9,0x082aa04f,0x146b0916,0xc9e4f2e7,
21931         0x75e7c8b2,0xb990fd76 },
21932       { 0x4df37313,0x0829d96b,0xd0b40789,0x1c205579,0x78087711,0x66c9ae4a,
21933         0x4d10d18d,0x81707ef9 } },
21934     /* 196 */
21935     { { 0x03d6ff96,0x97d7cab2,0x0d843360,0x5b851bfc,0xd042db4b,0x268823c4,
21936         0xd5a8aa5c,0x3792daea },
21937       { 0x941afa0b,0x52818865,0x42d83671,0xf3e9e741,0x5be4e0a7,0x17c82527,
21938         0x94b001ba,0x5abd635e } },
21939     /* 197 */
21940     { { 0x0ac4927c,0x727fa84e,0xa7c8cf23,0xe3886035,0x4adca0df,0xa4bcd5ea,
21941         0x846ab610,0x5995bf21 },
21942       { 0x829dfa33,0xe90f860b,0x958fc18b,0xcaafe2ae,0x78630366,0x9b3baf44,
21943         0xd483411e,0x44c32ca2 } },
21944     /* 198 */
21945     { { 0xe40ed80c,0xa74a97f1,0x31d2ca82,0x5f938cb1,0x7c2d6ad9,0x53f2124b,
21946         0x8082a54c,0x1f2162fb },
21947       { 0x720b173e,0x7e467cc5,0x085f12f9,0x40e8a666,0x4c9d65dc,0x8cebc20e,
21948         0xc3e907c9,0x8f1d402b } },
21949     /* 199 */
21950     { { 0xfbc4058a,0x4f592f9c,0x292f5670,0xb15e14b6,0xbc1d8c57,0xc55cfe37,
21951         0x926edbf9,0xb1980f43 },
21952       { 0x32c76b09,0x98c33e09,0x33b07f78,0x1df5279d,0x863bb461,0x6f08ead4,
21953         0x37448e45,0x2828ad9b } },
21954     /* 200 */
21955     { { 0xc4cf4ac5,0x696722c4,0xdde64afb,0xf5ac1a3f,0xe0890832,0x0551baa2,
21956         0x5a14b390,0x4973f127 },
21957       { 0x322eac5d,0xe59d8335,0x0bd9b568,0x5e07eef5,0xa2588393,0xab36720f,
21958         0xdb168ac7,0x6dac8ed0 } },
21959     /* 201 */
21960     { { 0xeda835ef,0xf7b545ae,0x1d10ed51,0x4aa113d2,0x13741b09,0x035a65e0,
21961         0x20b9de4c,0x4b23ef59 },
21962       { 0x3c4c7341,0xe82bb680,0x3f58bc37,0xd457706d,0xa51e3ee8,0x73527863,
21963         0xddf49a4e,0x4dd71534 } },
21964     /* 202 */
21965     { { 0x95476cd9,0xbf944672,0xe31a725b,0x648d072f,0xfc4b67e0,0x1441c8b8,
21966         0x2f4a4dbb,0xfd317000 },
21967       { 0x8995d0e1,0x1cb43ff4,0x0ef729aa,0x76e695d1,0x41798982,0xe0d5f976,
21968         0x9569f365,0x14fac58c } },
21969     /* 203 */
21970     { { 0xf312ae18,0xad9a0065,0xfcc93fc9,0x51958dc0,0x8a7d2846,0xd9a14240,
21971         0x36abda50,0xed7c7651 },
21972       { 0x25d4abbc,0x46270f1a,0xf1a113ea,0x9b5dd8f3,0x5b51952f,0xc609b075,
21973         0x4d2e9f53,0xfefcb7f7 } },
21974     /* 204 */
21975     { { 0xba119185,0xbd09497a,0xaac45ba4,0xd54e8c30,0xaa521179,0x492479de,
21976         0x87e0d80b,0x1801a57e },
21977       { 0xfcafffb0,0x073d3f8d,0xae255240,0x6cf33c0b,0x5b5fdfbc,0x781d763b,
21978         0x1ead1064,0x9f8fc11e } },
21979     /* 205 */
21980     { { 0x5e69544c,0x1583a171,0xf04b7813,0x0eaf8567,0x278a4c32,0x1e22a8fd,
21981         0x3d3a69a9,0xa9d3809d },
21982       { 0x59a2da3b,0x936c2c2c,0x1895c847,0x38ccbcf6,0x63d50869,0x5e65244e,
21983         0xe1178ef7,0x3006b9ae } },
21984     /* 206 */
21985     { { 0xc9eead28,0x0bb1f2b0,0x89f4dfbc,0x7eef635d,0xb2ce8939,0x074757fd,
21986         0x45f8f761,0x0ab85fd7 },
21987       { 0x3e5b4549,0xecda7c93,0x97922f21,0x4be2bb5c,0xb43b8040,0x261a1274,
21988         0x11e942c2,0xb122d675 } },
21989     /* 207 */
21990     { { 0x66a5ae7a,0x3be607be,0x76adcbe3,0x01e703fa,0x4eb6e5c5,0xaf904301,
21991         0x097dbaec,0x9f599dc1 },
21992       { 0x0ff250ed,0x6d75b718,0x349a20dc,0x8eb91574,0x10b227a3,0x425605a4,
21993         0x8a294b78,0x7d5528e0 } },
21994     /* 208 */
21995     { { 0x20c26def,0xf0f58f66,0x582b2d1e,0x025585ea,0x01ce3881,0xfbe7d79b,
21996         0x303f1730,0x28ccea01 },
21997       { 0x79644ba5,0xd1dabcd1,0x06fff0b8,0x1fc643e8,0x66b3e17b,0xa60a76fc,
21998         0xa1d013bf,0xc18baf48 } },
21999     /* 209 */
22000     { { 0x5dc4216d,0x34e638c8,0x206142ac,0x00c01067,0x95f5064a,0xd453a171,
22001         0xb7a9596b,0x9def809d },
22002       { 0x67ab8d2c,0x41e8642e,0x6237a2b6,0xb4240433,0x64c4218b,0x7d506a6d,
22003         0x68808ce5,0x0357f8b0 } },
22004     /* 210 */
22005     { { 0x4cd2cc88,0x8e9dbe64,0xf0b8f39d,0xcc61c28d,0xcd30a0c8,0x4a309874,
22006         0x1b489887,0xe4a01add },
22007       { 0xf57cd8f9,0x2ed1eeac,0xbd594c48,0x1b767d3e,0x7bd2f787,0xa7295c71,
22008         0xce10cc30,0x466d7d79 } },
22009     /* 211 */
22010     { { 0x9dada2c7,0x47d31892,0x8f9aa27d,0x4fa0a6c3,0x820a59e1,0x90e4fd28,
22011         0x451ead1a,0xc672a522 },
22012       { 0x5d86b655,0x30607cc8,0xf9ad4af1,0xf0235d3b,0x571172a6,0x99a08680,
22013         0xf2a67513,0x5e3d64fa } },
22014     /* 212 */
22015     { { 0x9b3b4416,0xaa6410c7,0xeab26d99,0xcd8fcf85,0xdb656a74,0x5ebff74a,
22016         0xeb8e42fc,0x6c8a7a95 },
22017       { 0xb02a63bd,0x10c60ba7,0x8b8f0047,0x6b2f2303,0x312d90b0,0x8c6c3738,
22018         0xad82ca91,0x348ae422 } },
22019     /* 213 */
22020     { { 0x5ccda2fb,0x7f474663,0x8e0726d2,0x22accaa1,0x492b1f20,0x85adf782,
22021         0xd9ef2d2e,0xc1074de0 },
22022       { 0xae9a65b3,0xfcf3ce44,0x05d7151b,0xfd71e4ac,0xce6a9788,0xd4711f50,
22023         0xc9e54ffc,0xfbadfbdb } },
22024     /* 214 */
22025     { { 0x20a99363,0x1713f1cd,0x6cf22775,0xb915658f,0x24d359b2,0x968175cd,
22026         0x83716fcd,0xb7f976b4 },
22027       { 0x5d6dbf74,0x5758e24d,0x71c3af36,0x8d23bafd,0x0243dfe3,0x48f47760,
22028         0xcafcc805,0xf4d41b2e } },
22029     /* 215 */
22030     { { 0xfdabd48d,0x51f1cf28,0x32c078a4,0xce81be36,0x117146e9,0x6ace2974,
22031         0xe0160f10,0x180824ea },
22032       { 0x66e58358,0x0387698b,0xce6ca358,0x63568752,0x5e41e6c5,0x82380e34,
22033         0x83cf6d25,0x67e5f639 } },
22034     /* 216 */
22035     { { 0xcf4899ef,0xf89ccb8d,0x9ebb44c0,0x949015f0,0xb2598ec9,0x546f9276,
22036         0x04c11fc6,0x9fef789a },
22037       { 0x53d2a071,0x6d367ecf,0xa4519b09,0xb10e1a7f,0x611e2eef,0xca6b3fb0,
22038         0xa99c4e20,0xbc80c181 } },
22039     /* 217 */
22040     { { 0xe5eb82e6,0x972536f8,0xf56cb920,0x1a484fc7,0x50b5da5e,0xc78e2171,
22041         0x9f8cdf10,0x49270e62 },
22042       { 0xea6b50ad,0x1a39b7bb,0xa2388ffc,0x9a0284c1,0x8107197b,0x5403eb17,
22043         0x61372f7f,0xd2ee52f9 } },
22044     /* 218 */
22045     { { 0x88e0362a,0xd37cd285,0x8fa5d94d,0x442fa8a7,0xa434a526,0xaff836e5,
22046         0xe5abb733,0xdfb478be },
22047       { 0x673eede6,0xa91f1ce7,0x2b5b2f04,0xa5390ad4,0x5530da2f,0x5e66f7bf,
22048         0x08df473a,0xd9a140b4 } },
22049     /* 219 */
22050     { { 0x6e8ea498,0x0e0221b5,0x3563ee09,0x62347829,0x335d2ade,0xe06b8391,
22051         0x623f4b1a,0x760c058d },
22052       { 0xc198aa79,0x0b89b58c,0xf07aba7f,0xf74890d2,0xfde2556a,0x4e204110,
22053         0x8f190409,0x7141982d } },
22054     /* 220 */
22055     { { 0x4d4b0f45,0x6f0a0e33,0x392a94e1,0xd9280b38,0xb3c61d5e,0x3af324c6,
22056         0x89d54e47,0x3af9d1ce },
22057       { 0x20930371,0xfd8f7981,0x21c17097,0xeda2664c,0xdc42309b,0x0e9545dc,
22058         0x73957dd6,0xb1f815c3 } },
22059     /* 221 */
22060     { { 0x89fec44a,0x84faa78e,0x3caa4caf,0xc8c2ae47,0xc1b6a624,0x691c807d,
22061         0x1543f052,0xa41aed14 },
22062       { 0x7d5ffe04,0x42435399,0x625b6e20,0x8bacb2df,0x87817775,0x85d660be,
22063         0x86fb60ef,0xd6e9c1dd } },
22064     /* 222 */
22065     { { 0xc6853264,0x3aa2e97e,0xe2304a0b,0x771533b7,0xb8eae9be,0x1b912bb7,
22066         0xae9bf8c2,0x9c9c6e10 },
22067       { 0xe030b74c,0xa2309a59,0x6a631e90,0x4ed7494d,0xa49b79f2,0x89f44b23,
22068         0x40fa61b6,0x566bd596 } },
22069     /* 223 */
22070     { { 0xc18061f3,0x066c0118,0x7c83fc70,0x190b25d3,0x27273245,0xf05fc8e0,
22071         0xf525345e,0xcf2c7390 },
22072       { 0x10eb30cf,0xa09bceb4,0x0d77703a,0xcfd2ebba,0x150ff255,0xe842c43a,
22073         0x8aa20979,0x02f51755 } },
22074     /* 224 */
22075     { { 0xaddb7d07,0x396ef794,0x24455500,0x0b4fc742,0xc78aa3ce,0xfaff8eac,
22076         0xe8d4d97d,0x14e9ada5 },
22077       { 0x2f7079e2,0xdaa480a1,0xe4b0800e,0x45baa3cd,0x7838157d,0x01765e2d,
22078         0x8e9d9ae8,0xa0ad4fab } },
22079     /* 225 */
22080     { { 0x4a653618,0x0bfb7621,0x31eaaa5f,0x1872813c,0x44949d5e,0x1553e737,
22081         0x6e56ed1e,0xbcd530b8 },
22082       { 0x32e9c47b,0x169be853,0xb50059ab,0xdc2776fe,0x192bfbb4,0xcdba9761,
22083         0x6979341d,0x909283cf } },
22084     /* 226 */
22085     { { 0x76e81a13,0x67b00324,0x62171239,0x9bee1a99,0xd32e19d6,0x08ed361b,
22086         0xace1549a,0x35eeb7c9 },
22087       { 0x7e4e5bdc,0x1280ae5a,0xb6ceec6e,0x2dcd2cd3,0x6e266bc1,0x52e4224c,
22088         0x448ae864,0x9a8b2cf4 } },
22089     /* 227 */
22090     { { 0x09d03b59,0xf6471bf2,0xb65af2ab,0xc90e62a3,0xebd5eec9,0xff7ff168,
22091         0xd4491379,0x6bdb60f4 },
22092       { 0x8a55bc30,0xdadafebc,0x10097fe0,0xc79ead16,0x4c1e3bdd,0x42e19741,
22093         0x94ba08a9,0x01ec3cfd } },
22094     /* 228 */
22095     { { 0xdc9485c2,0xba6277eb,0x22fb10c7,0x48cc9a79,0x70a28d8a,0x4f61d60f,
22096         0x475464f6,0xd1acb1c0 },
22097       { 0x26f36612,0xd26902b1,0xe0618d8b,0x59c3a44e,0x308357ee,0x4df8a813,
22098         0x405626c2,0x7dcd079d } },
22099     /* 229 */
22100     { { 0xf05a4b48,0x5ce7d4d3,0x37230772,0xadcd2952,0x812a915a,0xd18f7971,
22101         0x377d19b8,0x0bf53589 },
22102       { 0x6c68ea73,0x35ecd95a,0x823a584d,0xc7f3bbca,0xf473a723,0x9fb674c6,
22103         0xe16686fc,0xd28be4d9 } },
22104     /* 230 */
22105     { { 0x38fa8e4b,0x5d2b9906,0x893fd8fc,0x559f186e,0x436fb6fc,0x3a6de2aa,
22106         0x510f88ce,0xd76007aa },
22107       { 0x523a4988,0x2d10aab6,0x74dd0273,0xb455cf44,0xa3407278,0x7f467082,
22108         0xb303bb01,0xf2b52f68 } },
22109     /* 231 */
22110     { { 0x9835b4ca,0x0d57eafa,0xbb669cbc,0x2d2232fc,0xc6643198,0x8eeeb680,
22111         0xcc5aed3a,0xd8dbe98e },
22112       { 0xc5a02709,0xcba9be3f,0xf5ba1fa8,0x30be68e5,0xf10ea852,0xfebd43cd,
22113         0xee559705,0xe01593a3 } },
22114     /* 232 */
22115     { { 0xea75a0a6,0xd3e5af50,0x57858033,0x512226ac,0xd0176406,0x6fe6d50f,
22116         0xaeb8ef06,0xafec07b1 },
22117       { 0x80bb0a31,0x7fb99567,0x37309aae,0x6f1af3cc,0x01abf389,0x9153a15a,
22118         0x6e2dbfdd,0xa71b9354 } },
22119     /* 233 */
22120     { { 0x18f593d2,0xbf8e12e0,0xa078122b,0xd1a90428,0x0ba4f2ad,0x150505db,
22121         0x628523d9,0x53a2005c },
22122       { 0xe7f2b935,0x07c8b639,0xc182961a,0x2bff975a,0x7518ca2c,0x86bceea7,
22123         0x3d588e3d,0xbf47d19b } },
22124     /* 234 */
22125     { { 0xdd7665d5,0x672967a7,0x2f2f4de5,0x4e303057,0x80d4903f,0x144005ae,
22126         0x39c9a1b6,0x001c2c7f },
22127       { 0x69efc6d6,0x143a8014,0x7bc7a724,0xc810bdaa,0xa78150a4,0x5f65670b,
22128         0x86ffb99b,0xfdadf8e7 } },
22129     /* 235 */
22130     { { 0xffc00785,0xfd38cb88,0x3b48eb67,0x77fa7591,0xbf368fbc,0x0454d055,
22131         0x5aa43c94,0x3a838e4d },
22132       { 0x3e97bb9a,0x56166329,0x441d94d9,0x9eb93363,0x0adb2a83,0x515591a6,
22133         0x873e1da3,0x3cdb8257 } },
22134     /* 236 */
22135     { { 0x7de77eab,0x137140a9,0x41648109,0xf7e1c50d,0xceb1d0df,0x762dcad2,
22136         0xf1f57fba,0x5a60cc89 },
22137       { 0x40d45673,0x80b36382,0x5913c655,0x1b82be19,0xdd64b741,0x057284b8,
22138         0xdbfd8fc0,0x922ff56f } },
22139     /* 237 */
22140     { { 0xc9a129a1,0x1b265dee,0xcc284e04,0xa5b1ce57,0xcebfbe3c,0x04380c46,
22141         0xf6c5cd62,0x72919a7d },
22142       { 0x8fb90f9a,0x298f453a,0x88e4031b,0xd719c00b,0x796f1856,0xe32c0e77,
22143         0x3624089a,0x5e791780 } },
22144     /* 238 */
22145     { { 0x7f63cdfb,0x5c16ec55,0xf1cae4fd,0x8e6a3571,0x560597ca,0xfce26bea,
22146         0xe24c2fab,0x4e0a5371 },
22147       { 0xa5765357,0x276a40d3,0x0d73a2b4,0x3c89af44,0x41d11a32,0xb8f370ae,
22148         0xd56604ee,0xf5ff7818 } },
22149     /* 239 */
22150     { { 0x1a09df21,0xfbf3e3fe,0xe66e8e47,0x26d5d28e,0x29c89015,0x2096bd0a,
22151         0x533f5e64,0xe41df0e9 },
22152       { 0xb3ba9e3f,0x305fda40,0x2604d895,0xf2340ceb,0x7f0367c7,0x0866e192,
22153         0xac4f155f,0x8edd7d6e } },
22154     /* 240 */
22155     { { 0x0bfc8ff3,0xc9a1dc0e,0xe936f42f,0x14efd82b,0xcca381ef,0x67016f7c,
22156         0xed8aee96,0x1432c1ca },
22157       { 0x70b23c26,0xec684829,0x0735b273,0xa64fe873,0xeaef0f5a,0xe389f6e5,
22158         0x5ac8d2c6,0xcaef480b } },
22159     /* 241 */
22160     { { 0x75315922,0x5245c978,0x3063cca5,0xd8295171,0xb64ef2cb,0xf3ce60d0,
22161         0x8efae236,0xd0ba177e },
22162       { 0xb1b3af60,0x53a9ae8f,0x3d2da20e,0x1a796ae5,0xdf9eef28,0x01d63605,
22163         0x1c54ae16,0xf31c957c } },
22164     /* 242 */
22165     { { 0x49cc4597,0xc0f58d52,0xbae0a028,0xdc5015b0,0x734a814a,0xefc5fc55,
22166         0x96e17c3a,0x013404cb },
22167       { 0xc9a824bf,0xb29e2585,0x001eaed7,0xd593185e,0x61ef68ac,0x8d6ee682,
22168         0x91933e6c,0x6f377c4b } },
22169     /* 243 */
22170     { { 0xa8333fd2,0x9f93bad1,0x5a2a95b8,0xa8930202,0xeaf75ace,0x211e5037,
22171         0xd2d09506,0x6dba3e4e },
22172       { 0xd04399cd,0xa48ef98c,0xe6b73ade,0x1811c66e,0xc17ecaf3,0x72f60752,
22173         0x3becf4a7,0xf13cf342 } },
22174     /* 244 */
22175     { { 0xa919e2eb,0xceeb9ec0,0xf62c0f68,0x83a9a195,0x7aba2299,0xcfba3bb6,
22176         0x274bbad3,0xc83fa9a9 },
22177       { 0x62fa1ce0,0x0d7d1b0b,0x3418efbf,0xe58b60f5,0x52706f04,0xbfa8ef9e,
22178         0x5d702683,0xb49d70f4 } },
22179     /* 245 */
22180     { { 0xfad5513b,0x914c7510,0xb1751e2d,0x05f32eec,0xd9fb9d59,0x6d850418,
22181         0x0c30f1cf,0x59cfadbb },
22182       { 0x55cb7fd6,0xe167ac23,0x820426a3,0x249367b8,0x90a78864,0xeaeec58c,
22183         0x354a4b67,0x5babf362 } },
22184     /* 246 */
22185     { { 0xee424865,0x37c981d1,0xf2e5577f,0x8b002878,0xb9e0c058,0x702970f1,
22186         0x9026c8f0,0x6188c6a7 },
22187       { 0xd0f244da,0x06f9a19b,0xfb080873,0x1ecced5c,0x9f213637,0x35470f9b,
22188         0xdf50b9d9,0x993fe475 } },
22189     /* 247 */
22190     { { 0x9b2c3609,0x68e31cdf,0x2c46d4ea,0x84eb19c0,0x9a775101,0x7ac9ec1a,
22191         0x4c80616b,0x81f76466 },
22192       { 0x75fbe978,0x1d7c2a5a,0xf183b356,0x6743fed3,0x501dd2bf,0x838d1f04,
22193         0x5fe9060d,0x564a812a } },
22194     /* 248 */
22195     { { 0xfa817d1d,0x7a5a64f4,0xbea82e0f,0x55f96844,0xcd57f9aa,0xb5ff5a0f,
22196         0x00e51d6c,0x226bf3cf },
22197       { 0x2f2833cf,0xd6d1a9f9,0x4f4f89a8,0x20a0a35a,0x8f3f7f77,0x11536c49,
22198         0xff257836,0x68779f47 } },
22199     /* 249 */
22200     { { 0x73043d08,0x79b0c1c1,0x1fc020fa,0xa5446774,0x9a6d26d0,0xd3767e28,
22201         0xeb092e0b,0x97bcb0d1 },
22202       { 0xf32ed3c3,0x2ab6eaa8,0xb281bc48,0xc8a4f151,0xbfa178f3,0x4d1bf4f3,
22203         0x0a784655,0xa872ffe8 } },
22204     /* 250 */
22205     { { 0xa32b2086,0xb1ab7935,0x8160f486,0xe1eb710e,0x3b6ae6be,0x9bd0cd91,
22206         0xb732a36a,0x02812bfc },
22207       { 0xcf605318,0xa63fd7ca,0xfdfd6d1d,0x646e5d50,0x2102d619,0xa1d68398,
22208         0xfe5396af,0x07391cc9 } },
22209     /* 251 */
22210     { { 0x8b80d02b,0xc50157f0,0x62877f7f,0x6b8333d1,0x78d542ae,0x7aca1af8,
22211         0x7e6d2a08,0x355d2adc },
22212       { 0x287386e1,0xb41f335a,0xf8e43275,0xfd272a94,0xe79989ea,0x286ca2cd,
22213         0x7c2a3a79,0x3dc2b1e3 } },
22214     /* 252 */
22215     { { 0x04581352,0xd689d21c,0x376782be,0x0a00c825,0x9fed701f,0x203bd590,
22216         0x3ccd846b,0xc4786910 },
22217       { 0x24c768ed,0x5dba7708,0x6841f657,0x72feea02,0x6accce0e,0x73313ed5,
22218         0xd5bb4d32,0xccc42968 } },
22219     /* 253 */
22220     { { 0x3d7620b9,0x94e50de1,0x5992a56a,0xd89a5c8a,0x675487c9,0xdc007640,
22221         0xaa4871cf,0xe147eb42 },
22222       { 0xacf3ae46,0x274ab4ee,0x50350fbe,0xfd4936fb,0x48c840ea,0xdf2afe47,
22223         0x080e96e3,0x239ac047 } },
22224     /* 254 */
22225     { { 0x2bfee8d4,0x481d1f35,0xfa7b0fec,0xce80b5cf,0x2ce9af3c,0x105c4c9e,
22226         0xf5f7e59d,0xc55fa1a3 },
22227       { 0x8257c227,0x3186f14e,0x342be00b,0xc5b1653f,0xaa904fb2,0x09afc998,
22228         0xd4f4b699,0x094cd99c } },
22229     /* 255 */
22230     { { 0xd703beba,0x8a981c84,0x32ceb291,0x8631d150,0xe3bd49ec,0xa445f2c9,
22231         0x42abad33,0xb90a30b6 },
22232       { 0xb4a5abf9,0xb465404f,0x75db7603,0x004750c3,0xca35d89f,0x6f9a42cc,
22233         0x1b7924f7,0x019f8b9a } },
22234 };
22235 
22236 /* Multiply the base point of P256 by the scalar and return the result.
22237  * If map is true then convert result to affine coordinates.
22238  *
22239  * Stripe implementation.
22240  * Pre-generated: 2^0, 2^32, ...
22241  * Pre-generated: products of all combinations of above.
22242  * 8 doubles and adds (with qz=1)
22243  *
22244  * r     Resulting point.
22245  * k     Scalar to multiply by.
22246  * map   Indicates whether to convert result to affine.
22247  * ct    Constant time required.
22248  * heap  Heap to use for allocation.
22249  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
22250  */
sp_256_ecc_mulmod_base_8(sp_point_256 * r,const sp_digit * k,int map,int ct,void * heap)22251 static int sp_256_ecc_mulmod_base_8(sp_point_256* r, const sp_digit* k,
22252         int map, int ct, void* heap)
22253 {
22254     return sp_256_ecc_mulmod_stripe_8(r, &p256_base, p256_table,
22255                                       k, map, ct, heap);
22256 }
22257 
22258 #endif
22259 
22260 /* Multiply the base point of P256 by the scalar and return the result.
22261  * If map is true then convert result to affine coordinates.
22262  *
22263  * km    Scalar to multiply by.
22264  * r     Resulting point.
22265  * map   Indicates whether to convert result to affine.
22266  * heap  Heap to use for allocation.
22267  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
22268  */
sp_ecc_mulmod_base_256(const mp_int * km,ecc_point * r,int map,void * heap)22269 int sp_ecc_mulmod_base_256(const mp_int* km, ecc_point* r, int map, void* heap)
22270 {
22271 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
22272     sp_point_256* point = NULL;
22273     sp_digit* k = NULL;
22274 #else
22275     sp_point_256  point[1];
22276     sp_digit k[8];
22277 #endif
22278     int err = MP_OKAY;
22279 
22280 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
22281     point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap,
22282                                          DYNAMIC_TYPE_ECC);
22283     if (point == NULL)
22284         err = MEMORY_E;
22285     if (err == MP_OKAY) {
22286         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
22287                                DYNAMIC_TYPE_ECC);
22288         if (k == NULL)
22289             err = MEMORY_E;
22290     }
22291 #endif
22292 
22293     if (err == MP_OKAY) {
22294         sp_256_from_mp(k, 8, km);
22295 
22296             err = sp_256_ecc_mulmod_base_8(point, k, map, 1, heap);
22297     }
22298     if (err == MP_OKAY) {
22299         err = sp_256_point_to_ecc_point_8(point, r);
22300     }
22301 
22302 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
22303     if (k != NULL)
22304         XFREE(k, heap, DYNAMIC_TYPE_ECC);
22305     if (point != NULL)
22306         XFREE(point, heap, DYNAMIC_TYPE_ECC);
22307 #endif
22308 
22309     return err;
22310 }
22311 
22312 /* Multiply the base point of P256 by the scalar, add point a and return
22313  * the result. If map is true then convert result to affine coordinates.
22314  *
22315  * km      Scalar to multiply by.
22316  * am      Point to add to scalar mulitply result.
22317  * inMont  Point to add is in montgomery form.
22318  * r       Resulting point.
22319  * map     Indicates whether to convert result to affine.
22320  * heap    Heap to use for allocation.
22321  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
22322  */
sp_ecc_mulmod_base_add_256(const mp_int * km,const ecc_point * am,int inMont,ecc_point * r,int map,void * heap)22323 int sp_ecc_mulmod_base_add_256(const mp_int* km, const ecc_point* am,
22324         int inMont, ecc_point* r, int map, void* heap)
22325 {
22326 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
22327     sp_point_256* point = NULL;
22328     sp_digit* k = NULL;
22329 #else
22330     sp_point_256 point[2];
22331     sp_digit k[8 + 8 * 2 * 5];
22332 #endif
22333     sp_point_256* addP = NULL;
22334     sp_digit* tmp = NULL;
22335     int err = MP_OKAY;
22336 
22337 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
22338     point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap,
22339                                          DYNAMIC_TYPE_ECC);
22340     if (point == NULL)
22341         err = MEMORY_E;
22342     if (err == MP_OKAY) {
22343         k = (sp_digit*)XMALLOC(
22344             sizeof(sp_digit) * (8 + 8 * 2 * 5),
22345             heap, DYNAMIC_TYPE_ECC);
22346         if (k == NULL)
22347             err = MEMORY_E;
22348     }
22349 #endif
22350 
22351     if (err == MP_OKAY) {
22352         addP = point + 1;
22353         tmp = k + 8;
22354 
22355         sp_256_from_mp(k, 8, km);
22356         sp_256_point_from_ecc_point_8(addP, am);
22357     }
22358     if ((err == MP_OKAY) && (!inMont)) {
22359         err = sp_256_mod_mul_norm_8(addP->x, addP->x, p256_mod);
22360     }
22361     if ((err == MP_OKAY) && (!inMont)) {
22362         err = sp_256_mod_mul_norm_8(addP->y, addP->y, p256_mod);
22363     }
22364     if ((err == MP_OKAY) && (!inMont)) {
22365         err = sp_256_mod_mul_norm_8(addP->z, addP->z, p256_mod);
22366     }
22367     if (err == MP_OKAY) {
22368             err = sp_256_ecc_mulmod_base_8(point, k, 0, 0, heap);
22369     }
22370     if (err == MP_OKAY) {
22371             sp_256_proj_point_add_8(point, point, addP, tmp);
22372 
22373         if (map) {
22374                 sp_256_map_8(point, point, tmp);
22375         }
22376 
22377         err = sp_256_point_to_ecc_point_8(point, r);
22378     }
22379 
22380 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
22381     if (k != NULL)
22382         XFREE(k, heap, DYNAMIC_TYPE_ECC);
22383     if (point)
22384         XFREE(point, heap, DYNAMIC_TYPE_ECC);
22385 #endif
22386 
22387     return err;
22388 }
22389 
22390 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
22391                                                         defined(HAVE_ECC_VERIFY)
22392 /* Returns 1 if the number of zero.
22393  * Implementation is constant time.
22394  *
22395  * a  Number to check.
22396  * returns 1 if the number is zero and 0 otherwise.
22397  */
sp_256_iszero_8(const sp_digit * a)22398 static int sp_256_iszero_8(const sp_digit* a)
22399 {
22400     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7]) == 0;
22401 }
22402 
22403 #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */
22404 /* Add 1 to a. (a = a + 1)
22405  *
22406  * a  A single precision integer.
22407  */
sp_256_add_one_8(sp_digit * a)22408 SP_NOINLINE static void sp_256_add_one_8(sp_digit* a)
22409 {
22410     __asm__ __volatile__ (
22411         "mov	r2, #1\n\t"
22412         "ldr	r1, [%[a], #0]\n\t"
22413         "adds	r1, r1, r2\n\t"
22414         "mov	r2, #0\n\t"
22415         "str	r1, [%[a], #0]\n\t"
22416         "ldr	r1, [%[a], #4]\n\t"
22417         "adcs	r1, r1, r2\n\t"
22418         "str	r1, [%[a], #4]\n\t"
22419         "ldr	r1, [%[a], #8]\n\t"
22420         "adcs	r1, r1, r2\n\t"
22421         "str	r1, [%[a], #8]\n\t"
22422         "ldr	r1, [%[a], #12]\n\t"
22423         "adcs	r1, r1, r2\n\t"
22424         "str	r1, [%[a], #12]\n\t"
22425         "ldr	r1, [%[a], #16]\n\t"
22426         "adcs	r1, r1, r2\n\t"
22427         "str	r1, [%[a], #16]\n\t"
22428         "ldr	r1, [%[a], #20]\n\t"
22429         "adcs	r1, r1, r2\n\t"
22430         "str	r1, [%[a], #20]\n\t"
22431         "ldr	r1, [%[a], #24]\n\t"
22432         "adcs	r1, r1, r2\n\t"
22433         "str	r1, [%[a], #24]\n\t"
22434         "ldr	r1, [%[a], #28]\n\t"
22435         "adcs	r1, r1, r2\n\t"
22436         "str	r1, [%[a], #28]\n\t"
22437         :
22438         : [a] "r" (a)
22439         : "memory", "r1", "r2"
22440     );
22441 }
22442 
22443 /* Read big endian unsigned byte array into r.
22444  *
22445  * r  A single precision integer.
22446  * size  Maximum number of bytes to convert
22447  * a  Byte array.
22448  * n  Number of bytes in array to read.
22449  */
sp_256_from_bin(sp_digit * r,int size,const byte * a,int n)22450 static void sp_256_from_bin(sp_digit* r, int size, const byte* a, int n)
22451 {
22452     int i;
22453     int j = 0;
22454     word32 s = 0;
22455 
22456     r[0] = 0;
22457     for (i = n-1; i >= 0; i--) {
22458         r[j] |= (((sp_digit)a[i]) << s);
22459         if (s >= 24U) {
22460             r[j] &= 0xffffffff;
22461             s = 32U - s;
22462             if (j + 1 >= size) {
22463                 break;
22464             }
22465             r[++j] = (sp_digit)a[i] >> s;
22466             s = 8U - s;
22467         }
22468         else {
22469             s += 8U;
22470         }
22471     }
22472 
22473     for (j++; j < size; j++) {
22474         r[j] = 0;
22475     }
22476 }
22477 
22478 /* Generates a scalar that is in the range 1..order-1.
22479  *
22480  * rng  Random number generator.
22481  * k    Scalar value.
22482  * returns RNG failures, MEMORY_E when memory allocation fails and
22483  * MP_OKAY on success.
22484  */
sp_256_ecc_gen_k_8(WC_RNG * rng,sp_digit * k)22485 static int sp_256_ecc_gen_k_8(WC_RNG* rng, sp_digit* k)
22486 {
22487     int err;
22488     byte buf[32];
22489 
22490     do {
22491         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
22492         if (err == 0) {
22493             sp_256_from_bin(k, 8, buf, (int)sizeof(buf));
22494             if (sp_256_cmp_8(k, p256_order2) <= 0) {
22495                 sp_256_add_one_8(k);
22496                 break;
22497             }
22498         }
22499     }
22500     while (err == 0);
22501 
22502     return err;
22503 }
22504 
/* Makes a random EC key pair.
 *
 * A random scalar is generated with sp_256_ecc_gen_k_8() and the public point
 * is derived from it with sp_256_ecc_mulmod_base_8(). The scalar is returned
 * in priv and the point in pub.
 *
 * When WOLFSSL_SMALL_STACK is defined and WOLFSSL_SP_NO_MALLOC is not, the
 * working point(s) and scalar are allocated from heap; otherwise they live on
 * the stack.
 *
 * rng   Random number generator.
 * priv  Generated private value.
 * pub   Generated public point.
 * heap  Heap to use for allocation.
 * returns ECC_INF_E when the point does not have the correct order, RNG
 * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_make_key_256(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256* point = NULL;
    sp_digit* k = NULL;
#else
    /* A second point is only needed to hold the result of the order check. */
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_256 point[2];
    #else
    sp_point_256 point[1];
    #endif
    sp_digit k[8];
#endif
#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    sp_point_256* infinity = NULL;
#endif
    int err = MP_OKAY;


    /* heap is unused when nothing is dynamically allocated. */
    (void)heap;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    point = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap, DYNAMIC_TYPE_ECC);
    #else
    point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap, DYNAMIC_TYPE_ECC);
    #endif
    if (point == NULL)
        err = MEMORY_E;
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
                               DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
    #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
        /* Second element of the point array holds the order check result. */
        infinity = point + 1;
    #endif

        /* Private scalar. */
        err = sp_256_ecc_gen_k_8(rng, k);
    }
    if (err == MP_OKAY) {
            /* Public point derived from the private scalar. */
            err = sp_256_ecc_mulmod_base_8(point, k, 1, 1, NULL);
    }

#ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
    if (err == MP_OKAY) {
            /* infinity = order * point. */
            err = sp_256_ecc_mulmod_8(infinity, point, p256_order, 1, 1, NULL);
    }
    if (err == MP_OKAY) {
        /* NOTE(review): the test below inspects point, not the infinity
         * result computed just above - confirm this is the intended check. */
        if (sp_256_iszero_8(point->x) || sp_256_iszero_8(point->y)) {
            err = ECC_INF_E;
        }
    }
#endif

    /* Export the scalar and point in the caller's representations. */
    if (err == MP_OKAY) {
        err = sp_256_to_mp(k, priv);
    }
    if (err == MP_OKAY) {
        err = sp_256_point_to_ecc_point_8(point, pub);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* NOTE(review): k holds the private scalar and is released without being
     * cleared - confirm whether zeroization is required here. */
    if (k != NULL)
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    if (point != NULL) {
        /* point is not sensitive, so no need to zeroize */
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
22591 
22592 #ifdef HAVE_ECC_DHE
22593 /* Write r as big endian to byte array.
22594  * Fixed length number of bytes written: 32
22595  *
22596  * r  A single precision integer.
22597  * a  Byte array.
22598  */
sp_256_to_bin_8(sp_digit * r,byte * a)22599 static void sp_256_to_bin_8(sp_digit* r, byte* a)
22600 {
22601     int i;
22602     int j;
22603     int s = 0;
22604     int b;
22605 
22606     j = 256 / 8 - 1;
22607     a[j] = 0;
22608     for (i=0; i<8 && j>=0; i++) {
22609         b = 0;
22610         /* lint allow cast of mismatch sp_digit and int */
22611         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
22612         b += 8 - s;
22613         if (j < 0) {
22614             break;
22615         }
22616         while (b < 32) {
22617             a[j--] = (byte)(r[i] >> b);
22618             b += 8;
22619             if (j < 0) {
22620                 break;
22621             }
22622         }
22623         s = 8 - (b - 32);
22624         if (j >= 0) {
22625             a[j] = 0;
22626         }
22627         if (s != 0) {
22628             j++;
22629         }
22630     }
22631 }
22632 
/* Multiply the point by the scalar and serialize the X ordinate.
 * The number is 0 padded to maximum size on output.
 *
 * The output is always 32 bytes, written big endian by sp_256_to_bin_8().
 *
 * priv    Scalar to multiply the point by.
 * pub     Point to multiply.
 * out     Buffer to hold X ordinate.
 * outLen  On entry, size of the buffer in bytes.
 *         On exit, length of data in buffer in bytes.
 * heap    Heap to use for allocation.
 * returns BUFFER_E if the buffer is too small for output size,
 * MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_secret_gen_256(const mp_int* priv, const ecc_point* pub, byte* out,
                          word32* outLen, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_256* point = NULL;
    sp_digit* k = NULL;
#else
    sp_point_256 point[1];
    sp_digit k[8];
#endif
    int err = MP_OKAY;

    /* The serialized X ordinate needs 32 bytes. */
    if (*outLen < 32U) {
        err = BUFFER_E;
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap,
                                         DYNAMIC_TYPE_ECC);
        if (point == NULL)
            err = MEMORY_E;
    }
    if (err == MP_OKAY) {
        k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
                               DYNAMIC_TYPE_ECC);
        if (k == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        /* Load scalar and point, then multiply in place: point = k * point. */
        sp_256_from_mp(k, 8, priv);
        sp_256_point_from_ecc_point_8(point, pub);
            err = sp_256_ecc_mulmod_8(point, point, k, 1, 1, heap);
    }
    if (err == MP_OKAY) {
        /* Only the X ordinate of the result is output. */
        sp_256_to_bin_8(point->x, out);
        *outLen = 32;
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* NOTE(review): k (copy of the private scalar) and point (result of the
     * multiplication) are released without being cleared - confirm whether
     * zeroization is required here. */
    if (k != NULL)
        XFREE(k, heap, DYNAMIC_TYPE_ECC);
    if (point != NULL)
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
#endif

    return err;
}
22695 #endif /* HAVE_ECC_DHE */
22696 
22697 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
22698 #endif
22699 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
22700 #ifdef WOLFSSL_SP_SMALL
22701 /* Sub b from a into a. (a -= b)
22702  *
22703  * a  A single precision integer.
22704  * b  A single precision integer.
22705  */
sp_256_sub_in_place_8(sp_digit * a,const sp_digit * b)22706 SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
22707         const sp_digit* b)
22708 {
22709     sp_digit c = 0;
22710     __asm__ __volatile__ (
22711         "mov	r8, %[a]\n\t"
22712         "add	r8, r8, #32\n\t"
22713         "\n1:\n\t"
22714         "mov	r5, #0\n\t"
22715         "subs	r5, r5, %[c]\n\t"
22716         "ldr	r3, [%[a]]\n\t"
22717         "ldr	r4, [%[a], #4]\n\t"
22718         "ldr	r5, [%[b]]\n\t"
22719         "ldr	r6, [%[b], #4]\n\t"
22720         "sbcs	r3, r3, r5\n\t"
22721         "sbcs	r4, r4, r6\n\t"
22722         "str	r3, [%[a]]\n\t"
22723         "str	r4, [%[a], #4]\n\t"
22724         "sbc	%[c], %[c], %[c]\n\t"
22725         "add	%[a], %[a], #8\n\t"
22726         "add	%[b], %[b], #8\n\t"
22727         "cmp	%[a], r8\n\t"
22728 #ifdef __GNUC__
22729         "bne	1b\n\t"
22730 #else
22731         "bne.n	1b\n\t"
22732 #endif /* __GNUC__ */
22733         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
22734         :
22735         : "memory", "r3", "r4", "r5", "r6", "r8"
22736     );
22737 
22738     return c;
22739 }
22740 
22741 #else
22742 /* Sub b from a into r. (r = a - b)
22743  *
22744  * r  A single precision integer.
22745  * a  A single precision integer.
22746  * b  A single precision integer.
22747  */
sp_256_sub_in_place_8(sp_digit * a,const sp_digit * b)22748 SP_NOINLINE static sp_digit sp_256_sub_in_place_8(sp_digit* a,
22749         const sp_digit* b)
22750 {
22751     sp_digit c = 0;
22752 
22753     __asm__ __volatile__ (
22754         "ldm	%[a], {r3, r4}\n\t"
22755         "ldm	%[b]!, {r5, r6}\n\t"
22756         "subs	r3, r3, r5\n\t"
22757         "sbcs	r4, r4, r6\n\t"
22758         "stm	%[a]!, {r3, r4}\n\t"
22759         "ldm	%[a], {r3, r4}\n\t"
22760         "ldm	%[b]!, {r5, r6}\n\t"
22761         "sbcs	r3, r3, r5\n\t"
22762         "sbcs	r4, r4, r6\n\t"
22763         "stm	%[a]!, {r3, r4}\n\t"
22764         "ldm	%[a], {r3, r4}\n\t"
22765         "ldm	%[b]!, {r5, r6}\n\t"
22766         "sbcs	r3, r3, r5\n\t"
22767         "sbcs	r4, r4, r6\n\t"
22768         "stm	%[a]!, {r3, r4}\n\t"
22769         "ldm	%[a], {r3, r4}\n\t"
22770         "ldm	%[b]!, {r5, r6}\n\t"
22771         "sbcs	r3, r3, r5\n\t"
22772         "sbcs	r4, r4, r6\n\t"
22773         "stm	%[a]!, {r3, r4}\n\t"
22774         "sbc	%[c], %[c], %[c]\n\t"
22775         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
22776         :
22777         : "memory", "r3", "r4", "r5", "r6"
22778     );
22779 
22780     return c;
22781 }
22782 
22783 #endif /* WOLFSSL_SP_SMALL */
22784 /* Mul a by digit b into r. (r = a * b)
22785  *
22786  * r  A single precision integer.
22787  * a  A single precision integer.
22788  * b  A single precision digit.
22789  */
sp_256_mul_d_8(sp_digit * r,const sp_digit * a,sp_digit b)22790 SP_NOINLINE static void sp_256_mul_d_8(sp_digit* r, const sp_digit* a,
22791         sp_digit b)
22792 {
22793     __asm__ __volatile__ (
22794         "add	r9, %[a], #32\n\t"
22795         /* A[0] * B */
22796         "ldr	r6, [%[a]], #4\n\t"
22797         "umull	r5, r3, r6, %[b]\n\t"
22798         "mov	r4, #0\n\t"
22799         "str	r5, [%[r]], #4\n\t"
22800         /* A[0] * B - Done */
22801         "\n1:\n\t"
22802         "mov	r5, #0\n\t"
22803         /* A[] * B */
22804         "ldr	r6, [%[a]], #4\n\t"
22805         "umull	r6, r8, r6, %[b]\n\t"
22806         "adds	r3, r3, r6\n\t"
22807         "adcs 	r4, r4, r8\n\t"
22808         "adc	r5, r5, #0\n\t"
22809         /* A[] * B - Done */
22810         "str	r3, [%[r]], #4\n\t"
22811         "mov	r3, r4\n\t"
22812         "mov	r4, r5\n\t"
22813         "cmp	%[a], r9\n\t"
22814 #ifdef __GNUC__
22815         "blt	1b\n\t"
22816 #else
22817         "blt.n	1b\n\t"
22818 #endif /* __GNUC__ */
22819         "str	r3, [%[r]]\n\t"
22820         : [r] "+r" (r), [a] "+r" (a)
22821         : [b] "r" (b)
22822         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
22823     );
22824 }
22825 
22826 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
22827  *
22828  * d1   The high order half of the number to divide.
22829  * d0   The low order half of the number to divide.
22830  * div  The dividend.
22831  * returns the result of the division.
22832  *
22833  * Note that this is an approximate div. It may give an answer 1 larger.
22834  */
div_256_word_8(sp_digit d1,sp_digit d0,sp_digit div)22835 SP_NOINLINE static sp_digit div_256_word_8(sp_digit d1, sp_digit d0,
22836         sp_digit div)
22837 {
22838     sp_digit r = 0;
22839 
22840     __asm__ __volatile__ (
22841         "lsr	r6, %[div], #16\n\t"
22842         "add	r6, r6, #1\n\t"
22843         "udiv	r4, %[d1], r6\n\t"
22844         "lsl	r8, r4, #16\n\t"
22845         "umull	r4, r5, %[div], r8\n\t"
22846         "subs	%[d0], %[d0], r4\n\t"
22847         "sbc	%[d1], %[d1], r5\n\t"
22848         "udiv	r5, %[d1], r6\n\t"
22849         "lsl	r4, r5, #16\n\t"
22850         "add	r8, r8, r4\n\t"
22851         "umull	r4, r5, %[div], r4\n\t"
22852         "subs	%[d0], %[d0], r4\n\t"
22853         "sbc	%[d1], %[d1], r5\n\t"
22854         "lsl	r4, %[d1], #16\n\t"
22855         "orr	r4, r4, %[d0], lsr #16\n\t"
22856         "udiv	r4, r4, r6\n\t"
22857         "add	r8, r8, r4\n\t"
22858         "umull	r4, r5, %[div], r4\n\t"
22859         "subs	%[d0], %[d0], r4\n\t"
22860         "sbc	%[d1], %[d1], r5\n\t"
22861         "lsl	r4, %[d1], #16\n\t"
22862         "orr	r4, r4, %[d0], lsr #16\n\t"
22863         "udiv	r4, r4, r6\n\t"
22864         "add	r8, r8, r4\n\t"
22865         "umull	r4, r5, %[div], r4\n\t"
22866         "subs	%[d0], %[d0], r4\n\t"
22867         "sbc	%[d1], %[d1], r5\n\t"
22868         "udiv	r4, %[d0], %[div]\n\t"
22869         "add	r8, r8, r4\n\t"
22870         "mov	%[r], r8\n\t"
22871         : [r] "+r" (r)
22872         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
22873         : "r4", "r5", "r6", "r8"
22874     );
22875     return r;
22876 }
22877 
22878 /* AND m into each word of a and store in r.
22879  *
22880  * r  A single precision integer.
22881  * a  A single precision integer.
22882  * m  Mask to AND against each digit.
22883  */
sp_256_mask_8(sp_digit * r,const sp_digit * a,sp_digit m)22884 static void sp_256_mask_8(sp_digit* r, const sp_digit* a, sp_digit m)
22885 {
22886 #ifdef WOLFSSL_SP_SMALL
22887     int i;
22888 
22889     for (i=0; i<8; i++) {
22890         r[i] = a[i] & m;
22891     }
22892 #else
22893     r[0] = a[0] & m;
22894     r[1] = a[1] & m;
22895     r[2] = a[2] & m;
22896     r[3] = a[3] & m;
22897     r[4] = a[4] & m;
22898     r[5] = a[5] & m;
22899     r[6] = a[6] & m;
22900     r[7] = a[7] & m;
22901 #endif
22902 }
22903 
/* Divide d in a and put remainder into r (m*d + r = a)
 * m is not calculated as it is not needed at this time.
 *
 * Schoolbook division on a working copy of a: for each position, from the
 * top down, a quotient digit is estimated from the top digit of d, the
 * estimate times d is subtracted, and d is conditionally added back (up to
 * twice) to correct an over-estimate.
 *
 * a  Number to be divided. 2 * 8 digits are read.
 * d  Number to divide with.
 * m  Multiplier result. Unused.
 * r  Remainder from the division.
 * returns MP_OKAY indicating success.
 */
static WC_INLINE int sp_256_div_8(const sp_digit* a, const sp_digit* d, sp_digit* m,
        sp_digit* r)
{
    /* t1: working copy of a; t2: 8 digit product plus carry digit. */
    sp_digit t1[16], t2[9];
    sp_digit div, r1;
    int i;

    (void)m;

    /* Top digit of the divisor drives the quotient estimates. */
    div = d[7];
    XMEMCPY(t1, a, sizeof(*t1) * 2 * 8);
    for (i=7; i>=0; i--) {
        /* Use one less than the top digit when it equals div. */
        sp_digit hi = t1[8 + i] - (t1[8 + i] == div);
        r1 = div_256_word_8(hi, t1[8 + i - 1], div);

        /* Subtract estimate * d from the current window of t1. */
        sp_256_mul_d_8(t2, d, r1);
        t1[8 + i] += sp_256_sub_in_place_8(&t1[i], t2);
        t1[8 + i] -= t2[8];
        /* Add d back, masked by the top digit, to undo an over-estimate.
         * Done twice without branching on the data. */
        sp_256_mask_8(t2, d, t1[8 + i]);
        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
        sp_256_mask_8(t2, d, t1[8 + i]);
        t1[8 + i] += sp_256_add_8(&t1[i], &t1[i], t2);
    }

    /* Final correction: mask is all ones when t1 >= d, zero otherwise. */
    r1 = sp_256_cmp_8(t1, d) >= 0;
    sp_256_cond_sub_8(r, t1, d, (sp_digit)0 - r1);

    return MP_OKAY;
}
22942 
22943 /* Reduce a modulo m into r. (r = a mod m)
22944  *
22945  * r  A single precision number that is the reduced result.
22946  * a  A single precision number that is to be reduced.
22947  * m  A single precision number that is the modulus to reduce with.
22948  * returns MP_OKAY indicating success.
22949  */
sp_256_mod_8(sp_digit * r,const sp_digit * a,const sp_digit * m)22950 static WC_INLINE int sp_256_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
22951 {
22952     return sp_256_div_8(a, m, NULL, r);
22953 }
22954 
22955 #endif
22956 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* Multiply two number mod the order of P256 curve. (r = a * b mod order)
 *
 * The full product is written to r by sp_256_mul_8() and then reduced in
 * place by sp_256_mont_reduce_order_8().
 *
 * r  Result of the multiplication.
 * a  First operand of the multiplication.
 * b  Second operand of the multiplication.
 */
static void sp_256_mont_mul_order_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
{
    sp_256_mul_8(r, a, b);
    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
}
22968 
22969 #if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL))
#ifdef WOLFSSL_SP_SMALL
/* Order-2 for the P256 curve, least significant 32-bit word first.
 * Scanned bit by bit as the exponent when inverting modulo the order. */
static const uint32_t p256_order_minus_2[8] = {
    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU,0xffffffffU,0xffffffffU,
    0x00000000U,0xffffffffU
};
#else
/* The low half of the order-2 of the P256 curve, least significant 32-bit
 * word first. */
static const sp_int_digit p256_order_low[4] = {
    0xfc63254fU,0xf3b9cac2U,0xa7179e84U,0xbce6faadU
};
#endif /* WOLFSSL_SP_SMALL */
22982 
/* Square number mod the order of P256 curve. (r = a * a mod order)
 *
 * The full square is written to r by sp_256_sqr_8() and then reduced in
 * place by sp_256_mont_reduce_order_8().
 *
 * r  Result of the squaring.
 * a  Number to square.
 */
static void sp_256_mont_sqr_order_8(sp_digit* r, const sp_digit* a)
{
    sp_256_sqr_8(r, a);
    sp_256_mont_reduce_order_8(r, p256_order, p256_mp_order);
}
22993 
22994 #ifndef WOLFSSL_SP_SMALL
22995 /* Square number mod the order of P256 curve a number of times.
22996  * (r = a ^ n mod order)
22997  *
22998  * r  Result of the squaring.
22999  * a  Number to square.
23000  */
sp_256_mont_sqr_n_order_8(sp_digit * r,const sp_digit * a,int n)23001 static void sp_256_mont_sqr_n_order_8(sp_digit* r, const sp_digit* a, int n)
23002 {
23003     int i;
23004 
23005     sp_256_mont_sqr_order_8(r, a);
23006     for (i=1; i<n; i++) {
23007         sp_256_mont_sqr_order_8(r, r);
23008     }
23009 }
23010 #endif /* !WOLFSSL_SP_SMALL */
23011 
23012 /* Invert the number, in Montgomery form, modulo the order of the P256 curve.
23013  * (r = 1 / a mod order)
23014  *
23015  * r   Inverse result.
23016  * a   Number to invert.
23017  * td  Temporary data.
23018  */
23019 
23020 #ifdef WOLFSSL_SP_NONBLOCK
/* State kept between calls of sp_256_mont_inv_order_8_nb(). */
typedef struct sp_256_mont_inv_order_8_ctx {
    int state;  /* Step to run next: 0 init, 1 square, 2 multiply, 3 done. */
    int i;      /* Bit of the exponent (order - 2) being processed. */
} sp_256_mont_inv_order_8_ctx;

/* Non-blocking inversion, in Montgomery form, modulo the order of the P256
 * curve. (r = 1 / a mod order)
 *
 * Computes a ^ (order - 2) by square-and-multiply over the bits of
 * p256_order_minus_2, performing one small step per call. The context must
 * be zeroed before the first call. r may be the same buffer as a, as r is
 * only written in the final step.
 *
 * sp_ctx  Context holding the state between calls.
 * r       Inverse result.
 * a       Number to invert.
 * t       Temporary data.
 * returns FP_WOULDBLOCK while more calls are needed and MP_OKAY when r holds
 * the result.
 */
static int sp_256_mont_inv_order_8_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const sp_digit* a,
        sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_256_mont_inv_order_8_ctx* ctx = (sp_256_mont_inv_order_8_ctx*)sp_ctx;

    /* Compile-time check: fails to build when the private context is not
     * smaller than sp_ecc_ctx_t. */
    typedef char ctx_size_test[sizeof(sp_256_mont_inv_order_8_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0:
        /* Top bit (255) of the exponent is set: start with t = a. */
        XMEMCPY(t, a, sizeof(sp_digit) * 8);
        ctx->i = 254;
        ctx->state = 1;
        break;
    case 1:
        sp_256_mont_sqr_order_8(t, t);
        ctx->state = 2;
        break;
    case 2:
        if ((p256_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
            sp_256_mont_mul_order_8(t, t, a);
        }
        ctx->i--;
        /* Bits 254 down to 0 must all be processed, matching the blocking
         * implementation's loop; only finish once bit 0 has been handled. */
        ctx->state = (ctx->i < 0) ? 3 : 1;
        break;
    case 3:
        XMEMCPY(r, t, sizeof(sp_digit) * 8U);
        err = MP_OKAY;
        break;
    default:
        /* Unknown state: nothing to do, keep reporting FP_WOULDBLOCK. */
        break;
    }
    return err;
}
23058 #endif /* WOLFSSL_SP_NONBLOCK */
23059 
/* Invert the number, in Montgomery form, modulo the order of the P256 curve.
 * (r = 1 / a mod order)
 *
 * The inverse is calculated as a ^ (order - 2) mod order.
 * With WOLFSSL_SP_SMALL a plain square-and-multiply over all bits of
 * p256_order_minus_2 is used. Otherwise a fixed chain builds the high half of
 * the exponent from a^f..a^ffffffff and the low half is scanned from
 * p256_order_low, with each hex digit 'f' handled as four squarings and one
 * multiplication by a^f.
 *
 * r   Inverse result.
 * a   Number to invert.
 * td  Temporary data. The non-small implementation uses three areas at
 *     td, td + 2 * 8 and td + 4 * 8.
 */
static void sp_256_mont_inv_order_8(sp_digit* r, const sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    /* Top bit (255) of the exponent is set: start with t = a. */
    XMEMCPY(t, a, sizeof(sp_digit) * 8);
    for (i=254; i>=0; i--) {
        sp_256_mont_sqr_order_8(t, t);
        if ((p256_order_minus_2[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
            sp_256_mont_mul_order_8(t, t, a);
        }
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 8U);
#else
    sp_digit* t = td;
    sp_digit* t2 = td + 2 * 8;
    sp_digit* t3 = td + 4 * 8;
    int i;

    /* t = a^2 */
    sp_256_mont_sqr_order_8(t, a);
    /* t = a^3 = t * a */
    sp_256_mont_mul_order_8(t, t, a);
    /* t2= a^c = t ^ 2 ^ 2 */
    sp_256_mont_sqr_n_order_8(t2, t, 2);
    /* t3= a^f = t2 * t */
    sp_256_mont_mul_order_8(t3, t2, t);
    /* t2= a^f0 = t3 ^ 2 ^ 4 */
    sp_256_mont_sqr_n_order_8(t2, t3, 4);
    /* t = a^ff = t2 * t3 */
    sp_256_mont_mul_order_8(t, t2, t3);
    /* t2= a^ff00 = t ^ 2 ^ 8 */
    sp_256_mont_sqr_n_order_8(t2, t, 8);
    /* t = a^ffff = t2 * t */
    sp_256_mont_mul_order_8(t, t2, t);
    /* t2= a^ffff0000 = t ^ 2 ^ 16 */
    sp_256_mont_sqr_n_order_8(t2, t, 16);
    /* t = a^ffffffff = t2 * t */
    sp_256_mont_mul_order_8(t, t2, t);
    /* t2= a^ffffffff0000000000000000 = t ^ 2 ^ 64  */
    sp_256_mont_sqr_n_order_8(t2, t, 64);
    /* t2= a^ffffffff00000000ffffffff = t2 * t */
    sp_256_mont_mul_order_8(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffff00000000 = t2 ^ 2 ^ 32  */
    sp_256_mont_sqr_n_order_8(t2, t2, 32);
    /* t2= a^ffffffff00000000ffffffffffffffff = t2 * t */
    sp_256_mont_mul_order_8(t2, t2, t);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6 */
    for (i=127; i>=112; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if ((p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
            sp_256_mont_mul_order_8(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6f */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    sp_256_mont_mul_order_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84 */
    for (i=107; i>=64; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if ((p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
            sp_256_mont_mul_order_8(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    sp_256_mont_mul_order_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2 */
    for (i=59; i>=32; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if ((p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
            sp_256_mont_mul_order_8(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2f */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    sp_256_mont_mul_order_8(t2, t2, t3);
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254 */
    /* Stop at bit 4: the lowest hex digit (f, bits 3..0) is supplied by the
     * four squarings and the multiplication by a^f that follow. */
    for (i=27; i>=4; i--) {
        sp_256_mont_sqr_order_8(t2, t2);
        if ((p256_order_low[i / 32] & ((sp_int_digit)1 << (i % 32))) != 0) {
            sp_256_mont_mul_order_8(t2, t2, a);
        }
    }
    /* t2= a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632540 */
    sp_256_mont_sqr_n_order_8(t2, t2, 4);
    /* r = a^ffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc63254f */
    sp_256_mont_mul_order_8(r, t2, t3);
#endif /* WOLFSSL_SP_SMALL */
}
23152 
23153 #endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */
23154 #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */
23155 #ifdef HAVE_ECC_SIGN
23156 #ifndef SP_ECC_MAX_SIG_GEN
23157 #define SP_ECC_MAX_SIG_GEN  64
23158 #endif
23159 
23160 /* Calculate second signature value S from R, k and private value.
23161  *
23162  * s = (r * x + e) / k
23163  *
23164  * s    Signature value.
23165  * r    First signature value.
23166  * k    Ephemeral private key.
23167  * x    Private key as a number.
23168  * e    Hash of message as a number.
23169  * tmp  Temporary storage for intermediate numbers.
23170  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
23171  */
sp_256_calc_s_8(sp_digit * s,const sp_digit * r,sp_digit * k,sp_digit * x,const sp_digit * e,sp_digit * tmp)23172 static int sp_256_calc_s_8(sp_digit* s, const sp_digit* r, sp_digit* k,
23173     sp_digit* x, const sp_digit* e, sp_digit* tmp)
23174 {
23175     int err;
23176     sp_digit carry;
23177     sp_int32 c;
23178     sp_digit* kInv = k;
23179 
23180     /* Conv k to Montgomery form (mod order) */
23181         sp_256_mul_8(k, k, p256_norm_order);
23182     err = sp_256_mod_8(k, k, p256_order);
23183     if (err == MP_OKAY) {
23184         sp_256_norm_8(k);
23185 
23186         /* kInv = 1/k mod order */
23187             sp_256_mont_inv_order_8(kInv, k, tmp);
23188         sp_256_norm_8(kInv);
23189 
23190         /* s = r * x + e */
23191             sp_256_mul_8(x, x, r);
23192         err = sp_256_mod_8(x, x, p256_order);
23193     }
23194     if (err == MP_OKAY) {
23195         sp_256_norm_8(x);
23196         carry = sp_256_add_8(s, e, x);
23197         sp_256_cond_sub_8(s, s, p256_order, 0 - carry);
23198         sp_256_norm_8(s);
23199         c = sp_256_cmp_8(s, p256_order);
23200         sp_256_cond_sub_8(s, s, p256_order,
23201             (sp_digit)0 - (sp_digit)(c >= 0));
23202         sp_256_norm_8(s);
23203 
23204         /* s = s * k^-1 mod order */
23205             sp_256_mont_mul_order_8(s, s, kInv);
23206         sp_256_norm_8(s);
23207     }
23208 
23209     return err;
23210 }
23211 
23212 /* Sign the hash using the private key.
23213  *   e = [hash, 256 bits] from binary
23214  *   r = (k.G)->x mod order
23215  *   s = (r * x + e) / k mod order
23216  * The hash is truncated to the first 256 bits.
23217  *
23218  * hash     Hash to sign.
23219  * hashLen  Length of the hash data.
23220  * rng      Random number generator.
23221  * priv     Private part of key - scalar.
23222  * rm       First part of result as an mp_int.
 * sm       Second part of result as an mp_int.
23224  * heap     Heap to use for allocation.
23225  * returns RNG failures, MEMORY_E when memory allocation fails and
23226  * MP_OKAY on success.
23227  */
23228 #ifdef WOLFSSL_SP_NONBLOCK
23229 typedef struct sp_ecc_sign_256_ctx {
23230     int state;
23231     union {
23232         sp_256_ecc_mulmod_8_ctx mulmod_ctx;
23233         sp_256_mont_inv_order_8_ctx mont_inv_order_ctx;
23234     };
23235     sp_digit e[2*8];
23236     sp_digit x[2*8];
23237     sp_digit k[2*8];
23238     sp_digit r[2*8];
23239     sp_digit tmp[3 * 2*8];
23240     sp_point_256 point;
23241     sp_digit* s;
23242     sp_digit* kInv;
23243     int i;
23244 } sp_ecc_sign_256_ctx;
23245 
sp_ecc_sign_256_nb(sp_ecc_ctx_t * sp_ctx,const byte * hash,word32 hashLen,WC_RNG * rng,mp_int * priv,mp_int * rm,mp_int * sm,mp_int * km,void * heap)23246 int sp_ecc_sign_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng,
23247     mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap)
23248 {
23249     int err = FP_WOULDBLOCK;
23250     sp_ecc_sign_256_ctx* ctx = (sp_ecc_sign_256_ctx*)sp_ctx->data;
23251 
23252     typedef char ctx_size_test[sizeof(sp_ecc_sign_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
23253     (void)sizeof(ctx_size_test);
23254 
23255     (void)heap;
23256 
23257     switch (ctx->state) {
23258     case 0: /* INIT */
23259         ctx->s = ctx->e;
23260         ctx->kInv = ctx->k;
23261         if (hashLen > 32U) {
23262             hashLen = 32U;
23263         }
23264 
23265         ctx->i = SP_ECC_MAX_SIG_GEN;
23266         ctx->state = 1;
23267         break;
23268     case 1: /* GEN */
23269         /* New random point. */
23270         if (km == NULL || mp_iszero(km)) {
23271             err = sp_256_ecc_gen_k_8(rng, ctx->k);
23272         }
23273         else {
23274             sp_256_from_mp(ctx->k, 8, km);
23275             mp_zero(km);
23276         }
23277         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
23278         ctx->state = 2;
23279         break;
23280     case 2: /* MULMOD */
23281         err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
23282             &ctx->point, &p256_base, ctx->k, 1, 1, heap);
23283         if (err == MP_OKAY) {
23284             ctx->state = 3;
23285         }
23286         break;
23287     case 3: /* MODORDER */
23288     {
23289         sp_int32 c;
23290         /* r = point->x mod order */
23291         XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 8U);
23292         sp_256_norm_8(ctx->r);
23293         c = sp_256_cmp_8(ctx->r, p256_order);
23294         sp_256_cond_sub_8(ctx->r, ctx->r, p256_order,
23295             (sp_digit)0 - (sp_digit)(c >= 0));
23296         sp_256_norm_8(ctx->r);
23297 
23298         sp_256_from_mp(ctx->x, 8, priv);
23299         sp_256_from_bin(ctx->e, 8, hash, (int)hashLen);
23300         ctx->state = 4;
23301         break;
23302     }
23303     case 4: /* KMODORDER */
23304         /* Conv k to Montgomery form (mod order) */
23305         sp_256_mul_8(ctx->k, ctx->k, p256_norm_order);
23306         err = sp_256_mod_8(ctx->k, ctx->k, p256_order);
23307         if (err == MP_OKAY) {
23308             sp_256_norm_8(ctx->k);
23309             XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
23310             ctx->state = 5;
23311         }
23312         break;
23313     case 5: /* KINV */
23314         /* kInv = 1/k mod order */
23315         err = sp_256_mont_inv_order_8_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp);
23316         if (err == MP_OKAY) {
23317             XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
23318             ctx->state = 6;
23319         }
23320         break;
23321     case 6: /* KINVNORM */
23322         sp_256_norm_8(ctx->kInv);
23323         ctx->state = 7;
23324         break;
23325     case 7: /* R */
23326         /* s = r * x + e */
23327         sp_256_mul_8(ctx->x, ctx->x, ctx->r);
23328         ctx->state = 8;
23329         break;
23330     case 8: /* S1 */
23331         err = sp_256_mod_8(ctx->x, ctx->x, p256_order);
23332         if (err == MP_OKAY)
23333             ctx->state = 9;
23334         break;
23335     case 9: /* S2 */
23336     {
23337         sp_digit carry;
23338         sp_int32 c;
23339         sp_256_norm_8(ctx->x);
23340         carry = sp_256_add_8(ctx->s, ctx->e, ctx->x);
23341         sp_256_cond_sub_8(ctx->s, ctx->s,
23342             p256_order, 0 - carry);
23343         sp_256_norm_8(ctx->s);
23344         c = sp_256_cmp_8(ctx->s, p256_order);
23345         sp_256_cond_sub_8(ctx->s, ctx->s, p256_order,
23346             (sp_digit)0 - (sp_digit)(c >= 0));
23347         sp_256_norm_8(ctx->s);
23348 
23349         /* s = s * k^-1 mod order */
23350         sp_256_mont_mul_order_8(ctx->s, ctx->s, ctx->kInv);
23351         sp_256_norm_8(ctx->s);
23352 
23353         /* Check that signature is usable. */
23354         if (sp_256_iszero_8(ctx->s) == 0) {
23355             ctx->state = 10;
23356             break;
23357         }
23358     #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP
23359         ctx->i = 1;
23360     #endif
23361 
23362         /* not usable gen, try again */
23363         ctx->i--;
23364         if (ctx->i == 0) {
23365             err = RNG_FAILURE_E;
23366         }
23367         ctx->state = 1;
23368         break;
23369     }
23370     case 10: /* RES */
23371         err = sp_256_to_mp(ctx->r, rm);
23372         if (err == MP_OKAY) {
23373             err = sp_256_to_mp(ctx->s, sm);
23374         }
23375         break;
23376     }
23377 
23378     if (err == MP_OKAY && ctx->state != 10) {
23379         err = FP_WOULDBLOCK;
23380     }
23381     if (err != FP_WOULDBLOCK) {
23382         XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 8U);
23383         XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 8U);
23384         XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 8U);
23385         XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 8U);
23386         XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 8U);
23387     }
23388 
23389     return err;
23390 }
23391 #endif /* WOLFSSL_SP_NONBLOCK */
23392 
/* Sign the hash using the private key (blocking version).
 *
 * A nonce k is generated (or taken from km), r is the x ordinate of [k]G
 * reduced mod the order and s is calculated by sp_256_calc_s_8(). While s
 * comes out as zero the process is repeated, for at most SP_ECC_MAX_SIG_GEN
 * attempts (one attempt when WOLFSSL_ECDSA_SET_K_ONE_LOOP is defined).
 *
 * hash     Hash to sign.
 * hashLen  Length of the hash data. At most the first 32 bytes are used.
 * rng      Random number generator used to create k.
 * priv     Private part of key - scalar.
 * rm       First part of result as an mp_int.
 * sm       Second part of result as an mp_int.
 * km       When not NULL and not zero, the value to use as k on the first
 *          attempt. It is zeroized as soon as it has been read.
 * heap     Heap to use for allocation.
 * returns RNG_FAILURE_E when every attempt produced an unusable signature,
 * MEMORY_E when memory allocation fails, any error reported by a helper and
 * MP_OKAY on success.
 */
int sp_ecc_sign_256(const byte* hash, word32 hashLen, WC_RNG* rng,
    const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* e = NULL;
    sp_point_256* point = NULL;
#else
    sp_digit e[7 * 2 * 8];
    sp_point_256 point[1];
#endif
    sp_digit* x = NULL;
    sp_digit* k = NULL;
    sp_digit* r = NULL;
    sp_digit* tmp = NULL;
    sp_digit* s = NULL;
    sp_int32 c;
    int err = MP_OKAY;
    int i;

    (void)heap;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        point = (sp_point_256*)XMALLOC(sizeof(sp_point_256), heap,
                                             DYNAMIC_TYPE_ECC);
        if (point == NULL)
            err = MEMORY_E;
    }
    if (err == MP_OKAY) {
        e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 8, heap,
                               DYNAMIC_TYPE_ECC);
        if (e == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        /* Carve the single buffer into the working numbers. s shares the
         * start of the buffer with e. */
        x = e + 2 * 8;
        k = e + 4 * 8;
        r = e + 6 * 8;
        tmp = e + 8 * 8;
        s = e;

        if (hashLen > 32U) {
            hashLen = 32U;
        }
    }

    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
        /* New random point. */
        if (km == NULL || mp_iszero(km)) {
            err = sp_256_ecc_gen_k_8(rng, k);
        }
        else {
            /* Caller supplied k: use it once and wipe the caller's copy. */
            sp_256_from_mp(k, 8, km);
            mp_zero(km);
        }
        if (err == MP_OKAY) {
            err = sp_256_ecc_mulmod_base_8(point, k, 1, 1, heap);
        }

        if (err == MP_OKAY) {
            /* r = point->x mod order */
            XMEMCPY(r, point->x, sizeof(sp_digit) * 8U);
            sp_256_norm_8(r);
            c = sp_256_cmp_8(r, p256_order);
            sp_256_cond_sub_8(r, r, p256_order,
                (sp_digit)0 - (sp_digit)(c >= 0));
            sp_256_norm_8(r);

            sp_256_from_mp(x, 8, priv);
            sp_256_from_bin(e, 8, hash, (int)hashLen);

            err = sp_256_calc_s_8(s, r, k, x, e, tmp);
        }

        /* Check that signature is usable. */
        if ((err == MP_OKAY) && (sp_256_iszero_8(s) == 0)) {
            break;
        }
#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP
        i = 1;
#endif
    }

    /* Running out of attempts is only an RNG failure when nothing else went
     * wrong: an error raised on the final attempt also leaves i at zero and
     * must be returned to the caller unchanged. */
    if ((err == MP_OKAY) && (i == 0)) {
        err = RNG_FAILURE_E;
    }

    if (err == MP_OKAY) {
        err = sp_256_to_mp(r, rm);
    }
    if (err == MP_OKAY) {
        err = sp_256_to_mp(s, sm);
    }

    /* The buffers held k and the private scalar - wipe before release. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (e != NULL)
#endif
    {
        ForceZero(e, sizeof(sp_digit) * 7 * 2 * 8);
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(e, heap, DYNAMIC_TYPE_ECC);
    #endif
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (point != NULL)
#endif
    {
        ForceZero(point, sizeof(sp_point_256));
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
    #endif
    }

    return err;
}
23510 #endif /* HAVE_ECC_SIGN */
23511 
23512 #ifndef WOLFSSL_SP_SMALL
/* Shift the number right by one bit. (r = a >> 1)
 *
 * The number is eight 32-bit words, least significant word first. The bit
 * shifted out of the bottom is discarded and the top bit of the result is
 * zero.
 *
 * r  Result of the shift. May be the same buffer as a.
 * a  Number to shift.
 */
static void sp_256_rshift1_8(sp_digit* r, sp_digit* a)
{
    __asm__ __volatile__ (
        /* r9 holds the bit shifted in at the top: zero for the top half. */
        "mov       r9, #0\n\t"
        /* Top four words. */
        "ldr       r3, [%[a], #16]\n\t"
        "ldr       r4, [%[a], #20]\n\t"
        "ldr       r5, [%[a], #24]\n\t"
        "ldr       r6, [%[a], #28]\n\t"
        "lsr       r7, r3, #1\n\t"
        /* Keep bit 0 of word 4 - it becomes the top bit of word 3. */
        "and       r3, r3, #1\n\t"
        "lsr       r8, r4, #1\n\t"
        "lsr       r10, r5, #1\n\t"
        "lsr       r14, r6, #1\n\t"
        "orr       r7, r7, r4, lsl #31\n\t"
        "orr       r8, r8, r5, lsl #31\n\t"
        "orr       r10, r10, r6, lsl #31\n\t"
        "orr       r14, r14, r9, lsl #31\n\t"
        "mov       r9, r3\n\t"
        "str       r7, [%[r], #16]\n\t"
        "str       r8, [%[r], #20]\n\t"
        "str       r10, [%[r], #24]\n\t"
        "str       r14, [%[r], #28]\n\t"
        /* Bottom four words. Read from the input: reading them back from r
         * is only correct when r and a are the same buffer. The stores above
         * touch words 4..7 only, so this is still right when r == a. */
        "ldr       r3, [%[a], #0]\n\t"
        "ldr       r4, [%[a], #4]\n\t"
        "ldr       r5, [%[a], #8]\n\t"
        "ldr       r6, [%[a], #12]\n\t"
        "lsr       r7, r3, #1\n\t"
        "lsr       r8, r4, #1\n\t"
        "lsr       r10, r5, #1\n\t"
        "lsr       r14, r6, #1\n\t"
        "orr       r7, r7, r4, lsl #31\n\t"
        "orr       r8, r8, r5, lsl #31\n\t"
        "orr       r10, r10, r6, lsl #31\n\t"
        "orr       r14, r14, r9, lsl #31\n\t"
        "str       r7, [%[r], #0]\n\t"
        "str       r8, [%[r], #4]\n\t"
        "str       r10, [%[r], #8]\n\t"
        "str       r14, [%[r], #12]\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9"
    );
}
23557 
23558 /* Divide the number by 2 mod the modulus. (r = a / 2 % m)
23559  *
23560  * r  Result of division by 2.
23561  * a  Number to divide.
23562  * m  Modulus.
23563  */
sp_256_div2_mod_8(sp_digit * r,const sp_digit * a,const sp_digit * m)23564 static void sp_256_div2_mod_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
23565 {
23566     __asm__ __volatile__ (
23567         "mov       r10, #0\n\t"
23568         "ldr       r3, [%[a], #0]\n\t"
23569         "ands      r9, r3, #1\n\t"
23570         "beq       1f\n\t"
23571         "ldr       r4, [%[a], #4]\n\t"
23572         "ldr       r5, [%[a], #8]\n\t"
23573         "ldr       r6, [%[a], #12]\n\t"
23574         "ldr       r7, [%[m], #0]\n\t"
23575         "ldr       r8, [%[m], #4]\n\t"
23576         "ldr       r10, [%[m], #8]\n\t"
23577         "ldr       r14, [%[m], #12]\n\t"
23578         "adds      r3, r3, r7\n\t"
23579         "adcs      r4, r4, r8\n\t"
23580         "adcs      r5, r5, r10\n\t"
23581         "adcs      r6, r6, r14\n\t"
23582         "str       r3, [%[r], #0]\n\t"
23583         "str       r4, [%[r], #4]\n\t"
23584         "str       r5, [%[r], #8]\n\t"
23585         "str       r6, [%[r], #12]\n\t"
23586         "ldr       r3, [%[a], #16]\n\t"
23587         "ldr       r4, [%[a], #20]\n\t"
23588         "ldr       r5, [%[a], #24]\n\t"
23589         "ldr       r6, [%[a], #28]\n\t"
23590         "ldr       r7, [%[m], #16]\n\t"
23591         "ldr       r8, [%[m], #20]\n\t"
23592         "ldr       r10, [%[m], #24]\n\t"
23593         "ldr       r14, [%[m], #28]\n\t"
23594         "adcs      r3, r3, r7\n\t"
23595         "adcs      r4, r4, r8\n\t"
23596         "adcs      r5, r5, r10\n\t"
23597         "adcs      r6, r6, r14\n\t"
23598         "adc       r9, r10, r10\n\t"
23599         "b 2f\n\t"
23600         "\n1:\n\t"
23601         "ldr       r3, [%[a], #16]\n\t"
23602         "ldr       r4, [%[a], #20]\n\t"
23603         "ldr       r5, [%[a], #24]\n\t"
23604         "ldr       r6, [%[a], #28]\n\t"
23605         "\n2:\n\t"
23606         "lsr       r7, r3, #1\n\t"
23607         "and       r3, r3, #1\n\t"
23608         "lsr       r8, r4, #1\n\t"
23609         "lsr       r10, r5, #1\n\t"
23610         "lsr       r14, r6, #1\n\t"
23611         "orr       r7, r7, r4, lsl #31\n\t"
23612         "orr       r8, r8, r5, lsl #31\n\t"
23613         "orr       r10, r10, r6, lsl #31\n\t"
23614         "orr       r14, r14, r9, lsl #31\n\t"
23615         "mov       r9, r3\n\t"
23616         "str       r7, [%[r], #16]\n\t"
23617         "str       r8, [%[r], #20]\n\t"
23618         "str       r10, [%[r], #24]\n\t"
23619         "str       r14, [%[r], #28]\n\t"
23620         "ldr       r3, [%[r], #0]\n\t"
23621         "ldr       r4, [%[r], #4]\n\t"
23622         "ldr       r5, [%[r], #8]\n\t"
23623         "ldr       r6, [%[r], #12]\n\t"
23624         "lsr       r7, r3, #1\n\t"
23625         "lsr       r8, r4, #1\n\t"
23626         "lsr       r10, r5, #1\n\t"
23627         "lsr       r14, r6, #1\n\t"
23628         "orr       r7, r7, r4, lsl #31\n\t"
23629         "orr       r8, r8, r5, lsl #31\n\t"
23630         "orr       r10, r10, r6, lsl #31\n\t"
23631         "orr       r14, r14, r9, lsl #31\n\t"
23632         "str       r7, [%[r], #0]\n\t"
23633         "str       r8, [%[r], #4]\n\t"
23634         "str       r10, [%[r], #8]\n\t"
23635         "str       r14, [%[r], #12]\n\t"
23636         :
23637         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
23638         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r10", "r14", "r9"
23639     );
23640 }
23641 
/* Count the number of bits needed to represent the number.
 *
 * The words are examined from the most significant (offset 28) downwards.
 * For the first non-zero word found the result is the bit count up to and
 * including that word minus its leading zero count. CLZ of zero is 32 on
 * ARM, so a number that is all zero gives 0.
 *
 * a  Number to examine: eight 32-bit words, least significant first.
 * returns the 1-based position of the highest set bit, or 0 when a is zero.
 */
static int sp_256_num_bits_8(sp_digit* a)
{
    int r = 0;

    __asm__ __volatile__ (
        "ldr r2, [%[a], #28]\n\t"
        "cmp r2, #0\n\t"
        "beq 7f\n\t"
        "mov r3, #256\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n7:\n\t"
        "ldr r2, [%[a], #24]\n\t"
        "cmp r2, #0\n\t"
        "beq 6f\n\t"
        "mov r3, #224\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n6:\n\t"
        "ldr r2, [%[a], #20]\n\t"
        "cmp r2, #0\n\t"
        "beq 5f\n\t"
        "mov r3, #192\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n5:\n\t"
        "ldr r2, [%[a], #16]\n\t"
        "cmp r2, #0\n\t"
        "beq 4f\n\t"
        "mov r3, #160\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n4:\n\t"
        "ldr r2, [%[a], #12]\n\t"
        "cmp r2, #0\n\t"
        "beq 3f\n\t"
        "mov r3, #128\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n3:\n\t"
        "ldr r2, [%[a], #8]\n\t"
        "cmp r2, #0\n\t"
        "beq 2f\n\t"
        "mov r3, #96\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n2:\n\t"
        "ldr r2, [%[a], #4]\n\t"
        "cmp r2, #0\n\t"
        "beq 1f\n\t"
        "mov r3, #64\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   9f\n\t"
        "\n1:\n\t"
        /* Lowest word: no zero test needed, clz of 0 gives 32 - 32 = 0. */
        "ldr r2, [%[a], #0]\n\t"
        "mov r3, #32\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "\n9:\n\t"
        : [r] "+r" (r)
        : [a] "r" (a)
        /* 'memory': the words of a are read through a register operand, so
         * the compiler must complete earlier stores to that buffer first.
         * 'cc': cmp changes the condition flags. */
        : "memory", "r2", "r3", "cc"
    );

    return r;
}
23715 
23716 /* Non-constant time modular inversion.
23717  *
23718  * @param  [out]  r   Resulting number.
23719  * @param  [in]   a   Number to invert.
23720  * @param  [in]   m   Modulus.
23721  * @return  MP_OKAY on success.
23722  */
sp_256_mod_inv_8(sp_digit * r,const sp_digit * a,const sp_digit * m)23723 static int sp_256_mod_inv_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
23724 {
23725     sp_digit u[8];
23726     sp_digit v[8];
23727     sp_digit b[8];
23728     sp_digit d[8];
23729     int ut, vt;
23730     sp_digit o;
23731 
23732     XMEMCPY(u, m, sizeof(u));
23733     XMEMCPY(v, a, sizeof(v));
23734 
23735     ut = sp_256_num_bits_8(u);
23736     vt = sp_256_num_bits_8(v);
23737 
23738     XMEMSET(b, 0, sizeof(b));
23739     if ((v[0] & 1) == 0) {
23740         sp_256_rshift1_8(v, v);
23741         XMEMCPY(d, m, sizeof(u));
23742         d[0] += 1;
23743         sp_256_rshift1_8(d, d);
23744         vt--;
23745 
23746         while ((v[0] & 1) == 0) {
23747             sp_256_rshift1_8(v, v);
23748             sp_256_div2_mod_8(d, d, m);
23749             vt--;
23750         }
23751     }
23752     else {
23753         XMEMSET(d+1, 0, sizeof(d)-sizeof(sp_digit));
23754         d[0] = 1;
23755     }
23756 
23757     while (ut > 1 && vt > 1) {
23758         if (ut > vt || (ut == vt && sp_256_cmp_8(u, v) >= 0)) {
23759             sp_256_sub_8(u, u, v);
23760             o = sp_256_sub_8(b, b, d);
23761             if (o != 0)
23762                 sp_256_add_8(b, b, m);
23763             ut = sp_256_num_bits_8(u);
23764 
23765             do {
23766                 sp_256_rshift1_8(u, u);
23767                 sp_256_div2_mod_8(b, b, m);
23768                 ut--;
23769             }
23770             while (ut > 0 && (u[0] & 1) == 0);
23771         }
23772         else {
23773             sp_256_sub_8(v, v, u);
23774             o = sp_256_sub_8(d, d, b);
23775             if (o != 0)
23776                 sp_256_add_8(d, d, m);
23777             vt = sp_256_num_bits_8(v);
23778 
23779             do {
23780                 sp_256_rshift1_8(v, v);
23781                 sp_256_div2_mod_8(d, d, m);
23782                 vt--;
23783             }
23784             while (vt > 0 && (v[0] & 1) == 0);
23785         }
23786     }
23787 
23788     if (ut == 1)
23789         XMEMCPY(r, b, sizeof(b));
23790     else
23791         XMEMCPY(r, d, sizeof(d));
23792 
23793     return MP_OKAY;
23794 }
23795 
23796 #endif /* WOLFSSL_SP_SMALL */
23797 
23798 /* Add point p1 into point p2. Handles p1 == p2 and result at infinity.
23799  *
23800  * p1   First point to add and holds result.
23801  * p2   Second point to add.
23802  * tmp  Temporary storage for intermediate numbers.
23803  */
sp_256_add_points_8(sp_point_256 * p1,const sp_point_256 * p2,sp_digit * tmp)23804 static void sp_256_add_points_8(sp_point_256* p1, const sp_point_256* p2,
23805     sp_digit* tmp)
23806 {
23807 
23808         sp_256_proj_point_add_8(p1, p1, p2, tmp);
23809     if (sp_256_iszero_8(p1->z)) {
23810         if (sp_256_iszero_8(p1->x) && sp_256_iszero_8(p1->y)) {
23811                 sp_256_proj_point_dbl_8(p1, p2, tmp);
23812         }
23813         else {
23814             /* Y ordinate is not used from here - don't set. */
23815             p1->x[0] = 0;
23816             p1->x[1] = 0;
23817             p1->x[2] = 0;
23818             p1->x[3] = 0;
23819             p1->x[4] = 0;
23820             p1->x[5] = 0;
23821             p1->x[6] = 0;
23822             p1->x[7] = 0;
23823             XMEMCPY(p1->z, p256_norm_mod, sizeof(p256_norm_mod));
23824         }
23825     }
23826 }
23827 
23828 /* Calculate the verification point: [e/s]G + [r/s]Q
23829  *
23830  * p1    Calculated point.
23831  * p2    Public point and temporary.
23832  * s     Second part of signature as a number.
23833  * u1    Temporary number.
23834  * u2    Temproray number.
23835  * heap  Heap to use for allocation.
23836  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
23837  */
sp_256_calc_vfy_point_8(sp_point_256 * p1,sp_point_256 * p2,sp_digit * s,sp_digit * u1,sp_digit * u2,sp_digit * tmp,void * heap)23838 static int sp_256_calc_vfy_point_8(sp_point_256* p1, sp_point_256* p2,
23839     sp_digit* s, sp_digit* u1, sp_digit* u2, sp_digit* tmp, void* heap)
23840 {
23841     int err;
23842 
23843 #ifndef WOLFSSL_SP_SMALL
23844     err = sp_256_mod_inv_8(s, s, p256_order);
23845     if (err == MP_OKAY)
23846 #endif /* !WOLFSSL_SP_SMALL */
23847     {
23848         sp_256_mul_8(s, s, p256_norm_order);
23849         err = sp_256_mod_8(s, s, p256_order);
23850     }
23851     if (err == MP_OKAY) {
23852         sp_256_norm_8(s);
23853 #ifdef WOLFSSL_SP_SMALL
23854         {
23855             sp_256_mont_inv_order_8(s, s, tmp);
23856             sp_256_mont_mul_order_8(u1, u1, s);
23857             sp_256_mont_mul_order_8(u2, u2, s);
23858         }
23859 #else
23860         {
23861             sp_256_mont_mul_order_8(u1, u1, s);
23862             sp_256_mont_mul_order_8(u2, u2, s);
23863         }
23864 #endif /* WOLFSSL_SP_SMALL */
23865         {
23866             err = sp_256_ecc_mulmod_base_8(p1, u1, 0, 0, heap);
23867         }
23868     }
23869     if ((err == MP_OKAY) && sp_256_iszero_8(p1->z)) {
23870         p1->infinity = 1;
23871     }
23872     if (err == MP_OKAY) {
23873             err = sp_256_ecc_mulmod_8(p2, p2, u2, 0, 0, heap);
23874     }
23875     if ((err == MP_OKAY) && sp_256_iszero_8(p2->z)) {
23876         p2->infinity = 1;
23877     }
23878 
23879     if (err == MP_OKAY) {
23880         sp_256_add_points_8(p1, p2, tmp);
23881     }
23882 
23883     return err;
23884 }
23885 
23886 #ifdef HAVE_ECC_VERIFY
/* Verify the signature values with the hash and public key.
 *   e = Truncate(hash, 256)
 *   u1 = e/s mod order
 *   u2 = r/s mod order
 *   r == (u1.G + u2.Q)->x mod order
 * Optimization: Leave point in projective form.
 *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
 *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
 * The hash is truncated to the first 256 bits.
 *
 * hash     Hash that was signed.
 * hashLen  Length of the hash data.
 * pX       X ordinate of the public key point.
 * pY       Y ordinate of the public key point.
 * pZ       Z ordinate of the public key point.
 * rm       First part of the signature (r) as an mp_int.
 * sm       Second part of the signature (s) as an mp_int.
 * res      Set to 1 when the signature verifies and to 0 when it does not.
 * heap     Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
23906 #ifdef WOLFSSL_SP_NONBLOCK
/* Working state of sp_ecc_verify_256_nb(), kept in the caller's sp_ecc_ctx_t
 * between calls. */
typedef struct sp_ecc_verify_256_ctx {
    /* Step that the next call to sp_ecc_verify_256_nb() will run. */
    int state;
    /* Contexts of the non-blocking sub-operations. They share storage, so
     * only one can be in progress at a time; sp_ecc_verify_256_nb() zeroes a
     * member before starting the operation that uses it. */
    union {
        sp_256_ecc_mulmod_8_ctx mulmod_ctx;
        sp_256_mont_inv_order_8_ctx mont_inv_order_ctx;
        sp_256_proj_point_dbl_8_ctx dbl_ctx;
        sp_256_proj_point_add_8_ctx add_ctx;
    };
    /* Loaded with the hash, then multiplied by 1/s; reused at the end for
     * the value that is compared with the X ordinate of p1. */
    sp_digit u1[2*8];
    /* Loaded with r, then multiplied by 1/s; reloaded with r for the final
     * comparison. */
    sp_digit u2[2*8];
    /* Loaded with s, then replaced by its inverse. */
    sp_digit s[2*8];
    /* Scratch space handed to the sub-operations. */
    sp_digit tmp[2*8 * 5];
    /* Receives the multiple of the base point, then the sum with p2. */
    sp_point_256 p1;
    /* Loaded with the public point, then replaced by its multiple. */
    sp_point_256 p2;
} sp_ecc_verify_256_ctx;
23922 
/* Verify the signature values with the hash and public key - non-blocking.
 *
 * Each call performs one step of the verification and records where it got
 * to in sp_ctx. Keep calling with the same arguments while FP_WOULDBLOCK is
 * returned.
 *
 * After the two scalar multiples have been added, the result is checked in
 * the same way as sp_256_add_points_8() does for the blocking code: a result
 * that is all zero is replaced by the double of the second point, and any
 * other result with a zero Z ordinate has X cleared and Z set to
 * p256_norm_mod. Without this, an all-zero result would compare equal to
 * r.z'.z' (also zero) and be accepted whatever the value of r.
 *
 * sp_ctx   Context holding the state between calls. State 0 starts a new
 *          verification.
 * hash     Hash that was signed.
 * hashLen  Length of the hash data. At most the first 32 bytes are used.
 * pX       X ordinate of the public key point.
 * pY       Y ordinate of the public key point.
 * pZ       Z ordinate of the public key point.
 * rm       First part of the signature (r) as an mp_int.
 * sm       Second part of the signature (s) as an mp_int.
 * res      Set to 1 when the signature verifies and to 0 when it does not.
 *          Only meaningful once MP_OKAY has been returned.
 * heap     Heap to use for allocation.
 * returns FP_WOULDBLOCK when more calls are needed, MP_OKAY when the result
 * is available in res, and otherwise the error reported by a helper.
 */
int sp_ecc_verify_256_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash,
    word32 hashLen, const mp_int* pX, const mp_int* pY, const mp_int* pZ,
    const mp_int* rm, const mp_int* sm, int* res, void* heap)
{
    int err = FP_WOULDBLOCK;
    sp_ecc_verify_256_ctx* ctx = (sp_ecc_verify_256_ctx*)sp_ctx->data;

    /* Compile-time check: the array size is negative, and compilation
     * fails, when the working state does not fit in the generic context. */
    typedef char ctx_size_test[sizeof(sp_ecc_verify_256_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT */
        if (hashLen > 32U) {
            hashLen = 32U;
        }

        sp_256_from_bin(ctx->u1, 8, hash, (int)hashLen);
        sp_256_from_mp(ctx->u2, 8, rm);
        sp_256_from_mp(ctx->s, 8, sm);
        sp_256_from_mp(ctx->p2.x, 8, pX);
        sp_256_from_mp(ctx->p2.y, 8, pY);
        sp_256_from_mp(ctx->p2.z, 8, pZ);
        ctx->state = 1;
        break;
    case 1: /* NORMS0 */
        sp_256_mul_8(ctx->s, ctx->s, p256_norm_order);
        err = sp_256_mod_8(ctx->s, ctx->s, p256_order);
        if (err == MP_OKAY)
            ctx->state = 2;
        break;
    case 2: /* NORMS1 */
        sp_256_norm_8(ctx->s);
        XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
        ctx->state = 3;
        break;
    case 3: /* NORMS2 */
        err = sp_256_mont_inv_order_8_nb(
            (sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
        if (err == MP_OKAY) {
            ctx->state = 4;
        }
        break;
    case 4: /* NORMS3 */
        sp_256_mont_mul_order_8(ctx->u1, ctx->u1, ctx->s);
        ctx->state = 5;
        break;
    case 5: /* NORMS4 */
        sp_256_mont_mul_order_8(ctx->u2, ctx->u2, ctx->s);
        XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
        ctx->state = 6;
        break;
    case 6: /* MULBASE */
        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1,
            &p256_base, ctx->u1, 0, 0, heap);
        if (err == MP_OKAY) {
            if (sp_256_iszero_8(ctx->p1.z)) {
                ctx->p1.infinity = 1;
            }
            XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
            ctx->state = 7;
        }
        break;
    case 7: /* MULMOD */
        err = sp_256_ecc_mulmod_8_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2,
            &ctx->p2, ctx->u2, 0, 0, heap);
        if (err == MP_OKAY) {
            if (sp_256_iszero_8(ctx->p2.z)) {
                ctx->p2.infinity = 1;
            }
            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
            ctx->state = 8;
        }
        break;
    case 8: /* ADD */
        err = sp_256_proj_point_add_8_nb((sp_ecc_ctx_t*)&ctx->add_ctx,
            &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
        if (err == MP_OKAY)
            ctx->state = 9;
        break;
    case 9: /* DBLPREP */
        /* Same fix-up as sp_256_add_points_8(). */
        if (sp_256_iszero_8(ctx->p1.z)) {
            if (sp_256_iszero_8(ctx->p1.x) && sp_256_iszero_8(ctx->p1.y)) {
                /* All zero: double the second point instead. */
                XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
                ctx->state = 10;
                break;
            }
            /* Y ordinate is not used from here - don't set. */
            XMEMSET(ctx->p1.x, 0, sizeof(sp_digit) * 8U);
            XMEMCPY(ctx->p1.z, p256_norm_mod, sizeof(p256_norm_mod));
        }
        ctx->state = 11;
        break;
    case 10: /* DBL */
        err = sp_256_proj_point_dbl_8_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx,
            &ctx->p1, &ctx->p2, ctx->tmp);
        if (err == MP_OKAY)
            ctx->state = 11;
        break;
    case 11: /* MONT */
        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
        /* Reload r and convert to Montgomery form. */
        sp_256_from_mp(ctx->u2, 8, rm);
        err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
        if (err == MP_OKAY)
            ctx->state = 12;
        break;
    case 12: /* SQR */
        /* u1 = r.z'.z' mod prime */
        sp_256_mont_sqr_8(ctx->p1.z, ctx->p1.z, p256_mod, p256_mp_mod);
        ctx->state = 13;
        break;
    case 13: /* MUL */
        sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod, p256_mp_mod);
        ctx->state = 14;
        break;
    case 14: /* RES */
    {
        sp_int32 c = 0;
        err = MP_OKAY; /* math okay, now check result */
        *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
        if (*res == 0) {
            sp_digit carry;

            /* Reload r and add order. */
            sp_256_from_mp(ctx->u2, 8, rm);
            carry = sp_256_add_8(ctx->u2, ctx->u2, p256_order);
            /* Carry means result is greater than mod and is not valid. */
            if (carry == 0) {
                sp_256_norm_8(ctx->u2);

                /* Compare with mod and if greater or equal then not valid. */
                c = sp_256_cmp_8(ctx->u2, p256_mod);
            }
        }
        if ((*res == 0) && (c < 0)) {
            /* Convert to Montgomery form */
            err = sp_256_mod_mul_norm_8(ctx->u2, ctx->u2, p256_mod);
            if (err == MP_OKAY) {
                /* u1 = (r + 1*order).z'.z' mod prime */
                sp_256_mont_mul_8(ctx->u1, ctx->u2, ctx->p1.z, p256_mod,
                                                            p256_mp_mod);
                *res = (int)(sp_256_cmp_8(ctx->p1.x, ctx->u1) == 0);
            }
        }
        break;
    }
    } /* switch */

    /* A step that finished cleanly before the last state still needs more
     * calls. */
    if (err == MP_OKAY && ctx->state != 14) {
        err = FP_WOULDBLOCK;
    }

    return err;
}
24054 #endif /* WOLFSSL_SP_NONBLOCK */
24055 
/* Verify the signature values with the hash and public key (blocking).
 *
 * The verification point [e/s]G + [r/s]Q is calculated and left in
 * projective form; r.z'.z' and, when that is still below the prime,
 * (r + order).z'.z' are compared with its X ordinate.
 *
 * hash     Hash that was signed.
 * hashLen  Length of the hash data. At most the first 32 bytes are used.
 * pX       X ordinate of the public key point.
 * pY       Y ordinate of the public key point.
 * pZ       Z ordinate of the public key point.
 * rm       First part of the signature (r) as an mp_int.
 * sm       Second part of the signature (s) as an mp_int.
 * res      Set to 1 when the signature verifies and to 0 when it does not.
 *          Not written when an error is returned before the comparison.
 * heap     Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_verify_256(const byte* hash, word32 hashLen, const mp_int* pX,
    const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm,
    int* res, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* u1 = NULL;
    sp_point_256* p1 = NULL;
#else
    sp_digit  u1[16 * 8];
    sp_point_256 p1[2];
#endif
    sp_digit* u2 = NULL;
    sp_digit* s = NULL;
    sp_digit* tmp = NULL;
    sp_point_256* p2 = NULL;
    int err = MP_OKAY;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    p1 = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap,
                                DYNAMIC_TYPE_ECC);
    if (p1 == NULL) {
        err = MEMORY_E;
    }
    else {
        u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 8, heap,
                                DYNAMIC_TYPE_ECC);
        if (u1 == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        /* Split the number buffer and the pair of points. */
        u2  = u1 + 2 * 8;
        s   = u1 + 4 * 8;
        tmp = u1 + 6 * 8;
        p2 = p1 + 1;

        if (hashLen > 32U) {
            hashLen = 32U;
        }

        sp_256_from_bin(u1, 8, hash, (int)hashLen);
        sp_256_from_mp(u2, 8, rm);
        sp_256_from_mp(s, 8, sm);
        sp_256_from_mp(p2->x, 8, pX);
        sp_256_from_mp(p2->y, 8, pY);
        sp_256_from_mp(p2->z, 8, pZ);

        err = sp_256_calc_vfy_point_8(p1, p2, s, u1, u2, tmp, heap);
    }

    if (err == MP_OKAY) {
        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
        /* Reload r and convert to Montgomery form. */
        sp_256_from_mp(u2, 8, rm);
        err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
    }

    if (err == MP_OKAY) {
        /* u1 = r.z'.z' mod prime */
        sp_256_mont_sqr_8(p1->z, p1->z, p256_mod, p256_mp_mod);
        sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
        *res = (int)(sp_256_cmp_8(p1->x, u1) == 0);

        if (*res == 0) {
            /* Stays at zero - no second try - unless r + order is valid. */
            sp_int32 cmpMod = 0;

            /* Reload r and add order. */
            sp_256_from_mp(u2, 8, rm);
            /* Carry means result is greater than mod and is not valid. */
            if (sp_256_add_8(u2, u2, p256_order) == 0) {
                sp_256_norm_8(u2);

                /* Compare with mod and if greater or equal then not valid. */
                cmpMod = sp_256_cmp_8(u2, p256_mod);
            }

            if (cmpMod < 0) {
                /* Convert to Montgomery form */
                err = sp_256_mod_mul_norm_8(u2, u2, p256_mod);
                if (err == MP_OKAY) {
                    /* u1 = (r + 1*order).z'.z' mod prime */
                    sp_256_mont_mul_8(u1, u2, p1->z, p256_mod, p256_mp_mod);
                    *res = (sp_256_cmp_8(p1->x, u1) == 0);
                }
            }
        }
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (u1 != NULL) {
        XFREE(u1, heap, DYNAMIC_TYPE_ECC);
    }
    if (p1 != NULL) {
        XFREE(p1, heap, DYNAMIC_TYPE_ECC);
    }
#endif

    return err;
}
24154 #endif /* HAVE_ECC_VERIFY */
24155 
24156 #ifdef HAVE_ECC_CHECK_KEY
24157 /* Check that the x and y oridinates are a valid point on the curve.
24158  *
24159  * point  EC point.
24160  * heap   Heap to use if dynamically allocating.
24161  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
24162  * not on the curve and MP_OKAY otherwise.
24163  */
sp_256_ecc_is_point_8(const sp_point_256 * point,void * heap)24164 static int sp_256_ecc_is_point_8(const sp_point_256* point,
24165     void* heap)
24166 {
24167 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24168     sp_digit* t1 = NULL;
24169 #else
24170     sp_digit t1[8 * 4];
24171 #endif
24172     sp_digit* t2 = NULL;
24173     int err = MP_OKAY;
24174 
24175 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24176     t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 4, heap, DYNAMIC_TYPE_ECC);
24177     if (t1 == NULL)
24178         err = MEMORY_E;
24179 #endif
24180     (void)heap;
24181 
24182     if (err == MP_OKAY) {
24183         t2 = t1 + 2 * 8;
24184 
24185         sp_256_sqr_8(t1, point->y);
24186         (void)sp_256_mod_8(t1, t1, p256_mod);
24187         sp_256_sqr_8(t2, point->x);
24188         (void)sp_256_mod_8(t2, t2, p256_mod);
24189         sp_256_mul_8(t2, t2, point->x);
24190         (void)sp_256_mod_8(t2, t2, p256_mod);
24191         (void)sp_256_sub_8(t2, p256_mod, t2);
24192         sp_256_mont_add_8(t1, t1, t2, p256_mod);
24193 
24194         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
24195         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
24196         sp_256_mont_add_8(t1, t1, point->x, p256_mod);
24197 
24198         if (sp_256_cmp_8(t1, p256_b) != 0) {
24199             err = MP_VAL;
24200         }
24201     }
24202 
24203 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24204     if (t1 != NULL)
24205         XFREE(t1, heap, DYNAMIC_TYPE_ECC);
24206 #endif
24207 
24208     return err;
24209 }
24210 
24211 /* Check that the x and y oridinates are a valid point on the curve.
24212  *
24213  * pX  X ordinate of EC point.
24214  * pY  Y ordinate of EC point.
24215  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
24216  * not on the curve and MP_OKAY otherwise.
24217  */
sp_ecc_is_point_256(const mp_int * pX,const mp_int * pY)24218 int sp_ecc_is_point_256(const mp_int* pX, const mp_int* pY)
24219 {
24220 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24221     sp_point_256* pub = NULL;
24222 #else
24223     sp_point_256 pub[1];
24224 #endif
24225     const byte one[1] = { 1 };
24226     int err = MP_OKAY;
24227 
24228 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24229     pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL,
24230                                        DYNAMIC_TYPE_ECC);
24231     if (pub == NULL)
24232         err = MEMORY_E;
24233 #endif
24234 
24235     if (err == MP_OKAY) {
24236         sp_256_from_mp(pub->x, 8, pX);
24237         sp_256_from_mp(pub->y, 8, pY);
24238         sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
24239 
24240         err = sp_256_ecc_is_point_8(pub, NULL);
24241     }
24242 
24243 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24244     if (pub != NULL)
24245         XFREE(pub, NULL, DYNAMIC_TYPE_ECC);
24246 #endif
24247 
24248     return err;
24249 }
24250 
24251 /* Check that the private scalar generates the EC point (px, py), the point is
24252  * on the curve and the point has the correct order.
24253  *
24254  * pX     X ordinate of EC point.
24255  * pY     Y ordinate of EC point.
24256  * privm  Private scalar that generates EC point.
24257  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
24258  * not on the curve, ECC_INF_E if the point does not have the correct order,
24259  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
24260  * MP_OKAY otherwise.
24261  */
sp_ecc_check_key_256(const mp_int * pX,const mp_int * pY,const mp_int * privm,void * heap)24262 int sp_ecc_check_key_256(const mp_int* pX, const mp_int* pY,
24263     const mp_int* privm, void* heap)
24264 {
24265 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24266     sp_digit* priv = NULL;
24267     sp_point_256* pub = NULL;
24268 #else
24269     sp_digit priv[8];
24270     sp_point_256 pub[2];
24271 #endif
24272     sp_point_256* p = NULL;
24273     const byte one[1] = { 1 };
24274     int err = MP_OKAY;
24275 
24276 
24277     /* Quick check the lengs of public key ordinates and private key are in
24278      * range. Proper check later.
24279      */
24280     if (((mp_count_bits(pX) > 256) ||
24281         (mp_count_bits(pY) > 256) ||
24282         ((privm != NULL) && (mp_count_bits(privm) > 256)))) {
24283         err = ECC_OUT_OF_RANGE_E;
24284     }
24285 
24286 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24287     if (err == MP_OKAY) {
24288         pub = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, heap,
24289                                            DYNAMIC_TYPE_ECC);
24290         if (pub == NULL)
24291             err = MEMORY_E;
24292     }
24293     if (err == MP_OKAY && privm) {
24294         priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8, heap,
24295                                   DYNAMIC_TYPE_ECC);
24296         if (priv == NULL)
24297             err = MEMORY_E;
24298     }
24299 #endif
24300 
24301     if (err == MP_OKAY) {
24302         p = pub + 1;
24303 
24304         sp_256_from_mp(pub->x, 8, pX);
24305         sp_256_from_mp(pub->y, 8, pY);
24306         sp_256_from_bin(pub->z, 8, one, (int)sizeof(one));
24307         if (privm)
24308             sp_256_from_mp(priv, 8, privm);
24309 
24310         /* Check point at infinitiy. */
24311         if ((sp_256_iszero_8(pub->x) != 0) &&
24312             (sp_256_iszero_8(pub->y) != 0)) {
24313             err = ECC_INF_E;
24314         }
24315     }
24316 
24317     /* Check range of X and Y */
24318     if ((err == MP_OKAY) &&
24319             ((sp_256_cmp_8(pub->x, p256_mod) >= 0) ||
24320              (sp_256_cmp_8(pub->y, p256_mod) >= 0))) {
24321         err = ECC_OUT_OF_RANGE_E;
24322     }
24323 
24324     if (err == MP_OKAY) {
24325         /* Check point is on curve */
24326         err = sp_256_ecc_is_point_8(pub, heap);
24327     }
24328 
24329     if (err == MP_OKAY) {
24330         /* Point * order = infinity */
24331             err = sp_256_ecc_mulmod_8(p, pub, p256_order, 1, 1, heap);
24332     }
24333     /* Check result is infinity */
24334     if ((err == MP_OKAY) && ((sp_256_iszero_8(p->x) == 0) ||
24335                              (sp_256_iszero_8(p->y) == 0))) {
24336         err = ECC_INF_E;
24337     }
24338 
24339     if (privm) {
24340         if (err == MP_OKAY) {
24341             /* Base * private = point */
24342                 err = sp_256_ecc_mulmod_base_8(p, priv, 1, 1, heap);
24343         }
24344         /* Check result is public key */
24345         if ((err == MP_OKAY) &&
24346                 ((sp_256_cmp_8(p->x, pub->x) != 0) ||
24347                  (sp_256_cmp_8(p->y, pub->y) != 0))) {
24348             err = ECC_PRIV_KEY_E;
24349         }
24350     }
24351 
24352 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24353     if (pub != NULL)
24354         XFREE(pub, heap, DYNAMIC_TYPE_ECC);
24355     if (priv != NULL)
24356         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
24357 #endif
24358 
24359     return err;
24360 }
24361 #endif
24362 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
24363 /* Add two projective EC points together.
24364  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
24365  *
24366  * pX   First EC point's X ordinate.
24367  * pY   First EC point's Y ordinate.
24368  * pZ   First EC point's Z ordinate.
24369  * qX   Second EC point's X ordinate.
24370  * qY   Second EC point's Y ordinate.
24371  * qZ   Second EC point's Z ordinate.
24372  * rX   Resultant EC point's X ordinate.
24373  * rY   Resultant EC point's Y ordinate.
24374  * rZ   Resultant EC point's Z ordinate.
24375  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24376  */
sp_ecc_proj_add_point_256(mp_int * pX,mp_int * pY,mp_int * pZ,mp_int * qX,mp_int * qY,mp_int * qZ,mp_int * rX,mp_int * rY,mp_int * rZ)24377 int sp_ecc_proj_add_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
24378                               mp_int* qX, mp_int* qY, mp_int* qZ,
24379                               mp_int* rX, mp_int* rY, mp_int* rZ)
24380 {
24381 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24382     sp_digit* tmp = NULL;
24383     sp_point_256* p = NULL;
24384 #else
24385     sp_digit tmp[2 * 8 * 5];
24386     sp_point_256 p[2];
24387 #endif
24388     sp_point_256* q = NULL;
24389     int err = MP_OKAY;
24390 
24391 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24392     if (err == MP_OKAY) {
24393         p = (sp_point_256*)XMALLOC(sizeof(sp_point_256) * 2, NULL,
24394                                          DYNAMIC_TYPE_ECC);
24395         if (p == NULL)
24396             err = MEMORY_E;
24397     }
24398     if (err == MP_OKAY) {
24399         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 5, NULL,
24400                                  DYNAMIC_TYPE_ECC);
24401         if (tmp == NULL) {
24402             err = MEMORY_E;
24403         }
24404     }
24405 #endif
24406 
24407     if (err == MP_OKAY) {
24408         q = p + 1;
24409 
24410         sp_256_from_mp(p->x, 8, pX);
24411         sp_256_from_mp(p->y, 8, pY);
24412         sp_256_from_mp(p->z, 8, pZ);
24413         sp_256_from_mp(q->x, 8, qX);
24414         sp_256_from_mp(q->y, 8, qY);
24415         sp_256_from_mp(q->z, 8, qZ);
24416         p->infinity = sp_256_iszero_8(p->x) &
24417                       sp_256_iszero_8(p->y);
24418         q->infinity = sp_256_iszero_8(q->x) &
24419                       sp_256_iszero_8(q->y);
24420 
24421             sp_256_proj_point_add_8(p, p, q, tmp);
24422     }
24423 
24424     if (err == MP_OKAY) {
24425         err = sp_256_to_mp(p->x, rX);
24426     }
24427     if (err == MP_OKAY) {
24428         err = sp_256_to_mp(p->y, rY);
24429     }
24430     if (err == MP_OKAY) {
24431         err = sp_256_to_mp(p->z, rZ);
24432     }
24433 
24434 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24435     if (tmp != NULL)
24436         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
24437     if (p != NULL)
24438         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
24439 #endif
24440 
24441     return err;
24442 }
24443 
24444 /* Double a projective EC point.
24445  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
24446  *
24447  * pX   EC point's X ordinate.
24448  * pY   EC point's Y ordinate.
24449  * pZ   EC point's Z ordinate.
24450  * rX   Resultant EC point's X ordinate.
24451  * rY   Resultant EC point's Y ordinate.
24452  * rZ   Resultant EC point's Z ordinate.
24453  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24454  */
sp_ecc_proj_dbl_point_256(mp_int * pX,mp_int * pY,mp_int * pZ,mp_int * rX,mp_int * rY,mp_int * rZ)24455 int sp_ecc_proj_dbl_point_256(mp_int* pX, mp_int* pY, mp_int* pZ,
24456                               mp_int* rX, mp_int* rY, mp_int* rZ)
24457 {
24458 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24459     sp_digit* tmp = NULL;
24460     sp_point_256* p = NULL;
24461 #else
24462     sp_digit tmp[2 * 8 * 2];
24463     sp_point_256 p[1];
24464 #endif
24465     int err = MP_OKAY;
24466 
24467 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24468     if (err == MP_OKAY) {
24469         p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL,
24470                                          DYNAMIC_TYPE_ECC);
24471         if (p == NULL)
24472             err = MEMORY_E;
24473     }
24474     if (err == MP_OKAY) {
24475         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 2, NULL,
24476                                  DYNAMIC_TYPE_ECC);
24477         if (tmp == NULL)
24478             err = MEMORY_E;
24479     }
24480 #endif
24481 
24482     if (err == MP_OKAY) {
24483         sp_256_from_mp(p->x, 8, pX);
24484         sp_256_from_mp(p->y, 8, pY);
24485         sp_256_from_mp(p->z, 8, pZ);
24486         p->infinity = sp_256_iszero_8(p->x) &
24487                       sp_256_iszero_8(p->y);
24488 
24489             sp_256_proj_point_dbl_8(p, p, tmp);
24490     }
24491 
24492     if (err == MP_OKAY) {
24493         err = sp_256_to_mp(p->x, rX);
24494     }
24495     if (err == MP_OKAY) {
24496         err = sp_256_to_mp(p->y, rY);
24497     }
24498     if (err == MP_OKAY) {
24499         err = sp_256_to_mp(p->z, rZ);
24500     }
24501 
24502 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24503     if (tmp != NULL)
24504         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
24505     if (p != NULL)
24506         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
24507 #endif
24508 
24509     return err;
24510 }
24511 
24512 /* Map a projective EC point to affine in place.
24513  * pZ will be one.
24514  *
24515  * pX   EC point's X ordinate.
24516  * pY   EC point's Y ordinate.
24517  * pZ   EC point's Z ordinate.
24518  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24519  */
sp_ecc_map_256(mp_int * pX,mp_int * pY,mp_int * pZ)24520 int sp_ecc_map_256(mp_int* pX, mp_int* pY, mp_int* pZ)
24521 {
24522 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24523     sp_digit* tmp = NULL;
24524     sp_point_256* p = NULL;
24525 #else
24526     sp_digit tmp[2 * 8 * 4];
24527     sp_point_256 p[1];
24528 #endif
24529     int err = MP_OKAY;
24530 
24531 
24532 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24533     if (err == MP_OKAY) {
24534         p = (sp_point_256*)XMALLOC(sizeof(sp_point_256), NULL,
24535                                          DYNAMIC_TYPE_ECC);
24536         if (p == NULL)
24537             err = MEMORY_E;
24538     }
24539     if (err == MP_OKAY) {
24540         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 8 * 4, NULL,
24541                                  DYNAMIC_TYPE_ECC);
24542         if (tmp == NULL)
24543             err = MEMORY_E;
24544     }
24545 #endif
24546     if (err == MP_OKAY) {
24547         sp_256_from_mp(p->x, 8, pX);
24548         sp_256_from_mp(p->y, 8, pY);
24549         sp_256_from_mp(p->z, 8, pZ);
24550         p->infinity = sp_256_iszero_8(p->x) &
24551                       sp_256_iszero_8(p->y);
24552 
24553             sp_256_map_8(p, p, tmp);
24554     }
24555 
24556     if (err == MP_OKAY) {
24557         err = sp_256_to_mp(p->x, pX);
24558     }
24559     if (err == MP_OKAY) {
24560         err = sp_256_to_mp(p->y, pY);
24561     }
24562     if (err == MP_OKAY) {
24563         err = sp_256_to_mp(p->z, pZ);
24564     }
24565 
24566 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24567     if (tmp != NULL)
24568         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
24569     if (p != NULL)
24570         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
24571 #endif
24572 
24573     return err;
24574 }
24575 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
24576 #ifdef HAVE_COMP_KEY
24577 /* Find the square root of a number mod the prime of the curve.
24578  *
24579  * y  The number to operate on and the result.
24580  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24581  */
sp_256_mont_sqrt_8(sp_digit * y)24582 static int sp_256_mont_sqrt_8(sp_digit* y)
24583 {
24584 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24585     sp_digit* t1 = NULL;
24586 #else
24587     sp_digit t1[4 * 8];
24588 #endif
24589     sp_digit* t2 = NULL;
24590     int err = MP_OKAY;
24591 
24592 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24593     t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
24594     if (t1 == NULL) {
24595         err = MEMORY_E;
24596     }
24597 #endif
24598 
24599     if (err == MP_OKAY) {
24600         t2 = t1 + 2 * 8;
24601 
24602         {
24603             /* t2 = y ^ 0x2 */
24604             sp_256_mont_sqr_8(t2, y, p256_mod, p256_mp_mod);
24605             /* t1 = y ^ 0x3 */
24606             sp_256_mont_mul_8(t1, t2, y, p256_mod, p256_mp_mod);
24607             /* t2 = y ^ 0xc */
24608             sp_256_mont_sqr_n_8(t2, t1, 2, p256_mod, p256_mp_mod);
24609             /* t1 = y ^ 0xf */
24610             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24611             /* t2 = y ^ 0xf0 */
24612             sp_256_mont_sqr_n_8(t2, t1, 4, p256_mod, p256_mp_mod);
24613             /* t1 = y ^ 0xff */
24614             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24615             /* t2 = y ^ 0xff00 */
24616             sp_256_mont_sqr_n_8(t2, t1, 8, p256_mod, p256_mp_mod);
24617             /* t1 = y ^ 0xffff */
24618             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24619             /* t2 = y ^ 0xffff0000 */
24620             sp_256_mont_sqr_n_8(t2, t1, 16, p256_mod, p256_mp_mod);
24621             /* t1 = y ^ 0xffffffff */
24622             sp_256_mont_mul_8(t1, t1, t2, p256_mod, p256_mp_mod);
24623             /* t1 = y ^ 0xffffffff00000000 */
24624             sp_256_mont_sqr_n_8(t1, t1, 32, p256_mod, p256_mp_mod);
24625             /* t1 = y ^ 0xffffffff00000001 */
24626             sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
24627             /* t1 = y ^ 0xffffffff00000001000000000000000000000000 */
24628             sp_256_mont_sqr_n_8(t1, t1, 96, p256_mod, p256_mp_mod);
24629             /* t1 = y ^ 0xffffffff00000001000000000000000000000001 */
24630             sp_256_mont_mul_8(t1, t1, y, p256_mod, p256_mp_mod);
24631             sp_256_mont_sqr_n_8(y, t1, 94, p256_mod, p256_mp_mod);
24632         }
24633     }
24634 
24635 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24636     if (t1 != NULL)
24637         XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
24638 #endif
24639 
24640     return err;
24641 }
24642 
24643 
24644 /* Uncompress the point given the X ordinate.
24645  *
24646  * xm    X ordinate.
24647  * odd   Whether the Y ordinate is odd.
24648  * ym    Calculated Y ordinate.
24649  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
24650  */
sp_ecc_uncompress_256(mp_int * xm,int odd,mp_int * ym)24651 int sp_ecc_uncompress_256(mp_int* xm, int odd, mp_int* ym)
24652 {
24653 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24654     sp_digit* x = NULL;
24655 #else
24656     sp_digit x[4 * 8];
24657 #endif
24658     sp_digit* y = NULL;
24659     int err = MP_OKAY;
24660 
24661 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24662     x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 8, NULL, DYNAMIC_TYPE_ECC);
24663     if (x == NULL)
24664         err = MEMORY_E;
24665 #endif
24666 
24667     if (err == MP_OKAY) {
24668         y = x + 2 * 8;
24669 
24670         sp_256_from_mp(x, 8, xm);
24671         err = sp_256_mod_mul_norm_8(x, x, p256_mod);
24672     }
24673     if (err == MP_OKAY) {
24674         /* y = x^3 */
24675         {
24676             sp_256_mont_sqr_8(y, x, p256_mod, p256_mp_mod);
24677             sp_256_mont_mul_8(y, y, x, p256_mod, p256_mp_mod);
24678         }
24679         /* y = x^3 - 3x */
24680         sp_256_mont_sub_8(y, y, x, p256_mod);
24681         sp_256_mont_sub_8(y, y, x, p256_mod);
24682         sp_256_mont_sub_8(y, y, x, p256_mod);
24683         /* y = x^3 - 3x + b */
24684         err = sp_256_mod_mul_norm_8(x, p256_b, p256_mod);
24685     }
24686     if (err == MP_OKAY) {
24687         sp_256_mont_add_8(y, y, x, p256_mod);
24688         /* y = sqrt(x^3 - 3x + b) */
24689         err = sp_256_mont_sqrt_8(y);
24690     }
24691     if (err == MP_OKAY) {
24692         XMEMSET(y + 8, 0, 8U * sizeof(sp_digit));
24693         sp_256_mont_reduce_8(y, p256_mod, p256_mp_mod);
24694         if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
24695             sp_256_mont_sub_8(y, p256_mod, y, p256_mod);
24696         }
24697 
24698         err = sp_256_to_mp(y, ym);
24699     }
24700 
24701 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
24702     if (x != NULL)
24703         XFREE(x, NULL, DYNAMIC_TYPE_ECC);
24704 #endif
24705 
24706     return err;
24707 }
24708 #endif
24709 #endif /* !WOLFSSL_SP_NO_256 */
24710 #ifdef WOLFSSL_SP_384
24711 
/* Point structure used by the P384 code: three ordinates and an infinity
 * flag.
 *
 * Each ordinate buffer holds 2 * 12 digits, twice the 12 digits of a P384
 * value such as p384_mod.
 * NOTE(review): the extra 12 digits are presumably room for double-width
 * intermediate results - confirm against the routines that use the points.
 */
typedef struct sp_point_384 {
    /* X ordinate of point. */
    sp_digit x[2 * 12];
    /* Y ordinate of point. */
    sp_digit y[2 * 12];
    /* Z ordinate of point. */
    sp_digit z[2 * 12];
    /* Indicates point is at infinity. */
    int infinity;
} sp_point_384;
24723 
/* The modulus (prime) of the curve P384.
 * As with the other tables below, 12 digits stored least significant digit
 * first.
 */
static const sp_digit p384_mod[12] = {
    0xffffffff,0x00000000,0x00000000,0xffffffff,0xfffffffe,0xffffffff,
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
};
/* The Montgomery normalizer for modulus of the curve P384. */
static const sp_digit p384_norm_mod[12] = {
    0x00000001,0xffffffff,0xffffffff,0x00000000,0x00000001,0x00000000,
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
};
/* The Montgomery multiplier for modulus of the curve P384. */
static sp_digit p384_mp_mod = 0x00000001;
#if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
                                            defined(HAVE_ECC_VERIFY)
/* The order of the curve P384.
 * Only compiled in for key generation validation, signing or verification.
 */
static const sp_digit p384_order[12] = {
    0xccc52973,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
};
#endif
/* The order of the curve P384 minus 2.
 * Identical to p384_order except that the lowest digit is smaller by two.
 */
static const sp_digit p384_order2[12] = {
    0xccc52971,0xecec196a,0x48b0a77a,0x581a0db2,0xf4372ddf,0xc7634d81,
    0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff
};
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery normalizer for order of the curve P384. */
static const sp_digit p384_norm_order[12] = {
    0x333ad68d,0x1313e695,0xb74f5885,0xa7e5f24d,0x0bc8d220,0x389cb27e,
    0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000
};
#endif
#if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
/* The Montgomery multiplier for order of the curve P384. */
static sp_digit p384_mp_order = 0xe88fdc45;
#endif
/* The base point of curve P384.
 * Each ordinate is 12 digits of value followed by 12 zero digits to fill the
 * 2 * 12 digit buffers of sp_point_384. Z is one and the infinity flag is
 * clear.
 */
static const sp_point_384 p384_base = {
    /* X ordinate */
    {
        0x72760ab7,0x3a545e38,0xbf55296c,0x5502f25d,0x82542a38,0x59f741e0,
        0x8ba79b98,0x6e1d3b62,0xf320ad74,0x8eb1c71e,0xbe8b0537,0xaa87ca22,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0
    },
    /* Y ordinate */
    {
        0x90ea0e5f,0x7a431d7c,0x1d7e819d,0x0a60b1ce,0xb5f0b8c0,0xe9da3113,
        0x289a147c,0xf8f41dbd,0x9292dc29,0x5d9e98bf,0x96262c6f,0x3617de4a,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0
    },
    /* Z ordinate */
    {
        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0
    },
    /* infinity */
    0
};
#if defined(HAVE_ECC_CHECK_KEY) || defined(HAVE_COMP_KEY)
/* The constant b of the curve P384.
 * Only compiled in for key checking or compressed key support.
 * NOTE(review): presumably the b of y^2 = x^3 - 3x + b, mirroring how p256_b
 * is used by the P256 code - confirm against the P384 users of this table.
 */
static const sp_digit p384_b[12] = {
    0xd3ec2aef,0x2a85c8ed,0x8a2ed19d,0xc656398d,0x5013875a,0x0314088f,
    0xfe814112,0x181d9c6e,0xe3f82d19,0x988e056b,0xe23ee7e4,0xb3312fa7
};
#endif
24795 
/* Multiply a and b into r. (r = a * b)
 *
 * The product is built one output digit (column) at a time: for each output
 * offset all products a[i] * b[j] with i + j equal to that column are summed
 * into a three register accumulator, the low word is stored and the
 * accumulator is shifted down by one word.
 *
 * The 24 result digits are written to a local buffer and copied to r at the
 * end.
 * NOTE(review): the local buffer presumably allows r to overlap a or b -
 * confirm against callers.
 * NOTE(review): %[r], %[a] and %[b] are declared as input operands but are
 * written inside the asm; they hold their original values again when the
 * asm ends.
 *
 * r  A single precision integer. Receives 2 * 12 digits.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_384_mul_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp_arr[12 * 2];
    sp_digit* tmp = tmp_arr;
    __asm__ __volatile__ (
        /* r3:r4:r5 - accumulator, r9 - byte offset of current output digit.
         * r12, r10, r11 - saved copies of the result, a and b pointers.
         * r14 - a + 48, one past the last digit of a.
         */
        "mov	r3, #0\n\t"
        "mov	r4, #0\n\t"
        "mov	r9, r3\n\t"
        "mov	r12, %[r]\n\t"
        "mov	r10, %[a]\n\t"
        "mov	r11, %[b]\n\t"
        "mov	r6, #48\n\t"
        "add	r6, r6, r10\n\t"
        "mov	r14, r6\n\t"
        /* Outer loop: one pass per output digit. */
        "\n1:\n\t"
        /* %[r] is used as a zero register while accumulating. */
        "mov	%[r], #0\n\t"
        "mov	r5, #0\n\t"
        /* %[a] = (offset >= 44) ? offset - 44 : 0, computed without a
         * branch; %[b] = offset - %[a]. Both are then made into pointers.
         */
        "mov	r6, #44\n\t"
        "mov	%[a], r9\n\t"
        "subs	%[a], %[a], r6\n\t"
        "sbc	r6, r6, r6\n\t"
        "mvn	r6, r6\n\t"
        "and	%[a], %[a], r6\n\t"
        "mov	%[b], r9\n\t"
        "sub	%[b], %[b], %[a]\n\t"
        "add	%[a], %[a], r10\n\t"
        "add	%[b], %[b], r11\n\t"
        /* Inner loop: step up through a and down through b. */
        "\n2:\n\t"
        /* Multiply Start */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r8, [%[b]]\n\t"
        "umull	r6, r8, r6, r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Multiply Done */
        "add	%[a], %[a], #4\n\t"
        "sub	%[b], %[b], #4\n\t"
        /* Column finished when the end of a is reached ... */
        "cmp	%[a], r14\n\t"
#ifdef __GNUC__
        "beq	3f\n\t"
#else
        "beq.n	3f\n\t"
#endif /* __GNUC__ */
        /* ... or when the a pointer has passed a + offset. */
        "mov	r6, r9\n\t"
        "add	r6, r6, r10\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "ble	2b\n\t"
#else
        "ble.n	2b\n\t"
#endif /* __GNUC__ */
        "\n3:\n\t"
        /* Store the low accumulator word at the current offset and shift
         * the accumulator down by one word.
         */
        "mov	%[r], r12\n\t"
        "mov	r8, r9\n\t"
        "str	r3, [%[r], r8]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "add	r8, r8, #4\n\t"
        "mov	r9, r8\n\t"
        /* Next column while the new offset is <= 88. */
        "mov	r6, #88\n\t"
        "cmp	r8, r6\n\t"
#ifdef __GNUC__
        "ble	1b\n\t"
#else
        "ble.n	1b\n\t"
#endif /* __GNUC__ */
        /* Store the top digit (offset 92) and put a and b back. */
        "str	r3, [%[r], r8]\n\t"
        "mov	%[a], r10\n\t"
        "mov	%[b], r11\n\t"
        :
        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    /* Copy the finished product out of the local buffer. */
    XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
}
24880 
/* Square a and put result in r. (r = a * a)
 *
 * The result is built one output digit (column) at a time. Within a column
 * each product of two different digits is added twice and the product of a
 * digit with itself is added once.
 *
 * The 24 result digits are written to 96 bytes reserved on the stack inside
 * the asm (sp is lowered by 96 and raised again at the end) and then copied
 * to r.
 * NOTE(review): %[r] and %[a] are declared as input operands but are written
 * inside the asm; %[a] is left holding the address of the stack buffer rather
 * than its original value, and sp is modified within the asm - confirm this
 * is safe with the compilers in use.
 *
 * r  A single precision integer. Receives 2 * 12 digits.
 * a  A single precision integer.
 */
SP_NOINLINE static void sp_384_sqr_12(sp_digit* r, const sp_digit* a)
{
    __asm__ __volatile__ (
        /* r3:r4:r5 - accumulator, r9 - byte offset of current output digit.
         * r12 - saved r, r10 - saved a, r11 - stack buffer for the result.
         */
        "mov	r3, #0\n\t"
        "mov	r4, #0\n\t"
        "mov	r5, #0\n\t"
        "mov	r9, r3\n\t"
        "mov	r12, %[r]\n\t"
        /* Reserve 96 bytes on the stack. */
        "mov	r6, #96\n\t"
        "neg	r6, r6\n\t"
        "add	sp, sp, r6\n\t"
        "mov	r11, sp\n\t"
        "mov	r10, %[a]\n\t"
        /* Outer loop: one pass per output digit. */
        "\n1:\n\t"
        /* %[r] is used as a zero register while accumulating. */
        "mov	%[r], #0\n\t"
        /* %[a] = (offset >= 44) ? offset - 44 : 0, computed without a
         * branch; r2 = offset - %[a]. Both are then made into pointers into
         * a: %[a] steps upwards, r2 steps downwards.
         */
        "mov	r6, #44\n\t"
        "mov	%[a], r9\n\t"
        "subs	%[a], %[a], r6\n\t"
        "sbc	r6, r6, r6\n\t"
        "mvn	r6, r6\n\t"
        "and	%[a], %[a], r6\n\t"
        "mov	r2, r9\n\t"
        "sub	r2, r2, %[a]\n\t"
        "add	%[a], %[a], r10\n\t"
        "add	r2, r2, r10\n\t"
        "\n2:\n\t"
        /* Both pointers on the same digit: square it instead. */
        "cmp	r2, %[a]\n\t"
#ifdef __GNUC__
        "beq	4f\n\t"
#else
        "beq.n	4f\n\t"
#endif /* __GNUC__ */
        /* Multiply * 2: Start */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r8, [r2]\n\t"
        "umull	r6, r8, r6, r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Multiply * 2: Done */
#ifdef __GNUC__
        "bal	5f\n\t"
#else
        "bal.n	5f\n\t"
#endif /* __GNUC__ */
        "\n4:\n\t"
        /* Square: Start */
        "ldr	r6, [%[a]]\n\t"
        "umull	r6, r8, r6, r6\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Square: Done */
        "\n5:\n\t"
        "add	%[a], %[a], #4\n\t"
        "sub	r2, r2, #4\n\t"
        /* Column finished when the end of a (a + 48) is reached ... */
        "mov	r6, #48\n\t"
        "add	r6, r6, r10\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "beq	3f\n\t"
#else
        "beq.n	3f\n\t"
#endif /* __GNUC__ */
        /* ... or when the upward pointer has passed the downward one ... */
        "cmp	%[a], r2\n\t"
#ifdef __GNUC__
        "bgt	3f\n\t"
#else
        "bgt.n	3f\n\t"
#endif /* __GNUC__ */
        /* ... or when the upward pointer has passed a + offset. */
        "mov	r8, r9\n\t"
        "add	r8, r8, r10\n\t"
        "cmp	%[a], r8\n\t"
#ifdef __GNUC__
        "ble	2b\n\t"
#else
        "ble.n	2b\n\t"
#endif /* __GNUC__ */
        "\n3:\n\t"
        /* Store the low accumulator word into the stack buffer at the
         * current offset and shift the accumulator down by one word.
         */
        "mov	%[r], r11\n\t"
        "mov	r8, r9\n\t"
        "str	r3, [%[r], r8]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "mov	r5, #0\n\t"
        "add	r8, r8, #4\n\t"
        "mov	r9, r8\n\t"
        /* Next column while the new offset is <= 88. */
        "mov	r6, #88\n\t"
        "cmp	r8, r6\n\t"
#ifdef __GNUC__
        "ble	1b\n\t"
#else
        "ble.n	1b\n\t"
#endif /* __GNUC__ */
        "mov	%[a], r10\n\t"
        /* Store the top digit (offset 92). */
        "str	r3, [%[r], r8]\n\t"
        /* Copy the 24 digits from the stack buffer to r, from offset 92
         * down to offset 0.
         */
        "mov	%[r], r12\n\t"
        "mov	%[a], r11\n\t"
        "mov	r3, #92\n\t"
        "\n4:\n\t"
        "ldr	r6, [%[a], r3]\n\t"
        "str	r6, [%[r], r3]\n\t"
        "subs	r3, r3, #4\n\t"
#ifdef __GNUC__
        "bge	4b\n\t"
#else
        "bge.n	4b\n\t"
#endif /* __GNUC__ */
        /* Release the 96 bytes of stack. */
        "mov	r6, #96\n\t"
        "add	sp, sp, r6\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
    );
}
25004 
25005 #ifdef WOLFSSL_SP_SMALL
/* Add b to a into r. (r = a + b)
 *
 * Small code size variant: a loop that adds one digit per iteration over
 * 48 bytes of a. The carry is kept in c between iterations because the loop
 * compare overwrites the processor's carry flag.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top digit (0 or 1).
 */
SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r6 = a + 48 (end of a), r8 = 0xffffffff. */
        "mov	r6, %[a]\n\t"
        "mov	r8, #0\n\t"
        "add	r6, r6, #48\n\t"
        "sub	r8, r8, #1\n\t"
        "\n1:\n\t"
        /* c + 0xffffffff sets the carry flag exactly when c is non-zero:
         * this puts the saved carry back into the flag.
         */
        "adds	%[c], %[c], r8\n\t"
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "adcs	r4, r4, r5\n\t"
        "str	r4, [%[r]]\n\t"
        /* c = carry out of this digit. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        "add	%[a], %[a], #4\n\t"
        "add	%[b], %[b], #4\n\t"
        "add	%[r], %[r], #4\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "bne	1b\n\t"
#else
        "bne.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
25046 
25047 #else
/* Add b to a into r. (r = a + b)
 *
 * Unrolled variant: six groups of two digits are loaded, added and stored.
 * The carry flag is chained through all twelve additions without being saved
 * in between.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the top digit (0 or 1).
 */
SP_NOINLINE static sp_digit sp_384_add_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* Digits 0-1: the first add starts the carry chain. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* Digits 2-3. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* Digits 4-5. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* Digits 6-7. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* Digits 8-9. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* Digits 10-11. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* c = final carry. */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
25099 
25100 #endif /* WOLFSSL_SP_SMALL */
25101 #ifdef WOLFSSL_SP_SMALL
/* Sub b from a into r. (r = a - b)
 *
 * Small code size variant: a loop that subtracts one digit per iteration
 * over 48 bytes of a. The borrow is kept in c between iterations because the
 * loop compare overwrites the processor's carry flag.
 *
 * r  A single precision integer.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns 0 when there is no borrow out of the top digit, otherwise a value
 * with all bits set.
 */
SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* r6 = a + 48 (end of a). */
        "mov	r6, %[a]\n\t"
        "add	r6, r6, #48\n\t"
        "\n1:\n\t"
        /* 0 - c borrows exactly when c is non-zero: this puts the saved
         * borrow back into the carry flag. r5 is only a scratch target.
         */
        "mov	r5, #0\n\t"
        "subs	r5, r5, %[c]\n\t"
        "ldr	r4, [%[a]]\n\t"
        "ldr	r5, [%[b]]\n\t"
        "sbcs	r4, r4, r5\n\t"
        "str	r4, [%[r]]\n\t"
        /* c = 0 if no borrow from this digit, all ones otherwise. */
        "sbc	%[c], %[c], %[c]\n\t"
        "add	%[a], %[a], #4\n\t"
        "add	%[b], %[b], #4\n\t"
        "add	%[r], %[r], #4\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "bne	1b\n\t"
#else
        "bne.n	1b\n\t"
#endif /* __GNUC__ */
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6"
    );

    return c;
}
25140 
25141 #else
25142 /* Sub b from a into r. (r = a - b)
25143  *
25144  * r  A single precision integer.
25145  * a  A single precision integer.
25146  * b  A single precision integer.
25147  */
sp_384_sub_12(sp_digit * r,const sp_digit * a,const sp_digit * b)25148 SP_NOINLINE static sp_digit sp_384_sub_12(sp_digit* r, const sp_digit* a,
25149         const sp_digit* b)
25150 {
25151     sp_digit c = 0;
25152 
25153     __asm__ __volatile__ (
25154         "ldr	r4, [%[a], #0]\n\t"
25155         "ldr	r5, [%[a], #4]\n\t"
25156         "ldr	r6, [%[b], #0]\n\t"
25157         "ldr	r8, [%[b], #4]\n\t"
25158         "subs	r4, r4, r6\n\t"
25159         "sbcs	r5, r5, r8\n\t"
25160         "str	r4, [%[r], #0]\n\t"
25161         "str	r5, [%[r], #4]\n\t"
25162         "ldr	r4, [%[a], #8]\n\t"
25163         "ldr	r5, [%[a], #12]\n\t"
25164         "ldr	r6, [%[b], #8]\n\t"
25165         "ldr	r8, [%[b], #12]\n\t"
25166         "sbcs	r4, r4, r6\n\t"
25167         "sbcs	r5, r5, r8\n\t"
25168         "str	r4, [%[r], #8]\n\t"
25169         "str	r5, [%[r], #12]\n\t"
25170         "ldr	r4, [%[a], #16]\n\t"
25171         "ldr	r5, [%[a], #20]\n\t"
25172         "ldr	r6, [%[b], #16]\n\t"
25173         "ldr	r8, [%[b], #20]\n\t"
25174         "sbcs	r4, r4, r6\n\t"
25175         "sbcs	r5, r5, r8\n\t"
25176         "str	r4, [%[r], #16]\n\t"
25177         "str	r5, [%[r], #20]\n\t"
25178         "ldr	r4, [%[a], #24]\n\t"
25179         "ldr	r5, [%[a], #28]\n\t"
25180         "ldr	r6, [%[b], #24]\n\t"
25181         "ldr	r8, [%[b], #28]\n\t"
25182         "sbcs	r4, r4, r6\n\t"
25183         "sbcs	r5, r5, r8\n\t"
25184         "str	r4, [%[r], #24]\n\t"
25185         "str	r5, [%[r], #28]\n\t"
25186         "ldr	r4, [%[a], #32]\n\t"
25187         "ldr	r5, [%[a], #36]\n\t"
25188         "ldr	r6, [%[b], #32]\n\t"
25189         "ldr	r8, [%[b], #36]\n\t"
25190         "sbcs	r4, r4, r6\n\t"
25191         "sbcs	r5, r5, r8\n\t"
25192         "str	r4, [%[r], #32]\n\t"
25193         "str	r5, [%[r], #36]\n\t"
25194         "ldr	r4, [%[a], #40]\n\t"
25195         "ldr	r5, [%[a], #44]\n\t"
25196         "ldr	r6, [%[b], #40]\n\t"
25197         "ldr	r8, [%[b], #44]\n\t"
25198         "sbcs	r4, r4, r6\n\t"
25199         "sbcs	r5, r5, r8\n\t"
25200         "str	r4, [%[r], #40]\n\t"
25201         "str	r5, [%[r], #44]\n\t"
25202         "sbc	%[c], %[c], %[c]\n\t"
25203         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
25204         :
25205         : "memory", "r4", "r5", "r6", "r8"
25206     );
25207 
25208     return c;
25209 }
25210 
25211 #endif /* WOLFSSL_SP_SMALL */
25212 /* Multiply a number by Montgomery normalizer mod modulus (prime).
25213  *
25214  * r  The resulting Montgomery form number.
25215  * a  The number to convert.
25216  * m  The modulus (prime).
25217  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
25218  */
sp_384_mod_mul_norm_12(sp_digit * r,const sp_digit * a,const sp_digit * m)25219 static int sp_384_mod_mul_norm_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
25220 {
25221 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
25222     int64_t* t = NULL;
25223 #else
25224     int64_t t[12];
25225 #endif
25226     int64_t o;
25227     int err = MP_OKAY;
25228 
25229     (void)m;
25230 
25231 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
25232     t = (int64_t*)XMALLOC(sizeof(int64_t) * 12, NULL, DYNAMIC_TYPE_ECC);
25233     if (t == NULL) {
25234         err = MEMORY_E;
25235     }
25236 #endif
25237 
25238     if (err == MP_OKAY) {
25239         /*  1  0  0  0  0  0  0  0  1  1  0 -1 */
25240         t[0] = 0 + (int64_t)a[0] + (int64_t)a[8] + (int64_t)a[9] - (int64_t)a[11];
25241         /* -1  1  0  0  0  0  0  0 -1  0  1  1 */
25242         t[1] = 0 - (int64_t)a[0] + (int64_t)a[1] - (int64_t)a[8] + (int64_t)a[10] + (int64_t)a[11];
25243         /*  0 -1  1  0  0  0  0  0  0 -1  0  1 */
25244         t[2] = 0 - (int64_t)a[1] + (int64_t)a[2] - (int64_t)a[9] + (int64_t)a[11];
25245         /*  1  0 -1  1  0  0  0  0  1  1 -1 -1 */
25246         t[3] = 0 + (int64_t)a[0] - (int64_t)a[2] + (int64_t)a[3] + (int64_t)a[8] + (int64_t)a[9] - (int64_t)a[10] - (int64_t)a[11];
25247         /*  1  1  0 -1  1  0  0  0  1  2  1 -2 */
25248         t[4] = 0 + (int64_t)a[0] + (int64_t)a[1] - (int64_t)a[3] + (int64_t)a[4] + (int64_t)a[8] + 2 * (int64_t)a[9] + (int64_t)a[10] -  2 * (int64_t)a[11];
25249         /*  0  1  1  0 -1  1  0  0  0  1  2  1 */
25250         t[5] = 0 + (int64_t)a[1] + (int64_t)a[2] - (int64_t)a[4] + (int64_t)a[5] + (int64_t)a[9] + 2 * (int64_t)a[10] + (int64_t)a[11];
25251         /*  0  0  1  1  0 -1  1  0  0  0  1  2 */
25252         t[6] = 0 + (int64_t)a[2] + (int64_t)a[3] - (int64_t)a[5] + (int64_t)a[6] + (int64_t)a[10] + 2 * (int64_t)a[11];
25253         /*  0  0  0  1  1  0 -1  1  0  0  0  1 */
25254         t[7] = 0 + (int64_t)a[3] + (int64_t)a[4] - (int64_t)a[6] + (int64_t)a[7] + (int64_t)a[11];
25255         /*  0  0  0  0  1  1  0 -1  1  0  0  0 */
25256         t[8] = 0 + (int64_t)a[4] + (int64_t)a[5] - (int64_t)a[7] + (int64_t)a[8];
25257         /*  0  0  0  0  0  1  1  0 -1  1  0  0 */
25258         t[9] = 0 + (int64_t)a[5] + (int64_t)a[6] - (int64_t)a[8] + (int64_t)a[9];
25259         /*  0  0  0  0  0  0  1  1  0 -1  1  0 */
25260         t[10] = 0 + (int64_t)a[6] + (int64_t)a[7] - (int64_t)a[9] + (int64_t)a[10];
25261         /*  0  0  0  0  0  0  0  1  1  0 -1  1 */
25262         t[11] = 0 + (int64_t)a[7] + (int64_t)a[8] - (int64_t)a[10] + (int64_t)a[11];
25263 
25264         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
25265         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
25266         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
25267         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
25268         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
25269         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
25270         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
25271         t[8] += t[7] >> 32; t[7] &= 0xffffffff;
25272         t[9] += t[8] >> 32; t[8] &= 0xffffffff;
25273         t[10] += t[9] >> 32; t[9] &= 0xffffffff;
25274         t[11] += t[10] >> 32; t[10] &= 0xffffffff;
25275         o     = t[11] >> 32; t[11] &= 0xffffffff;
25276         t[0] += o;
25277         t[1] -= o;
25278         t[3] += o;
25279         t[4] += o;
25280         t[1] += t[0] >> 32; t[0] &= 0xffffffff;
25281         t[2] += t[1] >> 32; t[1] &= 0xffffffff;
25282         t[3] += t[2] >> 32; t[2] &= 0xffffffff;
25283         t[4] += t[3] >> 32; t[3] &= 0xffffffff;
25284         t[5] += t[4] >> 32; t[4] &= 0xffffffff;
25285         t[6] += t[5] >> 32; t[5] &= 0xffffffff;
25286         t[7] += t[6] >> 32; t[6] &= 0xffffffff;
25287         t[8] += t[7] >> 32; t[7] &= 0xffffffff;
25288         t[9] += t[8] >> 32; t[8] &= 0xffffffff;
25289         t[10] += t[9] >> 32; t[9] &= 0xffffffff;
25290         t[11] += t[10] >> 32; t[10] &= 0xffffffff;
25291 
25292         r[0] = t[0];
25293         r[1] = t[1];
25294         r[2] = t[2];
25295         r[3] = t[3];
25296         r[4] = t[4];
25297         r[5] = t[5];
25298         r[6] = t[6];
25299         r[7] = t[7];
25300         r[8] = t[8];
25301         r[9] = t[9];
25302         r[10] = t[10];
25303         r[11] = t[11];
25304     }
25305 
25306 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
25307     if (t != NULL)
25308         XFREE(t, NULL, DYNAMIC_TYPE_ECC);
25309 #endif
25310 
25311     return err;
25312 }
25313 
25314 /* Convert an mp_int to an array of sp_digit.
25315  *
25316  * r  A single precision integer.
25317  * size  Maximum number of bytes to convert
25318  * a  A multi-precision integer.
25319  */
sp_384_from_mp(sp_digit * r,int size,const mp_int * a)25320 static void sp_384_from_mp(sp_digit* r, int size, const mp_int* a)
25321 {
25322 #if DIGIT_BIT == 32
25323     int j;
25324 
25325     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
25326 
25327     for (j = a->used; j < size; j++) {
25328         r[j] = 0;
25329     }
25330 #elif DIGIT_BIT > 32
25331     int i;
25332     int j = 0;
25333     word32 s = 0;
25334 
25335     r[0] = 0;
25336     for (i = 0; i < a->used && j < size; i++) {
25337         r[j] |= ((sp_digit)a->dp[i] << s);
25338         r[j] &= 0xffffffff;
25339         s = 32U - s;
25340         if (j + 1 >= size) {
25341             break;
25342         }
25343         /* lint allow cast of mismatch word32 and mp_digit */
25344         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
25345         while ((s + 32U) <= (word32)DIGIT_BIT) {
25346             s += 32U;
25347             r[j] &= 0xffffffff;
25348             if (j + 1 >= size) {
25349                 break;
25350             }
25351             if (s < (word32)DIGIT_BIT) {
25352                 /* lint allow cast of mismatch word32 and mp_digit */
25353                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
25354             }
25355             else {
25356                 r[++j] = (sp_digit)0;
25357             }
25358         }
25359         s = (word32)DIGIT_BIT - s;
25360     }
25361 
25362     for (j++; j < size; j++) {
25363         r[j] = 0;
25364     }
25365 #else
25366     int i;
25367     int j = 0;
25368     int s = 0;
25369 
25370     r[0] = 0;
25371     for (i = 0; i < a->used && j < size; i++) {
25372         r[j] |= ((sp_digit)a->dp[i]) << s;
25373         if (s + DIGIT_BIT >= 32) {
25374             r[j] &= 0xffffffff;
25375             if (j + 1 >= size) {
25376                 break;
25377             }
25378             s = 32 - s;
25379             if (s == DIGIT_BIT) {
25380                 r[++j] = 0;
25381                 s = 0;
25382             }
25383             else {
25384                 r[++j] = a->dp[i] >> s;
25385                 s = DIGIT_BIT - s;
25386             }
25387         }
25388         else {
25389             s += DIGIT_BIT;
25390         }
25391     }
25392 
25393     for (j++; j < size; j++) {
25394         r[j] = 0;
25395     }
25396 #endif
25397 }
25398 
25399 /* Convert a point of type ecc_point to type sp_point_384.
25400  *
25401  * p   Point of type sp_point_384 (result).
25402  * pm  Point of type ecc_point.
25403  */
sp_384_point_from_ecc_point_12(sp_point_384 * p,const ecc_point * pm)25404 static void sp_384_point_from_ecc_point_12(sp_point_384* p,
25405         const ecc_point* pm)
25406 {
25407     XMEMSET(p->x, 0, sizeof(p->x));
25408     XMEMSET(p->y, 0, sizeof(p->y));
25409     XMEMSET(p->z, 0, sizeof(p->z));
25410     sp_384_from_mp(p->x, 12, pm->x);
25411     sp_384_from_mp(p->y, 12, pm->y);
25412     sp_384_from_mp(p->z, 12, pm->z);
25413     p->infinity = 0;
25414 }
25415 
/* Convert an array of sp_digit to an mp_int.
 *
 * The mp_int is grown to hold 384 bits, the 12 words of a are repacked into
 * DIGIT_BIT sized digits and the result is clamped.
 *
 * a  A single precision integer (12 words are read).
 * r  A multi-precision integer.
 * returns the result of mp_grow: MP_OKAY on success.
 */
static int sp_384_to_mp(const sp_digit* a, mp_int* r)
{
    int err;

    err = mp_grow(r, (384 + DIGIT_BIT - 1) / DIGIT_BIT);
    if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
#if DIGIT_BIT == 32
        /* Same digit width: straight copy. */
        XMEMCPY(r->dp, a, sizeof(sp_digit) * 12);
        r->used = 12;
        mp_clamp(r);
#elif DIGIT_BIT < 32
        /* mp_digit is narrower than a word: spread each word of a over
         * several digits. s is the bit offset being consumed/produced. */
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 12; i++) {
            r->dp[j] |= (mp_digit)(a[i] << s);
            r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
            s = DIGIT_BIT - s;
            r->dp[++j] = (mp_digit)(a[i] >> s);
            while (s + DIGIT_BIT <= 32) {
                s += DIGIT_BIT;
                r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
                if (s == SP_WORD_SIZE) {
                    r->dp[j] = 0;
                }
                else {
                    r->dp[j] = (mp_digit)(a[i] >> s);
                }
            }
            s = 32 - s;
        }
        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#else
        /* mp_digit is wider than a word: pack several words of a into each
         * digit. s is the number of bits already filled in r->dp[j].
         * NOTE(review): when DIGIT_BIT is 64 the shift a[i] >> s below is
         * reached with s == 32, which equals the width of sp_digit if it is a
         * 32-bit type - confirm this is well defined for the target.
         * NOTE(review): the last iteration can store to r->dp[++j], one digit
         * beyond the count passed to mp_grow when DIGIT_BIT divides 384 -
         * confirm mp_grow always leaves spare capacity. */
        int i;
        int j = 0;
        int s = 0;

        r->dp[0] = 0;
        for (i = 0; i < 12; i++) {
            r->dp[j] |= ((mp_digit)a[i]) << s;
            if (s + 32 >= DIGIT_BIT) {
    #if DIGIT_BIT != 32 && DIGIT_BIT != 64
                r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
    #endif
                s = DIGIT_BIT - s;
                r->dp[++j] = a[i] >> s;
                s = 32 - s;
            }
            else {
                s += 32;
            }
        }
        r->used = (384 + DIGIT_BIT - 1) / DIGIT_BIT;
        mp_clamp(r);
#endif
    }

    return err;
}
25483 
25484 /* Convert a point of type sp_point_384 to type ecc_point.
25485  *
25486  * p   Point of type sp_point_384.
25487  * pm  Point of type ecc_point (result).
25488  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
25489  * MP_OKAY.
25490  */
sp_384_point_to_ecc_point_12(const sp_point_384 * p,ecc_point * pm)25491 static int sp_384_point_to_ecc_point_12(const sp_point_384* p, ecc_point* pm)
25492 {
25493     int err;
25494 
25495     err = sp_384_to_mp(p->x, pm->x);
25496     if (err == MP_OKAY) {
25497         err = sp_384_to_mp(p->y, pm->y);
25498     }
25499     if (err == MP_OKAY) {
25500         err = sp_384_to_mp(p->z, pm->z);
25501     }
25502 
25503     return err;
25504 }
25505 
25506 /* Conditionally subtract b from a using the mask m.
25507  * m is -1 to subtract and 0 when not copying.
25508  *
25509  * r  A single precision number representing condition subtract result.
25510  * a  A single precision number to subtract from.
25511  * b  A single precision number to subtract.
25512  * m  Mask value to apply.
25513  */
sp_384_cond_sub_12(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)25514 SP_NOINLINE static sp_digit sp_384_cond_sub_12(sp_digit* r, const sp_digit* a,
25515         const sp_digit* b, sp_digit m)
25516 {
25517     sp_digit c = 0;
25518 
25519     __asm__ __volatile__ (
25520         "mov	r5, #48\n\t"
25521         "mov	r9, r5\n\t"
25522         "mov	r8, #0\n\t"
25523         "\n1:\n\t"
25524         "ldr	r6, [%[b], r8]\n\t"
25525         "and	r6, r6, %[m]\n\t"
25526         "mov	r5, #0\n\t"
25527         "subs	r5, r5, %[c]\n\t"
25528         "ldr	r5, [%[a], r8]\n\t"
25529         "sbcs	r5, r5, r6\n\t"
25530         "sbcs	%[c], %[c], %[c]\n\t"
25531         "str	r5, [%[r], r8]\n\t"
25532         "add	r8, r8, #4\n\t"
25533         "cmp	r8, r9\n\t"
25534 #ifdef __GNUC__
25535         "blt	1b\n\t"
25536 #else
25537         "blt.n	1b\n\t"
25538 #endif /* __GNUC__ */
25539         : [c] "+r" (c)
25540         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
25541         : "memory", "r5", "r6", "r8", "r9"
25542     );
25543 
25544     return c;
25545 }
25546 
25547 #define sp_384_mont_reduce_order_12   sp_384_mont_reduce_12
25548 
25549 /* Reduce the number back to 384 bits using Montgomery reduction.
25550  *
25551  * a   A single precision number to reduce in place.
25552  * m   The single precision number representing the modulus.
25553  * mp  The digit representing the negative inverse of m mod 2^n.
25554  */
sp_384_mont_reduce_12(sp_digit * a,const sp_digit * m,sp_digit mp)25555 SP_NOINLINE static void sp_384_mont_reduce_12(sp_digit* a, const sp_digit* m,
25556         sp_digit mp)
25557 {
25558     sp_digit ca = 0;
25559 
25560     __asm__ __volatile__ (
25561         "mov	r9, %[mp]\n\t"
25562         "mov	r12, %[m]\n\t"
25563         "mov	r10, %[a]\n\t"
25564         "mov	r4, #0\n\t"
25565         "add	r11, r10, #48\n\t"
25566         "\n1:\n\t"
25567         /* mu = a[i] * mp */
25568         "mov	%[mp], r9\n\t"
25569         "ldr	%[a], [r10]\n\t"
25570         "mul	%[mp], %[mp], %[a]\n\t"
25571         "mov	%[m], r12\n\t"
25572         "add	r14, r10, #40\n\t"
25573         "\n2:\n\t"
25574         /* a[i+j] += m[j] * mu */
25575         "ldr	%[a], [r10]\n\t"
25576         "mov	r5, #0\n\t"
25577         /* Multiply m[j] and mu - Start */
25578         "ldr	r8, [%[m]], #4\n\t"
25579         "umull	r6, r8, %[mp], r8\n\t"
25580         "adds	%[a], %[a], r6\n\t"
25581         "adc	r5, r5, r8\n\t"
25582         /* Multiply m[j] and mu - Done */
25583         "adds	r4, r4, %[a]\n\t"
25584         "adc	r5, r5, #0\n\t"
25585         "str	r4, [r10], #4\n\t"
25586         /* a[i+j+1] += m[j+1] * mu */
25587         "ldr	%[a], [r10]\n\t"
25588         "mov	r4, #0\n\t"
25589         /* Multiply m[j] and mu - Start */
25590         "ldr	r8, [%[m]], #4\n\t"
25591         "umull	r6, r8, %[mp], r8\n\t"
25592         "adds	%[a], %[a], r6\n\t"
25593         "adc	r4, r4, r8\n\t"
25594         /* Multiply m[j] and mu - Done */
25595         "adds	r5, r5, %[a]\n\t"
25596         "adc	r4, r4, #0\n\t"
25597         "str	r5, [r10], #4\n\t"
25598         "cmp	r10, r14\n\t"
25599 #ifdef __GNUC__
25600         "blt	2b\n\t"
25601 #else
25602         "blt.n	2b\n\t"
25603 #endif /* __GNUC__ */
25604         /* a[i+10] += m[10] * mu */
25605         "ldr	%[a], [r10]\n\t"
25606         "mov	r5, #0\n\t"
25607         /* Multiply m[j] and mu - Start */
25608         "ldr	r8, [%[m]], #4\n\t"
25609         "umull	r6, r8, %[mp], r8\n\t"
25610         "adds	%[a], %[a], r6\n\t"
25611         "adc	r5, r5, r8\n\t"
25612         /* Multiply m[j] and mu - Done */
25613         "adds	r4, r4, %[a]\n\t"
25614         "adc	r5, r5, #0\n\t"
25615         "str	r4, [r10], #4\n\t"
25616         /* a[i+11] += m[11] * mu */
25617         "mov	r4, %[ca]\n\t"
25618         "mov	%[ca], #0\n\t"
25619         /* Multiply m[11] and mu - Start */
25620         "ldr	r8, [%[m]]\n\t"
25621         "umull	r6, r8, %[mp], r8\n\t"
25622         "adds	r5, r5, r6\n\t"
25623         "adcs 	r4, r4, r8\n\t"
25624         "adc	%[ca], %[ca], #0\n\t"
25625         /* Multiply m[11] and mu - Done */
25626         "ldr	r6, [r10]\n\t"
25627         "ldr	r8, [r10, #4]\n\t"
25628         "adds	r6, r6, r5\n\t"
25629         "adcs	r8, r8, r4\n\t"
25630         "adc	%[ca], %[ca], #0\n\t"
25631         "str	r6, [r10]\n\t"
25632         "str	r8, [r10, #4]\n\t"
25633         /* Next word in a */
25634         "sub	r10, r10, #40\n\t"
25635         "cmp	r10, r11\n\t"
25636 #ifdef __GNUC__
25637         "blt	1b\n\t"
25638 #else
25639         "blt.n	1b\n\t"
25640 #endif /* __GNUC__ */
25641         "mov	%[a], r10\n\t"
25642         "mov	%[m], r12\n\t"
25643         : [ca] "+r" (ca), [a] "+r" (a)
25644         : [m] "r" (m), [mp] "r" (mp)
25645         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
25646     );
25647 
25648     sp_384_cond_sub_12(a - 12, a, m, (sp_digit)0 - ca);
25649 }
25650 
/* Multiply two Montgomery form numbers mod the modulus (prime).
 * (r = a * b mod m)
 *
 * The product is computed into r and then Montgomery reduced in place.
 * NOTE(review): the reduction operates on 2 * 12 words, so r presumably needs
 * room for a double-width value - confirm against sp_384_mul_12.
 *
 * r   Result of multiplication.
 * a   First number to multiply in Montgomery form.
 * b   Second number to multiply in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_384_mont_mul_12(sp_digit* r, const sp_digit* a,
        const sp_digit* b, const sp_digit* m, sp_digit mp)
{
    sp_384_mul_12(r, a, b);
    sp_384_mont_reduce_12(r, m, mp);
}
25666 
/* Square the Montgomery form number. (r = a * a mod m)
 *
 * The square is computed into r and then Montgomery reduced in place.
 * NOTE(review): the reduction operates on 2 * 12 words, so r presumably needs
 * room for a double-width value - confirm against sp_384_sqr_12.
 *
 * r   Result of squaring.
 * a   Number to square in Montgomery form.
 * m   Modulus (prime).
 * mp  Montgomery multiplier.
 */
static void sp_384_mont_sqr_12(sp_digit* r, const sp_digit* a,
        const sp_digit* m, sp_digit mp)
{
    sp_384_sqr_12(r, a);
    sp_384_mont_reduce_12(r, m, mp);
}
25680 
25681 #if !defined(WOLFSSL_SP_SMALL) || defined(HAVE_COMP_KEY)
25682 /* Square the Montgomery form number a number of times. (r = a ^ n mod m)
25683  *
25684  * r   Result of squaring.
25685  * a   Number to square in Montgomery form.
25686  * n   Number of times to square.
25687  * m   Modulus (prime).
25688  * mp  Montgomery mulitplier.
25689  */
sp_384_mont_sqr_n_12(sp_digit * r,const sp_digit * a,int n,const sp_digit * m,sp_digit mp)25690 static void sp_384_mont_sqr_n_12(sp_digit* r, const sp_digit* a, int n,
25691         const sp_digit* m, sp_digit mp)
25692 {
25693     sp_384_mont_sqr_12(r, a, m, mp);
25694     for (; n > 1; n--) {
25695         sp_384_mont_sqr_12(r, r, m, mp);
25696     }
25697 }
25698 
25699 #endif /* !WOLFSSL_SP_SMALL | HAVE_COMP_KEY */
#ifdef WOLFSSL_SP_SMALL
/* The P384 curve modulus minus 2, least significant word first.
 * Used as the exponent when inverting by exponentiation. */
static const uint32_t p384_mod_minus_2[12] = {
    0xfffffffdU,0x00000000U,0x00000000U,0xffffffffU,0xfffffffeU,0xffffffffU,
    0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU,0xffffffffU
};
#endif /* WOLFSSL_SP_SMALL */
25707 
/* Invert the number, in Montgomery form, modulo the modulus (prime) of the
 * P384 curve. (r = 1 / a mod m)
 *
 * The inverse is calculated by raising a to the power of the modulus minus 2.
 * The small build uses a bit-by-bit square and multiply over
 * p384_mod_minus_2; otherwise a fixed chain of squarings and multiplications
 * is used. The comment before each step of the chain is the exponent held in
 * the destination after that step.
 *
 * r   Inverse result.
 * a   Number to invert.
 * td  Temporary data. The small build uses one temporary at td; otherwise
 *     five temporaries spaced 2 * 12 words apart are used.
 */
static void sp_384_mont_inv_12(sp_digit* r, const sp_digit* a, sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* t = td;
    int i;

    /* Top bit (383) of the exponent is accounted for by starting with a. */
    XMEMCPY(t, a, sizeof(sp_digit) * 12);
    for (i=382; i>=0; i--) {
        sp_384_mont_sqr_12(t, t, p384_mod, p384_mp_mod);
        /* Multiply in a when bit i of the exponent is set. */
        if (p384_mod_minus_2[i / 32] & ((sp_digit)1 << (i % 32)))
            sp_384_mont_mul_12(t, t, a, p384_mod, p384_mp_mod);
    }
    XMEMCPY(r, t, sizeof(sp_digit) * 12);
#else
    sp_digit* t1 = td;
    sp_digit* t2 = td + 2 * 12;
    sp_digit* t3 = td + 4 * 12;
    sp_digit* t4 = td + 6 * 12;
    sp_digit* t5 = td + 8 * 12;

    /* 0x2 */
    sp_384_mont_sqr_12(t1, a, p384_mod, p384_mp_mod);
    /* 0x3 */
    sp_384_mont_mul_12(t5, t1, a, p384_mod, p384_mp_mod);
    /* 0xc */
    sp_384_mont_sqr_n_12(t1, t5, 2, p384_mod, p384_mp_mod);
    /* 0xf */
    sp_384_mont_mul_12(t2, t5, t1, p384_mod, p384_mp_mod);
    /* 0x1e */
    sp_384_mont_sqr_12(t1, t2, p384_mod, p384_mp_mod);
    /* 0x1f */
    sp_384_mont_mul_12(t4, t1, a, p384_mod, p384_mp_mod);
    /* 0x3e0 */
    sp_384_mont_sqr_n_12(t1, t4, 5, p384_mod, p384_mp_mod);
    /* 0x3ff */
    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
    /* 0x7fe0 */
    sp_384_mont_sqr_n_12(t1, t2, 5, p384_mod, p384_mp_mod);
    /* 0x7fff */
    sp_384_mont_mul_12(t4, t4, t1, p384_mod, p384_mp_mod);
    /* 0x3fff8000 */
    sp_384_mont_sqr_n_12(t1, t4, 15, p384_mod, p384_mp_mod);
    /* 0x3fffffff */
    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
    /* 0xfffffffc */
    sp_384_mont_sqr_n_12(t3, t2, 2, p384_mod, p384_mp_mod);
    /* 0xfffffffd */
    /* r holds the low 32 bits of the exponent until the final multiply. */
    sp_384_mont_mul_12(r, t3, a, p384_mod, p384_mp_mod);
    /* 0xffffffff */
    sp_384_mont_mul_12(t3, t5, t3, p384_mod, p384_mp_mod);
    /* 0xfffffffc0000000 */
    sp_384_mont_sqr_n_12(t1, t2, 30, p384_mod, p384_mp_mod);
    /* 0xfffffffffffffff */
    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
    /* 0xfffffffffffffff000000000000000 */
    sp_384_mont_sqr_n_12(t1, t2, 60, p384_mod, p384_mp_mod);
    /* 0xffffffffffffffffffffffffffffff */
    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
    /* 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
    sp_384_mont_sqr_n_12(t1, t2, 120, p384_mod, p384_mp_mod);
    /* 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_384_mont_mul_12(t2, t2, t1, p384_mod, p384_mp_mod);
    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
    sp_384_mont_sqr_n_12(t1, t2, 15, p384_mod, p384_mp_mod);
    /* 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
    sp_384_mont_mul_12(t2, t4, t1, p384_mod, p384_mp_mod);
    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe00000000 */
    sp_384_mont_sqr_n_12(t1, t2, 33, p384_mod, p384_mp_mod);
    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff */
    sp_384_mont_mul_12(t2, t3, t1, p384_mod, p384_mp_mod);
    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff000000000000000000000000 */
    sp_384_mont_sqr_n_12(t1, t2, 96, p384_mod, p384_mp_mod);
    /* 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000fffffffd */
    sp_384_mont_mul_12(r, r, t1, p384_mod, p384_mp_mod);

#endif /* WOLFSSL_SP_SMALL */
}
25792 
25793 /* Compare a with b in constant time.
25794  *
25795  * a  A single precision integer.
25796  * b  A single precision integer.
25797  * return -ve, 0 or +ve if a is less than, equal to or greater than b
25798  * respectively.
25799  */
sp_384_cmp_12(const sp_digit * a,const sp_digit * b)25800 SP_NOINLINE static sp_int32 sp_384_cmp_12(const sp_digit* a, const sp_digit* b)
25801 {
25802     sp_digit r = 0;
25803 
25804 
25805     __asm__ __volatile__ (
25806         "mov	r3, #0\n\t"
25807         "mvn	r3, r3\n\t"
25808         "mov	r6, #44\n\t"
25809         "\n1:\n\t"
25810         "ldr	r8, [%[a], r6]\n\t"
25811         "ldr	r5, [%[b], r6]\n\t"
25812         "and	r8, r8, r3\n\t"
25813         "and	r5, r5, r3\n\t"
25814         "mov	r4, r8\n\t"
25815         "subs	r8, r8, r5\n\t"
25816         "sbc	r8, r8, r8\n\t"
25817         "add	%[r], %[r], r8\n\t"
25818         "mvn	r8, r8\n\t"
25819         "and	r3, r3, r8\n\t"
25820         "subs	r5, r5, r4\n\t"
25821         "sbc	r8, r8, r8\n\t"
25822         "sub	%[r], %[r], r8\n\t"
25823         "mvn	r8, r8\n\t"
25824         "and	r3, r3, r8\n\t"
25825         "sub	r6, r6, #4\n\t"
25826         "cmp	r6, #0\n\t"
25827 #ifdef __GNUC__
25828         "bge	1b\n\t"
25829 #else
25830         "bge.n	1b\n\t"
25831 #endif /* __GNUC__ */
25832         : [r] "+r" (r)
25833         : [a] "r" (a), [b] "r" (b)
25834         : "r3", "r4", "r5", "r6", "r8"
25835     );
25836 
25837     return r;
25838 }
25839 
/* Normalize the values in each word to 32 bits.
 *
 * Expands to nothing in this implementation.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_384_norm_12(a)
25845 
25846 /* Map the Montgomery form projective coordinate point to an affine point.
25847  *
25848  * r  Resulting affine coordinate point.
25849  * p  Montgomery form projective coordinate point.
25850  * t  Temporary ordinate data.
25851  */
sp_384_map_12(sp_point_384 * r,const sp_point_384 * p,sp_digit * t)25852 static void sp_384_map_12(sp_point_384* r, const sp_point_384* p,
25853     sp_digit* t)
25854 {
25855     sp_digit* t1 = t;
25856     sp_digit* t2 = t + 2*12;
25857     sp_int32 n;
25858 
25859     sp_384_mont_inv_12(t1, p->z, t + 2*12);
25860 
25861     sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
25862     sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
25863 
25864     /* x /= z^2 */
25865     sp_384_mont_mul_12(r->x, p->x, t2, p384_mod, p384_mp_mod);
25866     XMEMSET(r->x + 12, 0, sizeof(r->x) / 2U);
25867     sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod);
25868     /* Reduce x to less than modulus */
25869     n = sp_384_cmp_12(r->x, p384_mod);
25870     sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ?
25871                 (sp_digit)1 : (sp_digit)0));
25872     sp_384_norm_12(r->x);
25873 
25874     /* y /= z^3 */
25875     sp_384_mont_mul_12(r->y, p->y, t1, p384_mod, p384_mp_mod);
25876     XMEMSET(r->y + 12, 0, sizeof(r->y) / 2U);
25877     sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod);
25878     /* Reduce y to less than modulus */
25879     n = sp_384_cmp_12(r->y, p384_mod);
25880     sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ?
25881                 (sp_digit)1 : (sp_digit)0));
25882     sp_384_norm_12(r->y);
25883 
25884     XMEMSET(r->z, 0, sizeof(r->z));
25885     r->z[0] = 1;
25886 
25887 }
25888 
25889 /* Add two Montgomery form numbers (r = a + b % m).
25890  *
25891  * r   Result of addition.
25892  * a   First number to add in Montgomery form.
25893  * b   Second number to add in Montgomery form.
25894  * m   Modulus (prime).
25895  */
sp_384_mont_add_12(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)25896 SP_NOINLINE static void sp_384_mont_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
25897         const sp_digit* m)
25898 {
25899     sp_digit o;
25900 
25901     o = sp_384_add_12(r, a, b);
25902     sp_384_cond_sub_12(r, r, m, 0 - o);
25903 }
25904 
25905 /* Double a Montgomery form number (r = a + a % m).
25906  *
25907  * r   Result of doubling.
25908  * a   Number to double in Montgomery form.
25909  * m   Modulus (prime).
25910  */
sp_384_mont_dbl_12(sp_digit * r,const sp_digit * a,const sp_digit * m)25911 SP_NOINLINE static void sp_384_mont_dbl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
25912 {
25913     sp_digit o;
25914 
25915     o = sp_384_add_12(r, a, a);
25916     sp_384_cond_sub_12(r, r, m, 0 - o);
25917 }
25918 
25919 /* Triple a Montgomery form number (r = a + a + a % m).
25920  *
25921  * r   Result of Tripling.
25922  * a   Number to triple in Montgomery form.
25923  * m   Modulus (prime).
25924  */
sp_384_mont_tpl_12(sp_digit * r,const sp_digit * a,const sp_digit * m)25925 SP_NOINLINE static void sp_384_mont_tpl_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
25926 {
25927     sp_digit o;
25928 
25929     o = sp_384_add_12(r, a, a);
25930     sp_384_cond_sub_12(r, r, m, 0 - o);
25931     o = sp_384_add_12(r, r, a);
25932     sp_384_cond_sub_12(r, r, m, 0 - o);
25933 }
25934 
25935 /* Conditionally add a and b using the mask m.
25936  * m is -1 to add and 0 when not.
25937  *
25938  * r  A single precision number representing conditional add result.
25939  * a  A single precision number to add with.
25940  * b  A single precision number to add.
25941  * m  Mask value to apply.
25942  */
sp_384_cond_add_12(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)25943 SP_NOINLINE static sp_digit sp_384_cond_add_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
25944         sp_digit m)
25945 {
25946     sp_digit c = 0;
25947 
25948     __asm__ __volatile__ (
25949         "mov	r5, #48\n\t"
25950         "mov	r9, r5\n\t"
25951         "mov	r8, #0\n\t"
25952         "\n1:\n\t"
25953         "ldr	r6, [%[b], r8]\n\t"
25954         "and	r6, r6, %[m]\n\t"
25955         "adds	r5, %[c], #-1\n\t"
25956         "ldr	r5, [%[a], r8]\n\t"
25957         "adcs	r5, r5, r6\n\t"
25958         "mov	%[c], #0\n\t"
25959         "adcs	%[c], %[c], %[c]\n\t"
25960         "str	r5, [%[r], r8]\n\t"
25961         "add	r8, r8, #4\n\t"
25962         "cmp	r8, r9\n\t"
25963 #ifdef __GNUC__
25964         "blt	1b\n\t"
25965 #else
25966         "blt.n	1b\n\t"
25967 #endif /* __GNUC__ */
25968         : [c] "+r" (c)
25969         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
25970         : "memory", "r5", "r6", "r8", "r9"
25971     );
25972 
25973     return c;
25974 }
25975 
25976 /* Subtract two Montgomery form numbers (r = a - b % m).
25977  *
25978  * r   Result of subtration.
25979  * a   Number to subtract from in Montgomery form.
25980  * b   Number to subtract with in Montgomery form.
25981  * m   Modulus (prime).
25982  */
sp_384_mont_sub_12(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)25983 SP_NOINLINE static void sp_384_mont_sub_12(sp_digit* r, const sp_digit* a, const sp_digit* b,
25984         const sp_digit* m)
25985 {
25986     sp_digit o;
25987 
25988     o = sp_384_sub_12(r, a, b);
25989     sp_384_cond_add_12(r, r, m, o);
25990 }
25991 
/* Shift a 12 word number right by one bit. (r = a >> 1)
 *
 * Each result word is the source word shifted right by one with bit 0 of the
 * next higher source word moved into bit 31. A zero is shifted into the top
 * bit of r[11].
 * Every source word is loaded before the result word at the same offset is
 * stored, so r may be the same buffer as a.
 *
 * r  Result of the shift.
 * a  Number to shift. Only read from.
 */
static void sp_384_rshift1_12(sp_digit* r, const sp_digit* a)
{
    /* r2, r3 and r4 rotate through the roles of: word being finished, next
     * word supplying the incoming top bit, and word being loaded. */
    __asm__ __volatile__ (
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "lsr	r2, r2, #1\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "str	r2, [%[r], #0]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #12]\n\t"
        "str	r3, [%[r], #4]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #16]\n\t"
        "str	r4, [%[r], #8]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #20]\n\t"
        "str	r2, [%[r], #12]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #24]\n\t"
        "str	r3, [%[r], #16]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #28]\n\t"
        "str	r4, [%[r], #20]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "str	r2, [%[r], #24]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #36]\n\t"
        "str	r3, [%[r], #28]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #40]\n\t"
        "str	r4, [%[r], #32]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #44]\n\t"
        "str	r2, [%[r], #36]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "str	r3, [%[r], #40]\n\t"
        "str	r4, [%[r], #44]\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4"
    );
}
26047 
26048 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
26049  *
26050  * r  Result of division by 2.
26051  * a  Number to divide.
26052  * m  Modulus (prime).
26053  */
sp_384_div2_12(sp_digit * r,const sp_digit * a,const sp_digit * m)26054 SP_NOINLINE static void sp_384_div2_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
26055 {
26056     sp_digit o;
26057 
26058     o = sp_384_cond_add_12(r, a, m, 0 - (a[0] & 1));
26059     sp_384_rshift1_12(r, r);
26060     r[11] |= o << 31;
26061 }
26062 
26063 /* Double the Montgomery form projective point p.
26064  *
26065  * r  Result of doubling point.
26066  * p  Point to double.
26067  * t  Temporary ordinate data.
26068  */
26069 #ifdef WOLFSSL_SP_NONBLOCK
/* State kept between calls of sp_384_proj_point_dbl_12_nb(). */
typedef struct sp_384_proj_point_dbl_12_ctx {
    /* Step of the doubling to perform on the next call; 19 means done. */
    int state;
    /* First temporary: start of the caller supplied buffer t. */
    sp_digit* t1;
    /* Second temporary: t + 2*12. */
    sp_digit* t2;
    /* X ordinate of the result point r. */
    sp_digit* x;
    /* Y ordinate of the result point r. */
    sp_digit* y;
    /* Z ordinate of the result point r. */
    sp_digit* z;
} sp_384_proj_point_dbl_12_ctx;
26078 
/* Non-blocking point double: performs one step per call.
 *
 * The step to run is taken from the context held in sp_ctx->data. Each call
 * performs at most one field operation and advances the state.
 *
 * sp_ctx  Context holding the state between calls.
 * r       Result of doubling point.
 * p       Point to double.
 * t       Temporary ordinate data.
 * returns FP_WOULDBLOCK while steps remain and MP_OKAY once state 19 is
 * reached.
 */
static int sp_384_proj_point_dbl_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, const sp_point_384* p, sp_digit* t)
{
    sp_384_proj_point_dbl_12_ctx* st = (sp_384_proj_point_dbl_12_ctx*)sp_ctx->data;
    int ret = FP_WOULDBLOCK;

    /* Compile-time check: the array size is negative, and compilation fails,
     * when the doubling context is not smaller than sp_ecc_ctx_t. */
    typedef char ctx_size_test[sizeof(sp_384_proj_point_dbl_12_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (st->state) {
    case 0:
        /* Put infinity into result. */
        if (r != p) {
            r->infinity = p->infinity;
        }
        /* Record where the temporaries and result ordinates live. */
        st->t1 = t;
        st->t2 = t + 2*12;
        st->x = r->x;
        st->y = r->y;
        st->z = r->z;
        st->state = 1;
        break;
    case 1:
        /* T1 = Z * Z */
        sp_384_mont_sqr_12(st->t1, p->z, p384_mod, p384_mp_mod);
        st->state = 2;
        break;
    case 2:
        /* Z = Y * Z */
        sp_384_mont_mul_12(st->z, p->y, p->z, p384_mod, p384_mp_mod);
        st->state = 3;
        break;
    case 3:
        /* Z = 2Z */
        sp_384_mont_dbl_12(st->z, st->z, p384_mod);
        st->state = 4;
        break;
    case 4:
        /* T2 = X - T1 */
        sp_384_mont_sub_12(st->t2, p->x, st->t1, p384_mod);
        st->state = 5;
        break;
    case 5:
        /* T1 = X + T1 */
        sp_384_mont_add_12(st->t1, p->x, st->t1, p384_mod);
        st->state = 6;
        break;
    case 6:
        /* T2 = T1 * T2 */
        sp_384_mont_mul_12(st->t2, st->t1, st->t2, p384_mod, p384_mp_mod);
        st->state = 7;
        break;
    case 7:
        /* T1 = 3T2 */
        sp_384_mont_tpl_12(st->t1, st->t2, p384_mod);
        st->state = 8;
        break;
    case 8:
        /* Y = 2Y */
        sp_384_mont_dbl_12(st->y, p->y, p384_mod);
        st->state = 9;
        break;
    case 9:
        /* Y = Y * Y */
        sp_384_mont_sqr_12(st->y, st->y, p384_mod, p384_mp_mod);
        st->state = 10;
        break;
    case 10:
        /* T2 = Y * Y */
        sp_384_mont_sqr_12(st->t2, st->y, p384_mod, p384_mp_mod);
        st->state = 11;
        break;
    case 11:
        /* T2 = T2/2 */
        sp_384_div2_12(st->t2, st->t2, p384_mod);
        st->state = 12;
        break;
    case 12:
        /* Y = Y * X */
        sp_384_mont_mul_12(st->y, st->y, p->x, p384_mod, p384_mp_mod);
        st->state = 13;
        break;
    case 13:
        /* X = T1 * T1 */
        sp_384_mont_sqr_12(st->x, st->t1, p384_mod, p384_mp_mod);
        st->state = 14;
        break;
    case 14:
        /* X = X - Y */
        sp_384_mont_sub_12(st->x, st->x, st->y, p384_mod);
        st->state = 15;
        break;
    case 15:
        /* X = X - Y */
        sp_384_mont_sub_12(st->x, st->x, st->y, p384_mod);
        st->state = 16;
        break;
    case 16:
        /* Y = Y - X */
        sp_384_mont_sub_12(st->y, st->y, st->x, p384_mod);
        st->state = 17;
        break;
    case 17:
        /* Y = Y * T1 */
        sp_384_mont_mul_12(st->y, st->y, st->t1, p384_mod, p384_mp_mod);
        st->state = 18;
        break;
    case 18:
        /* Y = Y - T2 */
        sp_384_mont_sub_12(st->y, st->y, st->t2, p384_mod);
        st->state = 19;
        /* fall-through */
    case 19:
        ret = MP_OKAY;
        break;
    }

    /* Only report completion from the final state. */
    if ((ret == MP_OKAY) && (st->state != 19)) {
        ret = FP_WOULDBLOCK;
    }

    return ret;
}
26202 #endif /* WOLFSSL_SP_NONBLOCK */
26203 
/* Double the Montgomery form projective point p in a single call.
 *
 * r may be the same point as p; the ordinates of p are read through p and
 * the results written through r in the order given below.
 *
 * r  Result of doubling point.
 * p  Point to double.
 * t  Temporary ordinate data.
 */
static void sp_384_proj_point_dbl_12(sp_point_384* r, const sp_point_384* p, sp_digit* t)
{
    sp_digit* ta = t;
    sp_digit* tb = t + 2*12;
    sp_digit* rx = r->x;
    sp_digit* ry = r->y;
    sp_digit* rz = r->z;

    /* Put infinity into result. */
    if (r != p) {
        r->infinity = p->infinity;
    }

    /* TA = Z * Z */
    sp_384_mont_sqr_12(ta, p->z, p384_mod, p384_mp_mod);
    /* Z = 2 * (Y * Z) */
    sp_384_mont_mul_12(rz, p->y, p->z, p384_mod, p384_mp_mod);
    sp_384_mont_dbl_12(rz, rz, p384_mod);
    /* TB = X - TA */
    sp_384_mont_sub_12(tb, p->x, ta, p384_mod);
    /* TA = X + TA */
    sp_384_mont_add_12(ta, p->x, ta, p384_mod);
    /* TB = TA * TB */
    sp_384_mont_mul_12(tb, ta, tb, p384_mod, p384_mp_mod);
    /* TA = 3 * TB */
    sp_384_mont_tpl_12(ta, tb, p384_mod);
    /* Y = (2 * Y)^2 */
    sp_384_mont_dbl_12(ry, p->y, p384_mod);
    sp_384_mont_sqr_12(ry, ry, p384_mod, p384_mp_mod);
    /* TB = (Y * Y) / 2 */
    sp_384_mont_sqr_12(tb, ry, p384_mod, p384_mp_mod);
    sp_384_div2_12(tb, tb, p384_mod);
    /* Y = Y * X */
    sp_384_mont_mul_12(ry, ry, p->x, p384_mod, p384_mp_mod);
    /* X = TA * TA - Y - Y */
    sp_384_mont_sqr_12(rx, ta, p384_mod, p384_mp_mod);
    sp_384_mont_sub_12(rx, rx, ry, p384_mod);
    sp_384_mont_sub_12(rx, rx, ry, p384_mod);
    /* Y = (Y - X) * TA - TB */
    sp_384_mont_sub_12(ry, ry, rx, p384_mod);
    sp_384_mont_mul_12(ry, ry, ta, p384_mod, p384_mp_mod);
    sp_384_mont_sub_12(ry, ry, tb, p384_mod);
}
26257 
26258 /* Compare two numbers to determine if they are equal.
26259  * Constant time implementation.
26260  *
26261  * a  First number to compare.
26262  * b  Second number to compare.
26263  * returns 1 when equal and 0 otherwise.
26264  */
sp_384_cmp_equal_12(const sp_digit * a,const sp_digit * b)26265 static int sp_384_cmp_equal_12(const sp_digit* a, const sp_digit* b)
26266 {
26267     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
26268             (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
26269             (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8]) |
26270             (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11])) == 0;
26271 }
26272 
26273 /* Add two Montgomery form projective points.
26274  *
26275  * r  Result of addition.
26276  * p  First point to add.
26277  * q  Second point to add.
26278  * t  Temporary ordinate data.
26279  */
26280 
26281 #ifdef WOLFSSL_SP_NONBLOCK
/* State kept between calls of sp_384_proj_point_add_12_nb(). */
typedef struct sp_384_proj_point_add_12_ctx {
    /* Step of the addition to perform on the next call; 27 means done. */
    int state;
    /* Context for the non-blocking double used when the doubling check
     * matches. */
    sp_384_proj_point_dbl_12_ctx dbl_ctx;
    /* The two input points: ap[0] is p and ap[1] is q. */
    const sp_point_384* ap[2];
    /* Candidate destinations: rp[0] is r and rp[1] is a point overlaid on
     * the temporary buffer t. */
    sp_point_384* rp[2];
    /* Temporaries at t, t + 2*12, t + 4*12, t + 6*12 and t + 8*12. */
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* t3;
    sp_digit* t4;
    sp_digit* t5;
    /* Ordinates of the destination chosen from rp. */
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_384_proj_point_add_12_ctx;
26296 
/* Non-blocking point add: performs one step per call.
 *
 * The step to run is taken from the context held in sp_ctx->data. When the
 * doubling check in state 1 matches, the work is handed to the non-blocking
 * double; otherwise the addition is carried out one field operation per call.
 *
 * sp_ctx  Context holding the state between calls.
 * r       Result of addition.
 * p       First point to add.
 * q       Second point to add.
 * t       Temporary ordinate data.
 * returns FP_WOULDBLOCK while steps remain and MP_OKAY once state 27 is
 * reached.
 */
static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r,
    const sp_point_384* p, const sp_point_384* q, sp_digit* t)
{
    sp_384_proj_point_add_12_ctx* st = (sp_384_proj_point_add_12_ctx*)sp_ctx->data;
    int ret = FP_WOULDBLOCK;

    /* Compile-time check: the array size is negative, and compilation fails,
     * when the addition context is not smaller than sp_ecc_ctx_t. */
    typedef char ctx_size_test[sizeof(sp_384_proj_point_add_12_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    /* Ensure only the first point is the same as the result. */
    if (q == r) {
        const sp_point_384* first = q;
        q = p;
        p = first;
    }

    switch (st->state) {
    case 0: /* INIT */
        st->t1 = t;
        st->t2 = t + 2*12;
        st->t3 = t + 4*12;
        st->t4 = t + 6*12;
        st->t5 = t + 8*12;

        st->state = 1;
        break;
    case 1:
    {
        int same_x;
        int same_y;
        int same_z;
        int neg_y;

        /* Check double: T1 = modulus - Y2, then compare ordinates. */
        (void)sp_384_sub_12(st->t1, p384_mod, q->y);
        sp_384_norm_12(st->t1);
        same_x = sp_384_cmp_equal_12(p->x, q->x);
        same_z = sp_384_cmp_equal_12(p->z, q->z);
        same_y = sp_384_cmp_equal_12(p->y, q->y);
        neg_y = sp_384_cmp_equal_12(p->y, st->t1);
        if ((same_x & same_z & (same_y | neg_y)) != 0) {
            XMEMSET(&st->dbl_ctx, 0, sizeof(st->dbl_ctx));
            st->state = 2;
        }
        else {
            st->state = 3;
        }
        break;
    }
    case 2:
        /* Run one step of the double; finished when it reports MP_OKAY. */
        ret = sp_384_proj_point_dbl_12_nb((sp_ecc_ctx_t*)&st->dbl_ctx, r, p, t);
        if (ret == MP_OKAY) {
            st->state = 27; /* done */
        }
        break;
    case 3:
    {
        const sp_point_384* from;
        sp_point_384* into;
        int w;

        st->rp[0] = r;

        /*lint allow cast to different type of pointer*/
        st->rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
        XMEMSET(st->rp[1], 0, sizeof(sp_point_384));
        /* Results go to r unless either input is at infinity, in which case
         * they go to the point overlaid on t. */
        into = st->rp[p->infinity | q->infinity];
        st->x = into->x;
        st->y = into->y;
        st->z = into->z;

        st->ap[0] = p;
        st->ap[1] = q;
        /* r starts as a copy of p, or of q when p is at infinity. */
        from = st->ap[p->infinity];
        for (w = 0; w < 12; w++) {
            r->x[w] = from->x[w];
            r->y[w] = from->y[w];
            r->z[w] = from->z[w];
        }
        r->infinity = from->infinity;

        st->state = 4;
        break;
    }
    case 4:
        /* U1 = X1*Z2^2 */
        sp_384_mont_sqr_12(st->t1, q->z, p384_mod, p384_mp_mod);
        st->state = 5;
        break;
    case 5:
        sp_384_mont_mul_12(st->t3, st->t1, q->z, p384_mod, p384_mp_mod);
        st->state = 6;
        break;
    case 6:
        sp_384_mont_mul_12(st->t1, st->t1, st->x, p384_mod, p384_mp_mod);
        st->state = 7;
        break;
    case 7:
        /* U2 = X2*Z1^2 */
        sp_384_mont_sqr_12(st->t2, st->z, p384_mod, p384_mp_mod);
        st->state = 8;
        break;
    case 8:
        sp_384_mont_mul_12(st->t4, st->t2, st->z, p384_mod, p384_mp_mod);
        st->state = 9;
        break;
    case 9:
        sp_384_mont_mul_12(st->t2, st->t2, q->x, p384_mod, p384_mp_mod);
        st->state = 10;
        break;
    case 10:
        /* S1 = Y1*Z2^3 */
        sp_384_mont_mul_12(st->t3, st->t3, st->y, p384_mod, p384_mp_mod);
        st->state = 11;
        break;
    case 11:
        /* S2 = Y2*Z1^3 */
        sp_384_mont_mul_12(st->t4, st->t4, q->y, p384_mod, p384_mp_mod);
        st->state = 12;
        break;
    case 12:
        /* H = U2 - U1 */
        sp_384_mont_sub_12(st->t2, st->t2, st->t1, p384_mod);
        st->state = 13;
        break;
    case 13:
        /* R = S2 - S1 */
        sp_384_mont_sub_12(st->t4, st->t4, st->t3, p384_mod);
        st->state = 14;
        break;
    case 14:
        /* Z3 = H*Z1*Z2 */
        sp_384_mont_mul_12(st->z, st->z, q->z, p384_mod, p384_mp_mod);
        st->state = 15;
        break;
    case 15:
        sp_384_mont_mul_12(st->z, st->z, st->t2, p384_mod, p384_mp_mod);
        st->state = 16;
        break;
    case 16:
        /* X3 = R^2 - H^3 - 2*U1*H^2 */
        sp_384_mont_sqr_12(st->x, st->t4, p384_mod, p384_mp_mod);
        st->state = 17;
        break;
    case 17:
        sp_384_mont_sqr_12(st->t5, st->t2, p384_mod, p384_mp_mod);
        st->state = 18;
        break;
    case 18:
        sp_384_mont_mul_12(st->y, st->t1, st->t5, p384_mod, p384_mp_mod);
        st->state = 19;
        break;
    case 19:
        sp_384_mont_mul_12(st->t5, st->t5, st->t2, p384_mod, p384_mp_mod);
        st->state = 20;
        break;
    case 20:
        sp_384_mont_sub_12(st->x, st->x, st->t5, p384_mod);
        st->state = 21;
        break;
    case 21:
        sp_384_mont_dbl_12(st->t1, st->y, p384_mod);
        st->state = 22;
        break;
    case 22:
        sp_384_mont_sub_12(st->x, st->x, st->t1, p384_mod);
        st->state = 23;
        break;
    case 23:
        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
        sp_384_mont_sub_12(st->y, st->y, st->x, p384_mod);
        st->state = 24;
        break;
    case 24:
        sp_384_mont_mul_12(st->y, st->y, st->t4, p384_mod, p384_mp_mod);
        st->state = 25;
        break;
    case 25:
        sp_384_mont_mul_12(st->t5, st->t5, st->t3, p384_mod, p384_mp_mod);
        st->state = 26;
        break;
    case 26:
        sp_384_mont_sub_12(st->y, st->y, st->t5, p384_mod);
        st->state = 27;
        /* fall-through */
    case 27:
        ret = MP_OKAY;
        break;
    }

    /* Only report completion from the final state. */
    if ((ret == MP_OKAY) && (st->state != 27)) {
        ret = FP_WOULDBLOCK;
    }
    return ret;
}
26481 #endif /* WOLFSSL_SP_NONBLOCK */
26482 
/* Add two Montgomery form projective points in a single call.
 *
 * When the doubling check matches, the work is handed to the point double.
 * Otherwise r is first set to a copy of p (or of q when p is at infinity) and
 * the sum is computed into r, or into a scratch point overlaid on t when
 * either input is at infinity.
 *
 * r  Result of addition.
 * p  First point to add.
 * q  Second point to add.
 * t  Temporary ordinate data.
 */
static void sp_384_proj_point_add_12(sp_point_384* r,
        const sp_point_384* p, const sp_point_384* q, sp_digit* t)
{
    const sp_point_384* src[2];
    const sp_point_384* from;
    sp_point_384* dst[2];
    sp_point_384* into;
    sp_digit* ta = t;
    sp_digit* tb = t + 2*12;
    sp_digit* tc = t + 4*12;
    sp_digit* td = t + 6*12;
    sp_digit* te = t + 8*12;
    sp_digit* ox;
    sp_digit* oy;
    sp_digit* oz;
    int same_x;
    int same_y;
    int same_z;
    int neg_y;
    int w;

    /* Ensure only the first point is the same as the result. */
    if (q == r) {
        const sp_point_384* first = q;
        q = p;
        p = first;
    }

    /* Check double: TA = modulus - Y2, then compare ordinates. */
    (void)sp_384_sub_12(ta, p384_mod, q->y);
    sp_384_norm_12(ta);
    same_x = sp_384_cmp_equal_12(p->x, q->x);
    same_z = sp_384_cmp_equal_12(p->z, q->z);
    same_y = sp_384_cmp_equal_12(p->y, q->y);
    neg_y = sp_384_cmp_equal_12(p->y, ta);
    if ((same_x & same_z & (same_y | neg_y)) != 0) {
        sp_384_proj_point_dbl_12(r, p, t);
        return;
    }

    dst[0] = r;

    /*lint allow cast to different type of pointer*/
    dst[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
    XMEMSET(dst[1], 0, sizeof(sp_point_384));
    /* Results go to r unless either input is at infinity. */
    into = dst[p->infinity | q->infinity];
    ox = into->x;
    oy = into->y;
    oz = into->z;

    src[0] = p;
    src[1] = q;
    /* r starts as a copy of p, or of q when p is at infinity. */
    from = src[p->infinity];
    for (w = 0; w < 12; w++) {
        r->x[w] = from->x[w];
        r->y[w] = from->y[w];
        r->z[w] = from->z[w];
    }
    r->infinity = from->infinity;

    /* U1 = X1*Z2^2 */
    sp_384_mont_sqr_12(ta, q->z, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(tc, ta, q->z, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(ta, ta, ox, p384_mod, p384_mp_mod);
    /* U2 = X2*Z1^2 */
    sp_384_mont_sqr_12(tb, oz, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(td, tb, oz, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(tb, tb, q->x, p384_mod, p384_mp_mod);
    /* S1 = Y1*Z2^3 */
    sp_384_mont_mul_12(tc, tc, oy, p384_mod, p384_mp_mod);
    /* S2 = Y2*Z1^3 */
    sp_384_mont_mul_12(td, td, q->y, p384_mod, p384_mp_mod);
    /* H = U2 - U1 */
    sp_384_mont_sub_12(tb, tb, ta, p384_mod);
    /* R = S2 - S1 */
    sp_384_mont_sub_12(td, td, tc, p384_mod);
    /* Z3 = H*Z1*Z2 */
    sp_384_mont_mul_12(oz, oz, q->z, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(oz, oz, tb, p384_mod, p384_mp_mod);
    /* X3 = R^2 - H^3 - 2*U1*H^2 */
    sp_384_mont_sqr_12(ox, td, p384_mod, p384_mp_mod);
    sp_384_mont_sqr_12(te, tb, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(oy, ta, te, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(te, te, tb, p384_mod, p384_mp_mod);
    sp_384_mont_sub_12(ox, ox, te, p384_mod);
    sp_384_mont_dbl_12(ta, oy, p384_mod);
    sp_384_mont_sub_12(ox, ox, ta, p384_mod);
    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
    sp_384_mont_sub_12(oy, oy, ox, p384_mod);
    sp_384_mont_mul_12(oy, oy, td, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(te, te, tc, p384_mod, p384_mp_mod);
    sp_384_mont_sub_12(oy, oy, te, p384_mod);
}
26569 
26570 #ifndef WC_NO_CACHE_RESISTANT
26571 /* Touch each possible point that could be being copied.
26572  *
26573  * r      Point to copy into.
26574  * table  Table - start of the entires to access
26575  * idx    Index of entry to retrieve.
26576  */
sp_384_get_point_16_12(sp_point_384 * r,const sp_point_384 * table,int idx)26577 static void sp_384_get_point_16_12(sp_point_384* r, const sp_point_384* table,
26578     int idx)
26579 {
26580     int i;
26581     sp_digit mask;
26582 
26583     r->x[0] = 0;
26584     r->x[1] = 0;
26585     r->x[2] = 0;
26586     r->x[3] = 0;
26587     r->x[4] = 0;
26588     r->x[5] = 0;
26589     r->x[6] = 0;
26590     r->x[7] = 0;
26591     r->x[8] = 0;
26592     r->x[9] = 0;
26593     r->x[10] = 0;
26594     r->x[11] = 0;
26595     r->y[0] = 0;
26596     r->y[1] = 0;
26597     r->y[2] = 0;
26598     r->y[3] = 0;
26599     r->y[4] = 0;
26600     r->y[5] = 0;
26601     r->y[6] = 0;
26602     r->y[7] = 0;
26603     r->y[8] = 0;
26604     r->y[9] = 0;
26605     r->y[10] = 0;
26606     r->y[11] = 0;
26607     r->z[0] = 0;
26608     r->z[1] = 0;
26609     r->z[2] = 0;
26610     r->z[3] = 0;
26611     r->z[4] = 0;
26612     r->z[5] = 0;
26613     r->z[6] = 0;
26614     r->z[7] = 0;
26615     r->z[8] = 0;
26616     r->z[9] = 0;
26617     r->z[10] = 0;
26618     r->z[11] = 0;
26619     for (i = 1; i < 16; i++) {
26620         mask = 0 - (i == idx);
26621         r->x[0] |= mask & table[i].x[0];
26622         r->x[1] |= mask & table[i].x[1];
26623         r->x[2] |= mask & table[i].x[2];
26624         r->x[3] |= mask & table[i].x[3];
26625         r->x[4] |= mask & table[i].x[4];
26626         r->x[5] |= mask & table[i].x[5];
26627         r->x[6] |= mask & table[i].x[6];
26628         r->x[7] |= mask & table[i].x[7];
26629         r->x[8] |= mask & table[i].x[8];
26630         r->x[9] |= mask & table[i].x[9];
26631         r->x[10] |= mask & table[i].x[10];
26632         r->x[11] |= mask & table[i].x[11];
26633         r->y[0] |= mask & table[i].y[0];
26634         r->y[1] |= mask & table[i].y[1];
26635         r->y[2] |= mask & table[i].y[2];
26636         r->y[3] |= mask & table[i].y[3];
26637         r->y[4] |= mask & table[i].y[4];
26638         r->y[5] |= mask & table[i].y[5];
26639         r->y[6] |= mask & table[i].y[6];
26640         r->y[7] |= mask & table[i].y[7];
26641         r->y[8] |= mask & table[i].y[8];
26642         r->y[9] |= mask & table[i].y[9];
26643         r->y[10] |= mask & table[i].y[10];
26644         r->y[11] |= mask & table[i].y[11];
26645         r->z[0] |= mask & table[i].z[0];
26646         r->z[1] |= mask & table[i].z[1];
26647         r->z[2] |= mask & table[i].z[2];
26648         r->z[3] |= mask & table[i].z[3];
26649         r->z[4] |= mask & table[i].z[4];
26650         r->z[5] |= mask & table[i].z[5];
26651         r->z[6] |= mask & table[i].z[6];
26652         r->z[7] |= mask & table[i].z[7];
26653         r->z[8] |= mask & table[i].z[8];
26654         r->z[9] |= mask & table[i].z[9];
26655         r->z[10] |= mask & table[i].z[10];
26656         r->z[11] |= mask & table[i].z[11];
26657     }
26658 }
26659 #endif /* !WC_NO_CACHE_RESISTANT */
/* Multiply the point by the scalar and return the result.
 * If map is true then convert result to affine coordinates.
 *
 * Fast implementation that generates a pre-computation table.
 * 4 bits of window (no sliding!).
 * Uses add and double for calculating table.
 * 384 doubles.
 * 108 adds.
 *
 * The table holds 16 entries (0 to 15 times the point) and the scalar is
 * consumed 4 bits at a time starting from the top of k[11].
 * All working buffers are zeroized before returning.
 *
 * r     Resulting point.
 * g     Point to multiply.
 * k     Scalar to multiply by.
 * map   Indicates whether to convert result to affine.
 * ct    Constant time required.
 * heap  Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
static int sp_384_ecc_mulmod_fast_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
        int map, int ct, void* heap)
{
    /* t: 16 table entries plus one extra point used as the running result.
     * tmp: scratch handed to the point double/add/map routines. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_384* t = NULL;
    sp_digit* tmp = NULL;
#else
    sp_point_384 t[16 + 1];
    sp_digit tmp[2 * 12 * 6];
#endif
    /* Running result; set to &t[16] once t is available. */
    sp_point_384* rt = NULL;
#ifndef WC_NO_CACHE_RESISTANT
    /* Holds the table entry fetched by sp_384_get_point_16_12(). */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_384* p = NULL;
#else
    sp_point_384 p[1];
#endif
#endif /* !WC_NO_CACHE_RESISTANT */
    /* n: scalar bits not yet consumed, next window in the top 4 bits. */
    sp_digit n;
    /* i: index of the next word of k to load. */
    int i;
    /* c: count of unconsumed bits held in n. */
    int c;
    /* y: current 4-bit window value, used as the table index. */
    int y;
    int err = MP_OKAY;

    /* Constant time used for cache attack resistance implementation. */
    (void)ct;
    (void)heap;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Allocate in turn; a failure skips the remaining allocations and the
     * computation, and the cleanup below frees whatever was obtained. */
    t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * (16 + 1),
        heap, DYNAMIC_TYPE_ECC);
    if (t == NULL)
        err = MEMORY_E;
    #ifndef WC_NO_CACHE_RESISTANT
    if (err == MP_OKAY) {
        p = (sp_point_384*)XMALLOC(sizeof(sp_point_384),
            heap, DYNAMIC_TYPE_ECC);
        if (p == NULL)
            err = MEMORY_E;
    }
    #endif
    if (err == MP_OKAY) {
        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
                                DYNAMIC_TYPE_ECC);
        if (tmp == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        rt = t + 16;

        /* t[0] = {0, 0, 1} * norm */
        XMEMSET(&t[0], 0, sizeof(t[0]));
        t[0].infinity = 1;
        /* t[1] = {g->x, g->y, g->z} * norm */
        (void)sp_384_mod_mul_norm_12(t[1].x, g->x, p384_mod);
        (void)sp_384_mod_mul_norm_12(t[1].y, g->y, p384_mod);
        (void)sp_384_mod_mul_norm_12(t[1].z, g->z, p384_mod);
        t[1].infinity = 0;
        /* Fill t[2]..t[15]: an even entry 2j is the double of t[j], an odd
         * entry 2j+1 is t[j+1] + t[j]. */
        sp_384_proj_point_dbl_12(&t[ 2], &t[ 1], tmp);
        t[ 2].infinity = 0;
        sp_384_proj_point_add_12(&t[ 3], &t[ 2], &t[ 1], tmp);
        t[ 3].infinity = 0;
        sp_384_proj_point_dbl_12(&t[ 4], &t[ 2], tmp);
        t[ 4].infinity = 0;
        sp_384_proj_point_add_12(&t[ 5], &t[ 3], &t[ 2], tmp);
        t[ 5].infinity = 0;
        sp_384_proj_point_dbl_12(&t[ 6], &t[ 3], tmp);
        t[ 6].infinity = 0;
        sp_384_proj_point_add_12(&t[ 7], &t[ 4], &t[ 3], tmp);
        t[ 7].infinity = 0;
        sp_384_proj_point_dbl_12(&t[ 8], &t[ 4], tmp);
        t[ 8].infinity = 0;
        sp_384_proj_point_add_12(&t[ 9], &t[ 5], &t[ 4], tmp);
        t[ 9].infinity = 0;
        sp_384_proj_point_dbl_12(&t[10], &t[ 5], tmp);
        t[10].infinity = 0;
        sp_384_proj_point_add_12(&t[11], &t[ 6], &t[ 5], tmp);
        t[11].infinity = 0;
        sp_384_proj_point_dbl_12(&t[12], &t[ 6], tmp);
        t[12].infinity = 0;
        sp_384_proj_point_add_12(&t[13], &t[ 7], &t[ 6], tmp);
        t[13].infinity = 0;
        sp_384_proj_point_dbl_12(&t[14], &t[ 7], tmp);
        t[14].infinity = 0;
        sp_384_proj_point_add_12(&t[15], &t[ 8], &t[ 7], tmp);
        t[15].infinity = 0;

        /* Start with the top 4 bits of k[11]; 28 bits of that word remain
         * and k[10] is the next word to load. */
        i = 10;
        n = k[i+1] << 0;
        c = 28;
        y = (int)(n >> 28);
    #ifndef WC_NO_CACHE_RESISTANT
        if (ct) {
            /* Masked fetch that reads all of entries 1..15; a window of 0
             * gives zero ordinates and is flagged as infinity. */
            sp_384_get_point_16_12(rt, t, y);
            rt->infinity = !y;
        }
        else
    #endif
        {
            XMEMCPY(rt, &t[y], sizeof(sp_point_384));
        }
        n <<= 4;
        /* Run until every word of k is loaded and fewer than 4 bits remain. */
        for (; i>=0 || c>=4; ) {
            if (c < 4) {
                /* Refill n with the next lower word of the scalar. */
                n |= k[i--];
                c += 32;
            }
            y = (n >> 28) & 0xf;
            n <<= 4;
            c -= 4;

            /* rt = 16 * rt, then add the table entry for this window. */
            sp_384_proj_point_dbl_12(rt, rt, tmp);
            sp_384_proj_point_dbl_12(rt, rt, tmp);
            sp_384_proj_point_dbl_12(rt, rt, tmp);
            sp_384_proj_point_dbl_12(rt, rt, tmp);

    #ifndef WC_NO_CACHE_RESISTANT
            if (ct) {
                sp_384_get_point_16_12(p, t, y);
                p->infinity = !y;
                sp_384_proj_point_add_12(rt, rt, p, tmp);
            }
            else
    #endif
            {
                sp_384_proj_point_add_12(rt, rt, &t[y], tmp);
            }
        }

        if (map != 0) {
            sp_384_map_12(r, rt, tmp);
        }
        else {
            XMEMCPY(r, rt, sizeof(sp_point_384));
        }
    }

    /* Zeroize each working buffer, then free it when it was allocated. In
     * the allocating build a NULL pointer is skipped. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (tmp != NULL)
#endif
    {
        ForceZero(tmp, sizeof(sp_digit) * 2 * 12 * 6);
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
    #endif
    }
#ifndef WC_NO_CACHE_RESISTANT
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (p != NULL)
    #endif
        {
            ForceZero(p, sizeof(sp_point_384));
        #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
            XFREE(p, heap, DYNAMIC_TYPE_ECC);
        #endif
        }
#endif /* !WC_NO_CACHE_RESISTANT */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL)
#endif
    {
        ForceZero(t, sizeof(sp_point_384) * 17);
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(t, heap, DYNAMIC_TYPE_ECC);
    #endif
    }

    return err;
}
26848 
26849 #ifdef FP_ECC
/* Double the Montgomery form projective point p a number of times, in place.
 *
 * There is no separate result parameter: the ordinates of p are overwritten.
 *
 * Y is doubled once before the first doubling and halved once after the last
 * one, and W (which starts as Z^4) is carried from one doubling to the next
 * by multiplying with Y^4 instead of being recomputed from Z.
 *
 * NOTE(review): without WOLFSSL_SP_SMALL the last doubling is unrolled after
 * the loop, so one doubling is performed even when n <= 0; with
 * WOLFSSL_SP_SMALL no doubling is performed in that case. The callers visible
 * in this part of the file pass 96 or 48.
 *
 * p  Point to double; receives the result.
 * n  Number of times to double
 * t  Temporary ordinate data. Five working values are placed at offsets
 *    0, 2*12, 4*12, 6*12 and 8*12.
 */
static void sp_384_proj_point_dbl_n_12(sp_point_384* p, int n,
    sp_digit* t)
{
    sp_digit* w = t;
    sp_digit* a = t + 2*12;
    sp_digit* b = t + 4*12;
    sp_digit* t1 = t + 6*12;
    sp_digit* t2 = t + 8*12;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;

    x = p->x;
    y = p->y;
    z = p->z;

    /* Y = 2*Y */
    sp_384_mont_dbl_12(y, y, p384_mod);
    /* W = Z^4 */
    sp_384_mont_sqr_12(w, z, p384_mod, p384_mp_mod);
    sp_384_mont_sqr_12(w, w, p384_mod, p384_mp_mod);

    /* Not small: loop performs n-1 doublings, the last one follows the loop.
     * Small: loop performs all n doublings and skips the W update on the
     * last pass (n == 0) as W is not needed again. */
#ifndef WOLFSSL_SP_SMALL
    while (--n > 0)
#else
    while (--n >= 0)
#endif
    {
        /* A = 3*(X^2 - W) */
        sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
        sp_384_mont_sub_12(t1, t1, w, p384_mod);
        sp_384_mont_tpl_12(a, t1, p384_mod);
        /* B = X*Y^2 */
        sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
        /* X = A^2 - 2B */
        sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
        sp_384_mont_dbl_12(t2, b, p384_mod);
        sp_384_mont_sub_12(x, x, t2, p384_mod);
        /* Z = Z*Y */
        sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
        /* t1 = Y^4 (t1 held Y^2 from the calculation of B) */
        sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
#ifdef WOLFSSL_SP_SMALL
        if (n != 0)
#endif
        {
            /* W = W*Y^4 */
            sp_384_mont_mul_12(w, w, t1, p384_mod, p384_mp_mod);
        }
        /* y = 2*A*(B - X) - Y^4 */
        sp_384_mont_sub_12(y, b, x, p384_mod);
        sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
        sp_384_mont_dbl_12(y, y, p384_mod);
        sp_384_mont_sub_12(y, y, t1, p384_mod);
    }
#ifndef WOLFSSL_SP_SMALL
    /* Final doubling: same steps as the loop body without the W update. */
    /* A = 3*(X^2 - W) */
    sp_384_mont_sqr_12(t1, x, p384_mod, p384_mp_mod);
    sp_384_mont_sub_12(t1, t1, w, p384_mod);
    sp_384_mont_tpl_12(a, t1, p384_mod);
    /* B = X*Y^2 */
    sp_384_mont_sqr_12(t1, y, p384_mod, p384_mp_mod);
    sp_384_mont_mul_12(b, t1, x, p384_mod, p384_mp_mod);
    /* X = A^2 - 2B */
    sp_384_mont_sqr_12(x, a, p384_mod, p384_mp_mod);
    sp_384_mont_dbl_12(t2, b, p384_mod);
    sp_384_mont_sub_12(x, x, t2, p384_mod);
    /* Z = Z*Y */
    sp_384_mont_mul_12(z, z, y, p384_mod, p384_mp_mod);
    /* t1 = Y^4 (t1 held Y^2 from the calculation of B) */
    sp_384_mont_sqr_12(t1, t1, p384_mod, p384_mp_mod);
    /* y = 2*A*(B - X) - Y^4 */
    sp_384_mont_sub_12(y, b, x, p384_mod);
    sp_384_mont_mul_12(y, y, a, p384_mod, p384_mp_mod);
    sp_384_mont_dbl_12(y, y, p384_mod);
    sp_384_mont_sub_12(y, y, t1, p384_mod);
#endif
    /* Y = Y/2 - undoes the doubling of Y done before the loop. */
    sp_384_div2_12(y, y, p384_mod);
}
26938 
26939 /* Convert the projective point to affine.
26940  * Ordinates are in Montgomery form.
26941  *
26942  * a  Point to convert.
26943  * t  Temporary data.
26944  */
sp_384_proj_to_affine_12(sp_point_384 * a,sp_digit * t)26945 static void sp_384_proj_to_affine_12(sp_point_384* a, sp_digit* t)
26946 {
26947     sp_digit* t1 = t;
26948     sp_digit* t2 = t + 2 * 12;
26949     sp_digit* tmp = t + 4 * 12;
26950 
26951     sp_384_mont_inv_12(t1, a->z, tmp);
26952 
26953     sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
26954     sp_384_mont_mul_12(t1, t2, t1, p384_mod, p384_mp_mod);
26955 
26956     sp_384_mont_mul_12(a->x, a->x, t2, p384_mod, p384_mp_mod);
26957     sp_384_mont_mul_12(a->y, a->y, t1, p384_mod, p384_mp_mod);
26958     XMEMCPY(a->z, p384_norm_mod, sizeof(p384_norm_mod));
26959 }
26960 
26961 #endif /* FP_ECC */
/* A table entry for pre-computed points.
 *
 * Only x and y ordinates are kept (12 digits each). The table generators in
 * this file store affine ordinates in Montgomery form and leave entry 0 all
 * zero to stand for the point at infinity.
 */
typedef struct sp_table_entry_384 {
    /* X ordinate of the pre-computed point. */
    sp_digit x[12];
    /* Y ordinate of the pre-computed point. */
    sp_digit y[12];
} sp_table_entry_384;
26967 
26968 #ifdef FP_ECC
26969 #endif /* FP_ECC */
/* Add two Montgomery form projective points. The second point has a z value of
 * one.
 * Only the first point can be the same pointer as the result point.
 *
 * Because Z2 is one, X2 and Y2 only need scaling by powers of Z1 and X1, Y1
 * are used as they are.
 *
 * When either point is infinity the arithmetic is still performed but is
 * directed at a dummy point overlaying t, so that r keeps the copy of the
 * other input point.
 *
 * NOTE(review): the "double" check below also matches p == -q (p->y equal to
 * p384_mod - q->y), where the sum would be the point at infinity rather than
 * 2*p - confirm callers cannot reach that case.
 *
 * r  Result of addition.
 * p  First point to add.
 * q  Second point to add. The z ordinate is only read by the double check.
 * t  Temporary ordinate data. Also used as storage for the dummy point.
 */
static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p,
        const sp_point_384* q, sp_digit* t)
{
    const sp_point_384* ap[2];
    sp_point_384* rp[2];
    sp_digit* t1 = t;
    sp_digit* t2 = t + 2*12;
    sp_digit* t3 = t + 4*12;
    sp_digit* t4 = t + 6*12;
    sp_digit* t5 = t + 8*12;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
    int i;

    /* Check double */
    /* t1 = modulus - Y2, compared against Y1 along with Y2 itself. */
    (void)sp_384_sub_12(t1, p384_mod, q->y);
    sp_384_norm_12(t1);
    if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) &
        (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) {
        sp_384_proj_point_dbl_12(r, p, t);
    }
    else {
        /* rp[0] is the real result, rp[1] is a dummy point in the scratch. */
        rp[0] = r;

        /*lint allow cast to different type of pointer*/
        rp[1] = (sp_point_384*)t; /*lint !e9087 !e740*/
        XMEMSET(rp[1], 0, sizeof(sp_point_384));
        /* Work on the dummy point when either input is infinity. */
        x = rp[p->infinity | q->infinity]->x;
        y = rp[p->infinity | q->infinity]->y;
        z = rp[p->infinity | q->infinity]->z;

        /* r = p, or r = q when p is infinity. */
        ap[0] = p;
        ap[1] = q;
        for (i=0; i<12; i++) {
            r->x[i] = ap[p->infinity]->x[i];
        }
        for (i=0; i<12; i++) {
            r->y[i] = ap[p->infinity]->y[i];
        }
        for (i=0; i<12; i++) {
            r->z[i] = ap[p->infinity]->z[i];
        }
        r->infinity = ap[p->infinity]->infinity;

        /* U2 = X2*Z1^2 */
        sp_384_mont_sqr_12(t2, z, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(t4, t2, z, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(t2, t2, q->x, p384_mod, p384_mp_mod);
        /* S2 = Y2*Z1^3 */
        sp_384_mont_mul_12(t4, t4, q->y, p384_mod, p384_mp_mod);
        /* H = U2 - X1 */
        sp_384_mont_sub_12(t2, t2, x, p384_mod);
        /* R = S2 - Y1 */
        sp_384_mont_sub_12(t4, t4, y, p384_mod);
        /* Z3 = H*Z1 */
        sp_384_mont_mul_12(z, z, t2, p384_mod, p384_mp_mod);
        /* X3 = R^2 - H^3 - 2*X1*H^2 */
        sp_384_mont_sqr_12(t1, t4, p384_mod, p384_mp_mod);
        sp_384_mont_sqr_12(t5, t2, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(t3, x, t5, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(t5, t5, t2, p384_mod, p384_mp_mod);
        sp_384_mont_sub_12(x, t1, t5, p384_mod);
        sp_384_mont_dbl_12(t1, t3, p384_mod);
        sp_384_mont_sub_12(x, x, t1, p384_mod);
        /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
        sp_384_mont_sub_12(t3, t3, x, p384_mod);
        sp_384_mont_mul_12(t3, t3, t4, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(t5, t5, y, p384_mod, p384_mp_mod);
        sp_384_mont_sub_12(y, t3, t5, p384_mod);
    }
}
27051 
27052 #ifdef WOLFSSL_SP_SMALL
27053 #ifdef FP_ECC
27054 /* Generate the pre-computed table of points for the base point.
27055  *
27056  * width = 4
27057  * 16 entries
27058  * 96 bits between
27059  *
27060  * a      The base point.
27061  * table  Place to store generated point data.
27062  * tmp    Temporary data.
27063  * heap  Heap to use for allocation.
27064  */
sp_384_gen_stripe_table_12(const sp_point_384 * a,sp_table_entry_384 * table,sp_digit * tmp,void * heap)27065 static int sp_384_gen_stripe_table_12(const sp_point_384* a,
27066         sp_table_entry_384* table, sp_digit* tmp, void* heap)
27067 {
27068 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27069     sp_point_384* t = NULL;
27070 #else
27071     sp_point_384 t[3];
27072 #endif
27073     sp_point_384* s1 = NULL;
27074     sp_point_384* s2 = NULL;
27075     int i;
27076     int j;
27077     int err = MP_OKAY;
27078 
27079     (void)heap;
27080 
27081 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27082     t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap,
27083                                      DYNAMIC_TYPE_ECC);
27084     if (t == NULL)
27085         err = MEMORY_E;
27086 #endif
27087 
27088     if (err == MP_OKAY) {
27089         s1 = t + 1;
27090         s2 = t + 2;
27091 
27092         err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
27093     }
27094     if (err == MP_OKAY) {
27095         err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
27096     }
27097     if (err == MP_OKAY) {
27098         err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
27099     }
27100     if (err == MP_OKAY) {
27101         t->infinity = 0;
27102         sp_384_proj_to_affine_12(t, tmp);
27103 
27104         XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
27105         s1->infinity = 0;
27106         XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
27107         s2->infinity = 0;
27108 
27109         /* table[0] = {0, 0, infinity} */
27110         XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
27111         /* table[1] = Affine version of 'a' in Montgomery form */
27112         XMEMCPY(table[1].x, t->x, sizeof(table->x));
27113         XMEMCPY(table[1].y, t->y, sizeof(table->y));
27114 
27115         for (i=1; i<4; i++) {
27116             sp_384_proj_point_dbl_n_12(t, 96, tmp);
27117             sp_384_proj_to_affine_12(t, tmp);
27118             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
27119             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
27120         }
27121 
27122         for (i=1; i<4; i++) {
27123             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
27124             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
27125             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
27126                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
27127                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
27128                 sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
27129                 sp_384_proj_to_affine_12(t, tmp);
27130                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
27131                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
27132             }
27133         }
27134     }
27135 
27136 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27137     if (t != NULL)
27138         XFREE(t, heap, DYNAMIC_TYPE_ECC);
27139 #endif
27140 
27141     return err;
27142 }
27143 
27144 #endif /* FP_ECC */
27145 #ifndef WC_NO_CACHE_RESISTANT
27146 /* Touch each possible entry that could be being copied.
27147  *
27148  * r      Point to copy into.
27149  * table  Table - start of the entires to access
27150  * idx    Index of entry to retrieve.
27151  */
sp_384_get_entry_16_12(sp_point_384 * r,const sp_table_entry_384 * table,int idx)27152 static void sp_384_get_entry_16_12(sp_point_384* r,
27153     const sp_table_entry_384* table, int idx)
27154 {
27155     int i;
27156     sp_digit mask;
27157 
27158     r->x[0] = 0;
27159     r->x[1] = 0;
27160     r->x[2] = 0;
27161     r->x[3] = 0;
27162     r->x[4] = 0;
27163     r->x[5] = 0;
27164     r->x[6] = 0;
27165     r->x[7] = 0;
27166     r->x[8] = 0;
27167     r->x[9] = 0;
27168     r->x[10] = 0;
27169     r->x[11] = 0;
27170     r->y[0] = 0;
27171     r->y[1] = 0;
27172     r->y[2] = 0;
27173     r->y[3] = 0;
27174     r->y[4] = 0;
27175     r->y[5] = 0;
27176     r->y[6] = 0;
27177     r->y[7] = 0;
27178     r->y[8] = 0;
27179     r->y[9] = 0;
27180     r->y[10] = 0;
27181     r->y[11] = 0;
27182     for (i = 1; i < 16; i++) {
27183         mask = 0 - (i == idx);
27184         r->x[0] |= mask & table[i].x[0];
27185         r->x[1] |= mask & table[i].x[1];
27186         r->x[2] |= mask & table[i].x[2];
27187         r->x[3] |= mask & table[i].x[3];
27188         r->x[4] |= mask & table[i].x[4];
27189         r->x[5] |= mask & table[i].x[5];
27190         r->x[6] |= mask & table[i].x[6];
27191         r->x[7] |= mask & table[i].x[7];
27192         r->x[8] |= mask & table[i].x[8];
27193         r->x[9] |= mask & table[i].x[9];
27194         r->x[10] |= mask & table[i].x[10];
27195         r->x[11] |= mask & table[i].x[11];
27196         r->y[0] |= mask & table[i].y[0];
27197         r->y[1] |= mask & table[i].y[1];
27198         r->y[2] |= mask & table[i].y[2];
27199         r->y[3] |= mask & table[i].y[3];
27200         r->y[4] |= mask & table[i].y[4];
27201         r->y[5] |= mask & table[i].y[5];
27202         r->y[6] |= mask & table[i].y[6];
27203         r->y[7] |= mask & table[i].y[7];
27204         r->y[8] |= mask & table[i].y[8];
27205         r->y[9] |= mask & table[i].y[9];
27206         r->y[10] |= mask & table[i].y[10];
27207         r->y[11] |= mask & table[i].y[11];
27208     }
27209 }
27210 #endif /* !WC_NO_CACHE_RESISTANT */
27211 /* Multiply the point by the scalar and return the result.
27212  * If map is true then convert result to affine coordinates.
27213  *
27214  * Stripe implementation.
27215  * Pre-generated: 2^0, 2^96, ...
27216  * Pre-generated: products of all combinations of above.
27217  * 4 doubles and adds (with qz=1)
27218  *
27219  * r      Resulting point.
27220  * k      Scalar to multiply by.
27221  * table  Pre-computed table.
27222  * map    Indicates whether to convert result to affine.
27223  * ct     Constant time required.
27224  * heap   Heap to use for allocation.
27225  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
27226  */
sp_384_ecc_mulmod_stripe_12(sp_point_384 * r,const sp_point_384 * g,const sp_table_entry_384 * table,const sp_digit * k,int map,int ct,void * heap)27227 static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
27228         const sp_table_entry_384* table, const sp_digit* k, int map,
27229         int ct, void* heap)
27230 {
27231 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27232     sp_point_384* rt = NULL;
27233     sp_digit* t = NULL;
27234 #else
27235     sp_point_384 rt[2];
27236     sp_digit t[2 * 12 * 6];
27237 #endif
27238     sp_point_384* p = NULL;
27239     int i;
27240     int j;
27241     int y;
27242     int x;
27243     int err = MP_OKAY;
27244 
27245     (void)g;
27246     /* Constant time used for cache attack resistance implementation. */
27247     (void)ct;
27248     (void)heap;
27249 
27250 
27251 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27252     rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap,
27253                                       DYNAMIC_TYPE_ECC);
27254     if (rt == NULL)
27255         err = MEMORY_E;
27256     if (err == MP_OKAY) {
27257         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
27258                                DYNAMIC_TYPE_ECC);
27259         if (t == NULL)
27260             err = MEMORY_E;
27261     }
27262 #endif
27263 
27264     if (err == MP_OKAY) {
27265         p = rt + 1;
27266 
27267         XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
27268         XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
27269 
27270         y = 0;
27271         x = 95;
27272         for (j=0; j<4; j++) {
27273             y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
27274             x += 96;
27275         }
27276     #ifndef WC_NO_CACHE_RESISTANT
27277         if (ct) {
27278             sp_384_get_entry_16_12(rt, table, y);
27279         } else
27280     #endif
27281         {
27282             XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
27283             XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
27284         }
27285         rt->infinity = !y;
27286         for (i=94; i>=0; i--) {
27287             y = 0;
27288             x = i;
27289             for (j=0; j<4; j++) {
27290                 y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
27291                 x += 96;
27292             }
27293 
27294             sp_384_proj_point_dbl_12(rt, rt, t);
27295         #ifndef WC_NO_CACHE_RESISTANT
27296             if (ct) {
27297                 sp_384_get_entry_16_12(p, table, y);
27298             }
27299             else
27300         #endif
27301             {
27302                 XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
27303                 XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
27304             }
27305             p->infinity = !y;
27306             sp_384_proj_point_add_qz1_12(rt, rt, p, t);
27307         }
27308 
27309         if (map != 0) {
27310             sp_384_map_12(r, rt, t);
27311         }
27312         else {
27313             XMEMCPY(r, rt, sizeof(sp_point_384));
27314         }
27315     }
27316 
27317 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27318     if (t != NULL)
27319         XFREE(t, heap, DYNAMIC_TYPE_ECC);
27320     if (rt != NULL)
27321         XFREE(rt, heap, DYNAMIC_TYPE_ECC);
27322 #endif
27323 
27324     return err;
27325 }
27326 
27327 #ifdef FP_ECC
#ifndef FP_ENTRIES
    /* Number of entries in the cache of precomputation tables. */
    #define FP_ENTRIES 16
#endif

/* Cache entry - holds precomputation tables for a point. */
typedef struct sp_cache_384_t {
    /* X ordinate of point that table was generated from. */
    sp_digit x[12];
    /* Y ordinate of point that table was generated from. */
    sp_digit y[12];
    /* Precomputation table for point. */
    sp_table_entry_384 table[16];
    /* Number of times the point has been looked up in the cache: set to 1
     * when the point is stored and incremented on every later match. The
     * table is generated by the caller when the count reaches 2. */
    uint32_t cnt;
    /* Non-zero when x and y hold a point; the table is only filled in once
     * cnt has reached 2. */
    int set;
} sp_cache_384_t;

/* Cache of tables. */
static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
/* Index of the entry most recently returned from the cache; -1 before the
 * first lookup. */
static THREAD_LS_T int sp_cache_384_last = -1;
/* Cache has been initialized. */
static THREAD_LS_T int sp_cache_384_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_384_lock has been initialized. */
    static volatile int initCacheMutex_384 = 0;
    /* Lock taken around cache lookup and table generation. */
    static wolfSSL_Mutex sp_cache_384_lock;
#endif
27357 
27358 /* Get the cache entry for the point.
27359  *
27360  * g      [in]   Point scalar multipling.
27361  * cache  [out]  Cache table to use.
27362  */
sp_ecc_get_cache_384(const sp_point_384 * g,sp_cache_384_t ** cache)27363 static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
27364 {
27365     int i;
27366     int j;
27367     uint32_t least;
27368 
27369     if (sp_cache_384_inited == 0) {
27370         for (i=0; i<FP_ENTRIES; i++) {
27371             sp_cache_384[i].set = 0;
27372         }
27373         sp_cache_384_inited = 1;
27374     }
27375 
27376     /* Compare point with those in cache. */
27377     for (i=0; i<FP_ENTRIES; i++) {
27378         if (!sp_cache_384[i].set)
27379             continue;
27380 
27381         if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
27382                            sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
27383             sp_cache_384[i].cnt++;
27384             break;
27385         }
27386     }
27387 
27388     /* No match. */
27389     if (i == FP_ENTRIES) {
27390         /* Find empty entry. */
27391         i = (sp_cache_384_last + 1) % FP_ENTRIES;
27392         for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
27393             if (!sp_cache_384[i].set) {
27394                 break;
27395             }
27396         }
27397 
27398         /* Evict least used. */
27399         if (i == sp_cache_384_last) {
27400             least = sp_cache_384[0].cnt;
27401             for (j=1; j<FP_ENTRIES; j++) {
27402                 if (sp_cache_384[j].cnt < least) {
27403                     i = j;
27404                     least = sp_cache_384[i].cnt;
27405                 }
27406             }
27407         }
27408 
27409         XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
27410         XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
27411         sp_cache_384[i].set = 1;
27412         sp_cache_384[i].cnt = 1;
27413     }
27414 
27415     *cache = &sp_cache_384[i];
27416     sp_cache_384_last = i;
27417 }
27418 #endif /* FP_ECC */
27419 
27420 /* Multiply the base point of P384 by the scalar and return the result.
27421  * If map is true then convert result to affine coordinates.
27422  *
27423  * r     Resulting point.
27424  * g     Point to multiply.
27425  * k     Scalar to multiply by.
27426  * map   Indicates whether to convert result to affine.
27427  * ct    Constant time required.
27428  * heap  Heap to use for allocation.
27429  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
27430  */
sp_384_ecc_mulmod_12(sp_point_384 * r,const sp_point_384 * g,const sp_digit * k,int map,int ct,void * heap)27431 static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
27432         int map, int ct, void* heap)
27433 {
27434 #ifndef FP_ECC
27435     return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
27436 #else
27437     sp_digit tmp[2 * 12 * 7];
27438     sp_cache_384_t* cache;
27439     int err = MP_OKAY;
27440 
27441 #ifndef HAVE_THREAD_LS
27442     if (initCacheMutex_384 == 0) {
27443          wc_InitMutex(&sp_cache_384_lock);
27444          initCacheMutex_384 = 1;
27445     }
27446     if (wc_LockMutex(&sp_cache_384_lock) != 0)
27447        err = BAD_MUTEX_E;
27448 #endif /* HAVE_THREAD_LS */
27449 
27450     if (err == MP_OKAY) {
27451         sp_ecc_get_cache_384(g, &cache);
27452         if (cache->cnt == 2)
27453             sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
27454 
27455 #ifndef HAVE_THREAD_LS
27456         wc_UnLockMutex(&sp_cache_384_lock);
27457 #endif /* HAVE_THREAD_LS */
27458 
27459         if (cache->cnt < 2) {
27460             err = sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
27461         }
27462         else {
27463             err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
27464                     map, ct, heap);
27465         }
27466     }
27467 
27468     return err;
27469 #endif
27470 }
27471 
27472 #else
27473 #ifdef FP_ECC
27474 /* Generate the pre-computed table of points for the base point.
27475  *
27476  * width = 8
27477  * 256 entries
27478  * 48 bits between
27479  *
27480  * a      The base point.
27481  * table  Place to store generated point data.
27482  * tmp    Temporary data.
27483  * heap  Heap to use for allocation.
27484  */
sp_384_gen_stripe_table_12(const sp_point_384 * a,sp_table_entry_384 * table,sp_digit * tmp,void * heap)27485 static int sp_384_gen_stripe_table_12(const sp_point_384* a,
27486         sp_table_entry_384* table, sp_digit* tmp, void* heap)
27487 {
27488 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27489     sp_point_384* t = NULL;
27490 #else
27491     sp_point_384 t[3];
27492 #endif
27493     sp_point_384* s1 = NULL;
27494     sp_point_384* s2 = NULL;
27495     int i;
27496     int j;
27497     int err = MP_OKAY;
27498 
27499     (void)heap;
27500 
27501 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27502     t = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 3, heap,
27503                                      DYNAMIC_TYPE_ECC);
27504     if (t == NULL)
27505         err = MEMORY_E;
27506 #endif
27507 
27508     if (err == MP_OKAY) {
27509         s1 = t + 1;
27510         s2 = t + 2;
27511 
27512         err = sp_384_mod_mul_norm_12(t->x, a->x, p384_mod);
27513     }
27514     if (err == MP_OKAY) {
27515         err = sp_384_mod_mul_norm_12(t->y, a->y, p384_mod);
27516     }
27517     if (err == MP_OKAY) {
27518         err = sp_384_mod_mul_norm_12(t->z, a->z, p384_mod);
27519     }
27520     if (err == MP_OKAY) {
27521         t->infinity = 0;
27522         sp_384_proj_to_affine_12(t, tmp);
27523 
27524         XMEMCPY(s1->z, p384_norm_mod, sizeof(p384_norm_mod));
27525         s1->infinity = 0;
27526         XMEMCPY(s2->z, p384_norm_mod, sizeof(p384_norm_mod));
27527         s2->infinity = 0;
27528 
27529         /* table[0] = {0, 0, infinity} */
27530         XMEMSET(&table[0], 0, sizeof(sp_table_entry_384));
27531         /* table[1] = Affine version of 'a' in Montgomery form */
27532         XMEMCPY(table[1].x, t->x, sizeof(table->x));
27533         XMEMCPY(table[1].y, t->y, sizeof(table->y));
27534 
27535         for (i=1; i<8; i++) {
27536             sp_384_proj_point_dbl_n_12(t, 48, tmp);
27537             sp_384_proj_to_affine_12(t, tmp);
27538             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
27539             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
27540         }
27541 
27542         for (i=1; i<8; i++) {
27543             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
27544             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
27545             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
27546                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
27547                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
27548                 sp_384_proj_point_add_qz1_12(t, s1, s2, tmp);
27549                 sp_384_proj_to_affine_12(t, tmp);
27550                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
27551                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
27552             }
27553         }
27554     }
27555 
27556 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27557     if (t != NULL)
27558         XFREE(t, heap, DYNAMIC_TYPE_ECC);
27559 #endif
27560 
27561     return err;
27562 }
27563 
27564 #endif /* FP_ECC */
27565 #ifndef WC_NO_CACHE_RESISTANT
27566 /* Touch each possible entry that could be being copied.
27567  *
27568  * r      Point to copy into.
27569  * table  Table - start of the entires to access
27570  * idx    Index of entry to retrieve.
27571  */
sp_384_get_entry_256_12(sp_point_384 * r,const sp_table_entry_384 * table,int idx)27572 static void sp_384_get_entry_256_12(sp_point_384* r,
27573     const sp_table_entry_384* table, int idx)
27574 {
27575     int i;
27576     sp_digit mask;
27577 
27578     r->x[0] = 0;
27579     r->x[1] = 0;
27580     r->x[2] = 0;
27581     r->x[3] = 0;
27582     r->x[4] = 0;
27583     r->x[5] = 0;
27584     r->x[6] = 0;
27585     r->x[7] = 0;
27586     r->x[8] = 0;
27587     r->x[9] = 0;
27588     r->x[10] = 0;
27589     r->x[11] = 0;
27590     r->y[0] = 0;
27591     r->y[1] = 0;
27592     r->y[2] = 0;
27593     r->y[3] = 0;
27594     r->y[4] = 0;
27595     r->y[5] = 0;
27596     r->y[6] = 0;
27597     r->y[7] = 0;
27598     r->y[8] = 0;
27599     r->y[9] = 0;
27600     r->y[10] = 0;
27601     r->y[11] = 0;
27602     for (i = 1; i < 256; i++) {
27603         mask = 0 - (i == idx);
27604         r->x[0] |= mask & table[i].x[0];
27605         r->x[1] |= mask & table[i].x[1];
27606         r->x[2] |= mask & table[i].x[2];
27607         r->x[3] |= mask & table[i].x[3];
27608         r->x[4] |= mask & table[i].x[4];
27609         r->x[5] |= mask & table[i].x[5];
27610         r->x[6] |= mask & table[i].x[6];
27611         r->x[7] |= mask & table[i].x[7];
27612         r->x[8] |= mask & table[i].x[8];
27613         r->x[9] |= mask & table[i].x[9];
27614         r->x[10] |= mask & table[i].x[10];
27615         r->x[11] |= mask & table[i].x[11];
27616         r->y[0] |= mask & table[i].y[0];
27617         r->y[1] |= mask & table[i].y[1];
27618         r->y[2] |= mask & table[i].y[2];
27619         r->y[3] |= mask & table[i].y[3];
27620         r->y[4] |= mask & table[i].y[4];
27621         r->y[5] |= mask & table[i].y[5];
27622         r->y[6] |= mask & table[i].y[6];
27623         r->y[7] |= mask & table[i].y[7];
27624         r->y[8] |= mask & table[i].y[8];
27625         r->y[9] |= mask & table[i].y[9];
27626         r->y[10] |= mask & table[i].y[10];
27627         r->y[11] |= mask & table[i].y[11];
27628     }
27629 }
27630 #endif /* !WC_NO_CACHE_RESISTANT */
27631 /* Multiply the point by the scalar and return the result.
27632  * If map is true then convert result to affine coordinates.
27633  *
27634  * Stripe implementation.
27635  * Pre-generated: 2^0, 2^48, ...
27636  * Pre-generated: products of all combinations of above.
27637  * 8 doubles and adds (with qz=1)
27638  *
27639  * r      Resulting point.
27640  * k      Scalar to multiply by.
27641  * table  Pre-computed table.
27642  * map    Indicates whether to convert result to affine.
27643  * ct     Constant time required.
27644  * heap   Heap to use for allocation.
27645  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
27646  */
sp_384_ecc_mulmod_stripe_12(sp_point_384 * r,const sp_point_384 * g,const sp_table_entry_384 * table,const sp_digit * k,int map,int ct,void * heap)27647 static int sp_384_ecc_mulmod_stripe_12(sp_point_384* r, const sp_point_384* g,
27648         const sp_table_entry_384* table, const sp_digit* k, int map,
27649         int ct, void* heap)
27650 {
27651 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27652     sp_point_384* rt = NULL;
27653     sp_digit* t = NULL;
27654 #else
27655     sp_point_384 rt[2];
27656     sp_digit t[2 * 12 * 6];
27657 #endif
27658     sp_point_384* p = NULL;
27659     int i;
27660     int j;
27661     int y;
27662     int x;
27663     int err = MP_OKAY;
27664 
27665     (void)g;
27666     /* Constant time used for cache attack resistance implementation. */
27667     (void)ct;
27668     (void)heap;
27669 
27670 
27671 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27672     rt = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap,
27673                                       DYNAMIC_TYPE_ECC);
27674     if (rt == NULL)
27675         err = MEMORY_E;
27676     if (err == MP_OKAY) {
27677         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, heap,
27678                                DYNAMIC_TYPE_ECC);
27679         if (t == NULL)
27680             err = MEMORY_E;
27681     }
27682 #endif
27683 
27684     if (err == MP_OKAY) {
27685         p = rt + 1;
27686 
27687         XMEMCPY(p->z, p384_norm_mod, sizeof(p384_norm_mod));
27688         XMEMCPY(rt->z, p384_norm_mod, sizeof(p384_norm_mod));
27689 
27690         y = 0;
27691         x = 47;
27692         for (j=0; j<8; j++) {
27693             y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
27694             x += 48;
27695         }
27696     #ifndef WC_NO_CACHE_RESISTANT
27697         if (ct) {
27698             sp_384_get_entry_256_12(rt, table, y);
27699         } else
27700     #endif
27701         {
27702             XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
27703             XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
27704         }
27705         rt->infinity = !y;
27706         for (i=46; i>=0; i--) {
27707             y = 0;
27708             x = i;
27709             for (j=0; j<8; j++) {
27710                 y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
27711                 x += 48;
27712             }
27713 
27714             sp_384_proj_point_dbl_12(rt, rt, t);
27715         #ifndef WC_NO_CACHE_RESISTANT
27716             if (ct) {
27717                 sp_384_get_entry_256_12(p, table, y);
27718             }
27719             else
27720         #endif
27721             {
27722                 XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
27723                 XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
27724             }
27725             p->infinity = !y;
27726             sp_384_proj_point_add_qz1_12(rt, rt, p, t);
27727         }
27728 
27729         if (map != 0) {
27730             sp_384_map_12(r, rt, t);
27731         }
27732         else {
27733             XMEMCPY(r, rt, sizeof(sp_point_384));
27734         }
27735     }
27736 
27737 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27738     if (t != NULL)
27739         XFREE(t, heap, DYNAMIC_TYPE_ECC);
27740     if (rt != NULL)
27741         XFREE(rt, heap, DYNAMIC_TYPE_ECC);
27742 #endif
27743 
27744     return err;
27745 }
27746 
27747 #ifdef FP_ECC
27748 #ifndef FP_ENTRIES
27749     #define FP_ENTRIES 16
27750 #endif
27751 
/* Cache entry - holds the precomputation table for one point.
 *
 * An entry is matched to a point by comparing the x and y ordinates
 * (see sp_ecc_get_cache_384).
 */
typedef struct sp_cache_384_t {
    /* X ordinate of point that table was generated from. */
    sp_digit x[12];
    /* Y ordinate of point that table was generated from. */
    sp_digit y[12];
    /* Precomputation table for point.
     * Not filled in when the entry is claimed - sp_384_ecc_mulmod_12
     * generates it when cnt reaches 2. */
    sp_table_entry_384 table[256];
    /* Number of times the point has been looked up in the cache.
     * Set to 1 when the entry is claimed and incremented on each later match.
     * Compared across entries when choosing one to evict. */
    uint32_t cnt;
    /* Non-zero when the entry has been claimed for a point. */
    int set;
} sp_cache_384_t;
27765 
/* Cache of tables - FP_ENTRIES entries, one per cached point. */
static THREAD_LS_T sp_cache_384_t sp_cache_384[FP_ENTRIES];
/* Index of the entry most recently returned by sp_ecc_get_cache_384.
 * -1 until the first look-up. */
static THREAD_LS_T int sp_cache_384_last = -1;
/* Cache has been initialized - set to 1 once the 'set' flag of every entry has
 * been cleared. */
static THREAD_LS_T int sp_cache_384_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Set to 1 once wc_InitMutex() has been called on sp_cache_384_lock. */
    static volatile int initCacheMutex_384 = 0;
    /* Mutex held around cache look-up and table generation. */
    static wolfSSL_Mutex sp_cache_384_lock;
#endif
27777 
27778 /* Get the cache entry for the point.
27779  *
27780  * g      [in]   Point scalar multipling.
27781  * cache  [out]  Cache table to use.
27782  */
sp_ecc_get_cache_384(const sp_point_384 * g,sp_cache_384_t ** cache)27783 static void sp_ecc_get_cache_384(const sp_point_384* g, sp_cache_384_t** cache)
27784 {
27785     int i;
27786     int j;
27787     uint32_t least;
27788 
27789     if (sp_cache_384_inited == 0) {
27790         for (i=0; i<FP_ENTRIES; i++) {
27791             sp_cache_384[i].set = 0;
27792         }
27793         sp_cache_384_inited = 1;
27794     }
27795 
27796     /* Compare point with those in cache. */
27797     for (i=0; i<FP_ENTRIES; i++) {
27798         if (!sp_cache_384[i].set)
27799             continue;
27800 
27801         if (sp_384_cmp_equal_12(g->x, sp_cache_384[i].x) &
27802                            sp_384_cmp_equal_12(g->y, sp_cache_384[i].y)) {
27803             sp_cache_384[i].cnt++;
27804             break;
27805         }
27806     }
27807 
27808     /* No match. */
27809     if (i == FP_ENTRIES) {
27810         /* Find empty entry. */
27811         i = (sp_cache_384_last + 1) % FP_ENTRIES;
27812         for (; i != sp_cache_384_last; i=(i+1)%FP_ENTRIES) {
27813             if (!sp_cache_384[i].set) {
27814                 break;
27815             }
27816         }
27817 
27818         /* Evict least used. */
27819         if (i == sp_cache_384_last) {
27820             least = sp_cache_384[0].cnt;
27821             for (j=1; j<FP_ENTRIES; j++) {
27822                 if (sp_cache_384[j].cnt < least) {
27823                     i = j;
27824                     least = sp_cache_384[i].cnt;
27825                 }
27826             }
27827         }
27828 
27829         XMEMCPY(sp_cache_384[i].x, g->x, sizeof(sp_cache_384[i].x));
27830         XMEMCPY(sp_cache_384[i].y, g->y, sizeof(sp_cache_384[i].y));
27831         sp_cache_384[i].set = 1;
27832         sp_cache_384[i].cnt = 1;
27833     }
27834 
27835     *cache = &sp_cache_384[i];
27836     sp_cache_384_last = i;
27837 }
27838 #endif /* FP_ECC */
27839 
27840 /* Multiply the base point of P384 by the scalar and return the result.
27841  * If map is true then convert result to affine coordinates.
27842  *
27843  * r     Resulting point.
27844  * g     Point to multiply.
27845  * k     Scalar to multiply by.
27846  * map   Indicates whether to convert result to affine.
27847  * ct    Constant time required.
27848  * heap  Heap to use for allocation.
27849  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
27850  */
sp_384_ecc_mulmod_12(sp_point_384 * r,const sp_point_384 * g,const sp_digit * k,int map,int ct,void * heap)27851 static int sp_384_ecc_mulmod_12(sp_point_384* r, const sp_point_384* g, const sp_digit* k,
27852         int map, int ct, void* heap)
27853 {
27854 #ifndef FP_ECC
27855     return sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
27856 #else
27857     sp_digit tmp[2 * 12 * 7];
27858     sp_cache_384_t* cache;
27859     int err = MP_OKAY;
27860 
27861 #ifndef HAVE_THREAD_LS
27862     if (initCacheMutex_384 == 0) {
27863          wc_InitMutex(&sp_cache_384_lock);
27864          initCacheMutex_384 = 1;
27865     }
27866     if (wc_LockMutex(&sp_cache_384_lock) != 0)
27867        err = BAD_MUTEX_E;
27868 #endif /* HAVE_THREAD_LS */
27869 
27870     if (err == MP_OKAY) {
27871         sp_ecc_get_cache_384(g, &cache);
27872         if (cache->cnt == 2)
27873             sp_384_gen_stripe_table_12(g, cache->table, tmp, heap);
27874 
27875 #ifndef HAVE_THREAD_LS
27876         wc_UnLockMutex(&sp_cache_384_lock);
27877 #endif /* HAVE_THREAD_LS */
27878 
27879         if (cache->cnt < 2) {
27880             err = sp_384_ecc_mulmod_fast_12(r, g, k, map, ct, heap);
27881         }
27882         else {
27883             err = sp_384_ecc_mulmod_stripe_12(r, g, cache->table, k,
27884                     map, ct, heap);
27885         }
27886     }
27887 
27888     return err;
27889 #endif
27890 }
27891 
27892 #endif /* WOLFSSL_SP_SMALL */
27893 /* Multiply the point by the scalar and return the result.
27894  * If map is true then convert result to affine coordinates.
27895  *
27896  * km    Scalar to multiply by.
27897  * p     Point to multiply.
27898  * r     Resulting point.
27899  * map   Indicates whether to convert result to affine.
27900  * heap  Heap to use for allocation.
27901  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
27902  */
sp_ecc_mulmod_384(const mp_int * km,const ecc_point * gm,ecc_point * r,int map,void * heap)27903 int sp_ecc_mulmod_384(const mp_int* km, const ecc_point* gm, ecc_point* r,
27904         int map, void* heap)
27905 {
27906 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27907     sp_point_384* point = NULL;
27908     sp_digit* k = NULL;
27909 #else
27910     sp_point_384 point[1];
27911     sp_digit k[12];
27912 #endif
27913     int err = MP_OKAY;
27914 
27915 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27916     point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap,
27917                                          DYNAMIC_TYPE_ECC);
27918     if (point == NULL)
27919         err = MEMORY_E;
27920     if (err == MP_OKAY) {
27921         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
27922                                DYNAMIC_TYPE_ECC);
27923         if (k == NULL)
27924             err = MEMORY_E;
27925     }
27926 #endif
27927 
27928     if (err == MP_OKAY) {
27929         sp_384_from_mp(k, 12, km);
27930         sp_384_point_from_ecc_point_12(point, gm);
27931 
27932             err = sp_384_ecc_mulmod_12(point, point, k, map, 1, heap);
27933     }
27934     if (err == MP_OKAY) {
27935         err = sp_384_point_to_ecc_point_12(point, r);
27936     }
27937 
27938 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27939     if (k != NULL)
27940         XFREE(k, heap, DYNAMIC_TYPE_ECC);
27941     if (point != NULL)
27942         XFREE(point, heap, DYNAMIC_TYPE_ECC);
27943 #endif
27944 
27945     return err;
27946 }
27947 
27948 /* Multiply the point by the scalar, add point a and return the result.
27949  * If map is true then convert result to affine coordinates.
27950  *
27951  * km      Scalar to multiply by.
27952  * p       Point to multiply.
27953  * am      Point to add to scalar mulitply result.
27954  * inMont  Point to add is in montgomery form.
27955  * r       Resulting point.
27956  * map     Indicates whether to convert result to affine.
27957  * heap    Heap to use for allocation.
27958  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
27959  */
sp_ecc_mulmod_add_384(const mp_int * km,const ecc_point * gm,const ecc_point * am,int inMont,ecc_point * r,int map,void * heap)27960 int sp_ecc_mulmod_add_384(const mp_int* km, const ecc_point* gm,
27961     const ecc_point* am, int inMont, ecc_point* r, int map, void* heap)
27962 {
27963 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27964     sp_point_384* point = NULL;
27965     sp_digit* k = NULL;
27966 #else
27967     sp_point_384 point[2];
27968     sp_digit k[12 + 12 * 2 * 6];
27969 #endif
27970     sp_point_384* addP = NULL;
27971     sp_digit* tmp = NULL;
27972     int err = MP_OKAY;
27973 
27974 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
27975     point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap,
27976                                          DYNAMIC_TYPE_ECC);
27977     if (point == NULL)
27978         err = MEMORY_E;
27979     if (err == MP_OKAY) {
27980         k = (sp_digit*)XMALLOC(
27981             sizeof(sp_digit) * (12 + 12 * 2 * 6), heap,
27982             DYNAMIC_TYPE_ECC);
27983         if (k == NULL)
27984             err = MEMORY_E;
27985     }
27986 #endif
27987 
27988     if (err == MP_OKAY) {
27989         addP = point + 1;
27990         tmp = k + 12;
27991 
27992         sp_384_from_mp(k, 12, km);
27993         sp_384_point_from_ecc_point_12(point, gm);
27994         sp_384_point_from_ecc_point_12(addP, am);
27995     }
27996     if ((err == MP_OKAY) && (!inMont)) {
27997         err = sp_384_mod_mul_norm_12(addP->x, addP->x, p384_mod);
27998     }
27999     if ((err == MP_OKAY) && (!inMont)) {
28000         err = sp_384_mod_mul_norm_12(addP->y, addP->y, p384_mod);
28001     }
28002     if ((err == MP_OKAY) && (!inMont)) {
28003         err = sp_384_mod_mul_norm_12(addP->z, addP->z, p384_mod);
28004     }
28005     if (err == MP_OKAY) {
28006             err = sp_384_ecc_mulmod_12(point, point, k, 0, 0, heap);
28007     }
28008     if (err == MP_OKAY) {
28009             sp_384_proj_point_add_12(point, point, addP, tmp);
28010 
28011         if (map) {
28012                 sp_384_map_12(point, point, tmp);
28013         }
28014 
28015         err = sp_384_point_to_ecc_point_12(point, r);
28016     }
28017 
28018 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
28019     if (k != NULL)
28020         XFREE(k, heap, DYNAMIC_TYPE_ECC);
28021     if (point != NULL)
28022         XFREE(point, heap, DYNAMIC_TYPE_ECC);
28023 #endif
28024 
28025     return err;
28026 }
28027 
28028 #ifdef WOLFSSL_SP_SMALL
/* Striping precomputation table used with the P384 base point
 * (WOLFSSL_SP_SMALL build).
 * 4 points combined into a table of 16 points.
 * Distance of 96 between points.
 * Each entry is a pair of 12 word values; entry 0 is all zeros.
 */
static const sp_table_entry_384 p384_table[16] = {
    /* 0 */
    { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
      { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
    /* 1 */
    { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
        0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
      { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
        0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
    /* 2 */
    { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
        0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
      { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
        0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
    /* 3 */
    { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
        0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
      { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
        0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
    /* 4 */
    { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
        0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
      { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
        0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
    /* 5 */
    { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
        0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
      { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
        0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
    /* 6 */
    { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
        0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
      { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
        0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
    /* 7 */
    { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
        0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
      { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
        0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
    /* 8 */
    { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
        0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
      { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
        0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
    /* 9 */
    { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
        0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
      { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
        0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
    /* 10 */
    { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
        0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
      { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
        0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
    /* 11 */
    { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
        0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
      { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
        0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
    /* 12 */
    { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
        0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
      { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
        0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
    /* 13 */
    { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
        0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
      { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
        0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
    /* 14 */
    { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
        0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
      { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
        0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
    /* 15 */
    { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
        0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
      { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
        0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
};
28113 
28114 /* Multiply the base point of P384 by the scalar and return the result.
28115  * If map is true then convert result to affine coordinates.
28116  *
28117  * Stripe implementation.
28118  * Pre-generated: 2^0, 2^96, ...
28119  * Pre-generated: products of all combinations of above.
28120  * 4 doubles and adds (with qz=1)
28121  *
28122  * r     Resulting point.
28123  * k     Scalar to multiply by.
28124  * map   Indicates whether to convert result to affine.
28125  * ct    Constant time required.
28126  * heap  Heap to use for allocation.
28127  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
28128  */
sp_384_ecc_mulmod_base_12(sp_point_384 * r,const sp_digit * k,int map,int ct,void * heap)28129 static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
28130         int map, int ct, void* heap)
28131 {
28132     return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
28133                                       k, map, ct, heap);
28134 }
28135 
28136 #else
28137 /* Striping precomputation table.
28138  * 8 points combined into a table of 256 points.
28139  * Distance of 48 between points.
28140  */
28141 static const sp_table_entry_384 p384_table[256] = {
28142     /* 0 */
28143     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
28144       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
28145     /* 1 */
28146     { { 0x49c0b528,0x3dd07566,0xa0d6ce38,0x20e378e2,0x541b4d6e,0x879c3afc,
28147         0x59a30eff,0x64548684,0x614ede2b,0x812ff723,0x299e1513,0x4d3aadc2 },
28148       { 0x4b03a4fe,0x23043dad,0x7bb4a9ac,0xa1bfa8bf,0x2e83b050,0x8bade756,
28149         0x68f4ffd9,0xc6c35219,0x3969a840,0xdd800226,0x5a15c5e9,0x2b78abc2 } },
28150     /* 2 */
28151     { { 0x2b0c535b,0x29864753,0x70506296,0x90dd6953,0x216ab9ac,0x038cd6b4,
28152         0xbe12d76a,0x3df9b7b7,0x5f347bdb,0x13f4d978,0x13e94489,0x222c5c9c },
28153       { 0x2680dc64,0x5f8e796f,0x58352417,0x120e7cb7,0xd10740b8,0x254b5d8a,
28154         0x5337dee6,0xc38b8efb,0x94f02247,0xf688c2e1,0x6c25bc4c,0x7b5c75f3 } },
28155     /* 3 */
28156     { { 0x9edffea5,0xe26a3cc3,0x37d7e9fc,0x35bbfd1c,0x9bde3ef6,0xf0e7700d,
28157         0x1a538f5a,0x0380eb47,0x05bf9eb3,0x2e9da8bb,0x1a460c3e,0xdbb93c73 },
28158       { 0xf526b605,0x37dba260,0xfd785537,0x95d4978e,0xed72a04a,0x24ed793a,
28159         0x76005b1a,0x26948377,0x9e681f82,0x99f557b9,0xd64954ef,0xae5f9557 } },
28160     /* 4 */
28161     { { 0xf26feef9,0x24480c57,0x3a0e1240,0xc31a2694,0x273e2bc7,0x735002c3,
28162         0x3ef1ed4c,0x8c42e9c5,0x7f4948e8,0x028babf6,0x8a978632,0x6a502f43 },
28163       { 0xb74536fe,0xf5f13a46,0xd8a9f0eb,0x1d218bab,0x37232768,0x30f36bcc,
28164         0x576e8c18,0xc5317b31,0x9bbcb766,0xef1d57a6,0xb3e3d4dc,0x917c4930 } },
28165     /* 5 */
28166     { { 0xe349ddd0,0x11426e2e,0x9b2fc250,0x9f117ef9,0xec0174a6,0xff36b480,
28167         0x18458466,0x4f4bde76,0x05806049,0x2f2edb6d,0x19dfca92,0x8adc75d1 },
28168       { 0xb7d5a7ce,0xa619d097,0xa34411e9,0x874275e5,0x0da4b4ef,0x5403e047,
28169         0x77901d8f,0x2ebaafd9,0xa747170f,0x5e63ebce,0x7f9d8036,0x12a36944 } },
28170     /* 6 */
28171     { { 0x4fc52870,0x28f9c07a,0x1a53a961,0xce0b3748,0x0e1828d9,0xd550fa18,
28172         0x6adb225a,0xa24abaf7,0x6e58a348,0xd11ed0a5,0x948acb62,0xf3d811e6 },
28173       { 0x4c61ed22,0x8618dd77,0x80b47c9d,0x0bb747f9,0xde6b8559,0x22bf796f,
28174         0x680a21e9,0xfdfd1c6d,0x2af2c9dd,0xc0db1577,0xc1e90f3d,0xa09379e6 } },
28175     /* 7 */
28176     { { 0xe085c629,0x386c66ef,0x095bc89a,0x5fc2a461,0x203f4b41,0x1353d631,
28177         0x7e4bd8f5,0x7ca1972b,0xa7df8ce9,0xb077380a,0xee7e4ea3,0xd8a90389 },
28178       { 0xe7b14461,0x1bc74dc7,0x0c9c4f78,0xdc2cb014,0x84ef0a10,0x52b4b3a6,
28179         0x20327fe2,0xbde6ea5d,0x660f9615,0xb71ec435,0xb8ad8173,0xeede5a04 } },
28180     /* 8 */
28181     { { 0x893b9a2d,0x5584cbb3,0x00850c5d,0x820c660b,0x7df2d43d,0x4126d826,
28182         0x0109e801,0xdd5bbbf0,0x38172f1c,0x85b92ee3,0xf31430d9,0x609d4f93 },
28183       { 0xeadaf9d6,0x1e059a07,0x0f125fb0,0x70e6536c,0x560f20e7,0xd6220751,
28184         0x7aaf3a9a,0xa59489ae,0x64bae14e,0x7b70e2f6,0x76d08249,0x0dd03701 } },
28185     /* 9 */
28186     { { 0x8510521f,0x4cc13be8,0xf724cc17,0x87315ba9,0x353dc263,0xb49d83bb,
28187         0x0c279257,0x8b677efe,0xc93c9537,0x510a1c1c,0xa4702c99,0x33e30cd8 },
28188       { 0x2208353f,0xf0ffc89d,0xced42b2b,0x0170fa8d,0x26e2a5f5,0x090851ed,
28189         0xecb52c96,0x81276455,0x7fe1adf4,0x0646c4e1,0xb0868eab,0x513f047e } },
28190     /* 10 */
28191     { { 0xdf5bdf53,0xc07611f4,0x58b11a6d,0x45d331a7,0x1c4ee394,0x58965daf,
28192         0x5a5878d1,0xba8bebe7,0x82dd3025,0xaecc0a18,0xa923eb8b,0xcf2a3899 },
28193       { 0xd24fd048,0xf98c9281,0x8bbb025d,0x841bfb59,0xc9ab9d53,0xb8ddf8ce,
28194         0x7fef044e,0x538a4cb6,0x23236662,0x092ac21f,0x0b66f065,0xa919d385 } },
28195     /* 11 */
28196     { { 0x85d480d8,0x3db03b40,0x1b287a7d,0x8cd9f479,0x4a8f3bae,0x8f24dc75,
28197         0x3db41892,0x482eb800,0x9c56e0f5,0x38bf9eb3,0x9a91dc6f,0x8b977320 },
28198       { 0x7209cfc2,0xa31b05b2,0x05b2db70,0x4c49bf85,0xd619527b,0x56462498,
28199         0x1fac51ba,0x3fe51039,0xab4b8342,0xfb04f55e,0x04c6eabf,0xc07c10dc } },
28200     /* 12 */
28201     { { 0xdb32f048,0xad22fe4c,0x475ed6df,0x5f23bf91,0xaa66b6cb,0xa50ce0c0,
28202         0xf03405c0,0xdf627a89,0xf95e2d6a,0x3674837d,0xba42e64e,0x081c95b6 },
28203       { 0xe71d6ceb,0xeba3e036,0x6c6b0271,0xb45bcccf,0x0684701d,0x67b47e63,
28204         0xe712523f,0x60f8f942,0x5cd47adc,0x82423472,0x87649cbb,0x83027d79 } },
28205     /* 13 */
28206     { { 0x3615b0b8,0xb3929ea6,0xa54dac41,0xb41441fd,0xb5b6a368,0x8995d556,
28207         0x167ef05e,0xa80d4529,0x6d25a27f,0xf6bcb4a1,0x7bd55b68,0x210d6a4c },
28208       { 0x25351130,0xf3804abb,0x903e37eb,0x1d2df699,0x084c25c8,0x5f201efc,
28209         0xa1c68e91,0x31a28c87,0x563f62a5,0x81dad253,0xd6c415d4,0x5dd6de70 } },
28210     /* 14 */
28211     { { 0x846612ce,0x29f470fd,0xda18d997,0x986f3eec,0x2f34af86,0x6b84c161,
28212         0x46ddaf8b,0x5ef0a408,0xe49e795f,0x14405a00,0xaa2f7a37,0x5f491b16 },
28213       { 0xdb41b38d,0xc7f07ae4,0x18fbfcaa,0xef7d119e,0x14443b19,0x3a18e076,
28214         0x79a19926,0x4356841a,0xe2226fbe,0x91f4a91c,0x3cc88721,0xdc77248c } },
28215     /* 15 */
28216     { { 0xe4b1ec9d,0xd570ff1a,0xe7eef706,0x21d23e0e,0xca19e086,0x3cde40f4,
28217         0xcd4bb270,0x7d6523c4,0xbf13aa6c,0x16c1f06c,0xd14c4b60,0x5aa7245a },
28218       { 0x44b74de8,0x37f81467,0x620a934e,0x839e7a17,0xde8b1aa1,0xf74d14e8,
28219         0xf30d75e2,0x8789fa51,0xc81c261e,0x09b24052,0x33c565ee,0x654e2678 } },
28220     /* 16 */
28221     { { 0x2f9fbe67,0x378205de,0x7f728e44,0xc4afcb83,0x682e00f1,0xdbcec06c,
28222         0x114d5423,0xf2a145c3,0x7a52463e,0xa01d9874,0x7d717b0a,0xfc0935b1 },
28223       { 0xd4d01f95,0x9653bc4f,0x9560ad34,0x9aa83ea8,0xaf8e3f3f,0xf77943dc,
28224         0xe86fe16e,0x70774a10,0xbf9ffdcf,0x6b62e6f1,0x588745c9,0x8a72f39e } },
28225     /* 17 */
28226     { { 0x2341c342,0x73ade4da,0xea704422,0xdd326e54,0x3741cef3,0x336c7d98,
28227         0x59e61549,0x1eafa00d,0xbd9a3efd,0xcd3ed892,0xc5c6c7e4,0x03faf26c },
28228       { 0x3045f8ac,0x087e2fcf,0x174f1e73,0x14a65532,0xfe0af9a7,0x2cf84f28,
28229         0x2cdc935b,0xddfd7a84,0x6929c895,0x4c0f117b,0x4c8bcfcc,0x356572d6 } },
28230     /* 18 */
28231     { { 0x7d8c1bba,0x7ecbac01,0x90b0f3d5,0x6058f9c3,0xf6197d0f,0xaee116e3,
28232         0x4033b128,0xc4dd7068,0xc209b983,0xf084dba6,0x831dbc4a,0x97c7c2cf },
28233       { 0xf96010e8,0x2f4e61dd,0x529faa17,0xd97e4e20,0x69d37f20,0x4ee66660,
28234         0x3d366d72,0xccc139ed,0x13488e0f,0x690b6ee2,0xf3a6d533,0x7cad1dc5 } },
28235     /* 19 */
28236     { { 0xda57a41f,0x660a9a81,0xec0039b6,0xe74a0412,0x5e1dad15,0x42343c6b,
28237         0x46681d4c,0x284f3ff5,0x63749e89,0xb51087f1,0x6f9f2f13,0x070f23cc },
28238       { 0x5d186e14,0x542211da,0xfddb0dff,0x84748f37,0xdb1f4180,0x41a3aab4,
28239         0xa6402d0e,0x25ed667b,0x02f58355,0x2f2924a9,0xfa44a689,0x5844ee7c } },
28240     /* 20 */
28241     { { 0x3f3b236f,0xfab08607,0x81e221da,0x19e9d41d,0x3927b428,0xf3f6571e,
28242         0x7550f1f6,0x4348a933,0xa85e62f0,0x7167b996,0x7f5452bf,0x62d43759 },
28243       { 0xf2955926,0xd85feb9e,0x6df78353,0x440a561f,0x9ca36b59,0x389668ec,
28244         0xa22da016,0x052bf1a1,0xf6093254,0xbdfbff72,0xe22209f3,0x94e50f28 } },
28245     /* 21 */
28246     { { 0x3062e8af,0x90b2e5b3,0xe8a3d369,0xa8572375,0x201db7b1,0x3fe1b00b,
28247         0xee651aa2,0xe926def0,0xb9b10ad7,0x6542c9be,0xa2fcbe74,0x098e309b },
28248       { 0xfff1d63f,0x779deeb3,0x20bfd374,0x23d0e80a,0x8768f797,0x8452bb3b,
28249         0x1f952856,0xcf75bb4d,0x29ea3faa,0x8fe6b400,0x81373a53,0x12bd3e40 } },
28250     /* 22 */
28251     { { 0x104cbba5,0xc023780d,0xfa35dd4c,0x6207e747,0x1ca9b6a3,0x35c23928,
28252         0x97987b10,0x4ff19be8,0x8022eee8,0xb8476bbf,0xd3bbe74d,0xaa0a4a14 },
28253       { 0x187d4543,0x20f94331,0x79f6e066,0x32153870,0xac7e82e1,0x83b0f74e,
28254         0x828f06ab,0xa7748ba2,0xc26ef35f,0xc5f0298a,0x8e9a7dbd,0x0f0c5070 } },
28255     /* 23 */
28256     { { 0xdef029dd,0x0c5c244c,0x850661b8,0x3dabc687,0xfe11d981,0x9992b865,
28257         0x6274dbad,0xe9801b8f,0x098da242,0xe54e6319,0x91a53d08,0x9929a91a },
28258       { 0x35285887,0x37bffd72,0xf1418102,0xbc759425,0xfd2e6e20,0x9280cc35,
28259         0xfbc42ee5,0x735c600c,0x8837619a,0xb7ad2864,0xa778c57b,0xa3627231 } },
28260     /* 24 */
28261     { { 0x91361ed8,0xae799b5c,0x6c63366c,0x47d71b75,0x1b265a6a,0x54cdd521,
28262         0x98d77b74,0xe0215a59,0xbab29db0,0x4424d9b7,0x7fd9e536,0x8b0ffacc },
28263       { 0x37b5d9ef,0x46d85d12,0xbfa91747,0x5b106d62,0x5f99ba2d,0xed0479f8,
28264         0x1d104de4,0x0e6f3923,0x25e8983f,0x83a84c84,0xf8105a70,0xa9507e0a } },
28265     /* 25 */
28266     { { 0x14cf381c,0xf6c68a6e,0xc22e31cc,0xaf9d27bd,0xaa8a5ccb,0x23568d4d,
28267         0xe338e4d2,0xe431eec0,0x8f52ad1f,0xf1a828fe,0xe86acd80,0xdb6a0579 },
28268       { 0x4507832a,0x2885672e,0x887e5289,0x73fc275f,0x05610d08,0x65f80278,
28269         0x075ff5b0,0x8d9b4554,0x09f712b5,0x3a8e8fb1,0x2ebe9cf2,0x39f0ac86 } },
28270     /* 26 */
28271     { { 0x4c52edf5,0xd8fabf78,0xa589ae53,0xdcd737e5,0xd791ab17,0x94918bf0,
28272         0xbcff06c9,0xb5fbd956,0xdca46d45,0xf6d3032e,0x41a3e486,0x2cdff7e1 },
28273       { 0x61f47ec8,0x6674b3ba,0xeef84608,0x8a882163,0x4c687f90,0xa257c705,
28274         0xf6cdf227,0xe30cb2ed,0x7f6ea846,0x2c4c64ca,0xcc6bcd3c,0x186fa17c } },
28275     /* 27 */
28276     { { 0x1dfcb91e,0x48a3f536,0x646d358a,0x83595e13,0x91128798,0xbd15827b,
28277         0x2187757a,0x3ce612b8,0x61bd7372,0x873150a1,0xb662f568,0xf4684530 },
28278       { 0x401896f6,0x8833950b,0x77f3e090,0xe11cb89a,0x48e7f4a5,0xb2f12cac,
28279         0xf606677e,0x313dd769,0x16579f93,0xfdcf08b3,0x46b8f22b,0x6429cec9 } },
28280     /* 28 */
28281     { { 0xbb75f9a4,0x4984dd54,0x29d3b570,0x4aef06b9,0x3d6e4c1e,0xb5f84ca2,
28282         0xb083ef35,0x24c61c11,0x392ca9ff,0xce4a7392,0x6730a800,0x865d6517 },
28283       { 0x722b4a2b,0xca3dfe76,0x7b083e0e,0x12c04bf9,0x1b86b8a5,0x803ce5b5,
28284         0x6a7e3e0c,0x3fc7632d,0xc81adbe4,0xc89970c2,0x120e16b1,0x3cbcd3ad } },
28285     /* 29 */
28286     { { 0xec30ce93,0xfbfb4cc7,0xb72720a2,0x10ed6c7d,0x47b55500,0xec675bf7,
28287         0x333ff7c3,0x90725903,0x5075bfc0,0xc7c3973e,0x07acf31b,0xb049ecb0 },
28288       { 0x4f58839c,0xb4076eaf,0xa2b05e4f,0x101896da,0xab40c66e,0x3f6033b0,
28289         0xc8d864ba,0x19ee9eeb,0x47bf6d2a,0xeb6cf155,0xf826477d,0x8e5a9663 } },
28290     /* 30 */
28291     { { 0xf7fbd5e1,0x69e62fdd,0x76912b1d,0x38ecfe54,0xd1da3bfb,0x845a3d56,
28292         0x1c86f0d4,0x0494950e,0x3bc36ce8,0x83cadbf9,0x4fccc8d1,0x41fce572 },
28293       { 0x8332c144,0x05f939c2,0x0871e46e,0xb17f248b,0x66e8aff6,0x3d8534e2,
28294         0x3b85c629,0x1d06f1dc,0xa3131b73,0xdb06a32e,0x8b3f64e5,0xf295184d } },
28295     /* 31 */
28296     { { 0x36ddc103,0xd9653ff7,0x95ef606f,0x25f43e37,0xfe06dce8,0x09e301fc,
28297         0x30b6eebf,0x85af2341,0x0ff56b20,0x79b12b53,0xfe9a3c6b,0x9b4fb499 },
28298       { 0x51d27ac2,0x0154f892,0x56ca5389,0xd33167e3,0xafc065a6,0x7828ec1f,
28299         0x7f746c9b,0x0959a258,0x0c44f837,0xb18f1be3,0xc4132fdb,0xa7946117 } },
28300     /* 32 */
28301     { { 0x5e3c647b,0xc0426b77,0x8cf05348,0xbfcbd939,0x172c0d3d,0x31d312e3,
28302         0xee754737,0x5f49fde6,0x6da7ee61,0x895530f0,0xe8b3a5fb,0xcf281b0a },
28303       { 0x41b8a543,0xfd149735,0x3080dd30,0x41a625a7,0x653908cf,0xe2baae07,
28304         0xba02a278,0xc3d01436,0x7b21b8f8,0xa0d0222e,0xd7ec1297,0xfdc270e9 } },
28305     /* 33 */
28306     { { 0xbc7f41d6,0x00873c0c,0x1b7ad641,0xd976113e,0x238443fb,0x2a536ff4,
28307         0x41e62e45,0x030d00e2,0x5f545fc6,0x532e9867,0x8e91208c,0xcd033108 },
28308       { 0x9797612c,0xd1a04c99,0xeea674e2,0xd4393e02,0xe19742a1,0xd56fa69e,
28309         0x85f0590e,0xdd2ab480,0x48a2243d,0xa5cefc52,0x54383f41,0x48cc67b6 } },
28310     /* 34 */
28311     { { 0xfc14ab48,0x4e50430e,0x26706a74,0x195b7f4f,0xcc881ff6,0x2fe8a228,
28312         0xd945013d,0xb1b968e2,0x4b92162b,0x936aa579,0x364e754a,0x4fb766b7 },
28313       { 0x31e1ff7f,0x13f93bca,0xce4f2691,0x696eb5ca,0xa2b09e02,0xff754bf8,
28314         0xe58e3ff8,0x58f13c9c,0x1678c0b0,0xb757346f,0xa86692b3,0xd54200db } },
28315     /* 35 */
28316     { { 0x6dda1265,0x9a030bbd,0xe89718dd,0xf7b4f3fc,0x936065b8,0xa6a4931f,
28317         0x5f72241c,0xbce72d87,0x65775857,0x6cbb51cb,0x4e993675,0xc7161815 },
28318       { 0x2ee32189,0xe81a0f79,0x277dc0b2,0xef2fab26,0xb71f469f,0x9e64f6fe,
28319         0xdfdaf859,0xb448ce33,0xbe6b5df1,0x3f5c1c4c,0x1de45f7b,0xfb8dfb00 } },
28320     /* 36 */
28321     { { 0x4d5bb921,0xc7345fa7,0x4d2b667e,0x5c7e04be,0x282d7a3e,0x47ed3a80,
28322         0x7e47b2a4,0x5c2777f8,0x08488e2e,0x89b3b100,0xb2eb5b45,0x9aad77c2 },
28323       { 0xdaac34ae,0xd681bca7,0x26afb326,0x2452e4e5,0x41a1ee14,0x0c887924,
28324         0xc2407ade,0x743b04d4,0xfc17a2ac,0xcb5e999b,0x4a701a06,0x4dca2f82 } },
28325     /* 37 */
28326     { { 0x1127bc1a,0x68e31ca6,0x17ead3be,0xa3edd59b,0xe25f5a15,0x67b6b645,
28327         0xa420e15e,0x76221794,0x4b1e872e,0x794fd83b,0xb2dece1b,0x7cab3f03 },
28328       { 0xca9b3586,0x7119bf15,0x4d250bd7,0xa5545924,0xcc6bcf24,0x173633ea,
28329         0xb1b6f884,0x9bd308c2,0x447d38c3,0x3bae06f5,0xf341fe1c,0x54dcc135 } },
28330     /* 38 */
28331     { { 0x943caf0d,0x56d3598d,0x225ff133,0xce044ea9,0x563fadea,0x9edf6a7c,
28332         0x73e8dc27,0x632eb944,0x3190dcab,0x814b467e,0x6dbb1e31,0x2d4f4f31 },
28333       { 0xa143b7ca,0x8d69811c,0xde7cf950,0x4ec1ac32,0x37b5fe82,0x223ab5fd,
28334         0x9390f1d9,0xe82616e4,0x75804610,0xabff4b20,0x875b08f0,0x11b9be15 } },
28335     /* 39 */
28336     { { 0x3bbe682c,0x4ae31a3d,0x74eef2dd,0xbc7c5d26,0x3c47dd40,0x92afd10a,
28337         0xc14ab9e1,0xec7e0a3b,0xb2e495e4,0x6a6c3dd1,0x309bcd85,0x085ee5e9 },
28338       { 0x8c2e67fd,0xf381a908,0xe261eaf2,0x32083a80,0x96deee15,0x0fcd6a49,
28339         0x5e524c79,0xe3b8fb03,0x1d5b08b9,0x8dc360d9,0x7f26719f,0x3a06e2c8 } },
28340     /* 40 */
28341     { { 0x7237cac0,0x5cd9f5a8,0x43586794,0x93f0b59d,0xe94f6c4e,0x4384a764,
28342         0xb62782d3,0x8304ed2b,0xcde06015,0x0b8db8b3,0x5dbe190f,0x4336dd53 },
28343       { 0x92ab473a,0x57443553,0xbe5ed046,0x031c7275,0x21909aa4,0x3e78678c,
28344         0x99202ddb,0x4ab7e04f,0x6977e635,0x2648d206,0x093198be,0xd427d184 } },
28345     /* 41 */
28346     { { 0x0f9b5a31,0x822848f5,0xbaadb62a,0xbb003468,0x3357559c,0x233a0472,
28347         0x79aee843,0x49ef6880,0xaeb9e1e3,0xa89867a0,0x1f6f9a55,0xc151931b },
28348       { 0xad74251e,0xd264eb0b,0x4abf295e,0x37b9b263,0x04960d10,0xb600921b,
28349         0x4da77dc0,0x0de53dbc,0xd2b18697,0x01d9bab3,0xf7156ddf,0xad54ec7a } },
28350     /* 42 */
28351     { { 0x79efdc58,0x8e74dc35,0x4ff68ddb,0x456bd369,0xd32096a5,0x724e74cc,
28352         0x386783d0,0xe41cff42,0x7c70d8a4,0xa04c7f21,0xe61a19a2,0x41199d2f },
28353       { 0x29c05dd2,0xd389a3e0,0xe7e3fda9,0x535f2a6b,0x7c2b4df8,0x26ecf72d,
28354         0xfe745294,0x678275f4,0x9d23f519,0x6319c9cc,0x88048fc4,0x1e05a02d } },
28355     /* 43 */
28356     { { 0xd4d5ffe8,0x75cc8e2e,0xdbea17f2,0xf8bb4896,0xcee3cb4a,0x35059790,
28357         0xa47c6165,0x4c06ee85,0x92935d2f,0xf98fff25,0x32ffd7c7,0x34c4a572 },
28358       { 0xea0376a2,0xc4b14806,0x4f115e02,0x2ea5e750,0x1e55d7c0,0x532d76e2,
28359         0xf31044da,0x68dc9411,0x71b77993,0x9272e465,0x93a8cfd5,0xadaa38bb } },
28360     /* 44 */
28361     { { 0x7d4ed72a,0x4bf0c712,0xba1f79a3,0xda0e9264,0xf4c39ea4,0x48c0258b,
28362         0x2a715138,0xa5394ed8,0xbf06c660,0x4af511ce,0xec5c37cd,0xfcebceef },
28363       { 0x779ae8c1,0xf23b75aa,0xad1e606e,0xdeff59cc,0x22755c82,0xf3f526fd,
28364         0xbb32cefd,0x64c5ab44,0x915bdefd,0xa96e11a2,0x1143813e,0xab19746a } },
28365     /* 45 */
28366     { { 0xec837d7d,0x43c78585,0xb8ee0ba4,0xca5b6fbc,0xd5dbb5ee,0x34e924d9,
28367         0xbb4f1ca5,0x3f4fa104,0x398640f7,0x15458b72,0xd7f407ea,0x4231faa9 },
28368       { 0xf96e6896,0x53e0661e,0xd03b0f9d,0x554e4c69,0x9c7858d1,0xd4fcb07b,
28369         0x52cb04fa,0x7e952793,0x8974e7f7,0x5f5f1574,0x6b6d57c8,0x2e3fa558 } },
28370     /* 46 */
28371     { { 0x6a9951a8,0x42cd4803,0x42792ad0,0xa8b15b88,0xabb29a73,0x18e8bcf9,
28372         0x409933e8,0xbfd9a092,0xefb88dc4,0x760a3594,0x40724458,0x14418863 },
28373       { 0x99caedc7,0x162a56ee,0x91d101c9,0x8fb12ecd,0x393202da,0xea671967,
28374         0xa4ccd796,0x1aac8c4a,0x1cf185a8,0x7db05036,0x8cfd095a,0x0c9f86cd } },
28375     /* 47 */
28376     { { 0x10b2a556,0x9a728147,0x327b70b2,0x767ca964,0x5e3799b7,0x04ed9e12,
28377         0x22a3eb2a,0x6781d2dc,0x0d9450ac,0x5bd116eb,0xa7ebe08a,0xeccac1fc },
28378       { 0xdc2d6e94,0xde68444f,0x35ecf21b,0x3621f429,0x29e03a2c,0x14e2d543,
28379         0x7d3e7f0a,0x53e42cd5,0x73ed00b9,0xbba26c09,0xc57d2272,0x00297c39 } },
28380     /* 48 */
28381     { { 0xb8243a7d,0x3aaaab10,0x8fa58c5b,0x6eeef93e,0x9ae7f764,0xf866fca3,
28382         0x61ab04d3,0x64105a26,0x03945d66,0xa3578d8a,0x791b848c,0xb08cd3e4 },
28383       { 0x756d2411,0x45edc5f8,0xa755128c,0xd4a790d9,0x49e5f6a0,0xc2cf0963,
28384         0xf649beaa,0xc66d267d,0x8467039e,0x3ce6d968,0x42f7816f,0x50046c6b } },
28385     /* 49 */
28386     { { 0x66425043,0x92ae1602,0xf08db890,0x1ff66afd,0x8f162ce5,0x386f5a7f,
28387         0xfcf5598f,0x18d2dea0,0x1a8ca18e,0x78372b3a,0x8cd0e6f7,0xdf0d20eb },
28388       { 0x75bb4045,0x7edd5e1d,0xb96d94b7,0x252a47ce,0x2c626776,0xbdb29358,
28389         0x40dd1031,0x853c3943,0x7d5f47fd,0x9dc9becf,0xbae4044a,0x27c2302f } },
28390     /* 50 */
28391     { { 0x8f2d49ce,0x2d1d208a,0x162df0a2,0x0d91aa02,0x09a07f65,0x9c5cce87,
28392         0x84339012,0xdf07238b,0x419442cd,0x5028e2c8,0x72062aba,0x2dcbd358 },
28393       { 0xe4680967,0xb5fbc3cb,0x9f92d72c,0x2a7bc645,0x116c369d,0x806c76e1,
28394         0x3177e8d8,0x5c50677a,0x4569df57,0x753739eb,0x36c3f40b,0x2d481ef6 } },
28395     /* 51 */
28396     { { 0xfea1103e,0x1a2d39fd,0x95f81b17,0xeaae5592,0xf59b264a,0xdbd0aa18,
28397         0xcb592ee0,0x90c39c1a,0x9750cca3,0xdf62f80d,0xdf97cc6c,0xda4d8283 },
28398       { 0x1e201067,0x0a6dd346,0x69fb1f6b,0x1531f859,0x1d60121f,0x4895e552,
28399         0x4c041c91,0x0b21aab0,0xbcc1ccf8,0x9d896c46,0x3141bde7,0xd24da3b3 } },
28400     /* 52 */
28401     { { 0x53b0a354,0x575a0537,0x0c6ddcd8,0x392ff2f4,0x56157b94,0x0b8e8cff,
28402         0x3b1b80d1,0x073e57bd,0x3fedee15,0x2a75e0f0,0xaa8e6f19,0x752380e4 },
28403       { 0x6558ffe9,0x1f4e227c,0x19ec5415,0x3a348618,0xf7997085,0xab382d5e,
28404         0xddc46ac2,0x5e6deaff,0xfc8d094c,0xe5144078,0xf60e37c6,0xf674fe51 } },
28405     /* 53 */
28406     { { 0xaf63408f,0x6fb87ae5,0xcd75a737,0xa39c36a9,0xcf4c618d,0x7833313f,
28407         0xf034c88d,0xfbcd4482,0x39b35288,0x4469a761,0x66b5d9c9,0x77a711c5 },
28408       { 0x944f8d65,0x4a695dc7,0x161aaba8,0xe6da5f65,0x24601669,0x8654e9c3,
28409         0x28ae7491,0xbc8b93f5,0x8f5580d8,0x5f1d1e83,0xcea32cc8,0x8ccf9a1a } },
28410     /* 54 */
28411     { { 0x7196fee2,0x28ab110c,0x874c8945,0x75799d63,0x29aedadd,0xa2629348,
28412         0x2be88ff4,0x9714cc7b,0xd58d60d6,0xf71293cf,0x32a564e9,0xda6b6cb3 },
28413       { 0x3dd821c2,0xf43fddb1,0x90dd323d,0xf2f2785f,0x048489f8,0x91246419,
28414         0xd24c6749,0x61660f26,0xc803c15c,0x961d9e8c,0xfaadc4c9,0x631c6158 } },
28415     /* 55 */
28416     { { 0xfd752366,0xacf2ebe0,0x139be88b,0xb93c340e,0x0f20179e,0x98f66485,
28417         0xff1da785,0x14820254,0x4f85c16e,0x5278e276,0x7aab1913,0xa246ee45 },
28418       { 0x53763b33,0x43861eb4,0x45c0bc0d,0xc49f03fc,0xad6b1ea1,0xafff16bc,
28419         0x6fd49c99,0xce33908b,0xf7fde8c3,0x5c51e9bf,0xff142c5e,0x076a7a39 } },
28420     /* 56 */
28421     { { 0x9e338d10,0x04639dfe,0xf42b411b,0x8ee6996f,0xa875cef2,0x960461d1,
28422         0x95b4d0ba,0x1057b6d6,0xa906e0bc,0x27639252,0xe1c20f8a,0x2c19f09a },
28423       { 0xeef4c43d,0x5b8fc3f0,0x07a84aa9,0xe2e1b1a8,0x835d2bdb,0x5f455528,
28424         0x207132dd,0x0f4aee4d,0x3907f675,0xe9f8338c,0x0e0531f0,0x7a874dc9 } },
28425     /* 57 */
28426     { { 0x97c27050,0x84b22d45,0x59e70bf8,0xbd0b8df7,0x79738b9b,0xb4d67405,
28427         0xcd917c4f,0x47f4d5f5,0x13ce6e33,0x9099c4ce,0x521d0f8b,0x942bfd39 },
28428       { 0xa43b566d,0x5028f0f6,0x21bff7de,0xaf6e8669,0xc44232cd,0x83f6f856,
28429         0xf915069a,0x65680579,0xecfecb85,0xd12095a2,0xdb01ba16,0xcf7f06ae } },
28430     /* 58 */
28431     { { 0x8ef96c80,0x0f56e3c4,0x3ddb609c,0xd521f2b3,0x7dc1450d,0x2be94102,
28432         0x02a91fe2,0x2d21a071,0x1efa37de,0x2e6f74fa,0x156c28a1,0x9a9a90b8 },
28433       { 0x9dc7dfcb,0xc54ea9ea,0x2c2c1d62,0xc74e66fc,0x49d3e067,0x9f23f967,
28434         0x54dd38ad,0x1c7c3a46,0x5946cee3,0xc7005884,0x45cc045d,0x89856368 } },
28435     /* 59 */
28436     { { 0xfce73946,0x29da7cd4,0x23168563,0x8f697db5,0xcba92ec6,0x8e235e9c,
28437         0x9f91d3ea,0x55d4655f,0xaa50a6cd,0xf3689f23,0x21e6a1a0,0xdcf21c26 },
28438       { 0x61b818bf,0xcffbc82e,0xda47a243,0xc74a2f96,0x8bc1a0cf,0x234e980a,
28439         0x7929cb6d,0xf35fd6b5,0xefe17d6c,0x81468e12,0x58b2dafb,0xddea6ae5 } },
28440     /* 60 */
28441     { { 0x7e787b2e,0x294de887,0x39a9310d,0x258acc1f,0xac14265d,0x92d9714a,
28442         0x708b48a0,0x18b5591c,0xe1abbf71,0x27cc6bb0,0x568307b9,0xc0581fa3 },
28443       { 0xf24d4d58,0x9e0f58a3,0xe0ce2327,0xfebe9bb8,0x9d1be702,0x91fd6a41,
28444         0xfacac993,0x9a7d8a45,0x9e50d66d,0xabc0a08c,0x06498201,0x02c342f7 } },
28445     /* 61 */
28446     { { 0x157bdbc2,0xccd71407,0xad0e1605,0x72fa89c6,0xb92a015f,0xb1d3da2b,
28447         0xa0a3fe56,0x8ad9e7cd,0x24f06737,0x160edcbd,0x61275be6,0x79d4db33 },
28448       { 0x5f3497c4,0xd3d31fd9,0x04192fb0,0x8cafeaee,0x13a50af3,0xe13ca745,
28449         0x8c85aae5,0x18826167,0x9eb556ff,0xce06cea8,0xbdb549f3,0x2eef1995 } },
28450     /* 62 */
28451     { { 0x50596edc,0x8ed7d3eb,0x905243a2,0xaa359362,0xa4b6d02b,0xa212c2c2,
28452         0xc4fbec68,0x611fd727,0xb84f733d,0x8a0b8ff7,0x5f0daf0e,0xd85a6b90 },
28453       { 0xd4091cf7,0x60e899f5,0x2eff2768,0x4fef2b67,0x10c33964,0xc1f195cb,
28454         0x93626a8f,0x8275d369,0x0d6c840a,0xc77904f4,0x7a868acd,0x88d8b7fd } },
28455     /* 63 */
28456     { { 0x7bd98425,0x85f23723,0xc70b154e,0xd4463992,0x96687a2e,0xcbb00ee2,
28457         0xc83214fd,0x905fdbf7,0x13593684,0x2019d293,0xef51218e,0x0428c393 },
28458       { 0x981e909a,0x40c7623f,0x7be192da,0x92513385,0x4010907e,0x48fe480f,
28459         0x3120b459,0xdd7a187c,0xa1fd8f3c,0xc9d7702d,0xe358efc5,0x66e4753b } },
28460     /* 64 */
28461     { { 0x16973cf4,0x070d34e1,0x7e4f34f7,0x20aee08b,0x5eb8ad29,0x269af9b9,
28462         0xa6a45dda,0xdde0a036,0x63df41e0,0xa18b528e,0xa260df2a,0x03cc71b2 },
28463       { 0xa06b1dd7,0x24a6770a,0x9d2675d3,0x5bfa9c11,0x96844432,0x73c1e2a1,
28464         0x131a6cf0,0x3660558d,0x2ee79454,0xb0289c83,0xc6d8ddcd,0xa6aefb01 } },
28465     /* 65 */
28466     { { 0x01ab5245,0xba1464b4,0xc48d93ff,0x9b8d0b6d,0x93ad272c,0x939867dc,
28467         0xae9fdc77,0xbebe085e,0x894ea8bd,0x73ae5103,0x39ac22e1,0x740fc89a },
28468       { 0x28e23b23,0x5e28b0a3,0xe13104d0,0x2352722e,0xb0a2640d,0xf4667a18,
28469         0x49bb37c3,0xac74a72e,0xe81e183a,0x79f734f0,0x3fd9c0eb,0xbffe5b6c } },
28470     /* 66 */
28471     { { 0xc6a2123f,0xb1a358f5,0xfe28df6d,0x927b2d95,0xf199d2f9,0x89702753,
28472         0x1a3f82dc,0x0a73754c,0x777affe1,0x063d029d,0xdae6d34d,0x5439817e },
28473       { 0x6b8b83c4,0xf7979eef,0x9d945682,0x615cb214,0xc5e57eae,0x8f0e4fac,
28474         0x113047dd,0x042b89b8,0x93f36508,0x888356dc,0x5fd1f32f,0xbf008d18 } },
28475     /* 67 */
28476     { { 0x4e8068db,0x8012aa24,0xa5729a47,0xc72cc641,0x43f0691d,0x3c33df2c,
28477         0x1d92145f,0xfa057347,0xb97f7946,0xaefc0f2f,0x2f8121bf,0x813d75cb },
28478       { 0x4383bba6,0x05613c72,0xa4224b3f,0xa924ce70,0x5f2179a6,0xe59cecbe,
28479         0x79f62b61,0x78e2e8aa,0x53ad8079,0x3ac2cc3b,0xd8f4fa96,0x55518d71 } },
28480     /* 68 */
28481     { { 0x00623f3b,0x03cf2922,0x5f29ebff,0x095c7111,0x80aa6823,0x42d72247,
28482         0x7458c0b0,0x044c7ba1,0x0959ec20,0xca62f7ef,0xf8ca929f,0x40ae2ab7 },
28483       { 0xa927b102,0xb8c5377a,0xdc031771,0x398a86a0,0xc216a406,0x04908f9d,
28484         0x918d3300,0xb423a73a,0xe0b94739,0x634b0ff1,0x2d69f697,0xe29de725 } },
28485     /* 69 */
28486     { { 0x8435af04,0x744d1400,0xfec192da,0x5f255b1d,0x336dc542,0x1f17dc12,
28487         0x636a68a8,0x5c90c2a7,0x7704ca1e,0x960c9eb7,0x6fb3d65a,0x9de8cf1e },
28488       { 0x511d3d06,0xc60fee0d,0xf9eb52c7,0x466e2313,0x206b0914,0x743c0f5f,
28489         0x2191aa4d,0x42f55bac,0xffebdbc2,0xcefc7c8f,0xe6e8ed1c,0xd4fa6081 } },
28490     /* 70 */
28491     { { 0xb0ab9645,0xb5e405d3,0xd5f1f711,0xaeec7f98,0x585c2a6e,0x8ad42311,
28492         0x512c6944,0x045acb9e,0xa90db1c6,0xae106c4e,0x898e6563,0xb89f33d5 },
28493       { 0x7fed2ce4,0x43b07cd9,0xdd815b20,0xf9934e17,0x0a81a349,0x6778d4d5,
28494         0x52918061,0x9e616ade,0xd7e67112,0xfa06db06,0x88488091,0x1da23cf1 } },
28495     /* 71 */
28496     { { 0x42f2c4b5,0x821c46b3,0x66059e47,0x931513ef,0x66f50cd1,0x7030ae43,
28497         0x43e7b127,0x43b536c9,0x5fca5360,0x006258cf,0x6b557abf,0xe4e3ee79 },
28498       { 0x24c8b22f,0xbb6b3900,0xfcbf1054,0x2eb5e2c1,0x567492af,0x937b18c9,
28499         0xacf53957,0xf09432e4,0x1dbf3a56,0x585f5a9d,0xbe0887cf,0xf86751fd } },
28500     /* 72 */
28501     { { 0x9d10e0b2,0x157399cb,0x60dc51b7,0x1c0d5956,0x1f583090,0x1d496b8a,
28502         0x88590484,0x6658bc26,0x03213f28,0x88c08ab7,0x7ae58de4,0x8d2e0f73 },
28503       { 0x486cfee6,0x9b79bc95,0xe9e5bc57,0x036a26c7,0xcd8ae97a,0x1ad03601,
28504         0xff3a0494,0x06907f87,0x2c7eb584,0x078f4bbf,0x7e8d0a5a,0xe3731bf5 } },
28505     /* 73 */
28506     { { 0xe1cd0abe,0x72f2282b,0x87efefa2,0xd4f9015e,0x6c3834bd,0x9d189806,
28507         0xb8a29ced,0x9c8cdcc1,0xfee82ebc,0x0601b9f4,0x7206a756,0x371052bc },
28508       { 0x46f32562,0x76fa1092,0x17351bb4,0xdaad534c,0xb3636bb5,0xc3d64c37,
28509         0x45d54e00,0x038a8c51,0x32c09e7c,0x301e6180,0x95735151,0x9764eae7 } },
28510     /* 74 */
28511     { { 0xcbd5256a,0x8791b19f,0x6ca13a3b,0x4007e0f2,0x4cf06904,0x03b79460,
28512         0xb6c17589,0xb18a9c22,0x81d45908,0xa1cb7d7d,0x21bb68f1,0x6e13fa9d },
28513       { 0xa71e6e16,0x47183c62,0xe18749ed,0x5cf0ef8e,0x2e5ed409,0x2c9c7f9b,
28514         0xe6e117e1,0x042eeacc,0x13fb5a7f,0xb86d4816,0xc9e5feb1,0xea1cf0ed } },
28515     /* 75 */
28516     { { 0xcea4cc9b,0x6e6573c9,0xafcec8f3,0x5417961d,0xa438b6f6,0x804bf02a,
28517         0xdcd4ea88,0xb894b03c,0x3799571f,0xd0f807e9,0x862156e8,0x3466a7f5 },
28518       { 0x56515664,0x51e59acd,0xa3c5eb0b,0x55b0f93c,0x6a4279db,0x84a06b02,
28519         0xc5fae08e,0x5c850579,0xa663a1a2,0xcf07b8db,0xf46ffc8d,0x49a36bbc } },
28520     /* 76 */
28521     { { 0x46d93106,0xe47f5acc,0xaa897c9c,0x65b7ade0,0x12d7e4be,0x37cf4c94,
28522         0xd4b2caa9,0xa2ae9b80,0xe60357a3,0x5e7ce09c,0xc8ecd5f9,0x29f77667 },
28523       { 0xa8a0b1c5,0xdf6868f5,0x62978ad8,0x240858cf,0xdc0002a1,0x0f7ac101,
28524         0xffe9aa05,0x1d28a9d7,0x5b962c97,0x744984d6,0x3d28c8b2,0xa8a7c00b } },
28525     /* 77 */
28526     { { 0xae11a338,0x7c58a852,0xd1af96e7,0xa78613f1,0x5355cc73,0x7e9767d2,
28527         0x792a2de6,0x6ba37009,0x124386b2,0x7d60f618,0x11157674,0xab09b531 },
28528       { 0x98eb9dd0,0x95a04841,0x15070328,0xe6c17acc,0x489c6e49,0xafc6da45,
28529         0xbb211530,0xab45a60a,0x7d7ea933,0xc58d6592,0x095642c6,0xa3ef3c65 } },
28530     /* 78 */
28531     { { 0xdf010879,0x89d420e9,0x39576179,0x9d25255d,0xe39513b6,0x9cdefd50,
28532         0xd5d1c313,0xe4efe45b,0x3f7af771,0xc0149de7,0x340ab06b,0x55a6b4f4 },
28533       { 0xebeaf771,0xf1325251,0x878d4288,0x2ab44128,0x18e05afe,0xfcd5832e,
28534         0xcc1fb62b,0xef52a348,0xc1c4792a,0x2bd08274,0x877c6dc7,0x345c5846 } },
28535     /* 79 */
28536     { { 0xbea65e90,0xde15ceb0,0x2416d99c,0x0987f72b,0xfd863dec,0x44db578d,
28537         0xac6a3578,0xf617b74b,0xdb48e999,0x9e62bd7a,0xeab1a1be,0x877cae61 },
28538       { 0x3a358610,0x23adddaa,0x325e2b07,0x2fc4d6d1,0x1585754e,0x897198f5,
28539         0xb392b584,0xf741852c,0xb55f7de1,0x9927804c,0x1aa8efae,0xe9e6c4ed } },
28540     /* 80 */
28541     { { 0x98683186,0x867db639,0xddcc4ea9,0xfb5cf424,0xd4f0e7bd,0xcc9a7ffe,
28542         0x7a779f7e,0x7c57f71c,0xd6b25ef2,0x90774079,0xb4081680,0x90eae903 },
28543       { 0x0ee1fceb,0xdf2aae5e,0xe86c1a1f,0x3ff1da24,0xca193edf,0x80f587d6,
28544         0xdc9b9d6a,0xa5695523,0x85920303,0x7b840900,0xba6dbdef,0x1efa4dfc } },
28545     /* 81 */
28546     { { 0xe0540015,0xfbd838f9,0xc39077dc,0x2c323946,0xad619124,0x8b1fb9e6,
28547         0x0ca62ea8,0x9612440c,0x2dbe00ff,0x9ad9b52c,0xae197643,0xf52abaa1 },
28548       { 0x2cac32ad,0xd0e89894,0x62a98f91,0xdfb79e42,0x276f55cb,0x65452ecf,
28549         0x7ad23e12,0xdb1ac0d2,0xde4986f0,0xf68c5f6a,0x82ce327d,0x389ac37b } },
28550     /* 82 */
28551     { { 0xf8e60f5b,0x511188b4,0x48aa2ada,0x7fe67015,0x381abca2,0xdb333cb8,
28552         0xdaf3fc97,0xb15e6d9d,0x36aabc03,0x4b24f6eb,0x72a748b4,0xc59789df },
28553       { 0x29cf5279,0x26fcb8a5,0x01ad9a6c,0x7a3c6bfc,0x4b8bac9b,0x866cf88d,
28554         0x9c80d041,0xf4c89989,0x70add148,0xf0a04241,0x45d81a41,0x5a02f479 } },
28555     /* 83 */
28556     { { 0xc1c90202,0xfa5c877c,0xf8ac7570,0xd099d440,0xd17881f7,0x428a5b1b,
28557         0x5b2501d7,0x61e267db,0xf2e4465b,0xf889bf04,0x76aa4cb8,0x4da3ae08 },
28558       { 0xe3e66861,0x3ef0fe26,0x3318b86d,0x5e772953,0x747396df,0xc3c35fbc,
28559         0x439ffd37,0x5115a29c,0xb2d70374,0xbfc4bd97,0x56246b9d,0x088630ea } },
28560     /* 84 */
28561     { { 0xb8a9e8c9,0xcd96866d,0x5bb8091e,0xa11963b8,0x045b3cd2,0xc7f90d53,
28562         0x80f36504,0x755a72b5,0x21d3751c,0x46f8b399,0x53c193de,0x4bffdc91 },
28563       { 0xb89554e7,0xcd15c049,0xf7a26be6,0x353c6754,0xbd41d970,0x79602370,
28564         0x12b176c0,0xde16470b,0x40c8809d,0x56ba1175,0xe435fb1e,0xe2db35c3 } },
28565     /* 85 */
28566     { { 0x6328e33f,0xd71e4aab,0xaf8136d1,0x5486782b,0x86d57231,0x07a4995f,
28567         0x1651a968,0xf1f0a5bd,0x76803b6d,0xa5dc5b24,0x42dda935,0x5c587cbc },
28568       { 0xbae8b4c0,0x2b6cdb32,0xb1331138,0x66d1598b,0x5d7e9614,0x4a23b2d2,
28569         0x74a8c05d,0x93e402a6,0xda7ce82e,0x45ac94e6,0xe463d465,0xeb9f8281 } },
28570     /* 86 */
28571     { { 0xfecf5b9b,0x34e0f9d1,0xf206966a,0xa115b12b,0x1eaa0534,0x5591cf3b,
28572         0xfb1558f9,0x5f0293cb,0x1bc703a5,0x1c8507a4,0x862c1f81,0x92e6b81c },
28573       { 0xcdaf24e3,0xcc9ebc66,0x72fcfc70,0x68917ecd,0x8157ba48,0x6dc9a930,
28574         0xb06ab2b2,0x5d425c08,0x36e929c4,0x362f8ce7,0x62e89324,0x09f6f57c } },
28575     /* 87 */
28576     { { 0xd29375fb,0x1c7d6b78,0xe35d1157,0xfabd851e,0x4243ea47,0xf6f62dcd,
28577         0x8fe30b0f,0x1dd92460,0xffc6e709,0x08166dfa,0x0881e6a7,0xc6c4c693 },
28578       { 0xd6a53fb0,0x20368f87,0x9eb4d1f9,0x38718e9f,0xafd7e790,0x03f08acd,
28579         0x72fe2a1c,0x0835eb44,0x88076e5d,0x7e050903,0xa638e731,0x538f765e } },
28580     /* 88 */
28581     { { 0xc2663b4b,0x0e0249d9,0x47cd38dd,0xe700ab5b,0x2c46559f,0xb192559d,
28582         0x4bcde66d,0x8f9f74a8,0x3e2aced5,0xad161523,0x3dd03a5b,0xc155c047 },
28583       { 0x3be454eb,0x346a8799,0x83b7dccd,0x66ee94db,0xab9d2abe,0x1f6d8378,
28584         0x7733f355,0x4a396dd2,0xf53553c2,0x419bd40a,0x731dd943,0xd0ead98d } },
28585     /* 89 */
28586     { { 0xec142408,0x908e0b0e,0x4114b310,0x98943cb9,0x1742b1d7,0x03dbf7d8,
28587         0x693412f4,0xd270df6b,0x8f69e20c,0xc5065494,0x697e43a1,0xa76a90c3 },
28588       { 0x4624825a,0xe0fa3384,0x8acc34c2,0x82e48c0b,0xe9a14f2b,0x7b24bd14,
28589         0x4db30803,0x4f5dd5e2,0x932da0a3,0x0c77a9e7,0x74c653dc,0x20db90f2 } },
28590     /* 90 */
28591     { { 0x0e6c5fd9,0x261179b7,0x6c982eea,0xf8bec123,0xd4957b7e,0x47683338,
28592         0x0a72f66a,0xcc47e664,0x1bad9350,0xbd54bf6a,0xf454e95a,0xdfbf4c6a },
28593       { 0x6907f4fa,0x3f7a7afa,0x865ca735,0x7311fae0,0x2a496ada,0x24737ab8,
28594         0x15feb79b,0x13e425f1,0xa1b93c21,0xe9e97c50,0x4ddd3eb5,0xb26b6eac } },
28595     /* 91 */
28596     { { 0x2a2e5f2b,0x81cab9f5,0xbf385ac4,0xf93caf29,0xc909963a,0xf4bf35c3,
28597         0x74c9143c,0x081e7300,0xc281b4c5,0x3ea57fa8,0x9b340741,0xe497905c },
28598       { 0x55ab3cfb,0xf556dd8a,0x518db6ad,0xd444b96b,0x5ef4b955,0x34f5425a,
28599         0xecd26aa3,0xdda7a3ac,0xda655e97,0xb57da11b,0xc2024c70,0x02da3eff } },
28600     /* 92 */
28601     { { 0x6481d0d9,0xe24b0036,0x818fdfe2,0x3740dbe5,0x190fda00,0xc1fc1f45,
28602         0x3cf27fde,0x329c9280,0x6934f43e,0x7435cb53,0x7884e8fe,0x2b505a5d },
28603       { 0x711adcc9,0x6cfcc6a6,0x531e21e1,0xf034325c,0x9b2a8a99,0xa2f4a967,
28604         0x3c21bdff,0x9d5f3842,0x31b57d66,0xb25c7811,0x0b8093b9,0xdb5344d8 } },
28605     /* 93 */
28606     { { 0xae50a2f5,0x0d72e667,0xe4a861d1,0x9b7f8d8a,0x330df1cb,0xa129f70f,
28607         0xe04fefc3,0xe90aa5d7,0xe72c3ae1,0xff561ecb,0xcdb955fa,0x0d8fb428 },
28608       { 0xd7663784,0xd2235f73,0x7e2c456a,0xc05baec6,0x2adbfccc,0xe5c292e4,
28609         0xefb110d5,0x4fd17988,0xd19d49f3,0x27e57734,0x84f679fe,0x188ac4ce } },
28610     /* 94 */
28611     { { 0xa796c53e,0x7ee344cf,0x0868009b,0xbbf6074d,0x474a1295,0x1f1594f7,
28612         0xac11632d,0x66776edc,0x04e2fa5a,0x1862278b,0xc854a89a,0x52665cf2 },
28613       { 0x8104ab58,0x7e376464,0x7204fd6d,0x16775913,0x44ea1199,0x86ca06a5,
28614         0x1c9240dd,0xaa3f765b,0x24746149,0x5f8501a9,0xdcd251d7,0x7b982e30 } },
28615     /* 95 */
28616     { { 0xc15f3060,0xe44e9efc,0xa87ebbe6,0x5ad62f2e,0xc79500d4,0x36499d41,
28617         0x336fa9d1,0xa66d6dc0,0x5afd3b1f,0xf8afc495,0xe5c9822b,0x1d8ccb24 },
28618       { 0x79d7584b,0x4031422b,0xea3f20dd,0xc54a0580,0x958468c5,0x3f837c8f,
28619         0xfbea7735,0x3d82f110,0x7dffe2fc,0x679a8778,0x20704803,0x48eba63b } },
28620     /* 96 */
28621     { { 0xdf46e2f6,0x89b10d41,0x19514367,0x13ab57f8,0x1d469c87,0x067372b9,
28622         0x4f6c5798,0x0c195afa,0x272c9acf,0xea43a12a,0x678abdac,0x9dadd8cb },
28623       { 0xe182579a,0xcce56c6b,0x2d26c2d8,0x86febadb,0x2a44745c,0x1c668ee1,
28624         0x98dc047a,0x580acd86,0x51b9ec2d,0x5a2b79cc,0x4054f6a0,0x007da608 } },
28625     /* 97 */
28626     { { 0x17b00dd0,0x9e3ca352,0x0e81a7a6,0x046779cb,0xd482d871,0xb999fef3,
28627         0xd9233fbc,0xe6f38134,0xf48cd0e0,0x112c3001,0x3c6c66ae,0x934e7576 },
28628       { 0xd73234dc,0xb44d4fc3,0x864eafc1,0xfcae2062,0x26bef21a,0x843afe25,
28629         0xf3b75fdf,0x61355107,0x794c2e6b,0x8367a5aa,0x8548a372,0x3d2629b1 } },
28630     /* 98 */
28631     { { 0x437cfaf8,0x6230618f,0x2032c299,0x5b8742cb,0x2293643a,0x949f7247,
28632         0x09464f79,0xb8040f1a,0x4f254143,0x049462d2,0x366c7e76,0xabd6b522 },
28633       { 0xd5338f55,0x119b392b,0x01495a0c,0x1a80a9ce,0xf8d7537e,0xf3118ca7,
28634         0x6bf4b762,0xb715adc2,0xa8482b6c,0x24506165,0x96a7c84d,0xd958d7c6 } },
28635     /* 99 */
28636     { { 0xbdc21f31,0x9ad8aa87,0x8063e58c,0xadb3cab4,0xb07dd7b8,0xefd86283,
28637         0x1be7c6b4,0xc7b9b762,0x015582de,0x2ef58741,0x299addf3,0xc970c52e },
28638       { 0x22f24d66,0x78f02e2a,0x74cc100a,0xefec1d10,0x09316e1a,0xaf2a6a39,
28639         0x5849dd49,0xce7c2205,0x96bffc4c,0x9c1fe75c,0x7ba06ec0,0xcad98fd2 } },
28640     /* 100 */
28641     { { 0xb648b73e,0xed76e2d0,0x1cfd285e,0xa9f92ce5,0x2ed13de1,0xa8c86c06,
28642         0xa5191a93,0x1d3a574e,0x1ad1b8bf,0x385cdf8b,0x47d2cfe3,0xbbecc28a },
28643       { 0x69cec548,0x98d326c0,0xf240a0b2,0x4f5bc1dd,0x29057236,0x241a7062,
28644         0xc68294a4,0x0fc6e9c5,0xa319f17a,0x4d04838b,0x9ffc1c6f,0x8b612cf1 } },
28645     /* 101 */
28646     { { 0x4c3830eb,0x9bb0b501,0x8ee0d0c5,0x3d08f83c,0x79ba9389,0xa4a62642,
28647         0x9cbc2914,0x5d5d4044,0x074c46f0,0xae9eb83e,0x74ead7d6,0x63bb758f },
28648       { 0xc6bb29e0,0x1c40d2ea,0x4b02f41e,0x95aa2d87,0x53cb199a,0x92989175,
28649         0x51584f6d,0xdd91bafe,0x31a1aaec,0x3715efb9,0x46780f9e,0xc1b6ae5b } },
28650     /* 102 */
28651     { { 0x42772f41,0xcded3e4b,0x3bcb79d1,0x3a700d5d,0x80feee60,0x4430d50e,
28652         0xf5e5d4bb,0x444ef1fc,0xe6e358ff,0xc660194f,0x6a91b43c,0xe68a2f32 },
28653       { 0x977fe4d2,0x5842775c,0x7e2a41eb,0x78fdef5c,0xff8df00e,0x5f3bec02,
28654         0x5852525d,0xf4b840cd,0x4e6988bd,0x0870483a,0xcc64b837,0x39499e39 } },
28655     /* 103 */
28656     { { 0xb08df5fe,0xfc05de80,0x63ba0362,0x0c12957c,0xd5cf1428,0xea379414,
28657         0x54ef6216,0xc559132a,0xb9e65cf8,0x33d5f12f,0x1695d663,0x09c60278 },
28658       { 0x61f7a2fb,0x3ac1ced4,0xd4f5eeb8,0xdd838444,0x8318fcad,0x82a38c6c,
28659         0xe9f1a864,0x315be2e5,0x442daf47,0x317b5771,0x95aa5f9e,0x81b5904a } },
28660     /* 104 */
28661     { { 0x8b21d232,0x6b6b1c50,0x8c2cba75,0x87f3dbc0,0xae9f0faf,0xa7e74b46,
28662         0xbb7b8079,0x036a0985,0x8d974a25,0x4f185b90,0xd9af5ec9,0x5aa7cef0 },
28663       { 0x57dcfffc,0xe0566a70,0xb8453225,0x6ea311da,0x23368aa9,0x72ea1a8d,
28664         0x48cd552d,0xed9b2083,0xc80ea435,0xb987967c,0x6c104173,0xad735c75 } },
28665     /* 105 */
28666     { { 0xcee76ef4,0xaea85ab3,0xaf1d2b93,0x44997444,0xeacb923f,0x0851929b,
28667         0x51e3bc0c,0xb080b590,0x59be68a2,0xc4ee1d86,0x64b26cda,0xf00de219 },
28668       { 0xf2e90d4d,0x8d7fb5c0,0x77d9ec64,0x00e219a7,0x5d1c491c,0xc4e6febd,
28669         0x1a8f4585,0x080e3754,0x48d2af9c,0x4a9b86c8,0xb6679851,0x2ed70db6 } },
28670     /* 106 */
28671     { { 0x586f25cb,0xaee44116,0xa0fcf70f,0xf7b6861f,0x18a350e8,0x55d2cd20,
28672         0x92dc286f,0x861bf3e5,0x6226aba7,0x9ab18ffa,0xa9857b03,0xd15827be },
28673       { 0x92e6acef,0x26c1f547,0xac1fbac3,0x422c63c8,0xfcbfd71d,0xa2d8760d,
28674         0xb2511224,0x35f6a539,0x048d1a21,0xbaa88fa1,0xebf999db,0x49f1abe9 } },
28675     /* 107 */
28676     { { 0xf7492b73,0x16f9f4f4,0xcb392b1a,0xcf28ec1e,0x69ca6ffc,0x45b130d4,
28677         0xb72efa58,0x28ba8d40,0x5ca066f5,0xace987c7,0x4ad022eb,0x3e399246 },
28678       { 0x752555bb,0x63a2d84e,0x9c2ae394,0xaaa93b4a,0xc89539ca,0xcd80424e,
28679         0xaa119a99,0x6d6b5a6d,0x379f2629,0xbd50334c,0xef3cc7d3,0x899e925e } },
28680     /* 108 */
28681     { { 0xbf825dc4,0xb7ff3651,0x40b9c462,0x0f741cc4,0x5cc4fb5b,0x771ff5a9,
28682         0x47fd56fe,0xcb9e9c9b,0x5626c0d3,0xbdf053db,0xf7e14098,0xa97ce675 },
28683       { 0x6c934f5e,0x68afe5a3,0xccefc46f,0x6cd5e148,0xd7a88586,0xc7758570,
28684         0xdd558d40,0x49978f5e,0x64ae00c1,0xa1d5088a,0xf1d65bb2,0x58f2a720 } },
28685     /* 109 */
28686     { { 0x3e4daedb,0x66fdda4a,0x65d1b052,0x38318c12,0x4c4bbf5c,0x28d910a2,
28687         0x78a9cd14,0x762fe5c4,0xd2cc0aee,0x08e5ebaa,0xca0c654c,0xd2cdf257 },
28688       { 0x08b717d2,0x48f7c58b,0x386cd07a,0x3807184a,0xae7d0112,0x3240f626,
28689         0xc43917b0,0x03e9361b,0x20aea018,0xf261a876,0x7e1e6372,0x53f556a4 } },
28690     /* 110 */
28691     { { 0x2f512a90,0xc84cee56,0x1b0ea9f1,0x24b3c004,0xe26cc1ea,0x0ee15d2d,
28692         0xf0c9ef7d,0xd848762c,0xd5341435,0x1026e9c5,0xfdb16b31,0x8f5b73dc },
28693       { 0xd2c75d95,0x1f69bef2,0xbe064dda,0x8d33d581,0x57ed35e6,0x8c024c12,
28694         0xc309c281,0xf8d435f9,0xd6960193,0xfd295061,0xe9e49541,0x66618d78 } },
28695     /* 111 */
28696     { { 0x8ce382de,0x571cfd45,0xde900dde,0x175806ee,0x34aba3b5,0x61849965,
28697         0xde7aec95,0xe899778a,0xff4aa97f,0xe8f00f6e,0x010b0c6d,0xae971cb5 },
28698       { 0x3af788f1,0x1827eebc,0xe413fe2d,0xd46229ff,0x4741c9b4,0x8a15455b,
28699         0xf8e424eb,0x5f02e690,0xdae87712,0x40a1202e,0x64944f6d,0x49b3bda2 } },
28700     /* 112 */
28701     { { 0x035b2d69,0xd63c6067,0x6bed91b0,0xb507150d,0x7afb39b2,0x1f35f82f,
28702         0x16012b66,0xb9bd9c01,0xed0a5f50,0x00d97960,0x2716f7c9,0xed705451 },
28703       { 0x127abdb4,0x1576eff4,0xf01e701c,0x6850d698,0x3fc87e2f,0x9fa7d749,
28704         0xb0ce3e48,0x0b6bcc6f,0xf7d8c1c0,0xf4fbe1f5,0x02719cc6,0xcf75230e } },
28705     /* 113 */
28706     { { 0x722d94ed,0x6761d6c2,0x3718820e,0xd1ec3f21,0x25d0e7c6,0x65a40b70,
28707         0xbaf3cf31,0xd67f830e,0xb93ea430,0x633b3807,0x0bc96c69,0x17faa0ea },
28708       { 0xdf866b98,0xe6bf3482,0xa9db52d4,0x205c1ee9,0xff9ab869,0x51ef9bbd,
28709         0x75eeb985,0x3863dad1,0xd3cf442a,0xef216c3b,0xf9c8e321,0x3fb228e3 } },
28710     /* 114 */
28711     { { 0x0760ac07,0x94f9b70c,0x9d79bf4d,0xf3c9ccae,0xc5ffc83d,0x73cea084,
28712         0xdc49c38e,0xef50f943,0xbc9e7330,0xf467a2ae,0x44ea7fba,0x5ee534b6 },
28713       { 0x03609e7f,0x20cb6272,0x62fdc9f0,0x09844355,0x0f1457f7,0xaf5c8e58,
28714         0xb4b25941,0xd1f50a6c,0x2ec82395,0x77cb247c,0xda3dca33,0xa5f3e1e5 } },
28715     /* 115 */
28716     { { 0x7d85fa94,0x023489d6,0x2db9ce47,0x0ba40537,0xaed7aad1,0x0fdf7a1f,
28717         0x9a4ccb40,0xa57b0d73,0x5b18967c,0x48fcec99,0xb7274d24,0xf30b5b6e },
28718       { 0xc81c5338,0x7ccb4773,0xa3ed6bd0,0xb85639e6,0x1d56eada,0x7d9df95f,
28719         0x0a1607ad,0xe256d57f,0x957574d6,0x6da7ffdc,0x01c7a8c4,0x65f84046 } },
28720     /* 116 */
28721     { { 0xcba1e7f1,0x8d45d0cb,0x02b55f64,0xef0a08c0,0x17e19892,0x771ca31b,
28722         0x4885907e,0xe1843ecb,0x364ce16a,0x67797ebc,0x8df4b338,0x816d2b2d },
28723       { 0x39aa8671,0xe870b0e5,0xc102b5f5,0x9f0db3e4,0x1720c697,0x34296659,
28724         0x613c0d2a,0x0ad4c89e,0x418ddd61,0x1af900b2,0xd336e20e,0xe087ca72 } },
28725     /* 117 */
28726     { { 0xaba10079,0x222831ff,0x6d64fff2,0x0dc5f87b,0x3e8cb330,0x44547907,
28727         0x702a33fb,0xe815aaa2,0x5fba3215,0x338d6b2e,0x79f549c8,0x0f7535cb },
28728       { 0x2ee95923,0x471ecd97,0xc6d1c09f,0x1e868b37,0xc666ef4e,0x2bc7b8ec,
28729         0x808a4bfc,0xf5416589,0x3fbc4d2e,0xf23e9ee2,0x2d75125b,0x4357236c } },
28730     /* 118 */
28731     { { 0xba9cdb1b,0xfe176d95,0x2f82791e,0x45a1ca01,0x4de4cca2,0x97654af2,
28732         0x5cc4bcb9,0xbdbf9d0e,0xad97ac0a,0xf6a7df50,0x61359fd6,0xc52112b0 },
28733       { 0x4f05eae3,0x696d9ce3,0xe943ac2b,0x903adc02,0x0848be17,0xa9075347,
28734         0x2a3973e5,0x1e20f170,0x6feb67e9,0xe1aacc1c,0xe16bc6b9,0x2ca0ac32 } },
28735     /* 119 */
28736     { { 0xef871eb5,0xffea12e4,0xa8bf0a7a,0x94c2f25d,0x78134eaa,0x4d1e4c2a,
28737         0x0360fb10,0x11ed16fb,0x85fc11be,0x4029b6db,0xf4d390fa,0x5e9f7ab7 },
28738       { 0x30646612,0x5076d72f,0xdda1d0d8,0xa0afed1d,0x85a1d103,0x29022257,
28739         0x4e276bcd,0xcb499e17,0x51246c3d,0x16d1da71,0x589a0443,0xc72d56d3 } },
28740     /* 120 */
28741     { { 0xdae5bb45,0xdf5ffc74,0x261bd6dc,0x99068c4a,0xaa98ec7b,0xdc0afa7a,
28742         0xf121e96d,0xedd2ee00,0x1414045c,0x163cc7be,0x335af50e,0xb0b1bbce },
28743       { 0x01a06293,0xd440d785,0x6552e644,0xcdebab7c,0x8c757e46,0x48cb8dbc,
28744         0x3cabe3cb,0x81f9cf78,0xb123f59a,0xddd02611,0xeeb3784d,0x3dc7b88e } },
28745     /* 121 */
28746     { { 0xc4741456,0xe1b8d398,0x6032a121,0xa9dfa902,0x1263245b,0x1cbfc86d,
28747         0x5244718c,0xf411c762,0x05b0fc54,0x96521d54,0xdbaa4985,0x1afab46e },
28748       { 0x8674b4ad,0xa75902ba,0x5ad87d12,0x486b43ad,0x36e0d099,0x72b1c736,
28749         0xbb6cd6d6,0x39890e07,0x59bace4e,0x8128999c,0x7b535e33,0xd8da430b } },
28750     /* 122 */
28751     { { 0xc6b75791,0x39f65642,0x21806bfb,0x050947a6,0x1362ef84,0x0ca3e370,
28752         0x8c3d2391,0x9bc60aed,0x732e1ddc,0x9b488671,0xa98ee077,0x12d10d9e },
28753       { 0x3651b7dc,0xb6f2822d,0x80abd138,0x6345a5ba,0x472d3c84,0x62033262,
28754         0xacc57527,0xd54a1d40,0x424447cb,0x6ea46b3a,0x2fb1a496,0x5bc41057 } },
28755     /* 123 */
28756     { { 0xa751cd0e,0xe70c57a3,0xeba3c7d6,0x190d8419,0x9d47d55a,0xb1c3bee7,
28757         0xf912c6d8,0xda941266,0x407a6ad6,0x12e9aacc,0x6e838911,0xd6ce5f11 },
28758       { 0x70e1f2ce,0x063ca97b,0x8213d434,0xa3e47c72,0x84df810a,0xa016e241,
28759         0xdfd881a4,0x688ad7b0,0xa89bf0ad,0xa37d99fc,0xa23c2d23,0xd8e3f339 } },
28760     /* 124 */
28761     { { 0x750bed6f,0xbdf53163,0x83e68b0a,0x808abc32,0x5bb08a33,0x85a36627,
28762         0x6b0e4abe,0xf72a3a0f,0xfaf0c6ad,0xf7716d19,0x5379b25f,0x22dcc020 },
28763       { 0xf9a56e11,0x7400bf8d,0x56a47f21,0x6cb8bad7,0x7a6eb644,0x7c97176f,
28764         0xd1f5b646,0xe8fd84f7,0x44ddb054,0x98320a94,0x1dde86f5,0x07071ba3 } },
28765     /* 125 */
28766     { { 0x98f8fcb9,0x6fdfa0e5,0x94d0d70c,0x89cec8e0,0x106d20a8,0xa0899397,
28767         0xba8acc9c,0x915bfb9a,0x5507e01c,0x1370c94b,0x8a821ffb,0x83246a60 },
28768       { 0xbe3c378f,0xa8273a9f,0x35a25be9,0x7e544789,0x4dd929d7,0x6cfa4972,
28769         0x365bd878,0x987fed9d,0x5c29a7ae,0x4982ac94,0x5ddd7ec5,0x4589a5d7 } },
28770     /* 126 */
28771     { { 0xa95540a9,0x9fabb174,0x0162c5b0,0x7cfb886f,0xea3dee18,0x17be766b,
28772         0xe88e624c,0xff7da41f,0x8b919c38,0xad0b71eb,0xf31ff9a9,0x86a522e0 },
28773       { 0x868bc259,0xbc8e6f72,0x3ccef9e4,0x6130c638,0x9a466555,0x09f1f454,
28774         0x19b2bfb4,0x8e6c0f09,0x0ca7bb22,0x945c46c9,0x4dafb67b,0xacd87168 } },
28775     /* 127 */
28776     { { 0x10c53841,0x090c72ca,0x55a4fced,0xc20ae01b,0xe10234ad,0x03f7ebd5,
28777         0x85892064,0xb3f42a6a,0xb4a14722,0xbdbc30c0,0x8ca124cc,0x971bc437 },
28778       { 0x517ff2ff,0x6f79f46d,0xecba947b,0x6a9c96e2,0x62925122,0x5e79f2f4,
28779         0x6a4e91f1,0x30a96bb1,0x2d4c72da,0x1147c923,0x5811e4df,0x65bc311f } },
28780     /* 128 */
28781     { { 0x139b3239,0x87c7dd7d,0x4d833bae,0x8b57824e,0x9fff0015,0xbcbc4878,
28782         0x909eaf1a,0x8ffcef8b,0xf1443a78,0x9905f4ee,0xe15cbfed,0x020dd4a2 },
28783       { 0xa306d695,0xca2969ec,0xb93caf60,0xdf940cad,0x87ea6e39,0x67f7fab7,
28784         0xf98c4fe5,0x0d0ee10f,0xc19cb91e,0xc646879a,0x7d1d7ab4,0x4b4ea50c } },
28785     /* 129 */
28786     { { 0x7a0db57e,0x19e40945,0x9a8c9702,0xe6017cad,0x1be5cff9,0xdbf739e5,
28787         0xa7a938a2,0x3646b3cd,0x68350dfc,0x04511085,0x56e098b5,0xad3bd6f3 },
28788       { 0xee2e3e3e,0x935ebabf,0x473926cb,0xfbd01702,0x9e9fb5aa,0x7c735b02,
28789         0x2e3feff0,0xc52a1b85,0x046b405a,0x9199abd3,0x39039971,0xe306fcec } },
28790     /* 130 */
28791     { { 0x23e4712c,0xd6d9aec8,0xc3c198ee,0x7ca8376c,0x31bebd8a,0xe6d83187,
28792         0xd88bfef3,0xed57aff3,0xcf44edc7,0x72a645ee,0x5cbb1517,0xd4e63d0b },
28793       { 0xceee0ecf,0x98ce7a1c,0x5383ee8e,0x8f012633,0xa6b455e8,0x3b879078,
28794         0xc7658c06,0xcbcd3d96,0x0783336a,0x721d6fe7,0x5a677136,0xf21a7263 } },
28795     /* 131 */
28796     { { 0x9586ba11,0x19d8b3cd,0x8a5c0480,0xd9e0aeb2,0x2230ef5c,0xe4261dbf,
28797         0x02e6bf09,0x095a9dee,0x80dc7784,0x8963723c,0x145157b1,0x5c97dbaf },
28798       { 0x4bc4503e,0x97e74434,0x85a6b370,0x0fb1cb31,0xcd205d4b,0x3e8df2be,
28799         0xf8f765da,0x497dd1bc,0x6c988a1a,0x92ef95c7,0x64dc4cfa,0x3f924baa } },
28800     /* 132 */
28801     { { 0x7268b448,0x6bf1b8dd,0xefd79b94,0xd4c28ba1,0xe4e3551f,0x2fa1f8c8,
28802         0x5c9187a9,0x769e3ad4,0x40326c0d,0x28843b4d,0x50d5d669,0xfefc8094 },
28803       { 0x90339366,0x30c85bfd,0x5ccf6c3a,0x4eeb56f1,0x28ccd1dc,0x0e72b149,
28804         0xf2ce978e,0x73ee85b5,0x3165bb23,0xcdeb2bf3,0x4e410abf,0x8106c923 } },
28805     /* 133 */
28806     { { 0x7d02f4ee,0xc8df0161,0x18e21225,0x8a781547,0x6acf9e40,0x4ea895eb,
28807         0x6e5a633d,0x8b000cb5,0x7e981ffb,0xf31d86d5,0x4475bc32,0xf5c8029c },
28808       { 0x1b568973,0x764561ce,0xa62996ec,0x2f809b81,0xda085408,0x9e513d64,
28809         0xe61ce309,0xc27d815d,0x272999e0,0x0da6ff99,0xfead73f7,0xbd284779 } },
28810     /* 134 */
28811     { { 0x9b1cdf2b,0x6033c2f9,0xbc5fa151,0x2a99cf06,0x12177b3b,0x7d27d259,
28812         0xc4485483,0xb1f15273,0x102e2297,0x5fd57d81,0xc7f6acb7,0x3d43e017 },
28813       { 0x3a70eb28,0x41a8bb0b,0x3e80b06b,0x67de2d8e,0x70c28de5,0x09245a41,
28814         0xa7b26023,0xad7dbcb1,0x2cbc6c1e,0x70b08a35,0x9b33041f,0xb504fb66 } },
28815     /* 135 */
28816     { { 0xf97a27c2,0xa8e85ab5,0xc10a011b,0x6ac5ec8b,0xffbcf161,0x55745533,
28817         0x65790a60,0x01780e85,0x99ee75b0,0xe451bf85,0x39c29881,0x8907a63b },
28818       { 0x260189ed,0x76d46738,0x47bd35cb,0x284a4436,0x20cab61e,0xd74e8c40,
28819         0x416cf20a,0x6264bf8c,0x5fd820ce,0xfa5a6c95,0xf24bb5fc,0xfa7154d0 } },
28820     /* 136 */
28821     { { 0x9b3f5034,0x18482cec,0xcd9e68fd,0x962d445a,0x95746f23,0x266fb1d6,
28822         0x58c94a4b,0xc66ade5a,0xed68a5b6,0xdbbda826,0x7ab0d6ae,0x05664a4d },
28823       { 0x025e32fc,0xbcd4fe51,0xa96df252,0x61a5aebf,0x31592a31,0xd88a07e2,
28824         0x98905517,0x5d9d94de,0x5fd440e7,0x96bb4010,0xe807db4c,0x1b0c47a2 } },
28825     /* 137 */
28826     { { 0x08223878,0x5c2a6ac8,0xe65a5558,0xba08c269,0x9bbc27fd,0xd22b1b9b,
28827         0x72b9607d,0x919171bf,0xe588dc58,0x9ab455f9,0x23662d93,0x6d54916e },
28828       { 0x3b1de0c1,0x8da8e938,0x804f278f,0xa84d186a,0xd3461695,0xbf4988cc,
28829         0xe10eb0cb,0xf5eae3be,0xbf2a66ed,0x1ff8b68f,0xc305b570,0xa68daf67 } },
28830     /* 138 */
28831     { { 0x44b2e045,0xc1004cff,0x4b1c05d4,0x91b5e136,0x88a48a07,0x53ae4090,
28832         0xea11bb1a,0x73fb2995,0x3d93a4ea,0x32048570,0x3bfc8a5f,0xcce45de8 },
28833       { 0xc2b3106e,0xaff4a97e,0xb6848b4f,0x9069c630,0xed76241c,0xeda837a6,
28834         0x6cc3f6cf,0x8a0daf13,0x3da018a8,0x199d049d,0xd9093ba3,0xf867c6b1 } },
28835     /* 139 */
28836     { { 0x56527296,0xe4d42a56,0xce71178d,0xae26c73d,0x6c251664,0x70a0adac,
28837         0x5dc0ae1d,0x813483ae,0xdaab2daf,0x7574eacd,0xc2d55f4f,0xc56b52dc },
28838       { 0x95f32923,0x872bc167,0x5bdd2a89,0x4be17581,0xa7699f00,0x9b57f1e7,
28839         0x3ac2de02,0x5fcd9c72,0x92377739,0x83af3ba1,0xfc50b97f,0xa64d4e2b } },
28840     /* 140 */
28841     { { 0x0e552b40,0x2172dae2,0xd34d52e8,0x62f49725,0x07958f98,0x7930ee40,
28842         0x751fdd74,0x56da2a90,0xf53e48c3,0xf1192834,0x8e53c343,0x34d2ac26 },
28843       { 0x13111286,0x1073c218,0xda9d9827,0x201dac14,0xee95d378,0xec2c29db,
28844         0x1f3ee0b1,0x9316f119,0x544ce71c,0x7890c9f0,0x27612127,0xd77138af } },
28845     /* 141 */
28846     { { 0x3b4ad1cd,0x78045e6d,0x4aa49bc1,0xcd86b94e,0xfd677a16,0x57e51f1d,
28847         0xfa613697,0xd9290935,0x34f4d893,0x7a3f9593,0x5d5fcf9b,0x8c9c248b },
28848       { 0x6f70d4e9,0x9f23a482,0x63190ae9,0x17273454,0x5b081a48,0x4bdd7c13,
28849         0x28d65271,0x1e2de389,0xe5841d1f,0x0bbaaa25,0x746772e5,0xc4c18a79 } },
28850     /* 142 */
28851     { { 0x593375ac,0x10ee2681,0x7dd5e113,0x4f3288be,0x240f3538,0x9a97b2fb,
28852         0x1de6b1e2,0xfa11089f,0x1351bc58,0x516da562,0x2dfa85b5,0x573b6119 },
28853       { 0x6cba7df5,0x89e96683,0x8c28ab40,0xf299be15,0xad43fcbf,0xe91c9348,
28854         0x9a1cefb3,0xe9bbc7cc,0x738b2775,0xc8add876,0x775eaa01,0x6e3b1f2e } },
28855     /* 143 */
28856     { { 0xb677788b,0x0365a888,0x3fd6173c,0x634ae8c4,0x9e498dbe,0x30498761,
28857         0xc8f779ab,0x08c43e6d,0x4c09aca9,0x068ae384,0x2018d170,0x2380c70b },
28858       { 0xa297c5ec,0xcf77fbc3,0xca457948,0xdacbc853,0x336bec7e,0x3690de04,
28859         0x14eec461,0x26bbac64,0x1f713abf,0xd1c23c7e,0xe6fd569e,0xf08bbfcd } },
28860     /* 144 */
28861     { { 0x84770ee3,0x5f8163f4,0x744a1706,0x0e0c7f94,0xe1b2d46d,0x9c8f05f7,
28862         0xd01fd99a,0x417eafe7,0x11440e5b,0x2ba15df5,0x91a6fbcf,0xdc5c552a },
28863       { 0xa270f721,0x86271d74,0xa004485b,0x32c0a075,0x8defa075,0x9d1a87e3,
28864         0xbf0d20fe,0xb590a7ac,0x8feda1f5,0x430c41c2,0x58f6ec24,0x454d2879 } },
28865     /* 145 */
28866     { { 0x7c525435,0x52b7a635,0x37c4bdbc,0x3d9ef57f,0xdffcc475,0x2bb93e9e,
28867         0x7710f3be,0xf7b8ba98,0x21b727de,0x42ee86da,0x2e490d01,0x55ac3f19 },
28868       { 0xc0c1c390,0x487e3a6e,0x446cde7b,0x036fb345,0x496ae951,0x089eb276,
28869         0x71ed1234,0xedfed4d9,0x900f0b46,0x661b0dd5,0x8582f0d3,0x11bd6f1b } },
28870     /* 146 */
28871     { { 0x076bc9d1,0x5cf9350f,0xcf3cd2c3,0x15d903be,0x25af031c,0x21cfc8c2,
28872         0x8b1cc657,0xe0ad3248,0x70014e87,0xdd9fb963,0x297f1658,0xf0f3a5a1 },
28873       { 0xf1f703aa,0xbb908fba,0x2f6760ba,0x2f9cc420,0x66a38b51,0x00ceec66,
28874         0x05d645da,0x4deda330,0xf7de3394,0xb9cf5c72,0x1ad4c906,0xaeef6502 } },
28875     /* 147 */
28876     { { 0x7a19045d,0x0583c8b1,0xd052824c,0xae7c3102,0xff6cfa58,0x2a234979,
28877         0x62c733c0,0xfe9dffc9,0x9c0c4b09,0x3a7fa250,0x4fe21805,0x516437bb },
28878       { 0xc2a23ddb,0x9454e3d5,0x289c104e,0x0726d887,0x4fd15243,0x8977d918,
28879         0x6d7790ba,0xc559e73f,0x465af85f,0x8fd3e87d,0x5feee46b,0xa2615c74 } },
28880     /* 148 */
28881     { { 0x4335167d,0xc8d607a8,0xe0f5c887,0x8b42d804,0x398d11f9,0x5f9f13df,
28882         0x20740c67,0x5aaa5087,0xa3d9234b,0x83da9a6a,0x2a54bad1,0xbd3a5c4e },
28883       { 0x2db0f658,0xdd13914c,0x5a3f373a,0x29dcb66e,0x5245a72b,0xbfd62df5,
28884         0x91e40847,0x19d18023,0xb136b1ae,0xd9df74db,0x3f93bc5b,0x72a06b6b } },
28885     /* 149 */
28886     { { 0xad19d96f,0x6da19ec3,0xfb2a4099,0xb342daa4,0x662271ea,0x0e61633a,
28887         0xce8c054b,0x3bcece81,0x8bd62dc6,0x7cc8e061,0xee578d8b,0xae189e19 },
28888       { 0xdced1eed,0x73e7a25d,0x7875d3ab,0xc1257f0a,0x1cfef026,0x2cb2d5a2,
28889         0xb1fdf61c,0xd98ef39b,0x24e83e6c,0xcd8e6f69,0xc7b7088b,0xd71e7076 } },
28890     /* 150 */
28891     { { 0x9d4245bf,0x33936830,0x2ac2953b,0x22d96217,0x56c3c3cd,0xb3bf5a82,
28892         0x0d0699e8,0x50c9be91,0x8f366459,0xec094463,0x513b7c35,0x6c056dba },
28893       { 0x045ab0e3,0x687a6a83,0x445c9295,0x8d40b57f,0xa16f5954,0x0f345048,
28894         0x3d8f0a87,0x64b5c639,0x9f71c5e2,0x106353a2,0x874f0dd4,0xdd58b475 } },
28895     /* 151 */
28896     { { 0x62230c72,0x67ec084f,0x481385e3,0xf14f6cca,0x4cda7774,0xf58bb407,
28897         0xaa2dbb6b,0xe15011b1,0x0c035ab1,0xd488369d,0x8245f2fd,0xef83c24a },
28898       { 0x9fdc2538,0xfb57328f,0x191fe46a,0x79808293,0x32ede548,0xe28f5c44,
28899         0xea1a022c,0x1b3cda99,0x3df2ec7f,0x39e639b7,0x760e9a18,0x77b6272b } },
28900     /* 152 */
28901     { { 0xa65d56d5,0x2b1d51bd,0x7ea696e0,0x3a9b71f9,0x9904f4c4,0x95250ecc,
28902         0xe75774b7,0x8bc4d6eb,0xeaeeb9aa,0x0e343f8a,0x930e04cb,0xc473c1d1 },
28903       { 0x064cd8ae,0x282321b1,0x5562221c,0xf4b4371e,0xd1bf1221,0xc1cc81ec,
28904         0xe2c8082f,0xa52a07a9,0xba64a958,0x350d8e59,0x6fb32c9a,0x29e4f3de } },
28905     /* 153 */
28906     { { 0xba89aaa5,0x0aa9d56c,0xc4c6059e,0xf0208ac0,0xbd6ddca4,0x7400d9c6,
28907         0xf2c2f74a,0xb384e475,0xb1562dd3,0x4c1061fc,0x2e153b8d,0x3924e248 },
28908       { 0x849808ab,0xf38b8d98,0xa491aa36,0x29bf3260,0x88220ede,0x85159ada,
28909         0xbe5bc422,0x8b47915b,0xd7300967,0xa934d72e,0x2e515d0d,0xc4f30398 } },
28910     /* 154 */
28911     { { 0x1b1de38b,0xe3e9ee42,0x42636760,0xa124e25a,0x90165b1a,0x90bf73c0,
28912         0x146434c5,0x21802a34,0x2e1fa109,0x54aa83f2,0xed9c51e9,0x1d4bd03c },
28913       { 0x798751e6,0xc2d96a38,0x8c3507f5,0xed27235f,0xc8c24f88,0xb5fb80e2,
28914         0xd37f4f78,0xf873eefa,0xf224ba96,0x7229fd74,0x9edd7149,0x9dcd9199 } },
28915     /* 155 */
28916     { { 0x4e94f22a,0xee9f81a6,0xf71ec341,0xe5609892,0xa998284e,0x6c818ddd,
28917         0x3b54b098,0x9fd47295,0x0e8a7cc9,0x47a6ac03,0xb207a382,0xde684e5e },
28918       { 0x2b6b956b,0x4bdd1ecd,0xf01b3583,0x09084414,0x55233b14,0xe2f80b32,
28919         0xef5ebc5e,0x5a0fec54,0xbf8b29a2,0x74cf25e6,0x7f29e014,0x1c757fa0 } },
28920     /* 156 */
28921     { { 0xeb0fdfe4,0x1bcb5c4a,0xf0899367,0xd7c649b3,0x05bc083b,0xaef68e3f,
28922         0xa78aa607,0x57a06e46,0x21223a44,0xa2136ecc,0x52f5a50b,0x89bd6484 },
28923       { 0x4455f15a,0x724411b9,0x08a9c0fd,0x23dfa970,0x6db63bef,0x7b0da4d1,
28924         0xfb162443,0x6f8a7ec1,0xe98284fb,0xc1ac9cee,0x33566022,0x085a582b } },
28925     /* 157 */
28926     { { 0xec1f138a,0x15cb61f9,0x668f0c28,0x11c9a230,0xdf93f38f,0xac829729,
28927         0x4048848d,0xcef25698,0x2bba8fbf,0x3f686da0,0x111c619a,0xed5fea78 },
28928       { 0xd6d1c833,0x9b4f73bc,0x86e7bf80,0x50951606,0x042b1d51,0xa2a73508,
28929         0x5fb89ec2,0x9ef6ea49,0x5ef8b892,0xf1008ce9,0x9ae8568b,0x78a7e684 } },
28930     /* 158 */
28931     { { 0x10470cd8,0x3fe83a7c,0xf86df000,0x92734682,0xda9409b5,0xb5dac06b,
28932         0x94939c5f,0x1e7a9660,0x5cc116dc,0xdec6c150,0x66bac8cc,0x1a52b408 },
28933       { 0x6e864045,0x5303a365,0x9139efc1,0x45eae72a,0x6f31d54f,0x83bec646,
28934         0x6e958a6d,0x2fb4a86f,0x4ff44030,0x6760718e,0xe91ae0df,0x008117e3 } },
28935     /* 159 */
28936     { { 0x384310a2,0x5d5833ba,0x1fd6c9fc,0xbdfb4edc,0x849c4fb8,0xb9a4f102,
28937         0x581c1e1f,0xe5fb239a,0xd0a9746d,0xba44b2e7,0x3bd942b9,0x78f7b768 },
28938       { 0xc87607ae,0x076c8ca1,0xd5caaa7e,0x82b23c2e,0x2763e461,0x6a581f39,
28939         0x3886df11,0xca8a5e4a,0x264e7f22,0xc87e90cf,0x215cfcfc,0x04f74870 } },
28940     /* 160 */
28941     { { 0x141d161c,0x5285d116,0x93c4ed17,0x67cd2e0e,0x7c36187e,0x12c62a64,
28942         0xed2584ca,0xf5329539,0x42fbbd69,0xc4c777c4,0x1bdfc50a,0x107de776 },
28943       { 0xe96beebd,0x9976dcc5,0xa865a151,0xbe2aff95,0x9d8872af,0x0e0a9da1,
28944         0xa63c17cc,0x5e357a3d,0xe15cc67c,0xd31fdfd8,0x7970c6d8,0xc44bbefd } },
28945     /* 161 */
28946     { { 0x4c0c62f1,0x703f83e2,0x4e195572,0x9b1e28ee,0xfe26cced,0x6a82858b,
28947         0xc43638fa,0xd381c84b,0xa5ba43d8,0x94f72867,0x10b82743,0x3b4a783d },
28948       { 0x7576451e,0xee1ad7b5,0x14b6b5c8,0xc3d0b597,0xfcacc1b8,0x3dc30954,
28949         0x472c9d7b,0x55df110e,0x02f8a328,0x97c86ed7,0x88dc098f,0xd0433413 } },
28950     /* 162 */
28951     { { 0x2ca8f2fe,0x1a60d152,0x491bd41f,0x61640948,0x58dfe035,0x6dae29a5,
28952         0x278e4863,0x9a615bea,0x9ad7c8e5,0xbbdb4477,0x2ceac2fc,0x1c706630 },
28953       { 0x99699b4b,0x5e2b54c6,0x239e17e8,0xb509ca6d,0xea063a82,0x728165fe,
28954         0xb6a22e02,0x6b5e609d,0xb26ee1df,0x12813905,0x439491fa,0x07b9f722 } },
28955     /* 163 */
28956     { { 0x48ff4e49,0x1592ec14,0x6d644129,0x3e4e9f17,0x1156acc0,0x7acf8288,
28957         0xbb092b0b,0x5aa34ba8,0x7d38393d,0xcd0f9022,0xea4f8187,0x416724dd },
28958       { 0xc0139e73,0x3c4e641c,0x91e4d87d,0xe0fe46cf,0xcab61f8a,0xedb3c792,
28959         0xd3868753,0x4cb46de4,0x20f1098a,0xe449c21d,0xf5b8ea6e,0x5e5fd059 } },
28960     /* 164 */
28961     { { 0x75856031,0x7fcadd46,0xeaf2fbd0,0x89c7a4cd,0x7a87c480,0x1af523ce,
28962         0x61d9ae90,0xe5fc1095,0xbcdb95f5,0x3fb5864f,0xbb5b2c7d,0xbeb5188e },
28963       { 0x3ae65825,0x3d1563c3,0x0e57d641,0x116854c4,0x1942ebd3,0x11f73d34,
28964         0xc06955b3,0x24dc5904,0x995a0a62,0x8a0d4c83,0x5d577b7d,0xfb26b86d } },
28965     /* 165 */
28966     { { 0xc686ae17,0xc53108e7,0xd1c1da56,0x9090d739,0x9aec50ae,0x4583b013,
28967         0xa49a6ab2,0xdd9a088b,0xf382f850,0x28192eea,0xf5fe910e,0xcc8df756 },
28968       { 0x9cab7630,0x877823a3,0xfb8e7fc1,0x64984a9a,0x364bfc16,0x5448ef9c,
28969         0xc44e2a9a,0xbbb4f871,0x435c95e9,0x901a41ab,0xaaa50a06,0xc6c23e5f } },
28970     /* 166 */
28971     { { 0x9034d8dd,0xb78016c1,0x0b13e79b,0x856bb44b,0xb3241a05,0x85c6409a,
28972         0x2d78ed21,0x8d2fe19a,0x726eddf2,0xdcc7c26d,0x25104f04,0x3ccaff5f },
28973       { 0x6b21f843,0x397d7edc,0xe975de4c,0xda88e4dd,0x4f5ab69e,0x5273d396,
28974         0x9aae6cc0,0x537680e3,0x3e6f9461,0xf749cce5,0x957bffd3,0x021ddbd9 } },
28975     /* 167 */
28976     { { 0x777233cf,0x7b64585f,0x0942a6f0,0xfe6771f6,0xdfe6eef0,0x636aba7a,
28977         0x86038029,0x63bbeb56,0xde8fcf36,0xacee5842,0xd4a20524,0x48d9aa99 },
28978       { 0x0da5e57a,0xcff7a74c,0xe549d6c9,0xc232593c,0xf0f2287b,0x68504bcc,
28979         0xbc8360b5,0x6d7d098d,0x5b402f41,0xeac5f149,0xb87d1bf1,0x61936f11 } },
28980     /* 168 */
28981     { { 0xb8153a9d,0xaa9da167,0x9e83ecf0,0xa49fe3ac,0x1b661384,0x14c18f8e,
28982         0x38434de1,0x61c24dab,0x283dae96,0x3d973c3a,0x82754fc9,0xc99baa01 },
28983       { 0x4c26b1e3,0x477d198f,0xa7516202,0x12e8e186,0x362addfa,0x386e52f6,
28984         0xc3962853,0x31e8f695,0x6aaedb60,0xdec2af13,0x29cf74ac,0xfcfdb4c6 } },
28985     /* 169 */
28986     { { 0xcca40298,0x6b3ee958,0xf2f5d195,0xc3878153,0xed2eae5b,0x0c565630,
28987         0x3a697cf2,0xd089b37e,0xad5029ea,0xc2ed2ac7,0x0f0dda6a,0x7e5cdfad },
28988       { 0xd9b86202,0xf98426df,0x4335e054,0xed1960b1,0x3f14639e,0x1fdb0246,
28989         0x0db6c670,0x17f709c3,0x773421e1,0xbfc687ae,0x26c1a8ac,0x13fefc4a } },
28990     /* 170 */
28991     { { 0x7ffa0a5f,0xe361a198,0xc63fe109,0xf4b26102,0x6c74e111,0x264acbc5,
28992         0x77abebaf,0x4af445fa,0x24cddb75,0x448c4fdd,0x44506eea,0x0b13157d },
28993       { 0x72e9993d,0x22a6b159,0x85e5ecbe,0x2c3c57e4,0xfd83e1a1,0xa673560b,
28994         0xc3b8c83b,0x6be23f82,0x40bbe38e,0x40b13a96,0xad17399b,0x66eea033 } },
28995     /* 171 */
28996     { { 0xb4c6c693,0x49fc6e95,0x36af7d38,0xefc735de,0x35fe42fc,0xe053343d,
28997         0x6a9ab7c3,0xf0aa427c,0x4a0fcb24,0xc79f0436,0x93ebbc50,0x16287243 },
28998       { 0x16927e1e,0x5c3d6bd0,0x673b984c,0x40158ed2,0x4cd48b9a,0xa7f86fc8,
28999         0x60ea282d,0x1643eda6,0xe2a1beed,0x45b393ea,0x19571a94,0x664c839e } },
29000     /* 172 */
29001     { { 0x27eeaf94,0x57745750,0xea99e1e7,0x2875c925,0x5086adea,0xc127e7ba,
29002         0x86fe424f,0x765252a0,0x2b6c0281,0x1143cc6c,0xd671312d,0xc9bb2989 },
29003       { 0x51acb0a5,0x880c337c,0xd3c60f78,0xa3710915,0x9262b6ed,0x496113c0,
29004         0x9ce48182,0x5d25d9f8,0xb3813586,0x53b6ad72,0x4c0e159c,0x0ea3bebc } },
29005     /* 173 */
29006     { { 0xc5e49bea,0xcaba450a,0x7c05da59,0x684e5415,0xde7ac36c,0xa2e9cab9,
29007         0x2e6f957b,0x4ca79b5f,0x09b817b1,0xef7b0247,0x7d89df0f,0xeb304990 },
29008       { 0x46fe5096,0x508f7307,0x2e04eaaf,0x695810e8,0x3512f76c,0x88ef1bd9,
29009         0x3ebca06b,0x77661351,0xccf158b7,0xf7d4863a,0x94ee57da,0xb2a81e44 } },
29010     /* 174 */
29011     { { 0x6d53e6ba,0xff288e5b,0x14484ea2,0xa90de1a9,0xed33c8ec,0x2fadb60c,
29012         0x28b66a40,0x579d6ef3,0xec24372d,0x4f2dd6dd,0x1d66ec7d,0xe9e33fc9 },
29013       { 0x039eab6e,0x110899d2,0x3e97bb5e,0xa31a667a,0xcfdce68e,0x6200166d,
29014         0x5137d54b,0xbe83ebae,0x4800acdf,0x085f7d87,0x0c6f8c86,0xcf4ab133 } },
29015     /* 175 */
29016     { { 0x931e08fb,0x03f65845,0x1506e2c0,0x6438551e,0x9c36961f,0x5791f0dc,
29017         0xe3dcc916,0x68107b29,0xf495d2ca,0x83242374,0x6ee5895b,0xd8cfb663 },
29018       { 0xa0349b1b,0x525e0f16,0x4a0fab86,0x33cd2c6c,0x2af8dda9,0x46c12ee8,
29019         0x71e97ad3,0x7cc424ba,0x37621eb0,0x69766ddf,0xa5f0d390,0x95565f56 } },
29020     /* 176 */
29021     { { 0x1a0f5e94,0xe0e7bbf2,0x1d82d327,0xf771e115,0xceb111fa,0x10033e3d,
29022         0xd3426638,0xd269744d,0x00d01ef6,0xbdf2d9da,0xa049ceaf,0x1cb80c71 },
29023       { 0x9e21c677,0x17f18328,0x19c8f98b,0x6452af05,0x80b67997,0x35b9c5f7,
29024         0x40f8f3d4,0x5c2e1cbe,0x66d667ca,0x43f91656,0xcf9d6e79,0x9faaa059 } },
29025     /* 177 */
29026     { { 0x0a078fe6,0x8ad24618,0x464fd1dd,0xf6cc73e6,0xc3e37448,0x4d2ce34d,
29027         0xe3271b5f,0x624950c5,0xefc5af72,0x62910f5e,0xaa132bc6,0x8b585bf8 },
29028       { 0xa839327f,0x11723985,0x4aac252f,0x34e2d27d,0x6296cc4e,0x402f59ef,
29029         0x47053de9,0x00ae055c,0x28b4f09b,0xfc22a972,0xfa0c180e,0xa9e86264 } },
29030     /* 178 */
29031     { { 0xbc310ecc,0x0b7b6224,0x67fa14ed,0x8a1a74f1,0x7214395c,0x87dd0960,
29032         0xf5c91128,0xdf1b3d09,0x86b264a8,0x39ff23c6,0x3e58d4c5,0xdc2d49d0 },
29033       { 0xa9d6f501,0x2152b7d3,0xc04094f7,0xf4c32e24,0xd938990f,0xc6366596,
29034         0x94fb207f,0x084d078f,0x328594cb,0xfd99f1d7,0xcb2d96b3,0x36defa64 } },
29035     /* 179 */
29036     { { 0x13ed7cbe,0x4619b781,0x9784bd0e,0x95e50015,0x2c7705fe,0x2a32251c,
29037         0x5f0dd083,0xa376af99,0x0361a45b,0x55425c6c,0x1f291e7b,0x812d2cef },
29038       { 0x5fd94972,0xccf581a0,0xe56dc383,0x26e20e39,0x63dbfbf0,0x0093685d,
29039         0x36b8c575,0x1fc164cc,0x390ef5e7,0xb9c5ab81,0x26908c66,0x40086beb } },
29040     /* 180 */
29041     { { 0x37e3c115,0xe5e54f79,0xc1445a8a,0x69b8ee8c,0xb7659709,0x79aedff2,
29042         0x1b46fbe6,0xe288e163,0xd18d7bb7,0xdb4844f0,0x48aa6424,0xe0ea23d0 },
29043       { 0xf3d80a73,0x714c0e4e,0x3bd64f98,0x87a0aa9e,0x2ec63080,0x8844b8a8,
29044         0x255d81a3,0xe0ac9c30,0x455397fc,0x86151237,0x2f820155,0x0b979464 } },
29045     /* 181 */
29046     { { 0x4ae03080,0x127a255a,0x580a89fb,0x232306b4,0x6416f539,0x04e8cd6a,
29047         0x13b02a0e,0xaeb70dee,0x4c09684a,0xa3038cf8,0x28e433ee,0xa710ec3c },
29048       { 0x681b1f7d,0x77a72567,0x2fc28170,0x86fbce95,0xf5735ac8,0xd3408683,
29049         0x6bd68e93,0x3a324e2a,0xc027d155,0x7ec74353,0xd4427177,0xab60354c } },
29050     /* 182 */
29051     { { 0xef4c209d,0x32a5342a,0x08d62704,0x2ba75274,0xc825d5fe,0x4bb4af6f,
29052         0xd28e7ff1,0x1c3919ce,0xde0340f6,0x1dfc2fdc,0x29f33ba9,0xc6580baf },
29053       { 0x41d442cb,0xae121e75,0x3a4724e4,0x4c7727fd,0x524f3474,0xe556d6a4,
29054         0x785642a2,0x87e13cc7,0xa17845fd,0x182efbb1,0x4e144857,0xdcec0cf1 } },
29055     /* 183 */
29056     { { 0xe9539819,0x1cb89541,0x9d94dbf1,0xc8cb3b4f,0x417da578,0x1d353f63,
29057         0x8053a09e,0xb7a697fb,0xc35d8b78,0x8d841731,0xb656a7a9,0x85748d6f },
29058       { 0xc1859c5d,0x1fd03947,0x535d22a2,0x6ce965c1,0x0ca3aadc,0x1966a13e,
29059         0x4fb14eff,0x9802e41d,0x76dd3fcd,0xa9048cbb,0xe9455bba,0x89b182b5 } },
29060     /* 184 */
29061     { { 0x43360710,0xd777ad6a,0x55e9936b,0x841287ef,0x04a21b24,0xbaf5c670,
29062         0x35ad86f1,0xf2c0725f,0xc707e72e,0x338fa650,0xd8883e52,0x2bf8ed2e },
29063       { 0xb56e0d6a,0xb0212cf4,0x6843290c,0x50537e12,0x98b3dc6f,0xd8b184a1,
29064         0x0210b722,0xd2be9a35,0x559781ee,0x407406db,0x0bc18534,0x5a78d591 } },
29065     /* 185 */
29066     { { 0xd748b02c,0x4d57aa2a,0xa12b3b95,0xbe5b3451,0x64711258,0xadca7a45,
29067         0x322153db,0x597e091a,0x32eb1eab,0xf3271006,0x2873f301,0xbd9adcba },
29068       { 0x38543f7f,0xd1dc79d1,0x921b1fef,0x00022092,0x1e5df8ed,0x86db3ef5,
29069         0x9e6b944a,0x888cae04,0x791a32b4,0x71bd29ec,0xa6d1c13e,0xd3516206 } },
29070     /* 186 */
29071     { { 0x55924f43,0x2ef6b952,0x4f9de8d5,0xd2f401ae,0xadc68042,0xfc73e8d7,
29072         0x0d9d1bb4,0x627ea70c,0xbbf35679,0xc3bb3e3e,0xd882dee4,0x7e8a254a },
29073       { 0xb5924407,0x08906f50,0xa1ad444a,0xf14a0e61,0x65f3738e,0xaa0efa21,
29074         0xae71f161,0xd60c7dd6,0xf175894d,0x9e8390fa,0x149f4c00,0xd115cd20 } },
29075     /* 187 */
29076     { { 0xa52abf77,0x2f2e2c1d,0x54232568,0xc2a0dca5,0x54966dcc,0xed423ea2,
29077         0xcd0dd039,0xe48c93c7,0x176405c7,0x1e54a225,0x70d58f2e,0x1efb5b16 },
29078       { 0x94fb1471,0xa751f9d9,0x67d2941d,0xfdb31e1f,0x53733698,0xa6c74eb2,
29079         0x89a0f64a,0xd3155d11,0xa4b8d2b6,0x4414cfe4,0xf7a8e9e3,0x8d5a4be8 } },
29080     /* 188 */
29081     { { 0x52669e98,0x5c96b4d4,0x8fd42a03,0x4547f922,0xd285174e,0xcf5c1319,
29082         0x064bffa0,0x805cd1ae,0x246d27e7,0x50e8bc4f,0xd5781e11,0xf89ef98f },
29083       { 0xdee0b63f,0xb4ff95f6,0x222663a4,0xad850047,0x4d23ce9c,0x02691860,
29084         0x50019f59,0x3e5309ce,0x69a508ae,0x27e6f722,0x267ba52c,0xe9376652 } },
29085     /* 189 */
29086     { { 0xc0368708,0xa04d289c,0x5e306e1d,0xc458872f,0x33112fea,0x76fa23de,
29087         0x6efde42e,0x718e3974,0x1d206091,0xf0c98cdc,0x14a71987,0x5fa3ca62 },
29088       { 0xdcaa9f2a,0xeee8188b,0x589a860d,0x312cc732,0xc63aeb1f,0xf9808dd6,
29089         0x4ea62b53,0x70fd43db,0x890b6e97,0x2c2bfe34,0xfa426aa6,0x105f863c } },
29090     /* 190 */
29091     { { 0xb38059ad,0x0b29795d,0x90647ea0,0x5686b77e,0xdb473a3e,0xeff0470e,
29092         0xf9b6d1e2,0x278d2340,0xbd594ec7,0xebbff95b,0xd3a7f23d,0xf4b72334 },
29093       { 0xa5a83f0b,0x2a285980,0x9716a8b3,0x0786c41a,0x22511812,0x138901bd,
29094         0xe2fede6e,0xd1b55221,0xdf4eb590,0x0806e264,0x762e462e,0x6c4c897e } },
29095     /* 191 */
29096     { { 0xb4b41d9d,0xd10b905f,0x4523a65b,0x826ca466,0xb699fa37,0x535bbd13,
29097         0x73bc8f90,0x5b9933d7,0xcd2118ad,0x9332d61f,0xd4a65fd0,0x158c693e },
29098       { 0xe6806e63,0x4ddfb2a8,0xb5de651b,0xe31ed3ec,0x819bc69a,0xf9460e51,
29099         0x2c76b1f8,0x6229c0d6,0x901970a3,0xbb78f231,0x9cee72b8,0x31f3820f } },
29100     /* 192 */
29101     { { 0xc09e1c72,0xe931caf2,0x12990cf4,0x0715f298,0x943262d8,0x33aad81d,
29102         0x73048d3f,0x5d292b7a,0xdc7415f6,0xb152aaa4,0x0fd19587,0xc3d10fd9 },
29103       { 0x75ddadd0,0xf76b35c5,0x1e7b694c,0x9f5f4a51,0xc0663025,0x2f1ab7eb,
29104         0x920260b0,0x01c9cc87,0x05d39da6,0xc4b1f61a,0xeb4a9c4e,0x6dcd76c4 } },
29105     /* 193 */
29106     { { 0xfdc83f01,0x0ba0916f,0x9553e4f9,0x354c8b44,0xffc5e622,0xa6cc511a,
29107         0xe95be787,0xb954726a,0x75b41a62,0xcb048115,0xebfde989,0xfa2ae6cd },
29108       { 0x0f24659a,0x6376bbc7,0x4c289c43,0x13a999fd,0xec9abd8b,0xc7134184,
29109         0xa789ab04,0x28c02bf6,0xd3e526ec,0xff841ebc,0x640893a8,0x442b191e } },
29110     /* 194 */
29111     { { 0xfa2b6e20,0x4cac6c62,0xf6d69861,0x97f29e9b,0xbc96d12d,0x228ab1db,
29112         0x5e8e108d,0x6eb91327,0x40771245,0xd4b3d4d1,0xca8a803a,0x61b20623 },
29113       { 0xa6a560b1,0x2c2f3b41,0x3859fcf4,0x879e1d40,0x024dbfc3,0x7cdb5145,
29114         0x3bfa5315,0x55d08f15,0xaa93823a,0x2f57d773,0xc6a2c9a2,0xa97f259c } },
29115     /* 195 */
29116     { { 0xe58edbbb,0xc306317b,0x79dfdf13,0x25ade51c,0x16d83dd6,0x6b5beaf1,
29117         0x1dd8f925,0xe8038a44,0xb2a87b6b,0x7f00143c,0xf5b438de,0xa885d00d },
29118       { 0xcf9e48bd,0xe9f76790,0xa5162768,0xf0bdf9f0,0xad7b57cb,0x0436709f,
29119         0xf7c15db7,0x7e151c12,0x5d90ee3b,0x3514f022,0x2c361a8d,0x2e84e803 } },
29120     /* 196 */
29121     { { 0x563ec8d8,0x2277607d,0xe3934cb7,0xa661811f,0xf58fd5de,0x3ca72e7a,
29122         0x62294c6a,0x7989da04,0xf6bbefe9,0x88b3708b,0x53ed7c82,0x0d524cf7 },
29123       { 0x2f30c073,0x69f699ca,0x9dc1dcf3,0xf0fa264b,0x05f0aaf6,0x44ca4568,
29124         0xd19b9baf,0x0f5b23c7,0xeabd1107,0x39193f41,0x2a7c9b83,0x9e3e10ad } },
29125     /* 197 */
29126     { { 0xd4ae972f,0xa90824f0,0xc6e846e7,0x43eef02b,0x29d2160a,0x7e460612,
29127         0xfe604e91,0x29a178ac,0x4eb184b2,0x23056f04,0xeb54cdf4,0x4fcad55f },
29128       { 0xae728d15,0xa0ff96f3,0xc6a00331,0x8a2680c6,0x7ee52556,0x5f84cae0,
29129         0xc5a65dad,0x5e462c3a,0xe2d23f4f,0x5d2b81df,0xc5b1eb07,0x6e47301b } },
29130     /* 198 */
29131     { { 0xaf8219b9,0x77411d68,0x51b1907a,0xcb883ce6,0x101383b5,0x25c87e57,
29132         0x982f970d,0x9c7d9859,0x118305d2,0xaa6abca5,0x9013a5db,0x725fed2f },
29133       { 0xababd109,0x487cdbaf,0x87586528,0xc0f8cf56,0x8ad58254,0xa02591e6,
29134         0xdebbd526,0xc071b1d1,0x961e7e31,0x927dfe8b,0x9263dfe1,0x55f895f9 } },
29135     /* 199 */
29136     { { 0xb175645b,0xf899b00d,0xb65b4b92,0x51f3a627,0xb67399ef,0xa2f3ac8d,
29137         0xe400bc20,0xe717867f,0x1967b952,0x42cc9020,0x3ecd1de1,0x3d596751 },
29138       { 0xdb979775,0xd41ebcde,0x6a2e7e88,0x99ba61bc,0x321504f2,0x039149a5,
29139         0x27ba2fad,0xe7dc2314,0xb57d8368,0x9f556308,0x57da80a7,0x2b6d16c9 } },
29140     /* 200 */
29141     { { 0x279ad982,0x84af5e76,0x9c8b81a6,0x9bb4c92d,0x0e698e67,0xd79ad44e,
29142         0x265fc167,0xe8be9048,0x0c3a4ccc,0xf135f7e6,0xb8863a33,0xa0a10d38 },
29143       { 0xd386efd9,0xe197247c,0xb52346c2,0x0eefd3f9,0x78607bc8,0xc22415f9,
29144         0x508674ce,0xa2a8f862,0xc8c9d607,0xa72ad09e,0x50fa764f,0xcd9f0ede } },
29145     /* 201 */
29146     { { 0xd1a46d4d,0x063391c7,0x9eb01693,0x2df51c11,0x849e83de,0xc5849800,
29147         0x8ad08382,0x48fd09aa,0xaa742736,0xa405d873,0xe1f9600c,0xee49e61e },
29148       { 0x48c76f73,0xd76676be,0x01274b2a,0xd9c100f6,0x83f8718d,0x110bb67c,
29149         0x02fc0d73,0xec85a420,0x744656ad,0xc0449e1e,0x37d9939b,0x28ce7376 } },
29150     /* 202 */
29151     { { 0x44544ac7,0x97e9af72,0xba010426,0xf2c658d5,0xfb3adfbd,0x732dec39,
29152         0xa2df0b07,0xd12faf91,0x2171e208,0x8ac26725,0x5b24fa54,0xf820cdc8 },
29153       { 0x94f4cf77,0x307a6eea,0x944a33c6,0x18c783d2,0x0b741ac5,0x4b939d4c,
29154         0x3ffbb6e4,0x1d7acd15,0x7a255e44,0x06a24858,0xce336d50,0x14fbc494 } },
29155     /* 203 */
29156     { { 0x51584e3c,0x9b920c0c,0xf7e54027,0xc7733c59,0x88422bbe,0xe24ce139,
29157         0x523bd6ab,0x11ada812,0xb88e6def,0xde068800,0xfe8c582d,0x7b872671 },
29158       { 0x7de53510,0x4e746f28,0xf7971968,0x492f8b99,0x7d928ac2,0x1ec80bc7,
29159         0x432eb1b5,0xb3913e48,0x32028f6e,0xad084866,0x8fc2f38b,0x122bb835 } },
29160     /* 204 */
29161     { { 0x3b0b29c3,0x0a9f3b1e,0x4fa44151,0x837b6432,0x17b28ea7,0xb9905c92,
29162         0x98451750,0xf39bc937,0xce8b6da1,0xcd383c24,0x010620b2,0x299f57db },
29163       { 0x58afdce3,0x7b6ac396,0x3d05ef47,0xa15206b3,0xb9bb02ff,0xa0ae37e2,
29164         0x9db3964c,0x107760ab,0x67954bea,0xe29de9a0,0x431c3f82,0x446a1ad8 } },
29165     /* 205 */
29166     { { 0x5c6b8195,0xc6fecea0,0xf49e71b9,0xd744a7c5,0x177a7ae7,0xa8e96acc,
29167         0x358773a7,0x1a05746c,0x37567369,0xa4162146,0x87d1c971,0xaa0217f7 },
29168       { 0x77fd3226,0x61e9d158,0xe4f600be,0x0f6f2304,0x7a6dff07,0xa9c4cebc,
29169         0x09f12a24,0xd15afa01,0x8c863ee9,0x2bbadb22,0xe5eb8c78,0xa28290e4 } },
29170     /* 206 */
29171     { { 0x3e9de330,0x55b87fa0,0x195c145b,0x12b26066,0xa920bef0,0xe08536e0,
29172         0x4d195adc,0x7bff6f2c,0x945f4187,0x7f319e9d,0xf892ce47,0xf9848863 },
29173       { 0x4fe37657,0xd0efc1d3,0x5cf0e45a,0x3c58de82,0x8b0ccbbe,0x626ad21a,
29174         0xaf952fc5,0xd2a31208,0xeb437357,0x81791995,0x98e95d4f,0x5f19d30f } },
29175     /* 207 */
29176     { { 0x0e6865bb,0x72e83d9a,0xf63456a6,0x22f5af3b,0x463c8d9e,0x409e9c73,
29177         0xdfe6970e,0x40e9e578,0x711b91ca,0x876b6efa,0x942625a3,0x895512cf },
29178       { 0xcb4e462b,0x84c8eda8,0x4412e7c8,0x84c0154a,0xceb7b71f,0x04325db1,
29179         0x66f70877,0x1537dde3,0x1992b9ac,0xf3a09399,0xd498ae77,0xa7316606 } },
29180     /* 208 */
29181     { { 0xcad260f5,0x13990d2f,0xeec0e8c0,0x76c3be29,0x0f7bd7d5,0x7dc5bee0,
29182         0xefebda4b,0x9be167d2,0x9122b87e,0xcce3dde6,0x82b5415c,0x75a28b09 },
29183       { 0xe84607a6,0xf6810bcd,0x6f4dbf0d,0xc6d58128,0x1b4dafeb,0xfead577d,
29184         0x066b28eb,0x9bc440b2,0x8b17e84b,0x53f1da97,0xcda9a575,0x0459504b } },
29185     /* 209 */
29186     { { 0x329e5836,0x13e39a02,0xf717269d,0x2c9e7d51,0xf26c963b,0xc5ac58d6,
29187         0x79967bf5,0x3b0c6c43,0x55908d9d,0x60bbea3f,0xf07c9ad1,0xd84811e7 },
29188       { 0x5bd20e4a,0xfe7609a7,0x0a70baa8,0xe4325dd2,0xb3600386,0x3711f370,
29189         0xd0924302,0x97f9562f,0x4acc4436,0x040dc0c3,0xde79cdd4,0xfd6d725c } },
29190     /* 210 */
29191     { { 0xcf13eafb,0xb3efd0e3,0x5aa0ae5f,0x21009cbb,0x79022279,0xe480c553,
29192         0xb2fc9a6d,0x755cf334,0x07096ae7,0x8564a5bf,0xbd238139,0xddd649d0 },
29193       { 0x8a045041,0xd0de10b1,0xc957d572,0x6e05b413,0x4e0fb25c,0x5c5ff806,
29194         0x641162fb,0xd933179b,0xe57439f9,0x42d48485,0x8a8d72aa,0x70c5bd0a } },
29195     /* 211 */
29196     { { 0x97bdf646,0xa7671738,0xab329f7c,0xaa1485b4,0xf8f25fdf,0xce3e11d6,
29197         0xc6221824,0x76a3fc7e,0xf3924740,0x045f281f,0x96d13a9a,0x24557d4e },
29198       { 0xdd4c27cd,0x875c804b,0x0f5c7fea,0x11c5f0f4,0xdc55ff7e,0xac8c880b,
29199         0x1103f101,0x2acddec5,0xf99faa89,0x38341a21,0xce9d6b57,0xc7b67a2c } },
29200     /* 212 */
29201     { { 0x8e357586,0x9a0d724f,0xdf648da0,0x1d7f4ff5,0xfdee62a5,0x9c3e6c9b,
29202         0x0389b372,0x0499cef0,0x98eab879,0xe904050d,0x6c051617,0xe8eef1b6 },
29203       { 0xc37e3ca9,0xebf5bfeb,0xa4e0b91d,0x7c5e946d,0x2c4bea28,0x79097314,
29204         0xee67b2b7,0x81f6c109,0xdafc5ede,0xaf237d9b,0x2abb04c7,0xd2e60201 } },
29205     /* 213 */
29206     { { 0x8a4f57bf,0x6156060c,0xff11182a,0xf9758696,0x6296ef00,0x8336773c,
29207         0xff666899,0x9c054bce,0x719cd11c,0xd6a11611,0xdbe1acfa,0x9824a641 },
29208       { 0xba89fd01,0x0b7b7a5f,0x889f79d8,0xf8d3b809,0xf578285c,0xc5e1ea08,
29209         0xae6d8288,0x7ac74536,0x7521ef5f,0x5d37a200,0xb260a25d,0x5ecc4184 } },
29210     /* 214 */
29211     { { 0xa708c8d3,0xddcebb19,0xc63f81ec,0xe63ed04f,0x11873f95,0xd045f5a0,
29212         0x79f276d5,0x3b5ad544,0x425ae5b3,0x81272a3d,0x10ce1605,0x8bfeb501 },
29213       { 0x888228bf,0x4233809c,0xb2aff7df,0x4bd82acf,0x0cbd4a7f,0x9c68f180,
29214         0x6b44323d,0xfcd77124,0x891db957,0x60c0fcf6,0x04da8f7f,0xcfbb4d89 } },
29215     /* 215 */
29216     { { 0x3b26139a,0x9a6a5df9,0xb2cc7eb8,0x3e076a83,0x5a964bcd,0x47a8e82d,
29217         0xb9278d6b,0x8a4e2a39,0xe4443549,0x93506c98,0xf1e0d566,0x06497a8f },
29218       { 0x2b1efa05,0x3dee8d99,0x45393e33,0x2da63ca8,0xcf0579ad,0xa4af7277,
29219         0x3236d8ea,0xaf4b4639,0x32b617f5,0x6ccad95b,0xb88bb124,0xce76d8b8 } },
29220     /* 216 */
29221     { { 0x083843dc,0x63d2537a,0x1e4153b4,0x89eb3514,0xea9afc94,0x5175ebc4,
29222         0x8ed1aed7,0x7a652580,0xd85e8297,0x67295611,0xb584b73d,0x8dd2d68b },
29223       { 0x0133c3a4,0x237139e6,0x4bd278ea,0x9de838ab,0xc062fcd9,0xe829b072,
29224         0x63ba8706,0x70730d4f,0xd3cd05ec,0x6080483f,0x0c85f84d,0x872ab5b8 } },
29225     /* 217 */
29226     { { 0x999d4d49,0xfc0776d3,0xec3f45e7,0xa3eb59de,0x0dae1fc1,0xbc990e44,
29227         0xa15371ff,0x33596b1e,0x9bc7ab25,0xd447dcb2,0x35979582,0xcd5b63e9 },
29228       { 0x77d1ff11,0xae3366fa,0xedee6903,0x59f28f05,0xa4433bf2,0x6f43fed1,
29229         0xdf9ce00e,0x15409c9b,0xaca9c5dc,0x21b5cded,0x82d7bdb4,0xf9f33595 } },
29230     /* 218 */
29231     { { 0x9422c792,0x95944378,0xc958b8bf,0x239ea923,0xdf076541,0x4b61a247,
29232         0xbb9fc544,0x4d29ce85,0x0b424559,0x9a692a67,0x0e486900,0x6e0ca5a0 },
29233       { 0x85b3bece,0x6b79a782,0xc61f9892,0x41f35e39,0xae747f82,0xff82099a,
29234         0xd0ca59d6,0x58c8ae3f,0x99406b5f,0x4ac930e2,0x9df24243,0x2ce04eb9 } },
29235     /* 219 */
29236     { { 0x1ac37b82,0x4366b994,0x25b04d83,0xff0c728d,0x19c47b7c,0x1f551361,
29237         0xbeff13e7,0xdbf2d5ed,0xe12a683d,0xf78efd51,0x989cf9c4,0x82cd85b9 },
29238       { 0xe0cb5d37,0xe23c6db6,0x72ee1a15,0x818aeebd,0x28771b14,0x8212aafd,
29239         0x1def817d,0x7bc221d9,0x9445c51f,0xdac403a2,0x12c3746b,0x711b0517 } },
29240     /* 220 */
29241     { { 0x5ea99ecc,0x0ed9ed48,0xb8cab5e1,0xf799500d,0xb570cbdc,0xa8ec87dc,
29242         0xd35dfaec,0x52cfb2c2,0x6e4d80a4,0x8d31fae2,0xdcdeabe5,0xe6a37dc9 },
29243       { 0x1deca452,0x5d365a34,0x0d68b44e,0x09a5f8a5,0xa60744b1,0x59238ea5,
29244         0xbb4249e9,0xf2fedc0d,0xa909b2e3,0xe395c74e,0x39388250,0xe156d1a5 } },
29245     /* 221 */
29246     { { 0x47181ae9,0xd796b3d0,0x44197808,0xbaf44ba8,0x34cf3fac,0xe6933094,
29247         0xc3bd5c46,0x41aa6ade,0xeed947c6,0x4fda75d8,0x9ea5a525,0xacd9d412 },
29248       { 0xd430301b,0x65cc55a3,0x7b52ea49,0x3c9a5bcf,0x159507f0,0x22d319cf,
29249         0xde74a8dd,0x2ee0b9b5,0x877ac2b6,0x20c26a1e,0x92e7c314,0x387d73da } },
29250     /* 222 */
29251     { { 0x8cd3fdac,0x13c4833e,0x332e5b8e,0x76fcd473,0xe2fe1fd3,0xff671b4b,
29252         0x5d98d8ec,0x4d734e8b,0x514bbc11,0xb1ead3c6,0x7b390494,0xd14ca858 },
29253       { 0x5d2d37e9,0x95a443af,0x00464622,0x73c6ea73,0x15755044,0xa44aeb4b,
29254         0xfab58fee,0xba3f8575,0xdc680a6f,0x9779dbc9,0x7b37ddfc,0xe1ee5f5a } },
29255     /* 223 */
29256     { { 0x12d29f46,0xcd0b4648,0x0ed53137,0x93295b0b,0x80bef6c9,0xbfe26094,
29257         0x54248b00,0xa6565788,0x80e7f9c4,0x69c43fca,0xbe141ea1,0x2190837b },
29258       { 0xa1b26cfb,0x875e159a,0x7affe852,0x90ca9f87,0x92ca598e,0x15e6550d,
29259         0x1938ad11,0xe3e0945d,0x366ef937,0xef7636bb,0xb39869e5,0xb6034d0b } },
29260     /* 224 */
29261     { { 0x26d8356e,0x4d255e30,0xd314626f,0xf83666ed,0xd0c8ed64,0x421ddf61,
29262         0x26677b61,0x96e473c5,0x9e9b18b3,0xdad4af7e,0xa9393f75,0xfceffd4a },
29263       { 0x11c731d5,0x843138a1,0xb2f141d9,0x05bcb3a1,0x617b7671,0x20e1fa95,
29264         0x88ccec7b,0xbefce812,0x90f1b568,0x582073dc,0x1f055cb7,0xf572261a } },
29265     /* 225 */
29266     { { 0x36973088,0xf3148277,0x86a9f980,0xc008e708,0xe046c261,0x1b795947,
29267         0xca76bca0,0xdf1e6a7d,0x71acddf0,0xabafd886,0x1364d8f4,0xff7054d9 },
29268       { 0xe2260594,0x2cf63547,0xd73b277e,0x468a5372,0xef9bd35e,0xc7419e24,
29269         0x24043cc3,0x2b4a1c20,0x890b39cd,0xa28f047a,0x46f9a2e3,0xdca2cea1 } },
29270     /* 226 */
29271     { { 0x53277538,0xab788736,0xcf697738,0xa734e225,0x6b22e2c1,0x66ee1d1e,
29272         0xebe1d212,0x2c615389,0x02bb0766,0xf36cad40,0x3e64f207,0x120885c3 },
29273       { 0x90fbfec2,0x59e77d56,0xd7a574ae,0xf9e781aa,0x5d045e53,0x801410b0,
29274         0xa91b5f0e,0xd3b5f0aa,0x7fbb3521,0xb3d1df00,0xc72bee9a,0x11c4b33e } },
29275     /* 227 */
29276     { { 0x83c3a7f3,0xd32b9832,0x88d8a354,0x8083abcf,0x50f4ec5a,0xdeb16404,
29277         0x641e2907,0x18d747f0,0xf1bbf03e,0x4e8978ae,0x88a0cd89,0x932447dc },
29278       { 0xcf3d5897,0x561e0feb,0x13600e6d,0xfc3a682f,0xd16a6b73,0xc78b9d73,
29279         0xd29bf580,0xe713fede,0x08d69e5c,0x0a225223,0x1ff7fda4,0x3a924a57 } },
29280     /* 228 */
29281     { { 0xb4093bee,0xfb64554c,0xa58c6ec0,0xa6d65a25,0x43d0ed37,0x4126994d,
29282         0x55152d44,0xa5689a51,0x284caa8d,0xb8e5ea8c,0xd1f25538,0x33f05d4f },
29283       { 0x1b615d6e,0xe0fdfe09,0x705507da,0x2ded7e8f,0x17bbcc80,0xdd5631e5,
29284         0x267fd11f,0x4f87453e,0xff89d62d,0xc6da723f,0xe3cda21d,0x55cbcae2 } },
29285     /* 229 */
29286     { { 0x6b4e84f3,0x336bc94e,0x4ef72c35,0x72863031,0xeeb57f99,0x6d85fdee,
29287         0xa42ece1b,0x7f4e3272,0x36f0320a,0x7f86cbb5,0x923331e6,0xf09b6a2b },
29288       { 0x56778435,0x21d3ecf1,0x8323b2d2,0x2977ba99,0x1704bc0f,0x6a1b57fb,
29289         0x389f048a,0xd777cf8b,0xac6b42cd,0x9ce2174f,0x09e6c55a,0x404e2bff } },
29290     /* 230 */
29291     { { 0x204c5ddb,0x9b9b135e,0x3eff550e,0x9dbfe044,0xec3be0f6,0x35eab4bf,
29292         0x0a43e56f,0x8b4c3f0d,0x0e73f9b3,0x4c1c6673,0x2c78c905,0x92ed38bd },
29293       { 0xa386e27c,0xc7003f6a,0xaced8507,0xb9c4f46f,0x59df5464,0xea024ec8,
29294         0x429572ea,0x4af96152,0xe1fc1194,0x279cd5e2,0x281e358c,0xaa376a03 } },
29295     /* 231 */
29296     { { 0x3cdbc95c,0x07859223,0xef2e337a,0xaae1aa6a,0x472a8544,0xc040108d,
29297         0x8d037b7d,0x80c853e6,0x8c7eee24,0xd221315c,0x8ee47752,0x195d3856 },
29298       { 0xdacd7fbe,0xd4b1ba03,0xd3e0c52b,0x4b5ac61e,0x6aab7b52,0x68d3c052,
29299         0x660e3fea,0xf0d7248c,0x3145efb4,0xafdb3f89,0x8f40936d,0xa73fd9a3 } },
29300     /* 232 */
29301     { { 0xbb1b17ce,0x891b9ef3,0xc6127f31,0x14023667,0x305521fd,0x12b2e58d,
29302         0xe3508088,0x3a47e449,0xff751507,0xe49fc84b,0x5310d16e,0x4023f722 },
29303       { 0xb73399fa,0xa608e5ed,0xd532aa3e,0xf12632d8,0x845e8415,0x13a2758e,
29304         0x1fc2d861,0xae4b6f85,0x339d02f2,0x3879f5b1,0x80d99ebd,0x446d22a6 } },
29305     /* 233 */
29306     { { 0x4be164f1,0x0f502302,0x88b81920,0x8d09d2d6,0x984aceff,0x514056f1,
29307         0x75e9e80d,0xa5c4ddf0,0xdf496a93,0x38cb47e6,0x38df6bf7,0x899e1d6b },
29308       { 0xb59eb2a6,0x69e87e88,0x9b47f38b,0x280d9d63,0x3654e955,0x599411ea,
29309         0x969aa581,0xcf8dd4fd,0x530742a7,0xff5c2baf,0x1a373085,0xa4391536 } },
29310     /* 234 */
29311     { { 0xa8a4bdd2,0x6ace72a3,0xb68ef702,0xc656cdd1,0x90c4dad8,0xd4a33e7e,
29312         0x9d951c50,0x4aece08a,0x085d68e6,0xea8005ae,0x6f7502b8,0xfdd7a7d7 },
29313       { 0x98d6fa45,0xce6fb0a6,0x1104eb8c,0x228f8672,0xda09d7dc,0xd23d8787,
29314         0x2ae93065,0x5521428b,0xea56c366,0x95faba3d,0x0a88aca5,0xedbe5039 } },
29315     /* 235 */
29316     { { 0xbfb26c82,0xd64da0ad,0x952c2f9c,0xe5d70b3c,0xf7e77f68,0xf5e8f365,
29317         0x08f2d695,0x7234e002,0xd12e7be6,0xfaf900ee,0x4acf734e,0x27dc6934 },
29318       { 0xc260a46a,0x80e4ff5e,0x2dc31c28,0x7da5ebce,0xca69f552,0x485c5d73,
29319         0x69cc84c2,0xcdfb6b29,0xed6d4eca,0x031c5afe,0x22247637,0xc7bbf4c8 } },
29320     /* 236 */
29321     { { 0x49fe01b2,0x9d5b72c7,0x793a91b8,0x34785186,0xcf460438,0xa3ba3c54,
29322         0x3ab21b6f,0x73e8e43d,0xbe57b8ab,0x50cde8e0,0xdd204264,0x6488b3a7 },
29323       { 0xdddc4582,0xa9e398b3,0x5bec46fe,0x1698c1a9,0x156d3843,0x7f1446ef,
29324         0x770329a2,0x3fd25dd8,0x2c710668,0x05b1221a,0xa72ee6cf,0x65b2dc2a } },
29325     /* 237 */
29326     { { 0xcd021d63,0x21a885f7,0xfea61f08,0x3f344b15,0xc5cf73e6,0xad5ba6dd,
29327         0x227a8b23,0x154d0d8f,0xdc559311,0x9b74373c,0x98620fa1,0x4feab715 },
29328       { 0x7d9ec924,0x5098938e,0x6d47e550,0x84d54a5e,0x1b617506,0x1a2d1bdc,
29329         0x615868a4,0x99fe1782,0x3005a924,0x171da780,0x7d8f79b6,0xa70bf5ed } },
29330     /* 238 */
29331     { { 0xfe2216c5,0x0bc1250d,0x7601b351,0x2c37e250,0xd6f06b7e,0xb6300175,
29332         0x8bfeb9b7,0x4dde8ca1,0xb82f843d,0x4f210432,0xb1ac0afd,0x8d70e2f9 },
29333       { 0xaae91abb,0x25c73b78,0x863028f2,0x0230dca3,0xe5cf30b7,0x8b923ecf,
29334         0x5506f265,0xed754ec2,0x729a5e39,0x8e41b88c,0xbabf889b,0xee67cec2 } },
29335     /* 239 */
29336     { { 0x1be46c65,0xe183acf5,0xe7565d7a,0x9789538f,0xd9627b4e,0x87873391,
29337         0x9f1d9187,0xbf4ac4c1,0x4691f5c8,0x5db99f63,0x74a1fb98,0xa68df803 },
29338       { 0xbf92b5fa,0x3c448ed1,0x3e0bdc32,0xa098c841,0x79bf016c,0x8e74cd55,
29339         0x115e244d,0x5df0d09c,0x3410b66e,0x9418ad01,0x17a02130,0x8b6124cb } },
29340     /* 240 */
29341     { { 0xc26e3392,0x425ec3af,0xa1722e00,0xc07f8470,0xe2356b43,0xdcc28190,
29342         0xb1ef59a6,0x4ed97dff,0xc63028c1,0xc22b3ad1,0x68c18988,0x070723c2 },
29343       { 0x4cf49e7d,0x70da302f,0x3f12a522,0xc5e87c93,0x18594148,0x74acdd1d,
29344         0xca74124c,0xad5f73ab,0xd69fd478,0xe72e4a3e,0x7b117cc3,0x61593868 } },
29345     /* 241 */
29346     { { 0xa9aa0486,0x7b7b9577,0xa063d557,0x6e41fb35,0xda9047d7,0xb017d5c7,
29347         0x68a87ba9,0x8c748280,0xdf08ad93,0xab45fa5c,0x4c288a28,0xcd9fb217 },
29348       { 0x5747843d,0x59544642,0xa56111e3,0x34d64c6c,0x4bfce8d5,0x12e47ea1,
29349         0x6169267f,0x17740e05,0xeed03fb5,0x5c49438e,0x4fc3f513,0x9da30add } },
29350     /* 242 */
29351     { { 0xccfa5200,0xc4e85282,0x6a19b13d,0x2707608f,0xf5726e2f,0xdcb9a53d,
29352         0xe9427de5,0x612407c9,0xd54d582a,0x3e5a17e1,0x655ae118,0xb99877de },
29353       { 0x015254de,0x6f0e972b,0xf0a6f7c5,0x92a56db1,0xa656f8b2,0xd297e4e1,
29354         0xad981983,0x99fe0052,0x07cfed84,0xd3652d2f,0x843c1738,0xc784352e } },
29355     /* 243 */
29356     { { 0x7e9b2d8a,0x6ee90af0,0x57cf1964,0xac8d7018,0x71f28efc,0xf6ed9031,
29357         0x6812b20e,0x7f70d5a9,0xf1c61eee,0x27b557f4,0xc6263758,0xf1c9bd57 },
29358       { 0x2a1a6194,0x5cf7d014,0x1890ab84,0xdd614e0b,0x0e93c2a6,0x3ef9de10,
29359         0xe0cd91c5,0xf98cf575,0x14befc32,0x504ec0c6,0x6279d68c,0xd0513a66 } },
29360     /* 244 */
29361     { { 0xa859fb6a,0xa8eadbad,0xdb283666,0xcf8346e7,0x3e22e355,0x7b35e61a,
29362         0x99639c6b,0x293ece2c,0x56f241c8,0xfa0162e2,0xbf7a1dda,0xd2e6c7b9 },
29363       { 0x40075e63,0xd0de6253,0xf9ec8286,0x2405aa61,0x8fe45494,0x2237830a,
29364         0x364e9c8c,0x4fd01ac7,0x904ba750,0x4d9c3d21,0xaf1b520b,0xd589be14 } },
29365     /* 245 */
29366     { { 0x4662e53b,0x13576a4f,0xf9077676,0x35ec2f51,0x97c0af97,0x66297d13,
29367         0x9e598b58,0xed3201fe,0x5e70f604,0x49bc752a,0xbb12d951,0xb54af535 },
29368       { 0x212c1c76,0x36ea4c2b,0xeb250dfd,0x18f5bbc7,0x9a0a1a46,0xa0d466cc,
29369         0xdac2d917,0x52564da4,0x8e95fab5,0x206559f4,0x9ca67a33,0x7487c190 } },
29370     /* 246 */
29371     { { 0xdde98e9c,0x75abfe37,0x2a411199,0x99b90b26,0xdcdb1f7c,0x1b410996,
29372         0x8b3b5675,0xab346f11,0xf1f8ae1e,0x04852193,0x6b8b98c1,0x1ec4d227 },
29373       { 0x45452baa,0xba3bc926,0xacc4a572,0x387d1858,0xe51f171e,0x9478eff6,
29374         0x931e1c00,0xf357077d,0xe54c8ca8,0xffee77cd,0x551dc9a4,0xfb4892ff } },
29375     /* 247 */
29376     { { 0x2db8dff8,0x5b1bdad0,0x5a2285a2,0xd462f4fd,0xda00b461,0x1d6aad8e,
29377         0x41306d1b,0x43fbefcf,0x6a13fe19,0x428e86f3,0x17f89404,0xc8b2f118 },
29378       { 0xf0d51afb,0x762528aa,0x549b1d06,0xa3e2fea4,0xea3ddf66,0x86fad8f2,
29379         0x4fbdd206,0x0d9ccc4b,0xc189ff5a,0xcde97d4c,0x199f19a6,0xc36793d6 } },
29380     /* 248 */
29381     { { 0x51b85197,0xea38909b,0xb4c92895,0xffb17dd0,0x1ddb3f3f,0x0eb0878b,
29382         0xc57cf0f2,0xb05d28ff,0x1abd57e2,0xd8bde2e7,0xc40c1b20,0x7f2be28d },
29383       { 0x299a2d48,0x6554dca2,0x8377982d,0x5130ba2e,0x1071971a,0x8863205f,
29384         0x7cf2825d,0x15ee6282,0x03748f2b,0xd4b6c57f,0x430385a0,0xa9e3f4da } },
29385     /* 249 */
29386     { { 0x83fbc9c6,0x33eb7cec,0x4541777e,0x24a311c7,0x4f0767fc,0xc81377f7,
29387         0x4ab702da,0x12adae36,0x2a779696,0xb7fcb6db,0x01cea6ad,0x4a6fb284 },
29388       { 0xcdfc73de,0x5e8b1d2a,0x1b02fd32,0xd0efae8d,0xd81d8519,0x3f99c190,
29389         0xfc808971,0x3c18f7fa,0x51b7ae7b,0x41f713e7,0xf07fc3f8,0x0a4b3435 } },
29390     /* 250 */
29391     { { 0x019b7d2e,0x7dda3c4c,0xd4dc4b89,0x631c8d1a,0x1cdb313c,0x5489cd6e,
29392         0x4c07bb06,0xd44aed10,0x75f000d1,0x8f97e13a,0xdda5df4d,0x0e9ee64f },
29393       { 0x3e346910,0xeaa99f3b,0xfa294ad7,0x622f6921,0x0d0b2fe9,0x22aaa20d,
29394         0x1e5881ba,0x4fed2f99,0xc1571802,0x9af3b2d6,0xdc7ee17c,0x919e67a8 } },
29395     /* 251 */
29396     { { 0x76250533,0xc724fe4c,0x7d817ef8,0x8a2080e5,0x172c9751,0xa2afb0f4,
29397         0x17c0702e,0x9b10cdeb,0xc9b7e3e9,0xbf3975e3,0x1cd0cdc5,0x206117df },
29398       { 0xbe05ebd5,0xfb049e61,0x16c782c0,0xeb0bb55c,0xab7fed09,0x13a331b8,
29399         0x632863f0,0xf6c58b1d,0x4d3b6195,0x6264ef6e,0x9a53f116,0x92c51b63 } },
29400     /* 252 */
29401     { { 0x288b364d,0xa57c7bc8,0x7b41e5c4,0x4a562e08,0x698a9a11,0x699d21c6,
29402         0xf3f849b9,0xa4ed9581,0x9eb726ba,0xa223eef3,0xcc2884f9,0x13159c23 },
29403       { 0x3a3f4963,0x73931e58,0x0ada6a81,0x96500389,0x5ab2950b,0x3ee8a1c6,
29404         0x775fab52,0xeedf4949,0x4f2671b6,0x63d652e1,0x3c4e2f55,0xfed4491c } },
29405     /* 253 */
29406     { { 0xf4eb453e,0x335eadc3,0xcadd1a5b,0x5ff74b63,0x5d84a91a,0x6933d0d7,
29407         0xb49ba337,0x9ca3eeb9,0xc04c15b8,0x1f6facce,0xdc09a7e4,0x4ef19326 },
29408       { 0x3dca3233,0x53d2d324,0xa2259d4b,0x0ee40590,0x5546f002,0x18c22edb,
29409         0x09ea6b71,0x92429801,0xb0e91e61,0xaada0add,0x99963c50,0x5fe53ef4 } },
29410     /* 254 */
29411     { { 0x90c28c65,0x372dd06b,0x119ce47d,0x1765242c,0x6b22fc82,0xc041fb80,
29412         0xb0a7ccc1,0x667edf07,0x1261bece,0xc79599e7,0x19cff22a,0xbc69d9ba },
29413       { 0x13c06819,0x009d77cd,0xe282b79d,0x635a66ae,0x225b1be8,0x4edac4a6,
29414         0x524008f9,0x57d4f4e4,0xb056af84,0xee299ac5,0x3a0bc386,0xcc38444c } },
29415     /* 255 */
29416     { { 0xcd4c2356,0x490643b1,0x750547be,0x740a4851,0xd4944c04,0x643eaf29,
29417         0x299a98a0,0xba572479,0xee05fdf9,0x48b29f16,0x089b2d7b,0x33fb4f61 },
29418       { 0xa950f955,0x86704902,0xfedc3ddf,0x97e1034d,0x05fbb6a2,0x211320b6,
29419         0x432299bb,0x23d7b93f,0x8590e4a3,0x1fe1a057,0xf58c0ce6,0x8e1d0586 } },
29420 };
29421 
29422 /* Multiply the base point of P384 by the scalar and return the result.
29423  * If map is true then convert result to affine coordinates.
29424  *
29425  * Stripe implementation.
29426  * Pre-generated: 2^0, 2^48, ...
29427  * Pre-generated: products of all combinations of above.
29428  * 8 doubles and adds (with qz=1)
29429  *
29430  * r     Resulting point.
29431  * k     Scalar to multiply by.
29432  * map   Indicates whether to convert result to affine.
29433  * ct    Constant time required.
29434  * heap  Heap to use for allocation.
29435  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
29436  */
sp_384_ecc_mulmod_base_12(sp_point_384 * r,const sp_digit * k,int map,int ct,void * heap)29437 static int sp_384_ecc_mulmod_base_12(sp_point_384* r, const sp_digit* k,
29438         int map, int ct, void* heap)
29439 {
29440     return sp_384_ecc_mulmod_stripe_12(r, &p384_base, p384_table,
29441                                       k, map, ct, heap);
29442 }
29443 
29444 #endif
29445 
29446 /* Multiply the base point of P384 by the scalar and return the result.
29447  * If map is true then convert result to affine coordinates.
29448  *
29449  * km    Scalar to multiply by.
29450  * r     Resulting point.
29451  * map   Indicates whether to convert result to affine.
29452  * heap  Heap to use for allocation.
29453  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
29454  */
sp_ecc_mulmod_base_384(const mp_int * km,ecc_point * r,int map,void * heap)29455 int sp_ecc_mulmod_base_384(const mp_int* km, ecc_point* r, int map, void* heap)
29456 {
29457 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29458     sp_point_384* point = NULL;
29459     sp_digit* k = NULL;
29460 #else
29461     sp_point_384  point[1];
29462     sp_digit k[12];
29463 #endif
29464     int err = MP_OKAY;
29465 
29466 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29467     point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap,
29468                                          DYNAMIC_TYPE_ECC);
29469     if (point == NULL)
29470         err = MEMORY_E;
29471     if (err == MP_OKAY) {
29472         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
29473                                DYNAMIC_TYPE_ECC);
29474         if (k == NULL)
29475             err = MEMORY_E;
29476     }
29477 #endif
29478 
29479     if (err == MP_OKAY) {
29480         sp_384_from_mp(k, 12, km);
29481 
29482             err = sp_384_ecc_mulmod_base_12(point, k, map, 1, heap);
29483     }
29484     if (err == MP_OKAY) {
29485         err = sp_384_point_to_ecc_point_12(point, r);
29486     }
29487 
29488 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29489     if (k != NULL)
29490         XFREE(k, heap, DYNAMIC_TYPE_ECC);
29491     if (point != NULL)
29492         XFREE(point, heap, DYNAMIC_TYPE_ECC);
29493 #endif
29494 
29495     return err;
29496 }
29497 
29498 /* Multiply the base point of P384 by the scalar, add point a and return
29499  * the result. If map is true then convert result to affine coordinates.
29500  *
29501  * km      Scalar to multiply by.
29502  * am      Point to add to scalar mulitply result.
29503  * inMont  Point to add is in montgomery form.
29504  * r       Resulting point.
29505  * map     Indicates whether to convert result to affine.
29506  * heap    Heap to use for allocation.
29507  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
29508  */
sp_ecc_mulmod_base_add_384(const mp_int * km,const ecc_point * am,int inMont,ecc_point * r,int map,void * heap)29509 int sp_ecc_mulmod_base_add_384(const mp_int* km, const ecc_point* am,
29510         int inMont, ecc_point* r, int map, void* heap)
29511 {
29512 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29513     sp_point_384* point = NULL;
29514     sp_digit* k = NULL;
29515 #else
29516     sp_point_384 point[2];
29517     sp_digit k[12 + 12 * 2 * 6];
29518 #endif
29519     sp_point_384* addP = NULL;
29520     sp_digit* tmp = NULL;
29521     int err = MP_OKAY;
29522 
29523 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29524     point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap,
29525                                          DYNAMIC_TYPE_ECC);
29526     if (point == NULL)
29527         err = MEMORY_E;
29528     if (err == MP_OKAY) {
29529         k = (sp_digit*)XMALLOC(
29530             sizeof(sp_digit) * (12 + 12 * 2 * 6),
29531             heap, DYNAMIC_TYPE_ECC);
29532         if (k == NULL)
29533             err = MEMORY_E;
29534     }
29535 #endif
29536 
29537     if (err == MP_OKAY) {
29538         addP = point + 1;
29539         tmp = k + 12;
29540 
29541         sp_384_from_mp(k, 12, km);
29542         sp_384_point_from_ecc_point_12(addP, am);
29543     }
29544     if ((err == MP_OKAY) && (!inMont)) {
29545         err = sp_384_mod_mul_norm_12(addP->x, addP->x, p384_mod);
29546     }
29547     if ((err == MP_OKAY) && (!inMont)) {
29548         err = sp_384_mod_mul_norm_12(addP->y, addP->y, p384_mod);
29549     }
29550     if ((err == MP_OKAY) && (!inMont)) {
29551         err = sp_384_mod_mul_norm_12(addP->z, addP->z, p384_mod);
29552     }
29553     if (err == MP_OKAY) {
29554             err = sp_384_ecc_mulmod_base_12(point, k, 0, 0, heap);
29555     }
29556     if (err == MP_OKAY) {
29557             sp_384_proj_point_add_12(point, point, addP, tmp);
29558 
29559         if (map) {
29560                 sp_384_map_12(point, point, tmp);
29561         }
29562 
29563         err = sp_384_point_to_ecc_point_12(point, r);
29564     }
29565 
29566 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29567     if (k != NULL)
29568         XFREE(k, heap, DYNAMIC_TYPE_ECC);
29569     if (point)
29570         XFREE(point, heap, DYNAMIC_TYPE_ECC);
29571 #endif
29572 
29573     return err;
29574 }
29575 
29576 #if defined(WOLFSSL_VALIDATE_ECC_KEYGEN) || defined(HAVE_ECC_SIGN) || \
29577                                                         defined(HAVE_ECC_VERIFY)
29578 /* Returns 1 if the number of zero.
29579  * Implementation is constant time.
29580  *
29581  * a  Number to check.
29582  * returns 1 if the number is zero and 0 otherwise.
29583  */
sp_384_iszero_12(const sp_digit * a)29584 static int sp_384_iszero_12(const sp_digit* a)
29585 {
29586     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
29587             a[8] | a[9] | a[10] | a[11]) == 0;
29588 }
29589 
29590 #endif /* WOLFSSL_VALIDATE_ECC_KEYGEN | HAVE_ECC_SIGN | HAVE_ECC_VERIFY */
29591 /* Add 1 to a. (a = a + 1)
29592  *
29593  * a  A single precision integer.
29594  */
sp_384_add_one_12(sp_digit * a)29595 SP_NOINLINE static void sp_384_add_one_12(sp_digit* a)
29596 {
29597     __asm__ __volatile__ (
29598         "mov	r2, #1\n\t"
29599         "ldr	r1, [%[a], #0]\n\t"
29600         "adds	r1, r1, r2\n\t"
29601         "mov	r2, #0\n\t"
29602         "str	r1, [%[a], #0]\n\t"
29603         "ldr	r1, [%[a], #4]\n\t"
29604         "adcs	r1, r1, r2\n\t"
29605         "str	r1, [%[a], #4]\n\t"
29606         "ldr	r1, [%[a], #8]\n\t"
29607         "adcs	r1, r1, r2\n\t"
29608         "str	r1, [%[a], #8]\n\t"
29609         "ldr	r1, [%[a], #12]\n\t"
29610         "adcs	r1, r1, r2\n\t"
29611         "str	r1, [%[a], #12]\n\t"
29612         "ldr	r1, [%[a], #16]\n\t"
29613         "adcs	r1, r1, r2\n\t"
29614         "str	r1, [%[a], #16]\n\t"
29615         "ldr	r1, [%[a], #20]\n\t"
29616         "adcs	r1, r1, r2\n\t"
29617         "str	r1, [%[a], #20]\n\t"
29618         "ldr	r1, [%[a], #24]\n\t"
29619         "adcs	r1, r1, r2\n\t"
29620         "str	r1, [%[a], #24]\n\t"
29621         "ldr	r1, [%[a], #28]\n\t"
29622         "adcs	r1, r1, r2\n\t"
29623         "str	r1, [%[a], #28]\n\t"
29624         "ldr	r1, [%[a], #32]\n\t"
29625         "adcs	r1, r1, r2\n\t"
29626         "str	r1, [%[a], #32]\n\t"
29627         "ldr	r1, [%[a], #36]\n\t"
29628         "adcs	r1, r1, r2\n\t"
29629         "str	r1, [%[a], #36]\n\t"
29630         "ldr	r1, [%[a], #40]\n\t"
29631         "adcs	r1, r1, r2\n\t"
29632         "str	r1, [%[a], #40]\n\t"
29633         "ldr	r1, [%[a], #44]\n\t"
29634         "adcs	r1, r1, r2\n\t"
29635         "str	r1, [%[a], #44]\n\t"
29636         :
29637         : [a] "r" (a)
29638         : "memory", "r1", "r2"
29639     );
29640 }
29641 
29642 /* Read big endian unsigned byte array into r.
29643  *
29644  * r  A single precision integer.
29645  * size  Maximum number of bytes to convert
29646  * a  Byte array.
29647  * n  Number of bytes in array to read.
29648  */
sp_384_from_bin(sp_digit * r,int size,const byte * a,int n)29649 static void sp_384_from_bin(sp_digit* r, int size, const byte* a, int n)
29650 {
29651     int i;
29652     int j = 0;
29653     word32 s = 0;
29654 
29655     r[0] = 0;
29656     for (i = n-1; i >= 0; i--) {
29657         r[j] |= (((sp_digit)a[i]) << s);
29658         if (s >= 24U) {
29659             r[j] &= 0xffffffff;
29660             s = 32U - s;
29661             if (j + 1 >= size) {
29662                 break;
29663             }
29664             r[++j] = (sp_digit)a[i] >> s;
29665             s = 8U - s;
29666         }
29667         else {
29668             s += 8U;
29669         }
29670     }
29671 
29672     for (j++; j < size; j++) {
29673         r[j] = 0;
29674     }
29675 }
29676 
29677 /* Generates a scalar that is in the range 1..order-1.
29678  *
29679  * rng  Random number generator.
29680  * k    Scalar value.
29681  * returns RNG failures, MEMORY_E when memory allocation fails and
29682  * MP_OKAY on success.
29683  */
sp_384_ecc_gen_k_12(WC_RNG * rng,sp_digit * k)29684 static int sp_384_ecc_gen_k_12(WC_RNG* rng, sp_digit* k)
29685 {
29686     int err;
29687     byte buf[48];
29688 
29689     do {
29690         err = wc_RNG_GenerateBlock(rng, buf, sizeof(buf));
29691         if (err == 0) {
29692             sp_384_from_bin(k, 12, buf, (int)sizeof(buf));
29693             if (sp_384_cmp_12(k, p384_order2) <= 0) {
29694                 sp_384_add_one_12(k);
29695                 break;
29696             }
29697         }
29698     }
29699     while (err == 0);
29700 
29701     return err;
29702 }
29703 
29704 /* Makes a random EC key pair.
29705  *
29706  * rng   Random number generator.
29707  * priv  Generated private value.
29708  * pub   Generated public point.
29709  * heap  Heap to use for allocation.
29710  * returns ECC_INF_E when the point does not have the correct order, RNG
29711  * failures, MEMORY_E when memory allocation fails and MP_OKAY on success.
29712  */
sp_ecc_make_key_384(WC_RNG * rng,mp_int * priv,ecc_point * pub,void * heap)29713 int sp_ecc_make_key_384(WC_RNG* rng, mp_int* priv, ecc_point* pub, void* heap)
29714 {
29715 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29716     sp_point_384* point = NULL;
29717     sp_digit* k = NULL;
29718 #else
29719     #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
29720     sp_point_384 point[2];
29721     #else
29722     sp_point_384 point[1];
29723     #endif
29724     sp_digit k[12];
29725 #endif
29726 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
29727     sp_point_384* infinity = NULL;
29728 #endif
29729     int err = MP_OKAY;
29730 
29731 
29732     (void)heap;
29733 
29734 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29735     #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
29736     point = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap, DYNAMIC_TYPE_ECC);
29737     #else
29738     point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap, DYNAMIC_TYPE_ECC);
29739     #endif
29740     if (point == NULL)
29741         err = MEMORY_E;
29742     if (err == MP_OKAY) {
29743         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
29744                                DYNAMIC_TYPE_ECC);
29745         if (k == NULL)
29746             err = MEMORY_E;
29747     }
29748 #endif
29749 
29750     if (err == MP_OKAY) {
29751     #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
29752         infinity = point + 1;
29753     #endif
29754 
29755         err = sp_384_ecc_gen_k_12(rng, k);
29756     }
29757     if (err == MP_OKAY) {
29758             err = sp_384_ecc_mulmod_base_12(point, k, 1, 1, NULL);
29759     }
29760 
29761 #ifdef WOLFSSL_VALIDATE_ECC_KEYGEN
29762     if (err == MP_OKAY) {
29763             err = sp_384_ecc_mulmod_12(infinity, point, p384_order, 1, 1, NULL);
29764     }
29765     if (err == MP_OKAY) {
29766         if (sp_384_iszero_12(point->x) || sp_384_iszero_12(point->y)) {
29767             err = ECC_INF_E;
29768         }
29769     }
29770 #endif
29771 
29772     if (err == MP_OKAY) {
29773         err = sp_384_to_mp(k, priv);
29774     }
29775     if (err == MP_OKAY) {
29776         err = sp_384_point_to_ecc_point_12(point, pub);
29777     }
29778 
29779 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29780     if (k != NULL)
29781         XFREE(k, heap, DYNAMIC_TYPE_ECC);
29782     if (point != NULL) {
29783         /* point is not sensitive, so no need to zeroize */
29784         XFREE(point, heap, DYNAMIC_TYPE_ECC);
29785     }
29786 #endif
29787 
29788     return err;
29789 }
29790 
29791 #ifdef HAVE_ECC_DHE
29792 /* Write r as big endian to byte array.
29793  * Fixed length number of bytes written: 48
29794  *
29795  * r  A single precision integer.
29796  * a  Byte array.
29797  */
sp_384_to_bin_12(sp_digit * r,byte * a)29798 static void sp_384_to_bin_12(sp_digit* r, byte* a)
29799 {
29800     int i;
29801     int j;
29802     int s = 0;
29803     int b;
29804 
29805     j = 384 / 8 - 1;
29806     a[j] = 0;
29807     for (i=0; i<12 && j>=0; i++) {
29808         b = 0;
29809         /* lint allow cast of mismatch sp_digit and int */
29810         a[j--] |= (byte)(r[i] << s); /*lint !e9033*/
29811         b += 8 - s;
29812         if (j < 0) {
29813             break;
29814         }
29815         while (b < 32) {
29816             a[j--] = (byte)(r[i] >> b);
29817             b += 8;
29818             if (j < 0) {
29819                 break;
29820             }
29821         }
29822         s = 8 - (b - 32);
29823         if (j >= 0) {
29824             a[j] = 0;
29825         }
29826         if (s != 0) {
29827             j++;
29828         }
29829     }
29830 }
29831 
29832 /* Multiply the point by the scalar and serialize the X ordinate.
29833  * The number is 0 padded to maximum size on output.
29834  *
29835  * priv    Scalar to multiply the point by.
29836  * pub     Point to multiply.
29837  * out     Buffer to hold X ordinate.
29838  * outLen  On entry, size of the buffer in bytes.
29839  *         On exit, length of data in buffer in bytes.
29840  * heap    Heap to use for allocation.
29841  * returns BUFFER_E if the buffer is to small for output size,
29842  * MEMORY_E when memory allocation fails and MP_OKAY on success.
29843  */
sp_ecc_secret_gen_384(const mp_int * priv,const ecc_point * pub,byte * out,word32 * outLen,void * heap)29844 int sp_ecc_secret_gen_384(const mp_int* priv, const ecc_point* pub, byte* out,
29845                           word32* outLen, void* heap)
29846 {
29847 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29848     sp_point_384* point = NULL;
29849     sp_digit* k = NULL;
29850 #else
29851     sp_point_384 point[1];
29852     sp_digit k[12];
29853 #endif
29854     int err = MP_OKAY;
29855 
29856     if (*outLen < 48U) {
29857         err = BUFFER_E;
29858     }
29859 
29860 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29861     if (err == MP_OKAY) {
29862         point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap,
29863                                          DYNAMIC_TYPE_ECC);
29864         if (point == NULL)
29865             err = MEMORY_E;
29866     }
29867     if (err == MP_OKAY) {
29868         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
29869                                DYNAMIC_TYPE_ECC);
29870         if (k == NULL)
29871             err = MEMORY_E;
29872     }
29873 #endif
29874 
29875     if (err == MP_OKAY) {
29876         sp_384_from_mp(k, 12, priv);
29877         sp_384_point_from_ecc_point_12(point, pub);
29878             err = sp_384_ecc_mulmod_12(point, point, k, 1, 1, heap);
29879     }
29880     if (err == MP_OKAY) {
29881         sp_384_to_bin_12(point->x, out);
29882         *outLen = 48;
29883     }
29884 
29885 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
29886     if (k != NULL)
29887         XFREE(k, heap, DYNAMIC_TYPE_ECC);
29888     if (point != NULL)
29889         XFREE(point, heap, DYNAMIC_TYPE_ECC);
29890 #endif
29891 
29892     return err;
29893 }
29894 #endif /* HAVE_ECC_DHE */
29895 
29896 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
29897 #endif
29898 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
29899 #ifdef WOLFSSL_SP_SMALL
29900 /* Sub b from a into a. (a -= b)
29901  *
29902  * a  A single precision integer.
29903  * b  A single precision integer.
29904  */
sp_384_sub_in_place_12(sp_digit * a,const sp_digit * b)29905 SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
29906         const sp_digit* b)
29907 {
29908     sp_digit c = 0;
29909     __asm__ __volatile__ (
29910         "mov	r8, %[a]\n\t"
29911         "add	r8, r8, #48\n\t"
29912         "\n1:\n\t"
29913         "mov	r5, #0\n\t"
29914         "subs	r5, r5, %[c]\n\t"
29915         "ldr	r3, [%[a]]\n\t"
29916         "ldr	r4, [%[a], #4]\n\t"
29917         "ldr	r5, [%[b]]\n\t"
29918         "ldr	r6, [%[b], #4]\n\t"
29919         "sbcs	r3, r3, r5\n\t"
29920         "sbcs	r4, r4, r6\n\t"
29921         "str	r3, [%[a]]\n\t"
29922         "str	r4, [%[a], #4]\n\t"
29923         "sbc	%[c], %[c], %[c]\n\t"
29924         "add	%[a], %[a], #8\n\t"
29925         "add	%[b], %[b], #8\n\t"
29926         "cmp	%[a], r8\n\t"
29927 #ifdef __GNUC__
29928         "bne	1b\n\t"
29929 #else
29930         "bne.n	1b\n\t"
29931 #endif /* __GNUC__ */
29932         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
29933         :
29934         : "memory", "r3", "r4", "r5", "r6", "r8"
29935     );
29936 
29937     return c;
29938 }
29939 
29940 #else
29941 /* Sub b from a into r. (r = a - b)
29942  *
29943  * r  A single precision integer.
29944  * a  A single precision integer.
29945  * b  A single precision integer.
29946  */
sp_384_sub_in_place_12(sp_digit * a,const sp_digit * b)29947 SP_NOINLINE static sp_digit sp_384_sub_in_place_12(sp_digit* a,
29948         const sp_digit* b)
29949 {
29950     sp_digit c = 0;
29951 
29952     __asm__ __volatile__ (
29953         "ldm	%[a], {r3, r4}\n\t"
29954         "ldm	%[b]!, {r5, r6}\n\t"
29955         "subs	r3, r3, r5\n\t"
29956         "sbcs	r4, r4, r6\n\t"
29957         "stm	%[a]!, {r3, r4}\n\t"
29958         "ldm	%[a], {r3, r4}\n\t"
29959         "ldm	%[b]!, {r5, r6}\n\t"
29960         "sbcs	r3, r3, r5\n\t"
29961         "sbcs	r4, r4, r6\n\t"
29962         "stm	%[a]!, {r3, r4}\n\t"
29963         "ldm	%[a], {r3, r4}\n\t"
29964         "ldm	%[b]!, {r5, r6}\n\t"
29965         "sbcs	r3, r3, r5\n\t"
29966         "sbcs	r4, r4, r6\n\t"
29967         "stm	%[a]!, {r3, r4}\n\t"
29968         "ldm	%[a], {r3, r4}\n\t"
29969         "ldm	%[b]!, {r5, r6}\n\t"
29970         "sbcs	r3, r3, r5\n\t"
29971         "sbcs	r4, r4, r6\n\t"
29972         "stm	%[a]!, {r3, r4}\n\t"
29973         "ldm	%[a], {r3, r4}\n\t"
29974         "ldm	%[b]!, {r5, r6}\n\t"
29975         "sbcs	r3, r3, r5\n\t"
29976         "sbcs	r4, r4, r6\n\t"
29977         "stm	%[a]!, {r3, r4}\n\t"
29978         "ldm	%[a], {r3, r4}\n\t"
29979         "ldm	%[b]!, {r5, r6}\n\t"
29980         "sbcs	r3, r3, r5\n\t"
29981         "sbcs	r4, r4, r6\n\t"
29982         "stm	%[a]!, {r3, r4}\n\t"
29983         "sbc	%[c], %[c], %[c]\n\t"
29984         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
29985         :
29986         : "memory", "r3", "r4", "r5", "r6"
29987     );
29988 
29989     return c;
29990 }
29991 
29992 #endif /* WOLFSSL_SP_SMALL */
29993 /* Mul a by digit b into r. (r = a * b)
29994  *
29995  * r  A single precision integer.
29996  * a  A single precision integer.
29997  * b  A single precision digit.
29998  */
sp_384_mul_d_12(sp_digit * r,const sp_digit * a,sp_digit b)29999 SP_NOINLINE static void sp_384_mul_d_12(sp_digit* r, const sp_digit* a,
30000         sp_digit b)
30001 {
30002     __asm__ __volatile__ (
30003         "add	r9, %[a], #48\n\t"
30004         /* A[0] * B */
30005         "ldr	r6, [%[a]], #4\n\t"
30006         "umull	r5, r3, r6, %[b]\n\t"
30007         "mov	r4, #0\n\t"
30008         "str	r5, [%[r]], #4\n\t"
30009         /* A[0] * B - Done */
30010         "\n1:\n\t"
30011         "mov	r5, #0\n\t"
30012         /* A[] * B */
30013         "ldr	r6, [%[a]], #4\n\t"
30014         "umull	r6, r8, r6, %[b]\n\t"
30015         "adds	r3, r3, r6\n\t"
30016         "adcs 	r4, r4, r8\n\t"
30017         "adc	r5, r5, #0\n\t"
30018         /* A[] * B - Done */
30019         "str	r3, [%[r]], #4\n\t"
30020         "mov	r3, r4\n\t"
30021         "mov	r4, r5\n\t"
30022         "cmp	%[a], r9\n\t"
30023 #ifdef __GNUC__
30024         "blt	1b\n\t"
30025 #else
30026         "blt.n	1b\n\t"
30027 #endif /* __GNUC__ */
30028         "str	r3, [%[r]]\n\t"
30029         : [r] "+r" (r), [a] "+r" (a)
30030         : [b] "r" (b)
30031         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
30032     );
30033 }
30034 
30035 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
30036  *
30037  * d1   The high order half of the number to divide.
30038  * d0   The low order half of the number to divide.
30039  * div  The dividend.
30040  * returns the result of the division.
30041  *
30042  * Note that this is an approximate div. It may give an answer 1 larger.
30043  */
div_384_word_12(sp_digit d1,sp_digit d0,sp_digit div)30044 SP_NOINLINE static sp_digit div_384_word_12(sp_digit d1, sp_digit d0,
30045         sp_digit div)
30046 {
30047     sp_digit r = 0;
30048 
30049     __asm__ __volatile__ (
30050         "lsr	r6, %[div], #16\n\t"
30051         "add	r6, r6, #1\n\t"
30052         "udiv	r4, %[d1], r6\n\t"
30053         "lsl	r8, r4, #16\n\t"
30054         "umull	r4, r5, %[div], r8\n\t"
30055         "subs	%[d0], %[d0], r4\n\t"
30056         "sbc	%[d1], %[d1], r5\n\t"
30057         "udiv	r5, %[d1], r6\n\t"
30058         "lsl	r4, r5, #16\n\t"
30059         "add	r8, r8, r4\n\t"
30060         "umull	r4, r5, %[div], r4\n\t"
30061         "subs	%[d0], %[d0], r4\n\t"
30062         "sbc	%[d1], %[d1], r5\n\t"
30063         "lsl	r4, %[d1], #16\n\t"
30064         "orr	r4, r4, %[d0], lsr #16\n\t"
30065         "udiv	r4, r4, r6\n\t"
30066         "add	r8, r8, r4\n\t"
30067         "umull	r4, r5, %[div], r4\n\t"
30068         "subs	%[d0], %[d0], r4\n\t"
30069         "sbc	%[d1], %[d1], r5\n\t"
30070         "lsl	r4, %[d1], #16\n\t"
30071         "orr	r4, r4, %[d0], lsr #16\n\t"
30072         "udiv	r4, r4, r6\n\t"
30073         "add	r8, r8, r4\n\t"
30074         "umull	r4, r5, %[div], r4\n\t"
30075         "subs	%[d0], %[d0], r4\n\t"
30076         "sbc	%[d1], %[d1], r5\n\t"
30077         "udiv	r4, %[d0], %[div]\n\t"
30078         "add	r8, r8, r4\n\t"
30079         "mov	%[r], r8\n\t"
30080         : [r] "+r" (r)
30081         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
30082         : "r4", "r5", "r6", "r8"
30083     );
30084     return r;
30085 }
30086 
30087 /* AND m into each word of a and store in r.
30088  *
30089  * r  A single precision integer.
30090  * a  A single precision integer.
30091  * m  Mask to AND against each digit.
30092  */
sp_384_mask_12(sp_digit * r,const sp_digit * a,sp_digit m)30093 static void sp_384_mask_12(sp_digit* r, const sp_digit* a, sp_digit m)
30094 {
30095 #ifdef WOLFSSL_SP_SMALL
30096     int i;
30097 
30098     for (i=0; i<12; i++) {
30099         r[i] = a[i] & m;
30100     }
30101 #else
30102     r[0] = a[0] & m;
30103     r[1] = a[1] & m;
30104     r[2] = a[2] & m;
30105     r[3] = a[3] & m;
30106     r[4] = a[4] & m;
30107     r[5] = a[5] & m;
30108     r[6] = a[6] & m;
30109     r[7] = a[7] & m;
30110     r[8] = a[8] & m;
30111     r[9] = a[9] & m;
30112     r[10] = a[10] & m;
30113     r[11] = a[11] & m;
30114 #endif
30115 }
30116 
30117 /* Divide d in a and put remainder into r (m*d + r = a)
30118  * m is not calculated as it is not needed at this time.
30119  *
30120  * a  Number to be divided.
30121  * d  Number to divide with.
30122  * m  Multiplier result.
30123  * r  Remainder from the division.
30124  * returns MP_OKAY indicating success.
30125  */
sp_384_div_12(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)30126 static WC_INLINE int sp_384_div_12(const sp_digit* a, const sp_digit* d, sp_digit* m,
30127         sp_digit* r)
30128 {
30129     sp_digit t1[24], t2[13];
30130     sp_digit div, r1;
30131     int i;
30132 
30133     (void)m;
30134 
30135     div = d[11];
30136     XMEMCPY(t1, a, sizeof(*t1) * 2 * 12);
30137     for (i=11; i>=0; i--) {
30138         sp_digit hi = t1[12 + i] - (t1[12 + i] == div);
30139         r1 = div_384_word_12(hi, t1[12 + i - 1], div);
30140 
30141         sp_384_mul_d_12(t2, d, r1);
30142         t1[12 + i] += sp_384_sub_in_place_12(&t1[i], t2);
30143         t1[12 + i] -= t2[12];
30144         sp_384_mask_12(t2, d, t1[12 + i]);
30145         t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
30146         sp_384_mask_12(t2, d, t1[12 + i]);
30147         t1[12 + i] += sp_384_add_12(&t1[i], &t1[i], t2);
30148     }
30149 
30150     r1 = sp_384_cmp_12(t1, d) >= 0;
30151     sp_384_cond_sub_12(r, t1, d, (sp_digit)0 - r1);
30152 
30153     return MP_OKAY;
30154 }
30155 
30156 /* Reduce a modulo m into r. (r = a mod m)
30157  *
30158  * r  A single precision number that is the reduced result.
30159  * a  A single precision number that is to be reduced.
30160  * m  A single precision number that is the modulus to reduce with.
30161  * returns MP_OKAY indicating success.
30162  */
sp_384_mod_12(sp_digit * r,const sp_digit * a,const sp_digit * m)30163 static WC_INLINE int sp_384_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
30164 {
30165     return sp_384_div_12(a, m, NULL, r);
30166 }
30167 
30168 #endif
30169 #if defined(HAVE_ECC_SIGN) || defined(HAVE_ECC_VERIFY)
30170 /* Multiply two number mod the order of P384 curve. (r = a * b mod order)
30171  *
30172  * r  Result of the multiplication.
30173  * a  First operand of the multiplication.
30174  * b  Second operand of the multiplication.
30175  */
sp_384_mont_mul_order_12(sp_digit * r,const sp_digit * a,const sp_digit * b)30176 static void sp_384_mont_mul_order_12(sp_digit* r, const sp_digit* a, const sp_digit* b)
30177 {
30178     sp_384_mul_12(r, a, b);
30179     sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
30180 }
30181 
30182 #if defined(HAVE_ECC_SIGN) || (defined(HAVE_ECC_VERIFY) && defined(WOLFSSL_SP_SMALL))
#ifdef WOLFSSL_SP_SMALL
/* P384 curve order minus 2, least significant word first.
 * Used as the exponent when inverting modulo the order. */
static const uint32_t p384_order_minus_2[12] = {
    0xccc52971U, 0xecec196aU, 0x48b0a77aU, 0x581a0db2U,
    0xf4372ddfU, 0xc7634d81U, 0xffffffffU, 0xffffffffU,
    0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU
};
#else
/* Low six words of the P384 curve order minus 2, least significant
 * word first. */
static const uint32_t p384_order_low[6] = {
    0xccc52971U, 0xecec196aU, 0x48b0a77aU,
    0x581a0db2U, 0xf4372ddfU, 0xc7634d81U
};
#endif /* WOLFSSL_SP_SMALL */
30195 
30196 /* Square number mod the order of P384 curve. (r = a * a mod order)
30197  *
30198  * r  Result of the squaring.
30199  * a  Number to square.
30200  */
sp_384_mont_sqr_order_12(sp_digit * r,const sp_digit * a)30201 static void sp_384_mont_sqr_order_12(sp_digit* r, const sp_digit* a)
30202 {
30203     sp_384_sqr_12(r, a);
30204     sp_384_mont_reduce_order_12(r, p384_order, p384_mp_order);
30205 }
30206 
30207 #ifndef WOLFSSL_SP_SMALL
30208 /* Square number mod the order of P384 curve a number of times.
30209  * (r = a ^ n mod order)
30210  *
30211  * r  Result of the squaring.
30212  * a  Number to square.
30213  */
sp_384_mont_sqr_n_order_12(sp_digit * r,const sp_digit * a,int n)30214 static void sp_384_mont_sqr_n_order_12(sp_digit* r, const sp_digit* a, int n)
30215 {
30216     int i;
30217 
30218     sp_384_mont_sqr_order_12(r, a);
30219     for (i=1; i<n; i++) {
30220         sp_384_mont_sqr_order_12(r, r);
30221     }
30222 }
30223 #endif /* !WOLFSSL_SP_SMALL */
30224 
30225 /* Invert the number, in Montgomery form, modulo the order of the P384 curve.
30226  * (r = 1 / a mod order)
30227  *
30228  * r   Inverse result.
30229  * a   Number to invert.
30230  * td  Temporary data.
30231  */
30232 
30233 #ifdef WOLFSSL_SP_NONBLOCK
/* Progress record for the non-blocking inversion modulo the order. */
typedef struct sp_384_mont_inv_order_12_ctx {
    int state;  /* Step of the state machine to run on the next call. */
    int i;      /* Index of the exponent bit handled by the next step. */
} sp_384_mont_inv_order_12_ctx;
/* Non-blocking inversion, in Montgomery form, modulo the order of the P384
 * curve. (r = 1 / a mod order)
 *
 * Each call performs at most one squaring or one multiplication of the
 * square-and-multiply exponentiation by (order - 2) and then returns.
 *
 * sp_ctx  Context holding the state between calls. Must be zeroed before
 *         the first call.
 * r       Inverse result. Written only when MP_OKAY is returned.
 * a       Number to invert.
 * t       Temporary data.
 * returns FP_WOULDBLOCK while more calls are needed and MP_OKAY when done.
 */
static int sp_384_mont_inv_order_12_nb(sp_ecc_ctx_t* sp_ctx, sp_digit* r, const sp_digit* a,
        sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_384_mont_inv_order_12_ctx* ctx = (sp_384_mont_inv_order_12_ctx*)sp_ctx;

    /* Compile-time check that the context fits in the generic container. */
    typedef char ctx_size_test[sizeof(sp_384_mont_inv_order_12_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0:
        /* t = a accounts for the top exponent bit (bit 383). */
        XMEMCPY(t, a, sizeof(sp_digit) * 12);
        ctx->i = 382;
        ctx->state = 1;
        break;
    case 1:
        sp_384_mont_sqr_order_12(t, t);
        ctx->state = 2;
        break;
    case 2:
        if ((p384_order_minus_2[ctx->i / 32] & ((sp_int_digit)1 << (ctx->i % 32))) != 0) {
            sp_384_mont_mul_order_12(t, t, a);
        }
        ctx->i--;
        /* Bits 382 down to 0 inclusive must all be processed, matching the
         * blocking loop (i = 382; i >= 0). Finish only once bit 0 is done. */
        ctx->state = (ctx->i < 0) ? 3 : 1;
        break;
    case 3:
        XMEMCPY(r, t, sizeof(sp_digit) * 12U);
        err = MP_OKAY;
        break;
    }
    return err;
}
30271 #endif /* WOLFSSL_SP_NONBLOCK */
30272 
/* Invert a number, in Montgomery form, modulo the order of the P384 curve
 * by raising it to the power (order - 2). (r = 1 / a mod order)
 *
 * r   Inverse result.
 * a   Number to invert.
 * td  Temporary data.
 */
static void sp_384_mont_inv_order_12(sp_digit* r, const sp_digit* a,
        sp_digit* td)
{
#ifdef WOLFSSL_SP_SMALL
    sp_digit* acc = td;
    int bit = 382;

    /* Plain square-and-multiply over every bit of (order - 2) below the
     * top one; starting from a accounts for the top bit. */
    XMEMCPY(acc, a, sizeof(sp_digit) * 12);
    while (bit >= 0) {
        sp_384_mont_sqr_order_12(acc, acc);
        if ((p384_order_minus_2[bit / 32] & ((sp_int_digit)1 << (bit % 32))) != 0) {
            sp_384_mont_mul_order_12(acc, acc, a);
        }
        bit--;
    }
    XMEMCPY(r, acc, sizeof(sp_digit) * 12U);
#else
    sp_digit* lo = td;
    sp_digit* hi = td + 2 * 12;
    sp_digit* w16 = td + 4 * 12;
    int bit;

    /* Build a^(2^192 - 1), i.e. the all-ones top half of the exponent,
     * by repeatedly doubling the length of a run of one bits. */

    /* lo = a^2 */
    sp_384_mont_sqr_order_12(lo, a);
    /* lo = a^3 */
    sp_384_mont_mul_order_12(lo, lo, a);
    /* hi = a^c (lo squared twice) */
    sp_384_mont_sqr_n_order_12(hi, lo, 2);
    /* lo = a^f */
    sp_384_mont_mul_order_12(lo, hi, lo);
    /* hi = a^f0 (lo squared 4 times) */
    sp_384_mont_sqr_n_order_12(hi, lo, 4);
    /* lo = a^ff */
    sp_384_mont_mul_order_12(lo, hi, lo);
    /* hi = a^ff00 (lo squared 8 times) */
    sp_384_mont_sqr_n_order_12(hi, lo, 8);
    /* w16 = a^ffff */
    sp_384_mont_mul_order_12(w16, hi, lo);
    /* hi = a^ffff0000 (w16 squared 16 times) */
    sp_384_mont_sqr_n_order_12(hi, w16, 16);
    /* lo = a^ffffffff */
    sp_384_mont_mul_order_12(lo, hi, w16);
    /* hi = a^ffffffff0000 (lo squared 16 times) */
    sp_384_mont_sqr_n_order_12(hi, lo, 16);
    /* lo = a^ffffffffffff (48 one bits) */
    sp_384_mont_mul_order_12(lo, hi, w16);
    /* hi = lo squared 48 times */
    sp_384_mont_sqr_n_order_12(hi, lo, 48);
    /* lo = a^ffffffffffffffffffffffff (96 one bits) */
    sp_384_mont_mul_order_12(lo, hi, lo);
    /* hi = lo squared 96 times */
    sp_384_mont_sqr_n_order_12(hi, lo, 96);
    /* hi = a^(2^192 - 1) (192 one bits) */
    sp_384_mont_mul_order_12(hi, hi, lo);

    /* Square-and-multiply over bits 191..1 of the low half of order - 2. */
    for (bit = 191; bit > 0; --bit) {
        sp_384_mont_sqr_order_12(hi, hi);
        if ((p384_order_low[bit / 32] & ((sp_int_digit)1 << (bit % 32))) != 0) {
            sp_384_mont_mul_order_12(hi, hi, a);
        }
    }
    /* Bit 0 is handled unconditionally: square, then multiply into r. */
    sp_384_mont_sqr_order_12(hi, hi);
    sp_384_mont_mul_order_12(r, hi, a);
#endif /* WOLFSSL_SP_SMALL */
}
30336 
30337 #endif /* HAVE_ECC_SIGN || (HAVE_ECC_VERIFY && WOLFSSL_SP_SMALL) */
30338 #endif /* HAVE_ECC_SIGN | HAVE_ECC_VERIFY */
30339 #ifdef HAVE_ECC_SIGN
30340 #ifndef SP_ECC_MAX_SIG_GEN
30341 #define SP_ECC_MAX_SIG_GEN  64
30342 #endif
30343 
30344 /* Calculate second signature value S from R, k and private value.
30345  *
30346  * s = (r * x + e) / k
30347  *
30348  * s    Signature value.
30349  * r    First signature value.
30350  * k    Ephemeral private key.
30351  * x    Private key as a number.
30352  * e    Hash of message as a number.
30353  * tmp  Temporary storage for intermediate numbers.
30354  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
30355  */
sp_384_calc_s_12(sp_digit * s,const sp_digit * r,sp_digit * k,sp_digit * x,const sp_digit * e,sp_digit * tmp)30356 static int sp_384_calc_s_12(sp_digit* s, const sp_digit* r, sp_digit* k,
30357     sp_digit* x, const sp_digit* e, sp_digit* tmp)
30358 {
30359     int err;
30360     sp_digit carry;
30361     sp_int32 c;
30362     sp_digit* kInv = k;
30363 
30364     /* Conv k to Montgomery form (mod order) */
30365         sp_384_mul_12(k, k, p384_norm_order);
30366     err = sp_384_mod_12(k, k, p384_order);
30367     if (err == MP_OKAY) {
30368         sp_384_norm_12(k);
30369 
30370         /* kInv = 1/k mod order */
30371             sp_384_mont_inv_order_12(kInv, k, tmp);
30372         sp_384_norm_12(kInv);
30373 
30374         /* s = r * x + e */
30375             sp_384_mul_12(x, x, r);
30376         err = sp_384_mod_12(x, x, p384_order);
30377     }
30378     if (err == MP_OKAY) {
30379         sp_384_norm_12(x);
30380         carry = sp_384_add_12(s, e, x);
30381         sp_384_cond_sub_12(s, s, p384_order, 0 - carry);
30382         sp_384_norm_12(s);
30383         c = sp_384_cmp_12(s, p384_order);
30384         sp_384_cond_sub_12(s, s, p384_order,
30385             (sp_digit)0 - (sp_digit)(c >= 0));
30386         sp_384_norm_12(s);
30387 
30388         /* s = s * k^-1 mod order */
30389             sp_384_mont_mul_order_12(s, s, kInv);
30390         sp_384_norm_12(s);
30391     }
30392 
30393     return err;
30394 }
30395 
30396 /* Sign the hash using the private key.
30397  *   e = [hash, 384 bits] from binary
30398  *   r = (k.G)->x mod order
30399  *   s = (r * x + e) / k mod order
30400  * The hash is truncated to the first 384 bits.
30401  *
30402  * hash     Hash to sign.
30403  * hashLen  Length of the hash data.
30404  * rng      Random number generator.
30405  * priv     Private part of key - scalar.
30406  * rm       First part of result as an mp_int.
 * sm       Second part of result as an mp_int.
30408  * heap     Heap to use for allocation.
30409  * returns RNG failures, MEMORY_E when memory allocation fails and
30410  * MP_OKAY on success.
30411  */
30412 #ifdef WOLFSSL_SP_NONBLOCK
30413 typedef struct sp_ecc_sign_384_ctx {
30414     int state;
30415     union {
30416         sp_384_ecc_mulmod_12_ctx mulmod_ctx;
30417         sp_384_mont_inv_order_12_ctx mont_inv_order_ctx;
30418     };
30419     sp_digit e[2*12];
30420     sp_digit x[2*12];
30421     sp_digit k[2*12];
30422     sp_digit r[2*12];
30423     sp_digit tmp[3 * 2*12];
30424     sp_point_384 point;
30425     sp_digit* s;
30426     sp_digit* kInv;
30427     int i;
30428 } sp_ecc_sign_384_ctx;
30429 
sp_ecc_sign_384_nb(sp_ecc_ctx_t * sp_ctx,const byte * hash,word32 hashLen,WC_RNG * rng,mp_int * priv,mp_int * rm,mp_int * sm,mp_int * km,void * heap)30430 int sp_ecc_sign_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash, word32 hashLen, WC_RNG* rng,
30431     mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap)
30432 {
30433     int err = FP_WOULDBLOCK;
30434     sp_ecc_sign_384_ctx* ctx = (sp_ecc_sign_384_ctx*)sp_ctx->data;
30435 
30436     typedef char ctx_size_test[sizeof(sp_ecc_sign_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
30437     (void)sizeof(ctx_size_test);
30438 
30439     (void)heap;
30440 
30441     switch (ctx->state) {
30442     case 0: /* INIT */
30443         ctx->s = ctx->e;
30444         ctx->kInv = ctx->k;
30445         if (hashLen > 48U) {
30446             hashLen = 48U;
30447         }
30448 
30449         ctx->i = SP_ECC_MAX_SIG_GEN;
30450         ctx->state = 1;
30451         break;
30452     case 1: /* GEN */
30453         /* New random point. */
30454         if (km == NULL || mp_iszero(km)) {
30455             err = sp_384_ecc_gen_k_12(rng, ctx->k);
30456         }
30457         else {
30458             sp_384_from_mp(ctx->k, 12, km);
30459             mp_zero(km);
30460         }
30461         XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
30462         ctx->state = 2;
30463         break;
30464     case 2: /* MULMOD */
30465         err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx,
30466             &ctx->point, &p384_base, ctx->k, 1, 1, heap);
30467         if (err == MP_OKAY) {
30468             ctx->state = 3;
30469         }
30470         break;
30471     case 3: /* MODORDER */
30472     {
30473         sp_int32 c;
30474         /* r = point->x mod order */
30475         XMEMCPY(ctx->r, ctx->point.x, sizeof(sp_digit) * 12U);
30476         sp_384_norm_12(ctx->r);
30477         c = sp_384_cmp_12(ctx->r, p384_order);
30478         sp_384_cond_sub_12(ctx->r, ctx->r, p384_order,
30479             (sp_digit)0 - (sp_digit)(c >= 0));
30480         sp_384_norm_12(ctx->r);
30481 
30482         sp_384_from_mp(ctx->x, 12, priv);
30483         sp_384_from_bin(ctx->e, 12, hash, (int)hashLen);
30484         ctx->state = 4;
30485         break;
30486     }
30487     case 4: /* KMODORDER */
30488         /* Conv k to Montgomery form (mod order) */
30489         sp_384_mul_12(ctx->k, ctx->k, p384_norm_order);
30490         err = sp_384_mod_12(ctx->k, ctx->k, p384_order);
30491         if (err == MP_OKAY) {
30492             sp_384_norm_12(ctx->k);
30493             XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
30494             ctx->state = 5;
30495         }
30496         break;
30497     case 5: /* KINV */
30498         /* kInv = 1/k mod order */
30499         err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->kInv, ctx->k, ctx->tmp);
30500         if (err == MP_OKAY) {
30501             XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
30502             ctx->state = 6;
30503         }
30504         break;
30505     case 6: /* KINVNORM */
30506         sp_384_norm_12(ctx->kInv);
30507         ctx->state = 7;
30508         break;
30509     case 7: /* R */
30510         /* s = r * x + e */
30511         sp_384_mul_12(ctx->x, ctx->x, ctx->r);
30512         ctx->state = 8;
30513         break;
30514     case 8: /* S1 */
30515         err = sp_384_mod_12(ctx->x, ctx->x, p384_order);
30516         if (err == MP_OKAY)
30517             ctx->state = 9;
30518         break;
30519     case 9: /* S2 */
30520     {
30521         sp_digit carry;
30522         sp_int32 c;
30523         sp_384_norm_12(ctx->x);
30524         carry = sp_384_add_12(ctx->s, ctx->e, ctx->x);
30525         sp_384_cond_sub_12(ctx->s, ctx->s,
30526             p384_order, 0 - carry);
30527         sp_384_norm_12(ctx->s);
30528         c = sp_384_cmp_12(ctx->s, p384_order);
30529         sp_384_cond_sub_12(ctx->s, ctx->s, p384_order,
30530             (sp_digit)0 - (sp_digit)(c >= 0));
30531         sp_384_norm_12(ctx->s);
30532 
30533         /* s = s * k^-1 mod order */
30534         sp_384_mont_mul_order_12(ctx->s, ctx->s, ctx->kInv);
30535         sp_384_norm_12(ctx->s);
30536 
30537         /* Check that signature is usable. */
30538         if (sp_384_iszero_12(ctx->s) == 0) {
30539             ctx->state = 10;
30540             break;
30541         }
30542     #ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP
30543         ctx->i = 1;
30544     #endif
30545 
30546         /* not usable gen, try again */
30547         ctx->i--;
30548         if (ctx->i == 0) {
30549             err = RNG_FAILURE_E;
30550         }
30551         ctx->state = 1;
30552         break;
30553     }
30554     case 10: /* RES */
30555         err = sp_384_to_mp(ctx->r, rm);
30556         if (err == MP_OKAY) {
30557             err = sp_384_to_mp(ctx->s, sm);
30558         }
30559         break;
30560     }
30561 
30562     if (err == MP_OKAY && ctx->state != 10) {
30563         err = FP_WOULDBLOCK;
30564     }
30565     if (err != FP_WOULDBLOCK) {
30566         XMEMSET(ctx->e, 0, sizeof(sp_digit) * 2U * 12U);
30567         XMEMSET(ctx->x, 0, sizeof(sp_digit) * 2U * 12U);
30568         XMEMSET(ctx->k, 0, sizeof(sp_digit) * 2U * 12U);
30569         XMEMSET(ctx->r, 0, sizeof(sp_digit) * 2U * 12U);
30570         XMEMSET(ctx->tmp, 0, sizeof(sp_digit) * 3U * 2U * 12U);
30571     }
30572 
30573     return err;
30574 }
30575 #endif /* WOLFSSL_SP_NONBLOCK */
30576 
/* Sign the hash using the private key (blocking version).
 *   e = [hash, 384 bits] from binary
 *   r = (k.G)->x mod order
 *   s = (r * x + e) / k mod order
 * The hash is truncated to the first 384 bits.
 *
 * hash     Hash to sign.
 * hashLen  Length of the hash data.
 * rng      Random number generator.
 * priv     Private part of key - scalar.
 * rm       First part of result as an mp_int.
 * sm       Second part of result as an mp_int.
 * km       Optional ephemeral key to use; zeroed once it has been read.
 * heap     Heap to use for allocation.
 * returns RNG_FAILURE_E when no usable signature was generated within the
 * allowed number of attempts, MEMORY_E when memory allocation fails, the
 * error of a failing step otherwise, and MP_OKAY on success.
 */
int sp_ecc_sign_384(const byte* hash, word32 hashLen, WC_RNG* rng,
    const mp_int* priv, mp_int* rm, mp_int* sm, mp_int* km, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* e = NULL;
    sp_point_384* point = NULL;
#else
    sp_digit e[7 * 2 * 12];
    sp_point_384 point[1];
#endif
    sp_digit* x = NULL;
    sp_digit* k = NULL;
    sp_digit* r = NULL;
    sp_digit* tmp = NULL;
    sp_digit* s = NULL;
    sp_int32 c;
    int err = MP_OKAY;
    int i;

    (void)heap;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        point = (sp_point_384*)XMALLOC(sizeof(sp_point_384), heap,
                                             DYNAMIC_TYPE_ECC);
        if (point == NULL)
            err = MEMORY_E;
    }
    if (err == MP_OKAY) {
        e = (sp_digit*)XMALLOC(sizeof(sp_digit) * 7 * 2 * 12, heap,
                               DYNAMIC_TYPE_ECC);
        if (e == NULL)
            err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        /* Carve the working numbers out of the single buffer:
         * e/s, x, k and r take 2 * 12 digits each, tmp takes the rest. */
        x = e + 2 * 12;
        k = e + 4 * 12;
        r = e + 6 * 12;
        tmp = e + 8 * 12;
        s = e;

        if (hashLen > 48U) {
            hashLen = 48U;
        }
    }

    for (i = SP_ECC_MAX_SIG_GEN; err == MP_OKAY && i > 0; i--) {
        /* New random point. */
        if (km == NULL || mp_iszero(km)) {
            err = sp_384_ecc_gen_k_12(rng, k);
        }
        else {
            sp_384_from_mp(k, 12, km);
            mp_zero(km);
        }
        if (err == MP_OKAY) {
            err = sp_384_ecc_mulmod_base_12(point, k, 1, 1, heap);
        }

        if (err == MP_OKAY) {
            /* r = point->x mod order */
            XMEMCPY(r, point->x, sizeof(sp_digit) * 12U);
            sp_384_norm_12(r);
            c = sp_384_cmp_12(r, p384_order);
            sp_384_cond_sub_12(r, r, p384_order,
                (sp_digit)0 - (sp_digit)(c >= 0));
            sp_384_norm_12(r);

            sp_384_from_mp(x, 12, priv);
            sp_384_from_bin(e, 12, hash, (int)hashLen);

            err = sp_384_calc_s_12(s, r, k, x, e, tmp);
        }

        /* Check that signature is usable. */
        if ((err == MP_OKAY) && (sp_384_iszero_12(s) == 0)) {
            break;
        }
#ifdef WOLFSSL_ECDSA_SET_K_ONE_LOOP
        i = 1;
#endif
    }

    /* Running out of attempts is an RNG failure, but only when nothing
     * else went wrong: an error from a step above must not be replaced. */
    if ((err == MP_OKAY) && (i == 0)) {
        err = RNG_FAILURE_E;
    }

    if (err == MP_OKAY) {
        err = sp_384_to_mp(r, rm);
    }
    if (err == MP_OKAY) {
        err = sp_384_to_mp(s, sm);
    }

    /* Wipe the private working values before releasing the storage. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (e != NULL)
#endif
    {
        ForceZero(e, sizeof(sp_digit) * 7 * 2 * 12);
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(e, heap, DYNAMIC_TYPE_ECC);
    #endif
    }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (point != NULL)
#endif
    {
        ForceZero(point, sizeof(sp_point_384));
    #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
        XFREE(point, heap, DYNAMIC_TYPE_ECC);
    #endif
    }

    return err;
}
30694 #endif /* HAVE_ECC_SIGN */
30695 
30696 #ifndef WOLFSSL_SP_SMALL
30697 /* Divide the number by 2 mod the modulus. (r = a / 2 % m)
30698  *
30699  * r  Result of division by 2.
30700  * a  Number to divide.
30701  * m  Modulus.
30702  */
sp_384_div2_mod_12(sp_digit * r,const sp_digit * a,const sp_digit * m)30703 static void sp_384_div2_mod_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
30704 {
30705     __asm__ __volatile__ (
30706         "ldr       r4, [%[a]]\n\t"
30707         "ands      r8, r4, #1\n\t"
30708         "beq       1f\n\t"
30709         "mov       r12, #0\n\t"
30710         "ldr     r5, [%[a], #4]\n\t"
30711         "ldr     r6, [%[a], #8]\n\t"
30712         "ldr     r7, [%[a], #12]\n\t"
30713         "ldr     r8, [%[m], #0]\n\t"
30714         "ldr     r9, [%[m], #4]\n\t"
30715         "ldr     r10, [%[m], #8]\n\t"
30716         "ldr     r14, [%[m], #12]\n\t"
30717         "adds    r4, r4, r8\n\t"
30718         "adcs    r5, r5, r9\n\t"
30719         "adcs    r6, r6, r10\n\t"
30720         "adcs    r7, r7, r14\n\t"
30721         "str     r4, [%[r], #0]\n\t"
30722         "str     r5, [%[r], #4]\n\t"
30723         "str     r6, [%[r], #8]\n\t"
30724         "str     r7, [%[r], #12]\n\t"
30725         "ldr     r4, [%[a], #16]\n\t"
30726         "ldr     r5, [%[a], #20]\n\t"
30727         "ldr     r6, [%[a], #24]\n\t"
30728         "ldr     r7, [%[a], #28]\n\t"
30729         "ldr     r8, [%[m], #16]\n\t"
30730         "ldr     r9, [%[m], #20]\n\t"
30731         "ldr     r10, [%[m], #24]\n\t"
30732         "ldr     r14, [%[m], #28]\n\t"
30733         "adcs    r4, r4, r8\n\t"
30734         "adcs    r5, r5, r9\n\t"
30735         "adcs    r6, r6, r10\n\t"
30736         "adcs    r7, r7, r14\n\t"
30737         "str     r4, [%[r], #16]\n\t"
30738         "str     r5, [%[r], #20]\n\t"
30739         "str     r6, [%[r], #24]\n\t"
30740         "str     r7, [%[r], #28]\n\t"
30741         "ldr     r4, [%[a], #32]\n\t"
30742         "ldr     r5, [%[a], #36]\n\t"
30743         "ldr     r6, [%[a], #40]\n\t"
30744         "ldr     r7, [%[a], #44]\n\t"
30745         "ldr     r8, [%[m], #32]\n\t"
30746         "ldr     r9, [%[m], #36]\n\t"
30747         "ldr     r10, [%[m], #40]\n\t"
30748         "ldr     r14, [%[m], #44]\n\t"
30749         "adcs    r4, r4, r8\n\t"
30750         "adcs    r5, r5, r9\n\t"
30751         "adcs    r6, r6, r10\n\t"
30752         "adcs    r7, r7, r14\n\t"
30753         "str     r4, [%[r], #32]\n\t"
30754         "str     r5, [%[r], #36]\n\t"
30755         "str     r6, [%[r], #40]\n\t"
30756         "str     r7, [%[r], #44]\n\t"
30757         "adc       r8, r12, r12\n\t"
30758         "b 2f\n\t"
30759         "\n1:\n\t"
30760         "ldr     r5, [%[a], #2]\n\t"
30761         "str     r4, [%[r], #0]\n\t"
30762         "str     r5, [%[r], #2]\n\t"
30763         "ldr     r4, [%[a], #4]\n\t"
30764         "ldr     r5, [%[a], #6]\n\t"
30765         "str     r4, [%[r], #4]\n\t"
30766         "str     r5, [%[r], #6]\n\t"
30767         "ldr     r4, [%[a], #8]\n\t"
30768         "ldr     r5, [%[a], #10]\n\t"
30769         "str     r4, [%[r], #8]\n\t"
30770         "str     r5, [%[r], #10]\n\t"
30771         "ldr     r4, [%[a], #12]\n\t"
30772         "ldr     r5, [%[a], #14]\n\t"
30773         "str     r4, [%[r], #12]\n\t"
30774         "str     r5, [%[r], #14]\n\t"
30775         "ldr     r4, [%[a], #16]\n\t"
30776         "ldr     r5, [%[a], #18]\n\t"
30777         "str     r4, [%[r], #16]\n\t"
30778         "str     r5, [%[r], #18]\n\t"
30779         "ldr     r4, [%[a], #20]\n\t"
30780         "ldr     r5, [%[a], #22]\n\t"
30781         "str     r4, [%[r], #20]\n\t"
30782         "str     r5, [%[r], #22]\n\t"
30783         "\n2:\n\t"
30784         "ldr       r3, [%[r]]\n\t"
30785         "ldr       r4, [%[r], #4]\n\t"
30786         "lsr       r3, r3, #1\n\t"
30787         "orr       r3, r3, r4, lsl #31\n\t"
30788         "lsr       r4, r4, #1\n\t"
30789         "ldr     r5, [%[a], #8]\n\t"
30790         "str     r3, [%[r], #0]\n\t"
30791         "orr     r4, r4, r5, lsl #31\n\t"
30792         "lsr     r5, r5, #1\n\t"
30793         "ldr     r3, [%[a], #12]\n\t"
30794         "str     r4, [%[r], #4]\n\t"
30795         "orr     r5, r5, r3, lsl #31\n\t"
30796         "lsr     r3, r3, #1\n\t"
30797         "ldr     r4, [%[a], #16]\n\t"
30798         "str     r5, [%[r], #8]\n\t"
30799         "orr     r3, r3, r4, lsl #31\n\t"
30800         "lsr     r4, r4, #1\n\t"
30801         "ldr     r5, [%[a], #20]\n\t"
30802         "str     r3, [%[r], #12]\n\t"
30803         "orr     r4, r4, r5, lsl #31\n\t"
30804         "lsr     r5, r5, #1\n\t"
30805         "ldr     r3, [%[a], #24]\n\t"
30806         "str     r4, [%[r], #16]\n\t"
30807         "orr     r5, r5, r3, lsl #31\n\t"
30808         "lsr     r3, r3, #1\n\t"
30809         "ldr     r4, [%[a], #28]\n\t"
30810         "str     r5, [%[r], #20]\n\t"
30811         "orr     r3, r3, r4, lsl #31\n\t"
30812         "lsr     r4, r4, #1\n\t"
30813         "ldr     r5, [%[a], #32]\n\t"
30814         "str     r3, [%[r], #24]\n\t"
30815         "orr     r4, r4, r5, lsl #31\n\t"
30816         "lsr     r5, r5, #1\n\t"
30817         "ldr     r3, [%[a], #36]\n\t"
30818         "str     r4, [%[r], #28]\n\t"
30819         "orr     r5, r5, r3, lsl #31\n\t"
30820         "lsr     r3, r3, #1\n\t"
30821         "ldr     r4, [%[a], #40]\n\t"
30822         "str     r5, [%[r], #32]\n\t"
30823         "orr     r3, r3, r4, lsl #31\n\t"
30824         "lsr     r4, r4, #1\n\t"
30825         "ldr     r5, [%[a], #44]\n\t"
30826         "str     r3, [%[r], #36]\n\t"
30827         "orr     r4, r4, r5, lsl #31\n\t"
30828         "lsr     r5, r5, #1\n\t"
30829         "orr       r5, r5, r8, lsl #31\n\t"
30830         "str       r4, [%[r], #40]\n\t"
30831         "str       r5, [%[r], #44]\n\t"
30832         :
30833         : [r] "r" (r), [a] "r" (a), [m] "r" (m)
30834         : "memory", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14"
30835     );
30836 }
30837 
/* Count the number of significant bits in a 12 digit number.
 *
 * The digits are examined from the most significant down; the first
 * non-zero digit found determines the result.
 *
 * a  A single precision integer (12 digits).
 * returns the position of the highest set bit plus one, or 0 when every
 * digit is zero.
 */
static int sp_384_num_bits_12(sp_digit* a)
{
    int r = 0;

    __asm__ __volatile__ (
        /* For each digit: if non-zero, r = (bits up to and including this
         * digit) - (leading zeros in this digit). */
        "ldr r2, [%[a], #44]\n\t"
        "cmp r2, #0\n\t"
        "beq 11f\n\t"
        "mov r3, #384\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n11:\n\t"
        "ldr r2, [%[a], #40]\n\t"
        "cmp r2, #0\n\t"
        "beq 10f\n\t"
        "mov r3, #352\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n10:\n\t"
        "ldr r2, [%[a], #36]\n\t"
        "cmp r2, #0\n\t"
        "beq 9f\n\t"
        "mov r3, #320\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n9:\n\t"
        "ldr r2, [%[a], #32]\n\t"
        "cmp r2, #0\n\t"
        "beq 8f\n\t"
        "mov r3, #288\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n8:\n\t"
        "ldr r2, [%[a], #28]\n\t"
        "cmp r2, #0\n\t"
        "beq 7f\n\t"
        "mov r3, #256\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n7:\n\t"
        "ldr r2, [%[a], #24]\n\t"
        "cmp r2, #0\n\t"
        "beq 6f\n\t"
        "mov r3, #224\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n6:\n\t"
        "ldr r2, [%[a], #20]\n\t"
        "cmp r2, #0\n\t"
        "beq 5f\n\t"
        "mov r3, #192\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n5:\n\t"
        "ldr r2, [%[a], #16]\n\t"
        "cmp r2, #0\n\t"
        "beq 4f\n\t"
        "mov r3, #160\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n4:\n\t"
        "ldr r2, [%[a], #12]\n\t"
        "cmp r2, #0\n\t"
        "beq 3f\n\t"
        "mov r3, #128\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n3:\n\t"
        "ldr r2, [%[a], #8]\n\t"
        "cmp r2, #0\n\t"
        "beq 2f\n\t"
        "mov r3, #96\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n2:\n\t"
        "ldr r2, [%[a], #4]\n\t"
        "cmp r2, #0\n\t"
        "beq 1f\n\t"
        "mov r3, #64\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "b   13f\n\t"
        "\n1:\n\t"
        /* Lowest digit: no zero test; a zero digit gives 32 - 32 = 0. */
        "ldr r2, [%[a], #0]\n\t"
        "mov r3, #32\n\t"
        "clz %[r], r2\n\t"
        "sub %[r], r3, %[r]\n\t"
        "\n13:\n\t"
        : [r] "+r" (r)
        : [a] "r" (a)
        /* "memory": the digits are read through the pointer a, which the
         * compiler cannot see from the operand list alone.
         * "cc": cmp modifies the condition flags. */
        : "memory", "r2", "r3", "cc"
    );

    return r;
}
30943 
30944 /* Non-constant time modular inversion.
30945  *
30946  * @param  [out]  r   Resulting number.
30947  * @param  [in]   a   Number to invert.
30948  * @param  [in]   m   Modulus.
30949  * @return  MP_OKAY on success.
30950  */
sp_384_mod_inv_12(sp_digit * r,const sp_digit * a,const sp_digit * m)30951 static int sp_384_mod_inv_12(sp_digit* r, const sp_digit* a, const sp_digit* m)
30952 {
30953     sp_digit u[12];
30954     sp_digit v[12];
30955     sp_digit b[12];
30956     sp_digit d[12];
30957     int ut, vt;
30958     sp_digit o;
30959 
30960     XMEMCPY(u, m, sizeof(u));
30961     XMEMCPY(v, a, sizeof(v));
30962 
30963     ut = sp_384_num_bits_12(u);
30964     vt = sp_384_num_bits_12(v);
30965 
30966     XMEMSET(b, 0, sizeof(b));
30967     if ((v[0] & 1) == 0) {
30968         sp_384_rshift1_12(v, v);
30969         XMEMCPY(d, m, sizeof(u));
30970         d[0] += 1;
30971         sp_384_rshift1_12(d, d);
30972         vt--;
30973 
30974         while ((v[0] & 1) == 0) {
30975             sp_384_rshift1_12(v, v);
30976             sp_384_div2_mod_12(d, d, m);
30977             vt--;
30978         }
30979     }
30980     else {
30981         XMEMSET(d+1, 0, sizeof(d)-sizeof(sp_digit));
30982         d[0] = 1;
30983     }
30984 
30985     while (ut > 1 && vt > 1) {
30986         if (ut > vt || (ut == vt && sp_384_cmp_12(u, v) >= 0)) {
30987             sp_384_sub_12(u, u, v);
30988             o = sp_384_sub_12(b, b, d);
30989             if (o != 0)
30990                 sp_384_add_12(b, b, m);
30991             ut = sp_384_num_bits_12(u);
30992 
30993             do {
30994                 sp_384_rshift1_12(u, u);
30995                 sp_384_div2_mod_12(b, b, m);
30996                 ut--;
30997             }
30998             while (ut > 0 && (u[0] & 1) == 0);
30999         }
31000         else {
31001             sp_384_sub_12(v, v, u);
31002             o = sp_384_sub_12(d, d, b);
31003             if (o != 0)
31004                 sp_384_add_12(d, d, m);
31005             vt = sp_384_num_bits_12(v);
31006 
31007             do {
31008                 sp_384_rshift1_12(v, v);
31009                 sp_384_div2_mod_12(d, d, m);
31010                 vt--;
31011             }
31012             while (vt > 0 && (v[0] & 1) == 0);
31013         }
31014     }
31015 
31016     if (ut == 1)
31017         XMEMCPY(r, b, sizeof(b));
31018     else
31019         XMEMCPY(r, d, sizeof(d));
31020 
31021     return MP_OKAY;
31022 }
31023 
31024 #endif /* WOLFSSL_SP_SMALL */
31025 
31026 /* Add point p1 into point p2. Handles p1 == p2 and result at infinity.
31027  *
31028  * p1   First point to add and holds result.
31029  * p2   Second point to add.
31030  * tmp  Temporary storage for intermediate numbers.
31031  */
sp_384_add_points_12(sp_point_384 * p1,const sp_point_384 * p2,sp_digit * tmp)31032 static void sp_384_add_points_12(sp_point_384* p1, const sp_point_384* p2,
31033     sp_digit* tmp)
31034 {
31035 
31036         sp_384_proj_point_add_12(p1, p1, p2, tmp);
31037     if (sp_384_iszero_12(p1->z)) {
31038         if (sp_384_iszero_12(p1->x) && sp_384_iszero_12(p1->y)) {
31039                 sp_384_proj_point_dbl_12(p1, p2, tmp);
31040         }
31041         else {
31042             /* Y ordinate is not used from here - don't set. */
31043             p1->x[0] = 0;
31044             p1->x[1] = 0;
31045             p1->x[2] = 0;
31046             p1->x[3] = 0;
31047             p1->x[4] = 0;
31048             p1->x[5] = 0;
31049             p1->x[6] = 0;
31050             p1->x[7] = 0;
31051             p1->x[8] = 0;
31052             p1->x[9] = 0;
31053             p1->x[10] = 0;
31054             p1->x[11] = 0;
31055             XMEMCPY(p1->z, p384_norm_mod, sizeof(p384_norm_mod));
31056         }
31057     }
31058 }
31059 
31060 /* Calculate the verification point: [e/s]G + [r/s]Q
31061  *
31062  * p1    Calculated point.
31063  * p2    Public point and temporary.
31064  * s     Second part of signature as a number.
31065  * u1    Temporary number.
31066  * u2    Temproray number.
31067  * heap  Heap to use for allocation.
31068  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
31069  */
sp_384_calc_vfy_point_12(sp_point_384 * p1,sp_point_384 * p2,sp_digit * s,sp_digit * u1,sp_digit * u2,sp_digit * tmp,void * heap)31070 static int sp_384_calc_vfy_point_12(sp_point_384* p1, sp_point_384* p2,
31071     sp_digit* s, sp_digit* u1, sp_digit* u2, sp_digit* tmp, void* heap)
31072 {
31073     int err;
31074 
31075 #ifndef WOLFSSL_SP_SMALL
31076     err = sp_384_mod_inv_12(s, s, p384_order);
31077     if (err == MP_OKAY)
31078 #endif /* !WOLFSSL_SP_SMALL */
31079     {
31080         sp_384_mul_12(s, s, p384_norm_order);
31081         err = sp_384_mod_12(s, s, p384_order);
31082     }
31083     if (err == MP_OKAY) {
31084         sp_384_norm_12(s);
31085 #ifdef WOLFSSL_SP_SMALL
31086         {
31087             sp_384_mont_inv_order_12(s, s, tmp);
31088             sp_384_mont_mul_order_12(u1, u1, s);
31089             sp_384_mont_mul_order_12(u2, u2, s);
31090         }
31091 #else
31092         {
31093             sp_384_mont_mul_order_12(u1, u1, s);
31094             sp_384_mont_mul_order_12(u2, u2, s);
31095         }
31096 #endif /* WOLFSSL_SP_SMALL */
31097         {
31098             err = sp_384_ecc_mulmod_base_12(p1, u1, 0, 0, heap);
31099         }
31100     }
31101     if ((err == MP_OKAY) && sp_384_iszero_12(p1->z)) {
31102         p1->infinity = 1;
31103     }
31104     if (err == MP_OKAY) {
31105             err = sp_384_ecc_mulmod_12(p2, p2, u2, 0, 0, heap);
31106     }
31107     if ((err == MP_OKAY) && sp_384_iszero_12(p2->z)) {
31108         p2->infinity = 1;
31109     }
31110 
31111     if (err == MP_OKAY) {
31112         sp_384_add_points_12(p1, p2, tmp);
31113     }
31114 
31115     return err;
31116 }
31117 
31118 #ifdef HAVE_ECC_VERIFY
/* Verify the signature values with the hash and public key.
 *   e = Truncate(hash, 384)
 *   u1 = e/s mod order
 *   u2 = r/s mod order
 *   r == (u1.G + u2.Q)->x mod order
 * Optimization: Leave point in projective form.
 *   (x, y, 1) == (x' / z'*z', y' / z'*z'*z', z' / z')
 *   (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x'
 * The hash is truncated to the first 384 bits.
 *
 * hash     Hash that was signed.
 * hashLen  Length of the hash data.
 * pX       X ordinate of the public key point as an mp_int.
 * pY       Y ordinate of the public key point as an mp_int.
 * pZ       Z ordinate of the public key point as an mp_int.
 * rm       First part of signature as an mp_int.
 * sm       Second part of signature as an mp_int.
 * res      Set to 1 when the signature verifies and 0 when it does not.
 * heap     Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
31138 #ifdef WOLFSSL_SP_NONBLOCK
/* Data kept between calls to sp_ecc_verify_384_nb(). */
typedef struct sp_ecc_verify_384_ctx {
    /* Step of the verification to perform on the next call (0 to 12). */
    int state;
    /* Contexts of the non-blocking sub-operations, sharing storage. Those
     * used by sp_ecc_verify_384_nb() are zeroed before the sub-operation is
     * started. */
    union {
        sp_384_ecc_mulmod_12_ctx mulmod_ctx;
        sp_384_mont_inv_order_12_ctx mont_inv_order_ctx;
        sp_384_proj_point_dbl_12_ctx dbl_ctx;
        sp_384_proj_point_add_12_ctx add_ctx;
    };
    /* Hash as a number, then e/s, then r.z'.z' for the final comparison. */
    sp_digit u1[2*12];
    /* r, then r/s; reloaded with r (and r + order) for the comparison. */
    sp_digit u2[2*12];
    /* s, converted to Montgomery form and inverted. */
    sp_digit s[2*12];
    /* Temporary storage handed to the sub-operations. */
    sp_digit tmp[2*12 * 5];
    /* [u1]G and then the sum of the two points. */
    sp_point_384 p1;
    /* Public key point and then [u2]Q. */
    sp_point_384 p2;
} sp_ecc_verify_384_ctx;
31154 
/* Non-blocking verification of a P-384 signature.
 *
 * One step of the verification is performed per call, with progress recorded
 * in the context. Call again with the same arguments while FP_WOULDBLOCK is
 * returned.
 *
 * sp_ctx   Context holding the data between calls. The state is expected to
 *          be 0 on the first call.
 * hash     Hash that was signed.
 * hashLen  Length of the hash data - at most 48 bytes are used.
 * pX       X ordinate of the public key point as an mp_int.
 * pY       Y ordinate of the public key point as an mp_int.
 * pZ       Z ordinate of the public key point as an mp_int.
 * rm       First part of signature as an mp_int.
 * sm       Second part of signature as an mp_int.
 * res      Set to 1 when the signature verifies and 0 when it does not.
 *          Only written by the final step.
 * heap     Heap to use for allocation.
 * returns FP_WOULDBLOCK when more calls are needed, MP_OKAY when the result
 * is available and any other value on error.
 */
int sp_ecc_verify_384_nb(sp_ecc_ctx_t* sp_ctx, const byte* hash,
    word32 hashLen, const mp_int* pX, const mp_int* pY, const mp_int* pZ,
    const mp_int* rm, const mp_int* sm, int* res, void* heap)
{
    int err = FP_WOULDBLOCK;
    sp_ecc_verify_384_ctx* ctx = (sp_ecc_verify_384_ctx*)sp_ctx->data;

    /* Compile time check that the verify context fits in the generic one. */
    typedef char ctx_size_test[sizeof(sp_ecc_verify_384_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT */
        if (hashLen > 48U) {
            hashLen = 48U;
        }

        sp_384_from_bin(ctx->u1, 12, hash, (int)hashLen);
        sp_384_from_mp(ctx->u2, 12, rm);
        sp_384_from_mp(ctx->s, 12, sm);
        sp_384_from_mp(ctx->p2.x, 12, pX);
        sp_384_from_mp(ctx->p2.y, 12, pY);
        sp_384_from_mp(ctx->p2.z, 12, pZ);
        ctx->state = 1;
        break;
    case 1: /* NORMS0 */
        /* Convert s to Montgomery form modulo the order. */
        sp_384_mul_12(ctx->s, ctx->s, p384_norm_order);
        err = sp_384_mod_12(ctx->s, ctx->s, p384_order);
        if (err == MP_OKAY)
            ctx->state = 2;
        break;
    case 2: /* NORMS1 */
        sp_384_norm_12(ctx->s);
        XMEMSET(&ctx->mont_inv_order_ctx, 0, sizeof(ctx->mont_inv_order_ctx));
        ctx->state = 3;
        break;
    case 3: /* NORMS2 */
        /* s = 1/s modulo the order. */
        err = sp_384_mont_inv_order_12_nb((sp_ecc_ctx_t*)&ctx->mont_inv_order_ctx, ctx->s, ctx->s, ctx->tmp);
        if (err == MP_OKAY) {
            ctx->state = 4;
        }
        break;
    case 4: /* NORMS3 */
        /* u1 = e/s */
        sp_384_mont_mul_order_12(ctx->u1, ctx->u1, ctx->s);
        ctx->state = 5;
        break;
    case 5: /* NORMS4 */
        /* u2 = r/s */
        sp_384_mont_mul_order_12(ctx->u2, ctx->u2, ctx->s);
        XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
        ctx->state = 6;
        break;
    case 6: /* MULBASE */
        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p1, &p384_base, ctx->u1, 0, 0, heap);
        if (err == MP_OKAY) {
            if (sp_384_iszero_12(ctx->p1.z)) {
                ctx->p1.infinity = 1;
            }
            XMEMSET(&ctx->mulmod_ctx, 0, sizeof(ctx->mulmod_ctx));
            ctx->state = 7;
        }
        break;
    case 7: /* MULMOD */
        err = sp_384_ecc_mulmod_12_nb((sp_ecc_ctx_t*)&ctx->mulmod_ctx, &ctx->p2, &ctx->p2, ctx->u2, 0, 0, heap);
        if (err == MP_OKAY) {
            if (sp_384_iszero_12(ctx->p2.z)) {
                ctx->p2.infinity = 1;
            }
            XMEMSET(&ctx->add_ctx, 0, sizeof(ctx->add_ctx));
            ctx->state = 8;
        }
        break;
    case 8: /* ADD */
        err = sp_384_proj_point_add_12_nb((sp_ecc_ctx_t*)&ctx->add_ctx, &ctx->p1, &ctx->p1, &ctx->p2, ctx->tmp);
        if (err == MP_OKAY) {
            /* Apply the same fix-up as sp_384_add_points_12() does for the
             * blocking verify. Without it a result of (0, 0, 0) would make
             * the comparison below 0 == 0 and report a valid signature.
             * The doubling is a single blocking operation. */
            if (sp_384_iszero_12(ctx->p1.z)) {
                if (sp_384_iszero_12(ctx->p1.x) &&
                        sp_384_iszero_12(ctx->p1.y)) {
                    /* Points were the same - double instead. */
                    sp_384_proj_point_dbl_12(&ctx->p1, &ctx->p2, ctx->tmp);
                }
                else {
                    /* Result is infinity. Y ordinate is not used from here -
                     * don't set. */
                    XMEMSET(ctx->p1.x, 0, sizeof(sp_digit) * 12);
                    XMEMCPY(ctx->p1.z, p384_norm_mod, sizeof(p384_norm_mod));
                }
            }
            ctx->state = 9;
        }
        break;
    case 9: /* MONT */
        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
        /* Reload r and convert to Montgomery form. */
        sp_384_from_mp(ctx->u2, 12, rm);
        err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
        if (err == MP_OKAY)
            ctx->state = 10;
        break;
    case 10: /* SQR */
        /* p1.z = z'.z' mod prime */
        sp_384_mont_sqr_12(ctx->p1.z, ctx->p1.z, p384_mod, p384_mp_mod);
        ctx->state = 11;
        break;
    case 11: /* MUL */
        /* u1 = r.z'.z' mod prime */
        sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod, p384_mp_mod);
        ctx->state = 12;
        break;
    case 12: /* RES */
    {
        sp_int32 c = 0;
        err = MP_OKAY; /* math okay, now check result */
        *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
        if (*res == 0) {
            sp_digit carry;

            /* Reload r and add order. */
            sp_384_from_mp(ctx->u2, 12, rm);
            carry = sp_384_add_12(ctx->u2, ctx->u2, p384_order);
            /* Carry means result is greater than mod and is not valid. */
            if (carry == 0) {
                sp_384_norm_12(ctx->u2);

                /* Compare with mod and if greater or equal then not valid. */
                c = sp_384_cmp_12(ctx->u2, p384_mod);
            }
        }
        if ((*res == 0) && (c < 0)) {
            /* Convert to Montgomery form */
            err = sp_384_mod_mul_norm_12(ctx->u2, ctx->u2, p384_mod);
            if (err == MP_OKAY) {
                /* u1 = (r + 1*order).z'.z' mod prime */
                sp_384_mont_mul_12(ctx->u1, ctx->u2, ctx->p1.z, p384_mod,
                                                            p384_mp_mod);
                *res = (int)(sp_384_cmp_12(ctx->p1.x, ctx->u1) == 0);
            }
        }
        break;
    }
    } /* switch */

    /* A step that completed without error is only final in the last state. */
    if (err == MP_OKAY && ctx->state != 12) {
        err = FP_WOULDBLOCK;
    }

    return err;
}
31286 #endif /* WOLFSSL_SP_NONBLOCK */
31287 
/* Verify a P-384 signature over a hash with a public key.
 *
 * hash     Hash that was signed.
 * hashLen  Length of the hash data - at most 48 bytes are used.
 * pX       X ordinate of the public key point as an mp_int.
 * pY       Y ordinate of the public key point as an mp_int.
 * pZ       Z ordinate of the public key point as an mp_int.
 * rm       First part of signature as an mp_int.
 * sm       Second part of signature as an mp_int.
 * res      Set to 1 when the signature verifies and 0 when it does not.
 * heap     Heap to use for allocation.
 * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
 */
int sp_ecc_verify_384(const byte* hash, word32 hashLen, const mp_int* pX,
    const mp_int* pY, const mp_int* pZ, const mp_int* rm, const mp_int* sm,
    int* res, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* u1 = NULL;
    sp_point_384* p1 = NULL;
#else
    sp_digit  u1[16 * 12];
    sp_point_384 p1[2];
#endif
    sp_digit* u2 = NULL;
    sp_digit* s = NULL;
    sp_digit* tmp = NULL;
    sp_point_384* p2 = NULL;
    int err = MP_OKAY;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    p1 = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap,
                                         DYNAMIC_TYPE_ECC);
    if (p1 == NULL) {
        err = MEMORY_E;
    }
    else {
        u1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 16 * 12, heap,
                                                          DYNAMIC_TYPE_ECC);
        if (u1 == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
        /* Only the first 384 bits of the hash are used. */
        word32 len = (hashLen > 48U) ? 48U : hashLen;

        /* Carve the working numbers out of the one buffer. */
        u2  = u1 + 2 * 12;
        s   = u1 + 4 * 12;
        tmp = u1 + 6 * 12;
        p2 = p1 + 1;

        sp_384_from_bin(u1, 12, hash, (int)len);
        sp_384_from_mp(u2, 12, rm);
        sp_384_from_mp(s, 12, sm);
        sp_384_from_mp(p2->x, 12, pX);
        sp_384_from_mp(p2->y, 12, pY);
        sp_384_from_mp(p2->z, 12, pZ);

        err = sp_384_calc_vfy_point_12(p1, p2, s, u1, u2, tmp, heap);
    }
    if (err == MP_OKAY) {
        /* (r + n*order).z'.z' mod prime == (u1.G + u2.Q)->x' */
        /* Reload r and convert to Montgomery form. */
        sp_384_from_mp(u2, 12, rm);
        err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
    }

    if (err == MP_OKAY) {
        /* u1 = r.z'.z' mod prime */
        sp_384_mont_sqr_12(p1->z, p1->z, p384_mod, p384_mp_mod);
        sp_384_mont_mul_12(u1, u2, p1->z, p384_mod, p384_mp_mod);
        *res = (int)(sp_384_cmp_12(p1->x, u1) == 0);
    }

    if ((err == MP_OKAY) && (*res == 0)) {
        /* No match with r - try again with r + order when that is a valid
         * X ordinate. */
        sp_int32 c = 0;

        /* Reload r and add order. */
        sp_384_from_mp(u2, 12, rm);
        /* Carry means result is greater than mod and is not valid. */
        if (sp_384_add_12(u2, u2, p384_order) == 0) {
            sp_384_norm_12(u2);

            /* Compare with mod and if greater or equal then not valid. */
            c = sp_384_cmp_12(u2, p384_mod);
        }

        if (c < 0) {
            /* Convert to Montgomery form */
            err = sp_384_mod_mul_norm_12(u2, u2, p384_mod);
            if (err == MP_OKAY) {
                /* u1 = (r + 1*order).z'.z' mod prime */
                sp_384_mont_mul_12(u1, u2, p1->z, p384_mod,
                    p384_mp_mod);
                *res = (sp_384_cmp_12(p1->x, u1) == 0);
            }
        }
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (u1 != NULL)
        XFREE(u1, heap, DYNAMIC_TYPE_ECC);
    if (p1 != NULL)
        XFREE(p1, heap, DYNAMIC_TYPE_ECC);
#endif

    return err;
}
31386 #endif /* HAVE_ECC_VERIFY */
31387 
31388 #ifdef HAVE_ECC_CHECK_KEY
31389 /* Check that the x and y oridinates are a valid point on the curve.
31390  *
31391  * point  EC point.
31392  * heap   Heap to use if dynamically allocating.
31393  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
31394  * not on the curve and MP_OKAY otherwise.
31395  */
sp_384_ecc_is_point_12(const sp_point_384 * point,void * heap)31396 static int sp_384_ecc_is_point_12(const sp_point_384* point,
31397     void* heap)
31398 {
31399 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31400     sp_digit* t1 = NULL;
31401 #else
31402     sp_digit t1[12 * 4];
31403 #endif
31404     sp_digit* t2 = NULL;
31405     int err = MP_OKAY;
31406 
31407 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31408     t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12 * 4, heap, DYNAMIC_TYPE_ECC);
31409     if (t1 == NULL)
31410         err = MEMORY_E;
31411 #endif
31412     (void)heap;
31413 
31414     if (err == MP_OKAY) {
31415         t2 = t1 + 2 * 12;
31416 
31417         sp_384_sqr_12(t1, point->y);
31418         (void)sp_384_mod_12(t1, t1, p384_mod);
31419         sp_384_sqr_12(t2, point->x);
31420         (void)sp_384_mod_12(t2, t2, p384_mod);
31421         sp_384_mul_12(t2, t2, point->x);
31422         (void)sp_384_mod_12(t2, t2, p384_mod);
31423         (void)sp_384_sub_12(t2, p384_mod, t2);
31424         sp_384_mont_add_12(t1, t1, t2, p384_mod);
31425 
31426         sp_384_mont_add_12(t1, t1, point->x, p384_mod);
31427         sp_384_mont_add_12(t1, t1, point->x, p384_mod);
31428         sp_384_mont_add_12(t1, t1, point->x, p384_mod);
31429 
31430         if (sp_384_cmp_12(t1, p384_b) != 0) {
31431             err = MP_VAL;
31432         }
31433     }
31434 
31435 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31436     if (t1 != NULL)
31437         XFREE(t1, heap, DYNAMIC_TYPE_ECC);
31438 #endif
31439 
31440     return err;
31441 }
31442 
31443 /* Check that the x and y oridinates are a valid point on the curve.
31444  *
31445  * pX  X ordinate of EC point.
31446  * pY  Y ordinate of EC point.
31447  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
31448  * not on the curve and MP_OKAY otherwise.
31449  */
sp_ecc_is_point_384(const mp_int * pX,const mp_int * pY)31450 int sp_ecc_is_point_384(const mp_int* pX, const mp_int* pY)
31451 {
31452 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31453     sp_point_384* pub = NULL;
31454 #else
31455     sp_point_384 pub[1];
31456 #endif
31457     const byte one[1] = { 1 };
31458     int err = MP_OKAY;
31459 
31460 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31461     pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL,
31462                                        DYNAMIC_TYPE_ECC);
31463     if (pub == NULL)
31464         err = MEMORY_E;
31465 #endif
31466 
31467     if (err == MP_OKAY) {
31468         sp_384_from_mp(pub->x, 12, pX);
31469         sp_384_from_mp(pub->y, 12, pY);
31470         sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
31471 
31472         err = sp_384_ecc_is_point_12(pub, NULL);
31473     }
31474 
31475 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31476     if (pub != NULL)
31477         XFREE(pub, NULL, DYNAMIC_TYPE_ECC);
31478 #endif
31479 
31480     return err;
31481 }
31482 
31483 /* Check that the private scalar generates the EC point (px, py), the point is
31484  * on the curve and the point has the correct order.
31485  *
31486  * pX     X ordinate of EC point.
31487  * pY     Y ordinate of EC point.
31488  * privm  Private scalar that generates EC point.
31489  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
31490  * not on the curve, ECC_INF_E if the point does not have the correct order,
31491  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
31492  * MP_OKAY otherwise.
31493  */
sp_ecc_check_key_384(const mp_int * pX,const mp_int * pY,const mp_int * privm,void * heap)31494 int sp_ecc_check_key_384(const mp_int* pX, const mp_int* pY,
31495     const mp_int* privm, void* heap)
31496 {
31497 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31498     sp_digit* priv = NULL;
31499     sp_point_384* pub = NULL;
31500 #else
31501     sp_digit priv[12];
31502     sp_point_384 pub[2];
31503 #endif
31504     sp_point_384* p = NULL;
31505     const byte one[1] = { 1 };
31506     int err = MP_OKAY;
31507 
31508 
31509     /* Quick check the lengs of public key ordinates and private key are in
31510      * range. Proper check later.
31511      */
31512     if (((mp_count_bits(pX) > 384) ||
31513         (mp_count_bits(pY) > 384) ||
31514         ((privm != NULL) && (mp_count_bits(privm) > 384)))) {
31515         err = ECC_OUT_OF_RANGE_E;
31516     }
31517 
31518 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31519     if (err == MP_OKAY) {
31520         pub = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, heap,
31521                                            DYNAMIC_TYPE_ECC);
31522         if (pub == NULL)
31523             err = MEMORY_E;
31524     }
31525     if (err == MP_OKAY && privm) {
31526         priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 12, heap,
31527                                   DYNAMIC_TYPE_ECC);
31528         if (priv == NULL)
31529             err = MEMORY_E;
31530     }
31531 #endif
31532 
31533     if (err == MP_OKAY) {
31534         p = pub + 1;
31535 
31536         sp_384_from_mp(pub->x, 12, pX);
31537         sp_384_from_mp(pub->y, 12, pY);
31538         sp_384_from_bin(pub->z, 12, one, (int)sizeof(one));
31539         if (privm)
31540             sp_384_from_mp(priv, 12, privm);
31541 
31542         /* Check point at infinitiy. */
31543         if ((sp_384_iszero_12(pub->x) != 0) &&
31544             (sp_384_iszero_12(pub->y) != 0)) {
31545             err = ECC_INF_E;
31546         }
31547     }
31548 
31549     /* Check range of X and Y */
31550     if ((err == MP_OKAY) &&
31551             ((sp_384_cmp_12(pub->x, p384_mod) >= 0) ||
31552              (sp_384_cmp_12(pub->y, p384_mod) >= 0))) {
31553         err = ECC_OUT_OF_RANGE_E;
31554     }
31555 
31556     if (err == MP_OKAY) {
31557         /* Check point is on curve */
31558         err = sp_384_ecc_is_point_12(pub, heap);
31559     }
31560 
31561     if (err == MP_OKAY) {
31562         /* Point * order = infinity */
31563             err = sp_384_ecc_mulmod_12(p, pub, p384_order, 1, 1, heap);
31564     }
31565     /* Check result is infinity */
31566     if ((err == MP_OKAY) && ((sp_384_iszero_12(p->x) == 0) ||
31567                              (sp_384_iszero_12(p->y) == 0))) {
31568         err = ECC_INF_E;
31569     }
31570 
31571     if (privm) {
31572         if (err == MP_OKAY) {
31573             /* Base * private = point */
31574                 err = sp_384_ecc_mulmod_base_12(p, priv, 1, 1, heap);
31575         }
31576         /* Check result is public key */
31577         if ((err == MP_OKAY) &&
31578                 ((sp_384_cmp_12(p->x, pub->x) != 0) ||
31579                  (sp_384_cmp_12(p->y, pub->y) != 0))) {
31580             err = ECC_PRIV_KEY_E;
31581         }
31582     }
31583 
31584 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31585     if (pub != NULL)
31586         XFREE(pub, heap, DYNAMIC_TYPE_ECC);
31587     if (priv != NULL)
31588         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
31589 #endif
31590 
31591     return err;
31592 }
31593 #endif
31594 #ifdef WOLFSSL_PUBLIC_ECC_ADD_DBL
31595 /* Add two projective EC points together.
31596  * (pX, pY, pZ) + (qX, qY, qZ) = (rX, rY, rZ)
31597  *
31598  * pX   First EC point's X ordinate.
31599  * pY   First EC point's Y ordinate.
31600  * pZ   First EC point's Z ordinate.
31601  * qX   Second EC point's X ordinate.
31602  * qY   Second EC point's Y ordinate.
31603  * qZ   Second EC point's Z ordinate.
31604  * rX   Resultant EC point's X ordinate.
31605  * rY   Resultant EC point's Y ordinate.
31606  * rZ   Resultant EC point's Z ordinate.
31607  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
31608  */
sp_ecc_proj_add_point_384(mp_int * pX,mp_int * pY,mp_int * pZ,mp_int * qX,mp_int * qY,mp_int * qZ,mp_int * rX,mp_int * rY,mp_int * rZ)31609 int sp_ecc_proj_add_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
31610                               mp_int* qX, mp_int* qY, mp_int* qZ,
31611                               mp_int* rX, mp_int* rY, mp_int* rZ)
31612 {
31613 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31614     sp_digit* tmp = NULL;
31615     sp_point_384* p = NULL;
31616 #else
31617     sp_digit tmp[2 * 12 * 5];
31618     sp_point_384 p[2];
31619 #endif
31620     sp_point_384* q = NULL;
31621     int err = MP_OKAY;
31622 
31623 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31624     if (err == MP_OKAY) {
31625         p = (sp_point_384*)XMALLOC(sizeof(sp_point_384) * 2, NULL,
31626                                          DYNAMIC_TYPE_ECC);
31627         if (p == NULL)
31628             err = MEMORY_E;
31629     }
31630     if (err == MP_OKAY) {
31631         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 5, NULL,
31632                                  DYNAMIC_TYPE_ECC);
31633         if (tmp == NULL) {
31634             err = MEMORY_E;
31635         }
31636     }
31637 #endif
31638 
31639     if (err == MP_OKAY) {
31640         q = p + 1;
31641 
31642         sp_384_from_mp(p->x, 12, pX);
31643         sp_384_from_mp(p->y, 12, pY);
31644         sp_384_from_mp(p->z, 12, pZ);
31645         sp_384_from_mp(q->x, 12, qX);
31646         sp_384_from_mp(q->y, 12, qY);
31647         sp_384_from_mp(q->z, 12, qZ);
31648         p->infinity = sp_384_iszero_12(p->x) &
31649                       sp_384_iszero_12(p->y);
31650         q->infinity = sp_384_iszero_12(q->x) &
31651                       sp_384_iszero_12(q->y);
31652 
31653             sp_384_proj_point_add_12(p, p, q, tmp);
31654     }
31655 
31656     if (err == MP_OKAY) {
31657         err = sp_384_to_mp(p->x, rX);
31658     }
31659     if (err == MP_OKAY) {
31660         err = sp_384_to_mp(p->y, rY);
31661     }
31662     if (err == MP_OKAY) {
31663         err = sp_384_to_mp(p->z, rZ);
31664     }
31665 
31666 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31667     if (tmp != NULL)
31668         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
31669     if (p != NULL)
31670         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
31671 #endif
31672 
31673     return err;
31674 }
31675 
31676 /* Double a projective EC point.
31677  * (pX, pY, pZ) + (pX, pY, pZ) = (rX, rY, rZ)
31678  *
31679  * pX   EC point's X ordinate.
31680  * pY   EC point's Y ordinate.
31681  * pZ   EC point's Z ordinate.
31682  * rX   Resultant EC point's X ordinate.
31683  * rY   Resultant EC point's Y ordinate.
31684  * rZ   Resultant EC point's Z ordinate.
31685  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
31686  */
sp_ecc_proj_dbl_point_384(mp_int * pX,mp_int * pY,mp_int * pZ,mp_int * rX,mp_int * rY,mp_int * rZ)31687 int sp_ecc_proj_dbl_point_384(mp_int* pX, mp_int* pY, mp_int* pZ,
31688                               mp_int* rX, mp_int* rY, mp_int* rZ)
31689 {
31690 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31691     sp_digit* tmp = NULL;
31692     sp_point_384* p = NULL;
31693 #else
31694     sp_digit tmp[2 * 12 * 2];
31695     sp_point_384 p[1];
31696 #endif
31697     int err = MP_OKAY;
31698 
31699 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31700     if (err == MP_OKAY) {
31701         p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL,
31702                                          DYNAMIC_TYPE_ECC);
31703         if (p == NULL)
31704             err = MEMORY_E;
31705     }
31706     if (err == MP_OKAY) {
31707         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 2, NULL,
31708                                  DYNAMIC_TYPE_ECC);
31709         if (tmp == NULL)
31710             err = MEMORY_E;
31711     }
31712 #endif
31713 
31714     if (err == MP_OKAY) {
31715         sp_384_from_mp(p->x, 12, pX);
31716         sp_384_from_mp(p->y, 12, pY);
31717         sp_384_from_mp(p->z, 12, pZ);
31718         p->infinity = sp_384_iszero_12(p->x) &
31719                       sp_384_iszero_12(p->y);
31720 
31721             sp_384_proj_point_dbl_12(p, p, tmp);
31722     }
31723 
31724     if (err == MP_OKAY) {
31725         err = sp_384_to_mp(p->x, rX);
31726     }
31727     if (err == MP_OKAY) {
31728         err = sp_384_to_mp(p->y, rY);
31729     }
31730     if (err == MP_OKAY) {
31731         err = sp_384_to_mp(p->z, rZ);
31732     }
31733 
31734 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31735     if (tmp != NULL)
31736         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
31737     if (p != NULL)
31738         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
31739 #endif
31740 
31741     return err;
31742 }
31743 
31744 /* Map a projective EC point to affine in place.
31745  * pZ will be one.
31746  *
31747  * pX   EC point's X ordinate.
31748  * pY   EC point's Y ordinate.
31749  * pZ   EC point's Z ordinate.
31750  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
31751  */
sp_ecc_map_384(mp_int * pX,mp_int * pY,mp_int * pZ)31752 int sp_ecc_map_384(mp_int* pX, mp_int* pY, mp_int* pZ)
31753 {
31754 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31755     sp_digit* tmp = NULL;
31756     sp_point_384* p = NULL;
31757 #else
31758     sp_digit tmp[2 * 12 * 6];
31759     sp_point_384 p[1];
31760 #endif
31761     int err = MP_OKAY;
31762 
31763 
31764 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31765     if (err == MP_OKAY) {
31766         p = (sp_point_384*)XMALLOC(sizeof(sp_point_384), NULL,
31767                                          DYNAMIC_TYPE_ECC);
31768         if (p == NULL)
31769             err = MEMORY_E;
31770     }
31771     if (err == MP_OKAY) {
31772         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 12 * 6, NULL,
31773                                  DYNAMIC_TYPE_ECC);
31774         if (tmp == NULL)
31775             err = MEMORY_E;
31776     }
31777 #endif
31778     if (err == MP_OKAY) {
31779         sp_384_from_mp(p->x, 12, pX);
31780         sp_384_from_mp(p->y, 12, pY);
31781         sp_384_from_mp(p->z, 12, pZ);
31782         p->infinity = sp_384_iszero_12(p->x) &
31783                       sp_384_iszero_12(p->y);
31784 
31785             sp_384_map_12(p, p, tmp);
31786     }
31787 
31788     if (err == MP_OKAY) {
31789         err = sp_384_to_mp(p->x, pX);
31790     }
31791     if (err == MP_OKAY) {
31792         err = sp_384_to_mp(p->y, pY);
31793     }
31794     if (err == MP_OKAY) {
31795         err = sp_384_to_mp(p->z, pZ);
31796     }
31797 
31798 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31799     if (tmp != NULL)
31800         XFREE(tmp, NULL, DYNAMIC_TYPE_ECC);
31801     if (p != NULL)
31802         XFREE(p, NULL, DYNAMIC_TYPE_ECC);
31803 #endif
31804 
31805     return err;
31806 }
31807 #endif /* WOLFSSL_PUBLIC_ECC_ADD_DBL */
31808 #ifdef HAVE_COMP_KEY
31809 /* Find the square root of a number mod the prime of the curve.
31810  *
31811  * y  The number to operate on and the result.
31812  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
31813  */
sp_384_mont_sqrt_12(sp_digit * y)31814 static int sp_384_mont_sqrt_12(sp_digit* y)
31815 {
31816 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31817     sp_digit* t1 = NULL;
31818 #else
31819     sp_digit t1[5 * 2 * 12];
31820 #endif
31821     sp_digit* t2 = NULL;
31822     sp_digit* t3 = NULL;
31823     sp_digit* t4 = NULL;
31824     sp_digit* t5 = NULL;
31825     int err = MP_OKAY;
31826 
31827 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31828     t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 12, NULL, DYNAMIC_TYPE_ECC);
31829     if (t1 == NULL)
31830         err = MEMORY_E;
31831 #endif
31832 
31833     if (err == MP_OKAY) {
31834         t2 = t1 + 2 * 12;
31835         t3 = t1 + 4 * 12;
31836         t4 = t1 + 6 * 12;
31837         t5 = t1 + 8 * 12;
31838 
31839         {
31840             /* t2 = y ^ 0x2 */
31841             sp_384_mont_sqr_12(t2, y, p384_mod, p384_mp_mod);
31842             /* t1 = y ^ 0x3 */
31843             sp_384_mont_mul_12(t1, t2, y, p384_mod, p384_mp_mod);
31844             /* t5 = y ^ 0xc */
31845             sp_384_mont_sqr_n_12(t5, t1, 2, p384_mod, p384_mp_mod);
31846             /* t1 = y ^ 0xf */
31847             sp_384_mont_mul_12(t1, t1, t5, p384_mod, p384_mp_mod);
31848             /* t2 = y ^ 0x1e */
31849             sp_384_mont_sqr_12(t2, t1, p384_mod, p384_mp_mod);
31850             /* t3 = y ^ 0x1f */
31851             sp_384_mont_mul_12(t3, t2, y, p384_mod, p384_mp_mod);
31852             /* t2 = y ^ 0x3e0 */
31853             sp_384_mont_sqr_n_12(t2, t3, 5, p384_mod, p384_mp_mod);
31854             /* t1 = y ^ 0x3ff */
31855             sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
31856             /* t2 = y ^ 0x7fe0 */
31857             sp_384_mont_sqr_n_12(t2, t1, 5, p384_mod, p384_mp_mod);
31858             /* t3 = y ^ 0x7fff */
31859             sp_384_mont_mul_12(t3, t3, t2, p384_mod, p384_mp_mod);
31860             /* t2 = y ^ 0x3fff800 */
31861             sp_384_mont_sqr_n_12(t2, t3, 15, p384_mod, p384_mp_mod);
31862             /* t4 = y ^ 0x3ffffff */
31863             sp_384_mont_mul_12(t4, t3, t2, p384_mod, p384_mp_mod);
31864             /* t2 = y ^ 0xffffffc000000 */
31865             sp_384_mont_sqr_n_12(t2, t4, 30, p384_mod, p384_mp_mod);
31866             /* t1 = y ^ 0xfffffffffffff */
31867             sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
31868             /* t2 = y ^ 0xfffffffffffffff000000000000000 */
31869             sp_384_mont_sqr_n_12(t2, t1, 60, p384_mod, p384_mp_mod);
31870             /* t1 = y ^ 0xffffffffffffffffffffffffffffff */
31871             sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
31872             /* t2 = y ^ 0xffffffffffffffffffffffffffffff000000000000000000000000000000 */
31873             sp_384_mont_sqr_n_12(t2, t1, 120, p384_mod, p384_mp_mod);
31874             /* t1 = y ^ 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
31875             sp_384_mont_mul_12(t1, t1, t2, p384_mod, p384_mp_mod);
31876             /* t2 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffff8000 */
31877             sp_384_mont_sqr_n_12(t2, t1, 15, p384_mod, p384_mp_mod);
31878             /* t1 = y ^ 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff */
31879             sp_384_mont_mul_12(t1, t3, t2, p384_mod, p384_mp_mod);
31880             /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000 */
31881             sp_384_mont_sqr_n_12(t2, t1, 31, p384_mod, p384_mp_mod);
31882             /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff */
31883             sp_384_mont_mul_12(t1, t4, t2, p384_mod, p384_mp_mod);
31884             /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffff0 */
31885             sp_384_mont_sqr_n_12(t2, t1, 4, p384_mod, p384_mp_mod);
31886             /* t1 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc */
31887             sp_384_mont_mul_12(t1, t5, t2, p384_mod, p384_mp_mod);
31888             /* t2 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000 */
31889             sp_384_mont_sqr_n_12(t2, t1, 62, p384_mod, p384_mp_mod);
31890             /* t1 = y ^ 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000001 */
31891             sp_384_mont_mul_12(t1, y, t2, p384_mod, p384_mp_mod);
31892             /* t2 = y ^ 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffc00000000000000040000000 */
31893             sp_384_mont_sqr_n_12(y, t1, 30, p384_mod, p384_mp_mod);
31894         }
31895     }
31896 
31897 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31898     if (t1 != NULL)
31899         XFREE(t1, NULL, DYNAMIC_TYPE_ECC);
31900 #endif
31901 
31902     return err;
31903 }
31904 
31905 
31906 /* Uncompress the point given the X ordinate.
31907  *
31908  * xm    X ordinate.
31909  * odd   Whether the Y ordinate is odd.
31910  * ym    Calculated Y ordinate.
31911  * returns MEMORY_E if dynamic memory allocation fails and MP_OKAY otherwise.
31912  */
sp_ecc_uncompress_384(mp_int * xm,int odd,mp_int * ym)31913 int sp_ecc_uncompress_384(mp_int* xm, int odd, mp_int* ym)
31914 {
31915 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31916     sp_digit* x = NULL;
31917 #else
31918     sp_digit x[4 * 12];
31919 #endif
31920     sp_digit* y = NULL;
31921     int err = MP_OKAY;
31922 
31923 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31924     x = (sp_digit*)XMALLOC(sizeof(sp_digit) * 4 * 12, NULL, DYNAMIC_TYPE_ECC);
31925     if (x == NULL)
31926         err = MEMORY_E;
31927 #endif
31928 
31929     if (err == MP_OKAY) {
31930         y = x + 2 * 12;
31931 
31932         sp_384_from_mp(x, 12, xm);
31933         err = sp_384_mod_mul_norm_12(x, x, p384_mod);
31934     }
31935     if (err == MP_OKAY) {
31936         /* y = x^3 */
31937         {
31938             sp_384_mont_sqr_12(y, x, p384_mod, p384_mp_mod);
31939             sp_384_mont_mul_12(y, y, x, p384_mod, p384_mp_mod);
31940         }
31941         /* y = x^3 - 3x */
31942         sp_384_mont_sub_12(y, y, x, p384_mod);
31943         sp_384_mont_sub_12(y, y, x, p384_mod);
31944         sp_384_mont_sub_12(y, y, x, p384_mod);
31945         /* y = x^3 - 3x + b */
31946         err = sp_384_mod_mul_norm_12(x, p384_b, p384_mod);
31947     }
31948     if (err == MP_OKAY) {
31949         sp_384_mont_add_12(y, y, x, p384_mod);
31950         /* y = sqrt(x^3 - 3x + b) */
31951         err = sp_384_mont_sqrt_12(y);
31952     }
31953     if (err == MP_OKAY) {
31954         XMEMSET(y + 12, 0, 12U * sizeof(sp_digit));
31955         sp_384_mont_reduce_12(y, p384_mod, p384_mp_mod);
31956         if ((((word32)y[0] ^ (word32)odd) & 1U) != 0U) {
31957             sp_384_mont_sub_12(y, p384_mod, y, p384_mod);
31958         }
31959 
31960         err = sp_384_to_mp(y, ym);
31961     }
31962 
31963 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
31964     if (x != NULL)
31965         XFREE(x, NULL, DYNAMIC_TYPE_ECC);
31966 #endif
31967 
31968     return err;
31969 }
31970 #endif
31971 #endif /* WOLFSSL_SP_384 */
31972 #ifdef WOLFSSL_SP_1024
31973 
/* Point structure to use with the 1024-bit routines.
 * Each ordinate has room for 2 * 32 digits.
 */
typedef struct sp_point_1024 {
    /* X ordinate of point. */
    sp_digit x[2 * 32];
    /* Y ordinate of point. */
    sp_digit y[2 * 32];
    /* Z ordinate of point. */
    sp_digit z[2 * 32];
    /* Indicates point is at infinity (non-zero when it is). */
    int infinity;
} sp_point_1024;
31985 
31986 #ifndef WOLFSSL_SP_SMALL
/* Multiply a and b into r. (r = a * b)
 *
 * Both inputs are 16 digits and the product is 32 digits. The assembly
 * walks the operands 4 bytes at a time, i.e. it treats a digit as one
 * 32-bit word.
 *
 * The product is produced one output digit (column) at a time: all
 * a[i] * b[j] with i + j equal to the column index are summed into the
 * three-register accumulator r5:r4:r3, r3 is stored as the output digit and
 * the accumulator is shifted down one register.
 *
 * The result is assembled in a local buffer and copied to r only after the
 * assembly has finished reading a and b.
 *
 * NOTE(review): %[r], %[a] and %[b] are declared as inputs but are used as
 * working registers; each is put back to its entry value before the
 * statement ends.
 *
 * r  A single precision integer. Receives the 32 digit product.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_1024_mul_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp_arr[16 * 2];
    sp_digit* tmp = tmp_arr;
    __asm__ __volatile__ (
        /* r3, r4: low two accumulator words. r9: byte offset of the
         * current column. r12/r10/r11: saved buffer, a and b pointers.
         * r14: address one past the last digit of a (a + 64 bytes). */
        "mov	r3, #0\n\t"
        "mov	r4, #0\n\t"
        "mov	r9, r3\n\t"
        "mov	r12, %[r]\n\t"
        "mov	r10, %[a]\n\t"
        "mov	r11, %[b]\n\t"
        "mov	r6, #64\n\t"
        "add	r6, r6, r10\n\t"
        "mov	r14, r6\n\t"
        /* Per column: %[r] = 0 (zero operand for the carry add), r5 = 0,
         * %[a] = a + max(0, column - 60), %[b] = b + (column - that). */
        "\n1:\n\t"
        "mov	%[r], #0\n\t"
        "mov	r5, #0\n\t"
        "mov	r6, #60\n\t"
        "mov	%[a], r9\n\t"
        "subs	%[a], %[a], r6\n\t"
        "sbc	r6, r6, r6\n\t"
        "mvn	r6, r6\n\t"
        "and	%[a], %[a], r6\n\t"
        "mov	%[b], r9\n\t"
        "sub	%[b], %[b], %[a]\n\t"
        "add	%[a], %[a], r10\n\t"
        "add	%[b], %[b], r11\n\t"
        "\n2:\n\t"
        /* Multiply Start */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r8, [%[b]]\n\t"
        "umull	r6, r8, r6, r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Multiply Done */
        /* Step a forwards and b backwards; the column is finished when a
         * reaches its end or passes a + column offset. */
        "add	%[a], %[a], #4\n\t"
        "sub	%[b], %[b], #4\n\t"
        "cmp	%[a], r14\n\t"
#ifdef __GNUC__
        "beq	3f\n\t"
#else
        "beq.n	3f\n\t"
#endif /* __GNUC__ */
        "mov	r6, r9\n\t"
        "add	r6, r6, r10\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "ble	2b\n\t"
#else
        "ble.n	2b\n\t"
#endif /* __GNUC__ */
        /* Store the finished digit, shift the accumulator down and move to
         * the next column; repeat while the column offset is <= 120. */
        "\n3:\n\t"
        "mov	%[r], r12\n\t"
        "mov	r8, r9\n\t"
        "str	r3, [%[r], r8]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "add	r8, r8, #4\n\t"
        "mov	r9, r8\n\t"
        "mov	r6, #120\n\t"
        "cmp	r8, r6\n\t"
#ifdef __GNUC__
        "ble	1b\n\t"
#else
        "ble.n	1b\n\t"
#endif /* __GNUC__ */
        /* Remaining accumulator word is the top digit (offset 124). Then
         * put the a and b registers back to their entry values. */
        "str	r3, [%[r], r8]\n\t"
        "mov	%[a], r10\n\t"
        "mov	%[b], r11\n\t"
        :
        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
}
32071 
32072 /* Square a and put result in r. (r = a * a)
32073  *
32074  * r  A single precision integer.
32075  * a  A single precision integer.
32076  */
sp_1024_sqr_16(sp_digit * r,const sp_digit * a)32077 SP_NOINLINE static void sp_1024_sqr_16(sp_digit* r, const sp_digit* a)
32078 {
32079     __asm__ __volatile__ (
32080         "mov	r3, #0\n\t"
32081         "mov	r4, #0\n\t"
32082         "mov	r5, #0\n\t"
32083         "mov	r9, r3\n\t"
32084         "mov	r12, %[r]\n\t"
32085         "mov	r6, #128\n\t"
32086         "neg	r6, r6\n\t"
32087         "add	sp, sp, r6\n\t"
32088         "mov	r11, sp\n\t"
32089         "mov	r10, %[a]\n\t"
32090         "\n1:\n\t"
32091         "mov	%[r], #0\n\t"
32092         "mov	r6, #60\n\t"
32093         "mov	%[a], r9\n\t"
32094         "subs	%[a], %[a], r6\n\t"
32095         "sbc	r6, r6, r6\n\t"
32096         "mvn	r6, r6\n\t"
32097         "and	%[a], %[a], r6\n\t"
32098         "mov	r2, r9\n\t"
32099         "sub	r2, r2, %[a]\n\t"
32100         "add	%[a], %[a], r10\n\t"
32101         "add	r2, r2, r10\n\t"
32102         "\n2:\n\t"
32103         "cmp	r2, %[a]\n\t"
32104 #ifdef __GNUC__
32105         "beq	4f\n\t"
32106 #else
32107         "beq.n	4f\n\t"
32108 #endif /* __GNUC__ */
32109         /* Multiply * 2: Start */
32110         "ldr	r6, [%[a]]\n\t"
32111         "ldr	r8, [r2]\n\t"
32112         "umull	r6, r8, r6, r8\n\t"
32113         "adds	r3, r3, r6\n\t"
32114         "adcs 	r4, r4, r8\n\t"
32115         "adc	r5, r5, %[r]\n\t"
32116         "adds	r3, r3, r6\n\t"
32117         "adcs 	r4, r4, r8\n\t"
32118         "adc	r5, r5, %[r]\n\t"
32119         /* Multiply * 2: Done */
32120 #ifdef __GNUC__
32121         "bal	5f\n\t"
32122 #else
32123         "bal.n	5f\n\t"
32124 #endif /* __GNUC__ */
32125         "\n4:\n\t"
32126         /* Square: Start */
32127         "ldr	r6, [%[a]]\n\t"
32128         "umull	r6, r8, r6, r6\n\t"
32129         "adds	r3, r3, r6\n\t"
32130         "adcs	r4, r4, r8\n\t"
32131         "adc	r5, r5, %[r]\n\t"
32132         /* Square: Done */
32133         "\n5:\n\t"
32134         "add	%[a], %[a], #4\n\t"
32135         "sub	r2, r2, #4\n\t"
32136         "mov	r6, #64\n\t"
32137         "add	r6, r6, r10\n\t"
32138         "cmp	%[a], r6\n\t"
32139 #ifdef __GNUC__
32140         "beq	3f\n\t"
32141 #else
32142         "beq.n	3f\n\t"
32143 #endif /* __GNUC__ */
32144         "cmp	%[a], r2\n\t"
32145 #ifdef __GNUC__
32146         "bgt	3f\n\t"
32147 #else
32148         "bgt.n	3f\n\t"
32149 #endif /* __GNUC__ */
32150         "mov	r8, r9\n\t"
32151         "add	r8, r8, r10\n\t"
32152         "cmp	%[a], r8\n\t"
32153 #ifdef __GNUC__
32154         "ble	2b\n\t"
32155 #else
32156         "ble.n	2b\n\t"
32157 #endif /* __GNUC__ */
32158         "\n3:\n\t"
32159         "mov	%[r], r11\n\t"
32160         "mov	r8, r9\n\t"
32161         "str	r3, [%[r], r8]\n\t"
32162         "mov	r3, r4\n\t"
32163         "mov	r4, r5\n\t"
32164         "mov	r5, #0\n\t"
32165         "add	r8, r8, #4\n\t"
32166         "mov	r9, r8\n\t"
32167         "mov	r6, #120\n\t"
32168         "cmp	r8, r6\n\t"
32169 #ifdef __GNUC__
32170         "ble	1b\n\t"
32171 #else
32172         "ble.n	1b\n\t"
32173 #endif /* __GNUC__ */
32174         "mov	%[a], r10\n\t"
32175         "str	r3, [%[r], r8]\n\t"
32176         "mov	%[r], r12\n\t"
32177         "mov	%[a], r11\n\t"
32178         "mov	r3, #124\n\t"
32179         "\n4:\n\t"
32180         "ldr	r6, [%[a], r3]\n\t"
32181         "str	r6, [%[r], r3]\n\t"
32182         "subs	r3, r3, #4\n\t"
32183 #ifdef __GNUC__
32184         "bge	4b\n\t"
32185 #else
32186         "bge.n	4b\n\t"
32187 #endif /* __GNUC__ */
32188         "mov	r6, #128\n\t"
32189         "add	sp, sp, r6\n\t"
32190         :
32191         : [r] "r" (r), [a] "r" (a)
32192         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
32193     );
32194 }
32195 
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: eight steps, each loading two words from a and from b,
 * adding them with the carry chained from the previous step and storing
 * two words to r. All three pointers are advanced by the load/store
 * instructions themselves, which is why they are read-write operands.
 *
 * r  A single precision integer. Receives the 16 word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the most significant word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_1024_add_16(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* First pair starts the carry chain with adds; the rest use adcs. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* c = 0 + 0 + carry flag */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
32257 
/* Sub b from a in place. (a = a - b)
 *
 * Fully unrolled: sixteen steps, each loading two words from a and from b,
 * subtracting with the borrow chained from the previous step and storing
 * the two result words back over a. The a pointer is advanced by the
 * store and the b pointer by the load, which is why both are read-write
 * operands.
 *
 * a  A single precision integer. 32 words; overwritten with the difference.
 * b  A single precision integer. 32 words.
 * returns 0 when there was no borrow out of the most significant word and
 * a value with every bit set when there was.
 */
SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* First pair starts the borrow chain with subs; the rest use sbcs. */
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "subs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        "ldm	%[a], {r3, r4}\n\t"
        "ldm	%[b]!, {r5, r6}\n\t"
        "sbcs	r3, r3, r5\n\t"
        "sbcs	r4, r4, r6\n\t"
        "stm	%[a]!, {r3, r4}\n\t"
        /* c - c - borrow: 0 without a borrow, all ones with one. */
        "sbc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r3", "r4", "r5", "r6"
    );

    return c;
}
32358 
/* Add b to a into r. (r = a + b)
 *
 * Fully unrolled: sixteen steps, each loading two words from a and from b,
 * adding them with the carry chained from the previous step and storing
 * two words to r. All three pointers are advanced by the load/store
 * instructions themselves, which is why they are read-write operands.
 *
 * r  A single precision integer. Receives the 32 word sum.
 * a  A single precision integer.
 * b  A single precision integer.
 * returns the carry out of the most significant word (0 or 1).
 */
SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit c = 0;

    __asm__ __volatile__ (
        /* First pair starts the carry chain with adds; the rest use adcs. */
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adds	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        "ldm	%[a]!, {r4, r5}\n\t"
        "ldm	%[b]!, {r6, r8}\n\t"
        "adcs	r4, r4, r6\n\t"
        "adcs	r5, r5, r8\n\t"
        "stm	%[r]!, {r4, r5}\n\t"
        /* c = 0 + 0 + carry flag */
        "mov	%[c], #0\n\t"
        "adc	%[c], %[c], %[c]\n\t"
        : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
        :
        : "memory", "r4", "r5", "r6", "r8"
    );

    return c;
}
32460 
32461 /* AND m into each word of a and store in r.
32462  *
32463  * r  A single precision integer.
32464  * a  A single precision integer.
32465  * m  Mask to AND against each digit.
32466  */
sp_1024_mask_16(sp_digit * r,const sp_digit * a,sp_digit m)32467 static void sp_1024_mask_16(sp_digit* r, const sp_digit* a, sp_digit m)
32468 {
32469 #ifdef WOLFSSL_SP_SMALL
32470     int i;
32471 
32472     for (i=0; i<16; i++) {
32473         r[i] = a[i] & m;
32474     }
32475 #else
32476     int i;
32477 
32478     for (i = 0; i < 16; i += 8) {
32479         r[i+0] = a[i+0] & m;
32480         r[i+1] = a[i+1] & m;
32481         r[i+2] = a[i+2] & m;
32482         r[i+3] = a[i+3] & m;
32483         r[i+4] = a[i+4] & m;
32484         r[i+5] = a[i+5] & m;
32485         r[i+6] = a[i+6] & m;
32486         r[i+7] = a[i+7] & m;
32487     }
32488 #endif
32489 }
32490 
32491 /* Multiply a and b into r. (r = a * b)
32492  *
32493  * r  A single precision integer.
32494  * a  A single precision integer.
32495  * b  A single precision integer.
32496  */
sp_1024_mul_32(sp_digit * r,const sp_digit * a,const sp_digit * b)32497 SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a,
32498         const sp_digit* b)
32499 {
32500     sp_digit* z0 = r;
32501     sp_digit z1[32];
32502     sp_digit a1[16];
32503     sp_digit b1[16];
32504     sp_digit z2[32];
32505     sp_digit u;
32506     sp_digit ca;
32507     sp_digit cb;
32508 
32509     ca = sp_1024_add_16(a1, a, &a[16]);
32510     cb = sp_1024_add_16(b1, b, &b[16]);
32511     u  = ca & cb;
32512     sp_1024_mul_16(z1, a1, b1);
32513     sp_1024_mul_16(z2, &a[16], &b[16]);
32514     sp_1024_mul_16(z0, a, b);
32515     sp_1024_mask_16(r + 32, a1, 0 - cb);
32516     sp_1024_mask_16(b1, b1, 0 - ca);
32517     u += sp_1024_add_16(r + 32, r + 32, b1);
32518     u += sp_1024_sub_in_place_32(z1, z2);
32519     u += sp_1024_sub_in_place_32(z1, z0);
32520     u += sp_1024_add_32(r + 16, r + 16, z1);
32521     r[48] = u;
32522     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
32523     (void)sp_1024_add_32(r + 32, r + 32, z2);
32524 }
32525 
32526 /* Square a and put result in r. (r = a * a)
32527  *
32528  * r  A single precision integer.
32529  * a  A single precision integer.
32530  */
sp_1024_sqr_32(sp_digit * r,const sp_digit * a)32531 SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a)
32532 {
32533     sp_digit* z0 = r;
32534     sp_digit z2[32];
32535     sp_digit z1[32];
32536     sp_digit a1[16];
32537     sp_digit u;
32538 
32539     u = sp_1024_add_16(a1, a, &a[16]);
32540     sp_1024_sqr_16(z1, a1);
32541     sp_1024_sqr_16(z2, &a[16]);
32542     sp_1024_sqr_16(z0, a);
32543     sp_1024_mask_16(r + 32, a1, 0 - u);
32544     u += sp_1024_add_16(r + 32, r + 32, r + 32);
32545     u += sp_1024_sub_in_place_32(z1, z2);
32546     u += sp_1024_sub_in_place_32(z1, z0);
32547     u += sp_1024_add_32(r + 16, r + 16, z1);
32548     r[48] = u;
32549     XMEMSET(r + 48 + 1, 0, sizeof(sp_digit) * (16 - 1));
32550     (void)sp_1024_add_32(r + 32, r + 32, z2);
32551 }
32552 
32553 #else
/* Multiply a and b into r. (r = a * b)
 *
 * Small-code variant. Both inputs are 32 digits and the product is 64
 * digits. The assembly walks the operands 4 bytes at a time, i.e. it treats
 * a digit as one 32-bit word.
 *
 * The product is produced one output digit (column) at a time: all
 * a[i] * b[j] with i + j equal to the column index are summed into the
 * three-register accumulator r5:r4:r3, r3 is stored as the output digit and
 * the accumulator is shifted down one register.
 *
 * The result is assembled in a local buffer and copied to r only after the
 * assembly has finished reading a and b.
 *
 * NOTE(review): %[r], %[a] and %[b] are declared as inputs but are used as
 * working registers; each is put back to its entry value before the
 * statement ends.
 *
 * r  A single precision integer. Receives the 64 digit product.
 * a  A single precision integer.
 * b  A single precision integer.
 */
SP_NOINLINE static void sp_1024_mul_32(sp_digit* r, const sp_digit* a,
        const sp_digit* b)
{
    sp_digit tmp_arr[32 * 2];
    sp_digit* tmp = tmp_arr;
    __asm__ __volatile__ (
        /* r3, r4: low two accumulator words. r9: byte offset of the
         * current column. r12/r10/r11: saved buffer, a and b pointers.
         * r14: address one past the last digit of a (a + 128 bytes). */
        "mov	r3, #0\n\t"
        "mov	r4, #0\n\t"
        "mov	r9, r3\n\t"
        "mov	r12, %[r]\n\t"
        "mov	r10, %[a]\n\t"
        "mov	r11, %[b]\n\t"
        "mov	r6, #128\n\t"
        "add	r6, r6, r10\n\t"
        "mov	r14, r6\n\t"
        /* Per column: %[r] = 0 (zero operand for the carry add), r5 = 0,
         * %[a] = a + max(0, column - 124), %[b] = b + (column - that). */
        "\n1:\n\t"
        "mov	%[r], #0\n\t"
        "mov	r5, #0\n\t"
        "mov	r6, #124\n\t"
        "mov	%[a], r9\n\t"
        "subs	%[a], %[a], r6\n\t"
        "sbc	r6, r6, r6\n\t"
        "mvn	r6, r6\n\t"
        "and	%[a], %[a], r6\n\t"
        "mov	%[b], r9\n\t"
        "sub	%[b], %[b], %[a]\n\t"
        "add	%[a], %[a], r10\n\t"
        "add	%[b], %[b], r11\n\t"
        "\n2:\n\t"
        /* Multiply Start */
        "ldr	r6, [%[a]]\n\t"
        "ldr	r8, [%[b]]\n\t"
        "umull	r6, r8, r6, r8\n\t"
        "adds	r3, r3, r6\n\t"
        "adcs 	r4, r4, r8\n\t"
        "adc	r5, r5, %[r]\n\t"
        /* Multiply Done */
        /* Step a forwards and b backwards; the column is finished when a
         * reaches its end or passes a + column offset. */
        "add	%[a], %[a], #4\n\t"
        "sub	%[b], %[b], #4\n\t"
        "cmp	%[a], r14\n\t"
#ifdef __GNUC__
        "beq	3f\n\t"
#else
        "beq.n	3f\n\t"
#endif /* __GNUC__ */
        "mov	r6, r9\n\t"
        "add	r6, r6, r10\n\t"
        "cmp	%[a], r6\n\t"
#ifdef __GNUC__
        "ble	2b\n\t"
#else
        "ble.n	2b\n\t"
#endif /* __GNUC__ */
        /* Store the finished digit, shift the accumulator down and move to
         * the next column; repeat while the column offset is <= 248. */
        "\n3:\n\t"
        "mov	%[r], r12\n\t"
        "mov	r8, r9\n\t"
        "str	r3, [%[r], r8]\n\t"
        "mov	r3, r4\n\t"
        "mov	r4, r5\n\t"
        "add	r8, r8, #4\n\t"
        "mov	r9, r8\n\t"
        "mov	r6, #248\n\t"
        "cmp	r8, r6\n\t"
#ifdef __GNUC__
        "ble	1b\n\t"
#else
        "ble.n	1b\n\t"
#endif /* __GNUC__ */
        /* Remaining accumulator word is the top digit (offset 252). Then
         * put the a and b registers back to their entry values. */
        "str	r3, [%[r], r8]\n\t"
        "mov	%[a], r10\n\t"
        "mov	%[b], r11\n\t"
        :
        : [r] "r" (tmp), [a] "r" (a), [b] "r" (b)
        : "memory", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
    );

    XMEMCPY(r, tmp_arr, sizeof(tmp_arr));
}
32638 
32639 /* Square a and put result in r. (r = a * a)
32640  *
32641  * r  A single precision integer.
32642  * a  A single precision integer.
32643  */
sp_1024_sqr_32(sp_digit * r,const sp_digit * a)32644 SP_NOINLINE static void sp_1024_sqr_32(sp_digit* r, const sp_digit* a)
32645 {
32646     __asm__ __volatile__ (
32647         "mov	r3, #0\n\t"
32648         "mov	r4, #0\n\t"
32649         "mov	r5, #0\n\t"
32650         "mov	r9, r3\n\t"
32651         "mov	r12, %[r]\n\t"
32652         "mov	r6, #1\n\t"
32653         "lsl	r6, r6, #8\n\t"
32654         "neg	r6, r6\n\t"
32655         "add	sp, sp, r6\n\t"
32656         "mov	r11, sp\n\t"
32657         "mov	r10, %[a]\n\t"
32658         "\n1:\n\t"
32659         "mov	%[r], #0\n\t"
32660         "mov	r6, #124\n\t"
32661         "mov	%[a], r9\n\t"
32662         "subs	%[a], %[a], r6\n\t"
32663         "sbc	r6, r6, r6\n\t"
32664         "mvn	r6, r6\n\t"
32665         "and	%[a], %[a], r6\n\t"
32666         "mov	r2, r9\n\t"
32667         "sub	r2, r2, %[a]\n\t"
32668         "add	%[a], %[a], r10\n\t"
32669         "add	r2, r2, r10\n\t"
32670         "\n2:\n\t"
32671         "cmp	r2, %[a]\n\t"
32672 #ifdef __GNUC__
32673         "beq	4f\n\t"
32674 #else
32675         "beq.n	4f\n\t"
32676 #endif /* __GNUC__ */
32677         /* Multiply * 2: Start */
32678         "ldr	r6, [%[a]]\n\t"
32679         "ldr	r8, [r2]\n\t"
32680         "umull	r6, r8, r6, r8\n\t"
32681         "adds	r3, r3, r6\n\t"
32682         "adcs 	r4, r4, r8\n\t"
32683         "adc	r5, r5, %[r]\n\t"
32684         "adds	r3, r3, r6\n\t"
32685         "adcs 	r4, r4, r8\n\t"
32686         "adc	r5, r5, %[r]\n\t"
32687         /* Multiply * 2: Done */
32688 #ifdef __GNUC__
32689         "bal	5f\n\t"
32690 #else
32691         "bal.n	5f\n\t"
32692 #endif /* __GNUC__ */
32693         "\n4:\n\t"
32694         /* Square: Start */
32695         "ldr	r6, [%[a]]\n\t"
32696         "umull	r6, r8, r6, r6\n\t"
32697         "adds	r3, r3, r6\n\t"
32698         "adcs	r4, r4, r8\n\t"
32699         "adc	r5, r5, %[r]\n\t"
32700         /* Square: Done */
32701         "\n5:\n\t"
32702         "add	%[a], %[a], #4\n\t"
32703         "sub	r2, r2, #4\n\t"
32704         "mov	r6, #128\n\t"
32705         "add	r6, r6, r10\n\t"
32706         "cmp	%[a], r6\n\t"
32707 #ifdef __GNUC__
32708         "beq	3f\n\t"
32709 #else
32710         "beq.n	3f\n\t"
32711 #endif /* __GNUC__ */
32712         "cmp	%[a], r2\n\t"
32713 #ifdef __GNUC__
32714         "bgt	3f\n\t"
32715 #else
32716         "bgt.n	3f\n\t"
32717 #endif /* __GNUC__ */
32718         "mov	r8, r9\n\t"
32719         "add	r8, r8, r10\n\t"
32720         "cmp	%[a], r8\n\t"
32721 #ifdef __GNUC__
32722         "ble	2b\n\t"
32723 #else
32724         "ble.n	2b\n\t"
32725 #endif /* __GNUC__ */
32726         "\n3:\n\t"
32727         "mov	%[r], r11\n\t"
32728         "mov	r8, r9\n\t"
32729         "str	r3, [%[r], r8]\n\t"
32730         "mov	r3, r4\n\t"
32731         "mov	r4, r5\n\t"
32732         "mov	r5, #0\n\t"
32733         "add	r8, r8, #4\n\t"
32734         "mov	r9, r8\n\t"
32735         "mov	r6, #248\n\t"
32736         "cmp	r8, r6\n\t"
32737 #ifdef __GNUC__
32738         "ble	1b\n\t"
32739 #else
32740         "ble.n	1b\n\t"
32741 #endif /* __GNUC__ */
32742         "mov	%[a], r10\n\t"
32743         "str	r3, [%[r], r8]\n\t"
32744         "mov	%[r], r12\n\t"
32745         "mov	%[a], r11\n\t"
32746         "mov	r3, #252\n\t"
32747         "\n4:\n\t"
32748         "ldr	r6, [%[a], r3]\n\t"
32749         "str	r6, [%[r], r3]\n\t"
32750         "subs	r3, r3, #4\n\t"
32751 #ifdef __GNUC__
32752         "bge	4b\n\t"
32753 #else
32754         "bge.n	4b\n\t"
32755 #endif /* __GNUC__ */
32756         "mov	r6, #1\n\t"
32757         "lsl	r6, r6, #8\n\t"
32758         "add	sp, sp, r6\n\t"
32759         :
32760         : [r] "r" (r), [a] "r" (a)
32761         : "memory", "r2", "r3", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12"
32762     );
32763 }
32764 
32765 #endif /* !WOLFSSL_SP_SMALL */
/* The modulus (prime) of the curve P1024.
 *
 * 32 words of 32 bits each (1024 bits in total).
 */
static const sp_digit p1024_mod[32] = {
    0xfea85feb,0x666d807a,0xac7ace87,0x80c5df10,0x89857db0,0xfce3e823,
    0x56971f1f,0x9f94d6af,0x1c3c09aa,0xa7cf3c52,0x31852a82,0xb6aff4a8,
    0x65681ce1,0x512ac5cd,0x326b4cd4,0xe26c6487,0xa666a6d0,0x356d27f4,
    0xf7c88a19,0xe791b39f,0x31a59cb0,0x228730d5,0xe2fc0f1b,0xf40aab27,
    0xb3e01a2e,0xbe9ae358,0x9cb48261,0x416c0ce1,0xdad0657a,0x65c61198,
    0x0a563fda,0x997abb1f
};
/* The Montgomery normalizer for modulus of the curve P1024.
 *
 * Used by sp_1024_mod_mul_norm_32 as the multiplier that converts a number
 * to Montgomery form.
 */
static const sp_digit p1024_norm_mod[32] = {
    0x0157a015,0x99927f85,0x53853178,0x7f3a20ef,0x767a824f,0x031c17dc,
    0xa968e0e0,0x606b2950,0xe3c3f655,0x5830c3ad,0xce7ad57d,0x49500b57,
    0x9a97e31e,0xaed53a32,0xcd94b32b,0x1d939b78,0x5999592f,0xca92d80b,
    0x083775e6,0x186e4c60,0xce5a634f,0xdd78cf2a,0x1d03f0e4,0x0bf554d8,
    0x4c1fe5d1,0x41651ca7,0x634b7d9e,0xbe93f31e,0x252f9a85,0x9a39ee67,
    0xf5a9c025,0x668544e0
};
32784 /* The Montgomery multiplier for modulus of the curve P1024. */
32785 static sp_digit p1024_mp_mod = 0x7c8f2f3d;
#if defined(WOLFSSL_SP_SMALL) || defined(HAVE_ECC_CHECK_KEY)
/* The order of the curve P1024.
 *
 * Only compiled in for small builds or when key checking is enabled.
 */
static const sp_digit p1024_order[32] = {
    0xbfaa17fb,0xd99b601e,0x2b1eb3a1,0x203177c4,0xe2615f6c,0xff38fa08,
    0xd5a5c7c7,0xa7e535ab,0x870f026a,0xa9f3cf14,0x0c614aa0,0x6dabfd2a,
    0x595a0738,0x144ab173,0xcc9ad335,0x389b1921,0x2999a9b4,0x4d5b49fd,
    0xfdf22286,0x39e46ce7,0x4c69672c,0xc8a1cc35,0xf8bf03c6,0xbd02aac9,
    0x2cf8068b,0x6fa6b8d6,0x672d2098,0x905b0338,0x36b4195e,0x99718466,
    0xc2958ff6,0x265eaec7
};
#endif
/* The base point of curve P1024.
 *
 * Each ordinate is initialized with 64 words: 32 words of value followed by
 * 32 zero words. The Z ordinate holds the value 1 (first word is 1, all
 * others 0) and the infinity flag is clear.
 */
static const sp_point_1024 p1024_base = {
    /* X ordinate */
    {
        0xeae63895,0x880dc8ab,0x967e0979,0x80ec46c4,0xb63f73ec,0xee9163a5,
        0x80728d87,0xd5cfb4cc,0xba66910d,0xa7c1514d,0x7a60de74,0xa702c339,
        0x8b72f2e1,0x337c8654,0x5dd5bccb,0x9760af76,0x406ce890,0x718bd9e7,
        0xdb9dfa55,0x43d5f22c,0x30b09e10,0xab10db90,0xf6ce2308,0xb5edb6c0,
        0xb6ff7cbf,0x98b2f204,0x0aec69c6,0x2b1a2fd6,0x3ed9b52a,0x0a799005,
        0x332c29ad,0x53fc09ee,
        /* Upper 32 words are zero. */
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0
    },
    /* Y ordinate */
    {
        0x1bef16d7,0x75573fd7,0x6a67dcde,0xadb9b570,0xd5bb4636,0x80bdad5a,
        0xe9cb99a9,0x13515ad7,0xc5a4d5f2,0x492d979f,0x164aa989,0xac6f1e80,
        0xb7652fe0,0xcad696b5,0xad547c6c,0x70dae117,0xa9e032b9,0x416cff0c,
        0x9a140b2e,0x6b598ccf,0xf0de55f6,0xe7f7f5e5,0x654ec2b9,0xf5ea69f4,
        0x1e141178,0x3d778d82,0x02990696,0xd3e82016,0x3634a135,0xf9f1f053,
        0x3f6009f1,0x0a824906,
        /* Upper 32 words are zero. */
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0
    },
    /* Z ordinate */
    {
        0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
        0x00000000,0x00000000,
        /* Upper 32 words are zero. */
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0, (sp_digit)0,
        (sp_digit)0, (sp_digit)0
    },
    /* infinity */
    0
};
32850 
32851 #ifdef WOLFSSL_SP_SMALL
32852 /* Sub b from a into a. (a -= b)
32853  *
32854  * a  A single precision integer.
32855  * b  A single precision integer.
32856  */
sp_1024_sub_in_place_32(sp_digit * a,const sp_digit * b)32857 SP_NOINLINE static sp_digit sp_1024_sub_in_place_32(sp_digit* a,
32858         const sp_digit* b)
32859 {
32860     sp_digit c = 0;
32861     __asm__ __volatile__ (
32862         "mov	r8, %[a]\n\t"
32863         "add	r8, r8, #128\n\t"
32864         "\n1:\n\t"
32865         "mov	r5, #0\n\t"
32866         "subs	r5, r5, %[c]\n\t"
32867         "ldr	r3, [%[a]]\n\t"
32868         "ldr	r4, [%[a], #4]\n\t"
32869         "ldr	r5, [%[b]]\n\t"
32870         "ldr	r6, [%[b], #4]\n\t"
32871         "sbcs	r3, r3, r5\n\t"
32872         "sbcs	r4, r4, r6\n\t"
32873         "str	r3, [%[a]]\n\t"
32874         "str	r4, [%[a], #4]\n\t"
32875         "sbc	%[c], %[c], %[c]\n\t"
32876         "add	%[a], %[a], #8\n\t"
32877         "add	%[b], %[b], #8\n\t"
32878         "cmp	%[a], r8\n\t"
32879 #ifdef __GNUC__
32880         "bne	1b\n\t"
32881 #else
32882         "bne.n	1b\n\t"
32883 #endif /* __GNUC__ */
32884         : [c] "+r" (c), [a] "+r" (a), [b] "+r" (b)
32885         :
32886         : "memory", "r3", "r4", "r5", "r6", "r8"
32887     );
32888 
32889     return c;
32890 }
32891 
32892 #endif /* WOLFSSL_SP_SMALL */
32893 /* Conditionally subtract b from a using the mask m.
32894  * m is -1 to subtract and 0 when not copying.
32895  *
32896  * r  A single precision number representing condition subtract result.
32897  * a  A single precision number to subtract from.
32898  * b  A single precision number to subtract.
32899  * m  Mask value to apply.
32900  */
sp_1024_cond_sub_32(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)32901 SP_NOINLINE static sp_digit sp_1024_cond_sub_32(sp_digit* r, const sp_digit* a,
32902         const sp_digit* b, sp_digit m)
32903 {
32904     sp_digit c = 0;
32905 
32906     __asm__ __volatile__ (
32907         "mov	r5, #128\n\t"
32908         "mov	r9, r5\n\t"
32909         "mov	r8, #0\n\t"
32910         "\n1:\n\t"
32911         "ldr	r6, [%[b], r8]\n\t"
32912         "and	r6, r6, %[m]\n\t"
32913         "mov	r5, #0\n\t"
32914         "subs	r5, r5, %[c]\n\t"
32915         "ldr	r5, [%[a], r8]\n\t"
32916         "sbcs	r5, r5, r6\n\t"
32917         "sbcs	%[c], %[c], %[c]\n\t"
32918         "str	r5, [%[r], r8]\n\t"
32919         "add	r8, r8, #4\n\t"
32920         "cmp	r8, r9\n\t"
32921 #ifdef __GNUC__
32922         "blt	1b\n\t"
32923 #else
32924         "blt.n	1b\n\t"
32925 #endif /* __GNUC__ */
32926         : [c] "+r" (c)
32927         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
32928         : "memory", "r5", "r6", "r8", "r9"
32929     );
32930 
32931     return c;
32932 }
32933 
32934 #ifdef WOLFSSL_SP_SMALL
32935 /* Add b to a into r. (r = a + b)
32936  *
32937  * r  A single precision integer.
32938  * a  A single precision integer.
32939  * b  A single precision integer.
32940  */
sp_1024_add_32(sp_digit * r,const sp_digit * a,const sp_digit * b)32941 SP_NOINLINE static sp_digit sp_1024_add_32(sp_digit* r, const sp_digit* a,
32942         const sp_digit* b)
32943 {
32944     sp_digit c = 0;
32945 
32946     __asm__ __volatile__ (
32947         "mov	r6, %[a]\n\t"
32948         "mov	r8, #0\n\t"
32949         "add	r6, r6, #128\n\t"
32950         "sub	r8, r8, #1\n\t"
32951         "\n1:\n\t"
32952         "adds	%[c], %[c], r8\n\t"
32953         "ldr	r4, [%[a]]\n\t"
32954         "ldr	r5, [%[b]]\n\t"
32955         "adcs	r4, r4, r5\n\t"
32956         "str	r4, [%[r]]\n\t"
32957         "mov	%[c], #0\n\t"
32958         "adc	%[c], %[c], %[c]\n\t"
32959         "add	%[a], %[a], #4\n\t"
32960         "add	%[b], %[b], #4\n\t"
32961         "add	%[r], %[r], #4\n\t"
32962         "cmp	%[a], r6\n\t"
32963 #ifdef __GNUC__
32964         "bne	1b\n\t"
32965 #else
32966         "bne.n	1b\n\t"
32967 #endif /* __GNUC__ */
32968         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
32969         :
32970         : "memory", "r4", "r5", "r6", "r8"
32971     );
32972 
32973     return c;
32974 }
32975 
32976 #endif /* WOLFSSL_SP_SMALL */
32977 /* Mul a by digit b into r. (r = a * b)
32978  *
32979  * r  A single precision integer.
32980  * a  A single precision integer.
32981  * b  A single precision digit.
32982  */
sp_1024_mul_d_32(sp_digit * r,const sp_digit * a,sp_digit b)32983 SP_NOINLINE static void sp_1024_mul_d_32(sp_digit* r, const sp_digit* a,
32984         sp_digit b)
32985 {
32986     __asm__ __volatile__ (
32987         "add	r9, %[a], #128\n\t"
32988         /* A[0] * B */
32989         "ldr	r6, [%[a]], #4\n\t"
32990         "umull	r5, r3, r6, %[b]\n\t"
32991         "mov	r4, #0\n\t"
32992         "str	r5, [%[r]], #4\n\t"
32993         /* A[0] * B - Done */
32994         "\n1:\n\t"
32995         "mov	r5, #0\n\t"
32996         /* A[] * B */
32997         "ldr	r6, [%[a]], #4\n\t"
32998         "umull	r6, r8, r6, %[b]\n\t"
32999         "adds	r3, r3, r6\n\t"
33000         "adcs 	r4, r4, r8\n\t"
33001         "adc	r5, r5, #0\n\t"
33002         /* A[] * B - Done */
33003         "str	r3, [%[r]], #4\n\t"
33004         "mov	r3, r4\n\t"
33005         "mov	r4, r5\n\t"
33006         "cmp	%[a], r9\n\t"
33007 #ifdef __GNUC__
33008         "blt	1b\n\t"
33009 #else
33010         "blt.n	1b\n\t"
33011 #endif /* __GNUC__ */
33012         "str	r3, [%[r]]\n\t"
33013         : [r] "+r" (r), [a] "+r" (a)
33014         : [b] "r" (b)
33015         : "memory", "r3", "r4", "r5", "r6", "r8", "r9"
33016     );
33017 }
33018 
33019 /* Divide the double width number (d1|d0) by the dividend. (d1|d0 / div)
33020  *
33021  * d1   The high order half of the number to divide.
33022  * d0   The low order half of the number to divide.
33023  * div  The dividend.
33024  * returns the result of the division.
33025  *
33026  * Note that this is an approximate div. It may give an answer 1 larger.
33027  */
div_1024_word_32(sp_digit d1,sp_digit d0,sp_digit div)33028 SP_NOINLINE static sp_digit div_1024_word_32(sp_digit d1, sp_digit d0,
33029         sp_digit div)
33030 {
33031     sp_digit r = 0;
33032 
33033     __asm__ __volatile__ (
33034         "lsr	r6, %[div], #16\n\t"
33035         "add	r6, r6, #1\n\t"
33036         "udiv	r4, %[d1], r6\n\t"
33037         "lsl	r8, r4, #16\n\t"
33038         "umull	r4, r5, %[div], r8\n\t"
33039         "subs	%[d0], %[d0], r4\n\t"
33040         "sbc	%[d1], %[d1], r5\n\t"
33041         "udiv	r5, %[d1], r6\n\t"
33042         "lsl	r4, r5, #16\n\t"
33043         "add	r8, r8, r4\n\t"
33044         "umull	r4, r5, %[div], r4\n\t"
33045         "subs	%[d0], %[d0], r4\n\t"
33046         "sbc	%[d1], %[d1], r5\n\t"
33047         "lsl	r4, %[d1], #16\n\t"
33048         "orr	r4, r4, %[d0], lsr #16\n\t"
33049         "udiv	r4, r4, r6\n\t"
33050         "add	r8, r8, r4\n\t"
33051         "umull	r4, r5, %[div], r4\n\t"
33052         "subs	%[d0], %[d0], r4\n\t"
33053         "sbc	%[d1], %[d1], r5\n\t"
33054         "lsl	r4, %[d1], #16\n\t"
33055         "orr	r4, r4, %[d0], lsr #16\n\t"
33056         "udiv	r4, r4, r6\n\t"
33057         "add	r8, r8, r4\n\t"
33058         "umull	r4, r5, %[div], r4\n\t"
33059         "subs	%[d0], %[d0], r4\n\t"
33060         "sbc	%[d1], %[d1], r5\n\t"
33061         "udiv	r4, %[d0], %[div]\n\t"
33062         "add	r8, r8, r4\n\t"
33063         "mov	%[r], r8\n\t"
33064         : [r] "+r" (r)
33065         : [d1] "r" (d1), [d0] "r" (d0), [div] "r" (div)
33066         : "r4", "r5", "r6", "r8"
33067     );
33068     return r;
33069 }
33070 
33071 /* AND m into each word of a and store in r.
33072  *
33073  * r  A single precision integer.
33074  * a  A single precision integer.
33075  * m  Mask to AND against each digit.
33076  */
sp_1024_mask_32(sp_digit * r,const sp_digit * a,sp_digit m)33077 static void sp_1024_mask_32(sp_digit* r, const sp_digit* a, sp_digit m)
33078 {
33079 #ifdef WOLFSSL_SP_SMALL
33080     int i;
33081 
33082     for (i=0; i<32; i++) {
33083         r[i] = a[i] & m;
33084     }
33085 #else
33086     int i;
33087 
33088     for (i = 0; i < 32; i += 8) {
33089         r[i+0] = a[i+0] & m;
33090         r[i+1] = a[i+1] & m;
33091         r[i+2] = a[i+2] & m;
33092         r[i+3] = a[i+3] & m;
33093         r[i+4] = a[i+4] & m;
33094         r[i+5] = a[i+5] & m;
33095         r[i+6] = a[i+6] & m;
33096         r[i+7] = a[i+7] & m;
33097     }
33098 #endif
33099 }
33100 
33101 /* Compare a with b in constant time.
33102  *
33103  * a  A single precision integer.
33104  * b  A single precision integer.
33105  * return -ve, 0 or +ve if a is less than, equal to or greater than b
33106  * respectively.
33107  */
sp_1024_cmp_32(const sp_digit * a,const sp_digit * b)33108 SP_NOINLINE static sp_int32 sp_1024_cmp_32(const sp_digit* a, const sp_digit* b)
33109 {
33110     sp_digit r = 0;
33111 
33112 
33113     __asm__ __volatile__ (
33114         "mov	r3, #0\n\t"
33115         "mvn	r3, r3\n\t"
33116         "mov	r6, #124\n\t"
33117         "\n1:\n\t"
33118         "ldr	r8, [%[a], r6]\n\t"
33119         "ldr	r5, [%[b], r6]\n\t"
33120         "and	r8, r8, r3\n\t"
33121         "and	r5, r5, r3\n\t"
33122         "mov	r4, r8\n\t"
33123         "subs	r8, r8, r5\n\t"
33124         "sbc	r8, r8, r8\n\t"
33125         "add	%[r], %[r], r8\n\t"
33126         "mvn	r8, r8\n\t"
33127         "and	r3, r3, r8\n\t"
33128         "subs	r5, r5, r4\n\t"
33129         "sbc	r8, r8, r8\n\t"
33130         "sub	%[r], %[r], r8\n\t"
33131         "mvn	r8, r8\n\t"
33132         "and	r3, r3, r8\n\t"
33133         "sub	r6, r6, #4\n\t"
33134         "cmp	r6, #0\n\t"
33135 #ifdef __GNUC__
33136         "bge	1b\n\t"
33137 #else
33138         "bge.n	1b\n\t"
33139 #endif /* __GNUC__ */
33140         : [r] "+r" (r)
33141         : [a] "r" (a), [b] "r" (b)
33142         : "r3", "r4", "r5", "r6", "r8"
33143     );
33144 
33145     return r;
33146 }
33147 
33148 /* Divide d in a and put remainder into r (m*d + r = a)
33149  * m is not calculated as it is not needed at this time.
33150  *
33151  * a  Number to be divided.
33152  * d  Number to divide with.
33153  * m  Multiplier result.
33154  * r  Remainder from the division.
33155  * returns MP_OKAY indicating success.
33156  */
sp_1024_div_32(const sp_digit * a,const sp_digit * d,sp_digit * m,sp_digit * r)33157 static WC_INLINE int sp_1024_div_32(const sp_digit* a, const sp_digit* d, sp_digit* m,
33158         sp_digit* r)
33159 {
33160     sp_digit t1[64], t2[33];
33161     sp_digit div, r1;
33162     int i;
33163 
33164     (void)m;
33165 
33166     div = d[31];
33167     XMEMCPY(t1, a, sizeof(*t1) * 2 * 32);
33168     for (i=31; i>=0; i--) {
33169         sp_digit hi = t1[32 + i] - (t1[32 + i] == div);
33170         r1 = div_1024_word_32(hi, t1[32 + i - 1], div);
33171 
33172         sp_1024_mul_d_32(t2, d, r1);
33173         t1[32 + i] += sp_1024_sub_in_place_32(&t1[i], t2);
33174         t1[32 + i] -= t2[32];
33175         sp_1024_mask_32(t2, d, t1[32 + i]);
33176         t1[32 + i] += sp_1024_add_32(&t1[i], &t1[i], t2);
33177         sp_1024_mask_32(t2, d, t1[32 + i]);
33178         t1[32 + i] += sp_1024_add_32(&t1[i], &t1[i], t2);
33179     }
33180 
33181     r1 = sp_1024_cmp_32(t1, d) >= 0;
33182     sp_1024_cond_sub_32(r, t1, d, (sp_digit)0 - r1);
33183 
33184     return MP_OKAY;
33185 }
33186 
33187 /* Reduce a modulo m into r. (r = a mod m)
33188  *
33189  * r  A single precision number that is the reduced result.
33190  * a  A single precision number that is to be reduced.
33191  * m  A single precision number that is the modulus to reduce with.
33192  * returns MP_OKAY indicating success.
33193  */
sp_1024_mod_32(sp_digit * r,const sp_digit * a,const sp_digit * m)33194 static WC_INLINE int sp_1024_mod_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
33195 {
33196     return sp_1024_div_32(a, m, NULL, r);
33197 }
33198 
33199 /* Multiply a number by Montgomery normalizer mod modulus (prime).
33200  *
33201  * r  The resulting Montgomery form number.
33202  * a  The number to convert.
33203  * m  The modulus (prime).
33204  * returns MEMORY_E when memory allocation fails and MP_OKAY otherwise.
33205  */
sp_1024_mod_mul_norm_32(sp_digit * r,const sp_digit * a,const sp_digit * m)33206 static int sp_1024_mod_mul_norm_32(sp_digit* r, const sp_digit* a,
33207         const sp_digit* m)
33208 {
33209     sp_1024_mul_32(r, a, p1024_norm_mod);
33210     return sp_1024_mod_32(r, r, m);
33211 }
33212 
33213 
33214 #ifdef WOLFCRYPT_HAVE_SAKKE
33215 /* Create a new point.
33216  *
33217  * heap  [in]   Buffer to allocate dynamic memory from.
33218  * sp    [in]   Data for point - only if not allocating.
33219  * p     [out]  New point.
33220  * returns MEMORY_E when dynamic memory allocation fails and 0 otherwise.
33221  */
sp_1024_point_new_ex_32(void * heap,sp_point_1024 * sp,sp_point_1024 ** p)33222 static int sp_1024_point_new_ex_32(void* heap, sp_point_1024* sp,
33223     sp_point_1024** p)
33224 {
33225     int ret = MP_OKAY;
33226     (void)heap;
33227 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
33228     (void)sp;
33229     *p = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap, DYNAMIC_TYPE_ECC);
33230 #else
33231     *p = sp;
33232 #endif
33233     if (*p == NULL) {
33234         ret = MEMORY_E;
33235     }
33236     return ret;
33237 }
33238 
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
/* Allocate memory for point and return error.
 * The sp argument is not used by this form; p receives the new pointer. */
#define sp_1024_point_new_32(heap, sp, p) sp_1024_point_new_ex_32((heap), NULL, &(p))
#else
/* Set pointer to data and return no error.
 * sp must be an object (its address is taken); p is set to point at it. */
#define sp_1024_point_new_32(heap, sp, p) sp_1024_point_new_ex_32((heap), &(sp), &(p))
#endif
33246 #endif /* WOLFCRYPT_HAVE_SAKKE */
33247 #ifdef WOLFCRYPT_HAVE_SAKKE
33248 /* Free the point.
33249  *
33250  * p      [in,out]  Point to free.
33251  * clear  [in]      Indicates whether to zeroize point.
33252  * heap   [in]      Buffer from which dynamic memory was allocate from.
33253  */
sp_1024_point_free_32(sp_point_1024 * p,int clear,void * heap)33254 static void sp_1024_point_free_32(sp_point_1024* p, int clear, void* heap)
33255 {
33256 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && !defined(WOLFSSL_SP_NO_MALLOC)
33257 /* If valid pointer then clear point data if requested and free data. */
33258     if (p != NULL) {
33259         if (clear != 0) {
33260             XMEMSET(p, 0, sizeof(*p));
33261         }
33262         XFREE(p, heap, DYNAMIC_TYPE_ECC);
33263     }
33264 #else
33265 /* Clear point data if requested. */
33266     if ((p != NULL) && (clear != 0)) {
33267         XMEMSET(p, 0, sizeof(*p));
33268     }
33269 #endif
33270     (void)heap;
33271 }
33272 #endif /* WOLFCRYPT_HAVE_SAKKE */
33273 
33274 /* Convert an mp_int to an array of sp_digit.
33275  *
33276  * r  A single precision integer.
33277  * size  Maximum number of bytes to convert
33278  * a  A multi-precision integer.
33279  */
sp_1024_from_mp(sp_digit * r,int size,const mp_int * a)33280 static void sp_1024_from_mp(sp_digit* r, int size, const mp_int* a)
33281 {
33282 #if DIGIT_BIT == 32
33283     int j;
33284 
33285     XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used);
33286 
33287     for (j = a->used; j < size; j++) {
33288         r[j] = 0;
33289     }
33290 #elif DIGIT_BIT > 32
33291     int i;
33292     int j = 0;
33293     word32 s = 0;
33294 
33295     r[0] = 0;
33296     for (i = 0; i < a->used && j < size; i++) {
33297         r[j] |= ((sp_digit)a->dp[i] << s);
33298         r[j] &= 0xffffffff;
33299         s = 32U - s;
33300         if (j + 1 >= size) {
33301             break;
33302         }
33303         /* lint allow cast of mismatch word32 and mp_digit */
33304         r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
33305         while ((s + 32U) <= (word32)DIGIT_BIT) {
33306             s += 32U;
33307             r[j] &= 0xffffffff;
33308             if (j + 1 >= size) {
33309                 break;
33310             }
33311             if (s < (word32)DIGIT_BIT) {
33312                 /* lint allow cast of mismatch word32 and mp_digit */
33313                 r[++j] = (sp_digit)(a->dp[i] >> s); /*lint !e9033*/
33314             }
33315             else {
33316                 r[++j] = (sp_digit)0;
33317             }
33318         }
33319         s = (word32)DIGIT_BIT - s;
33320     }
33321 
33322     for (j++; j < size; j++) {
33323         r[j] = 0;
33324     }
33325 #else
33326     int i;
33327     int j = 0;
33328     int s = 0;
33329 
33330     r[0] = 0;
33331     for (i = 0; i < a->used && j < size; i++) {
33332         r[j] |= ((sp_digit)a->dp[i]) << s;
33333         if (s + DIGIT_BIT >= 32) {
33334             r[j] &= 0xffffffff;
33335             if (j + 1 >= size) {
33336                 break;
33337             }
33338             s = 32 - s;
33339             if (s == DIGIT_BIT) {
33340                 r[++j] = 0;
33341                 s = 0;
33342             }
33343             else {
33344                 r[++j] = a->dp[i] >> s;
33345                 s = DIGIT_BIT - s;
33346             }
33347         }
33348         else {
33349             s += DIGIT_BIT;
33350         }
33351     }
33352 
33353     for (j++; j < size; j++) {
33354         r[j] = 0;
33355     }
33356 #endif
33357 }
33358 
33359 /* Convert a point of type ecc_point to type sp_point_1024.
33360  *
33361  * p   Point of type sp_point_1024 (result).
33362  * pm  Point of type ecc_point.
33363  */
sp_1024_point_from_ecc_point_32(sp_point_1024 * p,const ecc_point * pm)33364 static void sp_1024_point_from_ecc_point_32(sp_point_1024* p,
33365         const ecc_point* pm)
33366 {
33367     XMEMSET(p->x, 0, sizeof(p->x));
33368     XMEMSET(p->y, 0, sizeof(p->y));
33369     XMEMSET(p->z, 0, sizeof(p->z));
33370     sp_1024_from_mp(p->x, 32, pm->x);
33371     sp_1024_from_mp(p->y, 32, pm->y);
33372     sp_1024_from_mp(p->z, 32, pm->z);
33373     p->infinity = 0;
33374 }
33375 
33376 /* Convert an array of sp_digit to an mp_int.
33377  *
33378  * a  A single precision integer.
33379  * r  A multi-precision integer.
33380  */
sp_1024_to_mp(const sp_digit * a,mp_int * r)33381 static int sp_1024_to_mp(const sp_digit* a, mp_int* r)
33382 {
33383     int err;
33384 
33385     err = mp_grow(r, (1024 + DIGIT_BIT - 1) / DIGIT_BIT);
33386     if (err == MP_OKAY) { /*lint !e774 case where err is always MP_OKAY*/
33387 #if DIGIT_BIT == 32
33388         XMEMCPY(r->dp, a, sizeof(sp_digit) * 32);
33389         r->used = 32;
33390         mp_clamp(r);
33391 #elif DIGIT_BIT < 32
33392         int i;
33393         int j = 0;
33394         int s = 0;
33395 
33396         r->dp[0] = 0;
33397         for (i = 0; i < 32; i++) {
33398             r->dp[j] |= (mp_digit)(a[i] << s);
33399             r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
33400             s = DIGIT_BIT - s;
33401             r->dp[++j] = (mp_digit)(a[i] >> s);
33402             while (s + DIGIT_BIT <= 32) {
33403                 s += DIGIT_BIT;
33404                 r->dp[j++] &= ((sp_digit)1 << DIGIT_BIT) - 1;
33405                 if (s == SP_WORD_SIZE) {
33406                     r->dp[j] = 0;
33407                 }
33408                 else {
33409                     r->dp[j] = (mp_digit)(a[i] >> s);
33410                 }
33411             }
33412             s = 32 - s;
33413         }
33414         r->used = (1024 + DIGIT_BIT - 1) / DIGIT_BIT;
33415         mp_clamp(r);
33416 #else
33417         int i;
33418         int j = 0;
33419         int s = 0;
33420 
33421         r->dp[0] = 0;
33422         for (i = 0; i < 32; i++) {
33423             r->dp[j] |= ((mp_digit)a[i]) << s;
33424             if (s + 32 >= DIGIT_BIT) {
33425     #if DIGIT_BIT != 32 && DIGIT_BIT != 64
33426                 r->dp[j] &= ((sp_digit)1 << DIGIT_BIT) - 1;
33427     #endif
33428                 s = DIGIT_BIT - s;
33429                 r->dp[++j] = a[i] >> s;
33430                 s = 32 - s;
33431             }
33432             else {
33433                 s += 32;
33434             }
33435         }
33436         r->used = (1024 + DIGIT_BIT - 1) / DIGIT_BIT;
33437         mp_clamp(r);
33438 #endif
33439     }
33440 
33441     return err;
33442 }
33443 
33444 /* Convert a point of type sp_point_1024 to type ecc_point.
33445  *
33446  * p   Point of type sp_point_1024.
33447  * pm  Point of type ecc_point (result).
33448  * returns MEMORY_E when allocation of memory in ecc_point fails otherwise
33449  * MP_OKAY.
33450  */
sp_1024_point_to_ecc_point_32(const sp_point_1024 * p,ecc_point * pm)33451 static int sp_1024_point_to_ecc_point_32(const sp_point_1024* p, ecc_point* pm)
33452 {
33453     int err;
33454 
33455     err = sp_1024_to_mp(p->x, pm->x);
33456     if (err == MP_OKAY) {
33457         err = sp_1024_to_mp(p->y, pm->y);
33458     }
33459     if (err == MP_OKAY) {
33460         err = sp_1024_to_mp(p->z, pm->z);
33461     }
33462 
33463     return err;
33464 }
33465 
33466 /* Reduce the number back to 1024 bits using Montgomery reduction.
33467  *
33468  * a   A single precision number to reduce in place.
33469  * m   The single precision number representing the modulus.
33470  * mp  The digit representing the negative inverse of m mod 2^n.
33471  */
sp_1024_mont_reduce_32(sp_digit * a,const sp_digit * m,sp_digit mp)33472 SP_NOINLINE static void sp_1024_mont_reduce_32(sp_digit* a, const sp_digit* m,
33473         sp_digit mp)
33474 {
33475     sp_digit ca = 0;
33476 
33477     __asm__ __volatile__ (
33478         "mov	r9, %[mp]\n\t"
33479         "mov	r12, %[m]\n\t"
33480         "mov	r10, %[a]\n\t"
33481         "mov	r4, #0\n\t"
33482         "add	r11, r10, #128\n\t"
33483         "\n1:\n\t"
33484         /* mu = a[i] * mp */
33485         "mov	%[mp], r9\n\t"
33486         "ldr	%[a], [r10]\n\t"
33487         "mul	%[mp], %[mp], %[a]\n\t"
33488         "mov	%[m], r12\n\t"
33489         "add	r14, r10, #120\n\t"
33490         "\n2:\n\t"
33491         /* a[i+j] += m[j] * mu */
33492         "ldr	%[a], [r10]\n\t"
33493         "mov	r5, #0\n\t"
33494         /* Multiply m[j] and mu - Start */
33495         "ldr	r8, [%[m]], #4\n\t"
33496         "umull	r6, r8, %[mp], r8\n\t"
33497         "adds	%[a], %[a], r6\n\t"
33498         "adc	r5, r5, r8\n\t"
33499         /* Multiply m[j] and mu - Done */
33500         "adds	r4, r4, %[a]\n\t"
33501         "adc	r5, r5, #0\n\t"
33502         "str	r4, [r10], #4\n\t"
33503         /* a[i+j+1] += m[j+1] * mu */
33504         "ldr	%[a], [r10]\n\t"
33505         "mov	r4, #0\n\t"
33506         /* Multiply m[j] and mu - Start */
33507         "ldr	r8, [%[m]], #4\n\t"
33508         "umull	r6, r8, %[mp], r8\n\t"
33509         "adds	%[a], %[a], r6\n\t"
33510         "adc	r4, r4, r8\n\t"
33511         /* Multiply m[j] and mu - Done */
33512         "adds	r5, r5, %[a]\n\t"
33513         "adc	r4, r4, #0\n\t"
33514         "str	r5, [r10], #4\n\t"
33515         "cmp	r10, r14\n\t"
33516 #ifdef __GNUC__
33517         "blt	2b\n\t"
33518 #else
33519         "blt.n	2b\n\t"
33520 #endif /* __GNUC__ */
33521         /* a[i+30] += m[30] * mu */
33522         "ldr	%[a], [r10]\n\t"
33523         "mov	r5, #0\n\t"
33524         /* Multiply m[j] and mu - Start */
33525         "ldr	r8, [%[m]], #4\n\t"
33526         "umull	r6, r8, %[mp], r8\n\t"
33527         "adds	%[a], %[a], r6\n\t"
33528         "adc	r5, r5, r8\n\t"
33529         /* Multiply m[j] and mu - Done */
33530         "adds	r4, r4, %[a]\n\t"
33531         "adc	r5, r5, #0\n\t"
33532         "str	r4, [r10], #4\n\t"
33533         /* a[i+31] += m[31] * mu */
33534         "mov	r4, %[ca]\n\t"
33535         "mov	%[ca], #0\n\t"
33536         /* Multiply m[31] and mu - Start */
33537         "ldr	r8, [%[m]]\n\t"
33538         "umull	r6, r8, %[mp], r8\n\t"
33539         "adds	r5, r5, r6\n\t"
33540         "adcs 	r4, r4, r8\n\t"
33541         "adc	%[ca], %[ca], #0\n\t"
33542         /* Multiply m[31] and mu - Done */
33543         "ldr	r6, [r10]\n\t"
33544         "ldr	r8, [r10, #4]\n\t"
33545         "adds	r6, r6, r5\n\t"
33546         "adcs	r8, r8, r4\n\t"
33547         "adc	%[ca], %[ca], #0\n\t"
33548         "str	r6, [r10]\n\t"
33549         "str	r8, [r10, #4]\n\t"
33550         /* Next word in a */
33551         "sub	r10, r10, #120\n\t"
33552         "cmp	r10, r11\n\t"
33553 #ifdef __GNUC__
33554         "blt	1b\n\t"
33555 #else
33556         "blt.n	1b\n\t"
33557 #endif /* __GNUC__ */
33558         "ldr       r6, [%[m]]\n\t"
33559         "subs      r6, r6, r8\n\t"
33560         "neg       %[ca], %[ca]\n\t"
33561         "sbc       r6, r6, r6\n\t"
33562         "orr       %[ca], %[ca], r6\n\t"
33563         "mov	%[a], r10\n\t"
33564         "mov	%[m], r12\n\t"
33565         : [ca] "+r" (ca), [a] "+r" (a)
33566         : [m] "r" (m), [mp] "r" (mp)
33567         : "memory", "r4", "r5", "r6", "r8", "r9", "r10", "r11", "r12", "r14"
33568     );
33569 
33570     sp_1024_cond_sub_32(a - 32, a, m, ca);
33571 }
33572 
33573 /* Multiply two Montgomery form numbers mod the modulus (prime).
33574  * (r = a * b mod m)
33575  *
33576  * r   Result of multiplication.
33577  * a   First number to multiply in Montgomery form.
33578  * b   Second number to multiply in Montgomery form.
33579  * m   Modulus (prime).
33580  * mp  Montgomery mulitplier.
33581  */
sp_1024_mont_mul_32(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m,sp_digit mp)33582 static void sp_1024_mont_mul_32(sp_digit* r, const sp_digit* a,
33583         const sp_digit* b, const sp_digit* m, sp_digit mp)
33584 {
33585     sp_1024_mul_32(r, a, b);
33586     sp_1024_mont_reduce_32(r, m, mp);
33587 }
33588 
33589 /* Square the Montgomery form number. (r = a * a mod m)
33590  *
33591  * r   Result of squaring.
33592  * a   Number to square in Montgomery form.
33593  * m   Modulus (prime).
33594  * mp  Montgomery mulitplier.
33595  */
sp_1024_mont_sqr_32(sp_digit * r,const sp_digit * a,const sp_digit * m,sp_digit mp)33596 static void sp_1024_mont_sqr_32(sp_digit* r, const sp_digit* a,
33597         const sp_digit* m, sp_digit mp)
33598 {
33599     sp_1024_sqr_32(r, a);
33600     sp_1024_mont_reduce_32(r, m, mp);
33601 }
33602 
/* Mod-2 for the P1024 curve.
 *
 * The exponent is stored as byte pairs that are consumed by
 * sp_1024_mont_inv_32(). In each pair the first byte is the number of
 * squarings to perform and the second byte is the index, into that function's
 * table of precomputed powers, of the value to multiply by afterwards.
 *
 * Only the index of the first pair is read (it selects the starting value).
 * The last pair is not read from the table: the consuming loop stops before
 * it and performs a single squaring and a multiplication by the input
 * instead.
 */
static const uint8_t p1024_mod_minus_2[] = {
     6,0x06,  7,0x0f,  7,0x0b,  6,0x0c,  7,0x1e,  9,0x09,  7,0x0c,  7,0x1f,
     6,0x16,  6,0x06,  7,0x0e,  8,0x10,  6,0x03,  8,0x11,  6,0x0d,  7,0x14,
     9,0x12,  6,0x0f,  7,0x04,  9,0x0d,  6,0x00,  7,0x13,  6,0x01,  6,0x07,
     8,0x0d,  8,0x00,  6,0x06,  9,0x17,  6,0x14,  6,0x15,  6,0x11,  6,0x0b,
     9,0x0c,  6,0x1e, 13,0x14,  7,0x0e,  6,0x1d, 12,0x0a,  6,0x0b,  8,0x07,
     6,0x18,  6,0x0f,  6,0x10,  8,0x1c,  7,0x16,  7,0x02,  6,0x01,  6,0x13,
    10,0x15,  7,0x06,  8,0x14,  6,0x0c,  6,0x19,  7,0x10,  6,0x19,  6,0x19,
     9,0x16,  7,0x19,  6,0x1f,  6,0x17,  6,0x12,  8,0x02,  6,0x01,  6,0x04,
     6,0x15,  7,0x16,  6,0x04,  6,0x1f,  6,0x09,  7,0x06,  7,0x13,  7,0x09,
     6,0x0d, 10,0x18,  6,0x06,  6,0x11,  6,0x04,  6,0x01,  6,0x13,  8,0x06,
     6,0x0d,  8,0x13,  7,0x08,  6,0x08,  6,0x05,  7,0x0c,  7,0x0e,  7,0x15,
     6,0x05,  7,0x14, 10,0x19,  6,0x10,  6,0x16,  6,0x15,  7,0x1f,  6,0x14,
     6,0x0a, 10,0x11,  6,0x01,  7,0x05,  7,0x08,  8,0x0a,  7,0x1e,  7,0x1c,
     6,0x1c,  7,0x09, 10,0x18,  7,0x1c, 10,0x06,  6,0x0a,  6,0x07,  6,0x19,
     7,0x06,  6,0x0d,  7,0x0f,  7,0x0b,  7,0x05,  6,0x11,  6,0x1c,  7,0x1f,
     6,0x1e,  7,0x18,  6,0x1e,  6,0x00,  6,0x03,  6,0x02,  7,0x10,  6,0x0b,
     6,0x1b,  7,0x10,  6,0x00,  8,0x11,  7,0x1b,  6,0x18,  6,0x01,  7,0x0c,
     7,0x1d,  7,0x13,  6,0x08,  7,0x1b,  8,0x13,  7,0x16, 13,0x1d,  7,0x1f,
     6,0x0a,  6,0x01,  7,0x1f,  6,0x14,  1,0x01
};
33625 
33626 /* Invert the number, in Montgomery form, modulo the modulus (prime) of the
33627  * P1024 curve. (r = 1 / a mod m)
33628  *
33629  * r   Inverse result.
33630  * a   Number to invert.
33631  * td  Temporary data.
33632  */
sp_1024_mont_inv_32(sp_digit * r,const sp_digit * a,sp_digit * td)33633 static void sp_1024_mont_inv_32(sp_digit* r, const sp_digit* a,
33634         sp_digit* td)
33635 {
33636     sp_digit* t = td;
33637     int i;
33638     int j;
33639     sp_digit table[32][2 * 32];
33640 
33641     XMEMCPY(table[0], a, sizeof(sp_digit) * 32);
33642     for (i = 1; i < 6; i++) {
33643         sp_1024_mont_sqr_32(table[0], table[0], p1024_mod, p1024_mp_mod);
33644     }
33645     for (i = 1; i < 32; i++) {
33646         sp_1024_mont_mul_32(table[i], table[i-1], a, p1024_mod, p1024_mp_mod);
33647     }
33648 
33649     XMEMCPY(t, table[p1024_mod_minus_2[1]], sizeof(sp_digit) * 32);
33650     for (i = 2; i < (int)sizeof(p1024_mod_minus_2) - 2; i += 2) {
33651         for (j = 0; j < p1024_mod_minus_2[i]; j++) {
33652             sp_1024_mont_sqr_32(t, t, p1024_mod, p1024_mp_mod);
33653         }
33654         sp_1024_mont_mul_32(t, t, table[p1024_mod_minus_2[i+1]], p1024_mod,
33655             p1024_mp_mod);
33656     }
33657     sp_1024_mont_sqr_32(t, t, p1024_mod, p1024_mp_mod);
33658     sp_1024_mont_mul_32(r, t, a, p1024_mod, p1024_mp_mod);
33659 }
33660 
/* Normalize the values in each word to 32 bits.
 *
 * In this implementation the macro expands to nothing.
 *
 * a  Array of sp_digit to normalize.
 */
#define sp_1024_norm_32(a)
33666 
33667 /* Map the Montgomery form projective coordinate point to an affine point.
33668  *
33669  * r  Resulting affine coordinate point.
33670  * p  Montgomery form projective coordinate point.
33671  * t  Temporary ordinate data.
33672  */
sp_1024_map_32(sp_point_1024 * r,const sp_point_1024 * p,sp_digit * t)33673 static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p,
33674     sp_digit* t)
33675 {
33676     sp_digit* t1 = t;
33677     sp_digit* t2 = t + 2*32;
33678     sp_int32 n;
33679 
33680     sp_1024_mont_inv_32(t1, p->z, t + 2*32);
33681 
33682     sp_1024_mont_sqr_32(t2, t1, p1024_mod, p1024_mp_mod);
33683     sp_1024_mont_mul_32(t1, t2, t1, p1024_mod, p1024_mp_mod);
33684 
33685     /* x /= z^2 */
33686     sp_1024_mont_mul_32(r->x, p->x, t2, p1024_mod, p1024_mp_mod);
33687     XMEMSET(r->x + 32, 0, sizeof(r->x) / 2U);
33688     sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod);
33689     /* Reduce x to less than modulus */
33690     n = sp_1024_cmp_32(r->x, p1024_mod);
33691     sp_1024_cond_sub_32(r->x, r->x, p1024_mod, 0 - ((n >= 0) ?
33692                 (sp_digit)1 : (sp_digit)0));
33693     sp_1024_norm_32(r->x);
33694 
33695     /* y /= z^3 */
33696     sp_1024_mont_mul_32(r->y, p->y, t1, p1024_mod, p1024_mp_mod);
33697     XMEMSET(r->y + 32, 0, sizeof(r->y) / 2U);
33698     sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod);
33699     /* Reduce y to less than modulus */
33700     n = sp_1024_cmp_32(r->y, p1024_mod);
33701     sp_1024_cond_sub_32(r->y, r->y, p1024_mod, 0 - ((n >= 0) ?
33702                 (sp_digit)1 : (sp_digit)0));
33703     sp_1024_norm_32(r->y);
33704 
33705     XMEMSET(r->z, 0, sizeof(r->z));
33706     r->z[0] = 1;
33707 
33708 }
33709 
/* Add two Montgomery form numbers (r = a + b % m).
 *
 * The 32 words of a and b are added with a single carry chain into r. The
 * modulus is then subtracted from r under a mask; the mask is derived from
 * the carry out of the addition and a comparison of the top word of the sum
 * with the top word of m only.
 *
 * r   Result of addition.
 * a   First number to add in Montgomery form.
 * b   Second number to add in Montgomery form.
 * m   Modulus (prime).
 */
SP_NOINLINE static void sp_1024_mont_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
        const sp_digit* m)
{
    __asm__ __volatile__ (
        /* r = a + b, four words per step, carry chained across all steps */
        "mov   r12, #0\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adds      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7}\n\t"
        "ldm %[b]!, {r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r8\n\t"
        "adcs      r5, r5, r9\n\t"
        "adcs      r6, r6, r10\n\t"
        "adcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        /* Build the subtraction mask in r12: the negated carry out of the
         * addition, ORed with the borrow of m[31] - r[31] (r7 holds the top
         * word of the sum). The sbc is intended to consume the carry flag
         * produced by the subs. r is rewound to the start of the result. */
        "ldr   r14, [%[m], #124]\n\t"
        "adc   r12, r12, #0\n\t"
        "subs  r14, r14, r7\n\t"
        "neg   r12, r12\n\t"
        "sbc   r14, r14, r14\n\t"
        "sub   %[r], %[r], #128\n\t"
        "orr   r12, r14\n\t"
        /* r -= (m & mask), four words per step, borrow chained */
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "subs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbc       r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        /* Rewind r to the start of the result */
        "sub   %[r], %[r], #128\n\t"
        /* a, b and m are advanced by the ldm write-backs, hence read-write */
        : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );
}
33879 
/* Double a Montgomery form number (r = a + a % m).
 *
 * The 32 words of a are doubled with a single carry chain into r. The
 * modulus is then subtracted from r under a mask; the mask is derived from
 * the carry out of the doubling and a comparison of the top word of the
 * result with the top word of m only.
 *
 * r   Result of doubling.
 * a   Number to double in Montgomery form.
 * m   Modulus (prime).
 */
SP_NOINLINE static void sp_1024_mont_dbl_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
{
    __asm__ __volatile__ (
        /* r = a + a, eight words per step, carry chained across all steps */
        "mov   r12, #0\n\t"
        "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "adds      r4, r4, r4\n\t"
        "adcs      r5, r5, r5\n\t"
        "adcs      r6, r6, r6\n\t"
        "adcs      r7, r7, r7\n\t"
        "adcs      r8, r8, r8\n\t"
        "adcs      r9, r9, r9\n\t"
        "adcs      r10, r10, r10\n\t"
        "adcs      r14, r14, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r4\n\t"
        "adcs      r5, r5, r5\n\t"
        "adcs      r6, r6, r6\n\t"
        "adcs      r7, r7, r7\n\t"
        "adcs      r8, r8, r8\n\t"
        "adcs      r9, r9, r9\n\t"
        "adcs      r10, r10, r10\n\t"
        "adcs      r14, r14, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r4\n\t"
        "adcs      r5, r5, r5\n\t"
        "adcs      r6, r6, r6\n\t"
        "adcs      r7, r7, r7\n\t"
        "adcs      r8, r8, r8\n\t"
        "adcs      r9, r9, r9\n\t"
        "adcs      r10, r10, r10\n\t"
        "adcs      r14, r14, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        "adcs      r4, r4, r4\n\t"
        "adcs      r5, r5, r5\n\t"
        "adcs      r6, r6, r6\n\t"
        "adcs      r7, r7, r7\n\t"
        "adcs      r8, r8, r8\n\t"
        "adcs      r9, r9, r9\n\t"
        "adcs      r10, r10, r10\n\t"
        "adcs      r14, r14, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
        /* Build the subtraction mask in r12: the negated carry out of the
         * doubling, ORed with the borrow of m[31] - r[31] (r14 holds the top
         * word of the result). The sbc is intended to consume the carry flag
         * produced by the subs. r is rewound to the start of the result. */
        "ldr   r4, [%[m], #124]\n\t"
        "adc   r12, r12, #0\n\t"
        "subs  r4, r4, r14\n\t"
        "neg   r12, r12\n\t"
        "sbc   r4, r4, r4\n\t"
        "sub   %[r], %[r], #128\n\t"
        "orr   r12, r4\n\t"
        /* r -= (m & mask), four words per step, borrow chained */
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "subs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbcs      r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        "ldm %[r], {r4, r5, r6, r7}\n\t"
        "ldm %[m]!, {r8, r9, r10, r14}\n\t"
        "and       r8, r8, r12\n\t"
        "and       r9, r9, r12\n\t"
        "and       r10, r10, r12\n\t"
        "and       r14, r14, r12\n\t"
        "sbcs      r4, r4, r8\n\t"
        "sbcs      r5, r5, r9\n\t"
        "sbcs      r6, r6, r10\n\t"
        "sbc       r7, r7, r14\n\t"
        "stm %[r]!, {r4, r5, r6, r7}\n\t"
        /* Rewind r to the start of the result */
        "sub   %[r], %[r], #128\n\t"
        /* a and m are advanced by the ldm write-backs, hence read-write */
        : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
        :
        : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
    );
}
34031 
34032 /* Triple a Montgomery form number (r = a + a + a % m).
34033  *
34034  * r   Result of Tripling.
34035  * a   Number to triple in Montgomery form.
34036  * m   Modulus (prime).
34037  */
sp_1024_mont_tpl_32(sp_digit * r,const sp_digit * a,const sp_digit * m)34038 SP_NOINLINE static void sp_1024_mont_tpl_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
34039 {
34040     __asm__ __volatile__ (
34041         "mov   r12, #0\n\t"
34042         "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34043         "adds      r4, r4, r4\n\t"
34044         "adcs      r5, r5, r5\n\t"
34045         "adcs      r6, r6, r6\n\t"
34046         "adcs      r7, r7, r7\n\t"
34047         "adcs      r8, r8, r8\n\t"
34048         "adcs      r9, r9, r9\n\t"
34049         "adcs      r10, r10, r10\n\t"
34050         "adcs      r14, r14, r14\n\t"
34051         "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34052         "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34053         "adcs      r4, r4, r4\n\t"
34054         "adcs      r5, r5, r5\n\t"
34055         "adcs      r6, r6, r6\n\t"
34056         "adcs      r7, r7, r7\n\t"
34057         "adcs      r8, r8, r8\n\t"
34058         "adcs      r9, r9, r9\n\t"
34059         "adcs      r10, r10, r10\n\t"
34060         "adcs      r14, r14, r14\n\t"
34061         "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34062         "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34063         "adcs      r4, r4, r4\n\t"
34064         "adcs      r5, r5, r5\n\t"
34065         "adcs      r6, r6, r6\n\t"
34066         "adcs      r7, r7, r7\n\t"
34067         "adcs      r8, r8, r8\n\t"
34068         "adcs      r9, r9, r9\n\t"
34069         "adcs      r10, r10, r10\n\t"
34070         "adcs      r14, r14, r14\n\t"
34071         "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34072         "ldm %[a]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34073         "adcs      r4, r4, r4\n\t"
34074         "adcs      r5, r5, r5\n\t"
34075         "adcs      r6, r6, r6\n\t"
34076         "adcs      r7, r7, r7\n\t"
34077         "adcs      r8, r8, r8\n\t"
34078         "adcs      r9, r9, r9\n\t"
34079         "adcs      r10, r10, r10\n\t"
34080         "adcs      r14, r14, r14\n\t"
34081         "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r14}\n\t"
34082         "ldr   r4, [%[m], #124]\n\t"
34083         "adc   r12, r12, #0\n\t"
34084         "subs  r4, r4, r14\n\t"
34085         "neg   r12, r12\n\t"
34086         "sbc   r4, r4, r4\n\t"
34087         "sub   %[r], %[r], #128\n\t"
34088         "orr   r12, r4\n\t"
34089         "ldm %[r], {r4, r5, r6, r7}\n\t"
34090         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34091         "and       r8, r8, r12\n\t"
34092         "and       r9, r9, r12\n\t"
34093         "and       r10, r10, r12\n\t"
34094         "and       r14, r14, r12\n\t"
34095         "subs      r4, r4, r8\n\t"
34096         "sbcs      r5, r5, r9\n\t"
34097         "sbcs      r6, r6, r10\n\t"
34098         "sbcs      r7, r7, r14\n\t"
34099         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34100         "ldm %[r], {r4, r5, r6, r7}\n\t"
34101         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34102         "and       r8, r8, r12\n\t"
34103         "and       r9, r9, r12\n\t"
34104         "and       r10, r10, r12\n\t"
34105         "and       r14, r14, r12\n\t"
34106         "sbcs      r4, r4, r8\n\t"
34107         "sbcs      r5, r5, r9\n\t"
34108         "sbcs      r6, r6, r10\n\t"
34109         "sbcs      r7, r7, r14\n\t"
34110         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34111         "ldm %[r], {r4, r5, r6, r7}\n\t"
34112         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34113         "and       r8, r8, r12\n\t"
34114         "and       r9, r9, r12\n\t"
34115         "and       r10, r10, r12\n\t"
34116         "and       r14, r14, r12\n\t"
34117         "sbcs      r4, r4, r8\n\t"
34118         "sbcs      r5, r5, r9\n\t"
34119         "sbcs      r6, r6, r10\n\t"
34120         "sbcs      r7, r7, r14\n\t"
34121         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34122         "ldm %[r], {r4, r5, r6, r7}\n\t"
34123         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34124         "and       r8, r8, r12\n\t"
34125         "and       r9, r9, r12\n\t"
34126         "and       r10, r10, r12\n\t"
34127         "and       r14, r14, r12\n\t"
34128         "sbcs      r4, r4, r8\n\t"
34129         "sbcs      r5, r5, r9\n\t"
34130         "sbcs      r6, r6, r10\n\t"
34131         "sbcs      r7, r7, r14\n\t"
34132         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34133         "ldm %[r], {r4, r5, r6, r7}\n\t"
34134         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34135         "and       r8, r8, r12\n\t"
34136         "and       r9, r9, r12\n\t"
34137         "and       r10, r10, r12\n\t"
34138         "and       r14, r14, r12\n\t"
34139         "sbcs      r4, r4, r8\n\t"
34140         "sbcs      r5, r5, r9\n\t"
34141         "sbcs      r6, r6, r10\n\t"
34142         "sbcs      r7, r7, r14\n\t"
34143         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34144         "ldm %[r], {r4, r5, r6, r7}\n\t"
34145         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34146         "and       r8, r8, r12\n\t"
34147         "and       r9, r9, r12\n\t"
34148         "and       r10, r10, r12\n\t"
34149         "and       r14, r14, r12\n\t"
34150         "sbcs      r4, r4, r8\n\t"
34151         "sbcs      r5, r5, r9\n\t"
34152         "sbcs      r6, r6, r10\n\t"
34153         "sbcs      r7, r7, r14\n\t"
34154         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34155         "ldm %[r], {r4, r5, r6, r7}\n\t"
34156         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34157         "and       r8, r8, r12\n\t"
34158         "and       r9, r9, r12\n\t"
34159         "and       r10, r10, r12\n\t"
34160         "and       r14, r14, r12\n\t"
34161         "sbcs      r4, r4, r8\n\t"
34162         "sbcs      r5, r5, r9\n\t"
34163         "sbcs      r6, r6, r10\n\t"
34164         "sbcs      r7, r7, r14\n\t"
34165         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34166         "ldm %[r], {r4, r5, r6, r7}\n\t"
34167         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34168         "and       r8, r8, r12\n\t"
34169         "and       r9, r9, r12\n\t"
34170         "and       r10, r10, r12\n\t"
34171         "and       r14, r14, r12\n\t"
34172         "sbcs      r4, r4, r8\n\t"
34173         "sbcs      r5, r5, r9\n\t"
34174         "sbcs      r6, r6, r10\n\t"
34175         "sbc       r7, r7, r14\n\t"
34176         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34177         "sub   %[r], %[r], #128\n\t"
34178         "sub   %[m], %[m], #128\n\t"
34179         "sub   %[a], %[a], #128\n\t"
34180         "mov   r12, #0\n\t"
34181         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34182         "ldm %[r], {r8, r9, r10, r14}\n\t"
34183         "adds      r8, r8, r4\n\t"
34184         "adcs      r9, r9, r5\n\t"
34185         "adcs      r10, r10, r6\n\t"
34186         "adcs      r14, r14, r7\n\t"
34187         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34188         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34189         "ldm %[r], {r8, r9, r10, r14}\n\t"
34190         "adcs      r8, r8, r4\n\t"
34191         "adcs      r9, r9, r5\n\t"
34192         "adcs      r10, r10, r6\n\t"
34193         "adcs      r14, r14, r7\n\t"
34194         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34195         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34196         "ldm %[r], {r8, r9, r10, r14}\n\t"
34197         "adcs      r8, r8, r4\n\t"
34198         "adcs      r9, r9, r5\n\t"
34199         "adcs      r10, r10, r6\n\t"
34200         "adcs      r14, r14, r7\n\t"
34201         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34202         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34203         "ldm %[r], {r8, r9, r10, r14}\n\t"
34204         "adcs      r8, r8, r4\n\t"
34205         "adcs      r9, r9, r5\n\t"
34206         "adcs      r10, r10, r6\n\t"
34207         "adcs      r14, r14, r7\n\t"
34208         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34209         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34210         "ldm %[r], {r8, r9, r10, r14}\n\t"
34211         "adcs      r8, r8, r4\n\t"
34212         "adcs      r9, r9, r5\n\t"
34213         "adcs      r10, r10, r6\n\t"
34214         "adcs      r14, r14, r7\n\t"
34215         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34216         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34217         "ldm %[r], {r8, r9, r10, r14}\n\t"
34218         "adcs      r8, r8, r4\n\t"
34219         "adcs      r9, r9, r5\n\t"
34220         "adcs      r10, r10, r6\n\t"
34221         "adcs      r14, r14, r7\n\t"
34222         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34223         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34224         "ldm %[r], {r8, r9, r10, r14}\n\t"
34225         "adcs      r8, r8, r4\n\t"
34226         "adcs      r9, r9, r5\n\t"
34227         "adcs      r10, r10, r6\n\t"
34228         "adcs      r14, r14, r7\n\t"
34229         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34230         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34231         "ldm %[r], {r8, r9, r10, r14}\n\t"
34232         "adcs      r8, r8, r4\n\t"
34233         "adcs      r9, r9, r5\n\t"
34234         "adcs      r10, r10, r6\n\t"
34235         "adcs      r14, r14, r7\n\t"
34236         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34237         "ldr   r7, [%[m], #124]\n\t"
34238         "adc   r12, r12, #0\n\t"
34239         "subs  r7, r7, r14\n\t"
34240         "neg   r12, r12\n\t"
34241         "sbc   r7, r7, r7\n\t"
34242         "sub   %[r], %[r], #128\n\t"
34243         "orr   r12, r7\n\t"
34244         "ldm %[r], {r8, r9, r10, r14}\n\t"
34245         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34246         "and       r4, r4, r12\n\t"
34247         "and       r5, r5, r12\n\t"
34248         "and       r6, r6, r12\n\t"
34249         "and       r7, r7, r12\n\t"
34250         "subs      r8, r8, r4\n\t"
34251         "sbcs      r9, r9, r5\n\t"
34252         "sbcs      r10, r10, r6\n\t"
34253         "sbcs      r14, r14, r7\n\t"
34254         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34255         "ldm %[r], {r8, r9, r10, r14}\n\t"
34256         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34257         "and       r4, r4, r12\n\t"
34258         "and       r5, r5, r12\n\t"
34259         "and       r6, r6, r12\n\t"
34260         "and       r7, r7, r12\n\t"
34261         "sbcs      r8, r8, r4\n\t"
34262         "sbcs      r9, r9, r5\n\t"
34263         "sbcs      r10, r10, r6\n\t"
34264         "sbcs      r14, r14, r7\n\t"
34265         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34266         "ldm %[r], {r8, r9, r10, r14}\n\t"
34267         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34268         "and       r4, r4, r12\n\t"
34269         "and       r5, r5, r12\n\t"
34270         "and       r6, r6, r12\n\t"
34271         "and       r7, r7, r12\n\t"
34272         "sbcs      r8, r8, r4\n\t"
34273         "sbcs      r9, r9, r5\n\t"
34274         "sbcs      r10, r10, r6\n\t"
34275         "sbcs      r14, r14, r7\n\t"
34276         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34277         "ldm %[r], {r8, r9, r10, r14}\n\t"
34278         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34279         "and       r4, r4, r12\n\t"
34280         "and       r5, r5, r12\n\t"
34281         "and       r6, r6, r12\n\t"
34282         "and       r7, r7, r12\n\t"
34283         "sbcs      r8, r8, r4\n\t"
34284         "sbcs      r9, r9, r5\n\t"
34285         "sbcs      r10, r10, r6\n\t"
34286         "sbcs      r14, r14, r7\n\t"
34287         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34288         "ldm %[r], {r8, r9, r10, r14}\n\t"
34289         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34290         "and       r4, r4, r12\n\t"
34291         "and       r5, r5, r12\n\t"
34292         "and       r6, r6, r12\n\t"
34293         "and       r7, r7, r12\n\t"
34294         "sbcs      r8, r8, r4\n\t"
34295         "sbcs      r9, r9, r5\n\t"
34296         "sbcs      r10, r10, r6\n\t"
34297         "sbcs      r14, r14, r7\n\t"
34298         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34299         "ldm %[r], {r8, r9, r10, r14}\n\t"
34300         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34301         "and       r4, r4, r12\n\t"
34302         "and       r5, r5, r12\n\t"
34303         "and       r6, r6, r12\n\t"
34304         "and       r7, r7, r12\n\t"
34305         "sbcs      r8, r8, r4\n\t"
34306         "sbcs      r9, r9, r5\n\t"
34307         "sbcs      r10, r10, r6\n\t"
34308         "sbcs      r14, r14, r7\n\t"
34309         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34310         "ldm %[r], {r8, r9, r10, r14}\n\t"
34311         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34312         "and       r4, r4, r12\n\t"
34313         "and       r5, r5, r12\n\t"
34314         "and       r6, r6, r12\n\t"
34315         "and       r7, r7, r12\n\t"
34316         "sbcs      r8, r8, r4\n\t"
34317         "sbcs      r9, r9, r5\n\t"
34318         "sbcs      r10, r10, r6\n\t"
34319         "sbcs      r14, r14, r7\n\t"
34320         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34321         "ldm %[r], {r8, r9, r10, r14}\n\t"
34322         "ldm %[m]!, {r4, r5, r6, r7}\n\t"
34323         "and       r4, r4, r12\n\t"
34324         "and       r5, r5, r12\n\t"
34325         "and       r6, r6, r12\n\t"
34326         "and       r7, r7, r12\n\t"
34327         "sbcs      r8, r8, r4\n\t"
34328         "sbcs      r9, r9, r5\n\t"
34329         "sbcs      r10, r10, r6\n\t"
34330         "sbc       r14, r14, r7\n\t"
34331         "stm %[r]!, {r8, r9, r10, r14}\n\t"
34332         "sub   %[r], %[r], #128\n\t"
34333         : [r] "+r" (r), [a] "+r" (a), [m] "+r" (m)
34334         :
34335         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
34336     );
34337 }
34338 
34339 /* Subtract two Montgomery form numbers (r = a - b % m).
34340  *
34341  * r   Result of subtration.
34342  * a   Number to subtract from in Montgomery form.
34343  * b   Number to subtract with in Montgomery form.
34344  * m   Modulus (prime).
34345  */
sp_1024_mont_sub_32(sp_digit * r,const sp_digit * a,const sp_digit * b,const sp_digit * m)34346 SP_NOINLINE static void sp_1024_mont_sub_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
34347         const sp_digit* m)
34348 {
34349     __asm__ __volatile__ (
34350         "mov   r12, #0\n\t"
34351         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34352         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34353         "subs      r4, r4, r8\n\t"
34354         "sbcs      r5, r5, r9\n\t"
34355         "sbcs      r6, r6, r10\n\t"
34356         "sbcs      r7, r7, r14\n\t"
34357         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34358         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34359         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34360         "sbcs      r4, r4, r8\n\t"
34361         "sbcs      r5, r5, r9\n\t"
34362         "sbcs      r6, r6, r10\n\t"
34363         "sbcs      r7, r7, r14\n\t"
34364         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34365         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34366         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34367         "sbcs      r4, r4, r8\n\t"
34368         "sbcs      r5, r5, r9\n\t"
34369         "sbcs      r6, r6, r10\n\t"
34370         "sbcs      r7, r7, r14\n\t"
34371         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34372         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34373         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34374         "sbcs      r4, r4, r8\n\t"
34375         "sbcs      r5, r5, r9\n\t"
34376         "sbcs      r6, r6, r10\n\t"
34377         "sbcs      r7, r7, r14\n\t"
34378         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34379         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34380         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34381         "sbcs      r4, r4, r8\n\t"
34382         "sbcs      r5, r5, r9\n\t"
34383         "sbcs      r6, r6, r10\n\t"
34384         "sbcs      r7, r7, r14\n\t"
34385         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34386         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34387         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34388         "sbcs      r4, r4, r8\n\t"
34389         "sbcs      r5, r5, r9\n\t"
34390         "sbcs      r6, r6, r10\n\t"
34391         "sbcs      r7, r7, r14\n\t"
34392         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34393         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34394         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34395         "sbcs      r4, r4, r8\n\t"
34396         "sbcs      r5, r5, r9\n\t"
34397         "sbcs      r6, r6, r10\n\t"
34398         "sbcs      r7, r7, r14\n\t"
34399         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34400         "ldm %[a]!, {r4, r5, r6, r7}\n\t"
34401         "ldm %[b]!, {r8, r9, r10, r14}\n\t"
34402         "sbcs      r4, r4, r8\n\t"
34403         "sbcs      r5, r5, r9\n\t"
34404         "sbcs      r6, r6, r10\n\t"
34405         "sbcs      r7, r7, r14\n\t"
34406         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34407         "sbc   r12, r12, r12\n\t"
34408         "sub   %[r], %[r], #128\n\t"
34409         "ldm %[r], {r4, r5, r6, r7}\n\t"
34410         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34411         "and       r8, r8, r12\n\t"
34412         "and       r9, r9, r12\n\t"
34413         "and       r10, r10, r12\n\t"
34414         "and       r14, r14, r12\n\t"
34415         "adds      r4, r4, r8\n\t"
34416         "adcs      r5, r5, r9\n\t"
34417         "adcs      r6, r6, r10\n\t"
34418         "adcs      r7, r7, r14\n\t"
34419         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34420         "ldm %[r], {r4, r5, r6, r7}\n\t"
34421         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34422         "and       r8, r8, r12\n\t"
34423         "and       r9, r9, r12\n\t"
34424         "and       r10, r10, r12\n\t"
34425         "and       r14, r14, r12\n\t"
34426         "adcs      r4, r4, r8\n\t"
34427         "adcs      r5, r5, r9\n\t"
34428         "adcs      r6, r6, r10\n\t"
34429         "adcs      r7, r7, r14\n\t"
34430         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34431         "ldm %[r], {r4, r5, r6, r7}\n\t"
34432         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34433         "and       r8, r8, r12\n\t"
34434         "and       r9, r9, r12\n\t"
34435         "and       r10, r10, r12\n\t"
34436         "and       r14, r14, r12\n\t"
34437         "adcs      r4, r4, r8\n\t"
34438         "adcs      r5, r5, r9\n\t"
34439         "adcs      r6, r6, r10\n\t"
34440         "adcs      r7, r7, r14\n\t"
34441         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34442         "ldm %[r], {r4, r5, r6, r7}\n\t"
34443         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34444         "and       r8, r8, r12\n\t"
34445         "and       r9, r9, r12\n\t"
34446         "and       r10, r10, r12\n\t"
34447         "and       r14, r14, r12\n\t"
34448         "adcs      r4, r4, r8\n\t"
34449         "adcs      r5, r5, r9\n\t"
34450         "adcs      r6, r6, r10\n\t"
34451         "adcs      r7, r7, r14\n\t"
34452         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34453         "ldm %[r], {r4, r5, r6, r7}\n\t"
34454         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34455         "and       r8, r8, r12\n\t"
34456         "and       r9, r9, r12\n\t"
34457         "and       r10, r10, r12\n\t"
34458         "and       r14, r14, r12\n\t"
34459         "adcs      r4, r4, r8\n\t"
34460         "adcs      r5, r5, r9\n\t"
34461         "adcs      r6, r6, r10\n\t"
34462         "adcs      r7, r7, r14\n\t"
34463         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34464         "ldm %[r], {r4, r5, r6, r7}\n\t"
34465         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34466         "and       r8, r8, r12\n\t"
34467         "and       r9, r9, r12\n\t"
34468         "and       r10, r10, r12\n\t"
34469         "and       r14, r14, r12\n\t"
34470         "adcs      r4, r4, r8\n\t"
34471         "adcs      r5, r5, r9\n\t"
34472         "adcs      r6, r6, r10\n\t"
34473         "adcs      r7, r7, r14\n\t"
34474         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34475         "ldm %[r], {r4, r5, r6, r7}\n\t"
34476         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34477         "and       r8, r8, r12\n\t"
34478         "and       r9, r9, r12\n\t"
34479         "and       r10, r10, r12\n\t"
34480         "and       r14, r14, r12\n\t"
34481         "adcs      r4, r4, r8\n\t"
34482         "adcs      r5, r5, r9\n\t"
34483         "adcs      r6, r6, r10\n\t"
34484         "adcs      r7, r7, r14\n\t"
34485         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34486         "ldm %[r], {r4, r5, r6, r7}\n\t"
34487         "ldm %[m]!, {r8, r9, r10, r14}\n\t"
34488         "and       r8, r8, r12\n\t"
34489         "and       r9, r9, r12\n\t"
34490         "and       r10, r10, r12\n\t"
34491         "and       r14, r14, r12\n\t"
34492         "adcs      r4, r4, r8\n\t"
34493         "adcs      r5, r5, r9\n\t"
34494         "adcs      r6, r6, r10\n\t"
34495         "adc       r7, r7, r14\n\t"
34496         "stm %[r]!, {r4, r5, r6, r7}\n\t"
34497         "sub   %[r], %[r], #128\n\t"
34498         : [r] "+r" (r), [a] "+r" (a), [b] "+r" (b), [m] "+r" (m)
34499         :
34500         : "memory", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r14", "r12"
34501     );
34502 }
34503 
34504 /* Conditionally add a and b using the mask m.
34505  * m is -1 to add and 0 when not.
34506  *
34507  * r  A single precision number representing conditional add result.
34508  * a  A single precision number to add with.
34509  * b  A single precision number to add.
34510  * m  Mask value to apply.
34511  */
sp_1024_cond_add_32(sp_digit * r,const sp_digit * a,const sp_digit * b,sp_digit m)34512 SP_NOINLINE static sp_digit sp_1024_cond_add_32(sp_digit* r, const sp_digit* a, const sp_digit* b,
34513         sp_digit m)
34514 {
34515     sp_digit c = 0;
34516 
34517     __asm__ __volatile__ (
34518         "mov	r5, #128\n\t"
34519         "mov	r9, r5\n\t"
34520         "mov	r8, #0\n\t"
34521         "\n1:\n\t"
34522         "ldr	r6, [%[b], r8]\n\t"
34523         "and	r6, r6, %[m]\n\t"
34524         "adds	r5, %[c], #-1\n\t"
34525         "ldr	r5, [%[a], r8]\n\t"
34526         "adcs	r5, r5, r6\n\t"
34527         "mov	%[c], #0\n\t"
34528         "adcs	%[c], %[c], %[c]\n\t"
34529         "str	r5, [%[r], r8]\n\t"
34530         "add	r8, r8, #4\n\t"
34531         "cmp	r8, r9\n\t"
34532 #ifdef __GNUC__
34533         "blt	1b\n\t"
34534 #else
34535         "blt.n	1b\n\t"
34536 #endif /* __GNUC__ */
34537         : [c] "+r" (c)
34538         : [r] "r" (r), [a] "r" (a), [b] "r" (b), [m] "r" (m)
34539         : "memory", "r5", "r6", "r8", "r9"
34540     );
34541 
34542     return c;
34543 }
34544 
/* Shift the 32 word number right by one bit. (r = a >> 1)
 *
 * Each result word is (a[i] >> 1) | (a[i+1] << 31); the top word is
 * a[31] >> 1, so the most significant bit of the result is zero.
 * Every source word is loaded into a register before the result word at the
 * same or a lower index is stored, and sp_1024_div2_32 calls this with
 * r == a.
 *
 * r  Result of the shift.
 * a  Number to shift (only read).
 */
static void sp_1024_rshift1_32(sp_digit* r, sp_digit* a)
{
    __asm__ __volatile__ (
        /* r2, r3 and r4 are used in rotation: one holds the word being
         * completed, one the next word supplying its top bit, and one is
         * being loaded. */
        "ldr	r2, [%[a]]\n\t"
        "ldr	r3, [%[a], #4]\n\t"
        "lsr	r2, r2, #1\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #8]\n\t"
        "str	r2, [%[r], #0]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #12]\n\t"
        "str	r3, [%[r], #4]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #16]\n\t"
        "str	r4, [%[r], #8]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #20]\n\t"
        "str	r2, [%[r], #12]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #24]\n\t"
        "str	r3, [%[r], #16]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #28]\n\t"
        "str	r4, [%[r], #20]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #32]\n\t"
        "str	r2, [%[r], #24]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #36]\n\t"
        "str	r3, [%[r], #28]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #40]\n\t"
        "str	r4, [%[r], #32]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #44]\n\t"
        "str	r2, [%[r], #36]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #48]\n\t"
        "str	r3, [%[r], #40]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #52]\n\t"
        "str	r4, [%[r], #44]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #56]\n\t"
        "str	r2, [%[r], #48]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #60]\n\t"
        "str	r3, [%[r], #52]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #64]\n\t"
        "str	r4, [%[r], #56]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #68]\n\t"
        "str	r2, [%[r], #60]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #72]\n\t"
        "str	r3, [%[r], #64]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #76]\n\t"
        "str	r4, [%[r], #68]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #80]\n\t"
        "str	r2, [%[r], #72]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #84]\n\t"
        "str	r3, [%[r], #76]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #88]\n\t"
        "str	r4, [%[r], #80]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #92]\n\t"
        "str	r2, [%[r], #84]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #96]\n\t"
        "str	r3, [%[r], #88]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #100]\n\t"
        "str	r4, [%[r], #92]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #104]\n\t"
        "str	r2, [%[r], #96]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #108]\n\t"
        "str	r3, [%[r], #100]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #112]\n\t"
        "str	r4, [%[r], #104]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        "lsr	r3, r3, #1\n\t"
        "ldr	r4, [%[a], #116]\n\t"
        "str	r2, [%[r], #108]\n\t"
        "orr	r3, r3, r4, lsl #31\n\t"
        "lsr	r4, r4, #1\n\t"
        "ldr	r2, [%[a], #120]\n\t"
        "str	r3, [%[r], #112]\n\t"
        "orr	r4, r4, r2, lsl #31\n\t"
        "lsr	r2, r2, #1\n\t"
        "ldr	r3, [%[a], #124]\n\t"
        "str	r4, [%[r], #116]\n\t"
        "orr	r2, r2, r3, lsl #31\n\t"
        /* Top word: nothing above it to shift in. */
        "lsr	r3, r3, #1\n\t"
        "str	r2, [%[r], #120]\n\t"
        "str	r3, [%[r], #124]\n\t"
        :
        : [r] "r" (r), [a] "r" (a)
        : "memory", "r2", "r3", "r4"
    );
}
34680 
34681 /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
34682  *
34683  * r  Result of division by 2.
34684  * a  Number to divide.
34685  * m  Modulus (prime).
34686  */
sp_1024_div2_32(sp_digit * r,const sp_digit * a,const sp_digit * m)34687 SP_NOINLINE static void sp_1024_div2_32(sp_digit* r, const sp_digit* a, const sp_digit* m)
34688 {
34689     sp_digit o;
34690 
34691     o = sp_1024_cond_add_32(r, a, m, 0 - (a[0] & 1));
34692     sp_1024_rshift1_32(r, r);
34693     r[31] |= o << 31;
34694 }
34695 
34696 /* Double the Montgomery form projective point p.
34697  *
34698  * r  Result of doubling point.
34699  * p  Point to double.
34700  * t  Temporary ordinate data.
34701  */
34702 #ifdef WOLFSSL_SP_NONBLOCK
/* State kept between calls of sp_1024_proj_point_dbl_32_nb.
 * It is overlaid on the data area of an sp_ecc_ctx_t.
 */
typedef struct sp_1024_proj_point_dbl_32_ctx {
    int state;      /* Step of the doubling to run on the next call. */
    sp_digit* t1;   /* First temporary (start of caller's t buffer). */
    sp_digit* t2;   /* Second temporary (t + 2*32). */
    sp_digit* x;    /* X ordinate of the result point. */
    sp_digit* y;    /* Y ordinate of the result point. */
    sp_digit* z;    /* Z ordinate of the result point. */
} sp_1024_proj_point_dbl_32_ctx;
34711 
/* Non-blocking doubling of the Montgomery form projective point p.
 *
 * Performs one step of the doubling per call; the step to run is taken from
 * the context state and the state is advanced afterwards.
 *
 * sp_ctx  Context whose data area holds the state between calls.
 * r       Result of doubling point.
 * p       Point to double.
 * t       Temporary ordinate data.
 * returns FP_WOULDBLOCK while steps remain and MP_OKAY once the last step
 *         has been done.
 */
static int sp_1024_proj_point_dbl_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, const sp_point_1024* p, sp_digit* t)
{
    sp_1024_proj_point_dbl_32_ctx* ctx = (sp_1024_proj_point_dbl_32_ctx*)sp_ctx->data;
    int ret = FP_WOULDBLOCK;

    /* Fails to compile (negative array size) when the doubling context is
     * not smaller than the generic context. */
    typedef char ctx_size_test[sizeof(sp_1024_proj_point_dbl_32_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0:
        /* Record the temporaries and the result ordinates. */
        ctx->t1 = t;
        ctx->t2 = t + 2*32;
        ctx->x = r->x;
        ctx->y = r->y;
        ctx->z = r->z;

        /* Result is at infinity exactly when the input is. */
        if (r != p) {
            r->infinity = p->infinity;
        }
        ctx->state++;
        break;
    case 1:
        /* T1 = Z^2 */
        sp_1024_mont_sqr_32(ctx->t1, p->z, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 2:
        /* Z = Y.Z */
        sp_1024_mont_mul_32(ctx->z, p->y, p->z, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 3:
        /* Z = 2.Z */
        sp_1024_mont_dbl_32(ctx->z, ctx->z, p1024_mod);
        ctx->state++;
        break;
    case 4:
        /* T2 = X - T1 */
        sp_1024_mont_sub_32(ctx->t2, p->x, ctx->t1, p1024_mod);
        ctx->state++;
        break;
    case 5:
        /* T1 = X + T1 */
        sp_1024_mont_add_32(ctx->t1, p->x, ctx->t1, p1024_mod);
        ctx->state++;
        break;
    case 6:
        /* T2 = T1.T2 */
        sp_1024_mont_mul_32(ctx->t2, ctx->t1, ctx->t2, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 7:
        /* T1 = 3.T2 */
        sp_1024_mont_tpl_32(ctx->t1, ctx->t2, p1024_mod);
        ctx->state++;
        break;
    case 8:
        /* Y = 2.Y */
        sp_1024_mont_dbl_32(ctx->y, p->y, p1024_mod);
        ctx->state++;
        break;
    case 9:
        /* Y = Y^2 */
        sp_1024_mont_sqr_32(ctx->y, ctx->y, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 10:
        /* T2 = Y^2 */
        sp_1024_mont_sqr_32(ctx->t2, ctx->y, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 11:
        /* T2 = T2 / 2 */
        sp_1024_div2_32(ctx->t2, ctx->t2, p1024_mod);
        ctx->state++;
        break;
    case 12:
        /* Y = Y.X */
        sp_1024_mont_mul_32(ctx->y, ctx->y, p->x, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 13:
        /* X = T1^2 */
        sp_1024_mont_sqr_32(ctx->x, ctx->t1, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 14:
        /* X = X - Y (first of two) */
        sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->y, p1024_mod);
        ctx->state++;
        break;
    case 15:
        /* X = X - Y (second of two) */
        sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->y, p1024_mod);
        ctx->state++;
        break;
    case 16:
        /* Y = Y - X */
        sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->x, p1024_mod);
        ctx->state++;
        break;
    case 17:
        /* Y = Y.T1 */
        sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t1, p1024_mod, p1024_mp_mod);
        ctx->state++;
        break;
    case 18:
        /* Y = Y - T2 */
        sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t2, p1024_mod);
        ctx->state++;
        /* fall-through */
    case 19:
        /* Final state: the doubling is complete. */
        ret = MP_OKAY;
        break;
    }

    return ret;
}
34835 #endif /* WOLFSSL_SP_NONBLOCK */
34836 
/* Double the Montgomery form projective point p.
 *
 * r and p may be the same point; the input ordinates are always read
 * through p and the result written through r.
 *
 * r  Result of doubling point.
 * p  Point to double.
 * t  Temporary ordinate data (two temporaries, at t and at t + 2*32).
 */
static void sp_1024_proj_point_dbl_32(sp_point_1024* r, const sp_point_1024* p, sp_digit* t)
{
    sp_digit* const tmp1 = t;
    sp_digit* const tmp2 = t + 2*32;
    sp_digit* const rx = r->x;
    sp_digit* const ry = r->y;
    sp_digit* const rz = r->z;

    /* Result is at infinity exactly when the input is. */
    if (r != p) {
        r->infinity = p->infinity;
    }

    /* tmp1 = Z^2 */
    sp_1024_mont_sqr_32(tmp1, p->z, p1024_mod, p1024_mp_mod);
    /* rz = Y.Z */
    sp_1024_mont_mul_32(rz, p->y, p->z, p1024_mod, p1024_mp_mod);
    /* rz = 2.rz */
    sp_1024_mont_dbl_32(rz, rz, p1024_mod);
    /* tmp2 = X - tmp1 */
    sp_1024_mont_sub_32(tmp2, p->x, tmp1, p1024_mod);
    /* tmp1 = X + tmp1 */
    sp_1024_mont_add_32(tmp1, p->x, tmp1, p1024_mod);
    /* tmp2 = tmp1.tmp2 */
    sp_1024_mont_mul_32(tmp2, tmp1, tmp2, p1024_mod, p1024_mp_mod);
    /* tmp1 = 3.tmp2 */
    sp_1024_mont_tpl_32(tmp1, tmp2, p1024_mod);
    /* ry = 2.Y */
    sp_1024_mont_dbl_32(ry, p->y, p1024_mod);
    /* ry = ry^2 */
    sp_1024_mont_sqr_32(ry, ry, p1024_mod, p1024_mp_mod);
    /* tmp2 = ry^2 */
    sp_1024_mont_sqr_32(tmp2, ry, p1024_mod, p1024_mp_mod);
    /* tmp2 = tmp2 / 2 */
    sp_1024_div2_32(tmp2, tmp2, p1024_mod);
    /* ry = ry.X */
    sp_1024_mont_mul_32(ry, ry, p->x, p1024_mod, p1024_mp_mod);
    /* rx = tmp1^2 */
    sp_1024_mont_sqr_32(rx, tmp1, p1024_mod, p1024_mp_mod);
    /* rx = rx - 2.ry, done as two subtractions */
    sp_1024_mont_sub_32(rx, rx, ry, p1024_mod);
    sp_1024_mont_sub_32(rx, rx, ry, p1024_mod);
    /* ry = ry - rx */
    sp_1024_mont_sub_32(ry, ry, rx, p1024_mod);
    /* ry = ry.tmp1 */
    sp_1024_mont_mul_32(ry, ry, tmp1, p1024_mod, p1024_mp_mod);
    /* ry = ry - tmp2 */
    sp_1024_mont_sub_32(ry, ry, tmp2, p1024_mod);
}
34890 
34891 #ifdef WOLFSSL_SP_SMALL
34892 /* Sub b from a into r. (r = a - b)
34893  *
34894  * r  A single precision integer.
34895  * a  A single precision integer.
34896  * b  A single precision integer.
34897  */
sp_1024_sub_32(sp_digit * r,const sp_digit * a,const sp_digit * b)34898 SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a,
34899         const sp_digit* b)
34900 {
34901     sp_digit c = 0;
34902 
34903     __asm__ __volatile__ (
34904         "mov	r6, %[a]\n\t"
34905         "add	r6, r6, #128\n\t"
34906         "\n1:\n\t"
34907         "mov	r5, #0\n\t"
34908         "subs	r5, r5, %[c]\n\t"
34909         "ldr	r4, [%[a]]\n\t"
34910         "ldr	r5, [%[b]]\n\t"
34911         "sbcs	r4, r4, r5\n\t"
34912         "str	r4, [%[r]]\n\t"
34913         "sbc	%[c], %[c], %[c]\n\t"
34914         "add	%[a], %[a], #4\n\t"
34915         "add	%[b], %[b], #4\n\t"
34916         "add	%[r], %[r], #4\n\t"
34917         "cmp	%[a], r6\n\t"
34918 #ifdef __GNUC__
34919         "bne	1b\n\t"
34920 #else
34921         "bne.n	1b\n\t"
34922 #endif /* __GNUC__ */
34923         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
34924         :
34925         : "memory", "r4", "r5", "r6"
34926     );
34927 
34928     return c;
34929 }
34930 
34931 #else
34932 /* Sub b from a into r. (r = a - b)
34933  *
34934  * r  A single precision integer.
34935  * a  A single precision integer.
34936  * b  A single precision integer.
34937  */
sp_1024_sub_32(sp_digit * r,const sp_digit * a,const sp_digit * b)34938 SP_NOINLINE static sp_digit sp_1024_sub_32(sp_digit* r, const sp_digit* a,
34939         const sp_digit* b)
34940 {
34941     sp_digit c = 0;
34942 
34943     __asm__ __volatile__ (
34944         "ldr	r4, [%[a], #0]\n\t"
34945         "ldr	r5, [%[a], #4]\n\t"
34946         "ldr	r6, [%[b], #0]\n\t"
34947         "ldr	r8, [%[b], #4]\n\t"
34948         "subs	r4, r4, r6\n\t"
34949         "sbcs	r5, r5, r8\n\t"
34950         "str	r4, [%[r], #0]\n\t"
34951         "str	r5, [%[r], #4]\n\t"
34952         "ldr	r4, [%[a], #8]\n\t"
34953         "ldr	r5, [%[a], #12]\n\t"
34954         "ldr	r6, [%[b], #8]\n\t"
34955         "ldr	r8, [%[b], #12]\n\t"
34956         "sbcs	r4, r4, r6\n\t"
34957         "sbcs	r5, r5, r8\n\t"
34958         "str	r4, [%[r], #8]\n\t"
34959         "str	r5, [%[r], #12]\n\t"
34960         "ldr	r4, [%[a], #16]\n\t"
34961         "ldr	r5, [%[a], #20]\n\t"
34962         "ldr	r6, [%[b], #16]\n\t"
34963         "ldr	r8, [%[b], #20]\n\t"
34964         "sbcs	r4, r4, r6\n\t"
34965         "sbcs	r5, r5, r8\n\t"
34966         "str	r4, [%[r], #16]\n\t"
34967         "str	r5, [%[r], #20]\n\t"
34968         "ldr	r4, [%[a], #24]\n\t"
34969         "ldr	r5, [%[a], #28]\n\t"
34970         "ldr	r6, [%[b], #24]\n\t"
34971         "ldr	r8, [%[b], #28]\n\t"
34972         "sbcs	r4, r4, r6\n\t"
34973         "sbcs	r5, r5, r8\n\t"
34974         "str	r4, [%[r], #24]\n\t"
34975         "str	r5, [%[r], #28]\n\t"
34976         "ldr	r4, [%[a], #32]\n\t"
34977         "ldr	r5, [%[a], #36]\n\t"
34978         "ldr	r6, [%[b], #32]\n\t"
34979         "ldr	r8, [%[b], #36]\n\t"
34980         "sbcs	r4, r4, r6\n\t"
34981         "sbcs	r5, r5, r8\n\t"
34982         "str	r4, [%[r], #32]\n\t"
34983         "str	r5, [%[r], #36]\n\t"
34984         "ldr	r4, [%[a], #40]\n\t"
34985         "ldr	r5, [%[a], #44]\n\t"
34986         "ldr	r6, [%[b], #40]\n\t"
34987         "ldr	r8, [%[b], #44]\n\t"
34988         "sbcs	r4, r4, r6\n\t"
34989         "sbcs	r5, r5, r8\n\t"
34990         "str	r4, [%[r], #40]\n\t"
34991         "str	r5, [%[r], #44]\n\t"
34992         "ldr	r4, [%[a], #48]\n\t"
34993         "ldr	r5, [%[a], #52]\n\t"
34994         "ldr	r6, [%[b], #48]\n\t"
34995         "ldr	r8, [%[b], #52]\n\t"
34996         "sbcs	r4, r4, r6\n\t"
34997         "sbcs	r5, r5, r8\n\t"
34998         "str	r4, [%[r], #48]\n\t"
34999         "str	r5, [%[r], #52]\n\t"
35000         "ldr	r4, [%[a], #56]\n\t"
35001         "ldr	r5, [%[a], #60]\n\t"
35002         "ldr	r6, [%[b], #56]\n\t"
35003         "ldr	r8, [%[b], #60]\n\t"
35004         "sbcs	r4, r4, r6\n\t"
35005         "sbcs	r5, r5, r8\n\t"
35006         "str	r4, [%[r], #56]\n\t"
35007         "str	r5, [%[r], #60]\n\t"
35008         "ldr	r4, [%[a], #64]\n\t"
35009         "ldr	r5, [%[a], #68]\n\t"
35010         "ldr	r6, [%[b], #64]\n\t"
35011         "ldr	r8, [%[b], #68]\n\t"
35012         "sbcs	r4, r4, r6\n\t"
35013         "sbcs	r5, r5, r8\n\t"
35014         "str	r4, [%[r], #64]\n\t"
35015         "str	r5, [%[r], #68]\n\t"
35016         "ldr	r4, [%[a], #72]\n\t"
35017         "ldr	r5, [%[a], #76]\n\t"
35018         "ldr	r6, [%[b], #72]\n\t"
35019         "ldr	r8, [%[b], #76]\n\t"
35020         "sbcs	r4, r4, r6\n\t"
35021         "sbcs	r5, r5, r8\n\t"
35022         "str	r4, [%[r], #72]\n\t"
35023         "str	r5, [%[r], #76]\n\t"
35024         "ldr	r4, [%[a], #80]\n\t"
35025         "ldr	r5, [%[a], #84]\n\t"
35026         "ldr	r6, [%[b], #80]\n\t"
35027         "ldr	r8, [%[b], #84]\n\t"
35028         "sbcs	r4, r4, r6\n\t"
35029         "sbcs	r5, r5, r8\n\t"
35030         "str	r4, [%[r], #80]\n\t"
35031         "str	r5, [%[r], #84]\n\t"
35032         "ldr	r4, [%[a], #88]\n\t"
35033         "ldr	r5, [%[a], #92]\n\t"
35034         "ldr	r6, [%[b], #88]\n\t"
35035         "ldr	r8, [%[b], #92]\n\t"
35036         "sbcs	r4, r4, r6\n\t"
35037         "sbcs	r5, r5, r8\n\t"
35038         "str	r4, [%[r], #88]\n\t"
35039         "str	r5, [%[r], #92]\n\t"
35040         "ldr	r4, [%[a], #96]\n\t"
35041         "ldr	r5, [%[a], #100]\n\t"
35042         "ldr	r6, [%[b], #96]\n\t"
35043         "ldr	r8, [%[b], #100]\n\t"
35044         "sbcs	r4, r4, r6\n\t"
35045         "sbcs	r5, r5, r8\n\t"
35046         "str	r4, [%[r], #96]\n\t"
35047         "str	r5, [%[r], #100]\n\t"
35048         "ldr	r4, [%[a], #104]\n\t"
35049         "ldr	r5, [%[a], #108]\n\t"
35050         "ldr	r6, [%[b], #104]\n\t"
35051         "ldr	r8, [%[b], #108]\n\t"
35052         "sbcs	r4, r4, r6\n\t"
35053         "sbcs	r5, r5, r8\n\t"
35054         "str	r4, [%[r], #104]\n\t"
35055         "str	r5, [%[r], #108]\n\t"
35056         "ldr	r4, [%[a], #112]\n\t"
35057         "ldr	r5, [%[a], #116]\n\t"
35058         "ldr	r6, [%[b], #112]\n\t"
35059         "ldr	r8, [%[b], #116]\n\t"
35060         "sbcs	r4, r4, r6\n\t"
35061         "sbcs	r5, r5, r8\n\t"
35062         "str	r4, [%[r], #112]\n\t"
35063         "str	r5, [%[r], #116]\n\t"
35064         "ldr	r4, [%[a], #120]\n\t"
35065         "ldr	r5, [%[a], #124]\n\t"
35066         "ldr	r6, [%[b], #120]\n\t"
35067         "ldr	r8, [%[b], #124]\n\t"
35068         "sbcs	r4, r4, r6\n\t"
35069         "sbcs	r5, r5, r8\n\t"
35070         "str	r4, [%[r], #120]\n\t"
35071         "str	r5, [%[r], #124]\n\t"
35072         "sbc	%[c], %[c], %[c]\n\t"
35073         : [c] "+r" (c), [r] "+r" (r), [a] "+r" (a), [b] "+r" (b)
35074         :
35075         : "memory", "r4", "r5", "r6", "r8"
35076     );
35077 
35078     return c;
35079 }
35080 
35081 #endif /* WOLFSSL_SP_SMALL */
35082 /* Compare two numbers to determine if they are equal.
35083  * Constant time implementation.
35084  *
35085  * a  First number to compare.
35086  * b  Second number to compare.
35087  * returns 1 when equal and 0 otherwise.
35088  */
sp_1024_cmp_equal_32(const sp_digit * a,const sp_digit * b)35089 static int sp_1024_cmp_equal_32(const sp_digit* a, const sp_digit* b)
35090 {
35091     return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) |
35092             (a[3] ^ b[3]) | (a[4] ^ b[4]) | (a[5] ^ b[5]) |
35093             (a[6] ^ b[6]) | (a[7] ^ b[7]) | (a[8] ^ b[8]) |
35094             (a[9] ^ b[9]) | (a[10] ^ b[10]) | (a[11] ^ b[11]) |
35095             (a[12] ^ b[12]) | (a[13] ^ b[13]) | (a[14] ^ b[14]) |
35096             (a[15] ^ b[15]) | (a[16] ^ b[16]) | (a[17] ^ b[17]) |
35097             (a[18] ^ b[18]) | (a[19] ^ b[19]) | (a[20] ^ b[20]) |
35098             (a[21] ^ b[21]) | (a[22] ^ b[22]) | (a[23] ^ b[23]) |
35099             (a[24] ^ b[24]) | (a[25] ^ b[25]) | (a[26] ^ b[26]) |
35100             (a[27] ^ b[27]) | (a[28] ^ b[28]) | (a[29] ^ b[29]) |
35101             (a[30] ^ b[30]) | (a[31] ^ b[31])) == 0;
35102 }
35103 
35104 /* Add two Montgomery form projective points.
35105  *
35106  * r  Result of addition.
35107  * p  First point to add.
35108  * q  Second point to add.
35109  * t  Temporary ordinate data.
35110  */
35111 
35112 #ifdef WOLFSSL_SP_NONBLOCK
/* State carried between calls of sp_1024_proj_point_add_32_nb(). */
typedef struct sp_1024_proj_point_add_32_ctx {
    /* Step of the addition to run on the next call (0 = init, 27 = done). */
    int state;
    /* Context passed to sp_1024_proj_point_dbl_32_nb() on the doubling path. */
    sp_1024_proj_point_dbl_32_ctx dbl_ctx;
    /* Input points: [0] = p, [1] = q. */
    const sp_point_1024* ap[2];
    /* Output targets: [0] = r, [1] = scratch point overlaid on t. */
    sp_point_1024* rp[2];
    /* Temporaries at offsets 0, 2*32, 4*32, 6*32 and 8*32 of t. */
    sp_digit* t1;
    sp_digit* t2;
    sp_digit* t3;
    sp_digit* t4;
    sp_digit* t5;
    /* Ordinates of the point selected from rp that the steps write to. */
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;
} sp_1024_proj_point_add_32_ctx;
35127 
/* Add two Montgomery form projective points - non-blocking.
 *
 * Each call performs one step of the state machine stored in sp_ctx->data
 * and then returns. State 0 initializes the temporaries, states 1-2 handle
 * the doubling case and states 3-26 perform the general addition.
 * NOTE(review): presumably the caller zeroes the context before the first
 * call and repeats the call with the same arguments - confirm with callers.
 *
 * sp_ctx  Context holding the state of the operation.
 * r       Result of addition.
 * p       First point to add.
 * q       Second point to add.
 * t       Temporary ordinate data.
 * returns FP_WOULDBLOCK while steps remain, MP_OKAY once state 27 (done) is
 *         reached; on the doubling path a value returned by
 *         sp_1024_proj_point_dbl_32_nb() is passed back.
 */
static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r,
    const sp_point_1024* p, const sp_point_1024* q, sp_digit* t)
{
    int err = FP_WOULDBLOCK;
    sp_1024_proj_point_add_32_ctx* ctx = (sp_1024_proj_point_add_32_ctx*)sp_ctx->data;

    /* Ensure only the first point is the same as the result. */
    if (q == r) {
        const sp_point_1024* a = p;
        p = q;
        q = a;
    }

    /* Compile-time check: the array size is -1 (invalid) unless this context
     * is smaller than sp_ecc_ctx_t. */
    typedef char ctx_size_test[sizeof(sp_1024_proj_point_add_32_ctx) >= sizeof(*sp_ctx) ? -1 : 1];
    (void)sizeof(ctx_size_test);

    switch (ctx->state) {
    case 0: /* INIT */
        /* Carve the five temporaries out of t. */
        ctx->t1 = t;
        ctx->t2 = t + 2*32;
        ctx->t3 = t + 4*32;
        ctx->t4 = t + 6*32;
        ctx->t5 = t + 8*32;

        ctx->state = 1;
        break;
    case 1:
        /* Check double */
        /* t1 = modulus - Y2, used to also match p->y against the negated Y2. */
        (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y);
        sp_1024_norm_32(ctx->t1);
        /* Bitwise operators: all four comparisons are always evaluated. */
        if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) &
            (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0)
        {
            /* Start the nested doubling operation from a zeroed context. */
            XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx));
            ctx->state = 2;
        }
        else {
            ctx->state = 3;
        }
        break;
    case 2:
        /* Run one step of the doubling; finish when it reports MP_OKAY. */
        err = sp_1024_proj_point_dbl_32_nb((sp_ecc_ctx_t*)&ctx->dbl_ctx, r, p, t);
        if (err == MP_OKAY)
            ctx->state = 27; /* done */
        break;
    case 3:
    {
        int i;
        ctx->rp[0] = r;

        /*lint allow cast to different type of pointer*/
        ctx->rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/
        XMEMSET(ctx->rp[1], 0, sizeof(sp_point_1024));
        /* Write into r when neither input is at infinity, otherwise into the
         * scratch point so that the copy made below is left untouched. */
        ctx->x = ctx->rp[p->infinity | q->infinity]->x;
        ctx->y = ctx->rp[p->infinity | q->infinity]->y;
        ctx->z = ctx->rp[p->infinity | q->infinity]->z;

        ctx->ap[0] = p;
        ctx->ap[1] = q;
        /* r = p, or q when p is at infinity. */
        for (i=0; i<32; i++) {
            r->x[i] = ctx->ap[p->infinity]->x[i];
        }
        for (i=0; i<32; i++) {
            r->y[i] = ctx->ap[p->infinity]->y[i];
        }
        for (i=0; i<32; i++) {
            r->z[i] = ctx->ap[p->infinity]->z[i];
        }
        r->infinity = ctx->ap[p->infinity]->infinity;

        ctx->state = 4;
        break;
    }
    case 4:
        /* U1 = X1*Z2^2 */
        /* t1 = Z2^2 */
        sp_1024_mont_sqr_32(ctx->t1, q->z, p1024_mod, p1024_mp_mod);
        ctx->state = 5;
        break;
    case 5:
        /* t3 = Z2^3 */
        sp_1024_mont_mul_32(ctx->t3, ctx->t1, q->z, p1024_mod, p1024_mp_mod);
        ctx->state = 6;
        break;
    case 6:
        /* t1 = U1 */
        sp_1024_mont_mul_32(ctx->t1, ctx->t1, ctx->x, p1024_mod, p1024_mp_mod);
        ctx->state = 7;
        break;
    case 7:
        /* U2 = X2*Z1^2 */
        /* t2 = Z1^2 */
        sp_1024_mont_sqr_32(ctx->t2, ctx->z, p1024_mod, p1024_mp_mod);
        ctx->state = 8;
        break;
    case 8:
        /* t4 = Z1^3 */
        sp_1024_mont_mul_32(ctx->t4, ctx->t2, ctx->z, p1024_mod, p1024_mp_mod);
        ctx->state = 9;
        break;
    case 9:
        /* t2 = U2 */
        sp_1024_mont_mul_32(ctx->t2, ctx->t2, q->x, p1024_mod, p1024_mp_mod);
        ctx->state = 10;
        break;
    case 10:
        /* S1 = Y1*Z2^3 */
        sp_1024_mont_mul_32(ctx->t3, ctx->t3, ctx->y, p1024_mod, p1024_mp_mod);
        ctx->state = 11;
        break;
    case 11:
        /* S2 = Y2*Z1^3 */
        sp_1024_mont_mul_32(ctx->t4, ctx->t4, q->y, p1024_mod, p1024_mp_mod);
        ctx->state = 12;
        break;
    case 12:
        /* H = U2 - U1 */
        sp_1024_mont_sub_32(ctx->t2, ctx->t2, ctx->t1, p1024_mod);
        ctx->state = 13;
        break;
    case 13:
        /* R = S2 - S1 */
        sp_1024_mont_sub_32(ctx->t4, ctx->t4, ctx->t3, p1024_mod);
        ctx->state = 14;
        break;
    case 14:
        /* Z3 = H*Z1*Z2 */
        sp_1024_mont_mul_32(ctx->z, ctx->z, q->z, p1024_mod, p1024_mp_mod);
        ctx->state = 15;
        break;
    case 15:
        sp_1024_mont_mul_32(ctx->z, ctx->z, ctx->t2, p1024_mod, p1024_mp_mod);
        ctx->state = 16;
        break;
    case 16:
        /* X3 = R^2 - H^3 - 2*U1*H^2 */
        /* x = R^2 */
        sp_1024_mont_sqr_32(ctx->x, ctx->t4, p1024_mod, p1024_mp_mod);
        ctx->state = 17;
        break;
    case 17:
        /* t5 = H^2 */
        sp_1024_mont_sqr_32(ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod);
        ctx->state = 18;
        break;
    case 18:
        /* y = U1*H^2 */
        sp_1024_mont_mul_32(ctx->y, ctx->t1, ctx->t5, p1024_mod, p1024_mp_mod);
        ctx->state = 19;
        break;
    case 19:
        /* t5 = H^3 */
        sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t2, p1024_mod, p1024_mp_mod);
        ctx->state = 20;
        break;
    case 20:
        /* x = R^2 - H^3 */
        sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t5, p1024_mod);
        ctx->state = 21;
        break;
    case 21:
        /* t1 = 2*U1*H^2 */
        sp_1024_mont_dbl_32(ctx->t1, ctx->y, p1024_mod);
        ctx->state = 22;
        break;
    case 22:
        /* x = X3 */
        sp_1024_mont_sub_32(ctx->x, ctx->x, ctx->t1, p1024_mod);
        ctx->state = 23;
        break;
    case 23:
        /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
        sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->x, p1024_mod);
        ctx->state = 24;
        break;
    case 24:
        sp_1024_mont_mul_32(ctx->y, ctx->y, ctx->t4, p1024_mod, p1024_mp_mod);
        ctx->state = 25;
        break;
    case 25:
        /* t5 = S1*H^3 */
        sp_1024_mont_mul_32(ctx->t5, ctx->t5, ctx->t3, p1024_mod, p1024_mp_mod);
        ctx->state = 26;
        break;
    case 26:
        sp_1024_mont_sub_32(ctx->y, ctx->y, ctx->t5, p1024_mod);
        ctx->state = 27;
        /* fall-through */
    case 27:
        err = MP_OKAY;
        break;
    }

    /* Only report success once the final state has been reached. */
    if (err == MP_OKAY && ctx->state != 27) {
        err = FP_WOULDBLOCK;
    }
    return err;
}
35312 #endif /* WOLFSSL_SP_NONBLOCK */
35313 
/* Add two Montgomery form projective points.
 *
 * When the points have equal X and Z ordinates and Y ordinates that are equal
 * or negatives of each other, the doubling routine is used instead.
 *
 * r  Result of addition.
 * p  First point to add.
 * q  Second point to add.
 * t  Temporary ordinate data.
 */
static void sp_1024_proj_point_add_32(sp_point_1024* r,
        const sp_point_1024* p, const sp_point_1024* q, sp_digit* t)
{
    const sp_point_1024* in[2];
    sp_point_1024* out[2];
    const sp_point_1024* src;
    sp_point_1024* dst;
    sp_digit* w1 = t;
    sp_digit* w2 = t + 2*32;
    sp_digit* w3 = t + 4*32;
    sp_digit* w4 = t + 6*32;
    sp_digit* w5 = t + 8*32;
    sp_digit* rx;
    sp_digit* ry;
    sp_digit* rz;
    int eq_x;
    int eq_z;
    int eq_y;
    int eq_neg_y;
    int idx;

    /* Only the first operand is allowed to alias the result. */
    if (q == r) {
        const sp_point_1024* swap = p;
        p = q;
        q = swap;
    }

    /* w1 = modulus - Y2, for detecting Y1 == -Y2. */
    (void)sp_1024_mont_sub_32(w1, p1024_mod, q->y, p1024_mod);
    sp_1024_norm_32(w1);

    /* All comparisons are evaluated; results are combined bitwise. */
    eq_x = sp_1024_cmp_equal_32(p->x, q->x);
    eq_z = sp_1024_cmp_equal_32(p->z, q->z);
    eq_y = sp_1024_cmp_equal_32(p->y, q->y);
    eq_neg_y = sp_1024_cmp_equal_32(p->y, w1);
    if ((eq_x & eq_z & (eq_y | eq_neg_y)) != 0) {
        sp_1024_proj_point_dbl_32(r, p, t);
        return;
    }

    out[0] = r;
    /*lint allow cast to different type of pointer*/
    out[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/
    XMEMSET(out[1], 0, sizeof(sp_point_1024));
    /* Compute into r unless an input is at infinity; then use the scratch
     * point so the copy placed in r below survives. */
    dst = out[p->infinity | q->infinity];
    rx = dst->x;
    ry = dst->y;
    rz = dst->z;

    /* r = p, or q when p is at infinity. */
    in[0] = p;
    in[1] = q;
    src = in[p->infinity];
    for (idx = 0; idx < 32; idx++) {
        r->x[idx] = src->x[idx];
        r->y[idx] = src->y[idx];
        r->z[idx] = src->z[idx];
    }
    r->infinity = src->infinity;

    /* w1 = Z2^2, w3 = Z2^3, then w1 = U1 = X1*Z2^2 */
    sp_1024_mont_sqr_32(w1, q->z, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(w3, w1, q->z, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(w1, w1, rx, p1024_mod, p1024_mp_mod);
    /* w2 = Z1^2, w4 = Z1^3, then w2 = U2 = X2*Z1^2 */
    sp_1024_mont_sqr_32(w2, rz, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(w4, w2, rz, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(w2, w2, q->x, p1024_mod, p1024_mp_mod);
    /* w3 = S1 = Y1*Z2^3 */
    sp_1024_mont_mul_32(w3, w3, ry, p1024_mod, p1024_mp_mod);
    /* w4 = S2 = Y2*Z1^3 */
    sp_1024_mont_mul_32(w4, w4, q->y, p1024_mod, p1024_mp_mod);
    /* w2 = H = U2 - U1 */
    sp_1024_mont_sub_32(w2, w2, w1, p1024_mod);
    /* w4 = R = S2 - S1 */
    sp_1024_mont_sub_32(w4, w4, w3, p1024_mod);
    /* Z3 = H*Z1*Z2 */
    sp_1024_mont_mul_32(rz, rz, q->z, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(rz, rz, w2, p1024_mod, p1024_mp_mod);
    /* X3 = R^2 - H^3 - 2*U1*H^2 */
    sp_1024_mont_sqr_32(rx, w4, p1024_mod, p1024_mp_mod);
    sp_1024_mont_sqr_32(w5, w2, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(ry, w1, w5, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(w5, w5, w2, p1024_mod, p1024_mp_mod);
    sp_1024_mont_sub_32(rx, rx, w5, p1024_mod);
    sp_1024_mont_dbl_32(w1, ry, p1024_mod);
    sp_1024_mont_sub_32(rx, rx, w1, p1024_mod);
    /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
    sp_1024_mont_sub_32(ry, ry, rx, p1024_mod);
    sp_1024_mont_mul_32(ry, ry, w4, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(w5, w5, w3, p1024_mod, p1024_mp_mod);
    sp_1024_mont_sub_32(ry, ry, w5, p1024_mod);
}
35400 
35401 /* Multiply the point by the scalar and return the result.
35402  * If map is true then convert result to affine coordinates.
35403  *
35404  * Fast implementation that generates a pre-computation table.
35405  * 4 bits of window (no sliding!).
35406  * Uses add and double for calculating table.
35407  * 1024 doubles.
35408  * 268 adds.
35409  *
35410  * r     Resulting point.
35411  * g     Point to multiply.
35412  * k     Scalar to multiply by.
35413  * map   Indicates whether to convert result to affine.
35414  * ct    Constant time required.
35415  * heap  Heap to use for allocation.
35416  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
35417  */
sp_1024_ecc_mulmod_fast_32(sp_point_1024 * r,const sp_point_1024 * g,const sp_digit * k,int map,int ct,void * heap)35418 static int sp_1024_ecc_mulmod_fast_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k,
35419         int map, int ct, void* heap)
35420 {
35421 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35422     sp_point_1024* t = NULL;
35423     sp_digit* tmp = NULL;
35424 #else
35425     sp_point_1024 t[16 + 1];
35426     sp_digit tmp[2 * 32 * 5];
35427 #endif
35428     sp_point_1024* rt = NULL;
35429     sp_digit n;
35430     int i;
35431     int c;
35432     int y;
35433     int err = MP_OKAY;
35434 
35435     /* Constant time used for cache attack resistance implementation. */
35436     (void)ct;
35437     (void)heap;
35438 
35439 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35440     t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * (16 + 1),
35441         heap, DYNAMIC_TYPE_ECC);
35442     if (t == NULL)
35443         err = MEMORY_E;
35444     if (err == MP_OKAY) {
35445         tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 5, heap,
35446                                 DYNAMIC_TYPE_ECC);
35447         if (tmp == NULL)
35448             err = MEMORY_E;
35449     }
35450 #endif
35451 
35452     if (err == MP_OKAY) {
35453         rt = t + 16;
35454 
35455         /* t[0] = {0, 0, 1} * norm */
35456         XMEMSET(&t[0], 0, sizeof(t[0]));
35457         t[0].infinity = 1;
35458         /* t[1] = {g->x, g->y, g->z} * norm */
35459         (void)sp_1024_mod_mul_norm_32(t[1].x, g->x, p1024_mod);
35460         (void)sp_1024_mod_mul_norm_32(t[1].y, g->y, p1024_mod);
35461         (void)sp_1024_mod_mul_norm_32(t[1].z, g->z, p1024_mod);
35462         t[1].infinity = 0;
35463         sp_1024_proj_point_dbl_32(&t[ 2], &t[ 1], tmp);
35464         t[ 2].infinity = 0;
35465         sp_1024_proj_point_add_32(&t[ 3], &t[ 2], &t[ 1], tmp);
35466         t[ 3].infinity = 0;
35467         sp_1024_proj_point_dbl_32(&t[ 4], &t[ 2], tmp);
35468         t[ 4].infinity = 0;
35469         sp_1024_proj_point_add_32(&t[ 5], &t[ 3], &t[ 2], tmp);
35470         t[ 5].infinity = 0;
35471         sp_1024_proj_point_dbl_32(&t[ 6], &t[ 3], tmp);
35472         t[ 6].infinity = 0;
35473         sp_1024_proj_point_add_32(&t[ 7], &t[ 4], &t[ 3], tmp);
35474         t[ 7].infinity = 0;
35475         sp_1024_proj_point_dbl_32(&t[ 8], &t[ 4], tmp);
35476         t[ 8].infinity = 0;
35477         sp_1024_proj_point_add_32(&t[ 9], &t[ 5], &t[ 4], tmp);
35478         t[ 9].infinity = 0;
35479         sp_1024_proj_point_dbl_32(&t[10], &t[ 5], tmp);
35480         t[10].infinity = 0;
35481         sp_1024_proj_point_add_32(&t[11], &t[ 6], &t[ 5], tmp);
35482         t[11].infinity = 0;
35483         sp_1024_proj_point_dbl_32(&t[12], &t[ 6], tmp);
35484         t[12].infinity = 0;
35485         sp_1024_proj_point_add_32(&t[13], &t[ 7], &t[ 6], tmp);
35486         t[13].infinity = 0;
35487         sp_1024_proj_point_dbl_32(&t[14], &t[ 7], tmp);
35488         t[14].infinity = 0;
35489         sp_1024_proj_point_add_32(&t[15], &t[ 8], &t[ 7], tmp);
35490         t[15].infinity = 0;
35491 
35492         i = 30;
35493         n = k[i+1] << 0;
35494         c = 28;
35495         y = (int)(n >> 28);
35496         XMEMCPY(rt, &t[y], sizeof(sp_point_1024));
35497         n <<= 4;
35498         for (; i>=0 || c>=4; ) {
35499             if (c < 4) {
35500                 n |= k[i--];
35501                 c += 32;
35502             }
35503             y = (n >> 28) & 0xf;
35504             n <<= 4;
35505             c -= 4;
35506 
35507             sp_1024_proj_point_dbl_32(rt, rt, tmp);
35508             sp_1024_proj_point_dbl_32(rt, rt, tmp);
35509             sp_1024_proj_point_dbl_32(rt, rt, tmp);
35510             sp_1024_proj_point_dbl_32(rt, rt, tmp);
35511             sp_1024_proj_point_add_32(rt, rt, &t[y], tmp);
35512         }
35513 
35514         if (map != 0) {
35515             sp_1024_map_32(r, rt, tmp);
35516         }
35517         else {
35518             XMEMCPY(r, rt, sizeof(sp_point_1024));
35519         }
35520     }
35521 
35522 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35523     if (tmp != NULL)
35524 #endif
35525     {
35526         ForceZero(tmp, sizeof(sp_digit) * 2 * 32 * 5);
35527     #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35528         XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
35529     #endif
35530     }
35531 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35532     if (t != NULL)
35533 #endif
35534     {
35535         ForceZero(t, sizeof(sp_point_1024) * 17);
35536     #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35537         XFREE(t, heap, DYNAMIC_TYPE_ECC);
35538     #endif
35539     }
35540 
35541     return err;
35542 }
35543 
35544 #if defined(FP_ECC) || !defined(WOLFSSL_SP_SMALL)
/* Double the Montgomery form projective point p a number of times.
 *
 * The point is updated in place; there is no separate result parameter.
 * Y is kept doubled throughout the loop and halved once at the end, and
 * W = Z^4 is carried from one doubling to the next instead of recomputed.
 *
 * p  Point to double; overwritten with the result.
 * n  Number of times to double
 * t  Temporary ordinate data. Offsets up to 8*32 are used as temporaries.
 */
static void sp_1024_proj_point_dbl_n_32(sp_point_1024* p, int n,
    sp_digit* t)
{
    sp_digit* w = t;
    sp_digit* a = t + 2*32;
    sp_digit* b = t + 4*32;
    sp_digit* t1 = t + 6*32;
    sp_digit* t2 = t + 8*32;
    sp_digit* x;
    sp_digit* y;
    sp_digit* z;

    x = p->x;
    y = p->y;
    z = p->z;

    /* Y = 2*Y */
    sp_1024_mont_dbl_32(y, y, p1024_mod);
    /* W = Z^4 */
    sp_1024_mont_sqr_32(w, z, p1024_mod, p1024_mp_mod);
    sp_1024_mont_sqr_32(w, w, p1024_mod, p1024_mp_mod);

    /* Non-small builds loop n-1 times and unroll the final doubling after the
     * loop; small builds loop n times and skip the W update on the last pass.
     */
#ifndef WOLFSSL_SP_SMALL
    while (--n > 0)
#else
    while (--n >= 0)
#endif
    {
        /* A = 3*(X^2 - W) */
        sp_1024_mont_sqr_32(t1, x, p1024_mod, p1024_mp_mod);
        sp_1024_mont_sub_32(t1, t1, w, p1024_mod);
        sp_1024_mont_tpl_32(a, t1, p1024_mod);
        /* B = X*Y^2 */
        sp_1024_mont_sqr_32(t1, y, p1024_mod, p1024_mp_mod);
        sp_1024_mont_mul_32(b, t1, x, p1024_mod, p1024_mp_mod);
        /* X = A^2 - 2B */
        sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod);
        sp_1024_mont_dbl_32(t2, b, p1024_mod);
        sp_1024_mont_sub_32(x, x, t2, p1024_mod);
        /* Z = Z*Y */
        sp_1024_mont_mul_32(z, z, y, p1024_mod, p1024_mp_mod);
        /* t1 = Y^4 */
        sp_1024_mont_sqr_32(t1, t1, p1024_mod, p1024_mp_mod);
#ifdef WOLFSSL_SP_SMALL
        if (n != 0)
#endif
        {
            /* W = W*Y^4 */
            sp_1024_mont_mul_32(w, w, t1, p1024_mod, p1024_mp_mod);
        }
        /* y = 2*A*(B - X) - Y^4 */
        sp_1024_mont_sub_32(y, b, x, p1024_mod);
        sp_1024_mont_mul_32(y, y, a, p1024_mod, p1024_mp_mod);
        sp_1024_mont_dbl_32(y, y, p1024_mod);
        sp_1024_mont_sub_32(y, y, t1, p1024_mod);
    }
#ifndef WOLFSSL_SP_SMALL
    /* Final doubling: same steps as the loop body without the W update. */
    /* A = 3*(X^2 - W) */
    sp_1024_mont_sqr_32(t1, x, p1024_mod, p1024_mp_mod);
    sp_1024_mont_sub_32(t1, t1, w, p1024_mod);
    sp_1024_mont_tpl_32(a, t1, p1024_mod);
    /* B = X*Y^2 */
    sp_1024_mont_sqr_32(t1, y, p1024_mod, p1024_mp_mod);
    sp_1024_mont_mul_32(b, t1, x, p1024_mod, p1024_mp_mod);
    /* X = A^2 - 2B */
    sp_1024_mont_sqr_32(x, a, p1024_mod, p1024_mp_mod);
    sp_1024_mont_dbl_32(t2, b, p1024_mod);
    sp_1024_mont_sub_32(x, x, t2, p1024_mod);
    /* Z = Z*Y */
    sp_1024_mont_mul_32(z, z, y, p1024_mod, p1024_mp_mod);
    /* t1 = Y^4 */
    sp_1024_mont_sqr_32(t1, t1, p1024_mod, p1024_mp_mod);
    /* y = 2*A*(B - X) - Y^4 */
    sp_1024_mont_sub_32(y, b, x, p1024_mod);
    sp_1024_mont_mul_32(y, y, a, p1024_mod, p1024_mp_mod);
    sp_1024_mont_dbl_32(y, y, p1024_mod);
    sp_1024_mont_sub_32(y, y, t1, p1024_mod);
#endif
    /* Y = Y/2 */
    sp_1024_div2_32(y, y, p1024_mod);
}
35633 
35634 /* Convert the projective point to affine.
35635  * Ordinates are in Montgomery form.
35636  *
35637  * a  Point to convert.
35638  * t  Temporary data.
35639  */
sp_1024_proj_to_affine_32(sp_point_1024 * a,sp_digit * t)35640 static void sp_1024_proj_to_affine_32(sp_point_1024* a, sp_digit* t)
35641 {
35642     sp_digit* t1 = t;
35643     sp_digit* t2 = t + 2 * 32;
35644     sp_digit* tmp = t + 4 * 32;
35645 
35646     sp_1024_mont_inv_32(t1, a->z, tmp);
35647 
35648     sp_1024_mont_sqr_32(t2, t1, p1024_mod, p1024_mp_mod);
35649     sp_1024_mont_mul_32(t1, t2, t1, p1024_mod, p1024_mp_mod);
35650 
35651     sp_1024_mont_mul_32(a->x, a->x, t2, p1024_mod, p1024_mp_mod);
35652     sp_1024_mont_mul_32(a->y, a->y, t1, p1024_mod, p1024_mp_mod);
35653     XMEMCPY(a->z, p1024_norm_mod, sizeof(p1024_norm_mod));
35654 }
35655 
35656 #endif /* FP_ECC || !WOLFSSL_SP_SMALL */
/* A table entry for pre-computed points.
 * Only the x and y ordinates are stored; there is no z ordinate or infinity
 * flag in an entry.
 */
typedef struct sp_table_entry_1024 {
    /* X ordinate of the pre-computed point. */
    sp_digit x[32];
    /* Y ordinate of the pre-computed point. */
    sp_digit y[32];
} sp_table_entry_1024;
35662 
35663 #ifdef FP_ECC
35664 #endif /* FP_ECC */
35665 /* Add two Montgomery form projective points. The second point has a q value of
35666  * one.
35667  * Only the first point can be the same pointer as the result point.
35668  *
35669  * r  Result of addition.
35670  * p  First point to add.
35671  * q  Second point to add.
35672  * t  Temporary ordinate data.
35673  */
sp_1024_proj_point_add_qz1_32(sp_point_1024 * r,const sp_point_1024 * p,const sp_point_1024 * q,sp_digit * t)35674 static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* p,
35675         const sp_point_1024* q, sp_digit* t)
35676 {
35677     const sp_point_1024* ap[2];
35678     sp_point_1024* rp[2];
35679     sp_digit* t1 = t;
35680     sp_digit* t2 = t + 2*32;
35681     sp_digit* t3 = t + 4*32;
35682     sp_digit* t4 = t + 6*32;
35683     sp_digit* t5 = t + 8*32;
35684     sp_digit* x;
35685     sp_digit* y;
35686     sp_digit* z;
35687     int i;
35688 
35689     /* Check double */
35690     (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod);
35691     sp_1024_norm_32(t1);
35692     if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) &
35693         (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) {
35694         sp_1024_proj_point_dbl_32(r, p, t);
35695     }
35696     else {
35697         rp[0] = r;
35698 
35699         /*lint allow cast to different type of pointer*/
35700         rp[1] = (sp_point_1024*)t; /*lint !e9087 !e740*/
35701         XMEMSET(rp[1], 0, sizeof(sp_point_1024));
35702         x = rp[p->infinity | q->infinity]->x;
35703         y = rp[p->infinity | q->infinity]->y;
35704         z = rp[p->infinity | q->infinity]->z;
35705 
35706         ap[0] = p;
35707         ap[1] = q;
35708         for (i=0; i<32; i++) {
35709             r->x[i] = ap[p->infinity]->x[i];
35710         }
35711         for (i=0; i<32; i++) {
35712             r->y[i] = ap[p->infinity]->y[i];
35713         }
35714         for (i=0; i<32; i++) {
35715             r->z[i] = ap[p->infinity]->z[i];
35716         }
35717         r->infinity = ap[p->infinity]->infinity;
35718 
35719         /* U2 = X2*Z1^2 */
35720         sp_1024_mont_sqr_32(t2, z, p1024_mod, p1024_mp_mod);
35721         sp_1024_mont_mul_32(t4, t2, z, p1024_mod, p1024_mp_mod);
35722         sp_1024_mont_mul_32(t2, t2, q->x, p1024_mod, p1024_mp_mod);
35723         /* S2 = Y2*Z1^3 */
35724         sp_1024_mont_mul_32(t4, t4, q->y, p1024_mod, p1024_mp_mod);
35725         /* H = U2 - X1 */
35726         sp_1024_mont_sub_32(t2, t2, x, p1024_mod);
35727         /* R = S2 - Y1 */
35728         sp_1024_mont_sub_32(t4, t4, y, p1024_mod);
35729         /* Z3 = H*Z1 */
35730         sp_1024_mont_mul_32(z, z, t2, p1024_mod, p1024_mp_mod);
35731         /* X3 = R^2 - H^3 - 2*X1*H^2 */
35732         sp_1024_mont_sqr_32(t1, t4, p1024_mod, p1024_mp_mod);
35733         sp_1024_mont_sqr_32(t5, t2, p1024_mod, p1024_mp_mod);
35734         sp_1024_mont_mul_32(t3, x, t5, p1024_mod, p1024_mp_mod);
35735         sp_1024_mont_mul_32(t5, t5, t2, p1024_mod, p1024_mp_mod);
35736         sp_1024_mont_sub_32(x, t1, t5, p1024_mod);
35737         sp_1024_mont_dbl_32(t1, t3, p1024_mod);
35738         sp_1024_mont_sub_32(x, x, t1, p1024_mod);
35739         /* Y3 = R*(X1*H^2 - X3) - Y1*H^3 */
35740         sp_1024_mont_sub_32(t3, t3, x, p1024_mod);
35741         sp_1024_mont_mul_32(t3, t3, t4, p1024_mod, p1024_mp_mod);
35742         sp_1024_mont_mul_32(t5, t5, y, p1024_mod, p1024_mp_mod);
35743         sp_1024_mont_sub_32(y, t3, t5, p1024_mod);
35744     }
35745 }
35746 
35747 #ifdef WOLFSSL_SP_SMALL
35748 #if defined(FP_ECC) || !defined(WOLFSSL_SP_SMALL)
35749 /* Generate the pre-computed table of points for the base point.
35750  *
35751  * width = 4
35752  * 16 entries
35753  * 256 bits between
35754  *
35755  * a      The base point.
35756  * table  Place to store generated point data.
35757  * tmp    Temporary data.
35758  * heap  Heap to use for allocation.
35759  */
sp_1024_gen_stripe_table_32(const sp_point_1024 * a,sp_table_entry_1024 * table,sp_digit * tmp,void * heap)35760 static int sp_1024_gen_stripe_table_32(const sp_point_1024* a,
35761         sp_table_entry_1024* table, sp_digit* tmp, void* heap)
35762 {
35763 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35764     sp_point_1024* t = NULL;
35765 #else
35766     sp_point_1024 t[3];
35767 #endif
35768     sp_point_1024* s1 = NULL;
35769     sp_point_1024* s2 = NULL;
35770     int i;
35771     int j;
35772     int err = MP_OKAY;
35773 
35774     (void)heap;
35775 
35776 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35777     t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap,
35778                                      DYNAMIC_TYPE_ECC);
35779     if (t == NULL)
35780         err = MEMORY_E;
35781 #endif
35782 
35783     if (err == MP_OKAY) {
35784         s1 = t + 1;
35785         s2 = t + 2;
35786 
35787         err = sp_1024_mod_mul_norm_32(t->x, a->x, p1024_mod);
35788     }
35789     if (err == MP_OKAY) {
35790         err = sp_1024_mod_mul_norm_32(t->y, a->y, p1024_mod);
35791     }
35792     if (err == MP_OKAY) {
35793         err = sp_1024_mod_mul_norm_32(t->z, a->z, p1024_mod);
35794     }
35795     if (err == MP_OKAY) {
35796         t->infinity = 0;
35797         sp_1024_proj_to_affine_32(t, tmp);
35798 
35799         XMEMCPY(s1->z, p1024_norm_mod, sizeof(p1024_norm_mod));
35800         s1->infinity = 0;
35801         XMEMCPY(s2->z, p1024_norm_mod, sizeof(p1024_norm_mod));
35802         s2->infinity = 0;
35803 
35804         /* table[0] = {0, 0, infinity} */
35805         XMEMSET(&table[0], 0, sizeof(sp_table_entry_1024));
35806         /* table[1] = Affine version of 'a' in Montgomery form */
35807         XMEMCPY(table[1].x, t->x, sizeof(table->x));
35808         XMEMCPY(table[1].y, t->y, sizeof(table->y));
35809 
35810         for (i=1; i<4; i++) {
35811             sp_1024_proj_point_dbl_n_32(t, 256, tmp);
35812             sp_1024_proj_to_affine_32(t, tmp);
35813             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
35814             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
35815         }
35816 
35817         for (i=1; i<4; i++) {
35818             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
35819             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
35820             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
35821                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
35822                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
35823                 sp_1024_proj_point_add_qz1_32(t, s1, s2, tmp);
35824                 sp_1024_proj_to_affine_32(t, tmp);
35825                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
35826                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
35827             }
35828         }
35829     }
35830 
35831 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
35832     if (t != NULL)
35833         XFREE(t, heap, DYNAMIC_TYPE_ECC);
35834 #endif
35835 
35836     return err;
35837 }
35838 
35839 #endif /* FP_ECC || !WOLFSSL_SP_SMALL */
35840 /* Multiply the point by the scalar and return the result.
35841  * If map is true then convert result to affine coordinates.
35842  *
35843  * Stripe implementation.
35844  * Pre-generated: 2^0, 2^256, ...
35845  * Pre-generated: products of all combinations of above.
35846  * 4 doubles and adds (with qz=1)
35847  *
35848  * r      Resulting point.
35849  * k      Scalar to multiply by.
35850  * table  Pre-computed table.
35851  * map    Indicates whether to convert result to affine.
35852  * ct     Constant time required.
35853  * heap   Heap to use for allocation.
35854  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
35855  */
static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g,
        const sp_table_entry_1024* table, const sp_digit* k, int map,
        int ct, void* heap)
{
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_point_1024* acc = NULL;
    sp_digit* tmp = NULL;
#else
    sp_point_1024 acc[2];
    sp_digit tmp[2 * 32 * 5];
#endif
    sp_point_1024* addend = NULL;
    const sp_table_entry_1024* entry = NULL;
    int err = MP_OKAY;
    int bit;
    int stripe;
    int pos;
    int idx;

    /* g and ct are not read by this implementation. */
    (void)g;
    (void)ct;
    (void)heap;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* acc[0] is the running result, acc[1] the table entry being added. */
    acc = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap,
                                  DYNAMIC_TYPE_ECC);
    if (acc != NULL) {
        tmp = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 5, heap,
                                 DYNAMIC_TYPE_ECC);
    }
    if ((acc == NULL) || (tmp == NULL)) {
        err = MEMORY_E;
    }
#endif

    if (err == MP_OKAY) {
        addend = &acc[1];

        /* Table entries only carry x and y, so z is preset on both points. */
        XMEMCPY(addend->z, p1024_norm_mod, sizeof(p1024_norm_mod));
        XMEMCPY(acc->z, p1024_norm_mod, sizeof(p1024_norm_mod));

        for (bit = 255; bit >= 0; bit--) {
            /* Gather bit 'bit' of each of the 4 stripes (256 bits apart) into
             * the table index. */
            idx = 0;
            for (stripe = 0; stripe < 4; stripe++) {
                pos = bit + (stripe * 256);
                idx |= (int)(((k[pos / 32] >> (pos % 32)) & 1) << stripe);
            }
            entry = &table[idx];

            if (bit == 255) {
                /* First round: load the selected entry as the result. */
                XMEMCPY(acc->x, entry->x, sizeof(entry->x));
                XMEMCPY(acc->y, entry->y, sizeof(entry->y));
                /* Index 0 selects no stripe: point at infinity. */
                acc->infinity = !idx;
            }
            else {
                /* Remaining rounds: double, then add the selected entry. */
                sp_1024_proj_point_dbl_32(acc, acc, tmp);
                XMEMCPY(addend->x, entry->x, sizeof(entry->x));
                XMEMCPY(addend->y, entry->y, sizeof(entry->y));
                addend->infinity = !idx;
                sp_1024_proj_point_add_qz1_32(acc, acc, addend, tmp);
            }
        }

        if (map == 0) {
            XMEMCPY(r, acc, sizeof(sp_point_1024));
        }
        else {
            sp_1024_map_32(r, acc, tmp);
        }
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (tmp != NULL)
        XFREE(tmp, heap, DYNAMIC_TYPE_ECC);
    if (acc != NULL)
        XFREE(acc, heap, DYNAMIC_TYPE_ECC);
#endif

    return err;
}
35940 
35941 #ifdef FP_ECC
#ifndef FP_ENTRIES
    /* Number of entries in the table cache when the build does not set it. */
    #define FP_ENTRIES 16
#endif

/* Cache entry - holds precomputation tables for a point. */
typedef struct sp_cache_1024_t {
    /* X ordinate of point that table was generated from. */
    sp_digit x[32];
    /* Y ordinate of point that table was generated from. */
    sp_digit y[32];
    /* Precomputation table for point. */
    sp_table_entry_1024 table[16];
    /* Use count: set to 1 when the entry is claimed for a point and
     * incremented each time that point is looked up again. */
    uint32_t cnt;
    /* Non-zero once the entry has been claimed for a point. */
    int set;
} sp_cache_1024_t;
35959 
/* Cache of tables. */
static THREAD_LS_T sp_cache_1024_t sp_cache_1024[FP_ENTRIES];
/* Index of the cache entry most recently handed out; -1 until first use. */
static THREAD_LS_T int sp_cache_1024_last = -1;
/* Cache has been initialized. */
static THREAD_LS_T int sp_cache_1024_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_1024_lock has been initialized. */
    static volatile int initCacheMutex_1024 = 0;
    /* Taken around cache lookups in builds without HAVE_THREAD_LS. */
    static wolfSSL_Mutex sp_cache_1024_lock;
#endif
35971 
35972 /* Get the cache entry for the point.
35973  *
35974  * g      [in]   Point scalar multipling.
35975  * cache  [out]  Cache table to use.
35976  */
sp_ecc_get_cache_1024(const sp_point_1024 * g,sp_cache_1024_t ** cache)35977 static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache)
35978 {
35979     int i;
35980     int j;
35981     uint32_t least;
35982 
35983     if (sp_cache_1024_inited == 0) {
35984         for (i=0; i<FP_ENTRIES; i++) {
35985             sp_cache_1024[i].set = 0;
35986         }
35987         sp_cache_1024_inited = 1;
35988     }
35989 
35990     /* Compare point with those in cache. */
35991     for (i=0; i<FP_ENTRIES; i++) {
35992         if (!sp_cache_1024[i].set)
35993             continue;
35994 
35995         if (sp_1024_cmp_equal_32(g->x, sp_cache_1024[i].x) &
35996                            sp_1024_cmp_equal_32(g->y, sp_cache_1024[i].y)) {
35997             sp_cache_1024[i].cnt++;
35998             break;
35999         }
36000     }
36001 
36002     /* No match. */
36003     if (i == FP_ENTRIES) {
36004         /* Find empty entry. */
36005         i = (sp_cache_1024_last + 1) % FP_ENTRIES;
36006         for (; i != sp_cache_1024_last; i=(i+1)%FP_ENTRIES) {
36007             if (!sp_cache_1024[i].set) {
36008                 break;
36009             }
36010         }
36011 
36012         /* Evict least used. */
36013         if (i == sp_cache_1024_last) {
36014             least = sp_cache_1024[0].cnt;
36015             for (j=1; j<FP_ENTRIES; j++) {
36016                 if (sp_cache_1024[j].cnt < least) {
36017                     i = j;
36018                     least = sp_cache_1024[i].cnt;
36019                 }
36020             }
36021         }
36022 
36023         XMEMCPY(sp_cache_1024[i].x, g->x, sizeof(sp_cache_1024[i].x));
36024         XMEMCPY(sp_cache_1024[i].y, g->y, sizeof(sp_cache_1024[i].y));
36025         sp_cache_1024[i].set = 1;
36026         sp_cache_1024[i].cnt = 1;
36027     }
36028 
36029     *cache = &sp_cache_1024[i];
36030     sp_cache_1024_last = i;
36031 }
36032 #endif /* FP_ECC */
36033 
36034 /* Multiply the base point of P1024 by the scalar and return the result.
36035  * If map is true then convert result to affine coordinates.
36036  *
36037  * r     Resulting point.
36038  * g     Point to multiply.
36039  * k     Scalar to multiply by.
36040  * map   Indicates whether to convert result to affine.
36041  * ct    Constant time required.
36042  * heap  Heap to use for allocation.
36043  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
36044  */
sp_1024_ecc_mulmod_32(sp_point_1024 * r,const sp_point_1024 * g,const sp_digit * k,int map,int ct,void * heap)36045 static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k,
36046         int map, int ct, void* heap)
36047 {
36048 #ifndef FP_ECC
36049     return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap);
36050 #else
36051     sp_digit tmp[2 * 32 * 5];
36052     sp_cache_1024_t* cache;
36053     int err = MP_OKAY;
36054 
36055 #ifndef HAVE_THREAD_LS
36056     if (initCacheMutex_1024 == 0) {
36057          wc_InitMutex(&sp_cache_1024_lock);
36058          initCacheMutex_1024 = 1;
36059     }
36060     if (wc_LockMutex(&sp_cache_1024_lock) != 0)
36061        err = BAD_MUTEX_E;
36062 #endif /* HAVE_THREAD_LS */
36063 
36064     if (err == MP_OKAY) {
36065         sp_ecc_get_cache_1024(g, &cache);
36066         if (cache->cnt == 2)
36067             sp_1024_gen_stripe_table_32(g, cache->table, tmp, heap);
36068 
36069 #ifndef HAVE_THREAD_LS
36070         wc_UnLockMutex(&sp_cache_1024_lock);
36071 #endif /* HAVE_THREAD_LS */
36072 
36073         if (cache->cnt < 2) {
36074             err = sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap);
36075         }
36076         else {
36077             err = sp_1024_ecc_mulmod_stripe_32(r, g, cache->table, k,
36078                     map, ct, heap);
36079         }
36080     }
36081 
36082     return err;
36083 #endif
36084 }
36085 
36086 #else
36087 #if defined(FP_ECC) || !defined(WOLFSSL_SP_SMALL)
36088 /* Generate the pre-computed table of points for the base point.
36089  *
36090  * width = 8
36091  * 256 entries
36092  * 128 bits between
36093  *
36094  * a      The base point.
36095  * table  Place to store generated point data.
36096  * tmp    Temporary data.
36097  * heap  Heap to use for allocation.
36098  */
sp_1024_gen_stripe_table_32(const sp_point_1024 * a,sp_table_entry_1024 * table,sp_digit * tmp,void * heap)36099 static int sp_1024_gen_stripe_table_32(const sp_point_1024* a,
36100         sp_table_entry_1024* table, sp_digit* tmp, void* heap)
36101 {
36102 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36103     sp_point_1024* t = NULL;
36104 #else
36105     sp_point_1024 t[3];
36106 #endif
36107     sp_point_1024* s1 = NULL;
36108     sp_point_1024* s2 = NULL;
36109     int i;
36110     int j;
36111     int err = MP_OKAY;
36112 
36113     (void)heap;
36114 
36115 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36116     t = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 3, heap,
36117                                      DYNAMIC_TYPE_ECC);
36118     if (t == NULL)
36119         err = MEMORY_E;
36120 #endif
36121 
36122     if (err == MP_OKAY) {
36123         s1 = t + 1;
36124         s2 = t + 2;
36125 
36126         err = sp_1024_mod_mul_norm_32(t->x, a->x, p1024_mod);
36127     }
36128     if (err == MP_OKAY) {
36129         err = sp_1024_mod_mul_norm_32(t->y, a->y, p1024_mod);
36130     }
36131     if (err == MP_OKAY) {
36132         err = sp_1024_mod_mul_norm_32(t->z, a->z, p1024_mod);
36133     }
36134     if (err == MP_OKAY) {
36135         t->infinity = 0;
36136         sp_1024_proj_to_affine_32(t, tmp);
36137 
36138         XMEMCPY(s1->z, p1024_norm_mod, sizeof(p1024_norm_mod));
36139         s1->infinity = 0;
36140         XMEMCPY(s2->z, p1024_norm_mod, sizeof(p1024_norm_mod));
36141         s2->infinity = 0;
36142 
36143         /* table[0] = {0, 0, infinity} */
36144         XMEMSET(&table[0], 0, sizeof(sp_table_entry_1024));
36145         /* table[1] = Affine version of 'a' in Montgomery form */
36146         XMEMCPY(table[1].x, t->x, sizeof(table->x));
36147         XMEMCPY(table[1].y, t->y, sizeof(table->y));
36148 
36149         for (i=1; i<8; i++) {
36150             sp_1024_proj_point_dbl_n_32(t, 128, tmp);
36151             sp_1024_proj_to_affine_32(t, tmp);
36152             XMEMCPY(table[1<<i].x, t->x, sizeof(table->x));
36153             XMEMCPY(table[1<<i].y, t->y, sizeof(table->y));
36154         }
36155 
36156         for (i=1; i<8; i++) {
36157             XMEMCPY(s1->x, table[1<<i].x, sizeof(table->x));
36158             XMEMCPY(s1->y, table[1<<i].y, sizeof(table->y));
36159             for (j=(1<<i)+1; j<(1<<(i+1)); j++) {
36160                 XMEMCPY(s2->x, table[j-(1<<i)].x, sizeof(table->x));
36161                 XMEMCPY(s2->y, table[j-(1<<i)].y, sizeof(table->y));
36162                 sp_1024_proj_point_add_qz1_32(t, s1, s2, tmp);
36163                 sp_1024_proj_to_affine_32(t, tmp);
36164                 XMEMCPY(table[j].x, t->x, sizeof(table->x));
36165                 XMEMCPY(table[j].y, t->y, sizeof(table->y));
36166             }
36167         }
36168     }
36169 
36170 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36171     if (t != NULL)
36172         XFREE(t, heap, DYNAMIC_TYPE_ECC);
36173 #endif
36174 
36175     return err;
36176 }
36177 
36178 #endif /* FP_ECC || !WOLFSSL_SP_SMALL */
36179 /* Multiply the point by the scalar and return the result.
36180  * If map is true then convert result to affine coordinates.
36181  *
36182  * Stripe implementation.
36183  * Pre-generated: 2^0, 2^128, ...
36184  * Pre-generated: products of all combinations of above.
36185  * 8 doubles and adds (with qz=1)
36186  *
36187  * r      Resulting point.
36188  * k      Scalar to multiply by.
36189  * table  Pre-computed table.
36190  * map    Indicates whether to convert result to affine.
36191  * ct     Constant time required.
36192  * heap   Heap to use for allocation.
36193  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
36194  */
sp_1024_ecc_mulmod_stripe_32(sp_point_1024 * r,const sp_point_1024 * g,const sp_table_entry_1024 * table,const sp_digit * k,int map,int ct,void * heap)36195 static int sp_1024_ecc_mulmod_stripe_32(sp_point_1024* r, const sp_point_1024* g,
36196         const sp_table_entry_1024* table, const sp_digit* k, int map,
36197         int ct, void* heap)
36198 {
36199 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36200     sp_point_1024* rt = NULL;
36201     sp_digit* t = NULL;
36202 #else
36203     sp_point_1024 rt[2];
36204     sp_digit t[2 * 32 * 5];
36205 #endif
36206     sp_point_1024* p = NULL;
36207     int i;
36208     int j;
36209     int y;
36210     int x;
36211     int err = MP_OKAY;
36212 
36213     (void)g;
36214     /* Constant time used for cache attack resistance implementation. */
36215     (void)ct;
36216     (void)heap;
36217 
36218 
36219 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36220     rt = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap,
36221                                       DYNAMIC_TYPE_ECC);
36222     if (rt == NULL)
36223         err = MEMORY_E;
36224     if (err == MP_OKAY) {
36225         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 2 * 32 * 5, heap,
36226                                DYNAMIC_TYPE_ECC);
36227         if (t == NULL)
36228             err = MEMORY_E;
36229     }
36230 #endif
36231 
36232     if (err == MP_OKAY) {
36233         p = rt + 1;
36234 
36235         XMEMCPY(p->z, p1024_norm_mod, sizeof(p1024_norm_mod));
36236         XMEMCPY(rt->z, p1024_norm_mod, sizeof(p1024_norm_mod));
36237 
36238         y = 0;
36239         x = 127;
36240         for (j=0; j<8; j++) {
36241             y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
36242             x += 128;
36243         }
36244         XMEMCPY(rt->x, table[y].x, sizeof(table[y].x));
36245         XMEMCPY(rt->y, table[y].y, sizeof(table[y].y));
36246         rt->infinity = !y;
36247         for (i=126; i>=0; i--) {
36248             y = 0;
36249             x = i;
36250             for (j=0; j<8; j++) {
36251                 y |= (int)(((k[x / 32] >> (x % 32)) & 1) << j);
36252                 x += 128;
36253             }
36254 
36255             sp_1024_proj_point_dbl_32(rt, rt, t);
36256             XMEMCPY(p->x, table[y].x, sizeof(table[y].x));
36257             XMEMCPY(p->y, table[y].y, sizeof(table[y].y));
36258             p->infinity = !y;
36259             sp_1024_proj_point_add_qz1_32(rt, rt, p, t);
36260         }
36261 
36262         if (map != 0) {
36263             sp_1024_map_32(r, rt, t);
36264         }
36265         else {
36266             XMEMCPY(r, rt, sizeof(sp_point_1024));
36267         }
36268     }
36269 
36270 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36271     if (t != NULL)
36272         XFREE(t, heap, DYNAMIC_TYPE_ECC);
36273     if (rt != NULL)
36274         XFREE(rt, heap, DYNAMIC_TYPE_ECC);
36275 #endif
36276 
36277     return err;
36278 }
36279 
36280 #ifdef FP_ECC
#ifndef FP_ENTRIES
    /* Number of entries in the table cache when the build does not set it. */
    #define FP_ENTRIES 16
#endif

/* Cache entry - holds precomputation tables for a point. */
typedef struct sp_cache_1024_t {
    /* X ordinate of point that table was generated from. */
    sp_digit x[32];
    /* Y ordinate of point that table was generated from. */
    sp_digit y[32];
    /* Precomputation table for point. */
    sp_table_entry_1024 table[256];
    /* Use count: set to 1 when the entry is claimed for a point and
     * incremented each time that point is looked up again. */
    uint32_t cnt;
    /* Non-zero once the entry has been claimed for a point. */
    int set;
} sp_cache_1024_t;
36298 
/* Cache of tables. */
static THREAD_LS_T sp_cache_1024_t sp_cache_1024[FP_ENTRIES];
/* Index of the cache entry most recently handed out; -1 until first use. */
static THREAD_LS_T int sp_cache_1024_last = -1;
/* Cache has been initialized. */
static THREAD_LS_T int sp_cache_1024_inited = 0;

#ifndef HAVE_THREAD_LS
    /* Non-zero once sp_cache_1024_lock has been initialized. */
    static volatile int initCacheMutex_1024 = 0;
    /* Taken around cache lookups in builds without HAVE_THREAD_LS. */
    static wolfSSL_Mutex sp_cache_1024_lock;
#endif
36310 
36311 /* Get the cache entry for the point.
36312  *
36313  * g      [in]   Point scalar multipling.
36314  * cache  [out]  Cache table to use.
36315  */
sp_ecc_get_cache_1024(const sp_point_1024 * g,sp_cache_1024_t ** cache)36316 static void sp_ecc_get_cache_1024(const sp_point_1024* g, sp_cache_1024_t** cache)
36317 {
36318     int i;
36319     int j;
36320     uint32_t least;
36321 
36322     if (sp_cache_1024_inited == 0) {
36323         for (i=0; i<FP_ENTRIES; i++) {
36324             sp_cache_1024[i].set = 0;
36325         }
36326         sp_cache_1024_inited = 1;
36327     }
36328 
36329     /* Compare point with those in cache. */
36330     for (i=0; i<FP_ENTRIES; i++) {
36331         if (!sp_cache_1024[i].set)
36332             continue;
36333 
36334         if (sp_1024_cmp_equal_32(g->x, sp_cache_1024[i].x) &
36335                            sp_1024_cmp_equal_32(g->y, sp_cache_1024[i].y)) {
36336             sp_cache_1024[i].cnt++;
36337             break;
36338         }
36339     }
36340 
36341     /* No match. */
36342     if (i == FP_ENTRIES) {
36343         /* Find empty entry. */
36344         i = (sp_cache_1024_last + 1) % FP_ENTRIES;
36345         for (; i != sp_cache_1024_last; i=(i+1)%FP_ENTRIES) {
36346             if (!sp_cache_1024[i].set) {
36347                 break;
36348             }
36349         }
36350 
36351         /* Evict least used. */
36352         if (i == sp_cache_1024_last) {
36353             least = sp_cache_1024[0].cnt;
36354             for (j=1; j<FP_ENTRIES; j++) {
36355                 if (sp_cache_1024[j].cnt < least) {
36356                     i = j;
36357                     least = sp_cache_1024[i].cnt;
36358                 }
36359             }
36360         }
36361 
36362         XMEMCPY(sp_cache_1024[i].x, g->x, sizeof(sp_cache_1024[i].x));
36363         XMEMCPY(sp_cache_1024[i].y, g->y, sizeof(sp_cache_1024[i].y));
36364         sp_cache_1024[i].set = 1;
36365         sp_cache_1024[i].cnt = 1;
36366     }
36367 
36368     *cache = &sp_cache_1024[i];
36369     sp_cache_1024_last = i;
36370 }
36371 #endif /* FP_ECC */
36372 
36373 /* Multiply the base point of P1024 by the scalar and return the result.
36374  * If map is true then convert result to affine coordinates.
36375  *
36376  * r     Resulting point.
36377  * g     Point to multiply.
36378  * k     Scalar to multiply by.
36379  * map   Indicates whether to convert result to affine.
36380  * ct    Constant time required.
36381  * heap  Heap to use for allocation.
36382  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
36383  */
sp_1024_ecc_mulmod_32(sp_point_1024 * r,const sp_point_1024 * g,const sp_digit * k,int map,int ct,void * heap)36384 static int sp_1024_ecc_mulmod_32(sp_point_1024* r, const sp_point_1024* g, const sp_digit* k,
36385         int map, int ct, void* heap)
36386 {
36387 #ifndef FP_ECC
36388     return sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap);
36389 #else
36390     sp_digit tmp[2 * 32 * 5];
36391     sp_cache_1024_t* cache;
36392     int err = MP_OKAY;
36393 
36394 #ifndef HAVE_THREAD_LS
36395     if (initCacheMutex_1024 == 0) {
36396          wc_InitMutex(&sp_cache_1024_lock);
36397          initCacheMutex_1024 = 1;
36398     }
36399     if (wc_LockMutex(&sp_cache_1024_lock) != 0)
36400        err = BAD_MUTEX_E;
36401 #endif /* HAVE_THREAD_LS */
36402 
36403     if (err == MP_OKAY) {
36404         sp_ecc_get_cache_1024(g, &cache);
36405         if (cache->cnt == 2)
36406             sp_1024_gen_stripe_table_32(g, cache->table, tmp, heap);
36407 
36408 #ifndef HAVE_THREAD_LS
36409         wc_UnLockMutex(&sp_cache_1024_lock);
36410 #endif /* HAVE_THREAD_LS */
36411 
36412         if (cache->cnt < 2) {
36413             err = sp_1024_ecc_mulmod_fast_32(r, g, k, map, ct, heap);
36414         }
36415         else {
36416             err = sp_1024_ecc_mulmod_stripe_32(r, g, cache->table, k,
36417                     map, ct, heap);
36418         }
36419     }
36420 
36421     return err;
36422 #endif
36423 }
36424 
36425 #endif /* WOLFSSL_SP_SMALL */
36426 /* Multiply the point by the scalar and return the result.
36427  * If map is true then convert result to affine coordinates.
36428  *
36429  * km    Scalar to multiply by.
36430  * p     Point to multiply.
36431  * r     Resulting point.
36432  * map   Indicates whether to convert result to affine.
36433  * heap  Heap to use for allocation.
36434  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
36435  */
sp_ecc_mulmod_1024(const mp_int * km,const ecc_point * gm,ecc_point * r,int map,void * heap)36436 int sp_ecc_mulmod_1024(const mp_int* km, const ecc_point* gm, ecc_point* r,
36437         int map, void* heap)
36438 {
36439 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36440     sp_point_1024* point = NULL;
36441     sp_digit* k = NULL;
36442 #else
36443     sp_point_1024 point[1];
36444     sp_digit k[32];
36445 #endif
36446     int err = MP_OKAY;
36447 
36448 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36449     point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap,
36450                                          DYNAMIC_TYPE_ECC);
36451     if (point == NULL)
36452         err = MEMORY_E;
36453     if (err == MP_OKAY) {
36454         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32, heap,
36455                                DYNAMIC_TYPE_ECC);
36456         if (k == NULL)
36457             err = MEMORY_E;
36458     }
36459 #endif
36460 
36461     if (err == MP_OKAY) {
36462         sp_1024_from_mp(k, 32, km);
36463         sp_1024_point_from_ecc_point_32(point, gm);
36464 
36465             err = sp_1024_ecc_mulmod_32(point, point, k, map, 1, heap);
36466     }
36467     if (err == MP_OKAY) {
36468         err = sp_1024_point_to_ecc_point_32(point, r);
36469     }
36470 
36471 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
36472     if (k != NULL)
36473         XFREE(k, heap, DYNAMIC_TYPE_ECC);
36474     if (point != NULL)
36475         XFREE(point, heap, DYNAMIC_TYPE_ECC);
36476 #endif
36477 
36478     return err;
36479 }
36480 
36481 #ifdef WOLFSSL_SP_SMALL
36482 /* Striping precomputation table.
36483  * 4 points combined into a table of 16 points.
36484  * Distance of 256 between points.
36485  */
36486 static const sp_table_entry_1024 p1024_table[16] = {
36487     /* 0 */
36488     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36489         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36490         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
36491       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36492         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36493         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
36494     /* 1 */
36495     { { 0xe0162bc2,0xbf9c7ec6,0x10a89289,0xddecc6e3,0x9e499d81,0x5d599df0,
36496         0x6d358218,0x9a96ea28,0x70c5f8db,0x01aec7d3,0x8cf5d066,0xe72e4995,
36497         0x3e91d7f8,0xc2e7297d,0xda9f2f5a,0x8621db92,0x5a5679ed,0x4b26c867,
36498         0x2c56aac1,0x233385df,0xc6a13f99,0xb88e74d4,0xffa8ec11,0x1214b173,
36499         0x1f3f9fef,0xa0386a27,0xc0e7b44e,0xbd9b1b4e,0xeecd3496,0xafe528dc,
36500         0x1c49f80b,0x8dfff96a },
36501       { 0xc03c0c83,0xb4a4753a,0xabcdcd75,0x68e69d18,0xf775b649,0xe3839b88,
36502         0xbf58f352,0x803f949a,0xbd0bc15c,0x5f702679,0x8ff298c2,0x85bf5d16,
36503         0xc6c7976e,0x3f6ebd98,0x45e3e1b4,0x20618af4,0x54e64093,0x67d5598e,
36504         0x504fed9e,0xb047283b,0x70d87517,0x450cabfd,0x3f5addbe,0x47d628bf,
36505         0x78cb4cca,0x0037ef30,0x6b1c4908,0x4e148d3c,0x4fcfd837,0xe256d329,
36506         0xde3c01f3,0x2aa1207b } },
36507     /* 2 */
36508     { { 0x755c2a27,0xcf3e0bb2,0x59585c44,0xd38e42f9,0x19285e60,0x46b13e0f,
36509         0x76273d0f,0xc3ecd0c0,0x193c569a,0x7800f085,0x4351818a,0xf04e74ab,
36510         0x8496363b,0x9258aa38,0xb8c894fe,0x8456617c,0x2af969a0,0x8bc62aaa,
36511         0x5a4668d9,0x66c2280b,0xa992f4fa,0xbc9df58e,0x3f401e99,0x5db0b7d9,
36512         0xc4c38c0e,0xe0614fe1,0x2ccdf6b3,0xd531151c,0xe143b618,0x1c7575ec,
36513         0xdf9398a4,0x40247985 },
36514       { 0x8f055746,0xfba25178,0x0ab1e6e0,0xc5ba0040,0xac292697,0xe1b194fb,
36515         0x5b4f4740,0x77152119,0x9bb7ba54,0x250091d0,0xb9a139a4,0x7a674861,
36516         0xf353aa7e,0xba8413b3,0x2443ceee,0xafe77192,0x3847bbd0,0x14468d36,
36517         0x3da4942d,0x61f79ff6,0xd425b456,0x1563a1c1,0x75ff4630,0x3c270fcd,
36518         0xeb2802c9,0x42072090,0xc85c7004,0x68f0cdcb,0xfa032e74,0xca4372fb,
36519         0xc8b79d80,0x1a6fd1e6 } },
36520     /* 3 */
36521     { { 0x8d5116a3,0x967a901a,0xb2f5f47f,0x0b844394,0x60ebaf3b,0xe39ad452,
36522         0x60ccfc0c,0x1e1be617,0xcc3f53f2,0xac07e3d2,0x1ed11bb6,0xdd838e0e,
36523         0x1c15b0c2,0x45475307,0x920fe5b8,0x70dd4748,0xe471896d,0x1a20be2d,
36524         0x59276c7c,0x3c3fad8a,0xc886ee07,0x026a1cc3,0x6e831ac4,0x9fdb6f37,
36525         0xac501d65,0x26a35d1a,0x40da8574,0x0ae98905,0xabd734e5,0x65dde0a4,
36526         0x15614750,0x29b7d4dc },
36527       { 0xcbf4e20b,0x44b3c2cb,0x58cc44c5,0x1c3f548f,0x5b0cac1f,0x39809b54,
36528         0x00f80621,0x0c0f02b5,0x066905e0,0xe612b890,0x8350188c,0x8f158ed7,
36529         0x3f5576b2,0xc01dc458,0xa45492e0,0x29803272,0x0ff92443,0x77a5623a,
36530         0x29d0dc41,0xd12a2b00,0x2780e87a,0xb4125459,0x0d53f272,0x1ebcf903,
36531         0x24301e8d,0xbae6ea40,0xa37d0798,0x1e5f3f2f,0x22b4126c,0x9342c310,
36532         0x5382497e,0x5d092802 } },
36533     /* 4 */
36534     { { 0x4b59213a,0xf5b495d0,0x8d70200e,0xca672039,0x2b6771c1,0x4bcb09a6,
36535         0x2b9eb0cb,0x26adeed4,0x8cdba212,0xeb544754,0xf08890d1,0x0e1abfcd,
36536         0x698e46b4,0x52509963,0x82e9c138,0xe1bff0b0,0x51099a71,0xa189e4cd,
36537         0xc9b91cc7,0x2360c9bc,0x137ec4be,0x9bd4d7dc,0xd1519f6e,0xd0356521,
36538         0xcf832503,0xbf5f6d78,0x8deea2b4,0xe4301031,0xef4c319c,0xc3132494,
36539         0x0f1fa7d7,0x2ab3bd47 },
36540       { 0x922c9fbb,0x5753b680,0x0f16c6d1,0x869e7dc8,0xbac16efc,0x83445135,
36541         0x846d1d9b,0x4326a3b4,0xb2d62c21,0xb517fee3,0x0b292ad5,0x6905afa2,
36542         0x2cadac13,0x2a57131a,0xebdbca8d,0xcd904d8f,0x3f365fb2,0xdfeda86f,
36543         0xdc7eaa1c,0x7097b208,0xa45e77c0,0x89a35a84,0xcf5d118e,0x417a062c,
36544         0x1f6e99e8,0x3c0c04a8,0xba7a087d,0xc44704b0,0x3ea22ad2,0x6f8a27d1,
36545         0x4c27d229,0x93a4b416 } },
36546     /* 5 */
36547     { { 0x1f1efb7a,0xd4271bc1,0x33fccc0d,0xae4e68e6,0xb11f50a8,0x9d9bc8f1,
36548         0xaf076089,0x5430398f,0x443d0e03,0x45e242fb,0xf6e3d4c1,0x73ec2519,
36549         0xba9bad09,0xab70f790,0xf9add10f,0xde612ad5,0x14e942b4,0xb837e54e,
36550         0xddb8b68a,0x175a56d3,0x1ac2a408,0xe85b233c,0xf0c80f94,0xf8ff6c30,
36551         0x898db4f9,0x4b7f3fb7,0x45a7dcdd,0xa2c6044f,0xfe3d3895,0xf3abb2f6,
36552         0x32ee7763,0x342ce0d7 },
36553       { 0xcf491b1f,0xeb261394,0x1909e395,0xdcaaeed7,0x9fe4dbea,0xdcc4055a,
36554         0x493d604d,0x17a6611d,0x1ce5ebef,0xba445a3a,0xe3989cb5,0xe82e2858,
36555         0x83f58406,0xb96f4282,0xa156cf55,0x99877b99,0x4e166a0e,0xaf906a66,
36556         0xb2976d13,0xcea1d353,0x36c61a01,0xefc16f27,0xb0f55d86,0xdb04c433,
36557         0x8eb34c01,0x3cb4b269,0x2ae60280,0x38d07f78,0x43be3ec5,0x43ac3bcb,
36558         0xe156fd20,0x455f4af3 } },
36559     /* 6 */
36560     { { 0x95532833,0x2e6fe0a6,0xd626d067,0xabca228e,0x649e73bd,0x22aef3d9,
36561         0xf03c4c0c,0x2083a87a,0x35169b45,0xe954e75d,0x74506a89,0x577509ee,
36562         0x2aeacf90,0x49cb276e,0xfa409f91,0x08275d77,0xf0bbd6b9,0x61eb6f3d,
36563         0xe4132704,0x948202cb,0xb1c498b1,0x35f3fc21,0x361fee59,0x76c68ba8,
36564         0x50e051f3,0xa18cbbd9,0x318e7042,0x2384a879,0x80dd1e8b,0x292abead,
36565         0x5c37c334,0x65713c29 },
36566       { 0xceb77b9a,0xdccca8e9,0x23b69469,0x2f97e727,0xa01d6b28,0xc76abee6,
36567         0x5abecdfe,0x3925203d,0x29290d70,0x89448082,0xb0314438,0xf9931424,
36568         0x7cd447c3,0x04209df1,0xc855c827,0x7c6f2059,0x56c0e069,0xd97d7862,
36569         0x412d94c4,0x5a9db6fe,0x994c41dd,0x19a64591,0xc89e21a3,0x12348aa1,
36570         0xc6a03f0e,0xd6904b50,0xa616feac,0x55c15156,0x7cc7693b,0x4e36d1b5,
36571         0x3bae3c38,0x6b0e996c } },
36572     /* 7 */
36573     { { 0xcceced00,0x32789fab,0xe5b7aa66,0x3237e71a,0x2ddebcdf,0x87b2e269,
36574         0xb61dad8f,0xb7245120,0xd35f803c,0xe11e5e48,0x98e50f0d,0xfb4df5d7,
36575         0xbcd2ab92,0x60ee68b4,0x1ce3363d,0x98ab2f5c,0x7cd42647,0x15ba39da,
36576         0x83f4fb3f,0x1a6572eb,0xe56f08db,0x0f77de88,0x172562c2,0x1743761e,
36577         0x8a58f0f4,0xbe349ff8,0x84d1d6e2,0xe04da71b,0x9e9ff3b4,0x368f0342,
36578         0x678223f8,0x4022a205 },
36579       { 0x83847375,0x527bbd05,0x3f451af0,0x3ae56b62,0x4b2c7f18,0x6198f24d,
36580         0x4525b98d,0xee323f5b,0x0e0884b5,0xa9d8d39a,0xfb12c776,0xd005d7f6,
36581         0x708bc154,0xd71c483e,0x742541bc,0x8ca6fd28,0xf8397ddb,0x0af3dccd,
36582         0x3eccf243,0xb80d3125,0x58d81b8d,0xc743a108,0x71391f68,0x3f48eb21,
36583         0x33bb657f,0x493aff88,0x07e47e31,0x1d15ed66,0xe08279f6,0x10159b11,
36584         0x24a6a956,0x312179cb } },
36585     /* 8 */
36586     { { 0xfb99cfe6,0x950323d3,0xc9334178,0x7b09bc26,0x7cbdfb6f,0x64111e41,
36587         0x89a75760,0x91141744,0x10919cb0,0x4c633df9,0x396bfd2f,0x715fc7c7,
36588         0x8cab62db,0x8ca19512,0x4db81aac,0x30672473,0xb4c4c54a,0xe67a246b,
36589         0xbf229646,0xd77ea0fa,0xfa5b5d70,0x5bed15f1,0xc2f192f3,0xa5686da5,
36590         0x7f6690ad,0xdecac72a,0xcaa50b7d,0x0c4af2a2,0x6049ad2f,0xf44631c1,
36591         0x04ecf056,0x325d2796 },
36592       { 0x4848c144,0xee11fb55,0xb6a7af32,0x4e062925,0x369e0f9a,0x125b68e1,
36593         0xca53b21e,0xad9bdae6,0x2e98ea1b,0xf50d605c,0x9f2fa395,0xbdb9e153,
36594         0xe91532f5,0x4570e32d,0x46a250d7,0x810698ae,0xad9d9145,0x7fd9546c,
36595         0x11e97a5e,0xabf67721,0x249f82e9,0xca29f7d5,0x9851df63,0xa9c539a9,
36596         0x71d0e3e5,0xfd84d54b,0x041d2b56,0xd1e0459c,0xfd80096a,0xceb3eb6e,
36597         0xe32a79d3,0x19d48546 } },
36598     /* 9 */
36599     { { 0xb540f5e5,0xfe19ee8f,0x04e68d17,0x86d2a52f,0xadbdc871,0xd2320db0,
36600         0xd03a7fc8,0xa83ad5a8,0x08bcb916,0x54bf83c7,0x2e51e840,0x092133ea,
36601         0xcb52dddf,0xbce38424,0x31063583,0xd5c7be40,0x458e3176,0xc1ebb9df,
36602         0xbc4dabbf,0xafb19639,0xc05725a8,0x36350fe4,0x84e1cd24,0xac4a0634,
36603         0xc145b8de,0xadf73154,0xb3483237,0x0aa6dd9e,0xcbff2720,0xa3345c3d,
36604         0xb4e453b0,0x1b3ace6c },
36605       { 0x90a8bdc5,0x0343e5e9,0x6306a089,0xa203bf9d,0x8e48520e,0x98489a35,
36606         0xde7d1d06,0xbd17debe,0x5f795d3f,0x8fafa6d7,0x387b0a3f,0xa4ceb630,
36607         0xffddeafa,0xe0166b32,0x7e764e02,0xa2fe2054,0xe871f304,0x55ab9824,
36608         0x952ec45e,0xa2bd36bb,0xa90d20ca,0x7b4c1484,0x75bcfb53,0x5319f387,
36609         0x6982c4e5,0x34238a4a,0xa102921d,0xa2bb61c7,0xdb3ab17e,0x1e061b64,
36610         0x192f0a14,0x538ec33e } },
36611     /* 10 */
36612     { { 0x576374c2,0xe53c7785,0x84727040,0xe60526d1,0x228ca044,0x8a066dc8,
36613         0xf1ce1313,0x1fe1c1b2,0xcdeb0c5d,0x2aeec832,0x9cbf826f,0xa7596699,
36614         0xde77a589,0xcd188e81,0x118d1254,0xe5ce0fe0,0x0790b86a,0xa142a984,
36615         0x39ac28ce,0xe28f043f,0x87de5804,0x4eef8290,0xf639a8c5,0x83c31b32,
36616         0x5887794f,0xd70454a7,0x18b1b391,0xca635d50,0x31d9c795,0xcefea076,
36617         0xb6f8aa25,0x13cbee76 },
36618       { 0x8d3f34f3,0x79cabe0f,0xa3617fe3,0xbda9c31c,0xdd9426a1,0xb26dee23,
36619         0xf29c9104,0xe9dd9627,0xe2c6cd3b,0x033eb169,0xfcba2196,0x8a73f492,
36620         0xb858c83c,0x92e37e0b,0x23b3fbb7,0xe4f2aca6,0x64be00a2,0x8101fb1e,
36621         0x948f6448,0x91a7826a,0x907260e7,0x414067b4,0xe30bb835,0xf774aa50,
36622         0xc999c06e,0xf922ca80,0x0ba08511,0x6b8635b9,0x25fa04f0,0xbf936b5c,
36623         0xe02e8967,0x4e0a1ada } },
36624     /* 11 */
36625     { { 0x8ba29c4d,0x00ca6670,0x22988094,0xc08240ce,0x16dda752,0x21c5ca67,
36626         0xabbbfa34,0x689c0e45,0x3ed28b72,0x1d7545fd,0xd7c56ab4,0x5f221198,
36627         0x38759d65,0x4b3d8f74,0x8fe50b89,0x93490dfb,0xe80eba16,0xb641f5d7,
36628         0x79acb537,0x7b0da5eb,0x0c1d5e5e,0xab6b1497,0xa5da429a,0x2338e68d,
36629         0x2f6d2f25,0xe010c437,0x6530f3a7,0x226f16d2,0xcbef08bc,0xefb0f7b6,
36630         0x9f99c999,0x733e30d9 },
36631       { 0xa42a38f9,0xecfe1582,0x4730b500,0xaec2d58e,0xde976b2c,0x2ee2f2a7,
36632         0xa969c1bb,0xf0539db5,0xfcecdb4a,0x31954168,0xe7a8e902,0xf2f7348a,
36633         0x3121541f,0x1d58d7cc,0x2202ae52,0x5d25b75c,0xf40835a7,0xdea9965a,
36634         0x529b4e46,0x3feb6a41,0xbd27ad9b,0x5c97fb6f,0x261f900b,0xd87554c0,
36635         0x04d5b19e,0xb43031d9,0xcb219b9c,0x33d5e9b8,0x3ee00bcf,0x7a43d492,
36636         0xb79a5c0c,0x56facb39 } },
36637     /* 12 */
36638     { { 0x7c834915,0x667eaed6,0xbc5eb64d,0x9f77aa6a,0x25d62011,0x729ebcb6,
36639         0x699fd9c2,0x0aee24f2,0x2b8d4f6c,0xe1eb5874,0x14c976d6,0x7f12710c,
36640         0xf6d9ea65,0x91390335,0x06b50064,0x668b7049,0x0876ee4f,0x65969a0e,
36641         0x2f9d9360,0xf901bf3f,0xb499e3ce,0xfb1a8651,0xf2dbcaaa,0x80b953fb,
36642         0x973b06b6,0x312cc566,0x3af36c64,0x3534d9c3,0x10ffd815,0xe4463a52,
36643         0xf18c2b91,0x57ea2b4b },
36644       { 0x8aa0f2f2,0x00f5e162,0x0e46bcaa,0x8c7e75c5,0xa4a2c42d,0x97ab479a,
36645         0x14baa202,0xb4f308ea,0x6943cc2e,0xa901bd14,0xeed58804,0xbb125fee,
36646         0x9d180f7c,0x6502c8f9,0x1580c61c,0xe5353919,0x27101ee3,0x7e278069,
36647         0xfaa72717,0x7a0a40a1,0x4c75b153,0x32edce02,0x538f1c22,0xda23660b,
36648         0xbe307d2e,0x4d511e98,0x9baee0b4,0x24276e40,0x7ff1f307,0xa78c3927,
36649         0xea7935c9,0x60480b46 } },
36650     /* 13 */
36651     { { 0x3872ece3,0x31087d66,0x955b70f8,0x5f29be7d,0x9cf95bb8,0xb50b4fc7,
36652         0xdbffa621,0xbae3b58d,0xe022ba5d,0x0e61d280,0x4181449c,0x78ae5117,
36653         0xcf555485,0x0b132840,0xb8ce0b0e,0x800ed1b6,0x78d5de3d,0x35dffdd5,
36654         0x69a56b47,0xf7e42374,0x8d910ae7,0xd5e32369,0x6313c7c7,0xb6ff52a0,
36655         0xa92de9e5,0x5a2fe20d,0xd12110bb,0x41b347d3,0x40c16f23,0xc5905edb,
36656         0x9a8f88cc,0x0774a0d3 },
36657       { 0xe3b6c106,0x3ae181ab,0x8de150b7,0x4ebe163f,0x6f354836,0xcf75b82f,
36658         0x3ac7ac16,0xaa0d2063,0x291722af,0x5c680668,0x11545553,0x73941e61,
36659         0xbf5de3f7,0x17127e38,0x1afb41da,0x32cfdf03,0x87bc8663,0xc6893c91,
36660         0xa62c9c99,0x75046744,0x962c1947,0x96866e2d,0x378cdf4c,0x489ec8df,
36661         0x3407fa32,0x3a60709b,0x551290d1,0xd37d2159,0xbab92273,0x9623d303,
36662         0x2432014b,0x08151954 } },
36663     /* 14 */
36664     { { 0xb05f2b26,0x569044f3,0x80b9f76c,0xb35a294a,0x4290f6ae,0x8839fe28,
36665         0x026a5877,0x761cfb23,0x2e5ff9c3,0x768926b6,0x0b11c576,0xbae6cd20,
36666         0x72a03efe,0xdc857756,0xe1bad63a,0x0cae074a,0xd709d99c,0x3fe491a1,
36667         0x6501d9c1,0x76c5ded6,0xc32aeff7,0x1da6eca1,0xc57683e8,0x50849d55,
36668         0xdf98d847,0x9e392e9c,0x64d9a564,0xfad7982f,0xa37b98b2,0xf7c3bdb7,
36669         0xf0860497,0x1fe09f94 },
36670       { 0x7648cc63,0x49a7eaae,0x67cfa714,0x13ea2511,0x653f4559,0xfc8b923c,
36671         0x81a16e86,0xd957619b,0x3c864674,0x0c7e804b,0x1616599a,0xfc88134a,
36672         0x0a652328,0x366ea969,0x4bc9029e,0x41532960,0xae2aad2b,0xef9e1994,
36673         0x7f10bef5,0x9e2a8c52,0xc67bf860,0x73dcb586,0x844cc25d,0xf61a43fa,
36674         0x74eb3653,0xd74e7eea,0xdd240f02,0xf3356706,0xfd83bcb4,0xeec7694c,
36675         0xdb62526a,0x4de95786 } },
36676     /* 15 */
36677     { { 0x3deac2f7,0x4867d315,0xb61d9a8e,0xa084778a,0x0ab7b2d5,0xf3b76f96,
36678         0xcfdf4f79,0x00b30056,0x31ab8f4b,0xd0701e15,0x9c779d01,0x07f948d5,
36679         0x82675371,0x7c994ebc,0x48bad4c0,0x1104d4ee,0xbfc9d058,0x798ce0b5,
36680         0x309fa80b,0xc7ca898d,0xacb33eaf,0x0244f225,0x5b2f3175,0xd51e8dfc,
36681         0xa4d7be34,0x3e49ba6b,0xbda02b43,0x1760f4c7,0x4435275a,0x37e36a7e,
36682         0xe636980c,0x1c94418b },
36683       { 0x09dc1414,0x43a21313,0x43c93537,0x060765fc,0xdf5f79ce,0x6ff3207a,
36684         0x85d4cfca,0x6f18b1fa,0x63e995ab,0xf5c4272e,0xa82b3002,0x121a09e4,
36685         0x97147f16,0x82b65d1b,0x20a7fe26,0x4993c20c,0xe6716726,0x99c9cb98,
36686         0xfeb440a0,0x5a02d673,0x251b4bc5,0x3f3fa9e1,0xa05338ea,0x75dbc474,
36687         0x7b09f6cb,0x3cb4044b,0x80434609,0x6767da18,0x098ceac2,0x97851422,
36688         0xb55235ba,0x611bfbb2 } },
36689 };
36690 
36691 /* Multiply the base point of P1024 by the scalar and return the result.
36692  * If map is true then convert result to affine coordinates.
36693  *
36694  * Stripe implementation.
36695  * Pre-generated: 2^0, 2^256, ...
36696  * Pre-generated: products of all combinations of above.
36697  * 4 doubles and adds (with qz=1)
36698  *
36699  * r     Resulting point.
36700  * k     Scalar to multiply by.
36701  * map   Indicates whether to convert result to affine.
36702  * ct    Constant time required.
36703  * heap  Heap to use for allocation.
36704  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
36705  */
sp_1024_ecc_mulmod_base_32(sp_point_1024 * r,const sp_digit * k,int map,int ct,void * heap)36706 static int sp_1024_ecc_mulmod_base_32(sp_point_1024* r, const sp_digit* k,
36707         int map, int ct, void* heap)
36708 {
36709     return sp_1024_ecc_mulmod_stripe_32(r, &p1024_base, p1024_table,
36710                                       k, map, ct, heap);
36711 }
36712 
36713 #else
36714 /* Striping precomputation table.
36715  * 8 points combined into a table of 256 points.
36716  * Distance of 128 between points.
36717  */
36718 static const sp_table_entry_1024 p1024_table[256] = {
36719     /* 0 */
36720     { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36721         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36722         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
36723       { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36724         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
36725         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
36726     /* 1 */
36727     { { 0xe0162bc2,0xbf9c7ec6,0x10a89289,0xddecc6e3,0x9e499d81,0x5d599df0,
36728         0x6d358218,0x9a96ea28,0x70c5f8db,0x01aec7d3,0x8cf5d066,0xe72e4995,
36729         0x3e91d7f8,0xc2e7297d,0xda9f2f5a,0x8621db92,0x5a5679ed,0x4b26c867,
36730         0x2c56aac1,0x233385df,0xc6a13f99,0xb88e74d4,0xffa8ec11,0x1214b173,
36731         0x1f3f9fef,0xa0386a27,0xc0e7b44e,0xbd9b1b4e,0xeecd3496,0xafe528dc,
36732         0x1c49f80b,0x8dfff96a },
36733       { 0xc03c0c83,0xb4a4753a,0xabcdcd75,0x68e69d18,0xf775b649,0xe3839b88,
36734         0xbf58f352,0x803f949a,0xbd0bc15c,0x5f702679,0x8ff298c2,0x85bf5d16,
36735         0xc6c7976e,0x3f6ebd98,0x45e3e1b4,0x20618af4,0x54e64093,0x67d5598e,
36736         0x504fed9e,0xb047283b,0x70d87517,0x450cabfd,0x3f5addbe,0x47d628bf,
36737         0x78cb4cca,0x0037ef30,0x6b1c4908,0x4e148d3c,0x4fcfd837,0xe256d329,
36738         0xde3c01f3,0x2aa1207b } },
36739     /* 2 */
36740     { { 0x01900955,0xa95b6dae,0xceb4656d,0xa5dc9cc1,0xe72fe95b,0x50c78907,
36741         0xa040c334,0xa1ae5447,0x7952ea6e,0x91191370,0x6d097305,0x54ff7343,
36742         0xbda4d10f,0xa4db0074,0x91644070,0xfd5306f1,0x8b24522c,0x14b9fe73,
36743         0x7849f762,0x1468dad6,0xb0dcd2e4,0x87b29a18,0x5e1ad492,0xadd7f1a1,
36744         0xdbba2a1a,0x9ac63a81,0x81223379,0x01379c5b,0xb0e53bc8,0xf402b2f0,
36745         0x0bf13b61,0x8c3eb27f },
36746       { 0xe513696f,0x9a4ad3e1,0x18c81ffa,0x0350ba5c,0x3c033d13,0x1e2fc136,
36747         0x17a531bc,0x53da6e71,0x1aed610d,0x42ec6490,0xe99ff567,0xd33e8df7,
36748         0x3deed12a,0xe4aad73e,0x180f4deb,0xd983b465,0x502f30b4,0x99365269,
36749         0xa8918d7f,0x7e2799ab,0x700fc79a,0x0ffe84b6,0x40bfd8c2,0x7b4400d6,
36750         0x5d2641bd,0xc3a21d21,0xc32621cb,0x79839442,0xb1401e83,0xace6500b,
36751         0x251c4310,0x7bf4163e } },
36752     /* 3 */
36753     { { 0xe3fd589e,0x1c174f88,0xdf974a03,0xdb501790,0x3e70549f,0xd09623e3,
36754         0x15924f34,0x8d091eff,0xf9b65ac5,0xeef79cad,0x3f69c2cf,0xd2cc4262,
36755         0x52cd82bc,0x817d9032,0xa5f1dddd,0xacf4f4d9,0x5011b6bd,0xd0612635,
36756         0x2ed140c9,0x9f74490d,0x4db686d2,0x64092e8c,0x776b0fcc,0x225eef16,
36757         0xdf16aeb6,0x0e8c01e9,0x84bbd82a,0x62836741,0x8956e337,0x757574e2,
36758         0x705a7f07,0x9871edc6 },
36759       { 0x776535f7,0xbd0b76d5,0x2635b3b8,0x5214d602,0x9d216f64,0xc0c25ad9,
36760         0x5515bf75,0xfd4df3a7,0x5e9f1675,0x24a625bc,0x406873e7,0x3c35efb7,
36761         0xbb2e5c4a,0xef5c9a33,0x806b198a,0xa971b35e,0xa3c690ed,0x9f5c0ca5,
36762         0x8e1e2341,0xa8d5dd89,0x955ad9e4,0x4cecbcce,0x248d3416,0x2ecf4407,
36763         0x45c0af6e,0x1abb3811,0x1c780fff,0x3f4bee82,0xc272ed57,0xd14df768,
36764         0x371637ad,0x397ed10a } },
36765     /* 4 */
36766     { { 0x755c2a27,0xcf3e0bb2,0x59585c44,0xd38e42f9,0x19285e60,0x46b13e0f,
36767         0x76273d0f,0xc3ecd0c0,0x193c569a,0x7800f085,0x4351818a,0xf04e74ab,
36768         0x8496363b,0x9258aa38,0xb8c894fe,0x8456617c,0x2af969a0,0x8bc62aaa,
36769         0x5a4668d9,0x66c2280b,0xa992f4fa,0xbc9df58e,0x3f401e99,0x5db0b7d9,
36770         0xc4c38c0e,0xe0614fe1,0x2ccdf6b3,0xd531151c,0xe143b618,0x1c7575ec,
36771         0xdf9398a4,0x40247985 },
36772       { 0x8f055746,0xfba25178,0x0ab1e6e0,0xc5ba0040,0xac292697,0xe1b194fb,
36773         0x5b4f4740,0x77152119,0x9bb7ba54,0x250091d0,0xb9a139a4,0x7a674861,
36774         0xf353aa7e,0xba8413b3,0x2443ceee,0xafe77192,0x3847bbd0,0x14468d36,
36775         0x3da4942d,0x61f79ff6,0xd425b456,0x1563a1c1,0x75ff4630,0x3c270fcd,
36776         0xeb2802c9,0x42072090,0xc85c7004,0x68f0cdcb,0xfa032e74,0xca4372fb,
36777         0xc8b79d80,0x1a6fd1e6 } },
36778     /* 5 */
36779     { { 0x8d5116a3,0x967a901a,0xb2f5f47f,0x0b844394,0x60ebaf3b,0xe39ad452,
36780         0x60ccfc0c,0x1e1be617,0xcc3f53f2,0xac07e3d2,0x1ed11bb6,0xdd838e0e,
36781         0x1c15b0c2,0x45475307,0x920fe5b8,0x70dd4748,0xe471896d,0x1a20be2d,
36782         0x59276c7c,0x3c3fad8a,0xc886ee07,0x026a1cc3,0x6e831ac4,0x9fdb6f37,
36783         0xac501d65,0x26a35d1a,0x40da8574,0x0ae98905,0xabd734e5,0x65dde0a4,
36784         0x15614750,0x29b7d4dc },
36785       { 0xcbf4e20b,0x44b3c2cb,0x58cc44c5,0x1c3f548f,0x5b0cac1f,0x39809b54,
36786         0x00f80621,0x0c0f02b5,0x066905e0,0xe612b890,0x8350188c,0x8f158ed7,
36787         0x3f5576b2,0xc01dc458,0xa45492e0,0x29803272,0x0ff92443,0x77a5623a,
36788         0x29d0dc41,0xd12a2b00,0x2780e87a,0xb4125459,0x0d53f272,0x1ebcf903,
36789         0x24301e8d,0xbae6ea40,0xa37d0798,0x1e5f3f2f,0x22b4126c,0x9342c310,
36790         0x5382497e,0x5d092802 } },
36791     /* 6 */
36792     { { 0xff2f780d,0x583a2b7e,0xd7d76b1d,0x34d26820,0x86f74aec,0xe3c32847,
36793         0x10823feb,0x0fd42212,0xfb5e7bf4,0x227e417e,0xa568f8cd,0x510d49b6,
36794         0x1781bbec,0x53bce7d6,0x2f3718b7,0x9cfe3f22,0xd9de6c1f,0x7f44e89f,
36795         0x3fac9b55,0xf1cc553f,0xe6f300bc,0x9d2d0846,0x9f0ae6b1,0x976c82a2,
36796         0x24b8bbe0,0xe63dbf5e,0x973a5aa7,0x4cac7f45,0x84dd33c7,0xc6eb6237,
36797         0x142fee5d,0x0a26e434 },
36798       { 0xacaa9a08,0x8081339f,0x5246ece1,0x40f31105,0x61393747,0x892c8170,
36799         0x242f02e1,0x8d8d4103,0x3b5de98a,0x482bfd20,0x5abbe952,0x89ef946b,
36800         0x37698249,0xb8d218b9,0x66617c7a,0xd5268e89,0x8b7d2b91,0x962e7551,
36801         0xfe8d67c3,0x2c5c7973,0x2b017c51,0x42e3150a,0xc1a29469,0x6f4e5ebc,
36802         0x531c7083,0xa39910ce,0xb77b9e50,0xaf4f6eb4,0xda120ad0,0x68cbb175,
36803         0xb92636ec,0x19497c61 } },
36804     /* 7 */
36805     { { 0x417659a8,0x6920b0c6,0x92cb28ff,0xc77ab9c7,0xb687797f,0x55b67180,
36806         0xe7759363,0x4caf58c1,0x5561b186,0x5155bdb6,0x780f4946,0x2e64e355,
36807         0x229a8b20,0xeb0ac9b7,0x2571bd60,0x88594d78,0xe3fa78f9,0x5dcc0939,
36808         0x2ac2d379,0x7b8b4830,0xb90f1444,0x505fbf60,0x3ce4b3c1,0xac610e81,
36809         0xd59b5c18,0x39a4f27a,0x7cea0222,0x5fa33973,0x8dff1c7b,0xe578730b,
36810         0x517bf7a6,0x96b91b8b },
36811       { 0x9aac087c,0xc1a991f4,0x6cfdb28d,0xce62f74e,0x5f7600d6,0x08d6ff9a,
36812         0xf917f9c9,0xd781cd04,0x3de52dbf,0x7796f5f6,0x2ed72180,0xe7db64e0,
36813         0x6fa4137d,0x0f0876f6,0x3ca1f716,0x3271ee64,0x7c4ab8a3,0xcb9b2058,
36814         0x39481047,0xcba17107,0x598c5c37,0xdf9a190d,0x6f20e125,0x0cb6e72a,
36815         0xf4f2902d,0xa3142204,0x7ce2dcfb,0x42d28cb9,0xa3d3c351,0xdf261b8a,
36816         0xcffc249d,0x73f3d315 } },
36817     /* 8 */
36818     { { 0xe6fd3673,0x5d86855b,0x9d214b7b,0x309b70af,0xdcc46cd3,0x8d332f90,
36819         0x595510de,0xe553c015,0x38c1251c,0x5746a096,0x85cc1bc9,0xcd7cea5b,
36820         0x002eba8f,0x4ffa1468,0x22fcd77c,0x10a3cb70,0xc4ea05e3,0xb6999dfb,
36821         0x4efa756e,0x3375a0d0,0xdced5fd8,0x4d90279e,0x251fd56e,0x48192403,
36822         0x82a4c5f1,0xe87633a4,0x1b34105b,0x3170d130,0x7247e578,0x93998b0f,
36823         0x436ba1fa,0x88934f64 },
36824       { 0x4713eabc,0xf09f43b0,0xaccdc517,0x4ca7dd91,0xef13ca7c,0x27daa63b,
36825         0x2588184b,0x8b2e5a7a,0xd95dc269,0x0a8cb612,0xe1f2f14c,0x346975a2,
36826         0xe172935c,0x1f29b8ed,0xd40bc1e3,0xc3cbfd6e,0x132623da,0xd3f46b3f,
36827         0xfb0b7681,0xc115be6d,0x56da4344,0x5e31c345,0xa8e43d98,0xa7c63f18,
36828         0x4bddb4ea,0x55cb2083,0x4a54f58c,0xb16a0c38,0x46fd69d9,0x74eacca2,
36829         0x153548e1,0x0d1898bb } },
36830     /* 9 */
36831     { { 0xe35ef043,0x4ea73461,0x3496b564,0x107b67d9,0xd0f83a3c,0xd62c173b,
36832         0x51d29c35,0xfad4b038,0x71b1c1a4,0x3f42882a,0x54b43b9e,0x5d2bcf66,
36833         0x2abdf543,0xc77b15aa,0xdabe3dc1,0x5cb38a80,0xa481673b,0x15fda0ae,
36834         0xe7b90ebe,0x86996b4d,0x2bc8f3d8,0x84f87e25,0x37c4e424,0xaded03d6,
36835         0xd7a7afd8,0xe5ede666,0xa1ccb93a,0x80dd95a2,0x46fba391,0xa55cfd25,
36836         0x46f82e60,0x2bdab1dc },
36837       { 0xfa6fed61,0x7a4de22b,0xcc8dd94e,0xca458aa5,0x071222f5,0x3e372df1,
36838         0xe5aff377,0x06a4b44f,0x4a738e6d,0xbc2d0ba7,0x5f31f136,0x1a470e1d,
36839         0xe102a911,0x77ff933a,0x310c7885,0x8b380a50,0x783fc5ac,0x9f3c0228,
36840         0x44725d06,0xec668925,0x5ac84221,0x878f0e16,0xcfda6e8a,0x9a3af1af,
36841         0x78cd2aba,0x0183ed37,0x826d0eae,0x32cdbd60,0xcbee6415,0xb3234661,
36842         0xb9c10120,0x353eb892 } },
36843     /* 10 */
36844     { { 0x10b5521e,0xc8fdcad6,0x52e702f0,0x1a11b440,0x8ffda49c,0x6302680d,
36845         0xcbf36bad,0xcdb9654a,0x4c10a2d7,0x7b58ce11,0xe630e7e0,0x1e5d1f7d,
36846         0x6760a813,0x8cbe3d7d,0x6480d77f,0xeb35866b,0x7f036219,0x58728cf3,
36847         0x42a8a757,0xdd5865ed,0x906a2870,0x283f1f1d,0xa51f906b,0x79e23fa4,
36848         0x543b20a8,0xf2ac6e83,0xb81e7754,0x4f0b6379,0x840016ee,0x57fbc0d4,
36849         0xe621b67d,0x8da20771 },
36850       { 0xecce65ec,0x3c855004,0xb748185e,0x76d10d1f,0x78797ad2,0x64be7bca,
36851         0x77e54aad,0x43444db0,0xbe0df0ff,0x17b6b0c9,0x055086a4,0x8fc4256c,
36852         0xfd74d5a3,0xf952c43b,0x01c4edb8,0x501e005a,0x4a57e328,0xd5172dfc,
36853         0x535d6ee3,0xdb40ce4e,0x0c650918,0xbaef1e5c,0x857561fc,0xe85145e7,
36854         0x34a224c6,0xe468536a,0x0ec0e0a2,0x69a8e227,0x242b03fc,0xb3f52247,
36855         0xc3bebd5f,0x862f55e2 } },
36856     /* 11 */
36857     { { 0x226049fe,0x2d6a390f,0xdcbbc9fb,0xcc92a578,0x97634fb7,0xa52feca4,
36858         0x3dea5893,0x2b340cb6,0x2a49e916,0xa39f338a,0x949e41f3,0x26b2df3d,
36859         0x065a7e40,0xc71c7cdb,0x468281a2,0x4a9b84a0,0x731eeeca,0x63eeb503,
36860         0x76cbb725,0xe6d09134,0xb94a678c,0x0cf979a9,0x808fd9f1,0xb44d8c3b,
36861         0xe0afc5b9,0xe60da613,0x3ea5be69,0x52dce7de,0xdc1ee74f,0x3a5d6864,
36862         0x3bc80790,0x71ab2891 },
36863       { 0x3b5b60ad,0xcf618fc4,0x4a0c3184,0x0afb5e30,0xbc403302,0xd22381cc,
36864         0xdb1c0c66,0x33cf8953,0xa6112a8d,0x9c994e4d,0xd1967a86,0xd7aae2c3,
36865         0x5b7acd29,0xc28d5493,0x6c9a57fb,0x8075bd13,0x9c8427f9,0xc9c0373e,
36866         0x193225f5,0x2cbca18d,0x442c018c,0x73777d13,0xfbb3a727,0xebe5ed47,
36867         0x1962dc18,0x70437d49,0x2dc08806,0xf39c1e09,0x15fff35c,0x03e9c6f7,
36868         0x5e360a65,0x8d087bb6 } },
36869     /* 12 */
36870     { { 0x3fdc1844,0xbe212302,0x105eac56,0x6eca27ef,0xf168a348,0x2183a606,
36871         0xe1d7a4cb,0x295f807d,0x7ef5d43e,0x7246a632,0xc77025c7,0xae143205,
36872         0xf3484e3e,0x4bdfc7ca,0xdf52c075,0xec939895,0xd7a9cac0,0x82e655f6,
36873         0x8baeddb0,0x985dfe20,0x527de731,0x79c817e4,0x313de1ea,0x30ce0fbc,
36874         0xcc4f6cbb,0x9df95b89,0xf5bb20cd,0xf2aedf1e,0x1a8cfb01,0xfc1e0a89,
36875         0x63edb7ec,0x225ed34a },
36876       { 0xbabb1a85,0x3e13154d,0x1e6a565a,0xd3d8dae7,0xab4b100f,0xd3217d56,
36877         0xebc78e1a,0xd44d934e,0x48e73d37,0x0215321b,0x201e43cb,0xbbc90bfa,
36878         0x27500905,0x3c23f1d0,0xc86691a1,0x2a2e5000,0x6065841c,0x08b2bad2,
36879         0x30026b60,0x15d41caf,0x5276ce61,0x1712c2f4,0x15932ffb,0x01c4c3e7,
36880         0x6a74caf2,0x7894e13d,0x0c0537a4,0x02d6f5df,0xc2b1c97e,0xa8fb7602,
36881         0xd0887c7b,0x612b60e5 } },
36882     /* 13 */
36883     { { 0xba245d6b,0xefd495cf,0xa2ce3ff6,0x5cf0cbb7,0xdff5feee,0x24da2ac0,
36884         0xcf28c6a3,0x90c914f8,0x4308a56b,0x72fdb50d,0x13d72034,0x03dbf779,
36885         0x822ac9e9,0xcfa5ec91,0x3aea3e81,0x0dde73c8,0x66289139,0x545ba962,
36886         0xca6acbd3,0xa52f648b,0x98a0683a,0xff6f276e,0xa378ed52,0x2536d3ac,
36887         0x885ac1d9,0x353c2c54,0x00bc84a7,0xcaff52da,0x37684167,0x3971f81c,
36888         0xd2d7986e,0x0f7334e1 },
36889       { 0x6596067e,0xafbb5c83,0x38c19806,0x33e54e19,0x39cb0dcc,0x8285d967,
36890         0x424035f9,0x2b53f43d,0xdfef9095,0x38c531f8,0xdb0f571a,0x90fbe8e4,
36891         0xa39ca787,0x9a0c1ed2,0x606f2620,0x2fecc1d6,0x72b7cb4a,0x9dc890b1,
36892         0xccbb7868,0xc33ca6fb,0xfe73ee49,0xd1b11082,0xfcb66c48,0x590b7d17,
36893         0x86e14573,0x9356b0a6,0x053ead85,0x75d682c4,0xc54d30fb,0xb2ae55fa,
36894         0xf8aee949,0x67636a72 } },
36895     /* 14 */
36896     { { 0xb91d6bea,0x638063bc,0x923ecb96,0xae263a2e,0xc627aca6,0x9d7b0992,
36897         0x77af9e7e,0xc6ed001a,0x24aafebb,0x9214accf,0x78055a90,0xa3564b96,
36898         0xe027499d,0x00999b1c,0xe46a06a5,0xe413a4e1,0x2e51efe7,0xa05d13f6,
36899         0x9ba843be,0x35e87d34,0x3183159e,0x0a633825,0x54601923,0x6023e8ba,
36900         0xb7fd1cf2,0x9b107721,0xfdf2fd53,0x46b5542b,0x1c18af38,0xb314f4f8,
36901         0x60ac8965,0x086f9876 },
36902       { 0x8cbb9850,0x76701954,0xa20d2c8c,0x6210b730,0x5335670c,0x4084d057,
36903         0x0324baea,0x3ecdc595,0xc76ee9b4,0x607fc5f2,0x440ffa64,0xf393d00f,
36904         0x2dc1463c,0xe0111796,0x9c7725e7,0xf00b8251,0x5bd1d186,0x35e60736,
36905         0x2cf72aac,0xf3d8554c,0xefa3497d,0xb4dd0fde,0xf646ad11,0xd712268c,
36906         0x9f7b8ead,0x07c20afb,0xfc06dfe5,0x630969d4,0x7245549a,0x76b7df1c,
36907         0xe61ae810,0x681f9403 } },
36908     /* 15 */
36909     { { 0xc9a0623b,0x7cad5163,0x67fab8d4,0xdbf82957,0x81af7c7c,0x2ccab0ec,
36910         0xe966d5c2,0x469e38c8,0xf0d4e41c,0x34430d52,0xa52b359c,0x426075a2,
36911         0x33bd0127,0x242dd3e3,0x9fed2341,0xcda3f635,0xd7d52ffa,0x4df33730,
36912         0x7640c3ef,0x5fff56f0,0x1bbde57c,0x4783c21c,0xeb8bb336,0xd8784a2a,
36913         0xead08405,0x1ec7c533,0xf9b62bd4,0x4b7f1423,0x7075d4af,0x5543145c,
36914         0xba60590a,0x0c9de94a },
36915       { 0x95d5682b,0x8ed72735,0x2ec276ed,0x711c4283,0x8b36a0d2,0xd1f4aed5,
36916         0x8498a88f,0x62ab40c4,0x4480f451,0x58c8fc62,0xb79cffe2,0x8bc8ca4b,
36917         0x701a359d,0x90ab583c,0x3fd5d15d,0xaee31a73,0xc912333c,0x02a5597b,
36918         0xb6c3e3c2,0x1019cae4,0x29938088,0xe513042c,0xf47c8199,0x0e00283d,
36919         0xf2a00e92,0x90d68e58,0xa775ae3b,0x69e2df41,0x871c30b2,0xb8d2eca5,
36920         0xbb1de396,0x733dca0e } },
36921     /* 16 */
36922     { { 0x4b59213a,0xf5b495d0,0x8d70200e,0xca672039,0x2b6771c1,0x4bcb09a6,
36923         0x2b9eb0cb,0x26adeed4,0x8cdba212,0xeb544754,0xf08890d1,0x0e1abfcd,
36924         0x698e46b4,0x52509963,0x82e9c138,0xe1bff0b0,0x51099a71,0xa189e4cd,
36925         0xc9b91cc7,0x2360c9bc,0x137ec4be,0x9bd4d7dc,0xd1519f6e,0xd0356521,
36926         0xcf832503,0xbf5f6d78,0x8deea2b4,0xe4301031,0xef4c319c,0xc3132494,
36927         0x0f1fa7d7,0x2ab3bd47 },
36928       { 0x922c9fbb,0x5753b680,0x0f16c6d1,0x869e7dc8,0xbac16efc,0x83445135,
36929         0x846d1d9b,0x4326a3b4,0xb2d62c21,0xb517fee3,0x0b292ad5,0x6905afa2,
36930         0x2cadac13,0x2a57131a,0xebdbca8d,0xcd904d8f,0x3f365fb2,0xdfeda86f,
36931         0xdc7eaa1c,0x7097b208,0xa45e77c0,0x89a35a84,0xcf5d118e,0x417a062c,
36932         0x1f6e99e8,0x3c0c04a8,0xba7a087d,0xc44704b0,0x3ea22ad2,0x6f8a27d1,
36933         0x4c27d229,0x93a4b416 } },
36934     /* 17 */
36935     { { 0x1f1efb7a,0xd4271bc1,0x33fccc0d,0xae4e68e6,0xb11f50a8,0x9d9bc8f1,
36936         0xaf076089,0x5430398f,0x443d0e03,0x45e242fb,0xf6e3d4c1,0x73ec2519,
36937         0xba9bad09,0xab70f790,0xf9add10f,0xde612ad5,0x14e942b4,0xb837e54e,
36938         0xddb8b68a,0x175a56d3,0x1ac2a408,0xe85b233c,0xf0c80f94,0xf8ff6c30,
36939         0x898db4f9,0x4b7f3fb7,0x45a7dcdd,0xa2c6044f,0xfe3d3895,0xf3abb2f6,
36940         0x32ee7763,0x342ce0d7 },
36941       { 0xcf491b1f,0xeb261394,0x1909e395,0xdcaaeed7,0x9fe4dbea,0xdcc4055a,
36942         0x493d604d,0x17a6611d,0x1ce5ebef,0xba445a3a,0xe3989cb5,0xe82e2858,
36943         0x83f58406,0xb96f4282,0xa156cf55,0x99877b99,0x4e166a0e,0xaf906a66,
36944         0xb2976d13,0xcea1d353,0x36c61a01,0xefc16f27,0xb0f55d86,0xdb04c433,
36945         0x8eb34c01,0x3cb4b269,0x2ae60280,0x38d07f78,0x43be3ec5,0x43ac3bcb,
36946         0xe156fd20,0x455f4af3 } },
36947     /* 18 */
36948     { { 0x754ec21c,0xc057f262,0xe3a1ba38,0x3eacd4c9,0x116c1fe9,0x3a0210d1,
36949         0xeacc8ab6,0xe4ea4e94,0xea6f32ca,0x31c00c9a,0x86b975ce,0x5cb6239d,
36950         0xa14ea1e9,0x654d5d8c,0x5067fc8b,0x230d31f4,0x6355fecb,0x48bb90cb,
36951         0xdc172e8e,0x78f81ece,0xcb006737,0x288380a8,0xe162d012,0x19b02e01,
36952         0xc5af145c,0x0e087a06,0xb72dc354,0xf04dc8b7,0x8de3c066,0xf70ef214,
36953         0x13009fb7,0x4f148243 },
36954       { 0x6e2055e2,0x5e004fce,0x86c32067,0x89e247ea,0x5f9daaa2,0x4ebcbd95,
36955         0xceb7f63b,0xd15f212f,0x863784a0,0x5ecc5c1f,0x75760251,0x53b3800b,
36956         0x8a6a2954,0xeb9301c3,0xa13cdd19,0x0f16ba18,0x887c2d24,0x8313d251,
36957         0x9a9413f6,0xf9923585,0xfe3fd7c5,0x423405e6,0x16e0ee05,0x678aeb34,
36958         0x3fadaab0,0x1f3be7bb,0x82884471,0x7901fa2c,0x4d662ff6,0xc950db30,
36959         0x3c01170b,0x74d5d2d4 } },
36960     /* 19 */
36961     { { 0x2b5bfe11,0xa3002dc0,0x52d321e7,0x0733410d,0x9679ba89,0x15920f65,
36962         0x685b236e,0x0e248c14,0x346f6040,0x8cfab594,0x40c717f0,0x9f57afb7,
36963         0x66044576,0x0dbab28c,0x9cdc3247,0x0fa09968,0xc230ed05,0x41e02ae2,
36964         0xe45bef74,0x0d961554,0xce4d7b6f,0x9688a982,0x5e62d22e,0xfadefac7,
36965         0xbd2cba28,0xaf1512a6,0xbe7c749f,0x78868e62,0xae9f5a6b,0x88048d81,
36966         0xc5857a29,0x6b1a5442 },
36967       { 0x43242066,0x9f5ab9ad,0x2ccca2ae,0x0afef1b5,0x988edc4e,0xb1b43ec7,
36968         0x0341b0d5,0x0d0c00f1,0xb50aab37,0x4d68b8f7,0xf3a64a99,0x9a8e4e6f,
36969         0x7f1a684e,0x198338fb,0x351a0f5c,0x8bc0e748,0xdac44515,0x2cacf2cd,
36970         0x5e9ff76b,0xc14d3999,0x16393055,0x54a01b3f,0x888d8376,0x6ac3eea5,
36971         0x723277b1,0xb84d9a9a,0xe11dbbbf,0x99132691,0xabb67178,0x597717ae,
36972         0x8bb14ac8,0x4c213526 } },
36973     /* 20 */
36974     { { 0x95532833,0x2e6fe0a6,0xd626d067,0xabca228e,0x649e73bd,0x22aef3d9,
36975         0xf03c4c0c,0x2083a87a,0x35169b45,0xe954e75d,0x74506a89,0x577509ee,
36976         0x2aeacf90,0x49cb276e,0xfa409f91,0x08275d77,0xf0bbd6b9,0x61eb6f3d,
36977         0xe4132704,0x948202cb,0xb1c498b1,0x35f3fc21,0x361fee59,0x76c68ba8,
36978         0x50e051f3,0xa18cbbd9,0x318e7042,0x2384a879,0x80dd1e8b,0x292abead,
36979         0x5c37c334,0x65713c29 },
36980       { 0xceb77b9a,0xdccca8e9,0x23b69469,0x2f97e727,0xa01d6b28,0xc76abee6,
36981         0x5abecdfe,0x3925203d,0x29290d70,0x89448082,0xb0314438,0xf9931424,
36982         0x7cd447c3,0x04209df1,0xc855c827,0x7c6f2059,0x56c0e069,0xd97d7862,
36983         0x412d94c4,0x5a9db6fe,0x994c41dd,0x19a64591,0xc89e21a3,0x12348aa1,
36984         0xc6a03f0e,0xd6904b50,0xa616feac,0x55c15156,0x7cc7693b,0x4e36d1b5,
36985         0x3bae3c38,0x6b0e996c } },
36986     /* 21 */
36987     { { 0xcceced00,0x32789fab,0xe5b7aa66,0x3237e71a,0x2ddebcdf,0x87b2e269,
36988         0xb61dad8f,0xb7245120,0xd35f803c,0xe11e5e48,0x98e50f0d,0xfb4df5d7,
36989         0xbcd2ab92,0x60ee68b4,0x1ce3363d,0x98ab2f5c,0x7cd42647,0x15ba39da,
36990         0x83f4fb3f,0x1a6572eb,0xe56f08db,0x0f77de88,0x172562c2,0x1743761e,
36991         0x8a58f0f4,0xbe349ff8,0x84d1d6e2,0xe04da71b,0x9e9ff3b4,0x368f0342,
36992         0x678223f8,0x4022a205 },
36993       { 0x83847375,0x527bbd05,0x3f451af0,0x3ae56b62,0x4b2c7f18,0x6198f24d,
36994         0x4525b98d,0xee323f5b,0x0e0884b5,0xa9d8d39a,0xfb12c776,0xd005d7f6,
36995         0x708bc154,0xd71c483e,0x742541bc,0x8ca6fd28,0xf8397ddb,0x0af3dccd,
36996         0x3eccf243,0xb80d3125,0x58d81b8d,0xc743a108,0x71391f68,0x3f48eb21,
36997         0x33bb657f,0x493aff88,0x07e47e31,0x1d15ed66,0xe08279f6,0x10159b11,
36998         0x24a6a956,0x312179cb } },
36999     /* 22 */
37000     { { 0x07615ac2,0xa94cc3ca,0x121ad581,0x85865e64,0xa7986b79,0xae47616f,
37001         0x9d5e0f1d,0x395a40eb,0x3d9457ea,0xa9143264,0xfa2865d9,0x8de6d6a3,
37002         0x1014ae8c,0x0771db96,0x976a87cb,0x77a7cce6,0x143a0f60,0xa7de42e1,
37003         0xd993d934,0xe203cc09,0x98ec4c3d,0x92018693,0x3a25df4b,0xd77546d8,
37004         0x62b02d6b,0x0ad9eb47,0xd05a7189,0xfaaaf208,0x431221bb,0x5238181f,
37005         0x733511ea,0x417d6c78 },
37006       { 0x0e91e9a8,0x3cbd81b7,0xc370d6b3,0x73340418,0x8eaa2373,0x825db10a,
37007         0x6c7d6756,0x8f2b09e4,0x94c33ded,0xe288ee9b,0x1695e3fb,0xcd8426bb,
37008         0xdce9e888,0xa6176c86,0x6165e362,0x3f4c8922,0x6063fb09,0x514e411f,
37009         0xc8f9e04c,0x6907ac20,0xdfd2ad61,0xcef7469c,0x8452199a,0xba30bae4,
37010         0x12ac3462,0x30681293,0xc92d482d,0x011be873,0xe8330995,0xff4cbf89,
37011         0xd1470a0a,0x02189d52 } },
37012     /* 23 */
37013     { { 0x92599c69,0x73e419dd,0x7fec32ca,0x5b94221b,0x09bbfbfd,0xb2bf9bd2,
37014         0x63ed895b,0x61ea97a4,0x3f486f79,0x6609146b,0xfd141a39,0xbd1c7a05,
37015         0x83d64135,0xc79ec8cf,0x9883507b,0x7f8fd42f,0x17b3d027,0xafcb53b7,
37016         0x67ca5a21,0x86658dcd,0xcd149786,0xa6a6c0ac,0x34b95067,0x16f3d70e,
37017         0xdf44958c,0x371208e3,0xec280212,0xd2dd64e6,0x30782c71,0x33b2c4ab,
37018         0x521176fa,0x7bbf8abd },
37019       { 0xa78b981a,0xbe9e4aaf,0x304ec828,0x788b4e36,0x3959dea3,0x0c45cf39,
37020         0x240b39c7,0x70a9bdd3,0x28383b7d,0x499cd7dd,0x307a1026,0x30690b2e,
37021         0xee92f1b3,0x2262d598,0xb4725a48,0xc62d77de,0x7bc3aa0e,0xa16f25bc,
37022         0xd15ef7fa,0x62dd8b65,0x0b96d68f,0xd979221d,0xa00f1906,0xb92885c3,
37023         0xeb74c740,0xfa476b9b,0xc7576222,0x217ddbb5,0x5788504f,0xc2782c30,
37024         0xf812716b,0x860d096c } },
37025     /* 24 */
37026     { { 0x4d79bbf9,0xfebc337d,0x69f74f80,0x5d53eab8,0x33104d53,0xff36a095,
37027         0x196f8b97,0x2ab820da,0x75ce6909,0x961d3d1f,0x04683754,0xb197ec04,
37028         0x93a6cb9b,0xa68ce1bf,0xc5f021a3,0x503456ff,0x8940ffdb,0xb50a2db1,
37029         0xef004209,0x77c50f8f,0x04965875,0xd635d177,0x8bb8770a,0x725766d9,
37030         0xa078e53e,0x8e19b028,0xf9fc8378,0x364d4cca,0xf0dd39a0,0x1a3df411,
37031         0x03adf920,0x7e80e442 },
37032       { 0x539a1ddf,0x4b5f8a57,0xee486562,0xd248e7ae,0x816021e1,0x1c7b491d,
37033         0xfd36d2c4,0x2e7b871b,0x0aec00d9,0xda38b504,0x6193f1b3,0xf2827612,
37034         0xfb1f78d6,0x69c3fe86,0xe827ac33,0x56c8b786,0x3487c8f7,0x1687f6c7,
37035         0x19dee5bc,0xab8f2217,0xff399418,0x04e8473f,0xa9027c80,0xf384c014,
37036         0xaa1d2e28,0x9967be9a,0xe065eef1,0x869686d3,0xc7bd837c,0x737c6b08,
37037         0x9e8bd863,0x5dcab5d1 } },
37038     /* 25 */
37039     { { 0x9a7d772b,0x0784283a,0xe540959b,0x6b49e525,0x86414ab5,0x546bb008,
37040         0x9d74b2a9,0xd4448162,0x203b0b1b,0x267890ad,0xc8d3f86b,0x1e7a82bc,
37041         0xd85a83c7,0x1352bfb5,0xfad07ccf,0xf29f16e3,0x41e0c43f,0xc02a63b8,
37042         0x6b379fef,0x904f22c5,0xb1244f26,0x19d8a653,0x3a28bdea,0x6635b6df,
37043         0xf6d455ce,0x18b68851,0x9cff3735,0x74ac2818,0x8b2cbdab,0xad40f9df,
37044         0xadc9d498,0x08cc2d9e },
37045       { 0xc170c84b,0x2e6a6866,0x5a49a484,0xbb989e8b,0xd04c8992,0x7b0e00e0,
37046         0x61b3a423,0x55ad3478,0xb0d01899,0x3c952450,0xe3100cb3,0xe3922155,
37047         0xf03276d0,0x19265b6e,0x76d42b53,0x0fe8595a,0xfc6353b6,0x0a96dee0,
37048         0x246f893e,0x761e0dc8,0xf0a74cba,0x4ec902be,0x3fdfad9b,0x61008684,
37049         0x4fdb6975,0x5d6a60e4,0x7ef7590a,0x3f53aac8,0x12870a37,0xd29e6be0,
37050         0x55aa55b0,0x991fadc1 } },
37051     /* 26 */
37052     { { 0xb4844ffe,0x82bc4b0f,0x60f8b871,0x73922714,0x4ce3f1f3,0x8ac000e2,
37053         0x163519ec,0xf0d548b4,0x88288b5f,0x7aaf842b,0x2bdc9a70,0x9e8b0c4c,
37054         0x4ba5fd67,0xa06d5152,0xf93cdec3,0xd0b1afa0,0xdf89f8f0,0x280955ba,
37055         0xeea32c92,0x86cbe92d,0x3fe05be4,0x0cae3f99,0xfa6919aa,0xf2607095,
37056         0x6e0f1b8b,0x0f54741e,0x30ecf988,0x2aed1f74,0x734991d7,0x9296f76b,
37057         0x259f0fe9,0x66cf8d28 },
37058       { 0x226f5868,0x9b01905b,0x16909e9e,0xc102e88c,0x4a37eb54,0x2bd08916,
37059         0xc9816323,0xf72253e8,0x86bac53c,0x37f84e9d,0xafeaaaf7,0x2e352454,
37060         0x2ca0046e,0x67c86f77,0x6663372e,0x86bce50e,0xb6950a04,0xf6a3a960,
37061         0xfc1aba93,0x61f994d7,0xc1326e6e,0x1957c12b,0x2e56b005,0x9b658fe4,
37062         0x8592740c,0x9cd297fc,0x177f26a5,0x7654ce9b,0xa79d2ebb,0xaaa699db,
37063         0x0ecb6448,0x5fca0c5a } },
37064     /* 27 */
37065     { { 0x569a6663,0xe26e25f3,0xe6aa4ca7,0x09597ee7,0x8d18b80c,0x25a4cda6,
37066         0x22926730,0x450602b5,0x07387209,0x9af5f650,0x26733a53,0xfeeedb34,
37067         0x86572951,0x0f5ce768,0x8398ae9a,0x872a360b,0x2b30f6c3,0x60347a80,
37068         0x1a162158,0xd2113b23,0xee6c6dec,0x6fd9cf92,0x5cbcf9e6,0x85f0a5a8,
37069         0x2ba3fe84,0xd7a5a6e4,0x51ecd727,0xaafe6720,0xa2081a10,0xe09c6bb2,
37070         0xb973b0b4,0x657acbf0 },
37071       { 0xc274c8d4,0x3130466f,0x30a994d1,0x42765176,0x7079435f,0x217258ca,
37072         0xeb897a06,0x44850406,0x561ee130,0xf38dfeee,0xaa1778bb,0x11f4facf,
37073         0xb9abb9e9,0x765c6617,0xd8f10932,0xb135499b,0xa73b9159,0xc0eb6337,
37074         0x6f7e8b6a,0xf2c1ccf1,0x187def53,0x5b32c03a,0x830b9c62,0x89ad1d49,
37075         0x2f10e538,0x1735eae3,0x9d5f55bc,0xb1cbd9c2,0xe539db0d,0x42428c47,
37076         0xc852b3bb,0x3d2da412 } },
37077     /* 28 */
37078     { { 0x871f2865,0x97702b6e,0x142920d6,0x56cb639f,0x45b58611,0x328522a0,
37079         0xf3b13812,0xf3943ad1,0x712206e8,0xe6c2200a,0xa34d59ea,0xc2890e5a,
37080         0xf6b7f759,0xab52fd40,0x180bf567,0xf522c8de,0xaccee396,0x181e97b2,
37081         0xc4ea5cbb,0xe0375819,0xab51d3ef,0x0d9985e8,0xbcb50fd8,0xe26c96ca,
37082         0x97e1c80d,0xfb9d6b13,0xf796357d,0x582b1814,0x07f4c7fb,0x89a78221,
37083         0xc0357e61,0x02aeef2d },
37084       { 0x2c7ec9be,0x2ba7926f,0x7258b201,0x292f307e,0xc6fa6b4a,0x74e62a10,
37085         0xe2bcc5ab,0x80c08549,0x7bb8c073,0xb4160db8,0x329f194d,0xd5ef0529,
37086         0x6dda4a9c,0x0eb8da14,0x15ea23d1,0x0b5d43d2,0xfc34bfae,0x6cebef02,
37087         0x848757a7,0xacd364d0,0x2d34cca3,0xc1401368,0x1d2d95e2,0x09ca6742,
37088         0x786eaa28,0xc3fd1d6e,0xa2965fec,0x9eb1136d,0xc0779203,0x48871baa,
37089         0x4b15aeb0,0x6b446c01 } },
37090     /* 29 */
37091     { { 0x25e8fe80,0xc819eb2e,0x98238a17,0x2b5f7906,0x81e41849,0xd6f1e996,
37092         0x98ea6d45,0x58ad8ad6,0xbfd02e40,0x5bae5ad4,0xa812416d,0x016dc327,
37093         0xa3347ca1,0x8b31a985,0x82a65391,0x0b4da610,0xb48c35fb,0x1cb91b2d,
37094         0xd2aaf8c4,0x9e96817c,0xcdfdcdc0,0x1a630483,0x12b69254,0x70559361,
37095         0xf8a2a097,0x5fdcd712,0x35cc5281,0x59ab623a,0x932b6095,0x30c8ebe0,
37096         0xb08e052f,0x8613424b },
37097       { 0xb2231d8a,0x28902063,0xd9a61667,0xb0f62329,0x071a9f27,0xaafa0fe7,
37098         0x603f047e,0x6bcd8960,0xfd92a1c3,0x118cca76,0x71d483b6,0x3414e62b,
37099         0xba705262,0xa123ccdd,0xfd9b5c5a,0x1a576437,0x4c8d0fa3,0xa5301bc2,
37100         0x102427cd,0x96f0ad44,0xd3aa6c02,0x0e6fb5e0,0x072a3996,0xcd8c4880,
37101         0x840d3fad,0x4dafca12,0xde91d541,0x29f4ca3d,0x8441734d,0x0037c598,
37102         0x9ccfe57c,0x86333a99 } },
37103     /* 30 */
37104     { { 0xecf53b40,0xd213a751,0x2f78a542,0xcff2c6f2,0xf13ae56d,0x0f59f0e2,
37105         0x0e61748e,0x91f8ccbf,0xd72c4145,0x0aadecb9,0x4c9cdcb7,0x6b2ed852,
37106         0x1eaffc70,0x8e00b72c,0xaa728102,0x89b24285,0xb679cafa,0xaa7ea7e0,
37107         0x4f0a6f6f,0x5d2b8c26,0x0e804397,0x7ed7b173,0xc8573049,0x5a93eb45,
37108         0x0986e93e,0xc92bf5d4,0x6a20c0af,0x526b5a9c,0xb99dc3af,0x0adf47c9,
37109         0xba202cc9,0x12b25fe2 },
37110       { 0x33eea395,0x09b8d78a,0xf633fc5c,0xc7a93618,0x270eceef,0x7e821629,
37111         0xc628ed0c,0x524779b8,0xa1d68939,0x91db5ca1,0x586edc90,0x8626e18e,
37112         0xfeb3f3bf,0xfe023e8b,0x0250171c,0x6279fde1,0x55e172de,0xe52ec7dc,
37113         0xc6d4ca45,0x445e8695,0xbdbc10f1,0x42de3878,0x6fc3835e,0x2b114de8,
37114         0x7e10b652,0x9faba456,0x390e78fe,0x4111d82a,0xaedf0aca,0x576b61c2,
37115         0x74accb74,0x216279a9 } },
37116     /* 31 */
37117     { { 0x4047f747,0xc14cdabf,0xc1315a1e,0x03ca233d,0x40e5d0a7,0x59e7cbd3,
37118         0xbb413869,0x1fd0c4e9,0x0f01fbd8,0x189d08b1,0xa76b823d,0x50449c42,
37119         0x398b00a1,0x81c224a1,0x8e8179e4,0x08084e4f,0x698e41e9,0xfd8af994,
37120         0x5610bf2e,0x1e30e37c,0xa7d2790f,0x4e6a043f,0xb3195388,0x9d96e60c,
37121         0x03799dfd,0xe75f986d,0xf8ff902f,0x3b4a8f11,0x7588416e,0xfa945378,
37122         0x9827535e,0x20683e3f },
37123       { 0xd0378878,0xcb582e26,0xa7945787,0x9e214c23,0x8f6688b3,0x13d000bf,
37124         0x40515270,0x7548d4f5,0x40111f5d,0x7113c15d,0xa8bff902,0x3bf5a526,
37125         0x9b4945cc,0xbda6b010,0xbc2f3a05,0x83dcc74e,0x43efdfa1,0x2aef6284,
37126         0x565c5bf4,0xd2e60ee9,0x592f243a,0x4f0fa10d,0x1bc3bf51,0x6ae58b32,
37127         0x60576a74,0x813b0868,0x4d73081a,0x0bc023f8,0x32dcee59,0x9fd03aa0,
37128         0x27d6c795,0x5e416bf5 } },
37129     /* 32 */
37130     { { 0x026cc23c,0x24313760,0xb5b29058,0xf819aaee,0xc5d2ee17,0xa92272f8,
37131         0xee5cc402,0x8048e7cb,0x77def07d,0xdbc7d6ee,0xf6af821e,0x61d69244,
37132         0x996cbb89,0x5f7966ed,0x96a155a4,0xf81b17ea,0x03f3ed56,0xb2d9ef70,
37133         0xe882a5b2,0x5e6e5906,0xae947180,0x86fa1072,0x658c76f4,0x34d9fc51,
37134         0xcb035aa0,0x9f603dc0,0x75be6481,0xb7b39feb,0xcf04a9ef,0xca87554a,
37135         0x87b4fde3,0x4ff682ec },
37136       { 0xd0a10ad5,0x3125627f,0x968e6f45,0x7fd45c72,0x806a1163,0x2981bd6b,
37137         0xde5033e3,0xb92de1cd,0xbf4f8988,0x3b44b45e,0xdae7e1dc,0xca1b9896,
37138         0x0778d878,0x52166e5a,0xa5116847,0x82d472be,0xf2895445,0xfbdd382a,
37139         0x5d6ec4c9,0x22ed1602,0xb6552b02,0x3614eb1c,0xa1e6210f,0x63c5df73,
37140         0x021a74a7,0xe9160285,0xc65cbd4d,0xa44ca400,0x0f15e299,0x48cb187e,
37141         0x3402507c,0x51eb818e } },
37142     /* 33 */
37143     { { 0xb92100ab,0x1fc1d178,0x9605b839,0xdf2e3d60,0xb71e59d0,0x12a7c255,
37144         0x14fcbe04,0x3f8b6675,0x59fd06af,0x0e8a3935,0x12020d07,0x56326502,
37145         0x528e7be5,0x6696fcd1,0x0c7b7654,0x6588514b,0x5912a5b5,0x0cd80f8c,
37146         0xf324cb7f,0x8bafef04,0xc6da3d75,0x6b53eecf,0x31d1df2f,0xedef48d8,
37147         0x73812b6d,0xf336b965,0xee626031,0xc82eae4a,0xd244f09b,0x300abd32,
37148         0x31d9647f,0x8b0af955 },
37149       { 0x2e603544,0xb770180a,0x221acd9e,0x2b573ac3,0x62407032,0x3a17f665,
37150         0xb89abc3d,0xad3e74ad,0xd793225a,0x8a3d2e3a,0xef02564b,0x457bba04,
37151         0xfc2dd2b5,0x8875652f,0xe67143e8,0xd2905d15,0x02e48d70,0x6d884b42,
37152         0xc7636a57,0x06f99219,0x35e378df,0xa8dc3421,0x10c64a02,0x95c1d73d,
37153         0xcc157a66,0xcd6a4ece,0x8e24a354,0xbadcc1c8,0x9839329d,0x8024f1b2,
37154         0x4da48ad0,0x5363e549 } },
37155     /* 34 */
37156     { { 0xe23fc641,0x1f5523b7,0x86667063,0xfe54e72f,0x8e009d2f,0x294a15f5,
37157         0x8c57f5e1,0xf203997f,0xb16d64dc,0xa229724c,0x4baa2ffb,0x697be4fd,
37158         0x0a6e8ed6,0x3f507e46,0x78508536,0x0afe3a5d,0x95408208,0xeeef6cdd,
37159         0xf2c4237c,0x701fd889,0x5c385253,0x496d883a,0x72a212f1,0xe25c67ed,
37160         0x1ff78fcd,0x4b416783,0xc16f4146,0xe9967004,0xc45b0697,0xfa45c3a1,
37161         0x3fbd30c3,0x63334018 },
37162       { 0xa2fbbbce,0x39c9a0cc,0xaa0cb744,0x876f6e5c,0x3438ece3,0x9ce6010e,
37163         0x13802d82,0x0aad148e,0x9cd45a1b,0x9c3e5c60,0x7bcfc1e0,0x875cb859,
37164         0xd8584dd0,0xb19ff790,0xd81c2a2b,0x2598b81e,0x02be07e3,0x118bdf2f,
37165         0xb9765ce9,0x074fc8ee,0xb24f95ae,0x125e9d88,0x0c98f09d,0x3bb12cdc,
37166         0xa0b74b27,0x4a6aee07,0xc08077ce,0x4723d2f9,0xbea8026f,0x959447d6,
37167         0x16280b73,0x93a7075c } },
37168     /* 35 */
37169     { { 0x715b27f9,0x26bbefe2,0x2a280923,0xa935a5e2,0xfd58a26a,0x5ddf23af,
37170         0x7c138694,0x54c83e16,0x892a2153,0x44799bc9,0x9b8d09f5,0x4e6e4710,
37171         0xd588ea68,0xc63af616,0x883ab1b6,0x5e896706,0x3d209336,0x3c1393a0,
37172         0x92c23dda,0xd02f2921,0xdcf6ea43,0xab70cb7a,0x791559e1,0x12434ea8,
37173         0x6d70ff0b,0x040680db,0x2832ba45,0x1a10fe52,0xe5f0cb8f,0xd69f9c08,
37174         0x44b141fd,0x1a7422ac },
37175       { 0x9f40b675,0xc3a9dd2e,0xfcc71f39,0x2a7c6603,0x1948e342,0x18939a61,
37176         0xed0ab484,0x8f3b6158,0xee31ca6b,0xa3aa7d97,0xf7a8db63,0xbc1e865e,
37177         0x2c7c62e4,0x315f8c09,0x9f5c6d0f,0xa260788f,0x4b6f3ec5,0xb1833129,
37178         0x36b4d849,0x73adbcd6,0xbc699a9b,0x66e14890,0x2a1175e7,0xbf3790d8,
37179         0xfc53ca4f,0x7f43605a,0x87ff6091,0x577f6c47,0x600c82b6,0x827c7552,
37180         0x9d25599c,0x0944d630 } },
37181     /* 36 */
37182     { { 0xe6ab9620,0xcfdeb63e,0x786cd808,0xdff4fa6d,0x456320b3,0x145edd82,
37183         0xc4943915,0x2ae5f862,0xb73b3f87,0x9508e813,0xe52f97a9,0x3bd805f3,
37184         0xc9829b62,0xf71b5c28,0x86e0cefc,0xb394c70e,0x23bdb36e,0x534fb1a9,
37185         0xdbe27e5a,0xd64f5862,0x83ab6169,0xbae23df3,0x27c828cb,0xdd6df1b1,
37186         0x3a307a8a,0x1901899f,0x811ddf66,0x36cc8659,0x79943b77,0xa3cb7774,
37187         0x6fd86576,0x7d89f383 },
37188       { 0xc9f92b2b,0xf8564242,0xc46e32bd,0x700c6a75,0x7f99a5c5,0x93e768b7,
37189         0x03149568,0xb6efe858,0xc2ce6709,0xbbfe8a19,0xee6ec493,0x721a3b1b,
37190         0xc371c28d,0x26eeeea9,0x15177e1d,0xd798115e,0xb068a5a5,0xd7bf3bce,
37191         0x46d2b4b2,0xdf8da220,0x59be9dfc,0x3df0995b,0x77640b79,0xc96897bc,
37192         0x5a2bd3c5,0xce0cf4c2,0x89afe744,0x16f45d6e,0x3a8509bb,0xb53f3acb,
37193         0x63f2a6e6,0x449af81f } },
37194     /* 37 */
37195     { { 0xa16d9377,0xc2fcf132,0x7e1a2f9e,0x9ab377b3,0x86d19ae5,0x72e1a12e,
37196         0xd013bbb1,0xd2b12e66,0xcb5f66ba,0x0972e055,0x399eab50,0xd11de1c0,
37197         0xc65f5ec2,0xc1f314fd,0x8a9ff593,0xfc311841,0xe05246e6,0xdf73c1ec,
37198         0x1625056d,0xc28d1363,0x6fb25e19,0x30a9dbd7,0x845cd2d7,0x049ed244,
37199         0xd36e852d,0xc779b83f,0xf68c8a83,0x85a35fc7,0xc95e8033,0x299bf1e1,
37200         0x20891af5,0x0e8617c3 },
37201       { 0x67c81b5c,0x53720602,0xe737873c,0x2fa89dcd,0xa8144fd0,0x2a7430b0,
37202         0x26208c83,0x3006c5a7,0xd8ea40f5,0x4e066660,0x896413a4,0x9dd025f9,
37203         0x46b9149f,0xbdf380cc,0x0a125cc2,0x80156619,0x52793c37,0x04d6a3b7,
37204         0x6b7a62f2,0xb6001374,0x585d5978,0xa9cfe268,0x8395fe66,0xdcad0cb8,
37205         0x46b261f6,0xbab468fc,0x9d9d9218,0xca0ef5ef,0x5e452402,0xc507d4a8,
37206         0x326cf687,0x6f4404f1 } },
37207     /* 38 */
37208     { { 0x4febd3ff,0xa3e1920b,0xfdfd2bba,0xca6234d8,0xe19a9829,0xb7d1af2a,
37209         0xc6f5bc20,0x23de1610,0xdaa39ca9,0xe204dbf3,0x6d8c70ab,0x2a2de9b8,
37210         0x7c9d370b,0x272e0c37,0xe565510e,0x80914c06,0x57cbb6b0,0xb611e7a8,
37211         0xd8266a6e,0x076fc6ef,0x3095801c,0xdfac34ee,0xb9e24063,0x69ff40a2,
37212         0x787aa5c5,0xa7ba31a9,0x33c70cd2,0x0e4d1fdf,0x6895f074,0x903e3132,
37213         0x7fb671e2,0x905771f8 },
37214       { 0xa4062bee,0x5199ba0d,0x94d7d9f9,0x18e7238c,0x1e0922c0,0xf53f29bc,
37215         0xb12d855f,0xde9b2a81,0x6d68ca29,0x649f3eed,0xc50c097f,0x64adfc34,
37216         0x9db398a0,0x81964ab9,0x7a587224,0x00d59c47,0x74c5903a,0x09fea396,
37217         0x15043dd0,0x6aafd8ee,0x5f1ecc20,0xc5721a6e,0x0db9b7b4,0xb6d6a483,
37218         0x66c8d52a,0x06ffc617,0xacc82a27,0x3de241d6,0x27f2f7a8,0x0605f052,
37219         0x6404decc,0x6a22953b } },
37220     /* 39 */
37221     { { 0x74fce389,0x92452d8f,0x2afa5564,0x059634c0,0xf0ed7825,0x9377ccbb,
37222         0x37718e0d,0x89f4045b,0x9fa69a4d,0x11074e7d,0x7295b0ba,0x5d70bb07,
37223         0xf107ede6,0xb22d54ad,0xa1a29c7b,0x5c39a3d8,0xd795e3ab,0x37236c02,
37224         0x2b589951,0xf7282d00,0x5790bee2,0x5e2265be,0xa8e65ea2,0x91e0ea11,
37225         0x6001cebd,0x0e71a708,0x2c1c5402,0x16900f5a,0x357f6981,0xc3b2d5c0,
37226         0x619e3427,0x528c9ea0 },
37227       { 0x5f26c577,0x1edc86b4,0x9438bd45,0xf8074708,0x792582a7,0x2dfe1013,
37228         0xde1e569f,0xe08eaca0,0x9a55a356,0x5f952efa,0xe4976216,0xa4d80b53,
37229         0xcd5d71f2,0xd2b65855,0x66cea3f0,0x246704bf,0x492323ca,0x193f641f,
37230         0x9adb1325,0xa681855c,0x2d19d652,0x86d522ce,0x5b82ed7b,0x53609f10,
37231         0x8e150d29,0x3b0f0094,0x0b13e891,0x23ad8bfb,0xf794b449,0xcbb1556c,
37232         0x738bcf57,0x200f9093 } },
37233     /* 40 */
37234     { { 0x8388387f,0xf9b22fc5,0x28e883c5,0xcf26f170,0xd1b7973c,0x447cab90,
37235         0xf6ec9171,0x8d5d4ea2,0xc30cdbc0,0x2e16f498,0x48623c2b,0xdc92910c,
37236         0x30dbc545,0xeb1491b0,0x14de21b0,0x631deb2e,0x2fe830f4,0x04a21066,
37237         0x379c1f3f,0xa4c6979c,0xfb06a795,0x8a732b68,0x1619dfa9,0x3a44327a,
37238         0x8dbe2c9b,0x91a307d3,0x03989fea,0x939bc8d2,0x0f4a331f,0x3daabaf2,
37239         0xdd0f55dc,0x5c307e98 },
37240       { 0x35b233da,0xbbc4e0c4,0x22f6f985,0xe3d29085,0xa8b02468,0x99dd2d21,
37241         0xa96916e7,0x978f40e9,0x614bcced,0x0327d86c,0xb290762c,0x95e95502,
37242         0xa879f2ed,0x0ffd2197,0x50e0bd33,0xc4365137,0x0827c4c4,0x26c3148a,
37243         0x3fcfc0b2,0xc79812a8,0x31928589,0xc3d8d17e,0x8830f42d,0x8b572cfe,
37244         0x4b07f83f,0x7cd9ff92,0x0a51148f,0x331ca950,0x4c59f9ac,0xd0c53968,
37245         0xc1434785,0x1df16dfa } },
37246     /* 41 */
37247     { { 0x68bcacc3,0xcc7bb4ac,0x430f58cf,0x06ded34f,0xd461855a,0xc59f9f4f,
37248         0x45c9f0bc,0xf5491994,0x4375c892,0xdc5f7ec6,0x3c85983a,0x1b8708f1,
37249         0x82fcd087,0xb32a5cc4,0x2d6b4c0f,0xefdcdc35,0x8ac6fb2d,0x4bb24f04,
37250         0x33906471,0x5982d4f5,0xb83a3ac4,0x162eb52f,0x2337a223,0x7130df28,
37251         0xcbc3dbd3,0xdce7b802,0x2467ac0e,0x8b395959,0x1b56717e,0x21d3d2e8,
37252         0x46512617,0x729a7f50 },
37253       { 0x8420f90a,0x874ed1aa,0x0fe4c855,0x6368e19e,0xb0be74af,0xb62d4aaa,
37254         0x8ca60ca9,0x76fcc480,0x7645a867,0xf310b5a5,0xddb1b24c,0x131bac9b,
37255         0x2dea5b44,0xef77d71d,0x72fcc64e,0x4706d210,0x673d77f0,0x29b92691,
37256         0xe89e0663,0x22e00bf3,0x74077d40,0x472d0cd3,0x829232e2,0x3e21040d,
37257         0x38dc8533,0x2f916dfb,0x14b8f667,0x48bbb59b,0xd44be19d,0x19de9f4a,
37258         0x232d9d5c,0x7f6d3649 } },
37259     /* 42 */
37260     { { 0x6e794819,0x3bd064de,0xf82ebda1,0x5a6b694e,0xb91e2804,0x1f017fe0,
37261         0x07a43cd2,0x190d31f3,0x630433e9,0x6c26f226,0x0abfdcb4,0xba488aa7,
37262         0xa46411c0,0x418d9085,0xbffb5880,0x1b934fe6,0xe200f849,0x75d1e237,
37263         0xa55413db,0xdf04d63f,0xe23b3f77,0xe216ed75,0x0f91bd30,0xa05866cb,
37264         0x7729c509,0x84c395d9,0x452ab2d7,0xec97e188,0x0093d686,0x8cb7c1f9,
37265         0x628f086c,0x2d032395 },
37266       { 0x4a44b4c5,0xa81c9407,0xcc702c98,0xb9846879,0xceb0dc97,0xcb502287,
37267         0x6e3aa321,0x30301126,0xe4c256c2,0xc0ac8763,0xe55b4845,0x65034d20,
37268         0xf240f35b,0xaa96a040,0x7cf7eedc,0x046d26d3,0x3b810656,0x62a5a8e1,
37269         0x83d70c2b,0x86044b97,0x59e4da8f,0x2fbaff88,0x5457f5d1,0x929d901a,
37270         0xb531b757,0xd29e1eb2,0x9e4e9739,0x214dabdc,0x4eaa9bd9,0x5bd724fc,
37271         0x1ef9bb9b,0x734c12b3 } },
37272     /* 43 */
37273     { { 0x92f9b086,0x98fe3c2e,0xb3fd4544,0x4641b93e,0x5c02c65c,0x47ce208b,
37274         0xc4f03242,0x8a52dca1,0x679d29f6,0xb5ec17d9,0x9406f5f4,0x11d2fed0,
37275         0x0d9ba811,0x260f63dc,0x15472a3f,0xde2b056f,0x007290e6,0x1b170d9f,
37276         0xb6b5c8f9,0xa2e23e8d,0xcf34c3ee,0x345a2839,0x1b973ee2,0x9bdc5461,
37277         0xbb24d1c5,0x65bda6c2,0x3c6141a1,0x97d52ba3,0x9d2eb201,0x47bb1612,
37278         0x21fbe49f,0x7c558a87 },
37279       { 0x3f350fec,0xb9485a52,0x6a38d4c0,0x016678c5,0x0d5aa64d,0x8ef346a2,
37280         0xd96da2e4,0xb85daa02,0x4f647b3c,0x845ec4ea,0x0d5e946c,0xc0d1a6ca,
37281         0x4fa9f4ab,0x41d8d1c1,0x9c8b1303,0x43972cc5,0x434ffbfb,0x67e1f48d,
37282         0x819d2318,0x350ce93a,0x6ddef23f,0x49f53090,0x200cf12c,0x3c2e6cf9,
37283         0x640432fc,0x42691cc1,0x72496b52,0xbfff74b4,0x020a97be,0x44527c9f,
37284         0x7b3c4348,0x34cd7dca } },
37285     /* 44 */
37286     { { 0x59e7fe87,0xf031761a,0x0047cd72,0xb1eae31a,0xfae30f62,0x27902e68,
37287         0xb71db143,0xa666f48d,0x0e0038f4,0x75ee6678,0x02bdd76d,0x3b45ac67,
37288         0xa0d6cd5c,0x0d2fb828,0x9d8c5b11,0x27ce7f1d,0x120b5e96,0x141fe0e4,
37289         0xb9267c37,0x95a1b984,0xd60312cd,0x5206e589,0xda549356,0x1867342e,
37290         0x070c74ac,0x374520b9,0x9557b0b3,0x2703cbb5,0xa6ed8c14,0xf621f59c,
37291         0xabf7b887,0x7ceb1cc2 },
37292       { 0xdb7fd65b,0x0647a5bb,0x36c9457c,0xd8d45cc0,0x9e12718a,0xc6da99db,
37293         0xe93a7fb1,0xed1dbbf4,0xbd1566a1,0x4512c95c,0xdbc0c919,0x4861ba00,
37294         0x9e7f5269,0x3c6cc298,0x0941aaae,0x67196150,0xc8c538e3,0xbfcf5d0f,
37295         0xa25a551f,0xad6e9929,0x17ca0f26,0x90710985,0xfa89ef7e,0x743b78ea,
37296         0x71ab4549,0x39d5ea31,0xe6d1c36d,0x7442f3f3,0x059d568d,0x25a683e0,
37297         0x227ced5c,0x1f629a99 } },
37298     /* 45 */
37299     { { 0xe45a1c3e,0x8925ddac,0x41f7545f,0x72d29365,0x37e7f828,0x45622fcb,
37300         0x3e4c79d2,0x88234513,0x9c2645d6,0x5dffaf84,0x994802b9,0x3078f4dd,
37301         0x9d339fa0,0x566927f0,0x9fd91dcc,0x9a500a1e,0x0ab0abd7,0xce008180,
37302         0x8194e5df,0xd97135a3,0x98adf088,0x9e876307,0x9a45a2a7,0x3baf01b8,
37303         0x788b4399,0x6fed6154,0xe77a997d,0x980e5722,0x2a378eed,0xaac90ffa,
37304         0x8bd805a2,0x4a75fda2 },
37305       { 0x55e74cbc,0xd09a8fbb,0xfab18f25,0x737738ce,0x9764ec3a,0x0fc23ad6,
37306         0xe7e0ad31,0xc5a7d35b,0xe481cc9b,0xe75e068e,0x3d4aec34,0xf0c2ea99,
37307         0x0d4a63c4,0xf1324fe8,0x99b0592c,0x5dbb7c16,0xa7e0f46b,0x442d674d,
37308         0xa300faea,0x5a5d66c7,0x3333ac83,0xe83dc821,0x8c408496,0x70ef812e,
37309         0x99ef5fc1,0x96e1dcb6,0x1734e862,0x6e2b771b,0x583507d8,0x04629cdc,
37310         0x23d8179a,0x5819f9ae } },
37311     /* 46 */
37312     { { 0x6aa78811,0xd9969121,0x2103e7c3,0xf64ee8f4,0x22b9e698,0xddf01070,
37313         0x4f582cde,0xe6001f9e,0x2ecfac1a,0x24a608af,0x06393009,0x6ef4c784,
37314         0xebf72911,0x5262eae6,0x8c4ee5a0,0xddbd0af5,0xecd87bc7,0x875aff90,
37315         0x6f24f114,0x2fddb34c,0xe865f172,0x48104281,0x886c1b9a,0x95692426,
37316         0x9ef4231f,0x6f5f3208,0xd0a7e82e,0xaf587acf,0x9ac395c8,0xd6571917,
37317         0x1364a750,0x7459603c },
37318       { 0xf41ae519,0x1c2475bf,0x4af8f251,0x34401fb1,0xaefb2c3d,0x70ddfcd2,
37319         0x51cdaf08,0x9b2d385b,0x8208bb19,0x8531c256,0x4c33f3f6,0x16c89df6,
37320         0x24571769,0xc23cfa99,0x86d010ba,0x2339b51e,0x22638313,0x08db0e8d,
37321         0x00fedeb7,0xf769e179,0xa3687ef1,0x3fd96dcb,0x91476475,0xcd046b23,
37322         0x0c45c8dd,0xf3ff2064,0xb8343d78,0xefd167bd,0x4b77ee90,0x493ccb6d,
37323         0xb3cf7b45,0x33025513 } },
37324     /* 47 */
37325     { { 0x35eaaca1,0x36f00469,0x89119102,0x0c384b75,0xe6d2954c,0xcb375665,
37326         0xb1e9d6d7,0xcb9199b9,0xc29c2757,0x75852349,0xb8e738d0,0x89cbd1ba,
37327         0x5923a427,0x9b8dbe90,0x18fe1889,0xa237793e,0xa742e083,0xa4271757,
37328         0x4eebd613,0x8c4979d2,0xd4f2cf77,0x40325054,0x958705de,0xa3b8a091,
37329         0x33d999ba,0x1b191bd9,0x3b0fee1e,0xbafefba4,0x3facdf14,0xb3bad184,
37330         0x4387561c,0x9328adb0 },
37331       { 0xf906b872,0xabe84e80,0x78262665,0x705523a0,0x3398ccf7,0xd89c6a7e,
37332         0xf55b5323,0x2fab551d,0x0554dea8,0xa0578eca,0x375589cd,0xef26523d,
37333         0x864ad750,0xd8fd6242,0x178fe1fe,0x93f27fc5,0x9df87422,0x7b3e6f30,
37334         0x3750d054,0x2862e49e,0x5dc038a1,0x7d90c6b2,0x84db682b,0xc1a1ae22,
37335         0x9881930a,0x47f3dab7,0xbaf3e0a4,0x30e6bd52,0xf62d25c5,0x0680025b,
37336         0xadd0d5e7,0x0aa1f3cf } },
37337     /* 48 */
37338     { { 0x22a10453,0xa9822190,0x2a03a10b,0xdd1eb91c,0x96646f3b,0xafbb5d95,
37339         0xf38b6fc6,0xa58de344,0xb8cfca1d,0xce47c3e5,0x0f70da04,0xfcd8e16d,
37340         0xda262ed6,0xac44349b,0xc56e2f8e,0x9320d87b,0x19138e58,0x9ce3ea08,
37341         0xa2b236c0,0xa5862dff,0x8e7efb0d,0x6b0f9a5c,0x16ac78eb,0x4b53432b,
37342         0x709b51af,0x6ff43105,0x8f519628,0x08e236f8,0xeed403ad,0x1f93f176,
37343         0x9636545e,0x559337e0 },
37344       { 0xd8fd807a,0x30ddf738,0xab131222,0xf4e0ec9d,0x625afbc3,0x14a2f4db,
37345         0x9f12f895,0xd5b70604,0xac3044fd,0xb46f3c23,0xf540148f,0x1b232d1f,
37346         0x39b4e554,0x61b458f5,0x0dd70b75,0xf694b24a,0x289581d9,0x0fc64299,
37347         0xee5fe22d,0xc05d49be,0x6a18bf63,0x7af3447f,0x7f1929d6,0xe96a1dc2,
37348         0xc1551e8c,0x6afe6028,0x2b5d4fa2,0x27dacaf3,0x545c2cb4,0x4a1631bc,
37349         0xb0c914d3,0x930070f9 } },
37350     /* 49 */
37351     { { 0x69a9bc05,0xd2f32c5e,0x589c4b73,0x0a5c19c6,0x94665f9c,0x095c9e5e,
37352         0xbcfb4c39,0x8ab0f293,0x1ddb7c31,0xb9070877,0x66b38048,0x894e9658,
37353         0x606bd9bd,0xf19a90cf,0xb6fd2d69,0xcc1d58df,0x461d8a69,0x886dcc4e,
37354         0xf9ce4831,0xc455c277,0x765f8a82,0x749a5996,0xc3badc8d,0x2ffc668c,
37355         0x9112cdab,0x38018396,0xb243c7cb,0xa98795c3,0x010a2224,0x8775f310,
37356         0x587b5e14,0x043a2141 },
37357       { 0x3a873752,0x7bbe9dbc,0x2f442fee,0xee1493f4,0xc18c2181,0x981ca2c8,
37358         0xe29769e7,0x00ce3090,0xde768c5f,0xb4626ac8,0x34d7677e,0x33e9ce46,
37359         0xe0fa94e6,0xf89c2cad,0x41f5b5bf,0x04f5cc11,0x2228c12c,0x2565f736,
37360         0x0c05cce5,0xf1bf706a,0xbe487c4f,0x5d07ffff,0xa499f1a4,0x3ec43c09,
37361         0x98d94800,0x4f4e79bb,0x073f12f8,0x8a335a16,0x0f970d6d,0x4bb5eaf7,
37362         0xf24d0ae8,0x18d0747b } },
37363     /* 50 */
37364     { { 0x84601faf,0x58d3c77c,0xaf1c1f72,0xc9465be2,0xd116d806,0xff626798,
37365         0xd5b0d93c,0x3996c0c6,0x5ec6723a,0x2fa1ad75,0x03ba5349,0x966a8144,
37366         0x2ac34d8a,0xdc4c9422,0xed675865,0xddf471de,0x953d528f,0xd8aca597,
37367         0x24ebf67d,0xb2e463b5,0x7e25b4d3,0x25824871,0x43159daa,0x23c5adba,
37368         0x83357540,0x5458f9c6,0xf938b1a6,0xcf685da7,0xcefed231,0x981a4fda,
37369         0x08bb5e59,0x711093ed },
37370       { 0x401f161a,0x12aa3fc6,0x974c5e87,0xf7358560,0x17b5df82,0x4aa252fb,
37371         0xa48e6299,0xb0b82b07,0x29dd847d,0x00234157,0x4529c5a6,0xf1e54d00,
37372         0x6d98f538,0xcc1c539e,0x28d3abcb,0x36162b53,0x2a84f0cd,0x75a37938,
37373         0x4dee7484,0xf717a81b,0x4c23bf1b,0x16cf35fb,0x787e8b3e,0x7fd1c29f,
37374         0x59b79ab0,0xb7da7e68,0x85f6c60b,0x072100a0,0xe7ed48b5,0x31840159,
37375         0x4d9c97d4,0x17898bda } },
37376     /* 51 */
37377     { { 0xae1b8cf8,0xcd8483d8,0xe9a28856,0x323d4b42,0x204a4bc2,0x7633584f,
37378         0xca7a69fa,0x4e0b2228,0xf757bab2,0x8afbda8b,0x6cc5f9ca,0x85b24088,
37379         0xd41a95c3,0x47fb4813,0xc2aabe6b,0x3f1bc53c,0x1ad1599d,0xf22cda3f,
37380         0xc31ea9b1,0x1b2ec081,0x01614ac1,0x048f304b,0xc6afa7ab,0xce31cee9,
37381         0x4140dc3d,0x55af7633,0xdce8abba,0x84b7ab37,0xc7cf3efe,0x50de7648,
37382         0x15356ab2,0x73a88dcf },
37383       { 0x06e83b39,0x3f868288,0x9f44037d,0x477a4413,0x17dbc841,0xf9058b0f,
37384         0x54d17549,0x2db64f4f,0xf2307ffe,0xa23cea6a,0x4f126261,0x393efd55,
37385         0x10f37f26,0x2f4e658a,0xf4ee1e35,0xa4437ce3,0xa93cde8b,0x64ef42a7,
37386         0x939aa901,0x1debc9f4,0x3d7b5cd4,0x44223d6a,0xf88a3acc,0x789a6a11,
37387         0x2c608a2d,0x56fb9df8,0xbbf56c06,0xe79db8e3,0x668fa300,0x73c56af2,
37388         0xae396a1e,0x52f32b17 } },
37389     /* 52 */
37390     { { 0xe714f71a,0x56f524c1,0x9add8519,0xc1be1262,0x65cadbe3,0xad9189d8,
37391         0x5a0fb649,0xd88bf5c8,0x21d192d9,0x9efa6a92,0x6f724b6f,0xe3fe8389,
37392         0xb250119c,0xec3fae24,0x2ae0d3c0,0x4b6af9f6,0xd619624d,0x8fceba0b,
37393         0x2fdb6e3a,0x7dc3092b,0x3263cd29,0xc91da376,0xf95c43bd,0x30c0761e,
37394         0xcdeb44d9,0x89136400,0x43c0d31d,0xfd7dce84,0x9871899f,0x78fec3b1,
37395         0xefdf58c1,0x79e14d28 },
37396       { 0x9bb40c55,0xe3822235,0x0ed07a42,0x0a27202d,0x4838c1f4,0x48e6c1a9,
37397         0xd864a78e,0x2b5f24a7,0x0c6c55c9,0x7e7f140a,0xce12d508,0xe62c104a,
37398         0xc11b1e10,0x9b0a1a7e,0xafbb3dd5,0xfd8a275f,0x9a3b6b30,0xdff354fe,
37399         0x46602a01,0x5a105d9e,0x93bb65f7,0x3d371b4d,0x0f82fdeb,0xda5cbf0b,
37400         0xde468545,0x4601229b,0xc73d517e,0x505e10b9,0x672ff492,0x77cfa541,
37401         0x99566ce2,0x0d8ec28a } },
37402     /* 53 */
37403     { { 0xcbeee995,0x014cf73e,0xd491e80c,0xb2eb88bc,0xd9aba5d4,0x615a6cad,
37404         0x9304c84d,0x2f7d4633,0x8ab03c9a,0xba0501d2,0x91babb94,0xc8f723de,
37405         0x50405772,0xc885f977,0xc7fcb094,0xb5e1d2b3,0xdf96c71a,0x61ee7995,
37406         0x3464499e,0xb8c8daab,0x5f607932,0xdb425ddd,0xb1243587,0x70251ca1,
37407         0x9fc74340,0x26d7d3be,0xc902ac89,0x8c179310,0x4559a74f,0x72522c15,
37408         0xc3734afc,0x86001e27 },
37409       { 0xe7693947,0x13b00ba5,0x012c062b,0x6478641e,0xe85490a8,0xe1a438e0,
37410         0xd9574d5e,0x5173dbbf,0x9bd3ba61,0x9532eb8c,0x5f3ea075,0x1f41bcb8,
37411         0x8cbb92b9,0xac1cc247,0x1ef901b4,0x0f34648e,0xd2b3b2ee,0xdd929d1e,
37412         0xc3d75bfc,0x470f1eab,0x139cf4d2,0x5cdbc6f7,0xf0424953,0xcd86454d,
37413         0x47fcb383,0x1e079812,0x17df930c,0xb9f209b4,0x114ebc00,0x4225fc31,
37414         0x347946c1,0x020591cb } },
37415     /* 54 */
37416     { { 0x275e0af4,0xe3003721,0xe78a4a4b,0x721141ef,0xd1757485,0x666cfcf6,
37417         0x168e659e,0x5fa1d737,0x0e2842ee,0x263e3e54,0x948bd5f6,0xadecc3d4,
37418         0x246b104a,0x019de03d,0xf343d818,0xf8a9e903,0x5b0c0d31,0xcb57ba4a,
37419         0x51e2765f,0x8246c506,0x6519bf67,0x80c5751f,0xf2119a01,0x5f05c200,
37420         0x7821d4f4,0x7e6487b8,0x261c3a06,0x262f94aa,0x72146052,0x56cfe489,
37421         0xa1df05ef,0x5119985f },
37422       { 0xb18586c0,0x5819497d,0xc6eeaa62,0x004415d6,0x97cda28b,0x7c6a46b6,
37423         0x7c194594,0x9a149b28,0x4ed3a506,0xb56369fa,0x43c94cb4,0x7092aa66,
37424         0xa9e9eee2,0x55bce73a,0x77893509,0x34bb2870,0x06eb5326,0x8af95fb0,
37425         0x9638f485,0x87cd0323,0x5ba75bf8,0x29376268,0x9d42d581,0xf32d6f3d,
37426         0x65c6d64d,0xa4cad574,0xb2cded41,0x985f50fb,0x9006a067,0xcf34ce0e,
37427         0x58a57f9a,0x59eaf265 } },
37428     /* 55 */
37429     { { 0x6ec3876f,0x7b407efb,0xf0f48648,0x780c6123,0xbf893039,0x2abb56ff,
37430         0x45a91ab0,0x9592eaa0,0x78811b82,0xce5b84d7,0x1f9f3fc9,0x86a71a34,
37431         0xf0e7e13b,0xc17fdd86,0x655a0880,0x88ed8297,0x81d5e666,0x75d6dc74,
37432         0x1d171797,0xeffc9df6,0xe3f79e1f,0x36ad4c8d,0x2046192e,0xdb15317d,
37433         0x274fda62,0x78c9fa7a,0x82dd9914,0x04ec924f,0x3a64971c,0x059d1e38,
37434         0x2620bbfb,0x3b4450ea },
37435       { 0xc776dcdb,0x3db7a955,0x81c8ba47,0x35c4a57c,0x505760fb,0xae285003,
37436         0xb3aec353,0xe3e80691,0x47117be5,0x380335be,0x056ccf61,0xe1c47e3a,
37437         0x33977916,0x253cfdeb,0xf5cb7ee1,0x3decdfba,0x7cf4b704,0xf3c9794f,
37438         0x9ff81462,0x2401680c,0xbe3daa9f,0x4e440e11,0x69f91d8a,0xc5d04377,
37439         0xcb5e9c5d,0x4106c7a8,0x33b7d24d,0x191909a1,0x3764b4a2,0xe893c838,
37440         0xc429b614,0x4a7fe30c } },
37441     /* 56 */
37442     { { 0x2455c7c5,0xe78f3a70,0x70157754,0x5b7636e8,0x7623262c,0xf32c4524,
37443         0x1bc780c7,0x2c98b11e,0x915ed877,0xd48eaeac,0x199265f4,0xbb04d3c0,
37444         0xcfa5200f,0x6b52b19b,0x93ea3fe8,0xc46a0981,0xba758059,0xd82c733d,
37445         0x1896aacc,0xd324bbd6,0xce8ecd51,0xac09a2fc,0x02fc44b3,0x529918fd,
37446         0xaaa1784b,0xf0c45e4a,0xfe22085c,0x35626340,0xc50c7d61,0x53cbb676,
37447         0x65126b23,0x83fa1ea3 },
37448       { 0x10ccc646,0x60ac86da,0x7b0451e9,0x2ce0637f,0x8a088610,0xbbbcf630,
37449         0x20349982,0x23c19019,0xfc0bcda0,0x707fc39c,0x1bd4fd7d,0x7f4d1f15,
37450         0x44713bbb,0xd6a64e74,0xc5ac9e60,0x57bdc676,0x37b61169,0x456c5303,
37451         0xdcf40a1d,0xd3451396,0x4997d2c7,0xf3edec25,0xc2c4a739,0x534ae9a4,
37452         0x6a6ad2e2,0x1401397e,0x23e95f81,0x20769d4d,0xde98fabf,0xcee007c6,
37453         0x931c51e0,0x61409779 } },
37454     /* 57 */
37455     { { 0x15156623,0x3ddb32db,0xab7a67c2,0x68137fbc,0x6f19e3c2,0x26011f50,
37456         0x89924c61,0x34218b02,0xc6804c1c,0x492a0b0f,0xafaae6a7,0xd65be706,
37457         0x0d01be61,0x3b13d23e,0xf87f4c69,0x44545b47,0x04dc1aa3,0xd42236e2,
37458         0x3c5161ec,0x6135261d,0xbd88bc07,0x1eb46a63,0x1599d720,0x78c6d836,
37459         0x69baf0f3,0xf6955fe1,0x17072820,0x467eebd6,0x3e3a340a,0x2f1b8a2a,
37460         0x2d0b5f88,0x636dac76 },
37461       { 0xb4c80af3,0x94280db9,0x4e3892ab,0x9a189cd1,0xd1477ddc,0x26e702e0,
37462         0x68f9f14f,0xe91aee38,0x80baa0b2,0x2864f63a,0x8b714a29,0xacd81f73,
37463         0xc5fe7cb6,0x30e1b870,0xb10837fd,0x883ea1c3,0x6b20489f,0x2da27953,
37464         0x58a2da5f,0x3aeb2a68,0x03a8fa14,0xe2330bf2,0xdc70b1c4,0xb5c488b5,
37465         0x299678f4,0x0a78c4d9,0x25df675c,0x233bd098,0x7b67d368,0x37b5c076,
37466         0x4d0bef3f,0x2f6dbdfe } },
37467     /* 58 */
37468     { { 0x2e4da7c7,0x2f8472fd,0xae677932,0x708cfc91,0x3dc268e2,0x364af08a,
37469         0x799a2424,0x0f10dfe0,0x71d58bff,0xef912d58,0x988962e6,0x6bf35dfc,
37470         0x5f47ea0a,0x28b96fa9,0xaad308c1,0x734a79ea,0x9f437bba,0x95730337,
37471         0x6cf54f75,0x002cbd8e,0xe7632eec,0x47606dcf,0x53193104,0x404b5ecb,
37472         0x0acf729d,0x0ae0897c,0x3bddf1de,0x89628b86,0xf87d7448,0xeced154e,
37473         0x458d5d4e,0x5cb6e197 },
37474       { 0x008c75ed,0x98cef197,0xf6eeaaf8,0x7cf49d3e,0x1875e96d,0x1d6f9e02,
37475         0xdd9b0d8a,0xfcec2cfe,0xb9576daa,0x38a61cfe,0x36a7dbb8,0x10003f39,
37476         0x23b814f4,0xb37c3868,0xb80e3153,0x9fb66dcb,0x059847a8,0x9e7e2eba,
37477         0x35a72770,0xa4ec63fd,0xfc9e0ed0,0x311f3d91,0xd515baa4,0x3c1dc094,
37478         0xa08cd4e3,0x75a06ebc,0x2ed5eeaa,0xab617238,0xe1f52c1f,0x2e82bbb0,
37479         0x5175d6e5,0x2149d630 } },
37480     /* 59 */
37481     { { 0x5f9311f6,0xee1a8e6f,0xbabc1f85,0xc97e3c9f,0xb494209a,0x4fa7c52e,
37482         0x19774fe1,0x04c2f51c,0x8555844f,0x5cefd122,0xb5873ab3,0xb53862a3,
37483         0xcbed19fc,0x768efdd6,0xee58469a,0xcdc12479,0x3d80c09c,0x11237e31,
37484         0xc044c28c,0xdd74a290,0xbd47e287,0x9ee6517a,0xad0ffeef,0xc2421228,
37485         0x818d281f,0x4273088f,0x43ec0de1,0xebc744bc,0xb415bd73,0x5b26eccf,
37486         0xcb07c26c,0x14e2f350 },
37487       { 0x4216946b,0x548d2a10,0x7a4bd92d,0x6e801f07,0x43695160,0x5996d0a3,
37488         0x63a197c9,0x0f1b5c2f,0x061f77c9,0x79da3c4f,0x93ff7b22,0x1c1cd634,
37489         0xa234123f,0x5e61b650,0xf284033c,0x826b34c5,0xc2f34214,0x718b90e8,
37490         0xae806ec5,0xa5f35620,0xe324a9b4,0xa2fae345,0x8b53cb51,0x8c0bb95e,
37491         0xf9965778,0xc94f6ac2,0x6b9def32,0x07ec607d,0xd0ed8f27,0x63bf1dba,
37492         0xdcb61e4f,0x58537e02 } },
37493     /* 60 */
37494     { { 0x64f80ba2,0x1f64b064,0x0559a45b,0xe8e055e7,0xf1f4b634,0xc3262b34,
37495         0xde8c8482,0xef4f7d5f,0xc30c780a,0x9d55dea0,0xcfa1e693,0x1740afb9,
37496         0x7460c34b,0x2cfe6a66,0x1187c1ee,0xf6695941,0x5f974d94,0x1382f277,
37497         0x004549eb,0x1ca0ace4,0xbabded02,0xf8244b3f,0x4e3653ea,0xc36f4d06,
37498         0xc55c5f83,0xeab9f0dc,0xacebce90,0xd93b9cef,0x19061425,0x16658e72,
37499         0x82d7970d,0x4857835f },
37500       { 0xd2576210,0xdcd525bc,0xd51b5443,0x9f378aa7,0x1bd83994,0xfe97bf17,
37501         0xf38ac621,0x930d0f63,0x818408cc,0xaf8f2c17,0x260f53f6,0x2692c87e,
37502         0xdb0a75e4,0x0ee45407,0xffdb1b37,0x0ec47ae5,0x7aa6a44b,0x769129dc,
37503         0x2e40b75d,0xb6f932b2,0x95ef3b77,0xe06764d0,0x68bc63e8,0x28fd47f5,
37504         0x9c0014c0,0xd1810494,0xd7995d8e,0x90e2d3fd,0x6c2a85af,0xeb39a05d,
37505         0xa21f3128,0x6c0277bd } },
37506     /* 61 */
37507     { { 0xb509e7ef,0xe41b7086,0x3d7f9f91,0x8842ec7b,0x5526b88b,0xcd285f94,
37508         0x051dd0ab,0x6e44e064,0x774f1ceb,0x90198c10,0x123e661b,0x6ecabe98,
37509         0x32f647d9,0x44811136,0x26c52aee,0x1dd82b45,0x939dc9d5,0xd650907f,
37510         0xfcd455bf,0xbd5eeef2,0x8d2e5d7c,0x7815a4dd,0x88bc9f2a,0x5ad4ec92,
37511         0x57a3b322,0xc6f10d0b,0x20b9cbdb,0xe8d0c1e7,0x9b774ee8,0x5a0b071a,
37512         0xf22fcf8f,0x3067bc9a },
37513       { 0xb7ca9326,0xe0e589f2,0xb1224f63,0x17a106fd,0x747a57bd,0xb2354521,
37514         0x62b0882e,0x2614982d,0x4391ffcf,0x7f3af544,0xa84e440d,0x1aaa337b,
37515         0x941bb071,0x28ea37b0,0x2e4a7f54,0xa957dcb4,0x1a6ad5fb,0xe7ab662c,
37516         0xf7c36a20,0xd135e381,0x9baa0b6b,0x42e7980c,0x94e4671f,0x4237030c,
37517         0x8b0922e3,0x24cc63ff,0x445a589f,0xd10d5279,0xa870ff6c,0xbb99d316,
37518         0xa996c195,0x390c83ca } },
37519     /* 62 */
37520     { { 0xffc4a73f,0x50d3fa82,0x3bd53303,0x2665d635,0x264bb77d,0x80a06f8a,
37521         0x22d73d84,0x81c04a6e,0x0323b8aa,0x2409cff5,0x8c4c4d5a,0x31dce217,
37522         0x0c0f9c19,0x374aa80e,0x00186bb8,0x0b25a387,0xaaf1487f,0xd0b77a10,
37523         0xab498de1,0x15f39ad5,0x1aa0c116,0x92e32da6,0x96e25ce8,0x228e3dbd,
37524         0x5e8646d1,0xb57c88dc,0x267b1c68,0x672b1164,0x600bdec5,0x5d0d807f,
37525         0x223e573a,0x3ea4007d },
37526       { 0xa595d0a3,0xd76debd0,0xaff0b3b4,0xa6bd76cb,0x9b1bdb97,0xbf2c154f,
37527         0x4c714c71,0x62b19ab4,0x221af663,0xc9bf33b9,0x8c941ef6,0x23d87c49,
37528         0xd79f0f6d,0x255804c3,0x2a7acbc1,0x6f1a1005,0x550528af,0x5dab79d9,
37529         0xc8d16213,0xfd77a6f0,0xde5e1029,0x40508b6d,0xf95da12b,0xd95ac0f2,
37530         0x758a8ba1,0x8860af71,0x7160c8fb,0x0b194c83,0xce004d34,0xa40e6c80,
37531         0x6b14aaa0,0x09f82a17 } },
37532     /* 63 */
37533     { { 0xc21366dc,0x60abe588,0xaf75daf9,0x729c0a4f,0xacb93ed4,0x70501fd9,
37534         0x87a16d70,0xb97e744e,0x98e7361b,0xa42e0a7a,0x28b54cf3,0x1acdaff2,
37535         0xb7bd9078,0xf087ccbb,0x663250e7,0xda6f3983,0xbaf07c09,0x66d693ee,
37536         0x8cbaf157,0x79baf4c3,0xdfca99d0,0x5a984e07,0xf26d8dab,0xab4d3247,
37537         0x7eba36f9,0x4d0be701,0x0e8dd216,0x37bb9e65,0x531c4f03,0x72aa4e24,
37538         0xb753d85a,0x77d1e984 },
37539       { 0xd8e62367,0xd9373239,0xb9820cf1,0x3361848b,0x5a9c97c4,0x00c7e344,
37540         0x14f960fc,0x9a0ec9ae,0x740474b5,0xcf41f0cf,0xece065d5,0xa5eede8f,
37541         0x9e808610,0xb1de5a4e,0xae0cf75d,0x17c44ae4,0x6b148d0b,0x2fa56323,
37542         0xd29ff2dc,0x64fa740f,0x88cb212e,0xc605eb8a,0x6a863016,0xf2c771ad,
37543         0x607b4c17,0x6d6112e7,0x40d49785,0xfe90ec07,0xe256e0e5,0x599be18b,
37544         0xca54adb0,0x4e6eabec } },
37545     /* 64 */
37546     { { 0xfb99cfe6,0x950323d3,0xc9334178,0x7b09bc26,0x7cbdfb6f,0x64111e41,
37547         0x89a75760,0x91141744,0x10919cb0,0x4c633df9,0x396bfd2f,0x715fc7c7,
37548         0x8cab62db,0x8ca19512,0x4db81aac,0x30672473,0xb4c4c54a,0xe67a246b,
37549         0xbf229646,0xd77ea0fa,0xfa5b5d70,0x5bed15f1,0xc2f192f3,0xa5686da5,
37550         0x7f6690ad,0xdecac72a,0xcaa50b7d,0x0c4af2a2,0x6049ad2f,0xf44631c1,
37551         0x04ecf056,0x325d2796 },
37552       { 0x4848c144,0xee11fb55,0xb6a7af32,0x4e062925,0x369e0f9a,0x125b68e1,
37553         0xca53b21e,0xad9bdae6,0x2e98ea1b,0xf50d605c,0x9f2fa395,0xbdb9e153,
37554         0xe91532f5,0x4570e32d,0x46a250d7,0x810698ae,0xad9d9145,0x7fd9546c,
37555         0x11e97a5e,0xabf67721,0x249f82e9,0xca29f7d5,0x9851df63,0xa9c539a9,
37556         0x71d0e3e5,0xfd84d54b,0x041d2b56,0xd1e0459c,0xfd80096a,0xceb3eb6e,
37557         0xe32a79d3,0x19d48546 } },
37558     /* 65 */
37559     { { 0xb540f5e5,0xfe19ee8f,0x04e68d17,0x86d2a52f,0xadbdc871,0xd2320db0,
37560         0xd03a7fc8,0xa83ad5a8,0x08bcb916,0x54bf83c7,0x2e51e840,0x092133ea,
37561         0xcb52dddf,0xbce38424,0x31063583,0xd5c7be40,0x458e3176,0xc1ebb9df,
37562         0xbc4dabbf,0xafb19639,0xc05725a8,0x36350fe4,0x84e1cd24,0xac4a0634,
37563         0xc145b8de,0xadf73154,0xb3483237,0x0aa6dd9e,0xcbff2720,0xa3345c3d,
37564         0xb4e453b0,0x1b3ace6c },
37565       { 0x90a8bdc5,0x0343e5e9,0x6306a089,0xa203bf9d,0x8e48520e,0x98489a35,
37566         0xde7d1d06,0xbd17debe,0x5f795d3f,0x8fafa6d7,0x387b0a3f,0xa4ceb630,
37567         0xffddeafa,0xe0166b32,0x7e764e02,0xa2fe2054,0xe871f304,0x55ab9824,
37568         0x952ec45e,0xa2bd36bb,0xa90d20ca,0x7b4c1484,0x75bcfb53,0x5319f387,
37569         0x6982c4e5,0x34238a4a,0xa102921d,0xa2bb61c7,0xdb3ab17e,0x1e061b64,
37570         0x192f0a14,0x538ec33e } },
37571     /* 66 */
37572     { { 0xa19b56cf,0x193496fe,0x7bb99acd,0x663d77f4,0x57d0a881,0x8f04afa8,
37573         0x082835fd,0xcced3da2,0x5d82cec7,0x7e21faed,0xf8009c85,0x6e175b99,
37574         0x2d05a307,0xd9c6e31b,0x81487d82,0x96948d4a,0xd46f6655,0x86ebd3f2,
37575         0x773ccc49,0x86851aa8,0x8b1640a6,0x3e220f22,0x41a20b75,0x9f06e3a8,
37576         0x90ac0a6f,0x2cfffe5e,0x8ebeb3fb,0xf5a9b1da,0x6e08e2c9,0x2587d997,
37577         0x03e9f401,0x6fd60298 },
37578       { 0x8eb7516a,0x54709f8d,0xbdc598ab,0x83058a74,0x87e801ce,0xd234dd98,
37579         0xd17b8a96,0xfd0f9d90,0x6e90f6ab,0xaa1e549f,0x5a7ed55b,0x2496ff80,
37580         0x6c254c19,0x0d9f657a,0xb8962575,0x3cdea49c,0x2dff27de,0xb685a3f0,
37581         0xdb8bc04b,0x3c50e7fd,0x987236b0,0x904ff0ff,0xbb0d5055,0x494298fd,
37582         0xe14be8d0,0x34b3386d,0x7c3d30d6,0x7ad34e9c,0xe159fdd9,0x1f2b32bd,
37583         0xc761e5c0,0x84cfa23c } },
37584     /* 67 */
37585     { { 0x8b99b964,0x13bc11eb,0x58e2fc47,0x8e280c0a,0xd4c9a54b,0x870fbc49,
37586         0xbf6e20fa,0x37a334a2,0xd7c88cfa,0xee583d0d,0xef4af1da,0x05e029a8,
37587         0x0c2ef8a6,0x6d55e234,0x209e9b62,0x61b6fdfe,0xbb8e080f,0x3b1dad26,
37588         0x9392fc1a,0x5adbc162,0x0aae3f4e,0x02ac0fe6,0xc2bf4d5b,0x8d99801a,
37589         0xc282fed2,0x2333f93f,0xb52db33f,0x16dcb10c,0xc55752e7,0x09f90f84,
37590         0xc84a0d8e,0x287d4c51 },
37591       { 0x0e9867da,0x5fa58201,0x1a874cda,0x614589b3,0xfbdee22e,0x005e27c5,
37592         0xe612bda8,0xe357fef5,0x2d3635f9,0x4e0dbedf,0x6f125a86,0x62be70e4,
37593         0x0d94a2e5,0xa09b9884,0x28b5e5d1,0x7eb99a15,0x751028b5,0x21b9416e,
37594         0xe06d2cc4,0x1b137fd7,0xfea09845,0x6fa1f517,0xffcecbd7,0x3ba1e966,
37595         0x832f453e,0xd4c89a4a,0xeca68fa1,0x07b1e2af,0x4bd395a3,0xd0fb4453,
37596         0xd8ef9e13,0x0132a3dc } },
37597     /* 68 */
37598     { { 0x576374c2,0xe53c7785,0x84727040,0xe60526d1,0x228ca044,0x8a066dc8,
37599         0xf1ce1313,0x1fe1c1b2,0xcdeb0c5d,0x2aeec832,0x9cbf826f,0xa7596699,
37600         0xde77a589,0xcd188e81,0x118d1254,0xe5ce0fe0,0x0790b86a,0xa142a984,
37601         0x39ac28ce,0xe28f043f,0x87de5804,0x4eef8290,0xf639a8c5,0x83c31b32,
37602         0x5887794f,0xd70454a7,0x18b1b391,0xca635d50,0x31d9c795,0xcefea076,
37603         0xb6f8aa25,0x13cbee76 },
37604       { 0x8d3f34f3,0x79cabe0f,0xa3617fe3,0xbda9c31c,0xdd9426a1,0xb26dee23,
37605         0xf29c9104,0xe9dd9627,0xe2c6cd3b,0x033eb169,0xfcba2196,0x8a73f492,
37606         0xb858c83c,0x92e37e0b,0x23b3fbb7,0xe4f2aca6,0x64be00a2,0x8101fb1e,
37607         0x948f6448,0x91a7826a,0x907260e7,0x414067b4,0xe30bb835,0xf774aa50,
37608         0xc999c06e,0xf922ca80,0x0ba08511,0x6b8635b9,0x25fa04f0,0xbf936b5c,
37609         0xe02e8967,0x4e0a1ada } },
37610     /* 69 */
37611     { { 0x8ba29c4d,0x00ca6670,0x22988094,0xc08240ce,0x16dda752,0x21c5ca67,
37612         0xabbbfa34,0x689c0e45,0x3ed28b72,0x1d7545fd,0xd7c56ab4,0x5f221198,
37613         0x38759d65,0x4b3d8f74,0x8fe50b89,0x93490dfb,0xe80eba16,0xb641f5d7,
37614         0x79acb537,0x7b0da5eb,0x0c1d5e5e,0xab6b1497,0xa5da429a,0x2338e68d,
37615         0x2f6d2f25,0xe010c437,0x6530f3a7,0x226f16d2,0xcbef08bc,0xefb0f7b6,
37616         0x9f99c999,0x733e30d9 },
37617       { 0xa42a38f9,0xecfe1582,0x4730b500,0xaec2d58e,0xde976b2c,0x2ee2f2a7,
37618         0xa969c1bb,0xf0539db5,0xfcecdb4a,0x31954168,0xe7a8e902,0xf2f7348a,
37619         0x3121541f,0x1d58d7cc,0x2202ae52,0x5d25b75c,0xf40835a7,0xdea9965a,
37620         0x529b4e46,0x3feb6a41,0xbd27ad9b,0x5c97fb6f,0x261f900b,0xd87554c0,
37621         0x04d5b19e,0xb43031d9,0xcb219b9c,0x33d5e9b8,0x3ee00bcf,0x7a43d492,
37622         0xb79a5c0c,0x56facb39 } },
37623     /* 70 */
37624     { { 0xa3018bfa,0x019165a2,0x9ffad984,0x100c6b24,0x55341a9b,0xbbf1b1f6,
37625         0x25dc4cc9,0xe6bd1d97,0x2bfffe60,0x52850ed5,0x7e5509ab,0x24e992cc,
37626         0x4ceb59f1,0xff6c502e,0x1aa7d148,0x2f0b3573,0xe7e3aa46,0xe90c1ddd,
37627         0xd1142880,0xbaec9f45,0x65be5dd5,0x475cfd26,0x1febce13,0x83abb14e,
37628         0x80942d30,0x6aba4829,0x297e82c8,0x1e1b235d,0x50d8218d,0xb771cdbe,
37629         0xd94d6cbb,0x88599266 },
37630       { 0x155ccaf2,0x08847290,0x7c5b773e,0x8679ebc7,0xb2dd08ed,0xa88b2dd1,
37631         0x87d475db,0x960a180e,0x6694d02a,0x80fdb6b7,0x3f3f9e96,0x3e8758c9,
37632         0x4ad836c4,0xbda3f6fa,0x32fb387d,0x9400c581,0x2550200f,0x25a78542,
37633         0x776ecf18,0x2a97c351,0x566db59a,0x03ebf46e,0x26545eda,0x4743a280,
37634         0xcf74ab44,0xed169d84,0x88cb3f69,0xbaab931d,0xd8257196,0x70ae932c,
37635         0xa0c09719,0x797224a6 } },
37636     /* 71 */
37637     { { 0x441f3567,0x632923f8,0x2e24bf1d,0xc11c3168,0xb7671fff,0x4b97726b,
37638         0x7a5e1a22,0x601746a7,0x3addb417,0x53dddea0,0x7f59b846,0x57867a3c,
37639         0x56cd7ff7,0xb012a987,0xf19ba9a8,0x1bd5fec9,0xf8306748,0x750379a2,
37640         0xab8c05d1,0x7763445d,0x7903f42a,0x5d7f441b,0xa903e46d,0xc011674d,
37641         0xadd126c1,0x1b1d3c4d,0x61455b40,0xa2752aac,0x555c356e,0x4da42a68,
37642         0xd820852c,0x3ff09c15 },
37643       { 0xf9cb7784,0x4c0a1bce,0x2422f305,0xaec539bc,0x0c414aa7,0x5f40f9fd,
37644         0xffd42bc4,0xd3aa316c,0x2f358e15,0x42f5a4c3,0xd6e27682,0x00bdcd9e,
37645         0xf8a5ecee,0x069f789f,0x05e14f5d,0x8078018e,0x8b40c741,0x2bb3e493,
37646         0x7917f72d,0x5dbc8c1d,0xcc57150c,0xe0eea664,0xc3fa8920,0xa25ecc5a,
37647         0x1c797164,0x3c21b0f5,0x634ad16b,0x8f09a2f2,0x58391d9a,0x8e730fc5,
37648         0x4fdfae4c,0x47ef1805 } },
37649     /* 72 */
37650     { { 0x3da285e4,0x9965f3d1,0x3a01e3f4,0xba7d4dba,0x61214ad0,0x4738413a,
37651         0x22397549,0xd3b7d535,0x5a730b92,0xa53dbdcf,0x332d165d,0x3130d92b,
37652         0x82f97ef4,0x44a28541,0x44dce1b6,0xbf62221c,0x7e2a0ec9,0xbba13858,
37653         0xcbfad998,0x33f32c8d,0xb5fed44b,0x409e5f3f,0xc66217bb,0x5c328c65,
37654         0xfcdf71a9,0xb00db69f,0xb8920788,0xa23c2a21,0x3ae6464b,0xf8ab28e6,
37655         0xb8de0861,0x1a6b6e9c },
37656       { 0x06af77aa,0xaf6ec2b6,0xa887f065,0x2e60f5cd,0x9f498c56,0x87d21400,
37657         0xfcbaaf4b,0xdb595b59,0x271ab855,0x0fb592a1,0xd4349b0c,0xa0ce10e5,
37658         0x887d8c9c,0x9d6187d8,0x154bd6db,0x03ee95f9,0x5d06c999,0x8fe53213,
37659         0xfb6a64d0,0xf4a7bc30,0x66a4cb60,0x3d22af0d,0x5d37367c,0x16952cef,
37660         0x997d8e55,0x6f0ea734,0x731732d0,0xb447c70f,0xa9cb3942,0x00ab3034,
37661         0x28510fd0,0x79dd0180 } },
37662     /* 73 */
37663     { { 0x3ac7424e,0x04e0033a,0x60fda4d0,0xdb06b688,0xbcb772fb,0x236a9766,
37664         0xf297cda4,0x294a8e2b,0xdb013c6e,0x4b0aab85,0x8723a3ad,0x3d2aec98,
37665         0x13c84a6b,0x0cae32cd,0x70ec169e,0x21888f5e,0x42a88262,0x739633bd,
37666         0x7b60d9b8,0x68ac792e,0x10769fe1,0x89f2b722,0xd24bed34,0x8f3fcfe6,
37667         0xa3eb24aa,0xd35efb88,0x484c706b,0xddecfa3f,0x929ece0d,0x7cc119a9,
37668         0x8d405436,0x87e5ad45 },
37669       { 0x7d1000a7,0xba99aa9d,0xae823833,0x8b94affc,0xdfb83dc5,0xc8229628,
37670         0x845a418d,0x2f59fe11,0x5d417054,0xa8b970f8,0x72b71581,0x8918c265,
37671         0xc0d1dd17,0xe4ef477d,0x3afad7c0,0xb50b4cf3,0x01870a5b,0x21baea79,
37672         0xbb3a2868,0xc77087f9,0x124a59cd,0x7857531e,0x57f43239,0xed74c26f,
37673         0x0164c94a,0xd5f5ae25,0xf094bf74,0x6608b7e2,0xfdceea32,0xf4cdb5ba,
37674         0x990cc045,0x0b712519 } },
37675     /* 74 */
37676     { { 0x88d5c64d,0x5a290ca1,0xa7492534,0x0596d749,0x2a00e925,0xa04b0d3d,
37677         0xcaf7b66b,0x082cd02c,0xecdded83,0x912b50c2,0xff31646e,0x813ce9de,
37678         0xc75fff95,0x62ae70c7,0x7e2a4615,0x6f6852e0,0x03804fd1,0x320fd7d0,
37679         0x8218e8d9,0xb1a2a4dd,0xafc645d7,0x4918a6fb,0xe8d9fdbe,0xfb080fa1,
37680         0x4470b6ee,0x33d4d08a,0x6d974ef7,0xd2ba2077,0x69dae5d2,0x8ecb95a7,
37681         0x7d69596d,0x7a3f423a },
37682       { 0x9a929387,0x362d2ca6,0xcb1c1fff,0xabdb7581,0x7e51b6cb,0xd892ec9f,
37683         0x3a4e131f,0xee8d8632,0x5bd87561,0x4680e3f1,0xd4e7e732,0xe3a597e1,
37684         0x5581fefe,0x3cc72b7c,0xca8cae0b,0xf3e77f8a,0x5e2fd4af,0xfcc7d7dc,
37685         0x21355b79,0xdd3a4552,0xa2c07177,0x546b24f2,0x0689621f,0x415b532d,
37686         0x3f78163e,0x2be9af51,0x33d7ed21,0x27d63b9b,0x96802943,0xab019ef2,
37687         0x1623faf4,0x2da5fc55 } },
37688     /* 75 */
37689     { { 0xc8a5c600,0x62429cf3,0x3fe33e7c,0xa7a80c22,0x0a57ddcb,0x9ffda740,
37690         0x925b0c74,0xd1ae156d,0x6b100eb0,0x097a43f9,0xef943c81,0x169e945c,
37691         0x1128cf24,0xa1f734e5,0x419f0133,0x04387c4a,0x01044024,0xc007868b,
37692         0x90359cf2,0xe5416abf,0x478d54e3,0xf9c76fee,0x42a2173e,0x66219da6,
37693         0x9fe30141,0x61e03156,0x93ef247e,0xa0ff5ce3,0x072b6592,0x811792ba,
37694         0x70c854d3,0x855f0219 },
37695       { 0x847314c4,0x61fbfb6c,0xeb45b96a,0x97906155,0x6ba2afac,0x7102e146,
37696         0xab949781,0xed51f975,0xc110c4fe,0x9d2f5b17,0xaff57667,0x7ac8ce70,
37697         0x6eb244e7,0xe7366a21,0x551c65c7,0xdd1bbcec,0xe1a859de,0xb525060a,
37698         0x8ba7d2e7,0x7a048174,0xab8ea8c4,0xe1a2c541,0x6fdff078,0x6e7824c3,
37699         0x14874b04,0x79b49fc7,0x06b1f733,0x22ae337f,0x6f8fe6cf,0x1c352192,
37700         0x525d0797,0x292236cf } },
37701     /* 76 */
37702     { { 0x7d8b29dc,0xcdb8d80a,0x08ea648a,0xd17a2024,0xae92be91,0x7db12c5e,
37703         0xfda72fbc,0x1f347d18,0x9e760c6f,0x11374b40,0xd8e38d91,0x7361e8f1,
37704         0x739ac1f4,0x7714be9d,0xb4df5c4e,0xc1f9701c,0x6f72cae1,0xd9138ed8,
37705         0x6ad180c4,0x1c7fe1f7,0x9e2dbf9c,0xf8c185be,0x7c70c44d,0x835db269,
37706         0xb0d15b5f,0xf997cfea,0x61e6545e,0x5101445a,0x25184e5e,0x16b06884,
37707         0x7521e7aa,0x7cfac359 },
37708       { 0x3c0bc53a,0x81182167,0x7e751367,0x84b5ede3,0xa3657a18,0x3ca255fd,
37709         0xba1fdd98,0x096abbf4,0xc5da77d8,0x9ce8369f,0xaab342c5,0xf27b9ae7,
37710         0x972059f1,0x06c91bd6,0x914ecfe9,0xee0dab30,0x93f53f12,0xbb647fbb,
37711         0xffa57e0e,0x30c38a7a,0x9f2ad607,0x517d06ef,0xbb99dcc9,0x49728d87,
37712         0x446080a1,0xb0034af1,0x12b9c17d,0xcc810c3f,0x772a22a0,0x7225f14f,
37713         0x1ddf82bd,0x6ce3dc7f } },
37714     /* 77 */
37715     { { 0xa4397830,0xc07cd835,0xf4733306,0x4dd9290c,0x29989e8c,0xdd35d3a8,
37716         0x563d8152,0x79902559,0xe87de61b,0xf278d911,0x1024e35c,0x9c7340c7,
37717         0x4a0d0e59,0x2d444461,0xf32626a1,0x63e7608f,0xc4c9baa9,0x627a37e9,
37718         0x76fffd25,0x0c56dc51,0xcef2a1cd,0xcb6defc8,0xefc559d9,0xcbcc0d56,
37719         0x041cb692,0xe45f3fc5,0xe5161e09,0xcd05c239,0x5c3b559c,0x2a731ee9,
37720         0xa3d0a16d,0x85151122 },
37721       { 0x86ff19e2,0x782d0335,0x1da28603,0xc2c60daa,0x557c7eed,0xb2e78cfe,
37722         0x1bc4e8b0,0xa8f6f984,0x3df35c67,0xcc1f9b4b,0x4764462a,0x96e13603,
37723         0x7c7ae0b0,0xbf910b97,0x51435956,0x27c7f305,0xf631eae5,0xc14db15c,
37724         0x7e69b34c,0xa51d6142,0x5fc12ff2,0xdec82851,0xfb887162,0xfcceae13,
37725         0xde1488bd,0xda332ac1,0x2ee3e74c,0xa20374e2,0xf0ae069c,0x597ea1a1,
37726         0x77bdec04,0x8b1159f2 } },
37727     /* 78 */
37728     { { 0x2f961d30,0x4af71a44,0x7ac7248f,0xbdf968a8,0xb1a906cd,0xd32df87c,
37729         0x04abf925,0x00c10e26,0xb9f04d4c,0xb8711759,0x939705da,0x00d54e60,
37730         0xc9f80849,0xf7587433,0x6a7a2375,0x2e9abade,0x94ac17ac,0x5676d478,
37731         0xc202d99c,0x4ca0525b,0xabfae73d,0x95b8bcad,0x3405991b,0x2371ed38,
37732         0x458a99c3,0x2b69e47a,0x2b78c866,0x7cac0b18,0xe0232c7c,0x6ceaa79b,
37733         0x588f7459,0x0bd86433 },
37734       { 0x7e734189,0xdea1a8b4,0xcfe5fa17,0x52c5ac88,0x11437664,0x444a4d4e,
37735         0xaf9e9750,0xc2522308,0xd30c6b3b,0x78b1d0c3,0x4c6df477,0x2edae5f0,
37736         0x2ee88dd7,0x53131d9a,0xacc93e34,0xc4e380ee,0xa8db0e8e,0xd499b1ac,
37737         0x7f5d49d7,0x77348c16,0x1556ccd7,0xc9663257,0x2611d13d,0x65ce0e8c,
37738         0xb5a2fdcc,0x2c95fe66,0x8658faa1,0x26698832,0x31c32c98,0xda87d1f4,
37739         0xfcd91907,0x46650598 } },
37740     /* 79 */
37741     { { 0x6b4a5efa,0x4c6c13cc,0x1d07b265,0xc481989b,0x8bdc69c0,0x10b966ce,
37742         0x2c2531d4,0xf54cfaa2,0xcad0a100,0xcb5f1808,0xee5da449,0xbeb52538,
37743         0xbedd83cc,0xa6240085,0xd6255c78,0xe792dacf,0x2062058f,0x88371906,
37744         0xed1658c1,0x96615e83,0x7d28d542,0x4b549b27,0x83b75df3,0xeaf127db,
37745         0x17fbb942,0x4f60df6d,0xf6f7c930,0xd08631db,0x6018789f,0x17c38f98,
37746         0xb9a9280c,0x0c43574a },
37747       { 0x1d20cad0,0x76eb324c,0x8c61108a,0x90decb09,0x6f06d36d,0xa6e9d39c,
37748         0xbc0da197,0x6cd978ba,0x507ac5ce,0x5948b1c0,0xc5497eb5,0x2bd47164,
37749         0x4d5914e3,0x2a9c4c0f,0xa759f03c,0x772c5046,0x69ac847e,0xe7d7328a,
37750         0x3048b330,0xa8d57d0c,0x40f7bace,0xe60034e0,0xa85f1790,0x823d9193,
37751         0x5c859736,0xa6e9b66c,0x679e1022,0x22ca2c7a,0x09023fa4,0x00e7a19c,
37752         0x2726d5b9,0x324999f1 } },
37753     /* 80 */
37754     { { 0x7c834915,0x667eaed6,0xbc5eb64d,0x9f77aa6a,0x25d62011,0x729ebcb6,
37755         0x699fd9c2,0x0aee24f2,0x2b8d4f6c,0xe1eb5874,0x14c976d6,0x7f12710c,
37756         0xf6d9ea65,0x91390335,0x06b50064,0x668b7049,0x0876ee4f,0x65969a0e,
37757         0x2f9d9360,0xf901bf3f,0xb499e3ce,0xfb1a8651,0xf2dbcaaa,0x80b953fb,
37758         0x973b06b6,0x312cc566,0x3af36c64,0x3534d9c3,0x10ffd815,0xe4463a52,
37759         0xf18c2b91,0x57ea2b4b },
37760       { 0x8aa0f2f2,0x00f5e162,0x0e46bcaa,0x8c7e75c5,0xa4a2c42d,0x97ab479a,
37761         0x14baa202,0xb4f308ea,0x6943cc2e,0xa901bd14,0xeed58804,0xbb125fee,
37762         0x9d180f7c,0x6502c8f9,0x1580c61c,0xe5353919,0x27101ee3,0x7e278069,
37763         0xfaa72717,0x7a0a40a1,0x4c75b153,0x32edce02,0x538f1c22,0xda23660b,
37764         0xbe307d2e,0x4d511e98,0x9baee0b4,0x24276e40,0x7ff1f307,0xa78c3927,
37765         0xea7935c9,0x60480b46 } },
37766     /* 81 */
37767     { { 0x3872ece3,0x31087d66,0x955b70f8,0x5f29be7d,0x9cf95bb8,0xb50b4fc7,
37768         0xdbffa621,0xbae3b58d,0xe022ba5d,0x0e61d280,0x4181449c,0x78ae5117,
37769         0xcf555485,0x0b132840,0xb8ce0b0e,0x800ed1b6,0x78d5de3d,0x35dffdd5,
37770         0x69a56b47,0xf7e42374,0x8d910ae7,0xd5e32369,0x6313c7c7,0xb6ff52a0,
37771         0xa92de9e5,0x5a2fe20d,0xd12110bb,0x41b347d3,0x40c16f23,0xc5905edb,
37772         0x9a8f88cc,0x0774a0d3 },
37773       { 0xe3b6c106,0x3ae181ab,0x8de150b7,0x4ebe163f,0x6f354836,0xcf75b82f,
37774         0x3ac7ac16,0xaa0d2063,0x291722af,0x5c680668,0x11545553,0x73941e61,
37775         0xbf5de3f7,0x17127e38,0x1afb41da,0x32cfdf03,0x87bc8663,0xc6893c91,
37776         0xa62c9c99,0x75046744,0x962c1947,0x96866e2d,0x378cdf4c,0x489ec8df,
37777         0x3407fa32,0x3a60709b,0x551290d1,0xd37d2159,0xbab92273,0x9623d303,
37778         0x2432014b,0x08151954 } },
37779     /* 82 */
37780     { { 0xfb7b2108,0xf9236d89,0xad75f9aa,0x3ecc83cc,0xb4e1da11,0xf7c72b15,
37781         0x0315c362,0x552aeaef,0xf272fe3f,0x11e140ed,0x87843ee8,0x99d79bf6,
37782         0x1d9bb25b,0xce6b54fd,0x5b1bad74,0xb20b0e21,0x5b84c90d,0x54a0214f,
37783         0xfca6cec9,0x459bbf52,0x9e4df76f,0xe363c48d,0xd64cf17e,0x3045f84e,
37784         0xf62ada48,0x8402a167,0x6a74ca01,0x2c9e1bf3,0xf691c42d,0xe8cf9d41,
37785         0xc2c4b874,0x5abf2178 },
37786       { 0xf3b3bccd,0x4777966b,0xbe3e0caa,0x0047e0f0,0x8c7d5043,0xcb8383b3,
37787         0x946fd5fc,0xe77e3baf,0xe9ec0e87,0x79baa785,0xc8a18d25,0xd83c557c,
37788         0x25befcfe,0x9b96e5af,0x98c71b61,0x4f05d15e,0x77e62da1,0x081f991a,
37789         0xcbaa3821,0x1c6ec781,0xe54d9bfb,0x7522f65d,0x44ed1430,0xf5d05573,
37790         0x95cafdda,0x3035b31f,0x6378f5bf,0x47e67f43,0x5270b9d9,0x029f7cad,
37791         0x4d916a48,0x15ad1587 } },
37792     /* 83 */
37793     { { 0xaa588ae4,0x00de2ece,0xa371a232,0x552ebc58,0x71230444,0xd00ea934,
37794         0xe4b1832d,0xafbfa67d,0xb689e843,0x29216341,0x61f4e2e8,0x1f96bbbd,
37795         0x04c29dc5,0x95420684,0x42317fd1,0xc7fe3827,0x63483162,0xe0a0aec6,
37796         0x0700184f,0xfc2b94d1,0xfe1fbd85,0x07219973,0xfb074352,0x648b6ab1,
37797         0xc46e5392,0x23bbdaad,0x00fa56ff,0x0db8dd1f,0x866725f6,0x104815eb,
37798         0x52e81963,0x3f9c4cca },
37799       { 0x32ce637e,0xff36b297,0xf5d25cdd,0x81a15f2d,0x8b02ad97,0x1a1d052d,
37800         0xcfbab3e9,0x2e5f3bbc,0x614eeb75,0x60d2cbd7,0xcd5a793a,0xd4491843,
37801         0xcdba2144,0x2242cf75,0x88b99766,0xa20705e7,0xec77e132,0x64e12cc0,
37802         0xb61a9b05,0xb1c14df6,0x74825b5a,0x8fd97f04,0x3da31223,0x95604821,
37803         0x4d30c70d,0xde486727,0x1c12ee69,0xbcab8f15,0x668d893d,0x5dc638b4,
37804         0x223f574b,0x6479dad6 } },
37805     /* 84 */
37806     { { 0xb05f2b26,0x569044f3,0x80b9f76c,0xb35a294a,0x4290f6ae,0x8839fe28,
37807         0x026a5877,0x761cfb23,0x2e5ff9c3,0x768926b6,0x0b11c576,0xbae6cd20,
37808         0x72a03efe,0xdc857756,0xe1bad63a,0x0cae074a,0xd709d99c,0x3fe491a1,
37809         0x6501d9c1,0x76c5ded6,0xc32aeff7,0x1da6eca1,0xc57683e8,0x50849d55,
37810         0xdf98d847,0x9e392e9c,0x64d9a564,0xfad7982f,0xa37b98b2,0xf7c3bdb7,
37811         0xf0860497,0x1fe09f94 },
37812       { 0x7648cc63,0x49a7eaae,0x67cfa714,0x13ea2511,0x653f4559,0xfc8b923c,
37813         0x81a16e86,0xd957619b,0x3c864674,0x0c7e804b,0x1616599a,0xfc88134a,
37814         0x0a652328,0x366ea969,0x4bc9029e,0x41532960,0xae2aad2b,0xef9e1994,
37815         0x7f10bef5,0x9e2a8c52,0xc67bf860,0x73dcb586,0x844cc25d,0xf61a43fa,
37816         0x74eb3653,0xd74e7eea,0xdd240f02,0xf3356706,0xfd83bcb4,0xeec7694c,
37817         0xdb62526a,0x4de95786 } },
37818     /* 85 */
37819     { { 0x3deac2f7,0x4867d315,0xb61d9a8e,0xa084778a,0x0ab7b2d5,0xf3b76f96,
37820         0xcfdf4f79,0x00b30056,0x31ab8f4b,0xd0701e15,0x9c779d01,0x07f948d5,
37821         0x82675371,0x7c994ebc,0x48bad4c0,0x1104d4ee,0xbfc9d058,0x798ce0b5,
37822         0x309fa80b,0xc7ca898d,0xacb33eaf,0x0244f225,0x5b2f3175,0xd51e8dfc,
37823         0xa4d7be34,0x3e49ba6b,0xbda02b43,0x1760f4c7,0x4435275a,0x37e36a7e,
37824         0xe636980c,0x1c94418b },
37825       { 0x09dc1414,0x43a21313,0x43c93537,0x060765fc,0xdf5f79ce,0x6ff3207a,
37826         0x85d4cfca,0x6f18b1fa,0x63e995ab,0xf5c4272e,0xa82b3002,0x121a09e4,
37827         0x97147f16,0x82b65d1b,0x20a7fe26,0x4993c20c,0xe6716726,0x99c9cb98,
37828         0xfeb440a0,0x5a02d673,0x251b4bc5,0x3f3fa9e1,0xa05338ea,0x75dbc474,
37829         0x7b09f6cb,0x3cb4044b,0x80434609,0x6767da18,0x098ceac2,0x97851422,
37830         0xb55235ba,0x611bfbb2 } },
37831     /* 86 */
37832     { { 0xf00ad2a1,0xbdbaa55e,0x14a290d7,0x29efa85e,0xe92b1694,0x3b4a4768,
37833         0x11ec8130,0x67111bcd,0x88bd27b2,0x0e425702,0xd9a03c06,0xf28cf2a3,
37834         0xf318884a,0xbb7c8d2d,0xe3aaeb20,0xe2ea1462,0x43b85d77,0x33535804,
37835         0x554ee9bd,0x81ee4482,0xe6aa198f,0xeb2eee9e,0xc26c5944,0x7a5aa804,
37836         0x82ab167c,0xa0ef2da5,0x02fe21a5,0x5a2ab476,0x3370298e,0x169cb3b8,
37837         0x0eb3aa8d,0x86e6c544 },
37838       { 0x0b793d9b,0xede03321,0x1ddb5ece,0xf79fade1,0x68930b64,0xf73fda92,
37839         0xfe4fd1b2,0x06aad97d,0x92a4dc88,0x073a5b1d,0xbc976d75,0x8af8cbd8,
37840         0x63ce26c0,0x60b4abb1,0xdcb1fb06,0x9c8300a9,0xda95b3d3,0x335a594c,
37841         0xb37eac87,0x1f97d7d4,0x20eefaab,0xa3d2eba2,0xf3e828c8,0x3258c906,
37842         0x85ab7781,0xc832616f,0x8c28b617,0x72597192,0x3233b82d,0xcd7196bc,
37843         0x19fa126d,0x83867eb9 } },
37844     /* 87 */
37845     { { 0x22474edb,0x774fe73e,0x1a84e1ae,0x2a766394,0x9c6dd6e3,0x270329ad,
37846         0x14f8bf5d,0x00c4a415,0xd2267b90,0x3ce2ea37,0x11d24fae,0x12753015,
37847         0x263a1b78,0x7c14d854,0x1ae0b206,0x20c8401b,0x081f49fc,0xf32a011b,
37848         0x959c6df8,0x1e8123fb,0x800e1d06,0xa328dc7c,0x24259a9a,0x5876a378,
37849         0xb7ef6c37,0x23ada8b5,0xa93d4c9f,0x023f6b6e,0xffb6389f,0x89f5414d,
37850         0xe628b39e,0x4b26bba2 },
37851       { 0x5d318454,0xd30b1cb4,0xd7436cb6,0x123b749f,0x568a7461,0x3110c726,
37852         0x1c84fd1e,0xc85de123,0x08403d55,0xa5f8d6e6,0x9b1fabf8,0x395b6e13,
37853         0x3cfedce0,0xfe6d68c3,0x94b91110,0x1d90381f,0x2dcc6eb7,0xf0a8ea81,
37854         0x7e90ca2b,0x59e80413,0xc8a25c5a,0xbeb5fc07,0x5d84663c,0x009c253a,
37855         0x910b6a7c,0x00b15073,0x4108f8d5,0x8607da4c,0xcb901e65,0x02c3d9c3,
37856         0x2c9615c6,0x4d697bc5 } },
37857     /* 88 */
37858     { { 0xefa8fb40,0xe0db1ef0,0x5ba3989c,0x29021c5b,0x809d19df,0xa8d6fb15,
37859         0x4c1219e1,0x6b787b73,0x14ef05e2,0x6417e168,0x8f9796e2,0x449342db,
37860         0xbf84421b,0x2f878a5e,0xe94a4536,0xe71916d7,0xae119693,0x9818bba3,
37861         0x5768804e,0xec674be9,0xf8424f8a,0x0a26074c,0x466ce6ab,0xdbc93b9d,
37862         0xc920078b,0xb3f15a98,0x3870f1a3,0x9d10fd0d,0xe4e785a7,0xa61241d9,
37863         0xe6c8cd80,0x76ca87a1 },
37864       { 0xe02e48b7,0x4357fb56,0xcc09e9c6,0xfbd14b13,0x24069cf0,0xdb5f2435,
37865         0x2c3b01a9,0xf878165c,0xe6956dad,0xe549e7c4,0xbbd60b68,0xf2fe9538,
37866         0x059dc653,0x952f856b,0xb377fe9b,0xd3f60225,0xbfe908c4,0x6a0c7328,
37867         0xbc8f5f2d,0xce6aa2d3,0x24425050,0xf7213443,0x3d3b3ce5,0x17e1266a,
37868         0xc1677512,0x75b5e43f,0x37fb894a,0x15927062,0x2be3e375,0x15260753,
37869         0x6da3b7be,0x27e7f2c6 } },
37870     /* 89 */
37871     { { 0xe6a15883,0x638f65ad,0x66afdb33,0xd4a7e68c,0xd3f12de5,0x6207b6ab,
37872         0x37b87810,0x1c6ff950,0x64acf6d3,0xc0d44cb2,0xf2be78c2,0x163ac601,
37873         0x1636980e,0x1c63cc5a,0x95c9349b,0x3e92cfe8,0x41ec7220,0x7738e0d8,
37874         0x2d5fa961,0x6169d764,0xc3e028e9,0x2aa776c1,0xb16d5409,0x93dc5646,
37875         0x706df4d9,0xa0b27fb5,0xce9c6b97,0x9e991170,0x53c85f40,0xea8e42be,
37876         0x83246528,0x02e96437 },
37877       { 0xae78ea1f,0x91540add,0x7b670e96,0x51a1b74d,0xf7006826,0xf9936441,
37878         0x7d7520c7,0x8f97d6ea,0x69ce12e1,0x0faa6a02,0x79208342,0x2590aca8,
37879         0x75614436,0x7a483863,0xf381408f,0x07c6149e,0xd7853406,0x733bf584,
37880         0x9abbb6f7,0x8761b010,0xf528a09a,0xe4eb249f,0x2e00ae3c,0x08781ed8,
37881         0x2178effa,0x864c1b25,0x9d513a7e,0xcc1e62a2,0x1919062f,0xedb8b94e,
37882         0x4f16527d,0x739f53da } },
37883     /* 90 */
37884     { { 0x924adc5f,0x7a5f4a88,0xa818f56d,0x95646c16,0x7795f954,0x0ec49129,
37885         0xd19c5400,0x2b48753d,0x205912b4,0x16fa236b,0xe87a4946,0x6b3d65f3,
37886         0x045fd066,0xa7174a01,0x12a5e140,0xb6350313,0xa96b8623,0xa79c4b44,
37887         0x9ab003d5,0x7a339d65,0x3826f31a,0xc72f30c6,0x6f7090cd,0xb4e7390c,
37888         0x906ebe24,0x59ac6c36,0xbba4505a,0x39a7f06d,0xc58c413a,0x839991e1,
37889         0xa20e0e84,0x020c23ff },
37890       { 0xafc74661,0x120e4ada,0x277fc065,0x37bbcf63,0xb6dce799,0x41049cf6,
37891         0x7b161ba1,0x5b8d6b53,0xa9610fb2,0x22218431,0xdfdde769,0xde9ec9d1,
37892         0x42d80630,0xd32bfa4d,0x6244df4b,0x3885702a,0x45592dfb,0xcdedd1ed,
37893         0xfb4e01b8,0x0e1df45b,0x86e215b0,0x8f4bded2,0x6a937e6a,0x80935487,
37894         0x8130f723,0x415278ba,0x38a821f8,0xc6dc4692,0xfd8b4f8a,0x2207b119,
37895         0xf9269cef,0x76e7bf53 } },
37896     /* 91 */
37897     { { 0x27ebd187,0x5f128428,0xb65aadbb,0x8d3320ab,0x72258695,0xb042765a,
37898         0x8f0986ab,0xda3f33f9,0xaebff503,0x411807a7,0x825f71a5,0x25c776ca,
37899         0xff7df24b,0xc0de7bed,0x165f1fb4,0xda8b0f42,0x731f3ae3,0x5f3ff737,
37900         0x193e0a52,0x4cd1d7e7,0xb6b3ba46,0x8df84aa3,0xaa1f3782,0xba84b897,
37901         0xe7733ac7,0x6e7960cc,0x50981a21,0x4d46d6ab,0x7cbb80ed,0x1ec12c25,
37902         0x2b96ef09,0x79e7ad27 },
37903       { 0x8f30caae,0x3cd970dc,0x0a6ebef4,0x85cabcf1,0xc714616d,0x63c1863e,
37904         0x519e3a98,0x1c50db0b,0x64cb13d6,0xf39b8963,0x22547b69,0xdf67d81f,
37905         0xd67db0cc,0x7157abb9,0x889491b7,0xccca25ba,0x7a27e0dc,0xf689207c,
37906         0x0fd43281,0x34ae8fbe,0x5720ec09,0xa5d91f73,0xcdfd7bed,0xb2f61909,
37907         0x4a039e32,0x1ec10232,0xdb0d8fdc,0xd3c3d65e,0x4fe5005d,0x32c916c8,
37908         0x4c0bea94,0x7f8c37ac } },
37909     /* 92 */
37910     { { 0x43ac05e5,0x33ec1e54,0xcd8d3825,0xda4a4da4,0x88bf9e2b,0x86d88c0b,
37911         0xb53811dc,0x34d71dd0,0xa3c3aba4,0x655040d2,0xb61611be,0x2bc40949,
37912         0x279a4fa0,0x1c2d426e,0x3b065ac3,0x535a5aa2,0xc52ea890,0xdaa8a32f,
37913         0x9fddad22,0x5a5deca7,0x2ab3b26f,0x911f05fd,0xf37cd81e,0x5dace7db,
37914         0x90d16b8c,0x0e0e44e7,0xe4f5894e,0x15e68aed,0xfc92a74f,0xafe04999,
37915         0x970e7c2f,0x1d7703aa },
37916       { 0x3f0062a9,0xa8a4c81d,0xd96a20ba,0xe31eb2b8,0x864bd101,0x66dd98df,
37917         0x4413b614,0xba05f592,0xe9a555f8,0x51a67a0d,0x2e4b52d1,0xacc2f097,
37918         0x7184ab23,0xab5daaec,0x7c7f691b,0xce08b43e,0x76c427f4,0x520e530b,
37919         0xe423ebdc,0x7d352069,0x34df14ce,0x6b5e39e8,0x446305ac,0x3dcbf295,
37920         0xfe34cdc1,0x682cb2e1,0x111f5afb,0xd4ac45d1,0x47f296f9,0xc5ef63cd,
37921         0x93c20871,0x0a2c40ec } },
37922     /* 93 */
37923     { { 0xaf5747db,0x09bc384f,0xc06ab86b,0x3bad6086,0x9e7c1547,0xa406882e,
37924         0x55977abf,0x2d5326d1,0xda81deb0,0x063a9a05,0x524b6111,0x9a86e4a7,
37925         0x4ab2eb90,0x1402f87a,0xd5c600ba,0x7d0721d4,0xf289fdbf,0x1a2fd9a9,
37926         0xecde6f07,0xf5dce66d,0xdab9fa73,0x62171277,0x6c474bab,0x6d2dc49f,
37927         0x76eed033,0xdc017e1f,0x4da825d3,0xb97175c0,0x54b05e43,0x6c297e3d,
37928         0x56c9c87e,0x2efb4546 },
37929       { 0x8b21c064,0xa4712b00,0x4a70629e,0xd186fe42,0x9b74f0af,0x6435b340,
37930         0x7ec9e629,0x6965aa43,0xc4c60d08,0xdda14673,0xbf3057aa,0x0b656670,
37931         0x3ce86f60,0x7f05e840,0x04401a16,0xc05073a9,0x294e607e,0x16b1e638,
37932         0x69cf7046,0x20783252,0xe8ce7d3a,0x2941141b,0x7577053d,0xd38ad8d3,
37933         0xcaa6630d,0xdba68fb3,0xe9504350,0xecbeaff1,0x1d2d760b,0x9f5166d5,
37934         0x462891e4,0x337532ce } },
37935     /* 94 */
37936     { { 0x3a00bb9b,0x3f111853,0x45f66685,0x2d2ffbae,0xd4aee24d,0x9ae11a85,
37937         0x0341856e,0x18ba1e1b,0x2731349f,0xa9ac8178,0x545715b5,0xc13dfd4a,
37938         0x5daad2ea,0xa5f7423c,0x535b76a7,0x30a483b9,0xff873e9b,0x92e9ada4,
37939         0x723a1055,0x15662d84,0x8edac4e0,0xb935497b,0x39d8fa70,0x61b6441a,
37940         0x40d1589f,0x1541d756,0xf0a05f0a,0x62994237,0x6bb28908,0xfd8b0034,
37941         0xd4cd32bf,0x192a2b5d },
37942       { 0x365ced07,0x63576628,0x05de1d1f,0x029f32fb,0xbf40a7aa,0x6d17b9bc,
37943         0x9bb50a47,0x1b1b2a08,0x795a6278,0x9389abbb,0xb34fc19b,0x52cff60f,
37944         0x387d8739,0xf3ab9492,0x6920ccd6,0xa8f053e6,0x63a9b4f0,0x3ef2dd4b,
37945         0x51e82129,0x9ab0ede1,0x0838bfa1,0xafba0c0b,0x9ffc11be,0x2bd5a7ac,
37946         0x95cc0878,0x058bfd95,0xf8c2f0c6,0x686d48a3,0x1d9b31ba,0xc33abaaf,
37947         0x3bc0c268,0x632e2289 } },
37948     /* 95 */
37949     { { 0x15a1ccca,0x1c851d20,0x7e522bc3,0x4efe290c,0x18eab053,0x0b741d55,
37950         0xbc85e217,0xae656197,0x01cf8b29,0xae13141e,0x66948478,0x2e2cb593,
37951         0xc31bd8ae,0xeb57bb0f,0xc264e788,0xdecef5d6,0x9cb96d86,0x6fa856cc,
37952         0x279183da,0x2db16813,0x383d796a,0xf03f3820,0x1d0c6fed,0x58a456ff,
37953         0x8a6abd9b,0x25589805,0x83f96f19,0x339f52c5,0xda7e9ea7,0xcf6ded8f,
37954         0x5d1ccd45,0x68c3d9c1 },
37955       { 0xe6b392b7,0x67e26265,0x775d9509,0xcec1d9bf,0xd76514f7,0xe16abcd4,
37956         0x0de72e1c,0xd86f59b2,0x1adfb033,0xa66e43cd,0x05e457cc,0xdb344340,
37957         0x5681daa2,0xb67a7916,0xf0114731,0xc32e7bab,0xd3b1e961,0x066fe16e,
37958         0xf63d26e6,0x924e298e,0x541add6d,0x9bea0dd8,0x9982f971,0xef9500df,
37959         0xc5f076ac,0x5c876e63,0xb23d396b,0x55e12ae5,0x2ec6747a,0x09efbb36,
37960         0x233286a5,0x8f2055ee } },
37961     /* 96 */
37962     { { 0xb82c1af0,0x4a4ab9e3,0xf2cae264,0xfc65e9e7,0x60187d46,0x4feaac0a,
37963         0xe393b363,0x27d3f335,0x819bacce,0x9c9f7c00,0xb8aa6611,0x3f7418b5,
37964         0x372aae95,0xffa94557,0x8db38589,0x937d7804,0x6f1fbc1c,0xd10c86df,
37965         0xa2f0a0ce,0x48aebd89,0x367439eb,0xae5d5fa2,0x3f17d2d8,0x103a6a0b,
37966         0x411d9894,0xf233f68a,0x218b67a2,0x7fece8b3,0x2319bf06,0x0422540f,
37967         0x340d322e,0x1292c8c9 },
37968       { 0x0386463d,0xf5eb5587,0x0371d97f,0xd4bbc2b2,0x0b819c5a,0x1b364571,
37969         0xcf04ad41,0x0cbb42d6,0x66939ec1,0x5d819c76,0xa01847e7,0x8745ac13,
37970         0x1c7232e4,0x4f704b02,0xacb05780,0x2c9e58a0,0xb561e295,0x9523b8b3,
37971         0x79f9ba35,0x3384df00,0x1eaa9628,0x78231fc2,0x8aea2b90,0xa2eac54f,
37972         0x30d1c263,0x8075ed77,0xfb339000,0xacb44ed5,0xf011293a,0x92546ac2,
37973         0xeb821764,0x7c78762b } },
37974     /* 97 */
37975     { { 0x067902b6,0xb8f7d6fb,0xd1735980,0xb2823a43,0x59741ddd,0x062cfb12,
37976         0x4033f95c,0x6e391b07,0x68589b8c,0x3831d0a3,0x522290f2,0xe3474d49,
37977         0x222e1f3a,0x4dab14d6,0x53f08d39,0x8f00fcde,0x707f28f5,0x559917ae,
37978         0x068e607c,0x166aa0ba,0xd7e1f824,0x602713e7,0x4d6a328f,0x7c255540,
37979         0x9890cd2a,0x0d2e3264,0xeca0b20a,0xf2207944,0x52f4e09c,0x5c98dc07,
37980         0xd84de81d,0x69403504 },
37981       { 0xe5407206,0xf8b7b366,0x0d88fa8c,0x1ecf54cf,0xf7272e6f,0x6fefe548,
37982         0x81ab4468,0xd6531372,0x4e474408,0x52cb5f0e,0x6490737f,0x9e426b3a,
37983         0x4980d071,0x2576c19b,0x0f272caf,0x91f34628,0x468f31c9,0x78e60a4f,
37984         0x90844d89,0x8776a329,0xb951582b,0x8a55700c,0x14b1adbf,0xab1af365,
37985         0xfbd343ef,0x22ebff92,0xb7d81f34,0x32f9fb01,0xba6b30e1,0xad850e06,
37986         0xbc5f9546,0x6da9e027 } },
37987     /* 98 */
37988     { { 0x5c9490ce,0x21eee4c2,0x0df68381,0xa96ec4a3,0xa4a9368e,0xe6c607e0,
37989         0x4bc262f3,0xd8b0492a,0x460c34ff,0x0846a210,0x28df33cd,0xf7ff7a64,
37990         0x21827612,0x10c55044,0x149bcd01,0x9d25fce9,0xcfc613dc,0x725611cd,
37991         0x97f51ce5,0x159f7e88,0x4e8c08b5,0x3fa3bf31,0x75e7538f,0xea156115,
37992         0x91c84020,0xd1e0a951,0xcf02ad0a,0x0d2268ba,0x058b8e5f,0xa04c6ac4,
37993         0xb3515912,0x773b40b9 },
37994       { 0x3631cfd2,0x00ff2cdc,0x807737bc,0x14c4c2d3,0x338a5270,0xd600616a,
37995         0xb32cabde,0xd0e3306d,0xa70b17ca,0x336738ea,0x79f353ee,0xf2f4aa8d,
37996         0x576f3ad3,0x712f6ad9,0x89b2bce0,0xe4279852,0xda92ca30,0x05d8f94d,
37997         0xd8492dd9,0x9891d475,0x4d15e4bd,0x3e06a5ca,0x254eabbd,0x4725d4eb,
37998         0xc0ed513c,0x31394ace,0xbbfaae6c,0x7e0f9859,0x833fd137,0xdc125546,
37999         0xc56c4f75,0x12b46385 } },
38000     /* 99 */
38001     { { 0x932951de,0x810dbebd,0x5aa69c94,0x96959d42,0xecb2f08d,0x5fc49c04,
38002         0x2250b82c,0xac74f0cc,0x3aec4e1d,0x96a439a5,0x90499acd,0xc33cab9a,
38003         0x54d9b3af,0x2fccde66,0x3863ae8b,0xf4af285c,0x46febf88,0x2373373e,
38004         0x3c9ab7ed,0x751d672c,0xfe12020c,0xc1c51130,0x52f3e56e,0xad82402f,
38005         0xa4a64a81,0x3489ab7a,0xd9f163f2,0x0a1fb661,0x0e553317,0x17c69be1,
38006         0x7d88d417,0x61c1935e },
38007       { 0x3492ae43,0x2e722d9b,0x0538f05a,0x1ef89d95,0x200aab63,0xae77e588,
38008         0xeba4b117,0x2872c120,0x3a461cb8,0x5c2432c8,0xcb938f26,0x315b3434,
38009         0x8c4c7dc0,0x05bf2ac5,0x596b378d,0xd2e501dd,0xcb890c30,0xa8506c9f,
38010         0x7c361f0c,0x3d0af461,0x5a35cbae,0x21f7b718,0xf3fc0138,0xbd1035f1,
38011         0x8b248edf,0x74628af5,0x48c9cae0,0x8d6421d0,0x2ca18773,0x75e3da39,
38012         0x71d3db94,0x27ad0df2 } },
38013     /* 100 */
38014     { { 0x305b5aed,0x9e3bda79,0x5998d6a7,0x2c67d4a4,0x0f7eb700,0xc855e1d3,
38015         0x147d1c44,0xc18a7e9e,0xc89540ed,0x3ea99618,0x7e6bfd20,0xa53be20a,
38016         0xecc14437,0xc9487e64,0x34ef85c6,0x72979207,0xd5e1ebd5,0xfa0d4e71,
38017         0x4d48d6b6,0xfda2b1e6,0x66e200d4,0x782a1e05,0x5a5366a1,0x2a3c70da,
38018         0x1a473738,0xfe3fbd2b,0x7fe020e8,0xd7ef8c06,0xeacfb665,0xec686fde,
38019         0x6dd1542f,0x5d9b5e27 },
38020       { 0xcb3e472e,0x3637c5a5,0x30a1405e,0x2153d927,0xb4498558,0x009992e5,
38021         0xf39a0851,0x18f00ccd,0xb5c6c560,0x26237c11,0x1343540e,0x418ed408,
38022         0x7e7f3184,0xfef7cbf0,0xbf48576b,0xecd92366,0xbc94c91a,0x1b75be1a,
38023         0x4a162276,0x8e1778de,0xc5c6bcb8,0xc52e57d3,0x5ab71858,0x5cc382c7,
38024         0x3f6e39f9,0xe12c2c28,0xd62735fc,0x4c7e0ef2,0x835a5996,0xe071deb1,
38025         0xcbb8c766,0x24f891cd } },
38026     /* 101 */
38027     { { 0x6778c1e2,0x24ef60bf,0x00d5be5c,0xff49c03d,0x2f01a09f,0xec11986e,
38028         0xae096e58,0x59a728a4,0x7077984c,0xaabbcedb,0x870ca5a5,0xfb473bd2,
38029         0x4de30e3d,0x8c928c61,0x4f67abca,0x3fae7f9a,0xec21a9cf,0x83c2b2eb,
38030         0x9cd9b5de,0xafa70d62,0xc60b18df,0xadeaea59,0x4049b54c,0xd5fef7be,
38031         0x6dd310e3,0xfceebc76,0x8f6321cc,0x7748efe3,0x18ee8af5,0xfe9c32b1,
38032         0xd42df612,0x863ac3cf },
38033       { 0xb85a2fe2,0x0a36fca7,0xee429dc6,0xf3e70d08,0x141c3944,0x8c9ba209,
38034         0x67272a0a,0x306a8106,0xf968bd06,0xe69a1555,0x153c603d,0xb86f7e47,
38035         0xef56e4fa,0x9706614a,0x98780b4c,0xc0dc36b8,0x3a1d3263,0x43657fe2,
38036         0x435522c9,0x01f97a86,0xedfef679,0xd91897f6,0x6daa17a0,0xebbe31d4,
38037         0x85accfbd,0x6f179100,0x8f9fc1de,0xe0da6e32,0xe1e7142c,0x1c9d53db,
38038         0x8b86725a,0x3e3f1b1e } },
38039     /* 102 */
38040     { { 0x7b7fbf05,0xb7ea15c0,0x1f1a3882,0x992f11b6,0xd1dcd1bc,0xc9ddd95a,
38041         0xad0f7e8b,0x31f5b7fa,0xfca7ab79,0x2936e5eb,0x19a55be6,0x30f417dc,
38042         0x43cde554,0x1f6f4e43,0x82f044bf,0x971f5e65,0x4288c408,0x73c3b8e4,
38043         0xb807f575,0x61aac59f,0x818b58f0,0xa64ee2dd,0x97a3b0d3,0x6f7a0a60,
38044         0x0394b058,0x8b85ecc8,0xbfb3517d,0x9a059474,0xa79c3f06,0x89ad5977,
38045         0x700a8025,0x81208ed8 },
38046       { 0x14c4ce37,0x10935099,0xa1aa48a6,0xf34bb843,0x580d58e8,0x86007024,
38047         0xb375b8ba,0x6db42c49,0xed3bde83,0xac365524,0x649233b6,0x5521e1b4,
38048         0x64dd946f,0xbc7cc5d5,0xbfb5b6ae,0x9c14b035,0x0146c1a3,0x7f22ba18,
38049         0x872214f5,0x0b62fbbc,0xb4921764,0x3acfd7f7,0xcb4d6df1,0x5ff10da1,
38050         0x62600a91,0x660e2620,0x81d9167f,0x7ac7da9d,0xb6e7a199,0x6e8e260c,
38051         0x80deb3c2,0x44383fb8 } },
38052     /* 103 */
38053     { { 0xe44f9af6,0xe107f01d,0x8cb1fa1c,0x36381a4d,0xfb7dd493,0xe65be3ec,
38054         0x26a8839f,0xd0b8435a,0x3ec789d8,0xee60f915,0x2bcc5e1f,0xe25fea50,
38055         0x7e44a81c,0x0477c0c5,0x230ba5b8,0x349e9f83,0xde180dd9,0xdd42f32f,
38056         0x64a3d11c,0x8b039eaf,0xbeb7083a,0x80ef884e,0xf12742cb,0x288e60c4,
38057         0x720a0262,0x44156cc5,0x7253b77f,0xcd547de6,0xa6013a59,0x9829a6ec,
38058         0x0d548445,0x8aee708f },
38059       { 0x32c54409,0x18f22d9c,0x75ebaac4,0xa9ebfa46,0x86284981,0x90e2e928,
38060         0x6b3a8e0c,0xd0201f6f,0xbd77641e,0xc973016c,0x70170575,0xf926f2f0,
38061         0xfec0ce01,0x4984048f,0xf319d304,0xbf696211,0xc91a88c4,0x74b5c844,
38062         0xe0030a82,0x4c40fbce,0xe4f6d521,0xbed67525,0x29d67d1e,0xaf7e47cc,
38063         0xc21d3536,0xfa307db8,0xbbb29405,0x56b6c46a,0x033e805f,0xf059a7e3,
38064         0x6096a5a0,0x970f61fe } },
38065     /* 104 */
38066     { { 0x1bec8e4a,0x1bc53d23,0x35a6034c,0x8809ac14,0x509e464d,0x4ee081da,
38067         0x8a488235,0x496ae1fd,0x325864b6,0xa1ae9863,0x74cd069f,0xbaca13e9,
38068         0xb1d8a6b4,0x3738cc58,0xe76b9da4,0x5fa71f58,0xc7eb16fb,0xc919be88,
38069         0xad4e429d,0xf5c8f13f,0x2499f9ed,0x4583b671,0xa10d8bd7,0xbce20115,
38070         0x5790bb7e,0xf66d7605,0x482b78dd,0x9316aede,0x75f855fa,0xe0d8fb2d,
38071         0x5a7dcca7,0x404b5b94 },
38072       { 0x517a15c7,0xf9ee682a,0xef880202,0xaae4cfbc,0x5106a354,0xcee2c139,
38073         0x170febe7,0x5de60192,0x73d0c54b,0x589e39fd,0x8c9092b7,0x195c7135,
38074         0x0a7bfe5f,0xcb7ed53f,0xf61cc979,0x2bd9242a,0x5395f7d9,0x8d2ef16c,
38075         0x70b32f09,0x0d4ac1ca,0x52d185c1,0xa587526d,0x942d6195,0x2932b04a,
38076         0xa500b0ac,0xfe25a979,0x562fd230,0x5fa1f4ae,0x20da253c,0x60f55af2,
38077         0x83146002,0x7faa11b5 } },
38078     /* 105 */
38079     { { 0x6e402149,0xb0ba4f0c,0x963cc119,0x3584cc1d,0xa6527476,0x7740dc1a,
38080         0xc95715f2,0x3f77ff75,0x3f89fb0e,0xb2f234ad,0xef9be3ff,0x55159032,
38081         0x04237e82,0xfc9fb21d,0xa153ed93,0xeb2eff38,0x10041d13,0x89d53ae0,
38082         0x7f1bd828,0xcf2e545b,0x43953ea5,0xdd4a27ce,0xd85e75c8,0x00d2e5d4,
38083         0x241be1c3,0xeb93ed62,0x0242032d,0x1e53f25f,0xc3a4e701,0xb9957636,
38084         0xed98febf,0x14b63a52 },
38085       { 0x71c43336,0x7610b553,0x23a4824b,0x19dfd4a6,0x0286051b,0x7b97a2e0,
38086         0x8f5f1edb,0x86abbb9c,0x9b67daad,0x67a57d77,0xcd5ffafb,0x8ace506d,
38087         0x89ac3c63,0x85da9f95,0x75a3d150,0x081cbaa8,0xe9346ed2,0x03353d8f,
38088         0xa1f9a02d,0xb2ab61f1,0x3a659c71,0xb0cb0937,0x4f5df8a1,0xb7e0e30b,
38089         0xeb7d5a1d,0x77c4c741,0x728e5cf0,0x8f046c9c,0xf7c171ac,0x32dd0bc7,
38090         0x836d2655,0x02485873 } },
38091     /* 106 */
38092     { { 0x75a4cd8d,0xcd40dd23,0x97bcba78,0x132ca433,0x258d61f5,0x30c5cd84,
38093         0xda1e8e68,0x0a7ec059,0x1d65d40a,0x07a8f171,0xf4350d76,0x869e655e,
38094         0x5983ae42,0xb98ce6f0,0x9d8bebd0,0x7b61391d,0xb1ba5d49,0x3a529e25,
38095         0x1f6b2cf6,0x46f732e9,0x3fa3b629,0xbd66ec6a,0xc3ef0ed2,0x397950ec,
38096         0x5f08b476,0xee9008cb,0x965a0e2e,0xfd6be425,0x1177bc87,0x78ed513c,
38097         0xfe512dae,0x6798cedf },
38098       { 0x1b97c5c6,0x49e3f8fd,0x78c3b33f,0x39fbab3e,0x40f595ba,0x44274412,
38099         0x5d7d4376,0x174225b9,0x79c44777,0x880b3fcc,0x3296b245,0xdc3aca83,
38100         0x1734e184,0x55913df7,0x9c934472,0xa4db23d3,0xd1420a11,0xcebb3733,
38101         0xf3608bdc,0xb9d20cf9,0x30cfe13f,0xa618acf6,0x5f30874c,0x75f06b31,
38102         0x9f0005a5,0x506efe7f,0x01bfc9db,0x8aaea78c,0xf78e7c41,0xf9179255,
38103         0x52e96395,0x3ea7aed2 } },
38104     /* 107 */
38105     { { 0x5b06ae25,0x98617e04,0xcb5750ef,0xbcac148d,0x604c2ba2,0x91ea2f0e,
38106         0x76b78975,0x00c19f6b,0x651da181,0x79b9b6d0,0xc945705b,0xf3225beb,
38107         0x5c005bf1,0x30b435f3,0xbc24d86d,0x440b4482,0xd6373777,0x2b8f0996,
38108         0x1c44b4dc,0x65fd6c56,0x30906999,0xe9405ee6,0x08aa1ec1,0x19ff0924,
38109         0x3d2f2895,0xeef3246a,0xbc746797,0x016c3765,0xd0705f7e,0x62d2569f,
38110         0x05250044,0x6a8ad39c },
38111       { 0x46be7282,0xe45f020d,0x21380f12,0x9405afed,0xd5da6ad0,0x4cdca5bd,
38112         0x7f8be61e,0xc2d6f184,0x596b8178,0x20132953,0x7a8df954,0x8d3b1e7b,
38113         0x39572b4d,0x757c61bb,0x80cc3b56,0xd749b57b,0x37b3ffec,0x9590ff93,
38114         0x145dc94d,0x39bbb653,0x2335e573,0x70c1c606,0xf763feba,0x9c2e72d7,
38115         0xcc61b732,0x4768e424,0xaa73f2ca,0x777d2fa6,0xc5cb58cd,0xdee4dbaa,
38116         0x9cfae1aa,0x1a181179 } },
38117     /* 108 */
38118     { { 0x77575ed0,0x6f6ff62f,0x7d1da99b,0x18f14fa9,0x69efd7f6,0x2e72aefb,
38119         0xddc28633,0xc45ab4cb,0x586c5834,0xb0e20d48,0x39775dd8,0xd397011a,
38120         0xf4134498,0x0130c808,0xf5115ed8,0x2d408eba,0x0260ded9,0xc506a05c,
38121         0x19cab911,0x9e5b7362,0xe8693a86,0x4cf508c6,0xcc773617,0x4e71245f,
38122         0x95d89ca3,0x2f71aa1f,0x607bbc98,0x4bba7c6a,0x212b7fd2,0xf3a515e7,
38123         0x9230f5a8,0x7d2ddc75 },
38124       { 0x4ed2cae8,0x3d05816d,0xb9c00377,0x4cf6bc7d,0x646b08d4,0xc23e98e6,
38125         0x4b9c0180,0xf9ee6c61,0xef9179c1,0xe11c9a13,0x8ed9688a,0xa5b6147e,
38126         0xd06670a7,0x7afeb648,0x17685275,0xd670333c,0x75f9e8f2,0xa89dd969,
38127         0x37a68ade,0xbb57228d,0x454cb186,0x21a05d5e,0x063dd550,0x4810158f,
38128         0x4cb6caf3,0x92dd4f08,0x7854abe7,0x70c4d852,0x6e729d76,0x845969dc,
38129         0xb1bf40ba,0x5a52f87a } },
38130     /* 109 */
38131     { { 0x09ecacbd,0xed019e91,0x7b89bdea,0x6544023d,0x5707371e,0x7cc51f0b,
38132         0x16c8e217,0x14832b04,0x81259ab5,0xb1aa6682,0x23e361d4,0x6e100f92,
38133         0xe3a95c2a,0xe593eee9,0x16c10e26,0x699b6bbd,0x9473a13f,0xad487873,
38134         0xb274987c,0xf1c14dc5,0x2559e2e9,0x57dc0075,0xc3d47ad2,0x8449849d,
38135         0xdd527793,0x83df278a,0xeefd5b99,0x770e3ec8,0x76bd02a0,0x2ae58446,
38136         0x3e705ffe,0x17f02764 },
38137       { 0x29abea1f,0xdda4010d,0x2407ac4c,0x636b9695,0x0433218b,0x96a60129,
38138         0x163d534a,0xf221fc3b,0xccc20565,0x05ba15be,0x96285577,0x1238e54d,
38139         0x878804d3,0x1b144257,0xa89a9fe4,0x96fbf304,0x4be642b1,0xc8a7f06c,
38140         0x6e2b085e,0xdd1a20e8,0xff4a591d,0x8f7f27c2,0xa4a343b8,0xc17b0753,
38141         0xbb173d4d,0x684b1e88,0x3dc07bbe,0x3accea44,0x4c441d77,0xdb15c88d,
38142         0x53e5957e,0x0ef0309a } },
38143     /* 110 */
38144     { { 0xfa8e5b60,0x4fc25721,0x691c0bb2,0x646938ad,0x0b0a2248,0xe46d4b76,
38145         0x7de16877,0x863f9ac2,0x2721c630,0x503bb6ef,0x0b67fb02,0xf8c199df,
38146         0xe07abd39,0x78c1ed72,0xb32f0dda,0xcf9deb7b,0x6c3c89f3,0xaff726f0,
38147         0x1972225a,0xb7008b2d,0x4f145f5c,0x8f5a6117,0x457c4f37,0x4e0e6f8c,
38148         0x1c453c64,0x8bbdaa44,0xa6e92c80,0x57be326d,0x5d773561,0xa9bc3fd9,
38149         0xbb37b72a,0x3d3b6cc6 },
38150       { 0x9722c880,0x6e6f12cc,0x286b6889,0x3a1b6ae7,0xad2fafec,0xba1cc09b,
38151         0x43bb8bef,0xad64ad7a,0x97c3f4c3,0xa5af6a00,0xc353a91b,0x2afcb0d9,
38152         0x69ccbf6b,0xca13fcab,0xf2abc190,0x699a1391,0x23a247e5,0x2dbd5542,
38153         0x95488d9a,0xe206180f,0x1244cc3c,0xba9e7bff,0x87d3a365,0x29297abe,
38154         0xfa4ca5e2,0x4054fa38,0x67be1b6c,0xb390623d,0x78f41a44,0x1fa67c57,
38155         0xc7b544e7,0x2e946e43 } },
38156     /* 111 */
38157     { { 0xc60934ae,0x2980fddf,0x164206d1,0x2c3e7eff,0x416ed75a,0xf75e7f96,
38158         0x5cd0b2dc,0xfac60cf3,0x1faad87b,0xddc4bece,0x9849e5dd,0x753fa87c,
38159         0x2c1bf1ae,0xc5d516a3,0x14732b4b,0x565dbea8,0xce48696b,0x007ebe3a,
38160         0xcdb97694,0x40ca74d6,0x65e4e7be,0x3f5cd270,0x3aac4ebc,0x74847c01,
38161         0x43d6c3a1,0x6762e034,0x467a076a,0x690d8c95,0x1eda677d,0x768d78d6,
38162         0x0181d8c2,0x0997ce55 },
38163       { 0x965a0b81,0x9297746c,0xe5e12dfa,0x48b58be6,0x715f437f,0x5573b3c4,
38164         0xb565c459,0xe425e907,0x1582797c,0x4f43f512,0x8ea5474f,0xe5dafa6f,
38165         0x13de04ac,0x2aeb8fbe,0xe8a07c83,0xed7f95f0,0x662c09fe,0x3e012a6e,
38166         0xc742cf17,0xbf96e9b8,0xe28a1c45,0x8ea5759a,0x5cf4e2f3,0x475941b4,
38167         0xf901a019,0x7dd3c02d,0x70916b2e,0xe7a4deea,0x2fa9b988,0x50b272b5,
38168         0xd0917fe6,0x96f9f09f } },
38169     /* 112 */
38170     { { 0x2c310a96,0x78e8aac4,0xf7a2a734,0x32a98303,0x23962207,0xc46ca83d,
38171         0xd9541280,0xad131e6e,0x2cabe911,0x5791fc5e,0x841b6c68,0x50cb77eb,
38172         0x3d3c8878,0xaff93dea,0xf1007bce,0x06541f1d,0x55cdf1fd,0x4ee729c2,
38173         0x323e3972,0xe0f71317,0xad4d08c1,0xa2de7a41,0xa35e22bf,0xa9912abf,
38174         0x89b03325,0xa050122b,0x06514d4e,0x8b9e51f4,0x79d3e0ab,0x423c7aad,
38175         0x40b8fea5,0x71998e26 },
38176       { 0xceb6ed78,0x40140fcd,0x18534516,0x653cf377,0xe8d60dcc,0x0450b65a,
38177         0x9dac55f8,0xce6c1a76,0xae05686c,0x8a96a92d,0x12712562,0x2fe44762,
38178         0xa4f39425,0x747bcb50,0xfc531fc2,0xf0ec6ff2,0x10fe9ff0,0xc97c3447,
38179         0x9c792cff,0xfb488783,0x026fb019,0x552c5248,0xd804c290,0x4001a29c,
38180         0x35c8ca73,0x742b5ad8,0x6ee5dfa0,0xc3781f17,0x3dfa4ab1,0xca6b85f0,
38181         0x0b0d32ac,0x8389941a } },
38182     /* 113 */
38183     { { 0xde067dff,0xc0f062a2,0xbcb80162,0xd4f32690,0x0707a2bd,0x98cd990d,
38184         0xfae4a391,0x5afc63b8,0xb32ad814,0x684f1b7b,0xf199dfb1,0xb0a2dce2,
38185         0x48f25848,0x2260e17f,0xc2d5e862,0x7393db00,0x338cf171,0x9e88f854,
38186         0x02acf522,0x00679429,0x6835af3d,0x19157cb8,0xb8a2614c,0x2faa6f92,
38187         0x134ec46c,0x04ff95f5,0xfb7a8135,0xcf00626e,0xb37a4704,0x454b3d05,
38188         0x2694ec25,0x1fbfda31 },
38189       { 0xc8f69c77,0xfdebb657,0xa3df88fa,0x92a8278b,0xc1fb78b4,0x463b5571,
38190         0x11c71a33,0xd2066a1a,0x089958b0,0x10c88143,0xcf9d67a6,0xb975c7e0,
38191         0x73037b8f,0xdaa5d208,0x40bf5861,0x5ee5005d,0x7dba69a9,0x300e6ce7,
38192         0xc962cc74,0x893c3cb3,0x4cf84055,0x0ac98629,0x225c9d70,0x0a7ef63a,
38193         0xb91e47e8,0xfe184869,0x8c2f84be,0x1b9d7deb,0xc0e278bf,0x67788915,
38194         0xc426f19e,0x4f9488ca } },
38195     /* 114 */
38196     { { 0xdd51b8ce,0x610dfcd4,0x36230e80,0x08579278,0x36599562,0xedc7ff1c,
38197         0xe2cae877,0x905ead4b,0xe7967608,0xa1c325d9,0xbd38926c,0x3e39eddd,
38198         0x5f6f0a4e,0xda92c868,0xf47a0fa4,0xe16f800a,0xe5f60aab,0x50b4db5b,
38199         0x983853d3,0x3665412f,0x9b79789c,0x64b62250,0x4e0e72b2,0xea560058,
38200         0xe555c2bb,0xabbd4901,0x17292e11,0x378419a7,0xe174218f,0x6e0b5aaa,
38201         0x8f796b92,0x688e0684 },
38202       { 0x313b8f64,0xcdfef641,0x942c7462,0xaef11b7b,0x5c0d8abd,0x067cfb77,
38203         0xaf4041a9,0x608ea5f0,0x6935210f,0x23d5bd82,0x27917a08,0x5ab904fc,
38204         0x45d22d21,0x85dbb1fe,0x4d36159f,0xc3d5e509,0x1d39b8f2,0xaebb528e,
38205         0xf44acef0,0xdd5ca828,0x20c57a54,0x24209adf,0x78f95f44,0x5742b433,
38206         0xa9337d37,0xd11fa7d9,0xc64cfdb7,0xd66a0c09,0x9bb817ec,0x56e55b8f,
38207         0xe4c41265,0x1723c7e3 } },
38208     /* 115 */
38209     { { 0xdc8b43f3,0x9a6486d8,0x26409e68,0xfc3e0e61,0xd9b46003,0x1889c437,
38210         0x6284ec7b,0x3a850335,0x6a9dbaea,0x5a3665c4,0xe978933c,0x7bf6941d,
38211         0x69341490,0x1ed5a510,0x8cb8002d,0x664a7b7a,0x60ed0a59,0x603f76e4,
38212         0x1f4ebf27,0xc3e06ba3,0xf2c38a7f,0x296ced41,0xcf1db08a,0x2ac18f79,
38213         0xcde7a3b6,0xc919e882,0xdbf68b06,0x15e77d29,0x4e947cb5,0x21978baa,
38214         0x7630993a,0x84bf542b },
38215       { 0xe364f21e,0xc1decda9,0x012e557e,0x0d6cf345,0x588f90e1,0xba246848,
38216         0xe3b104b8,0x9f6dda4b,0xe3aef57a,0x6bf7a346,0xe8327ea9,0x210299fe,
38217         0xda95e6c7,0xaa99f487,0xd2cdf645,0x24ff813e,0x8bd414b8,0xd1dbb2d2,
38218         0xcafa1a61,0x065101af,0x9cdebda4,0x7d9f4b9a,0xe41039e4,0xaf41b395,
38219         0xc50adf42,0xe3e9e6ba,0x341e9e49,0x4f2133ae,0xcb157f23,0x4968c0f3,
38220         0xda068153,0x383f827b } },
38221     /* 116 */
38222     { { 0x6583ff4c,0x2ec46a21,0x4ad709e7,0x4e645a29,0xc04ca12a,0xdc66e9cf,
38223         0x9160a7e5,0x82f128f4,0x569c762e,0xbfb227b1,0xc2edb8e7,0xf80c7963,
38224         0x49a0f688,0xa7dafe06,0x2d14b8cc,0xb7e41754,0x86de40be,0x3a0c5c53,
38225         0x1db79331,0xf0d05286,0xfbfe071b,0xb902ce69,0x210e9903,0x61e46956,
38226         0xf703ebb8,0xfaef874e,0xdd5f78b6,0xf668947e,0x5af5ea3a,0x6fe86547,
38227         0x43f94625,0x3b121f15 },
38228       { 0x659275e9,0x5b26e847,0x6d0fce50,0x47581cfd,0x8aa3f1ef,0x55f5cbfd,
38229         0xe484e60e,0x1e7be315,0xfe9698e4,0xd8f1a20f,0x7ab04784,0x25d46da9,
38230         0x834cdb3e,0xa526db75,0x8d08a009,0x1fd408d9,0x5b5ca816,0xfc004b20,
38231         0x65e4bbe8,0x5b3e3bb3,0x759bb6ef,0xf50cc125,0xc2fac737,0xf05fa817,
38232         0xd273951a,0x9ee102d2,0xfecb3367,0x2a8e540b,0x2a6a515f,0x673446fb,
38233         0x37290c83,0x5505e1d1 } },
38234     /* 117 */
38235     { { 0xd15e68a6,0x0c3014a1,0x64dd35e5,0x6f9f0b26,0x03ad67f9,0x18c3742d,
38236         0xd2c14484,0x74818c0e,0x0d41a3cb,0xc5181169,0xc49f3e9e,0x65c8c83f,
38237         0x2c279386,0x9b260c61,0xced04e9c,0xf6086fae,0xfd7c4758,0xa7b2cceb,
38238         0x90297fd8,0x4b3c3133,0x09701ac8,0xca8264e8,0x508b3762,0x9f976a87,
38239         0x983a8dfe,0x5d582714,0xd9d598e9,0x350d2669,0x0f6fd348,0x85cb89cb,
38240         0xa574317c,0x617d80d4 },
38241       { 0x70022b67,0x4cef267e,0x3768b94a,0x80536bb5,0xd2784462,0x3153a566,
38242         0x38243919,0x49054d44,0x5df78c4a,0x8d11e172,0xd5a1e35a,0x9b252a71,
38243         0x8171e31d,0x07866c80,0x1b38a00e,0x0a8501db,0xce770236,0x2ed932b8,
38244         0x8edaf7d0,0xa2d77609,0xb93006e9,0x3aee5dab,0xbbfeb036,0xfaffc8c4,
38245         0x4e21b38b,0x077b9678,0xdca8e069,0x491fc59f,0x0e938471,0x3f624f55,
38246         0x7cd1780b,0x5156f508 } },
38247     /* 118 */
38248     { { 0x0206e8d0,0x58234e22,0x7f15af32,0xf5f6f5d4,0xd638950f,0xafab7289,
38249         0x7d4495f4,0x66ec4d09,0x68da80a9,0xad890c5d,0x64f8a36b,0xe4aa0920,
38250         0x0f4d5c5f,0x799e257e,0x24495e31,0x44c677ae,0xa5b8e352,0x720387b3,
38251         0x75a287b9,0x703790f4,0xc3c1f2f7,0x54895cc5,0x41a7fa41,0xb8680f9b,
38252         0xb00b008b,0xfcd47458,0xba6473cb,0x149cc838,0xac9be19a,0x78ed5f7a,
38253         0xb33765ba,0x5254599c },
38254       { 0xa21b54c4,0x08739679,0xb6497d9d,0x029ece2a,0xc8488640,0xf14f1a92,
38255         0xe9fa79d9,0xae48dcff,0x46c208db,0x14b911c2,0xdae3f69e,0x5ab0fbf2,
38256         0xd1edb838,0x180ac87e,0x188586bb,0x146fd718,0x5467cbd0,0x210eb654,
38257         0x1667cfee,0xaa239408,0xb73d1a60,0xdb125c1a,0x881c1cbe,0xde685300,
38258         0x37c30232,0xfe34c713,0x6f3c8d18,0xc6c6070e,0xb4af4e83,0x07e365ba,
38259         0xdcf82b45,0x22f0a7ed } },
38260     /* 119 */
38261     { { 0xea7f1b7f,0xe262791f,0xdcff09d4,0x9c3d8c5d,0x39c7dc58,0x86c2a9c3,
38262         0x4276e8c0,0x4dad4017,0xe9fe1d56,0x0a918f59,0x2aa810c9,0xb8d79670,
38263         0x4aa5cdc4,0xeb7a8836,0xe7afa72e,0xfc4c23bb,0x4ac86908,0x4dbb5c9e,
38264         0x6a0c7e6f,0x37e39013,0x49c218d2,0x855d7001,0x94b324a2,0xe475bc67,
38265         0x6287a071,0xc98a8dc6,0x5fb4323c,0x395a299b,0x0c0389e9,0xe186c3ee,
38266         0x16734c46,0x79f81e6f },
38267       { 0x364f3c4e,0x83f2c1f3,0x1367e14b,0x536b2ac5,0x5933e43d,0x44a6dcfc,
38268         0x10d961fe,0x34e59475,0x7e3f2aae,0x08234ece,0xbdea7f25,0xcb92e00a,
38269         0xa791a124,0x1efba4f0,0x1192d53a,0xc2086fd2,0xb51c8af6,0xfec0d0fc,
38270         0xdc0f1b5f,0x48d1b2ca,0x812dbe19,0xb07a388f,0xdedbdd45,0x40873a6a,
38271         0xd702589a,0xbc2a1268,0x17e27b64,0xbbf6e3a8,0x6d386e85,0x73ee5663,
38272         0x9de7c000,0x442ecd37 } },
38273     /* 120 */
38274     { { 0x8a2f90a6,0xb4cd1ae6,0x6f5ad0cc,0xf277d41d,0x401d4b8e,0x6a3828c4,
38275         0xd8376631,0xe817a134,0xf5e1124b,0x142b758d,0xfd6b95e4,0x25fbc69d,
38276         0xd74a9e3e,0xa30c9f5f,0xd89663ce,0x5ac0f163,0x0ce6386d,0x32a9eef7,
38277         0xd8ed5544,0x7a690ea5,0x9889427a,0x5de23ff0,0xeaaced58,0x75ad36a5,
38278         0xd3e18465,0x3514a6c1,0x7f093910,0x3d9162c3,0xe33d56e8,0x5c10add9,
38279         0x06aa691e,0x85176b73 },
38280       { 0x28a21e38,0xa32110fa,0x5773d538,0x97b6379d,0x2d020dc4,0xd3697bbf,
38281         0x961833cd,0x59177593,0xe5fa8516,0x6d7045fa,0x786ab5d2,0x3390f29a,
38282         0xdc4f5b70,0xac0bda30,0xdcc615c6,0xcca0240a,0xc5146d91,0x8e1f1702,
38283         0xa72cef87,0xceb472d0,0x0b669ba1,0x84840708,0x7e61aa0a,0x79b08f9d,
38284         0x4669560b,0x388160be,0x948eb71e,0x23935c2d,0x9431590c,0xd7fd83c0,
38285         0x6e5768b3,0x8ab154bb } },
38286     /* 121 */
38287     { { 0x353c4a96,0x28686003,0x905cd835,0x4e5c60e8,0x8f66f8cc,0xbd591364,
38288         0x9faccf9e,0xb6b80b98,0xe32639e5,0xbc1c1fae,0x278aadeb,0x2f6396d2,
38289         0x1898202d,0x00a796d0,0x3a474835,0x18ab548f,0xb31b0e3e,0xacd056c3,
38290         0x0164512d,0x15ba68dd,0x4b03f3bc,0x203836d9,0xd8f206c5,0xd64eca6b,
38291         0x9f1779b6,0x931a361e,0x52ab34a8,0xd82690fc,0x92922e22,0x342bb8e0,
38292         0xe00b02a9,0x1bfcdd84 },
38293       { 0x75a365d9,0x310b9a43,0x08d8fb03,0xd4ade15e,0xd742df83,0x9c9753d7,
38294         0xde318742,0xcf7309d4,0x3360ace0,0x1228e212,0xf7669643,0x1043d238,
38295         0xf90f5a53,0xfc2adbed,0x7b5f9397,0x41d64cb7,0xc446d010,0x5200b30a,
38296         0x231720fe,0xc3c8642d,0xb9aa2075,0xfcc0122d,0x041eae47,0x856e3b12,
38297         0x68c876a4,0x45864455,0x233606b1,0x1a1c7842,0x227757bf,0x9b766d1f,
38298         0xf7b9d4f1,0x25b78a3b } },
38299     /* 122 */
38300     { { 0x156707ce,0x90835718,0x4314f90a,0x9bdc2398,0x8be57dbd,0x017c885a,
38301         0xad63a4b8,0xd4bba225,0x15aacffd,0x5ce71b86,0x72954722,0x5f266475,
38302         0x4f0ad3dd,0x0a80f1f7,0xfc352ed7,0x010538a3,0x4203c6ca,0xf8a64045,
38303         0x330c73b4,0x2b2c7a88,0x02dcac1b,0xb3433ee6,0xed2b17c7,0x2e0499cf,
38304         0xbd6329c7,0x9f8681a4,0x36fadc37,0x38979946,0x92b7895b,0xdc5650c8,
38305         0x65a51cf0,0x70ab9570 },
38306       { 0x7b585d93,0x46778ec4,0xa633fe4e,0xca6d3610,0x4ea0311a,0x21da154e,
38307         0xbd64002f,0xaf22190b,0xd91cb7a9,0x9e633ac7,0xee6837d7,0xed13c31f,
38308         0x1616ee8a,0xda4a07d7,0x3afcd616,0xd78a2732,0xba14d694,0xc06696e5,
38309         0x4df58420,0x733754d7,0x2778e3c9,0xe85e504e,0x55b5a5c2,0x3055aa0c,
38310         0x8a3acb5c,0x313df538,0x2a088eda,0x5896acb5,0x84c85dde,0xfc8842a0,
38311         0x51dde6be,0x5fec9f79 } },
38312     /* 123 */
38313     { { 0xfe519f99,0x5ebc2c7c,0xe5410353,0xe396bd80,0x8a3988f3,0xaded9402,
38314         0xd601bda1,0x1c03b735,0x14ce64ac,0xfd302036,0x01240290,0x5837ebe9,
38315         0xa554097d,0xcaaea1a3,0xb0b88139,0xdce73d25,0xecb090b9,0x35ed412b,
38316         0xd63dab3c,0x99029ff7,0x062db071,0x555437d9,0x42a4c11d,0x277d2f56,
38317         0x24fc9109,0x477fa645,0x2799254d,0x7b12e9b7,0xd84c618c,0x7ad2ae22,
38318         0xce8ed195,0x0a8d5663 },
38319       { 0x0a21fde1,0x43ac5163,0x6903d849,0xcfcf5dd6,0x5fdd6281,0x6d2499ee,
38320         0x77a49a34,0x4dedc6f0,0x2875c06f,0x46bda2c0,0x347b8046,0xd0e0e0f6,
38321         0x5e67836f,0x1058169b,0xde8a8042,0xc961912a,0xa93b3d32,0xdf3fea0a,
38322         0x0c576bc5,0x9f138edb,0xd8d37e47,0x7971ad6e,0xcce5e7cb,0xeab85739,
38323         0x1d202b40,0x88a4b434,0xe3a1fd26,0x5d842557,0xb3a86f91,0x872fabd5,
38324         0x6aa4629f,0x95b93493 } },
38325     /* 124 */
38326     { { 0x99f951de,0x9998a701,0xf058db45,0x8fade596,0xf3d03dd3,0x4d479c1e,
38327         0x33b141d3,0x6e928d5d,0xacfe8a40,0x9a465800,0xc1cefa3d,0xd108ad2f,
38328         0xe013726e,0x64b96921,0x8e83bb9f,0xb9b6a6b6,0x1242e544,0x29f1e6dc,
38329         0x2f65966b,0xd3f8f676,0x5e105b41,0xa34dd096,0x16011e1c,0xd4e9139a,
38330         0x2515541b,0xeea4dc68,0xc822166d,0x6f8030ac,0x31d16124,0xbdc7ae1d,
38331         0x621afa7d,0x2e25ef51 },
38332       { 0xdd8e7357,0x2533cf8f,0xeaceddb8,0x333ba218,0x0784d2ac,0x68e3e31d,
38333         0xf2804ae2,0x1c927f36,0x77e7ad7e,0x01433d22,0x587f78a0,0x0b401cf0,
38334         0xaa0027ae,0x9dfcf036,0x1d9a46b5,0xc9e46c8b,0x1f288d32,0xaa6de486,
38335         0x1b8a043d,0xdd56da2f,0xf2d0bb56,0x346230e5,0x19defb56,0x19f0b6e4,
38336         0x21d2c874,0x55ec37cd,0xb70e45b3,0x3dbf0397,0xac7ce852,0xf0862a8d,
38337         0xe141f3d6,0x87979ea7 } },
38338     /* 125 */
38339     { { 0x7f1c747f,0x9b7e7b3f,0xc6e63369,0x151a4c1d,0xb372dba0,0x4273ff70,
38340         0xd3ee54fe,0xca6d2234,0xd33cae0f,0x12fc8e0c,0x5dd6f10c,0x27328538,
38341         0xf01a9cf9,0xc86f3fbd,0xe36cae91,0x5322677f,0x2fefea44,0x39a70033,
38342         0xce8af217,0x2c9ca328,0xf6a731f4,0xc0256776,0x66a96813,0xc687b3df,
38343         0x8db2eda8,0x194aab12,0xeec4febd,0xde30dc5a,0x979241b2,0xc052236a,
38344         0xc23d4c16,0x3ec98802 },
38345       { 0x4072f74d,0x0f9e760c,0xab594059,0xe78eb0de,0xc9b009c2,0xdb3dea40,
38346         0x38b59ae5,0x47e875f0,0x2b4daa06,0xf40eb436,0x090f3788,0x9a6a4f92,
38347         0xedbfaf8b,0xefebe9af,0x9867e256,0xf87f96a5,0x75ab6aeb,0x1e6fed23,
38348         0x3fdb13cb,0x17f2782a,0x70fa2621,0x5102c71e,0xfd4c0dbe,0x5d2b06ec,
38349         0x30347297,0x537cc268,0x2b67e780,0x8dbf5e2b,0xba25da32,0x2f633f3a,
38350         0xefaec914,0x3e9315e8 } },
38351     /* 126 */
38352     { { 0x239a9ea9,0x9255cfa5,0x0be33a62,0x20f3c690,0x9cb642bd,0x759eeb4b,
38353         0x00bae718,0x3316c546,0xf3410f84,0x874a76d5,0x90f129b6,0x123b502e,
38354         0x12851f1c,0xadc8f9a8,0x1b62408c,0xf57b764a,0x1a80777b,0x116ec01f,
38355         0x1f0ddc5c,0x746ecef2,0xe5a6a5a7,0x3c49d47c,0x06e955ba,0x1e15dbe7,
38356         0xb45d79b0,0x629c0c79,0x778d1087,0x11278308,0x8c6a22d7,0x22585dc7,
38357         0x0a682791,0x2ed02a0d },
38358       { 0x4daa2682,0x53043416,0x01359625,0x0e26d32b,0xbd867097,0x449c834a,
38359         0xee77ae2e,0x11a19d2b,0x3af6c169,0x39bd529a,0x5cd61054,0x36cca5c0,
38360         0xdc6c0fe1,0x6370a59b,0xb93d5135,0xca420d27,0x554c451a,0xd8730d45,
38361         0x96cdebf2,0xebd258c9,0xa50f9a05,0x0cb1b990,0x7b0f0151,0x69a8c97a,
38362         0x11d217e1,0x2cc36d34,0x752f75e8,0xf117688a,0xa09b2a61,0x1db01394,
38363         0xa9efd7dd,0x14627844 } },
38364     /* 127 */
38365     { { 0x232803cf,0x6bca3aed,0x9a96ff34,0xc1e4398b,0x74ab788b,0xcaf6757f,
38366         0x7e68c04d,0xc3a53e00,0x5cb7cd20,0x5f969c19,0xdc068bca,0xf28b65a6,
38367         0x1d863032,0xe3ca01d3,0x87808e14,0x9b733b81,0xefe618be,0xb5d704d9,
38368         0xb01b946d,0x276f3542,0xfbedddbf,0xe057e19e,0x903275ce,0x7d182f2b,
38369         0x880f7bc6,0x3cdc5f77,0x78476c14,0xd6f03d3f,0xa9ba5072,0x035f5557,
38370         0xb4029628,0x7acb57b6 },
38371       { 0x44e6b07c,0xd2413569,0xe1c7345d,0x451c4cc9,0xe273b9fb,0x407444d8,
38372         0xb88e34fc,0xfe496079,0xf152776d,0x77d184cf,0xc742299c,0x6d1033b9,
38373         0x77bf2897,0x29a0a684,0xee8f0420,0x59ffdf10,0x44bb56d6,0x4e17146c,
38374         0xfb9ae855,0x831d06c2,0xd93e7cd5,0xb2cb82db,0x3c96b607,0x83381c46,
38375         0x7549e2a8,0x06aed251,0x774a21d4,0xef97891c,0x8675fbdd,0xae9807c7,
38376         0x6363516c,0x6a5a05b9 } },
38377     /* 128 */
38378     { { 0x6a8f4f33,0x92e71ea6,0x4dea8f4a,0xf2fc6fc6,0xfee88461,0xd356252c,
38379         0x08954d08,0x59b0a83e,0x468ab766,0x5bd68c23,0x900f8d04,0x40281357,
38380         0x52b867ae,0x181c19c0,0x18764c41,0x986a5169,0x13575d24,0xcb01dfae,
38381         0x593677b7,0x17269ae5,0x46dc9b19,0xf6d17025,0xc40097c8,0x8de68499,
38382         0x259c407b,0x76df0032,0x17d29d8b,0x4091aad9,0x4a7ab5f6,0xa7f46d21,
38383         0x70ece48c,0x688054b4 },
38384       { 0x51a5b86c,0xf0d168aa,0x95777247,0x2437e4d8,0xf1720329,0xae844076,
38385         0x9647a54e,0x0a7ac87d,0x0405622c,0x1e597a4b,0xf0a79f2f,0xedefe5c6,
38386         0x4d55156d,0xaf3ef0c2,0xef047cf6,0x917fb04e,0x54b62137,0x3792799f,
38387         0x314be0b8,0x875ea32f,0x0c466b0c,0xe157c65b,0x7e218978,0xd28c90ce,
38388         0xcde587af,0xb90fc3ba,0x8b877bed,0xdd32d71c,0xca8e10cd,0x3b432200,
38389         0xd94f6e53,0x0021f419 } },
38390     /* 129 */
38391     { { 0x43519d26,0x2191122c,0x40a51845,0xbdafac1d,0x548bb89f,0xcc6f71e9,
38392         0x16844bf9,0x9ef3375c,0x178e8d55,0xe7789f79,0x1f8be1c5,0x04f599b6,
38393         0x2cbbde40,0x8088c99a,0x893206c9,0x8939a260,0xfcd30851,0xa1ae4bff,
38394         0xe08feafe,0x664cb3fe,0xff14aabc,0x61f38099,0x2a841ef9,0x0d8394cc,
38395         0x17f01db6,0x75fad8ad,0x6debb773,0x6fc34576,0xa4252512,0x1e716b05,
38396         0x29e1ed9f,0x79855880 },
38397       { 0x95106473,0xa2cb3aaa,0x5a61da04,0x95fafa41,0x539563c0,0xfd3c9362,
38398         0x95312b87,0xbaa48091,0xbf885c76,0x6c7e7582,0x230c78d5,0x70f6dab6,
38399         0x7747440d,0x8ce3051c,0xffdb6186,0x6dbebd14,0x190e4096,0xb0e041fa,
38400         0x6ee62e2a,0xba10c466,0x74f333d6,0x93d57e2a,0xfe7b9b66,0x006aadc4,
38401         0x06d2837d,0xfaf72f6c,0x910741ea,0x318cc5e6,0x65692477,0x9c502609,
38402         0x1d0fb08d,0x95d823c3 } },
38403     /* 130 */
38404     { { 0x140528a5,0x6aeebd86,0x53979bc8,0xf268c2ba,0x4ec144ab,0xb1bc9b8a,
38405         0x82a7d7ed,0x1efabb0d,0x4e0118d8,0xf12c70d1,0xa1c1558e,0x31607168,
38406         0xe4b7e73e,0x33e428b7,0x83aec9dd,0x63176637,0xe12ac35c,0x5172ffbe,
38407         0xbc17b2a4,0x37df0bfb,0x741f812a,0x4212f870,0xe2888f9c,0x3dcecbdb,
38408         0x756ca55d,0xa9dc15aa,0xb9028e41,0xf31918ec,0x6aeadb03,0x7ede0285,
38409         0x78654f54,0x0e2708d5 },
38410       { 0xcde20f88,0x2270cc53,0x5f5b1039,0x9338272c,0x5dcb1dbf,0x5042e19e,
38411         0xb72d74c1,0x4b3de219,0x2aaaaa55,0x16c49a8b,0xbba86ba6,0x008443e5,
38412         0x20cf1695,0xee6bcd72,0xa89abd11,0x59ffac6b,0xf115639d,0x2831217b,
38413         0xf34cba52,0xe4d28af2,0x0727a906,0xf27f03e7,0x69017766,0x6842c79f,
38414         0x7a81123e,0xcb3469bd,0xa42973b8,0x48c0f346,0x23990dbd,0xfc5784a6,
38415         0xfb299678,0x0d3dab3b } },
38416     /* 131 */
38417     { { 0xce29c3cc,0x8f8376e6,0xf016cbc6,0xcb0507ec,0x5e394ce1,0xdebff996,
38418         0x73c50d41,0x24fc526f,0x2d16ce3d,0x4edd5a54,0x91c13141,0xbb37bdd9,
38419         0xe33a8606,0xe3442ef2,0xc0629da8,0x2ae90337,0x592ab331,0x57faec64,
38420         0xd82b857b,0x1a938997,0xa3373176,0xad6c8cb9,0x9086751f,0x82595de2,
38421         0x18c17196,0xa81e97fb,0xbf697357,0xe4f48a13,0x5cb89f69,0xa1387c2e,
38422         0x5874b426,0x530b4eeb },
38423       { 0xbab7b5ae,0xe9f275a1,0x03a57bf4,0xbb69dc4d,0xa45c505b,0xc974dc4a,
38424         0x416ac402,0x726369f3,0xaed985dc,0x735e4e78,0xcdd446a1,0x0548d879,
38425         0x9e16b02a,0x84ceb069,0x789b11a6,0xf73f6fa4,0xb2a4e784,0x6aa0c41f,
38426         0x93a9b697,0xb1f76902,0xf03a8ab2,0x814cce00,0x844d66c1,0x64cb255b,
38427         0x30952201,0xb794e7d6,0x3da32271,0xe052d4e4,0x08b6a4d9,0x5278b2e7,
38428         0x80c6577f,0x90942552 } },
38429     /* 132 */
38430     { { 0x0d5b4c2f,0xd269a14d,0x5c8a649c,0x2b8fc59b,0xb0e37d4a,0x95becb3a,
38431         0x9111037e,0xfda1a768,0x94e35322,0x5810e05a,0xa178fafc,0xa24dcc12,
38432         0x8e3dce62,0x5c2c63b2,0x9452c444,0x995c3f17,0x42d45161,0x35330ec3,
38433         0xb4ef8129,0xa025a60a,0x8bae9c13,0x85493252,0xe2e3caf8,0x25d1a606,
38434         0x3649bf47,0xd44091ab,0x704ec5f1,0xc7d0afbf,0xbd8b3333,0x27bd1d62,
38435         0xcfe616f5,0x50570111 },
38436       { 0xf534356b,0xd0084ace,0x4b4b0fbc,0x9df1de05,0xcee04dc1,0x021afe05,
38437         0x361b78e1,0x64bde688,0xef78d38b,0xa324fcc7,0xeb0a5e4e,0xfeb372ce,
38438         0x65811996,0xef04fcb3,0x5eb0ab4e,0x7dce5d50,0x238c586e,0x1e29b588,
38439         0xbcd80037,0xde5e3197,0x4806b9cf,0x8bf5e451,0xd18e67ab,0x4330968b,
38440         0xf9f63fad,0x26a7d04e,0xb5c18bb4,0xa1c7f123,0x25dce22c,0x485b8482,
38441         0xd540e79f,0x8ff0b36f } },
38442     /* 133 */
38443     { { 0x3ff42cff,0x99f2e2f4,0x1c35317c,0xa3c19f9d,0xaba1b545,0xdb749392,
38444         0x4afa9a32,0x84232b05,0xd7dcd436,0x0b855d46,0x45cf9915,0x8ac35e20,
38445         0xf001a218,0xd7cf22c7,0xed408305,0x057d35ae,0x553ccfcd,0x25a4a519,
38446         0x93e2b939,0x5e565793,0x3422ec27,0xa20332b0,0x3ac53958,0x9b09005e,
38447         0x79e9b163,0x628051a3,0xfc6618d6,0xb4a0dc09,0x6748e7af,0x9e0e857f,
38448         0xc577d63e,0x71b28eee },
38449       { 0x99726bf8,0x4942b0cd,0x1c208f3c,0x1290a3b9,0xb0598eaa,0xfd7290e7,
38450         0xa25a9128,0xc6a7791f,0xc037d7da,0x2d33db24,0x70e2837b,0xc21efeb0,
38451         0xe3dae2a0,0xbf70d96e,0x85076027,0x43ed8191,0x4d4ad7e3,0x4aeb0aa8,
38452         0xe8c5b74c,0xbc75101f,0xad26ebdd,0xdbfb2a6e,0x6b78aa4e,0xba812068,
38453         0xe1159848,0xc94aa8f2,0x3eba5c4e,0x0d10d9db,0x6318295a,0xce7fec47,
38454         0x330d925a,0x7294711a } },
38455     /* 134 */
38456     { { 0x32bbd495,0xfce45904,0xbe54973f,0x330f4dd1,0x5d9c3f4e,0x006bee1d,
38457         0x59ba7204,0x40ee6078,0x42c2c768,0xc194fd3f,0xe9fe88be,0xa0e76b12,
38458         0xec2b0210,0x17cddddb,0x00811ec7,0x689d436b,0x284be9e4,0xa6a6ba37,
38459         0x007d4114,0xabc395b2,0x0f11e744,0xf8cdf9f3,0xe9396402,0xc5febec8,
38460         0xeeb46285,0x8a751743,0xc6e0d137,0x99bf8782,0xbeb292e3,0x3965e170,
38461         0x5801fd5f,0x001c39d8 },
38462       { 0xda4a0912,0xf4805cb9,0x4410bca4,0xd27cb76a,0xec71d65b,0xef3dcb8e,
38463         0x4816849a,0x780fbb2b,0xa8b24635,0xef6a7026,0x12c44e68,0x15625c88,
38464         0x4d7a74a8,0x624c232c,0x4b1631e4,0x81a77037,0xdb917c2e,0x04e4f7f1,
38465         0x1f61ed95,0x1d0465fd,0xcbde6e3d,0xb1048049,0xd7131fcf,0x637ce0c1,
38466         0x8ada4715,0x22e4dbc2,0xace99726,0xf7530c5c,0xee287450,0xa0160dcc,
38467         0xbb91af13,0x9132e670 } },
38468     /* 135 */
38469     { { 0x7996099d,0x8057efe2,0xa06e608c,0xb72344db,0xd0958588,0xeb4a8740,
38470         0x79e5aee9,0xe53daf06,0x908a2fad,0xc9560a9a,0x107e706a,0x7f4be131,
38471         0x2830246a,0x6d5f3d9b,0x27cca3e6,0xa5f8e8da,0x4c28f292,0xeb51dca6,
38472         0xf31dfd78,0x4cfa310e,0x2ca073e5,0x92e0c7c2,0xa40da683,0x102f1694,
38473         0x750d38fc,0x16bb07cc,0xbadae035,0x703e83e2,0xb4d3c9dd,0xea93c066,
38474         0x79940ed1,0x7d0b03e5 },
38475       { 0x4dd94c63,0x5fe7ea30,0x738b0b3a,0x57ef01c5,0xa14e6b4b,0x9534a78c,
38476         0xa5353276,0x07622cde,0x7c22d006,0xaf696a07,0x7d46b209,0x733c1886,
38477         0x626c2b4a,0x9654ccbb,0xa84f3c4c,0xa098d3a1,0x2d734b74,0x3596f9ed,
38478         0x5d551c90,0xdfd3021a,0x1ec5123f,0xe2ba7d2f,0xb2c1aa39,0xf9726925,
38479         0xf8eb2927,0xd2e75d0e,0x19192a6f,0xfaba712e,0x9b83e50e,0xa606b43a,
38480         0xdab5de60,0x31b1782f } },
38481     /* 136 */
38482     { { 0x4034db92,0x878dba45,0x8f34dc4d,0xa3977901,0xdf754c33,0x8d004f2e,
38483         0xcd563a88,0xeaa5954a,0xbb5ffad1,0xa29d6c89,0xb0d8bdb8,0xa8adf655,
38484         0x8cdbdb47,0xf7fb842d,0x80d3205b,0xb72e3a03,0x7cac7ca9,0xc335b0b2,
38485         0xd8a5475d,0xffc60bcb,0xeba4d25f,0x736f7719,0x0c50fca6,0x3d901c38,
38486         0x80c01900,0x1fdacf7b,0x5681f84d,0x75cf658f,0x5cefbbc1,0x57a7e634,
38487         0x3e07ed1f,0x6fc0fbe5 },
38488       { 0xb81b0e5f,0x496d116b,0x2ac853b8,0xd82dd2a5,0x327387f0,0x357e22d4,
38489         0xba912c59,0x3e332a84,0x49d5dcc1,0x8b71c643,0x438d85d3,0x0c982ee9,
38490         0xbf7fcd4e,0x90b9553c,0x38fed5e3,0x2cb39bbc,0x5ac42903,0xa2c67c9c,
38491         0xbf07da55,0xebf21217,0xa0b9e4ee,0x55ac05ad,0x8ee9e0c6,0x10bb12c2,
38492         0x48bb6e3f,0x5cf3aee5,0x8b046e91,0x4ae7269c,0xaa0e553f,0xcb266012,
38493         0xa94c8fc8,0x701935a1 } },
38494     /* 137 */
38495     { { 0xa4626dea,0xde58d41d,0x15b9039f,0x25ef66ca,0x3164e65b,0x99a810a4,
38496         0x748cfccf,0x9fe6daad,0x2f142fa9,0x7ab9a6bd,0x5d471796,0xa4cba168,
38497         0x6bc3a39b,0x12d30b36,0x8bf45076,0x1f46a5dc,0x1421ac0e,0xb868e529,
38498         0x59bba1c4,0x7a686206,0xda698b90,0x2b4b552e,0xe5453707,0x5039dcd4,
38499         0x9e90165f,0x42a07a9e,0xd7d45dfc,0xa838fff3,0x3b5ceb30,0x41991e5a,
38500         0x969ca600,0x6c961ec8 },
38501       { 0xc4e7eb46,0x703bdc1b,0x596c7b48,0xd6bac557,0x66afd74d,0x4f9917cd,
38502         0x656ce6f3,0x56355105,0x32497175,0x3d1fb50c,0x63effb2d,0xfda6783e,
38503         0xeefaa2bd,0xbd79f1f3,0x17af9ef7,0xa4efbe54,0x5a55b7a4,0x6cef6462,
38504         0x1a713304,0x116f3238,0xb95625a3,0xdb2a2a7f,0x0b027e96,0x6a0aa43a,
38505         0x4832b3bc,0x458fe5d2,0x5adfaac0,0x523418df,0xc49e7f9a,0xc05a89cb,
38506         0x69e24b53,0x830883d8 } },
38507     /* 138 */
38508     { { 0x02557389,0x959b1c62,0xadefc0bc,0x5fe5ce97,0x8330f383,0x893bbe7f,
38509         0x16cfb81e,0x27e0c6af,0xd04428fd,0x6f64e65b,0xb79e6182,0x53de9245,
38510         0x487e11ca,0x08a313c1,0x445bce93,0x65cec3b9,0xd67ed49e,0x33bc0314,
38511         0x30782352,0x69f36b24,0x93ad31d2,0xd78e5daf,0xc780890c,0xf2682b70,
38512         0x9e45efe9,0x7015c34f,0xe6cbafea,0x135d4ba4,0x7e3fcc6c,0x43a378a4,
38513         0x96638f8c,0x2376f97f },
38514       { 0xae575b99,0x0a6e1ec0,0x81b970dc,0x7e14cb4f,0xd3a73947,0xf00a3824,
38515         0xfb235a9d,0x0b4b9c81,0x5bf62944,0x8d15115f,0x1e165d7a,0xcfd35b43,
38516         0xb2ee3e3b,0x5d12fea2,0xf5182e7b,0x629984a6,0xc365d08e,0x4e43e2f3,
38517         0x30f36e72,0x99327091,0xfd345401,0x698b4a00,0xbaf96dce,0x23c4fd0e,
38518         0x23675554,0xa60ba0ae,0xb0325784,0x51bdac2d,0x215464a1,0x8ab4190a,
38519         0x6bf10296,0x8c461661 } },
38520     /* 139 */
38521     { { 0x2d1f36a5,0xeffca258,0x894c5f2d,0x0eded2b2,0x43ced84f,0x35a5cdb8,
38522         0xdb0e3b9b,0x290f8982,0x0719a112,0xcce0eaf0,0x39a362d6,0xd0e657e4,
38523         0x62697e47,0x5516a55d,0x8e636514,0x269e1f77,0xd50269bc,0x5e3dedcb,
38524         0x441c57c5,0xecec2300,0xc705578d,0xdb83f31c,0x1e489eab,0x1bdefb73,
38525         0x395fcdb4,0x20b678cf,0xff9db001,0x908cf91c,0x55f52cc8,0xcbebc6f4,
38526         0xb4c61162,0x155ea622 },
38527       { 0x876fa42e,0x94be2f1f,0x7fadeee7,0xab5e8749,0x38c865af,0x692e70f5,
38528         0xdf8059b0,0x16e99b84,0x8b5a7ac9,0x0ceb606e,0x2d463d2b,0xced23357,
38529         0x2a9a09a0,0x2d0f2623,0x3861fbdf,0x2529998c,0xc1be310b,0x711888a7,
38530         0x0d8aade3,0x9b1229c5,0x3b13533d,0xdbcf9b78,0xff029708,0x3ca746f8,
38531         0xda83ef88,0xa5a013a1,0x4ab28444,0x8e904d18,0xbcbd4aba,0x2fe84b3d,
38532         0x259058c3,0x8f570f24 } },
38533     /* 140 */
38534     { { 0x2ca9c508,0xdeb66c8a,0x69d6b780,0x2dc5bec2,0x88ead600,0x16d61266,
38535         0x49d72614,0x61841b97,0xce472e6f,0x41e40e6c,0x1fa7a876,0xada24264,
38536         0xcc3997a0,0x45b9fd33,0x7c15dcf4,0xb25e8fa9,0x12e9629d,0x0124ceb2,
38537         0x7db3d956,0x3a8c72c6,0x7c1a7844,0x8e2ded2b,0x6dd027ff,0x94ab09c6,
38538         0x7e7a2bc6,0xf89a057d,0xcf70c763,0xad8bf226,0xc8a26212,0x4cb268e7,
38539         0xb2c44c1d,0x3d171e87 },
38540       { 0x8ce49820,0x382ac16e,0xc0c44dc9,0x24ee45e2,0x73e858c4,0x0ec67912,
38541         0x46327cf9,0x918cb25c,0xc6159c1f,0x43e3876b,0x37545cb3,0xb6b6e0e0,
38542         0x5d12347e,0x64b839ab,0xa300d541,0x72e09274,0x881c1169,0x26ab28e6,
38543         0xeb75a843,0x4a580fff,0x359120df,0x0a5802ca,0x3209f4a3,0x7fee82d0,
38544         0x8e6a9380,0xb518016b,0xc2ee11ca,0xb99c6c70,0xab9d4ec7,0x16105af1,
38545         0x34cd9004,0x234e98f8 } },
38546     /* 141 */
38547     { { 0x14db9cda,0xff435208,0x96adec90,0x99cfdc47,0xaf458b6d,0x843aaa6f,
38548         0x743eaa31,0x3f1f7415,0x61735d81,0x915e192e,0x0ac595d5,0x3441a22d,
38549         0xc044bc8d,0x704bbf67,0xbe23a236,0x2f960471,0x15d1d557,0xcc326388,
38550         0x76b1dd94,0x9410230b,0x0c1c8a67,0xf2e5439f,0x833c910d,0x56b141ac,
38551         0x865b84df,0x467c999f,0x21f02b7b,0x1b0251fa,0x96216950,0xde5b5260,
38552         0xce3a1e93,0x6a2130e3 },
38553       { 0x4b3ca1a7,0xd21b67a0,0x00c0ce80,0xaf42ed53,0x932cf07a,0x22ccd368,
38554         0x5c25c35a,0x36523a81,0x8dd04d06,0xecdd3958,0xb2f93a3b,0x73da3502,
38555         0xd5e5b530,0x4c5e0c3c,0x13268777,0xef9f5486,0x1e742292,0xed87fefc,
38556         0xa24e5ede,0x6d9ac29e,0x33849f1a,0x08abc9f0,0x40f23905,0xb09b2292,
38557         0x7f934353,0x6791072c,0xe6aeb550,0x102a6381,0x96feb870,0x3ee07409,
38558         0x9c4d2830,0x34f06faa } },
38559     /* 142 */
38560     { { 0x2348f005,0x869dc79f,0xdf4920b1,0x9b5c5d71,0x6dee64a4,0xfd1b57ca,
38561         0xe82a4fb4,0x21b7f734,0xb9578366,0x637cb834,0x7d287d96,0xc934101b,
38562         0x0392ecab,0x1590f8ac,0x7f75f4e3,0x280dc373,0x6a61ac62,0x8b36f50f,
38563         0xa65568da,0x74f58304,0xd930870a,0x80d792a9,0xfc8895cc,0x6d17b192,
38564         0x4914939f,0x498392fa,0xd41d5b9e,0xaf36027d,0x5caa82b5,0x452d79e2,
38565         0xf4115d1a,0x764d47b1 },
38566       { 0xa2ee8b9c,0x5df22303,0x85dfcd48,0x1b9f72d3,0x10813a37,0x6b42b983,
38567         0x3de741f5,0xe28c523b,0xf303bb5b,0x0857625a,0xac9bf9af,0x926f299a,
38568         0x0d445b34,0x21beac08,0xd6ba2c0e,0x6a523a02,0x7fce2864,0xe302a1b1,
38569         0xe300c1ea,0x4516a235,0x7b4a9311,0x4543736a,0xc0cc89f7,0xd3c0b9e8,
38570         0x40ed88de,0x0481904f,0x3cb7fc70,0x4f269b56,0x321b9738,0x09a1d53a,
38571         0x230a3810,0x1c0dd9c3 } },
38572     /* 143 */
38573     { { 0xc46a7d9a,0xffaa1f67,0xbedf91cc,0x64743334,0x47a42f2e,0x45833a74,
38574         0x241ffaa9,0x67980051,0x335efe6b,0x70979a84,0xf08b2403,0x5f0613f5,
38575         0x64f211dc,0x6bb22fcd,0xa0572cfc,0xe1b8b2a3,0x7950a14a,0x19e0eb41,
38576         0x3eb6cd4c,0xe634bb29,0x470a25ff,0x31a04b25,0xa3d15a0a,0xa41f7ac9,
38577         0xbf2fede9,0xefed85ec,0x81b94a00,0x1f581f5f,0x9ef4a15c,0xaa3996b0,
38578         0xb06041bc,0x52d8be39 },
38579       { 0xfd631a2f,0xbd1536f6,0xb351a8dc,0x91fae7f0,0x9b126212,0xd1a590c7,
38580         0x2bd0f435,0x52d4875f,0x92b0ea70,0x9aedb6d3,0xb83ab89e,0x0bd0abdc,
38581         0x89fe192c,0x827a1062,0x102a0bda,0x6566a960,0xce036814,0xda083037,
38582         0x58639405,0x30bed79f,0xdbca8df9,0x972019b6,0xefdaa3f5,0x89201286,
38583         0x5236b892,0xb337b996,0x28fc2e73,0x11d3e38e,0x880e8da3,0x70787f41,
38584         0xdae4a45d,0x6cff6367 } },
38585     /* 144 */
38586     { { 0xf89a8bb4,0xbd3d0433,0x93b98f71,0x42144c33,0x03470a2d,0x82b616c8,
38587         0xe5da089e,0x98fcc757,0x7bf5fda6,0x542354ef,0x9ebd34cc,0x1885c253,
38588         0xbec5dd0d,0x2e20b285,0x782a1bca,0xe71bbbe1,0x9b854ef0,0x959ded30,
38589         0x8997fa6a,0x17249979,0xd81f3c45,0x50cf8fa8,0x60c11152,0xa9a3b517,
38590         0xecf845ea,0xc9b0ef7d,0xb9fed11b,0xc9339e23,0x28256080,0xc93e9c5c,
38591         0x613ec1e7,0x1d2c8217 },
38592       { 0x987cfc93,0x7381347d,0xf187f810,0x047603bb,0x1250ca31,0x3fa6bc9d,
38593         0xbb055bf3,0x480091e0,0x3a3af87c,0xbdf95f1a,0x140540ab,0xe2687770,
38594         0xd7fe045b,0x998df730,0xb723bc2d,0xb398135f,0x15ebec46,0xac230f8c,
38595         0x5f5561c0,0xe08e1830,0xda60a47f,0x7c0fbf4c,0xe16d4bfc,0x06e95c24,
38596         0x74617e92,0x74163495,0x4ae0c20e,0x39719869,0x2131e2b6,0xfe269312,
38597         0x0a537722,0x25486e36 } },
38598     /* 145 */
38599     { { 0x53572806,0x618795ca,0x656968e1,0xb2c89449,0x3fb323ae,0x149c2c97,
38600         0x409bc7d6,0xfb15de26,0xc79121b3,0xa90cda72,0x204cabbb,0x6d2fa14e,
38601         0x91604125,0xcbcda6f7,0xb435f947,0x25086261,0xc282eb10,0xdb686c38,
38602         0xf1a791cb,0x51016d62,0x61a2266c,0x6b1c7ed1,0x271d74a6,0x26780666,
38603         0x824287a4,0xb5ffeda1,0xbbe4f0f3,0xcbe503ff,0xb9482a74,0xd7f7f0be,
38604         0x088493f1,0x751b2358 },
38605       { 0xe9c9be68,0xd597b9d6,0x67d10c6c,0x1794b5c4,0x7762b2f4,0xa88cdc3d,
38606         0xa1b44e11,0x6d94a63a,0xaaa8eca8,0xfb0bbbb9,0xc963d87f,0xf4b0f2d0,
38607         0x5dc7075d,0xb753062c,0x49933989,0xfed726ac,0x57f9ccde,0x5da60638,
38608         0x75f8c766,0x221c392a,0x5dc672ca,0xcd264d95,0xb66ecc8d,0x7004ff22,
38609         0x18a458ba,0xfb1aa9ae,0x8babd653,0xea9644df,0x2ba0de7c,0xa9378e80,
38610         0xca2c6c75,0x144cc12d } },
38611     /* 146 */
38612     { { 0x2989aa3a,0x593a0a1d,0x59e6e64d,0xd83f2283,0xd32e732e,0xe938b0cb,
38613         0x3c3cb249,0xf4c464c5,0xf89ea6ac,0x9750a5f8,0x346cfc32,0x467e5bbf,
38614         0x37b2b809,0xc9bfab9d,0x3b339c6d,0xf8eb7453,0x3b766dee,0x3fe01fbe,
38615         0xef6aea27,0xb3154254,0x7be61b10,0x555c3df2,0xdd818488,0x70fb6d81,
38616         0xbbe714f9,0xda1af3a4,0x9d18f693,0x575f2017,0x2465b839,0xdc08fc6b,
38617         0x6b84a951,0x874ecf33 },
38618       { 0xbbb3f6be,0x624af83e,0x08bb423d,0xf578fbb9,0xd7873527,0x5623b0ba,
38619         0xa62e0442,0xc3659bd8,0xfe236f79,0x2903b167,0xe53f26a6,0x55a430c6,
38620         0x3ad712cf,0x222547ae,0x76eb272b,0xb73890d7,0x3d628df9,0x95b4f70b,
38621         0x53eae4ac,0x9f0e13b0,0xe7f2174e,0x5b4f5138,0x98dbae17,0x75482cf9,
38622         0x44518480,0x2b69bbde,0xcafef15c,0x4f279652,0xb6bcaf19,0xa0a3ef2b,
38623         0xce4c634f,0x31fb8581 } },
38624     /* 147 */
38625     { { 0x615cd607,0x398306d1,0xaa32c3a6,0x680c9faa,0x7779131d,0xe87a705b,
38626         0x36708b00,0x1031013a,0x9445297f,0x814fa0e1,0xa6a79b56,0x70c5583a,
38627         0x4b16bed4,0x03039cbf,0xaaaaf8d3,0x18a7ca8d,0x5cdb68a5,0xf33159e7,
38628         0xd23814fa,0xdea0e738,0x8d0f4f9f,0xeb352718,0xdcdff032,0xb0b76609,
38629         0x3d48338b,0x65ba8ea9,0x55dd507a,0x18044d82,0x4a4a50b4,0x844a223e,
38630         0x18e19e54,0x98323000 },
38631       { 0x57f3d5a6,0x28a21027,0x6e8cadcd,0xffce5648,0x02551f3b,0x9590381b,
38632         0x935ebdf1,0xb26cc64f,0xc083aa6e,0x60611291,0x88e4cf41,0xcd988a66,
38633         0xdd53b1b5,0x581c3f73,0x77fc621d,0x78c804a9,0xfadca2fa,0x31874330,
38634         0xc83ccf02,0xf7008da4,0xa79a4707,0xc4122a1d,0x4a915eb5,0x9a8e0d3f,
38635         0xd0123660,0xa2de157d,0x65ead2a0,0x45ef43b2,0x188db285,0xd0a22ade,
38636         0x922e0caa,0x8abbe39e } },
38637     /* 148 */
38638     { { 0x3a2d2f01,0xb4446905,0x5dc6685c,0xd27c3193,0x1d74a027,0x6a908bbf,
38639         0x5b50ec1d,0x01da350f,0x3f3c2e26,0x1d3dd45e,0xb836ee92,0xf66e11d0,
38640         0x474b979c,0x7e03908f,0x98b87834,0x19e7c5b9,0xbd3d1de9,0xa741d3fe,
38641         0x1ef6059b,0x63c68e8d,0x3674e247,0x9b9ff939,0x3e7e67f6,0x1d7d53e7,
38642         0xaee9e248,0x698dc326,0xb3bd984c,0x52f23eda,0x6f8fe8a7,0xf95e31b0,
38643         0xc3d0ba95,0x0f15b4d0 },
38644       { 0x790a8d85,0x8f2f6635,0xe2595af1,0x51bffbae,0x24b51287,0xd15b7ec6,
38645         0x3234715d,0x7639b6ab,0x2bc5441d,0x0cdd5299,0xf6d05833,0x54800ea4,
38646         0xf6d6e360,0x21efd752,0x19290613,0xc0b7ffe5,0xeea898cd,0xb68a5825,
38647         0x22982266,0xecedba92,0xbbd06bb2,0x678a91b0,0x4bb6b0cb,0xb2436dc0,
38648         0xcaf8ea98,0xcf7a99e7,0x71aa05bb,0xb92d0e6e,0xf5993eb1,0xbf8d0471,
38649         0x20385ddb,0x515db378 } },
38650     /* 149 */
38651     { { 0x6f5bef22,0xee43eaaa,0x20348712,0x952d2698,0x7a3af6c6,0x1e4c484e,
38652         0x9a8c9403,0x18d434c6,0x5001899a,0x63e5d741,0xfe8ea40c,0x5238dbbc,
38653         0x96798721,0xca6cc8d2,0x04acbde8,0x73db6aee,0xb7f993ce,0xbf69328d,
38654         0xad45e334,0xa3f79bbf,0x7c1f1630,0x8c51ec93,0x9b00a6de,0x4907325f,
38655         0x12d82bc3,0x49e6acb4,0x0ec59fc9,0x5901b36d,0x9cf34e3b,0xcb09b710,
38656         0x1abf4c02,0x2de0487e },
38657       { 0x8dd9d484,0x18b722f3,0x7c77bacc,0x83349393,0x93d92b8a,0x58dbb8f1,
38658         0x8e3fac25,0x80d78d50,0x745f4a7d,0xf0500981,0x877cc29d,0xd072bfed,
38659         0xc30a89f8,0x67abf8f2,0x9a0820d7,0x92c567ea,0x8a3a5738,0x425ab12e,
38660         0xf055521b,0xc162faeb,0xb94ea5e9,0xee1c4f26,0x3d71e546,0x1e414994,
38661         0x43e8be1d,0x258183b8,0xef9eae0b,0x44917c82,0x73874a30,0x6813a457,
38662         0xcc42f86e,0x6f6ac071 } },
38663     /* 150 */
38664     { { 0x4dd6e3b1,0xd38822ad,0xad620869,0xfc78e1cc,0x2cacde80,0xe7843845,
38665         0xa8469fe3,0x121cc14a,0xe67e8ef2,0x8e8f3da7,0x4d347448,0xdb83d16e,
38666         0x798631f4,0x3ba1dd98,0x0a4c4c17,0xdfab5977,0x3edc701f,0x1f0a1306,
38667         0x6cd8ff28,0x4649d601,0xbcc55bc9,0x2267230b,0x5760412a,0x02a19c60,
38668         0x328faef6,0xc719d5f1,0xf67eaad9,0x27cb969e,0x719bafb5,0xf342530e,
38669         0xff5a82cb,0x6e2c24cc },
38670       { 0xadaf8793,0x6313024b,0x035c948e,0x944bccf1,0x953500bf,0xe9a066b7,
38671         0x1d116765,0x7991a946,0x9fd93c78,0x95addb2e,0xe92e5495,0x05d2c037,
38672         0x9f03e5cf,0xcb145b18,0x95aa1f72,0x81ae48ca,0x135a6e4f,0x203f2702,
38673         0x49b2a7d5,0x2bcef5a2,0x02d7f2a3,0x0687a900,0x6c6745b0,0x2f7d3228,
38674         0x86507305,0x3da8a875,0x2e8dc58f,0xbe38b884,0xdbf11185,0x6b48bf34,
38675         0x97c08f91,0x5af7fd0d } },
38676     /* 151 */
38677     { { 0xf4a224a5,0x55f9b950,0xcc50273a,0x41904574,0x643f1fd5,0x34f81330,
38678         0x0e50f783,0x996801bb,0x89581712,0x866d7403,0xa4091d36,0xdb9a405d,
38679         0x16a46fe7,0xf1e379df,0x83bf9168,0x8d04a93f,0x32b20bca,0xae4c8335,
38680         0xf72a1c10,0x99d334b1,0xd8195db4,0x8fbc9977,0xfba14b5d,0xcaeb3dff,
38681         0x76daf476,0x60fef022,0xdb5b72f4,0x4b948dfe,0xb6dfb062,0x5185c925,
38682         0x9609d4ae,0x27a9c381 },
38683       { 0xf12a93af,0x73c37346,0x5536634d,0x028b707c,0x498193d1,0x8efa58d5,
38684         0xef21b69d,0x4f83a5cc,0xa788a0e2,0x05cbb0a3,0x65b13c98,0x01031781,
38685         0x2b73784c,0xfea20e58,0xe50361f2,0xdf9713a0,0xd0cc22d9,0x31449a0f,
38686         0x7c5e2e1b,0x183752e7,0xb67044cf,0x6e44d6bd,0x733e177a,0x012dde95,
38687         0x08ee2c23,0x68b49669,0x1f5f1949,0xd9bb0541,0x6acd886f,0x95182c71,
38688         0xfbde9244,0x1c690694 } },
38689     /* 152 */
38690     { { 0x3a880026,0x5db67d17,0x125d95f2,0x89c4f0a0,0x3f6cb7a4,0x29050551,
38691         0x5cbbdca5,0x3eb231d1,0x972bcbd3,0xf8cffc99,0xad55a03a,0xcb4ef4d4,
38692         0x22867c2f,0x944d47ca,0x0ead1aa5,0x96d88548,0xcbc8b045,0x76a57cf8,
38693         0x005e55a0,0xdfe5844b,0x1d18a097,0x5e9e7e19,0x52923c74,0x957a26e8,
38694         0x7f5db339,0xd0867b79,0x63bed0c8,0x2553408e,0x689ad23c,0x1596e5d5,
38695         0xa504c339,0x7b8c13d6 },
38696       { 0x52fb6901,0x2fc43aad,0x16ca253b,0x1c0313f9,0x515aadc6,0x1475830a,
38697         0x7f577dc2,0xc93d1926,0xf723c0dd,0x26e52e8e,0x3eb9f6da,0x2f1e0eb8,
38698         0xf180376d,0x9979de82,0xb0834939,0x43e28ecb,0xa39c38e7,0x9a2d51dc,
38699         0xa8e3f6b5,0x6e6063a9,0x4b9b3270,0x4cf1da3a,0xd2f8915d,0x6e5348a2,
38700         0x50507912,0x5e75e3e0,0x20d383fa,0xaeffce57,0x8fd2fb29,0x1d6d53cc,
38701         0x696f4cd0,0x0e3c3ef6 } },
38702     /* 153 */
38703     { { 0x21ee1d83,0x3bc337c1,0x787b7788,0x97e08f6d,0x138fa4ce,0xbf709fcc,
38704         0xa0348e58,0xbaf77647,0xa55e672d,0x04f8babc,0x7d5ec5dd,0x0ed2919d,
38705         0x33e99218,0x8ce64bff,0x24b059af,0xac09fc57,0xdc5e32ba,0x506831f9,
38706         0x465af6a9,0x26a22677,0xc97f1ff8,0x3c5efe66,0xbc6087fd,0x1515e0d6,
38707         0xaa8edc6b,0xb1a39c5e,0x0e79ed29,0x3dd816bb,0xbc3788b8,0x6cc13769,
38708         0xc092a51c,0x463098e3 },
38709       { 0xc8bd0fa7,0x3a6408c7,0xce6bde49,0xd1764311,0x283ef7be,0xe315e108,
38710         0x99b5d938,0x8213cc77,0x45a49a6b,0xaf7f1581,0xe529e4d1,0xd00fdb0f,
38711         0xce66c9d6,0x55d38f77,0x1bd4b952,0xb4f7ccc0,0xaf71f986,0x8d975b49,
38712         0xcd64d00a,0x12b59fcb,0xa5a3bad7,0x1860e504,0x2b5c89f1,0x6d976044,
38713         0x7a3e231f,0xfed0c659,0x178cba92,0x58114c33,0x6698e11e,0xe2e74c06,
38714         0xa348b85a,0x7f8fd093 } },
38715     /* 154 */
38716     { { 0xc19428af,0xf24592ca,0x3a308665,0x192a1c81,0xe30bbd7f,0x42589812,
38717         0x836c6bb9,0x10db0723,0x598e4987,0x9c7a41e9,0x6ead6f4b,0x8aff179e,
38718         0x75862c44,0x70f8f9b9,0x6f21983e,0x6b3b0237,0x98e65152,0x25d83e9b,
38719         0xd751218a,0x3b2d26a8,0x9d6f1da6,0x9508281a,0xa5a81f74,0x8df78d05,
38720         0xe4687471,0xd79ee559,0x6787d8cc,0x2060ca57,0xa8476c95,0x427a84ff,
38721         0xe6435131,0x87b64c51 },
38722       { 0x4b30d3c4,0x87f46f65,0x23b4ef14,0xcdec4c5c,0x63ca4d68,0xb3b74766,
38723         0xcf3fb56d,0x1df34269,0x0fd7d46a,0xd4f139c4,0x6a69a8bd,0xa3b7c7c7,
38724         0xcbadd7d2,0xee56b4c9,0xac942334,0xb28ff342,0x786f1da3,0x0046fdfa,
38725         0xb700c82e,0xce5d149c,0x50966597,0xca30ef81,0xfcff4bdd,0x44a20609,
38726         0x44925268,0x0f2f65e7,0xd4021f38,0xe5b6552c,0x042dbbd0,0x77ea9c2a,
38727         0xd9c062f5,0x8c95267c } },
38728     /* 155 */
38729     { { 0x5fc1abb1,0x6655032e,0x12fe4743,0x2215af54,0x29f05ef5,0xfd657560,
38730         0xdc191be9,0xb0e73325,0xc08639b0,0x7ab3c65e,0x1c3e6673,0x67507f51,
38731         0xc8615555,0x638befc3,0x42f0c4ad,0x5d0188cf,0xd896186d,0x843a301c,
38732         0xb2c6741e,0x045603f7,0xfa3cd1d0,0xf7545c0c,0x4a40672e,0xf612affd,
38733         0x45b9e8dd,0x56197c9f,0x87922d74,0xb453237d,0x4b2d59bf,0xbf132e3a,
38734         0xb84a6a16,0x8afa1b73 },
38735       { 0xe793ac70,0x6b3596ea,0xeef6dd10,0x4c94ef8e,0x70422e40,0x926b4fa2,
38736         0xe9e5d763,0xc8c71dce,0xf512aadf,0x352fcb70,0xa883975f,0x1b7ba138,
38737         0x058c3b13,0x57991390,0x97740fd1,0x9692092a,0x160b0697,0x19ad945b,
38738         0x10837ab2,0xbc634388,0xf174bb71,0x76ee11c4,0xab1b80eb,0x6111bfc1,
38739         0x70ec458a,0xbc82bac8,0x312d3325,0xeee60127,0xb240adc8,0xb4118b1a,
38740         0x2b5a093c,0x67211191 } },
38741     /* 156 */
38742     { { 0xf55cf9bf,0x91e99306,0xa46b96d9,0x9b045308,0x9e7a65df,0xae3c1e1d,
38743         0xc731bcbb,0x453cb151,0xa4d58a61,0x14be5227,0x97c74cc2,0x39dac922,
38744         0x822e00d6,0x4d0f7a45,0xc62b03df,0xafeb1d51,0xbaa18b2d,0xbb1dc3a4,
38745         0xdf2b74f0,0x7f3c7178,0x896b6a33,0xfcd328a6,0x1dce055f,0xe95ed454,
38746         0x6a4e2b87,0x97fbc76b,0xfa59dce9,0xe5ec67f1,0xcc0367c1,0x052368ac,
38747         0x54e4a3fe,0x7c863916 },
38748       { 0xca7388cf,0x55e94b5e,0xc0335d38,0x17cc0a60,0x616f85ba,0x9b69b78b,
38749         0x10122980,0x705d02ef,0x1cfd0a79,0x565a6e80,0x7d1ee352,0xeb74a96d,
38750         0x427b9dad,0x5c8832ed,0xe6d5330f,0x96ea8528,0x18d24ee8,0x30d8862b,
38751         0x9ff939f7,0x9cd38ed5,0x01060252,0x690fc9a2,0x2303b3ff,0xc62d88b8,
38752         0xdd52b469,0xfc42d7a4,0x8cad2d93,0x06f8dfa2,0x60920438,0x50236090,
38753         0xfce855ad,0x32582758 } },
38754     /* 157 */
38755     { { 0x359e8c60,0xeb20e45f,0x364ca186,0xc71bb8a5,0xdff8e110,0x02b15071,
38756         0x4c93e578,0x074e91d3,0xb829d0d8,0xc0326e00,0x626a83fa,0x3c192258,
38757         0xfb29a09e,0x387a64d5,0xe5ac5c82,0xcaaa3d34,0xada2da29,0x8ed685e5,
38758         0xeb29650e,0x92720267,0x763802f3,0xf7184b19,0xdf6b1aea,0x23f5dd0e,
38759         0x25e6125d,0xbe1fa347,0x0c872a1a,0xd6287f9d,0xac57c3af,0x49aa93d2,
38760         0x5bda7656,0x1a4e6a71 },
38761       { 0x554d1267,0x1a126ede,0x1cd02b48,0x37f94533,0xce31fb1d,0xd70af04c,
38762         0x097dc012,0xcf410b0b,0x36c7b6c5,0x930e1d17,0xc6891085,0x902fee41,
38763         0x79fb638f,0x349ba4a7,0xacd6f8df,0xa16c5821,0x2e076ace,0xfb3b83c1,
38764         0xe501d14d,0x6b8d033b,0x20f2d2da,0x0593d452,0x99df1880,0x3752526d,
38765         0x9feb33a6,0xca32351c,0x1f6ef456,0xd91343bc,0x35b9dc8a,0xc74857db,
38766         0x85b4e832,0x856a7c93 } },
38767     /* 158 */
38768     { { 0x0d0a5583,0xa007d002,0xeda4658a,0x2f1301dd,0x34d939be,0x91c07964,
38769         0xa70c0836,0xa0cb6780,0xbe81e540,0xc0b4df95,0x5d4ac8b8,0x6cbbcd34,
38770         0x54756239,0x57c52ed0,0x1805ceb6,0xcac2dca4,0x79344255,0x915ee6ab,
38771         0x24c9a2a6,0x366def31,0x8c12c674,0xbd3b962f,0x7dbb7c3b,0xaab64f1b,
38772         0xe22bb95b,0x3c0e4553,0xc4c63b74,0x2408feba,0x2a4da631,0x3ca77312,
38773         0xc636da40,0x62889084 },
38774       { 0x8cb8d208,0xa457fd53,0x543f06d4,0x7a8f8009,0xf2eff2ab,0xb66de154,
38775         0xf72517e7,0xfddb28eb,0xf9389d2c,0x0149fe66,0xd85b88ce,0x79e8773f,
38776         0x0ba543f7,0x452e090b,0xb0b03fc0,0xdeb9b5cf,0x6c5ed77b,0x3113448a,
38777         0x8ffc0372,0x3609f3cf,0x5c1b4c4a,0x2bc9c46d,0x8fa59be9,0xe66f3bf3,
38778         0xcdb02691,0x1396bf5f,0x009f88f9,0xf1ec59d4,0x2ad9dfe3,0xc2903456,
38779         0x5ada4d58,0x79d8122c } },
38780     /* 159 */
38781     { { 0xaa529507,0x14d4e4ce,0x74655d00,0x056a0814,0x4f0fc474,0xc0d30a38,
38782         0x3443cb8e,0x8a8203ea,0x97f1728d,0x33c62fb0,0xb520ef52,0x8a38dcfd,
38783         0x7cac9d3e,0xa0f90d5d,0x873cea50,0x28a7b0bf,0x6c6c41cb,0xd115ae3a,
38784         0xa13812c1,0xa35171da,0x624d507e,0x25d4bba5,0x7e98f42f,0x91dad289,
38785         0x96a41371,0xffd6b1e9,0xb69e5b77,0xd46c2125,0x20c4f707,0xc7d2b424,
38786         0x8142557a,0x2ab3af95 },
38787       { 0x6a5372a6,0x86ca074c,0x56292ba7,0x728fb83e,0x77741cf5,0x745596dc,
38788         0x520ef49d,0x70b4cea1,0x61e46472,0x1472fe34,0x3fb8ac5d,0xf4d6bd66,
38789         0xc10bc071,0x46e52cc9,0x371a3461,0x28794efe,0x276fe877,0xa4850718,
38790         0x9bef5ab4,0xedad5773,0x3f15c815,0x24c2d9ff,0x8f8395c3,0x188950e5,
38791         0x80b6a855,0xbae40996,0x8a8803e1,0x4f53e22c,0x039d25ee,0xaf233f61,
38792         0x250409ca,0x07db2c35 } },
38793     /* 160 */
38794     { { 0x037d4703,0xc7f3b8db,0xc5f488b9,0xe83708df,0x8471d402,0x1fba830f,
38795         0x5a2faae9,0xa55ee8d2,0x5404fc1e,0xc2e5bf10,0xaa2d5651,0x647d5027,
38796         0x7ebaf5f9,0x37a53c0c,0x95b30abf,0x7adf0bb2,0xd64c93ba,0x5a62e1fe,
38797         0xe2ef4a78,0x7ffc18c0,0x4d2cd04f,0x139dd9d9,0x5ea0af02,0x253fbab7,
38798         0x0fef9acf,0x7c8100ea,0xc8615aa7,0x74c5384d,0x9fe52069,0xcb28682d,
38799         0xcf7dd759,0x08b6ca8f },
38800       { 0x036c3b5a,0xe04e5bea,0x7f9f2b4b,0x38726102,0x29797c0f,0xa9fca570,
38801         0x82879ea3,0x1656180b,0x607f0ddf,0x153389bb,0x67b0e087,0x99a1223c,
38802         0x9d897fc7,0x0d1808ec,0x916edf19,0x9470711a,0x07217118,0xf8f52f2b,
38803         0xd18888b6,0x5d8b29ff,0x4cc6f900,0xef1e22c5,0xeb24877f,0xc4036165,
38804         0x35479525,0xfda95233,0x6861468a,0xd622a421,0x74faba08,0x5d043b07,
38805         0x0d31a7d2,0x2c337b02 } },
38806     /* 161 */
38807     { { 0xea22fa65,0x7b2305bc,0xd159f63a,0xbe183ef4,0x3f35923f,0x3473d87d,
38808         0xc11d7753,0xb27fb306,0x2a054cff,0x702e7e6b,0xaf185619,0x3ce9f97c,
38809         0x4e7d51c5,0x83550243,0xf356ac5b,0xa63e3d82,0xd7645131,0x867b7caa,
38810         0xa671fc9d,0xee85e6af,0x2b07cd77,0x3b985ede,0xffda5193,0x07d598b0,
38811         0xa942dc36,0xb10eca39,0x506218a9,0x17f3dcee,0x06b7d5ca,0x3d94e8d1,
38812         0xed8831c9,0x509b2634 },
38813       { 0x48caed54,0xb1b9414e,0xcbf51e97,0x77a78c6c,0x4de9b258,0xa4688c8d,
38814         0x91ee3d78,0x0024137c,0xe30ee64c,0xa68f9234,0x88190d78,0x573255bc,
38815         0xba80690b,0x41e8e05f,0xec354f4c,0x50038d84,0xdfa52816,0xb18f02d6,
38816         0xccb63fda,0xc47f9007,0xe98ae455,0x29d480fb,0x5d0e319d,0x4ac45d22,
38817         0x026db719,0xd06f3575,0x2c3587b9,0x733b9e20,0x2c317727,0x22483992,
38818         0x54bb8752,0x1592d5a7 } },
38819     /* 162 */
38820     { { 0xcf7453f0,0x5778d9a2,0xed83c1f0,0xaffb899a,0xe0a82ba7,0xae6506d3,
38821         0xea3d5081,0x32c84e1a,0x810aa38b,0x9ad528c0,0xbd37d041,0xb1fdb020,
38822         0xd06ce41f,0x78d6cbe1,0x2e74b7f6,0xd287f0f0,0xc43bb022,0xf5cd2575,
38823         0xf81a71b3,0x6d28f2f3,0xc633e7f4,0xe65bb1f5,0xc4fc580e,0x32e5fc1c,
38824         0xbb7b07a5,0xcd55539f,0xc3caaf3a,0xb5a94471,0x4cc22d2d,0xb958bdf4,
38825         0x77a2777c,0x1614bdbd },
38826       { 0xed0ab04d,0x4c1f0230,0x6e2082ea,0xae347b00,0xc42c5b5f,0x9f10bc63,
38827         0xde019935,0xb0539e6f,0x65dd0825,0xd89bd4e7,0xbbceda16,0x92260fef,
38828         0xe62aca32,0x8aaa755c,0x5ec82c5f,0xed762fa9,0x18650768,0x99e64c01,
38829         0xc92e348c,0x57dd6245,0x31ea6d68,0x0db88a77,0x07b44736,0xef0012ab,
38830         0x171d70fe,0xb9356b94,0x03f891b0,0xe68b0628,0xb79c20a2,0x3a54a53a,
38831         0xb00b0728,0x489656c7 } },
38832     /* 163 */
38833     { { 0x71353c25,0xe43649ba,0x13f67e24,0x517f27a1,0x1c1eb9e3,0x10bd333a,
38834         0x78e29bf9,0x94e1c05c,0x4743f15d,0x84fe7d97,0x90da2df0,0x9c874908,
38835         0x53673be1,0x82403fa7,0x1baea1b1,0x7ebf5db4,0x24180ead,0xcfe0ae35,
38836         0xc2f50c3f,0x1d15873f,0x70661cd9,0x16851ad6,0xa51e8c2c,0x802968d9,
38837         0xe0161099,0xe7d1a9cd,0xa8a7ea56,0x2b153c89,0x06e3c498,0x6d41b789,
38838         0xd6769dcb,0x082bb2e9 },
38839       { 0xc4d6615f,0x6180ef46,0x01b9829c,0xfc629dc1,0x0fb264ca,0xde222ec0,
38840         0x10ecc2c4,0xc5457e06,0x1eea2c4d,0x95ce599f,0x8f9c5b2c,0x0433fa72,
38841         0xcd6310f9,0xee035462,0xce2e2253,0x84c57c3b,0x96d87e44,0x6c8ec31a,
38842         0xa452c5a7,0x30bfe393,0xa047b235,0xc592b140,0xc018545e,0x7bd8be18,
38843         0x5c178c46,0x794e0107,0x2e23005b,0x48471946,0x622a54f3,0x2665e237,
38844         0x901c9042,0x36451a46 } },
38845     /* 164 */
38846     { { 0x19893e71,0x17802d18,0x539a2082,0xa1765d8b,0x2302ecfc,0xfc6aea01,
38847         0x365bf59d,0x8d4cf51b,0x0d232a80,0x87741d72,0x18e80427,0xac343eb3,
38848         0xe74739ec,0x553ecb2f,0x1a8b07ca,0xaeca79a8,0x56f4ab3a,0x089ff322,
38849         0x3fa1d1f7,0x5e95d729,0xf62a9a16,0x260569ae,0xaa08ddc2,0x5e776232,
38850         0x1b7bb54a,0x93fabec3,0x743d56e7,0x48a20956,0xeb0ebeff,0x749cdb12,
38851         0x69b8fcf1,0x705307a4 },
38852       { 0xe488310b,0x7a8e4c04,0x5325cd7b,0x12726e32,0x4983efac,0x5d0fd8b0,
38853         0x02ddb913,0x796e552c,0x77b9685c,0x0eeca3f7,0xb15f24a3,0x9b766e89,
38854         0x48efc979,0x7c2736d6,0xa8021c6c,0x3d619685,0xa0b2f1ea,0xfe33e278,
38855         0xb676d6b0,0x95c69879,0x1af4e0be,0xa0747319,0x36c4ee55,0xa2fab5f1,
38856         0x59e5f3b9,0x6938b8ff,0x39cafe6e,0x1e114da4,0x6a6ad120,0xc9595ec3,
38857         0x57e62aec,0x80f79bd0 } },
38858     /* 165 */
38859     { { 0x60af09b3,0x3cef42a7,0x933dfe14,0x3c016ebd,0xed85eaa8,0x720cf1e0,
38860         0xceaa3bc9,0xd4f5e99f,0xb7106f97,0x7216b9d2,0xc9668ad2,0x65f34c36,
38861         0x5b0c651f,0xa8fb82bc,0xf2fda4de,0x20f42f1c,0xd21f659e,0xeb31ab2c,
38862         0xa13d1618,0xb7a776c7,0x38662be5,0xec441022,0xcad08e0b,0xc825da70,
38863         0x022c0180,0x99299079,0x2aef9ffd,0x7623bda0,0xf5c58b50,0xde84f4f3,
38864         0xd824ff19,0x5f5a5da4 },
38865       { 0x7e8311dc,0x5737257e,0x466cf136,0xdef94f51,0xb05ca21a,0xa73e1645,
38866         0x02e4ab37,0x38ea9b3c,0x8579165b,0x7760eac9,0xc24b01a4,0xdffdd047,
38867         0x3fb95584,0x188d4fd1,0x25548bda,0xfaac38b8,0x59e9dcac,0x1a79a6f0,
38868         0x09a2700f,0x983f720f,0xfb8a7e48,0x8cbba554,0x47a1fad5,0x38a19968,
38869         0x5abd6b5e,0x11856547,0xf3716ec2,0x75113d31,0x4212907b,0x1391e781,
38870         0x0dc15889,0x5319c801 } },
38871     /* 166 */
38872     { { 0x6b61c3af,0x2320136e,0x07b4bb68,0x1d40f2de,0x380c97f0,0x651dee7f,
38873         0x6a8c313a,0xa978ba70,0x2011ca10,0x22c587d6,0xab1f445b,0x48bba218,
38874         0xe50444e6,0x8c5eaf07,0x442fccf9,0x5549f02a,0x3d80493d,0x2564746f,
38875         0x79c04591,0x42d24f61,0xabdc8887,0x1600fa18,0xded38f8f,0x5cb8600a,
38876         0x923aeb46,0xa4bf9b90,0x1e1c578a,0xd63fee35,0xebb9ea14,0xf3c9c5ac,
38877         0xf11a4ff0,0x3d13314d },
38878       { 0xb4513d1e,0xe5cc662d,0xd55952bd,0xde78a8c5,0xe7f86d0a,0xe8a37a3f,
38879         0x7a04f0c5,0xca2d12a4,0x2e25d06c,0x4c6696e4,0xb2136071,0x52614698,
38880         0x89f6e1cb,0xf4d2701b,0x80efd95e,0xaafd6177,0xc5bb6907,0xe6d73ac4,
38881         0x420db35a,0x49e874ac,0xf2751fa0,0x11631de4,0xa1fa2edd,0xb29f7336,
38882         0xb7fd794d,0x4c406864,0xe22f92a6,0x73cb21d3,0x2043cc76,0xeae904e6,
38883         0xb322c6ad,0x67f28a9f } },
38884     /* 167 */
38885     { { 0xca148ab5,0x7c17b258,0xb3c60051,0xb9a1976f,0xc8f28df9,0xea260698,
38886         0xe8d45017,0x87b2cc74,0x0578a422,0x37257329,0x17bec732,0x81d5ee25,
38887         0x1d48bbc4,0xd7411fcf,0x487f5cfe,0x46217e6b,0x41eb8e1b,0xcb007ac5,
38888         0xe05a00c8,0xc41c57a6,0xd2f9fa99,0x1f954d2b,0x40941cad,0x370bd5db,
38889         0x3829509d,0xe487879c,0x5ceca5ee,0x4c137552,0xfd3efb9e,0xe8ef7fa4,
38890         0x1bd1bdb2,0x5ff09174 },
38891       { 0x579c6632,0x791912a4,0xb8a20815,0xbb19a44f,0x535639d3,0xf4f97b84,
38892         0xbc3c9bce,0xe57e2bcb,0xf19e6410,0x122b3f2b,0x1357d9ad,0x1f0189da,
38893         0x79e5ff66,0x675573bb,0xef2f3c4c,0x444e5c98,0x04d10731,0xd6f61e20,
38894         0xac75d635,0x0dfa366f,0x2c854f23,0x9fc47c86,0x0ad0850b,0xc04ae43e,
38895         0x2f720c32,0x5ce94f64,0xa753bc9d,0x67efae65,0xb0373a63,0xc27d30d3,
38896         0x29721646,0x6681013a } },
38897     /* 168 */
38898     { { 0xe84509df,0x1385d913,0xcf339376,0xe978bedd,0x3423a148,0x2df425d3,
38899         0xee8cb579,0x43fa0ae3,0x31c4553c,0xf015369d,0xdfbf1d48,0x05cf08bb,
38900         0x9444244a,0xadff4be6,0xa35dda33,0x01635f81,0xe76fab7c,0x085c8949,
38901         0x16737783,0x4bd7fcde,0xa254f8d2,0xfd8cb52c,0x413ec985,0x62168a66,
38902         0x7a9026cc,0xf2db9741,0x50e1e1b7,0x3962ee56,0xd3beffde,0xbee0a346,
38903         0x0bdfab1f,0x3b35b72f },
38904       { 0x535c3749,0xbff8de9f,0x8add9c48,0x23c1f20f,0xc8f8f663,0xa975b37b,
38905         0xe8f3ae49,0x2529e475,0x1d5e2628,0xc32f10d5,0x67862f1d,0x5ac0d297,
38906         0x854cbe36,0x13c79338,0x4b67e462,0x48f004ef,0xe5d10ee1,0xfa37a150,
38907         0xd28288a0,0x4974778d,0xcfb73f4d,0x96830a66,0x07804952,0x9f444013,
38908         0x9760b694,0x8233c709,0x25b75c99,0x8340cca5,0xc771f99c,0x3f62e40b,
38909         0xcd95c685,0x47d0a1eb } },
38910     /* 169 */
38911     { { 0x652811f1,0x266f4fff,0x62ef3002,0xeaacaa93,0x50cba0ca,0x6c387a55,
38912         0x007f5467,0xa350142a,0x202f2673,0xc7fd102a,0x33dc6e65,0x5daee570,
38913         0x064a63d9,0x60682ec3,0x462b251e,0x46cf0bb0,0x5da936e7,0x0e030ca5,
38914         0x434265b5,0xc87a60f2,0x69b4e8f5,0x9637b2bb,0x7ad7770a,0x601fb58c,
38915         0xed3a15a6,0x1f2147f6,0x2995e961,0x05b47d5e,0x83213a16,0xcb0ca9b3,
38916         0x4995a85c,0x8f4b614a },
38917       { 0x4b4eb3c1,0x5aa8ec19,0x20323a70,0x8c549ac4,0x4f6cc6aa,0x00d49322,
38918         0x45f9a5a3,0x0e53b9bb,0x0897abbb,0xe46ef110,0xd7acd7d0,0xfe873e57,
38919         0x0f7cb588,0x7cfccfe5,0xc85557d1,0x0ea53d65,0x7288f2e2,0xfdd9eb44,
38920         0xc0eb68a8,0xab2dedfa,0x08603a0c,0x58221470,0x00feb06c,0x69464689,
38921         0x25e5caac,0x804cf5bf,0x9fc91ae9,0xd8559858,0x73c45eae,0xed9378b1,
38922         0x524c9801,0x8f942d02 } },
38923     /* 170 */
38924     { { 0x8e845808,0x1f1ec302,0xb77abfc5,0xc302bffa,0xf8d97dc7,0x26afd4b9,
38925         0x3aac594b,0x3d3a83c4,0x674d94dc,0xe3b74bd1,0xcaa5911c,0x4464b737,
38926         0x871c2cd2,0x62925773,0x3b4440fe,0x419f2485,0xe052ad7d,0xdda6a0f3,
38927         0x846c86c0,0x645280d6,0xf8324f42,0xa25689fa,0x07cf117a,0xc74ad1e8,
38928         0x8ddc9db7,0x5626dea0,0x966fc85d,0x52620373,0xf3b1eb53,0xe0ad57c3,
38929         0x949c1acb,0x38300252 },
38930       { 0x5e744723,0xa0ef5a40,0x1ae08481,0xdb5bcf75,0xfec1f76f,0xabfad8cc,
38931         0xfab37fc6,0xfba5d831,0xc8fedb78,0xbe39e248,0xad93f310,0xa5cfad5f,
38932         0x913d5c24,0x747fdb1e,0x4518b7f5,0x052a47c9,0x7cfb4327,0x9e208d6c,
38933         0x70e538be,0xb135cb9c,0x5bb17916,0x36352759,0x5b3106c7,0xa2c07880,
38934         0xc209bb06,0xd2d42a06,0xd3c504ad,0xb525b471,0x822ce034,0xc9f4b368,
38935         0xeb4185a5,0x15f18796 } },
38936     /* 171 */
38937     { { 0x0aee4684,0x094dea06,0x7cdbdbc8,0x42b21f06,0xb1931319,0xa439e149,
38938         0x81a7dba6,0xea4bdd41,0x3c2ae80f,0xc6213706,0x12823dc2,0xb58b0967,
38939         0x832611b1,0x7443d515,0x13c20384,0x2e16f831,0x2bd992d2,0x0ce204d6,
38940         0xf419388b,0x499dbcd6,0x1d3778c7,0x492ded1d,0xc5ddae73,0x9d5bd74f,
38941         0x994b6259,0xd4813d52,0x0e86ca68,0x191d9cf6,0xf3e9c2ac,0x562179ea,
38942         0x9fee1238,0x6146f1f3 },
38943       { 0x078e2aa6,0xbd06d33e,0x9dee9265,0x693af7f7,0xdaa40e84,0xd56e0f81,
38944         0x9b9a407e,0x05fbbb88,0xede99519,0xdcf44adc,0x092dba39,0x7f71f8d3,
38945         0x4231774b,0x675b5da5,0xa5f605eb,0x7456a251,0x87a39a9e,0x9031d4af,
38946         0x05b474bd,0xdb430006,0xb665aa91,0xbda5dbf2,0x6631eeb4,0x5d1a3df5,
38947         0x62377c58,0x028149ef,0x685d0bff,0x2e1af4e9,0x82a465de,0xe0ea0875,
38948         0x06bd0050,0x95543f9e } },
38949     /* 172 */
38950     { { 0x85d7c6ef,0xf7cbc6f4,0x63b1bc24,0xcad8084d,0xbf8cba62,0xdf90ce88,
38951         0xb455c192,0x98e4b686,0x774fc6ed,0x6146b8d5,0x7ae20077,0x70e2389e,
38952         0x61c22529,0x5241c479,0x3884e5f5,0x7d221510,0x17e28273,0xd6d20ce2,
38953         0x4f2674f8,0xe3119f51,0x70c011db,0x85459055,0xfcfb760e,0xdfab75d9,
38954         0x9e8c2a19,0x9546362a,0x4a7d4b27,0x4b6d3f8a,0xee5d698c,0xa5c87104,
38955         0x2ba296ff,0x6db43478 },
38956       { 0x5c3f0d95,0x06486493,0x4e748895,0x8917db82,0x6b2f3e44,0xf73fdf62,
38957         0x2b7f574b,0xc60edc54,0xaf732723,0xbe1c09a2,0x7cad114c,0x7d34669d,
38958         0x321aaff9,0x9646600a,0xed0cd61c,0xb94e2bba,0xdec4750e,0x866e1a41,
38959         0xb1a89f58,0xa1be990d,0xf2759693,0xc39e4d6c,0xc0e0dddf,0x11cfb780,
38960         0xd99c8a41,0xf0afcd7f,0x6e1c3050,0xcebffadb,0x96d2c6e4,0x4f3981b0,
38961         0x2ae27a94,0x07a791e7 } },
38962     /* 173 */
38963     { { 0x1e9f0300,0xe70e9047,0xbccdf904,0xe0253ad9,0xff053078,0x51c0289d,
38964         0xae893462,0xf1ef092e,0xa4846845,0x2c90a91a,0xf1dad4b4,0x1946eda0,
38965         0x33df67b2,0xf07650f3,0x0b15a014,0xc6e988db,0xb542f0f9,0x72e0c66e,
38966         0xe0c0378f,0x5d4b6311,0xae86950d,0x548badaa,0xb35f1c8f,0x6801638d,
38967         0x944d1ad4,0x129e3216,0x40471d32,0x9951bac8,0x85e94dde,0x03cc29f3,
38968         0x4543ecac,0x6d6acc2e },
38969       { 0x57b2d299,0xeb999e95,0xe3d721cd,0x3a2bcd9b,0xbb4cb444,0x2e60384f,
38970         0xdc060faa,0xae177709,0x8c987cde,0x74f0e6d3,0x1076fbed,0x9a237cf8,
38971         0x7983fbff,0x69af1513,0x323f9584,0x6c3f7a1d,0x6db64398,0x3e21cacf,
38972         0x96703d92,0x7cd8134f,0xb8393f76,0x0755898f,0x2e825222,0x1b5b28bc,
38973         0x7924aa7c,0xb78799c1,0x81427a8a,0x1db378f2,0xff289492,0xd5a451b1,
38974         0x3d3c46ee,0x79d18212 } },
38975     /* 174 */
38976     { { 0x109d5589,0x1a3edff9,0x029b4499,0xded52eb4,0xb4b54adf,0x13eb9d30,
38977         0xa27bff67,0x4f9214c1,0x67f0f460,0x4c817ee7,0xc3a50e28,0xbadf8d83,
38978         0x94026237,0xc5dc03c9,0x966647c1,0x5f29581b,0x8a0687f3,0x10b6a089,
38979         0x31634517,0xae787cec,0x62e75188,0x2001dba5,0x45e2c3fb,0x55d4e1a7,
38980         0xb67d3395,0xbfcacdeb,0xbc6842ee,0xa1a0af9c,0x3e88580b,0x50590a2b,
38981         0xa784cdc8,0x73104491 },
38982       { 0x2648d676,0x44ca2cdf,0x4f1b12b1,0x9a85eca5,0x2980e1eb,0x1b9dac94,
38983         0x1ac8aa89,0xf30d3709,0xc719e195,0x73072ab7,0x2f703797,0xba518c82,
38984         0xac0067f6,0xac090e14,0x8dcd2927,0x0e6cfc70,0x21e7da63,0x4f5889e2,
38985         0x8371c7c6,0xb4aaa40b,0x8f7878c9,0x1f9dabe2,0xd84caf3f,0xf78aed6b,
38986         0x9e0e1d92,0x3c39dd07,0x122424dc,0x680be5fb,0x0bdc0099,0xf41b214d,
38987         0x5180c54f,0x6a8f8fc9 } },
38988     /* 175 */
38989     { { 0x53235132,0x62a1ed63,0x59dba88b,0x1db233f1,0x291efdd8,0x85625452,
38990         0xb25111ae,0xc7505297,0x1d701bd8,0xb5921af9,0x9774f45d,0xb4d05d72,
38991         0xf18e73ff,0x6e3d4c5e,0x899b3038,0x897d985f,0xc89b1558,0x8a9c30fb,
38992         0x4d13181c,0x3c92d1a3,0x2223320e,0x292e86ba,0x01ceed02,0xcf2454c2,
38993         0x583f309f,0x27a45f74,0xad0fd1a3,0x75a6102c,0xcb9c7538,0xdb4f45d2,
38994         0xdb283fd7,0x4752d8c1 },
38995       { 0xd5dff4d5,0x514d6cea,0x45a827f4,0x74cd5fdb,0x4fc7135e,0x1070a60c,
38996         0x1be5778e,0xdec0bb78,0x58dc6b08,0x271e12cd,0x54bc2496,0xb765089b,
38997         0x619098ac,0x6ddf2c63,0x67528832,0xfd6ebac6,0xc2508af1,0xeaa2d025,
38998         0x4dcfc1f0,0x13c2cda8,0x45510be0,0x1c7836a8,0x1a886801,0x3904688d,
38999         0xafaf2545,0x643132aa,0x2830a88d,0x49685577,0x8744b470,0x569491ca,
39000         0x75fb8552,0x3a6518f3 } },
39001     /* 176 */
39002     { { 0x224042a0,0xaaa8ed50,0x2452f1e6,0x6cb4e3b0,0x768211d8,0xedca5f4c,
39003         0xef4d5d3f,0x4e0fe3f9,0x522d46e5,0x33a8e2a4,0xf1446775,0x5998e21f,
39004         0xf592d01b,0x1496c50e,0x83a67739,0x69104c2f,0x472bbf00,0x28670bcb,
39005         0x503177bd,0x8ea883b2,0x7d2712a2,0xc5d8bc05,0xb439c994,0x41ef9317,
39006         0xdcda1aff,0x9801d3a8,0x7038f6fb,0xd686eeb5,0xfbfbf820,0xe80c5cd0,
39007         0xedc25817,0x540ac363 },
39008       { 0xfe7f43df,0xa71969a9,0x2c1b9e4c,0xe6653808,0x859c2917,0xad9677d8,
39009         0x96aa4404,0xbaca9545,0xff1297da,0x0e9d855f,0x22aea7de,0x1f61897b,
39010         0x36f13f8e,0x96edccfd,0x16e200df,0x627d3070,0xc98988a4,0x729f0736,
39011         0x97f231d2,0x95e25e60,0xf6048752,0xaf7f221b,0x4019b299,0xd6682609,
39012         0x26b4b1d9,0x1d99de09,0x1acdd7a3,0xec47cf66,0x6ebe15e9,0x4de9f2b3,
39013         0xfa16974f,0x17db32ec } },
39014     /* 177 */
39015     { { 0x6cf40599,0x75ef6919,0x00c020ea,0x7ea10dfb,0xfcaaf679,0x3da5ae7b,
39016         0x88ddd678,0x0d663ca3,0x255bcfcd,0x5a21f8fe,0xe344bc7e,0xe9c3f538,
39017         0x548e0632,0x35f62b1d,0x43c6e64d,0x654f2425,0x26993627,0xc755a7a6,
39018         0xb0f41324,0xa3b7c5f7,0x3a2180f3,0x05697f79,0x1e81675b,0x6cf85fb1,
39019         0xe53428f5,0x6d3cdb35,0x52d28b02,0xe3aa1591,0xf7a3fb78,0xa8470255,
39020         0xa194445d,0x460bd01b },
39021       { 0xc24d8077,0xbc34dc23,0x4c720d2c,0x82f4b580,0x6f5d1ffe,0xa29da911,
39022         0x92783ce2,0x578af520,0xb5904af3,0xe29f51ab,0xf7aa1190,0x46c570d7,
39023         0x571bddf0,0x4a522fba,0xae89bb51,0xbf4e2a06,0x59f3444d,0x799b35cc,
39024         0x26cc2557,0xc3028367,0xafcec177,0x94a4e985,0x7c36cbd0,0xadaf7dcb,
39025         0x75d39077,0xed31b787,0x2d3e24bc,0x52d6904f,0x1f95421b,0xc5ca2669,
39026         0x1734878d,0x7d342c3c } },
39027     /* 178 */
39028     { { 0x11fd127f,0xe5cf2c0a,0x119e4c5e,0x66d36bb8,0x6ef56ac3,0x621ab252,
39029         0xe5430675,0x30cfeaee,0xac3e9619,0x2ede27d2,0xf8fce671,0x6413513a,
39030         0x075f4c3d,0x6159c61b,0x59069d98,0xd447efe9,0xea76aea9,0xaf8d6f68,
39031         0x0f5bd164,0xac5dc61b,0x1e88bb98,0xdbab446e,0x1ba92320,0x618b8b16,
39032         0x78989865,0xa0eafb3c,0xc08b7e82,0x0c7abcc2,0x20d160bb,0x10f09b6e,
39033         0x8e4c63a7,0x5be0afa6 },
39034       { 0x1bbbf49c,0x82ab6d38,0x8c0703fe,0x3e09ce49,0xe10f4263,0xeca58b5d,
39035         0xda5a4532,0xd9cc6581,0xf618f7b7,0x07e18876,0x250f7fe7,0x0419a5e3,
39036         0xde6b86be,0xbb1a9e90,0x37359169,0x584a7deb,0x5149db2c,0x38eb3489,
39037         0xb0ebabb8,0x14546a33,0xc2f88a92,0x0067f0b0,0x0a2db019,0xbde0dfe7,
39038         0xc63e6f3e,0xba51b06c,0xe9206fad,0xa19127b9,0xfe80dc0a,0xe4eb5e87,
39039         0xd4de30ae,0x1e6fccf5 } },
39040     /* 179 */
39041     { { 0xaa8ac924,0xb57dff66,0xc298b3e8,0x06e9ad31,0x65fb080c,0xd140e329,
39042         0x1d95c93f,0x7dab211d,0x8a180caa,0x6d68d842,0xa20ded69,0x1a929408,
39043         0x38df461f,0xa8151753,0x60eae932,0xff5604ae,0x7dae4c0b,0x901b9e49,
39044         0xde262e89,0x4573a97f,0xf1084983,0xed69d9a4,0x64724f1d,0x8ffa022f,
39045         0xea85a15f,0xd5f1c2e4,0x01453794,0x4c626ce9,0xbf0907dd,0x80440cd6,
39046         0x5ddaa837,0x4522d461 },
39047       { 0xebfbe7c5,0x8895f079,0x84ef3446,0x30ea1ded,0xd4a1ab96,0x716a9eb6,
39048         0x50a30c68,0x1a4a5d22,0x0043bbaa,0x5a16631c,0x5010e5f5,0xbd107502,
39049         0x3d8c0556,0xbffe3e9d,0x07772419,0x31b30b18,0x84b82297,0x90ff7ef0,
39050         0xf21a18c3,0x00c37d75,0x565bb8f8,0x18d0a635,0x45e3bceb,0xbac1da2a,
39051         0x23f0b08d,0x1c38e90c,0x5fbc5ac5,0xf1ba1aa2,0xdda71fc6,0x09d5256b,
39052         0x6d7e40ba,0x346501a9 } },
39053     /* 180 */
39054     { { 0xcc2b0f1d,0x86be448c,0xac4c3703,0xe3eb45c9,0x9fc96bbf,0x5387f65d,
39055         0x5ae27fda,0xcef3c4e9,0x1bc18089,0xa008f776,0x22ca18a1,0xf374a084,
39056         0x53b73371,0xee882842,0x7cc09354,0xcb6fc6d8,0x61496d6b,0x8489ec1b,
39057         0x49e325c4,0xa92c29b9,0x7bdec166,0x15c6ca52,0xdcea2813,0x95444eee,
39058         0x3a21154f,0x34683eb3,0xd39061cf,0x8fb26f98,0x06c940bb,0xc3b08aa8,
39059         0xe554c96d,0x7c1d42cf },
39060       { 0xdc110aa7,0x766e703f,0xf362e378,0xab7b79d7,0x5aadca3c,0xd259c75d,
39061         0x60be3373,0x2a6eca79,0x06c4e8ff,0xf4744a4b,0xf3b705bf,0xb2842cce,
39062         0xae304b53,0x1a3af5aa,0x1b2d31b8,0x7bbfa201,0x4bee88d9,0xc4ba6eba,
39063         0x565cb839,0x2d3565ce,0xdaf7ece8,0x24808696,0xe6959745,0x2c7ccce7,
39064         0xe94f9837,0xefd6eb3c,0x3811a326,0x0a33b4cf,0xfffa93a6,0x14203f43,
39065         0x73c31d90,0x031e9828 } },
39066     /* 181 */
39067     { { 0x765a17ff,0x4fefecfc,0xd1290a65,0xa09f3888,0x938da038,0xbf265c46,
39068         0xa169ad46,0x4bb6145d,0x23a62fe8,0x33cf8214,0xabc860a5,0x562df571,
39069         0x815c38c4,0xbf2a90fa,0x17eda875,0x45ba1d6e,0x946fa5e1,0x799d881a,
39070         0xb90f5a3b,0x6c1be784,0xb10ff52a,0x0910a37c,0xa4f4fd36,0xc38c1fe4,
39071         0x8e2d3ba0,0xc3180fc5,0xb17a6187,0x3e2ff050,0x943a35c2,0x3a00059b,
39072         0xa28cc51c,0x494d3645 },
39073       { 0x4ba021f8,0x398426b6,0x796deb6c,0xd14c9083,0x7e36c762,0x6d2e5395,
39074         0x751cf216,0x8f556eca,0x19b24a19,0xdaca1e00,0x4b20c2ae,0x47887da4,
39075         0xff41a733,0x93ed4ccd,0x5c7c0cd7,0x8d717c44,0x91bf7009,0xcc48634a,
39076         0x3b59bbaf,0xa1f146f9,0xe5624f15,0xdd38bb39,0x303f8443,0x96d41aad,
39077         0x4bf104fc,0x6b670f03,0x29706582,0x0503f9ed,0xb34200f5,0x768e1f47,
39078         0xbbd4c6f3,0x3cfdcc5e } },
39079     /* 182 */
39080     { { 0xb523e13d,0x536c2a86,0x2920d0a0,0x1014a458,0xe7571296,0x3d52b478,
39081         0x7eb51bea,0x05746066,0x87b0e919,0x709f7861,0x686888e8,0x028aed88,
39082         0xd94afcd4,0x79a809d7,0xe2129af3,0x50c6032f,0x983c4082,0x75e4be72,
39083         0x7ab3be8e,0x98331bbb,0xb618c728,0xd31a032c,0x3f59c4a4,0x36dd85a1,
39084         0xed4f61e2,0xdbece345,0x1e571715,0xba7aaccd,0x64a1ebd7,0x138c58da,
39085         0x3d1aeea1,0x89296d0f },
39086       { 0xcca82c97,0xb165288f,0x1427e8dc,0x26c6c12d,0x4c3edda9,0x66a94f07,
39087         0xeaa01ebe,0x94600e1e,0x30f5e86d,0x14abce7c,0xcb456a31,0x741d7020,
39088         0x279f42c2,0xab05aa13,0xd4238468,0x70b60faf,0x318d39e6,0xa18efec1,
39089         0x8920b318,0xeb07f1ac,0xd8399e03,0x01e3cba8,0x3c81a301,0x65f8932e,
39090         0xccc667d8,0xae8bca7d,0xa268607c,0xcee1ae79,0xcac0a12c,0x3182e64c,
39091         0x2b1a4c54,0x9233a2f7 } },
39092     /* 183 */
39093     { { 0x0acbee17,0x717e8df6,0x5c24fcdc,0x0f0959c2,0xe54ffcb0,0x46f09887,
39094         0xd285116b,0xb993deca,0xbba1fa51,0x0bfaa4f8,0xd0f2183e,0x9c9249ef,
39095         0x96847779,0xf93cb358,0x2322d421,0x284bfb7f,0xd42af009,0x40cc709a,
39096         0x9bb1d615,0xc69f2274,0x717c3c6a,0x76f50b3a,0xbb9c5eeb,0x8b21e985,
39097         0xa4783b5f,0x58fb19ae,0x52e1c3e7,0x04c86b9b,0xf2971ac8,0xaca59092,
39098         0x21ed8291,0x2bb26a69 },
39099       { 0x15f81416,0x98a34435,0xaaff5bb4,0x086e72e7,0x0317261c,0x3d1f64de,
39100         0x5c0a1cfe,0x31c0786c,0xb3683401,0x542ea4d8,0x1a39b4cd,0x2f77273a,
39101         0xcbef27f1,0x14fe7ee1,0x16bb27dc,0xee7fc09e,0x410e5dc7,0xc0dccc17,
39102         0x1943b3dd,0xa3466742,0x3f31c1b7,0x92934b60,0xc22c1070,0x0186ded9,
39103         0x799f966b,0xa37ee8ba,0x249b0893,0x0f3bfcb4,0x2e92d4de,0xbae61447,
39104         0xe196eb08,0x937cb3f8 } },
39105     /* 184 */
39106     { { 0x16fbfdce,0x57c0e77c,0xc98d4cc0,0xea034cc9,0x42572d20,0xe7606d72,
39107         0x0019a83c,0x9861b55c,0xf1597162,0x80ba2803,0x05a0fd7b,0x0f4141dd,
39108         0x4b0daaa2,0x8865913b,0xaa3848ec,0xe6685746,0x3e0485d2,0x16d15a5a,
39109         0x3b6905dd,0x81c0c774,0x818af2ba,0xcec31b7d,0xd2b74b78,0x80d8f194,
39110         0x543e2f28,0xca659db2,0x9fb07c1c,0x31b83a7d,0x1f1048c0,0x86537fdc,
39111         0x78586a11,0x4d57bb07 },
39112       { 0x53b396b6,0xbc4b768a,0x93b51dac,0xbc8b24c4,0xa30ae1b3,0x33e511eb,
39113         0x945147c5,0x893bbd95,0x179fe3ce,0x6cc86031,0x3f920bd4,0x34b0a167,
39114         0x6b256160,0xb32912eb,0x9d168d83,0xbc69a2a4,0xef0dd128,0xb4949e7a,
39115         0x872699e1,0x2613419a,0xbf21376b,0x06c58477,0xa4f97147,0xe55b1909,
39116         0x7b9b745f,0x63d6eb75,0x08df3c85,0xb5365b29,0x55fcfae3,0x0e257e43,
39117         0x979f2aa8,0x1067c118 } },
39118     /* 185 */
39119     { { 0x32bf8883,0xc8455084,0x6fd06667,0x4755286a,0x77c2335d,0xd70b0f8f,
39120         0x2f4a2c94,0x678e60da,0xd118acf5,0xa468d8ac,0xbf5b90d9,0xce93830b,
39121         0xed4e9104,0xea4b1c74,0x27776ea4,0xac67316d,0x361bab12,0xb98ad75c,
39122         0x99122451,0xc323d482,0x530a43ae,0x26440220,0x3292d5a5,0x3a44532e,
39123         0x5fecf1bc,0xdb48694b,0xc667b8b8,0xe4e0516e,0xa4306ade,0xb3aa595f,
39124         0xf34e9725,0x7e4f7091 },
39125       { 0xb7f70919,0x3f3816e9,0x16b003f5,0x765216ed,0x778c99e5,0x46c6cff4,
39126         0x30a51810,0xe6a5abe8,0x45e728db,0xef6f49e6,0xcaccefd6,0x6fdd73ea,
39127         0x8c37f3f7,0xec394e6f,0xb6407fc3,0x73320802,0x96625cbd,0x988e8f7a,
39128         0x7cabfb00,0x83292363,0x407f359a,0x258ba9df,0xccbfae50,0xff01aee5,
39129         0xfe251813,0xfbeaeace,0x83f1cba1,0x9c69f161,0x9eadcdb5,0x512c58ad,
39130         0x6ccce8bd,0x2ae49cd4 } },
39131     /* 186 */
39132     { { 0xc40849f2,0x1239b0e3,0xa441098c,0x5136a4cd,0xe547f649,0x61535a99,
39133         0x7a9bbac6,0x92e4bdc4,0x53547af6,0x195a1646,0x8b47a74a,0x85ecb319,
39134         0x9de6a2b2,0x278553fc,0x0e2ba52d,0x471c038a,0x35bcba93,0x12ba1b88,
39135         0x6f31eca2,0xd4bf50da,0x802b32c6,0xd146e3f6,0x3c64c8c4,0x0c9c0131,
39136         0xeed21297,0xad30f12d,0x9c68530f,0x9b75bffb,0x8918de51,0x23c0ad3e,
39137         0xa73771b7,0x180e9d52 },
39138       { 0x29ab77b0,0xc316542f,0xf7aee628,0xdd411d9c,0x353c2f40,0x044c0685,
39139         0x4b0ae4cf,0x638dc7e4,0x95fc266f,0xa0924185,0xfd2feb7d,0x639da671,
39140         0x5ea39798,0x56858ed5,0x58f3832a,0x7a694f31,0xd316d831,0xa94233c6,
39141         0x30a35a7b,0x2fcacb26,0xf1ff713b,0xfef8f7dd,0x59eee2f3,0x8b9b4525,
39142         0x156d064a,0xd1b4f91b,0x2f5cfcfc,0x177866c2,0x3777eb41,0x12bc2566,
39143         0xd8ab85b4,0x21ca6f3c } },
39144     /* 187 */
39145     { { 0xa3e66635,0x0e162b13,0x2a9f76af,0x1ef20a2b,0x46db3356,0xab473a30,
39146         0x7802bb8d,0x0840bd77,0xa699b44c,0x5b6baf5e,0x1b2207f1,0xc6e11900,
39147         0x790b0105,0xe5de16a9,0xdb67f004,0x22b12f15,0x8a025d25,0x185fad45,
39148         0xdf0a1142,0xbccf6953,0xf45034c0,0x4c42129b,0x1c277bff,0x0f740400,
39149         0x280a9e18,0x6e440b4c,0x842aa2b4,0x767de8f5,0x05e8d94f,0x3de20ab8,
39150         0x20227635,0x5aff5859 },
39151       { 0xa8458e40,0x805acd20,0x149732bd,0x5a5557d8,0x5f1ca72d,0xc7074131,
39152         0x952b5323,0x7f2e269c,0x6494fadf,0x5c592556,0x1a7d2666,0x153b7acd,
39153         0x86fe2865,0xa6df063d,0x57d53b6b,0x1e91db13,0xe93ead01,0x9195bb89,
39154         0x2963bfe6,0x3d71e1af,0x88278886,0xfab2b9c2,0x3b859b6f,0x77836692,
39155         0xf7029dd1,0x6e695174,0x7b984561,0xc7987876,0x5907d849,0x64fb4f1d,
39156         0x88d8a977,0x3eab7e1c } },
39157     /* 188 */
39158     { { 0x52e5718b,0xc73a94b6,0xf4cee1e9,0xe3aefa54,0x553eedea,0x654e9e63,
39159         0x5f3aca1a,0xf2541e1b,0x0d083316,0xd7129489,0xfb7f950e,0x7965af63,
39160         0xc74e3e4a,0xd8fc9e0d,0xeaf79ebc,0xb4ee48d2,0x8b7787e6,0xa458a86a,
39161         0xf7cceaf0,0xd8c7621f,0xdf67980d,0x8228eeff,0xf9106727,0x210d4742,
39162         0xb07e3629,0x91f63501,0x7971e29d,0x441761c6,0x03a3b8a5,0xc0ccc65f,
39163         0x38e09544,0x3491da4f },
39164       { 0xcb062eae,0x6706d046,0x5d08776d,0xee7db735,0x292315d2,0x80de8052,
39165         0xc402bbdb,0x40785662,0x26ed3337,0x5f93525c,0x7d568ed3,0x6cea14d6,
39166         0x66888b1e,0x916a1189,0x5dc71675,0x0fbd5205,0xe4575df2,0x833d1077,
39167         0xec092335,0x4e93100a,0x6cd85389,0x2f9e1d01,0x43226368,0xeebd3725,
39168         0x1ba4cfd7,0x401d172b,0x574c5838,0x377dab9d,0x80d517de,0xaeaa6958,
39169         0x6ad15a18,0x0c843dfd } },
39170     /* 189 */
39171     { { 0xc9373300,0x455811ff,0x99fdc300,0x1c39332a,0x353cb655,0xe19bb81c,
39172         0x96a83d27,0x774b924a,0xb2ee3f1a,0xcbfc8fcb,0x010d56c7,0xaf278ec4,
39173         0xe0abaf79,0x6fde682f,0x7339aebf,0x7566d072,0x71205db6,0xbd35ad5d,
39174         0x7051c9d0,0xb5bbe694,0xd3a3067c,0x577db480,0x572d7530,0x2c70ff54,
39175         0xe06d853d,0xe8615aec,0x05abfb5d,0x71999ccb,0xea0a8ed7,0xeeefc96b,
39176         0x35f6df69,0x2dcc469d },
39177       { 0xc65f0e77,0xcca6cd06,0xbd71b14a,0xddcc7980,0x3c93cc00,0xb6221f8b,
39178         0xae8cbf57,0xddfcd5b3,0x76f8e63f,0xbc92973f,0x06e132b7,0xe9848a34,
39179         0xd51ec9e2,0x4cc59a03,0x3a33081a,0x9c9d32bb,0x80e8466b,0x00121052,
39180         0x1bbe7295,0xc2b0032a,0x24938448,0xdbfc6572,0xb6bba0ff,0xe972a0ce,
39181         0xc0a94802,0xf60c0a4f,0x599d8bc7,0xf62c41cc,0x312da0b8,0x820c96ee,
39182         0xcdbdf9fc,0x5a1a65db } },
39183     /* 190 */
39184     { { 0x42485684,0xbfba691a,0x29c470c9,0x613116b9,0xe62a0519,0xb4b01971,
39185         0x5ff499da,0xf3245aa6,0xa5238eff,0xc2ef87f4,0xcc9d5515,0xc16dc6ba,
39186         0x2dbdacac,0x5a7f227e,0xa9bbaecb,0x8dedaac4,0x2e7c9885,0xff308a6d,
39187         0xe6895593,0x4c6f2fc2,0x177e0611,0x3655f285,0x300b1bee,0xa63e8d06,
39188         0x13c17b54,0xbed0ce79,0xc4974262,0xca4abe35,0xbc4e4037,0xf4b44a17,
39189         0xefe5fbd9,0x5ae95099 },
39190       { 0x804f7455,0x122e5ee7,0x22066682,0x341a4997,0x7795e333,0x97d24c31,
39191         0xe48efced,0x12f4123c,0x19fbc21c,0xe8738d92,0x0663a3ae,0xbb3bdc61,
39192         0x8593a6db,0x3603d8c2,0xe3c1ac75,0x926227f2,0x5eaae519,0xfea92ac0,
39193         0xfd6812ac,0x5b596f0b,0xfc2a82dc,0x3ce7e844,0x63522b27,0x3840481a,
39194         0x52867895,0x836088b1,0x26588688,0x21ffb7cc,0x2f4a7cac,0x0ca33161,
39195         0xa3edd298,0x4110667e } },
39196     /* 191 */
39197     { { 0xc2d04b63,0x81830357,0xf4929a18,0x3fc5a34d,0x22d195df,0xc73bf6da,
39198         0xcb432473,0x14df2f89,0xe997f138,0x345afe5c,0x8b9604f4,0xd8e3f5f9,
39199         0x50c10ae5,0xad7942e9,0xeed25ff3,0xcefd5447,0x0e73c0cc,0xbf68e51e,
39200         0xab54fa4c,0x5b1ad591,0x12b61c8c,0x8bbc1105,0xb5abf760,0xbb932913,
39201         0x01e79649,0xdb1231be,0x040ccbe7,0xd0a83e91,0x90a96db9,0x3dde426f,
39202         0x34df11ea,0x1cceb645 },
39203       { 0x0c6d0f55,0x2d210c4f,0x9c673c9d,0x6cadf61b,0xa9ce3fbb,0xdd7f9919,
39204         0x93b063e4,0x135f494c,0x145a93be,0x580bdb3c,0x0f52ef7c,0x4d872332,
39205         0x8814bb6a,0x74d876e8,0xc7a97dee,0x4f6f723a,0x3e3cd833,0x7de2b8f0,
39206         0xae720270,0x6162f082,0xddfa486e,0xe88ec2d4,0x8d3a17c6,0xd965c859,
39207         0x3980171a,0x62e59e54,0xbbef6b22,0x0ab6285d,0x4d48b203,0x3cf45195,
39208         0x4ea25ea3,0x1f175233 } },
39209     /* 192 */
39210     { { 0x3467ea91,0x808a765b,0xfd2d9c45,0x3f4632ee,0x9cf2bc6f,0x7b75dc6d,
39211         0x359813ae,0xefc8d240,0xe44cbd8d,0x23ecb209,0x21525622,0x59ba10e3,
39212         0x3f1ee19a,0xfa14d934,0xfb0c48f7,0xdf97c21b,0xea30d437,0xc4e62890,
39213         0x651475c2,0xb286e2a4,0x126672a5,0x291f01e4,0x31aab3b8,0x9c6fda5c,
39214         0xe17d22ec,0xb7277a5a,0x914f0bad,0xbd88ed83,0x6a2392e1,0xd0b05d1b,
39215         0x65893c2b,0x4cb8af90 },
39216       { 0xbb4b1953,0xa2b02057,0xf597f6ee,0x4ce08b44,0x5e6412c8,0x854f5d9b,
39217         0xb3cd4919,0x1913262d,0x6e42bb5d,0x902762e4,0xd78e7f60,0x8355c8e6,
39218         0x38b6c16c,0x8efaa824,0xe550f618,0xd0173790,0xe57d778e,0x118af462,
39219         0x715b4714,0xa16ad5e8,0x41dea4f9,0x900596c3,0x280ca610,0x2a957c32,
39220         0x374c65a1,0x2faee800,0x50080414,0xdb105127,0xff080fa1,0x8c1db931,
39221         0xd79878fc,0x486a5c25 } },
39222     /* 193 */
39223     { { 0x941b4f36,0x0521e213,0xf803b4f9,0xbaacfb14,0x52a54ba8,0xfdf1e22e,
39224         0x8fe4796c,0xacfabbba,0x58dbacb6,0xae0788db,0xc19dfa51,0xdf98d736,
39225         0x35a716ee,0x155c286a,0x9c86461b,0xbe7d4676,0x63a64a5e,0x50b6380f,
39226         0x9f609262,0x14b41914,0xa2dfc5b3,0x0919a7d0,0xcef466ac,0xc454da55,
39227         0x6986aaec,0x93fa4a24,0x71a49ced,0x5090b171,0xc1fa75ad,0x602f1d6c,
39228         0x78e4c054,0x5d269f89 },
39229       { 0x14920419,0x3a74030c,0x90968739,0x0845d868,0xeeb70fa6,0x81b994c4,
39230         0xd9fc5bcb,0xabcaa06d,0xf58f8f2d,0x06539427,0xb1dc52aa,0x35c85f67,
39231         0x2c911baa,0x5a7d8d72,0xaec2d834,0x4041005c,0x7a8e5347,0xb5868a44,
39232         0x8de512c3,0x04ee180b,0x211168eb,0x4daa66e5,0x2317cd8a,0xc0bd5dab,
39233         0x61164df6,0xa1d4185d,0x1dbad7c9,0xacedca26,0x09b02683,0x0fe4b5ac,
39234         0x26d9550f,0x8ac9995a } },
39235     /* 194 */
39236     { { 0x2640a39d,0xb2c8dc9b,0xede0c9f9,0x21ff0b38,0xa1ecba0a,0x74f469bd,
39237         0x080d0417,0x8a902ccd,0xf4994604,0xe956fa32,0x9776ab15,0x348f85cf,
39238         0x0066f492,0xc21fc6ee,0xfeeef367,0x35b1ebfe,0x4613e5ed,0x7804581c,
39239         0xea6ba071,0xcbdfe8e6,0x950d73ed,0xddfcaa32,0x1da48889,0xc9747936,
39240         0xdbaffbd1,0xce867c8c,0x1cbaeae7,0xd267431f,0x897912c8,0x68255045,
39241         0xd7ea1e4d,0x0c7c1ddc },
39242       { 0x1ce963a7,0x53aa30cc,0xc4c5fade,0x7352f64c,0x2828afbf,0x2b9aa2f8,
39243         0xca212107,0x64273c56,0x85a576dc,0xaadd7654,0x90b5c77c,0x6196ac3e,
39244         0xd1aaf39b,0x20d43e9f,0xcd05cbc4,0xfc392062,0x4c0ff2fd,0x14163872,
39245         0x2ae821e6,0xcf32b8d8,0x3fa7a3f0,0x5f58f943,0xf644ca92,0xaebf1d2d,
39246         0x1918a75f,0x0c061563,0x6b876118,0x7989b5ed,0xad412441,0xbf342445,
39247         0x1df633ab,0x24ffc9ae } },
39248     /* 195 */
39249     { { 0x93c7cb2b,0x89fcdc05,0x590053fb,0xc1243b95,0x6182343c,0x601debcf,
39250         0x66c18a63,0x364546ef,0xec913287,0xa5290701,0xf9788c31,0xc35b8026,
39251         0x92d1f7d7,0x852b862a,0x0aa79728,0x1809cb05,0xa3cb2005,0x897d467c,
39252         0x9ef5b946,0xf20c77c0,0xf2241984,0xc3372c42,0xf35bb206,0xda053e0d,
39253         0xa9c140b5,0xbc26c6d0,0xcb56fb33,0x61cfcc0c,0x299b3968,0x1c3cf9ef,
39254         0x40621ba4,0x89e4d3d1 },
39255       { 0xa45a9be3,0xd35e80e7,0x07356fbd,0xc4daa578,0xb967bc2f,0x0186d62e,
39256         0x47cd16e3,0xa702679e,0x5f30ce9b,0xca2f1c02,0x1f864f50,0xf1205b46,
39257         0x85061d66,0x7fd6d797,0x8a08809e,0x47edc4f6,0x9a4d3ae2,0x5dac0449,
39258         0x6d1f9da8,0xf844664a,0xd7a83a71,0x9f30ce84,0xeaac33f1,0xe9382bac,
39259         0x948622ab,0x1f033831,0xf7681eb2,0xb037a4ba,0x99a1b5c7,0xd156a908,
39260         0xe6f1d0fb,0x675d3e6f } },
39261     /* 196 */
39262     { { 0x707193e5,0xd9767ffd,0x810358e5,0xe478aa91,0x328d8ef7,0x5634f9ff,
39263         0x6dbbd9a7,0x913a0ee8,0x7e215686,0x379b2968,0x89d9da38,0x903f410a,
39264         0x1b1334d2,0xd9f8d7b9,0xbd82efb5,0x9fe74229,0x3803c778,0xdb568b62,
39265         0xd3d25344,0x93e9a350,0x724497e8,0x559c35b0,0xa169e23b,0xc472d436,
39266         0xcc5b4c69,0x09864632,0x83c7f531,0x9f6d759d,0x1e497888,0xa91cf1db,
39267         0x60af1a4b,0x5f7f92fe },
39268       { 0x0545167e,0xf18a1cc6,0xaffa88e0,0x55ee2e02,0x432a7bcf,0x24cdff51,
39269         0xa7510866,0x7382da42,0x40511af7,0xe894c11f,0x2aaf1423,0xaa4e4e31,
39270         0xf63dd2ae,0x8c3d36f0,0xd7660635,0xfc5c9550,0x37ea7eab,0x01253731,
39271         0x39b950f6,0x2a5cd598,0x40e63442,0x95a0f601,0xf2ac7045,0x905e238e,
39272         0x446b0f73,0x44bacc0e,0xc448578a,0x4cd4206e,0xa5bd7803,0x367b1aaa,
39273         0x0a2b458d,0x25beced9 } },
39274     /* 197 */
39275     { { 0x0c33a8fb,0x079a7382,0x0f25dc1d,0xcfbf6cd1,0xc6d482b6,0x4ffc73f8,
39276         0x07bf844a,0x3e51f18c,0x599162f0,0xa7651236,0x14013811,0xac59a74e,
39277         0xe55018a0,0x957a6865,0xe3ca09b1,0xe1ec51bd,0xa960253f,0xbc0c7eb3,
39278         0x7de03f84,0xe83bfd14,0x52fbdb09,0xc0540ed1,0xcea15ec1,0x6ba52edd,
39279         0x4b261307,0xf3d30ed5,0xe8397206,0x9bd7bae8,0x096373aa,0xf20d8692,
39280         0xc3b0bf63,0x0a616a4b },
39281       { 0x6e1339c9,0x2075f3ed,0xbf8b00a6,0x7afaa072,0xbccd9b47,0xdfafec82,
39282         0x00ca54c7,0x4713158f,0x38bc31ae,0x449102f1,0x310dfc8a,0xaf98f158,
39283         0x59e954d4,0xc9ef2075,0xc527a0c4,0xe8021af9,0x7a192023,0x6e801277,
39284         0x7fb02377,0x635f538c,0xe8c9e951,0x5df1974f,0x15cc9097,0x0287faed,
39285         0xf7a5115c,0xfa0728f0,0x0fac623d,0x90dbfbe6,0x0311ba09,0xa8d40fd4,
39286         0x07c6464c,0x876d154e } },
39287     /* 198 */
39288     { { 0xc2d3ea8a,0xd3a4d6d2,0xa842600e,0x36be681b,0xe4070672,0xc53f100d,
39289         0x6a7d7a7b,0xe3e5b6fe,0x5d5e1a83,0x6e6994f9,0x76097c2a,0x07cacd22,
39290         0xa6791011,0x12d98dba,0x102e0e24,0xddfc4461,0xd493272a,0x4815dbc2,
39291         0xa9436696,0x7e38e64b,0x32b2bf90,0x4960eb1a,0xd928e28b,0xda457525,
39292         0x2a077c9e,0x72f75b39,0x7fd61d00,0x27760cbb,0x0f4b1456,0xaf235d1b,
39293         0xe76d1700,0x3040c23b },
39294       { 0x4efa9a70,0xb10dc55b,0x53e86610,0xd4de414f,0x09f8a27f,0x3d95c113,
39295         0x06661d3c,0x505109a5,0x60eb513e,0xcaa2994a,0x1e7d338b,0x3ee41537,
39296         0x4651e71f,0x4fd145fc,0xcbc313b4,0x51bbf838,0x1eb92150,0xb039e078,
39297         0x14bf5ac7,0xe8696b44,0x8be0d48c,0x2d667188,0xdd8f2b6f,0xbe93b2f5,
39298         0xeb8a7f8a,0xc1dfd1e7,0x90f751c5,0x862b3dd9,0xa32a74be,0x1eb1ad58,
39299         0x1ebbc9a2,0x5486d79a } },
39300     /* 199 */
39301     { { 0xa1359e13,0xcb2e34ff,0x28196051,0x202d8dbf,0x23564b5e,0xe95e023d,
39302         0x42f6ac12,0xfb1340b6,0xb653725d,0x543ba852,0x8d2466ad,0x81aedcd6,
39303         0x547c728b,0xbf780224,0x9569fb65,0x559f8a11,0xdfb22ec9,0x505b7a62,
39304         0x9eed5e52,0x07107540,0x299f6f11,0x9c899288,0x3db6f8c7,0xa7d69261,
39305         0xb3ca79a9,0x30eb7fb3,0xfb2160b0,0xcab99bb8,0xd28b409a,0xd2012568,
39306         0x5ac45f8b,0x380f1b0f },
39307       { 0xe6a0068f,0xc0b99e6b,0xc8a73753,0x4b67cf2a,0xb2faeb7c,0xa6c9a548,
39308         0x340260c3,0x7f417f99,0xcc0f739e,0x8ee56855,0x780949da,0xf08b510f,
39309         0x8d5c6eff,0xb1770fc2,0xfd96a7bb,0xb4f5abee,0xf2665a2a,0xa07b1136,
39310         0xb601dcf9,0x2fb380a4,0x162becc6,0xcc803614,0xee6b83b3,0x3498fb96,
39311         0xa8c17eeb,0xea9b0fd6,0xa177efc2,0x5834b5ba,0x5b110b3e,0x929044f5,
39312         0xebd7285e,0x4abedded } },
39313     /* 200 */
39314     { { 0x700ef376,0x3355e1b9,0x66cdabff,0xd56e5d9a,0x47e87646,0xb3dc2575,
39315         0x00f79369,0x28f44b8a,0xa0c52e29,0x08c32b1e,0x3729b392,0x5a78de12,
39316         0xb26d239d,0x4184519a,0xe0ce4a6b,0x23f6b4b7,0xacb2a9f9,0x235f6f8a,
39317         0xe2064a59,0xbb8bc454,0x1bf3062e,0x37efd034,0x94dff6f9,0x6bac683b,
39318         0x8aa7fa06,0xc3364b1e,0xce0b3745,0x0616772a,0xd1e3fb0f,0x46f08d08,
39319         0x18e132d3,0x6a20abb3 },
39320       { 0x6a85cbc7,0xea831016,0x934f9aa7,0xd0990946,0xe778f1b3,0xc2211088,
39321         0x2247b799,0x7ea4ff8f,0x454484ce,0xb3171d71,0x4f98c364,0x29403949,
39322         0x97df1458,0x5da911f3,0x09439116,0xa6b58093,0x174238bc,0x75f9509a,
39323         0x8209758d,0xfeb51821,0xa47925d0,0xae0c6021,0xaf8a315e,0x0e946694,
39324         0x6bad04b7,0xae7af8a3,0xf072447d,0x44c15e7f,0xa5456ffe,0x5184668a,
39325         0xbf36b977,0x45e353a7 } },
39326     /* 201 */
39327     { { 0x93092f71,0x76056764,0xf5b92d71,0xeb66b6c2,0xe2c8b6c5,0x9db3149b,
39328         0x20c0363e,0xf62f583a,0x03cd7097,0x688acd33,0xebb916ac,0x85d0c0f8,
39329         0x84c19b0e,0x1bf7462c,0x7c4a6ad1,0xc76ed5f9,0xd119f369,0xec8b88ba,
39330         0xebe50b83,0x59b8371b,0x866706a6,0x0cc69508,0xf8373d2c,0x531c75a3,
39331         0x2a5a02fb,0x4e1cd3a3,0xda39a1d0,0xe8274778,0x75da333e,0xedfc5bbb,
39332         0xca79bd36,0x15941f24 },
39333       { 0xa77dd512,0x42e8c0f8,0x1dc365f6,0xa91b59a7,0x08753862,0xe80d14cd,
39334         0xd272faca,0x1624230d,0x4027cb5a,0xeea3ec16,0xc1ef9f03,0xc1700b59,
39335         0x0da3148d,0xd411c127,0xc4181af1,0x801ee448,0x9e3a900b,0xedf28559,
39336         0x0d09affd,0x5d67b0bd,0x8b370024,0xd839df96,0xe6f836b8,0x3b6307e0,
39337         0xbd3201c9,0x5382e588,0x7a1d02bb,0x636d8a6b,0x968641e9,0x70b7db76,
39338         0x118fad03,0x6d17c34a } },
39339     /* 202 */
39340     { { 0xc181c99b,0xcf608841,0xc87bdcaf,0xb65dc901,0x3720dabe,0xb460b447,
39341         0x5377515b,0x4c79c396,0x0a96c277,0xd447f22e,0x2ac0f440,0x0d952130,
39342         0xc90583ad,0x8330b26b,0x928904a0,0xe25e977a,0x85c50b18,0x1deaffd9,
39343         0xa5ad5f6a,0xcf4dbcb7,0xc8a37ed5,0xcbcd0019,0x1e9850b6,0x7846dd90,
39344         0xb0b8e605,0x1ac8194a,0x34132f90,0xb9728571,0xf56ee28b,0x4ce9f149,
39345         0x3e9e1d4e,0x1ab9b5a4 },
39346       { 0x314fa7a3,0x206dab92,0x478ff963,0xcc4af0f0,0x904d9fdb,0x4cce1713,
39347         0x12c045fe,0xac20a2eb,0xfd8f6d7d,0x44fc5478,0xca7b6ffa,0x886e72c5,
39348         0x6fd6f758,0x7fa4529b,0x92a820d5,0x4df1d1b1,0x2789f149,0x3d812f9f,
39349         0xaabb53d2,0x9842f083,0x2a03ab32,0x2648539b,0xb1512502,0x631ce090,
39350         0x731f6bd5,0xe1294d15,0x9436e634,0xb229361d,0x3ca966af,0x8c4281c4,
39351         0xc21ab3ed,0x24b34956 } },
39352     /* 203 */
39353     { { 0x659824e2,0x49bdcb86,0x4e13e74c,0x6dc4ce48,0x6bbe1eea,0xa4c01a26,
39354         0x1e3ec457,0x47b2b8e7,0x2f5a8e4b,0x7e8b15e0,0xe333530d,0xe81eb6e6,
39355         0x17a45202,0xacba369e,0xd70e4c9f,0x81241431,0x3e12beb8,0xc190af4b,
39356         0x11f486fd,0x53270523,0x29fb2bce,0x9f6c41e1,0xb70f6c08,0xbe6287eb,
39357         0x3feb4477,0x1479850a,0x9bcf18bb,0xfcfdfb11,0xda80d040,0x925c292f,
39358         0x7e3c5bf9,0x212d65e5 },
39359       { 0xca15cf08,0x23adb386,0x81e172eb,0x4dfa4ac4,0x4d42d0c0,0x9d1dbf93,
39360         0x74404dc7,0xd9cf6073,0xe932bfcd,0x60508441,0x1c682a98,0x9ae910ca,
39361         0x41ac1cc0,0x9528fc18,0xdbbed630,0xe6a120ae,0x30ccf250,0x94e0e1ec,
39362         0xe58bbf2f,0xfe84ba54,0x9faa4415,0xc66d0b4f,0xecee7ce5,0x0c58f1e7,
39363         0x6fa6873a,0x7a1d43eb,0x399f1348,0x96c6c5a0,0xe6727ab7,0xe6ef9aaa,
39364         0x9a5c2447,0x66afa554 } },
39365     /* 204 */
39366     { { 0xc980e91d,0xda5aaba8,0x6ac98efa,0xa93cf509,0x8da32662,0xb0990e0a,
39367         0x0081453e,0x01d21530,0x3d71de84,0x2bb0d33e,0x3e19a012,0x465f6d80,
39368         0x78a838e7,0x5902ff4c,0x1931348c,0x74e2afb7,0x9cfb057b,0xa4932757,
39369         0x3ad03f8f,0x761ea642,0x58ffa40a,0xb7d4c245,0x77a87e30,0xb5e9c0d9,
39370         0xc9c84d26,0xd1c5edba,0x3d1963a0,0xeca8839a,0xebf6bf0d,0xbc6f2f35,
39371         0x0d58abdf,0x01ef0631 },
39372       { 0x3ecdcbb0,0x2bf90316,0x27c1c955,0x19e2d728,0x9575c930,0x9e527030,
39373         0x96983930,0x0dc1c5a9,0x7cd082df,0xef9f80ff,0xdf97e051,0xcd915075,
39374         0x9cc61b55,0xf286fffe,0x80f24cc4,0x352db38f,0x36523ae3,0xed9b99ec,
39375         0x10b104a9,0x109a8ca8,0x305203ad,0xc2700fe7,0x769400f5,0x2a2ee24e,
39376         0xee0c452c,0xd595d399,0xf7f02a41,0x0ab75d6a,0x0db730b7,0x34108099,
39377         0x5e8d1202,0x0e4f5ffd } },
39378     /* 205 */
39379     { { 0x0ff14c38,0xbd1c6444,0xaece11f2,0x9a5b59fa,0x22af6330,0xaa4605a7,
39380         0x82af24ee,0xddc9f65a,0xeb9a1159,0xf4ee4bfe,0x74e84eaf,0x2463d076,
39381         0x0e0baace,0x88cbe1e0,0xd5fabdcb,0x7ca568ea,0xc57eb99d,0xbd80d524,
39382         0xe9be9873,0x9c46572c,0x7300b85e,0x918a1dcd,0x40f54176,0x49221312,
39383         0xb5b14236,0xf7e324ff,0x2434f16a,0x40dda501,0xa133d97c,0x08833421,
39384         0x0876f020,0x33d41161 },
39385       { 0x9878e5ec,0x7531a36b,0x46918232,0x5de3e321,0xd0a30464,0xd15f9a33,
39386         0xaa173659,0x734c1b87,0xf925d4fe,0xac2094a2,0xc262b0f4,0x43c965a1,
39387         0x447d5cbc,0x759c903e,0x05239300,0x92af215e,0x1f593f34,0xfffb6d5f,
39388         0xc3cddb5f,0x65943b4b,0xbfdd5408,0x9d03a29c,0x198d76c0,0x8f7cda6b,
39389         0xc0f27b59,0xc0790a22,0x8cb58ccf,0xba557a84,0x76c54fdc,0x5922052d,
39390         0x47b6b466,0x2d3de7aa } },
39391     /* 206 */
39392     { { 0x65add3b7,0xaade7462,0xabf24c2a,0xe5888f35,0xe1a57d93,0xd41549ca,
39393         0x2c76f7bf,0x0e22e18e,0xbe3202b3,0x67f288ea,0x1d1d0f0a,0xb79a66ba,
39394         0x2881ad18,0x0e0ab749,0xc7adb0e9,0x7d424086,0x2842132f,0x870c32c5,
39395         0x58f9a09e,0x858477f1,0xec025589,0x422a9372,0xa5098777,0xbe428c5c,
39396         0x57660058,0x45b79564,0x957f37cf,0x6c7fc631,0xd6316289,0x8b7023dd,
39397         0x5b1c12a6,0x47003bb6 },
39398       { 0xc91c1c96,0xd99401c1,0x27a12970,0xaa5dcdf9,0xc3c29107,0x3ab92e17,
39399         0xa3fe4710,0x26fce8f7,0x4ee998ee,0xb0d09d5e,0x8e3a41f8,0xafa62204,
39400         0xa26ca506,0xb1c012a5,0x99b57252,0x2c6f734c,0x512f7fe1,0x1093d79f,
39401         0xacee19a6,0x2f30906e,0x056d1ea6,0x6bff8381,0xeff35f21,0x61c75856,
39402         0xc1ad2224,0x6e07e978,0x6b20fde8,0x2cca6ca1,0x633fe81b,0xab4d6d2d,
39403         0xb06a2ce6,0x73dff504 } },
39404     /* 207 */
39405     { { 0xd8e20fb8,0x8b615805,0x82b533f0,0x7c6873e4,0x56a854ca,0x5205f001,
39406         0xcb369211,0x87fec6ac,0xc7f092b7,0x1fa3c0ec,0xe845fe4c,0x5b36647e,
39407         0xf8b1f112,0xd4781e85,0x8b0f1a6f,0xc6526839,0xdcb8eb92,0xceeb8c6c,
39408         0x8e5f6d52,0x133f0ead,0xc8d934dc,0x31883e23,0x428ac45a,0x214ed5bd,
39409         0xdbbfca85,0xf77ca492,0x07e5ae13,0xdf4113fe,0x72ab05fb,0x63e4a0d2,
39410         0x7148f535,0x7544d0b7 },
39411       { 0x80797ace,0x4fe8d134,0xaf86d97e,0x216d6aa0,0xef5a68fc,0xdbf0a688,
39412         0x9f9b2684,0x18b26f45,0x8999d2fc,0x52fefcfa,0x62423955,0xd5af8d82,
39413         0xf63a3780,0x8f123469,0xdcd4feaf,0x2933454f,0xa73b5d09,0xba8018b7,
39414         0xe5552c18,0x9af1f276,0xff26bb1c,0xc5d4773d,0x06dd4f44,0x9ef49410,
39415         0x5f39ba49,0xad8f12f9,0xf66ca4f2,0x5767f6dc,0x7922f59a,0xba8773f1,
39416         0xc1e42d49,0x220081ea } },
39417     /* 208 */
39418     { { 0xba37a0ba,0x3043d573,0xdd176df6,0x05a431bc,0xc42070f7,0x03322cfc,
39419         0x67c2d109,0x5cabd30e,0xcbf8bcfa,0x362c95de,0x7787b10b,0xd767d277,
39420         0x6ec05e64,0x612c915e,0xce69c30e,0x9e669631,0x682e2635,0x27c9dd8f,
39421         0x95ffcc38,0x79021f12,0x8a2adca2,0x06a8ee79,0x4b5d500a,0x8e00e784,
39422         0x8d80d6c5,0x87746fc7,0x915f10cc,0x246053be,0x219f6fd8,0x844e328b,
39423         0x11bd3733,0x620541ac },
39424       { 0x509e5a29,0x0f7fd382,0xb432531e,0x8748d7d0,0xcd3883b9,0x8f749354,
39425         0x8bfbb17a,0xc6b8ac74,0x05f2d2c5,0xa4616a66,0x1bcb1b83,0xb3d96625,
39426         0x2fee265a,0xcf753104,0xdb225058,0xc70d73fb,0xf0c2d556,0x1211d434,
39427         0x54b259b3,0x862061d8,0xc42b3f7d,0xffe4606d,0xe86a4949,0x4c5c8585,
39428         0x160eedac,0x04ddcc8b,0x568e2420,0x1804ce67,0x42141656,0x91f3855a,
39429         0xf932be97,0x7f378198 } },
39430     /* 209 */
39431     { { 0xdfa6639a,0x9a374bda,0x02ab7391,0x0cbd48d4,0x47031e2d,0x5c5ef236,
39432         0xd0599d1f,0xb49ee2bc,0xe0d38443,0xd285eb60,0x269392e8,0xdbbea92f,
39433         0xb8bc538f,0x91455fbf,0xe469b768,0xae259ff1,0x41de5682,0xc1cecb1f,
39434         0x9952d1ae,0xc876f071,0xe7bf7446,0x1ce25181,0x282ad2f1,0xcb93ad86,
39435         0x6ba4ef67,0x8fa3cd31,0xe507aa3e,0xfce68a04,0xa61bb608,0xced74170,
39436         0xf6ac10d0,0x6de716b3 },
39437       { 0x172d6dc5,0xd4e58d04,0x6397c65c,0xbed2cde6,0x0c9eb4e8,0x7ae77e18,
39438         0x75fa2edb,0x56275468,0xa91e6738,0x4b30324e,0x235c8b2e,0x6023a856,
39439         0xa8f92887,0x9df6d6c2,0xf6f5e8b5,0xec2c185f,0x3ad5748a,0x7892e12b,
39440         0xd54aefbc,0x7aebb4f2,0xee868821,0x14915448,0xb1d9bd5b,0xa26c5f71,
39441         0x2ff00df7,0xe5ccd166,0xb95b1dee,0xebc99f17,0x3fe1f774,0x90983616,
39442         0xbb3d25b0,0x51f90830 } },
39443     /* 210 */
39444     { { 0xf2922461,0x49376fa1,0x1650d0d1,0xdbb1b1c3,0x0dd8608d,0x92b91c33,
39445         0x36b89906,0x3e612c4b,0xdf560052,0xe1977b0b,0x636a2545,0xf8afff70,
39446         0x11723d8e,0xcda7d278,0x81bde7ba,0x0b0bc4bb,0xed2a578e,0x3cb080b2,
39447         0x171b2e02,0x5bda0d0d,0x941bb9ae,0xf6df38cf,0xc14a65c5,0x85dd81db,
39448         0xc19dd98e,0x7f98c82d,0x52206f93,0xc613747f,0x5f5bbe78,0x9e13a2c2,
39449         0x0aa34be7,0x5eed218e },
39450       { 0x01d4dc0b,0xe1565754,0xf566bb07,0xa1ae5f27,0xb82225d5,0xe985ebeb,
39451         0x1189ec6b,0x5f3ad21c,0xecce4d9d,0x17da518c,0xd6b65b59,0xc84a2d3e,
39452         0x8ffa771c,0x7f988175,0x2ac69a7a,0x50d6ae12,0xc6e6846d,0xcb7f30b1,
39453         0x5bd0bb13,0x8c023a60,0xd73f2407,0x9a10fecd,0xe5f0a996,0x8c5158cc,
39454         0xbd8f5806,0xd26bf615,0x915a46e1,0xaf32ea87,0x0287d308,0xeaf74e81,
39455         0xa6264254,0x8c14ba06 } },
39456     /* 211 */
39457     { { 0xb17ee201,0x0c877895,0x88e57a77,0xc05aa471,0x97822456,0x19c3e763,
39458         0xc9c3ba1d,0x0be6f8c0,0xb4389ebe,0xfe85f4ff,0x0ce7fbb6,0x538bccce,
39459         0x65266c64,0x876eab2a,0xcf9a3842,0x5c9ac690,0xccc8f981,0x9f5cf3b1,
39460         0x9cf687de,0xfa17be6a,0x83835c15,0xfcfc10fc,0x150ef2eb,0x086b0fdb,
39461         0x884a52e6,0x9f97ecd9,0xb0cd1eb8,0x416e6fa2,0x3ecc03ba,0xe2bd1599,
39462         0xeabb165e,0x645c0a5d },
39463       { 0x50aa7e31,0xd94c4205,0x2f851da5,0xaec8df0c,0x3c726e6a,0x99646909,
39464         0x2619bf9a,0x72dbdc36,0xe253fbd5,0x1b4260e0,0x8c709e06,0x97c259fb,
39465         0xcddaec5b,0xfabf7cbb,0xe4b703e9,0xb4d5e8b1,0x0734efdd,0x1b06e56e,
39466         0x1f55f8a5,0x02d4a4f9,0x3f565c8d,0x7f8608ba,0x816d1d94,0x822f47d2,
39467         0x5ce7b136,0x0cc36156,0x31d04242,0xe46ee5ef,0x683567f6,0xb2a65f70,
39468         0xd2fa6c91,0x27e9ff40 } },
39469     /* 212 */
39470     { { 0xd7e952e7,0x75251893,0xc735bf18,0x15b30583,0x96fe0491,0x732b5992,
39471         0x806d2fca,0x27451858,0x1b885ed9,0x71ab76a0,0x6d9f55ec,0xbdce9d97,
39472         0x48f2ba9c,0x3da60b20,0x592b132b,0x6977c086,0x099051d7,0xb6dca9cb,
39473         0xd188ae25,0xd9c2ab23,0xe20aaf3d,0x9f469f3f,0x5aad74d0,0xdbd1f7cf,
39474         0x22a9eb3b,0x3d5efe5c,0x137010c4,0x8c5edfa2,0x57870260,0xada2217b,
39475         0x3dac9776,0x4feee567 },
39476       { 0xb5d3d780,0x30e18d52,0x07166744,0x4dadb5d3,0x5a742156,0x320d386e,
39477         0x8d6bbb86,0x5d8c290e,0x2d263dd1,0x981a4323,0x98984636,0x33d0e7ca,
39478         0xa519acb1,0x5138784d,0xdddc81ff,0x832e3fab,0x3199a43a,0xfc278594,
39479         0x32743163,0x5b4cabcf,0x74f94fa7,0x9fa010bd,0x5694a627,0xc28a743d,
39480         0xcb657a24,0xc1d2a888,0xe86a25ea,0x7eef2503,0x04c561ff,0xed11a5d3,
39481         0x9c9ede0e,0x4fe818e7 } },
39482     /* 213 */
39483     { { 0x7fc1c7ff,0x00252c9d,0x9fa89ad1,0xa9bd419d,0x4064e9cc,0xc93a124a,
39484         0x43942ecc,0x384cbcb8,0x8749695b,0x004c21fd,0x421165bf,0x69c81d9f,
39485         0xdde01102,0xe2325628,0x5a9b004d,0xec937457,0xf6dcfc21,0xfb3346bf,
39486         0x4d372c7d,0xac4da64b,0xf20494e2,0xcecb7ad3,0xe867c150,0x562c41b5,
39487         0xc2b723d8,0x299395ce,0x7ee53231,0xc91adfc5,0xf10b6597,0xe06f1161,
39488         0xb74d3ffc,0x81915529 },
39489       { 0x6ed9d4ee,0x8ec12431,0x689aff01,0x3dffa154,0x2a89a3f4,0x4aba349f,
39490         0xd467efb2,0x2db1e8e2,0x039102e2,0x18dea354,0xe52f082b,0x422ab853,
39491         0xed36dd47,0x7130a2c1,0x0295d1ee,0xca60e86d,0x7c7f5ad3,0xe6ac6808,
39492         0xde864658,0x0f83cecf,0x461d1265,0x72e66c21,0xbd385099,0xfeef4150,
39493         0xa6632289,0x0f183f3a,0x792dc795,0x275454be,0x11367702,0x2744c11b,
39494         0xe8ea6ef3,0x7d06bcc7 } },
39495     /* 214 */
39496     { { 0x7090212f,0x89285942,0x5521e844,0x691b7d4c,0xbe2dbb92,0x4c038422,
39497         0xbd81f880,0x317721ed,0xac89bc36,0xc136cbee,0x7b8f004d,0x4f71b60b,
39498         0x4e218ab8,0x269132d0,0xe6cc814d,0xb0e2496e,0x75fadc15,0x0b2ce317,
39499         0x66d223c5,0x82e3c084,0x4c612f8b,0x9721caa6,0xa4b65355,0x59a751eb,
39500         0xc7d3d9d1,0x3433aad5,0xe80d4246,0x1e61b9d2,0xfc673caa,0x149f655f,
39501         0xd0f9cb92,0x48b52b99 },
39502       { 0xefdc05be,0xa3915399,0x13e095e9,0xde70db18,0xcddb3fda,0x447862e9,
39503         0x1a009451,0xa2b03162,0x23920ea3,0x4b27980c,0xa23b8feb,0xac5394f1,
39504         0x3e5616d4,0x163f7256,0xb714219a,0xaa0ff93f,0x93d62474,0xd26f96d2,
39505         0x7dcfe276,0xdd212ea8,0x47038d15,0xab27bf2f,0xf418168e,0xe58c8325,
39506         0xb32a989a,0xe3704222,0xbfc9f13b,0xa3694390,0x0d0684ad,0xf16e2606,
39507         0x9d8c76ec,0x17c0de87 } },
39508     /* 215 */
39509     { { 0xdcc01958,0xbca5f453,0x1ce88393,0x7d945954,0x561f5b6d,0x5e6350a1,
39510         0x7e2d36bc,0x291c3c86,0xa5ac3a6c,0xf6c7ed84,0xd98006cd,0x7913c40b,
39511         0x5671ec3b,0xf78bb087,0xb43e89a9,0x1c928f6e,0xae1ea1ed,0xfdf28df3,
39512         0xb924b2b5,0x62bba5b1,0x1a116e05,0x491d2705,0x167ed3e3,0x08ec02b7,
39513         0x5bc0b046,0xe291cf7b,0x8c5d7f59,0x30e50169,0xf5c799b7,0x0c7c350d,
39514         0x0ac6e1d7,0x6862b9e2 },
39515       { 0x9ffa1f64,0x56c6f4e7,0xa1e24349,0xfed6a91a,0xcdb75232,0xe9a0ee0c,
39516         0x0322d607,0xbfc90b37,0x462fef87,0x29480ad2,0xc2bfcf34,0xfc214969,
39517         0xa539e38f,0x6e5211e0,0x12a5149c,0x2a59ec26,0xd706b532,0x195fe212,
39518         0xe99c8429,0xf77fb108,0x5dc80482,0x74ceaea3,0xbd92d298,0xa5a6030b,
39519         0xaaea15ee,0xad42dca5,0x4987109c,0xd6ac3bc7,0x290af649,0xc64e1c40,
39520         0x51f8de6c,0x5093fa2d } },
39521     /* 216 */
39522     { { 0x4c2d553b,0xc4cf3280,0x3b966c29,0xdc1abe22,0x2296914a,0x556a549c,
39523         0x999976c9,0xd8c9f8b5,0x776e83f3,0xc22c57bd,0x7c85ec57,0x4f2942ab,
39524         0x6e2c61f5,0xef3407e5,0xf213db48,0xf005e8ca,0xf32698c7,0x470c853d,
39525         0xcac0a54b,0xe6f488d7,0x60b7501e,0xb6bd6bed,0x714a4bd9,0xf0103106,
39526         0x6e098894,0x5285bc3b,0xf5f92a00,0xec06741a,0xef7ef24a,0x32f16426,
39527         0x6c77a438,0x12f9c44d },
39528       { 0x83313a1c,0x1951e964,0x33c58b37,0x98edd3da,0xc7ac4044,0x4edbbf52,
39529         0x0dcb5ee8,0x866ca6f7,0x6dd422f8,0xec0ae8f5,0x0661ec2e,0x1077bc54,
39530         0xd422523c,0x6d39913a,0x58e7cb3e,0xd105e1e8,0xc979bb45,0x47c9397f,
39531         0x0997b592,0x3221d4a9,0xe8952fe7,0x0ef628a3,0x4e946241,0xd08d5827,
39532         0x59780f40,0x64cbed0f,0x08e110ec,0x13d7c227,0x7679b1a3,0xd186d866,
39533         0x26ae1d18,0x02f75e4e } },
39534     /* 217 */
39535     { { 0x47f307d7,0x1b637ebf,0xd0141477,0x6b644a6a,0x2e05a80c,0x82a33d65,
39536         0xfed07b31,0xc8f1a0f3,0x3696e597,0xc09ee7f9,0xc7ffc01e,0xcdaa7ec3,
39537         0xf8f373b9,0x549f88fe,0xc3bb8989,0xc88d1961,0xdfcaa7b7,0xd92a4fe9,
39538         0x3ae4ab20,0x12ff9ee2,0xf5ecb1a5,0xf5aea641,0xe32fb47d,0xe769237f,
39539         0x25d085c0,0x96a5c420,0x26c755a2,0xdc912558,0x9bce9723,0x580b985f,
39540         0x63961941,0x72b1b566 },
39541       { 0x790e5558,0x9d708a08,0x0689af80,0x98536041,0x42313b5f,0xe85e7b8a,
39542         0x55a49d1a,0xe6ba1292,0xac371b0b,0x5e76c4b0,0x938e6e19,0x58504f39,
39543         0x60ae9a21,0x8dd41422,0x968485ce,0xd8b04e9b,0x887efe43,0xf94c4ba5,
39544         0xf11c5e73,0x11268e67,0xcf6b99c4,0x92623e28,0x7a0a9662,0xf2d0aaa8,
39545         0x4ca02ed3,0xb266772a,0x2d63b551,0x68ee8e4e,0x2e78b5b5,0xcdebb299,
39546         0xe17225ad,0x5df19216 } },
39547     /* 218 */
39548     { { 0x8df2e7e3,0x20027e1e,0xd8da07de,0xb183cc68,0x4b4ae694,0xce35ba69,
39549         0x3ca62e88,0x896d97df,0x52efed2c,0x3de4713b,0x26bd084f,0xd006c40e,
39550         0xfc81923b,0x1e9b71bb,0x1aacc6b0,0x9991c7b6,0x8f656840,0x650c9364,
39551         0x87f47524,0x138561d1,0xbffd3ca2,0x610f2b11,0xfa191418,0x96915faf,
39552         0x955e5309,0x8f1236de,0xa1872d79,0x613cbeea,0x66a2a48b,0x7f7b44ea,
39553         0xe0a89c32,0x452265c2 },
39554       { 0x25430010,0x4ad5ec79,0xebd090c0,0xcac786ff,0x20a9d3f5,0xa5f9f4ff,
39555         0xa3edc65f,0xfcbf4112,0x0cf3eb11,0x8824839c,0x8aa5b700,0xb8dd6d4e,
39556         0xb7568ab8,0xe2271dfd,0xb744560e,0xe43ec373,0x1cf75296,0x78eaf926,
39557         0x3fa96d9b,0x1809ae0e,0xdc25dfd5,0x0b312d2d,0x6bab7711,0x6b8f78b4,
39558         0xb5ecf1e4,0x069efc8d,0x609fecaa,0xc1952bae,0x5f4dbde1,0x43e302ed,
39559         0x1e078555,0x14b02bf9 } },
39560     /* 219 */
39561     { { 0xb87e5b57,0x2c71c768,0xf531a557,0x0bcc78f7,0xf7597dc8,0x4ff93f8b,
39562         0x139e175f,0xb28e026d,0xcb94ca6c,0x6b83b727,0x0079f7fc,0x2eafe3b2,
39563         0xcf3bd170,0x2aca54de,0x6af0dc6c,0x17c4133c,0xccf5e35e,0xbea1e665,
39564         0x345505c6,0xa6691a48,0xe6100b89,0x2633abd0,0xc17d0388,0x966c6706,
39565         0x1a0cf90c,0x7aefffbe,0xd0add64c,0x4d847be7,0xaea2aa46,0xd49bcdfb,
39566         0x2cc7d0a5,0x85e07e74 },
39567       { 0x0bc25bca,0x23aae0a6,0xe44f64ec,0x6e8e55f1,0xb607b773,0xe1e696d8,
39568         0xd3005909,0xaa90a746,0x2cbc4990,0x072b1ccd,0xc68e2f5d,0x0d0fe6c6,
39569         0x53e28ec9,0x920ec5f0,0xf0040cc1,0x79b21fb4,0xfcc4a2c7,0xa7375bd3,
39570         0xe1bac7dd,0xf5f5def9,0x35c0f8d3,0xdc315d79,0x2cacd318,0x7117c170,
39571         0xe926f71c,0x6f2823c4,0xed02f39a,0x38db58bb,0x7db69323,0xe5b49231,
39572         0x8d49f430,0x0964039f } },
39573     /* 220 */
39574     { { 0x56999eba,0x21774f16,0xb1de6305,0x3d8ee287,0xde0b2669,0xd81af726,
39575         0x3f8942a1,0x37446939,0xea03e13c,0xbcf6b615,0x94e273cf,0xd30c0c35,
39576         0xc6725c56,0x4fd33a56,0xa8be97a2,0xa57534ad,0x7c22a251,0x799242a6,
39577         0x9d0c5c49,0x4e51bdb5,0xc6a42768,0xd7cd76cc,0xd426bf59,0x914097ac,
39578         0x66e9beb2,0x59404a2c,0x5c96e3e9,0x4738fe98,0xaad666d0,0xbcbb3e0e,
39579         0x63bc5e56,0x626b0fd2 },
39580       { 0xe1a1ec42,0x47217dba,0xab5acc50,0xaa6ae7db,0x865331d1,0xb7e1ab1e,
39581         0x3d30126f,0xb8453070,0xdee61851,0x280649e0,0xea689544,0x8806f4a3,
39582         0xcb56f632,0x4bbe43ad,0xbcaff94f,0x036b9bda,0xbd0637be,0x0d941e65,
39583         0x686f3abb,0x82179d44,0xaad6afd6,0x1486912c,0xff7e1534,0x9a3b891e,
39584         0xeb86fd96,0x88c426ce,0x117928c3,0xb56e6a81,0x96399e00,0x933e7135,
39585         0xa17b6ac1,0x09bbddd9 } },
39586     /* 221 */
39587     { { 0xe4fd3673,0x75e39c1d,0xa65c8e07,0xf880d9d1,0x7289c7fe,0x4725c1dc,
39588         0x3529d200,0x5b6735ee,0x3c747af3,0xc1f8f2ed,0x912efdf5,0x5cf3998f,
39589         0x49859c39,0xed722618,0x0e69795d,0x23793a2f,0x86b1d2a7,0x8a6ab8d6,
39590         0x22a882e4,0x00c815de,0xf9db8d7e,0xbe77d6fc,0x02267547,0x0886fb32,
39591         0x49c10edc,0xb62687d4,0x7c83ed4c,0x9f1c3e17,0x5af366ea,0xe6d5d7f0,
39592         0xd1efad24,0x2eaa01b8 },
39593       { 0x1f357c74,0x5e47fb70,0xa9e3b794,0x93085c4a,0x6e85a905,0x4f098733,
39594         0xbe0244c9,0xf53808ff,0xa3b5660d,0x91dddf93,0xf3b95ed6,0x8b76377b,
39595         0xbb3920d4,0x91b911b7,0x86a13cf3,0x7ccf08bf,0xea018e58,0x53ed8f97,
39596         0x78c55194,0xb1ea4343,0xe0d2d5a6,0x8e6adde9,0x9b96259a,0xfc2b248f,
39597         0xeef17ddd,0x96ebceae,0x557f9c85,0xf694b443,0x07d5bba8,0x48cd150f,
39598         0xb4c1986b,0x02d31de9 } },
39599     /* 222 */
39600     { { 0xde79499d,0xa6bb9e1e,0xfd0fc2ad,0xf6ca8ff8,0x1a7d9356,0xbec0f8e8,
39601         0xe8f06327,0xbc3d1c9f,0x3b300beb,0x805c7217,0x413c181b,0x00420a08,
39602         0xf0ca9d01,0x9e9a167e,0x1aeeddd6,0x076c909d,0x8e3a8a72,0x64a1997f,
39603         0xa77b429e,0x3ce7f7a7,0x5c94d3e9,0xaac0fbf4,0xe6d48407,0xf37694a7,
39604         0xa91921e7,0xf56679e2,0xee1dbbd6,0xf23fe0f3,0xcbf9fa99,0xc7917566,
39605         0xe0f4d765,0x965860f2 },
39606       { 0x7fa5f79c,0xe734702b,0x5af2d26d,0x930bd426,0x6c73e0ce,0x45bd8b98,
39607         0x4ee44a2d,0x7dbe7bed,0x956c8a1a,0xc129e024,0x77cdf80e,0x6fdc05ac,
39608         0x589ca59b,0x70a6ba2b,0x999825af,0xfc484021,0x7a23f0b6,0x1d284b54,
39609         0x28a0a8af,0xb1da10a4,0x2b2af6d8,0xb1eb1b31,0x33935ee3,0xf051443a,
39610         0x8effa6ec,0x7a07eb26,0xd662654c,0x16ee4086,0x4549ee4c,0x7a7bc501,
39611         0x1fa98a52,0x65081032 } },
39612     /* 223 */
39613     { { 0xb67ed9b2,0x49f0e460,0xc36d93d2,0x0cda0fd0,0x88c75e1c,0xbb5963e9,
39614         0x614bc0c9,0x757bbe93,0x9a768605,0x9a9b8801,0x48edc544,0xa8b7e2af,
39615         0xb51a5985,0x9e77ed9e,0xebbf024c,0xdd025274,0x1545c636,0x598b6288,
39616         0x4800dba0,0x39bdaed0,0x81e2a23a,0x7fc20139,0x550cb4f2,0xdc66fd5c,
39617         0xb52068c7,0xad27032f,0x8169fa15,0xc9a0bcae,0x3a7ca8a2,0x60606f21,
39618         0x9862652f,0x98295046 },
39619       { 0x2e11c128,0x3e374600,0x0e6dca7e,0x80dfae5d,0xd9552264,0xe44016e2,
39620         0x880b7143,0xf65f88f2,0x526b881c,0xca3d28d4,0xdfb86afe,0xf9c59dd1,
39621         0x4c74f958,0x548860c2,0x9cb69f4f,0xd06ea43c,0x7334ecec,0x5343c9ae,
39622         0x35329713,0x5cc2ccd6,0x5f3a6c0c,0xa95ff403,0xb372653b,0x2e01a1cc,
39623         0xa250523d,0x31510fdf,0xa6227eb2,0xeee538e2,0xca23cd10,0xeadfc8a0,
39624         0x3e78f54b,0x4b7e6e1b } },
39625     /* 224 */
39626     { { 0xdb5f928b,0x79c9076f,0xb7347cec,0xe6250bb6,0xac00ec41,0x54b67798,
39627         0x9d9619c7,0x900d20ba,0x59e4343f,0xed42c0d0,0x451935d7,0x3df39e85,
39628         0x64f701ce,0x26391182,0xe1f87aac,0xce8f2554,0x65f91aaa,0xfddd6789,
39629         0xa324539f,0x96cd163f,0x4bace995,0x5c815f2c,0xa94f9ea5,0xd78c8c2a,
39630         0xef24e455,0x7ab2aff4,0x1cddc26a,0xf0ed6409,0x00ca2822,0x954a420b,
39631         0xd3297658,0x0611c4c5 },
39632       { 0xa9e81829,0xf192001c,0x08a282cc,0xded33320,0x8f9ded9b,0x0bfd7de1,
39633         0xb7889003,0x6793ac0d,0x3577a5dd,0xbb00d91d,0x802d3c2b,0xe17a23a7,
39634         0xfb549014,0xff95f88c,0xc71b6e07,0x7cd1bf4b,0x23588c8b,0x2e3b24a0,
39635         0xa4112076,0x9b5335b8,0xc4056d30,0x2481c05e,0xe916a1b5,0x55c7410c,
39636         0x850179f4,0xbbe03271,0xb3cd1208,0x15e6c177,0x90cbfe50,0x509a24c0,
39637         0x1c108566,0x82079529 } },
39638     /* 225 */
39639     { { 0x1c7d353e,0x5d2d3cff,0x7de0ce3b,0xd5e7eccd,0x6ca87635,0xb4b1075f,
39640         0x25f9ad3e,0xda8404e0,0x205cb5ae,0x6b963e89,0x09f221a1,0x9e5ee0d8,
39641         0xea41aca4,0xd64c85d9,0x34442a34,0x6a46c4e9,0x3cf655a4,0xac6ff97e,
39642         0xe5417d7c,0x76565c1e,0xeebf9c4c,0x681009a9,0x88da6388,0x95b61d39,
39643         0xf6b472c6,0x6402b46a,0x0b7f1171,0x1fde5165,0xbe0c05e3,0x94f8f273,
39644         0xa88344a7,0x7487b036 },
39645       { 0x9c3e2370,0xa860e575,0xf8048719,0x19d58193,0xa6e2f9aa,0x3a0dbf3c,
39646         0x6144719b,0xb6c7e959,0xdeffec21,0xa9049c74,0x3f50cebf,0x8ba064b2,
39647         0x49a1de15,0xb12822c0,0xb1d527f2,0xb654b7d9,0x0ffd0430,0xc470859d,
39648         0x4f05446b,0x37c74a67,0xa3add995,0xe553251b,0xe33533b5,0x4a3ed6cb,
39649         0x27e419ce,0x2f2f44d0,0xa5d1b979,0x2d84ee82,0xdb6fa69f,0xcc76b123,
39650         0x21fa3bdd,0x834f85c5 } },
39651     /* 226 */
39652     { { 0x2ce9b31a,0x329347c1,0xfe3fb3b7,0x1d88522a,0x52ff90fd,0x4bcefb4d,
39653         0x2b1a081d,0x53b17386,0x2a411f08,0x538c11ba,0x141b603a,0x7895b93c,
39654         0xb10bd741,0x2993b9aa,0x09912986,0xccbbd046,0xeea0aba5,0x669fafb0,
39655         0x35661897,0xd4844622,0x367ffa54,0x4a63b89c,0x1c3478da,0xcbad5d1d,
39656         0xaa6034f7,0xc5339227,0xe61b1391,0x0e6d705f,0xf74ff515,0xdd14b660,
39657         0x5332b54c,0x639d8b0a },
39658       { 0x162217cd,0xfa423162,0x811c28e6,0x2e0e4a2a,0x21766dc0,0x68d9ce18,
39659         0x046a06ef,0x51263739,0xdde92101,0x44eea231,0x114298d3,0x0607c8f2,
39660         0x63d957e9,0x27f272ba,0xa5e8cae1,0xe7ce80cc,0x24f7a63f,0x5816ebe2,
39661         0x89673e34,0x4dece5a7,0x536babd4,0x13756a22,0xe3bf77af,0x644d61ae,
39662         0x2bcf98bc,0x60b2bf6e,0x29fa962c,0x3b0b59f3,0xabb50023,0xb0769a1a,
39663         0x0c75402c,0x40903136 } },
39664     /* 227 */
39665     { { 0x1670433f,0x84d2873a,0x25493dfc,0xc9394df6,0x80fcf89e,0xeb05a19a,
39666         0xdb297616,0xe39e4310,0xd9e63046,0x50742dc9,0x1de9ca9e,0xf31ad8c8,
39667         0xfb7b1d0d,0x86aabf94,0x1b3c82d1,0x36cda27a,0x39702d84,0xfb1a2ef4,
39668         0x46081299,0x280bfddc,0xd2396238,0xe4b2b48d,0x7b3c9353,0x2db2c2f3,
39669         0x12fb8a69,0xd5b5b317,0x08180474,0xf9b87a3b,0x1e952578,0xd8590986,
39670         0xf37a2bc8,0x80668eed },
39671       { 0xb39a0249,0xe2edcd35,0xb2f8aeae,0xaf230cd4,0x7223df05,0x295b15e4,
39672         0xe0e937f4,0xbb66982a,0x8cbc9162,0x019d2b72,0xcf49dca1,0x5c512ae9,
39673         0x630f07b4,0x11b491a7,0xa03874e9,0x48d4f34c,0x44cb7433,0xc1fd0ea6,
39674         0xf95b30c3,0x13f79ae1,0xed8b60ac,0x40362d4d,0x61ead81c,0x9e8314ff,
39675         0x498c3d28,0xed600dd4,0xc2521702,0x5fcb1c19,0x3a9c1f33,0x592329fc,
39676         0x1bde6ce9,0x04677548 } },
39677     /* 228 */
39678     { { 0x39233c96,0xee3de56e,0x80737eaf,0x868c409c,0x201abc68,0xacae11bd,
39679         0x2b486205,0x0f2cea9b,0x6f19056c,0xe32387e1,0xa5dc2a41,0xea75365a,
39680         0x12b4be86,0x76c29acc,0x8d63294d,0xa01fcab7,0x0cab9f24,0x81dbe88b,
39681         0xf414c054,0x76646e5b,0xcb96b7aa,0xfe111893,0x7664e097,0xb649f5b1,
39682         0x53fcf5a9,0xa196422e,0x0b7ff634,0x5978c9bd,0x3c229895,0xb5feb38e,
39683         0x0833c456,0x038a49fb },
39684       { 0x13e93257,0x35e3818c,0xa612741b,0x14cebc9d,0x7caac06b,0x4f6e9249,
39685         0x3daa1116,0x82278e33,0x4de2034a,0xe7cc565e,0x0a1ba630,0xbb7dc95f,
39686         0x66956fbd,0x81dd9f23,0xbb132dd6,0xc63e6319,0xfc241337,0x6e22b022,
39687         0x7e8beb1c,0x23848193,0xd8c938ac,0x83b1994d,0xa6bb5644,0xb54cfaca,
39688         0x06f91807,0x1a7cd44e,0xa8f8d9f3,0x1dd439bb,0x7f74a8e6,0x660c2a78,
39689         0x121b5660,0x4bb76e22 } },
39690     /* 229 */
39691     { { 0xe6354817,0x7a151e8a,0xf038b438,0x33d494ea,0x85958986,0x4c86c688,
39692         0x1dcbac12,0x72153827,0xc0edad06,0xf487af8c,0xe500e5d6,0xad33051f,
39693         0xd6e47f55,0x0a711b1b,0x8c746ad5,0xa68709a7,0x6402f35e,0x27f17262,
39694         0xfb30c130,0xc6d08efa,0xc06c7497,0x9ef1c041,0xdcc3e2da,0xd0c74ece,
39695         0x092e1073,0x30c5f96e,0x2aa12b74,0x0f1393cf,0x2107eb02,0x24584016,
39696         0x7b76f98b,0x8843d25f },
39697       { 0xedb2a83e,0x4e1501dc,0x2bb8d724,0xbcfe8fb0,0xd925df62,0x09020659,
39698         0x42ab6fc3,0x3c715dcf,0xa0f09dfd,0x73c05055,0xe3590aea,0x126745d8,
39699         0x76ff749e,0x5382f4d8,0xa920c663,0xfc69feef,0x9fd711ca,0xde160211,
39700         0x9075c4d5,0x4219c3bd,0x3ded6bf2,0x3800cbd1,0x6263a116,0x8c7ea0eb,
39701         0x7d264c37,0x35bd7958,0x7159c98c,0x56e22e45,0xfa7373b5,0x71bf2a2d,
39702         0x8935c949,0x0503f939 } },
39703     /* 230 */
39704     { { 0x71dad4f6,0x65addc66,0x024bea1b,0x238e4889,0xf605d3dd,0xfb76c8e2,
39705         0xb0d96b89,0x13d5f5de,0x6601b2cb,0xe0b5ba35,0x83e3d254,0xe37d491d,
39706         0x240c8ea7,0xe8860423,0xe91c99ba,0x374182f3,0xa87ad919,0x26c2caf9,
39707         0xf574f295,0x4b13040a,0x944000a3,0x5b9bced1,0x06df42e7,0x4ccc57be,
39708         0x4bd1089d,0x22e8ec50,0xdddbb500,0x0c53177a,0x9ecfeadb,0x690d31d2,
39709         0x176668f9,0x735778fe },
39710       { 0x843c1137,0x0f86ee3e,0x3f0b73cd,0x3c1c42fa,0x8ab20e3a,0x0e75679d,
39711         0x16242fae,0x6f95f1f4,0x39b092e4,0x7b88e11c,0x4c236ac0,0x1629403e,
39712         0x2dac02e6,0x66105f41,0x862e0632,0x74dc28a7,0xf3b23c8d,0x2118ffb2,
39713         0x0745ffbf,0x1182417c,0x4c05711e,0x49b55a04,0xcefbe4de,0x2c665b74,
39714         0x97bf7107,0x1cc4c01d,0xc54f0676,0xb2ca06da,0x7450d0f8,0xfc599daa,
39715         0x1a3182a1,0x52e637a6 } },
39716     /* 231 */
39717     { { 0x6bebc6db,0x481700f1,0xf9503d92,0x4a6b45db,0x5d153919,0xc715cd3c,
39718         0xe5ad2abc,0x942a1c05,0xab7b466f,0x36a82433,0xba13918b,0xba413bed,
39719         0x90f4e6ce,0x698a5624,0xf3f1f3ca,0xbb720da6,0x63471ab3,0x2116d41d,
39720         0x303d3609,0xe00d2227,0x463ba69e,0x7fd4cc00,0x62845fd1,0xac609e4d,
39721         0x80adc9c7,0x63603b2c,0x45fafbca,0xbf16fc9a,0xc4bc94ab,0x41007f7f,
39722         0xa74b1698,0x7c916b4f },
39723       { 0x78bac2d4,0xc1026f91,0x2601a875,0x8a2e8098,0x0073d640,0xad2f276e,
39724         0xfcc1fb88,0x443610c4,0xca6b291f,0x5727b822,0x88ec60fc,0x0645532c,
39725         0xed9ad48b,0x51e48899,0xf543f103,0x841b48b5,0xd591ceeb,0xa6ccb1be,
39726         0x9dcf5a8b,0xfc4adf0f,0xb347ddb4,0x3a7ca020,0xcb44c521,0xaa1accc2,
39727         0x0527c0c4,0x773b6828,0x7023cf50,0xaa374c10,0x6b74c926,0x733d1000,
39728         0x77a8d07c,0x1ff3916f } },
39729     /* 232 */
39730     { { 0xf997939d,0xaa218fe4,0x791583b3,0x3d4dfbbb,0x87f7560b,0xb3a7b5da,
39731         0x5da92c98,0xa9c02801,0x46666f4a,0xe1eb4aad,0x14ce9dd7,0x2eb17a51,
39732         0xef8f3076,0xf46a66a4,0x810e546e,0x900b45c6,0x4baf04dd,0xf7af2258,
39733         0x5c84d42f,0x3cc1c872,0x8e4c83de,0x3093f225,0x170d88b2,0x62fade41,
39734         0xac076e44,0xe19612e4,0x32dd141b,0xf48d7346,0x925e34da,0xc1b1f759,
39735         0x072b90c9,0x19ed1a56 },
39736       { 0x6c735473,0x9cf7fcde,0x6003bc3e,0xaab88e67,0xfb199bb8,0x12187cbc,
39737         0x9accccbd,0xbb730441,0xb0f65459,0x214aff3c,0x6f926282,0x6aec81a3,
39738         0x9f9d20b8,0xaa82cb32,0x5773cc90,0x82f3f90f,0xf62257e1,0x4af60e6b,
39739         0xbd4762df,0xf18b44bf,0xdb970753,0x3948b129,0x7c22c18e,0xc6e920e9,
39740         0x57be97ad,0x393d6208,0x46b637f9,0xe8d7382c,0xf1fed1d5,0xf6625ccb,
39741         0x68681599,0x6f31e0f9 } },
39742     /* 233 */
39743     { { 0x82b8f204,0xc45afe55,0xd358b54a,0xac0441b6,0xacd5f5ed,0x7213e7bf,
39744         0x139bcd93,0x1914c70b,0x96dbcbb0,0x714b4581,0x1ed35d21,0xe9297d35,
39745         0x6a3e1f20,0x8f640837,0x2f3cd705,0x150a8a9d,0xdcdd9f6d,0xfb36e801,
39746         0x5cf56d82,0x5a54eb65,0x92aa5a21,0x7610500c,0x3b089f03,0xd10d0ae2,
39747         0xc42b66e8,0x491b2079,0x0eee8d48,0x4af1ae3d,0x41556f45,0x137e4c28,
39748         0x63d8a7e6,0x875e3308 },
39749       { 0xaf6c0acc,0xdc80fddc,0xbb1e7c08,0xd5ad1e66,0x828585ad,0xdc717ae1,
39750         0x275c7da6,0xbdc54340,0xd26b9e15,0xf4b4c852,0x6a05fa50,0x5f0a1fbf,
39751         0x817bcb32,0xc6f81e47,0x70ff2e1d,0x2cbd4328,0x67c7f7fc,0x8a249016,
39752         0xb585a6c4,0xd045acb7,0x4666c057,0x2e972ad4,0xe6d7d63d,0xc74d87cf,
39753         0x0e274144,0xf7067d87,0x8b2584ae,0xb2ca157a,0x75f0fdeb,0x495c5bfb,
39754         0xf386e009,0x5abb0581 } },
39755     /* 234 */
39756     { { 0xf0c97f57,0x8be62d2b,0x962f28c7,0x0fe04871,0x47b50abb,0xc548a467,
39757         0x44fa09ed,0xf6b26e03,0xab05a96e,0xfd44c6e3,0x70e6ae82,0xedb0032c,
39758         0xd7e4899d,0x28bd402b,0x9b7c11c2,0x43f2e963,0xce913716,0x0ec3fc0e,
39759         0x02fd0f8c,0x769b8bc9,0x7cabc3ac,0x9d9cb3aa,0x06924cc9,0xe88a8892,
39760         0x42609014,0xa51461aa,0x962e79e0,0xc7f4aa8b,0x8b1b3e80,0x4ef0210a,
39761         0x1bfee4bc,0x70544680 },
39762       { 0x121901c1,0xfab3d713,0xfead54aa,0xe90a2627,0xbc08ba23,0x64f6d285,
39763         0x36ec227e,0x8d993015,0x06c191ab,0x99a16ab9,0xf649ce2c,0x86b1cf5b,
39764         0x66be3a80,0x59206759,0xccba2cf0,0x18836279,0xeff53486,0x2c157b87,
39765         0x4b223af2,0xbfac9896,0x0aae7a57,0xcd0fd4f0,0x63218a80,0xdaddb940,
39766         0xdf88f14e,0x3844bb79,0xb71ed9fd,0xc1b3e3d4,0xd6205036,0x6c634a13,
39767         0xb8680a6b,0x6f56aecf } },
39768     /* 235 */
39769     { { 0xd9205c5d,0xb01dc803,0x67123929,0x68955f7d,0x9d9b6565,0x3debbffd,
39770         0xd3b1acfe,0xb844395e,0x6094eeff,0x04328b21,0x22991feb,0x6631ffa8,
39771         0x190dd075,0x0dde66e6,0xe8577c05,0x75b03c55,0x91722407,0x6c91ce5f,
39772         0x8ebb3a3f,0x9a288a40,0x058a1396,0x1d376f8a,0x9a6e0676,0xf3a59457,
39773         0x7b71d288,0x103029c5,0xb44c30c0,0x0843f428,0x730e0b9c,0xd8e6aff8,
39774         0x4ed644ad,0x7b6be811 },
39775       { 0x3d3aa54e,0x3ec38e4a,0xd83d509a,0x10233943,0x243955e2,0xf84aa621,
39776         0xf51d3d44,0x29104717,0x7eca4e37,0x62d2442c,0x85fa55de,0x8c5a523d,
39777         0x851da1b5,0xc6f5ccda,0x20001468,0x044bcaa8,0xe01702e0,0xf7501e68,
39778         0xe6a0acec,0xf0819359,0xac0ef0b2,0x33dda6ad,0xfd964f01,0x97aeedc8,
39779         0x530b90d8,0x48dacd0e,0xb84122eb,0x4c5fad6f,0xd700a1de,0x2284ec1e,
39780         0xdbca5474,0x86f9a835 } },
39781     /* 236 */
39782     { { 0x450cc69f,0x0e1d9055,0xc9edf98f,0x50eb14bc,0xee7eba01,0x1bb94e77,
39783         0x998f8e53,0x5f7a6737,0x1b16eef0,0x588384e3,0xd85c5e15,0xbb928723,
39784         0xcbd952aa,0xfe51e345,0x7e241674,0xc5d0ee28,0x100182f0,0xfdc146ef,
39785         0xe7f5be2c,0x0f739e92,0xb656bd3e,0x501ab3af,0x5168e289,0xb1552dde,
39786         0xb8ee104a,0x940dfe31,0xc4304475,0x42923603,0xc460a913,0x9306f114,
39787         0x03b51f86,0x5bfa9faf },
39788       { 0x107b258e,0x2a23f52c,0xd66341dc,0x989e82bb,0x823cff1a,0x54a3ced8,
39789         0x719b491f,0xf45b7794,0x2433dfb8,0x898c2218,0xc49250ee,0x0f9dd91c,
39790         0x4fa17655,0x50c2a2ae,0x2c327f45,0xf7aa1ce4,0x583b1e41,0x13a15ad6,
39791         0xa1bfad9e,0x9aa0d5a5,0x8e1fbdcd,0x9b1caa28,0x915f7f87,0xaf9283b6,
39792         0x87e81a1e,0xc10e4e0c,0x1080d296,0x04fdca56,0x12755bd8,0x6acc9616,
39793         0x828feeda,0x1b1266aa } },
39794     /* 237 */
39795     { { 0x774ee49c,0x4ebc0a00,0xcb6237d7,0x776f6852,0x5df938a3,0xfc0544ac,
39796         0xb6fbfbbd,0xc3388ec8,0x745f2eae,0x84ac8bcd,0xb1ece937,0xa9c56609,
39797         0x7de8fa13,0x656fb6ac,0xa532b871,0x5f8ded74,0xaa889f09,0xab0d428b,
39798         0x10b7aec2,0x43b27f28,0xfeecb34c,0x26426e1e,0x9e89c2db,0x44431b6b,
39799         0x39211090,0xaac4bc5d,0x4fd81058,0x926f7368,0x471ef60e,0x452fa691,
39800         0x218d7a23,0x33517fdb },
39801       { 0x593c4a36,0xa9c33f46,0x36b1a9ee,0xac69d718,0x4277beec,0x55a20c1d,
39802         0x7e4f179c,0x3e8ca24e,0xd46d88a2,0x57373369,0x730702f8,0x71ceb1cc,
39803         0x35eed574,0x8b184d97,0x0704cec2,0x7f4517a2,0xd7062a53,0x7f129d18,
39804         0xb1d77e1c,0x07a4571b,0x8350d8b2,0x774ac309,0x61fab8ef,0x27b2919f,
39805         0xb5dd801b,0xa7c4cc13,0x1434591f,0xe7e6255b,0x5a3592b3,0x349937b8,
39806         0x30c77549,0x31fac63d } },
39807     /* 238 */
39808     { { 0x04913fb6,0x2ee8cf1b,0x1769a6b3,0x7e401350,0x783e61f0,0x790ebb71,
39809         0xe27f2ffe,0x1e5107f9,0xedaf89bf,0x124ba67f,0xe58de68d,0x189200e1,
39810         0x6df5abee,0x962732a3,0xacbeb4aa,0x72cc37cf,0xe93c5a76,0xb0c5fa96,
39811         0xde63393b,0x4c2a317c,0x830b2d6c,0x97f65e67,0x1be5b96a,0x4afc3504,
39812         0x730ce66d,0x0bf40a60,0x9340d84f,0x96a1ba79,0x07626b08,0x3ee18254,
39813         0x7ab0cbf5,0x01db35db },
39814       { 0xac0efee2,0x6e0fbc2d,0xd71dbb45,0x8406ebcd,0x19b69abe,0xe72bde3e,
39815         0x37e01822,0x49cb7e61,0x11458b4c,0xcbb8c01c,0x687c5d63,0x420b4847,
39816         0x454c6776,0x1847dfa1,0xd1839d18,0xbede911d,0x278df046,0x1b9dc9c9,
39817         0x881a336c,0x294bd62b,0x93e77adc,0x7f096879,0x43ce3ba7,0x7ac90665,
39818         0x7764eefc,0x148695fd,0x9ac465cf,0xe0c20f0b,0xa6e2cdb1,0x636e8d28,
39819         0xd755341d,0x7b6ba98c } },
39820     /* 239 */
39821     { { 0xc1881ab4,0xcb1d9e03,0xb3168c88,0x19c25d55,0x282364ce,0xa82d3d47,
39822         0xf161aa24,0x95994390,0xe1ebb2c9,0x7838bc00,0xbdec7a75,0x8fd5dfcc,
39823         0x4ff7220a,0x4dd203c2,0x0efeff48,0x5ec173b3,0x16428b35,0x99f1d2b3,
39824         0x056e813f,0xc06bd9e5,0xc0b319f1,0x929172ba,0xfd223b15,0x6ae0e384,
39825         0x98d091ed,0xbd01059e,0xa654648e,0x6b3168e4,0x3375e798,0x2211447f,
39826         0x71eb4508,0x47e81019 },
39827       { 0xbc8c290d,0x7045d45a,0x810fb33a,0xa33d1355,0x46fbbf2f,0x2baf0092,
39828         0x385c7cd9,0xacff3f1b,0xe161985c,0xc5b150ec,0x2a888748,0xc6ee0a7f,
39829         0x5e88dcc8,0x9d888c8e,0xccb86443,0x4dd735f2,0x3c40f6f2,0xcc1e13b7,
39830         0xf3fed691,0xfc3a25ff,0x257ee5c7,0x4cb43b17,0xf32db135,0xaa654f93,
39831         0x02dff2d3,0x44f58d0a,0xa8ca6394,0x78e3f188,0xf3e86697,0x39646cce,
39832         0xe0dce87b,0x785b1902 } },
39833     /* 240 */
39834     { { 0xa92f9a20,0xfcce2361,0x9d64540e,0xb7bdca87,0x1d00d7c5,0xd4739a85,
39835         0x2e97c926,0x067ac8dc,0x78da6a8b,0x2aea3ffe,0x63c51b69,0x6828bf54,
39836         0x7155141a,0x76f1c479,0x3977d810,0xf4bcbef6,0x541bce7a,0x75bc4949,
39837         0xd17041a5,0xe01f4066,0x87755eaf,0xd282d5bd,0x59e7ae80,0x6e2107dd,
39838         0x382ab36f,0xaa56e166,0xb9d1d634,0x65ee8ef6,0xce4ed844,0x99a2160a,
39839         0xb7712c27,0x6557c367 },
39840       { 0xd75b6e52,0x561b0268,0x118d0e89,0xb0813640,0x6a2eb1ae,0xcff53330,
39841         0x6d090894,0x4e462226,0xb5fc1d48,0xbb351227,0x57a3062d,0x9365ea07,
39842         0xd66e2dc5,0x4caca37b,0xb9095887,0x220d7d23,0x8c4473bf,0x9c0fd393,
39843         0x6787da4f,0xadff370a,0xd057f4b8,0xef0aebcc,0x1173f33a,0x205e744c,
39844         0x925a26b4,0xb8d1f0a5,0x722fbbfd,0xa9364f49,0x8227d284,0xc891ae77,
39845         0xa0e08ab4,0x15c40d04 } },
39846     /* 241 */
39847     { { 0x2a0e18d1,0x9baf169a,0x4c0327c2,0x9971c017,0x7bc262ce,0xd81a323f,
39848         0x818ff379,0x2099db8d,0x4cd3c330,0x663f663d,0x011a0553,0xef5325c3,
39849         0xf980a470,0x9cd70bdc,0x1c9ed070,0xe64452d1,0xac676e13,0xafbf43f4,
39850         0xae85c2a5,0x97bec0a6,0x470490c4,0x2faae550,0x491e6ba9,0x0ab97a87,
39851         0xaafa9914,0x4055f537,0x36726557,0xfc95adbb,0xd119d6bf,0x646343b9,
39852         0x9d341e37,0x788e94a0 },
39853       { 0x9c53461a,0x053a6fe5,0x08e3b6ed,0x75ec897e,0x0768d939,0xa8f5d2f3,
39854         0xcc213d4f,0x9bd6bff6,0x05b0147c,0x590c7b41,0x7c7b8169,0x20a3628b,
39855         0x5bce78e9,0xc66a086e,0x4dec1d8f,0x3dd4d282,0xc19dcce9,0x890acf44,
39856         0xd8435a7e,0x6632d875,0xea6381b2,0x590167c1,0xf0dcc128,0xb2259797,
39857         0x46f8d463,0x91a612b4,0xc15efa39,0x42185d78,0x119f6788,0xdf55ec37,
39858         0x780dea93,0x91b19cc6 } },
39859     /* 242 */
39860     { { 0xcb5d8b80,0xebf2709d,0xfc35660e,0x03b96182,0x055ef969,0xb873d991,
39861         0xe47c4342,0xd1ea4b4d,0xd54f8867,0xcc4b9244,0xfd8d77ef,0x93b1a2ca,
39862         0xe8c1f563,0x068d24e7,0x49973056,0x5f5fabb6,0x0542374f,0x83248c50,
39863         0x3f38e913,0xc36de2b5,0x7bb680be,0xed07e8eb,0xd8f313b5,0x964813d7,
39864         0xafd2d392,0x7bb6a069,0x0848a31a,0xc06d848e,0xe4f0c325,0x6867fb2f,
39865         0x067343af,0x3c2ba834 },
39866       { 0x9d3ad63b,0xab62d775,0x59e0eb1f,0x3f9cab97,0x3885e117,0x70332a63,
39867         0xe20b2f9e,0xf22cafce,0x49eca947,0xb529ba7e,0x6228d88d,0x24954216,
39868         0x39239561,0x80ea23ec,0xd4370644,0x1b8907e7,0x563e4e44,0x4b7fa455,
39869         0xb2a4b0fa,0xcca9829e,0x48060792,0xd0a720a4,0x246991ce,0x8ccdda0c,
39870         0x348d086b,0x37a2325b,0xf60aee13,0x566ed509,0x147f253f,0x3d30e091,
39871         0xc1073bd8,0x1fa627a5 } },
39872     /* 243 */
39873     { { 0x42478fd4,0xa11222a2,0x670b2000,0xacf4c6f1,0x8359c6de,0xf71bb04f,
39874         0x7b93cdbc,0x618e2829,0x230db60b,0x96e1bae3,0x965b3b29,0xf17fd3b4,
39875         0xbc7055dd,0xa58639c6,0x4b817d7f,0xc3ea92ed,0xd23b08a4,0x9082b2a6,
39876         0xdc17010e,0x8471228a,0x20e89d97,0x753b9e46,0x03ff77c9,0xcf7e4f97,
39877         0x2bbe60e5,0x6c3f8245,0xb80e017d,0x9e432cbc,0xc0a45edb,0x150a5acd,
39878         0x4798743e,0x67b8bd05 },
39879       { 0xf4797cf7,0xe66079b4,0xd03fde02,0xe31c998a,0x54caaef1,0x5aa3763a,
39880         0xf7649711,0x64d9a1fe,0xaf29b1a7,0x7ce0dc73,0xfb66ca93,0x6661b083,
39881         0x32fb6a78,0xbf4d74fe,0xdf00a561,0x25f6ef09,0x831d1159,0x2bc4383f,
39882         0x536bde37,0x6d5cc10c,0x882cc65b,0xd4945f9f,0x451a99b8,0x81f48f13,
39883         0x6bac11a4,0x140161cd,0xf18a4a0a,0x9d94d4ed,0xa467a824,0x65363165,
39884         0xa4c9aedf,0x74297aa9 } },
39885     /* 244 */
39886     { { 0xe21124ba,0xc49758a4,0xa87ffbd2,0x99bd8198,0x3d6638a8,0x45fbcdd1,
39887         0x15f7bf76,0x94645ff8,0xc4e6d57e,0x5fa6736f,0x92e61db9,0x1eae6475,
39888         0xcbdf944a,0x79575c0c,0x25b31d74,0xa3d13047,0x4cab5ae6,0x7881df22,
39889         0x1a2887f2,0x8dbfd299,0xa26ac459,0x23d07590,0xd8661d4a,0x2e589852,
39890         0x8a0140f7,0x37b5c13b,0x3fb3782a,0x0f94199e,0x1bc14e90,0x722aa059,
39891         0xd55bbb12,0x89aab7ba },
39892       { 0xd656bdc7,0x8b345a96,0xe176cd3b,0x43bdc8af,0x32d64c43,0xd69518b6,
39893         0x79b82b41,0xfcf364a7,0xffb0cf82,0x907b344e,0x5101287b,0xf3d0c83c,
39894         0x34cd90ef,0xe9f26a59,0x07082b5c,0xe5f5aaf2,0xece7c165,0x4eb72c75,
39895         0xbe986cd6,0xe9590a81,0xff1536aa,0xfeef498f,0xa8263d5e,0x04560243,
39896         0x54ae872b,0x940be14f,0xe3207686,0xbee7bcc9,0xc1bc4d7a,0xd496a27d,
39897         0x5940ab46,0x002dc297 } },
39898     /* 245 */
39899     { { 0xb69d60c3,0xee533937,0xfe972755,0x260be552,0xc0c725a6,0xb11fb78d,
39900         0xcab2e7c2,0x6982c27e,0xee2322cb,0x4bceedd9,0x122704f7,0x952b19ed,
39901         0x854a6165,0x2df4c285,0x7b192485,0xba40b5bf,0x0119f52a,0xfcbca950,
39902         0xe5add86f,0x7467d1cb,0xd9d0f2c1,0x9bf536fb,0xb8d4ebc9,0x3c296e34,
39903         0x05a81317,0x0495f8f4,0x73335f76,0x8c59e8d6,0xe0542122,0x0b53d324,
39904         0x3c3bda73,0x4d564535 },
39905       { 0x7e5c0877,0x7322f800,0x0ca9a764,0x481b43e6,0xa2c12716,0x231f4f4b,
39906         0xed3136c2,0x09596857,0x38db30de,0xae826322,0x99908ebc,0x652fad40,
39907         0xaf0d231e,0x0b8d1814,0x09cbc349,0x2680c54b,0x4bf3bf8e,0xfd4562f3,
39908         0x092b595f,0x2985090b,0x5e15fc34,0xe6f39ca4,0xbc378168,0x70175191,
39909         0x845a4a87,0x906944b3,0x82a1541a,0xacc6d74a,0xb155c8b4,0xadc9bab3,
39910         0x77306c62,0x1f2f89ce } },
39911     /* 246 */
39912     { { 0x9affefdf,0x8253ef41,0x4cf9256b,0x05d7ece5,0xb444e483,0x377002f2,
39913         0xcba5471f,0xb189755f,0xd5cbe015,0xc88483cb,0x6a0b8429,0x254f7c69,
39914         0x61f3f61d,0x18850bd4,0x0a247157,0x7ba21089,0xd92eeb0d,0x35abbc2e,
39915         0x965dec89,0xfb56cabe,0xbc55684a,0x9da23724,0x6a7a7492,0xd8ba396f,
39916         0x2ef4ba46,0xfcb90db7,0x9909b27a,0xdd234fe0,0x76f4366e,0xbdf3c164,
39917         0x17e50d47,0x09c8097f },
39918       { 0x60050c07,0x6a04b140,0x43a8e37e,0xc29e8318,0xbb55e41f,0xcb9429b2,
39919         0x2ce60e3a,0xed2fea5a,0xdb9d82f4,0xdc7b1ff3,0x687d37fa,0x48ebecc3,
39920         0xecb07539,0x79153e32,0x57075692,0x6a60054f,0x800759ba,0x3871cd0c,
39921         0x30922df1,0x17a7386f,0x83357b7c,0x4e9fc59e,0x39415186,0x1d26b3a9,
39922         0xd34db889,0x912a0222,0x59fcdb71,0x6672fcf4,0x44ff3036,0x5a3f268d,
39923         0x6911e16c,0x6f113ed3 } },
39924     /* 247 */
39925     { { 0x1836f1c9,0x52a9df59,0x4232307d,0xfa6519f5,0x5ded285a,0x8406c701,
39926         0xaf627f75,0x0a1545ca,0xace0417d,0xae1111ee,0xa6113443,0xfb28bdf6,
39927         0x52dbcbcb,0xde9ef0ab,0x7813e658,0xe9dc181b,0x99127225,0x0b1dabdb,
39928         0x22814c59,0x5f0598e3,0xd934ee7e,0x5c3b966e,0xb99ba4bf,0x4eb84eda,
39929         0x3c1b55e7,0xb2919a34,0x94aa860f,0xa9addb49,0xf6811ff6,0x1b7220df,
39930         0xd1a183e2,0x6636a23b },
39931       { 0x20587283,0xdf5d5a2d,0xef07fc5d,0x0b3822c9,0x0ef6de38,0x1786bd55,
39932         0x25d1671d,0x163cf907,0x1cdb1def,0x74bf971f,0x0842fc4a,0x5749e830,
39933         0x27f854f7,0x0e2edbc7,0xbce24acb,0xbb27bbda,0x05bed08d,0xc1b19cec,
39934         0xf7c904bc,0xaada123e,0xd89982db,0x02429f1b,0x65f6e632,0x49d3616e,
39935         0xee59fd32,0xa3789fa8,0xfe9f29f5,0x160ba3ba,0xaf5378a0,0x0f2d3b61,
39936         0x73c2a6f8,0x7aeecc76 } },
39937     /* 248 */
39938     { { 0xdc43b0db,0xf3a4757c,0x98119cad,0x3d8a4e85,0x4616c156,0xf8095bf6,
39939         0x4f533e97,0x3e2a07bc,0x39cfc5ad,0xa9824367,0xcd68052c,0x18a6ba3a,
39940         0x8a1cec66,0xbd60e590,0x02b1b695,0xae3841a5,0x190a195b,0x986dff12,
39941         0xad31fd9b,0x2df2beac,0xcc728f7b,0x7d893224,0x0cf0a992,0xc38ea738,
39942         0x586a44ea,0xa8439a80,0x1615f03c,0xede7f7f0,0x27a1f885,0x48249908,
39943         0xb78a7645,0x28ec4006 },
39944       { 0xa2fe0009,0xe1820c2e,0xf13874e9,0xe11ba5d2,0xc524db52,0x97522454,
39945         0x7fede529,0x4d477426,0x9b2500d4,0x01d3419a,0x1869244b,0xce08a492,
39946         0xdd1be1b9,0xba169023,0x32a301e0,0x242c3e54,0x70906788,0x9b56f7ba,
39947         0xc74a8cc4,0xf0ad2a09,0xd76f9439,0x99cd1841,0x621fb60e,0xeddafe0b,
39948         0xbc397634,0x056bee54,0xff7f0a84,0x4653f860,0x2011c0af,0x6bd4876f,
39949         0x0c9525c3,0x134f4cc7 } },
39950     /* 249 */
39951     { { 0xe938dff4,0x9621a3ec,0x486a79a3,0x7d101a7b,0xde950537,0xf2c4ef97,
39952         0xe65d87db,0xf3184099,0x373b8cfa,0xb89c7ffb,0xe842916e,0x68baa505,
39953         0x4ebea764,0xa790fd09,0xe592892b,0x679df6d4,0xfcfed741,0x2023331c,
39954         0x9880ff21,0x0bf4efd2,0xd0344501,0x7ca78ddd,0x342858c8,0x2cb09ecb,
39955         0x2575487a,0x9e5eb6dc,0xebcb0491,0x50675a15,0x7381d471,0x09d2e74f,
39956         0x83d3d6f4,0x6ea37829 },
39957       { 0x4e5cc40a,0xc65c094b,0x1af37dfb,0x7a2e3f6a,0xf9026e44,0xef677e9d,
39958         0x93880f53,0xb7878c95,0x7f644aa9,0x4aa30b07,0x2f208c3c,0xa0c51683,
39959         0x658d663b,0x7c0277ae,0xae1d9130,0xef0b3c38,0x695c3ea4,0x302f37a7,
39960         0x6a0c5e0d,0xe004c1c5,0x20cbcf9f,0x9fd495c4,0x568a0e7c,0x706d5b9d,
39961         0x59286454,0x8b225dff,0x8d9a709c,0x527d4465,0x87c08d68,0x47c558da,
39962         0xbb4ef07d,0x606ee6e6 } },
39963     /* 250 */
39964     { { 0x57c621f6,0x02d99fc7,0x7fe83d48,0x292e40c1,0x9ef199b0,0x1bdfc7a1,
39965         0xe62c7666,0x78a04102,0xe6738753,0x16cda370,0x1e3a65af,0xbc81974d,
39966         0xf78fe209,0x19742048,0xbf5981c6,0xc83a058a,0x9c89702d,0xf26b2434,
39967         0x9d1a678a,0x988b2f1e,0xff29ae29,0x472bf9b0,0x1d7cf5ec,0xa143e398,
39968         0xb268ddd8,0x9c9d7e45,0x5fc4ff76,0x166cda55,0xa4aa7673,0x6044cdf0,
39969         0xe9148707,0x49dba6f7 },
39970       { 0xa758e37a,0x20e47fb2,0x2d8eaf66,0xaf6b31d7,0x6f9c2210,0x352ad5f9,
39971         0x90efc32b,0x0093f727,0x41e4b264,0x435c99dc,0x05b15795,0xbfa878e0,
39972         0x0e673575,0x99c520a4,0x87eea759,0xca682594,0xf12a348b,0x029f7b81,
39973         0x2aa2ce35,0xa547cc18,0xead5e2c5,0xa11d874b,0x55682cdf,0x9af0349b,
39974         0x8bbe8e66,0xf86ebfea,0xf55394ab,0x3dab8782,0xebc8eb8f,0x458bf797,
39975         0x9b7de78c,0x4890a7a4 } },
39976     /* 251 */
39977     { { 0x8da995f6,0xd7299689,0xec6156ef,0xd39eaae7,0x356a82d5,0x6959040c,
39978         0xc135bcfe,0xb2046b21,0x0f595c78,0xea720b64,0xe7c5fb40,0x02824efa,
39979         0x0edb3bfc,0x97d8fd4c,0x79f24ebe,0x12f02905,0x187ea6b9,0x16fc47cf,
39980         0x789d5c23,0xc219fd27,0x89263ecc,0x233a6b6c,0x8b6d30a6,0x823634b2,
39981         0xc9b33680,0xca352e25,0x40c77456,0x9388d6ca,0x3c92065b,0xf8e55b0b,
39982         0x02439a76,0x5c17474b },
39983       { 0x8aaccab5,0xd888e7c2,0xaaced05b,0x18027836,0xccec0f65,0x185b877d,
39984         0x125c2882,0x93cadc1c,0x67fdc54c,0x45df540a,0xc2788a33,0x4f3c86e2,
39985         0xe3a0fa2c,0x3e874469,0x273983cf,0xc59daa47,0x4a96d8a5,0x3063c48b,
39986         0xc2e58915,0xc38d2bcf,0x84e428c3,0x90e78b87,0xf0c4fd53,0x900a292c,
39987         0x941e6005,0xb7f92db7,0x6ca53a1c,0x95679241,0xb1ab0fa7,0x35f6f31d,
39988         0x7b58408c,0x5d675eb4 } },
39989     /* 252 */
39990     { { 0x870c6025,0xaeee1a77,0x91a2dfca,0xfc4a23b7,0x386b64c4,0x7b0e60c4,
39991         0xe5ae72b1,0xd5d5b17d,0x9eefa212,0x6dfc88ac,0xd4038b96,0x4feaefbe,
39992         0x8e2d2ecc,0x099ac356,0x012af207,0x548ea612,0x89c31218,0x4ffed9db,
39993         0xe0e67331,0x1c1e91c4,0xaf8300e0,0x009bb64f,0x6773c3be,0x8780501c,
39994         0xc08219fa,0xe0cd6ede,0xf81b06ff,0x7c055e07,0xe080b36f,0x82b63f9c,
39995         0x0a9feca3,0x02fccbaf },
39996       { 0xb47cac61,0x9991d4d1,0xab86e12c,0x2e9d1687,0x2b94f042,0x8c6855ec,
39997         0x48e648e5,0xca400519,0xef89ac57,0x9ba91fb2,0x1be792cd,0x4f419206,
39998         0xbd0f1e15,0x82d221cb,0xfc444019,0x062eb13b,0x99790fdc,0xf3a97c32,
39999         0x6067a64b,0x4e796d94,0x6d23775a,0xc46dd300,0xed7f0f23,0x8672c4d5,
40000         0x3b4f63d7,0x821851dc,0xd26273f2,0x50a3ae0c,0xeac60f6f,0x800e58fc,
40001         0x13845545,0x56f1e456 } },
40002     /* 253 */
40003     { { 0x32c24f3b,0x01ccb3f6,0x06d817e6,0x99eb1c7f,0x6aa26776,0x8dc640bb,
40004         0x0845d5e0,0x7838affe,0xf81a79a8,0xf34fecb1,0x3e6819b0,0x6a2e282d,
40005         0x8237a4b8,0xc4b977ce,0x87636439,0x0f46b3db,0x97970497,0xa465f540,
40006         0x8791be43,0xd7e08762,0x34198ec6,0x00220b6c,0x093d94bb,0x57b38637,
40007         0x29d690b2,0x84012e16,0x20aad1a4,0x02ec9db5,0x85dc34e3,0xafee2fc6,
40008         0x25500cf8,0x911d1936 },
40009       { 0xf5e5af5b,0x13b1bd58,0x7b6a22a7,0xa7ca263b,0xf3af2adc,0xab6bec4d,
40010         0xa04420bd,0x16651e59,0x4ba36c11,0x3b448b3b,0xff424310,0x3c62bfcd,
40011         0xf1a96cbb,0xde15c4a5,0xe4d1f980,0xbe0ad8a1,0x36673a3a,0x812bd14e,
40012         0x9212acdd,0x40303af6,0x576095ce,0x8f6dab9c,0x107f5ca5,0x7df1882a,
40013         0x8896a3b0,0xb903e63c,0xd863b3f0,0xf5048544,0xc09887de,0x5e5019b9,
40014         0xa0f53865,0x2be744fe } },
40015     /* 254 */
40016     { { 0x5b50f324,0x054cd05f,0x1ea3c7a2,0xb9b1eb24,0x7ff8e6b7,0x4a858a5c,
40017         0xec040882,0xd83902fe,0xd0cba9bd,0x72b26494,0xb29c9e1e,0xd0176f90,
40018         0xcebadb81,0x05d4eb02,0x372b8bfc,0x874405b1,0x79ead190,0x5c412881,
40019         0xec2b48cd,0xd44a3dd3,0x3f4d5033,0x84499a77,0x564c3a09,0xb37b38cd,
40020         0xf42e803b,0x80e99497,0xb8f518b2,0xc07b47a0,0x3568fde4,0xc710e3c5,
40021         0xcead0e7a,0x735f542f },
40022       { 0x38380039,0xcaa9a171,0xf74d19c8,0xadfafe17,0xccbc1a8b,0x92d4393e,
40023         0xfe029705,0x3c5dbf39,0x930e9b36,0x4552b5ab,0x2afd494a,0x7ee63032,
40024         0x3f02ac43,0x826a9ad7,0x99356298,0x98c53562,0x7342bb39,0x0c869f87,
40025         0xe4f9b79a,0xd7510020,0xd34789a9,0x6361d1a4,0xcfa85637,0xf0ded5ba,
40026         0x88ac07e4,0x407ee73f,0x09ef1cbd,0xfac7d03f,0x4d475bad,0x25d697cb,
40027         0x14bd399e,0x1e984c9d } },
40028     /* 255 */
40029     { { 0x4850c817,0xc76d0561,0x3489812d,0xb08a5b19,0x5e58cbbe,0x7273d154,
40030         0x4be61e5a,0x8900b5fa,0xd7aeb8e1,0xaa088691,0xd35a3d4b,0xe66666af,
40031         0x57ec7d3d,0x38a2c199,0x668d6f5c,0xa0648e8f,0x7adc1746,0x1f9fc92c,
40032         0x843065c3,0x23a116c0,0x61e6ae69,0x36370a20,0x2aa47e73,0x626c3736,
40033         0xdeff6d84,0x540c25f2,0xcdbed2d4,0x9804824c,0x039a9492,0x4b5bfce0,
40034         0x76942e01,0x6c474a56 },
40035       { 0x7d88e3a1,0x3aeb9a41,0xc484742a,0x105d3c88,0x3fe61131,0xe59de8d1,
40036         0x1a869e8b,0x148f5b6b,0xaa75d90a,0x7a8abc59,0x62146013,0x2f0c9bc7,
40037         0xc3824cd9,0x43faa747,0x6a5d0b92,0x81763a18,0x9bcbaebc,0xbbc341bc,
40038         0xf745d1dd,0xe1813160,0xb75ce5f4,0xa53ce52d,0xd50de4c2,0x15eae66c,
40039         0x75d7656d,0x5ed8996c,0xc4ca552a,0xe4ff5711,0x3c5305b4,0x215e985a,
40040         0xfa1ba2ce,0x6b258954 } },
40041 };
40042 
40043 /* Multiply the base point of P1024 by the scalar and return the result.
40044  * If map is true then convert result to affine coordinates.
40045  *
40046  * Stripe implementation.
40047  * Pre-generated: 2^0, 2^128, ...
40048  * Pre-generated: products of all combinations of above.
40049  * 8 doubles and adds (with qz=1)
40050  *
40051  * r     Resulting point.
40052  * k     Scalar to multiply by.
40053  * map   Indicates whether to convert result to affine.
40054  * ct    Constant time required.
40055  * heap  Heap to use for allocation.
40056  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
40057  */
sp_1024_ecc_mulmod_base_32(sp_point_1024 * r,const sp_digit * k,int map,int ct,void * heap)40058 static int sp_1024_ecc_mulmod_base_32(sp_point_1024* r, const sp_digit* k,
40059         int map, int ct, void* heap)
40060 {
40061     return sp_1024_ecc_mulmod_stripe_32(r, &p1024_base, p1024_table,
40062                                       k, map, ct, heap);
40063 }
40064 
40065 #endif
40066 
40067 /* Multiply the base point of P1024 by the scalar and return the result.
40068  * If map is true then convert result to affine coordinates.
40069  *
40070  * km    Scalar to multiply by.
40071  * r     Resulting point.
40072  * map   Indicates whether to convert result to affine.
40073  * heap  Heap to use for allocation.
40074  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
40075  */
sp_ecc_mulmod_base_1024(const mp_int * km,ecc_point * r,int map,void * heap)40076 int sp_ecc_mulmod_base_1024(const mp_int* km, ecc_point* r, int map, void* heap)
40077 {
40078 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40079     sp_point_1024* point = NULL;
40080     sp_digit* k = NULL;
40081 #else
40082     sp_point_1024  point[1];
40083     sp_digit k[32];
40084 #endif
40085     int err = MP_OKAY;
40086 
40087 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40088     point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap,
40089                                          DYNAMIC_TYPE_ECC);
40090     if (point == NULL)
40091         err = MEMORY_E;
40092     if (err == MP_OKAY) {
40093         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32, heap,
40094                                DYNAMIC_TYPE_ECC);
40095         if (k == NULL)
40096             err = MEMORY_E;
40097     }
40098 #endif
40099 
40100     if (err == MP_OKAY) {
40101         sp_1024_from_mp(k, 32, km);
40102 
40103             err = sp_1024_ecc_mulmod_base_32(point, k, map, 1, heap);
40104     }
40105     if (err == MP_OKAY) {
40106         err = sp_1024_point_to_ecc_point_32(point, r);
40107     }
40108 
40109 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40110     if (k != NULL)
40111         XFREE(k, heap, DYNAMIC_TYPE_ECC);
40112     if (point != NULL)
40113         XFREE(point, heap, DYNAMIC_TYPE_ECC);
40114 #endif
40115 
40116     return err;
40117 }
40118 
40119 /* Multiply the base point of P1024 by the scalar, add point a and return
40120  * the result. If map is true then convert result to affine coordinates.
40121  *
40122  * km      Scalar to multiply by.
40123  * am      Point to add to scalar mulitply result.
40124  * inMont  Point to add is in montgomery form.
40125  * r       Resulting point.
40126  * map     Indicates whether to convert result to affine.
40127  * heap    Heap to use for allocation.
40128  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
40129  */
sp_ecc_mulmod_base_add_1024(const mp_int * km,const ecc_point * am,int inMont,ecc_point * r,int map,void * heap)40130 int sp_ecc_mulmod_base_add_1024(const mp_int* km, const ecc_point* am,
40131         int inMont, ecc_point* r, int map, void* heap)
40132 {
40133 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40134     sp_point_1024* point = NULL;
40135     sp_digit* k = NULL;
40136 #else
40137     sp_point_1024 point[2];
40138     sp_digit k[32 + 32 * 2 * 5];
40139 #endif
40140     sp_point_1024* addP = NULL;
40141     sp_digit* tmp = NULL;
40142     int err = MP_OKAY;
40143 
40144 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40145     point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap,
40146                                          DYNAMIC_TYPE_ECC);
40147     if (point == NULL)
40148         err = MEMORY_E;
40149     if (err == MP_OKAY) {
40150         k = (sp_digit*)XMALLOC(
40151             sizeof(sp_digit) * (32 + 32 * 2 * 5),
40152             heap, DYNAMIC_TYPE_ECC);
40153         if (k == NULL)
40154             err = MEMORY_E;
40155     }
40156 #endif
40157 
40158     if (err == MP_OKAY) {
40159         addP = point + 1;
40160         tmp = k + 32;
40161 
40162         sp_1024_from_mp(k, 32, km);
40163         sp_1024_point_from_ecc_point_32(addP, am);
40164     }
40165     if ((err == MP_OKAY) && (!inMont)) {
40166         err = sp_1024_mod_mul_norm_32(addP->x, addP->x, p1024_mod);
40167     }
40168     if ((err == MP_OKAY) && (!inMont)) {
40169         err = sp_1024_mod_mul_norm_32(addP->y, addP->y, p1024_mod);
40170     }
40171     if ((err == MP_OKAY) && (!inMont)) {
40172         err = sp_1024_mod_mul_norm_32(addP->z, addP->z, p1024_mod);
40173     }
40174     if (err == MP_OKAY) {
40175             err = sp_1024_ecc_mulmod_base_32(point, k, 0, 0, heap);
40176     }
40177     if (err == MP_OKAY) {
40178             sp_1024_proj_point_add_32(point, point, addP, tmp);
40179 
40180         if (map) {
40181                 sp_1024_map_32(point, point, tmp);
40182         }
40183 
40184         err = sp_1024_point_to_ecc_point_32(point, r);
40185     }
40186 
40187 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40188     if (k != NULL)
40189         XFREE(k, heap, DYNAMIC_TYPE_ECC);
40190     if (point)
40191         XFREE(point, heap, DYNAMIC_TYPE_ECC);
40192 #endif
40193 
40194     return err;
40195 }
40196 
40197 #ifndef WOLFSSL_SP_SMALL
40198 /* Generate a pre-computation table for the point.
40199  *
40200  * gm     Point to generate table for.
40201  * table  Buffer to hold pre-computed points table.
40202  * len    Length of table.
40203  * heap   Heap to use for allocation.
40204  * returns BAD_FUNC_ARG when gm or len is NULL, LENGTH_ONLY_E when table is
40205  * NULL and length is returned, BUFFER_E if length is too small and 0 otherwise.
40206  */
sp_ecc_gen_table_1024(const ecc_point * gm,byte * table,word32 * len,void * heap)40207 int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len,
40208     void* heap)
40209 {
40210 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40211     sp_point_1024* point = NULL;
40212     sp_digit* t = NULL;
40213 #else
40214     sp_point_1024 point[1];
40215     sp_digit t[5 * 2 * 32];
40216 #endif
40217     int err = MP_OKAY;
40218 
40219     if ((gm == NULL) || (len == NULL)) {
40220         err = BAD_FUNC_ARG;
40221     }
40222 
40223     if ((err == MP_OKAY) && (table == NULL)) {
40224         *len = sizeof(sp_table_entry_1024) * 256;
40225         err = LENGTH_ONLY_E;
40226     }
40227     if ((err == MP_OKAY) && (*len < (int)(sizeof(sp_table_entry_1024) * 256))) {
40228         err = BUFFER_E;
40229     }
40230 
40231 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40232     if (err == MP_OKAY) {
40233         point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap,
40234             DYNAMIC_TYPE_ECC);
40235         if (point == NULL)
40236             err = MEMORY_E;
40237     }
40238     if (err == MP_OKAY) {
40239         t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 5 * 2 * 32, heap,
40240             DYNAMIC_TYPE_ECC);
40241         if (t == NULL)
40242             err = MEMORY_E;
40243     }
40244 #endif
40245 
40246     if (err == MP_OKAY) {
40247         sp_1024_point_from_ecc_point_32(point, gm);
40248             err = sp_1024_gen_stripe_table_32(point,
40249                 (sp_table_entry_1024*)table, t, heap);
40250     }
40251     if (err == 0) {
40252         *len = sizeof(sp_table_entry_1024) * 256;
40253     }
40254 
40255 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40256     if (t != NULL)
40257         XFREE(t, heap, DYNAMIC_TYPE_ECC);
40258     if (point != NULL)
40259         XFREE(point, heap, DYNAMIC_TYPE_ECC);
40260 #endif
40261 
40262     return err;
40263 }
40264 #else
40265 /* Generate a pre-computation table for the point.
40266  *
40267  * gm     Point to generate table for.
40268  * table  Buffer to hold pre-computed points table.
40269  * len    Length of table.
40270  * heap   Heap to use for allocation.
40271  * returns BAD_FUNC_ARG when gm or len is NULL, LENGTH_ONLY_E when table is
40272  * NULL and length is returned, BUFFER_E if length is too small and 0 otherwise.
40273  */
sp_ecc_gen_table_1024(const ecc_point * gm,byte * table,word32 * len,void * heap)40274 int sp_ecc_gen_table_1024(const ecc_point* gm, byte* table, word32* len,
40275     void* heap)
40276 {
40277     int err = 0;
40278 
40279     if ((gm == NULL) || (len == NULL)) {
40280         err = BAD_FUNC_ARG;
40281     }
40282 
40283     if ((err == 0) && (table == NULL)) {
40284         *len = 0;
40285         err = LENGTH_ONLY_E;
40286     }
40287     if ((err == 0) && (*len != 0)) {
40288         err = BUFFER_E;
40289     }
40290     if (err == 0) {
40291         *len = 0;
40292     }
40293 
40294     (void)heap;
40295 
40296     return err;
40297 }
40298 #endif
40299 /* Multiply the point by the scalar and return the result.
40300  * If map is true then convert result to affine coordinates.
40301  *
40302  * km     Scalar to multiply by.
40303  * gm     Point to multiply.
40304  * table  Pre-computed points.
40305  * r      Resulting point.
40306  * map    Indicates whether to convert result to affine.
40307  * heap   Heap to use for allocation.
40308  * returns MEMORY_E when memory allocation fails and MP_OKAY on success.
40309  */
sp_ecc_mulmod_table_1024(const mp_int * km,const ecc_point * gm,byte * table,ecc_point * r,int map,void * heap)40310 int sp_ecc_mulmod_table_1024(const mp_int* km, const ecc_point* gm, byte* table,
40311         ecc_point* r, int map, void* heap)
40312 {
40313 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40314     sp_point_1024* point = NULL;
40315     sp_digit* k = NULL;
40316 #else
40317     sp_point_1024 point[1];
40318     sp_digit k[32];
40319 #endif
40320     int err = MP_OKAY;
40321 
40322 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40323     point = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), heap,
40324         DYNAMIC_TYPE_ECC);
40325     if (point == NULL) {
40326         err = MEMORY_E;
40327     }
40328     if (err == MP_OKAY) {
40329         k = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32, heap, DYNAMIC_TYPE_ECC);
40330         if (k == NULL)
40331             err = MEMORY_E;
40332     }
40333 #endif
40334 
40335     if (err == MP_OKAY) {
40336         sp_1024_from_mp(k, 32, km);
40337         sp_1024_point_from_ecc_point_32(point, gm);
40338 
40339 #ifndef WOLFSSL_SP_SMALL
40340             err = sp_1024_ecc_mulmod_stripe_32(point, point,
40341                 (const sp_table_entry_1024*)table, k, map, 0, heap);
40342 #else
40343         (void)table;
40344         err = sp_1024_ecc_mulmod_32(point, point, k, map, 0, heap);
40345 #endif
40346     }
40347     if (err == MP_OKAY) {
40348         err = sp_1024_point_to_ecc_point_32(point, r);
40349     }
40350 
40351 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
40352     if (k != NULL)
40353         XFREE(k, heap, DYNAMIC_TYPE_ECC);
40354     if (point != NULL)
40355         XFREE(point, heap, DYNAMIC_TYPE_ECC);
40356 #endif
40357 
40358     return err;
40359 }
40360 
40361 /* Multiply p* in projective co-ordinates by q*.
40362  *
40363  * r.x = p.x - (p.y * q.y)
40364  * r.y = (p.x * q.y) + p.y
40365  *
40366  * px  [in,out]  A single precision integer - X ordinate of number to multiply.
40367  * py  [in,out]  A single precision integer - Y ordinate of number to multiply.
40368  * q   [in]      A single precision integer - multiplier.
40369  * t   [in]      Two single precision integers - temps.
40370  */
sp_1024_proj_mul_qx1_32(sp_digit * px,sp_digit * py,const sp_digit * q,sp_digit * t)40371 static void sp_1024_proj_mul_qx1_32(sp_digit* px, sp_digit* py,
40372         const sp_digit* q, sp_digit* t)
40373 {
40374     sp_digit* t1 = t;
40375     sp_digit* t2 = t + 2 * 32;
40376 
40377     /* t1 = p.x * q.y */
40378     sp_1024_mont_mul_32(t1, px, q, p1024_mod, p1024_mp_mod);
40379     /* t2 = p.y * q.y */
40380     sp_1024_mont_mul_32(t2, py, q, p1024_mod, p1024_mp_mod);
40381     /* r.x = p.x - (p.y * q.y) */
40382     sp_1024_mont_sub_32(px, px, t2, p1024_mod);
40383     /* r.y = (p.x * q.y) + p.y */
40384     sp_1024_mont_add_32(py, t1, py, p1024_mod);
40385 }
40386 
40387 /* Square p* in projective co-ordinates.
40388  *
40389  *   px' = (p.x + p.y) * (p.x - p.y) = p.x^2 - p.y^2
40390  *   py' = 2 * p.x * p.y
40391  *
40392  * px  [in,out]  A single precision integer - X ordinate of number to square.
40393  * py  [in,out]  A single precision integer - Y ordinate of number to square.
40394  * t   [in]      Two single precision integers - temps.
40395  */
sp_1024_proj_sqr_32(sp_digit * px,sp_digit * py,sp_digit * t)40396 static void sp_1024_proj_sqr_32(sp_digit* px, sp_digit* py, sp_digit* t)
40397 {
40398     sp_digit* t1 = t;
40399     sp_digit* t2 = t + 2 * 32;
40400 
40401     /* t1 = p.x + p.y */
40402     sp_1024_mont_add_32(t1, px, py, p1024_mod);
40403     /* t2 = p.x - p.y */
40404     sp_1024_mont_sub_32(t2, px, py, p1024_mod);
40405     /* r.y = p.x * p.y */
40406     sp_1024_mont_mul_32(py, px, py, p1024_mod, p1024_mp_mod);
40407     /* r.x = (p.x + p.y) * (p.x - p.y) */
40408     sp_1024_mont_mul_32(px, t1, t2, p1024_mod, p1024_mp_mod);
40409     /* r.y = (p.x * p.y) * 2 */
40410     sp_1024_mont_dbl_32(py, py, p1024_mod);
40411 }
40412 
40413 #ifdef WOLFSSL_SP_SMALL
40414 /* Perform the modular exponentiation in Fp* for SAKKE.
40415  *
40416  * Simple square and multiply when expontent bit is one algorithm.
40417  * Square and multiply performed in Fp*.
40418  *
40419  * base  [in]   Base. MP integer.
40420  * exp   [in]   Exponent. MP integer.
40421  * res   [out]  Result. MP integer.
40422  * returns 0 on success and MEMORY_E if memory allocation fails.
40423  */
sp_ModExp_Fp_star_1024(const mp_int * base,mp_int * exp,mp_int * res)40424 int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res)
40425 {
40426 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
40427     !defined(WOLFSSL_SP_NO_MALLOC)
40428     sp_digit* td;
40429     sp_digit* t;
40430     sp_digit* tx;
40431     sp_digit* ty;
40432     sp_digit* b;
40433     sp_digit* e;
40434 #else
40435     sp_digit t[4 * 2 * 32];
40436     sp_digit tx[2 * 32];
40437     sp_digit ty[2 * 32];
40438     sp_digit b[2 * 32];
40439     sp_digit e[2 * 32];
40440 #endif
40441     sp_digit* r;
40442     int err = MP_OKAY;
40443     int bits;
40444     int i;
40445 
40446 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
40447     !defined(WOLFSSL_SP_NO_MALLOC)
40448     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 8 * 32 * 2, NULL,
40449                             DYNAMIC_TYPE_TMP_BUFFER);
40450     if (td == NULL) {
40451         err = MEMORY_E;
40452     }
40453 #endif
40454 
40455     if (err == MP_OKAY) {
40456 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
40457     !defined(WOLFSSL_SP_NO_MALLOC)
40458         t  = td;
40459         tx = td + 4 * 32 * 2;
40460         ty = td + 5 * 32 * 2;
40461         b  = td + 6 * 32 * 2;
40462         e  = td + 7 * 32 * 2;
40463 #endif
40464         r = ty;
40465 
40466         bits = mp_count_bits(exp);
40467         sp_1024_from_mp(b, 32, base);
40468         sp_1024_from_mp(e, 32, exp);
40469 
40470         XMEMCPY(tx, p1024_norm_mod, sizeof(sp_digit) * 32);
40471         sp_1024_mul_32(b, b, p1024_norm_mod);
40472         err = sp_1024_mod_32(b, b, p1024_mod);
40473     }
40474     if (err == MP_OKAY) {
40475         XMEMCPY(ty, b, sizeof(sp_digit) * 32);
40476 
40477         for (i = bits - 2; i >= 0; i--) {
40478             sp_1024_proj_sqr_32(tx, ty, t);
40479             if ((e[i / 32] >> (i % 32)) & 1) {
40480                 sp_1024_proj_mul_qx1_32(tx, ty, b, t);
40481             }
40482         }
40483     }
40484 
40485     if (err == MP_OKAY) {
40486         sp_1024_mont_inv_32(tx, tx, t);
40487 
40488         XMEMSET(tx + 32, 0, sizeof(sp_digit) * 32);
40489         sp_1024_mont_reduce_32(tx, p1024_mod, p1024_mp_mod);
40490         XMEMSET(ty + 32, 0, sizeof(sp_digit) * 32);
40491         sp_1024_mont_reduce_32(ty, p1024_mod, p1024_mp_mod);
40492 
40493         sp_1024_mul_32(r, tx, ty);
40494         err = sp_1024_mod_32(r, r, p1024_mod);
40495     }
40496     if (err == MP_OKAY) {
40497         err = sp_1024_to_mp(r, res);
40498     }
40499 
40500 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
40501     !defined(WOLFSSL_SP_NO_MALLOC)
40502     if (td != NULL) {
40503         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
40504     }
40505 #endif
40506     return err;
40507 }
40508 
40509 #else
40510 /* Pre-computed table for exponentiating g.
 * Striping: 8 points at a distance of 128 combined for
 * a total of 256 points.
40513  */
40514 static const sp_digit sp_1024_g_table[256][32] = {
40515     { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
40516       0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
40517       0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
40518       0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
40519       0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
40520       0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
40521       0x00000000, 0x00000000 },
40522     { 0x335c1685, 0x170a46d2, 0xe1007a58, 0xeac9e971, 0x43ca4a73,
40523       0x40e8f3df, 0x82642475, 0x2646f815, 0xb36576d1, 0x3af49bb4,
40524       0x72bf1afb, 0xd89e2d14, 0x2fd151e6, 0x27be882c, 0x8f88717c,
40525       0xaddedc85, 0x16ac6c6f, 0xd6d859bf, 0x2d8eae58, 0x0e741a1b,
40526       0x61c1f30d, 0x6faf7a00, 0x9b67e096, 0x66dbd09a, 0x7d3b4f7d,
40527       0x21f11c06, 0xc727c98e, 0x6152ba02, 0xe86cb221, 0xafd58891,
40528       0x6bd3baf4, 0x59e93c6a },
40529     { 0x71dd4594, 0xe54dd36f, 0x00aef1e6, 0xbbc9cc9f, 0xa19f6530,
40530       0x9ea5a44e, 0x3f520928, 0x8588aa99, 0x8f5c1418, 0x9753794c,
40531       0xc11399fa, 0x118bd792, 0xf5cb6ab5, 0xb9bd3afd, 0x2ecb9652,
40532       0x813d1cb2, 0x40389813, 0xfd456267, 0x4ac8431c, 0x51f7119b,
40533       0x0a180eb6, 0xdd9f6a91, 0x9f7bfa2e, 0x13946d17, 0x50a9d0d9,
40534       0x16f18631, 0x6f8373d3, 0x5f19c20d, 0x9b6a52b9, 0xbe85ac6a,
40535       0x74f62e03, 0x63ef187b },
40536     { 0x016f45e7, 0x7c376b7f, 0x2bec82f8, 0x1c1bdb57, 0xce429b60,
40537       0x7392f741, 0xc7afd81d, 0x6fdbf0a2, 0x7241098b, 0xbda41b1f,
40538       0xbb60f8cf, 0x5b407474, 0xb330bc4d, 0x933e0d41, 0x733fa3be,
40539       0xae182830, 0x0f5c6cd1, 0xa0ed299b, 0x3f9860c8, 0x7ff3354e,
40540       0x15559c41, 0xb1360986, 0x129f85cb, 0xab0cb63c, 0x47685fbe,
40541       0x682ecc49, 0xeb199633, 0x505e8ec2, 0xddac2cda, 0x90dcc794,
40542       0xf192da23, 0x4fe6791c },
40543     { 0x05e8733c, 0x94a423d5, 0x1d5717c1, 0xcc845e65, 0xe961b322,
40544       0x237c7e88, 0xdb4181cc, 0x0c4471c6, 0x713bd721, 0x00c875e2,
40545       0xb2c17b09, 0x9dfde9ed, 0xe88ceaf6, 0x430a6de5, 0x7b81cea6,
40546       0xaaa7a61a, 0x233f98d5, 0xea52d026, 0x60689a9a, 0xb55efdd0,
40547       0x5cac4aab, 0x30cfa7ce, 0x8e950761, 0xfa4db114, 0x4e9a1e52,
40548       0x309570c4, 0x1a040170, 0x18c21f61, 0xbe78d9d2, 0x555d1ffe,
40549       0x561db297, 0x04482a18 },
40550     { 0x73d486d8, 0xe7758ac2, 0x61cdc1e7, 0x8169f946, 0x2188ab4f,
40551       0x723c99fc, 0xf3373630, 0xa0e54f02, 0xbd8c2260, 0x560bee25,
40552       0x4531bc60, 0x28fc307c, 0x7e44feb5, 0xd6f21f1a, 0x57128d37,
40553       0xc8e4499c, 0xd7b2ea45, 0x963b053e, 0x32a3d222, 0x40c27a04,
40554       0x35459668, 0x5b51854d, 0xd73557e9, 0x66e1a49f, 0x8692077a,
40555       0x0d267fd9, 0xe7342702, 0xfa1350d3, 0x68ccdb44, 0x1a9c3f25,
40556       0xdedbf89f, 0x833a0ff8 },
40557     { 0xab376b76, 0xa8c419c7, 0x27d0f0cc, 0x3b7294f3, 0xa90c514d,
40558       0xe56bb9e2, 0xa62575a6, 0x931ba51e, 0x098c0a88, 0x56fee07b,
40559       0xb4c16a2a, 0x04be5aee, 0xe6eb260b, 0xe513350b, 0xa1d5c270,
40560       0x339edad6, 0xe9dbadd1, 0xf366ed59, 0x2dd06ec0, 0x4213be88,
40561       0xcb1187db, 0x22d639c8, 0xd8a1058a, 0x1fec95e1, 0xa2b744f1,
40562       0x03f73ea6, 0xf4f05c0c, 0x741fd51a, 0x85f811a0, 0x2e2df95a,
40563       0xeb24965f, 0x692b3ce3 },
40564     { 0xd2a127b4, 0x0ce6cb72, 0x8f92816f, 0x66a46ea5, 0x47a37616,
40565       0x43ecf463, 0xe0ab96ee, 0x163d9a01, 0xb2edbe8c, 0xc8145c6d,
40566       0x4de4e665, 0x2f426cae, 0x74e252f9, 0x174d0b40, 0x7d2af831,
40567       0x54c240d7, 0x3d652936, 0x581fa397, 0xa09d4695, 0x05b9491c,
40568       0x5452643c, 0x8c4e8533, 0xd4128327, 0x32d64331, 0x70361f25,
40569       0x64479038, 0x89ef09f2, 0x774191b1, 0x81de5fe0, 0xc0cf0aaf,
40570       0xf40042d6, 0x333e430a },
40571     { 0xcf26d3b7, 0x5df04de4, 0xb53f79be, 0x57a77306, 0x1808b664,
40572       0xa4013c5f, 0x85037360, 0xef291ea4, 0x0b061037, 0x1ffc9d7d,
40573       0x65c913bb, 0xd9d04dd9, 0xf13b8587, 0x948a37af, 0xfe3ee755,
40574       0xb5443483, 0x04631386, 0x3fc21e74, 0xcddeb58c, 0xb3a104e5,
40575       0x6572cd52, 0x94fe1862, 0x15aaa408, 0xeb9a71a1, 0x459ea462,
40576       0x8adc6fe5, 0x4aeb02a3, 0xbb18d175, 0x2f7791d1, 0xae127636,
40577       0xd6bbd708, 0x10e8b31d },
40578     { 0x3ed9f1af, 0xb87f03e5, 0x56676166, 0x03ad2477, 0x74ce15b8,
40579       0x38dcd630, 0x26b1e85b, 0x1877e2b0, 0x1af99c15, 0xb1654d17,
40580       0x9382547a, 0x9782e9e4, 0x26d55ef5, 0x6dc7fc7c, 0x2fbeb54c,
40581       0x9038f95d, 0x036c0357, 0xfe590dfe, 0x4fdc3f7f, 0xcfcb6eae,
40582       0xf35e1a88, 0xcb1fbc54, 0xda0a5568, 0x3c8e1db2, 0x5b6f5557,
40583       0x9a87393f, 0xe7ac0a06, 0x38646b32, 0x2a8495ab, 0xfd261c83,
40584       0x0cdcc4bc, 0x6485524c },
40585     { 0xc4a6ff2a, 0x1abfb3e2, 0x35a6428a, 0x2aa03fba, 0x89aff742,
40586       0x884227f0, 0xba5dbd93, 0x2337883a, 0xd2a182cb, 0x38186ae9,
40587       0x49a01f05, 0xb9f0764d, 0x917b1e7a, 0x92411feb, 0x570cbb5b,
40588       0x700b1903, 0xb914be7c, 0x5d5181d5, 0x1981182d, 0x135c4437,
40589       0x574b9997, 0x32758d24, 0x632d28b2, 0xa650a8f5, 0xfa383f09,
40590       0x24078bac, 0x00a33d80, 0x6546a60c, 0x2df8b449, 0xa4061c7a,
40591       0xf234563c, 0x1f76f3f2 },
40592     { 0x44c436b0, 0x9aa2c143, 0x1f69c87a, 0x79070556, 0x5f6db2df,
40593       0x35f3117b, 0xed56ba82, 0x85761f41, 0x7d0afa48, 0xf831464f,
40594       0x3adce71e, 0xa99f2915, 0x116b7488, 0xb27bf693, 0x9bb9443a,
40595       0xa98a5a8c, 0x2ee5fde8, 0x7f878026, 0x1812acb7, 0x3a6f93dd,
40596       0xdc84bc92, 0xaf92a4cc, 0xf1d4995a, 0x3c2562af, 0x04ed899d,
40597       0xfd9fc33c, 0x4ed2a538, 0xc028ca94, 0x049ea726, 0xd0f367bb,
40598       0x3d108e05, 0x04924ffb },
40599     { 0xc673562f, 0x06548e3d, 0xe2eae48c, 0xd3b33025, 0x5e1c6977,
40600       0xe61fd32b, 0x6ebe557b, 0x424e2064, 0x41d6e18e, 0x767391c0,
40601       0x14d7e95b, 0x4b8ebb8e, 0x20991b8c, 0x4ae8b7d4, 0xe01290d3,
40602       0xf8a0df66, 0x925e5f4e, 0xc97e24a3, 0x1508272a, 0x79a7b2cb,
40603       0x25072661, 0xb40b072e, 0x9062fa49, 0xdad9e182, 0xf3c53bce,
40604       0x8780a784, 0x9f142799, 0x58a82b76, 0xc1468426, 0x08cd849c,
40605       0xc380ae35, 0x4dfce809 },
40606     { 0xd527b780, 0x45069cb2, 0x977930dd, 0xd52da015, 0xe27d0263,
40607       0x10cc600b, 0xbb2d1b2b, 0x34102c26, 0x554adf3c, 0x4c652623,
40608       0x45f0ff47, 0xd6891382, 0xca916e7c, 0x83fa8cc5, 0xd15c8d8a,
40609       0x1e10f139, 0x81dc56b3, 0xf173dc2e, 0x5c4ed9ba, 0x7fcecb04,
40610       0x47d01228, 0x307fd7d8, 0x9f3a532f, 0x24a57153, 0xe2153c22,
40611       0x59e9e81d, 0xe428a408, 0xc562595d, 0x9339bd23, 0xdc7daff8,
40612       0xb8a06802, 0x0d075908 },
40613     { 0xde085f2a, 0x870af2a7, 0xbe99b2e5, 0x88fcd24f, 0x59ca413b,
40614       0x88c0d261, 0x8559f851, 0x1f02a2e4, 0xf622da0d, 0x83b96021,
40615       0x6dca3615, 0x5c05c2f5, 0x7910c682, 0x0148cf1c, 0x272695be,
40616       0x392f2896, 0xa8d64ef6, 0x883d0bb5, 0x1cfcbc52, 0xef0d2244,
40617       0x526117e5, 0xf5dafcec, 0xf04928e9, 0xb68612b9, 0x393f2e2a,
40618       0x283f744d, 0x700c1151, 0xfbeed7ed, 0xa4360dfe, 0xf2cde215,
40619       0x2f08535a, 0x24fa961c },
40620     { 0x616df7f6, 0x0767db3f, 0xfbd90326, 0x643057d8, 0x6e82d544,
40621       0x174daa90, 0x689643db, 0x2284f345, 0xcc89a060, 0x18b191df,
40622       0xd6c27d12, 0xbab46af4, 0xc9895145, 0x5a57f486, 0xcc942f9e,
40623       0xc03214e9, 0x41950158, 0x273e1c8f, 0x39ad43ab, 0x8ceb759f,
40624       0xe50ee173, 0x5e1b8b7f, 0x8f4d7d4e, 0xf635b1fc, 0x755603f3,
40625       0x8eff77e3, 0x7752fa60, 0x201f61d1, 0x4a6fb6e1, 0x94d7a03d,
40626       0xfc4f0114, 0x371cc23d },
40627     { 0xda90c351, 0x289b115d, 0x364d9c06, 0x6d196ebf, 0xf650b31b,
40628       0x77a89202, 0x6f57642f, 0xcc28c164, 0x08100127, 0xdc4f7e36,
40629       0xdc4c807b, 0x8836cd08, 0xe00240f2, 0x1280f156, 0x99cb3953,
40630       0x3f9a6d78, 0x3a802038, 0x40a494d3, 0xe87d3474, 0x45697e91,
40631       0x26dde24a, 0x70d97d07, 0x7640c30e, 0x06f6a58d, 0x5ba6e6c6,
40632       0x03c2c0e8, 0xf1bc13e8, 0x330f6a7a, 0xc9f4d78f, 0x3e602e4f,
40633       0x0c80fb7f, 0x92b6bca0 },
40634     { 0x5f00822e, 0x2e3d5c83, 0xb8b16f12, 0x0e825712, 0x92b0a330,
40635       0x81c329c4, 0xa7cc1954, 0x6b4e32ad, 0x1bb1413f, 0x0bee9cee,
40636       0x4a92ca27, 0xedfb7baa, 0xea3b9153, 0xcd472afa, 0x00f0c0f9,
40637       0xe8f09e7e, 0x5cdebb70, 0xa4e1d872, 0x4a9b63b6, 0xfe2bae08,
40638       0x3fd58f65, 0xf40141b8, 0xa3b62759, 0xd7ec5eda, 0x790e3088,
40639       0x9aaf6e67, 0x1f277e31, 0x215ad830, 0xcf33871c, 0xe7db4b98,
40640       0x4f02f89d, 0x71ff62c9 },
40641     { 0x2a4a84d9, 0xaa4c7102, 0x5ebc71e6, 0xe2ee4acd, 0xf1cd6578,
40642       0x3b11a8a5, 0xfff120a5, 0x83f5ef9f, 0x09e65033, 0xa4c598e1,
40643       0xca044180, 0xe1e9f990, 0xf59828c1, 0x8b832d46, 0x33af536b,
40644       0x753f28a0, 0xb6d4f68a, 0x92edc4b1, 0x72ccd1f0, 0xedde692a,
40645       0xd2226432, 0xd3aa0f7d, 0xa3d2661c, 0x38dbb63e, 0xfdc37dda,
40646       0xf1e19fc6, 0x84ef6b4c, 0x6c18b350, 0xdf1bba69, 0xe6a83fe9,
40647       0x5f958273, 0x40fd47e7 },
40648     { 0x267140a4, 0x5b88b746, 0xeab6f2fb, 0x6dbbfc1e, 0x69862548,
40649       0xdd9ec88e, 0x2eb6efc2, 0x69beeba1, 0x8ac8ff88, 0xcfc2214a,
40650       0xb5a21950, 0x95d5c96e, 0x4171fb69, 0x93389c05, 0x1b468337,
40651       0x2d85d452, 0x4113425c, 0x14d68a08, 0xec6c2174, 0xe52c0139,
40652       0xf730084d, 0x20cf0b97, 0x1f578aa3, 0x1ac16a26, 0xf9b6ae43,
40653       0x18b9fab3, 0xd854a695, 0x68d82111, 0xdffbe286, 0x0b334d98,
40654       0xe639338c, 0x5b1c1157 },
40655     { 0x72b6bb8f, 0x90edaab1, 0x02fc92c2, 0x8dc64ed2, 0xfe694c73,
40656       0xf42ba3c5, 0xcb54dce4, 0x316dc65f, 0x632420dc, 0xcb2d66a3,
40657       0x056dcf94, 0x16e706e7, 0xa4f32c9d, 0x2809c764, 0xea6edca8,
40658       0xab18d830, 0x81c65f57, 0x4fd1ace6, 0x7da12c10, 0x1f91651c,
40659       0xc7791a48, 0x0ac3bd66, 0x785e67a3, 0xb6ad1cf4, 0xda0fd591,
40660       0xe4d3fc44, 0x6e1c6344, 0xce164801, 0x33e50ab3, 0x84de9cb8,
40661       0xa756eef4, 0x963ab83a },
40662     { 0xdf4ea5a3, 0x944b47d8, 0x5cfe45fe, 0x96568815, 0x8a3c3564,
40663       0xd16e7d58, 0xe7c99e15, 0x84e55b3e, 0xf55071bc, 0x3fee204d,
40664       0x04057dce, 0x71006f29, 0xbba75570, 0xfe8c390d, 0x3319adac,
40665       0x3645bcb6, 0x7c20bfd8, 0x8189e8b0, 0x7d7d9578, 0x8e550969,
40666       0xb99f4e3b, 0x037d1321, 0xa60cfb6a, 0x011b2521, 0x837382da,
40667       0x66594aaa, 0x83c1dc07, 0xc89b91fd, 0x076b9884, 0x6b82b899,
40668       0xbe45c558, 0x443480fc },
40669     { 0x9114221a, 0xf8ffffb4, 0x3e857a7a, 0x4aec4f2e, 0x0fa54787,
40670       0x42e2d0e4, 0xd6f96152, 0xef3e6b31, 0xfbfe9b77, 0xb2296537,
40671       0xfb43a86a, 0xc2a9d0f2, 0x24572ac6, 0x241284ed, 0xe721ba7b,
40672       0xa3868917, 0xc117a78d, 0xdbef7c00, 0xd31605ac, 0x38149071,
40673       0x065a8ee9, 0xc2dada9e, 0xc442be82, 0xd5b138d8, 0xf6d72b58,
40674       0x9b6c224b, 0x8eb03e6d, 0xb9d355cf, 0xa1700371, 0xab6d1eb0,
40675       0xcffaa7eb, 0x97118a88 },
40676     { 0xcdecb5d8, 0xbf9c59a2, 0xa93a6866, 0x8083c81b, 0x04774fbf,
40677       0x24e0dd81, 0xa02070b4, 0xe779a3ca, 0x0fbfb781, 0x9d352fbb,
40678       0x3ef2a1c4, 0xa8b0d820, 0x14b3e501, 0xb858637b, 0x8a882ff2,
40679       0x5ba70a49, 0x3b06efa5, 0xa2730083, 0x102fee2a, 0xa42c02f4,
40680       0x8a0223a5, 0xe4e76299, 0x85c3fc72, 0xdba2ba26, 0xfe52eae7,
40681       0x554fe763, 0x270f45f6, 0x30b5405a, 0xa573387c, 0xd56a177a,
40682       0x4b71fa82, 0x17c0778d },
40683     { 0x2735e37b, 0x0e6dff1d, 0x656ec572, 0xc9884e56, 0x9ebba978,
40684       0xa2f5ac9d, 0xba09f3c4, 0x40fa4518, 0xf5b04377, 0x8c3fa177,
40685       0x967a2eca, 0xa1a1decd, 0x0528bd40, 0x768bca70, 0x18691c4a,
40686       0xf224952b, 0xe86d5fd5, 0x16e12c45, 0x37859a6a, 0x7a0d9157,
40687       0xa0ffce0e, 0x723f4309, 0xa96cc9a3, 0x5a8db79b, 0x1ad23a38,
40688       0x6dd12ae0, 0xe2bf5d84, 0x9ffec3a1, 0xa452ed66, 0xd6ce84e1,
40689       0x571fe4c6, 0x1219d5c8 },
40690     { 0x262969eb, 0x43eaa67f, 0x2f03e773, 0x3a3ab39d, 0x57bb0909,
40691       0xe6127e51, 0x8d150274, 0x0f82b0ed, 0xe580bdbd, 0xffffcad8,
40692       0xa9743e6b, 0x51d3d075, 0x8bac11d6, 0x1484bdb1, 0xeb24c388,
40693       0x95cd9990, 0x7fac67c6, 0x216a61d0, 0xa04e6b87, 0x4308f762,
40694       0xcba57cc8, 0x2865dd61, 0xd234a07a, 0x3c296b0d, 0x3a0793f9,
40695       0x76f92839, 0x0be29ece, 0x70b57e1f, 0x7e626f42, 0x1314a82f,
40696       0xd657f230, 0x2c8d7ab2 },
40697     { 0x0825e4d6, 0x67cf5892, 0x6ef83b44, 0xdf51eaa5, 0x1310108d,
40698       0x63e665d8, 0x8dd0963f, 0x229f89f5, 0x9df6436a, 0x8c4b14dd,
40699       0xd45ebba7, 0x99dae469, 0x5a4df381, 0x118aab77, 0x29e37feb,
40700       0xda8978bd, 0xaca2d7ef, 0x69ced5aa, 0xc67d6a8a, 0x6c98d05d,
40701       0x77f84a34, 0x7474bf0d, 0xed8cd59a, 0xd4428b2e, 0xd1d398fb,
40702       0xb0fd1cd5, 0x94a20b11, 0x596013db, 0x1b404c44, 0x96eb705a,
40703       0x4b09d958, 0x2299d277 },
40704     { 0xc64397e6, 0x5b9cd58d, 0xbf6dd31e, 0xac198f1e, 0x3e9f1db2,
40705       0x5866d8e1, 0x8fcdc68c, 0x405ae287, 0xe53c01fd, 0xa4b280cd,
40706       0x411db5f6, 0xdc963f2d, 0xbec4f8a0, 0xed5d5189, 0x916ee98b,
40707       0x336fd13d, 0x042df48e, 0x6925b1b3, 0xace0074e, 0x0cf56291,
40708       0x25317e95, 0xe8d38b48, 0x821c446b, 0xc7ad1d2b, 0xf0b65934,
40709       0x71c44135, 0x52ca0d50, 0x971b736f, 0x27b46c26, 0xaf9ffa57,
40710       0x1936618e, 0x21ac6779 },
40711     { 0x2d7fbcd2, 0xab420e3f, 0x97bdfc18, 0x12722473, 0x4df5d4b4,
40712       0x492033f8, 0x3807b7d3, 0x6fcd4236, 0xb33c3625, 0xdfc19b09,
40713       0xa0f22814, 0x13d6f375, 0x037c19b8, 0x70978a59, 0x0ff27b9c,
40714       0x4f398997, 0x615a4389, 0xfc0e1a45, 0x3e602f74, 0xffa3496a,
40715       0xb261ca1c, 0xc3f1c431, 0xee0164cd, 0x612211db, 0xe7f7be9f,
40716       0x30463ee4, 0x92c2e1bb, 0x015f7e78, 0x24483a56, 0x663d88d6,
40717       0x0e62d9d8, 0x0e8ec1e7 },
40718     { 0x8a0878dd, 0xa88ccc29, 0x6640071a, 0x99ac175d, 0xa5173617,
40719       0x90344820, 0xdd58a315, 0x316d023e, 0x88d221a1, 0x30785bd4,
40720       0x959c48e3, 0xb74b3de7, 0x4c67a771, 0x42ee0382, 0xe0b91453,
40721       0x59ef6cdd, 0x9b237e91, 0x7830ae28, 0x495d8325, 0xe1847a4c,
40722       0xd0773666, 0x67b1217e, 0xa294a325, 0x58192c86, 0x864d8326,
40723       0x76aa0f56, 0xf4b13e5b, 0xe2a2bd12, 0x1b6b73fd, 0xd850c1c0,
40724       0x5d103635, 0x653a795f },
40725     { 0x50dcb199, 0xcfe28985, 0x7fa02b60, 0xb35b8e5e, 0xc97603d0,
40726       0xbca7d7c3, 0x27f131b5, 0xb0e5288d, 0xe2b12d52, 0x3aa704de,
40727       0x1db725c7, 0xe206b1d8, 0xc5d1b113, 0x0b12839a, 0xdb45d763,
40728       0x14f970cb, 0xb2125e8e, 0xc997f93e, 0xee7daa26, 0xbd75739c,
40729       0x1fef20e9, 0x46ecbd3f, 0x7c6a42b1, 0xf994a114, 0x27fb0fd1,
40730       0xd289eb4f, 0x9a40da4b, 0x11186d31, 0xfb9d7976, 0x083f65a5,
40731       0xd444675e, 0x30dfc47b },
40732     { 0x9eaadfe8, 0xbcfc5ae2, 0xb4d4e812, 0x25027e54, 0x8b533561,
40733       0xab0702df, 0x56a6a214, 0xa2b9c204, 0x3059068e, 0xb1a3df7a,
40734       0x9883110f, 0xa3514b21, 0xc4b78e1c, 0xb7be2336, 0x3e2f6984,
40735       0x17073ce6, 0x2ddf7ac6, 0x86e114a6, 0x07d7c3c8, 0x276192bf,
40736       0xeb1ae289, 0x5da69e0b, 0x25184939, 0x983af175, 0x407a3aa0,
40737       0x9ac52a4d, 0xae0fe218, 0x1535c7da, 0x397f2501, 0xe16fe872,
40738       0x54c212cf, 0x572a591f },
40739     { 0x09a5553a, 0x49668419, 0x327733bc, 0x3f054318, 0x3eefd690,
40740       0xf9ceb4b2, 0xf22126d4, 0xbd3cbf9b, 0x2fed9578, 0x6d9671c0,
40741       0xca0306d8, 0xbba597ce, 0x3d674fe5, 0xb705ed61, 0x67f33f76,
40742       0xf1d3622b, 0x11cb8c31, 0x15bcf3c6, 0xe53d1aa9, 0xa38467dc,
40743       0xf908ab43, 0x902fe929, 0x8d15767a, 0x6e3e499d, 0x90afd07b,
40744       0x8142db5c, 0x6c8b190e, 0x120c6fbc, 0x24919a4e, 0x80c86553,
40745       0xd8c82c3c, 0x65c2cbe1 },
40746     { 0xa660bb63, 0x684cda20, 0x86e86245, 0x27dc3b0a, 0x6ba0eed7,
40747       0x76472cf6, 0x679dd158, 0x79c162e5, 0x08452d44, 0xb6884277,
40748       0x413f579e, 0x829bc6b3, 0x95011770, 0x92ea15ec, 0x47738183,
40749       0x5e34e300, 0x73e1d2f1, 0x8c3ca349, 0x229bd3de, 0xa5c4f1dc,
40750       0x94ef7ed3, 0x783eff1b, 0xdfae7a1a, 0x46db738d, 0x1a099852,
40751       0x4353d72e, 0xa0dcf4ab, 0x2533ad58, 0x0e7888b9, 0xd8055016,
40752       0x3ba77f66, 0x831440d5 },
40753     { 0xf611b2da, 0xf43e2e32, 0xd0fa46ac, 0x5d066e29, 0x820b3c0d,
40754       0xe897f3e8, 0x1d3e44f0, 0xc45c28e6, 0xdfd27a66, 0x929d7f66,
40755       0x101e8517, 0x735b860a, 0x3de078dd, 0xea3fce98, 0x638ce11a,
40756       0xc9977db5, 0x48536b3b, 0x0488382f, 0x64cadfc6, 0x7e0c7a3c,
40757       0x82147b71, 0x3cd17f7f, 0x1b411e3e, 0xe95663cc, 0x985fb46d,
40758       0x5739ac8f, 0xbcf119ca, 0x385399cd, 0xe15a2815, 0x4a985a70,
40759       0x6d5f4566, 0x504c3a8a },
40760     { 0xb8fa53c7, 0x00b55283, 0x509474e3, 0x985cff38, 0x437ce25f,
40761       0x234d241c, 0xe5a129ed, 0x29832430, 0xaabcc674, 0x6ad38956,
40762       0x7ee81ee1, 0xa2dc001d, 0x670b2702, 0x4c23c6b6, 0xa6e8a3bb,
40763       0xb35e567e, 0xa69673ea, 0xbc70b3ce, 0xe6e28eac, 0x85a7a9c3,
40764       0x5537b7da, 0x2ae684de, 0x6de937dc, 0x5ecac3e5, 0xf8430422,
40765       0xbf2ea6c9, 0x77fdc520, 0x38caf7d0, 0x69f56add, 0xc27af0b1,
40766       0xc71d21d2, 0x496e4699 },
40767     { 0x9fa93467, 0xba14fc82, 0x0eb2a614, 0xc2e37684, 0x4833e09b,
40768       0x659bcfaf, 0x3686bdcc, 0xbc859752, 0x81f3216a, 0x40bfd080,
40769       0x17c081b8, 0xc463bda6, 0xbb04793b, 0xbd01fa86, 0x2cd640c5,
40770       0x5a21ece6, 0x2203d5c4, 0x97bf6a54, 0x951167b7, 0xceb40edc,
40771       0x765ba268, 0xd67aacaf, 0xaeab51f9, 0x8ba0d9e9, 0xb0d6863a,
40772       0xc14b215e, 0xe5f06952, 0x354cdcdb, 0xcb3744b5, 0x4f2b5ccf,
40773       0x13037fe8, 0x13389173 },
40774     { 0x45003cd1, 0xee680640, 0x44ae2ac6, 0xfdac17bc, 0xde8e5314,
40775       0x4bcd419f, 0xc7cea95c, 0x81e34eb9, 0x38f37e01, 0xbb57762d,
40776       0x260990c8, 0xecc4cfb0, 0x50a34a7b, 0x0bc493f9, 0x543304ef,
40777       0x68074172, 0x6bc8aa2a, 0xaec0fcb2, 0x3b45fea5, 0x9e7a9b46,
40778       0x55fbdbac, 0x4bb2952e, 0x0485dff4, 0x50f0c0a6, 0x4dea4796,
40779       0x02c5104d, 0x695e3a02, 0xd2cefa09, 0x6da1f345, 0x4c8102b4,
40780       0xf3833fbd, 0x422eb573 },
40781     { 0xa6ad3f47, 0xac592eb6, 0x9714ba0e, 0xb0861f6d, 0x07281459,
40782       0x57c1e919, 0x64ea5803, 0xcf7c94e2, 0x54b12723, 0x725376ac,
40783       0xdafb736a, 0xf2a6ba41, 0xcba03cdc, 0xc89e8920, 0x5b0fd3ad,
40784       0xf2e20cb4, 0xd66059fe, 0x26ea5a54, 0x889df8bc, 0xee63fa8b,
40785       0x66a3f2bf, 0x40f1c7e1, 0x747312e1, 0x09febc9c, 0x727999ff,
40786       0x7d19b9c2, 0xb7fd2b05, 0xa9fbbb4c, 0xa0da2dc6, 0xcfba27d7,
40787       0x2c252582, 0x368541cf },
40788     { 0x22799d37, 0x510d3c9e, 0xacfa333a, 0x1b677de5, 0x080f795b,
40789       0x4e6ae18f, 0xafc8dfc2, 0x69b53c2a, 0x0e842dc2, 0x797541b6,
40790       0xac067fe8, 0xd5a6f2af, 0xbd07d877, 0xd0208a03, 0x654be2f2,
40791       0x34b473f0, 0xf515e23e, 0xe67c102a, 0x2ac1af48, 0xb00dbf9d,
40792       0xb6a13d00, 0xe264fa41, 0x97e94c11, 0x1669786a, 0x86a586f4,
40793       0x09d8cf2d, 0xc7f927e9, 0x073bf869, 0x2241a566, 0xb8977880,
40794       0x22261334, 0x59a5bf59 },
40795     { 0x81347191, 0xe9d1c91e, 0xeb969972, 0x186c1abc, 0xa9d46a7f,
40796       0x07888767, 0xdaa7d397, 0xda93cfcc, 0xd91b9aa0, 0x08bee9f1,
40797       0xf8dd3c6c, 0x8267fd78, 0x94228100, 0xf93860d0, 0xdadb47fb,
40798       0x6a6a71aa, 0xa6156f8a, 0x9caa06b7, 0x39848bc9, 0xaa1b05e0,
40799       0x2aaa9135, 0x36ddc237, 0xb13f3bd1, 0x77e7e079, 0x4acc5f4d,
40800       0x8d0b5cbe, 0x984cfd36, 0x04da45f8, 0xd3d3e0f8, 0xf14ef618,
40801       0x43eb799c, 0x467564c1 },
40802     { 0xb6fff5d7, 0x8d725904, 0x92dc4752, 0x037f33af, 0x6d20b8aa,
40803       0x9095d575, 0x43baec39, 0x32235fc1, 0x68a2b9b0, 0xa2feb4af,
40804       0x94d35c61, 0x61c50318, 0xea877486, 0xac92b6a2, 0x011bc6f3,
40805       0x8eb48b15, 0xc79edcb2, 0xa28fe128, 0xa5d2a006, 0x9f71bc0c,
40806       0x2f15b850, 0xf3167732, 0x7a036218, 0xfe8d728c, 0x4f81e09e,
40807       0x068f39cb, 0x7b7c50d9, 0x1773f016, 0xed6a1e03, 0x0d0f7adb,
40808       0x4ee984d5, 0x8a0dee16 },
40809     { 0x47366e6f, 0x504991bf, 0xe86c3005, 0xb8084d9f, 0xa40cce36,
40810       0x14c4c751, 0x3f1961e2, 0xbbb46aa6, 0x40445e43, 0x56a785f9,
40811       0xc91e215f, 0xdb8d1b57, 0xc7ee808d, 0x6a8e453e, 0xbbaa1e8c,
40812       0xc0367ef8, 0xe3e18109, 0x310d91f1, 0x7e20a2c3, 0xf97cfd0e,
40813       0x554cc277, 0xf1e80c84, 0x7b628403, 0xe89bbc1d, 0x3fe0a17c,
40814       0x7778a966, 0xc1f00073, 0x9e9db19f, 0xb6f6bed2, 0x2ce7fe7d,
40815       0xee97ce23, 0x7b04b5d2 },
40816     { 0x82c5faf8, 0x5b546bc7, 0x8eb81097, 0x1a734c5e, 0xe77851e0,
40817       0x3d566861, 0xe956d51f, 0x833a1013, 0xc3c3c37c, 0xc7351731,
40818       0xe0c148ec, 0x607738fb, 0xe1bbef41, 0x2ec6f0bb, 0xcfa51857,
40819       0x0aa2ac6e, 0x66e3adf0, 0x072902d7, 0xc622d6e3, 0xcd4d5089,
40820       0xa6dd802f, 0x3ae21b23, 0x33886372, 0xe5465a55, 0xa8d81822,
40821       0xd85119a0, 0x3786977a, 0x4f14d032, 0x9c7b272c, 0x515b081c,
40822       0xc99be31c, 0x1c6a95a4 },
40823     { 0xc2821363, 0xa6b14ad5, 0x4d17de1c, 0x829c1823, 0xccade848,
40824       0xaef5d2c4, 0x82489e27, 0xf412ab39, 0xf081d927, 0x92c9c098,
40825       0x75cbad1f, 0x6f87bdf4, 0x1a1d9fb1, 0xf4aadab8, 0xb75f3b76,
40826       0x475a7923, 0xdbbba8fe, 0x99dd0ad6, 0x4b70ab45, 0x836f6164,
40827       0x34bd9af1, 0x2a464881, 0xba9abda3, 0x5c91226e, 0xe65625fb,
40828       0x4cec8709, 0x0818e4be, 0xd4b3919e, 0x14f6879c, 0xa5c09c84,
40829       0x30a864c9, 0x72708a02 },
40830     { 0xf34a466c, 0x4f33c0b1, 0x7f9d45ba, 0xa1bae09c, 0x0e28785c,
40831       0xd70f0fee, 0x90880881, 0x824c7146, 0xbb043da3, 0xe2416c2a,
40832       0xcec6f432, 0x733da713, 0xc9793e1c, 0x2b590649, 0xb35c9365,
40833       0xdb62d5b0, 0x3e5c1b2a, 0x355eb6e2, 0xbb16b515, 0xcfe8b5ce,
40834       0xf709691c, 0x9e081869, 0x61a85bd5, 0xc865f9fb, 0xfae103f7,
40835       0xf169d3cc, 0x73467e9d, 0x9525c473, 0x43695113, 0x7db55c0b,
40836       0x73265d21, 0x7491c74c },
40837     { 0x80d2b94d, 0x312ed5bf, 0xba4b260b, 0x1b8ac633, 0xd62219a1,
40838       0xac86c58c, 0xaeb82c8e, 0x317ccf6b, 0x59ef9ced, 0x2dfb29ee,
40839       0xe42bcd5a, 0xdaa7d898, 0x5974b201, 0x93e295c8, 0xd9fc5adc,
40840       0x69e75784, 0x012aa3ba, 0xd6c4709f, 0xc85d3cb9, 0x1fda9f37,
40841       0xd3dd4abd, 0xe5487e25, 0x0b3ba22e, 0x00fd4b01, 0xc6e8dcbb,
40842       0xcb591493, 0xbce68664, 0xb7329fab, 0x68906b76, 0x6829d1c2,
40843       0x74176841, 0x8bcfd3e5 },
40844     { 0xd3c8c314, 0x06882734, 0x11870833, 0x95f0b2f1, 0xc068ba16,
40845       0xb937f7c3, 0x77924787, 0x5365e0d8, 0x1f992227, 0x15527e5e,
40846       0x27dffd4f, 0x0a069648, 0x2f586389, 0xd58b3df2, 0x6af20ead,
40847       0x83446b89, 0x50746257, 0x09d7970b, 0x4022a691, 0xd9e8d206,
40848       0x671ec379, 0xd1e5f8af, 0x057fe91e, 0x6f542509, 0x52890418,
40849       0xf14dda81, 0x1db932ad, 0xbd78010e, 0x905a9378, 0x3e18d1e4,
40850       0xbd37ab49, 0x53cadcf7 },
40851     { 0x5e53d0ff, 0x1bb5edf7, 0x888abf67, 0xd886606c, 0x12206d15,
40852       0x6491b0f8, 0xe22b6a33, 0xb3018345, 0xb173b317, 0xaba6794b,
40853       0x7dc9e595, 0x8c1e5867, 0x239624d1, 0x4e106482, 0xda55dd53,
40854       0x61752e59, 0x9e42879c, 0x018b4eab, 0x491f2bed, 0xcaf6784b,
40855       0x1e79429e, 0x3dcdb9d2, 0x10f26224, 0x36941485, 0xa650ec5c,
40856       0x106f190a, 0xb69a9760, 0x7542a5ae, 0xc32d1046, 0x69bd75e9,
40857       0xbf8c62b1, 0x90849964 },
40858     { 0x5a93c661, 0xb1390cf6, 0x9db5f056, 0x18486264, 0xa51a1788,
40859       0x92a93a9d, 0x6772de9a, 0x1b0cbb8f, 0x7c71487c, 0x6e67febd,
40860       0x4e62423e, 0xf9b4382d, 0xbb5a42f8, 0x96fda50e, 0x6089a4f2,
40861       0xc921b337, 0x875ec516, 0x49d32d7b, 0xc410124b, 0xbd86d2ca,
40862       0xc421fb7a, 0xf6862209, 0xf6b7de33, 0x3e1949ab, 0xe93c9268,
40863       0xcdee18f0, 0x08dc4cc0, 0xd4edbd5e, 0x73580d22, 0xc2b75be4,
40864       0x468cd7e8, 0x3d7f6ffa },
40865     { 0xdffbd5d1, 0xea7b290c, 0x970338df, 0x9d759da6, 0x90feedc9,
40866       0x56680b08, 0x42dce68e, 0xbc690af5, 0xb2ae4d82, 0x8519df2b,
40867       0x7f195b60, 0x5612467f, 0xd83c21f4, 0x659a342c, 0x55651633,
40868       0x55771bf5, 0x548ba562, 0x5fc68935, 0x9492f23a, 0xb5419203,
40869       0x9c9c6017, 0x567528e3, 0x511e6019, 0x3f064ed4, 0x1d16a555,
40870       0x303f9eb9, 0x2254abee, 0x3e18c4fd, 0xfd434e7c, 0x40994d6f,
40871       0x6dde74e6, 0x8fb12d3f },
40872     { 0x293cb7a4, 0x6c6381a2, 0xb87b7e4d, 0x453e09f0, 0x078ac3ef,
40873       0x4f212823, 0x578cae91, 0xe89ffad0, 0x716ba4dd, 0x4a2b696a,
40874       0xf6f580a0, 0x14681a14, 0x4c2f1307, 0x1358f97b, 0x2932fb89,
40875       0x87896996, 0x268a5af7, 0x29dd850a, 0xfe239f83, 0xaf771f6d,
40876       0x4f47499d, 0x5f20fd2e, 0x867ca0e9, 0x9b643e77, 0x375981ec,
40877       0xe7858ecd, 0x19ab1c97, 0xbe946a59, 0x06ff3453, 0x4f9303a2,
40878       0x75d237b1, 0x3fcc6731 },
40879     { 0xdf21f920, 0x509debd5, 0xc1401b90, 0xfaf70e1f, 0x95a64aaf,
40880       0x2429cbfd, 0x2c37a122, 0xf2120855, 0x7deb926b, 0x1d4c93f4,
40881       0x9fb3f1dc, 0x12f3e4c0, 0x5b51bc46, 0x56085a59, 0xf10fdbd2,
40882       0x2a2f5d62, 0xdf0cb3c2, 0x60dd62cf, 0x6b0f254b, 0x154424a3,
40883       0x564612b7, 0xc3a5a05d, 0xa1f5249c, 0xbebe30cf, 0x7e62a188,
40884       0x24ec6903, 0xaf429939, 0x75f0fbac, 0xb3fa8685, 0xd41345dc,
40885       0xc7151c34, 0x645146fd },
40886     { 0xba1924f9, 0xecec633a, 0x006326e1, 0xbba6f136, 0x7e50fc17,
40887       0x203757ac, 0xef3d8e00, 0xca531919, 0x51dc5a74, 0x9545a6aa,
40888       0xd31412b8, 0x6e21d58f, 0x7bb1d000, 0x01bc3005, 0x6ed1a9c3,
40889       0xf1789c69, 0x9858fa48, 0x7af2d35f, 0x8197be85, 0x434d09b9,
40890       0x29aa265d, 0x1dc07755, 0xc058fa80, 0xcad03be7, 0x54ba14ce,
40891       0x92d70a9f, 0x6c050a74, 0x6dc78505, 0x4d005dda, 0x2a7ca4a9,
40892       0xabfb9f2e, 0x448d3d72 },
40893     { 0x29b33989, 0xdc56f145, 0xa9ae815a, 0x868351bc, 0x4b074414,
40894       0xb3f45613, 0x3cd9f33b, 0x955ce42a, 0x5ff6e4a3, 0x13ade4ec,
40895       0xa50eaa91, 0xd3aac715, 0x5666efdf, 0x0c61ec99, 0xf6a4470a,
40896       0x108a28b8, 0xe54844c9, 0x402ef584, 0xd0e2f337, 0xb825b162,
40897       0xb46f7cbc, 0x3dcd131f, 0x96f2fd89, 0x208178ec, 0x25928c78,
40898       0x4d8c5d67, 0x9963c459, 0x285a33df, 0xd92a309f, 0x72497175,
40899       0xcb7019a5, 0x76881479 },
40900     { 0x91767eed, 0xba43a114, 0x92bf65db, 0x5e11b9ad, 0x03a5e21a,
40901       0xe8a22ce0, 0x2a335415, 0x63604421, 0x4a9ead62, 0xc2c563b4,
40902       0xa0b2aee5, 0x4bc06264, 0x8bf2e1d7, 0x75b8d575, 0xd08a265d,
40903       0x1cff0ee7, 0xb0b712a7, 0x17914e1d, 0x4b18692d, 0xc35925d0,
40904       0x56cce815, 0xde253f4c, 0x9fff0e3a, 0xa479241c, 0xddabed19,
40905       0x50b9d06e, 0x59fae506, 0x67135260, 0x532ce180, 0xf37600fb,
40906       0x5e5a8626, 0x670eb01c },
40907     { 0x73cdbb43, 0xdf73c0af, 0x7f2431ad, 0xcf08ecc5, 0x2a1a3845,
40908       0x91780541, 0x9224ddf1, 0x69a104f2, 0xbeac7eff, 0x4352f38d,
40909       0x7c2d1322, 0xfc3b3b4e, 0xb5e4b476, 0xa69e9430, 0x975a46f0,
40910       0x7d932340, 0x5d64eece, 0x8093899e, 0xdb2345e9, 0x7b821250,
40911       0x7f4b796b, 0x23552932, 0x4bb90b1f, 0x2ee9cc15, 0x9112f7d6,
40912       0x1fa9c8f5, 0x1cbaae32, 0x2d0f2f98, 0x0075166a, 0xb77f0366,
40913       0x635dff27, 0x504852e7 },
40914     { 0xa2f392fa, 0x2f0f3ce5, 0xec6c9078, 0x326c076a, 0x84baaaf6,
40915       0xad01de92, 0xcbe8e993, 0xb01b16d3, 0x2d950908, 0x71305c24,
40916       0x3853af38, 0xc66fd617, 0xd3c429a0, 0x7735140e, 0x1fabf027,
40917       0x8a31b12a, 0x058b3177, 0xa0530002, 0xa9c7deb9, 0xabffd9fc,
40918       0xe8667d30, 0xd05ef69b, 0xe9a9e13f, 0x2f3a7308, 0xb91eae9c,
40919       0x3f4c9a19, 0x618ce6c4, 0x50d0cee7, 0x5240f8b0, 0xfb24dc40,
40920       0xf7e90cc4, 0x992fe151 },
40921     { 0x38f197aa, 0x4454db31, 0x87872f98, 0xa4ded69d, 0x44f0a828,
40922       0x97b427b0, 0xa31e48c6, 0x9821e1ae, 0xdd98efec, 0xe38cb09f,
40923       0x480cb3ae, 0x20b84fa8, 0x47475573, 0xba5bb4a8, 0xcd50e96b,
40924       0xa9be080a, 0xef103550, 0xc4451e9c, 0xc441325c, 0x626ee75f,
40925       0x38a5e33d, 0x6eea5e98, 0xa2b0abd2, 0x7321beb9, 0x9b6082a9,
40926       0xca92e484, 0x992bcc2a, 0x1dc8168a, 0x9c8eb9fb, 0x134ecf4b,
40927       0x4c5b71e0, 0x5a68bfa8 },
40928     { 0xff0a2bfb, 0xb4ff3b45, 0x5502f8b0, 0xd105fff9, 0x5b1c0c26,
40929       0x14de5885, 0x0d3b9d04, 0xed16865b, 0x026d3917, 0x2f5a2453,
40930       0xf4db3c0e, 0x6a22f493, 0xe2418f2e, 0x4871548a, 0x509bef61,
40931       0x6ab363a8, 0xb8cbbbec, 0x91ca1e3a, 0x4011a396, 0x71e0dc98,
40932       0x0d5ca577, 0xff982e0a, 0x81897bc1, 0xeb40b045, 0x085ad5e7,
40933       0x4bc24a46, 0xa6337b7c, 0xd15c8fa0, 0xbef1628f, 0x56ce6ef7,
40934       0x9f5ef439, 0x78acfdf9 },
40935     { 0xf8520189, 0x45bf7f15, 0xc77f61c4, 0x954202a0, 0xdfa22e1b,
40936       0x39edc6b9, 0x1f4a3487, 0xd2d60267, 0x4814cc52, 0xcd933929,
40937       0x05e9f123, 0xde76a124, 0xae36b6f7, 0xe2306ea0, 0xb83a58e0,
40938       0x53815218, 0xa041231a, 0x9862bb76, 0xbf31be71, 0xe8da253c,
40939       0x37de861f, 0x2dfc5332, 0x90ae4890, 0xf25c93f6, 0x8baa6ed2,
40940       0x66bcb8f0, 0x908b4a29, 0x6f10ae0f, 0xb061c949, 0x8cb4b48c,
40941       0xd075a366, 0x0ad92d73 },
40942     { 0xc2ca548a, 0xbfb95fed, 0x80cd89ab, 0x4778c620, 0x3466c280,
40943       0xbe99154b, 0xd4be8902, 0xea3be093, 0x13e681ed, 0x847b7995,
40944       0x02f40161, 0xf22a8f4b, 0x4aeb7fe8, 0x3ef2cb4d, 0xb3aed5f6,
40945       0x9adc5151, 0x98c31163, 0xec1ccfd1, 0xa3d7d88f, 0xdc2ac17b,
40946       0x46421097, 0x08fa64d3, 0x94b90bcf, 0x5ebf80b7, 0x0b50a9eb,
40947       0x1b78b4ba, 0x279aa66b, 0x1a4fe934, 0x075b3ced, 0x8ef4dcaf,
40948       0x70a6e9ae, 0x95bbd8a0 },
40949     { 0xe614bbd0, 0x59f92495, 0xb823e363, 0x7567a887, 0xfc1bd6a7,
40950       0xe247c9ec, 0x8e835c42, 0x2bfaaf47, 0xaade066a, 0x314ef4e0,
40951       0x5c16d336, 0x072baa63, 0xe2f0e389, 0xfa429c71, 0xbd07d90f,
40952       0xcac1e5d0, 0x514f5c04, 0x69ff35ea, 0xc0554ec1, 0x893053fc,
40953       0x2a35947f, 0xab1d86b7, 0x2aebe487, 0xe29fb060, 0xdfb9cf21,
40954       0xa0a10d6d, 0xf20dfcf5, 0xad147059, 0xb8867a2a, 0x480dc66f,
40955       0xc125a919, 0x375a884f },
40956     { 0x1217f7ea, 0x178cbe2e, 0x875c6dab, 0x1a161e2a, 0x1bdb1a54,
40957       0xf7707ec0, 0xe4fd73ca, 0x678864a0, 0xd13a0d86, 0xbaebc664,
40958       0xc8d30668, 0x40325f99, 0x2f1c5950, 0xb93ed9c9, 0x541e0667,
40959       0xfdf36763, 0xb91a6763, 0xfd97fbb0, 0x6079c9a0, 0x26aa69ea,
40960       0x1eaa8c47, 0xc7303c80, 0xafa63c55, 0xdec75c81, 0x4fd12adb,
40961       0x01cdcde2, 0x1968838a, 0x9fe0dda7, 0x38415379, 0x66bb093b,
40962       0x08cb84ec, 0x268d818b },
40963     { 0x41580555, 0x73dae358, 0x473d103b, 0x4fc32e67, 0xbeccc1ab,
40964       0x240c1013, 0xb24ee9de, 0xda4099f2, 0x9fa8e066, 0x37b0cb5b,
40965       0x6438d7ee, 0xb5ae04e4, 0x2b720140, 0x7f7d3164, 0x339e4a78,
40966       0x86ef4edb, 0x3a7d8375, 0xa5e77eed, 0xbd707c2e, 0x883fad37,
40967       0x0f979189, 0x816b633a, 0x2e7a208e, 0xe24c028a, 0x4435516a,
40968       0x1171fe3c, 0x4f5f2bf5, 0x3eb93b33, 0x01b53a56, 0x8419ed4b,
40969       0x056ca44b, 0x8b02735c },
40970     { 0xe1019195, 0xb89bb464, 0xf3fc28c1, 0x1de4c026, 0x2bfc3b21,
40971       0xac120e6e, 0x91bdf92f, 0xec71bc5a, 0x0d995bc9, 0x485d7ab4,
40972       0xe6491ffe, 0x97c6768e, 0xafbce265, 0xd9552d19, 0x8e1b76c2,
40973       0xbae6c7fe, 0xd7e3ad1b, 0x167d8281, 0x5e989734, 0x3e149af9,
40974       0x8a0c8182, 0xd1f0024c, 0xc3006c0d, 0xf571ffdb, 0x58773d4c,
40975       0xb32ecf7e, 0xfd3540d8, 0x5822a782, 0x04365042, 0x5ab45c3f,
40976       0x4b4d85fe, 0x400e3aa0 },
40977     { 0x5e46e4a2, 0x47321649, 0x24136074, 0x37a2ed64, 0xc60ec77d,
40978       0x659223b1, 0xe5e0ac2e, 0x5e13aac3, 0xc5107ab7, 0xda17c41b,
40979       0x73c253db, 0x65b22ec9, 0xa5012296, 0xff3867b8, 0x0621a99b,
40980       0xfed660d5, 0xc89fc3f5, 0xa3c28506, 0xf16451a7, 0x3ed350b9,
40981       0x67cb586f, 0x27c3e032, 0x967185b1, 0xc807c779, 0x4a13009b,
40982       0x09c157d4, 0xadaf1f4d, 0x362f7647, 0xf3a6a198, 0x4a42b9ac,
40983       0x8da6e039, 0x131c3da2 },
40984     { 0xa7da83ba, 0x4a785ff1, 0xd04f4436, 0xf415b425, 0xec03f812,
40985       0x7c0899bd, 0x80f5f4a2, 0xc58d411a, 0xfda251b9, 0x3d32d610,
40986       0xcd3b2f32, 0x99bb4504, 0xf4c2083c, 0x198c444b, 0x730e83fd,
40987       0x60c261af, 0xcb02db90, 0x060ca4df, 0x9df1e7c8, 0x0ff7838b,
40988       0xc4c690c9, 0x6b79cf97, 0x5d75f154, 0x131514d7, 0x1cb0e8ff,
40989       0xa7c074f1, 0xb2c17615, 0xb920aac1, 0x44aa0ff0, 0xde8098ad,
40990       0x34545ce9, 0x71d1a46a },
40991     { 0xfa1b382e, 0x76178f76, 0x772dda0d, 0xa0d8ecc3, 0xc5d4d130,
40992       0xaa5aab2a, 0x8d72622c, 0x27d38ba4, 0xca3bed06, 0xc5410db6,
40993       0x793ceccf, 0xf637a588, 0x6e65e3d7, 0x1f65dafd, 0x60a45641,
40994       0xc3b44a85, 0x4f78540b, 0x0f47b3a8, 0x5e4d60f6, 0x824fdadd,
40995       0x17d3b6d5, 0xd8ccf90c, 0x325fc13a, 0x008eabdf, 0x3648fab9,
40996       0x3e90d716, 0x24c52d4b, 0x3964ff3a, 0x533d0acb, 0xb95cc416,
40997       0x1167f521, 0x6cd2699f },
40998     { 0x12f4f3ac, 0x2d8c0b3b, 0x99d1bdfb, 0xb03dcfe2, 0x30f37326,
40999       0x540034f8, 0x7c5a8c82, 0x22dd6893, 0xcd8f1442, 0xeb7093d0,
41000       0x585742f2, 0x892795a7, 0x087adadd, 0xe15f282c, 0x16ab7b5e,
41001       0x7bbdc749, 0xa58acbb4, 0xd30fe40b, 0xe2bac39b, 0x0de417eb,
41002       0xc61a04bc, 0x4b4b19a6, 0xf2735569, 0x9338c34d, 0x30ab196f,
41003       0xe8f03742, 0x6c88c965, 0xfa2efcb8, 0xc7eeb826, 0x19eee274,
41004       0xda345dc2, 0x327c063f },
41005     { 0x5b47cd53, 0xab399eff, 0x1943aefe, 0xbbe9869d, 0x1402a866,
41006       0xe64ecc7b, 0xb1c25a16, 0xc3e7c2aa, 0x022de271, 0xc4216b79,
41007       0x366d6a5f, 0xe58dfcc8, 0xda813336, 0xd159509e, 0x130bfb7c,
41008       0x370400f2, 0x93b48780, 0x1be4e059, 0x39f3cd22, 0x0623a1fe,
41009       0xeecb4f87, 0x72aa22b2, 0x6c27b83b, 0x1af4c496, 0xda5fa5bf,
41010       0x7a42a94b, 0x48b01af2, 0x9afba822, 0x3670112c, 0xeb6b9d2a,
41011       0xc0df6856, 0x020f19d1 },
41012     { 0xa4dbba20, 0x37051a86, 0xdb1de5c5, 0xb618ebc6, 0xe6525840,
41013       0x9a780a19, 0xd2bccc4d, 0x9440302d, 0x10285a24, 0xe9ff023d,
41014       0x3a486268, 0x3b937ee3, 0x4cd61147, 0xe37ee2f2, 0xa3d057cf,
41015       0x79fbbfd3, 0xccddefce, 0x5fba16d3, 0x5b231727, 0x916058ec,
41016       0x720c3adb, 0x47699ebe, 0x8b4f6bba, 0x26274386, 0xf18a0770,
41017       0x54b0092a, 0xacca1160, 0x99d090eb, 0x0c888f60, 0xf757e1ff,
41018       0xb0050544, 0x79e72720 },
41019     { 0x2820a239, 0x632acf25, 0xaae6b310, 0xb1a3974e, 0x48c0a1df,
41020       0xd61fd6ba, 0x5a3ee7aa, 0xd2453c39, 0xb980446d, 0x548455a0,
41021       0xde16676f, 0x9f29d97b, 0x789375a1, 0xf252ca0c, 0x7743a985,
41022       0xe961af3e, 0x66cdbd8d, 0x70c79c56, 0xcbc538f9, 0x14a3854e,
41023       0xa126851c, 0x58daa73a, 0x2a9f558c, 0xe9b5bb45, 0xfbd15e05,
41024       0x37af7f83, 0x38a1939d, 0xa4487927, 0x9511a056, 0xe428b2b5,
41025       0x7015846d, 0x001d3ce3 },
41026     { 0xe145b1d7, 0xd6be36b9, 0x009c5664, 0xf3e3938a, 0xe7c0f6db,
41027       0x2e562e7d, 0xc343f539, 0x951044e6, 0xd90897b1, 0xa5ab62b8,
41028       0x512f797c, 0xb1a1f70b, 0x750f28e4, 0x91cdd754, 0xffb8165d,
41029       0xb4c80e2f, 0x594d02b3, 0x65ed39c7, 0x56833edc, 0xcc12a49d,
41030       0xf3693a18, 0xe73694bc, 0xfcd2c404, 0x34cc134a, 0x11d40194,
41031       0x071bd5fc, 0xfc585e46, 0x05759047, 0x790b7a04, 0xb3280360,
41032       0x40afc684, 0x4bb8c6fc },
41033     { 0xfd0f8796, 0x3120e2dd, 0xb133c9de, 0x6968a40d, 0xa9369c6e,
41034       0xfea366c0, 0x6007273b, 0x37e5b6d6, 0x8cb81439, 0x39e4ecf0,
41035       0x9febc005, 0x487fe9cd, 0x0199b53c, 0xeb8af444, 0x293519eb,
41036       0x2f124e3b, 0xc82c9c16, 0x860c218a, 0x709dc590, 0xacd1d6f2,
41037       0x36d50529, 0x5696d545, 0x59120bfc, 0xc03f5df9, 0x10ffa690,
41038       0x99a3e88d, 0x6c432827, 0xd4f9cfa5, 0x9a135d89, 0x2e8fea9e,
41039       0xb6a77e78, 0x3699a881 },
41040     { 0x1eb1c64d, 0x5bca3372, 0xf1d28154, 0xe9cf3a2d, 0x6537106f,
41041       0xb7e2e9b3, 0x4f7cbf4d, 0x06c17151, 0x2058b37f, 0xcbde416e,
41042       0x8834e9c5, 0x82c53a7e, 0xe9ac3a75, 0x94dbdfe2, 0xc5e67c02,
41043       0x795ec6cb, 0x1426a80d, 0x8c23c25f, 0x6a8d4f9f, 0xee2cd20d,
41044       0xd3b7c235, 0x838daa54, 0x3d7a4d52, 0xb9e08ec0, 0x781cb473,
41045       0xca9475e9, 0x5ec31caa, 0x7271f39e, 0x82535187, 0x1df08e9f,
41046       0x208aff8b, 0x4f3a4b03 },
41047     { 0x1ed095f8, 0x0f7b8107, 0xda226d4e, 0x23e37fa6, 0xafb36d1d,
41048       0x8b0f9852, 0x07d8e311, 0xb114634e, 0xe3e0f16e, 0xb9634a97,
41049       0x421eec37, 0x2454bb9c, 0xd72b21c1, 0xb4ecd5db, 0x6df20d7c,
41050       0xf9603868, 0xdf86e0a2, 0x9f5359fd, 0x5ac488aa, 0xc43d54fa,
41051       0xd1049df4, 0x56d714ab, 0xb020607a, 0x13152b3e, 0x7a02325e,
41052       0x49be1c18, 0x52ae84db, 0x44f24f4a, 0x0b5a7b80, 0x9e525c03,
41053       0xa6d179fd, 0x6d874446 },
41054     { 0xbe9a42f5, 0xd29d07aa, 0x3781ccc8, 0x1fd5316c, 0x9dc69ea1,
41055       0x71a75a6d, 0x88fee91a, 0x4e19e0df, 0xf8d44f12, 0x99c2b4dc,
41056       0x31ae94e4, 0x05f6df92, 0xcf28ccc2, 0x27fba876, 0xf57f7ceb,
41057       0x6e1a0f01, 0xf3fd3b74, 0xe03f1f34, 0x42c1d213, 0xa0edc4a7,
41058       0x7deb8580, 0x5caac270, 0xaf0848bc, 0x0f5d791f, 0x07ac759d,
41059       0x17f514ad, 0x904fc531, 0x95a39734, 0x7bb70f3d, 0x95a4aca9,
41060       0xff9c5609, 0x3cf384c9 },
41061     { 0xce1fc9e3, 0x700506ba, 0x676b0399, 0x49721742, 0xe72bf7b3,
41062       0x2b4a1b8d, 0x79b209f7, 0xca8602a8, 0xce26a8e1, 0x90580b90,
41063       0xfe24f39a, 0x1ef339b7, 0x629362e1, 0xb6c5d991, 0x577b24f4,
41064       0x51174e1a, 0x05e451e9, 0xf380fcb5, 0x148321bd, 0xf4d97afb,
41065       0x747e5d2a, 0x099806bb, 0xbe99a608, 0x85525d65, 0xd455e820,
41066       0x264828d9, 0xd8560a65, 0x8c8c5405, 0x71030770, 0x3c67e73c,
41067       0xee73df26, 0x2b248850 },
41068     { 0x8541159f, 0x2173cde6, 0x4fb410b2, 0x78224c18, 0x1f2ca1c7,
41069       0x07a28619, 0xa8b23e40, 0x52c207d6, 0xa6b2344a, 0x071a0210,
41070       0xb5ed2945, 0xdb0e587c, 0x810fcc6c, 0x6c56b8ef, 0x62d843b9,
41071       0x1248c58f, 0x74c66975, 0x4b90363d, 0xe66c66f6, 0x6348f7f2,
41072       0xc126bcbe, 0xb2f9d441, 0x73ce49e8, 0xac07f2a3, 0xe81b0df0,
41073       0x52486758, 0x1d4621d1, 0xa108b54d, 0x74414a1c, 0x17261ece,
41074       0x6a3ac215, 0x938b3bcc },
41075     { 0xe4ded340, 0xa9e4a16b, 0x80e88036, 0x8e65fb2a, 0xdcd73acb,
41076       0x97089606, 0xaaa657a9, 0x1c3a0434, 0x49101b06, 0xf304fc58,
41077       0xda0bb64c, 0xe60fb61a, 0xf5542df5, 0x818c2aec, 0x56f76d5f,
41078       0x74020576, 0x92533d97, 0xb566b790, 0x74d6eb5f, 0xae4655e5,
41079       0xa55b44b7, 0x60f7a1b5, 0x93747ea5, 0x7970179b, 0xf2dace56,
41080       0x8ae7e0e8, 0x84e83c06, 0x98474607, 0x15307341, 0x24e8c9ed,
41081       0xd9e89d6b, 0x6cff58a5 },
41082     { 0x03e51f68, 0x508c01b0, 0x1d2fe7d6, 0xe1d1f225, 0x09bd8805,
41083       0xf7998d0b, 0x03e415b7, 0x255e907a, 0x607d9798, 0xd148467d,
41084       0x9b453896, 0x055c3b1e, 0x809f50f4, 0x35001013, 0xd0233fdc,
41085       0xfbbb2fa6, 0xff1820b8, 0x0b680b0a, 0x38d317e0, 0xb1d404dc,
41086       0xccc8c7df, 0x133d5444, 0x6ec13f84, 0x7fa847e6, 0x046e2e48,
41087       0xc33f83d8, 0x4863b3ac, 0x3c627fc5, 0xeb936af7, 0x5f67f8aa,
41088       0x31b79327, 0x5fe4ac8f },
41089     { 0x8b6f401e, 0x581aa4bf, 0xad5c7ed4, 0x05db12a3, 0x6fb07b4a,
41090       0x7b018726, 0x9c22bcd4, 0xfdd11f04, 0x69371c95, 0x5454a7d4,
41091       0x99a46eaf, 0x066c55fb, 0x7fef96d0, 0x18637c7c, 0x6b83e95c,
41092       0xbafc1d34, 0x00bb42dc, 0x55c38593, 0x34e7e712, 0xdd8dec2b,
41093       0xb184cee8, 0x69c9cfb0, 0x49a27864, 0x8dcc0c42, 0x2010f2e7,
41094       0x290d95f2, 0x6977a420, 0x86e254c9, 0xeb2abdad, 0x20931c89,
41095       0x121c0548, 0x81377164 },
41096     { 0x9c5a8edf, 0x6266b25e, 0x1078a7ad, 0x6e1388c2, 0x4876eedf,
41097       0x5f02737d, 0x62744617, 0x242fa7f9, 0xb385382a, 0x3e2cfbd9,
41098       0x02f71bef, 0xbadad7b1, 0x677d0a92, 0x562abcfa, 0x51fdff34,
41099       0x573ebd17, 0x7c250c78, 0xd7f65852, 0xc47ca896, 0xe0cf16ee,
41100       0x67622c9e, 0x8ccd79b0, 0xf8f2c075, 0x31fc5882, 0xa6008515,
41101       0x9232b37e, 0x82e8c5ba, 0x4d7bb361, 0xd2f146fe, 0xbf24735c,
41102       0x9cd2db98, 0x79c280ee },
41103     { 0xf2b48122, 0xbdcc8203, 0xb04ac48e, 0xa8c04916, 0x9fc4885e,
41104       0xacf064dc, 0x82c1001c, 0xab838997, 0x676de250, 0x7339e721,
41105       0x8e1ab820, 0x17aa5aea, 0x6bc14b2e, 0x24d28ca0, 0x816b6230,
41106       0x570c5bb7, 0xcee6b606, 0x6c51235c, 0x183eae42, 0x1b2bf89f,
41107       0x9c66274b, 0x3e3af3c6, 0xb51e38bc, 0xe0b04426, 0x73e40e3b,
41108       0x26dbc58e, 0xb5be5be4, 0x3f9dd578, 0x52c8f408, 0x9fd9f791,
41109       0xa9e3ff4f, 0x758073a4 },
41110     { 0x8691ca22, 0x7d27b057, 0x13a2a1b6, 0xf206bfd6, 0xac795413,
41111       0xe84bd385, 0x75536607, 0xc5d18a2a, 0xc8a0e24c, 0x2e166de7,
41112       0x3c474dbd, 0x56d5750c, 0x1366843a, 0xdef444c1, 0xcf4b8432,
41113       0x14646e53, 0xa9fd9783, 0x4bc0d030, 0x297ee203, 0xbda4c824,
41114       0xfd7be6c7, 0x3d0b10bf, 0x08c7f3ff, 0x2d216476, 0xb4fd4c45,
41115       0x06e52599, 0x49e9e104, 0xfbab9fa1, 0x8661d32d, 0x9342a7fa,
41116       0xfaf66aa8, 0x3f3e3458 },
41117     { 0x951597aa, 0x51ec35af, 0x49df64eb, 0xb677d4ac, 0x9bf4eff5,
41118       0x0276cd9c, 0x515a2935, 0x423eca49, 0xfd9bb9c3, 0x8a696553,
41119       0xede1f09c, 0xf99ee9df, 0x199e5f98, 0xb8fa2956, 0x35292c32,
41120       0xb7638758, 0xfc40e81b, 0x8734eddc, 0x65457d95, 0xd82d5e9f,
41121       0x30c78d2b, 0xc8ee323e, 0xc1433d67, 0xe77b2e4c, 0x3c8314ae,
41122       0x56d9f807, 0x2a0e2f63, 0x441eede2, 0x6c48295e, 0x1e9e17ed,
41123       0x34c294ef, 0x640d20c4 },
41124     { 0x3284d513, 0x4e9a0b8e, 0xf315053a, 0x074c3545, 0x45acd52a,
41125       0xb36e7407, 0x1de50db7, 0xd80bdcfc, 0x2549fc46, 0x8d9d47dc,
41126       0x303f07a8, 0x29b6ef13, 0x6d4ad4c2, 0x4e461aca, 0xfc9f1b73,
41127       0xca8e351d, 0x57460e65, 0x8bc4094d, 0x0f32d367, 0xb6302b33,
41128       0x285742e8, 0x69a074b6, 0x876c29c3, 0xdfe52b11, 0x912bd17a,
41129       0xf39e4609, 0x349aa639, 0x8ee40d66, 0xc72e05c1, 0xb968902a,
41130       0xc0d92816, 0x0f9c1ca8 },
41131     { 0x67433df3, 0x1ebbaab3, 0x15d3628c, 0xb6aa5347, 0x97f0c5cc,
41132       0x13a320d8, 0x65e408f9, 0x72c918cb, 0xd5373451, 0x4b638854,
41133       0x0b4dca09, 0x731399a3, 0x0a3b1326, 0xcf256730, 0x6608b388,
41134       0x5ea60dfa, 0x7b290dfd, 0x58ad74b0, 0xd7694f9b, 0x83202789,
41135       0xb6630fb1, 0x48593db8, 0xc65e3eaf, 0x3db47f70, 0x3e7263f8,
41136       0x63949c91, 0xe6e6ff33, 0x9b9acec6, 0x098a8240, 0x34bd9ba7,
41137       0x45d36ec5, 0x7e31c12f },
41138     { 0x0dfd2dd7, 0xbe281d68, 0x24ab61d8, 0x1efacb00, 0x94431f97,
41139       0xb9c3005f, 0x959cb3bc, 0x660c8dfa, 0xcffbb406, 0xfdd5fc30,
41140       0x7969a10d, 0x7a4631be, 0xde13fd1b, 0x336e309e, 0xfc947076,
41141       0x76b3bfad, 0xdcc72223, 0xfa91925d, 0x156c4ee1, 0x741f0d73,
41142       0x0e2b3747, 0x4f64ee41, 0xefc4d93c, 0x86be92d3, 0xfc4fbb2e,
41143       0xc53b7e03, 0x337ca1bb, 0xac196cf5, 0x7e23ba60, 0x4de41a30,
41144       0x326d5357, 0x1a219c45 },
41145     { 0xaa4db0bc, 0xfdcf7ef8, 0x7b6c9963, 0x2e231806, 0x3d8a192f,
41146       0xc2639067, 0xffdc7771, 0xc0cec2e2, 0xa2fc0edb, 0x997c8e35,
41147       0x82cc6043, 0x78e10ec1, 0x2b0c8120, 0xfd0de2cb, 0x69e57f8e,
41148       0x4d6c457f, 0x5b53f1c3, 0x953e69b2, 0xc4f89cb8, 0x422a330a,
41149       0x95566be6, 0x92ff2329, 0x437442d1, 0x73cd502d, 0xbea69403,
41150       0xf04ce590, 0xf8030662, 0x6ac1537e, 0xb6d0bf93, 0xe02bcf77,
41151       0xbc90192f, 0x17aaa999 },
41152     { 0x8e55db2e, 0x0d3d5643, 0x3b946851, 0x835dee43, 0x5b88462f,
41153       0x1a1440e5, 0xea17e27c, 0xa6ff3b35, 0xdd95f7a9, 0x23f99c36,
41154       0xbdd672cf, 0x7217fdd9, 0xdd2045c0, 0xf400ac1e, 0x4ff06b25,
41155       0x94b55c87, 0x0e4a49be, 0x0a44a0e5, 0xb43b6813, 0xe8925e91,
41156       0x214f96c5, 0x78bedde1, 0x0f97fa97, 0x0f456a4c, 0xa5bfd267,
41157       0xa28fd86b, 0xbe7608ef, 0x3b4b2d8f, 0x226474bc, 0xfbd5ff8c,
41158       0xa5f3b24a, 0x6b282af0 },
41159     { 0x6341a595, 0x78fc025f, 0xa445e28c, 0x591c38d6, 0xeb446842,
41160       0x72bd6e3d, 0x75547833, 0x3f9466d3, 0x083e16c4, 0x911414d3,
41161       0x95a7acb4, 0x145d9466, 0x8fd2fb64, 0x102ddf09, 0x0bfd87b1,
41162       0x2a2b2d2d, 0x59455088, 0x69e9be5c, 0xa80245de, 0xee378bf4,
41163       0xb2306b0e, 0x80b0bd68, 0xc2be9f3d, 0x76a545c6, 0x4802c245,
41164       0x429d167b, 0x2b412dfb, 0x13e64427, 0xee8d9762, 0xb664f529,
41165       0x54706ebf, 0x6d4f5d23 },
41166     { 0x00ba9f88, 0x35c8f2b6, 0x7bb6d0bf, 0xfdc807e0, 0xb3b81e5b,
41167       0x0a126d42, 0xa7ac781e, 0x335ce6ce, 0xf37dcba6, 0x3e308e6f,
41168       0x63c96487, 0x028dca62, 0x8818434d, 0x72eba57e, 0x79b78a26,
41169       0xa9e3d59f, 0x2f07aea3, 0xd2f0a7dd, 0x24d05f74, 0xe0fe4678,
41170       0x0116deb6, 0xb2085170, 0x58f37580, 0x9c2a5e92, 0x74070bb3,
41171       0xe78bd7a5, 0xb9977d90, 0x551fc872, 0x40db81b4, 0x6eda93c4,
41172       0xd65d34ad, 0x4aaf0b4f },
41173     { 0x3514c7af, 0x9bef2506, 0xbc181ead, 0xb09e7dad, 0x8fa3ec58,
41174       0xef3cae87, 0x173b8685, 0xd8dbfab5, 0x921d32dd, 0xb2490fc0,
41175       0x8bd9c466, 0x4eef386b, 0xa061dbdb, 0xc1cdd52f, 0x25bc04db,
41176       0x64de989a, 0x85728636, 0x06f9836b, 0x8be44aa0, 0x11a5a804,
41177       0x097018c7, 0x16dede4e, 0xb2c11fb1, 0x72aec577, 0xa721ecd9,
41178       0x144dade1, 0xd6ebf3a9, 0xf99c526b, 0x1c2e14d7, 0xa1d4165b,
41179       0x82bc6337, 0x8b2cbd39 },
41180     { 0x8a52e991, 0x28ec1bf2, 0xcf9d42ec, 0x0ba202f6, 0xc634ea45,
41181       0x8307d130, 0xc5762b9c, 0x3fc257b3, 0x487c2a2d, 0xbd3298d1,
41182       0xa319488a, 0xca14f1a7, 0x06ba06d2, 0xc70ca93b, 0xee405e89,
41183       0x9aa3f4b3, 0x35deeae7, 0xcc64eeb3, 0x03bf1d4c, 0xd155f578,
41184       0x45616bfd, 0x041ec0b5, 0x086e33f6, 0x23df80e6, 0xf0243cf5,
41185       0x399a79c8, 0x874ccd58, 0x86c2824e, 0x8fc5c831, 0x220eeaec,
41186       0x7dbe3670, 0x57e28304 },
41187     { 0xfbcdf666, 0x6e60b698, 0x8bebb1d2, 0xbdd06a99, 0x80498436,
41188       0x4044adba, 0x522bc88d, 0xd76bf75e, 0x28423b20, 0x655c4b9b,
41189       0x53398a72, 0x65c0f492, 0x0ca37601, 0x76d4f2b7, 0x2030fa5a,
41190       0x46989925, 0xb6054705, 0x96b37e87, 0x53de1b2f, 0xef96f731,
41191       0xad54ef05, 0x5ecbbc8c, 0xa93617b0, 0xeb289d0a, 0x7cba217d,
41192       0x3ac0fbd5, 0x19d4a2d7, 0xd0d3cb56, 0xc91d6063, 0xe8bee9d4,
41193       0x696ffda6, 0x4f12e037 },
41194     { 0x15f1a610, 0x4ccfa422, 0x3786519a, 0x804a5c55, 0x73838134,
41195       0x1246a454, 0x4b284e2a, 0xfa15b484, 0x146d1320, 0x36464c65,
41196       0x70a8a0fa, 0xfb6ba88c, 0x93c4804e, 0x74e7cee7, 0xb95ae16a,
41197       0x8c34d22c, 0xf9c1d4dd, 0x9d9ed89f, 0x32025371, 0x61a0866d,
41198       0x9bd6444a, 0x45b232b2, 0xf277bab1, 0xf888e92c, 0xa9448b02,
41199       0x73e69c6e, 0x5b521ecb, 0x1a496ea9, 0x5858afb2, 0xa8f78ea7,
41200       0xb1266f91, 0x83d2333e },
41201     { 0x67b478d7, 0x1c633288, 0x50a2fc9c, 0xa1ee1ae1, 0x18d2241b,
41202       0x05b6ab30, 0x893cd696, 0x69f1f288, 0xa8117a87, 0x159d6660,
41203       0x70e73d77, 0xe8120119, 0x93f55f0a, 0x528fef00, 0xd854dfb2,
41204       0xb3978db8, 0xf45d9fbb, 0xd6b43ef6, 0xd5bee397, 0x17de4bfe,
41205       0x6bf76dad, 0xa01e0f59, 0x3d40754c, 0x28b2280e, 0xf8e86ef3,
41206       0x8edb6122, 0xb7d1e586, 0x8226b6af, 0x2f40a55b, 0x46353215,
41207       0xc5a31621, 0x7362f13e },
41208     { 0x73c0c430, 0x792eb27c, 0xa51c3657, 0x8cc0a65f, 0xd2194f1b,
41209       0x50a5cece, 0x814b4947, 0x18945688, 0x4b6fbbf4, 0xbbf0a81a,
41210       0xf0aa8608, 0x376f4f58, 0x3987795e, 0xd9361d68, 0xe3a8d0d5,
41211       0xb6510cd8, 0xb6c1a455, 0x63e2fdbf, 0xaec891f9, 0x2c91154e,
41212       0xff568f64, 0x0eb1e715, 0x2f2b399e, 0xe7af9cd7, 0x89f0bf0b,
41213       0x1fc39bac, 0x90983695, 0xf0861d92, 0xda0a20a8, 0xd9b16f02,
41214       0xa38c0ead, 0x2f10693f },
41215     { 0x0c06ded2, 0x07a6ce91, 0x2fd9087b, 0xf974842f, 0xa9f635a6,
41216       0xe468bfd6, 0x1ed60626, 0x04b61891, 0x369ee548, 0x1fb2f89f,
41217       0xdc96a201, 0x9cbd1113, 0x10d633ac, 0x6759acfe, 0x8faa629e,
41218       0x64ba66fc, 0x47f38283, 0xa686ae49, 0xd59cda99, 0x828c3a05,
41219       0x08ea2f6e, 0x7c7afb14, 0xaf3953c8, 0x2551c8e4, 0x9daa9e4f,
41220       0x5b53d279, 0xad6f1940, 0x1eff68d4, 0x96437cdb, 0x2775dbdd,
41221       0x4fe7a043, 0x985f83e4 },
41222     { 0xeaf45294, 0x89603c16, 0xc24b5751, 0x70131160, 0x39d6b52d,
41223       0x4c112018, 0xed943340, 0x7079cf02, 0x74f41b68, 0x0c5b028b,
41224       0x9c8ac1e1, 0x3dc3f076, 0xf8b24f0e, 0x5ac5eea3, 0xe34c5c22,
41225       0xee6684ba, 0x9abc452a, 0xa5259e63, 0xe9df45cc, 0xb07d2cd1,
41226       0x1a443cfa, 0x07019c93, 0x92c003b3, 0x68fddaa9, 0x0d8cbc2e,
41227       0x2d9f179c, 0x1e781ca7, 0xbbf15a6f, 0x50dcc799, 0x54d779d5,
41228       0x0fe962f1, 0x0c88e540 },
41229     { 0xe8f44357, 0x84f71a6a, 0x3a3cab6a, 0xf75b4bf6, 0x5aebc680,
41230       0x334c9d9e, 0x8a753ef2, 0xcecaf084, 0x075e3c8e, 0xe28014c1,
41231       0xf74f8d3a, 0xbb9d5a38, 0xb80e32ae, 0x75988464, 0xf2bc3792,
41232       0x7b328e6f, 0xeed0e197, 0xebbb1faf, 0x5a33065a, 0x674eac95,
41233       0x922dbce8, 0x8c19fd8f, 0x987b907a, 0x8c17ae85, 0x3b3a2cd7,
41234       0x89f33627, 0xfa87772f, 0xebaea019, 0x3a25ced6, 0x4e5de499,
41235       0xaf110715, 0x8e2560b8 },
41236     { 0x3141aba6, 0x56d3746c, 0xbab2cf9e, 0x45a1079f, 0x9cdd27c7,
41237       0xb6382831, 0x9dfd950e, 0x22237632, 0x3a9408ff, 0x1e0b15cd,
41238       0xb1160118, 0x49a80200, 0xa383bba7, 0x2719db5d, 0x651046d5,
41239       0x6078340a, 0x97523b1f, 0x8929d4de, 0x8e0a28ab, 0x4040345c,
41240       0x0adf09c7, 0x61275ac2, 0x2331d611, 0xb41ab265, 0x5391ca50,
41241       0x230cc77c, 0x8f922315, 0x88be0c92, 0x92fd9a29, 0xfef3d92b,
41242       0x8324f2e5, 0x59005f22 },
41243     { 0x3c4c1c74, 0x6bb1750c, 0xe966fb79, 0xbe73aac0, 0x66c5973f,
41244       0x85a75d92, 0x3a8656b6, 0x8c97f932, 0x50446cde, 0x2b7043b1,
41245       0x3ff3897f, 0x548916f7, 0xb18b72b2, 0x913dd01c, 0x488c0de6,
41246       0xd0a751f1, 0x8558ca58, 0x19175714, 0x44a663da, 0x97714301,
41247       0xb0e08618, 0x2df190ac, 0xf39ead9c, 0x0080fc0c, 0x17382da1,
41248       0x0085ac6e, 0x3262a338, 0xe9791851, 0xb43bae8d, 0xe4495936,
41249       0xd783df6e, 0x57a78e26 },
41250     { 0x40dbddd8, 0x161b346f, 0x9410c3ac, 0x2b49a927, 0x1886cf3b,
41251       0x8c542783, 0x33b93deb, 0x72df3232, 0x40df579d, 0x9c8d59f5,
41252       0xc20ef500, 0xe5d7a67d, 0x67f08643, 0xc46b3918, 0xad96adc3,
41253       0xecfa2445, 0x0c4544d0, 0x658f589b, 0xe08417d7, 0xe6ec9301,
41254       0xc454e288, 0x6ca5ef6a, 0xac0f462d, 0x4191048f, 0x08d8a036,
41255       0x852407d8, 0xf6d35b7e, 0xb4c533a7, 0x8f6ada87, 0x3251e412,
41256       0x81c472e8, 0x1ca370c5 },
41257     { 0xa801b68a, 0x94bd5171, 0xfd1998b3, 0x7312879c, 0x41163202,
41258       0x4905aabf, 0xf5b01fdb, 0xb5fe87f4, 0x9cda128b, 0x78de523a,
41259       0xc7bd31f7, 0x0bf161a1, 0x23904c35, 0xb5decfd0, 0xe188f12d,
41260       0x224b2882, 0xf99dae74, 0x0dd2801d, 0x08cd1cd2, 0xcad467b5,
41261       0xc0867e39, 0x6c311c3d, 0x2b425072, 0x71a11720, 0x2efd9003,
41262       0x83bf464e, 0x1dbd3b03, 0x53d0448a, 0xe6265baa, 0x32db52f4,
41263       0x4c33ac79, 0x2584b34c },
41264     { 0x2aeec688, 0x3cb86389, 0x45fbe523, 0xa5e740ba, 0xfd60b5f8,
41265       0x422e71f7, 0x4874913d, 0x455d185c, 0xfa17d80d, 0x04c2bb36,
41266       0xac054524, 0x3f271854, 0xa8b9a657, 0x76dd3045, 0x62ee7cc8,
41267       0x2e42c3e1, 0x4df6c7d0, 0x00266706, 0xdc7cb488, 0x5927dd51,
41268       0x187897e0, 0x6b3faabe, 0xf2d5737c, 0xfe6ad22e, 0xff51a9ff,
41269       0xafb60269, 0x69807baa, 0xe1c83545, 0x951ca49a, 0xacddb6ff,
41270       0x3f9ab085, 0x7e811374 },
41271     { 0x830a88b1, 0xad722a8b, 0xce1117e1, 0x91918ea8, 0x0409b47d,
41272       0x3e02d0b8, 0x6c46d1d3, 0xb53812d3, 0xe589669c, 0x2fd09db0,
41273       0x15b0cd5e, 0x9845cd06, 0x2386c453, 0x0c1c155a, 0xf5ff43cb,
41274       0xda774de5, 0xe391c0cd, 0xbb076b98, 0x5004f286, 0x97d71eff,
41275       0xaeec0bfe, 0x23e0b46c, 0x32a1ad94, 0xe4538667, 0x396da422,
41276       0xfe0c9f81, 0x63db2bfe, 0x6376c1a2, 0xba56fa91, 0x001c7918,
41277       0xdf8485a6, 0x436b8c64 },
41278     { 0x8ab764bc, 0x88117e9d, 0xa077df84, 0xdfa61e94, 0x0c18eebd,
41279       0x5a7765d3, 0xfc9451dc, 0x548916af, 0x071a347a, 0x01a52e33,
41280       0xb23b41df, 0x633b95de, 0x43c8c286, 0xdd7d68c9, 0x18d97068,
41281       0xe4f9d41e, 0x8c92799d, 0x79908b90, 0xd47394a3, 0xe614148e,
41282       0xcd51e53f, 0xe5018517, 0x0243dcb6, 0x5060075e, 0x17954405,
41283       0xe5dcde62, 0x537da5ff, 0x6f7c90e1, 0x0768cb66, 0x1df7aae4,
41284       0x6dbe95e1, 0x5266ca9e },
41285     { 0x1386b3db, 0x84ddee6d, 0x7c38e540, 0xf9e4af5a, 0xeb04f49d,
41286       0xb3418440, 0xfde5a4fd, 0x2138a1e8, 0x30257cfc, 0x3e6e6924,
41287       0x19fd70c1, 0x3519c6e3, 0x86c31ff0, 0x8f34e174, 0x940ce1e8,
41288       0xf1e298fd, 0x14960d7c, 0x6fb8cb1d, 0x2b2f3bff, 0x207c1347,
41289       0x146ef8ff, 0x899a20b4, 0x7bd3e220, 0x7dec362b, 0x626bea27,
41290       0xa975044e, 0x4fb4cb67, 0x0f32b449, 0x1fc6703a, 0xc17a0920,
41291       0x9cd84a2b, 0x41f325b9 },
41292     { 0xce2843a4, 0x312ed513, 0x00728afc, 0xe748498e, 0x4d864ce5,
41293       0xa8ef2822, 0xa620083b, 0x34064704, 0x4bed338d, 0x5905e1d9,
41294       0x063e7b38, 0x2a578cb5, 0x289e7bb9, 0x98276d96, 0xf17b7341,
41295       0xdfe2dc47, 0x1dac8944, 0x5923521f, 0x23400aa7, 0x3db6d28d,
41296       0xa761ba43, 0xc647705e, 0x9bfd07dd, 0x8947ba6d, 0x242ca8fd,
41297       0x00f2e3ac, 0xeb8c3468, 0x49ef4670, 0xd9aa18fd, 0x7db3d37b,
41298       0xe58cea9e, 0x56b30fb6 },
41299     { 0xcd80a428, 0x07ecdcaa, 0x8732c891, 0x7af922dc, 0x3ada441f,
41300       0x20d88798, 0x924b008a, 0x3bed9a44, 0xb2e81c3a, 0x2123533c,
41301       0x65f807d3, 0xc34e4075, 0x1f2faecb, 0x0bfaefa5, 0xade8a88d,
41302       0x78b634a5, 0x94392a91, 0xc4e0b7f8, 0x90bb1cd8, 0x30922377,
41303       0xf87204ae, 0xdea9b4fa, 0x85d3cd83, 0x3edf81f5, 0xc6523a79,
41304       0x58f88c51, 0x17c0d969, 0xe472fb8b, 0xdccf7f07, 0x899081e5,
41305       0x58bdd146, 0x1353cc57 },
41306     { 0x39bf6e18, 0x28a56497, 0x649b89c7, 0x59e8b5a2, 0xdce8b8e7,
41307       0x8d9434a0, 0x2047040c, 0xd935bf51, 0x6a7b8e82, 0x2ab3a164,
41308       0x27f81294, 0xf1583ed6, 0x72d67297, 0x8416a7e0, 0xcd39e42b,
41309       0x49685d86, 0x958ddbad, 0x8a797fc7, 0x155ce6de, 0xa558f928,
41310       0xf8a36235, 0x75f4e570, 0x52877ae5, 0xbc69cfc0, 0xa6b16ebd,
41311       0x8f4193a9, 0xbb1cc1f1, 0x8d1df43c, 0x5a21e789, 0x723a830e,
41312       0xf451df58, 0x3ec2185d },
41313     { 0x1f0bc2d7, 0xb9d4c7d7, 0x6e51d412, 0x6982c6cc, 0xa09f80f6,
41314       0x92e02d93, 0x047ae09c, 0xb7dd2d25, 0x37f351f9, 0x3503149f,
41315       0xc77850be, 0x69d49ce1, 0x12f0d2c8, 0x60242acb, 0x7bc28b9d,
41316       0xba188c56, 0x06bc0550, 0x8e406121, 0x8d7d4329, 0xb0d84b1f,
41317       0xd38951e0, 0xb4a67ae7, 0x8bc97607, 0xb527c57b, 0x5497aa72,
41318       0xbc93c5f3, 0x39bdd666, 0x5f1de8cc, 0xe9d447a3, 0x3087dc5c,
41319       0xa211abe5, 0x89b356b6 },
41320     { 0xdfdcc837, 0xed6db0af, 0xa871b7a9, 0x0fb80baa, 0x1c1d4b72,
41321       0x413abfc9, 0xadac9e5c, 0xf5b56bf7, 0x8b8657a3, 0x5664a2da,
41322       0x0e41d94e, 0x11b04f72, 0x37433658, 0x63e11d26, 0xf426daea,
41323       0xee628ece, 0xcb162dc2, 0x011619c9, 0x87648643, 0x9cf5817f,
41324       0x5584bc86, 0xe1bb9702, 0x00bf7928, 0x2cc27cef, 0xdc60eee5,
41325       0x4ef3a80e, 0x87adc2f9, 0x7e1202be, 0x8a0d4f52, 0x656f18e0,
41326       0x57c5d126, 0x39c4f10d },
41327     { 0xe88aecd3, 0xb3a9b68c, 0xa518aa9d, 0x555b0918, 0x4bd4ee54,
41328       0xedc1cdad, 0x02068d84, 0x79b68b67, 0x811ac72d, 0x7dac80d0,
41329       0xa81a0a78, 0x6d1e6d35, 0x3bd16283, 0xc841e9ea, 0x894c4444,
41330       0xa7bc1775, 0xf1aa1202, 0xf2b63725, 0xc7d4c556, 0xbec7767e,
41331       0xd46ff51b, 0x2817ebb3, 0x73f7e339, 0xfde5be8d, 0x5aed24c4,
41332       0x44c6c977, 0xb6e579cf, 0x0b9a1707, 0x9069fbcc, 0xcff16478,
41333       0x49152b00, 0x414b542d },
41334     { 0x606e173b, 0x33c31e58, 0x90e6713a, 0x5b7f4e1b, 0xdebb20af,
41335       0x425fb512, 0x05120e70, 0xc788c617, 0x9013e4ec, 0x3ef05602,
41336       0x81c6e6d7, 0x9f9d35ac, 0x9450690a, 0xe131e88f, 0x44af082e,
41337       0x708f9b32, 0x1ba2aea9, 0xb2e4d66c, 0x740db29c, 0xaf1f4a6e,
41338       0xd1843007, 0x74ab9248, 0xed556a6c, 0x13338ef8, 0x270d17a6,
41339       0xf48e623e, 0x9608f5bf, 0x3c7362fa, 0x444e8515, 0x43977874,
41340       0xe00b8b2a, 0x52678d6a },
41341     { 0xdf36aeb4, 0x5dff1c59, 0xa92bc0ab, 0x52d6653c, 0x927a5f81,
41342       0x0e03f496, 0x2dfd491f, 0x8509d414, 0xa571f89b, 0x258c2c52,
41343       0x93334485, 0x2bd61804, 0x3f7d9e09, 0x1a33e94f, 0x2c1bf906,
41344       0xfab418d3, 0x5aa5695c, 0xf39c490e, 0xf6d2d7ff, 0x0e41196e,
41345       0x0f7948a9, 0x3ecd4075, 0xd3053b4f, 0x4b58f9b2, 0x5d9974c9,
41346       0xb8ee842a, 0xbf22f682, 0x23a59c1d, 0xc8efcea6, 0x045ac614,
41347       0xc10ceedd, 0x7040ba5b },
41348     { 0x515a1a96, 0x2c364f81, 0x184327e0, 0x31a63503, 0x1ad93d4f,
41349       0x0a096650, 0x273b6173, 0x9d7694f1, 0xd2cda9d2, 0x8886d876,
41350       0x2814c177, 0x1e01a742, 0x8667696b, 0x3492276b, 0x5b25f006,
41351       0x2fd4f0c6, 0xfb294c4a, 0x6527349f, 0xde1d336f, 0xc1fe0d8a,
41352       0xe7e3860e, 0xaf9a23e8, 0xb774c31e, 0x97d2b721, 0x4365784a,
41353       0xfac3e582, 0x70f4eaa3, 0xff2dff4e, 0xfe873248, 0x3d281e1a,
41354       0x0bd1c9c1, 0x9043a6d6 },
41355     { 0x766c7937, 0x1511a0fe, 0xabbc3be3, 0x1b2ded5c, 0xe00888ac,
41356       0x2ac160cc, 0x616200f3, 0x928754bd, 0x34a2ea06, 0xb801c83d,
41357       0x9cbe106f, 0x8ad7a03a, 0xcedfcd94, 0x996b0822, 0xe4069880,
41358       0xc3c3463a, 0xf597f663, 0xfb12ea4d, 0x40c92af9, 0x2c8d3834,
41359       0x4e8da154, 0x79bc85c6, 0xdb4e801a, 0x95771fa2, 0x1e3579b2,
41360       0x7bd2c138, 0xffaad078, 0xe45c75df, 0xb73eac46, 0xb0760a3c,
41361       0x3a125f35, 0x26362b48 },
41362     { 0xeefc3e89, 0x25c68d28, 0x69e9ee71, 0x2d0ee877, 0xaf5e4b75,
41363       0x8b07bb86, 0xcb86b333, 0xdb709072, 0xff552bac, 0xfd3d20ea,
41364       0x4c0da1e9, 0xa5eeb2b1, 0x44f97145, 0x391f688a, 0x1e06d485,
41365       0x21fbd310, 0xbea9cd49, 0x45e4f2a5, 0xa7bf21da, 0x7b60d464,
41366       0x054d5471, 0x193f88c8, 0xbee0f2e9, 0x5ace53d1, 0xc1439273,
41367       0x92c26563, 0x96c6b5ee, 0x9c86e0b2, 0x09ff59ba, 0x452fe231,
41368       0x555c935e, 0x2e952b20 },
41369     { 0xd75f886e, 0x2a846bca, 0xd43dfc58, 0xe68a5dbe, 0x007b1b86,
41370       0x103e45b6, 0x355ff2b5, 0x580e2ec9, 0xa263ecc9, 0xbc702f26,
41371       0x181e5e33, 0x2835b386, 0x6c122076, 0x025113ec, 0x7fbd856d,
41372       0xa5c26e3a, 0x9d6ebcb1, 0x8ef83fb3, 0xa44d2fa8, 0x7aaa53f2,
41373       0x53b1fa97, 0x7c14ef33, 0x17559a30, 0xff604a11, 0xb09377e0,
41374       0x2bcd96b0, 0xdb2f0273, 0xa5c14896, 0xeb53ef06, 0x1c0a84c9,
41375       0x30378e4b, 0x1236d017 },
41376     { 0xc084373b, 0xd7481c8f, 0x646097ae, 0x29ae4768, 0x613bc34b,
41377       0x1300dfa0, 0x934bc2b0, 0x3712714c, 0x0e2be7e2, 0x86524629,
41378       0xed010800, 0x554fbb9f, 0x42314576, 0xf0ec0b38, 0x330a3282,
41379       0x65baf594, 0x706ef817, 0x3bdde1a8, 0xba7530e9, 0x7d2c727d,
41380       0x74cc95cb, 0xbb0c5d66, 0x2438906d, 0xb3fcd365, 0xd14658f3,
41381       0x19881941, 0x6c97f0e9, 0xe616f555, 0x4b9ec7ea, 0x353c2d85,
41382       0x620cb56e, 0x02a48014 },
41383     { 0x506ccd38, 0x11d6d23d, 0x9059baa6, 0x229a1c54, 0x69d011c5,
41384       0x717c9c27, 0xd828937d, 0xe87e1b46, 0x83835083, 0xf5d63bbb,
41385       0xaadac258, 0xf0a7b427, 0x9f154d1f, 0x99ab26bd, 0x8ec955fd,
41386       0xdec0ffbf, 0x49fcb880, 0xee957c67, 0x1e0114de, 0x32395dee,
41387       0x369f46c7, 0x192a64b7, 0x91eb2599, 0x43044660, 0xa2e8c3da,
41388       0xbe2da887, 0xc3556d18, 0xa44e2c25, 0xb55f75f3, 0x31390414,
41389       0x8f217fe0, 0x1d8bde6f },
41390     { 0xa2028924, 0x03cd39f8, 0xb06ecb9f, 0x6e54f19c, 0xd6f05846,
41391       0x862bbcb7, 0x5a060776, 0xdbe06716, 0xb10fec10, 0x9397c97a,
41392       0x6f1bb65c, 0xf4213826, 0xa672ba38, 0x414deccb, 0xf88b05e6,
41393       0x594d4d43, 0xac94d4d1, 0x7993f57a, 0xbfb17638, 0x74fc2a6a,
41394       0xb6fc655a, 0xd8196b5b, 0xee8d2139, 0xdc375c84, 0x360d3a26,
41395       0xb9b00a02, 0xdeb93b87, 0xb36ed35c, 0xcc83209e, 0xf565b28b,
41396       0xc61013c1, 0x349c6943 },
41397     { 0x4de6c88a, 0xd1b39444, 0x4700207e, 0xd5c2c471, 0x21c2b780,
41398       0xb6f458a2, 0x0850993e, 0x749f7564, 0xbaef0c18, 0x400ba579,
41399       0x737c70f0, 0x2d742938, 0x21467ebf, 0xc5a8e2ec, 0x5337f453,
41400       0x243a666e, 0xed0bd50a, 0xc991f1c7, 0xf4bd1f91, 0x3a7f3e90,
41401       0x5f0e129b, 0x96089e8a, 0x07389635, 0xd0d3a177, 0x27182ac9,
41402       0x9cf842d5, 0x0817c5c2, 0x21195299, 0x87255769, 0xa32f327e,
41403       0x89c2d8fa, 0x056587ab },
41404     { 0x1ce4733d, 0x008562ed, 0x98e51444, 0x5faff7cb, 0xa9ab46b9,
41405       0x5f03021f, 0xb61a8c13, 0x89494c5e, 0x36b35976, 0x57c95036,
41406       0x2ac2d2f6, 0x6be84c8f, 0x9bd2703e, 0x0e5b34d8, 0x7e872abb,
41407       0xc4ad918f, 0xc4052ee1, 0xc2a89e9f, 0x3190b51e, 0xc2caee3f,
41408       0x6fff254f, 0x58fd1437, 0x883e0972, 0x6f3c0d68, 0x0fb15438,
41409       0x63d0a0e9, 0xf6caae00, 0xc438764b, 0x3f1d0f6c, 0x815f1565,
41410       0xb86cdbde, 0x1b87f2ed },
41411     { 0x2b0b15b1, 0x35792bbb, 0xce6ba779, 0xa3e4b5a7, 0xdd8f3779,
41412       0xfbacffd9, 0xc298d1ef, 0x005450bd, 0xc47031c6, 0x0e3f5556,
41413       0x95d68066, 0x0770f07a, 0x2d1052c2, 0xce3e84e0, 0x7aa8cc54,
41414       0xb050791e, 0xba3223a3, 0x4d621e73, 0x39632990, 0x87b9b94d,
41415       0x7eb8056d, 0x8df9cb47, 0xedfca0cc, 0xe2430de8, 0x9712a0ca,
41416       0x374bf416, 0x88848a99, 0xbe3f3c77, 0xc4a3e59e, 0xb22b87b1,
41417       0x3e95bc23, 0x8e0227c4 },
41418     { 0x3210964d, 0x000e22a8, 0xff056eeb, 0xdccd5df5, 0xdaf1ead7,
41419       0x02173a1f, 0x67cdcae3, 0xd02833e0, 0x8bdcc90c, 0x1cc574cb,
41420       0x3224b4f5, 0x86eca714, 0xbb3f8298, 0xd00e603a, 0x0c1a8deb,
41421       0xb98ece1b, 0x378c261d, 0x228a46e4, 0xa6165e5d, 0xc6f9dd0d,
41422       0x4b7ef0e2, 0xb3ae3899, 0xbda9f306, 0x3a3c16b3, 0x38a084db,
41423       0x5e9a26d3, 0x5394e950, 0x528e5993, 0x4ea206bc, 0x848ecb11,
41424       0x40545d6e, 0x14b15ab5 },
41425     { 0x664c59a2, 0x0f6d86c9, 0x60fd7aa5, 0x3dfe2be1, 0x9072cb8e,
41426       0x33f9b569, 0x8176a7e0, 0x5f2325d9, 0x4587080b, 0x79a0d4e7,
41427       0x0d5d4e05, 0xa4ee0def, 0xc87b28e1, 0xc0ad9ffa, 0x3f09b4ee,
41428       0xd6f18d2f, 0x292e9d87, 0xcc896ae7, 0x6094763c, 0xca88953d,
41429       0x18fbf9fa, 0xdbee97a8, 0x4b63d701, 0xdf20e0e9, 0x47ea722f,
41430       0xcbba6e30, 0x612b571f, 0xce57e1ca, 0x009a55f5, 0x1e16ac76,
41431       0xc4389e2e, 0x742bbed8 },
41432     { 0xc1dc2c73, 0x23ea86dc, 0xc1643abf, 0x4bbbfd5b, 0x24d8ca1f,
41433       0x07f8fa1f, 0x8cb5cac7, 0xde68a6e0, 0x54e66a7d, 0x7d54c64b,
41434       0xa9b7ad78, 0x789dba22, 0xe364ab94, 0x4d88d540, 0x1f72e011,
41435       0xc8c2e02d, 0x46e2a278, 0x4c826057, 0x4b187c7d, 0xe6c35bb3,
41436       0xeb8fe0c9, 0xed8b3dfe, 0x7d11e415, 0xb6bc34e8, 0xb865c7f9,
41437       0xb3908bbf, 0xe1ecc17c, 0x717d1ce6, 0xf7cdd69b, 0x151e3308,
41438       0xb5c94124, 0x97bd5a14 },
41439     { 0x81e82861, 0xe01c62fe, 0xdd42c40e, 0x703d4b6d, 0xe65e91e5,
41440       0x7e52e55b, 0x5abbbfdd, 0xb8b49374, 0xc72a45f4, 0xb4f15f52,
41441       0x550f29d8, 0xce8435a8, 0x582de75f, 0x9df76b9b, 0xa20c8b96,
41442       0x52e84c5f, 0x0a8a0af4, 0xaf77d2d1, 0xca6013c3, 0x0389bbd8,
41443       0x26f8305f, 0xb0d9b9ba, 0x0cec8b9a, 0xf053e848, 0xffabda18,
41444       0x4d63367a, 0xa6424c2a, 0x50f53be4, 0x864fba2e, 0xf892c58c,
41445       0x48cc5469, 0x317c6d31 },
41446     { 0x2cb7d42b, 0x0c3525b0, 0x310facae, 0x55240bc9, 0xff20408f,
41447       0x8d5d2022, 0xe0c10ea0, 0x6b01402f, 0x718eb23d, 0x7fbef68a,
41448       0x41252a19, 0xa0146b5a, 0x110e0d6e, 0x59afce48, 0x022de181,
41449       0xe9a1d27f, 0xdc3f49da, 0x6db96d16, 0xefbe4008, 0xfc1ae3f5,
41450       0xeccbc11c, 0xf9d70641, 0x525f8636, 0x49022279, 0xc2763c30,
41451       0x3769796a, 0x1d90630b, 0x9cc3483c, 0xee3d3f17, 0x451651f0,
41452       0x9da0b8fd, 0x6ae59739 },
41453     { 0xbff4d2ee, 0x57b13bc7, 0x30b173d8, 0x20754229, 0x0794936c,
41454       0xb6254bd5, 0x5efd55be, 0x1d5f232a, 0x4e0c3389, 0xc06f4a85,
41455       0x8e61f944, 0xcf2c5b59, 0xfd5f87b7, 0xc564861f, 0x5a2afa4c,
41456       0xee261fb1, 0x2d97a774, 0xb0ff7226, 0xd6cf007a, 0x1a89ae22,
41457       0xd346f214, 0x28880534, 0x97b6497e, 0x8fe73bff, 0xfa2afffc,
41458       0x8a8595b2, 0xf151a726, 0x9ef9cf3e, 0xe744b82b, 0xa84ee5f1,
41459       0xbc63fe72, 0x6649048d },
41460     { 0x1e8b760d, 0x91b7bb78, 0x25aadaa0, 0xd47b0bd8, 0xfab5226f,
41461       0x81493d9f, 0xbffc148e, 0x4a6dd226, 0xa29be3db, 0x5a032f8a,
41462       0x34b0ab0b, 0x318dbc70, 0x7d654868, 0xdcccbfb5, 0x9c581e46,
41463       0x8506ab37, 0x2830ece2, 0x09136a6e, 0xcf6c80c7, 0x48b79356,
41464       0xef6b1e86, 0xfa176377, 0x83f0f1c9, 0x2c9c1cc1, 0x16abeddd,
41465       0x96f0526d, 0xa93b0de4, 0x3e0e98e2, 0x0f13873a, 0x6f2d7ada,
41466       0xf3fa49ec, 0x4eb93b5c },
41467     { 0xe11fae32, 0xbd89f7e5, 0xc4023f51, 0xd13d74f5, 0x491c3f6f,
41468       0x1b0014df, 0x555279b7, 0x1d849a57, 0x05ba0068, 0xbb9e8897,
41469       0xc13ca2ca, 0x82222419, 0xfd33676f, 0xafbbb685, 0x75878a2a,
41470       0x931c3f52, 0xef3d5173, 0x12aeefef, 0xbd8a6878, 0x189a5cc8,
41471       0xd99f0c16, 0x82cffdb3, 0xa19d48b6, 0xbf565406, 0xe9c6c4e0,
41472       0x5605e223, 0x86804172, 0x53e781de, 0xc7001cc8, 0xcdf5c90b,
41473       0x7c043f68, 0x2b582d93 },
41474     { 0x81abc2ae, 0xa1165c82, 0xe2b69eca, 0xa73380f5, 0x07fff66f,
41475       0xc097b3d2, 0x54776506, 0x5d603826, 0xb57fa21c, 0xdcbac9f3,
41476       0xc98dbdd5, 0x78750db4, 0xd9eff32a, 0x85e21103, 0x2f11c41c,
41477       0xceed172c, 0x9e348c09, 0xa8e39264, 0x831eddfb, 0x71cb936b,
41478       0xf50864a3, 0x915c3d06, 0xe93acfcd, 0xfe8e33cd, 0xb3f2f7aa,
41479       0x4bee10d7, 0xeb7cee9a, 0xc1d8eb48, 0xfa574afd, 0x4fa49ce3,
41480       0x862db4c0, 0x78615109 },
41481     { 0x7ae72c21, 0x3fe3f480, 0xfd0f0da5, 0x631aa144, 0xf8c3a454,
41482       0xc76ee1e8, 0x51b4f1ab, 0x379ae094, 0xd7cdbb24, 0x2a3a4397,
41483       0x82bd5fcd, 0x7a14cffe, 0xf427ef5a, 0xbbe4ed12, 0x284d3ccf,
41484       0x9b0a43ee, 0x8eec6e1e, 0x57b78b93, 0x67b8e87b, 0x18d404e4,
41485       0x34374c20, 0x0c8adc05, 0x5428deb5, 0x64373605, 0xc3afa2cf,
41486       0xb4d80ec0, 0x3aa956f9, 0x6d51f93c, 0x84161c68, 0x9f9a28ab,
41487       0x6bc9c025, 0x540b6bb7 },
41488     { 0x321d315d, 0x04e1734c, 0xd86e05d0, 0x4ef56612, 0xbba8cd81,
41489       0xeafae145, 0xacdc789a, 0x1fb07a49, 0x5877570f, 0x6a21e9ad,
41490       0xb9bc53de, 0x2e4a837e, 0x1d6298eb, 0x436db293, 0xea362f45,
41491       0x43afbc78, 0xaabf6585, 0x2a973d97, 0x0c924d60, 0xdce7dabe,
41492       0x7cadf0e9, 0xf69d98f0, 0x75020538, 0xe0b505a1, 0x4461cd29,
41493       0x3db7d1a3, 0x5e20e818, 0xe1c28776, 0x52dd50f6, 0x2ca25867,
41494       0x92e0388c, 0x897cab14 },
41495     { 0x0d8bab8a, 0x59ed3813, 0xa438200a, 0xc11d364c, 0x40581415,
41496       0x0687bf2c, 0x7ac89674, 0x86ad0d3a, 0xb97411a0, 0x44928105,
41497       0xf383371c, 0x74984b11, 0x0d1a831e, 0x70d2ed84, 0x6c912fe0,
41498       0xd883628b, 0x14fa88d2, 0x44f8f7fb, 0xcf0ac93e, 0x564f2a4d,
41499       0xa6c24fa6, 0x82f629aa, 0xbf6cd949, 0xab906ba3, 0x20a5182d,
41500       0x2c822e67, 0x30eb93a5, 0x2ff47dac, 0xfff673aa, 0xdc62c4a4,
41501       0x476b0ec5, 0x64b00763 },
41502     { 0xb3c9a404, 0x1e3f533e, 0xb7ef9952, 0xb1db7f73, 0x6c253693,
41503       0xc7f13e29, 0x0738eed4, 0x7ce7f4c4, 0xce26cad0, 0xccfd3b33,
41504       0x01ec5cf1, 0xd8784935, 0xdc084e01, 0x3f8fc09d, 0xc39b5acf,
41505       0x217cab32, 0x9ef5551c, 0x42daf0bb, 0xe1217a95, 0xfbc76f56,
41506       0xc237002a, 0x80178b12, 0xb070a293, 0x0b52c39f, 0x576ca964,
41507       0xe3925153, 0x19d68e36, 0x25559424, 0x09e50e84, 0x291fb82c,
41508       0x6618ed8c, 0x7dd22ea6 },
41509     { 0x49cbb3bf, 0x7ffe844b, 0x5562fb25, 0xde0cc704, 0x9f5a845a,
41510       0x1e6ee537, 0xe51277fc, 0x956d7f26, 0x30635718, 0x2c75d4b9,
41511       0x96957f34, 0x39a14892, 0x82e5742b, 0x8cf4eb32, 0x83247b72,
41512       0x6b0d3ddd, 0x201a4237, 0x67a9f633, 0x1414a485, 0x416403c1,
41513       0xb6f6a916, 0x60afd447, 0xdac6f790, 0x95f94930, 0xbd3b9d82,
41514       0x685ff94b, 0x51cadf0f, 0x5c8f98fc, 0xb13b7489, 0x9559c88a,
41515       0x5f18fcc8, 0x31377c66 },
41516     { 0x7dcfb35f, 0x35c5de09, 0x01cc36f8, 0x2dccca9f, 0x7576cb63,
41517       0x7e93e85d, 0xf7b4b375, 0x0c2dd48a, 0xb09a19b5, 0x9d95cd4f,
41518       0x71bfe607, 0x752ed159, 0x2596dad2, 0x439880cf, 0x69e90a6f,
41519       0xe52efb53, 0x03d3e60a, 0x44097663, 0xa95070e0, 0xfcf364fa,
41520       0x05624dd2, 0xd8f993b6, 0x00d5e467, 0xb35a9824, 0x0c8f4524,
41521       0xe289d024, 0x648a0179, 0xef45423c, 0x587edabd, 0x3a5fd695,
41522       0xa11e5271, 0x3dacc50c },
41523     { 0x6499ae4c, 0xcb3e4f94, 0x7053c527, 0xa46dcbe1, 0xbe782e8a,
41524       0x807f5ce9, 0xd8481e45, 0xb6c64d28, 0xaa286fd0, 0xf35e4518,
41525       0xdf1cdb49, 0xf7b7b9ba, 0xaec23eaf, 0xf3fb6210, 0xb9bfd2fb,
41526       0x0a9ba385, 0x8807f3a0, 0xe51a0d53, 0xb17b2842, 0x7ab24404,
41527       0xf9dd9f0a, 0x6fd57687, 0xf3e9df64, 0xcd1efdb4, 0x60df194d,
41528       0x5dd2df7a, 0xe069df05, 0xbed3f2c3, 0x23248a31, 0x469b7561,
41529       0x694744f7, 0x866949e1 },
41530     { 0x3f4ab07a, 0x3a9a0da5, 0xf54a6fbf, 0x2cd6f333, 0xb23cf290,
41531       0x0c92e921, 0x848e3d58, 0xc9581c3e, 0xd3b218ab, 0x93af1fbd,
41532       0x066cb4d7, 0x38598ea1, 0x990c03a0, 0x5001394e, 0x7d0877b5,
41533       0x3b664b1e, 0xd74c7091, 0xd79db1bb, 0x4e2d5dd0, 0x852d4435,
41534       0x3329db82, 0x0d2b841b, 0x7b96d480, 0xfa844eb0, 0xc295dc46,
41535       0x37a50569, 0x94f7ec4e, 0xc2d38373, 0x5b083177, 0xdc3884ff,
41536       0x8b1fa598, 0x574352b8 },
41537     { 0x0d5d7ce9, 0xed2193f7, 0x0b487eaf, 0x3c19fd26, 0x7be65fd0,
41538       0x7c44ab59, 0x78270d56, 0xdd9da860, 0xbaa70198, 0x8a84ec00,
41539       0x285985df, 0x2ec27e49, 0xde2028d8, 0x996ccaf0, 0x61c2201d,
41540       0x4e7648c7, 0x091c19eb, 0xa96335bc, 0xf0d6782b, 0x253a3a69,
41541       0xd2946493, 0x3f204340, 0x099f6873, 0x444521a1, 0x6996011a,
41542       0x5fcbcc09, 0xf853a94e, 0x3884d5d8, 0xd3b6a3a1, 0x2418c624,
41543       0x06ae3c4f, 0x3e431af2 },
41544     { 0x83d381f1, 0xf967d939, 0xd0c033c3, 0x36501aae, 0x54410768,
41545       0xbf3af4d0, 0x5093a6d3, 0xa86d1598, 0xd92f2900, 0x43ae0741,
41546       0x36f0b755, 0xfeb2afa6, 0xaa456d6f, 0xd090a6a3, 0xaefdb646,
41547       0x336a4fda, 0x1a942f7d, 0xfd1bfe44, 0x851ee41e, 0x7fc2a3ed,
41548       0x11e935c5, 0x4f1c9686, 0x53bbb343, 0xcd577666, 0xad896c2a,
41549       0xf26931ba, 0x86bbfa41, 0x8a0fbbd1, 0xa203cef1, 0x1c3d7d82,
41550       0xe2664d35, 0x6dad3f15 },
41551     { 0x12ec35a1, 0xd1940b7d, 0xe7dfb128, 0x6219c5b6, 0xf13321d5,
41552       0x2cc278c6, 0x33c58eb6, 0x5e76904a, 0xd9903c43, 0x15090f55,
41553       0xc3d96a19, 0x061bc926, 0x8c0acba7, 0x974a9f03, 0x7198b21b,
41554       0x7a414021, 0xf8958c6f, 0xb069599d, 0xbebd0129, 0x517f2f1d,
41555       0xdf3a8dc3, 0x1109a613, 0x672375c5, 0x08e58448, 0x9383d2d3,
41556       0x56590ba4, 0x0bff837c, 0xfc3ee7c6, 0x27d2d55f, 0xc87a5390,
41557       0x5f517a3f, 0x2438e9d4 },
41558     { 0x8815af3c, 0xc4a45308, 0xf3c9bed5, 0xe55f1a32, 0x97b65ddf,
41559       0xaef1cdc9, 0x12e51eb5, 0x61c61d94, 0xe63f2490, 0xbd0dac54,
41560       0xd0b3e231, 0x6f14429c, 0xf1da6010, 0xf737c3c2, 0x6bbc4fb1,
41561       0x7150e04b, 0x1be281cb, 0x205b4c89, 0xd7701f5b, 0xf1b4633c,
41562       0x2a513490, 0x8b33ef46, 0x68f1f7f2, 0xddb47c73, 0xbd416b67,
41563       0xf4ada511, 0xff795bb3, 0x9d2a97cd, 0x96200e67, 0x00a8b7b2,
41564       0xafe30e01, 0x13f39011 },
41565     { 0x7bd0c827, 0x3dd296ef, 0x4a29ff46, 0x506110f3, 0x1c9a515a,
41566       0xf8793068, 0x268bca77, 0xde8d8045, 0x998045df, 0xcbb83024,
41567       0x68c0e584, 0x3f90d710, 0x263b6062, 0x2a838ca8, 0x535c5d0b,
41568       0x293bb5e7, 0x56415110, 0xceea99d5, 0x1bbda005, 0xfe311ad0,
41569       0xa4d8d018, 0x2497e0bf, 0x1cf2b866, 0x33dd77a0, 0xd8c4ba8b,
41570       0xbc075b73, 0x722b7bc9, 0x298466d4, 0xcbda1b0b, 0x17a7ce24,
41571       0x680703b6, 0x458d4b6b },
41572     { 0x4d54d8b2, 0x8a26a20e, 0x4d320a0d, 0x05a5696e, 0xf994f700,
41573       0x698b5858, 0x2f6549a8, 0x7a4adc3c, 0x3694d00d, 0x1812e819,
41574       0x730402bd, 0x46b9b000, 0xa1b36410, 0xe10a1449, 0x99230220,
41575       0xeae95ea5, 0x1b4820c3, 0x3efc2e9b, 0x85c9eb8a, 0xfe5b5cb5,
41576       0x97847064, 0x21ae0319, 0x8f27d49f, 0x68ef0b70, 0x2f72556b,
41577       0x3259ef18, 0x624db01a, 0x00ae0457, 0x5668f95c, 0x628e3b06,
41578       0xb6fbbf91, 0x5f13f5fa },
41579     { 0x3a9b0dc6, 0x7c6ed9ae, 0x6f883ec8, 0xaea1bde9, 0xea8b3677,
41580       0xea66bf88, 0x9a66e3ab, 0xdefa6abc, 0x68217ffd, 0xc4d3317b,
41581       0x290df05c, 0xf741c8f2, 0x7d11674e, 0x1f0fdf17, 0xc35989ca,
41582       0xfdf0ece7, 0x6b9c482d, 0x0eed92df, 0x55bf1ca7, 0x73713e66,
41583       0x25cec99c, 0x90acb290, 0xe803e69c, 0x37c9e3a2, 0x17713a1a,
41584       0x7c0a3c53, 0x6f5a174d, 0x350dc565, 0x05f802f6, 0x11625a44,
41585       0xa37ba4a2, 0x2196495d },
41586     { 0x13142680, 0x00cb2fd3, 0x65d14cf4, 0xab9e91d7, 0xdfe2669e,
41587       0xc6a0ceab, 0x0ae22bc5, 0xbeefce58, 0xcb6ec250, 0x3c2b7986,
41588       0xd738f1ff, 0x84adb1a2, 0x516ec8ec, 0x9709bc28, 0x8e8f7db5,
41589       0xf3693129, 0x95b197f9, 0xc48efc6b, 0x9aaaa404, 0x9ff10952,
41590       0x144154b0, 0x2c3c8cbd, 0x427f3435, 0x33ef7bc3, 0xd21897c1,
41591       0x04a17940, 0x6ce548a0, 0x5aa0c47d, 0x3d56fa62, 0x2971cea7,
41592       0x04475f08, 0x93ad0eb0 },
41593     { 0x988a9963, 0x7a0b6967, 0x6515e8dd, 0x61e477f7, 0x3b6b50f2,
41594       0x6274e386, 0xd33922de, 0x63a9b8d5, 0x687a5b3d, 0x3c38d3fb,
41595       0x1302e323, 0x18f6f09c, 0xe02fcccf, 0x254c05c3, 0x26e662f7,
41596       0xc04ed0b7, 0x143fe079, 0x1d5646b8, 0xc9016c8c, 0xef8a9448,
41597       0xf823d797, 0xe5674c4b, 0xbccde451, 0x0586f72f, 0x4417eade,
41598       0xc5fc88d5, 0x576e588d, 0x2b952209, 0x5844d1f9, 0x4408dd42,
41599       0xea41c034, 0x73f8c3f0 },
41600     { 0x5df763dd, 0x89534fc8, 0x3ac71836, 0x3b1427f3, 0x6e8f15a0,
41601       0x0db5be17, 0xcb20888e, 0x1d390944, 0x857caea6, 0x7804c9ad,
41602       0x519f7bf3, 0xaa584428, 0x293aa8cf, 0x626eecf1, 0xea36a015,
41603       0x749e0d98, 0x3321edcd, 0xefff6dae, 0x28b791cc, 0x963deea6,
41604       0x2d16e361, 0xa14e0552, 0xb15ae206, 0xa2e058fc, 0xfca325e4,
41605       0x0f268745, 0x21341a8a, 0x7cf9d407, 0x7caa51b8, 0xdfed25d9,
41606       0xadbedd75, 0x0108ae39 },
41607     { 0xa9e88f63, 0x54d178f3, 0xab0c7325, 0xaa05b11e, 0xe261d8a6,
41608       0x773a53e6, 0x8d0b91c8, 0x24db7dae, 0xe9bb004d, 0xde10b073,
41609       0x54e3090b, 0xfc8befe7, 0x0cc69c89, 0x16af0599, 0x9d59511a,
41610       0xddc83803, 0x46c5dafc, 0xc3f65b99, 0x1ee0a599, 0xfbbe4be8,
41611       0xfb3a9b17, 0x88891e36, 0x445dad00, 0x0c9aad75, 0xd5097e1f,
41612       0xdffc46ab, 0xac85a4e1, 0x8848089b, 0xa0c45233, 0x348bb42f,
41613       0xeb13c1df, 0x807c06d8 },
41614     { 0x98ee0ef6, 0x00a969ec, 0x8bb7b7af, 0xba9d5483, 0xa02f8fdb,
41615       0x24484c92, 0x8b70557c, 0x7bdb201a, 0x60ad1af2, 0xe59343e4,
41616       0x998c95fb, 0x53a9a942, 0xda861d3b, 0x974db3de, 0xed399c0e,
41617       0xce1525c9, 0xf72109bd, 0x89b56881, 0x998211a4, 0x08ff7d15,
41618       0xef0f275a, 0x5df76b3a, 0xfa2f358b, 0x93f180f7, 0xc39b0634,
41619       0xaac4ffcf, 0x17583b53, 0x2692c626, 0xb55399fc, 0xb2fdfa36,
41620       0x99607a61, 0x16424c6c },
41621     { 0xdd2744a9, 0x5dd65c55, 0xfe3af418, 0x2544c1c2, 0xefe8b089,
41622       0x32c82e99, 0xa9df691a, 0x30b7ab25, 0x9be99674, 0x98384550,
41623       0xcaf2d122, 0xbcecd258, 0xbcc77272, 0x88ae4098, 0x4b8efa0c,
41624       0xd4396141, 0xed64d12c, 0x44ff67b9, 0x2e7f3404, 0xa9e655e4,
41625       0x45b0e9eb, 0x3d16fc45, 0xf03ded28, 0x474a3e14, 0xacccb85c,
41626       0xa3c9adff, 0x7253a51b, 0x3dfe6bc1, 0xfb5831b1, 0xdddaf4b9,
41627       0xa4f4478a, 0x5544e602 },
41628     { 0xbaa80b4f, 0x897c5313, 0x63bdc8ef, 0x0122716f, 0x7b42c5a8,
41629       0xae2742db, 0x0883308c, 0xe9d9e1e9, 0x2d341ab1, 0x352c8c3f,
41630       0xed945870, 0x163d0500, 0xc290d9d8, 0x8349dd73, 0x1f6c7d29,
41631       0x2053c5e0, 0xcb42033c, 0x83107446, 0x09d09af1, 0x76c88bd2,
41632       0xb2794681, 0xd0f70e6e, 0x19b1b540, 0x720b59de, 0x22994b43,
41633       0x80b7ecdc, 0x2dec53cf, 0xc1a4cdce, 0x1ed60f42, 0xdd7d3edd,
41634       0xe241d261, 0x5735995c },
41635     { 0xa0237056, 0xdc4ba3fb, 0x33ab3388, 0x6856c164, 0x271ec612,
41636       0xc01eebbd, 0xe3031bec, 0xabdeb033, 0x6118a1f5, 0x4eee4419,
41637       0x5b600f33, 0xec497421, 0x08868773, 0x1b7185cf, 0x7c1b7dfd,
41638       0x7b0c46cd, 0x4a4c5e89, 0xd143b2da, 0xbb1ff94d, 0xdb9a5984,
41639       0xc9cf3465, 0xac3904e4, 0xeace64c9, 0xf8729bc0, 0x768ad99a,
41640       0x5cc22821, 0x8a9540c2, 0xbbd3b081, 0x049a6917, 0xe468ed5f,
41641       0x3ec45ef0, 0x885486df },
41642     { 0x4bdff464, 0x6a942c93, 0x25a7b451, 0x3db2719f, 0x325be324,
41643       0xccb0070b, 0x19fe3339, 0x2055a31b, 0x241ee8ff, 0xaca69ae8,
41644       0x55ef8def, 0x7607dd08, 0x1a1b73c6, 0x9e24960f, 0x71d36810,
41645       0xbcb0e8a2, 0x6885e6b9, 0x29e11aa2, 0x185eae19, 0x98b5d0ab,
41646       0x0f81f91c, 0x1a0b96e4, 0x994fc503, 0x4d0e8bcf, 0xf119d6e0,
41647       0x33d81697, 0xaaa4ce0c, 0x29083287, 0xc91ff9d7, 0xc5dd4d3e,
41648       0xd4ab962d, 0x31cecfe8 },
41649     { 0xfc8b21e8, 0x437bfd9a, 0xb19436df, 0xe5dd32b3, 0x921c36a0,
41650       0xfe5902d4, 0xa3d0fa90, 0x8e9de84d, 0x5bb523bd, 0x9663e6ad,
41651       0xaecd6975, 0x9800a23f, 0xb4fbb59c, 0x1009c0d9, 0xc9d20ff1,
41652       0x839aa7bd, 0xecd6fa3d, 0xf502f66d, 0xc5516ca9, 0x480ed4fb,
41653       0x6c742ac4, 0x65ffa5f6, 0xff3252f8, 0x2b7c7945, 0x75d9cb3d,
41654       0x72fefc05, 0xd6d6f1d2, 0x11b0863b, 0x9a6a4ec3, 0x5d8f3cf0,
41655       0xda2547b3, 0x6961b46a },
41656     { 0xcb35e2ac, 0xd07b587e, 0x57af14d9, 0x1ed5546b, 0xdb28a04c,
41657       0xeca17a5b, 0x709d54f0, 0xa1f91d44, 0x9c6f400e, 0xa6e719fd,
41658       0xfb8ce190, 0x4e4b88ed, 0x246e3fd2, 0xf9781edd, 0xb655af5d,
41659       0xd67120e6, 0x93413ca7, 0xda782d1d, 0x9707fa21, 0x697e20a2,
41660       0x54e84123, 0x1eb51f32, 0x36051f9f, 0x2e254d9e, 0x73ce5be9,
41661       0xddaec42b, 0xcd3f794f, 0x89a9a32e, 0x0781aad9, 0x1964e22f,
41662       0x53755212, 0x6a63a90c },
41663     { 0x3d7acbbb, 0x76554e00, 0xb74f6108, 0x2c01668a, 0x388c519b,
41664       0xe4a29672, 0x3eb94d4f, 0x01667714, 0x0cd6d2f6, 0x086a3cdf,
41665       0x7b370f7f, 0xf8658021, 0x5a4d3e7c, 0x658880c1, 0x5ba3f4a1,
41666       0xd6ed5816, 0x5ca471dd, 0xabcc7813, 0xe844a576, 0x809bf074,
41667       0x6ea502ea, 0xa53a81b3, 0x0e021ed3, 0xc20b9307, 0x8617f165,
41668       0x8c27f892, 0x8235cd0b, 0xa5476446, 0x82552961, 0xffc89ffd,
41669       0xd151d90e, 0x51ed4a22 },
41670     { 0x449701b4, 0x37d6963a, 0xbb27caf2, 0xea8d91a3, 0xb572965f,
41671       0x3ef9be15, 0xdb50bf7d, 0x75a7a055, 0xce643b9b, 0xfd67480e,
41672       0x6ceb5d5e, 0xf2a60d2d, 0x5ed7c897, 0x68fc320c, 0x28ce685f,
41673       0x41c53cf6, 0x7106615e, 0x0e29711f, 0x23500ecc, 0x7a872138,
41674       0x6c29fe48, 0xaf0a9260, 0xe1ef9712, 0x93df3f2a, 0xd2d169bf,
41675       0x0d5f6fb1, 0x74a9793c, 0xeb7afe26, 0xe9f49256, 0x4173d94a,
41676       0x2b8b5ce5, 0x2d6951bc },
41677     { 0x904e222e, 0xdd007d9f, 0x86f4e109, 0x333f248f, 0x8f429eee,
41678       0xd4994e8b, 0xcfc77518, 0x29573415, 0x0b0f42f1, 0x6e7fea3a,
41679       0xc2743519, 0xc795cb7d, 0x711e71a0, 0x820a8f66, 0x2b874f55,
41680       0x83d95d9c, 0xe70e1627, 0xd4b64d78, 0x8b92a742, 0x924353f5,
41681       0x447b5e6d, 0x322048b1, 0xbcf931a0, 0x0bad730c, 0xa7af2268,
41682       0x75c4d089, 0xb83b93f9, 0x464904c1, 0x165b3aee, 0xa24eba02,
41683       0xe08cc5f0, 0x65c48e78 },
41684     { 0xde222c22, 0x1a1c73ce, 0xfcea23b4, 0x5683d8cd, 0xb2143b06,
41685       0x0301cb14, 0x59fcec77, 0x284adf8f, 0x31204cef, 0xfb1c581c,
41686       0x94735107, 0xf54d3eee, 0x4d3188c0, 0xdbf67f0b, 0x10f18d12,
41687       0x76a3f2d1, 0x07d3e013, 0x3809fa28, 0x25e7ece0, 0xf06f0a46,
41688       0xb2895d2e, 0xd82867ed, 0x08b0553a, 0xe106f489, 0xef245445,
41689       0xe2280fa6, 0xa8d9a3cb, 0x402d5785, 0xd438ba2d, 0xf63dd9ff,
41690       0x7a6b226f, 0x36b5cd2c },
41691     { 0x545679a7, 0x87ff4e20, 0x4520c750, 0x64d80b41, 0x9b459cd8,
41692       0x90a357fa, 0xc85af1a3, 0xa19eaf39, 0x8d935a5e, 0x0d475d79,
41693       0x781a678a, 0x74501983, 0x0cc2e810, 0x74839779, 0x2f412244,
41694       0xc6a21d11, 0x36a51a37, 0x8d0e85f9, 0xeaa74df8, 0xff50151e,
41695       0x93cf99c4, 0x14e182a7, 0x376a9ab6, 0x45593df1, 0x522389ff,
41696       0x18f73caf, 0xf7445e8a, 0xd27cc960, 0x39a51dc8, 0x0692f4c5,
41697       0xdb39bfd8, 0x08d7c144 },
41698     { 0x3ecca773, 0x809c0d96, 0xd48c2156, 0x87ea9192, 0xdb6bd641,
41699       0xf0eccd74, 0x2a678cdf, 0x77312374, 0xd1587b7e, 0x7a966d8b,
41700       0x6130a4c6, 0xf3c1a101, 0x5fce17bd, 0x7cc6e838, 0xa8de7aa4,
41701       0x95e95bb8, 0x898308e3, 0x3fe1e8b5, 0xe347694a, 0x0197243e,
41702       0xbb0cd2bf, 0xf3fe9c42, 0x0f9b2b49, 0xb5905264, 0xc7367d1f,
41703       0x4c385e8b, 0xb5ee147b, 0x1d3050ae, 0x04004ad9, 0x8e2c3879,
41704       0xbab70202, 0x5f2aa8ee },
41705     { 0x1266524b, 0xe208d464, 0xd0a19f66, 0xb7bf3880, 0xda106ebf,
41706       0xa5aa685e, 0xe642dd46, 0x0a69e8d3, 0xc682e4d6, 0xef349c61,
41707       0x0fcb534c, 0x26f6ee3b, 0x05eb67b8, 0x7daba127, 0x18be05f6,
41708       0x2babb27e, 0x8e2d85d1, 0x959afcba, 0xe2d9d386, 0xedcf2d1a,
41709       0x1ea6f06e, 0x59dc52e6, 0x866e5ae8, 0xc28278b4, 0x02bcd3c7,
41710       0xd9ff0340, 0x784be82f, 0xe884ac76, 0x83c9f224, 0xa3164980,
41711       0xb46ff949, 0x62501a98 },
41712     { 0xad264086, 0x563f7d9a, 0xa5e0e4bd, 0xca6a33db, 0x8c8d3d67,
41713       0xe8253002, 0x46e64b19, 0xa288dac8, 0x20aa4536, 0xfa3c9197,
41714       0xed553eac, 0x8130c9b0, 0x2ea8abd3, 0x622806e0, 0xceccfe77,
41715       0x52fbf54d, 0x4f0d1b70, 0xbd9a8e31, 0xd59b1741, 0x519d2133,
41716       0x9a6fea8a, 0xfd74101c, 0xb5c4eb10, 0xd1acf7a0, 0x91f9da5e,
41717       0x78499b73, 0xc0dea586, 0xabaa4c49, 0xa1f3531a, 0xcc9c5f73,
41718       0xfd3fc665, 0x497b15fe },
41719     { 0xf45568e9, 0x8a56cbaa, 0xc7192a6f, 0xf491a0fe, 0x9ab2539a,
41720       0xdbb03dd3, 0x4ac37da9, 0xc86522f8, 0x02a0f5b4, 0x8c8cdba2,
41721       0xa29c539f, 0x8109fc75, 0xca90f02e, 0x9cd06d31, 0x3e216dbf,
41722       0x8f31f044, 0xba3ebd91, 0x99aa68ac, 0x42c007f4, 0x2a80d0d2,
41723       0x86a9b7ce, 0xdd8dffbf, 0xd6308edc, 0x405d3e84, 0x068012ca,
41724       0xdafa33fe, 0xedea1071, 0xc2eebd13, 0x2ff637e6, 0xb7ae7e5c,
41725       0x9e514cb7, 0x18d46a6c },
41726     { 0xa78b7802, 0x868cbb22, 0x497cbaf4, 0x0745ddb2, 0x42ae8add,
41727       0xc4eb2f3e, 0xb4ceb4e4, 0xac0abcda, 0xa325fd40, 0x2e0d8325,
41728       0x13ac7345, 0x6cfe0571, 0xb14171b9, 0x7407a788, 0x6da7a52b,
41729       0x70eb0603, 0xd85176ac, 0xab0b36f9, 0x7c2954f3, 0x14109d29,
41730       0xdcd705ad, 0x370de9c8, 0x7bb5e751, 0x3f0db5cd, 0xa06e708c,
41731       0x45f93d41, 0x7e93050d, 0x10d54f8a, 0x5a38fef9, 0x69e6f8e4,
41732       0xd3f62e40, 0x55044601 },
41733     { 0x06cb9cc9, 0xd1c5c910, 0x41d00014, 0x542074d7, 0x11236fb8,
41734       0x7cd8663e, 0x29ad5f82, 0x39721ffe, 0x2951fc83, 0x1d21fbfa,
41735       0x400d144f, 0x1cde06e7, 0x91792e6b, 0x9042596b, 0x29ad5166,
41736       0x3365c8e5, 0x9aeefe98, 0xe2220e85, 0x70c2aee3, 0xbcb53189,
41737       0x9ff100bc, 0x477ca3db, 0xf532973f, 0x27074176, 0x9a2bd01b,
41738       0xa12118ac, 0x3dd79f93, 0xf3425209, 0xc6f5d7db, 0x563a8ff7,
41739       0xd7b0ec4f, 0x0da313fc },
41740     { 0x15aa2557, 0x37125a8c, 0x00893e9c, 0xca21d70c, 0x67b8a823,
41741       0x48713994, 0x7cb0042a, 0x0d3e9a74, 0xc9e2ce18, 0x2d2bf4ff,
41742       0x049aeac2, 0xd5531a0d, 0xf03d0660, 0x4d29a616, 0x1f1b7f00,
41743       0x473d50d6, 0xca3de50c, 0x3af0ecbb, 0x09c28f27, 0xe2959bea,
41744       0xf8704664, 0x6d7c2ea0, 0x731083ef, 0xadfae4e1, 0x941c2554,
41745       0x50940c26, 0xa1162d03, 0x44167410, 0x1e82290e, 0x620230d8,
41746       0xdb414acc, 0x63630be8 },
41747     { 0x8a7d2e41, 0xbf8d5222, 0xeb62f879, 0x49e75823, 0x6c402d89,
41748       0x1b4d33dd, 0xde2c59ad, 0x883e04d6, 0x49b9dc38, 0xbf3f38f4,
41749       0xb4b70c4c, 0x9d997d18, 0x13cea045, 0x1f69b20c, 0x58e2606d,
41750       0xca3d7025, 0x261d1b79, 0x3d4fd977, 0x5a1436fa, 0x56aeafa8,
41751       0xbb443c07, 0x369b3e98, 0xe558f6be, 0xfce5186c, 0xf8ac8f89,
41752       0xeb0cd478, 0xd5e5aa72, 0x68074f37, 0x68544eb0, 0x295845c0,
41753       0xf16688ed, 0x306a9871 },
41754     { 0x634ec136, 0xbc451e9d, 0x0e6f658f, 0x1edf27ca, 0xc0db4120,
41755       0xa9be0152, 0xc5bfee67, 0x87b6ef20, 0x9a2d6023, 0x35283238,
41756       0xc7afb899, 0x60e564d8, 0x0ac9c2de, 0x4af22bc0, 0x82a9d22b,
41757       0x28e6f631, 0xf532701b, 0xc075c701, 0x82075f91, 0xf6d418f8,
41758       0x1beaa511, 0xf9fa628d, 0x6e72a13d, 0x551e7a17, 0x77f4c01c,
41759       0x9306215b, 0x93c9d588, 0x71aba731, 0x58e57cd4, 0x6443ebe0,
41760       0xe8103e37, 0x2833ac41 },
41761     { 0x8da5ec5c, 0x7e564b86, 0x1c08db24, 0xac3d9da8, 0x8c57a728,
41762       0x9d7c1f0b, 0x9d343dc2, 0x3512afe7, 0xfdc60339, 0xb438e4cf,
41763       0xdcfa1941, 0x7d5a2700, 0x27320449, 0xd5f323f8, 0x1393c6e6,
41764       0x1b87a58e, 0x04baa431, 0xecb68bd1, 0x4722b4d7, 0xc09c1c5a,
41765       0x206b5faa, 0xf42faa97, 0x9976327e, 0xe1dcbcd6, 0x087787d9,
41766       0x655ba9e4, 0xde5c0191, 0xbd59c757, 0x0bcf3538, 0x673020ed,
41767       0xa49d6303, 0x120cd454 },
41768     { 0xcab0f9ee, 0xebfdb8f4, 0x2cce58ee, 0xbc003ef0, 0x5a8d0665,
41769       0x9b6a6841, 0x9b957774, 0x642ed3a6, 0x4721ab5c, 0x3de487f0,
41770       0x21a4f0d3, 0xef2ff380, 0x29dbddcd, 0xbd16f558, 0x0e93dff2,
41771       0x2ef05b4b, 0x0bc9aec1, 0xde1faa12, 0xd467fa92, 0x66dae2c2,
41772       0x5eb33e34, 0x758daf64, 0x8f0103cb, 0xa67ad9f6, 0x9be02430,
41773       0x151f693a, 0xeb4054bc, 0xd5698496, 0x7019336e, 0x8ef1677e,
41774       0x7fdeea3e, 0x021cfd16 },
41775     { 0xdf5c36f3, 0x5c73715f, 0xd64ad254, 0x703bde37, 0xf2cf7713,
41776       0x55368d10, 0x0f3993c8, 0x1e5ec7b7, 0x304ae4ca, 0xfdb16776,
41777       0x3d3bb18b, 0x0d8f717e, 0x66343d5a, 0x5267073f, 0x156008b5,
41778       0xfaeb52ef, 0x224a470f, 0xb97ad5f9, 0xed2ab51a, 0xaf86e391,
41779       0x9974302c, 0xdc0c7e57, 0xfd0ae28a, 0xc88fa817, 0xbf8ed59c,
41780       0x807c22df, 0xeb128bb6, 0x5dedc231, 0xa20595a3, 0x71edcd9c,
41781       0xc73cf78e, 0x07265b46 },
41782     { 0xbd66232f, 0x73dd99f0, 0xc4027716, 0xc59aaf89, 0x5b860fc4,
41783       0xaf826dfa, 0x7a943f3b, 0x239ea8aa, 0x523c428d, 0x0e0e1b1a,
41784       0x6973b95a, 0x55ea0e3a, 0x2557753b, 0xea399caa, 0x06957b1f,
41785       0xf8adf72f, 0x3bd34302, 0x0389f341, 0xf8a43a97, 0x333f27d0,
41786       0xadaf796f, 0xcd9c0c08, 0x49c12aa2, 0x6dcca49b, 0x7a0ac6e9,
41787       0xdd88deee, 0x0644080e, 0x8f47575d, 0x0cc2f4bd, 0x6e9d667d,
41788       0x31d1496c, 0x36c5754b },
41789     { 0xf323d84b, 0x9120046e, 0x7e789c4f, 0xa6991122, 0x921b8055,
41790       0x4b0eaf4e, 0x8079974e, 0x6339844a, 0x740f8c79, 0xc905466a,
41791       0xcd6def49, 0x1c18d0f7, 0x4b23e4ba, 0x5297da6b, 0xc41800c5,
41792       0x1c09dff3, 0x37ef6777, 0x6c49075b, 0x50513ded, 0xa94c3a40,
41793       0x6b0b1705, 0x3d6742e9, 0xc48af5ae, 0xc0784494, 0xc95822de,
41794       0x40c01532, 0xc164d94f, 0xa2ddade5, 0xa2975eb5, 0xfc8a8ac9,
41795       0x1946944e, 0x06fbf861 },
41796     { 0x3f45aa97, 0x2d65338e, 0x1d040feb, 0xd83b58c8, 0x0fdef8b9,
41797       0x05fef59b, 0xe4d7417c, 0x7beb071a, 0xb30a1a23, 0x982b61f5,
41798       0xfb65bd03, 0x4c5f2a2a, 0x5cbf6bf3, 0xe40abc9d, 0xf06612a5,
41799       0x422c326d, 0x9571ae28, 0xc921e69d, 0x23d3434e, 0x7c88b10b,
41800       0x9da07933, 0x96d2e957, 0x3619cf4d, 0x833d46a1, 0xd95eefa1,
41801       0xd9d19653, 0xa03e8f0e, 0x2a7d8411, 0x04bb5ab1, 0x5e642953,
41802       0x1f0fa9ea, 0x5e9ca0fd },
41803     { 0x197c5dc4, 0x5bd54571, 0xe78a95a2, 0xe2da40bf, 0xffdb0eb2,
41804       0x65fb9efc, 0x0d17467c, 0xe952dc2c, 0xc758c6a3, 0xc1fc9c7b,
41805       0xd4034a9a, 0xfc79562c, 0x61f64b56, 0x26e36fbe, 0x1e84728b,
41806       0x6adc4b9e, 0xa8f9ac8a, 0x7f165fd3, 0x03e3e013, 0x7bc93a45,
41807       0x656478e3, 0xeacc5513, 0x064ddc77, 0xd3391717, 0x76936914,
41808       0x75b318dc, 0x362424a6, 0x69b1f1c7, 0x49955f34, 0x8cc2045b,
41809       0xc6836af8, 0x940622b3 },
41810     { 0x0d997973, 0x4710ccb7, 0xd3f8f115, 0x3b29625d, 0x5b97abd5,
41811       0x8cf0c4d5, 0x673e14a5, 0xc6321e0a, 0x3d262246, 0x0541af9d,
41812       0x6fc83b11, 0xde6d8754, 0xf01652a4, 0x47e97da8, 0xad9802b6,
41813       0x0f82b3a6, 0xae9c44b2, 0x69aa4075, 0xced2bf77, 0xaf3f5de2,
41814       0x497a40da, 0x1ef1ea8a, 0x3c23ba9c, 0x2e0f8608, 0xf190a2c8,
41815       0xd8a998a4, 0xcfde3368, 0xe2b49c8c, 0xbde6bd71, 0xb9f49824,
41816       0x785bedb6, 0x80bb1664 },
41817     { 0xfd145cb5, 0x05e575fe, 0xac5e6883, 0x155ee561, 0x8793b273,
41818       0x461e70cf, 0x133b2338, 0x9f1553de, 0xa2a7ba07, 0x2fb9e0c3,
41819       0x3e7086fa, 0xc3bfd6a8, 0x8bb4cb93, 0xb6ba8500, 0x76f82dbd,
41820       0x0b66d789, 0x54eb49ff, 0x7d5a6ff6, 0x1f20b322, 0xcd65d237,
41821       0x54e29cdc, 0x79ea49c2, 0xcb118ff9, 0x64975963, 0xcc58000b,
41822       0x969598dd, 0x110c779c, 0x95107918, 0x63b85a35, 0xedfc1548,
41823       0x41212350, 0x077ba5ea },
41824     { 0xcdd86f61, 0x0b3a38d3, 0x0502a0ab, 0x43121445, 0x806d0272,
41825       0x1912edc5, 0x8a32f10f, 0x01dc1f98, 0x0e80c760, 0xbb1d31d1,
41826       0xf464e8b3, 0xd46ec7e5, 0x9abf49ee, 0xd569af36, 0x2cdade77,
41827       0x9d286ea7, 0x45ad5920, 0x2be7020d, 0x6299ae7f, 0xabe5236e,
41828       0xd3f55c07, 0xc93179bd, 0x52350e80, 0x8138995a, 0xaff07586,
41829       0x0901265c, 0xf4739653, 0x5b3c81b2, 0x9bc77d21, 0xbaf7581d,
41830       0x4591a2e2, 0x6b2006df },
41831     { 0x965b1bc1, 0xb2fe50a8, 0x962bb4fd, 0x931f536a, 0x000e7f99,
41832       0xd5718d33, 0x53d5125e, 0x84728f25, 0xd2125caf, 0x4f8a6184,
41833       0x357f679e, 0x54f1a701, 0x1531c05a, 0x70a9f40c, 0x6fa8b775,
41834       0x10d0cb97, 0x9dc12ce9, 0xb476f41e, 0x2755f894, 0x5c8d7a75,
41835       0x625741a4, 0xd6c12e10, 0xc917b16c, 0x262a6fb8, 0x38d6b0a0,
41836       0x24d116e6, 0x32c38e83, 0x849540c0, 0x66868afc, 0x855b911c,
41837       0xbd26b550, 0x53217ea6 },
41838     { 0x259f52b4, 0xfc840473, 0xe621146c, 0x968da9cb, 0xcacbd26e,
41839       0x964eb85e, 0xe4a54344, 0xab7daa2d, 0x381a4ff7, 0x6dc3b848,
41840       0x41c815ef, 0xa07a96b3, 0xc3d4b1e1, 0xc4fae9e8, 0x42ce9ea8,
41841       0x0f938d1e, 0x35cc052f, 0xa727dacc, 0xe9a06f07, 0xc81e01c9,
41842       0x4a6d65a1, 0xa9e08dcb, 0x6044a9a6, 0xf8e2d173, 0xf2bd295b,
41843       0x99893dd0, 0xf9781b12, 0xa08d3379, 0x61830ac2, 0x64bd6001,
41844       0xd9adbeef, 0x0386931e },
41845     { 0xd09885a5, 0xd0d7abb3, 0xe355bb07, 0xed9d2b67, 0x536ebaed,
41846       0x3bc238cf, 0x699ce4d6, 0x61ca2e78, 0x111594cd, 0x354ff447,
41847       0x03316ad2, 0x55cbe709, 0x49fff5c4, 0x418679fd, 0x0f9c6c40,
41848       0x75bacd75, 0x2972721a, 0x677edc88, 0xe5ef502f, 0x82596887,
41849       0xbf320e0e, 0x459e9367, 0x8bbdccb2, 0x81ce36ef, 0xb766863d,
41850       0x1ba097fc, 0xd58c6db8, 0xcd3a21d6, 0xb4a8748b, 0x0e4967cd,
41851       0x15041c20, 0x2caaf749 },
41852     { 0x6ed20424, 0x44f98006, 0x22471545, 0xb3e4ea23, 0x781a8c86,
41853       0x268ed1a5, 0x7ae5b70b, 0x48d0ab75, 0x356d3982, 0x6ca8b320,
41854       0x2df31fa4, 0x9ce8e681, 0xd925dcf2, 0xb909d232, 0xf56723de,
41855       0x302c8f78, 0xabac96f9, 0x11725d69, 0x57d1a170, 0x656a47ca,
41856       0xc18a2be7, 0x6bb5d511, 0xad50d9d9, 0xb56e45f1, 0x70b05518,
41857       0x36e886e2, 0x09d8ff91, 0xc7c71f3d, 0x9350361e, 0x65a1bbe2,
41858       0x45fe3bd8, 0x86d7f532 },
41859     { 0xb0bf719a, 0x99f16eb6, 0x8bc3d913, 0xb6975098, 0x26cd01b4,
41860       0xfae50e52, 0x90898d1c, 0xd3e3ac54, 0x887ec666, 0x4da3b9db,
41861       0xfbea45b8, 0x58300644, 0x8355b058, 0x369f3bd9, 0x579bcc13,
41862       0x0fb239a8, 0x6e2bd811, 0x4f5b4539, 0x24198fd2, 0x007f3baf,
41863       0x8837d51d, 0x68a676db, 0xeae75b16, 0x68eeea62, 0x3db6083c,
41864       0x5ffe5f94, 0x7d836c5a, 0x52c94d0f, 0xcbc1ff85, 0x5a4c3c6f,
41865       0x86c0b4dd, 0x682a55e3 },
41866     { 0x587495aa, 0xc8f235a4, 0x34c7245d, 0x2276026c, 0xb75a46e3,
41867       0xd6ae0cc5, 0xecc3e5e7, 0x890d3965, 0x14296629, 0x1b13342f,
41868       0x8a877227, 0xc89927e6, 0x2324a68b, 0x1543f27e, 0x49cdc21a,
41869       0x6c447684, 0x1452d0ac, 0x9bc7fd4f, 0xff4b045c, 0x2cc30a31,
41870       0x852f7611, 0x415d46a0, 0xc6fdd7a6, 0xad737052, 0x7b4c7c91,
41871       0xdcecc3ab, 0x7688d70c, 0xd2cdf01b, 0xe40d3905, 0x054f2542,
41872       0xfefe4dcd, 0x02227fa6 },
41873     { 0xb751948b, 0x1805efd9, 0xfdfd225d, 0x8efeed46, 0x4f2c8b22,
41874       0xcb128e09, 0x96f7c5e5, 0x9d1090bf, 0xb4cbeca0, 0x0959d044,
41875       0x8e08cb04, 0x21c955f9, 0x68fa4fce, 0xbc1f279d, 0x0710ae9a,
41876       0xb021e14e, 0x881167f4, 0x64d16e9f, 0xbbc9f1a5, 0xf5a5c22e,
41877       0xe3420eea, 0x5f3716df, 0xd5c4e843, 0x971eb915, 0x28ffba81,
41878       0x64fc55fc, 0x7dd37578, 0x3427e54d, 0x15ebc7d0, 0x446e6a62,
41879       0x29269778, 0x547e249a },
41880     { 0xa1ffda27, 0x4706868a, 0x7955cf50, 0xb4e6cdcc, 0x0a63f3d8,
41881       0xf65151e1, 0x9de5e70a, 0x5b4127ea, 0xf9342823, 0x3d2c09ba,
41882       0xaa2f7d51, 0x18c99d83, 0xddeec025, 0xa0c5bb1d, 0x03dcf1ce,
41883       0x7ffddf84, 0x616fdeda, 0xe57e4d29, 0x7932a1f0, 0xd2456569,
41884       0x3191d4e3, 0x7475e0e8, 0xc220218b, 0x3479bea1, 0x8bcb2505,
41885       0xfceb5c90, 0x3c6132e6, 0x1c685cea, 0xbfe6c1eb, 0xc42dc745,
41886       0xd2b08eea, 0x45a41cc0 },
41887     { 0x4dbbf0e1, 0x3ea9b2c7, 0xa17cf70e, 0x41ff962f, 0x5eeb4c66,
41888       0xdc1ea758, 0xa9beb17e, 0x4f5412d2, 0xa285741a, 0x2c9e4f52,
41889       0x984fd11f, 0x93df7da4, 0x0df3184e, 0xb2afbddc, 0x2421e375,
41890       0x96323d25, 0x49df781e, 0xc87be1e4, 0x3d589bea, 0x145601ed,
41891       0x28fff6dd, 0x0f0bd9bd, 0x8a0f298c, 0x2d3259d4, 0xd88e6944,
41892       0x362d7a77, 0xb6ac2af6, 0xa84c06b6, 0xd087da02, 0xba850ac9,
41893       0x42ee40c8, 0x128763c9 },
41894     { 0xacbac178, 0x29a80f07, 0x34b08f6e, 0x7cc20044, 0x70feded2,
41895       0xe9631d14, 0x86615767, 0xb2115da3, 0xcb088548, 0x7c75f5c4,
41896       0x9a2e8e03, 0x5b29d213, 0x8b881752, 0xfe9fda66, 0xc1de7ebc,
41897       0x3f1d8d88, 0x03218123, 0xb476565e, 0xb1c995f3, 0x07365561,
41898       0xb13eb71b, 0x2160cb18, 0x99b3a0eb, 0x7e8da513, 0xb20fcd74,
41899       0x5e8ca1f9, 0xb4126d72, 0x6a7e0067, 0x68bb637f, 0x1e8204b7,
41900       0xfc4f74d2, 0x75e96bcc },
41901     { 0x0d19716e, 0x189d1fdc, 0x7c384525, 0xdf585058, 0xea987d2a,
41902       0x64a846d1, 0x6c07150f, 0x12b6bf83, 0x4d6fd5b7, 0x91d85d46,
41903       0x4f53f55f, 0xa9788836, 0x81509129, 0x60083bd8, 0xea876f48,
41904       0xa7672683, 0xc15b2489, 0xe80b2e7a, 0x42d1d992, 0x985ef8d2,
41905       0xcf3de492, 0x9c57b029, 0xb1487627, 0xfe02f83c, 0x8ae5b687,
41906       0xaeba4fe4, 0x5d6b8196, 0x8a86f09b, 0xa16e523d, 0xd88f566b,
41907       0xba268949, 0x309a6e9a },
41908     { 0xbdfbe97a, 0xef27ee50, 0xb8c50c4d, 0x1a5fe70f, 0x7fe09f5c,
41909       0xcc7beb01, 0xbed36cc5, 0x8fa15a85, 0x7550ed3a, 0xc0c3acdb,
41910       0xeb908681, 0xc581ef87, 0xc49d5ccb, 0xa15b3362, 0x1fa264e8,
41911       0x0fbb1714, 0x8e1eee88, 0x267f8d8f, 0x21c2b63d, 0xd31ccfd6,
41912       0x53be7efd, 0x924dbe7d, 0xdb2a358a, 0xd42e877f, 0x75d68ac1,
41913       0xcf9673c7, 0x714fea55, 0xe35978fd, 0x5769b202, 0xeeb36653,
41914       0xd7593789, 0x0458258a },
41915     { 0xa042dbdf, 0x5df71a74, 0x5779dfa2, 0x2d405857, 0x0d2e6657,
41916       0x0e66cba7, 0xca2e892e, 0x285d6745, 0x0f0e6b5f, 0xf56a8def,
41917       0xa30767c3, 0xe0ee851d, 0x43346b9c, 0x98c05658, 0xd6b3c742,
41918       0xb35fce26, 0x39777e00, 0xc0895bff, 0xe7b6d886, 0x83c8f6a6,
41919       0x4f02904b, 0xbee14843, 0x2e84ec34, 0x7f74915b, 0x96d10991,
41920       0xbaaf663c, 0xe41facc0, 0x004b8757, 0x6f86c029, 0xa2b880e5,
41921       0x95b77358, 0x53f4a3e0 },
41922     { 0x89fc48e7, 0x11bb08ce, 0xafab5aeb, 0xba60c577, 0xa0c1cb5a,
41923       0xf06bcbf8, 0x79757cb6, 0x7d2efaea, 0x76319160, 0xe26d90b1,
41924       0x2b77b7a9, 0x42aa1ab6, 0x285df2bf, 0x38eec0cd, 0xf3a8f7f0,
41925       0xd35947f5, 0xfc1cb5b5, 0x97c8dc0e, 0xc45845cf, 0xfeb8cca0,
41926       0x249e26f2, 0x16e8d989, 0x483ed89a, 0x7c264e6d, 0x51d91073,
41927       0x13a3f145, 0x305e99f0, 0x8501562e, 0x6908d563, 0xaaf98d74,
41928       0xd723d236, 0x0a99e653 },
41929     { 0xabbc0559, 0x23536f46, 0x9aa1a160, 0xc163067b, 0x0c1681b5,
41930       0x229fd229, 0x1378e907, 0x61254be1, 0xab793a2d, 0xc60ff57a,
41931       0x466552db, 0xa6f2df8b, 0x8c170a36, 0x9ad31893, 0x29b74d9a,
41932       0xc5cd9abe, 0xf7848523, 0xcf747273, 0x0d0e3063, 0xc126a93a,
41933       0x4248e3d8, 0xfe2021e3, 0x8323ddfa, 0xd97343ee, 0x332639e7,
41934       0x9f768775, 0x75325548, 0x9650fc31, 0x3eebf7ea, 0xb595dbd1,
41935       0x010fcbc0, 0x3a95cb45 },
41936     { 0x39d7ff2e, 0x954e68cb, 0xc1d5c48f, 0x8dd1cb4b, 0x7169438a,
41937       0x02a92c77, 0x91cad8ce, 0x7965c0b0, 0x32cd08d2, 0x0c5798ab,
41938       0xa6902bda, 0x1a5bc3c3, 0x5186d218, 0x545d0925, 0xd27e64db,
41939       0xf0077cdb, 0x8cd092da, 0x0157caa4, 0x24532ab3, 0x2a2fa3a0,
41940       0x41ccaba3, 0xa5fb639b, 0x4744aee6, 0x01702dc1, 0xcdba93da,
41941       0x485bb436, 0x329784f1, 0x93597f66, 0xdad672c3, 0x5d713c1d,
41942       0x030b7245, 0x366d222e },
41943     { 0x573ea5b2, 0xd50b4875, 0xa90da44d, 0x0fce401b, 0x7a1a0310,
41944       0x7b53fa65, 0xcf114460, 0x722a80a5, 0xa538bf49, 0x0b8ebf05,
41945       0xd32acd21, 0xae141147, 0x7b5ad07d, 0x6692712c, 0x3f48ca07,
41946       0x6dc5fee7, 0x2b8a78d8, 0x98ed1499, 0xdd2f1759, 0x4e8b3145,
41947       0x5f971b8e, 0x43408de1, 0xadf1b368, 0x055ea6dd, 0xe5932b7e,
41948       0x4bb76e73, 0xd30893fd, 0x44287153, 0x0661bfda, 0x173dccd2,
41949       0x79defd25, 0x9072ba99 },
41950     { 0x9620ea39, 0x474de4dd, 0xc831cee8, 0xfbf1649f, 0xcd3a9c43,
41951       0x0b0e8bb1, 0x3f3df1d5, 0x6a38286f, 0x8f0ec9b3, 0x4ed072b3,
41952       0x729c09e3, 0xa6e4c987, 0x8ad12242, 0xea3e8ac6, 0xfbdfa5ba,
41953       0x6ae0e22b, 0xb0a0f592, 0x56171ecf, 0x6b871f8d, 0x33b2886d,
41954       0x35e11bda, 0x6b19bea9, 0x7f0f153f, 0x4d815a40, 0x7d6c02ee,
41955       0x7e608d97, 0xb6a88f46, 0x7e8f23d9, 0x439d1654, 0x26ac9652,
41956       0x35546c29, 0x8d92c6bd },
41957     { 0xabeb0ff7, 0xb3e0d7ce, 0x3e0e42f8, 0xfbe35254, 0xde808499,
41958       0x57d1b226, 0x1cd44bc3, 0x9ece2e1f, 0x435cfee1, 0x1245adbc,
41959       0xf93f581c, 0x874ee840, 0xbda0b947, 0x916a779c, 0xfa57ae0a,
41960       0xabcc815a, 0xf0a621b0, 0x97adec2d, 0x81f90bdc, 0xbe6a502b,
41961       0x53bde63d, 0x54bf9de1, 0x78884c25, 0xa88fdabf, 0xcbbb5470,
41962       0x30aa52b1, 0x29053ef5, 0xf805396c, 0x8dd827ea, 0x8d43d898,
41963       0x5c1ae5c0, 0x4e4bec17 },
41964     { 0xfcc09676, 0xbf8483a2, 0x19ea9a94, 0x457c4a3f, 0xd702a5dd,
41965       0xa6852ef3, 0x843fe7d8, 0xe7915fd2, 0x16e35158, 0x644bba98,
41966       0x9ed746f0, 0x8d1b95d0, 0xb90af0b5, 0x47704581, 0xd4fd135e,
41967       0x0bd4bc6b, 0xb4e833a5, 0xa6dce067, 0xff56a9a1, 0x2c0e8f30,
41968       0xec2c63fe, 0xa9c80800, 0x98f508a8, 0x449c20a5, 0x3292813a,
41969       0x02b94cb3, 0xec7e81a2, 0x647e3d28, 0xb4877677, 0x72e67d1a,
41970       0x6f9ded24, 0x7a4aa3f5 },
41971     { 0xe27a0045, 0x559ef1ba, 0xb242cb50, 0xdc812d4f, 0x39cf8d24,
41972       0x23a478e4, 0x9b3f9c54, 0x97544fc5, 0xaffa1fcf, 0x5ac68132,
41973       0x34a2c83b, 0x74f8fee0, 0xcd3f4bb7, 0x96cc640f, 0xb0512ea6,
41974       0x775dce9d, 0xcdce381e, 0x67dca19d, 0xa9d3fe55, 0xc1eeb3f3,
41975       0x1a19274f, 0x38e0bf42, 0x28d69b12, 0x15992fb4, 0x9fd09df8,
41976       0x48fcebde, 0xb41ab5df, 0xdc9dfa4f, 0xc0a269c5, 0x0cbd7dc8,
41977       0xf7f0ade1, 0x60282a7b },
41978     { 0xdceea2e7, 0x7c07e538, 0x3c42061d, 0x38a322c8, 0x4f1f6516,
41979       0x676828f9, 0xc7776a10, 0xf21b69fb, 0xb5e6b405, 0xc63a3417,
41980       0x91a7b642, 0x4c99f258, 0x2cad1440, 0x38692ca8, 0x00869bcd,
41981       0xf1e82ffe, 0x16fe466a, 0xc30b714e, 0x19019138, 0x5fb742f9,
41982       0x0fa516ae, 0xe90166d0, 0xd8c73a43, 0x5550f7ac, 0xfbc5c372,
41983       0x2d6a407d, 0x68cc39ed, 0xe47a7539, 0x4a5fbe70, 0x3fd286d9,
41984       0x23c6b942, 0x5f4ae9c7 },
41985     { 0x53f4d561, 0xd96a2dda, 0x16da1992, 0x286d45d0, 0xfdd4b051,
41986       0x449a01fb, 0x9f2195ea, 0x25488a0d, 0xa37661b3, 0xc4151b0a,
41987       0xf9e5ee02, 0xb98c471e, 0xa8658817, 0xa4bca86e, 0x7a68fc0a,
41988       0xbbcadb87, 0x6b7366a9, 0x88b34649, 0x15661c2d, 0x32ee98d4,
41989       0xc901420c, 0xf5b3b4c6, 0x2f2752af, 0xa2352735, 0x510e4d9c,
41990       0x2f64ce73, 0xaca4aa80, 0x939a7f26, 0x401aa503, 0x9cd3e291,
41991       0xdc46afd2, 0x92a01423 },
41992     { 0x1c2f7dbd, 0xe9f24be1, 0xb7d527fa, 0xda8c900f, 0x8648f128,
41993       0x963e25bb, 0x48141941, 0x9ab713e2, 0x7a6756fb, 0xe87f7d01,
41994       0x058d90bd, 0x274dd85e, 0x82566abd, 0x823fee7a, 0x74240195,
41995       0x9f6230d7, 0xacb5e46e, 0x04579f2c, 0x16a4c87e, 0x2a226263,
41996       0xd99b0857, 0x9ca19a43, 0xe488789e, 0x86dc2ba3, 0x9406c3bd,
41997       0xf960b5b9, 0x8960957e, 0x6f2c428b, 0x161c515b, 0x90748706,
41998       0xaa88cb9b, 0x0fc8fe1e },
41999     { 0xfeb90f2d, 0x68ae1bed, 0xa48b1559, 0xf393bb3c, 0xf64e9635,
42000       0x2be62f9c, 0xf8be75c2, 0x354c2410, 0x5e6f7529, 0xbd7ea703,
42001       0x162cab31, 0xc264868e, 0xc860f3ff, 0xb1391e70, 0x1d89837e,
42002       0xdf367c75, 0x2bf32941, 0xe150b6b4, 0x78c1318f, 0x95e8f46e,
42003       0xa2c4b160, 0x2b3f1dab, 0x701afbf3, 0xc6ccf5ce, 0x5e8874c5,
42004       0x3ad27530, 0x5dc6dcbe, 0x39285e51, 0xd99892dd, 0x3c954d86,
42005       0xdfd3789f, 0x2d0ba862 },
42006     { 0xb472e1af, 0xeacd8ee8, 0xb76abbcc, 0xeb354eae, 0xd0d93fbd,
42007       0x9b520bf8, 0xfe6fc706, 0xfccd60d7, 0xa4ee2f39, 0xa9353dde,
42008       0x9a81e51e, 0x5eb0925e, 0xd1366777, 0xee334da1, 0xd5354d69,
42009       0xc1d28c9f, 0x92a5ed54, 0xb9771755, 0xb7f70d81, 0x5d3e367f,
42010       0xa933ae7a, 0x7be7eeca, 0xe23cfbb7, 0x264cf1f9, 0x89497681,
42011       0x0d129f4a, 0x09b6235b, 0x705375a4, 0x48a376da, 0xccf64c75,
42012       0x4d41dbfc, 0x963c8712 },
42013     { 0xde36a814, 0xbae290cb, 0x733b12b5, 0x9bdb0195, 0xf77fe0e1,
42014       0x0ebad867, 0x29720cea, 0x0a7d19fd, 0x9029ec72, 0x434d7651,
42015       0xbb51911e, 0x856aff17, 0xd80a7f60, 0xd0a25d9a, 0xf848c106,
42016       0xffca86af, 0x43ad749c, 0x53e8bdf9, 0xe3e696bb, 0xfb9e0284,
42017       0xeeee4215, 0x3eb6630a, 0x2ecf3c63, 0x9d8fbb9e, 0x4e00c0c0,
42018       0x71da4ffa, 0x5d57beac, 0xb296be59, 0xa8cec7ef, 0x1751fbad,
42019       0xff55d7bd, 0x2d03eb3c },
42020     { 0x04f2ec1d, 0xeb16925f, 0x0d147ee2, 0xa878f276, 0xaad9d9e0,
42021       0x442df604, 0x3f71035b, 0x891df44b, 0x8cb95d5b, 0xc28272b3,
42022       0x5ee8ed23, 0x6f14efb5, 0x13b0f3e3, 0xf3c4460f, 0x6bd7335e,
42023       0x889f9bd7, 0xf755ba6e, 0x889ee771, 0xed219b6c, 0x626984fe,
42024       0xec2ee411, 0x2d44c737, 0x63efcd37, 0xb94385a2, 0x6637826b,
42025       0xd909321b, 0x3ee6b7a7, 0xc24f8a79, 0xa7cf61b7, 0xa3ca8d24,
42026       0xc54bacd9, 0x842e40c1 },
42027     { 0xa661d843, 0x5a268ed6, 0x4f5b30cd, 0x02328cca, 0x1311e177,
42028       0x16e6fed1, 0xc6695967, 0x690decb4, 0x57b2e280, 0xbdac5bf6,
42029       0x1efe42d0, 0x827f82ca, 0xca5fca2f, 0xc554ec0a, 0xdde45506,
42030       0xac5276c1, 0xe3077513, 0xb7f4cb08, 0xcc8797cc, 0x8caf6d9a,
42031       0x0d9332d2, 0xd5964814, 0x285a409f, 0xcc6ae297, 0x6223d093,
42032       0x7773c2a5, 0x5128fc09, 0x2d5266ac, 0xbc31fe6c, 0xa596b7cb,
42033       0xcac91328, 0x0e63319a },
42034     { 0xf0360ac2, 0xb5cd2fad, 0x285e605a, 0x86b660de, 0xe25b9b14,
42035       0x82c6cf10, 0xaa9ac554, 0x9d5fa38d, 0x526c070e, 0x3dfcf1b8,
42036       0x3fccc52d, 0x0379a96b, 0x0bfcc7f5, 0xe3659c29, 0x69d3e6a1,
42037       0x5b1a3db5, 0x9b7b42d5, 0xb41528b5, 0x9c22a006, 0x934defa4,
42038       0x9b4ce3b6, 0x90f38018, 0xb3abaf32, 0xb073bc04, 0xff8389e2,
42039       0x27a5a222, 0xffa5a35b, 0x0b7a9d51, 0x28e1a7c2, 0x4939ecef,
42040       0x1872705a, 0x88839da2 },
42041     { 0x701ce29a, 0x56b66c30, 0x58981d50, 0x3acaf126, 0x105f9f21,
42042       0xd4dafc0c, 0x373e3d13, 0xfee571e6, 0xfa2ee3ca, 0xe7269c86,
42043       0xdd20385a, 0xf5cca64a, 0x3000e9ac, 0x217f2757, 0x0e7273ef,
42044       0xc934db47, 0x355b6776, 0x4294f4f7, 0x6fc05180, 0x1faa36b9,
42045       0xb052190b, 0x8f88b1db, 0xe9eaef52, 0x35791b90, 0xdb681b90,
42046       0xf37fb2eb, 0x4415c369, 0x39d0a51d, 0x1d2e21c9, 0xfc59cca7,
42047       0xa1f50c26, 0x64128cfe },
42048     { 0xe8f5b0b5, 0xf03678a2, 0xd340f059, 0x5c7e249c, 0x93ca7cec,
42049       0x41440441, 0xbc83af98, 0x075ca346, 0xfaa8bbb0, 0xf39f0033,
42050       0xf38230f7, 0x3d18f0ed, 0xd448f345, 0x78dff00c, 0xd51aa475,
42051       0x849228c0, 0x30c928d1, 0xdd4e2708, 0x8f12cfd3, 0xc66ba686,
42052       0x88b3a206, 0x091049db, 0x016dae01, 0xd865d059, 0xe253e37d,
42053       0x4599e905, 0x7ce9871b, 0x322cf0c2, 0x174a132e, 0x014f54da,
42054       0xbdabcbda, 0x93634a09 },
42055     { 0xa9a2e304, 0x62826b27, 0xc1a4c124, 0xc57e1866, 0x22381710,
42056       0x913ab832, 0xa9847cfe, 0x7e9b6b85, 0x2b5f46fd, 0x29655cf1,
42057       0x8038e66d, 0x7295572b, 0x6fa95eab, 0xe4cba601, 0xb9deda81,
42058       0xbbc11071, 0x3f1cf61e, 0x97f0009a, 0x373e0cfb, 0x5372777b,
42059       0xd139d63b, 0x302f909c, 0x4f87d78e, 0x1ed672da, 0xb4048763,
42060       0x362077a3, 0x9dcc22b2, 0xc408c32d, 0x26deeee7, 0x4b4c5bf2,
42061       0xbc06357e, 0x266cb467 },
42062     { 0xb56363e8, 0x6faa4154, 0x3c1aa4db, 0x4b4fd078, 0x2b9e6597,
42063       0x14358dde, 0xfa004b84, 0x5b34ae3e, 0xf19911a6, 0xcf44b2ec,
42064       0xa536bf78, 0x55caa833, 0x8870dc95, 0x606e1eb9, 0x09f3511d,
42065       0xe3c3287d, 0x9d5cf364, 0x68b2f4eb, 0x63ab8c9e, 0xc154e892,
42066       0xc36ab611, 0x1548828e, 0xa1b7d120, 0x0932bfcb, 0x5315b8d7,
42067       0x7ee7b5bc, 0xf7473ac1, 0x782fd0d1, 0x3c8f2af3, 0xbcb029a8,
42068       0x52454ee1, 0x4b1d5a1b },
42069     { 0x63d52c0c, 0x12fe5174, 0x188c099d, 0x3735525e, 0x360e3956,
42070       0x5c621563, 0xacfa5a43, 0x88b3f1ca, 0x797e8107, 0x90123a0a,
42071       0xb15e080a, 0xba31f6b5, 0xfca3dada, 0xd7de5e12, 0x0df511c8,
42072       0x3287361b, 0x65757d4e, 0x7cc800d4, 0x5207ec91, 0x10810f3d,
42073       0x30eea0e3, 0x0d4e56f1, 0x3ea5a2ec, 0xbbf7ee13, 0xbe6abbd0,
42074       0x6fc07762, 0x120bf619, 0xc831fdce, 0xb622d42a, 0xe07439fa,
42075       0x508e4b27, 0x8186b93f },
42076     { 0x09312867, 0xc619d154, 0xbfaf7db4, 0x7e042c05, 0x1f5f5dda,
42077       0xc1cf1668, 0xa4fc3d82, 0x50aa5057, 0xce68b8fe, 0xed30ed65,
42078       0xbeb4d644, 0xecb01c0b, 0x831c0497, 0x7b5dc444, 0x9b7d9b1c,
42079       0x351e6a00, 0xd9477c91, 0x4bb863b9, 0x05d4110a, 0xaba65891,
42080       0x43580b7a, 0x30086cf4, 0x90be357e, 0xb139c076, 0x27b5214e,
42081       0x12bfff1a, 0x22c3ab57, 0x79cfc6d7, 0xf34a9bfa, 0x4743de57,
42082       0xc9ee2b2a, 0x0bf97e97 },
42083     { 0xdda19e96, 0x96ec4ec8, 0x6c306e8b, 0x54ce18ea, 0x65f6918a,
42084       0x7e83612b, 0x0d9a0d99, 0x1ac6f68b, 0x62fdcc09, 0x98a697a4,
42085       0x95bc3e13, 0x65ce25f1, 0xb3939730, 0x1896ecda, 0x32f12806,
42086       0x9eb81a0f, 0x1d2dc7df, 0xd3d7416e, 0xad473599, 0xe22c7976,
42087       0x9f5ef439, 0x3de37a9a, 0x9e69d94e, 0x6b7ac0ab, 0x0a9d0bc8,
42088       0xe6bfa9e0, 0x5676f120, 0x576a870d, 0xfeaac23f, 0x3bd91bb4,
42089       0x3e40aabb, 0x8fe5482c },
42090     { 0xce9a4d1e, 0x85ae67c2, 0x4f1d2038, 0x4c3eb803, 0x25d06192,
42091       0x5c6c8f3a, 0x308fb41c, 0x803de0ad, 0xe71c294e, 0x9961f5bc,
42092       0xf02eb0da, 0xdc62078d, 0xb64ae8b6, 0xc87ef515, 0x50b4d18f,
42093       0x69679f1e, 0x52199f43, 0xc5c009a1, 0x0f640a5f, 0xa7d484be,
42094       0x23dab566, 0x4c918bb1, 0x64275d2c, 0xa67c114c, 0xcad2ded6,
42095       0x95a913b9, 0x6b4b5c8d, 0x189ed18b, 0xb42d3bf6, 0x4aeb6206,
42096       0xbbc8bc3f, 0x3928c669 },
42097     { 0xdacb4b64, 0xde4bea4a, 0xf26179a1, 0x03f62a44, 0x7a9112a4,
42098       0xf3aac94e, 0xd36f331e, 0x90448fbd, 0x407b85c4, 0x426042bc,
42099       0x2121b77b, 0x5ad8a596, 0x67cee984, 0x31674a4f, 0x4e3b2f0d,
42100       0x7fae8bbe, 0xa7c930eb, 0x681df6dd, 0xc259d0d4, 0xadeefa98,
42101       0xbea1c1fd, 0x1b14d9e6, 0x21d405d1, 0x3baadc8b, 0x73892754,
42102       0xf01dff93, 0xf071cde4, 0x81c35b3e, 0x9150d0d9, 0x1704d2e1,
42103       0x355134f6, 0x6ccc888f },
42104     { 0x7ad7504c, 0xf8d36f0e, 0xf7959ddd, 0xbca3265f, 0xfede67aa,
42105       0x0dcd1ede, 0xbaebf32f, 0x1276f4ce, 0x014edcfc, 0x6825a6e6,
42106       0x99ad8eb7, 0x0b8c1a82, 0x09b8ce1e, 0x312024a9, 0x9cbd351a,
42107       0xcb8fd98b, 0xfab1e8be, 0xa4841378, 0x3973cacf, 0x17ed0f5d,
42108       0x259d5254, 0xa17e1484, 0x74b91393, 0x53d5b843, 0x1aca3ce9,
42109       0x8f792b21, 0xc8c0f815, 0x035ff110, 0xad4ed7bd, 0x6afa6357,
42110       0xb26faef9, 0x2f151980 },
42111     { 0x29d2d439, 0x0c8631da, 0xbc039955, 0x121fbbc2, 0x6c05b75b,
42112       0x3e5a9792, 0xb6ce47ec, 0x6d6cf4c0, 0x9d88c658, 0xbaaa1767,
42113       0xf3355a17, 0x031db9e7, 0x0aef5a85, 0x8381e3d8, 0x15a31bdf,
42114       0xc71db290, 0x9498fd7d, 0x638f6b74, 0x13beeef6, 0x44edf3f9,
42115       0xf4ab67b3, 0xe6173271, 0xfd22df11, 0x3a202c70, 0x205c4e92,
42116       0xf7be0389, 0xa8eb9920, 0x1c219085, 0xbeb54aaa, 0x6c805ce8,
42117       0x0ac58d65, 0x354b05b7 },
42118     { 0x7a9170e9, 0x7171e236, 0x4cad50cd, 0x01eec42d, 0x3cddccfb,
42119       0xffbe824f, 0xa66cae1a, 0xa73e8ce3, 0x965c7d01, 0xb7138a7f,
42120       0x5c3d971e, 0x00058e3f, 0x2ff0a72b, 0x52591ac3, 0xbbbce76f,
42121       0xa32fb5bc, 0xa9f81a18, 0xf3241ab8, 0xeca68630, 0xf31d3332,
42122       0x4482f13b, 0x847af9fc, 0xa4681be2, 0x6196e217, 0xe55efcf9,
42123       0x9938f932, 0x70acc705, 0x3e7dacb8, 0xcf09fac2, 0xd41be893,
42124       0xae3523a1, 0x48dc55c4 },
42125     { 0xa5092193, 0x8e623826, 0x6898970c, 0xe46ec362, 0x25c9eb41,
42126       0x2f1356af, 0x83c7d245, 0x41780640, 0x97d00e38, 0x982def67,
42127       0xa512151c, 0x382eb6e7, 0x8af58869, 0x154e1077, 0x8a51cf02,
42128       0x18707075, 0x71313c58, 0xcdeba9f7, 0xba155904, 0x5d67b973,
42129       0x1d0d7b3a, 0x851c9f4b, 0x8b8af2cd, 0x19f29d71, 0x986b8d62,
42130       0xcb94ccff, 0xb93b9c33, 0x8725e24b, 0x66e38c68, 0x405ce4c5,
42131       0x0b6dc021, 0x5f6a8edd },
42132     { 0x8f9a8690, 0x83704ca5, 0x2f76a407, 0x3f369766, 0x69201028,
42133       0xfbc12d8c, 0xbce3a4cf, 0x4cd58f16, 0x04aab26d, 0x7804664a,
42134       0x4ea457a8, 0x005cfbba, 0xb8a59794, 0x537951b3, 0x4fe1f739,
42135       0x4ca2b9e4, 0xdf325797, 0xe4428acd, 0x0ea243db, 0x648da342,
42136       0xf43ce01e, 0xcce6562b, 0xf27db490, 0x840f0421, 0x8bfb7cf0,
42137       0x156ccb70, 0x5a8797d3, 0x9b33480d, 0x9eb814bb, 0x2e12e07a,
42138       0xca7f87ac, 0x1ca65072 },
42139     { 0x2b9d25a0, 0xfbb321cf, 0x40a746db, 0x66affdca, 0x59e368b5,
42140       0xc1c1530e, 0x7d80068f, 0x56ed1ea4, 0x5647dd68, 0x9b74d8fe,
42141       0x89b78da8, 0x1d96b507, 0x8bbe3391, 0x39b75243, 0x0d858c5f,
42142       0xef8d443e, 0x9646aa34, 0x4dd2db49, 0xe667543c, 0x7fad3bd1,
42143       0x68980985, 0xd0d710c0, 0x49facaba, 0x9f7aff32, 0x14f9a192,
42144       0x055dec1c, 0x1fb307a1, 0xaca66399, 0x35ffff64, 0xac44fd91,
42145       0xcbad3cee, 0x462cafb6 },
42146     { 0xde3237dd, 0x1660a647, 0x82b87404, 0x95f735cc, 0xddfa55f8,
42147       0xf7879f59, 0x726b914a, 0x15ef043e, 0x1c93e298, 0x1875393d,
42148       0x6ef18331, 0xa1a2be74, 0x25a9a12b, 0x4e7e8dfc, 0xa9c3917f,
42149       0xdfefc97d, 0x0a2ebe41, 0xbc875d03, 0xa732d1cc, 0x0f75d235,
42150       0xd9baa6d3, 0x06fee7fe, 0x65f48576, 0xaa784fab, 0x513f83c0,
42151       0x23155e22, 0x3e8f9d13, 0xd2fb7718, 0xb546eafd, 0x2a291503,
42152       0x6cd93608, 0x1293c98c },
42153     { 0x49d53b77, 0x72781251, 0x96eafac7, 0xa6ab403d, 0x4a36b711,
42154       0xb7d7c7db, 0x87e771c1, 0x8238c708, 0x33b37522, 0x495f6abf,
42155       0x8c87530d, 0xb0b0289c, 0xe77b111a, 0xca83cb86, 0xa1bd189e,
42156       0xbe1c0fb8, 0x1ae9d7c7, 0x58cfb2fb, 0x4940c3e8, 0xd05c23c5,
42157       0x74ad9107, 0x16e79e41, 0x064e7142, 0xa0a47f05, 0xfdfd614f,
42158       0xc6929cd4, 0x3946988b, 0xedb2584c, 0xe46f8fb1, 0x73e4b5f3,
42159       0x68ea94ba, 0x53b79aa1 },
42160     { 0x44bbb6a1, 0x216fafce, 0x67821728, 0xd3a5bba0, 0xa9dd939a,
42161       0xef1e4b30, 0xf19efafe, 0x022eaf3d, 0x7b4ec014, 0xfed5abce,
42162       0x512c6738, 0x64968ee6, 0x29fe89a2, 0x23119869, 0x47397c05,
42163       0x0d539d8d, 0x234596c4, 0x6400bc54, 0x5346611d, 0xb9287f58,
42164       0xc9d5da0f, 0x04099903, 0xc83af2a8, 0xe5ef4997, 0x328151e1,
42165       0xc89dc01b, 0x58401104, 0x150fb4a9, 0xf3872c9d, 0x40a6f7d5,
42166       0x56c2e833, 0x8290d6d1 },
42167     { 0xd8546946, 0xf84637c6, 0x69ec57fa, 0xda134a39, 0xd789007e,
42168       0xd42359a4, 0x0dc7b809, 0xb42557fe, 0x2d6784a9, 0xe62ae52d,
42169       0x0bcadb5f, 0xa2714ca6, 0x33aafca5, 0xcc208de6, 0xed967811,
42170       0x2380ed5c, 0xdb321660, 0x6e6b55e9, 0xa675235a, 0x1bead02c,
42171       0xb33fa0e1, 0x51cc6ef9, 0xf06a2a08, 0xfd223e26, 0xec47b3cf,
42172       0x00f332e1, 0xa0aa984e, 0x459f297b, 0xee952e14, 0x6fa1d969,
42173       0x304fabb0, 0x506ef1ab },
42174     { 0x35bff163, 0x11b4eb27, 0xea9fa984, 0x7130b96f, 0x9deb27ce,
42175       0x66aceb3f, 0x9dd1c3d5, 0xa2daf1a5, 0xa73075aa, 0xf5090a7e,
42176       0xe3071b58, 0x36a6af39, 0xdf73ad9c, 0xa28d633d, 0xbdc89a16,
42177       0xdd354cac, 0xd4dcbc3c, 0xdfea3423, 0x379d92d1, 0x6eec74d2,
42178       0x8eed6765, 0xe14a456f, 0xfa8feb1f, 0xfabe7743, 0xb98fcbc7,
42179       0x1404ccf8, 0xf71a706e, 0x6ccd2fbf, 0x4d85c678, 0xdaaf3fdb,
42180       0x15200344, 0x415b7dbf },
42181     { 0x7d8377a7, 0x97010586, 0xcb803272, 0x068a3d68, 0xf03a4c32,
42182       0xfd67d289, 0x93c8f290, 0x4bc7095d, 0xe9e5a2b8, 0x712fa13c,
42183       0x0feb9f3b, 0xfc6ac6c6, 0x6e0e54c2, 0x0cda36d9, 0x86320a01,
42184       0x45499751, 0x97f00f11, 0xf9318c91, 0xe6936508, 0x01dc4c3f,
42185       0x85f068aa, 0x769a2ef9, 0xa2b5511c, 0x3522cef0, 0xb4122e05,
42186       0x006965ed, 0xc175d43f, 0xfce0fafc, 0xec831d59, 0x525dc9bd,
42187       0xaf58879d, 0x1ec314f1 },
42188     { 0x2c8310c2, 0x0663feef, 0x457e3f74, 0xaa7e14da, 0xe5346887,
42189       0x392b10fc, 0x637ec2c5, 0xcde4a38f, 0xb542f8df, 0x50773320,
42190       0xf7de1711, 0x341302f9, 0xae4b9bc6, 0x018b1c63, 0xdd2f9e6f,
42191       0xf001c46e, 0x26eccfa0, 0xd3bb0a97, 0x7746e0c7, 0xa931b99d,
42192       0xf5875aec, 0xe0c8b6f7, 0x96939c82, 0xbb32f17c, 0x3de5a664,
42193       0x765135d2, 0x52abfa6b, 0x71936cb4, 0x2dc105de, 0xad5cc08f,
42194       0x7fff5788, 0x17e91d12 },
42195     { 0xb7e051ca, 0xbe92ced3, 0x19c776d4, 0xc644d4fd, 0x0086784b,
42196       0xc8ab4b52, 0xce9d6b31, 0x3ea66227, 0xd289e9c7, 0x395249a3,
42197       0xd12a19ee, 0x54509e65, 0x8c365aec, 0xa7bd4692, 0x77963e0e,
42198       0x354997e4, 0xb599732d, 0x0d765957, 0x91d4a3b6, 0x99584aeb,
42199       0x1deb3e28, 0x6e653ea4, 0x572571df, 0xca7c98ed, 0xb18ae1f9,
42200       0xf301a38f, 0x63f7b97e, 0x1629f7c2, 0xafc4a0d5, 0xdf242282,
42201       0x3ddd0c01, 0x118f3b4b },
42202     { 0x7ad4762b, 0x74a0a0a8, 0x8c58d175, 0x1aef84da, 0x4cf76d86,
42203       0x16ff4960, 0x7e60d98b, 0xc0be8786, 0x3ecc1dba, 0x83637ffb,
42204       0x5dd6147a, 0xc244a609, 0x5b0846e5, 0xa3e17834, 0xe77a4c05,
42205       0x735eb686, 0xdf758695, 0x5bc18b4f, 0x1bdfe52f, 0x15618d0b,
42206       0x00715ba1, 0x878ecc0d, 0xc2dd617f, 0x1dbdbd1a, 0x21b61710,
42207       0x21d2b631, 0x44f593c2, 0x22ce8a79, 0x44f17024, 0x3b9b536a,
42208       0x8d03e727, 0x01d0a67c },
42209     { 0x1e46533c, 0x7b964236, 0xfb88c2ae, 0xe9477990, 0xa42c4a18,
42210       0x019b5d16, 0xd83c7a45, 0x7135e81d, 0x4cb663e3, 0x74a69bdd,
42211       0xe76c0d63, 0x7b67ecdb, 0x11e68da6, 0x03d54521, 0xd2e8650a,
42212       0x596cceb5, 0x2af03b37, 0xcd572dfd, 0xfabd5952, 0x52364ba1,
42213       0xb4ed8569, 0x7f47d456, 0xc950d5d4, 0x5ad8b572, 0x486e2f84,
42214       0xcadd2dfa, 0xc56bb044, 0xdd527b43, 0x997c08e6, 0xc9adba24,
42215       0x7da6320f, 0x1b625b06 },
42216     { 0x4fd8446d, 0x44dfaa7b, 0xaf6febeb, 0xc01b2f01, 0xfe8838b5,
42217       0xbf444388, 0xbba9758b, 0xf33c434f, 0x87156bc9, 0x2b971cba,
42218       0x1f49098b, 0x6b245e5c, 0x2b41c5dd, 0x87dcb534, 0x34d852d7,
42219       0xdb1f80c6, 0x2433da34, 0x6d6e3258, 0x3f7df0c2, 0xf6682065,
42220       0x360cb365, 0xc4ca567c, 0x9826656a, 0x321faac2, 0xbf069768,
42221       0x13f5ca6f, 0xa7076639, 0x15397921, 0x8400736e, 0xbdf14328,
42222       0x19fc948d, 0x333eca96 },
42223     { 0xac775d81, 0x23337948, 0xd41dbbca, 0x38c2518f, 0xbcfce948,
42224       0x623c7a4f, 0x54703fe7, 0xaad36236, 0x13fb3b5b, 0x2b3a13a4,
42225       0x7f5c01f0, 0x5db3565a, 0x52359661, 0xd72408dc, 0x1d616e91,
42226       0x5a17f8e5, 0xcb25b999, 0x90c16eeb, 0x3393743e, 0xf35e8cf1,
42227       0xe54b64a7, 0x987da74a, 0x65cd449d, 0x557b322a, 0x37e7b15d,
42228       0x765082a5, 0xf2cd134f, 0x4d25c742, 0x4ccf0746, 0xae9d9c07,
42229       0x8728d135, 0x72fc2110 },
42230     { 0xf96004c8, 0xa906b203, 0x458055ff, 0xd83f95cf, 0x55f35909,
42231       0xd77d5867, 0xe550c8ee, 0x4a9ea6fb, 0x55a06081, 0x91c8cca9,
42232       0xbce82062, 0x4a1fee78, 0x9a3df85e, 0xeb9ade06, 0x7d3de666,
42233       0xfbbdcf0c, 0x5d336d51, 0x228a391b, 0x5c2ffc3c, 0x760f8d28,
42234       0x2f7b165b, 0x1ee48de3, 0x56177040, 0x03803d84, 0x9deff9a0,
42235       0xe573f648, 0xa17e35a4, 0xe1a2738e, 0x8840a6c6, 0x238ef17c,
42236       0xb11ed92d, 0x480946f8 },
42237     { 0xfd71f119, 0x84c747a8, 0x53eb3695, 0x19e65c5e, 0x6298587a,
42238       0x0e2f6786, 0xab18d6f4, 0x48a48899, 0xc630b8c0, 0xa1a99024,
42239       0x2caaf892, 0x84975096, 0xe20fd624, 0xc8869aba, 0x6c2b7dd4,
42240       0x3b72b04d, 0x0992f7d0, 0xe2775eb6, 0x7d06e684, 0x0089c06e,
42241       0xe4bbd007, 0xcb3b4361, 0x4ba846e4, 0xa1ae666b, 0x46464d9e,
42242       0xc01c2eb2, 0xc1f8539f, 0xf86f2be6, 0xcf68afc7, 0x16e8e8ae,
42243       0xc7386902, 0x8dab61fd },
42244     { 0xd54d1d45, 0x42a5c903, 0xff4f9ba2, 0xacd4297e, 0x34d478b4,
42245       0x2d88b520, 0x08c4621a, 0x35b2ba2b, 0x34865402, 0xd3d239bb,
42246       0x911f32e6, 0x1de76aed, 0x3f06fdc2, 0x877f8bcf, 0x9ec51502,
42247       0x802714c1, 0xa590700d, 0xa10444eb, 0x31dcc957, 0x8694229f,
42248       0xb8169fed, 0x5ece77ab, 0x2caf080e, 0x55be8a15, 0xcbd7cef1,
42249       0x3eb21b14, 0x67b97ee1, 0x9def7ad1, 0x118f690c, 0xe03ca879,
42250       0xf99b29e7, 0x6f77e62d },
42251     { 0xe40bbf59, 0xa271bded, 0x6401aad6, 0x177ba453, 0x73541cd1,
42252       0x1755e035, 0x4b71b02f, 0x3465b466, 0xa813359f, 0x22eb7113,
42253       0x6f38eac7, 0x9792a8fd, 0xff3bf3b5, 0x11aa012f, 0xf85c3fbf,
42254       0x99aafabf, 0x06c0cc42, 0x91e0a2ef, 0x773b7b3a, 0x314d5d57,
42255       0xd669840a, 0xae5e2e76, 0x2e5a8be6, 0x86136073, 0xc1cf5580,
42256       0xee6d7578, 0x68bed102, 0x2344e00f, 0x8184f0eb, 0x799d7886,
42257       0xc3d2cf80, 0x63819c91 },
42258     { 0x7884b073, 0xca5392e1, 0xeb1267ea, 0x9ec3a1fc, 0x907038a7,
42259       0x3d07f5f0, 0xe4c47b70, 0xcb2ac07c, 0x1bf96b91, 0xf96664ee,
42260       0x2aea4fbf, 0xebf57589, 0xfade6500, 0x5aabf391, 0x171d1204,
42261       0xc5b3376f, 0xa0d3d81a, 0x1ff60c51, 0x976a844b, 0x10b2cfe7,
42262       0xbda6125a, 0xe131cc9a, 0x4ebd453e, 0xe0fc16d3, 0x504b6bc1,
42263       0xc0d0319a, 0x0a2f8cab, 0xe43a0be7, 0x55e49b47, 0xc80afeec,
42264       0x8265d7ee, 0x67d48d12 },
42265     { 0xea2d56d6, 0x068d59a7, 0x27480a63, 0xd71abd0e, 0xae7366cd,
42266       0x6bd11db0, 0x07204ebc, 0xfbb639ca, 0xf77e6293, 0x89a242e7,
42267       0x75ba8c3d, 0xdee7ca2b, 0x64a2f9a8, 0x472ddc3d, 0x7561a010,
42268       0x84229df4, 0xc5b649d4, 0x95f62c85, 0x4dc927cd, 0xfdd56b1b,
42269       0x5ee60596, 0xfe8bb120, 0xabf29401, 0x3efcaa50, 0x10d1c184,
42270       0xd4900d0f, 0x28b01df5, 0x2cf113a9, 0x1f0e43f5, 0xa3d7ebc3,
42271       0xe8384dc7, 0x27950e38 },
42272     { 0xe1d0fa79, 0xeab21ff0, 0x048b5de9, 0x4b9fd033, 0x2fe374cb,
42273       0x4c934689, 0x4eb21f6b, 0xbb4827fa, 0xa925e7e7, 0x46716f79,
42274       0x7dd4c531, 0x1442bf36, 0xd2e96ddf, 0x2073954c, 0x8502aa89,
42275       0x4e0141ae, 0x8eef6cc9, 0x8ee00e1a, 0x5880cdaf, 0x55ce8491,
42276       0x69628046, 0xff3aba5c, 0x5d15dfbf, 0x335cc4f8, 0x9f684f25,
42277       0xa7f0440c, 0xbb1e5bd8, 0xae80453f, 0xff2225ab, 0xa1c99813,
42278       0x79b25d71, 0x54ff7884 },
42279     { 0xde40b068, 0x27c6ee30, 0xe6f3a51e, 0x9226465b, 0xfa3b21f6,
42280       0xe24a4604, 0xc0418115, 0x50a5a5ad, 0x8df90d2b, 0xe3285441,
42281       0xdcb0c00f, 0xbb74e58f, 0x4a2c08e3, 0xc68f1b3b, 0x0ccd9ec9,
42282       0x339df081, 0xb786ea9f, 0x915362dc, 0xc955aead, 0x28945e31,
42283       0x8b6a6c6b, 0xd6a2c01d, 0x3678a427, 0x069e82dc, 0x28c9302c,
42284       0x17875500, 0x9fa101e6, 0x8acda965, 0xee30b286, 0x4e4e4573,
42285       0x3f1830fe, 0x8adbad85 },
42286     { 0x0969d524, 0x060ae11f, 0xf39bcc79, 0xf42fdaf7, 0x7cc1fcc2,
42287       0x3cec6766, 0xe2336d4f, 0x456b9cf2, 0x8e1c0f7f, 0x6aa1f5de,
42288       0x0984fb0e, 0xcdbc2ad2, 0x1b464b28, 0x4090cfa6, 0x1243f3ef,
42289       0x40d86f30, 0xcd5e87e7, 0x95b16ccc, 0x3026cd41, 0x403f168c,
42290       0x816c0730, 0xdbe386cb, 0x58407a1d, 0x14eb86f3, 0x1717e1af,
42291       0xf588b4f8, 0x66cbc96c, 0xb75c41a6, 0x027e71c1, 0xf342c1aa,
42292       0xc0945e5f, 0x73930036 },
42293     { 0x22cdaf42, 0x954f757d, 0xf4181aab, 0x788b591d, 0xf5514f25,
42294       0x8b986819, 0xf18fd5bc, 0x69642e08, 0x022ceb91, 0x92b305d1,
42295       0x6a4f6985, 0x1715903e, 0x61179cae, 0x4bd7d69d, 0xd29c01aa,
42296       0xdacdfd5d, 0xd91108cc, 0x705ddd5a, 0x64ac8f15, 0x434ac7b1,
42297       0xb524632f, 0x61a514e1, 0x731fc447, 0x45b9e61b, 0xe0961b31,
42298       0xcf561348, 0x73eaf223, 0x9c28a967, 0xaa7c99d3, 0x5bd10182,
42299       0xe42965e2, 0x8bc6ec4a },
42300     { 0xe7f2a32b, 0xd096e5c0, 0x09388a30, 0xff54800c, 0x401e360c,
42301       0x06fe437c, 0xbb6054a6, 0x6655fc9c, 0x8457aa6e, 0x510e1860,
42302       0x2b29b2b7, 0xa0acfca2, 0x51b7da61, 0x732483e3, 0x6be6c8ca,
42303       0xe31471ee, 0x8b65c9a1, 0xe565431c, 0x48d65cbb, 0xfc9ac3b9,
42304       0xae9b2aa8, 0xd308fc21, 0xaa60aa6a, 0xd6a7df0d, 0x982fc0d4,
42305       0x2844d96a, 0x5847a4d7, 0xab012c2c, 0xdceb8955, 0x2b3c8f71,
42306       0xbe9c7e15, 0x8e85437d },
42307 };
42308 
42309 /* Perform the modular exponentiation in Fp* for SAKKE.
42310  *
42311  * Base is fixed to be the g parameter - a precomputed table is used.
42312  *
42313  * Striping: 128 points at a distance of 8 combined.
42314  * Total of 256 points in table.
42315  * Square and multiply performed in Fp*.
42316  *
42317  * base  [in]   Base. MP integer.
42318  * exp   [in]   Exponent. MP integer.
42319  * res   [out]  Result. MP integer.
42320  * returns 0 on success, MP_READ_E if there are too many bytes in an array
42321  * and MEMORY_E if memory allocation fails.
42322  */
sp_ModExp_Fp_star_1024(const mp_int * base,mp_int * exp,mp_int * res)42323 int sp_ModExp_Fp_star_1024(const mp_int* base, mp_int* exp, mp_int* res)
42324 {
42325 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
42326     !defined(WOLFSSL_SP_NO_MALLOC)
42327     sp_digit* td;
42328     sp_digit* t;
42329     sp_digit* tx;
42330     sp_digit* ty;
42331 #else
42332     sp_digit t[4 * 2 * 32];
42333     sp_digit tx[2 * 32];
42334     sp_digit ty[2 * 32];
42335 #endif
42336     sp_digit* r = NULL;
42337     unsigned char e[128];
42338     int err = MP_OKAY;
42339     int i;
42340     int y;
42341 
42342     (void)base;
42343 
42344 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
42345     !defined(WOLFSSL_SP_NO_MALLOC)
42346     td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 32 * 2, NULL,
42347                             DYNAMIC_TYPE_TMP_BUFFER);
42348     if (td == NULL) {
42349         err = MEMORY_E;
42350     }
42351 #endif
42352 
42353     if (err == MP_OKAY) {
42354 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
42355     !defined(WOLFSSL_SP_NO_MALLOC)
42356         t  = td;
42357         tx = td + 4 * 32 * 2;
42358         ty = td + 5 * 32 * 2;
42359 #endif
42360         r = ty;
42361 
42362         (void)mp_to_unsigned_bin_len(exp, e, 128);
42363 
42364         XMEMCPY(tx, p1024_norm_mod, sizeof(sp_digit) * 32);
42365         y  =  e[112] >> 7;
42366         y |= (e[96] >> 7) << 1;
42367         y |= (e[80] >> 7) << 2;
42368         y |= (e[64] >> 7) << 3;
42369         y |= (e[48] >> 7) << 4;
42370         y |= (e[32] >> 7) << 5;
42371         y |= (e[16] >> 7) << 6;
42372         y |= (e[0] >> 7) << 7;
42373         XMEMCPY(ty, sp_1024_g_table[y], sizeof(sp_digit) * 32);
42374         for (i = 126; i >= 0; i--) {
42375             y  =  (e[127 - (i / 8)] >> (i & 0x7)) & 1;
42376             y |= ((e[111 - (i / 8)] >> (i & 0x7)) & 1) << 1;
42377             y |= ((e[95 - (i / 8)] >> (i & 0x7)) & 1) << 2;
42378             y |= ((e[79 - (i / 8)] >> (i & 0x7)) & 1) << 3;
42379             y |= ((e[63 - (i / 8)] >> (i & 0x7)) & 1) << 4;
42380             y |= ((e[47 - (i / 8)] >> (i & 0x7)) & 1) << 5;
42381             y |= ((e[31 - (i / 8)] >> (i & 0x7)) & 1) << 6;
42382             y |= ((e[15 - (i / 8)] >> (i & 0x7)) & 1) << 7;
42383 
42384             sp_1024_proj_sqr_32(tx, ty, t);
42385             sp_1024_proj_mul_qx1_32(tx, ty, sp_1024_g_table[y], t);
42386         }
42387     }
42388 
42389     if (err == MP_OKAY) {
42390         sp_1024_mont_inv_32(tx, tx, t);
42391         sp_1024_mont_mul_32(r, tx, ty, p1024_mod, p1024_mp_mod);
42392         XMEMSET(r + 32, 0, sizeof(sp_digit) * 32);
42393         sp_1024_mont_reduce_32(r, p1024_mod, p1024_mp_mod);
42394 
42395         err = sp_1024_to_mp(r, res);
42396     }
42397 
42398 #if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
42399     !defined(WOLFSSL_SP_NO_MALLOC)
42400     if (td != NULL) {
42401         XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
42402     }
42403 #endif
42404     return err;
42405 }
42406 
42407 #endif /* WOLFSSL_SP_SMALL */
42408 /* Multiply p* by q* in projective co-ordinates.
42409  *
42410  *   p.x' = (p.x * q.x) - (p.y * q.y)
42411  *   p.y' = (p.x * q.y) + (p.y * q.x)
42412  * But applying Karatsuba:
42413  *   v0 = p.x * q.x
42414  *   v1 = p.y * q.y
42415  *   p.x' = v0 - v1
42416  *   p.y' = (px + py) * (qx + qy) - v0 - v1
42417  *
42418  * px  [in,out]  A single precision integer - X ordinate of number to multiply.
42419  * py  [in,out]  A single precision integer - Y ordinate of number to multiply.
42420  * qx  [in]      A single precision integer - X ordinate of number of
42421  *               multiplier.
42422  * qy  [in]      A single precision integer - Y ordinate of number of
42423  *               multiplier.
42424  * t   [in]      Two single precision integers - temps.
42425  */
sp_1024_proj_mul_32(sp_digit * px,sp_digit * py,const sp_digit * qx,const sp_digit * qy,sp_digit * t)42426 static void sp_1024_proj_mul_32(sp_digit* px, sp_digit* py,
42427         const sp_digit* qx, const sp_digit* qy, sp_digit* t)
42428 {
42429     sp_digit* t1 = t;
42430     sp_digit* t2 = t + 2 * 32;
42431 
42432     /* t1 = px + py */
42433     sp_1024_mont_add_32(t1, px, py, p1024_mod);
42434     /* t2 = qx + qy */
42435     sp_1024_mont_add_32(t2, qx, qy, p1024_mod);
42436     /* t2 = (px + py) * (qx + qy) */
42437     sp_1024_mont_mul_32(t2, t1, t2, p1024_mod, p1024_mp_mod);
42438     /* t1 = py * qy */
42439     sp_1024_mont_mul_32(t1, py, qy, p1024_mod, p1024_mp_mod);
42440     /* t2 = (px + py) * (qx + qy) - (py * qy) */
42441     sp_1024_mont_sub_32(t2, t2, t1, p1024_mod);
42442     /* px = px * qx */
42443     sp_1024_mont_mul_32(px, px, qx, p1024_mod, p1024_mp_mod);
42444     /* py = (px + py) * (qx + qy) - (py * qy) - (px * qx) */
42445     sp_1024_mont_sub_32(py, t2, px, p1024_mod);
42446     /* px = (px * qx) - (py * qy)*/
42447     sp_1024_mont_sub_32(px, px, t1, p1024_mod);
42448 }
42449 
42450 #ifndef WOLFSSL_SP_SMALL
42451 /*
42452  * Convert point from projective to affine but keep in Montgomery form.
42453  *
42454  * p  [in,out]  Point to convert.
42455  * t  [in]      Temporary numbers: 2.
42456  */
sp_1024_mont_map_32(sp_point_1024 * p,sp_digit * t)42457 static void sp_1024_mont_map_32(sp_point_1024* p, sp_digit* t)
42458 {
42459     sp_digit* t1 = t;
42460     sp_digit* t2 = t + 2 * 32;
42461 
42462     sp_1024_mont_inv_32(t1, p->z, t2);
42463     sp_1024_mont_sqr_32(t2, t1, p1024_mod, p1024_mp_mod);
42464     sp_1024_mont_mul_32(t1, t2, t1, p1024_mod, p1024_mp_mod);
42465     sp_1024_mont_mul_32(p->x, p->x, t2, p1024_mod, p1024_mp_mod);
42466     sp_1024_mont_mul_32(p->y, p->y, t1, p1024_mod, p1024_mp_mod);
42467     XMEMCPY(p->z, p1024_norm_mod, sizeof(sp_digit) * 32);
42468 }
42469 
42470 #endif /* WOLFSSL_SP_SMALL */
/*
 * Calculate gradient of line through P, P and [-2]P, accumulate line and
 * double P.
 *
 * Calculations:
 *   l = 3 * (p.x^2 - p.z^4) = 3 * (p.x - p.z^2) * (p.x + p.z^2)
 *   r.x = l * (p.x + q.x * p.z^2) - 2 * p.y^2
 *   r.y = 2 * p.y * p.z^3 * q.y (= p'.z * p.z^2 * q.y)
 *   v* = v*^2 * r*
 *   p'.x = l^2 - 8 * p.y^2 * p.x
 *   p'.y = (4 * p.y^2 * p.x - p'.x) * l - 8 * p.y^4
 *   p'.z = 2 * p.y * p.z
 *
 * The point p is updated in place. Only the x and y ordinates of q are read.
 *
 * Scratch layout: six temporaries at offsets 0, 2, 4, 6, 8 and 10 times 32
 * digits into t. The statement order matters as temporaries are reused.
 *
 * @param  [in,out]  vx  X-ordinate of projective value in F*.
 * @param  [in,out]  vy  Y-ordinate of projective value in F*.
 * @param  [in,out]  p   ECC point - point on E(F_p^2) to double.
 * @param  [in]      q   ECC point - second point on E(F_P^2).
 * @param  [in]      t   SP temporaries (6 used).
 */
static void sp_1024_accumulate_line_dbl_32(sp_digit* vx, sp_digit* vy,
        sp_point_1024* p, const sp_point_1024* q, sp_digit* t)
{
    sp_digit* t1  = t +  0 * 32;
    sp_digit* pz2 = t +  2 * 32;
    sp_digit* rx  = t +  4 * 32;
    sp_digit* ry  = t +  6 * 32;
    sp_digit* l   = t +  8 * 32;
    sp_digit* ty  = t + 10 * 32;

    /* v = v^2 - uses the start of t as scratch, before pz2 is calculated */
    sp_1024_proj_sqr_32(vx, vy, t);
    /* pz2 = p.z^2 */
    sp_1024_mont_sqr_32(pz2, p->z, p1024_mod, p1024_mp_mod);
    /* ty = p.x + p.z^2 (ty is only borrowed here, overwritten below) */
    sp_1024_mont_add_32(ty, p->x, pz2, p1024_mod);
    /* l = p.x - p.z^2 */
    sp_1024_mont_sub_32(l, p->x, pz2, p1024_mod);
    /* t1 = (p.x - p.z^2) * (p.x + p.z^2) = p.x^2 - p.z^4 */
    sp_1024_mont_mul_32(t1, l, ty, p1024_mod, p1024_mp_mod);
    /* l = 3 * (p.x^2 - p.z^4) */
    sp_1024_mont_tpl_32(l, t1, p1024_mod);
    /* t1 = q.x * p.z^2 */
    sp_1024_mont_mul_32(t1, q->x, pz2, p1024_mod, p1024_mp_mod);
    /* t1 = p.x + q.x * p.z^2 */
    sp_1024_mont_add_32(t1, p->x, t1, p1024_mod);
    /* r.x = l * (p.x + q.x * p.z^2) */
    sp_1024_mont_mul_32(rx, l, t1, p1024_mod, p1024_mp_mod);
    /* r.y = 2 * p.y */
    sp_1024_mont_dbl_32(ry, p->y, p1024_mod);
    /* ty = (2 * p.y) ^ 2 = 4 * p.y ^ 2 */
    sp_1024_mont_sqr_32(ty, ry, p1024_mod, p1024_mp_mod);
    /* t1 = (4 * p.y ^ 2) / 2 = 2 * p.y ^ 2 */
    sp_1024_div2_32(t1, ty, p1024_mod);
    /* r.x -= 2 * (p.y ^ 2) */
    sp_1024_mont_sub_32(rx, rx, t1, p1024_mod);
    /* p'.z = p.y * 2 * p.z (ry still holds 2 * p.y) */
    sp_1024_mont_mul_32(p->z, p->z, ry, p1024_mod, p1024_mp_mod);
    /* t1 = p'.z * p.z^2 */
    sp_1024_mont_mul_32(t1, p->z, pz2, p1024_mod, p1024_mp_mod);
    /* r.y = p'.z * p.z^2 * q.y */
    sp_1024_mont_mul_32(ry, t1, q->y, p1024_mod, p1024_mp_mod);
    /* v = v^2 * r
     * The multiply uses t and t + 2 * 32 as temporaries: t1 and pz2 are
     * overwritten. rx, ry, l and ty are not part of that scratch area.
     */
    sp_1024_proj_mul_32(vx, vy, rx, ry, t);

    /* Double point using previously calculated values
     *   l = 3 * (p.x - p.z^2).(p.x + p.z^2)
     *   ty = 4 * p.y^2
     *   p'.z = 2 * p.y * p.z
     */
    /* t1 = (4 * p.y^2) ^ 2 = 16 * p.y^4 */
    sp_1024_mont_sqr_32(t1, ty, p1024_mod, p1024_mp_mod);
    /* t1 = 16 * p.y^4 / 2 = 8 * p.y^4 */
    sp_1024_div2_32(t1, t1, p1024_mod);
    /* p'.y = 4 * p.y^2 * p.x */
    sp_1024_mont_mul_32(p->y, ty, p->x, p1024_mod, p1024_mp_mod);
    /* p'.x = l^2 */
    sp_1024_mont_sqr_32(p->x, l, p1024_mod, p1024_mp_mod);
    /* p'.x = l^2 - 4 * p.y^2 * p.x */
    sp_1024_mont_sub_32(p->x, p->x, p->y, p1024_mod);
    /* p'.x = l^2 - 8 * p.y^2 * p.x (same value subtracted a second time) */
    sp_1024_mont_sub_32(p->x, p->x, p->y, p1024_mod);
    /* ty = 4 * p.y^2 * p.x - p'.x */
    sp_1024_mont_sub_32(ty, p->y, p->x, p1024_mod);
    /* p'.y = (4 * p.y^2 * p.x - p'.x) * l */
    sp_1024_mont_mul_32(p->y, ty, l, p1024_mod, p1024_mp_mod);
    /* p'.y = (4 * p.y^2 * p.x - p'.x) * l - 8 * p.y^4 */
    sp_1024_mont_sub_32(p->y, p->y, t1, p1024_mod);
}
42559 
42560 #ifdef WOLFSSL_SP_SMALL
/*
 * Calculate gradient of line through C, P and -C-P, accumulate line and
 * add P to C.
 *
 * Only p.x and p.y are read from P; p.z is not used by this function.
 *
 * Calculations:
 *   r.x = (q.x + p.x) * c.y - (q.x * c.z^2 + c.x) * p.y * c.z
 *   r.y = (c.x - p.x * c.z^2) * q.y * c.z
 *   v* = v* * r*
 *   h = c.x - p.x * c.z^2
 *   r = p.y * c.z^3 - c.y
 *   c'.x = r^2 + h^3 - 2 * c.x * h^2
 *   c'.y = r * (c'.x - c.x * h^2) - c.y * h^3
 *   c'.z = (c.x - p.x * c.z^2) * c.z
 *
 * @param  [in,out]  vx     X-ordinate of projective value in F*.
 * @param  [in,out]  vy     Y-ordinate of projective value in F*.
 * @param  [in,out]  c      ECC point - current point on E(F_p^2) to be added
 *                          to. All three ordinates are overwritten.
 * @param  [in]      p      ECC point - point on E(F_p^2) to add.
 * @param  [in]      q      ECC point - second point on E(F_P^2). Only q.x and
 *                          q.y are read.
 * @param  [in]      qx_px  SP that is a constant value across adds. Used as
 *                          (q.x + p.x) in the calculation of r.x.
 * @param  [in]      t      SP temporaries (6 used, each 2 * 32 digits).
 */
static void sp_1024_accumulate_line_add_one_32(sp_digit* vx, sp_digit* vy,
        sp_point_1024* c, sp_point_1024* p, sp_point_1024* q, sp_digit* qx_px,
        sp_digit* t)
{
    /* Six temporaries carved out of t, 2 * 32 digits apart. */
    sp_digit* t1  = t;
    sp_digit* t2  = t +  2 * 32;
    sp_digit* rx  = t +  4 * 32;
    sp_digit* ry  = t +  6 * 32;
    sp_digit* h   = t +  8 * 32;
    sp_digit* r   = t + 10 * 32;

    /* rx = (q.x + p.x) * c.y */
    sp_1024_mont_mul_32(rx, qx_px, c->y, p1024_mod, p1024_mp_mod);
    /* t2 = c.z^2 */
    sp_1024_mont_sqr_32(t2, c->z, p1024_mod, p1024_mp_mod);
    /* t1 = q.x * c.z^2 */
    sp_1024_mont_mul_32(t1, q->x, t2, p1024_mod, p1024_mp_mod);
    /* h = q.x * c.z^2 + c.x (h is only borrowed as scratch here) */
    sp_1024_mont_add_32(h, t1, c->x, p1024_mod);
    /* ry = p.y * c.z (ry is only borrowed as scratch here) */
    sp_1024_mont_mul_32(ry, p->y, c->z, p1024_mod, p1024_mp_mod);
    /* t1 = (q.x * c.z^2 + c.x) * p.y * c.z */
    sp_1024_mont_mul_32(t1, h, ry, p1024_mod, p1024_mp_mod);
    /* r = p.y * c.z * c.z^2 = p.y * c.z^3  */
    sp_1024_mont_mul_32(r, ry, t2, p1024_mod, p1024_mp_mod);
    /* rx = (q.x + p.x) * c.y - (q.x * c.z^2 + c.x) * p.y * c.z */
    sp_1024_mont_sub_32(rx, rx, t1, p1024_mod);
    /* t1 = p.x * c.z^2 */
    sp_1024_mont_mul_32(t1, p->x, t2, p1024_mod, p1024_mp_mod);
    /* h = c.x - p.x * c.z^2 */
    sp_1024_mont_sub_32(h, c->x, t1, p1024_mod);
    /* c'.z = (c.x - p.x * c.z^2) * c.z */
    sp_1024_mont_mul_32(c->z, h, c->z, p1024_mod, p1024_mp_mod);
    /* ry = (c.x - p.x * c.z^2) * c.z * q.y (uses the updated c.z) */
    sp_1024_mont_mul_32(ry, c->z, q->y, p1024_mod, p1024_mp_mod);
    /* v = v * r
     * NOTE(review): t is passed as scratch while rx, ry, h and r live inside
     * it; h and r are read again below, so sp_1024_proj_mul_32 presumably
     * leaves them intact - confirm against its implementation. */
    sp_1024_proj_mul_32(vx, vy, rx, ry, t);

    /* Add p to c using previously calculated values.
     *   h = c.x - p.x * c.z^2
     *   r = p.y * c.z^3
     *   c'.z = (c.x - p.x * c.z^2) * c.z
     */

    /* r = p.y * c.z^3 - c.y */
    sp_1024_mont_sub_32(r, r, c->y, p1024_mod);
    /* t1 = r^2 */
    sp_1024_mont_sqr_32(t1, r, p1024_mod, p1024_mp_mod);
    /* rx = h^2 (rx is reused as scratch; line value already accumulated) */
    sp_1024_mont_sqr_32(rx, h, p1024_mod, p1024_mp_mod);
    /* ry = c.x * h^2 */
    sp_1024_mont_mul_32(ry, c->x, rx, p1024_mod, p1024_mp_mod);
    /* t2 = h^3 */
    sp_1024_mont_mul_32(t2, rx, h, p1024_mod, p1024_mp_mod);
    /* c->x = r^2 + h^3 */
    sp_1024_mont_add_32(c->x, t1, t2, p1024_mod);
    /* t1 = 2 * c.x * h^2 */
    sp_1024_mont_dbl_32(t1, ry, p1024_mod);
    /* c'.x = r^2 + h^3 - 2 * c.x * h^2 */
    sp_1024_mont_sub_32(c->x, c->x, t1, p1024_mod);
    /* t1 = c'.x - c.x * h^2 */
    sp_1024_mont_sub_32(t1, c->x, ry, p1024_mod);
    /* ry = r * (c'.x - c.x * h^2) */
    sp_1024_mont_mul_32(ry, t1, r, p1024_mod, p1024_mp_mod);
    /* t1 = c.y * h^3 */
    sp_1024_mont_mul_32(t1, t2, c->y, p1024_mod, p1024_mp_mod);
    /* c'.y = r * (c'.x - c.x * h^2) - c.y * h^3 */
    sp_1024_mont_sub_32(c->y, ry, t1, p1024_mod);
}
42652 
/*
 * Calculate r = pairing <P, Q>.
 *
 * That is, multiply base in PF_p[q] by the scalar s, such that s.P = Q.
 *
 * Small code size implementation: walks the bits of p1024_order one at a
 * time, doubling for every bit and adding P for set bits above bit 0.
 *
 * @param  [in]   pm   First point on E(F_p)[q].
 * @param  [in]   qm   Second point on E(F_p)[q].
 * @param  [out]  res  Result of calculation.
 * @return  0 on success.
 * @return  MEMORY_E when dynamic memory allocation fails.
 * @return  Other -ve value on internal failure (propagated from the point
 *          allocation, normalization and conversion helpers).
 */
int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res)
{
    int err = MP_OKAY;
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Single heap buffer; t, vx, vy and qx_px are carved out of it below. */
    sp_digit* td = NULL;
    sp_digit* t;
    sp_digit* vx;
    sp_digit* vy;
    sp_digit* qx_px;
#else
    /* Stack storage: 6 temporaries plus vx, vy and qx_px, 2 * 32 digits each. */
    sp_digit t[6 * 2 * 32];
    sp_digit vx[2 * 32];
    sp_digit vy[2 * 32];
    sp_digit qx_px[2 * 32];
    sp_point_1024 pd;
    sp_point_1024 qd;
    sp_point_1024 cd;
#endif
    sp_point_1024* p = NULL;
    sp_point_1024* q = NULL;
    sp_point_1024* c = NULL;
    sp_digit* r = NULL;
    int i;

    err = sp_1024_point_new_32(NULL, pd, p);
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, qd, q);
    }
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, cd, c);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        /* 9 slots of 2 * 32 digits: 6 for t, 1 each for vx, vy and qx_px. */
        td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 9 * 32 * 2, NULL,
                                DYNAMIC_TYPE_TMP_BUFFER);
        if (td == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        t     = td;
        vx    = td + 6 * 32 * 2;
        vy    = td + 7 * 32 * 2;
        qx_px = td + 8 * 32 * 2;
#endif
        /* The result is built in place in vy's storage. */
        r = vy;

        sp_1024_point_from_ecc_point_32(p, pm);
        sp_1024_point_from_ecc_point_32(q, qm);

        /* Normalize the ordinates that are used: all of P, and q.x and q.y.
         * NOTE(review): presumably this converts to Montgomery form -
         * confirm against sp_1024_mod_mul_norm_32. q.z is not normalized. */
        err = sp_1024_mod_mul_norm_32(p->x, p->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->y, p->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->z, p->z, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(q->x, q->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(q->y, q->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        /* Current point C starts as P; v starts as (vx, vy) = (1, 0). */
        XMEMCPY(c, p, sizeof(sp_point_1024));
        XMEMSET(vx, 0, sizeof(sp_digit) * 2 * 32);
        vx[0] = 1;
        XMEMSET(vy, 0, sizeof(sp_digit) * 2 * 32);

        /* q.x + p.x is the same for every add - calculate it once. */
        sp_1024_mont_add_32(qx_px, q->x, p->x, p1024_mod);

        /* Scan bits 1020 down to 0 of p1024_order. */
        for (i = 1020; i >= 0; i--) {
            /* Accumulate line into v and double point. */
            sp_1024_accumulate_line_dbl_32(vx, vy, c, q, t);

            /* The add is skipped for bit 0 (i == 0) whatever its value. */
            if ((i > 0) && ((p1024_order[i / 32] >> (i % 32)) & 1)) {
                /* Accumulate line into v and add P into C. */
                sp_1024_accumulate_line_add_one_32(vx, vy, c, p, q, qx_px, t);
            }
        }

        /* Final exponentiation - two projective squarings. */
        sp_1024_proj_sqr_32(vx, vy, t);
        sp_1024_proj_sqr_32(vx, vy, t);

        /* Convert from PF_p[q] to F_p: r = vy / vx */
        sp_1024_mont_inv_32(vx, vx, t);
        sp_1024_mont_mul_32(r, vx, vy, p1024_mod, p1024_mp_mod);
        /* Clear the upper 32 digits before the reduction.
         * NOTE(review): the reduce presumably takes r out of Montgomery
         * form - confirm against sp_1024_mont_reduce_32. */
        XMEMSET(r + 32, 0, sizeof(sp_digit) * 32);
        sp_1024_mont_reduce_32(r, p1024_mod, p1024_mp_mod);

        err = sp_1024_to_mp(r, res);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif
    /* Release points in reverse order of allocation. */
    sp_1024_point_free_32(c, 1, NULL);
    sp_1024_point_free_32(q, 1, NULL);
    sp_1024_point_free_32(p, 1, NULL);
    return err;
}
42779 
42780 #else
/*
 * Calculate gradient of line through C, P and -C-P, accumulate line and
 * add P to C.
 *
 * Both C and P have z ordinates to use in the calculation.
 *
 * Calculations:
 *   r.x  = (q.x * c.z^2 + c.x) * p.y * c.z - (q.x * p.z^2 + p.x) * c.y * p.z
 *   r.y  = (p.x * c.z^2 - c.x * p.z^2) * q.y * p.z * c.z
 *   v*   = v* * r*
 *   h    = p.x * c.z^2 - c.x * p.z^2
 *   r    = p.y * c.z^3 - c.y * p.z^3
 *   c'.x = r^2 - h^3 - 2 * c.x * p.z^2 * h^2
 *   c'.y = r * (c.x * p.z^2 * h^2 - c'.x) - c.y * p.z^3 * h^3
 *   c'.z = (p.x * c.z^2 - c.x * p.z^2) * c.z * p.z
 *
 * When neg is non-zero, p.y * c.z is negated before it is used, so every
 * p.y term above is calculated with -p.y.
 *
 * @param  [in,out]  vx     X-ordinate of projective value in F*.
 * @param  [in,out]  vy     Y-ordinate of projective value in F*.
 * @param  [in]      p      ECC point - point on E(F_p^2) to add.
 * @param  [in]      q      ECC point - second point on E(F_P^2). Only q.x and
 *                          q.y are read.
 * @param  [in,out]  c      ECC point - current point on E(F_p^2) to be added
 *                          to. All three ordinates are overwritten.
 * @param  [in]      t      SP temporaries (6 used, each 2 * 32 digits).
 * @param  [in]      neg    Indicates to use negative P.
 */
static void sp_1024_accumulate_line_add_n_32(sp_digit* vx, sp_digit* vy,
        const sp_point_1024* p, const sp_point_1024* q,
        sp_point_1024* c, sp_digit* t, int neg)
{
    /* Six temporaries carved out of t, 2 * 32 digits apart. */
    sp_digit* t1 = t;
    sp_digit* t2 = t +  2 * 32;
    sp_digit* rx = t +  4 * 32;
    sp_digit* ry = t +  6 * 32;
    sp_digit* h  = t +  8 * 32;
    sp_digit* r  = t + 10 * 32;

    /* h = p.z^2 (h is only borrowed as scratch here) */
    sp_1024_mont_sqr_32(h, p->z, p1024_mod, p1024_mp_mod);
    /* rx = q.x * p.z^2 */
    sp_1024_mont_mul_32(rx, q->x, h, p1024_mod, p1024_mp_mod);
    /* t2 = q.x * p.z^2 + p.x */
    sp_1024_mont_add_32(t2, rx, p->x, p1024_mod);
    /* t1 = c.y * p.z */
    sp_1024_mont_mul_32(t1, c->y, p->z, p1024_mod, p1024_mp_mod);
    /* rx = (q.x * p.z^2 + p.x) * c.y * p.z */
    sp_1024_mont_mul_32(rx, t2, t1, p1024_mod, p1024_mp_mod);
    /* c.y = c.y * p.z^3 (c.y is overwritten in place) */
    sp_1024_mont_mul_32(c->y, t1, h, p1024_mod, p1024_mp_mod);
    /* t2 = c.z^2 */
    sp_1024_mont_sqr_32(t2, c->z, p1024_mod, p1024_mp_mod);
    /* t1 = q.x * c.z^2 */
    sp_1024_mont_mul_32(t1, q->x, t2, p1024_mod, p1024_mp_mod);
    /* t1 = q.x * c.z^2 + c.x */
    sp_1024_mont_add_32(t1, t1, c->x, p1024_mod);
    /* c.x = c.x * p.z^2 (c.x is overwritten in place) */
    sp_1024_mont_mul_32(c->x, c->x, h, p1024_mod, p1024_mp_mod);
    /* r = p.y * c.z */
    sp_1024_mont_mul_32(r, p->y, c->z, p1024_mod, p1024_mp_mod);
    if (neg) {
        /* r = -p.y * c.z */
        sp_1024_mont_sub_32(r, p1024_mod, r, p1024_mod);
    }
    /* ry = (q.x * c.z^2 + c.x) * p.y * c.z */
    sp_1024_mont_mul_32(ry, t1, r, p1024_mod, p1024_mp_mod);
    /* rx = (q.x * c.z^2 + c.x) * p.y * c.z
     *      - (q.x * p.z^2 + p.x) * c.y * p.z */
    sp_1024_mont_sub_32(rx, ry, rx, p1024_mod);
    /* t1 = p.x * c.z^2 */
    sp_1024_mont_mul_32(t1, p->x, t2, p1024_mod, p1024_mp_mod);
    /* h = p.x * c.z^2 - c.x * p.z^2 (c.x already holds c.x * p.z^2) */
    sp_1024_mont_sub_32(h, t1, c->x, p1024_mod);
    /* t1 = (p.x * c.z^2 - c.x * p.z^2) * c.z */
    sp_1024_mont_mul_32(t1, h, c->z, p1024_mod, p1024_mp_mod);
    /* c'.z = (p.x * c.z^2 - c.x * p.z^2) * c.z * p.z */
    sp_1024_mont_mul_32(c->z, t1, p->z, p1024_mod, p1024_mp_mod);
    /* ry = (p.x * c.z^2 - c.x * p.z^2) * c.z * p.z * q.y */
    sp_1024_mont_mul_32(ry, c->z, q->y, p1024_mod, p1024_mp_mod);
    /* t1 = p.y * c.z^3 (negated when neg is set) */
    sp_1024_mont_mul_32(t1, r, t2, p1024_mod, p1024_mp_mod);
    /* r = p.y * c.z^3 - c.y * p.z^3 (c.y already holds c.y * p.z^3) */
    sp_1024_mont_sub_32(r, t1, c->y, p1024_mod);
    /* v = v * r
     * NOTE(review): t is passed as scratch while rx, ry, h and r live inside
     * it; h and r are read again below, so sp_1024_proj_mul_32 presumably
     * leaves them intact - confirm against its implementation. */
    sp_1024_proj_mul_32(vx, vy, rx, ry, t);

    /* Add p to c using previously calculated values.
     *   h = p.x * c.z^2 - c.x * p.z^2
     *   r = p.y * c.z^3 - c.y * p.z^3
     *   c'.z = (p.x * c.z^2 - c.x * p.z^2) * c.z * p.z
     */

    /* t1 = r^2 */
    sp_1024_mont_sqr_32(t1, r, p1024_mod, p1024_mp_mod);
    /* rx = h^2 (rx is reused as scratch; line value already accumulated) */
    sp_1024_mont_sqr_32(rx, h, p1024_mod, p1024_mp_mod);
    /* ry = c.x * p.z^2 * h^2 */
    sp_1024_mont_mul_32(ry, rx, c->x, p1024_mod, p1024_mp_mod);
    /* t2 = h^3 */
    sp_1024_mont_mul_32(t2, rx, h, p1024_mod, p1024_mp_mod);
    /* c'.x = r^2 - h^3 */
    sp_1024_mont_sub_32(c->x, t1, t2, p1024_mod);
    /* t1 = 2 * c.x * p.z^2 * h^2 */
    sp_1024_mont_dbl_32(t1, ry, p1024_mod);
    /* c'.x = r^2 - h^3 - 2 * c.x * p.z^2 * h^2 */
    sp_1024_mont_sub_32(c->x, c->x, t1, p1024_mod);
    /* t1 = c.x * p.z^2 * h^2 - c'.x */
    sp_1024_mont_sub_32(t1, ry, c->x, p1024_mod);
    /* ry = r * (c.x * p.z^2 * h^2 - c'.x) */
    sp_1024_mont_mul_32(ry, t1, r, p1024_mod, p1024_mp_mod);
    /* t1 = c.y * p.z^3 * h^3 */
    sp_1024_mont_mul_32(t1, t2, c->y, p1024_mod, p1024_mp_mod);
    /* c'.y = r * (c.x * p.z^2 * h^2 - c'.x) - c.y * p.z^3 * h^3 */
    sp_1024_mont_sub_32(c->y, ry, t1, p1024_mod);
}
42896 
/*
 * Perform n accumulate doubles and doubles of P.
 *
 * py = 2 * p.y
 *
 * For each double:
 * Calculate gradient of line through P, P and [-2]P, accumulate line and
 * double P.
 *
 * Calculations:
 *   l = 3 * (p.x^2 - p.z^4) = 3 * (p.x - p.z^2) * (p.x + p.z^2)
 *   r.x = l * (p.x + q.x * p.z^2) - py^2 / 2
 *   r.y = py * p.z^3 * q.y (= p'.z * p.z^2 * q.y)
 *   v* = v*^2 * r*
 *   p'.x = l^2 - 2 * py^2 * p.x
 *   py' = 2 * (py^2 * p.x - p'.x) * l - py^4 (= 2 * p'.y)
 *   p'.z = py * p.z
 *
 * Finally:
 *   p'.y = py' / 2
 *
 * Keeping the doubled y-ordinate in p.y between iterations means the initial
 * doubling and the final halving are each done once rather than per double.
 * When n is 0 the loop body does not run: p.y is doubled and then halved.
 *
 * @param  [in,out]  vx  X-ordinate of projective value in F*.
 * @param  [in,out]  vy  Y-ordinate of projective value in F*.
 * @param  [in,out]  p   ECC point - point on E(F_p^2) to double.
 * @param  [in]      q   ECC point - second point on E(F_P^2). Only q.x and
 *                       q.y are read.
 * @param  [in]      n   Number of times to double.
 * @param  [in]      t   SP temporaries (6 used, each 2 * 32 digits).
 */
static void sp_1024_accumulate_line_dbl_n_32(sp_digit* vx, sp_digit* vy,
        sp_point_1024* p, const sp_point_1024* q, int n, sp_digit* t)
{
    /* Six temporaries carved out of t, 2 * 32 digits apart. */
    sp_digit* t1  = t +  0 * 32;
    sp_digit* pz2 = t +  2 * 32;
    sp_digit* rx  = t +  4 * 32;
    sp_digit* ry  = t +  6 * 32;
    sp_digit* l   = t +  8 * 32;
    sp_digit* ty  = t + 10 * 32;
    int i;

    /* py = 2 * p.y */
    sp_1024_mont_dbl_32(p->y, p->y, p1024_mod);

    for (i = 0; i < n; i++) {
        /* v = v^2 */
        sp_1024_proj_sqr_32(vx, vy, t);
        /* pz2 = p.z^2 */
        sp_1024_mont_sqr_32(pz2, p->z, p1024_mod, p1024_mp_mod);
        /* t1 = p.x + p.z^2 */
        sp_1024_mont_add_32(t1, p->x, pz2, p1024_mod);
        /* l = p.x - p.z^2 */
        sp_1024_mont_sub_32(l, p->x, pz2, p1024_mod);
        /* ty = (p.x + p.z^2) * (p.x - p.z^2) = p.x^2 - p.z^4 */
        sp_1024_mont_mul_32(ty, l, t1, p1024_mod, p1024_mp_mod);
        /* l = 3 * (p.x^2 - p.z^4) */
        sp_1024_mont_tpl_32(l, ty, p1024_mod);
        /* t1 = q.x * p.z^2 */
        sp_1024_mont_mul_32(t1, q->x, pz2, p1024_mod, p1024_mp_mod);
        /* t1 = p.x + q.x * p.z^2 */
        sp_1024_mont_add_32(t1, p->x, t1, p1024_mod);
        /* rx = l * (p.x + q.x * p.z^2) */
        sp_1024_mont_mul_32(rx, l, t1, p1024_mod, p1024_mp_mod);
        /* ty = py ^ 2 */
        sp_1024_mont_sqr_32(ty, p->y, p1024_mod, p1024_mp_mod);
        /* t1 = py ^ 2 / 2 */
        sp_1024_div2_32(t1, ty, p1024_mod);
        /* rx -= py ^ 2 / 2 */
        sp_1024_mont_sub_32(rx, rx, t1, p1024_mod);
        /* p'.z = py * pz */
        sp_1024_mont_mul_32(p->z, p->z, p->y, p1024_mod, p1024_mp_mod);
        /* t1 = p'.z * p.z^2 */
        sp_1024_mont_mul_32(t1, p->z, pz2, p1024_mod, p1024_mp_mod);
        /* ry = p'.z * p.z^2 * q.y */
        sp_1024_mont_mul_32(ry, t1, q->y, p1024_mod, p1024_mp_mod);
        /* v = v^2 * r
         * NOTE(review): t is passed as scratch while rx, ry, l and ty live
         * inside it; l and ty are read again below, so sp_1024_proj_mul_32
         * presumably leaves them intact - confirm. */
        sp_1024_proj_mul_32(vx, vy, rx, ry, t);

        /* Double point using previously calculated values
         *   l = 3 * (p.x - p.z^2).(p.x + p.z^2)
         *   ty = py^2
         *   p'.z = py * p.z
         */
        /* t1 = py^2 ^ 2 = py^4 */
        sp_1024_mont_sqr_32(t1, ty, p1024_mod, p1024_mp_mod);
        /* py' = py^2 * p.x (p.y is used as a temporary until the end) */
        sp_1024_mont_mul_32(p->y, ty, p->x, p1024_mod, p1024_mp_mod);
        /* p'.x = l^2 */
        sp_1024_mont_sqr_32(p->x, l, p1024_mod, p1024_mp_mod);
        /* p'.x = l^2 - py^2 * p.x */
        sp_1024_mont_sub_32(p->x, p->x, p->y, p1024_mod);
        /* p'.x = l^2 - 2 * py^2 * p.x */
        sp_1024_mont_sub_32(p->x, p->x, p->y, p1024_mod);
        /* ty = py^2 * p.x - p'.x */
        sp_1024_mont_sub_32(ty, p->y, p->x, p1024_mod);
        /* py' = (py^2 * p.x - p'.x) * l */
        sp_1024_mont_mul_32(p->y, ty, l, p1024_mod, p1024_mp_mod);
        /* py' = (py^2 * p.x - p'.x) * l * 2 */
        sp_1024_mont_dbl_32(p->y, p->y, p1024_mod);
        /* py' = (py^2 * p.x - p'.x) * l * 2 - py^4 */
        sp_1024_mont_sub_32(p->y, p->y, t1, p1024_mod);
    }

    /* p'.y = py' / 2 */
    sp_1024_div2_32(p->y, p->y, p1024_mod);
}
43001 
/* Operations to perform based on order - 1.
 * Sliding window. Start at bottom and stop when bottom bit is one.
 * Subtract if top bit in window is one.
 * Width of 6 bits.
 *
 * Pairs: add/subtract window value, #dbls
 *   - Even index: odd window value. Positive means add, negative means
 *     subtract. The pre-computed table index is the magnitude divided by 2.
 *   - Odd index: number of doubles to perform after that add/subtract.
 * The first pair selects the starting table entry rather than an add.
 * There are 290 entries (145 pairs) in total.
 */
static const signed char sp_1024_order_op[] = {
   5,   6, -13,   9, -21,   6,  -5,   8,  31,   6,   3,   6, -27,   6,  25,   9,
  -1,   6, -11,   6, -13,   6,  -7,   6, -15,   6, -29,   7,  25,   6,  -9,   6,
 -19,   7,   3,   6,  11,   9, -23,   6,   1,   6,  27,   6,   1,   7, -25,   8,
  13,   7, -13,   7, -23,  10,  19,   7,   7,   7,  -3,   7,  27,   6,  -7,   7,
 -21,   7,  11,   7,  31,   8,   1,   7, -23,   6, -17,   6,  -3,  10,  11,   6,
 -21,   7, -27,  11, -29,   6,  -1,  10,  15,   8,  27,   7,  17,   6,  17,   7,
 -13,   8,  13,   6,  21,   7, -29,   6,  19,   7, -25,   6,  11,   9,  29,   7,
  -7,   8,  27,   7,  29,  10,  -1,   8,  -7,   8,  17,   6,  17,   7, -27,   7,
 -21,   6,  -9,   6, -27,  12, -23,   6,  19,   6,  13,   6, -11,   7,  27,   6,
  17,   6,  -7,   6, -25,   7, -29,   6,   9,   7,   7,   6,  13,   6, -25,   6,
 -19,   6,  13,   6, -11,   6,   5,   8,  19,   6, -21,   8,  23,   7,  27,   6,
 -13,   6, -19,  11,  29,   7, -15,   6,  -9,   7, -21,  10,  -3,   7,  21,  10,
  25,   6, -15,   6, -23,   6,  21,   6,   1,   6,  21,   7,  -3,   6,  -3,   7,
  -7,   6, -23,   7,   7,   8,  15,   9,   5,   6, -11,   6,  21,  11, -27,   7,
  27,   6, -11,   6,  31,   6, -21,   6,  19,   6,  -7,   8,  -7,  13,  -3,   6,
  -7,   7,  -3,   6,   1,   6,   7,   8,  19,   8,  11,   9,  -9,   7, -31,  12,
  25,   6, -17,   9, -15,   7,   5,   6,  25,   7,  -5,   7, -25,   6,  17,   8,
 -19,   6, -13,   6,  27,   8,   1,   7,  -5,   7,  -1,   6,  21,   6,   3,  10,
  -3,   1,
};
/*
 * Calculate r = pairing <P, Q>.
 *
 * That is, multiply base in PF_p[q] by the scalar s, such that s.P = Q.
 *
 * Sliding window. Start at bottom and stop when bottom bit is one.
 * Subtract if top bit in window is one.
 * Width of 6 bits.
 *
 * A table of the 16 odd multiples [1]P, [3]P, ..., [31]P is generated along
 * with the accumulated line value for each, then the operations listed in
 * sp_1024_order_op are performed.
 *
 * @param  [in]   pm   First point on E(F_p)[q].
 * @param  [in]   qm   Second point on E(F_p)[q].
 * @param  [out]  res  Result of calculation.
 * @return  0 on success.
 * @return  MEMORY_E when dynamic memory allocation fails.
 * @return  Other -ve value on internal failure (propagated from the point
 *          allocation, normalization and conversion helpers).
 */
int sp_Pairing_1024(const ecc_point* pm, const ecc_point* qm, mp_int* res)
{
    int err;
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Single heap buffer; all working arrays are carved out of it below. */
    sp_digit* td = NULL;
    sp_digit* t;
    sp_digit* vx;
    sp_digit* vy;
    sp_digit (*pre_vx)[64];
    sp_digit (*pre_vy)[64];
    sp_digit (*pre_nvy)[64];
    sp_point_1024* pre_p;
#else
    sp_digit t[6 * 2 * 32];
    sp_digit vx[2 * 32];
    sp_digit vy[2 * 32];
    /* Per table entry: line value x, line value y and negated y. */
    sp_digit pre_vx[16][64];
    sp_digit pre_vy[16][64];
    sp_digit pre_nvy[16][64];
    sp_point_1024 pre_p[16];
    sp_point_1024 pd;
    sp_point_1024 qd;
    sp_point_1024 cd;
#endif
    sp_point_1024* p = NULL;
    sp_point_1024* q = NULL;
    sp_point_1024* c = NULL;
    sp_digit* r = NULL;
    int i;
    int j;

    err = sp_1024_point_new_32(NULL, pd, p);
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, qd, q);
    }
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, cd, c);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        /* 56 slots of 2 * 32 digits (6 for t, 1 each for vx and vy, 16 each
         * for pre_vx, pre_vy and pre_nvy) followed by 16 points. */
        td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 32 * 2 + 16 * sizeof(sp_point_1024), NULL,
                                DYNAMIC_TYPE_TMP_BUFFER);
        if (td == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        t       = td;
        vx      = td + 6 * 32 * 2;
        vy      = td + 7 * 32 * 2;
        pre_vx  = (sp_digit(*)[64])(td + 8 * 32 * 2);
        pre_vy  = (sp_digit(*)[64])(td + 24 * 32 * 2);
        pre_nvy = (sp_digit(*)[64])(td + 40 * 32 * 2);
        pre_p   = (sp_point_1024*)(td + 56 * 32 * 2);
#endif
        /* The result is built in place in vy's storage. */
        r = vy;

        sp_1024_point_from_ecc_point_32(p, pm);
        sp_1024_point_from_ecc_point_32(q, qm);

        /* Normalize the ordinates that are used: all of P, and q.x and q.y.
         * NOTE(review): presumably this converts to Montgomery form -
         * confirm against sp_1024_mod_mul_norm_32. q.z is not normalized. */
        err = sp_1024_mod_mul_norm_32(p->x, p->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->y, p->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->z, p->z, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(q->x, q->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(q->y, q->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        /* Generate pre-computation table: 1, 3, ... , 31 */
        /* Entry 0 is P itself with line value (vx, vy) = (1, 0). */
        XMEMCPY(&pre_p[0], p, sizeof(sp_point_1024));
        XMEMSET(pre_vx[0], 0, sizeof(sp_digit) * 2 * 32);
        pre_vx[0][0] = 1;
        XMEMSET(pre_vy[0], 0, sizeof(sp_digit) * 2 * 32);
        /* Negated y of the line value, used when subtracting the entry. */
        sp_1024_mont_sub_32(pre_nvy[0], p1024_mod, pre_vy[0], p1024_mod);

        /* [2]P for adding */
        /* c becomes [2]P and (vx, vy) holds the line value of the double. */
        XMEMCPY(c, p, sizeof(sp_point_1024));
        XMEMSET(vx, 0, sizeof(sp_digit) * 2 * 32);
        vx[0] = 1;
        XMEMSET(vy, 0, sizeof(sp_digit) * 2 * 32);
        sp_1024_accumulate_line_dbl_32(vx, vy, c, q, t);

        /* 3, 5, ... */
        /* Each entry is the previous entry with [2]P (in c) added to it.
         * Here c is passed as the point to add and pre_p[i] is the point
         * that is updated. */
        for (i = 1; i < 16; i++) {
            XMEMCPY(&pre_p[i], &pre_p[i-1], sizeof(sp_point_1024));
            XMEMCPY(pre_vx[i], pre_vx[i-1], sizeof(sp_digit) * 2 * 32);
            XMEMCPY(pre_vy[i], pre_vy[i-1], sizeof(sp_digit) * 2 * 32);
            sp_1024_proj_mul_32(pre_vx[i], pre_vy[i], vx, vy, t);
            sp_1024_accumulate_line_add_n_32(pre_vx[i], pre_vy[i], c,
                    q, &pre_p[i], t, 0);
            sp_1024_mont_sub_32(pre_nvy[i], p1024_mod, pre_vy[i], p1024_mod);
        }

        /* Start from the table entry selected by the first window value. */
        j = sp_1024_order_op[0] / 2;
        XMEMCPY(c, &pre_p[j], sizeof(sp_point_1024));
        XMEMCPY(vx, pre_vx[j], sizeof(sp_digit) * 2 * 32);
        XMEMCPY(vy, pre_vy[j], sizeof(sp_digit) * 2 * 32);

        /* Accumulate line into v and double point n times. */
        sp_1024_accumulate_line_dbl_n_32(vx, vy, c, q,
                sp_1024_order_op[1], t);

        /* Remaining pairs: window value at i, number of doubles at i + 1.
         * 290 is the number of entries in sp_1024_order_op. */
        for (i = 2; i < 290; i += 2) {
            j = sp_1024_order_op[i];
            if (j > 0) {
                /* Positive window value - add table entry j / 2. */
                j /= 2;
                /* Accumulate line into v and add P into C. */
                sp_1024_proj_mul_32(vx, vy, pre_vx[j], pre_vy[j], t);
                sp_1024_accumulate_line_add_n_32(vx, vy, &pre_p[j], q, c,
                    t, 0);
            }
            else {
                /* Negative window value - subtract table entry: use the
                 * negated line value y and ask for the negative point. */
                j = -j / 2;
                /* Accumulate line into v and add P into C. */
                sp_1024_proj_mul_32(vx, vy, pre_vx[j], pre_nvy[j], t);
                sp_1024_accumulate_line_add_n_32(vx, vy, &pre_p[j], q, c,
                    t, 1);
            }

            /* Accumulate line into v and double point n times. */
            sp_1024_accumulate_line_dbl_n_32(vx, vy, c, q,
                    sp_1024_order_op[i + 1], t);
        }

        /* Final exponentiation - two projective squarings. */
        sp_1024_proj_sqr_32(vx, vy, t);
        sp_1024_proj_sqr_32(vx, vy, t);

        /* Convert from PF_p[q] to F_p: r = vy / vx */
        sp_1024_mont_inv_32(vx, vx, t);
        sp_1024_mont_mul_32(r, vx, vy, p1024_mod, p1024_mp_mod);
        /* Clear the upper 32 digits before the reduction.
         * NOTE(review): the reduce presumably takes r out of Montgomery
         * form - confirm against sp_1024_mont_reduce_32. */
        XMEMSET(r + 32, 0, sizeof(sp_digit) * 32);
        sp_1024_mont_reduce_32(r, p1024_mod, p1024_mp_mod);

        err = sp_1024_to_mp(r, res);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif
    /* Release points in reverse order of allocation. */
    sp_1024_point_free_32(c, 1, NULL);
    sp_1024_point_free_32(q, 1, NULL);
    sp_1024_point_free_32(p, 1, NULL);
    return err;
}
43206 
43207 #endif /* WOLFSSL_SP_SMALL */
43208 #ifdef WOLFSSL_SP_SMALL
43209 /*
43210  * Generate table for pairing.
43211  *
43212  * Small implementation does not use a table - returns 0 length.
43213  *
43214  * pm     [in]      Point to generate table for.
43215  * table  [in]      Generated table.
43216  * len    [in,out]  On in, the size of the buffer.
43217  *                  On out, length of table generated.
43218  * @return  0 on success.
43219  *          LENGTH_ONLY_E when table is NULL and only length returned.
43220  *          BUFFER_E when len is too small.
43221  */
sp_Pairing_gen_precomp_1024(const ecc_point * pm,byte * table,word32 * len)43222 int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table,
43223         word32* len)
43224 {
43225     int err = 0;
43226 
43227     if (table == NULL) {
43228         *len = 0;
43229         err = LENGTH_ONLY_E;
43230     }
43231     else if (*len != 0) {
43232         err = BUFFER_E;
43233     }
43234 
43235     (void)*pm;
43236 
43237     return err;
43238 }
43239 
43240 /*
43241  * Calculate r = pairing <P, Q>.
43242  *
43243  * That is, multiply base in PF_p[q] by the scalar s, such that s.P = Q.
43244  *
43245  * Small implementation does not use a table - use the normal implementation.
43246  *
43247  * @param  [in]  pm     First point on E(F_p)[q].
43248  * @param  [in]  qm     Second point on E(F_p)[q].
43249  * @param  [in]  res    Result of calculation.
43250  * @param  [in]  table  Precomputed table of values.
43251  * @param  [in]  len    Length of precomputed table of values in bytes.
43252  * @return  0 on success.
43253  * @return  MEMORY_E when dynamic memory allocation fails.
43254  */
sp_Pairing_precomp_1024(const ecc_point * pm,const ecc_point * qm,mp_int * res,const byte * table,word32 len)43255 int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm,
43256     mp_int* res, const byte* table, word32 len)
43257 {
43258     (void)table;
43259     (void)len;
43260     return sp_Pairing_1024(pm, qm, res);
43261 }
43262 
43263 #else
/*
 * Calc l and c for the point when doubling p.
 *
 * l = 3 * (p.x^2 - 1) / (2 * p.y)
 * c = l * p.x - p.y
 *
 * Only the first 32 digits of each result are copied to lr and cr.
 *
 * @param  [out]  lr  Gradient result - table entry.
 * @param  [out]  cr  Constant result - table entry.
 * @param  [in]   px  X-ordinate of point to double.
 * @param  [in]   py  Y-ordinate of point to double.
 * @param  [in]   t   SP temporaries (3 used, placed at offsets 0, 128 and
 *                    256 digits from t).
 */
static void sp_1024_accum_dbl_calc_lc_32(sp_digit* lr, sp_digit* cr,
        const sp_digit* px, const sp_digit* py, sp_digit* t)
{
    sp_digit* t1 = t + 0 * 2 * 32;
    sp_digit* t2 = t + 2 * 2 * 32;
    sp_digit* l  = t + 4 * 2 * 32;


    /* l = 1 / (2 * p.y)
     * t is passed to the inversion as scratch; t1 and t2 are not yet in use. */
    sp_1024_mont_dbl_32(l, py, p1024_mod);
    sp_1024_mont_inv_32(l, l, t);

    /* t1 = p.x^2 */
    sp_1024_mont_sqr_32(t1, px, p1024_mod, p1024_mp_mod);
    /* t1 = p.x^2 - 1
     * NOTE(review): p1024_norm_mod is presumably 1 in Montgomery form -
     * confirm against its definition. */
    sp_1024_mont_sub_32(t1, t1, p1024_norm_mod, p1024_mod);
    /* t1 = 3 * (p.x^2 - 1), calculated as 2 * t1 + t1 */
    sp_1024_mont_dbl_32(t2, t1, p1024_mod);
    sp_1024_mont_add_32(t1, t1, t2, p1024_mod);
    /* l = 3 * (p.x^2 - 1) / (2 * p.y) */
    sp_1024_mont_mul_32(l, l, t1, p1024_mod, p1024_mp_mod);
    /* t2 = l * p.x */
    sp_1024_mont_mul_32(t2, l, px, p1024_mod, p1024_mp_mod);
    /* c = t2 = l * p.x - p.y */
    sp_1024_mont_sub_32(t2, t2, py, p1024_mod);

    /* Copy out the results - 32 digits each. */
    XMEMCPY(lr, l, sizeof(sp_digit) * 32);
    XMEMCPY(cr, t2, sizeof(sp_digit) * 32);
}
43305 
/*
 * Calc l and c when adding p and c.
 *
 * l = (c.y - p.y) / (c.x - p.x)
 * c = (p.x * c.y - c.x * p.y) / (c.x - p.x)
 *
 * The working values t1, c and l are carved out of t at offsets of
 * 0, 2 * 2 * 32 and 4 * 2 * 32 digits. Only the first 32 digits of l and c
 * are copied to the outputs.
 *
 * @param  [out]  lr  Gradient result - table entry.
 * @param  [out]  cr  Constant result - table entry.
 * @param  [in]   px  X-ordinate of point to add.
 * @param  [in]   py  Y-ordinate of point to add.
 * @param  [in]   cx  X-ordinate of current point.
 * @param  [in]   cy  Y-ordinate of current point.
 * @param  [in]   t   SP temporaries (3 used).
 */
static void sp_1024_accum_add_calc_lc_32(sp_digit* lr, sp_digit* cr,
        const sp_digit* px, const sp_digit* py, const sp_digit* cx,
        const sp_digit* cy, sp_digit* t)
{
    sp_digit* t1 = t + 0 * 2 * 32;
    sp_digit* c  = t + 2 * 2 * 32;
    sp_digit* l  = t + 4 * 2 * 32;


    /* l = 1 / (c.x - p.x) */
    sp_1024_mont_sub_32(l, cx, px, p1024_mod);
    /* t is handed to the inversion as its work area; this happens before t1
     * and c hold anything that is needed later. */
    sp_1024_mont_inv_32(l, l, t);

    /* c = p.x * c.y */
    sp_1024_mont_mul_32(c, px, cy, p1024_mod, p1024_mp_mod);
    /* t1 = c.x * p.y */
    sp_1024_mont_mul_32(t1, cx, py, p1024_mod, p1024_mp_mod);
    /* c = (p.x * c.y) - (c.x * p.y) */
    sp_1024_mont_sub_32(c, c, t1, p1024_mod);
    /* c = ((p.x * c.y) - (c.x * p.y)) / (c.x - p.x) */
    sp_1024_mont_mul_32(c, c, l, p1024_mod, p1024_mp_mod);
    /* t1 = c.y - p.y */
    sp_1024_mont_sub_32(t1, cy, py, p1024_mod);
    /* l = (c.y - p.y) / (c.x - p.x) */
    sp_1024_mont_mul_32(l, t1, l, p1024_mod, p1024_mp_mod);

    /* Store gradient and constant into the table entry. */
    XMEMCPY(lr, l, sizeof(sp_digit) * 32);
    XMEMCPY(cr, c, sizeof(sp_digit) * 32);
}
43349 
43350 /*
43351  * Calculate vx and vy given gradient l and constant c and point q.
43352  *
43353  * l is a the gradient and is multiplied by q->x.
43354  * c is a the constant that is added to the multiplicative result.
43355  * q->y is the y-ordinate in result to multiply.
43356  *
43357  * if dbl
43358  *   v*  = v*^2
43359  * r.x = l * q.x + c
43360  * r.y = q->y
43361  * v*  = v* * r*
43362  *
43363  * @param  [in,out]  vx     X-ordinate of projective value in F*.
43364  * @param  [in,out]  vy     Y-ordinate of projective value in F*.
43365  * @param  [in]      l      Gradient to multiply with.
43366  * @param  [in]      c      Constant to add with.
43367  * @param  [in]      q      ECC point - second point on E(F_P^2).
43368  * @param  [in]      t      SP temporaries (3 used).
43369  * @param  [in]      dbl    Indicates whether this is for doubling. Otherwise
43370  *                          adding.
43371  */
sp_1024_accumulate_line_lc_32(sp_digit * vx,sp_digit * vy,const sp_digit * l,const sp_digit * c,const sp_point_1024 * q,sp_digit * t,int dbl)43372 static void sp_1024_accumulate_line_lc_32(sp_digit* vx, sp_digit* vy,
43373         const sp_digit* l, const sp_digit* c, const sp_point_1024* q,
43374         sp_digit* t, int dbl)
43375 {
43376     sp_digit* rx = t + 4 * 2 * 32;
43377 
43378     /* v = v^2 */
43379     if (dbl) {
43380         sp_1024_proj_sqr_32(vx, vy, t);
43381     }
43382     /* rx = l * q.x + c */
43383     sp_1024_mont_mul_32(rx, l, q->x, p1024_mod, p1024_mp_mod);
43384     sp_1024_mont_add_32(rx, rx, c, p1024_mod);
43385     /* v = v^2 * r */
43386     sp_1024_proj_mul_32(vx, vy, rx, q->y, t);
43387 }
43388 
/* Operations to perform based on order - 1.
 * Sliding window. Start at bottom and stop when bottom bit is one.
 * Subtract if top bit in window is one.
 * Width of 6 bits.
 *
 * The table holds 145 pairs (290 values). In each pair:
 *   - even index: window value to add (positive) or subtract (negative).
 *     Values are odd; value / 2 is the index into the pre-computed
 *     1, 3, ..., 31 multiples.
 *   - odd index: number of doublings to perform after that add/subtract.
 * The first pair is special: its window value selects the starting point
 * rather than being added.
 */
static const signed char sp_1024_order_op_pre[] = {
   5,   6, -13,   9, -21,   6,  -5,   8,  31,   6,   3,   6, -27,   6,  25,   9,
  -1,   6, -11,   6, -13,   6,  -7,   6, -15,   6, -29,   7,  25,   6,  -9,   6,
 -19,   7,   3,   6,  11,   9, -23,   6,   1,   6,  27,   6,   1,   7, -25,   8,
  13,   7, -13,   7, -23,  10,  19,   7,   7,   7,  -3,   7,  27,   6,  -7,   7,
 -21,   7,  11,   7,  31,   8,   1,   7, -23,   6, -17,   6,  -3,  10,  11,   6,
 -21,   7, -27,  11, -29,   6,  -1,  10,  15,   8,  27,   7,  17,   6,  17,   7,
 -13,   8,  13,   6,  21,   7, -29,   6,  19,   7, -25,   6,  11,   9,  29,   7,
  -7,   8,  27,   7,  29,  10,  -1,   8,  -7,   8,  17,   6,  17,   7, -27,   7,
 -21,   6,  -9,   6, -27,  12, -23,   6,  19,   6,  13,   6, -11,   7,  27,   6,
  17,   6,  -7,   6, -25,   7, -29,   6,   9,   7,   7,   6,  13,   6, -25,   6,
 -19,   6,  13,   6, -11,   6,   5,   8,  19,   6, -21,   8,  23,   7,  27,   6,
 -13,   6, -19,  11,  29,   7, -15,   6,  -9,   7, -21,  10,  -3,   7,  21,  10,
  25,   6, -15,   6, -23,   6,  21,   6,   1,   6,  21,   7,  -3,   6,  -3,   7,
  -7,   6, -23,   7,   7,   8,  15,   9,   5,   6, -11,   6,  21,  11, -27,   7,
  27,   6, -11,   6,  31,   6, -21,   6,  19,   6,  -7,   8,  -7,  13,  -3,   6,
  -7,   7,  -3,   6,   1,   6,   7,   8,  19,   8,  11,   9,  -9,   7, -31,  12,
  25,   6, -17,   9, -15,   7,   5,   6,  25,   7,  -5,   7, -25,   6,  17,   8,
 -19,   6, -13,   6,  27,   8,   1,   7,  -5,   7,  -1,   6,  21,   6,   3,  10,
  -3,   1,
};
43416 
/*
 * Generate table for pairing.
 *
 * Calculate the gradient (l) and constant (c) at each step of the way.
 * Sliding window. Start at bottom and stop when bottom bit is one.
 * Subtract if top bit in window is one.
 * Width of 6 bits.
 *
 * The steps are driven by sp_1024_order_op_pre: one table entry is written
 * for every doubling and for every add/subtract of a window value.
 *
 * pm     [in]      Point to generate table for.
 * table  [out]     Generated table. May be NULL to query the length only.
 * len    [in,out]  On in, the size of the buffer.
 *                  On out, length of table generated.
 * @return  0 on success.
 *          LENGTH_ONLY_E when table is NULL and only length returned.
 *          BUFFER_E when len is too small.
 *          MEMORY_E when dynamic memory allocation fails.
 */
int sp_Pairing_gen_precomp_1024(const ecc_point* pm, byte* table,
        word32* len)
{
    int err = 0;
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* td = NULL;
    sp_digit* t;
    sp_point_1024* pre_p;
#else
    sp_digit t[6 * 2 * 32];
    sp_point_1024 pre_p[16];
    sp_point_1024 pd;
    sp_point_1024 cd;
    sp_point_1024 negd;
#endif
    sp_point_1024* p = NULL;
    sp_point_1024* c = NULL;
    sp_point_1024* neg = NULL;
    int i;
    int j;
    int k;
    /* The caller's byte buffer is written as an array of table entries. */
    sp_table_entry_1024* precomp = (sp_table_entry_1024*)table;

    /* Length query only. */
    if (table == NULL) {
        *len = sizeof(sp_table_entry_1024) * 1167;
        err = LENGTH_ONLY_E;
    }

    /* Buffer must have room for 1167 table entries. */
    if ((err == MP_OKAY) &&
            (*len < (int)(sizeof(sp_table_entry_1024) * 1167))) {
        err = BUFFER_E;
    }

    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, pd, p);
    }
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, cd, c);
    }
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, negd, neg);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        /* One allocation: temporaries followed by the 16 window points. */
        td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 6 * 32 * 2 + 16 * sizeof(sp_point_1024), NULL,
                                DYNAMIC_TYPE_TMP_BUFFER);
        if (td == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        t     = td;
        pre_p = (sp_point_1024*)(td + 6 * 32 * 2);
#endif

        sp_1024_point_from_ecc_point_32(p, pm);

        /* NOTE(review): presumably converts the ordinates to Montgomery form
         * - confirm against sp_1024_mod_mul_norm_32. */
        err = sp_1024_mod_mul_norm_32(p->x, p->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->y, p->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        XMEMCPY(p->z, p1024_norm_mod, sizeof(p1024_norm_mod));
        neg->infinity = 0;
        c->infinity = 0;

        /* Generate pre-computation table: 1, 3, ... , 31 */
        XMEMCPY(&pre_p[0], p, sizeof(sp_point_1024));
        /* [2]P for adding */
        sp_1024_proj_point_dbl_32(c, p, t);

        /* 1, 3, ... */
        for (i = 1; i < 16; i++) {
            sp_1024_proj_point_add_32(&pre_p[i], &pre_p[i-1], c, t);
            sp_1024_mont_map_32(&pre_p[i], t);
        }

        /* k is the index of the next table entry to write. */
        k = 0;
        /* First window value selects the starting point. */
        j = sp_1024_order_op_pre[0] / 2;
        XMEMCPY(c, &pre_p[j], sizeof(sp_point_1024));

        /* Doublings that follow the first window. */
        for (j = 0; j < sp_1024_order_op_pre[1]; j++) {
            sp_1024_accum_dbl_calc_lc_32(precomp[k].x, precomp[k].y, c->x, c->y, t);
            k++;
            sp_1024_proj_point_dbl_32(c, c, t);
            sp_1024_mont_map_32(c, t);
        }

        /* Remaining pairs: add/subtract window value then double. */
        for (i = 2; i < 290; i += 2) {
            j = sp_1024_order_op_pre[i];
            if (j > 0) {
                /* Add the window point. */
                sp_1024_accum_add_calc_lc_32(precomp[k].x, precomp[k].y,
                    pre_p[j/2].x, pre_p[j/2].y, c->x, c->y, t);
                k++;
                sp_1024_proj_point_add_32(c, c, &pre_p[j/2], t);
                sp_1024_mont_map_32(c, t);
            }
            else {
                /* Subtract: add the window point with its y negated. */
                XMEMCPY(neg->x, pre_p[-j / 2].x, sizeof(pre_p->x));
                sp_1024_mont_sub_32(neg->y, p1024_mod, pre_p[-j / 2].y,
                        p1024_mod);
                XMEMCPY(neg->z, pre_p[-j / 2].z, sizeof(pre_p->z));

                sp_1024_accum_add_calc_lc_32(precomp[k].x, precomp[k].y,
                    neg->x, neg->y, c->x, c->y, t);
                k++;
                sp_1024_proj_point_add_32(c, c, neg, t);
                sp_1024_mont_map_32(c, t);
            }

            for (j = 0; j < sp_1024_order_op_pre[i + 1]; j++) {
                sp_1024_accum_dbl_calc_lc_32(precomp[k].x, precomp[k].y, c->x, c->y, t);
                k++;
                sp_1024_proj_point_dbl_32(c, c, t);
                sp_1024_mont_map_32(c, t);
            }
        }

        /* Report the length of the table. */
        *len = sizeof(sp_table_entry_1024) * 1167;
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif
    sp_1024_point_free_32(neg, 1, NULL);
    sp_1024_point_free_32(c, 1, NULL);
    sp_1024_point_free_32(p, 1, NULL);
    return err;
}
43574 
/*
 * Calculate r = pairing <P, Q>.
 *
 * That is, multiply base in PF_p[q] by the scalar s, such that s.P = Q.
 *
 * Sliding window. Start at bottom and stop when bottom bit is one.
 * Subtract if top bit in window is one.
 * Width of 6 bits.
 * Pre-generate values in window (1, 3, ...) - only V.
 * Table contains all gradient l and a constant for each point on the path.
 *
 * @param  [in]   pm     First point on E(F_p)[q].
 * @param  [in]   qm     Second point on E(F_p)[q].
 * @param  [out]  res    Result of calculation.
 * @param  [in]   table  Precomputed table of values.
 * @param  [in]   len    Length of precomputed table of values in bytes.
 * @return  0 on success.
 * @return  BUFFER_E when len is smaller than the size of a generated table.
 * @return  MEMORY_E when dynamic memory allocation fails.
 */
int sp_Pairing_precomp_1024(const ecc_point* pm, const ecc_point* qm,
    mp_int* res, const byte* table, word32 len)
{
    int err = 0;
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    sp_digit* td = NULL;
    sp_digit* t;
    sp_digit* vx;
    sp_digit* vy;
    sp_digit (*pre_vx)[64];
    sp_digit (*pre_vy)[64];
    sp_digit (*pre_nvy)[64];
#else
    sp_digit t[6 * 2 * 32];
    sp_digit vx[2 * 32];
    sp_digit vy[2 * 32];
    sp_digit pre_vx[16][64];
    sp_digit pre_vy[16][64];
    sp_digit pre_nvy[16][64];
    sp_point_1024 pd;
    sp_point_1024 qd;
    sp_point_1024 cd;
#endif
    sp_point_1024* p = NULL;
    sp_point_1024* q = NULL;
    sp_point_1024* c = NULL;
    sp_digit* r = NULL;
    int i;
    int j;
    int k;
    /* The caller's byte buffer is read as an array of table entries. */
    const sp_table_entry_1024* precomp = (const sp_table_entry_1024*)table;

    /* Table must be at least the size that generation reports. */
    if (len < (int)(sizeof(sp_table_entry_1024) * 1167)) {
        err = BUFFER_E;
    }

    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, pd, p);
    }
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, qd, q);
    }
    if (err == MP_OKAY) {
        err = sp_1024_point_new_32(NULL, cd, c);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (err == MP_OKAY) {
        /* 56 slots of 2 * 32 digits:
         * 6 for t, 1 each for vx and vy, 16 each for the three pre_* tables.
         */
        td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 56 * 32 * 2, NULL,
                                DYNAMIC_TYPE_TMP_BUFFER);
        if (td == NULL) {
            err = MEMORY_E;
        }
    }
#endif

    if (err == MP_OKAY) {
#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
        /* Carve the single allocation into the working buffers. */
        t       = td;
        vx      = td + 6 * 32 * 2;
        vy      = td + 7 * 32 * 2;
        pre_vx  = (sp_digit(*)[64])(td + 8 * 32 * 2);
        pre_vy  = (sp_digit(*)[64])(td + 24 * 32 * 2);
        pre_nvy = (sp_digit(*)[64])(td + 40 * 32 * 2);
#endif
        /* The final result is built in the storage of vy. */
        r = vy;

        sp_1024_point_from_ecc_point_32(p, pm);
        sp_1024_point_from_ecc_point_32(q, qm);

        /* NOTE(review): presumably converts the ordinates to Montgomery form
         * - confirm against sp_1024_mod_mul_norm_32. */
        err = sp_1024_mod_mul_norm_32(p->x, p->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->y, p->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(p->z, p->z, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(q->x, q->x, p1024_mod);
    }
    if (err == MP_OKAY) {
        err = sp_1024_mod_mul_norm_32(q->y, q->y, p1024_mod);
    }
    if (err == MP_OKAY) {
        /* Generate pre-computation table: 1, 3, ... , 31 */
        XMEMSET(pre_vx[0], 0, sizeof(sp_digit) * 2 * 32);
        pre_vx[0][0] = 1;
        XMEMSET(pre_vy[0], 0, sizeof(sp_digit) * 2 * 32);
        /* pre_nvy holds the negated y-ordinate for subtracting a window. */
        sp_1024_mont_sub_32(pre_nvy[0], p1024_mod, pre_vy[0], p1024_mod);

        /* [2]P for adding */
        XMEMCPY(c, p, sizeof(sp_point_1024));
        XMEMSET(vx, 0, sizeof(sp_digit) * 2 * 32);
        vx[0] = 1;
        XMEMSET(vy, 0, sizeof(sp_digit) * 2 * 32);
        sp_1024_accumulate_line_dbl_32(vx, vy, c, q, t);

        /* 3, 5, ... */
        for (i = 1; i < 16; i++) {
            XMEMCPY(pre_vx[i], pre_vx[i-1], sizeof(sp_digit) * 2 * 32);
            XMEMCPY(pre_vy[i], pre_vy[i-1], sizeof(sp_digit) * 2 * 32);
            sp_1024_proj_mul_32(pre_vx[i], pre_vy[i], vx, vy, t);
            sp_1024_accumulate_line_add_n_32(pre_vx[i], pre_vy[i], c,
                q, p, t, 0);
            sp_1024_mont_sub_32(pre_nvy[i], p1024_mod, pre_vy[i],
                p1024_mod);
        }

        XMEMCPY(c->z, p1024_norm_mod, sizeof(sp_digit) * 32);
        c->infinity = 0;
        /* First window value selects the starting value of v. */
        j = sp_1024_order_op_pre[0] / 2;
        XMEMCPY(vx, pre_vx[j], sizeof(sp_digit) * 2 * 32);
        XMEMCPY(vy, pre_vy[j], sizeof(sp_digit) * 2 * 32);

        /* k is the index of the next table entry to consume. */
        k = 0;
        for (j = 0; j < sp_1024_order_op_pre[1]; j++) {
            /* Accumulate line into v and double point. */
            sp_1024_accumulate_line_lc_32(vx, vy, precomp[k].x,
                precomp[k].y, q, t, 1);
            k++;
        }

        for (i = 2; i < 290; i += 2) {
            /* Accumulate the line for the add/subtract step into v. */
            sp_1024_accumulate_line_lc_32(vx, vy, precomp[k].x,
                precomp[k].y, q, t, 0);
            k++;

            j = sp_1024_order_op_pre[i];
            if (j > 0) {
                j /= 2;
                /* Multiply in the pre-computed value for the window. */
                sp_1024_proj_mul_32(vx, vy, pre_vx[j], pre_vy[j], t);
            }
            else {
                j = -j / 2;
                /* Multiply in the pre-computed value with negated y. */
                sp_1024_proj_mul_32(vx, vy, pre_vx[j], pre_nvy[j], t);
            }

            for (j = 0; j < sp_1024_order_op_pre[i + 1]; j++) {
                /* Accumulate line into v and double point. */
                sp_1024_accumulate_line_lc_32(vx, vy, precomp[k].x,
                    precomp[k].y, q, t, 1);
                k++;
            }
        }

        /* Final exponentiation */
        sp_1024_proj_sqr_32(vx, vy, t);
        sp_1024_proj_sqr_32(vx, vy, t);

        /* Convert from PF_p[q] to F_p */
        sp_1024_mont_inv_32(vx, vx, t);
        sp_1024_mont_mul_32(r, vx, vy, p1024_mod, p1024_mp_mod);
        /* Clear the top half before the reduction of the double width value. */
        XMEMSET(r + 32, 0, sizeof(sp_digit) * 32);
        sp_1024_mont_reduce_32(r, p1024_mod, p1024_mp_mod);

        err = sp_1024_to_mp(r, res);
    }

#if (defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    if (td != NULL) {
        XFREE(td, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
#endif
    sp_1024_point_free_32(c, 1, NULL);
    sp_1024_point_free_32(q, 1, NULL);
    sp_1024_point_free_32(p, 1, NULL);
    return err;
}
43769 
43770 #endif /* WOLFSSL_SP_SMALL */
43771 /* Returns 1 if the number of zero.
43772  * Implementation is constant time.
43773  *
43774  * a  Number to check.
43775  * returns 1 if the number is zero and 0 otherwise.
43776  */
sp_1024_iszero_32(const sp_digit * a)43777 static int sp_1024_iszero_32(const sp_digit* a)
43778 {
43779     return (a[0] | a[1] | a[2] | a[3] | a[4] | a[5] | a[6] | a[7] |
43780             a[8] | a[9] | a[10] | a[11] | a[12] | a[13] | a[14] | a[15] |
43781             a[16] | a[17] | a[18] | a[19] | a[20] | a[21] | a[22] | a[23] |
43782             a[24] | a[25] | a[26] | a[27] | a[28] | a[29] | a[30] | a[31]) == 0;
43783 }
43784 
43785 #ifdef HAVE_ECC_CHECK_KEY
43786 /* Read big endian unsigned byte array into r.
43787  *
43788  * r  A single precision integer.
43789  * size  Maximum number of bytes to convert
43790  * a  Byte array.
43791  * n  Number of bytes in array to read.
43792  */
sp_1024_from_bin(sp_digit * r,int size,const byte * a,int n)43793 static void sp_1024_from_bin(sp_digit* r, int size, const byte* a, int n)
43794 {
43795     int i;
43796     int j = 0;
43797     word32 s = 0;
43798 
43799     r[0] = 0;
43800     for (i = n-1; i >= 0; i--) {
43801         r[j] |= (((sp_digit)a[i]) << s);
43802         if (s >= 24U) {
43803             r[j] &= 0xffffffff;
43804             s = 32U - s;
43805             if (j + 1 >= size) {
43806                 break;
43807             }
43808             r[++j] = (sp_digit)a[i] >> s;
43809             s = 8U - s;
43810         }
43811         else {
43812             s += 8U;
43813         }
43814     }
43815 
43816     for (j++; j < size; j++) {
43817         r[j] = 0;
43818     }
43819 }
43820 
43821 /* Check that the x and y oridinates are a valid point on the curve.
43822  *
43823  * point  EC point.
43824  * heap   Heap to use if dynamically allocating.
43825  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
43826  * not on the curve and MP_OKAY otherwise.
43827  */
sp_1024_ecc_is_point_32(const sp_point_1024 * point,void * heap)43828 static int sp_1024_ecc_is_point_32(const sp_point_1024* point,
43829     void* heap)
43830 {
43831 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43832     sp_digit* t1 = NULL;
43833 #else
43834     sp_digit t1[32 * 4];
43835 #endif
43836     sp_digit* t2 = NULL;
43837     sp_int32 n;
43838     int err = MP_OKAY;
43839 
43840 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43841     t1 = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 4, heap, DYNAMIC_TYPE_ECC);
43842     if (t1 == NULL)
43843         err = MEMORY_E;
43844 #endif
43845     (void)heap;
43846 
43847     if (err == MP_OKAY) {
43848         t2 = t1 + 2 * 32;
43849 
43850         sp_1024_sqr_32(t1, point->y);
43851         (void)sp_1024_mod_32(t1, t1, p1024_mod);
43852         sp_1024_sqr_32(t2, point->x);
43853         (void)sp_1024_mod_32(t2, t2, p1024_mod);
43854         sp_1024_mul_32(t2, t2, point->x);
43855         (void)sp_1024_mod_32(t2, t2, p1024_mod);
43856         (void)sp_1024_sub_32(t2, p1024_mod, t2);
43857         sp_1024_mont_add_32(t1, t1, t2, p1024_mod);
43858 
43859         sp_1024_mont_add_32(t1, t1, point->x, p1024_mod);
43860         sp_1024_mont_add_32(t1, t1, point->x, p1024_mod);
43861         sp_1024_mont_add_32(t1, t1, point->x, p1024_mod);
43862 
43863         n = sp_1024_cmp_32(t1, p1024_mod);
43864         sp_1024_cond_sub_32(t1, t1, p1024_mod, 0 - ((n >= 0) ?
43865             (sp_digit)1 : (sp_digit)0));
43866         sp_1024_norm_32(t1);
43867         if (!sp_1024_iszero_32(t1)) {
43868             err = MP_VAL;
43869         }
43870     }
43871 
43872 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43873     if (t1 != NULL)
43874         XFREE(t1, heap, DYNAMIC_TYPE_ECC);
43875 #endif
43876 
43877     return err;
43878 }
43879 
43880 /* Check that the x and y oridinates are a valid point on the curve.
43881  *
43882  * pX  X ordinate of EC point.
43883  * pY  Y ordinate of EC point.
43884  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
43885  * not on the curve and MP_OKAY otherwise.
43886  */
sp_ecc_is_point_1024(const mp_int * pX,const mp_int * pY)43887 int sp_ecc_is_point_1024(const mp_int* pX, const mp_int* pY)
43888 {
43889 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43890     sp_point_1024* pub = NULL;
43891 #else
43892     sp_point_1024 pub[1];
43893 #endif
43894     const byte one[1] = { 1 };
43895     int err = MP_OKAY;
43896 
43897 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43898     pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024), NULL,
43899                                        DYNAMIC_TYPE_ECC);
43900     if (pub == NULL)
43901         err = MEMORY_E;
43902 #endif
43903 
43904     if (err == MP_OKAY) {
43905         sp_1024_from_mp(pub->x, 32, pX);
43906         sp_1024_from_mp(pub->y, 32, pY);
43907         sp_1024_from_bin(pub->z, 32, one, (int)sizeof(one));
43908 
43909         err = sp_1024_ecc_is_point_32(pub, NULL);
43910     }
43911 
43912 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43913     if (pub != NULL)
43914         XFREE(pub, NULL, DYNAMIC_TYPE_ECC);
43915 #endif
43916 
43917     return err;
43918 }
43919 
43920 /* Check that the private scalar generates the EC point (px, py), the point is
43921  * on the curve and the point has the correct order.
43922  *
43923  * pX     X ordinate of EC point.
43924  * pY     Y ordinate of EC point.
43925  * privm  Private scalar that generates EC point.
43926  * returns MEMORY_E if dynamic memory allocation fails, MP_VAL if the point is
43927  * not on the curve, ECC_INF_E if the point does not have the correct order,
43928  * ECC_PRIV_KEY_E when the private scalar doesn't generate the EC point and
43929  * MP_OKAY otherwise.
43930  */
sp_ecc_check_key_1024(const mp_int * pX,const mp_int * pY,const mp_int * privm,void * heap)43931 int sp_ecc_check_key_1024(const mp_int* pX, const mp_int* pY,
43932     const mp_int* privm, void* heap)
43933 {
43934 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43935     sp_digit* priv = NULL;
43936     sp_point_1024* pub = NULL;
43937 #else
43938     sp_digit priv[32];
43939     sp_point_1024 pub[2];
43940 #endif
43941     sp_point_1024* p = NULL;
43942     const byte one[1] = { 1 };
43943     int err = MP_OKAY;
43944 
43945 
43946     /* Quick check the lengs of public key ordinates and private key are in
43947      * range. Proper check later.
43948      */
43949     if (((mp_count_bits(pX) > 1024) ||
43950         (mp_count_bits(pY) > 1024) ||
43951         ((privm != NULL) && (mp_count_bits(privm) > 1024)))) {
43952         err = ECC_OUT_OF_RANGE_E;
43953     }
43954 
43955 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
43956     if (err == MP_OKAY) {
43957         pub = (sp_point_1024*)XMALLOC(sizeof(sp_point_1024) * 2, heap,
43958                                            DYNAMIC_TYPE_ECC);
43959         if (pub == NULL)
43960             err = MEMORY_E;
43961     }
43962     if (err == MP_OKAY && privm) {
43963         priv = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32, heap,
43964                                   DYNAMIC_TYPE_ECC);
43965         if (priv == NULL)
43966             err = MEMORY_E;
43967     }
43968 #endif
43969 
43970     if (err == MP_OKAY) {
43971         p = pub + 1;
43972 
43973         sp_1024_from_mp(pub->x, 32, pX);
43974         sp_1024_from_mp(pub->y, 32, pY);
43975         sp_1024_from_bin(pub->z, 32, one, (int)sizeof(one));
43976         if (privm)
43977             sp_1024_from_mp(priv, 32, privm);
43978 
43979         /* Check point at infinitiy. */
43980         if ((sp_1024_iszero_32(pub->x) != 0) &&
43981             (sp_1024_iszero_32(pub->y) != 0)) {
43982             err = ECC_INF_E;
43983         }
43984     }
43985 
43986     /* Check range of X and Y */
43987     if ((err == MP_OKAY) &&
43988             ((sp_1024_cmp_32(pub->x, p1024_mod) >= 0) ||
43989              (sp_1024_cmp_32(pub->y, p1024_mod) >= 0))) {
43990         err = ECC_OUT_OF_RANGE_E;
43991     }
43992 
43993     if (err == MP_OKAY) {
43994         /* Check point is on curve */
43995         err = sp_1024_ecc_is_point_32(pub, heap);
43996     }
43997 
43998     if (err == MP_OKAY) {
43999         /* Point * order = infinity */
44000             err = sp_1024_ecc_mulmod_32(p, pub, p1024_order, 1, 1, heap);
44001     }
44002     /* Check result is infinity */
44003     if ((err == MP_OKAY) && ((sp_1024_iszero_32(p->x) == 0) ||
44004                              (sp_1024_iszero_32(p->y) == 0))) {
44005         err = ECC_INF_E;
44006     }
44007 
44008     if (privm) {
44009         if (err == MP_OKAY) {
44010             /* Base * private = point */
44011                 err = sp_1024_ecc_mulmod_base_32(p, priv, 1, 1, heap);
44012         }
44013         /* Check result is public key */
44014         if ((err == MP_OKAY) &&
44015                 ((sp_1024_cmp_32(p->x, pub->x) != 0) ||
44016                  (sp_1024_cmp_32(p->y, pub->y) != 0))) {
44017             err = ECC_PRIV_KEY_E;
44018         }
44019     }
44020 
44021 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
44022     if (pub != NULL)
44023         XFREE(pub, heap, DYNAMIC_TYPE_ECC);
44024     if (priv != NULL)
44025         XFREE(priv, heap, DYNAMIC_TYPE_ECC);
44026 #endif
44027 
44028     return err;
44029 }
44030 #endif
44031 #endif /* WOLFSSL_SP_1024 */
44032 #endif /* WOLFSSL_HAVE_SP_ECC */
44033 #endif /* WOLFSSL_SP_ARM_CORTEX_M_ASM */
44034 #endif /* WOLFSSL_HAVE_SP_RSA | WOLFSSL_HAVE_SP_DH | WOLFSSL_HAVE_SP_ECC */
44035