1 /* sp_int.c
2 *
3 * Copyright (C) 2006-2021 wolfSSL Inc.
4 *
5 * This file is part of wolfSSL.
6 *
7 * wolfSSL is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * wolfSSL is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
20 */
21
22 /* Implementation by Sean Parkinson. */
23
24 /*
25 DESCRIPTION
26 This library provides single precision (SP) integer math functions.
27
28 */
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32
33 #include <wolfssl/wolfcrypt/settings.h>
34
35 #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
36
37 #include <wolfssl/wolfcrypt/error-crypt.h>
38 #ifdef NO_INLINE
39 #include <wolfssl/wolfcrypt/misc.h>
40 #else
41 #define WOLFSSL_MISC_INCLUDED
42 #include <wolfcrypt/src/misc.c>
43 #endif
44
45 /* SP Build Options:
46 * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
47 * WOLFSSL_HAVE_SP_DH: Enable SP DH support
48 * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
49 * WOLFSSL_SP_MATH: Use only single precision math and algorithms
50 * it supports (no fastmath tfm.c or normal integer.c)
51 * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
52 * (replacement for tfm.c and integer.c)
53 * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
54 * stack variables
55 * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
56 * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
57 * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
 * WOLFSSL_SP_4096: Enable RSA/DH 4096-bit support
59 * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
60 * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
61 * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
62 * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
63 * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
64 * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
65 * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
66 * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
67 * (used with -mthumb)
68 * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
69 * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
70 * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
71 * WOLFSSL_SP_PPC Enable PPC assembly speedups
72 * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
73 * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
74 * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
75 * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
76 * WOLFSSL_SP_S390X Enable S390X assembly speedups
77 * SP_WORD_SIZE Force 32 or 64 bit mode
78 * WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
79 * will return FP_WOULDBLOCK for long operations and function must be
80 * called again until complete.
81 * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
 * exponentiation implementation.
83 * WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
84 * WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
85 * pointer is not allowed.
86 * WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
87 * Used with small code size and not small stack.
88 * WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
89 */
90
91 #include <wolfssl/wolfcrypt/sp_int.h>
92
/* DECL_SP_INT(n, s): declare 'n' so that it can be used as an 'sp_int*'.
 * The caller supplies the terminating semicolon.
 *
 * Heap build:      'n' is a NULL pointer, to be assigned a value on XMALLOC.
 * C99 small build: 'n' points at a byte buffer, n##d, of MP_INT_SIZEOF(s)
 *                  bytes that is declared on the stack with it.
 * Otherwise:       'n' is a one element array of 'sp_int'; 's' is not used.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    #define DECL_SP_INT(n, s) \
        sp_int* n = NULL
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    #define DECL_SP_INT(n, s) \
        byte n##d[MP_INT_SIZEOF(s)]; \
        sp_int* n = (sp_int*)n##d
#else
    #define DECL_SP_INT(n, s) \
        sp_int n[1]
#endif
112
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size.
 *
 * Both macros are written to be followed by a semicolon and to behave as a
 * single statement, so they may be used as the body of an unbraced if/else.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Dynamically allocate just enough data to support size.
     * Nothing is done unless err is MP_OKAY; err is set to MP_MEM when
     * XMALLOC returns NULL.
     */
    #define ALLOC_SP_INT(n, s, err, h) \
    do { \
        if (err == MP_OKAY) { \
            n = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), h, DYNAMIC_TYPE_BIGINT); \
            if (n == NULL) { \
                err = MP_MEM; \
            } \
        } \
    } \
    while (0)

    /* Dynamically allocate just enough data to support size - and set size.
     * The size field is only written when the allocation succeeded.
     */
    #define ALLOC_SP_INT_SIZE(n, s, err, h) \
    do { \
        ALLOC_SP_INT(n, s, err, h); \
        if (err == MP_OKAY) { \
            n->size = s; \
        } \
    } \
    while (0)
#else
    /* Array declared on stack - nothing to do. */
    #define ALLOC_SP_INT(n, s, err, h)
    /* Array declared on stack - set the size field.
     * Wrapped in do/while (0), without a trailing semicolon of its own, so
     * that it composes with if/else exactly like the heap variant does.
     */
    #define ALLOC_SP_INT_SIZE(n, s, err, h) \
    do { \
        (n)->size = (s); \
    } \
    while (0)
#endif
144
/* FREE_SP_INT(n, h): dispose of an 'sp_int' variable 'n'; 'h' is the heap
 * hint passed on to XFREE.
 */
#if defined(WOLFSSL_SP_NO_MALLOC) || \
    (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC))
    /* Variable was declared on the stack - expands to nothing. */
    #define FREE_SP_INT(n, h)
#else
    /* Variable was dynamically allocated - XFREE it when it is not NULL. */
    #define FREE_SP_INT(n, h) \
    do { \
        if (n != NULL) { \
            XFREE(n, h, DYNAMIC_TYPE_BIGINT); \
        } \
    } \
    while (0)
#endif
160
161
/* DECL_SP_INT_ARRAY(n, s, c): declare 'n', an array of 'c' pointers to
 * 'sp_int', together with its backing storage n##d.
 * The caller supplies the terminating semicolon.
 *
 * Heap build:      n##d is a NULL pointer to be assigned a value on XMALLOC
 *                  and the first entry of 'n' is initialized to NULL.
 * C99 small build: n##d is a byte buffer of MP_INT_SIZEOF(s) * c bytes.
 * Otherwise:       n##d is an array of 'c' 'sp_int's; 's' is not used.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    #define DECL_SP_INT_ARRAY(n, s, c) \
        sp_int* n##d = NULL; \
        sp_int* n[c] = { NULL, }
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    #define DECL_SP_INT_ARRAY(n, s, c) \
        byte n##d[MP_INT_SIZEOF(s) * (c)]; \
        sp_int* n[c]
#else
    #define DECL_SP_INT_ARRAY(n, s, c) \
        sp_int n##d[c]; \
        sp_int* n[c]
#endif
183
/* ALLOC_SP_INT_ARRAY(n, s, c, err, h): point each of the 'c' entries of 'n'
 * at an 'sp_int' of required size and store 's' in that entry's size field.
 * Nothing is done unless 'err' is MP_OKAY on entry.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
    /* Heap build: a single XMALLOC of MP_INT_SIZEOF(s) * c bytes is kept in
     * n##d; entry 0 is the start of it and every later entry is MP_INT_NEXT of
     * the one before. 'err' is set to MP_MEM when the allocation fails.
     */
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    do { \
        if (err == MP_OKAY) { \
            n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), h, \
                                    DYNAMIC_TYPE_BIGINT); \
            if (n##d != NULL) { \
                int n##ii; \
                n[0] = n##d; \
                n[0]->size = s; \
                for (n##ii = 1; n##ii < (c); n##ii++) { \
                    n[n##ii] = MP_INT_NEXT(n[n##ii-1], s); \
                    n[n##ii]->size = s; \
                } \
            } \
            else { \
                err = MP_MEM; \
            } \
        } \
    } \
    while (0)
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 small build: n##d is the buffer declared on the stack; entry 0 is
     * the start of it and every later entry is MP_INT_NEXT of the one before.
     */
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    do { \
        if (err == MP_OKAY) { \
            int n##ii; \
            n[0] = (sp_int*)n##d; \
            n[0]->size = s; \
            for (n##ii = 1; n##ii < (c); n##ii++) { \
                n[n##ii] = MP_INT_NEXT(n[n##ii-1], s); \
                n[n##ii]->size = s; \
            } \
        } \
    } \
    while (0)
#else
    /* n##d is an array of 'sp_int' declared on the stack; entry i of 'n' is
     * the address of element i of it.
     */
    #define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    do { \
        if (err == MP_OKAY) { \
            int n##ii; \
            for (n##ii = 0; n##ii < (c); n##ii++) { \
                n[n##ii] = &n##d[n##ii]; \
                n[n##ii]->size = s; \
            } \
        } \
    } \
    while (0)
#endif
246
/* FREE_SP_INT_ARRAY(n, h): dispose of the storage, n##d, behind an array of
 * 'sp_int'; 'h' is the heap hint passed on to XFREE.
 */
#if defined(WOLFSSL_SP_NO_MALLOC) || \
    (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC))
    /* Data was declared on the stack - expands to nothing. */
    #define FREE_SP_INT_ARRAY(n, h)
#else
    /* Data was dynamically allocated - XFREE it when it is not NULL. */
    #define FREE_SP_INT_ARRAY(n, h) \
    do { \
        if (n##d != NULL) { \
            XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
        } \
    } \
    while (0)
#endif
262
263
264 #ifndef WOLFSSL_NO_ASM
265 #ifdef __IAR_SYSTEMS_ICC__
266 #define __asm__ asm
267 #define __volatile__ volatile
268 #endif /* __IAR_SYSTEMS_ICC__ */
269 #ifdef __KEIL__
270 #define __asm__ __asm
271 #define __volatile__ volatile
272 #endif
273
274 #if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
275 /*
276 * CPU: x86_64
277 */
278
/* vh | vl = va * vb
 * va and vb are memory operands; RAX and RDX hold the product of mulq.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* vo | vh | vl = va * vb
 * The product is two digits long, so vo is set to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq $0 , %[o] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* vo | vh | vl += va * vb
 * The carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* vh | vl += va * vb
 * No third digit: a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * The product is computed once and added in twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * Assumes the first add will not overflow vh | vl: only the second add
 * carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* vh | vl = va * va
 * va is a memory operand; RAX and RDX hold the product of mulq.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* vo | vh | vl += va * va
 * The carry out of vh is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* vh | vl += va * va
 * No third digit: a carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* vh | vl += va
 * va is a memory operand; the carry out of vl is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* vh | vl += va
 * As SP_ASM_ADDC but va is a register operand.
 */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vh | vl -= va
 * va is a memory operand; the borrow out of vl is subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * All operands are in registers. The three digit value is added in twice,
 * each time propagating the carry from vl through vh into vo.
 * Only the flags are clobbered: no instruction here uses RAX or RDX, so
 * they are not listed and stay available to the register allocator.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
430
431 #ifndef WOLFSSL_SP_DIV_WORD_HALF
432 /* Divide a two digit number by a digit number and return. (hi | lo) / d
433 *
434 * Using divq instruction on Intel x64.
435 *
436 * @param [in] hi SP integer digit. High digit of the dividend.
437 * @param [in] lo SP integer digit. Lower digit of the dividend.
438 * @param [in] d SP integer digit. Number to divide by.
439 * @reutrn The division result.
440 */
sp_div_word(sp_int_digit hi,sp_int_digit lo,sp_int_digit d)441 static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
442 sp_int_digit d)
443 {
444 __asm__ __volatile__ (
445 "divq %2"
446 : "+a" (lo)
447 : "d" (hi), "r" (d)
448 : "cc"
449 );
450 return lo;
451 }
452 #define SP_ASM_DIV_WORD
453 #endif
454
455 #define SP_INT_ASM_AVAILABLE
456
457 #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
458
459 #if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
460 /*
461 * CPU: x86
462 */
463
/* vh | vl = va * vb
 * va and vb are memory operands; EAX and EDX hold the product of mull.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "eax", "edx", "cc" \
    )
/* vo | vh | vl = va * vb
 * The product is two digits long, so vo is set to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl $0 , %[o] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* vo | vh | vl += va * vb
 * The accumulator digits may be in registers or memory; va and vb are in
 * registers. The carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* vh | vl += va * vb
 * No third digit: a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * The product is computed once and added in twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * Assumes the first add will not overflow vh | vl: only the second add
 * carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* vh | vl = va * va
 * va is a memory operand; EAX and EDX hold the product of mull.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "memory", "eax", "edx", "cc" \
    )
/* vo | vh | vl += va * va
 * The accumulator digits may be in registers or memory. The carry out of vh
 * is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* vh | vl += va * va
 * No third digit: a carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* vh | vl += va
 * va is a memory operand; the carry out of vl is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* vh | vl += va
 * As SP_ASM_ADDC but va is a register operand.
 */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vh | vl -= va
 * va is a memory operand; the borrow out of vl is subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* vo | vh | vl += 2 * (vc | vb | va)
 * All operands are in registers; the three digit value is added in twice.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
615
616 #ifndef WOLFSSL_SP_DIV_WORD_HALF
617 /* Divide a two digit number by a digit number and return. (hi | lo) / d
618 *
619 * Using divl instruction on Intel x64.
620 *
621 * @param [in] hi SP integer digit. High digit of the dividend.
622 * @param [in] lo SP integer digit. Lower digit of the dividend.
623 * @param [in] d SP integer digit. Number to divide by.
624 * @reutrn The division result.
625 */
sp_div_word(sp_int_digit hi,sp_int_digit lo,sp_int_digit d)626 static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
627 sp_int_digit d)
628 {
629 __asm__ __volatile__ (
630 "divl %2"
631 : "+a" (lo)
632 : "d" (hi), "r" (d)
633 : "cc"
634 );
635 return lo;
636 }
637 #define SP_ASM_DIV_WORD
638 #endif
639
640 #define SP_INT_ASM_AVAILABLE
641
642 #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
643
644 #if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
645 /*
646 * CPU: Aarch64
647 */
648
/* vh | vl = va * vb
 * mul gives the low digit and umulh the high digit, written directly.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "cc" \
    )
/* vo | vh | vl = va * vb
 * The low digit goes through x8; vo is set to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        "mov %[l], x8 \n\t" \
        "mov %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8" \
    )
/* vo | vh | vl += va * vb
 * x8 and x9 hold the product; the carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* vh | vl += va * vb
 * No third digit: a carry out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * The product is computed once and added in twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * Assumes the first add will not overflow vh | vl: only the second add
 * carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* vh | vl = va * va
 * mul gives the low digit and umulh the high digit, written directly.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "umulh %[h], %[a], %[a] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "memory" \
    )
/* vo | vh | vl += va * va
 * x8 and x9 hold the product; the carry out of vh is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* vh | vl += va * va
 * No third digit: a carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* vh | vl += va
 * The carry out of vl is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vh | vl -= va
 * The borrow out of vl is subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vo | vh | vl += 2 * (vc | vb | va)
 * The three digit value is added in twice.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
786
787 #define SP_INT_ASM_AVAILABLE
788
789 #endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
790
791 #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
792 SP_WORD_SIZE == 32
793 /*
794 * CPU: ARM32 or Cortex-M4 and similar
795 */
796
/* vh | vl = va * vb
 * A single umull writes both digits of the product.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "umull %[l], %[h], %[a], %[b] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory" \
    )
/* vo | vh | vl = va * vb
 * The product is two digits long, so vo is set to zero.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull %[l], %[h], %[a], %[b] \n\t" \
        "mov %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* vo | vh | vl += va * vb
 * r8 and r9 hold the product; the carry out of vh is added into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[b] \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r8", "r9", "cc" \
    )
/* vh | vl += va * vb
 * A single umlal multiplies and accumulates; there is no third digit.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "umlal %[l], %[h], %[a], %[b] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : \
    )
/* vo | vh | vl += 2 * (va * vb)
 * The product is computed once and added in twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[b] \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r8", "r9", "cc" \
    )
/* vo | vh | vl += 2 * (va * vb)
 * Assumes the first add will not overflow vh | vl: only the second add
 * carries into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[b] \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adc %[h], %[h], r9 \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r8", "r9", "cc" \
    )
/* vh | vl = va * va
 * A single umull writes both digits of the product.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "umull %[l], %[h], %[a], %[a] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "memory" \
    )
/* vo | vh | vl += va * va
 * r8 and r9 hold the product; the carry out of vh is added into vo.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "umull r8, r9, %[a], %[a] \n\t" \
        "adds %[l], %[l], r8 \n\t" \
        "adcs %[h], %[h], r9 \n\t" \
        "adc %[o], %[o], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r8", "r9", "cc" \
    )
/* vh | vl += va * va
 * A single umlal multiplies and accumulates; there is no third digit.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "umlal %[l], %[h], %[a], %[a] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vh | vl += va
 * The carry out of vl is added into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vh | vl -= va
 * The borrow out of vl is subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], #0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* vo | vh | vl += 2 * (vc | vb | va)
 * The three digit value is added in twice.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
920
921 #define SP_INT_ASM_AVAILABLE
922
923 #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
924
925 #if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
926 /*
927 * CPU: PPC64
928 */
929
930 /* Multiply va by vb and store double size result in: vh | vl */
931 #define SP_ASM_MUL(vl, vh, va, vb) \
932 __asm__ __volatile__ ( \
933 "mulld %[l], %[a], %[b] \n\t" \
934 "mulhdu %[h], %[a], %[b] \n\t" \
935 : [h] "+r" (vh), [l] "+r" (vl) \
936 : [a] "r" (va), [b] "r" (vb) \
937 : "memory" \
938 )
939 /* Multiply va by vb and store double size result in: vo | vh | vl */
940 #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
941 __asm__ __volatile__ ( \
942 "mulhdu %[h], %[a], %[b] \n\t" \
943 "mulld %[l], %[a], %[b] \n\t" \
944 "li %[o], 0 \n\t" \
945 : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
946 : [a] "r" (va), [b] "r" (vb) \
947 : \
948 )
949 /* Multiply va by vb and add double size result into: vo | vh | vl */
950 #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
951 __asm__ __volatile__ ( \
952 "mulld 16, %[a], %[b] \n\t" \
953 "mulhdu 17, %[a], %[b] \n\t" \
954 "addc %[l], %[l], 16 \n\t" \
955 "adde %[h], %[h], 17 \n\t" \
956 "addze %[o], %[o] \n\t" \
957 : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
958 : [a] "r" (va), [b] "r" (vb) \
959 : "16", "17", "cc" \
960 )
961 /* Multiply va by vb and add double size result into: vh | vl */
962 #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
963 __asm__ __volatile__ ( \
964 "mulld 16, %[a], %[b] \n\t" \
965 "mulhdu 17, %[a], %[b] \n\t" \
966 "addc %[l], %[l], 16 \n\t" \
967 "adde %[h], %[h], 17 \n\t" \
968 : [l] "+r" (vl), [h] "+r" (vh) \
969 : [a] "r" (va), [b] "r" (vb) \
970 : "16", "17", "cc" \
971 )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The product is computed once into r16/r17, then the three-word carry chain
 * (addc, adde, addze) is run two times.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[x], %[y]\n\t" \
        "mulhdu 17, %[x], %[y]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The first add covers vh | vl only (the caller guarantees it cannot
 * overflow); only the second add propagates its carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulld 16, %[x], %[y]\n\t" \
        "mulhdu 17, %[x], %[y]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Square va and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va is still read by the second.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld %[rl], %[x], %[x]\n\t" \
        "mulhdu %[rh], %[x], %[x]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va) \
        : "memory" \
    )
/* Accumulate the double size square of va into: vo | vh | vl
 * The square is formed in r16 (low) and r17 (high) and added with the carry
 * chain addc -> adde -> addze.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[x], %[x]\n\t" \
        "mulhdu 17, %[x], %[x]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va) \
        : "16", "17", "cc" \
    )
/* Accumulate the double size square of va into: vh | vl
 * The square goes through r16/r17; any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mulld 16, %[x], %[x]\n\t" \
        "mulhdu 17, %[x], %[x]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "16", "17", "cc" \
    )
/* Add the single word va into the two-word value: vh | vl
 * addc adds into vl and records the carry; addze folds that carry into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[rl], %[rl], %[x]\n\t" \
        "addze %[rh], %[rh]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "cc" \
    )
/* Subtract the single word va from the two-word value: vh | vl
 * subfc computes vl - va and records the borrow in the carry bit; subfe then
 * subtracts zero (held in r16) from vh together with that borrow.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[rl], %[x], %[rl]\n\t" \
        "li 16, 0\n\t" \
        "subfe %[rh], 16, %[rh]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "16", "cc" \
    )
/* Add the three-word value vc | vb | va twice into: vo | vh | vl
 * Each pass is one addc -> adde -> adde carry chain over the three words.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[rl], %[rl], %[x]\n\t" \
        "adde %[rh], %[rh], %[y]\n\t" \
        "adde %[ro], %[ro], %[z]\n\t" \
        "addc %[rl], %[rl], %[x]\n\t" \
        "adde %[rh], %[rh], %[y]\n\t" \
        "adde %[ro], %[ro], %[z]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc) \
        : "cc" \
    )
1067
/* Set once the SP_ASM_* primitives for this CPU have been defined.
 * NOTE(review): presumably tested later in the file to enable the assembly
 * code paths - confirm against the users of this macro.
 */
#define SP_INT_ASM_AVAILABLE
1069
1070 #endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
1071
1072 #if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
1073 /*
1074 * CPU: PPC 32-bit
1075 */
1076
/* Multiply va by vb and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va and vb are still read by the second.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw %[rl], %[x], %[y]\n\t" \
        "mulhwu %[rh], %[x], %[y]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va), [y] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is cleared). All results are pure outputs ("="), so the caller need not
 * initialise them. vh is early-clobber ("&"): it is written first, while va
 * and vb are still read by the following multiply.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhwu %[rh], %[x], %[y]\n\t" \
        "mullw %[rl], %[x], %[y]\n\t" \
        "li %[ro], 0\n\t" \
        : [rl] "=r" (vl), [rh] "=&r" (vh), [ro] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : \
    )
/* Accumulate the double size product va * vb into: vo | vh | vl
 * The product halves are formed in r16 (low) and r17 (high) and added with a
 * carry chain: addc -> adde -> addze.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[x], %[y]\n\t" \
        "mulhwu 17, %[x], %[y]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Accumulate the double size product va * vb into: vh | vl
 * Product halves go through r16/r17; there is no overflow word, so any carry
 * out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[x], %[y]\n\t" \
        "mulhwu 17, %[x], %[y]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va), [y] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The product is computed once into r16/r17, then the three-word carry chain
 * (addc, adde, addze) is run two times.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[x], %[y]\n\t" \
        "mulhwu 17, %[x], %[y]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The first add covers vh | vl only (the caller guarantees it cannot
 * overflow); only the second add propagates its carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mullw 16, %[x], %[y]\n\t" \
        "mulhwu 17, %[x], %[y]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "16", "17", "cc" \
    )
/* Square va and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va is still read by the second.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw %[rl], %[x], %[x]\n\t" \
        "mulhwu %[rh], %[x], %[x]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va) \
        : "memory" \
    )
/* Accumulate the double size square of va into: vo | vh | vl
 * The square is formed in r16 (low) and r17 (high) and added with the carry
 * chain addc -> adde -> addze.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[x], %[x]\n\t" \
        "mulhwu 17, %[x], %[x]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        "addze %[ro], %[ro]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va) \
        : "16", "17", "cc" \
    )
/* Accumulate the double size square of va into: vh | vl
 * The square goes through r16/r17; any carry out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mullw 16, %[x], %[x]\n\t" \
        "mulhwu 17, %[x], %[x]\n\t" \
        "addc %[rl], %[rl], 16\n\t" \
        "adde %[rh], %[rh], 17\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "16", "17", "cc" \
    )
/* Add the single word va into the two-word value: vh | vl
 * addc adds into vl and records the carry; addze folds that carry into vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addc %[rl], %[rl], %[x]\n\t" \
        "addze %[rh], %[rh]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "cc" \
    )
/* Subtract the single word va from the two-word value: vh | vl
 * subfc computes vl - va and records the borrow in the carry bit; subfe then
 * subtracts zero (held in r16) from vh together with that borrow.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subfc %[rl], %[x], %[rl]\n\t" \
        "li 16, 0\n\t" \
        "subfe %[rh], 16, %[rh]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "16", "cc" \
    )
/* Add the three-word value vc | vb | va twice into: vo | vh | vl
 * Each pass is one addc -> adde -> adde carry chain over the three words.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addc %[rl], %[rl], %[x]\n\t" \
        "adde %[rh], %[rh], %[y]\n\t" \
        "adde %[ro], %[ro], %[z]\n\t" \
        "addc %[rl], %[rl], %[x]\n\t" \
        "adde %[rh], %[rh], %[y]\n\t" \
        "adde %[ro], %[ro], %[z]\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc) \
        : "cc" \
    )
1214
/* Set once the SP_ASM_* primitives for this CPU have been defined.
 * NOTE(review): presumably tested later in the file to enable the assembly
 * code paths - confirm against the users of this macro.
 */
#define SP_INT_ASM_AVAILABLE
1216
1217 #endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 64 */
1218
1219 #if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
1220 /*
1221 * CPU: MIPS 64-bit
1222 */
1223
/* Multiply va by vb and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. Both are written (mflo/mfhi) only after the
 * multiply has consumed va and vb, so no early-clobber is needed.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[y]\n\t" \
        "mflo %[rl]\n\t" \
        "mfhi %[rh]\n\t" \
        : [rh] "=r" (vh), [rl] "=r" (vl) \
        : [x] "r" (va), [y] "r" (vb) \
        : "memory", "$lo", "$hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is cleared). All results are pure outputs ("="), so the caller need not
 * initialise them; they are written only after the multiply has consumed va
 * and vb, so no early-clobber is needed.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[y]\n\t" \
        "mflo %[rl]\n\t" \
        "mfhi %[rh]\n\t" \
        "move %[ro], $0\n\t" \
        : [rl] "=r" (vl), [rh] "=r" (vh), [ro] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$lo", "$hi" \
    )
/* Accumulate the double size product va * vb into: vo | vh | vl
 * $10/$11 receive the product (LO/HI). $12 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Accumulate the double size product va * vb into: vh | vl
 * $10/$11 receive the product (LO/HI); $12 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The product is read once into $10 (low) / $11 (high); the three-word add,
 * with sltu-derived carries in $12, is then performed two times.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The first add covers vh | vl only (the caller guarantees it cannot
 * overflow); the second add also propagates its carries ($12) into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Square va and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. Both are written (mflo/mfhi) only after the
 * multiply has consumed va, so no early-clobber is needed.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[x]\n\t" \
        "mflo %[rl]\n\t" \
        "mfhi %[rh]\n\t" \
        : [rh] "=r" (vh), [rl] "=r" (vl) \
        : [x] "r" (va) \
        : "memory", "$lo", "$hi" \
    )
/* Accumulate the double size square of va into: vo | vh | vl
 * $10/$11 receive the square (LO/HI). $12 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[x]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Accumulate the double size square of va into: vh | vl
 * $10/$11 receive the square (LO/HI); $12 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "dmultu %[x], %[x]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "daddu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "daddu %[rh], %[rh], $11\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "$10", "$11", "$12", "$lo", "$hi" \
    )
/* Add the single word va into the two-word value: vh | vl
 * The carry out of vl is recovered with sltu into $12 and added to vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "daddu %[rl], %[rl], %[x]\n\t" \
        "sltu $12, %[rl], %[x]\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "$12" \
    )
/* Subtract the single word va from the two-word value: vh | vl
 * The old vl is kept in $12; the borrow is (old vl < new vl), computed with
 * sltu and then subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[rl]\n\t" \
        "dsubu %[rl], $12, %[x]\n\t" \
        "sltu $12, $12, %[rl]\n\t" \
        "dsubu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "$12" \
    )
/* Add the three-word value vc | vb | va twice into: vo | vh | vl
 * Each of the two passes adds word by word, using sltu to recover every
 * carry into $12 and pushing it into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "daddu %[rl], %[rl], %[x]\n\t" \
        "sltu $12, %[rl], %[x]\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], %[y]\n\t" \
        "sltu $12, %[rh], %[y]\n\t" \
        "daddu %[ro], %[ro], %[z]\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rl], %[rl], %[x]\n\t" \
        "sltu $12, %[rl], %[x]\n\t" \
        "daddu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        "daddu %[rh], %[rh], %[y]\n\t" \
        "sltu $12, %[rh], %[y]\n\t" \
        "daddu %[ro], %[ro], %[z]\n\t" \
        "daddu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc) \
        : "$12" \
    )
1415
/* Set once the SP_ASM_* primitives for this CPU have been defined.
 * NOTE(review): presumably tested later in the file to enable the assembly
 * code paths - confirm against the users of this macro.
 */
#define SP_INT_ASM_AVAILABLE
1417
1418 #endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
1419
1420 #if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
1421 /*
1422 * CPU: MIPS 32-bit
1423 */
1424
/* Multiply va by vb and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. Both are written (mflo/mfhi) only after the
 * multiply has consumed va and vb, so no early-clobber is needed.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[x], %[y]\n\t" \
        "mflo %[rl]\n\t" \
        "mfhi %[rh]\n\t" \
        : [rh] "=r" (vh), [rl] "=r" (vl) \
        : [x] "r" (va), [y] "r" (vb) \
        : "memory", "%lo", "%hi" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is cleared). All results are pure outputs ("="), so the caller need not
 * initialise them; they are written only after the multiply has consumed va
 * and vb, so no early-clobber is needed.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[x], %[y]\n\t" \
        "mflo %[rl]\n\t" \
        "mfhi %[rh]\n\t" \
        "move %[ro], $0\n\t" \
        : [rl] "=r" (vl), [rh] "=r" (vh), [ro] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "%lo", "%hi" \
    )
/* Accumulate the double size product va * vb into: vo | vh | vl
 * $10/$11 receive the product (LO/HI). $12 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Accumulate the double size product va * vb into: vh | vl
 * $10/$11 receive the product (LO/HI); $12 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The product is read once into $10 (low) / $11 (high); the three-word add,
 * with sltu-derived carries in $12, is then performed two times.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The first add covers vh | vl only (the caller guarantees it cannot
 * overflow); the second add also propagates its carries ($12) into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "multu %[x], %[y]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Square va and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. Both are written (mflo/mfhi) only after the
 * multiply has consumed va, so no early-clobber is needed.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[x], %[x]\n\t" \
        "mflo %[rl]\n\t" \
        "mfhi %[rh]\n\t" \
        : [rh] "=r" (vh), [rl] "=r" (vl) \
        : [x] "r" (va) \
        : "memory", "%lo", "%hi" \
    )
/* Accumulate the double size square of va into: vo | vh | vl
 * $10/$11 receive the square (LO/HI). $12 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "multu %[x], %[x]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "sltu $12, %[rh], $11\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Accumulate the double size square of va into: vh | vl
 * $10/$11 receive the square (LO/HI); $12 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "multu %[x], %[x]\n\t" \
        "mflo $10\n\t" \
        "mfhi $11\n\t" \
        "addu %[rl], %[rl], $10\n\t" \
        "sltu $12, %[rl], $10\n\t" \
        "addu %[rh], %[rh], $11\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "$10", "$11", "$12", "%lo", "%hi" \
    )
/* Add the single word va into the two-word value: vh | vl
 * The carry out of vl is recovered with sltu into $12 and added to vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addu %[rl], %[rl], %[x]\n\t" \
        "sltu $12, %[rl], %[x]\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "$12" \
    )
/* Subtract the single word va from the two-word value: vh | vl
 * The old vl is kept in $12; the borrow is (old vl < new vl), computed with
 * sltu and then subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "move $12, %[rl]\n\t" \
        "subu %[rl], $12, %[x]\n\t" \
        "sltu $12, $12, %[rl]\n\t" \
        "subu %[rh], %[rh], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "$12" \
    )
/* Add the three-word value vc | vb | va twice into: vo | vh | vl
 * Each of the two passes adds word by word, using sltu to recover every
 * carry into $12 and pushing it into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addu %[rl], %[rl], %[x]\n\t" \
        "sltu $12, %[rl], %[x]\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], %[y]\n\t" \
        "sltu $12, %[rh], %[y]\n\t" \
        "addu %[ro], %[ro], %[z]\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rl], %[rl], %[x]\n\t" \
        "sltu $12, %[rl], %[x]\n\t" \
        "addu %[rh], %[rh], $12\n\t" \
        "sltu $12, %[rh], $12\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        "addu %[rh], %[rh], %[y]\n\t" \
        "sltu $12, %[rh], %[y]\n\t" \
        "addu %[ro], %[ro], %[z]\n\t" \
        "addu %[ro], %[ro], $12\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc) \
        : "$12" \
    )
1616
/* Set once the SP_ASM_* primitives for this CPU have been defined.
 * NOTE(review): presumably tested later in the file to enable the assembly
 * code paths - confirm against the users of this macro.
 */
#define SP_INT_ASM_AVAILABLE
1618
1619 #endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
1620
1621 #if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
1622 /*
1623 * CPU: RISCV 64-bit
1624 */
1625
/* Multiply va by vb and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va and vb are still read by the second.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[rl], %[x], %[y]\n\t" \
        "mulhu %[rh], %[x], %[y]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va), [y] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is cleared). All results are pure outputs ("="), so the caller need not
 * initialise them. vh is early-clobber ("&"): it is written first, while va
 * and vb are still read by the following multiply.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhu %[rh], %[x], %[y]\n\t" \
        "mul %[rl], %[x], %[y]\n\t" \
        "add %[ro], zero, zero\n\t" \
        : [rl] "=r" (vl), [rh] "=&r" (vh), [ro] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : \
    )
/* Accumulate the double size product va * vb into: vo | vh | vl
 * a5/a6 hold the product (low/high). a7 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size product va * vb into: vh | vl
 * a5/a6 hold the product (low/high); a7 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The product is computed once into a5 (low) / a6 (high); the three-word add,
 * with sltu-derived carries in a7, is then performed two times.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The first add covers vh | vl only (the caller guarantees it cannot
 * overflow); the second add also propagates its carries (a7) into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Square va and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va is still read by the second.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[rl], %[x], %[x]\n\t" \
        "mulhu %[rh], %[x], %[x]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va) \
        : "memory" \
    )
/* Accumulate the double size square of va into: vo | vh | vl
 * a5/a6 hold the square (low/high). a7 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[x]\n\t" \
        "mulhu a6, %[x], %[x]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size square of va into: vh | vl
 * a5/a6 hold the square (low/high); a7 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[x]\n\t" \
        "mulhu a6, %[x], %[x]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Add the single word va into the two-word value: vh | vl
 * The carry out of vl is recovered with sltu into a7 and added to vh.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[rl], %[rl], %[x]\n\t" \
        "sltu a7, %[rl], %[x]\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "a7" \
    )
/* Subtract the single word va from the two-word value: vh | vl
 * The old vl is kept in a7; the borrow is (old vl < new vl), computed with
 * sltu and then subtracted from vh.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add a7, %[rl], zero\n\t" \
        "sub %[rl], a7, %[x]\n\t" \
        "sltu a7, a7, %[rl]\n\t" \
        "sub %[rh], %[rh], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va) \
        : "a7" \
    )
/* Add the three-word value vc | vb | va twice into: vo | vh | vl
 * Each of the two passes adds word by word, using sltu to recover every
 * carry into a7 and pushing it into the next word up.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[rl], %[rl], %[x]\n\t" \
        "sltu a7, %[rl], %[x]\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], %[y]\n\t" \
        "sltu a7, %[rh], %[y]\n\t" \
        "add %[ro], %[ro], %[z]\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rl], %[rl], %[x]\n\t" \
        "sltu a7, %[rl], %[x]\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], %[y]\n\t" \
        "sltu a7, %[rh], %[y]\n\t" \
        "add %[ro], %[ro], %[z]\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb), [z] "r" (vc) \
        : "a7" \
    )
1808
/* Set once the SP_ASM_* primitives for this CPU have been defined.
 * NOTE(review): presumably tested later in the file to enable the assembly
 * code paths - confirm against the users of this macro.
 */
#define SP_INT_ASM_AVAILABLE
1810
1811 #endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
1812
1813 #if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
1814 /*
1815 * CPU: RISCV 32-bit
1816 */
1817
/* Multiply va by vb and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va and vb are still read by the second.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[rl], %[x], %[y]\n\t" \
        "mulhu %[rh], %[x], %[y]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va), [y] "r" (vb) \
        : "memory" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * (vo is cleared). All results are pure outputs ("="), so the caller need not
 * initialise them. vh is early-clobber ("&"): it is written first, while va
 * and vb are still read by the following multiply.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mulhu %[rh], %[x], %[y]\n\t" \
        "mul %[rl], %[x], %[y]\n\t" \
        "add %[ro], zero, zero\n\t" \
        : [rl] "=r" (vl), [rh] "=&r" (vh), [ro] "=r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : \
    )
/* Accumulate the double size product va * vb into: vo | vh | vl
 * a5/a6 hold the product (low/high). a7 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size product va * vb into: vh | vl
 * a5/a6 hold the product (low/high); a7 carries from vl into vh. Any carry
 * out of vh is not recorded.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The product is computed once into a5 (low) / a6 (high); the three-word add,
 * with sltu-derived carries in a7, is then performed two times.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Accumulate the double size product va * vb twice into: vo | vh | vl
 * The first add covers vh | vl only (the caller guarantees it cannot
 * overflow); the second add also propagates its carries (a7) into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[y]\n\t" \
        "mulhu a6, %[x], %[y]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va), [y] "r" (vb) \
        : "a5", "a6", "a7" \
    )
/* Square va and store double size result in: vh | vl
 * vl and vh are write-only, so they are pure outputs ("=") and need not be
 * initialised by the caller. vl is early-clobber ("&"): the first instruction
 * writes it while va is still read by the second.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[rl], %[x], %[x]\n\t" \
        "mulhu %[rh], %[x], %[x]\n\t" \
        : [rh] "=r" (vh), [rl] "=&r" (vl) \
        : [x] "r" (va) \
        : "memory" \
    )
/* Accumulate the double size square of va into: vo | vh | vl
 * a5/a6 hold the square (low/high). a7 holds each carry, recovered with
 * sltu: an unsigned sum smaller than its addend means the add wrapped.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[x], %[x]\n\t" \
        "mulhu a6, %[x], %[x]\n\t" \
        "add %[rl], %[rl], a5\n\t" \
        "sltu a7, %[rl], a5\n\t" \
        "add %[rh], %[rh], a7\n\t" \
        "sltu a7, %[rh], a7\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        "add %[rh], %[rh], a6\n\t" \
        "sltu a7, %[rh], a6\n\t" \
        "add %[ro], %[ro], a7\n\t" \
        : [rl] "+r" (vl), [rh] "+r" (vh), [ro] "+r" (vo) \
        : [x] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Square va and add double size result into: vh | vl
 *
 * a5 receives the low word and a6 the high word of va * va. The carry out of
 * the low word addition (sltu into a7) is added to vh; any carry out of vh
 * is not recorded.
 *
 * vl and vh are read-write operands; va is read only.
 * Registers a5, a6 and a7 are listed as clobbered.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul a5, %[a], %[a] \n\t" \
        "mulhu a6, %[a], %[a] \n\t" \
        "add %[l], %[l], a5 \n\t" \
        "sltu a7, %[l], a5 \n\t" \
        "add %[h], %[h], a6 \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a5", "a6", "a7" \
    )
/* Add va into: vh | vl
 *
 * va is added to vl; the carry (sltu: new vl < va) is placed in a7 and added
 * to vh. Any carry out of vh is not recorded.
 *
 * vl and vh are read-write operands; va is read only. a7 is clobbered.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Sub va from: vh | vl
 *
 * The original vl is first copied into a7 so that, after the subtraction,
 * the borrow can be detected as (original vl < new vl). The borrow is then
 * subtracted from vh. Any borrow out of vh is not recorded.
 *
 * vl and vh are read-write operands; va is read only. a7 is clobbered.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "add a7, %[l], zero \n\t" \
        "sub %[l], a7, %[a] \n\t" \
        "sltu a7, a7, %[l] \n\t" \
        "sub %[h], %[h], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "a7" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The same three word addition sequence is issued twice. In each pass:
 *   - va is added to vl and the carry is rippled through vh into vo,
 *   - vb is added to vh and the carry is added to vo,
 *   - vc is added to vo.
 * Carries are computed with sltu into a7. Any carry out of vo is not
 * recorded.
 *
 * vl, vh and vo are read-write operands; va, vb and vc are read only.
 * a7 is clobbered.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[l], %[l], %[a] \n\t" \
        "sltu a7, %[l], %[a] \n\t" \
        "add %[h], %[h], a7 \n\t" \
        "sltu a7, %[h], a7 \n\t" \
        "add %[o], %[o], a7 \n\t" \
        "add %[h], %[h], %[b] \n\t" \
        "sltu a7, %[h], %[b] \n\t" \
        "add %[o], %[o], %[c] \n\t" \
        "add %[o], %[o], a7 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "a7" \
    )
2000
2001 #define SP_INT_ASM_AVAILABLE
2002
2003 #endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
2004
2005 #if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
/*
 * CPU: IBM s390x
 */
2009
/* Multiply va by vb and store double size result in: vh | vl
 *
 * va is copied into r1 and multiplied by vb with mlgr. The low half of the
 * product is then taken from r1 into vl and the high half from r0 into vh.
 *
 * vl and vh are declared read-write ("+r"); va and vb are read only.
 * r0, r1 and "memory" are listed as clobbered.
 */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "r0", "r1" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 *
 * Same multiplication as SP_ASM_MUL (va in r1, mlgr by vb, result read from
 * r0 and r1); in addition vo is set to zero with lghi.
 *
 * vo is a write-only operand ("=r"); vl and vh are declared read-write.
 * r0 and r1 are listed as clobbered.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "lghi %[o], 0 \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 *
 * The product is formed in r0 (high) and r1 (low). The low half is added to
 * vl with algr, the high half to vh with alcgr (add with carry), and r10,
 * which was loaded with zero, is added with carry to vo so that only the
 * carry reaches vo.
 *
 * vl, vh and vo are read-write operands; va and vb are read only.
 * r0, r1, r10 and the condition code are listed as clobbered.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl
 *
 * The product is formed in r0 (high) and r1 (low). The low half is added to
 * vl with algr and the high half to vh with alcgr (add with carry). Any
 * carry out of vh is not recorded.
 *
 * vl and vh are read-write operands; va and vb are read only.
 * r0, r1 and the condition code are listed as clobbered.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 *
 * The product is formed once in r0 (high) and r1 (low) and the three
 * instruction add sequence (algr, alcgr, alcgr with zero in r10) is issued
 * two times, so carries reach vo after each addition.
 *
 * vl, vh and vo are read-write operands; va and vb are read only.
 * r0, r1, r10 and the condition code are listed as clobbered.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 *
 * The product is formed once in r0 (high) and r1 (low). The first addition
 * only updates vl and vh (algr, alcgr); the second addition additionally
 * adds the carry into vo using the zero held in r10.
 *
 * vl, vh and vo are read-write operands; va and vb are read only.
 * r0, r1, r10 and the condition code are listed as clobbered.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %[b] \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "r0", "r1", "r10", "cc" \
    )
/* Square va and store double size result in: vh | vl
 *
 * va is copied into r1 and mlgr multiplies by r1 itself. The low half of the
 * product is then taken from r1 into vl and the high half from r0 into vh.
 *
 * vl and vh are declared read-write ("+r"); va is read only.
 * r0, r1 and "memory" are listed as clobbered.
 */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "lgr %[l], %%r1 \n\t" \
        "lgr %[h], %%r0 \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory", "r0", "r1" \
    )
/* Square va and add double size result into: vo | vh | vl
 *
 * The square is formed in r0 (high) and r1 (low). The low half is added to
 * vl with algr, the high half to vh with alcgr, and the zero held in r10 is
 * added with carry to vo so that only the carry reaches vo.
 *
 * vl, vh and vo are read-write operands; va is read only.
 * r0, r1, r10 and the condition code are listed as clobbered.
 */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        "alcgr %[o], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "r0", "r1", "r10", "cc" \
    )
/* Square va and add double size result into: vh | vl
 *
 * The square is formed in r0 (high) and r1 (low). The low half is added to
 * vl with algr and the high half to vh with alcgr. Any carry out of vh is
 * not recorded.
 *
 * vl and vh are read-write operands; va is read only.
 * r0, r1 and the condition code are listed as clobbered.
 */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lgr %%r1, %[a] \n\t" \
        "mlgr %%r0, %%r1 \n\t" \
        "algr %[l], %%r1 \n\t" \
        "alcgr %[h], %%r0 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r0", "r1", "cc" \
    )
/* Add va into: vh | vl
 *
 * va is added to vl with algr; r10 is loaded with zero and added with carry
 * to vh so that only the carry out of vl reaches vh.
 *
 * vl and vh are read-write operands; va is read only.
 * r10 and the condition code are listed as clobbered.
 */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r10", "cc" \
    )
/* Sub va from: vh | vl
 *
 * va is subtracted from vl with slgr; r10 is loaded with zero and subtracted
 * with borrow (slbgr) from vh so that only the borrow out of vl reaches vh.
 *
 * vl and vh are read-write operands; va is read only.
 * r10 and the condition code are listed as clobbered.
 */
#define SP_ASM_SUBC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lghi %%r10, 0 \n\t" \
        "slgr %[l], %[a] \n\t" \
        "slbgr %[h], %%r10 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "r10", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 *
 * The three word addition (algr for the low word, alcgr for the middle and
 * high words) is issued twice. Any carry out of vo is not recorded.
 *
 * vl, vh and vo are read-write operands; va, vb and vc are read only.
 * Only the condition code is listed as clobbered.
 */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %[b] \n\t" \
        "alcgr %[o], %[c] \n\t" \
        "algr %[l], %[a] \n\t" \
        "alcgr %[h], %[b] \n\t" \
        "alcgr %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
2158
2159 #define SP_INT_ASM_AVAILABLE
2160
2161 #endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
2162
/* An architecture section above defined SP_INT_ASM_AVAILABLE: define
 * SQR_MUL_ASM unless the build has defined SP_INT_NO_ASM.
 */
#ifdef SP_INT_ASM_AVAILABLE
    #ifndef SP_INT_NO_ASM
        #define SQR_MUL_ASM
    #endif
    /* Fall back to SP_ASM_ADDC when no SP_ASM_ADDC_REG has been defined. */
    #ifndef SP_ASM_ADDC_REG
        #define SP_ASM_ADDC_REG SP_ASM_ADDC
    #endif /* SP_ASM_ADDC_REG */
#endif /* SP_INT_ASM_AVAILABLE */
2171
2172 #endif /* !WOLFSSL_NO_ASM */
2173
2174
2175 #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
2176 !defined(NO_DSA) || !defined(NO_DH) || \
2177 (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
2178 (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
2179 #ifndef WC_NO_CACHE_RESISTANT
/* Address masks for constant time operations.
 * Entry 0 has no bits set (off) and entry 1 has every bit set (on).
 */
const size_t sp_off_on_addr[2] =
{
    (size_t)0,
    ~(size_t)0
};
2186 #endif
2187 #endif
2188
2189
2190 #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
2191
2192 #ifdef __cplusplus
2193 extern "C" {
2194 #endif
2195
/* Modular exponentiation implementations using Single Precision.
 *
 * Only the prototypes appear here; one function is declared for each of the
 * sizes 1024, 1536, 2048, 3072 and 4096.
 * NOTE(review): from the parameter names these presumably compute
 * res = base ^ exp mod mod and return an MP_* error code - confirm against
 * the definitions.
 */
WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
    sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
    sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
    sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
    sp_int* res);
WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
    sp_int* res);
2207
2208 #ifdef __cplusplus
2209 } /* extern "C" */
2210 #endif
2211
2212 #endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
2213
2214
2215 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
/* Forward declaration so that code ahead of the definition can call it.
 * NOTE(review): by its name this is the Montgomery reduction of a modulo m
 * with mp as the Montgomery constant - confirm against the definition.
 */
static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp);
2217 #endif
2218
2219 /* Set the multi-precision number to zero.
2220 *
2221 * Assumes a is not NULL.
2222 *
2223 * @param [out] a SP integer to set to zero.
2224 */
_sp_zero(sp_int * a)2225 static void _sp_zero(sp_int* a)
2226 {
2227 a->used = 0;
2228 a->dp[0] = 0;
2229 #ifdef WOLFSSL_SP_INT_NEGATIVE
2230 a->sign = MP_ZPOS;
2231 #endif
2232 }
2233
2234 /* Initialize the multi-precision number to be zero.
2235 *
2236 * @param [out] a SP integer.
2237 *
2238 * @return MP_OKAY on success.
2239 * @return MP_VAL when a is NULL.
2240 */
sp_init(sp_int * a)2241 int sp_init(sp_int* a)
2242 {
2243 int err = MP_OKAY;
2244
2245 if (a == NULL) {
2246 err = MP_VAL;
2247 }
2248 if (err == MP_OKAY) {
2249 _sp_zero(a);
2250 a->size = SP_INT_DIGITS;
2251 #ifdef HAVE_WOLF_BIGINT
2252 wc_bigint_init(&a->raw);
2253 #endif
2254 }
2255
2256 return err;
2257 }
2258
/* Initialize the multi-precision number to be zero and record its capacity.
 *
 * The number is first initialized with sp_init() and then the size field is
 * overwritten with the value passed in. The value of size is not validated.
 *
 * @param  [out]  a     SP integer.
 * @param  [in]   size  Number of digits to record as available in a.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a is NULL.
 */
int sp_init_size(sp_int* a, int size)
{
    int ret = sp_init(a);

    if (ret != MP_OKAY) {
        return ret;
    }

    a->size = size;
    return MP_OKAY;
}
2269
2270 #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
2271 /* Initialize up to six multi-precision numbers to be zero.
2272 *
2273 * @param [out] n1 SP integer.
2274 * @param [out] n2 SP integer.
2275 * @param [out] n3 SP integer.
2276 * @param [out] n4 SP integer.
2277 * @param [out] n5 SP integer.
2278 * @param [out] n6 SP integer.
2279 *
2280 * @return MP_OKAY on success.
2281 */
sp_init_multi(sp_int * n1,sp_int * n2,sp_int * n3,sp_int * n4,sp_int * n5,sp_int * n6)2282 int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
2283 sp_int* n6)
2284 {
2285 if (n1 != NULL) {
2286 _sp_zero(n1);
2287 n1->dp[0] = 0;
2288 n1->size = SP_INT_DIGITS;
2289 }
2290 if (n2 != NULL) {
2291 _sp_zero(n2);
2292 n2->dp[0] = 0;
2293 n2->size = SP_INT_DIGITS;
2294 }
2295 if (n3 != NULL) {
2296 _sp_zero(n3);
2297 n3->dp[0] = 0;
2298 n3->size = SP_INT_DIGITS;
2299 }
2300 if (n4 != NULL) {
2301 _sp_zero(n4);
2302 n4->dp[0] = 0;
2303 n4->size = SP_INT_DIGITS;
2304 }
2305 if (n5 != NULL) {
2306 _sp_zero(n5);
2307 n5->dp[0] = 0;
2308 n5->size = SP_INT_DIGITS;
2309 }
2310 if (n6 != NULL) {
2311 _sp_zero(n6);
2312 n6->dp[0] = 0;
2313 n6->size = SP_INT_DIGITS;
2314 }
2315
2316 return MP_OKAY;
2317 }
2318 #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
2319
2320 /* Free the memory allocated in the multi-precision number.
2321 *
2322 * @param [in] a SP integer.
2323 */
sp_free(sp_int * a)2324 void sp_free(sp_int* a)
2325 {
2326 if (a != NULL) {
2327 #ifdef HAVE_WOLF_BIGINT
2328 wc_bigint_free(&a->raw);
2329 #endif
2330 }
2331 }
2332
2333 #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
2334 /* Grow multi-precision number to be able to hold l digits.
2335 * This function does nothing as the number of digits is fixed.
2336 *
2337 * @param [in,out] a SP integer.
2338 * @param [in] l Number of digits to grow to.
2339 *
2340 * @return MP_OKAY on success
2341 * @return MP_MEM if the number of digits requested is more than available.
2342 */
sp_grow(sp_int * a,int l)2343 int sp_grow(sp_int* a, int l)
2344 {
2345 int err = MP_OKAY;
2346
2347 if (a == NULL) {
2348 err = MP_VAL;
2349 }
2350 if ((err == MP_OKAY) && (l > a->size)) {
2351 err = MP_MEM;
2352 }
2353 if (err == MP_OKAY) {
2354 int i;
2355
2356 for (i = a->used; i < l; i++) {
2357 a->dp[i] = 0;
2358 }
2359 }
2360
2361 return err;
2362 }
2363 #endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
2364
2365 #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(HAVE_ECC)
2366 /* Set the multi-precision number to zero.
2367 *
2368 * @param [out] a SP integer to set to zero.
2369 */
sp_zero(sp_int * a)2370 void sp_zero(sp_int* a)
2371 {
2372 if (a != NULL) {
2373 _sp_zero(a);
2374 }
2375 }
2376 #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
2377
2378 /* Clear the data from the multi-precision number and set to zero.
2379 *
2380 * @param [out] a SP integer.
2381 */
sp_clear(sp_int * a)2382 void sp_clear(sp_int* a)
2383 {
2384 if (a != NULL) {
2385 int i;
2386
2387 for (i = 0; i < a->used; i++) {
2388 a->dp[i] = 0;
2389 }
2390 _sp_zero(a);
2391 }
2392 }
2393
2394 #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
2395 /* Ensure the data in the multi-precision number is zeroed.
2396 *
2397 * Use when security sensitive data needs to be wiped.
2398 *
2399 * @param [in] a SP integer.
2400 */
sp_forcezero(sp_int * a)2401 void sp_forcezero(sp_int* a)
2402 {
2403 ForceZero(a->dp, a->used * sizeof(sp_int_digit));
2404 _sp_zero(a);
2405 #ifdef HAVE_WOLF_BIGINT
2406 wc_bigint_zero(&a->raw);
2407 #endif
2408 }
2409 #endif /* !WOLFSSL_RSA_VERIFY_ONLY || !NO_DH || HAVE_ECC */
2410
2411 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
2412 !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
2413 /* Copy value of multi-precision number a into r.
2414 *
2415 * @param [in] a SP integer - source.
2416 * @param [out] r SP integer - destination.
2417 *
2418 * @return MP_OKAY on success.
2419 */
sp_copy(const sp_int * a,sp_int * r)2420 int sp_copy(const sp_int* a, sp_int* r)
2421 {
2422 int err = MP_OKAY;
2423
2424 if ((a == NULL) || (r == NULL)) {
2425 err = MP_VAL;
2426 }
2427 else if (a != r) {
2428 XMEMCPY(r->dp, a->dp, a->used * sizeof(sp_int_digit));
2429 if (a->used == 0)
2430 r->dp[0] = 0;
2431 r->used = a->used;
2432 #ifdef WOLFSSL_SP_INT_NEGATIVE
2433 r->sign = a->sign;
2434 #endif
2435 }
2436
2437 return err;
2438 }
2439 #endif
2440
2441 #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
2442 /* Initializes r and copies in value from a.
2443 *
2444 * @param [out] r SP integer - destination.
2445 * @param [in] a SP integer - source.
2446 *
2447 * @return MP_OKAY on success.
2448 * @return MP_VAL when a or r is NULL.
2449 */
sp_init_copy(sp_int * r,sp_int * a)2450 int sp_init_copy(sp_int* r, sp_int* a)
2451 {
2452 int err;
2453
2454 err = sp_init(r);
2455 if (err == MP_OKAY) {
2456 err = sp_copy(a, r);
2457 }
2458 return err;
2459 }
2460 #endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
2461
2462 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
2463 !defined(NO_DH) || !defined(NO_DSA)
/* Exchange the values in a and b.
 *
 * The swap is done through a temporary, t, sized for the digits in use in a.
 * The size (capacity) field of each number is saved before and restored
 * after the copies so that it stays with its own object.
 *
 * @param [in,out] a SP integer to swap.
 * @param [in,out] b SP integer to swap.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or b is NULL, or when either number has more digits
 *         in use than the other has available.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_exch(sp_int* a, sp_int* b)
{
    int err = MP_OKAY;
    /* Temporary storage; DECL_SP_INT/ALLOC_SP_INT/FREE_SP_INT are macros
     * defined elsewhere. */
    DECL_SP_INT(t, (a != NULL) ? a->used : 1);

    if ((a == NULL) || (b == NULL)) {
        err = MP_VAL;
    }
    /* Each number must be able to hold the digits of the other. */
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
        err = MP_VAL;
    }

    /* NOTE(review): a->used is named here even when a is NULL; whether it is
     * evaluated when err is already set depends on the macro - confirm. */
    ALLOC_SP_INT(t, a->used, err, NULL);
    if (err == MP_OKAY) {
        /* Remember capacities - they are overwritten by the copies below. */
        int asize = a->size;
        int bsize = b->size;
        XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
        XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
        XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
        a->size = asize;
        b->size = bsize;
    }

    FREE_SP_INT(t, NULL);
    return err;
}
2499 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
2500 * !NO_DSA */
2501
2502 #if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
2503 !defined(WC_NO_CACHE_RESISTANT)
/* Conditionally swap the values in a and b without branching on the
 * condition.
 *
 * A mask of all ones (m is 1) or all zeros (m is 0) is applied to the XOR of
 * the two numbers and the masked difference is XORed back into both. The
 * same operations are performed whether or not the swap takes place.
 *
 * a and b are not checked for NULL and must both have at least c digits.
 *
 * @param  [in,out]  a  SP integer to swap.
 * @param  [in,out]  b  SP integer to swap.
 * @param  [in]      c  Number of digits to process.
 * @param  [in]      m  1 to swap the numbers, 0 to leave them unchanged.
 *
 * @return  MP_OKAY on success.
 * @return  Error code set by ALLOC_SP_INT when the temporary cannot be
 *          obtained.
 */
int sp_cond_swap_ct(sp_int * a, sp_int * b, int c, int m)
{
    int i;
    int err = MP_OKAY;
    sp_digit mask = (sp_digit)0 - m;
    DECL_SP_INT(t, c);

    ALLOC_SP_INT(t, c, err, NULL);
    if (err == MP_OKAY) {
        /* t = (a ^ b) & mask - zero when not swapping. */
        t->used = (int)((a->used ^ b->used) & mask);
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        t->sign = (int)((a->sign ^ b->sign) & mask);
    #endif
        for (i = 0; i < c; i++) {
            t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
        }

        /* a ^= t - becomes b when swapping. */
        a->used ^= t->used;
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        a->sign ^= t->sign;
    #endif
        for (i = 0; i < c; i++) {
            a->dp[i] ^= t->dp[i];
        }

        /* b ^= t - becomes original a when swapping. */
        b->used ^= t->used;
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Must XOR with t: XORing b->sign with itself always clears it. */
        b->sign ^= t->sign;
    #endif
        for (i = 0; i < c; i++) {
            b->dp[i] ^= t->dp[i];
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
2539 #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
2540
2541 #ifdef WOLFSSL_SP_INT_NEGATIVE
2542 /* Calculate the absolute value of the multi-precision number.
2543 *
2544 * @param [in] a SP integer to calculate absolute value of.
2545 * @param [out] r SP integer to hold result.
2546 *
2547 * @return MP_OKAY on success.
2548 * @return MP_VAL when a or r is NULL.
2549 */
sp_abs(sp_int * a,sp_int * r)2550 int sp_abs(sp_int* a, sp_int* r)
2551 {
2552 int err;
2553
2554 err = sp_copy(a, r);
2555 if (r != NULL) {
2556 r->sign = MP_ZPOS;
2557 }
2558
2559 return err;
2560 }
2561 #endif /* WOLFSSL_SP_INT_NEGATIVE */
2562
2563 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
2564 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
2565 /* Compare absolute value of two multi-precision numbers.
2566 *
2567 * @param [in] a SP integer.
2568 * @param [in] b SP integer.
2569 *
2570 * @return MP_GT when a is greater than b.
2571 * @return MP_LT when a is less than b.
2572 * @return MP_EQ when a is equals b.
2573 */
_sp_cmp_abs(sp_int * a,sp_int * b)2574 static int _sp_cmp_abs(sp_int* a, sp_int* b)
2575 {
2576 int ret = MP_EQ;
2577
2578 if (a->used > b->used) {
2579 ret = MP_GT;
2580 }
2581 else if (a->used < b->used) {
2582 ret = MP_LT;
2583 }
2584 else {
2585 int i;
2586
2587 for (i = a->used - 1; i >= 0; i--) {
2588 if (a->dp[i] > b->dp[i]) {
2589 ret = MP_GT;
2590 break;
2591 }
2592 else if (a->dp[i] < b->dp[i]) {
2593 ret = MP_LT;
2594 break;
2595 }
2596 }
2597 }
2598
2599 return ret;
2600 }
2601 #endif
2602
2603 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
2604 /* Compare absolute value of two multi-precision numbers.
2605 *
2606 * @param [in] a SP integer.
2607 * @param [in] b SP integer.
2608 *
2609 * @return MP_GT when a is greater than b.
2610 * @return MP_LT when a is less than b.
2611 * @return MP_EQ when a is equals b.
2612 */
sp_cmp_mag(sp_int * a,sp_int * b)2613 int sp_cmp_mag(sp_int* a, sp_int* b)
2614 {
2615 int ret;
2616
2617 if (a == b) {
2618 ret = MP_EQ;
2619 }
2620 else if (a == NULL) {
2621 ret = MP_LT;
2622 }
2623 else if (b == NULL) {
2624 ret = MP_GT;
2625 }
2626 else
2627 {
2628 ret = _sp_cmp_abs(a, b);
2629 }
2630
2631 return ret;
2632 }
2633 #endif
2634
2635 #if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
2636 defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
2637 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
2638 /* Compare two multi-precision numbers.
2639 *
2640 * Assumes a and b are not NULL.
2641 *
2642 * @param [in] a SP integer.
2643 * @param [in] a SP integer.
2644 *
2645 * @return MP_GT when a is greater than b.
2646 * @return MP_LT when a is less than b.
2647 * @return MP_EQ when a is equals b.
2648 */
_sp_cmp(sp_int * a,sp_int * b)2649 static int _sp_cmp(sp_int* a, sp_int* b)
2650 {
2651 int ret;
2652
2653 #ifdef WOLFSSL_SP_INT_NEGATIVE
2654 if (a->sign == b->sign) {
2655 #endif
2656 ret = _sp_cmp_abs(a, b);
2657 #ifdef WOLFSSL_SP_INT_NEGATIVE
2658 if (a->sign == MP_NEG) {
2659 /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
2660 * Swapping MP_GT and MP_LT results.
2661 */
2662 ret = -ret;
2663 }
2664 }
2665 else if (a->sign > b->sign) {
2666 ret = MP_LT;
2667 }
2668 else /* (a->sign < b->sign) */ {
2669 ret = MP_GT;
2670 }
2671 #endif
2672
2673 return ret;
2674 }
2675 #endif
2676
2677 #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
2678 !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH)
2679 /* Compare two multi-precision numbers.
2680 *
2681 * Pointers are compared such that NULL is less than not NULL.
2682 *
2683 * @param [in] a SP integer.
2684 * @param [in] a SP integer.
2685 *
2686 * @return MP_GT when a is greater than b.
2687 * @return MP_LT when a is less than b.
2688 * @return MP_EQ when a is equals b.
2689 */
sp_cmp(sp_int * a,sp_int * b)2690 int sp_cmp(sp_int* a, sp_int* b)
2691 {
2692 int ret;
2693
2694 if (a == b) {
2695 ret = MP_EQ;
2696 }
2697 else if (a == NULL) {
2698 ret = MP_LT;
2699 }
2700 else if (b == NULL) {
2701 ret = MP_GT;
2702 }
2703 else
2704 {
2705 ret = _sp_cmp(a, b);
2706 }
2707
2708 return ret;
2709 }
2710 #endif
2711
2712 /*************************
2713 * Bit check/set functions
2714 *************************/
2715
2716 #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
2717 /* Check if a bit is set
2718 *
2719 * When a is NULL, result is 0.
2720 *
2721 * @param [in] a SP integer.
2722 * @param [in] b Bit position to check.
2723 *
2724 * @return 0 when bit is not set.
2725 * @return 1 when bit is set.
2726 */
sp_is_bit_set(sp_int * a,unsigned int b)2727 int sp_is_bit_set(sp_int* a, unsigned int b)
2728 {
2729 int ret = 0;
2730 int i = (int)(b >> SP_WORD_SHIFT);
2731 int s = (int)(b & SP_WORD_MASK);
2732
2733 if ((a != NULL) && (i < a->used)) {
2734 ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
2735 }
2736
2737 return ret;
2738 }
2739 #endif /* WOLFSSL_RSA_VERIFY_ONLY */
2740
2741 /* Count the number of bits in the multi-precision number.
2742 *
2743 * When a is not NULL, result is 0.
2744 *
2745 * @param [in] a SP integer.
2746 *
2747 * @return The number of bits in the number.
2748 */
sp_count_bits(const sp_int * a)2749 int sp_count_bits(const sp_int* a)
2750 {
2751 int r = 0;
2752
2753 if (a != NULL) {
2754 r = a->used - 1;
2755 while ((r >= 0) && (a->dp[r] == 0)) {
2756 r--;
2757 }
2758 if (r < 0) {
2759 r = 0;
2760 }
2761 else {
2762 sp_int_digit d;
2763
2764 d = a->dp[r];
2765 r *= SP_WORD_SIZE;
2766 if (d > SP_HALF_MAX) {
2767 r += SP_WORD_SIZE;
2768 while ((d & ((sp_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
2769 r--;
2770 d <<= 1;
2771 }
2772 }
2773 else {
2774 while (d != 0) {
2775 r++;
2776 d >>= 1;
2777 }
2778 }
2779 }
2780 }
2781
2782 return r;
2783 }
2784
2785 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
2786 !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) || \
2787 (defined(HAVE_ECC) && defined(FP_ECC)) || \
2788 (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
2789
/* Number of entries in array of number of least significant zero bits. */
#define SP_LNZ_CNT      16
/* Number of bits the array checks. */
#define SP_LNZ_BITS     4
/* Mask to apply to check with array. */
#define SP_LNZ_MASK     0xf
/* Number of least significant zero bits in first SP_LNZ_CNT numbers.
 * Entry i is the count of trailing zero bits of the 4-bit value i; entry 0
 * is 4 as none of the 4 bits are set.
 */
static const int sp_lnz[SP_LNZ_CNT] = {
   4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
2800
2801 /* Count the number of least significant zero bits.
2802 *
2803 * When a is not NULL, result is 0.
2804 *
2805 * @param [in] a SP integer to use.
2806 *
2807 * @return Number of leas significant zero bits.
2808 */
2809 #if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
2810 static
2811 #endif /* !HAVE_ECC || HAVE_COMP_KEY */
sp_cnt_lsb(sp_int * a)2812 int sp_cnt_lsb(sp_int* a)
2813 {
2814 int bc = 0;
2815
2816 if ((a != NULL) && (!sp_iszero(a))) {
2817 int i;
2818 int j;
2819 int cnt = 0;
2820
2821 for (i = 0; i < a->used && a->dp[i] == 0; i++, cnt += SP_WORD_SIZE) {
2822 }
2823
2824 for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
2825 bc = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
2826 if (bc != 4) {
2827 bc += cnt + j;
2828 break;
2829 }
2830 }
2831 }
2832
2833 return bc;
2834 }
2835 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
2836
2837 #if !defined(WOLFSSL_RSA_VERIFY_ONLY)
2838 /* Determine if the most significant byte of the encoded multi-precision number
2839 * has the top bit set.
2840 *
2841 * When A is NULL, result is 0.
2842 *
2843 * @param [in] a SP integer.
2844 *
2845 * @return 1 when the top bit of top byte is set.
2846 * @return 0 when the top bit of top byte is not set.
2847 */
sp_leading_bit(sp_int * a)2848 int sp_leading_bit(sp_int* a)
2849 {
2850 int bit = 0;
2851
2852 if ((a != NULL) && (a->used > 0)) {
2853 sp_int_digit d = a->dp[a->used - 1];
2854 #if SP_WORD_SIZE > 8
2855 while (d > (sp_int_digit)0xff) {
2856 d >>= 8;
2857 }
2858 #endif
2859 bit = (int)(d >> 7);
2860 }
2861
2862 return bit;
2863 }
2864 #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
2865
2866 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
2867 defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
2868 !defined(NO_RSA)
2869 /* Set a bit of a: a |= 1 << i
2870 * The field 'used' is updated in a.
2871 *
2872 * @param [in,out] a SP integer to set bit into.
2873 * @param [in] i Index of bit to set.
2874 *
2875 * @return MP_OKAY on success.
2876 * @return MP_VAL when a is NULL or index is too large.
2877 */
sp_set_bit(sp_int * a,int i)2878 int sp_set_bit(sp_int* a, int i)
2879 {
2880 int err = MP_OKAY;
2881 int w = (int)(i >> SP_WORD_SHIFT);
2882
2883 if ((a == NULL) || (w >= a->size)) {
2884 err = MP_VAL;
2885 }
2886 else {
2887 int s = (int)(i & (SP_WORD_SIZE - 1));
2888 int j;
2889
2890 for (j = a->used; j <= w; j++) {
2891 a->dp[j] = 0;
2892 }
2893 a->dp[w] |= (sp_int_digit)1 << s;
2894 if (a->used <= w) {
2895 a->used = w + 1;
2896 }
2897 }
2898 return err;
2899 }
2900 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
2901 * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
2902
2903 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
2904 defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
2905 /* Exponentiate 2 to the power of e: a = 2^e
2906 * This is done by setting the 'e'th bit.
2907 *
2908 * @param [out] a SP integer to hold result.
2909 * @param [in] e Exponent.
2910 *
2911 * @return MP_OKAY on success.
2912 * @return MP_VAL when a is NULL or 2^exponent is too large.
2913 */
sp_2expt(sp_int * a,int e)2914 int sp_2expt(sp_int* a, int e)
2915 {
2916 int err = MP_OKAY;
2917
2918 if (a == NULL) {
2919 err = MP_VAL;
2920 }
2921 if (err == MP_OKAY) {
2922 _sp_zero(a);
2923 err = sp_set_bit(a, e);
2924 }
2925
2926 return err;
2927 }
2928 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
2929 * WOLFSSL_KEY_GEN || !NO_DH */
2930
2931 /**********************
2932 * Digit/Long functions
2933 **********************/
2934
2935 /* Set the multi-precision number to be the value of the digit.
2936 *
2937 * @param [out] a SP integer to become number.
2938 * @param [in] d Digit to be set.
2939 *
2940 * @return MP_OKAY on success.
2941 * @return MP_VAL when a is NULL.
2942 */
sp_set(sp_int * a,sp_int_digit d)2943 int sp_set(sp_int* a, sp_int_digit d)
2944 {
2945 int err = MP_OKAY;
2946
2947 if (a == NULL) {
2948 err = MP_VAL;
2949 }
2950 if (err == MP_OKAY) {
2951 /* gcc-11 reports out-of-bounds array access if the byte array backing
2952 * the sp_int* is smaller than sizeof(sp_int), as occurs when
2953 * WOLFSSL_SP_SMALL.
2954 */
2955 PRAGMA_GCC_DIAG_PUSH;
2956 PRAGMA_GCC("GCC diagnostic ignored \"-Warray-bounds\"");
2957 a->dp[0] = d;
2958 a->used = d > 0;
2959 #ifdef WOLFSSL_SP_INT_NEGATIVE
2960 a->sign = MP_ZPOS;
2961 #endif
2962 PRAGMA_GCC_DIAG_POP;
2963 }
2964
2965 return err;
2966 }
2967
2968 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA)
2969 /* Set a number into the multi-precision number.
2970 *
2971 * Number may be larger than the size of a digit.
2972 *
2973 * @param [out] a SP integer to set.
2974 * @param [in] n Long value to set.
2975 *
2976 * @return MP_OKAY on success.
2977 * @return MP_VAL when a is NULL.
2978 */
sp_set_int(sp_int * a,unsigned long n)2979 int sp_set_int(sp_int* a, unsigned long n)
2980 {
2981 int err = MP_OKAY;
2982
2983 if (a == NULL) {
2984 err = MP_VAL;
2985 }
2986
2987 if (err == MP_OKAY) {
2988 #if SP_WORD_SIZE < SP_ULONG_BITS
2989 if (n <= (sp_int_digit)SP_DIGIT_MAX) {
2990 #endif
2991 a->dp[0] = (sp_int_digit)n;
2992 a->used = (n != 0);
2993 #if SP_WORD_SIZE < SP_ULONG_BITS
2994 }
2995 else {
2996 int i;
2997
2998 for (i = 0; n > 0; i++,n >>= SP_WORD_SIZE) {
2999 a->dp[i] = (sp_int_digit)n;
3000 }
3001 a->used = i;
3002 }
3003 #endif
3004 #ifdef WOLFSSL_SP_INT_NEGATIVE
3005 a->sign = MP_ZPOS;
3006 #endif
3007 }
3008
3009 return err;
3010 }
3011 #endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
3012
3013 #ifndef WOLFSSL_RSA_VERIFY_ONLY
3014 /* Compare a one digit number with a multi-precision number.
3015 *
3016 * When a is NULL, MP_LT is returned.
3017 *
3018 * @param [in] a SP integer to compare.
3019 * @param [in] d Digit to compare with.
3020 *
3021 * @return MP_GT when a is greater than d.
3022 * @return MP_LT when a is less than d.
3023 * @return MP_EQ when a is equals d.
3024 */
sp_cmp_d(sp_int * a,sp_int_digit d)3025 int sp_cmp_d(sp_int* a, sp_int_digit d)
3026 {
3027 int ret = MP_EQ;
3028
3029 if (a == NULL) {
3030 ret = MP_LT;
3031 }
3032 else
3033 #ifdef WOLFSSL_SP_INT_NEGATIVE
3034 if (a->sign == MP_NEG) {
3035 ret = MP_LT;
3036 }
3037 else
3038 #endif
3039 {
3040 /* special case for zero*/
3041 if (a->used == 0) {
3042 if (d == 0) {
3043 ret = MP_EQ;
3044 }
3045 else {
3046 ret = MP_LT;
3047 }
3048 }
3049 else if (a->used > 1) {
3050 ret = MP_GT;
3051 }
3052 else {
3053 if (a->dp[0] > d) {
3054 ret = MP_GT;
3055 }
3056 else if (a->dp[0] < d) {
3057 ret = MP_LT;
3058 }
3059 }
3060 }
3061
3062 return ret;
3063 }
3064 #endif
3065
/* Feature selection for the digit helpers that follow. Each macro is defined
 * from the build options and is then tested by the #if guards further down.
 */

/* Single digit addition is needed. */
#if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
    !defined(NO_DSA) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
#define WOLFSSL_SP_ADD_D
#endif
/* Single digit subtraction is needed. */
#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
#define WOLFSSL_SP_SUB_D
#endif
/* Selected for SP_MATH_ALL builds with RSA that is not verify-only. */
#if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
    !defined(WOLFSSL_RSA_VERIFY_ONLY)
#define WOLFSSL_SP_READ_RADIX_10
#endif
/* Selected for ECC, DSA, OPENSSL_EXTRA or RSA with private key operations. */
#if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
    (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
     !defined(WOLFSSL_RSA_PUBLIC_ONLY))
#define WOLFSSL_SP_INVMOD
#endif
/* Selected for SP_MATH_ALL builds with ECC. */
#if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
#define WOLFSSL_SP_INVMOD_MONT_CT
#endif
/* Selected for SP_MATH_ALL with private key operations, SP DH or RSA key
 * generation. */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
     !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH) || \
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
#define WOLFSSL_SP_PRIME_GEN
#endif
3091
3092 #if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
3093 defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
3094 /* Add a one digit number to the multi-precision number.
3095 *
3096 * @param [in] a SP integer be added to.
3097 * @param [in] d Digit to add.
3098 * @param [out] r SP integer to store result in.
3099 *
3100 * @return MP_OKAY on success.
3101 * @return MP_VAL when result is too large for fixed size dp array.
3102 */
_sp_add_d(sp_int * a,sp_int_digit d,sp_int * r)3103 static int _sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
3104 {
3105 int err = MP_OKAY;
3106 int i = 0;
3107 sp_int_digit t;
3108
3109 r->used = a->used;
3110 if (a->used == 0) {
3111 r->used = d > 0;
3112 }
3113 t = a->dp[0] + d;
3114 if (t < a->dp[0]) {
3115 for (++i; i < a->used; i++) {
3116 r->dp[i] = a->dp[i] + 1;
3117 if (r->dp[i] != 0) {
3118 break;
3119 }
3120 }
3121 if (i == a->used) {
3122 r->used++;
3123 if (i < r->size)
3124 r->dp[i] = 1;
3125 else
3126 err = MP_VAL;
3127 }
3128 }
3129 if (err == MP_OKAY) {
3130 r->dp[0] = t;
3131 if (r != a) {
3132 for (++i; i < a->used; i++) {
3133 r->dp[i] = a->dp[i];
3134 }
3135 }
3136 }
3137
3138 return err;
3139 }
3140 #endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
3141 * defined(WOLFSSL_SP_READ_RADIX_10) */
3142
3143 #if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
3144 defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
3145 defined(WOLFSSL_SP_INVMOD_MONT_CT) || defined(WOLFSSL_SP_PRIME_GEN)
3146 /* Sub a one digit number from the multi-precision number.
3147 *
3148 * returns MP_OKAY always.
3149 * @param [in] a SP integer be subtracted from.
3150 * @param [in] d Digit to subtract.
3151 * @param [out] r SP integer to store result in.
3152 */
_sp_sub_d(sp_int * a,sp_int_digit d,sp_int * r)3153 static void _sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
3154 {
3155 int i = 0;
3156 sp_int_digit t;
3157
3158 r->used = a->used;
3159 if (a->used == 0) {
3160 r->dp[0] = 0;
3161 }
3162 else {
3163 t = a->dp[0] - d;
3164 if (t > a->dp[0]) {
3165 for (++i; i < a->used; i++) {
3166 r->dp[i] = a->dp[i] - 1;
3167 if (r->dp[i] != SP_DIGIT_MAX) {
3168 break;
3169 }
3170 }
3171 }
3172 r->dp[0] = t;
3173 if (r != a) {
3174 for (++i; i < a->used; i++) {
3175 r->dp[i] = a->dp[i];
3176 }
3177 }
3178 sp_clamp(r);
3179 }
3180 }
3181 #endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
3182 * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
3183 * WOLFSSL_SP_PRIME_GEN */
3184
3185 #ifdef WOLFSSL_SP_ADD_D
3186 /* Add a one digit number to the multi-precision number.
3187 *
3188 * @param [in] a SP integer be added to.
3189 * @param [in] d Digit to add.
3190 * @param [out] r SP integer to store result in.
3191 *
3192 * @return MP_OKAY on success.
3193 * @return MP_VAL when result is too large for fixed size dp array.
3194 */
sp_add_d(sp_int * a,sp_int_digit d,sp_int * r)3195 int sp_add_d(sp_int* a, sp_int_digit d, sp_int* r)
3196 {
3197 int err = MP_OKAY;
3198
3199 /* Check validity of parameters. */
3200 if ((a == NULL) || (r == NULL)) {
3201 err = MP_VAL;
3202 }
3203 else
3204 {
3205 #ifndef WOLFSSL_SP_INT_NEGATIVE
3206 /* Positive only so just use internal function. */
3207 err = _sp_add_d(a, d, r);
3208 #else
3209 if (a->sign == MP_ZPOS) {
3210 /* Positive so use interal function. */
3211 r->sign = MP_ZPOS;
3212 err = _sp_add_d(a, d, r);
3213 }
3214 else if ((a->used > 1) || (a->dp[0] > d)) {
3215 /* Negative value bigger than digit so subtract digit. */
3216 r->sign = MP_NEG;
3217 _sp_sub_d(a, d, r);
3218 }
3219 else {
3220 /* Negative value smaller or equal to digit. */
3221 r->sign = MP_ZPOS;
3222 /* Subtract negative value from digit. */
3223 r->dp[0] = d - a->dp[0];
3224 /* Result is a digit equal to or greater than zero. */
3225 r->used = ((r->dp[0] == 0) ? 0 : 1);
3226 }
3227 #endif
3228 }
3229
3230 return err;
3231 }
3232 #endif /* WOLFSSL_SP_ADD_D */
3233
3234 #ifdef WOLFSSL_SP_SUB_D
3235 /* Sub a one digit number from the multi-precision number.
3236 *
3237 * @param [in] a SP integer be subtracted from.
3238 * @param [in] d Digit to subtract.
3239 * @param [out] r SP integer to store result in.
3240 *
3241 * @return MP_OKAY on success.
3242 * @return MP_VAL when a or r is NULL.
3243 */
sp_sub_d(sp_int * a,sp_int_digit d,sp_int * r)3244 int sp_sub_d(sp_int* a, sp_int_digit d, sp_int* r)
3245 {
3246 int err = MP_OKAY;
3247
3248 /* Check validity of parameters. */
3249 if ((a == NULL) || (r == NULL)) {
3250 err = MP_VAL;
3251 }
3252 else {
3253 #ifndef WOLFSSL_SP_INT_NEGATIVE
3254 /* Positive only so just use internal function. */
3255 _sp_sub_d(a, d, r);
3256 #else
3257 if (a->sign == MP_NEG) {
3258 /* Subtracting from negative use interal add. */
3259 r->sign = MP_NEG;
3260 err = _sp_add_d(a, d, r);
3261 }
3262 else if ((a->used > 1) || (a->dp[0] >= d)) {
3263 /* Positive number greater than digit so add digit. */
3264 r->sign = MP_ZPOS;
3265 _sp_sub_d(a, d, r);
3266 }
3267 else {
3268 /* Negative value smaller than digit. */
3269 r->sign = MP_NEG;
3270 /* Subtract positive value from digit. */
3271 r->dp[0] = d - a->dp[0];
3272 /* Result is a digit equal to or greater than zero. */
3273 r->used = 1;
3274 }
3275 #endif
3276 }
3277
3278 return err;
3279 }
3280 #endif /* WOLFSSL_SP_SUB_D */
3281
3282 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
3283 defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
3284 !defined(NO_DH) || defined(HAVE_ECC) || \
3285 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
3286 !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
3287 (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
3288 /* Multiply a by digit n and put result into r shifting up o digits.
3289 * r = (a * n) << (o * SP_WORD_SIZE)
3290 *
3291 * @param [in] a SP integer to be multiplied.
3292 * @param [in] n Number (SP digit) to multiply by.
3293 * @param [out] r SP integer result.
3294 * @param [in] o Number of digits to move result up by.
3295 * @return MP_OKAY on success.
3296 * @return MP_VAL when result is too large for sp_int.
3297 */
_sp_mul_d(sp_int * a,sp_int_digit n,sp_int * r,int o)3298 static int _sp_mul_d(sp_int* a, sp_int_digit n, sp_int* r, int o)
3299 {
3300 int err = MP_OKAY;
3301 int i;
3302 sp_int_word t = 0;
3303
3304 #ifdef WOLFSSL_SP_SMALL
3305 for (i = 0; i < o; i++) {
3306 r->dp[i] = 0;
3307 }
3308 #else
3309 /* Don't use the offset. Only when doing small code size div. */
3310 (void)o;
3311 #endif
3312
3313 for (i = 0; i < a->used; i++, o++) {
3314 t += (sp_int_word)a->dp[i] * n;
3315 r->dp[o] = (sp_int_digit)t;
3316 t >>= SP_WORD_SIZE;
3317 }
3318
3319 if (t > 0) {
3320 if (o == r->size) {
3321 err = MP_VAL;
3322 }
3323 else {
3324 r->dp[o++] = (sp_int_digit)t;
3325 }
3326 }
3327 r->used = o;
3328 sp_clamp(r);
3329
3330 return err;
3331 }
3332 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
3333 * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
3334
3335 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
3336 (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
3337 /* Multiply a by digit n and put result into r. r = a * n
3338 *
3339 * @param [in] a SP integer to multiply.
3340 * @param [in] n Digit to multiply by.
3341 * @param [out] r SP integer to hold result.
3342 *
3343 * @return MP_OKAY on success.
3344 * @return MP_VAL when a or b is NULL, or a has maximum number of digits used.
3345 */
sp_mul_d(sp_int * a,sp_int_digit d,sp_int * r)3346 int sp_mul_d(sp_int* a, sp_int_digit d, sp_int* r)
3347 {
3348 int err = MP_OKAY;
3349
3350 if ((a == NULL) || (r == NULL)) {
3351 err = MP_VAL;
3352 }
3353 if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
3354 err = MP_VAL;
3355 }
3356
3357 if (err == MP_OKAY) {
3358 err = _sp_mul_d(a, d, r, 0);
3359 #ifdef WOLFSSL_SP_INT_NEGATIVE
3360 if (d == 0) {
3361 r->sign = MP_ZPOS;
3362 }
3363 else {
3364 r->sign = a->sign;
3365 }
3366 #endif
3367 }
3368
3369 return err;
3370 }
3371 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
3372 * (WOLFSSL_KEY_GEN && !NO_RSA) */
3373
/* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d.
 *
 * Either macro being defined also compiles in the shared helpers
 * sp_div_word(), _sp_div_small() and, when WOLFSSL_SP_SMALL is not defined,
 * _sp_div_3() and _sp_div_10().
 */
/* WOLFSSL_SP_DIV_D: compile in sp_div_d(). */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
    defined(WC_MP_TO_RADIX)
#define WOLFSSL_SP_DIV_D
#endif
/* WOLFSSL_SP_MOD_D: compile in sp_mod_d(). */
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
    defined(WOLFSSL_HAVE_SP_DH) || \
    (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
#define WOLFSSL_SP_MOD_D
#endif
3386
3387 #if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
3388 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
3389 !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
3390 defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
3391 #ifndef SP_ASM_DIV_WORD
3392 /* Divide a two digit number by a digit number and return. (hi | lo) / d
3393 *
3394 * @param [in] hi SP integer digit. High digit of the dividend.
3395 * @param [in] lo SP integer digit. Lower digit of the dividend.
3396 * @param [in] d SP integer digit. Number to divide by.
3397 * @reutrn The division result.
3398 */
sp_div_word(sp_int_digit hi,sp_int_digit lo,sp_int_digit d)3399 static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
3400 sp_int_digit d)
3401 {
3402 #ifdef WOLFSSL_SP_DIV_WORD_HALF
3403 sp_int_digit r;
3404
3405 if (hi != 0) {
3406 sp_int_digit divsz = d >> SP_HALF_SIZE;
3407 sp_int_digit r2;
3408 sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
3409 sp_int_word trial;
3410
3411 r = hi / divsz;
3412 if (r > SP_HALF_MAX) {
3413 r = SP_HALF_MAX;
3414 }
3415 r <<= SP_HALF_SIZE;
3416 trial = r * (sp_int_word)d;
3417 while (trial > w) {
3418 r -= (sp_int_digit)1 << SP_HALF_SIZE;
3419 trial -= (sp_int_word)d << SP_HALF_SIZE;
3420 }
3421 w -= trial;
3422 r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divsz;
3423 trial = r2 * (sp_int_word)d;
3424 while (trial > w) {
3425 r2--;
3426 trial -= d;
3427 }
3428 w -= trial;
3429 r += r2;
3430 r2 = ((sp_int_digit)w) / d;
3431 r += r2;
3432 }
3433 else {
3434 r = lo / d;
3435 }
3436
3437 return r;
3438 #else
3439 sp_int_word w;
3440 sp_int_digit r;
3441
3442 w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
3443 w /= d;
3444 r = (sp_int_digit)w;
3445
3446 return r;
3447 #endif /* WOLFSSL_SP_DIV_WORD_HALF */
3448 }
3449 #endif /* !SP_ASM_DIV_WORD */
3450 #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
3451 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
3452
3453 #if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
3454 !defined(WOLFSSL_SP_SMALL)
3455 /* Divide by 3: r = a / 3 and rem = a % 3
3456 *
3457 * @param [in] a SP integer to be divided.
3458 * @param [out] r SP integer that is the quotient. May be NULL.
3459 * @param [out] rem SP integer that is the remainder. May be NULL.
3460 */
_sp_div_3(sp_int * a,sp_int * r,sp_int_digit * rem)3461 static void _sp_div_3(sp_int* a, sp_int* r, sp_int_digit* rem)
3462 {
3463 int i;
3464 sp_int_word t;
3465 sp_int_digit tr = 0;
3466 sp_int_digit tt;
3467 static const char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
3468 static const char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };
3469
3470 if (r == NULL) {
3471 for (i = a->used - 1; i >= 0; i--) {
3472 t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
3473 #if SP_WORD_SIZE == 64
3474 tt = (t * 0x5555555555555555L) >> 64;
3475 #elif SP_WORD_SIZE == 32
3476 tt = (t * 0x55555555) >> 32;
3477 #elif SP_WORD_SIZE == 16
3478 tt = (t * 0x5555) >> 16;
3479 #elif SP_WORD_SIZE == 8
3480 tt = (t * 0x55) >> 8;
3481 #endif
3482 tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
3483 tr = sp_rem6[tr];
3484 }
3485 *rem = tr;
3486 }
3487 else {
3488 for (i = a->used - 1; i >= 0; i--) {
3489 t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
3490 #if SP_WORD_SIZE == 64
3491 tt = (t * 0x5555555555555555L) >> 64;
3492 #elif SP_WORD_SIZE == 32
3493 tt = (t * 0x55555555) >> 32;
3494 #elif SP_WORD_SIZE == 16
3495 tt = (t * 0x5555) >> 16;
3496 #elif SP_WORD_SIZE == 8
3497 tt = (t * 0x55) >> 8;
3498 #endif
3499 tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
3500 tt += sp_r6[tr];
3501 tr = sp_rem6[tr];
3502 r->dp[i] = tt;
3503 }
3504 r->used = a->used;
3505 sp_clamp(r);
3506 if (rem != NULL) {
3507 *rem = tr;
3508 }
3509 }
3510 }
3511
3512 /* Divide by 10: r = a / 10 and rem = a % 10
3513 *
3514 * @param [in] a SP integer to be divided.
3515 * @param [out] r SP integer that is the quotient. May be NULL.
3516 * @param [out] rem SP integer that is the remainder. May be NULL.
3517 */
_sp_div_10(sp_int * a,sp_int * r,sp_int_digit * rem)3518 static void _sp_div_10(sp_int* a, sp_int* r, sp_int_digit* rem)
3519 {
3520 int i;
3521 sp_int_word t;
3522 sp_int_digit tr = 0;
3523 sp_int_digit tt;
3524
3525 if (r == NULL) {
3526 for (i = a->used - 1; i >= 0; i--) {
3527 t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
3528 #if SP_WORD_SIZE == 64
3529 tt = (t * 0x1999999999999999L) >> 64;
3530 #elif SP_WORD_SIZE == 32
3531 tt = (t * 0x19999999) >> 32;
3532 #elif SP_WORD_SIZE == 16
3533 tt = (t * 0x1999) >> 16;
3534 #elif SP_WORD_SIZE == 8
3535 tt = (t * 0x19) >> 8;
3536 #endif
3537 tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
3538 tr = tr % 10;
3539 }
3540 *rem = tr;
3541 }
3542 else {
3543 for (i = a->used - 1; i >= 0; i--) {
3544 t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
3545 #if SP_WORD_SIZE == 64
3546 tt = (t * 0x1999999999999999L) >> 64;
3547 #elif SP_WORD_SIZE == 32
3548 tt = (t * 0x19999999) >> 32;
3549 #elif SP_WORD_SIZE == 16
3550 tt = (t * 0x1999) >> 16;
3551 #elif SP_WORD_SIZE == 8
3552 tt = (t * 0x19) >> 8;
3553 #endif
3554 tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
3555 tt += tr / 10;
3556 tr = tr % 10;
3557 r->dp[i] = tt;
3558 }
3559 r->used = a->used;
3560 sp_clamp(r);
3561 if (rem != NULL) {
3562 *rem = tr;
3563 }
3564 }
3565 }
3566 #endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
3567
3568 #if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
3569 /* Divide by small number: r = a / d and rem = a % d
3570 *
3571 * @param [in] a SP integer to be divided.
3572 * @param [in] d Digit to divide by.
3573 * @param [out] r SP integer that is the quotient. May be NULL.
3574 * @param [out] rem SP integer that is the remainder. May be NULL.
3575 */
_sp_div_small(sp_int * a,sp_int_digit d,sp_int * r,sp_int_digit * rem)3576 static void _sp_div_small(sp_int* a, sp_int_digit d, sp_int* r,
3577 sp_int_digit* rem)
3578 {
3579 int i;
3580 sp_int_word t;
3581 sp_int_digit tr = 0;
3582 sp_int_digit tt;
3583 sp_int_digit m;
3584
3585 if (r == NULL) {
3586 m = SP_DIGIT_MAX / d;
3587 for (i = a->used - 1; i >= 0; i--) {
3588 t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
3589 tt = (t * m) >> SP_WORD_SIZE;
3590 tr = (sp_int_digit)(t - tt * d);
3591 tr = tr % d;
3592 }
3593 *rem = tr;
3594 }
3595 else {
3596 m = SP_DIGIT_MAX / d;
3597 for (i = a->used - 1; i >= 0; i--) {
3598 t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
3599 tt = (t * m) >> SP_WORD_SIZE;
3600 tr = (sp_int_digit)(t - tt * d);
3601 tt += tr / d;
3602 tr = tr % d;
3603 r->dp[i] = tt;
3604 }
3605 r->used = a->used;
3606 sp_clamp(r);
3607 if (rem != NULL) {
3608 *rem = tr;
3609 }
3610 }
3611 }
3612 #endif
3613
3614 #ifdef WOLFSSL_SP_DIV_D
3615 /* Divide a multi-precision number by a digit size number and calculate
3616 * remainder.
3617 * r = a / d; rem = a % d
3618 *
3619 * @param [in] a SP integer to be divided.
3620 * @param [in] d Digit to divide by.
3621 * @param [out] r SP integer that is the quotient. May be NULL.
3622 * @param [out] rem Digit that is the remainder. May be NULL.
3623 *
3624 * @return MP_OKAY on success.
3625 * @return MP_VAL when a is NULL or d is 0.
3626 */
sp_div_d(sp_int * a,sp_int_digit d,sp_int * r,sp_int_digit * rem)3627 int sp_div_d(sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
3628 {
3629 int err = MP_OKAY;
3630
3631 if ((a == NULL) || (d == 0)) {
3632 err = MP_VAL;
3633 }
3634
3635 if (err == MP_OKAY) {
3636 #if !defined(WOLFSSL_SP_SMALL)
3637 if (d == 3) {
3638 _sp_div_3(a, r, rem);
3639 }
3640 else if (d == 10) {
3641 _sp_div_10(a, r, rem);
3642 }
3643 else
3644 #endif
3645 if (d <= SP_HALF_MAX) {
3646 _sp_div_small(a, d, r, rem);
3647 }
3648 else
3649 {
3650 int i;
3651 sp_int_word w = 0;
3652 sp_int_digit t;
3653
3654 for (i = a->used - 1; i >= 0; i--) {
3655 t = sp_div_word((sp_int_digit)w, a->dp[i], d);
3656 w = (w << SP_WORD_SIZE) | a->dp[i];
3657 w -= (sp_int_word)t * d;
3658 if (r != NULL) {
3659 r->dp[i] = t;
3660 }
3661 }
3662 if (r != NULL) {
3663 r->used = a->used;
3664 sp_clamp(r);
3665 }
3666
3667 if (rem != NULL) {
3668 *rem = (sp_int_digit)w;
3669 }
3670 }
3671
3672 #ifdef WOLFSSL_SP_INT_NEGATIVE
3673 if (r != NULL) {
3674 r->sign = a->sign;
3675 }
3676 #endif
3677 }
3678
3679 return err;
3680 }
3681 #endif /* WOLFSSL_SP_DIV_D */
3682
3683 #ifdef WOLFSSL_SP_MOD_D
3684 /* Calculate a modulo the digit d into r: r = a mod d
3685 *
3686 * @param [in] a SP integer to reduce.
3687 * @param [in] d Digit to that is the modulus.
3688 * @param [out] r Digit that is the result..
3689 *
3690 * @return MP_OKAY on success.
3691 * @return MP_VAL when a is NULL or d is 0.
3692 */
3693 #if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
3694 !defined(HAVE_COMP_KEY))
3695 static
3696 #endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
sp_mod_d(sp_int * a,const sp_int_digit d,sp_int_digit * r)3697 int sp_mod_d(sp_int* a, const sp_int_digit d, sp_int_digit* r)
3698 {
3699 int err = MP_OKAY;
3700
3701 if ((a == NULL) || (r == NULL) || (d == 0)) {
3702 err = MP_VAL;
3703 }
3704
3705 #if 0
3706 sp_print(a, "a");
3707 sp_print_digit(d, "m");
3708 #endif
3709
3710 if (err == MP_OKAY) {
3711 /* Check whether d is a power of 2. */
3712 if ((d & (d - 1)) == 0) {
3713 if (a->used == 0) {
3714 *r = 0;
3715 }
3716 else {
3717 *r = a->dp[0] & (d - 1);
3718 }
3719 }
3720 #if !defined(WOLFSSL_SP_SMALL)
3721 else if (d == 3) {
3722 _sp_div_3(a, NULL, r);
3723 }
3724 else if (d == 10) {
3725 _sp_div_10(a, NULL, r);
3726 }
3727 #endif
3728 else if (d <= SP_HALF_MAX) {
3729 _sp_div_small(a, d, NULL, r);
3730 }
3731 else {
3732 int i;
3733 sp_int_word w = 0;
3734 sp_int_digit t;
3735
3736 for (i = a->used - 1; i >= 0; i--) {
3737 t = sp_div_word((sp_int_digit)w, a->dp[i], d);
3738 w = (w << SP_WORD_SIZE) | a->dp[i];
3739 w -= (sp_int_word)t * d;
3740 }
3741
3742 *r = (sp_int_digit)w;
3743 }
3744
3745 #ifdef WOLFSSL_SP_INT_NEGATIVE
3746 if (a->sign == MP_NEG) {
3747 *r = d - *r;
3748 }
3749 #endif
3750 }
3751
3752 #if 0
3753 sp_print_digit(*r, "rmod");
3754 #endif
3755
3756 return err;
3757 }
3758 #endif /* WOLFSSL_SP_MOD_D */
3759
3760 #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
3761 /* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
3762 *
3763 * r = a / 2 (mod m) - constant time (a < m and positive)
3764 *
3765 * @param [in] a SP integer to divide.
3766 * @param [in] m SP integer that is modulus.
3767 * @param [out] r SP integer to hold result.
3768 *
3769 * @return MP_OKAY on success.
3770 * @return MP_VAL when a, m or r is NULL.
3771 */
sp_div_2_mod_ct(sp_int * a,sp_int * m,sp_int * r)3772 int sp_div_2_mod_ct(sp_int* a, sp_int* m, sp_int* r)
3773 {
3774 int err = MP_OKAY;
3775
3776 if ((a == NULL) || (m == NULL) || (r == NULL)) {
3777 err = MP_VAL;
3778 }
3779 if ((err == MP_OKAY) && (r->size < m->used + 1)) {
3780 err = MP_VAL;
3781 }
3782
3783 if (err == MP_OKAY) {
3784 sp_int_word w = 0;
3785 sp_int_digit mask;
3786 int i;
3787
3788 #if 0
3789 sp_print(a, "a");
3790 sp_print(m, "m");
3791 #endif
3792
3793 mask = 0 - (a->dp[0] & 1);
3794 for (i = 0; i < m->used; i++) {
3795 sp_int_digit mask_a = 0 - (i < a->used);
3796
3797 w += m->dp[i] & mask;
3798 w += a->dp[i] & mask_a;
3799 r->dp[i] = (sp_int_digit)w;
3800 w >>= DIGIT_BIT;
3801 }
3802 r->dp[i] = (sp_int_digit)w;
3803 r->used = i + 1;
3804 #ifdef WOLFSSL_SP_INT_NEGATIVE
3805 r->sign = MP_ZPOS;
3806 #endif
3807 sp_clamp(r);
3808 sp_div_2(r, r);
3809
3810 #if 0
3811 sp_print(r, "rd2");
3812 #endif
3813 }
3814
3815 return err;
3816 }
3817 #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
3818
3819 #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
3820 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
3821 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
3822 /* Divides a by 2 and stores in r: r = a >> 1
3823 *
3824 * @param [in] a SP integer to divide.
3825 * @param [out] r SP integer to hold result.
3826 *
3827 * @return MP_OKAY on success.
3828 * @return MP_VAL when a or r is NULL.
3829 */
3830 #if !(defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
3831 static
3832 #endif
sp_div_2(sp_int * a,sp_int * r)3833 int sp_div_2(sp_int* a, sp_int* r)
3834 {
3835 int err = MP_OKAY;
3836
3837 #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
3838 /* Only when a public API. */
3839 if ((a == NULL) || (r == NULL)) {
3840 err = MP_VAL;
3841 }
3842 #endif
3843
3844 if (err == MP_OKAY) {
3845 int i;
3846
3847 r->used = a->used;
3848 for (i = 0; i < a->used - 1; i++) {
3849 r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
3850 }
3851 r->dp[i] = a->dp[i] >> 1;
3852 r->used = i + 1;
3853 sp_clamp(r);
3854 #ifdef WOLFSSL_SP_INT_NEGATIVE
3855 r->sign = a->sign;
3856 #endif
3857 }
3858
3859 return err;
3860 }
3861 #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
3862 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
3863
3864 /************************
3865 * Add/Subtract Functions
3866 ************************/
3867
3868 #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
3869 /* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
3870 *
3871 * @param [in] a SP integer to add to.
3872 * @param [in] b SP integer to add.
3873 * @param [out] r SP integer to store result in.
3874 * @param [in] o Number of digits to offset b.
3875 *
3876 * @return MP_OKAY on success.
3877 */
_sp_add_off(sp_int * a,sp_int * b,sp_int * r,int o)3878 static int _sp_add_off(sp_int* a, sp_int* b, sp_int* r, int o)
3879 {
3880 int i;
3881 int j;
3882 sp_int_word t = 0;
3883
3884 #if 0
3885 sp_print(a, "a");
3886 sp_print(b, "b");
3887 #endif
3888
3889 #ifdef SP_MATH_NEED_ADD_OFF
3890 for (i = 0; (i < o) && (i < a->used); i++) {
3891 r->dp[i] = a->dp[i];
3892 }
3893 for (; i < o; i++) {
3894 r->dp[i] = 0;
3895 }
3896 #else
3897 i = 0;
3898 (void)o;
3899 #endif
3900
3901 for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
3902 t += a->dp[i];
3903 t += b->dp[j];
3904 r->dp[i] = (sp_int_digit)t;
3905 t >>= SP_WORD_SIZE;
3906 }
3907 for (; i < a->used; i++) {
3908 t += a->dp[i];
3909 r->dp[i] = (sp_int_digit)t;
3910 t >>= SP_WORD_SIZE;
3911 }
3912 for (; j < b->used; i++, j++) {
3913 t += b->dp[j];
3914 r->dp[i] = (sp_int_digit)t;
3915 t >>= SP_WORD_SIZE;
3916 }
3917 r->used = i;
3918 if (t != 0) {
3919 r->dp[i] = (sp_int_digit)t;
3920 r->used++;
3921 }
3922
3923 sp_clamp(r);
3924
3925 #if 0
3926 sp_print(r, "radd");
3927 #endif
3928
3929 return MP_OKAY;
3930 }
3931 #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
3932
3933 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
3934 !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
3935 !defined(WOLFSSL_RSA_VERIFY_ONLY))
3936 /* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
3937 * a must be greater than b.
3938 *
3939 * @param [in] a SP integer to subtract from.
3940 * @param [in] b SP integer to subtract.
3941 * @param [out] r SP integer to store result in.
3942 * @param [in] o Number of digits to offset b.
3943 *
3944 * @return MP_OKAY on success.
3945 */
_sp_sub_off(sp_int * a,sp_int * b,sp_int * r,int o)3946 static int _sp_sub_off(sp_int* a, sp_int* b, sp_int* r, int o)
3947 {
3948 int i;
3949 int j;
3950 sp_int_sword t = 0;
3951
3952 for (i = 0; (i < o) && (i < a->used); i++) {
3953 r->dp[i] = a->dp[i];
3954 }
3955 for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
3956 t += a->dp[i];
3957 t -= b->dp[j];
3958 r->dp[i] = (sp_int_digit)t;
3959 t >>= SP_WORD_SIZE;
3960 }
3961 for (; i < a->used; i++) {
3962 t += a->dp[i];
3963 r->dp[i] = (sp_int_digit)t;
3964 t >>= SP_WORD_SIZE;
3965 }
3966 r->used = i;
3967 sp_clamp(r);
3968
3969 return MP_OKAY;
3970 }
3971 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
3972 * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
3973
3974 #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
3975 /* Add b to a into r: r = a + b
3976 *
3977 * @param [in] a SP integer to add to.
3978 * @param [in] b SP integer to add.
3979 * @param [out] r SP integer to store result in.
3980 *
3981 * @return MP_OKAY on success.
3982 * @return MP_VAL when a, b, or r is NULL.
3983 */
sp_add(sp_int * a,sp_int * b,sp_int * r)3984 int sp_add(sp_int* a, sp_int* b, sp_int* r)
3985 {
3986 int err = MP_OKAY;
3987
3988 if ((a == NULL) || (b == NULL) || (r == NULL)) {
3989 err = MP_VAL;
3990 }
3991 if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
3992 err = MP_VAL;
3993 }
3994 if (err == MP_OKAY) {
3995 #ifndef WOLFSSL_SP_INT_NEGATIVE
3996 err = _sp_add_off(a, b, r, 0);
3997 #else
3998 if (a->sign == b->sign) {
3999 r->sign = a->sign;
4000 err = _sp_add_off(a, b, r, 0);
4001 }
4002 else if (_sp_cmp_abs(a, b) != MP_LT) {
4003 err = _sp_sub_off(a, b, r, 0);
4004 if (sp_iszero(r)) {
4005 r->sign = MP_ZPOS;
4006 }
4007 else {
4008 r->sign = a->sign;
4009 }
4010 }
4011 else {
4012 err = _sp_sub_off(b, a, r, 0);
4013 if (sp_iszero(r)) {
4014 r->sign = MP_ZPOS;
4015 }
4016 else {
4017 r->sign = b->sign;
4018 }
4019 }
4020 #endif
4021 }
4022
4023 return err;
4024 }
4025 #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
4026
4027 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4028 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4029 /* Subtract b from a into r: r = a - b
4030 *
4031 * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
4032 *
4033 * @param [in] a SP integer to subtract from.
4034 * @param [in] b SP integer to subtract.
4035 * @param [out] r SP integer to store result in.
4036 *
4037 * @return MP_OKAY on success.
4038 * @return MP_VAL when a, b, or r is NULL.
4039 */
sp_sub(sp_int * a,sp_int * b,sp_int * r)4040 int sp_sub(sp_int* a, sp_int* b, sp_int* r)
4041 {
4042 int err = MP_OKAY;
4043
4044 if ((a == NULL) || (b == NULL) || (r == NULL)) {
4045 err = MP_VAL;
4046 }
4047 else {
4048 #ifndef WOLFSSL_SP_INT_NEGATIVE
4049 err = _sp_sub_off(a, b, r, 0);
4050 #else
4051 if (a->sign != b->sign) {
4052 r->sign = a->sign;
4053 err = _sp_add_off(a, b, r, 0);
4054 }
4055 else if (_sp_cmp_abs(a, b) != MP_LT) {
4056 err = _sp_sub_off(a, b, r, 0);
4057 if (sp_iszero(r)) {
4058 r->sign = MP_ZPOS;
4059 }
4060 else {
4061 r->sign = a->sign;
4062 }
4063 }
4064 else {
4065 err = _sp_sub_off(b, a, r, 0);
4066 if (sp_iszero(r)) {
4067 r->sign = MP_ZPOS;
4068 }
4069 else {
4070 r->sign = 1 - a->sign;
4071 }
4072 }
4073 #endif
4074 }
4075
4076 return err;
4077 }
4078 #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
4079 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
4080
4081 /****************************
4082 * Add/Subtract mod functions
4083 ****************************/
4084
4085 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4086 (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
4087 defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
4088 /* Add two value and reduce: r = (a + b) % m
4089 *
4090 * @param [in] a SP integer to add.
4091 * @param [in] b SP integer to add with.
4092 * @param [in] m SP integer that is the modulus.
4093 * @param [out] r SP integer to hold result.
4094 *
4095 * @return MP_OKAY on success.
4096 * @return MP_VAL when a, b, m or r is NULL.
4097 * @return MP_MEM when dynamic memory allocation fails.
4098 */
sp_addmod(sp_int * a,sp_int * b,sp_int * m,sp_int * r)4099 int sp_addmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
4100 {
4101 int err = MP_OKAY;
4102 int used = ((a == NULL) || (b == NULL)) ? 1 :
4103 ((a->used >= b->used) ? a->used + 1 : b->used + 1);
4104 DECL_SP_INT(t, used);
4105
4106 if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
4107 err = MP_VAL;
4108 }
4109
4110 ALLOC_SP_INT_SIZE(t, used, err, NULL);
4111 #if 0
4112 if (err == MP_OKAY) {
4113 sp_print(a, "a");
4114 sp_print(b, "b");
4115 sp_print(m, "m");
4116 }
4117 #endif
4118
4119 if (err == MP_OKAY) {
4120 err = sp_add(a, b, t);
4121 }
4122 if (err == MP_OKAY) {
4123 err = sp_mod(t, m, r);
4124 }
4125
4126 #if 0
4127 if (err == MP_OKAY) {
4128 sp_print(r, "rma");
4129 }
4130 #endif
4131
4132 FREE_SP_INT(t, NULL);
4133 return err;
4134 }
4135 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
4136 * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
4137
4138 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
4139 /* Sub b from a and reduce: r = (a - b) % m
4140 * Result is always positive.
4141 *
4142 * @param [in] a SP integer to subtract from
4143 * @param [in] b SP integer to subtract.
4144 * @param [in] m SP integer that is the modulus.
4145 * @param [out] r SP integer to hold result.
4146 *
4147 * @return MP_OKAY on success.
4148 * @return MP_VAL when a, b, m or r is NULL.
4149 * @return MP_MEM when dynamic memory allocation fails.
4150 */
sp_submod(sp_int * a,sp_int * b,sp_int * m,sp_int * r)4151 int sp_submod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
4152 {
4153 #ifndef WOLFSSL_SP_INT_NEGATIVE
4154 int err = MP_OKAY;
4155 int used = ((a == NULL) || (b == NULL) || (m == NULL)) ? 1 :
4156 ((a->used >= m->used) ?
4157 ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
4158 ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
4159 DECL_SP_INT_ARRAY(t, used, 2);
4160
4161 if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
4162 err = MP_VAL;
4163 }
4164
4165 #if 0
4166 if (err == MP_OKAY) {
4167 sp_print(a, "a");
4168 sp_print(b, "b");
4169 sp_print(m, "m");
4170 }
4171 #endif
4172
4173 ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
4174 if (err == MP_OKAY) {
4175 if (_sp_cmp(a, m) == MP_GT) {
4176 err = sp_mod(a, m, t[0]);
4177 a = t[0];
4178 }
4179 }
4180 if (err == MP_OKAY) {
4181 if (_sp_cmp(b, m) == MP_GT) {
4182 err = sp_mod(b, m, t[1]);
4183 b = t[1];
4184 }
4185 }
4186 if (err == MP_OKAY) {
4187 if (_sp_cmp(a, b) == MP_LT) {
4188 err = sp_add(a, m, t[0]);
4189 if (err == MP_OKAY) {
4190 err = sp_sub(t[0], b, r);
4191 }
4192 }
4193 else {
4194 err = sp_sub(a, b, r);
4195 }
4196 }
4197
4198 #if 0
4199 if (err == MP_OKAY) {
4200 sp_print(r, "rms");
4201 }
4202 #endif
4203
4204 FREE_SP_INT_ARRAY(t, NULL);
4205 return err;
4206
4207 #else /* WOLFSSL_SP_INT_NEGATIVE */
4208
4209 int err = MP_OKAY;
4210 int used = ((a == NULL) || (b == NULL)) ? 1 :
4211 ((a->used >= b->used) ? a->used + 1 : b->used + 1);
4212 DECL_SP_INT(t, used);
4213
4214 if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
4215 err = MP_VAL;
4216 }
4217
4218 #if 0
4219 if (err == MP_OKAY) {
4220 sp_print(a, "a");
4221 sp_print(b, "b");
4222 sp_print(m, "m");
4223 }
4224 #endif
4225
4226 ALLOC_SP_INT_SIZE(t, used, err, NULL);
4227 if (err == MP_OKAY) {
4228 err = sp_sub(a, b, t);
4229 }
4230 if (err == MP_OKAY) {
4231 err = sp_mod(t, m, r);
4232 }
4233
4234 #if 0
4235 if (err == MP_OKAY) {
4236 sp_print(r, "rms");
4237 }
4238 #endif
4239
4240 FREE_SP_INT(t, NULL);
4241 return err;
4242 #endif /* WOLFSSL_SP_INT_NEGATIVE */
4243 }
4244 #endif /* WOLFSSL_SP_MATH_ALL */
4245
4246 #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
4247 /* Add two value and reduce: r = (a + b) % m
4248 *
4249 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
4250 *
4251 * Assumes a, b, m and r are not NULL.
4252 * m and r must not be the same pointer.
4253 *
4254 * @param [in] a SP integer to add.
4255 * @param [in] b SP integer to add with.
4256 * @param [in] m SP integer that is the modulus.
4257 * @param [out] r SP integer to hold result.
4258 *
4259 * @return MP_OKAY on success.
4260 */
sp_addmod_ct(sp_int * a,sp_int * b,sp_int * m,sp_int * r)4261 int sp_addmod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
4262 {
4263 int err = MP_OKAY;
4264 sp_int_sword w;
4265 sp_int_sword s;
4266 sp_int_digit mask;
4267 int i;
4268
4269 if (r->size < m->used) {
4270 err = MP_VAL;
4271 }
4272 if ((err == MP_OKAY) && (r == m)) {
4273 err = MP_VAL;
4274 }
4275
4276 if (err == MP_OKAY) {
4277 if (0) {
4278 sp_print(a, "a");
4279 sp_print(b, "b");
4280 sp_print(m, "m");
4281 }
4282
4283 /* Add a to b into r. Do the subtract of modulus but don't store result.
4284 * When subtract result is negative, the overflow will be negative.
4285 * Only need to subtract mod when result is positive - overflow is
4286 * positive.
4287 */
4288 w = 0;
4289 s = 0;
4290 for (i = 0; i < m->used; i++) {
4291 /* Values past 'used' are not initialized. */
4292 sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
4293 sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);
4294
4295 w += a->dp[i] & mask_a;
4296 w += b->dp[i] & mask_b;
4297 r->dp[i] = (sp_int_digit)w;
4298 s += (sp_int_digit)w;
4299 s -= m->dp[i];
4300 s >>= DIGIT_BIT;
4301 w >>= DIGIT_BIT;
4302 }
4303 s += (sp_int_digit)w;
4304 /* s will be positive when subtracting modulus is needed. */
4305 mask = (sp_int_digit)0 - (s >= 0);
4306
4307 /* Constant time, conditionally, subtract modulus from sum. */
4308 w = 0;
4309 for (i = 0; i < m->used; i++) {
4310 w += r->dp[i];
4311 w -= m->dp[i] & mask;
4312 r->dp[i] = (sp_int_digit)w;
4313 w >>= DIGIT_BIT;
4314 }
4315 /* Result will always have digits equal to or less than those in
4316 * modulus. */
4317 r->used = i;
4318 #ifdef WOLFSSL_SP_INT_NEGATIVE
4319 r->sign = MP_ZPOS;
4320 #endif /* WOLFSSL_SP_INT_NEGATIVE */
4321 sp_clamp(r);
4322
4323 if (0) {
4324 sp_print(r, "rma");
4325 }
4326 }
4327
4328 return err;
4329 }
4330 #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
4331
4332 #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
4333 /* Sub b from a and reduce: r = (a - b) % m
4334 * Result is always positive.
4335 *
4336 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
4337 *
4338 * Assumes a, b, m and r are not NULL.
4339 * m and r must not be the same pointer.
4340 *
4341 * @param [in] a SP integer to subtract from
4342 * @param [in] b SP integer to subtract.
4343 * @param [in] m SP integer that is the modulus.
4344 * @param [out] r SP integer to hold result.
4345 *
4346 * @return MP_OKAY on success.
4347 */
sp_submod_ct(sp_int * a,sp_int * b,sp_int * m,sp_int * r)4348 int sp_submod_ct(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
4349 {
4350 int err = MP_OKAY;
4351 sp_int_sword w;
4352 sp_int_digit mask;
4353 int i;
4354
4355 if (r->size < m->used + 1) {
4356 err = MP_VAL;
4357 }
4358 if ((err == MP_OKAY) && (r == m)) {
4359 err = MP_VAL;
4360 }
4361
4362 if (err == MP_OKAY) {
4363 if (0) {
4364 sp_print(a, "a");
4365 sp_print(b, "b");
4366 sp_print(m, "m");
4367 }
4368
4369 /* In constant time, subtract b from a putting result in r. */
4370 w = 0;
4371 for (i = 0; i < m->used; i++) {
4372 /* Values past 'used' are not initialized. */
4373 sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
4374 sp_int_digit mask_b = (sp_int_digit)0 - (i < b->used);
4375
4376 w += a->dp[i] & mask_a;
4377 w -= b->dp[i] & mask_b;
4378 r->dp[i] = (sp_int_digit)w;
4379 w >>= DIGIT_BIT;
4380 }
4381 /* When w is negative then we need to add modulus to make result
4382 * positive. */
4383 mask = (sp_int_digit)0 - (w < 0);
4384 /* Constant time, conditionally, add modulus to difference. */
4385 w = 0;
4386 for (i = 0; i < m->used; i++) {
4387 w += r->dp[i];
4388 w += m->dp[i] & mask;
4389 r->dp[i] = (sp_int_digit)w;
4390 w >>= DIGIT_BIT;
4391 }
4392 r->used = i;
4393 #ifdef WOLFSSL_SP_INT_NEGATIVE
4394 r->sign = MP_ZPOS;
4395 #endif /* WOLFSSL_SP_INT_NEGATIVE */
4396 sp_clamp(r);
4397
4398 if (0) {
4399 sp_print(r, "rms");
4400 }
4401 }
4402
4403 return err;
4404 }
4405 #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
4406
4407 /********************
 * Shifting functions
4409 ********************/
4410
4411 #if !defined(NO_DH) || defined(HAVE_ECC) || (defined(WC_RSA_BLINDING) && \
4412 !defined(WOLFSSL_RSA_VERIFY_ONLY))
4413 /* Left shift the multi-precision number by a number of digits.
4414 *
4415 * @param [in,out] a SP integer to shift.
4416 * @param [in] s Number of digits to shift.
4417 *
4418 * @return MP_OKAY on success.
4419 * @return MP_VAL when a is NULL or the result is too big to fit in an SP.
4420 */
sp_lshd(sp_int * a,int s)4421 int sp_lshd(sp_int* a, int s)
4422 {
4423 int err = MP_OKAY;
4424
4425 if (a == NULL) {
4426 err = MP_VAL;
4427 }
4428 if ((err == MP_OKAY) && (a->used + s > a->size)) {
4429 err = MP_VAL;
4430 }
4431 if (err == MP_OKAY) {
4432 XMEMMOVE(a->dp + s, a->dp, a->used * sizeof(sp_int_digit));
4433 a->used += s;
4434 XMEMSET(a->dp, 0, s * sizeof(sp_int_digit));
4435 sp_clamp(a);
4436 }
4437
4438 return err;
4439 }
4440 #endif
4441
4442 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4443 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
4444 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
4445 /* Left shift the multi-precision number by n bits.
4446 * Bits may be larger than the word size.
4447 *
4448 * @param [in,out] a SP integer to shift.
4449 * @param [in] n Number of bits to shift left.
4450 *
4451 * @return MP_OKAY on success.
4452 */
sp_lshb(sp_int * a,int n)4453 static int sp_lshb(sp_int* a, int n)
4454 {
4455 int err = MP_OKAY;
4456
4457 if (a->used != 0) {
4458 int s = n >> SP_WORD_SHIFT;
4459 int i;
4460
4461 if (a->used + s >= a->size) {
4462 err = MP_VAL;
4463 }
4464 if (err == MP_OKAY) {
4465 n &= SP_WORD_MASK;
4466 if (n != 0) {
4467 sp_int_digit v;
4468
4469 v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);
4470 a->dp[a->used - 1 + s] = a->dp[a->used - 1] << n;
4471 for (i = a->used - 2; i >= 0; i--) {
4472 a->dp[i + 1 + s] |= a->dp[i] >> (SP_WORD_SIZE - n);
4473 a->dp[i + s] = a->dp[i] << n;
4474 }
4475 if (v != 0) {
4476 a->dp[a->used + s] = v;
4477 a->used++;
4478 }
4479 }
4480 else if (s > 0) {
4481 for (i = a->used - 1; i >= 0; i--) {
4482 a->dp[i + s] = a->dp[i];
4483 }
4484 }
4485 a->used += s;
4486 XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
4487 }
4488 }
4489
4490 return err;
4491 }
4492 #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
4493 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
4494
4495 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4496 !defined(NO_DH) || defined(HAVE_ECC) || \
4497 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
4498 /* Shift a right by n digits into r: r = a >> (n * SP_WORD_SIZE)
4499 *
4500 * @param [in] a SP integer to shift.
4501 * @param [in] n Number of digits to shift.
4502 * @param [out] r SP integer to store result in.
4503 */
sp_rshd(sp_int * a,int c)4504 void sp_rshd(sp_int* a, int c)
4505 {
4506 if (a != NULL) {
4507 int i;
4508 int j;
4509
4510 if (c >= a->used) {
4511 _sp_zero(a);
4512 }
4513 else {
4514 for (i = c, j = 0; i < a->used; i++, j++) {
4515 a->dp[j] = a->dp[i];
4516 }
4517 a->used -= c;
4518 }
4519 }
4520 }
4521 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
4522 * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
4523
4524 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4525 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
4526 defined(WOLFSSL_HAVE_SP_DH)
4527 /* Shift a right by n bits into r: r = a >> n
4528 *
4529 * @param [in] a SP integer to shift.
4530 * @param [in] n Number of bits to shift.
4531 * @param [out] r SP integer to store result in.
4532 */
sp_rshb(sp_int * a,int n,sp_int * r)4533 void sp_rshb(sp_int* a, int n, sp_int* r)
4534 {
4535 int i = n >> SP_WORD_SHIFT;
4536
4537 if (i >= a->used) {
4538 _sp_zero(r);
4539 }
4540 else {
4541 int j;
4542
4543 n &= SP_WORD_SIZE - 1;
4544 if (n == 0) {
4545 for (j = 0; i < a->used; i++, j++)
4546 r->dp[j] = a->dp[i];
4547 r->used = j;
4548 }
4549 else if (n > 0) {
4550 for (j = 0; i < a->used-1; i++, j++)
4551 r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
4552 r->dp[j] = a->dp[i] >> n;
4553 r->used = j + 1;
4554 sp_clamp(r);
4555 }
4556 #ifdef WOLFSSL_SP_INT_NEGATIVE
4557 if (sp_iszero(r)) {
4558 r->sign = MP_ZPOS;
4559 }
4560 else {
4561 r->sign = a->sign;
4562 }
4563 #endif
4564 }
4565 }
4566 #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
4567 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
4568
4569 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4570 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
4571 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Divide a by d and return the quotient in r and the remainder in rem.
 *   r = a / d; rem = a % d
 *
 * Either r or rem may be NULL when that output is not wanted, but not both.
 * When WOLFSSL_SP_INT_NEGATIVE is defined, a non-zero remainder takes the sign
 * of a and a non-zero quotient is negative when the signs of a and d differ.
 *
 * @param  [in]   a    SP integer to be divided.
 * @param  [in]   d    SP integer to divide by.
 * @param  [out]  r    SP integer that is the quotient.
 * @param  [out]  rem  SP integer that is the remainder.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or d is NULL, r and rem are NULL, d is 0, r or rem
 *          is too small for the result, or a already uses SP_INT_DIGITS
 *          digits.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
#ifndef WOLFSSL_SP_MATH_ALL
static
#endif
int sp_div(sp_int* a, sp_int* d, sp_int* r, sp_int* rem)
{
    int err = MP_OKAY;
    int ret;
    /* Set when a shortcut has already produced the outputs. */
    int done = 0;
    int i;
    /* Number of bits a and d are shifted left by before dividing. */
    int s = 0;
    /* Top digit of the (shifted) divisor. */
    sp_int_digit dt;
    /* Current estimate of a quotient digit. */
    sp_int_digit t;
    /* Working copy of a - holds the running remainder. */
    sp_int* sa = NULL;
    /* Working copy of d - only used when a shift is needed. */
    sp_int* sd = NULL;
    /* Quotient being built. */
    sp_int* tr = NULL;
    /* Divisor times the quotient digit estimate. */
    sp_int* trial = NULL;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    int aSign = MP_ZPOS;
    int dSign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    DECL_SP_INT_ARRAY(td, (a == NULL) ? 1 : a->used + 1, 4);

    if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
        err = MP_VAL;
    }
    /* Division by zero is not defined. */
    if ((err == MP_OKAY) && sp_iszero(d)) {
        err = MP_VAL;
    }
    /* Outputs must be able to hold the largest values written to them. */
    if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
        err = MP_VAL;
    }
    if ((err == MP_OKAY) && (rem != NULL) && (rem->size < a->used + 1)) {
        err = MP_VAL;
    }
    /* May need to shift number being divided left into a new word. */
    if ((err == MP_OKAY) && (a->used == SP_INT_DIGITS)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(d, "b");
    }
#endif

    if (err == MP_OKAY) {
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Remember the signs - the long division works on magnitudes. */
        aSign = a->sign;
        dSign = d->sign;
    #endif /* WOLFSSL_SP_INT_NEGATIVE */

        /* Shortcuts that need no temporaries. */
        ret = _sp_cmp_abs(a, d);
        if (ret == MP_LT) {
            /* |a| < |d|: quotient is 0 and remainder is a. */
            if (rem != NULL) {
                sp_copy(a, rem);
            }
            if (r != NULL) {
                sp_set(r, 0);
            }
            done = 1;
        }
        else if (ret == MP_EQ) {
            /* |a| == |d|: quotient is 1 (signed) and remainder is 0. */
            if (rem != NULL) {
                sp_set(rem, 0);
            }
            if (r != NULL) {
                sp_set(r, 1);
            #ifdef WOLFSSL_SP_INT_NEGATIVE
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
            }
            done = 1;
        }
        else if (sp_count_bits(a) == sp_count_bits(d)) {
            /* a is greater than d but same bit length */
            /* Quotient is 1 (signed) and remainder is one subtraction. */
            if (rem != NULL) {
                _sp_sub_off(a, d, rem, 0);
            #ifdef WOLFSSL_SP_INT_NEGATIVE
                rem->sign = aSign;
            #endif
            }
            if (r != NULL) {
                sp_set(r, 1);
            #ifdef WOLFSSL_SP_INT_NEGATIVE
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
            #endif /* WOLFSSL_SP_INT_NEGATIVE */
            }
            done = 1;
        }
    }

    if (!done) {
        /* Macro always has code associated with it and checks err first. */
        ALLOC_SP_INT_ARRAY(td, a->used + 1, 4, err, NULL);
    }

    if ((!done) && (err == MP_OKAY)) {
        sa    = td[0];
        sd    = td[1];
        tr    = td[2];
        trial = td[3];

        sp_init_size(sa, a->used + 1);
        sp_init_size(sd, d->used + 1);
        sp_init_size(tr, a->used - d->used + 2);
        sp_init_size(trial, a->used + 1);

        /* Shift count that takes the bit length of d up to a whole number of
         * digits (assumes SP_WORD_MASK is SP_WORD_SIZE - 1 - TODO confirm).
         * A value of SP_WORD_SIZE means no shift is required. */
        s = sp_count_bits(d);
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
        sp_copy(a, sa);
        if (s != SP_WORD_SIZE) {
            err = sp_lshb(sa, s);
            if (err == MP_OKAY) {
                /* From here on d refers to the shifted copy. */
                sp_copy(d, sd);
                d = sd;
                err = sp_lshb(sd, s);
            }
        }
    }
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
    #ifdef WOLFSSL_SP_SMALL
        int c;
    #else
        int j;
        int o;
        sp_int_sword sw;
    #endif /* WOLFSSL_SP_SMALL */
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        sa->sign = MP_ZPOS;
        sd->sign = MP_ZPOS;
    #endif /* WOLFSSL_SP_INT_NEGATIVE */

        /* NOTE(review): used is set before and after sp_clear - presumably
         * sp_clear zeros 'used' digits and then resets used; confirm. */
        tr->used = sa->used - d->used + 1;
        sp_clear(tr);
        tr->used = sa->used - d->used + 1;
        dt = d->dp[d->used-1];

        /* Compare the top digits of sa against d, from the top down, to find
         * the first digit that differs. */
        for (i = d->used - 1; i > 0; i--) {
            if (sa->dp[sa->used - d->used + i] != d->dp[i]) {
                break;
            }
        }
        /* Top of sa is at least d: take d off once and record a top quotient
         * digit of 1. */
        if (sa->dp[sa->used - d->used + i] >= d->dp[i]) {
            i = sa->used;
            _sp_sub_off(sa, d, sa, sa->used - d->used);
            /* Keep the same used so that 0 zeros will be put in. */
            sa->used = i;
            if (r != NULL) {
                tr->dp[sa->used - d->used] = 1;
            }
        }
        /* One quotient digit per iteration, most significant first. */
        for (i = sa->used - 1; i >= d->used; i--) {
            /* Estimate the quotient digit from the top two digits of the
             * running remainder and the top digit of the divisor. */
            if (sa->dp[i] == dt) {
                t = SP_DIGIT_MAX;
            }
            else {
                t = sp_div_word(sa->dp[i], sa->dp[i-1], dt);
            }

    #ifdef WOLFSSL_SP_SMALL
            /* Lower the estimate until divisor * estimate is no larger than
             * the running remainder. */
            do {
                err = _sp_mul_d(d, t, trial, i - d->used);
                if (err != MP_OKAY) {
                    break;
                }
                c = _sp_cmp_abs(trial, sa);
                if (c == MP_GT) {
                    t--;
                }
            }
            while (c == MP_GT);

            if (err != MP_OKAY) {
                break;
            }

            _sp_sub_off(sa, trial, sa, 0);
            /* Add in the digit and carry into the next digit on wrap. */
            tr->dp[i - d->used] += t;
            if (tr->dp[i - d->used] < t) {
                tr->dp[i + 1 - d->used]++;
            }
    #else
            /* Offset into sa of the digits this quotient digit applies to. */
            o = i - d->used;
            do {
                sp_int_word tw = 0;
                /* trial = d * t */
                for (j = 0; j < d->used; j++) {
                    tw += (sp_int_word)d->dp[j] * t;
                    trial->dp[j] = (sp_int_digit)tw;
                    tw >>= SP_WORD_SIZE;
                }
                trial->dp[j] = (sp_int_digit)tw;

                /* Find the top digit where trial and remainder differ. */
                for (j = d->used; j > 0; j--) {
                    if (trial->dp[j] != sa->dp[j + o]) {
                        break;
                    }
                }
                /* Estimate too big when trial is larger - lower and retry. */
                if (trial->dp[j] > sa->dp[j + o]) {
                    t--;
                }
            }
            while (trial->dp[j] > sa->dp[j + o]);

            /* Take trial off the running remainder at the offset. */
            sw = 0;
            for (j = 0; j <= d->used; j++) {
                sw += sa->dp[j + o];
                sw -= trial->dp[j];
                sa->dp[j + o] = (sp_int_digit)sw;
                sw >>= SP_WORD_SIZE;
            }

            tr->dp[o] = t;
    #endif /* WOLFSSL_SP_SMALL */
        }
        /* Digits of sa still in use are those below the last index. */
        sa->used = i + 1;

        if ((err == MP_OKAY) && (rem != NULL)) {
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            sa->sign = (sa->used == 0) ? MP_ZPOS : aSign;
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
            /* Undo the shift applied before dividing. */
            if (s != SP_WORD_SIZE) {
                sp_rshb(sa, s, sa);
            }
            sp_copy(sa, rem);
            sp_clamp(rem);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (sp_iszero(rem)) {
                rem->sign = MP_ZPOS;
            }
        #endif
        }
        if ((err == MP_OKAY) && (r != NULL)) {
            sp_copy(tr, r);
            sp_clamp(r);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (sp_iszero(r)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = (aSign == dSign) ? MP_ZPOS : MP_NEG;
            }
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
    }

#if 0
    if (err == MP_OKAY) {
        if (rem != NULL) {
            sp_print(rem, "rdr");
        }
        if (r != NULL) {
            sp_print(r, "rdw");
        }
    }
#endif

    FREE_SP_INT_ARRAY(td, NULL);
    return err;
}
4844 #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
4845 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
4846
4847 #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
4848 (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
4849 !defined(WOLFSSL_RSA_PUBLIC_ONLY))
4850 #ifndef FREESCALE_LTC_TFM
4851 /* Calculate the remainder of dividing a by m: r = a mod m.
4852 *
4853 * @param [in] a SP integer to reduce.
4854 * @param [in] m SP integer that is the modulus.
4855 * @param [out] r SP integer to store result in.
4856 *
4857 * @return MP_OKAY on success.
4858 * @return MP_VAL when a, m or r is NULL or m is 0.
4859 */
sp_mod(sp_int * a,sp_int * m,sp_int * r)4860 int sp_mod(sp_int* a, sp_int* m, sp_int* r)
4861 {
4862 int err = MP_OKAY;
4863 #ifdef WOLFSSL_SP_INT_NEGATIVE
4864 DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);
4865 #endif /* WOLFSSL_SP_INT_NEGATIVE */
4866
4867 if ((a == NULL) || (m == NULL) || (r == NULL)) {
4868 err = MP_VAL;
4869 }
4870 #ifdef WOLFSSL_SP_INT_NEGATIVE
4871 if ((err == MP_OKAY) && (a->used >= SP_INT_DIGITS)) {
4872 err = MP_VAL;
4873 }
4874 #endif
4875
4876 #ifndef WOLFSSL_SP_INT_NEGATIVE
4877 if (err == MP_OKAY) {
4878 err = sp_div(a, m, NULL, r);
4879 }
4880 #else
4881 ALLOC_SP_INT(t, a->used + 1, err, NULL);
4882 if (err == MP_OKAY) {
4883 sp_init_size(t, a->used + 1);
4884 err = sp_div(a, m, NULL, t);
4885 }
4886 if (err == MP_OKAY) {
4887 if ((!sp_iszero(t)) && (t->sign != m->sign)) {
4888 err = sp_add(t, m, r);
4889 }
4890 else {
4891 err = sp_copy(t, r);
4892 }
4893 }
4894
4895 FREE_SP_INT(t, NULL);
4896 #endif /* WOLFSSL_SP_INT_NEGATIVE */
4897
4898 return err;
4899 }
4900 #endif /* !FREESCALE_LTC_TFM */
4901 #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
4902 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
4903
4904 /* START SP_MUL implementations. */
4905 /* This code is generated.
4906 * To generate:
4907 * cd scripts/sp/sp_int
4908 * ./gen.sh
4909 * File sp_mul.c contains code.
4910 */
4911
4912 #ifdef SQR_MUL_ASM
4913 /* Multiply a by b into r where a and b have same no. digits. r = a * b
4914 *
4915 * Optimised code for when number of digits in a and b are the same.
4916 *
4917 * @param [in] a SP integer to mulitply.
4918 * @param [in] b SP integer to mulitply by.
4919 * @param [out] r SP integer to hod reult.
4920 *
4921 * @return MP_OKAY otherwise.
4922 * @return MP_MEM when dynamic memory allocation fails.
4923 */
_sp_mul_nxn(sp_int * a,sp_int * b,sp_int * r)4924 static int _sp_mul_nxn(sp_int* a, sp_int* b, sp_int* r)
4925 {
4926 int err = MP_OKAY;
4927 int i;
4928 int j;
4929 int k;
4930 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4931 sp_int_digit* t = NULL;
4932 #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
4933 defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
4934 sp_int_digit t[a->used * 2];
4935 #else
4936 sp_int_digit t[SP_INT_DIGITS];
4937 #endif
4938
4939 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4940 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
4941 DYNAMIC_TYPE_BIGINT);
4942 if (t == NULL) {
4943 err = MP_MEM;
4944 }
4945 #endif
4946 if (err == MP_OKAY) {
4947 sp_int_digit l, h, o;
4948 sp_int_digit* dp;
4949
4950 h = 0;
4951 l = 0;
4952 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
4953 t[0] = h;
4954 h = 0;
4955 o = 0;
4956 for (k = 1; k <= a->used - 1; k++) {
4957 j = k;
4958 dp = a->dp;
4959 for (; j >= 0; dp++, j--) {
4960 SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
4961 }
4962 t[k] = l;
4963 l = h;
4964 h = o;
4965 o = 0;
4966 }
4967 for (; k <= (a->used - 1) * 2; k++) {
4968 i = k - (b->used - 1);
4969 dp = &b->dp[b->used - 1];
4970 for (; i < a->used; i++, dp--) {
4971 SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
4972 }
4973 t[k] = l;
4974 l = h;
4975 h = o;
4976 o = 0;
4977 }
4978 t[k] = l;
4979 r->used = k + 1;
4980 XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
4981 sp_clamp(r);
4982 }
4983
4984 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
4985 if (t != NULL) {
4986 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
4987 }
4988 #endif
4989 return err;
4990 }
4991
4992 /* Multiply a by b into r. r = a * b
4993 *
4994 * @param [in] a SP integer to mulitply.
4995 * @param [in] b SP integer to mulitply by.
4996 * @param [out] r SP integer to hod reult.
4997 *
4998 * @return MP_OKAY otherwise.
4999 * @return MP_MEM when dynamic memory allocation fails.
5000 */
_sp_mul(sp_int * a,sp_int * b,sp_int * r)5001 static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
5002 {
5003 int err = MP_OKAY;
5004 int i;
5005 int j;
5006 int k;
5007 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5008 sp_int_digit* t = NULL;
5009 #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
5010 defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
5011 sp_int_digit t[a->used + b->used];
5012 #else
5013 sp_int_digit t[SP_INT_DIGITS];
5014 #endif
5015
5016 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5017 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used),
5018 NULL, DYNAMIC_TYPE_BIGINT);
5019 if (t == NULL) {
5020 err = MP_MEM;
5021 }
5022 #endif
5023 if (err == MP_OKAY) {
5024 sp_int_digit l;
5025 sp_int_digit h;
5026 sp_int_digit o;
5027
5028 h = 0;
5029 l = 0;
5030 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
5031 t[0] = h;
5032 h = 0;
5033 o = 0;
5034 for (k = 1; k <= b->used - 1; k++) {
5035 i = 0;
5036 j = k;
5037 for (; (i < a->used) && (j >= 0); i++, j--) {
5038 SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
5039 }
5040 t[k] = l;
5041 l = h;
5042 h = o;
5043 o = 0;
5044 }
5045 for (; k <= (a->used - 1) + (b->used - 1); k++) {
5046 j = b->used - 1;
5047 i = k - j;
5048 for (; (i < a->used) && (j >= 0); i++, j--) {
5049 SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
5050 }
5051 t[k] = l;
5052 l = h;
5053 h = o;
5054 o = 0;
5055 }
5056 t[k] = l;
5057 r->used = k + 1;
5058 XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
5059 sp_clamp(r);
5060 }
5061
5062 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5063 if (t != NULL) {
5064 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5065 }
5066 #endif
5067 return err;
5068 }
5069 #else
5070 /* Multiply a by b into r. r = a * b
5071 *
5072 * @param [in] a SP integer to mulitply.
5073 * @param [in] b SP integer to mulitply by.
5074 * @param [out] r SP integer to hod reult.
5075 *
5076 * @return MP_OKAY otherwise.
5077 * @return MP_MEM when dynamic memory allocation fails.
5078 */
_sp_mul(sp_int * a,sp_int * b,sp_int * r)5079 static int _sp_mul(sp_int* a, sp_int* b, sp_int* r)
5080 {
5081 int err = MP_OKAY;
5082 int i;
5083 int j;
5084 int k;
5085 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5086 sp_int_digit* t = NULL;
5087 #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
5088 defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
5089 sp_int_digit t[a->used + b->used];
5090 #else
5091 sp_int_digit t[SP_INT_DIGITS];
5092 #endif
5093
5094 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5095 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used),
5096 NULL, DYNAMIC_TYPE_BIGINT);
5097 if (t == NULL) {
5098 err = MP_MEM;
5099 }
5100 #endif
5101 if (err == MP_OKAY) {
5102 sp_int_word w;
5103 sp_int_word l;
5104 sp_int_word h;
5105 #ifdef SP_WORD_OVERFLOW
5106 sp_int_word o;
5107 #endif
5108
5109 w = (sp_int_word)a->dp[0] * b->dp[0];
5110 t[0] = (sp_int_digit)w;
5111 l = (sp_int_digit)(w >> SP_WORD_SIZE);
5112 h = 0;
5113 #ifdef SP_WORD_OVERFLOW
5114 o = 0;
5115 #endif
5116 for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
5117 i = k - (b->used - 1);
5118 i &= ~(i >> (sizeof(i) * 8 - 1));
5119 j = k - i;
5120 for (; (i < a->used) && (j >= 0); i++, j--) {
5121 w = (sp_int_word)a->dp[i] * b->dp[j];
5122 l += (sp_int_digit)w;
5123 h += (sp_int_digit)(w >> SP_WORD_SIZE);
5124 #ifdef SP_WORD_OVERFLOW
5125 h += (sp_int_digit)(l >> SP_WORD_SIZE);
5126 l &= SP_MASK;
5127 o += (sp_int_digit)(h >> SP_WORD_SIZE);
5128 h &= SP_MASK;
5129 #endif
5130 }
5131 t[k] = (sp_int_digit)l;
5132 l >>= SP_WORD_SIZE;
5133 l += (sp_int_digit)h;
5134 h >>= SP_WORD_SIZE;
5135 #ifdef SP_WORD_OVERFLOW
5136 h += o & SP_MASK;
5137 o >>= SP_WORD_SIZE;
5138 #endif
5139 }
5140 t[k] = (sp_int_digit)l;
5141 r->used = k + 1;
5142 XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
5143 sp_clamp(r);
5144 }
5145
5146 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5147 if (t != NULL) {
5148 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
5149 }
5150 #endif
5151 return err;
5152 }
5153 #endif
5154
5155 #ifndef WOLFSSL_SP_SMALL
5156 #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
5157 #if SP_WORD_SIZE == 64
5158 #ifndef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Unrolled implementation that reads exactly 4 digits from each of a and b
 * and always writes 8 digits to r before clamping.
 * All 16 digit products are calculated before any digit of r is written.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[16];
#endif
    sp_int_digit* da = a->dp;
    sp_int_digit* db = b->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* Digit products, ordered by result column (sum of the indices). */
        w[0] = (sp_int_word)da[0] * db[0];
        w[1] = (sp_int_word)da[0] * db[1];
        w[2] = (sp_int_word)da[1] * db[0];
        w[3] = (sp_int_word)da[0] * db[2];
        w[4] = (sp_int_word)da[1] * db[1];
        w[5] = (sp_int_word)da[2] * db[0];
        w[6] = (sp_int_word)da[0] * db[3];
        w[7] = (sp_int_word)da[1] * db[2];
        w[8] = (sp_int_word)da[2] * db[1];
        w[9] = (sp_int_word)da[3] * db[0];
        w[10] = (sp_int_word)da[1] * db[3];
        w[11] = (sp_int_word)da[2] * db[2];
        w[12] = (sp_int_word)da[3] * db[1];
        w[13] = (sp_int_word)da[2] * db[3];
        w[14] = (sp_int_word)da[3] * db[2];
        w[15] = (sp_int_word)da[3] * db[3];

        /* w[0] is the running accumulator. For each column: shift out the
         * digit just stored, add the high halves of the previous column's
         * products and the low halves of this column's products. */
        r->dp[0] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        r->dp[1] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        r->dp[2] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[3] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        w[0] += (sp_int_digit)w[10];
        w[0] += (sp_int_digit)w[11];
        w[0] += (sp_int_digit)w[12];
        r->dp[4] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[10] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[10];
        w[11] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[11];
        w[12] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[12];
        w[0] += (sp_int_digit)w[13];
        w[0] += (sp_int_digit)w[14];
        r->dp[5] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[13] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[13];
        w[14] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[14];
        w[0] += (sp_int_digit)w[15];
        r->dp[6] = w[0];
        w[0] >>= SP_WORD_SIZE;
        w[15] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[15];
        r->dp[7] = w[0];

        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
5277 #else /* SQR_MUL_ASM */
/* Multiply a by b and store in r: r = a * b
 *
 * Unrolled implementation that reads exactly 4 digits from each of a and b
 * and always writes 8 digits to r before clamping.
 * The low 4 result digits are held in a local array and only copied into r
 * after the last read of a and b; the high 4 digits are written directly.
 * No memory is allocated.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY always.
 */
static int _sp_mul_4(sp_int* a, sp_int* b, sp_int* r)
{
    /* l, h and o form the accumulator for the current column. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Low half of the result. */
    sp_int_digit t[4];

    /* Column 0. NOTE(review): the outputs are passed as (h, l) and the first
     * is stored as digit 0 - presumably SP_ASM_MUL puts the low word in its
     * first argument; confirm against the macro definition. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 - from here digits are written straight into r. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Column 6 and the top digit. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Low digits go in last, after all reads of a and b are done. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);

    return MP_OKAY;
}
5339 #endif /* SQR_MUL_ASM */
5340 #endif /* SP_WORD_SIZE == 64 */
5341 #if SP_WORD_SIZE == 64
5342 #ifdef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Unrolled implementation that reads exactly 6 digits from each of a and b
 * and always writes 12 digits to r before clamping.
 * The low 6 result digits are held in a local array and only copied into r
 * after the last read of a and b; the high 6 digits are written directly.
 * No memory is allocated.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY always.
 */
static int _sp_mul_6(sp_int* a, sp_int* b, sp_int* r)
{
    /* l, h and o form the accumulator for the current column. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Low half of the result. */
    sp_int_digit t[6];

    /* Column 0. NOTE(review): the outputs are passed as (h, l) and the first
     * is stored as digit 0 - presumably SP_ASM_MUL puts the low word in its
     * first argument; confirm against the macro definition. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6 - from here digits are written straight into r. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Column 10 and the top digit. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* Low digits go in last, after all reads of a and b are done. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);

    return MP_OKAY;
}
5440 #endif /* SQR_MUL_ASM */
5441 #endif /* SP_WORD_SIZE == 64 */
5442 #if SP_WORD_SIZE == 32
5443 #ifdef SQR_MUL_ASM
5444 /* Multiply a by b and store in r: r = a * b
5445 *
5446 * @param [in] a SP integer to multiply.
5447 * @param [in] b SP integer to multiply.
5448 * @param [out] r SP integer result.
5449 *
5450 * @return MP_OKAY on success.
5451 * @return MP_MEM when dynamic memory allocation fails.
5452 */
_sp_mul_8(sp_int * a,sp_int * b,sp_int * r)5453 static int _sp_mul_8(sp_int* a, sp_int* b, sp_int* r)
5454 {
5455 sp_int_digit l = 0;
5456 sp_int_digit h = 0;
5457 sp_int_digit o = 0;
5458 sp_int_digit t[8];
5459
5460 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
5461 t[0] = h;
5462 h = 0;
5463 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
5464 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
5465 t[1] = l;
5466 l = h;
5467 h = o;
5468 o = 0;
5469 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
5470 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
5471 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
5472 t[2] = l;
5473 l = h;
5474 h = o;
5475 o = 0;
5476 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
5477 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
5478 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
5479 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
5480 t[3] = l;
5481 l = h;
5482 h = o;
5483 o = 0;
5484 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
5485 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
5486 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
5487 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
5488 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
5489 t[4] = l;
5490 l = h;
5491 h = o;
5492 o = 0;
5493 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
5494 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
5495 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
5496 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
5497 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
5498 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
5499 t[5] = l;
5500 l = h;
5501 h = o;
5502 o = 0;
5503 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
5504 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
5505 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
5506 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
5507 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
5508 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
5509 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
5510 t[6] = l;
5511 l = h;
5512 h = o;
5513 o = 0;
5514 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
5515 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
5516 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
5517 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
5518 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
5519 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
5520 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
5521 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
5522 t[7] = l;
5523 l = h;
5524 h = o;
5525 o = 0;
5526 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
5527 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
5528 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
5529 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
5530 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
5531 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
5532 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
5533 r->dp[8] = l;
5534 l = h;
5535 h = o;
5536 o = 0;
5537 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
5538 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
5539 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
5540 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
5541 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
5542 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
5543 r->dp[9] = l;
5544 l = h;
5545 h = o;
5546 o = 0;
5547 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
5548 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
5549 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
5550 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
5551 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
5552 r->dp[10] = l;
5553 l = h;
5554 h = o;
5555 o = 0;
5556 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
5557 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
5558 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
5559 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
5560 r->dp[11] = l;
5561 l = h;
5562 h = o;
5563 o = 0;
5564 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
5565 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
5566 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
5567 r->dp[12] = l;
5568 l = h;
5569 h = o;
5570 o = 0;
5571 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
5572 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
5573 r->dp[13] = l;
5574 l = h;
5575 h = o;
5576 SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
5577 r->dp[14] = l;
5578 r->dp[15] = h;
5579 XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
5580 r->used = 16;
5581 sp_clamp(r);
5582
5583 return MP_OKAY;
5584 }
5585 #endif /* SQR_MUL_ASM */
5586 #endif /* SP_WORD_SIZE == 32 */
5587 #if SP_WORD_SIZE == 32
5588 #ifdef SQR_MUL_ASM
5589 /* Multiply a by b and store in r: r = a * b
5590 *
5591 * @param [in] a SP integer to multiply.
5592 * @param [in] b SP integer to multiply.
5593 * @param [out] r SP integer result.
5594 *
5595 * @return MP_OKAY on success.
5596 * @return MP_MEM when dynamic memory allocation fails.
5597 */
_sp_mul_12(sp_int * a,sp_int * b,sp_int * r)5598 static int _sp_mul_12(sp_int* a, sp_int* b, sp_int* r)
5599 {
5600 sp_int_digit l = 0;
5601 sp_int_digit h = 0;
5602 sp_int_digit o = 0;
5603 sp_int_digit t[12];
5604
5605 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
5606 t[0] = h;
5607 h = 0;
5608 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
5609 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
5610 t[1] = l;
5611 l = h;
5612 h = o;
5613 o = 0;
5614 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
5615 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
5616 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
5617 t[2] = l;
5618 l = h;
5619 h = o;
5620 o = 0;
5621 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
5622 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
5623 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
5624 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
5625 t[3] = l;
5626 l = h;
5627 h = o;
5628 o = 0;
5629 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
5630 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
5631 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
5632 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
5633 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
5634 t[4] = l;
5635 l = h;
5636 h = o;
5637 o = 0;
5638 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
5639 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
5640 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
5641 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
5642 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
5643 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
5644 t[5] = l;
5645 l = h;
5646 h = o;
5647 o = 0;
5648 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
5649 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
5650 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
5651 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
5652 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
5653 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
5654 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
5655 t[6] = l;
5656 l = h;
5657 h = o;
5658 o = 0;
5659 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
5660 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
5661 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
5662 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
5663 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
5664 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
5665 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
5666 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
5667 t[7] = l;
5668 l = h;
5669 h = o;
5670 o = 0;
5671 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
5672 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
5673 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
5674 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
5675 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
5676 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
5677 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
5678 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
5679 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
5680 t[8] = l;
5681 l = h;
5682 h = o;
5683 o = 0;
5684 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
5685 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
5686 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
5687 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
5688 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
5689 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
5690 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
5691 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
5692 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
5693 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
5694 t[9] = l;
5695 l = h;
5696 h = o;
5697 o = 0;
5698 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
5699 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
5700 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
5701 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
5702 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
5703 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
5704 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
5705 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
5706 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
5707 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
5708 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
5709 t[10] = l;
5710 l = h;
5711 h = o;
5712 o = 0;
5713 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
5714 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
5715 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
5716 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
5717 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
5718 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
5719 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
5720 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
5721 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
5722 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
5723 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
5724 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
5725 t[11] = l;
5726 l = h;
5727 h = o;
5728 o = 0;
5729 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
5730 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
5731 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
5732 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
5733 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
5734 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
5735 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
5736 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
5737 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
5738 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
5739 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
5740 r->dp[12] = l;
5741 l = h;
5742 h = o;
5743 o = 0;
5744 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
5745 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
5746 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
5747 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
5748 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
5749 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
5750 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
5751 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
5752 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
5753 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
5754 r->dp[13] = l;
5755 l = h;
5756 h = o;
5757 o = 0;
5758 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
5759 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
5760 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
5761 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
5762 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
5763 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
5764 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
5765 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
5766 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
5767 r->dp[14] = l;
5768 l = h;
5769 h = o;
5770 o = 0;
5771 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
5772 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
5773 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
5774 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
5775 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
5776 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
5777 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
5778 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
5779 r->dp[15] = l;
5780 l = h;
5781 h = o;
5782 o = 0;
5783 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
5784 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
5785 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
5786 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
5787 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
5788 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
5789 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
5790 r->dp[16] = l;
5791 l = h;
5792 h = o;
5793 o = 0;
5794 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
5795 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
5796 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
5797 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
5798 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
5799 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
5800 r->dp[17] = l;
5801 l = h;
5802 h = o;
5803 o = 0;
5804 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
5805 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
5806 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
5807 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
5808 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
5809 r->dp[18] = l;
5810 l = h;
5811 h = o;
5812 o = 0;
5813 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
5814 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
5815 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
5816 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
5817 r->dp[19] = l;
5818 l = h;
5819 h = o;
5820 o = 0;
5821 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
5822 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
5823 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
5824 r->dp[20] = l;
5825 l = h;
5826 h = o;
5827 o = 0;
5828 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
5829 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
5830 r->dp[21] = l;
5831 l = h;
5832 h = o;
5833 SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
5834 r->dp[22] = l;
5835 r->dp[23] = h;
5836 XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
5837 r->used = 24;
5838 sp_clamp(r);
5839
5840 return MP_OKAY;
5841 }
5842 #endif /* SQR_MUL_ASM */
5843 #endif /* SP_WORD_SIZE == 32 */
5844 #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
5845
5846 #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
5847 #if SP_INT_DIGITS >= 32
5848 /* Multiply a by b and store in r: r = a * b
5849 *
5850 * @param [in] a SP integer to multiply.
5851 * @param [in] b SP integer to multiply.
5852 * @param [out] r SP integer result.
5853 *
5854 * @return MP_OKAY on success.
5855 * @return MP_MEM when dynamic memory allocation fails.
5856 */
_sp_mul_16(sp_int * a,sp_int * b,sp_int * r)5857 static int _sp_mul_16(sp_int* a, sp_int* b, sp_int* r)
5858 {
5859 int err = MP_OKAY;
5860 sp_int_digit l = 0;
5861 sp_int_digit h = 0;
5862 sp_int_digit o = 0;
5863 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5864 sp_int_digit* t = NULL;
5865 #else
5866 sp_int_digit t[16];
5867 #endif
5868
5869 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
5870 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
5871 DYNAMIC_TYPE_BIGINT);
5872 if (t == NULL) {
5873 err = MP_MEM;
5874 }
5875 #endif
5876 if (err == MP_OKAY) {
5877 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
5878 t[0] = h;
5879 h = 0;
5880 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
5881 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
5882 t[1] = l;
5883 l = h;
5884 h = o;
5885 o = 0;
5886 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
5887 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
5888 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
5889 t[2] = l;
5890 l = h;
5891 h = o;
5892 o = 0;
5893 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
5894 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
5895 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
5896 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
5897 t[3] = l;
5898 l = h;
5899 h = o;
5900 o = 0;
5901 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
5902 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
5903 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
5904 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
5905 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
5906 t[4] = l;
5907 l = h;
5908 h = o;
5909 o = 0;
5910 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
5911 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
5912 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
5913 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
5914 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
5915 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
5916 t[5] = l;
5917 l = h;
5918 h = o;
5919 o = 0;
5920 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
5921 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
5922 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
5923 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
5924 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
5925 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
5926 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
5927 t[6] = l;
5928 l = h;
5929 h = o;
5930 o = 0;
5931 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
5932 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
5933 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
5934 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
5935 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
5936 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
5937 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
5938 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
5939 t[7] = l;
5940 l = h;
5941 h = o;
5942 o = 0;
5943 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
5944 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
5945 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
5946 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
5947 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
5948 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
5949 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
5950 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
5951 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
5952 t[8] = l;
5953 l = h;
5954 h = o;
5955 o = 0;
5956 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
5957 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
5958 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
5959 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
5960 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
5961 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
5962 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
5963 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
5964 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
5965 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
5966 t[9] = l;
5967 l = h;
5968 h = o;
5969 o = 0;
5970 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
5971 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
5972 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
5973 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
5974 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
5975 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
5976 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
5977 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
5978 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
5979 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
5980 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
5981 t[10] = l;
5982 l = h;
5983 h = o;
5984 o = 0;
5985 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
5986 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
5987 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
5988 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
5989 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
5990 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
5991 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
5992 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
5993 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
5994 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
5995 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
5996 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
5997 t[11] = l;
5998 l = h;
5999 h = o;
6000 o = 0;
6001 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
6002 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
6003 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
6004 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
6005 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
6006 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
6007 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
6008 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
6009 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
6010 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
6011 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
6012 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
6013 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
6014 t[12] = l;
6015 l = h;
6016 h = o;
6017 o = 0;
6018 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
6019 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
6020 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
6021 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
6022 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
6023 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
6024 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
6025 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
6026 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
6027 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
6028 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
6029 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
6030 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
6031 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
6032 t[13] = l;
6033 l = h;
6034 h = o;
6035 o = 0;
6036 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
6037 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
6038 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
6039 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
6040 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
6041 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
6042 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
6043 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
6044 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
6045 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
6046 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
6047 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
6048 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
6049 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
6050 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
6051 t[14] = l;
6052 l = h;
6053 h = o;
6054 o = 0;
6055 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
6056 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
6057 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
6058 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
6059 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
6060 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
6061 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
6062 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
6063 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
6064 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
6065 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
6066 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
6067 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
6068 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
6069 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
6070 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
6071 t[15] = l;
6072 l = h;
6073 h = o;
6074 o = 0;
6075 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
6076 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
6077 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
6078 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
6079 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
6080 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
6081 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
6082 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
6083 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
6084 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
6085 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
6086 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
6087 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
6088 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
6089 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
6090 r->dp[16] = l;
6091 l = h;
6092 h = o;
6093 o = 0;
6094 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
6095 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
6096 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
6097 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
6098 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
6099 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
6100 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
6101 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
6102 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
6103 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
6104 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
6105 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
6106 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
6107 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
6108 r->dp[17] = l;
6109 l = h;
6110 h = o;
6111 o = 0;
6112 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
6113 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
6114 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
6115 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
6116 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
6117 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
6118 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
6119 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
6120 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
6121 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
6122 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
6123 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
6124 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
6125 r->dp[18] = l;
6126 l = h;
6127 h = o;
6128 o = 0;
6129 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
6130 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
6131 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
6132 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
6133 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
6134 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
6135 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
6136 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
6137 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
6138 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
6139 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
6140 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
6141 r->dp[19] = l;
6142 l = h;
6143 h = o;
6144 o = 0;
6145 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
6146 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
6147 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
6148 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
6149 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
6150 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
6151 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
6152 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
6153 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
6154 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
6155 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
6156 r->dp[20] = l;
6157 l = h;
6158 h = o;
6159 o = 0;
6160 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
6161 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
6162 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
6163 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
6164 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
6165 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
6166 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
6167 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
6168 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
6169 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
6170 r->dp[21] = l;
6171 l = h;
6172 h = o;
6173 o = 0;
6174 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
6175 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
6176 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
6177 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
6178 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
6179 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
6180 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
6181 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
6182 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
6183 r->dp[22] = l;
6184 l = h;
6185 h = o;
6186 o = 0;
6187 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
6188 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
6189 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
6190 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
6191 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
6192 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
6193 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
6194 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
6195 r->dp[23] = l;
6196 l = h;
6197 h = o;
6198 o = 0;
6199 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
6200 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
6201 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
6202 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
6203 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
6204 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
6205 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
6206 r->dp[24] = l;
6207 l = h;
6208 h = o;
6209 o = 0;
6210 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
6211 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
6212 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
6213 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
6214 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
6215 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
6216 r->dp[25] = l;
6217 l = h;
6218 h = o;
6219 o = 0;
6220 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
6221 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
6222 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
6223 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
6224 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
6225 r->dp[26] = l;
6226 l = h;
6227 h = o;
6228 o = 0;
6229 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
6230 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
6231 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
6232 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
6233 r->dp[27] = l;
6234 l = h;
6235 h = o;
6236 o = 0;
6237 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
6238 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
6239 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
6240 r->dp[28] = l;
6241 l = h;
6242 h = o;
6243 o = 0;
6244 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
6245 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
6246 r->dp[29] = l;
6247 l = h;
6248 h = o;
6249 SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
6250 r->dp[30] = l;
6251 r->dp[31] = h;
6252 XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
6253 r->used = 32;
6254 sp_clamp(r);
6255 }
6256
6257 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
6258 if (t != NULL) {
6259 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
6260 }
6261 #endif
6262 return err;
6263 }
6264 #endif /* SP_INT_DIGITS >= 32 */
6265
6266 #if SP_INT_DIGITS >= 48
6267 /* Multiply a by b and store in r: r = a * b
6268 *
6269 * @param [in] a SP integer to multiply.
6270 * @param [in] b SP integer to multiply.
6271 * @param [out] r SP integer result.
6272 *
6273 * @return MP_OKAY on success.
6274 * @return MP_MEM when dynamic memory allocation fails.
6275 */
_sp_mul_24(sp_int * a,sp_int * b,sp_int * r)6276 static int _sp_mul_24(sp_int* a, sp_int* b, sp_int* r)
6277 {
6278 int err = MP_OKAY;
6279 sp_int_digit l = 0;
6280 sp_int_digit h = 0;
6281 sp_int_digit o = 0;
6282 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
6283 sp_int_digit* t = NULL;
6284 #else
6285 sp_int_digit t[24];
6286 #endif
6287
6288 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
6289 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
6290 DYNAMIC_TYPE_BIGINT);
6291 if (t == NULL) {
6292 err = MP_MEM;
6293 }
6294 #endif
6295 if (err == MP_OKAY) {
6296 SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
6297 t[0] = h;
6298 h = 0;
6299 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
6300 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
6301 t[1] = l;
6302 l = h;
6303 h = o;
6304 o = 0;
6305 SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
6306 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
6307 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
6308 t[2] = l;
6309 l = h;
6310 h = o;
6311 o = 0;
6312 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
6313 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
6314 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
6315 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
6316 t[3] = l;
6317 l = h;
6318 h = o;
6319 o = 0;
6320 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
6321 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
6322 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
6323 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
6324 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
6325 t[4] = l;
6326 l = h;
6327 h = o;
6328 o = 0;
6329 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
6330 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
6331 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
6332 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
6333 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
6334 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
6335 t[5] = l;
6336 l = h;
6337 h = o;
6338 o = 0;
6339 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
6340 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
6341 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
6342 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
6343 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
6344 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
6345 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
6346 t[6] = l;
6347 l = h;
6348 h = o;
6349 o = 0;
6350 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
6351 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
6352 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
6353 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
6354 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
6355 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
6356 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
6357 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
6358 t[7] = l;
6359 l = h;
6360 h = o;
6361 o = 0;
6362 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
6363 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
6364 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
6365 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
6366 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
6367 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
6368 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
6369 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
6370 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
6371 t[8] = l;
6372 l = h;
6373 h = o;
6374 o = 0;
6375 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
6376 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
6377 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
6378 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
6379 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
6380 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
6381 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
6382 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
6383 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
6384 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
6385 t[9] = l;
6386 l = h;
6387 h = o;
6388 o = 0;
6389 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
6390 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
6391 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
6392 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
6393 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
6394 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
6395 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
6396 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
6397 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
6398 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
6399 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
6400 t[10] = l;
6401 l = h;
6402 h = o;
6403 o = 0;
6404 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
6405 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
6406 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
6407 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
6408 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
6409 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
6410 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
6411 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
6412 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
6413 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
6414 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
6415 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
6416 t[11] = l;
6417 l = h;
6418 h = o;
6419 o = 0;
6420 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
6421 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
6422 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
6423 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
6424 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
6425 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
6426 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
6427 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
6428 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
6429 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
6430 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
6431 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
6432 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
6433 t[12] = l;
6434 l = h;
6435 h = o;
6436 o = 0;
6437 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
6438 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
6439 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
6440 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
6441 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
6442 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
6443 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
6444 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
6445 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
6446 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
6447 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
6448 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
6449 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
6450 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
6451 t[13] = l;
6452 l = h;
6453 h = o;
6454 o = 0;
6455 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
6456 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
6457 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
6458 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
6459 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
6460 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
6461 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
6462 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
6463 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
6464 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
6465 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
6466 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
6467 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
6468 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
6469 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
6470 t[14] = l;
6471 l = h;
6472 h = o;
6473 o = 0;
6474 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
6475 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
6476 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
6477 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
6478 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
6479 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
6480 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
6481 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
6482 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
6483 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
6484 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
6485 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
6486 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
6487 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
6488 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
6489 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
6490 t[15] = l;
6491 l = h;
6492 h = o;
6493 o = 0;
6494 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
6495 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
6496 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
6497 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
6498 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
6499 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
6500 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
6501 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
6502 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
6503 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
6504 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
6505 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
6506 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
6507 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
6508 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
6509 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
6510 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
6511 t[16] = l;
6512 l = h;
6513 h = o;
6514 o = 0;
6515 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
6516 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
6517 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
6518 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
6519 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
6520 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
6521 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
6522 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
6523 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
6524 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
6525 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
6526 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
6527 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
6528 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
6529 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
6530 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
6531 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
6532 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
6533 t[17] = l;
6534 l = h;
6535 h = o;
6536 o = 0;
6537 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
6538 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
6539 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
6540 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
6541 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
6542 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
6543 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
6544 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
6545 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
6546 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
6547 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
6548 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
6549 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
6550 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
6551 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
6552 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
6553 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
6554 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
6555 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
6556 t[18] = l;
6557 l = h;
6558 h = o;
6559 o = 0;
6560 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
6561 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
6562 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
6563 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
6564 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
6565 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
6566 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
6567 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
6568 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
6569 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
6570 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
6571 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
6572 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
6573 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
6574 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
6575 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
6576 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
6577 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
6578 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
6579 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
6580 t[19] = l;
6581 l = h;
6582 h = o;
6583 o = 0;
6584 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
6585 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
6586 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
6587 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
6588 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
6589 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
6590 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
6591 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
6592 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
6593 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
6594 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
6595 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
6596 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
6597 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
6598 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
6599 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
6600 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
6601 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
6602 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
6603 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
6604 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
6605 t[20] = l;
6606 l = h;
6607 h = o;
6608 o = 0;
6609 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
6610 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
6611 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
6612 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
6613 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
6614 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
6615 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
6616 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
6617 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
6618 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
6619 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
6620 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
6621 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
6622 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
6623 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
6624 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
6625 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
6626 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
6627 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
6628 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
6629 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
6630 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
6631 t[21] = l;
6632 l = h;
6633 h = o;
6634 o = 0;
6635 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
6636 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
6637 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
6638 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
6639 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
6640 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
6641 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
6642 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
6643 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
6644 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
6645 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
6646 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
6647 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
6648 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
6649 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
6650 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
6651 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
6652 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
6653 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
6654 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
6655 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
6656 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
6657 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
6658 t[22] = l;
6659 l = h;
6660 h = o;
6661 o = 0;
6662 SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
6663 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
6664 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
6665 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
6666 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
6667 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
6668 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
6669 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
6670 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
6671 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
6672 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
6673 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
6674 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
6675 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
6676 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
6677 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
6678 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
6679 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
6680 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
6681 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
6682 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
6683 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
6684 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
6685 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
6686 t[23] = l;
6687 l = h;
6688 h = o;
6689 o = 0;
6690 SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
6691 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
6692 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
6693 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
6694 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
6695 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
6696 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
6697 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
6698 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
6699 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
6700 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
6701 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
6702 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
6703 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
6704 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
6705 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
6706 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
6707 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
6708 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
6709 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
6710 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
6711 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
6712 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
6713 r->dp[24] = l;
6714 l = h;
6715 h = o;
6716 o = 0;
6717 SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
6718 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
6719 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
6720 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
6721 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
6722 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
6723 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
6724 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
6725 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
6726 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
6727 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
6728 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
6729 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
6730 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
6731 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
6732 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
6733 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
6734 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
6735 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
6736 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
6737 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
6738 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
6739 r->dp[25] = l;
6740 l = h;
6741 h = o;
6742 o = 0;
6743 SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
6744 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
6745 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
6746 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
6747 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
6748 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
6749 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
6750 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
6751 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
6752 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
6753 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
6754 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
6755 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
6756 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
6757 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
6758 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
6759 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
6760 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
6761 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
6762 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
6763 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
6764 r->dp[26] = l;
6765 l = h;
6766 h = o;
6767 o = 0;
6768 SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
6769 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
6770 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
6771 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
6772 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
6773 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
6774 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
6775 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
6776 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
6777 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
6778 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
6779 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
6780 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
6781 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
6782 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
6783 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
6784 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
6785 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
6786 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
6787 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
6788 r->dp[27] = l;
6789 l = h;
6790 h = o;
6791 o = 0;
6792 SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
6793 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
6794 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
6795 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
6796 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
6797 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
6798 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
6799 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
6800 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
6801 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
6802 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
6803 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
6804 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
6805 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
6806 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
6807 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
6808 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
6809 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
6810 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
6811 r->dp[28] = l;
6812 l = h;
6813 h = o;
6814 o = 0;
6815 SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
6816 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
6817 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
6818 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
6819 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
6820 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
6821 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
6822 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
6823 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
6824 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
6825 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
6826 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
6827 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
6828 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
6829 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
6830 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
6831 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
6832 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
6833 r->dp[29] = l;
6834 l = h;
6835 h = o;
6836 o = 0;
6837 SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
6838 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
6839 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
6840 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
6841 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
6842 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
6843 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
6844 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
6845 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
6846 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
6847 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
6848 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
6849 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
6850 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
6851 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
6852 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
6853 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
6854 r->dp[30] = l;
6855 l = h;
6856 h = o;
6857 o = 0;
6858 SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
6859 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
6860 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
6861 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
6862 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
6863 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
6864 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
6865 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
6866 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
6867 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
6868 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
6869 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
6870 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
6871 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
6872 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
6873 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
6874 r->dp[31] = l;
6875 l = h;
6876 h = o;
6877 o = 0;
6878 SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
6879 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
6880 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
6881 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
6882 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
6883 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
6884 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
6885 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
6886 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
6887 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
6888 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
6889 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
6890 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
6891 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
6892 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
6893 r->dp[32] = l;
6894 l = h;
6895 h = o;
6896 o = 0;
6897 SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
6898 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
6899 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
6900 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
6901 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
6902 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
6903 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
6904 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
6905 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
6906 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
6907 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
6908 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
6909 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
6910 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
6911 r->dp[33] = l;
6912 l = h;
6913 h = o;
6914 o = 0;
6915 SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
6916 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
6917 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
6918 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
6919 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
6920 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
6921 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
6922 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
6923 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
6924 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
6925 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
6926 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
6927 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
6928 r->dp[34] = l;
6929 l = h;
6930 h = o;
6931 o = 0;
6932 SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
6933 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
6934 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
6935 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
6936 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
6937 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
6938 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
6939 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
6940 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
6941 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
6942 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
6943 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
6944 r->dp[35] = l;
6945 l = h;
6946 h = o;
6947 o = 0;
6948 SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
6949 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
6950 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
6951 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
6952 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
6953 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
6954 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
6955 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
6956 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
6957 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
6958 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
6959 r->dp[36] = l;
6960 l = h;
6961 h = o;
6962 o = 0;
6963 SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
6964 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
6965 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
6966 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
6967 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
6968 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
6969 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
6970 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
6971 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
6972 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
6973 r->dp[37] = l;
6974 l = h;
6975 h = o;
6976 o = 0;
6977 SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
6978 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
6979 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
6980 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
6981 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
6982 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
6983 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
6984 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
6985 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
6986 r->dp[38] = l;
6987 l = h;
6988 h = o;
6989 o = 0;
6990 SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
6991 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
6992 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
6993 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
6994 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
6995 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
6996 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
6997 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
6998 r->dp[39] = l;
6999 l = h;
7000 h = o;
7001 o = 0;
7002 SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
7003 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
7004 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
7005 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
7006 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
7007 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
7008 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
7009 r->dp[40] = l;
7010 l = h;
7011 h = o;
7012 o = 0;
7013 SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
7014 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
7015 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
7016 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
7017 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
7018 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
7019 r->dp[41] = l;
7020 l = h;
7021 h = o;
7022 o = 0;
7023 SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
7024 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
7025 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
7026 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
7027 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
7028 r->dp[42] = l;
7029 l = h;
7030 h = o;
7031 o = 0;
7032 SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
7033 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
7034 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
7035 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
7036 r->dp[43] = l;
7037 l = h;
7038 h = o;
7039 o = 0;
7040 SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
7041 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
7042 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
7043 r->dp[44] = l;
7044 l = h;
7045 h = o;
7046 o = 0;
7047 SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
7048 SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
7049 r->dp[45] = l;
7050 l = h;
7051 h = o;
7052 SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
7053 r->dp[46] = l;
7054 r->dp[47] = h;
7055 XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
7056 r->used = 48;
7057 sp_clamp(r);
7058 }
7059
7060 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
7061 if (t != NULL) {
7062 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
7063 }
7064 #endif
7065 return err;
7066 }
7067 #endif /* SP_INT_DIGITS >= 48 */
7068
#if SP_INT_DIGITS >= 64
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba step over 16-digit halves. With a = a1:a0 and b = b1:b0:
 *   z0 = a0 * b0, z2 = a1 * b1, z1 = (a0 + a1) * (b0 + b1)
 *   r  = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0     (shifts in digits)
 * Exactly 32 digits are read from each operand.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_32(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int k;
    sp_int_digit lo;
    sp_int_digit hi;
    sp_int* aSum;
    sp_int* bSum;
    sp_int* mid;
    sp_int* top;
    sp_int_digit carryA;
    sp_int_digit carryB;
    DECL_SP_INT_ARRAY(t, 16, 2);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        aSum = t[0];
        bSum = t[1];
        mid = z[0];
        top = z[1];

        /* Load the high halves: aSum = a1, bSum = b1. */
        XMEMCPY(aSum->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        aSum->used = 16;
        XMEMCPY(bSum->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
        bSum->used = 16;

        /* z2: top = a1 * b1 */
        err = _sp_mul_16(aSum, bSum, top);
    }
    if (err == MP_OKAY) {
        /* aSum = a0 + a1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 16; k++) {
            SP_ASM_ADDC(lo, hi, aSum->dp[k]);
            SP_ASM_ADDC(lo, hi, a->dp[k]);
            aSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryA = lo;

        /* bSum = b0 + b1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 16; k++) {
            SP_ASM_ADDC(lo, hi, bSum->dp[k]);
            SP_ASM_ADDC(lo, hi, b->dp[k]);
            bSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryB = lo;
    }
    if (err == MP_OKAY) {
        /* z0: low 32 digits of r = a0 * b0 */
        err = _sp_mul_16(a, b, r);
    }
    if (err == MP_OKAY) {
        /* z1: mid = (a0 + a1) * (b0 + b1), carries handled below. */
        err = _sp_mul_16(aSum, bSum, mid);
    }
    if (err == MP_OKAY) {
        /* Account for the carries dropped from the sums:
         *   carryA * carryB goes into digit 32,
         *   carryA * bSum and carryB * aSum are added from digit 16.
         */
        mid->dp[32] = carryA & carryB;
        lo = 0;
        if (carryA) {
            hi = 0;
            for (k = 0; k < 16; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 16]);
                SP_ASM_ADDC(lo, hi, bSum->dp[k]);
                mid->dp[k + 16] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[32] += lo;
        lo = 0;
        if (carryB) {
            hi = 0;
            for (k = 0; k < 16; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 16]);
                SP_ASM_ADDC(lo, hi, aSum->dp[k]);
                mid->dp[k + 16] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[32] += lo;

        /* mid = z1 - z0 - z2 */
        lo = 0;
        hi = 0;
        for (k = 0; k < 32; k++) {
            lo += mid->dp[k];
            SP_ASM_SUBC(lo, hi, r->dp[k]);
            SP_ASM_SUBC(lo, hi, top->dp[k]);
            mid->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        mid->dp[32] += lo;

        /* r += mid << 16 digits. Digits of r from 32 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 16; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 16]);
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 16] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 33; k++) {
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 16] = lo;
            lo = hi;
            hi = 0;
        }

        /* r += top << 32 digits. Digits of r from 49 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 17; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 32]);
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 32] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 32; k++) {
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 32] = lo;
            lo = hi;
            hi = 0;
        }

        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
#endif /* SP_INT_DIGITS >= 64 */
7232
#if SP_INT_DIGITS >= 96
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba step over 24-digit halves. With a = a1:a0 and b = b1:b0:
 *   z0 = a0 * b0, z2 = a1 * b1, z1 = (a0 + a1) * (b0 + b1)
 *   r  = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0     (shifts in digits)
 * Exactly 48 digits are read from each operand.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_48(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int k;
    sp_int_digit lo;
    sp_int_digit hi;
    sp_int* aSum;
    sp_int* bSum;
    sp_int* mid;
    sp_int* top;
    sp_int_digit carryA;
    sp_int_digit carryB;
    DECL_SP_INT_ARRAY(t, 24, 2);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        aSum = t[0];
        bSum = t[1];
        mid = z[0];
        top = z[1];

        /* Load the high halves: aSum = a1, bSum = b1. */
        XMEMCPY(aSum->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        aSum->used = 24;
        XMEMCPY(bSum->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
        bSum->used = 24;

        /* z2: top = a1 * b1 */
        err = _sp_mul_24(aSum, bSum, top);
    }
    if (err == MP_OKAY) {
        /* aSum = a0 + a1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 24; k++) {
            SP_ASM_ADDC(lo, hi, aSum->dp[k]);
            SP_ASM_ADDC(lo, hi, a->dp[k]);
            aSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryA = lo;

        /* bSum = b0 + b1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 24; k++) {
            SP_ASM_ADDC(lo, hi, bSum->dp[k]);
            SP_ASM_ADDC(lo, hi, b->dp[k]);
            bSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryB = lo;
    }
    if (err == MP_OKAY) {
        /* z0: low 48 digits of r = a0 * b0 */
        err = _sp_mul_24(a, b, r);
    }
    if (err == MP_OKAY) {
        /* z1: mid = (a0 + a1) * (b0 + b1), carries handled below. */
        err = _sp_mul_24(aSum, bSum, mid);
    }
    if (err == MP_OKAY) {
        /* Account for the carries dropped from the sums:
         *   carryA * carryB goes into digit 48,
         *   carryA * bSum and carryB * aSum are added from digit 24.
         */
        mid->dp[48] = carryA & carryB;
        lo = 0;
        if (carryA) {
            hi = 0;
            for (k = 0; k < 24; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 24]);
                SP_ASM_ADDC(lo, hi, bSum->dp[k]);
                mid->dp[k + 24] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[48] += lo;
        lo = 0;
        if (carryB) {
            hi = 0;
            for (k = 0; k < 24; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 24]);
                SP_ASM_ADDC(lo, hi, aSum->dp[k]);
                mid->dp[k + 24] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[48] += lo;

        /* mid = z1 - z0 - z2 */
        lo = 0;
        hi = 0;
        for (k = 0; k < 48; k++) {
            lo += mid->dp[k];
            SP_ASM_SUBC(lo, hi, r->dp[k]);
            SP_ASM_SUBC(lo, hi, top->dp[k]);
            mid->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        mid->dp[48] += lo;

        /* r += mid << 24 digits. Digits of r from 48 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 24; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 24]);
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 24] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 49; k++) {
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 24] = lo;
            lo = hi;
            hi = 0;
        }

        /* r += top << 48 digits. Digits of r from 73 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 25; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 48]);
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 48] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 48; k++) {
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 48] = lo;
            lo = hi;
            hi = 0;
        }

        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
#endif /* SP_INT_DIGITS >= 96 */
7396
#if SP_INT_DIGITS >= 128
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba step over 32-digit halves. With a = a1:a0 and b = b1:b0:
 *   z0 = a0 * b0, z2 = a1 * b1, z1 = (a0 + a1) * (b0 + b1)
 *   r  = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0     (shifts in digits)
 * Exactly 64 digits are read from each operand.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_64(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int k;
    sp_int_digit lo;
    sp_int_digit hi;
    sp_int* aSum;
    sp_int* bSum;
    sp_int* mid;
    sp_int* top;
    sp_int_digit carryA;
    sp_int_digit carryB;
    DECL_SP_INT_ARRAY(t, 32, 2);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        aSum = t[0];
        bSum = t[1];
        mid = z[0];
        top = z[1];

        /* Load the high halves: aSum = a1, bSum = b1. */
        XMEMCPY(aSum->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        aSum->used = 32;
        XMEMCPY(bSum->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
        bSum->used = 32;

        /* z2: top = a1 * b1 */
        err = _sp_mul_32(aSum, bSum, top);
    }
    if (err == MP_OKAY) {
        /* aSum = a0 + a1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 32; k++) {
            SP_ASM_ADDC(lo, hi, aSum->dp[k]);
            SP_ASM_ADDC(lo, hi, a->dp[k]);
            aSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryA = lo;

        /* bSum = b0 + b1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 32; k++) {
            SP_ASM_ADDC(lo, hi, bSum->dp[k]);
            SP_ASM_ADDC(lo, hi, b->dp[k]);
            bSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryB = lo;
    }
    if (err == MP_OKAY) {
        /* z0: low 64 digits of r = a0 * b0 */
        err = _sp_mul_32(a, b, r);
    }
    if (err == MP_OKAY) {
        /* z1: mid = (a0 + a1) * (b0 + b1), carries handled below. */
        err = _sp_mul_32(aSum, bSum, mid);
    }
    if (err == MP_OKAY) {
        /* Account for the carries dropped from the sums:
         *   carryA * carryB goes into digit 64,
         *   carryA * bSum and carryB * aSum are added from digit 32.
         */
        mid->dp[64] = carryA & carryB;
        lo = 0;
        if (carryA) {
            hi = 0;
            for (k = 0; k < 32; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 32]);
                SP_ASM_ADDC(lo, hi, bSum->dp[k]);
                mid->dp[k + 32] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[64] += lo;
        lo = 0;
        if (carryB) {
            hi = 0;
            for (k = 0; k < 32; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 32]);
                SP_ASM_ADDC(lo, hi, aSum->dp[k]);
                mid->dp[k + 32] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[64] += lo;

        /* mid = z1 - z0 - z2 */
        lo = 0;
        hi = 0;
        for (k = 0; k < 64; k++) {
            lo += mid->dp[k];
            SP_ASM_SUBC(lo, hi, r->dp[k]);
            SP_ASM_SUBC(lo, hi, top->dp[k]);
            mid->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        mid->dp[64] += lo;

        /* r += mid << 32 digits. Digits of r from 64 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 32; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 32]);
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 32] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 65; k++) {
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 32] = lo;
            lo = hi;
            hi = 0;
        }

        /* r += top << 64 digits. Digits of r from 97 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 33; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 64]);
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 64] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 64; k++) {
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 64] = lo;
            lo = hi;
            hi = 0;
        }

        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
#endif /* SP_INT_DIGITS >= 128 */
7560
#if SP_INT_DIGITS >= 192
/* Multiply a by b and store in r: r = a * b
 *
 * Karatsuba step over 48-digit halves. With a = a1:a0 and b = b1:b0:
 *   z0 = a0 * b0, z2 = a1 * b1, z1 = (a0 + a1) * (b0 + b1)
 *   r  = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0     (shifts in digits)
 * Exactly 96 digits are read from each operand.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_96(sp_int* a, sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    int k;
    sp_int_digit lo;
    sp_int_digit hi;
    sp_int* aSum;
    sp_int* bSum;
    sp_int* mid;
    sp_int* top;
    sp_int_digit carryA;
    sp_int_digit carryB;
    DECL_SP_INT_ARRAY(t, 48, 2);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        aSum = t[0];
        bSum = t[1];
        mid = z[0];
        top = z[1];

        /* Load the high halves: aSum = a1, bSum = b1. */
        XMEMCPY(aSum->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        aSum->used = 48;
        XMEMCPY(bSum->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
        bSum->used = 48;

        /* z2: top = a1 * b1 */
        err = _sp_mul_48(aSum, bSum, top);
    }
    if (err == MP_OKAY) {
        /* aSum = a0 + a1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 48; k++) {
            SP_ASM_ADDC(lo, hi, aSum->dp[k]);
            SP_ASM_ADDC(lo, hi, a->dp[k]);
            aSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryA = lo;

        /* bSum = b0 + b1, keeping the carry out of the top digit. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 48; k++) {
            SP_ASM_ADDC(lo, hi, bSum->dp[k]);
            SP_ASM_ADDC(lo, hi, b->dp[k]);
            bSum->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        carryB = lo;
    }
    if (err == MP_OKAY) {
        /* z0: low 96 digits of r = a0 * b0 */
        err = _sp_mul_48(a, b, r);
    }
    if (err == MP_OKAY) {
        /* z1: mid = (a0 + a1) * (b0 + b1), carries handled below. */
        err = _sp_mul_48(aSum, bSum, mid);
    }
    if (err == MP_OKAY) {
        /* Account for the carries dropped from the sums:
         *   carryA * carryB goes into digit 96,
         *   carryA * bSum and carryB * aSum are added from digit 48.
         */
        mid->dp[96] = carryA & carryB;
        lo = 0;
        if (carryA) {
            hi = 0;
            for (k = 0; k < 48; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 48]);
                SP_ASM_ADDC(lo, hi, bSum->dp[k]);
                mid->dp[k + 48] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[96] += lo;
        lo = 0;
        if (carryB) {
            hi = 0;
            for (k = 0; k < 48; k++) {
                SP_ASM_ADDC(lo, hi, mid->dp[k + 48]);
                SP_ASM_ADDC(lo, hi, aSum->dp[k]);
                mid->dp[k + 48] = lo;
                lo = hi;
                hi = 0;
            }
        }
        mid->dp[96] += lo;

        /* mid = z1 - z0 - z2 */
        lo = 0;
        hi = 0;
        for (k = 0; k < 96; k++) {
            lo += mid->dp[k];
            SP_ASM_SUBC(lo, hi, r->dp[k]);
            SP_ASM_SUBC(lo, hi, top->dp[k]);
            mid->dp[k] = lo;
            lo = hi;
            hi = 0;
        }
        mid->dp[96] += lo;

        /* r += mid << 48 digits. Digits of r from 96 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 48; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 48]);
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 48] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 97; k++) {
            SP_ASM_ADDC(lo, hi, mid->dp[k]);
            r->dp[k + 48] = lo;
            lo = hi;
            hi = 0;
        }

        /* r += top << 96 digits. Digits of r from 145 up are not set yet so
         * are written rather than added to. */
        lo = 0;
        hi = 0;
        for (k = 0; k < 49; k++) {
            SP_ASM_ADDC(lo, hi, r->dp[k + 96]);
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 96] = lo;
            lo = hi;
            hi = 0;
        }
        for (; k < 96; k++) {
            SP_ASM_ADDC(lo, hi, top->dp[k]);
            r->dp[k + 96] = lo;
            lo = hi;
            hi = 0;
        }

        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
#endif /* SP_INT_DIGITS >= 192 */
7724
7725 #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
7726 #endif /* !WOLFSSL_SP_SMALL */
7727
7728 /* Multiply a by b and store in r: r = a * b
7729 *
7730 * @param [in] a SP integer to multiply.
7731 * @param [in] b SP integer to multiply.
7732 * @param [out] r SP integer result.
7733 *
7734 * @return MP_OKAY on success.
7735 * @return MP_VAL when a, b or is NULL; or the result will be too big for fixed
7736 * data length.
7737 * @return MP_MEM when dynamic memory allocation fails.
7738 */
sp_mul(sp_int * a,sp_int * b,sp_int * r)7739 int sp_mul(sp_int* a, sp_int* b, sp_int* r)
7740 {
7741 int err = MP_OKAY;
7742 #ifdef WOLFSSL_SP_INT_NEGATIVE
7743 int sign;
7744 #endif
7745
7746 if ((a == NULL) || (b == NULL) || (r == NULL)) {
7747 err = MP_VAL;
7748 }
7749
7750 /* Need extra digit during calculation. */
7751 if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
7752 err = MP_VAL;
7753 }
7754
7755 #if 0
7756 if (err == MP_OKAY) {
7757 sp_print(a, "a");
7758 sp_print(b, "b");
7759 }
7760 #endif
7761
7762 if (err == MP_OKAY) {
7763 #ifdef WOLFSSL_SP_INT_NEGATIVE
7764 sign = a->sign ^ b->sign;
7765 #endif
7766
7767 if ((a->used == 0) || (b->used == 0)) {
7768 _sp_zero(r);
7769 }
7770 else
7771 #ifndef WOLFSSL_SP_SMALL
7772 #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
7773 #if SP_WORD_SIZE == 64
7774 if ((a->used == 4) && (b->used == 4)) {
7775 err = _sp_mul_4(a, b, r);
7776 }
7777 else
7778 #endif /* SP_WORD_SIZE == 64 */
7779 #if SP_WORD_SIZE == 64
7780 #ifdef SQR_MUL_ASM
7781 if ((a->used == 6) && (b->used == 6)) {
7782 err = _sp_mul_6(a, b, r);
7783 }
7784 else
7785 #endif /* SQR_MUL_ASM */
7786 #endif /* SP_WORD_SIZE == 64 */
7787 #if SP_WORD_SIZE == 32
7788 #ifdef SQR_MUL_ASM
7789 if ((a->used == 8) && (b->used == 8)) {
7790 err = _sp_mul_8(a, b, r);
7791 }
7792 else
7793 #endif /* SQR_MUL_ASM */
7794 #endif /* SP_WORD_SIZE == 32 */
7795 #if SP_WORD_SIZE == 32
7796 #ifdef SQR_MUL_ASM
7797 if ((a->used == 12) && (b->used == 12)) {
7798 err = _sp_mul_12(a, b, r);
7799 }
7800 else
7801 #endif /* SQR_MUL_ASM */
7802 #endif /* SP_WORD_SIZE == 32 */
7803 #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
7804 #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
7805 #if SP_INT_DIGITS >= 32
7806 if ((a->used == 16) && (b->used == 16)) {
7807 err = _sp_mul_16(a, b, r);
7808 }
7809 else
7810 #endif /* SP_INT_DIGITS >= 32 */
7811 #if SP_INT_DIGITS >= 48
7812 if ((a->used == 24) && (b->used == 24)) {
7813 err = _sp_mul_24(a, b, r);
7814 }
7815 else
7816 #endif /* SP_INT_DIGITS >= 48 */
7817 #if SP_INT_DIGITS >= 64
7818 if ((a->used == 32) && (b->used == 32)) {
7819 err = _sp_mul_32(a, b, r);
7820 }
7821 else
7822 #endif /* SP_INT_DIGITS >= 64 */
7823 #if SP_INT_DIGITS >= 96
7824 if ((a->used == 48) && (b->used == 48)) {
7825 err = _sp_mul_48(a, b, r);
7826 }
7827 else
7828 #endif /* SP_INT_DIGITS >= 96 */
7829 #if SP_INT_DIGITS >= 128
7830 if ((a->used == 64) && (b->used == 64)) {
7831 err = _sp_mul_64(a, b, r);
7832 }
7833 else
7834 #endif /* SP_INT_DIGITS >= 128 */
7835 #if SP_INT_DIGITS >= 192
7836 if ((a->used == 96) && (b->used == 96)) {
7837 err = _sp_mul_96(a, b, r);
7838 }
7839 else
7840 #endif /* SP_INT_DIGITS >= 192 */
7841 #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
7842 #endif /* !WOLFSSL_SP_SMALL */
7843
7844 #ifdef SQR_MUL_ASM
7845 if (a->used == b->used) {
7846 err = _sp_mul_nxn(a, b, r);
7847 }
7848 else
7849 #endif
7850 {
7851 err = _sp_mul(a, b, r);
7852 }
7853 }
7854
7855 #ifdef WOLFSSL_SP_INT_NEGATIVE
7856 if (err == MP_OKAY) {
7857 r->sign = (r->used == 0) ? MP_ZPOS : sign;
7858 }
7859 #endif
7860
7861 #if 0
7862 if (err == MP_OKAY) {
7863 sp_print(r, "rmul");
7864 }
7865 #endif
7866
7867 return err;
7868 }
7869 /* END SP_MUL implementations. */
7870
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
    defined(WOLFCRYPT_HAVE_ECCSI) || \
    (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* Multiply a by b mod m and store in r: r = (a * b) mod m
 *
 * The full product is computed into a temporary and then reduced.
 *
 * @param  [in]   a  SP integer to multiply.
 * @param  [in]   b  SP integer to multiply.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
 *          fixed data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Digits needed for the unreduced product (1 when it can't be known). */
    int prodLen = ((a == NULL) || (b == NULL)) ? 1 : a->used + b->used;
    DECL_SP_INT(t, prodLen);

    if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* r is required to be able to hold a value the size of the product. */
    else if (prodLen > r->size) {
        err = MP_VAL;
    }

    ALLOC_SP_INT(t, prodLen, err, NULL);
    if (err == MP_OKAY) {
        err = sp_init_size(t, prodLen);
        if (err == MP_OKAY) {
            /* t = a * b */
            err = sp_mul(a, b, t);
        }
        if (err == MP_OKAY) {
            /* r = t mod m */
            err = sp_mod(t, m, r);
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
#endif
7913
#ifdef WOLFSSL_SP_INVMOD
/* Calculates the multiplicative inverse in the field.
 *
 * 'a' is first reduced modulo m (and made positive when negative numbers are
 * supported). An even modulus is handled by recursing with the operands
 * swapped; an odd modulus uses a binary extended GCD style loop.
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even;
 *          m is negative; or no inverse was found.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_invmod(sp_int* a, sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* b = NULL;
    sp_int* c = NULL;
    /* One digit more than the longer of a and m. */
    int used = ((m == NULL) || (a == NULL)) ? 1 :
                   ((m->used >= a->used) ? m->used + 1 : a->used + 1);
    /* v is initialized below as holding used + 1 digits, so every temporary is
     * declared and allocated with that much room. */
    DECL_SP_INT_ARRAY(t, used + 1, 4);

    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    /* Same element size as the declaration above. */
    ALLOC_SP_INT_ARRAY(t, used + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        u = t[0];
        v = t[1];
        b = t[2];
        c = t[3];
        sp_init_size(v, used + 1);

        /* Reduce a into v when it is not already smaller than m. */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            err = sp_mod(a, m, v);
            a = v;
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        /* Make 'a' positive */
        err = sp_add(m, a, v);
        a = v;
    }
#endif

    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
        err = MP_VAL;
    }
    /* r*2*x != n*2*y + 1 for integer x,y */
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        if (sp_isone(a)) {
            /* 1*1 = 0*m + 1 */
            sp_set(r, 1);
        }
        else if (sp_iseven(m)) {
            /* a^-1 mod m = m + (1 - m*(m^-1 % a)) / a
             *            = m - (m*(m^-1 % a) - 1) / a
             */
            err = sp_invmod(m, a, r);
            if (err == MP_OKAY) {
                err = sp_mul(r, m, r);
            }
            if (err == MP_OKAY) {
                _sp_sub_d(r, 1, r);
                err = sp_div(r, a, r, NULL);
            }
            if (err == MP_OKAY) {
                /* Report a failed subtraction rather than returning an
                 * unfinished result as success. */
                err = sp_sub(m, r, r);
            }
        }
        else {
            sp_init_size(u, m->used + 1);
            sp_init_size(b, m->used + 1);
            sp_init_size(c, m->used + 1);

            /* u = m, v = a, b = 0, c = 1 */
            sp_copy(m, u);
            sp_copy(a, v);
            _sp_zero(b);
            sp_set(c, 1);

            while (!sp_isone(v) && !sp_iszero(u)) {
                if (sp_iseven(u)) {
                    /* Halve u; halve b too, adding m first when b is odd. */
                    sp_div_2(u, u);
                    if (sp_isodd(b)) {
                        sp_add(b, m, b);
                    }
                    sp_div_2(b, b);
                }
                else if (sp_iseven(v)) {
                    /* Halve v; halve c too, adding m first when c is odd. */
                    sp_div_2(v, v);
                    if (sp_isodd(c)) {
                        sp_add(c, m, c);
                    }
                    sp_div_2(c, c);
                }
                else if (_sp_cmp(u, v) != MP_LT) {
                    /* u -= v and b -= c, adding m to b first when b < c. */
                    sp_sub(u, v, u);
                    if (_sp_cmp(b, c) == MP_LT) {
                        sp_add(b, m, b);
                    }
                    sp_sub(b, c, b);
                }
                else {
                    /* v -= u and c -= b, adding m to c first when c < b. */
                    sp_sub(v, u, v);
                    if (_sp_cmp(c, b) == MP_LT) {
                        sp_add(c, m, c);
                    }
                    sp_sub(c, b, c);
                }
            }
            if (sp_iszero(u)) {
                /* Loop ended without v reaching one - no inverse. */
                err = MP_VAL;
            }
            else {
                err = sp_copy(c, r);
            }
        }
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
#endif /* WOLFSSL_SP_INVMOD */
8052
#ifdef WOLFSSL_SP_INVMOD_MONT_CT

#define CT_INV_MOD_PRE_CNT      8

/* Calculates the multiplicative inverse in the field - constant time.
 *
 * Modulus (m) must be a prime and greater than 2.
 * The result is a raised to the power (m - 2), with all products Montgomery
 * reduced. A table of CT_INV_MOD_PRE_CNT values is built first, where entry i
 * is entry i-1 squared and multiplied by a, and is used for runs of set bits
 * in the exponent.
 *
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
 * @param  [in]   m   SP integer this is the modulus.
 * @param  [out]  r   SP integer to hold result.
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_invmod_mont_ct(sp_int* a, sp_int* m, sp_int* r, sp_int_digit mp)
{
    int err = MP_OKAY;
    int i;
    int j;
    sp_int* acc;
    sp_int* ex;
    /* Digits in each temporary: room for an unreduced product. */
    int digits = (m == NULL) ? 1 : m->used * 2 + 1;
    DECL_SP_INT_ARRAY(pre, digits, CT_INV_MOD_PRE_CNT + 2);

    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    else if (sp_iszero(a) || sp_iszero(m) ||
             (m->used == 1 && m->dp[0] < 3)) {
        err = MP_VAL;
    }

    ALLOC_SP_INT_ARRAY(pre, digits, CT_INV_MOD_PRE_CNT + 2, err, NULL);
    if (err == MP_OKAY) {
        /* Last two array entries are the accumulator and the exponent. */
        acc = pre[CT_INV_MOD_PRE_CNT + 0];
        ex = pre[CT_INV_MOD_PRE_CNT + 1];
        sp_init_size(acc, digits);
        sp_init_size(ex, digits);

        /* pre[0] = a, pre[i] = pre[i-1]^2 * a */
        sp_init_size(pre[0], digits);
        err = sp_copy(a, pre[0]);
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
            sp_init_size(pre[i], digits);
            err = sp_sqr(pre[i-1], pre[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
            if (err == MP_OKAY) {
                err = sp_mul(pre[i], a, pre[i]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
        }
    }

    if (err == MP_OKAY) {
        /* Exponent is m - 2. */
        _sp_sub_d(m, 2, ex);

        /* Count the run of set bits at the top of the exponent, limited to
         * the number of table entries. */
        i = sp_count_bits(ex) - 1;
        j = 0;
        while ((i >= 0) && sp_is_bit_set(ex, i) &&
               (j != CT_INV_MOD_PRE_CNT)) {
            i--;
            j++;
        }
        /* Start from the table entry for that run. */
        err = sp_copy(pre[j-1], acc);

        /* Process remaining bits; j counts the pending run of set bits. */
        for (j = 0; (err == MP_OKAY) && (i >= 0); i--) {
            int set = sp_is_bit_set(ex, i);

            /* Multiply in the pending run when it is at maximum length or
             * the run has ended. */
            if ((j == CT_INV_MOD_PRE_CNT) || ((!set) && j > 0)) {
                err = sp_mul(acc, pre[j-1], acc);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(acc, m, mp);
                }
                j = 0;
            }
            if (err == MP_OKAY) {
                err = sp_sqr(acc, acc);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(acc, m, mp);
                }
            }
            j += set;
        }
    }
    if (err == MP_OKAY) {
        if (j > 0) {
            /* Multiply in the final run straight into the result. */
            err = sp_mul(acc, pre[j-1], r);
            if (err == MP_OKAY) {
                err = _sp_mont_red(r, m, mp);
            }
        }
        else {
            err = sp_copy(acc, r);
        }
    }

    FREE_SP_INT_ARRAY(pre, NULL);
    return err;
}

#endif /* WOLFSSL_SP_INVMOD_MONT_CT */
8158
8159
8160 /**************************
8161 * Exponentiation functions
8162 **************************/
8163
#if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
    !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || defined(WOLFSSL_HAVE_SP_DH)
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time.
 * Is constant time and can be cache attack resistant.
 *
 * A square and a multiply are performed for every bit. Until the first set
 * bit of the exponent has been seen they are applied to a dummy value rather
 * than to the result.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits of the exponent to process. May be
 *                      greater than the count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(sp_int* b, sp_int* e, int bits, sp_int* m, sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int j;
    int y;
    int seenTopBit = 0;
#ifndef WC_NO_CACHE_RESISTANT
    /* Value being operated on - chosen by masking addresses, not branching. */
    sp_int* sel;
#endif
#ifdef WC_NO_CACHE_RESISTANT
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
#else
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
#endif

#ifdef WC_NO_CACHE_RESISTANT
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
#else
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
#endif
    if (err == MP_OKAY) {
        sp_init_size(t[0], 2 * m->used + 1);
        sp_init_size(t[1], 2 * m->used + 1);
    #ifndef WC_NO_CACHE_RESISTANT
        sp_init_size(t[2], 2 * m->used + 1);
    #endif

        /* Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) == MP_LT) {
            err = sp_copy(b, t[0]);
        }
        else {
            err = sp_mod(b, m, t[0]);
            /* Base is a multiple of the modulus - result is zero. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                sp_set(r, 0);
                done = 1;
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* t[0] is dummy value and t[1] is result */
        err = sp_copy(t[0], t[1]);

        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
#ifdef WC_NO_CACHE_RESISTANT
            /* Square real result if seen the top bit. */
            err = sp_sqrmod(t[seenTopBit], m, t[seenTopBit]);
            if (err == MP_OKAY) {
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                j = y & seenTopBit;
                seenTopBit |= y;
                /* Multiply real result if bit is set and seen the top bit. */
                err = sp_mulmod(t[j], b, m, t[j]);
            }
#else
            /* Square real result if seen the top bit.
             * The value is copied into t[2], squared and copied back. */
            sel = (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
                            ((size_t)t[1] & sp_off_on_addr[seenTopBit  ]));
            sp_copy(sel, t[2]);
            err = sp_sqrmod(t[2], m, t[2]);
            sp_copy(t[2], sel);
            if (err == MP_OKAY) {
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                j = y & seenTopBit;
                seenTopBit |= y;
                /* Multiply real result if bit is set and seen the top bit. */
                sel = (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                ((size_t)t[1] & sp_off_on_addr[j  ]));
                sp_copy(sel, t[2]);
                err = sp_mulmod(t[2], b, m, t[2]);
                sp_copy(t[2], sel);
            }
#endif
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        err = sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
#endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
        * WOLFSSL_HAVE_SP_DH */
8268
8269 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
8270 !defined(WOLFSSL_RSA_PUBLIC_ONLY)
8271 #ifndef WC_NO_HARDEN
8272 #if !defined(WC_NO_CACHE_RESISTANT)
8273 /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
8274 * Process the exponent one bit at a time with base in montgomery form.
8275 * Is constant time and cache attack resistant.
8276 *
8277 * @param [in] b SP integer that is the base.
8278 * @param [in] e SP integer that is the exponent.
8279 * @param [in] bits Number of bits in base to use. May be greater than
8280 * count of bits in b.
8281 * @param [in] m SP integer that is the modulus.
8282 * @param [out] r SP integer to hold result.
8283 *
8284 * @return MP_OKAY on success.
8285 * @return MP_MEM when dynamic memory allocation fails.
8286 */
_sp_exptmod_mont_ex(sp_int * b,sp_int * e,int bits,sp_int * m,sp_int * r)8287 static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
8288 sp_int* r)
8289 {
8290 int i;
8291 int err = MP_OKAY;
8292 int done = 0;
8293 int j;
8294 int y;
8295 int seenTopBit = 0;
8296 sp_int_digit mp;
8297 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);
8298
8299 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
8300 if (err == MP_OKAY) {
8301 sp_init_size(t[0], m->used * 2 + 1);
8302 sp_init_size(t[1], m->used * 2 + 1);
8303 sp_init_size(t[2], m->used * 2 + 1);
8304 sp_init_size(t[3], m->used * 2 + 1);
8305
8306 /* Ensure base is less than exponent. */
8307 if (_sp_cmp_abs(b, m) != MP_LT) {
8308 err = sp_mod(b, m, t[0]);
8309 if ((err == MP_OKAY) && sp_iszero(t[0])) {
8310 sp_set(r, 0);
8311 done = 1;
8312 }
8313 }
8314 else {
8315 err = sp_copy(b, t[0]);
8316 }
8317 }
8318
8319
8320 if ((!done) && (err == MP_OKAY)) {
8321 err = sp_mont_setup(m, &mp);
8322 if (err == MP_OKAY) {
8323 err = sp_mont_norm(t[1], m);
8324 }
8325 if (err == MP_OKAY) {
8326 /* Convert to montgomery form. */
8327 err = sp_mulmod(t[0], t[1], m, t[0]);
8328 }
8329 if (err == MP_OKAY) {
8330 /* t[0] is fake working value and t[1] is real working value. */
8331 sp_copy(t[0], t[1]);
8332 /* Montgomert form of base to multiply by. */
8333 sp_copy(t[0], t[2]);
8334 }
8335
8336 for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
8337 /* Square real working value if seen the top bit. */
8338 sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
8339 ((size_t)t[1] & sp_off_on_addr[seenTopBit ])),
8340 t[3]);
8341 err = sp_sqr(t[3], t[3]);
8342 if (err == MP_OKAY) {
8343 err = _sp_mont_red(t[3], m, mp);
8344 }
8345 sp_copy(t[3],
8346 (sp_int*)(((size_t)t[0] & sp_off_on_addr[seenTopBit^1]) +
8347 ((size_t)t[1] & sp_off_on_addr[seenTopBit ])));
8348 if (err == MP_OKAY) {
8349 y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
8350 j = y & seenTopBit;
8351 seenTopBit |= y;
8352 /* Multiply real value if bit is set and seen the top bit. */
8353 sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
8354 ((size_t)t[1] & sp_off_on_addr[j ])),
8355 t[3]);
8356 err = sp_mul(t[3], t[2], t[3]);
8357 if (err == MP_OKAY) {
8358 err = _sp_mont_red(t[3], m, mp);
8359 }
8360 sp_copy(t[3],
8361 (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
8362 ((size_t)t[1] & sp_off_on_addr[j ])));
8363 }
8364 }
8365 if (err == MP_OKAY) {
8366 /* Convert from montgomery form. */
8367 err = _sp_mont_red(t[1], m, mp);
8368 /* Reduction implementation returns number to range < m. */
8369 }
8370 }
8371 if ((!done) && (err == MP_OKAY)) {
8372 err = sp_copy(t[1], r);
8373 }
8374
8375 FREE_SP_INT_ARRAY(t, NULL);
8376 return err;
8377 }
8378 #else
8379
8380 /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
8381 #define SP_ALLOC
8382
8383 /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
8384 * Creates a window of precalculated exponents with base in montgomery form.
8385 * Is constant time but NOT cache attack resistant.
8386 *
8387 * @param [in] b SP integer that is the base.
8388 * @param [in] e SP integer that is the exponent.
8389 * @param [in] bits Number of bits in base to use. May be greater than
8390 * count of bits in b.
8391 * @param [in] m SP integer that is the modulus.
8392 * @param [out] r SP integer to hold result.
8393 *
8394 * @return MP_OKAY on success.
8395 * @return MP_MEM when dynamic memory allocation fails.
8396 */
_sp_exptmod_mont_ex(sp_int * b,sp_int * e,int bits,sp_int * m,sp_int * r)8397 static int _sp_exptmod_mont_ex(sp_int* b, sp_int* e, int bits, sp_int* m,
8398 sp_int* r)
8399 {
8400 int i;
8401 int j;
8402 int c;
8403 int y;
8404 int winBits;
8405 int preCnt;
8406 int err = MP_OKAY;
8407 int done = 0;
8408 sp_int_digit mp;
8409 sp_int_digit n;
8410 sp_int_digit mask;
8411 sp_int* tr = NULL;
8412 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);
8413
8414 if (bits > 450) {
8415 winBits = 6;
8416 }
8417 else if (bits <= 21) {
8418 winBits = 1;
8419 }
8420 else if (bits <= 36) {
8421 winBits = 3;
8422 }
8423 else if (bits <= 140) {
8424 winBits = 4;
8425 }
8426 else {
8427 winBits = 5;
8428 }
8429 preCnt = 1 << winBits;
8430 mask = preCnt - 1;
8431
8432 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
8433 if (err == MP_OKAY) {
8434 tr = t[preCnt];
8435
8436 for (i = 0; i < preCnt; i++) {
8437 sp_init_size(t[i], m->used * 2 + 1);
8438 }
8439 sp_init_size(tr, m->used * 2 + 1);
8440
8441 /* Ensure base is less than exponent. */
8442 if (_sp_cmp_abs(b, m) != MP_LT) {
8443 err = sp_mod(b, m, t[1]);
8444 if ((err == MP_OKAY) && sp_iszero(t[1])) {
8445 sp_set(r, 0);
8446 done = 1;
8447 }
8448 }
8449 else {
8450 err = sp_copy(b, t[1]);
8451 }
8452 }
8453
8454 if ((!done) && (err == MP_OKAY)) {
8455 err = sp_mont_setup(m, &mp);
8456 if (err == MP_OKAY) {
8457 /* Norm value is 1 in montgomery form. */
8458 err = sp_mont_norm(t[0], m);
8459 }
8460 if (err == MP_OKAY) {
8461 /* Convert base to montgomery form. */
8462 err = sp_mulmod(t[1], t[0], m, t[1]);
8463 }
8464
8465 /* Pre-calculate values */
8466 for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
8467 if ((i & 1) == 0) {
8468 err = sp_sqr(t[i/2], t[i]);
8469 }
8470 else {
8471 err = sp_mul(t[i-1], t[1], t[i]);
8472 }
8473 if (err == MP_OKAY) {
8474 err = _sp_mont_red(t[i], m, mp);
8475 }
8476 }
8477
8478 if (err == MP_OKAY) {
8479 /* Bits from the top that - possibly left over. */
8480 i = (bits - 1) >> SP_WORD_SHIFT;
8481 n = e->dp[i--];
8482 c = bits & (SP_WORD_SIZE - 1);
8483 if (c == 0) {
8484 c = SP_WORD_SIZE;
8485 }
8486 c -= bits % winBits;
8487 y = (int)(n >> c);
8488 n <<= SP_WORD_SIZE - c;
8489 /* Copy window number for top bits. */
8490 sp_copy(t[y], tr);
8491 for (; (i >= 0) || (c >= winBits); ) {
8492 if (c == 0) {
8493 /* Bits up to end of digit */
8494 n = e->dp[i--];
8495 y = (int)(n >> (SP_WORD_SIZE - winBits));
8496 n <<= winBits;
8497 c = SP_WORD_SIZE - winBits;
8498 }
8499 else if (c < winBits) {
8500 /* Bits to end of digit and part of next */
8501 y = (int)(n >> (SP_WORD_SIZE - winBits));
8502 n = e->dp[i--];
8503 c = winBits - c;
8504 y |= (int)(n >> (SP_WORD_SIZE - c));
8505 n <<= c;
8506 c = SP_WORD_SIZE - c;
8507 }
8508 else {
8509 /* Bits from middle of digit */
8510 y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
8511 n <<= winBits;
8512 c -= winBits;
8513 }
8514
8515 /* Square for number of bits in window. */
8516 for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
8517 err = sp_sqr(tr, tr);
8518 if (err == MP_OKAY) {
8519 err = _sp_mont_red(tr, m, mp);
8520 }
8521 }
8522 /* Multiply by window number for next set of bits. */
8523 if (err == MP_OKAY) {
8524 err = sp_mul(tr, t[y], tr);
8525 }
8526 if (err == MP_OKAY) {
8527 err = _sp_mont_red(tr, m, mp);
8528 }
8529 }
8530 }
8531
8532 if (err == MP_OKAY) {
8533 /* Convert from montgomery form. */
8534 err = _sp_mont_red(tr, m, mp);
8535 /* Reduction implementation returns number to range < m. */
8536 }
8537 }
8538 if ((!done) && (err == MP_OKAY)) {
8539 err = sp_copy(tr, r);
8540 }
8541
8542 FREE_SP_INT_ARRAY(t, NULL);
8543 return err;
8544 }
8545
8546 #undef SP_ALLOC
8547
8548 #endif /* !WC_NO_CACHE_RESISTANT */
8549 #endif /* !WC_NO_HARDEN */
8550
8551 #if SP_WORD_SIZE <= 16
8552 #define EXP2_WINSIZE 2
8553 #elif SP_WORD_SIZE <= 32
8554 #define EXP2_WINSIZE 3
8555 #elif SP_WORD_SIZE <= 64
8556 #define EXP2_WINSIZE 4
8557 #elif SP_WORD_SIZE <= 128
8558 #define EXP2_WINSIZE 5
8559 #endif
8560
/* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
 * Is constant time and cache attack resistant.
 *
 * The exponent is consumed from the top in windows of EXP2_WINSIZE bits. For
 * each window the result is squared EXP2_WINSIZE times, shifted left by the
 * window value (a multiplication by 2^y) and reduced modulo m.
 * When m has more than one digit the squarings are reduced with Montgomery
 * reduction; otherwise sp_mod is used and the value is not in Montgomery form.
 *
 * @param  [in]   e       SP integer that is the exponent.
 * @param  [in]   digits  Number of digits of the exponent to process. The top
 *                        digit read is e->dp[digits - 1].
 * @param  [in]   m       SP integer that is the modulus.
 * @param  [out]  r       SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_base_2(sp_int* e, int digits, sp_int* m, sp_int* r)
{
    int i = 0;
    int j;
    int c = 0;
    int y;
    int err = MP_OKAY;
    sp_int* t = NULL;
    sp_int* tr = NULL;
    sp_int_digit mp = 0, n = 0;
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);

    /* Debug output of the inputs - disabled. */
    if (0) {
        sp_print_int(2, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }

    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        /* t is the value added before each sp_mod, tr is the running result. */
        t = d[0];
        tr = d[1];

        sp_init_size(t, m->used * 2 + 1);
        sp_init_size(tr, m->used * 2 + 1);

        if (m->used > 1) {
            err = sp_mont_setup(m, &mp);
            if (err == MP_OKAY) {
                /* Norm value is 1 in montgomery form. */
                err = sp_mont_norm(tr, m);
            }
            if (err == MP_OKAY) {
                /* t = m shifted left by (1 << EXP2_WINSIZE) bits. */
                err = sp_mul_2d(m, 1 << EXP2_WINSIZE, t);
            }
        }
        else {
            /* Single digit modulus: start from plain 1. */
            err = sp_set(tr, 1);
        }

        if (err == MP_OKAY) {
            /* Bits from the top. */
            i = digits - 1;
            n = e->dp[i--];
            c = SP_WORD_SIZE;
        #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
            /* Only compiled for the other window sizes: take the left over
             * top bits, if any, as the first window. */
            c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
            if (c != SP_WORD_SIZE) {
                y = (int)(n >> c);
                n <<= SP_WORD_SIZE - c;
            }
            else
        #endif
            {
                /* No left over bits: first window value is zero. */
                y = 0;
            }

            /* Multiply montgomery representation of 1 by 2 ^ top */
            err = sp_mul_2d(tr, y, tr);
        }
        if ((err == MP_OKAY) && (m->used > 1)) {
            /* Same add-then-reduce sequence as used in the main loop. */
            err = sp_add(tr, t, tr);
        }
        if (err == MP_OKAY) {
            err = sp_mod(tr, m, tr);
        }
        if (err == MP_OKAY) {
            /* Continue while there are digits or a whole window left. */
            for (; (i >= 0) || (c >= EXP2_WINSIZE); ) {
                if (c == 0) {
                    /* Bits up to end of digit */
                    n = e->dp[i--];
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
                    n <<= EXP2_WINSIZE;
                    c = SP_WORD_SIZE - EXP2_WINSIZE;
                }
            #if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
                else if (c < EXP2_WINSIZE) {
                    /* Bits to end of digit and part of next */
                    y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
                    n = e->dp[i--];
                    c = EXP2_WINSIZE - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
            #endif
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) &
                              ((1 << EXP2_WINSIZE) - 1));
                    n <<= EXP2_WINSIZE;
                    c -= EXP2_WINSIZE;
                }

                /* Square for number of bits in window. */
                for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err != MP_OKAY) {
                        break;
                    }
                    /* Reduce: Montgomery for multi-digit modulus. */
                    if (m->used > 1) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                    else {
                        err = sp_mod(tr, m, tr);
                    }
                }

                if (err == MP_OKAY) {
                    /* then multiply by 2^y */
                    err = sp_mul_2d(tr, y, tr);
                }
                if ((err == MP_OKAY) && (m->used > 1)) {
                    /* Add in value to make mod operation take same time */
                    err = sp_add(tr, t, tr);
                }
                if (err == MP_OKAY) {
                    err = sp_mod(tr, m, tr);
                }
                if (err != MP_OKAY) {
                    break;
                }
            }
        }

        if ((err == MP_OKAY) && (m->used > 1)) {
            /* Convert from montgomery form. */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range < m. */
        }
    }
    if (err == MP_OKAY) {
        err = sp_copy(tr, r);
    }

    /* Debug output of the result - disabled. */
    if (0) {
        sp_print(r, "rme");
    }

    FREE_SP_INT_ARRAY(d, NULL);
    return err;
}
8715 #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
8716
8717 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
8718 defined(WOLFSSL_HAVE_SP_DH) || \
8719 (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
8720 /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
8721 *
8722 * @param [in] b SP integer that is the base.
8723 * @param [in] e SP integer that is the exponent.
8724 * @param [in] bits Number of bits in base to use. May be greater than
8725 * count of bits in b.
8726 * @param [in] m SP integer that is the modulus.
8727 * @param [out] r SP integer to hold result.
8728 *
8729 * @return MP_OKAY on success.
8730 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
8731 * @return MP_MEM when dynamic memory allocation fails.
8732 */
sp_exptmod_ex(sp_int * b,sp_int * e,int digits,sp_int * m,sp_int * r)8733 int sp_exptmod_ex(sp_int* b, sp_int* e, int digits, sp_int* m, sp_int* r)
8734 {
8735 int err = MP_OKAY;
8736 int done = 0;
8737 int mBits = sp_count_bits(m);
8738 int bBits = sp_count_bits(b);
8739 int eBits = sp_count_bits(e);
8740
8741 if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
8742 err = MP_VAL;
8743 }
8744
8745 #if 0
8746 if (err == MP_OKAY) {
8747 sp_print(b, "a");
8748 sp_print(e, "b");
8749 sp_print(m, "m");
8750 }
8751 #endif
8752
8753 if (err != MP_OKAY) {
8754 }
8755 /* Handle special cases. */
8756 else if (sp_iszero(m)) {
8757 err = MP_VAL;
8758 }
8759 #ifdef WOLFSSL_SP_INT_NEGATIVE
8760 else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
8761 err = MP_VAL;
8762 }
8763 #endif
8764 else if (sp_isone(m)) {
8765 sp_set(r, 0);
8766 done = 1;
8767 }
8768 else if (sp_iszero(e)) {
8769 sp_set(r, 1);
8770 done = 1;
8771 }
8772 else if (sp_iszero(b)) {
8773 sp_set(r, 0);
8774 done = 1;
8775 }
8776 /* Ensure SP integers have space for intermediate values. */
8777 else if (m->used * 2 >= r->size) {
8778 err = MP_VAL;
8779 }
8780
8781 if ((!done) && (err == MP_OKAY)) {
8782 /* Use code optimized for specific sizes if possible */
8783 #if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
8784 (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
8785 #ifndef WOLFSSL_SP_NO_2048
8786 if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
8787 (eBits <= 1024)) {
8788 err = sp_ModExp_1024(b, e, m, r);
8789 done = 1;
8790 }
8791 else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
8792 (eBits <= 2048)) {
8793 err = sp_ModExp_2048(b, e, m, r);
8794 done = 1;
8795 }
8796 else
8797 #endif
8798 #ifndef WOLFSSL_SP_NO_3072
8799 if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
8800 (eBits <= 1536)) {
8801 err = sp_ModExp_1536(b, e, m, r);
8802 done = 1;
8803 }
8804 else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
8805 (eBits <= 3072)) {
8806 err = sp_ModExp_3072(b, e, m, r);
8807 done = 1;
8808 }
8809 else
8810 #endif
8811 #ifdef WOLFSSL_SP_4096
8812 if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
8813 (eBits <= 4096)) {
8814 err = sp_ModExp_4096(b, e, m, r);
8815 done = 1;
8816 }
8817 else
8818 #endif
8819 #endif
8820 {
8821 }
8822 }
8823 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
8824 #if defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)
8825 if ((!done) && (err == MP_OKAY))
8826 err = sp_exptmod_nct(b, e, m, r);
8827 }
8828 #else
8829 #if defined(WOLFSSL_SP_MATH_ALL)
8830 if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
8831 mp_isodd(m)) {
8832 /* Use the generic base 2 implementation. */
8833 err = _sp_exptmod_base_2(e, digits, m, r);
8834 }
8835 else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
8836 #ifndef WC_NO_HARDEN
8837 err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
8838 #else
8839 err = sp_exptmod_nct(b, e, m, r);
8840 #endif
8841 }
8842 else
8843 #endif /* WOLFSSL_SP_MATH_ALL */
8844 if ((!done) && (err == MP_OKAY)) {
8845 /* Otherwise use the generic implementation. */
8846 err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
8847 }
8848 #endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
8849 #else
8850 if ((!done) && (err == MP_OKAY)) {
8851 err = MP_VAL;
8852 }
8853 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
8854
8855 (void)mBits;
8856 (void)bBits;
8857 (void)eBits;
8858 (void)digits;
8859
8860 #if 0
8861 if (err == MP_OKAY) {
8862 sp_print(r, "rme");
8863 }
8864 #endif
8865 return err;
8866 }
8867 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
8868
8869 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
8870 defined(WOLFSSL_HAVE_SP_DH) || \
8871 (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
8872 /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
8873 *
8874 * @param [in] b SP integer that is the base.
8875 * @param [in] e SP integer that is the exponent.
8876 * @param [in] m SP integer that is the modulus.
8877 * @param [out] r SP integer to hold result.
8878 *
8879 * @return MP_OKAY on success.
8880 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
8881 * @return MP_MEM when dynamic memory allocation fails.
8882 */
8883 int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
8884 {
8885 int err = MP_OKAY;
8886
8887 if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
8888 err = MP_VAL;
8889 }
8890 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
8891 if (err == MP_OKAY) {
8892 err = sp_exptmod_ex(b, e, e->used, m, r);
8893 }
8894 RESTORE_VECTOR_REGISTERS();
8895 return err;
8896 }
8897 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
8898 * WOLFSSL_HAVE_SP_DH */
8899
8900 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
8901 #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
8902
8903 /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
8904 #define SP_ALLOC
8905
8906 /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
8907 * Creates a window of precalculated exponents with base in montgomery form.
8908 * Sliding window and is NOT constant time.
8909 *
8910 * @param [in] b SP integer that is the base.
8911 * @param [in] e SP integer that is the exponent.
8912 * @param [in] bits Number of bits in base to use. May be greater than
8913 * count of bits in b.
8914 * @param [in] m SP integer that is the modulus.
8915 * @param [out] r SP integer to hold result.
8916 *
8917 * @return MP_OKAY on success.
8918 * @return MP_MEM when dynamic memory allocation fails.
8919 */
8920 static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
8921 {
8922 int i;
8923 int j;
8924 int c;
8925 int y;
8926 int bits;
8927 int winBits;
8928 int preCnt;
8929 int err = MP_OKAY;
8930 int done = 0;
8931 sp_int* tr = NULL;
8932 sp_int* bm = NULL;
8933 sp_int_digit mask;
8934 /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
8935 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
8936
8937 bits = sp_count_bits(e);
8938
8939 if (bits > 450) {
8940 winBits = 6;
8941 }
8942 else if (bits <= 21) {
8943 winBits = 1;
8944 }
8945 else if (bits <= 36) {
8946 winBits = 3;
8947 }
8948 else if (bits <= 140) {
8949 winBits = 4;
8950 }
8951 else {
8952 winBits = 5;
8953 }
8954 preCnt = 1 << (winBits - 1);
8955 mask = preCnt - 1;
8956
8957 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 2, err, NULL);
8958 if (err == MP_OKAY) {
8959 /* Initialize window numbers and temporary result. */
8960 tr = t[preCnt + 0];
8961 bm = t[preCnt + 1];
8962
8963 for (i = 0; i < preCnt; i++) {
8964 sp_init_size(t[i], m->used * 2 + 1);
8965 }
8966 sp_init_size(tr, m->used * 2 + 1);
8967 sp_init_size(bm, m->used * 2 + 1);
8968
8969 /* Ensure base is less than exponent. */
8970 if (_sp_cmp_abs(b, m) != MP_LT) {
8971 err = sp_mod(b, m, bm);
8972 if ((err == MP_OKAY) && sp_iszero(bm)) {
8973 sp_set(r, 0);
8974 done = 1;
8975 }
8976 }
8977 else {
8978 err = sp_copy(b, bm);
8979 }
8980 }
8981
8982 if ((!done) && (err == MP_OKAY)) {
8983 sp_int_digit mp;
8984 sp_int_digit n;
8985
8986 err = sp_mont_setup(m, &mp);
8987 if (err == MP_OKAY) {
8988 err = sp_mont_norm(t[0], m);
8989 }
8990 if (err == MP_OKAY) {
8991 err = sp_mulmod(bm, t[0], m, bm);
8992 }
8993 if (err == MP_OKAY) {
8994 err = sp_copy(bm, t[0]);
8995 }
8996 for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
8997 err = sp_sqr(t[0], t[0]);
8998 if (err == MP_OKAY) {
8999 err = _sp_mont_red(t[0], m, mp);
9000 }
9001 }
9002 for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
9003 err = sp_mul(t[i-1], bm, t[i]);
9004 if (err == MP_OKAY) {
9005 err = _sp_mont_red(t[i], m, mp);
9006 }
9007 }
9008
9009 if (err == MP_OKAY) {
9010 /* Find the top bit. */
9011 i = (bits - 1) >> SP_WORD_SHIFT;
9012 n = e->dp[i--];
9013 c = bits % SP_WORD_SIZE;
9014 if (c == 0) {
9015 c = SP_WORD_SIZE;
9016 }
9017 /* Put top bit at highest offset in digit. */
9018 n <<= SP_WORD_SIZE - c;
9019
9020 if (bits >= winBits) {
9021 /* Top bit set. Copy from window. */
9022 if (c < winBits) {
9023 /* Bits to end of digit and part of next */
9024 y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
9025 n = e->dp[i--];
9026 c = winBits - c;
9027 y |= (int)(n >> (SP_WORD_SIZE - c));
9028 n <<= c;
9029 c = SP_WORD_SIZE - c;
9030 }
9031 else {
9032 /* Bits from middle of digit */
9033 y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
9034 n <<= winBits;
9035 c -= winBits;
9036 }
9037 err = sp_copy(t[y], tr);
9038 }
9039 else {
9040 /* 1 in Montgomery form. */
9041 err = sp_mont_norm(tr, m);
9042 }
9043 while (err == MP_OKAY) {
9044 /* Sqaure until we find bit that is 1 or there's less than a
9045 * window of bits left.
9046 */
9047 while (err == MP_OKAY && ((i >= 0) || (c >= winBits))) {
9048 sp_digit n2 = n;
9049 int c2 = c;
9050 int i2 = i;
9051
9052 /* Make sure n2 has bits from the right digit. */
9053 if (c2 == 0) {
9054 n2 = e->dp[i2--];
9055 c2 = SP_WORD_SIZE;
9056 }
9057 /* Mask off the next bit. */
9058 y = (int)((n2 >> (SP_WORD_SIZE - 1)) & 1);
9059 if (y == 1) {
9060 break;
9061 }
9062
9063 /* Square and update position. */
9064 err = sp_sqr(tr, tr);
9065 if (err == MP_OKAY) {
9066 err = _sp_mont_red(tr, m, mp);
9067 }
9068 n = n2 << 1;
9069 c = c2 - 1;
9070 i = i2;
9071 }
9072
9073 if (err == MP_OKAY) {
9074 /* Check we have enough bits left for a window. */
9075 if ((i < 0) && (c < winBits)) {
9076 break;
9077 }
9078
9079 if (c == 0) {
9080 /* Bits up to end of digit */
9081 n = e->dp[i--];
9082 y = (int)(n >> (SP_WORD_SIZE - winBits));
9083 n <<= winBits;
9084 c = SP_WORD_SIZE - winBits;
9085 }
9086 else if (c < winBits) {
9087 /* Bits to end of digit and part of next */
9088 y = (int)(n >> (SP_WORD_SIZE - winBits));
9089 n = e->dp[i--];
9090 c = winBits - c;
9091 y |= (int)(n >> (SP_WORD_SIZE - c));
9092 n <<= c;
9093 c = SP_WORD_SIZE - c;
9094 }
9095 else {
9096 /* Bits from middle of digit */
9097 y = (int)(n >> (SP_WORD_SIZE - winBits));
9098 n <<= winBits;
9099 c -= winBits;
9100 }
9101 y &= mask;
9102 }
9103
9104 /* Square for number of bits in window. */
9105 for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
9106 err = sp_sqr(tr, tr);
9107 if (err == MP_OKAY) {
9108 err = _sp_mont_red(tr, m, mp);
9109 }
9110 }
9111 /* Multiply by window number for next set of bits. */
9112 if (err == MP_OKAY) {
9113 err = sp_mul(tr, t[y], tr);
9114 }
9115 if (err == MP_OKAY) {
9116 err = _sp_mont_red(tr, m, mp);
9117 }
9118 }
9119 if ((err == MP_OKAY) && (c > 0)) {
9120 /* Handle remaining bits.
9121 * Window values have top bit set and can't be used. */
9122 n = e->dp[0];
9123 for (--c; (err == MP_OKAY) && (c >= 0); c--) {
9124 err = sp_sqr(tr, tr);
9125 if (err == MP_OKAY) {
9126 err = _sp_mont_red(tr, m, mp);
9127 }
9128 if ((err == MP_OKAY) && ((n >> c) & 1)) {
9129 err = sp_mul(tr, bm, tr);
9130 if (err == MP_OKAY) {
9131 err = _sp_mont_red(tr, m, mp);
9132 }
9133 }
9134 }
9135 }
9136 }
9137
9138 if (err == MP_OKAY) {
9139 /* Convert from montgomery form. */
9140 err = _sp_mont_red(tr, m, mp);
9141 /* Reduction implementation returns number to range < m. */
9142 }
9143 }
9144 if ((!done) && (err == MP_OKAY)) {
9145 err = sp_copy(tr, r);
9146 }
9147
9148 FREE_SP_INT_ARRAY(t, NULL);
9149 return err;
9150 }
9151
9152 #undef SP_ALLOC
9153
9154 #else
9155 /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
9156 * Non-constant time implementation.
9157 *
9158 * @param [in] b SP integer that is the base.
9159 * @param [in] e SP integer that is the exponent.
9160 * @param [in] m SP integer that is the modulus.
9161 * @param [out] r SP integer to hold result.
9162 *
9163 * @return MP_OKAY on success.
9164 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
9165 * @return MP_MEM when dynamic memory allocation fails.
9166 */
9167 static int _sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
9168 {
9169 int i;
9170 int err = MP_OKAY;
9171 int done = 0;
9172 int y;
9173 int bits = sp_count_bits(e);
9174 sp_int_digit mp;
9175 DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);
9176
9177 ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
9178 if (err == MP_OKAY) {
9179 sp_init_size(t[0], m->used * 2 + 1);
9180 sp_init_size(t[1], m->used * 2 + 1);
9181
9182 /* Ensure base is less than exponent. */
9183 if (_sp_cmp_abs(b, m) != MP_LT) {
9184 err = sp_mod(b, m, t[0]);
9185 if ((err == MP_OKAY) && sp_iszero(t[0])) {
9186 sp_set(r, 0);
9187 done = 1;
9188 }
9189 }
9190 else {
9191 err = sp_copy(b, t[0]);
9192 }
9193 }
9194
9195 if ((!done) && (err == MP_OKAY)) {
9196 err = sp_mont_setup(m, &mp);
9197 if (err == MP_OKAY) {
9198 err = sp_mont_norm(t[1], m);
9199 }
9200 if (err == MP_OKAY) {
9201 /* Convert to montgomery form. */
9202 err = sp_mulmod(t[0], t[1], m, t[0]);
9203 }
9204 if (err == MP_OKAY) {
9205 /* Montgomert form of base to multiply by. */
9206 sp_copy(t[0], t[1]);
9207 }
9208
9209 for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
9210 err = sp_sqr(t[0], t[0]);
9211 if (err == MP_OKAY) {
9212 err = _sp_mont_red(t[0], m, mp);
9213 }
9214 if (err == MP_OKAY) {
9215 y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
9216 if (y != 0) {
9217 err = sp_mul(t[0], t[1], t[0]);
9218 if (err == MP_OKAY) {
9219 err = _sp_mont_red(t[0], m, mp);
9220 }
9221 }
9222 }
9223 }
9224 if (err == MP_OKAY) {
9225 /* Convert from montgomery form. */
9226 err = _sp_mont_red(t[0], m, mp);
9227 /* Reduction implementation returns number to range < m. */
9228 }
9229 }
9230 if ((!done) && (err == MP_OKAY)) {
9231 err = sp_copy(t[0], r);
9232 }
9233
9234 FREE_SP_INT_ARRAY(t, NULL);
9235 return err;
9236 }
9237 #endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
9238
9239 /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
9240 * Non-constant time implementation.
9241 *
9242 * @param [in] b SP integer that is the base.
9243 * @param [in] e SP integer that is the exponent.
9244 * @param [in] m SP integer that is the modulus.
9245 * @param [out] r SP integer to hold result.
9246 *
9247 * @return MP_OKAY on success.
9248 * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
9249 * @return MP_MEM when dynamic memory allocation fails.
9250 */
9251 int sp_exptmod_nct(sp_int* b, sp_int* e, sp_int* m, sp_int* r)
9252 {
9253 int err = MP_OKAY;
9254
9255 if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
9256 err = MP_VAL;
9257 }
9258
9259 #if 0
9260 if (err == MP_OKAY) {
9261 sp_print(b, "a");
9262 sp_print(e, "b");
9263 sp_print(m, "m");
9264 }
9265 #endif
9266
9267 if (err != MP_OKAY) {
9268 }
9269 /* Handle special cases. */
9270 else if (sp_iszero(m)) {
9271 err = MP_VAL;
9272 }
9273 #ifdef WOLFSSL_SP_INT_NEGATIVE
9274 else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
9275 err = MP_VAL;
9276 }
9277 #endif
9278 else if (sp_isone(m)) {
9279 sp_set(r, 0);
9280 }
9281 else if (sp_iszero(e)) {
9282 sp_set(r, 1);
9283 }
9284 else if (sp_iszero(b)) {
9285 sp_set(r, 0);
9286 }
9287 /* Ensure SP integers have space for intermediate values. */
9288 else if (m->used * 2 >= r->size) {
9289 err = MP_VAL;
9290 }
9291 #if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
9292 else if (mp_iseven(m)) {
9293 err = _sp_exptmod_ex(b, e, e->used * SP_WORD_SIZE, m, r);
9294 }
9295 #endif
9296 else {
9297 err = _sp_exptmod_nct(b, e, m, r);
9298 }
9299
9300 #if 0
9301 if (err == MP_OKAY) {
9302 sp_print(r, "rme");
9303 }
9304 #endif
9305
9306 return err;
9307 }
9308 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
9309
9310 /***************
9311 * 2^e functions
9312 ***************/
9313
9314 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
9315 /* Divide by 2^e: r = a >> e and rem = bits shifted out
9316 *
9317 * @param [in] a SP integer to divide.
9318 * @param [in] e Exponent bits (dividing by 2^e).
9319 * @param [in] m SP integer that is the modulus.
9320 * @param [out] r SP integer to hold result.
9321 * @param [out] rem SP integer to hold remainder.
9322 *
9323 * @return MP_OKAY on success.
9324 * @return MP_VAL when a is NULL.
9325 */
9326 int sp_div_2d(sp_int* a, int e, sp_int* r, sp_int* rem)
9327 {
9328 int err = MP_OKAY;
9329
9330 if (a == NULL) {
9331 err = MP_VAL;
9332 }
9333
9334 if (err == MP_OKAY) {
9335 int remBits = sp_count_bits(a) - e;
9336
9337 if (remBits <= 0) {
9338 /* Shifting down by more bits than in number. */
9339 _sp_zero(r);
9340 sp_copy(a, rem);
9341 }
9342 else {
9343 if (rem != NULL) {
9344 /* Copy a in to remainder. */
9345 err = sp_copy(a, rem);
9346 }
9347 /* Shift a down by into result. */
9348 sp_rshb(a, e, r);
9349 if (rem != NULL) {
9350 /* Set used and mask off top digit of remainder. */
9351 rem->used = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
9352 e &= SP_WORD_MASK;
9353 if (e > 0) {
9354 rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
9355 }
9356 sp_clamp(rem);
9357 #ifdef WOLFSSL_SP_INT_NEGATIVE
9358 rem->sign = MP_ZPOS;
9359 #endif
9360 }
9361 }
9362 }
9363
9364 return err;
9365 }
9366 #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
9367
9368 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
9369 /* The bottom e bits: r = a & ((1 << e) - 1)
9370 *
9371 * @param [in] a SP integer to reduce.
9372 * @param [in] e Modulus bits (modulus equals 2^e).
9373 * @param [out] r SP integer to hold result.
9374 *
9375 * @return MP_OKAY on success.
9376 * @return MP_VAL when a or r is NULL.
9377 */
9378 int sp_mod_2d(sp_int* a, int e, sp_int* r)
9379 {
9380 int err = MP_OKAY;
9381
9382 if ((a == NULL) || (r == NULL)) {
9383 err = MP_VAL;
9384 }
9385
9386 if (err == MP_OKAY) {
9387 int digits = (e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;
9388 if (a != r) {
9389 XMEMCPY(r->dp, a->dp, digits * sizeof(sp_int_digit));
9390 r->used = a->used;
9391 }
9392 #ifndef WOLFSSL_SP_INT_NEGATIVE
9393 if (digits <= a->used)
9394 #else
9395 if ((a->sign != MP_ZPOS) || (digits <= a->used))
9396 #endif
9397 {
9398 #ifdef WOLFSSL_SP_INT_NEGATIVE
9399 if (a->sign == MP_NEG) {
9400 int i;
9401 sp_int_digit carry = 0;
9402
9403 /* Negate value. */
9404 for (i = 0; i < r->used; i++) {
9405 sp_int_digit next = r->dp[i] > 0;
9406 r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
9407 carry |= next;
9408 }
9409 for (; i < digits; i++) {
9410 r->dp[i] = (sp_int_digit)0 - carry;
9411 }
9412 r->sign = MP_ZPOS;
9413 }
9414 #endif
9415 /* Set used and mask off top digit of result. */
9416 r->used = digits;
9417 e &= SP_WORD_MASK;
9418 if (e > 0) {
9419 r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
9420 }
9421 sp_clamp(r);
9422 }
9423 }
9424
9425 return err;
9426 }
9427 #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
9428
9429 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
9430 /* Multiply by 2^e: r = a << e
9431 *
9432 * @param [in] a SP integer to multiply.
9433 * @param [in] e Multiplier bits (multiplier equals 2^e).
9434 * @param [out] r SP integer to hold result.
9435 *
9436 * @return MP_OKAY on success.
9437 * @return MP_VAL when a or r is NULL, or result is too big for fixed data
9438 * length.
9439 */
9440 int sp_mul_2d(sp_int* a, int e, sp_int* r)
9441 {
9442 int err = MP_OKAY;
9443
9444 if ((a == NULL) || (r == NULL)) {
9445 err = MP_VAL;
9446 }
9447
9448 if ((err == MP_OKAY) && (sp_count_bits(a) + e > r->size * SP_WORD_SIZE)) {
9449 err = MP_VAL;
9450 }
9451
9452 if (err == MP_OKAY) {
9453 /* Copy a into r as left shift function works on the number. */
9454 if (a != r) {
9455 err = sp_copy(a, r);
9456 }
9457 }
9458
9459 if (err == MP_OKAY) {
9460 if (0) {
9461 sp_print(a, "a");
9462 sp_print_int(e, "n");
9463 }
9464 err = sp_lshb(r, e);
9465 if (0) {
9466 sp_print(r, "rsl");
9467 }
9468 }
9469
9470 return err;
9471 }
9472 #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
9473
9474 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
9475 defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
9476
9477 /* START SP_SQR implementations */
9478 /* This code is generated.
9479 * To generate:
9480 * cd scripts/sp/sp_int
9481 * ./gen.sh
9482 * File sp_sqr.c contains code.
9483 */
9484
9485 #if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
9486 #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Version using the SP_ASM_* multiply/accumulate macros. The product is built
 * one result digit (column) at a time in the temporary t and copied into r at
 * the end, so r->dp is not written until all digits of a have been read.
 *
 * NOTE(review): a->dp[0] is read and t[0], t[1] are written even when
 * a->used is 0 - presumably the caller never passes zero; confirm.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    int i;
    int j;
    int k;
    /* Temporary for result digits: heap, variable length array or fixed size
     * array depending on build options. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if ((err == MP_OKAY) && (a->used <= 1)) {
        /* Single digit: one square produces both result digits. */
        sp_int_digit l, h;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        /* First macro output is stored as the least significant digit. */
        t[0] = h;
        t[1] = l;
    }
    else if (err == MP_OKAY) {
        /* l, h and o are used as a three digit accumulator for a column. */
        sp_int_digit l, h, o;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        /* Column 0 is complete; l is carried into column 1. */
        t[0] = h;
        h = 0;
        o = 0;
        /* Lower columns: products run until j reaches digit 0. */
        for (k = 1; k < (a->used + 1) / 2; k++) {
            /* Column 2k-1: products a[i] * a[j] with i + j = 2k-1. */
            i = k;
            j = k - 1;
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            /* Store column and shift accumulator down a digit. */
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            /* Column 2k: square of a[k] plus products with i + j = 2k. */
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = k - 1;
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Upper columns: products run until i reaches the top digit. */
        for (; k < a->used; k++) {
            i = k;
            j = k - 1;
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = k - 1;
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Remaining accumulator value is the top digit. */
        t[k * 2 - 1] = l;
    }

    if (err == MP_OKAY) {
        /* Copy the digits into the result and remove leading zeros. */
        r->used = a->used * 2;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
9595 #else /* !SQR_MUL_ASM */
9596 /* Square a and store in r. r = a * a
9597 *
9598 * @param [in] a SP integer to square.
9599 * @param [out] r SP integer result.
9600 *
9601 * @return MP_OKAY on success.
9602 * @return MP_MEM when dynamic memory allocation fails.
9603 */
9604 static int _sp_sqr(sp_int* a, sp_int* r)
9605 {
9606 int err = MP_OKAY;
9607 int i;
9608 int j;
9609 int k;
9610 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9611 sp_int_digit* t = NULL;
9612 #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
9613 defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SP_NO_DYN_STACK)
9614 sp_int_digit t[a->used * 2];
9615 #else
9616 sp_int_digit t[SP_INT_DIGITS];
9617 #endif
9618
9619 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9620 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
9621 DYNAMIC_TYPE_BIGINT);
9622 if (t == NULL) {
9623 err = MP_MEM;
9624 }
9625 #endif
9626 if (err == MP_OKAY) {
9627 sp_int_word w;
9628 sp_int_word l;
9629 sp_int_word h;
9630 #ifdef SP_WORD_OVERFLOW
9631 sp_int_word o;
9632 #endif
9633
9634 w = (sp_int_word)a->dp[0] * a->dp[0];
9635 t[0] = (sp_int_digit)w;
9636 l = (sp_int_digit)(w >> SP_WORD_SIZE);
9637 h = 0;
9638 #ifdef SP_WORD_OVERFLOW
9639 o = 0;
9640 #endif
9641 for (k = 1; k <= (a->used - 1) * 2; k++) {
9642 i = k / 2;
9643 j = k - i;
9644 if (i == j) {
9645 w = (sp_int_word)a->dp[i] * a->dp[j];
9646 l += (sp_int_digit)w;
9647 h += (sp_int_digit)(w >> SP_WORD_SIZE);
9648 #ifdef SP_WORD_OVERFLOW
9649 h += (sp_int_digit)(l >> SP_WORD_SIZE);
9650 l &= SP_MASK;
9651 o += (sp_int_digit)(h >> SP_WORD_SIZE);
9652 h &= SP_MASK;
9653 #endif
9654 }
9655 for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
9656 w = (sp_int_word)a->dp[i] * a->dp[j];
9657 l += (sp_int_digit)w;
9658 h += (sp_int_digit)(w >> SP_WORD_SIZE);
9659 #ifdef SP_WORD_OVERFLOW
9660 h += (sp_int_digit)(l >> SP_WORD_SIZE);
9661 l &= SP_MASK;
9662 o += (sp_int_digit)(h >> SP_WORD_SIZE);
9663 h &= SP_MASK;
9664 #endif
9665 l += (sp_int_digit)w;
9666 h += (sp_int_digit)(w >> SP_WORD_SIZE);
9667 #ifdef SP_WORD_OVERFLOW
9668 h += (sp_int_digit)(l >> SP_WORD_SIZE);
9669 l &= SP_MASK;
9670 o += (sp_int_digit)(h >> SP_WORD_SIZE);
9671 h &= SP_MASK;
9672 #endif
9673 }
9674 t[k] = (sp_int_digit)l;
9675 l >>= SP_WORD_SIZE;
9676 l += (sp_int_digit)h;
9677 h >>= SP_WORD_SIZE;
9678 #ifdef SP_WORD_OVERFLOW
9679 h += o & SP_MASK;
9680 o >>= SP_WORD_SIZE;
9681 #endif
9682 }
9683 t[k] = (sp_int_digit)l;
9684 r->used = k + 1;
9685 XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
9686 sp_clamp(r);
9687 }
9688
9689 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9690 if (t != NULL) {
9691 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
9692 }
9693 #endif
9694 return err;
9695 }
9696 #endif /* SQR_MUL_ASM */
9697 #endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
9698
9699 #ifndef WOLFSSL_SP_SMALL
9700 #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
9701 #if SP_WORD_SIZE == 64
9702 #ifndef SQR_MUL_ASM
9703 /* Square a and store in r. r = a * a
9704 *
9705 * @param [in] a SP integer to square.
9706 * @param [out] r SP integer result.
9707 *
9708 * @return MP_OKAY on success.
9709 * @return MP_MEM when dynamic memory allocation fails.
9710 */
9711 static int _sp_sqr_4(sp_int* a, sp_int* r)
9712 {
9713 int err = MP_OKAY;
9714 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9715 sp_int_word* w = NULL;
9716 #else
9717 sp_int_word w[10];
9718 #endif
9719 sp_int_digit* da = a->dp;
9720
9721 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9722 w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
9723 DYNAMIC_TYPE_BIGINT);
9724 if (w == NULL) {
9725 err = MP_MEM;
9726 }
9727 #endif
9728
9729
9730 if (err == MP_OKAY) {
9731 w[0] = (sp_int_word)da[0] * da[0];
9732 w[1] = (sp_int_word)da[0] * da[1];
9733 w[2] = (sp_int_word)da[0] * da[2];
9734 w[3] = (sp_int_word)da[1] * da[1];
9735 w[4] = (sp_int_word)da[0] * da[3];
9736 w[5] = (sp_int_word)da[1] * da[2];
9737 w[6] = (sp_int_word)da[1] * da[3];
9738 w[7] = (sp_int_word)da[2] * da[2];
9739 w[8] = (sp_int_word)da[2] * da[3];
9740 w[9] = (sp_int_word)da[3] * da[3];
9741
9742 r->dp[0] = w[0];
9743 w[0] >>= SP_WORD_SIZE;
9744 w[0] += (sp_int_digit)w[1];
9745 w[0] += (sp_int_digit)w[1];
9746 r->dp[1] = w[0];
9747 w[0] >>= SP_WORD_SIZE;
9748 w[1] >>= SP_WORD_SIZE;
9749 w[0] += (sp_int_digit)w[1];
9750 w[0] += (sp_int_digit)w[1];
9751 w[0] += (sp_int_digit)w[2];
9752 w[0] += (sp_int_digit)w[2];
9753 w[0] += (sp_int_digit)w[3];
9754 r->dp[2] = w[0];
9755 w[0] >>= SP_WORD_SIZE;
9756 w[2] >>= SP_WORD_SIZE;
9757 w[0] += (sp_int_digit)w[2];
9758 w[0] += (sp_int_digit)w[2];
9759 w[3] >>= SP_WORD_SIZE;
9760 w[0] += (sp_int_digit)w[3];
9761 w[0] += (sp_int_digit)w[4];
9762 w[0] += (sp_int_digit)w[4];
9763 w[0] += (sp_int_digit)w[5];
9764 w[0] += (sp_int_digit)w[5];
9765 r->dp[3] = w[0];
9766 w[0] >>= SP_WORD_SIZE;
9767 w[4] >>= SP_WORD_SIZE;
9768 w[0] += (sp_int_digit)w[4];
9769 w[0] += (sp_int_digit)w[4];
9770 w[5] >>= SP_WORD_SIZE;
9771 w[0] += (sp_int_digit)w[5];
9772 w[0] += (sp_int_digit)w[5];
9773 w[0] += (sp_int_digit)w[6];
9774 w[0] += (sp_int_digit)w[6];
9775 w[0] += (sp_int_digit)w[7];
9776 r->dp[4] = w[0];
9777 w[0] >>= SP_WORD_SIZE;
9778 w[6] >>= SP_WORD_SIZE;
9779 w[0] += (sp_int_digit)w[6];
9780 w[0] += (sp_int_digit)w[6];
9781 w[7] >>= SP_WORD_SIZE;
9782 w[0] += (sp_int_digit)w[7];
9783 w[0] += (sp_int_digit)w[8];
9784 w[0] += (sp_int_digit)w[8];
9785 r->dp[5] = w[0];
9786 w[0] >>= SP_WORD_SIZE;
9787 w[8] >>= SP_WORD_SIZE;
9788 w[0] += (sp_int_digit)w[8];
9789 w[0] += (sp_int_digit)w[8];
9790 w[0] += (sp_int_digit)w[9];
9791 r->dp[6] = w[0];
9792 w[0] >>= SP_WORD_SIZE;
9793 w[9] >>= SP_WORD_SIZE;
9794 w[0] += (sp_int_digit)w[9];
9795 r->dp[7] = w[0];
9796
9797 r->used = 8;
9798 sp_clamp(r);
9799 }
9800
9801 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
9802 if (w != NULL) {
9803 XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
9804 }
9805 #endif
9806 return err;
9807 }
9808 #else /* SQR_MUL_ASM */
9809 /* Square a and store in r. r = a * a
9810 *
9811 * @param [in] a SP integer to square.
9812 * @param [out] r SP integer result.
9813 *
9814 * @return MP_OKAY on success.
9815 * @return MP_MEM when dynamic memory allocation fails.
9816 */
9817 static int _sp_sqr_4(sp_int* a, sp_int* r)
9818 {
9819 sp_int_digit l = 0;
9820 sp_int_digit h = 0;
9821 sp_int_digit o = 0;
9822 sp_int_digit t[4];
9823
9824 SP_ASM_SQR(h, l, a->dp[0]);
9825 t[0] = h;
9826 h = 0;
9827 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
9828 t[1] = l;
9829 l = h;
9830 h = o;
9831 o = 0;
9832 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
9833 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
9834 t[2] = l;
9835 l = h;
9836 h = o;
9837 o = 0;
9838 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
9839 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
9840 t[3] = l;
9841 l = h;
9842 h = o;
9843 o = 0;
9844 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
9845 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
9846 r->dp[4] = l;
9847 l = h;
9848 h = o;
9849 o = 0;
9850 SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
9851 r->dp[5] = l;
9852 l = h;
9853 h = o;
9854 SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
9855 r->dp[6] = l;
9856 r->dp[7] = h;
9857 XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
9858 r->used = 8;
9859 sp_clamp(r);
9860
9861 return MP_OKAY;
9862 }
9863 #endif /* SQR_MUL_ASM */
9864 #endif /* SP_WORD_SIZE == 64 */
9865 #if SP_WORD_SIZE == 64
9866 #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Fully unrolled for 6 digits: a->dp[0..5] are read and r->dp[0..11] are
 * written; a->used is not consulted. Result digits 0..5 are staged in the
 * local array t and copied into r->dp at the very end, while digits 6..11 are
 * stored to r->dp as they are produced.
 *
 * l, h, o form the running accumulator: after each result digit is taken from
 * l, h moves to l and o moves to h. tl, th, to are a second accumulator used
 * for columns with three or more cross products before being folded in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined outside this block; the
 * comments below only record which digit pairs feed each result digit.
 *
 * @param [in] a SP integer to square.
 * @param [out] r SP integer result.
 *
 * @return MP_OKAY always - this function performs no dynamic allocation.
 */
static int _sp_sqr_6(sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    /* Not initialized here; first passed to SP_ASM_MUL_SET. */
    sp_int_digit to;
    sp_int_digit t[6];

    /* Result digit 0: square of digit 0. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Result digit 1: pair (0,1). */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 2: pair (0,2), square of digit 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 3: pairs (0,3) (1,2). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 4: pairs (0,4) (1,3), square of digit 2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 5: pairs (0,5) (1,4) (2,3) via the tl/th/to accumulator. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 6: pairs (1,5) (2,4), square of digit 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 7: pairs (2,5) (3,4). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 8: pair (3,5), square of digit 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 9: pair (4,5). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Result digits 10 and 11: square of digit 5. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* Low half is copied in only after the last read of a->dp. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);

    return MP_OKAY;
}
9952 #endif /* SQR_MUL_ASM */
9953 #endif /* SP_WORD_SIZE == 64 */
9954 #if SP_WORD_SIZE == 32
9955 #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Fully unrolled for 8 digits: a->dp[0..7] are read and r->dp[0..15] are
 * written; a->used is not consulted. Result digits 0..7 are staged in the
 * local array t and copied into r->dp at the very end, while digits 8..15 are
 * stored to r->dp as they are produced.
 *
 * l, h, o form the running accumulator: after each result digit is taken from
 * l, h moves to l and o moves to h. tl, th, to are a second accumulator used
 * for columns with three or more cross products before being folded in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined outside this block; the
 * comments below only record which digit pairs feed each result digit.
 *
 * @param [in] a SP integer to square.
 * @param [out] r SP integer result.
 *
 * @return MP_OKAY always - this function performs no dynamic allocation.
 */
static int _sp_sqr_8(sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    /* Not initialized here; first passed to SP_ASM_MUL_SET. */
    sp_int_digit to;
    sp_int_digit t[8];

    /* Result digit 0: square of digit 0. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Result digit 1: pair (0,1). */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 2: pair (0,2), square of digit 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 3: pairs (0,3) (1,2). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 4: pairs (0,4) (1,3), square of digit 2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 5: pairs (0,5) (1,4) (2,3). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 6: pairs (0,6) (1,5) (2,4), square of digit 3. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 7: pairs (0,7) (1,6) (2,5) (3,4). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 8: pairs (1,7) (2,6) (3,5), square of digit 4. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 9: pairs (2,7) (3,6) (4,5). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 10: pairs (3,7) (4,6), square of digit 5. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 11: pairs (4,7) (5,6). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 12: pair (5,7), square of digit 6. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 13: pair (6,7). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Result digits 14 and 15: square of digit 7. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* Low half is copied in only after the last read of a->dp. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    sp_clamp(r);

    return MP_OKAY;
}
10076 #endif /* SQR_MUL_ASM */
10077 #endif /* SP_WORD_SIZE == 32 */
10078 #if SP_WORD_SIZE == 32
10079 #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Fully unrolled for 12 digits: a->dp[0..11] are read and r->dp[0..23] are
 * written; a->used is not consulted. Result digits 0..11 are staged in the
 * local array t and copied into r->dp at the very end, while digits 12..23
 * are stored to r->dp as they are produced.
 *
 * l, h, o form the running accumulator: after each result digit is taken from
 * l, h moves to l and o moves to h. tl, th, to are a second accumulator used
 * for columns with three or more cross products before being folded in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined outside this block; the
 * comments below only record which digit pairs feed each result digit.
 *
 * @param [in] a SP integer to square.
 * @param [out] r SP integer result.
 *
 * @return MP_OKAY always - this function performs no dynamic allocation.
 */
static int _sp_sqr_12(sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    /* Not initialized here; first passed to SP_ASM_MUL_SET. */
    sp_int_digit to;
    sp_int_digit t[12];

    /* Result digit 0: square of digit 0. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Result digit 1: pair (0,1). */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 2: pair (0,2), square of digit 1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 3: pairs (0,3) (1,2). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 4: pairs (0,4) (1,3), square of digit 2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 5: pairs (0,5) (1,4) (2,3). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 6: pairs (0,6) (1,5) (2,4), square of digit 3. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 7: pairs (0,7) (1,6) (2,5) (3,4). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 8: pairs (0,8) (1,7) (2,6) (3,5), square of digit 4. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 9: pairs (0,9) (1,8) (2,7) (3,6) (4,5). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 10: pairs (0,10) (1,9) (2,8) (3,7) (4,6), square of
     * digit 5. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 11: pairs (0,11) (1,10) (2,9) (3,8) (4,7) (5,6). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 12: pairs (1,11) (2,10) (3,9) (4,8) (5,7), square of
     * digit 6. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 13: pairs (2,11) (3,10) (4,9) (5,8) (6,7). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 14: pairs (3,11) (4,10) (5,9) (6,8), square of digit 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 15: pairs (4,11) (5,10) (6,9) (7,8). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 16: pairs (5,11) (6,10) (7,9), square of digit 8. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 17: pairs (6,11) (7,10) (8,9). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 18: pairs (7,11) (8,10), square of digit 9. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 19: pairs (8,11) (9,10). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 20: pair (9,11), square of digit 10. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    /* Result digit 21: pair (10,11). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Result digits 22 and 23: square of digit 11. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* Low half is copied in only after the last read of a->dp. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    sp_clamp(r);

    return MP_OKAY;
}
10282 #endif /* SQR_MUL_ASM */
10283 #endif /* SP_WORD_SIZE == 32 */
10284 #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
10285
10286 #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
10287 #if SP_INT_DIGITS >= 32
/* Square a and store in r. r = a * a
 *
 * Fully unrolled for 16 digits: a->dp[0..15] are read and r->dp[0..31] are
 * written; a->used is not consulted. Result digits 0..15 are staged in the
 * buffer t (heap allocated when WOLFSSL_SMALL_STACK is defined and
 * WOLFSSL_SP_NO_MALLOC is not, otherwise a local array) and copied into r->dp
 * at the very end, while digits 16..31 are stored to r->dp as they are
 * produced.
 *
 * l, h, o form the running accumulator: after each result digit is taken from
 * l, h moves to l and o moves to h. tl, th, to are a second accumulator used
 * for columns with three or more cross products before being folded in with
 * SP_ASM_ADD_DBL_3.
 * NOTE(review): the SP_ASM_* macros are defined outside this block; the
 * comments below only record which digit pairs feed each result digit.
 *
 * @param [in] a SP integer to square.
 * @param [out] r SP integer result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_16(sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    /* Not initialized here; first passed to SP_ASM_MUL_SET. */
    sp_int_digit to;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    sp_int_digit t[16];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Staging buffer for the 16 low result digits. */
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Result digit 0: square of digit 0. */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        /* Result digit 1: pair (0,1). */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
        t[1] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 2: pair (0,2), square of digit 1. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 3: pairs (0,3) (1,2). */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 4: pairs (0,4) (1,3), square of digit 2. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 5: pairs (0,5) (1,4) (2,3). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 6: pairs (0,6) (1,5) (2,4), square of digit 3. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 7: pairs (0,7) (1,6) (2,5) (3,4). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 8: pairs (0,8) (1,7) (2,6) (3,5), square of digit 4. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 9: pairs (0,9) (1,8) (2,7) (3,6) (4,5). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 10: pairs (0,10) (1,9) (2,8) (3,7) (4,6), square of
         * digit 5. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 11: pairs (0,11) (1,10) (2,9) (3,8) (4,7) (5,6). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 12: pairs (0,12) (1,11) (2,10) (3,9) (4,8) (5,7),
         * square of digit 6. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 13: pairs (0,13) (1,12) (2,11) (3,10) (4,9) (5,8)
         * (6,7). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 14: pairs (0,14) (1,13) (2,12) (3,11) (4,10) (5,9)
         * (6,8), square of digit 7. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 15: pairs (0,15) (1,14) (2,13) (3,12) (4,11) (5,10)
         * (6,9) (7,8). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 16: pairs (1,15) (2,14) (3,13) (4,12) (5,11) (6,10)
         * (7,9), square of digit 8. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[16] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 17: pairs (2,15) (3,14) (4,13) (5,12) (6,11) (7,10)
         * (8,9). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[17] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 18: pairs (3,15) (4,14) (5,13) (6,12) (7,11) (8,10),
         * square of digit 9. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[18] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 19: pairs (4,15) (5,14) (6,13) (7,12) (8,11) (9,10). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[19] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 20: pairs (5,15) (6,14) (7,13) (8,12) (9,11), square of
         * digit 10. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[20] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 21: pairs (6,15) (7,14) (8,13) (9,12) (10,11). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[21] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 22: pairs (7,15) (8,14) (9,13) (10,12), square of
         * digit 11. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[22] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 23: pairs (8,15) (9,14) (10,13) (11,12). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[23] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 24: pairs (9,15) (10,14) (11,13), square of digit 12. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 25: pairs (10,15) (11,14) (12,13). */
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 26: pairs (11,15) (12,14), square of digit 13. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 27: pairs (12,15) (13,14). */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 28: pair (13,15), square of digit 14. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        /* Result digit 29: pair (14,15). */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
        r->dp[29] = l;
        l = h;
        h = o;
        /* Result digits 30 and 31: square of digit 15. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
        r->dp[30] = l;
        r->dp[31] = h;
        /* Low half is copied in only after the last read of a->dp. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
10607 #endif /* SP_INT_DIGITS >= 32 */
10608
10609 #if SP_INT_DIGITS >= 48
10610 /* Square a and store in r. r = a * a
10611 *
10612 * @param [in] a SP integer to square.
10613 * @param [out] r SP integer result.
10614 *
10615 * @return MP_OKAY on success.
10616 * @return MP_MEM when dynamic memory allocation fails.
10617 */
10618 static int _sp_sqr_24(sp_int* a, sp_int* r)
10619 {
10620 int err = MP_OKAY;
10621 sp_int_digit l = 0;
10622 sp_int_digit h = 0;
10623 sp_int_digit o = 0;
10624 sp_int_digit tl = 0;
10625 sp_int_digit th = 0;
10626 sp_int_digit to;
10627 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10628 sp_int_digit* t = NULL;
10629 #else
10630 sp_int_digit t[24];
10631 #endif
10632
10633 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
10634 t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
10635 DYNAMIC_TYPE_BIGINT);
10636 if (t == NULL) {
10637 err = MP_MEM;
10638 }
10639 #endif
10640 if (err == MP_OKAY) {
10641 SP_ASM_SQR(h, l, a->dp[0]);
10642 t[0] = h;
10643 h = 0;
10644 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
10645 t[1] = l;
10646 l = h;
10647 h = o;
10648 o = 0;
10649 SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
10650 SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
10651 t[2] = l;
10652 l = h;
10653 h = o;
10654 o = 0;
10655 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
10656 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
10657 t[3] = l;
10658 l = h;
10659 h = o;
10660 o = 0;
10661 SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
10662 SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
10663 SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
10664 t[4] = l;
10665 l = h;
10666 h = o;
10667 o = 0;
10668 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
10669 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
10670 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
10671 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10672 t[5] = l;
10673 l = h;
10674 h = o;
10675 o = 0;
10676 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
10677 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
10678 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
10679 SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
10680 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10681 t[6] = l;
10682 l = h;
10683 h = o;
10684 o = 0;
10685 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
10686 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
10687 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
10688 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
10689 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10690 t[7] = l;
10691 l = h;
10692 h = o;
10693 o = 0;
10694 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
10695 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
10696 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
10697 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
10698 SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
10699 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10700 t[8] = l;
10701 l = h;
10702 h = o;
10703 o = 0;
10704 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
10705 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
10706 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
10707 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
10708 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
10709 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10710 t[9] = l;
10711 l = h;
10712 h = o;
10713 o = 0;
10714 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
10715 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
10716 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
10717 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
10718 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
10719 SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
10720 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10721 t[10] = l;
10722 l = h;
10723 h = o;
10724 o = 0;
10725 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
10726 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
10727 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
10728 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
10729 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
10730 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
10731 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10732 t[11] = l;
10733 l = h;
10734 h = o;
10735 o = 0;
10736 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
10737 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
10738 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
10739 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
10740 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
10741 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
10742 SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
10743 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10744 t[12] = l;
10745 l = h;
10746 h = o;
10747 o = 0;
10748 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
10749 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
10750 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
10751 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
10752 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
10753 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
10754 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
10755 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10756 t[13] = l;
10757 l = h;
10758 h = o;
10759 o = 0;
10760 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
10761 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
10762 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
10763 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
10764 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
10765 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
10766 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
10767 SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
10768 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10769 t[14] = l;
10770 l = h;
10771 h = o;
10772 o = 0;
10773 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
10774 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
10775 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
10776 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
10777 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
10778 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
10779 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
10780 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
10781 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10782 t[15] = l;
10783 l = h;
10784 h = o;
10785 o = 0;
10786 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
10787 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
10788 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
10789 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
10790 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
10791 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
10792 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
10793 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
10794 SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
10795 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10796 t[16] = l;
10797 l = h;
10798 h = o;
10799 o = 0;
10800 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
10801 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
10802 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
10803 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
10804 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
10805 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
10806 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
10807 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
10808 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
10809 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10810 t[17] = l;
10811 l = h;
10812 h = o;
10813 o = 0;
10814 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
10815 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
10816 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
10817 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
10818 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
10819 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
10820 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
10821 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
10822 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
10823 SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
10824 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10825 t[18] = l;
10826 l = h;
10827 h = o;
10828 o = 0;
10829 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
10830 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
10831 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
10832 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
10833 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
10834 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
10835 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
10836 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
10837 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
10838 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
10839 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10840 t[19] = l;
10841 l = h;
10842 h = o;
10843 o = 0;
10844 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
10845 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
10846 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
10847 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
10848 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
10849 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
10850 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
10851 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
10852 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
10853 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
10854 SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
10855 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10856 t[20] = l;
10857 l = h;
10858 h = o;
10859 o = 0;
10860 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
10861 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
10862 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
10863 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
10864 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
10865 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
10866 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
10867 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
10868 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
10869 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
10870 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
10871 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10872 t[21] = l;
10873 l = h;
10874 h = o;
10875 o = 0;
10876 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
10877 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
10878 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
10879 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
10880 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
10881 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
10882 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
10883 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
10884 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
10885 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
10886 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
10887 SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
10888 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10889 t[22] = l;
10890 l = h;
10891 h = o;
10892 o = 0;
10893 SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
10894 SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
10895 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
10896 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
10897 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
10898 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
10899 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
10900 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
10901 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
10902 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
10903 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
10904 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
10905 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10906 t[23] = l;
10907 l = h;
10908 h = o;
10909 o = 0;
10910 SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
10911 SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
10912 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
10913 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
10914 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
10915 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
10916 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
10917 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
10918 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
10919 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
10920 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
10921 SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
10922 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10923 r->dp[24] = l;
10924 l = h;
10925 h = o;
10926 o = 0;
10927 SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
10928 SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
10929 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
10930 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
10931 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
10932 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
10933 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
10934 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
10935 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
10936 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
10937 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
10938 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10939 r->dp[25] = l;
10940 l = h;
10941 h = o;
10942 o = 0;
10943 SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
10944 SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
10945 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
10946 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
10947 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
10948 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
10949 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
10950 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
10951 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
10952 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
10953 SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
10954 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10955 r->dp[26] = l;
10956 l = h;
10957 h = o;
10958 o = 0;
10959 SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
10960 SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
10961 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
10962 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
10963 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
10964 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
10965 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
10966 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
10967 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
10968 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
10969 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10970 r->dp[27] = l;
10971 l = h;
10972 h = o;
10973 o = 0;
10974 SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
10975 SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
10976 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
10977 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
10978 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
10979 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
10980 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
10981 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
10982 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
10983 SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
10984 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10985 r->dp[28] = l;
10986 l = h;
10987 h = o;
10988 o = 0;
10989 SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
10990 SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
10991 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
10992 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
10993 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
10994 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
10995 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
10996 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
10997 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
10998 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
10999 r->dp[29] = l;
11000 l = h;
11001 h = o;
11002 o = 0;
11003 SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
11004 SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
11005 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
11006 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
11007 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
11008 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
11009 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
11010 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
11011 SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
11012 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11013 r->dp[30] = l;
11014 l = h;
11015 h = o;
11016 o = 0;
11017 SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
11018 SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
11019 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
11020 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
11021 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
11022 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
11023 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
11024 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
11025 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11026 r->dp[31] = l;
11027 l = h;
11028 h = o;
11029 o = 0;
11030 SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
11031 SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
11032 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
11033 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
11034 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
11035 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
11036 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
11037 SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
11038 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11039 r->dp[32] = l;
11040 l = h;
11041 h = o;
11042 o = 0;
11043 SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
11044 SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
11045 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
11046 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
11047 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
11048 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
11049 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
11050 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11051 r->dp[33] = l;
11052 l = h;
11053 h = o;
11054 o = 0;
11055 SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
11056 SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
11057 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
11058 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
11059 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
11060 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
11061 SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
11062 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11063 r->dp[34] = l;
11064 l = h;
11065 h = o;
11066 o = 0;
11067 SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
11068 SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
11069 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
11070 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
11071 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
11072 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
11073 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11074 r->dp[35] = l;
11075 l = h;
11076 h = o;
11077 o = 0;
11078 SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
11079 SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
11080 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
11081 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
11082 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
11083 SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
11084 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11085 r->dp[36] = l;
11086 l = h;
11087 h = o;
11088 o = 0;
11089 SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
11090 SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
11091 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
11092 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
11093 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
11094 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11095 r->dp[37] = l;
11096 l = h;
11097 h = o;
11098 o = 0;
11099 SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
11100 SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
11101 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
11102 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
11103 SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
11104 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11105 r->dp[38] = l;
11106 l = h;
11107 h = o;
11108 o = 0;
11109 SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
11110 SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
11111 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
11112 SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
11113 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11114 r->dp[39] = l;
11115 l = h;
11116 h = o;
11117 o = 0;
11118 SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
11119 SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
11120 SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
11121 SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
11122 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11123 r->dp[40] = l;
11124 l = h;
11125 h = o;
11126 o = 0;
11127 SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
11128 SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
11129 SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
11130 SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
11131 r->dp[41] = l;
11132 l = h;
11133 h = o;
11134 o = 0;
11135 SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
11136 SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
11137 SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
11138 r->dp[42] = l;
11139 l = h;
11140 h = o;
11141 o = 0;
11142 SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
11143 SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
11144 r->dp[43] = l;
11145 l = h;
11146 h = o;
11147 o = 0;
11148 SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
11149 SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
11150 r->dp[44] = l;
11151 l = h;
11152 h = o;
11153 o = 0;
11154 SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
11155 r->dp[45] = l;
11156 l = h;
11157 h = o;
11158 SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
11159 r->dp[46] = l;
11160 r->dp[47] = h;
11161 XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
11162 r->used = 48;
11163 sp_clamp(r);
11164 }
11165
11166 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
11167 if (t != NULL) {
11168 XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
11169 }
11170 #endif
11171 return err;
11172 }
11173 #endif /* SP_INT_DIGITS >= 48 */
11174
11175 #if SP_INT_DIGITS >= 64
11176 /* Square a and store in r. r = a * a
11177 *
11178 * @param [in] a SP integer to square.
11179 * @param [out] r SP integer result.
11180 *
11181 * @return MP_OKAY on success.
11182 * @return MP_MEM when dynamic memory allocation fails.
11183 */
11184 static int _sp_sqr_32(sp_int* a, sp_int* r)
11185 {
11186 int err = MP_OKAY;
11187 int i;
11188 sp_int_digit l;
11189 sp_int_digit h;
11190 sp_int* z0;
11191 sp_int* z1;
11192 sp_int* z2;
11193 sp_int_digit ca;
11194 DECL_SP_INT(a1, 16);
11195 DECL_SP_INT_ARRAY(z, 33, 2);
11196
11197 ALLOC_SP_INT(a1, 16, err, NULL);
11198 ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
11199 if (err == MP_OKAY) {
11200 z1 = z[0];
11201 z2 = z[1];
11202 z0 = r;
11203
11204 XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
11205 a1->used = 16;
11206
11207 /* z2 = a1 ^ 2 */
11208 err = _sp_sqr_16(a1, z2);
11209 }
11210 if (err == MP_OKAY) {
11211 l = 0;
11212 h = 0;
11213 for (i = 0; i < 16; i++) {
11214 SP_ASM_ADDC(l, h, a1->dp[i]);
11215 SP_ASM_ADDC(l, h, a->dp[i]);
11216 a1->dp[i] = l;
11217 l = h;
11218 h = 0;
11219 }
11220 ca = l;
11221
11222 /* z0 = a0 ^ 2 */
11223 err = _sp_sqr_16(a, z0);
11224 }
11225 if (err == MP_OKAY) {
11226 /* z1 = (a0 + a1) ^ 2 */
11227 err = _sp_sqr_16(a1, z1);
11228 }
11229 if (err == MP_OKAY) {
11230 /* r = (z2 << 32) + (z1 - z0 - z2) << 16) + z0 */
11231 /* r = z0 */
11232 /* r += (z1 - z0 - z2) << 16 */
11233 z1->dp[32] = ca;
11234 l = 0;
11235 if (ca) {
11236 l = z1->dp[0 + 16];
11237 h = 0;
11238 SP_ASM_ADDC(l, h, a1->dp[0]);
11239 SP_ASM_ADDC(l, h, a1->dp[0]);
11240 z1->dp[0 + 16] = l;
11241 l = h;
11242 h = 0;
11243 for (i = 1; i < 16; i++) {
11244 SP_ASM_ADDC(l, h, z1->dp[i + 16]);
11245 SP_ASM_ADDC(l, h, a1->dp[i]);
11246 SP_ASM_ADDC(l, h, a1->dp[i]);
11247 z1->dp[i + 16] = l;
11248 l = h;
11249 h = 0;
11250 }
11251 }
11252 z1->dp[32] += l;
11253 /* z1 = z1 - z0 - z1 */
11254 l = z1->dp[0];
11255 h = 0;
11256 SP_ASM_SUBC(l, h, z0->dp[0]);
11257 SP_ASM_SUBC(l, h, z2->dp[0]);
11258 z1->dp[0] = l;
11259 l = h;
11260 h = 0;
11261 for (i = 1; i < 32; i++) {
11262 l += z1->dp[i];
11263 SP_ASM_SUBC(l, h, z0->dp[i]);
11264 SP_ASM_SUBC(l, h, z2->dp[i]);
11265 z1->dp[i] = l;
11266 l = h;
11267 h = 0;
11268 }
11269 z1->dp[i] += l;
11270 /* r += z1 << 16 */
11271 l = 0;
11272 h = 0;
11273 for (i = 0; i < 16; i++) {
11274 SP_ASM_ADDC(l, h, r->dp[i + 16]);
11275 SP_ASM_ADDC(l, h, z1->dp[i]);
11276 r->dp[i + 16] = l;
11277 l = h;
11278 h = 0;
11279 }
11280 for (; i < 33; i++) {
11281 SP_ASM_ADDC(l, h, z1->dp[i]);
11282 r->dp[i + 16] = l;
11283 l = h;
11284 h = 0;
11285 }
11286 /* r += z2 << 32 */
11287 l = 0;
11288 h = 0;
11289 for (i = 0; i < 17; i++) {
11290 SP_ASM_ADDC(l, h, r->dp[i + 32]);
11291 SP_ASM_ADDC(l, h, z2->dp[i]);
11292 r->dp[i + 32] = l;
11293 l = h;
11294 h = 0;
11295 }
11296 for (; i < 32; i++) {
11297 SP_ASM_ADDC(l, h, z2->dp[i]);
11298 r->dp[i + 32] = l;
11299 l = h;
11300 h = 0;
11301 }
11302 r->used = 64;
11303 sp_clamp(r);
11304 }
11305
11306 FREE_SP_INT_ARRAY(z, NULL);
11307 FREE_SP_INT(a1, NULL);
11308 return err;
11309 }
11310 #endif /* SP_INT_DIGITS >= 64 */
11311
11312 #if SP_INT_DIGITS >= 96
11313 /* Square a and store in r. r = a * a
11314 *
11315 * @param [in] a SP integer to square.
11316 * @param [out] r SP integer result.
11317 *
11318 * @return MP_OKAY on success.
11319 * @return MP_MEM when dynamic memory allocation fails.
11320 */
11321 static int _sp_sqr_48(sp_int* a, sp_int* r)
11322 {
11323 int err = MP_OKAY;
11324 int i;
11325 sp_int_digit l;
11326 sp_int_digit h;
11327 sp_int* z0;
11328 sp_int* z1;
11329 sp_int* z2;
11330 sp_int_digit ca;
11331 DECL_SP_INT(a1, 24);
11332 DECL_SP_INT_ARRAY(z, 49, 2);
11333
11334 ALLOC_SP_INT(a1, 24, err, NULL);
11335 ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
11336 if (err == MP_OKAY) {
11337 z1 = z[0];
11338 z2 = z[1];
11339 z0 = r;
11340
11341 XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
11342 a1->used = 24;
11343
11344 /* z2 = a1 ^ 2 */
11345 err = _sp_sqr_24(a1, z2);
11346 }
11347 if (err == MP_OKAY) {
11348 l = 0;
11349 h = 0;
11350 for (i = 0; i < 24; i++) {
11351 SP_ASM_ADDC(l, h, a1->dp[i]);
11352 SP_ASM_ADDC(l, h, a->dp[i]);
11353 a1->dp[i] = l;
11354 l = h;
11355 h = 0;
11356 }
11357 ca = l;
11358
11359 /* z0 = a0 ^ 2 */
11360 err = _sp_sqr_24(a, z0);
11361 }
11362 if (err == MP_OKAY) {
11363 /* z1 = (a0 + a1) ^ 2 */
11364 err = _sp_sqr_24(a1, z1);
11365 }
11366 if (err == MP_OKAY) {
11367 /* r = (z2 << 48) + (z1 - z0 - z2) << 24) + z0 */
11368 /* r = z0 */
11369 /* r += (z1 - z0 - z2) << 24 */
11370 z1->dp[48] = ca;
11371 l = 0;
11372 if (ca) {
11373 l = z1->dp[0 + 24];
11374 h = 0;
11375 SP_ASM_ADDC(l, h, a1->dp[0]);
11376 SP_ASM_ADDC(l, h, a1->dp[0]);
11377 z1->dp[0 + 24] = l;
11378 l = h;
11379 h = 0;
11380 for (i = 1; i < 24; i++) {
11381 SP_ASM_ADDC(l, h, z1->dp[i + 24]);
11382 SP_ASM_ADDC(l, h, a1->dp[i]);
11383 SP_ASM_ADDC(l, h, a1->dp[i]);
11384 z1->dp[i + 24] = l;
11385 l = h;
11386 h = 0;
11387 }
11388 }
11389 z1->dp[48] += l;
11390 /* z1 = z1 - z0 - z1 */
11391 l = z1->dp[0];
11392 h = 0;
11393 SP_ASM_SUBC(l, h, z0->dp[0]);
11394 SP_ASM_SUBC(l, h, z2->dp[0]);
11395 z1->dp[0] = l;
11396 l = h;
11397 h = 0;
11398 for (i = 1; i < 48; i++) {
11399 l += z1->dp[i];
11400 SP_ASM_SUBC(l, h, z0->dp[i]);
11401 SP_ASM_SUBC(l, h, z2->dp[i]);
11402 z1->dp[i] = l;
11403 l = h;
11404 h = 0;
11405 }
11406 z1->dp[i] += l;
11407 /* r += z1 << 16 */
11408 l = 0;
11409 h = 0;
11410 for (i = 0; i < 24; i++) {
11411 SP_ASM_ADDC(l, h, r->dp[i + 24]);
11412 SP_ASM_ADDC(l, h, z1->dp[i]);
11413 r->dp[i + 24] = l;
11414 l = h;
11415 h = 0;
11416 }
11417 for (; i < 49; i++) {
11418 SP_ASM_ADDC(l, h, z1->dp[i]);
11419 r->dp[i + 24] = l;
11420 l = h;
11421 h = 0;
11422 }
11423 /* r += z2 << 48 */
11424 l = 0;
11425 h = 0;
11426 for (i = 0; i < 25; i++) {
11427 SP_ASM_ADDC(l, h, r->dp[i + 48]);
11428 SP_ASM_ADDC(l, h, z2->dp[i]);
11429 r->dp[i + 48] = l;
11430 l = h;
11431 h = 0;
11432 }
11433 for (; i < 48; i++) {
11434 SP_ASM_ADDC(l, h, z2->dp[i]);
11435 r->dp[i + 48] = l;
11436 l = h;
11437 h = 0;
11438 }
11439 r->used = 96;
11440 sp_clamp(r);
11441 }
11442
11443 FREE_SP_INT_ARRAY(z, NULL);
11444 FREE_SP_INT(a1, NULL);
11445 return err;
11446 }
11447 #endif /* SP_INT_DIGITS >= 96 */
11448
11449 #if SP_INT_DIGITS >= 128
11450 /* Square a and store in r. r = a * a
11451 *
11452 * @param [in] a SP integer to square.
11453 * @param [out] r SP integer result.
11454 *
11455 * @return MP_OKAY on success.
11456 * @return MP_MEM when dynamic memory allocation fails.
11457 */
11458 static int _sp_sqr_64(sp_int* a, sp_int* r)
11459 {
11460 int err = MP_OKAY;
11461 int i;
11462 sp_int_digit l;
11463 sp_int_digit h;
11464 sp_int* z0;
11465 sp_int* z1;
11466 sp_int* z2;
11467 sp_int_digit ca;
11468 DECL_SP_INT(a1, 32);
11469 DECL_SP_INT_ARRAY(z, 65, 2);
11470
11471 ALLOC_SP_INT(a1, 32, err, NULL);
11472 ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
11473 if (err == MP_OKAY) {
11474 z1 = z[0];
11475 z2 = z[1];
11476 z0 = r;
11477
11478 XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
11479 a1->used = 32;
11480
11481 /* z2 = a1 ^ 2 */
11482 err = _sp_sqr_32(a1, z2);
11483 }
11484 if (err == MP_OKAY) {
11485 l = 0;
11486 h = 0;
11487 for (i = 0; i < 32; i++) {
11488 SP_ASM_ADDC(l, h, a1->dp[i]);
11489 SP_ASM_ADDC(l, h, a->dp[i]);
11490 a1->dp[i] = l;
11491 l = h;
11492 h = 0;
11493 }
11494 ca = l;
11495
11496 /* z0 = a0 ^ 2 */
11497 err = _sp_sqr_32(a, z0);
11498 }
11499 if (err == MP_OKAY) {
11500 /* z1 = (a0 + a1) ^ 2 */
11501 err = _sp_sqr_32(a1, z1);
11502 }
11503 if (err == MP_OKAY) {
11504 /* r = (z2 << 64) + (z1 - z0 - z2) << 32) + z0 */
11505 /* r = z0 */
11506 /* r += (z1 - z0 - z2) << 32 */
11507 z1->dp[64] = ca;
11508 l = 0;
11509 if (ca) {
11510 l = z1->dp[0 + 32];
11511 h = 0;
11512 SP_ASM_ADDC(l, h, a1->dp[0]);
11513 SP_ASM_ADDC(l, h, a1->dp[0]);
11514 z1->dp[0 + 32] = l;
11515 l = h;
11516 h = 0;
11517 for (i = 1; i < 32; i++) {
11518 SP_ASM_ADDC(l, h, z1->dp[i + 32]);
11519 SP_ASM_ADDC(l, h, a1->dp[i]);
11520 SP_ASM_ADDC(l, h, a1->dp[i]);
11521 z1->dp[i + 32] = l;
11522 l = h;
11523 h = 0;
11524 }
11525 }
11526 z1->dp[64] += l;
11527 /* z1 = z1 - z0 - z1 */
11528 l = z1->dp[0];
11529 h = 0;
11530 SP_ASM_SUBC(l, h, z0->dp[0]);
11531 SP_ASM_SUBC(l, h, z2->dp[0]);
11532 z1->dp[0] = l;
11533 l = h;
11534 h = 0;
11535 for (i = 1; i < 64; i++) {
11536 l += z1->dp[i];
11537 SP_ASM_SUBC(l, h, z0->dp[i]);
11538 SP_ASM_SUBC(l, h, z2->dp[i]);
11539 z1->dp[i] = l;
11540 l = h;
11541 h = 0;
11542 }
11543 z1->dp[i] += l;
11544 /* r += z1 << 16 */
11545 l = 0;
11546 h = 0;
11547 for (i = 0; i < 32; i++) {
11548 SP_ASM_ADDC(l, h, r->dp[i + 32]);
11549 SP_ASM_ADDC(l, h, z1->dp[i]);
11550 r->dp[i + 32] = l;
11551 l = h;
11552 h = 0;
11553 }
11554 for (; i < 65; i++) {
11555 SP_ASM_ADDC(l, h, z1->dp[i]);
11556 r->dp[i + 32] = l;
11557 l = h;
11558 h = 0;
11559 }
11560 /* r += z2 << 64 */
11561 l = 0;
11562 h = 0;
11563 for (i = 0; i < 33; i++) {
11564 SP_ASM_ADDC(l, h, r->dp[i + 64]);
11565 SP_ASM_ADDC(l, h, z2->dp[i]);
11566 r->dp[i + 64] = l;
11567 l = h;
11568 h = 0;
11569 }
11570 for (; i < 64; i++) {
11571 SP_ASM_ADDC(l, h, z2->dp[i]);
11572 r->dp[i + 64] = l;
11573 l = h;
11574 h = 0;
11575 }
11576 r->used = 128;
11577 sp_clamp(r);
11578 }
11579
11580 FREE_SP_INT_ARRAY(z, NULL);
11581 FREE_SP_INT(a1, NULL);
11582 return err;
11583 }
11584 #endif /* SP_INT_DIGITS >= 128 */
11585
11586 #if SP_INT_DIGITS >= 192
11587 /* Square a and store in r. r = a * a
11588 *
11589 * @param [in] a SP integer to square.
11590 * @param [out] r SP integer result.
11591 *
11592 * @return MP_OKAY on success.
11593 * @return MP_MEM when dynamic memory allocation fails.
11594 */
11595 static int _sp_sqr_96(sp_int* a, sp_int* r)
11596 {
11597 int err = MP_OKAY;
11598 int i;
11599 sp_int_digit l;
11600 sp_int_digit h;
11601 sp_int* z0;
11602 sp_int* z1;
11603 sp_int* z2;
11604 sp_int_digit ca;
11605 DECL_SP_INT(a1, 48);
11606 DECL_SP_INT_ARRAY(z, 97, 2);
11607
11608 ALLOC_SP_INT(a1, 48, err, NULL);
11609 ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
11610 if (err == MP_OKAY) {
11611 z1 = z[0];
11612 z2 = z[1];
11613 z0 = r;
11614
11615 XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
11616 a1->used = 48;
11617
11618 /* z2 = a1 ^ 2 */
11619 err = _sp_sqr_48(a1, z2);
11620 }
11621 if (err == MP_OKAY) {
11622 l = 0;
11623 h = 0;
11624 for (i = 0; i < 48; i++) {
11625 SP_ASM_ADDC(l, h, a1->dp[i]);
11626 SP_ASM_ADDC(l, h, a->dp[i]);
11627 a1->dp[i] = l;
11628 l = h;
11629 h = 0;
11630 }
11631 ca = l;
11632
11633 /* z0 = a0 ^ 2 */
11634 err = _sp_sqr_48(a, z0);
11635 }
11636 if (err == MP_OKAY) {
11637 /* z1 = (a0 + a1) ^ 2 */
11638 err = _sp_sqr_48(a1, z1);
11639 }
11640 if (err == MP_OKAY) {
11641 /* r = (z2 << 96) + (z1 - z0 - z2) << 48) + z0 */
11642 /* r = z0 */
11643 /* r += (z1 - z0 - z2) << 48 */
11644 z1->dp[96] = ca;
11645 l = 0;
11646 if (ca) {
11647 l = z1->dp[0 + 48];
11648 h = 0;
11649 SP_ASM_ADDC(l, h, a1->dp[0]);
11650 SP_ASM_ADDC(l, h, a1->dp[0]);
11651 z1->dp[0 + 48] = l;
11652 l = h;
11653 h = 0;
11654 for (i = 1; i < 48; i++) {
11655 SP_ASM_ADDC(l, h, z1->dp[i + 48]);
11656 SP_ASM_ADDC(l, h, a1->dp[i]);
11657 SP_ASM_ADDC(l, h, a1->dp[i]);
11658 z1->dp[i + 48] = l;
11659 l = h;
11660 h = 0;
11661 }
11662 }
11663 z1->dp[96] += l;
11664 /* z1 = z1 - z0 - z1 */
11665 l = z1->dp[0];
11666 h = 0;
11667 SP_ASM_SUBC(l, h, z0->dp[0]);
11668 SP_ASM_SUBC(l, h, z2->dp[0]);
11669 z1->dp[0] = l;
11670 l = h;
11671 h = 0;
11672 for (i = 1; i < 96; i++) {
11673 l += z1->dp[i];
11674 SP_ASM_SUBC(l, h, z0->dp[i]);
11675 SP_ASM_SUBC(l, h, z2->dp[i]);
11676 z1->dp[i] = l;
11677 l = h;
11678 h = 0;
11679 }
11680 z1->dp[i] += l;
11681 /* r += z1 << 16 */
11682 l = 0;
11683 h = 0;
11684 for (i = 0; i < 48; i++) {
11685 SP_ASM_ADDC(l, h, r->dp[i + 48]);
11686 SP_ASM_ADDC(l, h, z1->dp[i]);
11687 r->dp[i + 48] = l;
11688 l = h;
11689 h = 0;
11690 }
11691 for (; i < 97; i++) {
11692 SP_ASM_ADDC(l, h, z1->dp[i]);
11693 r->dp[i + 48] = l;
11694 l = h;
11695 h = 0;
11696 }
11697 /* r += z2 << 96 */
11698 l = 0;
11699 h = 0;
11700 for (i = 0; i < 49; i++) {
11701 SP_ASM_ADDC(l, h, r->dp[i + 96]);
11702 SP_ASM_ADDC(l, h, z2->dp[i]);
11703 r->dp[i + 96] = l;
11704 l = h;
11705 h = 0;
11706 }
11707 for (; i < 96; i++) {
11708 SP_ASM_ADDC(l, h, z2->dp[i]);
11709 r->dp[i + 96] = l;
11710 l = h;
11711 h = 0;
11712 }
11713 r->used = 192;
11714 sp_clamp(r);
11715 }
11716
11717 FREE_SP_INT_ARRAY(z, NULL);
11718 FREE_SP_INT(a1, NULL);
11719 return err;
11720 }
11721 #endif /* SP_INT_DIGITS >= 192 */
11722
11723 #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
11724 #endif /* !WOLFSSL_SP_SMALL */
11725
11726 /* Square a and store in r. r = a * a
11727 *
11728 * @param [in] a SP integer to square.
11729 * @param [out] r SP integer result.
11730 *
11731 * @return MP_OKAY on success.
11732 * @return MP_VAL when a or r is NULL, or the result will be too big for fixed
11733 * data length.
11734 * @return MP_MEM when dynamic memory allocation fails.
11735 */
11736 int sp_sqr(sp_int* a, sp_int* r)
11737 {
11738 #if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
11739 return sp_mul(a, a, r);
11740 #else
11741 int err = MP_OKAY;
11742
11743 if ((a == NULL) || (r == NULL)) {
11744 err = MP_VAL;
11745 }
11746 /* Need extra digit during calculation. */
11747 if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
11748 err = MP_VAL;
11749 }
11750
11751 #if 0
11752 if (err == MP_OKAY) {
11753 sp_print(a, "a");
11754 }
11755 #endif
11756
11757 if (err == MP_OKAY) {
11758 if (a->used == 0) {
11759 _sp_zero(r);
11760 }
11761 else
11762 #ifndef WOLFSSL_SP_SMALL
11763 #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
11764 #if SP_WORD_SIZE == 64
11765 if (a->used == 4) {
11766 err = _sp_sqr_4(a, r);
11767 }
11768 else
11769 #endif /* SP_WORD_SIZE == 64 */
11770 #if SP_WORD_SIZE == 64
11771 #ifdef SQR_MUL_ASM
11772 if (a->used == 6) {
11773 err = _sp_sqr_6(a, r);
11774 }
11775 else
11776 #endif /* SQR_MUL_ASM */
11777 #endif /* SP_WORD_SIZE == 64 */
11778 #if SP_WORD_SIZE == 32
11779 #ifdef SQR_MUL_ASM
11780 if (a->used == 8) {
11781 err = _sp_sqr_8(a, r);
11782 }
11783 else
11784 #endif /* SQR_MUL_ASM */
11785 #endif /* SP_WORD_SIZE == 32 */
11786 #if SP_WORD_SIZE == 32
11787 #ifdef SQR_MUL_ASM
11788 if (a->used == 12) {
11789 err = _sp_sqr_12(a, r);
11790 }
11791 else
11792 #endif /* SQR_MUL_ASM */
11793 #endif /* SP_WORD_SIZE == 32 */
11794 #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
11795 #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
11796 #if SP_INT_DIGITS >= 32
11797 if (a->used == 16) {
11798 err = _sp_sqr_16(a, r);
11799 }
11800 else
11801 #endif /* SP_INT_DIGITS >= 32 */
11802 #if SP_INT_DIGITS >= 48
11803 if (a->used == 24) {
11804 err = _sp_sqr_24(a, r);
11805 }
11806 else
11807 #endif /* SP_INT_DIGITS >= 48 */
11808 #if SP_INT_DIGITS >= 64
11809 if (a->used == 32) {
11810 err = _sp_sqr_32(a, r);
11811 }
11812 else
11813 #endif /* SP_INT_DIGITS >= 64 */
11814 #if SP_INT_DIGITS >= 96
11815 if (a->used == 48) {
11816 err = _sp_sqr_48(a, r);
11817 }
11818 else
11819 #endif /* SP_INT_DIGITS >= 96 */
11820 #if SP_INT_DIGITS >= 128
11821 if (a->used == 64) {
11822 err = _sp_sqr_64(a, r);
11823 }
11824 else
11825 #endif /* SP_INT_DIGITS >= 128 */
11826 #if SP_INT_DIGITS >= 192
11827 if (a->used == 96) {
11828 err = _sp_sqr_96(a, r);
11829 }
11830 else
11831 #endif /* SP_INT_DIGITS >= 192 */
11832 #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
11833 #endif /* !WOLFSSL_SP_SMALL */
11834 {
11835 err = _sp_sqr(a, r);
11836 }
11837 }
11838
11839 #ifdef WOLFSSL_SP_INT_NEGATIVE
11840 if (err == MP_OKAY) {
11841 r->sign = MP_ZPOS;
11842 }
11843 #endif
11844
11845 #if 0
11846 if (err == MP_OKAY) {
11847 sp_print(r, "rsqr");
11848 }
11849 #endif
11850
11851 return err;
11852 #endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
11853 }
11854 /* END SP_SQR implementations */
11855
11856 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
11857 * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
11858
11859 #if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
11860 /* Square a mod m and store in r: r = (a * a) mod m
11861 *
11862 * @param [in] a SP integer to square.
11863 * @param [in] m SP integer that is the modulus.
11864 * @param [out] r SP integer result.
11865 *
11866 * @return MP_OKAY on success.
11867 * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
11868 * for fixed data length.
11869 * @return MP_MEM when dynamic memory allocation fails.
11870 */
11871 int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r)
11872 {
11873 int err = MP_OKAY;
11874
11875 if ((a == NULL) || (m == NULL) || (r == NULL)) {
11876 err = MP_VAL;
11877 }
11878 if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
11879 err = MP_VAL;
11880 }
11881
11882 if (err == MP_OKAY) {
11883 err = sp_sqr(a, r);
11884 }
11885 if (err == MP_OKAY) {
11886 err = sp_mod(r, m, r);
11887 }
11888
11889 return err;
11890 }
11891 #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
11892
11893 /**********************
11894 * Montgomery functions
11895 **********************/
11896
11897 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
11898 defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
11899 /* Reduce a number in montgomery form.
11900 *
11901 * Assumes a and m are not NULL and m is not 0.
11902 *
11903 * @param [in,out] a SP integer to Montgomery reduce.
11904 * @param [in] m SP integer that is the modulus.
11905 * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
11906 *
11907 * @return MP_OKAY on success.
11908 */
11909 static int _sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
11910 {
11911 #if !defined(SQR_MUL_ASM)
11912 int i;
11913 int bits;
11914 sp_int_word w;
11915 sp_int_digit mu;
11916
11917 if (0) {
11918 sp_print(a, "a");
11919 sp_print(m, "m");
11920 }
11921
11922 bits = sp_count_bits(m);
11923
11924 for (i = a->used; i < m->used * 2; i++) {
11925 a->dp[i] = 0;
11926 }
11927
11928 if (m->used == 1) {
11929 mu = mp * a->dp[0];
11930 w = a->dp[0];
11931 w += (sp_int_word)mu * m->dp[0];
11932 a->dp[0] = (sp_int_digit)w;
11933 w >>= SP_WORD_SIZE;
11934 w += a->dp[1];
11935 a->dp[1] = (sp_int_digit)w;
11936 w >>= SP_WORD_SIZE;
11937 a->dp[2] = (sp_int_digit)w;
11938 a->used = 3;
11939 /* mp is SP_WORD_SIZE */
11940 bits = SP_WORD_SIZE;
11941 }
11942 else {
11943 sp_int_digit mask = (sp_int_digit)
11944 ((1UL << (bits & (SP_WORD_SIZE - 1))) - 1);
11945 sp_int_word o = 0;
11946 for (i = 0; i < m->used; i++) {
11947 int j;
11948
11949 mu = mp * a->dp[i];
11950 if ((i == m->used - 1) && (mask != 0)) {
11951 mu &= mask;
11952 }
11953 w = a->dp[i];
11954 w += (sp_int_word)mu * m->dp[0];
11955 a->dp[i] = (sp_int_digit)w;
11956 w >>= SP_WORD_SIZE;
11957 for (j = 1; j < m->used - 1; j++) {
11958 w += a->dp[i + j];
11959 w += (sp_int_word)mu * m->dp[j];
11960 a->dp[i + j] = (sp_int_digit)w;
11961 w >>= SP_WORD_SIZE;
11962 }
11963 w += o;
11964 w += a->dp[i + j];
11965 o = (sp_int_digit)(w >> SP_WORD_SIZE);
11966 w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
11967 a->dp[i + j] = (sp_int_digit)w;
11968 w >>= SP_WORD_SIZE;
11969 o += w;
11970 }
11971 o += a->dp[m->used * 2 - 1];
11972 a->dp[m->used * 2 - 1] = (sp_int_digit)o;
11973 o >>= SP_WORD_SIZE;
11974 a->dp[m->used * 2] = (sp_int_digit)o;
11975 a->used = m->used * 2 + 1;
11976 }
11977
11978 sp_clamp(a);
11979 sp_rshb(a, bits, a);
11980
11981 if (_sp_cmp(a, m) != MP_LT) {
11982 _sp_sub_off(a, m, a, 0);
11983 }
11984
11985 if (0) {
11986 sp_print(a, "rr");
11987 }
11988
11989 return MP_OKAY;
11990 #else /* !SQR_MUL_ASM */
11991 int i;
11992 int j;
11993 int bits;
11994 sp_int_digit mu;
11995 sp_int_digit o;
11996 sp_int_digit mask;
11997
11998 bits = sp_count_bits(m);
11999 mask = (1UL << (bits & (SP_WORD_SIZE - 1))) - 1;
12000
12001 for (i = a->used; i < m->used * 2; i++) {
12002 a->dp[i] = 0;
12003 }
12004
12005 if (m->used <= 1) {
12006 sp_int_word w;
12007
12008 mu = mp * a->dp[0];
12009 w = a->dp[0];
12010 w += (sp_int_word)mu * m->dp[0];
12011 a->dp[0] = w;
12012 w >>= SP_WORD_SIZE;
12013 w += a->dp[1];
12014 a->dp[1] = w;
12015 w >>= SP_WORD_SIZE;
12016 a->dp[2] = w;
12017 a->used = m->used * 2 + 1;
12018 /* mp is SP_WORD_SIZE */
12019 bits = SP_WORD_SIZE;
12020 }
12021 #ifndef WOLFSSL_HAVE_SP_ECC
12022 #if SP_WORD_SIZE == 64
12023 else if (m->used == 4) {
12024 sp_int_digit l;
12025 sp_int_digit h;
12026 sp_int_digit o2;
12027
12028 l = 0;
12029 h = 0;
12030 o = 0;
12031 o2 = 0;
12032 for (i = 0; i < 4; i++) {
12033 mu = mp * a->dp[i];
12034 if ((i == 3) && (mask != 0)) {
12035 mu &= mask;
12036 }
12037 l = a->dp[i];
12038 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
12039 a->dp[i] = l;
12040 l = h;
12041 h = 0;
12042 SP_ASM_ADDC(l, h, a->dp[i + 1]);
12043 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
12044 a->dp[i + 1] = l;
12045 l = h;
12046 h = 0;
12047 SP_ASM_ADDC(l, h, a->dp[i + 2]);
12048 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
12049 a->dp[i + 2] = l;
12050 l = h;
12051 h = o2;
12052 o2 = 0;
12053 SP_ASM_ADDC_REG(l, h, o);
12054 SP_ASM_ADDC(l, h, a->dp[i + 3]);
12055 SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
12056 a->dp[i + 3] = l;
12057 o = h;
12058 l = h;
12059 h = 0;
12060 }
12061 h = o2;
12062 SP_ASM_ADDC(l, h, a->dp[7]);
12063 a->dp[7] = l;
12064 a->dp[8] = h;
12065 a->used = 9;
12066 }
12067 else if (m->used == 6) {
12068 sp_int_digit l;
12069 sp_int_digit h;
12070 sp_int_digit o2;
12071
12072 l = 0;
12073 h = 0;
12074 o = 0;
12075 o2 = 0;
12076 for (i = 0; i < 6; i++) {
12077 mu = mp * a->dp[i];
12078 if ((i == 5) && (mask != 0)) {
12079 mu &= mask;
12080 }
12081 l = a->dp[i];
12082 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
12083 a->dp[i] = l;
12084 l = h;
12085 h = 0;
12086 SP_ASM_ADDC(l, h, a->dp[i + 1]);
12087 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
12088 a->dp[i + 1] = l;
12089 l = h;
12090 h = 0;
12091 SP_ASM_ADDC(l, h, a->dp[i + 2]);
12092 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
12093 a->dp[i + 2] = l;
12094 l = h;
12095 h = 0;
12096 SP_ASM_ADDC(l, h, a->dp[i + 3]);
12097 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
12098 a->dp[i + 3] = l;
12099 l = h;
12100 h = 0;
12101 SP_ASM_ADDC(l, h, a->dp[i + 4]);
12102 SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
12103 a->dp[i + 4] = l;
12104 l = h;
12105 h = o2;
12106 o2 = 0;
12107 SP_ASM_ADDC_REG(l, h, o);
12108 SP_ASM_ADDC(l, h, a->dp[i + 5]);
12109 SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
12110 a->dp[i + 5] = l;
12111 o = h;
12112 l = h;
12113 h = 0;
12114 }
12115 h = o2;
12116 SP_ASM_ADDC(l, h, a->dp[11]);
12117 a->dp[11] = l;
12118 a->dp[12] = h;
12119 a->used = 13;
12120 }
12121 #endif /* SP_WORD_SIZE == 64 */
12122 #endif /* WOLFSSL_HAVE_SP_ECC */
12123 else {
12124 sp_int_digit l;
12125 sp_int_digit h;
12126 sp_int_digit o2;
12127 sp_int_digit* ad;
12128 sp_int_digit* md;
12129
12130 o = 0;
12131 o2 = 0;
12132 ad = a->dp;
12133 for (i = 0; i < m->used; i++, ad++) {
12134 md = m->dp;
12135 mu = mp * ad[0];
12136 if ((i == m->used - 1) && (mask != 0)) {
12137 mu &= mask;
12138 }
12139 l = ad[0];
12140 h = 0;
12141 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
12142 ad[0] = l;
12143 l = h;
12144 for (j = 1; j + 1 < m->used - 1; j += 2) {
12145 h = 0;
12146 SP_ASM_ADDC(l, h, ad[j + 0]);
12147 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
12148 ad[j + 0] = l;
12149 l = 0;
12150 SP_ASM_ADDC(h, l, ad[j + 1]);
12151 SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
12152 ad[j + 1] = h;
12153 }
12154 for (; j < m->used - 1; j++) {
12155 h = 0;
12156 SP_ASM_ADDC(l, h, ad[j]);
12157 SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
12158 ad[j] = l;
12159 l = h;
12160 }
12161 h = o2;
12162 o2 = 0;
12163 SP_ASM_ADDC_REG(l, h, o);
12164 SP_ASM_ADDC(l, h, ad[j]);
12165 SP_ASM_MUL_ADD(l, h, o2, mu, *md);
12166 ad[j] = l;
12167 o = h;
12168 }
12169 l = o;
12170 h = o2;
12171 SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
12172 a->dp[m->used * 2 - 1] = l;
12173 a->dp[m->used * 2] = h;
12174 a->used = m->used * 2 + 1;
12175 }
12176
12177 sp_clamp(a);
12178 sp_rshb(a, bits, a);
12179
12180 if (_sp_cmp(a, m) != MP_LT) {
12181 sp_sub(a, m, a);
12182 }
12183
12184 return MP_OKAY;
12185 #endif /* !SQR_MUL_ASM */
12186 }
12187
12188 #ifndef WOLFSSL_RSA_VERIFY_ONLY
12189 /* Reduce a number in montgomery form.
12190 *
12191 * @param [in,out] a SP integer to Montgomery reduce.
12192 * @param [in] m SP integer that is the modulus.
12193 * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
12194 *
12195 * @return MP_OKAY on success.
12196 * @return MP_VAL when a or m is NULL or m is zero.
12197 */
12198 int sp_mont_red(sp_int* a, sp_int* m, sp_int_digit mp)
12199 {
12200 int err;
12201
12202 if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
12203 err = MP_VAL;
12204 }
12205 else if (a->size < m->used * 2 + 1) {
12206 err = MP_VAL;
12207 }
12208 else {
12209 err = _sp_mont_red(a, m, mp);
12210 }
12211
12212 return err;
12213 }
12214 #endif
12215
12216 /* Calculate the bottom digit of the inverse of negative m.
12217 *
12218 * Used when performing Montgomery Reduction.
12219 *
12220 * @param [in] m SP integer that is the modulus.
12221 * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
12222 *
12223 * @return MP_OKAY on success.
12224 * @return MP_VAL when m or rho is NULL.
12225 */
12226 int sp_mont_setup(sp_int* m, sp_int_digit* rho)
12227 {
12228 int err = MP_OKAY;
12229
12230 if ((m == NULL) || (rho == NULL)) {
12231 err = MP_VAL;
12232 }
12233 if ((err == MP_OKAY) && !sp_isodd(m)) {
12234 err = MP_VAL;
12235 }
12236
12237 if (err == MP_OKAY) {
12238 sp_int_digit x;
12239 sp_int_digit b;
12240
12241 b = m->dp[0];
12242 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
12243 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
12244 #if SP_WORD_SIZE >= 16
12245 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
12246 #if SP_WORD_SIZE >= 32
12247 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
12248 #if SP_WORD_SIZE >= 64
12249 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
12250 #endif /* SP_WORD_SIZE >= 64 */
12251 #endif /* SP_WORD_SIZE >= 32 */
12252 #endif /* SP_WORD_SIZE >= 16 */
12253
12254 /* rho = -1/m mod b */
12255 *rho = -x;
12256 }
12257
12258 return err;
12259 }
12260
12261 /* Calculate the normalization value of m.
12262 * norm = 2^k - m, where k is the number of bits in m
12263 *
12264 * @param [out] norm SP integer that normalises numbers into Montgomery
12265 * form.
12266 * @param [in] m SP integer that is the modulus.
12267 *
12268 * @return MP_OKAY on success.
12269 * @return MP_VAL when norm or m is NULL, or number of bits in m is maximual.
12270 */
12271 int sp_mont_norm(sp_int* norm, sp_int* m)
12272 {
12273 int err = MP_OKAY;
12274 int bits = 0;
12275
12276 if ((norm == NULL) || (m == NULL)) {
12277 err = MP_VAL;
12278 }
12279 if (err == MP_OKAY) {
12280 bits = sp_count_bits(m);
12281 if (bits == m->size * SP_WORD_SIZE) {
12282 err = MP_VAL;
12283 }
12284 }
12285 if (err == MP_OKAY) {
12286 if (bits < SP_WORD_SIZE) {
12287 bits = SP_WORD_SIZE;
12288 }
12289 _sp_zero(norm);
12290 sp_set_bit(norm, bits);
12291 err = sp_sub(norm, m, norm);
12292 }
12293 if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
12294 norm->dp[0] %= m->dp[0];
12295 }
12296 if (err == MP_OKAY) {
12297 sp_clamp(norm);
12298 }
12299
12300 return err;
12301 }
12302 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
12303 * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
12304
12305 /*********************************
12306 * To and from binary and strings.
12307 *********************************/
12308
12309 /* Calculate the number of 8-bit values required to represent the
12310 * multi-precision number.
12311 *
12312 * When a is NULL, return s 0.
12313 *
12314 * @param [in] a SP integer.
12315 *
12316 * @return The count of 8-bit values.
12317 */
12318 int sp_unsigned_bin_size(const sp_int* a)
12319 {
12320 int cnt = 0;
12321
12322 if (a != NULL) {
12323 cnt = (sp_count_bits(a) + 7) / 8;
12324 }
12325
12326 return cnt;
12327 }
12328
12329 /* Convert a number as an array of bytes in big-endian format to a
12330 * multi-precision number.
12331 *
12332 * @param [out] a SP integer.
12333 * @param [in] in Array of bytes.
12334 * @param [in] inSz Number of data bytes in array.
12335 *
12336 * @return MP_OKAY on success.
12337 * @return MP_VAL when the number is too big to fit in an SP.
12338 */
12339 int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
12340 {
12341 int err = MP_OKAY;
12342
12343 if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
12344 err = MP_VAL;
12345 }
12346
12347 if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
12348 err = MP_VAL;
12349 }
12350
12351 #ifndef LITTLE_ENDIAN_ORDER
12352 if (err == MP_OKAY) {
12353 int i;
12354 int j;
12355 int s;
12356
12357 a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
12358
12359 #ifndef WOLFSSL_SP_INT_DIGIT_ALIGN
12360 for (i = inSz-1,j = 0; i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF,j++) {
12361 a->dp[j] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
12362 }
12363 #else
12364 for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
12365 a->dp[j] = ((sp_int_digit)in[i - 0] << 0);
12366 #if SP_WORD_SIZE >= 16
12367 a->dp[j] |= ((sp_int_digit)in[i - 1] << 8);
12368 #endif
12369 #if SP_WORD_SIZE >= 32
12370 a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
12371 ((sp_int_digit)in[i - 3] << 24);
12372 #endif
12373 #if SP_WORD_SIZE >= 64
12374 a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
12375 ((sp_int_digit)in[i - 5] << 40) |
12376 ((sp_int_digit)in[i - 6] << 48) |
12377 ((sp_int_digit)in[i - 7] << 56);
12378 #endif
12379 j++;
12380 }
12381 #endif
12382 if (i >= 0) {
12383 a->dp[a->used - 1] = 0;
12384 for (s = 0; i >= 0; i--,s += 8) {
12385 a->dp[j] |= ((sp_int_digit)in[i]) << s;
12386 }
12387 }
12388
12389 sp_clamp(a);
12390 }
12391 #else
12392 if (err == MP_OKAY) {
12393 int i;
12394 int j;
12395
12396 a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
12397
12398 for (i = inSz-1, j = 0; i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
12399 a->dp[j] = ((sp_int_digit)in[i - 0] << 0);
12400 #if SP_WORD_SIZE >= 16
12401 a->dp[j] |= ((sp_int_digit)in[i - 1] << 8);
12402 #endif
12403 #if SP_WORD_SIZE >= 32
12404 a->dp[j] |= ((sp_int_digit)in[i - 2] << 16) |
12405 ((sp_int_digit)in[i - 3] << 24);
12406 #endif
12407 #if SP_WORD_SIZE >= 64
12408 a->dp[j] |= ((sp_int_digit)in[i - 4] << 32) |
12409 ((sp_int_digit)in[i - 5] << 40) |
12410 ((sp_int_digit)in[i - 6] << 48) |
12411 ((sp_int_digit)in[i - 7] << 56);
12412 #endif
12413 j++;
12414 }
12415
12416 #if SP_WORD_SIZE >= 16
12417 if (i >= 0) {
12418 byte *d = (byte*)a->dp;
12419
12420 a->dp[a->used - 1] = 0;
12421 switch (i) {
12422 case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
12423 case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
12424 case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
12425 case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
12426 case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
12427 case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
12428 case 0: d[inSz - 1 - 0] = in[0];
12429 }
12430 }
12431 #endif
12432
12433 sp_clamp(a);
12434 }
12435 #endif /* LITTLE_ENDIAN_ORDER */
12436
12437 return err;
12438 }
12439
12440 /* Convert the multi-precision number to an array of bytes in big-endian format.
12441 *
12442 * The array must be large enough for encoded number - use mp_unsigned_bin_size
12443 * to calculate the number of bytes required.
12444 *
12445 * @param [in] a SP integer.
12446 * @param [out] out Array to put encoding into.
12447 *
12448 * @return MP_OKAY on success.
12449 * @return MP_VAL when a or out is NULL.
12450 */
12451 int sp_to_unsigned_bin(sp_int* a, byte* out)
12452 {
12453 return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
12454 }
12455
12456 /* Convert the multi-precision number to an array of bytes in big-endian format.
12457 *
12458 * The array must be large enough for encoded number - use mp_unsigned_bin_size
12459 * to calculate the number of bytes required.
12460 * Front-pads the output array with zeros make number the size of the array.
12461 *
12462 * @param [in] a SP integer.
12463 * @param [out] out Array to put encoding into.
12464 * @param [in] outSz Size of the array in bytes.
12465 *
12466 * @return MP_OKAY on success.
12467 * @return MP_VAL when a or out is NULL.
12468 */
12469 int sp_to_unsigned_bin_len(sp_int* a, byte* out, int outSz)
12470 {
12471 int err = MP_OKAY;
12472
12473 if ((a == NULL) || (out == NULL)) {
12474 err = MP_VAL;
12475 }
12476 if (err == MP_OKAY) {
12477 int j = outSz - 1;
12478
12479 if (!sp_iszero(a)) {
12480 int i;
12481 for (i = 0; (j >= 0) && (i < a->used); i++) {
12482 int b;
12483 for (b = 0; b < SP_WORD_SIZE; b += 8) {
12484 out[j--] = a->dp[i] >> b;
12485 if (j < 0) {
12486 break;
12487 }
12488 }
12489 }
12490 }
12491 for (; j >= 0; j--) {
12492 out[j] = 0;
12493 }
12494 }
12495
12496 return err;
12497 }
12498
12499 #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
12500 /* Store the number in big-endian format in array at an offset.
12501 * The array must be large enough for encoded number - use mp_unsigned_bin_size
12502 * to calculate the number of bytes required.
12503 *
12504 * @param [in] o Offset into array o start encoding.
12505 * @param [in] a SP integer.
12506 * @param [out] out Array to put encoding into.
12507 *
12508 * @return Index of next byte after data.
12509 * @return MP_VAL when a or out is NULL.
12510 */
12511 int sp_to_unsigned_bin_at_pos(int o, sp_int*a, unsigned char* out)
12512 {
12513 int ret = sp_to_unsigned_bin(a, out + o);
12514
12515 if (ret == MP_OKAY) {
12516 ret = o + sp_unsigned_bin_size(a);
12517 }
12518
12519 return ret;
12520 }
12521 #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
12522
12523 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
12524 defined(HAVE_ECC) || !defined(NO_DSA)
12525 /* Convert hexadecimal number as string in big-endian format to a
12526 * multi-precision number.
12527 *
12528 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
12529 *
12530 * @param [out] a SP integer.
12531 * @param [in] in NUL terminated string.
12532 *
12533 * @return MP_OKAY on success.
12534 * @return MP_VAL when radix not supported, value is negative, or a character
12535 * is not valid.
12536 */
12537 static int _sp_read_radix_16(sp_int* a, const char* in)
12538 {
12539 int err = MP_OKAY;
12540 int i;
12541 int s = 0;
12542 int j = 0;
12543
12544 #ifdef WOLFSSL_SP_INT_NEGATIVE
12545 if (*in == '-') {
12546 a->sign = MP_NEG;
12547 in++;
12548 }
12549 #endif
12550
12551 while (*in == '0') {
12552 in++;
12553 }
12554
12555 a->dp[0] = 0;
12556 for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
12557 int ch = (int)HexCharToByte(in[i]);
12558 if (ch < 0) {
12559 err = MP_VAL;
12560 break;
12561 }
12562
12563 if (s == SP_WORD_SIZE) {
12564 j++;
12565 if (j >= a->size) {
12566 err = MP_VAL;
12567 break;
12568 }
12569 s = 0;
12570 a->dp[j] = 0;
12571 }
12572
12573 a->dp[j] |= ((sp_int_digit)ch) << s;
12574 s += 4;
12575 }
12576
12577 if (err == MP_OKAY) {
12578 a->used = j + 1;
12579 sp_clamp(a);
12580 #ifdef WOLFSSL_SP_INT_NEGATIVE
12581 if (sp_iszero(a)) {
12582 a->sign = MP_ZPOS;
12583 }
12584 #endif
12585 }
12586 return err;
12587 }
12588 #endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
12589
12590 #ifdef WOLFSSL_SP_READ_RADIX_10
12591 /* Convert decimal number as string in big-endian format to a multi-precision
12592 * number.
12593 *
12594 * Negative values supported when compiled with WOLFSSL_SP_INT_NEGATIVE.
12595 *
12596 * @param [out] a SP integer.
12597 * @param [in] in NUL terminated string.
12598 *
12599 * @return MP_OKAY on success.
12600 * @return MP_VAL when radix not supported, value is negative, or a character
12601 * is not valid.
12602 */
12603 static int _sp_read_radix_10(sp_int* a, const char* in)
12604 {
12605 int err = MP_OKAY;
12606 int i;
12607 int len;
12608 char ch;
12609
12610 _sp_zero(a);
12611 #ifdef WOLFSSL_SP_INT_NEGATIVE
12612 if (*in == '-') {
12613 a->sign = MP_NEG;
12614 in++;
12615 }
12616 #endif /* WOLFSSL_SP_INT_NEGATIVE */
12617
12618 while (*in == '0') {
12619 in++;
12620 }
12621
12622 len = (int)XSTRLEN(in);
12623 for (i = 0; i < len; i++) {
12624 ch = in[i];
12625 if ((ch >= '0') && (ch <= '9')) {
12626 ch -= '0';
12627 }
12628 else {
12629 err = MP_VAL;
12630 break;
12631 }
12632 err = _sp_mul_d(a, 10, a, 0);
12633 if (err != MP_OKAY) {
12634 break;
12635 }
12636 (void)_sp_add_d(a, ch, a);
12637 }
12638 #ifdef WOLFSSL_SP_INT_NEGATIVE
12639 if ((err == MP_OKAY) && sp_iszero(a)) {
12640 a->sign = MP_ZPOS;
12641 }
12642 #endif
12643
12644 return err;
12645 }
12646 #endif /* WOLFSSL_SP_READ_RADIX_10 */
12647
12648 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
12649 !defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || !defined(NO_DSA)
12650 /* Convert a number as string in big-endian format to a big number.
12651 * Only supports base-16 (hexadecimal) and base-10 (decimal).
12652 *
12653 * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
12654 *
12655 * @param [out] a SP integer.
12656 * @param [in] in NUL terminated string.
12657 * @param [in] radix Number of values in a digit.
12658 *
12659 * @return MP_OKAY on success.
12660 * @return MP_VAL when a or in is NULL, radix not supported, value is negative,
12661 * or a character is not valid.
12662 */
12663 int sp_read_radix(sp_int* a, const char* in, int radix)
12664 {
12665 int err = MP_OKAY;
12666
12667 if ((a == NULL) || (in == NULL)) {
12668 err = MP_VAL;
12669 }
12670
12671 if (err == MP_OKAY) {
12672 #ifndef WOLFSSL_SP_INT_NEGATIVE
12673 if (*in == '-') {
12674 err = MP_VAL;
12675 }
12676 else
12677 #endif
12678 if (radix == 16) {
12679 err = _sp_read_radix_16(a, in);
12680 }
12681 #ifdef WOLFSSL_SP_READ_RADIX_10
12682 else if (radix == 10) {
12683 err = _sp_read_radix_10(a, in);
12684 }
12685 #endif
12686 else {
12687 err = MP_VAL;
12688 }
12689 }
12690
12691 return err;
12692 }
12693 #endif /* (WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
12694
12695 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
12696 defined(WC_MP_TO_RADIX)
12697
12698 /* Put the big-endian, hex string encoding of a into str.
12699 *
12700 * Assumes str is large enough for result.
12701 * Use sp_radix_size() to calculate required length.
12702 *
12703 * @param [in] a SP integer to convert.
12704 * @param [out] str String to hold hex string result.
12705 *
12706 * @return MP_OKAY on success.
12707 * @return MP_VAL when a or str is NULL.
12708 */
12709 int sp_tohex(sp_int* a, char* str)
12710 {
12711 int err = MP_OKAY;
12712 int i;
12713 int j;
12714
12715 if ((a == NULL) || (str == NULL)) {
12716 err = MP_VAL;
12717 }
12718 if (err == MP_OKAY) {
12719 /* quick out if its zero */
12720 if (sp_iszero(a) == MP_YES) {
12721 #ifndef WC_DISABLE_RADIX_ZERO_PAD
12722 *str++ = '0';
12723 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
12724 *str++ = '0';
12725 *str = '\0';
12726 }
12727 else {
12728 #ifdef WOLFSSL_SP_INT_NEGATIVE
12729 if (a->sign == MP_NEG) {
12730 *str = '-';
12731 str++;
12732 }
12733 #endif /* WOLFSSL_SP_INT_NEGATIVE */
12734
12735 i = a->used - 1;
12736 #ifndef WC_DISABLE_RADIX_ZERO_PAD
12737 /* Find highest non-zero byte in most-significant word. */
12738 for (j = SP_WORD_SIZE - 8; j >= 0; j -= 8) {
12739 if (((a->dp[i] >> j) & 0xff) != 0) {
12740 break;
12741 }
12742 else if (j == 0) {
12743 j = SP_WORD_SIZE - 8;
12744 --i;
12745 }
12746 }
12747 /* Start with high nibble of byte. */
12748 j += 4;
12749 #else
12750 /* Find highest non-zero nibble in most-significant word. */
12751 for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
12752 if (((a->dp[i] >> j) & 0xf) != 0) {
12753 break;
12754 }
12755 else if (j == 0) {
12756 j = SP_WORD_SIZE - 4;
12757 --i;
12758 }
12759 }
12760 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
12761 /* Most-significant word. */
12762 for (; j >= 0; j -= 4) {
12763 *(str++) = ByteToHex(a->dp[i] >> j);
12764 }
12765 for (--i; i >= 0; i--) {
12766 for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
12767 *(str++) = ByteToHex(a->dp[i] >> j);
12768 }
12769 }
12770 *str = '\0';
12771 }
12772 }
12773
12774 return err;
12775 }
12776 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
12777
12778 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
12779 defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
12780 defined(WC_MP_TO_RADIX)
12781 /* Put the big-endian, decimal string encoding of a into str.
12782 *
12783 * Assumes str is large enough for result.
12784 * Use sp_radix_size() to calculate required length.
12785 *
12786 * @param [in] a SP integer to convert.
12787 * @param [out] str String to hold hex string result.
12788 *
12789 * @return MP_OKAY on success.
12790 * @return MP_VAL when a or str is NULL.
12791 * @return MP_MEM when dynamic memory allocation fails.
12792 */
12793 int sp_todecimal(sp_int* a, char* str)
12794 {
12795 int err = MP_OKAY;
12796 int i;
12797 int j;
12798 sp_int_digit d;
12799
12800 if ((a == NULL) || (str == NULL)) {
12801 err = MP_VAL;
12802 }
12803 /* quick out if its zero */
12804 else if (sp_iszero(a) == MP_YES) {
12805 *str++ = '0';
12806 *str = '\0';
12807 }
12808 else {
12809 DECL_SP_INT(t, a->used + 1);
12810
12811 ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
12812 if (err == MP_OKAY) {
12813 err = sp_copy(a, t);
12814 }
12815 if (err == MP_OKAY) {
12816
12817 #ifdef WOLFSSL_SP_INT_NEGATIVE
12818 if (a->sign == MP_NEG) {
12819 *str = '-';
12820 str++;
12821 }
12822 #endif /* WOLFSSL_SP_INT_NEGATIVE */
12823
12824 i = 0;
12825 while (!sp_iszero(t)) {
12826 sp_div_d(t, 10, t, &d);
12827 str[i++] = '0' + d;
12828 }
12829 str[i] = '\0';
12830
12831 for (j = 0; j <= (i - 1) / 2; j++) {
12832 int c = str[j];
12833 str[j] = str[i - 1 - j];
12834 str[i - 1 - j] = c;
12835 }
12836 }
12837
12838 FREE_SP_INT(t, NULL);
12839 }
12840
12841 return err;
12842 }
12843 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
12844
12845 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
12846 defined(WC_MP_TO_RADIX)
12847 /* Put the string version, big-endian, of a in str using the given radix.
12848 *
12849 * @param [in] a SP integer to convert.
12850 * @param [out] str String to hold hex string result.
12851 * @param [in] radix Base of character.
12852 * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
12853 *
12854 * @return MP_OKAY on success.
12855 * @return MP_VAL when a or str is NULL, or radix not supported.
12856 */
12857 int sp_toradix(sp_int* a, char* str, int radix)
12858 {
12859 int err = MP_OKAY;
12860
12861 if ((a == NULL) || (str == NULL)) {
12862 err = MP_VAL;
12863 }
12864 else if (radix == MP_RADIX_HEX) {
12865 err = sp_tohex(a, str);
12866 }
12867 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
12868 defined(HAVE_COMP_KEY)
12869 else if (radix == MP_RADIX_DEC) {
12870 err = sp_todecimal(a, str);
12871 }
12872 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
12873 else {
12874 err = MP_VAL;
12875 }
12876
12877 return err;
12878 }
12879 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
12880
12881 #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
12882 defined(WC_MP_TO_RADIX)
12883 /* Calculate the length of the string version, big-endian, of a using the given
12884 * radix.
12885 *
12886 * @param [in] a SP integer to convert.
12887 * @param [in] radix Base of character.
12888 * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
12889 * @param [out] size The number of characters in encoding.
12890 *
12891 * @return MP_OKAY on success.
12892 * @return MP_VAL when a or size is NULL, or radix not supported.
12893 */
12894 int sp_radix_size(sp_int* a, int radix, int* size)
12895 {
12896 int err = MP_OKAY;
12897
12898 if ((a == NULL) || (size == NULL)) {
12899 err = MP_VAL;
12900 }
12901 else if (radix == MP_RADIX_HEX) {
12902 if (a->used == 0) {
12903 #ifndef WC_DISABLE_RADIX_ZERO_PAD
12904 /* 00 and '\0' */
12905 *size = 2 + 1;
12906 #else
12907 /* Zero and '\0' */
12908 *size = 1 + 1;
12909 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
12910 }
12911 else {
12912 int nibbles = (sp_count_bits(a) + 3) / 4;
12913 #ifndef WC_DISABLE_RADIX_ZERO_PAD
12914 if (nibbles & 1) {
12915 nibbles++;
12916 }
12917 #endif /* WC_DISABLE_RADIX_ZERO_PAD */
12918 #ifdef WOLFSSL_SP_INT_NEGATIVE
12919 if (a->sign == MP_NEG) {
12920 nibbles++;
12921 }
12922 #endif /* WOLFSSL_SP_INT_NEGATIVE */
12923 /* One more for \0 */
12924 *size = nibbles + 1;
12925 }
12926 }
12927 #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
12928 defined(HAVE_COMP_KEY)
12929 else if (radix == MP_RADIX_DEC) {
12930 int i;
12931 sp_int_digit d;
12932
12933 /* quick out if its zero */
12934 if (sp_iszero(a) == MP_YES) {
12935 /* Zero and '\0' */
12936 *size = 1 + 1;
12937 }
12938 else {
12939 DECL_SP_INT(t, a->used + 1);
12940
12941 ALLOC_SP_INT(t, a->used + 1, err, NULL);
12942 if (err == MP_OKAY) {
12943 #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
12944 t->size = a->used + 1;
12945 #endif /* WOLFSSL_SMALL_STACK && !WOLFSSL_SP_NO_MALLOC */
12946 err = sp_copy(a, t);
12947 }
12948
12949 if (err == MP_OKAY) {
12950
12951 for (i = 0; !sp_iszero(t); i++) {
12952 sp_div_d(t, 10, t, &d);
12953 }
12954 #ifdef WOLFSSL_SP_INT_NEGATIVE
12955 if (a->sign == MP_NEG) {
12956 i++;
12957 }
12958 #endif /* WOLFSSL_SP_INT_NEGATIVE */
12959 /* One more for \0 */
12960 *size = i + 1;
12961 }
12962
12963 FREE_SP_INT(t, NULL);
12964 }
12965 }
12966 #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
12967 else {
12968 err = MP_VAL;
12969 }
12970
12971 return err;
12972 }
12973 #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
12974
12975 /***************************************
12976 * Prime number generation and checking.
12977 ***************************************/
12978
12979 #if defined(WOLFSSL_KEY_GEN) && (!defined(NO_DH) || !defined(NO_DSA)) && \
12980 !defined(WC_NO_RNG)
12981 /* Generate a random prime for RSA only.
12982 *
12983 * @param [out] r SP integer to hold result.
12984 * @param [in] len Number of bytes in prime.
12985 * @param [in] rng Random number generator.
12986 * @param [in] heap Heap hint. Unused.
12987 *
12988 * @return MP_OKAY on success
12989 * @return MP_VAL when r or rng is NULL, length is not supported or random
12990 * number generator fails.
12991 */
12992 int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
12993 {
12994 static const int USE_BBS = 1;
12995 int err = MP_OKAY;
12996 int type = 0;
12997 int isPrime = MP_NO;
12998 #ifdef WOLFSSL_SP_MATH_ALL
12999 int bits = 0;
13000 #endif /* WOLFSSL_SP_MATH_ALL */
13001
13002 (void)heap;
13003
13004 /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
13005 if ((r == NULL) || (rng == NULL) || (len == 0)) {
13006 err = MP_VAL;
13007 }
13008
13009 if (err == MP_OKAY) {
13010 /* get type */
13011 if (len < 0) {
13012 type = USE_BBS;
13013 len = -len;
13014 }
13015
13016 #ifndef WOLFSSL_SP_MATH_ALL
13017 /* For minimal maths, support only what's in SP and needed for DH. */
13018 #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
13019 if (len == 32) {
13020 }
13021 else
13022 #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
13023 /* Generate RSA primes that are half the modulus length. */
13024 #ifndef WOLFSSL_SP_NO_3072
13025 if ((len != 128) && (len != 192))
13026 #else
13027 if (len != 128)
13028 #endif /* WOLFSSL_SP_NO_3072 */
13029 {
13030 err = MP_VAL;
13031 }
13032 #endif /* !WOLFSSL_SP_MATH_ALL */
13033
13034 #ifdef WOLFSSL_SP_INT_NEGATIVE
13035 r->sign = MP_ZPOS;
13036 #endif /* WOLFSSL_SP_INT_NEGATIVE */
13037 r->used = (len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
13038 #ifdef WOLFSSL_SP_MATH_ALL
13039 bits = (len * 8) & SP_WORD_MASK;
13040 #endif /* WOLFSSL_SP_MATH_ALL */
13041 }
13042
13043 /* Assume the candidate is probably prime and then test until
13044 * it is proven composite. */
13045 while (err == MP_OKAY && isPrime == MP_NO) {
13046 #ifdef SHOW_GEN
13047 printf(".");
13048 fflush(stdout);
13049 #endif /* SHOW_GEN */
13050 /* generate value */
13051 err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, len);
13052 if (err != 0) {
13053 err = MP_VAL;
13054 break;
13055 }
13056
13057 /* munge bits */
13058 #ifndef LITTLE_ENDIAN_ORDER
13059 ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
13060 #else
13061 ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
13062 #endif /* LITTLE_ENDIAN_ORDER */
13063 r->dp[0] |= 0x01 | ((type & USE_BBS) ? 0x02 : 0x00);
13064
13065 #ifndef LITTLE_ENDIAN_ORDER
13066 if (((len * 8) & SP_WORD_MASK) != 0) {
13067 r->dp[r->used-1] >>= SP_WORD_SIZE - ((len * 8) & SP_WORD_MASK);
13068 }
13069 #endif /* LITTLE_ENDIAN_ORDER */
13070 #ifdef WOLFSSL_SP_MATH_ALL
13071 if (bits > 0) {
13072 r->dp[r->used - 1] &= ((sp_digit)1 << bits) - 1;
13073 }
13074 #endif /* WOLFSSL_SP_MATH_ALL */
13075
13076 /* test */
13077 /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
13078 * of a 1024-bit candidate being a false positive, when it is our
13079 * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
13080 * Using 8 because we've always used 8 */
13081 sp_prime_is_prime_ex(r, 8, &isPrime, rng);
13082 }
13083
13084 return err;
13085 }
13086 #endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
13087
13088 #ifdef WOLFSSL_SP_PRIME_GEN
13089 /* Miller-Rabin test of "a" to the base of "b" as described in
13090 * HAC pp. 139 Algorithm 4.24
13091 *
13092 * Sets result to 0 if definitely composite or 1 if probably prime.
13093 * Randomly the chance of error is no more than 1/4 and often
13094 * very much lower.
13095 *
13096 * @param [in] a SP integer to check.
13097 * @param [in] b SP integer that is a small prime.
13098 * @param [out] result MP_YES when number is likey prime.
13099 * MP_NO otherwise.
13100 * @param [in] n1 SP integer temporary.
13101 * @param [in] y SP integer temporary.
13102 * @param [in] r SP integer temporary.
13103 *
13104 * @return MP_OKAY on success.
13105 * @return MP_MEM when dynamic memory allocation fails.
13106 */
13107 static int sp_prime_miller_rabin_ex(sp_int* a, sp_int* b, int* result,
13108 sp_int* n1, sp_int* y, sp_int* r)
13109 {
13110 int s;
13111 int j;
13112 int err = MP_OKAY;
13113
13114 /* default */
13115 *result = MP_NO;
13116
13117 /* ensure b > 1 */
13118 if (sp_cmp_d(b, 1) == MP_GT) {
13119 /* get n1 = a - 1 */
13120 (void)sp_copy(a, n1);
13121 _sp_sub_d(n1, 1, n1);
13122 /* set 2**s * r = n1 */
13123 (void)sp_copy(n1, r);
13124
13125 /* count the number of least significant bits
13126 * which are zero
13127 */
13128 s = sp_cnt_lsb(r);
13129
13130 /* now divide n - 1 by 2**s */
13131 sp_rshb(r, s, r);
13132
13133 /* compute y = b**r mod a */
13134 err = sp_exptmod(b, r, a, y);
13135
13136 if (err == MP_OKAY) {
13137 /* probably prime until shown otherwise */
13138 *result = MP_YES;
13139
13140 /* if y != 1 and y != n1 do */
13141 if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
13142 j = 1;
13143 /* while j <= s-1 and y != n1 */
13144 while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
13145 err = sp_sqrmod(y, a, y);
13146 if (err != MP_OKAY) {
13147 break;
13148 }
13149
13150 /* if y == 1 then composite */
13151 if (sp_cmp_d(y, 1) == MP_EQ) {
13152 *result = MP_NO;
13153 break;
13154 }
13155 ++j;
13156 }
13157
13158 /* if y != n1 then composite */
13159 if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
13160 *result = MP_NO;
13161 }
13162 }
13163 }
13164 }
13165
13166 return err;
13167 }
13168
13169 /* Miller-Rabin test of "a" to the base of "b" as described in
13170 * HAC pp. 139 Algorithm 4.24
13171 *
13172 * Sets result to 0 if definitely composite or 1 if probably prime.
13173 * Randomly the chance of error is no more than 1/4 and often
13174 * very much lower.
13175 *
13176 * @param [in] a SP integer to check.
13177 * @param [in] b SP integer that is a small prime.
13178 * @param [out] result MP_YES when number is likey prime.
13179 * MP_NO otherwise.
13180 *
13181 * @return MP_OKAY on success.
13182 * @return MP_MEM when dynamic memory allocation fails.
13183 */
13184 static int sp_prime_miller_rabin(sp_int* a, sp_int* b, int* result)
13185 {
13186 int err = MP_OKAY;
13187 sp_int *n1;
13188 sp_int *y;
13189 sp_int *r;
13190 DECL_SP_INT_ARRAY(t, a->used * 2 + 1, 3);
13191
13192 ALLOC_SP_INT_ARRAY(t, a->used * 2 + 1, 3, err, NULL);
13193 if (err == MP_OKAY) {
13194 n1 = t[0];
13195 y = t[1];
13196 r = t[2];
13197
13198 /* Only 'y' needs to be twice as big. */
13199 sp_init_size(n1, a->used * 2 + 1);
13200 sp_init_size(y, a->used * 2 + 1);
13201 sp_init_size(r, a->used * 2 + 1);
13202
13203 err = sp_prime_miller_rabin_ex(a, b, result, n1, y, r);
13204
13205 sp_clear(n1);
13206 sp_clear(y);
13207 sp_clear(r);
13208 }
13209
13210 FREE_SP_INT_ARRAY(t, NULL);
13211 return err;
13212 }
13213
/* Table of small primes, in ascending order.
 *
 * The primality checks in this file use the table for direct look-up of
 * single digit values, for trial division and as the bases for the
 * Miller-Rabin tests in sp_prime_is_prime().
 * Each entry must fit in one sp_int_digit, so fewer primes are available when
 * the digit is 8 bits.
 */
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE      54

/* The primes that fit in an 8-bit digit: 0x02 up to 0xFB. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE      256

/* The first 256 primes. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    /* Primes 1 to 64. */
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,

    /* Primes 65 to 128. */
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,

    /* Primes 129 to 192. */
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,

    /* Primes 193 to 256. */
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
13270
13271 /* Check whether a is prime.
13272 * Checks against a number of small primes and does t iterations of
13273 * Miller-Rabin.
13274 *
13275 * @param [in] a SP integer to check.
13276 * @param [in] t Number of iterations of Miller-Rabin test to perform.
13277 * @param [out] result MP_YES when number is prime.
13278 * MP_NO otherwise.
13279 *
13280 * @return MP_OKAY on success.
13281 * @return MP_VAL when a or result is NULL, or t is out of range.
13282 * @return MP_MEM when dynamic memory allocation fails.
13283 */
13284 int sp_prime_is_prime(sp_int* a, int t, int* result)
13285 {
13286 int err = MP_OKAY;
13287 int i;
13288 int haveRes = 0;
13289 sp_int_digit d;
13290 DECL_SP_INT(b, 2);
13291
13292 if ((a == NULL) || (result == NULL)) {
13293 if (result != NULL) {
13294 *result = MP_NO;
13295 }
13296 err = MP_VAL;
13297 }
13298
13299 if ((err == MP_OKAY) && ((t <= 0) || (t > SP_PRIME_SIZE))) {
13300 *result = MP_NO;
13301 err = MP_VAL;
13302 }
13303
13304 if ((err == MP_OKAY) && sp_isone(a)) {
13305 *result = MP_NO;
13306 haveRes = 1;
13307 }
13308
13309 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
13310
13311 if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
13312 /* check against primes table */
13313 for (i = 0; i < SP_PRIME_SIZE; i++) {
13314 if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
13315 *result = MP_YES;
13316 haveRes = 1;
13317 break;
13318 }
13319 }
13320 }
13321
13322 if ((err == MP_OKAY) && (!haveRes)) {
13323 /* do trial division */
13324 for (i = 0; i < SP_PRIME_SIZE; i++) {
13325 err = sp_mod_d(a, sp_primes[i], &d);
13326 if ((err != MP_OKAY) || (d == 0)) {
13327 *result = MP_NO;
13328 haveRes = 1;
13329 break;
13330 }
13331 }
13332 }
13333
13334 if ((err == MP_OKAY) && (!haveRes)) {
13335 ALLOC_SP_INT(b, 1, err, NULL);
13336 if (err == MP_OKAY) {
13337 /* now do 't' miller rabins */
13338 sp_init_size(b, 1);
13339 for (i = 0; i < t; i++) {
13340 sp_set(b, sp_primes[i]);
13341 err = sp_prime_miller_rabin(a, b, result);
13342 if ((err != MP_OKAY) || (*result == MP_NO)) {
13343 break;
13344 }
13345 }
13346 }
13347 }
13348
13349 RESTORE_VECTOR_REGISTERS();
13350
13351 FREE_SP_INT(b, NULL);
13352 return err;
13353 }
13354
13355 /* Check whether a is prime.
13356 * Checks against a number of small primes and does t iterations of
13357 * Miller-Rabin.
13358 *
13359 * @param [in] a SP integer to check.
13360 * @param [in] t Number of iterations of Miller-Rabin test to perform.
13361 * @param [out] result MP_YES when number is prime.
13362 * MP_NO otherwise.
13363 * @param [in] rng Random number generator for Miller-Rabin testing.
13364 *
13365 * @return MP_OKAY on success.
13366 * @return MP_VAL when a, result or rng is NULL.
13367 * @return MP_MEM when dynamic memory allocation fails.
13368 */
13369 int sp_prime_is_prime_ex(sp_int* a, int t, int* result, WC_RNG* rng)
13370 {
13371 int err = MP_OKAY;
13372 int ret = MP_YES;
13373 int haveRes = 0;
13374 int i;
13375 #ifndef WC_NO_RNG
13376 sp_int *b = NULL;
13377 sp_int *c = NULL;
13378 sp_int *n1 = NULL;
13379 sp_int *y = NULL;
13380 sp_int *r = NULL;
13381 #endif /* WC_NO_RNG */
13382
13383 if ((a == NULL) || (result == NULL) || (rng == NULL)) {
13384 err = MP_VAL;
13385 }
13386
13387 if ((err == MP_OKAY) && sp_isone(a)) {
13388 ret = MP_NO;
13389 haveRes = 1;
13390 }
13391
13392 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
13393
13394 if ((err == MP_OKAY) && (!haveRes) && (a->used == 1)) {
13395 /* check against primes table */
13396 for (i = 0; i < SP_PRIME_SIZE; i++) {
13397 if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
13398 ret = MP_YES;
13399 haveRes = 1;
13400 break;
13401 }
13402 }
13403 }
13404
13405 if ((err == MP_OKAY) && (!haveRes)) {
13406 sp_int_digit d;
13407
13408 /* do trial division */
13409 for (i = 0; i < SP_PRIME_SIZE; i++) {
13410 err = sp_mod_d(a, sp_primes[i], &d);
13411 if ((err != MP_OKAY) || (d == 0)) {
13412 ret = MP_NO;
13413 haveRes = 1;
13414 break;
13415 }
13416 }
13417 }
13418
13419 #ifndef WC_NO_RNG
13420 /* now do a miller rabin with up to t random numbers, this should
13421 * give a (1/4)^t chance of a false prime. */
13422 if ((err == MP_OKAY) && (!haveRes)) {
13423 int bits = sp_count_bits(a);
13424 word32 baseSz = (bits + 7) / 8;
13425 DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 5);
13426
13427 ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 5, err, NULL);
13428 if (err == MP_OKAY) {
13429 b = d[0];
13430 c = d[1];
13431 n1 = d[2];
13432 y = d[3];
13433 r = d[4];
13434
13435 /* Only 'y' needs to be twice as big. */
13436 sp_init_size(b , a->used * 2 + 1);
13437 sp_init_size(c , a->used * 2 + 1);
13438 sp_init_size(n1, a->used * 2 + 1);
13439 sp_init_size(y , a->used * 2 + 1);
13440 sp_init_size(r , a->used * 2 + 1);
13441
13442 _sp_sub_d(a, 2, c);
13443
13444 bits &= SP_WORD_MASK;
13445
13446 while (t > 0) {
13447 err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
13448 if (err != MP_OKAY) {
13449 break;
13450 }
13451 b->used = a->used;
13452 /* Ensure the top word has no more bits than necessary. */
13453 if (bits > 0) {
13454 b->dp[b->used - 1] &= ((sp_digit)1 << bits) - 1;
13455 }
13456
13457 if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
13458 continue;
13459 }
13460
13461 err = sp_prime_miller_rabin_ex(a, b, &ret, n1, y, r);
13462 if ((err != MP_OKAY) || (ret == MP_NO)) {
13463 break;
13464 }
13465
13466 t--;
13467 }
13468
13469 sp_clear(n1);
13470 sp_clear(y);
13471 sp_clear(r);
13472 sp_clear(b);
13473 sp_clear(c);
13474 }
13475
13476 FREE_SP_INT_ARRAY(d, NULL);
13477 }
13478 #else
13479 (void)t;
13480 #endif /* !WC_NO_RNG */
13481
13482 if (result != NULL) {
13483 *result = ret;
13484 }
13485
13486 RESTORE_VECTOR_REGISTERS();
13487
13488 return err;
13489 }
13490 #endif /* WOLFSSL_SP_PRIME_GEN */
13491
13492 #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
13493
13494 /* Calculates the Greatest Common Denominator (GCD) of a and b into r.
13495 *
13496 * a and b are positive integers.
13497 *
13498 * @param [in] a SP integer of first operand.
13499 * @param [in] b SP integer of second operand.
13500 * @param [out] r SP integer to hold result.
13501 *
13502 * @return MP_OKAY on success.
13503 * @return MP_VAL when a, b or r is NULL or too large.
13504 * @return MP_MEM when dynamic memory allocation fails.
13505 */
13506 int sp_gcd(sp_int* a, sp_int* b, sp_int* r)
13507 {
13508 int err = MP_OKAY;
13509
13510 if ((a == NULL) || (b == NULL) || (r == NULL)) {
13511 err = MP_VAL;
13512 }
13513 else if (a->used >= SP_INT_DIGITS || b->used >= SP_INT_DIGITS) {
13514 err = MP_VAL;
13515 }
13516 else if (sp_iszero(a)) {
13517 /* GCD of 0 and 0 is undefined as all integers divide 0. */
13518 if (sp_iszero(b)) {
13519 err = MP_VAL;
13520 }
13521 else {
13522 err = sp_copy(b, r);
13523 }
13524 }
13525 else if (sp_iszero(b)) {
13526 err = sp_copy(a, r);
13527 }
13528 else {
13529 sp_int* u = NULL;
13530 sp_int* v = NULL;
13531 sp_int* t = NULL;
13532 int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
13533 DECL_SP_INT_ARRAY(d, used, 3);
13534
13535 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
13536
13537 ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
13538
13539 if (err == MP_OKAY) {
13540 u = d[0];
13541 v = d[1];
13542 t = d[2];
13543 sp_init_size(u, used);
13544 sp_init_size(v, used);
13545 sp_init_size(t, used);
13546
13547 if (_sp_cmp(a, b) != MP_LT) {
13548 sp_copy(b, u);
13549 /* First iteration - u = a, v = b */
13550 if (b->used == 1) {
13551 err = sp_mod_d(a, b->dp[0], &v->dp[0]);
13552 if (err == MP_OKAY) {
13553 v->used = (v->dp[0] != 0);
13554 }
13555 }
13556 else {
13557 err = sp_mod(a, b, v);
13558 }
13559 }
13560 else {
13561 sp_copy(a, u);
13562 /* First iteration - u = b, v = a */
13563 if (a->used == 1) {
13564 err = sp_mod_d(b, a->dp[0], &v->dp[0]);
13565 if (err == MP_OKAY) {
13566 v->used = (v->dp[0] != 0);
13567 }
13568 }
13569 else {
13570 err = sp_mod(b, a, v);
13571 }
13572 }
13573 }
13574
13575 if (err == MP_OKAY) {
13576 #ifdef WOLFSSL_SP_INT_NEGATIVE
13577 u->sign = MP_ZPOS;
13578 v->sign = MP_ZPOS;
13579 #endif /* WOLFSSL_SP_INT_NEGATIVE */
13580
13581 while (!sp_iszero(v)) {
13582 if (v->used == 1) {
13583 err = sp_mod_d(u, v->dp[0], &t->dp[0]);
13584 if (err == MP_OKAY) {
13585 t->used = (t->dp[0] != 0);
13586 }
13587 }
13588 else {
13589 err = sp_mod(u, v, t);
13590 }
13591 if (err != MP_OKAY) {
13592 break;
13593 }
13594 sp_copy(v, u);
13595 sp_copy(t, v);
13596 }
13597 if (err == MP_OKAY)
13598 err = sp_copy(u, r);
13599 }
13600
13601 FREE_SP_INT_ARRAY(d, NULL);
13602
13603 RESTORE_VECTOR_REGISTERS();
13604 }
13605
13606 return err;
13607 }
13608
13609 #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
13610
13611 #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && !defined(WC_RSA_BLINDING)
13612
13613 /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
13614 *
13615 * a and b are positive integers.
13616 *
13617 * @param [in] a SP integer of first operand.
13618 * @param [in] b SP integer of second operand.
13619 * @param [out] r SP integer to hold result.
13620 *
13621 * @return MP_OKAY on success.
13622 * @return MP_VAL when a, b or r is NULL; or a or b is zero.
13623 * @return MP_MEM when dynamic memory allocation fails.
13624 */
13625 int sp_lcm(sp_int* a, sp_int* b, sp_int* r)
13626 {
13627 int err = MP_OKAY;
13628 int used = ((a == NULL) || (b == NULL)) ? 1 :
13629 (a->used >= b->used ? a->used + 1: b->used + 1);
13630 DECL_SP_INT_ARRAY(t, used, 2);
13631
13632 if ((a == NULL) || (b == NULL) || (r == NULL)) {
13633 err = MP_VAL;
13634 }
13635
13636 /* LCM of 0 and any number is undefined as 0 is not in the set of values
13637 * being used.
13638 */
13639 if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
13640 err = MP_VAL;
13641 }
13642
13643 ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
13644
13645 if (err == MP_OKAY) {
13646 sp_init_size(t[0], used);
13647 sp_init_size(t[1], used);
13648
13649 SAVE_VECTOR_REGISTERS(err = _svr_ret;);
13650
13651 if (err == MP_OKAY)
13652 err = sp_gcd(a, b, t[0]);
13653
13654 if (err == MP_OKAY) {
13655 if (_sp_cmp_abs(a, b) == MP_GT) {
13656 err = sp_div(a, t[0], t[1], NULL);
13657 if (err == MP_OKAY) {
13658 err = sp_mul(b, t[1], r);
13659 }
13660 }
13661 else {
13662 err = sp_div(b, t[0], t[1], NULL);
13663 if (err == MP_OKAY) {
13664 err = sp_mul(a, t[1], r);
13665 }
13666 }
13667 }
13668
13669 RESTORE_VECTOR_REGISTERS();
13670 }
13671
13672 FREE_SP_INT_ARRAY(t, NULL);
13673 return err;
13674 }
13675
13676 #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && WOLFSSL_KEY_GEN */
13677
13678 /* Returns the run time settings.
13679 *
13680 * @return Settings value.
13681 */
13682 word32 CheckRunTimeSettings(void)
13683 {
13684 return CTC_SETTINGS;
13685 }
13686
13687 /* Returns the fast math settings.
13688 *
13689 * @return Setting - number of bits in a digit.
13690 */
13691 word32 CheckRunTimeFastMath(void)
13692 {
13693 return SP_WORD_SIZE;
13694 }
13695
13696 #endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */
13697