1
2 /***************************************************************************
3 *
4 Copyright 2012 CertiVox IOM Ltd. *
5 *
6 This file is part of CertiVox MIRACL Crypto SDK. *
7 *
8 The CertiVox MIRACL Crypto SDK provides developers with an *
9 extensive and efficient set of cryptographic functions. *
10 For further information about its features and functionalities please *
11 refer to http://www.certivox.com *
12 *
13 * The CertiVox MIRACL Crypto SDK is free software: you can *
14 redistribute it and/or modify it under the terms of the *
15 GNU Affero General Public License as published by the *
16 Free Software Foundation, either version 3 of the License, *
17 or (at your option) any later version. *
18 *
19 * The CertiVox MIRACL Crypto SDK is distributed in the hope *
20 that it will be useful, but WITHOUT ANY WARRANTY; without even the *
21 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
22 See the GNU Affero General Public License for more details. *
23 *
24 * You should have received a copy of the GNU Affero General Public *
25 License along with CertiVox MIRACL Crypto SDK. *
26 If not, see <http://www.gnu.org/licenses/>. *
27 *
28 You can be released from the requirements of the license by purchasing *
29 a commercial license. Buying such a license is mandatory as soon as you *
30 develop commercial activities involving the CertiVox MIRACL Crypto SDK *
31 without disclosing the source code of your own applications, or shipping *
32 the CertiVox MIRACL Crypto SDK with a closed source product. *
33 *
34 ***************************************************************************/
35 /*
36 * MIRACL Montgomery's method for modular arithmetic without division.
37 * mrmonty.c
38 *
39 * Programs to implement Montgomery's method
40 * See "Modular Multiplication Without Trial Division", Math. Comp.
41 * Vol 44, Number 170, April 1985, Pages 519-521
42 * NOTE - there is an important correction to this paper mentioned as a
43 * footnote in "Speeding the Pollard and Elliptic Curve Methods",
44 * Math. Comput., Vol. 48, January 1987, 243-264
45 *
46 * The advantage of this approach is that no division required in order
47 * to compute a modular reduction - useful if division is slow
48 * e.g. on a SPARC processor, or a DSP.
49 *
50 * The disadvantage is that numbers must first be converted to an internal
51 * "n-residue" form.
52 *
53 */
54
55 #include <stdlib.h>
56 #include "miracl.h"
57
58 #ifdef MR_FP
59 #include <math.h>
60 #endif
61
62 #ifdef MR_WIN64
63 #include <intrin.h>
64 #endif
65
66 #ifdef MR_COUNT_OPS
67 extern int fpc,fpa;
68 #endif
69
70 #ifdef MR_CELL
71 extern void mod256(_MIPD_ big,big);
72 #endif
73
kill_monty(_MIPDO_)74 void kill_monty(_MIPDO_ )
75 {
76 #ifdef MR_OS_THREADS
77 miracl *mr_mip=get_mip();
78 #endif
79 zero(mr_mip->modulus);
80 #ifdef MR_KCM
81 zero(mr_mip->big_ndash);
82 #endif
83 }
84
prepare_monty(_MIPD_ big n)85 mr_small prepare_monty(_MIPD_ big n)
86 { /* prepare Montgomery modulus */
87 #ifdef MR_KCM
88 int nl;
89 #endif
90 #ifdef MR_PENTIUM
91 mr_small ndash;
92 mr_small base;
93 mr_small magic=13835058055282163712.0;
94 int control=0x1FFF;
95 #endif
96 #ifdef MR_OS_THREADS
97 miracl *mr_mip=get_mip();
98 #endif
99 if (mr_mip->ERNUM) return (mr_small)0;
100 /* Is it set-up already? */
101 if (size(mr_mip->modulus)!=0)
102 if (mr_compare(n,mr_mip->modulus)==0) return mr_mip->ndash;
103
104 MR_IN(80)
105
106 if (size(n)<=2)
107 {
108 mr_berror(_MIPP_ MR_ERR_BAD_MODULUS);
109 MR_OUT
110 return (mr_small)0;
111 }
112
113 zero(mr_mip->w6);
114 zero(mr_mip->w15);
115
116 /* set a small negative QNR (on the assumption that n is prime!) */
117 /* These defaults can be over-ridden */
118
119 /* Did you know that for p=2 mod 3, -3 is a QNR? */
120
121 mr_mip->pmod8=remain(_MIPP_ n,8);
122
123 switch (mr_mip->pmod8)
124 {
125 case 0:
126 case 1:
127 case 2:
128 case 4:
129 case 6:
130 mr_mip->qnr=0; /* none defined */
131 break;
132 case 3:
133 mr_mip->qnr=-1;
134 break;
135 case 5:
136 mr_mip->qnr=-2;
137 break;
138 case 7:
139 mr_mip->qnr=-1;
140 break;
141 }
142 mr_mip->pmod9=remain(_MIPP_ n,9);
143
144 mr_mip->NO_CARRY=FALSE;
145 if (n->w[n->len-1]>>M4 < 5) mr_mip->NO_CARRY=TRUE;
146
147 #ifdef MR_PENTIUM
148
149 mr_mip->ACTIVE=FALSE;
150 if (mr_mip->base!=0)
151 if (MR_PENTIUM==n->len) mr_mip->ACTIVE=TRUE;
152 if (MR_PENTIUM<0)
153 {
154 if (n->len<=(-MR_PENTIUM)) mr_mip->ACTIVE=TRUE;
155 if (logb2(_MIPP_ n)%mr_mip->lg2b==0) mr_mip->ACTIVE=FALSE;
156 }
157 #endif
158
159 #ifdef MR_DISABLE_MONTGOMERY
160 mr_mip->MONTY=OFF;
161 #else
162 mr_mip->MONTY=ON;
163 #endif
164
165 #ifdef MR_COMBA
166 mr_mip->ACTIVE=FALSE;
167
168 if (MR_COMBA==n->len && mr_mip->base==mr_mip->base2)
169 {
170 mr_mip->ACTIVE=TRUE;
171 #ifdef MR_SPECIAL
172 mr_mip->MONTY=OFF; /* "special" modulus reduction */
173
174 #endif /* implemented in mrcomba.c */
175 }
176
177 #endif
178 convert(_MIPP_ 1,mr_mip->one);
179 if (!mr_mip->MONTY)
180 { /* Montgomery arithmetic is turned off */
181 copy(n,mr_mip->modulus);
182 mr_mip->ndash=0;
183 MR_OUT
184 return (mr_small)0;
185 }
186
187 #ifdef MR_KCM
188
189 /* test for base==0 & n->len=MR_KCM.2^x */
190
191 mr_mip->ACTIVE=FALSE;
192 if (mr_mip->base==0)
193 {
194 nl=(int)n->len;
195 while (nl>=MR_KCM)
196 {
197 if (nl==MR_KCM)
198 {
199 mr_mip->ACTIVE=TRUE;
200 break;
201 }
202 if (nl%2!=0) break;
203 nl/=2;
204 }
205 }
206 if (mr_mip->ACTIVE)
207 {
208 mr_mip->w6->len=n->len+1;
209 mr_mip->w6->w[n->len]=1;
210 if (invmodp(_MIPP_ n,mr_mip->w6,mr_mip->w14)!=1)
211 { /* problems */
212 mr_berror(_MIPP_ MR_ERR_BAD_MODULUS);
213 MR_OUT
214 return (mr_small)0;
215 }
216 }
217 else
218 {
219 #endif
220 mr_mip->w6->len=2;
221 mr_mip->w6->w[0]=0;
222 mr_mip->w6->w[1]=1; /* w6 = base */
223 mr_mip->w15->len=1;
224 mr_mip->w15->w[0]=n->w[0]; /* w15 = n mod base */
225 if (invmodp(_MIPP_ mr_mip->w15,mr_mip->w6,mr_mip->w14)!=1)
226 { /* problems */
227 mr_berror(_MIPP_ MR_ERR_BAD_MODULUS);
228 MR_OUT
229 return (mr_small)0;
230 }
231 #ifdef MR_KCM
232 }
233 copy(mr_mip->w14,mr_mip->big_ndash);
234 #endif
235
236 mr_mip->ndash=mr_mip->base-mr_mip->w14->w[0]; /* = N' mod b */
237 copy(n,mr_mip->modulus);
238 mr_mip->check=OFF;
239 mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->pR);
240 mr_mip->check=ON;
241 #ifdef MR_PENTIUM
242 /* prime the FP stack */
243 if (mr_mip->ACTIVE)
244 {
245 ndash=mr_mip->ndash;
246 base=mr_mip->base;
247 magic *=base;
248 ASM
249 {
250 finit
251 fldcw WORD PTR control
252 fld QWORD PTR ndash
253 fld1
254 fld QWORD PTR base
255 fdiv
256 fld QWORD PTR magic
257 }
258 }
259 #endif
260 nres(_MIPP_ mr_mip->one,mr_mip->one);
261 MR_OUT
262
263 return mr_mip->ndash;
264 }
265
nres(_MIPD_ big x,big y)266 void nres(_MIPD_ big x,big y)
267 { /* convert x to n-residue format */
268 #ifdef MR_OS_THREADS
269 miracl *mr_mip=get_mip();
270 #endif
271 if (mr_mip->ERNUM) return;
272
273 MR_IN(81)
274
275 if (size(mr_mip->modulus)==0)
276 {
277 mr_berror(_MIPP_ MR_ERR_NO_MODULUS);
278 MR_OUT
279 return;
280 }
281 copy(x,y);
282 divide(_MIPP_ y,mr_mip->modulus,mr_mip->modulus);
283 if (size(y)<0) add(_MIPP_ y,mr_mip->modulus,y);
284 if (!mr_mip->MONTY)
285 {
286 MR_OUT
287 return;
288 }
289 mr_mip->check=OFF;
290
291 mr_shift(_MIPP_ y,(int)mr_mip->modulus->len,mr_mip->w0);
292 divide(_MIPP_ mr_mip->w0,mr_mip->modulus,mr_mip->modulus);
293 mr_mip->check=ON;
294 copy(mr_mip->w0,y);
295
296 MR_OUT
297 }
298
redc(_MIPD_ big x,big y)299 void redc(_MIPD_ big x,big y)
300 { /* Montgomery's REDC function p. 520 */
301 /* also used to convert n-residues back to normal form */
302 mr_small carry,delay_carry,m,ndash,*w0g,*mg;
303
304 #ifdef MR_ITANIUM
305 mr_small tm;
306 #endif
307 #ifdef MR_WIN64
308 mr_small tm,tr;
309 #endif
310 int i,j,rn,rn2;
311 big w0,modulus;
312 #ifdef MR_NOASM
313 union doubleword dble;
314 mr_large dbled,ldres;
315 #endif
316 #ifdef MR_OS_THREADS
317 miracl *mr_mip=get_mip();
318 #endif
319 if (mr_mip->ERNUM) return;
320
321 MR_IN(82)
322
323 w0=mr_mip->w0; /* get these into local variables (for inline assembly) */
324 modulus=mr_mip->modulus;
325 ndash=mr_mip->ndash;
326
327 copy(x,w0);
328 if (!mr_mip->MONTY)
329 {
330 /*#ifdef MR_CELL
331 mod256(_MIPP_ w0,w0);
332 #else */
333 divide(_MIPP_ w0,modulus,modulus);
334 /* #endif */
335 copy(w0,y);
336 MR_OUT
337 return;
338 }
339 delay_carry=0;
340 rn=(int)modulus->len;
341 rn2=rn+rn;
342 #ifndef MR_SIMPLE_BASE
343 if (mr_mip->base==0)
344 {
345 #endif
346 #ifndef MR_NOFULLWIDTH
347 mg=modulus->w;
348 w0g=w0->w;
349 for (i=0;i<rn;i++)
350 {
351 /* inline - substitutes for loop below */
352 #if INLINE_ASM == 1
353 ASM cld
354 ASM mov cx,rn
355 ASM mov si,i
356 ASM shl si,1
357 #ifdef MR_LMM
358 ASM push ds
359 ASM push es
360 ASM les bx,DWORD PTR w0g
361 ASM add bx,si
362 ASM mov ax,es:[bx]
363 ASM mul WORD PTR ndash
364 ASM mov di,ax
365 ASM lds si,DWORD PTR mg
366 #else
367 ASM mov bx,w0g
368 ASM add bx,si
369 ASM mov ax,[bx]
370 ASM mul WORD PTR ndash
371 ASM mov di,ax
372 ASM mov si,mg
373 #endif
374 ASM push bp
375 ASM xor bp,bp
376 m1:
377 ASM lodsw
378 ASM mul di
379 ASM add ax,bp
380 ASM adc dx,0
381 #ifdef MR_LMM
382 ASM add es:[bx],ax
383 #else
384 ASM add [bx],ax
385 #endif
386 ASM adc dx,0
387 ASM inc bx
388 ASM inc bx
389 ASM mov bp,dx
390 ASM loop m1
391
392 ASM pop bp
393 ASM mov ax,delay_carry
394 #ifdef MR_LMM
395 ASM add es:[bx],ax
396 ASM mov ax,0
397 ASM adc ax,0
398 ASM add es:[bx],dx
399 ASM pop es
400 ASM pop ds
401 #else
402 ASM add [bx],ax
403 ASM mov ax,0
404 ASM adc ax,0
405 ASM add [bx],dx
406 #endif
407 ASM adc ax,0
408 ASM mov delay_carry,ax
409 #endif
410 #if INLINE_ASM == 2
411 ASM cld
412 ASM mov cx,rn
413 ASM mov si,i
414 ASM shl si,2
415 #ifdef MR_LMM
416 ASM push ds
417 ASM push es
418 ASM les bx,DWORD PTR w0g
419 ASM add bx,si
420 ASM mov eax,es:[bx]
421 ASM mul DWORD PTR ndash
422 ASM mov edi,eax
423 ASM lds si,DWORD PTR mg
424 #else
425 ASM mov bx,w0g
426 ASM add bx,si
427 ASM mov eax,[bx]
428 ASM mul DWORD PTR ndash
429 ASM mov edi,eax
430 ASM mov si,mg
431 #endif
432 ASM push ebp
433 ASM xor ebp,ebp
434 m1:
435 ASM lodsd
436 ASM mul edi
437 ASM add eax,ebp
438 ASM adc edx,0
439 #ifdef MR_LMM
440 ASM add es:[bx],eax
441 #else
442 ASM add [bx],eax
443 #endif
444 ASM adc edx,0
445 ASM add bx,4
446 ASM mov ebp,edx
447 ASM loop m1
448
449 ASM pop ebp
450 ASM mov eax,delay_carry
451 #ifdef MR_LMM
452 ASM add es:[bx],eax
453 ASM mov eax,0
454 ASM adc eax,0
455 ASM add es:[bx],edx
456 ASM pop es
457 ASM pop ds
458 #else
459 ASM add [bx],eax
460 ASM mov eax,0
461 ASM adc eax,0
462 ASM add [bx],edx
463 #endif
464 ASM adc eax,0
465 ASM mov delay_carry,eax
466
467 #endif
468 #if INLINE_ASM == 3
469 ASM mov ecx,rn
470 ASM mov esi,i
471 ASM shl esi,2
472 ASM mov ebx,w0g
473 ASM add ebx,esi
474 ASM mov eax,[ebx]
475 ASM mul DWORD PTR ndash
476 ASM mov edi,eax
477 ASM mov esi,mg
478 ASM sub ebx,esi
479 ASM sub ebx,4
480 ASM push ebp
481 ASM xor ebp,ebp
482 m1:
483 ASM mov eax,[esi]
484 ASM add esi,4
485 ASM mul edi
486 ASM add eax,ebp
487 ASM mov ebp,[esi+ebx]
488 ASM adc edx,0
489 ASM add ebp,eax
490 ASM adc edx,0
491 ASM mov [esi+ebx],ebp
492 ASM dec ecx
493 ASM mov ebp,edx
494 ASM jnz m1
495
496 ASM pop ebp
497 ASM mov eax,delay_carry
498 ASM add [esi+ebx+4],eax
499 ASM mov eax,0
500 ASM adc eax,0
501 ASM add [esi+ebx+4],edx
502 ASM adc eax,0
503 ASM mov delay_carry,eax
504
505 #endif
506 #if INLINE_ASM == 4
507 ASM (
508 "movl %0,%%ecx\n"
509 "movl %1,%%esi\n"
510 "shll $2,%%esi\n"
511 "movl %2,%%ebx\n"
512 "addl %%esi,%%ebx\n"
513 "movl (%%ebx),%%eax\n"
514 "mull %3\n"
515 "movl %%eax,%%edi\n"
516 "movl %4,%%esi\n"
517 "subl %%esi,%%ebx\n"
518 "subl $4,%%ebx\n"
519 "pushl %%ebp\n"
520 "xorl %%ebp,%%ebp\n"
521 "m1:\n"
522 "movl (%%esi),%%eax\n"
523 "addl $4,%%esi\n"
524 "mull %%edi\n"
525 "addl %%ebp,%%eax\n"
526 "movl (%%esi,%%ebx),%%ebp\n"
527 "adcl $0,%%edx\n"
528 "addl %%eax,%%ebp\n"
529 "adcl $0,%%edx\n"
530 "movl %%ebp,(%%esi,%%ebx)\n"
531 "decl %%ecx\n"
532 "movl %%edx,%%ebp\n"
533 "jnz m1\n"
534
535 "popl %%ebp\n"
536 "movl %5,%%eax\n"
537 "addl %%eax,4(%%esi,%%ebx)\n"
538 "movl $0,%%eax\n"
539 "adcl $0,%%eax\n"
540 "addl %%edx,4(%%esi,%%ebx)\n"
541 "adcl $0,%%eax\n"
542 "movl %%eax,%5\n"
543
544 :
545 :"m"(rn),"m"(i),"m"(w0g),"m"(ndash),"m"(mg),"m"(delay_carry)
546 :"eax","edi","esi","ebx","ecx","edx","memory"
547 );
548 #endif
549
550 #ifndef INLINE_ASM
551 /* muldvd(w0->w[i],ndash,0,&m); Note that after this time */
552 m=ndash*w0->w[i];
553 carry=0; /* around the loop, w0[i]=0 */
554
555 for (j=0;j<rn;j++)
556 {
557 #ifdef MR_NOASM
558 dble.d=(mr_large)m*modulus->w[j]+carry+w0->w[i+j];
559 w0->w[i+j]=dble.h[MR_BOT];
560 carry=dble.h[MR_TOP];
561 #else
562 muldvd2(m,modulus->w[j],&carry,&w0->w[i+j]);
563 #endif
564 }
565 w0->w[rn+i]+=delay_carry;
566 if (w0->w[rn+i]<delay_carry) delay_carry=1;
567 else delay_carry=0;
568 w0->w[rn+i]+=carry;
569 if (w0->w[rn+i]<carry) delay_carry=1;
570 #endif
571 }
572 #endif
573
574 #ifndef MR_SIMPLE_BASE
575 }
576 else for (i=0;i<rn;i++)
577 {
578 #ifdef MR_FP_ROUNDING
579 imuldiv(w0->w[i],ndash,0,mr_mip->base,mr_mip->inverse_base,&m);
580 #else
581 muldiv(w0->w[i],ndash,0,mr_mip->base,&m);
582 #endif
583 carry=0;
584 for (j=0;j<rn;j++)
585 {
586 #ifdef MR_NOASM
587 dbled=(mr_large)m*modulus->w[j]+carry+w0->w[i+j];
588 #ifdef MR_FP_ROUNDING
589 carry=(mr_small)MR_LROUND(dbled*mr_mip->inverse_base);
590 #else
591 #ifndef MR_FP
592 if (mr_mip->base==mr_mip->base2)
593 carry=(mr_small)(dbled>>mr_mip->lg2b);
594 else
595 #endif
596 carry=(mr_small)MR_LROUND(dbled/mr_mip->base);
597 #endif
598 w0->w[i+j]=(mr_small)(dbled-(mr_large)carry*mr_mip->base);
599 #else
600 #ifdef MR_FP_ROUNDING
601 carry=imuldiv(modulus->w[j],m,w0->w[i+j]+carry,mr_mip->base,mr_mip->inverse_base,&w0->w[i+j]);
602 #else
603 carry=muldiv(modulus->w[j],m,w0->w[i+j]+carry,mr_mip->base,&w0->w[i+j]);
604 #endif
605 #endif
606 }
607 w0->w[rn+i]+=(delay_carry+carry);
608 delay_carry=0;
609 if (w0->w[rn+i]>=mr_mip->base)
610 {
611 w0->w[rn+i]-=mr_mip->base;
612 delay_carry=1;
613 }
614 }
615 #endif
616 w0->w[rn2]=delay_carry;
617 w0->len=rn2+1;
618 mr_shift(_MIPP_ w0,(-rn),w0);
619 mr_lzero(w0);
620
621 if (mr_compare(w0,modulus)>=0) mr_psub(_MIPP_ w0,modulus,w0);
622 copy(w0,y);
623 MR_OUT
624 }
625
626 /* "Complex" method for ZZn2 squaring */
627
nres_complex(_MIPD_ big a,big b,big r,big i)628 void nres_complex(_MIPD_ big a,big b,big r,big i)
629 {
630 #ifdef MR_OS_THREADS
631 miracl *mr_mip=get_mip();
632 #endif
633 if (mr_mip->ERNUM) return;
634 MR_IN(225)
635
636 if (mr_mip->NO_CARRY && mr_mip->qnr==-1)
637 { /* if modulus is small enough we can ignore carries, and use simple addition and subtraction */
638 /* recall that Montgomery reduction can cope as long as product is less than pR */
639 #ifdef MR_COMBA
640 #ifdef MR_COUNT_OPS
641 fpa+=3;
642 #endif
643 if (mr_mip->ACTIVE)
644 {
645 comba_add(a,b,mr_mip->w1);
646 comba_add(a,mr_mip->modulus,mr_mip->w2); /* a-b is p+a-b */
647 comba_sub(mr_mip->w2,b,mr_mip->w2);
648 comba_add(a,a,r);
649 }
650 else
651 {
652 #endif
653 mr_padd(_MIPP_ a,b,mr_mip->w1);
654 mr_padd(_MIPP_ a,mr_mip->modulus,mr_mip->w2);
655 mr_psub(_MIPP_ mr_mip->w2,b,mr_mip->w2);
656 mr_padd(_MIPP_ a,a,r);
657 #ifdef MR_COMBA
658 }
659 #endif
660 nres_modmult(_MIPP_ r,b,i);
661 nres_modmult(_MIPP_ mr_mip->w1,mr_mip->w2,r);
662 }
663 else
664 {
665 nres_modadd(_MIPP_ a,b,mr_mip->w1);
666 nres_modsub(_MIPP_ a,b,mr_mip->w2);
667
668 if (mr_mip->qnr==-2)
669 nres_modsub(_MIPP_ mr_mip->w2,b,mr_mip->w2);
670
671 nres_modmult(_MIPP_ a,b,i);
672 nres_modmult(_MIPP_ mr_mip->w1,mr_mip->w2,r);
673
674 if (mr_mip->qnr==-2)
675 nres_modadd(_MIPP_ r,i,r);
676
677 nres_modadd(_MIPP_ i,i,i);
678 }
679 MR_OUT
680 }
681
682 #ifndef MR_NO_LAZY_REDUCTION
683
684 /*
685
686 Lazy reduction technique for zzn2 multiplication - competitive if Reduction is more
687 expensive that Multiplication. This is true for pairing-based crypto. Note that
688 Lazy reduction can also be used with Karatsuba! Uses w1, w2, w5, and w6.
689
690 Reduction poly is X^2-D=0
691
692 (a0+a1.X).(b0+b1.X) = (a0.b0 + D.a1.b1) + (a1.b0+a0.b1).X
693
694 Karatsuba
695
696 (a0.b0+D.a1.b1) + ((a0+a1)(b0+b1) - a0.b0 - a1.b1).X
697 */
698
nres_lazy(_MIPD_ big a0,big a1,big b0,big b1,big r,big i)699 void nres_lazy(_MIPD_ big a0,big a1,big b0,big b1,big r,big i)
700 {
701 #ifdef MR_OS_THREADS
702 miracl *mr_mip=get_mip();
703 #endif
704 if (mr_mip->ERNUM) return;
705
706 mr_mip->check=OFF;
707 #ifdef MR_COUNT_OPS
708 fpc+=3;
709 fpa+=5;
710 if (mr_mip->qnr==-2) fpa++;
711 #endif
712
713 #ifdef MR_COMBA
714 if (mr_mip->ACTIVE)
715 {
716 comba_mult(a0,b0,mr_mip->w0);
717 comba_mult(a1,b1,mr_mip->w5);
718 }
719 else
720 {
721 #endif
722 #ifdef MR_KCM
723 if (mr_mip->ACTIVE)
724 {
725 kcm_mul(_MIPP_ a1,b1,mr_mip->w5); /* this destroys w0! */
726 kcm_mul(_MIPP_ a0,b0,mr_mip->w0);
727 }
728 else
729 {
730 #endif
731 MR_IN(151)
732 multiply(_MIPP_ a0,b0,mr_mip->w0);
733 multiply(_MIPP_ a1,b1,mr_mip->w5);
734 #ifdef MR_COMBA
735 }
736 #endif
737 #ifdef MR_KCM
738 }
739 #endif
740
741 if (mr_mip->NO_CARRY && mr_mip->qnr==-1)
742 { /* if modulus is small enough we can ignore carries, and use simple addition and subtraction */
743 #ifdef MR_COMBA
744 #ifdef MR_COUNT_OPS
745 fpa+=2;
746 #endif
747 if (mr_mip->ACTIVE)
748 {
749 comba_double_add(mr_mip->w0,mr_mip->w5,mr_mip->w6);
750 comba_add(a0,a1,mr_mip->w1);
751 comba_add(b0,b1,mr_mip->w2);
752 }
753 else
754 {
755 #endif
756 mr_padd(_MIPP_ mr_mip->w0,mr_mip->w5,mr_mip->w6);
757 mr_padd(_MIPP_ a0,a1,mr_mip->w1);
758 mr_padd(_MIPP_ b0,b1,mr_mip->w2);
759 #ifdef MR_COMBA
760 }
761 #endif
762 }
763 else
764 {
765 nres_double_modadd(_MIPP_ mr_mip->w0,mr_mip->w5,mr_mip->w6); /* w6 = a0.b0+a1.b1 */
766 if (mr_mip->qnr==-2)
767 nres_double_modadd(_MIPP_ mr_mip->w5,mr_mip->w5,mr_mip->w5);
768 nres_modadd(_MIPP_ a0,a1,mr_mip->w1);
769 nres_modadd(_MIPP_ b0,b1,mr_mip->w2);
770 }
771 nres_double_modsub(_MIPP_ mr_mip->w0,mr_mip->w5,mr_mip->w0); /* r = a0.b0+D.a1.b1 */
772
773 #ifdef MR_COMBA
774 if (mr_mip->ACTIVE)
775 {
776 comba_redc(_MIPP_ mr_mip->w0,r);
777 comba_mult(mr_mip->w1,mr_mip->w2,mr_mip->w0);
778 }
779 else
780 {
781 #endif
782 #ifdef MR_KCM
783 if (mr_mip->ACTIVE)
784 {
785 kcm_redc(_MIPP_ mr_mip->w0,r);
786 kcm_mul(_MIPP_ mr_mip->w1,mr_mip->w2,mr_mip->w0);
787 }
788 else
789 {
790 #endif
791 redc(_MIPP_ mr_mip->w0,r);
792 multiply(_MIPP_ mr_mip->w1,mr_mip->w2,mr_mip->w0); /* w0=(a0+a1)*(b0+b1) */
793 #ifdef MR_COMBA
794 }
795 #endif
796 #ifdef MR_KCM
797 }
798 #endif
799
800 if (mr_mip->NO_CARRY && mr_mip->qnr==-1)
801 {
802 #ifdef MR_COMBA
803 if (mr_mip->ACTIVE)
804 comba_double_sub(mr_mip->w0,mr_mip->w6,mr_mip->w0);
805 else
806 #endif
807 mr_psub(_MIPP_ mr_mip->w0,mr_mip->w6,mr_mip->w0);
808 }
809 else
810 nres_double_modsub(_MIPP_ mr_mip->w0,mr_mip->w6,mr_mip->w0); /* (a0+a1)*(b0+b1) - w6 */
811
812 #ifdef MR_COMBA
813 if (mr_mip->ACTIVE)
814 {
815 comba_redc(_MIPP_ mr_mip->w0,i);
816 }
817 else
818 {
819 #endif
820 #ifdef MR_KCM
821 if (mr_mip->ACTIVE)
822 {
823 kcm_redc(_MIPP_ mr_mip->w0,i);
824 }
825 else
826 {
827 #endif
828 redc(_MIPP_ mr_mip->w0,i);
829 MR_OUT
830 #ifdef MR_COMBA
831 }
832 #endif
833 #ifdef MR_KCM
834 }
835 #endif
836
837 mr_mip->check=ON;
838
839 }
840
841 #endif
842
843 #ifndef MR_STATIC
844
nres_dotprod(_MIPD_ int n,big * x,big * y,big w)845 void nres_dotprod(_MIPD_ int n,big *x,big *y,big w)
846 {
847 int i;
848 #ifdef MR_OS_THREADS
849 miracl *mr_mip=get_mip();
850 #endif
851
852 if (mr_mip->ERNUM) return;
853 MR_IN(120)
854 mr_mip->check=OFF;
855 zero(mr_mip->w7);
856 for (i=0;i<n;i++)
857 {
858 multiply(_MIPP_ x[i],y[i],mr_mip->w0);
859 mr_padd(_MIPP_ mr_mip->w7,mr_mip->w0,mr_mip->w7);
860 }
861 copy(mr_mip->pR,mr_mip->w6);
862 /* w6 = p.R */
863 divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);
864 redc(_MIPP_ mr_mip->w7,w);
865
866 mr_mip->check=ON;
867 MR_OUT
868 }
869
870 #endif
871
nres_negate(_MIPD_ big x,big w)872 void nres_negate(_MIPD_ big x, big w)
873 {
874 #ifdef MR_OS_THREADS
875 miracl *mr_mip=get_mip();
876 #endif
877 if (size(x)==0)
878 {
879 zero(w);
880 return;
881 }
882 #ifdef MR_COMBA
883 if (mr_mip->ACTIVE)
884 {
885 comba_negate(_MIPP_ x,w);
886 return;
887 }
888 else
889 {
890 #endif
891 if (mr_mip->ERNUM) return;
892
893 MR_IN(92)
894 mr_psub(_MIPP_ mr_mip->modulus,x,w);
895 MR_OUT
896
897 #ifdef MR_COMBA
898 }
899 #endif
900
901 }
902
nres_div2(_MIPD_ big x,big w)903 void nres_div2(_MIPD_ big x,big w)
904 {
905 #ifdef MR_OS_THREADS
906 miracl *mr_mip=get_mip();
907 #endif
908
909 MR_IN(198)
910 copy(x,mr_mip->w1);
911 if (remain(_MIPP_ mr_mip->w1,2)!=0)
912 add(_MIPP_ mr_mip->w1,mr_mip->modulus,mr_mip->w1);
913 subdiv(_MIPP_ mr_mip->w1,2,mr_mip->w1);
914 copy(mr_mip->w1,w);
915
916 MR_OUT
917 }
918
nres_div3(_MIPD_ big x,big w)919 void nres_div3(_MIPD_ big x,big w)
920 {
921 #ifdef MR_OS_THREADS
922 miracl *mr_mip=get_mip();
923 #endif
924
925 MR_IN(199)
926 copy(x,mr_mip->w1);
927 while (remain(_MIPP_ mr_mip->w1,3)!=0)
928 add(_MIPP_ mr_mip->w1,mr_mip->modulus,mr_mip->w1);
929 subdiv(_MIPP_ mr_mip->w1,3,mr_mip->w1);
930 copy(mr_mip->w1,w);
931
932 MR_OUT
933 }
934
nres_div5(_MIPD_ big x,big w)935 void nres_div5(_MIPD_ big x,big w)
936 {
937 #ifdef MR_OS_THREADS
938 miracl *mr_mip=get_mip();
939 #endif
940
941 MR_IN(208)
942 copy(x,mr_mip->w1);
943 while (remain(_MIPP_ mr_mip->w1,5)!=0)
944 add(_MIPP_ mr_mip->w1,mr_mip->modulus,mr_mip->w1);
945 subdiv(_MIPP_ mr_mip->w1,5,mr_mip->w1);
946 copy(mr_mip->w1,w);
947
948 MR_OUT
949 }
950
951 /* mod pR addition and subtraction */
952 #ifndef MR_NO_LAZY_REDUCTION
953
nres_double_modadd(_MIPD_ big x,big y,big w)954 void nres_double_modadd(_MIPD_ big x,big y,big w)
955 {
956 #ifdef MR_OS_THREADS
957 miracl *mr_mip=get_mip();
958 #endif
959 #ifdef MR_COMBA
960
961 if (mr_mip->ACTIVE)
962 {
963 comba_double_modadd(_MIPP_ x,y,w);
964 return;
965 }
966 else
967 {
968 #endif
969
970 if (mr_mip->ERNUM) return;
971 MR_IN(153)
972
973 mr_padd(_MIPP_ x,y,w);
974 if (mr_compare(w,mr_mip->pR)>=0)
975 mr_psub(_MIPP_ w,mr_mip->pR,w);
976
977 MR_OUT
978 #ifdef MR_COMBA
979 }
980 #endif
981 }
982
nres_double_modsub(_MIPD_ big x,big y,big w)983 void nres_double_modsub(_MIPD_ big x,big y,big w)
984 {
985 #ifdef MR_OS_THREADS
986 miracl *mr_mip=get_mip();
987 #endif
988 #ifdef MR_COMBA
989
990 if (mr_mip->ACTIVE)
991 {
992 comba_double_modsub(_MIPP_ x,y,w);
993 return;
994 }
995 else
996 {
997 #endif
998
999 if (mr_mip->ERNUM) return;
1000 MR_IN(154)
1001
1002 if (mr_compare(x,y)>=0)
1003 mr_psub(_MIPP_ x,y,w);
1004 else
1005 {
1006 mr_psub(_MIPP_ y,x,w);
1007 mr_psub(_MIPP_ mr_mip->pR,w,w);
1008 }
1009
1010 MR_OUT
1011 #ifdef MR_COMBA
1012 }
1013 #endif
1014 }
1015
1016 #endif
1017
nres_modadd(_MIPD_ big x,big y,big w)1018 void nres_modadd(_MIPD_ big x,big y,big w)
1019 { /* modular addition */
1020 #ifdef MR_OS_THREADS
1021 miracl *mr_mip=get_mip();
1022 #endif
1023 #ifdef MR_COUNT_OPS
1024 fpa++;
1025 #endif
1026 #ifdef MR_COMBA
1027
1028 if (mr_mip->ACTIVE)
1029 {
1030 comba_modadd(_MIPP_ x,y,w);
1031 return;
1032 }
1033 else
1034 {
1035 #endif
1036 if (mr_mip->ERNUM) return;
1037
1038 MR_IN(90)
1039 mr_padd(_MIPP_ x,y,w);
1040 if (mr_compare(w,mr_mip->modulus)>=0) mr_psub(_MIPP_ w,mr_mip->modulus,w);
1041
1042 MR_OUT
1043 #ifdef MR_COMBA
1044 }
1045 #endif
1046 }
1047
nres_modsub(_MIPD_ big x,big y,big w)1048 void nres_modsub(_MIPD_ big x,big y,big w)
1049 { /* modular subtraction */
1050
1051 #ifdef MR_OS_THREADS
1052 miracl *mr_mip=get_mip();
1053 #endif
1054 #ifdef MR_COUNT_OPS
1055 fpa++;
1056 #endif
1057 #ifdef MR_COMBA
1058 if (mr_mip->ACTIVE)
1059 {
1060 comba_modsub(_MIPP_ x,y,w);
1061 return;
1062 }
1063 else
1064 {
1065 #endif
1066 if (mr_mip->ERNUM) return;
1067
1068 MR_IN(91)
1069
1070 if (mr_compare(x,y)>=0)
1071 mr_psub(_MIPP_ x,y,w);
1072 else
1073 {
1074 mr_psub(_MIPP_ y,x,w);
1075 mr_psub(_MIPP_ mr_mip->modulus,w,w);
1076 }
1077
1078 MR_OUT
1079 #ifdef MR_COMBA
1080 }
1081 #endif
1082
1083 }
1084
nres_moddiv(_MIPD_ big x,big y,big w)1085 int nres_moddiv(_MIPD_ big x,big y,big w)
1086 { /* Modular division using n-residues w=x/y mod n */
1087 int gcd;
1088 #ifdef MR_OS_THREADS
1089 miracl *mr_mip=get_mip();
1090 #endif
1091 if (mr_mip->ERNUM) return 0;
1092
1093 MR_IN(85)
1094
1095 if (x==y)
1096 { /* Illegal parameter usage */
1097 mr_berror(_MIPP_ MR_ERR_BAD_PARAMETERS);
1098 MR_OUT
1099
1100 return 0;
1101 }
1102 redc(_MIPP_ y,mr_mip->w6);
1103 gcd=invmodp(_MIPP_ mr_mip->w6,mr_mip->modulus,mr_mip->w6);
1104
1105 if (gcd!=1) zero(w); /* fails silently and returns 0 */
1106 else
1107 {
1108 nres(_MIPP_ mr_mip->w6,mr_mip->w6);
1109 nres_modmult(_MIPP_ x,mr_mip->w6,w);
1110 /* mad(_MIPP_ x,mr_mip->w6,x,mr_mip->modulus,mr_mip->modulus,w); */
1111 }
1112 MR_OUT
1113 return gcd;
1114 }
1115
nres_premult(_MIPD_ big x,int k,big w)1116 void nres_premult(_MIPD_ big x,int k,big w)
1117 { /* multiply n-residue by small ordinary integer */
1118 #ifdef MR_OS_THREADS
1119 miracl *mr_mip=get_mip();
1120 #endif
1121 int sign=0;
1122 if (k==0)
1123 {
1124 zero(w);
1125 return;
1126 }
1127 if (k<0)
1128 {
1129 k=-k;
1130 sign=1;
1131 }
1132 if (mr_mip->ERNUM) return;
1133
1134 MR_IN(102)
1135
1136 if (k<=6)
1137 {
1138 switch (k)
1139 {
1140 case 1: copy(x,w);
1141 break;
1142 case 2: nres_modadd(_MIPP_ x,x,w);
1143 break;
1144 case 3:
1145 nres_modadd(_MIPP_ x,x,mr_mip->w0);
1146 nres_modadd(_MIPP_ x,mr_mip->w0,w);
1147 break;
1148 case 4:
1149 nres_modadd(_MIPP_ x,x,w);
1150 nres_modadd(_MIPP_ w,w,w);
1151 break;
1152 case 5:
1153 nres_modadd(_MIPP_ x,x,mr_mip->w0);
1154 nres_modadd(_MIPP_ mr_mip->w0,mr_mip->w0,mr_mip->w0);
1155 nres_modadd(_MIPP_ x,mr_mip->w0,w);
1156 break;
1157 case 6:
1158 nres_modadd(_MIPP_ x,x,w);
1159 nres_modadd(_MIPP_ w,w,mr_mip->w0);
1160 nres_modadd(_MIPP_ w,mr_mip->w0,w);
1161 break;
1162 }
1163 if (sign==1) nres_negate(_MIPP_ w,w);
1164 MR_OUT
1165 return;
1166 }
1167
1168 mr_pmul(_MIPP_ x,(mr_small)k,mr_mip->w0);
1169 #ifdef MR_COMBA
1170 #ifdef MR_SPECIAL
1171 comba_redc(_MIPP_ mr_mip->w0,w);
1172 #else
1173 divide(_MIPP_ mr_mip->w0,mr_mip->modulus,mr_mip->modulus);
1174 copy(mr_mip->w0,w);
1175 #endif
1176 #else
1177 divide(_MIPP_ mr_mip->w0,mr_mip->modulus,mr_mip->modulus);
1178 copy(mr_mip->w0,w);
1179 #endif
1180
1181 if (sign==1) nres_negate(_MIPP_ w,w);
1182
1183 MR_OUT
1184 }
1185
nres_modmult(_MIPD_ big x,big y,big w)1186 void nres_modmult(_MIPD_ big x,big y,big w)
1187 { /* Modular multiplication using n-residues w=x*y mod n */
1188 #ifdef MR_OS_THREADS
1189 miracl *mr_mip=get_mip();
1190 #endif
1191 if ((x==NULL || x->len==0) && x==w) return;
1192 if ((y==NULL || y->len==0) && y==w) return;
1193 if (y==NULL || x==NULL || x->len==0 || y->len==0)
1194 {
1195 zero(w);
1196 return;
1197 }
1198 #ifdef MR_COUNT_OPS
1199 fpc++;
1200 #endif
1201 #ifdef MR_COMBA
1202 if (mr_mip->ACTIVE)
1203 {
1204 if (x==y) comba_square(x,mr_mip->w0);
1205 else comba_mult(x,y,mr_mip->w0);
1206 comba_redc(_MIPP_ mr_mip->w0,w);
1207 }
1208 else
1209 {
1210 #endif
1211 #ifdef MR_KCM
1212 if (mr_mip->ACTIVE)
1213 {
1214 if (x==y) kcm_sqr(_MIPP_ x,mr_mip->w0);
1215 else kcm_mul(_MIPP_ x,y,mr_mip->w0);
1216 kcm_redc(_MIPP_ mr_mip->w0,w);
1217 }
1218 else
1219 {
1220 #endif
1221 #ifdef MR_PENTIUM
1222 if (mr_mip->ACTIVE)
1223 {
1224 if (x==y) fastmodsquare(_MIPP_ x,w);
1225 else fastmodmult(_MIPP_ x,y,w);
1226 }
1227 else
1228 {
1229 #endif
1230 if (mr_mip->ERNUM) return;
1231
1232 MR_IN(83)
1233
1234 mr_mip->check=OFF;
1235 multiply(_MIPP_ x,y,mr_mip->w0);
1236 redc(_MIPP_ mr_mip->w0,w);
1237 mr_mip->check=ON;
1238 MR_OUT
1239 #ifdef MR_COMBA
1240 }
1241 #endif
1242 #ifdef MR_KCM
1243 }
1244 #endif
1245 #ifdef MR_PENTIUM
1246 }
1247 #endif
1248
1249 }
1250
1251 /* Montgomery's trick for finding multiple *
1252 * simultaneous modular inverses *
1253 * Based on the observation that *
1254 * 1/x = yz*(1/xyz) *
1255 * 1/y = xz*(1/xyz) *
1256 * 1/z = xy*(1/xyz) *
1257 * Why are all of Peter Montgomery's clever *
1258 * algorithms always described as "tricks" ??*/
1259
nres_double_inverse(_MIPD_ big x,big y,big w,big z)1260 BOOL nres_double_inverse(_MIPD_ big x,big y,big w,big z)
1261 { /* find y=1/x mod n and z=1/w mod n */
1262 /* 1/x = w/xw, and 1/w = x/xw */
1263 #ifdef MR_OS_THREADS
1264 miracl *mr_mip=get_mip();
1265 #endif
1266 MR_IN(145)
1267
1268 nres_modmult(_MIPP_ x,w,mr_mip->w6); /* xw */
1269
1270 if (size(mr_mip->w6)==0)
1271 {
1272 mr_berror(_MIPP_ MR_ERR_DIV_BY_ZERO);
1273 MR_OUT
1274 return FALSE;
1275 }
1276 redc(_MIPP_ mr_mip->w6,mr_mip->w6);
1277 redc(_MIPP_ mr_mip->w6,mr_mip->w6);
1278 invmodp(_MIPP_ mr_mip->w6,mr_mip->modulus,mr_mip->w6);
1279
1280 nres_modmult(_MIPP_ w,mr_mip->w6,mr_mip->w5);
1281 nres_modmult(_MIPP_ x,mr_mip->w6,z);
1282 copy(mr_mip->w5,y);
1283
1284 MR_OUT
1285 return TRUE;
1286 }
1287
nres_multi_inverse(_MIPD_ int m,big * x,big * w)1288 BOOL nres_multi_inverse(_MIPD_ int m,big *x,big *w)
1289 { /* find w[i]=1/x[i] mod n, for i=0 to m-1 *
1290 * x and w MUST be distinct */
1291 int i;
1292 #ifdef MR_OS_THREADS
1293 miracl *mr_mip=get_mip();
1294 #endif
1295 if (m==0) return TRUE;
1296 if (m<0) return FALSE;
1297 MR_IN(118)
1298
1299 if (x==w)
1300 {
1301 mr_berror(_MIPP_ MR_ERR_BAD_PARAMETERS);
1302 MR_OUT
1303 return FALSE;
1304 }
1305
1306 if (m==1)
1307 {
1308 copy(mr_mip->one,w[0]);
1309 nres_moddiv(_MIPP_ w[0],x[0],w[0]);
1310 MR_OUT
1311 return TRUE;
1312 }
1313
1314 convert(_MIPP_ 1,w[0]);
1315 copy(x[0],w[1]);
1316 for (i=2;i<m;i++)
1317 nres_modmult(_MIPP_ w[i-1],x[i-1],w[i]);
1318
1319 nres_modmult(_MIPP_ w[m-1],x[m-1],mr_mip->w6); /* y=x[0]*x[1]*x[2]....x[m-1] */
1320 if (size(mr_mip->w6)==0)
1321 {
1322 mr_berror(_MIPP_ MR_ERR_DIV_BY_ZERO);
1323 MR_OUT
1324 return FALSE;
1325 }
1326
1327 redc(_MIPP_ mr_mip->w6,mr_mip->w6);
1328 redc(_MIPP_ mr_mip->w6,mr_mip->w6);
1329
1330 invmodp(_MIPP_ mr_mip->w6,mr_mip->modulus,mr_mip->w6);
1331
1332 /* Now y=1/y */
1333
1334 copy(x[m-1],mr_mip->w5);
1335 nres_modmult(_MIPP_ w[m-1],mr_mip->w6,w[m-1]);
1336
1337 for (i=m-2;;i--)
1338 {
1339 if (i==0)
1340 {
1341 nres_modmult(_MIPP_ mr_mip->w5,mr_mip->w6,w[0]);
1342 break;
1343 }
1344 nres_modmult(_MIPP_ w[i],mr_mip->w5,w[i]);
1345 nres_modmult(_MIPP_ w[i],mr_mip->w6,w[i]);
1346 nres_modmult(_MIPP_ mr_mip->w5,x[i],mr_mip->w5);
1347 }
1348
1349 MR_OUT
1350 return TRUE;
1351 }
1352
1353 /* initialise elliptic curve */
1354
ecurve_init(_MIPD_ big a,big b,big p,int type)1355 void ecurve_init(_MIPD_ big a,big b,big p,int type)
1356 { /* Initialize the active ecurve *
1357 * Asize indicate size of A *
1358 * Bsize indicate size of B */
1359 int as;
1360 #ifdef MR_OS_THREADS
1361 miracl *mr_mip=get_mip();
1362 #endif
1363 if (mr_mip->ERNUM) return;
1364
1365 MR_IN(93)
1366
1367 #ifndef MR_NO_SS
1368 mr_mip->SS=FALSE; /* no special support for super-singular curves */
1369 #endif
1370
1371 prepare_monty(_MIPP_ p);
1372
1373 mr_mip->Asize=size(a);
1374 if (mr_abs(mr_mip->Asize)==MR_TOOBIG)
1375 {
1376 if (mr_mip->Asize>=0)
1377 { /* big positive number - check it isn't minus something small */
1378 copy(a,mr_mip->w1);
1379 divide(_MIPP_ mr_mip->w1,p,p);
1380 subtract(_MIPP_ p,mr_mip->w1,mr_mip->w1);
1381 as=size(mr_mip->w1);
1382 if (as<MR_TOOBIG) mr_mip->Asize=-as;
1383 }
1384 }
1385 nres(_MIPP_ a,mr_mip->A);
1386
1387 mr_mip->Bsize=size(b);
1388 if (mr_abs(mr_mip->Bsize)==MR_TOOBIG)
1389 {
1390 if (mr_mip->Bsize>=0)
1391 { /* big positive number - check it isn't minus something small */
1392 copy(b,mr_mip->w1);
1393 divide(_MIPP_ mr_mip->w1,p,p);
1394 subtract(_MIPP_ p,mr_mip->w1,mr_mip->w1);
1395 as=size(mr_mip->w1);
1396 if (as<MR_TOOBIG) mr_mip->Bsize=-as;
1397 }
1398 }
1399
1400 nres(_MIPP_ b,mr_mip->B);
1401 #ifdef MR_EDWARDS
1402 mr_mip->coord=MR_PROJECTIVE; /* only type supported for Edwards curves */
1403 #else
1404 #ifndef MR_AFFINE_ONLY
1405 if (type==MR_BEST) mr_mip->coord=MR_PROJECTIVE;
1406 else mr_mip->coord=type;
1407 #else
1408 if (type==MR_PROJECTIVE)
1409 mr_berror(_MIPP_ MR_ERR_NOT_SUPPORTED);
1410 #endif
1411 #endif
1412 MR_OUT
1413 return;
1414 }
1415