1
2 /***************************************************************************
3 *
4 Copyright 2012 CertiVox IOM Ltd. *
5 *
6 This file is part of CertiVox MIRACL Crypto SDK. *
7 *
8 The CertiVox MIRACL Crypto SDK provides developers with an *
9 extensive and efficient set of cryptographic functions. *
10 For further information about its features and functionalities please *
11 refer to http://www.certivox.com *
12 *
13 * The CertiVox MIRACL Crypto SDK is free software: you can *
14 redistribute it and/or modify it under the terms of the *
15 GNU Affero General Public License as published by the *
16 Free Software Foundation, either version 3 of the License, *
17 or (at your option) any later version. *
18 *
19 * The CertiVox MIRACL Crypto SDK is distributed in the hope *
20 that it will be useful, but WITHOUT ANY WARRANTY; without even the *
21 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
22 See the GNU Affero General Public License for more details. *
23 *
24 * You should have received a copy of the GNU Affero General Public *
25 License along with CertiVox MIRACL Crypto SDK. *
26 If not, see <http://www.gnu.org/licenses/>. *
27 *
28 You can be released from the requirements of the license by purchasing *
29 a commercial license. Buying such a license is mandatory as soon as you *
30 develop commercial activities involving the CertiVox MIRACL Crypto SDK *
31 without disclosing the source code of your own applications, or shipping *
32 the CertiVox MIRACL Crypto SDK with a closed source product. *
33 *
34 ***************************************************************************/
35 /*
36 * MIRACL fast fourier multiplication routine, using 3 prime method.
37 * mrfast.c - only faster for very high precision multiplication
38 * of numbers > about 4096 bits (see below)
39 * See "The Fast Fourier Transform in a Finite Field" by J.M. Pollard,
40 * Mathematics of Computation, Vol. 25, No. 114, April 1971, pp365-374
41 * Also Knuth Vol. 2, Chapt. 4.3.3
42 *
43 * Takes time preportional to 9+15N+9N.lg(N) to multiply two different
44 * N digit numbers. This reduces to 6+18N+6N.lg(N) when squaring.
45 * The classic method takes N.N and N(N+1)/2 respectively
46 *
47 * Fast Polynomial arithmetic
48 *
49 * See "A New Polynomial Factorisation Algorithm and its Implementation"
50 * by Victor Shoup, Jl. Symbolic Computation 1996
51 * Uses FFT method for fast arithmetic of large degree polynomials
52 *
53 */
54
55 #include <stdlib.h>
56 #include "miracl.h"
57
58 #ifdef MR_FP
59 #include <math.h>
60 #endif
61
62 #ifdef MR_WIN64
63 #include <intrin.h>
64 #endif
65
66 #ifndef MR_STATIC
67
twop(int n)68 static mr_utype twop(int n)
69 { /* 2^n */
70 #ifdef MR_FP
71 int i;
72 #endif
73 mr_utype r=1;
74 if (n==0) return r;
75 #ifdef MR_FP
76 for (i=0;i<n;i++) r*=2.0;
77 return r;
78 #else
79 return (r<<n);
80 #endif
81 }
82
mr_fft_init(_MIPD_ int logn,big m1,big m2,BOOL cr)83 int mr_fft_init(_MIPD_ int logn,big m1,big m2,BOOL cr)
84 { /* logn is number of bits, m1 and m2 are the largest elements
85 that will arise in each number to be multiplied */
86 mr_small kk;
87 int i,j,newn,pr;
88 mr_utype p,proot;
89 #ifdef MR_OS_THREADS
90 miracl *mr_mip=get_mip();
91 #endif
92
93 newn=(1<<logn);
94 mr_mip->check=OFF;
95 multiply(_MIPP_ m1,m2,mr_mip->w5);
96 premult(_MIPP_ mr_mip->w5,2*newn+1,mr_mip->w5);
97 kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
98 if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
99
100 pr=0;
101 while (size(mr_mip->w5)>0)
102 { /* find out how many primes will be needed */
103 do
104 {
105 kk--;
106 p=kk*newn+1;
107 } while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
108 #ifdef MR_FP_ROUNDING
109 mr_sdiv(_MIPP_ mr_mip->w5,p,mr_invert(p),mr_mip->w5);
110 #else
111 mr_sdiv(_MIPP_ mr_mip->w5,p,mr_mip->w5);
112 #endif
113 pr++;
114 }
115 mr_mip->check=ON;
116 /* if nothing has changed, don't recalculate */
117 if (logn<=mr_mip->logN && pr==mr_mip->nprimes) return pr;
118 fft_reset(_MIPPO_ );
119
120
121 mr_mip->prime=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
122 mr_mip->inverse=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
123 mr_mip->roots=(mr_utype**)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
124 mr_mip->t= (mr_utype **)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
125 mr_mip->cr=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
126 mr_mip->wa=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
127 mr_mip->wb=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
128 mr_mip->wc=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
129
130 kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
131 if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
132 for (i=0;i<pr;i++)
133 { /* find the primes again */
134 mr_mip->roots[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
135 mr_mip->t[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
136
137 do
138 {
139 kk--;
140 p=kk*newn+1;
141 } while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
142
143 proot=p-1;
144 for (j=1;j<logn;j++) proot=sqrmp(proot,p);
145 mr_mip->roots[i][0]=proot; /* build residue table */
146 for (j=1;j<newn;j++) mr_mip->roots[i][j]=smul(mr_mip->roots[i][j-1],proot,p);
147 mr_mip->inverse[i]=invers((mr_small)newn,p);
148 mr_mip->prime[i]=p;
149 }
150
151 mr_mip->logN=logn;
152
153 mr_mip->nprimes=pr;
154 /* set up chinese remainder structure */
155 if (cr)
156 if (!scrt_init(_MIPP_ &mr_mip->chin,pr,mr_mip->prime)) fft_reset(_MIPPO_ );
157 return pr;
158 }
159
fft_reset(_MIPDO_)160 void fft_reset(_MIPDO_ )
161 { /* reclaim any space used by FFT */
162 int i;
163 #ifdef MR_OS_THREADS
164 miracl *mr_mip=get_mip();
165 #endif
166
167 if (mr_mip->degree!=0)
168 { /* clear any precomputed tables */
169 for (i=0;i<mr_mip->nprimes;i++)
170 {
171 mr_free(mr_mip->s1[i]);
172 mr_free(mr_mip->s2[i]);
173 }
174 mr_free(mr_mip->s1);
175 mr_free(mr_mip->s2);
176 mr_mip->degree=0;
177 }
178
179 if (mr_mip->logN!=0)
180 { /* clear away old stuff */
181 for (i=0;i<mr_mip->nprimes;i++)
182 {
183 mr_free(mr_mip->roots[i]);
184 mr_free(mr_mip->t[i]);
185 }
186 mr_free(mr_mip->wa);
187 mr_free(mr_mip->wb);
188 mr_free(mr_mip->wc);
189 mr_free(mr_mip->cr);
190 mr_free(mr_mip->t);
191 mr_free(mr_mip->roots);
192 mr_free(mr_mip->inverse);
193 mr_free(mr_mip->prime);
194 mr_mip->nprimes=0;
195 mr_mip->logN=0;
196 mr_mip->same=FALSE;
197 }
198 /* clear CRT structure */
199 if (mr_mip->chin.NP!=0) scrt_end(&mr_mip->chin);
200 }
201
mr_dif_fft(_MIPD_ int logn,int pr,mr_utype * data)202 void mr_dif_fft(_MIPD_ int logn,int pr,mr_utype *data)
203 { /* decimate-in-frequency fourier transform */
204 int mmax,m,j,k,istep,i,ii,jj,newn,offset;
205 mr_utype w,temp,prime,*roots;
206 #ifdef MR_NOASM
207 mr_large dble,ldres;
208 #endif
209 #ifdef MR_OS_THREADS
210 miracl *mr_mip=get_mip();
211 #endif
212 #ifdef MR_FP_ROUNDING
213 mr_large iprime;
214 #endif
215 prime=mr_mip->prime[pr];
216 roots=mr_mip->roots[pr];
217 #ifdef MR_FP_ROUNDING
218 iprime=mr_invert(prime);
219 #endif
220
221 newn=(1<<logn);
222 offset=(mr_mip->logN-logn);
223 mmax=newn;
224 for (k=0;k<logn;k++) {
225 istep=mmax;
226 mmax>>=1;
227 ii=newn;
228 jj=newn/istep;
229 ii-=jj;
230 for (i=0;i<newn;i+=istep)
231 { /* special case root=1 */
232 j=i+mmax;
233 temp=data[i]-data[j];
234 if (temp<0) temp+=prime;
235 data[i]+=data[j];
236 if (data[i]>=prime) data[i]-=prime;
237 data[j]=temp;
238 }
239 for (m=1;m<mmax;m++) {
240
241 w=roots[(ii<<offset)-1];
242 ii-=jj;
243
244 #ifndef MR_FP
245 #ifdef INLINE_ASM
246 #if INLINE_ASM == 3
247
248 #define MR_IMPASM
249
250 /* esi = i
251 edi = j
252 ebx = data
253 edx = temp
254 ecx = prime
255 */
256
257 ASM mov ecx,DWORD PTR prime
258 ASM mov ebx,DWORD PTR data
259 ASM mov esi,DWORD PTR m
260 hop1:
261 ASM cmp esi,DWORD PTR newn
262 ASM jge hop0
263 ASM mov edi,esi
264 ASM add edi,DWORD PTR mmax
265
266 ASM mov edx,[ebx+4*esi]
267 ASM add edx,ecx
268 ASM sub edx,[ebx+4*edi]
269 ASM mov eax,[ebx+4*esi]
270 ASM add eax,[ebx+4*edi]
271 ASM cmp eax,ecx
272 ASM jl hop2
273 ASM sub eax,ecx
274 hop2:
275 ASM mov [ebx+4*esi],eax
276
277 ASM mov eax,DWORD PTR w
278 ASM mul edx
279 ASM div ecx
280 ASM mov [ebx+4*edi],edx
281
282 ASM add esi,DWORD PTR istep
283 ASM jmp hop1
284 hop0:
285 ASM nop
286 #endif
287 #if INLINE_ASM == 4
288
289 #define MR_IMPASM
290 ASM (
291 "movl %0,%%ecx\n"
292 "movl %1,%%ebx\n"
293 "movl %2,%%esi\n"
294 "hop1:\n"
295 "cmpl %3,%%esi\n"
296 "jge hop0\n"
297 "movl %%esi,%%edi\n"
298 "addl %4,%%edi\n"
299
300 "movl (%%ebx,%%esi,4),%%edx\n"
301 "addl %%ecx,%%edx\n"
302 "subl (%%ebx,%%edi,4),%%edx\n"
303 "movl (%%ebx,%%esi,4),%%eax\n"
304 "addl (%%ebx,%%edi,4),%%eax\n"
305 "cmpl %%ecx,%%eax\n"
306 "jl hop2\n"
307 "subl %%ecx,%%eax\n"
308 "hop2:\n"
309 "movl %%eax,(%%ebx,%%esi,4)\n"
310 "movl %5,%%eax\n"
311 "mull %%edx\n"
312 "divl %%ecx\n"
313 "movl %%edx,(%%ebx,%%edi,4)\n"
314 "addl %6,%%esi\n"
315 "jmp hop1\n"
316 "hop0:\n"
317 "nop\n"
318 :
319 : "m"(prime),"m"(data),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
320 : "eax","edi","esi","edi","ebx","ecx","edx","memory"
321 );
322 #endif
323 #endif
324 #endif
325 #ifndef MR_IMPASM
326 for (i=m;i<newn;i+=istep) {
327 j=i+mmax;
328 temp=prime+data[i]-data[j];
329 data[i]+=data[j];
330 if (data[i]>=prime) data[i]-=prime;
331
332 #ifdef MR_NOASM
333 dble=(mr_large)w*temp;
334 #ifdef MR_FP_ROUNDING
335 data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
336 #else
337 data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
338 #endif
339 #else
340 #ifdef MR_FP_ROUNDING
341 imuldiv(w,temp,(mr_small)0,prime,iprime,(mr_small *)&data[j]);
342 #else
343 muldiv(w,temp,(mr_small)0,prime,(mr_small *)&data[j]);
344 #endif
345 #endif
346 }
347 #endif
348 }
349 }
350 }
351
mr_dit_fft(_MIPD_ int logn,int pr,mr_utype * data)352 void mr_dit_fft(_MIPD_ int logn,int pr,mr_utype *data)
353 { /* decimate-in-time inverse fourier transform */
354 int mmax,m,j,k,i,istep,ii,jj,newn,offset;
355 mr_utype w,temp,prime,*roots;
356 #ifdef MR_NOASM
357 mr_large dble,ldres;
358 #endif
359 #ifdef MR_OS_THREADS
360 miracl *mr_mip=get_mip();
361 #endif
362 #ifdef MR_FP_ROUNDING
363 mr_large iprime;
364 #endif
365 prime=mr_mip->prime[pr];
366 roots=mr_mip->roots[pr];
367 offset=(mr_mip->logN-logn);
368 #ifdef MR_FP_ROUNDING
369 iprime=mr_invert(prime);
370 #endif
371 newn=(1<<logn);
372 mmax=1;
373 for (k=0;k<logn;k++) {
374 istep=(mmax<<1);
375 ii=0;
376 jj=newn/istep;
377 ii+=jj;
378 for (i=0;i<newn;i+=istep)
379 { /* special case root=1 */
380 j=i+mmax;
381 temp=data[j];
382 data[j]=data[i]-temp;
383 if (data[j]<0) data[j]+=prime;
384 data[i]+=temp;
385 if (data[i]>=prime) data[i]-=prime;
386 }
387 for (m=1;m<mmax;m++) {
388 w=roots[(ii<<offset)-1];
389 ii+=jj;
390
391 #ifndef MR_FP
392 #ifdef INLINE_ASM
393 #if INLINE_ASM == 3
394
395 #define MR_IMPASM
396 /* esi = i
397 edi = j
398 ebx = data
399 edx = temp
400 ecx = prime
401 */
402
403
404 ASM mov ebx,DWORD PTR data
405 ASM mov ecx,DWORD PTR prime
406 ASM mov esi,DWORD PTR m
407 hop4:
408 ASM cmp esi,DWORD PTR newn
409 ASM jge hop5
410 ASM mov edi,esi
411 ASM add edi,DWORD PTR mmax
412
413 ASM mov eax,[ebx+4*edi]
414 ASM mul DWORD PTR w
415 ASM div ecx
416
417 ASM cmp [ebx+4*esi],ecx
418 ASM jl hop3
419 ASM sub [ebx+4*esi],ecx
420 hop3:
421 ASM mov eax,[ebx+4*esi]
422 ASM add [ebx+4*esi],edx
423 ASM add eax,ecx
424 ASM sub eax,edx
425 ASM mov [ebx+4*edi],eax
426
427 ASM add esi,DWORD PTR istep
428 ASM jmp hop4
429 hop5:
430 ASM nop
431 #endif
432 #if INLINE_ASM == 4
433
434 #define MR_IMPASM
435 ASM (
436 "movl %0,%%ebx\n"
437 "movl %1,%%ecx\n"
438 "movl %2,%%esi\n"
439 "hop4:\n"
440 "cmpl %3,%%esi\n"
441 "jge hop5\n"
442 "movl %%esi,%%edi\n"
443 "addl %4,%%edi\n"
444
445 "movl (%%ebx,%%edi,4),%%eax\n"
446 "mull %5\n"
447 "divl %%ecx\n"
448
449 "cmpl %%ecx,(%%ebx,%%esi,4)\n"
450 "jl hop3\n"
451 "subl %%ecx,(%%ebx,%%esi,4)\n"
452 "hop3:\n"
453 "movl (%%ebx,%%esi,4),%%eax\n"
454 "addl %%edx,(%%ebx,%%esi,4)\n"
455 "addl %%ecx,%%eax\n"
456 "subl %%edx,%%eax\n"
457 "movl %%eax,(%%ebx,%%edi,4)\n"
458
459 "addl %6,%%esi\n"
460 "jmp hop4\n"
461 "hop5:\n"
462 "nop\n"
463 :
464 : "m"(data),"m"(prime),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
465 : "eax","edi","esi","edi","ebx","ecx","edx","memory"
466 );
467 #endif
468 #endif
469 #endif
470 #ifndef MR_IMPASM
471 for (i=m;i<newn;i+=istep) {
472 j=i+mmax;
473 #ifdef MR_NOASM
474 dble=(mr_large)w*data[j];
475 #ifdef MR_FP_ROUNDING
476 temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
477 #else
478 temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
479 #endif
480 #else
481 #ifdef MR_FP_ROUNDING
482 imuldiv(w,data[j],(mr_small)0,prime,iprime,(mr_small *)&temp);
483 #else
484 muldiv(w,data[j],(mr_small)0,prime,(mr_small *)&temp);
485 #endif
486 #endif
487 data[j]=data[i]-temp;
488 if (data[j]<0) data[j]+=prime;
489 data[i] += temp;
490 if (data[i]>=prime) data[i]-=prime;
491 }
492 #endif
493 }
494 mmax=istep;
495 }
496 }
497
modxn_1(_MIPD_ int n,int deg,big * x)498 static void modxn_1(_MIPD_ int n,int deg,big *x)
499 { /* set X (of degree deg) =X mod x^n-1 = X%x^n + X/x^n */
500 int i;
501 for (i=0;n+i<=deg;i++)
502 {
503 nres_modadd(_MIPP_ x[i],x[n+i],x[i]);
504 zero(x[n+i]);
505 }
506 }
507
mr_poly_rem(_MIPD_ int dg,big * G,big * R)508 BOOL mr_poly_rem(_MIPD_ int dg,big *G,big *R)
509 { /* G is a polynomial of degree dg - G is overwritten */
510 int i,j,newn,logn,np,n;
511 mr_utype p,inv,fac;
512 #ifdef MR_OS_THREADS
513 miracl *mr_mip=get_mip();
514 #endif
515 #ifdef MR_FP_ROUNDING
516 mr_large ip;
517 #endif
518
519 n=mr_mip->degree; /* degree of modulus */
520 if (n==0) return FALSE; /* the preset tables have been destroyed */
521 np=mr_mip->nprimes;
522
523 newn=1; logn=0;
524 while (2*n>newn) { newn<<=1; logn++; }
525
526 for (i=0;i<np;i++)
527 {
528 p=mr_mip->prime[i];
529 #ifdef MR_FP_ROUNDING
530 ip=mr_invert(p);
531 for (j=n;j<=dg;j++)
532 mr_mip->t[i][j-n]=mr_sdiv(_MIPP_ G[j],p,ip,mr_mip->w1);
533 #else
534 for (j=n;j<=dg;j++)
535 mr_mip->t[i][j-n]=mr_sdiv(_MIPP_ G[j],p,mr_mip->w1);
536 #endif
537 for (j=dg-n+1;j<newn;j++) mr_mip->t[i][j]=0;
538 mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
539 for (j=0;j<newn;j++)
540 #ifdef MR_FP_ROUNDING
541 imuldiv(mr_mip->t[i][j],mr_mip->s1[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
542 #else
543 muldiv(mr_mip->t[i][j],mr_mip->s1[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
544 #endif
545 mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);
546 inv=mr_mip->inverse[i];
547 if (mr_mip->logN > logn)
548 { /* adjust 1/N log p for N/2, N/4 etc */
549 fac=twop(mr_mip->logN-logn);
550 inv=smul(fac,inv,p);
551 }
552 for (j=0;j<n;j++)
553 #ifdef MR_FP_ROUNDING
554 imuldiv(mr_mip->t[i][j+n-1],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j+n-1]);
555 #else
556 muldiv(mr_mip->t[i][j+n-1],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j+n-1]);
557 #endif
558 }
559
560 mr_mip->check=OFF;
561 mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
562 /* w6 = N.R */
563 for (j=0;j<n;j++)
564 {
565 for (i=0;i<np;i++)
566 mr_mip->cr[i]=mr_mip->t[i][j+n-1];
567 scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
568 divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* R[j] may be too big for redc */
569 redc(_MIPP_ mr_mip->w7,R[j]);
570 }
571 mr_mip->check=ON;
572
573 for (i=0;i<np;i++)
574 {
575 p=mr_mip->prime[i];
576 #ifdef MR_FP_ROUNDING
577 ip=mr_invert(p);
578 for (j=0;j<n;j++)
579 mr_mip->t[i][j]=mr_sdiv(_MIPP_ R[j],p,ip,mr_mip->w1);
580 #else
581 for (j=0;j<n;j++)
582 mr_mip->t[i][j]=mr_sdiv(_MIPP_ R[j],p,mr_mip->w1);
583 #endif
584 for (j=n;j<1+newn/2;j++) mr_mip->t[i][j]=0;
585
586 mr_dif_fft(_MIPP_ logn-1,i,mr_mip->t[i]); /* Note: Half size */
587 for (j=0;j<newn/2;j++)
588 #ifdef MR_FP_ROUNDING
589 imuldiv(mr_mip->t[i][j],mr_mip->s2[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
590 #else
591 muldiv(mr_mip->t[i][j],mr_mip->s2[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
592 #endif
593 mr_dit_fft(_MIPP_ logn-1,i,mr_mip->t[i]);
594
595 inv=mr_mip->inverse[i];
596 if (mr_mip->logN > logn-1)
597 {
598 fac=twop(mr_mip->logN-logn+1);
599 inv=smul(fac,inv,p);
600 }
601 for (j=0;j<n;j++)
602 #ifdef MR_FP_ROUNDING
603 imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
604 #else
605 muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
606 #endif
607 }
608
609 modxn_1(_MIPP_ newn/2,dg,G); /* G=G mod 2^x - 1 */
610
611 mr_mip->check=OFF;
612 mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
613 /* w6 = N.R */
614 for (j=0;j<n;j++)
615 {
616 for (i=0;i<np;i++)
617 mr_mip->cr[i]=mr_mip->t[i][j];
618 scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
619 divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* R[j] may be too big for redc */
620 redc(_MIPP_ mr_mip->w7,R[j]);
621 nres_modsub(_MIPP_ G[j],R[j],R[j]);
622
623 }
624 mr_mip->check=ON;
625
626 return TRUE;
627 }
628
mr_polymod_set(_MIPD_ int n,big * rf,big * f)629 void mr_polymod_set(_MIPD_ int n, big *rf,big *f)
630 { /* n is degree of f */
631 int i,j,np,newn,logn,degree;
632 mr_utype p;
633 big *F;
634 #ifdef MR_OS_THREADS
635 miracl *mr_mip=get_mip();
636 #endif
637 #ifdef MR_FP_ROUNDING
638 mr_large ip;
639 #endif
640 degree=2*n;
641 newn=1; logn=0;
642 while (degree>newn) { newn<<=1; logn++; }
643
644 if (mr_mip->degree!=0)
645 {
646 for (i=0;i<mr_mip->nprimes;i++)
647 {
648 mr_free(mr_mip->s1[i]);
649 mr_free(mr_mip->s2[i]);
650 }
651 mr_free(mr_mip->s1);
652 mr_free(mr_mip->s2);
653 }
654
655 if (mr_mip->logN<logn)
656 np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
657 else np=mr_mip->nprimes;
658
659 mr_mip->degree=n;
660 mr_mip->s1=(mr_utype **)mr_alloc(_MIPP_ np,sizeof(mr_utype *));
661 mr_mip->s2=(mr_utype **)mr_alloc(_MIPP_ np,sizeof(mr_utype *));
662
663 F=(big *)mr_alloc(_MIPP_ n+1,sizeof(big));
664 for (i=0;i<=n;i++)
665 {
666 F[i]=mirvar(_MIPP_ 0);
667 if (f[i]!=NULL) copy(f[i],F[i]);
668 }
669
670 modxn_1(_MIPP_ newn/2,n,F);
671
672 for (i=0;i<np;i++)
673 {
674 /* reserve space for precomputed tables. */
675 /* Note that s2 is half the size of s1 */
676 mr_mip->s1[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
677 mr_mip->s2[i]=(mr_utype *)mr_alloc(_MIPP_ 1+newn/2,sizeof(mr_utype));
678
679 p=mr_mip->prime[i];
680 #ifdef MR_FP_ROUNDING
681 ip=mr_invert(p);
682 #endif
683 for (j=0;j<n;j++)
684 {
685 if (rf[j]==NULL) mr_mip->s1[i][j]=0;
686 #ifdef MR_FP_ROUNDING
687 else mr_mip->s1[i][j]=mr_sdiv(_MIPP_ rf[j],p,ip,mr_mip->w1);
688 #else
689 else mr_mip->s1[i][j]=mr_sdiv(_MIPP_ rf[j],p,mr_mip->w1);
690 #endif
691 }
692 mr_dif_fft(_MIPP_ logn,i,mr_mip->s1[i]);
693
694 for (j=0;j<=n;j++)
695 #ifdef MR_FP_ROUNDING
696 mr_mip->s2[i][j]=mr_sdiv(_MIPP_ F[j],p,ip,mr_mip->w1);
697 #else
698 mr_mip->s2[i][j]=mr_sdiv(_MIPP_ F[j],p,mr_mip->w1);
699 #endif
700 mr_dif_fft(_MIPP_ logn-1,i,mr_mip->s2[i]);
701 }
702 for (i=0;i<=n;i++) mr_free(F[i]);
703 mr_free(F);
704 }
705
mr_ps_zzn_mul(_MIPD_ int deg,big * x,big * y,big * z)706 int mr_ps_zzn_mul(_MIPD_ int deg,big *x,big *y,big *z)
707 {
708 int i,j,newn,logn,np;
709 mr_utype inv,p,fac;
710 #ifdef MR_OS_THREADS
711 miracl *mr_mip=get_mip();
712 #endif
713 #ifdef MR_FP_ROUNDING
714 mr_large ip;
715 #endif
716 newn=1; logn=0;
717 while (2*deg>newn) { newn <<=1; logn++; }
718 if (mr_mip->logN<logn)
719 np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
720 else np=mr_mip->nprimes;
721
722 for (i=0;i<np;i++)
723 {
724 p=mr_mip->prime[i];
725 #ifdef MR_FP_ROUNDING
726 ip=mr_invert(p);
727 #endif
728 for (j=0;j<deg;j++)
729 {
730 if (x[j]==NULL) mr_mip->wa[j]=0;
731 #ifdef MR_FP_ROUNDING
732 else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);
733 #else
734 else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);
735 #endif
736 }
737 for (j=deg;j<newn;j++) mr_mip->wa[j]=0;
738
739 mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);
740 for (j=0;j<deg;j++)
741 {
742 if (y[j]==NULL) mr_mip->t[i][j]=0;
743 #ifdef MR_FP_ROUNDING
744 else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,ip,mr_mip->w1);
745 #else
746 else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,mr_mip->w1);
747 #endif
748 }
749 for (j=deg;j<newn;j++) mr_mip->t[i][j]=0;
750 mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
751 /* multiply FFTs */
752 for (j=0;j<newn;j++)
753 #ifdef MR_FP_ROUNDING
754 imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
755 #else
756 muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
757 #endif
758 mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
759
760 inv=mr_mip->inverse[i];
761 if (mr_mip->logN > logn)
762 {
763 fac=twop(mr_mip->logN-logn);
764 inv=smul(fac,inv,p);
765 }
766 for (j=0;j<deg;j++) /* np*N */
767 #ifdef MR_FP_ROUNDING
768 imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
769 #else
770 muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
771 #endif
772 }
773 mr_mip->check=OFF;
774 mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
775 for (j=0;j<deg;j++)
776 {
777 for (i=0;i<np;i++)
778 mr_mip->cr[i]=mr_mip->t[i][j];
779 scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
780 divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);
781 redc(_MIPP_ mr_mip->w7,z[j]);
782 }
783 mr_mip->check=ON;
784 return np;
785 }
786
mr_ps_big_mul(_MIPD_ int deg,big * x,big * y,big * z)787 int mr_ps_big_mul(_MIPD_ int deg,big *x,big *y,big *z)
788 { /* Multiply two power series with large integer parameters */
789 int i,j,newn,logn,np;
790 mr_utype inv,p,fac;
791 #ifdef MR_OS_THREADS
792 miracl *mr_mip=get_mip();
793 #endif
794 #ifdef MR_FP_ROUNDING
795 mr_large ip;
796 #endif
797 newn=1; logn=0;
798 while (2*deg>newn) { newn <<=1; logn++; }
799
800 zero(mr_mip->w2);
801 zero(mr_mip->w4);
802
803 /* find biggest element in each series */
804 for (i=0;i<deg;i++)
805 {
806 if (x[i]!=NULL)
807 {
808 absol(x[i],mr_mip->w3);
809 if (mr_compare(mr_mip->w3,mr_mip->w2)>0) copy(mr_mip->w3,mr_mip->w2);
810 }
811 if (y[i]!=NULL)
812 {
813 absol(y[i],mr_mip->w3);
814 if (mr_compare(mr_mip->w3,mr_mip->w4)>0) copy(mr_mip->w3,mr_mip->w4);
815 }
816 }
817 premult(_MIPP_ mr_mip->w4,2,mr_mip->w4); /* range is +ve and -ve */
818 /* so extra factor of 2 included */
819
820 np=mr_fft_init(_MIPP_ logn,mr_mip->w4,mr_mip->w2,TRUE);
821 convert(_MIPP_ 1,mr_mip->w3);
822 /* compute coefficients modulo fft primes */
823 for (i=0;i<np;i++)
824 {
825 p=mr_mip->prime[i];
826 #ifdef MR_FP_ROUNDING
827 ip=mr_invert(p);
828 #endif
829 mr_pmul(_MIPP_ mr_mip->w3,p,mr_mip->w3);
830 for (j=0;j<deg;j++)
831 {
832 if (x[j]==NULL) mr_mip->wa[j]=0;
833 else
834 {
835 if (size(x[j])>=0)
836 {
837 copy(x[j],mr_mip->w1);
838 #ifdef MR_FP_ROUNDING
839 mr_mip->wa[j]=mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
840 #else
841 mr_mip->wa[j]=mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
842 #endif
843 }
844 else
845 {
846 negify(x[j],mr_mip->w1);
847 #ifdef MR_FP_ROUNDING
848 mr_mip->wa[j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
849 #else
850 mr_mip->wa[j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
851 #endif
852 }
853 }
854 }
855 for (j=deg;j<newn;j++) mr_mip->wa[j]=0;
856
857 mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);
858 for (j=0;j<deg;j++)
859 {
860 if (y[j]==NULL) mr_mip->t[i][j]=0;
861 else
862 {
863 if (size(y[j])>=0)
864 {
865 copy(y[j],mr_mip->w1);
866 #ifdef MR_FP_ROUNDING
867 mr_mip->t[i][j]=mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
868 #else
869 mr_mip->t[i][j]=mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
870 #endif
871 }
872 else
873 {
874 negify(y[j],mr_mip->w1);
875 #ifdef MR_FP_ROUNDING
876 mr_mip->t[i][j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
877 #else
878 mr_mip->t[i][j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
879 #endif
880 }
881 }
882 }
883 for (j=deg;j<newn;j++) mr_mip->t[i][j]=0;
884 mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
885 /* multiply FFTs */
886 for (j=0;j<newn;j++)
887 #ifdef MR_FP_ROUNDING
888 imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
889 #else
890 muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
891 #endif
892 mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
893
894 inv=mr_mip->inverse[i];
895 if (mr_mip->logN > logn)
896 {
897 fac=twop(mr_mip->logN-logn);
898 inv=smul(fac,inv,p);
899 }
900 for (j=0;j<deg;j++) /* np*N */
901 #ifdef MR_FP_ROUNDING
902 imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
903 #else
904 muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
905 #endif
906 }
907 /* w3 is product of chinese primes */
908 decr(_MIPP_ mr_mip->w3,1,mr_mip->w4);
909 subdiv(_MIPP_ mr_mip->w4,2,mr_mip->w4); /* find mid-point of range */
910
911 for (j=0;j<deg;j++)
912 {
913 for (i=0;i<np;i++)
914 mr_mip->cr[i]=mr_mip->t[i][j];
915 scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,z[j]); /* N*3*np*np/2 */
916 if (mr_compare(z[j],mr_mip->w4)>=0)
917 { /* In higher half of range, so number is negative */
918 subtract(_MIPP_ mr_mip->w3,z[j],z[j]);
919 negify(z[j],z[j]);
920 }
921 } /* np*np*N/4 */
922 return np;
923 }
924
mr_poly_mul(_MIPD_ int degx,big * x,int degy,big * y,big * z)925 int mr_poly_mul(_MIPD_ int degx,big *x,int degy,big *y,big *z)
926 { /* Multiply two polynomials. The big arrays are of size degree */
927 int i,j,newn,logn,np,degree;
928 mr_utype inv,p,fac;
929 #ifdef MR_OS_THREADS
930 miracl *mr_mip=get_mip();
931 #endif
932 #ifdef MR_FP_ROUNDING
933 mr_large ip;
934 #endif
935 degree=degx+degy;
936 if (x==y)
937 {
938 mr_poly_sqr(_MIPP_ degx,x,z);
939 return degree;
940 }
941 newn=1; logn=0;
942 while (degree+1>newn) { newn<<=1; logn++; }
943
944 if (mr_mip->logN<logn)
945 np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
946 else np=mr_mip->nprimes;
947
948 /* compute coefficients modulo fft primes */
949 for (i=0;i<np;i++)
950 {
951 p=mr_mip->prime[i];
952 #ifdef MR_FP_ROUNDING
953 ip=mr_invert(p);
954 #endif
955 for (j=0;j<=degx;j++)
956 {
957 if (x[j]==NULL) mr_mip->wa[j]=0;
958 #ifdef MR_FP_ROUNDING
959 else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1); /* np*np*N/2 muldivs */
960 #else
961 else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1); /* np*np*N/2 muldivs */
962 #endif
963 }
964 for (j=degx+1;j<newn;j++) mr_mip->wa[j]=0;
965 mr_dif_fft(_MIPP_ logn,i,mr_mip->wa); /* np*N*lgN */
966
967 for (j=0;j<=degy;j++)
968 {
969 if (y[j]==NULL) mr_mip->t[i][j]=0;
970 #ifdef MR_FP_ROUNDING
971 else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,ip,mr_mip->w1); /* np*np*N/2 */
972 #else
973 else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,mr_mip->w1); /* np*np*N/2 */
974 #endif
975 }
976 for (j=degy+1;j<newn;j++) mr_mip->t[i][j]=0;
977 mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
978
979 /* multiply FFTs */
980 for (j=0;j<newn;j++) /* np*N */
981 #ifdef MR_FP_ROUNDING
982 imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
983 #else
984 muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
985 #endif
986 mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]); /* np*N*lgN */
987
988 inv=mr_mip->inverse[i];
989 if (mr_mip->logN > logn)
990 {
991 fac=twop(mr_mip->logN-logn);
992 inv=smul(fac,inv,p);
993 }
994 for (j=0;j<=degree;j++) /* np*N */
995 #ifdef MR_FP_ROUNDING
996 imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
997 #else
998 muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
999 #endif
1000 }
1001 mr_mip->check=OFF;
1002 mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
1003 /* w6 = N.R */
1004 for (j=0;j<=degree;j++)
1005 {
1006 for (i=0;i<np;i++)
1007 mr_mip->cr[i]=mr_mip->t[i][j];
1008 scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7); /* N*3*np*np/2 */
1009 divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* z[j] may be too big for redc */
1010 redc(_MIPP_ mr_mip->w7,z[j]);
1011 } /* np*np*N/4 */
1012 mr_mip->check=ON;
1013 return degree;
1014 }
1015
mr_poly_sqr(_MIPD_ int degx,big * x,big * z)1016 int mr_poly_sqr(_MIPD_ int degx,big *x,big *z)
1017 { /* Multiply two polynomials. The big arrays are of size degree */
1018 int i,j,newn,logn,np,degree;
1019 mr_utype inv,p,fac;
1020 #ifdef MR_OS_THREADS
1021 miracl *mr_mip=get_mip();
1022 #endif
1023 #ifdef MR_FP_ROUNDING
1024 mr_large ip;
1025 #endif
1026 degree=2*degx;
1027 newn=1; logn=0;
1028 while (degree+1>newn) { newn<<=1; logn++; }
1029
1030 if (mr_mip->logN<logn)
1031 np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
1032 else np=mr_mip->nprimes;
1033
1034 /* compute coefficients modulo fft primes */
1035 for (i=0;i<np;i++)
1036 {
1037 p=mr_mip->prime[i];
1038 #ifdef MR_FP_ROUNDING
1039 ip=mr_invert(p);
1040 #endif
1041 for (j=0;j<=degx;j++)
1042 {
1043 if (x[j]==NULL) mr_mip->t[i][j]=0;
1044 #ifdef MR_FP_ROUNDING
1045 else mr_mip->t[i][j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);
1046 #else
1047 else mr_mip->t[i][j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);
1048 #endif
1049 }
1050 for (j=degx+1;j<newn;j++) mr_mip->t[i][j]=0;
1051 mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
1052
1053 /* multiply FFTs */
1054 for (j=0;j<newn;j++)
1055 #ifdef MR_FP_ROUNDING
1056 imuldiv(mr_mip->t[i][j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
1057 #else
1058 muldiv(mr_mip->t[i][j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
1059 #endif
1060 mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);
1061
1062 inv=mr_mip->inverse[i];
1063 if (mr_mip->logN > logn)
1064 { /* adjust 1/N log p for smaller N */
1065 fac=twop(mr_mip->logN-logn);
1066 inv=smul(fac,inv,p);
1067 }
1068 for (j=0;j<=degree;j++)
1069 #ifdef MR_FP_ROUNDING
1070 imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
1071 #else
1072 muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
1073 #endif
1074 }
1075 mr_mip->check=OFF;
1076 mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
1077 /* w6 = N.R */
1078 for (j=0;j<=degree;j++)
1079 { /* apply CRT to each column */
1080 for (i=0;i<np;i++)
1081 mr_mip->cr[i]=mr_mip->t[i][j];
1082
1083 scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
1084 divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6); /* z[j] may be too big for redc */
1085 redc(_MIPP_ mr_mip->w7,z[j]);
1086 }
1087 mr_mip->check=ON;
1088
1089 return degree;
1090 }
1091
init_it(_MIPD_ int logn)1092 static BOOL init_it(_MIPD_ int logn)
1093 { /* find primes, table of roots, inverses etc for new N */
1094 #ifdef MR_OS_THREADS
1095 miracl *mr_mip=get_mip();
1096 #endif
1097
1098 #ifdef MR_ITANIUM
1099 mr_small tm;
1100 #endif
1101 #ifdef MR_WIN64
1102 mr_small tm;
1103 #endif
1104
1105 zero(mr_mip->w15);
1106 mr_mip->w15->len=2; mr_mip->w15->w[0]=0; mr_mip->w15->w[1]=1;
1107
1108 if (mr_fft_init(_MIPP_ logn,mr_mip->w15,mr_mip->w15,FALSE)!=3) return FALSE;
1109
1110 mr_mip->const1=invers(mr_mip->prime[0],mr_mip->prime[1]);
1111 mr_mip->const2=invers(mr_mip->prime[0],mr_mip->prime[2]);
1112 mr_mip->const3=invers(mr_mip->prime[1],mr_mip->prime[2]);
1113 if (mr_mip->base==0)
1114 {
1115 #ifndef MR_NOFULLWIDTH
1116 mr_mip->msw=muldvd(mr_mip->prime[0],mr_mip->prime[1],(mr_small)0,&mr_mip->lsw);
1117 #endif
1118 }
1119 else mr_mip->msw=muldiv(mr_mip->prime[0],mr_mip->prime[1],(mr_small)0,mr_mip->base,&mr_mip->lsw);
1120 mr_mip->logN=logn;
1121 return TRUE;
1122 }
1123
fastmultop(_MIPD_ int n,big x,big y,big z)1124 BOOL fastmultop(_MIPD_ int n,big x,big y,big z)
1125 { /* only return top n words... assumes x and y are n in length */
1126 #ifdef MR_OS_THREADS
1127 miracl *mr_mip=get_mip();
1128 #endif
1129 int len;
1130 mr_mip->check=OFF;
1131 fft_mult(_MIPP_ x,y,mr_mip->w0);
1132 mr_mip->check=ON;
1133 len=mr_lent(mr_mip->w0);
1134 mr_shift(_MIPP_ mr_mip->w0,n-len,mr_mip->w0);
1135 copy(mr_mip->w0,z);
1136 if (len<2*n) return TRUE;
1137 return FALSE;
1138 }
1139
fft_mult(_MIPD_ big x,big y,big z)1140 void fft_mult(_MIPD_ big x,big y,big z)
1141 { /* "fast" O(n.log n) multiplication */
1142 int i,pr,xl,yl,zl,newn,logn;
1143 mr_small v1,v2,v3,ic,c1,c2,p,fac,inv;
1144
1145 #ifdef MR_ITANIUM
1146 mr_small tm;
1147 #endif
1148 #ifdef MR_WIN64
1149 mr_small tm;
1150 #endif
1151 #ifdef MR_FP
1152 mr_small dres;
1153 #endif
1154 mr_lentype sz;
1155 mr_utype *w[3],*wptr,*dptr,*d0,*d1,*d2,t;
1156 #ifdef MR_OS_THREADS
1157 miracl *mr_mip=get_mip();
1158 #endif
1159 #ifdef MR_FP_ROUNDING
1160 mr_large ip;
1161 #endif
1162 if (mr_mip->ERNUM) return;
1163 if (y->len==0 || x->len==0)
1164 {
1165 zero(z);
1166 return;
1167 }
1168
1169 MR_IN(72)
1170
1171 if (mr_notint(x) || mr_notint(y))
1172 {
1173 mr_berror(_MIPP_ MR_ERR_INT_OP);
1174 MR_OUT
1175 return;
1176 }
1177 sz=((x->len&MR_MSBIT)^(y->len&MR_MSBIT));
1178 xl=(int)(x->len&MR_OBITS);
1179 yl=(int)(y->len&MR_OBITS);
1180 zl=xl+yl;
1181 if (xl<512 || yl<512) /* should be 512 */
1182 { /* not worth it! */
1183 multiply(_MIPP_ x,y,z);
1184 MR_OUT
1185 return;
1186 }
1187 if (zl>mr_mip->nib && mr_mip->check)
1188 {
1189 mr_berror(_MIPP_ MR_ERR_OVERFLOW);
1190 MR_OUT
1191 return;
1192 }
1193 newn=1; logn=0;
1194 while (zl>newn) { newn<<=1; logn++;}
1195 if (logn>mr_mip->logN) /* 2^(N+1) settings can be used for 2^N */
1196 { /* numbers too big for current settings */
1197 if (!init_it(_MIPP_ logn))
1198 {
1199 mr_berror(_MIPP_ MR_ERR_OUT_OF_MEMORY);
1200 MR_OUT
1201 return;
1202 }
1203 }
1204 if (newn>2*mr_mip->nib)
1205 {
1206 mr_berror(_MIPP_ MR_ERR_OVERFLOW);
1207 MR_OUT
1208 return;
1209 }
1210
1211 d0=mr_mip->t[0]; d1=mr_mip->t[1]; d2=mr_mip->t[2];
1212 w[0]=mr_mip->wa; w[1]=mr_mip->wb; w[2]=mr_mip->wc;
1213
1214 fac=twop(mr_mip->logN-logn);
1215
1216 for (pr=0;pr<3;pr++)
1217 { /* multiply mod each prime */
1218 p=mr_mip->prime[pr];
1219 inv=mr_mip->inverse[pr];
1220 #ifdef MR_FP_ROUNDING
1221 ip=mr_invert(p);
1222 #endif
1223 if (fac!=1) inv=smul(fac,inv,p); /* adjust 1/N mod p */
1224
1225 dptr=mr_mip->t[pr];
1226 wptr=w[pr];
1227
1228 for (i=0;i<xl;i++) dptr[i]=MR_REMAIN(x->w[i],p);
1229 for (i=xl;i<newn;i++) dptr[i]=0;
1230
1231 mr_dif_fft(_MIPP_ logn,pr,dptr);
1232
1233
1234 if (x!=y)
1235 {
1236 if (!mr_mip->same || !mr_mip->first_one)
1237 {
1238 for (i=0;i<yl;i++) wptr[i]=MR_REMAIN(y->w[i],p);
1239 for (i=yl;i<newn;i++) wptr[i]=0;
1240 mr_dif_fft(_MIPP_ logn,pr,wptr);
1241 }
1242 }
1243 else wptr=dptr;
1244
1245 for (i=0;i<newn;i++)
1246 { /* "multiply" Fourier transforms */
1247 #ifdef MR_FP_ROUNDING
1248 imuldiv(dptr[i],wptr[i],(mr_small)0,p,ip,(mr_small *)&dptr[i]);
1249 #else
1250 muldiv(dptr[i],wptr[i],(mr_small)0,p,(mr_small *)&dptr[i]);
1251 #endif
1252 }
1253
1254 mr_dit_fft(_MIPP_ logn,pr,dptr);
1255
1256 for (i=0;i<newn;i++)
1257 { /* convert to mixed radix form *
1258 * using chinese remainder thereom *
1259 * but first multiply by 1/N mod p */
1260 #ifdef MR_FP_ROUNDING
1261 imuldiv(dptr[i],inv,(mr_small)0,p,ip,(mr_small *)&dptr[i]);
1262 #else
1263 muldiv(dptr[i],inv,(mr_small)0,p,(mr_small *)&dptr[i]);
1264 #endif
1265 if (pr==1)
1266 {
1267 t=d1[i]-d0[i];
1268 while (t<0) t+=mr_mip->prime[1];
1269 muldiv(t,mr_mip->const1,(mr_small)0,mr_mip->prime[1],(mr_small *)&d1[i]);
1270 }
1271 if (pr==2)
1272 {
1273 t=d2[i]-d0[i];
1274 while (t<0) t+=mr_mip->prime[2];
1275 muldiv(t,mr_mip->const2,(mr_small)0,mr_mip->prime[2],(mr_small *)&t);
1276 t-=d1[i];
1277 while (t<0) t+=mr_mip->prime[2];
1278 muldiv(t,mr_mip->const3,(mr_small)0,mr_mip->prime[2],(mr_small *)&d2[i]);
1279 }
1280 }
1281 }
1282
1283 mr_mip->first_one=TRUE;
1284
1285 zero(z);
1286 c1=c2=0;
1287 if (mr_mip->base==0) for (i=0;i<zl;i++)
1288 { /* propagate carries */
1289 #ifndef MR_NOFULLWIDTH
1290 v1=d0[i];
1291 v2=d1[i];
1292 v3=d2[i];
1293 v2=muldvd(v2,mr_mip->prime[0],v1,&v1);
1294 c1+=v1;
1295 if (c1<v1) v2++;
1296 ic=c2+muldvd(mr_mip->lsw,v3,c1,&z->w[i]);
1297 c2=muldvd(mr_mip->msw,v3,ic,&c1);
1298 c1+=v2;
1299 if (c1<v2) c2++;
1300 #endif
1301 }
1302 else for (i=0;i<zl;i++)
1303 { /* propagate carries */
1304 v1=d0[i];
1305 v2=d1[i];
1306 v3=d2[i];
1307 #ifdef MR_FP_ROUNDING
1308 v2=imuldiv(v2,mr_mip->prime[0],v1+c1,mr_mip->base,mr_mip->inverse_base,&v1);
1309 ic=c2+imuldiv(mr_mip->lsw,v3,v1,mr_mip->base,mr_mip->inverse_base,&z->w[i]);
1310 c2=imuldiv(mr_mip->msw,v3,v2+ic,mr_mip->base,mr_mip->inverse_base,&c1);
1311 #else
1312 v2=muldiv(v2,mr_mip->prime[0],(mr_small)(v1+c1),mr_mip->base,&v1);
1313 ic=c2+muldiv(mr_mip->lsw,v3,v1,mr_mip->base,&z->w[i]);
1314 c2=muldiv(mr_mip->msw,v3,(mr_small)(v2+ic),mr_mip->base,&c1);
1315 #endif
1316 }
1317 z->len=(sz|zl); /* set length and sign of result */
1318 mr_lzero(z);
1319 MR_OUT
1320 }
1321
1322 #endif
1323
1324 /*
1325 main()
1326 {
1327 big x,y,z,w;
1328 int i,j,k;
1329 miracl *mip=mirsys(1024,0);
1330 x=mirvar(0);
1331 y=mirvar(0);
1332 z=mirvar(0);
1333 w=mirvar(0);
1334
1335 mip->IOBASE=16;
1336 bigbits(512*MIRACL,x);
1337 bigbits(512*MIRACL,y);
1338
1339
1340 multiply(x,x,z);
1341
1342 cotnum(z,stdout);
1343
1344 fft_mult(x,x,w);
1345
1346 cotnum(w,stdout);
1347 if (mr_compare(z,w)!=0) printf("Problems\n");
1348
1349 }
1350 */
1351