1 
2 /***************************************************************************
3                                                                            *
4 Copyright 2012 CertiVox IOM Ltd.                                           *
5                                                                            *
6 This file is part of CertiVox MIRACL Crypto SDK.                           *
7                                                                            *
8 The CertiVox MIRACL Crypto SDK provides developers with an                 *
9 extensive and efficient set of cryptographic functions.                    *
10 For further information about its features and functionalities please      *
11 refer to http://www.certivox.com                                           *
12                                                                            *
13 * The CertiVox MIRACL Crypto SDK is free software: you can                 *
14   redistribute it and/or modify it under the terms of the                  *
15   GNU Affero General Public License as published by the                    *
16   Free Software Foundation, either version 3 of the License,               *
17   or (at your option) any later version.                                   *
18                                                                            *
19 * The CertiVox MIRACL Crypto SDK is distributed in the hope                *
20   that it will be useful, but WITHOUT ANY WARRANTY; without even the       *
21   implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
22   See the GNU Affero General Public License for more details.              *
23                                                                            *
24 * You should have received a copy of the GNU Affero General Public         *
25   License along with CertiVox MIRACL Crypto SDK.                           *
26   If not, see <http://www.gnu.org/licenses/>.                              *
27                                                                            *
28 You can be released from the requirements of the license by purchasing     *
29 a commercial license. Buying such a license is mandatory as soon as you    *
30 develop commercial activities involving the CertiVox MIRACL Crypto SDK     *
31 without disclosing the source code of your own applications, or shipping   *
32 the CertiVox MIRACL Crypto SDK with a closed source product.               *
33                                                                            *
34 ***************************************************************************/
35 /*
36  *   MIRACL fast fourier multiplication routine, using 3 prime method.
37  *   mrfast.c  - only faster for very high precision multiplication
38  *               of numbers > about 4096 bits  (see below)
39  *   See "The Fast Fourier Transform in a Finite Field"  by J.M. Pollard,
40  *   Mathematics of Computation, Vol. 25, No. 114, April 1971, pp365-374
41  *   Also Knuth Vol. 2, Chapt. 4.3.3
42  *
43  *   Takes time preportional to 9+15N+9N.lg(N) to multiply two different
44  *   N digit numbers. This reduces to 6+18N+6N.lg(N) when squaring.
45  *   The classic method takes N.N and N(N+1)/2 respectively
46  *
47  *   Fast Polynomial arithmetic
48  *
49  *   See "A New Polynomial Factorisation Algorithm and its Implementation"
50  *   by Victor Shoup, Jl. Symbolic Computation 1996
51  *   Uses FFT method for fast arithmetic of large degree polynomials
52  *
53  */
54 
55 #include <stdlib.h>
56 #include "miracl.h"
57 
58 #ifdef MR_FP
59 #include <math.h>
60 #endif
61 
62 #ifdef MR_WIN64
63 #include <intrin.h>
64 #endif
65 
66 #ifndef MR_STATIC
67 
twop(int n)68 static mr_utype twop(int n)
69 { /* 2^n */
70 #ifdef MR_FP
71     int i;
72 #endif
73     mr_utype r=1;
74     if (n==0) return r;
75 #ifdef MR_FP
76     for (i=0;i<n;i++) r*=2.0;
77     return r;
78 #else
79     return  (r<<n);
80 #endif
81 }
82 
mr_fft_init(_MIPD_ int logn,big m1,big m2,BOOL cr)83 int mr_fft_init(_MIPD_ int logn,big m1,big m2,BOOL cr)
84 { /* logn is number of bits, m1 and m2 are the largest elements
85      that will arise in each number to be multiplied */
86     mr_small kk;
87     int i,j,newn,pr;
88     mr_utype p,proot;
89 #ifdef MR_OS_THREADS
90     miracl *mr_mip=get_mip();
91 #endif
92 
93     newn=(1<<logn);
94     mr_mip->check=OFF;
95     multiply(_MIPP_ m1,m2,mr_mip->w5);
96     premult(_MIPP_ mr_mip->w5,2*newn+1,mr_mip->w5);
97     kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
98     if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
99 
100     pr=0;
101     while (size(mr_mip->w5)>0)
102     { /* find out how many primes will be needed */
103         do
104         {
105             kk--;
106             p=kk*newn+1;
107         } while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
108 #ifdef MR_FP_ROUNDING
109         mr_sdiv(_MIPP_ mr_mip->w5,p,mr_invert(p),mr_mip->w5);
110 #else
111         mr_sdiv(_MIPP_ mr_mip->w5,p,mr_mip->w5);
112 #endif
113         pr++;
114     }
115    mr_mip->check=ON;
116 /* if nothing has changed, don't recalculate */
117     if (logn<=mr_mip->logN && pr==mr_mip->nprimes) return pr;
118     fft_reset(_MIPPO_ );
119 
120 
121     mr_mip->prime=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
122     mr_mip->inverse=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
123     mr_mip->roots=(mr_utype**)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
124     mr_mip->t= (mr_utype **)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
125     mr_mip->cr=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
126     mr_mip->wa=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
127     mr_mip->wb=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
128     mr_mip->wc=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
129 
130     kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
131     if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
132     for (i=0;i<pr;i++)
133     { /* find the primes again */
134         mr_mip->roots[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
135         mr_mip->t[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
136 
137         do
138         {
139             kk--;
140             p=kk*newn+1;
141         } while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
142 
143         proot=p-1;
144         for (j=1;j<logn;j++) proot=sqrmp(proot,p);
145         mr_mip->roots[i][0]=proot;   /* build residue table */
146         for (j=1;j<newn;j++) mr_mip->roots[i][j]=smul(mr_mip->roots[i][j-1],proot,p);
147         mr_mip->inverse[i]=invers((mr_small)newn,p);
148         mr_mip->prime[i]=p;
149     }
150 
151     mr_mip->logN=logn;
152 
153     mr_mip->nprimes=pr;
154 /* set up chinese remainder structure */
155     if (cr)
156         if (!scrt_init(_MIPP_ &mr_mip->chin,pr,mr_mip->prime)) fft_reset(_MIPPO_ );
157     return pr;
158 }
159 
fft_reset(_MIPDO_)160 void fft_reset(_MIPDO_ )
161 { /* reclaim any space used by FFT */
162     int i;
163 #ifdef MR_OS_THREADS
164     miracl *mr_mip=get_mip();
165 #endif
166 
167     if (mr_mip->degree!=0)
168     { /* clear any precomputed tables */
169         for (i=0;i<mr_mip->nprimes;i++)
170         {
171             mr_free(mr_mip->s1[i]);
172             mr_free(mr_mip->s2[i]);
173         }
174         mr_free(mr_mip->s1);
175         mr_free(mr_mip->s2);
176         mr_mip->degree=0;
177     }
178 
179     if (mr_mip->logN!=0)
180     { /* clear away old stuff */
181         for (i=0;i<mr_mip->nprimes;i++)
182         {
183             mr_free(mr_mip->roots[i]);
184             mr_free(mr_mip->t[i]);
185         }
186         mr_free(mr_mip->wa);
187         mr_free(mr_mip->wb);
188         mr_free(mr_mip->wc);
189         mr_free(mr_mip->cr);
190         mr_free(mr_mip->t);
191         mr_free(mr_mip->roots);
192         mr_free(mr_mip->inverse);
193         mr_free(mr_mip->prime);
194         mr_mip->nprimes=0;
195         mr_mip->logN=0;
196         mr_mip->same=FALSE;
197     }
198 /* clear CRT structure */
199     if (mr_mip->chin.NP!=0) scrt_end(&mr_mip->chin);
200 }
201 
mr_dif_fft(_MIPD_ int logn,int pr,mr_utype * data)202 void mr_dif_fft(_MIPD_ int logn,int pr,mr_utype *data)
203 { /* decimate-in-frequency fourier transform */
204     int mmax,m,j,k,istep,i,ii,jj,newn,offset;
205     mr_utype w,temp,prime,*roots;
206 #ifdef MR_NOASM
207     mr_large dble,ldres;
208 #endif
209 #ifdef MR_OS_THREADS
210     miracl *mr_mip=get_mip();
211 #endif
212 #ifdef MR_FP_ROUNDING
213     mr_large iprime;
214 #endif
215     prime=mr_mip->prime[pr];
216     roots=mr_mip->roots[pr];
217 #ifdef MR_FP_ROUNDING
218     iprime=mr_invert(prime);
219 #endif
220 
221     newn=(1<<logn);
222     offset=(mr_mip->logN-logn);
223     mmax=newn;
224     for (k=0;k<logn;k++) {
225         istep=mmax;
226         mmax>>=1;
227         ii=newn;
228         jj=newn/istep;
229         ii-=jj;
230         for (i=0;i<newn;i+=istep)
231         { /* special case root=1 */
232             j=i+mmax;
233             temp=data[i]-data[j];
234             if (temp<0) temp+=prime;
235             data[i]+=data[j];
236             if (data[i]>=prime) data[i]-=prime;
237             data[j]=temp;
238         }
239         for (m=1;m<mmax;m++) {
240 
241             w=roots[(ii<<offset)-1];
242             ii-=jj;
243 
244 #ifndef MR_FP
245 #ifdef INLINE_ASM
246 #if INLINE_ASM == 3
247 
248 #define MR_IMPASM
249 
250 /*  esi = i
251     edi = j
252     ebx = data
253     edx = temp
254     ecx = prime
255 */
256 
257         ASM mov   ecx,DWORD PTR prime
258         ASM mov   ebx,DWORD PTR data
259         ASM mov   esi,DWORD PTR m
260     hop1:
261         ASM cmp   esi,DWORD PTR newn
262         ASM jge   hop0
263         ASM mov   edi,esi
264         ASM add   edi,DWORD PTR mmax
265 
266         ASM mov   edx,[ebx+4*esi]
267         ASM add   edx,ecx
268         ASM sub   edx,[ebx+4*edi]
269         ASM mov   eax,[ebx+4*esi]
270         ASM add   eax,[ebx+4*edi]
271         ASM cmp   eax,ecx
272         ASM jl    hop2
273         ASM sub   eax,ecx
274     hop2:
275         ASM mov   [ebx+4*esi],eax
276 
277         ASM mov   eax,DWORD PTR w
278         ASM mul   edx
279         ASM div   ecx
280         ASM mov   [ebx+4*edi],edx
281 
282         ASM add   esi,DWORD PTR istep
283         ASM jmp   hop1
284     hop0:
285         ASM nop
286 #endif
287 #if INLINE_ASM == 4
288 
289 #define MR_IMPASM
290     ASM (
291            "movl %0,%%ecx\n"
292            "movl %1,%%ebx\n"
293            "movl %2,%%esi\n"
294          "hop1:\n"
295            "cmpl %3,%%esi\n"
296            "jge hop0\n"
297            "movl %%esi,%%edi\n"
298            "addl %4,%%edi\n"
299 
300            "movl (%%ebx,%%esi,4),%%edx\n"
301            "addl %%ecx,%%edx\n"
302            "subl (%%ebx,%%edi,4),%%edx\n"
303            "movl (%%ebx,%%esi,4),%%eax\n"
304            "addl (%%ebx,%%edi,4),%%eax\n"
305            "cmpl %%ecx,%%eax\n"
306            "jl hop2\n"
307            "subl %%ecx,%%eax\n"
308          "hop2:\n"
309            "movl %%eax,(%%ebx,%%esi,4)\n"
310            "movl %5,%%eax\n"
311            "mull %%edx\n"
312            "divl %%ecx\n"
313            "movl %%edx,(%%ebx,%%edi,4)\n"
314            "addl %6,%%esi\n"
315            "jmp hop1\n"
316          "hop0:\n"
317            "nop\n"
318          :
319          : "m"(prime),"m"(data),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
320          : "eax","edi","esi","edi","ebx","ecx","edx","memory"
321     );
322 #endif
323 #endif
324 #endif
325 #ifndef MR_IMPASM
326             for (i=m;i<newn;i+=istep) {
327                 j=i+mmax;
328                 temp=prime+data[i]-data[j];
329                 data[i]+=data[j];
330                 if (data[i]>=prime) data[i]-=prime;
331 
332 #ifdef MR_NOASM
333                 dble=(mr_large)w*temp;
334 #ifdef MR_FP_ROUNDING
335                 data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
336 #else
337                 data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
338 #endif
339 #else
340 #ifdef MR_FP_ROUNDING
341                 imuldiv(w,temp,(mr_small)0,prime,iprime,(mr_small *)&data[j]);
342 #else
343                 muldiv(w,temp,(mr_small)0,prime,(mr_small *)&data[j]);
344 #endif
345 #endif
346             }
347 #endif
348         }
349     }
350 }
351 
mr_dit_fft(_MIPD_ int logn,int pr,mr_utype * data)352 void mr_dit_fft(_MIPD_ int logn,int pr,mr_utype *data)
353 { /* decimate-in-time inverse fourier transform */
354     int mmax,m,j,k,i,istep,ii,jj,newn,offset;
355     mr_utype w,temp,prime,*roots;
356 #ifdef MR_NOASM
357     mr_large dble,ldres;
358 #endif
359 #ifdef MR_OS_THREADS
360     miracl *mr_mip=get_mip();
361 #endif
362 #ifdef MR_FP_ROUNDING
363     mr_large iprime;
364 #endif
365     prime=mr_mip->prime[pr];
366     roots=mr_mip->roots[pr];
367     offset=(mr_mip->logN-logn);
368 #ifdef MR_FP_ROUNDING
369     iprime=mr_invert(prime);
370 #endif
371     newn=(1<<logn);
372     mmax=1;
373     for (k=0;k<logn;k++) {
374         istep=(mmax<<1);
375         ii=0;
376         jj=newn/istep;
377         ii+=jj;
378         for (i=0;i<newn;i+=istep)
379         { /* special case root=1 */
380             j=i+mmax;
381             temp=data[j];
382             data[j]=data[i]-temp;
383             if (data[j]<0) data[j]+=prime;
384             data[i]+=temp;
385             if (data[i]>=prime) data[i]-=prime;
386         }
387         for (m=1;m<mmax;m++) {
388             w=roots[(ii<<offset)-1];
389             ii+=jj;
390 
391 #ifndef MR_FP
392 #ifdef INLINE_ASM
393 #if INLINE_ASM == 3
394 
395 #define MR_IMPASM
396 /*  esi = i
397     edi = j
398     ebx = data
399     edx = temp
400     ecx = prime
401 */
402 
403 
404         ASM mov   ebx,DWORD PTR data
405         ASM mov   ecx,DWORD PTR prime
406         ASM mov   esi,DWORD PTR m
407 hop4:
408         ASM cmp   esi,DWORD PTR newn
409         ASM jge   hop5
410         ASM mov   edi,esi
411         ASM add   edi,DWORD PTR mmax
412 
413         ASM mov   eax,[ebx+4*edi]
414         ASM mul   DWORD PTR w
415         ASM div   ecx
416 
417         ASM cmp   [ebx+4*esi],ecx
418         ASM jl    hop3
419         ASM sub   [ebx+4*esi],ecx
420 hop3:
421         ASM mov   eax,[ebx+4*esi]
422         ASM add   [ebx+4*esi],edx
423         ASM add   eax,ecx
424         ASM sub   eax,edx
425         ASM mov   [ebx+4*edi],eax
426 
427         ASM add   esi,DWORD PTR istep
428         ASM jmp   hop4
429 hop5:
430         ASM nop
431 #endif
432 #if INLINE_ASM == 4
433 
434 #define MR_IMPASM
435     ASM (
436            "movl %0,%%ebx\n"
437            "movl %1,%%ecx\n"
438            "movl %2,%%esi\n"
439          "hop4:\n"
440            "cmpl %3,%%esi\n"
441            "jge hop5\n"
442            "movl %%esi,%%edi\n"
443            "addl %4,%%edi\n"
444 
445            "movl (%%ebx,%%edi,4),%%eax\n"
446            "mull %5\n"
447            "divl %%ecx\n"
448 
449            "cmpl %%ecx,(%%ebx,%%esi,4)\n"
450            "jl hop3\n"
451            "subl %%ecx,(%%ebx,%%esi,4)\n"
452          "hop3:\n"
453            "movl (%%ebx,%%esi,4),%%eax\n"
454            "addl %%edx,(%%ebx,%%esi,4)\n"
455            "addl %%ecx,%%eax\n"
456            "subl %%edx,%%eax\n"
457            "movl %%eax,(%%ebx,%%edi,4)\n"
458 
459            "addl %6,%%esi\n"
460            "jmp hop4\n"
461          "hop5:\n"
462             "nop\n"
463          :
464          : "m"(data),"m"(prime),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
465          : "eax","edi","esi","edi","ebx","ecx","edx","memory"
466         );
467 #endif
468 #endif
469 #endif
470 #ifndef MR_IMPASM
471             for (i=m;i<newn;i+=istep) {
472                 j=i+mmax;
473 #ifdef MR_NOASM
474                 dble=(mr_large)w*data[j];
475 #ifdef MR_FP_ROUNDING
476                 temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
477 #else
478                 temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
479 #endif
480 #else
481 #ifdef MR_FP_ROUNDING
482                 imuldiv(w,data[j],(mr_small)0,prime,iprime,(mr_small *)&temp);
483 #else
484                 muldiv(w,data[j],(mr_small)0,prime,(mr_small *)&temp);
485 #endif
486 #endif
487                 data[j]=data[i]-temp;
488                 if (data[j]<0) data[j]+=prime;
489                 data[i] += temp;
490                 if (data[i]>=prime) data[i]-=prime;
491             }
492 #endif
493         }
494         mmax=istep;
495     }
496 }
497 
modxn_1(_MIPD_ int n,int deg,big * x)498 static void modxn_1(_MIPD_ int n,int deg,big *x)
499 {  /* set X (of degree deg) =X mod x^n-1 = X%x^n + X/x^n */
500     int i;
501     for (i=0;n+i<=deg;i++)
502     {
503         nres_modadd(_MIPP_ x[i],x[n+i],x[i]);
504         zero(x[n+i]);
505     }
506 }
507 
mr_poly_rem(_MIPD_ int dg,big * G,big * R)508 BOOL mr_poly_rem(_MIPD_ int dg,big *G,big *R)
509 { /* G is a polynomial of degree dg - G is overwritten */
510     int i,j,newn,logn,np,n;
511     mr_utype p,inv,fac;
512 #ifdef MR_OS_THREADS
513     miracl *mr_mip=get_mip();
514 #endif
515 #ifdef MR_FP_ROUNDING
516     mr_large ip;
517 #endif
518 
519     n=mr_mip->degree;  /* degree of modulus */
520     if (n==0) return FALSE; /* the preset tables have been destroyed */
521     np=mr_mip->nprimes;
522 
523     newn=1; logn=0;
524     while (2*n>newn) { newn<<=1; logn++; }
525 
526     for (i=0;i<np;i++)
527     {
528         p=mr_mip->prime[i];
529 #ifdef MR_FP_ROUNDING
530         ip=mr_invert(p);
531         for (j=n;j<=dg;j++)
532             mr_mip->t[i][j-n]=mr_sdiv(_MIPP_ G[j],p,ip,mr_mip->w1);
533 #else
534         for (j=n;j<=dg;j++)
535             mr_mip->t[i][j-n]=mr_sdiv(_MIPP_ G[j],p,mr_mip->w1);
536 #endif
537         for (j=dg-n+1;j<newn;j++) mr_mip->t[i][j]=0;
538         mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
539         for (j=0;j<newn;j++)
540 #ifdef MR_FP_ROUNDING
541             imuldiv(mr_mip->t[i][j],mr_mip->s1[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
542 #else
543             muldiv(mr_mip->t[i][j],mr_mip->s1[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
544 #endif
545         mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);
546         inv=mr_mip->inverse[i];
547         if (mr_mip->logN > logn)
548         { /* adjust 1/N log p for N/2, N/4 etc */
549             fac=twop(mr_mip->logN-logn);
550             inv=smul(fac,inv,p);
551         }
552         for (j=0;j<n;j++)
553 #ifdef MR_FP_ROUNDING
554             imuldiv(mr_mip->t[i][j+n-1],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j+n-1]);
555 #else
556             muldiv(mr_mip->t[i][j+n-1],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j+n-1]);
557 #endif
558     }
559 
560     mr_mip->check=OFF;
561     mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
562        /* w6 = N.R */
563     for (j=0;j<n;j++)
564     {
565         for (i=0;i<np;i++)
566             mr_mip->cr[i]=mr_mip->t[i][j+n-1];
567         scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
568         divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);  /* R[j] may be too big for redc */
569         redc(_MIPP_ mr_mip->w7,R[j]);
570     }
571     mr_mip->check=ON;
572 
573     for (i=0;i<np;i++)
574     {
575         p=mr_mip->prime[i];
576 #ifdef MR_FP_ROUNDING
577         ip=mr_invert(p);
578         for (j=0;j<n;j++)
579             mr_mip->t[i][j]=mr_sdiv(_MIPP_ R[j],p,ip,mr_mip->w1);
580 #else
581         for (j=0;j<n;j++)
582             mr_mip->t[i][j]=mr_sdiv(_MIPP_ R[j],p,mr_mip->w1);
583 #endif
584         for (j=n;j<1+newn/2;j++) mr_mip->t[i][j]=0;
585 
586         mr_dif_fft(_MIPP_ logn-1,i,mr_mip->t[i]);  /* Note: Half size */
587         for (j=0;j<newn/2;j++)
588 #ifdef MR_FP_ROUNDING
589             imuldiv(mr_mip->t[i][j],mr_mip->s2[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
590 #else
591             muldiv(mr_mip->t[i][j],mr_mip->s2[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
592 #endif
593         mr_dit_fft(_MIPP_ logn-1,i,mr_mip->t[i]);
594 
595         inv=mr_mip->inverse[i];
596         if (mr_mip->logN > logn-1)
597         {
598             fac=twop(mr_mip->logN-logn+1);
599             inv=smul(fac,inv,p);
600         }
601         for (j=0;j<n;j++)
602 #ifdef MR_FP_ROUNDING
603             imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
604 #else
605             muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
606 #endif
607     }
608 
609     modxn_1(_MIPP_ newn/2,dg,G);    /* G=G mod 2^x - 1 */
610 
611     mr_mip->check=OFF;
612     mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
613        /* w6 = N.R */
614     for (j=0;j<n;j++)
615     {
616         for (i=0;i<np;i++)
617             mr_mip->cr[i]=mr_mip->t[i][j];
618         scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
619         divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);  /* R[j] may be too big for redc */
620         redc(_MIPP_ mr_mip->w7,R[j]);
621         nres_modsub(_MIPP_ G[j],R[j],R[j]);
622 
623     }
624     mr_mip->check=ON;
625 
626     return TRUE;
627 }
628 
mr_polymod_set(_MIPD_ int n,big * rf,big * f)629 void mr_polymod_set(_MIPD_ int n, big *rf,big *f)
630 { /* n is degree of f */
631     int i,j,np,newn,logn,degree;
632     mr_utype p;
633     big *F;
634 #ifdef MR_OS_THREADS
635     miracl *mr_mip=get_mip();
636 #endif
637 #ifdef MR_FP_ROUNDING
638     mr_large ip;
639 #endif
640     degree=2*n;
641     newn=1; logn=0;
642     while (degree>newn) { newn<<=1; logn++; }
643 
644     if (mr_mip->degree!=0)
645     {
646         for (i=0;i<mr_mip->nprimes;i++)
647         {
648             mr_free(mr_mip->s1[i]);
649             mr_free(mr_mip->s2[i]);
650         }
651         mr_free(mr_mip->s1);
652         mr_free(mr_mip->s2);
653     }
654 
655     if (mr_mip->logN<logn)
656         np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
657     else np=mr_mip->nprimes;
658 
659     mr_mip->degree=n;
660     mr_mip->s1=(mr_utype **)mr_alloc(_MIPP_ np,sizeof(mr_utype *));
661     mr_mip->s2=(mr_utype **)mr_alloc(_MIPP_ np,sizeof(mr_utype *));
662 
663     F=(big *)mr_alloc(_MIPP_ n+1,sizeof(big));
664     for (i=0;i<=n;i++)
665     {
666         F[i]=mirvar(_MIPP_ 0);
667         if (f[i]!=NULL) copy(f[i],F[i]);
668     }
669 
670     modxn_1(_MIPP_ newn/2,n,F);
671 
672     for (i=0;i<np;i++)
673     {
674       /* reserve space for precomputed tables.   */
675       /* Note that s2 is half the size of s1     */
676         mr_mip->s1[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
677         mr_mip->s2[i]=(mr_utype *)mr_alloc(_MIPP_ 1+newn/2,sizeof(mr_utype));
678 
679         p=mr_mip->prime[i];
680 #ifdef MR_FP_ROUNDING
681         ip=mr_invert(p);
682 #endif
683         for (j=0;j<n;j++)
684         {
685             if (rf[j]==NULL) mr_mip->s1[i][j]=0;
686 #ifdef MR_FP_ROUNDING
687             else mr_mip->s1[i][j]=mr_sdiv(_MIPP_ rf[j],p,ip,mr_mip->w1);
688 #else
689             else mr_mip->s1[i][j]=mr_sdiv(_MIPP_ rf[j],p,mr_mip->w1);
690 #endif
691         }
692         mr_dif_fft(_MIPP_ logn,i,mr_mip->s1[i]);
693 
694        for (j=0;j<=n;j++)
695 #ifdef MR_FP_ROUNDING
696            mr_mip->s2[i][j]=mr_sdiv(_MIPP_ F[j],p,ip,mr_mip->w1);
697 #else
698            mr_mip->s2[i][j]=mr_sdiv(_MIPP_ F[j],p,mr_mip->w1);
699 #endif
700        mr_dif_fft(_MIPP_ logn-1,i,mr_mip->s2[i]);
701     }
702     for (i=0;i<=n;i++) mr_free(F[i]);
703     mr_free(F);
704 }
705 
mr_ps_zzn_mul(_MIPD_ int deg,big * x,big * y,big * z)706 int mr_ps_zzn_mul(_MIPD_ int deg,big *x,big *y,big *z)
707 {
708     int i,j,newn,logn,np;
709     mr_utype inv,p,fac;
710 #ifdef MR_OS_THREADS
711     miracl *mr_mip=get_mip();
712 #endif
713 #ifdef MR_FP_ROUNDING
714     mr_large ip;
715 #endif
716     newn=1; logn=0;
717     while (2*deg>newn) { newn <<=1; logn++; }
718     if (mr_mip->logN<logn)
719         np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
720     else np=mr_mip->nprimes;
721 
722     for (i=0;i<np;i++)
723     {
724         p=mr_mip->prime[i];
725 #ifdef MR_FP_ROUNDING
726         ip=mr_invert(p);
727 #endif
728         for (j=0;j<deg;j++)
729         {
730             if (x[j]==NULL) mr_mip->wa[j]=0;
731 #ifdef MR_FP_ROUNDING
732             else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);
733 #else
734             else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);
735 #endif
736         }
737         for (j=deg;j<newn;j++) mr_mip->wa[j]=0;
738 
739         mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);
740         for (j=0;j<deg;j++)
741         {
742             if (y[j]==NULL) mr_mip->t[i][j]=0;
743 #ifdef MR_FP_ROUNDING
744             else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,ip,mr_mip->w1);
745 #else
746             else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,mr_mip->w1);
747 #endif
748         }
749         for (j=deg;j<newn;j++) mr_mip->t[i][j]=0;
750         mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
751   /* multiply FFTs */
752         for (j=0;j<newn;j++)
753 #ifdef MR_FP_ROUNDING
754             imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
755 #else
756             muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
757 #endif
758         mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);           /* np*N*lgN */
759 
760         inv=mr_mip->inverse[i];
761         if (mr_mip->logN > logn)
762         {
763             fac=twop(mr_mip->logN-logn);
764             inv=smul(fac,inv,p);
765         }
766         for (j=0;j<deg;j++)                   /* np*N */
767 #ifdef MR_FP_ROUNDING
768             imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
769 #else
770             muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
771 #endif
772     }
773     mr_mip->check=OFF;
774     mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
775     for (j=0;j<deg;j++)
776     {
777         for (i=0;i<np;i++)
778             mr_mip->cr[i]=mr_mip->t[i][j];
779         scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
780         divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);
781         redc(_MIPP_ mr_mip->w7,z[j]);
782     }
783     mr_mip->check=ON;
784     return np;
785 }
786 
mr_ps_big_mul(_MIPD_ int deg,big * x,big * y,big * z)787 int mr_ps_big_mul(_MIPD_ int deg,big *x,big *y,big *z)
788 { /* Multiply two power series with large integer parameters */
789     int i,j,newn,logn,np;
790     mr_utype inv,p,fac;
791 #ifdef MR_OS_THREADS
792     miracl *mr_mip=get_mip();
793 #endif
794 #ifdef MR_FP_ROUNDING
795     mr_large ip;
796 #endif
797     newn=1; logn=0;
798     while (2*deg>newn) { newn <<=1; logn++; }
799 
800     zero(mr_mip->w2);
801     zero(mr_mip->w4);
802 
803 /* find biggest element in each series */
804     for (i=0;i<deg;i++)
805     {
806         if (x[i]!=NULL)
807         {
808             absol(x[i],mr_mip->w3);
809             if (mr_compare(mr_mip->w3,mr_mip->w2)>0) copy(mr_mip->w3,mr_mip->w2);
810         }
811         if (y[i]!=NULL)
812         {
813             absol(y[i],mr_mip->w3);
814             if (mr_compare(mr_mip->w3,mr_mip->w4)>0) copy(mr_mip->w3,mr_mip->w4);
815         }
816     }
817     premult(_MIPP_ mr_mip->w4,2,mr_mip->w4);     /* range is +ve and -ve */
818                                           /* so extra factor of 2 included */
819 
820     np=mr_fft_init(_MIPP_ logn,mr_mip->w4,mr_mip->w2,TRUE);
821     convert(_MIPP_ 1,mr_mip->w3);
822 /* compute coefficients modulo fft primes */
823     for (i=0;i<np;i++)
824     {
825         p=mr_mip->prime[i];
826 #ifdef MR_FP_ROUNDING
827         ip=mr_invert(p);
828 #endif
829         mr_pmul(_MIPP_ mr_mip->w3,p,mr_mip->w3);
830         for (j=0;j<deg;j++)
831         {
832             if (x[j]==NULL) mr_mip->wa[j]=0;
833             else
834             {
835                 if (size(x[j])>=0)
836                 {
837                     copy(x[j],mr_mip->w1);
838 #ifdef MR_FP_ROUNDING
839                     mr_mip->wa[j]=mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
840 #else
841                     mr_mip->wa[j]=mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
842 #endif
843                 }
844                 else
845                 {
846                     negify(x[j],mr_mip->w1);
847 #ifdef MR_FP_ROUNDING
848                     mr_mip->wa[j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
849 #else
850                     mr_mip->wa[j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
851 #endif
852                 }
853             }
854         }
855         for (j=deg;j<newn;j++) mr_mip->wa[j]=0;
856 
857         mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);
858         for (j=0;j<deg;j++)
859         {
860             if (y[j]==NULL) mr_mip->t[i][j]=0;
861             else
862             {
863                 if (size(y[j])>=0)
864                 {
865                     copy(y[j],mr_mip->w1);
866 #ifdef MR_FP_ROUNDING
867                     mr_mip->t[i][j]=mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
868 #else
869                     mr_mip->t[i][j]=mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
870 #endif
871                 }
872                 else
873                 {
874                     negify(y[j],mr_mip->w1);
875 #ifdef MR_FP_ROUNDING
876                     mr_mip->t[i][j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,ip,mr_mip->w1);
877 #else
878                     mr_mip->t[i][j]=p-mr_sdiv(_MIPP_ mr_mip->w1,p,mr_mip->w1);
879 #endif
880                 }
881             }
882         }
883         for (j=deg;j<newn;j++) mr_mip->t[i][j]=0;
884         mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
885   /* multiply FFTs */
886         for (j=0;j<newn;j++)
887 #ifdef MR_FP_ROUNDING
888             imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
889 #else
890             muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
891 #endif
892         mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);           /* np*N*lgN */
893 
894         inv=mr_mip->inverse[i];
895         if (mr_mip->logN > logn)
896         {
897             fac=twop(mr_mip->logN-logn);
898             inv=smul(fac,inv,p);
899         }
900         for (j=0;j<deg;j++)                   /* np*N */
901 #ifdef MR_FP_ROUNDING
902             imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
903 #else
904             muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
905 #endif
906     }
907   /* w3 is product of chinese primes */
908     decr(_MIPP_ mr_mip->w3,1,mr_mip->w4);
909     subdiv(_MIPP_ mr_mip->w4,2,mr_mip->w4); /* find mid-point of range */
910 
911     for (j=0;j<deg;j++)
912     {
913         for (i=0;i<np;i++)
914             mr_mip->cr[i]=mr_mip->t[i][j];
915         scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,z[j]); /* N*3*np*np/2  */
916         if (mr_compare(z[j],mr_mip->w4)>=0)
917         { /* In higher half of range, so number is negative */
918             subtract(_MIPP_ mr_mip->w3,z[j],z[j]);
919             negify(z[j],z[j]);
920         }
921     }            /* np*np*N/4 */
922     return np;
923 }
924 
mr_poly_mul(_MIPD_ int degx,big * x,int degy,big * y,big * z)925 int mr_poly_mul(_MIPD_ int degx,big *x,int degy,big *y,big *z)
926 { /*  Multiply two polynomials. The big arrays are of size degree */
927     int i,j,newn,logn,np,degree;
928     mr_utype inv,p,fac;
929 #ifdef MR_OS_THREADS
930     miracl *mr_mip=get_mip();
931 #endif
932 #ifdef MR_FP_ROUNDING
933     mr_large ip;
934 #endif
935     degree=degx+degy;
936     if (x==y)
937     {
938         mr_poly_sqr(_MIPP_ degx,x,z);
939         return degree;
940     }
941     newn=1; logn=0;
942     while (degree+1>newn) { newn<<=1; logn++; }
943 
944     if (mr_mip->logN<logn)
945         np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
946     else np=mr_mip->nprimes;
947 
948 /* compute coefficients modulo fft primes */
949     for (i=0;i<np;i++)
950     {
951         p=mr_mip->prime[i];
952 #ifdef MR_FP_ROUNDING
953         ip=mr_invert(p);
954 #endif
955         for (j=0;j<=degx;j++)
956         {
957             if (x[j]==NULL) mr_mip->wa[j]=0;
958 #ifdef MR_FP_ROUNDING
959             else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);      /* np*np*N/2 muldivs */
960 #else
961             else mr_mip->wa[j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);      /* np*np*N/2 muldivs */
962 #endif
963         }
964         for (j=degx+1;j<newn;j++) mr_mip->wa[j]=0;
965         mr_dif_fft(_MIPP_ logn,i,mr_mip->wa);              /* np*N*lgN     */
966 
967         for (j=0;j<=degy;j++)
968         {
969             if (y[j]==NULL) mr_mip->t[i][j]=0;
970 #ifdef MR_FP_ROUNDING
971             else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,ip,mr_mip->w1);   /* np*np*N/2   */
972 #else
973             else mr_mip->t[i][j]=mr_sdiv(_MIPP_ y[j],p,mr_mip->w1);   /* np*np*N/2   */
974 #endif
975         }
976         for (j=degy+1;j<newn;j++) mr_mip->t[i][j]=0;
977         mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);           /* np*N*lgN */
978 
979    /* multiply FFTs */
980         for (j=0;j<newn;j++)                       /* np*N */
981 #ifdef MR_FP_ROUNDING
982             imuldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
983 #else
984             muldiv(mr_mip->wa[j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
985 #endif
986         mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);           /* np*N*lgN */
987 
988         inv=mr_mip->inverse[i];
989         if (mr_mip->logN > logn)
990         {
991             fac=twop(mr_mip->logN-logn);
992             inv=smul(fac,inv,p);
993         }
994         for (j=0;j<=degree;j++)                   /* np*N */
995 #ifdef MR_FP_ROUNDING
996             imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
997 #else
998             muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
999 #endif
1000     }
1001     mr_mip->check=OFF;
1002     mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
1003        /* w6 = N.R */
1004     for (j=0;j<=degree;j++)
1005     {
1006         for (i=0;i<np;i++)
1007             mr_mip->cr[i]=mr_mip->t[i][j];
1008         scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7); /* N*3*np*np/2 */
1009         divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);  /* z[j] may be too big for redc */
1010         redc(_MIPP_ mr_mip->w7,z[j]);
1011     }                                        /* np*np*N/4 */
1012     mr_mip->check=ON;
1013     return degree;
1014 }
1015 
mr_poly_sqr(_MIPD_ int degx,big * x,big * z)1016 int mr_poly_sqr(_MIPD_ int degx,big *x,big *z)
1017 { /*  Multiply two polynomials. The big arrays are of size degree */
1018     int i,j,newn,logn,np,degree;
1019     mr_utype inv,p,fac;
1020 #ifdef MR_OS_THREADS
1021     miracl *mr_mip=get_mip();
1022 #endif
1023 #ifdef MR_FP_ROUNDING
1024     mr_large ip;
1025 #endif
1026     degree=2*degx;
1027     newn=1; logn=0;
1028     while (degree+1>newn) { newn<<=1; logn++; }
1029 
1030     if (mr_mip->logN<logn)
1031         np=mr_fft_init(_MIPP_ logn,mr_mip->modulus,mr_mip->modulus,TRUE);
1032     else np=mr_mip->nprimes;
1033 
1034 /* compute coefficients modulo fft primes */
1035     for (i=0;i<np;i++)
1036     {
1037         p=mr_mip->prime[i];
1038 #ifdef MR_FP_ROUNDING
1039         ip=mr_invert(p);
1040 #endif
1041         for (j=0;j<=degx;j++)
1042         {
1043             if (x[j]==NULL) mr_mip->t[i][j]=0;
1044 #ifdef MR_FP_ROUNDING
1045             else mr_mip->t[i][j]=mr_sdiv(_MIPP_ x[j],p,ip,mr_mip->w1);
1046 #else
1047             else mr_mip->t[i][j]=mr_sdiv(_MIPP_ x[j],p,mr_mip->w1);
1048 #endif
1049         }
1050         for (j=degx+1;j<newn;j++) mr_mip->t[i][j]=0;
1051         mr_dif_fft(_MIPP_ logn,i,mr_mip->t[i]);
1052 
1053    /* multiply FFTs */
1054         for (j=0;j<newn;j++)
1055 #ifdef MR_FP_ROUNDING
1056             imuldiv(mr_mip->t[i][j],mr_mip->t[i][j],(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
1057 #else
1058             muldiv(mr_mip->t[i][j],mr_mip->t[i][j],(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
1059 #endif
1060         mr_dit_fft(_MIPP_ logn,i,mr_mip->t[i]);
1061 
1062         inv=mr_mip->inverse[i];
1063         if (mr_mip->logN > logn)
1064         { /* adjust 1/N log p for smaller N  */
1065             fac=twop(mr_mip->logN-logn);
1066             inv=smul(fac,inv,p);
1067         }
1068         for (j=0;j<=degree;j++)
1069 #ifdef MR_FP_ROUNDING
1070             imuldiv(mr_mip->t[i][j],inv,(mr_small)0,p,ip,(mr_small *)&mr_mip->t[i][j]);
1071 #else
1072             muldiv(mr_mip->t[i][j],inv,(mr_small)0,p,(mr_small *)&mr_mip->t[i][j]);
1073 #endif
1074     }
1075     mr_mip->check=OFF;
1076     mr_shift(_MIPP_ mr_mip->modulus,(int)mr_mip->modulus->len,mr_mip->w6);
1077        /* w6 = N.R */
1078     for (j=0;j<=degree;j++)
1079     { /* apply CRT to each column */
1080         for (i=0;i<np;i++)
1081             mr_mip->cr[i]=mr_mip->t[i][j];
1082 
1083         scrt(_MIPP_ &mr_mip->chin,mr_mip->cr,mr_mip->w7);
1084         divide(_MIPP_ mr_mip->w7,mr_mip->w6,mr_mip->w6);  /* z[j] may be too big for redc */
1085         redc(_MIPP_ mr_mip->w7,z[j]);
1086     }
1087     mr_mip->check=ON;
1088 
1089     return degree;
1090 }
1091 
init_it(_MIPD_ int logn)1092 static BOOL init_it(_MIPD_ int logn)
1093 { /* find primes, table of roots, inverses etc for new N */
1094 #ifdef MR_OS_THREADS
1095     miracl *mr_mip=get_mip();
1096 #endif
1097 
1098 #ifdef MR_ITANIUM
1099     mr_small tm;
1100 #endif
1101 #ifdef MR_WIN64
1102     mr_small tm;
1103 #endif
1104 
1105     zero(mr_mip->w15);
1106     mr_mip->w15->len=2; mr_mip->w15->w[0]=0; mr_mip->w15->w[1]=1;
1107 
1108     if (mr_fft_init(_MIPP_ logn,mr_mip->w15,mr_mip->w15,FALSE)!=3) return FALSE;
1109 
1110     mr_mip->const1=invers(mr_mip->prime[0],mr_mip->prime[1]);
1111     mr_mip->const2=invers(mr_mip->prime[0],mr_mip->prime[2]);
1112     mr_mip->const3=invers(mr_mip->prime[1],mr_mip->prime[2]);
1113     if (mr_mip->base==0)
1114     {
1115 #ifndef MR_NOFULLWIDTH
1116         mr_mip->msw=muldvd(mr_mip->prime[0],mr_mip->prime[1],(mr_small)0,&mr_mip->lsw);
1117 #endif
1118     }
1119     else mr_mip->msw=muldiv(mr_mip->prime[0],mr_mip->prime[1],(mr_small)0,mr_mip->base,&mr_mip->lsw);
1120     mr_mip->logN=logn;
1121     return TRUE;
1122 }
1123 
fastmultop(_MIPD_ int n,big x,big y,big z)1124 BOOL fastmultop(_MIPD_ int n,big x,big y,big z)
1125 { /* only return top n words... assumes x and y are n in length */
1126 #ifdef MR_OS_THREADS
1127     miracl *mr_mip=get_mip();
1128 #endif
1129     int len;
1130     mr_mip->check=OFF;
1131     fft_mult(_MIPP_ x,y,mr_mip->w0);
1132     mr_mip->check=ON;
1133     len=mr_lent(mr_mip->w0);
1134     mr_shift(_MIPP_ mr_mip->w0,n-len,mr_mip->w0);
1135     copy(mr_mip->w0,z);
1136     if (len<2*n) return TRUE;
1137     return FALSE;
1138 }
1139 
fft_mult(_MIPD_ big x,big y,big z)1140 void fft_mult(_MIPD_ big x,big y,big z)
1141 { /* "fast" O(n.log n) multiplication */
1142     int i,pr,xl,yl,zl,newn,logn;
1143     mr_small v1,v2,v3,ic,c1,c2,p,fac,inv;
1144 
1145 #ifdef MR_ITANIUM
1146     mr_small tm;
1147 #endif
1148 #ifdef MR_WIN64
1149     mr_small tm;
1150 #endif
1151 #ifdef MR_FP
1152     mr_small dres;
1153 #endif
1154     mr_lentype sz;
1155     mr_utype *w[3],*wptr,*dptr,*d0,*d1,*d2,t;
1156 #ifdef MR_OS_THREADS
1157     miracl *mr_mip=get_mip();
1158 #endif
1159 #ifdef MR_FP_ROUNDING
1160     mr_large ip;
1161 #endif
1162     if (mr_mip->ERNUM) return;
1163     if (y->len==0 || x->len==0)
1164     {
1165         zero(z);
1166         return;
1167     }
1168 
1169     MR_IN(72)
1170 
1171     if (mr_notint(x) || mr_notint(y))
1172     {
1173         mr_berror(_MIPP_ MR_ERR_INT_OP);
1174         MR_OUT
1175         return;
1176     }
1177     sz=((x->len&MR_MSBIT)^(y->len&MR_MSBIT));
1178     xl=(int)(x->len&MR_OBITS);
1179     yl=(int)(y->len&MR_OBITS);
1180     zl=xl+yl;
1181     if (xl<512 || yl<512)    /* should be 512 */
1182     { /* not worth it! */
1183         multiply(_MIPP_ x,y,z);
1184         MR_OUT
1185         return;
1186     }
1187     if (zl>mr_mip->nib && mr_mip->check)
1188     {
1189         mr_berror(_MIPP_ MR_ERR_OVERFLOW);
1190         MR_OUT
1191         return;
1192     }
1193     newn=1; logn=0;
1194     while (zl>newn) { newn<<=1; logn++;}
1195     if (logn>mr_mip->logN)     /*  2^(N+1) settings can be used for 2^N */
1196     { /* numbers too big for current settings */
1197         if (!init_it(_MIPP_ logn))
1198         {
1199             mr_berror(_MIPP_ MR_ERR_OUT_OF_MEMORY);
1200             MR_OUT
1201             return;
1202         }
1203     }
1204     if (newn>2*mr_mip->nib)
1205     {
1206         mr_berror(_MIPP_ MR_ERR_OVERFLOW);
1207         MR_OUT
1208         return;
1209     }
1210 
1211     d0=mr_mip->t[0]; d1=mr_mip->t[1]; d2=mr_mip->t[2];
1212     w[0]=mr_mip->wa; w[1]=mr_mip->wb; w[2]=mr_mip->wc;
1213 
1214     fac=twop(mr_mip->logN-logn);
1215 
1216     for (pr=0;pr<3;pr++)
1217     { /* multiply mod each prime */
1218         p=mr_mip->prime[pr];
1219         inv=mr_mip->inverse[pr];
1220 #ifdef MR_FP_ROUNDING
1221         ip=mr_invert(p);
1222 #endif
1223         if (fac!=1) inv=smul(fac,inv,p);  /* adjust 1/N mod p */
1224 
1225         dptr=mr_mip->t[pr];
1226         wptr=w[pr];
1227 
1228         for (i=0;i<xl;i++) dptr[i]=MR_REMAIN(x->w[i],p);
1229         for (i=xl;i<newn;i++) dptr[i]=0;
1230 
1231         mr_dif_fft(_MIPP_ logn,pr,dptr);
1232 
1233 
1234         if (x!=y)
1235         {
1236             if (!mr_mip->same || !mr_mip->first_one)
1237             {
1238                 for (i=0;i<yl;i++) wptr[i]=MR_REMAIN(y->w[i],p);
1239                 for (i=yl;i<newn;i++) wptr[i]=0;
1240                 mr_dif_fft(_MIPP_ logn,pr,wptr);
1241             }
1242         }
1243         else wptr=dptr;
1244 
1245         for (i=0;i<newn;i++)
1246         {  /* "multiply" Fourier transforms */
1247 #ifdef MR_FP_ROUNDING
1248             imuldiv(dptr[i],wptr[i],(mr_small)0,p,ip,(mr_small *)&dptr[i]);
1249 #else
1250             muldiv(dptr[i],wptr[i],(mr_small)0,p,(mr_small *)&dptr[i]);
1251 #endif
1252         }
1253 
1254         mr_dit_fft(_MIPP_ logn,pr,dptr);
1255 
1256         for (i=0;i<newn;i++)
1257         {  /* convert to mixed radix form     *
1258             * using chinese remainder thereom *
1259             * but first multiply by 1/N mod p */
1260 #ifdef MR_FP_ROUNDING
1261             imuldiv(dptr[i],inv,(mr_small)0,p,ip,(mr_small *)&dptr[i]);
1262 #else
1263             muldiv(dptr[i],inv,(mr_small)0,p,(mr_small *)&dptr[i]);
1264 #endif
1265             if (pr==1)
1266             {
1267                 t=d1[i]-d0[i];
1268                 while (t<0) t+=mr_mip->prime[1];
1269                 muldiv(t,mr_mip->const1,(mr_small)0,mr_mip->prime[1],(mr_small *)&d1[i]);
1270             }
1271             if (pr==2)
1272             {
1273                 t=d2[i]-d0[i];
1274                 while (t<0) t+=mr_mip->prime[2];
1275                 muldiv(t,mr_mip->const2,(mr_small)0,mr_mip->prime[2],(mr_small *)&t);
1276                 t-=d1[i];
1277                 while (t<0) t+=mr_mip->prime[2];
1278                 muldiv(t,mr_mip->const3,(mr_small)0,mr_mip->prime[2],(mr_small *)&d2[i]);
1279             }
1280         }
1281     }
1282 
1283     mr_mip->first_one=TRUE;
1284 
1285     zero(z);
1286     c1=c2=0;
1287     if (mr_mip->base==0) for (i=0;i<zl;i++)
1288     { /* propagate carries */
1289 #ifndef MR_NOFULLWIDTH
1290         v1=d0[i];
1291         v2=d1[i];
1292         v3=d2[i];
1293         v2=muldvd(v2,mr_mip->prime[0],v1,&v1);
1294         c1+=v1;
1295         if (c1<v1) v2++;
1296         ic=c2+muldvd(mr_mip->lsw,v3,c1,&z->w[i]);
1297         c2=muldvd(mr_mip->msw,v3,ic,&c1);
1298         c1+=v2;
1299         if (c1<v2) c2++;
1300 #endif
1301     }
1302     else for (i=0;i<zl;i++)
1303     { /* propagate carries */
1304         v1=d0[i];
1305         v2=d1[i];
1306         v3=d2[i];
1307 #ifdef MR_FP_ROUNDING
1308         v2=imuldiv(v2,mr_mip->prime[0],v1+c1,mr_mip->base,mr_mip->inverse_base,&v1);
1309         ic=c2+imuldiv(mr_mip->lsw,v3,v1,mr_mip->base,mr_mip->inverse_base,&z->w[i]);
1310         c2=imuldiv(mr_mip->msw,v3,v2+ic,mr_mip->base,mr_mip->inverse_base,&c1);
1311 #else
1312         v2=muldiv(v2,mr_mip->prime[0],(mr_small)(v1+c1),mr_mip->base,&v1);
1313         ic=c2+muldiv(mr_mip->lsw,v3,v1,mr_mip->base,&z->w[i]);
1314         c2=muldiv(mr_mip->msw,v3,(mr_small)(v2+ic),mr_mip->base,&c1);
1315 #endif
1316     }
1317     z->len=(sz|zl); /* set length and sign of result */
1318     mr_lzero(z);
1319     MR_OUT
1320 }
1321 
1322 #endif
1323 
1324 /*
1325 main()
1326 {
1327     big x,y,z,w;
1328     int i,j,k;
1329     miracl *mip=mirsys(1024,0);
1330     x=mirvar(0);
1331     y=mirvar(0);
1332     z=mirvar(0);
1333     w=mirvar(0);
1334 
1335     mip->IOBASE=16;
1336     bigbits(512*MIRACL,x);
1337     bigbits(512*MIRACL,y);
1338 
1339 
1340     multiply(x,x,z);
1341 
1342     cotnum(z,stdout);
1343 
1344     fft_mult(x,x,w);
1345 
1346     cotnum(w,stdout);
1347     if (mr_compare(z,w)!=0) printf("Problems\n");
1348 
1349 }
1350 */
1351