1 
2 /***************************************************************************
3                                                                            *
4 Copyright 2012 CertiVox IOM Ltd.                                           *
5                                                                            *
6 This file is part of CertiVox MIRACL Crypto SDK.                           *
7                                                                            *
8 The CertiVox MIRACL Crypto SDK provides developers with an                 *
9 extensive and efficient set of cryptographic functions.                    *
10 For further information about its features and functionalities please      *
11 refer to http://www.certivox.com                                           *
12                                                                            *
13 * The CertiVox MIRACL Crypto SDK is free software: you can                 *
14   redistribute it and/or modify it under the terms of the                  *
15   GNU Affero General Public License as published by the                    *
16   Free Software Foundation, either version 3 of the License,               *
17   or (at your option) any later version.                                   *
18                                                                            *
19 * The CertiVox MIRACL Crypto SDK is distributed in the hope                *
20   that it will be useful, but WITHOUT ANY WARRANTY; without even the       *
21   implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
22   See the GNU Affero General Public License for more details.              *
23                                                                            *
24 * You should have received a copy of the GNU Affero General Public         *
25   License along with CertiVox MIRACL Crypto SDK.                           *
26   If not, see <http://www.gnu.org/licenses/>.                              *
27                                                                            *
28 You can be released from the requirements of the license by purchasing     *
29 a commercial license. Buying such a license is mandatory as soon as you    *
30 develop commercial activities involving the CertiVox MIRACL Crypto SDK     *
31 without disclosing the source code of your own applications, or shipping   *
32 the CertiVox MIRACL Crypto SDK with a closed source product.               *
33                                                                            *
34 ***************************************************************************/
35 /*
36  *   Many processors support a floating-point coprocessor, which may
37  *   implement a faster multiplication instruction than the corresponding
38  *   integer instruction. This is the case for the 80486/Pentium processor
39  *   which has a built-in co-processor. This can be exploited to give even
40  *   faster performance.
41  *
42  *   As before the fixed modulus size to be used is pre-defined as
43  *   MR_PENTIUM in mirdef.h
44  *
45  *   Note that since the partial products are accumulated in a 64-bit register
46  *   this implies that a full-width number base (2^32) cannot be used.
47  *   The maximum number base that can be used is 2^x where x is
48  *   calculated such that 2^(64-2*x) > 2*MR_PENTIUM. This means that
49  *   x will usually be 28 or 29
50  *
51  *   To use this code:-
52  *
53  *   (1) Define MR_PENTIUM in mirdef.h to the fixed size of the modulus
54  *
55  *   (2) Use as a number base the value of x calculated as shown above
56  *       For example, for 512 bit exponentiation, #define MR_PENTIUM 18
57  *       in mirdef.h and call mirsys(50,536870912L) in your main program.
58  *       (Observe that 536870912 = 2^29, and that 18*29 = 522, big enough
59  *       for 512 bit calculations).
60  *
61  *   (3) Use Montgomery representation when implementing your crypto-system
62  *       i.e. use monty_powmod(). This will automatically call the
63  *       routines in this module.
64  *
65  *   Note that this module generates a *lot* of code e.g. > 49kbytes for
66  *   MR_PENTIUM = 36. Compile using -B switch - you will need
67  *   the TASM macro-assembler. If out-of-memory, try using the TASMX /ml
68  *   version of the assembler.
69  *
70  *   Note that it is *VITAL* that double arrays be aligned on 8-byte
71  *   boundaries for maximum speed on a Pentium.
72  *
73  *   Many thanks are due to Paul Rubin, who suggested to me that this approach
74  *   might be faster than the all-integer method described elsewhere.
75  *
76  *   The FP stack is primed in prepare_monty() :-
77  *   magic  - (2^63+2^62)*base. By adding and then subtracting this number we
78  *            get the top half of the sum.
79  *   1/base - Inverse of the number base
80  *   ndash  - Montgomery's constant
81  */
82 
83 #include "miracl.h"
84 
85 #ifdef MR_PENTIUM
86 
87 #if INLINE_ASM == 1
88 #define N 8
89 #define POINTER QWORD PTR
90 #define PBX bx
91 #define PSI si
92 #define PDI di
93 #define PCX cx
94 #endif
95 
96 #if INLINE_ASM == 2
97 #define N 8
98 #define POINTER QWORD PTR
99 #define PBX bx
100 #define PSI si
101 #define PDI di
102 #define PCX cx
103 #endif
104 
105 #if INLINE_ASM == 3
106 #define N 8
107 #define POINTER QWORD PTR
108 #define PBX ebx
109 #define PSI esi
110 #define PDI edi
111 #define PCX ecx
112 #endif
113 
114 #ifdef INLINE_ASM
115 #ifndef MR_LMM
116                 /* not implemented for large memory model 16 bit */
117 
fastmodmult(_MIPD_ big x,big y,big z)118 void fastmodmult(_MIPD_ big x,big y,big z)
119 {
120     int ij;
121 #ifdef MR_OS_THREADS
122     miracl *mr_mip=get_mip();
123 #endif
124     big w0=mr_mip->w0;
125     big modulus=mr_mip->modulus;
126     mr_small *wg,*mg,*xg,*yg;
127     wg=w0->w;
128     mg=modulus->w;
129     xg=x->w;
130     yg=y->w;
131 
132     for (ij=2*MR_PENTIUM;ij<(int)(w0->len&MR_OBITS);ij++) w0->w[ij]=0.0;
133     w0->len=2*MR_PENTIUM;
134 
135     ASM
136     {
137      FSTEP MACRO i,j
138 /* some fancy Pentium scheduling going on here ... */
139        fld  POINTER [PBX+N*i]
140        fmul POINTER [PSI+N*j]
141        fxch st(2)
142        fadd
143      ENDM
144 
145      FRSTEP MACRO i,j
146        fld  POINTER [PDI+N*i]
147        fmul POINTER [PSI+N*j]
148        fxch st(2)
149        fadd
150      ENDM
151 
152      FDSTEP MACRO i,j
153        fld  POINTER [PBX+N*i]
154        fmul POINTER [PBX+N*j]
155        fxch st(2)
156        fadd
157      ENDM
158 
159      SELF MACRO k
160        fld POINTER [PBX+N*k]
161        fmul st,st(0)
162        fadd
163      ENDM
164 
165      RFINU MACRO k
166        fld  st(0)
167 
168        fadd st,st(2)
169        fsub st,st(2)
170 
171        fsubr st,st(1)
172        fmul st,st(4)
173        fld  st(0)
174 
175        fadd st,st(3)
176        fsub st,st(3)
177 
178        fsub
179        fst  POINTER [PDI+N*k]
180        fmul POINTER [PSI]
181        fadd
182        fmul st,st(2)
183      ENDM
184 
185      RFIND MACRO k
186        fld st(0)
187 
188        fadd st,st(2)
189        fsub st,st(2)
190 
191        fsub st(1),st
192        fmul st,st(3)
193        fxch st(1)
194        fstp POINTER [PDI+N*k]
195 
196      ENDM
197 
198      DIAG MACRO ns,ne
199        CNT1=ns
200        CNT2=ne
201        fld  POINTER [PBX+N*CNT1]
202        fmul POINTER [PSI+N*CNT2]
203        CNT1=CNT1+1
204        CNT2=CNT2-1
205        WHILE CNT1 LE ne
206            FSTEP CNT1,CNT2
207            CNT1=CNT1+1
208            CNT2=CNT2-1
209        ENDM
210        fadd
211      ENDM
212 
213      SDIAG MACRO ns,ne
214         CNT1=ns
215         CNT2=ne
216         IF CNT1 LT CNT2
217             fstp st(5)   /* store carry */
218             fldz
219             fld  POINTER [PBX+N*CNT1]
220             fmul POINTER [PBX+N*CNT2]
221             CNT1=CNT1+1
222             CNT2=CNT2-1
223             WHILE CNT1 LT CNT2
224                 FDSTEP CNT1,CNT2
225                 CNT1=CNT1+1
226                 CNT2=CNT2-1
227             ENDM
228             fadd
229             fld st(0)      /* now double it ... */
230             fadd
231             fadd st,st(5)  /* add in carry */
232         ENDIF
233      ENDM
234 
235      RDIAGU MACRO ns,ne
236         CNT1=ns
237         CNT2=ne
238         IF CNT1 LT ne
239            fld  POINTER [PDI+N*CNT1]
240            fmul POINTER [PSI+N*CNT2]
241            CNT1=CNT1+1
242            CNT2=CNT2-1
243            WHILE CNT1 LT ne
244                FRSTEP CNT1,CNT2
245                CNT1=CNT1+1
246                CNT2=CNT2-1
247            ENDM
248            fadd
249          ENDIF
250      ENDM
251 
252      RDIAGD MACRO ns,ne
253        CNT1=ns
254        CNT2=ne
255        fld  POINTER [PDI+N*CNT1]
256        fmul POINTER [PSI+N*CNT2]
257        CNT1=CNT1+1
258        CNT2=CNT2-1
259        WHILE CNT1 LE ne
260            FRSTEP CNT1,CNT2
261            CNT1=CNT1+1
262            CNT2=CNT2-1
263        ENDM
264        fadd
265      ENDM
266 
267      MODMULT MACRO
268         CNT=0
269         WHILE CNT LT MR_PENTIUM
270             DIAG 0,CNT
271             xchg PSI,PCX
272             RDIAGU 0,CNT
273             RFINU CNT
274             xchg PSI,PCX
275             CNT=CNT+1
276         ENDM
277         SCNT=0
278         WHILE SCNT LT (MR_PENTIUM-1)
279             SCNT=SCNT+1
280             DIAG SCNT,(MR_PENTIUM-1)
281             xchg PSI,PCX
282             RDIAGD SCNT,(MR_PENTIUM-1)
283             RFIND CNT
284             xchg PSI,PCX
285             CNT=CNT+1
286         ENDM
287         RFIND CNT
288         CNT=CNT+1
289         fstp POINTER [PDI+N*CNT]
290      ENDM
291 
292      MODSQUARE MACRO
293         CNT=0
294         WHILE CNT LT MR_PENTIUM
295             SDIAG 0,CNT
296             IF (CNT MOD 2) EQ 0
297                 SELF (CNT/2)
298             ENDIF
299             RDIAGU 0,CNT
300             RFINU CNT
301             CNT=CNT+1
302         ENDM
303         SCNT=0
304         WHILE SCNT LT (MR_PENTIUM-1)
305             SCNT=SCNT+1
306             SDIAG SCNT,(MR_PENTIUM-1)
307             IF (CNT MOD 2) EQ 0
308                 SELF (CNT/2)
309             ENDIF
310             RDIAGD SCNT,(MR_PENTIUM-1)
311             RFIND CNT
312             CNT=CNT+1
313         ENDM
314         RFIND CNT
315         CNT=CNT+1
316         fstp POINTER [PDI+N*CNT]
317      ENDM
318     }
319     ASM
320     {
321         push PDI
322         push PSI
323 
324         mov PBX,xg
325         mov PSI,yg
326         mov PCX,mg
327         mov PDI,wg
328 
329 
330         fldz
331 
332         MODMULT
333 
334         pop PSI
335         pop PDI
336     }
337 
338     for (ij=MR_PENTIUM;ij<(int)(z->len&MR_OBITS);ij++) z->w[ij]=0.0;
339     z->len=MR_PENTIUM;
340     for (ij=0;ij<MR_PENTIUM;ij++) z->w[ij]=w0->w[ij+MR_PENTIUM];
341     if (z->w[MR_PENTIUM-1]==0.0) mr_lzero(z);
342 }
343 
344 void fastmodsquare(_MIPD_ x,z)
345 big x,z;
346 {
347     int ij;
348 #ifdef MR_OS_THREADS
349     miracl *mr_mip=get_mip();
350 #endif
351     big w0=mr_mip->w0;
352     big modulus=mr_mip->modulus;
353     mr_small *wg,*mg,*xg;
354     wg=w0->w;
355     mg=modulus->w;
356     xg=x->w;
357 
358     for (ij=2*MR_PENTIUM;ij<(int)(w0->len&MR_OBITS);ij++) w0->w[ij]=0.0;
359     w0->len=2*MR_PENTIUM;
360 
361     ASM
362     {
363         push PDI
364         push PSI
365 
366         mov PBX,xg
367         mov PSI,mg
368         mov PDI,wg
369 
370         fldz
371 
372         MODSQUARE
373 
374         pop PSI
375         pop PDI
376     }
377     for (ij=MR_PENTIUM;ij<(int)(z->len&MR_OBITS);ij++) z->w[ij]=0.0;
378     z->len=MR_PENTIUM;
379 
380     for (ij=0;ij<MR_PENTIUM;ij++) z->w[ij]=w0->w[ij+MR_PENTIUM];
381     if (z->w[MR_PENTIUM-1]==0.0) mr_lzero(z);
382 
383 }
384 
385 #endif
386 #endif
387 #endif
388 
389