1
2 /***************************************************************************
3 *
4 Copyright 2012 CertiVox IOM Ltd. *
5 *
6 This file is part of CertiVox MIRACL Crypto SDK. *
7 *
8 The CertiVox MIRACL Crypto SDK provides developers with an *
9 extensive and efficient set of cryptographic functions. *
10 For further information about its features and functionalities please *
11 refer to http://www.certivox.com *
12 *
13 * The CertiVox MIRACL Crypto SDK is free software: you can *
14 redistribute it and/or modify it under the terms of the *
15 GNU Affero General Public License as published by the *
16 Free Software Foundation, either version 3 of the License, *
17 or (at your option) any later version. *
18 *
19 * The CertiVox MIRACL Crypto SDK is distributed in the hope *
20 that it will be useful, but WITHOUT ANY WARRANTY; without even the *
21 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *
22 See the GNU Affero General Public License for more details. *
23 *
24 * You should have received a copy of the GNU Affero General Public *
25 License along with CertiVox MIRACL Crypto SDK. *
26 If not, see <http://www.gnu.org/licenses/>. *
27 *
28 You can be released from the requirements of the license by purchasing *
29 a commercial license. Buying such a license is mandatory as soon as you *
30 develop commercial activities involving the CertiVox MIRACL Crypto SDK *
31 without disclosing the source code of your own applications, or shipping *
32 the CertiVox MIRACL Crypto SDK with a closed source product. *
33 *
34 ***************************************************************************/
35 /*
36 * Many processors support a floating-point coprocessor, which may
37 * implement a faster multiplication instruction than the corresponding
38 * integer instruction. This is the case for the 80486/Pentium processor
39 * which has a built-in co-processor. This can be exploited to give even
40 * faster performance.
41 *
42 * As before the fixed modulus size to be used is pre-defined as
43 * MR_PENTIUM in mirdef.h
44 *
45 * Note that since the partial products are accumulated in a 64-bit register
46 * this implies that a full-width number base (2^32) cannot be used.
47 * The maximum number base that can be used is 2^x where x is
48 * calculated such that 2^(64-2*x) > 2*MR_PENTIUM. This means that
49 * x will usually be 28 or 29
50 *
51 * To use this code:-
52 *
53 * (1) Define MR_PENTIUM in mirdef.h to the fixed size of the modulus
54 *
55 * (2) Use as a number base the value of x calculated as shown above
56 * For example, for 512 bit exponentiation, #define MR_PENTIUM 18
57 * in mirdef.h and call mirsys(50,536870912L) in your main program.
58 * (Observe that 536870912 = 2^29, and that 18*29 = 522, big enough
59 * for 512 bit calculations).
60 *
61 * (3) Use Montgomery representation when implementing your crypto-system
62 * i.e. use monty_powmod(). This will automatically call the
63 * routines in this module.
64 *
65 * Note that this module generates a *lot* of code e.g. > 49kbytes for
66 * MR_PENTIUM = 36. Compile using -B switch - you will need
67 * the TASM macro-assembler. If out-of-memory, try using the TASMX /ml
68 * version of the assembler.
69 *
70 * Note that it is *VITAL* that double arrays be aligned on 8-byte
71 * boundaries for maximum speed on a Pentium.
72 *
73 * Many thanks are due to Paul Rubin, who suggested to me that this approach
74 * might be faster than the all-integer method described elsewhere.
75 *
76 * The FP stack is primed in prepare_monty() :-
77 * magic - (2^63+2^62)*base. By adding and then subtracting this number we
78 * get the top half of the sum.
79 * 1/base - Inverse of the number base
80 * ndash - Montgomery's constant
81 */
82
83 #include "miracl.h"
84
85 #ifdef MR_PENTIUM
86
87 #if INLINE_ASM == 1
88 #define N 8
89 #define POINTER QWORD PTR
90 #define PBX bx
91 #define PSI si
92 #define PDI di
93 #define PCX cx
94 #endif
95
96 #if INLINE_ASM == 2
97 #define N 8
98 #define POINTER QWORD PTR
99 #define PBX bx
100 #define PSI si
101 #define PDI di
102 #define PCX cx
103 #endif
104
105 #if INLINE_ASM == 3
106 #define N 8
107 #define POINTER QWORD PTR
108 #define PBX ebx
109 #define PSI esi
110 #define PDI edi
111 #define PCX ecx
112 #endif
113
114 #ifdef INLINE_ASM
115 #ifndef MR_LMM
116 /* not implemented for large memory model 16 bit */
117
fastmodmult(_MIPD_ big x,big y,big z)118 void fastmodmult(_MIPD_ big x,big y,big z)
119 {
120 int ij;
121 #ifdef MR_OS_THREADS
122 miracl *mr_mip=get_mip();
123 #endif
124 big w0=mr_mip->w0;
125 big modulus=mr_mip->modulus;
126 mr_small *wg,*mg,*xg,*yg;
127 wg=w0->w;
128 mg=modulus->w;
129 xg=x->w;
130 yg=y->w;
131
132 for (ij=2*MR_PENTIUM;ij<(int)(w0->len&MR_OBITS);ij++) w0->w[ij]=0.0;
133 w0->len=2*MR_PENTIUM;
134
135 ASM
136 {
137 FSTEP MACRO i,j
138 /* some fancy Pentium scheduling going on here ... */
139 fld POINTER [PBX+N*i]
140 fmul POINTER [PSI+N*j]
141 fxch st(2)
142 fadd
143 ENDM
144
145 FRSTEP MACRO i,j
146 fld POINTER [PDI+N*i]
147 fmul POINTER [PSI+N*j]
148 fxch st(2)
149 fadd
150 ENDM
151
152 FDSTEP MACRO i,j
153 fld POINTER [PBX+N*i]
154 fmul POINTER [PBX+N*j]
155 fxch st(2)
156 fadd
157 ENDM
158
159 SELF MACRO k
160 fld POINTER [PBX+N*k]
161 fmul st,st(0)
162 fadd
163 ENDM
164
165 RFINU MACRO k
166 fld st(0)
167
168 fadd st,st(2)
169 fsub st,st(2)
170
171 fsubr st,st(1)
172 fmul st,st(4)
173 fld st(0)
174
175 fadd st,st(3)
176 fsub st,st(3)
177
178 fsub
179 fst POINTER [PDI+N*k]
180 fmul POINTER [PSI]
181 fadd
182 fmul st,st(2)
183 ENDM
184
185 RFIND MACRO k
186 fld st(0)
187
188 fadd st,st(2)
189 fsub st,st(2)
190
191 fsub st(1),st
192 fmul st,st(3)
193 fxch st(1)
194 fstp POINTER [PDI+N*k]
195
196 ENDM
197
198 DIAG MACRO ns,ne
199 CNT1=ns
200 CNT2=ne
201 fld POINTER [PBX+N*CNT1]
202 fmul POINTER [PSI+N*CNT2]
203 CNT1=CNT1+1
204 CNT2=CNT2-1
205 WHILE CNT1 LE ne
206 FSTEP CNT1,CNT2
207 CNT1=CNT1+1
208 CNT2=CNT2-1
209 ENDM
210 fadd
211 ENDM
212
213 SDIAG MACRO ns,ne
214 CNT1=ns
215 CNT2=ne
216 IF CNT1 LT CNT2
217 fstp st(5) /* store carry */
218 fldz
219 fld POINTER [PBX+N*CNT1]
220 fmul POINTER [PBX+N*CNT2]
221 CNT1=CNT1+1
222 CNT2=CNT2-1
223 WHILE CNT1 LT CNT2
224 FDSTEP CNT1,CNT2
225 CNT1=CNT1+1
226 CNT2=CNT2-1
227 ENDM
228 fadd
229 fld st(0) /* now double it ... */
230 fadd
231 fadd st,st(5) /* add in carry */
232 ENDIF
233 ENDM
234
235 RDIAGU MACRO ns,ne
236 CNT1=ns
237 CNT2=ne
238 IF CNT1 LT ne
239 fld POINTER [PDI+N*CNT1]
240 fmul POINTER [PSI+N*CNT2]
241 CNT1=CNT1+1
242 CNT2=CNT2-1
243 WHILE CNT1 LT ne
244 FRSTEP CNT1,CNT2
245 CNT1=CNT1+1
246 CNT2=CNT2-1
247 ENDM
248 fadd
249 ENDIF
250 ENDM
251
252 RDIAGD MACRO ns,ne
253 CNT1=ns
254 CNT2=ne
255 fld POINTER [PDI+N*CNT1]
256 fmul POINTER [PSI+N*CNT2]
257 CNT1=CNT1+1
258 CNT2=CNT2-1
259 WHILE CNT1 LE ne
260 FRSTEP CNT1,CNT2
261 CNT1=CNT1+1
262 CNT2=CNT2-1
263 ENDM
264 fadd
265 ENDM
266
267 MODMULT MACRO
268 CNT=0
269 WHILE CNT LT MR_PENTIUM
270 DIAG 0,CNT
271 xchg PSI,PCX
272 RDIAGU 0,CNT
273 RFINU CNT
274 xchg PSI,PCX
275 CNT=CNT+1
276 ENDM
277 SCNT=0
278 WHILE SCNT LT (MR_PENTIUM-1)
279 SCNT=SCNT+1
280 DIAG SCNT,(MR_PENTIUM-1)
281 xchg PSI,PCX
282 RDIAGD SCNT,(MR_PENTIUM-1)
283 RFIND CNT
284 xchg PSI,PCX
285 CNT=CNT+1
286 ENDM
287 RFIND CNT
288 CNT=CNT+1
289 fstp POINTER [PDI+N*CNT]
290 ENDM
291
292 MODSQUARE MACRO
293 CNT=0
294 WHILE CNT LT MR_PENTIUM
295 SDIAG 0,CNT
296 IF (CNT MOD 2) EQ 0
297 SELF (CNT/2)
298 ENDIF
299 RDIAGU 0,CNT
300 RFINU CNT
301 CNT=CNT+1
302 ENDM
303 SCNT=0
304 WHILE SCNT LT (MR_PENTIUM-1)
305 SCNT=SCNT+1
306 SDIAG SCNT,(MR_PENTIUM-1)
307 IF (CNT MOD 2) EQ 0
308 SELF (CNT/2)
309 ENDIF
310 RDIAGD SCNT,(MR_PENTIUM-1)
311 RFIND CNT
312 CNT=CNT+1
313 ENDM
314 RFIND CNT
315 CNT=CNT+1
316 fstp POINTER [PDI+N*CNT]
317 ENDM
318 }
319 ASM
320 {
321 push PDI
322 push PSI
323
324 mov PBX,xg
325 mov PSI,yg
326 mov PCX,mg
327 mov PDI,wg
328
329
330 fldz
331
332 MODMULT
333
334 pop PSI
335 pop PDI
336 }
337
338 for (ij=MR_PENTIUM;ij<(int)(z->len&MR_OBITS);ij++) z->w[ij]=0.0;
339 z->len=MR_PENTIUM;
340 for (ij=0;ij<MR_PENTIUM;ij++) z->w[ij]=w0->w[ij+MR_PENTIUM];
341 if (z->w[MR_PENTIUM-1]==0.0) mr_lzero(z);
342 }
343
344 void fastmodsquare(_MIPD_ x,z)
345 big x,z;
346 {
347 int ij;
348 #ifdef MR_OS_THREADS
349 miracl *mr_mip=get_mip();
350 #endif
351 big w0=mr_mip->w0;
352 big modulus=mr_mip->modulus;
353 mr_small *wg,*mg,*xg;
354 wg=w0->w;
355 mg=modulus->w;
356 xg=x->w;
357
358 for (ij=2*MR_PENTIUM;ij<(int)(w0->len&MR_OBITS);ij++) w0->w[ij]=0.0;
359 w0->len=2*MR_PENTIUM;
360
361 ASM
362 {
363 push PDI
364 push PSI
365
366 mov PBX,xg
367 mov PSI,mg
368 mov PDI,wg
369
370 fldz
371
372 MODSQUARE
373
374 pop PSI
375 pop PDI
376 }
377 for (ij=MR_PENTIUM;ij<(int)(z->len&MR_OBITS);ij++) z->w[ij]=0.0;
378 z->len=MR_PENTIUM;
379
380 for (ij=0;ij<MR_PENTIUM;ij++) z->w[ij]=w0->w[ij+MR_PENTIUM];
381 if (z->w[MR_PENTIUM-1]==0.0) mr_lzero(z);
382
383 }
384
385 #endif
386 #endif
387 #endif
388
389