1 /* TomsFastMath, a fast ISO C bignum library.
2  *
3  * This project is meant to fill in where LibTomMath
4  * falls short.  That is speed ;-)
5  *
6  * This project is public domain and free for all purposes.
7  *
8  * Tom St Denis, tomstdenis@gmail.com
9  */
10 
11 /* About this file...
12 
13 */
14 
15 #include "bignum_fast.h"
16 
17 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
18    #undef TFM_SSE2
19    #define TFM_X86
20 #endif
21 
22 /* these are the combas.  Worship them. */
23 #if defined(TFM_X86)
24 /* Generic x86 optimized code */
25 
26 /* anything you need at the start */
27 #define COMBA_START
28 
29 /* clear the chaining variables */
30 #define COMBA_CLEAR \
31    c0 = c1 = c2 = 0;
32 
33 /* forward the carry to the next digit */
34 #define COMBA_FORWARD \
35    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
36 
37 /* store the first sum */
38 #define COMBA_STORE(x) \
39    x = c0;
40 
41 /* store the second sum [carry] */
42 #define COMBA_STORE2(x) \
43    x = c1;
44 
45 /* anything you need at the end */
46 #define COMBA_FINI
47 
48 /* this should multiply i and j  */
49 #define MULADD(i, j)                                      \
50 asm(                                                      \
51      "movl  %6,%%eax     \n\t"                            \
52      "mull  %7           \n\t"                            \
53      "addl  %%eax,%0     \n\t"                            \
54      "adcl  %%edx,%1     \n\t"                            \
55      "adcl  $0,%2        \n\t"                            \
56      :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","cc");
57 
58 #elif defined(TFM_X86_64)
59 /* x86-64 optimized */
60 
61 /* anything you need at the start */
62 #define COMBA_START
63 
64 /* clear the chaining variables */
65 #define COMBA_CLEAR \
66    c0 = c1 = c2 = 0;
67 
68 /* forward the carry to the next digit */
69 #define COMBA_FORWARD \
70    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
71 
72 /* store the first sum */
73 #define COMBA_STORE(x) \
74    x = c0;
75 
76 /* store the second sum [carry] */
77 #define COMBA_STORE2(x) \
78    x = c1;
79 
80 /* anything you need at the end */
81 #define COMBA_FINI
82 
83 /* this should multiply i and j  */
84 #define MULADD(i, j)                                      \
85 asm  (                                                    \
86      "movq  %6,%%rax     \n\t"                            \
87      "mulq  %7           \n\t"                            \
88      "addq  %%rax,%0     \n\t"                            \
89      "adcq  %%rdx,%1     \n\t"                            \
90      "adcq  $0,%2        \n\t"                            \
91      :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j)  :"%rax","%rdx","cc");
92 
93 #elif defined(TFM_SSE2)
94 /* use SSE2 optimizations */
95 
96 /* anything you need at the start */
97 #define COMBA_START
98 
99 /* clear the chaining variables */
100 #define COMBA_CLEAR \
101    c0 = c1 = c2 = 0;
102 
103 /* forward the carry to the next digit */
104 #define COMBA_FORWARD \
105    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
106 
107 /* store the first sum */
108 #define COMBA_STORE(x) \
109    x = c0;
110 
111 /* store the second sum [carry] */
112 #define COMBA_STORE2(x) \
113    x = c1;
114 
115 /* anything you need at the end */
116 #define COMBA_FINI \
117    asm("emms");
118 
119 /* this should multiply i and j  */
120 #define MULADD(i, j)                                     \
121 asm(                                                     \
122     "movd  %6,%%mm0     \n\t"                            \
123     "movd  %7,%%mm1     \n\t"                            \
124     "pmuludq %%mm1,%%mm0\n\t"                            \
125     "movd  %%mm0,%%eax  \n\t"                            \
126     "psrlq $32,%%mm0    \n\t"                            \
127     "addl  %%eax,%0     \n\t"                            \
128     "movd  %%mm0,%%eax  \n\t"                            \
129     "adcl  %%eax,%1     \n\t"                            \
130     "adcl  $0,%2        \n\t"                            \
131     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","cc");
132 
133 #elif defined(TFM_ARM)
134 /* ARM code */
135 
136 #define COMBA_START
137 
138 #define COMBA_CLEAR \
139    c0 = c1 = c2 = 0;
140 
141 #define COMBA_FORWARD \
142    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
143 
144 #define COMBA_STORE(x) \
145    x = c0;
146 
147 #define COMBA_STORE2(x) \
148    x = c1;
149 
150 #define COMBA_FINI
151 
152 #define MULADD(i, j)                                          \
153 asm(                                                          \
154 "  UMULL  r0,r1,%6,%7           \n\t"                         \
155 "  ADDS   %0,%0,r0              \n\t"                         \
156 "  ADCS   %1,%1,r1              \n\t"                         \
157 "  ADC    %2,%2,#0              \n\t"                         \
158 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
159 
160 #elif defined(TFM_PPC32)
161 /* For 32-bit PPC */
162 
163 #define COMBA_START
164 
165 #define COMBA_CLEAR \
166    c0 = c1 = c2 = 0;
167 
168 #define COMBA_FORWARD \
169    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
170 
171 #define COMBA_STORE(x) \
172    x = c0;
173 
174 #define COMBA_STORE2(x) \
175    x = c1;
176 
177 #define COMBA_FINI
178 
179 /* untested: will mulhwu change the flags?  Docs say no */
180 #define MULADD(i, j)              \
181 asm(                              \
182    " mullw  16,%6,%7       \n\t" \
183    " addc   %0,%0,16       \n\t" \
184    " mulhwu 16,%6,%7       \n\t" \
185    " adde   %1,%1,16       \n\t" \
186    " addze  %2,%2          \n\t" \
187 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
188 
189 #elif defined(TFM_PPC64)
190 /* For 64-bit PPC */
191 
192 #define COMBA_START
193 
194 #define COMBA_CLEAR \
195    c0 = c1 = c2 = 0;
196 
197 #define COMBA_FORWARD \
198    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
199 
200 #define COMBA_STORE(x) \
201    x = c0;
202 
203 #define COMBA_STORE2(x) \
204    x = c1;
205 
206 #define COMBA_FINI
207 
208 /* untested: will mulhdu change the flags?  Docs say no */
209 #define MULADD(i, j)              \
210 asm(                              \
211    " mulld  r16,%6,%7       \n\t" \
212    " addc   %0,%0,16       \n\t" \
213    " mulhdu r16,%6,%7       \n\t" \
214    " adde   %1,%1,16       \n\t" \
215    " addze  %2,%2          \n\t" \
216 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
217 
218 #elif defined(TFM_AVR32)
219 
220 /* ISO C code */
221 
222 #define COMBA_START
223 
224 #define COMBA_CLEAR \
225    c0 = c1 = c2 = 0;
226 
227 #define COMBA_FORWARD \
228    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
229 
230 #define COMBA_STORE(x) \
231    x = c0;
232 
233 #define COMBA_STORE2(x) \
234    x = c1;
235 
236 #define COMBA_FINI
237 
238 #define MULADD(i, j)             \
239 asm(                             \
240    " mulu.d r2,%6,%7        \n\t"\
241    " add    %0,r2           \n\t"\
242    " adc    %1,%1,r3        \n\t"\
243    " acr    %2              \n\t"\
244 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
245 
246 #elif defined(TFM_MIPS)
247 
248 #define COMBA_START
249 
250 #define COMBA_CLEAR \
251    c0 = c1 = c2 = 0;
252 
253 #define COMBA_FORWARD \
254    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
255 
256 #define COMBA_STORE(x) \
257    x = c0;
258 
259 #define COMBA_STORE2(x) \
260    x = c1;
261 
262 #define COMBA_FINI
263 
264 #define MULADD(i, j)              \
265 asm(                              \
266    " multu  %6,%7          \n\t"  \
267    " mflo   $12            \n\t"  \
268    " mfhi   $13            \n\t"  \
269    " addu    %0,%0,$12     \n\t"  \
270    " sltu   $12,%0,$12     \n\t"  \
271    " addu    %1,%1,$13     \n\t"  \
272    " sltu   $13,%1,$13     \n\t"  \
273    " addu    %1,%1,$12     \n\t"  \
274    " sltu   $12,%1,$12     \n\t"  \
275    " addu    %2,%2,$13     \n\t"  \
276    " addu    %2,%2,$12     \n\t"  \
277 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
278 
279 #else
280 /* ISO C code */
281 
282 #define COMBA_START
283 
284 #define COMBA_CLEAR \
285    c0 = c1 = c2 = 0;
286 
287 #define COMBA_FORWARD \
288    do { c0 = c1; c1 = c2; c2 = 0; } while (0);
289 
290 #define COMBA_STORE(x) \
291    x = c0;
292 
293 #define COMBA_STORE2(x) \
294    x = c1;
295 
296 #define COMBA_FINI
297 
298 #define MULADD(i, j)                                                              \
299    do { fp_word t;                                                                \
300    t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t;                         \
301    t = (fp_word)c1 + (t >> DIGIT_BIT);            c1 = t; c2 += t >> DIGIT_BIT;   \
302    } while (0);
303 
304 #endif
305 
306 #ifndef TFM_DEFINES
307 
308 /* generic PxQ multiplier */
fp_mul_comba(fp_int * A,fp_int * B,fp_int * C)309 void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
310 {
311    int       ix, iy, iz, tx, ty, pa;
312    fp_digit  c0, c1, c2, *tmpx, *tmpy;
313    fp_int    tmp, *dst;
314 
315    COMBA_START;
316    COMBA_CLEAR;
317 
318    /* get size of output and trim */
319    pa = A->used + B->used;
320    if (pa >= FP_SIZE) {
321       pa = FP_SIZE-1;
322    }
323 
324    if (A == C || B == C) {
325       fp_zero(&tmp);
326       dst = &tmp;
327    } else {
328       fp_zero(C);
329       dst = C;
330    }
331 
332    for (ix = 0; ix < pa; ix++) {
333       /* get offsets into the two bignums */
334       ty = MIN(ix, B->used-1);
335       tx = ix - ty;
336 
337       /* setup temp aliases */
338       tmpx = A->dp + tx;
339       tmpy = B->dp + ty;
340 
341       /* this is the number of times the loop will iterrate, essentially its
342          while (tx++ < a->used && ty-- >= 0) { ... }
343        */
344       iy = MIN(A->used-tx, ty+1);
345 
346       /* execute loop */
347       COMBA_FORWARD;
348       for (iz = 0; iz < iy; ++iz) {
349           MULADD(*tmpx++, *tmpy--);
350       }
351 
352       /* store term */
353       COMBA_STORE(dst->dp[ix]);
354   }
355   COMBA_FINI;
356 
357   dst->used = pa;
358   dst->sign = A->sign ^ B->sign;
359   fp_clamp(dst);
360   fp_copy(dst, C);
361 }
362 
363 #endif
364 
365 /* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */
366 /* $Revision: 1.4 $ */
367 /* $Date: 2007/03/14 23:47:42 $ */
368 
369