1 /*
2  *
3  * This project is meant to fill in where LibTomMath
4  * falls short.  That is speed ;-)
5  *
6  * This project is public domain and free for all purposes.
7  *
8  * Tom St Denis, tomstdenis@gmail.com
9  */
10 #include "bignum_fast.h"
11 
/*
 * When both TFM_PRESCOTT and TFM_SSE2 are requested, drop the SSE2 macro set
 * and select the plain x86 one instead.
 *
 * TFM_X86 is only defined here if it is not defined already: a build that
 * also passes -DTFM_X86 gives the macro the value 1, and an unconditional
 * empty #define would then be an incompatible redefinition.
 */
#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
   #undef TFM_SSE2
   #ifndef TFM_X86
      #define TFM_X86
   #endif
#endif
16 
17 #if defined(TFM_X86)
18 
19 /* x86-32 optimized */
20 
/*
 * Comba squaring primitives for 32-bit x86 (GCC extended asm, AT&T syntax).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * %eax/%edx hold the product of `mull` and are declared clobbered.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. i must be an lvalue
 * because it is passed as a memory operand. */
#define SQRADD(i, j)                                      \
__asm__(                                                  \
     "movl  %6,%%eax     \n\t"                            \
     "mull  %%eax        \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice. */
#define SQRADD2(i, j)                                     \
__asm__(                                                  \
     "movl  %6,%%eax     \n\t"                            \
     "mull  %7           \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","cc");

/* (sc2:sc1:sc0) = i * j.  The previous sc values are tied in as inputs but
 * are overwritten by the template.  j uses "rm" because `mull` cannot take
 * an immediate operand. */
#define SQRADDSC(i, j)                                    \
__asm__(                                                  \
     "movl  %6,%%eax     \n\t"                            \
     "mull  %7           \n\t"                            \
     "movl  %%eax,%0     \n\t"                            \
     "movl  %%edx,%1     \n\t"                            \
     "xorl  %2,%2        \n\t"                            \
     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) :"%eax","%edx","cc");

/* (sc2:sc1:sc0) += i * j.  j uses "rm" because `mull` cannot take an
 * immediate operand. */
#define SQRADDAC(i, j)                                    \
__asm__(                                                  \
     "movl  %6,%%eax     \n\t"                            \
     "mull  %7           \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) :"%eax","%edx","cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice. */
#define SQRADDDB                                          \
__asm__(                                                  \
     "addl %6,%0         \n\t"                            \
     "adcl %7,%1         \n\t"                            \
     "adcl %8,%2         \n\t"                            \
     "addl %6,%0         \n\t"                            \
     "adcl %7,%1         \n\t"                            \
     "adcl %8,%2         \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
85 
86 #elif defined(TFM_X86_64)
87 /* x86-64 optimized */
88 
/*
 * Comba squaring primitives for x86-64 (GCC extended asm, AT&T syntax).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * %rax/%rdx hold the product of `mulq` and are declared clobbered.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. */
#define SQRADD(i, j)                                      \
__asm__(                                                  \
     "movq  %6,%%rax     \n\t"                            \
     "mulq  %%rax        \n\t"                            \
     "addq  %%rax,%0     \n\t"                            \
     "adcq  %%rdx,%1     \n\t"                            \
     "adcq  $0,%2        \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice.
 * j uses "rm" because `mulq` cannot take an immediate operand. */
#define SQRADD2(i, j)                                     \
__asm__(                                                  \
     "movq  %6,%%rax     \n\t"                            \
     "mulq  %7           \n\t"                            \
     "addq  %%rax,%0     \n\t"                            \
     "adcq  %%rdx,%1     \n\t"                            \
     "adcq  $0,%2        \n\t"                            \
     "addq  %%rax,%0     \n\t"                            \
     "adcq  %%rdx,%1     \n\t"                            \
     "adcq  $0,%2        \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "rm"(j)  :"%rax","%rdx","cc");

/* (sc2:sc1:sc0) = i * j.  The previous sc values are tied in as inputs but
 * are overwritten by the template. */
#define SQRADDSC(i, j)                                    \
__asm__(                                                  \
     "movq  %6,%%rax     \n\t"                            \
     "mulq  %7           \n\t"                            \
     "movq  %%rax,%0     \n\t"                            \
     "movq  %%rdx,%1     \n\t"                            \
     "xorq  %2,%2        \n\t"                            \
     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) :"%rax","%rdx","cc");

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)                                    \
__asm__(                                                  \
     "movq  %6,%%rax     \n\t"                            \
     "mulq  %7           \n\t"                            \
     "addq  %%rax,%0     \n\t"                            \
     "adcq  %%rdx,%1     \n\t"                            \
     "adcq  $0,%2        \n\t"                            \
     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "rm"(j) :"%rax","%rdx","cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice. */
#define SQRADDDB                                          \
__asm__(                                                  \
     "addq %6,%0         \n\t"                            \
     "adcq %7,%1         \n\t"                            \
     "adcq %8,%2         \n\t"                            \
     "addq %6,%0         \n\t"                            \
     "adcq %7,%1         \n\t"                            \
     "adcq %8,%2         \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
153 
154 #elif defined(TFM_SSE2)
155 
156 /* SSE2 Optimized */
/*
 * Comba squaring primitives using `pmuludq` on the MMX registers
 * (GCC extended asm, AT&T syntax, 32-bit digits).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 * i and j are passed as memory operands and therefore must be lvalues.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * %mm0/%mm1 are used as scratch by the templates; COMBA_FINI issues `emms`
 * once the MMX work is done.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Leave MMX state after the last column. */
#define COMBA_FINI \
   __asm__("emms");

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. */
#define SQRADD(i, j)                                      \
__asm__(                                                  \
     "movd  %6,%%mm0     \n\t"                            \
     "pmuludq %%mm0,%%mm0\n\t"                            \
     "movd  %%mm0,%%eax  \n\t"                            \
     "psrlq $32,%%mm0    \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "movd  %%mm0,%%eax  \n\t"                            \
     "adcl  %%eax,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice. */
#define SQRADD2(i, j)                                     \
__asm__(                                                  \
     "movd  %6,%%mm0     \n\t"                            \
     "movd  %7,%%mm1     \n\t"                            \
     "pmuludq %%mm1,%%mm0\n\t"                            \
     "movd  %%mm0,%%eax  \n\t"                            \
     "psrlq $32,%%mm0    \n\t"                            \
     "movd  %%mm0,%%edx  \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j)  :"%eax","%edx","cc");

/* (sc2:sc1:sc0) = i * j.  The previous sc values are tied in as inputs but
 * are overwritten by the template.  "cc" is declared because `xorl`
 * modifies the flags, matching the other macros in this set. */
#define SQRADDSC(i, j)                                    \
__asm__(                                                  \
     "movd  %6,%%mm0     \n\t"                            \
     "movd  %7,%%mm1     \n\t"                            \
     "pmuludq %%mm1,%%mm0\n\t"                            \
     "movd  %%mm0,%0     \n\t"                            \
     "psrlq $32,%%mm0    \n\t"                            \
     "movd  %%mm0,%1     \n\t"                            \
     "xorl  %2,%2        \n\t"                            \
     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) : "cc");

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)                                    \
__asm__(                                                  \
     "movd  %6,%%mm0     \n\t"                            \
     "movd  %7,%%mm1     \n\t"                            \
     "pmuludq %%mm1,%%mm0\n\t"                            \
     "movd  %%mm0,%%eax  \n\t"                            \
     "psrlq $32,%%mm0    \n\t"                            \
     "movd  %%mm0,%%edx  \n\t"                            \
     "addl  %%eax,%0     \n\t"                            \
     "adcl  %%edx,%1     \n\t"                            \
     "adcl  $0,%2        \n\t"                            \
     :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j)  :"%eax","%edx","cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice. */
#define SQRADDDB                                          \
__asm__(                                                  \
     "addl %6,%0         \n\t"                            \
     "adcl %7,%1         \n\t"                            \
     "adcl %8,%2         \n\t"                            \
     "addl %6,%0         \n\t"                            \
     "adcl %7,%1         \n\t"                            \
     "adcl %8,%2         \n\t"                            \
     :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc");
235 
236 #elif defined(TFM_ARM)
237 
238 /* ARM code */
239 
/*
 * Comba squaring primitives for ARM (GCC extended asm).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * r0/r1 receive the UMULL result where a scratch pair is needed and are
 * declared clobbered in those macros.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. */
#define SQRADD(i, j)                                             \
__asm__(                                                         \
"  UMULL  r0,r1,%6,%6              \n\t"                         \
"  ADDS   %0,%0,r0                 \n\t"                         \
"  ADCS   %1,%1,r1                 \n\t"                         \
"  ADC    %2,%2,#0                 \n\t"                         \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice. */
#define SQRADD2(i, j)                                            \
__asm__(                                                         \
"  UMULL  r0,r1,%6,%7              \n\t"                         \
"  ADDS   %0,%0,r0                 \n\t"                         \
"  ADCS   %1,%1,r1                 \n\t"                         \
"  ADC    %2,%2,#0                 \n\t"                         \
"  ADDS   %0,%0,r0                 \n\t"                         \
"  ADCS   %1,%1,r1                 \n\t"                         \
"  ADC    %2,%2,#0                 \n\t"                         \
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");

/* (sc2:sc1:sc0) = i * j.  UMULL writes sc0/sc1 directly and sc2 is zeroed
 * by subtracting it from itself. */
#define SQRADDSC(i, j)                                           \
__asm__(                                                         \
"  UMULL  %0,%1,%6,%7              \n\t"                         \
"  SUB    %2,%2,%2                 \n\t"                         \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "cc");

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)                                           \
__asm__(                                                         \
"  UMULL  r0,r1,%6,%7              \n\t"                         \
"  ADDS   %0,%0,r0                 \n\t"                         \
"  ADCS   %1,%1,r1                 \n\t"                         \
"  ADC    %2,%2,#0                 \n\t"                         \
:"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice.
 * Note the operand order: sc0..sc2 are %3..%5 here. */
#define SQRADDDB                                                 \
__asm__(                                                         \
"  ADDS  %0,%0,%3                     \n\t"                      \
"  ADCS  %1,%1,%4                     \n\t"                      \
"  ADC   %2,%2,%5                     \n\t"                      \
"  ADDS  %0,%0,%3                     \n\t"                      \
"  ADCS  %1,%1,%4                     \n\t"                      \
"  ADC   %2,%2,%5                     \n\t"                      \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
300 
301 #elif defined(TFM_PPC32)
302 
303 /* PPC32 */
304 
/*
 * Comba squaring primitives for 32-bit PowerPC (GCC extended asm).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * Registers 16 and 17 are used as scratch where needed and are declared
 * clobbered in those macros.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. */
#define SQRADD(i, j)             \
__asm__(                         \
   " mullw  16,%6,%6       \n\t" \
   " addc   %0,%0,16       \n\t" \
   " mulhwu 16,%6,%6       \n\t" \
   " adde   %1,%1,16       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice. */
#define SQRADD2(i, j)            \
__asm__(                         \
   " mullw  16,%6,%7       \n\t" \
   " mulhwu 17,%6,%7       \n\t" \
   " addc   %0,%0,16       \n\t" \
   " adde   %1,%1,17       \n\t" \
   " addze  %2,%2          \n\t" \
   " addc   %0,%0,16       \n\t" \
   " adde   %1,%1,17       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc");

/* (sc2:sc1:sc0) = i * j.  The product halves are written straight into
 * sc0/sc1 and sc2 is zeroed by xor-ing it with itself. */
#define SQRADDSC(i, j)            \
__asm__(                          \
   " mullw  %0,%6,%7        \n\t" \
   " mulhwu %1,%6,%7        \n\t" \
   " xor    %2,%2,%2        \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)           \
__asm__(                         \
   " mullw  16,%6,%7       \n\t" \
   " addc   %0,%0,16       \n\t" \
   " mulhwu 16,%6,%7       \n\t" \
   " adde   %1,%1,16       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice.
 * Note the operand order: sc0..sc2 are %3..%5 here. */
#define SQRADDDB                  \
__asm__(                          \
   " addc   %0,%0,%3        \n\t" \
   " adde   %1,%1,%4        \n\t" \
   " adde   %2,%2,%5        \n\t" \
   " addc   %0,%0,%3        \n\t" \
   " adde   %1,%1,%4        \n\t" \
   " adde   %2,%2,%5        \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
369 
370 #elif defined(TFM_PPC64)
371 /* PPC64 */
372 
/*
 * Comba squaring primitives for 64-bit PowerPC (GCC extended asm).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * r16 and r17 are used as scratch where needed and are declared clobbered
 * in those macros.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. */
#define SQRADD(i, j)             \
__asm__(                         \
   " mulld  r16,%6,%6       \n\t" \
   " addc   %0,%0,r16       \n\t" \
   " mulhdu r16,%6,%6       \n\t" \
   " adde   %1,%1,r16       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice. */
#define SQRADD2(i, j)            \
__asm__(                         \
   " mulld  r16,%6,%7       \n\t" \
   " mulhdu r17,%6,%7       \n\t" \
   " addc   %0,%0,r16       \n\t" \
   " adde   %1,%1,r17       \n\t" \
   " addze  %2,%2          \n\t" \
   " addc   %0,%0,r16       \n\t" \
   " adde   %1,%1,r17       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc");

/* (sc2:sc1:sc0) = i * j.  The product halves are written straight into
 * sc0/sc1 and sc2 is zeroed by xor-ing it with itself. */
#define SQRADDSC(i, j)            \
__asm__(                          \
   " mulld  %0,%6,%7        \n\t" \
   " mulhdu %1,%6,%7        \n\t" \
   " xor    %2,%2,%2        \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc");

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)           \
__asm__(                         \
   " mulld  r16,%6,%7       \n\t" \
   " addc   %0,%0,r16       \n\t" \
   " mulhdu r16,%6,%7       \n\t" \
   " adde   %1,%1,r16       \n\t" \
   " addze  %2,%2          \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice.
 * Note the operand order: sc0..sc2 are %3..%5 here. */
#define SQRADDDB                  \
__asm__(                          \
   " addc   %0,%0,%3        \n\t" \
   " adde   %1,%1,%4        \n\t" \
   " adde   %2,%2,%5        \n\t" \
   " addc   %0,%0,%3        \n\t" \
   " adde   %1,%1,%4        \n\t" \
   " adde   %2,%2,%5        \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
437 
438 
439 #elif defined(TFM_AVR32)
440 
441 /* AVR32 */
442 
/*
 * Comba squaring primitives for AVR32 (GCC extended asm).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 * `mulu.d r2,...` is followed by reads of r2 and r3, so both are declared
 * clobbered.  Every template that runs add/adc/acr/eor also declares "cc",
 * as SQRADDDB already did.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored. */
#define SQRADD(i, j)             \
__asm__(                         \
   " mulu.d r2,%6,%6       \n\t" \
   " add    %0,%0,r2       \n\t" \
   " adc    %1,%1,r3       \n\t" \
   " acr    %2             \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3","cc");

/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice.
 * `acr` takes a single register operand (no trailing comma). */
#define SQRADD2(i, j)            \
__asm__(                         \
   " mulu.d r2,%6,%7       \n\t" \
   " add    %0,%0,r2       \n\t" \
   " adc    %1,%1,r3       \n\t" \
   " acr    %2             \n\t" \
   " add    %0,%0,r2       \n\t" \
   " adc    %1,%1,r3       \n\t" \
   " acr    %2             \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3", "cc");

/* (sc2:sc1:sc0) = i * j.  The previous sc values are tied in as inputs but
 * are overwritten by the template. */
#define SQRADDSC(i, j)            \
__asm__(                          \
   " mulu.d r2,%6,%7        \n\t" \
   " mov    %0,r2           \n\t" \
   " mov    %1,r3           \n\t" \
   " eor    %2,%2           \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3", "cc");

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)           \
__asm__(                         \
   " mulu.d r2,%6,%7       \n\t" \
   " add    %0,%0,r2       \n\t" \
   " adc    %1,%1,r3       \n\t" \
   " acr    %2             \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3", "cc");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice.
 * Note the operand order: sc0..sc2 are %3..%5 here. */
#define SQRADDDB                  \
__asm__(                          \
   " add    %0,%0,%3        \n\t" \
   " adc    %1,%1,%4        \n\t" \
   " adc    %2,%2,%5        \n\t" \
   " add    %0,%0,%3        \n\t" \
   " adc    %1,%1,%4        \n\t" \
   " adc    %2,%2,%5        \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc");
505 
506 #elif defined(TFM_MIPS)
507 
508 /* MIPS */
509 
/*
 * Comba squaring primitives for MIPS (GCC extended asm).
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.
 * Every statement macro carries its own trailing ';'.
 *
 * The asm statements are spelled __asm__ so they also expand under strict
 * ISO modes (-std=c99 / -std=c11), where plain `asm` is not a keyword.
 *
 * The templates have no carry flag to rely on: after each `addu` the carry
 * is recovered with `sltu` (sum < addend) and added into the next digit.
 * `multu` leaves its result in HI/LO, so every macro that multiplies
 * declares "hi" and "lo" clobbered in addition to its scratch registers.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * i.  Only i is used; j is ignored.  Scratch: $12, $13. */
#define SQRADD(i, j)              \
__asm__(                          \
   " multu  %6,%6          \n\t"  \
   " mflo   $12            \n\t"  \
   " mfhi   $13            \n\t"  \
   " addu    %0,%0,$12     \n\t"  \
   " sltu   $12,%0,$12     \n\t"  \
   " addu    %1,%1,$13     \n\t"  \
   " sltu   $13,%1,$13     \n\t"  \
   " addu    %1,%1,$12     \n\t"  \
   " sltu   $12,%1,$12     \n\t"  \
   " addu    %2,%2,$13     \n\t"  \
   " addu    %2,%2,$12     \n\t"  \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13","hi","lo");

/* (c2:c1:c0) += 2 * i * j: the product is kept in $12/$13 and added twice,
 * with $14/$15 holding the intermediate carries. */
#define SQRADD2(i, j)            \
__asm__(                         \
   " multu  %6,%7          \n\t" \
   " mflo   $12            \n\t" \
   " mfhi   $13            \n\t" \
                                 \
   " addu    %0,%0,$12     \n\t" \
   " sltu   $14,%0,$12     \n\t" \
   " addu    %1,%1,$13     \n\t" \
   " sltu   $15,%1,$13     \n\t" \
   " addu    %1,%1,$14     \n\t" \
   " sltu   $14,%1,$14     \n\t" \
   " addu    %2,%2,$15     \n\t" \
   " addu    %2,%2,$14     \n\t" \
                                 \
   " addu    %0,%0,$12     \n\t" \
   " sltu   $14,%0,$12     \n\t" \
   " addu    %1,%1,$13     \n\t" \
   " sltu   $15,%1,$13     \n\t" \
   " addu    %1,%1,$14     \n\t" \
   " sltu   $14,%1,$14     \n\t" \
   " addu    %2,%2,$15     \n\t" \
   " addu    %2,%2,$14     \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15", "hi", "lo");

/* (sc2:sc1:sc0) = i * j.  LO/HI are moved straight into sc0/sc1 and sc2 is
 * zeroed by xor-ing it with itself. */
#define SQRADDSC(i, j)            \
__asm__(                          \
   " multu  %6,%7          \n\t"  \
   " mflo   %0             \n\t"  \
   " mfhi   %1             \n\t"  \
   " xor    %2,%2,%2       \n\t"  \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "hi", "lo", "cc");

/* (sc2:sc1:sc0) += i * j.  Scratch: $12, $13 (the template never touches
 * $14, so it is not listed as clobbered). */
#define SQRADDAC(i, j)           \
__asm__(                         \
   " multu  %6,%7          \n\t" \
   " mflo   $12            \n\t" \
   " mfhi   $13            \n\t" \
   " addu    %0,%0,$12     \n\t" \
   " sltu   $12,%0,$12     \n\t" \
   " addu    %1,%1,$13     \n\t" \
   " sltu   $13,%1,$13     \n\t" \
   " addu    %1,%1,$12     \n\t" \
   " sltu   $12,%1,$12     \n\t" \
   " addu    %2,%2,$13     \n\t" \
   " addu    %2,%2,$12     \n\t" \
:"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "hi", "lo");

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0): the secondary accumulator is added twice.
 * Scratch: $10, $11.  Note the operand order: sc0..sc2 are %3..%5 here. */
#define SQRADDDB                  \
__asm__(                          \
   " addu    %0,%0,%3       \n\t" \
   " sltu   $10,%0,%3       \n\t" \
   " addu    %1,%1,$10      \n\t" \
   " sltu   $10,%1,$10      \n\t" \
   " addu    %1,%1,%4       \n\t" \
   " sltu   $11,%1,%4       \n\t" \
   " addu    %2,%2,$10      \n\t" \
   " addu    %2,%2,$11      \n\t" \
   " addu    %2,%2,%5       \n\t" \
                                  \
   " addu    %0,%0,%3       \n\t" \
   " sltu   $10,%0,%3       \n\t" \
   " addu    %1,%1,$10      \n\t" \
   " sltu   $10,%1,$10      \n\t" \
   " addu    %1,%1,%4       \n\t" \
   " sltu   $11,%1,%4       \n\t" \
   " addu    %2,%2,$10      \n\t" \
   " addu    %2,%2,$11      \n\t" \
   " addu    %2,%2,%5       \n\t" \
:"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11");
613 
614 #else
615 
616 #define TFM_ISO
617 
618 /* ISO C portable code */
619 
/*
 * Portable ISO C comba squaring primitives.
 *
 * The macros operate on variables that must be in scope where they are
 * expanded: the three-digit column accumulator c0 (low), c1, c2 (high) and,
 * for the SC/AC/DB trio, a second accumulator sc0, sc1, sc2.  They also use
 * the types fp_digit and fp_word and the constant DIGIT_BIT, none of which
 * are defined here.  Each product is formed in fp_word and split with
 * `>> DIGIT_BIT`.
 *
 * Every statement macro carries its own trailing ';'.  Arguments are fully
 * parenthesised so that expression arguments are cast as a whole, and all
 * temporaries are declared inside the macro.
 */

/* Hook expanded before the first column; empty on this target. */
#define COMBA_START

/* Zero the column accumulator. */
#define CLEAR_CARRY \
   c0 = c1 = c2 = 0;

/* Store the low accumulator digit into x. */
#define COMBA_STORE(x) \
   x = c0;

/* Store the middle accumulator digit into x. */
#define COMBA_STORE2(x) \
   x = c1;

/* Advance to the next column: shift the accumulator down by one digit. */
#define CARRY_FORWARD \
   do { c0 = c1; c1 = c2; c2 = 0; } while (0);

/* Hook expanded after the last column; empty on this target. */
#define COMBA_FINI

/* (c2:c1:c0) += i * j. */
#define SQRADD(i, j)                                                      \
   do { fp_word t;                                                        \
   t = c0 + ((fp_word)(i)) * ((fp_word)(j));                              \
   c0 = (fp_digit)t;                                                      \
   t = c1 + (t >> DIGIT_BIT);                                             \
   c1 = (fp_digit)t;                                                      \
   c2 += (fp_digit)(t >> DIGIT_BIT);                                      \
   } while (0);


/* (c2:c1:c0) += 2 * i * j: the product is computed once and added twice.
 * Both temporaries are local, so no `tt` is required from the caller. */
#define SQRADD2(i, j)                                                     \
   do { fp_word t, tt;                                                    \
   t  = ((fp_word)(i)) * ((fp_word)(j));                                  \
   tt = (fp_word)c0 + t;                                                  \
   c0 = (fp_digit)tt;                                                     \
   tt = (fp_word)c1 + (tt >> DIGIT_BIT);                                  \
   c1 = (fp_digit)tt;                                                     \
   c2 += (fp_digit)(tt >> DIGIT_BIT);                                     \
   tt = (fp_word)c0 + t;                                                  \
   c0 = (fp_digit)tt;                                                     \
   tt = (fp_word)c1 + (tt >> DIGIT_BIT);                                  \
   c1 = (fp_digit)tt;                                                     \
   c2 += (fp_digit)(tt >> DIGIT_BIT);                                     \
   } while (0);

/* (sc2:sc1:sc0) = i * j. */
#define SQRADDSC(i, j)                                                    \
   do { fp_word t;                                                        \
      t = ((fp_word)(i)) * ((fp_word)(j));                                \
      sc0 = (fp_digit)t;                                                  \
      sc1 = (fp_digit)(t >> DIGIT_BIT);                                   \
      sc2 = 0;                                                            \
   } while (0);

/* (sc2:sc1:sc0) += i * j. */
#define SQRADDAC(i, j)                                                    \
   do { fp_word t;                                                        \
   t = sc0 + ((fp_word)(i)) * ((fp_word)(j));                             \
   sc0 = (fp_digit)t;                                                     \
   t = sc1 + (t >> DIGIT_BIT);                                            \
   sc1 = (fp_digit)t;                                                     \
   sc2 += (fp_digit)(t >> DIGIT_BIT);                                     \
   } while (0);

/* (c2:c1:c0) += 2 * (sc2:sc1:sc0). */
#define SQRADDDB                                                          \
   do { fp_word t;                                                        \
   t = ((fp_word)sc0) + ((fp_word)sc0) + c0;                              \
   c0 = (fp_digit)t;                                                      \
   t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT);           \
   c1 = (fp_digit)t;                                                      \
   c2 = (fp_digit)(c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT)); \
   } while (0);
672 
673 #endif
674 
675 /* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */
676 /* $Revision: 1.4 $ */
677 /* $Date: 2007/03/14 23:47:42 $ */
678