1 /* 2 * 3 * This project is meant to fill in where LibTomMath 4 * falls short. That is speed ;-) 5 * 6 * This project is public domain and free for all purposes. 7 * 8 * Tom St Denis, tomstdenis@gmail.com 9 */ 10 #include "bignum_fast.h" 11 12 #if defined(TFM_PRESCOTT) && defined(TFM_SSE2) 13 #undef TFM_SSE2 14 #define TFM_X86 15 #endif 16 17 #if defined(TFM_X86) 18 19 /* x86-32 optimized */ 20 21 #define COMBA_START 22 23 #define CLEAR_CARRY \ 24 c0 = c1 = c2 = 0; 25 26 #define COMBA_STORE(x) \ 27 x = c0; 28 29 #define COMBA_STORE2(x) \ 30 x = c1; 31 32 #define CARRY_FORWARD \ 33 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 34 35 #define COMBA_FINI 36 37 #define SQRADD(i, j) \ 38 asm( \ 39 "movl %6,%%eax \n\t" \ 40 "mull %%eax \n\t" \ 41 "addl %%eax,%0 \n\t" \ 42 "adcl %%edx,%1 \n\t" \ 43 "adcl $0,%2 \n\t" \ 44 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","%edx","cc"); 45 46 #define SQRADD2(i, j) \ 47 asm( \ 48 "movl %6,%%eax \n\t" \ 49 "mull %7 \n\t" \ 50 "addl %%eax,%0 \n\t" \ 51 "adcl %%edx,%1 \n\t" \ 52 "adcl $0,%2 \n\t" \ 53 "addl %%eax,%0 \n\t" \ 54 "adcl %%edx,%1 \n\t" \ 55 "adcl $0,%2 \n\t" \ 56 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc"); 57 58 #define SQRADDSC(i, j) \ 59 asm( \ 60 "movl %6,%%eax \n\t" \ 61 "mull %7 \n\t" \ 62 "movl %%eax,%0 \n\t" \ 63 "movl %%edx,%1 \n\t" \ 64 "xorl %2,%2 \n\t" \ 65 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc"); 66 67 #define SQRADDAC(i, j) \ 68 asm( \ 69 "movl %6,%%eax \n\t" \ 70 "mull %7 \n\t" \ 71 "addl %%eax,%0 \n\t" \ 72 "adcl %%edx,%1 \n\t" \ 73 "adcl $0,%2 \n\t" \ 74 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%eax","%edx","cc"); 75 76 #define SQRADDDB \ 77 asm( \ 78 "addl %6,%0 \n\t" \ 79 "adcl %7,%1 \n\t" \ 80 "adcl %8,%2 \n\t" \ 81 "addl %6,%0 \n\t" \ 82 "adcl %7,%1 \n\t" \ 83 "adcl %8,%2 \n\t" \ 84 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); 85 86 #elif defined(TFM_X86_64) 87 /* x86-64 optimized */ 88 89 #define COMBA_START 90 91 #define CLEAR_CARRY \ 92 c0 = c1 = c2 = 0; 93 94 #define COMBA_STORE(x) \ 95 x = c0; 96 97 #define COMBA_STORE2(x) \ 98 x = c1; 99 100 #define CARRY_FORWARD \ 101 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 102 103 #define COMBA_FINI 104 105 #define SQRADD(i, j) \ 106 asm( \ 107 "movq %6,%%rax \n\t" \ 108 "mulq %%rax \n\t" \ 109 "addq %%rax,%0 \n\t" \ 110 "adcq %%rdx,%1 \n\t" \ 111 "adcq $0,%2 \n\t" \ 112 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i) :"%rax","%rdx","cc"); 113 114 #define SQRADD2(i, j) \ 115 asm( \ 116 "movq %6,%%rax \n\t" \ 117 "mulq %7 \n\t" \ 118 "addq %%rax,%0 \n\t" \ 119 "adcq %%rdx,%1 \n\t" \ 120 "adcq $0,%2 \n\t" \ 121 "addq %%rax,%0 \n\t" \ 122 "adcq %%rdx,%1 \n\t" \ 123 "adcq $0,%2 \n\t" \ 124 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc"); 125 126 #define SQRADDSC(i, j) \ 127 asm( \ 128 "movq %6,%%rax \n\t" \ 129 "mulq %7 \n\t" \ 130 "movq %%rax,%0 \n\t" \ 131 "movq %%rdx,%1 \n\t" \ 132 "xorq %2,%2 \n\t" \ 133 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); 134 135 #define SQRADDAC(i, j) \ 136 asm( \ 137 "movq %6,%%rax \n\t" \ 138 "mulq %7 \n\t" \ 139 "addq %%rax,%0 \n\t" \ 140 "adcq %%rdx,%1 \n\t" \ 141 "adcq $0,%2 \n\t" \ 142 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) :"%rax","%rdx","cc"); 143 144 #define SQRADDDB \ 145 asm( \ 146 "addq %6,%0 \n\t" \ 147 "adcq %7,%1 \n\t" \ 148 "adcq %8,%2 \n\t" \ 149 "addq %6,%0 \n\t" \ 150 "adcq %7,%1 \n\t" \ 151 "adcq %8,%2 \n\t" \ 152 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); 153 154 #elif defined(TFM_SSE2) 155 156 /* SSE2 Optimized */ 157 #define COMBA_START 158 159 #define CLEAR_CARRY \ 160 c0 = c1 = c2 = 0; 161 162 #define COMBA_STORE(x) \ 163 x = c0; 164 165 #define COMBA_STORE2(x) \ 166 x = c1; 167 168 #define CARRY_FORWARD \ 169 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 170 171 #define COMBA_FINI \ 172 asm("emms"); 173 174 #define SQRADD(i, j) \ 175 asm( \ 176 "movd %6,%%mm0 \n\t" \ 177 "pmuludq %%mm0,%%mm0\n\t" \ 178 "movd %%mm0,%%eax \n\t" \ 179 "psrlq $32,%%mm0 \n\t" \ 180 "addl %%eax,%0 \n\t" \ 181 "movd %%mm0,%%eax \n\t" \ 182 "adcl %%eax,%1 \n\t" \ 183 "adcl $0,%2 \n\t" \ 184 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i) :"%eax","cc"); 185 186 #define SQRADD2(i, j) \ 187 asm( \ 188 "movd %6,%%mm0 \n\t" \ 189 "movd %7,%%mm1 \n\t" \ 190 "pmuludq %%mm1,%%mm0\n\t" \ 191 "movd %%mm0,%%eax \n\t" \ 192 "psrlq $32,%%mm0 \n\t" \ 193 "movd %%mm0,%%edx \n\t" \ 194 "addl %%eax,%0 \n\t" \ 195 "adcl %%edx,%1 \n\t" \ 196 "adcl $0,%2 \n\t" \ 197 "addl %%eax,%0 \n\t" \ 198 "adcl %%edx,%1 \n\t" \ 199 "adcl $0,%2 \n\t" \ 200 :"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc"); 201 202 #define SQRADDSC(i, j) \ 203 asm( \ 204 "movd %6,%%mm0 \n\t" \ 205 "movd %7,%%mm1 \n\t" \ 206 "pmuludq %%mm1,%%mm0\n\t" \ 207 "movd %%mm0,%0 \n\t" \ 208 "psrlq $32,%%mm0 \n\t" \ 209 "movd %%mm0,%1 \n\t" \ 210 "xorl %2,%2 \n\t" \ 211 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j)); 212 213 #define SQRADDAC(i, j) \ 214 asm( \ 215 "movd %6,%%mm0 \n\t" \ 216 "movd %7,%%mm1 \n\t" \ 217 "pmuludq %%mm1,%%mm0\n\t" \ 218 "movd %%mm0,%%eax \n\t" \ 219 "psrlq $32,%%mm0 \n\t" \ 220 "movd %%mm0,%%edx \n\t" \ 221 "addl %%eax,%0 \n\t" \ 222 "adcl %%edx,%1 \n\t" \ 223 "adcl $0,%2 \n\t" \ 224 :"=r"(sc0), "=r"(sc1), "=r"(sc2): "0"(sc0), "1"(sc1), "2"(sc2), "m"(i), "m"(j) :"%eax","%edx","cc"); 225 226 #define SQRADDDB \ 227 asm( \ 228 "addl %6,%0 \n\t" \ 229 "adcl %7,%1 \n\t" \ 230 "adcl %8,%2 \n\t" \ 231 "addl %6,%0 \n\t" \ 232 "adcl %7,%1 \n\t" \ 233 "adcl %8,%2 \n\t" \ 234 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) : "cc"); 235 236 #elif defined(TFM_ARM) 237 238 /* ARM code */ 239 240 #define COMBA_START 241 242 #define CLEAR_CARRY \ 243 c0 = c1 = c2 = 0; 244 245 #define COMBA_STORE(x) \ 246 x = c0; 247 248 #define COMBA_STORE2(x) \ 249 x = c1; 250 251 #define CARRY_FORWARD \ 252 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 253 254 #define COMBA_FINI 255 256 /* multiplies point i and j, updates carry "c1" and digit c2 */ 257 #define SQRADD(i, j) \ 258 asm( \ 259 " UMULL r0,r1,%6,%6 \n\t" \ 260 " ADDS %0,%0,r0 \n\t" \ 261 " ADCS %1,%1,r1 \n\t" \ 262 " ADC %2,%2,#0 \n\t" \ 263 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i) : "r0", "r1", "cc"); 264 265 /* for squaring some of the terms are doubled... */ 266 #define SQRADD2(i, j) \ 267 asm( \ 268 " UMULL r0,r1,%6,%7 \n\t" \ 269 " ADDS %0,%0,r0 \n\t" \ 270 " ADCS %1,%1,r1 \n\t" \ 271 " ADC %2,%2,#0 \n\t" \ 272 " ADDS %0,%0,r0 \n\t" \ 273 " ADCS %1,%1,r1 \n\t" \ 274 " ADC %2,%2,#0 \n\t" \ 275 :"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc"); 276 277 #define SQRADDSC(i, j) \ 278 asm( \ 279 " UMULL %0,%1,%6,%7 \n\t" \ 280 " SUB %2,%2,%2 \n\t" \ 281 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "cc"); 282 283 #define SQRADDAC(i, j) \ 284 asm( \ 285 " UMULL r0,r1,%6,%7 \n\t" \ 286 " ADDS %0,%0,r0 \n\t" \ 287 " ADCS %1,%1,r1 \n\t" \ 288 " ADC %2,%2,#0 \n\t" \ 289 :"=r"(sc0), "=r"(sc1), "=r"(sc2) : "0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j) : "r0", "r1", "cc"); 290 291 #define SQRADDDB \ 292 asm( \ 293 " ADDS %0,%0,%3 \n\t" \ 294 " ADCS %1,%1,%4 \n\t" \ 295 " ADC %2,%2,%5 \n\t" \ 296 " ADDS %0,%0,%3 \n\t" \ 297 " ADCS %1,%1,%4 \n\t" \ 298 " ADC %2,%2,%5 \n\t" \ 299 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); 300 301 #elif defined(TFM_PPC32) 302 303 /* PPC32 */ 304 305 #define COMBA_START 306 307 #define CLEAR_CARRY \ 308 c0 = c1 = c2 = 0; 309 310 #define COMBA_STORE(x) \ 311 x = c0; 312 313 #define COMBA_STORE2(x) \ 314 x = c1; 315 316 #define CARRY_FORWARD \ 317 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 318 319 #define COMBA_FINI 320 321 /* multiplies point i and j, updates carry "c1" and digit c2 */ 322 #define SQRADD(i, j) \ 323 asm( \ 324 " mullw 16,%6,%6 \n\t" \ 325 " addc %0,%0,16 \n\t" \ 326 " mulhwu 16,%6,%6 \n\t" \ 327 " adde %1,%1,16 \n\t" \ 328 " addze %2,%2 \n\t" \ 329 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"16","cc"); 330 331 /* for squaring some of the terms are doubled... */ 332 #define SQRADD2(i, j) \ 333 asm( \ 334 " mullw 16,%6,%7 \n\t" \ 335 " mulhwu 17,%6,%7 \n\t" \ 336 " addc %0,%0,16 \n\t" \ 337 " adde %1,%1,17 \n\t" \ 338 " addze %2,%2 \n\t" \ 339 " addc %0,%0,16 \n\t" \ 340 " adde %1,%1,17 \n\t" \ 341 " addze %2,%2 \n\t" \ 342 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16", "17","cc"); 343 344 #define SQRADDSC(i, j) \ 345 asm( \ 346 " mullw %0,%6,%7 \n\t" \ 347 " mulhwu %1,%6,%7 \n\t" \ 348 " xor %2,%2,%2 \n\t" \ 349 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc"); 350 351 #define SQRADDAC(i, j) \ 352 asm( \ 353 " mullw 16,%6,%7 \n\t" \ 354 " addc %0,%0,16 \n\t" \ 355 " mulhwu 16,%6,%7 \n\t" \ 356 " adde %1,%1,16 \n\t" \ 357 " addze %2,%2 \n\t" \ 358 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"16", "cc"); 359 360 #define SQRADDDB \ 361 asm( \ 362 " addc %0,%0,%3 \n\t" \ 363 " adde %1,%1,%4 \n\t" \ 364 " adde %2,%2,%5 \n\t" \ 365 " addc %0,%0,%3 \n\t" \ 366 " adde %1,%1,%4 \n\t" \ 367 " adde %2,%2,%5 \n\t" \ 368 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); 369 370 #elif defined(TFM_PPC64) 371 /* PPC64 */ 372 373 #define COMBA_START 374 375 #define CLEAR_CARRY \ 376 c0 = c1 = c2 = 0; 377 378 #define COMBA_STORE(x) \ 379 x = c0; 380 381 #define COMBA_STORE2(x) \ 382 x = c1; 383 384 #define CARRY_FORWARD \ 385 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 386 387 #define COMBA_FINI 388 389 /* multiplies point i and j, updates carry "c1" and digit c2 */ 390 #define SQRADD(i, j) \ 391 asm( \ 392 " mulld r16,%6,%6 \n\t" \ 393 " addc %0,%0,r16 \n\t" \ 394 " mulhdu r16,%6,%6 \n\t" \ 395 " adde %1,%1,r16 \n\t" \ 396 " addze %2,%2 \n\t" \ 397 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r16","cc"); 398 399 /* for squaring some of the terms are doubled... */ 400 #define SQRADD2(i, j) \ 401 asm( \ 402 " mulld r16,%6,%7 \n\t" \ 403 " mulhdu r17,%6,%7 \n\t" \ 404 " addc %0,%0,r16 \n\t" \ 405 " adde %1,%1,r17 \n\t" \ 406 " addze %2,%2 \n\t" \ 407 " addc %0,%0,r16 \n\t" \ 408 " adde %1,%1,r17 \n\t" \ 409 " addze %2,%2 \n\t" \ 410 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16", "r17","cc"); 411 412 #define SQRADDSC(i, j) \ 413 asm( \ 414 " mulld %0,%6,%7 \n\t" \ 415 " mulhdu %1,%6,%7 \n\t" \ 416 " xor %2,%2,%2 \n\t" \ 417 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc"); 418 419 #define SQRADDAC(i, j) \ 420 asm( \ 421 " mulld r16,%6,%7 \n\t" \ 422 " addc %0,%0,r16 \n\t" \ 423 " mulhdu r16,%6,%7 \n\t" \ 424 " adde %1,%1,r16 \n\t" \ 425 " addze %2,%2 \n\t" \ 426 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r16", "cc"); 427 428 #define SQRADDDB \ 429 asm( \ 430 " addc %0,%0,%3 \n\t" \ 431 " adde %1,%1,%4 \n\t" \ 432 " adde %2,%2,%5 \n\t" \ 433 " addc %0,%0,%3 \n\t" \ 434 " adde %1,%1,%4 \n\t" \ 435 " adde %2,%2,%5 \n\t" \ 436 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); 437 438 439 #elif defined(TFM_AVR32) 440 441 /* AVR32 */ 442 443 #define COMBA_START 444 445 #define CLEAR_CARRY \ 446 c0 = c1 = c2 = 0; 447 448 #define COMBA_STORE(x) \ 449 x = c0; 450 451 #define COMBA_STORE2(x) \ 452 x = c1; 453 454 #define CARRY_FORWARD \ 455 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 456 457 #define COMBA_FINI 458 459 /* multiplies point i and j, updates carry "c1" and digit c2 */ 460 #define SQRADD(i, j) \ 461 asm( \ 462 " mulu.d r2,%6,%6 \n\t" \ 463 " add %0,%0,r2 \n\t" \ 464 " adc %1,%1,r3 \n\t" \ 465 " acr %2 \n\t" \ 466 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"r2","r3"); 467 468 /* for squaring some of the terms are doubled... */ 469 #define SQRADD2(i, j) \ 470 asm( \ 471 " mulu.d r2,%6,%7 \n\t" \ 472 " add %0,%0,r2 \n\t" \ 473 " adc %1,%1,r3 \n\t" \ 474 " acr %2, \n\t" \ 475 " add %0,%0,r2 \n\t" \ 476 " adc %1,%1,r3 \n\t" \ 477 " acr %2, \n\t" \ 478 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2", "r3"); 479 480 #define SQRADDSC(i, j) \ 481 asm( \ 482 " mulu.d r2,%6,%7 \n\t" \ 483 " mov %0,r2 \n\t" \ 484 " mov %1,r3 \n\t" \ 485 " eor %2,%2 \n\t" \ 486 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "r2", "r3"); 487 488 #define SQRADDAC(i, j) \ 489 asm( \ 490 " mulu.d r2,%6,%7 \n\t" \ 491 " add %0,%0,r2 \n\t" \ 492 " adc %1,%1,r3 \n\t" \ 493 " acr %2 \n\t" \ 494 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"r2", "r3"); 495 496 #define SQRADDDB \ 497 asm( \ 498 " add %0,%0,%3 \n\t" \ 499 " adc %1,%1,%4 \n\t" \ 500 " adc %2,%2,%5 \n\t" \ 501 " add %0,%0,%3 \n\t" \ 502 " adc %1,%1,%4 \n\t" \ 503 " adc %2,%2,%5 \n\t" \ 504 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "cc"); 505 506 #elif defined(TFM_MIPS) 507 508 /* MIPS */ 509 510 #define COMBA_START 511 512 #define CLEAR_CARRY \ 513 c0 = c1 = c2 = 0; 514 515 #define COMBA_STORE(x) \ 516 x = c0; 517 518 #define COMBA_STORE2(x) \ 519 x = c1; 520 521 #define CARRY_FORWARD \ 522 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 523 524 #define COMBA_FINI 525 526 /* multiplies point i and j, updates carry "c1" and digit c2 */ 527 #define SQRADD(i, j) \ 528 asm( \ 529 " multu %6,%6 \n\t" \ 530 " mflo $12 \n\t" \ 531 " mfhi $13 \n\t" \ 532 " addu %0,%0,$12 \n\t" \ 533 " sltu $12,%0,$12 \n\t" \ 534 " addu %1,%1,$13 \n\t" \ 535 " sltu $13,%1,$13 \n\t" \ 536 " addu %1,%1,$12 \n\t" \ 537 " sltu $12,%1,$12 \n\t" \ 538 " addu %2,%2,$13 \n\t" \ 539 " addu %2,%2,$12 \n\t" \ 540 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i):"$12","$13"); 541 542 /* for squaring some of the terms are doubled... */ 543 #define SQRADD2(i, j) \ 544 asm( \ 545 " multu %6,%7 \n\t" \ 546 " mflo $12 \n\t" \ 547 " mfhi $13 \n\t" \ 548 \ 549 " addu %0,%0,$12 \n\t" \ 550 " sltu $14,%0,$12 \n\t" \ 551 " addu %1,%1,$13 \n\t" \ 552 " sltu $15,%1,$13 \n\t" \ 553 " addu %1,%1,$14 \n\t" \ 554 " sltu $14,%1,$14 \n\t" \ 555 " addu %2,%2,$15 \n\t" \ 556 " addu %2,%2,$14 \n\t" \ 557 \ 558 " addu %0,%0,$12 \n\t" \ 559 " sltu $14,%0,$12 \n\t" \ 560 " addu %1,%1,$13 \n\t" \ 561 " sltu $15,%1,$13 \n\t" \ 562 " addu %1,%1,$14 \n\t" \ 563 " sltu $14,%1,$14 \n\t" \ 564 " addu %2,%2,$15 \n\t" \ 565 " addu %2,%2,$14 \n\t" \ 566 :"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12", "$13", "$14", "$15"); 567 568 #define SQRADDSC(i, j) \ 569 asm( \ 570 " multu %6,%7 \n\t" \ 571 " mflo %0 \n\t" \ 572 " mfhi %1 \n\t" \ 573 " xor %2,%2,%2 \n\t" \ 574 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i),"r"(j) : "cc"); 575 576 #define SQRADDAC(i, j) \ 577 asm( \ 578 " multu %6,%7 \n\t" \ 579 " mflo $12 \n\t" \ 580 " mfhi $13 \n\t" \ 581 " addu %0,%0,$12 \n\t" \ 582 " sltu $12,%0,$12 \n\t" \ 583 " addu %1,%1,$13 \n\t" \ 584 " sltu $13,%1,$13 \n\t" \ 585 " addu %1,%1,$12 \n\t" \ 586 " sltu $12,%1,$12 \n\t" \ 587 " addu %2,%2,$13 \n\t" \ 588 " addu %2,%2,$12 \n\t" \ 589 :"=r"(sc0), "=r"(sc1), "=r"(sc2):"0"(sc0), "1"(sc1), "2"(sc2), "r"(i), "r"(j):"$12", "$13", "$14"); 590 591 #define SQRADDDB \ 592 asm( \ 593 " addu %0,%0,%3 \n\t" \ 594 " sltu $10,%0,%3 \n\t" \ 595 " addu %1,%1,$10 \n\t" \ 596 " sltu $10,%1,$10 \n\t" \ 597 " addu %1,%1,%4 \n\t" \ 598 " sltu $11,%1,%4 \n\t" \ 599 " addu %2,%2,$10 \n\t" \ 600 " addu %2,%2,$11 \n\t" \ 601 " addu %2,%2,%5 \n\t" \ 602 \ 603 " addu %0,%0,%3 \n\t" \ 604 " sltu $10,%0,%3 \n\t" \ 605 " addu %1,%1,$10 \n\t" \ 606 " sltu $10,%1,$10 \n\t" \ 607 " addu %1,%1,%4 \n\t" \ 608 " sltu $11,%1,%4 \n\t" \ 609 " addu %2,%2,$10 \n\t" \ 610 " addu %2,%2,$11 \n\t" \ 611 " addu %2,%2,%5 \n\t" \ 612 :"=r"(c0), "=r"(c1), "=r"(c2) : "r"(sc0), "r"(sc1), "r"(sc2), "0"(c0), "1"(c1), "2"(c2) : "$10", "$11"); 613 614 #else 615 616 #define TFM_ISO 617 618 /* ISO C portable code */ 619 620 #define COMBA_START 621 622 #define CLEAR_CARRY \ 623 c0 = c1 = c2 = 0; 624 625 #define COMBA_STORE(x) \ 626 x = c0; 627 628 #define COMBA_STORE2(x) \ 629 x = c1; 630 631 #define CARRY_FORWARD \ 632 do { c0 = c1; c1 = c2; c2 = 0; } while (0); 633 634 #define COMBA_FINI 635 636 /* multiplies point i and j, updates carry "c1" and digit c2 */ 637 #define SQRADD(i, j) \ 638 do { fp_word t; \ 639 t = c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \ 640 t = c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \ 641 } while (0); 642 643 644 /* for squaring some of the terms are doubled... */ 645 #define SQRADD2(i, j) \ 646 do { fp_word t; \ 647 t = ((fp_word)i) * ((fp_word)j); \ 648 tt = (fp_word)c0 + t; c0 = tt; \ 649 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ 650 tt = (fp_word)c0 + t; c0 = tt; \ 651 tt = (fp_word)c1 + (tt >> DIGIT_BIT); c1 = tt; c2 += tt >> DIGIT_BIT; \ 652 } while (0); 653 654 #define SQRADDSC(i, j) \ 655 do { fp_word t; \ 656 t = ((fp_word)i) * ((fp_word)j); \ 657 sc0 = (fp_digit)t; sc1 = (t >> DIGIT_BIT); sc2 = 0; \ 658 } while (0); 659 660 #define SQRADDAC(i, j) \ 661 do { fp_word t; \ 662 t = sc0 + ((fp_word)i) * ((fp_word)j); sc0 = t; \ 663 t = sc1 + (t >> DIGIT_BIT); sc1 = t; sc2 += t >> DIGIT_BIT; \ 664 } while (0); 665 666 #define SQRADDDB \ 667 do { fp_word t; \ 668 t = ((fp_word)sc0) + ((fp_word)sc0) + c0; c0 = t; \ 669 t = ((fp_word)sc1) + ((fp_word)sc1) + c1 + (t >> DIGIT_BIT); c1 = t; \ 670 c2 = c2 + ((fp_word)sc2) + ((fp_word)sc2) + (t >> DIGIT_BIT); \ 671 } while (0); 672 673 #endif 674 675 /* $Source: /cvs/libtom/tomsfastmath/src/sqr/fp_sqr_comba.c,v $ */ 676 /* $Revision: 1.4 $ */ 677 /* $Date: 2007/03/14 23:47:42 $ */ 678