/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   for the sparc processor.

   These routines are derived from the SPARC Architecture Manual, version 8,
   slightly edited to match the desired calling convention, and also to
   optimize them for our purposes.

   Each routine sits inside its own L_* preprocessor guard; only the
   sections whose guard macro is defined get assembled.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif

#ifdef L_mulsi3
/*
 * .umul -- 32-bit multiply built from mulscc multiply steps.
 *
 * Input:   %o0 = multiplier, %o1 = multiplicand
 * Output:  %o0 = low 32 bits of the product
 * Scratch: %o4 (partial product), %o5, %y, integer condition codes
 *
 * If neither operand has a bit set above the low 12 bits, the short
 * 12-step sequence is used; otherwise the full 32-step sequence runs.
 * Both sequences end with one extra step that only shifts.
 */
.text
	.align 4
	.global .umul
	.proc 4
.umul:
	or	%o0, %o1, %o4		! combine both operands for the size test
	mov	%o0, %y			! Y register <- multiplier
	andncc	%o4, 0xfff, %o5		! anything set above the low 12 bits?
	be	mul_shortway		! no: the short sequence is enough
	andcc	%g0, %g0, %o4		! (delay slot) partial product = 0, N and V clear
	!
	! long multiply: 32 multiply steps plus a final shift-only step
	!
	mulscc	%o4, %o1, %o4		! step 1 of 33
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4		! step 32
	mulscc	%o4, %g0, %o4		! step 33 adds nothing, it only shifts
	! The upper word of the product is not valid here and is not returned.
	retl
	rd	%y, %o0			! (delay slot) result = Y
	!
	! short multiply: 12 multiply steps plus a final shift-only step
	!
mul_shortway:
	mulscc	%o4, %o1, %o4		! step 1 of 13
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4		! step 12
	mulscc	%o4, %g0, %o4		! step 13 adds nothing, it only shifts
	rd	%y, %o5			! fetch the part that was shifted into Y
	sll	%o4, 12, %o4		! partial product moves up by 12 bits
	srl	%o5, 20, %o5		! Y part moves down by 20 bits
	retl
	or	%o5, %o4, %o0		! (delay slot) merge both parts into the result
#endif

#ifdef L_divsi3
/*
 * Division and remainder, from Appendix E of the SPARC Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * .udiv (unsigned) and .div (signed): quotient %o0 / %o1.
 *
 * Input:  dividend in %o0, divisor in %o1.
 * Output: quotient in %o0.
 *
 * Settings of the m4 parameters named in the manual for this instance:
 *	name		.div  (function being generated)
 *	operation	div   (div => %o0 / %o1; rem => %o0 % %o1)
 *	signed		true  (true => signed; false => unsigned)
 *
 * Algorithm parameters:
 *	N		how many bits per iteration we try to get (4)
 *	WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *	TOPBITS		number of bits in the top decade of a number
 *
 * Important variables (and the register each one lives in below):
 *	Q    (%o2)	the partial quotient under development (initially 0)
 *	R    (%o3)	the remainder so far, initially the dividend
 *	ITER (%o4)	number of main division loop iterations required;
 *			equal to ceil(log2(quotient) / N).  Note that this
 *			is the log base (2^N) of the quotient.
 *	V    (%o5)	the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Other registers: %g1 holds the large-dividend threshold, %g2 counts
 * single-bit divide steps, %g3 is negative when the result must be negated.
 *
 * Cost:
 *	Current estimate for non-large dividend is
 *		ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *	A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *	different path, as the upper bits of the quotient must be developed
 *	one bit at a time.
 */
	.global .udiv
	.align 4
	.proc 4
	.text
.udiv:
	b	ready_to_divide
	mov	0, %g3			! (delay slot) unsigned: never negate the result

	.global .div
	.align 4
	.proc 4
	.text
.div:
	! Work out the sign of the result and make both operands nonnegative.
	orcc	%o1, %o0, %g0		! is either operand negative?
	bge	ready_to_divide		! neither one: divide as is
	xor	%o1, %o0, %g3		! (delay slot) %g3 < 0 iff the signs differ
	tst	%o1
	bge	1f			! divisor is fine, so the dividend is negative
	tst	%o0			! (delay slot) flags for the branch below
	! The divisor is negative here; the dividend may be negative too.
	bge	ready_to_divide		! dividend is already nonnegative
	neg	%o1			! (delay slot) divisor = -divisor on both paths
1:	! dividend is negative, divisor is nonnegative
	neg	%o0			! dividend = -dividend


ready_to_divide:

	! Set up R and V, and catch a zero divisor.
	orcc	%o1, %g0, %o5		! V = divisor, flags reflect its value
	bne	1f
	mov	%o0, %o3		! (delay slot) R = dividend

	! Zero divisor: raise the divide-by-zero trap.  Should the trap
	! return, hand back 0 (about as wrong as possible, but that is
	! what SunOS does...).
	ta	0x2			! ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! divisor larger than dividend?
	blu	got_result		! yes: done (the algorithm needs R >= V)
	clr	%o2			! (delay slot) Q = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big		! dividend below the threshold: fast path
	clr	%o4			! (delay slot) ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  The usual N-bits-at-a-time
	! divide step would overflow, so care is needed.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! ITER is found by shifting V into the top decade, N bits at a time,
	! without comparing against R at all.
1:
	cmp	%o5, %g1
	bgeu	3f			! V has reached the top decade
	mov	1, %g2			! (delay slot) single-step count starts at 1
	sll	%o5, 4, %o5		! V <<= N
	b	1b
	inc	%o4			! (delay slot) one more full iteration

	! Now compute %g2: keep doubling V while it stays below R.
2:	addcc	%o5, %o5, %o5		! V <<= 1, carry shows overflow
	bcc	not_too_big
	inc	%g2			! (delay slot) count the step

	! V overflowed while shifting, which means R has its
	! high-order bit set.  Rebuild V and undo the count.
	sll	%g1, 4, %g1		! the high-order bit
	srl	%o5, 1, %o5		! the rest of V
	add	%o5, %g1, %o5
	b	do_single_div
	dec	%g2			! (delay slot)

not_too_big:
3:	cmp	%o5, %o3
	blu	2b			! V still below R: shift once more
	nop
	be	do_single_div
	nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! Care is needed here.  R >= V is known, so the first divide step can
	! be done unconditionally.  The later steps are conditional on the
	! sign of R.  Since both R and V may have the high-order bit set on
	! the first step, simply falling into the regular division loop
	! would go wrong the first time around.
	! So the first step is unrolled...
do_single_div:
	deccc	%g2
	bl	end_regular_divide	! no single-bit steps to do
	nop
	sub	%o3, %o5, %o3		! first step: R -= V
	mov	1, %o2			! Q = 1
	b	end_single_divloop
	nop
single_divloop:
	sll	%o2, 1, %o2		! Q <<= 1
	bl	1f			! sign of R, set by the tst in the loop tail
	srl	%o5, 1, %o5		! (delay slot) V >>= 1
	! R >= 0
	sub	%o3, %o5, %o3
	b	2f
	inc	%o2			! (delay slot)
1:	! R < 0
	add	%o3, %o5, %o3
	dec	%o2
2:
end_single_divloop:
	deccc	%g2
	bge	single_divloop
	tst	%o3			! (delay slot) sign of R for the next step
	b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5		! V <<= N
	cmp	%o5, %o3
	bleu	1b			! repeat while V <= R
	inccc	%o4			! (delay slot) ITER += 1, sets the flags
	be	got_result
	dec	%o4			! (delay slot) ITER -= 1

	tst	%o3			! sign of R for the first iteration
divloop:
	sll	%o2, 4, %o2		! Q <<= N
	! level 1, quotient bits so far 0
	bl	L1.16
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 2, quotient bits so far 1
	bl	L2.17
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 3, quotient bits so far 3
	bl	L3.19
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far 7
	bl	L4.23
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2

L4.23:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2


L3.19:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far 5
	bl	L4.21
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 3, quotient bits so far 1
	bl	L3.17
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far 3
	bl	L4.19
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far 1
	bl	L4.17
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 2, quotient bits so far -1
	bl	L2.15
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 3, quotient bits so far -1
	bl	L3.15
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far -1
	bl	L4.15
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far -3
	bl	L4.13
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 3, quotient bits so far -3
	bl	L3.13
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far -5
	bl	L4.11
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far -7
	bl	L4.9
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
end_regular_divide:
	deccc	%o4			! ITER -= 1
	bge	divloop			! more N-bit iterations to run
	tst	%o3			! (delay slot) sign of R
	bl,a	got_result
	! Non-restoring fix-up (one instruction only!): the annulled delay
	! slot runs only when R ended up negative.
	dec	%o2			! Q -= 1


got_result:
	! Negate the quotient when %g3 says the result must be negative.
	tst	%g3
	bl,a	1f
	neg	%o2			! (annulled delay slot) Q = -Q
1:
	retl
	mov	%o2, %o0		! (delay slot) return Q
#endif

#ifdef L_modsi3
/* This implementation was taken from glibc:
 *
 * .urem (unsigned) and .rem (signed): remainder %o0 % %o1.
 *
 * Input:  dividend and divisor in %o0 and %o1 respectively.
 * Output: remainder in %o0; for .rem it takes the sign of the dividend.
 *
 * Algorithm parameters:
 *	N		how many bits per iteration we try to get (4)
 *	WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *	TOPBITS		number of bits in the top decade of a number
 *
 * Important variables (and the register each one lives in below):
 *	Q    (%o2)	the partial quotient under development (initially 0)
 *	R    (%o3)	the remainder so far, initially the dividend
 *	ITER (%o4)	number of main division loop iterations required;
 *			equal to ceil(log2(quotient) / N).  Note that this
 *			is the log base (2^N) of the quotient.
 *	V    (%o5)	the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Other registers: %g1 holds the large-dividend threshold, %g2 counts
 * single-bit divide steps, %g3 is negative when the result must be negated.
 *
 * Cost:
 *	Current estimate for non-large dividend is
 *		ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *	A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *	different path, as the upper bits of the quotient must be developed
 *	one bit at a time.
 */
.text
	.align 4
	.global .urem
	.proc 4
.urem:
	b	divide
	mov	0, %g3			! (delay slot) unsigned: never negate the result

	.align 4
	.global .rem
	.proc 4
.rem:
	! Record the sign of the result and make both operands nonnegative.
	orcc	%o1, %o0, %g0		! is either operand negative?
	bge	2f			! neither one: divide as is
	mov	%o0, %g3		! (delay slot) remainder takes the sign of %o0
	tst	%o1
	bge	1f			! divisor is fine, so the dividend is negative
	tst	%o0			! (delay slot) flags for the branch below
	! The divisor is negative here; the dividend may be negative too.
	bge	2f			! dividend is already nonnegative
	neg	%o1			! (delay slot) divisor = -divisor on both paths
1:	! dividend is negative, divisor is nonnegative
	neg	%o0			! dividend = -dividend
2:

	! Set up R and V, and catch a zero divisor.
divide:
	orcc	%o1, %g0, %o5		! V = divisor, flags reflect its value
	bne	1f
	mov	%o0, %o3		! (delay slot) R = dividend

	! Zero divisor: raise the divide-by-zero trap.  Should the trap
	! return, hand back 0 (about as wrong as possible, but that is
	! what SunOS does...).
	ta	0x2			! ST_DIV0
	retl
	clr	%o0

1:
	cmp	%o3, %o5		! divisor larger than dividend?
	blu	got_result		! yes: done (the algorithm needs R >= V)
	clr	%o2			! (delay slot) Q = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big		! dividend below the threshold: fast path
	clr	%o4			! (delay slot) ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  The usual N-bits-at-a-time
	! divide step would overflow, so care is needed.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! ITER is found by shifting V into the top decade, N bits at a time,
	! without comparing against R at all.
1:
	cmp	%o5, %g1
	bgeu	3f			! V has reached the top decade
	mov	1, %g2			! (delay slot) single-step count starts at 1
	sll	%o5, 4, %o5		! V <<= N
	b	1b
	inc	%o4			! (delay slot) one more full iteration

	! Now compute %g2: keep doubling V while it stays below R.
2:	addcc	%o5, %o5, %o5		! V <<= 1, carry shows overflow
	bcc	not_too_big
	inc	%g2			! (delay slot) count the step

	! V overflowed while shifting, which means R has its
	! high-order bit set.  Rebuild V and undo the count.
	sll	%g1, 4, %g1		! the high-order bit
	srl	%o5, 1, %o5		! the rest of V
	add	%o5, %g1, %o5
	b	do_single_div
	dec	%g2			! (delay slot)

not_too_big:
3:	cmp	%o5, %o3
	blu	2b			! V still below R: shift once more
	nop
	be	do_single_div
	nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! Care is needed here.  R >= V is known, so the first divide step can
	! be done unconditionally.  The later steps are conditional on the
	! sign of R.  Since both R and V may have the high-order bit set on
	! the first step, simply falling into the regular division loop
	! would go wrong the first time around.
	! So the first step is unrolled...
do_single_div:
	deccc	%g2
	bl	end_regular_divide	! no single-bit steps to do
	nop
	sub	%o3, %o5, %o3		! first step: R -= V
	mov	1, %o2			! Q = 1
	b	end_single_divloop
	nop
single_divloop:
	sll	%o2, 1, %o2		! Q <<= 1
	bl	1f			! sign of R, set by the tst in the loop tail
	srl	%o5, 1, %o5		! (delay slot) V >>= 1
	! R >= 0
	sub	%o3, %o5, %o3
	b	2f
	inc	%o2			! (delay slot)
1:	! R < 0
	add	%o3, %o5, %o3
	dec	%o2
2:
end_single_divloop:
	deccc	%g2
	bge	single_divloop
	tst	%o3			! (delay slot) sign of R for the next step
	b,a	end_regular_divide

not_really_big:
1:
	sll	%o5, 4, %o5		! V <<= N
	cmp	%o5, %o3
	bleu	1b			! repeat while V <= R
	inccc	%o4			! (delay slot) ITER += 1, sets the flags
	be	got_result
	dec	%o4			! (delay slot) ITER -= 1

	tst	%o3			! sign of R for the first iteration
divloop:
	sll	%o2, 4, %o2		! Q <<= N
	! level 1, quotient bits so far 0
	bl	L1.16
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 2, quotient bits so far 1
	bl	L2.17
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 3, quotient bits so far 3
	bl	L3.19
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far 7
	bl	L4.23
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2+1), %o2
L4.23:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (7*2-1), %o2

L3.19:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far 5
	bl	L4.21
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2+1), %o2

L4.21:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (5*2-1), %o2

L2.17:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 3, quotient bits so far 1
	bl	L3.17
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far 3
	bl	L4.19
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2+1), %o2

L4.19:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (3*2-1), %o2

L3.17:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far 1
	bl	L4.17
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2+1), %o2

L4.17:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (1*2-1), %o2

L1.16:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 2, quotient bits so far -1
	bl	L2.15
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 3, quotient bits so far -1
	bl	L3.15
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far -1
	bl	L4.15
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2+1), %o2

L4.15:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-1*2-1), %o2

L3.15:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far -3
	bl	L4.13
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2+1), %o2

L4.13:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-3*2-1), %o2

L2.15:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 3, quotient bits so far -3
	bl	L3.13
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	! level 4, quotient bits so far -5
	bl	L4.11
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2+1), %o2

L4.11:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-5*2-1), %o2

L3.13:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	! level 4, quotient bits so far -7
	bl	L4.9
	srl	%o5,1,%o5
	! R >= 0: subtract V
	subcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2+1), %o2

L4.9:
	! R < 0: add V
	addcc	%o3,%o5,%o3
	b	9f
	add	%o2, (-7*2-1), %o2

9:
end_regular_divide:
	deccc	%o4			! ITER -= 1
	bge	divloop			! more N-bit iterations to run
	tst	%o3			! (delay slot) sign of R
	bl,a	got_result
	! Non-restoring fix-up (one instruction only!): the annulled delay
	! slot runs only when R ended up negative.
	add	%o3, %o1, %o3		! R += divisor

got_result:
	! Negate the remainder when %g3 says the result must be negative.
	tst	%g3
	bl,a	1f
	neg	%o3			! (annulled delay slot) R = -R
1:
	retl
	mov	%o3, %o0		! (delay slot) return R

#endif
