/* ieee754-df.S double-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005  Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2, or (at your option) any
   later version.

   In addition to the permissions in the GNU General Public License, the
   Free Software Foundation gives you unlimited permission to link the
   compiled version of this file into combinations with other programs,
   and to distribute those combinations without any restriction coming
   from the use of this file.  (The General Public License restrictions
   do apply in other respects; for example, they cover modification of
   the file, and distribution when not linked into a combined
   executable.)

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 * For slightly simpler code please see the single precision version
 * of this file.
 *
 * Only the default rounding mode is intended for best performances.
 * Exceptions aren't supported yet, but that can be added quite easily
 * if necessary without impacting performances.
 */


@ For FPA, float words are always big-endian.
@ For VFP, float words follow the memory system mode.
@ Register aliases: xh/xl and yh/yl name the high/low words of the first
@ and second double operand.  The word order depends on the FP model:
@ FPA doubles are always big-endian, VFP doubles follow memory endianness.
#if defined(__VFP_FP__) && !defined(__ARMEB__)
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#else
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#endif


#ifdef L_negdf2

ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2

	@ flip sign bit
	eor	xh, xh, #0x80000000
	RET

	FUNC_END aeabi_dneg
	FUNC_END negdf2

#endif

#ifdef L_addsubdf3

ARM_FUNC_START aeabi_drsub

	eor	xh, xh, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3

	eor	yh, yh, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3

1:	stmfd	sp!, {r4, r5, lr}

	@ Look for zeroes, equal values, INF, or NAN.
	@ Any of those cases is rare, so handle them out of line at Lad_s.
	mov	r4, xh, lsl #1
	mov	r5, yh, lsl #1
	teq	r4, r5
	teqeq	xl, yl
	orrnes	ip, r4, xl
	orrnes	ip, r5, yl
	mvnnes	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r4,
	@ corresponding arg in xh-xl, and positive exponent difference in r5.
	mov	r4, r4, lsr #21
	rsbs	r5, r4, r5, lsr #21
	rsblt	r5, r5, #0
	ble	1f
	add	r4, r4, r5
	@ Swap x and y with the three-XOR trick (no scratch register needed).
	eor	yl, xl, yl
	eor	yh, xh, yh
	eor	xl, yl, xl
	eor	xh, yh, xh
	eor	yl, xl, yl
	eor	yh, xh, yh
1:
	@ If exponent difference is too large, return largest argument
	@ already in xh-xl.  We need up to 54 bit to handle proper rounding
	@ of 0x1p54 - 1.1.
	cmp	r5, #54
	RETLDM	"r4, r5" hi

	@ Convert mantissa to signed integer.
	tst	xh, #0x80000000
	mov	xh, xh, lsl #12
	mov	ip, #0x00100000
	orr	xh, ip, xh, lsr #12
	beq	1f
	rsbs	xl, xl, #0
	rsc	xh, xh, #0
1:
	tst	yh, #0x80000000
	mov	yh, yh, lsl #12
	orr	yh, ip, yh, lsr #12
	beq	1f
	rsbs	yl, yl, #0
	rsc	yh, yh, #0
1:
	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not common case, rescale them off line.
	teq	r4, r5
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r4, r4, #1

	@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
	rsbs	lr, r5, #32
	blt	1f
	mov	ip, yl, lsl lr
	adds	xl, xl, yl, lsr r5
	adc	xh, xh, #0
	adds	xl, xl, yh, lsl lr
	adcs	xh, xh, yh, asr r5
	b	2f
1:	sub	r5, r5, #32
	add	lr, lr, #32
	@ C := (yl != 0); fold it into ip as a sticky bit so the discarded
	@ low word still influences rounding.
	cmp	yl, #1
	mov	ip, yh, lsl lr
	orrcs	ip, ip, #2		@ 2 not 1, to allow lsr #1 later
	adds	xl, xl, yh, asr r5
	adcs	xh, xh, yh, asr #31
2:
	@ We now have a result in xh-xl-ip.
	@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
	and	r5, xh, #0x80000000
	bpl	LSYM(Lad_p)
	rsbs	ip, ip, #0
	rscs	xl, xl, #0
	rsc	xh, xh, #0

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	xh, #0x00100000
	bcc	LSYM(Lad_a)
	cmp	xh, #0x00200000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	xh, xh, lsr #1
	movs	xl, xl, rrx
	mov	ip, ip, rrx
	add	r4, r4, #1

	@ Make sure we did not bust our exponent.
	mov	r2, r4, lsl #21
	cmn	r2, #(2 << 21)
	bcs	LSYM(Lad_o)

	@ Our result is now properly aligned into xh-xl, remaining bits in ip.
	@ Round with MSB of ip.  If halfway between two numbers, round towards
	@ LSB of xl = 0 (i.e. round to nearest, ties to even).
	@ Pack final result together.
LSYM(Lad_e):
	cmp	ip, #0x80000000
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	orr	xh, xh, r5
	RETLDM	"r4, r5"

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	ip, ip, lsl #1
	adcs	xl, xl, xl
	adc	xh, xh, xh
	tst	xh, #0x00100000
	sub	r4, r4, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	@ No CLZ instruction: count leading zeroes of xh-xl into r3 with a
	@ branch-free binary search (result biased for the implicit-1 position).
	teq	xh, #0
	movne	r3, #20
	moveq	r3, #52
	moveq	xh, xl
	moveq	xl, #0
	mov	r2, xh
	cmp	r2, #(1 << 16)
	movhs	r2, r2, lsr #16
	subhs	r3, r3, #16
	cmp	r2, #(1 << 8)
	movhs	r2, r2, lsr #8
	subhs	r3, r3, #8
	cmp	r2, #(1 << 4)
	movhs	r2, r2, lsr #4
	subhs	r3, r3, #4
	cmp	r2, #(1 << 2)
	subhs	r3, r3, #2
	sublo	r3, r3, r2, lsr #1
	sub	r3, r3, r2, lsr #3

#else

	teq	xh, #0
	moveq	xh, xl
	moveq	xl, #0
	clz	r3, xh
	addeq	r3, r3, #32
	sub	r3, r3, #11

#endif

	@ determine how to shift the value.
	subs	r2, r3, #32
	bge	2f
	adds	r2, r2, #12
	ble	1f

	@ shift value left 21 to 31 bits, or actually right 11 to 1 bits
	@ since a register switch happened above.
	add	ip, r2, #20
	rsb	r2, r2, #12
	mov	xl, xh, lsl ip
	mov	xh, xh, lsr r2
	b	3f

	@ actually shift value left 1 to 20 bits, which might also represent
	@ 32 to 52 bits if counting the register switch that happened earlier.
1:	add	r2, r2, #20
2:	rsble	ip, r2, #32
	mov	xh, xh, lsl r2
	orrle	xh, xh, xl, lsr ip
	movle	xl, xl, lsl r2

	@ adjust exponent accordingly.
3:	subs	r4, r4, r3
	addge	xh, xh, r4, lsl #20
	orrge	xh, xh, r5
	RETLDM	"r4, r5" ge

	@ Exponent too small, denormalize result.
	@ Find out proper shift value.
	mvn	r4, r4
	subs	r4, r4, #31
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, sign is in r5.
	add	r4, r4, #20
	rsb	r2, r4, #32
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r2
	orr	xh, r5, xh, lsr r4
	RETLDM	"r4, r5"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.
1:	rsb	r4, r4, #12
	rsb	r2, r4, #32
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.
2:	mov	xl, xh, lsr r4
	mov	xh, r5
	RETLDM	"r4, r5"

	@ Adjust exponents for denormalized arguments.
	@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
	teq	r4, #0
	eor	yh, yh, #0x00100000
	eoreq	xh, xh, #0x00100000
	addeq	r4, r4, #1
	subne	r5, r5, #1
	b	LSYM(Lad_x)

	@ Slow path for the special operands spotted at function entry.
LSYM(Lad_s):
	mvns	ip, r4, asr #21
	mvnnes	ip, r5, asr #21
	beq	LSYM(Lad_i)

	teq	r4, r5
	teqeq	xl, yl
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	orrs	ip, r4, xl
	moveq	xh, yh
	moveq	xl, yl
	RETLDM	"r4, r5"

1:	teq	xh, yh

	@ Result is x - x = 0.
	movne	xh, #0
	movne	xl, #0
	RETLDM	"r4, r5" ne

	@ Result is x + x = 2x.
	movs	ip, r4, lsr #21
	bne	2f
	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	orrcs	xh, xh, #0x80000000
	RETLDM	"r4, r5"
2:	adds	r4, r4, #(2 << 21)
	addcc	xh, xh, #(1 << 20)
	RETLDM	"r4, r5" cc
	@ Exponent bumped past the maximum: fall through to overflow with the
	@ sign preserved in r5.
	and	r5, xh, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	xh, r5, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5"

	@ At least one of x or y is INF/NAN.
	@   if xh-xl != INF/NAN: return yh-yl (which is INF/NAN)
	@   if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
	@   if either is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
	mvns	ip, r4, asr #21
	movne	xh, yh
	movne	xl, yl
	mvneqs	ip, r5, asr #21
	movne	yh, xh
	movne	yl, xl
	orrs	r4, xl, xh, lsl #12
	orreqs	r5, yl, yh, lsl #12
	teqeq	xh, yh
	orrne	xh, xh, #0x00080000	@ quiet NAN
	RETLDM	"r4, r5"

	FUNC_END aeabi_dsub
	FUNC_END subdf3
	FUNC_END aeabi_dadd
	FUNC_END adddf3

@ Convert unsigned 32-bit integer (r0) to double.  Reuses the addition
@ normalization code at Lad_l.
ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	mov	r5, #0			@ sign bit is 0
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_ui2d
	FUNC_END floatunsidf

@ Convert signed 32-bit integer (r0) to double.
ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf

	teq	r0, #0
	moveq	r1, #0
	RETc(eq)
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)
	ands	r5, r0, #0x80000000	@ sign bit in r5
	rsbmi	r0, r0, #0		@ absolute value
	.ifnc	xl, r0
	mov	xl, r0
	.endif
	mov	xh, #0
	b	LSYM(Lad_l)

	FUNC_END aeabi_i2d
	FUNC_END floatsidf

@ Extend single-precision float (r0) to double.
ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2

	movs	r2, r0, lsl #1		@ toss sign bit
	mov	xh, r2, asr #3		@ stretch exponent
	mov	xh, xh, rrx		@ retrieve sign bit
	mov	xl, r2, lsl #28		@ retrieve remaining bits
	andnes	r3, r2, #0xff000000	@ isolate exponent
	teqne	r3, #0xff000000		@ if not 0, check if INF or NAN
	eorne	xh, xh, #0x38000000	@ fixup exponent otherwise.
	RETc(ne)			@ and return it.

	teq	r2, #0			@ if actually 0
	teqne	r3, #0xff000000		@ or INF or NAN
	RETc(eq)			@ we are done already.

	@ value was denormalized.  We can normalize it now.
	stmfd	sp!, {r4, r5, lr}
	mov	r4, #0x380		@ setup corresponding exponent
	and	r5, xh, #0x80000000	@ move sign bit in r5
	bic	xh, xh, #0x80000000
	b	LSYM(Lad_l)

	FUNC_END aeabi_f2d
	FUNC_END extendsfdf2

@ Convert unsigned 64-bit integer to double.
ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	mov	r5, #0
	b	2f

@ Convert signed 64-bit integer to double.
ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	mvfeqd	f0, #0.0
#endif
	RETc(eq)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0/r1 for backwards
	@ compatibility.
	adr	ip, LSYM(f0_ret)
	stmfd	sp!, {r4, r5, ip, lr}
#else
	stmfd	sp!, {r4, r5, lr}
#endif

	@ NOTE(review): ah/al (high/low words of the 64-bit integer argument)
	@ are assumed to be #defined by the file including this one -- confirm.
	ands	r5, ah, #0x80000000	@ sign bit in r5
	bpl	2f
	rsbs	al, al, #0
	rsc	ah, ah, #0
2:
	mov	r4, #0x400		@ initial exponent
	add	r4, r4, #(52-1 - 1)

	@ FPA little-endian: must swap the word order.
	.ifnc	xh, ah
	mov	ip, al
	mov	xh, ah
	mov	xl, ip
	.endif

	movs	ip, xh, lsr #22
	beq	LSYM(Lad_p)

	@ The value is too big.  Scale it down a bit...
	@ Compute in r2 a shift amount (multiple of 3) that brings the top
	@ set bit below position 22, then resume the common path at Lad_p.
	mov	r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	movs	ip, ip, lsr #3
	addne	r2, r2, #3
	add	r2, r2, ip, lsr #3

	rsb	r3, r2, #32
	mov	ip, xl, lsl r3
	mov	xl, xl, lsr r2
	orr	xl, xl, xh, lsl r3
	mov	xh, xh, lsr r2
	add	r4, r4, r2
	b	LSYM(Lad_p)

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

	@ Legacy code expects the result to be returned in f0.  Copy it
	@ there as well.
LSYM(f0_ret):
	stmfd	sp!, {r0, r1}
	ldfd	f0, [sp], #8
	RETLDM

#endif

	FUNC_END floatdidf
	FUNC_END aeabi_l2d
	FUNC_END floatundidf
	FUNC_END aeabi_ul2d

#endif /* L_addsubdf3 */

#ifdef L_muldivdf3

ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Lml_s)

	@ Add exponents together
	add	r4, r4, r5

	@ Determine final sign.
	eor	r6, xh, yh

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	bic	xh, xh, ip, lsl #21
	bic	yh, yh, ip, lsl #21
	orrs	r5, xl, xh, lsl #12
	orrnes	r5, yl, yh, lsl #12
	orr	xh, xh, #0x00100000
	orr	yh, yh, #0x00100000
	beq	LSYM(Lml_1)

#if __ARM_ARCH__ < 4

	@ Put sign bit in r6, which will be restored in yl later.
	and	r6, r6, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	@ 64x64->128 multiply built from 16x16->32 partial products.
	stmfd	sp!, {r6, r7, r8, r9, sl, fp}
	mov	r7, xl, lsr #16
	mov	r8, yl, lsr #16
	mov	r9, xh, lsr #16
	mov	sl, yh, lsr #16
	bic	xl, xl, r7, lsl #16
	bic	yl, yl, r8, lsl #16
	bic	xh, xh, r9, lsl #16
	bic	yh, yh, sl, lsl #16
	mul	ip, xl, yl
	mul	fp, xl, r8
	mov	lr, #0
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, r7, yl
	adds	ip, ip, fp, lsl #16
	adc	lr, lr, fp, lsr #16
	mul	fp, xl, sl
	mov	r5, #0
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r7, yh
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, r8
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, r9, yl
	adds	lr, lr, fp, lsl #16
	adc	r5, r5, fp, lsr #16
	mul	fp, xh, sl
	mul	r6, r9, sl
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, r9, yh
	adds	r5, r5, fp, lsl #16
	adc	r6, r6, fp, lsr #16
	mul	fp, xl, yh
	adds	lr, lr, fp
	mul	fp, r7, sl
	adcs	r5, r5, fp
	mul	fp, xh, yl
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, r9, r8
	adcs	r5, r5, fp
	mul	fp, r7, r8
	adc	r6, r6, #0
	adds	lr, lr, fp
	mul	fp, xh, yh
	adcs	r5, r5, fp
	adc	r6, r6, #0
	ldmfd	sp!, {yl, r7, r8, r9, sl, fp}

#else

	@ Here is the actual multiplication.
	@ 128-bit product ends up in r6:r5:lr:ip (most to least significant).
	umull	ip, lr, xl, yl
	mov	r5, #0
	umlal	lr, r5, xh, yl
	and	yl, r6, #0x80000000
	umlal	lr, r5, xl, yh
	mov	r6, #0
	umlal	r5, r6, xh, yh

#endif

	@ The LSBs in ip are only significant for the final rounding.
	@ Fold them into lr (as a sticky bit).
	teq	ip, #0
	orrne	lr, lr, #1

	@ Adjust result upon the MSB position.
	sub	r4, r4, #0xff
	cmp	r6, #(1 << (20-11))
	sbc	r4, r4, #0x300
	bcs	1f
	movs	lr, lr, lsl #1
	adcs	r5, r5, r5
	adc	r6, r6, r6
1:
	@ Shift to final position, add sign to result.
	orr	xh, yl, r6, lsl #11
	orr	xh, xh, r5, lsr #21
	mov	xl, r5, lsl #11
	orr	xl, xl, lr, lsr #21
	mov	lr, lr, lsl #11

	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	lr, #0x80000000
	moveqs	lr, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	and	r6, r6, #0x80000000
	orr	xh, r6, xh
	orr	xl, xl, yl
	eor	xh, xh, yh
	subs	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	@ Under/overflow: fix things up for the code below.
	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r4, #(53 + 1)
	movle	xl, #0
	bicle	xh, xh, #0x7fffffff
	RETLDM	"r4, r5, r6" le

	@ Find out proper shift value.
	rsb	r4, r4, #0
	subs	r4, r4, #32
	bge	2f
	adds	r4, r4, #12
	bgt	1f

	@ shift result right of 1 to 20 bits, preserve sign bit, round, etc.
	add	r4, r4, #20
	rsb	r5, r4, #32
	mov	r3, xl, lsl r5
	mov	xl, xl, lsr r4
	orr	xl, xl, xh, lsl r5
	and	r2, xh, #0x80000000
	bic	xh, xh, #0x80000000
	adds	xl, xl, r3, lsr #31
	adc	xh, r2, xh, lsr r4
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
	@ a register switch from xh to xl.  Then round.
1:	rsb	r4, r4, #12
	rsb	r5, r4, #32
	mov	r3, xl, lsl r4
	mov	xl, xl, lsr r5
	orr	xl, xl, xh, lsl r4
	bic	xh, xh, #0x7fffffff
	adds	xl, xl, r3, lsr #31
	adc	xh, xh, #0
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
	@ from xh to xl.  Leftover bits are in r3-r6-lr for rounding.
2:	rsb	r5, r4, #32
	orr	lr, lr, xl, lsl r5
	mov	r3, xl, lsr r4
	orr	r3, r3, xh, lsl r5
	mov	xl, xh, lsr r4
	bic	xh, xh, #0x7fffffff
	bic	xl, xl, xh, lsr r4
	add	xl, xl, r3, lsr #31
	orrs	lr, lr, r3, lsl #1
	biceq	xl, xl, r3, lsr #31
	RETLDM	"r4, r5, r6"

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
	@ (Called with bl; returns to caller via lr.)
LSYM(Lml_d):
	teq	r4, #0
	bne	2f
	and	r6, xh, #0x80000000
1:	movs	xl, xl, lsl #1
	adc	xh, xh, xh
	tst	xh, #0x00100000
	subeq	r4, r4, #1
	beq	1b
	orr	xh, xh, r6
	teq	r5, #0
	movne	pc, lr
2:	and	r6, yh, #0x80000000
3:	movs	yl, yl, lsl #1
	adc	yh, yh, yh
	tst	yh, #0x00100000
	subeq	r5, r5, #1
	beq	3b
	orr	yh, yh, r6
	mov	pc, lr

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	teq	r4, ip
	and	r5, ip, yh, lsr #20
	teqne	r5, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	xh, xh, yh
	bic	xh, xh, #0x7fffffff
	mov	xl, #0
	RETLDM	"r4, r5, r6"

1:	@ One or both args are INF or NAN.
	orrs	r6, xl, xh, lsl #1
	moveq	xl, yl
	moveq	xh, yh
	orrnes	r6, yl, yh, lsl #1
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r4, ip
	bne	1f
	orrs	r6, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r5, ip
	bne	LSYM(Lml_i)
	orrs	r6, yl, yh, lsl #12
	movne	xl, yl
	movne	xh, yh
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	xh, xh, yh

	@ Overflow: return INF (sign already in xh).
LSYM(Lml_o):
	and	xh, xh, #0x80000000
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f00000
	mov	xl, #0
	RETLDM	"r4, r5, r6"

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	xh, xh, #0x7f000000
	orr	xh, xh, #0x00f80000
	RETLDM	"r4, r5, r6"

	FUNC_END aeabi_dmul
	FUNC_END muldf3

ARM_FUNC_START divdf3
ARM_FUNC_ALIAS aeabi_ddiv divdf3

	stmfd	sp!, {r4, r5, r6, lr}

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	orr	ip, ip, #0x700
	ands	r4, ip, xh, lsr #20
	andnes	r5, ip, yh, lsr #20
	teqne	r4, ip
	teqne	r5, ip
	bleq	LSYM(Ldv_s)

	@ Subtract divisor exponent from dividend's.
	sub	r4, r4, r5

	@ Preserve final sign into lr.
	eor	lr, xh, yh

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r5-r6, divisor -> yh-yl.
	orrs	r5, yl, yh, lsl #12
	mov	xh, xh, lsl #12
	beq	LSYM(Ldv_1)
	mov	yh, yh, lsl #12
	mov	r5, #0x10000000
	orr	yh, r5, yh, lsr #4
	orr	yh, yh, yl, lsr #24
	mov	yl, yl, lsl #8
	orr	r5, r5, xh, lsr #4
	orr	r5, r5, xl, lsr #24
	mov	r6, xl, lsl #8

	@ Initialize xh with final sign bit.
	and	xh, lr, #0x80000000

	@ Ensure result will land to known bit position.
	@ Apply exponent bias accordingly.
	cmp	r5, yh
	cmpeq	r6, yl
	adc	r4, r4, #(255 - 2)
	add	r4, r4, #0x300
	bcs	1f
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
1:
	@ Perform first subtraction to align result to a nibble.
	subs	r6, r6, yl
	sbc	r5, r5, yh
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	mov	xl, #0x00100000
	mov	ip, #0x00080000

	@ The actual division loop: restoring long division producing
	@ 4 quotient bits (one nibble) per iteration.
1:	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #1
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #2
	movs	yh, yh, lsr #1
	mov	yl, yl, rrx
	subs	lr, r6, yl
	sbcs	lr, r5, yh
	subcs	r6, r6, yl
	movcs	r5, lr
	orrcs	xl, xl, ip, lsr #3

	orrs	lr, r5, r6
	beq	2f
	mov	r5, r5, lsl #4
	orr	r5, r5, r6, lsr #28
	mov	r6, r6, lsl #4
	mov	yh, yh, lsl #3
	orr	yh, yh, yl, lsr #29
	mov	yl, yl, lsl #3
	movs	ip, ip, lsr #4
	bne	1b

	@ We are done with a word of the result.
	@ Loop again for the low word if this pass was for the high word.
	tst	xh, #0x00100000
	bne	3f
	orr	xh, xh, xl
	mov	xl, #0
	mov	ip, #0x80000000
	b	1b
2:
	@ Be sure result starts in the high word.
	tst	xh, #0x00100000
	orreq	xh, xh, xl
	moveq	xl, #0
3:
	@ Check exponent range for under/overflow.
	subs	ip, r4, #(254 - 1)
	cmphi	ip, #0x700
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	@ The remainder (r5-r6 vs yh-yl) provides the rounding information.
	subs	ip, r5, yh
	subeqs	ip, r6, yl
	moveqs	ip, xl, lsr #1
	adcs	xl, xl, #0
	adc	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6"

	@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
	and	lr, lr, #0x80000000
	orr	xh, lr, xh, lsr #12
	adds	r4, r4, ip, lsr #1
	rsbgts	r5, r4, ip
	orrgt	xh, xh, r4, lsl #20
	RETLDM	"r4, r5, r6" gt

	orr	xh, xh, #0x00100000
	mov	lr, #0
	subs	r4, r4, #1
	b	LSYM(Lml_u)

	@ Result might need to be denormalized: put remainder bits
	@ in lr for rounding considerations.
LSYM(Ldv_u):
	orr	lr, r5, r6
	b	LSYM(Lml_u)

	@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
	and	r5, ip, yh, lsr #20
	teq	r4, ip
	teqeq	r5, ip
	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
	teq	r4, ip
	bne	1f
	orrs	r4, xl, xh, lsl #12
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r5, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r5, ip
	bne	2f
	orrs	r5, yl, yh, lsl #12
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	xl, yl
	mov	xh, yh
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are nonzero, we need to normalize and resume above.
	orrs	r6, xl, xh, lsl #1
	orrnes	r6, yl, yh, lsl #1
	bne	LSYM(Lml_d)
	@ One or both arguments are 0.
	orrs	r4, xl, xh, lsl #1
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	orrs	r5, yl, yh, lsl #1
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_ddiv
	FUNC_END divdf3

#endif /* L_muldivdf3 */

#ifdef L_cmpdf2

@ Note: only r0 (return value) and ip are clobbered here.

ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltdf2
ARM_FUNC_ALIAS ledf2 ltdf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpdf2
ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
	mov	ip, #1			@ how should we specify unordered here?

	@ Stash the per-entry-point unordered return code just below sp;
	@ reloaded at label 5 (no stack pushes occur in between on that path).
1:	str	ip, [sp, #-4]

	@ Trap any INF/NAN first.
	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	mov	ip, yh, lsl #1
	mvnnes	ip, ip, asr #21
	beq	3f

	@ Test for equality.
	@ Note that 0.0 is equal to -0.0.
2:	orrs	ip, xl, xh, lsl #1	@ if x == 0.0 or -0.0
	orreqs	ip, yl, yh, lsl #1	@ and y == 0.0 or -0.0
	teqne	xh, yh			@ or xh == yh
	teqeq	xl, yl			@ and xl == yl
	moveq	r0, #0			@ then equal.
	RETc(eq)

	@ Clear C flag
	cmn	r0, #0

	@ Compare sign,
	teq	xh, yh

	@ Compare values if same sign
	cmppl	xh, yh
	cmpeq	xl, yl

	@ Result:
	movcs	r0, yh, asr #31
	mvncc	r0, yh, asr #31
	orr	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	4f
	orrs	ip, xl, xh, lsl #12
	bne	5f			@ x is NAN
4:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2b
	orrs	ip, yl, yh, lsl #12
	beq	2b			@ y is not NAN
5:	ldr	r0, [sp, #-4]		@ unordered return code
	RET

	FUNC_END gedf2
	FUNC_END gtdf2
	FUNC_END ledf2
	FUNC_END ltdf2
	FUNC_END nedf2
	FUNC_END eqdf2
	FUNC_END cmpdf2

@ Reversed compare: swap the two operands, then share the tail below.
ARM_FUNC_START aeabi_cdrcmple

	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	stmfd	sp!, {r0, lr}
	ARM_CALL cmpdf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	cmnmi	r0, #0
	RETLDM	"r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple

ARM_FUNC_START aeabi_dcmpeq

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	moveq	r0, #1			@ Equal to.
	movne	r0, #0			@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq

ARM_FUNC_START aeabi_dcmplt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	movcc	r0, #1			@ Less than.
	movcs	r0, #0			@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt

ARM_FUNC_START aeabi_dcmple

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdcmple
	movls	r0, #1			@ Less than or equal to.
	movhi	r0, #0			@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple

ARM_FUNC_START aeabi_dcmpge

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	movls	r0, #1			@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0			@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge

ARM_FUNC_START aeabi_dcmpgt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cdrcmple
	movcc	r0, #1			@ Operand 2 is less than operand 1.
	movcs	r0, #0			@ Operand 2 is greater than or equal to operand 1,
					@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt

#endif /* L_cmpdf2 */

#ifdef L_unorddf2

@ Return 1 if either argument is a NAN, 0 otherwise.
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	mov	ip, xh, lsl #1
	mvns	ip, ip, asr #21
	bne	1f
	orrs	ip, xl, xh, lsl #12
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET

3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2

#endif /* L_unorddf2 */

#ifdef L_fixdfsi

@ Convert double to signed 32-bit integer, rounding toward zero.
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ check exponent range.
	mov	r2, xh, lsl #1
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	tst	xh, #0x80000000		@ the sign bit
	mov	r0, r3, lsr r2
	rsbne	r0, r0, #0
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi

#endif /* L_fixdfsi */

#ifdef L_fixunsdfsi

@ Convert double to unsigned 32-bit integer, rounding toward zero.
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ check exponent range.
	movs	r2, xh, lsl #1
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11
	orr	r3, r3, #0x80000000
	orr	r3, r3, xl, lsr #21
	mov	r0, r3, lsr r2
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi

#endif /* L_fixunsdfsi */

#ifdef L_truncdfsf2

@ Truncate double to single precision, with rounding.
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ check exponent range.
	mov	r2, xh, lsl #1
	subs	r3, r2, #((1023 - 127) << 21)
	subcss	ip, r3, #(1 << 21)
	rsbcss	ip, ip, #(254 << 21)
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000
	mov	r2, xl, lsl #3
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000
	adc	r0, xl, r3, lsl #2
	biceq	r0, r0, #1
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24
	rsb	ip, r2, #32
	movs	r3, xl, lsl ip
	mov	xl, xl, lsr r2
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11
	orr	xl, xl, r3, lsl ip
	mov	r3, r3, lsr r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2

#endif /* L_truncdfsf2 */