;; libgcc routines for the Renesas H8/300 CPU.
;; Contributed by Steve Chamberlain <sac@cygnus.com>
;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>

/* Copyright (C) 1994-2022 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* Assembler register definitions.

   A0..A3 are the argument/scratch registers r0..r3 and S0..S2 are
   r4..r6, which the routines below push before use.  The L and H
   suffixes name the low and high byte of a 16-bit register, the P
   suffix names the pointer-sized view (rN on the H8/300, erN on the
   H8/300H, H8S and H8SX) and the E suffix names the upper 16 bits
   of a 32-bit erN register.  */

#define A0 r0
#define A0L r0l
#define A0H r0h

#define A1 r1
#define A1L r1l
#define A1H r1h

#define A2 r2
#define A2L r2l
#define A2H r2h

#define A3 r3
#define A3L r3l
#define A3H r3h

#define S0 r4
#define S0L r4l
#define S0H r4h

#define S1 r5
#define S1L r5l
#define S1H r5h

#define S2 r6
#define S2L r6l
#define S2H r6h

#ifdef __H8300__
#define PUSHP	push
#define POPP	pop

#define A0P	r0
#define A1P	r1
#define A2P	r2
#define A3P	r3
#define S0P	r4
#define S1P	r5
#define S2P	r6
#endif

#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
#define PUSHP	push.l
#define POPP	pop.l

#define A0P	er0
#define A1P	er1
#define A2P	er2
#define A3P	er3
#define S0P	er4
#define S1P	er5
#define S2P	er6

#define A0E	e0
#define A1E	e1
#define A2E	e2
#define A3E	e3
#endif

/* Symbol naming helpers: every exported routine is called
   <user label prefix>__<name>.  */
#define CONCAT(A,B)       A##B
#define LABEL0(U,X)       CONCAT(U,__##X)
#define LABEL0_DEF(U,X)   CONCAT(U,__##X##:)
#define LABEL_DEF(X)      LABEL0_DEF(__USER_LABEL_PREFIX__,X)
#define LABEL(X)          LABEL0(__USER_LABEL_PREFIX__,X)

/* Select the assembler CPU mode that matches the compiler target.  */
#ifdef __H8300H__
#ifdef __NORMAL_MODE__
	.h8300hn
#else
	.h8300h
#endif
#endif

#ifdef __H8300S__
#ifdef __NORMAL_MODE__
	.h8300sn
#else
	.h8300s
#endif
#endif
#ifdef __H8300SX__
#ifdef __NORMAL_MODE__
	.h8300sxn
#else
	.h8300sx
#endif
#endif

#ifdef L_cmpsi2
#ifdef __H8300__
;; Signed 32-bit three-way compare.
;;   in:  a in A0 (high word) / A1 (low word)
;;        b in A2 (high word) / A3 (low word)
;;   out: A0 = 0 if a < b, 1 if a == b, 2 if a > b
	.section .text
	.align 2
	.global LABEL(cmpsi2)
LABEL_DEF(cmpsi2)
	cmp.w	A0,A2		; flags from b.high - a.high
	bne	.L2
	cmp.w	A1,A3		; high words equal: flags from b.low - a.low
	bne	.L4
	mov.w	#1,A0		; a == b
	rts
.L2:
	bgt	.L5		; signed: b.high > a.high, so a < b
.L3:
	mov.w	#2,A0		; a > b
	rts
.L4:
	bls	.L3		; low words compare unsigned: b.low < a.low
.L5:
	sub.w	A0,A0		; a < b
	rts
	.end
#endif
#endif /* L_cmpsi2 */

#ifdef L_ucmpsi2
#ifdef __H8300__
;; Unsigned 32-bit three-way compare.
;;   in:  a in A0 (high word) / A1 (low word)
;;        b in A2 (high word) / A3 (low word)
;;   out: A0 = 0 if a < b, 1 if a == b, 2 if a > b
	.section .text
	.align 2
	.global LABEL(ucmpsi2)
LABEL_DEF(ucmpsi2)
	cmp.w	A0,A2		; flags from b.high - a.high
	bne	.L2
	cmp.w	A1,A3		; high words equal: flags from b.low - a.low
	bne	.L4
	mov.w	#1,A0		; a == b
	rts
.L2:
	bhi	.L5		; unsigned: b.high > a.high, so a < b
.L3:
	mov.w	#2,A0		; a > b
	rts
.L4:
	bls	.L3		; b.low < a.low
.L5:
	sub.w	A0,A0		; a < b
	rts
	.end
#endif
#endif /* L_ucmpsi2 */

#ifdef L_divhi3

;; HImode divides for the H8/300.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

; general purpose normalize routine
;
; dividend in A0
; divisor in A1
; turns both into +ve numbers, and leaves what the answer sign
; should be in bit 3 of A2L (a copy of the N flag of the dividend,
; toggled when the divisor is negative)

#ifdef __H8300__
	.section .text
	.align 2
divnorm:
	or	A0H,A0H		; is dividend >= 0
	stc	ccr,A2L		; remember its sign (N flag is bit 3)
	bge	_lab1
	not	A0H		; no - then make it +ve
	not	A0L
	adds	#1,A0
_lab1:	or	A1H,A1H		; look at divisor
	bge	_lab2
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
	xor	#0x8,A2L	; and toggle sign of result
_lab2:	rts

;; Basically the same, except that the sign of the dividend alone
;; determines the sign of the result (the sign bit is not toggled
;; for a negative divisor).
modnorm:
	or	A0H,A0H		; is dividend >= 0
	stc	ccr,A2L		; remember its sign (N flag is bit 3)
	bge	_lab7
	not	A0H		; no - then make it +ve
	not	A0L
	adds	#1,A0
_lab7:	or	A1H,A1H		; look at divisor
	bge	_lab8
	not	A1H		; it is -ve, make it positive
	not	A1L
	adds	#1,A1
_lab8:	rts

; A0=A0/A1 signed

	.global LABEL(divhi3)
LABEL_DEF(divhi3)
	bsr	divnorm
	bsr	LABEL(udivhi3)
negans:	btst	#3,A2L		; should answer be negative ?
	beq	_lab4
	not	A0H		; yes, so make it so
	not	A0L
	adds	#1,A0
_lab4:	rts

; A0=A0%A1 signed

	.global LABEL(modhi3)
LABEL_DEF(modhi3)
	bsr	modnorm
	bsr	LABEL(udivhi3)
	mov	A3,A0		; the remainder is returned in A3
	bra	negans

; A0=A0%A1 unsigned

	.global LABEL(umodhi3)
LABEL_DEF(umodhi3)
	bsr	LABEL(udivhi3)
	mov	A3,A0		; the remainder is returned in A3
	rts

; A0=A0/A1 unsigned
; A3=A0%A1 unsigned
; A2H trashed
; D high 8 bits of denom
; d low 8 bits of denom
; N high 8 bits of num
; n low 8 bits of num
; M high 8 bits of mod
; m low 8 bits of mod
; Q high 8 bits of quot
; q low 8 bits of quot
; P preserve

; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
; see how to partition up the expression.

	.global LABEL(udivhi3)
LABEL_DEF(udivhi3)
				; A0 A1 A2 A3
				; Nn Dd       P
	sub.w	A3,A3		; Nn Dd xP 00
	or	A1H,A1H
	bne	divlongway
	or	A0H,A0H
	beq	_lab6

; we know that D == 0 and N is != 0
	mov.b	A0H,A3L		; Nn Dd xP 0N
	divxu	A1L,A3		;          MQ
	mov.b	A3L,A0H		; Q
; dealt with N, do n
_lab6:	mov.b	A0L,A3L		;           n
	divxu	A1L,A3		;          mq
	mov.b	A3L,A0L		; Qq
	mov.b	A3H,A3L		;           m
	mov.b	#0x0,A3H	; Qq       0m
	rts

; D != 0 - which means the denominator is at least 256, so the
;          quotient fits in 8 bits.  Loop around (shift and subtract,
;          one quotient bit per pass) to get the result.

divlongway:
	mov.b	A0H,A3L		; Nn Dd xP 0N
	mov.b	#0x0,A0H	; high byte of answer has to be zero
	mov.b	#0x8,A2H	; 8 passes
div8:	add.b	A0L,A0L		; n*=2, top bit of n goes to carry
	rotxl	A3L		; Make remainder bigger
	rotxl	A3H
	sub.w	A1,A3		; remainder -= divisor
	bhs	setbit		; set a bit ?
	add.w	A1,A3		;  no : too far , remainder += divisor

	dec	A2H
	bne	div8		; next bit
	rts

setbit:	inc	A0L		; do insert bit
	dec	A2H
	bne	div8		; next bit
	rts

#endif /* __H8300__ */
#endif /* L_divhi3 */

#ifdef L_divsi3

;; 4 byte integer divides for the H8/300.
;;
;; We have one routine which does all the work and lots of
;; little ones which prepare the args and massage the sign.
;; We bunch all of this into one object file since there are several
;; "supporting routines".

	.section .text
	.align 2

; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result
; in bit 3 of r6l (S2L).
; This function is here to keep branch displacements small.

#ifdef __H8300__

divnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
postive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	postive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor.b	#0x08,S2L	; toggle the result sign
postive2:
	rts

;; Basically the same, except that the sign of the numerator alone
;; determines the sign of the result (no toggle for the denominator).
modnorm:
	mov.b	A0H,A0H		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	; negate arg
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
mpostive:
	mov.b	A2H,A2H		; is the denominator -ve
	bge	mpostive2
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
mpostive2:
	rts

#else /* __H8300H__ */

; Same contract as the H8/300 version, with the numerator in er0 and
; the denominator in er1.
divnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	postive

	neg.l	A0P		; negate arg

postive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	postive2

	neg.l	A1P		; negate arg
	xor.b	#0x08,S2L	; toggle the result sign

postive2:
	rts

;; Basically the same, except that the sign of the numerator alone
;; determines the sign of the result (no toggle for the denominator).
modnorm:
	mov.l	A0P,A0P		; is the numerator -ve
	stc	ccr,S2L		; keep the sign in bit 3 of S2L
	bge	mpostive

	neg.l	A0P		; negate arg

mpostive:
	mov.l	A1P,A1P		; is the denominator -ve
	bge	mpostive2

	neg.l	A1P		; negate arg

mpostive2:
	rts

#endif

; numerator in A0/A1
; denominator in A2/A3
	.global LABEL(modsi3)
LABEL_DEF(modsi3)
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	modnorm
	bsr	divmodsi4
	mov	S0,A0		; remainder comes back in S0/S1
	mov	S1,A1
	bra	exitdiv
#else
	PUSHP	S2P
	bsr	modnorm
	; Both operands are already non-negative and the result sign is
	; held in S2L, so go straight to the unsigned divide, which leaves
	; the remainder in er3.
	bsr	LABEL(udivsi3)
	mov.l	er3,er0
	bra	exitdiv
#endif

	;; H8/300H and H8S version of ___udivsi3 is defined later in
	;; the file.
#ifdef __H8300__
	.global LABEL(udivsi3)
LABEL_DEF(udivsi3)
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	bra	reti
#endif

	.global LABEL(umodsi3)
LABEL_DEF(umodsi3)
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	mov	S0,A0		; remainder comes back in S0/S1
	mov	S1,A1
	bra	reti
#else
	bsr	LABEL(udivsi3)
	mov.l	er3,er0		; remainder comes back in er3
	rts
#endif

	.global LABEL(divsi3)
LABEL_DEF(divsi3)
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	jsr	divnorm
	jsr	divmodsi4
#else
	PUSHP	S2P
	jsr	divnorm
	bsr	LABEL(udivsi3)
#endif

	; examine what the sign should be
	; (shared tail: modsi3 also branches here)
exitdiv:
	btst	#3,S2L
	beq	reti

	; should be -ve
#ifdef __H8300__
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
#else /* __H8300H__ */
	neg.l	A0P
#endif

	; common epilogue: restore the registers pushed on entry
reti:
#ifdef __H8300__
	POPP	S1P
	POPP	S0P
#endif
	POPP	S2P
	rts

	; takes A0/A1 numerator (A0P for H8/300H)
	; A2/A3 denominator (A1P for H8/300H)
	; returns A0/A1 quotient (A0P for H8/300H)
	; S0/S1 remainder (S0P for H8/300H)
	; trashes S2H

#ifdef __H8300__

divmodsi4:
	sub.w	S0,S0		; zero play area
	mov.w	S0,S1
	mov.b	A2H,S2H		; are the top three denominator bytes all 0 ?
	or	A2L,S2H
	or	A3H,S2H
	bne	DenHighNonZero
	; The denominator fits in one byte (A3L): do a byte-at-a-time
	; long division with divxu, starting at the first non-zero
	; numerator byte.  Each NumByteN label below is entered when
	; numerator byte N still has to be divided.
	mov.b	A0H,A0H
	bne	NumByte0Zero
	mov.b	A0L,A0L
	bne	NumByte1Zero
	mov.b	A1H,A1H
	bne	NumByte2Zero
	bra	NumByte3Zero
NumByte0Zero:
	mov.b	A0H,S1L
	divxu	A3L,S1		; quotient byte in S1L, remainder in S1H
	mov.b	S1L,A0H
NumByte1Zero:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
NumByte2Zero:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
NumByte3Zero:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L		; final remainder into the low byte of S1
	mov.b	#0x0,S1H
	rts

; have to do the divide by shift and test
; The denominator is at least 256 here, so the top byte of the
; numerator is moved straight into the remainder and the other three
; bytes are shifted up by one byte.
DenHighNonZero:
	mov.b	A0H,S1L
	mov.b	A0L,A0H
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H		; only do 24 iterations

nextbit:
	add.w	A1,A1		; double the answer guess
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L		; double remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1		; does it all fit
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	setone

	add.w	A3,S1		; no, restore mistake
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	nextbit
	rts

setone:
	inc	A1L		; yes, record a 1 bit in the quotient
	dec	S2H
	bne	nextbit
	rts

#else /* __H8300H__ */

	;; This function also computes the remainder and stores it in er3.
	.global LABEL(udivsi3)
LABEL_DEF(udivsi3)
	mov.w	A1E,A1E			; denominator top word 0?
	bne	DenHighNonZero

	; do it the easy way, see page 107 in manual
	mov.w	A0E,A2
	extu.l	A2P
	divxu.w	A1,A2P
	mov.w	A2E,A0E
	divxu.w	A1,A0P
	mov.w	A0E,A3
	mov.w	A2,A0E
	extu.l	A3P
	rts

	; er0 = er0 / er1
	; er3 = er0 % er1
	; trashes er1 er2
	; expects er1 >= 2^16
DenHighNonZero:
	mov.l	er0,er3
	mov.l	er1,er2
#ifdef __H8300H__
divmod_L21:
	shlr.l	er0
	shlr.l	er2			; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
#else
	shlr.l	#2,er2			; make divisor < 2^16
	mov.w	e2,e2
	beq	divmod_L22A
divmod_L21:
	shlr.l	#2,er0
divmod_L22:
	shlr.l	#2,er2			; make divisor < 2^16
	mov.w	e2,e2
	bne	divmod_L21
divmod_L22A:
	rotxl.w	r2
	bcs	divmod_L23
	shlr.l	er0
	bra	divmod_L24
divmod_L23:
	rotxr.w	r2
	shlr.l	#2,er0
divmod_L24:
#endif
	;; At this point,
	;;  er0 contains shifted dividend
	;;  er1 contains divisor
	;;  er2 contains shifted divisor
	;;  er3 contains dividend, later remainder
	divxu.w	r2,er0			; r0 now contains the approximate quotient (AQ)
	extu.l	er0
	beq	divmod_L25
	subs	#1,er0			; er0 = AQ - 1
	mov.w	e1,r2
	mulxu.w	r0,er2			; er2 = upper (AQ - 1) * divisor
	sub.w	r2,e3			; dividend - 65536 * er2
	mov.w	r1,r2
	mulxu.w	r0,er2			; compute er3 = remainder (tentative)
	sub.l	er2,er3			; er3 = dividend - (AQ - 1) * divisor
divmod_L25:
	cmp.l	er1,er3			; is remainder still >= divisor?
	blo	divmod_L26		; no, quotient is already right
	adds	#1,er0
	sub.l	er1,er3			; correct the remainder
divmod_L26:
	rts

#endif
#endif /* L_divsi3 */

#ifdef L_mulhi3

;; HImode multiply.
; The H8/300 only has an 8*8->16 multiply.
; The answer is the same as:
;
; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
; (we can ignore A1.h * A0.h because that falls off the top)
; A0 in
; A1 in
; A0 answer

#ifdef __H8300__
	.section .text
	.align 2
	.global LABEL(mulhi3)
LABEL_DEF(mulhi3)
	mov.b	A1L,A2L		; A2l gets srcb.l
	mulxu	A0L,A2		; A2 gets first sub product

	mov.b	A0H,A3L		; prepare for
	mulxu	A1L,A3		; second sub product

	add.b	A3L,A2H		; sum first two terms

	mov.b	A1H,A3L		; third sub product
	mulxu	A0L,A3

	add.b	A3L,A2H		; almost there
	mov.w	A2,A0		; that is
	rts

#endif
#endif /* L_mulhi3 */

#ifdef L_mulsi3

;; SImode multiply.
;;
;; I think that shift and add may be sufficient for this.  Using the
;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
;; the inner loop uses maybe 20 cycles + overhead, but terminates
;; quickly on small args.
;;
;; A0/A1 src_a
;; A2/A3 src_b
;;
;;  while (a)
;;    {
;;      if (a & 1)
;;        r += b;
;;      a >>= 1;
;;      b <<= 1;
;;    }

	.section .text
	.align 2

#ifdef __H8300__

	.global LABEL(mulsi3)
LABEL_DEF(mulsi3)
	PUSHP	S0P
	PUSHP	S1P

	sub.w	S0,S0		; r = 0, accumulated in S0/S1
	sub.w	S1,S1

	; while (a)
_top:	mov.w	A0,A0
	bne	_more
	mov.w	A1,A1
	beq	_done
_more:	; if (a & 1)
	bld	#0,A1L
	bcc	_nobit
	; r += b
	add.w	A3,S1
	addx	A2L,S0L
	addx	A2H,S0H
_nobit:
	; a >>= 1
	shlr	A0H
	rotxr	A0L
	rotxr	A1H
	rotxr	A1L

	; b <<= 1
	add.w	A3,A3
	addx	A2L,A2L
	addx	A2H,A2H
	bra	_top

_done:
	mov.w	S0,A0		; return r in A0/A1
	mov.w	S1,A1
	POPP	S1P
	POPP	S0P
	rts

#else /* __H8300H__ */

;
; mulsi3 for H8/300H - based on Renesas SH implementation
;
; by Toshiyasu Morita
;
; Old code:
;
; 16b * 16b = 372 states (worst case)
; 32b * 32b = 724 states (worst case)
;
; New code:
;
; 16b * 16b =  48 states
; 16b * 32b =  72 states
; 32b * 32b =  92 states
;
; In the comments below the operands are er0 = a:b (e0:r0) and
; er1 = c:d (e1:r1); the result is returned in er0.
;

	.global LABEL(mulsi3)
LABEL_DEF(mulsi3)
	mov.w	r1,r2	; ( 2 states) b * d
	mulxu	r0,er2	; (22 states)

	mov.w	e0,r3	; ( 2 states) a * d
	beq	L_skip1	; ( 4 states)
	mulxu	r1,er3	; (22 states)
	add.w	r3,e2	; ( 2 states)

L_skip1:
	mov.w	e1,r3	; ( 2 states) c * b
	beq	L_skip2	; ( 4 states)
	mulxu	r0,er3	; (22 states)
	add.w	r3,e2	; ( 2 states)

L_skip2:
	mov.l	er2,er0	; ( 2 states)
	rts		; (10 states)

#endif
#endif /* L_mulsi3 */
#ifdef L_fixunssfsi_asm
/* For the h8300 we use asm to save some bytes, to
   allow more programs to fit into the tiny address
   space.  For the H8/300H and H8S, the C version is good enough.  */
#ifdef __H8300__
/* We still treat NANs different than libgcc2.c, but then, the
   behavior is undefined anyways.  */
/* Argument in r0 (high word) / r1 (low word), result in the same pair.
   NOTE(review): the byte tests below presumably rely on the IEEE
   single-precision layout, where a top byte of 0x4f with bit 7 of the
   next byte clear means a value in [2^31, 2^32) - confirm.  */
	.global LABEL(fixunssfsi)
LABEL_DEF(fixunssfsi)
	cmp.b	#0x4f,r0h
	bge	Large_num	; signed compare: top byte is 0x4f..0x7f
	jmp	@LABEL(fixsfsi)	; everything else: use the signed conversion
Large_num:
	bhi	L_huge_num	; top byte above 0x4f: saturate
	xor.b	#0x80,A0L	; flip bit 7 of the second byte
	bmi	L_shift8	; it was clear: shift the three low bytes up
L_huge_num:
	mov.w	#65535,A0	; return 0xffffffff
	mov.w	A0,A1
	rts
L_shift8:
	mov.b	A0L,A0H		; result = low three bytes << 8
	mov.b	A1H,A0L
	mov.b	A1L,A1H
	mov.b	#0,A1L
	rts
#endif
#endif /* L_fixunssfsi_asm */