1/* -*- Mode: Asm -*- */ 2/* Copyright (C) 1998-2013 Free Software Foundation, Inc. 3 Contributed by Denis Chertykov <chertykov@gmail.com> 4 5This file is free software; you can redistribute it and/or modify it 6under the terms of the GNU General Public License as published by the 7Free Software Foundation; either version 3, or (at your option) any 8later version. 9 10This file is distributed in the hope that it will be useful, but 11WITHOUT ANY WARRANTY; without even the implied warranty of 12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13General Public License for more details. 14 15Under Section 7 of GPL version 3, you are granted additional 16permissions described in the GCC Runtime Library Exception, version 173.1, as published by the Free Software Foundation. 18 19You should have received a copy of the GNU General Public License and 20a copy of the GCC Runtime Library Exception along with this program; 21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22<http://www.gnu.org/licenses/>. */ 23 24#define __zero_reg__ r1 25#define __tmp_reg__ r0 26#define __SREG__ 0x3f 27#if defined (__AVR_HAVE_SPH__) 28#define __SP_H__ 0x3e 29#endif 30#define __SP_L__ 0x3d 31#define __RAMPZ__ 0x3B 32#define __EIND__ 0x3C 33 34/* Most of the functions here are called directly from avr.md 35 patterns, instead of using the standard libcall mechanisms. 36 This can make better code because GCC knows exactly which 37 of the call-used registers (not all of them) are clobbered. */ 38 39/* FIXME: At present, there is no SORT directive in the linker 40 script so that we must not assume that different modules 41 in the same input section like .libgcc.text.mul will be 42 located close together. Therefore, we cannot use 43 RCALL/RJMP to call a function like __udivmodhi4 from 44 __divmodhi4 and have to use lengthy XCALL/XJMP even 45 though they are in the same input section and all same 46 input sections together are small enough to reach every 47 location with a RCALL/RJMP instruction. */ 48 49 .macro mov_l r_dest, r_src 50#if defined (__AVR_HAVE_MOVW__) 51 movw \r_dest, \r_src 52#else 53 mov \r_dest, \r_src 54#endif 55 .endm 56 57 .macro mov_h r_dest, r_src 58#if defined (__AVR_HAVE_MOVW__) 59 ; empty 60#else 61 mov \r_dest, \r_src 62#endif 63 .endm 64 65.macro wmov r_dest, r_src 66#if defined (__AVR_HAVE_MOVW__) 67 movw \r_dest, \r_src 68#else 69 mov \r_dest, \r_src 70 mov \r_dest+1, \r_src+1 71#endif 72.endm 73 74#if defined (__AVR_HAVE_JMP_CALL__) 75#define XCALL call 76#define XJMP jmp 77#else 78#define XCALL rcall 79#define XJMP rjmp 80#endif 81 82;; Prologue stuff 83 84.macro do_prologue_saves n_pushed n_frame=0 85 ldi r26, lo8(\n_frame) 86 ldi r27, hi8(\n_frame) 87 ldi r30, lo8(gs(.L_prologue_saves.\@)) 88 ldi r31, hi8(gs(.L_prologue_saves.\@)) 89 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2) 90.L_prologue_saves.\@: 91.endm 92 93;; Epilogue stuff 94 95.macro do_epilogue_restores n_pushed n_frame=0 96 in r28, __SP_L__ 97#ifdef __AVR_HAVE_SPH__ 98 in r29, __SP_H__ 99.if \n_frame > 63 100 subi r28, lo8(-\n_frame) 101 sbci r29, hi8(-\n_frame) 102.elseif \n_frame > 0 103 adiw r28, \n_frame 104.endif 105#else 106 clr r29 107.if \n_frame > 0 108 subi r28, lo8(-\n_frame) 109.endif 110#endif /* HAVE SPH */ 111 ldi r30, \n_pushed 112 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2) 113.endm 114 115;; Support function entry and exit for convenience 116 117.macro DEFUN name 118.global \name 119.func \name 120\name: 121.endm 122 123.macro ENDF name 124.size \name, .-\name 125.endfunc 126.endm 127 128.macro FALIAS name 129.global \name 130.func \name 131\name: 132.size \name, .-\name 133.endfunc 134.endm 135 136;; Skip next instruction, typically a jump target 137#define skip cpse 0,0 138 139;; Negate a 2-byte value held in consecutive registers 140.macro NEG2 reg 141 com \reg+1 142 neg \reg 143 sbci \reg+1, -1 144.endm 145 146;; Negate a 4-byte value held in consecutive registers 147;; Sets the V flag for signed overflow tests if REG >= 16 148.macro NEG4 reg 149 com \reg+3 150 com \reg+2 151 com \reg+1 152.if \reg >= 16 153 neg \reg 154 sbci \reg+1, -1 155 sbci \reg+2, -1 156 sbci \reg+3, -1 157.else 158 com \reg 159 adc \reg, __zero_reg__ 160 adc \reg+1, __zero_reg__ 161 adc \reg+2, __zero_reg__ 162 adc \reg+3, __zero_reg__ 163.endif 164.endm 165 166#define exp_lo(N) hlo8 ((N) << 23) 167#define exp_hi(N) hhi8 ((N) << 23) 168 169 170.section .text.libgcc.mul, "ax", @progbits 171 172;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 173/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. */ 174#if !defined (__AVR_HAVE_MUL__) 175/******************************************************* 176 Multiplication 8 x 8 without MUL 177*******************************************************/ 178#if defined (L_mulqi3) 179 180#define r_arg2 r22 /* multiplicand */ 181#define r_arg1 r24 /* multiplier */ 182#define r_res __tmp_reg__ /* result */ 183 184DEFUN __mulqi3 185 clr r_res ; clear result 186__mulqi3_loop: 187 sbrc r_arg1,0 188 add r_res,r_arg2 189 add r_arg2,r_arg2 ; shift multiplicand 190 breq __mulqi3_exit ; while multiplicand != 0 191 lsr r_arg1 ; 192 brne __mulqi3_loop ; exit if multiplier = 0 193__mulqi3_exit: 194 mov r_arg1,r_res ; result to return register 195 ret 196ENDF __mulqi3 197 198#undef r_arg2 199#undef r_arg1 200#undef r_res 201 202#endif /* defined (L_mulqi3) */ 203 204 205/******************************************************* 206 Widening Multiplication 16 = 8 x 8 without MUL 207 Multiplication 16 x 16 without MUL 208*******************************************************/ 209 210#define A0 r22 211#define A1 r23 212#define B0 r24 213#define BB0 r20 214#define B1 r25 215;; Output overlaps input, thus expand result in CC0/1 216#define C0 r24 217#define C1 r25 218#define CC0 __tmp_reg__ 219#define CC1 R21 220 221#if defined (L_umulqihi3) 222;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24 223;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0 224;;; Clobbers: __tmp_reg__, R21..R23 225DEFUN __umulqihi3 226 clr A1 227 clr B1 228 XJMP __mulhi3 229ENDF __umulqihi3 230#endif /* L_umulqihi3 */ 231 232#if defined (L_mulqihi3) 233;;; R25:R24 = (signed int) R22 * (signed int) R24 234;;; (C1:C0) = (signed int) A0 * (signed int) B0 235;;; Clobbers: __tmp_reg__, R20..R23 236DEFUN __mulqihi3 237 ;; Sign-extend B0 238 clr B1 239 sbrc B0, 7 240 com B1 241 ;; The multiplication runs twice as fast if A1 is zero, thus: 242 ;; Zero-extend A0 243 clr A1 244#ifdef __AVR_HAVE_JMP_CALL__ 245 ;; Store B0 * sign of A 246 clr BB0 247 sbrc A0, 7 248 mov BB0, B0 249 call __mulhi3 250#else /* have no CALL */ 251 ;; Skip sign-extension of A if A >= 0 252 ;; Same size as with the first alternative but avoids errata skip 253 ;; and is faster if A >= 0 254 sbrs A0, 7 255 rjmp __mulhi3 256 ;; If A < 0 store B 257 mov BB0, B0 258 rcall __mulhi3 259#endif /* HAVE_JMP_CALL */ 260 ;; 1-extend A after the multiplication 261 sub C1, BB0 262 ret 263ENDF __mulqihi3 264#endif /* L_mulqihi3 */ 265 266#if defined (L_mulhi3) 267;;; R25:R24 = R23:R22 * R25:R24 268;;; (C1:C0) = (A1:A0) * (B1:B0) 269;;; Clobbers: __tmp_reg__, R21..R23 270DEFUN __mulhi3 271 272 ;; Clear result 273 clr CC0 274 clr CC1 275 rjmp 3f 2761: 277 ;; Bit n of A is 1 --> C += B << n 278 add CC0, B0 279 adc CC1, B1 2802: 281 lsl B0 282 rol B1 2833: 284 ;; If B == 0 we are ready 285 sbiw B0, 0 286 breq 9f 287 288 ;; Carry = n-th bit of A 289 lsr A1 290 ror A0 291 ;; If bit n of A is set, then go add B * 2^n to C 292 brcs 1b 293 294 ;; Carry = 0 --> The ROR above acts like CP A0, 0 295 ;; Thus, it is sufficient to CPC the high part to test A against 0 296 cpc A1, __zero_reg__ 297 ;; Only proceed if A != 0 298 brne 2b 2999: 300 ;; Move Result into place 301 mov C0, CC0 302 mov C1, CC1 303 ret 304ENDF __mulhi3 305#endif /* L_mulhi3 */ 306 307#undef A0 308#undef A1 309#undef B0 310#undef BB0 311#undef B1 312#undef C0 313#undef C1 314#undef CC0 315#undef CC1 316 317 318#define A0 22 319#define A1 A0+1 320#define A2 A0+2 321#define A3 A0+3 322 323#define B0 18 324#define B1 B0+1 325#define B2 B0+2 326#define B3 B0+3 327 328#define CC0 26 329#define CC1 CC0+1 330#define CC2 30 331#define CC3 CC2+1 332 333#define C0 22 334#define C1 C0+1 335#define C2 C0+2 336#define C3 C0+3 337 338/******************************************************* 339 Widening Multiplication 32 = 16 x 16 without MUL 340*******************************************************/ 341 342#if defined (L_umulhisi3) 343DEFUN __umulhisi3 344 wmov B0, 24 345 ;; Zero-extend B 346 clr B2 347 clr B3 348 ;; Zero-extend A 349 wmov A2, B2 350 XJMP __mulsi3 351ENDF __umulhisi3 352#endif /* L_umulhisi3 */ 353 354#if defined (L_mulhisi3) 355DEFUN __mulhisi3 356 wmov B0, 24 357 ;; Sign-extend B 358 lsl r25 359 sbc B2, B2 360 mov B3, B2 361#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 362 ;; Sign-extend A 363 clr A2 364 sbrc A1, 7 365 com A2 366 mov A3, A2 367 XJMP __mulsi3 368#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */ 369 ;; Zero-extend A and __mulsi3 will run at least twice as fast 370 ;; compared to a sign-extended A. 371 clr A2 372 clr A3 373 sbrs A1, 7 374 XJMP __mulsi3 375 ;; If A < 0 then perform the B * 0xffff.... before the 376 ;; very multiplication by initializing the high part of the 377 ;; result CC with -B. 378 wmov CC2, A2 379 sub CC2, B0 380 sbc CC3, B1 381 XJMP __mulsi3_helper 382#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */ 383ENDF __mulhisi3 384#endif /* L_mulhisi3 */ 385 386 387/******************************************************* 388 Multiplication 32 x 32 without MUL 389*******************************************************/ 390 391#if defined (L_mulsi3) 392DEFUN __mulsi3 393 ;; Clear result 394 clr CC2 395 clr CC3 396 ;; FALLTHRU 397ENDF __mulsi3 398 399DEFUN __mulsi3_helper 400 clr CC0 401 clr CC1 402 rjmp 3f 403 4041: ;; If bit n of A is set, then add B * 2^n to the result in CC 405 ;; CC += B 406 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3 407 4082: ;; B <<= 1 409 lsl B0 $ rol B1 $ rol B2 $ rol B3 410 4113: ;; A >>= 1: Carry = n-th bit of A 412 lsr A3 $ ror A2 $ ror A1 $ ror A0 413 414 brcs 1b 415 ;; Only continue if A != 0 416 sbci A1, 0 417 brne 2b 418 sbiw A2, 0 419 brne 2b 420 421 ;; All bits of A are consumed: Copy result to return register C 422 wmov C0, CC0 423 wmov C2, CC2 424 ret 425ENDF __mulsi3_helper 426#endif /* L_mulsi3 */ 427 428#undef A0 429#undef A1 430#undef A2 431#undef A3 432#undef B0 433#undef B1 434#undef B2 435#undef B3 436#undef C0 437#undef C1 438#undef C2 439#undef C3 440#undef CC0 441#undef CC1 442#undef CC2 443#undef CC3 444 445#endif /* !defined (__AVR_HAVE_MUL__) */ 446;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 447 448;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 449#if defined (__AVR_HAVE_MUL__) 450#define A0 26 451#define B0 18 452#define C0 22 453 454#define A1 A0+1 455 456#define B1 B0+1 457#define B2 B0+2 458#define B3 B0+3 459 460#define C1 C0+1 461#define C2 C0+2 462#define C3 C0+3 463 464/******************************************************* 465 Widening Multiplication 32 = 16 x 16 with MUL 466*******************************************************/ 467 468#if defined (L_mulhisi3) 469;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 470;;; C3:C0 = (signed long) A1:A0 * (signed long) B1:B0 471;;; Clobbers: __tmp_reg__ 472DEFUN __mulhisi3 473 XCALL __umulhisi3 474 ;; Sign-extend B 475 tst B1 476 brpl 1f 477 sub C2, A0 478 sbc C3, A1 4791: ;; Sign-extend A 480 XJMP __usmulhisi3_tail 481ENDF __mulhisi3 482#endif /* L_mulhisi3 */ 483 484#if defined (L_usmulhisi3) 485;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 486;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 487;;; Clobbers: __tmp_reg__ 488DEFUN __usmulhisi3 489 XCALL __umulhisi3 490 ;; FALLTHRU 491ENDF __usmulhisi3 492 493DEFUN __usmulhisi3_tail 494 ;; Sign-extend A 495 sbrs A1, 7 496 ret 497 sub C2, B0 498 sbc C3, B1 499 ret 500ENDF __usmulhisi3_tail 501#endif /* L_usmulhisi3 */ 502 503#if defined (L_umulhisi3) 504;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 505;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 506;;; Clobbers: __tmp_reg__ 507DEFUN __umulhisi3 508 mul A0, B0 509 movw C0, r0 510 mul A1, B1 511 movw C2, r0 512 mul A0, B1 513#ifdef __AVR_HAVE_JMP_CALL__ 514 ;; This function is used by many other routines, often multiple times. 515 ;; Therefore, if the flash size is not too limited, avoid the RCALL 516 ;; and inverst 6 Bytes to speed things up. 517 add C1, r0 518 adc C2, r1 519 clr __zero_reg__ 520 adc C3, __zero_reg__ 521#else 522 rcall 1f 523#endif 524 mul A1, B0 5251: add C1, r0 526 adc C2, r1 527 clr __zero_reg__ 528 adc C3, __zero_reg__ 529 ret 530ENDF __umulhisi3 531#endif /* L_umulhisi3 */ 532 533/******************************************************* 534 Widening Multiplication 32 = 16 x 32 with MUL 535*******************************************************/ 536 537#if defined (L_mulshisi3) 538;;; R25:R22 = (signed long) R27:R26 * R21:R18 539;;; (C3:C0) = (signed long) A1:A0 * B3:B0 540;;; Clobbers: __tmp_reg__ 541DEFUN __mulshisi3 542#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 543 ;; Some cores have problem skipping 2-word instruction 544 tst A1 545 brmi __mulohisi3 546#else 547 sbrs A1, 7 548#endif /* __AVR_HAVE_JMP_CALL__ */ 549 XJMP __muluhisi3 550 ;; FALLTHRU 551ENDF __mulshisi3 552 553;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 554;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 555;;; Clobbers: __tmp_reg__ 556DEFUN __mulohisi3 557 XCALL __muluhisi3 558 ;; One-extend R27:R26 (A1:A0) 559 sub C2, B0 560 sbc C3, B1 561 ret 562ENDF __mulohisi3 563#endif /* L_mulshisi3 */ 564 565#if defined (L_muluhisi3) 566;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 567;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 568;;; Clobbers: __tmp_reg__ 569DEFUN __muluhisi3 570 XCALL __umulhisi3 571 mul A0, B3 572 add C3, r0 573 mul A1, B2 574 add C3, r0 575 mul A0, B2 576 add C2, r0 577 adc C3, r1 578 clr __zero_reg__ 579 ret 580ENDF __muluhisi3 581#endif /* L_muluhisi3 */ 582 583/******************************************************* 584 Multiplication 32 x 32 with MUL 585*******************************************************/ 586 587#if defined (L_mulsi3) 588;;; R25:R22 = R25:R22 * R21:R18 589;;; (C3:C0) = C3:C0 * B3:B0 590;;; Clobbers: R26, R27, __tmp_reg__ 591DEFUN __mulsi3 592 movw A0, C0 593 push C2 594 push C3 595 XCALL __muluhisi3 596 pop A1 597 pop A0 598 ;; A1:A0 now contains the high word of A 599 mul A0, B0 600 add C2, r0 601 adc C3, r1 602 mul A0, B1 603 add C3, r0 604 mul A1, B0 605 add C3, r0 606 clr __zero_reg__ 607 ret 608ENDF __mulsi3 609#endif /* L_mulsi3 */ 610 611#undef A0 612#undef A1 613 614#undef B0 615#undef B1 616#undef B2 617#undef B3 618 619#undef C0 620#undef C1 621#undef C2 622#undef C3 623 624#endif /* __AVR_HAVE_MUL__ */ 625 626/******************************************************* 627 Multiplication 24 x 24 with MUL 628*******************************************************/ 629 630#if defined (L_mulpsi3) 631 632;; A[0..2]: In: Multiplicand; Out: Product 633#define A0 22 634#define A1 A0+1 635#define A2 A0+2 636 637;; B[0..2]: In: Multiplier 638#define B0 18 639#define B1 B0+1 640#define B2 B0+2 641 642#if defined (__AVR_HAVE_MUL__) 643 644;; C[0..2]: Expand Result 645#define C0 22 646#define C1 C0+1 647#define C2 C0+2 648 649;; R24:R22 *= R20:R18 650;; Clobbers: r21, r25, r26, r27, __tmp_reg__ 651 652#define AA0 26 653#define AA2 21 654 655DEFUN __mulpsi3 656 wmov AA0, A0 657 mov AA2, A2 658 XCALL __umulhisi3 659 mul AA2, B0 $ add C2, r0 660 mul AA0, B2 $ add C2, r0 661 clr __zero_reg__ 662 ret 663ENDF __mulpsi3 664 665#undef AA2 666#undef AA0 667 668#undef C2 669#undef C1 670#undef C0 671 672#else /* !HAVE_MUL */ 673 674;; C[0..2]: Expand Result 675#define C0 0 676#define C1 C0+1 677#define C2 21 678 679;; R24:R22 *= R20:R18 680;; Clobbers: __tmp_reg__, R18, R19, R20, R21 681 682DEFUN __mulpsi3 683 684 ;; C[] = 0 685 clr __tmp_reg__ 686 clr C2 687 6880: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop 689 LSR B2 $ ror B1 $ ror B0 690 691 ;; If the N-th Bit of B[] was set... 692 brcc 1f 693 694 ;; ...then add A[] * 2^N to the Result C[] 695 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 696 6971: ;; Multiply A[] by 2 698 LSL A0 $ rol A1 $ rol A2 699 700 ;; Loop until B[] is 0 701 subi B0,0 $ sbci B1,0 $ sbci B2,0 702 brne 0b 703 704 ;; Copy C[] to the return Register A[] 705 wmov A0, C0 706 mov A2, C2 707 708 clr __zero_reg__ 709 ret 710ENDF __mulpsi3 711 712#undef C2 713#undef C1 714#undef C0 715 716#endif /* HAVE_MUL */ 717 718#undef B2 719#undef B1 720#undef B0 721 722#undef A2 723#undef A1 724#undef A0 725 726#endif /* L_mulpsi3 */ 727 728#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__) 729 730;; A[0..2]: In: Multiplicand 731#define A0 22 732#define A1 A0+1 733#define A2 A0+2 734 735;; BB: In: Multiplier 736#define BB 25 737 738;; C[0..2]: Result 739#define C0 18 740#define C1 C0+1 741#define C2 C0+2 742 743;; C[] = A[] * sign_extend (BB) 744DEFUN __mulsqipsi3 745 mul A0, BB 746 movw C0, r0 747 mul A2, BB 748 mov C2, r0 749 mul A1, BB 750 add C1, r0 751 adc C2, r1 752 clr __zero_reg__ 753 sbrs BB, 7 754 ret 755 ;; One-extend BB 756 sub C1, A0 757 sbc C2, A1 758 ret 759ENDF __mulsqipsi3 760 761#undef C2 762#undef C1 763#undef C0 764 765#undef BB 766 767#undef A2 768#undef A1 769#undef A0 770 771#endif /* L_mulsqipsi3 && HAVE_MUL */ 772 773/******************************************************* 774 Multiplication 64 x 64 775*******************************************************/ 776 777;; A[] = A[] * B[] 778 779;; A[0..7]: In: Multiplicand 780;; Out: Product 781#define A0 18 782#define A1 A0+1 783#define A2 A0+2 784#define A3 A0+3 785#define A4 A0+4 786#define A5 A0+5 787#define A6 A0+6 788#define A7 A0+7 789 790;; B[0..7]: In: Multiplier 791#define B0 10 792#define B1 B0+1 793#define B2 B0+2 794#define B3 B0+3 795#define B4 B0+4 796#define B5 B0+5 797#define B6 B0+6 798#define B7 B0+7 799 800#if defined (__AVR_HAVE_MUL__) 801 802;; Define C[] for convenience 803;; Notice that parts of C[] overlap A[] respective B[] 804#define C0 16 805#define C1 C0+1 806#define C2 20 807#define C3 C2+1 808#define C4 28 809#define C5 C4+1 810#define C6 C4+2 811#define C7 C4+3 812 813#if defined (L_muldi3) 814 815;; A[] *= B[] 816;; R25:R18 *= R17:R10 817;; Ordinary ABI-Function 818 819DEFUN __muldi3 820 push r29 821 push r28 822 push r17 823 push r16 824 825 ;; Counting in Words, we have to perform a 4 * 4 Multiplication 826 827 ;; 3 * 0 + 0 * 3 828 mul A7,B0 $ $ mov C7,r0 829 mul A0,B7 $ $ add C7,r0 830 mul A6,B1 $ $ add C7,r0 831 mul A6,B0 $ mov C6,r0 $ add C7,r1 832 mul B6,A1 $ $ add C7,r0 833 mul B6,A0 $ add C6,r0 $ adc C7,r1 834 835 ;; 1 * 2 836 mul A2,B4 $ add C6,r0 $ adc C7,r1 837 mul A3,B4 $ $ add C7,r0 838 mul A2,B5 $ $ add C7,r0 839 840 push A5 841 push A4 842 push B1 843 push B0 844 push A3 845 push A2 846 847 ;; 0 * 0 848 wmov 26, B0 849 XCALL __umulhisi3 850 wmov C0, 22 851 wmov C2, 24 852 853 ;; 0 * 2 854 wmov 26, B4 855 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25 856 857 wmov 26, B2 858 ;; 0 * 1 859 XCALL __muldi3_6 860 861 pop A0 862 pop A1 863 ;; 1 * 1 864 wmov 26, B2 865 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 866 867 pop r26 868 pop r27 869 ;; 1 * 0 870 XCALL __muldi3_6 871 872 pop A0 873 pop A1 874 ;; 2 * 0 875 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 876 877 ;; 2 * 1 878 wmov 26, B2 879 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23 880 881 ;; A[] = C[] 882 wmov A0, C0 883 ;; A2 = C2 already 884 wmov A4, C4 885 wmov A6, C6 886 887 clr __zero_reg__ 888 pop r16 889 pop r17 890 pop r28 891 pop r29 892 ret 893ENDF __muldi3 894#endif /* L_muldi3 */ 895 896#if defined (L_muldi3_6) 897;; A helper for some 64-bit multiplications with MUL available 898DEFUN __muldi3_6 899__muldi3_6: 900 XCALL __umulhisi3 901 add C2, 22 902 adc C3, 23 903 adc C4, 24 904 adc C5, 25 905 brcc 0f 906 adiw C6, 1 9070: ret 908ENDF __muldi3_6 909#endif /* L_muldi3_6 */ 910 911#undef C7 912#undef C6 913#undef C5 914#undef C4 915#undef C3 916#undef C2 917#undef C1 918#undef C0 919 920#else /* !HAVE_MUL */ 921 922#if defined (L_muldi3) 923 924#define C0 26 925#define C1 C0+1 926#define C2 C0+2 927#define C3 C0+3 928#define C4 C0+4 929#define C5 C0+5 930#define C6 0 931#define C7 C6+1 932 933#define Loop 9 934 935;; A[] *= B[] 936;; R25:R18 *= R17:R10 937;; Ordinary ABI-Function 938 939DEFUN __muldi3 940 push r29 941 push r28 942 push Loop 943 944 ldi C0, 64 945 mov Loop, C0 946 947 ;; C[] = 0 948 clr __tmp_reg__ 949 wmov C0, 0 950 wmov C2, 0 951 wmov C4, 0 952 9530: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[] 954 ;; where N = 64 - Loop. 955 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished, 956 ;; B[] will have its initial Value again. 957 LSR B7 $ ror B6 $ ror B5 $ ror B4 958 ror B3 $ ror B2 $ ror B1 $ ror B0 959 960 ;; If the N-th Bit of B[] was set then... 961 brcc 1f 962 ;; ...finish Rotation... 963 ori B7, 1 << 7 964 965 ;; ...and add A[] * 2^N to the Result C[] 966 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3 967 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7 968 9691: ;; Multiply A[] by 2 970 LSL A0 $ rol A1 $ rol A2 $ rol A3 971 rol A4 $ rol A5 $ rol A6 $ rol A7 972 973 dec Loop 974 brne 0b 975 976 ;; We expanded the Result in C[] 977 ;; Copy Result to the Return Register A[] 978 wmov A0, C0 979 wmov A2, C2 980 wmov A4, C4 981 wmov A6, C6 982 983 clr __zero_reg__ 984 pop Loop 985 pop r28 986 pop r29 987 ret 988ENDF __muldi3 989 990#undef Loop 991 992#undef C7 993#undef C6 994#undef C5 995#undef C4 996#undef C3 997#undef C2 998#undef C1 999#undef C0 1000 1001#endif /* L_muldi3 */ 1002#endif /* HAVE_MUL */ 1003 1004#undef B7 1005#undef B6 1006#undef B5 1007#undef B4 1008#undef B3 1009#undef B2 1010#undef B1 1011#undef B0 1012 1013#undef A7 1014#undef A6 1015#undef A5 1016#undef A4 1017#undef A3 1018#undef A2 1019#undef A1 1020#undef A0 1021 1022/******************************************************* 1023 Widening Multiplication 64 = 32 x 32 with MUL 1024*******************************************************/ 1025 1026#if defined (__AVR_HAVE_MUL__) 1027#define A0 r22 1028#define A1 r23 1029#define A2 r24 1030#define A3 r25 1031 1032#define B0 r18 1033#define B1 r19 1034#define B2 r20 1035#define B3 r21 1036 1037#define C0 18 1038#define C1 C0+1 1039#define C2 20 1040#define C3 C2+1 1041#define C4 28 1042#define C5 C4+1 1043#define C6 C4+2 1044#define C7 C4+3 1045 1046#if defined (L_umulsidi3) 1047 1048;; Unsigned widening 64 = 32 * 32 Multiplication with MUL 1049 1050;; R18[8] = R22[4] * R18[4] 1051;; 1052;; Ordinary ABI Function, but additionally sets 1053;; X = R20[2] = B2[2] 1054;; Z = R22[2] = A0[2] 1055DEFUN __umulsidi3 1056 clt 1057 ;; FALLTHRU 1058ENDF __umulsidi3 1059 ;; T = sign (A) 1060DEFUN __umulsidi3_helper 1061 push 29 $ push 28 ; Y 1062 wmov 30, A2 1063 ;; Counting in Words, we have to perform 4 Multiplications 1064 ;; 0 * 0 1065 wmov 26, A0 1066 XCALL __umulhisi3 1067 push 23 $ push 22 ; C0 1068 wmov 28, B0 1069 wmov 18, B2 1070 wmov C2, 24 1071 push 27 $ push 26 ; A0 1072 push 19 $ push 18 ; B2 1073 ;; 1074 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y 1075 ;; B2 C2 -- -- -- B0 A2 1076 ;; 1 * 1 1077 wmov 26, 30 ; A2 1078 XCALL __umulhisi3 1079 ;; Sign-extend A. T holds the sign of A 1080 brtc 0f 1081 ;; Subtract B from the high part of the result 1082 sub 22, 28 1083 sbc 23, 29 1084 sbc 24, 18 1085 sbc 25, 19 10860: wmov 18, 28 ;; B0 1087 wmov C4, 22 1088 wmov C6, 24 1089 ;; 1090 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y 1091 ;; B0 C2 -- -- A2 C4 C6 1092 ;; 1093 ;; 1 * 0 1094 XCALL __muldi3_6 1095 ;; 0 * 1 1096 pop 26 $ pop 27 ;; B2 1097 pop 18 $ pop 19 ;; A0 1098 XCALL __muldi3_6 1099 1100 ;; Move result C into place and save A0 in Z 1101 wmov 22, C4 1102 wmov 24, C6 1103 wmov 30, 18 ; A0 1104 pop C0 $ pop C1 1105 1106 ;; Epilogue 1107 pop 28 $ pop 29 ;; Y 1108 ret 1109ENDF __umulsidi3_helper 1110#endif /* L_umulsidi3 */ 1111 1112 1113#if defined (L_mulsidi3) 1114 1115;; Signed widening 64 = 32 * 32 Multiplication 1116;; 1117;; R18[8] = R22[4] * R18[4] 1118;; Ordinary ABI Function 1119DEFUN __mulsidi3 1120 bst A3, 7 1121 sbrs B3, 7 ; Enhanced core has no skip bug 1122 XJMP __umulsidi3_helper 1123 1124 ;; B needs sign-extension 1125 push A3 1126 push A2 1127 XCALL __umulsidi3_helper 1128 ;; A0 survived in Z 1129 sub r22, r30 1130 sbc r23, r31 1131 pop r26 1132 pop r27 1133 sbc r24, r26 1134 sbc r25, r27 1135 ret 1136ENDF __mulsidi3 1137#endif /* L_mulsidi3 */ 1138 1139#undef A0 1140#undef A1 1141#undef A2 1142#undef A3 1143#undef B0 1144#undef B1 1145#undef B2 1146#undef B3 1147#undef C0 1148#undef C1 1149#undef C2 1150#undef C3 1151#undef C4 1152#undef C5 1153#undef C6 1154#undef C7 1155#endif /* HAVE_MUL */ 1156 1157/********************************************************** 1158 Widening Multiplication 64 = 32 x 32 without MUL 1159**********************************************************/ 1160 1161#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__) 1162#define A0 18 1163#define A1 A0+1 1164#define A2 A0+2 1165#define A3 A0+3 1166#define A4 A0+4 1167#define A5 A0+5 1168#define A6 A0+6 1169#define A7 A0+7 1170 1171#define B0 10 1172#define B1 B0+1 1173#define B2 B0+2 1174#define B3 B0+3 1175#define B4 B0+4 1176#define B5 B0+5 1177#define B6 B0+6 1178#define B7 B0+7 1179 1180#define AA0 22 1181#define AA1 AA0+1 1182#define AA2 AA0+2 1183#define AA3 AA0+3 1184 1185#define BB0 18 1186#define BB1 BB0+1 1187#define BB2 BB0+2 1188#define BB3 BB0+3 1189 1190#define Mask r30 1191 1192;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL 1193;; 1194;; R18[8] = R22[4] * R18[4] 1195;; Ordinary ABI Function 1196DEFUN __mulsidi3 1197 set 1198 skip 1199 ;; FALLTHRU 1200ENDF __mulsidi3 1201 1202DEFUN __umulsidi3 1203 clt ; skipped 1204 ;; Save 10 Registers: R10..R17, R28, R29 1205 do_prologue_saves 10 1206 ldi Mask, 0xff 1207 bld Mask, 7 1208 ;; Move B into place... 1209 wmov B0, BB0 1210 wmov B2, BB2 1211 ;; ...and extend it 1212 and BB3, Mask 1213 lsl BB3 1214 sbc B4, B4 1215 mov B5, B4 1216 wmov B6, B4 1217 ;; Move A into place... 1218 wmov A0, AA0 1219 wmov A2, AA2 1220 ;; ...and extend it 1221 and AA3, Mask 1222 lsl AA3 1223 sbc A4, A4 1224 mov A5, A4 1225 wmov A6, A4 1226 XCALL __muldi3 1227 do_epilogue_restores 10 1228ENDF __umulsidi3 1229 1230#undef A0 1231#undef A1 1232#undef A2 1233#undef A3 1234#undef A4 1235#undef A5 1236#undef A6 1237#undef A7 1238#undef B0 1239#undef B1 1240#undef B2 1241#undef B3 1242#undef B4 1243#undef B5 1244#undef B6 1245#undef B7 1246#undef AA0 1247#undef AA1 1248#undef AA2 1249#undef AA3 1250#undef BB0 1251#undef BB1 1252#undef BB2 1253#undef BB3 1254#undef Mask 1255#endif /* L_mulsidi3 && !HAVE_MUL */ 1256 1257;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1258 1259 1260.section .text.libgcc.div, "ax", @progbits 1261 1262/******************************************************* 1263 Division 8 / 8 => (result + remainder) 1264*******************************************************/ 1265#define r_rem r25 /* remainder */ 1266#define r_arg1 r24 /* dividend, quotient */ 1267#define r_arg2 r22 /* divisor */ 1268#define r_cnt r23 /* loop count */ 1269 1270#if defined (L_udivmodqi4) 1271DEFUN __udivmodqi4 1272 sub r_rem,r_rem ; clear remainder and carry 1273 ldi r_cnt,9 ; init loop counter 1274 rjmp __udivmodqi4_ep ; jump to entry point 1275__udivmodqi4_loop: 1276 rol r_rem ; shift dividend into remainder 1277 cp r_rem,r_arg2 ; compare remainder & divisor 1278 brcs __udivmodqi4_ep ; remainder <= divisor 1279 sub r_rem,r_arg2 ; restore remainder 1280__udivmodqi4_ep: 1281 rol r_arg1 ; shift dividend (with CARRY) 1282 dec r_cnt ; decrement loop counter 1283 brne __udivmodqi4_loop 1284 com r_arg1 ; complement result 1285 ; because C flag was complemented in loop 1286 ret 1287ENDF __udivmodqi4 1288#endif /* defined (L_udivmodqi4) */ 1289 1290#if defined (L_divmodqi4) 1291DEFUN __divmodqi4 1292 bst r_arg1,7 ; store sign of dividend 1293 mov __tmp_reg__,r_arg1 1294 eor __tmp_reg__,r_arg2; r0.7 is sign of result 1295 sbrc r_arg1,7 1296 neg r_arg1 ; dividend negative : negate 1297 sbrc r_arg2,7 1298 neg r_arg2 ; divisor negative : negate 1299 XCALL __udivmodqi4 ; do the unsigned div/mod 1300 brtc __divmodqi4_1 1301 neg r_rem ; correct remainder sign 1302__divmodqi4_1: 1303 sbrc __tmp_reg__,7 1304 neg r_arg1 ; correct result sign 1305__divmodqi4_exit: 1306 ret 1307ENDF __divmodqi4 1308#endif /* defined (L_divmodqi4) */ 1309 1310#undef r_rem 1311#undef r_arg1 1312#undef r_arg2 1313#undef r_cnt 1314 1315 1316/******************************************************* 1317 Division 16 / 16 => (result + remainder) 1318*******************************************************/ 1319#define r_remL r26 /* remainder Low */ 1320#define r_remH r27 /* remainder High */ 1321 1322/* return: remainder */ 1323#define r_arg1L r24 /* dividend Low */ 1324#define r_arg1H r25 /* dividend High */ 1325 1326/* return: quotient */ 1327#define r_arg2L r22 /* divisor Low */ 1328#define r_arg2H r23 /* divisor High */ 1329 1330#define r_cnt r21 /* loop count */ 1331 1332#if defined (L_udivmodhi4) 1333DEFUN __udivmodhi4 1334 sub r_remL,r_remL 1335 sub r_remH,r_remH ; clear remainder and carry 1336 ldi r_cnt,17 ; init loop counter 1337 rjmp __udivmodhi4_ep ; jump to entry point 1338__udivmodhi4_loop: 1339 rol r_remL ; shift dividend into remainder 1340 rol r_remH 1341 cp r_remL,r_arg2L ; compare remainder & divisor 1342 cpc r_remH,r_arg2H 1343 brcs __udivmodhi4_ep ; remainder < divisor 1344 sub r_remL,r_arg2L ; restore remainder 1345 sbc r_remH,r_arg2H 1346__udivmodhi4_ep: 1347 rol r_arg1L ; shift dividend (with CARRY) 1348 rol r_arg1H 1349 dec r_cnt ; decrement loop counter 1350 brne __udivmodhi4_loop 1351 com r_arg1L 1352 com r_arg1H 1353; div/mod results to return registers, as for the div() function 1354 mov_l r_arg2L, r_arg1L ; quotient 1355 mov_h r_arg2H, r_arg1H 1356 mov_l r_arg1L, r_remL ; remainder 1357 mov_h r_arg1H, r_remH 1358 ret 1359ENDF __udivmodhi4 1360#endif /* defined (L_udivmodhi4) */ 1361 1362#if defined (L_divmodhi4) 1363DEFUN __divmodhi4 1364 .global _div 1365_div: 1366 bst r_arg1H,7 ; store sign of dividend 1367 mov __tmp_reg__,r_arg2H 1368 brtc 0f 1369 com __tmp_reg__ ; r0.7 is sign of result 1370 rcall __divmodhi4_neg1 ; dividend negative: negate 13710: 1372 sbrc r_arg2H,7 1373 rcall __divmodhi4_neg2 ; divisor negative: negate 1374 XCALL __udivmodhi4 ; do the unsigned div/mod 1375 sbrc __tmp_reg__,7 1376 rcall __divmodhi4_neg2 ; correct remainder sign 1377 brtc __divmodhi4_exit 1378__divmodhi4_neg1: 1379 ;; correct dividend/remainder sign 1380 com r_arg1H 1381 neg r_arg1L 1382 sbci r_arg1H,0xff 1383 ret 1384__divmodhi4_neg2: 1385 ;; correct divisor/result sign 1386 com r_arg2H 1387 neg r_arg2L 1388 sbci r_arg2H,0xff 1389__divmodhi4_exit: 1390 ret 1391ENDF __divmodhi4 1392#endif /* defined (L_divmodhi4) */ 1393 1394#undef r_remH 1395#undef r_remL 1396 1397#undef r_arg1H 1398#undef r_arg1L 1399 1400#undef r_arg2H 1401#undef r_arg2L 1402 1403#undef r_cnt 1404 1405/******************************************************* 1406 Division 24 / 24 => (result + remainder) 1407*******************************************************/ 1408 1409;; A[0..2]: In: Dividend; Out: Quotient 1410#define A0 22 1411#define A1 A0+1 1412#define A2 A0+2 1413 1414;; B[0..2]: In: Divisor; Out: Remainder 1415#define B0 18 1416#define B1 B0+1 1417#define B2 B0+2 1418 1419;; C[0..2]: Expand remainder 1420#define C0 __zero_reg__ 1421#define C1 26 1422#define C2 25 1423 1424;; Loop counter 1425#define r_cnt 21 1426 1427#if defined (L_udivmodpsi4) 1428;; R24:R22 = R24:R22 udiv R20:R18 1429;; R20:R18 = R24:R22 umod R20:R18 1430;; Clobbers: R21, R25, R26 1431 1432DEFUN __udivmodpsi4 1433 ; init loop counter 1434 ldi r_cnt, 24+1 1435 ; Clear remainder and carry. C0 is already 0 1436 clr C1 1437 sub C2, C2 1438 ; jump to entry point 1439 rjmp __udivmodpsi4_start 1440__udivmodpsi4_loop: 1441 ; shift dividend into remainder 1442 rol C0 1443 rol C1 1444 rol C2 1445 ; compare remainder & divisor 1446 cp C0, B0 1447 cpc C1, B1 1448 cpc C2, B2 1449 brcs __udivmodpsi4_start ; remainder <= divisor 1450 sub C0, B0 ; restore remainder 1451 sbc C1, B1 1452 sbc C2, B2 1453__udivmodpsi4_start: 1454 ; shift dividend (with CARRY) 1455 rol A0 1456 rol A1 1457 rol A2 1458 ; decrement loop counter 1459 dec r_cnt 1460 brne __udivmodpsi4_loop 1461 com A0 1462 com A1 1463 com A2 1464 ; div/mod results to return registers 1465 ; remainder 1466 mov B0, C0 1467 mov B1, C1 1468 mov B2, C2 1469 clr __zero_reg__ ; C0 1470 ret 1471ENDF __udivmodpsi4 1472#endif /* defined (L_udivmodpsi4) */ 1473 1474#if defined (L_divmodpsi4) 1475;; R24:R22 = R24:R22 div R20:R18 1476;; R20:R18 = R24:R22 mod R20:R18 1477;; Clobbers: T, __tmp_reg__, R21, R25, R26 1478 1479DEFUN __divmodpsi4 1480 ; R0.7 will contain the sign of the result: 1481 ; R0.7 = A.sign ^ B.sign 1482 mov __tmp_reg__, B2 1483 ; T-flag = sign of dividend 1484 bst A2, 7 1485 brtc 0f 1486 com __tmp_reg__ 1487 ; Adjust dividend's sign 1488 rcall __divmodpsi4_negA 14890: 1490 ; Adjust divisor's sign 1491 sbrc B2, 7 1492 rcall __divmodpsi4_negB 1493 1494 ; Do the unsigned div/mod 1495 XCALL __udivmodpsi4 1496 1497 ; Adjust quotient's sign 1498 sbrc __tmp_reg__, 7 1499 rcall __divmodpsi4_negA 1500 1501 ; Adjust remainder's sign 1502 brtc __divmodpsi4_end 1503 1504__divmodpsi4_negB: 1505 ; Correct divisor/remainder sign 1506 com B2 1507 com B1 1508 neg B0 1509 sbci B1, -1 1510 sbci B2, -1 1511 ret 1512 1513 ; Correct dividend/quotient sign 1514__divmodpsi4_negA: 1515 com A2 1516 com A1 1517 neg A0 1518 sbci A1, -1 1519 sbci A2, -1 1520__divmodpsi4_end: 1521 ret 1522 1523ENDF __divmodpsi4 1524#endif /* defined (L_divmodpsi4) */ 1525 1526#undef A0 1527#undef A1 1528#undef A2 1529 1530#undef B0 1531#undef B1 1532#undef B2 1533 1534#undef C0 1535#undef C1 1536#undef C2 1537 1538#undef r_cnt 1539 1540/******************************************************* 1541 Division 32 / 32 => (result + remainder) 1542*******************************************************/ 1543#define r_remHH r31 /* remainder High */ 1544#define r_remHL r30 1545#define r_remH r27 1546#define r_remL r26 /* remainder Low */ 1547 1548/* return: remainder */ 1549#define r_arg1HH r25 /* dividend High */ 1550#define r_arg1HL r24 1551#define r_arg1H r23 1552#define r_arg1L r22 /* dividend Low */ 1553 1554/* return: quotient */ 1555#define r_arg2HH r21 /* divisor High */ 1556#define r_arg2HL r20 1557#define r_arg2H r19 1558#define r_arg2L r18 /* divisor Low */ 1559 1560#define r_cnt __zero_reg__ /* loop count (0 after the loop!) */ 1561 1562#if defined (L_udivmodsi4) 1563DEFUN __udivmodsi4 1564 ldi r_remL, 33 ; init loop counter 1565 mov r_cnt, r_remL 1566 sub r_remL,r_remL 1567 sub r_remH,r_remH ; clear remainder and carry 1568 mov_l r_remHL, r_remL 1569 mov_h r_remHH, r_remH 1570 rjmp __udivmodsi4_ep ; jump to entry point 1571__udivmodsi4_loop: 1572 rol r_remL ; shift dividend into remainder 1573 rol r_remH 1574 rol r_remHL 1575 rol r_remHH 1576 cp r_remL,r_arg2L ; compare remainder & divisor 1577 cpc r_remH,r_arg2H 1578 cpc r_remHL,r_arg2HL 1579 cpc r_remHH,r_arg2HH 1580 brcs __udivmodsi4_ep ; remainder <= divisor 1581 sub r_remL,r_arg2L ; restore remainder 1582 sbc r_remH,r_arg2H 1583 sbc r_remHL,r_arg2HL 1584 sbc r_remHH,r_arg2HH 1585__udivmodsi4_ep: 1586 rol r_arg1L ; shift dividend (with CARRY) 1587 rol r_arg1H 1588 rol r_arg1HL 1589 rol r_arg1HH 1590 dec r_cnt ; decrement loop counter 1591 brne __udivmodsi4_loop 1592 ; __zero_reg__ now restored (r_cnt == 0) 1593 com r_arg1L 1594 com r_arg1H 1595 com r_arg1HL 1596 com r_arg1HH 1597; div/mod results to return registers, as for the ldiv() function 1598 mov_l r_arg2L, r_arg1L ; quotient 1599 mov_h r_arg2H, r_arg1H 1600 mov_l r_arg2HL, r_arg1HL 1601 mov_h r_arg2HH, r_arg1HH 1602 mov_l r_arg1L, r_remL ; remainder 1603 mov_h r_arg1H, r_remH 1604 mov_l r_arg1HL, r_remHL 1605 mov_h r_arg1HH, r_remHH 1606 ret 1607ENDF __udivmodsi4 1608#endif /* defined (L_udivmodsi4) */ 1609 1610#if defined (L_divmodsi4) 1611DEFUN __divmodsi4 1612 mov __tmp_reg__,r_arg2HH 1613 bst r_arg1HH,7 ; store sign of dividend 1614 brtc 0f 1615 com __tmp_reg__ ; r0.7 is sign of result 1616 XCALL __negsi2 ; dividend negative: negate 16170: 1618 sbrc r_arg2HH,7 1619 rcall __divmodsi4_neg2 ; divisor negative: negate 1620 XCALL __udivmodsi4 ; do the unsigned div/mod 1621 sbrc __tmp_reg__, 7 ; correct quotient sign 1622 rcall __divmodsi4_neg2 1623 brtc __divmodsi4_exit ; correct remainder sign 1624 XJMP __negsi2 1625__divmodsi4_neg2: 1626 ;; correct divisor/quotient sign 1627 com r_arg2HH 1628 com r_arg2HL 1629 com r_arg2H 1630 neg r_arg2L 1631 sbci r_arg2H,0xff 1632 sbci r_arg2HL,0xff 1633 sbci r_arg2HH,0xff 1634__divmodsi4_exit: 1635 ret 1636ENDF __divmodsi4 1637#endif /* defined (L_divmodsi4) */ 1638 1639#if defined (L_negsi2) 1640;; (set (reg:SI 22) 1641;; (neg:SI (reg:SI 22))) 1642;; Sets the V flag for signed overflow tests 1643DEFUN __negsi2 1644 NEG4 22 1645 ret 1646ENDF __negsi2 1647#endif /* L_negsi2 */ 1648 1649#undef r_remHH 1650#undef r_remHL 1651#undef r_remH 1652#undef r_remL 1653#undef r_arg1HH 1654#undef r_arg1HL 1655#undef r_arg1H 1656#undef r_arg1L 1657#undef r_arg2HH 1658#undef r_arg2HL 1659#undef r_arg2H 1660#undef r_arg2L 1661#undef r_cnt 1662 1663/******************************************************* 1664 Division 64 / 64 1665 Modulo 64 % 64 1666*******************************************************/ 1667 1668;; Use Speed-optimized Version on "big" Devices, i.e. Devices with 1669;; at least 16k of Program Memory. For smaller Devices, depend 1670;; on MOVW and SP Size. There is a Connexion between SP Size and 1671;; Flash Size so that SP Size can be used to test for Flash Size. 1672 1673#if defined (__AVR_HAVE_JMP_CALL__) 1674# define SPEED_DIV 8 1675#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__) 1676# define SPEED_DIV 16 1677#else 1678# define SPEED_DIV 0 1679#endif 1680 1681;; A[0..7]: In: Dividend; 1682;; Out: Quotient (T = 0) 1683;; Out: Remainder (T = 1) 1684#define A0 18 1685#define A1 A0+1 1686#define A2 A0+2 1687#define A3 A0+3 1688#define A4 A0+4 1689#define A5 A0+5 1690#define A6 A0+6 1691#define A7 A0+7 1692 1693;; B[0..7]: In: Divisor; Out: Clobber 1694#define B0 10 1695#define B1 B0+1 1696#define B2 B0+2 1697#define B3 B0+3 1698#define B4 B0+4 1699#define B5 B0+5 1700#define B6 B0+6 1701#define B7 B0+7 1702 1703;; C[0..7]: Expand remainder; Out: Remainder (unused) 1704#define C0 8 1705#define C1 C0+1 1706#define C2 30 1707#define C3 C2+1 1708#define C4 28 1709#define C5 C4+1 1710#define C6 26 1711#define C7 C6+1 1712 1713;; Holds Signs during Division Routine 1714#define SS __tmp_reg__ 1715 1716;; Bit-Counter in Division Routine 1717#define R_cnt __zero_reg__ 1718 1719;; Scratch Register for Negation 1720#define NN r31 1721 1722#if defined (L_udivdi3) 1723 1724;; R25:R18 = R24:R18 umod R17:R10 1725;; Ordinary ABI-Function 1726 1727DEFUN __umoddi3 1728 set 1729 rjmp __udivdi3_umoddi3 1730ENDF __umoddi3 1731 1732;; R25:R18 = R24:R18 udiv R17:R10 1733;; Ordinary ABI-Function 1734 1735DEFUN __udivdi3 1736 clt 1737ENDF __udivdi3 1738 1739DEFUN __udivdi3_umoddi3 1740 push C0 1741 push C1 1742 push C4 1743 push C5 1744 XCALL __udivmod64 1745 pop C5 1746 pop C4 1747 pop C1 1748 pop C0 1749 ret 1750ENDF __udivdi3_umoddi3 1751#endif /* L_udivdi3 */ 1752 1753#if defined (L_udivmod64) 1754 1755;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation 1756;; No Registers saved/restored; the Callers will take Care. 1757;; Preserves B[] and T-flag 1758;; T = 0: Compute Quotient in A[] 1759;; T = 1: Compute Remainder in A[] and shift SS one Bit left 1760 1761DEFUN __udivmod64 1762 1763 ;; Clear Remainder (C6, C7 will follow) 1764 clr C0 1765 clr C1 1766 wmov C2, C0 1767 wmov C4, C0 1768 ldi C7, 64 1769 1770#if SPEED_DIV == 0 || SPEED_DIV == 16 1771 ;; Initialize Loop-Counter 1772 mov R_cnt, C7 1773 wmov C6, C0 1774#endif /* SPEED_DIV */ 1775 1776#if SPEED_DIV == 8 1777 1778 push A7 1779 clr C6 1780 17811: ;; Compare shifted Devidend against Divisor 1782 ;; If -- even after Shifting -- it is smaller... 1783 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3 1784 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7 1785 brcc 2f 1786 1787 ;; ...then we can subtract it. Thus, it is legal to shift left 1788 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3 1789 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7 1790 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3 1791 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0 1792 1793 ;; 8 Bits are done 1794 subi C7, 8 1795 brne 1b 1796 1797 ;; Shifted 64 Bits: A7 has traveled to C7 1798 pop C7 1799 ;; Divisor is greater than Dividend. We have: 1800 ;; A[] % B[] = A[] 1801 ;; A[] / B[] = 0 1802 ;; Thus, we can return immediately 1803 rjmp 5f 1804 18052: ;; Initialze Bit-Counter with Number of Bits still to be performed 1806 mov R_cnt, C7 1807 1808 ;; Push of A7 is not needed because C7 is still 0 1809 pop C7 1810 clr C7 1811 1812#elif SPEED_DIV == 16 1813 1814 ;; Compare shifted Dividend against Divisor 1815 cp A7, B3 1816 cpc C0, B4 1817 cpc C1, B5 1818 cpc C2, B6 1819 cpc C3, B7 1820 brcc 2f 1821 1822 ;; Divisor is greater than shifted Dividen: We can shift the Dividend 1823 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk 1824 wmov C2,A6 $ wmov C0,A4 1825 wmov A6,A2 $ wmov A4,A0 1826 wmov A2,C6 $ wmov A0,C4 1827 1828 ;; Set Bit Counter to 32 1829 lsr R_cnt 18302: 1831#elif SPEED_DIV 1832#error SPEED_DIV = ? 1833#endif /* SPEED_DIV */ 1834 1835;; The very Division + Remainder Routine 1836 18373: ;; Left-shift Dividend... 1838 lsl A0 $ rol A1 $ rol A2 $ rol A3 1839 rol A4 $ rol A5 $ rol A6 $ rol A7 1840 1841 ;; ...into Remainder 1842 rol C0 $ rol C1 $ rol C2 $ rol C3 1843 rol C4 $ rol C5 $ rol C6 $ rol C7 1844 1845 ;; Compare Remainder and Divisor 1846 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3 1847 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7 1848 1849 brcs 4f 1850 1851 ;; Divisor fits into Remainder: Subtract it from Remainder... 1852 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3 1853 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7 1854 1855 ;; ...and set according Bit in the upcoming Quotient 1856 ;; The Bit will travel to its final Position 1857 ori A0, 1 1858 18594: ;; This Bit is done 1860 dec R_cnt 1861 brne 3b 1862 ;; __zero_reg__ is 0 again 1863 1864 ;; T = 0: We are fine with the Quotient in A[] 1865 ;; T = 1: Copy Remainder to A[] 18665: brtc 6f 1867 wmov A0, C0 1868 wmov A2, C2 1869 wmov A4, C4 1870 wmov A6, C6 1871 ;; Move the Sign of the Result to SS.7 1872 lsl SS 1873 18746: ret 1875 1876ENDF __udivmod64 1877#endif /* L_udivmod64 */ 1878 1879 1880#if defined (L_divdi3) 1881 1882;; R25:R18 = R24:R18 mod R17:R10 1883;; Ordinary ABI-Function 1884 1885DEFUN __moddi3 1886 set 1887 rjmp __divdi3_moddi3 1888ENDF __moddi3 1889 1890;; R25:R18 = R24:R18 div R17:R10 1891;; Ordinary ABI-Function 1892 1893DEFUN __divdi3 1894 clt 1895ENDF __divdi3 1896 1897DEFUN __divdi3_moddi3 1898#if SPEED_DIV 1899 mov r31, A7 1900 or r31, B7 1901 brmi 0f 1902 ;; Both Signs are 0: the following Complexitiy is not needed 1903 XJMP __udivdi3_umoddi3 1904#endif /* SPEED_DIV */ 1905 19060: ;; The Prologue 1907 ;; Save 12 Registers: Y, 17...8 1908 ;; No Frame needed 1909 do_prologue_saves 12 1910 1911 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign) 1912 ;; SS.6 will contain the Sign of the Remainder (A.sign) 1913 mov SS, A7 1914 asr SS 1915 ;; Adjust Dividend's Sign as needed 1916#if SPEED_DIV 1917 ;; Compiling for Speed we know that at least one Sign must be < 0 1918 ;; Thus, if A[] >= 0 then we know B[] < 0 1919 brpl 22f 1920#else 1921 brpl 21f 1922#endif /* SPEED_DIV */ 1923 1924 XCALL __negdi2 1925 1926 ;; Adjust Divisor's Sign and SS.7 as needed 192721: tst B7 1928 brpl 3f 192922: ldi NN, 1 << 7 1930 eor SS, NN 1931 1932 ldi NN, -1 1933 com B4 $ com B5 $ com B6 $ com B7 1934 $ com B1 $ com B2 $ com B3 1935 NEG B0 1936 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN 1937 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN 1938 19393: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag) 1940 XCALL __udivmod64 1941 1942 ;; Adjust Result's Sign 1943#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 1944 tst SS 1945 brpl 4f 1946#else 1947 sbrc SS, 7 1948#endif /* __AVR_HAVE_JMP_CALL__ */ 1949 XCALL __negdi2 1950 19514: ;; Epilogue: Restore 12 Registers and return 1952 do_epilogue_restores 12 1953 1954ENDF __divdi3_moddi3 1955 1956#endif /* L_divdi3 */ 1957 1958#undef R_cnt 1959#undef SS 1960#undef NN 1961 1962.section .text.libgcc, "ax", @progbits 1963 1964#define TT __tmp_reg__ 1965 1966#if defined (L_adddi3) 1967;; (set (reg:DI 18) 1968;; (plus:DI (reg:DI 18) 1969;; (reg:DI 10))) 1970;; Sets the V flag for signed overflow tests 1971;; Sets the C flag for unsigned overflow tests 1972DEFUN __adddi3 1973 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3 1974 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7 1975 ret 1976ENDF __adddi3 1977#endif /* L_adddi3 */ 1978 1979#if defined (L_adddi3_s8) 1980;; (set (reg:DI 18) 1981;; (plus:DI (reg:DI 18) 1982;; (sign_extend:SI (reg:QI 26)))) 1983;; Sets the V flag for signed overflow tests 1984;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128 1985DEFUN __adddi3_s8 1986 clr TT 1987 sbrc r26, 7 1988 com TT 1989 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT 1990 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT 1991 ret 1992ENDF __adddi3_s8 1993#endif /* L_adddi3_s8 */ 1994 1995#if defined (L_subdi3) 1996;; (set (reg:DI 18) 1997;; (minus:DI (reg:DI 18) 1998;; (reg:DI 10))) 1999;; Sets the V flag for signed overflow tests 2000;; Sets the C flag for unsigned overflow tests 2001DEFUN __subdi3 2002 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3 2003 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7 2004 ret 2005ENDF __subdi3 2006#endif /* L_subdi3 */ 2007 2008#if defined (L_cmpdi2) 2009;; (set (cc0) 2010;; (compare (reg:DI 18) 2011;; (reg:DI 10))) 2012DEFUN __cmpdi2 2013 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3 2014 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7 2015 ret 2016ENDF __cmpdi2 2017#endif /* L_cmpdi2 */ 2018 2019#if defined (L_cmpdi2_s8) 2020;; (set (cc0) 2021;; (compare (reg:DI 18) 2022;; (sign_extend:SI (reg:QI 26)))) 2023DEFUN __cmpdi2_s8 2024 clr TT 2025 sbrc r26, 7 2026 com TT 2027 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT 2028 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT 2029 ret 2030ENDF __cmpdi2_s8 2031#endif /* L_cmpdi2_s8 */ 2032 2033#if defined (L_negdi2) 2034;; (set (reg:DI 18) 2035;; (neg:DI (reg:DI 18))) 2036;; Sets the V flag for signed overflow tests 2037DEFUN __negdi2 2038 2039 com A4 $ com A5 $ com A6 $ com A7 2040 $ com A1 $ com A2 $ com A3 2041 NEG A0 2042 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1 2043 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1 2044 ret 2045 2046ENDF __negdi2 2047#endif /* L_negdi2 */ 2048 2049#undef TT 2050 2051#undef C7 2052#undef C6 2053#undef C5 2054#undef C4 2055#undef C3 2056#undef C2 2057#undef C1 2058#undef C0 2059 2060#undef B7 2061#undef B6 2062#undef B5 2063#undef B4 2064#undef B3 2065#undef B2 2066#undef B1 2067#undef B0 2068 2069#undef A7 2070#undef A6 2071#undef A5 2072#undef A4 2073#undef A3 2074#undef A2 2075#undef A1 2076#undef A0 2077 2078 2079.section .text.libgcc.prologue, "ax", @progbits 2080 2081/********************************** 2082 * This is a prologue subroutine 2083 **********************************/ 2084#if defined (L_prologue) 2085 2086;; This function does not clobber T-flag; 64-bit division relies on it 2087DEFUN __prologue_saves__ 2088 push r2 2089 push r3 2090 push r4 2091 push r5 2092 push r6 2093 push r7 2094 push r8 2095 push r9 2096 push r10 2097 push r11 2098 push r12 2099 push r13 2100 push r14 2101 push r15 2102 push r16 2103 push r17 2104 push r28 2105 push r29 2106#if !defined (__AVR_HAVE_SPH__) 2107 in r28,__SP_L__ 2108 sub r28,r26 2109 out __SP_L__,r28 2110 clr r29 2111#elif defined (__AVR_XMEGA__) 2112 in r28,__SP_L__ 2113 in r29,__SP_H__ 2114 sub r28,r26 2115 sbc r29,r27 2116 out __SP_L__,r28 2117 out __SP_H__,r29 2118#else 2119 in r28,__SP_L__ 2120 in r29,__SP_H__ 2121 sub r28,r26 2122 sbc r29,r27 2123 in __tmp_reg__,__SREG__ 2124 cli 2125 out __SP_H__,r29 2126 out __SREG__,__tmp_reg__ 2127 out __SP_L__,r28 2128#endif /* #SP = 8/16 */ 2129 2130#if defined (__AVR_HAVE_EIJMP_EICALL__) 2131 eijmp 2132#else 2133 ijmp 2134#endif 2135 2136ENDF __prologue_saves__ 2137#endif /* defined (L_prologue) */ 2138 2139/* 2140 * This is an epilogue subroutine 2141 */ 2142#if defined (L_epilogue) 2143 2144DEFUN __epilogue_restores__ 2145 ldd r2,Y+18 2146 ldd r3,Y+17 2147 ldd r4,Y+16 2148 ldd r5,Y+15 2149 ldd r6,Y+14 2150 ldd r7,Y+13 2151 ldd r8,Y+12 2152 ldd r9,Y+11 2153 ldd r10,Y+10 2154 ldd r11,Y+9 2155 ldd r12,Y+8 2156 ldd r13,Y+7 2157 ldd r14,Y+6 2158 ldd r15,Y+5 2159 ldd r16,Y+4 2160 ldd r17,Y+3 2161 ldd r26,Y+2 2162#if !defined (__AVR_HAVE_SPH__) 2163 ldd r29,Y+1 2164 add r28,r30 2165 out __SP_L__,r28 2166 mov r28, r26 2167#elif defined (__AVR_XMEGA__) 2168 ldd r27,Y+1 2169 add r28,r30 2170 adc r29,__zero_reg__ 2171 out __SP_L__,r28 2172 out __SP_H__,r29 2173 wmov 28, 26 2174#else 2175 ldd r27,Y+1 2176 add r28,r30 2177 adc r29,__zero_reg__ 2178 in __tmp_reg__,__SREG__ 2179 cli 2180 out __SP_H__,r29 2181 out __SREG__,__tmp_reg__ 2182 out __SP_L__,r28 2183 mov_l r28, r26 2184 mov_h r29, r27 2185#endif /* #SP = 8/16 */ 2186 ret 2187ENDF __epilogue_restores__ 2188#endif /* defined (L_epilogue) */ 2189 2190#ifdef L_exit 2191 .section .fini9,"ax",@progbits 2192DEFUN _exit 2193 .weak exit 2194exit: 2195ENDF _exit 2196 2197 /* Code from .fini8 ... .fini1 sections inserted by ld script. */ 2198 2199 .section .fini0,"ax",@progbits 2200 cli 2201__stop_program: 2202 rjmp __stop_program 2203#endif /* defined (L_exit) */ 2204 2205#ifdef L_cleanup 2206 .weak _cleanup 2207 .func _cleanup 2208_cleanup: 2209 ret 2210.endfunc 2211#endif /* defined (L_cleanup) */ 2212 2213 2214.section .text.libgcc, "ax", @progbits 2215 2216#ifdef L_tablejump 2217DEFUN __tablejump2__ 2218 lsl r30 2219 rol r31 2220 ;; FALLTHRU 2221ENDF __tablejump2__ 2222 2223DEFUN __tablejump__ 2224#if defined (__AVR_HAVE_LPMX__) 2225 lpm __tmp_reg__, Z+ 2226 lpm r31, Z 2227 mov r30, __tmp_reg__ 2228#if defined (__AVR_HAVE_EIJMP_EICALL__) 2229 eijmp 2230#else 2231 ijmp 2232#endif 2233 2234#else /* !HAVE_LPMX */ 2235 lpm 2236 adiw r30, 1 2237 push r0 2238 lpm 2239 push r0 2240#if defined (__AVR_HAVE_EIJMP_EICALL__) 2241 in __tmp_reg__, __EIND__ 2242 push __tmp_reg__ 2243#endif 2244 ret 2245#endif /* !HAVE_LPMX */ 2246ENDF __tablejump__ 2247#endif /* defined (L_tablejump) */ 2248 2249#ifdef L_copy_data 2250 .section .init4,"ax",@progbits 2251DEFUN __do_copy_data 2252#if defined(__AVR_HAVE_ELPMX__) 2253 ldi r17, hi8(__data_end) 2254 ldi r26, lo8(__data_start) 2255 ldi r27, hi8(__data_start) 2256 ldi r30, lo8(__data_load_start) 2257 ldi r31, hi8(__data_load_start) 2258 ldi r16, hh8(__data_load_start) 2259 out __RAMPZ__, r16 2260 rjmp .L__do_copy_data_start 2261.L__do_copy_data_loop: 2262 elpm r0, Z+ 2263 st X+, r0 2264.L__do_copy_data_start: 2265 cpi r26, lo8(__data_end) 2266 cpc r27, r17 2267 brne .L__do_copy_data_loop 2268#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) 2269 ldi r17, hi8(__data_end) 2270 ldi r26, lo8(__data_start) 2271 ldi r27, hi8(__data_start) 2272 ldi r30, lo8(__data_load_start) 2273 ldi r31, hi8(__data_load_start) 2274 ldi r16, hh8(__data_load_start - 0x10000) 2275.L__do_copy_data_carry: 2276 inc r16 2277 out __RAMPZ__, r16 2278 rjmp .L__do_copy_data_start 2279.L__do_copy_data_loop: 2280 elpm 2281 st X+, r0 2282 adiw r30, 1 2283 brcs .L__do_copy_data_carry 2284.L__do_copy_data_start: 2285 cpi r26, lo8(__data_end) 2286 cpc r27, r17 2287 brne .L__do_copy_data_loop 2288#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) 2289 ldi r17, hi8(__data_end) 2290 ldi r26, lo8(__data_start) 2291 ldi r27, hi8(__data_start) 2292 ldi r30, lo8(__data_load_start) 2293 ldi r31, hi8(__data_load_start) 2294 rjmp .L__do_copy_data_start 2295.L__do_copy_data_loop: 2296#if defined (__AVR_HAVE_LPMX__) 2297 lpm r0, Z+ 2298#else 2299 lpm 2300 adiw r30, 1 2301#endif 2302 st X+, r0 2303.L__do_copy_data_start: 2304 cpi r26, lo8(__data_end) 2305 cpc r27, r17 2306 brne .L__do_copy_data_loop 2307#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ 2308#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2309 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2310 out __RAMPZ__, __zero_reg__ 2311#endif /* ELPM && RAMPD */ 2312ENDF __do_copy_data 2313#endif /* L_copy_data */ 2314 2315/* __do_clear_bss is only necessary if there is anything in .bss section. */ 2316 2317#ifdef L_clear_bss 2318 .section .init4,"ax",@progbits 2319DEFUN __do_clear_bss 2320 ldi r17, hi8(__bss_end) 2321 ldi r26, lo8(__bss_start) 2322 ldi r27, hi8(__bss_start) 2323 rjmp .do_clear_bss_start 2324.do_clear_bss_loop: 2325 st X+, __zero_reg__ 2326.do_clear_bss_start: 2327 cpi r26, lo8(__bss_end) 2328 cpc r27, r17 2329 brne .do_clear_bss_loop 2330ENDF __do_clear_bss 2331#endif /* L_clear_bss */ 2332 2333/* __do_global_ctors and __do_global_dtors are only necessary 2334 if there are any constructors/destructors. */ 2335 2336#ifdef L_ctors 2337 .section .init6,"ax",@progbits 2338DEFUN __do_global_ctors 2339#if defined(__AVR_HAVE_ELPM__) 2340 ldi r17, hi8(__ctors_start) 2341 ldi r28, lo8(__ctors_end) 2342 ldi r29, hi8(__ctors_end) 2343 ldi r16, hh8(__ctors_end) 2344 rjmp .L__do_global_ctors_start 2345.L__do_global_ctors_loop: 2346 sbiw r28, 2 2347 sbc r16, __zero_reg__ 2348 mov_h r31, r29 2349 mov_l r30, r28 2350 out __RAMPZ__, r16 2351 XCALL __tablejump_elpm__ 2352.L__do_global_ctors_start: 2353 cpi r28, lo8(__ctors_start) 2354 cpc r29, r17 2355 ldi r24, hh8(__ctors_start) 2356 cpc r16, r24 2357 brne .L__do_global_ctors_loop 2358#else 2359 ldi r17, hi8(__ctors_start) 2360 ldi r28, lo8(__ctors_end) 2361 ldi r29, hi8(__ctors_end) 2362 rjmp .L__do_global_ctors_start 2363.L__do_global_ctors_loop: 2364 sbiw r28, 2 2365 mov_h r31, r29 2366 mov_l r30, r28 2367 XCALL __tablejump__ 2368.L__do_global_ctors_start: 2369 cpi r28, lo8(__ctors_start) 2370 cpc r29, r17 2371 brne .L__do_global_ctors_loop 2372#endif /* defined(__AVR_HAVE_ELPM__) */ 2373ENDF __do_global_ctors 2374#endif /* L_ctors */ 2375 2376#ifdef L_dtors 2377 .section .fini6,"ax",@progbits 2378DEFUN __do_global_dtors 2379#if defined(__AVR_HAVE_ELPM__) 2380 ldi r17, hi8(__dtors_end) 2381 ldi r28, lo8(__dtors_start) 2382 ldi r29, hi8(__dtors_start) 2383 ldi r16, hh8(__dtors_start) 2384 rjmp .L__do_global_dtors_start 2385.L__do_global_dtors_loop: 2386 sbiw r28, 2 2387 sbc r16, __zero_reg__ 2388 mov_h r31, r29 2389 mov_l r30, r28 2390 out __RAMPZ__, r16 2391 XCALL __tablejump_elpm__ 2392.L__do_global_dtors_start: 2393 cpi r28, lo8(__dtors_end) 2394 cpc r29, r17 2395 ldi r24, hh8(__dtors_end) 2396 cpc r16, r24 2397 brne .L__do_global_dtors_loop 2398#else 2399 ldi r17, hi8(__dtors_end) 2400 ldi r28, lo8(__dtors_start) 2401 ldi r29, hi8(__dtors_start) 2402 rjmp .L__do_global_dtors_start 2403.L__do_global_dtors_loop: 2404 mov_h r31, r29 2405 mov_l r30, r28 2406 XCALL __tablejump__ 2407 adiw r28, 2 2408.L__do_global_dtors_start: 2409 cpi r28, lo8(__dtors_end) 2410 cpc r29, r17 2411 brne .L__do_global_dtors_loop 2412#endif /* defined(__AVR_HAVE_ELPM__) */ 2413ENDF __do_global_dtors 2414#endif /* L_dtors */ 2415 2416.section .text.libgcc, "ax", @progbits 2417 2418#ifdef L_tablejump_elpm 2419DEFUN __tablejump_elpm__ 2420#if defined (__AVR_HAVE_ELPMX__) 2421 elpm __tmp_reg__, Z+ 2422 elpm r31, Z 2423 mov r30, __tmp_reg__ 2424#if defined (__AVR_HAVE_RAMPD__) 2425 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2426 out __RAMPZ__, __zero_reg__ 2427#endif /* RAMPD */ 2428#if defined (__AVR_HAVE_EIJMP_EICALL__) 2429 eijmp 2430#else 2431 ijmp 2432#endif 2433 2434#elif defined (__AVR_HAVE_ELPM__) 2435 elpm 2436 adiw r30, 1 2437 push r0 2438 elpm 2439 push r0 2440#if defined (__AVR_HAVE_EIJMP_EICALL__) 2441 in __tmp_reg__, __EIND__ 2442 push __tmp_reg__ 2443#endif 2444 ret 2445#endif 2446ENDF __tablejump_elpm__ 2447#endif /* defined (L_tablejump_elpm) */ 2448 2449;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2450;; Loading n bytes from Flash; n = 3,4 2451;; R22... = Flash[Z] 2452;; Clobbers: __tmp_reg__ 2453 2454#if (defined (L_load_3) \ 2455 || defined (L_load_4)) \ 2456 && !defined (__AVR_HAVE_LPMX__) 2457 2458;; Destination 2459#define D0 22 2460#define D1 D0+1 2461#define D2 D0+2 2462#define D3 D0+3 2463 2464.macro .load dest, n 2465 lpm 2466 mov \dest, r0 2467.if \dest != D0+\n-1 2468 adiw r30, 1 2469.else 2470 sbiw r30, \n-1 2471.endif 2472.endm 2473 2474#if defined (L_load_3) 2475DEFUN __load_3 2476 push D3 2477 XCALL __load_4 2478 pop D3 2479 ret 2480ENDF __load_3 2481#endif /* L_load_3 */ 2482 2483#if defined (L_load_4) 2484DEFUN __load_4 2485 .load D0, 4 2486 .load D1, 4 2487 .load D2, 4 2488 .load D3, 4 2489 ret 2490ENDF __load_4 2491#endif /* L_load_4 */ 2492 2493#endif /* L_load_3 || L_load_3 */ 2494 2495;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2496;; Loading n bytes from Flash or RAM; n = 1,2,3,4 2497;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7 2498;; Clobbers: __tmp_reg__, R21, R30, R31 2499 2500#if (defined (L_xload_1) \ 2501 || defined (L_xload_2) \ 2502 || defined (L_xload_3) \ 2503 || defined (L_xload_4)) 2504 2505;; Destination 2506#define D0 22 2507#define D1 D0+1 2508#define D2 D0+2 2509#define D3 D0+3 2510 2511;; Register containing bits 16+ of the address 2512 2513#define HHI8 21 2514 2515.macro .xload dest, n 2516#if defined (__AVR_HAVE_ELPMX__) 2517 elpm \dest, Z+ 2518#elif defined (__AVR_HAVE_ELPM__) 2519 elpm 2520 mov \dest, r0 2521.if \dest != D0+\n-1 2522 adiw r30, 1 2523 adc HHI8, __zero_reg__ 2524 out __RAMPZ__, HHI8 2525.endif 2526#elif defined (__AVR_HAVE_LPMX__) 2527 lpm \dest, Z+ 2528#else 2529 lpm 2530 mov \dest, r0 2531.if \dest != D0+\n-1 2532 adiw r30, 1 2533.endif 2534#endif 2535#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2536.if \dest == D0+\n-1 2537 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2538 out __RAMPZ__, __zero_reg__ 2539.endif 2540#endif 2541.endm ; .xload 2542 2543#if defined (L_xload_1) 2544DEFUN __xload_1 2545#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__) 2546 sbrc HHI8, 7 2547 ld D0, Z 2548 sbrs HHI8, 7 2549 lpm D0, Z 2550 ret 2551#else 2552 sbrc HHI8, 7 2553 rjmp 1f 2554#if defined (__AVR_HAVE_ELPM__) 2555 out __RAMPZ__, HHI8 2556#endif /* __AVR_HAVE_ELPM__ */ 2557 .xload D0, 1 2558 ret 25591: ld D0, Z 2560 ret 2561#endif /* LPMx && ! ELPM */ 2562ENDF __xload_1 2563#endif /* L_xload_1 */ 2564 2565#if defined (L_xload_2) 2566DEFUN __xload_2 2567 sbrc HHI8, 7 2568 rjmp 1f 2569#if defined (__AVR_HAVE_ELPM__) 2570 out __RAMPZ__, HHI8 2571#endif /* __AVR_HAVE_ELPM__ */ 2572 .xload D0, 2 2573 .xload D1, 2 2574 ret 25751: ld D0, Z+ 2576 ld D1, Z+ 2577 ret 2578ENDF __xload_2 2579#endif /* L_xload_2 */ 2580 2581#if defined (L_xload_3) 2582DEFUN __xload_3 2583 sbrc HHI8, 7 2584 rjmp 1f 2585#if defined (__AVR_HAVE_ELPM__) 2586 out __RAMPZ__, HHI8 2587#endif /* __AVR_HAVE_ELPM__ */ 2588 .xload D0, 3 2589 .xload D1, 3 2590 .xload D2, 3 2591 ret 25921: ld D0, Z+ 2593 ld D1, Z+ 2594 ld D2, Z+ 2595 ret 2596ENDF __xload_3 2597#endif /* L_xload_3 */ 2598 2599#if defined (L_xload_4) 2600DEFUN __xload_4 2601 sbrc HHI8, 7 2602 rjmp 1f 2603#if defined (__AVR_HAVE_ELPM__) 2604 out __RAMPZ__, HHI8 2605#endif /* __AVR_HAVE_ELPM__ */ 2606 .xload D0, 4 2607 .xload D1, 4 2608 .xload D2, 4 2609 .xload D3, 4 2610 ret 26111: ld D0, Z+ 2612 ld D1, Z+ 2613 ld D2, Z+ 2614 ld D3, Z+ 2615 ret 2616ENDF __xload_4 2617#endif /* L_xload_4 */ 2618 2619#endif /* L_xload_{1|2|3|4} */ 2620 2621;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2622;; memcopy from Address Space __pgmx to RAM 2623;; R23:Z = Source Address 2624;; X = Destination Address 2625;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z 2626 2627#if defined (L_movmemx) 2628 2629#define HHI8 23 2630#define LOOP 24 2631 2632DEFUN __movmemx_qi 2633 ;; #Bytes to copy fity in 8 Bits (1..255) 2634 ;; Zero-extend Loop Counter 2635 clr LOOP+1 2636 ;; FALLTHRU 2637ENDF __movmemx_qi 2638 2639DEFUN __movmemx_hi 2640 2641;; Read from where? 2642 sbrc HHI8, 7 2643 rjmp 1f 2644 2645;; Read from Flash 2646 2647#if defined (__AVR_HAVE_ELPM__) 2648 out __RAMPZ__, HHI8 2649#endif 2650 26510: ;; Load 1 Byte from Flash... 2652 2653#if defined (__AVR_HAVE_ELPMX__) 2654 elpm r0, Z+ 2655#elif defined (__AVR_HAVE_ELPM__) 2656 elpm 2657 adiw r30, 1 2658 adc HHI8, __zero_reg__ 2659 out __RAMPZ__, HHI8 2660#elif defined (__AVR_HAVE_LPMX__) 2661 lpm r0, Z+ 2662#else 2663 lpm 2664 adiw r30, 1 2665#endif 2666 2667 ;; ...and store that Byte to RAM Destination 2668 st X+, r0 2669 sbiw LOOP, 1 2670 brne 0b 2671#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2672 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2673 out __RAMPZ__, __zero_reg__ 2674#endif /* ELPM && RAMPD */ 2675 ret 2676 2677;; Read from RAM 2678 26791: ;; Read 1 Byte from RAM... 2680 ld r0, Z+ 2681 ;; and store that Byte to RAM Destination 2682 st X+, r0 2683 sbiw LOOP, 1 2684 brne 1b 2685 ret 2686ENDF __movmemx_hi 2687 2688#undef HHI8 2689#undef LOOP 2690 2691#endif /* L_movmemx */ 2692 2693 2694.section .text.libgcc.builtins, "ax", @progbits 2695 2696/********************************** 2697 * Find first set Bit (ffs) 2698 **********************************/ 2699 2700#if defined (L_ffssi2) 2701;; find first set bit 2702;; r25:r24 = ffs32 (r25:r22) 2703;; clobbers: r22, r26 2704DEFUN __ffssi2 2705 clr r26 2706 tst r22 2707 brne 1f 2708 subi r26, -8 2709 or r22, r23 2710 brne 1f 2711 subi r26, -8 2712 or r22, r24 2713 brne 1f 2714 subi r26, -8 2715 or r22, r25 2716 brne 1f 2717 ret 27181: mov r24, r22 2719 XJMP __loop_ffsqi2 2720ENDF __ffssi2 2721#endif /* defined (L_ffssi2) */ 2722 2723#if defined (L_ffshi2) 2724;; find first set bit 2725;; r25:r24 = ffs16 (r25:r24) 2726;; clobbers: r26 2727DEFUN __ffshi2 2728 clr r26 2729#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 2730 ;; Some cores have problem skipping 2-word instruction 2731 tst r24 2732 breq 2f 2733#else 2734 cpse r24, __zero_reg__ 2735#endif /* __AVR_HAVE_JMP_CALL__ */ 27361: XJMP __loop_ffsqi2 27372: ldi r26, 8 2738 or r24, r25 2739 brne 1b 2740 ret 2741ENDF __ffshi2 2742#endif /* defined (L_ffshi2) */ 2743 2744#if defined (L_loop_ffsqi2) 2745;; Helper for ffshi2, ffssi2 2746;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) 2747;; r24 must be != 0 2748;; clobbers: r26 2749DEFUN __loop_ffsqi2 2750 inc r26 2751 lsr r24 2752 brcc __loop_ffsqi2 2753 mov r24, r26 2754 clr r25 2755 ret 2756ENDF __loop_ffsqi2 2757#endif /* defined (L_loop_ffsqi2) */ 2758 2759 2760/********************************** 2761 * Count trailing Zeros (ctz) 2762 **********************************/ 2763 2764#if defined (L_ctzsi2) 2765;; count trailing zeros 2766;; r25:r24 = ctz32 (r25:r22) 2767;; clobbers: r26, r22 2768;; ctz(0) = 255 2769;; Note that ctz(0) in undefined for GCC 2770DEFUN __ctzsi2 2771 XCALL __ffssi2 2772 dec r24 2773 ret 2774ENDF __ctzsi2 2775#endif /* defined (L_ctzsi2) */ 2776 2777#if defined (L_ctzhi2) 2778;; count trailing zeros 2779;; r25:r24 = ctz16 (r25:r24) 2780;; clobbers: r26 2781;; ctz(0) = 255 2782;; Note that ctz(0) in undefined for GCC 2783DEFUN __ctzhi2 2784 XCALL __ffshi2 2785 dec r24 2786 ret 2787ENDF __ctzhi2 2788#endif /* defined (L_ctzhi2) */ 2789 2790 2791/********************************** 2792 * Count leading Zeros (clz) 2793 **********************************/ 2794 2795#if defined (L_clzdi2) 2796;; count leading zeros 2797;; r25:r24 = clz64 (r25:r18) 2798;; clobbers: r22, r23, r26 2799DEFUN __clzdi2 2800 XCALL __clzsi2 2801 sbrs r24, 5 2802 ret 2803 mov_l r22, r18 2804 mov_h r23, r19 2805 mov_l r24, r20 2806 mov_h r25, r21 2807 XCALL __clzsi2 2808 subi r24, -32 2809 ret 2810ENDF __clzdi2 2811#endif /* defined (L_clzdi2) */ 2812 2813#if defined (L_clzsi2) 2814;; count leading zeros 2815;; r25:r24 = clz32 (r25:r22) 2816;; clobbers: r26 2817DEFUN __clzsi2 2818 XCALL __clzhi2 2819 sbrs r24, 4 2820 ret 2821 mov_l r24, r22 2822 mov_h r25, r23 2823 XCALL __clzhi2 2824 subi r24, -16 2825 ret 2826ENDF __clzsi2 2827#endif /* defined (L_clzsi2) */ 2828 2829#if defined (L_clzhi2) 2830;; count leading zeros 2831;; r25:r24 = clz16 (r25:r24) 2832;; clobbers: r26 2833DEFUN __clzhi2 2834 clr r26 2835 tst r25 2836 brne 1f 2837 subi r26, -8 2838 or r25, r24 2839 brne 1f 2840 ldi r24, 16 2841 ret 28421: cpi r25, 16 2843 brsh 3f 2844 subi r26, -3 2845 swap r25 28462: inc r26 28473: lsl r25 2848 brcc 2b 2849 mov r24, r26 2850 clr r25 2851 ret 2852ENDF __clzhi2 2853#endif /* defined (L_clzhi2) */ 2854 2855 2856/********************************** 2857 * Parity 2858 **********************************/ 2859 2860#if defined (L_paritydi2) 2861;; r25:r24 = parity64 (r25:r18) 2862;; clobbers: __tmp_reg__ 2863DEFUN __paritydi2 2864 eor r24, r18 2865 eor r24, r19 2866 eor r24, r20 2867 eor r24, r21 2868 XJMP __paritysi2 2869ENDF __paritydi2 2870#endif /* defined (L_paritydi2) */ 2871 2872#if defined (L_paritysi2) 2873;; r25:r24 = parity32 (r25:r22) 2874;; clobbers: __tmp_reg__ 2875DEFUN __paritysi2 2876 eor r24, r22 2877 eor r24, r23 2878 XJMP __parityhi2 2879ENDF __paritysi2 2880#endif /* defined (L_paritysi2) */ 2881 2882#if defined (L_parityhi2) 2883;; r25:r24 = parity16 (r25:r24) 2884;; clobbers: __tmp_reg__ 2885DEFUN __parityhi2 2886 eor r24, r25 2887;; FALLTHRU 2888ENDF __parityhi2 2889 2890;; r25:r24 = parity8 (r24) 2891;; clobbers: __tmp_reg__ 2892DEFUN __parityqi2 2893 ;; parity is in r24[0..7] 2894 mov __tmp_reg__, r24 2895 swap __tmp_reg__ 2896 eor r24, __tmp_reg__ 2897 ;; parity is in r24[0..3] 2898 subi r24, -4 2899 andi r24, -5 2900 subi r24, -6 2901 ;; parity is in r24[0,3] 2902 sbrc r24, 3 2903 inc r24 2904 ;; parity is in r24[0] 2905 andi r24, 1 2906 clr r25 2907 ret 2908ENDF __parityqi2 2909#endif /* defined (L_parityhi2) */ 2910 2911 2912/********************************** 2913 * Population Count 2914 **********************************/ 2915 2916#if defined (L_popcounthi2) 2917;; population count 2918;; r25:r24 = popcount16 (r25:r24) 2919;; clobbers: __tmp_reg__ 2920DEFUN __popcounthi2 2921 XCALL __popcountqi2 2922 push r24 2923 mov r24, r25 2924 XCALL __popcountqi2 2925 clr r25 2926 ;; FALLTHRU 2927ENDF __popcounthi2 2928 2929DEFUN __popcounthi2_tail 2930 pop __tmp_reg__ 2931 add r24, __tmp_reg__ 2932 ret 2933ENDF __popcounthi2_tail 2934#endif /* defined (L_popcounthi2) */ 2935 2936#if defined (L_popcountsi2) 2937;; population count 2938;; r25:r24 = popcount32 (r25:r22) 2939;; clobbers: __tmp_reg__ 2940DEFUN __popcountsi2 2941 XCALL __popcounthi2 2942 push r24 2943 mov_l r24, r22 2944 mov_h r25, r23 2945 XCALL __popcounthi2 2946 XJMP __popcounthi2_tail 2947ENDF __popcountsi2 2948#endif /* defined (L_popcountsi2) */ 2949 2950#if defined (L_popcountdi2) 2951;; population count 2952;; r25:r24 = popcount64 (r25:r18) 2953;; clobbers: r22, r23, __tmp_reg__ 2954DEFUN __popcountdi2 2955 XCALL __popcountsi2 2956 push r24 2957 mov_l r22, r18 2958 mov_h r23, r19 2959 mov_l r24, r20 2960 mov_h r25, r21 2961 XCALL __popcountsi2 2962 XJMP __popcounthi2_tail 2963ENDF __popcountdi2 2964#endif /* defined (L_popcountdi2) */ 2965 2966#if defined (L_popcountqi2) 2967;; population count 2968;; r24 = popcount8 (r24) 2969;; clobbers: __tmp_reg__ 2970DEFUN __popcountqi2 2971 mov __tmp_reg__, r24 2972 andi r24, 1 2973 lsr __tmp_reg__ 2974 lsr __tmp_reg__ 2975 adc r24, __zero_reg__ 2976 lsr __tmp_reg__ 2977 adc r24, __zero_reg__ 2978 lsr __tmp_reg__ 2979 adc r24, __zero_reg__ 2980 lsr __tmp_reg__ 2981 adc r24, __zero_reg__ 2982 lsr __tmp_reg__ 2983 adc r24, __zero_reg__ 2984 lsr __tmp_reg__ 2985 adc r24, __tmp_reg__ 2986 ret 2987ENDF __popcountqi2 2988#endif /* defined (L_popcountqi2) */ 2989 2990 2991/********************************** 2992 * Swap bytes 2993 **********************************/ 2994 2995;; swap two registers with different register number 2996.macro bswap a, b 2997 eor \a, \b 2998 eor \b, \a 2999 eor \a, \b 3000.endm 3001 3002#if defined (L_bswapsi2) 3003;; swap bytes 3004;; r25:r22 = bswap32 (r25:r22) 3005DEFUN __bswapsi2 3006 bswap r22, r25 3007 bswap r23, r24 3008 ret 3009ENDF __bswapsi2 3010#endif /* defined (L_bswapsi2) */ 3011 3012#if defined (L_bswapdi2) 3013;; swap bytes 3014;; r25:r18 = bswap64 (r25:r18) 3015DEFUN __bswapdi2 3016 bswap r18, r25 3017 bswap r19, r24 3018 bswap r20, r23 3019 bswap r21, r22 3020 ret 3021ENDF __bswapdi2 3022#endif /* defined (L_bswapdi2) */ 3023 3024 3025/********************************** 3026 * 64-bit shifts 3027 **********************************/ 3028 3029#if defined (L_ashrdi3) 3030;; Arithmetic shift right 3031;; r25:r18 = ashr64 (r25:r18, r17:r16) 3032DEFUN __ashrdi3 3033 bst r25, 7 3034 bld __zero_reg__, 0 3035 ;; FALLTHRU 3036ENDF __ashrdi3 3037 3038;; Logic shift right 3039;; r25:r18 = lshr64 (r25:r18, r17:r16) 3040DEFUN __lshrdi3 3041 lsr __zero_reg__ 3042 sbc __tmp_reg__, __tmp_reg__ 3043 push r16 30440: cpi r16, 8 3045 brlo 2f 3046 subi r16, 8 3047 mov r18, r19 3048 mov r19, r20 3049 mov r20, r21 3050 mov r21, r22 3051 mov r22, r23 3052 mov r23, r24 3053 mov r24, r25 3054 mov r25, __tmp_reg__ 3055 rjmp 0b 30561: asr __tmp_reg__ 3057 ror r25 3058 ror r24 3059 ror r23 3060 ror r22 3061 ror r21 3062 ror r20 3063 ror r19 3064 ror r18 30652: dec r16 3066 brpl 1b 3067 pop r16 3068 ret 3069ENDF __lshrdi3 3070#endif /* defined (L_ashrdi3) */ 3071 3072#if defined (L_ashldi3) 3073;; Shift left 3074;; r25:r18 = ashl64 (r25:r18, r17:r16) 3075DEFUN __ashldi3 3076 push r16 30770: cpi r16, 8 3078 brlo 2f 3079 mov r25, r24 3080 mov r24, r23 3081 mov r23, r22 3082 mov r22, r21 3083 mov r21, r20 3084 mov r20, r19 3085 mov r19, r18 3086 clr r18 3087 subi r16, 8 3088 rjmp 0b 30891: lsl r18 3090 rol r19 3091 rol r20 3092 rol r21 3093 rol r22 3094 rol r23 3095 rol r24 3096 rol r25 30972: dec r16 3098 brpl 1b 3099 pop r16 3100 ret 3101ENDF __ashldi3 3102#endif /* defined (L_ashldi3) */ 3103 3104#if defined (L_rotldi3) 3105;; Shift left 3106;; r25:r18 = rotl64 (r25:r18, r17:r16) 3107DEFUN __rotldi3 3108 push r16 31090: cpi r16, 8 3110 brlo 2f 3111 subi r16, 8 3112 mov __tmp_reg__, r25 3113 mov r25, r24 3114 mov r24, r23 3115 mov r23, r22 3116 mov r22, r21 3117 mov r21, r20 3118 mov r20, r19 3119 mov r19, r18 3120 mov r18, __tmp_reg__ 3121 rjmp 0b 31221: lsl r18 3123 rol r19 3124 rol r20 3125 rol r21 3126 rol r22 3127 rol r23 3128 rol r24 3129 rol r25 3130 adc r18, __zero_reg__ 31312: dec r16 3132 brpl 1b 3133 pop r16 3134 ret 3135ENDF __rotldi3 3136#endif /* defined (L_rotldi3) */ 3137 3138 3139.section .text.libgcc.fmul, "ax", @progbits 3140 3141/***********************************************************/ 3142;;; Softmul versions of FMUL, FMULS and FMULSU to implement 3143;;; __builtin_avr_fmul* if !AVR_HAVE_MUL 3144/***********************************************************/ 3145 3146#define A1 24 3147#define B1 25 3148#define C0 22 3149#define C1 23 3150#define A0 __tmp_reg__ 3151 3152#ifdef L_fmuls 3153;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction 3154;;; Clobbers: r24, r25, __tmp_reg__ 3155DEFUN __fmuls 3156 ;; A0.7 = negate result? 3157 mov A0, A1 3158 eor A0, B1 3159 ;; B1 = |B1| 3160 sbrc B1, 7 3161 neg B1 3162 XJMP __fmulsu_exit 3163ENDF __fmuls 3164#endif /* L_fmuls */ 3165 3166#ifdef L_fmulsu 3167;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction 3168;;; Clobbers: r24, r25, __tmp_reg__ 3169DEFUN __fmulsu 3170 ;; A0.7 = negate result? 3171 mov A0, A1 3172;; FALLTHRU 3173ENDF __fmulsu 3174 3175;; Helper for __fmuls and __fmulsu 3176DEFUN __fmulsu_exit 3177 ;; A1 = |A1| 3178 sbrc A1, 7 3179 neg A1 3180#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 3181 ;; Some cores have problem skipping 2-word instruction 3182 tst A0 3183 brmi 1f 3184#else 3185 sbrs A0, 7 3186#endif /* __AVR_HAVE_JMP_CALL__ */ 3187 XJMP __fmul 31881: XCALL __fmul 3189 ;; C = -C iff A0.7 = 1 3190 NEG2 C0 3191 ret 3192ENDF __fmulsu_exit 3193#endif /* L_fmulsu */ 3194 3195 3196#ifdef L_fmul 3197;;; r22:r23 = fmul (r24, r25) like in FMUL instruction 3198;;; Clobbers: r24, r25, __tmp_reg__ 3199DEFUN __fmul 3200 ; clear result 3201 clr C0 3202 clr C1 3203 clr A0 32041: tst B1 3205 ;; 1.0 = 0x80, so test for bit 7 of B to see if A must to be added to C. 32062: brpl 3f 3207 ;; C += A 3208 add C0, A0 3209 adc C1, A1 32103: ;; A >>= 1 3211 lsr A1 3212 ror A0 3213 ;; B <<= 1 3214 lsl B1 3215 brne 2b 3216 ret 3217ENDF __fmul 3218#endif /* L_fmul */ 3219 3220#undef A0 3221#undef A1 3222#undef B1 3223#undef C0 3224#undef C1 3225 3226#include "lib1funcs-fixed.S" 3227