1/* -*- Mode: Asm -*- */ 2/* Copyright (C) 1998-2019 Free Software Foundation, Inc. 3 Contributed by Denis Chertykov <chertykov@gmail.com> 4 5This file is free software; you can redistribute it and/or modify it 6under the terms of the GNU General Public License as published by the 7Free Software Foundation; either version 3, or (at your option) any 8later version. 9 10This file is distributed in the hope that it will be useful, but 11WITHOUT ANY WARRANTY; without even the implied warranty of 12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13General Public License for more details. 14 15Under Section 7 of GPL version 3, you are granted additional 16permissions described in the GCC Runtime Library Exception, version 173.1, as published by the Free Software Foundation. 18 19You should have received a copy of the GNU General Public License and 20a copy of the GCC Runtime Library Exception along with this program; 21see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22<http://www.gnu.org/licenses/>. */ 23 24#if defined (__AVR_TINY__) 25#define __zero_reg__ r17 26#define __tmp_reg__ r16 27#else 28#define __zero_reg__ r1 29#define __tmp_reg__ r0 30#endif 31#define __SREG__ 0x3f 32#if defined (__AVR_HAVE_SPH__) 33#define __SP_H__ 0x3e 34#endif 35#define __SP_L__ 0x3d 36#define __RAMPZ__ 0x3B 37#define __EIND__ 0x3C 38 39/* Most of the functions here are called directly from avr.md 40 patterns, instead of using the standard libcall mechanisms. 41 This can make better code because GCC knows exactly which 42 of the call-used registers (not all of them) are clobbered. */ 43 44/* FIXME: At present, there is no SORT directive in the linker 45 script so that we must not assume that different modules 46 in the same input section like .libgcc.text.mul will be 47 located close together. Therefore, we cannot use 48 RCALL/RJMP to call a function like __udivmodhi4 from 49 __divmodhi4 and have to use lengthy XCALL/XJMP even 50 though they are in the same input section and all same 51 input sections together are small enough to reach every 52 location with a RCALL/RJMP instruction. 
*/ 53 54#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__) 55#error device not supported 56#endif 57 58 .macro mov_l r_dest, r_src 59#if defined (__AVR_HAVE_MOVW__) 60 movw \r_dest, \r_src 61#else 62 mov \r_dest, \r_src 63#endif 64 .endm 65 66 .macro mov_h r_dest, r_src 67#if defined (__AVR_HAVE_MOVW__) 68 ; empty 69#else 70 mov \r_dest, \r_src 71#endif 72 .endm 73 74.macro wmov r_dest, r_src 75#if defined (__AVR_HAVE_MOVW__) 76 movw \r_dest, \r_src 77#else 78 mov \r_dest, \r_src 79 mov \r_dest+1, \r_src+1 80#endif 81.endm 82 83#if defined (__AVR_HAVE_JMP_CALL__) 84#define XCALL call 85#define XJMP jmp 86#else 87#define XCALL rcall 88#define XJMP rjmp 89#endif 90 91#if defined (__AVR_HAVE_EIJMP_EICALL__) 92#define XICALL eicall 93#define XIJMP eijmp 94#else 95#define XICALL icall 96#define XIJMP ijmp 97#endif 98 99;; Prologue stuff 100 101.macro do_prologue_saves n_pushed n_frame=0 102 ldi r26, lo8(\n_frame) 103 ldi r27, hi8(\n_frame) 104 ldi r30, lo8(gs(.L_prologue_saves.\@)) 105 ldi r31, hi8(gs(.L_prologue_saves.\@)) 106 XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2) 107.L_prologue_saves.\@: 108.endm 109 110;; Epilogue stuff 111 112.macro do_epilogue_restores n_pushed n_frame=0 113 in r28, __SP_L__ 114#ifdef __AVR_HAVE_SPH__ 115 in r29, __SP_H__ 116.if \n_frame > 63 117 subi r28, lo8(-\n_frame) 118 sbci r29, hi8(-\n_frame) 119.elseif \n_frame > 0 120 adiw r28, \n_frame 121.endif 122#else 123 clr r29 124.if \n_frame > 0 125 subi r28, lo8(-\n_frame) 126.endif 127#endif /* HAVE SPH */ 128 ldi r30, \n_pushed 129 XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2) 130.endm 131 132;; Support function entry and exit for convenience 133 134.macro wsubi r_arg1, i_arg2 135#if defined (__AVR_TINY__) 136 subi \r_arg1, lo8(\i_arg2) 137 sbci \r_arg1+1, hi8(\i_arg2) 138#else 139 sbiw \r_arg1, \i_arg2 140#endif 141.endm 142 143.macro waddi r_arg1, i_arg2 144#if defined (__AVR_TINY__) 145 subi \r_arg1, lo8(-\i_arg2) 146 sbci \r_arg1+1, hi8(-\i_arg2) 147#else 148 adiw \r_arg1, \i_arg2 149#endif 150.endm 151 152.macro DEFUN name 153.global \name 154.func \name 155\name: 156.endm 157 158.macro ENDF name 159.size \name, .-\name 160.endfunc 161.endm 162 163.macro FALIAS name 164.global \name 165.func \name 166\name: 167.size \name, .-\name 168.endfunc 169.endm 170 171;; Skip next instruction, typically a jump target 172#if defined(__AVR_TINY__) 173#define skip cpse 0,0 174#else 175#define skip cpse 16,16 176#endif 177 178;; Negate a 2-byte value held in consecutive registers 179.macro NEG2 reg 180 com \reg+1 181 neg \reg 182 sbci \reg+1, -1 183.endm 184 185;; Negate a 4-byte value held in consecutive registers 186;; Sets the V flag for signed overflow tests if REG >= 16 187.macro NEG4 reg 188 com \reg+3 189 com \reg+2 190 com \reg+1 191.if \reg >= 16 192 neg \reg 193 sbci \reg+1, -1 194 sbci \reg+2, -1 195 sbci \reg+3, -1 196.else 197 com \reg 198 adc \reg, __zero_reg__ 199 adc \reg+1, __zero_reg__ 200 adc \reg+2, __zero_reg__ 201 adc \reg+3, __zero_reg__ 202.endif 203.endm 204 205#define exp_lo(N) hlo8 ((N) << 23) 206#define exp_hi(N) hhi8 ((N) << 23) 207 208 209.section .text.libgcc.mul, "ax", @progbits 210 211;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 212/* Note: mulqi3, mulhi3 are open-coded on the enhanced core. 
*/ 213#if !defined (__AVR_HAVE_MUL__) 214/******************************************************* 215 Multiplication 8 x 8 without MUL 216*******************************************************/ 217#if defined (L_mulqi3) 218 219#define r_arg2 r22 /* multiplicand */ 220#define r_arg1 r24 /* multiplier */ 221#define r_res __tmp_reg__ /* result */ 222 223DEFUN __mulqi3 224 clr r_res ; clear result 225__mulqi3_loop: 226 sbrc r_arg1,0 227 add r_res,r_arg2 228 add r_arg2,r_arg2 ; shift multiplicand 229 breq __mulqi3_exit ; while multiplicand != 0 230 lsr r_arg1 ; 231 brne __mulqi3_loop ; exit if multiplier = 0 232__mulqi3_exit: 233 mov r_arg1,r_res ; result to return register 234 ret 235ENDF __mulqi3 236 237#undef r_arg2 238#undef r_arg1 239#undef r_res 240 241#endif /* defined (L_mulqi3) */ 242 243 244/******************************************************* 245 Widening Multiplication 16 = 8 x 8 without MUL 246 Multiplication 16 x 16 without MUL 247*******************************************************/ 248 249#define A0 22 250#define A1 23 251#define B0 24 252#define BB0 20 253#define B1 25 254;; Output overlaps input, thus expand result in CC0/1 255#define C0 24 256#define C1 25 257#define CC0 __tmp_reg__ 258#define CC1 21 259 260#if defined (L_umulqihi3) 261;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24 262;;; (C1:C0) = (unsigned int) A0 * (unsigned int) B0 263;;; Clobbers: __tmp_reg__, R21..R23 264DEFUN __umulqihi3 265 clr A1 266 clr B1 267 XJMP __mulhi3 268ENDF __umulqihi3 269#endif /* L_umulqihi3 */ 270 271#if defined (L_mulqihi3) 272;;; R25:R24 = (signed int) R22 * (signed int) R24 273;;; (C1:C0) = (signed int) A0 * (signed int) B0 274;;; Clobbers: __tmp_reg__, R20..R23 275DEFUN __mulqihi3 276 ;; Sign-extend B0 277 clr B1 278 sbrc B0, 7 279 com B1 280 ;; The multiplication runs twice as fast if A1 is zero, thus: 281 ;; Zero-extend A0 282 clr A1 283#ifdef __AVR_HAVE_JMP_CALL__ 284 ;; Store B0 * sign of A 285 clr BB0 286 sbrc A0, 7 287 mov BB0, B0 288 call __mulhi3 289#else /* have no CALL */ 290 ;; Skip sign-extension of A if A >= 0 291 ;; Same size as with the first alternative but avoids errata skip 292 ;; and is faster if A >= 0 293 sbrs A0, 7 294 rjmp __mulhi3 295 ;; If A < 0 store B 296 mov BB0, B0 297 rcall __mulhi3 298#endif /* HAVE_JMP_CALL */ 299 ;; 1-extend A after the multiplication 300 sub C1, BB0 301 ret 302ENDF __mulqihi3 303#endif /* L_mulqihi3 */ 304 305#if defined (L_mulhi3) 306;;; R25:R24 = R23:R22 * R25:R24 307;;; (C1:C0) = (A1:A0) * (B1:B0) 308;;; Clobbers: __tmp_reg__, R21..R23 309DEFUN __mulhi3 310 311 ;; Clear result 312 clr CC0 313 clr CC1 314 rjmp 3f 3151: 316 ;; Bit n of A is 1 --> C += B << n 317 add CC0, B0 318 adc CC1, B1 3192: 320 lsl B0 321 rol B1 3223: 323 ;; If B == 0 we are ready 324 wsubi B0, 0 325 breq 9f 326 327 ;; Carry = n-th bit of A 328 lsr A1 329 ror A0 330 ;; If bit n of A is set, then go add B * 2^n to C 331 brcs 1b 332 333 ;; Carry = 0 --> The ROR above acts like CP A0, 0 334 ;; Thus, it is sufficient to CPC the high part to test A against 0 335 cpc A1, __zero_reg__ 336 ;; Only proceed if A != 0 337 brne 2b 3389: 339 ;; Move Result into place 340 mov C0, CC0 341 mov C1, CC1 342 ret 343ENDF __mulhi3 344#endif /* L_mulhi3 */ 345 346#undef A0 347#undef A1 348#undef B0 349#undef BB0 350#undef B1 351#undef C0 352#undef C1 353#undef CC0 354#undef CC1 355 356 357#define A0 22 358#define A1 A0+1 359#define A2 A0+2 360#define A3 A0+3 361 362#define B0 18 363#define B1 B0+1 364#define B2 B0+2 365#define B3 B0+3 366 367#define CC0 26 368#define 
CC1 CC0+1 369#define CC2 30 370#define CC3 CC2+1 371 372#define C0 22 373#define C1 C0+1 374#define C2 C0+2 375#define C3 C0+3 376 377/******************************************************* 378 Widening Multiplication 32 = 16 x 16 without MUL 379*******************************************************/ 380 381#if defined (L_umulhisi3) 382DEFUN __umulhisi3 383 wmov B0, 24 384 ;; Zero-extend B 385 clr B2 386 clr B3 387 ;; Zero-extend A 388 wmov A2, B2 389 XJMP __mulsi3 390ENDF __umulhisi3 391#endif /* L_umulhisi3 */ 392 393#if defined (L_mulhisi3) 394DEFUN __mulhisi3 395 wmov B0, 24 396 ;; Sign-extend B 397 lsl r25 398 sbc B2, B2 399 mov B3, B2 400#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 401 ;; Sign-extend A 402 clr A2 403 sbrc A1, 7 404 com A2 405 mov A3, A2 406 XJMP __mulsi3 407#else /* no __AVR_ERRATA_SKIP_JMP_CALL__ */ 408 ;; Zero-extend A and __mulsi3 will run at least twice as fast 409 ;; compared to a sign-extended A. 410 clr A2 411 clr A3 412 sbrs A1, 7 413 XJMP __mulsi3 414 ;; If A < 0 then perform the B * 0xffff.... before the 415 ;; very multiplication by initializing the high part of the 416 ;; result CC with -B. 417 wmov CC2, A2 418 sub CC2, B0 419 sbc CC3, B1 420 XJMP __mulsi3_helper 421#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */ 422ENDF __mulhisi3 423#endif /* L_mulhisi3 */ 424 425 426/******************************************************* 427 Multiplication 32 x 32 without MUL 428*******************************************************/ 429 430#if defined (L_mulsi3) 431DEFUN __mulsi3 432#if defined (__AVR_TINY__) 433 in r26, __SP_L__ ; safe to use X, as it is CC0/CC1 434 in r27, __SP_H__ 435 subi r26, lo8(-3) ; Add 3 to point past return address 436 sbci r27, hi8(-3) 437 push B0 ; save callee saved regs 438 push B1 439 ld B0, X+ ; load from caller stack 440 ld B1, X+ 441 ld B2, X+ 442 ld B3, X 443#endif 444 ;; Clear result 445 clr CC2 446 clr CC3 447 ;; FALLTHRU 448ENDF __mulsi3 449 450DEFUN __mulsi3_helper 451 clr CC0 452 clr CC1 453 rjmp 3f 454 4551: ;; If bit n of A is set, then add B * 2^n to the result in CC 456 ;; CC += B 457 add CC0,B0 $ adc CC1,B1 $ adc CC2,B2 $ adc CC3,B3 458 4592: ;; B <<= 1 460 lsl B0 $ rol B1 $ rol B2 $ rol B3 461 4623: ;; A >>= 1: Carry = n-th bit of A 463 lsr A3 $ ror A2 $ ror A1 $ ror A0 464 465 brcs 1b 466 ;; Only continue if A != 0 467 sbci A1, 0 468 brne 2b 469 wsubi A2, 0 470 brne 2b 471 472 ;; All bits of A are consumed: Copy result to return register C 473 wmov C0, CC0 474 wmov C2, CC2 475#if defined (__AVR_TINY__) 476 pop B1 ; restore callee saved regs 477 pop B0 478#endif /* defined (__AVR_TINY__) */ 479 480 ret 481ENDF __mulsi3_helper 482#endif /* L_mulsi3 */ 483 484#undef A0 485#undef A1 486#undef A2 487#undef A3 488#undef B0 489#undef B1 490#undef B2 491#undef B3 492#undef C0 493#undef C1 494#undef C2 495#undef C3 496#undef CC0 497#undef CC1 498#undef CC2 499#undef CC3 500 501#endif /* !defined (__AVR_HAVE_MUL__) */ 502;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 503 504;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 505#if defined (__AVR_HAVE_MUL__) 506#define A0 26 507#define B0 18 508#define C0 22 509 510#define A1 A0+1 511 512#define B1 B0+1 513#define B2 B0+2 514#define B3 B0+3 515 516#define C1 C0+1 517#define C2 C0+2 518#define C3 C0+3 519 520/******************************************************* 521 Widening Multiplication 32 = 16 x 16 with MUL 522*******************************************************/ 523 524#if defined (L_mulhisi3) 525;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18 526;;; C3:C0 = 
(signed long) A1:A0 * (signed long) B1:B0 527;;; Clobbers: __tmp_reg__ 528DEFUN __mulhisi3 529 XCALL __umulhisi3 530 ;; Sign-extend B 531 tst B1 532 brpl 1f 533 sub C2, A0 534 sbc C3, A1 5351: ;; Sign-extend A 536 XJMP __usmulhisi3_tail 537ENDF __mulhisi3 538#endif /* L_mulhisi3 */ 539 540#if defined (L_usmulhisi3) 541;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18 542;;; C3:C0 = (signed long) A1:A0 * (unsigned long) B1:B0 543;;; Clobbers: __tmp_reg__ 544DEFUN __usmulhisi3 545 XCALL __umulhisi3 546 ;; FALLTHRU 547ENDF __usmulhisi3 548 549DEFUN __usmulhisi3_tail 550 ;; Sign-extend A 551 sbrs A1, 7 552 ret 553 sub C2, B0 554 sbc C3, B1 555 ret 556ENDF __usmulhisi3_tail 557#endif /* L_usmulhisi3 */ 558 559#if defined (L_umulhisi3) 560;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18 561;;; C3:C0 = (unsigned long) A1:A0 * (unsigned long) B1:B0 562;;; Clobbers: __tmp_reg__ 563DEFUN __umulhisi3 564 mul A0, B0 565 movw C0, r0 566 mul A1, B1 567 movw C2, r0 568 mul A0, B1 569#ifdef __AVR_HAVE_JMP_CALL__ 570 ;; This function is used by many other routines, often multiple times. 571 ;; Therefore, if the flash size is not too limited, avoid the RCALL 572 ;; and inverst 6 Bytes to speed things up. 573 add C1, r0 574 adc C2, r1 575 clr __zero_reg__ 576 adc C3, __zero_reg__ 577#else 578 rcall 1f 579#endif 580 mul A1, B0 5811: add C1, r0 582 adc C2, r1 583 clr __zero_reg__ 584 adc C3, __zero_reg__ 585 ret 586ENDF __umulhisi3 587#endif /* L_umulhisi3 */ 588 589/******************************************************* 590 Widening Multiplication 32 = 16 x 32 with MUL 591*******************************************************/ 592 593#if defined (L_mulshisi3) 594;;; R25:R22 = (signed long) R27:R26 * R21:R18 595;;; (C3:C0) = (signed long) A1:A0 * B3:B0 596;;; Clobbers: __tmp_reg__ 597DEFUN __mulshisi3 598#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 599 ;; Some cores have problem skipping 2-word instruction 600 tst A1 601 brmi __mulohisi3 602#else 603 sbrs A1, 7 604#endif /* __AVR_HAVE_JMP_CALL__ */ 605 XJMP __muluhisi3 606 ;; FALLTHRU 607ENDF __mulshisi3 608 609;;; R25:R22 = (one-extended long) R27:R26 * R21:R18 610;;; (C3:C0) = (one-extended long) A1:A0 * B3:B0 611;;; Clobbers: __tmp_reg__ 612DEFUN __mulohisi3 613 XCALL __muluhisi3 614 ;; One-extend R27:R26 (A1:A0) 615 sub C2, B0 616 sbc C3, B1 617 ret 618ENDF __mulohisi3 619#endif /* L_mulshisi3 */ 620 621#if defined (L_muluhisi3) 622;;; R25:R22 = (unsigned long) R27:R26 * R21:R18 623;;; (C3:C0) = (unsigned long) A1:A0 * B3:B0 624;;; Clobbers: __tmp_reg__ 625DEFUN __muluhisi3 626 XCALL __umulhisi3 627 mul A0, B3 628 add C3, r0 629 mul A1, B2 630 add C3, r0 631 mul A0, B2 632 add C2, r0 633 adc C3, r1 634 clr __zero_reg__ 635 ret 636ENDF __muluhisi3 637#endif /* L_muluhisi3 */ 638 639/******************************************************* 640 Multiplication 32 x 32 with MUL 641*******************************************************/ 642 643#if defined (L_mulsi3) 644;;; R25:R22 = R25:R22 * R21:R18 645;;; (C3:C0) = C3:C0 * B3:B0 646;;; Clobbers: R26, R27, __tmp_reg__ 647DEFUN __mulsi3 648 movw A0, C0 649 push C2 650 push C3 651 XCALL __muluhisi3 652 pop A1 653 pop A0 654 ;; A1:A0 now contains the high word of A 655 mul A0, B0 656 add C2, r0 657 adc C3, r1 658 mul A0, B1 659 add C3, r0 660 mul A1, B0 661 add C3, r0 662 clr __zero_reg__ 663 ret 664ENDF __mulsi3 665#endif /* L_mulsi3 */ 666 667#undef A0 668#undef A1 669 670#undef B0 671#undef B1 672#undef B2 673#undef B3 674 675#undef C0 676#undef C1 677#undef C2 678#undef C3 679 680#endif /* 
__AVR_HAVE_MUL__ */ 681 682/******************************************************* 683 Multiplication 24 x 24 with MUL 684*******************************************************/ 685 686#if defined (L_mulpsi3) 687 688;; A[0..2]: In: Multiplicand; Out: Product 689#define A0 22 690#define A1 A0+1 691#define A2 A0+2 692 693;; B[0..2]: In: Multiplier 694#define B0 18 695#define B1 B0+1 696#define B2 B0+2 697 698#if defined (__AVR_HAVE_MUL__) 699 700;; C[0..2]: Expand Result 701#define C0 22 702#define C1 C0+1 703#define C2 C0+2 704 705;; R24:R22 *= R20:R18 706;; Clobbers: r21, r25, r26, r27, __tmp_reg__ 707 708#define AA0 26 709#define AA2 21 710 711DEFUN __mulpsi3 712 wmov AA0, A0 713 mov AA2, A2 714 XCALL __umulhisi3 715 mul AA2, B0 $ add C2, r0 716 mul AA0, B2 $ add C2, r0 717 clr __zero_reg__ 718 ret 719ENDF __mulpsi3 720 721#undef AA2 722#undef AA0 723 724#undef C2 725#undef C1 726#undef C0 727 728#else /* !HAVE_MUL */ 729;; C[0..2]: Expand Result 730#if defined (__AVR_TINY__) 731#define C0 16 732#else 733#define C0 0 734#endif /* defined (__AVR_TINY__) */ 735#define C1 C0+1 736#define C2 21 737 738;; R24:R22 *= R20:R18 739;; Clobbers: __tmp_reg__, R18, R19, R20, R21 740 741DEFUN __mulpsi3 742#if defined (__AVR_TINY__) 743 in r26,__SP_L__ 744 in r27,__SP_H__ 745 subi r26, lo8(-3) ; Add 3 to point past return address 746 sbci r27, hi8(-3) 747 push B0 ; save callee saved regs 748 push B1 749 ld B0,X+ ; load from caller stack 750 ld B1,X+ 751 ld B2,X+ 752#endif /* defined (__AVR_TINY__) */ 753 754 ;; C[] = 0 755 clr __tmp_reg__ 756 clr C2 757 7580: ;; Shift N-th Bit of B[] into Carry. N = 24 - Loop 759 LSR B2 $ ror B1 $ ror B0 760 761 ;; If the N-th Bit of B[] was set... 762 brcc 1f 763 764 ;; ...then add A[] * 2^N to the Result C[] 765 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 766 7671: ;; Multiply A[] by 2 768 LSL A0 $ rol A1 $ rol A2 769 770 ;; Loop until B[] is 0 771 subi B0,0 $ sbci B1,0 $ sbci B2,0 772 brne 0b 773 774 ;; Copy C[] to the return Register A[] 775 wmov A0, C0 776 mov A2, C2 777 778 clr __zero_reg__ 779#if defined (__AVR_TINY__) 780 pop B1 781 pop B0 782#endif /* (__AVR_TINY__) */ 783 ret 784ENDF __mulpsi3 785 786#undef C2 787#undef C1 788#undef C0 789 790#endif /* HAVE_MUL */ 791 792#undef B2 793#undef B1 794#undef B0 795 796#undef A2 797#undef A1 798#undef A0 799 800#endif /* L_mulpsi3 */ 801 802#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__) 803 804;; A[0..2]: In: Multiplicand 805#define A0 22 806#define A1 A0+1 807#define A2 A0+2 808 809;; BB: In: Multiplier 810#define BB 25 811 812;; C[0..2]: Result 813#define C0 18 814#define C1 C0+1 815#define C2 C0+2 816 817;; C[] = A[] * sign_extend (BB) 818DEFUN __mulsqipsi3 819 mul A0, BB 820 movw C0, r0 821 mul A2, BB 822 mov C2, r0 823 mul A1, BB 824 add C1, r0 825 adc C2, r1 826 clr __zero_reg__ 827 sbrs BB, 7 828 ret 829 ;; One-extend BB 830 sub C1, A0 831 sbc C2, A1 832 ret 833ENDF __mulsqipsi3 834 835#undef C2 836#undef C1 837#undef C0 838 839#undef BB 840 841#undef A2 842#undef A1 843#undef A0 844 845#endif /* L_mulsqipsi3 && HAVE_MUL */ 846 847/******************************************************* 848 Multiplication 64 x 64 849*******************************************************/ 850 851;; A[] = A[] * B[] 852 853;; A[0..7]: In: Multiplicand 854;; Out: Product 855#define A0 18 856#define A1 A0+1 857#define A2 A0+2 858#define A3 A0+3 859#define A4 A0+4 860#define A5 A0+5 861#define A6 A0+6 862#define A7 A0+7 863 864;; B[0..7]: In: Multiplier 865#define B0 10 866#define B1 B0+1 867#define B2 B0+2 868#define B3 B0+3 
869#define B4 B0+4 870#define B5 B0+5 871#define B6 B0+6 872#define B7 B0+7 873 874#ifndef __AVR_TINY__ 875#if defined (__AVR_HAVE_MUL__) 876;; Define C[] for convenience 877;; Notice that parts of C[] overlap A[] respective B[] 878#define C0 16 879#define C1 C0+1 880#define C2 20 881#define C3 C2+1 882#define C4 28 883#define C5 C4+1 884#define C6 C4+2 885#define C7 C4+3 886 887#if defined (L_muldi3) 888 889;; A[] *= B[] 890;; R25:R18 *= R17:R10 891;; Ordinary ABI-Function 892 893DEFUN __muldi3 894 push r29 895 push r28 896 push r17 897 push r16 898 899 ;; Counting in Words, we have to perform a 4 * 4 Multiplication 900 901 ;; 3 * 0 + 0 * 3 902 mul A7,B0 $ $ mov C7,r0 903 mul A0,B7 $ $ add C7,r0 904 mul A6,B1 $ $ add C7,r0 905 mul A6,B0 $ mov C6,r0 $ add C7,r1 906 mul B6,A1 $ $ add C7,r0 907 mul B6,A0 $ add C6,r0 $ adc C7,r1 908 909 ;; 1 * 2 910 mul A2,B4 $ add C6,r0 $ adc C7,r1 911 mul A3,B4 $ $ add C7,r0 912 mul A2,B5 $ $ add C7,r0 913 914 push A5 915 push A4 916 push B1 917 push B0 918 push A3 919 push A2 920 921 ;; 0 * 0 922 wmov 26, B0 923 XCALL __umulhisi3 924 wmov C0, 22 925 wmov C2, 24 926 927 ;; 0 * 2 928 wmov 26, B4 929 XCALL __umulhisi3 $ wmov C4,22 $ add C6,24 $ adc C7,25 930 931 wmov 26, B2 932 ;; 0 * 1 933 XCALL __muldi3_6 934 935 pop A0 936 pop A1 937 ;; 1 * 1 938 wmov 26, B2 939 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 940 941 pop r26 942 pop r27 943 ;; 1 * 0 944 XCALL __muldi3_6 945 946 pop A0 947 pop A1 948 ;; 2 * 0 949 XCALL __umulhisi3 $ add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25 950 951 ;; 2 * 1 952 wmov 26, B2 953 XCALL __umulhisi3 $ $ $ add C6,22 $ adc C7,23 954 955 ;; A[] = C[] 956 wmov A0, C0 957 ;; A2 = C2 already 958 wmov A4, C4 959 wmov A6, C6 960 961 pop r16 962 pop r17 963 pop r28 964 pop r29 965 ret 966ENDF __muldi3 967#endif /* L_muldi3 */ 968 969#if defined (L_muldi3_6) 970;; A helper for some 64-bit multiplications with MUL available 971DEFUN __muldi3_6 972__muldi3_6: 973 XCALL __umulhisi3 974 add C2, 22 975 adc C3, 23 976 adc C4, 24 977 adc C5, 25 978 brcc 0f 979 adiw C6, 1 9800: ret 981ENDF __muldi3_6 982#endif /* L_muldi3_6 */ 983 984#undef C7 985#undef C6 986#undef C5 987#undef C4 988#undef C3 989#undef C2 990#undef C1 991#undef C0 992 993#else /* !HAVE_MUL */ 994 995#if defined (L_muldi3) 996 997#define C0 26 998#define C1 C0+1 999#define C2 C0+2 1000#define C3 C0+3 1001#define C4 C0+4 1002#define C5 C0+5 1003#define C6 0 1004#define C7 C6+1 1005 1006#define Loop 9 1007 1008;; A[] *= B[] 1009;; R25:R18 *= R17:R10 1010;; Ordinary ABI-Function 1011 1012DEFUN __muldi3 1013 push r29 1014 push r28 1015 push Loop 1016 1017 ldi C0, 64 1018 mov Loop, C0 1019 1020 ;; C[] = 0 1021 clr __tmp_reg__ 1022 wmov C0, 0 1023 wmov C2, 0 1024 wmov C4, 0 1025 10260: ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[] 1027 ;; where N = 64 - Loop. 1028 ;; Notice that B[] = B[] >>> 64 so after this Routine has finished, 1029 ;; B[] will have its initial Value again. 1030 LSR B7 $ ror B6 $ ror B5 $ ror B4 1031 ror B3 $ ror B2 $ ror B1 $ ror B0 1032 1033 ;; If the N-th Bit of B[] was set then... 1034 brcc 1f 1035 ;; ...finish Rotation... 
1036 ori B7, 1 << 7 1037 1038 ;; ...and add A[] * 2^N to the Result C[] 1039 ADD C0,A0 $ adc C1,A1 $ adc C2,A2 $ adc C3,A3 1040 adc C4,A4 $ adc C5,A5 $ adc C6,A6 $ adc C7,A7 1041 10421: ;; Multiply A[] by 2 1043 LSL A0 $ rol A1 $ rol A2 $ rol A3 1044 rol A4 $ rol A5 $ rol A6 $ rol A7 1045 1046 dec Loop 1047 brne 0b 1048 1049 ;; We expanded the Result in C[] 1050 ;; Copy Result to the Return Register A[] 1051 wmov A0, C0 1052 wmov A2, C2 1053 wmov A4, C4 1054 wmov A6, C6 1055 1056 clr __zero_reg__ 1057 pop Loop 1058 pop r28 1059 pop r29 1060 ret 1061ENDF __muldi3 1062 1063#undef Loop 1064 1065#undef C7 1066#undef C6 1067#undef C5 1068#undef C4 1069#undef C3 1070#undef C2 1071#undef C1 1072#undef C0 1073 1074#endif /* L_muldi3 */ 1075#endif /* HAVE_MUL */ 1076#endif /* if not __AVR_TINY__ */ 1077 1078#undef B7 1079#undef B6 1080#undef B5 1081#undef B4 1082#undef B3 1083#undef B2 1084#undef B1 1085#undef B0 1086 1087#undef A7 1088#undef A6 1089#undef A5 1090#undef A4 1091#undef A3 1092#undef A2 1093#undef A1 1094#undef A0 1095 1096/******************************************************* 1097 Widening Multiplication 64 = 32 x 32 with MUL 1098*******************************************************/ 1099 1100#if defined (__AVR_HAVE_MUL__) 1101#define A0 r22 1102#define A1 r23 1103#define A2 r24 1104#define A3 r25 1105 1106#define B0 r18 1107#define B1 r19 1108#define B2 r20 1109#define B3 r21 1110 1111#define C0 18 1112#define C1 C0+1 1113#define C2 20 1114#define C3 C2+1 1115#define C4 28 1116#define C5 C4+1 1117#define C6 C4+2 1118#define C7 C4+3 1119 1120#if defined (L_umulsidi3) 1121 1122;; Unsigned widening 64 = 32 * 32 Multiplication with MUL 1123 1124;; R18[8] = R22[4] * R18[4] 1125;; 1126;; Ordinary ABI Function, but additionally sets 1127;; X = R20[2] = B2[2] 1128;; Z = R22[2] = A0[2] 1129DEFUN __umulsidi3 1130 clt 1131 ;; FALLTHRU 1132ENDF __umulsidi3 1133 ;; T = sign (A) 1134DEFUN __umulsidi3_helper 1135 push 29 $ push 28 ; Y 1136 wmov 30, A2 1137 ;; Counting in Words, we have to perform 4 Multiplications 1138 ;; 0 * 0 1139 wmov 26, A0 1140 XCALL __umulhisi3 1141 push 23 $ push 22 ; C0 1142 wmov 28, B0 1143 wmov 18, B2 1144 wmov C2, 24 1145 push 27 $ push 26 ; A0 1146 push 19 $ push 18 ; B2 1147 ;; 1148 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y 1149 ;; B2 C2 -- -- -- B0 A2 1150 ;; 1 * 1 1151 wmov 26, 30 ; A2 1152 XCALL __umulhisi3 1153 ;; Sign-extend A. 
T holds the sign of A 1154 brtc 0f 1155 ;; Subtract B from the high part of the result 1156 sub 22, 28 1157 sbc 23, 29 1158 sbc 24, 18 1159 sbc 25, 19 11600: wmov 18, 28 ;; B0 1161 wmov C4, 22 1162 wmov C6, 24 1163 ;; 1164 ;; 18 20 22 24 26 28 30 | B2, B3, A0, A1, C0, C1, Y 1165 ;; B0 C2 -- -- A2 C4 C6 1166 ;; 1167 ;; 1 * 0 1168 XCALL __muldi3_6 1169 ;; 0 * 1 1170 pop 26 $ pop 27 ;; B2 1171 pop 18 $ pop 19 ;; A0 1172 XCALL __muldi3_6 1173 1174 ;; Move result C into place and save A0 in Z 1175 wmov 22, C4 1176 wmov 24, C6 1177 wmov 30, 18 ; A0 1178 pop C0 $ pop C1 1179 1180 ;; Epilogue 1181 pop 28 $ pop 29 ;; Y 1182 ret 1183ENDF __umulsidi3_helper 1184#endif /* L_umulsidi3 */ 1185 1186 1187#if defined (L_mulsidi3) 1188 1189;; Signed widening 64 = 32 * 32 Multiplication 1190;; 1191;; R18[8] = R22[4] * R18[4] 1192;; Ordinary ABI Function 1193DEFUN __mulsidi3 1194 bst A3, 7 1195 sbrs B3, 7 ; Enhanced core has no skip bug 1196 XJMP __umulsidi3_helper 1197 1198 ;; B needs sign-extension 1199 push A3 1200 push A2 1201 XCALL __umulsidi3_helper 1202 ;; A0 survived in Z 1203 sub r22, r30 1204 sbc r23, r31 1205 pop r26 1206 pop r27 1207 sbc r24, r26 1208 sbc r25, r27 1209 ret 1210ENDF __mulsidi3 1211#endif /* L_mulsidi3 */ 1212 1213#undef A0 1214#undef A1 1215#undef A2 1216#undef A3 1217#undef B0 1218#undef B1 1219#undef B2 1220#undef B3 1221#undef C0 1222#undef C1 1223#undef C2 1224#undef C3 1225#undef C4 1226#undef C5 1227#undef C6 1228#undef C7 1229#endif /* HAVE_MUL */ 1230 1231/********************************************************** 1232 Widening Multiplication 64 = 32 x 32 without MUL 1233**********************************************************/ 1234#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */ 1235#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__) 1236#define A0 18 1237#define A1 A0+1 1238#define A2 A0+2 1239#define A3 A0+3 1240#define A4 A0+4 1241#define A5 A0+5 1242#define A6 A0+6 1243#define A7 A0+7 1244 1245#define B0 10 1246#define B1 B0+1 1247#define B2 B0+2 1248#define B3 B0+3 1249#define B4 B0+4 1250#define B5 B0+5 1251#define B6 B0+6 1252#define B7 B0+7 1253 1254#define AA0 22 1255#define AA1 AA0+1 1256#define AA2 AA0+2 1257#define AA3 AA0+3 1258 1259#define BB0 18 1260#define BB1 BB0+1 1261#define BB2 BB0+2 1262#define BB3 BB0+3 1263 1264#define Mask r30 1265 1266;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL 1267;; 1268;; R18[8] = R22[4] * R18[4] 1269;; Ordinary ABI Function 1270DEFUN __mulsidi3 1271 set 1272 skip 1273 ;; FALLTHRU 1274ENDF __mulsidi3 1275 1276DEFUN __umulsidi3 1277 clt ; skipped 1278 ;; Save 10 Registers: R10..R17, R28, R29 1279 do_prologue_saves 10 1280 ldi Mask, 0xff 1281 bld Mask, 7 1282 ;; Move B into place... 1283 wmov B0, BB0 1284 wmov B2, BB2 1285 ;; ...and extend it 1286 and BB3, Mask 1287 lsl BB3 1288 sbc B4, B4 1289 mov B5, B4 1290 wmov B6, B4 1291 ;; Move A into place... 
1292 wmov A0, AA0 1293 wmov A2, AA2 1294 ;; ...and extend it 1295 and AA3, Mask 1296 lsl AA3 1297 sbc A4, A4 1298 mov A5, A4 1299 wmov A6, A4 1300 XCALL __muldi3 1301 do_epilogue_restores 10 1302ENDF __umulsidi3 1303 1304#undef A0 1305#undef A1 1306#undef A2 1307#undef A3 1308#undef A4 1309#undef A5 1310#undef A6 1311#undef A7 1312#undef B0 1313#undef B1 1314#undef B2 1315#undef B3 1316#undef B4 1317#undef B5 1318#undef B6 1319#undef B7 1320#undef AA0 1321#undef AA1 1322#undef AA2 1323#undef AA3 1324#undef BB0 1325#undef BB1 1326#undef BB2 1327#undef BB3 1328#undef Mask 1329#endif /* L_mulsidi3 && !HAVE_MUL */ 1330#endif /* if not __AVR_TINY__ */ 1331;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1332 1333 1334.section .text.libgcc.div, "ax", @progbits 1335 1336/******************************************************* 1337 Division 8 / 8 => (result + remainder) 1338*******************************************************/ 1339#define r_rem r25 /* remainder */ 1340#define r_arg1 r24 /* dividend, quotient */ 1341#define r_arg2 r22 /* divisor */ 1342#define r_cnt r23 /* loop count */ 1343 1344#if defined (L_udivmodqi4) 1345DEFUN __udivmodqi4 1346 sub r_rem,r_rem ; clear remainder and carry 1347 ldi r_cnt,9 ; init loop counter 1348 rjmp __udivmodqi4_ep ; jump to entry point 1349__udivmodqi4_loop: 1350 rol r_rem ; shift dividend into remainder 1351 cp r_rem,r_arg2 ; compare remainder & divisor 1352 brcs __udivmodqi4_ep ; remainder <= divisor 1353 sub r_rem,r_arg2 ; restore remainder 1354__udivmodqi4_ep: 1355 rol r_arg1 ; shift dividend (with CARRY) 1356 dec r_cnt ; decrement loop counter 1357 brne __udivmodqi4_loop 1358 com r_arg1 ; complement result 1359 ; because C flag was complemented in loop 1360 ret 1361ENDF __udivmodqi4 1362#endif /* defined (L_udivmodqi4) */ 1363 1364#if defined (L_divmodqi4) 1365DEFUN __divmodqi4 1366 bst r_arg1,7 ; store sign of dividend 1367 mov __tmp_reg__,r_arg1 1368 eor __tmp_reg__,r_arg2; r0.7 is sign of result 1369 sbrc r_arg1,7 1370 neg r_arg1 ; dividend negative : negate 1371 sbrc r_arg2,7 1372 neg r_arg2 ; divisor negative : negate 1373 XCALL __udivmodqi4 ; do the unsigned div/mod 1374 brtc __divmodqi4_1 1375 neg r_rem ; correct remainder sign 1376__divmodqi4_1: 1377 sbrc __tmp_reg__,7 1378 neg r_arg1 ; correct result sign 1379__divmodqi4_exit: 1380 ret 1381ENDF __divmodqi4 1382#endif /* defined (L_divmodqi4) */ 1383 1384#undef r_rem 1385#undef r_arg1 1386#undef r_arg2 1387#undef r_cnt 1388 1389 1390/******************************************************* 1391 Division 16 / 16 => (result + remainder) 1392*******************************************************/ 1393#define r_remL r26 /* remainder Low */ 1394#define r_remH r27 /* remainder High */ 1395 1396/* return: remainder */ 1397#define r_arg1L r24 /* dividend Low */ 1398#define r_arg1H r25 /* dividend High */ 1399 1400/* return: quotient */ 1401#define r_arg2L r22 /* divisor Low */ 1402#define r_arg2H r23 /* divisor High */ 1403 1404#define r_cnt r21 /* loop count */ 1405 1406#if defined (L_udivmodhi4) 1407DEFUN __udivmodhi4 1408 sub r_remL,r_remL 1409 sub r_remH,r_remH ; clear remainder and carry 1410 ldi r_cnt,17 ; init loop counter 1411 rjmp __udivmodhi4_ep ; jump to entry point 1412__udivmodhi4_loop: 1413 rol r_remL ; shift dividend into remainder 1414 rol r_remH 1415 cp r_remL,r_arg2L ; compare remainder & divisor 1416 cpc r_remH,r_arg2H 1417 brcs __udivmodhi4_ep ; remainder < divisor 1418 sub r_remL,r_arg2L ; restore remainder 1419 sbc r_remH,r_arg2H 1420__udivmodhi4_ep: 1421 rol r_arg1L ; 
shift dividend (with CARRY) 1422 rol r_arg1H 1423 dec r_cnt ; decrement loop counter 1424 brne __udivmodhi4_loop 1425 com r_arg1L 1426 com r_arg1H 1427; div/mod results to return registers, as for the div() function 1428 mov_l r_arg2L, r_arg1L ; quotient 1429 mov_h r_arg2H, r_arg1H 1430 mov_l r_arg1L, r_remL ; remainder 1431 mov_h r_arg1H, r_remH 1432 ret 1433ENDF __udivmodhi4 1434#endif /* defined (L_udivmodhi4) */ 1435 1436#if defined (L_divmodhi4) 1437DEFUN __divmodhi4 1438 .global _div 1439_div: 1440 bst r_arg1H,7 ; store sign of dividend 1441 mov __tmp_reg__,r_arg2H 1442 brtc 0f 1443 com __tmp_reg__ ; r0.7 is sign of result 1444 rcall __divmodhi4_neg1 ; dividend negative: negate 14450: 1446 sbrc r_arg2H,7 1447 rcall __divmodhi4_neg2 ; divisor negative: negate 1448 XCALL __udivmodhi4 ; do the unsigned div/mod 1449 sbrc __tmp_reg__,7 1450 rcall __divmodhi4_neg2 ; correct remainder sign 1451 brtc __divmodhi4_exit 1452__divmodhi4_neg1: 1453 ;; correct dividend/remainder sign 1454 com r_arg1H 1455 neg r_arg1L 1456 sbci r_arg1H,0xff 1457 ret 1458__divmodhi4_neg2: 1459 ;; correct divisor/result sign 1460 com r_arg2H 1461 neg r_arg2L 1462 sbci r_arg2H,0xff 1463__divmodhi4_exit: 1464 ret 1465ENDF __divmodhi4 1466#endif /* defined (L_divmodhi4) */ 1467 1468#undef r_remH 1469#undef r_remL 1470 1471#undef r_arg1H 1472#undef r_arg1L 1473 1474#undef r_arg2H 1475#undef r_arg2L 1476 1477#undef r_cnt 1478 1479/******************************************************* 1480 Division 24 / 24 => (result + remainder) 1481*******************************************************/ 1482 1483;; A[0..2]: In: Dividend; Out: Quotient 1484#define A0 22 1485#define A1 A0+1 1486#define A2 A0+2 1487 1488;; B[0..2]: In: Divisor; Out: Remainder 1489#define B0 18 1490#define B1 B0+1 1491#define B2 B0+2 1492 1493;; C[0..2]: Expand remainder 1494#define C0 __zero_reg__ 1495#define C1 26 1496#define C2 25 1497 1498;; Loop counter 1499#define r_cnt 21 1500 1501#if defined (L_udivmodpsi4) 1502;; R24:R22 = R24:R24 udiv R20:R18 1503;; R20:R18 = R24:R22 umod R20:R18 1504;; Clobbers: R21, R25, R26 1505 1506DEFUN __udivmodpsi4 1507 ; init loop counter 1508 ldi r_cnt, 24+1 1509 ; Clear remainder and carry. 
C0 is already 0 1510 clr C1 1511 sub C2, C2 1512 ; jump to entry point 1513 rjmp __udivmodpsi4_start 1514__udivmodpsi4_loop: 1515 ; shift dividend into remainder 1516 rol C0 1517 rol C1 1518 rol C2 1519 ; compare remainder & divisor 1520 cp C0, B0 1521 cpc C1, B1 1522 cpc C2, B2 1523 brcs __udivmodpsi4_start ; remainder <= divisor 1524 sub C0, B0 ; restore remainder 1525 sbc C1, B1 1526 sbc C2, B2 1527__udivmodpsi4_start: 1528 ; shift dividend (with CARRY) 1529 rol A0 1530 rol A1 1531 rol A2 1532 ; decrement loop counter 1533 dec r_cnt 1534 brne __udivmodpsi4_loop 1535 com A0 1536 com A1 1537 com A2 1538 ; div/mod results to return registers 1539 ; remainder 1540 mov B0, C0 1541 mov B1, C1 1542 mov B2, C2 1543 clr __zero_reg__ ; C0 1544 ret 1545ENDF __udivmodpsi4 1546#endif /* defined (L_udivmodpsi4) */ 1547 1548#if defined (L_divmodpsi4) 1549;; R24:R22 = R24:R22 div R20:R18 1550;; R20:R18 = R24:R22 mod R20:R18 1551;; Clobbers: T, __tmp_reg__, R21, R25, R26 1552 1553DEFUN __divmodpsi4 1554 ; R0.7 will contain the sign of the result: 1555 ; R0.7 = A.sign ^ B.sign 1556 mov __tmp_reg__, B2 1557 ; T-flag = sign of dividend 1558 bst A2, 7 1559 brtc 0f 1560 com __tmp_reg__ 1561 ; Adjust dividend's sign 1562 rcall __divmodpsi4_negA 15630: 1564 ; Adjust divisor's sign 1565 sbrc B2, 7 1566 rcall __divmodpsi4_negB 1567 1568 ; Do the unsigned div/mod 1569 XCALL __udivmodpsi4 1570 1571 ; Adjust quotient's sign 1572 sbrc __tmp_reg__, 7 1573 rcall __divmodpsi4_negA 1574 1575 ; Adjust remainder's sign 1576 brtc __divmodpsi4_end 1577 1578__divmodpsi4_negB: 1579 ; Correct divisor/remainder sign 1580 com B2 1581 com B1 1582 neg B0 1583 sbci B1, -1 1584 sbci B2, -1 1585 ret 1586 1587 ; Correct dividend/quotient sign 1588__divmodpsi4_negA: 1589 com A2 1590 com A1 1591 neg A0 1592 sbci A1, -1 1593 sbci A2, -1 1594__divmodpsi4_end: 1595 ret 1596 1597ENDF __divmodpsi4 1598#endif /* defined (L_divmodpsi4) */ 1599 1600#undef A0 1601#undef A1 1602#undef A2 1603 1604#undef B0 1605#undef B1 1606#undef B2 1607 1608#undef C0 1609#undef C1 1610#undef C2 1611 1612#undef r_cnt 1613 1614/******************************************************* 1615 Division 32 / 32 => (result + remainder) 1616*******************************************************/ 1617#define r_remHH r31 /* remainder High */ 1618#define r_remHL r30 1619#define r_remH r27 1620#define r_remL r26 /* remainder Low */ 1621 1622/* return: remainder */ 1623#define r_arg1HH r25 /* dividend High */ 1624#define r_arg1HL r24 1625#define r_arg1H r23 1626#define r_arg1L r22 /* dividend Low */ 1627 1628/* return: quotient */ 1629#define r_arg2HH r21 /* divisor High */ 1630#define r_arg2HL r20 1631#define r_arg2H r19 1632#define r_arg2L r18 /* divisor Low */ 1633 1634#define r_cnt __zero_reg__ /* loop count (0 after the loop!) 
*/ 1635 1636#if defined (L_udivmodsi4) 1637DEFUN __udivmodsi4 1638 ldi r_remL, 33 ; init loop counter 1639 mov r_cnt, r_remL 1640 sub r_remL,r_remL 1641 sub r_remH,r_remH ; clear remainder and carry 1642 mov_l r_remHL, r_remL 1643 mov_h r_remHH, r_remH 1644 rjmp __udivmodsi4_ep ; jump to entry point 1645__udivmodsi4_loop: 1646 rol r_remL ; shift dividend into remainder 1647 rol r_remH 1648 rol r_remHL 1649 rol r_remHH 1650 cp r_remL,r_arg2L ; compare remainder & divisor 1651 cpc r_remH,r_arg2H 1652 cpc r_remHL,r_arg2HL 1653 cpc r_remHH,r_arg2HH 1654 brcs __udivmodsi4_ep ; remainder <= divisor 1655 sub r_remL,r_arg2L ; restore remainder 1656 sbc r_remH,r_arg2H 1657 sbc r_remHL,r_arg2HL 1658 sbc r_remHH,r_arg2HH 1659__udivmodsi4_ep: 1660 rol r_arg1L ; shift dividend (with CARRY) 1661 rol r_arg1H 1662 rol r_arg1HL 1663 rol r_arg1HH 1664 dec r_cnt ; decrement loop counter 1665 brne __udivmodsi4_loop 1666 ; __zero_reg__ now restored (r_cnt == 0) 1667 com r_arg1L 1668 com r_arg1H 1669 com r_arg1HL 1670 com r_arg1HH 1671; div/mod results to return registers, as for the ldiv() function 1672 mov_l r_arg2L, r_arg1L ; quotient 1673 mov_h r_arg2H, r_arg1H 1674 mov_l r_arg2HL, r_arg1HL 1675 mov_h r_arg2HH, r_arg1HH 1676 mov_l r_arg1L, r_remL ; remainder 1677 mov_h r_arg1H, r_remH 1678 mov_l r_arg1HL, r_remHL 1679 mov_h r_arg1HH, r_remHH 1680 ret 1681ENDF __udivmodsi4 1682#endif /* defined (L_udivmodsi4) */ 1683 1684#if defined (L_divmodsi4) 1685DEFUN __divmodsi4 1686 mov __tmp_reg__,r_arg2HH 1687 bst r_arg1HH,7 ; store sign of dividend 1688 brtc 0f 1689 com __tmp_reg__ ; r0.7 is sign of result 1690 XCALL __negsi2 ; dividend negative: negate 16910: 1692 sbrc r_arg2HH,7 1693 rcall __divmodsi4_neg2 ; divisor negative: negate 1694 XCALL __udivmodsi4 ; do the unsigned div/mod 1695 sbrc __tmp_reg__, 7 ; correct quotient sign 1696 rcall __divmodsi4_neg2 1697 brtc __divmodsi4_exit ; correct remainder sign 1698 XJMP __negsi2 1699__divmodsi4_neg2: 1700 ;; correct divisor/quotient sign 1701 com r_arg2HH 1702 com r_arg2HL 1703 com r_arg2H 1704 neg r_arg2L 1705 sbci r_arg2H,0xff 1706 sbci r_arg2HL,0xff 1707 sbci r_arg2HH,0xff 1708__divmodsi4_exit: 1709 ret 1710ENDF __divmodsi4 1711#endif /* defined (L_divmodsi4) */ 1712 1713#if defined (L_negsi2) 1714;; (set (reg:SI 22) 1715;; (neg:SI (reg:SI 22))) 1716;; Sets the V flag for signed overflow tests 1717DEFUN __negsi2 1718 NEG4 22 1719 ret 1720ENDF __negsi2 1721#endif /* L_negsi2 */ 1722 1723#undef r_remHH 1724#undef r_remHL 1725#undef r_remH 1726#undef r_remL 1727#undef r_arg1HH 1728#undef r_arg1HL 1729#undef r_arg1H 1730#undef r_arg1L 1731#undef r_arg2HH 1732#undef r_arg2HL 1733#undef r_arg2H 1734#undef r_arg2L 1735#undef r_cnt 1736 1737/* *di routines use registers below R19 and won't work with tiny arch 1738 right now. */ 1739 1740#if !defined (__AVR_TINY__) 1741/******************************************************* 1742 Division 64 / 64 1743 Modulo 64 % 64 1744*******************************************************/ 1745 1746;; Use Speed-optimized Version on "big" Devices, i.e. Devices with 1747;; at least 16k of Program Memory. For smaller Devices, depend 1748;; on MOVW and SP Size. There is a Connexion between SP Size and 1749;; Flash Size so that SP Size can be used to test for Flash Size. 
1750 1751#if defined (__AVR_HAVE_JMP_CALL__) 1752# define SPEED_DIV 8 1753#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__) 1754# define SPEED_DIV 16 1755#else 1756# define SPEED_DIV 0 1757#endif 1758 1759;; A[0..7]: In: Dividend; 1760;; Out: Quotient (T = 0) 1761;; Out: Remainder (T = 1) 1762#define A0 18 1763#define A1 A0+1 1764#define A2 A0+2 1765#define A3 A0+3 1766#define A4 A0+4 1767#define A5 A0+5 1768#define A6 A0+6 1769#define A7 A0+7 1770 1771;; B[0..7]: In: Divisor; Out: Clobber 1772#define B0 10 1773#define B1 B0+1 1774#define B2 B0+2 1775#define B3 B0+3 1776#define B4 B0+4 1777#define B5 B0+5 1778#define B6 B0+6 1779#define B7 B0+7 1780 1781;; C[0..7]: Expand remainder; Out: Remainder (unused) 1782#define C0 8 1783#define C1 C0+1 1784#define C2 30 1785#define C3 C2+1 1786#define C4 28 1787#define C5 C4+1 1788#define C6 26 1789#define C7 C6+1 1790 1791;; Holds Signs during Division Routine 1792#define SS __tmp_reg__ 1793 1794;; Bit-Counter in Division Routine 1795#define R_cnt __zero_reg__ 1796 1797;; Scratch Register for Negation 1798#define NN r31 1799 1800#if defined (L_udivdi3) 1801 1802;; R25:R18 = R24:R18 umod R17:R10 1803;; Ordinary ABI-Function 1804 1805DEFUN __umoddi3 1806 set 1807 rjmp __udivdi3_umoddi3 1808ENDF __umoddi3 1809 1810;; R25:R18 = R24:R18 udiv R17:R10 1811;; Ordinary ABI-Function 1812 1813DEFUN __udivdi3 1814 clt 1815ENDF __udivdi3 1816 1817DEFUN __udivdi3_umoddi3 1818 push C0 1819 push C1 1820 push C4 1821 push C5 1822 XCALL __udivmod64 1823 pop C5 1824 pop C4 1825 pop C1 1826 pop C0 1827 ret 1828ENDF __udivdi3_umoddi3 1829#endif /* L_udivdi3 */ 1830 1831#if defined (L_udivmod64) 1832 1833;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation 1834;; No Registers saved/restored; the Callers will take Care. 1835;; Preserves B[] and T-flag 1836;; T = 0: Compute Quotient in A[] 1837;; T = 1: Compute Remainder in A[] and shift SS one Bit left 1838 1839DEFUN __udivmod64 1840 1841 ;; Clear Remainder (C6, C7 will follow) 1842 clr C0 1843 clr C1 1844 wmov C2, C0 1845 wmov C4, C0 1846 ldi C7, 64 1847 1848#if SPEED_DIV == 0 || SPEED_DIV == 16 1849 ;; Initialize Loop-Counter 1850 mov R_cnt, C7 1851 wmov C6, C0 1852#endif /* SPEED_DIV */ 1853 1854#if SPEED_DIV == 8 1855 1856 push A7 1857 clr C6 1858 18591: ;; Compare shifted Devidend against Divisor 1860 ;; If -- even after Shifting -- it is smaller... 1861 CP A7,B0 $ cpc C0,B1 $ cpc C1,B2 $ cpc C2,B3 1862 cpc C3,B4 $ cpc C4,B5 $ cpc C5,B6 $ cpc C6,B7 1863 brcc 2f 1864 1865 ;; ...then we can subtract it. Thus, it is legal to shift left 1866 $ mov C6,C5 $ mov C5,C4 $ mov C4,C3 1867 mov C3,C2 $ mov C2,C1 $ mov C1,C0 $ mov C0,A7 1868 mov A7,A6 $ mov A6,A5 $ mov A5,A4 $ mov A4,A3 1869 mov A3,A2 $ mov A2,A1 $ mov A1,A0 $ clr A0 1870 1871 ;; 8 Bits are done 1872 subi C7, 8 1873 brne 1b 1874 1875 ;; Shifted 64 Bits: A7 has traveled to C7 1876 pop C7 1877 ;; Divisor is greater than Dividend. 
We have: 1878 ;; A[] % B[] = A[] 1879 ;; A[] / B[] = 0 1880 ;; Thus, we can return immediately 1881 rjmp 5f 1882 18832: ;; Initialze Bit-Counter with Number of Bits still to be performed 1884 mov R_cnt, C7 1885 1886 ;; Push of A7 is not needed because C7 is still 0 1887 pop C7 1888 clr C7 1889 1890#elif SPEED_DIV == 16 1891 1892 ;; Compare shifted Dividend against Divisor 1893 cp A7, B3 1894 cpc C0, B4 1895 cpc C1, B5 1896 cpc C2, B6 1897 cpc C3, B7 1898 brcc 2f 1899 1900 ;; Divisor is greater than shifted Dividen: We can shift the Dividend 1901 ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk 1902 wmov C2,A6 $ wmov C0,A4 1903 wmov A6,A2 $ wmov A4,A0 1904 wmov A2,C6 $ wmov A0,C4 1905 1906 ;; Set Bit Counter to 32 1907 lsr R_cnt 19082: 1909#elif SPEED_DIV 1910#error SPEED_DIV = ? 1911#endif /* SPEED_DIV */ 1912 1913;; The very Division + Remainder Routine 1914 19153: ;; Left-shift Dividend... 1916 lsl A0 $ rol A1 $ rol A2 $ rol A3 1917 rol A4 $ rol A5 $ rol A6 $ rol A7 1918 1919 ;; ...into Remainder 1920 rol C0 $ rol C1 $ rol C2 $ rol C3 1921 rol C4 $ rol C5 $ rol C6 $ rol C7 1922 1923 ;; Compare Remainder and Divisor 1924 CP C0,B0 $ cpc C1,B1 $ cpc C2,B2 $ cpc C3,B3 1925 cpc C4,B4 $ cpc C5,B5 $ cpc C6,B6 $ cpc C7,B7 1926 1927 brcs 4f 1928 1929 ;; Divisor fits into Remainder: Subtract it from Remainder... 1930 SUB C0,B0 $ sbc C1,B1 $ sbc C2,B2 $ sbc C3,B3 1931 sbc C4,B4 $ sbc C5,B5 $ sbc C6,B6 $ sbc C7,B7 1932 1933 ;; ...and set according Bit in the upcoming Quotient 1934 ;; The Bit will travel to its final Position 1935 ori A0, 1 1936 19374: ;; This Bit is done 1938 dec R_cnt 1939 brne 3b 1940 ;; __zero_reg__ is 0 again 1941 1942 ;; T = 0: We are fine with the Quotient in A[] 1943 ;; T = 1: Copy Remainder to A[] 19445: brtc 6f 1945 wmov A0, C0 1946 wmov A2, C2 1947 wmov A4, C4 1948 wmov A6, C6 1949 ;; Move the Sign of the Result to SS.7 1950 lsl SS 1951 19526: ret 1953 1954ENDF __udivmod64 1955#endif /* L_udivmod64 */ 1956 1957 1958#if defined (L_divdi3) 1959 1960;; R25:R18 = R24:R18 mod R17:R10 1961;; Ordinary ABI-Function 1962 1963DEFUN __moddi3 1964 set 1965 rjmp __divdi3_moddi3 1966ENDF __moddi3 1967 1968;; R25:R18 = R24:R18 div R17:R10 1969;; Ordinary ABI-Function 1970 1971DEFUN __divdi3 1972 clt 1973ENDF __divdi3 1974 1975DEFUN __divdi3_moddi3 1976#if SPEED_DIV 1977 mov r31, A7 1978 or r31, B7 1979 brmi 0f 1980 ;; Both Signs are 0: the following Complexitiy is not needed 1981 XJMP __udivdi3_umoddi3 1982#endif /* SPEED_DIV */ 1983 19840: ;; The Prologue 1985 ;; Save 12 Registers: Y, 17...8 1986 ;; No Frame needed 1987 do_prologue_saves 12 1988 1989 ;; SS.7 will contain the Sign of the Quotient (A.sign * B.sign) 1990 ;; SS.6 will contain the Sign of the Remainder (A.sign) 1991 mov SS, A7 1992 asr SS 1993 ;; Adjust Dividend's Sign as needed 1994#if SPEED_DIV 1995 ;; Compiling for Speed we know that at least one Sign must be < 0 1996 ;; Thus, if A[] >= 0 then we know B[] < 0 1997 brpl 22f 1998#else 1999 brpl 21f 2000#endif /* SPEED_DIV */ 2001 2002 XCALL __negdi2 2003 2004 ;; Adjust Divisor's Sign and SS.7 as needed 200521: tst B7 2006 brpl 3f 200722: ldi NN, 1 << 7 2008 eor SS, NN 2009 2010 ldi NN, -1 2011 com B4 $ com B5 $ com B6 $ com B7 2012 $ com B1 $ com B2 $ com B3 2013 NEG B0 2014 $ sbc B1,NN $ sbc B2,NN $ sbc B3,NN 2015 sbc B4,NN $ sbc B5,NN $ sbc B6,NN $ sbc B7,NN 2016 20173: ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag) 2018 XCALL __udivmod64 2019 2020 ;; Adjust Result's Sign 2021#ifdef __AVR_ERRATA_SKIP_JMP_CALL__ 2022 tst SS 2023 brpl 
4f 2024#else 2025 sbrc SS, 7 2026#endif /* __AVR_HAVE_JMP_CALL__ */ 2027 XCALL __negdi2 2028 20294: ;; Epilogue: Restore 12 Registers and return 2030 do_epilogue_restores 12 2031 2032ENDF __divdi3_moddi3 2033 2034#endif /* L_divdi3 */ 2035 2036#undef R_cnt 2037#undef SS 2038#undef NN 2039 2040.section .text.libgcc, "ax", @progbits 2041 2042#define TT __tmp_reg__ 2043 2044#if defined (L_adddi3) 2045;; (set (reg:DI 18) 2046;; (plus:DI (reg:DI 18) 2047;; (reg:DI 10))) 2048;; Sets the V flag for signed overflow tests 2049;; Sets the C flag for unsigned overflow tests 2050DEFUN __adddi3 2051 ADD A0,B0 $ adc A1,B1 $ adc A2,B2 $ adc A3,B3 2052 adc A4,B4 $ adc A5,B5 $ adc A6,B6 $ adc A7,B7 2053 ret 2054ENDF __adddi3 2055#endif /* L_adddi3 */ 2056 2057#if defined (L_adddi3_s8) 2058;; (set (reg:DI 18) 2059;; (plus:DI (reg:DI 18) 2060;; (sign_extend:SI (reg:QI 26)))) 2061;; Sets the V flag for signed overflow tests 2062;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128 2063DEFUN __adddi3_s8 2064 clr TT 2065 sbrc r26, 7 2066 com TT 2067 ADD A0,r26 $ adc A1,TT $ adc A2,TT $ adc A3,TT 2068 adc A4,TT $ adc A5,TT $ adc A6,TT $ adc A7,TT 2069 ret 2070ENDF __adddi3_s8 2071#endif /* L_adddi3_s8 */ 2072 2073#if defined (L_subdi3) 2074;; (set (reg:DI 18) 2075;; (minus:DI (reg:DI 18) 2076;; (reg:DI 10))) 2077;; Sets the V flag for signed overflow tests 2078;; Sets the C flag for unsigned overflow tests 2079DEFUN __subdi3 2080 SUB A0,B0 $ sbc A1,B1 $ sbc A2,B2 $ sbc A3,B3 2081 sbc A4,B4 $ sbc A5,B5 $ sbc A6,B6 $ sbc A7,B7 2082 ret 2083ENDF __subdi3 2084#endif /* L_subdi3 */ 2085 2086#if defined (L_cmpdi2) 2087;; (set (cc0) 2088;; (compare (reg:DI 18) 2089;; (reg:DI 10))) 2090DEFUN __cmpdi2 2091 CP A0,B0 $ cpc A1,B1 $ cpc A2,B2 $ cpc A3,B3 2092 cpc A4,B4 $ cpc A5,B5 $ cpc A6,B6 $ cpc A7,B7 2093 ret 2094ENDF __cmpdi2 2095#endif /* L_cmpdi2 */ 2096 2097#if defined (L_cmpdi2_s8) 2098;; (set (cc0) 2099;; (compare (reg:DI 18) 2100;; (sign_extend:SI (reg:QI 26)))) 2101DEFUN __cmpdi2_s8 2102 clr TT 2103 sbrc r26, 7 2104 com TT 2105 CP A0,r26 $ cpc A1,TT $ cpc A2,TT $ cpc A3,TT 2106 cpc A4,TT $ cpc A5,TT $ cpc A6,TT $ cpc A7,TT 2107 ret 2108ENDF __cmpdi2_s8 2109#endif /* L_cmpdi2_s8 */ 2110 2111#if defined (L_negdi2) 2112;; (set (reg:DI 18) 2113;; (neg:DI (reg:DI 18))) 2114;; Sets the V flag for signed overflow tests 2115DEFUN __negdi2 2116 2117 com A4 $ com A5 $ com A6 $ com A7 2118 $ com A1 $ com A2 $ com A3 2119 NEG A0 2120 $ sbci A1,-1 $ sbci A2,-1 $ sbci A3,-1 2121 sbci A4,-1 $ sbci A5,-1 $ sbci A6,-1 $ sbci A7,-1 2122 ret 2123 2124ENDF __negdi2 2125#endif /* L_negdi2 */ 2126 2127#undef TT 2128 2129#undef C7 2130#undef C6 2131#undef C5 2132#undef C4 2133#undef C3 2134#undef C2 2135#undef C1 2136#undef C0 2137 2138#undef B7 2139#undef B6 2140#undef B5 2141#undef B4 2142#undef B3 2143#undef B2 2144#undef B1 2145#undef B0 2146 2147#undef A7 2148#undef A6 2149#undef A5 2150#undef A4 2151#undef A3 2152#undef A2 2153#undef A1 2154#undef A0 2155 2156#endif /* !defined (__AVR_TINY__) */ 2157 2158 2159.section .text.libgcc.prologue, "ax", @progbits 2160 2161/********************************** 2162 * This is a prologue subroutine 2163 **********************************/ 2164#if !defined (__AVR_TINY__) 2165#if defined (L_prologue) 2166 2167;; This function does not clobber T-flag; 64-bit division relies on it 2168DEFUN __prologue_saves__ 2169 push r2 2170 push r3 2171 push r4 2172 push r5 2173 push r6 2174 push r7 2175 push r8 2176 push r9 2177 push r10 2178 push r11 2179 push r12 2180 push r13 2181 push 
r14 2182 push r15 2183 push r16 2184 push r17 2185 push r28 2186 push r29 2187#if !defined (__AVR_HAVE_SPH__) 2188 in r28,__SP_L__ 2189 sub r28,r26 2190 out __SP_L__,r28 2191 clr r29 2192#elif defined (__AVR_XMEGA__) 2193 in r28,__SP_L__ 2194 in r29,__SP_H__ 2195 sub r28,r26 2196 sbc r29,r27 2197 out __SP_L__,r28 2198 out __SP_H__,r29 2199#else 2200 in r28,__SP_L__ 2201 in r29,__SP_H__ 2202 sub r28,r26 2203 sbc r29,r27 2204 in __tmp_reg__,__SREG__ 2205 cli 2206 out __SP_H__,r29 2207 out __SREG__,__tmp_reg__ 2208 out __SP_L__,r28 2209#endif /* #SP = 8/16 */ 2210 2211 XIJMP 2212 2213ENDF __prologue_saves__ 2214#endif /* defined (L_prologue) */ 2215 2216/* 2217 * This is an epilogue subroutine 2218 */ 2219#if defined (L_epilogue) 2220 2221DEFUN __epilogue_restores__ 2222 ldd r2,Y+18 2223 ldd r3,Y+17 2224 ldd r4,Y+16 2225 ldd r5,Y+15 2226 ldd r6,Y+14 2227 ldd r7,Y+13 2228 ldd r8,Y+12 2229 ldd r9,Y+11 2230 ldd r10,Y+10 2231 ldd r11,Y+9 2232 ldd r12,Y+8 2233 ldd r13,Y+7 2234 ldd r14,Y+6 2235 ldd r15,Y+5 2236 ldd r16,Y+4 2237 ldd r17,Y+3 2238 ldd r26,Y+2 2239#if !defined (__AVR_HAVE_SPH__) 2240 ldd r29,Y+1 2241 add r28,r30 2242 out __SP_L__,r28 2243 mov r28, r26 2244#elif defined (__AVR_XMEGA__) 2245 ldd r27,Y+1 2246 add r28,r30 2247 adc r29,__zero_reg__ 2248 out __SP_L__,r28 2249 out __SP_H__,r29 2250 wmov 28, 26 2251#else 2252 ldd r27,Y+1 2253 add r28,r30 2254 adc r29,__zero_reg__ 2255 in __tmp_reg__,__SREG__ 2256 cli 2257 out __SP_H__,r29 2258 out __SREG__,__tmp_reg__ 2259 out __SP_L__,r28 2260 mov_l r28, r26 2261 mov_h r29, r27 2262#endif /* #SP = 8/16 */ 2263 ret 2264ENDF __epilogue_restores__ 2265#endif /* defined (L_epilogue) */ 2266#endif /* !defined (__AVR_TINY__) */ 2267 2268#ifdef L_exit 2269 .section .fini9,"ax",@progbits 2270DEFUN _exit 2271 .weak exit 2272exit: 2273ENDF _exit 2274 2275 /* Code from .fini8 ... .fini1 sections inserted by ld script. 
*/ 2276 2277 .section .fini0,"ax",@progbits 2278 cli 2279__stop_program: 2280 rjmp __stop_program 2281#endif /* defined (L_exit) */ 2282 2283#ifdef L_cleanup 2284 .weak _cleanup 2285 .func _cleanup 2286_cleanup: 2287 ret 2288.endfunc 2289#endif /* defined (L_cleanup) */ 2290 2291 2292.section .text.libgcc, "ax", @progbits 2293 2294#ifdef L_tablejump2 2295DEFUN __tablejump2__ 2296 lsl r30 2297 rol r31 2298#if defined (__AVR_HAVE_EIJMP_EICALL__) 2299 ;; Word address of gs() jumptable entry in R24:Z 2300 rol r24 2301 out __RAMPZ__, r24 2302#elif defined (__AVR_HAVE_ELPM__) 2303 ;; Word address of jumptable entry in Z 2304 clr __tmp_reg__ 2305 rol __tmp_reg__ 2306 out __RAMPZ__, __tmp_reg__ 2307#endif 2308 2309 ;; Read word address from jumptable and jump 2310 2311#if defined (__AVR_HAVE_ELPMX__) 2312 elpm __tmp_reg__, Z+ 2313 elpm r31, Z 2314 mov r30, __tmp_reg__ 2315#ifdef __AVR_HAVE_RAMPD__ 2316 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2317 out __RAMPZ__, __zero_reg__ 2318#endif /* RAMPD */ 2319 XIJMP 2320#elif defined (__AVR_HAVE_ELPM__) 2321 elpm 2322 push r0 2323 adiw r30, 1 2324 elpm 2325 push r0 2326 ret 2327#elif defined (__AVR_HAVE_LPMX__) 2328 lpm __tmp_reg__, Z+ 2329 lpm r31, Z 2330 mov r30, __tmp_reg__ 2331 ijmp 2332#elif defined (__AVR_TINY__) 2333 wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z 2334 ld __tmp_reg__, Z+ 2335 ld r31, Z ; Use ld instead of lpm to load Z 2336 mov r30, __tmp_reg__ 2337 ijmp 2338#else 2339 lpm 2340 push r0 2341 adiw r30, 1 2342 lpm 2343 push r0 2344 ret 2345#endif 2346ENDF __tablejump2__ 2347#endif /* L_tablejump2 */ 2348 2349#if defined(__AVR_TINY__) 2350#ifdef L_copy_data 2351 .section .init4,"ax",@progbits 2352 .global __do_copy_data 2353__do_copy_data: 2354 ldi r18, hi8(__data_end) 2355 ldi r26, lo8(__data_start) 2356 ldi r27, hi8(__data_start) 2357 ldi r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__) 2358 ldi r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__) 2359 rjmp .L__do_copy_data_start 2360.L__do_copy_data_loop: 2361 ld r19, z+ 2362 st X+, r19 2363.L__do_copy_data_start: 2364 cpi r26, lo8(__data_end) 2365 cpc r27, r18 2366 brne .L__do_copy_data_loop 2367#endif 2368#else 2369#ifdef L_copy_data 2370 .section .init4,"ax",@progbits 2371DEFUN __do_copy_data 2372#if defined(__AVR_HAVE_ELPMX__) 2373 ldi r17, hi8(__data_end) 2374 ldi r26, lo8(__data_start) 2375 ldi r27, hi8(__data_start) 2376 ldi r30, lo8(__data_load_start) 2377 ldi r31, hi8(__data_load_start) 2378 ldi r16, hh8(__data_load_start) 2379 out __RAMPZ__, r16 2380 rjmp .L__do_copy_data_start 2381.L__do_copy_data_loop: 2382 elpm r0, Z+ 2383 st X+, r0 2384.L__do_copy_data_start: 2385 cpi r26, lo8(__data_end) 2386 cpc r27, r17 2387 brne .L__do_copy_data_loop 2388#elif !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__) 2389 ldi r17, hi8(__data_end) 2390 ldi r26, lo8(__data_start) 2391 ldi r27, hi8(__data_start) 2392 ldi r30, lo8(__data_load_start) 2393 ldi r31, hi8(__data_load_start) 2394 ldi r16, hh8(__data_load_start - 0x10000) 2395.L__do_copy_data_carry: 2396 inc r16 2397 out __RAMPZ__, r16 2398 rjmp .L__do_copy_data_start 2399.L__do_copy_data_loop: 2400 elpm 2401 st X+, r0 2402 adiw r30, 1 2403 brcs .L__do_copy_data_carry 2404.L__do_copy_data_start: 2405 cpi r26, lo8(__data_end) 2406 cpc r27, r17 2407 brne .L__do_copy_data_loop 2408#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) 2409 ldi r17, hi8(__data_end) 2410 ldi r26, lo8(__data_start) 2411 ldi r27, hi8(__data_start) 2412 ldi r30, 
lo8(__data_load_start) 2413 ldi r31, hi8(__data_load_start) 2414 rjmp .L__do_copy_data_start 2415.L__do_copy_data_loop: 2416#if defined (__AVR_HAVE_LPMX__) 2417 lpm r0, Z+ 2418#else 2419 lpm 2420 adiw r30, 1 2421#endif 2422 st X+, r0 2423.L__do_copy_data_start: 2424 cpi r26, lo8(__data_end) 2425 cpc r27, r17 2426 brne .L__do_copy_data_loop 2427#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */ 2428#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__) 2429 ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM 2430 out __RAMPZ__, __zero_reg__ 2431#endif /* ELPM && RAMPD */ 2432ENDF __do_copy_data 2433#endif /* L_copy_data */ 2434#endif /* !defined (__AVR_TINY__) */ 2435 2436/* __do_clear_bss is only necessary if there is anything in .bss section. */ 2437 2438#ifdef L_clear_bss 2439 .section .init4,"ax",@progbits 2440DEFUN __do_clear_bss 2441 ldi r18, hi8(__bss_end) 2442 ldi r26, lo8(__bss_start) 2443 ldi r27, hi8(__bss_start) 2444 rjmp .do_clear_bss_start 2445.do_clear_bss_loop: 2446 st X+, __zero_reg__ 2447.do_clear_bss_start: 2448 cpi r26, lo8(__bss_end) 2449 cpc r27, r18 2450 brne .do_clear_bss_loop 2451ENDF __do_clear_bss 2452#endif /* L_clear_bss */ 2453 2454/* __do_global_ctors and __do_global_dtors are only necessary 2455 if there are any constructors/destructors. */ 2456 2457#if defined(__AVR_TINY__) 2458#define cdtors_tst_reg r18 2459#else 2460#define cdtors_tst_reg r17 2461#endif 2462 2463#ifdef L_ctors 2464 .section .init6,"ax",@progbits 2465DEFUN __do_global_ctors 2466 ldi cdtors_tst_reg, pm_hi8(__ctors_start) 2467 ldi r28, pm_lo8(__ctors_end) 2468 ldi r29, pm_hi8(__ctors_end) 2469#ifdef __AVR_HAVE_EIJMP_EICALL__ 2470 ldi r16, pm_hh8(__ctors_end) 2471#endif /* HAVE_EIJMP */ 2472 rjmp .L__do_global_ctors_start 2473.L__do_global_ctors_loop: 2474 wsubi 28, 1 2475#ifdef __AVR_HAVE_EIJMP_EICALL__ 2476 sbc r16, __zero_reg__ 2477 mov r24, r16 2478#endif /* HAVE_EIJMP */ 2479 mov_h r31, r29 2480 mov_l r30, r28 2481 XCALL __tablejump2__ 2482.L__do_global_ctors_start: 2483 cpi r28, pm_lo8(__ctors_start) 2484 cpc r29, cdtors_tst_reg 2485#ifdef __AVR_HAVE_EIJMP_EICALL__ 2486 ldi r24, pm_hh8(__ctors_start) 2487 cpc r16, r24 2488#endif /* HAVE_EIJMP */ 2489 brne .L__do_global_ctors_loop 2490ENDF __do_global_ctors 2491#endif /* L_ctors */ 2492 2493#ifdef L_dtors 2494 .section .fini6,"ax",@progbits 2495DEFUN __do_global_dtors 2496 ldi cdtors_tst_reg, pm_hi8(__dtors_end) 2497 ldi r28, pm_lo8(__dtors_start) 2498 ldi r29, pm_hi8(__dtors_start) 2499#ifdef __AVR_HAVE_EIJMP_EICALL__ 2500 ldi r16, pm_hh8(__dtors_start) 2501#endif /* HAVE_EIJMP */ 2502 rjmp .L__do_global_dtors_start 2503.L__do_global_dtors_loop: 2504#ifdef __AVR_HAVE_EIJMP_EICALL__ 2505 mov r24, r16 2506#endif /* HAVE_EIJMP */ 2507 mov_h r31, r29 2508 mov_l r30, r28 2509 XCALL __tablejump2__ 2510 waddi 28, 1 2511#ifdef __AVR_HAVE_EIJMP_EICALL__ 2512 adc r16, __zero_reg__ 2513#endif /* HAVE_EIJMP */ 2514.L__do_global_dtors_start: 2515 cpi r28, pm_lo8(__dtors_end) 2516 cpc r29, cdtors_tst_reg 2517#ifdef __AVR_HAVE_EIJMP_EICALL__ 2518 ldi r24, pm_hh8(__dtors_end) 2519 cpc r16, r24 2520#endif /* HAVE_EIJMP */ 2521 brne .L__do_global_dtors_loop 2522ENDF __do_global_dtors 2523#endif /* L_dtors */ 2524 2525#undef cdtors_tst_reg 2526 2527.section .text.libgcc, "ax", @progbits 2528 2529#if !defined (__AVR_TINY__) 2530;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2531;; Loading n bytes from Flash; n = 3,4 2532;; R22... 
        .section .text.libgcc, "ax", @progbits

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash; n = 3,4
;; R22... = Flash[Z]
;; Clobbers: __tmp_reg__

#if (defined (L_load_3)            \
     || defined (L_load_4))        \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0 22
#define D1 D0+1
#define D2 D0+2
#define D3 D0+3

.macro .load dest, n
        lpm
        mov \dest, r0
.if \dest != D0+\n-1
        adiw r30, 1
.else
        sbiw r30, \n-1
.endif
.endm

#if defined (L_load_3)
DEFUN __load_3
        push D3
        XCALL __load_4
        pop D3
        ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
DEFUN __load_4
        .load D0, 4
        .load D1, 4
        .load D2, 4
        .load D3, 4
        ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */
#endif /* !defined (__AVR_TINY__) */

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash or RAM; n = 1,2,3,4
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
;; Clobbers: __tmp_reg__, R21, R30, R31

#if (defined (L_xload_1)           \
     || defined (L_xload_2)        \
     || defined (L_xload_3)        \
     || defined (L_xload_4))

;; Destination
#define D0 22
#define D1 D0+1
#define D2 D0+2
#define D3 D0+3

;; Register containing bits 16+ of the address

#define HHI8 21

.macro .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
        elpm \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
        elpm
        mov \dest, r0
.if \dest != D0+\n-1
        adiw r30, 1
        adc HHI8, __zero_reg__
        out __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
        lpm \dest, Z+
#else
        lpm
        mov \dest, r0
.if \dest != D0+\n-1
        adiw r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
        ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
        out __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload

#if defined (L_xload_1)
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
        sbrc HHI8, 7
        ld D0, Z
        sbrs HHI8, 7
        lpm D0, Z
        ret
#else
        sbrc HHI8, 7
        rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
        out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
        .xload D0, 1
        ret
1:      ld D0, Z
        ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */

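;; Illustrative only, not assembled: every __xload_<N> above and below
;; dispatches on HHI8.7, i.e. bit 23 of the 24-bit __memx address.  When it
;; is set, Z already points into RAM and plain LD is used; when it is
;; clear, HHI8:Z is a flash address read via (E)LPM.  A rough C model
;; (flash_read_byte is a hypothetical helper):
;;
;;     unsigned char xload_1_model (unsigned long addr24)
;;     {
;;         if (addr24 & 0x800000ul)                          /* RAM   */
;;             return *(unsigned char *) (unsigned int) addr24;
;;         return flash_read_byte (addr24 & 0xfffffful);     /* flash */
;;     }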
#if defined (L_xload_2)
DEFUN __xload_2
        sbrc HHI8, 7
        rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
        out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
        .xload D0, 2
        .xload D1, 2
        ret
1:      ld D0, Z+
        ld D1, Z+
        ret
ENDF __xload_2
#endif /* L_xload_2 */

#if defined (L_xload_3)
DEFUN __xload_3
        sbrc HHI8, 7
        rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
        out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
        .xload D0, 3
        .xload D1, 3
        .xload D2, 3
        ret
1:      ld D0, Z+
        ld D1, Z+
        ld D2, Z+
        ret
ENDF __xload_3
#endif /* L_xload_3 */

#if defined (L_xload_4)
DEFUN __xload_4
        sbrc HHI8, 7
        rjmp 1f
#if defined (__AVR_HAVE_ELPM__)
        out __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
        .xload D0, 4
        .xload D1, 4
        .xload D2, 4
        .xload D3, 4
        ret
1:      ld D0, Z+
        ld D1, Z+
        ld D2, Z+
        ld D3, Z+
        ret
ENDF __xload_4
#endif /* L_xload_4 */

#endif /* L_xload_{1|2|3|4} */
#endif /* if !defined (__AVR_TINY__) */

#if !defined (__AVR_TINY__)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcopy from Address Space __pgmx to RAM
;; R23:Z = Source Address
;; X     = Destination Address
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z

#if defined (L_movmemx)

#define HHI8 23
#define LOOP 24

DEFUN __movmemx_qi
        ;; #Bytes to copy fit in 8 bits (1..255)
        ;; Zero-extend Loop Counter
        clr LOOP+1
        ;; FALLTHRU
ENDF __movmemx_qi

DEFUN __movmemx_hi

;; Read from where?
        sbrc HHI8, 7
        rjmp 1f

;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
        out __RAMPZ__, HHI8
#endif

0:      ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
        elpm r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
        elpm
        adiw r30, 1
        adc HHI8, __zero_reg__
        out __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
        lpm r0, Z+
#else
        lpm
        adiw r30, 1
#endif

        ;; ...and store that Byte to RAM Destination
        st X+, r0
        sbiw LOOP, 1
        brne 0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
        ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
        out __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
        ret

;; Read from RAM

1:      ;; Read 1 Byte from RAM...
        ld r0, Z+
        ;; ...and store that Byte to RAM Destination
        st X+, r0
        sbiw LOOP, 1
        brne 1b
        ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
#endif /* !defined (__AVR_TINY__) */

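;; Illustrative only, not assembled: __movmemx_hi behaves like a memcpy
;; whose source lives in the 24-bit __pgmx address space.  Roughly, in C
;; (flash_read_byte is a hypothetical helper):
;;
;;     void movmemx_model (char *dst, unsigned long src24, unsigned int n)
;;     {
;;         int in_ram = (src24 & 0x800000ul) != 0;
;;         while (n--)
;;             *dst++ = in_ram ? *(char *) (unsigned int) src24++
;;                             : flash_read_byte (src24++);
;;     }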

        .section .text.libgcc.builtins, "ax", @progbits

/**********************************
 * Find first set Bit (ffs)
 **********************************/

#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
DEFUN __ffssi2
        clr r26
        tst r22
        brne 1f
        subi r26, -8
        or r22, r23
        brne 1f
        subi r26, -8
        or r22, r24
        brne 1f
        subi r26, -8
        or r22, r25
        brne 1f
        ret
1:      mov r24, r22
        XJMP __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */

#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
DEFUN __ffshi2
        clr r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
        ;; Some cores have a problem skipping a 2-word instruction
        tst r24
        breq 2f
#else
        cpse r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:      XJMP __loop_ffsqi2
2:      ldi r26, 8
        or r24, r25
        brne 1b
        ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */

#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
        inc r26
        lsr r24
        brcc __loop_ffsqi2
        mov r24, r26
        clr r25
        ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */


/**********************************
 * Count trailing Zeros (ctz)
 **********************************/

#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
        XCALL __ffssi2
        dec r24
        ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */

#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
        XCALL __ffshi2
        dec r24
        ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */


/**********************************
 * Count leading Zeros (clz)
 **********************************/

#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
        XCALL __clzsi2
        sbrs r24, 5
        ret
        mov_l r22, r18
        mov_h r23, r19
        mov_l r24, r20
        mov_h r25, r21
        XCALL __clzsi2
        subi r24, -32
        ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */

#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
        XCALL __clzhi2
        sbrs r24, 4
        ret
        mov_l r24, r22
        mov_h r25, r23
        XCALL __clzhi2
        subi r24, -16
        ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */

#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
        clr r26
        tst r25
        brne 1f
        subi r26, -8
        or r25, r24
        brne 1f
        ldi r24, 16
        ret
1:      cpi r25, 16
        brsh 3f
        subi r26, -3
        swap r25
2:      inc r26
3:      lsl r25
        brcc 2b
        mov r24, r26
        clr r25
        ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */

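;; Illustrative only, not assembled: how the bit-counting helpers above
;; fit together, in C terms.  __ffshi2 returns the 1-based index of the
;; lowest set bit (0 for a zero argument); __ctzhi2 merely subtracts 1,
;; so a zero argument wraps to 255, which GCC treats as undefined anyway:
;;
;;     int ffs16_model (unsigned x)
;;     {
;;         if (x == 0) return 0;
;;         int n = 1;
;;         while (!(x & 1)) { x >>= 1; n++; }
;;         return n;
;;     }
;;     /* ctz16_model (x) == ffs16_model (x) - 1 for x != 0 */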
/**********************************
 * Parity
 **********************************/

#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
        eor r24, r18
        eor r24, r19
        eor r24, r20
        eor r24, r21
        XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */

#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
        eor r24, r22
        eor r24, r23
        XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */

#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
        eor r24, r25
;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
        ;; parity is in r24[0..7]
        mov __tmp_reg__, r24
        swap __tmp_reg__
        eor r24, __tmp_reg__
        ;; parity is in r24[0..3]
        subi r24, -4
        andi r24, -5
        subi r24, -6
        ;; parity is in r24[0,3]
        sbrc r24, 3
        inc r24
        ;; parity is in r24[0]
        andi r24, 1
        clr r25
        ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */


/**********************************
 * Population Count
 **********************************/

#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
        XCALL __popcountqi2
        push r24
        mov r24, r25
        XCALL __popcountqi2
        clr r25
        ;; FALLTHRU
ENDF __popcounthi2

DEFUN __popcounthi2_tail
        pop __tmp_reg__
        add r24, __tmp_reg__
        ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */

#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
        XCALL __popcounthi2
        push r24
        mov_l r24, r22
        mov_h r25, r23
        XCALL __popcounthi2
        XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */

#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
        XCALL __popcountsi2
        push r24
        mov_l r22, r18
        mov_h r23, r19
        mov_l r24, r20
        mov_h r25, r21
        XCALL __popcountsi2
        XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */

#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
        mov __tmp_reg__, r24
        andi r24, 1
        lsr __tmp_reg__
        lsr __tmp_reg__
        adc r24, __zero_reg__
        lsr __tmp_reg__
        adc r24, __zero_reg__
        lsr __tmp_reg__
        adc r24, __zero_reg__
        lsr __tmp_reg__
        adc r24, __zero_reg__
        lsr __tmp_reg__
        adc r24, __zero_reg__
        lsr __tmp_reg__
        adc r24, __tmp_reg__
        ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */


/**********************************
 * Swap bytes
 **********************************/

;; Swap two registers with different register numbers
.macro bswap a, b
        eor \a, \b
        eor \b, \a
        eor \a, \b
.endm

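;; Illustrative only: the three EORs above are the classic XOR swap.
;; Example with a = 0x12, b = 0x34:
;;     a ^= b  ->  a = 0x26;   b ^= a  ->  b = 0x12;   a ^= b  ->  a = 0x34,
;; so the values are exchanged without a scratch register.  Both operands
;; must be distinct registers, since XOR-swapping a register with itself
;; clears it.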
#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
        bswap r22, r25
        bswap r23, r24
        ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */

#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
        bswap r18, r25
        bswap r19, r24
        bswap r20, r23
        bswap r21, r22
        ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */


/**********************************
 * 64-bit shifts
 **********************************/

#if defined (L_ashrdi3)

#define SS __zero_reg__

;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
        sbrc r25, 7
        com SS
        ;; FALLTHRU
ENDF __ashrdi3

;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
        ;; Signs are in SS (zero_reg)
        mov __tmp_reg__, r16
0:      cpi r16, 8
        brlo 2f
        subi r16, 8
        mov r18, r19
        mov r19, r20
        mov r20, r21
        mov r21, r22
        mov r22, r23
        mov r23, r24
        mov r24, r25
        mov r25, SS
        rjmp 0b
1:      asr SS
        ror r25
        ror r24
        ror r23
        ror r22
        ror r21
        ror r20
        ror r19
        ror r18
2:      dec r16
        brpl 1b
        clr __zero_reg__
        mov r16, __tmp_reg__
        ret
ENDF __lshrdi3

#undef SS

#endif /* defined (L_ashrdi3) */

#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
;; This function does not clobber T.
DEFUN __ashldi3
        mov __tmp_reg__, r16
0:      cpi r16, 8
        brlo 2f
        mov r25, r24
        mov r24, r23
        mov r23, r22
        mov r22, r21
        mov r21, r20
        mov r20, r19
        mov r19, r18
        clr r18
        subi r16, 8
        rjmp 0b
1:      lsl r18
        rol r19
        rol r20
        rol r21
        rol r22
        rol r23
        rol r24
        rol r25
2:      dec r16
        brpl 1b
        mov r16, __tmp_reg__
        ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */

#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
DEFUN __rotldi3
        push r16
0:      cpi r16, 8
        brlo 2f
        subi r16, 8
        mov __tmp_reg__, r25
        mov r25, r24
        mov r24, r23
        mov r23, r22
        mov r22, r21
        mov r21, r20
        mov r20, r19
        mov r19, r18
        mov r18, __tmp_reg__
        rjmp 0b
1:      lsl r18
        rol r19
        rol r20
        rol r21
        rol r22
        rol r23
        rol r24
        rol r25
        adc r18, __zero_reg__
2:      dec r16
        brpl 1b
        pop r16
        ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */


        .section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__

#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
        ;; A0.7 = negate result?
        mov A0, A1
        eor A0, B1
        ;; B1 = |B1|
        sbrc B1, 7
        neg B1
        XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */

#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
        ;; A0.7 = negate result?
        mov A0, A1
;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
DEFUN __fmulsu_exit
        ;; A1 = |A1|
        sbrc A1, 7
        neg A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
        ;; Some cores have a problem skipping a 2-word instruction
        tst A0
        brmi 1f
#else
        sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
        XJMP __fmul
1:      XCALL __fmul
        ;; C = -C iff A0.7 = 1
        NEG2 C0
        ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */

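;; Illustrative only, not assembled: FMUL treats its operands as unsigned
;; 1.7 fixed-point fractions (0x80 = 1.0) and returns the product as a
;; 1.15 fraction, i.e. (A * B) << 1.  The routine below is the usual
;; shift-and-add soft multiply: B is scanned MSB first, and whenever the
;; current bit is set, the (progressively right-shifted) A is added into
;; the result.  Worked example:
;;     __fmul (0x40, 0x40) = (0x40 * 0x40) << 1 = 0x2000,
;; which is 0.5 * 0.5 = 0.25 in 1.15 format.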
#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
        ; clear result
        clr C0
        clr C1
        clr A0
1:      tst B1
        ;; 1.0 = 0x80, so test bit 7 of B to see whether A must be added to C.
2:      brpl 3f
        ;; C += A
        add C0, A0
        adc C1, A1
3:      ;; A >>= 1
        lsr A1
        ror A0
        ;; B <<= 1
        lsl B1
        brne 2b
        ret
ENDF __fmul
#endif /* L_fmul */

#undef A0
#undef A1
#undef B1
#undef C0
#undef C1

#include "lib1funcs-fixed.S"