/*  -*- Mode: Asm -*-  */
/* Copyright (C) 1998-2013 Free Software Foundation, Inc.
   Contributed by Denis Chertykov <chertykov@gmail.com>

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#define __zero_reg__ r1
#define __tmp_reg__ r0
#define __SREG__ 0x3f
#if defined (__AVR_HAVE_SPH__)
#define __SP_H__ 0x3e
#endif
#define __SP_L__ 0x3d
#define __RAMPZ__ 0x3B
#define __EIND__  0x3C

/* Most of the functions here are called directly from avr.md
   patterns, instead of using the standard libcall mechanisms.
   This can result in better code because GCC knows exactly which
   of the call-used registers (not all of them) are clobbered.  */

/* FIXME:  At present, there is no SORT directive in the linker
           script, so we must not assume that different modules
           in the same input section, like .libgcc.text.mul, are
           located close together.  Therefore, we cannot use
           RCALL/RJMP to call a function like __udivmodhi4 from
           __divmodhi4 and have to use the lengthy XCALL/XJMP even
           though both live in the same input section and all such
           input sections together are small enough to reach every
           location with an RCALL/RJMP instruction.  */

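;; mov_l/mov_h move a 16-bit value held in a register pair:  with MOVW,
;; mov_l copies both bytes at once and mov_h expands to nothing; without
;; MOVW, each macro copies its own byte.  Always use them as a pair.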
	.macro	mov_l  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
	movw	\r_dest, \r_src
#else
	mov	\r_dest, \r_src
#endif
	.endm

	.macro	mov_h  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
	; empty
#else
	mov	\r_dest, \r_src
#endif
	.endm

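;; Like mov_l/mov_h, but takes plain register numbers so that \r_dest+1
;; and \r_src+1 name the next register when MOVW is unavailable.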
.macro	wmov  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest,   \r_src
#else
    mov \r_dest,    \r_src
    mov \r_dest+1,  \r_src+1
#endif
.endm

#if defined (__AVR_HAVE_JMP_CALL__)
#define XCALL call
#define XJMP  jmp
#else
#define XCALL rcall
#define XJMP  rjmp
#endif

;; Prologue stuff

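;; do_prologue_saves enters __prologue_saves__ at a computed offset:
;; every skipped PUSH is one 2-byte instruction, so starting at
;; ((18 - n_pushed) * 2) bytes pushes exactly n_pushed registers.
;; X holds the frame size and Z the return address for the (E)IJMP.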
.macro do_prologue_saves n_pushed n_frame=0
    ldi r26, lo8(\n_frame)
    ldi r27, hi8(\n_frame)
    ldi r30, lo8(gs(.L_prologue_saves.\@))
    ldi r31, hi8(gs(.L_prologue_saves.\@))
    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_saves.\@:
.endm

;; Epilogue stuff

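;; Symmetrically, do_epilogue_restores enters __epilogue_restores__ at
;; ((18 - n_pushed) * 2) bytes so that exactly n_pushed registers are
;; reloaded; R30 carries n_pushed so the save area can be deallocated.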
.macro do_epilogue_restores n_pushed n_frame=0
    in      r28, __SP_L__
#ifdef __AVR_HAVE_SPH__
    in      r29, __SP_H__
.if \n_frame > 63
    subi    r28, lo8(-\n_frame)
    sbci    r29, hi8(-\n_frame)
.elseif \n_frame > 0
    adiw    r28, \n_frame
.endif
#else
    clr     r29
.if \n_frame > 0
    subi    r28, lo8(-\n_frame)
.endif
#endif /* HAVE SPH */
    ldi     r30, \n_pushed
    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm

;; Support function entry and exit for convenience

.macro DEFUN name
.global \name
.func \name
\name:
.endm

.macro ENDF name
.size \name, .-\name
.endfunc
.endm

.macro FALIAS name
.global \name
.func \name
\name:
.size \name, .-\name
.endfunc
.endm

;; Skip next instruction, typically a jump target
#define skip cpse 0,0

;; Negate a 2-byte value held in consecutive registers
.macro NEG2  reg
    com     \reg+1
    neg     \reg
    sbci    \reg+1, -1
.endm
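;; How this works:  COM sets  reg+1 = ~reg+1,  NEG sets  reg = -reg  and
;; raises Carry iff the low byte was non-zero.  SBCI reg+1, -1  then adds
;; 1 - Carry  to the high byte, which completes  ~REG + 1 = -REG.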

;; Negate a 4-byte value held in consecutive registers
;; Sets the V flag for signed overflow tests if REG >= 16
.macro NEG4  reg
    com     \reg+3
    com     \reg+2
    com     \reg+1
.if \reg >= 16
    neg     \reg
    sbci    \reg+1, -1
    sbci    \reg+2, -1
    sbci    \reg+3, -1
.else
    com     \reg
    adc     \reg,   __zero_reg__
    adc     \reg+1, __zero_reg__
    adc     \reg+2, __zero_reg__
    adc     \reg+3, __zero_reg__
.endif
.endm

#define exp_lo(N)  hlo8 ((N) << 23)
#define exp_hi(N)  hhi8 ((N) << 23)
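;; exp_lo(N) / exp_hi(N) yield bytes 2 and 3 of an IEEE-754 single
;; whose biased exponent is N, i.e. of the bit pattern  N << 23;
;; they are intended for the floating-point support routines.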


.section .text.libgcc.mul, "ax", @progbits

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
#if !defined (__AVR_HAVE_MUL__)
/*******************************************************
    Multiplication  8 x 8  without MUL
*******************************************************/
#if defined (L_mulqi3)

#define	r_arg2	r22		/* multiplicand */
#define	r_arg1 	r24		/* multiplier */
#define r_res	__tmp_reg__	/* result */

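;; The routine below is classic shift-and-add multiplication.  A rough
;; C sketch of the same algorithm (illustration only; the function name
;; is hypothetical and not part of the library):
;;
;;    unsigned char mulqi (unsigned char a, unsigned char b)
;;    {
;;        unsigned char res = 0;
;;        while (a != 0 && b != 0)
;;        {
;;            if (a & 1)
;;                res += b;               /* add shifted multiplicand */
;;            b = (unsigned char) (b << 1);
;;            a >>= 1;                    /* consume one multiplier bit */
;;        }
;;        return res;
;;    }
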
DEFUN __mulqi3
	clr	r_res		; clear result
__mulqi3_loop:
	sbrc	r_arg1,0
	add	r_res,r_arg2
	add	r_arg2,r_arg2	; shift multiplicand
	breq	__mulqi3_exit	; while multiplicand != 0
	lsr	r_arg1		;
	brne	__mulqi3_loop	; exit if multiplier = 0
__mulqi3_exit:
	mov	r_arg1,r_res	; result to return register
	ret
ENDF __mulqi3

#undef r_arg2
#undef r_arg1
#undef r_res

#endif 	/* defined (L_mulqi3) */


/*******************************************************
    Widening Multiplication  16 = 8 x 8  without MUL
    Multiplication  16 x 16  without MUL
*******************************************************/

#define A0  r22
#define A1  r23
#define B0  r24
#define BB0 r20
#define B1  r25
;; Output overlaps input, thus expand result in CC0/1
#define C0  r24
#define C1  r25
#define CC0  __tmp_reg__
#define CC1  R21

#if defined (L_umulqihi3)
;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
    clr     A1
    clr     B1
    XJMP    __mulhi3
ENDF __umulqihi3
#endif /* L_umulqihi3 */

#if defined (L_mulqihi3)
;;; R25:R24 = (signed int) R22 * (signed int) R24
;;; (C1:C0) = (signed int) A0  * (signed int) B0
;;; Clobbers: __tmp_reg__, R20..R23
DEFUN __mulqihi3
    ;; Sign-extend B0
    clr     B1
    sbrc    B0, 7
    com     B1
    ;; The multiplication runs twice as fast if A1 is zero, thus:
    ;; Zero-extend A0
    clr     A1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; Store  B0 * sign of A
    clr     BB0
    sbrc    A0, 7
    mov     BB0, B0
    call    __mulhi3
#else /* have no CALL */
    ;; Skip sign-extension of A if A >= 0
    ;; Same size as with the first alternative but avoids errata skip
    ;; and is faster if A >= 0
    sbrs    A0, 7
    rjmp    __mulhi3
    ;; If  A < 0  store B
    mov     BB0, B0
    rcall   __mulhi3
#endif /* HAVE_JMP_CALL */
    ;; 1-extend A after the multiplication
    sub     C1, BB0
    ret
ENDF __mulqihi3
#endif /* L_mulqihi3 */

#if defined (L_mulhi3)
;;; R25:R24 = R23:R22 * R25:R24
;;; (C1:C0) = (A1:A0) * (B1:B0)
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3

    ;; Clear result
    clr     CC0
    clr     CC1
    rjmp 3f
1:
    ;; Bit n of A is 1  -->  C += B << n
    add     CC0, B0
    adc     CC1, B1
2:
    lsl     B0
    rol     B1
3:
    ;; If B == 0 we are ready
    sbiw    B0, 0
    breq 9f

    ;; Carry = n-th bit of A
    lsr     A1
    ror     A0
    ;; If bit n of A is set, then go add  B * 2^n  to  C
    brcs 1b

    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
    ;; Thus, it is sufficient to CPC the high part to test A against 0
    cpc     A1, __zero_reg__
    ;; Only proceed if A != 0
    brne    2b
9:
    ;; Move Result into place
    mov     C0, CC0
    mov     C1, CC1
    ret
ENDF  __mulhi3
#endif /* L_mulhi3 */

#undef A0
#undef A1
#undef B0
#undef BB0
#undef B1
#undef C0
#undef C1
#undef CC0
#undef CC1


#define A0 22
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3

#define B0 18
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define CC0 26
#define CC1 CC0+1
#define CC2 30
#define CC3 CC2+1

#define C0 22
#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16  without MUL
*******************************************************/

#if defined (L_umulhisi3)
DEFUN __umulhisi3
    wmov    B0, 24
    ;; Zero-extend B
    clr     B2
    clr     B3
    ;; Zero-extend A
    wmov    A2, B2
    XJMP    __mulsi3
ENDF __umulhisi3
#endif /* L_umulhisi3 */

#if defined (L_mulhisi3)
DEFUN __mulhisi3
    wmov    B0, 24
    ;; Sign-extend B
    lsl     r25
    sbc     B2, B2
    mov     B3, B2
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Sign-extend A
    clr     A2
    sbrc    A1, 7
    com     A2
    mov     A3, A2
    XJMP __mulsi3
#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; Zero-extend A and __mulsi3 will run at least twice as fast
    ;; compared to a sign-extended A.
    clr     A2
    clr     A3
    sbrs    A1, 7
    XJMP __mulsi3
    ;; If  A < 0  then handle the  B * 0xffff....  part ahead of the
    ;; actual multiplication by initializing the high part of the
    ;; result CC with -B.
    wmov    CC2, A2
    sub     CC2, B0
    sbc     CC3, B1
    XJMP __mulsi3_helper
#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
ENDF __mulhisi3
#endif /* L_mulhisi3 */


/*******************************************************
    Multiplication  32 x 32  without MUL
*******************************************************/

#if defined (L_mulsi3)
DEFUN __mulsi3
    ;; Clear result
    clr     CC2
    clr     CC3
    ;; FALLTHRU
ENDF  __mulsi3

DEFUN __mulsi3_helper
    clr     CC0
    clr     CC1
    rjmp 3f

1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
    ;; CC += B
    add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3

2:  ;; B <<= 1
    lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3

3:  ;; A >>= 1:  Carry = n-th bit of A
    lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0

    brcs 1b
    ;; Only continue if  A != 0
    sbci    A1, 0
    brne 2b
    sbiw    A2, 0
    brne 2b

    ;; All bits of A are consumed:  Copy result to return register C
    wmov    C0, CC0
    wmov    C2, CC2
    ret
ENDF __mulsi3_helper
#endif /* L_mulsi3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef CC0
#undef CC1
#undef CC2
#undef CC3

#endif /* !defined (__AVR_HAVE_MUL__) */
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#if defined (__AVR_HAVE_MUL__)
#define A0 26
#define B0 18
#define C0 22

#define A1 A0+1

#define B1 B0+1
#define B2 B0+2
#define B3 B0+3

#define C1 C0+1
#define C2 C0+2
#define C3 C0+3

/*******************************************************
    Widening Multiplication  32 = 16 x 16  with MUL
*******************************************************/

#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulhisi3
    XCALL   __umulhisi3
    ;; Sign-extend B
    tst     B1
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
#endif /* L_mulhisi3 */

#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
DEFUN __usmulhisi3
    XCALL   __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3

DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs    A1, 7
    ret
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __usmulhisi3_tail
#endif /* L_usmulhisi3 */

#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
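;;; The 16 x 16 -> 32 product is assembled from four 8 x 8 MULs:
;;;   A*B = A0*B0 + ((A0*B1 + A1*B0) << 8) + (A1*B1 << 16)
;;; where each MUL leaves its 16-bit partial product in R1:R0.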
DEFUN __umulhisi3
    mul     A0, B0
    movw    C0, r0
    mul     A1, B1
    movw    C2, r0
    mul     A0, B1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; This function is used by many other routines, often multiple
    ;; times.  Therefore, if the flash size is not too limited, avoid
    ;; the RCALL and invest 6 bytes to speed things up.
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
#else
    rcall   1f
#endif
    mul     A1, B0
1:  add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __umulhisi3
#endif /* L_umulhisi3 */

/*******************************************************
    Widening Multiplication  32 = 16 x 32  with MUL
*******************************************************/

#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have a problem skipping a 2-word instruction
    tst     A1
    brmi    __mulohisi3
#else
    sbrs    A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP    __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL   __muluhisi3
    ;; One-extend R27:R26 (A1:A0)
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __mulohisi3
#endif /* L_mulshisi3 */

#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
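;;; Truncated to 32 bits this is
;;;   C = A*(B1:B0) + (A0*B2 << 16) + ((A0*B3 + A1*B2) << 24);
;;; partial products that only affect bits 32 and up are dropped.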
DEFUN __muluhisi3
    XCALL   __umulhisi3
    mul     A0, B3
    add     C3, r0
    mul     A1, B2
    add     C3, r0
    mul     A0, B2
    add     C2, r0
    adc     C3, r1
    clr     __zero_reg__
    ret
ENDF __muluhisi3
#endif /* L_muluhisi3 */

/*******************************************************
    Multiplication  32 x 32  with MUL
*******************************************************/

#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
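;;; Split as  C = lo16(C)*B + (hi16(C)*lo16(B) << 16)  mod 2^32:
;;; __muluhisi3 handles the low word, and the MULs below add the
;;; high word's (truncated) contribution to the upper half.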
DEFUN __mulsi3
    movw    A0, C0
    push    C2
    push    C3
    XCALL   __muluhisi3
    pop     A1
    pop     A0
    ;; A1:A0 now contains the high word of A
    mul     A0, B0
    add     C2, r0
    adc     C3, r1
    mul     A0, B1
    add     C3, r0
    mul     A1, B0
    add     C3, r0
    clr     __zero_reg__
    ret
ENDF __mulsi3
#endif /* L_mulsi3 */

#undef A0
#undef A1

#undef B0
#undef B1
#undef B2
#undef B3

#undef C0
#undef C1
#undef C2
#undef C3

#endif /* __AVR_HAVE_MUL__ */

/*******************************************************
       Multiplication 24 x 24 with MUL
*******************************************************/

#if defined (L_mulpsi3)

;; A[0..2]: In: Multiplicand; Out: Product
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Multiplier
#define B0  18
#define B1  B0+1
#define B2  B0+2

#if defined (__AVR_HAVE_MUL__)

;; C[0..2]: Expand Result
#define C0  22
#define C1  C0+1
#define C2  C0+2

;; R24:R22 *= R20:R18
;; Clobbers: r21, r25, r26, r27, __tmp_reg__

#define AA0 26
#define AA2 21

DEFUN __mulpsi3
    wmov    AA0, A0
    mov     AA2, A2
    XCALL   __umulhisi3
    mul     AA2, B0     $  add  C2, r0
    mul     AA0, B2     $  add  C2, r0
    clr     __zero_reg__
    ret
ENDF __mulpsi3

#undef AA2
#undef AA0

#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

;; C[0..2]: Expand Result
#define C0  0
#define C1  C0+1
#define C2  21

;; R24:R22 *= R20:R18
;; Clobbers: __tmp_reg__, R18, R19, R20, R21

DEFUN __mulpsi3

    ;; C[] = 0
    clr     __tmp_reg__
    clr     C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR  B2     $  ror  B1     $  ror  B0

    ;; If the N-th Bit of B[] was set...
    brcc    1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol  A1     $  rol  A2

    ;; Loop until B[] is 0
    subi B0,0   $  sbci B1,0   $  sbci B2,0
    brne    0b

    ;; Copy C[] to the return Register A[]
    wmov    A0, C0
    mov     A2, C2

    clr     __zero_reg__
    ret
ENDF __mulpsi3

#undef C2
#undef C1
#undef C0

#endif /* HAVE_MUL */

#undef B2
#undef B1
#undef B0

#undef A2
#undef A1
#undef A0

#endif /* L_mulpsi3 */

#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)

;; A[0..2]: In: Multiplicand
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; BB: In: Multiplier
#define BB  25

;; C[0..2]: Result
#define C0  18
#define C1  C0+1
#define C2  C0+2

;; C[] = A[] * sign_extend (BB)
DEFUN __mulsqipsi3
    mul     A0, BB
    movw    C0, r0
    mul     A2, BB
    mov     C2, r0
    mul     A1, BB
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    sbrs    BB, 7
    ret
    ;; One-extend BB
    sub     C1, A0
    sbc     C2, A1
    ret
ENDF __mulsqipsi3

#undef C2
#undef C1
#undef C0

#undef BB

#undef A2
#undef A1
#undef A0

#endif /* L_mulsqipsi3  &&  HAVE_MUL */

/*******************************************************
       Multiplication 64 x 64
*******************************************************/

;; A[] = A[] * B[]

;; A[0..7]: In: Multiplicand
;; Out: Product
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Multiplier
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

#if defined (__AVR_HAVE_MUL__)

;; Define C[] for convenience
;; Notice that parts of C[] overlap A[] and B[], respectively
#define C0  16
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

#if defined (L_muldi3)

;; A[]     *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push    r29
    push    r28
    push    r17
    push    r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

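    ;; Only Word Products  Ai * Bj  with i+j <= 3 can affect the low
    ;; 64 Bits; those with i+j == 3 contribute just their low 16 Bits
    ;; (Bytes 6 and 7 of the Result).
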
    ;; 3 * 0  +  0 * 3
    mul  A7,B0  $             $  mov C7,r0
    mul  A0,B7  $             $  add C7,r0
    mul  A6,B1  $             $  add C7,r0
    mul  A6,B0  $  mov C6,r0  $  add C7,r1
    mul  B6,A1  $             $  add C7,r0
    mul  B6,A0  $  add C6,r0  $  adc C7,r1

    ;; 1 * 2
    mul  A2,B4  $  add C6,r0  $  adc C7,r1
    mul  A3,B4  $             $  add C7,r0
    mul  A2,B5  $             $  add C7,r0

    push    A5
    push    A4
    push    B1
    push    B0
    push    A3
    push    A2

    ;; 0 * 0
    wmov    26, B0
    XCALL   __umulhisi3
    wmov    C0, 22
    wmov    C2, 24

    ;; 0 * 2
    wmov    26, B4
    XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25

    wmov    26, B2
    ;; 0 * 1
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 1 * 1
    wmov    26, B2
    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    pop     r26
    pop     r27
    ;; 1 * 0
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 2 * 0
    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    ;; 2 * 1
    wmov    26, B2
    XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23

    ;; A[] = C[]
    wmov    A0, C0
    ;; A2 = C2 already
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     r16
    pop     r17
    pop     r28
    pop     r29
    ret
ENDF __muldi3
#endif /* L_muldi3 */

#if defined (L_muldi3_6)
;; A helper for some 64-bit multiplications with MUL available
DEFUN __muldi3_6
    XCALL   __umulhisi3
    add     C2, 22
    adc     C3, 23
    adc     C4, 24
    adc     C5, 25
    brcc    0f
    adiw    C6, 1
0:  ret
ENDF __muldi3_6
#endif /* L_muldi3_6 */

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#else /* !HAVE_MUL */

#if defined (L_muldi3)

#define C0  26
#define C1  C0+1
#define C2  C0+2
#define C3  C0+3
#define C4  C0+4
#define C5  C0+5
#define C6  0
#define C7  C6+1

#define Loop 9

;; A[]     *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function

DEFUN __muldi3
    push    r29
    push    r28
    push    Loop

    ldi     C0, 64
    mov     Loop, C0

    ;; C[] = 0
    clr     __tmp_reg__
    wmov    C0, 0
    wmov    C2, 0
    wmov    C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
    ror  B3     $  ror  B2     $  ror  B1     $  ror  B0

    ;; If the N-th Bit of B[] was set then...
    brcc    1f
    ;; ...finish Rotation...
    ori     B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
    adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
    rol  A4     $  rol  A5     $  rol  A6     $  rol  A7

    dec     Loop
    brne    0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     Loop
    pop     r28
    pop     r29
    ret
ENDF __muldi3

#undef Loop

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#endif /* L_muldi3 */
#endif /* HAVE_MUL */

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0

/*******************************************************
   Widening Multiplication 64 = 32 x 32  with  MUL
*******************************************************/

#if defined (__AVR_HAVE_MUL__)
#define A0 r22
#define A1 r23
#define A2 r24
#define A3 r25

#define B0 r18
#define B1 r19
#define B2 r20
#define B3 r21

#define C0  18
#define C1  C0+1
#define C2  20
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  C4+2
#define C7  C4+3

#if defined (L_umulsidi3)

;; Unsigned widening 64 = 32 * 32 Multiplication with MUL

;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    clt
    ;; FALLTHRU
ENDF  __umulsidi3
    ;; T = sign (A)
DEFUN __umulsidi3_helper
    push    29  $  push    28 ; Y
    wmov    30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov    26, A0
    XCALL __umulhisi3
    push    23  $  push    22 ; C0
    wmov    28, B0
    wmov    18, B2
    wmov    C2, 24
    push    27  $  push    26 ; A0
    push    19  $  push    18 ; B2
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B2  C2  --  --  --  B0  A2
    ;; 1 * 1
    wmov    26, 30      ; A2
    XCALL __umulhisi3
    ;; Sign-extend A.  T holds the sign of A
    brtc    0f
    ;; Subtract B from the high part of the result
    sub     22, 28
    sbc     23, 29
    sbc     24, 18
    sbc     25, 19
0:  wmov    18, 28      ;; B0
    wmov    C4, 22
    wmov    C6, 24
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B0  C2  --  --  A2  C4  C6
    ;;
    ;; 1 * 0
    XCALL __muldi3_6
    ;; 0 * 1
    pop     26  $   pop 27  ;; B2
    pop     18  $   pop 19  ;; A0
    XCALL __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov    22, C4
    wmov    24, C6
    wmov    30, 18 ; A0
    pop     C0  $   pop C1

    ;; Epilogue
    pop     28  $   pop 29  ;; Y
    ret
ENDF __umulsidi3_helper
#endif /* L_umulsidi3 */


#if defined (L_mulsidi3)

;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    bst     A3, 7
    sbrs    B3, 7           ; Enhanced core has no skip bug
    XJMP __umulsidi3_helper

    ;; B needs sign-extension
    push    A3
    push    A2
    XCALL __umulsidi3_helper
    ;; A0 survived in Z
    sub     r22, r30
    sbc     r23, r31
    pop     r26
    pop     r27
    sbc     r24, r26
    sbc     r25, r27
    ret
ENDF __mulsidi3
#endif /* L_mulsidi3 */

#undef A0
#undef A1
#undef A2
#undef A3
#undef B0
#undef B1
#undef B2
#undef B3
#undef C0
#undef C1
#undef C2
#undef C3
#undef C4
#undef C5
#undef C6
#undef C7
#endif /* HAVE_MUL */

/**********************************************************
    Widening Multiplication 64 = 32 x 32  without  MUL
**********************************************************/

#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
#define A0 18
#define A1 A0+1
#define A2 A0+2
#define A3 A0+3
#define A4 A0+4
#define A5 A0+5
#define A6 A0+6
#define A7 A0+7

#define B0 10
#define B1 B0+1
#define B2 B0+2
#define B3 B0+3
#define B4 B0+4
#define B5 B0+5
#define B6 B0+6
#define B7 B0+7

#define AA0 22
#define AA1 AA0+1
#define AA2 AA0+2
#define AA3 AA0+3

#define BB0 18
#define BB1 BB0+1
#define BB2 BB0+2
#define BB3 BB0+3

#define Mask r30

;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
DEFUN __mulsidi3
    set
    skip
    ;; FALLTHRU
ENDF  __mulsidi3

DEFUN __umulsidi3
    clt     ; skipped
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ldi     Mask, 0xff
    bld     Mask, 7
    ;; Move B into place...
    wmov    B0, BB0
    wmov    B2, BB2
    ;; ...and extend it
    and     BB3, Mask
    lsl     BB3
    sbc     B4, B4
    mov     B5, B4
    wmov    B6, B4
    ;; Move A into place...
    wmov    A0, AA0
    wmov    A2, AA2
    ;; ...and extend it
    and     AA3, Mask
    lsl     AA3
    sbc     A4, A4
    mov     A5, A4
    wmov    A6, A4
    XCALL   __muldi3
    do_epilogue_restores 10
ENDF __umulsidi3

#undef A0
#undef A1
#undef A2
#undef A3
#undef A4
#undef A5
#undef A6
#undef A7
#undef B0
#undef B1
#undef B2
#undef B3
#undef B4
#undef B5
#undef B6
#undef B7
#undef AA0
#undef AA1
#undef AA2
#undef AA3
#undef BB0
#undef BB1
#undef BB2
#undef BB3
#undef Mask
#endif /* L_mulsidi3 && !HAVE_MUL */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


.section .text.libgcc.div, "ax", @progbits

/*******************************************************
       Division 8 / 8 => (result + remainder)
*******************************************************/
#define	r_rem	r25	/* remainder */
#define	r_arg1	r24	/* dividend, quotient */
#define	r_arg2	r22	/* divisor */
#define	r_cnt	r23	/* loop count */

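;; All udivmod routines below use the same shift-and-subtract scheme.
;; A rough C sketch for the 8-bit case (illustration only; the asm
;; additionally shifts the quotient bits in complemented via the Carry
;; flag, hence the final COM):
;;
;;    unsigned char udivmodqi (unsigned char num, unsigned char den,
;;                             unsigned char *rem)
;;    {
;;        unsigned char quo = 0, r = 0;
;;        for (signed char i = 7; i >= 0; i--)
;;        {
;;            r = (unsigned char) ((r << 1) | ((num >> i) & 1));
;;            quo = (unsigned char) (quo << 1);
;;            if (r >= den)       /* divisor fits: reduce remainder */
;;            {
;;                r -= den;
;;                quo |= 1;
;;            }
;;        }
;;        *rem = r;
;;        return quo;
;;    }
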
#if defined (L_udivmodqi4)
DEFUN __udivmodqi4
	sub	r_rem,r_rem	; clear remainder and carry
	ldi	r_cnt,9		; init loop counter
	rjmp	__udivmodqi4_ep	; jump to entry point
__udivmodqi4_loop:
	rol	r_rem		; shift dividend into remainder
	cp	r_rem,r_arg2	; compare remainder & divisor
	brcs	__udivmodqi4_ep	; remainder < divisor
	sub	r_rem,r_arg2	; reduce remainder
__udivmodqi4_ep:
	rol	r_arg1		; shift dividend (with CARRY)
	dec	r_cnt		; decrement loop counter
	brne	__udivmodqi4_loop
	com	r_arg1		; complement result
				; because C flag was complemented in loop
	ret
ENDF __udivmodqi4
#endif /* defined (L_udivmodqi4) */

#if defined (L_divmodqi4)
DEFUN __divmodqi4
        bst     r_arg1,7	; store sign of dividend
        mov     __tmp_reg__,r_arg1
        eor     __tmp_reg__,r_arg2; r0.7 is sign of result
        sbrc	r_arg1,7
	neg     r_arg1		; dividend negative : negate
        sbrc	r_arg2,7
	neg     r_arg2		; divisor negative : negate
	XCALL	__udivmodqi4	; do the unsigned div/mod
	brtc	__divmodqi4_1
	neg	r_rem		; correct remainder sign
__divmodqi4_1:
	sbrc	__tmp_reg__,7
	neg	r_arg1		; correct result sign
__divmodqi4_exit:
	ret
ENDF __divmodqi4
#endif /* defined (L_divmodqi4) */

#undef r_rem
#undef r_arg1
#undef r_arg2
#undef r_cnt


/*******************************************************
       Division 16 / 16 => (result + remainder)
*******************************************************/
#define	r_remL	r26	/* remainder Low */
#define	r_remH	r27	/* remainder High */

/* return: remainder */
#define	r_arg1L	r24	/* dividend Low */
#define	r_arg1H	r25	/* dividend High */

/* return: quotient */
#define	r_arg2L	r22	/* divisor Low */
#define	r_arg2H	r23	/* divisor High */

#define	r_cnt	r21	/* loop count */

#if defined (L_udivmodhi4)
DEFUN __udivmodhi4
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	ldi	r_cnt,17	; init loop counter
	rjmp	__udivmodhi4_ep	; jump to entry point
__udivmodhi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
        brcs	__udivmodhi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; reduce remainder
        sbc	r_remH,r_arg2H
__udivmodhi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY)
        rol	r_arg1H
        dec	r_cnt		; decrement loop counter
        brne	__udivmodhi4_loop
	com	r_arg1L
	com	r_arg1H
; div/mod results to return registers, as for the div() function
	mov_l	r_arg2L, r_arg1L	; quotient
	mov_h	r_arg2H, r_arg1H
	mov_l	r_arg1L, r_remL		; remainder
	mov_h	r_arg1H, r_remH
	ret
ENDF __udivmodhi4
#endif /* defined (L_udivmodhi4) */

#if defined (L_divmodhi4)
DEFUN __divmodhi4
    .global _div
_div:
    bst     r_arg1H,7           ; store sign of dividend
    mov     __tmp_reg__,r_arg2H
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    rcall   __divmodhi4_neg1    ; dividend negative: negate
0:
    sbrc    r_arg2H,7
    rcall   __divmodhi4_neg2    ; divisor negative: negate
    XCALL   __udivmodhi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__,7
    rcall   __divmodhi4_neg2    ; correct remainder sign
    brtc    __divmodhi4_exit
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com     r_arg1H
    neg     r_arg1L
    sbci    r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/result sign
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
#endif /* defined (L_divmodhi4) */

#undef r_remH
#undef r_remL

#undef r_arg1H
#undef r_arg1L

#undef r_arg2H
#undef r_arg2L

#undef r_cnt

/*******************************************************
       Division 24 / 24 => (result + remainder)
*******************************************************/

;; A[0..2]: In: Dividend; Out: Quotient
#define A0  22
#define A1  A0+1
#define A2  A0+2

;; B[0..2]: In: Divisor;   Out: Remainder
#define B0  18
#define B1  B0+1
#define B2  B0+2

;; C[0..2]: Expand remainder
#define C0  __zero_reg__
#define C1  26
#define C2  25

;; Loop counter
#define r_cnt   21

#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22  udiv  R20:R18
;; R20:R18 = R24:R22  umod  R20:R18
;; Clobbers: R21, R25, R26

DEFUN __udivmodpsi4
    ; init loop counter
    ldi     r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr     C1
    sub     C2, C2
    ; jump to entry point
    rjmp    __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol     C0
    rol     C1
    rol     C2
    ; compare remainder & divisor
    cp      C0, B0
    cpc     C1, B1
    cpc     C2, B2
    brcs    __udivmodpsi4_start ; remainder < divisor
    sub     C0, B0              ; reduce remainder
    sbc     C1, B1
    sbc     C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY)
    rol     A0
    rol     A1
    rol     A2
    ; decrement loop counter
    dec     r_cnt
    brne    __udivmodpsi4_loop
    com     A0
    com     A1
    com     A2
    ; div/mod results to return registers
    ; remainder
    mov     B0, C0
    mov     B1, C1
    mov     B2, C2
    clr     __zero_reg__ ; C0
    ret
ENDF __udivmodpsi4
#endif /* defined (L_udivmodpsi4) */

#if defined (L_divmodpsi4)
;; R24:R22 = R24:R22  div  R20:R18
;; R20:R18 = R24:R22  mod  R20:R18
;; Clobbers: T, __tmp_reg__, R21, R25, R26

DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst     A2, 7
    brtc    0f
    com     __tmp_reg__
    ; Adjust dividend's sign
    rcall   __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc    B2, 7
    rcall   __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL   __udivmodpsi4

    ; Adjust quotient's sign
    sbrc    __tmp_reg__, 7
    rcall   __divmodpsi4_negA

    ; Adjust remainder's sign
    brtc    __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com     B2
    com     B1
    neg     B0
    sbci    B1, -1
    sbci    B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com     A2
    com     A1
    neg     A0
    sbci    A1, -1
    sbci    A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
#endif /* defined (L_divmodpsi4) */

#undef A0
#undef A1
#undef A2

#undef B0
#undef B1
#undef B2

#undef C0
#undef C1
#undef C2

#undef r_cnt

/*******************************************************
       Division 32 / 32 => (result + remainder)
*******************************************************/
#define	r_remHH	r31	/* remainder High */
#define	r_remHL	r30
#define	r_remH	r27
#define	r_remL	r26	/* remainder Low */

/* return: remainder */
#define	r_arg1HH r25	/* dividend High */
#define	r_arg1HL r24
#define	r_arg1H  r23
#define	r_arg1L  r22	/* dividend Low */

/* return: quotient */
#define	r_arg2HH r21	/* divisor High */
#define	r_arg2HL r20
#define	r_arg2H  r19
#define	r_arg2L  r18	/* divisor Low */

#define	r_cnt __zero_reg__  /* loop count (0 after the loop!) */

#if defined (L_udivmodsi4)
DEFUN __udivmodsi4
	ldi	r_remL, 33	; init loop counter
	mov	r_cnt, r_remL
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	mov_l	r_remHL, r_remL
	mov_h	r_remHH, r_remH
	rjmp	__udivmodsi4_ep	; jump to entry point
__udivmodsi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
	rol	r_remHL
	rol	r_remHH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
	cpc	r_remHL,r_arg2HL
	cpc	r_remHH,r_arg2HH
	brcs	__udivmodsi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; reduce remainder
        sbc	r_remH,r_arg2H
        sbc	r_remHL,r_arg2HL
        sbc	r_remHH,r_arg2HH
__udivmodsi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY)
        rol	r_arg1H
        rol	r_arg1HL
        rol	r_arg1HH
        dec	r_cnt		; decrement loop counter
        brne	__udivmodsi4_loop
				; __zero_reg__ now restored (r_cnt == 0)
	com	r_arg1L
	com	r_arg1H
	com	r_arg1HL
	com	r_arg1HH
; div/mod results to return registers, as for the ldiv() function
	mov_l	r_arg2L,  r_arg1L	; quotient
	mov_h	r_arg2H,  r_arg1H
	mov_l	r_arg2HL, r_arg1HL
	mov_h	r_arg2HH, r_arg1HH
	mov_l	r_arg1L,  r_remL	; remainder
	mov_h	r_arg1H,  r_remH
	mov_l	r_arg1HL, r_remHL
	mov_h	r_arg1HH, r_remHH
	ret
ENDF __udivmodsi4
#endif /* defined (L_udivmodsi4) */

#if defined (L_divmodsi4)
DEFUN __divmodsi4
    mov     __tmp_reg__,r_arg2HH
    bst     r_arg1HH,7          ; store sign of dividend
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    XCALL   __negsi2            ; dividend negative: negate
0:
    sbrc    r_arg2HH,7
    rcall   __divmodsi4_neg2    ; divisor negative: negate
    XCALL   __udivmodsi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__, 7      ; correct quotient sign
    rcall   __divmodsi4_neg2
    brtc    __divmodsi4_exit    ; correct remainder sign
    XJMP    __negsi2
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2HH
    com     r_arg2HL
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
    sbci    r_arg2HL,0xff
    sbci    r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
#endif /* defined (L_divmodsi4) */

#if defined (L_negsi2)
;; (set (reg:SI 22)
;;      (neg:SI (reg:SI 22)))
;; Sets the V flag for signed overflow tests
DEFUN __negsi2
    NEG4    22
    ret
ENDF __negsi2
#endif /* L_negsi2 */

#undef r_remHH
#undef r_remHL
#undef r_remH
#undef r_remL
#undef r_arg1HH
#undef r_arg1HL
#undef r_arg1H
#undef r_arg1L
#undef r_arg2HH
#undef r_arg2HL
#undef r_arg2H
#undef r_arg2L
#undef r_cnt

/*******************************************************
       Division 64 / 64
       Modulo   64 % 64
*******************************************************/

;; Use the Speed-optimized Version on "big" Devices, i.e. Devices with
;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a Connection between SP Size and
;; Flash Size, so SP Size can be used to test for Flash Size.

#if defined (__AVR_HAVE_JMP_CALL__)
#   define SPEED_DIV 8
#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
#   define SPEED_DIV 16
#else
#   define SPEED_DIV 0
#endif

;; A[0..7]: In: Dividend;
;; Out: Quotient  (T = 0)
;; Out: Remainder (T = 1)
#define A0  18
#define A1  A0+1
#define A2  A0+2
#define A3  A0+3
#define A4  A0+4
#define A5  A0+5
#define A6  A0+6
#define A7  A0+7

;; B[0..7]: In: Divisor;   Out: Clobber
#define B0  10
#define B1  B0+1
#define B2  B0+2
#define B3  B0+3
#define B4  B0+4
#define B5  B0+5
#define B6  B0+6
#define B7  B0+7

;; C[0..7]: Expand remainder;  Out: Remainder (unused)
#define C0  8
#define C1  C0+1
#define C2  30
#define C3  C2+1
#define C4  28
#define C5  C4+1
#define C6  26
#define C7  C6+1

;; Holds Signs during Division Routine
#define SS      __tmp_reg__

;; Bit-Counter in Division Routine
#define R_cnt   __zero_reg__

;; Scratch Register for Negation
#define NN      r31

#if defined (L_udivdi3)

;; R25:R18 = R24:R18  umod  R17:R10
;; Ordinary ABI-Function

DEFUN __umoddi3
    set
    rjmp __udivdi3_umoddi3
ENDF __umoddi3

;; R25:R18 = R24:R18  udiv  R17:R10
;; Ordinary ABI-Function

DEFUN __udivdi3
    clt
ENDF __udivdi3

DEFUN __udivdi3_umoddi3
    push    C0
    push    C1
    push    C4
    push    C5
    XCALL   __udivmod64
    pop     C5
    pop     C4
    pop     C1
    pop     C0
    ret
ENDF __udivdi3_umoddi3
#endif /* L_udivdi3 */

#if defined (L_udivmod64)

;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
;; No Registers saved/restored; the Callers will take Care.
;; Preserves B[] and T-flag
;; T = 0: Compute Quotient  in A[]
;; T = 1: Compute Remainder in A[] and shift SS one Bit left

DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr     C0
    clr     C1
    wmov    C2, C0
    wmov    C4, C0
    ldi     C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop-Counter
    mov     R_cnt, C7
    wmov    C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push    A7
    clr     C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
    cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
    brcc    2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
               $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
    mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
    mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
    mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0

    ;; 8 Bits are done
    subi    C7, 8
    brne    1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop     C7
    ;; Divisor is greater than Dividend. We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp    5f

2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
    mov     R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop     C7
    clr     C7

#elif  SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp      A7, B3
    cpc     C0, B4
    cpc     C1, B5
    cpc     C2, B6
    cpc     C3, B7
    brcc    2f

    ;; Divisor is greater than shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov  C2,A6  $  wmov C0,A4
    wmov  A6,A2  $  wmov A4,A0
    wmov  A2,C6  $  wmov A0,C4

    ;; Set Bit Counter to 32
    lsr     R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

;; The actual Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0     $  rol A1     $  rol A2     $  rol A3
    rol A4     $  rol A5     $  rol A6     $  rol A7

    ;; ...into Remainder
    rol C0     $  rol C1     $  rol C2     $  rol C3
    rol C4     $  rol C5     $  rol C6     $  rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
    cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
    sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7

    ;; ...and set according Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec     R_cnt
    brne    3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy Remainder to A[]
5:  brtc    6f
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl     SS

6:  ret

ENDF __udivmod64
#endif /* L_udivmod64 */


#if defined (L_divdi3)

;; R25:R18 = R24:R18  mod  R17:R10
;; Ordinary ABI-Function

DEFUN __moddi3
    set
    rjmp    __divdi3_moddi3
ENDF __moddi3

;; R25:R18 = R24:R18  div  R17:R10
;; Ordinary ABI-Function

DEFUN __divdi3
    clt
ENDF __divdi3

DEFUN  __divdi3_moddi3
#if SPEED_DIV
    mov     r31, A7
    or      r31, B7
    brmi    0f
    ;; Both Sign Bits are 0:  the following Complexity is not needed
    XJMP    __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed
    do_prologue_saves 12

    ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov     SS, A7
    asr     SS
    ;; Adjust Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl    22f
#else
    brpl    21f
#endif /* SPEED_DIV */

    XCALL   __negdi2

    ;; Adjust Divisor's Sign and SS.7 as needed
21: tst     B7
    brpl    3f
22: ldi     NN, 1 << 7
    eor     SS, NN

    ldi NN, -1
    com B4     $  com B5     $  com B6     $  com B7
               $  com B1     $  com B2     $  com B3
    NEG B0
               $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
    sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
    XCALL   __udivmod64

    ;; Adjust Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst     SS
    brpl    4f
#else
    sbrc    SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL   __negdi2

4:  ;; Epilogue: Restore 12 Registers and return
    do_epilogue_restores 12

ENDF __divdi3_moddi3

#endif /* L_divdi3 */

#undef R_cnt
#undef SS
#undef NN

.section .text.libgcc, "ax", @progbits

#define TT __tmp_reg__

#if defined (L_adddi3)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __adddi3
    ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
    adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
    ret
ENDF __adddi3
#endif /* L_adddi3 */

#if defined (L_adddi3_s8)
;; (set (reg:DI 18)
;;      (plus:DI (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
DEFUN __adddi3_s8
    clr     TT
    sbrc    r26, 7
    com     TT
    ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
    adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
    ret
ENDF __adddi3_s8
#endif /* L_adddi3_s8 */

#if defined (L_subdi3)
;; (set (reg:DI 18)
;;      (minus:DI (reg:DI 18)
;;                (reg:DI 10)))
;; Sets the V flag for signed overflow tests
;; Sets the C flag for unsigned overflow tests
DEFUN __subdi3
    SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
    sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
    ret
ENDF __subdi3
#endif /* L_subdi3 */

#if defined (L_cmpdi2)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (reg:DI 10)))
DEFUN __cmpdi2
    CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
    cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
    ret
ENDF __cmpdi2
#endif /* L_cmpdi2 */

#if defined (L_cmpdi2_s8)
;; (set (cc0)
;;      (compare (reg:DI 18)
;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    clr     TT
    sbrc    r26, 7
    com     TT
    CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
    cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
    ret
ENDF __cmpdi2_s8
#endif /* L_cmpdi2_s8 */

#if defined (L_negdi2)
;; (set (reg:DI 18)
;;      (neg:DI (reg:DI 18)))
;; Sets the V flag for signed overflow tests
DEFUN __negdi2

    com  A4    $  com  A5    $  com  A6    $  com  A7
               $  com  A1    $  com  A2    $  com  A3
    NEG  A0
               $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
    sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
    ret

ENDF __negdi2
#endif /* L_negdi2 */

#undef TT

#undef C7
#undef C6
#undef C5
#undef C4
#undef C3
#undef C2
#undef C1
#undef C0

#undef B7
#undef B6
#undef B5
#undef B4
#undef B3
#undef B2
#undef B1
#undef B0

#undef A7
#undef A6
#undef A5
#undef A4
#undef A3
#undef A2
#undef A1
#undef A0


.section .text.libgcc.prologue, "ax", @progbits

/**********************************
 * This is a prologue subroutine
 **********************************/
#if defined (L_prologue)

;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
	push r2
	push r3
	push r4
	push r5
	push r6
	push r7
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
#if !defined (__AVR_HAVE_SPH__)
	in	r28,__SP_L__
	sub	r28,r26
	out	__SP_L__,r28
	clr	r29
#elif defined (__AVR_XMEGA__)
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	out	__SP_L__,r28
	out	__SP_H__,r29
#else
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
#endif /* #SP = 8/16 */

#if defined (__AVR_HAVE_EIJMP_EICALL__)
	eijmp
#else
	ijmp
#endif

ENDF __prologue_saves__
#endif /* defined (L_prologue) */

/*
 * This is an epilogue subroutine
 */
#if defined (L_epilogue)

DEFUN __epilogue_restores__
	ldd	r2,Y+18
	ldd	r3,Y+17
	ldd	r4,Y+16
	ldd	r5,Y+15
	ldd	r6,Y+14
	ldd	r7,Y+13
	ldd	r8,Y+12
	ldd	r9,Y+11
	ldd	r10,Y+10
	ldd	r11,Y+9
	ldd	r12,Y+8
	ldd	r13,Y+7
	ldd	r14,Y+6
	ldd	r15,Y+5
	ldd	r16,Y+4
	ldd	r17,Y+3
	ldd	r26,Y+2
#if !defined (__AVR_HAVE_SPH__)
	ldd	r29,Y+1
	add	r28,r30
	out	__SP_L__,r28
	mov	r28, r26
#elif defined (__AVR_XMEGA__)
	ldd  r27,Y+1
	add  r28,r30
	adc  r29,__zero_reg__
	out  __SP_L__,r28
	out  __SP_H__,r29
	wmov 28, 26
#else
	ldd	r27,Y+1
	add	r28,r30
	adc	r29,__zero_reg__
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
	mov_l	r28, r26
	mov_h	r29, r27
#endif /* #SP = 8/16 */
	ret
ENDF __epilogue_restores__
#endif /* defined (L_epilogue) */

#ifdef L_exit
	.section .fini9,"ax",@progbits
DEFUN _exit
	.weak	exit
exit:
ENDF _exit

	/* Code from .fini8 ... .fini1 sections inserted by ld script.  */

	.section .fini0,"ax",@progbits
	cli
__stop_program:
	rjmp	__stop_program
#endif /* defined (L_exit) */

#ifdef L_cleanup
	.weak	_cleanup
	.func	_cleanup
_cleanup:
	ret
.endfunc
#endif /* defined (L_cleanup) */


.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump
DEFUN __tablejump2__
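	;; Z holds a word address; double it into a byte address for LPM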
	lsl	r30
	rol	r31
    ;; FALLTHRU
ENDF __tablejump2__

DEFUN __tablejump__
#if defined (__AVR_HAVE_LPMX__)
	lpm __tmp_reg__, Z+
	lpm r31, Z
	mov r30, __tmp_reg__
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	eijmp
#else
	ijmp
#endif

#else /* !HAVE_LPMX */
	lpm
	adiw r30, 1
	push r0
	lpm
	push r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	in   __tmp_reg__, __EIND__
	push __tmp_reg__
#endif
	ret
#endif /* !HAVE_LPMX */
ENDF __tablejump__
#endif /* defined (L_tablejump) */

#ifdef L_copy_data
	.section .init4,"ax",@progbits
DEFUN __do_copy_data
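	;; Copy the .data initializers from program memory
	;; (__data_load_start) to RAM (__data_start .. __data_end)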
2252#if defined(__AVR_HAVE_ELPMX__)
2253	ldi	r17, hi8(__data_end)
2254	ldi	r26, lo8(__data_start)
2255	ldi	r27, hi8(__data_start)
2256	ldi	r30, lo8(__data_load_start)
2257	ldi	r31, hi8(__data_load_start)
2258	ldi	r16, hh8(__data_load_start)
2259	out	__RAMPZ__, r16
2260	rjmp	.L__do_copy_data_start
2261.L__do_copy_data_loop:
2262	elpm	r0, Z+
2263	st	X+, r0
2264.L__do_copy_data_start:
2265	cpi	r26, lo8(__data_end)
2266	cpc	r27, r17
2267	brne	.L__do_copy_data_loop
2268#elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
2269	ldi	r17, hi8(__data_end)
2270	ldi	r26, lo8(__data_start)
2271	ldi	r27, hi8(__data_start)
2272	ldi	r30, lo8(__data_load_start)
2273	ldi	r31, hi8(__data_load_start)
2274	ldi	r16, hh8(__data_load_start - 0x10000)
2275.L__do_copy_data_carry:
2276	inc	r16
2277	out	__RAMPZ__, r16
2278	rjmp	.L__do_copy_data_start
2279.L__do_copy_data_loop:
2280	elpm
2281	st	X+, r0
2282	adiw	r30, 1
2283	brcs	.L__do_copy_data_carry
2284.L__do_copy_data_start:
2285	cpi	r26, lo8(__data_end)
2286	cpc	r27, r17
2287	brne	.L__do_copy_data_loop
2288#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
2289	ldi	r17, hi8(__data_end)
2290	ldi	r26, lo8(__data_start)
2291	ldi	r27, hi8(__data_start)
2292	ldi	r30, lo8(__data_load_start)
2293	ldi	r31, hi8(__data_load_start)
2294	rjmp	.L__do_copy_data_start
2295.L__do_copy_data_loop:
2296#if defined (__AVR_HAVE_LPMX__)
2297	lpm	r0, Z+
2298#else
2299	lpm
2300	adiw	r30, 1
2301#endif
2302	st	X+, r0
2303.L__do_copy_data_start:
2304	cpi	r26, lo8(__data_end)
2305	cpc	r27, r17
2306	brne	.L__do_copy_data_loop
2307#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
2308#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
2309	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
2310	out	__RAMPZ__, __zero_reg__
2311#endif /* ELPM && RAMPD */
2312ENDF __do_copy_data
2313#endif /* L_copy_data */
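
;; All three variants above implement the same startup loop; a minimal
;; C sketch (illustrative only; the symbols come from the linker
;; script, and the flash read is an (E)LPM in the real code):
;;
;;    extern char __data_start[], __data_end[];
;;    extern const char __data_load_start[];    /* in flash */
;;    void do_copy_data (void)
;;    {
;;        const char *src = __data_load_start;
;;        for (char *dst = __data_start; dst != __data_end; )
;;            *dst++ = *src++;                  /* (E)LPM + ST X+ */
;;    }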

/* __do_clear_bss is only necessary if there is anything in the .bss section.  */

#ifdef L_clear_bss
	.section .init4,"ax",@progbits
DEFUN __do_clear_bss
	ldi	r17, hi8(__bss_end)
	ldi	r26, lo8(__bss_start)
	ldi	r27, hi8(__bss_start)
	rjmp	.do_clear_bss_start
.do_clear_bss_loop:
	st	X+, __zero_reg__
.do_clear_bss_start:
	cpi	r26, lo8(__bss_end)
	cpc	r27, r17
	brne	.do_clear_bss_loop
ENDF __do_clear_bss
#endif /* L_clear_bss */
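
;; Equivalent C sketch of the loop above (illustrative only; the
;; bounds come from the linker script):
;;
;;    extern char __bss_start[], __bss_end[];
;;    void do_clear_bss (void)
;;    {
;;        for (char *p = __bss_start; p != __bss_end; p++)
;;            *p = 0;                   /* ST X+, __zero_reg__ */
;;    }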

/* __do_global_ctors and __do_global_dtors are only necessary
   if there are any constructors/destructors.  */

#ifdef L_ctors
	.section .init6,"ax",@progbits
DEFUN __do_global_ctors
#if defined(__AVR_HAVE_ELPM__)
	ldi	r17, hi8(__ctors_start)
	ldi	r28, lo8(__ctors_end)
	ldi	r29, hi8(__ctors_end)
	ldi	r16, hh8(__ctors_end)
	rjmp	.L__do_global_ctors_start
.L__do_global_ctors_loop:
	sbiw	r28, 2
	sbc     r16, __zero_reg__
	mov_h	r31, r29
	mov_l	r30, r28
	out     __RAMPZ__, r16
	XCALL	__tablejump_elpm__
.L__do_global_ctors_start:
	cpi	r28, lo8(__ctors_start)
	cpc	r29, r17
	ldi	r24, hh8(__ctors_start)
	cpc	r16, r24
	brne	.L__do_global_ctors_loop
#else
	ldi	r17, hi8(__ctors_start)
	ldi	r28, lo8(__ctors_end)
	ldi	r29, hi8(__ctors_end)
	rjmp	.L__do_global_ctors_start
.L__do_global_ctors_loop:
	sbiw	r28, 2
	mov_h	r31, r29
	mov_l	r30, r28
	XCALL	__tablejump__
.L__do_global_ctors_start:
	cpi	r28, lo8(__ctors_start)
	cpc	r29, r17
	brne	.L__do_global_ctors_loop
#endif /* defined(__AVR_HAVE_ELPM__) */
ENDF __do_global_ctors
#endif /* L_ctors */
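
;; The walk above, as a C sketch (illustrative only): constructors are
;; taken from the end of the table down to its start, each entry being
;; a code address in flash invoked via __tablejump__:
;;
;;    typedef void (*func_t) (void);
;;    extern func_t __ctors_start[], __ctors_end[];   /* in flash */
;;    void do_global_ctors (void)
;;    {
;;        for (func_t *p = __ctors_end; p != __ctors_start; )
;;            (*--p) ();
;;    }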

#ifdef L_dtors
	.section .fini6,"ax",@progbits
DEFUN __do_global_dtors
#if defined(__AVR_HAVE_ELPM__)
	ldi	r17, hi8(__dtors_end)
	ldi	r28, lo8(__dtors_start)
	ldi	r29, hi8(__dtors_start)
	ldi	r16, hh8(__dtors_start)
	rjmp	.L__do_global_dtors_start
.L__do_global_dtors_loop:
	sbiw	r28, 2
	sbc     r16, __zero_reg__
	mov_h	r31, r29
	mov_l	r30, r28
	out     __RAMPZ__, r16
	XCALL	__tablejump_elpm__
.L__do_global_dtors_start:
	cpi	r28, lo8(__dtors_end)
	cpc	r29, r17
	ldi	r24, hh8(__dtors_end)
	cpc	r16, r24
	brne	.L__do_global_dtors_loop
#else
	ldi	r17, hi8(__dtors_end)
	ldi	r28, lo8(__dtors_start)
	ldi	r29, hi8(__dtors_start)
	rjmp	.L__do_global_dtors_start
.L__do_global_dtors_loop:
	mov_h	r31, r29
	mov_l	r30, r28
	XCALL	__tablejump__
	adiw	r28, 2
.L__do_global_dtors_start:
	cpi	r28, lo8(__dtors_end)
	cpc	r29, r17
	brne	.L__do_global_dtors_loop
#endif /* defined(__AVR_HAVE_ELPM__) */
ENDF __do_global_dtors
#endif /* L_dtors */
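
;; The non-ELPM variant above walks the opposite way to the ctors
;; loop, from __dtors_start up to __dtors_end; C sketch (illustrative
;; only):
;;
;;    typedef void (*func_t) (void);
;;    extern func_t __dtors_start[], __dtors_end[];   /* in flash */
;;    void do_global_dtors (void)
;;    {
;;        for (func_t *p = __dtors_start; p != __dtors_end; p++)
;;            (*p) ();
;;    }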

.section .text.libgcc, "ax", @progbits

#ifdef L_tablejump_elpm
DEFUN __tablejump_elpm__
#if defined (__AVR_HAVE_ELPMX__)
	elpm	__tmp_reg__, Z+
	elpm	r31, Z
	mov	r30, __tmp_reg__
#if defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* RAMPD */
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	eijmp
#else
	ijmp
#endif

#elif defined (__AVR_HAVE_ELPM__)
	elpm
	adiw	r30, 1
	push	r0
	elpm
	push	r0
#if defined (__AVR_HAVE_EIJMP_EICALL__)
	in      __tmp_reg__, __EIND__
	push    __tmp_reg__
#endif
	ret
#endif
ENDF __tablejump_elpm__
#endif /* defined (L_tablejump_elpm) */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash; n = 3,4
;; R22... = Flash[Z]
;; Clobbers: __tmp_reg__

#if (defined (L_load_3)        \
     || defined (L_load_4))    \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

.macro  .load dest, n
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.else
    sbiw    r30, \n-1
.endif
.endm

#if defined (L_load_3)
DEFUN __load_3
    push  D3
    XCALL __load_4
    pop   D3
    ret
ENDF __load_3
#endif /* L_load_3 */

#if defined (L_load_4)
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */

#endif /* L_load_3 || L_load_4 */

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
;; Clobbers: __tmp_reg__, R21, R30, R31

#if (defined (L_xload_1)            \
     || defined (L_xload_2)         \
     || defined (L_xload_3)         \
     || defined (L_xload_4))

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Register containing bits 16+ of the address

#define HHI8  21

.macro  .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm    \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm     \dest, Z+
#else
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload

#if defined (L_xload_1)
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    sbrc    HHI8, 7
    ld      D0, Z
    sbrs    HHI8, 7
    lpm     D0, Z
    ret
#else
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 1
    ret
1:  ld      D0, Z
    ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */

#if defined (L_xload_2)
DEFUN __xload_2
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 2
    .xload  D1, 2
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */

#if defined (L_xload_3)
DEFUN __xload_3
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 3
    .xload  D1, 3
    .xload  D2, 3
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */

#if defined (L_xload_4)
DEFUN __xload_4
    sbrc    HHI8, 7
    rjmp    1f
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 4
    .xload  D1, 4
    .xload  D2, 4
    .xload  D3, 4
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ld      D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */

#endif /* L_xload_{1|2|3|4} */
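
;; All __xload_<n> variants share one dispatch: bit 7 of the HHI8
;; address byte selects RAM, otherwise the read goes through (E)LPM.
;; A C sketch for n = 2 (illustrative only; flash_read_u16 is a
;; hypothetical stand-in for the .xload sequences above):
;;
;;    uint16_t xload_2 (uint32_t addr)   /* addr = R21:Z */
;;    {
;;        if (addr & 0x800000UL)              /* R21.7 set: RAM */
;;            return *(const uint16_t *) (uint16_t) addr;
;;        return flash_read_u16 (addr);       /* R21.7 clear: flash */
;;    }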

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; memcopy from Address Space __pgmx to RAM
;; R23:Z = Source Address
;; X     = Destination Address
;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z

#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

DEFUN __movmemx_qi
    ;; #Bytes to copy fit in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr     LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi

DEFUN __movmemx_hi

;; Read from where?
    sbrc    HHI8, 7
    rjmp    1f

;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8
#endif /* __AVR_HAVE_ELPM__ */

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm    r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    elpm
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm     r0, Z+
#else
    lpm
    adiw    r30, 1
#endif

    ;; ...and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld      r0, Z+
    ;; ...and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    1b
    ret
ENDF __movmemx_hi

#undef HHI8
#undef LOOP

#endif /* L_movmemx */
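
;; A C sketch of __movmemx_hi (illustrative only; flash_read_u8 is a
;; hypothetical stand-in for the (E)LPM sequences above, and the count
;; must be non-zero, just as with the SBIW-based loops):
;;
;;    void movmemx (char *dst /* X */, uint32_t src /* R23:Z */,
;;                  uint16_t len /* LOOP */)
;;    {
;;        if (src & 0x800000UL)                 /* source in RAM */
;;            do *dst++ = *(const char *) (uint16_t) src++; while (--len);
;;        else                                  /* source in flash */
;;            do *dst++ = flash_read_u8 (src++); while (--len);
;;    }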


.section .text.libgcc.builtins, "ax", @progbits

/**********************************
 * Find first set Bit (ffs)
 **********************************/

#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
DEFUN __ffssi2
    clr  r26
    tst  r22
    brne 1f
    subi r26, -8
    or   r22, r23
    brne 1f
    subi r26, -8
    or   r22, r24
    brne 1f
    subi r26, -8
    or   r22, r25
    brne 1f
    ret
1:  mov  r24, r22
    XJMP __loop_ffsqi2
ENDF __ffssi2
#endif /* defined (L_ffssi2) */

#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
DEFUN __ffshi2
    clr  r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problems skipping 2-word instructions
    tst  r24
    breq 2f
#else
    cpse r24, __zero_reg__
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2
2:  ldi  r26, 8
    or   r24, r25
    brne 1b
    ret
ENDF __ffshi2
#endif /* defined (L_ffshi2) */

#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    inc  r26
    lsr  r24
    brcc __loop_ffsqi2
    mov  r24, r26
    clr  r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
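
;; C sketch of the helper above (illustrative only): r26 arrives
;; holding 8 bits per all-zero low byte already skipped, and the loop
;; adds the 1-based position of the lowest set bit in r24:
;;
;;    uint8_t loop_ffsqi2 (uint8_t x, uint8_t base)   /* x != 0 */
;;    {
;;        uint8_t n = base, carry;
;;        do { n++; carry = x & 1; x >>= 1; } while (!carry);
;;        return n;
;;    }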


/**********************************
 * Count trailing Zeros (ctz)
 **********************************/

#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    XCALL __ffssi2
    dec  r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */

#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    XCALL __ffshi2
    dec  r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */


/**********************************
 * Count leading Zeros (clz)
 **********************************/

#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2
    sbrs r24, 5
    ret
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __clzsi2
    subi r24, -32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */

#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2
    sbrs r24, 4
    ret
    mov_l r24, r22
    mov_h r25, r23
    XCALL __clzhi2
    subi r24, -16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */

#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
DEFUN __clzhi2
    clr  r26
    tst  r25
    brne 1f
    subi r26, -8
    or   r25, r24
    brne 1f
    ldi  r24, 16
    ret
1:  cpi  r25, 16
    brsh 3f
    subi r26, -3
    swap r25
2:  inc  r26
3:  lsl  r25
    brcc 2b
    mov  r24, r26
    clr  r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
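
;; The function above is, in C terms (illustrative only; the real code
;; shortens the bit loop with a SWAP when the top nibble is zero):
;;
;;    uint8_t clz16 (uint16_t x)
;;    {
;;        uint8_t n = 0, b = x >> 8;
;;        if (b == 0)
;;        {
;;            n = 8;
;;            b = x & 0xff;
;;            if (b == 0)
;;                return 16;      /* clz(0) = 16 by this convention */
;;        }
;;        while (!(b & 0x80)) { n++; b <<= 1; }
;;        return n;
;;    }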


/**********************************
 * Parity
 **********************************/

#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    eor  r24, r18
    eor  r24, r19
    eor  r24, r20
    eor  r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */

#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    eor  r24, r22
    eor  r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */

#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __parityhi2
    eor  r24, r25
;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov  __tmp_reg__, r24
    swap __tmp_reg__
    eor  r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4
    andi r24, -5
    subi r24, -6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc  r24
    ;; parity is in r24[0]
    andi r24, 1
    clr  r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
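
;; In C terms, __parityqi2 computes the standard XOR fold (illustrative
;; only; the SUBI/ANDI/SUBI trio above folds the low nibble without a
;; scratch register, but the result is the same):
;;
;;    uint8_t parity8 (uint8_t x)
;;    {
;;        x ^= x >> 4;     /* SWAP + EOR: fold high nibble into low */
;;        x ^= x >> 2;
;;        x ^= x >> 1;
;;        return x & 1;
;;    }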


/**********************************
 * Population Count
 **********************************/

#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2
    push r24
    mov  r24, r25
    XCALL __popcountqi2
    clr  r25
    ;; FALLTHRU
ENDF __popcounthi2

DEFUN __popcounthi2_tail
    pop   __tmp_reg__
    add   r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */

#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2
    push  r24
    mov_l r24, r22
    mov_h r25, r23
    XCALL __popcounthi2
    XJMP  __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */

#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2
    push  r24
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21
    XCALL __popcountsi2
    XJMP  __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */

#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
DEFUN __popcountqi2
    mov  __tmp_reg__, r24
    andi r24, 1
    lsr  __tmp_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __zero_reg__
    lsr  __tmp_reg__
    adc  r24, __tmp_reg__
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
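
;; C sketch of the unrolled sequence above (illustrative only): bit 0
;; is kept by the ANDI, bits 1..5 are accumulated one LSR/ADC at a
;; time, and the final ADC folds in bit 7 together with bit 6:
;;
;;    uint8_t popcount8 (uint8_t x)
;;    {
;;        uint8_t n = x & 1;
;;        for (uint8_t i = 1; i < 8; i++)
;;            n += (x >> i) & 1;
;;        return n;
;;    }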


/**********************************
 * Swap bytes
 **********************************/

;; swap two registers with different register numbers
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm
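
;; This is the classic XOR swap: three EORs and no scratch register.
;; It requires \a != \b; with a == b it would zero the register.
;; C sketch (illustrative only):
;;
;;    void xorswap (uint8_t *a, uint8_t *b)   /* a != b */
;;    {
;;        *a ^= *b;
;;        *b ^= *a;
;;        *a ^= *b;
;;    }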

#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
DEFUN __bswapsi2
    bswap r22, r25
    bswap r23, r24
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */

#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
DEFUN __bswapdi2
    bswap r18, r25
    bswap r19, r24
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */


/**********************************
 * 64-bit shifts
 **********************************/

#if defined (L_ashrdi3)
;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
    bst     r25, 7
    bld     __zero_reg__, 0
    ;; FALLTHRU
ENDF __ashrdi3

;; Logical shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
DEFUN __lshrdi3
    lsr     __zero_reg__
    sbc     __tmp_reg__, __tmp_reg__
    push    r16
0:  cpi     r16, 8
    brlo 2f
    subi    r16, 8
    mov     r18, r19
    mov     r19, r20
    mov     r20, r21
    mov     r21, r22
    mov     r22, r23
    mov     r23, r24
    mov     r24, r25
    mov     r25, __tmp_reg__
    rjmp 0b
1:  asr     __tmp_reg__
    ror     r25
    ror     r24
    ror     r23
    ror     r22
    ror     r21
    ror     r20
    ror     r19
    ror     r18
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __lshrdi3
#endif /* defined (L_ashrdi3) */
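
;; Both entry points above share one loop structure, which __ashldi3
;; below mirrors for left shifts: whole bytes are shifted by register
;; moves, the remaining 0..7 bits by single-bit rotates.  __ashrdi3
;; differs only in pre-filling the incoming byte with the sign.  C
;; sketch of the logical variant (illustrative only):
;;
;;    uint64_t lshr64 (uint64_t x, uint8_t n)
;;    {
;;        while (n >= 8) { x >>= 8; n -= 8; }   /* MOV chain */
;;        while (n--)     x >>= 1;              /* ROR chain */
;;        return x;
;;    }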

#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
DEFUN __ashldi3
    push    r16
0:  cpi     r16, 8
    brlo 2f
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    clr     r18
    subi    r16, 8
    rjmp 0b
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */

#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
DEFUN __rotldi3
    push    r16
0:  cpi     r16, 8
    brlo 2f
    subi    r16, 8
    mov     __tmp_reg__, r25
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    mov     r18, __tmp_reg__
    rjmp 0b
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
    adc     r18, __zero_reg__
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
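
;; C sketch of the rotate above (illustrative only); as with the
;; shifts, whole bytes rotate via register moves, the remaining bits
;; via LSL/ROL with the carry fed back into bit 0 by the final ADC:
;;
;;    uint64_t rotl64 (uint64_t x, uint8_t n)
;;    {
;;        while (n >= 8) { x = (x << 8) | (x >> 56); n -= 8; }
;;        while (n--)     x = (x << 1) | (x >> 63);
;;        return x;
;;    }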


.section .text.libgcc.fmul, "ax", @progbits

/***********************************************************/
;;; Softmul versions of FMUL, FMULS and FMULSU to implement
;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
/***********************************************************/

#define A1 24
#define B1 25
#define C0 22
#define C1 23
#define A0 __tmp_reg__

#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov  A0, A1
    eor  A0, B1
    ;; B1 = |B1|
    sbrc B1, 7
    neg  B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */

#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov  A0, A1
;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg  A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problems skipping 2-word instructions
    tst  A0
    brmi 1f
#else
    sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP  __fmul
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    NEG2 C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
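
;; A C sketch of the sign handling above (illustrative only; note
;; that NEG leaves the -1.0 input 0x80 unchanged, so the usual
;; two's-complement corner case applies):
;;
;;    int16_t fmuls (int8_t a, int8_t b)
;;    {
;;        uint8_t negate = (uint8_t) (a ^ b) & 0x80;   /* A0.7 */
;;        uint16_t r = fmul ((uint8_t) (a < 0 ? -a : a),
;;                           (uint8_t) (b < 0 ? -b : b));
;;        return negate ? -(int16_t) r : (int16_t) r;
;;    }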


#ifdef L_fmul
;;; r23:r22 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmul
    ; clear result
    clr   C0
    clr   C1
    clr   A0
1:  tst   B1
    ;; 1.0 = 0x80, so test bit 7 of B to see if A must be added to C.
2:  brpl  3f
    ;; C += A
    add   C0, A0
    adc   C1, A1
3:  ;; A >>= 1
    lsr   A1
    ror   A0
    ;; B <<= 1
    lsl   B1
    brne  2b
    ret
ENDF __fmul
#endif /* L_fmul */
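
;; __fmul computes the 16-bit result (a * b) << 1 by classic
;; shift-and-add, scanning B from its most significant bit.  C sketch
;; (illustrative only):
;;
;;    uint16_t fmul (uint8_t a, uint8_t b)
;;    {
;;        uint16_t acc = 0;
;;        uint16_t aa = (uint16_t) a << 8;   /* A1:A0 with A0 = 0 */
;;        while (b)
;;        {
;;            if (b & 0x80)       /* BRPL test on B1 */
;;                acc += aa;
;;            aa >>= 1;           /* LSR A1 / ROR A0 */
;;            b <<= 1;            /* LSL B1 */
;;        }
;;        return acc;
;;    }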

#undef A0
#undef A1
#undef B1
#undef C0
#undef C1

#include "lib1funcs-fixed.S"