1/*  -*- Mode: Asm -*-  */
2/* Copyright (C) 1998-2019 Free Software Foundation, Inc.
3   Contributed by Denis Chertykov <chertykov@gmail.com>
4
5This file is free software; you can redistribute it and/or modify it
6under the terms of the GNU General Public License as published by the
7Free Software Foundation; either version 3, or (at your option) any
8later version.
9
10This file is distributed in the hope that it will be useful, but
11WITHOUT ANY WARRANTY; without even the implied warranty of
12MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13General Public License for more details.
14
15Under Section 7 of GPL version 3, you are granted additional
16permissions described in the GCC Runtime Library Exception, version
173.1, as published by the Free Software Foundation.
18
19You should have received a copy of the GNU General Public License and
20a copy of the GCC Runtime Library Exception along with this program;
21see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22<http://www.gnu.org/licenses/>.  */
23
24#if defined (__AVR_TINY__)
25#define __zero_reg__ r17
26#define __tmp_reg__ r16
27#else
28#define __zero_reg__ r1
29#define __tmp_reg__ r0
30#endif
31#define __SREG__ 0x3f
32#if defined (__AVR_HAVE_SPH__)
33#define __SP_H__ 0x3e
34#endif
35#define __SP_L__ 0x3d
36#define __RAMPZ__ 0x3B
37#define __EIND__  0x3C
38
39/* Most of the functions here are called directly from avr.md
40   patterns, instead of using the standard libcall mechanisms.
41   This can make better code because GCC knows exactly which
42   of the call-used registers (not all of them) are clobbered.  */
43
44/* FIXME:  At present, there is no SORT directive in the linker
45           script so that we must not assume that different modules
46           in the same input section like .libgcc.text.mul will be
47           located close together.  Therefore, we cannot use
48           RCALL/RJMP to call a function like __udivmodhi4 from
49           __divmodhi4 and have to use lengthy XCALL/XJMP even
50           though they are in the same input section and all same
51           input sections together are small enough to reach every
52           location with a RCALL/RJMP instruction.  */
53
54#if defined (__AVR_HAVE_EIJMP_EICALL__) && !defined (__AVR_HAVE_ELPMX__)
55#error device not supported
56#endif
57
;; mov_l/mov_h: copy a 16-bit value between register pairs.
;; On devices with MOVW, mov_l moves the whole pair at once and the
;; companion mov_h expands to nothing; otherwise each macro moves one byte.
	.macro	mov_l  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
	movw	\r_dest, \r_src
#else
	mov	\r_dest, \r_src
#endif
	.endm
65
;; High-byte companion of mov_l: a no-op when MOVW already moved the
;; whole pair, a plain MOV otherwise.
	.macro	mov_h  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
	; empty -- mov_l's MOVW already copied the high byte
#else
	mov	\r_dest, \r_src
#endif
	.endm
73
;; wmov: copy a 16-bit value between register pairs in one macro.
;; Arguments must be bare register numbers (e.g. 26, not r26) so that
;; \r_dest+1 / \r_src+1 address the high bytes in the non-MOVW expansion.
.macro	wmov  r_dest, r_src
#if defined (__AVR_HAVE_MOVW__)
    movw \r_dest,   \r_src
#else
    mov \r_dest,    \r_src
    mov \r_dest+1,  \r_src+1
#endif
.endm
82
83#if defined (__AVR_HAVE_JMP_CALL__)
84#define XCALL call
85#define XJMP  jmp
86#else
87#define XCALL rcall
88#define XJMP  rjmp
89#endif
90
91#if defined (__AVR_HAVE_EIJMP_EICALL__)
92#define XICALL eicall
93#define XIJMP  eijmp
94#else
95#define XICALL icall
96#define XIJMP  ijmp
97#endif
98
99;; Prologue stuff
100
;; Emit a prologue that saves \n_pushed call-saved registers and sets up
;; a frame of \n_frame bytes by jumping into the __prologue_saves__ code
;; table.  X (r27:r26) carries the frame size, Z (r31:r30) the address
;; to return to after the saves; the entry offset into the table selects
;; how many of the 18 possible register pushes are executed.
.macro do_prologue_saves n_pushed n_frame=0
    ldi r26, lo8(\n_frame)
    ldi r27, hi8(\n_frame)
    ldi r30, lo8(gs(.L_prologue_saves.\@))
    ldi r31, hi8(gs(.L_prologue_saves.\@))
    XJMP __prologue_saves__ + ((18 - (\n_pushed)) * 2)
.L_prologue_saves.\@:
.endm
109
110;; Epilogue stuff
111
;; Emit an epilogue that drops a frame of \n_frame bytes and restores
;; \n_pushed call-saved registers by jumping into the __epilogue_restores__
;; code table.  Y (r29:r28) is loaded with the current stack pointer,
;; r30 with the push count expected by the table.
.macro do_epilogue_restores n_pushed n_frame=0
    in      r28, __SP_L__
#ifdef __AVR_HAVE_SPH__
    in      r29, __SP_H__
.if \n_frame > 63
    ;; ADIW's immediate is limited to 63, use SUBI/SBCI of the negative
    subi    r28, lo8(-\n_frame)
    sbci    r29, hi8(-\n_frame)
.elseif \n_frame > 0
    adiw    r28, \n_frame
.endif
#else
    ;; Devices without SPH have at most 256 bytes of RAM
    clr     r29
.if \n_frame > 0
    subi    r28, lo8(-\n_frame)
.endif
#endif /* HAVE SPH */
    ldi     r30, \n_pushed
    XJMP __epilogue_restores__ + ((18 - (\n_pushed)) * 2)
.endm
131
132;; Support function entry and exit for convenience
133
;; 16-bit subtract-immediate.  AVR_TINY has no SBIW, so fall back to
;; SUBI/SBCI; arguments must be bare register numbers for \r_arg1+1.
.macro wsubi r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(\i_arg2)
    sbci \r_arg1+1, hi8(\i_arg2)
#else
    sbiw \r_arg1, \i_arg2
#endif
.endm
142
;; 16-bit add-immediate.  AVR_TINY has no ADIW; adding is done by
;; subtracting the negated immediate with SUBI/SBCI.
.macro waddi r_arg1, i_arg2
#if defined (__AVR_TINY__)
    subi \r_arg1,   lo8(-\i_arg2)
    sbci \r_arg1+1, hi8(-\i_arg2)
#else
    adiw \r_arg1, \i_arg2
#endif
.endm
151
;; Open a global function definition: exports the symbol, starts the
;; debugger function region, and emits the entry label.
.macro DEFUN name
.global \name
.func \name
\name:
.endm
157
158.macro ENDF name
159.size \name, .-\name
160.endfunc
161.endm
162
;; Define a zero-size function alias at the current location: the label
;; is exported and immediately closed so the following code serves as
;; the alias' body (fall-through entry point).
.macro FALIAS name
.global \name
.func \name
\name:
.size \name, .-\name
.endfunc
.endm
170
;; Skip next instruction, typically a jump target.
;; AVR_TINY cores only implement r16..r31 (cf. the __tmp_reg__ /
;; __zero_reg__ remapping above), so r0 cannot be referenced there:
;; TINY must compare r16 with itself, all other cores may use r0.
#if defined(__AVR_TINY__)
#define skip cpse 16,16
#else
#define skip cpse 0,0
#endif
177
;; Negate a 2-byte value held in consecutive registers (\reg+1:\reg).
;; NEG sets carry iff the low byte was non-zero; SBCI \reg+1, -1 then
;; computes \reg+1 = ~\reg+1 + 1 - C, completing the two's complement.
;; \reg must be >= r16 for SBCI.
.macro NEG2  reg
    com     \reg+1
    neg     \reg
    sbci    \reg+1, -1
.endm
184
;; Negate a 4-byte value held in consecutive registers (\reg+3 ... \reg)
;; Sets the V flag for signed overflow tests if REG >= 16
.macro NEG4  reg
    com     \reg+3
    com     \reg+2
    com     \reg+1
.if \reg >= 16
    ;; Upper registers: NEG + SBCI propagate the +1 of the two's complement
    neg     \reg
    sbci    \reg+1, -1
    sbci    \reg+2, -1
    sbci    \reg+3, -1
.else
    ;; Lower registers have no SBCI; COM sets carry = 1, which the ADC
    ;; chain uses to add the +1 of the two's complement
    com     \reg
    adc     \reg,   __zero_reg__
    adc     \reg+1, __zero_reg__
    adc     \reg+2, __zero_reg__
    adc     \reg+3, __zero_reg__
.endif
.endm
204
205#define exp_lo(N)  hlo8 ((N) << 23)
206#define exp_hi(N)  hhi8 ((N) << 23)
207
208
209.section .text.libgcc.mul, "ax", @progbits
210
211;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
212/* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
213#if !defined (__AVR_HAVE_MUL__)
214/*******************************************************
215    Multiplication  8 x 8  without MUL
216*******************************************************/
217#if defined (L_mulqi3)
218
219#define	r_arg2	r22		/* multiplicand */
220#define	r_arg1 	r24		/* multiplier */
221#define r_res	__tmp_reg__	/* result */
222
;;; R24 = R24 * R22  (8 x 8 shift-and-add multiply, no MUL instruction)
;;; Clobbers: __tmp_reg__, R22
DEFUN __mulqi3
	clr	r_res		; clear result
__mulqi3_loop:
	sbrc	r_arg1,0
	add	r_res,r_arg2	; result += multiplicand if LSB of multiplier set
	add	r_arg2,r_arg2	; shift multiplicand
	breq	__mulqi3_exit	; while multiplicand != 0
	lsr	r_arg1		; shift multiplier
	brne	__mulqi3_loop	; exit if multiplier = 0
__mulqi3_exit:
	mov	r_arg1,r_res	; result to return register
	ret
ENDF __mulqi3
236
237#undef r_arg2
238#undef r_arg1
239#undef r_res
240
241#endif 	/* defined (L_mulqi3) */
242
243
244/*******************************************************
245    Widening Multiplication  16 = 8 x 8  without MUL
246    Multiplication  16 x 16  without MUL
247*******************************************************/
248
249#define A0  22
250#define A1  23
251#define B0  24
252#define BB0 20
253#define B1  25
254;; Output overlaps input, thus expand result in CC0/1
255#define C0  24
256#define C1  25
257#define CC0  __tmp_reg__
258#define CC1  21
259
260#if defined (L_umulqihi3)
;;; R25:R24 = (unsigned int) R22 * (unsigned int) R24
;;; (C1:C0) = (unsigned int) A0  * (unsigned int) B0
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __umulqihi3
    clr     A1              ; zero-extend A
    clr     B1              ; zero-extend B
    XJMP    __mulhi3
ENDF __umulqihi3
269#endif /* L_umulqihi3 */
270
271#if defined (L_mulqihi3)
;;; R25:R24 = (signed int) R22 * (signed int) R24
;;; (C1:C0) = (signed int) A0  * (signed int) B0
;;; Clobbers: __tmp_reg__, R20..R23
DEFUN __mulqihi3
    ;; Sign-extend B0
    clr     B1
    sbrc    B0, 7
    com     B1
    ;; The multiplication runs twice as fast if A1 is zero, thus:
    ;; Zero-extend A0
    clr     A1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; Store  B0 * sign of A
    clr     BB0
    sbrc    A0, 7
    mov     BB0, B0
    call    __mulhi3
#else /* have no CALL */
    ;; Skip sign-extension of A if A >= 0
    ;; Same size as with the first alternative but avoids errata skip
    ;; and is faster if A >= 0
    sbrs    A0, 7
    rjmp    __mulhi3
    ;; If  A < 0  store B
    mov     BB0, B0
    rcall   __mulhi3
#endif /* HAVE_JMP_CALL */
    ;; 1-extend A after the multiplication:  subtract  B << 8
    ;; (BB0 holds B0 if A was negative, 0 otherwise)
    sub     C1, BB0
    ret
ENDF __mulqihi3
303#endif /* L_mulqihi3 */
304
305#if defined (L_mulhi3)
;;; R25:R24 = R23:R22 * R25:R24  (16 x 16 shift-and-add, no MUL)
;;; (C1:C0) = (A1:A0) * (B1:B0)
;;; Clobbers: __tmp_reg__, R21..R23
DEFUN __mulhi3

    ;; Clear result
    clr     CC0
    clr     CC1
    rjmp 3f
1:
    ;; Bit n of A is 1  -->  C += B << n
    add     CC0, B0
    adc     CC1, B1
2:
    ;; B <<= 1
    lsl     B0
    rol     B1
3:
    ;; If B == 0 we are ready
    wsubi   B0, 0
    breq 9f

    ;; Carry = n-th bit of A
    lsr     A1
    ror     A0
    ;; If bit n of A is set, then go add  B * 2^n  to  C
    brcs 1b

    ;; Carry = 0  -->  The ROR above acts like  CP A0, 0
    ;; Thus, it is sufficient to CPC the high part to test A against 0
    cpc     A1, __zero_reg__
    ;; Only proceed if A != 0
    brne    2b
9:
    ;; Move Result into place
    mov     C0, CC0
    mov     C1, CC1
    ret
ENDF  __mulhi3
344#endif /* L_mulhi3 */
345
346#undef A0
347#undef A1
348#undef B0
349#undef BB0
350#undef B1
351#undef C0
352#undef C1
353#undef CC0
354#undef CC1
355
356
357#define A0 22
358#define A1 A0+1
359#define A2 A0+2
360#define A3 A0+3
361
362#define B0 18
363#define B1 B0+1
364#define B2 B0+2
365#define B3 B0+3
366
367#define CC0 26
368#define CC1 CC0+1
369#define CC2 30
370#define CC3 CC2+1
371
372#define C0 22
373#define C1 C0+1
374#define C2 C0+2
375#define C3 C0+3
376
377/*******************************************************
378    Widening Multiplication  32 = 16 x 16  without MUL
379*******************************************************/
380
381#if defined (L_umulhisi3)
;;; Widening 32 = 16 x 16 (unsigned) without MUL:
;;; R25:R22 = (unsigned long) R23:R22 * (unsigned long) R25:R24
;;; Implemented by zero-extending both operands and tailing into __mulsi3.
DEFUN __umulhisi3
    wmov    B0, 24
    ;; Zero-extend B
    clr     B2
    clr     B3
    ;; Zero-extend A  (B2 is known to be zero here)
    wmov    A2, B2
    XJMP    __mulsi3
ENDF __umulhisi3
391#endif /* L_umulhisi3 */
392
393#if defined (L_mulhisi3)
;;; Widening 32 = 16 x 16 (signed) without MUL:
;;; R25:R22 = (signed long) R23:R22 * (signed long) R25:R24
DEFUN __mulhisi3
    wmov    B0, 24
    ;; Sign-extend B:  shift sign bit into carry, then C - C - ... = 0 or -1
    lsl     r25
    sbc     B2, B2
    mov     B3, B2
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Sign-extend A
    clr     A2
    sbrc    A1, 7
    com     A2
    mov     A3, A2
    XJMP __mulsi3
#else /*  no __AVR_ERRATA_SKIP_JMP_CALL__ */
    ;; Zero-extend A and __mulsi3 will run at least twice as fast
    ;; compared to a sign-extended A.
    clr     A2
    clr     A3
    sbrs    A1, 7
    XJMP __mulsi3
    ;; If  A < 0  then perform the  B * 0xffff.... before the
    ;; very multiplication by initializing the high part of the
    ;; result CC with -B.
    wmov    CC2, A2
    sub     CC2, B0
    sbc     CC3, B1
    XJMP __mulsi3_helper
#endif /*  __AVR_ERRATA_SKIP_JMP_CALL__ */
ENDF __mulhisi3
423#endif /* L_mulhisi3 */
424
425
426/*******************************************************
427    Multiplication  32 x 32  without MUL
428*******************************************************/
429
430#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18  (32 x 32 without MUL)
;;; On AVR_TINY the B operand is passed on the caller's stack and the
;;; callee-saved registers used for it are preserved here.
DEFUN __mulsi3
#if defined (__AVR_TINY__)
    in     r26, __SP_L__ ; safe to use X, as it is CC0/CC1
    in     r27, __SP_H__
    subi   r26, lo8(-3)   ; Add 3 to point past return address
    sbci   r27, hi8(-3)
    push   B0    ; save callee saved regs
    push   B1
    ld     B0, X+   ; load from caller stack
    ld     B1, X+
    ld     B2, X+
    ld     B3, X
#endif
    ;; Clear high part of result; low part is cleared by the helper
    clr     CC2
    clr     CC3
    ;; FALLTHRU
ENDF  __mulsi3
449
;;; Shift-and-add core of the 32 x 32 multiplication.  Expects CC3:CC2
;;; pre-initialized by the caller (zero for __mulsi3, -B for the signed
;;; correction in __mulhisi3); computes  C = A * B + (CC << 16).
DEFUN __mulsi3_helper
    clr     CC0
    clr     CC1
    rjmp 3f

1:  ;; If bit n of A is set, then add  B * 2^n  to the result in CC
    ;; CC += B
    add  CC0,B0  $  adc  CC1,B1  $  adc  CC2,B2  $  adc  CC3,B3

2:  ;; B <<= 1
    lsl  B0      $  rol  B1      $  rol  B2      $  rol  B3

3:  ;; A >>= 1:  Carry = n-th bit of A
    lsr  A3      $  ror  A2      $  ror  A1      $  ror  A0

    brcs 1b
    ;; Only continue if  A != 0
    ;; SBCI with carry clear keeps the Z chain from the ROR above
    sbci    A1, 0
    brne 2b
    wsubi   A2, 0
    brne 2b

    ;; All bits of A are consumed:  Copy result to return register C
    wmov    C0, CC0
    wmov    C2, CC2
#if defined (__AVR_TINY__)
    pop     B1      ; restore callee saved regs
    pop     B0
#endif  /* defined (__AVR_TINY__) */

    ret
ENDF __mulsi3_helper
482#endif /* L_mulsi3 */
483
484#undef A0
485#undef A1
486#undef A2
487#undef A3
488#undef B0
489#undef B1
490#undef B2
491#undef B3
492#undef C0
493#undef C1
494#undef C2
495#undef C3
496#undef CC0
497#undef CC1
498#undef CC2
499#undef CC3
500
501#endif /* !defined (__AVR_HAVE_MUL__) */
502;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
503
504;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
505#if defined (__AVR_HAVE_MUL__)
506#define A0 26
507#define B0 18
508#define C0 22
509
510#define A1 A0+1
511
512#define B1 B0+1
513#define B2 B0+2
514#define B3 B0+3
515
516#define C1 C0+1
517#define C2 C0+2
518#define C3 C0+3
519
520/*******************************************************
521    Widening Multiplication  32 = 16 x 16  with MUL
522*******************************************************/
523
524#if defined (L_mulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
;;; Clobbers: __tmp_reg__
;;; Signed widening multiply: unsigned product, then subtract A << 16
;;; if B < 0 and B << 16 if A < 0 (one's-extension corrections).
DEFUN __mulhisi3
    XCALL   __umulhisi3
    ;; Sign-extend B
    tst     B1
    brpl    1f
    sub     C2, A0
    sbc     C3, A1
1:  ;; Sign-extend A
    XJMP __usmulhisi3_tail
ENDF __mulhisi3
538#endif /* L_mulhisi3 */
539
540#if defined (L_usmulhisi3)
;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
;;; Unsigned product first, then the shared tail corrects for A < 0.
DEFUN __usmulhisi3
    XCALL   __umulhisi3
    ;; FALLTHRU
ENDF __usmulhisi3
548
;;; Shared tail: if A < 0, subtract  B << 16  from the product in C
;;; (converts the zero-extended product into the sign-extended one).
DEFUN __usmulhisi3_tail
    ;; Sign-extend A
    sbrs    A1, 7
    ret
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __usmulhisi3_tail
557#endif /* L_usmulhisi3 */
558
559#if defined (L_umulhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
;;; Clobbers: __tmp_reg__
;;; Schoolbook 16 x 16: A0*B0 -> C1:C0, A1*B1 -> C3:C2, then the two
;;; cross products A0*B1 and A1*B0 are accumulated at offset 8.
DEFUN __umulhisi3
    mul     A0, B0
    movw    C0, r0
    mul     A1, B1
    movw    C2, r0
    mul     A0, B1
#ifdef __AVR_HAVE_JMP_CALL__
    ;; This function is used by many other routines, often multiple times.
    ;; Therefore, if the flash size is not too limited, avoid the RCALL
    ;; and invest 6 Bytes to speed things up.
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
#else
    ;; Reuse the accumulation code below for the first cross product
    rcall   1f
#endif
    mul     A1, B0
1:  add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    adc     C3, __zero_reg__
    ret
ENDF __umulhisi3
587#endif /* L_umulhisi3 */
588
589/*******************************************************
590    Widening Multiplication  32 = 16 x 32  with MUL
591*******************************************************/
592
593#if defined (L_mulshisi3)
;;; R25:R22 = (signed long) R27:R26 * R21:R18
;;; (C3:C0) = (signed long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
;;; If A >= 0 this is the unsigned case; if A < 0, fall through to the
;;; one-extending variant below.
DEFUN __mulshisi3
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst     A1
    brmi    __mulohisi3
#else
    ;; A >= 0:  skip-jump to the unsigned routine
    sbrs    A1, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP    __muluhisi3
    ;; FALLTHRU
ENDF __mulshisi3

;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
DEFUN __mulohisi3
    XCALL   __muluhisi3
    ;; One-extend R27:R26 (A1:A0):  subtract  B << 16
    sub     C2, B0
    sbc     C3, B1
    ret
ENDF __mulohisi3
619#endif /* L_mulshisi3 */
620
621#if defined (L_muluhisi3)
;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
;;; Clobbers: __tmp_reg__
;;; 16 x 32 -> 32:  full A*B1:B0 via __umulhisi3, then the truncated
;;; high partial products A0*B2 (at byte 2) and A0*B3, A1*B2 (at byte 3).
DEFUN __muluhisi3
    XCALL   __umulhisi3
    mul     A0, B3
    add     C3, r0
    mul     A1, B2
    add     C3, r0
    mul     A0, B2
    add     C2, r0
    adc     C3, r1
    clr     __zero_reg__
    ret
ENDF __muluhisi3
637#endif /* L_muluhisi3 */
638
639/*******************************************************
640    Multiplication  32 x 32  with MUL
641*******************************************************/
642
643#if defined (L_mulsi3)
;;; R25:R22 = R25:R22 * R21:R18
;;; (C3:C0) = C3:C0   * B3:B0
;;; Clobbers: R26, R27, __tmp_reg__
;;; 32 x 32 -> 32:  low word of A times B via __muluhisi3, then the
;;; truncated contributions of A's high word (saved across the call).
DEFUN __mulsi3
    movw    A0, C0
    push    C2
    push    C3
    XCALL   __muluhisi3
    pop     A1
    pop     A0
    ;; A1:A0 now contains the high word of A
    mul     A0, B0
    add     C2, r0
    adc     C3, r1
    mul     A0, B1
    add     C3, r0
    mul     A1, B0
    add     C3, r0
    clr     __zero_reg__
    ret
ENDF __mulsi3
665#endif /* L_mulsi3 */
666
667#undef A0
668#undef A1
669
670#undef B0
671#undef B1
672#undef B2
673#undef B3
674
675#undef C0
676#undef C1
677#undef C2
678#undef C3
679
680#endif /* __AVR_HAVE_MUL__ */
681
682/*******************************************************
683       Multiplication 24 x 24 with MUL
684*******************************************************/
685
686#if defined (L_mulpsi3)
687
688;; A[0..2]: In: Multiplicand; Out: Product
689#define A0  22
690#define A1  A0+1
691#define A2  A0+2
692
693;; B[0..2]: In: Multiplier
694#define B0  18
695#define B1  B0+1
696#define B2  B0+2
697
698#if defined (__AVR_HAVE_MUL__)
699
700;; C[0..2]: Expand Result
701#define C0  22
702#define C1  C0+1
703#define C2  C0+2
704
705;; R24:R22 *= R20:R18
706;; Clobbers: r21, r25, r26, r27, __tmp_reg__
707
708#define AA0 26
709#define AA2 21
710
;;; 24 x 24 -> 24 with MUL:  copy A to scratch (result overlaps A),
;;; compute the low 32 bits via __umulhisi3, then add the truncated
;;; byte-2 contributions A2*B0 and A0*B2.
DEFUN __mulpsi3
    wmov    AA0, A0
    mov     AA2, A2
    XCALL   __umulhisi3
    mul     AA2, B0     $  add  C2, r0
    mul     AA0, B2     $  add  C2, r0
    clr     __zero_reg__
    ret
ENDF __mulpsi3
720
721#undef AA2
722#undef AA0
723
724#undef C2
725#undef C1
726#undef C0
727
728#else /* !HAVE_MUL */
729;; C[0..2]: Expand Result
730#if defined (__AVR_TINY__)
731#define C0  16
732#else
733#define C0  0
734#endif /* defined (__AVR_TINY__) */
735#define C1  C0+1
736#define C2  21
737
738;; R24:R22 *= R20:R18
739;; Clobbers: __tmp_reg__, R18, R19, R20, R21
740
;;; 24 x 24 -> 24 without MUL:  classic shift-and-add over the 24 bits
;;; of B.  On AVR_TINY the B operand is fetched from the caller's stack.
DEFUN __mulpsi3
#if defined (__AVR_TINY__)
    in r26,__SP_L__
    in r27,__SP_H__
    subi r26, lo8(-3)   ; Add 3 to point past return address
    sbci r27, hi8(-3)
    push B0    ; save callee saved regs
    push B1
    ld B0,X+   ; load from caller stack
    ld B1,X+
    ld B2,X+
#endif /* defined (__AVR_TINY__) */

    ;; C[] = 0
    clr     __tmp_reg__
    clr     C2

0:  ;; Shift N-th Bit of B[] into Carry.  N = 24 - Loop
    LSR  B2     $  ror  B1     $  ror  B0

    ;; If the N-th Bit of B[] was set...
    brcc    1f

    ;; ...then add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol  A1     $  rol  A2

    ;; Loop until B[] is 0
    subi B0,0   $  sbci B1,0   $  sbci B2,0
    brne    0b

    ;; Copy C[] to the return Register A[]
    wmov    A0, C0
    mov     A2, C2

    clr     __zero_reg__
#if defined (__AVR_TINY__)
    pop B1
    pop B0
#endif /* (__AVR_TINY__) */
    ret
ENDF __mulpsi3
785
786#undef C2
787#undef C1
788#undef C0
789
790#endif /* HAVE_MUL */
791
792#undef B2
793#undef B1
794#undef B0
795
796#undef A2
797#undef A1
798#undef A0
799
800#endif /* L_mulpsi3 */
801
802#if defined (L_mulsqipsi3) && defined (__AVR_HAVE_MUL__)
803
804;; A[0..2]: In: Multiplicand
805#define A0  22
806#define A1  A0+1
807#define A2  A0+2
808
809;; BB: In: Multiplier
810#define BB  25
811
812;; C[0..2]: Result
813#define C0  18
814#define C1  C0+1
815#define C2  C0+2
816
;; C[] = A[] * sign_extend (BB)
;; 24 x 8 (signed multiplier) with MUL: three hardware multiplies build
;; the unsigned product; if BB < 0, subtract  A << 8  (one-extension).
DEFUN __mulsqipsi3
    mul     A0, BB
    movw    C0, r0
    mul     A2, BB
    mov     C2, r0
    mul     A1, BB
    add     C1, r0
    adc     C2, r1
    clr     __zero_reg__
    sbrs    BB, 7
    ret
    ;; One-extend BB
    sub     C1, A0
    sbc     C2, A1
    ret
ENDF __mulsqipsi3
834
835#undef C2
836#undef C1
837#undef C0
838
839#undef BB
840
841#undef A2
842#undef A1
843#undef A0
844
845#endif /* L_mulsqipsi3  &&  HAVE_MUL */
846
847/*******************************************************
848       Multiplication 64 x 64
849*******************************************************/
850
851;; A[] = A[] * B[]
852
853;; A[0..7]: In: Multiplicand
854;; Out: Product
855#define A0  18
856#define A1  A0+1
857#define A2  A0+2
858#define A3  A0+3
859#define A4  A0+4
860#define A5  A0+5
861#define A6  A0+6
862#define A7  A0+7
863
864;; B[0..7]: In: Multiplier
865#define B0  10
866#define B1  B0+1
867#define B2  B0+2
868#define B3  B0+3
869#define B4  B0+4
870#define B5  B0+5
871#define B6  B0+6
872#define B7  B0+7
873
874#ifndef __AVR_TINY__
875#if defined (__AVR_HAVE_MUL__)
876;; Define C[] for convenience
877;; Notice that parts of C[] overlap A[] respective B[]
878#define C0  16
879#define C1  C0+1
880#define C2  20
881#define C3  C2+1
882#define C4  28
883#define C5  C4+1
884#define C6  C4+2
885#define C7  C4+3
886
887#if defined (L_muldi3)
888
;; A[]     *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function
;; 64 x 64 -> 64 with MUL, organized as a 4 x 4 multiplication of
;; 16-bit words; word products that only affect bytes 6..7 are
;; truncated accordingly.

DEFUN __muldi3
    push    r29
    push    r28
    push    r17
    push    r16

    ;; Counting in Words, we have to perform a 4 * 4 Multiplication

    ;; 3 * 0  +  0 * 3
    mul  A7,B0  $             $  mov C7,r0
    mul  A0,B7  $             $  add C7,r0
    mul  A6,B1  $             $  add C7,r0
    mul  A6,B0  $  mov C6,r0  $  add C7,r1
    mul  B6,A1  $             $  add C7,r0
    mul  B6,A0  $  add C6,r0  $  adc C7,r1

    ;; 1 * 2
    mul  A2,B4  $  add C6,r0  $  adc C7,r1
    mul  A3,B4  $             $  add C7,r0
    mul  A2,B5  $             $  add C7,r0

    ;; Save operand words that will be clobbered by __umulhisi3 calls
    push    A5
    push    A4
    push    B1
    push    B0
    push    A3
    push    A2

    ;; 0 * 0
    wmov    26, B0
    XCALL   __umulhisi3
    wmov    C0, 22
    wmov    C2, 24

    ;; 0 * 2
    wmov    26, B4
    XCALL   __umulhisi3  $  wmov C4,22            $ add C6,24 $ adc C7,25

    wmov    26, B2
    ;; 0 * 1
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 1 * 1
    wmov    26, B2
    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    pop     r26
    pop     r27
    ;; 1 * 0
    XCALL   __muldi3_6

    pop     A0
    pop     A1
    ;; 2 * 0
    XCALL   __umulhisi3  $  add C4,22 $ adc C5,23 $ adc C6,24 $ adc C7,25

    ;; 2 * 1
    wmov    26, B2
    XCALL   __umulhisi3  $            $           $ add C6,22 $ adc C7,23

    ;; A[] = C[]
    wmov    A0, C0
    ;; A2 = C2 already
    wmov    A4, C4
    wmov    A6, C6

    pop     r16
    pop     r17
    pop     r28
    pop     r29
    ret
ENDF __muldi3
967#endif /* L_muldi3 */
968
969#if defined (L_muldi3_6)
;; A helper for some 64-bit multiplications with MUL available:
;; compute R27:R26 * R19:R18 via __umulhisi3 and accumulate the 32-bit
;; product (R25:R22) into C5:C2, propagating the carry into C7:C6.
;; Note: DEFUN already emits the label `__muldi3_6:'; repeating it here
;; would make the assembler fail with a doubly-defined symbol.
DEFUN __muldi3_6
    XCALL   __umulhisi3
    add     C2, 22
    adc     C3, 23
    adc     C4, 24
    adc     C5, 25
    brcc    0f
    adiw    C6, 1
0:  ret
ENDF __muldi3_6
982#endif /* L_muldi3_6 */
983
984#undef C7
985#undef C6
986#undef C5
987#undef C4
988#undef C3
989#undef C2
990#undef C1
991#undef C0
992
993#else /* !HAVE_MUL */
994
995#if defined (L_muldi3)
996
997#define C0  26
998#define C1  C0+1
999#define C2  C0+2
1000#define C3  C0+3
1001#define C4  C0+4
1002#define C5  C0+5
1003#define C6  0
1004#define C7  C6+1
1005
1006#define Loop 9
1007
;; A[]     *= B[]
;; R25:R18 *= R17:R10
;; Ordinary ABI-Function
;; 64 x 64 -> 64 without MUL: 64-iteration shift-and-add.  B[] is
;; rotated in place so it ends up unchanged; C6:C7 live in
;; __tmp_reg__/__zero_reg__ and the zero-reg is re-cleared on exit.

DEFUN __muldi3
    push    r29
    push    r28
    push    Loop

    ldi     C0, 64
    mov     Loop, C0

    ;; C[] = 0
    clr     __tmp_reg__
    wmov    C0, 0
    wmov    C2, 0
    wmov    C4, 0

0:  ;; Rotate B[] right by 1 and set Carry to the N-th Bit of B[]
    ;; where N = 64 - Loop.
    ;; Notice that B[] = B[] >>> 64 so after this Routine has finished,
    ;; B[] will have its initial Value again.
    LSR  B7     $  ror  B6     $  ror  B5     $  ror  B4
    ror  B3     $  ror  B2     $  ror  B1     $  ror  B0

    ;; If the N-th Bit of B[] was set then...
    brcc    1f
    ;; ...finish Rotation...
    ori     B7, 1 << 7

    ;; ...and add A[] * 2^N to the Result C[]
    ADD  C0,A0  $  adc  C1,A1  $  adc  C2,A2  $  adc  C3,A3
    adc  C4,A4  $  adc  C5,A5  $  adc  C6,A6  $  adc  C7,A7

1:  ;; Multiply A[] by 2
    LSL  A0     $  rol  A1     $  rol  A2     $  rol  A3
    rol  A4     $  rol  A5     $  rol  A6     $  rol  A7

    dec     Loop
    brne    0b

    ;; We expanded the Result in C[]
    ;; Copy Result to the Return Register A[]
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6

    clr     __zero_reg__
    pop     Loop
    pop     r28
    pop     r29
    ret
ENDF __muldi3
1062
1063#undef Loop
1064
1065#undef C7
1066#undef C6
1067#undef C5
1068#undef C4
1069#undef C3
1070#undef C2
1071#undef C1
1072#undef C0
1073
1074#endif /* L_muldi3 */
1075#endif /* HAVE_MUL */
1076#endif /* if not __AVR_TINY__ */
1077
1078#undef B7
1079#undef B6
1080#undef B5
1081#undef B4
1082#undef B3
1083#undef B2
1084#undef B1
1085#undef B0
1086
1087#undef A7
1088#undef A6
1089#undef A5
1090#undef A4
1091#undef A3
1092#undef A2
1093#undef A1
1094#undef A0
1095
1096/*******************************************************
1097   Widening Multiplication 64 = 32 x 32  with  MUL
1098*******************************************************/
1099
1100#if defined (__AVR_HAVE_MUL__)
1101#define A0 r22
1102#define A1 r23
1103#define A2 r24
1104#define A3 r25
1105
1106#define B0 r18
1107#define B1 r19
1108#define B2 r20
1109#define B3 r21
1110
1111#define C0  18
1112#define C1  C0+1
1113#define C2  20
1114#define C3  C2+1
1115#define C4  28
1116#define C5  C4+1
1117#define C6  C4+2
1118#define C7  C4+3
1119
1120#if defined (L_umulsidi3)
1121
;; Unsigned widening 64 = 32 * 32 Multiplication with MUL

;; R18[8] = R22[4] * R18[4]
;;
;; Ordinary ABI Function, but additionally sets
;; X = R20[2] = B2[2]
;; Z = R22[2] = A0[2]
DEFUN __umulsidi3
    clt                 ; T = 0:  A is treated as unsigned
    ;; FALLTHRU
ENDF  __umulsidi3
    ;; T = sign (A)
DEFUN __umulsidi3_helper
    push    29  $  push    28 ; Y
    wmov    30, A2
    ;; Counting in Words, we have to perform 4 Multiplications
    ;; 0 * 0
    wmov    26, A0
    XCALL __umulhisi3
    push    23  $  push    22 ; C0
    wmov    28, B0
    wmov    18, B2
    wmov    C2, 24
    push    27  $  push    26 ; A0
    push    19  $  push    18 ; B2
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B2  C2  --  --  --  B0  A2
    ;; 1 * 1
    wmov    26, 30      ; A2
    XCALL __umulhisi3
    ;; Sign-extend A.  T holds the sign of A
    brtc    0f
    ;; Subtract B from the high part of the result
    sub     22, 28
    sbc     23, 29
    sbc     24, 18
    sbc     25, 19
0:  wmov    18, 28      ;; B0
    wmov    C4, 22
    wmov    C6, 24
    ;;
    ;;  18  20  22  24  26  28  30  |  B2, B3, A0, A1, C0, C1, Y
    ;;  B0  C2  --  --  A2  C4  C6
    ;;
    ;; 1 * 0
    XCALL __muldi3_6
    ;; 0 * 1
    pop     26  $   pop 27  ;; B2
    pop     18  $   pop 19  ;; A0
    XCALL __muldi3_6

    ;; Move result C into place and save A0 in Z
    wmov    22, C4
    wmov    24, C6
    wmov    30, 18 ; A0
    pop     C0  $   pop C1

    ;; Epilogue
    pop     28  $   pop 29  ;; Y
    ret
ENDF __umulsidi3_helper
1184#endif /* L_umulsidi3 */
1185
1186
1187#if defined (L_mulsidi3)
1188
;; Signed widening 64 = 32 * 32 Multiplication
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
;; The helper handles A's sign via the T flag; only B < 0 needs the
;; extra correction (subtract A << 32) performed here.
DEFUN __mulsidi3
    bst     A3, 7           ; T = sign of A, consumed by the helper
    sbrs    B3, 7           ; Enhanced core has no skip bug
    XJMP __umulsidi3_helper

    ;; B needs sign-extension
    push    A3
    push    A2
    XCALL __umulsidi3_helper
    ;; A0 survived in Z:  subtract  A << 32  from the product
    sub     r22, r30
    sbc     r23, r31
    pop     r26
    pop     r27
    sbc     r24, r26
    sbc     r25, r27
    ret
ENDF __mulsidi3
1211#endif /* L_mulsidi3 */
1212
1213#undef A0
1214#undef A1
1215#undef A2
1216#undef A3
1217#undef B0
1218#undef B1
1219#undef B2
1220#undef B3
1221#undef C0
1222#undef C1
1223#undef C2
1224#undef C3
1225#undef C4
1226#undef C5
1227#undef C6
1228#undef C7
1229#endif /* HAVE_MUL */
1230
1231/**********************************************************
1232    Widening Multiplication 64 = 32 x 32  without  MUL
1233**********************************************************/
1234#ifndef __AVR_TINY__ /* if not __AVR_TINY__ */
1235#if defined (L_mulsidi3) && !defined (__AVR_HAVE_MUL__)
1236#define A0 18
1237#define A1 A0+1
1238#define A2 A0+2
1239#define A3 A0+3
1240#define A4 A0+4
1241#define A5 A0+5
1242#define A6 A0+6
1243#define A7 A0+7
1244
1245#define B0 10
1246#define B1 B0+1
1247#define B2 B0+2
1248#define B3 B0+3
1249#define B4 B0+4
1250#define B5 B0+5
1251#define B6 B0+6
1252#define B7 B0+7
1253
1254#define AA0 22
1255#define AA1 AA0+1
1256#define AA2 AA0+2
1257#define AA3 AA0+3
1258
1259#define BB0 18
1260#define BB1 BB0+1
1261#define BB2 BB0+2
1262#define BB3 BB0+3
1263
1264#define Mask r30
1265
;; Signed / Unsigned widening 64 = 32 * 32 Multiplication without MUL
;;
;; R18[8] = R22[4] * R18[4]
;; Ordinary ABI Function
;; T distinguishes the two entry points: signed sets T, unsigned clears
;; it.  Mask then reads 0xFF (keep sign bit) or 0x7F (strip it), so the
;; LSL/SBC extension below yields sign- or zero-extension respectively.
;; The actual 64 x 64 multiply is done by __muldi3.
DEFUN __mulsidi3
    set
    skip
    ;; FALLTHRU
ENDF  __mulsidi3

DEFUN __umulsidi3
    clt     ; skipped
    ;; Save 10 Registers: R10..R17, R28, R29
    do_prologue_saves 10
    ldi     Mask, 0xff
    bld     Mask, 7         ; Mask.7 = T
    ;; Move B into place...
    wmov    B0, BB0
    wmov    B2, BB2
    ;; ...and extend it (scratch copy BB3 yields the extension byte)
    and     BB3, Mask
    lsl     BB3
    sbc     B4, B4
    mov     B5, B4
    wmov    B6, B4
    ;; Move A into place...
    wmov    A0, AA0
    wmov    A2, AA2
    ;; ...and extend it
    and     AA3, Mask
    lsl     AA3
    sbc     A4, A4
    mov     A5, A4
    wmov    A6, A4
    XCALL   __muldi3
    do_epilogue_restores 10
ENDF __umulsidi3
1303
1304#undef A0
1305#undef A1
1306#undef A2
1307#undef A3
1308#undef A4
1309#undef A5
1310#undef A6
1311#undef A7
1312#undef B0
1313#undef B1
1314#undef B2
1315#undef B3
1316#undef B4
1317#undef B5
1318#undef B6
1319#undef B7
1320#undef AA0
1321#undef AA1
1322#undef AA2
1323#undef AA3
1324#undef BB0
1325#undef BB1
1326#undef BB2
1327#undef BB3
1328#undef Mask
1329#endif /* L_mulsidi3 && !HAVE_MUL */
1330#endif /* if not __AVR_TINY__ */
1331;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1332
1333
1334.section .text.libgcc.div, "ax", @progbits
1335
1336/*******************************************************
1337       Division 8 / 8 => (result + remainder)
1338*******************************************************/
1339#define	r_rem	r25	/* remainder */
1340#define	r_arg1	r24	/* dividend, quotient */
1341#define	r_arg2	r22	/* divisor */
1342#define	r_cnt	r23	/* loop count */
1343
1344#if defined (L_udivmodqi4)
;; Unsigned 8/8 division:  R24 = R24 udiv R22,  R25 = R24 umod R22.
;; Classic restoring shift-and-subtract loop, 8 iterations.  Quotient
;; bits are collected inverted (ROL shifts in the complemented carry)
;; and fixed up with COM at the end.
DEFUN __udivmodqi4
	sub	r_rem,r_rem	; clear remainder and carry
	ldi	r_cnt,9		; init loop counter (8 bits + priming shift)
	rjmp	__udivmodqi4_ep	; jump to entry point
__udivmodqi4_loop:
	rol	r_rem		; shift dividend into remainder
	cp	r_rem,r_arg2	; compare remainder & divisor
	brcs	__udivmodqi4_ep	; remainder < divisor
	sub	r_rem,r_arg2	; remainder >= divisor: subtract divisor
__udivmodqi4_ep:
	rol	r_arg1		; shift dividend (with CARRY = inverted quotient bit)
	dec	r_cnt		; decrement loop counter
	brne	__udivmodqi4_loop
	com	r_arg1		; complement result
				; because C flag was complemented in loop
	ret
ENDF __udivmodqi4
1362#endif /* defined (L_udivmodqi4) */
1363
1364#if defined (L_divmodqi4)
;; Signed 8/8 division:  R24 = R24 div R22,  R25 = R24 mod R22.
;; T holds the dividend's sign (decides the remainder's sign);
;; __tmp_reg__.7 holds dividend^divisor (decides the quotient's sign).
DEFUN __divmodqi4
        bst     r_arg1,7	; store sign of dividend in T
        mov     __tmp_reg__,r_arg1
        eor     __tmp_reg__,r_arg2; r0.7 is sign of result
        sbrc	r_arg1,7
	neg     r_arg1		; dividend negative : negate
        sbrc	r_arg2,7
	neg     r_arg2		; divisor negative : negate
	XCALL	__udivmodqi4	; do the unsigned div/mod
	brtc	__divmodqi4_1	; T = 0: dividend was non-negative
	neg	r_rem		; correct remainder sign
__divmodqi4_1:
	sbrc	__tmp_reg__,7
	neg	r_arg1		; correct result sign
__divmodqi4_exit:
	ret
ENDF __divmodqi4
1382#endif /* defined (L_divmodqi4) */
1383
1384#undef r_rem
1385#undef r_arg1
1386#undef r_arg2
1387#undef r_cnt
1388
1389
1390/*******************************************************
1391       Division 16 / 16 => (result + remainder)
1392*******************************************************/
1393#define	r_remL	r26	/* remainder Low */
1394#define	r_remH	r27	/* remainder High */
1395
1396/* return: remainder */
1397#define	r_arg1L	r24	/* dividend Low */
1398#define	r_arg1H	r25	/* dividend High */
1399
1400/* return: quotient */
1401#define	r_arg2L	r22	/* divisor Low */
1402#define	r_arg2H	r23	/* divisor High */
1403
1404#define	r_cnt	r21	/* loop count */
1405
1406#if defined (L_udivmodhi4)
;; Unsigned 16/16 division:  R23:R22 = R25:R24 udiv R23:R22,
;; R25:R24 = R25:R24 umod R23:R22.  Restoring shift-and-subtract,
;; 16 iterations; quotient bits collected inverted, fixed up by COM.
DEFUN __udivmodhi4
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	ldi	r_cnt,17	; init loop counter (16 bits + priming shift)
	rjmp	__udivmodhi4_ep	; jump to entry point
__udivmodhi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
        brcs	__udivmodhi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; remainder >= divisor: subtract divisor
        sbc	r_remH,r_arg2H
__udivmodhi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY = inverted quotient bit)
        rol	r_arg1H
        dec	r_cnt		; decrement loop counter
        brne	__udivmodhi4_loop
	com	r_arg1L		; complement quotient (C was inverted in loop)
	com	r_arg1H
; div/mod results to return registers, as for the div() function
	mov_l	r_arg2L, r_arg1L	; quotient
	mov_h	r_arg2H, r_arg1H
	mov_l	r_arg1L, r_remL		; remainder
	mov_h	r_arg1H, r_remH
	ret
ENDF __udivmodhi4
1434#endif /* defined (L_udivmodhi4) */
1435
1436#if defined (L_divmodhi4)
;; Signed 16/16 division (also exported as _div, cf. div() in libc):
;; R23:R22 = R25:R24 div R23:R22,  R25:R24 = R25:R24 mod R23:R22.
;; T = sign of dividend (-> remainder sign);
;; __tmp_reg__.7 = dividend^divisor (-> quotient sign).
;; After __udivmodhi4, the quotient lives in r_arg2 and the remainder
;; in r_arg1, so the same negate helpers serve before and after.
DEFUN __divmodhi4
    .global _div
_div:
    bst     r_arg1H,7           ; store sign of dividend
    mov     __tmp_reg__,r_arg2H
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    rcall   __divmodhi4_neg1    ; dividend negative: negate
0:
    sbrc    r_arg2H,7
    rcall   __divmodhi4_neg2    ; divisor negative: negate
    XCALL   __udivmodhi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__,7
    rcall   __divmodhi4_neg2    ; correct quotient sign (now in r_arg2)
    brtc    __divmodhi4_exit    ; T = 1: fall through, negate remainder
__divmodhi4_neg1:
    ;; correct dividend/remainder sign
    com     r_arg1H
    neg     r_arg1L
    sbci    r_arg1H,0xff
    ret
__divmodhi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
__divmodhi4_exit:
    ret
ENDF __divmodhi4
1466#endif /* defined (L_divmodhi4) */
1467
1468#undef r_remH
1469#undef r_remL
1470
1471#undef r_arg1H
1472#undef r_arg1L
1473
1474#undef r_arg2H
1475#undef r_arg2L
1476
1477#undef r_cnt
1478
1479/*******************************************************
1480       Division 24 / 24 => (result + remainder)
1481*******************************************************/
1482
1483;; A[0..2]: In: Dividend; Out: Quotient
1484#define A0  22
1485#define A1  A0+1
1486#define A2  A0+2
1487
1488;; B[0..2]: In: Divisor;   Out: Remainder
1489#define B0  18
1490#define B1  B0+1
1491#define B2  B0+2
1492
1493;; C[0..2]: Expand remainder
1494#define C0  __zero_reg__
1495#define C1  26
1496#define C2  25
1497
1498;; Loop counter
1499#define r_cnt   21
1500
1501#if defined (L_udivmodpsi4)
;; R24:R22 = R24:R22  udiv  R20:R18
1503;; R20:R18 = R24:R22  umod  R20:R18
1504;; Clobbers: R21, R25, R26
1505
;; Unsigned 24/24 division, restoring shift-and-subtract, 24 rounds.
;; Quotient bits collect inverted in A[] and are fixed up with COM.
;; C0 doubles as __zero_reg__ and is cleared again before returning.
DEFUN __udivmodpsi4
    ; init loop counter (24 bits + priming shift)
    ldi     r_cnt, 24+1
    ; Clear remainder and carry.  C0 is already 0
    clr     C1
    sub     C2, C2
    ; jump to entry point
    rjmp    __udivmodpsi4_start
__udivmodpsi4_loop:
    ; shift dividend into remainder
    rol     C0
    rol     C1
    rol     C2
    ; compare remainder & divisor
    cp      C0, B0
    cpc     C1, B1
    cpc     C2, B2
    brcs    __udivmodpsi4_start ; remainder < divisor
    sub     C0, B0              ; remainder >= divisor: subtract divisor
    sbc     C1, B1
    sbc     C2, B2
__udivmodpsi4_start:
    ; shift dividend (with CARRY = inverted quotient bit)
    rol     A0
    rol     A1
    rol     A2
    ; decrement loop counter
    dec     r_cnt
    brne    __udivmodpsi4_loop
    com     A0                  ; complement quotient (C was inverted in loop)
    com     A1
    com     A2
    ; div/mod results to return registers
    ; remainder
    mov     B0, C0
    mov     B1, C1
    mov     B2, C2
    clr     __zero_reg__ ; C0
    ret
ENDF __udivmodpsi4
1546#endif /* defined (L_udivmodpsi4) */
1547
1548#if defined (L_divmodpsi4)
1549;; R24:R22 = R24:R22  div  R20:R18
1550;; R20:R18 = R24:R22  mod  R20:R18
1551;; Clobbers: T, __tmp_reg__, R21, R25, R26
1552
;; Signed 24/24 division.  T = sign of dividend (-> remainder sign);
;; __tmp_reg__.7 = dividend^divisor (-> quotient sign).  After
;; __udivmodpsi4 the quotient is in A[] and the remainder in B[],
;; so negA / negB serve both before and after the division.
DEFUN __divmodpsi4
    ; R0.7 will contain the sign of the result:
    ; R0.7 = A.sign ^ B.sign
    mov __tmp_reg__, B2
    ; T-flag = sign of dividend
    bst     A2, 7
    brtc    0f
    com     __tmp_reg__
    ; Adjust dividend's sign
    rcall   __divmodpsi4_negA
0:
    ; Adjust divisor's sign
    sbrc    B2, 7
    rcall   __divmodpsi4_negB

    ; Do the unsigned div/mod
    XCALL   __udivmodpsi4

    ; Adjust quotient's sign
    sbrc    __tmp_reg__, 7
    rcall   __divmodpsi4_negA

    ; Adjust remainder's sign (T = 1: fall through into negB)
    brtc    __divmodpsi4_end

__divmodpsi4_negB:
    ; Correct divisor/remainder sign
    com     B2
    com     B1
    neg     B0
    sbci    B1, -1
    sbci    B2, -1
    ret

    ; Correct dividend/quotient sign
__divmodpsi4_negA:
    com     A2
    com     A1
    neg     A0
    sbci    A1, -1
    sbci    A2, -1
__divmodpsi4_end:
    ret

ENDF __divmodpsi4
1598#endif /* defined (L_divmodpsi4) */
1599
1600#undef A0
1601#undef A1
1602#undef A2
1603
1604#undef B0
1605#undef B1
1606#undef B2
1607
1608#undef C0
1609#undef C1
1610#undef C2
1611
1612#undef r_cnt
1613
1614/*******************************************************
1615       Division 32 / 32 => (result + remainder)
1616*******************************************************/
1617#define	r_remHH	r31	/* remainder High */
1618#define	r_remHL	r30
1619#define	r_remH	r27
1620#define	r_remL	r26	/* remainder Low */
1621
1622/* return: remainder */
1623#define	r_arg1HH r25	/* dividend High */
1624#define	r_arg1HL r24
1625#define	r_arg1H  r23
1626#define	r_arg1L  r22	/* dividend Low */
1627
1628/* return: quotient */
1629#define	r_arg2HH r21	/* divisor High */
1630#define	r_arg2HL r20
1631#define	r_arg2H  r19
1632#define	r_arg2L  r18	/* divisor Low */
1633
1634#define	r_cnt __zero_reg__  /* loop count (0 after the loop!) */
1635
1636#if defined (L_udivmodsi4)
;; Unsigned 32/32 division, restoring shift-and-subtract, 32 rounds.
;; r_cnt is __zero_reg__: the counter reaches 0 exactly when the loop
;; ends, so __zero_reg__ is automatically restored.  Quotient bits
;; collect inverted and are fixed up with COM.
DEFUN __udivmodsi4
	ldi	r_remL, 33	; init loop counter (32 bits + priming shift)
	mov	r_cnt, r_remL
	sub	r_remL,r_remL
	sub	r_remH,r_remH	; clear remainder and carry
	mov_l	r_remHL, r_remL
	mov_h	r_remHH, r_remH
	rjmp	__udivmodsi4_ep	; jump to entry point
__udivmodsi4_loop:
        rol	r_remL		; shift dividend into remainder
	rol	r_remH
	rol	r_remHL
	rol	r_remHH
        cp	r_remL,r_arg2L	; compare remainder & divisor
	cpc	r_remH,r_arg2H
	cpc	r_remHL,r_arg2HL
	cpc	r_remHH,r_arg2HH
	brcs	__udivmodsi4_ep	; remainder < divisor
        sub	r_remL,r_arg2L	; remainder >= divisor: subtract divisor
        sbc	r_remH,r_arg2H
        sbc	r_remHL,r_arg2HL
        sbc	r_remHH,r_arg2HH
__udivmodsi4_ep:
        rol	r_arg1L		; shift dividend (with CARRY = inverted quotient bit)
        rol	r_arg1H
        rol	r_arg1HL
        rol	r_arg1HH
        dec	r_cnt		; decrement loop counter
        brne	__udivmodsi4_loop
				; __zero_reg__ now restored (r_cnt == 0)
	com	r_arg1L		; complement quotient (C was inverted in loop)
	com	r_arg1H
	com	r_arg1HL
	com	r_arg1HH
; div/mod results to return registers, as for the ldiv() function
	mov_l	r_arg2L,  r_arg1L	; quotient
	mov_h	r_arg2H,  r_arg1H
	mov_l	r_arg2HL, r_arg1HL
	mov_h	r_arg2HH, r_arg1HH
	mov_l	r_arg1L,  r_remL	; remainder
	mov_h	r_arg1H,  r_remH
	mov_l	r_arg1HL, r_remHL
	mov_h	r_arg1HH, r_remHH
	ret
ENDF __udivmodsi4
1682#endif /* defined (L_udivmodsi4) */
1683
1684#if defined (L_divmodsi4)
;; Signed 32/32 division.  T = sign of dividend (-> remainder sign);
;; __tmp_reg__.7 = dividend^divisor (-> quotient sign).  Dividend and
;; remainder live in R25:R22 (negated via __negsi2); divisor and
;; quotient live in R21:R18 (negated via __divmodsi4_neg2).
DEFUN __divmodsi4
    mov     __tmp_reg__,r_arg2HH
    bst     r_arg1HH,7          ; store sign of dividend
    brtc    0f
    com     __tmp_reg__         ; r0.7 is sign of result
    XCALL   __negsi2            ; dividend negative: negate
0:
    sbrc    r_arg2HH,7
    rcall   __divmodsi4_neg2    ; divisor negative: negate
    XCALL   __udivmodsi4        ; do the unsigned div/mod
    sbrc    __tmp_reg__, 7      ; correct quotient sign
    rcall   __divmodsi4_neg2
    brtc    __divmodsi4_exit    ; correct remainder sign
    XJMP    __negsi2
__divmodsi4_neg2:
    ;; correct divisor/quotient sign
    com     r_arg2HH
    com     r_arg2HL
    com     r_arg2H
    neg     r_arg2L
    sbci    r_arg2H,0xff
    sbci    r_arg2HL,0xff
    sbci    r_arg2HH,0xff
__divmodsi4_exit:
    ret
ENDF __divmodsi4
1711#endif /* defined (L_divmodsi4) */
1712
1713#if defined (L_negsi2)
1714;; (set (reg:SI 22)
1715;;      (neg:SI (reg:SI 22)))
1716;; Sets the V flag for signed overflow tests
;; 32-bit two's-complement negation of R25:R22 via the NEG4 macro.
DEFUN __negsi2
    NEG4    22
    ret
ENDF __negsi2
1721#endif /* L_negsi2 */
1722
1723#undef r_remHH
1724#undef r_remHL
1725#undef r_remH
1726#undef r_remL
1727#undef r_arg1HH
1728#undef r_arg1HL
1729#undef r_arg1H
1730#undef r_arg1L
1731#undef r_arg2HH
1732#undef r_arg2HL
1733#undef r_arg2H
1734#undef r_arg2L
1735#undef r_cnt
1736
1737/* *di routines use registers below R19 and won't work with tiny arch
1738   right now. */
1739
1740#if !defined (__AVR_TINY__)
1741/*******************************************************
1742       Division 64 / 64
1743       Modulo   64 % 64
1744*******************************************************/
1745
1746;; Use Speed-optimized Version on "big" Devices, i.e. Devices with
1747;; at least 16k of Program Memory.  For smaller Devices, depend
;; on MOVW and SP Size.  There is a Connection between SP Size and
1749;; Flash Size so that SP Size can be used to test for Flash Size.
1750
1751#if defined (__AVR_HAVE_JMP_CALL__)
1752#   define SPEED_DIV 8
1753#elif defined (__AVR_HAVE_MOVW__) && defined (__AVR_HAVE_SPH__)
1754#   define SPEED_DIV 16
1755#else
1756#   define SPEED_DIV 0
1757#endif
1758
1759;; A[0..7]: In: Dividend;
1760;; Out: Quotient  (T = 0)
1761;; Out: Remainder (T = 1)
1762#define A0  18
1763#define A1  A0+1
1764#define A2  A0+2
1765#define A3  A0+3
1766#define A4  A0+4
1767#define A5  A0+5
1768#define A6  A0+6
1769#define A7  A0+7
1770
1771;; B[0..7]: In: Divisor;   Out: Clobber
1772#define B0  10
1773#define B1  B0+1
1774#define B2  B0+2
1775#define B3  B0+3
1776#define B4  B0+4
1777#define B5  B0+5
1778#define B6  B0+6
1779#define B7  B0+7
1780
1781;; C[0..7]: Expand remainder;  Out: Remainder (unused)
1782#define C0  8
1783#define C1  C0+1
1784#define C2  30
1785#define C3  C2+1
1786#define C4  28
1787#define C5  C4+1
1788#define C6  26
1789#define C7  C6+1
1790
1791;; Holds Signs during Division Routine
1792#define SS      __tmp_reg__
1793
1794;; Bit-Counter in Division Routine
1795#define R_cnt   __zero_reg__
1796
1797;; Scratch Register for Negation
1798#define NN      r31
1799
1800#if defined (L_udivdi3)
1801
1802;; R25:R18 = R24:R18  umod  R17:R10
1803;; Ordinary ABI-Function
1804
;; Unsigned 64-bit modulo: T = 1 selects the remainder result.
DEFUN __umoddi3
    set
    rjmp __udivdi3_umoddi3
ENDF __umoddi3
1809
1810;; R25:R18 = R24:R18  udiv  R17:R10
1811;; Ordinary ABI-Function
1812
;; Unsigned 64-bit division: T = 0 selects the quotient result.
DEFUN __udivdi3
    clt
    ;; FALLTHRU into __udivdi3_umoddi3
ENDF __udivdi3
1816
;; Common worker for __udivdi3 / __umoddi3:  save the call-saved
;; registers that __udivmod64 clobbers (C0/C1 = R8/R9 and
;; C4/C5 = R28/R29), run it, and restore them.
DEFUN __udivdi3_umoddi3
    push    C0
    push    C1
    push    C4
    push    C5
    XCALL   __udivmod64
    pop     C5
    pop     C4
    pop     C1
    pop     C0
    ret
ENDF __udivdi3_umoddi3
1829#endif /* L_udivdi3 */
1830
1831#if defined (L_udivmod64)
1832
1833;; Worker Routine for 64-Bit unsigned Quotient and Remainder Computation
1834;; No Registers saved/restored; the Callers will take Care.
1835;; Preserves B[] and T-flag
1836;; T = 0: Compute Quotient  in A[]
1837;; T = 1: Compute Remainder in A[] and shift SS one Bit left
1838
;; Non-restoring is not used here; this is a plain restoring
;; shift-and-subtract with an optional byte-wise (SPEED_DIV == 8) or
;; word-wise (SPEED_DIV == 16) pre-shift that skips leading zero bits
;; of the quotient.  A[] = dividend, B[] = divisor, C[] = remainder.
DEFUN __udivmod64

    ;; Clear Remainder (C6, C7 will follow)
    clr     C0
    clr     C1
    wmov    C2, C0
    wmov    C4, C0
    ldi     C7, 64

#if SPEED_DIV == 0 || SPEED_DIV == 16
    ;; Initialize Loop-Counter
    mov     R_cnt, C7
    wmov    C6, C0
#endif /* SPEED_DIV */

#if SPEED_DIV == 8

    push    A7
    clr     C6

1:  ;; Compare shifted Dividend against Divisor
    ;; If -- even after Shifting -- it is smaller...
    CP  A7,B0  $  cpc C0,B1  $  cpc C1,B2  $  cpc C2,B3
    cpc C3,B4  $  cpc C4,B5  $  cpc C5,B6  $  cpc C6,B7
    brcc    2f

    ;; ...then we can subtract it.  Thus, it is legal to shift left
    ;; by a whole Byte (A7 travels towards C0, A0 becomes 0)
               $  mov C6,C5  $  mov C5,C4  $  mov C4,C3
    mov C3,C2  $  mov C2,C1  $  mov C1,C0  $  mov C0,A7
    mov A7,A6  $  mov A6,A5  $  mov A5,A4  $  mov A4,A3
    mov A3,A2  $  mov A2,A1  $  mov A1,A0  $  clr A0

    ;; 8 Bits are done
    subi    C7, 8
    brne    1b

    ;; Shifted 64 Bits:  A7 has traveled to C7
    pop     C7
    ;; Divisor is greater than Dividend. We have:
    ;; A[] % B[] = A[]
    ;; A[] / B[] = 0
    ;; Thus, we can return immediately
    rjmp    5f

2:  ;; Initialize Bit-Counter with Number of Bits still to be performed
    mov     R_cnt, C7

    ;; Push of A7 is not needed because C7 is still 0
    pop     C7
    clr     C7

#elif  SPEED_DIV == 16

    ;; Compare shifted Dividend against Divisor
    cp      A7, B3
    cpc     C0, B4
    cpc     C1, B5
    cpc     C2, B6
    cpc     C3, B7
    brcc    2f

    ;; Divisor is greater than shifted Dividend: We can shift the Dividend
    ;; and it is still smaller than the Divisor --> Shift one 32-Bit Chunk
    wmov  C2,A6  $  wmov C0,A4
    wmov  A6,A2  $  wmov A4,A0
    wmov  A2,C6  $  wmov A0,C4

    ;; Set Bit Counter to 32
    lsr     R_cnt
2:
#elif SPEED_DIV
#error SPEED_DIV = ?
#endif /* SPEED_DIV */

;; The very Division + Remainder Routine

3:  ;; Left-shift Dividend...
    lsl A0     $  rol A1     $  rol A2     $  rol A3
    rol A4     $  rol A5     $  rol A6     $  rol A7

    ;; ...into Remainder
    rol C0     $  rol C1     $  rol C2     $  rol C3
    rol C4     $  rol C5     $  rol C6     $  rol C7

    ;; Compare Remainder and Divisor
    CP  C0,B0  $  cpc C1,B1  $  cpc C2,B2  $  cpc C3,B3
    cpc C4,B4  $  cpc C5,B5  $  cpc C6,B6  $  cpc C7,B7

    brcs 4f

    ;; Divisor fits into Remainder:  Subtract it from Remainder...
    SUB C0,B0  $  sbc C1,B1  $  sbc C2,B2  $  sbc C3,B3
    sbc C4,B4  $  sbc C5,B5  $  sbc C6,B6  $  sbc C7,B7

    ;; ...and set according Bit in the upcoming Quotient
    ;; The Bit will travel to its final Position
    ori A0, 1

4:  ;; This Bit is done
    dec     R_cnt
    brne    3b
    ;; __zero_reg__ is 0 again

    ;; T = 0: We are fine with the Quotient in A[]
    ;; T = 1: Copy Remainder to A[]
5:  brtc    6f
    wmov    A0, C0
    wmov    A2, C2
    wmov    A4, C4
    wmov    A6, C6
    ;; Move the Sign of the Result to SS.7
    lsl     SS

6:  ret

ENDF __udivmod64
1955#endif /* L_udivmod64 */
1956
1957
1958#if defined (L_divdi3)
1959
1960;; R25:R18 = R24:R18  mod  R17:R10
1961;; Ordinary ABI-Function
1962
;; Signed 64-bit modulo: T = 1 selects the remainder result.
DEFUN __moddi3
    set
    rjmp    __divdi3_moddi3
ENDF __moddi3
1967
1968;; R25:R18 = R24:R18  div  R17:R10
1969;; Ordinary ABI-Function
1970
;; Signed 64-bit division: T = 0 selects the quotient result.
DEFUN __divdi3
    clt
    ;; FALLTHRU into __divdi3_moddi3
ENDF __divdi3
1974
;; Common worker for __divdi3 / __moddi3:  take absolute values,
;; run __udivmod64, then fix up the result's sign.  SS (= __tmp_reg__)
;; records the signs; NN (= R31) is scratch for the negations.
DEFUN  __divdi3_moddi3
#if SPEED_DIV
    mov     r31, A7
    or      r31, B7
    brmi    0f
    ;; Both Signs are 0:  the following Complexity is not needed
    XJMP    __udivdi3_umoddi3
#endif /* SPEED_DIV */

0:  ;; The Prologue
    ;; Save 12 Registers:  Y, 17...8
    ;; No Frame needed
    do_prologue_saves 12

    ;; SS.7 will contain the Sign of the Quotient  (A.sign * B.sign)
    ;; SS.6 will contain the Sign of the Remainder (A.sign)
    mov     SS, A7
    asr     SS
    ;; Adjust Dividend's Sign as needed
#if SPEED_DIV
    ;; Compiling for Speed we know that at least one Sign must be < 0
    ;; Thus, if A[] >= 0 then we know B[] < 0
    brpl    22f
#else
    brpl    21f
#endif /* SPEED_DIV */

    XCALL   __negdi2

    ;; Adjust Divisor's Sign and SS.7 as needed
21: tst     B7
    brpl    3f
22: ldi     NN, 1 << 7
    eor     SS, NN

    ;; Negate B[]:  complement all Bytes, then propagate the +1
    ;; starting from the NEG of the low Byte.
    ldi NN, -1
    com B4     $  com B5     $  com B6     $  com B7
               $  com B1     $  com B2     $  com B3
    NEG B0
               $  sbc B1,NN  $  sbc B2,NN  $  sbc B3,NN
    sbc B4,NN  $  sbc B5,NN  $  sbc B6,NN  $  sbc B7,NN

3:  ;; Do the unsigned 64-Bit Division/Modulo (depending on T-flag)
    XCALL   __udivmod64

    ;; Adjust Result's Sign
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    tst     SS
    brpl    4f
#else
    sbrc    SS, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XCALL   __negdi2

4:  ;; Epilogue: Restore 12 Registers and return
    do_epilogue_restores 12

ENDF __divdi3_moddi3
2033
2034#endif /* L_divdi3 */
2035
2036#undef R_cnt
2037#undef SS
2038#undef NN
2039
2040.section .text.libgcc, "ax", @progbits
2041
2042#define TT __tmp_reg__
2043
2044#if defined (L_adddi3)
2045;; (set (reg:DI 18)
2046;;      (plus:DI (reg:DI 18)
2047;;               (reg:DI 10)))
2048;; Sets the V flag for signed overflow tests
2049;; Sets the C flag for unsigned overflow tests
DEFUN __adddi3
    ;; 64-bit add with ripple carry: A[] += B[]
    ADD A0,B0  $  adc A1,B1  $  adc A2,B2  $  adc A3,B3
    adc A4,B4  $  adc A5,B5  $  adc A6,B6  $  adc A7,B7
    ret
ENDF __adddi3
2055#endif /* L_adddi3 */
2056
2057#if defined (L_adddi3_s8)
2058;; (set (reg:DI 18)
2059;;      (plus:DI (reg:DI 18)
2060;;               (sign_extend:SI (reg:QI 26))))
2061;; Sets the V flag for signed overflow tests
2062;; Sets the C flag for unsigned overflow tests provided 0 <= R26 < 128
DEFUN __adddi3_s8
    ;; TT = sign-extension byte of R26 (0x00 or 0xFF)
    clr     TT
    sbrc    r26, 7
    com     TT
    ;; A[] += sign_extend (R26)
    ADD A0,r26 $  adc A1,TT  $  adc A2,TT  $  adc A3,TT
    adc A4,TT  $  adc A5,TT  $  adc A6,TT  $  adc A7,TT
    ret
ENDF __adddi3_s8
2071#endif /* L_adddi3_s8 */
2072
2073#if defined (L_subdi3)
2074;; (set (reg:DI 18)
2075;;      (minus:DI (reg:DI 18)
2076;;                (reg:DI 10)))
2077;; Sets the V flag for signed overflow tests
2078;; Sets the C flag for unsigned overflow tests
DEFUN __subdi3
    ;; 64-bit subtract with ripple borrow: A[] -= B[]
    SUB A0,B0  $  sbc A1,B1  $  sbc A2,B2  $  sbc A3,B3
    sbc A4,B4  $  sbc A5,B5  $  sbc A6,B6  $  sbc A7,B7
    ret
ENDF __subdi3
2084#endif /* L_subdi3 */
2085
2086#if defined (L_cmpdi2)
2087;; (set (cc0)
2088;;      (compare (reg:DI 18)
2089;;               (reg:DI 10)))
DEFUN __cmpdi2
    ;; 64-bit compare: set flags for A[] - B[] without storing a result
    CP  A0,B0  $  cpc A1,B1  $  cpc A2,B2  $  cpc A3,B3
    cpc A4,B4  $  cpc A5,B5  $  cpc A6,B6  $  cpc A7,B7
    ret
ENDF __cmpdi2
2095#endif /* L_cmpdi2 */
2096
2097#if defined (L_cmpdi2_s8)
2098;; (set (cc0)
2099;;      (compare (reg:DI 18)
2100;;               (sign_extend:SI (reg:QI 26))))
DEFUN __cmpdi2_s8
    ;; TT = sign-extension byte of R26 (0x00 or 0xFF)
    clr     TT
    sbrc    r26, 7
    com     TT
    ;; set flags for A[] - sign_extend (R26)
    CP  A0,r26 $  cpc A1,TT  $  cpc A2,TT  $  cpc A3,TT
    cpc A4,TT  $  cpc A5,TT  $  cpc A6,TT  $  cpc A7,TT
    ret
ENDF __cmpdi2_s8
2109#endif /* L_cmpdi2_s8 */
2110
2111#if defined (L_negdi2)
2112;; (set (reg:DI 18)
2113;;      (neg:DI (reg:DI 18)))
2114;; Sets the V flag for signed overflow tests
DEFUN __negdi2

    ;; Two's complement: complement every Byte except A0, NEG A0,
    ;; then SBCI -1 propagates the +1 through the higher Bytes
    ;; (carry from NEG cancels the increment where appropriate).
    com  A4    $  com  A5    $  com  A6    $  com  A7
               $  com  A1    $  com  A2    $  com  A3
    NEG  A0
               $  sbci A1,-1 $  sbci A2,-1 $  sbci A3,-1
    sbci A4,-1 $  sbci A5,-1 $  sbci A6,-1 $  sbci A7,-1
    ret

ENDF __negdi2
2125#endif /* L_negdi2 */
2126
2127#undef TT
2128
2129#undef C7
2130#undef C6
2131#undef C5
2132#undef C4
2133#undef C3
2134#undef C2
2135#undef C1
2136#undef C0
2137
2138#undef B7
2139#undef B6
2140#undef B5
2141#undef B4
2142#undef B3
2143#undef B2
2144#undef B1
2145#undef B0
2146
2147#undef A7
2148#undef A6
2149#undef A5
2150#undef A4
2151#undef A3
2152#undef A2
2153#undef A1
2154#undef A0
2155
2156#endif /* !defined (__AVR_TINY__) */
2157
2158
2159.section .text.libgcc.prologue, "ax", @progbits
2160
2161/**********************************
2162 * This is a prologue subroutine
2163 **********************************/
2164#if !defined (__AVR_TINY__)
2165#if defined (L_prologue)
2166
2167;; This function does not clobber T-flag; 64-bit division relies on it
;; Prologue helper:  push the call-saved registers R2..R17, R28, R29,
;; allocate a stack frame of R27:R26 bytes, set Y to the new stack
;; pointer, and return to the caller through an indirect jump via Z.
;; NOTE(review): do_prologue_saves appears to enter this push sequence
;; at a computed offset so fewer registers are saved -- confirm against
;; the do_prologue_saves macro definition.
;; This function does not clobber T-flag; 64-bit division relies on it
DEFUN __prologue_saves__
	push r2
	push r3
	push r4
	push r5
	push r6
	push r7
	push r8
	push r9
	push r10
	push r11
	push r12
	push r13
	push r14
	push r15
	push r16
	push r17
	push r28
	push r29
#if !defined (__AVR_HAVE_SPH__)
	;; 8-bit stack pointer: frame size is in R26 only
	in	r28,__SP_L__
	sub	r28,r26
	out	__SP_L__,r28
	clr	r29
#elif defined (__AVR_XMEGA__)
	;; XMEGA updates SP halves without an atomic sequence
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	out	__SP_L__,r28
	out	__SP_H__,r29
#else
	;; Classic cores: write SP_H with interrupts disabled; the
	;; SREG restore re-enables them only after the SP_L write
	;; (interrupts are delayed one instruction after OUT SREG).
	in	r28,__SP_L__
	in	r29,__SP_H__
	sub	r28,r26
	sbc	r29,r27
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
#endif /* #SP = 8/16 */

	XIJMP			; return to caller via Z (EIND-aware)

ENDF __prologue_saves__
2214#endif /* defined (L_prologue) */
2215
2216/*
2217 * This is an epilogue subroutine
2218 */
2219#if defined (L_epilogue)
2220
;; Epilogue helper:  Y addresses the register save area; R30 holds the
;; frame size.  Restore R2..R17 and the caller's Y (staged in X),
;; deallocate the frame by adding R30 to Y, write the stack pointer,
;; then move the caller's Y into R28/R29 and return.
;; NOTE(review): like __prologue_saves__, callers appear to enter at
;; an offset to restore fewer registers -- confirm against the
;; do_epilogue_restores macro definition.
DEFUN __epilogue_restores__
	ldd	r2,Y+18
	ldd	r3,Y+17
	ldd	r4,Y+16
	ldd	r5,Y+15
	ldd	r6,Y+14
	ldd	r7,Y+13
	ldd	r8,Y+12
	ldd	r9,Y+11
	ldd	r10,Y+10
	ldd	r11,Y+9
	ldd	r12,Y+8
	ldd	r13,Y+7
	ldd	r14,Y+6
	ldd	r15,Y+5
	ldd	r16,Y+4
	ldd	r17,Y+3
	ldd	r26,Y+2		; caller's R28, staged in X
#if !defined (__AVR_HAVE_SPH__)
	;; 8-bit stack pointer
	ldd	r29,Y+1
	add	r28,r30		; deallocate frame
	out	__SP_L__,r28
	mov	r28, r26
#elif defined (__AVR_XMEGA__)
	ldd  r27,Y+1		; caller's R29
	add  r28,r30		; deallocate frame
	adc  r29,__zero_reg__
	out  __SP_L__,r28
	out  __SP_H__,r29
	wmov 28, 26		; Y = caller's Y
#else
	;; Classic cores: atomic 16-bit SP update (see __prologue_saves__)
	ldd	r27,Y+1		; caller's R29
	add	r28,r30		; deallocate frame
	adc	r29,__zero_reg__
	in	__tmp_reg__,__SREG__
	cli
	out	__SP_H__,r29
	out	__SREG__,__tmp_reg__
	out	__SP_L__,r28
	mov_l	r28, r26	; Y = caller's Y
	mov_h	r29, r27
#endif /* #SP = 8/16 */
	ret
ENDF __epilogue_restores__
2265#endif /* defined (L_epilogue) */
2266#endif /* !defined (__AVR_TINY__) */
2267
2268#ifdef L_exit
2269	.section .fini9,"ax",@progbits
;; _exit: head of the .fini chain.  "exit" is a weak alias so that
;; user code can override it.
DEFUN _exit
	.weak	exit
exit:
ENDF _exit

	/* Code from .fini8 ... .fini1 sections inserted by ld script.  */

	.section .fini0,"ax",@progbits
	cli			; disable interrupts...
__stop_program:
	rjmp	__stop_program	; ...and loop forever
2281#endif /* defined (L_exit) */
2282
2283#ifdef L_cleanup
	;; Weak no-op stub for _cleanup; a stronger definition may
	;; replace it at link time.
	.weak	_cleanup
	.func	_cleanup
_cleanup:
	ret
.endfunc
2289#endif /* defined (L_cleanup) */
2290
2291
2292.section .text.libgcc, "ax", @progbits
2293
2294#ifdef L_tablejump2
;; Jump to the function whose word address is stored in a flash
;; jumptable.  In: Z = word address of the table entry (devices with
;; EIJMP/EICALL additionally pass the high bits in R24).
;; Converts the entry's word address to a byte address, loads the
;; target address from flash and jumps to it.
DEFUN __tablejump2__
    lsl     r30
    rol     r31             ; Z = byte address of table entry
#if defined (__AVR_HAVE_EIJMP_EICALL__)
    ;; Word address of gs() jumptable entry in R24:Z
    rol     r24
    out     __RAMPZ__, r24  ; address bits beyond 64 KiB for ELPM
#elif defined (__AVR_HAVE_ELPM__)
    ;; Word address of jumptable entry in Z
    clr     __tmp_reg__
    rol     __tmp_reg__
    out     __RAMPZ__, __tmp_reg__
#endif

    ;; Read word address from jumptable and jump

#if defined (__AVR_HAVE_ELPMX__)
    elpm    __tmp_reg__, Z+ ; low byte of target address
    elpm    r31, Z          ; high byte
    mov     r30, __tmp_reg__
#ifdef __AVR_HAVE_RAMPD__
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
#endif /* RAMPD */
    XIJMP
#elif defined (__AVR_HAVE_ELPM__)
    elpm                    ; R0 = low byte of target address
    push    r0
    adiw    r30, 1
    elpm                    ; R0 = high byte
    push    r0
    ret                     ; jump via RET to the pushed address
#elif defined (__AVR_HAVE_LPMX__)
    lpm     __tmp_reg__, Z+ ; low byte of target address
    lpm     r31, Z          ; high byte
    mov     r30, __tmp_reg__
    ijmp
#elif defined (__AVR_TINY__)
    wsubi 30, -(__AVR_TINY_PM_BASE_ADDRESS__) ; Add PM offset to Z
    ld __tmp_reg__, Z+
    ld r31, Z   ; Use ld instead of lpm to load Z
    mov r30, __tmp_reg__
    ijmp
#else
    lpm                     ; R0 = low byte of target address
    push    r0
    adiw    r30, 1
    lpm                     ; R0 = high byte
    push    r0
    ret                     ; jump via RET to the pushed address
#endif
ENDF __tablejump2__
2347#endif /* L_tablejump2 */
2348
2349#if defined(__AVR_TINY__)
2350#ifdef L_copy_data
2351        .section .init4,"ax",@progbits
        ;; Copy the .data initializers from flash to RAM on AVR_TINY:
        ;; flash is visible in the data address space at offset
        ;; __AVR_TINY_PM_BASE_ADDRESS__, so plain LD reads it.
        ;; X = RAM destination, Z = flash source, R18 = hi8(__data_end)
        .global __do_copy_data
__do_copy_data:
        ldi     r18, hi8(__data_end)
        ldi     r26, lo8(__data_start)
        ldi     r27, hi8(__data_start)
        ldi     r30, lo8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
        ldi     r31, hi8(__data_load_start + __AVR_TINY_PM_BASE_ADDRESS__)
        rjmp    .L__do_copy_data_start
.L__do_copy_data_loop:
        ld      r19, z+
        st      X+, r19
.L__do_copy_data_start:
        cpi     r26, lo8(__data_end)
        cpc     r27, r18
        brne    .L__do_copy_data_loop
2367#endif
2368#else
2369#ifdef L_copy_data
2370	.section .init4,"ax",@progbits
;; Copy the .data initializers from flash to RAM.
;; X = RAM destination, Z = flash source, R17 = hi8(__data_end).
;; Three variants, depending on the flash-read instructions available.
DEFUN __do_copy_data
#if defined(__AVR_HAVE_ELPMX__)
	;; ELPM Rd,Z+ available: straightforward copy through RAMPZ:Z
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	ldi	r16, hh8(__data_load_start)
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm	r0, Z+
	st	X+, r0
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#elif  !defined(__AVR_HAVE_ELPMX__) && defined(__AVR_HAVE_ELPM__)
	;; Only plain ELPM: advance Z with ADIW and bump RAMPZ manually
	;; whenever Z wraps a 64 KiB boundary (carry from ADIW).
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	ldi	r16, hh8(__data_load_start - 0x10000)
.L__do_copy_data_carry:
	inc	r16
	out	__RAMPZ__, r16
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
	elpm
	st	X+, r0
	adiw	r30, 1
	brcs	.L__do_copy_data_carry
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#elif !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__)
	;; Flash <= 64 KiB: use LPM (with Z+ post-increment if available)
	ldi	r17, hi8(__data_end)
	ldi	r26, lo8(__data_start)
	ldi	r27, hi8(__data_start)
	ldi	r30, lo8(__data_load_start)
	ldi	r31, hi8(__data_load_start)
	rjmp	.L__do_copy_data_start
.L__do_copy_data_loop:
#if defined (__AVR_HAVE_LPMX__)
	lpm	r0, Z+
#else
	lpm
	adiw	r30, 1
#endif
	st	X+, r0
.L__do_copy_data_start:
	cpi	r26, lo8(__data_end)
	cpc	r27, r17
	brne	.L__do_copy_data_loop
#endif /* !defined(__AVR_HAVE_ELPMX__) && !defined(__AVR_HAVE_ELPM__) */
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
	;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
	out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
ENDF __do_copy_data
2433#endif /* L_copy_data */
2434#endif /* !defined (__AVR_TINY__) */
2435
2436/* __do_clear_bss is only necessary if there is anything in .bss section.  */
2437
2438#ifdef L_clear_bss
2439	.section .init4,"ax",@progbits
;; Zero the .bss section:  X runs from __bss_start to __bss_end,
;; storing __zero_reg__ at each byte.  R18 = hi8(__bss_end).
DEFUN __do_clear_bss
	ldi	r18, hi8(__bss_end)
	ldi	r26, lo8(__bss_start)
	ldi	r27, hi8(__bss_start)
	rjmp	.do_clear_bss_start
.do_clear_bss_loop:
	st	X+, __zero_reg__
.do_clear_bss_start:
	cpi	r26, lo8(__bss_end)
	cpc	r27, r18
	brne	.do_clear_bss_loop
ENDF __do_clear_bss
2452#endif /* L_clear_bss */
2453
2454/* __do_global_ctors and __do_global_dtors are only necessary
2455   if there are any constructors/destructors.  */
2456
2457#if defined(__AVR_TINY__)
2458#define cdtors_tst_reg r18
2459#else
2460#define cdtors_tst_reg r17
2461#endif
2462
2463#ifdef L_ctors
2464	.section .init6,"ax",@progbits
;; Run static constructors: walk the table of word addresses in
;; [__ctors_start, __ctors_end) from the end downwards, calling each
;; entry through __tablejump2__.  Y = current entry (word address);
;; cdtors_tst_reg caches pm_hi8(__ctors_start) for the end compare;
;; R16/R24 carry the hh8 part on devices with EIND.
DEFUN __do_global_ctors
    ldi     cdtors_tst_reg, pm_hi8(__ctors_start)
    ldi     r28, pm_lo8(__ctors_end)
    ldi     r29, pm_hi8(__ctors_end)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r16, pm_hh8(__ctors_end)
#endif /* HAVE_EIJMP */
    rjmp    .L__do_global_ctors_start
.L__do_global_ctors_loop:
    wsubi   28, 1                   ; step to previous table entry
#ifdef __AVR_HAVE_EIJMP_EICALL__
    sbc     r16, __zero_reg__
    mov     r24, r16                ; __tablejump2__ takes hh8 in R24
#endif /* HAVE_EIJMP */
    mov_h   r31, r29
    mov_l   r30, r28
    XCALL   __tablejump2__          ; call the constructor
.L__do_global_ctors_start:
    cpi     r28, pm_lo8(__ctors_start)
    cpc     r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r24, pm_hh8(__ctors_start)
    cpc     r16, r24
#endif /* HAVE_EIJMP */
    brne    .L__do_global_ctors_loop
ENDF __do_global_ctors
2491#endif /* L_ctors */
2492
2493#ifdef L_dtors
2494	.section .fini6,"ax",@progbits
;; Run static destructors: walk the table of word addresses in
;; [__dtors_start, __dtors_end) from the start upwards, calling each
;; entry through __tablejump2__.  Y = current entry (word address);
;; cdtors_tst_reg caches pm_hi8(__dtors_end) for the end compare;
;; R16/R24 carry the hh8 part on devices with EIND.
DEFUN __do_global_dtors
    ldi     cdtors_tst_reg, pm_hi8(__dtors_end)
    ldi     r28, pm_lo8(__dtors_start)
    ldi     r29, pm_hi8(__dtors_start)
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r16, pm_hh8(__dtors_start)
#endif /* HAVE_EIJMP */
    rjmp    .L__do_global_dtors_start
.L__do_global_dtors_loop:
#ifdef __AVR_HAVE_EIJMP_EICALL__
    mov     r24, r16                ; __tablejump2__ takes hh8 in R24
#endif /* HAVE_EIJMP */
    mov_h   r31, r29
    mov_l   r30, r28
    XCALL   __tablejump2__          ; call the destructor
    waddi   28, 1                   ; step to next table entry
#ifdef __AVR_HAVE_EIJMP_EICALL__
    adc     r16, __zero_reg__
#endif /* HAVE_EIJMP */
.L__do_global_dtors_start:
    cpi     r28, pm_lo8(__dtors_end)
    cpc     r29, cdtors_tst_reg
#ifdef __AVR_HAVE_EIJMP_EICALL__
    ldi     r24, pm_hh8(__dtors_end)
    cpc     r16, r24
#endif /* HAVE_EIJMP */
    brne    .L__do_global_dtors_loop
ENDF __do_global_dtors
2523#endif /* L_dtors */
2524
2525#undef cdtors_tst_reg
2526
2527.section .text.libgcc, "ax", @progbits
2528
2529#if !defined (__AVR_TINY__)
2530;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2531;; Loading n bytes from Flash; n = 3,4
2532;; R22... = Flash[Z]
2533;; Clobbers: __tmp_reg__
2534
#if (defined (L_load_3)        \
     || defined (L_load_4))    \
    && !defined (__AVR_HAVE_LPMX__)

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Load one byte from Flash[Z] into \dest for devices without LPMX:
;; plain LPM leaves the byte in r0 and does not advance Z, so Z is
;; incremented by hand -- except after the last byte of the transfer
;; (\dest == D0+\n-1), where Z is wound back by \n-1 so it ends up
;; unchanged from its value at entry.
.macro  .load dest, n
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.else
    sbiw    r30, \n-1
.endif
.endm
2554
#if defined (L_load_3)
;; R24:R22 = 3 bytes from Flash[Z], Z preserved.
;; Implemented on top of __load_4: D3 (the 4th destination byte) is
;; saved around the call, so one byte beyond the requested 3 is read.
DEFUN __load_3
    push  D3
    XCALL __load_4
    pop   D3
    ret
ENDF __load_3
#endif /* L_load_3 */
2563
#if defined (L_load_4)
;; R25:R22 = 4 bytes from Flash[Z].  The final .load winds Z back, so
;; Z is restored to its value at entry.
DEFUN __load_4
    .load D0, 4
    .load D1, 4
    .load D2, 4
    .load D3, 4
    ret
ENDF __load_4
#endif /* L_load_4 */
2573
#endif /* L_load_3 || L_load_4 */
2575#endif /* !defined (__AVR_TINY__) */
2576
2577#if !defined (__AVR_TINY__)
2578;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2579;; Loading n bytes from Flash or RAM;  n = 1,2,3,4
2580;; R22... = Flash[R21:Z] or RAM[Z] depending on R21.7
2581;; Clobbers: __tmp_reg__, R21, R30, R31
2582
#if (defined (L_xload_1)            \
     || defined (L_xload_2)         \
     || defined (L_xload_3)         \
     || defined (L_xload_4))

;; Destination
#define D0  22
#define D1  D0+1
#define D2  D0+2
#define D3  D0+3

;; Register containing bits 16+ of the address

#define HHI8  21

;; Load one byte of an \n-byte transfer from flash into \dest, using the
;; best instruction the device offers.  The ELPMX/LPMX variants use the
;; hardware post-increment of Z; the plain ELPM/LPM variants advance Z by
;; hand and skip that on the last byte (\dest == D0+\n-1).
.macro  .xload dest, n
#if defined (__AVR_HAVE_ELPMX__)
    elpm    \dest, Z+
#elif defined (__AVR_HAVE_ELPM__)
    ;; Plain ELPM: result lands in r0; bump Z manually and carry any
    ;; overflow into the address hh-byte, keeping RAMPZ mirrored to HHI8.
    elpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
.endif
#elif defined (__AVR_HAVE_LPMX__)
    lpm     \dest, Z+
#else
    ;; Plain LPM: result lands in r0; bump Z manually.
    lpm
    mov     \dest, r0
.if \dest != D0+\n-1
    adiw    r30, 1
.endif
#endif
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
.if \dest == D0+\n-1
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out     __RAMPZ__, __zero_reg__
.endif
#endif
.endm ; .xload
2625
#if defined (L_xload_1)
;; R22 = one byte from R21:Z -- from RAM if R21.7 is set, else from flash.
DEFUN __xload_1
#if defined (__AVR_HAVE_LPMX__) && !defined (__AVR_HAVE_ELPM__)
    ;; LPMX and no ELPM (flash reachable through Z alone): select the
    ;; source with two complementary skips instead of a branch.
    sbrc    HHI8, 7
    ld      D0, Z                       ; R21.7 set: RAM
    sbrs    HHI8, 7
    lpm     D0, Z                       ; R21.7 clear: flash
    ret
#else
    sbrc    HHI8, 7
    rjmp    1f                          ; R21.7 set -> read from RAM
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8             ; bits 16+ of the flash address
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 1
    ret
1:  ld      D0, Z
    ret
#endif /* LPMx && ! ELPM */
ENDF __xload_1
#endif /* L_xload_1 */
2647
#if defined (L_xload_2)
;; R23:R22 = 2 bytes from R21:Z -- from RAM if R21.7 is set, else flash.
DEFUN __xload_2
    sbrc    HHI8, 7
    rjmp    1f                          ; R21.7 set -> read from RAM
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8             ; bits 16+ of the flash address
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 2
    .xload  D1, 2
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ret
ENDF __xload_2
#endif /* L_xload_2 */
2663
#if defined (L_xload_3)
;; R24:R22 = 3 bytes from R21:Z -- from RAM if R21.7 is set, else flash.
DEFUN __xload_3
    sbrc    HHI8, 7
    rjmp    1f                          ; R21.7 set -> read from RAM
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8             ; bits 16+ of the flash address
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 3
    .xload  D1, 3
    .xload  D2, 3
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ret
ENDF __xload_3
#endif /* L_xload_3 */
2681
#if defined (L_xload_4)
;; R25:R22 = 4 bytes from R21:Z -- from RAM if R21.7 is set, else flash.
DEFUN __xload_4
    sbrc    HHI8, 7
    rjmp    1f                          ; R21.7 set -> read from RAM
#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8             ; bits 16+ of the flash address
#endif /* __AVR_HAVE_ELPM__ */
    .xload  D0, 4
    .xload  D1, 4
    .xload  D2, 4
    .xload  D3, 4
    ret
1:  ld      D0, Z+
    ld      D1, Z+
    ld      D2, Z+
    ld      D3, Z+
    ret
ENDF __xload_4
#endif /* L_xload_4 */
2701
2702#endif /* L_xload_{1|2|3|4} */
2703#endif /* if !defined (__AVR_TINY__) */
2704
2705#if !defined (__AVR_TINY__)
2706;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2707;; memcopy from Address Space __pgmx to RAM
2708;; R23:Z = Source Address
2709;; X     = Destination Address
2710;; Clobbers: __tmp_reg__, R23, R24, R25, X, Z
2711
#if defined (L_movmemx)

#define HHI8  23
#define LOOP  24

;; 8-bit-count entry point: widen the byte count in R24 to the 16-bit
;; loop counter R25:R24, then fall through to __movmemx_hi.
DEFUN __movmemx_qi
    ;; #Bytes to copy fit in 8 Bits (1..255)
    ;; Zero-extend Loop Counter
    clr     LOOP+1
    ;; FALLTHRU
ENDF __movmemx_qi
2723
;; Copy R25:R24 bytes from 24-bit source address R23:Z to RAM at X.
;; R23.7 set means the source is RAM, otherwise flash.  The counter is
;; tested after the decrement, so a count of 0 is not supported (it
;; would wrap and copy 65536 bytes).
DEFUN __movmemx_hi

;; Read from where?
    sbrc    HHI8, 7
    rjmp    1f

;; Read from Flash

#if defined (__AVR_HAVE_ELPM__)
    out     __RAMPZ__, HHI8             ; bits 16+ of the flash address
#endif

0:  ;; Load 1 Byte from Flash...

#if defined (__AVR_HAVE_ELPMX__)
    elpm    r0, Z+
#elif defined (__AVR_HAVE_ELPM__)
    ;; Plain ELPM: advance Z by hand, carrying the overflow into the
    ;; address hh-byte and keeping RAMPZ mirrored to HHI8.
    elpm
    adiw    r30, 1
    adc     HHI8, __zero_reg__
    out     __RAMPZ__, HHI8
#elif defined (__AVR_HAVE_LPMX__)
    lpm     r0, Z+
#else
    lpm
    adiw    r30, 1
#endif

    ;; ...and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    0b
#if defined (__AVR_HAVE_ELPM__) && defined (__AVR_HAVE_RAMPD__)
    ;; Reset RAMPZ to 0 so that EBI devices don't read garbage from RAM
    out	__RAMPZ__, __zero_reg__
#endif /* ELPM && RAMPD */
    ret

;; Read from RAM

1:  ;; Read 1 Byte from RAM...
    ld      r0, Z+
    ;; and store that Byte to RAM Destination
    st      X+, r0
    sbiw    LOOP, 1
    brne    1b
    ret
ENDF __movmemx_hi
2772
2773#undef HHI8
2774#undef LOOP
2775
2776#endif /* L_movmemx */
2777#endif /* !defined (__AVR_TINY__) */
2778
2779
2780.section .text.libgcc.builtins, "ax", @progbits
2781
2782/**********************************
2783 * Find first set Bit (ffs)
2784 **********************************/
2785
#if defined (L_ffssi2)
;; find first set bit
;; r25:r24 = ffs32 (r25:r22)
;; clobbers: r22, r26
;; Result is 1 + index of the lowest set bit; ffs32 (0) = 0.
DEFUN __ffssi2
    ;; r26 accumulates the bit offset (0/8/16/24) of the first non-zero
    ;; byte; each zero byte is OR-ed upwards so the search carries on.
    clr  r26
    tst  r22
    brne 1f
    subi r26, -8                        ; r26 += 8
    or   r22, r23
    brne 1f
    subi r26, -8
    or   r22, r24
    brne 1f
    subi r26, -8
    or   r22, r25
    brne 1f
    ret                                 ; all bytes zero: r25:r24 = 0
1:  mov  r24, r22
    XJMP __loop_ffsqi2                  ; r25:r24 = r26 + ffs8 (r24)
ENDF __ffssi2
#endif /* defined (L_ffssi2) */
2808
#if defined (L_ffshi2)
;; find first set bit
;; r25:r24 = ffs16 (r25:r24)
;; clobbers: r26
;; Result is 1 + index of the lowest set bit; ffs16 (0) = 0.
DEFUN __ffshi2
    clr  r26
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst  r24
    breq 2f
#else
    cpse r24, __zero_reg__              ; low byte zero? -> try high byte
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
1:  XJMP __loop_ffsqi2                  ; r25:r24 = r26 + ffs8 (r24)
2:  ldi  r26, 8                         ; first set bit is in the high byte
    or   r24, r25
    brne 1b
    ret                                 ; both bytes zero: r25:r24 = 0
ENDF __ffshi2
#endif /* defined (L_ffshi2) */
2829
#if defined (L_loop_ffsqi2)
;; Helper for ffshi2, ffssi2
;; r25:r24 = r26 + zero_extend16 (ffs8(r24))
;; r24 must be != 0
;; clobbers: r26
DEFUN __loop_ffsqi2
    ;; Shift r24 right until the first 1-bit drops into carry, counting
    ;; shifts in r26; the pre-increment makes the result 1-based.
    inc  r26
    lsr  r24
    brcc __loop_ffsqi2
    mov  r24, r26
    clr  r25
    ret
ENDF __loop_ffsqi2
#endif /* defined (L_loop_ffsqi2) */
2844
2845
2846/**********************************
2847 * Count trailing Zeros (ctz)
2848 **********************************/
2849
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
;; clobbers: r26, r22
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
    ;; ctz(x) = ffs(x) - 1.  __ffssi2 returns 0 for x = 0, so the
    ;; decrement yields the documented 255 in that case.
    XCALL __ffssi2
    dec  r24
    ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
2862
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
;; clobbers: r26
;; ctz(0) = 255
;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
    ;; ctz(x) = ffs(x) - 1.  __ffshi2 returns 0 for x = 0, so the
    ;; decrement yields the documented 255 in that case.
    XCALL __ffshi2
    dec  r24
    ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
2875
2876
2877/**********************************
2878 * Count leading Zeros (clz)
2879 **********************************/
2880
#if defined (L_clzdi2)
;; count leading zeros
;; r25:r24 = clz64 (r25:r18)
;; clobbers: r22, r23, r26
DEFUN __clzdi2
    XCALL __clzsi2                      ; clz of the high word r25:r22
    sbrs r24, 5                         ; 32 (bit 5 set) = high word all 0
    ret                                 ; < 32: done
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21                      ; low word into argument regs
    XCALL __clzsi2
    subi r24, -32                       ; r24 += 32
    ret
ENDF __clzdi2
#endif /* defined (L_clzdi2) */
2898
#if defined (L_clzsi2)
;; count leading zeros
;; r25:r24 = clz32 (r25:r22)
;; clobbers: r26
DEFUN __clzsi2
    XCALL __clzhi2                      ; clz of the high half r25:r24
    sbrs r24, 4                         ; 16 (bit 4 set) = high half all 0
    ret                                 ; < 16: done
    mov_l r24, r22
    mov_h r25, r23                      ; low half into argument regs
    XCALL __clzhi2
    subi r24, -16                       ; r24 += 16
    ret
ENDF __clzsi2
#endif /* defined (L_clzsi2) */
2914
#if defined (L_clzhi2)
;; count leading zeros
;; r25:r24 = clz16 (r25:r24)
;; clobbers: r26
;; clz16 (0) = 16
DEFUN __clzhi2
    ;; Reduce to the highest non-zero byte, accumulating 8 per skipped
    ;; byte in r26; the low byte is OR-ed up for the all-zero check.
    clr  r26
    tst  r25
    brne 1f
    subi r26, -8                        ; high byte 0: r26 += 8
    or   r25, r24
    brne 1f
    ldi  r24, 16                        ; clz16 (0) = 16
    ret
    ;; If the upper nibble of r25 is empty, credit 3 of its zeros now
    ;; and swap the low nibble up; the shift loop (which pre-increments
    ;; via label 2) finds the remaining zeros bit by bit.
1:  cpi  r25, 16
    brsh 3f
    subi r26, -3                        ; r26 += 3
    swap r25
2:  inc  r26
3:  lsl  r25
    brcc 2b                             ; loop until the leading 1 pops out
    mov  r24, r26
    clr  r25
    ret
ENDF __clzhi2
#endif /* defined (L_clzhi2) */
2940
2941
2942/**********************************
2943 * Parity
2944 **********************************/
2945
#if defined (L_paritydi2)
;; r25:r24 = parity64 (r25:r18)
;; clobbers: __tmp_reg__
DEFUN __paritydi2
    ;; Parity is XOR-invariant: fold bytes r18..r21 into r24 and let
    ;; __paritysi2 / __parityhi2 fold in the remaining bytes.
    eor  r24, r18
    eor  r24, r19
    eor  r24, r20
    eor  r24, r21
    XJMP __paritysi2
ENDF __paritydi2
#endif /* defined (L_paritydi2) */
2957
#if defined (L_paritysi2)
;; r25:r24 = parity32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __paritysi2
    ;; Fold bytes r22 and r23 into r24; __parityhi2 folds in r25.
    eor  r24, r22
    eor  r24, r23
    XJMP __parityhi2
ENDF __paritysi2
#endif /* defined (L_paritysi2) */
2967
#if defined (L_parityhi2)
;; r25:r24 = parity16 (r25:r24)
;; clobbers: __tmp_reg__
;; Folds the high byte into the low byte and falls through to the
;; 8-bit version below.
DEFUN __parityhi2
    eor  r24, r25
;; FALLTHRU
ENDF __parityhi2

;; r25:r24 = parity8 (r24)
;; clobbers: __tmp_reg__
;; Halves the problem by XOR-ing the swapped nibbles, then reduces the
;; 4-bit parity with an arithmetic add/mask/add trick; the interleaved
;; comments state where the parity lives after each step.
DEFUN __parityqi2
    ;; parity is in r24[0..7]
    mov  __tmp_reg__, r24
    swap __tmp_reg__
    eor  r24, __tmp_reg__
    ;; parity is in r24[0..3]
    subi r24, -4
    andi r24, -5
    subi r24, -6
    ;; parity is in r24[0,3]
    sbrc r24, 3
    inc  r24
    ;; parity is in r24[0]
    andi r24, 1
    clr  r25
    ret
ENDF __parityqi2
#endif /* defined (L_parityhi2) */
2996
2997
2998/**********************************
2999 * Population Count
3000 **********************************/
3001
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
;; clobbers: __tmp_reg__
DEFUN __popcounthi2
    XCALL __popcountqi2                 ; popcount of the low byte
    push r24                            ; park the partial count
    mov  r24, r25
    XCALL __popcountqi2                 ; popcount of the high byte
    clr  r25
    ;; FALLTHRU
ENDF __popcounthi2

;; Shared tail for the 16/32/64-bit popcount routines: add the partial
;; count that was pushed on the stack to r24.
DEFUN __popcounthi2_tail
    pop   __tmp_reg__
    add   r24, __tmp_reg__
    ret
ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
3021
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
;; clobbers: __tmp_reg__
DEFUN __popcountsi2
    XCALL __popcounthi2                 ; popcount of the high half r25:r24
    push  r24                           ; park the partial count
    mov_l r24, r22
    mov_h r25, r23                      ; low half into argument regs
    XCALL __popcounthi2
    XJMP  __popcounthi2_tail            ; add the pushed partial count
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
3035
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
    XCALL __popcountsi2                 ; popcount of the high word r25:r22
    push  r24                           ; park the partial count
    mov_l r22, r18
    mov_h r23, r19
    mov_l r24, r20
    mov_h r25, r21                      ; low word into argument regs
    XCALL __popcountsi2
    XJMP  __popcounthi2_tail            ; add the pushed partial count
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */
3051
#if defined (L_popcountqi2)
;; population count
;; r24 = popcount8 (r24)
;; clobbers: __tmp_reg__
;; Shift a copy of the input right, adding each bit that falls into
;; carry.  No loop: 8 bits unrolled straight-line.
DEFUN __popcountqi2
    mov  __tmp_reg__, r24
    andi r24, 1                         ; result starts as bit 0
    lsr  __tmp_reg__                    ; discard bit 0 from the copy
    lsr  __tmp_reg__
    adc  r24, __zero_reg__              ; + bit 1 (carry from the lsr)
    lsr  __tmp_reg__
    adc  r24, __zero_reg__              ; + bit 2
    lsr  __tmp_reg__
    adc  r24, __zero_reg__              ; + bit 3
    lsr  __tmp_reg__
    adc  r24, __zero_reg__              ; + bit 4
    lsr  __tmp_reg__
    adc  r24, __zero_reg__              ; + bit 5
    lsr  __tmp_reg__
    adc  r24, __tmp_reg__               ; + bit 6 (carry) + bit 7 (the
                                        ;   only bit left in __tmp_reg__)
    ret
ENDF __popcountqi2
#endif /* defined (L_popcountqi2) */
3075
3076
3077/**********************************
3078 * Swap bytes
3079 **********************************/
3080
;; swap two registers with different register number
;; (classic triple-XOR exchange: needs no scratch register, but would
;; zero both operands if \a and \b named the same register)
.macro bswap a, b
    eor \a, \b
    eor \b, \a
    eor \a, \b
.endm
3087
#if defined (L_bswapsi2)
;; swap bytes
;; r25:r22 = bswap32 (r25:r22)
;; Reverses the byte order in place, no scratch register needed.
DEFUN __bswapsi2
    bswap r22, r25                      ; exchange the outer bytes
    bswap r23, r24                      ; exchange the inner bytes
    ret
ENDF __bswapsi2
#endif /* defined (L_bswapsi2) */
3097
#if defined (L_bswapdi2)
;; swap bytes
;; r25:r18 = bswap64 (r25:r18)
;; Reverses the byte order in place, no scratch register needed.
DEFUN __bswapdi2
    bswap r18, r25                      ; exchange byte pairs from the
    bswap r19, r24                      ;   outside in
    bswap r20, r23
    bswap r21, r22
    ret
ENDF __bswapdi2
#endif /* defined (L_bswapdi2) */
3109
3110
3111/**********************************
3112 * 64-bit shifts
3113 **********************************/
3114
#if defined (L_ashrdi3)

;; SS is the byte shifted in from the left: 0x00 for logical shifts,
;; 0xff for arithmetic shifts of a negative value.  __zero_reg__ is
;; temporarily hijacked for it and cleared again before returning.
#define SS __zero_reg__

;; Arithmetic shift right
;; r25:r18 = ashr64 (r25:r18, r17:r16)
DEFUN __ashrdi3
    sbrc    r25, 7
    com     SS                          ; negative input: SS = 0xff
    ;; FALLTHRU
ENDF  __ashrdi3

;; Logic shift right
;; r25:r18 = lshr64 (r25:r18, r17:r16)
;; Only the low count byte r16 is used (r17 is never read here);
;; r16 is preserved via __tmp_reg__.
DEFUN __lshrdi3
    ;; Signs are in SS (zero_reg)
    mov     __tmp_reg__, r16
    ;; Shift by whole bytes while the count is >= 8 ...
0:  cpi     r16, 8
    brlo 2f
    subi    r16, 8
    mov     r18, r19
    mov     r19, r20
    mov     r20, r21
    mov     r21, r22
    mov     r22, r23
    mov     r23, r24
    mov     r24, r25
    mov     r25, SS
    rjmp 0b
    ;; ... then the remaining 0..7 shifts bit by bit.  asr keeps SS at
    ;; 0x00 / 0xff and sets carry accordingly, feeding the sign in.
1:  asr     SS
    ror     r25
    ror     r24
    ror     r23
    ror     r22
    ror     r21
    ror     r20
    ror     r19
    ror     r18
2:  dec     r16
    brpl 1b
    clr     __zero_reg__                ; restore the zero register
    mov     r16, __tmp_reg__
    ret
ENDF __lshrdi3

#undef SS

#endif /* defined (L_ashrdi3) */
3163
#if defined (L_ashldi3)
;; Shift left
;; r25:r18 = ashl64 (r25:r18, r17:r16)
;; This function does not clobber T.
;; Only the low count byte r16 is used (r17 is never read here);
;; r16 is preserved via __tmp_reg__.
DEFUN __ashldi3
    mov     __tmp_reg__, r16
    ;; Shift by whole bytes while the count is >= 8 ...
0:  cpi     r16, 8
    brlo 2f
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    clr     r18                         ; zero shifted in from the right
    subi    r16, 8
    rjmp 0b
    ;; ... then the remaining 0..7 shifts bit by bit.
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
2:  dec     r16
    brpl 1b
    mov     r16, __tmp_reg__
    ret
ENDF __ashldi3
#endif /* defined (L_ashldi3) */
3196
#if defined (L_rotldi3)
;; Rotate left
;; r25:r18 = rotl64 (r25:r18, r17:r16)
;; Only the low count byte r16 is used (r17 is never read here);
;; r16 is saved on the stack and restored.
DEFUN __rotldi3
    push    r16
    ;; Rotate by whole bytes while the count is >= 8 ...
0:  cpi     r16, 8
    brlo 2f
    subi    r16, 8
    mov     __tmp_reg__, r25
    mov     r25, r24
    mov     r24, r23
    mov     r23, r22
    mov     r22, r21
    mov     r21, r20
    mov     r20, r19
    mov     r19, r18
    mov     r18, __tmp_reg__
    rjmp 0b
    ;; ... then rotate the remaining 0..7 bits one at a time.
1:  lsl     r18
    rol     r19
    rol     r20
    rol     r21
    rol     r22
    rol     r23
    rol     r24
    rol     r25
    adc     r18, __zero_reg__           ; wrap the bit shifted out of
                                        ;   r25 back into bit 0
2:  dec     r16
    brpl 1b
    pop     r16
    ret
ENDF __rotldi3
#endif /* defined (L_rotldi3) */
3230
3231
3232.section .text.libgcc.fmul, "ax", @progbits
3233
3234/***********************************************************/
3235;;; Softmul versions of FMUL, FMULS and FMULSU to implement
3236;;; __builtin_avr_fmul* if !AVR_HAVE_MUL
3237/***********************************************************/
3238
3239#define A1 24
3240#define B1 25
3241#define C0 22
3242#define C1 23
3243#define A0 __tmp_reg__
3244
#ifdef L_fmuls
;;; r23:r22 = fmuls (r24, r25) like in FMULS instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Reduce to an unsigned multiply: record in A0.7 whether the result
;;; must be negated, take |B1|, and share the rest with __fmulsu.
DEFUN __fmuls
    ;; A0.7 = negate result?
    mov  A0, A1
    eor  A0, B1                         ; result sign = sign(A) ^ sign(B)
    ;; B1 = |B1|
    sbrc B1, 7
    neg  B1
    XJMP __fmulsu_exit
ENDF __fmuls
#endif /* L_fmuls */
3258
#ifdef L_fmulsu
;;; r23:r22 = fmulsu (r24, r25) like in FMULSU instruction
;;; Clobbers: r24, r25, __tmp_reg__
DEFUN __fmulsu
    ;; A0.7 = negate result?
    mov  A0, A1                         ; only A's sign decides here
;; FALLTHRU
ENDF __fmulsu

;; Helper for __fmuls and __fmulsu: multiply |A1| by B1 via the
;; unsigned __fmul, then negate the 16-bit result iff A0.7 is set.
DEFUN __fmulsu_exit
    ;; A1 = |A1|
    sbrc A1, 7
    neg  A1
#ifdef __AVR_ERRATA_SKIP_JMP_CALL__
    ;; Some cores have problem skipping 2-word instruction
    tst  A0
    brmi 1f
#else
    sbrs A0, 7
#endif /* __AVR_ERRATA_SKIP_JMP_CALL__ */
    XJMP  __fmul                        ; no negation needed: tail-call
1:  XCALL __fmul
    ;; C = -C iff A0.7 = 1
    NEG2 C0                             ; NEG2 (macro defined elsewhere),
                                        ;   presumably negates the 16-bit
                                        ;   pair starting at C0
    ret
ENDF __fmulsu_exit
#endif /* L_fmulsu */
3287
3288
#ifdef L_fmul
;;; r22:r23 = fmul (r24, r25) like in FMUL instruction
;;; Clobbers: r24, r25, __tmp_reg__
;;; Shift-and-add multiply: B1 is consumed MSB-first; whenever its
;;; current top bit is set, A (shifted right one place per round into
;;; the A1:A0 pair) is accumulated into C.
DEFUN __fmul
    ; clear result
    clr   C0
    clr   C1
    clr   A0                            ; A0 = low extension byte of A
1:  tst   B1
    ;; 1.0 = 0x80, so test for bit 7 of B to see if A must be added to C.
2:  brpl  3f
    ;; C += A
    add   C0, A0
    adc   C1, A1
3:  ;; A >>= 1
    lsr   A1
    ror   A0
    ;; B <<= 1
    lsl   B1
    brne  2b                            ; loop while bits remain in B
    ret
ENDF __fmul
#endif /* L_fmul */
3312
3313#undef A0
3314#undef A1
3315#undef B1
3316#undef C0
3317#undef C1
3318
3319#include "lib1funcs-fixed.S"
3320