/* Copyright (c) 2002  Michael Stumpf  <mistumpf@de.pepperl-fuchs.com>
   Copyright (c) 2006  Dmitry Xmelkov
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.
   * Neither the name of the copyright holders nor the names of
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE. */

/* $Id: mulsf3x.S 2473 2015-04-09 08:10:22Z pitchumani $ */

/*
 */

#if !defined(__AVR_TINY__)

#include "fp32def.h"
#include "asmdef.h"

FUNCTION __mulsf3x

#if	defined(__AVR_ENHANCED__) && __AVR_ENHANCED__

0:	XCALL	_U(__fp_pscA)
	brcs	1f
	XCALL	_U(__fp_pscB)
	brcs	1f
	and	rA3, rB3		; one of the args is 0xff (Inf)
	breq	1f
	XJMP	_U(__fp_inf)		; nonzero * Inf --> Inf
1:	XJMP	_U(__fp_nan)		; 0 * Inf --> NaN
2:	clr	r1			; after 'mul rA3,rB3'
	XJMP	_U(__fp_szero)
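
/* Illustrative sketch, not part of the build: a rough C model of the
   special-case policy the handlers above implement.  NaN operands and
   0 * Inf give NaN, nonzero * Inf gives Inf, and a zero product gives a
   signed zero.  The helper name is hypothetical.

     #include <math.h>

     // Handles the cases the code above short-circuits; ordinary finite
     // nonzero operands fall through to the real mantissa multiply.
     static float mul_special (float a, float b)
     {
         int neg = !signbit (a) != !signbit (b);   // sign(a) XOR sign(b)
         if (isnan (a) || isnan (b))
             return NAN;                           // __fp_nan
         if (isinf (a) || isinf (b))
             return (a == 0.0f || b == 0.0f) ? NAN
                    : (neg ? -INFINITY : INFINITY);  // __fp_nan / __fp_inf
         if (a == 0.0f || b == 0.0f)
             return neg ? -0.0f : 0.0f;            // __fp_szero: signed zero
         return a * b;                             // normal path, not modeled
     }
*/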

ENTRY   __mulsf3x
	XCALL	_U(__fp_split3)
	brcs	0b

ENTRY   __mulsf3_pse			; post split entry
  ; check zero
	mul	rA3, rB3		; r1 would be clean
	breq	2b
  ; rB3.rA3 := rA3 + rB3
	add	rA3, rB3
	ldi	rB3, 0
	adc	rB3, rB3

  ; multiplication:  rA2.rA1.rA0 * rB2.rB1.rB0  -->  rA2.rA1.rA0.rAE.ZH.ZL

  ; ZH.ZL = rA0 * rB0
	mul	rA0, rB0
	movw	ZL, r0
  ; rAE.ZH += rA1 * rB0
	mul	rA1, rB0
	clr	rAE
	add	ZH, r0
	adc	rAE, r1
  ; rBE.rAE.ZH = rAE.ZH + rA0 * rB1
	mul	rA0, rB1
	clr	rBE
	add	ZH, r0
	adc	rAE, r1
	adc	rBE, rBE
  ; rA0.rBE.rAE = rBE.rAE + rA0 * rB2
	mul	rA0, rB2
	clr	rA0
	add	rAE, r0
	adc	rBE, r1
	adc	rA0, rA0
  ; rA0.rBE.rAE += rA2 * rB0
	mul	rA2, rB0
	clr	rB0
	add	rAE, r0
	adc	rBE, r1
	adc	rA0, rB0
  ; rA0.rBE.rAE += rA1 * rB1
	mul	rA1, rB1
	add	rAE, r0
	adc	rBE, r1
	adc	rA0, rB0	; rB0 == 0
  ; rB0.rA0.rBE = rA0.rBE + rA2 * rB1
	mul	rA2, rB1
	add	rBE, r0
	adc	rA0, r1
	adc	rB0, rB0	; rB0 was 0
  ; rB0.rA0.rBE += rA1 * rB2
	mul	rA1, rB2
	clr	rB1
	add	rBE, r0
	adc	rA0, r1
	adc	rB0, rB1
  ; rB0.rA0 += rA2 * rB2
	mul	rA2, rB2
	add	rA0, r0
	adc	rB0, r1
  ; move result:  rA2.rA1.rA0.rAE.ZH.ZL = rB0.rA0.rBE.rAE.ZH.ZL
	mov	rA2, rB0
	mov	rA1, rA0
	mov	rA0, rBE
  ; __zero_reg__
	clr	r1
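
/* Illustrative sketch, not part of the build: a C model of the
   partial-product scheme above.  Each hardware MUL is an 8x8 -> 16
   multiply, and the nine partial products are summed into a 48-bit
   result, byte-aligned exactly as the register comments describe.
   The function name is hypothetical.

     #include <stdint.h>

     static uint64_t mul24x24 (uint32_t a, uint32_t b)   // 24-bit operands
     {
         uint64_t acc = 0;
         for (int i = 0; i < 3; i++)          // bytes of a
             for (int j = 0; j < 3; j++) {    // bytes of b
                 uint16_t p = (uint16_t) (((a >> (8 * i)) & 0xff)
                                        * ((b >> (8 * j)) & 0xff));
                 acc += (uint64_t) p << (8 * (i + j));
             }
         return acc;                          // 48-bit product
     }
*/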

#else	/* not __AVR_ENHANCED__ */

0:	XCALL	_U(__fp_pscA)
	brcs	1f
	XCALL	_U(__fp_pscB)
	brcs	1f
	and	rA3, rB3		; one of the args is 0xff (Inf)
	breq	1f
	XJMP	_U(__fp_inf)		; nonzero * Inf --> Inf
1:	XJMP	_U(__fp_nan)		; 0 * Inf --> NaN
2:	XJMP	_U(__fp_szero)

ENTRY   __mulsf3x
	XCALL	_U(__fp_split3)
	brcs	0b

ENTRY   __mulsf3_pse			; post split entry
  ; check zero
	tst	rA3
	breq	2b
	tst	rB3
	breq	2b
  ; rB3.rA3 := rA3 + rB3
	add	rA3, rB3
	ldi	rB3, 0
	adc	rB3, rB3

  ; multiplication:  rA2.rA1.rA0 * rB2.rB1.rB0  -->  rA2.rA1.rA0.rAE.ZH.ZL

	clr	rBE		; 4th byte of rB*
	clr	ZL
	clr	ZH
	clr	rAE
  ; r0.rAE.ZH.ZL += rA0 * rB2.rB1.rB0
	clr	r0
	sec			; to count loops
	ror	rA0
1:	brcc	2f
	add	ZL,  rB0
	adc	ZH,  rB1
	adc	rAE, rB2
	adc	r0,  rBE
2:	lsl	rB0
	rol	rB1
	rol	rB2
	rol	rBE
	lsr	rA0
	brne	1b
  ; rA0.r1.r0.rAE.ZH += rA1 * rBE.rB2.rB1
	ror	rA1		; C was 1
3:	brcc	4f
	add	ZH,  rB1
	adc	rAE, rB2
	adc	r0,  rBE
	adc	r1,  rB0
	brcc	4f
	inc	rA0
4:	lsl	rB1
	rol	rB2
	rol	rBE
	rol	rB0
	lsr	rA1
	brne	3b
  ; rA0.r1.r0.rAE += rA2 * rB0.rBE.rB2
	ror	rA2		; C was 1
5:	brcc	6f
	add	rAE, rB2
	adc	r0,  rBE
	adc	r1,  rB0
	adc	rA0, rB1
6:	lsl	rB2
	rol	rBE
	rol	rB0
	rol	rB1
	lsr	rA2
	brne	5b
  ; move result:  rA2.rA1.rA0.rAE.ZH.ZL := rA0.r1.r0.rAE.ZH.ZL
	mov	rA2, rA0
	mov	rA1, r1
	mov	rA0, r0
  ; __zero_reg__
	clr	r1
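
/* Illustrative sketch, not part of the build: the classic-core path above
   has no MUL instruction, so it builds the same 48-bit product by
   shift-and-add -- for every set bit of the multiplier, the (progressively
   shifted) multiplicand is added into the accumulator.  A C model with a
   hypothetical name:

     #include <stdint.h>

     static uint64_t mul24x24_shift_add (uint32_t a, uint32_t b)
     {
         uint64_t acc = 0;
         uint64_t addend = b;          // widened so it can keep shifting left
         while (a) {
             if (a & 1)
                 acc += addend;        // add shifted multiplicand
             addend <<= 1;
             a >>= 1;                  // consume one multiplier bit
         }
         return acc;                   // 48-bit product for 24-bit operands
     }
*/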

#endif	/* not __AVR_ENHANCED__ */

  ; exponent -= 127	(Why not 126?  For comparison convenience.)
	subi	rA3, lo8(127)
	sbci	rB3, hi8(127)
	brmi	13f		; denormalization is needed
	breq	15f		; normalization is impossible
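
/* Illustrative note, not part of the build: rB3.rA3 holds the sum of the
   two biased exponents (bias 127 each), so subtracting one bias leaves the
   product's biased exponent, still subject to the normalization and
   packing adjustments below:

       e_result = e_a + e_b - 127

   Worked example: 1.5f (biased exp 127) * 2.0f (biased exp 128)
   -> 127 + 128 - 127 = 128, i.e. 3.0f = 1.5 * 2^1, as expected.
   A negative value means the result is subnormal or underflows to zero,
   which the denormalization path below handles. */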
  ; result exponent > min ==> normalization is possible
10:	tst	rA2
	brmi	11f		; mantissa is normal
  ; mantissa <<= 1
	lsl	ZL
	rol	ZH
	rol	rAE
	rol	rA0
	rol	rA1
	rol	rA2
  ; exponent -= 1
	subi	rA3, lo8(1)
	sbci	rB3, hi8(1)
	brne	10b
  ; check for overflow
11:	cpi	rA3, 254
	cpc	rB3, r1
	brlo	15f
	XJMP	_U(__fp_inf)
  ; check the lowest exponent value to avoid a long shift loop
12:	XJMP	_U(__fp_szero)
13:	cpi	rB3, hi8(-24)		; here rB3 < 0
	brlt	12b
	cpi	rA3, lo8(-24)
	brlt	12b
  ; mantissa >>= -rA3
14:	lsr	rA2
	ror	rA1
	ror	rA0
	ror	rAE
	ror	ZH
	ror	ZL
	subi	rA3, -1
	brne	14b
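
/* Illustrative sketch, not part of the build: a C model of the
   normalize/denormalize loops above, operating on the 48-bit product 'm'
   and the signed exponent 'e' left by the subtraction of 127.  Names are
   hypothetical.

     #include <stdint.h>

     static void normalize48 (uint64_t *m, int16_t *e)
     {
         if (*e < 0) {                            // denormalization path
             if (*e < -24) { *m = 0; return; }    // too small: flush to zero
             do { *m >>= 1; } while (++*e);       // shift right -e times
         } else {
             while (*e > 0 && !(*m & ((uint64_t) 1 << 47))) {
                 *m <<= 1;                        // shift mantissa up
                 --*e;                            // and compensate the exponent
             }
         }
     }
*/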
  ; for rounding
15:	or	ZH, ZL
  ; pack
	lsl	rA2
	adc	rA3, r1		; restore exponent for normal values
	lsr	rA3
	ror	rA2
	bld	rA3, 7		; sign
	ret
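
/* Illustrative sketch, not part of the build: "or ZH, ZL" above folds the
   lowest product byte into the next one, so information in bits that would
   otherwise be discarded is preserved for later rounding of the extended
   result.  The final instructions re-pack sign, exponent and the 23 stored
   mantissa bits into the IEEE-754 single-precision layout; roughly, in C
   (hypothetical names):

     #include <stdint.h>

     static uint32_t pack_sf (uint32_t sign, uint32_t biased_exp,
                              uint32_t mant24)   // normalized, hidden bit set
     {
         return (sign << 31)
              | (biased_exp << 23)
              | (mant24 & 0x7fffff);             // hidden bit dropped
     }
*/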
ENDFUNC

#endif /* !defined(__AVR_TINY__) */