1/* Copyright (C) 2008-2019 Free Software Foundation, Inc.
2   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3		on behalf of Synopsys Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26#include "arc-ieee-754.h"
27#if 0 /* DEBUG */
28	.global __addsf3
29	FUNC(__addsf3)
30	.balign 4
31__addsf3:
32	push_s blink
33	push_s r1
34	bl.d __addsf3_c
35	push_s r0
36	ld_s r1,[sp,4]
37	st_s r0,[sp,4]
38	bl.d __addsf3_asm
39	pop_s r0
40	pop_s r1
41	pop_s blink
42	cmp r0,r1
43	jeq_s [blink]
44	bl abort
45	ENDFUNC(__addsf3)
46	.global __subsf3
47	FUNC(__subsf3)
48	.balign 4
49__subsf3:
50	push_s blink
51	push_s r1
52	bl.d __subsf3_c
53	push_s r0
54	ld_s r1,[sp,4]
55	st_s r0,[sp,4]
56	bl.d __subsf3_asm
57	pop_s r0
58	pop_s r1
59	pop_s blink
60	cmp r0,r1
61	jeq_s [blink]
62	bl abort
63	ENDFUNC(__subsf3)
64#define __addsf3 __addsf3_asm
65#define __subsf3 __subsf3_asm
66#endif /* DEBUG */
67/* N.B. This is optimized for ARC700.
68  ARC600 has very different scheduling / instruction selection criteria.  */
69
70/* inputs: r0, r1
71   output: r0
72   clobber: r1-r10, r12, flags  */
73
74	.balign 4
75	.global __addsf3
76	.global __subsf3
77	FUNC(__addsf3)
78	FUNC(__subsf3)
79	.long 0x7f800000 ; exponent mask
80__subsf3:
81	bxor_l r1,r1,31
82__addsf3:
83	ld r9,[pcl,-8]
84	bmsk r4,r0,30
85	xor r10,r0,r1
86	and r6,r1,r9
87	sub.f r12,r4,r6
88	asr_s r12,r12,23
89	blo .Ldbl1_gt
90	brhs r4,r9,.Linf_nan
91	brne r12,0,.Lsmall_shift
92	brge r10,0,.Ladd_same_exp ; r12 == 0
93/* After subtracting, we need to normalize; when shifting to place the
94  leading 1 into position for the implicit 1 and adding that to DBL0,
95  we increment the exponent.  Thus, we have to subtract one more than
96  the shift count from the exponent beforehand.  Iff the exponent drops thus
97  below zero (before adding in the fraction with the leading one), we have
98  generated a denormal number.  Denormal handling is basicallly reducing the
99  shift count so that we produce a zero exponent instead; FWIW, this way
100  the shift count can become zero (if we started out with exponent 1).
101  On the plus side, we don't need to check for denorm input, the result
102  of subtracing these looks just the same as denormals generated during
103  subtraction.  */
104	bmsk r7,r1,30
105	breq	r4,r7,.Lret0
106	sub.f r5,r4,r7
107	lsr r12,r4,23
108	neg.cs r5,r5
109	norm r3,r5
110	bmsk r2,r0,22
111	sub_s r3,r3,6
112	min r12,r12,r3
113	bic r1,r0,r2
114	sub_s r3,r12,1
115	asl_s r12,r12,23
116	asl r2,r5,r3
117	sub_s r1,r1,r12
118	add_s r0,r1,r2
119	j_s.d [blink]
120	bxor.cs r0,r0,31
121	.balign 4
122.Linf_nan:
123	; If both inputs are inf, but with different signs, the result is NaN.
124	asr r12,r10,31
125	or_s r1,r1,r12
126	j_s.d [blink]
127	or.eq r0,r0,r1
128	.balign 4
129.Ladd_same_exp:
130	/* This is a special case because we can't test for need to shift
131	   down by checking if bit 23 of DBL0 changes.  OTOH, here we know
132	   that we always need to shift down.  */
133	; adding the two floating point numbers together makes the sign
134	; cancel out and apear as carry; the exponent is doubled, and the
135	; fraction also in need of shifting left by one. The two implicit
136	; ones of the sources make an implicit 1 of the result, again
137	; non-existent in a place shifted by one.
138	add.f	r0,r0,r1
139	btst_s	r0,1
140	breq	r6,0,.Ldenorm_add
141	add.ne	r0,r0,1 ; round to even.
142	rrc	r0,r0
143	bmsk	r1,r9,23
144	add	r0,r0,r1 ; increment exponent
145	bic.f	0,r9,r0; check for overflow -> infinity.
146	jne_l	[blink]
147	mov_s	r0,r9
148	j_s.d	[blink]
149	bset.cs	r0,r0,31
150
151.Ldenorm_add:
152	j_s.d [blink]
153	add r0,r4,r1
154
155.Lret_dbl0:
156        j_s [blink]
157
158	.balign 4
159.Lsmall_shift:
160	brhi r12,25,.Lret_dbl0
161	breq.d r6,0,.Ldenorm_small_shift
162	bmsk_s r1,r1,22
163	bset_s r1,r1,23
164.Lfixed_denorm_small_shift:
165	neg r8,r12
166	asl r5,r1,r8
167	brge.d r10,0,.Ladd
168	lsr_l r1,r1,r12
169/* subtract, abs(DBL0) > abs(DBL1) */
170/* DBL0: original values
171   DBL1: fraction with explicit leading 1, shifted into place
172   r4:  orig. DBL0 & 0x7fffffff
173   r6:  orig. DBL1 & 0x7f800000
174   r9:  0x7f800000
175   r10: orig. DBL0H ^ DBL1H
176   r5 : guard bits */
177	.balign 4
178.Lsub:
179	neg.f r12,r5
180	bmsk r3,r0,22
181	bset r5,r3,23
182	sbc.f r4,r5,r1
183	beq.d .Large_cancel_sub
184	bic r7,r0,r3
185	norm r3,r4
186	bmsk r6,r7,30
187.Lsub_done:
188	sub_s r3,r3,6
189	breq r3,1,.Lsub_done_noshift
190	asl r5,r3,23
191	sub_l r3,r3,1
192	brlo r6,r5,.Ldenorm_sub
193	sub r0,r7,r5
194	neg_s r1,r3
195	lsr.f r2,r12,r1
196	asl_s r12,r12,r3
197	btst_s	r2,0
198	bmsk.eq.f r12,r12,30
199	asl r5,r4,r3
200	add_s r0,r0,r2
201	adc.ne r0,r0,0
202	j_s.d [blink]
203	add_l r0,r0,r5
204
205.Lret0:
206	j_s.d	[blink]
207	mov_l	r0,0
208
209	.balign 4
210.Ldenorm_small_shift:
211	brne.d	r12,1,.Lfixed_denorm_small_shift
212	sub_s	r12,r12,1
213	brlt.d	r10,0,.Lsub
214	mov_s	r5,r12 ; zero r5, and align following code
215.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
216	bmsk	r2,r0,22
217	add_s	r2,r2,r1
218	bbit0.d	r2,23,.Lno_shiftdown
219	add_s	r0,r0,r1
220	bic.f	0,r9,r0; check for overflow -> infinity; eq : infinity
221	bmsk	r1,r2,22
222	lsr.ne.f r2,r2,2; cc: even ; hi: might round down
223	lsr.ne	r1,r1,1
224	rcmp.hi	r5,1; hi : round down
225	bclr.hi	r0,r0,0
226	j_l.d	[blink]
227	sub_s	r0,r0,r1
228
229/* r4: DBL0H & 0x7fffffff
230   r6: DBL1H & 0x7f800000
231   r9: 0x7f800000
232   r10: sign difference
233   r12: shift count (negative) */
234	.balign 4
235.Ldbl1_gt:
236	brhs r6,r9,.Lret_dbl1 ; inf or NaN
237	neg r8,r12
238	brhi r8,25,.Lret_dbl1
239.Lsmall_shift_dbl0:
240	breq.d r6,0,.Ldenorm_small_shift_dbl0
241	bmsk_s r0,r0,22
242	bset_s r0,r0,23
243.Lfixed_denorm_small_shift_dbl0:
244	asl r5,r0,r12
245	brge.d r10,0,.Ladd_dbl1_gt
246	lsr r0,r0,r8
247/* subtract, abs(DBL0) < abs(DBL1) */
248/* DBL0: fraction with explicit leading 1, shifted into place
249   DBL1: original value
250   r6:  orig. DBL1 & 0x7f800000
251   r9:  0x7f800000
252   r5: guard bits */
253	.balign 4
254.Lrsub:
255	neg.f r12,r5
256	bmsk r5,r1,22
257	bic r7,r1,r5
258	bset r5,r5,23
259	sbc.f r4,r5,r0
260	bne.d .Lsub_done ; note: r6 is already set up.
261	norm r3,r4
262	/* Fall through */
263
264/* r4:r12 : unnormalized result fraction
265   r7: result sign and exponent         */
266/* When seeing large cancellation, only the topmost guard bit might be set.  */
267	.balign 4
268.Large_cancel_sub:
269	breq_s	r12,0,.Lret0
270	sub	r0,r7,24<<23
271	xor.f	0,r0,r7 ; test if exponent is negative
272	tst.pl	r9,r0  ; test if exponent is zero
273	jpnz	[blink] ; return if non-denormal result
274	bmsk	r6,r7,30
275	lsr	r3,r6,23
276	xor	r0,r6,r7
277	sub_s	r3,r3,24-22
278	j_s.d	[blink]
279	bset	r0,r0,r3
280
281	; If a denorm is produced, we have an exact result -
282	; no need for rounding.
283	.balign 4
284.Ldenorm_sub:
285	sub r3,r6,1
286	lsr.f r3,r3,23
287	xor r0,r6,r7
288	neg_s r1,r3
289	asl.ne r4,r4,r3
290	lsr_s r12,r12,r1
291	add_s r0,r0,r4
292	j_s.d [blink]
293	add.ne r0,r0,r12
294
295	.balign 4
296.Lsub_done_noshift:
297	add.f 0,r12,r12
298	btst.eq r4,0
299	bclr r4,r4,23
300	add r0,r7,r4
301	j_s.d [blink]
302	adc.ne r0,r0,0
303
304	.balign 4
305.Lno_shiftdown:
306	add.f 0,r5,r5
307	btst.eq r0,0
308	cmp.eq r5,r5
309	j_s.d [blink]
310	add.cs r0,r0,1
311
312.Lret_dbl1:
313	j_s.d [blink]
314	mov_l r0,r1
315	.balign 4
316.Ldenorm_small_shift_dbl0:
317	sub.f r8,r8,1
318	bne.d .Lfixed_denorm_small_shift_dbl0
319	add_s r12,r12,1
320	brlt.d r10,0,.Lrsub
321	mov r5,0
322.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
323	bmsk	r2,r1,22
324	add_s	r2,r2,r0
325	bbit0.d	r2,23,.Lno_shiftdown_dbl1_gt
326	add_s	r0,r1,r0
327	bic.f	0,r9,r0; check for overflow -> infinity; eq : infinity
328	bmsk	r1,r2,22
329	lsr.ne.f r2,r2,2; cc: even ; hi: might round down
330	lsr.ne	r1,r1,1
331	rcmp.hi	r5,1; hi : round down
332	bclr.hi	r0,r0,0
333	j_l.d	[blink]
334	sub_s	r0,r0,r1
335
336	.balign	4
337.Lno_shiftdown_dbl1_gt:
338	add.f	0,r5,r5
339	btst.eq	r0,0
340	cmp.eq	r5,r5
341	j_s.d	[blink]
342	add.cs	r0,r0,1
343	ENDFUNC(__addsf3)
344	ENDFUNC(__subsf3)
345