1/* Copyright (C) 2008-2018 Free Software Foundation, Inc.
2   Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
3		on behalf of Synopsys Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26/* XMAC schedule: directly back-to-back multiplies stall; the third
27   instruction after a multiply stalls unless it is also a multiply.  */
28#include "arc-ieee-754.h"
29
; Optional self-check wrapper, disabled by the preprocessor guard below.
; When enabled, it exports __muldf3 as a checker that runs __muldf3_c and
; __muldf3_asm on the same r0-r3 inputs, returns through blink if both r0:r1
; results are equal, and branches to abort otherwise.  The define at the end
; renames the implementation later in this file to __muldf3_asm.
; NOTE(review): __muldf3_c is not defined in this file; presumably a C
; reference implementation - confirm.
30#if 0 /* DEBUG */
31	.global __muldf3
32	.balign 4
33__muldf3:
	; Save blink and the four input words.  The push of r1 is in the delay
	; slot of the call.  Stack afterwards: [sp]=r1 [sp,4]=r0 [sp,8]=r3
	; [sp,12]=r2 [sp,16]=blink.
34	push_s blink
35	push_s r2
36	push_s r3
37	push_s r0
38	bl.d __muldf3_c
39	push_s r1
	; Reload the saved r2/r3 inputs and park the result of the first call
	; (r0/r1) in the two stack slots that were just read.
40	ld_s r2,[sp,12]
41	ld_s r3,[sp,8]
42	st_s r0,[sp,12]
43	st_s r1,[sp,8]
	; Restore the r0/r1 inputs (the pop of r0 is in the delay slot) and run
	; the assembler implementation.
44	pop_s r1
45	bl.d __muldf3_asm
46	pop_s r0
	; Pop the parked first result into r3 (its r1) and r2 (its r0).
47	pop_s r3
48	pop_s r2
49	pop_s blink
	; Return only if r0 == r2 and r1 == r3; otherwise fall into abort.
50	cmp r0,r2
51	cmp.eq r1,r3
52	jeq_s [blink]
53	b abort
54#define __muldf3 __muldf3_asm
55#endif /* DEBUG */
56/* N.B. This is optimized for ARC700.
57  ARC600 has very different scheduling / instruction selection criteria.  */
58/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx  ,
59   we can do:
60   sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx  */
61
; Slow path for an operand whose exponent field is zero (zero or denormal).
; Both entry labels are branched to from __muldf3, with r9 expected to hold
; the exponent mask 0x7ff00000 that the __muldf3 entry loads.  The
; zero-exponent operand is normalized, the shift is charged to the exponent
; of the other operand, and the multiply is restarted.
; NOTE(review): DBL0H/DBL0L and DBL1H/DBL1L come from arc-ieee-754.h, which
; is not visible here; presumably the high/low words of the two operands.
62__muldf3_support: /* This label makes debugger output saner.  */
63/* If one number is denormal, subtract some from the exponent of the other
64   one (if the other exponent is too small, return 0), and normalize the
65   denormal.  Then re-run the computation.  */
66	.balign 4
67	FUNC(__muldf3)
	; Reached from __muldf3 when the exponent field of DBL0H is zero.  Swap
	; the operands through r12 so that the zero-exponent one becomes DBL1,
	; then set r11 to the exponent field of the new DBL0H.
68.Ldenorm_dbl0:
69	mov_s r12,DBL0L
70	mov_s DBL0L,DBL1L
71	mov_s DBL1L,r12
72	mov_s r12,DBL0H
73	mov_s DBL0H,DBL1H
74	mov_s DBL1H,r12
75	and r11,DBL0H,r9
	; Also reached from __muldf3 when the exponent field of DBL1H is zero.
	; From here on DBL1 is the zero-exponent operand and r11 is the exponent
	; field of DBL0H.
76.Ldenorm_dbl1:
	; Other operand has an all-ones exponent field: infinity or NaN.
77	brhs r11,r9,.Linf_nan
	; Other exponent field is 0x3ca or less (this also covers a second
	; zero-exponent operand): return a signed zero.
78	brhs 0x3ca00001,r11,.Lret0
	; Clear the sign of DBL1H and fold it into DBL0H: the net effect of the
	; three instructions is to subtract only bit 31 of DBL1H from DBL0H,
	; which toggles bit 31 of DBL0H when the sign bit of DBL1H was set.
79	sub_s DBL0H,DBL0H,DBL1H
80	bmsk_s DBL1H,DBL1H,30
81	add_s DBL0H,DBL0H,DBL1H
	; No fraction bits in the high word: continue in .Ldenorm_2.
82	breq_s DBL1H,0,.Ldenorm_2
	; r12 = norm count - 10 = left shift that brings the leading one of
	; DBL1H to bit 20; r5 = the same count placed at the exponent field.
83	norm r12,DBL1H
84
85	sub_s r12,r12,10
86	asl r5,r12,20
87	asl_s DBL1H,DBL1H,r12
	; Charge the shift to the exponent of the other operand.
88	sub DBL0H,DBL0H,r5
	; r6 = bits of the low word that move up into the high word; this relies
	; on the negated shift count being taken modulo 32.
89	neg r5,r12
90	lsr r6,DBL1L,r5
91	asl_s DBL1L,DBL1L,r12
	; Restart the multiply; the add in the delay slot completes DBL1H.
92	b.d __muldf3
93	add_s DBL1H,DBL1H,r6
94
; At least one operand has an all-ones exponent field (infinity or NaN).
; Result high word: the larger of the two sign-stripped input high words, or
; all ones (a NaN) when either operand is zero.  Result low word: always 0.
; Bit 31 of the result is set when the input signs differ.
95	.balign 4
96.Linf_nan:
	; r12 = DBL1H without sign; bit 31 of DBL1H becomes the product sign;
	; DBL0H loses its sign.
97	bclr r12,DBL1H,31
98	xor_s DBL1H,DBL1H,DBL0H
99	bclr_s DBL0H,DBL0H,31
100	max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
	; Z = operand 0 is zero.  If it is not, the conditional or below sets
	; Z = operand 1 is zero.  The short moves in between leave the flags
	; untouched.
101	or.f 0,DBL0H,DBL0L
102	mov_s DBL0L,0
103	or.ne.f DBL1L,DBL1L,r12
	; Default high word is the complement of 0, replaced by r8 only when
	; neither operand is zero.
104	not_s DBL0H,DBL0L ; inf * 0 -> NaN
105	mov.ne DBL0H,r8
	; N = product sign; the delay slot instruction applies it.
106	tst_s DBL1H,DBL1H
107	j_s.d [blink]
108	bset.mi DBL0H,DBL0H,31
109
; Return a zero carrying the product sign.  The xor / bclr / xor sequence
; leaves only bit 31 of (DBL0H xor DBL1H) in DBL0H; the low word is cleared
; in the delay slot of the return.  DBL1H is clobbered.
110.Lret0:	xor_s DBL0H,DBL0H,DBL1H
111	bclr DBL1H,DBL0H,31
112	xor_s DBL0H,DBL0H,DBL1H
113	j_s.d [blink]
114	mov_l DBL0L,0
115
; Zero-exponent operand whose high word is zero: every fraction bit is in
; DBL1L.  Reached from .Ldenorm_dbl1 after the sign of DBL1H was cleared.
; Normalizes DBL1 so that its leading one ends up at bit 20 of DBL1H,
; charges the shift to the exponent in DBL0H, and falls into __muldf3.
116	.balign 4
117.Ldenorm_2:
118	breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
	; r12 = total left shift, 21..52: 21 when bit 31 of DBL1L is set (the
	; mi case), norm count + 22 otherwise.
119	norm.f r12,DBL1L
120
121	mov.mi r12,21
122	add.pl r12,r12,22
	; r11 = negated shift; r12 = shift placed at the exponent field.
123	neg r11,r12
124	asl_s r12,r12,20
	; 64-bit left shift built from 32-bit operations whose counts are taken
	; modulo 32.  lsr yields the bits that cross into the high word (zero
	; when the shift exceeds 32), ror yields the rotated low word.
125	lsr.f DBL1H,DBL1L,r11
126	ror DBL1L,DBL1L,r11
	; Charge the shift to the exponent of the other operand.  The short sub
	; sits between lsr.f and mov.eq, so mov.eq still sees the flags of lsr.f.
127	sub_s DBL0H,DBL0H,r12
	; If nothing crossed over, the rotated value is the whole high word.
128	mov.eq DBL1H,DBL1L
	; Remove the wrapped-around bits from the low word; for shifts of 32 or
	; more this leaves 0.
129	sub_s DBL1L,DBL1L,DBL1H
130	/* Fall through.  */
; __muldf3 -- IEEE-754 double precision multiply.
;
; In:    DBL0H:DBL0L and DBL1H:DBL1L (register aliases from arc-ieee-754.h,
;        not visible here; presumably the high/low words of the operands).
; Out:   DBL0H:DBL0L = product, rounded to nearest, ties to even.
; Uses:  r4-r12 and the flags; returns through blink.
;
; The fraction product is accumulated as a 96-bit value in r7:r5:r4 from
; 32x32 partial products, then shifted into place and rounded.  Operands
; with a zero exponent field go to .Ldenorm_dbl0 / .Ldenorm_dbl1, infinities
; and NaNs to .Linf_nan, and out-of-range result exponents to .Linf_denorm.
; NOTE(review): MPYHU is a macro from arc-ieee-754.h; presumably the unsigned
; multiply that returns the high 32 bits of the product.
131	.global __muldf3
132	.balign 4
133__muldf3:
	; r9 = 0x7ff00000 (exponent field mask), read from the literal at
	; .L7ff00000.  The offset is a hand-computed word count relative to pcl:
	; this entry is 4-byte aligned and the literal sits 0x4d * 4 = 308 bytes
	; further on.  Any change to the size of the code in between has to be
	; mirrored here.
134	ld.as r9,[pcl,0x4d] ; ((.L7ff00000-.+2)/4)
135	MPYHU r4,DBL0L,DBL1L
	; r6 = top fraction bits of operand 0 with the implicit one at bit 20.
136	bmsk r6,DBL0H,19
137	bset r6,r6,20
138	mpyu r7,r6,DBL1L
	; r11 = exponent field of operand 0; zero means zero or denormal.
139	and r11,DBL0H,r9
140	breq r11,0,.Ldenorm_dbl0
141	MPYHU r8,r6,DBL1L
	; r10 = top fraction bits of operand 1 with the implicit one at bit 20.
142	bmsk r10,DBL1H,19
143	bset r10,r10,20
144	MPYHU r5,r10,DBL0L
	; Start summing the partial products; the carry chain runs through the
	; add.f / adc.f / add.cs instructions below, and none of the
	; instructions interleaved with them changes the flags.
145	add.f r4,r4,r7
	; r12 = exponent field of operand 1.
146	and r12,DBL1H,r9
147	MPYHU r7,r6,r10
148	breq r12,0,.Ldenorm_dbl1
149	adc.f r5,r5,r8
150	mpyu r8,r10,DBL0L
	; Exponent field all ones: infinity or NaN.
151	breq r11,r9,.Linf_nan
152	breq r12,r9,.Linf_nan
153	mpyu r6,r6,r10
154	add.cs r7,r7,1
155	add.f r4,r4,r8
	; r10 is free now; it receives the lowest product word, which is only
	; needed as a sticky indication.
156	mpyu r10,DBL1L,DBL0L
157	bclr r8,r9,30 ; 0x3ff00000
158	adc.f r5,r5,r6
159	; XMAC write-back stall / std. mult stall is one cycle later
160	bclr r6,r9,20 ; 0x7fe00000
161	add.cs r7,r7,1 ; fraction product in r7:r5:r4
162	tst r10,r10
163	bset.ne r4,r4,0 ; put least significant word into sticky bit
	; r10 = bit 9 of r7, which is set when the fraction product is 2 or
	; more; Z is set when it is clear.
164	lsr.f r10,r7,9
165	add_l r12,r12,r11 ; add exponents
166	rsub.eq r8,r8,r9 ; 0x40000000
167	sub r12,r12,r8 ; subtract bias + implicit 1
	; Exponent at or above 0x7fe00000 as an unsigned number (too large or
	; negative) is handled out of line.  Delay slot: r10 = 12 - r10, the left
	; shift that puts the leading one of r7 at bit 20.
168	brhs.d r12,r6,.Linf_denorm
169	rsub r10,r10,12
170.Lshift_frac:
	; Shift r7:r5:r4 left by r10 (1..31) and keep the top 64 bits.  r8 = -r10
	; serves as the complementary right shift, counts being taken modulo 32.
171	neg r8,r10
	; r6 = bits of r4 that fall below the result: guard bit on top, sticky
	; bits beneath it.
172	asl r6,r4,r10
173	lsr DBL0L,r4,r8
	; C = guard bit, Z = no sticky bits.  On a tie the result LSB is tested,
	; and if that is even as well, the always-equal compare clears the carry
	; so that nothing is added.
174	add.f 0,r6,r6
175	btst.eq DBL0L,0
176	cmp.eq r4,r4 ; round to nearest / round to even
177	asl r4,r5,r10
178	lsr r5,r5,r8
	; Low word = both halves plus the rounding carry.
179	adc.f DBL0L,DBL0L,r4
	; N = product sign; the carry out of the low word is consumed by the adc
	; further down.
180	xor.f 0,DBL0H,DBL1H
181	asl r7,r7,r10
182	add_s r12,r12,r5
	; High word = exponent + fraction bits + carry.
183	adc DBL0H,r12,r7
184	j_s.d [blink]
185	bset.mi DBL0H,DBL0H,31
186
187/* We have checked for infinity / NaN input before, and transformed
188   denormalized inputs into normalized inputs.  Thus, the worst case
189   exponent overflows are:
190       1 +     1 - 0x400 == 0xc02 : maximum underflow
191   0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
192   N.B. 0x7e and 0x7f are also values for overflow.
193
194   If (r12 <= -54), we have an underflow to zero.  */
195	.balign 4
196.Linf_denorm:
	; r12 below 0xc0000000 (unsigned) means exponent overflow.
197	brlo r12,0xc0000000,.Linf
	; Otherwise the exponent is negative: r6 = exponent as a signed number,
	; the exponent field of the result becomes 0, and the left shift count in
	; r10 is reduced by the deficit.
198	asr r6,r12,20
199	mov_s r12,0
200	add.f r10,r10,r6
	; Still a positive left shift: the common tail handles it.
201	brgt r10,0,.Lshift_frac
	; Exactly zero (Z comes from the add above; brgt leaves the flags alone).
202	beq_s .Lround_frac
	; Negative: move the fraction down by one whole word and add 32.
203	add.f r10,r10,32
204.Lshift32_frac:
	; r7:r5:r4 is shifted right by 32 bits; a nonzero dropped word is
	; remembered as a sticky marker.
	; NOTE(review): the marker goes into bit 1 here, whereas the main path
	; uses bit 0.  In the following left shift by r10, bit 1 of r4 stays
	; below the guard bit only while r10 is 29 or less; for 30 or 31 it
	; would land on the guard bit or on the result LSB.  Left unchanged -
	; confirm whether this is intended.
205	tst r4,r4
206	mov r4,r5
207	bset.ne r4,r4,1
208	mov r5,r7
209	mov r7,0
210	brge r10,1,.Lshift_frac
211	breq r10,0,.Lround_frac
	; Still negative: shift by another word if more than 21 bits of left
	; shift remain afterwards, otherwise return a signed zero.
212	add.f r10,r10,32
213	brgt r10,21,.Lshift32_frac
214	b_s .Lret0
215
; Shift count of exactly zero: the result fraction is r7:r5, bit 31 of r4 is
; the guard bit and the remaining bits of r4 are sticky bits.  Rounding and
; sign handling follow the same pattern as the tail of .Lshift_frac: round
; to nearest with ties to even, then set bit 31 when the input signs differ.
; This tail is 30 bytes long; together with the code after it this places
; the .L7ff00000 literal at the offset used by the load at the entry.
216.Lround_frac:
217	add.f 0,r4,r4 ; C = guard bit, Z = no sticky bits
218	btst.eq r5,0 ; on a tie: Z = result LSB is even
219	cmp.eq r4,r4 ; tie on even: clear the carry, no round-up
220	adc.f DBL0L,r5,0 ; low word plus rounding increment
221	xor.f 0,DBL0H,DBL1H ; N = product sign; the carry is kept
222	adc DBL0H,r7,0 ; high word plus carry out of the low word
223	j_s.d [blink]
224	bset.mi DBL0H,DBL0H,31 ; delay slot: apply the sign
225
; Exponent overflow: return infinity with the product sign.  The xor sets N
; from the two input signs, the short moves (which leave the flags alone)
; build the result from r9 = 0x7ff00000 and a zero low word, and the delay
; slot instruction sets bit 31 when the signs differ.
; NOTE: this code lies between the __muldf3 entry and the .L7ff00000
; literal, which the entry loads through a hand-computed pcl offset;
; changing the size of this code requires updating that offset.
226.Linf:	xor.f DBL1H,DBL1H,DBL0H
227	mov_s DBL0L,0
228	mov_s DBL0H,r9
229	j_s.d [blink]
230	bset.mi DBL0H,DBL0H,31
231	ENDFUNC(__muldf3)
232
; Literal 0x7ff00000, the exponent field mask of the high word of a double.
; The __muldf3 entry loads it into r9 with a pcl-relative load whose offset
; is written as a plain number, so the distance between that load and this
; word (including the alignment padding here) must match that number.
233	.balign 4
234.L7ff00000:
235	.long 0x7ff00000
236