1/* IEEE-754 double-precision functions for Xtensa
2   Copyright (C) 2006-2021 Free Software Foundation, Inc.
3   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   Under Section 7 of GPL version 3, you are granted additional
18   permissions described in the GCC Runtime Library Exception, version
19   3.1, as published by the Free Software Foundation.
20
21   You should have received a copy of the GNU General Public License and
22   a copy of the GCC Runtime Library Exception along with this program;
23   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24   <http://www.gnu.org/licenses/>.  */
25
26#ifdef __XTENSA_EB__
27#define xh a2
28#define xl a3
29#define yh a4
30#define yl a5
31#else
32#define xh a3
33#define xl a2
34#define yh a5
35#define yl a4
36#endif
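
	/* The macros above map the argument register pairs onto the high and
	   low words of an IEEE-754 double according to the target byte
	   order: on a big-endian Xtensa the first word of a pair holds the
	   sign, exponent and high mantissa bits.  A host-side C sketch of
	   the layout the xh/xl names assume (illustrative only, not part of
	   this file):

	       #include <stdint.h>
	       #include <string.h>

	       // Split a double into the "xh"/"xl" words used in this file.
	       // Assumes it is compiled for the target, so __XTENSA_EB__
	       // reflects the target byte order.
	       static void split_double (double d, uint32_t *hi, uint32_t *lo)
	       {
	         uint32_t w[2];
	         memcpy (w, &d, sizeof d);
	       #ifdef __XTENSA_EB__
	         *hi = w[0]; *lo = w[1];    // big endian: high word first
	       #else
	         *hi = w[1]; *lo = w[0];    // little endian: high word second
	       #endif
	       }
	       // e.g. -2.5 gives *hi == 0xc0040000, *lo == 0x00000000.  */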
37
38/*  Warning!  The branch displacements for some Xtensa branch instructions
39    are quite small, and this code has been carefully laid out to keep
40    branch targets in range.  If you change anything, be sure to check that
41    the assembler is not relaxing anything to branch over a jump.  */
42
43#ifdef L_negdf2
44
45	.align	4
46	.global	__negdf2
47	.type	__negdf2, @function
48__negdf2:
49	leaf_entry sp, 16
50	movi	a4, 0x80000000
51	xor	xh, xh, a4
52	leaf_return
53
54#endif /* L_negdf2 */
55
56#ifdef L_addsubdf3
57
58	.literal_position
59	/* Addition */
60__adddf3_aux:
61
62	/* Handle NaNs and Infinities.  (This code is placed before the
63	   start of the function just to keep it in range of the limited
64	   branch displacements.)  */
65
66.Ladd_xnan_or_inf:
67	/* If y is neither Infinity nor NaN, return x.  */
68	bnall	yh, a6, .Ladd_return_nan_or_inf
69	/* If x is a NaN, return it.  Otherwise, return y.  */
70	slli	a7, xh, 12
71	or	a7, a7, xl
72	bnez	a7, .Ladd_return_nan
73
74.Ladd_ynan_or_inf:
75	/* Return y.  */
76	mov	xh, yh
77	mov	xl, yl
78
79.Ladd_return_nan_or_inf:
80	slli	a7, xh, 12
81	or	a7, a7, xl
82	bnez	a7, .Ladd_return_nan
83	leaf_return
84
85.Ladd_return_nan:
86	movi	a4, 0x80000	/* make it a quiet NaN */
87	or	xh, xh, a4
88	leaf_return
89
90.Ladd_opposite_signs:
91	/* Operand signs differ.  Do a subtraction.  */
92	slli	a7, a6, 11
93	xor	yh, yh, a7
94	j	.Lsub_same_sign
95
96	.align	4
97	.global	__adddf3
98	.type	__adddf3, @function
99__adddf3:
100	leaf_entry sp, 16
101	movi	a6, 0x7ff00000
102
103	/* Check if the two operands have the same sign.  */
104	xor	a7, xh, yh
105	bltz	a7, .Ladd_opposite_signs
106
107.Ladd_same_sign:
108	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
109	ball	xh, a6, .Ladd_xnan_or_inf
110	ball	yh, a6, .Ladd_ynan_or_inf
111
112	/* Compare the exponents.  The smaller operand will be shifted
113	   right by the exponent difference and added to the larger
114	   one.  */
115	extui	a7, xh, 20, 12
116	extui	a8, yh, 20, 12
117	bltu	a7, a8, .Ladd_shiftx
118
119.Ladd_shifty:
120	/* Check if the smaller (or equal) exponent is zero.  */
121	bnone	yh, a6, .Ladd_yexpzero
122
123	/* Replace yh sign/exponent with 0x001.  */
124	or	yh, yh, a6
125	slli	yh, yh, 11
126	srli	yh, yh, 11
127
128.Ladd_yexpdiff:
129	/* Compute the exponent difference.  Optimize for difference < 32.  */
130	sub	a10, a7, a8
131	bgeui	a10, 32, .Ladd_bigshifty
132
133	/* Shift yh/yl right by the exponent difference.  Any bits that are
134	   shifted out of yl are saved in a9 for rounding the result.  */
135	ssr	a10
136	movi	a9, 0
137	src	a9, yl, a9
138	src	yl, yh, yl
139	srl	yh, yh
140
141.Ladd_addy:
142	/* Do the 64-bit addition.  */
143	add	xl, xl, yl
144	add	xh, xh, yh
145	bgeu	xl, yl, 1f
146	addi	xh, xh, 1
1471:
148	/* Check if the add overflowed into the exponent.  */
149	extui	a10, xh, 20, 12
150	beq	a10, a7, .Ladd_round
151	mov	a8, a7
152	j	.Ladd_carry
153
154.Ladd_yexpzero:
155	/* y is a subnormal value.  Replace its sign/exponent with zero,
156	   i.e., no implicit "1.0", and increment the apparent exponent
157	   because subnormals behave as if they had the minimum (nonzero)
158	   exponent.  Test for the case when both exponents are zero.  */
159	slli	yh, yh, 12
160	srli	yh, yh, 12
161	bnone	xh, a6, .Ladd_bothexpzero
162	addi	a8, a8, 1
163	j	.Ladd_yexpdiff
164
165.Ladd_bothexpzero:
166	/* Both exponents are zero.  Handle this as a special case.  There
167	   is no need to shift or round, and the normal code for handling
168	   a carry into the exponent field will not work because it
169	   assumes there is an implicit "1.0" that needs to be added.  */
170	add	xl, xl, yl
171	add	xh, xh, yh
172	bgeu	xl, yl, 1f
173	addi	xh, xh, 1
1741:	leaf_return
175
176.Ladd_bigshifty:
	/* Exponent difference >= 64 -- just return the bigger value.  */
178	bgeui	a10, 64, 1b
179
180	/* Shift yh/yl right by the exponent difference.  Any bits that are
181	   shifted out are saved in a9 for rounding the result.  */
182	ssr	a10
183	sll	a11, yl		/* lost bits shifted out of yl */
184	src	a9, yh, yl
185	srl	yl, yh
186	movi	yh, 0
187	beqz	a11, .Ladd_addy
188	or	a9, a9, a10	/* any positive, nonzero value will work */
189	j	.Ladd_addy
190
191.Ladd_xexpzero:
192	/* Same as "yexpzero" except skip handling the case when both
193	   exponents are zero.  */
194	slli	xh, xh, 12
195	srli	xh, xh, 12
196	addi	a7, a7, 1
197	j	.Ladd_xexpdiff
198
199.Ladd_shiftx:
200	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
201	   because the exponent difference is always nonzero in this version,
202	   the shift sequence can use SLL and skip loading a constant zero.  */
203	bnone	xh, a6, .Ladd_xexpzero
204
205	or	xh, xh, a6
206	slli	xh, xh, 11
207	srli	xh, xh, 11
208
209.Ladd_xexpdiff:
210	sub	a10, a8, a7
211	bgeui	a10, 32, .Ladd_bigshiftx
212
213	ssr	a10
214	sll	a9, xl
215	src	xl, xh, xl
216	srl	xh, xh
217
218.Ladd_addx:
219	add	xl, xl, yl
220	add	xh, xh, yh
221	bgeu	xl, yl, 1f
222	addi	xh, xh, 1
2231:
224	/* Check if the add overflowed into the exponent.  */
225	extui	a10, xh, 20, 12
226	bne	a10, a8, .Ladd_carry
227
228.Ladd_round:
229	/* Round up if the leftover fraction is >= 1/2.  */
230	bgez	a9, 1f
231	addi	xl, xl, 1
232	beqz	xl, .Ladd_roundcarry
233
234	/* Check if the leftover fraction is exactly 1/2.  */
235	slli	a9, a9, 1
236	beqz	a9, .Ladd_exactlyhalf
2371:	leaf_return
238
239.Ladd_bigshiftx:
240	/* Mostly the same thing as "bigshifty"....  */
241	bgeui	a10, 64, .Ladd_returny
242
243	ssr	a10
244	sll	a11, xl
245	src	a9, xh, xl
246	srl	xl, xh
247	movi	xh, 0
248	beqz	a11, .Ladd_addx
249	or	a9, a9, a10
250	j	.Ladd_addx
251
252.Ladd_returny:
253	mov	xh, yh
254	mov	xl, yl
255	leaf_return
256
257.Ladd_carry:
258	/* The addition has overflowed into the exponent field, so the
259	   value needs to be renormalized.  The mantissa of the result
260	   can be recovered by subtracting the original exponent and
261	   adding 0x100000 (which is the explicit "1.0" for the
262	   mantissa of the non-shifted operand -- the "1.0" for the
263	   shifted operand was already added).  The mantissa can then
264	   be shifted right by one bit.  The explicit "1.0" of the
265	   shifted mantissa then needs to be replaced by the exponent,
266	   incremented by one to account for the normalizing shift.
267	   It is faster to combine these operations: do the shift first
268	   and combine the additions and subtractions.  If x is the
269	   original exponent, the result is:
270	       shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
271	   or:
272	       shifted mantissa + ((x + 1) << 19)
273	   Note that the exponent is incremented here by leaving the
274	   explicit "1.0" of the mantissa in the exponent field.  */
275
276	/* Shift xh/xl right by one bit.  Save the lsb of xl.  */
277	mov	a10, xl
278	ssai	1
279	src	xl, xh, xl
280	srl	xh, xh
281
282	/* See explanation above.  The original exponent is in a8.  */
283	addi	a8, a8, 1
284	slli	a8, a8, 19
285	add	xh, xh, a8
286
287	/* Return an Infinity if the exponent overflowed.  */
288	ball	xh, a6, .Ladd_infinity
289
290	/* Same thing as the "round" code except the msb of the leftover
291	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
292	bbci.l	a10, 0, 1f
293	addi	xl, xl, 1
294	beqz	xl, .Ladd_roundcarry
295	beqz	a9, .Ladd_exactlyhalf
2961:	leaf_return
297
298.Ladd_infinity:
299	/* Clear the mantissa.  */
300	movi	xl, 0
301	srli	xh, xh, 20
302	slli	xh, xh, 20
303
304	/* The sign bit may have been lost in a carry-out.  Put it back.  */
305	slli	a8, a8, 1
306	or	xh, xh, a8
307	leaf_return
308
309.Ladd_exactlyhalf:
310	/* Round down to the nearest even value.  */
311	srli	xl, xl, 1
312	slli	xl, xl, 1
313	leaf_return
314
315.Ladd_roundcarry:
316	/* xl is always zero when the rounding increment overflows, so
317	   there's no need to round it to an even value.  */
318	addi	xh, xh, 1
319	/* Overflow to the exponent is OK.  */
320	leaf_return
321
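	/* The .Ladd_round, .Ladd_exactlyhalf and .Ladd_roundcarry paths above
	   implement round-to-nearest-even: a9 holds the bits shifted out of
	   the sum, MSB-aligned, so its sign bit says whether the discarded
	   fraction is >= 1/2 and the remaining bits say whether it is exactly
	   1/2.  An illustrative C sketch of the same rule (names made up;
	   as in the code above, a carry out of the mantissa is allowed to
	   run into the exponent field):

	       #include <stdint.h>

	       static uint64_t
	       round_nearest_even (uint64_t mant, uint32_t guard)
	       {
	         if (guard & 0x80000000u)     // discarded fraction >= 1/2
	           {
	             mant += 1;               // may carry into the exponent
	             if ((guard << 1) == 0)   // exactly 1/2: round to even
	               mant &= ~(uint64_t) 1;
	           }
	         return mant;
	       }
	*/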
322
323	/* Subtraction */
324__subdf3_aux:
325
326	/* Handle NaNs and Infinities.  (This code is placed before the
327	   start of the function just to keep it in range of the limited
328	   branch displacements.)  */
329
330.Lsub_xnan_or_inf:
331	/* If y is neither Infinity nor NaN, return x.  */
332	bnall	yh, a6, .Lsub_return_nan_or_inf
333
334.Lsub_return_nan:
335	/* Both x and y are either NaN or Inf, so the result is NaN.  */
336	movi	a4, 0x80000	/* make it a quiet NaN */
337	or	xh, xh, a4
338	leaf_return
339
340.Lsub_ynan_or_inf:
341	/* Negate y and return it.  */
342	slli	a7, a6, 11
343	xor	xh, yh, a7
344	mov	xl, yl
345
346.Lsub_return_nan_or_inf:
347	slli	a7, xh, 12
348	or	a7, a7, xl
349	bnez	a7, .Lsub_return_nan
350	leaf_return
351
352.Lsub_opposite_signs:
353	/* Operand signs differ.  Do an addition.  */
354	slli	a7, a6, 11
355	xor	yh, yh, a7
356	j	.Ladd_same_sign
357
358	.align	4
359	.global	__subdf3
360	.type	__subdf3, @function
361__subdf3:
362	leaf_entry sp, 16
363	movi	a6, 0x7ff00000
364
365	/* Check if the two operands have the same sign.  */
366	xor	a7, xh, yh
367	bltz	a7, .Lsub_opposite_signs
368
369.Lsub_same_sign:
370	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
371	ball	xh, a6, .Lsub_xnan_or_inf
372	ball	yh, a6, .Lsub_ynan_or_inf
373
374	/* Compare the operands.  In contrast to addition, the entire
375	   value matters here.  */
376	extui	a7, xh, 20, 11
377	extui	a8, yh, 20, 11
378	bltu	xh, yh, .Lsub_xsmaller
379	beq	xh, yh, .Lsub_compare_low
380
381.Lsub_ysmaller:
382	/* Check if the smaller (or equal) exponent is zero.  */
383	bnone	yh, a6, .Lsub_yexpzero
384
385	/* Replace yh sign/exponent with 0x001.  */
386	or	yh, yh, a6
387	slli	yh, yh, 11
388	srli	yh, yh, 11
389
390.Lsub_yexpdiff:
391	/* Compute the exponent difference.  Optimize for difference < 32.  */
392	sub	a10, a7, a8
393	bgeui	a10, 32, .Lsub_bigshifty
394
395	/* Shift yh/yl right by the exponent difference.  Any bits that are
396	   shifted out of yl are saved in a9 for rounding the result.  */
397	ssr	a10
398	movi	a9, 0
399	src	a9, yl, a9
400	src	yl, yh, yl
401	srl	yh, yh
402
403.Lsub_suby:
404	/* Do the 64-bit subtraction.  */
405	sub	xh, xh, yh
406	bgeu	xl, yl, 1f
407	addi	xh, xh, -1
4081:	sub	xl, xl, yl
409
410	/* Subtract the leftover bits in a9 from zero and propagate any
411	   borrow from xh/xl.  */
412	neg	a9, a9
413	beqz	a9, 1f
414	addi	a5, xh, -1
415	moveqz	xh, a5, xl
416	addi	xl, xl, -1
4171:
418	/* Check if the subtract underflowed into the exponent.  */
419	extui	a10, xh, 20, 11
420	beq	a10, a7, .Lsub_round
421	j	.Lsub_borrow
422
423.Lsub_compare_low:
424	/* The high words are equal.  Compare the low words.  */
425	bltu	xl, yl, .Lsub_xsmaller
426	bltu	yl, xl, .Lsub_ysmaller
427	/* The operands are equal.  Return 0.0.  */
428	movi	xh, 0
429	movi	xl, 0
4301:	leaf_return
431
432.Lsub_yexpzero:
433	/* y is a subnormal value.  Replace its sign/exponent with zero,
434	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
435	   y's apparent exponent because subnormals behave as if they had
436	   the minimum (nonzero) exponent.  */
437	slli	yh, yh, 12
438	srli	yh, yh, 12
439	bnone	xh, a6, .Lsub_yexpdiff
440	addi	a8, a8, 1
441	j	.Lsub_yexpdiff
442
443.Lsub_bigshifty:
	/* Exponent difference >= 64 -- just return the bigger value.  */
445	bgeui	a10, 64, 1b
446
447	/* Shift yh/yl right by the exponent difference.  Any bits that are
448	   shifted out are saved in a9 for rounding the result.  */
449	ssr	a10
450	sll	a11, yl		/* lost bits shifted out of yl */
451	src	a9, yh, yl
452	srl	yl, yh
453	movi	yh, 0
454	beqz	a11, .Lsub_suby
455	or	a9, a9, a10	/* any positive, nonzero value will work */
456	j	.Lsub_suby
457
458.Lsub_xsmaller:
459	/* Same thing as the "ysmaller" code, but with x and y swapped and
460	   with y negated.  */
461	bnone	xh, a6, .Lsub_xexpzero
462
463	or	xh, xh, a6
464	slli	xh, xh, 11
465	srli	xh, xh, 11
466
467.Lsub_xexpdiff:
468	sub	a10, a8, a7
469	bgeui	a10, 32, .Lsub_bigshiftx
470
471	ssr	a10
472	movi	a9, 0
473	src	a9, xl, a9
474	src	xl, xh, xl
475	srl	xh, xh
476
477	/* Negate y.  */
478	slli	a11, a6, 11
479	xor	yh, yh, a11
480
481.Lsub_subx:
482	sub	xl, yl, xl
483	sub	xh, yh, xh
484	bgeu	yl, xl, 1f
485	addi	xh, xh, -1
4861:
487	/* Subtract the leftover bits in a9 from zero and propagate any
488	   borrow from xh/xl.  */
489	neg	a9, a9
490	beqz	a9, 1f
491	addi	a5, xh, -1
492	moveqz	xh, a5, xl
493	addi	xl, xl, -1
4941:
495	/* Check if the subtract underflowed into the exponent.  */
496	extui	a10, xh, 20, 11
497	bne	a10, a8, .Lsub_borrow
498
499.Lsub_round:
500	/* Round up if the leftover fraction is >= 1/2.  */
501	bgez	a9, 1f
502	addi	xl, xl, 1
503	beqz	xl, .Lsub_roundcarry
504
505	/* Check if the leftover fraction is exactly 1/2.  */
506	slli	a9, a9, 1
507	beqz	a9, .Lsub_exactlyhalf
5081:	leaf_return
509
510.Lsub_xexpzero:
511	/* Same as "yexpzero".  */
512	slli	xh, xh, 12
513	srli	xh, xh, 12
514	bnone	yh, a6, .Lsub_xexpdiff
515	addi	a7, a7, 1
516	j	.Lsub_xexpdiff
517
518.Lsub_bigshiftx:
519	/* Mostly the same thing as "bigshifty", but with the sign bit of the
520	   shifted value set so that the subsequent subtraction flips the
521	   sign of y.  */
522	bgeui	a10, 64, .Lsub_returny
523
524	ssr	a10
525	sll	a11, xl
526	src	a9, xh, xl
527	srl	xl, xh
528	slli	xh, a6, 11	/* set sign bit of xh */
529	beqz	a11, .Lsub_subx
530	or	a9, a9, a10
531	j	.Lsub_subx
532
533.Lsub_returny:
534	/* Negate and return y.  */
535	slli	a7, a6, 11
536	xor	xh, yh, a7
537	mov	xl, yl
538	leaf_return
539
540.Lsub_borrow:
541	/* The subtraction has underflowed into the exponent field, so the
542	   value needs to be renormalized.  Shift the mantissa left as
543	   needed to remove any leading zeros and adjust the exponent
544	   accordingly.  If the exponent is not large enough to remove
545	   all the leading zeros, the result will be a subnormal value.  */
546
547	slli	a8, xh, 12
548	beqz	a8, .Lsub_xhzero
549	do_nsau	a6, a8, a7, a11
550	srli	a8, a8, 12
551	bge	a6, a10, .Lsub_subnormal
552	addi	a6, a6, 1
553
554.Lsub_shift_lt32:
555	/* Shift the mantissa (a8/xl/a9) left by a6.  */
556	ssl	a6
557	src	a8, a8, xl
558	src	xl, xl, a9
559	sll	a9, a9
560
561	/* Combine the shifted mantissa with the sign and exponent,
562	   decrementing the exponent by a6.  (The exponent has already
563	   been decremented by one due to the borrow from the subtraction,
564	   but adding the mantissa will increment the exponent by one.)  */
565	srli	xh, xh, 20
566	sub	xh, xh, a6
567	slli	xh, xh, 20
568	add	xh, xh, a8
569	j	.Lsub_round
570
571.Lsub_exactlyhalf:
572	/* Round down to the nearest even value.  */
573	srli	xl, xl, 1
574	slli	xl, xl, 1
575	leaf_return
576
577.Lsub_roundcarry:
578	/* xl is always zero when the rounding increment overflows, so
579	   there's no need to round it to an even value.  */
580	addi	xh, xh, 1
581	/* Overflow to the exponent is OK.  */
582	leaf_return
583
584.Lsub_xhzero:
585	/* When normalizing the result, all the mantissa bits in the high
586	   word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
587	do_nsau	a6, xl, a7, a11
588	addi	a6, a6, 21
589	blt	a10, a6, .Lsub_subnormal
590
591.Lsub_normalize_shift:
592	bltui	a6, 32, .Lsub_shift_lt32
593
594	ssl	a6
595	src	a8, xl, a9
596	sll	xl, a9
597	movi	a9, 0
598
599	srli	xh, xh, 20
600	sub	xh, xh, a6
601	slli	xh, xh, 20
602	add	xh, xh, a8
603	j	.Lsub_round
604
605.Lsub_subnormal:
606	/* The exponent is too small to shift away all the leading zeros.
607	   Set a6 to the current exponent (which has already been
608	   decremented by the borrow) so that the exponent of the result
609	   will be zero.  Do not add 1 to a6 in this case, because: (1)
610	   adding the mantissa will not increment the exponent, so there is
611	   no need to subtract anything extra from the exponent to
612	   compensate, and (2) the effective exponent of a subnormal is 1
613	   not 0 so the shift amount must be 1 smaller than normal. */
614	mov	a6, a10
615	j	.Lsub_normalize_shift
616
617#endif /* L_addsubdf3 */
618
619#ifdef L_muldf3
620
621	/* Multiplication */
622#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
623#define XCHAL_NO_MUL 1
624#endif
625
626	.literal_position
627__muldf3_aux:
628
629	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
630	   (This code is placed before the start of the function just to
631	   keep it in range of the limited branch displacements.)  */
632
633.Lmul_xexpzero:
634	/* Clear the sign bit of x.  */
635	slli	xh, xh, 1
636	srli	xh, xh, 1
637
638	/* If x is zero, return zero.  */
639	or	a10, xh, xl
640	beqz	a10, .Lmul_return_zero
641
642	/* Normalize x.  Adjust the exponent in a8.  */
643	beqz	xh, .Lmul_xh_zero
644	do_nsau	a10, xh, a11, a12
645	addi	a10, a10, -11
646	ssl	a10
647	src	xh, xh, xl
648	sll	xl, xl
649	movi	a8, 1
650	sub	a8, a8, a10
651	j	.Lmul_xnormalized
652.Lmul_xh_zero:
653	do_nsau	a10, xl, a11, a12
654	addi	a10, a10, -11
655	movi	a8, -31
656	sub	a8, a8, a10
657	ssl	a10
658	bltz	a10, .Lmul_xl_srl
659	sll	xh, xl
660	movi	xl, 0
661	j	.Lmul_xnormalized
662.Lmul_xl_srl:
663	srl	xh, xl
664	sll	xl, xl
665	j	.Lmul_xnormalized
666
667.Lmul_yexpzero:
668	/* Clear the sign bit of y.  */
669	slli	yh, yh, 1
670	srli	yh, yh, 1
671
672	/* If y is zero, return zero.  */
673	or	a10, yh, yl
674	beqz	a10, .Lmul_return_zero
675
676	/* Normalize y.  Adjust the exponent in a9.  */
677	beqz	yh, .Lmul_yh_zero
678	do_nsau	a10, yh, a11, a12
679	addi	a10, a10, -11
680	ssl	a10
681	src	yh, yh, yl
682	sll	yl, yl
683	movi	a9, 1
684	sub	a9, a9, a10
685	j	.Lmul_ynormalized
686.Lmul_yh_zero:
687	do_nsau	a10, yl, a11, a12
688	addi	a10, a10, -11
689	movi	a9, -31
690	sub	a9, a9, a10
691	ssl	a10
692	bltz	a10, .Lmul_yl_srl
693	sll	yh, yl
694	movi	yl, 0
695	j	.Lmul_ynormalized
696.Lmul_yl_srl:
697	srl	yh, yl
698	sll	yl, yl
699	j	.Lmul_ynormalized
700
701.Lmul_return_zero:
702	/* Return zero with the appropriate sign bit.  */
703	srli	xh, a7, 31
704	slli	xh, xh, 31
705	movi	xl, 0
706	j	.Lmul_done
707
708.Lmul_xnan_or_inf:
709	/* If y is zero, return NaN.  */
710	bnez	yl, 1f
711	slli	a8, yh, 1
712	beqz	a8, .Lmul_return_nan
7131:
714	/* If y is NaN, return y.  */
715	bnall	yh, a6, .Lmul_returnx
716	slli	a8, yh, 12
717	or	a8, a8, yl
718	beqz	a8, .Lmul_returnx
719
720.Lmul_returny:
721	mov	xh, yh
722	mov	xl, yl
723
724.Lmul_returnx:
725	slli	a8, xh, 12
726	or	a8, a8, xl
727	bnez	a8, .Lmul_return_nan
728	/* Set the sign bit and return.  */
729	extui	a7, a7, 31, 1
730	slli	xh, xh, 1
731	ssai	1
732	src	xh, a7, xh
733	j	.Lmul_done
734
735.Lmul_ynan_or_inf:
736	/* If x is zero, return NaN.  */
737	bnez	xl, .Lmul_returny
738	slli	a8, xh, 1
739	bnez	a8, .Lmul_returny
740	mov	xh, yh
741
742.Lmul_return_nan:
743	movi	a4, 0x80000	/* make it a quiet NaN */
744	or	xh, xh, a4
745	j	.Lmul_done
746
747	.align	4
748	.global	__muldf3
749	.type	__muldf3, @function
750__muldf3:
751#if __XTENSA_CALL0_ABI__
752	leaf_entry sp, 32
753	addi	sp, sp, -32
754	s32i	a12, sp, 16
755	s32i	a13, sp, 20
756	s32i	a14, sp, 24
757	s32i	a15, sp, 28
758#elif XCHAL_NO_MUL
759	/* This is not really a leaf function; allocate enough stack space
760	   to allow CALL12s to a helper function.  */
761	leaf_entry sp, 64
762#else
763	leaf_entry sp, 32
764#endif
765	movi	a6, 0x7ff00000
766
767	/* Get the sign of the result.  */
768	xor	a7, xh, yh
769
770	/* Check for NaN and infinity.  */
771	ball	xh, a6, .Lmul_xnan_or_inf
772	ball	yh, a6, .Lmul_ynan_or_inf
773
774	/* Extract the exponents.  */
775	extui	a8, xh, 20, 11
776	extui	a9, yh, 20, 11
777
778	beqz	a8, .Lmul_xexpzero
779.Lmul_xnormalized:
780	beqz	a9, .Lmul_yexpzero
781.Lmul_ynormalized:
782
783	/* Add the exponents.  */
784	add	a8, a8, a9
785
786	/* Replace sign/exponent fields with explicit "1.0".  */
787	movi	a10, 0x1fffff
788	or	xh, xh, a6
789	and	xh, xh, a10
790	or	yh, yh, a6
791	and	yh, yh, a10
792
793	/* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
794	   The least-significant word of the result is thrown away except
795	   that if it is nonzero, the lsb of a6 is set to 1.  */
796#if XCHAL_HAVE_MUL32_HIGH
797
798	/* Compute a6 with any carry-outs in a10.  */
799	movi	a10, 0
800	mull	a6, xl, yh
801	mull	a11, xh, yl
802	add	a6, a6, a11
803	bgeu	a6, a11, 1f
804	addi	a10, a10, 1
8051:
806	muluh	a11, xl, yl
807	add	a6, a6, a11
808	bgeu	a6, a11, 1f
809	addi	a10, a10, 1
8101:
811	/* If the low word of the result is nonzero, set the lsb of a6.  */
812	mull	a11, xl, yl
813	beqz	a11, 1f
814	movi	a9, 1
815	or	a6, a6, a9
8161:
817	/* Compute xl with any carry-outs in a9.  */
818	movi	a9, 0
819	mull	a11, xh, yh
820	add	a10, a10, a11
821	bgeu	a10, a11, 1f
822	addi	a9, a9, 1
8231:
824	muluh	a11, xh, yl
825	add	a10, a10, a11
826	bgeu	a10, a11, 1f
827	addi	a9, a9, 1
8281:
829	muluh	xl, xl, yh
830	add	xl, xl, a10
831	bgeu	xl, a10, 1f
832	addi	a9, a9, 1
8331:
834	/* Compute xh.  */
835	muluh	xh, xh, yh
836	add	xh, xh, a9
837
838#else /* ! XCHAL_HAVE_MUL32_HIGH */
839
840	/* Break the inputs into 16-bit chunks and compute 16 32-bit partial
841	   products.  These partial products are:
842
843		0 xll * yll
844
845		1 xll * ylh
846		2 xlh * yll
847
848		3 xll * yhl
849		4 xlh * ylh
850		5 xhl * yll
851
852		6 xll * yhh
853		7 xlh * yhl
854		8 xhl * ylh
855		9 xhh * yll
856
857		10 xlh * yhh
858		11 xhl * yhl
859		12 xhh * ylh
860
861		13 xhl * yhh
862		14 xhh * yhl
863
864		15 xhh * yhh
865
866	   where the input chunks are (hh, hl, lh, ll).  If using the Mul16
867	   or Mul32 multiplier options, these input chunks must be stored in
868	   separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
869	   that the inputs come from either half of the registers, so there
870	   is no need to shift them out ahead of time.  If there is no
871	   multiply hardware, the 16-bit chunks can be extracted when setting
872	   up the arguments to the separate multiply function.  */
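
	/* For reference, the same widening multiply written in C with four
	   32-bit halves; the code below uses sixteen 16-bit partial products
	   instead, but the bookkeeping is the same: sum the partial products
	   column by column while propagating carries.  Illustrative only --
	   the assembly additionally ORs any nonzero bits of the discarded
	   low word into the guard word a6.

	       #include <stdint.h>

	       struct u128 { uint64_t hi, lo; };

	       static struct u128
	       mul64x64 (uint64_t x, uint64_t y)
	       {
	         uint64_t xl = (uint32_t) x, xh = x >> 32;
	         uint64_t yl = (uint32_t) y, yh = y >> 32;
	         uint64_t ll = xl * yl, lh = xl * yh;
	         uint64_t hl = xh * yl, hh = xh * yh;
	         // middle column: high half of ll plus low halves of lh, hl
	         uint64_t mid = (ll >> 32) + (uint32_t) lh + (uint32_t) hl;
	         struct u128 r;
	         r.lo = (mid << 32) | (uint32_t) ll;
	         r.hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
	         return r;
	       }
	*/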
873
874	/* Save a7 since it is needed to hold a temporary value.  */
875	s32i	a7, sp, 4
876#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
877	/* Calling a separate multiply function will clobber a0 and requires
878	   use of a8 as a temporary, so save those values now.  (The function
879	   uses a custom ABI so nothing else needs to be saved.)  */
880	s32i	a0, sp, 0
881	s32i	a8, sp, 8
882#endif
883
884#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
885
886#define xlh a12
887#define ylh a13
888#define xhh a14
889#define yhh a15
890
891	/* Get the high halves of the inputs into registers.  */
892	srli	xlh, xl, 16
893	srli	ylh, yl, 16
894	srli	xhh, xh, 16
895	srli	yhh, yh, 16
896
897#define xll xl
898#define yll yl
899#define xhl xh
900#define yhl yh
901
902#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
903	/* Clear the high halves of the inputs.  This does not matter
904	   for MUL16 because the high bits are ignored.  */
905	extui	xl, xl, 0, 16
906	extui	xh, xh, 0, 16
907	extui	yl, yl, 0, 16
908	extui	yh, yh, 0, 16
909#endif
910#endif /* MUL16 || MUL32 */
911
912
913#if XCHAL_HAVE_MUL16
914
915#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
916	mul16u	dst, xreg ## xhalf, yreg ## yhalf
917
918#elif XCHAL_HAVE_MUL32
919
920#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
921	mull	dst, xreg ## xhalf, yreg ## yhalf
922
923#elif XCHAL_HAVE_MAC16
924
925/* The preprocessor insists on inserting a space when concatenating after
926   a period in the definition of do_mul below.  These macros are a workaround
927   using underscores instead of periods when doing the concatenation.  */
928#define umul_aa_ll umul.aa.ll
929#define umul_aa_lh umul.aa.lh
930#define umul_aa_hl umul.aa.hl
931#define umul_aa_hh umul.aa.hh
932
933#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
934	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
935	rsr	dst, ACCLO
936
937#else /* no multiply hardware */
938
939#define set_arg_l(dst, src) \
940	extui	dst, src, 0, 16
941#define set_arg_h(dst, src) \
942	srli	dst, src, 16
943
944#if __XTENSA_CALL0_ABI__
945#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
946	set_arg_ ## xhalf (a13, xreg); \
947	set_arg_ ## yhalf (a14, yreg); \
948	call0	.Lmul_mulsi3; \
949	mov	dst, a12
950#else
951#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
952	set_arg_ ## xhalf (a14, xreg); \
953	set_arg_ ## yhalf (a15, yreg); \
954	call12	.Lmul_mulsi3; \
955	mov	dst, a14
956#endif /* __XTENSA_CALL0_ABI__ */
957
958#endif /* no multiply hardware */
959
960	/* Add pp1 and pp2 into a10 with carry-out in a9.  */
961	do_mul(a10, xl, l, yl, h)	/* pp 1 */
962	do_mul(a11, xl, h, yl, l)	/* pp 2 */
963	movi	a9, 0
964	add	a10, a10, a11
965	bgeu	a10, a11, 1f
966	addi	a9, a9, 1
9671:
968	/* Initialize a6 with a9/a10 shifted into position.  Note that
969	   this value can be safely incremented without any carry-outs.  */
970	ssai	16
971	src	a6, a9, a10
972
973	/* Compute the low word into a10.  */
974	do_mul(a11, xl, l, yl, l)	/* pp 0 */
975	sll	a10, a10
976	add	a10, a10, a11
977	bgeu	a10, a11, 1f
978	addi	a6, a6, 1
9791:
980	/* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
981	   This is good enough to determine the low half of a6, so that any
982	   nonzero bits from the low word of the result can be collapsed
983	   into a6, freeing up a register.  */
984	movi	a9, 0
985	do_mul(a11, xl, l, yh, l)	/* pp 3 */
986	add	a6, a6, a11
987	bgeu	a6, a11, 1f
988	addi	a9, a9, 1
9891:
990	do_mul(a11, xl, h, yl, h)	/* pp 4 */
991	add	a6, a6, a11
992	bgeu	a6, a11, 1f
993	addi	a9, a9, 1
9941:
995	do_mul(a11, xh, l, yl, l)	/* pp 5 */
996	add	a6, a6, a11
997	bgeu	a6, a11, 1f
998	addi	a9, a9, 1
9991:
1000	/* Collapse any nonzero bits from the low word into a6.  */
1001	beqz	a10, 1f
1002	movi	a11, 1
1003	or	a6, a6, a11
10041:
1005	/* Add pp6-9 into a11 with carry-outs in a10.  */
1006	do_mul(a7, xl, l, yh, h)	/* pp 6 */
1007	do_mul(a11, xh, h, yl, l)	/* pp 9 */
1008	movi	a10, 0
1009	add	a11, a11, a7
1010	bgeu	a11, a7, 1f
1011	addi	a10, a10, 1
10121:
1013	do_mul(a7, xl, h, yh, l)	/* pp 7 */
1014	add	a11, a11, a7
1015	bgeu	a11, a7, 1f
1016	addi	a10, a10, 1
10171:
1018	do_mul(a7, xh, l, yl, h)	/* pp 8 */
1019	add	a11, a11, a7
1020	bgeu	a11, a7, 1f
1021	addi	a10, a10, 1
10221:
1023	/* Shift a10/a11 into position, and add low half of a11 to a6.  */
1024	src	a10, a10, a11
1025	add	a10, a10, a9
1026	sll	a11, a11
1027	add	a6, a6, a11
1028	bgeu	a6, a11, 1f
1029	addi	a10, a10, 1
10301:
1031	/* Add pp10-12 into xl with carry-outs in a9.  */
1032	movi	a9, 0
1033	do_mul(xl, xl, h, yh, h)	/* pp 10 */
1034	add	xl, xl, a10
1035	bgeu	xl, a10, 1f
1036	addi	a9, a9, 1
10371:
1038	do_mul(a10, xh, l, yh, l)	/* pp 11 */
1039	add	xl, xl, a10
1040	bgeu	xl, a10, 1f
1041	addi	a9, a9, 1
10421:
1043	do_mul(a10, xh, h, yl, h)	/* pp 12 */
1044	add	xl, xl, a10
1045	bgeu	xl, a10, 1f
1046	addi	a9, a9, 1
10471:
1048	/* Add pp13-14 into a11 with carry-outs in a10.  */
1049	do_mul(a11, xh, l, yh, h)	/* pp 13 */
1050	do_mul(a7, xh, h, yh, l)	/* pp 14 */
1051	movi	a10, 0
1052	add	a11, a11, a7
1053	bgeu	a11, a7, 1f
1054	addi	a10, a10, 1
10551:
	/* Shift a10/a11 into position, and add the low half of a11 to xl.  */
1057	src	a10, a10, a11
1058	add	a10, a10, a9
1059	sll	a11, a11
1060	add	xl, xl, a11
1061	bgeu	xl, a11, 1f
1062	addi	a10, a10, 1
10631:
1064	/* Compute xh.  */
1065	do_mul(xh, xh, h, yh, h)	/* pp 15 */
1066	add	xh, xh, a10
1067
1068	/* Restore values saved on the stack during the multiplication.  */
1069	l32i	a7, sp, 4
1070#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1071	l32i	a0, sp, 0
1072	l32i	a8, sp, 8
1073#endif
1074#endif /* ! XCHAL_HAVE_MUL32_HIGH */
1075
1076	/* Shift left by 12 bits, unless there was a carry-out from the
1077	   multiply, in which case, shift by 11 bits and increment the
1078	   exponent.  Note: It is convenient to use the constant 0x3ff
1079	   instead of 0x400 when removing the extra exponent bias (so that
1080	   it is easy to construct 0x7fe for the overflow check).  Reverse
1081	   the logic here to decrement the exponent sum by one unless there
1082	   was a carry-out.  */
1083	movi	a4, 11
1084	srli	a5, xh, 21 - 12
1085	bnez	a5, 1f
1086	addi	a4, a4, 1
1087	addi	a8, a8, -1
10881:	ssl	a4
1089	src	xh, xh, xl
1090	src	xl, xl, a6
1091	sll	a6, a6
1092
1093	/* Subtract the extra bias from the exponent sum (plus one to account
1094	   for the explicit "1.0" of the mantissa that will be added to the
1095	   exponent in the final result).  */
1096	movi	a4, 0x3ff
1097	sub	a8, a8, a4
1098
1099	/* Check for over/underflow.  The value in a8 is one less than the
1100	   final exponent, so values in the range 0..7fd are OK here.  */
1101	slli	a4, a4, 1	/* 0x7fe */
1102	bgeu	a8, a4, .Lmul_overflow
1103
1104.Lmul_round:
1105	/* Round.  */
1106	bgez	a6, .Lmul_rounded
1107	addi	xl, xl, 1
1108	beqz	xl, .Lmul_roundcarry
1109	slli	a6, a6, 1
1110	beqz	a6, .Lmul_exactlyhalf
1111
1112.Lmul_rounded:
1113	/* Add the exponent to the mantissa.  */
1114	slli	a8, a8, 20
1115	add	xh, xh, a8
1116
1117.Lmul_addsign:
1118	/* Add the sign bit.  */
1119	srli	a7, a7, 31
1120	slli	a7, a7, 31
1121	or	xh, xh, a7
1122
1123.Lmul_done:
1124#if __XTENSA_CALL0_ABI__
1125	l32i	a12, sp, 16
1126	l32i	a13, sp, 20
1127	l32i	a14, sp, 24
1128	l32i	a15, sp, 28
1129	addi	sp, sp, 32
1130#endif
1131	leaf_return
1132
1133.Lmul_exactlyhalf:
1134	/* Round down to the nearest even value.  */
1135	srli	xl, xl, 1
1136	slli	xl, xl, 1
1137	j	.Lmul_rounded
1138
1139.Lmul_roundcarry:
1140	/* xl is always zero when the rounding increment overflows, so
1141	   there's no need to round it to an even value.  */
1142	addi	xh, xh, 1
1143	/* Overflow is OK -- it will be added to the exponent.  */
1144	j	.Lmul_rounded
1145
1146.Lmul_overflow:
1147	bltz	a8, .Lmul_underflow
1148	/* Return +/- Infinity.  */
1149	addi	a8, a4, 1	/* 0x7ff */
1150	slli	xh, a8, 20
1151	movi	xl, 0
1152	j	.Lmul_addsign
1153
1154.Lmul_underflow:
1155	/* Create a subnormal value, where the exponent field contains zero,
1156	   but the effective exponent is 1.  The value of a8 is one less than
1157	   the actual exponent, so just negate it to get the shift amount.  */
1158	neg	a8, a8
1159	mov	a9, a6
1160	ssr	a8
1161	bgeui	a8, 32, .Lmul_bigshift
1162
1163	/* Shift xh/xl right.  Any bits that are shifted out of xl are saved
1164	   in a6 (combined with the shifted-out bits currently in a6) for
1165	   rounding the result.  */
1166	sll	a6, xl
1167	src	xl, xh, xl
1168	srl	xh, xh
1169	j	1f
1170
1171.Lmul_bigshift:
1172	bgeui	a8, 64, .Lmul_flush_to_zero
1173	sll	a10, xl		/* lost bits shifted out of xl */
1174	src	a6, xh, xl
1175	srl	xl, xh
1176	movi	xh, 0
1177	or	a9, a9, a10
1178
1179	/* Set the exponent to zero.  */
11801:	movi	a8, 0
1181
1182	/* Pack any nonzero bits shifted out into a6.  */
1183	beqz	a9, .Lmul_round
1184	movi	a9, 1
1185	or	a6, a6, a9
1186	j	.Lmul_round
1187
1188.Lmul_flush_to_zero:
1189	/* Return zero with the appropriate sign bit.  */
1190	srli	xh, a7, 31
1191	slli	xh, xh, 31
1192	movi	xl, 0
1193	j	.Lmul_done
1194
1195#if XCHAL_NO_MUL
1196
1197	/* For Xtensa processors with no multiply hardware, this simplified
1198	   version of _mulsi3 is used for multiplying 16-bit chunks of
1199	   the floating-point mantissas.  When using CALL0, this function
1200	   uses a custom ABI: the inputs are passed in a13 and a14, the
1201	   result is returned in a12, and a8 and a15 are clobbered.  */
1202	.align	4
1203.Lmul_mulsi3:
1204	leaf_entry sp, 16
1205	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1206	movi	\dst, 0
12071:	add	\tmp1, \src2, \dst
1208	extui	\tmp2, \src1, 0, 1
1209	movnez	\dst, \tmp1, \tmp2
1210
1211	do_addx2 \tmp1, \src2, \dst, \tmp1
1212	extui	\tmp2, \src1, 1, 1
1213	movnez	\dst, \tmp1, \tmp2
1214
1215	do_addx4 \tmp1, \src2, \dst, \tmp1
1216	extui	\tmp2, \src1, 2, 1
1217	movnez	\dst, \tmp1, \tmp2
1218
1219	do_addx8 \tmp1, \src2, \dst, \tmp1
1220	extui	\tmp2, \src1, 3, 1
1221	movnez	\dst, \tmp1, \tmp2
1222
1223	srli	\src1, \src1, 4
1224	slli	\src2, \src2, 4
1225	bnez	\src1, 1b
1226	.endm
1227#if __XTENSA_CALL0_ABI__
1228	mul_mulsi3_body a12, a13, a14, a15, a8
1229#else
1230	/* The result will be written into a2, so save that argument in a4.  */
1231	mov	a4, a2
1232	mul_mulsi3_body a2, a4, a3, a5, a6
1233#endif
1234	leaf_return
1235#endif /* XCHAL_NO_MUL */
1236#endif /* L_muldf3 */
1237
1238#ifdef L_divdf3
1239
1240	/* Division */
1241
1242#if XCHAL_HAVE_DFP_DIV
1243
1244        .text
1245        .align 4
1246        .global __divdf3
1247        .type	__divdf3, @function
1248__divdf3:
1249	leaf_entry	sp, 16
1250
1251	wfrd		f1, xh, xl
1252	wfrd		f2, yh, yl
1253
1254	div0.d		f3, f2
1255	nexp01.d	f4, f2
1256	const.d		f0, 1
1257	maddn.d		f0, f4, f3
1258	const.d		f5, 0
1259	mov.d		f7, f2
1260	mkdadj.d	f7, f1
1261	maddn.d		f3, f0, f3
1262	maddn.d		f5, f0, f0
1263	nexp01.d	f1, f1
1264	div0.d		f2, f2
1265	maddn.d		f3, f5, f3
1266	const.d		f5, 1
1267	const.d		f0, 0
1268	neg.d		f6, f1
1269	maddn.d		f5, f4, f3
1270	maddn.d		f0, f6, f2
1271	maddn.d		f3, f5, f3
1272	maddn.d		f6, f4, f0
1273	const.d		f2, 1
1274	maddn.d		f2, f4, f3
1275	maddn.d		f0, f6, f3
1276	neg.d		f1, f1
1277	maddn.d		f3, f2, f3
1278	maddn.d		f1, f4, f0
1279	addexpm.d	f0, f7
1280	addexp.d	f3, f7
1281	divn.d		f0, f1, f3
1282
1283	rfr		xl, f0
1284	rfrd		xh, f0
1285
1286	leaf_return
1287
1288#else
1289
1290	.literal_position
1291
1292__divdf3_aux:
1293
1294	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1295	   (This code is placed before the start of the function just to
1296	   keep it in range of the limited branch displacements.)  */
1297
1298.Ldiv_yexpzero:
1299	/* Clear the sign bit of y.  */
1300	slli	yh, yh, 1
1301	srli	yh, yh, 1
1302
1303	/* Check for division by zero.  */
1304	or	a10, yh, yl
1305	beqz	a10, .Ldiv_yzero
1306
1307	/* Normalize y.  Adjust the exponent in a9.  */
1308	beqz	yh, .Ldiv_yh_zero
1309	do_nsau	a10, yh, a11, a9
1310	addi	a10, a10, -11
1311	ssl	a10
1312	src	yh, yh, yl
1313	sll	yl, yl
1314	movi	a9, 1
1315	sub	a9, a9, a10
1316	j	.Ldiv_ynormalized
1317.Ldiv_yh_zero:
1318	do_nsau	a10, yl, a11, a9
1319	addi	a10, a10, -11
1320	movi	a9, -31
1321	sub	a9, a9, a10
1322	ssl	a10
1323	bltz	a10, .Ldiv_yl_srl
1324	sll	yh, yl
1325	movi	yl, 0
1326	j	.Ldiv_ynormalized
1327.Ldiv_yl_srl:
1328	srl	yh, yl
1329	sll	yl, yl
1330	j	.Ldiv_ynormalized
1331
1332.Ldiv_yzero:
1333	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
1334	slli	xh, xh, 1
1335	srli	xh, xh, 1
1336	or	xl, xl, xh
1337	srli	xh, a7, 31
1338	slli	xh, xh, 31
1339	or	xh, xh, a6
1340	bnez	xl, 1f
1341	movi	a4, 0x80000	/* make it a quiet NaN */
1342	or	xh, xh, a4
13431:	movi	xl, 0
1344	leaf_return
1345
1346.Ldiv_xexpzero:
1347	/* Clear the sign bit of x.  */
1348	slli	xh, xh, 1
1349	srli	xh, xh, 1
1350
1351	/* If x is zero, return zero.  */
1352	or	a10, xh, xl
1353	beqz	a10, .Ldiv_return_zero
1354
1355	/* Normalize x.  Adjust the exponent in a8.  */
1356	beqz	xh, .Ldiv_xh_zero
1357	do_nsau	a10, xh, a11, a8
1358	addi	a10, a10, -11
1359	ssl	a10
1360	src	xh, xh, xl
1361	sll	xl, xl
1362	movi	a8, 1
1363	sub	a8, a8, a10
1364	j	.Ldiv_xnormalized
1365.Ldiv_xh_zero:
1366	do_nsau	a10, xl, a11, a8
1367	addi	a10, a10, -11
1368	movi	a8, -31
1369	sub	a8, a8, a10
1370	ssl	a10
1371	bltz	a10, .Ldiv_xl_srl
1372	sll	xh, xl
1373	movi	xl, 0
1374	j	.Ldiv_xnormalized
1375.Ldiv_xl_srl:
1376	srl	xh, xl
1377	sll	xl, xl
1378	j	.Ldiv_xnormalized
1379
1380.Ldiv_return_zero:
1381	/* Return zero with the appropriate sign bit.  */
1382	srli	xh, a7, 31
1383	slli	xh, xh, 31
1384	movi	xl, 0
1385	leaf_return
1386
1387.Ldiv_xnan_or_inf:
1388	/* Set the sign bit of the result.  */
1389	srli	a7, yh, 31
1390	slli	a7, a7, 31
1391	xor	xh, xh, a7
1392	/* If y is NaN or Inf, return NaN.  */
1393	ball	yh, a6, .Ldiv_return_nan
1394	slli	a8, xh, 12
1395	or	a8, a8, xl
1396	bnez	a8, .Ldiv_return_nan
1397	leaf_return
1398
1399.Ldiv_ynan_or_inf:
1400	/* If y is Infinity, return zero.  */
1401	slli	a8, yh, 12
1402	or	a8, a8, yl
1403	beqz	a8, .Ldiv_return_zero
1404	/* y is NaN; return it.  */
1405	mov	xh, yh
1406	mov	xl, yl
1407
1408.Ldiv_return_nan:
1409	movi	a4, 0x80000	/* make it a quiet NaN */
1410	or	xh, xh, a4
1411	leaf_return
1412
1413.Ldiv_highequal1:
1414	bltu	xl, yl, 2f
1415	j	3f
1416
1417	.align	4
1418	.global	__divdf3
1419	.type	__divdf3, @function
1420__divdf3:
1421	leaf_entry sp, 16
1422	movi	a6, 0x7ff00000
1423
1424	/* Get the sign of the result.  */
1425	xor	a7, xh, yh
1426
1427	/* Check for NaN and infinity.  */
1428	ball	xh, a6, .Ldiv_xnan_or_inf
1429	ball	yh, a6, .Ldiv_ynan_or_inf
1430
1431	/* Extract the exponents.  */
1432	extui	a8, xh, 20, 11
1433	extui	a9, yh, 20, 11
1434
1435	beqz	a9, .Ldiv_yexpzero
1436.Ldiv_ynormalized:
1437	beqz	a8, .Ldiv_xexpzero
1438.Ldiv_xnormalized:
1439
1440	/* Subtract the exponents.  */
1441	sub	a8, a8, a9
1442
1443	/* Replace sign/exponent fields with explicit "1.0".  */
1444	movi	a10, 0x1fffff
1445	or	xh, xh, a6
1446	and	xh, xh, a10
1447	or	yh, yh, a6
1448	and	yh, yh, a10
1449
1450	/* Set SAR for left shift by one.  */
1451	ssai	(32 - 1)
1452
1453	/* The first digit of the mantissa division must be a one.
1454	   Shift x (and adjust the exponent) as needed to make this true.  */
1455	bltu	yh, xh, 3f
1456	beq	yh, xh, .Ldiv_highequal1
14572:	src	xh, xh, xl
1458	sll	xl, xl
1459	addi	a8, a8, -1
14603:
1461	/* Do the first subtraction and shift.  */
1462	sub	xh, xh, yh
1463	bgeu	xl, yl, 1f
1464	addi	xh, xh, -1
14651:	sub	xl, xl, yl
1466	src	xh, xh, xl
1467	sll	xl, xl
1468
1469	/* Put the quotient into a10/a11.  */
1470	movi	a10, 0
1471	movi	a11, 1
1472
1473	/* Divide one bit at a time for 52 bits.  */
1474	movi	a9, 52
1475#if XCHAL_HAVE_LOOPS
1476	loop	a9, .Ldiv_loopend
1477#endif
1478.Ldiv_loop:
1479	/* Shift the quotient << 1.  */
1480	src	a10, a10, a11
1481	sll	a11, a11
1482
1483	/* Is this digit a 0 or 1?  */
1484	bltu	xh, yh, 3f
1485	beq	xh, yh, .Ldiv_highequal2
1486
1487	/* Output a 1 and subtract.  */
14882:	addi	a11, a11, 1
1489	sub	xh, xh, yh
1490	bgeu	xl, yl, 1f
1491	addi	xh, xh, -1
14921:	sub	xl, xl, yl
1493
1494	/* Shift the dividend << 1.  */
14953:	src	xh, xh, xl
1496	sll	xl, xl
1497
1498#if !XCHAL_HAVE_LOOPS
1499	addi	a9, a9, -1
1500	bnez	a9, .Ldiv_loop
1501#endif
1502.Ldiv_loopend:
1503
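	/* The loop above is a restoring division of the two 53-bit mantissas
	   (both have the implicit 1 set in bit 52), one quotient bit per
	   iteration; the first, always-one bit is peeled off before entering
	   the loop.  An illustrative C version (hypothetical helper, not
	   part of this file):

	       #include <stdint.h>

	       // x and y are 53-bit mantissas with bit 52 set.  Returns a
	       // 53-bit quotient with bit 52 set; *rem receives the final
	       // remainder already shifted left by one, which is what the
	       // rounding code below compares against y.
	       static uint64_t
	       div_mantissa (uint64_t x, uint64_t y, uint64_t *rem)
	       {
	         uint64_t q = 0;
	         if (x < y)           // align so the first quotient bit is 1
	           x <<= 1;           // (the caller adjusts the exponent)
	         for (int i = 0; i < 53; i++)
	           {
	             q <<= 1;
	             if (x >= y)
	               {
	                 q |= 1;
	                 x -= y;
	               }
	             x <<= 1;
	           }
	         *rem = x;
	         return q;
	       }
	*/
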
1504	/* Add the exponent bias (less one to account for the explicit "1.0"
1505	   of the mantissa that will be added to the exponent in the final
1506	   result).  */
1507	movi	a9, 0x3fe
1508	add	a8, a8, a9
1509
1510	/* Check for over/underflow.  The value in a8 is one less than the
1511	   final exponent, so values in the range 0..7fd are OK here.  */
1512	addmi	a9, a9, 0x400	/* 0x7fe */
1513	bgeu	a8, a9, .Ldiv_overflow
1514
1515.Ldiv_round:
1516	/* Round.  The remainder (<< 1) is in xh/xl.  */
1517	bltu	xh, yh, .Ldiv_rounded
1518	beq	xh, yh, .Ldiv_highequal3
1519.Ldiv_roundup:
1520	addi	a11, a11, 1
1521	beqz	a11, .Ldiv_roundcarry
1522
1523.Ldiv_rounded:
1524	mov	xl, a11
1525	/* Add the exponent to the mantissa.  */
1526	slli	a8, a8, 20
1527	add	xh, a10, a8
1528
1529.Ldiv_addsign:
1530	/* Add the sign bit.  */
1531	srli	a7, a7, 31
1532	slli	a7, a7, 31
1533	or	xh, xh, a7
1534	leaf_return
1535
1536.Ldiv_highequal2:
1537	bgeu	xl, yl, 2b
1538	j	3b
1539
1540.Ldiv_highequal3:
1541	bltu	xl, yl, .Ldiv_rounded
1542	bne	xl, yl, .Ldiv_roundup
1543
1544	/* Remainder is exactly half the divisor.  Round even.  */
1545	addi	a11, a11, 1
1546	beqz	a11, .Ldiv_roundcarry
1547	srli	a11, a11, 1
1548	slli	a11, a11, 1
1549	j	.Ldiv_rounded
1550
1551.Ldiv_overflow:
1552	bltz	a8, .Ldiv_underflow
1553	/* Return +/- Infinity.  */
1554	addi	a8, a9, 1	/* 0x7ff */
1555	slli	xh, a8, 20
1556	movi	xl, 0
1557	j	.Ldiv_addsign
1558
1559.Ldiv_underflow:
1560	/* Create a subnormal value, where the exponent field contains zero,
1561	   but the effective exponent is 1.  The value of a8 is one less than
1562	   the actual exponent, so just negate it to get the shift amount.  */
1563	neg	a8, a8
1564	ssr	a8
1565	bgeui	a8, 32, .Ldiv_bigshift
1566
1567	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
1568	   saved in a6 for rounding the result.  */
1569	sll	a6, a11
1570	src	a11, a10, a11
1571	srl	a10, a10
1572	j	1f
1573
1574.Ldiv_bigshift:
1575	bgeui	a8, 64, .Ldiv_flush_to_zero
1576	sll	a9, a11		/* lost bits shifted out of a11 */
1577	src	a6, a10, a11
1578	srl	a11, a10
1579	movi	a10, 0
1580	or	xl, xl, a9
1581
1582	/* Set the exponent to zero.  */
15831:	movi	a8, 0
1584
1585	/* Pack any nonzero remainder (in xh/xl) into a6.  */
1586	or	xh, xh, xl
1587	beqz	xh, 1f
1588	movi	a9, 1
1589	or	a6, a6, a9
1590
1591	/* Round a10/a11 based on the bits shifted out into a6.  */
15921:	bgez	a6, .Ldiv_rounded
1593	addi	a11, a11, 1
1594	beqz	a11, .Ldiv_roundcarry
1595	slli	a6, a6, 1
1596	bnez	a6, .Ldiv_rounded
1597	srli	a11, a11, 1
1598	slli	a11, a11, 1
1599	j	.Ldiv_rounded
1600
1601.Ldiv_roundcarry:
1602	/* a11 is always zero when the rounding increment overflows, so
1603	   there's no need to round it to an even value.  */
1604	addi	a10, a10, 1
1605	/* Overflow to the exponent field is OK.  */
1606	j	.Ldiv_rounded
1607
1608.Ldiv_flush_to_zero:
1609	/* Return zero with the appropriate sign bit.  */
1610	srli	xh, a7, 31
1611	slli	xh, xh, 31
1612	movi	xl, 0
1613	leaf_return
1614
1615#endif /* XCHAL_HAVE_DFP_DIV */
1616
1617#endif /* L_divdf3 */
1618
1619#ifdef L_cmpdf2
1620
1621	/* Equal and Not Equal */
1622
1623	.align	4
1624	.global	__eqdf2
1625	.global	__nedf2
1626	.set	__nedf2, __eqdf2
1627	.type	__eqdf2, @function
1628__eqdf2:
1629	leaf_entry sp, 16
1630	bne	xl, yl, 2f
1631	bne	xh, yh, 4f
1632
1633	/* The values are equal but NaN != NaN.  Check the exponent.  */
1634	movi	a6, 0x7ff00000
1635	ball	xh, a6, 3f
1636
1637	/* Equal.  */
1638	movi	a2, 0
1639	leaf_return
1640
1641	/* Not equal.  */
16422:	movi	a2, 1
1643	leaf_return
1644
1645	/* Check if the mantissas are nonzero.  */
16463:	slli	a7, xh, 12
1647	or	a7, a7, xl
1648	j	5f
1649
1650	/* Check if x and y are zero with different signs.  */
16514:	or	a7, xh, yh
1652	slli	a7, a7, 1
1653	or	a7, a7, xl	/* xl == yl here */
1654
	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) == 0x7ff and x == y.  */
16575:	movi	a2, 0
1658	movi	a3, 1
1659	movnez	a2, a3, a7
1660	leaf_return
1661
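	/* The return-value convention here is the usual libgcc one: __eqdf2
	   (and its alias __nedf2) return zero when the operands compare
	   equal and nonzero otherwise, with NaN unequal to everything
	   (including itself) and +0.0 equal to -0.0.  An illustrative C
	   equivalent operating on the raw bit patterns:

	       #include <stdint.h>

	       static int
	       eqdf2_ref (uint64_t x, uint64_t y)
	       {
	         int x_nan = ((x >> 52) & 0x7ff) == 0x7ff && (x << 12) != 0;
	         int y_nan = ((y >> 52) & 0x7ff) == 0x7ff && (y << 12) != 0;
	         if (x_nan || y_nan)
	           return 1;                // unordered: report "not equal"
	         if (((x | y) << 1) == 0)
	           return 0;                // +0.0 and -0.0 compare equal
	         return x != y;
	       }
	*/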
1662
1663	/* Greater Than */
1664
1665	.align	4
1666	.global	__gtdf2
1667	.type	__gtdf2, @function
1668__gtdf2:
1669	leaf_entry sp, 16
1670	movi	a6, 0x7ff00000
1671	ball	xh, a6, 2f
16721:	bnall	yh, a6, .Lle_cmp
1673
1674	/* Check if y is a NaN.  */
1675	slli	a7, yh, 12
1676	or	a7, a7, yl
1677	beqz	a7, .Lle_cmp
1678	movi	a2, 0
1679	leaf_return
1680
1681	/* Check if x is a NaN.  */
16822:	slli	a7, xh, 12
1683	or	a7, a7, xl
1684	beqz	a7, 1b
1685	movi	a2, 0
1686	leaf_return
1687
1688
1689	/* Less Than or Equal */
1690
1691	.align	4
1692	.global	__ledf2
1693	.type	__ledf2, @function
1694__ledf2:
1695	leaf_entry sp, 16
1696	movi	a6, 0x7ff00000
1697	ball	xh, a6, 2f
16981:	bnall	yh, a6, .Lle_cmp
1699
1700	/* Check if y is a NaN.  */
1701	slli	a7, yh, 12
1702	or	a7, a7, yl
1703	beqz	a7, .Lle_cmp
1704	movi	a2, 1
1705	leaf_return
1706
1707	/* Check if x is a NaN.  */
17082:	slli	a7, xh, 12
1709	or	a7, a7, xl
1710	beqz	a7, 1b
1711	movi	a2, 1
1712	leaf_return
1713
1714.Lle_cmp:
1715	/* Check if x and y have different signs.  */
1716	xor	a7, xh, yh
1717	bltz	a7, .Lle_diff_signs
1718
1719	/* Check if x is negative.  */
1720	bltz	xh, .Lle_xneg
1721
1722	/* Check if x <= y.  */
1723	bltu	xh, yh, 4f
1724	bne	xh, yh, 5f
1725	bltu	yl, xl, 5f
17264:	movi	a2, 0
1727	leaf_return
1728
1729.Lle_xneg:
1730	/* Check if y <= x.  */
1731	bltu	yh, xh, 4b
1732	bne	yh, xh, 5f
1733	bgeu	xl, yl, 4b
17345:	movi	a2, 1
1735	leaf_return
1736
1737.Lle_diff_signs:
1738	bltz	xh, 4b
1739
1740	/* Check if both x and y are zero.  */
1741	or	a7, xh, yh
1742	slli	a7, a7, 1
1743	or	a7, a7, xl
1744	or	a7, a7, yl
1745	movi	a2, 1
1746	movi	a3, 0
1747	moveqz	a2, a3, a7
1748	leaf_return
1749
1750
1751	/* Greater Than or Equal */
1752
1753	.align	4
1754	.global	__gedf2
1755	.type	__gedf2, @function
1756__gedf2:
1757	leaf_entry sp, 16
1758	movi	a6, 0x7ff00000
1759	ball	xh, a6, 2f
17601:	bnall	yh, a6, .Llt_cmp
1761
1762	/* Check if y is a NaN.  */
1763	slli	a7, yh, 12
1764	or	a7, a7, yl
1765	beqz	a7, .Llt_cmp
1766	movi	a2, -1
1767	leaf_return
1768
1769	/* Check if x is a NaN.  */
17702:	slli	a7, xh, 12
1771	or	a7, a7, xl
1772	beqz	a7, 1b
1773	movi	a2, -1
1774	leaf_return
1775
1776
1777	/* Less Than */
1778
1779	.align	4
1780	.global	__ltdf2
1781	.type	__ltdf2, @function
1782__ltdf2:
1783	leaf_entry sp, 16
1784	movi	a6, 0x7ff00000
1785	ball	xh, a6, 2f
17861:	bnall	yh, a6, .Llt_cmp
1787
1788	/* Check if y is a NaN.  */
1789	slli	a7, yh, 12
1790	or	a7, a7, yl
1791	beqz	a7, .Llt_cmp
1792	movi	a2, 0
1793	leaf_return
1794
1795	/* Check if x is a NaN.  */
17962:	slli	a7, xh, 12
1797	or	a7, a7, xl
1798	beqz	a7, 1b
1799	movi	a2, 0
1800	leaf_return
1801
1802.Llt_cmp:
1803	/* Check if x and y have different signs.  */
1804	xor	a7, xh, yh
1805	bltz	a7, .Llt_diff_signs
1806
1807	/* Check if x is negative.  */
1808	bltz	xh, .Llt_xneg
1809
1810	/* Check if x < y.  */
1811	bltu	xh, yh, 4f
1812	bne	xh, yh, 5f
1813	bgeu	xl, yl, 5f
18144:	movi	a2, -1
1815	leaf_return
1816
1817.Llt_xneg:
1818	/* Check if y < x.  */
1819	bltu	yh, xh, 4b
1820	bne	yh, xh, 5f
1821	bltu	yl, xl, 4b
18225:	movi	a2, 0
1823	leaf_return
1824
1825.Llt_diff_signs:
1826	bgez	xh, 5b
1827
	/* Check if both x and y are zero.  */
1829	or	a7, xh, yh
1830	slli	a7, a7, 1
1831	or	a7, a7, xl
1832	or	a7, a7, yl
1833	movi	a2, 0
1834	movi	a3, -1
1835	movnez	a2, a3, a7
1836	leaf_return
1837
1838
1839	/* Unordered */
1840
1841	.align	4
1842	.global	__unorddf2
1843	.type	__unorddf2, @function
1844__unorddf2:
1845	leaf_entry sp, 16
1846	movi	a6, 0x7ff00000
1847	ball	xh, a6, 3f
18481:	ball	yh, a6, 4f
18492:	movi	a2, 0
1850	leaf_return
1851
18523:	slli	a7, xh, 12
1853	or	a7, a7, xl
1854	beqz	a7, 1b
1855	movi	a2, 1
1856	leaf_return
1857
18584:	slli	a7, yh, 12
1859	or	a7, a7, yl
1860	beqz	a7, 2b
1861	movi	a2, 1
1862	leaf_return
1863
1864#endif /* L_cmpdf2 */
1865
1866#ifdef L_fixdfsi
1867
1868	.align	4
1869	.global	__fixdfsi
1870	.type	__fixdfsi, @function
1871__fixdfsi:
1872	leaf_entry sp, 16
1873
1874	/* Check for NaN and Infinity.  */
1875	movi	a6, 0x7ff00000
1876	ball	xh, a6, .Lfixdfsi_nan_or_inf
1877
1878	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
1879	extui	a4, xh, 20, 11
1880	extui	a5, a6, 19, 10	/* 0x3fe */
1881	sub	a4, a4, a5
1882	bgei	a4, 32, .Lfixdfsi_maxint
1883	blti	a4, 1, .Lfixdfsi_zero
1884
1885	/* Add explicit "1.0" and shift << 11.  */
1886	or	a7, xh, a6
1887	ssai	(32 - 11)
1888	src	a5, a7, xl
1889
1890	/* Shift back to the right, based on the exponent.  */
1891	ssl	a4		/* shift by 32 - a4 */
1892	srl	a5, a5
1893
1894	/* Negate the result if sign != 0.  */
1895	neg	a2, a5
1896	movgez	a2, a5, a7
1897	leaf_return
1898
1899.Lfixdfsi_nan_or_inf:
1900	/* Handle Infinity and NaN.  */
1901	slli	a4, xh, 12
1902	or	a4, a4, xl
1903	beqz	a4, .Lfixdfsi_maxint
1904
1905	/* Translate NaN to +maxint.  */
1906	movi	xh, 0
1907
1908.Lfixdfsi_maxint:
1909	slli	a4, a6, 11	/* 0x80000000 */
1910	addi	a5, a4, -1	/* 0x7fffffff */
1911	movgez	a4, a5, xh
1912	mov	a2, a4
1913	leaf_return
1914
1915.Lfixdfsi_zero:
1916	movi	a2, 0
1917	leaf_return
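
	/* Illustrative C sketch of the conversion above: a4 is the number of
	   integer bits in the value (exponent minus 0x3fe), the mantissa with
	   its implicit 1 is left-aligned in a 32-bit word, and the result is
	   that word shifted right by (32 - a4).  Out-of-range values
	   saturate; the NaN case is checked first by the code above and
	   mapped to +INT32_MAX, which this sketch does not repeat.

	       #include <stdint.h>

	       static int32_t
	       fixdfsi_ref (uint64_t bits)    // raw IEEE-754 double bits
	       {
	         int n = (int) ((bits >> 52) & 0x7ff) - 0x3fe;
	         uint32_t frac = 0x80000000u | (uint32_t) (bits >> 21);
	         uint32_t mag;
	         if (n < 1)
	           return 0;                  // |x| < 1 truncates to zero
	         if (n >= 32)                 // too big: saturate
	           return (int64_t) bits < 0 ? INT32_MIN : INT32_MAX;
	         mag = frac >> (32 - n);
	         return (int64_t) bits < 0 ? - (int32_t) mag : (int32_t) mag;
	       }
	*/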
1918
1919#endif /* L_fixdfsi */
1920
1921#ifdef L_fixdfdi
1922
1923	.align	4
1924	.global	__fixdfdi
1925	.type	__fixdfdi, @function
1926__fixdfdi:
1927	leaf_entry sp, 16
1928
1929	/* Check for NaN and Infinity.  */
1930	movi	a6, 0x7ff00000
1931	ball	xh, a6, .Lfixdfdi_nan_or_inf
1932
1933	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
1934	extui	a4, xh, 20, 11
1935	extui	a5, a6, 19, 10	/* 0x3fe */
1936	sub	a4, a4, a5
1937	bgei	a4, 64, .Lfixdfdi_maxint
1938	blti	a4, 1, .Lfixdfdi_zero
1939
1940	/* Add explicit "1.0" and shift << 11.  */
1941	or	a7, xh, a6
1942	ssai	(32 - 11)
1943	src	xh, a7, xl
1944	sll	xl, xl
1945
1946	/* Shift back to the right, based on the exponent.  */
1947	ssl	a4		/* shift by 64 - a4 */
1948	bgei	a4, 32, .Lfixdfdi_smallshift
1949	srl	xl, xh
1950	movi	xh, 0
1951
1952.Lfixdfdi_shifted:
1953	/* Negate the result if sign != 0.  */
1954	bgez	a7, 1f
1955	neg	xl, xl
1956	neg	xh, xh
1957	beqz	xl, 1f
1958	addi	xh, xh, -1
19591:	leaf_return
1960
1961.Lfixdfdi_smallshift:
1962	src	xl, xh, xl
1963	srl	xh, xh
1964	j	.Lfixdfdi_shifted
1965
1966.Lfixdfdi_nan_or_inf:
1967	/* Handle Infinity and NaN.  */
1968	slli	a4, xh, 12
1969	or	a4, a4, xl
1970	beqz	a4, .Lfixdfdi_maxint
1971
1972	/* Translate NaN to +maxint.  */
1973	movi	xh, 0
1974
1975.Lfixdfdi_maxint:
1976	slli	a7, a6, 11	/* 0x80000000 */
1977	bgez	xh, 1f
1978	mov	xh, a7
1979	movi	xl, 0
1980	leaf_return
1981
19821:	addi	xh, a7, -1	/* 0x7fffffff */
1983	movi	xl, -1
1984	leaf_return
1985
1986.Lfixdfdi_zero:
1987	movi	xh, 0
1988	movi	xl, 0
1989	leaf_return
1990
1991#endif /* L_fixdfdi */
1992
1993#ifdef L_fixunsdfsi
1994
1995	.align	4
1996	.global	__fixunsdfsi
1997	.type	__fixunsdfsi, @function
1998__fixunsdfsi:
1999	leaf_entry sp, 16
2000
2001	/* Check for NaN and Infinity.  */
2002	movi	a6, 0x7ff00000
2003	ball	xh, a6, .Lfixunsdfsi_nan_or_inf
2004
2005	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
2006	extui	a4, xh, 20, 11
2007	extui	a5, a6, 20, 10	/* 0x3ff */
2008	sub	a4, a4, a5
2009	bgei	a4, 32, .Lfixunsdfsi_maxint
2010	bltz	a4, .Lfixunsdfsi_zero
2011
2012	/* Add explicit "1.0" and shift << 11.  */
2013	or	a7, xh, a6
2014	ssai	(32 - 11)
2015	src	a5, a7, xl
2016
2017	/* Shift back to the right, based on the exponent.  */
2018	addi	a4, a4, 1
2019	beqi	a4, 32, .Lfixunsdfsi_bigexp
2020	ssl	a4		/* shift by 32 - a4 */
2021	srl	a5, a5
2022
2023	/* Negate the result if sign != 0.  */
2024	neg	a2, a5
2025	movgez	a2, a5, a7
2026	leaf_return
2027
2028.Lfixunsdfsi_nan_or_inf:
2029	/* Handle Infinity and NaN.  */
2030	slli	a4, xh, 12
2031	or	a4, a4, xl
2032	beqz	a4, .Lfixunsdfsi_maxint
2033
2034	/* Translate NaN to 0xffffffff.  */
2035	movi	a2, -1
2036	leaf_return
2037
2038.Lfixunsdfsi_maxint:
2039	slli	a4, a6, 11	/* 0x80000000 */
2040	movi	a5, -1		/* 0xffffffff */
2041	movgez	a4, a5, xh
2042	mov	a2, a4
2043	leaf_return
2044
2045.Lfixunsdfsi_zero:
2046	movi	a2, 0
2047	leaf_return
2048
2049.Lfixunsdfsi_bigexp:
2050	/* Handle unsigned maximum exponent case.  */
2051	bltz	xh, 1f
2052	mov	a2, a5		/* no shift needed */
2053	leaf_return
2054
2055	/* Return 0x80000000 if negative.  */
20561:	slli	a2, a6, 11
2057	leaf_return
2058
2059#endif /* L_fixunsdfsi */
2060
2061#ifdef L_fixunsdfdi
2062
2063	.align	4
2064	.global	__fixunsdfdi
2065	.type	__fixunsdfdi, @function
2066__fixunsdfdi:
2067	leaf_entry sp, 16
2068
2069	/* Check for NaN and Infinity.  */
2070	movi	a6, 0x7ff00000
2071	ball	xh, a6, .Lfixunsdfdi_nan_or_inf
2072
2073	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
2074	extui	a4, xh, 20, 11
2075	extui	a5, a6, 20, 10	/* 0x3ff */
2076	sub	a4, a4, a5
2077	bgei	a4, 64, .Lfixunsdfdi_maxint
2078	bltz	a4, .Lfixunsdfdi_zero
2079
2080	/* Add explicit "1.0" and shift << 11.  */
2081	or	a7, xh, a6
2082	ssai	(32 - 11)
2083	src	xh, a7, xl
2084	sll	xl, xl
2085
2086	/* Shift back to the right, based on the exponent.  */
2087	addi	a4, a4, 1
2088	beqi	a4, 64, .Lfixunsdfdi_bigexp
2089	ssl	a4		/* shift by 64 - a4 */
2090	bgei	a4, 32, .Lfixunsdfdi_smallshift
2091	srl	xl, xh
2092	movi	xh, 0
2093
2094.Lfixunsdfdi_shifted:
2095	/* Negate the result if sign != 0.  */
2096	bgez	a7, 1f
2097	neg	xl, xl
2098	neg	xh, xh
2099	beqz	xl, 1f
2100	addi	xh, xh, -1
21011:	leaf_return
2102
2103.Lfixunsdfdi_smallshift:
2104	src	xl, xh, xl
2105	srl	xh, xh
2106	j	.Lfixunsdfdi_shifted
2107
2108.Lfixunsdfdi_nan_or_inf:
2109	/* Handle Infinity and NaN.  */
2110	slli	a4, xh, 12
2111	or	a4, a4, xl
2112	beqz	a4, .Lfixunsdfdi_maxint
2113
2114	/* Translate NaN to 0xffffffff.... */
21151:	movi	xh, -1
2116	movi	xl, -1
2117	leaf_return
2118
2119.Lfixunsdfdi_maxint:
2120	bgez	xh, 1b
21212:	slli	xh, a6, 11	/* 0x80000000 */
2122	movi	xl, 0
2123	leaf_return
2124
2125.Lfixunsdfdi_zero:
2126	movi	xh, 0
2127	movi	xl, 0
2128	leaf_return
2129
2130.Lfixunsdfdi_bigexp:
2131	/* Handle unsigned maximum exponent case.  */
2132	bltz	a7, 2b
2133	leaf_return		/* no shift needed */
2134
2135#endif /* L_fixunsdfdi */
2136
2137#ifdef L_floatsidf
2138
2139	.align	4
2140	.global	__floatunsidf
2141	.type	__floatunsidf, @function
2142__floatunsidf:
2143	leaf_entry sp, 16
2144	beqz	a2, .Lfloatsidf_return_zero
2145
2146	/* Set the sign to zero and jump to the floatsidf code.  */
2147	movi	a7, 0
2148	j	.Lfloatsidf_normalize
2149
2150	.align	4
2151	.global	__floatsidf
2152	.type	__floatsidf, @function
2153__floatsidf:
2154	leaf_entry sp, 16
2155
2156	/* Check for zero.  */
2157	beqz	a2, .Lfloatsidf_return_zero
2158
2159	/* Save the sign.  */
2160	extui	a7, a2, 31, 1
2161
2162	/* Get the absolute value.  */
2163#if XCHAL_HAVE_ABS
2164	abs	a2, a2
2165#else
2166	neg	a4, a2
2167	movltz	a2, a4, a2
2168#endif
2169
2170.Lfloatsidf_normalize:
2171	/* Normalize with the first 1 bit in the msb.  */
2172	do_nsau	a4, a2, a5, a6
2173	ssl	a4
2174	sll	a5, a2
2175
2176	/* Shift the mantissa into position.  */
2177	srli	xh, a5, 11
2178	slli	xl, a5, (32 - 11)
2179
2180	/* Set the exponent.  */
2181	movi	a5, 0x41d	/* 0x3fe + 31 */
2182	sub	a5, a5, a4
2183	slli	a5, a5, 20
2184	add	xh, xh, a5
2185
2186	/* Add the sign and return. */
2187	slli	a7, a7, 31
2188	or	xh, xh, a7
2189	leaf_return
2190
2191.Lfloatsidf_return_zero:
2192	movi	a3, 0
2193	leaf_return
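
	/* Illustrative C version of the conversion above: normalize the
	   absolute value so its leading 1 reaches bit 31, then the exponent
	   field is 0x3ff + (31 - clz).  A 32-bit integer always fits in the
	   53-bit mantissa, so no rounding is needed (unlike the DImode
	   conversion in the next section).

	       #include <stdint.h>

	       static uint64_t
	       floatsidf_ref (int32_t i)      // returns raw double bits
	       {
	         uint64_t sign = (uint64_t) (i < 0) << 63;
	         uint32_t mag = i < 0 ? - (uint32_t) i : (uint32_t) i;
	         if (i == 0)
	           return 0;
	         int clz = __builtin_clz (mag);           // like do_nsau
	         uint64_t frac = (uint64_t) (mag << clz) << 21;
	         frac &= 0x000fffffffffffffULL;           // drop the implicit 1
	         return sign | ((uint64_t) (0x3ff + 31 - clz) << 52) | frac;
	       }
	*/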
2194
2195#endif /* L_floatsidf */
2196
2197#ifdef L_floatdidf
2198
2199	.align	4
2200	.global	__floatundidf
2201	.type	__floatundidf, @function
2202__floatundidf:
2203	leaf_entry sp, 16
2204
2205	/* Check for zero.  */
2206	or	a4, xh, xl
2207	beqz	a4, 2f
2208
2209	/* Set the sign to zero and jump to the floatdidf code.  */
2210	movi	a7, 0
2211	j	.Lfloatdidf_normalize
2212
2213	.align	4
2214	.global	__floatdidf
2215	.type	__floatdidf, @function
2216__floatdidf:
2217	leaf_entry sp, 16
2218
2219	/* Check for zero.  */
2220	or	a4, xh, xl
2221	beqz	a4, 2f
2222
2223	/* Save the sign.  */
2224	extui	a7, xh, 31, 1
2225
2226	/* Get the absolute value.  */
2227	bgez	xh, .Lfloatdidf_normalize
2228	neg	xl, xl
2229	neg	xh, xh
2230	beqz	xl, .Lfloatdidf_normalize
2231	addi	xh, xh, -1
2232
2233.Lfloatdidf_normalize:
2234	/* Normalize with the first 1 bit in the msb of xh.  */
2235	beqz	xh, .Lfloatdidf_bigshift
2236	do_nsau	a4, xh, a5, a6
2237	ssl	a4
2238	src	xh, xh, xl
2239	sll	xl, xl
2240
2241.Lfloatdidf_shifted:
2242	/* Shift the mantissa into position, with rounding bits in a6.  */
2243	ssai	11
2244	sll	a6, xl
2245	src	xl, xh, xl
2246	srl	xh, xh
2247
2248	/* Set the exponent.  */
2249	movi	a5, 0x43d	/* 0x3fe + 63 */
2250	sub	a5, a5, a4
2251	slli	a5, a5, 20
2252	add	xh, xh, a5
2253
2254	/* Add the sign.  */
2255	slli	a7, a7, 31
2256	or	xh, xh, a7
2257
2258	/* Round up if the leftover fraction is >= 1/2.  */
2259	bgez	a6, 2f
2260	addi	xl, xl, 1
2261	beqz	xl, .Lfloatdidf_roundcarry
2262
2263	/* Check if the leftover fraction is exactly 1/2.  */
2264	slli	a6, a6, 1
2265	beqz	a6, .Lfloatdidf_exactlyhalf
22662:	leaf_return
2267
2268.Lfloatdidf_bigshift:
2269	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
2270	do_nsau	a4, xl, a5, a6
2271	ssl	a4
2272	sll	xh, xl
2273	movi	xl, 0
2274	addi	a4, a4, 32
2275	j	.Lfloatdidf_shifted
2276
2277.Lfloatdidf_exactlyhalf:
2278	/* Round down to the nearest even value.  */
2279	srli	xl, xl, 1
2280	slli	xl, xl, 1
2281	leaf_return
2282
2283.Lfloatdidf_roundcarry:
2284	/* xl is always zero when the rounding increment overflows, so
2285	   there's no need to round it to an even value.  */
2286	addi	xh, xh, 1
2287	/* Overflow to the exponent is OK.  */
2288	leaf_return
2289
2290#endif /* L_floatdidf */
2291
2292#ifdef L_truncdfsf2
2293
2294	.align	4
2295	.global	__truncdfsf2
2296	.type	__truncdfsf2, @function
2297__truncdfsf2:
2298	leaf_entry sp, 16
2299
2300	/* Adjust the exponent bias.  */
2301	movi	a4, (0x3ff - 0x7f) << 20
2302	sub	a5, xh, a4
2303
2304	/* Check for underflow.  */
2305	xor	a6, xh, a5
2306	bltz	a6, .Ltrunc_underflow
2307	extui	a6, a5, 20, 11
2308	beqz	a6, .Ltrunc_underflow
2309
2310	/* Check for overflow.  */
2311	movi	a4, 255
2312	bge	a6, a4, .Ltrunc_overflow
2313
2314	/* Shift a5/xl << 3 into a5/a4.  */
2315	ssai	(32 - 3)
2316	src	a5, a5, xl
2317	sll	a4, xl
2318
2319.Ltrunc_addsign:
2320	/* Add the sign bit.  */
2321	extui	a6, xh, 31, 1
2322	slli	a6, a6, 31
2323	or	a2, a6, a5
2324
2325	/* Round up if the leftover fraction is >= 1/2.  */
2326	bgez	a4, 1f
2327	addi	a2, a2, 1
2328	/* Overflow to the exponent is OK.  The answer will be correct.  */
2329
2330	/* Check if the leftover fraction is exactly 1/2.  */
2331	slli	a4, a4, 1
2332	beqz	a4, .Ltrunc_exactlyhalf
23331:	leaf_return
2334
2335.Ltrunc_exactlyhalf:
2336	/* Round down to the nearest even value.  */
2337	srli	a2, a2, 1
2338	slli	a2, a2, 1
2339	leaf_return
2340
2341.Ltrunc_overflow:
2342	/* Check if exponent == 0x7ff.  */
2343	movi	a4, 0x7ff00000
2344	bnall	xh, a4, 1f
2345
2346	/* Check if mantissa is nonzero.  */
2347	slli	a5, xh, 12
2348	or	a5, a5, xl
2349	beqz	a5, 1f
2350
2351	/* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
2352	srli	a4, a4, 1
2353
23541:	slli	a4, a4, 4	/* 0xff000000 or 0xff800000 */
2355	/* Add the sign bit.  */
2356	extui	a6, xh, 31, 1
2357	ssai	1
2358	src	a2, a6, a4
2359	leaf_return
2360
2361.Ltrunc_underflow:
2362	/* Find shift count for a subnormal.  Flush to zero if >= 32.  */
2363	extui	a6, xh, 20, 11
2364	movi	a5, 0x3ff - 0x7f
2365	sub	a6, a5, a6
2366	addi	a6, a6, 1
2367	bgeui	a6, 32, 1f
2368
2369	/* Replace the exponent with an explicit "1.0".  */
2370	slli	a5, a5, 13	/* 0x700000 */
2371	or	a5, a5, xh
2372	slli	a5, a5, 11
2373	srli	a5, a5, 11
2374
2375	/* Shift the mantissa left by 3 bits (into a5/a4).  */
2376	ssai	(32 - 3)
2377	src	a5, a5, xl
2378	sll	a4, xl
2379
2380	/* Shift right by a6.  */
2381	ssr	a6
2382	sll	a7, a4
2383	src	a4, a5, a4
2384	srl	a5, a5
2385	beqz	a7, .Ltrunc_addsign
2386	or	a4, a4, a6	/* any positive, nonzero value will work */
2387	j	.Ltrunc_addsign
2388
2389	/* Return +/- zero.  */
23901:	extui	a2, xh, 31, 1
2391	slli	a2, a2, 31
2392	leaf_return
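
	/* Illustrative C version of the fast path above: rebias the exponent
	   in place (the bias difference is 0x3ff - 0x7f), shift the packed
	   exponent-and-mantissa field left by 3 so it lands in the
	   single-precision position, and round to nearest-even on the 29
	   discarded mantissa bits.  Overflow, underflow and NaN take the
	   separate paths above.

	       #include <stdint.h>

	       static uint32_t
	       truncdfsf2_ref (uint64_t d)    // in-range, normal case only
	       {
	         uint32_t sign = (uint32_t) (d >> 63) << 31;
	         uint32_t exp  = (uint32_t) ((d >> 52) & 0x7ff) - (0x3ff - 0x7f);
	         uint32_t frac = (uint32_t) ((d >> 29) & 0x7fffff);
	         uint32_t rest = (uint32_t) (d << 3);   // 29 dropped bits, msb-aligned
	         uint32_t r = sign | (exp << 23) | frac;
	         if (rest & 0x80000000u)                // dropped bits >= 1/2
	           {
	             r += 1;                            // carry into exponent is OK
	             if ((rest << 1) == 0)              // exactly 1/2: round to even
	               r &= ~1u;
	           }
	         return r;
	       }
	*/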
2393
2394#endif /* L_truncdfsf2 */
2395
2396#ifdef L_extendsfdf2
2397
2398	.align	4
2399	.global	__extendsfdf2
2400	.type	__extendsfdf2, @function
2401__extendsfdf2:
2402	leaf_entry sp, 16
2403
2404	/* Save the sign bit and then shift it off.  */
2405	extui	a5, a2, 31, 1
2406	slli	a5, a5, 31
2407	slli	a4, a2, 1
2408
2409	/* Extract and check the exponent.  */
2410	extui	a6, a2, 23, 8
2411	beqz	a6, .Lextend_expzero
2412	addi	a6, a6, 1
2413	beqi	a6, 256, .Lextend_nan_or_inf
2414
2415	/* Shift >> 3 into a4/xl.  */
2416	srli	a4, a4, 4
2417	slli	xl, a2, (32 - 3)
2418
2419	/* Adjust the exponent bias.  */
2420	movi	a6, (0x3ff - 0x7f) << 20
2421	add	a4, a4, a6
2422
2423	/* Add the sign bit.  */
2424	or	xh, a4, a5
2425	leaf_return
2426
2427.Lextend_nan_or_inf:
2428	movi	a4, 0x7ff00000
2429
2430	/* Check for NaN.  */
2431	slli	a7, a2, 9
2432	beqz	a7, 1f
2433
2434	slli	a6, a6, 11	/* 0x80000 */
2435	or	a4, a4, a6
2436
2437	/* Add the sign and return.  */
24381:	or	xh, a4, a5
2439	movi	xl, 0
2440	leaf_return
2441
2442.Lextend_expzero:
2443	beqz	a4, 1b
2444
2445	/* Normalize it to have 8 zero bits before the first 1 bit.  */
2446	do_nsau	a7, a4, a2, a3
2447	addi	a7, a7, -8
2448	ssl	a7
2449	sll	a4, a4
2450
2451	/* Shift >> 3 into a4/xl.  */
2452	slli	xl, a4, (32 - 3)
2453	srli	a4, a4, 3
2454
2455	/* Set the exponent.  */
2456	movi	a6, 0x3fe - 0x7f
2457	sub	a6, a6, a7
2458	slli	a6, a6, 20
2459	add	a4, a4, a6
2460
2461	/* Add the sign and return.  */
2462	or	xh, a4, a5
2463	leaf_return
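
	/* Illustrative C version of the normal-number path above: widen the
	   23-bit fraction with 29 trailing zero bits and rebias the exponent.
	   Every finite float is exactly representable as a double, so there
	   is no rounding; zeros, subnormals, NaNs and Infinities take the
	   separate paths above.

	       #include <stdint.h>

	       static uint64_t
	       extendsfdf2_ref (uint32_t f)   // normal case only
	       {
	         uint64_t sign = (uint64_t) (f >> 31) << 63;
	         uint64_t exp  = ((f >> 23) & 0xff) + (0x3ff - 0x7f);
	         uint64_t frac = (uint64_t) (f & 0x7fffff) << 29;
	         return sign | (exp << 52) | frac;
	       }
	*/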
2464
2465#endif /* L_extendsfdf2 */
2466
2467
2468#if XCHAL_HAVE_DFP_SQRT
2469#ifdef L_sqrt
2470
2471        .text
2472        .align 4
2473        .global __ieee754_sqrt
2474        .type	__ieee754_sqrt, @function
2475__ieee754_sqrt:
2476	leaf_entry	sp, 16
2477
2478	wfrd		f1, xh, xl
2479
2480	sqrt0.d		f2, f1
2481	const.d		f4, 0
2482	maddn.d		f4, f2, f2
2483	nexp01.d	f3, f1
2484	const.d		f0, 3
2485	addexp.d	f3, f0
2486	maddn.d		f0, f4, f3
2487	nexp01.d	f4, f1
2488	maddn.d		f2, f0, f2
2489	const.d		f5, 0
2490	maddn.d		f5, f2, f3
2491	const.d		f0, 3
2492	maddn.d		f0, f5, f2
2493	neg.d		f6, f4
2494	maddn.d		f2, f0, f2
2495	const.d		f0, 0
2496	const.d		f5, 0
2497	const.d		f7, 0
2498	maddn.d		f0, f6, f2
2499	maddn.d		f5, f2, f3
2500	const.d		f3, 3
2501	maddn.d		f7, f3, f2
2502	maddn.d		f4, f0, f0
2503	maddn.d		f3, f5, f2
2504	neg.d		f2, f7
2505	maddn.d		f0, f4, f2
2506	maddn.d		f7, f3, f7
2507	mksadj.d	f2, f1
2508	nexp01.d	f1, f1
2509	maddn.d		f1, f0, f0
2510	neg.d		f3, f7
2511	addexpm.d	f0, f2
2512	addexp.d	f3, f2
2513	divn.d		f0, f1, f3
2514
2515	rfr		xl, f0
2516	rfrd		xh, f0
2517
2518	leaf_return
2519
2520#endif /* L_sqrt */
2521#endif /* XCHAL_HAVE_DFP_SQRT */
2522
2523#if XCHAL_HAVE_DFP_RECIP
2524#ifdef L_recipdf2
2525	/* Reciprocal */
2526
2527	.align	4
2528	.global	__recipdf2
2529	.type	__recipdf2, @function
2530__recipdf2:
2531	leaf_entry	sp, 16
2532
2533	wfrd		f1, xh, xl
2534
2535	recip0.d	f0, f1
2536	const.d		f2, 2
2537	msub.d		f2, f1, f0
2538	mul.d		f3, f1, f0
2539	const.d		f4, 2
2540	mul.d		f5, f0, f2
2541	msub.d		f4, f3, f2
2542	const.d		f2, 1
2543	mul.d		f0, f5, f4
2544	msub.d		f2, f1, f0
2545	maddn.d		f0, f0, f2
2546
2547	rfr		xl, f0
2548	rfrd		xh, f0
2549
2550	leaf_return
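
	/* The sequence above refines the RECIP0.D seed with Newton-Raphson
	   steps folded into fused multiply-adds.  As a sketch of the
	   underlying recurrence (illustrative only), each refinement has the
	   form:

	       static double
	       recip_step (double d, double r)
	       {
	         return r * (2.0 - d * r);   // error e becomes roughly e*e
	       }
	*/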
2551
2552#endif /* L_recipdf2 */
2553#endif /* XCHAL_HAVE_DFP_RECIP */
2554
2555#if XCHAL_HAVE_DFP_RSQRT
2556#ifdef L_rsqrtdf2
2557	/* Reciprocal square root */
2558
2559	.align	4
2560	.global	__rsqrtdf2
2561	.type	__rsqrtdf2, @function
2562__rsqrtdf2:
2563	leaf_entry	sp, 16
2564
2565	wfrd		f1, xh, xl
2566
2567	rsqrt0.d	f0, f1
2568	mul.d		f2, f1, f0
2569	const.d		f3, 3
2570	mul.d		f4, f3, f0
2571	const.d		f5, 1
2572	msub.d		f5, f2, f0
2573	maddn.d		f0, f4, f5
2574	const.d		f2, 1
2575	mul.d		f4, f1, f0
2576	mul.d		f5, f3, f0
2577	msub.d		f2, f4, f0
2578	maddn.d		f0, f5, f2
2579	const.d		f2, 1
2580	mul.d		f1, f1, f0
2581	mul.d		f3, f3, f0
2582	msub.d		f2, f1, f0
2583	maddn.d		f0, f3, f2
2584
2585	rfr		xl, f0
2586	rfrd		xh, f0
2587
2588	leaf_return
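
	/* As with __recipdf2 above, this refines the RSQRT0.D seed with
	   Newton-Raphson steps for 1/sqrt(d), interleaved through fused
	   multiply-adds.  A sketch of the classic refinement step
	   (illustrative only, not a drop-in for the sequence above):

	       static double
	       rsqrt_step (double d, double r)
	       {
	         return r + 0.5 * r * (1.0 - d * r * r);
	       }
	*/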
2589
2590#endif /* L_rsqrtdf2 */
2591#endif /* XCHAL_HAVE_DFP_RSQRT */
2592