1/* IEEE-754 single-precision functions for Xtensa
2   Copyright (C) 2006-2013 Free Software Foundation, Inc.
3   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   Under Section 7 of GPL version 3, you are granted additional
18   permissions described in the GCC Runtime Library Exception, version
19   3.1, as published by the Free Software Foundation.
20
21   You should have received a copy of the GNU General Public License and
22   a copy of the GCC Runtime Library Exception along with this program;
23   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24   <http://www.gnu.org/licenses/>.  */
25
/* Register aliases for the high/low halves of a double-word value,
   selected by target endianness.  NOTE(review): these names are not
   referenced anywhere in the single-precision code visible in this
   file -- presumably shared with the double-precision companion
   source; confirm before removing.  */
26#ifdef __XTENSA_EB__
27#define xh a2
28#define xl a3
29#define yh a4
30#define yl a5
31#else
32#define xh a3
33#define xl a2
34#define yh a5
35#define yl a4
36#endif
37
38/*  Warning!  The branch displacements for some Xtensa branch instructions
39    are quite small, and this code has been carefully laid out to keep
40    branch targets in range.  If you change anything, be sure to check that
41    the assembler is not relaxing anything to branch over a jump.  */
42
43#ifdef L_negsf2

	/* float __negsf2 (float x)
	   Negate a single-precision value by toggling its sign bit.
	   In:   a2 = x (raw IEEE-754 bit pattern).
	   Out:  a2 = -x.
	   Clobbers a4.  Correct for zeros, subnormals, infinities and
	   NaNs alike, since only the sign bit is touched.  */
45	.align	4
46	.global	__negsf2
47	.type	__negsf2, @function
48__negsf2:
49	leaf_entry sp, 16
50	movi	a4, 0x80000000
51	xor	a2, a2, a4
52	leaf_return

54#endif /* L_negsf2 */
55
56#ifdef L_addsubsf3
57
58	/* Addition */
59__addsf3_aux:

61	/* Handle NaNs and Infinities.  (This code is placed before the
62	   start of the function just to keep it in range of the limited
63	   branch displacements.)  */

65.Ladd_xnan_or_inf:
66	/* If y is neither Infinity nor NaN, return x.  */
67	bnall	a3, a6, 1f
68	/* If x is a NaN, return it.  Otherwise, return y.  */
69	slli	a7, a2, 9
70	beqz	a7, .Ladd_ynan_or_inf
711:	leaf_return

73.Ladd_ynan_or_inf:
74	/* Return y.  */
75	mov	a2, a3
76	leaf_return

78.Ladd_opposite_signs:
79	/* Operand signs differ.  Do a subtraction.  */
80	slli	a7, a6, 8
81	xor	a3, a3, a7
82	j	.Lsub_same_sign

	/* float __addsf3 (float x, float y)
	   IEEE-754 single-precision addition, rounding to nearest even.
	   In:   a2 = x, a3 = y (raw IEEE-754 bit patterns).
	   Out:  a2 = x + y.
	   Uses: a6 = 0x7f800000 (exponent mask) throughout, a7-a10 as
	   scratch, and SAR via ssr/src/srl for the alignment shifts.
	   Opposite-sign inputs are forwarded to the subtraction code.  */
84	.align	4
85	.global	__addsf3
86	.type	__addsf3, @function
87__addsf3:
88	leaf_entry sp, 16
89	movi	a6, 0x7f800000

91	/* Check if the two operands have the same sign.  */
92	xor	a7, a2, a3
93	bltz	a7, .Ladd_opposite_signs

95.Ladd_same_sign:
96	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
97	ball	a2, a6, .Ladd_xnan_or_inf
98	ball	a3, a6, .Ladd_ynan_or_inf

100	/* Compare the exponents.  The smaller operand will be shifted
101	   right by the exponent difference and added to the larger
102	   one.  */
103	extui	a7, a2, 23, 9
104	extui	a8, a3, 23, 9
105	bltu	a7, a8, .Ladd_shiftx

107.Ladd_shifty:
108	/* Check if the smaller (or equal) exponent is zero.  */
109	bnone	a3, a6, .Ladd_yexpzero

111	/* Replace y sign/exponent with 0x008.  */
112	or	a3, a3, a6
113	slli	a3, a3, 8
114	srli	a3, a3, 8

116.Ladd_yexpdiff:
117	/* Compute the exponent difference.  */
118	sub	a10, a7, a8

120	/* Exponent difference > 32 -- just return the bigger value.  */
121	bgeui	a10, 32, 1f

123	/* Shift y right by the exponent difference.  Any bits that are
124	   shifted out of y are saved in a9 for rounding the result.  */
125	ssr	a10
126	movi	a9, 0
127	src	a9, a3, a9
128	srl	a3, a3

130	/* Do the addition.  */
131	add	a2, a2, a3

133	/* Check if the add overflowed into the exponent.  */
134	extui	a10, a2, 23, 9
135	beq	a10, a7, .Ladd_round
136	mov	a8, a7
137	j	.Ladd_carry

139.Ladd_yexpzero:
140	/* y is a subnormal value.  Replace its sign/exponent with zero,
141	   i.e., no implicit "1.0", and increment the apparent exponent
142	   because subnormals behave as if they had the minimum (nonzero)
143	   exponent.  Test for the case when both exponents are zero.  */
144	slli	a3, a3, 9
145	srli	a3, a3, 9
146	bnone	a2, a6, .Ladd_bothexpzero
147	addi	a8, a8, 1
148	j	.Ladd_yexpdiff

150.Ladd_bothexpzero:
151	/* Both exponents are zero.  Handle this as a special case.  There
152	   is no need to shift or round, and the normal code for handling
153	   a carry into the exponent field will not work because it
154	   assumes there is an implicit "1.0" that needs to be added.  */
155	add	a2, a2, a3
1561:	leaf_return

158.Ladd_xexpzero:
159	/* Same as "yexpzero" except skip handling the case when both
160	   exponents are zero.  */
161	slli	a2, a2, 9
162	srli	a2, a2, 9
163	addi	a7, a7, 1
164	j	.Ladd_xexpdiff

166.Ladd_shiftx:
167	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
168	   because the exponent difference is always nonzero in this version,
169	   the shift sequence can use SLL and skip loading a constant zero.  */
170	bnone	a2, a6, .Ladd_xexpzero

172	or	a2, a2, a6
173	slli	a2, a2, 8
174	srli	a2, a2, 8

176.Ladd_xexpdiff:
177	sub	a10, a8, a7
178	bgeui	a10, 32, .Ladd_returny

180	ssr	a10
181	sll	a9, a2
182	srl	a2, a2

184	add	a2, a2, a3

186	/* Check if the add overflowed into the exponent.  */
187	extui	a10, a2, 23, 9
188	bne	a10, a8, .Ladd_carry

190.Ladd_round:
191	/* Round up if the leftover fraction is >= 1/2.  */
192	bgez	a9, 1f
193	addi	a2, a2, 1

195	/* Check if the leftover fraction is exactly 1/2.  */
196	slli	a9, a9, 1
197	beqz	a9, .Ladd_exactlyhalf
1981:	leaf_return

200.Ladd_returny:
201	mov	a2, a3
202	leaf_return

204.Ladd_carry:
205	/* The addition has overflowed into the exponent field, so the
206	   value needs to be renormalized.  The mantissa of the result
207	   can be recovered by subtracting the original exponent and
208	   adding 0x800000 (which is the explicit "1.0" for the
209	   mantissa of the non-shifted operand -- the "1.0" for the
210	   shifted operand was already added).  The mantissa can then
211	   be shifted right by one bit.  The explicit "1.0" of the
212	   shifted mantissa then needs to be replaced by the exponent,
213	   incremented by one to account for the normalizing shift.
214	   It is faster to combine these operations: do the shift first
215	   and combine the additions and subtractions.  If x is the
216	   original exponent, the result is:
217	       shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
218	   or:
219	       shifted mantissa + ((x + 1) << 22)
220	   Note that the exponent is incremented here by leaving the
221	   explicit "1.0" of the mantissa in the exponent field.  */

223	/* Shift x right by one bit.  Save the lsb.  */
224	mov	a10, a2
225	srli	a2, a2, 1

227	/* See explanation above.  The original exponent is in a8.  */
228	addi	a8, a8, 1
229	slli	a8, a8, 22
230	add	a2, a2, a8

232	/* Return an Infinity if the exponent overflowed.  */
233	ball	a2, a6, .Ladd_infinity

235	/* Same thing as the "round" code except the msb of the leftover
236	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
237	bbci.l	a10, 0, 1f
238	addi	a2, a2, 1
239	beqz	a9, .Ladd_exactlyhalf
2401:	leaf_return

242.Ladd_infinity:
243	/* Clear the mantissa.  */
244	srli	a2, a2, 23
245	slli	a2, a2, 23

247	/* The sign bit may have been lost in a carry-out.  Put it back.  */
248	slli	a8, a8, 1
249	or	a2, a2, a8
250	leaf_return

252.Ladd_exactlyhalf:
253	/* Round down to the nearest even value.  */
254	srli	a2, a2, 1
255	slli	a2, a2, 1
256	leaf_return
257
258
259	/* Subtraction */
260__subsf3_aux:

262	/* Handle NaNs and Infinities.  (This code is placed before the
263	   start of the function just to keep it in range of the limited
264	   branch displacements.)  */

266.Lsub_xnan_or_inf:
267	/* If y is neither Infinity nor NaN, return x.  */
268	bnall	a3, a6, 1f
269	/* Both x and y are either NaN or Inf, so the result is NaN.  */
270	movi	a4, 0x400000	/* make it a quiet NaN */
271	or	a2, a2, a4
2721:	leaf_return

274.Lsub_ynan_or_inf:
275	/* Negate y and return it.  */
276	slli	a7, a6, 8
277	xor	a2, a3, a7
278	leaf_return

280.Lsub_opposite_signs:
281	/* Operand signs differ.  Do an addition.  */
282	slli	a7, a6, 8
283	xor	a3, a3, a7
284	j	.Ladd_same_sign

	/* float __subsf3 (float x, float y)
	   IEEE-754 single-precision subtraction, rounding to nearest even.
	   In:   a2 = x, a3 = y (raw IEEE-754 bit patterns).
	   Out:  a2 = x - y.
	   Uses: a6 = 0x7f800000 (exponent mask) throughout, a7-a11 as
	   scratch, and SAR for the alignment/normalization shifts.
	   Opposite-sign inputs are forwarded to the addition code.  */
286	.align	4
287	.global	__subsf3
288	.type	__subsf3, @function
289__subsf3:
290	leaf_entry sp, 16
291	movi	a6, 0x7f800000

293	/* Check if the two operands have the same sign.  */
294	xor	a7, a2, a3
295	bltz	a7, .Lsub_opposite_signs

297.Lsub_same_sign:
298	/* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
299	ball	a2, a6, .Lsub_xnan_or_inf
300	ball	a3, a6, .Lsub_ynan_or_inf

302	/* Compare the operands.  In contrast to addition, the entire
303	   value matters here.  */
304	extui	a7, a2, 23, 8
305	extui	a8, a3, 23, 8
306	bltu	a2, a3, .Lsub_xsmaller

308.Lsub_ysmaller:
309	/* Check if the smaller (or equal) exponent is zero.  */
310	bnone	a3, a6, .Lsub_yexpzero

312	/* Replace y sign/exponent with 0x008.  */
313	or	a3, a3, a6
314	slli	a3, a3, 8
315	srli	a3, a3, 8

317.Lsub_yexpdiff:
318	/* Compute the exponent difference.  */
319	sub	a10, a7, a8

321	/* Exponent difference > 32 -- just return the bigger value.  */
322	bgeui	a10, 32, 1f

324	/* Shift y right by the exponent difference.  Any bits that are
325	   shifted out of y are saved in a9 for rounding the result.  */
326	ssr	a10
327	movi	a9, 0
328	src	a9, a3, a9
329	srl	a3, a3

331	sub	a2, a2, a3

333	/* Subtract the leftover bits in a9 from zero and propagate any
334	   borrow from a2.  */
335	neg	a9, a9
336	addi	a10, a2, -1
337	movnez	a2, a10, a9

339	/* Check if the subtract underflowed into the exponent.  */
340	extui	a10, a2, 23, 8
341	beq	a10, a7, .Lsub_round
342	j	.Lsub_borrow

344.Lsub_yexpzero:
345	/* Return zero if the inputs are equal.  (For the non-subnormal
346	   case, subtracting the "1.0" will cause a borrow from the exponent
347	   and this case can be detected when handling the borrow.)  */
348	beq	a2, a3, .Lsub_return_zero

350	/* y is a subnormal value.  Replace its sign/exponent with zero,
351	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
352	   y's apparent exponent because subnormals behave as if they had
353	   the minimum (nonzero) exponent.  */
354	slli	a3, a3, 9
355	srli	a3, a3, 9
356	bnone	a2, a6, .Lsub_yexpdiff
357	addi	a8, a8, 1
358	j	.Lsub_yexpdiff

360.Lsub_returny:
361	/* Negate and return y.  */
362	slli	a7, a6, 8
363	xor	a2, a3, a7
3641:	leaf_return

366.Lsub_xsmaller:
367	/* Same thing as the "ysmaller" code, but with x and y swapped and
368	   with y negated.  */
369	bnone	a2, a6, .Lsub_xexpzero

371	or	a2, a2, a6
372	slli	a2, a2, 8
373	srli	a2, a2, 8

375.Lsub_xexpdiff:
376	sub	a10, a8, a7
377	bgeui	a10, 32, .Lsub_returny

379	ssr	a10
380	movi	a9, 0
381	src	a9, a2, a9
382	srl	a2, a2

384	/* Negate y.  */
385	slli	a11, a6, 8
386	xor	a3, a3, a11

388	sub	a2, a3, a2

390	neg	a9, a9
391	addi	a10, a2, -1
392	movnez	a2, a10, a9

394	/* Check if the subtract underflowed into the exponent.  */
395	extui	a10, a2, 23, 8
396	bne	a10, a8, .Lsub_borrow

398.Lsub_round:
399	/* Round up if the leftover fraction is >= 1/2.  */
400	bgez	a9, 1f
401	addi	a2, a2, 1

403	/* Check if the leftover fraction is exactly 1/2.  */
404	slli	a9, a9, 1
405	beqz	a9, .Lsub_exactlyhalf
4061:	leaf_return

408.Lsub_xexpzero:
409	/* Same as "yexpzero".  */
410	beq	a2, a3, .Lsub_return_zero
411	slli	a2, a2, 9
412	srli	a2, a2, 9
413	bnone	a3, a6, .Lsub_xexpdiff
414	addi	a7, a7, 1
415	j	.Lsub_xexpdiff

417.Lsub_return_zero:
418	movi	a2, 0
419	leaf_return

421.Lsub_borrow:
422	/* The subtraction has underflowed into the exponent field, so the
423	   value needs to be renormalized.  Shift the mantissa left as
424	   needed to remove any leading zeros and adjust the exponent
425	   accordingly.  If the exponent is not large enough to remove
426	   all the leading zeros, the result will be a subnormal value.  */

428	slli	a8, a2, 9
429	beqz	a8, .Lsub_xzero
430	do_nsau	a6, a8, a7, a11
431	srli	a8, a8, 9
432	bge	a6, a10, .Lsub_subnormal
433	addi	a6, a6, 1

435.Lsub_normalize_shift:
436	/* Shift the mantissa (a8/a9) left by a6.  */
437	ssl	a6
438	src	a8, a8, a9
439	sll	a9, a9

441	/* Combine the shifted mantissa with the sign and exponent,
442	   decrementing the exponent by a6.  (The exponent has already
443	   been decremented by one due to the borrow from the subtraction,
444	   but adding the mantissa will increment the exponent by one.)  */
445	srli	a2, a2, 23
446	sub	a2, a2, a6
447	slli	a2, a2, 23
448	add	a2, a2, a8
449	j	.Lsub_round

451.Lsub_exactlyhalf:
452	/* Round down to the nearest even value.  */
453	srli	a2, a2, 1
454	slli	a2, a2, 1
455	leaf_return

457.Lsub_xzero:
458	/* If there was a borrow from the exponent, and the mantissa and
459	   guard digits are all zero, then the inputs were equal and the
460	   result should be zero.  */
461	beqz	a9, .Lsub_return_zero

463	/* Only the guard digit is nonzero.  Shift by min(24, a10).  */
464	addi	a11, a10, -24
465	movi	a6, 24
466	movltz	a6, a10, a11
467	j	.Lsub_normalize_shift

469.Lsub_subnormal:
470	/* The exponent is too small to shift away all the leading zeros.
471	   Set a6 to the current exponent (which has already been
472	   decremented by the borrow) so that the exponent of the result
473	   will be zero.  Do not add 1 to a6 in this case, because: (1)
474	   adding the mantissa will not increment the exponent, so there is
475	   no need to subtract anything extra from the exponent to
476	   compensate, and (2) the effective exponent of a subnormal is 1
477	   not 0 so the shift amount must be 1 smaller than normal. */
478	mov	a6, a10
479	j	.Lsub_normalize_shift
480
481#endif /* L_addsubsf3 */
482
483#ifdef L_mulsf3
484
485	/* Multiplication */
486#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
487#define XCHAL_NO_MUL 1
488#endif

490__mulsf3_aux:

492	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
493	   (This code is placed before the start of the function just to
494	   keep it in range of the limited branch displacements.)  */

496.Lmul_xexpzero:
497	/* Clear the sign bit of x.  */
498	slli	a2, a2, 1
499	srli	a2, a2, 1

501	/* If x is zero, return zero.  */
502	beqz	a2, .Lmul_return_zero

504	/* Normalize x.  Adjust the exponent in a8.  */
505	do_nsau	a10, a2, a11, a12
506	addi	a10, a10, -8
507	ssl	a10
508	sll	a2, a2
509	movi	a8, 1
510	sub	a8, a8, a10
511	j	.Lmul_xnormalized

513.Lmul_yexpzero:
514	/* Clear the sign bit of y.  */
515	slli	a3, a3, 1
516	srli	a3, a3, 1

518	/* If y is zero, return zero.  */
519	beqz	a3, .Lmul_return_zero

521	/* Normalize y.  Adjust the exponent in a9.  */
522	do_nsau	a10, a3, a11, a12
523	addi	a10, a10, -8
524	ssl	a10
525	sll	a3, a3
526	movi	a9, 1
527	sub	a9, a9, a10
528	j	.Lmul_ynormalized

530.Lmul_return_zero:
531	/* Return zero with the appropriate sign bit.  */
532	srli	a2, a7, 31
533	slli	a2, a2, 31
534	j	.Lmul_done

536.Lmul_xnan_or_inf:
537	/* If y is zero, return NaN.  */
538	slli	a8, a3, 1
539	bnez	a8, 1f
540	movi	a4, 0x400000	/* make it a quiet NaN */
541	or	a2, a2, a4
542	j	.Lmul_done
5431:
544	/* If y is NaN, return y.  */
545	bnall	a3, a6, .Lmul_returnx
546	slli	a8, a3, 9
547	beqz	a8, .Lmul_returnx

549.Lmul_returny:
550	mov	a2, a3

552.Lmul_returnx:
553	/* Set the sign bit and return.  */
554	extui	a7, a7, 31, 1
555	slli	a2, a2, 1
556	ssai	1
557	src	a2, a7, a2
558	j	.Lmul_done

560.Lmul_ynan_or_inf:
561	/* If x is zero, return NaN.  */
562	slli	a8, a2, 1
563	bnez	a8, .Lmul_returny
564	movi	a7, 0x400000	/* make it a quiet NaN */
565	or	a2, a3, a7
566	j	.Lmul_done

	/* float __mulsf3 (float x, float y)
	   IEEE-754 single-precision multiplication, round to nearest even.
	   In:   a2 = x, a3 = y (raw IEEE-754 bit patterns).
	   Out:  a2 = x * y.
	   The mantissa product is computed to 64 bits (high word in a2,
	   low word in a6): with MUL32_HIGH directly, otherwise from four
	   16x16 partial products, or -- with no multiply hardware at all
	   (XCHAL_NO_MUL) -- via the custom-ABI helper .Lmul_mulsi3 below.
	   For CALL0 the callee-saved a12-a15 are spilled to the stack.  */
568	.align	4
569	.global	__mulsf3
570	.type	__mulsf3, @function
571__mulsf3:
572#if __XTENSA_CALL0_ABI__
573	leaf_entry sp, 32
574	addi	sp, sp, -32
575	s32i	a12, sp, 16
576	s32i	a13, sp, 20
577	s32i	a14, sp, 24
578	s32i	a15, sp, 28
579#elif XCHAL_NO_MUL
580	/* This is not really a leaf function; allocate enough stack space
581	   to allow CALL12s to a helper function.  */
582	leaf_entry sp, 64
583#else
584	leaf_entry sp, 32
585#endif
586	movi	a6, 0x7f800000

588	/* Get the sign of the result.  */
589	xor	a7, a2, a3

591	/* Check for NaN and infinity.  */
592	ball	a2, a6, .Lmul_xnan_or_inf
593	ball	a3, a6, .Lmul_ynan_or_inf

595	/* Extract the exponents.  */
596	extui	a8, a2, 23, 8
597	extui	a9, a3, 23, 8

599	beqz	a8, .Lmul_xexpzero
600.Lmul_xnormalized:
601	beqz	a9, .Lmul_yexpzero
602.Lmul_ynormalized:

604	/* Add the exponents.  */
605	add	a8, a8, a9

607	/* Replace sign/exponent fields with explicit "1.0".  */
608	movi	a10, 0xffffff
609	or	a2, a2, a6
610	and	a2, a2, a10
611	or	a3, a3, a6
612	and	a3, a3, a10

614	/* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

616#if XCHAL_HAVE_MUL32_HIGH

618	mull	a6, a2, a3
619	muluh	a2, a2, a3

621#else

623	/* Break the inputs into 16-bit chunks and compute 4 32-bit partial
624	   products.  These partial products are:

626		0 xl * yl

628		1 xl * yh
629		2 xh * yl

631		3 xh * yh

633	   If using the Mul16 or Mul32 multiplier options, these input
634	   chunks must be stored in separate registers.  For Mac16, the
635	   UMUL.AA.* opcodes can specify that the inputs come from either
636	   half of the registers, so there is no need to shift them out
637	   ahead of time.  If there is no multiply hardware, the 16-bit
638	   chunks can be extracted when setting up the arguments to the
639	   separate multiply function.  */

641#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
642	/* Calling a separate multiply function will clobber a0 and requires
643	   use of a8 as a temporary, so save those values now.  (The function
644	   uses a custom ABI so nothing else needs to be saved.)  */
645	s32i	a0, sp, 0
646	s32i	a8, sp, 4
647#endif

649#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

651#define a2h a4
652#define a3h a5

654	/* Get the high halves of the inputs into registers.  */
655	srli	a2h, a2, 16
656	srli	a3h, a3, 16

658#define a2l a2
659#define a3l a3

661#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
662	/* Clear the high halves of the inputs.  This does not matter
663	   for MUL16 because the high bits are ignored.  */
664	extui	a2, a2, 0, 16
665	extui	a3, a3, 0, 16
666#endif
667#endif /* MUL16 || MUL32 */


670#if XCHAL_HAVE_MUL16

672#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
673	mul16u	dst, xreg ## xhalf, yreg ## yhalf

675#elif XCHAL_HAVE_MUL32

677#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
678	mull	dst, xreg ## xhalf, yreg ## yhalf

680#elif XCHAL_HAVE_MAC16

682/* The preprocessor insists on inserting a space when concatenating after
683   a period in the definition of do_mul below.  These macros are a workaround
684   using underscores instead of periods when doing the concatenation.  */
685#define umul_aa_ll umul.aa.ll
686#define umul_aa_lh umul.aa.lh
687#define umul_aa_hl umul.aa.hl
688#define umul_aa_hh umul.aa.hh

690#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
691	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
692	rsr	dst, ACCLO

694#else /* no multiply hardware */

696#define set_arg_l(dst, src) \
697	extui	dst, src, 0, 16
698#define set_arg_h(dst, src) \
699	srli	dst, src, 16

701#if __XTENSA_CALL0_ABI__
702#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
703	set_arg_ ## xhalf (a13, xreg); \
704	set_arg_ ## yhalf (a14, yreg); \
705	call0	.Lmul_mulsi3; \
706	mov	dst, a12
707#else
708#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
709	set_arg_ ## xhalf (a14, xreg); \
710	set_arg_ ## yhalf (a15, yreg); \
711	call12	.Lmul_mulsi3; \
712	mov	dst, a14
713#endif /* __XTENSA_CALL0_ABI__ */

715#endif /* no multiply hardware */

717	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
718	do_mul(a6, a2, l, a3, h)	/* pp 1 */
719	do_mul(a11, a2, h, a3, l)	/* pp 2 */
720	movi	a9, 0
721	add	a6, a6, a11
722	bgeu	a6, a11, 1f
723	addi	a9, a9, 1
7241:
725	/* Shift the high half of a9/a6 into position in a9.  Note that
726	   this value can be safely incremented without any carry-outs.  */
727	ssai	16
728	src	a9, a9, a6

730	/* Compute the low word into a6.  */
731	do_mul(a11, a2, l, a3, l)	/* pp 0 */
732	sll	a6, a6
733	add	a6, a6, a11
734	bgeu	a6, a11, 1f
735	addi	a9, a9, 1
7361:
737	/* Compute the high word into a2.  */
738	do_mul(a2, a2, h, a3, h)	/* pp 3 */
739	add	a2, a2, a9

741#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
742	/* Restore values saved on the stack during the multiplication.  */
743	l32i	a0, sp, 0
744	l32i	a8, sp, 4
745#endif
746#endif /* ! XCHAL_HAVE_MUL32_HIGH */

748	/* Shift left by 9 bits, unless there was a carry-out from the
749	   multiply, in which case, shift by 8 bits and increment the
750	   exponent.  */
751	movi	a4, 9
752	srli	a5, a2, 24 - 9
753	beqz	a5, 1f
754	addi	a4, a4, -1
755	addi	a8, a8, 1
7561:	ssl	a4
757	src	a2, a2, a6
758	sll	a6, a6

760	/* Subtract the extra bias from the exponent sum (plus one to account
761	   for the explicit "1.0" of the mantissa that will be added to the
762	   exponent in the final result).  */
763	movi	a4, 0x80
764	sub	a8, a8, a4

766	/* Check for over/underflow.  The value in a8 is one less than the
767	   final exponent, so values in the range 0..fd are OK here.  */
768	movi	a4, 0xfe
769	bgeu	a8, a4, .Lmul_overflow

771.Lmul_round:
772	/* Round.  */
773	bgez	a6, .Lmul_rounded
774	addi	a2, a2, 1
775	slli	a6, a6, 1
776	beqz	a6, .Lmul_exactlyhalf

778.Lmul_rounded:
779	/* Add the exponent to the mantissa.  */
780	slli	a8, a8, 23
781	add	a2, a2, a8

783.Lmul_addsign:
784	/* Add the sign bit.  */
785	srli	a7, a7, 31
786	slli	a7, a7, 31
787	or	a2, a2, a7

789.Lmul_done:
790#if __XTENSA_CALL0_ABI__
791	l32i	a12, sp, 16
792	l32i	a13, sp, 20
793	l32i	a14, sp, 24
794	l32i	a15, sp, 28
795	addi	sp, sp, 32
796#endif
797	leaf_return

799.Lmul_exactlyhalf:
800	/* Round down to the nearest even value.  */
801	srli	a2, a2, 1
802	slli	a2, a2, 1
803	j	.Lmul_rounded

805.Lmul_overflow:
806	bltz	a8, .Lmul_underflow
807	/* Return +/- Infinity.  */
808	movi	a8, 0xff
809	slli	a2, a8, 23
810	j	.Lmul_addsign

812.Lmul_underflow:
813	/* Create a subnormal value, where the exponent field contains zero,
814	   but the effective exponent is 1.  The value of a8 is one less than
815	   the actual exponent, so just negate it to get the shift amount.  */
816	neg	a8, a8
817	mov	a9, a6
818	ssr	a8
819	bgeui	a8, 32, .Lmul_flush_to_zero

821	/* Shift a2 right.  Any bits that are shifted out of a2 are saved
822	   in a6 (combined with the shifted-out bits currently in a6) for
823	   rounding the result.  */
824	sll	a6, a2
825	srl	a2, a2

827	/* Set the exponent to zero.  */
828	movi	a8, 0

830	/* Pack any nonzero bits shifted out into a6.  */
831	beqz	a9, .Lmul_round
832	movi	a9, 1
833	or	a6, a6, a9
834	j	.Lmul_round

836.Lmul_flush_to_zero:
837	/* Return zero with the appropriate sign bit.  */
838	srli	a2, a7, 31
839	slli	a2, a2, 31
840	j	.Lmul_done

842#if XCHAL_NO_MUL

844	/* For Xtensa processors with no multiply hardware, this simplified
845	   version of _mulsi3 is used for multiplying 16-bit chunks of
846	   the floating-point mantissas.  When using CALL0, this function
847	   uses a custom ABI: the inputs are passed in a13 and a14, the
848	   result is returned in a12, and a8 and a15 are clobbered.  */
849	.align	4
850.Lmul_mulsi3:
851	leaf_entry sp, 16
	/* Shift-and-add multiply, 4 bits per iteration: conditionally
	   accumulate src2*1/2/4/8 into dst, then step src1 right and
	   src2 left by 4.  Terminates when no set bits remain in src1.  */
852	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
853	movi	\dst, 0
8541:	add	\tmp1, \src2, \dst
855	extui	\tmp2, \src1, 0, 1
856	movnez	\dst, \tmp1, \tmp2

858	do_addx2 \tmp1, \src2, \dst, \tmp1
859	extui	\tmp2, \src1, 1, 1
860	movnez	\dst, \tmp1, \tmp2

862	do_addx4 \tmp1, \src2, \dst, \tmp1
863	extui	\tmp2, \src1, 2, 1
864	movnez	\dst, \tmp1, \tmp2

866	do_addx8 \tmp1, \src2, \dst, \tmp1
867	extui	\tmp2, \src1, 3, 1
868	movnez	\dst, \tmp1, \tmp2

870	srli	\src1, \src1, 4
871	slli	\src2, \src2, 4
872	bnez	\src1, 1b
873	.endm
874#if __XTENSA_CALL0_ABI__
875	mul_mulsi3_body a12, a13, a14, a15, a8
876#else
877	/* The result will be written into a2, so save that argument in a4.  */
878	mov	a4, a2
879	mul_mulsi3_body a2, a4, a3, a5, a6
880#endif
881	leaf_return
882#endif /* XCHAL_NO_MUL */
883#endif /* L_mulsf3 */
884
885#ifdef L_divsf3
886
887	/* Division */
888__divsf3_aux:

890	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
891	   (This code is placed before the start of the function just to
892	   keep it in range of the limited branch displacements.)  */

894.Ldiv_yexpzero:
895	/* Clear the sign bit of y.  */
896	slli	a3, a3, 1
897	srli	a3, a3, 1

899	/* Check for division by zero.  */
900	beqz	a3, .Ldiv_yzero

902	/* Normalize y.  Adjust the exponent in a9.  */
903	do_nsau	a10, a3, a4, a5
904	addi	a10, a10, -8
905	ssl	a10
906	sll	a3, a3
907	movi	a9, 1
908	sub	a9, a9, a10
909	j	.Ldiv_ynormalized

911.Ldiv_yzero:
912	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
913	slli	a4, a2, 1
914	srli	a4, a4, 1
915	srli	a2, a7, 31
916	slli	a2, a2, 31
917	or	a2, a2, a6
918	bnez	a4, 1f
919	movi	a4, 0x400000	/* make it a quiet NaN */
920	or	a2, a2, a4
9211:	leaf_return

923.Ldiv_xexpzero:
924	/* Clear the sign bit of x.  */
925	slli	a2, a2, 1
926	srli	a2, a2, 1

928	/* If x is zero, return zero.  */
929	beqz	a2, .Ldiv_return_zero

931	/* Normalize x.  Adjust the exponent in a8.  */
932	do_nsau	a10, a2, a4, a5
933	addi	a10, a10, -8
934	ssl	a10
935	sll	a2, a2
936	movi	a8, 1
937	sub	a8, a8, a10
938	j	.Ldiv_xnormalized

940.Ldiv_return_zero:
941	/* Return zero with the appropriate sign bit.  */
942	srli	a2, a7, 31
943	slli	a2, a2, 31
944	leaf_return

946.Ldiv_xnan_or_inf:
947	/* Set the sign bit of the result.  */
948	srli	a7, a3, 31
949	slli	a7, a7, 31
950	xor	a2, a2, a7
951	/* If y is NaN or Inf, return NaN.  */
952	bnall	a3, a6, 1f
953	movi	a4, 0x400000	/* make it a quiet NaN */
954	or	a2, a2, a4
9551:	leaf_return

957.Ldiv_ynan_or_inf:
958	/* If y is Infinity, return zero.  */
959	slli	a8, a3, 9
960	beqz	a8, .Ldiv_return_zero
961	/* y is NaN; return it.  */
962	mov	a2, a3
963	leaf_return

	/* float __divsf3 (float x, float y)
	   IEEE-754 single-precision division, rounding to nearest even.
	   In:   a2 = x (dividend), a3 = y (divisor), raw IEEE patterns.
	   Out:  a2 = x / y.
	   The quotient mantissa is produced one bit at a time by a
	   restoring division loop (23 iterations, using the zero-overhead
	   loop option when available); the remainder left in a2 drives
	   the final rounding.  Uses a4-a10 as scratch.  */
965	.align	4
966	.global	__divsf3
967	.type	__divsf3, @function
968__divsf3:
969	leaf_entry sp, 16
970	movi	a6, 0x7f800000

972	/* Get the sign of the result.  */
973	xor	a7, a2, a3

975	/* Check for NaN and infinity.  */
976	ball	a2, a6, .Ldiv_xnan_or_inf
977	ball	a3, a6, .Ldiv_ynan_or_inf

979	/* Extract the exponents.  */
980	extui	a8, a2, 23, 8
981	extui	a9, a3, 23, 8

983	beqz	a9, .Ldiv_yexpzero
984.Ldiv_ynormalized:
985	beqz	a8, .Ldiv_xexpzero
986.Ldiv_xnormalized:

988	/* Subtract the exponents.  */
989	sub	a8, a8, a9

991	/* Replace sign/exponent fields with explicit "1.0".  */
992	movi	a10, 0xffffff
993	or	a2, a2, a6
994	and	a2, a2, a10
995	or	a3, a3, a6
996	and	a3, a3, a10

998	/* The first digit of the mantissa division must be a one.
999	   Shift x (and adjust the exponent) as needed to make this true.  */
1000	bltu	a3, a2, 1f
1001	slli	a2, a2, 1
1002	addi	a8, a8, -1
10031:
1004	/* Do the first subtraction and shift.  */
1005	sub	a2, a2, a3
1006	slli	a2, a2, 1

1008	/* Put the quotient into a10.  */
1009	movi	a10, 1

1011	/* Divide one bit at a time for 23 bits.  */
1012	movi	a9, 23
1013#if XCHAL_HAVE_LOOPS
1014	loop	a9, .Ldiv_loopend
1015#endif
1016.Ldiv_loop:
1017	/* Shift the quotient << 1.  */
1018	slli	a10, a10, 1

1020	/* Is this digit a 0 or 1?  */
1021	bltu	a2, a3, 1f

1023	/* Output a 1 and subtract.  */
1024	addi	a10, a10, 1
1025	sub	a2, a2, a3

1027	/* Shift the dividend << 1.  */
10281:	slli	a2, a2, 1

1030#if !XCHAL_HAVE_LOOPS
1031	addi	a9, a9, -1
1032	bnez	a9, .Ldiv_loop
1033#endif
1034.Ldiv_loopend:

1036	/* Add the exponent bias (less one to account for the explicit "1.0"
1037	   of the mantissa that will be added to the exponent in the final
1038	   result).  */
1039	addi	a8, a8, 0x7e

1041	/* Check for over/underflow.  The value in a8 is one less than the
1042	   final exponent, so values in the range 0..fd are OK here.  */
1043	movi	a4, 0xfe
1044	bgeu	a8, a4, .Ldiv_overflow

1046.Ldiv_round:
1047	/* Round.  The remainder (<< 1) is in a2.  */
1048	bltu	a2, a3, .Ldiv_rounded
1049	addi	a10, a10, 1
1050	beq	a2, a3, .Ldiv_exactlyhalf

1052.Ldiv_rounded:
1053	/* Add the exponent to the mantissa.  */
1054	slli	a8, a8, 23
1055	add	a2, a10, a8

1057.Ldiv_addsign:
1058	/* Add the sign bit.  */
1059	srli	a7, a7, 31
1060	slli	a7, a7, 31
1061	or	a2, a2, a7
1062	leaf_return

1064.Ldiv_overflow:
1065	bltz	a8, .Ldiv_underflow
1066	/* Return +/- Infinity.  */
1067	addi	a8, a4, 1	/* 0xff */
1068	slli	a2, a8, 23
1069	j	.Ldiv_addsign

1071.Ldiv_exactlyhalf:
1072	/* Remainder is exactly half the divisor.  Round even.  */
1073	srli	a10, a10, 1
1074	slli	a10, a10, 1
1075	j	.Ldiv_rounded

1077.Ldiv_underflow:
1078	/* Create a subnormal value, where the exponent field contains zero,
1079	   but the effective exponent is 1.  The value of a8 is one less than
1080	   the actual exponent, so just negate it to get the shift amount.  */
1081	neg	a8, a8
1082	ssr	a8
1083	bgeui	a8, 32, .Ldiv_flush_to_zero

1085	/* Shift a10 right.  Any bits that are shifted out of a10 are
1086	   saved in a6 for rounding the result.  */
1087	sll	a6, a10
1088	srl	a10, a10

1090	/* Set the exponent to zero.  */
1091	movi	a8, 0

1093	/* Pack any nonzero remainder (in a2) into a6.  */
1094	beqz	a2, 1f
1095	movi	a9, 1
1096	or	a6, a6, a9

1098	/* Round a10 based on the bits shifted out into a6.  */
10991:	bgez	a6, .Ldiv_rounded
1100	addi	a10, a10, 1
1101	slli	a6, a6, 1
1102	bnez	a6, .Ldiv_rounded
1103	srli	a10, a10, 1
1104	slli	a10, a10, 1
1105	j	.Ldiv_rounded

1107.Ldiv_flush_to_zero:
1108	/* Return zero with the appropriate sign bit.  */
1109	srli	a2, a7, 31
1110	slli	a2, a2, 31
1111	leaf_return
1112
1113#endif /* L_divsf3 */
1114
1115#ifdef L_cmpsf2
1116
1117	/* Equal and Not Equal */
1118
	/* int __eqsf2 (float x, float y) -- also aliased as __nesf2.
	   Returns 0 when x == y and 1 otherwise.  NaN compares unequal
	   to everything (including itself), and +0 == -0.
	   In: a2 = x, a3 = y (raw IEEE bit patterns); clobbers a3, a6, a7.  */
1119	.align	4
1120	.global	__eqsf2
1121	.global	__nesf2
1122	.set	__nesf2, __eqsf2
1123	.type	__eqsf2, @function
1124__eqsf2:
1125	leaf_entry sp, 16
1126	bne	a2, a3, 4f

1128	/* The values are equal but NaN != NaN.  Check the exponent.  */
1129	movi	a6, 0x7f800000
1130	ball	a2, a6, 3f

1132	/* Equal.  */
1133	movi	a2, 0
1134	leaf_return

	/* Not equal.  NOTE(review): no branch to label 2 is visible in
	   this function; this path appears to be dead code, possibly
	   retained for historical/layout reasons -- confirm upstream.  */
11372:	movi	a2, 1
1138	leaf_return

	/* Check if the mantissas are nonzero.  */
11413:	slli	a7, a2, 9
1142	j	5f

	/* Check if x and y are zero with different signs.  */
11454:	or	a7, a2, a3
1146	slli	a7, a7, 1

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   or x when exponent(x) = 0x7f8 and x == y.  */
11505:	movi	a2, 0
1151	movi	a3, 1
1152	movnez	a2, a3, a7
1153	leaf_return
1154
1155
1156	/* Greater Than */
1157
	/* int __gtsf2 (float x, float y)
	   Returns 1 when x > y, else 0.  When either operand is NaN the
	   result is 0, so only a positive result means "greater than".
	   Falls through to the shared comparison tail .Lle_cmp (defined
	   with __lesf2 below) for the ordered case.
	   In: a2 = x, a3 = y; clobbers a3, a6, a7.  */
1158	.align	4
1159	.global	__gtsf2
1160	.type	__gtsf2, @function
1161__gtsf2:
1162	leaf_entry sp, 16
1163	movi	a6, 0x7f800000
1164	ball	a2, a6, 2f
11651:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
1168	slli	a7, a3, 9
1169	beqz	a7, .Lle_cmp
1170	movi	a2, 0
1171	leaf_return

	/* Check if x is a NaN.  */
11742:	slli	a7, a2, 9
1175	beqz	a7, 1b
1176	movi	a2, 0
1177	leaf_return
1178
1179
1180	/* Less Than or Equal */
1181
	/* int __lesf2 (float x, float y)
	   Returns 0 when x <= y, else 1.  When either operand is NaN the
	   result is 1, so only a result <= 0 means "less than or equal".
	   The comparison tail .Lle_cmp below is shared with __gtsf2.
	   In: a2 = x, a3 = y; clobbers a3, a6, a7.  */
1182	.align	4
1183	.global	__lesf2
1184	.type	__lesf2, @function
1185__lesf2:
1186	leaf_entry sp, 16
1187	movi	a6, 0x7f800000
1188	ball	a2, a6, 2f
11891:	bnall	a3, a6, .Lle_cmp

	/* Check if y is a NaN.  */
1192	slli	a7, a3, 9
1193	beqz	a7, .Lle_cmp
1194	movi	a2, 1
1195	leaf_return

	/* Check if x is a NaN.  */
11982:	slli	a7, a2, 9
1199	beqz	a7, 1b
1200	movi	a2, 1
1201	leaf_return

1203.Lle_cmp:
	/* Ordered comparison: neither x nor y is NaN here.  */
1204	/* Check if x and y have different signs.  */
1205	xor	a7, a2, a3
1206	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
1209	bltz	a2, .Lle_xneg

	/* Check if x <= y.  */
1212	bltu	a3, a2, 5f
12134:	movi	a2, 0
1214	leaf_return

1216.Lle_xneg:
	/* Both negative: larger magnitude means smaller value, so the
	   unsigned comparison direction is reversed.  */
1218	bgeu	a2, a3, 4b
12195:	movi	a2, 1
1220	leaf_return

1222.Lle_diff_signs:
1223	bltz	a2, 4b

	/* Check if both x and y are zero.  */
1226	or	a7, a2, a3
1227	slli	a7, a7, 1
1228	movi	a2, 1
1229	movi	a3, 0
1230	moveqz	a2, a3, a7
1231	leaf_return
1232
1233
1234	/* Greater Than or Equal */
1235
	.align	4
	.global	__gesf2
	.type	__gesf2, @function

	/* int __gesf2 (float x, float y)
	   x in a2, y in a3; result in a2.
	   Returns 0 (>= 0) if x >= y; returns -1 otherwise, including when
	   either argument is a NaN.  The ordered comparison is done by the
	   shared code at .Llt_cmp (inside __ltsf2), which returns -1 for
	   x < y and 0 for x >= y.  */
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	/* If the exponent of x is all ones, x may be a NaN; check at 2.  */
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return
1256
1257
1258	/* Less Than */
1259
	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function

	/* int __ltsf2 (float x, float y)
	   x in a2, y in a3; result in a2.
	   Returns -1 (< 0) if x < y; returns 0 otherwise, including when
	   either argument is a NaN.  */
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	/* If the exponent of x is all ones, x may be a NaN; check at 2.  */
	ball	a2, a6, 2f
1:	bnall	a3, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

	/* Shared ordered comparison for __ltsf2 and __gesf2: both operands
	   are known to be non-NaN here.  Returns -1 if x < y, 0 if x >= y.  */
.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Check if x < y.  Both are nonnegative, so the IEEE bit
	   patterns order like unsigned integers.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Both are negative: larger magnitude means smaller value.
	   Check if y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	/* Signs differ.  If x is nonnegative, then x >= y.  */
	bgez	a2, 5b

	/* x is negative and y is nonnegative: x < y unless both are zero
	   (-0 == +0).  Check if x or y is nonzero (ignoring sign bits).  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7
	leaf_return
1310
1311
1312	/* Unordered */
1313
	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function

	/* int __unordsf2 (float x, float y)
	   x in a2, y in a3; result in a2.
	   Returns 1 if either argument is a NaN, 0 otherwise.  */
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	/* All-ones exponent means Infinity or NaN; distinguish below.  */
	ball	a2, a6, 3f
1:	ball	a3, a6, 4f
2:	movi	a2, 0
	leaf_return

	/* x has an all-ones exponent: it is a NaN iff its mantissa is
	   nonzero; otherwise (Infinity) continue checking y.  */
3:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

	/* y has an all-ones exponent: it is a NaN iff its mantissa is
	   nonzero.  */
4:	slli	a7, a3, 9
	beqz	a7, 2b
	movi	a2, 1
	leaf_return
1334
1335#endif /* L_cmpsf2 */
1336
1337#ifdef L_fixsfsi
1338
	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function

	/* int __fixsfsi (float a)
	   a in a2; result in a2.
	   Converts to a signed 32-bit integer, truncating toward zero.
	   Out-of-range values saturate to 0x7fffffff / 0x80000000;
	   NaN converts to 0x7fffffff.  */
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint
	blti	a4, 1, .Lfixsfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  (a7 still holds the input's
	   sign bit in its msb.)  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint.  (Clearing the sign makes the movgez
	   below select 0x7fffffff.)  */
	movi	a2, 0

.Lfixsfsi_maxint:
	/* Saturate: 0x7fffffff if the input sign is clear, 0x80000000
	   if it is set.  */
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	/* |a| < 1.0: truncates to zero.  */
	movi	a2, 0
	leaf_return
1386
1387#endif /* L_fixsfsi */
1388
1389#ifdef L_fixsfdi
1390
	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function

	/* long long __fixsfdi (float a)
	   a in a2; 64-bit result in a2/a3 (xh/xl assignment depends on
	   endianness, see the defines at the top of the file).
	   Converts to a signed 64-bit integer, truncating toward zero.
	   Out-of-range values saturate to LLONG_MAX / LLONG_MIN; NaN
	   converts to LLONG_MAX.  */
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint
	blti	a4, 1, .Lfixsfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	/* Shift amount >= 32: the whole result fits in the low word.  */
	srl	xl, xh
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  (a7 still holds the
	   input's sign bit in its msb.)  64-bit negate = negate both
	   words, then borrow 1 from the high word if the low word is
	   nonzero.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	/* Shift amount (64 - a4) is less than 32: the result spans both
	   words.  */
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint.  (Clearing the sign selects the
	   positive saturation value below.)  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	/* Negative: return LLONG_MIN = 0x8000000000000000.  */
	mov	xh, a7
	movi	xl, 0
	leaf_return

	/* Nonnegative: return LLONG_MAX = 0x7fffffffffffffff.  */
1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	/* |a| < 1.0: truncates to zero.  */
	movi	xh, 0
	movi	xl, 0
	leaf_return
1455
1456#endif /* L_fixsfdi */
1457
1458#ifdef L_fixunssfsi
1459
	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function

	/* unsigned int __fixunssfsi (float a)
	   a in a2; result in a2.
	   Converts to an unsigned 32-bit integer, truncating toward zero.
	   Values >= 2^32 and NaN saturate to 0xffffffff; negative inputs
	   yield the negation of the truncated magnitude, with large
	   negative values saturating to 0x80000000.  */
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint
	bltz	a4, .Lfixunssfsi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  A shift count
	   of zero (a4 == 32 after the increment) needs special handling.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  (a7 still holds the input's
	   sign bit in its msb.)  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	/* Saturate: 0xffffffff if the input sign is clear, 0x80000000
	   if it is set.  */
	slli	a4, a6, 8	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	/* |a| < 1.0: truncates to zero.  */
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Handle unsigned maximum exponent case (exp - 0x7f == 31): the
	   mantissa is already in its final position.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return
1520
1521#endif /* L_fixunssfsi */
1522
1523#ifdef L_fixunssfdi
1524
	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function

	/* unsigned long long __fixunssfdi (float a)
	   a in a2; 64-bit result in a2/a3 (xh/xl assignment depends on
	   endianness, see the defines at the top of the file).
	   Converts to an unsigned 64-bit integer, truncating toward zero.
	   Values >= 2^64 and NaN saturate to 0xffffffffffffffff; negative
	   inputs yield the negation of the truncated magnitude, with large
	   negative values saturating to 0x8000000000000000.  */
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint
	bltz	a4, .Lfixunssfdi_zero

	/* Add explicit "1.0" and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  A shift count
	   of zero (a4 == 64 after the increment) needs special handling.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	/* Shift amount >= 32: the whole result fits in the low word.  */
	srl	xl, xh
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  (a7 still holds the
	   input's sign bit in its msb.)  64-bit negate = negate both
	   words, then borrow 1 from the high word if the low word is
	   nonzero.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	/* Shift amount (64 - a4) is less than 32: the result spans both
	   words.  xl is zeroed so that src shifts in zero bits.  */
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff.... */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	/* Nonnegative overflow saturates to all ones (at 1 above);
	   negative overflow saturates to 0x8000000000000000.  */
	bgez	a2, 1b
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	/* |a| < 1.0: truncates to zero.  */
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Handle unsigned maximum exponent case (exp - 0x7f == 63): the
	   mantissa is already in the high word.  Negative values return
	   0x8000000000000000 (at 2 above).  */
	bltz	a7, 2b
	movi	xl, 0
	leaf_return		/* no shift needed */
1594
1595#endif /* L_fixunssfdi */
1596
1597#ifdef L_floatsisf
1598
	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function

	/* float __floatunsisf (unsigned int a)
	   a in a2; result in a2.
	   Converts an unsigned 32-bit integer to single precision,
	   rounding to nearest, ties to even.  Shares the conversion code
	   of __floatsisf below, entering with the sign forced to zero.  */
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function

	/* float __floatsisf (int a)
	   a in a2; result in a2.
	   Converts a signed 32-bit integer to single precision, rounding
	   to nearest, ties to even.  */
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  (Integer zero is also 0.0 as a float.)  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  a4 = number of
	   leading zeros (normalization shift count).  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent.  The leading 1 of the mantissa sits at bit 23
	   and carries into the exponent field, supplying the final +1.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2 (msb of a6 set).  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value (clear the lsb).  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
1666
1667#endif /* L_floatsisf */
1668
1669#ifdef L_floatdisf
1670
	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function

	/* float __floatundisf (unsigned long long a)
	   a in a2/a3 (xh/xl assignment depends on endianness); result
	   in a2.  Converts an unsigned 64-bit integer to single
	   precision, rounding to nearest, ties to even.  Shares the
	   conversion code of __floatdisf below, entering with the sign
	   forced to zero.  */
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function

	/* float __floatdisf (long long a)
	   a in a2/a3 (xh/xl assignment depends on endianness); result
	   in a2.  Converts a signed 64-bit integer to single precision,
	   rounding to nearest, ties to even.  */
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  64-bit negate = negate both words,
	   then borrow 1 from the high word if the low word is nonzero.  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  a4 = total
	   normalization shift count.  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	/* Fold any bits shifted out below a6 into its lsb (a "sticky"
	   bit) so rounding can tell an exact half from more-than-half.  */
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent.  The leading 1 of the mantissa carries into
	   the exponent field, supplying the final +1.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2 (msb of a6 set).  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value (clear the lsb).  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
1756
1757#endif /* L_floatdisf */
1758