13cab2bb3Spatrick//===----------------------Hexagon builtin routine ------------------------===//
23cab2bb3Spatrick//
33cab2bb3Spatrick// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
43cab2bb3Spatrick// See https://llvm.org/LICENSE.txt for license information.
53cab2bb3Spatrick// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
63cab2bb3Spatrick//
73cab2bb3Spatrick//===----------------------------------------------------------------------===//
83cab2bb3Spatrick
// Q6_ALIAS(TAG): export __qdsp_<TAG> as a global symbol set equal to
// __hexagon_<TAG>, so the routine is reachable under both names.
93cab2bb3Spatrick#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
// END(TAG): emit a .size directive for TAG covering everything from the label
// TAG up to the current location.
103cab2bb3Spatrick#define END(TAG) .size TAG,.-TAG
113cab2bb3Spatrick
123cab2bb3Spatrick// Double Precision Fused Multiply-Add (A*B + C)
133cab2bb3Spatrick
143cab2bb3Spatrick
// Operand register pairs. A and B are the values that are classified and
// multiplied, C is the value added to the product. The result is written to A.
153cab2bb3Spatrick#define A r1:0
163cab2bb3Spatrick#define AH r1
173cab2bb3Spatrick#define AL r0
183cab2bb3Spatrick#define B r3:2
193cab2bb3Spatrick#define BH r3
203cab2bb3Spatrick#define BL r2
213cab2bb3Spatrick#define C r5:4
223cab2bb3Spatrick#define CH r5
233cab2bb3Spatrick#define CL r4
243cab2bb3Spatrick
253cab2bb3Spatrick
263cab2bb3Spatrick
// Unpacked mantissas of B, A and C (mantissa field inserted at bit 8, with the
// implicit one ORed in at 0x1000_0000_0000_0000).
// NOTE: several of these names are #undef'd and the registers re-aliased
// further down, in the middle of the code.
273cab2bb3Spatrick#define BTMP r15:14
283cab2bb3Spatrick#define BTMPH r15
293cab2bb3Spatrick#define BTMPL r14
303cab2bb3Spatrick
313cab2bb3Spatrick#define ATMP r13:12
323cab2bb3Spatrick#define ATMPH r13
333cab2bb3Spatrick#define ATMPL r12
343cab2bb3Spatrick
353cab2bb3Spatrick#define CTMP r11:10
363cab2bb3Spatrick#define CTMPH r11
373cab2bb3Spatrick#define CTMPL r10
383cab2bb3Spatrick
// Partial products of the 64x64 mantissa multiply:
// PP_LL = low*low, PP_ODD = sum of the two cross terms, PP_HH = high*high.
393cab2bb3Spatrick#define PP_LL r9:8
403cab2bb3Spatrick#define PP_LL_H r9
413cab2bb3Spatrick#define PP_LL_L r8
423cab2bb3Spatrick
433cab2bb3Spatrick#define PP_ODD r7:6
443cab2bb3Spatrick#define PP_ODD_H r7
453cab2bb3Spatrick#define PP_ODD_L r6
463cab2bb3Spatrick
473cab2bb3Spatrick
// r17:16 and r19:18 are stored to the stack frame before they are used and
// reloaded before every return from the framed paths.
483cab2bb3Spatrick#define PP_HH r17:16
493cab2bb3Spatrick#define PP_HH_H r17
503cab2bb3Spatrick#define PP_HH_L r16
513cab2bb3Spatrick
// Exponent fields extracted from the high words of A and B.
523cab2bb3Spatrick#define EXPA r18
533cab2bb3Spatrick#define EXPB r19
543cab2bb3Spatrick#define EXPBA r19:18
553cab2bb3Spatrick
// General scratch register.
563cab2bb3Spatrick#define TMP r28
573cab2bb3Spatrick
// Predicate roles: P_TMP is a short-lived scratch predicate, PROD_NEG is set
// when sign(A) != sign(B), EXACT is set when no sticky bits were lost, SWAP is
// set when the exponent of C is greater than the exponent of the product.
583cab2bb3Spatrick#define P_TMP p0
593cab2bb3Spatrick#define PROD_NEG p3
603cab2bb3Spatrick#define EXACT p2
613cab2bb3Spatrick#define SWAP p1
623cab2bb3Spatrick
// Double-precision layout constants: mantissa field width, number of mantissa
// bits held in the high word, exponent field width and exponent bias.
633cab2bb3Spatrick#define MANTBITS 52
643cab2bb3Spatrick#define HI_MANTBITS 20
653cab2bb3Spatrick#define EXPBITS 11
663cab2bb3Spatrick#define BIAS 1023
// Frame size handed to allocframe; 16 bytes of it are used for r17:16/r19:18.
673cab2bb3Spatrick#define STACKSPACE 32
683cab2bb3Spatrick
// Added to the product exponent together with -BIAS.
693cab2bb3Spatrick#define ADJUST 4
703cab2bb3Spatrick
// FUDGE is not referenced anywhere in this file. FUDGE2 is the number of extra
// low-order bits kept below the mantissa on the denormal result path.
713cab2bb3Spatrick#define FUDGE 7
723cab2bb3Spatrick#define FUDGE2 3
733cab2bb3Spatrick
// Bit offset of the 2-bit rounding-mode field that is extracted from USR.
// May be overridden from the command line.
743cab2bb3Spatrick#ifndef SR_ROUND_OFF
753cab2bb3Spatrick#define SR_ROUND_OFF 22
763cab2bb3Spatrick#endif
773cab2bb3Spatrick
783cab2bb3Spatrick	// First, classify for normal values, and abort if abnormal
793cab2bb3Spatrick	//
803cab2bb3Spatrick	// Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
813cab2bb3Spatrick	//
823cab2bb3Spatrick	// Since we know that the 2 MSBs of the H registers are zero, we should never carry
833cab2bb3Spatrick	// the partial products that involve the H registers
843cab2bb3Spatrick	//
853cab2bb3Spatrick	// Try to buy X slots, at the expense of latency if needed
863cab2bb3Spatrick	//
873cab2bb3Spatrick	// We will have PP_HH with the upper bits of the product, PP_LL with the lower
883cab2bb3Spatrick	// PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
893cab2bb3Spatrick	// PP_HH can have a minimum of 0x0100_0000_0000_0000
903cab2bb3Spatrick	//
913cab2bb3Spatrick	// 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
923cab2bb3Spatrick	//
933cab2bb3Spatrick	// We need to align CTMP.
943cab2bb3Spatrick	// If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
953cab2bb3Spatrick	// If CTMP << PP align CTMP and add 128 bits.  Then compute sticky
963cab2bb3Spatrick	// If CTMP ~= PP, align CTMP and add 128 bits.  May have massive cancellation.
973cab2bb3Spatrick	//
983cab2bb3Spatrick	// Convert partial product and CTMP to 2's complement prior to addition
993cab2bb3Spatrick	//
1003cab2bb3Spatrick	// After we add, we need to normalize into upper 64 bits, then compute sticky.
1013cab2bb3Spatrick
1023cab2bb3Spatrick	.text
	// Entry point for double-precision fused multiply-add.
	// In:  A = r1:0, B = r3:2, C = r5:4.  Out: A = r1:0.
	// The same code is exported as __hexagon_fmadf4, __hexagon_fmadf5 and,
	// through Q6_ALIAS, __qdsp_fmadf5.
1033cab2bb3Spatrick	.global __hexagon_fmadf4
1043cab2bb3Spatrick        .type __hexagon_fmadf4,@function
1053cab2bb3Spatrick	.global __hexagon_fmadf5
1063cab2bb3Spatrick        .type __hexagon_fmadf5,@function
1073cab2bb3Spatrick	Q6_ALIAS(fmadf5)
1083cab2bb3Spatrick	.p2align 5
1093cab2bb3Spatrick__hexagon_fmadf4:
1103cab2bb3Spatrick__hexagon_fmadf5:
	// .Lfma_begin is also the restart target used by the abnormal A/B path
	// after it has rewritten A and B and released the frame.
111*1f9cb04fSpatrick.Lfma_begin:
1123cab2bb3Spatrick	{
		// Both compares write P_TMP in the same packet, so P_TMP ends up
		// true only when A and B are both normal numbers (class mask #2).
1133cab2bb3Spatrick		P_TMP = dfclass(A,#2)
1143cab2bb3Spatrick		P_TMP = dfclass(B,#2)
1153cab2bb3Spatrick		ATMP = #0
1163cab2bb3Spatrick		BTMP = #0
1173cab2bb3Spatrick	}
1183cab2bb3Spatrick	{
		// Place the 52 mantissa bits at bit offset EXPBITS-3 (= 8).
1193cab2bb3Spatrick		ATMP = insert(A,#MANTBITS,#EXPBITS-3)
1203cab2bb3Spatrick		BTMP = insert(B,#MANTBITS,#EXPBITS-3)
		// High word of the implicit-one bit, 0x1000_0000_0000_0000.
1213cab2bb3Spatrick		PP_ODD_H = ##0x10000000
1223cab2bb3Spatrick		allocframe(#STACKSPACE)
1233cab2bb3Spatrick	}
1243cab2bb3Spatrick	{
		// low*low partial product; only the low words are read, so it does
		// not depend on the implicit one ORed into the high words below.
1253cab2bb3Spatrick		PP_LL = mpyu(ATMPL,BTMPL)
1263cab2bb3Spatrick		if (!P_TMP) jump .Lfma_abnormal_ab
1273cab2bb3Spatrick		ATMPH = or(ATMPH,PP_ODD_H)
1283cab2bb3Spatrick		BTMPH = or(BTMPH,PP_ODD_H)
1293cab2bb3Spatrick	}
1303cab2bb3Spatrick	{
1313cab2bb3Spatrick		P_TMP = dfclass(C,#2)
1323cab2bb3Spatrick		if (!P_TMP.new) jump:nt .Lfma_abnormal_c
		// CTMP starts out as the implicit one for C; this reads PP_ODD_H
		// before the next instruction in the packet overwrites PP_ODD.
1333cab2bb3Spatrick		CTMP = combine(PP_ODD_H,#0)
		// Seed the cross-term accumulator with the high word of low*low.
1343cab2bb3Spatrick		PP_ODD = combine(#0,PP_LL_H)
1353cab2bb3Spatrick	}
1363cab2bb3Spatrick.Lfma_abnormal_c_restart:
	// Reached by fall-through for normal C, or by a jump from the subnormal-C
	// fixup, which arrives with CTMP = 0 (no implicit one) and the exponent
	// field of C forced to 1.
1373cab2bb3Spatrick	{
1383cab2bb3Spatrick		PP_ODD += mpyu(BTMPL,ATMPH)
1393cab2bb3Spatrick		CTMP = insert(C,#MANTBITS,#EXPBITS-3)
		// Save r17:16 and r19:18 in the frame before they are used as
		// PP_HH and EXPBA.
1403cab2bb3Spatrick		memd(r29+#0) = PP_HH
1413cab2bb3Spatrick		memd(r29+#8) = EXPBA
1423cab2bb3Spatrick	}
1433cab2bb3Spatrick	{
1443cab2bb3Spatrick		PP_ODD += mpyu(ATMPL,BTMPH)
		// r19:18 temporarily holds the negated C mantissa.
1453cab2bb3Spatrick		EXPBA = neg(CTMP)
		// P_TMP = C is non-negative (sign bit of CH clear).
1463cab2bb3Spatrick		P_TMP = cmp.gt(CH,#-1)
		// Bit 31 of TMP = sign of the product A*B.
1473cab2bb3Spatrick		TMP = xor(AH,BH)
1483cab2bb3Spatrick	}
1493cab2bb3Spatrick	{
1503cab2bb3Spatrick		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
1513cab2bb3Spatrick		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
		// Carry the high word of the cross terms into the high product.
1523cab2bb3Spatrick		PP_HH = combine(#0,PP_ODD_H)
		// Negative C: use the negated mantissa. This reads r19:18 as it was
		// before EXPA/EXPB are written in this same packet.
1533cab2bb3Spatrick		if (!P_TMP) CTMP = EXPBA
1543cab2bb3Spatrick	}
1553cab2bb3Spatrick	{
1563cab2bb3Spatrick		PP_HH += mpyu(ATMPH,BTMPH)
		// Low 64 bits of the product: low word of the cross terms on top of
		// the low word of low*low.
1573cab2bb3Spatrick		PP_LL = combine(PP_ODD_L,PP_LL_L)
		// The mantissa temporaries are dead from here on; r13:12 is reused
		// as a pair of shift counts.
1583cab2bb3Spatrick#undef PP_ODD
1593cab2bb3Spatrick#undef PP_ODD_H
1603cab2bb3Spatrick#undef PP_ODD_L
1613cab2bb3Spatrick#undef ATMP
1623cab2bb3Spatrick#undef ATMPL
1633cab2bb3Spatrick#undef ATMPH
1643cab2bb3Spatrick#undef BTMP
1653cab2bb3Spatrick#undef BTMPL
1663cab2bb3Spatrick#undef BTMPH
1673cab2bb3Spatrick#define RIGHTLEFTSHIFT r13:12
1683cab2bb3Spatrick#define RIGHTSHIFT r13
1693cab2bb3Spatrick#define LEFTSHIFT r12
1703cab2bb3Spatrick
		// Sum of the two biased exponents; the bias is removed later.
1713cab2bb3Spatrick		EXPA = add(EXPA,EXPB)
		// r19 now holds the exponent field of C instead of that of B.
1723cab2bb3Spatrick#undef EXPB
1733cab2bb3Spatrick#undef EXPBA
1743cab2bb3Spatrick#define EXPC r19
1753cab2bb3Spatrick#define EXPCA r19:18
1763cab2bb3Spatrick		EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
1773cab2bb3Spatrick	}
1783cab2bb3Spatrick	// PP_HH:PP_LL now has product
1793cab2bb3Spatrick	// CTMP is negated
1803cab2bb3Spatrick	// EXPA,B,C are extracted
1813cab2bb3Spatrick	// We need to negate PP
1823cab2bb3Spatrick	// Since we will be adding with carry later, if we need to negate,
1833cab2bb3Spatrick	// just invert all bits now, which we can do conditionally and in parallel
	// r15:14 and r7:6 hold the candidate negated product halves.
1843cab2bb3Spatrick#define PP_HH_TMP r15:14
1853cab2bb3Spatrick#define PP_LL_TMP r7:6
1863cab2bb3Spatrick	{
		// Remove one bias from the summed exponents and apply ADJUST.
1873cab2bb3Spatrick		EXPA = add(EXPA,#-BIAS+(ADJUST))
		// PROD_NEG = sign bit of xor(AH,BH) is set, i.e. product negative.
1883cab2bb3Spatrick		PROD_NEG = !cmp.gt(TMP,#-1)
1893cab2bb3Spatrick		PP_LL_TMP = #0
1903cab2bb3Spatrick		PP_HH_TMP = #0
1913cab2bb3Spatrick	}
1923cab2bb3Spatrick	{
		// 0 - PP_LL through the carry chain; PROD_NEG is both carry-in and
		// carry-out, so it no longer holds the sign afterwards.
1933cab2bb3Spatrick		PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
		// Recompute "product negative" into P_TMP for the selects below.
1943cab2bb3Spatrick		P_TMP = !cmp.gt(TMP,#-1)
1953cab2bb3Spatrick		SWAP = cmp.gt(EXPC,EXPA)	// If C >> PP
		// After this EXPA holds the larger exponent and EXPC the smaller.
1963cab2bb3Spatrick		if (SWAP.new) EXPCA = combine(EXPA,EXPC)
1973cab2bb3Spatrick	}
1983cab2bb3Spatrick	{
		// High half of the negation, continuing the carry from the low half.
1993cab2bb3Spatrick		PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
2003cab2bb3Spatrick		if (P_TMP) PP_LL = PP_LL_TMP
		// r7:6 becomes the low 64 bits of the 128-bit addend built from C.
2013cab2bb3Spatrick#undef PP_LL_TMP
2023cab2bb3Spatrick#define CTMP2 r7:6
2033cab2bb3Spatrick#define CTMP2H r7
2043cab2bb3Spatrick#define CTMP2L r6
2053cab2bb3Spatrick		CTMP2 = #0
		// EXPC = exponent difference (larger minus smaller).
2063cab2bb3Spatrick		EXPC = sub(EXPA,EXPC)
2073cab2bb3Spatrick	}
2083cab2bb3Spatrick	{
2093cab2bb3Spatrick		if (P_TMP) PP_HH = PP_HH_TMP
		// P_TMP is re-purposed: exponent difference exceeds 63.
2103cab2bb3Spatrick		P_TMP = cmp.gt(EXPC,#63)
		// Exchange the low halves; both moves read the pre-packet values.
2113cab2bb3Spatrick		if (SWAP) PP_LL = CTMP2
2123cab2bb3Spatrick		if (SWAP) CTMP2 = PP_LL
2133cab2bb3Spatrick	}
	// r15:14 is reused first as a zero constant, later as the constant one.
2143cab2bb3Spatrick#undef PP_HH_TMP
2153cab2bb3Spatrick//#define ONE r15:14
2163cab2bb3Spatrick//#define S_ONE r14
2173cab2bb3Spatrick#define ZERO r15:14
2183cab2bb3Spatrick#define S_ZERO r15
	// p3 is reused as the carry predicate of the 128-bit addition.
2193cab2bb3Spatrick#undef PROD_NEG
2203cab2bb3Spatrick#define P_CARRY p3
2213cab2bb3Spatrick	{
2223cab2bb3Spatrick		if (SWAP) PP_HH = CTMP	// Swap C and PP
2233cab2bb3Spatrick		if (SWAP) CTMP = PP_HH
2243cab2bb3Spatrick		if (P_TMP) EXPC = add(EXPC,#-64)
		// Upper limit for the remaining right shift.
2253cab2bb3Spatrick		TMP = #63
2263cab2bb3Spatrick	}
2273cab2bb3Spatrick	{
2283cab2bb3Spatrick		// If diff > 63, pre-shift-right by 64...
2293cab2bb3Spatrick		if (P_TMP) CTMP2 = CTMP
		// TMP becomes the sign word of the high half (0 or -1).
2303cab2bb3Spatrick		TMP = asr(CTMPH,#31)
		// Reads TMP as written by the previous packet (#63).
2313cab2bb3Spatrick		RIGHTSHIFT = min(EXPC,TMP)
2323cab2bb3Spatrick		LEFTSHIFT = #0
2333cab2bb3Spatrick	}
	// The C operand registers are dead; r5:4 now collects sticky bits.
2343cab2bb3Spatrick#undef C
2353cab2bb3Spatrick#undef CH
2363cab2bb3Spatrick#undef CL
2373cab2bb3Spatrick#define STICKIES r5:4
2383cab2bb3Spatrick#define STICKIESH r5
2393cab2bb3Spatrick#define STICKIESL r4
2403cab2bb3Spatrick	{
2413cab2bb3Spatrick		if (P_TMP) CTMP = combine(TMP,TMP)	// sign extension of pre-shift-right-64
		// Field of width RIGHTSHIFT at offset LEFTSHIFT (still 0 here):
		// the low bits that the right shift below discards.
2423cab2bb3Spatrick		STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
2433cab2bb3Spatrick		CTMP2 = lsr(CTMP2,RIGHTSHIFT)
2443cab2bb3Spatrick		LEFTSHIFT = sub(#64,RIGHTSHIFT)
2453cab2bb3Spatrick	}
2463cab2bb3Spatrick	{
2473cab2bb3Spatrick		ZERO = #0
		// Mask with bit 0 clear, used below on CTMP2L.
2483cab2bb3Spatrick		TMP = #-2
		// Bring the bits shifted out of the high half into the low half.
2493cab2bb3Spatrick		CTMP2 |= lsl(CTMP,LEFTSHIFT)
2503cab2bb3Spatrick		CTMP = asr(CTMP,RIGHTSHIFT)
2513cab2bb3Spatrick	}
2523cab2bb3Spatrick	{
2533cab2bb3Spatrick		P_CARRY = cmp.gtu(STICKIES,ZERO)	// If we have sticky bits from C shift
2543cab2bb3Spatrick		if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
2553cab2bb3Spatrick#undef ZERO
2563cab2bb3Spatrick#define ONE r15:14
2573cab2bb3Spatrick#define S_ONE r14
2583cab2bb3Spatrick		ONE = #1
		// The sticky information now lives in P_CARRY; restart collection.
2593cab2bb3Spatrick		STICKIES = #0
2603cab2bb3Spatrick	}
	// 128-bit addition: low halves first, then high halves with the carry.
2613cab2bb3Spatrick	{
2623cab2bb3Spatrick		PP_LL = add(CTMP2,PP_LL,P_CARRY):carry	// use the carry to add the sticky
2633cab2bb3Spatrick	}
2643cab2bb3Spatrick	{
2653cab2bb3Spatrick		PP_HH = add(CTMP,PP_HH,P_CARRY):carry
		// Value LEFTSHIFT takes below when PP_HH consists only of sign bits.
2663cab2bb3Spatrick		TMP = #62
2673cab2bb3Spatrick	}
2683cab2bb3Spatrick	// PP_HH:PP_LL now holds the sum
2693cab2bb3Spatrick	// We may need to normalize left, up to ??? bits.
2703cab2bb3Spatrick	//
2713cab2bb3Spatrick	// I think that if we have massive cancellation, the range we normalize by
2723cab2bb3Spatrick	// is still limited
2733cab2bb3Spatrick	{
		// Leading-bit count minus 2 = left shift needed to normalize PP_HH.
2743cab2bb3Spatrick		LEFTSHIFT = add(clb(PP_HH),#-2)
2753cab2bb3Spatrick		if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f	// all sign bits?
2763cab2bb3Spatrick	}
2773cab2bb3Spatrick	// We had all sign bits, shift left by 62.
2783cab2bb3Spatrick	{
		// Top 62 bits of PP_LL become the low 62 bits of PP_HH.
2793cab2bb3Spatrick		CTMP = extractu(PP_LL,#62,#2)
2803cab2bb3Spatrick		PP_LL = asl(PP_LL,#62)
2813cab2bb3Spatrick		EXPA = add(EXPA,#-62)			// And adjust exponent of result
2823cab2bb3Spatrick	}
2833cab2bb3Spatrick	{
2843cab2bb3Spatrick		PP_HH = insert(CTMP,#62,#0)		// Then shift 63
2853cab2bb3Spatrick	}
2863cab2bb3Spatrick	{
		// Recompute the normalization shift for the shifted value.
2873cab2bb3Spatrick		LEFTSHIFT = add(clb(PP_HH),#-2)
2883cab2bb3Spatrick	}
2893cab2bb3Spatrick	.falign
2903cab2bb3Spatrick1:
	// Normalize the 128-bit sum into the 64-bit CTMP; bits that do not fit
	// are accumulated in STICKIES.
2913cab2bb3Spatrick	{
2923cab2bb3Spatrick		CTMP = asl(PP_HH,LEFTSHIFT)
2933cab2bb3Spatrick		STICKIES |= asl(PP_LL,LEFTSHIFT)
2943cab2bb3Spatrick		RIGHTSHIFT = sub(#64,LEFTSHIFT)
2953cab2bb3Spatrick		EXPA = sub(EXPA,LEFTSHIFT)
2963cab2bb3Spatrick	}
2973cab2bb3Spatrick	{
2983cab2bb3Spatrick		CTMP |= lsr(PP_LL,RIGHTSHIFT)
		// EXACT = (1 >u STICKIES), i.e. no sticky bit is set.
2993cab2bb3Spatrick		EXACT = cmp.gtu(ONE,STICKIES)
		// Upper bound for the in-range exponent test below.
3003cab2bb3Spatrick		TMP = #BIAS+BIAS-2
3013cab2bb3Spatrick	}
3023cab2bb3Spatrick	{
		// Fold the sticky information into bit 0 of the result mantissa.
3033cab2bb3Spatrick		if (!EXACT) CTMPL = or(CTMPL,S_ONE)
3043cab2bb3Spatrick		// If EXPA is overflow/underflow, jump to ovf_unf
		// Both compares write P_TMP in one packet: true only when
		// 1 < EXPA <= 2*BIAS-2.
3053cab2bb3Spatrick		P_TMP = !cmp.gt(EXPA,TMP)
3063cab2bb3Spatrick		P_TMP = cmp.gt(EXPA,#1)
3073cab2bb3Spatrick		if (!P_TMP.new) jump:nt .Lfma_ovf_unf
3083cab2bb3Spatrick	}
3093cab2bb3Spatrick	{
3103cab2bb3Spatrick		// XXX: FIXME: should PP_HH for check of zero be CTMP?
3113cab2bb3Spatrick		P_TMP = cmp.gtu(ONE,CTMP)		// is result true zero?
		// Integer-to-double conversion of the signed mantissa; the exponent
		// is corrected by adding to the high word in the next packet.
3123cab2bb3Spatrick		A = convert_d2df(CTMP)
3133cab2bb3Spatrick		EXPA = add(EXPA,#-BIAS-60)
		// Restore r17:16 saved on entry.
3143cab2bb3Spatrick		PP_HH = memd(r29+#0)
3153cab2bb3Spatrick	}
3163cab2bb3Spatrick	{
		// Add the exponent correction directly into the exponent field.
3173cab2bb3Spatrick		AH += asl(EXPA,#HI_MANTBITS)
		// Restore r19:18; EXPA (r18) above is read before this load lands.
3183cab2bb3Spatrick		EXPCA = memd(r29+#8)
3193cab2bb3Spatrick		if (!P_TMP) dealloc_return		// not zero, return
3203cab2bb3Spatrick	}
3213cab2bb3Spatrick.Ladd_yields_zero:
3223cab2bb3Spatrick	// We had full cancellation.  Return +/- zero (-0 when round-down)
	// Reached by fall-through from the normal path and by a jump from
	// .Lfma_ovf_unf; the frame is still allocated in both cases.
3233cab2bb3Spatrick	{
3243cab2bb3Spatrick		TMP = USR
3253cab2bb3Spatrick		A = #0
3263cab2bb3Spatrick	}
3273cab2bb3Spatrick	{
		// 2-bit rounding-mode field of USR.
3283cab2bb3Spatrick		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
		// Restore r17:16 and r19:18 (needed when entered from
		// .Lfma_ovf_unf, which has not reloaded them yet).
3293cab2bb3Spatrick		PP_HH = memd(r29+#0)
3303cab2bb3Spatrick		EXPCA = memd(r29+#8)
3313cab2bb3Spatrick	}
3323cab2bb3Spatrick	{
		// Rounding mode 2: return -0 by setting only the sign bit.
3333cab2bb3Spatrick		p0 = cmp.eq(TMP,#2)
3343cab2bb3Spatrick		if (p0.new) AH = ##0x80000000
3353cab2bb3Spatrick		dealloc_return
3363cab2bb3Spatrick	}
3373cab2bb3Spatrick
	// The shift-count and addend aliases are not used past this point.
3383cab2bb3Spatrick#undef RIGHTLEFTSHIFT
3393cab2bb3Spatrick#undef RIGHTSHIFT
3403cab2bb3Spatrick#undef LEFTSHIFT
3413cab2bb3Spatrick#undef CTMP2
3423cab2bb3Spatrick#undef CTMP2H
3433cab2bb3Spatrick#undef CTMP2L
3443cab2bb3Spatrick
3453cab2bb3Spatrick.Lfma_ovf_unf:
	// The exponent in EXPA was outside the directly representable range.
	// CTMP holds the normalized two's-complement sum. Dispatches to the
	// overflow path, the "possibly underflowed" path, or falls through to
	// build a denormal result.
3463cab2bb3Spatrick	{
		// A zero sum is handled by the exact-cancellation path.
3473cab2bb3Spatrick		p0 = cmp.gtu(ONE,CTMP)
3483cab2bb3Spatrick		if (p0.new) jump:nt .Ladd_yields_zero
3493cab2bb3Spatrick	}
3503cab2bb3Spatrick	{
3513cab2bb3Spatrick		A = convert_d2df(CTMP)
3523cab2bb3Spatrick		EXPA = add(EXPA,#-BIAS-60)
		// TMP keeps EXPA as it was before the correction above.
3533cab2bb3Spatrick		TMP = EXPA
3543cab2bb3Spatrick	}
3553cab2bb3Spatrick#define NEW_EXPB r7
3563cab2bb3Spatrick#define NEW_EXPA r6
3573cab2bb3Spatrick	{
3583cab2bb3Spatrick		AH += asl(EXPA,#HI_MANTBITS)
		// Exponent field produced by the conversion; AH is read before the
		// addition in this same packet takes effect.
3593cab2bb3Spatrick		NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
3603cab2bb3Spatrick	}
3613cab2bb3Spatrick	{
		// Exponent the result would have with unlimited exponent range.
3623cab2bb3Spatrick		NEW_EXPA = add(EXPA,NEW_EXPB)
		// Restore r17:16 and r19:18; from here on the exponent lives in r6.
3633cab2bb3Spatrick		PP_HH = memd(r29+#0)
3643cab2bb3Spatrick		EXPCA = memd(r29+#8)
3653cab2bb3Spatrick#undef PP_HH
3663cab2bb3Spatrick#undef PP_HH_H
3673cab2bb3Spatrick#undef PP_HH_L
3683cab2bb3Spatrick#undef EXPCA
3693cab2bb3Spatrick#undef EXPC
3703cab2bb3Spatrick#undef EXPA
3713cab2bb3Spatrick#undef PP_LL
3723cab2bb3Spatrick#undef PP_LL_H
3733cab2bb3Spatrick#undef PP_LL_L
3743cab2bb3Spatrick#define EXPA r6
3753cab2bb3Spatrick#define EXPB r7
3763cab2bb3Spatrick#define EXPBA r7:6
3773cab2bb3Spatrick#define ATMP r9:8
3783cab2bb3Spatrick#define ATMPH r9
3793cab2bb3Spatrick#define ATMPL r8
3803cab2bb3Spatrick#undef NEW_EXPB
3813cab2bb3Spatrick#undef NEW_EXPA
		// Magnitude of the sum.
3823cab2bb3Spatrick		ATMP = abs(CTMP)
3833cab2bb3Spatrick	}
3843cab2bb3Spatrick	{
		// Exponent above the largest finite exponent field: overflow.
3853cab2bb3Spatrick		p0 = cmp.gt(EXPA,##BIAS+BIAS)
3863cab2bb3Spatrick		if (p0.new) jump:nt .Lfma_ovf
3873cab2bb3Spatrick	}
3883cab2bb3Spatrick	{
		// Positive exponent: A is already usable, only flags may need work.
3893cab2bb3Spatrick		p0 = cmp.gt(EXPA,#0)
3903cab2bb3Spatrick		if (p0.new) jump:nt .Lpossible_unf
3913cab2bb3Spatrick	}
3923cab2bb3Spatrick	{
3933cab2bb3Spatrick		// TMP has original EXPA.
3943cab2bb3Spatrick		// ATMP is corresponding value
3953cab2bb3Spatrick		// Normalize ATMP and shift right to correct location
3963cab2bb3Spatrick		EXPB = add(clb(ATMP),#-2)		// Amount to left shift to normalize
3973cab2bb3Spatrick		EXPA = sub(#1+5,TMP)			// Amount to right shift to denormalize
		// p3 = the sum is non-negative.
3983cab2bb3Spatrick		p3 = cmp.gt(CTMPH,#-1)
3993cab2bb3Spatrick	}
4003cab2bb3Spatrick	// Underflow
4013cab2bb3Spatrick	// We know that the infinite range exponent should be EXPA
4023cab2bb3Spatrick	// CTMP is 2's complement, ATMP is abs(CTMP)
4033cab2bb3Spatrick	{
4043cab2bb3Spatrick		EXPA = add(EXPA,EXPB)		// how much to shift back right
4053cab2bb3Spatrick		ATMP = asl(ATMP,EXPB)		// shift left
		// AH/AL are free here: AH carries a working copy of USR.
4063cab2bb3Spatrick		AH = USR
		// Upper limit for the right shift.
4073cab2bb3Spatrick		TMP = #63
4083cab2bb3Spatrick	}
4093cab2bb3Spatrick	{
4103cab2bb3Spatrick		EXPB = min(EXPA,TMP)
		// EXPBA becomes the extract control: width EXPB, offset 0.
4113cab2bb3Spatrick		EXPA = #0
		// USR bits to OR in when the result is inexact.
		// NOTE(review): presumably the underflow and inexact sticky flags
		// -- confirm against the USR layout.
4123cab2bb3Spatrick		AL = #0x0030
4133cab2bb3Spatrick	}
4143cab2bb3Spatrick	{
		// B collects the low EXPB bits that the shift below discards.
4153cab2bb3Spatrick		B = extractu(ATMP,EXPBA)
4163cab2bb3Spatrick		ATMP = asr(ATMP,EXPB)
4173cab2bb3Spatrick	}
4183cab2bb3Spatrick	{
		// Any discarded bit set: fold a sticky bit into bit 0.
4193cab2bb3Spatrick		p0 = cmp.gtu(ONE,B)
4203cab2bb3Spatrick		if (!p0.new) ATMPL = or(ATMPL,S_ONE)
		// Set a fixed bit above the mantissa so that the conversion below
		// always produces the same exponent, which TMP later cancels.
4213cab2bb3Spatrick		ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
4223cab2bb3Spatrick	}
4233cab2bb3Spatrick	{
4243cab2bb3Spatrick		CTMP = neg(ATMP)
		// p1 = the FUDGE2 bits below the mantissa are all clear (exact).
4253cab2bb3Spatrick		p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
4263cab2bb3Spatrick		if (!p1.new) AH = or(AH,AL)
4273cab2bb3Spatrick		B = #0
4283cab2bb3Spatrick	}
4293cab2bb3Spatrick	{
		// Non-negative sum: use the magnitude, otherwise its negation.
4303cab2bb3Spatrick		if (p3) CTMP = ATMP
4313cab2bb3Spatrick		USR = AH
		// Exponent correction that undoes the conversion of the guard bit.
4323cab2bb3Spatrick		TMP = #-BIAS-(MANTBITS+FUDGE2)
4333cab2bb3Spatrick	}
4343cab2bb3Spatrick	{
4353cab2bb3Spatrick		A = convert_d2df(CTMP)
4363cab2bb3Spatrick	}
4373cab2bb3Spatrick	{
4383cab2bb3Spatrick		AH += asl(TMP,#HI_MANTBITS)
4393cab2bb3Spatrick		dealloc_return
4403cab2bb3Spatrick	}
4413cab2bb3Spatrick.Lpossible_unf:
	// The result exponent is positive, so A already holds the value to
	// return. Only the status flags in USR may still need updating.
4423cab2bb3Spatrick	{
		// Mask of every bit of AH except the sign and the lowest exponent bit.
4433cab2bb3Spatrick		TMP = ##0x7fefffff
4443cab2bb3Spatrick		ATMP = abs(CTMP)
4453cab2bb3Spatrick	}
4463cab2bb3Spatrick	{
		// Both compares write p0 in one packet: true only when AL == 0 and
		// AH has none of the masked bits set. Any other value returns now.
4473cab2bb3Spatrick		p0 = cmp.eq(AL,#0)
4483cab2bb3Spatrick		p0 = bitsclr(AH,TMP)
4493cab2bb3Spatrick		if (!p0.new) dealloc_return:t
4503cab2bb3Spatrick		TMP = #0x7fff
4513cab2bb3Spatrick	}
4523cab2bb3Spatrick	{
		// p0 = all of the low 15 bits of ATMPH are set.
4533cab2bb3Spatrick		p0 = bitsset(ATMPH,TMP)
		// BH/BL are free here: BH carries a working copy of USR.
4543cab2bb3Spatrick		BH = USR
		// NOTE(review): presumably the underflow and inexact sticky flags
		// -- confirm against the USR layout.
4553cab2bb3Spatrick		BL = #0x0030
4563cab2bb3Spatrick	}
4573cab2bb3Spatrick	{
4583cab2bb3Spatrick		if (p0) BH = or(BH,BL)
4593cab2bb3Spatrick	}
4603cab2bb3Spatrick	{
4613cab2bb3Spatrick		USR = BH
4623cab2bb3Spatrick	}
4633cab2bb3Spatrick	{
		// Floating-point compare whose result is not used afterwards.
		// NOTE(review): presumably issued so that a newly enabled exception
		// is taken before returning -- confirm.
4643cab2bb3Spatrick		p0 = dfcmp.eq(A,A)
4653cab2bb3Spatrick		dealloc_return
4663cab2bb3Spatrick	}
4673cab2bb3Spatrick.Lfma_ovf:
	// Overflow: the result exponent does not fit in a double.
	// Returns either infinity or the largest finite magnitude, with the sign
	// of the sum, depending on the rounding mode in USR:
	//   - mode 1, or (mode ^ sign) == 2: largest finite value
	//   - otherwise: infinity
	// Also ORs 0x28 into USR.
	// NOTE(review): 0x28 is presumably the overflow and inexact sticky
	// flags -- confirm against the USR layout.
4683cab2bb3Spatrick	{
4693cab2bb3Spatrick		TMP = USR
		// CTMP = largest finite double, 0x7fef_ffff_ffff_ffff.
4703cab2bb3Spatrick		CTMP = combine(##0x7fefffff,#-1)
		// A receives the value of CTMP from before this packet (the
		// two's-complement sum), so bit 31 of AH is the sign of the result.
4713cab2bb3Spatrick		A = CTMP
4723cab2bb3Spatrick	}
4733cab2bb3Spatrick	{
		// ATMP = +infinity.
4743cab2bb3Spatrick		ATMP = combine(##0x7ff00000,#0)
		// BH = 2-bit rounding-mode field.
4753cab2bb3Spatrick		BH = extractu(TMP,#2,#SR_ROUND_OFF)
4763cab2bb3Spatrick		TMP = or(TMP,#0x28)
4773cab2bb3Spatrick	}
4783cab2bb3Spatrick	{
4793cab2bb3Spatrick		USR = TMP
		// BH = rounding mode with its low bit flipped for a negative result.
4803cab2bb3Spatrick		BH ^= lsr(AH,#31)
		// BL = unmodified rounding mode (BH is read before the xor lands).
4813cab2bb3Spatrick		BL = BH
4823cab2bb3Spatrick	}
4833cab2bb3Spatrick	{
		// Both compares write p0 in one packet and are ANDed:
		// p0 = (mode != 1) && ((mode ^ sign) != 2), i.e. return infinity.
4843cab2bb3Spatrick		p0 = !cmp.eq(BL,#1)
4853cab2bb3Spatrick		p0 = !cmp.eq(BH,#2)
4863cab2bb3Spatrick	}
4873cab2bb3Spatrick	{
		// The self-compare is kept for its side effects only and now
		// targets p1. It used to write p0 in this packet, which replaced the
		// rounding decision with "infinity == infinity" (always true), so
		// the largest-finite result could never be selected.
4883cab2bb3Spatrick		p1 = dfcmp.eq(ATMP,ATMP)
4893cab2bb3Spatrick		if (p0) CTMP = ATMP
4903cab2bb3Spatrick	}
4913cab2bb3Spatrick	{
		// Copy the low 63 bits of the chosen magnitude, keeping the sign
		// bit that is already in A.
4923cab2bb3Spatrick		A = insert(CTMP,#63,#0)
4933cab2bb3Spatrick		dealloc_return
4943cab2bb3Spatrick	}
	// Register re-aliasing for the abnormal-operand paths: r11:10 becomes the
	// magnitude of B, and r5:4 is the C operand again.
4953cab2bb3Spatrick#undef CTMP
4963cab2bb3Spatrick#undef CTMPH
4973cab2bb3Spatrick#undef CTMPL
4983cab2bb3Spatrick#define BTMP r11:10
4993cab2bb3Spatrick#define BTMPH r11
5003cab2bb3Spatrick#define BTMPL r10
5013cab2bb3Spatrick
5023cab2bb3Spatrick#undef STICKIES
5033cab2bb3Spatrick#undef STICKIESH
5043cab2bb3Spatrick#undef STICKIESL
5053cab2bb3Spatrick#define C r5:4
5063cab2bb3Spatrick#define CH r5
5073cab2bb3Spatrick#define CL r4
5083cab2bb3Spatrick
5093cab2bb3Spatrick.Lfma_abnormal_ab:
	// At least one of A and B is not a normal number (zero, subnormal,
	// infinity or NaN). The frame allocated at entry is released first;
	// every exit from here either returns with jumpr r31 or restarts at
	// .Lfma_begin, which allocates it again.
5103cab2bb3Spatrick	{
		// Magnitudes (low 63 bits) of A and B.
5113cab2bb3Spatrick		ATMP = extractu(A,#63,#0)
5123cab2bb3Spatrick		BTMP = extractu(B,#63,#0)
5133cab2bb3Spatrick		deallocframe
5143cab2bb3Spatrick	}
5153cab2bb3Spatrick	{
		// Make A the operand with the larger magnitude. Both conditional
		// moves read the pre-packet values, so this is a swap.
5163cab2bb3Spatrick		p3 = cmp.gtu(ATMP,BTMP)
5173cab2bb3Spatrick		if (!p3.new) A = B		// sort values
5183cab2bb3Spatrick		if (!p3.new) B = A
5193cab2bb3Spatrick	}
5203cab2bb3Spatrick	{
		// Class mask 0x0f matches everything except NaN.
5213cab2bb3Spatrick		p0 = dfclass(A,#0x0f)		// A NaN?
5223cab2bb3Spatrick		if (!p0.new) jump:nt .Lnan
		// Keep the magnitude copies in the same order as A and B.
5233cab2bb3Spatrick		if (!p3) ATMP = BTMP
5243cab2bb3Spatrick		if (!p3) BTMP = ATMP
5253cab2bb3Spatrick	}
5263cab2bb3Spatrick	{
		// p1 = A is infinity AND B is nonzero (both writes are ANDed).
5273cab2bb3Spatrick		p1 = dfclass(A,#0x08)		// A is infinity
5283cab2bb3Spatrick		p1 = dfclass(B,#0x0e)		// B is nonzero
5293cab2bb3Spatrick	}
5303cab2bb3Spatrick	{
		// p0 = A is infinity AND B is zero: an invalid operation.
5313cab2bb3Spatrick		p0 = dfclass(A,#0x08)		// a is inf
5323cab2bb3Spatrick		p0 = dfclass(B,#0x01)		// b is zero
5333cab2bb3Spatrick	}
5343cab2bb3Spatrick	{
5353cab2bb3Spatrick		if (p1) jump .Lab_inf
		// p2 = B is zero.
5363cab2bb3Spatrick		p2 = dfclass(B,#0x01)
5373cab2bb3Spatrick	}
5383cab2bb3Spatrick	{
5393cab2bb3Spatrick		if (p0) jump .Linvalid
5403cab2bb3Spatrick		if (p2) jump .Lab_true_zero
		// Mask of the top five exponent bits in the high word.
5413cab2bb3Spatrick		TMP = ##0x7c000000
5423cab2bb3Spatrick	}
5433cab2bb3Spatrick	// We are left with a normal or subnormal times a subnormal, A > B
5443cab2bb3Spatrick	// If A and B are both very small, we will go to a single sticky bit; replace
5453cab2bb3Spatrick	// A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
5463cab2bb3Spatrick	// if A and B might multiply to something bigger, decrease A exp and increase B exp
5473cab2bb3Spatrick	// and start over
5483cab2bb3Spatrick	{
		// Top five exponent bits of A all clear: treat the product as tiny.
5493cab2bb3Spatrick		p0 = bitsclr(AH,TMP)
5503cab2bb3Spatrick		if (p0.new) jump:nt .Lfma_ab_tiny
5513cab2bb3Spatrick	}
5523cab2bb3Spatrick	{
		// Left shift that moves the leading one of B's magnitude up to the
		// lowest exponent bit position.
5533cab2bb3Spatrick		TMP = add(clb(BTMP),#-EXPBITS)
5543cab2bb3Spatrick	}
5553cab2bb3Spatrick	{
5563cab2bb3Spatrick		BTMP = asl(BTMP,TMP)
5573cab2bb3Spatrick	}
5583cab2bb3Spatrick	{
		// Write the shifted magnitude back into B, keeping its sign bit.
5593cab2bb3Spatrick		B = insert(BTMP,#63,#0)
		// Compensate by lowering the exponent of A by the same amount.
5603cab2bb3Spatrick		AH -= asl(TMP,#HI_MANTBITS)
5613cab2bb3Spatrick	}
	// Retry from the top with the rescaled operands.
562*1f9cb04fSpatrick	jump .Lfma_begin
5633cab2bb3Spatrick
5643cab2bb3Spatrick.Lfma_ab_tiny:
	// A and B are both very small: replace the low 63 bits of each with
	// 0x0010_0000_0000_0000 (their sign bits are kept) and start over.
5653cab2bb3Spatrick	ATMP = combine(##0x00100000,#0)
5663cab2bb3Spatrick	{
5673cab2bb3Spatrick		A = insert(ATMP,#63,#0)
5683cab2bb3Spatrick		B = insert(ATMP,#63,#0)
5693cab2bb3Spatrick	}
570*1f9cb04fSpatrick	jump .Lfma_begin
5713cab2bb3Spatrick
5723cab2bb3Spatrick.Lab_inf:
	// A is infinity and B is nonzero, so A*B is an infinity. The frame has
	// already been released, hence the plain jumpr r31 returns.
5733cab2bb3Spatrick	{
		// Reduce B to its sign bit (0 or 1).
5743cab2bb3Spatrick		B = lsr(B,#63)
5753cab2bb3Spatrick		p0 = dfclass(C,#0x10)
5763cab2bb3Spatrick	}
5773cab2bb3Spatrick	{
		// Fold the sign of B into A: A is now the correctly signed product.
5783cab2bb3Spatrick		A ^= asl(B,#63)
		// C is NaN: let the NaN path produce the result.
5793cab2bb3Spatrick		if (p0) jump .Lnan
5803cab2bb3Spatrick	}
5813cab2bb3Spatrick	{
5823cab2bb3Spatrick		p1 = dfclass(C,#0x08)
5833cab2bb3Spatrick		if (p1.new) jump:nt .Lfma_inf_plus_inf
5843cab2bb3Spatrick	}
5853cab2bb3Spatrick	// A*B is +/- inf, C is finite.  Return A
5863cab2bb3Spatrick	{
5873cab2bb3Spatrick		jumpr r31
5883cab2bb3Spatrick	}
5893cab2bb3Spatrick	.falign
5903cab2bb3Spatrick.Lfma_inf_plus_inf:
5913cab2bb3Spatrick	{	// adding infinities of different signs is invalid
		// Equal means same-signed infinities: the product A is the result.
5923cab2bb3Spatrick		p0 = dfcmp.eq(A,C)
5933cab2bb3Spatrick		if (!p0.new) jump:nt .Linvalid
5943cab2bb3Spatrick	}
5953cab2bb3Spatrick	{
5963cab2bb3Spatrick		jumpr r31
5973cab2bb3Spatrick	}
5983cab2bb3Spatrick
5993cab2bb3Spatrick.Lnan:
	// At least one operand is NaN. Runs a conversion over the operands and
	// returns the all-ones bit pattern in A. No frame is allocated here.
6003cab2bb3Spatrick	{
		// B and C are each replaced by A unless they are themselves NaN.
6013cab2bb3Spatrick		p0 = dfclass(B,#0x10)
6023cab2bb3Spatrick		p1 = dfclass(C,#0x10)
6033cab2bb3Spatrick		if (!p0.new) B = A
6043cab2bb3Spatrick		if (!p1.new) C = A
6053cab2bb3Spatrick	}
6063cab2bb3Spatrick	{	// find sNaNs
		// The converted values are discarded.
		// NOTE(review): presumably the conversions are issued so that a
		// signaling NaN raises the invalid flag -- confirm.
6073cab2bb3Spatrick		BH = convert_df2sf(B)
6083cab2bb3Spatrick		BL = convert_df2sf(C)
6093cab2bb3Spatrick	}
6103cab2bb3Spatrick	{
		// Reads A before the next instruction in the packet overwrites it.
6113cab2bb3Spatrick		BH = convert_df2sf(A)
		// Result: all ones, which is a NaN bit pattern.
6123cab2bb3Spatrick		A = #-1
6133cab2bb3Spatrick		jumpr r31
6143cab2bb3Spatrick	}
6153cab2bb3Spatrick
6163cab2bb3Spatrick.Linvalid:
	// Invalid operation (infinity * 0, or infinities of opposite sign
	// added). The result is produced by widening a single-precision
	// signaling NaN to double.
	// NOTE(review): presumably the conversion also raises the invalid flag
	// -- confirm.
6173cab2bb3Spatrick	{
6183cab2bb3Spatrick		TMP = ##0x7f800001		// sp snan
6193cab2bb3Spatrick	}
6203cab2bb3Spatrick	{
6213cab2bb3Spatrick		A = convert_sf2df(TMP)
6223cab2bb3Spatrick		jumpr r31
6233cab2bb3Spatrick	}
6243cab2bb3Spatrick
6253cab2bb3Spatrick.Lab_true_zero:
6263cab2bb3Spatrick	// B is zero, A is finite number
	// The product is therefore a signed zero and the result is decided by C.
6273cab2bb3Spatrick	{
		// C is NaN: hand C to the NaN path in A.
6283cab2bb3Spatrick		p0 = dfclass(C,#0x10)
6293cab2bb3Spatrick		if (p0.new) jump:nt .Lnan
6303cab2bb3Spatrick		if (p0.new) A = C
6313cab2bb3Spatrick	}
6323cab2bb3Spatrick	{
6333cab2bb3Spatrick		p0 = dfcmp.eq(B,C)		// is C also zero?
6343cab2bb3Spatrick		AH = lsr(AH,#31)		// get sign
6353cab2bb3Spatrick	}
6363cab2bb3Spatrick	{
6373cab2bb3Spatrick		BH ^= asl(AH,#31)		// form correctly signed zero in B
6383cab2bb3Spatrick		if (!p0) A = C			// If C is not zero, return C
6393cab2bb3Spatrick		if (!p0) jumpr r31
6403cab2bb3Spatrick	}
6413cab2bb3Spatrick	// B has correctly signed zero, C is also zero
6423cab2bb3Spatrick.Lzero_plus_zero:
6433cab2bb3Spatrick	{
		// Integer compare of the full bit patterns: equal means both zeros
		// have the same sign, and that zero (copied into A) is returned.
6443cab2bb3Spatrick		p0 = cmp.eq(B,C)		// yes, scalar equals.  +0++0 or -0+-0
6453cab2bb3Spatrick		if (p0.new) jumpr:t r31
6463cab2bb3Spatrick		A = B
6473cab2bb3Spatrick	}
	// Zeros of opposite sign: the sign of the sum depends on the rounding
	// mode, exactly as in .Ladd_yields_zero.
6483cab2bb3Spatrick	{
6493cab2bb3Spatrick		TMP = USR
6503cab2bb3Spatrick	}
6513cab2bb3Spatrick	{
6523cab2bb3Spatrick		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
6533cab2bb3Spatrick		A = #0
6543cab2bb3Spatrick	}
6553cab2bb3Spatrick	{
		// Rounding mode 2 yields -0, every other mode +0.
6563cab2bb3Spatrick		p0 = cmp.eq(TMP,#2)
6573cab2bb3Spatrick		if (p0.new) AH = ##0x80000000
6583cab2bb3Spatrick		jumpr r31
6593cab2bb3Spatrick	}
	// r11:10 goes back to being the unpacked mantissa of C.
6603cab2bb3Spatrick#undef BTMP
6613cab2bb3Spatrick#undef BTMPH
6623cab2bb3Spatrick#undef BTMPL
6633cab2bb3Spatrick#define CTMP r11:10
6643cab2bb3Spatrick	.falign
6653cab2bb3Spatrick.Lfma_abnormal_c:
6663cab2bb3Spatrick	// We know that AB is normal * normal
6673cab2bb3Spatrick	// C is not normal: zero, subnormal, inf, or NaN.
	// Entered with the frame from the entry sequence still allocated; it is
	// released in the first packet because most exits return with jumpr r31
	// or tail-jump to the multiply routine.
6683cab2bb3Spatrick	{
6693cab2bb3Spatrick		p0 = dfclass(C,#0x10)		// is C NaN?
6703cab2bb3Spatrick		if (p0.new) jump:nt .Lnan
6713cab2bb3Spatrick		if (p0.new) A = C		// move NaN to A
6723cab2bb3Spatrick		deallocframe
6733cab2bb3Spatrick	}
6743cab2bb3Spatrick	{
		// finite + infinity = that infinity.
6753cab2bb3Spatrick		p0 = dfclass(C,#0x08)		// is C inf?
6763cab2bb3Spatrick		if (p0.new) A = C		// return C
6773cab2bb3Spatrick		if (p0.new) jumpr:nt r31
6783cab2bb3Spatrick	}
6793cab2bb3Spatrick	// zero or subnormal
6803cab2bb3Spatrick	// If we have a zero, and we know AB is normal*normal, we can just call normal multiply
6813cab2bb3Spatrick	{
		// Tail-jump: __hexagon_muldf3 returns directly to our caller.
6823cab2bb3Spatrick		p0 = dfclass(C,#0x01)		// is C zero?
6833cab2bb3Spatrick		if (p0.new) jump:nt __hexagon_muldf3
		// Exponent field value given to a subnormal C below.
6843cab2bb3Spatrick		TMP = #1
6853cab2bb3Spatrick	}
6863cab2bb3Spatrick	// Left with: subnormal
6873cab2bb3Spatrick	// Adjust C and jump back to restart
6883cab2bb3Spatrick	{
6893cab2bb3Spatrick		allocframe(#STACKSPACE)		// oops, deallocated above, re-allocate frame
		// No implicit one for a subnormal: the restart code inserts the
		// mantissa of C into an all-zero CTMP.
6903cab2bb3Spatrick		CTMP = #0
		// Force the exponent field of C to 1.
6913cab2bb3Spatrick		CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
6923cab2bb3Spatrick		jump .Lfma_abnormal_c_restart
6933cab2bb3Spatrick	}
// Give both exported entry symbols their size. The previous END(fma) named a
// symbol that this file never defines, so neither function symbol got one.
6943cab2bb3SpatrickEND(__hexagon_fmadf4)
END(__hexagon_fmadf5)
695