//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Q6_ALIAS(TAG) publishes __qdsp_<TAG> as an alias for __hexagon_<TAG>.
// END(TAG) emits the ELF .size directive for symbol TAG.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

// Double Precision Multiply

// Register-pair aliases for the three double-precision arguments:
// A = r1:0, B = r3:2, C = r5:4 (standard Hexagon argument registers).
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4



#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10

// Partial-product accumulators for the 64x64 mantissa multiply.
#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8

#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6


// NOTE: r17:16 and r19:18 are callee-saved; they are spilled to the
// allocframe area below before being used.
#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16

#define EXPA r18
#define EXPB r19
#define EXPBA r19:18

#define TMP r28

#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1

// IEEE-754 binary64 layout constants.
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32

#define ADJUST 4

#define FUDGE 7
#define FUDGE2 3

// Bit offset of the rounding-mode field in the USR register.
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

	// First, classify for normal values, and abort if abnormal
	//
	// Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
	//
	// Since we know that the 2 MSBs of the H registers is zero, we should never carry
	// the partial products that involve the H registers
	//
	// Try to buy X slots, at the expense of latency if needed
	//
	// We will have PP_HH with the upper bits of the product, PP_LL with the lower
	// PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
	// PP_HH can have a minimum of 0x0100_0000_0000_0000
	//
	// 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
	//
	// We need to align CTMP.
	// If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
	// If CTMP << PP align CTMP and add 128 bits.  Then compute sticky
	// If CTMP ~= PP, align CTMP and add 128 bits.  May have massive cancellation.
	//
	// Convert partial product and CTMP to 2's complement prior to addition
	//
	// After we add, we need to normalize into upper 64 bits, then compute sticky.

	// __hexagon_fmadf4 / __hexagon_fmadf5: double-precision fused
	// multiply-add.  Computes A*B + C with a single rounding, returning the
	// result in r1:0.  Abnormal (NaN/inf/zero/subnormal) operands are
	// dispatched to the .Lfma_abnormal_* paths below.
	.text
	.global __hexagon_fmadf4
	.type __hexagon_fmadf4,@function
	.global __hexagon_fmadf5
	.type __hexagon_fmadf5,@function
	Q6_ALIAS(fmadf5)
	.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
.Lfma_begin:
	{
		// p0 true only if both A and B are normal (dfclass mask #2).
		P_TMP = dfclass(A,#2)
		P_TMP = dfclass(B,#2)
		ATMP = #0
		BTMP = #0
	}
	{
		ATMP = insert(A,#MANTBITS,#EXPBITS-3)
		BTMP = insert(B,#MANTBITS,#EXPBITS-3)
		PP_ODD_H = ##0x10000000
		allocframe(#STACKSPACE)
	}
	{
		PP_LL = mpyu(ATMPL,BTMPL)
		if (!P_TMP) jump .Lfma_abnormal_ab
		// Set the implicit leading mantissa bit in both unpacked operands.
		ATMPH = or(ATMPH,PP_ODD_H)
		BTMPH = or(BTMPH,PP_ODD_H)
	}
	{
		P_TMP = dfclass(C,#2)
		if (!P_TMP.new) jump:nt .Lfma_abnormal_c
		CTMP = combine(PP_ODD_H,#0)
		PP_ODD = combine(#0,PP_LL_H)
	}
.Lfma_abnormal_c_restart:
	{
		PP_ODD += mpyu(BTMPL,ATMPH)
		CTMP = insert(C,#MANTBITS,#EXPBITS-3)
		// Spill callee-saved pairs r17:16 and r19:18 to the frame.
		memd(r29+#0) = PP_HH
		memd(r29+#8) = EXPBA
	}
	{
		PP_ODD += mpyu(ATMPL,BTMPH)
		EXPBA = neg(CTMP)
		P_TMP = cmp.gt(CH,#-1)	// p0 = (C is non-negative)
		TMP = xor(AH,BH)	// sign of the product A*B
	}
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
		PP_HH = combine(#0,PP_ODD_H)
		if (!P_TMP) CTMP = EXPBA	// C negative: use -|C| mantissa
	}
	{
		PP_HH += mpyu(ATMPH,BTMPH)
		PP_LL = combine(PP_ODD_L,PP_LL_L)
// Re-alias registers: the unpack temporaries are dead from here on.
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12

		EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
		EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
	}
	// PP_HH:PP_LL now has product
	// CTMP is negated
	// EXPA,B,C are extracted
	// We need to negate PP
	// Since we will be adding with carry later, if we need to negate,
	// just invert all bits now, which we can do conditionally and in parallel
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
	{
		EXPA = add(EXPA,#-BIAS+(ADJUST))
		PROD_NEG = !cmp.gt(TMP,#-1)	// product sign bit set => negative
		PP_LL_TMP = #0
		PP_HH_TMP = #0
	}
	{
		PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
		P_TMP = !cmp.gt(TMP,#-1)
		SWAP = cmp.gt(EXPC,EXPA)	// If C >> PP
		if (SWAP.new) EXPCA = combine(EXPA,EXPC)
	}
	{
		PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
		if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
		CTMP2 = #0
		EXPC = sub(EXPA,EXPC)	// alignment shift distance
	}
	{
		if (P_TMP) PP_HH = PP_HH_TMP
		P_TMP = cmp.gt(EXPC,#63)
		if (SWAP) PP_LL = CTMP2
		if (SWAP) CTMP2 = PP_LL
	}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
	{
		if (SWAP) PP_HH = CTMP	// Swap C and PP
		if (SWAP) CTMP = PP_HH
		if (P_TMP) EXPC = add(EXPC,#-64)
		TMP = #63
	}
	{
		// If diff > 63, pre-shift-right by 64...
		if (P_TMP) CTMP2 = CTMP
		TMP = asr(CTMPH,#31)
		RIGHTSHIFT = min(EXPC,TMP)
		LEFTSHIFT = #0
	}
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
	{
		if (P_TMP) CTMP = combine(TMP,TMP)	// sign extension of pre-shift-right-64
		STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)	// bits shifted out => sticky
		CTMP2 = lsr(CTMP2,RIGHTSHIFT)
		LEFTSHIFT = sub(#64,RIGHTSHIFT)
	}
	{
		ZERO = #0
		TMP = #-2
		CTMP2 |= lsl(CTMP,LEFTSHIFT)
		CTMP = asr(CTMP,RIGHTSHIFT)
	}
	{
		P_CARRY = cmp.gtu(STICKIES,ZERO)	// If we have sticky bits from C shift
		if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP)	// make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
		ONE = #1
		STICKIES = #0
	}
	{
		PP_LL = add(CTMP2,PP_LL,P_CARRY):carry	// use the carry to add the sticky
	}
	{
		PP_HH = add(CTMP,PP_HH,P_CARRY):carry
		TMP = #62
	}
	// PP_HH:PP_LL now holds the sum
	// We may need to normalize left, up to ??? bits.
	//
	// I think that if we have massive cancellation, the range we normalize by
	// is still limited
	{
		LEFTSHIFT = add(clb(PP_HH),#-2)
		if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f	// all sign bits?
	}
	// We had all sign bits, shift left by 62.
	{
		CTMP = extractu(PP_LL,#62,#2)
		PP_LL = asl(PP_LL,#62)
		EXPA = add(EXPA,#-62)	// And adjust exponent of result
	}
	{
		PP_HH = insert(CTMP,#62,#0)	// Then shift 63
	}
	{
		// Recompute normalization distance after the 62-bit pre-shift.
		LEFTSHIFT = add(clb(PP_HH),#-2)
	}
	.falign
1:
	{
		CTMP = asl(PP_HH,LEFTSHIFT)
		STICKIES |= asl(PP_LL,LEFTSHIFT)
		RIGHTSHIFT = sub(#64,LEFTSHIFT)
		EXPA = sub(EXPA,LEFTSHIFT)
	}
	{
		CTMP |= lsr(PP_LL,RIGHTSHIFT)
		EXACT = cmp.gtu(ONE,STICKIES)	// exact iff no sticky bits remain
		TMP = #BIAS+BIAS-2
	}
	{
		if (!EXACT) CTMPL = or(CTMPL,S_ONE)	// fold sticky into LSB
		// If EXPA is overflow/underflow, jump to ovf_unf
		P_TMP = !cmp.gt(EXPA,TMP)
		P_TMP = cmp.gt(EXPA,#1)
		if (!P_TMP.new) jump:nt .Lfma_ovf_unf
	}
	{
		// XXX: FIXME: should PP_HH for check of zero be CTMP?
		P_TMP = cmp.gtu(ONE,CTMP)	// is result true zero?
		A = convert_d2df(CTMP)
		EXPA = add(EXPA,#-BIAS-60)
		PP_HH = memd(r29+#0)	// restore callee-saved r17:16
	}
	{
		AH += asl(EXPA,#HI_MANTBITS)	// patch exponent into packed result
		EXPCA = memd(r29+#8)	// restore callee-saved r19:18
		if (!P_TMP) dealloc_return	// not zero, return
	}
.Ladd_yields_zero:
	// We had full cancellation.  Return +/- zero (-0 when round-down)
	{
		TMP = USR
		A = #0
	}
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// current rounding mode
		PP_HH = memd(r29+#0)
		EXPCA = memd(r29+#8)
	}
	{
		p0 = cmp.eq(TMP,#2)	// round-toward-minus-infinity?
		if (p0.new) AH = ##0x80000000	// then exact cancellation gives -0
		dealloc_return
	}

#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L

.Lfma_ovf_unf:
	{
		p0 = cmp.gtu(ONE,CTMP)
		if (p0.new) jump:nt .Ladd_yields_zero
	}
	{
		A = convert_d2df(CTMP)
		EXPA = add(EXPA,#-BIAS-60)
		TMP = EXPA
	}
#define NEW_EXPB r7
#define NEW_EXPA r6
	{
		AH += asl(EXPA,#HI_MANTBITS)
		NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
	}
	{
		NEW_EXPA = add(EXPA,NEW_EXPB)
		PP_HH = memd(r29+#0)	// restore callee-saved pairs
		EXPCA = memd(r29+#8)
// Re-alias again: exponent/temp names move into caller-saved registers.
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
		ATMP = abs(CTMP)
	}
	{
		p0 = cmp.gt(EXPA,##BIAS+BIAS)
		if (p0.new) jump:nt .Lfma_ovf
	}
	{
		p0 = cmp.gt(EXPA,#0)
		if (p0.new) jump:nt .Lpossible_unf
	}
	{
		// TMP has original EXPA.
		// ATMP is corresponding value
		// Normalize ATMP and shift right to correct location
		EXPB = add(clb(ATMP),#-2)	// Amount to left shift to normalize
		EXPA = sub(#1+5,TMP)	// Amount to right shift to denormalize
		p3 = cmp.gt(CTMPH,#-1)	// p3 = (result is non-negative)
	}
	// Underflow
	// We know that the infinte range exponent should be EXPA
	// CTMP is 2's complement, ATMP is abs(CTMP)
	{
		EXPA = add(EXPA,EXPB)	// how much to shift back right
		ATMP = asl(ATMP,EXPB)	// shift left
		AH = USR
		TMP = #63
	}
	{
		EXPB = min(EXPA,TMP)
		EXPA = #0
		AL = #0x0030	// USR inexact|underflow sticky bits
	}
	{
		B = extractu(ATMP,EXPBA)	// bits lost to the right shift
		ATMP = asr(ATMP,EXPB)
	}
	{
		p0 = cmp.gtu(ONE,B)
		if (!p0.new) ATMPL = or(ATMPL,S_ONE)	// fold lost bits into sticky
		ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
	}
	{
		CTMP = neg(ATMP)
		p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
		if (!p1.new) AH = or(AH,AL)	// raise inexact+underflow in USR
		B = #0
	}
	{
		if (p3) CTMP = ATMP	// restore sign of the result
		USR = AH
		TMP = #-BIAS-(MANTBITS+FUDGE2)
	}
	{
		A = convert_d2df(CTMP)
	}
	{
		AH += asl(TMP,#HI_MANTBITS)	// rebias exponent for subnormal range
		dealloc_return
	}
.Lpossible_unf:
	{
		TMP = ##0x7fefffff	// exponent/mantissa mask of DBL_MAX
		ATMP = abs(CTMP)
	}
	{
		// If the packed result is not exactly the smallest normal, return it.
		p0 = cmp.eq(AL,#0)
		p0 = bitsclr(AH,TMP)
		if (!p0.new) dealloc_return:t
		TMP = #0x7fff
	}
	{
		p0 = bitsset(ATMPH,TMP)
		BH = USR
		BL = #0x0030	// USR inexact|underflow sticky bits
	}
	{
		if (p0) BH = or(BH,BL)
	}
	{
		USR = BH
	}
	{
		p0 = dfcmp.eq(A,A)	// forces USR update to take effect
		dealloc_return
	}
.Lfma_ovf:
	{
		TMP = USR
		CTMP = combine(##0x7fefffff,#-1)	// +DBL_MAX
		A = CTMP
	}
	{
		ATMP = combine(##0x7ff00000,#0)	// +infinity
		BH = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding mode
		TMP = or(TMP,#0x28)	// raise inexact|overflow in USR
	}
	{
		USR = TMP
		BH ^= lsr(AH,#31)	// fold result sign into mode check
		BL = BH
	}
	{
		// Round-to-zero (and directed rounding away from the sign) must
		// return DBL_MAX rather than infinity.
		p0 = !cmp.eq(BL,#1)
		p0 = !cmp.eq(BH,#2)
	}
	{
		p0 = dfcmp.eq(ATMP,ATMP)
		if (p0.new) CTMP = ATMP
	}
	{
		A = insert(CTMP,#63,#0)	// keep original sign bit
		dealloc_return
	}
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10

#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4

// A and/or B is NaN, infinity, zero, or subnormal.
// Note: the frame is deallocated here; tiny/subnormal paths re-enter at
// .Lfma_begin which re-allocates it.
.Lfma_abnormal_ab:
	{
		ATMP = extractu(A,#63,#0)	// |A|
		BTMP = extractu(B,#63,#0)	// |B|
		deallocframe
	}
	{
		p3 = cmp.gtu(ATMP,BTMP)
		if (!p3.new) A = B	// sort values
		if (!p3.new) B = A
	}
	{
		p0 = dfclass(A,#0x0f)	// A NaN?
		if (!p0.new) jump:nt .Lnan
		if (!p3) ATMP = BTMP
		if (!p3) BTMP = ATMP
	}
	{
		p1 = dfclass(A,#0x08)	// A is infinity
		p1 = dfclass(B,#0x0e)	// B is nonzero
	}
	{
		p0 = dfclass(A,#0x08)	// a is inf
		p0 = dfclass(B,#0x01)	// b is zero
	}
	{
		if (p1) jump .Lab_inf
		p2 = dfclass(B,#0x01)
	}
	{
		if (p0) jump .Linvalid	// inf * 0 is invalid
		if (p2) jump .Lab_true_zero
		TMP = ##0x7c000000
	}
	// We are left with a normal or subnormal times a subnormal, A > B
	// If A and B are both very small, we will go to a single sticky bit; replace
	// A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results
	// if A and B might multiply to something bigger, decrease A exp and increase B exp
	// and start over
	{
		p0 = bitsclr(AH,TMP)
		if (p0.new) jump:nt .Lfma_ab_tiny
	}
	{
		TMP = add(clb(BTMP),#-EXPBITS)	// shift to normalize B
	}
	{
		BTMP = asl(BTMP,TMP)
	}
	{
		B = insert(BTMP,#63,#0)
		AH -= asl(TMP,#HI_MANTBITS)	// compensate A's exponent
	}
	jump .Lfma_begin

.Lfma_ab_tiny:
	ATMP = combine(##0x00100000,#0)
	{
		A = insert(ATMP,#63,#0)
		B = insert(ATMP,#63,#0)
	}
	jump .Lfma_begin

.Lab_inf:
	{
		B = lsr(B,#63)	// sign of B
		p0 = dfclass(C,#0x10)	// C NaN?
		}
	{
		A ^= asl(B,#63)	// sign of the infinite product
		if (p0) jump .Lnan
	}
	{
		p1 = dfclass(C,#0x08)
		if (p1.new) jump:nt .Lfma_inf_plus_inf
	}
	// A*B is +/- inf, C is finite.  Return A
	{
		jumpr r31
	}
	.falign
.Lfma_inf_plus_inf:
	{	// adding infinities of different signs is invalid
		p0 = dfcmp.eq(A,C)
		if (!p0.new) jump:nt .Linvalid
	}
	{
		jumpr r31
	}

.Lnan:
	{
		p0 = dfclass(B,#0x10)
		p1 = dfclass(C,#0x10)
		if (!p0.new) B = A
		if (!p1.new) C = A
	}
	{	// find sNaNs
		BH = convert_df2sf(B)	// conversion raises invalid on sNaN
		BL = convert_df2sf(C)
	}
	{
		BH = convert_df2sf(A)
		A = #-1	// canonical all-ones NaN
		jumpr r31
	}

.Linvalid:
	{
		TMP = ##0x7f800001	// sp snan
	}
	{
		A = convert_sf2df(TMP)	// conversion raises invalid, yields qNaN
		jumpr r31
	}

.Lab_true_zero:
	// B is zero, A is finite number
	{
		p0 = dfclass(C,#0x10)
		if (p0.new) jump:nt .Lnan
		if (p0.new) A = C
	}
	{
		p0 = dfcmp.eq(B,C)	// is C also zero?
		AH = lsr(AH,#31)	// get sign
	}
	{
		BH ^= asl(AH,#31)	// form correctly signed zero in B
		if (!p0) A = C	// If C is not zero, return C
		if (!p0) jumpr r31
	}
	// B has correctly signed zero, C is also zero
.Lzero_plus_zero:
	{
		p0 = cmp.eq(B,C)	// yes, scalar equals.  +0++0 or -0+-0
		if (p0.new) jumpr:t r31
		A = B
	}
	{
		TMP = USR
	}
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding mode
		A = #0
	}
	{
		p0 = cmp.eq(TMP,#2)	// round-down: +0 + -0 gives -0
		if (p0.new) AH = ##0x80000000
		jumpr r31
	}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
	.falign
.Lfma_abnormal_c:
	// We know that AB is normal * normal
	// C is not normal: zero, subnormal, inf, or NaN.
	{
		p0 = dfclass(C,#0x10)	// is C NaN?
		if (p0.new) jump:nt .Lnan
		if (p0.new) A = C	// move NaN to A
		deallocframe
	}
	{
		p0 = dfclass(C,#0x08)	// is C inf?
		if (p0.new) A = C	// return C
		if (p0.new) jumpr:nt r31
	}
	// zero or subnormal
	// If we have a zero, and we know AB is normal*normal, we can just call normal multiply
	{
		p0 = dfclass(C,#0x01)	// is C zero?
		if (p0.new) jump:nt __hexagon_muldf3
		TMP = #1
	}
	// Left with: subnormal
	// Adjust C and jump back to restart
	{
		allocframe(#STACKSPACE)	// oops, deallocated above, re-allocate frame
		CTMP = #0
		CH = insert(TMP,#EXPBITS,#HI_MANTBITS)	// C scaled to a normal
		jump .Lfma_abnormal_c_restart
	}
// NOTE(review): END(fma) emits .size for symbol `fma`, which is not defined
// in this file (the entry points are __hexagon_fmadf4/5) — presumably a
// leftover; confirm against the assembler's handling before changing.
END(fma)