1*7cf9345cSTaylor Simpson/* 2*7cf9345cSTaylor Simpson * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. 3*7cf9345cSTaylor Simpson * 4*7cf9345cSTaylor Simpson * This program is free software; you can redistribute it and/or modify 5*7cf9345cSTaylor Simpson * it under the terms of the GNU General Public License as published by 6*7cf9345cSTaylor Simpson * the Free Software Foundation; either version 2 of the License, or 7*7cf9345cSTaylor Simpson * (at your option) any later version. 8*7cf9345cSTaylor Simpson * 9*7cf9345cSTaylor Simpson * This program is distributed in the hope that it will be useful, 10*7cf9345cSTaylor Simpson * but WITHOUT ANY WARRANTY; without even the implied warranty of 11*7cf9345cSTaylor Simpson * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12*7cf9345cSTaylor Simpson * GNU General Public License for more details. 13*7cf9345cSTaylor Simpson * 14*7cf9345cSTaylor Simpson * You should have received a copy of the GNU General Public License 15*7cf9345cSTaylor Simpson * along with this program; if not, see <http://www.gnu.org/licenses/>. 16*7cf9345cSTaylor Simpson */ 17*7cf9345cSTaylor Simpson 18*7cf9345cSTaylor Simpson/* 19*7cf9345cSTaylor Simpson * Multiply Instructions 20*7cf9345cSTaylor Simpson */ 21*7cf9345cSTaylor Simpson 22*7cf9345cSTaylor Simpson 23*7cf9345cSTaylor Simpson#define STD_SP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\ 24*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(1,RsV),fGETHALF(1,RtV))));})\ 25*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(1,RtV)))));})\ 26*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(1,RsV),fGETHALF(0,RtV))));})\ 27*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(0,RtV)))));})\ 28*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(0,RsV),fGETHALF(1,RtV))));})\ 29*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(1,RtV)))));})\ 30*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETHALF(0,RsV),fGETHALF(0,RtV))));})\ 31*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(0,RtV)))));}) 32*7cf9345cSTaylor Simpson 33*7cf9345cSTaylor Simpson/*****************************************************/ 34*7cf9345cSTaylor Simpson/* multiply 16x16->32 signed instructions */ 35*7cf9345cSTaylor Simpson/*****************************************************/ 36*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_acc, "Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS, ,fPASS,fPASS) 37*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_nac, "Rx32-=mpy", ,RxV,RxV- ,fMPY16SS, ,fPASS,fPASS) 38*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_acc_sat,"Rx32+=mpy", ,RxV,RxV+ ,fMPY16SS,":sat" ,fSAT, fPASS) 39*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_nac_sat,"Rx32-=mpy", ,RxV,RxV- ,fMPY16SS,":sat" ,fSAT, fPASS) 40*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy, "Rd32=mpy", ,RdV, ,fMPY16SS, ,fPASS,fPASS) 41*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_sat, "Rd32=mpy", ,RdV, ,fMPY16SS,":sat" ,fSAT, fPASS) 42*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_rnd, "Rd32=mpy", ,RdV, ,fMPY16SS,":rnd" ,fPASS,fROUND) 43*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_sat_rnd,"Rd32=mpy", ,RdV, ,fMPY16SS,":rnd:sat",fSAT, fROUND) 44*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd_acc, "Rxx32+=mpy",,RxxV,RxxV+ ,fMPY16SS, ,fPASS,fPASS) 45*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd_nac, "Rxx32-=mpy",,RxxV,RxxV- ,fMPY16SS, ,fPASS,fPASS) 46*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd, "Rdd32=mpy", ,RddV, ,fMPY16SS, ,fPASS,fPASS) 47*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd_rnd, "Rdd32=mpy", ,RddV, ,fMPY16SS,":rnd" ,fPASS,fROUND) 48*7cf9345cSTaylor Simpson 49*7cf9345cSTaylor Simpson 50*7cf9345cSTaylor Simpson/*****************************************************/ 51*7cf9345cSTaylor Simpson/* multiply 16x16->32 unsigned instructions */ 52*7cf9345cSTaylor Simpson/*****************************************************/ 53*7cf9345cSTaylor Simpson#define STD_USP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\ 54*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(1,RtV))));})\ 55*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(1,RtV)))));})\ 56*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(1,RsV),fGETUHALF(0,RtV))));})\ 57*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(0,RtV)))));})\ 58*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(1,RtV))));})\ 59*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(1,RtV)))));})\ 60*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM( fGETUHALF(0,RsV),fGETUHALF(0,RtV))));})\ 61*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM, ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(0,RtV)))));}) 62*7cf9345cSTaylor Simpson 63*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyu_acc, "Rx32+=mpyu", ,RxV,RxV+ ,fMPY16UU, ,fPASS,fPASS) 64*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyu_nac, "Rx32-=mpyu", ,RxV,RxV- ,fMPY16UU, ,fPASS,fPASS) 65*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyu, "Rd32=mpyu", ATTRIBS() ,RdV, ,fMPY16UU, ,fPASS,fPASS) 66*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyud_acc, "Rxx32+=mpyu",,RxxV,RxxV+,fMPY16UU, ,fPASS,fPASS) 67*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyud_nac, "Rxx32-=mpyu",,RxxV,RxxV-,fMPY16UU, ,fPASS,fPASS) 68*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyud, "Rdd32=mpyu", ATTRIBS() ,RddV, ,fMPY16UU, ,fPASS,fPASS) 69*7cf9345cSTaylor Simpson 70*7cf9345cSTaylor Simpson/**********************************************/ 71*7cf9345cSTaylor Simpson/* mpy 16x#s8->32 */ 72*7cf9345cSTaylor Simpson/**********************************************/ 73*7cf9345cSTaylor Simpson 74*7cf9345cSTaylor SimpsonQ6INSN(M2_mpysip,"Rd32=+mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 75*7cf9345cSTaylor Simpson"32-bit Multiply by unsigned immediate", 76*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV=RsV*uiV; }) 77*7cf9345cSTaylor Simpson 78*7cf9345cSTaylor SimpsonQ6INSN(M2_mpysin,"Rd32=-mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 79*7cf9345cSTaylor Simpson"32-bit Multiply by unsigned immediate, negate result", 80*7cf9345cSTaylor Simpson{ RdV=RsV*-uiV; }) 81*7cf9345cSTaylor Simpson 82*7cf9345cSTaylor SimpsonQ6INSN(M2_macsip,"Rx32+=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 83*7cf9345cSTaylor Simpson"32-bit Multiply-Add by unsigned immediate", 84*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RxV=RxV + (RsV*uiV);}) 85*7cf9345cSTaylor Simpson 86*7cf9345cSTaylor SimpsonQ6INSN(M2_macsin,"Rx32-=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2), 87*7cf9345cSTaylor Simpson"32-bit Multiply-Subtract by unsigned immediate", 88*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RxV=RxV - (RsV*uiV);}) 89*7cf9345cSTaylor Simpson 90*7cf9345cSTaylor Simpson 91*7cf9345cSTaylor Simpson/**********************************************/ 92*7cf9345cSTaylor Simpson/* multiply/mac 32x32->64 instructions */ 93*7cf9345cSTaylor Simpson/**********************************************/ 94*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_s0, "Rdd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32SS(RsV,RtV);}) 95*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_acc_s0,"Rxx32+=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32SS(RsV,RtV);}) 96*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_nac_s0,"Rxx32-=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32SS(RsV,RtV);}) 97*7cf9345cSTaylor Simpson 98*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyuu_s0, "Rdd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));}) 99*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyuu_acc_s0,"Rxx32+=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));}) 100*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyuu_nac_s0,"Rxx32-=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));}) 101*7cf9345cSTaylor Simpson 102*7cf9345cSTaylor Simpson 103*7cf9345cSTaylor Simpson/******************************************************/ 104*7cf9345cSTaylor Simpson/* multiply/mac 32x32->32 (upper) instructions */ 105*7cf9345cSTaylor Simpson/******************************************************/ 106*7cf9345cSTaylor SimpsonQ6INSN(M2_mpy_up, "Rd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>32;}) 107*7cf9345cSTaylor SimpsonQ6INSN(M2_mpy_up_s1, "Rd32=mpy(Rs32,Rt32):<<1", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>31;}) 108*7cf9345cSTaylor SimpsonQ6INSN(M2_mpy_up_s1_sat, "Rd32=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RdV=fSAT(fMPY32SS(RsV,RtV)>>31);}) 109*7cf9345cSTaylor SimpsonQ6INSN(M2_mpyu_up, "Rd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV))>>32;}) 110*7cf9345cSTaylor SimpsonQ6INSN(M2_mpysu_up, "Rd32=mpysu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SU(RsV,fCAST4u(RtV))>>32;}) 111*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_rnd_s0,"Rd32=mpy(Rs32,Rt32):rnd", ATTRIBS(),"Multiply 32x32",{RdV=(fMPY32SS(RsV,RtV)+fCONSTLL(0x80000000))>>32;}) 112*7cf9345cSTaylor Simpson 113*7cf9345cSTaylor SimpsonQ6INSN(M4_mac_up_s1_sat, "Rx32+=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT( (fSE32_64(RxV)) + (fMPY32SS(RsV,RtV)>>31));}) 114*7cf9345cSTaylor SimpsonQ6INSN(M4_nac_up_s1_sat, "Rx32-=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT( (fSE32_64(RxV)) - (fMPY32SS(RsV,RtV)>>31));}) 115*7cf9345cSTaylor Simpson 116*7cf9345cSTaylor Simpson 117*7cf9345cSTaylor Simpson/**********************************************/ 118*7cf9345cSTaylor Simpson/* 32x32->32 multiply (lower) */ 119*7cf9345cSTaylor Simpson/**********************************************/ 120*7cf9345cSTaylor Simpson 121*7cf9345cSTaylor SimpsonQ6INSN(M2_mpyi,"Rd32=mpyi(Rs32,Rt32)",ATTRIBS(), 122*7cf9345cSTaylor Simpson"Multiply Integer", 123*7cf9345cSTaylor Simpson{ RdV=RsV*RtV;}) 124*7cf9345cSTaylor Simpson 125*7cf9345cSTaylor SimpsonQ6INSN(M2_maci,"Rx32+=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2), 126*7cf9345cSTaylor Simpson"Multiply-Accumulate Integer", 127*7cf9345cSTaylor Simpson{ RxV=RxV + RsV*RtV;}) 128*7cf9345cSTaylor Simpson 129*7cf9345cSTaylor SimpsonQ6INSN(M2_mnaci,"Rx32-=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2), 130*7cf9345cSTaylor Simpson"Multiply-Neg-Accumulate Integer", 131*7cf9345cSTaylor Simpson{ RxV=RxV - RsV*RtV;}) 132*7cf9345cSTaylor Simpson 133*7cf9345cSTaylor Simpson/****** WHY ARE THESE IN MPY.IDEF? **********/ 134*7cf9345cSTaylor Simpson 135*7cf9345cSTaylor SimpsonQ6INSN(M2_acci,"Rx32+=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2), 136*7cf9345cSTaylor Simpson"Add with accumulate", 137*7cf9345cSTaylor Simpson{ RxV=RxV + RsV + RtV;}) 138*7cf9345cSTaylor Simpson 139*7cf9345cSTaylor SimpsonQ6INSN(M2_accii,"Rx32+=add(Rs32,#s8)",ATTRIBS(A_ARCHV2), 140*7cf9345cSTaylor Simpson"Add with accumulate", 141*7cf9345cSTaylor Simpson{ fIMMEXT(siV); RxV=RxV + RsV + siV;}) 142*7cf9345cSTaylor Simpson 143*7cf9345cSTaylor SimpsonQ6INSN(M2_nacci,"Rx32-=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2), 144*7cf9345cSTaylor Simpson"Add with neg accumulate", 145*7cf9345cSTaylor Simpson{ RxV=RxV - (RsV + RtV);}) 146*7cf9345cSTaylor Simpson 147*7cf9345cSTaylor SimpsonQ6INSN(M2_naccii,"Rx32-=add(Rs32,#s8)",ATTRIBS(A_ARCHV2), 148*7cf9345cSTaylor Simpson"Add with neg accumulate", 149*7cf9345cSTaylor Simpson{ fIMMEXT(siV); RxV=RxV - (RsV + siV);}) 150*7cf9345cSTaylor Simpson 151*7cf9345cSTaylor SimpsonQ6INSN(M2_subacc,"Rx32+=sub(Rt32,Rs32)",ATTRIBS(A_ARCHV2), 152*7cf9345cSTaylor Simpson"Sub with accumulate", 153*7cf9345cSTaylor Simpson{ RxV=RxV + RtV - RsV;}) 154*7cf9345cSTaylor Simpson 155*7cf9345cSTaylor Simpson 156*7cf9345cSTaylor Simpson 157*7cf9345cSTaylor Simpson 158*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyrr_addr,"Ry32=add(Ru32,mpyi(Ry32,Rs32))",ATTRIBS(), 159*7cf9345cSTaylor Simpson"Mpy by immed and add immed", 160*7cf9345cSTaylor Simpson{ RyV = RuV + RsV*RyV;}) 161*7cf9345cSTaylor Simpson 162*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyri_addr_u2,"Rd32=add(Ru32,mpyi(#u6:2,Rs32))",ATTRIBS(), 163*7cf9345cSTaylor Simpson"Mpy by immed and add immed", 164*7cf9345cSTaylor Simpson{ RdV = RuV + RsV*uiV;}) 165*7cf9345cSTaylor Simpson 166*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyri_addr,"Rd32=add(Ru32,mpyi(Rs32,#u6))",ATTRIBS(), 167*7cf9345cSTaylor Simpson"Mpy by immed and add immed", 168*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV = RuV + RsV*uiV;}) 169*7cf9345cSTaylor Simpson 170*7cf9345cSTaylor Simpson 171*7cf9345cSTaylor Simpson 172*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyri_addi,"Rd32=add(#u6,mpyi(Rs32,#U6))",ATTRIBS(), 173*7cf9345cSTaylor Simpson"Mpy by immed and add immed", 174*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV = uiV + RsV*UiV;}) 175*7cf9345cSTaylor Simpson 176*7cf9345cSTaylor Simpson 177*7cf9345cSTaylor Simpson 178*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyrr_addi,"Rd32=add(#u6,mpyi(Rs32,Rt32))",ATTRIBS(), 179*7cf9345cSTaylor Simpson"Mpy by immed and add immed", 180*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV = uiV + RsV*RtV;}) 181*7cf9345cSTaylor Simpson 182*7cf9345cSTaylor Simpson 183*7cf9345cSTaylor Simpson 184*7cf9345cSTaylor Simpson 185*7cf9345cSTaylor Simpson 186*7cf9345cSTaylor Simpson 187*7cf9345cSTaylor Simpson 188*7cf9345cSTaylor Simpson 189*7cf9345cSTaylor Simpson 190*7cf9345cSTaylor Simpson 191*7cf9345cSTaylor Simpson 192*7cf9345cSTaylor Simpson 193*7cf9345cSTaylor Simpson 194*7cf9345cSTaylor Simpson 195*7cf9345cSTaylor Simpson 196*7cf9345cSTaylor Simpson 197*7cf9345cSTaylor Simpson 198*7cf9345cSTaylor Simpson/**********************************************/ 199*7cf9345cSTaylor Simpson/* vector mac 2x[16x16 -> 32] */ 200*7cf9345cSTaylor Simpson/**********************************************/ 201*7cf9345cSTaylor Simpson 202*7cf9345cSTaylor Simpson#undef vmac_sema 203*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 204*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\ 205*7cf9345cSTaylor Simpson fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 206*7cf9345cSTaylor Simpson} 207*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s0,"Rdd32=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 208*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s1,"Rdd32=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 209*7cf9345cSTaylor Simpson 210*7cf9345cSTaylor Simpson 211*7cf9345cSTaylor Simpson#undef vmac_sema 212*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 213*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\ 214*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 215*7cf9345cSTaylor Simpson} 216*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2s_s0,"Rxx32+=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 217*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2s_s1,"Rxx32+=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 218*7cf9345cSTaylor Simpson 219*7cf9345cSTaylor Simpson#undef vmac_sema 220*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 221*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\ 222*7cf9345cSTaylor Simpson fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\ 223*7cf9345cSTaylor Simpson} 224*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2su_s0,"Rdd32=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 225*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2su_s1,"Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 226*7cf9345cSTaylor Simpson 227*7cf9345cSTaylor Simpson 228*7cf9345cSTaylor Simpson#undef vmac_sema 229*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 230*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\ 231*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\ 232*7cf9345cSTaylor Simpson} 233*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2su_s0,"Rxx32+=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 234*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2su_s1,"Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 235*7cf9345cSTaylor Simpson 236*7cf9345cSTaylor Simpson 237*7cf9345cSTaylor Simpson 238*7cf9345cSTaylor Simpson#undef vmac_sema 239*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 240*7cf9345cSTaylor Simpson{ fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\ 241*7cf9345cSTaylor Simpson fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) + 0x8000))));\ 242*7cf9345cSTaylor Simpson} 243*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s0pack,"Rd32=vmpyh(Rs32,Rt32):rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0)) 244*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s1pack,"Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(1)) 245*7cf9345cSTaylor Simpson 246*7cf9345cSTaylor Simpson 247*7cf9345cSTaylor Simpson#undef vmac_sema 248*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 249*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)));\ 250*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)));\ 251*7cf9345cSTaylor Simpson} 252*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2,"Rxx32+=vmpyh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0)) 253*7cf9345cSTaylor Simpson 254*7cf9345cSTaylor Simpson#undef vmac_sema 255*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 256*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\ 257*7cf9345cSTaylor Simpson fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\ 258*7cf9345cSTaylor Simpson} 259*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2es_s0,"Rdd32=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 260*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2es_s1,"Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 261*7cf9345cSTaylor Simpson 262*7cf9345cSTaylor Simpson#undef vmac_sema 263*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 264*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\ 265*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\ 266*7cf9345cSTaylor Simpson} 267*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2es_s0,"Rxx32+=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 268*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2es_s1,"Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1)) 269*7cf9345cSTaylor Simpson 270*7cf9345cSTaylor Simpson#undef vmac_sema 271*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 272*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)));\ 273*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)));\ 274*7cf9345cSTaylor Simpson} 275*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2es,"Rxx32+=vmpyeh(Rss32,Rtt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0)) 276*7cf9345cSTaylor Simpson 277*7cf9345cSTaylor Simpson 278*7cf9345cSTaylor Simpson 279*7cf9345cSTaylor Simpson 280*7cf9345cSTaylor Simpson/********************************************************/ 281*7cf9345cSTaylor Simpson/* vrmpyh, aka Big Mac, aka Mac Daddy, aka Mac-ac-ac-ac */ 282*7cf9345cSTaylor Simpson/* vector mac 4x[16x16] + 64 ->64 */ 283*7cf9345cSTaylor Simpson/********************************************************/ 284*7cf9345cSTaylor Simpson 285*7cf9345cSTaylor Simpson 286*7cf9345cSTaylor Simpson#undef vmac_sema 287*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 288*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\ 289*7cf9345cSTaylor Simpson + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\ 290*7cf9345cSTaylor Simpson + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\ 291*7cf9345cSTaylor Simpson + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 292*7cf9345cSTaylor Simpson} 293*7cf9345cSTaylor SimpsonQ6INSN(M2_vrmac_s0,"Rxx32+=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 294*7cf9345cSTaylor Simpson 295*7cf9345cSTaylor Simpson#undef vmac_sema 296*7cf9345cSTaylor Simpson#define vmac_sema(N)\ 297*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\ 298*7cf9345cSTaylor Simpson + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\ 299*7cf9345cSTaylor Simpson + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\ 300*7cf9345cSTaylor Simpson + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 301*7cf9345cSTaylor Simpson} 302*7cf9345cSTaylor SimpsonQ6INSN(M2_vrmpy_s0,"Rdd32=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0)) 303*7cf9345cSTaylor Simpson 304*7cf9345cSTaylor Simpson 305*7cf9345cSTaylor Simpson 306*7cf9345cSTaylor Simpson/******************************************************/ 307*7cf9345cSTaylor Simpson/* vector dual macs. just like complex */ 308*7cf9345cSTaylor Simpson/******************************************************/ 309*7cf9345cSTaylor Simpson 310*7cf9345cSTaylor Simpson 311*7cf9345cSTaylor Simpson/* With round&pack */ 312*7cf9345cSTaylor Simpson#undef dmpy_sema 313*7cf9345cSTaylor Simpson#define dmpy_sema(N)\ 314*7cf9345cSTaylor Simpson{ fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \ 315*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))) + 0x8000))));\ 316*7cf9345cSTaylor Simpson fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \ 317*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))) + 0x8000))));\ 318*7cf9345cSTaylor Simpson} 319*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpyrs_s0,"Rd32=vdmpy(Rss32,Rtt32):rnd:sat",ATTRIBS(), "vector dual mac w/ round&pack",dmpy_sema(0)) 320*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpyrs_s1,"Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"vector dual mac w/ round&pack",dmpy_sema(1)) 321*7cf9345cSTaylor Simpson 322*7cf9345cSTaylor Simpson 323*7cf9345cSTaylor Simpson 324*7cf9345cSTaylor Simpson 325*7cf9345cSTaylor Simpson 326*7cf9345cSTaylor Simpson/******************************************************/ 327*7cf9345cSTaylor Simpson/* vector byte multiplies */ 328*7cf9345cSTaylor Simpson/******************************************************/ 329*7cf9345cSTaylor Simpson 330*7cf9345cSTaylor Simpson 331*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmpybuu,"Rdd32=vrmpybu(Rss32,Rtt32)",ATTRIBS(), 332*7cf9345cSTaylor Simpson "vector dual mpy bytes", 333*7cf9345cSTaylor Simpson{ 334*7cf9345cSTaylor Simpson fSETWORD(0,RddV,(fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) + 335*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) + 336*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) + 337*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV)))); 338*7cf9345cSTaylor Simpson fSETWORD(1,RddV,(fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) + 339*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) + 340*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) + 341*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV)))); 342*7cf9345cSTaylor Simpson }) 343*7cf9345cSTaylor Simpson 344*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmacbuu,"Rxx32+=vrmpybu(Rss32,Rtt32)",ATTRIBS(), 345*7cf9345cSTaylor Simpson "vector dual mac bytes", 346*7cf9345cSTaylor Simpson{ 347*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,(fGETWORD(0,RxxV) + 348*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) + 349*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) + 350*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) + 351*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV)))); 352*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,(fGETWORD(1,RxxV) + 353*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) + 354*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) + 355*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) + 356*7cf9345cSTaylor Simpson fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV)))); 357*7cf9345cSTaylor Simpson }) 358*7cf9345cSTaylor Simpson 359*7cf9345cSTaylor Simpson 360*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmpybsu,"Rdd32=vrmpybsu(Rss32,Rtt32)",ATTRIBS(), 361*7cf9345cSTaylor Simpson "vector dual mpy bytes", 362*7cf9345cSTaylor Simpson{ 363*7cf9345cSTaylor Simpson fSETWORD(0,RddV,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 364*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) + 365*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 366*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))); 367*7cf9345cSTaylor Simpson fSETWORD(1,RddV,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 368*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) + 369*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 370*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))); 371*7cf9345cSTaylor Simpson }) 372*7cf9345cSTaylor Simpson 373*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmacbsu,"Rxx32+=vrmpybsu(Rss32,Rtt32)",ATTRIBS(), 374*7cf9345cSTaylor Simpson "vector dual mac bytes", 375*7cf9345cSTaylor Simpson{ 376*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,(fGETWORD(0,RxxV) + 377*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 378*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) + 379*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 380*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))); 381*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,(fGETWORD(1,RxxV) + 382*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 383*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) + 384*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 385*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))); 386*7cf9345cSTaylor Simpson }) 387*7cf9345cSTaylor Simpson 388*7cf9345cSTaylor Simpson 389*7cf9345cSTaylor SimpsonQ6INSN(M5_vmpybuu,"Rdd32=vmpybu(Rs32,Rt32)",ATTRIBS(), 390*7cf9345cSTaylor Simpson "vector mpy bytes", 391*7cf9345cSTaylor Simpson{ 392*7cf9345cSTaylor Simpson fSETHALF(0,RddV,(fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV)))); 393*7cf9345cSTaylor Simpson fSETHALF(1,RddV,(fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV)))); 394*7cf9345cSTaylor Simpson fSETHALF(2,RddV,(fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV)))); 395*7cf9345cSTaylor Simpson fSETHALF(3,RddV,(fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV)))); 396*7cf9345cSTaylor Simpson }) 397*7cf9345cSTaylor Simpson 398*7cf9345cSTaylor SimpsonQ6INSN(M5_vmpybsu,"Rdd32=vmpybsu(Rs32,Rt32)",ATTRIBS(), 399*7cf9345cSTaylor Simpson "vector mpy bytes", 400*7cf9345cSTaylor Simpson{ 401*7cf9345cSTaylor Simpson fSETHALF(0,RddV,(fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV)))); 402*7cf9345cSTaylor Simpson fSETHALF(1,RddV,(fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV)))); 403*7cf9345cSTaylor Simpson fSETHALF(2,RddV,(fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV)))); 404*7cf9345cSTaylor Simpson fSETHALF(3,RddV,(fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV)))); 405*7cf9345cSTaylor Simpson }) 406*7cf9345cSTaylor Simpson 407*7cf9345cSTaylor Simpson 408*7cf9345cSTaylor SimpsonQ6INSN(M5_vmacbuu,"Rxx32+=vmpybu(Rs32,Rt32)",ATTRIBS(), 409*7cf9345cSTaylor Simpson "vector mac bytes", 410*7cf9345cSTaylor Simpson{ 411*7cf9345cSTaylor Simpson fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV)))); 412*7cf9345cSTaylor Simpson fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV)))); 413*7cf9345cSTaylor Simpson fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV)))); 414*7cf9345cSTaylor Simpson fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV)))); 415*7cf9345cSTaylor Simpson }) 416*7cf9345cSTaylor Simpson 417*7cf9345cSTaylor SimpsonQ6INSN(M5_vmacbsu,"Rxx32+=vmpybsu(Rs32,Rt32)",ATTRIBS(), 418*7cf9345cSTaylor Simpson "vector mac bytes", 419*7cf9345cSTaylor Simpson{ 420*7cf9345cSTaylor Simpson fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV)))); 421*7cf9345cSTaylor Simpson fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV)))); 422*7cf9345cSTaylor Simpson fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV)))); 423*7cf9345cSTaylor Simpson fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV)))); 424*7cf9345cSTaylor Simpson }) 425*7cf9345cSTaylor Simpson 426*7cf9345cSTaylor Simpson 427*7cf9345cSTaylor Simpson 428*7cf9345cSTaylor SimpsonQ6INSN(M5_vdmpybsu,"Rdd32=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(), 429*7cf9345cSTaylor Simpson "vector quad mpy bytes", 430*7cf9345cSTaylor Simpson{ 431*7cf9345cSTaylor Simpson fSETHALF(0,RddV,fSATN(16,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 432*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV))))); 433*7cf9345cSTaylor Simpson fSETHALF(1,RddV,fSATN(16,(fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 434*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))))); 435*7cf9345cSTaylor Simpson fSETHALF(2,RddV,fSATN(16,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 436*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV))))); 437*7cf9345cSTaylor Simpson fSETHALF(3,RddV,fSATN(16,(fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 438*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))))); 439*7cf9345cSTaylor Simpson }) 440*7cf9345cSTaylor Simpson 441*7cf9345cSTaylor Simpson 442*7cf9345cSTaylor SimpsonQ6INSN(M5_vdmacbsu,"Rxx32+=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(), 443*7cf9345cSTaylor Simpson "vector quad mac bytes", 444*7cf9345cSTaylor Simpson{ 445*7cf9345cSTaylor Simpson fSETHALF(0,RxxV,fSATN(16,(fGETHALF(0,RxxV) + 446*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) + 447*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV))))); 448*7cf9345cSTaylor Simpson fSETHALF(1,RxxV,fSATN(16,(fGETHALF(1,RxxV) + 449*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) + 450*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))))); 451*7cf9345cSTaylor Simpson fSETHALF(2,RxxV,fSATN(16,(fGETHALF(2,RxxV) + 452*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) + 453*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV))))); 454*7cf9345cSTaylor Simpson fSETHALF(3,RxxV,fSATN(16,(fGETHALF(3,RxxV) + 455*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) + 456*7cf9345cSTaylor Simpson fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))))); 457*7cf9345cSTaylor Simpson }) 458*7cf9345cSTaylor Simpson 459*7cf9345cSTaylor Simpson 460*7cf9345cSTaylor Simpson 461*7cf9345cSTaylor Simpson/* Full version */ 462*7cf9345cSTaylor Simpson#undef dmpy_sema 463*7cf9345cSTaylor Simpson#define dmpy_sema(N)\ 464*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \ 465*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\ 466*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \ 467*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\ 468*7cf9345cSTaylor Simpson} 469*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmacs_s0,"Rxx32+=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0)) 470*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmacs_s1,"Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1)) 471*7cf9345cSTaylor Simpson 472*7cf9345cSTaylor Simpson#undef dmpy_sema 473*7cf9345cSTaylor Simpson#define dmpy_sema(N)\ 474*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \ 475*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\ 476*7cf9345cSTaylor Simpson fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \ 477*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\ 478*7cf9345cSTaylor Simpson} 479*7cf9345cSTaylor Simpson 480*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpys_s0,"Rdd32=vdmpy(Rss32,Rtt32):sat",ATTRIBS(), "",dmpy_sema(0)) 481*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpys_s1,"Rdd32=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1)) 482*7cf9345cSTaylor Simpson 483*7cf9345cSTaylor Simpson 484*7cf9345cSTaylor Simpson 485*7cf9345cSTaylor Simpson/******************************************************/ 486*7cf9345cSTaylor Simpson/* complex multiply/mac with */ 487*7cf9345cSTaylor Simpson/* real&imag are packed together and always saturated */ 488*7cf9345cSTaylor Simpson/* to protect against overflow. */ 489*7cf9345cSTaylor Simpson/******************************************************/ 490*7cf9345cSTaylor Simpson 491*7cf9345cSTaylor Simpson#undef cmpy_sema 492*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 493*7cf9345cSTaylor Simpson{ fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 494*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))) + 0x8000))));\ 495*7cf9345cSTaylor Simpson fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 496*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\ 497*7cf9345cSTaylor Simpson} 498*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrs_s0,"Rd32=cmpy(Rs32,Rt32):rnd:sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-)) 499*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrs_s1,"Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-)) 500*7cf9345cSTaylor Simpson 501*7cf9345cSTaylor Simpson 502*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrsc_s0,"Rd32=cmpy(Rs32,Rt32*):rnd:sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 503*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrsc_s1,"Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 504*7cf9345cSTaylor Simpson 505*7cf9345cSTaylor Simpson 506*7cf9345cSTaylor Simpson#undef cmpy_sema 507*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 508*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 509*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\ 510*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 511*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 512*7cf9345cSTaylor Simpson} 513*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacs_s0,"Rxx32+=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-)) 514*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacs_s1,"Rxx32+=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-)) 515*7cf9345cSTaylor Simpson 516*7cf9345cSTaylor Simpson/* EJP: Need mac versions w/ CONJ T? */ 517*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacsc_s0,"Rxx32+=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 518*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacsc_s1,"Rxx32+=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 519*7cf9345cSTaylor Simpson 520*7cf9345cSTaylor Simpson 521*7cf9345cSTaylor Simpson#undef cmpy_sema 522*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 523*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 524*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\ 525*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 526*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\ 527*7cf9345cSTaylor Simpson} 528*7cf9345cSTaylor Simpson 529*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpys_s0,"Rdd32=cmpy(Rs32,Rt32):sat",ATTRIBS(), "Complex Multiply",cmpy_sema(0,+,-)) 530*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpys_s1,"Rdd32=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-)) 531*7cf9345cSTaylor Simpson 532*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpysc_s0,"Rdd32=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 533*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpysc_s1,"Rdd32=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 534*7cf9345cSTaylor Simpson 535*7cf9345cSTaylor Simpson 536*7cf9345cSTaylor Simpson 537*7cf9345cSTaylor Simpson#undef cmpy_sema 538*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\ 539*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \ 540*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))))));\ 541*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \ 542*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))))));\ 543*7cf9345cSTaylor Simpson} 544*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacs_s0,"Rxx32-=cmpy(Rs32,Rt32):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,+,-)) 545*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacs_s1,"Rxx32-=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,+,-)) 546*7cf9345cSTaylor Simpson 547*7cf9345cSTaylor Simpson/* EJP: need CONJ versions? */ 548*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacsc_s0,"Rxx32-=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2), "Complex Multiply",cmpy_sema(0,-,+)) 549*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacsc_s1,"Rxx32-=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+)) 550*7cf9345cSTaylor Simpson 551*7cf9345cSTaylor Simpson 552*7cf9345cSTaylor Simpson/******************************************************/ 553*7cf9345cSTaylor Simpson/* complex interpolation */ 554*7cf9345cSTaylor Simpson/* Given a pair of complex values, scale by a,b, sum */ 555*7cf9345cSTaylor Simpson/* Saturate/shift1 and round/pack */ 556*7cf9345cSTaylor Simpson/******************************************************/ 557*7cf9345cSTaylor Simpson 558*7cf9345cSTaylor Simpson#undef vrcmpys_sema 559*7cf9345cSTaylor Simpson#define vrcmpys_sema(N,INWORD) \ 560*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \ 561*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\ 562*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \ 563*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\ 564*7cf9345cSTaylor Simpson} 565*7cf9345cSTaylor Simpson 566*7cf9345cSTaylor Simpson 567*7cf9345cSTaylor Simpson 568*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1_h,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV))) 569*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1_l,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV))) 570*7cf9345cSTaylor Simpson 571*7cf9345cSTaylor Simpson#undef vrcmpys_sema 572*7cf9345cSTaylor Simpson#define vrcmpys_sema(N,INWORD) \ 573*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \ 574*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\ 575*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \ 576*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\ 577*7cf9345cSTaylor Simpson} 578*7cf9345cSTaylor Simpson 579*7cf9345cSTaylor Simpson 580*7cf9345cSTaylor Simpson 581*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_acc_s1_h,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV))) 582*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_acc_s1_l,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV))) 583*7cf9345cSTaylor Simpson 584*7cf9345cSTaylor Simpson#undef vrcmpys_sema 585*7cf9345cSTaylor Simpson#define vrcmpys_sema(N,INWORD) \ 586*7cf9345cSTaylor Simpson{ fSETHALF(1,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \ 587*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD))) + 0x8000)));\ 588*7cf9345cSTaylor Simpson fSETHALF(0,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \ 589*7cf9345cSTaylor Simpson fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD))) + 0x8000)));\ 590*7cf9345cSTaylor Simpson} 591*7cf9345cSTaylor Simpson 592*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1rp_h,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV))) 593*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1rp_l,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV))) 594*7cf9345cSTaylor Simpson 595*7cf9345cSTaylor Simpson/**************************************************************/ 596*7cf9345cSTaylor Simpson/* mixed mode 32x16 vector dual multiplies */ 597*7cf9345cSTaylor Simpson/* */ 598*7cf9345cSTaylor Simpson/**************************************************************/ 599*7cf9345cSTaylor Simpson 600*7cf9345cSTaylor Simpson/* SIGNED 32 x SIGNED 16 */ 601*7cf9345cSTaylor Simpson 602*7cf9345cSTaylor Simpson 603*7cf9345cSTaylor Simpson#undef mixmpy_sema 604*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 605*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)) ); \ 606*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)) ); \ 607*7cf9345cSTaylor Simpson} 608*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_s0,"Rxx32+=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 609*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_s1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 610*7cf9345cSTaylor Simpson 611*7cf9345cSTaylor Simpson#undef mixmpy_sema 612*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 613*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16) )); \ 614*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16 ))); \ 615*7cf9345cSTaylor Simpson} 616*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_s0,"Rxx32+=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 617*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_s1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 618*7cf9345cSTaylor Simpson 619*7cf9345cSTaylor Simpson#undef mixmpy_sema 620*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 621*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)); \ 622*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)); \ 623*7cf9345cSTaylor Simpson} 624*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_s0,"Rdd32=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 625*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_s1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 626*7cf9345cSTaylor Simpson 627*7cf9345cSTaylor Simpson#undef mixmpy_sema 628*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 629*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16)); \ 630*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16)); \ 631*7cf9345cSTaylor Simpson} 632*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_s0,"Rdd32=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 633*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_s1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 634*7cf9345cSTaylor Simpson 635*7cf9345cSTaylor Simpson 636*7cf9345cSTaylor Simpson/* With rounding */ 637*7cf9345cSTaylor Simpson 638*7cf9345cSTaylor Simpson#undef mixmpy_sema 639*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 640*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)) ); \ 641*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)) ); \ 642*7cf9345cSTaylor Simpson} 643*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_rs0,"Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 644*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_rs1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 645*7cf9345cSTaylor Simpson 646*7cf9345cSTaylor Simpson#undef mixmpy_sema 647*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 648*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16) )); \ 649*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16 ))); \ 650*7cf9345cSTaylor Simpson} 651*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_rs0,"Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 652*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_rs1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 653*7cf9345cSTaylor Simpson 654*7cf9345cSTaylor Simpson#undef mixmpy_sema 655*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 656*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)); \ 657*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)); \ 658*7cf9345cSTaylor Simpson} 659*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_rs0,"Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 660*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_rs1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 661*7cf9345cSTaylor Simpson 662*7cf9345cSTaylor Simpson#undef mixmpy_sema 663*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 664*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16)); \ 665*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16)); \ 666*7cf9345cSTaylor Simpson} 667*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_rs0,"Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 668*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_rs1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 669*7cf9345cSTaylor Simpson 670*7cf9345cSTaylor Simpson 671*7cf9345cSTaylor Simpson#undef mixmpy_sema 672*7cf9345cSTaylor Simpson#define mixmpy_sema(DEST,EQUALS,N)\ 673*7cf9345cSTaylor Simpson{ DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)));} 674*7cf9345cSTaylor Simpson 675*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_s0,"Rdd32=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0)) 676*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_s1,"Rdd32=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1)) 677*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_acc_s0,"Rxx32+=vrmpyweh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0)) 678*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_acc_s1,"Rxx32+=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1)) 679*7cf9345cSTaylor Simpson 680*7cf9345cSTaylor Simpson#undef mixmpy_sema 681*7cf9345cSTaylor Simpson#define mixmpy_sema(DEST,EQUALS,N)\ 682*7cf9345cSTaylor Simpson{ DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)));} 683*7cf9345cSTaylor Simpson 684*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_s0,"Rdd32=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RddV,=,0)) 685*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_s1,"Rdd32=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1)) 686*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_acc_s0,"Rxx32+=vrmpywoh(Rss32,Rtt32)",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0)) 687*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_acc_s1,"Rxx32+=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1)) 688*7cf9345cSTaylor Simpson 689*7cf9345cSTaylor Simpson 690*7cf9345cSTaylor Simpson 691*7cf9345cSTaylor Simpson 692*7cf9345cSTaylor Simpson 693*7cf9345cSTaylor Simpson 694*7cf9345cSTaylor Simpson#undef mixmpy_sema 695*7cf9345cSTaylor Simpson#define mixmpy_sema(N,H,RND)\ 696*7cf9345cSTaylor Simpson{ RdV = fSAT((fSCALE(N,fMPY3216SS(RsV,fGETHALF(H,RtV)))RND)>>16); \ 697*7cf9345cSTaylor Simpson} 698*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyl_rs1,"Rd32=mpy(Rs32,Rt.L32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,+0x8000)) 699*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyh_rs1,"Rd32=mpy(Rs32,Rt.H32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,+0x8000)) 700*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyl_s1,"Rd32=mpy(Rs32,Rt.L32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,)) 701*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyh_s1,"Rd32=mpy(Rs32,Rt.H32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,)) 702*7cf9345cSTaylor Simpson 703*7cf9345cSTaylor Simpson 704*7cf9345cSTaylor Simpson 705*7cf9345cSTaylor Simpson 706*7cf9345cSTaylor Simpson 707*7cf9345cSTaylor Simpson 708*7cf9345cSTaylor Simpson 709*7cf9345cSTaylor Simpson 710*7cf9345cSTaylor Simpson 711*7cf9345cSTaylor Simpson/* SIGNED 32 x UNSIGNED 16 */ 712*7cf9345cSTaylor Simpson 713*7cf9345cSTaylor Simpson#undef mixmpy_sema 714*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 715*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)) ); \ 716*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)) ); \ 717*7cf9345cSTaylor Simpson} 718*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_s0,"Rxx32+=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 719*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_s1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 720*7cf9345cSTaylor Simpson 721*7cf9345cSTaylor Simpson#undef mixmpy_sema 722*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 723*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16) )); \ 724*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16 ))); \ 725*7cf9345cSTaylor Simpson} 726*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_s0,"Rxx32+=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 727*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_s1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 728*7cf9345cSTaylor Simpson 729*7cf9345cSTaylor Simpson#undef mixmpy_sema 730*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 731*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)); \ 732*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)); \ 733*7cf9345cSTaylor Simpson} 734*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_s0,"Rdd32=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 735*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_s1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 736*7cf9345cSTaylor Simpson 737*7cf9345cSTaylor Simpson#undef mixmpy_sema 738*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 739*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16)); \ 740*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16)); \ 741*7cf9345cSTaylor Simpson} 742*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_s0,"Rdd32=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 743*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_s1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 744*7cf9345cSTaylor Simpson 745*7cf9345cSTaylor Simpson 746*7cf9345cSTaylor Simpson/* With rounding */ 747*7cf9345cSTaylor Simpson 748*7cf9345cSTaylor Simpson#undef mixmpy_sema 749*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 750*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)) ); \ 751*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)) ); \ 752*7cf9345cSTaylor Simpson} 753*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_rs0,"Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 754*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_rs1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 755*7cf9345cSTaylor Simpson 756*7cf9345cSTaylor Simpson#undef mixmpy_sema 757*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 758*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16) )); \ 759*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16 ))); \ 760*7cf9345cSTaylor Simpson} 761*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_rs0,"Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 762*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_rs1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 763*7cf9345cSTaylor Simpson 764*7cf9345cSTaylor Simpson#undef mixmpy_sema 765*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 766*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)); \ 767*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)); \ 768*7cf9345cSTaylor Simpson} 769*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_rs0,"Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 770*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_rs1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 771*7cf9345cSTaylor Simpson 772*7cf9345cSTaylor Simpson#undef mixmpy_sema 773*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\ 774*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16)); \ 775*7cf9345cSTaylor Simpson fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16)); \ 776*7cf9345cSTaylor Simpson} 777*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_rs0,"Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(), "Mixed Precision Multiply",mixmpy_sema(0)) 778*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_rs1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1)) 779*7cf9345cSTaylor Simpson 780*7cf9345cSTaylor Simpson 781*7cf9345cSTaylor Simpson/**************************************************************/ 782*7cf9345cSTaylor Simpson/* complex mac with full 64-bit accum - no sat, no shift */ 783*7cf9345cSTaylor Simpson/* either do real or accum, never both */ 784*7cf9345cSTaylor Simpson/**************************************************************/ 785*7cf9345cSTaylor Simpson 786*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmaci_s0,"Rxx32+=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Imaginary", 787*7cf9345cSTaylor Simpson{ 788*7cf9345cSTaylor SimpsonRxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \ 789*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 790*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \ 791*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 792*7cf9345cSTaylor Simpson}) 793*7cf9345cSTaylor Simpson 794*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmacr_s0,"Rxx32+=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Real", 795*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \ 796*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 797*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \ 798*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 799*7cf9345cSTaylor Simpson}) 800*7cf9345cSTaylor Simpson 801*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmaci_s0c,"Rxx32+=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Imaginary", 802*7cf9345cSTaylor Simpson{ 803*7cf9345cSTaylor SimpsonRxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \ 804*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 805*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \ 806*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 807*7cf9345cSTaylor Simpson}) 808*7cf9345cSTaylor Simpson 809*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmacr_s0c,"Rxx32+=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Real", 810*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \ 811*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 812*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \ 813*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 814*7cf9345cSTaylor Simpson}) 815*7cf9345cSTaylor Simpson 816*7cf9345cSTaylor SimpsonQ6INSN(M2_cmaci_s0,"Rxx32+=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Imaginary", 817*7cf9345cSTaylor Simpson{ 818*7cf9345cSTaylor SimpsonRxxV = RxxV + fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \ 819*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)); 820*7cf9345cSTaylor Simpson}) 821*7cf9345cSTaylor Simpson 822*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacr_s0,"Rxx32+=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Real", 823*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \ 824*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)); 825*7cf9345cSTaylor Simpson}) 826*7cf9345cSTaylor Simpson 827*7cf9345cSTaylor Simpson 828*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyi_s0,"Rdd32=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Imaginary", 829*7cf9345cSTaylor Simpson{ 830*7cf9345cSTaylor SimpsonRddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \ 831*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 832*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \ 833*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 834*7cf9345cSTaylor Simpson}) 835*7cf9345cSTaylor Simpson 836*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyr_s0,"Rdd32=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Real", 837*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \ 838*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 839*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \ 840*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 841*7cf9345cSTaylor Simpson}) 842*7cf9345cSTaylor Simpson 843*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyi_s0c,"Rdd32=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Imaginary", 844*7cf9345cSTaylor Simpson{ 845*7cf9345cSTaylor SimpsonRddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \ 846*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \ 847*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \ 848*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\ 849*7cf9345cSTaylor Simpson}) 850*7cf9345cSTaylor Simpson 851*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyr_s0c,"Rdd32=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Real", 852*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \ 853*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \ 854*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \ 855*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\ 856*7cf9345cSTaylor Simpson}) 857*7cf9345cSTaylor Simpson 858*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyi_s0,"Rdd32=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Imaginary", 859*7cf9345cSTaylor Simpson{ 860*7cf9345cSTaylor SimpsonRddV = fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \ 861*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)); 862*7cf9345cSTaylor Simpson}) 863*7cf9345cSTaylor Simpson 864*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyr_s0,"Rdd32=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Real", 865*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \ 866*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)); 867*7cf9345cSTaylor Simpson}) 868*7cf9345cSTaylor Simpson 869*7cf9345cSTaylor Simpson 870*7cf9345cSTaylor Simpson/**************************************************************/ 871*7cf9345cSTaylor Simpson/* Complex mpy/mac with 2x32 bit accum, sat, shift */ 872*7cf9345cSTaylor Simpson/* 32x16 real or imag */ 873*7cf9345cSTaylor Simpson/**************************************************************/ 874*7cf9345cSTaylor Simpson 875*7cf9345cSTaylor Simpson#if 1 876*7cf9345cSTaylor Simpson 877*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyi_wh,"Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 878*7cf9345cSTaylor Simpson{ 879*7cf9345cSTaylor Simpson RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV)) 880*7cf9345cSTaylor Simpson + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV)) 881*7cf9345cSTaylor Simpson + 0x4000)>>15); 882*7cf9345cSTaylor Simpson}) 883*7cf9345cSTaylor Simpson 884*7cf9345cSTaylor Simpson 885*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyr_wh,"Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 886*7cf9345cSTaylor Simpson{ 887*7cf9345cSTaylor Simpson RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV)) 888*7cf9345cSTaylor Simpson - fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV)) 889*7cf9345cSTaylor Simpson + 0x4000)>>15); 890*7cf9345cSTaylor Simpson}) 891*7cf9345cSTaylor Simpson 892*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyi_whc,"Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 893*7cf9345cSTaylor Simpson{ 894*7cf9345cSTaylor Simpson RdV = fSAT( ( fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV)) 895*7cf9345cSTaylor Simpson - fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV)) 896*7cf9345cSTaylor Simpson + 0x4000)>>15); 897*7cf9345cSTaylor Simpson}) 898*7cf9345cSTaylor Simpson 899*7cf9345cSTaylor Simpson 900*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyr_whc,"Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply", 901*7cf9345cSTaylor Simpson{ 902*7cf9345cSTaylor Simpson RdV = fSAT( ( fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV)) 903*7cf9345cSTaylor Simpson + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV)) 904*7cf9345cSTaylor Simpson + 0x4000)>>15); 905*7cf9345cSTaylor Simpson}) 906*7cf9345cSTaylor Simpson 907*7cf9345cSTaylor Simpson 908*7cf9345cSTaylor Simpson#endif 909*7cf9345cSTaylor Simpson 910*7cf9345cSTaylor Simpson/**************************************************************/ 911*7cf9345cSTaylor Simpson/* Vector mpy/mac with 2x32 bit accum, sat, shift */ 912*7cf9345cSTaylor Simpson/* either do real or imag, never both */ 913*7cf9345cSTaylor Simpson/**************************************************************/ 914*7cf9345cSTaylor Simpson 915*7cf9345cSTaylor Simpson#undef VCMPYSEMI 916*7cf9345cSTaylor Simpson#define VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) \ 917*7cf9345cSTaylor Simpson fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \ 918*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV))))); \ 919*7cf9345cSTaylor Simpson fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \ 920*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV))))); \ 921*7cf9345cSTaylor Simpson 922*7cf9345cSTaylor Simpson#undef VCMPYSEMR 923*7cf9345cSTaylor Simpson#define VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) \ 924*7cf9345cSTaylor Simpson fSETWORD(0,DST,SAT(ACC0 fSCALE(SHIFT,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \ 925*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))))); \ 926*7cf9345cSTaylor Simpson fSETWORD(1,DST,SAT(ACC1 fSCALE(SHIFT,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \ 927*7cf9345cSTaylor Simpson fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))))); \ 928*7cf9345cSTaylor Simpson 929*7cf9345cSTaylor Simpson 930*7cf9345cSTaylor Simpson#undef VCMPYIR 931*7cf9345cSTaylor Simpson#define VCMPYIR(TAGBASE,DSTSYN,DSTVAL,ACCSEM,ACCVAL0,ACCVAL1,SHIFTSYN,SHIFTVAL,SATSYN,SATVAL) \ 932*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAGBASE##i,DSTSYN ACCSEM "vcmpyi(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \ 933*7cf9345cSTaylor Simpson "Vector Complex Multiply Imaginary", { VCMPYSEMI(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) \ 934*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAGBASE##r,DSTSYN ACCSEM "vcmpyr(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \ 935*7cf9345cSTaylor Simpson "Vector Complex Multiply Imaginary", { VCMPYSEMR(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) 936*7cf9345cSTaylor Simpson 937*7cf9345cSTaylor Simpson 938*7cf9345cSTaylor SimpsonVCMPYIR(vcmpy_s0_sat_,"Rdd32",RddV,"=",,,"",0,":sat",fSAT) 939*7cf9345cSTaylor SimpsonVCMPYIR(vcmpy_s1_sat_,"Rdd32",RddV,"=",,,":<<1",1,":sat",fSAT) 940*7cf9345cSTaylor SimpsonVCMPYIR(vcmac_s0_sat_,"Rxx32",RxxV,"+=",fGETWORD(0,RxxV) + ,fGETWORD(1,RxxV) + ,"",0,":sat",fSAT) 941*7cf9345cSTaylor Simpson 942*7cf9345cSTaylor Simpson 943*7cf9345cSTaylor Simpson/********************************************************************** 944*7cf9345cSTaylor Simpson * Rotation -- by 0, 90, 180, or 270 means mult by 1, J, -1, -J * 945*7cf9345cSTaylor Simpson *********************************************************************/ 946*7cf9345cSTaylor Simpson 947*7cf9345cSTaylor SimpsonQ6INSN(S2_vcrotate,"Rdd32=vcrotate(Rss32,Rt32)",ATTRIBS(A_ARCHV2),"Rotate complex value by multiple of PI/2", 948*7cf9345cSTaylor Simpson{ 949*7cf9345cSTaylor Simpson fHIDE(size1u_t tmp;) 950*7cf9345cSTaylor Simpson tmp = fEXTRACTU_RANGE(RtV,1,0); 951*7cf9345cSTaylor Simpson if (tmp == 0) { /* No rotation */ 952*7cf9345cSTaylor Simpson fSETHALF(0,RddV,fGETHALF(0,RssV)); 953*7cf9345cSTaylor Simpson fSETHALF(1,RddV,fGETHALF(1,RssV)); 954*7cf9345cSTaylor Simpson } else if (tmp == 1) { /* Multiply by -J */ 955*7cf9345cSTaylor Simpson fSETHALF(0,RddV,fGETHALF(1,RssV)); 956*7cf9345cSTaylor Simpson fSETHALF(1,RddV,fSATH(-fGETHALF(0,RssV))); 957*7cf9345cSTaylor Simpson } else if (tmp == 2) { /* Multiply by J */ 958*7cf9345cSTaylor Simpson fSETHALF(0,RddV,fSATH(-fGETHALF(1,RssV))); 959*7cf9345cSTaylor Simpson fSETHALF(1,RddV,fGETHALF(0,RssV)); 960*7cf9345cSTaylor Simpson } else { /* Multiply by -1 */ 961*7cf9345cSTaylor Simpson fHIDE(if (tmp != 3) fatal("C is broken");) 962*7cf9345cSTaylor Simpson fSETHALF(0,RddV,fSATH(-fGETHALF(0,RssV))); 963*7cf9345cSTaylor Simpson fSETHALF(1,RddV,fSATH(-fGETHALF(1,RssV))); 964*7cf9345cSTaylor Simpson } 965*7cf9345cSTaylor Simpson tmp = fEXTRACTU_RANGE(RtV,3,2); 966*7cf9345cSTaylor Simpson if (tmp == 0) { /* No rotation */ 967*7cf9345cSTaylor Simpson fSETHALF(2,RddV,fGETHALF(2,RssV)); 968*7cf9345cSTaylor Simpson fSETHALF(3,RddV,fGETHALF(3,RssV)); 969*7cf9345cSTaylor Simpson } else if (tmp == 1) { /* Multiply by -J */ 970*7cf9345cSTaylor Simpson fSETHALF(2,RddV,fGETHALF(3,RssV)); 971*7cf9345cSTaylor Simpson fSETHALF(3,RddV,fSATH(-fGETHALF(2,RssV))); 972*7cf9345cSTaylor Simpson } else if (tmp == 2) { /* Multiply by J */ 973*7cf9345cSTaylor Simpson fSETHALF(2,RddV,fSATH(-fGETHALF(3,RssV))); 974*7cf9345cSTaylor Simpson fSETHALF(3,RddV,fGETHALF(2,RssV)); 975*7cf9345cSTaylor Simpson } else { /* Multiply by -1 */ 976*7cf9345cSTaylor Simpson fHIDE(if (tmp != 3) fatal("C is broken");) 977*7cf9345cSTaylor Simpson fSETHALF(2,RddV,fSATH(-fGETHALF(2,RssV))); 978*7cf9345cSTaylor Simpson fSETHALF(3,RddV,fSATH(-fGETHALF(3,RssV))); 979*7cf9345cSTaylor Simpson } 980*7cf9345cSTaylor Simpson}) 981*7cf9345cSTaylor Simpson 982*7cf9345cSTaylor Simpson 983*7cf9345cSTaylor SimpsonQ6INSN(S4_vrcrotate_acc,"Rxx32+=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes", 984*7cf9345cSTaylor Simpson{ 985*7cf9345cSTaylor Simpson fHIDE(int i; int tmpr; int tmpi; unsigned int control;) 986*7cf9345cSTaylor Simpson fHIDE(int sumr; int sumi;) 987*7cf9345cSTaylor Simpson sumr = 0; 988*7cf9345cSTaylor Simpson sumi = 0; 989*7cf9345cSTaylor Simpson control = fGETUBYTE(uiV,RtV); 990*7cf9345cSTaylor Simpson for (i = 0; i < 8; i += 2) { 991*7cf9345cSTaylor Simpson tmpr = fGETBYTE(i ,RssV); 992*7cf9345cSTaylor Simpson tmpi = fGETBYTE(i+1,RssV); 993*7cf9345cSTaylor Simpson switch (control & 3) { 994*7cf9345cSTaylor Simpson case 0: /* No Rotation */ 995*7cf9345cSTaylor Simpson sumr += tmpr; 996*7cf9345cSTaylor Simpson sumi += tmpi; 997*7cf9345cSTaylor Simpson break; 998*7cf9345cSTaylor Simpson case 1: /* Multiply by -J */ 999*7cf9345cSTaylor Simpson sumr += tmpi; 1000*7cf9345cSTaylor Simpson sumi -= tmpr; 1001*7cf9345cSTaylor Simpson break; 1002*7cf9345cSTaylor Simpson case 2: /* Multiply by J */ 1003*7cf9345cSTaylor Simpson sumr -= tmpi; 1004*7cf9345cSTaylor Simpson sumi += tmpr; 1005*7cf9345cSTaylor Simpson break; 1006*7cf9345cSTaylor Simpson case 3: /* Multiply by -1 */ 1007*7cf9345cSTaylor Simpson sumr -= tmpr; 1008*7cf9345cSTaylor Simpson sumi -= tmpi; 1009*7cf9345cSTaylor Simpson break; 1010*7cf9345cSTaylor Simpson fHIDE(default: fatal("C is broken!");) 1011*7cf9345cSTaylor Simpson } 1012*7cf9345cSTaylor Simpson control = control >> 2; 1013*7cf9345cSTaylor Simpson } 1014*7cf9345cSTaylor Simpson fSETWORD(0,RxxV,fGETWORD(0,RxxV) + sumr); 1015*7cf9345cSTaylor Simpson fSETWORD(1,RxxV,fGETWORD(1,RxxV) + sumi); 1016*7cf9345cSTaylor Simpson}) 1017*7cf9345cSTaylor Simpson 1018*7cf9345cSTaylor SimpsonQ6INSN(S4_vrcrotate,"Rdd32=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes", 1019*7cf9345cSTaylor Simpson{ 1020*7cf9345cSTaylor Simpson fHIDE(int i; int tmpr; int tmpi; unsigned int control;) 1021*7cf9345cSTaylor Simpson fHIDE(int sumr; int sumi;) 1022*7cf9345cSTaylor Simpson sumr = 0; 1023*7cf9345cSTaylor Simpson sumi = 0; 1024*7cf9345cSTaylor Simpson control = fGETUBYTE(uiV,RtV); 1025*7cf9345cSTaylor Simpson for (i = 0; i < 8; i += 2) { 1026*7cf9345cSTaylor Simpson tmpr = fGETBYTE(i ,RssV); 1027*7cf9345cSTaylor Simpson tmpi = fGETBYTE(i+1,RssV); 1028*7cf9345cSTaylor Simpson switch (control & 3) { 1029*7cf9345cSTaylor Simpson case 0: /* No Rotation */ 1030*7cf9345cSTaylor Simpson sumr += tmpr; 1031*7cf9345cSTaylor Simpson sumi += tmpi; 1032*7cf9345cSTaylor Simpson break; 1033*7cf9345cSTaylor Simpson case 1: /* Multiply by -J */ 1034*7cf9345cSTaylor Simpson sumr += tmpi; 1035*7cf9345cSTaylor Simpson sumi -= tmpr; 1036*7cf9345cSTaylor Simpson break; 1037*7cf9345cSTaylor Simpson case 2: /* Multiply by J */ 1038*7cf9345cSTaylor Simpson sumr -= tmpi; 1039*7cf9345cSTaylor Simpson sumi += tmpr; 1040*7cf9345cSTaylor Simpson break; 1041*7cf9345cSTaylor Simpson case 3: /* Multiply by -1 */ 1042*7cf9345cSTaylor Simpson sumr -= tmpr; 1043*7cf9345cSTaylor Simpson sumi -= tmpi; 1044*7cf9345cSTaylor Simpson break; 1045*7cf9345cSTaylor Simpson fHIDE(default: fatal("C is broken!");) 1046*7cf9345cSTaylor Simpson } 1047*7cf9345cSTaylor Simpson control = control >> 2; 1048*7cf9345cSTaylor Simpson } 1049*7cf9345cSTaylor Simpson fSETWORD(0,RddV,sumr); 1050*7cf9345cSTaylor Simpson fSETWORD(1,RddV,sumi); 1051*7cf9345cSTaylor Simpson}) 1052*7cf9345cSTaylor Simpson 1053*7cf9345cSTaylor Simpson 1054*7cf9345cSTaylor SimpsonQ6INSN(S2_vcnegh,"Rdd32=vcnegh(Rss32,Rt32)",ATTRIBS(),"Conditional Negate halfwords", 1055*7cf9345cSTaylor Simpson{ 1056*7cf9345cSTaylor Simpson fHIDE(int i;) 1057*7cf9345cSTaylor Simpson for (i = 0; i < 4; i++) { 1058*7cf9345cSTaylor Simpson if (fGETBIT(i,RtV)) { 1059*7cf9345cSTaylor Simpson fSETHALF(i,RddV,fSATH(-fGETHALF(i,RssV))); 1060*7cf9345cSTaylor Simpson } else { 1061*7cf9345cSTaylor Simpson fSETHALF(i,RddV,fGETHALF(i,RssV)); 1062*7cf9345cSTaylor Simpson } 1063*7cf9345cSTaylor Simpson } 1064*7cf9345cSTaylor Simpson}) 1065*7cf9345cSTaylor Simpson 1066*7cf9345cSTaylor SimpsonQ6INSN(S2_vrcnegh,"Rxx32+=vrcnegh(Rss32,Rt32)",ATTRIBS(),"Vector Reduce Conditional Negate halfwords", 1067*7cf9345cSTaylor Simpson{ 1068*7cf9345cSTaylor Simpson fHIDE(int i;) 1069*7cf9345cSTaylor Simpson for (i = 0; i < 4; i++) { 1070*7cf9345cSTaylor Simpson if (fGETBIT(i,RtV)) { 1071*7cf9345cSTaylor Simpson RxxV += -fGETHALF(i,RssV); 1072*7cf9345cSTaylor Simpson } else { 1073*7cf9345cSTaylor Simpson RxxV += fGETHALF(i,RssV); 1074*7cf9345cSTaylor Simpson } 1075*7cf9345cSTaylor Simpson } 1076*7cf9345cSTaylor Simpson}) 1077*7cf9345cSTaylor Simpson 1078*7cf9345cSTaylor Simpson 1079*7cf9345cSTaylor Simpson/********************************************************************** 1080*7cf9345cSTaylor Simpson * Finite-field multiplies. Written by David Hoyle * 1081*7cf9345cSTaylor Simpson *********************************************************************/ 1082*7cf9345cSTaylor Simpson 1083*7cf9345cSTaylor SimpsonQ6INSN(M4_pmpyw,"Rdd32=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)", 1084*7cf9345cSTaylor Simpson{ 1085*7cf9345cSTaylor Simpson fHIDE(int i; unsigned int y;) 1086*7cf9345cSTaylor Simpson fHIDE(unsigned long long x; unsigned long long prod;) 1087*7cf9345cSTaylor Simpson x = fGETUWORD(0, RsV); 1088*7cf9345cSTaylor Simpson y = fGETUWORD(0, RtV); 1089*7cf9345cSTaylor Simpson 1090*7cf9345cSTaylor Simpson prod = 0; 1091*7cf9345cSTaylor Simpson for(i=0; i < 32; i++) { 1092*7cf9345cSTaylor Simpson if((y >> i) & 1) prod ^= (x << i); 1093*7cf9345cSTaylor Simpson } 1094*7cf9345cSTaylor Simpson RddV = prod; 1095*7cf9345cSTaylor Simpson}) 1096*7cf9345cSTaylor Simpson 1097*7cf9345cSTaylor SimpsonQ6INSN(M4_vpmpyh,"Rdd32=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)", 1098*7cf9345cSTaylor Simpson{ 1099*7cf9345cSTaylor Simpson fHIDE(int i; unsigned int x0; unsigned int x1;) 1100*7cf9345cSTaylor Simpson fHIDE(unsigned int y0; unsigned int y1;) 1101*7cf9345cSTaylor Simpson fHIDE(unsigned int prod0; unsigned int prod1;) 1102*7cf9345cSTaylor Simpson 1103*7cf9345cSTaylor Simpson x0 = fGETUHALF(0, RsV); 1104*7cf9345cSTaylor Simpson x1 = fGETUHALF(1, RsV); 1105*7cf9345cSTaylor Simpson y0 = fGETUHALF(0, RtV); 1106*7cf9345cSTaylor Simpson y1 = fGETUHALF(1, RtV); 1107*7cf9345cSTaylor Simpson 1108*7cf9345cSTaylor Simpson prod0 = prod1 = 0; 1109*7cf9345cSTaylor Simpson for(i=0; i < 16; i++) { 1110*7cf9345cSTaylor Simpson if((y0 >> i) & 1) prod0 ^= (x0 << i); 1111*7cf9345cSTaylor Simpson if((y1 >> i) & 1) prod1 ^= (x1 << i); 1112*7cf9345cSTaylor Simpson } 1113*7cf9345cSTaylor Simpson fSETHALF(0,RddV,fGETUHALF(0,prod0)); 1114*7cf9345cSTaylor Simpson fSETHALF(1,RddV,fGETUHALF(0,prod1)); 1115*7cf9345cSTaylor Simpson fSETHALF(2,RddV,fGETUHALF(1,prod0)); 1116*7cf9345cSTaylor Simpson fSETHALF(3,RddV,fGETUHALF(1,prod1)); 1117*7cf9345cSTaylor Simpson}) 1118*7cf9345cSTaylor Simpson 1119*7cf9345cSTaylor SimpsonQ6INSN(M4_pmpyw_acc,"Rxx32^=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)", 1120*7cf9345cSTaylor Simpson{ 1121*7cf9345cSTaylor Simpson fHIDE(int i; unsigned int y;) 1122*7cf9345cSTaylor Simpson fHIDE(unsigned long long x; unsigned long long prod;) 1123*7cf9345cSTaylor Simpson x = fGETUWORD(0, RsV); 1124*7cf9345cSTaylor Simpson y = fGETUWORD(0, RtV); 1125*7cf9345cSTaylor Simpson 1126*7cf9345cSTaylor Simpson prod = 0; 1127*7cf9345cSTaylor Simpson for(i=0; i < 32; i++) { 1128*7cf9345cSTaylor Simpson if((y >> i) & 1) prod ^= (x << i); 1129*7cf9345cSTaylor Simpson } 1130*7cf9345cSTaylor Simpson RxxV ^= prod; 1131*7cf9345cSTaylor Simpson}) 1132*7cf9345cSTaylor Simpson 1133*7cf9345cSTaylor SimpsonQ6INSN(M4_vpmpyh_acc,"Rxx32^=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)", 1134*7cf9345cSTaylor Simpson{ 1135*7cf9345cSTaylor Simpson fHIDE(int i; unsigned int x0; unsigned int x1;) 1136*7cf9345cSTaylor Simpson fHIDE(unsigned int y0; unsigned int y1;) 1137*7cf9345cSTaylor Simpson fHIDE(unsigned int prod0; unsigned int prod1;) 1138*7cf9345cSTaylor Simpson 1139*7cf9345cSTaylor Simpson x0 = fGETUHALF(0, RsV); 1140*7cf9345cSTaylor Simpson x1 = fGETUHALF(1, RsV); 1141*7cf9345cSTaylor Simpson y0 = fGETUHALF(0, RtV); 1142*7cf9345cSTaylor Simpson y1 = fGETUHALF(1, RtV); 1143*7cf9345cSTaylor Simpson 1144*7cf9345cSTaylor Simpson prod0 = prod1 = 0; 1145*7cf9345cSTaylor Simpson for(i=0; i < 16; i++) { 1146*7cf9345cSTaylor Simpson if((y0 >> i) & 1) prod0 ^= (x0 << i); 1147*7cf9345cSTaylor Simpson if((y1 >> i) & 1) prod1 ^= (x1 << i); 1148*7cf9345cSTaylor Simpson } 1149*7cf9345cSTaylor Simpson fSETHALF(0,RxxV,fGETUHALF(0,RxxV) ^ fGETUHALF(0,prod0)); 1150*7cf9345cSTaylor Simpson fSETHALF(1,RxxV,fGETUHALF(1,RxxV) ^ fGETUHALF(0,prod1)); 1151*7cf9345cSTaylor Simpson fSETHALF(2,RxxV,fGETUHALF(2,RxxV) ^ fGETUHALF(1,prod0)); 1152*7cf9345cSTaylor Simpson fSETHALF(3,RxxV,fGETUHALF(3,RxxV) ^ fGETUHALF(1,prod1)); 1153*7cf9345cSTaylor Simpson}) 1154*7cf9345cSTaylor Simpson 1155*7cf9345cSTaylor Simpson 1156*7cf9345cSTaylor Simpson/* V70: TINY CORE */ 1157*7cf9345cSTaylor Simpson 1158*7cf9345cSTaylor Simpson#define CMPY64(TAG,NAME,DESC,OPERAND1,OP,W0,W1,W2,W3) \ 1159*7cf9345cSTaylor SimpsonQ6INSN(M7_##TAG,"Rdd32=" NAME "(Rss32," OPERAND1 ")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 64-bit " DESC, { RddV = (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})\ 1160*7cf9345cSTaylor SimpsonQ6INSN(M7_##TAG##_acc,"Rxx32+=" NAME "(Rss32,"OPERAND1")",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply-Accumulate 64-bit " DESC, { RxxV += (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));}) 1161*7cf9345cSTaylor Simpson 1162*7cf9345cSTaylor SimpsonCMPY64(dcmpyrw, "cmpyrw","Real","Rtt32" ,-,0,0,1,1) 1163*7cf9345cSTaylor SimpsonCMPY64(dcmpyrwc,"cmpyrw","Real","Rtt32*",+,0,0,1,1) 1164*7cf9345cSTaylor SimpsonCMPY64(dcmpyiw, "cmpyiw","Imag","Rtt32" ,+,0,1,1,0) 1165*7cf9345cSTaylor SimpsonCMPY64(dcmpyiwc,"cmpyiw","Imag","Rtt32*",-,1,0,0,1) 1166*7cf9345cSTaylor Simpson 1167*7cf9345cSTaylor Simpson#define CMPY128(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \ 1168*7cf9345cSTaylor SimpsonQ6INSN(M7_##TAG,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \ 1169*7cf9345cSTaylor Simpson{ \ 1170*7cf9345cSTaylor SimpsonfHIDE(size16s_t acc128;)\ 1171*7cf9345cSTaylor SimpsonfHIDE(size16s_t tmp128;)\ 1172*7cf9345cSTaylor SimpsonfHIDE(size8s_t acc64;)\ 1173*7cf9345cSTaylor Simpsontmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\ 1174*7cf9345cSTaylor Simpsonacc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\ 1175*7cf9345cSTaylor Simpsonacc128 = OP(tmp128,acc128);\ 1176*7cf9345cSTaylor Simpsonacc128 = fSHIFTR128(acc128, 31);\ 1177*7cf9345cSTaylor Simpsonacc64 = fCAST16S_8S(acc128);\ 1178*7cf9345cSTaylor SimpsonRdV = fSATW(acc64);\ 1179*7cf9345cSTaylor Simpson}) 1180*7cf9345cSTaylor Simpson 1181*7cf9345cSTaylor Simpson 1182*7cf9345cSTaylor SimpsonCMPY128(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128) 1183*7cf9345cSTaylor SimpsonCMPY128(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128) 1184*7cf9345cSTaylor SimpsonCMPY128(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128) 1185*7cf9345cSTaylor SimpsonCMPY128(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128) 1186*7cf9345cSTaylor Simpson 1187*7cf9345cSTaylor Simpson 1188*7cf9345cSTaylor Simpson#define CMPY128RND(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \ 1189*7cf9345cSTaylor SimpsonQ6INSN(M7_##TAG##_rnd,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:rnd:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result real", \ 1190*7cf9345cSTaylor Simpson{ \ 1191*7cf9345cSTaylor SimpsonfHIDE(size16s_t acc128;)\ 1192*7cf9345cSTaylor SimpsonfHIDE(size16s_t tmp128;)\ 1193*7cf9345cSTaylor SimpsonfHIDE(size16s_t const128;)\ 1194*7cf9345cSTaylor SimpsonfHIDE(size8s_t acc64;)\ 1195*7cf9345cSTaylor Simpsontmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\ 1196*7cf9345cSTaylor Simpsonacc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\ 1197*7cf9345cSTaylor Simpsonconst128 = fCAST8S_16S(fCONSTLL(0x40000000));\ 1198*7cf9345cSTaylor Simpsonacc128 = OP(tmp128,acc128);\ 1199*7cf9345cSTaylor Simpsonacc128 = fADD128(acc128,const128);\ 1200*7cf9345cSTaylor Simpsonacc128 = fSHIFTR128(acc128, 31);\ 1201*7cf9345cSTaylor Simpsonacc64 = fCAST16S_8S(acc128);\ 1202*7cf9345cSTaylor SimpsonRdV = fSATW(acc64);\ 1203*7cf9345cSTaylor Simpson}) 1204*7cf9345cSTaylor Simpson 1205*7cf9345cSTaylor SimpsonCMPY128RND(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128) 1206*7cf9345cSTaylor SimpsonCMPY128RND(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128) 1207*7cf9345cSTaylor SimpsonCMPY128RND(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128) 1208*7cf9345cSTaylor SimpsonCMPY128RND(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128) 1209