xref: /qemu/target/hexagon/imported/mpy.idef (revision 7cf9345c)
1*7cf9345cSTaylor Simpson/*
2*7cf9345cSTaylor Simpson *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3*7cf9345cSTaylor Simpson *
4*7cf9345cSTaylor Simpson *  This program is free software; you can redistribute it and/or modify
5*7cf9345cSTaylor Simpson *  it under the terms of the GNU General Public License as published by
6*7cf9345cSTaylor Simpson *  the Free Software Foundation; either version 2 of the License, or
7*7cf9345cSTaylor Simpson *  (at your option) any later version.
8*7cf9345cSTaylor Simpson *
9*7cf9345cSTaylor Simpson *  This program is distributed in the hope that it will be useful,
10*7cf9345cSTaylor Simpson *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11*7cf9345cSTaylor Simpson *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12*7cf9345cSTaylor Simpson *  GNU General Public License for more details.
13*7cf9345cSTaylor Simpson *
14*7cf9345cSTaylor Simpson *  You should have received a copy of the GNU General Public License
15*7cf9345cSTaylor Simpson *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16*7cf9345cSTaylor Simpson */
17*7cf9345cSTaylor Simpson
18*7cf9345cSTaylor Simpson/*
19*7cf9345cSTaylor Simpson * Multiply Instructions
20*7cf9345cSTaylor Simpson */
21*7cf9345cSTaylor Simpson
22*7cf9345cSTaylor Simpson
/*
 * STD_SP_MODES: expand one signed 16x16 multiply flavour into eight Q6INSN
 * entries named M2_TAG_{hh,hl,lh,ll}_{s0,s1}.
 *
 *   TAG     - pasted into the instruction tag
 *   OPER    - leading part of the syntax string; the operand list and OSEM
 *             are appended to it
 *   ATR     - passed through as the attribute argument of Q6INSN
 *   DST     - lvalue that receives the result
 *   ACCSEM  - tokens pasted in front of the product (may be empty)
 *   SEM     - multiply macro applied to the two selected halfwords
 *   OSEM    - syntax suffix string (may be empty)
 *   SATSEM  - outermost wrapper applied to the result
 *   RNDSEM  - wrapper applied inside SATSEM
 *
 * The hh/hl/lh/ll part selects the fGETHALF index (1 for h, 0 for l) used on
 * RsV and RtV respectively.  The _s1 entries wrap the product in fSCALE(1,...)
 * and carry the :<<1 marker in their syntax; the _s0 entries do not.
 */
23*7cf9345cSTaylor Simpson#define STD_SP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
24*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(1,RsV),fGETHALF(1,RtV))));})\
25*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(1,RtV)))));})\
26*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(1,RsV),fGETHALF(0,RtV))));})\
27*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(1,RsV),fGETHALF(0,RtV)))));})\
28*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(0,RsV),fGETHALF(1,RtV))));})\
29*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(1,RtV)))));})\
30*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETHALF(0,RsV),fGETHALF(0,RtV))));})\
31*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETHALF(0,RsV),fGETHALF(0,RtV)))));})
32*7cf9345cSTaylor Simpson
33*7cf9345cSTaylor Simpson/*****************************************************/
34*7cf9345cSTaylor Simpson/* multiply 16x16->32 signed instructions            */
35*7cf9345cSTaylor Simpson/*****************************************************/
/*
 * Instantiations of STD_SP_MODES, all using fMPY16SS as the multiply.
 *   _acc / _nac  : ACCSEM is DST+ or DST- (accumulate / subtract from DST)
 *   _sat         : SATSEM is fSAT and the syntax gains :sat
 *   _rnd         : RNDSEM is fROUND and the syntax gains :rnd
 *   mpy*         : 32-bit destination (RdV / RxV)
 *   mpyd*        : 64-bit destination (RddV / RxxV)
 * Variants without saturation or rounding pass fPASS in that slot.
 * The ATR argument is left empty for every entry in this group.
 */
36*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_acc,    "Rx32+=mpy", ,RxV,RxV+    ,fMPY16SS,          ,fPASS,fPASS)
37*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_nac,    "Rx32-=mpy", ,RxV,RxV-    ,fMPY16SS,          ,fPASS,fPASS)
38*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_acc_sat,"Rx32+=mpy", ,RxV,RxV+    ,fMPY16SS,":sat"    ,fSAT, fPASS)
39*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_nac_sat,"Rx32-=mpy", ,RxV,RxV-    ,fMPY16SS,":sat"    ,fSAT, fPASS)
40*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy,        "Rd32=mpy",  ,RdV,        ,fMPY16SS,          ,fPASS,fPASS)
41*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_sat,    "Rd32=mpy",  ,RdV,        ,fMPY16SS,":sat"    ,fSAT, fPASS)
42*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_rnd,    "Rd32=mpy",  ,RdV,        ,fMPY16SS,":rnd"    ,fPASS,fROUND)
43*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpy_sat_rnd,"Rd32=mpy",  ,RdV,        ,fMPY16SS,":rnd:sat",fSAT, fROUND)
44*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd_acc,   "Rxx32+=mpy",,RxxV,RxxV+  ,fMPY16SS,          ,fPASS,fPASS)
45*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd_nac,   "Rxx32-=mpy",,RxxV,RxxV-  ,fMPY16SS,          ,fPASS,fPASS)
46*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd,       "Rdd32=mpy", ,RddV,       ,fMPY16SS,          ,fPASS,fPASS)
47*7cf9345cSTaylor SimpsonSTD_SP_MODES(mpyd_rnd,   "Rdd32=mpy", ,RddV,       ,fMPY16SS,":rnd"    ,fPASS,fROUND)
48*7cf9345cSTaylor Simpson
49*7cf9345cSTaylor Simpson
50*7cf9345cSTaylor Simpson/*****************************************************/
51*7cf9345cSTaylor Simpson/* multiply 16x16->32 unsigned instructions          */
52*7cf9345cSTaylor Simpson/*****************************************************/
/*
 * STD_USP_MODES: unsigned counterpart of STD_SP_MODES.  Same parameter list
 * and the same eight generated tags (M2_TAG_{hh,hl,lh,ll}_{s0,s1}); the only
 * difference in the expansion is that the operand halfwords are fetched with
 * fGETUHALF instead of fGETHALF.  The _s1 entries wrap the product in
 * fSCALE(1,...) and carry the :<<1 marker in their syntax.
 */
53*7cf9345cSTaylor Simpson#define STD_USP_MODES(TAG,OPER,ATR,DST,ACCSEM,SEM,OSEM,SATSEM,RNDSEM)\
54*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s0, OPER"(Rs.H32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(1,RsV),fGETUHALF(1,RtV))));})\
55*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hh_s1, OPER"(Rs.H32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(1,RtV)))));})\
56*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s0, OPER"(Rs.H32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(1,RsV),fGETUHALF(0,RtV))));})\
57*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_hl_s1, OPER"(Rs.H32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(1,RsV),fGETUHALF(0,RtV)))));})\
58*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s0, OPER"(Rs.L32,Rt.H32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(0,RsV),fGETUHALF(1,RtV))));})\
59*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_lh_s1, OPER"(Rs.L32,Rt.H32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(1,RtV)))));})\
60*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s0, OPER"(Rs.L32,Rt.L32)"OSEM,        ATR,"",{DST=SATSEM(RNDSEM(ACCSEM SEM(         fGETUHALF(0,RsV),fGETUHALF(0,RtV))));})\
61*7cf9345cSTaylor SimpsonQ6INSN(M2_##TAG##_ll_s1, OPER"(Rs.L32,Rt.L32):<<1"OSEM,    ATR,"",{DST=SATSEM(RNDSEM(ACCSEM fSCALE(1,SEM(fGETUHALF(0,RsV),fGETUHALF(0,RtV)))));})
62*7cf9345cSTaylor Simpson
/*
 * Instantiations of STD_USP_MODES, all using fMPY16UU as the multiply and
 * fPASS for both the saturate and round slots (no :sat or :rnd forms here).
 *   mpyu*   : 32-bit destination (RdV / RxV)
 *   mpyud*  : 64-bit destination (RddV / RxxV)
 * The plain mpyu and mpyud entries pass ATTRIBS() as ATR; the accumulate
 * forms leave ATR empty.
 */
63*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyu_acc,    "Rx32+=mpyu", ,RxV,RxV+  ,fMPY16UU,          ,fPASS,fPASS)
64*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyu_nac,    "Rx32-=mpyu", ,RxV,RxV-  ,fMPY16UU,          ,fPASS,fPASS)
65*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyu,        "Rd32=mpyu",  ATTRIBS() ,RdV,  ,fMPY16UU, ,fPASS,fPASS)
66*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyud_acc,   "Rxx32+=mpyu",,RxxV,RxxV+,fMPY16UU,          ,fPASS,fPASS)
67*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyud_nac,   "Rxx32-=mpyu",,RxxV,RxxV-,fMPY16UU,          ,fPASS,fPASS)
68*7cf9345cSTaylor SimpsonSTD_USP_MODES(mpyud,       "Rdd32=mpyu", ATTRIBS() ,RddV, ,fMPY16UU, ,fPASS,fPASS)
69*7cf9345cSTaylor Simpson
70*7cf9345cSTaylor Simpson/**********************************************/
71*7cf9345cSTaylor Simpson/* mpyi 32x#u8->32                            */
72*7cf9345cSTaylor Simpson/**********************************************/
73*7cf9345cSTaylor Simpson
/* M2_mpysip: RdV = RsV * uiV, where uiV is the #u8 immediate after fIMMEXT. */
74*7cf9345cSTaylor SimpsonQ6INSN(M2_mpysip,"Rd32=+mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
75*7cf9345cSTaylor Simpson"32-bit Multiply by unsigned immediate",
76*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV=RsV*uiV; })
77*7cf9345cSTaylor Simpson
/*
 * M2_mpysin: RdV = RsV * (-uiV).  Unlike the other three entries in this
 * group there is no fIMMEXT(uiV) call in the semantics.
 * NOTE(review): presumably this form does not take an immediate extender;
 * confirm against the architecture manual.
 */
78*7cf9345cSTaylor SimpsonQ6INSN(M2_mpysin,"Rd32=-mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
79*7cf9345cSTaylor Simpson"32-bit Multiply by unsigned immediate, negate result",
80*7cf9345cSTaylor Simpson{ RdV=RsV*-uiV; })
81*7cf9345cSTaylor Simpson
/* M2_macsip: RxV is both source and destination; adds RsV * uiV to it. */
82*7cf9345cSTaylor SimpsonQ6INSN(M2_macsip,"Rx32+=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
83*7cf9345cSTaylor Simpson"32-bit Multiply-Add by unsigned immediate",
84*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RxV=RxV + (RsV*uiV);})
85*7cf9345cSTaylor Simpson
/* M2_macsin: RxV is both source and destination; subtracts RsV * uiV. */
86*7cf9345cSTaylor SimpsonQ6INSN(M2_macsin,"Rx32-=mpyi(Rs32,#u8)",ATTRIBS(A_ARCHV2),
87*7cf9345cSTaylor Simpson"32-bit Multiply-Subtract by unsigned immediate",
88*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RxV=RxV - (RsV*uiV);})
89*7cf9345cSTaylor Simpson
90*7cf9345cSTaylor Simpson
91*7cf9345cSTaylor Simpson/**********************************************/
92*7cf9345cSTaylor Simpson/* multiply/mac  32x32->64 instructions       */
93*7cf9345cSTaylor Simpson/**********************************************/
/*
 * Signed forms: the product comes from fMPY32SS(RsV,RtV) and is assigned to
 * RddV, or added to / subtracted from RxxV for the _acc / _nac entries.
 */
94*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_s0,    "Rdd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32SS(RsV,RtV);})
95*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_acc_s0,"Rxx32+=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32SS(RsV,RtV);})
96*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_nac_s0,"Rxx32-=mpy(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32SS(RsV,RtV);})
97*7cf9345cSTaylor Simpson
/*
 * Unsigned forms: both operands go through fCAST4u before fMPY32UU; otherwise
 * the same assign / accumulate / subtract pattern as the signed entries.
 */
98*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyuu_s0,    "Rdd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RddV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
99*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyuu_acc_s0,"Rxx32+=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV + fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
100*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyuu_nac_s0,"Rxx32-=mpyu(Rs32,Rt32)",ATTRIBS(),"Multiply 32x32",{RxxV= RxxV - fMPY32UU(fCAST4u(RsV),fCAST4u(RtV));})
101*7cf9345cSTaylor Simpson
102*7cf9345cSTaylor Simpson
103*7cf9345cSTaylor Simpson/******************************************************/
104*7cf9345cSTaylor Simpson/* multiply/mac  32x32->32 (upper) instructions       */
105*7cf9345cSTaylor Simpson/******************************************************/
/*
 * Each entry forms a 32x32 product and shifts it right to produce RdV:
 *   M2_mpy_up         signed product (fMPY32SS) shifted right by 32
 *   M2_mpy_up_s1      signed product shifted right by 31 (the :<<1 form)
 *   M2_mpy_up_s1_sat  as _s1, with the shifted value passed through fSAT
 *   M2_mpyu_up        unsigned product (fMPY32UU on fCAST4u operands), >>32
 *   M2_mpysu_up       fMPY32SU with only RtV passed through fCAST4u, >>32
 *   M2_dpmpyss_rnd_s0 signed product plus fCONSTLL(0x80000000), then >>32
 */
106*7cf9345cSTaylor SimpsonQ6INSN(M2_mpy_up,        "Rd32=mpy(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>32;})
107*7cf9345cSTaylor SimpsonQ6INSN(M2_mpy_up_s1,     "Rd32=mpy(Rs32,Rt32):<<1", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SS(RsV,RtV)>>31;})
108*7cf9345cSTaylor SimpsonQ6INSN(M2_mpy_up_s1_sat, "Rd32=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RdV=fSAT(fMPY32SS(RsV,RtV)>>31);})
109*7cf9345cSTaylor SimpsonQ6INSN(M2_mpyu_up,       "Rd32=mpyu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32UU(fCAST4u(RsV),fCAST4u(RtV))>>32;})
110*7cf9345cSTaylor SimpsonQ6INSN(M2_mpysu_up,      "Rd32=mpysu(Rs32,Rt32)", ATTRIBS(),"Multiply 32x32",{RdV=fMPY32SU(RsV,fCAST4u(RtV))>>32;})
111*7cf9345cSTaylor SimpsonQ6INSN(M2_dpmpyss_rnd_s0,"Rd32=mpy(Rs32,Rt32):rnd", ATTRIBS(),"Multiply 32x32",{RdV=(fMPY32SS(RsV,RtV)+fCONSTLL(0x80000000))>>32;})
112*7cf9345cSTaylor Simpson
/*
 * Accumulating :<<1:sat forms: RxV is passed through fSE32_64, the signed
 * product shifted right by 31 is added (mac) or subtracted (nac), and the
 * result goes through fSAT before being written back to RxV.
 */
113*7cf9345cSTaylor SimpsonQ6INSN(M4_mac_up_s1_sat, "Rx32+=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT(  (fSE32_64(RxV)) + (fMPY32SS(RsV,RtV)>>31));})
114*7cf9345cSTaylor SimpsonQ6INSN(M4_nac_up_s1_sat, "Rx32-=mpy(Rs32,Rt32):<<1:sat", ATTRIBS(),"Multiply 32x32",{RxV=fSAT(  (fSE32_64(RxV)) - (fMPY32SS(RsV,RtV)>>31));})
115*7cf9345cSTaylor Simpson
116*7cf9345cSTaylor Simpson
117*7cf9345cSTaylor Simpson/**********************************************/
118*7cf9345cSTaylor Simpson/* 32x32->32 multiply (lower)                 */
119*7cf9345cSTaylor Simpson/**********************************************/
120*7cf9345cSTaylor Simpson
/* M2_mpyi: RdV = RsV * RtV using the plain C multiply operator. */
121*7cf9345cSTaylor SimpsonQ6INSN(M2_mpyi,"Rd32=mpyi(Rs32,Rt32)",ATTRIBS(),
122*7cf9345cSTaylor Simpson"Multiply Integer",
123*7cf9345cSTaylor Simpson{ RdV=RsV*RtV;})
124*7cf9345cSTaylor Simpson
/* M2_maci: adds RsV * RtV to RxV (RxV is read and written). */
125*7cf9345cSTaylor SimpsonQ6INSN(M2_maci,"Rx32+=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
126*7cf9345cSTaylor Simpson"Multiply-Accumulate Integer",
127*7cf9345cSTaylor Simpson{ RxV=RxV + RsV*RtV;})
128*7cf9345cSTaylor Simpson
/* M2_mnaci: subtracts RsV * RtV from RxV (RxV is read and written). */
129*7cf9345cSTaylor SimpsonQ6INSN(M2_mnaci,"Rx32-=mpyi(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
130*7cf9345cSTaylor Simpson"Multiply-Neg-Accumulate Integer",
131*7cf9345cSTaylor Simpson{ RxV=RxV - RsV*RtV;})
132*7cf9345cSTaylor Simpson
133*7cf9345cSTaylor Simpson/****** WHY ARE THESE IN MPY.IDEF? **********/
134*7cf9345cSTaylor Simpson
/*
 * Add/subtract-with-accumulate entries.  None of them multiplies; each
 * updates RxV in place from a sum or difference of the other operands.
 */
/* M2_acci: RxV += RsV + RtV. */
135*7cf9345cSTaylor SimpsonQ6INSN(M2_acci,"Rx32+=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
136*7cf9345cSTaylor Simpson"Add with accumulate",
137*7cf9345cSTaylor Simpson{ RxV=RxV + RsV + RtV;})
138*7cf9345cSTaylor Simpson
/* M2_accii: RxV += RsV + siV, with siV (the #s8 immediate) after fIMMEXT. */
139*7cf9345cSTaylor SimpsonQ6INSN(M2_accii,"Rx32+=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
140*7cf9345cSTaylor Simpson"Add with accumulate",
141*7cf9345cSTaylor Simpson{ fIMMEXT(siV); RxV=RxV + RsV + siV;})
142*7cf9345cSTaylor Simpson
/* M2_nacci: RxV -= (RsV + RtV). */
143*7cf9345cSTaylor SimpsonQ6INSN(M2_nacci,"Rx32-=add(Rs32,Rt32)",ATTRIBS(A_ARCHV2),
144*7cf9345cSTaylor Simpson"Add with neg accumulate",
145*7cf9345cSTaylor Simpson{ RxV=RxV - (RsV + RtV);})
146*7cf9345cSTaylor Simpson
/* M2_naccii: RxV -= (RsV + siV), with siV after fIMMEXT. */
147*7cf9345cSTaylor SimpsonQ6INSN(M2_naccii,"Rx32-=add(Rs32,#s8)",ATTRIBS(A_ARCHV2),
148*7cf9345cSTaylor Simpson"Add with neg accumulate",
149*7cf9345cSTaylor Simpson{ fIMMEXT(siV); RxV=RxV - (RsV + siV);})
150*7cf9345cSTaylor Simpson
/* M2_subacc: RxV += RtV - RsV; note the syntax lists Rt32 before Rs32. */
151*7cf9345cSTaylor SimpsonQ6INSN(M2_subacc,"Rx32+=sub(Rt32,Rs32)",ATTRIBS(A_ARCHV2),
152*7cf9345cSTaylor Simpson"Sub with accumulate",
153*7cf9345cSTaylor Simpson{ RxV=RxV + RtV - RsV;})
154*7cf9345cSTaylor Simpson
155*7cf9345cSTaylor Simpson
156*7cf9345cSTaylor Simpson
157*7cf9345cSTaylor Simpson
/*
 * Combined multiply-then-add entries.  All five share one description string.
 * NOTE(review): that string mentions immediates even for the forms whose
 * operands are all registers; it looks copy-pasted, so rely on the syntax
 * and semantics rather than the description.
 */
/* M4_mpyrr_addr: RyV = RuV + RsV * RyV; RyV is both an input and the result. */
158*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyrr_addr,"Ry32=add(Ru32,mpyi(Ry32,Rs32))",ATTRIBS(),
159*7cf9345cSTaylor Simpson"Mpy by immed and add immed",
160*7cf9345cSTaylor Simpson{ RyV = RuV + RsV*RyV;})
161*7cf9345cSTaylor Simpson
/*
 * M4_mpyri_addr_u2: RdV = RuV + RsV * uiV.  The immediate is written #u6:2
 * in the syntax and there is no fIMMEXT call in the semantics.
 */
162*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyri_addr_u2,"Rd32=add(Ru32,mpyi(#u6:2,Rs32))",ATTRIBS(),
163*7cf9345cSTaylor Simpson"Mpy by immed and add immed",
164*7cf9345cSTaylor Simpson{ RdV = RuV + RsV*uiV;})
165*7cf9345cSTaylor Simpson
/* M4_mpyri_addr: RdV = RuV + RsV * uiV, with uiV (#u6) after fIMMEXT. */
166*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyri_addr,"Rd32=add(Ru32,mpyi(Rs32,#u6))",ATTRIBS(),
167*7cf9345cSTaylor Simpson"Mpy by immed and add immed",
168*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV = RuV + RsV*uiV;})
169*7cf9345cSTaylor Simpson
170*7cf9345cSTaylor Simpson
171*7cf9345cSTaylor Simpson
/*
 * M4_mpyri_addi: RdV = uiV + RsV * UiV.  Two immediates are used: uiV (#u6,
 * the addend) and UiV (#U6, the multiplier); only uiV is given to fIMMEXT.
 */
172*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyri_addi,"Rd32=add(#u6,mpyi(Rs32,#U6))",ATTRIBS(),
173*7cf9345cSTaylor Simpson"Mpy by immed and add immed",
174*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV = uiV + RsV*UiV;})
175*7cf9345cSTaylor Simpson
176*7cf9345cSTaylor Simpson
177*7cf9345cSTaylor Simpson
/* M4_mpyrr_addi: RdV = uiV + RsV * RtV, with uiV (#u6) after fIMMEXT. */
178*7cf9345cSTaylor SimpsonQ6INSN(M4_mpyrr_addi,"Rd32=add(#u6,mpyi(Rs32,Rt32))",ATTRIBS(),
179*7cf9345cSTaylor Simpson"Mpy by immed and add immed",
180*7cf9345cSTaylor Simpson{ fIMMEXT(uiV); RdV = uiV + RsV*RtV;})
181*7cf9345cSTaylor Simpson
182*7cf9345cSTaylor Simpson
183*7cf9345cSTaylor Simpson
184*7cf9345cSTaylor Simpson
185*7cf9345cSTaylor Simpson
186*7cf9345cSTaylor Simpson
187*7cf9345cSTaylor Simpson
188*7cf9345cSTaylor Simpson
189*7cf9345cSTaylor Simpson
190*7cf9345cSTaylor Simpson
191*7cf9345cSTaylor Simpson
192*7cf9345cSTaylor Simpson
193*7cf9345cSTaylor Simpson
194*7cf9345cSTaylor Simpson
195*7cf9345cSTaylor Simpson
196*7cf9345cSTaylor Simpson
197*7cf9345cSTaylor Simpson
198*7cf9345cSTaylor Simpson/**********************************************/
199*7cf9345cSTaylor Simpson/* vector mac  2x[16x16 -> 32]                */
200*7cf9345cSTaylor Simpson/**********************************************/
201*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmpy2s_s0 / _s1: for halfword lanes 0 and 1 of RsV and
 * RtV, take the signed product (fMPY16SS), apply fSCALE(N,...) and fSAT, and
 * store it in the word of RddV with the same index.  N is 0 for the _s0
 * entry and 1 for the _s1 (:<<1) entry.
 */
202*7cf9345cSTaylor Simpson#undef vmac_sema
203*7cf9345cSTaylor Simpson#define vmac_sema(N)\
204*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
205*7cf9345cSTaylor Simpson  fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
206*7cf9345cSTaylor Simpson}
207*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s0,"Rdd32=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
208*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s1,"Rdd32=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
209*7cf9345cSTaylor Simpson
210*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmac2s_s0 / _s1: accumulating form of the macro above.
 * Each word i of RxxV becomes fSAT(word i of RxxV + fSCALE(N, signed product
 * of halfword i of RsV and RtV)), for i = 0 and 1.
 */
211*7cf9345cSTaylor Simpson#undef vmac_sema
212*7cf9345cSTaylor Simpson#define vmac_sema(N)\
213*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)))));\
214*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
215*7cf9345cSTaylor Simpson}
216*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2s_s0,"Rxx32+=vmpyh(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
217*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2s_s1,"Rxx32+=vmpyh(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
218*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmpy2su_s0 / _s1: mixed-sign variant.  The RsV halfword
 * is read with fGETHALF and the RtV halfword with fGETUHALF, and the product
 * uses fMPY16SU.  Scaling, saturation and the RddV word stores follow the
 * same two-lane pattern (indices 0 and 1).
 */
219*7cf9345cSTaylor Simpson#undef vmac_sema
220*7cf9345cSTaylor Simpson#define vmac_sema(N)\
221*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
222*7cf9345cSTaylor Simpson  fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
223*7cf9345cSTaylor Simpson}
224*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2su_s0,"Rdd32=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
225*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2su_s1,"Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
226*7cf9345cSTaylor Simpson
227*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmac2su_s0 / _s1: accumulating mixed-sign variant.
 * Each word i of RxxV becomes fSAT(word i of RxxV + fSCALE(N, fMPY16SU of the
 * fGETHALF halfword i of RsV and the fGETUHALF halfword i of RtV)).
 */
228*7cf9345cSTaylor Simpson#undef vmac_sema
229*7cf9345cSTaylor Simpson#define vmac_sema(N)\
230*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(0,RsV),fGETUHALF(0,RtV)))));\
231*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SU(fGETHALF(1,RsV),fGETUHALF(1,RtV)))));\
232*7cf9345cSTaylor Simpson}
233*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2su_s0,"Rxx32+=vmpyhsu(Rs32,Rt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
234*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2su_s1,"Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
235*7cf9345cSTaylor Simpson
236*7cf9345cSTaylor Simpson
237*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmpy2s_s0pack / _s1pack: round-and-pack form with a
 * 32-bit destination.  For each lane the scaled signed product has 0x8000
 * added, goes through fSAT, and halfword 1 of that value is written to the
 * matching halfword of RdV.  The statement for lane 1 comes first, then
 * lane 0.
 */
238*7cf9345cSTaylor Simpson#undef vmac_sema
239*7cf9345cSTaylor Simpson#define vmac_sema(N)\
240*7cf9345cSTaylor Simpson{ fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
241*7cf9345cSTaylor Simpson  fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) + 0x8000))));\
242*7cf9345cSTaylor Simpson}
243*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s0pack,"Rd32=vmpyh(Rs32,Rt32):rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
244*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2s_s1pack,"Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(1))
245*7cf9345cSTaylor Simpson
246*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmac2: accumulate without saturation or scaling.  Each
 * word i of RxxV has the signed product of halfword i of RsV and RtV added
 * to it (i = 0 and 1).  The parameter N is not referenced in this body; the
 * single user passes 0.
 */
247*7cf9345cSTaylor Simpson#undef vmac_sema
248*7cf9345cSTaylor Simpson#define vmac_sema(N)\
249*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)));\
250*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)));\
251*7cf9345cSTaylor Simpson}
252*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2,"Rxx32+=vmpyh(Rs32,Rt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
253*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmpy2es_s0 / _s1: operands are the 64-bit pairs RssV
 * and RttV.  Only halfwords 0 and 2 are multiplied; the scaled, saturated
 * products go to words 0 and 1 of RddV respectively.
 */
254*7cf9345cSTaylor Simpson#undef vmac_sema
255*7cf9345cSTaylor Simpson#define vmac_sema(N)\
256*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
257*7cf9345cSTaylor Simpson  fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
258*7cf9345cSTaylor Simpson}
259*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2es_s0,"Rdd32=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
260*7cf9345cSTaylor SimpsonQ6INSN(M2_vmpy2es_s1,"Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
261*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmac2es_s0 / _s1: accumulating form of the macro above.
 * Word 0 of RxxV accumulates the scaled product of halfword 0 of RssV and
 * RttV; word 1 accumulates the scaled product of halfword 2.  Each sum goes
 * through fSAT before being stored.
 */
262*7cf9345cSTaylor Simpson#undef vmac_sema
263*7cf9345cSTaylor Simpson#define vmac_sema(N)\
264*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)))));\
265*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)))));\
266*7cf9345cSTaylor Simpson}
267*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2es_s0,"Rxx32+=vmpyeh(Rss32,Rtt32):sat",ATTRIBS(),"Vector Multiply",vmac_sema(0))
268*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2es_s1,"Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Vector Multiply",vmac_sema(1))
269*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vmac2es: same halfword selection as the two macros
 * above (halfwords 0 and 2 of RssV and RttV) but with no fSCALE and no fSAT;
 * the products are simply added to words 0 and 1 of RxxV.  N is not
 * referenced in this body.
 */
270*7cf9345cSTaylor Simpson#undef vmac_sema
271*7cf9345cSTaylor Simpson#define vmac_sema(N)\
272*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fGETWORD(0,RxxV) + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)));\
273*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,fGETWORD(1,RxxV) + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)));\
274*7cf9345cSTaylor Simpson}
275*7cf9345cSTaylor SimpsonQ6INSN(M2_vmac2es,"Rxx32+=vmpyeh(Rss32,Rtt32)",ATTRIBS(A_ARCHV2),"Vector Multiply",vmac_sema(0))
276*7cf9345cSTaylor Simpson
277*7cf9345cSTaylor Simpson
278*7cf9345cSTaylor Simpson
279*7cf9345cSTaylor Simpson
280*7cf9345cSTaylor Simpson/********************************************************/
281*7cf9345cSTaylor Simpson/* vrmpyh, aka Big Mac, aka Mac Daddy, aka Mac-ac-ac-ac */
282*7cf9345cSTaylor Simpson/* vector mac  4x[16x16] + 64 ->64                      */
283*7cf9345cSTaylor Simpson/********************************************************/
284*7cf9345cSTaylor Simpson
285*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vrmac_s0: adds the four signed halfword products of
 * RssV and RttV (lanes 0 through 3) to the whole 64-bit RxxV in a single
 * expression.  No saturation or scaling is applied and N is not referenced.
 */
286*7cf9345cSTaylor Simpson#undef vmac_sema
287*7cf9345cSTaylor Simpson#define vmac_sema(N)\
288*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
289*7cf9345cSTaylor Simpson              + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
290*7cf9345cSTaylor Simpson              + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
291*7cf9345cSTaylor Simpson              + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
292*7cf9345cSTaylor Simpson}
293*7cf9345cSTaylor SimpsonQ6INSN(M2_vrmac_s0,"Rxx32+=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
294*7cf9345cSTaylor Simpson
/*
 * vmac_sema(N) for M2_vrmpy_s0: non-accumulating form of the macro above.
 * RddV is assigned the sum of the four signed halfword products of RssV and
 * RttV (lanes 0 through 3).  N is not referenced in this body.
 */
295*7cf9345cSTaylor Simpson#undef vmac_sema
296*7cf9345cSTaylor Simpson#define vmac_sema(N)\
297*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))\
298*7cf9345cSTaylor Simpson       + fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))\
299*7cf9345cSTaylor Simpson       + fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))\
300*7cf9345cSTaylor Simpson       + fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
301*7cf9345cSTaylor Simpson}
302*7cf9345cSTaylor SimpsonQ6INSN(M2_vrmpy_s0,"Rdd32=vrmpyh(Rss32,Rtt32)",ATTRIBS(),"Vector Multiply",vmac_sema(0))
303*7cf9345cSTaylor Simpson
304*7cf9345cSTaylor Simpson
305*7cf9345cSTaylor Simpson
306*7cf9345cSTaylor Simpson/******************************************************/
307*7cf9345cSTaylor Simpson/* vector dual macs. just like complex                */
308*7cf9345cSTaylor Simpson/******************************************************/
309*7cf9345cSTaylor Simpson
310*7cf9345cSTaylor Simpson
311*7cf9345cSTaylor Simpson/* With round&pack */
/*
 * dmpy_sema(N) for M2_vdmpyrs_s0 / _s1: each output halfword of RdV is built
 * from a pair of input lanes.  Halfword 0 uses lanes 0 and 1 of RssV and
 * RttV, halfword 1 uses lanes 2 and 3.  The two scaled signed products are
 * summed with 0x8000, passed through fSAT, and halfword 1 of that value is
 * stored.  N is 0 for the _s0 entry and 1 for the _s1 (:<<1) entry.
 */
312*7cf9345cSTaylor Simpson#undef dmpy_sema
313*7cf9345cSTaylor Simpson#define dmpy_sema(N)\
314*7cf9345cSTaylor Simpson{ fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
315*7cf9345cSTaylor Simpson                                  fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))) + 0x8000))));\
316*7cf9345cSTaylor Simpson  fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
317*7cf9345cSTaylor Simpson                                  fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV))) + 0x8000))));\
318*7cf9345cSTaylor Simpson}
319*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpyrs_s0,"Rd32=vdmpy(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "vector dual mac w/ round&pack",dmpy_sema(0))
320*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpyrs_s1,"Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"vector dual mac w/ round&pack",dmpy_sema(1))
321*7cf9345cSTaylor Simpson
322*7cf9345cSTaylor Simpson
323*7cf9345cSTaylor Simpson
324*7cf9345cSTaylor Simpson
325*7cf9345cSTaylor Simpson
326*7cf9345cSTaylor Simpson/******************************************************/
327*7cf9345cSTaylor Simpson/* vector byte multiplies                             */
328*7cf9345cSTaylor Simpson/******************************************************/
329*7cf9345cSTaylor Simpson
330*7cf9345cSTaylor Simpson
/*
 * M5_vrmpybuu: word 0 of RddV is the sum of the products of bytes 0..3 of
 * RssV and RttV; word 1 is the sum for bytes 4..7.  Both operands are read
 * with fGETUBYTE and multiplied with fMPY16SS.
 * NOTE(review): presumably the signed 16-bit multiply is exact here because
 * the byte values fit in 16 bits; confirm against the macro definitions.
 */
331*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmpybuu,"Rdd32=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
332*7cf9345cSTaylor Simpson "vector dual mpy bytes",
333*7cf9345cSTaylor Simpson{
334*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,(fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
335*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
336*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
337*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
338*7cf9345cSTaylor Simpson  fSETWORD(1,RddV,(fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
339*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
340*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
341*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
342*7cf9345cSTaylor Simpson })
343*7cf9345cSTaylor Simpson
/*
 * M5_vrmacbuu: accumulating form of M5_vrmpybuu.  Word 0 of RxxV has the
 * four byte products for bytes 0..3 added to it; word 1 has the products for
 * bytes 4..7 added.  Both operands are read with fGETUBYTE.  No saturation
 * is applied in this body.
 */
344*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmacbuu,"Rxx32+=vrmpybu(Rss32,Rtt32)",ATTRIBS(),
345*7cf9345cSTaylor Simpson "vector dual mac bytes",
346*7cf9345cSTaylor Simpson{
347*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
348*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(0,RssV),fGETUBYTE(0,RttV)) +
349*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(1,RssV),fGETUBYTE(1,RttV)) +
350*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(2,RssV),fGETUBYTE(2,RttV)) +
351*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(3,RssV),fGETUBYTE(3,RttV))));
352*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
353*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(4,RssV),fGETUBYTE(4,RttV)) +
354*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(5,RssV),fGETUBYTE(5,RttV)) +
355*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(6,RssV),fGETUBYTE(6,RttV)) +
356*7cf9345cSTaylor Simpson                   fMPY16SS(fGETUBYTE(7,RssV),fGETUBYTE(7,RttV))));
357*7cf9345cSTaylor Simpson })
358*7cf9345cSTaylor Simpson
358*7cf9345cSTaylor Simpson
359*7cf9345cSTaylor Simpson
/*
 * M5_vrmpybsu: same reduction as M5_vrmpybuu (bytes 0..3 into word 0 of
 * RddV, bytes 4..7 into word 1) but the RssV bytes are read with fGETBYTE
 * while the RttV bytes are read with fGETUBYTE.
 */
360*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmpybsu,"Rdd32=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
361*7cf9345cSTaylor Simpson "vector dual mpy bytes",
362*7cf9345cSTaylor Simpson{
363*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
364*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
365*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
366*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
367*7cf9345cSTaylor Simpson  fSETWORD(1,RddV,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
368*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
369*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
370*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
371*7cf9345cSTaylor Simpson })
372*7cf9345cSTaylor Simpson
/*
 * M5_vrmacbsu: accumulating form of M5_vrmpybsu.  Each word of RxxV has four
 * byte products added to it (bytes 0..3 for word 0, bytes 4..7 for word 1),
 * with RssV bytes read via fGETBYTE and RttV bytes via fGETUBYTE.
 */
373*7cf9345cSTaylor SimpsonQ6INSN(M5_vrmacbsu,"Rxx32+=vrmpybsu(Rss32,Rtt32)",ATTRIBS(),
374*7cf9345cSTaylor Simpson "vector dual mac bytes",
375*7cf9345cSTaylor Simpson{
376*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,(fGETWORD(0,RxxV) +
377*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
378*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)) +
379*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
380*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV))));
381*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,(fGETWORD(1,RxxV) +
382*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
383*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)) +
384*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
385*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV))));
386*7cf9345cSTaylor Simpson })
387*7cf9345cSTaylor Simpson
387*7cf9345cSTaylor Simpson
388*7cf9345cSTaylor Simpson
/*
 * M5_vmpybuu: element-wise byte multiply.  Halfword i of RddV is set to the
 * product of byte i of RsV and byte i of RtV for i = 0..3, with both bytes
 * read via fGETUBYTE.
 */
389*7cf9345cSTaylor SimpsonQ6INSN(M5_vmpybuu,"Rdd32=vmpybu(Rs32,Rt32)",ATTRIBS(),
390*7cf9345cSTaylor Simpson "vector mpy bytes",
391*7cf9345cSTaylor Simpson{
392*7cf9345cSTaylor Simpson  fSETHALF(0,RddV,(fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
393*7cf9345cSTaylor Simpson  fSETHALF(1,RddV,(fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
394*7cf9345cSTaylor Simpson  fSETHALF(2,RddV,(fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
395*7cf9345cSTaylor Simpson  fSETHALF(3,RddV,(fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
396*7cf9345cSTaylor Simpson })
397*7cf9345cSTaylor Simpson
/*
 * M5_vmpybsu: element-wise byte multiply like M5_vmpybuu, except the RsV
 * byte is read with fGETBYTE and the RtV byte with fGETUBYTE.  Halfword i of
 * RddV receives the product for byte i, i = 0..3.
 */
398*7cf9345cSTaylor SimpsonQ6INSN(M5_vmpybsu,"Rdd32=vmpybsu(Rs32,Rt32)",ATTRIBS(),
399*7cf9345cSTaylor Simpson "vector mpy bytes",
400*7cf9345cSTaylor Simpson{
401*7cf9345cSTaylor Simpson  fSETHALF(0,RddV,(fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
402*7cf9345cSTaylor Simpson  fSETHALF(1,RddV,(fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
403*7cf9345cSTaylor Simpson  fSETHALF(2,RddV,(fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
404*7cf9345cSTaylor Simpson  fSETHALF(3,RddV,(fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
405*7cf9345cSTaylor Simpson })
406*7cf9345cSTaylor Simpson
406*7cf9345cSTaylor Simpson
407*7cf9345cSTaylor Simpson
/*
 * M5_vmacbuu: accumulating form of M5_vmpybuu.  Halfword i of RxxV is
 * replaced by its current value (fGETHALF) plus the product of byte i of RsV
 * and RtV, both read via fGETUBYTE.  No saturation is applied in this body.
 */
408*7cf9345cSTaylor SimpsonQ6INSN(M5_vmacbuu,"Rxx32+=vmpybu(Rs32,Rt32)",ATTRIBS(),
409*7cf9345cSTaylor Simpson "vector mac bytes",
410*7cf9345cSTaylor Simpson{
411*7cf9345cSTaylor Simpson  fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETUBYTE(0,RsV),fGETUBYTE(0,RtV))));
412*7cf9345cSTaylor Simpson  fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETUBYTE(1,RsV),fGETUBYTE(1,RtV))));
413*7cf9345cSTaylor Simpson  fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETUBYTE(2,RsV),fGETUBYTE(2,RtV))));
414*7cf9345cSTaylor Simpson  fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETUBYTE(3,RsV),fGETUBYTE(3,RtV))));
415*7cf9345cSTaylor Simpson })
416*7cf9345cSTaylor Simpson
/*
 * M5_vmacbsu: accumulating form of M5_vmpybsu.  Halfword i of RxxV is
 * replaced by its current value plus the product of byte i of RsV (read via
 * fGETBYTE) and byte i of RtV (read via fGETUBYTE), for i = 0..3.
 */
417*7cf9345cSTaylor SimpsonQ6INSN(M5_vmacbsu,"Rxx32+=vmpybsu(Rs32,Rt32)",ATTRIBS(),
418*7cf9345cSTaylor Simpson "vector mac bytes",
419*7cf9345cSTaylor Simpson{
420*7cf9345cSTaylor Simpson  fSETHALF(0,RxxV,(fGETHALF(0,RxxV)+fMPY16SS(fGETBYTE(0,RsV),fGETUBYTE(0,RtV))));
421*7cf9345cSTaylor Simpson  fSETHALF(1,RxxV,(fGETHALF(1,RxxV)+fMPY16SS(fGETBYTE(1,RsV),fGETUBYTE(1,RtV))));
422*7cf9345cSTaylor Simpson  fSETHALF(2,RxxV,(fGETHALF(2,RxxV)+fMPY16SS(fGETBYTE(2,RsV),fGETUBYTE(2,RtV))));
423*7cf9345cSTaylor Simpson  fSETHALF(3,RxxV,(fGETHALF(3,RxxV)+fMPY16SS(fGETBYTE(3,RsV),fGETUBYTE(3,RtV))));
424*7cf9345cSTaylor Simpson })
425*7cf9345cSTaylor Simpson
426*7cf9345cSTaylor Simpson
427*7cf9345cSTaylor Simpson
/*
 * M5_vdmpybsu: each halfword i of RddV (i = 0..3) is fSATN(16, ...) of the
 * sum of two byte products, using bytes 2i and 2i+1 of RssV (read via
 * fGETBYTE) and of RttV (read via fGETUBYTE).
 */
428*7cf9345cSTaylor SimpsonQ6INSN(M5_vdmpybsu,"Rdd32=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
429*7cf9345cSTaylor Simpson "vector quad mpy bytes",
430*7cf9345cSTaylor Simpson{
431*7cf9345cSTaylor Simpson  fSETHALF(0,RddV,fSATN(16,(fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
432*7cf9345cSTaylor Simpson                            fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
433*7cf9345cSTaylor Simpson  fSETHALF(1,RddV,fSATN(16,(fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
434*7cf9345cSTaylor Simpson                            fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
435*7cf9345cSTaylor Simpson  fSETHALF(2,RddV,fSATN(16,(fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
436*7cf9345cSTaylor Simpson                            fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
437*7cf9345cSTaylor Simpson  fSETHALF(3,RddV,fSATN(16,(fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
438*7cf9345cSTaylor Simpson                            fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
439*7cf9345cSTaylor Simpson })
440*7cf9345cSTaylor Simpson
441*7cf9345cSTaylor Simpson
/*
 * M5_vdmacbsu: Rxx += vdmpybsu(Rss,Rtt):sat.
 * Accumulating counterpart of M5_vdmpybsu: for halfword i (0..3) the
 * current halfword of Rxx plus the products of bytes 2i and 2i+1 is
 * passed through fSATN(16, ...) and stored back into halfword i of Rxx.
 */
442*7cf9345cSTaylor SimpsonQ6INSN(M5_vdmacbsu,"Rxx32+=vdmpybsu(Rss32,Rtt32):sat",ATTRIBS(),
443*7cf9345cSTaylor Simpson "vector quad mac bytes",
444*7cf9345cSTaylor Simpson{
445*7cf9345cSTaylor Simpson  fSETHALF(0,RxxV,fSATN(16,(fGETHALF(0,RxxV) +
446*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(0,RssV),fGETUBYTE(0,RttV)) +
447*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(1,RssV),fGETUBYTE(1,RttV)))));
448*7cf9345cSTaylor Simpson  fSETHALF(1,RxxV,fSATN(16,(fGETHALF(1,RxxV) +
449*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(2,RssV),fGETUBYTE(2,RttV)) +
450*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(3,RssV),fGETUBYTE(3,RttV)))));
451*7cf9345cSTaylor Simpson  fSETHALF(2,RxxV,fSATN(16,(fGETHALF(2,RxxV) +
452*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(4,RssV),fGETUBYTE(4,RttV)) +
453*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(5,RssV),fGETUBYTE(5,RttV)))));
454*7cf9345cSTaylor Simpson  fSETHALF(3,RxxV,fSATN(16,(fGETHALF(3,RxxV) +
455*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(6,RssV),fGETUBYTE(6,RttV)) +
456*7cf9345cSTaylor Simpson                   fMPY16SS(fGETBYTE(7,RssV),fGETUBYTE(7,RttV)))));
457*7cf9345cSTaylor Simpson })
458*7cf9345cSTaylor Simpson
459*7cf9345cSTaylor Simpson
460*7cf9345cSTaylor Simpson
461*7cf9345cSTaylor Simpson/* Full version */
/*
 * dmpy_sema(N), accumulating form (Rxx += vdmpy(Rss,Rtt)[:<<1]:sat).
 * Word 0 of Rxx gets fSAT(word 0 + Rss.h0*Rtt.h0 + Rss.h1*Rtt.h1);
 * word 1 gets fSAT(word 1 + Rss.h2*Rtt.h2 + Rss.h3*Rtt.h3).
 * Each product goes through fSCALE(N, ...); N is 0 for the plain form
 * and 1 for the ":<<1" form, as instantiated below.
 */
462*7cf9345cSTaylor Simpson#undef dmpy_sema
463*7cf9345cSTaylor Simpson#define dmpy_sema(N)\
464*7cf9345cSTaylor Simpson{ fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
465*7cf9345cSTaylor Simpson                     fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
466*7cf9345cSTaylor Simpson  fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
467*7cf9345cSTaylor Simpson                     fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
468*7cf9345cSTaylor Simpson}
469*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmacs_s0,"Rxx32+=vdmpy(Rss32,Rtt32):sat",ATTRIBS(),    "",dmpy_sema(0))
470*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmacs_s1,"Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
471*7cf9345cSTaylor Simpson
/*
 * dmpy_sema(N), non-accumulating form (Rdd = vdmpy(Rss,Rtt)[:<<1]:sat).
 * Word 0 of Rdd is fSAT(Rss.h0*Rtt.h0 + Rss.h1*Rtt.h1); word 1 is
 * fSAT(Rss.h2*Rtt.h2 + Rss.h3*Rtt.h3). Each product goes through
 * fSCALE(N, ...) with N = 0 or 1 as instantiated below.
 */
472*7cf9345cSTaylor Simpson#undef dmpy_sema
473*7cf9345cSTaylor Simpson#define dmpy_sema(N)\
474*7cf9345cSTaylor Simpson{ fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV))) + \
475*7cf9345cSTaylor Simpson              fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)))));\
476*7cf9345cSTaylor Simpson  fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV))) + \
477*7cf9345cSTaylor Simpson              fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));\
478*7cf9345cSTaylor Simpson}
479*7cf9345cSTaylor Simpson
480*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpys_s0,"Rdd32=vdmpy(Rss32,Rtt32):sat",ATTRIBS(),    "",dmpy_sema(0))
481*7cf9345cSTaylor SimpsonQ6INSN(M2_vdmpys_s1,"Rdd32=vdmpy(Rss32,Rtt32):<<1:sat",ATTRIBS(),"",dmpy_sema(1))
482*7cf9345cSTaylor Simpson
483*7cf9345cSTaylor Simpson
484*7cf9345cSTaylor Simpson
485*7cf9345cSTaylor Simpson/******************************************************/
486*7cf9345cSTaylor Simpson/* complex multiply/mac where                         */
487*7cf9345cSTaylor Simpson/* real&imag are packed together and always saturated */
488*7cf9345cSTaylor Simpson/* to protect against overflow.                       */
489*7cf9345cSTaylor Simpson/******************************************************/
490*7cf9345cSTaylor Simpson
/*
 * cmpy_sema(N,CONJMINUS,CONJPLUS), rounded/packed form (Rd = cmpy(...):rnd:sat).
 * Halfword 1 of Rd = fGETHALF(1, fSAT(Rs.h1*Rt.h0 CONJMINUS Rs.h0*Rt.h1 + 0x8000)).
 * Halfword 0 of Rd = fGETHALF(1, fSAT(Rs.h0*Rt.h0 CONJPLUS  Rs.h1*Rt.h1 + 0x8000)).
 * Each product goes through fSCALE(N, ...).
 * CONJMINUS/CONJPLUS are operator tokens pasted textually: the plain
 * cmpy instantiations pass (+,-), and the conjugate (Rt*) ones pass
 * (-,+), so the parameter names match the operator used in the
 * conjugate form.
 */
491*7cf9345cSTaylor Simpson#undef cmpy_sema
492*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
493*7cf9345cSTaylor Simpson{ fSETHALF(1,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
494*7cf9345cSTaylor Simpson                                  fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))) + 0x8000))));\
495*7cf9345cSTaylor Simpson  fSETHALF(0,RdV,fGETHALF(1,(fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
496*7cf9345cSTaylor Simpson                                  fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))) + 0x8000))));\
497*7cf9345cSTaylor Simpson}
498*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrs_s0,"Rd32=cmpy(Rs32,Rt32):rnd:sat",ATTRIBS(),    "Complex Multiply",cmpy_sema(0,+,-))
499*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrs_s1,"Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
500*7cf9345cSTaylor Simpson
501*7cf9345cSTaylor Simpson
502*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrsc_s0,"Rd32=cmpy(Rs32,Rt32*):rnd:sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
503*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyrsc_s1,"Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
504*7cf9345cSTaylor Simpson
505*7cf9345cSTaylor Simpson
/*
 * cmpy_sema(N,CONJMINUS,CONJPLUS), accumulating form (Rxx += cmpy(...):sat).
 * Word 1 of Rxx = fSAT(word 1 + Rs.h1*Rt.h0 CONJMINUS Rs.h0*Rt.h1).
 * Word 0 of Rxx = fSAT(word 0 + Rs.h0*Rt.h0 CONJPLUS  Rs.h1*Rt.h1).
 * Each product goes through fSCALE(N, ...). The operator tokens are
 * pasted without extra parentheses, so they apply directly within the
 * running sum. Plain forms pass (+,-); conjugate (Rt*) forms pass (-,+).
 */
506*7cf9345cSTaylor Simpson#undef cmpy_sema
507*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
508*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
509*7cf9345cSTaylor Simpson                                          fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
510*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
511*7cf9345cSTaylor Simpson                                          fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
512*7cf9345cSTaylor Simpson}
513*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacs_s0,"Rxx32+=cmpy(Rs32,Rt32):sat",ATTRIBS(),    "Complex Multiply",cmpy_sema(0,+,-))
514*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacs_s1,"Rxx32+=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
515*7cf9345cSTaylor Simpson
516*7cf9345cSTaylor Simpson/* EJP: Need mac versions w/ CONJ T? */
517*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacsc_s0,"Rxx32+=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
518*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacsc_s1,"Rxx32+=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
519*7cf9345cSTaylor Simpson
520*7cf9345cSTaylor Simpson
/*
 * cmpy_sema(N,CONJMINUS,CONJPLUS), non-accumulating 64-bit form
 * (Rdd = cmpy(...):sat).
 * Word 1 of Rdd = fSAT(Rs.h1*Rt.h0 CONJMINUS Rs.h0*Rt.h1).
 * Word 0 of Rdd = fSAT(Rs.h0*Rt.h0 CONJPLUS  Rs.h1*Rt.h1).
 * Each product goes through fSCALE(N, ...). Plain forms pass (+,-);
 * conjugate (Rt*) forms pass (-,+).
 */
521*7cf9345cSTaylor Simpson#undef cmpy_sema
522*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
523*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
524*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV)))));\
525*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
526*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV)))));\
527*7cf9345cSTaylor Simpson}
528*7cf9345cSTaylor Simpson
529*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpys_s0,"Rdd32=cmpy(Rs32,Rt32):sat",ATTRIBS(),    "Complex Multiply",cmpy_sema(0,+,-))
530*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpys_s1,"Rdd32=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(),"Complex Multiply",cmpy_sema(1,+,-))
531*7cf9345cSTaylor Simpson
532*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpysc_s0,"Rdd32=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
533*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpysc_s1,"Rdd32=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
534*7cf9345cSTaylor Simpson
535*7cf9345cSTaylor Simpson
536*7cf9345cSTaylor Simpson
/*
 * cmpy_sema(N,CONJMINUS,CONJPLUS), subtracting form (Rxx -= cmpy(...):sat).
 * Word 1 of Rxx = fSAT(word 1 - (Rs.h1*Rt.h0 CONJMINUS Rs.h0*Rt.h1)).
 * Word 0 of Rxx = fSAT(word 0 - (Rs.h0*Rt.h0 CONJPLUS  Rs.h1*Rt.h1)).
 * Unlike the "+=" form, the two-product combination is parenthesized
 * before being subtracted, so the pasted operator applies inside the
 * subtrahend. Each product goes through fSCALE(N, ...).
 */
537*7cf9345cSTaylor Simpson#undef cmpy_sema
538*7cf9345cSTaylor Simpson#define cmpy_sema(N,CONJMINUS,CONJPLUS)\
539*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV))) CONJMINUS \
540*7cf9345cSTaylor Simpson                                           fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV))))));\
541*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) - (fSCALE(N,fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV))) CONJPLUS \
542*7cf9345cSTaylor Simpson                                           fSCALE(N,fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV))))));\
543*7cf9345cSTaylor Simpson}
544*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacs_s0,"Rxx32-=cmpy(Rs32,Rt32):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,+,-))
545*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacs_s1,"Rxx32-=cmpy(Rs32,Rt32):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,+,-))
546*7cf9345cSTaylor Simpson
547*7cf9345cSTaylor Simpson/* EJP: need CONJ versions? */
548*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacsc_s0,"Rxx32-=cmpy(Rs32,Rt32*):sat",ATTRIBS(A_ARCHV2),    "Complex Multiply",cmpy_sema(0,-,+))
549*7cf9345cSTaylor SimpsonQ6INSN(M2_cnacsc_s1,"Rxx32-=cmpy(Rs32,Rt32*):<<1:sat",ATTRIBS(A_ARCHV2),"Complex Multiply",cmpy_sema(1,-,+))
550*7cf9345cSTaylor Simpson
551*7cf9345cSTaylor Simpson
552*7cf9345cSTaylor Simpson/******************************************************/
553*7cf9345cSTaylor Simpson/* complex interpolation                              */
554*7cf9345cSTaylor Simpson/* Given a pair of complex values, scale by a,b, sum  */
555*7cf9345cSTaylor Simpson/* Saturate/shift1 and round/pack                     */
556*7cf9345cSTaylor Simpson/******************************************************/
557*7cf9345cSTaylor Simpson
/*
 * vrcmpys_sema(N,INWORD), 64-bit result form (Rdd = vrcmpys(...):<<1:sat:raw:hi/lo).
 * INWORD is an expression selecting one 32-bit word of Rtt; its two
 * halfwords are the scale factors.
 * Word 1 of Rdd = fSAT(Rss.h1*INWORD.h0 + Rss.h3*INWORD.h1).
 * Word 0 of Rdd = fSAT(Rss.h0*INWORD.h0 + Rss.h2*INWORD.h1).
 * Each product goes through fSCALE(N, ...); only N = 1 is instantiated.
 * The ":hi" variant passes word 1 of Rtt, the ":lo" variant word 0.
 */
558*7cf9345cSTaylor Simpson#undef vrcmpys_sema
559*7cf9345cSTaylor Simpson#define vrcmpys_sema(N,INWORD) \
560*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
561*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
562*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
563*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
564*7cf9345cSTaylor Simpson}
565*7cf9345cSTaylor Simpson
566*7cf9345cSTaylor Simpson
567*7cf9345cSTaylor Simpson
568*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1_h,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
569*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1_l,"Rdd32=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
570*7cf9345cSTaylor Simpson
/*
 * vrcmpys_sema(N,INWORD), accumulating form (Rxx += vrcmpys(...):<<1:sat:raw:hi/lo).
 * Word 1 of Rxx = fSAT(word 1 + Rss.h1*INWORD.h0 + Rss.h3*INWORD.h1).
 * Word 0 of Rxx = fSAT(word 0 + Rss.h0*INWORD.h0 + Rss.h2*INWORD.h1).
 * Each product goes through fSCALE(N, ...); only N = 1 is instantiated.
 * The ":hi" variant passes word 1 of Rtt as INWORD, ":lo" passes word 0.
 */
571*7cf9345cSTaylor Simpson#undef vrcmpys_sema
572*7cf9345cSTaylor Simpson#define vrcmpys_sema(N,INWORD) \
573*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
574*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD)))));\
575*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
576*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD)))));\
577*7cf9345cSTaylor Simpson}
578*7cf9345cSTaylor Simpson
579*7cf9345cSTaylor Simpson
580*7cf9345cSTaylor Simpson
581*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_acc_s1_h,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
582*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_acc_s1_l,"Rxx32+=vrcmpys(Rss32,Rtt32):<<1:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
583*7cf9345cSTaylor Simpson
/*
 * vrcmpys_sema(N,INWORD), rounded/packed form (Rd = vrcmpys(...):<<1:rnd:sat:raw:hi/lo).
 * Same two sums as the 64-bit form, but 0x8000 is added inside fSAT and
 * only fGETHALF(1, ...) of each saturated sum is kept:
 * halfword 1 of Rd comes from Rss.h1*INWORD.h0 + Rss.h3*INWORD.h1,
 * halfword 0 of Rd comes from Rss.h0*INWORD.h0 + Rss.h2*INWORD.h1.
 * Each product goes through fSCALE(N, ...); only N = 1 is instantiated.
 */
584*7cf9345cSTaylor Simpson#undef vrcmpys_sema
585*7cf9345cSTaylor Simpson#define vrcmpys_sema(N,INWORD) \
586*7cf9345cSTaylor Simpson{ fSETHALF(1,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,INWORD))) + \
587*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(3,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
588*7cf9345cSTaylor Simpson  fSETHALF(0,RdV,fGETHALF(1,fSAT(fSCALE(N,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,INWORD))) + \
589*7cf9345cSTaylor Simpson                       fSCALE(N,fMPY16SS(fGETHALF(2,RssV),fGETHALF(1,INWORD))) + 0x8000)));\
590*7cf9345cSTaylor Simpson}
591*7cf9345cSTaylor Simpson
592*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1rp_h,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:hi",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(1,RttV)))
593*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpys_s1rp_l,"Rd32=vrcmpys(Rss32,Rtt32):<<1:rnd:sat:raw:lo",ATTRIBS(A_ARCHV3), "Vector Reduce Complex Multiply by Scalar",vrcmpys_sema(1,fGETWORD(0,RttV)))
594*7cf9345cSTaylor Simpson
595*7cf9345cSTaylor Simpson/**************************************************************/
596*7cf9345cSTaylor Simpson/* mixed mode 32x16 vector dual multiplies                    */
597*7cf9345cSTaylor Simpson/*                                                            */
598*7cf9345cSTaylor Simpson/**************************************************************/
599*7cf9345cSTaylor Simpson
600*7cf9345cSTaylor Simpson/* SIGNED 32 x SIGNED 16 */
601*7cf9345cSTaylor Simpson
602*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpyweh(Rss,Rtt)[:<<1]:sat.
 * Word w (1, 0) of Rss is multiplied by the even halfword (2, 0) of Rtt
 * with fMPY3216SS, scaled by fSCALE(N, ...), shifted right by 16, added
 * to word w of Rxx, and the sum is passed through fSAT.
 */
603*7cf9345cSTaylor Simpson#undef mixmpy_sema
604*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
605*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)) ); \
606*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)) ); \
607*7cf9345cSTaylor Simpson}
608*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_s0,"Rxx32+=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
609*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_s1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
610*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpywoh(Rss,Rtt)[:<<1]:sat.
 * Same shape as the vmpyweh accumulate form, but the odd halfwords of
 * Rtt are used: word 1 of Rss pairs with halfword 3, word 0 with
 * halfword 1.
 */
611*7cf9345cSTaylor Simpson#undef mixmpy_sema
612*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
613*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16) )); \
614*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16 ))); \
615*7cf9345cSTaylor Simpson}
616*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_s0,"Rxx32+=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
617*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_s1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
618*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpyweh(Rss,Rtt)[:<<1]:sat.
 * Word w (1, 0) of Rdd = fSAT(fSCALE(N, fMPY3216SS(word w of Rss,
 * even halfword 2/0 of Rtt)) >> 16). No accumulation.
 */
619*7cf9345cSTaylor Simpson#undef mixmpy_sema
620*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
621*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))))>>16)); \
622*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV))))>>16)); \
623*7cf9345cSTaylor Simpson}
624*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_s0,"Rdd32=vmpyweh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
625*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_s1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
626*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpywoh(Rss,Rtt)[:<<1]:sat.
 * Word w (1, 0) of Rdd = fSAT(fSCALE(N, fMPY3216SS(word w of Rss,
 * odd halfword 3/1 of Rtt)) >> 16). No accumulation.
 */
627*7cf9345cSTaylor Simpson#undef mixmpy_sema
628*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
629*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))))>>16)); \
630*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV))))>>16)); \
631*7cf9345cSTaylor Simpson}
632*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_s0,"Rdd32=vmpywoh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
633*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_s1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
634*7cf9345cSTaylor Simpson
635*7cf9345cSTaylor Simpson
636*7cf9345cSTaylor Simpson/* With rounding */
637*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpyweh(Rss,Rtt)[:<<1]:rnd:sat.
 * As the non-rounding accumulate form (even halfwords 2/0 of Rtt), but
 * 0x8000 is added to the scaled product before the >>16; the result is
 * then added to the Rxx word and passed through fSAT.
 */
638*7cf9345cSTaylor Simpson#undef mixmpy_sema
639*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
640*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)) ); \
641*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)) ); \
642*7cf9345cSTaylor Simpson}
643*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_rs0,"Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
644*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacls_rs1,"Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
645*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpywoh(Rss,Rtt)[:<<1]:rnd:sat.
 * Rounding accumulate form using the odd halfwords (3, 1) of Rtt:
 * 0x8000 is added to the scaled product before the >>16, then the
 * result is added to the Rxx word and passed through fSAT.
 */
646*7cf9345cSTaylor Simpson#undef mixmpy_sema
647*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
648*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16) )); \
649*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16 ))); \
650*7cf9345cSTaylor Simpson}
651*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_rs0,"Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
652*7cf9345cSTaylor SimpsonQ6INSN(M2_mmachs_rs1,"Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
653*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpyweh(Rss,Rtt)[:<<1]:rnd:sat.
 * Word w (1, 0) of Rdd = fSAT((fSCALE(N, fMPY3216SS(word w of Rss,
 * even halfword 2/0 of Rtt)) + 0x8000) >> 16).
 */
654*7cf9345cSTaylor Simpson#undef mixmpy_sema
655*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
656*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV)))+0x8000)>>16)); \
657*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)))+0x8000)>>16)); \
658*7cf9345cSTaylor Simpson}
659*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_rs0,"Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
660*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyl_rs1,"Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
661*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpywoh(Rss,Rtt)[:<<1]:rnd:sat.
 * Word w (1, 0) of Rdd = fSAT((fSCALE(N, fMPY3216SS(word w of Rss,
 * odd halfword 3/1 of Rtt)) + 0x8000) >> 16).
 */
662*7cf9345cSTaylor Simpson#undef mixmpy_sema
663*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
664*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV)))+0x8000)>>16)); \
665*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)))+0x8000)>>16)); \
666*7cf9345cSTaylor Simpson}
667*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_rs0,"Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
668*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyh_rs1,"Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
669*7cf9345cSTaylor Simpson
670*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(DEST,EQUALS,N) for vrmpyweh (reducing form).
 * Emits "DEST EQUALS a + b" where a = word 1 of Rss times halfword 2 of
 * Rtt and b = word 0 of Rss times halfword 0 of Rtt, each via
 * fMPY3216SS and fSCALE(N, ...). There is no fSAT and no >>16 here.
 * DEST/EQUALS are pasted textually: (RddV, =) for the plain form and
 * (RxxV, +=) for the accumulating form.
 */
671*7cf9345cSTaylor Simpson#undef mixmpy_sema
672*7cf9345cSTaylor Simpson#define mixmpy_sema(DEST,EQUALS,N)\
673*7cf9345cSTaylor Simpson{ DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(2,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RttV)));}
674*7cf9345cSTaylor Simpson
675*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_s0,"Rdd32=vrmpyweh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
676*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_s1,"Rdd32=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
677*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_acc_s0,"Rxx32+=vrmpyweh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
678*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyeh_acc_s1,"Rxx32+=vrmpyweh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
679*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(DEST,EQUALS,N) for vrmpywoh (reducing form).
 * Emits "DEST EQUALS a + b" where a = word 1 of Rss times halfword 3 of
 * Rtt and b = word 0 of Rss times halfword 1 of Rtt, each via
 * fMPY3216SS and fSCALE(N, ...). There is no fSAT and no >>16 here.
 * DEST/EQUALS are pasted textually: (RddV, =) or (RxxV, +=).
 */
680*7cf9345cSTaylor Simpson#undef mixmpy_sema
681*7cf9345cSTaylor Simpson#define mixmpy_sema(DEST,EQUALS,N)\
682*7cf9345cSTaylor Simpson{ DEST EQUALS fSCALE(N,fMPY3216SS(fGETWORD(1,RssV),fGETHALF(3,RttV))) + fSCALE(N,fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RttV)));}
683*7cf9345cSTaylor Simpson
684*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_s0,"Rdd32=vrmpywoh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RddV,=,0))
685*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_s1,"Rdd32=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RddV,=,1))
686*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_acc_s0,"Rxx32+=vrmpywoh(Rss32,Rtt32)",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(RxxV,+=,0))
687*7cf9345cSTaylor SimpsonQ6INSN(M4_vrmpyoh_acc_s1,"Rxx32+=vrmpywoh(Rss32,Rtt32):<<1",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(RxxV,+=,1))
688*7cf9345cSTaylor Simpson
689*7cf9345cSTaylor Simpson
690*7cf9345cSTaylor Simpson
691*7cf9345cSTaylor Simpson
692*7cf9345cSTaylor Simpson
693*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N,H,RND) for the scalar Rd = mpy(Rs,Rt.L/H):<<1[:rnd]:sat forms.
 * Rd = fSAT((fSCALE(N, fMPY3216SS(Rs, halfword H of Rt)) RND) >> 16).
 * H selects the halfword of Rt (0 for .L, 1 for .H). RND is pasted
 * textually after the scaled product: "+0x8000" for the :rnd variants
 * and an empty argument for the non-rounding ones. Only N = 1 is
 * instantiated.
 */
694*7cf9345cSTaylor Simpson#undef mixmpy_sema
695*7cf9345cSTaylor Simpson#define mixmpy_sema(N,H,RND)\
696*7cf9345cSTaylor Simpson{  RdV = fSAT((fSCALE(N,fMPY3216SS(RsV,fGETHALF(H,RtV)))RND)>>16); \
697*7cf9345cSTaylor Simpson}
698*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyl_rs1,"Rd32=mpy(Rs32,Rt.L32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,+0x8000))
699*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyh_rs1,"Rd32=mpy(Rs32,Rt.H32):<<1:rnd:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,+0x8000))
700*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyl_s1,"Rd32=mpy(Rs32,Rt.L32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,0,))
701*7cf9345cSTaylor SimpsonQ6INSN(M2_hmmpyh_s1,"Rd32=mpy(Rs32,Rt.H32):<<1:sat",ATTRIBS(A_ARCHV2),"Mixed Precision Multiply",mixmpy_sema(1,1,))
702*7cf9345cSTaylor Simpson
703*7cf9345cSTaylor Simpson
704*7cf9345cSTaylor Simpson
705*7cf9345cSTaylor Simpson
706*7cf9345cSTaylor Simpson
707*7cf9345cSTaylor Simpson
708*7cf9345cSTaylor Simpson
709*7cf9345cSTaylor Simpson
710*7cf9345cSTaylor Simpson
711*7cf9345cSTaylor Simpson/* SIGNED 32 x UNSIGNED 16 */
712*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpyweuh(Rss,Rtt)[:<<1]:sat.
 * Counterpart of the vmpyweh accumulate form that uses fMPY3216SU with
 * fGETUHALF operands: word w (1, 0) of Rss times even halfword (2, 0)
 * of Rtt, scaled by fSCALE(N, ...), shifted right by 16, added to word
 * w of Rxx, and passed through fSAT.
 */
713*7cf9345cSTaylor Simpson#undef mixmpy_sema
714*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
715*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)) ); \
716*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)) ); \
717*7cf9345cSTaylor Simpson}
718*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_s0,"Rxx32+=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
719*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_s1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
720*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpywouh(Rss,Rtt)[:<<1]:sat.
 * fMPY3216SU accumulate form using the odd halfwords of Rtt via
 * fGETUHALF: word 1 of Rss pairs with halfword 3, word 0 with
 * halfword 1. The scaled product is shifted right by 16, added to the
 * Rxx word, and passed through fSAT.
 */
721*7cf9345cSTaylor Simpson#undef mixmpy_sema
722*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
723*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16) )); \
724*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16 ))); \
725*7cf9345cSTaylor Simpson}
726*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_s0,"Rxx32+=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
727*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_s1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
728*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpyweuh(Rss,Rtt)[:<<1]:sat.
 * Word w (1, 0) of Rdd = fSAT(fSCALE(N, fMPY3216SU(word w of Rss,
 * fGETUHALF even halfword 2/0 of Rtt)) >> 16). No accumulation.
 */
729*7cf9345cSTaylor Simpson#undef mixmpy_sema
730*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
731*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV))))>>16)); \
732*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV))))>>16)); \
733*7cf9345cSTaylor Simpson}
734*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_s0,"Rdd32=vmpyweuh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
735*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_s1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
736*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpywouh(Rss,Rtt)[:<<1]:sat.
 * Word w (1, 0) of Rdd = fSAT(fSCALE(N, fMPY3216SU(word w of Rss,
 * fGETUHALF odd halfword 3/1 of Rtt)) >> 16). No accumulation.
 */
737*7cf9345cSTaylor Simpson#undef mixmpy_sema
738*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
739*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV))))>>16)); \
740*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV))))>>16)); \
741*7cf9345cSTaylor Simpson}
742*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_s0,"Rdd32=vmpywouh(Rss32,Rtt32):sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
743*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_s1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
744*7cf9345cSTaylor Simpson
745*7cf9345cSTaylor Simpson
746*7cf9345cSTaylor Simpson/* With rounding */
747*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpyweuh(Rss,Rtt)[:<<1]:rnd:sat.
 * fMPY3216SU accumulate form on the even halfwords (2, 0) of Rtt with
 * rounding: 0x8000 is added to the scaled product before the >>16, then
 * the result is added to the Rxx word and passed through fSAT.
 */
748*7cf9345cSTaylor Simpson#undef mixmpy_sema
749*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
750*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)) ); \
751*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)) ); \
752*7cf9345cSTaylor Simpson}
753*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_rs0,"Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
754*7cf9345cSTaylor SimpsonQ6INSN(M2_mmaculs_rs1,"Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
755*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rxx += vmpywouh(Rss,Rtt)[:<<1]:rnd:sat.
 * fMPY3216SU accumulate form on the odd halfwords (3, 1) of Rtt with
 * rounding: 0x8000 is added to the scaled product before the >>16, then
 * the result is added to the Rxx word and passed through fSAT.
 */
756*7cf9345cSTaylor Simpson#undef mixmpy_sema
757*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
758*7cf9345cSTaylor Simpson{ fSETWORD(1,RxxV,fSAT(fGETWORD(1,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16) )); \
759*7cf9345cSTaylor Simpson  fSETWORD(0,RxxV,fSAT(fGETWORD(0,RxxV) + ((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16 ))); \
760*7cf9345cSTaylor Simpson}
761*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_rs0,"Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
762*7cf9345cSTaylor SimpsonQ6INSN(M2_mmacuhs_rs1,"Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
763*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpyweuh(Rss,Rtt)[:<<1]:rnd:sat.
 * Word w (1, 0) of Rdd = fSAT((fSCALE(N, fMPY3216SU(word w of Rss,
 * fGETUHALF even halfword 2/0 of Rtt)) + 0x8000) >> 16).
 */
764*7cf9345cSTaylor Simpson#undef mixmpy_sema
765*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
766*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(2,RttV)))+0x8000)>>16)); \
767*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(0,RttV)))+0x8000)>>16)); \
768*7cf9345cSTaylor Simpson}
769*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_rs0,"Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
770*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyul_rs1,"Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
771*7cf9345cSTaylor Simpson
/*
 * mixmpy_sema(N) for Rdd = vmpywouh(Rss,Rtt)[:<<1]:rnd:sat.
 * Word w (1, 0) of Rdd = fSAT((fSCALE(N, fMPY3216SU(word w of Rss,
 * fGETUHALF odd halfword 3/1 of Rtt)) + 0x8000) >> 16).
 */
772*7cf9345cSTaylor Simpson#undef mixmpy_sema
773*7cf9345cSTaylor Simpson#define mixmpy_sema(N)\
774*7cf9345cSTaylor Simpson{ fSETWORD(1,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(1,RssV),fGETUHALF(3,RttV)))+0x8000)>>16)); \
775*7cf9345cSTaylor Simpson  fSETWORD(0,RddV,fSAT((fSCALE(N,fMPY3216SU(fGETWORD(0,RssV),fGETUHALF(1,RttV)))+0x8000)>>16)); \
776*7cf9345cSTaylor Simpson}
777*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_rs0,"Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat",ATTRIBS(),    "Mixed Precision Multiply",mixmpy_sema(0))
778*7cf9345cSTaylor SimpsonQ6INSN(M2_mmpyuh_rs1,"Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Multiply",mixmpy_sema(1))
779*7cf9345cSTaylor Simpson
780*7cf9345cSTaylor Simpson
781*7cf9345cSTaylor Simpson/**************************************************************/
782*7cf9345cSTaylor Simpson/* complex mac with full 64-bit accum - no sat, no shift      */
783*7cf9345cSTaylor Simpson/* either do real or imag, never both                         */
784*7cf9345cSTaylor Simpson/**************************************************************/
785*7cf9345cSTaylor Simpson
/*
 * M2_vrcmaci_s0: Rxx += vrcmpyi(Rss,Rtt).
 * Adds four cross products to the whole of RxxV with no fSAT/fSCALE:
 * Rss.h1*Rtt.h0 + Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 + Rss.h2*Rtt.h3.
 */
786*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmaci_s0,"Rxx32+=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
787*7cf9345cSTaylor Simpson{
788*7cf9345cSTaylor SimpsonRxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
789*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
790*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
791*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
792*7cf9345cSTaylor Simpson})
793*7cf9345cSTaylor Simpson
/*
 * M2_vrcmacr_s0: Rxx += vrcmpyr(Rss,Rtt).
 * Adds to the whole of RxxV, with no fSAT/fSCALE, the same-index
 * products with alternating sign:
 * Rss.h0*Rtt.h0 - Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 - Rss.h3*Rtt.h3.
 */
794*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmacr_s0,"Rxx32+=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mac Real",
795*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
796*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
797*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
798*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
799*7cf9345cSTaylor Simpson})
800*7cf9345cSTaylor Simpson
/*
 * M2_vrcmaci_s0c: Rxx += vrcmpyi(Rss,Rtt*) (conjugate form).
 * Same cross products as M2_vrcmaci_s0, but the second of each pair is
 * subtracted:
 * Rss.h1*Rtt.h0 - Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 - Rss.h2*Rtt.h3.
 */
801*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmaci_s0c,"Rxx32+=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Imaginary",
802*7cf9345cSTaylor Simpson{
803*7cf9345cSTaylor SimpsonRxxV = RxxV + fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
804*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
805*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
806*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
807*7cf9345cSTaylor Simpson})
808*7cf9345cSTaylor Simpson
/*
 * M2_vrcmacr_s0c: Rxx += vrcmpyr(Rss,Rtt*) (conjugate form).
 * Same same-index products as M2_vrcmacr_s0, but all four are added:
 * Rss.h0*Rtt.h0 + Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 + Rss.h3*Rtt.h3.
 */
809*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmacr_s0c,"Rxx32+=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mac Real",
810*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
811*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
812*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
813*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
814*7cf9345cSTaylor Simpson})
815*7cf9345cSTaylor Simpson
/*
 * M2_cmaci_s0: Rxx += cmpyi(Rs,Rt).
 * Single-pair form on 32-bit sources: adds Rs.h1*Rt.h0 + Rs.h0*Rt.h1
 * to the whole of RxxV, with no fSAT/fSCALE.
 */
816*7cf9345cSTaylor SimpsonQ6INSN(M2_cmaci_s0,"Rxx32+=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Imaginary",
817*7cf9345cSTaylor Simpson{
818*7cf9345cSTaylor SimpsonRxxV = RxxV + fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
819*7cf9345cSTaylor Simpson              fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
820*7cf9345cSTaylor Simpson})
821*7cf9345cSTaylor Simpson
/*
 * M2_cmacr_s0: Rxx += cmpyr(Rs,Rt).
 * Single-pair form on 32-bit sources: adds Rs.h0*Rt.h0 - Rs.h1*Rt.h1
 * to the whole of RxxV, with no fSAT/fSCALE.
 */
822*7cf9345cSTaylor SimpsonQ6INSN(M2_cmacr_s0,"Rxx32+=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mac Real",
823*7cf9345cSTaylor Simpson{ RxxV = RxxV + fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
824*7cf9345cSTaylor Simpson                fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
825*7cf9345cSTaylor Simpson})
826*7cf9345cSTaylor Simpson
827*7cf9345cSTaylor Simpson
/*
 * M2_vrcmpyi_s0: Rdd = vrcmpyi(Rss,Rtt).
 * Non-accumulating counterpart of M2_vrcmaci_s0: RddV is set to
 * Rss.h1*Rtt.h0 + Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 + Rss.h2*Rtt.h3,
 * with no fSAT/fSCALE.
 */
828*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyi_s0,"Rdd32=vrcmpyi(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
829*7cf9345cSTaylor Simpson{
830*7cf9345cSTaylor SimpsonRddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) + \
831*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
832*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) + \
833*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
834*7cf9345cSTaylor Simpson})
835*7cf9345cSTaylor Simpson
/*
 * M2_vrcmpyr_s0: Rdd = vrcmpyr(Rss,Rtt).
 * Non-accumulating counterpart of M2_vrcmacr_s0: RddV is set to
 * Rss.h0*Rtt.h0 - Rss.h1*Rtt.h1 + Rss.h2*Rtt.h2 - Rss.h3*Rtt.h3,
 * with no fSAT/fSCALE.
 */
836*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyr_s0,"Rdd32=vrcmpyr(Rss32,Rtt32)",ATTRIBS(),"Vector Complex Mpy Real",
837*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) - \
838*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
839*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) - \
840*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
841*7cf9345cSTaylor Simpson})
842*7cf9345cSTaylor Simpson
/*
 * M2_vrcmpyi_s0c: Rdd = vrcmpyi(Rss,Rtt*) (conjugate form).
 * RddV is set to
 * Rss.h1*Rtt.h0 - Rss.h0*Rtt.h1 + Rss.h3*Rtt.h2 - Rss.h2*Rtt.h3,
 * with no fSAT/fSCALE.
 */
843*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyi_s0c,"Rdd32=vrcmpyi(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Imaginary",
844*7cf9345cSTaylor Simpson{
845*7cf9345cSTaylor SimpsonRddV = fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) - \
846*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV)) + \
847*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) - \
848*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV));\
849*7cf9345cSTaylor Simpson})
850*7cf9345cSTaylor Simpson
851*7cf9345cSTaylor SimpsonQ6INSN(M2_vrcmpyr_s0c,"Rdd32=vrcmpyr(Rss32,Rtt32*)",ATTRIBS(A_ARCHV2),"Vector Complex Mpy Real",
852*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) + \
853*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV)) + \
854*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) + \
855*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV));\
856*7cf9345cSTaylor Simpson})
857*7cf9345cSTaylor Simpson
858*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyi_s0,"Rdd32=cmpyi(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Imaginary",
859*7cf9345cSTaylor Simpson{
860*7cf9345cSTaylor SimpsonRddV = fMPY16SS(fGETHALF(1,RsV),fGETHALF(0,RtV)) + \
861*7cf9345cSTaylor Simpson       fMPY16SS(fGETHALF(0,RsV),fGETHALF(1,RtV));
862*7cf9345cSTaylor Simpson})
863*7cf9345cSTaylor Simpson
864*7cf9345cSTaylor SimpsonQ6INSN(M2_cmpyr_s0,"Rdd32=cmpyr(Rs32,Rt32)",ATTRIBS(),"Vector Complex Mpy Real",
865*7cf9345cSTaylor Simpson{ RddV = fMPY16SS(fGETHALF(0,RsV),fGETHALF(0,RtV)) - \
866*7cf9345cSTaylor Simpson         fMPY16SS(fGETHALF(1,RsV),fGETHALF(1,RtV));
867*7cf9345cSTaylor Simpson})
868*7cf9345cSTaylor Simpson
869*7cf9345cSTaylor Simpson
870*7cf9345cSTaylor Simpson/**************************************************************/
871*7cf9345cSTaylor Simpson/* Complex mpy/mac with 2x32 bit accum, sat, shift            */
872*7cf9345cSTaylor Simpson/* 32x16 real or imag                                         */
873*7cf9345cSTaylor Simpson/**************************************************************/
874*7cf9345cSTaylor Simpson
875*7cf9345cSTaylor Simpson#if 1
876*7cf9345cSTaylor Simpson
877*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyi_wh,"Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
878*7cf9345cSTaylor Simpson{
879*7cf9345cSTaylor Simpson RdV = fSAT(  (  fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
880*7cf9345cSTaylor Simpson               + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
881*7cf9345cSTaylor Simpson               + 0x4000)>>15);
882*7cf9345cSTaylor Simpson})
883*7cf9345cSTaylor Simpson
884*7cf9345cSTaylor Simpson
885*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyr_wh,"Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
886*7cf9345cSTaylor Simpson{
887*7cf9345cSTaylor Simpson RdV = fSAT(  (  fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
888*7cf9345cSTaylor Simpson               - fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
889*7cf9345cSTaylor Simpson               + 0x4000)>>15);
890*7cf9345cSTaylor Simpson})
891*7cf9345cSTaylor Simpson
892*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyi_whc,"Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
893*7cf9345cSTaylor Simpson{
894*7cf9345cSTaylor Simpson RdV = fSAT(  (  fMPY3216SS(fGETWORD(1,RssV),fGETHALF(0,RtV))
895*7cf9345cSTaylor Simpson               - fMPY3216SS(fGETWORD(0,RssV),fGETHALF(1,RtV))
896*7cf9345cSTaylor Simpson               + 0x4000)>>15);
897*7cf9345cSTaylor Simpson})
898*7cf9345cSTaylor Simpson
899*7cf9345cSTaylor Simpson
900*7cf9345cSTaylor SimpsonQ6INSN(M4_cmpyr_whc,"Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat",ATTRIBS(),"Mixed Precision Complex Multiply",
901*7cf9345cSTaylor Simpson{
902*7cf9345cSTaylor Simpson RdV = fSAT(  (  fMPY3216SS(fGETWORD(0,RssV),fGETHALF(0,RtV))
903*7cf9345cSTaylor Simpson               + fMPY3216SS(fGETWORD(1,RssV),fGETHALF(1,RtV))
904*7cf9345cSTaylor Simpson               + 0x4000)>>15);
905*7cf9345cSTaylor Simpson})
906*7cf9345cSTaylor Simpson
907*7cf9345cSTaylor Simpson
908*7cf9345cSTaylor Simpson#endif
909*7cf9345cSTaylor Simpson
910*7cf9345cSTaylor Simpson/**************************************************************/
911*7cf9345cSTaylor Simpson/* Vector mpy/mac with 2x32 bit accum, sat, shift             */
912*7cf9345cSTaylor Simpson/* either do real or imag,  never both                        */
913*7cf9345cSTaylor Simpson/**************************************************************/
914*7cf9345cSTaylor Simpson
/*
 * VCMPYSEMI: imaginary-part semantics shared by the vcmpyi/vcmaci forms.
 *   DST        destination register pair; words 0 and 1 are written
 *   ACC0/ACC1  optional "<expr> +" prefix for word 0 / word 1 (may be empty)
 *   SHIFT      first argument handed to fSCALE
 *   SAT        macro wrapped around each word's final value
 * Word 0 uses the (1,0) and (0,1) halfword cross products of Rss/Rtt,
 * word 1 uses the (3,2) and (2,3) cross products.
 */
#undef VCMPYSEMI
#define VCMPYSEMI(DST,ACC0,ACC1,SHIFT,SAT) \
    fSETWORD(0,DST,SAT(ACC0 \
        fSCALE(SHIFT,fMPY16SS(fGETHALF(1,RssV),fGETHALF(0,RttV)) \
                   + fMPY16SS(fGETHALF(0,RssV),fGETHALF(1,RttV))))); \
    fSETWORD(1,DST,SAT(ACC1 \
        fSCALE(SHIFT,fMPY16SS(fGETHALF(3,RssV),fGETHALF(2,RttV)) \
                   + fMPY16SS(fGETHALF(2,RssV),fGETHALF(3,RttV)))));
921*7cf9345cSTaylor Simpson
/*
 * VCMPYSEMR: real-part semantics shared by the vcmpyr/vcmacr forms.
 *   DST        destination register pair; words 0 and 1 are written
 *   ACC0/ACC1  optional "<expr> +" prefix for word 0 / word 1 (may be empty)
 *   SHIFT      first argument handed to fSCALE
 *   SAT        macro wrapped around each word's final value
 * Word 0 is half0*half0 - half1*half1 of Rss/Rtt,
 * word 1 is half2*half2 - half3*half3.
 */
#undef VCMPYSEMR
#define VCMPYSEMR(DST,ACC0,ACC1,SHIFT,SAT) \
    fSETWORD(0,DST,SAT(ACC0 \
        fSCALE(SHIFT,fMPY16SS(fGETHALF(0,RssV),fGETHALF(0,RttV)) \
                   - fMPY16SS(fGETHALF(1,RssV),fGETHALF(1,RttV))))); \
    fSETWORD(1,DST,SAT(ACC1 \
        fSCALE(SHIFT,fMPY16SS(fGETHALF(2,RssV),fGETHALF(2,RttV)) \
                   - fMPY16SS(fGETHALF(3,RssV),fGETHALF(3,RttV)))));
928*7cf9345cSTaylor Simpson
929*7cf9345cSTaylor Simpson
/*
 * VCMPYIR: define a matching pair of vector complex multiply instructions,
 * M2_<TAGBASE>i (vcmpyi, semantics from VCMPYSEMI) and
 * M2_<TAGBASE>r (vcmpyr, semantics from VCMPYSEMR).
 *   TAGBASE            tag stem; "i" / "r" is appended
 *   DSTSYN, DSTVAL     destination as it appears in the syntax string / in
 *                      the semantics
 *   ACCSEM             assignment operator text used in the syntax string
 *   ACCVAL0, ACCVAL1   accumulator prefixes forwarded to the semantics macros
 *   SHIFTSYN, SHIFTVAL shift suffix in the syntax string / shift amount
 *   SATSYN, SATVAL     saturation suffix in the syntax string / saturation macro
 * The description of the "r" variant used to be a copy of the "i" one;
 * it now correctly reads "Real".
 */
#undef VCMPYIR
#define VCMPYIR(TAGBASE,DSTSYN,DSTVAL,ACCSEM,ACCVAL0,ACCVAL1,SHIFTSYN,SHIFTVAL,SATSYN,SATVAL) \
Q6INSN(M2_##TAGBASE##i,DSTSYN ACCSEM "vcmpyi(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
    "Vector Complex Multiply Imaginary", { VCMPYSEMI(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); }) \
Q6INSN(M2_##TAGBASE##r,DSTSYN ACCSEM "vcmpyr(Rss32,Rtt32)" SHIFTSYN SATSYN,ATTRIBS(A_ARCHV2), \
    "Vector Complex Multiply Real", { VCMPYSEMR(DSTVAL,ACCVAL0,ACCVAL1,SHIFTVAL,SATVAL); })
936*7cf9345cSTaylor Simpson
937*7cf9345cSTaylor Simpson
/*
 * Instantiate the i/r pairs:
 *   vcmpy_s0_sat_  Rdd  = ...      :sat   (shift value 0, no accumulator prefix)
 *   vcmpy_s1_sat_  Rdd  = ... :<<1 :sat   (shift value 1, no accumulator prefix)
 *   vcmac_s0_sat_  Rxx += ...      :sat   (shift value 0, each word of Rxx
 *                                          is added to its result word)
 */
VCMPYIR(vcmpy_s0_sat_,"Rdd32",RddV,"=",,,"",0,":sat",fSAT)
VCMPYIR(vcmpy_s1_sat_,"Rdd32",RddV,"=",,,":<<1",1,":sat",fSAT)
VCMPYIR(vcmac_s0_sat_,"Rxx32",RxxV,"+=",fGETWORD(0,RxxV) + ,fGETWORD(1,RxxV) + ,"",0,":sat",fSAT)
941*7cf9345cSTaylor Simpson
942*7cf9345cSTaylor Simpson
943*7cf9345cSTaylor Simpson/**********************************************************************
944*7cf9345cSTaylor Simpson *  Rotation  -- by 0, 90, 180, or 270 means mult by 1, J, -1, -J     *
945*7cf9345cSTaylor Simpson *********************************************************************/
946*7cf9345cSTaylor Simpson
/*
 * S2_vcrotate: rotate the two complex values held in Rss.
 * Halfwords 0/1 form the first value and halfwords 2/3 the second; within
 * each pair the lower-numbered halfword is treated as the real part.
 * The selector for the first value is fEXTRACTU_RANGE(RtV,1,0) and for the
 * second fEXTRACTU_RANGE(RtV,3,2) (presumably Rt bits [1:0] and [3:2]).
 * Selector encoding: 0 = unchanged, 1 = multiply by -J, 2 = multiply by J,
 * 3 = multiply by -1.  Every negated halfword is passed through fSATH.
 */
Q6INSN(S2_vcrotate,"Rdd32=vcrotate(Rss32,Rt32)",ATTRIBS(A_ARCHV2),"Rotate complex value by multiple of PI/2",
{
    fHIDE(size1u_t tmp;)
    /* First complex value: halfwords 0 (real) and 1 (imaginary) */
    tmp = fEXTRACTU_RANGE(RtV,1,0);
    if (tmp == 0) { /* No rotation */
        fSETHALF(0,RddV,fGETHALF(0,RssV));
        fSETHALF(1,RddV,fGETHALF(1,RssV));
    } else if (tmp == 1) { /* Multiply by -J */
        fSETHALF(0,RddV,fGETHALF(1,RssV));
        fSETHALF(1,RddV,fSATH(-fGETHALF(0,RssV)));
    } else if (tmp == 2) { /* Multiply by J */
        fSETHALF(0,RddV,fSATH(-fGETHALF(1,RssV)));
        fSETHALF(1,RddV,fGETHALF(0,RssV));
    } else { /* Multiply by -1 */
        /* Sanity check only: the remaining selector value must be 3 */
        fHIDE(if (tmp != 3) fatal("C is broken");)
        fSETHALF(0,RddV,fSATH(-fGETHALF(0,RssV)));
        fSETHALF(1,RddV,fSATH(-fGETHALF(1,RssV)));
    }
    /* Second complex value: halfwords 2 (real) and 3 (imaginary) */
    tmp = fEXTRACTU_RANGE(RtV,3,2);
    if (tmp == 0) { /* No rotation */
        fSETHALF(2,RddV,fGETHALF(2,RssV));
        fSETHALF(3,RddV,fGETHALF(3,RssV));
    } else if (tmp == 1) { /* Multiply by -J */
        fSETHALF(2,RddV,fGETHALF(3,RssV));
        fSETHALF(3,RddV,fSATH(-fGETHALF(2,RssV)));
    } else if (tmp == 2) { /* Multiply by J */
        fSETHALF(2,RddV,fSATH(-fGETHALF(3,RssV)));
        fSETHALF(3,RddV,fGETHALF(2,RssV));
    } else { /* Multiply by -1 */
        /* Sanity check only: the remaining selector value must be 3 */
        fHIDE(if (tmp != 3) fatal("C is broken");)
        fSETHALF(2,RddV,fSATH(-fGETHALF(2,RssV)));
        fSETHALF(3,RddV,fSATH(-fGETHALF(3,RssV)));
    }
})
981*7cf9345cSTaylor Simpson
982*7cf9345cSTaylor Simpson
/*
 * S4_vrcrotate_acc: rotate-and-reduce with accumulation.
 * The eight bytes of Rss are walked as four (byte i, byte i+1) pairs, with
 * byte i used as the real part (tmpr) and byte i+1 as the imaginary part
 * (tmpi).  `control` is loaded with fGETUBYTE(uiV,RtV), i.e. the byte of Rt
 * picked by the #u2 immediate; its low two bits select the rotation for the
 * current pair and it is shifted right by two after every pair.
 * The rotated parts are summed into sumr/sumi, which are finally added to
 * word 0 and word 1 of Rxx.
 */
Q6INSN(S4_vrcrotate_acc,"Rxx32+=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
{
    fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
    fHIDE(int sumr; int sumi;)
    sumr = 0;
    sumi = 0;
    control = fGETUBYTE(uiV,RtV);
    for (i = 0; i < 8; i += 2) {
        tmpr = fGETBYTE(i  ,RssV);
        tmpi = fGETBYTE(i+1,RssV);
        switch (control & 3) {
        case 0: /* No Rotation */
            sumr += tmpr;
            sumi += tmpi;
            break;
        case 1: /* Multiply by -J */
            sumr += tmpi;
            sumi -= tmpr;
            break;
        case 2: /* Multiply by J */
            sumr -= tmpi;
            sumi += tmpr;
            break;
        case 3: /* Multiply by -1 */
            sumr -= tmpr;
            sumi -= tmpi;
            break;
        fHIDE(default: fatal("C is broken!");)
        }
        /* Move the next pair's 2-bit selector into the low bits */
        control = control >> 2;
    }
    fSETWORD(0,RxxV,fGETWORD(0,RxxV) + sumr);
    fSETWORD(1,RxxV,fGETWORD(1,RxxV) + sumi);
})
1017*7cf9345cSTaylor Simpson
/*
 * S4_vrcrotate: rotate-and-reduce without accumulation.
 * The eight bytes of Rss are walked as four (byte i, byte i+1) pairs, with
 * byte i used as the real part (tmpr) and byte i+1 as the imaginary part
 * (tmpi).  `control` is loaded with fGETUBYTE(uiV,RtV), i.e. the byte of Rt
 * picked by the #u2 immediate; its low two bits select the rotation for the
 * current pair and it is shifted right by two after every pair.
 * The rotated parts are summed into sumr/sumi, which are written to word 0
 * and word 1 of Rdd.
 */
Q6INSN(S4_vrcrotate,"Rdd32=vrcrotate(Rss32,Rt32,#u2)",ATTRIBS(),"Rotate and Reduce Bytes",
{
    fHIDE(int i; int tmpr; int tmpi; unsigned int control;)
    fHIDE(int sumr; int sumi;)
    sumr = 0;
    sumi = 0;
    control = fGETUBYTE(uiV,RtV);
    for (i = 0; i < 8; i += 2) {
        tmpr = fGETBYTE(i  ,RssV);
        tmpi = fGETBYTE(i+1,RssV);
        switch (control & 3) {
        case 0: /* No Rotation */
            sumr += tmpr;
            sumi += tmpi;
            break;
        case 1: /* Multiply by -J */
            sumr += tmpi;
            sumi -= tmpr;
            break;
        case 2: /* Multiply by J */
            sumr -= tmpi;
            sumi += tmpr;
            break;
        case 3: /* Multiply by -1 */
            sumr -= tmpr;
            sumi -= tmpi;
            break;
        fHIDE(default: fatal("C is broken!");)
        }
        /* Move the next pair's 2-bit selector into the low bits */
        control = control >> 2;
    }
    fSETWORD(0,RddV,sumr);
    fSETWORD(1,RddV,sumi);
})
1052*7cf9345cSTaylor Simpson
1053*7cf9345cSTaylor Simpson
/*
 * S2_vcnegh: for each of the four halfwords of Rss, bit i of Rt decides
 * whether halfword i is copied to Rdd unchanged (bit clear) or negated and
 * passed through fSATH (bit set).
 */
Q6INSN(S2_vcnegh,"Rdd32=vcnegh(Rss32,Rt32)",ATTRIBS(),"Conditional Negate halfwords",
{
    fHIDE(int i;)
    for (i = 0; i < 4; i++) {
        if (fGETBIT(i,RtV)) {
            fSETHALF(i,RddV,fSATH(-fGETHALF(i,RssV)));
        } else {
            fSETHALF(i,RddV,fGETHALF(i,RssV));
        }
    }
})
1065*7cf9345cSTaylor Simpson
/*
 * S2_vrcnegh: reduce the four halfwords of Rss into the accumulator Rxx.
 * Halfword i is subtracted when bit i of Rt is set and added otherwise.
 * Unlike S2_vcnegh, no fSATH is applied to the negated value here.
 */
Q6INSN(S2_vrcnegh,"Rxx32+=vrcnegh(Rss32,Rt32)",ATTRIBS(),"Vector Reduce Conditional Negate halfwords",
{
    fHIDE(int i;)
    for (i = 0; i < 4; i++) {
        if (fGETBIT(i,RtV)) {
            RxxV += -fGETHALF(i,RssV);
        } else {
            RxxV += fGETHALF(i,RssV);
        }
    }
})
1077*7cf9345cSTaylor Simpson
1078*7cf9345cSTaylor Simpson
1079*7cf9345cSTaylor Simpson/**********************************************************************
1080*7cf9345cSTaylor Simpson *  Finite-field multiplies.  Written by David Hoyle                  *
1081*7cf9345cSTaylor Simpson *********************************************************************/
1082*7cf9345cSTaylor Simpson
/*
 * M4_pmpyw: polynomial (XOR-accumulated, carry-less) multiply of
 * fGETUWORD(0,Rs) by fGETUWORD(0,Rt).  For every set bit i of y, x shifted
 * left by i is XORed into prod; the final prod is written to Rdd.
 * x and prod are unsigned long long so that x << i (i up to 31) keeps its
 * upper bits (assuming fGETUWORD yields a 32-bit value).
 */
Q6INSN(M4_pmpyw,"Rdd32=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
{
        fHIDE(int i; unsigned int y;)
        fHIDE(unsigned long long x; unsigned long long prod;)
        x = fGETUWORD(0, RsV);
        y = fGETUWORD(0, RtV);

        prod = 0;
        for(i=0; i < 32; i++) {
            if((y >> i) & 1) prod ^= (x << i);
        }
        RddV = prod;
})
1096*7cf9345cSTaylor Simpson
/*
 * M4_vpmpyh: two independent polynomial (XOR-accumulated) multiplies.
 * prod0 is built from halfword 0 of Rs and Rt, prod1 from halfword 1.
 * The results are interleaved into Rdd:
 *   half 0 = low half of prod0,  half 1 = low half of prod1,
 *   half 2 = high half of prod0, half 3 = high half of prod1.
 */
Q6INSN(M4_vpmpyh,"Rdd32=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
{
        fHIDE(int i; unsigned int x0; unsigned int x1;)
        fHIDE(unsigned int y0; unsigned int y1;)
        fHIDE(unsigned int prod0; unsigned int prod1;)

        x0 = fGETUHALF(0, RsV);
        x1 = fGETUHALF(1, RsV);
        y0 = fGETUHALF(0, RtV);
        y1 = fGETUHALF(1, RtV);

        prod0 = prod1 = 0;
        /* For each set bit i of y0/y1, XOR in the matching x shifted by i */
        for(i=0; i < 16; i++) {
            if((y0 >> i) & 1) prod0 ^= (x0 << i);
            if((y1 >> i) & 1) prod1 ^= (x1 << i);
        }
        fSETHALF(0,RddV,fGETUHALF(0,prod0));
        fSETHALF(1,RddV,fGETUHALF(0,prod1));
        fSETHALF(2,RddV,fGETUHALF(1,prod0));
        fSETHALF(3,RddV,fGETUHALF(1,prod1));
})
1118*7cf9345cSTaylor Simpson
/*
 * M4_pmpyw_acc: polynomial (XOR-accumulated, carry-less) multiply of
 * fGETUWORD(0,Rs) by fGETUWORD(0,Rt), with the product XORed into Rxx.
 * For every set bit i of y, x shifted left by i is XORed into prod.
 * x and prod are unsigned long long so that x << i (i up to 31) keeps its
 * upper bits (assuming fGETUWORD yields a 32-bit value).
 */
Q6INSN(M4_pmpyw_acc,"Rxx32^=pmpyw(Rs32,Rt32)",ATTRIBS(),"Polynomial 32bit Multiplication with Addition in GF(2)",
{
        fHIDE(int i; unsigned int y;)
        fHIDE(unsigned long long x; unsigned long long prod;)
        x = fGETUWORD(0, RsV);
        y = fGETUWORD(0, RtV);

        prod = 0;
        for(i=0; i < 32; i++) {
            if((y >> i) & 1) prod ^= (x << i);
        }
        RxxV ^= prod;
})
1132*7cf9345cSTaylor Simpson
/*
 * M4_vpmpyh_acc: two independent polynomial (XOR-accumulated) multiplies,
 * XORed into Rxx.  prod0 is built from halfword 0 of Rs and Rt, prod1 from
 * halfword 1.  Each halfword of Rxx is XORed with one product half:
 *   half 0 ^= low half of prod0,  half 1 ^= low half of prod1,
 *   half 2 ^= high half of prod0, half 3 ^= high half of prod1.
 */
Q6INSN(M4_vpmpyh_acc,"Rxx32^=vpmpyh(Rs32,Rt32)",ATTRIBS(),"Dual Polynomial 16bit Multiplication with Addition in GF(2)",
{
        fHIDE(int i; unsigned int x0; unsigned int x1;)
        fHIDE(unsigned int y0; unsigned int y1;)
        fHIDE(unsigned int prod0; unsigned int prod1;)

        x0 = fGETUHALF(0, RsV);
        x1 = fGETUHALF(1, RsV);
        y0 = fGETUHALF(0, RtV);
        y1 = fGETUHALF(1, RtV);

        prod0 = prod1 = 0;
        /* For each set bit i of y0/y1, XOR in the matching x shifted by i */
        for(i=0; i < 16; i++) {
            if((y0 >> i) & 1) prod0 ^= (x0 << i);
            if((y1 >> i) & 1) prod1 ^= (x1 << i);
        }
        fSETHALF(0,RxxV,fGETUHALF(0,RxxV) ^ fGETUHALF(0,prod0));
        fSETHALF(1,RxxV,fGETUHALF(1,RxxV) ^ fGETUHALF(0,prod1));
        fSETHALF(2,RxxV,fGETUHALF(2,RxxV) ^ fGETUHALF(1,prod0));
        fSETHALF(3,RxxV,fGETUHALF(3,RxxV) ^ fGETUHALF(1,prod1));
})
1154*7cf9345cSTaylor Simpson
1155*7cf9345cSTaylor Simpson
1156*7cf9345cSTaylor Simpson/* V70: TINY CORE */
1157*7cf9345cSTaylor Simpson
/*
 * CMPY64: define M7_<TAG> (Rdd = ...) and M7_<TAG>_acc (Rxx += ...).
 *   NAME      mnemonic used in the syntax string
 *   DESC      text appended to the description string
 *   OPERAND1  spelling of the second operand in the syntax string
 *   OP        operator placed between the two fMPY32SS products
 *   W0..W3    word indices: word W0 of Rss times word W1 of Rtt,
 *             OP, word W2 of Rss times word W3 of Rtt
 */
#define CMPY64(TAG,NAME,DESC,OPERAND1,OP,W0,W1,W2,W3) \
Q6INSN(M7_##TAG,"Rdd32=" NAME "(Rss32," OPERAND1 ")", \
    ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 64-bit " DESC, \
    { RddV  = (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) \
            OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})\
Q6INSN(M7_##TAG##_acc,"Rxx32+=" NAME "(Rss32,"OPERAND1")", \
    ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply-Accumulate 64-bit " DESC, \
    { RxxV += (fMPY32SS(fGETWORD(W0, RssV), fGETWORD(W1, RttV)) \
            OP fMPY32SS(fGETWORD(W2, RssV), fGETWORD(W3, RttV)));})
1161*7cf9345cSTaylor Simpson
/*
 * 64-bit complex multiplies (each line also defines the _acc form):
 *   dcmpyrw   w0*w0 - w1*w1      dcmpyrwc  w0*w0 + w1*w1   (Rtt32* operand)
 *   dcmpyiw   w0*w1 + w1*w0      dcmpyiwc  w1*w0 - w0*w1   (Rtt32* operand)
 * where each product is <word of Rss> * <word of Rtt>.
 */
CMPY64(dcmpyrw, "cmpyrw","Real","Rtt32" ,-,0,0,1,1)
CMPY64(dcmpyrwc,"cmpyrw","Real","Rtt32*",+,0,0,1,1)
CMPY64(dcmpyiw, "cmpyiw","Imag","Rtt32" ,+,0,1,1,0)
CMPY64(dcmpyiwc,"cmpyiw","Imag","Rtt32*",-,1,0,0,1)
1166*7cf9345cSTaylor Simpson
/*
 * CMPY128: define M7_<TAG>, "Rd32=<NAME>(Rss32,<OPERAND1>):<<1:sat".
 *   WORD0..WORD3  word indices: word WORD0 of Rss times word WORD1 of Rtt
 *                 (tmp128) and word WORD2 of Rss times word WORD3 of Rtt
 *                 (acc128)
 *   OP            128-bit combiner called as OP(tmp128,acc128); the
 *                 instantiations in this file pass fADD128 or fSUB128
 * Both fMPY32SS products are widened with fCAST8S_16S, combined with OP,
 * shifted right by 31 with fSHIFTR128, narrowed with fCAST16S_8S and
 * finally saturated with fSATW.
 * The description string is kept neutral because this macro is used for
 * both the real (cmpyrw) and the imaginary (cmpyiw) forms.
 */
#define CMPY128(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
Q6INSN(M7_##TAG,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result",  \
{ \
fHIDE(size16s_t acc128;)\
fHIDE(size16s_t tmp128;)\
fHIDE(size8s_t acc64;)\
tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
acc128 = OP(tmp128,acc128);\
acc128 = fSHIFTR128(acc128, 31);\
acc64 =  fCAST16S_8S(acc128);\
RdV = fSATW(acc64);\
})
1180*7cf9345cSTaylor Simpson
1181*7cf9345cSTaylor Simpson
/*
 * 32-bit-result complex multiplies (:<<1:sat):
 *   wcmpyrw   w0*w0 - w1*w1      wcmpyrwc  w0*w0 + w1*w1   (Rtt32* operand)
 *   wcmpyiw   w0*w1 + w1*w0      wcmpyiwc  w1*w0 - w0*w1   (Rtt32* operand)
 * where each product is <word of Rss> * <word of Rtt>.
 */
CMPY128(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
CMPY128(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
CMPY128(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
CMPY128(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)
1186*7cf9345cSTaylor Simpson
1187*7cf9345cSTaylor Simpson
/*
 * CMPY128RND: define M7_<TAG>_rnd, "Rd32=<NAME>(Rss32,<OPERAND1>):<<1:rnd:sat".
 *   WORD0..WORD3  word indices: word WORD0 of Rss times word WORD1 of Rtt
 *                 (tmp128) and word WORD2 of Rss times word WORD3 of Rtt
 *                 (acc128)
 *   OP            128-bit combiner called as OP(tmp128,acc128); the
 *                 instantiations in this file pass fADD128 or fSUB128
 * Both fMPY32SS products are widened with fCAST8S_16S and combined with OP.
 * The rounding constant 0x40000000 (half of the 1 << 31 that the following
 * shift divides by) is then added with fADD128 before the value is shifted
 * right by 31, narrowed with fCAST16S_8S and saturated with fSATW.
 * The description string is kept neutral because this macro is used for
 * both the real (cmpyrw) and the imaginary (cmpyiw) forms.
 */
#define CMPY128RND(TAG, NAME, OPERAND1, WORD0, WORD1, WORD2, WORD3, OP) \
Q6INSN(M7_##TAG##_rnd,"Rd32=" NAME "(Rss32,"OPERAND1"):<<1:rnd:sat",ATTRIBS(A_RESTRICT_SLOT3ONLY),"Complex Multiply 32-bit result with rounding",  \
{ \
fHIDE(size16s_t acc128;)\
fHIDE(size16s_t tmp128;)\
fHIDE(size16s_t const128;)\
fHIDE(size8s_t acc64;)\
tmp128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD0, RssV), fGETWORD(WORD1, RttV)));\
acc128 = fCAST8S_16S(fMPY32SS(fGETWORD(WORD2, RssV), fGETWORD(WORD3, RttV)));\
const128 = fCAST8S_16S(fCONSTLL(0x40000000));\
acc128 = OP(tmp128,acc128);\
acc128 = fADD128(acc128,const128);\
acc128 = fSHIFTR128(acc128, 31);\
acc64 =  fCAST16S_8S(acc128);\
RdV = fSATW(acc64);\
})
1204*7cf9345cSTaylor Simpson
/*
 * Rounding variants of the 32-bit-result complex multiplies (:<<1:rnd:sat);
 * the tags get an "_rnd" suffix inside CMPY128RND:
 *   wcmpyrw   w0*w0 - w1*w1      wcmpyrwc  w0*w0 + w1*w1   (Rtt32* operand)
 *   wcmpyiw   w0*w1 + w1*w0      wcmpyiwc  w1*w0 - w0*w1   (Rtt32* operand)
 * where each product is <word of Rss> * <word of Rtt>.
 */
CMPY128RND(wcmpyrw, "cmpyrw", "Rtt32", 0, 0, 1, 1, fSUB128)
CMPY128RND(wcmpyrwc, "cmpyrw", "Rtt32*", 0, 0, 1, 1, fADD128)
CMPY128RND(wcmpyiw, "cmpyiw", "Rtt32", 0, 1, 1, 0, fADD128)
CMPY128RND(wcmpyiwc, "cmpyiw", "Rtt32*", 1, 0, 0, 1, fSUB128)
1209