/////////////////////////////////////////////////////////////////////////
// $Id: avx2.cc 13520 2018-05-27 19:09:59Z sshwarts $
/////////////////////////////////////////////////////////////////////////
//
//   Copyright (c) 2011-2018 Stanislav Shwartsman
//          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR

#if BX_SUPPORT_AVX

#include "simd_int.h"
#include "simd_compare.h"

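// The handler templates below process the vector one 128-bit lane at a time,
// up to the current vector length (VL) reported by i->getVL(); the
// BX_WRITE_AVX_REGZ form of the writeback also zeroes the destination bits
// above VL, matching VEX zero-upper semantics.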
#define AVX_2OP(HANDLER, func)                                                              \
  /* AVX instruction with two src operands */                                               \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i)                      \
  {                                                                                         \
    BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
    unsigned len = i->getVL();                                                              \
                                                                                            \
    for (unsigned n=0; n < len; n++)                                                        \
      (func)(&op1.vmm128(n), &op2.vmm128(n));                                               \
                                                                                            \
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);                                                  \
                                                                                            \
    BX_NEXT_INSTR(i);                                                                       \
  }

AVX_2OP(VANDPS_VpsHpsWpsR, xmm_andps)
AVX_2OP(VANDNPS_VpsHpsWpsR, xmm_andnps)
AVX_2OP(VXORPS_VpsHpsWpsR, xmm_xorps)
AVX_2OP(VORPS_VpsHpsWpsR, xmm_orps)

AVX_2OP(VUNPCKLPS_VpsHpsWpsR, xmm_unpcklps)
AVX_2OP(VUNPCKLPD_VpdHpdWpdR, xmm_unpcklpd)
AVX_2OP(VUNPCKHPS_VpsHpsWpsR, xmm_unpckhps)
AVX_2OP(VUNPCKHPD_VpdHpdWpdR, xmm_unpckhpd)

AVX_2OP(VPADDB_VdqHdqWdqR, xmm_paddb)
AVX_2OP(VPADDW_VdqHdqWdqR, xmm_paddw)
AVX_2OP(VPADDD_VdqHdqWdqR, xmm_paddd)
AVX_2OP(VPADDQ_VdqHdqWdqR, xmm_paddq)
AVX_2OP(VPSUBB_VdqHdqWdqR, xmm_psubb)
AVX_2OP(VPSUBW_VdqHdqWdqR, xmm_psubw)
AVX_2OP(VPSUBD_VdqHdqWdqR, xmm_psubd)
AVX_2OP(VPSUBQ_VdqHdqWdqR, xmm_psubq)

AVX_2OP(VPCMPEQB_VdqHdqWdqR, xmm_pcmpeqb)
AVX_2OP(VPCMPEQW_VdqHdqWdqR, xmm_pcmpeqw)
AVX_2OP(VPCMPEQD_VdqHdqWdqR, xmm_pcmpeqd)
AVX_2OP(VPCMPEQQ_VdqHdqWdqR, xmm_pcmpeqq)
AVX_2OP(VPCMPGTB_VdqHdqWdqR, xmm_pcmpgtb)
AVX_2OP(VPCMPGTW_VdqHdqWdqR, xmm_pcmpgtw)
AVX_2OP(VPCMPGTD_VdqHdqWdqR, xmm_pcmpgtd)
AVX_2OP(VPCMPGTQ_VdqHdqWdqR, xmm_pcmpgtq)

AVX_2OP(VPMINSB_VdqHdqWdqR, xmm_pminsb)
AVX_2OP(VPMINSW_VdqHdqWdqR, xmm_pminsw)
AVX_2OP(VPMINSD_VdqHdqWdqR, xmm_pminsd)
AVX_2OP(VPMINSQ_VdqHdqWdqR, xmm_pminsq)
AVX_2OP(VPMINUB_VdqHdqWdqR, xmm_pminub)
AVX_2OP(VPMINUW_VdqHdqWdqR, xmm_pminuw)
AVX_2OP(VPMINUD_VdqHdqWdqR, xmm_pminud)
AVX_2OP(VPMINUQ_VdqHdqWdqR, xmm_pminuq)
AVX_2OP(VPMAXSB_VdqHdqWdqR, xmm_pmaxsb)
AVX_2OP(VPMAXSW_VdqHdqWdqR, xmm_pmaxsw)
AVX_2OP(VPMAXSD_VdqHdqWdqR, xmm_pmaxsd)
AVX_2OP(VPMAXSQ_VdqHdqWdqR, xmm_pmaxsq)
AVX_2OP(VPMAXUB_VdqHdqWdqR, xmm_pmaxub)
AVX_2OP(VPMAXUW_VdqHdqWdqR, xmm_pmaxuw)
AVX_2OP(VPMAXUD_VdqHdqWdqR, xmm_pmaxud)
AVX_2OP(VPMAXUQ_VdqHdqWdqR, xmm_pmaxuq)

AVX_2OP(VPSIGNB_VdqHdqWdqR, xmm_psignb)
AVX_2OP(VPSIGNW_VdqHdqWdqR, xmm_psignw)
AVX_2OP(VPSIGND_VdqHdqWdqR, xmm_psignd)

AVX_2OP(VPSUBSB_VdqHdqWdqR, xmm_psubsb)
AVX_2OP(VPSUBSW_VdqHdqWdqR, xmm_psubsw)
AVX_2OP(VPSUBUSB_VdqHdqWdqR, xmm_psubusb)
AVX_2OP(VPSUBUSW_VdqHdqWdqR, xmm_psubusw)
AVX_2OP(VPADDSB_VdqHdqWdqR, xmm_paddsb)
AVX_2OP(VPADDSW_VdqHdqWdqR, xmm_paddsw)
AVX_2OP(VPADDUSB_VdqHdqWdqR, xmm_paddusb)
AVX_2OP(VPADDUSW_VdqHdqWdqR, xmm_paddusw)

AVX_2OP(VPHADDW_VdqHdqWdqR, xmm_phaddw)
AVX_2OP(VPHADDD_VdqHdqWdqR, xmm_phaddd)
AVX_2OP(VPHADDSW_VdqHdqWdqR, xmm_phaddsw)
AVX_2OP(VPHSUBW_VdqHdqWdqR, xmm_phsubw)
AVX_2OP(VPHSUBD_VdqHdqWdqR, xmm_phsubd)
AVX_2OP(VPHSUBSW_VdqHdqWdqR, xmm_phsubsw)

AVX_2OP(VPAVGB_VdqHdqWdqR, xmm_pavgb)
AVX_2OP(VPAVGW_VdqHdqWdqR, xmm_pavgw)

AVX_2OP(VPACKUSWB_VdqHdqWdqR, xmm_packuswb)
AVX_2OP(VPACKSSWB_VdqHdqWdqR, xmm_packsswb)
AVX_2OP(VPACKUSDW_VdqHdqWdqR, xmm_packusdw)
AVX_2OP(VPACKSSDW_VdqHdqWdqR, xmm_packssdw)

AVX_2OP(VPUNPCKLBW_VdqHdqWdqR, xmm_punpcklbw)
AVX_2OP(VPUNPCKLWD_VdqHdqWdqR, xmm_punpcklwd)
AVX_2OP(VPUNPCKHBW_VdqHdqWdqR, xmm_punpckhbw)
AVX_2OP(VPUNPCKHWD_VdqHdqWdqR, xmm_punpckhwd)

AVX_2OP(VPMULLQ_VdqHdqWdqR, xmm_pmullq)
AVX_2OP(VPMULLD_VdqHdqWdqR, xmm_pmulld)
AVX_2OP(VPMULLW_VdqHdqWdqR, xmm_pmullw)
AVX_2OP(VPMULHW_VdqHdqWdqR, xmm_pmulhw)
AVX_2OP(VPMULHUW_VdqHdqWdqR, xmm_pmulhuw)
AVX_2OP(VPMULDQ_VdqHdqWdqR, xmm_pmuldq)
AVX_2OP(VPMULUDQ_VdqHdqWdqR, xmm_pmuludq)
AVX_2OP(VPMULHRSW_VdqHdqWdqR, xmm_pmulhrsw)

AVX_2OP(VPMADDWD_VdqHdqWdqR, xmm_pmaddwd)
AVX_2OP(VPMADDUBSW_VdqHdqWdqR, xmm_pmaddubsw)

AVX_2OP(VPSADBW_VdqHdqWdqR, xmm_psadbw)

AVX_2OP(VPSRAVW_VdqHdqWdqR, xmm_psravw)
AVX_2OP(VPSRAVD_VdqHdqWdqR, xmm_psravd)
AVX_2OP(VPSRAVQ_VdqHdqWdqR, xmm_psravq)
AVX_2OP(VPSLLVW_VdqHdqWdqR, xmm_psllvw)
AVX_2OP(VPSLLVD_VdqHdqWdqR, xmm_psllvd)
AVX_2OP(VPSLLVQ_VdqHdqWdqR, xmm_psllvq)
AVX_2OP(VPSRLVW_VdqHdqWdqR, xmm_psrlvw)
AVX_2OP(VPSRLVD_VdqHdqWdqR, xmm_psrlvd)
AVX_2OP(VPSRLVQ_VdqHdqWdqR, xmm_psrlvq)
AVX_2OP(VPROLVD_VdqHdqWdqR, xmm_prolvd)
AVX_2OP(VPROLVQ_VdqHdqWdqR, xmm_prolvq)
AVX_2OP(VPRORVD_VdqHdqWdqR, xmm_prorvd)
AVX_2OP(VPRORVQ_VdqHdqWdqR, xmm_prorvq)

#define AVX_1OP(HANDLER, func)                                                             \
  /* AVX instruction with single src operand */                                            \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C :: HANDLER (bxInstruction_c *i)                     \
  {                                                                                        \
    BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());                                    \
    unsigned len = i->getVL();                                                             \
                                                                                           \
    for (unsigned n=0; n < len; n++)                                                       \
      (func)(&op.vmm128(n));                                                               \
                                                                                           \
    BX_WRITE_AVX_REGZ(i->dst(), op, len);                                                  \
                                                                                           \
    BX_NEXT_INSTR(i);                                                                      \
  }

AVX_1OP(VPABSB_VdqWdqR, xmm_pabsb)
AVX_1OP(VPABSW_VdqWdqR, xmm_pabsw)
AVX_1OP(VPABSD_VdqWdqR, xmm_pabsd)
AVX_1OP(VPABSQ_VdqWdqR, xmm_pabsq)

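// Packed shift handlers: the shift count is taken from the low 64 bits of the
// second source (an XMM register) and applied uniformly to every 128-bit lane.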
#define AVX_PSHIFT(HANDLER, func)                                                          \
  /* AVX packed shift instruction */                                                       \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i)                      \
  {                                                                                        \
    BxPackedAvxRegister op  = BX_READ_AVX_REG(i->src1());                                  \
    Bit64u count = BX_READ_XMM_REG_LO_QWORD(i->src2());                                    \
    unsigned len = i->getVL();                                                             \
                                                                                           \
    for (unsigned n=0; n < len; n++)                                                       \
      (func)(&op.vmm128(n), count);                                                        \
                                                                                           \
    BX_WRITE_AVX_REGZ(i->dst(), op, len);                                                  \
                                                                                           \
    BX_NEXT_INSTR(i);                                                                      \
  }

AVX_PSHIFT(VPSRLW_VdqHdqWdqR, xmm_psrlw);
AVX_PSHIFT(VPSRLD_VdqHdqWdqR, xmm_psrld);
AVX_PSHIFT(VPSRLQ_VdqHdqWdqR, xmm_psrlq);
AVX_PSHIFT(VPSRAW_VdqHdqWdqR, xmm_psraw);
AVX_PSHIFT(VPSRAD_VdqHdqWdqR, xmm_psrad);
AVX_PSHIFT(VPSRAQ_VdqHdqWdqR, xmm_psraq);
AVX_PSHIFT(VPSLLW_VdqHdqWdqR, xmm_psllw);
AVX_PSHIFT(VPSLLD_VdqHdqWdqR, xmm_pslld);
AVX_PSHIFT(VPSLLQ_VdqHdqWdqR, xmm_psllq);

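// Immediate-count variants: the shift (or rotate, for VPROL*/VPROR*) amount
// comes from the instruction's imm8 byte and is applied to every 128-bit lane.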
#define AVX_PSHIFT_IMM(HANDLER, func)                                                      \
  /* AVX packed shift with imm8 instruction */                                             \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i)                      \
  {                                                                                        \
    BxPackedAvxRegister op  = BX_READ_AVX_REG(i->src());                                   \
    unsigned len = i->getVL();                                                             \
                                                                                           \
    for (unsigned n=0; n < len; n++)                                                       \
      (func)(&op.vmm128(n), i->Ib());                                                      \
                                                                                           \
    BX_WRITE_AVX_REGZ(i->dst(), op, len);                                                  \
                                                                                           \
    BX_NEXT_INSTR(i);                                                                      \
  }

AVX_PSHIFT_IMM(VPSRLW_UdqIb, xmm_psrlw);
AVX_PSHIFT_IMM(VPSRLD_UdqIb, xmm_psrld);
AVX_PSHIFT_IMM(VPSRLQ_UdqIb, xmm_psrlq);
AVX_PSHIFT_IMM(VPSRAW_UdqIb, xmm_psraw);
AVX_PSHIFT_IMM(VPSRAD_UdqIb, xmm_psrad);
AVX_PSHIFT_IMM(VPSRAQ_UdqIb, xmm_psraq);
AVX_PSHIFT_IMM(VPSLLW_UdqIb, xmm_psllw);
AVX_PSHIFT_IMM(VPSLLD_UdqIb, xmm_pslld);
AVX_PSHIFT_IMM(VPSLLQ_UdqIb, xmm_psllq);
AVX_PSHIFT_IMM(VPROLD_UdqIb, xmm_prold);
AVX_PSHIFT_IMM(VPROLQ_UdqIb, xmm_prolq);
AVX_PSHIFT_IMM(VPRORD_UdqIb, xmm_prord);
AVX_PSHIFT_IMM(VPRORQ_UdqIb, xmm_prorq);

AVX_PSHIFT_IMM(VPSRLDQ_UdqIb, xmm_psrldq);
AVX_PSHIFT_IMM(VPSLLDQ_UdqIb, xmm_pslldq);

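// VPSHUFHW/VPSHUFLW shuffle the upper/lower four 16-bit words of each 128-bit
// lane according to the 2-bit selectors packed into imm8, while the other half
// of the lane passes through unchanged.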
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHUFHW_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
  Bit8u order = i->Ib();
  unsigned len = i->getVL();

  result.clear();

  for (unsigned n=0; n < len; n++)
    xmm_pshufhw(&result.vmm128(n), &op.vmm128(n), order);

  BX_WRITE_AVX_REG(i->dst(), result);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHUFLW_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
  Bit8u order = i->Ib();
  unsigned len = i->getVL();

  result.clear();

  for (unsigned n=0; n < len; n++)
    xmm_pshuflw(&result.vmm128(n), &op.vmm128(n), order);

  BX_WRITE_AVX_REG(i->dst(), result);
  BX_NEXT_INSTR(i);
}

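// VPSHUFB: each destination byte is selected from the same 128-bit lane of
// src1 using the low 4 bits of the corresponding src2 byte, or forced to zero
// when bit 7 of that src2 byte is set.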
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPSHUFB_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
  unsigned len = i->getVL();

  result.clear();

  for (unsigned n=0; n < len; n++)
    xmm_pshufb(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n));

  BX_WRITE_AVX_REG(i->dst(), result);
  BX_NEXT_INSTR(i);
}

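// VMPSADBW: each 128-bit lane consumes three bits of the immediate control
// byte (the source block offsets), hence the shift by 3 between lanes.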
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMPSADBW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;

  result.clear();

  Bit8u control = i->Ib();
  unsigned len = i->getVL();

  for (unsigned n=0; n < len; n++) {
    xmm_mpsadbw(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n), control & 0x7);
    control >>= 3;
  }

  BX_WRITE_AVX_REG(i->dst(), result);
  BX_NEXT_INSTR(i);
}

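// VPBLENDW: bit n of imm8 selects between the src1 and src2 word for element n
// of each 128-bit lane; the same 8-bit mask is reused for the upper lane.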
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBLENDW_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());

  unsigned len = i->getVL();
  Bit8u mask = i->Ib();

  for (unsigned n=0; n < len; n++)
    xmm_pblendw(&op1.vmm128(n), &op2.vmm128(n), mask);

  BX_WRITE_AVX_REGZ(i->dst(), op1, len);
  BX_NEXT_INSTR(i);
}

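// VPBROADCASTB/W/D/Q replicate the lowest element of the source XMM register
// into every element of the destination, up to the current vector length.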
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTB_VdqWbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op;
  unsigned len = i->getVL();
  op.clear();

  Bit8u val_8 = BX_READ_XMM_REG_LO_BYTE(i->src());

  for (unsigned n=0; n < len; n++)
    xmm_pbroadcastb(&op.vmm128(n), val_8);

  BX_WRITE_AVX_REG(i->dst(), op);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTW_VdqWwR(bxInstruction_c *i)
{
  BxPackedAvxRegister op;
  unsigned len = i->getVL();
  op.clear();

  Bit16u val_16 = BX_READ_XMM_REG_LO_WORD(i->src());

  for (unsigned n=0; n < len; n++)
    xmm_pbroadcastw(&op.vmm128(n), val_16);

  BX_WRITE_AVX_REG(i->dst(), op);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTD_VdqWdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op;
  unsigned len = i->getVL();
  op.clear();

  Bit32u val_32 = BX_READ_XMM_REG_LO_DWORD(i->src());

  for (unsigned n=0; n < len; n++)
    xmm_pbroadcastd(&op.vmm128(n), val_32);

  BX_WRITE_AVX_REG(i->dst(), op);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBROADCASTQ_VdqWqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op;
  unsigned len = i->getVL();
  op.clear();

  Bit64u val_64 = BX_READ_XMM_REG_LO_QWORD(i->src());

  for (unsigned n=0; n < len; n++)
    xmm_pbroadcastq(&op.vmm128(n), val_64);

  BX_WRITE_AVX_REG(i->dst(), op);
  BX_NEXT_INSTR(i);
}

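// VPMOVSX*: sign-extend the low elements of the source into wider destination
// elements; the number of source elements consumed scales with the vector length.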
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++)
    result.vmm16s(n) = (Bit16s) op.ymmsbyte(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32s(n) = (Bit32s) op.xmmsbyte(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.xmmsbyte(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32s(n) = (Bit32s) op.ymm16s(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.xmm16s(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.ymm32s(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

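// VPMOVZX*: same widening pattern as above, but zero-extended.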
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBW_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++)
    result.vmm16u(n) = (Bit16u) op.ymmubyte(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32u(n) = (Bit32u) op.xmmubyte(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.xmmubyte(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWD_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32u(n) = (Bit32u) op.ymm16u(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.xmm16u(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXDQ_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.ymm32u(n);

  BX_WRITE_AVX_REGZ(i->dst(), result, len);
  BX_NEXT_INSTR(i);
}

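// VPALIGNR operates on each 128-bit lane independently: the src1 and src2
// lanes are concatenated (src1 in the upper half) and shifted right by imm8 bytes.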
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPALIGNR_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
  unsigned len = i->getVL();

  for (unsigned n=0; n<len; n++)
    xmm_palignr(&op2.vmm128(n), &op1.vmm128(n), i->Ib());

  BX_WRITE_AVX_REGZ(i->dst(), op2, len);

  BX_NEXT_INSTR(i);
}

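// VPERMD: a full 256-bit dword permute; src1 supplies eight 3-bit indices and
// src2 supplies the data, with no 128-bit lane restriction.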
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMD_VdqHdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
  BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2()), result;

  for (unsigned n=0;n < 8;n++)
    result.ymm32u(n) = op2.ymm32u(op1.ymm32u(n) & 0x7);

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}

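// VPERMQ: imm8 holds four 2-bit selectors, one per destination qword.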
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMQ_VdqWdqIbR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src()), result;

  ymm_vpermq(&result, &op, i->Ib());

  BX_WRITE_YMM_REGZ(i->dst(), result);
  BX_NEXT_INSTR(i);
}

#endif