1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx.cc 13520 2018-05-27 19:09:59Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 //   Copyright (c) 2011-2018 Stanislav Shwartsman
6 //          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 //  You should have received a copy of the GNU Lesser General Public
19 //  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23 
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28 
29 #if BX_SUPPORT_AVX
30 
31 #include "simd_int.h"
32 
33 /* VZEROUPPER: VEX.128.0F.77 (VEX.W ignore, VEX.VVV #UD) */
VZEROUPPER(bxInstruction_c * i)34 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VZEROUPPER(bxInstruction_c *i)
35 {
36   for(unsigned index=0; index < 16; index++) // clear only 16 registers even if AVX-512 is present
37   {
38     if (index < 8 || long64_mode())
39       BX_CLEAR_AVX_HIGH128(index);
40   }
41 
42   BX_NEXT_INSTR(i);
43 }
44 
45 /* VZEROALL: VEX.256.0F.77 (VEX.W ignore, VEX.VVV #UD) */
VZEROALL(bxInstruction_c * i)46 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VZEROALL(bxInstruction_c *i)
47 {
48   for(unsigned index=0; index < 16; index++) // clear only 16 registers even if AVX-512 is present
49   {
50     if (index < 8 || long64_mode())
51       BX_CLEAR_AVX_REG(index);
52   }
53 
54   BX_NEXT_INSTR(i);
55 }
56 
57 /* VMOVSS: VEX.F3.0F 10 (VEX.W ignore) */
VMOVSS_VssHpsWssR(bxInstruction_c * i)58 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_VssHpsWssR(bxInstruction_c *i)
59 {
60   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src1());
61 
62   op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->src2());
63 
64   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
65 
66   BX_NEXT_INSTR(i);
67 }
68 
/* VMOVSD: VEX.F2.0F 10 (VEX.W ignore) */
VMOVSD_VsdHpdWsdR(bxInstruction_c * i)70 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_VsdHpdWsdR(bxInstruction_c *i)
71 {
72   BxPackedXmmRegister op;
73 
74   op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src2());
75   op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
76 
77   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
78 
79   BX_NEXT_INSTR(i);
80 }
81 
82 /* VMOVAPS: VEX    0F 28 (VEX.W ignore, VEX.VVV #UD) */
83 /* VMOVAPD: VEX.66.0F 28 (VEX.W ignore, VEX.VVV #UD) */
84 /* VMOVDQA: VEX.66.0F 6F (VEX.W ignore, VEX.VVV #UD) */
VMOVAPS_VpsWpsR(bxInstruction_c * i)85 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_VpsWpsR(bxInstruction_c *i)
86 {
87   BX_WRITE_AVX_REGZ(i->dst(), BX_READ_AVX_REG(i->src()), i->getVL());
88 
89   BX_NEXT_INSTR(i);
90 }
91 
// Aligned packed load from memory into a vector register.
// The *_aligned read helpers perform the architectural alignment check.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    read_virtual_zmmword_aligned(i->seg(), eaddr, &BX_READ_AVX_REG(i->dst()));
  else
#endif
  {
    if (len == BX_VL256) {
      read_virtual_ymmword_aligned(i->seg(), eaddr, &BX_READ_YMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH256(i->dst()); // zero bits above 255 of the destination
    }
    else {
      read_virtual_xmmword_aligned(i->seg(), eaddr, &BX_READ_XMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH128(i->dst()); // zero bits above 127 of the destination
    }
  }

  BX_NEXT_INSTR(i);
}
115 
116 /* VMOVUPS: VEX    0F 10 (VEX.W ignore, VEX.VVV #UD) */
117 /* VMOVUPD: VEX.66.0F 10 (VEX.W ignore, VEX.VVV #UD) */
118 /* VMOVDQU: VEX.F3.0F 6F (VEX.W ignore, VEX.VVV #UD) */
// Unaligned packed load from memory into a vector register
// (no alignment check, otherwise identical to the aligned form).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    read_virtual_zmmword(i->seg(), eaddr, &BX_READ_AVX_REG(i->dst()));
  else
#endif
  {
    if (len == BX_VL256) {
      read_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH256(i->dst()); // zero bits above 255 of the destination
    }
    else {
      read_virtual_xmmword(i->seg(), eaddr, &BX_READ_XMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH128(i->dst()); // zero bits above 127 of the destination
    }
  }

  BX_NEXT_INSTR(i);
}
142 
143 /* VMOVUPS: VEX    0F 11 (VEX.W ignore, VEX.VVV #UD) */
144 /* VMOVUPD: VEX.66.0F 11 (VEX.W ignore, VEX.VVV #UD) */
145 /* VMOVDQU: VEX.66.0F 7F (VEX.W ignore, VEX.VVV #UD) */
// Unaligned packed store of a vector register to memory.
// Only the low VL bits are written; the destination register is untouched.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    write_virtual_zmmword(i->seg(), eaddr, &BX_READ_AVX_REG(i->src()));
  else
#endif
  {
    if (len == BX_VL256)
      write_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(i->src()));
    else
      write_virtual_xmmword(i->seg(), eaddr, &BX_READ_XMM_REG(i->src()));
  }

  BX_NEXT_INSTR(i);
}
165 
166 /* VMOVAPS: VEX    0F 29 (VEX.W ignore, VEX.VVV #UD) */
167 /* VMOVAPD: VEX.66.0F 29 (VEX.W ignore, VEX.VVV #UD) */
168 /* VMOVDQA: VEX.66.0F 7F (VEX.W ignore, VEX.VVV #UD) */
// Aligned packed store of a vector register to memory.
// The *_aligned write helpers perform the architectural alignment check.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    write_virtual_zmmword_aligned(i->seg(), eaddr, &BX_READ_AVX_REG(i->src()));
  else
#endif
  {
    if (len == BX_VL256)
      write_virtual_ymmword_aligned(i->seg(), eaddr, &BX_READ_YMM_REG(i->src()));
    else
      write_virtual_xmmword_aligned(i->seg(), eaddr, &BX_READ_XMM_REG(i->src()));
  }

  BX_NEXT_INSTR(i);
}
188 
189 /* VEX.F2.0F 12 (VEX.W ignore, VEX.VVV #UD) */
VMOVDDUP_VpdWpdR(bxInstruction_c * i)190 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDDUP_VpdWpdR(bxInstruction_c *i)
191 {
192   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
193   unsigned len = i->getVL();
194 
195   for (unsigned n=0; n < QWORD_ELEMENTS(len); n+=2) {
196     op.vmm64u(n+1) = op.vmm64u(n);
197   }
198 
199   BX_WRITE_AVX_REGZ(i->dst(), op, len);
200 
201   BX_NEXT_INSTR(i);
202 }
203 
204 /* VEX.F3.0F 12 (VEX.W ignore, VEX.VVV #UD) */
VMOVSLDUP_VpsWpsR(bxInstruction_c * i)205 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSLDUP_VpsWpsR(bxInstruction_c *i)
206 {
207   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
208   unsigned len = i->getVL();
209 
210   for (unsigned n=0; n < DWORD_ELEMENTS(len); n+=2) {
211     op.vmm32u(n+1) = op.vmm32u(n);
212   }
213 
214   BX_WRITE_AVX_REGZ(i->dst(), op, len);
215 
216   BX_NEXT_INSTR(i);
217 }
218 
/* VEX.F3.0F 16 (VEX.W ignore, VEX.VVV #UD) */
VMOVSHDUP_VpsWpsR(bxInstruction_c * i)220 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSHDUP_VpsWpsR(bxInstruction_c *i)
221 {
222   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
223   unsigned len = i->getVL();
224 
225   for (unsigned n=0; n < DWORD_ELEMENTS(len); n+=2) {
226     op.vmm32u(n) = op.vmm32u(n+1);
227   }
228 
229   BX_WRITE_AVX_REGZ(i->dst(), op, len);
230 
231   BX_NEXT_INSTR(i);
232 }
233 
234 /* VEX.0F 12 (VEX.W ignore) */
VMOVHLPS_VpsHpsWps(bxInstruction_c * i)235 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVHLPS_VpsHpsWps(bxInstruction_c *i)
236 {
237   BxPackedXmmRegister op;
238 
239   op.xmm64u(0) = BX_READ_XMM_REG_HI_QWORD(i->src2());
240   op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
241 
242   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
243 
244   BX_NEXT_INSTR(i);
245 }
246 
247 /* VEX.66.0F 12 (VEX.W ignore) */
VMOVLPD_VpdHpdMq(bxInstruction_c * i)248 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVLPD_VpdHpdMq(bxInstruction_c *i)
249 {
250   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
251 
252   BxPackedXmmRegister op;
253 
254   op.xmm64u(0) = read_virtual_qword(i->seg(), eaddr);
255   op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
256 
257   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
258 
259   BX_NEXT_INSTR(i);
260 }
261 
262 /* VEX.0F 16 (VEX.W ignore) */
VMOVLHPS_VpsHpsWps(bxInstruction_c * i)263 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVLHPS_VpsHpsWps(bxInstruction_c *i)
264 {
265   BxPackedXmmRegister op;
266 
267   op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src1());
268   op.xmm64u(1) = BX_READ_XMM_REG_LO_QWORD(i->src2());
269 
270   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
271 
272   BX_NEXT_INSTR(i);
273 }
274 
275 /* VEX.66.0F 16 (VEX.W ignore) */
VMOVHPD_VpdHpdMq(bxInstruction_c * i)276 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVHPD_VpdHpdMq(bxInstruction_c *i)
277 {
278   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
279 
280   BxPackedXmmRegister op;
281 
282   op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src1());
283   op.xmm64u(1) = read_virtual_qword(i->seg(), eaddr);
284 
285   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
286 
287   BX_NEXT_INSTR(i);
288 }
289 
290 /* VEX.0F 50 (VEX.W ignore, VEX.VVV #UD) */
VMOVMSKPS_GdUps(bxInstruction_c * i)291 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVMSKPS_GdUps(bxInstruction_c *i)
292 {
293   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
294   unsigned len = i->getVL();
295   Bit32u mask = 0;
296 
297   for (unsigned n=0; n < len; n++)
298     mask |= xmm_pmovmskd(&op.ymm128(n)) << (4*n);
299 
300   BX_WRITE_32BIT_REGZ(i->dst(), mask);
301 
302   BX_NEXT_INSTR(i);
303 }
304 
305 /* VEX.66.0F 50 (VEX.W ignore, VEX.VVV #UD) */
VMOVMSKPD_GdUpd(bxInstruction_c * i)306 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVMSKPD_GdUpd(bxInstruction_c *i)
307 {
308   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
309   unsigned len = i->getVL();
310   Bit32u mask = 0;
311 
312   for (unsigned n=0; n < len; n++)
313     mask |= xmm_pmovmskq(&op.ymm128(n)) << (2*n);
314 
315   BX_WRITE_32BIT_REGZ(i->dst(), mask);
316 
317   BX_NEXT_INSTR(i);
318 }
319 
/* VEX.66.0F D7 (VEX.W ignore, VEX.VVV #UD) */
VPMOVMSKB_GdUdq(bxInstruction_c * i)321 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVMSKB_GdUdq(bxInstruction_c *i)
322 {
323   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
324   unsigned len = i->getVL();
325   Bit32u mask = 0;
326 
327   for (unsigned n=0; n < len; n++)
328     mask |= xmm_pmovmskb(&op.ymm128(n)) << (16*n);
329 
330   BX_WRITE_32BIT_REGZ(i->dst(), mask);
331 
332   BX_NEXT_INSTR(i);
333 }
334 
335 /* Opcode: VEX.0F.C6 (VEX.W ignore) */
VSHUFPS_VpsHpsWpsIbR(bxInstruction_c * i)336 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHUFPS_VpsHpsWpsIbR(bxInstruction_c *i)
337 {
338   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
339   BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
340   unsigned len = i->getVL();
341 
342   result.clear();
343 
344   for (unsigned n=0; n < len; n++)
345     xmm_shufps(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n), i->Ib());
346 
347   BX_WRITE_AVX_REG(i->dst(), result);
348 
349   BX_NEXT_INSTR(i);
350 }
351 
352 /* Opcode: VEX.66.0F.C6 (VEX.W ignore) */
VSHUFPD_VpdHpdWpdIbR(bxInstruction_c * i)353 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHUFPD_VpdHpdWpdIbR(bxInstruction_c *i)
354 {
355   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
356   BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
357 
358   unsigned len = i->getVL();
359   Bit8u order = i->Ib();
360 
361   result.clear();
362 
363   for (unsigned n=0; n < len; n++) {
364     xmm_shufpd(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n), order);
365     order >>= 2;
366   }
367 
368   BX_WRITE_AVX_REG(i->dst(), result);
369 
370   BX_NEXT_INSTR(i);
371 }
372 
373 /* Opcode: VEX.66.0F.38.17 (VEX.W ignore, VEX.VVV #UD) */
VPTEST_VdqWdqR(bxInstruction_c * i)374 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPTEST_VdqWdqR(bxInstruction_c *i)
375 {
376   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->dst()), op2 = BX_READ_YMM_REG(i->src());
377   unsigned len = i->getVL();
378 
379   unsigned result = EFlagsZFMask | EFlagsCFMask;
380 
381   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
382     if ((op2.ymm64u(n) &  op1.ymm64u(n)) != 0) result &= ~EFlagsZFMask;
383     if ((op2.ymm64u(n) & ~op1.ymm64u(n)) != 0) result &= ~EFlagsCFMask;
384   }
385 
386   setEFlagsOSZAPC(result);
387 
388   BX_NEXT_INSTR(i);
389 }
390 
391 /* Opcode: VEX.256.66.0F.38.1A (VEX.W=0, VEX.VVV #UD) */
VBROADCASTF128_VdqMdq(bxInstruction_c * i)392 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBROADCASTF128_VdqMdq(bxInstruction_c *i)
393 {
394   BxPackedAvxRegister dst;
395   BxPackedXmmRegister src;
396   unsigned len = i->getVL();
397 
398   dst.clear();
399 
400   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
401   read_virtual_xmmword(i->seg(), eaddr, &src);
402 
403   for (unsigned n=0; n < len; n++) {
404     dst.vmm128(n) = src;
405   }
406 
407   BX_WRITE_AVX_REG(i->dst(), dst);
408 
409   BX_NEXT_INSTR(i);
410 }
411 
412 /* Opcode: VEX.66.0F.3A 0C (VEX.W ignore) */
VBLENDPS_VpsHpsWpsIbR(bxInstruction_c * i)413 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDPS_VpsHpsWpsIbR(bxInstruction_c *i)
414 {
415   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
416   unsigned len = i->getVL();
417   Bit8u mask = i->Ib();
418 
419   for (unsigned n=0; n < len; n++) {
420     xmm_blendps(&op1.ymm128(n), &op2.ymm128(n), mask);
421     mask >>= 4;
422   }
423 
424   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
425 
426   BX_NEXT_INSTR(i);
427 }
428 
429 /* Opcode: VEX.66.0F.3A 0D (VEX.W ignore) */
VBLENDPD_VpdHpdWpdIbR(bxInstruction_c * i)430 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDPD_VpdHpdWpdIbR(bxInstruction_c *i)
431 {
432   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
433   unsigned len = i->getVL();
434   Bit8u mask = i->Ib();
435 
436   for (unsigned n=0; n < len; n++) {
437     xmm_blendpd(&op1.ymm128(n), &op2.ymm128(n), mask);
438     mask >>= 2;
439   }
440 
441   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
442 
443   BX_NEXT_INSTR(i);
444 }
445 
446 /* Opcode: VEX.66.0F.3A 4A (VEX.W=0) */
VBLENDVPS_VpsHpsWpsIbR(bxInstruction_c * i)447 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDVPS_VpsHpsWpsIbR(bxInstruction_c *i)
448 {
449   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()),
450            mask = BX_READ_YMM_REG(i->src3());
451 
452   unsigned len = i->getVL();
453 
454   for (unsigned n=0; n < len; n++)
455     xmm_blendvps(&op1.ymm128(n), &op2.ymm128(n), &mask.ymm128(n));
456 
457   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
458 
459   BX_NEXT_INSTR(i);
460 }
461 
462 /* Opcode: VEX.66.0F.3A 4B (VEX.W=0) */
VBLENDVPD_VpdHpdWpdIbR(bxInstruction_c * i)463 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDVPD_VpdHpdWpdIbR(bxInstruction_c *i)
464 {
465   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()),
466            mask = BX_READ_YMM_REG(i->src3());
467 
468   unsigned len = i->getVL();
469 
470   for (unsigned n=0; n < len; n++)
471     xmm_blendvpd(&op1.ymm128(n), &op2.ymm128(n), &mask.ymm128(n));
472 
473   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
474 
475   BX_NEXT_INSTR(i);
476 }
477 
478 /* Opcode: VEX.66.0F.3A 4C (VEX.W=0) */
VPBLENDVB_VdqHdqWdqIbR(bxInstruction_c * i)479 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBLENDVB_VdqHdqWdqIbR(bxInstruction_c *i)
480 {
481   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()),
482            mask = BX_READ_YMM_REG(i->src3());
483 
484   unsigned len = i->getVL();
485 
486   for (unsigned n=0; n < len; n++)
487     xmm_pblendvb(&op1.ymm128(n), &op2.ymm128(n), &mask.ymm128(n));
488 
489   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
490 
491   BX_NEXT_INSTR(i);
492 }
493 
494 /* Opcode: VEX.66.0F.3A 18 (VEX.W=0) */
VINSERTF128_VdqHdqWdqIbR(bxInstruction_c * i)495 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF128_VdqHdqWdqIbR(bxInstruction_c *i)
496 {
497   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src1());
498   unsigned len = i->getVL();
499   unsigned offset = i->Ib() & (len-1);
500 
501   op.vmm128(offset) = BX_READ_XMM_REG(i->src2());
502 
503   BX_WRITE_AVX_REGZ(i->dst(), op, len);
504   BX_NEXT_INSTR(i);
505 }
506 
507 /* Opcode: VEX.66.0F.3A 19 (VEX.W=0, VEX.VVV #UD) */
VEXTRACTF128_WdqVdqIbM(bxInstruction_c * i)508 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXTRACTF128_WdqVdqIbM(bxInstruction_c *i)
509 {
510   unsigned len = i->getVL(), offset = i->Ib() & (len - 1);
511   BxPackedXmmRegister op = BX_READ_AVX_REG_LANE(i->src(), offset);
512 
513   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
514   write_virtual_xmmword(i->seg(), eaddr, &op);
515 
516   BX_NEXT_INSTR(i);
517 }
518 
VEXTRACTF128_WdqVdqIbR(bxInstruction_c * i)519 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXTRACTF128_WdqVdqIbR(bxInstruction_c *i)
520 {
521   unsigned len = i->getVL(), offset = i->Ib() & (len - 1);
522   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), BX_READ_AVX_REG_LANE(i->src(), offset));
523   BX_NEXT_INSTR(i);
524 }
525 
526 /* Opcode: VEX.66.0F.38 0C (VEX.W=0) */
VPERMILPS_VpsHpsWpsR(bxInstruction_c * i)527 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPS_VpsHpsWpsR(bxInstruction_c *i)
528 {
529   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
530   BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
531   unsigned len = i->getVL();
532 
533   result.clear();
534 
535   for (unsigned n=0; n < len; n++)
536     xmm_permilps(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n));
537 
538   BX_WRITE_AVX_REG(i->dst(), result);
539 
540   BX_NEXT_INSTR(i);
541 }
542 
/* Opcode: VEX.66.0F.38 0D (VEX.W=0) */
VPERMILPD_VpdHpdWpdR(bxInstruction_c * i)544 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPD_VpdHpdWpdR(bxInstruction_c *i)
545 {
546   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
547   BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
548   unsigned len = i->getVL();
549 
550   result.clear();
551 
552   for (unsigned n=0; n < len; n++)
553     xmm_permilpd(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n));
554 
555   BX_WRITE_AVX_REG(i->dst(), result);
556 
557   BX_NEXT_INSTR(i);
558 }
559 
560 /* Opcode: VEX.66.0F.3A 04 (VEX.W=0) */
VPERMILPS_VpsWpsIbR(bxInstruction_c * i)561 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPS_VpsWpsIbR(bxInstruction_c *i)
562 {
563   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src()), result;
564   unsigned len = i->getVL();
565   result.clear();
566 
567   for (unsigned n=0; n < len; n++)
568     xmm_shufps(&result.vmm128(n), &op1.vmm128(n), &op1.vmm128(n), i->Ib());
569 
570   BX_WRITE_AVX_REG(i->dst(), result);
571 
572   BX_NEXT_INSTR(i);
573 }
574 
575 /* Opcode: VEX.66.0F.3A 05 (VEX.W=0) */
VPERMILPD_VpdWpdIbR(bxInstruction_c * i)576 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPD_VpdWpdIbR(bxInstruction_c *i)
577 {
578   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src()), result;
579   unsigned len = i->getVL();
580   Bit8u order = i->Ib();
581 
582   result.clear();
583 
584   for (unsigned n=0; n < len; n++) {
585     xmm_shufpd(&result.vmm128(n), &op1.vmm128(n), &op1.vmm128(n), order);
586     order >>= 2;
587   }
588 
589   BX_WRITE_AVX_REG(i->dst(), result);
590 
591   BX_NEXT_INSTR(i);
592 }
593 
594 /* Opcode: VEX.66.0F.3A 06 (VEX.W=0) */
// Permute 128-bit lanes from two 256-bit sources into the destination.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERM2F128_VdqHdqWdqIbR(bxInstruction_c *i)
{
  BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
  BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2()), result;
  Bit8u order = i->Ib();

  // imm8 holds one 4-bit control field per destination lane:
  //   bit 3 - zero the lane
  //   bit 1 - take the lane from src2 (else from src1)
  //   bit 0 - which 128-bit lane of the selected source
  for (unsigned n=0;n<2;n++) {

    if (order & 0x8) {
      result.ymm128(n).clear();
    }
    else {
      if (order & 0x2)
        result.ymm128(n) = op2.ymm128(order & 0x1);
      else
        result.ymm128(n) = op1.ymm128(order & 0x1);
    }

    order >>= 4; // advance to the control field of the next destination lane
  }

  BX_WRITE_YMM_REGZ(i->dst(), result);

  BX_NEXT_INSTR(i);
}
620 
621 /* Opcode: VEX.66.0F.38 2C (VEX.W=0) */
VMASKMOVPS_VpsHpsMps(bxInstruction_c * i)622 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPS_VpsHpsMps(bxInstruction_c *i)
623 {
624   BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());
625   BxPackedAvxRegister result;
626 
627   unsigned opmask  = xmm_pmovmskd(&mask.ymm128(1));
628            opmask <<= 4;
629            opmask |= xmm_pmovmskd(&mask.ymm128(0));
630 
631   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
632 
633   avx_masked_load32(i, eaddr, &result, opmask);
634 
635   BX_WRITE_AVX_REGZ(i->dst(), result, i->getVL());
636 
637   BX_NEXT_INSTR(i);
638 }
639 
640 /* Opcode: VEX.66.0F.38 2D (VEX.W=0) */
VMASKMOVPD_VpdHpdMpd(bxInstruction_c * i)641 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPD_VpdHpdMpd(bxInstruction_c *i)
642 {
643   BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());
644   BxPackedAvxRegister result;
645 
646   unsigned opmask  = xmm_pmovmskq(&mask.ymm128(1));
647            opmask <<= 2;
648            opmask |= xmm_pmovmskq(&mask.ymm128(0));
649 
650   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
651 
652   avx_masked_load64(i, eaddr, &result, opmask);
653 
654   BX_WRITE_AVX_REGZ(i->dst(), result, i->getVL());
655 
656   BX_NEXT_INSTR(i);
657 }
658 
659 /* Opcode: VEX.66.0F.38 2C (VEX.W=0) */
VMASKMOVPS_MpsHpsVps(bxInstruction_c * i)660 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPS_MpsHpsVps(bxInstruction_c *i)
661 {
662   BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());
663 
664   unsigned opmask  = xmm_pmovmskd(&mask.ymm128(1));
665            opmask <<= 4;
666            opmask |= xmm_pmovmskd(&mask.ymm128(0));
667 
668   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
669 
670   avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src2()), opmask);
671 
672   BX_NEXT_INSTR(i);
673 }
674 
675 /* Opcode: VEX.66.0F.38 2D (VEX.W=0) */
VMASKMOVPD_MpdHpdVpd(bxInstruction_c * i)676 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPD_MpdHpdVpd(bxInstruction_c *i)
677 {
678   BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());
679 
680   unsigned opmask  = xmm_pmovmskq(&mask.ymm128(1));
681            opmask <<= 2;
682            opmask |= xmm_pmovmskq(&mask.ymm128(0));
683 
684   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
685 
686   avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src2()), opmask);
687 
688   BX_NEXT_INSTR(i);
689 }
690 
VPINSRB_VdqHdqEbIbR(bxInstruction_c * i)691 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRB_VdqHdqEbIbR(bxInstruction_c *i)
692 {
693   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
694   op1.xmmubyte(i->Ib() & 0xF) = BX_READ_8BIT_REGL(i->src2()); // won't allow reading of AH/CH/BH/DH
695   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
696 
697   BX_NEXT_INSTR(i);
698 }
699 
VPINSRB_VdqHdqEbIbM(bxInstruction_c * i)700 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRB_VdqHdqEbIbM(bxInstruction_c *i)
701 {
702   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
703 
704   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
705   op1.xmmubyte(i->Ib() & 0xF) = read_virtual_byte(i->seg(), eaddr);
706 
707   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
708 
709   BX_NEXT_INSTR(i);
710 }
711 
VPINSRW_VdqHdqEwIbR(bxInstruction_c * i)712 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRW_VdqHdqEwIbR(bxInstruction_c *i)
713 {
714   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
715   op1.xmm16u(i->Ib() & 0x7) = BX_READ_16BIT_REG(i->src2());
716   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
717 
718   BX_NEXT_INSTR(i);
719 }
720 
VPINSRW_VdqHdqEwIbM(bxInstruction_c * i)721 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRW_VdqHdqEwIbM(bxInstruction_c *i)
722 {
723   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
724 
725   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
726   op1.xmm16u(i->Ib() & 0x7) = read_virtual_word(i->seg(), eaddr);
727 
728   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
729 
730   BX_NEXT_INSTR(i);
731 }
732 
VPINSRD_VdqHdqEdIbR(bxInstruction_c * i)733 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRD_VdqHdqEdIbR(bxInstruction_c *i)
734 {
735   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
736   op1.xmm32u(i->Ib() & 3) = BX_READ_32BIT_REG(i->src2());
737   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
738 
739   BX_NEXT_INSTR(i);
740 }
741 
VPINSRD_VdqHdqEdIbM(bxInstruction_c * i)742 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRD_VdqHdqEdIbM(bxInstruction_c *i)
743 {
744   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
745 
746   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
747   op1.xmm32u(i->Ib() & 3) = read_virtual_dword(i->seg(), eaddr);
748 
749   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
750   BX_NEXT_INSTR(i);
751 }
752 
VPINSRQ_VdqHdqEqIbR(bxInstruction_c * i)753 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRQ_VdqHdqEqIbR(bxInstruction_c *i)
754 {
755   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
756   op1.xmm64u(i->Ib() & 1) = BX_READ_64BIT_REG(i->src2());
757   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
758 
759   BX_NEXT_INSTR(i);
760 }
761 
VPINSRQ_VdqHdqEqIbM(bxInstruction_c * i)762 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRQ_VdqHdqEqIbM(bxInstruction_c *i)
763 {
764   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
765 
766   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
767   Bit64u op2 = read_linear_qword(i->seg(), get_laddr64(i->seg(), eaddr));
768   op1.xmm64u(i->Ib() & 1) = op2;
769 
770   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
771   BX_NEXT_INSTR(i);
772 }
773 
// VINSERTPS, register source form.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTPS_VpsHpsWssIbR(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  Bit8u control = i->Ib();

  // imm8[7:6] selects which dword of src2 to insert (register form only)
  BxPackedXmmRegister temp = BX_READ_XMM_REG(i->src2());
  Bit32u op2 = temp.xmm32u((control >> 6) & 3);

  // imm8[5:4] selects the destination dword position
  op1.xmm32u((control >> 4) & 3) = op2;
  // imm8[3:0] is the zero mask: dwords whose mask bit is set are cleared
  // (inverted here because xmm_zero_blendps keeps elements with a set bit)
  xmm_zero_blendps(&op1, &op1, ~control);

  BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());

  BX_NEXT_INSTR(i);
}
789 
// VINSERTPS, memory source form: always loads one dword
// (imm8[7:6] source selector is not used in this form).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTPS_VpsHpsWssIbM(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
  Bit8u control = i->Ib();

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  // imm8[5:4] selects the destination dword position
  op1.xmm32u((control >> 4) & 3) = read_virtual_dword(i->seg(), eaddr);
  // imm8[3:0] is the zero mask: dwords whose mask bit is set are cleared
  // (inverted here because xmm_zero_blendps keeps elements with a set bit)
  xmm_zero_blendps(&op1, &op1, ~control);

  BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());

  BX_NEXT_INSTR(i);
}
803 
804 #endif // BX_SUPPORT_AVX
805