1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx.cc 13520 2018-05-27 19:09:59Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2011-2018 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28
29 #if BX_SUPPORT_AVX
30
31 #include "simd_int.h"
32
33 /* VZEROUPPER: VEX.128.0F.77 (VEX.W ignore, VEX.VVV #UD) */
VZEROUPPER(bxInstruction_c * i)34 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VZEROUPPER(bxInstruction_c *i)
35 {
36 for(unsigned index=0; index < 16; index++) // clear only 16 registers even if AVX-512 is present
37 {
38 if (index < 8 || long64_mode())
39 BX_CLEAR_AVX_HIGH128(index);
40 }
41
42 BX_NEXT_INSTR(i);
43 }
44
45 /* VZEROALL: VEX.256.0F.77 (VEX.W ignore, VEX.VVV #UD) */
VZEROALL(bxInstruction_c * i)46 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VZEROALL(bxInstruction_c *i)
47 {
48 for(unsigned index=0; index < 16; index++) // clear only 16 registers even if AVX-512 is present
49 {
50 if (index < 8 || long64_mode())
51 BX_CLEAR_AVX_REG(index);
52 }
53
54 BX_NEXT_INSTR(i);
55 }
56
57 /* VMOVSS: VEX.F3.0F 10 (VEX.W ignore) */
VMOVSS_VssHpsWssR(bxInstruction_c * i)58 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_VssHpsWssR(bxInstruction_c *i)
59 {
60 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src1());
61
62 op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->src2());
63
64 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
65
66 BX_NEXT_INSTR(i);
67 }
68
69 /* VMOVSS: VEX.F2.0F 10 (VEX.W ignore) */
VMOVSD_VsdHpdWsdR(bxInstruction_c * i)70 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_VsdHpdWsdR(bxInstruction_c *i)
71 {
72 BxPackedXmmRegister op;
73
74 op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src2());
75 op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
76
77 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
78
79 BX_NEXT_INSTR(i);
80 }
81
82 /* VMOVAPS: VEX 0F 28 (VEX.W ignore, VEX.VVV #UD) */
83 /* VMOVAPD: VEX.66.0F 28 (VEX.W ignore, VEX.VVV #UD) */
84 /* VMOVDQA: VEX.66.0F 6F (VEX.W ignore, VEX.VVV #UD) */
VMOVAPS_VpsWpsR(bxInstruction_c * i)85 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_VpsWpsR(bxInstruction_c *i)
86 {
87 BX_WRITE_AVX_REGZ(i->dst(), BX_READ_AVX_REG(i->src()), i->getVL());
88
89 BX_NEXT_INSTR(i);
90 }
91
// Aligned packed load: read a full XMM/YMM/ZMM vector (per vector length)
// from memory into dst, then zero the register bits above the vector length.
// Misalignment is handled inside the *_aligned read helpers.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    read_virtual_zmmword_aligned(i->seg(), eaddr, &BX_READ_AVX_REG(i->dst()));
  else
#endif
  {
    if (len == BX_VL256) {
      read_virtual_ymmword_aligned(i->seg(), eaddr, &BX_READ_YMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH256(i->dst()); // zero bits above 255
    }
    else {
      read_virtual_xmmword_aligned(i->seg(), eaddr, &BX_READ_XMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH128(i->dst()); // zero bits above 127
    }
  }

  BX_NEXT_INSTR(i);
}
115
/* VMOVUPS: VEX    0F 10 (VEX.W ignore, VEX.VVV #UD) */
/* VMOVUPD: VEX.66.0F 10 (VEX.W ignore, VEX.VVV #UD) */
/* VMOVDQU: VEX.F3.0F 6F (VEX.W ignore, VEX.VVV #UD) */
// Unaligned packed load: read a full XMM/YMM/ZMM vector (per vector length)
// from memory into dst, then zero the register bits above the vector length.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    read_virtual_zmmword(i->seg(), eaddr, &BX_READ_AVX_REG(i->dst()));
  else
#endif
  {
    if (len == BX_VL256) {
      read_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH256(i->dst()); // zero bits above 255
    }
    else {
      read_virtual_xmmword(i->seg(), eaddr, &BX_READ_XMM_REG(i->dst()));
      BX_CLEAR_AVX_HIGH128(i->dst()); // zero bits above 127
    }
  }

  BX_NEXT_INSTR(i);
}
142
/* VMOVUPS: VEX    0F 11 (VEX.W ignore, VEX.VVV #UD) */
/* VMOVUPD: VEX.66.0F 11 (VEX.W ignore, VEX.VVV #UD) */
/* VMOVDQU: VEX.F3.0F 7F (VEX.W ignore, VEX.VVV #UD) */
// Unaligned packed store: write the XMM/YMM/ZMM source vector (per vector
// length) to memory. No register state is modified.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    write_virtual_zmmword(i->seg(), eaddr, &BX_READ_AVX_REG(i->src()));
  else
#endif
  {
    if (len == BX_VL256)
      write_virtual_ymmword(i->seg(), eaddr, &BX_READ_YMM_REG(i->src()));
    else
      write_virtual_xmmword(i->seg(), eaddr, &BX_READ_XMM_REG(i->src()));
  }

  BX_NEXT_INSTR(i);
}
165
/* VMOVAPS: VEX    0F 29 (VEX.W ignore, VEX.VVV #UD) */
/* VMOVAPD: VEX.66.0F 29 (VEX.W ignore, VEX.VVV #UD) */
/* VMOVDQA: VEX.66.0F 7F (VEX.W ignore, VEX.VVV #UD) */
// Aligned packed store: write the XMM/YMM/ZMM source vector (per vector
// length) to memory. Misalignment is handled inside the *_aligned helpers.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

#if BX_SUPPORT_EVEX
  if (len == BX_VL512)
    write_virtual_zmmword_aligned(i->seg(), eaddr, &BX_READ_AVX_REG(i->src()));
  else
#endif
  {
    if (len == BX_VL256)
      write_virtual_ymmword_aligned(i->seg(), eaddr, &BX_READ_YMM_REG(i->src()));
    else
      write_virtual_xmmword_aligned(i->seg(), eaddr, &BX_READ_XMM_REG(i->src()));
  }

  BX_NEXT_INSTR(i);
}
188
189 /* VEX.F2.0F 12 (VEX.W ignore, VEX.VVV #UD) */
VMOVDDUP_VpdWpdR(bxInstruction_c * i)190 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDDUP_VpdWpdR(bxInstruction_c *i)
191 {
192 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
193 unsigned len = i->getVL();
194
195 for (unsigned n=0; n < QWORD_ELEMENTS(len); n+=2) {
196 op.vmm64u(n+1) = op.vmm64u(n);
197 }
198
199 BX_WRITE_AVX_REGZ(i->dst(), op, len);
200
201 BX_NEXT_INSTR(i);
202 }
203
204 /* VEX.F3.0F 12 (VEX.W ignore, VEX.VVV #UD) */
VMOVSLDUP_VpsWpsR(bxInstruction_c * i)205 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSLDUP_VpsWpsR(bxInstruction_c *i)
206 {
207 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
208 unsigned len = i->getVL();
209
210 for (unsigned n=0; n < DWORD_ELEMENTS(len); n+=2) {
211 op.vmm32u(n+1) = op.vmm32u(n);
212 }
213
214 BX_WRITE_AVX_REGZ(i->dst(), op, len);
215
216 BX_NEXT_INSTR(i);
217 }
218
/* VEX.F3.0F 16 (VEX.W ignore, VEX.VVV #UD) */
// Duplicate each odd-numbered dword into the preceding even position.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSHDUP_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n+=2) {
    op.vmm32u(n) = op.vmm32u(n+1);
  }

  BX_WRITE_AVX_REGZ(i->dst(), op, len);

  BX_NEXT_INSTR(i);
}
233
234 /* VEX.0F 12 (VEX.W ignore) */
VMOVHLPS_VpsHpsWps(bxInstruction_c * i)235 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVHLPS_VpsHpsWps(bxInstruction_c *i)
236 {
237 BxPackedXmmRegister op;
238
239 op.xmm64u(0) = BX_READ_XMM_REG_HI_QWORD(i->src2());
240 op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
241
242 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
243
244 BX_NEXT_INSTR(i);
245 }
246
247 /* VEX.66.0F 12 (VEX.W ignore) */
VMOVLPD_VpdHpdMq(bxInstruction_c * i)248 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVLPD_VpdHpdMq(bxInstruction_c *i)
249 {
250 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
251
252 BxPackedXmmRegister op;
253
254 op.xmm64u(0) = read_virtual_qword(i->seg(), eaddr);
255 op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
256
257 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
258
259 BX_NEXT_INSTR(i);
260 }
261
262 /* VEX.0F 16 (VEX.W ignore) */
VMOVLHPS_VpsHpsWps(bxInstruction_c * i)263 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVLHPS_VpsHpsWps(bxInstruction_c *i)
264 {
265 BxPackedXmmRegister op;
266
267 op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src1());
268 op.xmm64u(1) = BX_READ_XMM_REG_LO_QWORD(i->src2());
269
270 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
271
272 BX_NEXT_INSTR(i);
273 }
274
275 /* VEX.66.0F 16 (VEX.W ignore) */
VMOVHPD_VpdHpdMq(bxInstruction_c * i)276 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVHPD_VpdHpdMq(bxInstruction_c *i)
277 {
278 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
279
280 BxPackedXmmRegister op;
281
282 op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src1());
283 op.xmm64u(1) = read_virtual_qword(i->seg(), eaddr);
284
285 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
286
287 BX_NEXT_INSTR(i);
288 }
289
290 /* VEX.0F 50 (VEX.W ignore, VEX.VVV #UD) */
VMOVMSKPS_GdUps(bxInstruction_c * i)291 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVMSKPS_GdUps(bxInstruction_c *i)
292 {
293 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
294 unsigned len = i->getVL();
295 Bit32u mask = 0;
296
297 for (unsigned n=0; n < len; n++)
298 mask |= xmm_pmovmskd(&op.ymm128(n)) << (4*n);
299
300 BX_WRITE_32BIT_REGZ(i->dst(), mask);
301
302 BX_NEXT_INSTR(i);
303 }
304
305 /* VEX.66.0F 50 (VEX.W ignore, VEX.VVV #UD) */
VMOVMSKPD_GdUpd(bxInstruction_c * i)306 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVMSKPD_GdUpd(bxInstruction_c *i)
307 {
308 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
309 unsigned len = i->getVL();
310 Bit32u mask = 0;
311
312 for (unsigned n=0; n < len; n++)
313 mask |= xmm_pmovmskq(&op.ymm128(n)) << (2*n);
314
315 BX_WRITE_32BIT_REGZ(i->dst(), mask);
316
317 BX_NEXT_INSTR(i);
318 }
319
/* VEX.66.0F D7 (VEX.W ignore, VEX.VVV #UD) */
// Gather the byte sign bits of every 128-bit lane into one GPR mask
// (16 mask bits per lane).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVMSKB_GdUdq(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  unsigned len = i->getVL();
  Bit32u mask = 0;

  for (unsigned n=0; n < len; n++)
    mask |= xmm_pmovmskb(&op.ymm128(n)) << (16*n);

  BX_WRITE_32BIT_REGZ(i->dst(), mask);

  BX_NEXT_INSTR(i);
}
334
335 /* Opcode: VEX.0F.C6 (VEX.W ignore) */
VSHUFPS_VpsHpsWpsIbR(bxInstruction_c * i)336 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHUFPS_VpsHpsWpsIbR(bxInstruction_c *i)
337 {
338 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
339 BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
340 unsigned len = i->getVL();
341
342 result.clear();
343
344 for (unsigned n=0; n < len; n++)
345 xmm_shufps(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n), i->Ib());
346
347 BX_WRITE_AVX_REG(i->dst(), result);
348
349 BX_NEXT_INSTR(i);
350 }
351
352 /* Opcode: VEX.66.0F.C6 (VEX.W ignore) */
VSHUFPD_VpdHpdWpdIbR(bxInstruction_c * i)353 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSHUFPD_VpdHpdWpdIbR(bxInstruction_c *i)
354 {
355 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
356 BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
357
358 unsigned len = i->getVL();
359 Bit8u order = i->Ib();
360
361 result.clear();
362
363 for (unsigned n=0; n < len; n++) {
364 xmm_shufpd(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n), order);
365 order >>= 2;
366 }
367
368 BX_WRITE_AVX_REG(i->dst(), result);
369
370 BX_NEXT_INSTR(i);
371 }
372
373 /* Opcode: VEX.66.0F.38.17 (VEX.W ignore, VEX.VVV #UD) */
VPTEST_VdqWdqR(bxInstruction_c * i)374 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPTEST_VdqWdqR(bxInstruction_c *i)
375 {
376 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->dst()), op2 = BX_READ_YMM_REG(i->src());
377 unsigned len = i->getVL();
378
379 unsigned result = EFlagsZFMask | EFlagsCFMask;
380
381 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
382 if ((op2.ymm64u(n) & op1.ymm64u(n)) != 0) result &= ~EFlagsZFMask;
383 if ((op2.ymm64u(n) & ~op1.ymm64u(n)) != 0) result &= ~EFlagsCFMask;
384 }
385
386 setEFlagsOSZAPC(result);
387
388 BX_NEXT_INSTR(i);
389 }
390
391 /* Opcode: VEX.256.66.0F.38.1A (VEX.W=0, VEX.VVV #UD) */
VBROADCASTF128_VdqMdq(bxInstruction_c * i)392 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBROADCASTF128_VdqMdq(bxInstruction_c *i)
393 {
394 BxPackedAvxRegister dst;
395 BxPackedXmmRegister src;
396 unsigned len = i->getVL();
397
398 dst.clear();
399
400 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
401 read_virtual_xmmword(i->seg(), eaddr, &src);
402
403 for (unsigned n=0; n < len; n++) {
404 dst.vmm128(n) = src;
405 }
406
407 BX_WRITE_AVX_REG(i->dst(), dst);
408
409 BX_NEXT_INSTR(i);
410 }
411
412 /* Opcode: VEX.66.0F.3A 0C (VEX.W ignore) */
VBLENDPS_VpsHpsWpsIbR(bxInstruction_c * i)413 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDPS_VpsHpsWpsIbR(bxInstruction_c *i)
414 {
415 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
416 unsigned len = i->getVL();
417 Bit8u mask = i->Ib();
418
419 for (unsigned n=0; n < len; n++) {
420 xmm_blendps(&op1.ymm128(n), &op2.ymm128(n), mask);
421 mask >>= 4;
422 }
423
424 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
425
426 BX_NEXT_INSTR(i);
427 }
428
429 /* Opcode: VEX.66.0F.3A 0D (VEX.W ignore) */
VBLENDPD_VpdHpdWpdIbR(bxInstruction_c * i)430 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDPD_VpdHpdWpdIbR(bxInstruction_c *i)
431 {
432 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
433 unsigned len = i->getVL();
434 Bit8u mask = i->Ib();
435
436 for (unsigned n=0; n < len; n++) {
437 xmm_blendpd(&op1.ymm128(n), &op2.ymm128(n), mask);
438 mask >>= 2;
439 }
440
441 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
442
443 BX_NEXT_INSTR(i);
444 }
445
446 /* Opcode: VEX.66.0F.3A 4A (VEX.W=0) */
VBLENDVPS_VpsHpsWpsIbR(bxInstruction_c * i)447 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDVPS_VpsHpsWpsIbR(bxInstruction_c *i)
448 {
449 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()),
450 mask = BX_READ_YMM_REG(i->src3());
451
452 unsigned len = i->getVL();
453
454 for (unsigned n=0; n < len; n++)
455 xmm_blendvps(&op1.ymm128(n), &op2.ymm128(n), &mask.ymm128(n));
456
457 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
458
459 BX_NEXT_INSTR(i);
460 }
461
462 /* Opcode: VEX.66.0F.3A 4B (VEX.W=0) */
VBLENDVPD_VpdHpdWpdIbR(bxInstruction_c * i)463 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VBLENDVPD_VpdHpdWpdIbR(bxInstruction_c *i)
464 {
465 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()),
466 mask = BX_READ_YMM_REG(i->src3());
467
468 unsigned len = i->getVL();
469
470 for (unsigned n=0; n < len; n++)
471 xmm_blendvpd(&op1.ymm128(n), &op2.ymm128(n), &mask.ymm128(n));
472
473 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
474
475 BX_NEXT_INSTR(i);
476 }
477
478 /* Opcode: VEX.66.0F.3A 4C (VEX.W=0) */
VPBLENDVB_VdqHdqWdqIbR(bxInstruction_c * i)479 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPBLENDVB_VdqHdqWdqIbR(bxInstruction_c *i)
480 {
481 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2()),
482 mask = BX_READ_YMM_REG(i->src3());
483
484 unsigned len = i->getVL();
485
486 for (unsigned n=0; n < len; n++)
487 xmm_pblendvb(&op1.ymm128(n), &op2.ymm128(n), &mask.ymm128(n));
488
489 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
490
491 BX_NEXT_INSTR(i);
492 }
493
494 /* Opcode: VEX.66.0F.3A 18 (VEX.W=0) */
VINSERTF128_VdqHdqWdqIbR(bxInstruction_c * i)495 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTF128_VdqHdqWdqIbR(bxInstruction_c *i)
496 {
497 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src1());
498 unsigned len = i->getVL();
499 unsigned offset = i->Ib() & (len-1);
500
501 op.vmm128(offset) = BX_READ_XMM_REG(i->src2());
502
503 BX_WRITE_AVX_REGZ(i->dst(), op, len);
504 BX_NEXT_INSTR(i);
505 }
506
507 /* Opcode: VEX.66.0F.3A 19 (VEX.W=0, VEX.VVV #UD) */
VEXTRACTF128_WdqVdqIbM(bxInstruction_c * i)508 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXTRACTF128_WdqVdqIbM(bxInstruction_c *i)
509 {
510 unsigned len = i->getVL(), offset = i->Ib() & (len - 1);
511 BxPackedXmmRegister op = BX_READ_AVX_REG_LANE(i->src(), offset);
512
513 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
514 write_virtual_xmmword(i->seg(), eaddr, &op);
515
516 BX_NEXT_INSTR(i);
517 }
518
VEXTRACTF128_WdqVdqIbR(bxInstruction_c * i)519 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VEXTRACTF128_WdqVdqIbR(bxInstruction_c *i)
520 {
521 unsigned len = i->getVL(), offset = i->Ib() & (len - 1);
522 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), BX_READ_AVX_REG_LANE(i->src(), offset));
523 BX_NEXT_INSTR(i);
524 }
525
526 /* Opcode: VEX.66.0F.38 0C (VEX.W=0) */
VPERMILPS_VpsHpsWpsR(bxInstruction_c * i)527 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPS_VpsHpsWpsR(bxInstruction_c *i)
528 {
529 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
530 BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
531 unsigned len = i->getVL();
532
533 result.clear();
534
535 for (unsigned n=0; n < len; n++)
536 xmm_permilps(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n));
537
538 BX_WRITE_AVX_REG(i->dst(), result);
539
540 BX_NEXT_INSTR(i);
541 }
542
/* Opcode: VEX.66.0F.38 0D (VEX.W=0) */
// Per-lane qword permute of src1, control taken from src2.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPD_VpdHpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1());
  BxPackedAvxRegister op2 = BX_READ_AVX_REG(i->src2()), result;
  unsigned len = i->getVL();

  result.clear();

  for (unsigned n=0; n < len; n++)
    xmm_permilpd(&result.vmm128(n), &op1.vmm128(n), &op2.vmm128(n));

  BX_WRITE_AVX_REG(i->dst(), result);

  BX_NEXT_INSTR(i);
}
559
560 /* Opcode: VEX.66.0F.3A 04 (VEX.W=0) */
VPERMILPS_VpsWpsIbR(bxInstruction_c * i)561 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPS_VpsWpsIbR(bxInstruction_c *i)
562 {
563 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src()), result;
564 unsigned len = i->getVL();
565 result.clear();
566
567 for (unsigned n=0; n < len; n++)
568 xmm_shufps(&result.vmm128(n), &op1.vmm128(n), &op1.vmm128(n), i->Ib());
569
570 BX_WRITE_AVX_REG(i->dst(), result);
571
572 BX_NEXT_INSTR(i);
573 }
574
575 /* Opcode: VEX.66.0F.3A 05 (VEX.W=0) */
VPERMILPD_VpdWpdIbR(bxInstruction_c * i)576 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERMILPD_VpdWpdIbR(bxInstruction_c *i)
577 {
578 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src()), result;
579 unsigned len = i->getVL();
580 Bit8u order = i->Ib();
581
582 result.clear();
583
584 for (unsigned n=0; n < len; n++) {
585 xmm_shufpd(&result.vmm128(n), &op1.vmm128(n), &op1.vmm128(n), order);
586 order >>= 2;
587 }
588
589 BX_WRITE_AVX_REG(i->dst(), result);
590
591 BX_NEXT_INSTR(i);
592 }
593
594 /* Opcode: VEX.66.0F.3A 06 (VEX.W=0) */
VPERM2F128_VdqHdqWdqIbR(bxInstruction_c * i)595 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPERM2F128_VdqHdqWdqIbR(bxInstruction_c *i)
596 {
597 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1());
598 BxPackedYmmRegister op2 = BX_READ_YMM_REG(i->src2()), result;
599 Bit8u order = i->Ib();
600
601 for (unsigned n=0;n<2;n++) {
602
603 if (order & 0x8) {
604 result.ymm128(n).clear();
605 }
606 else {
607 if (order & 0x2)
608 result.ymm128(n) = op2.ymm128(order & 0x1);
609 else
610 result.ymm128(n) = op1.ymm128(order & 0x1);
611 }
612
613 order >>= 4;
614 }
615
616 BX_WRITE_YMM_REGZ(i->dst(), result);
617
618 BX_NEXT_INSTR(i);
619 }
620
621 /* Opcode: VEX.66.0F.38 2C (VEX.W=0) */
VMASKMOVPS_VpsHpsMps(bxInstruction_c * i)622 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPS_VpsHpsMps(bxInstruction_c *i)
623 {
624 BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());
625 BxPackedAvxRegister result;
626
627 unsigned opmask = xmm_pmovmskd(&mask.ymm128(1));
628 opmask <<= 4;
629 opmask |= xmm_pmovmskd(&mask.ymm128(0));
630
631 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
632
633 avx_masked_load32(i, eaddr, &result, opmask);
634
635 BX_WRITE_AVX_REGZ(i->dst(), result, i->getVL());
636
637 BX_NEXT_INSTR(i);
638 }
639
640 /* Opcode: VEX.66.0F.38 2D (VEX.W=0) */
VMASKMOVPD_VpdHpdMpd(bxInstruction_c * i)641 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPD_VpdHpdMpd(bxInstruction_c *i)
642 {
643 BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());
644 BxPackedAvxRegister result;
645
646 unsigned opmask = xmm_pmovmskq(&mask.ymm128(1));
647 opmask <<= 2;
648 opmask |= xmm_pmovmskq(&mask.ymm128(0));
649
650 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
651
652 avx_masked_load64(i, eaddr, &result, opmask);
653
654 BX_WRITE_AVX_REGZ(i->dst(), result, i->getVL());
655
656 BX_NEXT_INSTR(i);
657 }
658
/* Opcode: VEX.66.0F.38 2E (VEX.W=0) */
// Masked 32-bit store: per-dword mask derived from sign bits of src1.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPS_MpsHpsVps(bxInstruction_c *i)
{
  BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());

  unsigned opmask = xmm_pmovmskd(&mask.ymm128(1));
  opmask <<= 4;
  opmask |= xmm_pmovmskd(&mask.ymm128(0));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);

  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src2()), opmask);

  BX_NEXT_INSTR(i);
}
674
/* Opcode: VEX.66.0F.38 2F (VEX.W=0) */
// Masked 64-bit store: per-qword mask derived from sign bits of src1.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMASKMOVPD_MpdHpdVpd(bxInstruction_c *i)
{
  BxPackedYmmRegister mask = BX_READ_YMM_REG(i->src1());

  unsigned opmask = xmm_pmovmskq(&mask.ymm128(1));
  opmask <<= 2;
  opmask |= xmm_pmovmskq(&mask.ymm128(0));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);

  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src2()), opmask);

  BX_NEXT_INSTR(i);
}
690
VPINSRB_VdqHdqEbIbR(bxInstruction_c * i)691 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRB_VdqHdqEbIbR(bxInstruction_c *i)
692 {
693 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
694 op1.xmmubyte(i->Ib() & 0xF) = BX_READ_8BIT_REGL(i->src2()); // won't allow reading of AH/CH/BH/DH
695 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
696
697 BX_NEXT_INSTR(i);
698 }
699
VPINSRB_VdqHdqEbIbM(bxInstruction_c * i)700 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRB_VdqHdqEbIbM(bxInstruction_c *i)
701 {
702 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
703
704 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
705 op1.xmmubyte(i->Ib() & 0xF) = read_virtual_byte(i->seg(), eaddr);
706
707 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
708
709 BX_NEXT_INSTR(i);
710 }
711
VPINSRW_VdqHdqEwIbR(bxInstruction_c * i)712 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRW_VdqHdqEwIbR(bxInstruction_c *i)
713 {
714 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
715 op1.xmm16u(i->Ib() & 0x7) = BX_READ_16BIT_REG(i->src2());
716 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
717
718 BX_NEXT_INSTR(i);
719 }
720
VPINSRW_VdqHdqEwIbM(bxInstruction_c * i)721 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRW_VdqHdqEwIbM(bxInstruction_c *i)
722 {
723 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
724
725 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
726 op1.xmm16u(i->Ib() & 0x7) = read_virtual_word(i->seg(), eaddr);
727
728 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
729
730 BX_NEXT_INSTR(i);
731 }
732
VPINSRD_VdqHdqEdIbR(bxInstruction_c * i)733 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRD_VdqHdqEdIbR(bxInstruction_c *i)
734 {
735 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
736 op1.xmm32u(i->Ib() & 3) = BX_READ_32BIT_REG(i->src2());
737 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
738
739 BX_NEXT_INSTR(i);
740 }
741
VPINSRD_VdqHdqEdIbM(bxInstruction_c * i)742 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRD_VdqHdqEdIbM(bxInstruction_c *i)
743 {
744 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
745
746 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
747 op1.xmm32u(i->Ib() & 3) = read_virtual_dword(i->seg(), eaddr);
748
749 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
750 BX_NEXT_INSTR(i);
751 }
752
VPINSRQ_VdqHdqEqIbR(bxInstruction_c * i)753 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRQ_VdqHdqEqIbR(bxInstruction_c *i)
754 {
755 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
756 op1.xmm64u(i->Ib() & 1) = BX_READ_64BIT_REG(i->src2());
757 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
758
759 BX_NEXT_INSTR(i);
760 }
761
// Copy src1 and replace the qword selected by imm[0] with a qword from memory.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPINSRQ_VdqHdqEqIbM(bxInstruction_c *i)
{
  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  // NOTE(review): reads through the linear address (get_laddr64) rather than
  // read_virtual_qword like the other VPINSRx memory forms — presumably
  // intentional for the 64-bit-only operand; confirm against the SSE handler.
  Bit64u op2 = read_linear_qword(i->seg(), get_laddr64(i->seg(), eaddr));
  op1.xmm64u(i->Ib() & 1) = op2;

  BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
  BX_NEXT_INSTR(i);
}
773
VINSERTPS_VpsHpsWssIbR(bxInstruction_c * i)774 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTPS_VpsHpsWssIbR(bxInstruction_c *i)
775 {
776 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
777 Bit8u control = i->Ib();
778
779 BxPackedXmmRegister temp = BX_READ_XMM_REG(i->src2());
780 Bit32u op2 = temp.xmm32u((control >> 6) & 3);
781
782 op1.xmm32u((control >> 4) & 3) = op2;
783 xmm_zero_blendps(&op1, &op1, ~control);
784
785 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
786
787 BX_NEXT_INSTR(i);
788 }
789
VINSERTPS_VpsHpsWssIbM(bxInstruction_c * i)790 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VINSERTPS_VpsHpsWssIbM(bxInstruction_c *i)
791 {
792 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
793 Bit8u control = i->Ib();
794
795 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
796 op1.xmm32u((control >> 4) & 3) = read_virtual_dword(i->seg(), eaddr);
797 xmm_zero_blendps(&op1, &op1, ~control);
798
799 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
800
801 BX_NEXT_INSTR(i);
802 }
803
804 #endif // BX_SUPPORT_AVX
805