1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx512_move.cc 13716 2019-12-21 20:07:03Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2013-2018 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28
29 #if BX_SUPPORT_AVX
avx_masked_load8(bxInstruction_c * i,bx_address eaddr,BxPackedAvxRegister * op,Bit64u mask)30 void BX_CPU_C::avx_masked_load8(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit64u mask)
31 {
32 unsigned len = i->getVL();
33
34 if (i->as64L()) {
35 Bit64u laddr = get_laddr64(i->seg(), eaddr);
36 for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
37 if (mask & (BX_CONST64(1)<<n)) {
38 if (! IsCanonical(laddr + n))
39 exception(int_number(i->seg()), 0);
40 }
41 }
42 }
43
44 for (int n=BYTE_ELEMENTS(len)-1; n >= 0; n--) {
45 if (mask & (BX_CONST64(1)<<n))
46 op->vmmubyte(n) = read_virtual_byte(i->seg(), eaddr + n);
47 else
48 op->vmmubyte(n) = 0;
49 }
50 }
51
avx_masked_load16(bxInstruction_c * i,bx_address eaddr,BxPackedAvxRegister * op,Bit32u mask)52 void BX_CPU_C::avx_masked_load16(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
53 {
54 unsigned len = i->getVL();
55
56 if (i->as64L()) {
57 Bit64u laddr = get_laddr64(i->seg(), eaddr);
58 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
59 if (mask & (1<<n)) {
60 if (! IsCanonical(laddr + 2*n))
61 exception(int_number(i->seg()), 0);
62 }
63 }
64 }
65
66 #if BX_SUPPORT_ALIGNMENT_CHECK
67 unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
68 BX_CPU_THIS_PTR alignment_check_mask = 0;
69 #endif
70
71 for (int n=WORD_ELEMENTS(len)-1; n >= 0; n--) {
72 if (mask & (1<<n))
73 op->vmm16u(n) = read_virtual_word(i->seg(), eaddr + 2*n);
74 else
75 op->vmm16u(n) = 0;
76 }
77
78 #if BX_SUPPORT_ALIGNMENT_CHECK
79 BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
80 #endif
81 }
82
avx_masked_load32(bxInstruction_c * i,bx_address eaddr,BxPackedAvxRegister * op,Bit32u mask)83 void BX_CPU_C::avx_masked_load32(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
84 {
85 unsigned len = i->getVL();
86
87 if (i->as64L()) {
88 Bit64u laddr = get_laddr64(i->seg(), eaddr);
89 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
90 if (mask & (1<<n)) {
91 if (! IsCanonical(laddr + 4*n))
92 exception(int_number(i->seg()), 0);
93 }
94 }
95 }
96
97 #if BX_SUPPORT_ALIGNMENT_CHECK
98 unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
99 BX_CPU_THIS_PTR alignment_check_mask = 0;
100 #endif
101
102 for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
103 if (mask & (1<<n))
104 op->vmm32u(n) = read_virtual_dword(i->seg(), eaddr + 4*n);
105 else
106 op->vmm32u(n) = 0;
107 }
108
109 #if BX_SUPPORT_ALIGNMENT_CHECK
110 BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
111 #endif
112 }
113
avx_masked_load64(bxInstruction_c * i,bx_address eaddr,BxPackedAvxRegister * op,Bit32u mask)114 void BX_CPU_C::avx_masked_load64(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
115 {
116 unsigned len = i->getVL();
117
118 if (i->as64L()) {
119 Bit64u laddr = get_laddr64(i->seg(), eaddr);
120 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
121 if (mask & (1<<n)) {
122 if (! IsCanonical(laddr + 8*n))
123 exception(int_number(i->seg()), 0);
124 }
125 }
126 }
127
128 #if BX_SUPPORT_ALIGNMENT_CHECK
129 unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
130 BX_CPU_THIS_PTR alignment_check_mask = 0;
131 #endif
132
133 for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
134 if (mask & (1<<n))
135 op->vmm64u(n) = read_virtual_qword(i->seg(), eaddr + 8*n);
136 else
137 op->vmm64u(n) = 0;
138 }
139
140 #if BX_SUPPORT_ALIGNMENT_CHECK
141 BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
142 #endif
143 }
144
// Masked store of byte elements: only elements whose mask bit is set are
// written to memory at eaddr+n; masked-off elements are left untouched.
void BX_CPU_C::avx_masked_store8(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit64u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    // pre-check canonicality of every enabled element address so the
    // exception is raised before any element access is performed
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
      if (mask & (BX_CONST64(1)<<n)) {
        if (! IsCanonical(laddr + n))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

  // see if you can successfully write all the elements first:
  // probe every enabled element with an RMW read so any fault occurs
  // before the first actual write (no partial memory update on fault)
  for (int n=BYTE_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (BX_CONST64(1)<<n))
      read_RMW_virtual_byte(i->seg(), eaddr + n);
  }

  // all probes passed — perform the actual writes
  for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
    if (mask & (BX_CONST64(1)<<n))
      write_virtual_byte(i->seg(), eaddr + n, op->vmmubyte(n));
  }
}
172
// Masked store of word elements: only elements whose mask bit is set are
// written to memory at eaddr+2*n; masked-off elements are left untouched.
void BX_CPU_C::avx_masked_store16(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    // pre-check canonicality of every enabled element address so the
    // exception is raised before any element access is performed
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(laddr + 2*n))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // suppress #AC for the per-element accesses, restore afterwards
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first:
  // probe every enabled element with an RMW read so any fault occurs
  // before the first actual write (no partial memory update on fault)
  for (int n=WORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      read_RMW_virtual_word(i->seg(), eaddr + 2*n);
  }

  // all probes passed — perform the actual writes
  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
      write_virtual_word(i->seg(), eaddr + 2*n, op->vmm16u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
209
// Masked store of dword elements: only elements whose mask bit is set are
// written to memory at eaddr+4*n; masked-off elements are left untouched.
void BX_CPU_C::avx_masked_store32(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    // pre-check canonicality of every enabled element address so the
    // exception is raised before any element access is performed
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(laddr + 4*n))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // suppress #AC for the per-element accesses, restore afterwards
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first:
  // probe every enabled element with an RMW read so any fault occurs
  // before the first actual write (no partial memory update on fault)
  for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      read_RMW_virtual_dword(i->seg(), eaddr + 4*n);
  }

  // all probes passed — perform the actual writes
  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
      write_virtual_dword(i->seg(), eaddr + 4*n, op->vmm32u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
246
// Masked store of qword elements: only elements whose mask bit is set are
// written to memory at eaddr+8*n; masked-off elements are left untouched.
void BX_CPU_C::avx_masked_store64(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    // pre-check canonicality of every enabled element address so the
    // exception is raised before any element access is performed
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(laddr + 8*n))
          exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  // suppress #AC for the per-element accesses, restore afterwards
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // see if you can successfully write all the elements first:
  // probe every enabled element with an RMW read so any fault occurs
  // before the first actual write (no partial memory update on fault)
  for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
      read_RMW_virtual_qword(i->seg(), eaddr + 8*n);
  }

  // all probes passed — perform the actual writes
  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
      write_virtual_qword(i->seg(), eaddr + 8*n, op->vmm64u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
283 #endif // BX_SUPPORT_AVX
284
285 #if BX_SUPPORT_EVEX
286
287 #include "simd_int.h"
288
avx512_write_regb_masked(bxInstruction_c * i,const BxPackedAvxRegister * op,unsigned len,Bit64u opmask)289 void BX_CPU_C::avx512_write_regb_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit64u opmask)
290 {
291 if (i->isZeroMasking()) {
292 for (unsigned n=0; n < len; n++, opmask >>= 16)
293 xmm_zero_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), (Bit32u) opmask);
294 }
295 else {
296 for (unsigned n=0; n < len; n++, opmask >>= 16)
297 xmm_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), (Bit32u) opmask);
298 }
299
300 BX_CLEAR_AVX_REGZ(i->dst(), len);
301 }
302
avx512_write_regw_masked(bxInstruction_c * i,const BxPackedAvxRegister * op,unsigned len,Bit32u opmask)303 void BX_CPU_C::avx512_write_regw_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
304 {
305 if (i->isZeroMasking()) {
306 for (unsigned n=0; n < len; n++, opmask >>= 8)
307 xmm_zero_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
308 }
309 else {
310 for (unsigned n=0; n < len; n++, opmask >>= 8)
311 xmm_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
312 }
313
314 BX_CLEAR_AVX_REGZ(i->dst(), len);
315 }
316
avx512_write_regd_masked(bxInstruction_c * i,const BxPackedAvxRegister * op,unsigned len,Bit32u opmask)317 void BX_CPU_C::avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
318 {
319 if (i->isZeroMasking()) {
320 for (unsigned n=0; n < len; n++, opmask >>= 4)
321 xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
322 }
323 else {
324 for (unsigned n=0; n < len; n++, opmask >>= 4)
325 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
326 }
327
328 BX_CLEAR_AVX_REGZ(i->dst(), len);
329 }
330
avx512_write_regq_masked(bxInstruction_c * i,const BxPackedAvxRegister * op,unsigned len,Bit32u opmask)331 void BX_CPU_C::avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
332 {
333 if (i->isZeroMasking()) {
334 for (unsigned n=0; n < len; n++, opmask >>= 2)
335 xmm_zero_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
336 }
337 else {
338 for (unsigned n=0; n < len; n++, opmask >>= 2)
339 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
340 }
341
342 BX_CLEAR_AVX_REGZ(i->dst(), len);
343 }
344
345 //////////////////////////
346 // masked register move //
347 //////////////////////////
348
VMOVDQU8_MASK_VdqWdqR(bxInstruction_c * i)349 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU8_MASK_VdqWdqR(bxInstruction_c *i)
350 {
351 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
352 avx512_write_regb_masked(i, &op, i->getVL(), BX_READ_OPMASK(i->opmask()));
353 BX_NEXT_INSTR(i);
354 }
355
VMOVDQU16_MASK_VdqWdqR(bxInstruction_c * i)356 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU16_MASK_VdqWdqR(bxInstruction_c *i)
357 {
358 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
359 avx512_write_regw_masked(i, &op, i->getVL(), BX_READ_32BIT_OPMASK(i->opmask()));
360 BX_NEXT_INSTR(i);
361 }
362
VMOVAPS_MASK_VpsWpsR(bxInstruction_c * i)363 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
364 {
365 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
366 avx512_write_regd_masked(i, &op, i->getVL(), BX_READ_16BIT_OPMASK(i->opmask()));
367 BX_NEXT_INSTR(i);
368 }
369
VMOVAPD_MASK_VpdWpdR(bxInstruction_c * i)370 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdR(bxInstruction_c *i)
371 {
372 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
373 avx512_write_regq_masked(i, &op, i->getVL(), BX_READ_8BIT_OPMASK(i->opmask()));
374 BX_NEXT_INSTR(i);
375 }
376
377 ////////////////////////////////////////
378 // masked packed load/store - aligned //
379 ////////////////////////////////////////
380
// VMOVAPS with opmask, memory source (aligned form).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  // aligned form: #GP if the linear address is not aligned to the full
  // vector width, regardless of the mask value
  unsigned len = i->getVL(), len_in_bytes = BYTE_ELEMENTS(len);
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &reg, mask); // masked-off elements come back zeroed

  if (i->isZeroMasking()) {
    // zero-masking: the loaded value (zeros in masked-off elements) is final
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    // merge-masking: keep old destination dwords where mask bits are clear
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
408
// VMOVAPD with opmask, memory source (aligned form).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  // aligned form: #GP if the linear address is not aligned to the full
  // vector width, regardless of the mask value
  unsigned len = i->getVL(), len_in_bytes = BYTE_ELEMENTS(len);
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &reg, mask); // masked-off elements come back zeroed

  if (i->isZeroMasking()) {
    // zero-masking: the loaded value (zeros in masked-off elements) is final
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    // merge-masking: keep old destination qwords where mask bits are clear
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
436
// VMOVAPS with opmask, memory destination (aligned form).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  // aligned form: #GP if the linear address is not aligned to the full
  // vector width, regardless of the mask value
  unsigned len_in_bytes = BYTE_ELEMENTS(i->getVL());
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  // only elements with a set mask bit are written to memory
  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_16BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}
452
// VMOVAPD with opmask, memory destination (aligned form).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  // aligned form: #GP if the linear address is not aligned to the full
  // vector width, regardless of the mask value
  unsigned len_in_bytes = BYTE_ELEMENTS(i->getVL());
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  // only elements with a set mask bit are written to memory
  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_8BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}
468
469 //////////////////////////////
470 // masked packed load/store //
471 //////////////////////////////
472
// VMOVDQU8 with opmask, memory source (unaligned form, byte elements).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU8_MASK_VdqWdqM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  Bit64u mask = BX_READ_OPMASK(i->opmask());
  avx_masked_load8(i, eaddr, &reg, mask); // masked-off elements come back zeroed

  if (i->isZeroMasking()) {
    // zero-masking: the loaded value (zeros in masked-off elements) is final
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    // merge-masking: keep old destination bytes where mask bits are clear
    for (unsigned n=0; n < len; n++, mask >>= 16)
      xmm_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
494
// VMOVDQU16 with opmask, memory source (unaligned form, word elements).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU16_MASK_VdqWdqM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());
  avx_masked_load16(i, eaddr, &reg, mask); // masked-off elements come back zeroed

  if (i->isZeroMasking()) {
    // zero-masking: the loaded value (zeros in masked-off elements) is final
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    // merge-masking: keep old destination words where mask bits are clear
    for (unsigned n=0; n < len; n++, mask >>= 8)
      xmm_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
516
// VMOVUPS with opmask, memory source (unaligned form, dword elements).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &reg, mask); // masked-off elements come back zeroed

  if (i->isZeroMasking()) {
    // zero-masking: the loaded value (zeros in masked-off elements) is final
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    // merge-masking: keep old destination dwords where mask bits are clear
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
538
// VMOVUPD with opmask, memory source (unaligned form, qword elements).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &reg, mask); // masked-off elements come back zeroed

  if (i->isZeroMasking()) {
    // zero-masking: the loaded value (zeros in masked-off elements) is final
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    // merge-masking: keep old destination qwords where mask bits are clear
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}
560
VMOVDQU8_MASK_WdqVdqM(bxInstruction_c * i)561 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU8_MASK_WdqVdqM(bxInstruction_c *i)
562 {
563 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
564 avx_masked_store8(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_OPMASK(i->opmask()));
565 BX_NEXT_INSTR(i);
566 }
567
VMOVDQU16_MASK_WdqVdqM(bxInstruction_c * i)568 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU16_MASK_WdqVdqM(bxInstruction_c *i)
569 {
570 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
571 avx_masked_store16(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_32BIT_OPMASK(i->opmask()));
572 BX_NEXT_INSTR(i);
573 }
574
VMOVUPS_MASK_WpsVpsM(bxInstruction_c * i)575 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_WpsVpsM(bxInstruction_c *i)
576 {
577 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
578 avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_16BIT_OPMASK(i->opmask()));
579 BX_NEXT_INSTR(i);
580 }
581
VMOVUPD_MASK_WpdVpdM(bxInstruction_c * i)582 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_WpdVpdM(bxInstruction_c *i)
583 {
584 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
585 avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_8BIT_OPMASK(i->opmask()));
586 BX_NEXT_INSTR(i);
587 }
588
589 //////////////////////////////
590 // masked scalar load/store //
591 //////////////////////////////
592
VMOVSD_MASK_VsdWsdM(bxInstruction_c * i)593 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdWsdM(bxInstruction_c *i)
594 {
595 BxPackedXmmRegister op;
596
597 op.xmm64u(1) = 0;
598
599 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
600 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
601 op.xmm64u(0) = read_virtual_qword(i->seg(), eaddr);
602 }
603 else {
604 if (! i->isZeroMasking()) {
605 op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
606 }
607 else {
608 op.xmm64u(0) = 0;
609 }
610 }
611
612 BX_WRITE_XMM_REGZ(i->dst(), op, i->getVL());
613 BX_NEXT_INSTR(i);
614 }
615
VMOVSS_MASK_VssWssM(bxInstruction_c * i)616 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssWssM(bxInstruction_c *i)
617 {
618 BxPackedXmmRegister op;
619
620 op.xmm64u(1) = 0;
621
622 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
623 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
624 op.xmm64u(0) = (Bit64u) read_virtual_dword(i->seg(), eaddr);
625 }
626 else {
627 if (! i->isZeroMasking()) {
628 op.xmm64u(0) = (Bit64u) BX_READ_XMM_REG_LO_DWORD(i->dst());
629 }
630 else {
631 op.xmm64u(0) = 0;
632 }
633 }
634
635 BX_WRITE_XMM_REGZ(i->dst(), op, i->getVL());
636 BX_NEXT_INSTR(i);
637 }
638
VMOVSD_MASK_WsdVsdM(bxInstruction_c * i)639 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_WsdVsdM(bxInstruction_c *i)
640 {
641 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
642 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
643 write_virtual_qword(i->seg(), eaddr, BX_READ_XMM_REG_LO_QWORD(i->src()));
644 }
645
646 BX_NEXT_INSTR(i);
647 }
648
VMOVSS_MASK_WssVssM(bxInstruction_c * i)649 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_WssVssM(bxInstruction_c *i)
650 {
651 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
652 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
653 write_virtual_dword(i->seg(), eaddr, BX_READ_XMM_REG_LO_DWORD(i->src()));
654 }
655
656 BX_NEXT_INSTR(i);
657 }
658
VMOVSD_MASK_VsdHpdWsdR(bxInstruction_c * i)659 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
660 {
661 BxPackedXmmRegister op;
662
663 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
664 op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src2());
665 }
666 else {
667 if (! i->isZeroMasking()) {
668 op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
669 }
670 else {
671 op.xmm64u(0) = 0;
672 }
673 }
674 op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());
675
676 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
677
678 BX_NEXT_INSTR(i);
679 }
680
VMOVSS_MASK_VssHpsWssR(bxInstruction_c * i)681 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssHpsWssR(bxInstruction_c *i)
682 {
683 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src1());
684
685 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
686 op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->src2());
687 }
688 else {
689 if (! i->isZeroMasking()) {
690 op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
691 }
692 else {
693 op.xmm32u(0) = 0;
694 }
695 }
696
697 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);
698
699 BX_NEXT_INSTR(i);
700 }
701
702 ////////////////////////////////////
703 // masked store with down convert //
704 ////////////////////////////////////
705
706 // quad-word to byte
// VPMOVQB with opmask, memory destination: truncate each qword element of
// src to a byte, then masked-store the packed bytes.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  // truncating down-convert: qword -> low byte
  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm64u(n);
  }

  // no opmask register encoded => store all elements; clip to element count
  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}
724
// VPMOVQB, register destination: truncate each qword element of src to a
// byte; unused upper bytes of the xmm destination are zeroed.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  // at most 8 result bytes (VL512) — the high qword is always zero
  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = (Bit8u) src.vmm64u(n);
  }

  // VL128 produces 2 bytes -> clear bytes 2..3; VL256 produces 4 -> clear 4..7
  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
743
// VPMOVQB with opmask, register destination: per-element truncation under
// mask control; masked-off bytes keep the old destination value (merge) or
// are zeroed (zero-masking).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  // at most 8 result bytes (VL512) — the high qword is always zero
  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = (Bit8u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  // VL128 produces 2 bytes -> clear bytes 2..3; VL256 produces 4 -> clear 4..7
  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
767
// VPMOVSQB with opmask, memory destination: signed-saturate each qword
// element of src to a byte, then masked-store the packed bytes.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  // signed saturating down-convert: qword -> byte
  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
  }

  // no opmask register encoded => store all elements; clip to element count
  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}
785
// VPMOVSQB, register destination: signed-saturate each qword element of src
// to a byte; unused upper bytes of the xmm destination are zeroed.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  // at most 8 result bytes (VL512) — the high qword is always zero
  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
  }

  // VL128 produces 2 bytes -> clear bytes 2..3; VL256 produces 4 -> clear 4..7
  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
804
// VPMOVSQB with opmask, register destination: signed saturation under mask
// control; masked-off bytes keep the old destination value (merge) or are
// zeroed (zero-masking).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  // at most 8 result bytes (VL512) — the high qword is always zero
  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
    else
      // xmmubyte and xmmsbyte address the same storage — zeroing via the
      // unsigned accessor is equivalent
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  // VL128 produces 2 bytes -> clear bytes 2..3; VL256 produces 4 -> clear 4..7
  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
828
// VPMOVUSQB with opmask, memory destination: unsigned-saturate each qword
// element of src to a byte, then masked-store the packed bytes.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  // unsigned saturating down-convert: qword -> byte
  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
  }

  // no opmask register encoded => store all elements; clip to element count
  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);
  BX_NEXT_INSTR(i);
}
845
// VPMOVUSQB, register destination: unsigned-saturate each qword element of
// src to a byte; unused upper bytes of the xmm destination are zeroed.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  // at most 8 result bytes (VL512) — the high qword is always zero
  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
  }

  // VL128 produces 2 bytes -> clear bytes 2..3; VL256 produces 4 -> clear 4..7
  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
864
// VPMOVUSQB with opmask, register destination: unsigned saturation under
// mask control; masked-off bytes keep the old destination value (merge) or
// are zeroed (zero-masking).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  // at most 8 result bytes (VL512) — the high qword is always zero
  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  // VL128 produces 2 bytes -> clear bytes 2..3; VL256 produces 4 -> clear 4..7
  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
888
889 // double-word to byte
// VPMOVDB with opmask, memory destination: truncate each dword element of
// src to a byte, then masked-store the packed bytes.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  // truncating down-convert: dword -> low byte
  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm32u(n);
  }

  // no opmask register encoded => store all elements; clip to element count
  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}
907
// VPMOVDB, register destination: truncate each dword element of src to a
// byte; unused upper bytes of the xmm destination are zeroed.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = (Bit8u) src.vmm32u(n);
  }

  // VL128 produces 4 bytes -> clear bytes 4..7; VL256 produces 8 -> clear 8..15
  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
924
// VPMOVDB with opmask, register destination: per-element truncation under
// mask control; masked-off bytes keep the old destination value (merge) or
// are zeroed (zero-masking).
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = (Bit8u) src.vmm32u(n);
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  // VL128 produces 4 bytes -> clear bytes 4..7; VL256 produces 8 -> clear 8..15
  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
946
// VPMOVSDB with opmask, memory destination: signed-saturate each dword
// element of src to a byte, then masked-store the packed bytes.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  // signed saturating down-convert: dword -> byte
  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
  }

  // no opmask register encoded => store all elements; clip to element count
  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}
964
// VPMOVSDB, register destination: signed-saturate each dword element of src
// to a byte; unused upper bytes of the xmm destination are zeroed.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
  }

  // VL128 produces 4 bytes -> clear bytes 4..7; VL256 produces 8 -> clear 8..15
  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}
981
VPMOVSDB_MASK_WdqVdqR(bxInstruction_c * i)982 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqR(bxInstruction_c *i)
983 {
984 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
985 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
986 unsigned len = i->getVL();
987
988 unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
989
990 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
991 if (mask & 0x1)
992 dst.xmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
993 else
994 if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
995 }
996
997 if (len == BX_VL128) dst.xmm32u(1) = 0;
998 if (len != BX_VL512) dst.xmm64u(1) = 0;
999
1000 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1001 BX_NEXT_INSTR(i);
1002 }
1003
VPMOVUSDB_MASK_WdqVdqM(bxInstruction_c * i)1004 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqM(bxInstruction_c *i)
1005 {
1006 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1007 unsigned len = i->getVL();
1008
1009 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1010 dst.vmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
1011 }
1012
1013 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1014 opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
1015
1016 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1017 avx_masked_store8(i, eaddr, &dst, opmask);
1018
1019 BX_NEXT_INSTR(i);
1020 }
1021
VPMOVUSDB_WdqVdqR(bxInstruction_c * i)1022 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_WdqVdqR(bxInstruction_c *i)
1023 {
1024 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1025 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1026 unsigned len = i->getVL();
1027
1028 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1029 dst.xmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
1030 }
1031
1032 if (len == BX_VL128) dst.xmm32u(1) = 0;
1033 if (len != BX_VL512) dst.xmm64u(1) = 0;
1034
1035 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1036 BX_NEXT_INSTR(i);
1037 }
1038
VPMOVUSDB_MASK_WdqVdqR(bxInstruction_c * i)1039 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqR(bxInstruction_c *i)
1040 {
1041 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1042 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1043 unsigned len = i->getVL();
1044
1045 unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
1046
1047 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
1048 if (mask & 0x1)
1049 dst.xmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
1050 else
1051 if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
1052 }
1053
1054 if (len == BX_VL128) dst.xmm32u(1) = 0;
1055 if (len != BX_VL512) dst.xmm64u(1) = 0;
1056
1057 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1058 BX_NEXT_INSTR(i);
1059 }
1060
1061 // word to byte
VPMOVWB_MASK_WdqVdqM(bxInstruction_c * i)1062 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVWB_MASK_WdqVdqM(bxInstruction_c *i)
1063 {
1064 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1065 unsigned len = i->getVL();
1066
1067 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
1068 dst.vmmubyte(n) = (Bit8u) src.vmm16u(n);
1069 }
1070
1071 Bit32u opmask = i->opmask() ? BX_READ_32BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1072 opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));
1073
1074 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1075 avx_masked_store8(i, eaddr, &dst, opmask);
1076
1077 BX_NEXT_INSTR(i);
1078 }
1079
VPMOVWB_WdqVdqR(bxInstruction_c * i)1080 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVWB_WdqVdqR(bxInstruction_c *i)
1081 {
1082 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1083 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1084 unsigned len = i->getVL();
1085
1086 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
1087 dst.ymmubyte(n) = (Bit8u) src.vmm16u(n);
1088 }
1089
1090 if (len == BX_VL128) dst.ymm64u(1) = 0;
1091 if (len != BX_VL512) dst.ymm128(1).clear();
1092
1093 BX_WRITE_YMM_REGZ(i->dst(), dst);
1094 BX_NEXT_INSTR(i);
1095 }
1096
VPMOVWB_MASK_WdqVdqR(bxInstruction_c * i)1097 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVWB_MASK_WdqVdqR(bxInstruction_c *i)
1098 {
1099 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1100 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1101 unsigned len = i->getVL();
1102
1103 Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());
1104
1105 for (unsigned n=0; n < WORD_ELEMENTS(len); n++, mask >>= 1) {
1106 if (mask & 0x1)
1107 dst.ymmubyte(n) = (Bit8u) src.vmm16u(n);
1108 else
1109 if (i->isZeroMasking()) dst.ymmubyte(n) = 0;
1110 }
1111
1112 if (len == BX_VL128) dst.ymm64u(1) = 0;
1113 if (len != BX_VL512) dst.ymm128(1).clear();
1114
1115 BX_WRITE_YMM_REGZ(i->dst(), dst);
1116 BX_NEXT_INSTR(i);
1117 }
1118
VPMOVSWB_MASK_WdqVdqM(bxInstruction_c * i)1119 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSWB_MASK_WdqVdqM(bxInstruction_c *i)
1120 {
1121 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1122 unsigned len = i->getVL();
1123
1124 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
1125 dst.vmmsbyte(n) = SaturateWordSToByteS(src.vmm16s(n));
1126 }
1127
1128 Bit32u opmask = i->opmask() ? BX_READ_32BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1129 opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));
1130
1131 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1132 avx_masked_store8(i, eaddr, &dst, opmask);
1133
1134 BX_NEXT_INSTR(i);
1135 }
1136
VPMOVSWB_WdqVdqR(bxInstruction_c * i)1137 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSWB_WdqVdqR(bxInstruction_c *i)
1138 {
1139 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1140 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1141 unsigned len = i->getVL();
1142
1143 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
1144 dst.ymmsbyte(n) = SaturateWordSToByteS(src.vmm16s(n));
1145 }
1146
1147 if (len == BX_VL128) dst.ymm64u(1) = 0;
1148 if (len != BX_VL512) dst.ymm128(1).clear();
1149
1150 BX_WRITE_YMM_REGZ(i->dst(), dst);
1151 BX_NEXT_INSTR(i);
1152 }
1153
VPMOVSWB_MASK_WdqVdqR(bxInstruction_c * i)1154 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSWB_MASK_WdqVdqR(bxInstruction_c *i)
1155 {
1156 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1157 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1158 unsigned len = i->getVL();
1159
1160 Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());
1161
1162 for (unsigned n=0; n < WORD_ELEMENTS(len); n++, mask >>= 1) {
1163 if (mask & 0x1)
1164 dst.ymmsbyte(n) = SaturateWordSToByteS(src.vmm16s(n));
1165 else
1166 if (i->isZeroMasking()) dst.ymmubyte(n) = 0;
1167 }
1168
1169 if (len == BX_VL128) dst.ymm64u(1) = 0;
1170 if (len != BX_VL512) dst.ymm128(1).clear();
1171
1172 BX_WRITE_YMM_REGZ(i->dst(), dst);
1173 BX_NEXT_INSTR(i);
1174 }
1175
VPMOVUSWB_MASK_WdqVdqM(bxInstruction_c * i)1176 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSWB_MASK_WdqVdqM(bxInstruction_c *i)
1177 {
1178 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1179 unsigned len = i->getVL();
1180
1181 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
1182 dst.vmmubyte(n) = SaturateWordUToByteU(src.vmm16u(n));
1183 }
1184
1185 Bit32u opmask = i->opmask() ? BX_READ_32BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1186 opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));
1187
1188 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1189 avx_masked_store8(i, eaddr, &dst, opmask);
1190
1191 BX_NEXT_INSTR(i);
1192 }
1193
VPMOVUSWB_WdqVdqR(bxInstruction_c * i)1194 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSWB_WdqVdqR(bxInstruction_c *i)
1195 {
1196 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1197 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1198 unsigned len = i->getVL();
1199
1200 for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
1201 dst.ymmubyte(n) = SaturateWordUToByteU(src.vmm16u(n));
1202 }
1203
1204 if (len == BX_VL128) dst.ymm64u(1) = 0;
1205 if (len != BX_VL512) dst.ymm128(1).clear();
1206
1207 BX_WRITE_YMM_REGZ(i->dst(), dst);
1208 BX_NEXT_INSTR(i);
1209 }
1210
VPMOVUSWB_MASK_WdqVdqR(bxInstruction_c * i)1211 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSWB_MASK_WdqVdqR(bxInstruction_c *i)
1212 {
1213 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1214 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1215 unsigned len = i->getVL();
1216
1217 Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());
1218
1219 for (unsigned n=0; n < WORD_ELEMENTS(len); n++, mask >>= 1) {
1220 if (mask & 0x1)
1221 dst.ymmubyte(n) = SaturateWordUToByteU(src.vmm16u(n));
1222 else
1223 if (i->isZeroMasking()) dst.ymmubyte(n) = 0;
1224 }
1225
1226 if (len == BX_VL128) dst.ymm64u(1) = 0;
1227 if (len != BX_VL512) dst.ymm128(1).clear();
1228
1229 BX_WRITE_YMM_REGZ(i->dst(), dst);
1230 BX_NEXT_INSTR(i);
1231 }
1232
1233 // double-word to word
VPMOVDW_MASK_WdqVdqM(bxInstruction_c * i)1234 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqM(bxInstruction_c *i)
1235 {
1236 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1237 unsigned len = i->getVL();
1238
1239 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1240 dst.vmm16u(n) = (Bit16u) src.vmm32u(n);
1241 }
1242
1243 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1244 opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
1245
1246 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1247 avx_masked_store16(i, eaddr, &dst, opmask);
1248
1249 BX_NEXT_INSTR(i);
1250 }
1251
VPMOVDW_WdqVdqR(bxInstruction_c * i)1252 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_WdqVdqR(bxInstruction_c *i)
1253 {
1254 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1255 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1256 unsigned len = i->getVL();
1257
1258 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1259 dst.ymm16u(n) = (Bit16u) src.vmm32u(n);
1260 }
1261
1262 if (len == BX_VL128) dst.ymm64u(1) = 0;
1263 if (len != BX_VL512) dst.ymm128(1).clear();
1264
1265 BX_WRITE_YMM_REGZ(i->dst(), dst);
1266 BX_NEXT_INSTR(i);
1267 }
1268
VPMOVDW_MASK_WdqVdqR(bxInstruction_c * i)1269 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqR(bxInstruction_c *i)
1270 {
1271 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1272 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1273 unsigned len = i->getVL();
1274
1275 unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
1276
1277 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
1278 if (mask & 0x1)
1279 dst.ymm16u(n) = (Bit16u) src.vmm32u(n);
1280 else
1281 if (i->isZeroMasking()) dst.ymm16u(n) = 0;
1282 }
1283
1284 if (len == BX_VL128) dst.ymm64u(1) = 0;
1285 if (len != BX_VL512) dst.ymm128(1).clear();
1286
1287 BX_WRITE_YMM_REGZ(i->dst(), dst);
1288 BX_NEXT_INSTR(i);
1289 }
1290
VPMOVSDW_MASK_WdqVdqM(bxInstruction_c * i)1291 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqM(bxInstruction_c *i)
1292 {
1293 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1294 unsigned len = i->getVL();
1295
1296 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1297 dst.vmm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
1298 }
1299
1300 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1301 opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
1302
1303 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1304 avx_masked_store16(i, eaddr, &dst, opmask);
1305
1306 BX_NEXT_INSTR(i);
1307 }
1308
VPMOVSDW_WdqVdqR(bxInstruction_c * i)1309 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_WdqVdqR(bxInstruction_c *i)
1310 {
1311 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1312 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1313 unsigned len = i->getVL();
1314
1315 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1316 dst.ymm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
1317 }
1318
1319 if (len == BX_VL128) dst.ymm64u(1) = 0;
1320 if (len != BX_VL512) dst.ymm128(1).clear();
1321
1322 BX_WRITE_YMM_REGZ(i->dst(), dst);
1323 BX_NEXT_INSTR(i);
1324 }
1325
VPMOVSDW_MASK_WdqVdqR(bxInstruction_c * i)1326 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqR(bxInstruction_c *i)
1327 {
1328 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1329 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1330 unsigned len = i->getVL();
1331
1332 unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
1333
1334 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
1335 if (mask & 0x1)
1336 dst.ymm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
1337 else
1338 if (i->isZeroMasking()) dst.ymm16u(n) = 0;
1339 }
1340
1341 if (len == BX_VL128) dst.ymm64u(1) = 0;
1342 if (len != BX_VL512) dst.ymm128(1).clear();
1343
1344 BX_WRITE_YMM_REGZ(i->dst(), dst);
1345 BX_NEXT_INSTR(i);
1346 }
1347
VPMOVUSDW_MASK_WdqVdqM(bxInstruction_c * i)1348 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqM(bxInstruction_c *i)
1349 {
1350 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1351 unsigned len = i->getVL();
1352
1353 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1354 dst.vmm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
1355 }
1356
1357 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1358 opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));
1359
1360 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1361 avx_masked_store16(i, eaddr, &dst, opmask);
1362
1363 BX_NEXT_INSTR(i);
1364 }
1365
VPMOVUSDW_WdqVdqR(bxInstruction_c * i)1366 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_WdqVdqR(bxInstruction_c *i)
1367 {
1368 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1369 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1370 unsigned len = i->getVL();
1371
1372 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
1373 dst.ymm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
1374 }
1375
1376 if (len == BX_VL128) dst.ymm64u(1) = 0;
1377 if (len != BX_VL512) dst.ymm128(1).clear();
1378
1379 BX_WRITE_YMM_REGZ(i->dst(), dst);
1380 BX_NEXT_INSTR(i);
1381 }
1382
VPMOVUSDW_MASK_WdqVdqR(bxInstruction_c * i)1383 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqR(bxInstruction_c *i)
1384 {
1385 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1386 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1387 unsigned len = i->getVL();
1388
1389 unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
1390
1391 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
1392 if (mask & 0x1)
1393 dst.ymm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
1394 else
1395 if (i->isZeroMasking()) dst.ymm16u(n) = 0;
1396 }
1397
1398 if (len == BX_VL128) dst.ymm64u(1) = 0;
1399 if (len != BX_VL512) dst.ymm128(1).clear();
1400
1401 BX_WRITE_YMM_REGZ(i->dst(), dst);
1402 BX_NEXT_INSTR(i);
1403 }
1404
1405 // quad-word to word
VPMOVQW_MASK_WdqVdqM(bxInstruction_c * i)1406 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqM(bxInstruction_c *i)
1407 {
1408 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1409 unsigned len = i->getVL();
1410
1411 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1412 dst.vmm16u(n) = (Bit16u) src.vmm64u(n);
1413 }
1414
1415 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1416 opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1417
1418 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1419 avx_masked_store16(i, eaddr, &dst, opmask);
1420
1421 BX_NEXT_INSTR(i);
1422 }
1423
VPMOVQW_WdqVdqR(bxInstruction_c * i)1424 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_WdqVdqR(bxInstruction_c *i)
1425 {
1426 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1427 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1428 unsigned len = i->getVL();
1429
1430 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1431 dst.xmm16u(n) = (Bit16u) src.vmm64u(n);
1432 }
1433
1434 if (len == BX_VL128) dst.xmm32u(1) = 0;
1435 if (len != BX_VL512) dst.xmm64u(1) = 0;
1436
1437 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1438 BX_NEXT_INSTR(i);
1439 }
1440
VPMOVQW_MASK_WdqVdqR(bxInstruction_c * i)1441 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqR(bxInstruction_c *i)
1442 {
1443 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1444 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1445 unsigned len = i->getVL();
1446
1447 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
1448
1449 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
1450 if (mask & 0x1)
1451 dst.xmm16u(n) = (Bit16u) src.vmm64u(n);
1452 else
1453 if (i->isZeroMasking()) dst.xmm16u(n) = 0;
1454 }
1455
1456 if (len == BX_VL128) dst.xmm32u(1) = 0;
1457 if (len != BX_VL512) dst.xmm64u(1) = 0;
1458
1459 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1460 BX_NEXT_INSTR(i);
1461 }
1462
VPMOVSQW_MASK_WdqVdqM(bxInstruction_c * i)1463 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqM(bxInstruction_c *i)
1464 {
1465 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1466 unsigned len = i->getVL();
1467
1468 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1469 dst.vmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
1470 }
1471
1472 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1473 opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1474
1475 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1476 avx_masked_store16(i, eaddr, &dst, opmask);
1477
1478 BX_NEXT_INSTR(i);
1479 }
1480
VPMOVSQW_WdqVdqR(bxInstruction_c * i)1481 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_WdqVdqR(bxInstruction_c *i)
1482 {
1483 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1484 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1485 unsigned len = i->getVL();
1486
1487 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1488 dst.xmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
1489 }
1490
1491 if (len == BX_VL128) dst.xmm32u(1) = 0;
1492 if (len != BX_VL512) dst.xmm64u(1) = 0;
1493
1494 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1495 BX_NEXT_INSTR(i);
1496 }
1497
VPMOVSQW_MASK_WdqVdqR(bxInstruction_c * i)1498 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqR(bxInstruction_c *i)
1499 {
1500 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1501 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1502 unsigned len = i->getVL();
1503
1504 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
1505
1506 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
1507 if (mask & 0x1)
1508 dst.xmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
1509 else
1510 if (i->isZeroMasking()) dst.xmm16u(n) = 0;
1511 }
1512
1513 if (len == BX_VL128) dst.xmm32u(1) = 0;
1514 if (len != BX_VL512) dst.xmm64u(1) = 0;
1515
1516 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1517 BX_NEXT_INSTR(i);
1518 }
1519
VPMOVUSQW_MASK_WdqVdqM(bxInstruction_c * i)1520 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqM(bxInstruction_c *i)
1521 {
1522 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1523 unsigned len = i->getVL();
1524
1525 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1526 dst.vmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
1527 }
1528
1529 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1530 opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1531
1532 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1533 avx_masked_store16(i, eaddr, &dst, opmask);
1534
1535 BX_NEXT_INSTR(i);
1536 }
1537
VPMOVUSQW_WdqVdqR(bxInstruction_c * i)1538 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_WdqVdqR(bxInstruction_c *i)
1539 {
1540 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1541 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1542 unsigned len = i->getVL();
1543
1544 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1545 dst.xmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
1546 }
1547
1548 if (len == BX_VL128) dst.xmm32u(1) = 0;
1549 if (len != BX_VL512) dst.xmm64u(1) = 0;
1550
1551 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1552 BX_NEXT_INSTR(i);
1553 }
1554
VPMOVUSQW_MASK_WdqVdqR(bxInstruction_c * i)1555 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqR(bxInstruction_c *i)
1556 {
1557 BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
1558 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1559 unsigned len = i->getVL();
1560
1561 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
1562
1563 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
1564 if (mask & 0x1)
1565 dst.xmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
1566 else
1567 if (i->isZeroMasking()) dst.xmm16u(n) = 0;
1568 }
1569
1570 if (len == BX_VL128) dst.xmm32u(1) = 0;
1571 if (len != BX_VL512) dst.xmm64u(1) = 0;
1572
1573 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
1574 BX_NEXT_INSTR(i);
1575 }
1576
1577 // quad-word to double-word
VPMOVQD_MASK_WdqVdqM(bxInstruction_c * i)1578 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqM(bxInstruction_c *i)
1579 {
1580 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1581 unsigned len = i->getVL();
1582
1583 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1584 dst.vmm32u(n) = (Bit32u) src.vmm64u(n);
1585 }
1586
1587 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1588 opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1589
1590 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1591 avx_masked_store32(i, eaddr, &dst, opmask);
1592
1593 BX_NEXT_INSTR(i);
1594 }
1595
VPMOVQD_WdqVdqR(bxInstruction_c * i)1596 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_WdqVdqR(bxInstruction_c *i)
1597 {
1598 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1599 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1600 unsigned len = i->getVL();
1601
1602 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1603 dst.ymm32u(n) = (Bit32u) src.vmm64u(n);
1604 }
1605
1606 if (len == BX_VL128) dst.ymm64u(1) = 0;
1607 if (len != BX_VL512) dst.ymm128(1).clear();
1608
1609 BX_WRITE_YMM_REGZ(i->dst(), dst);
1610 BX_NEXT_INSTR(i);
1611 }
1612
VPMOVQD_MASK_WdqVdqR(bxInstruction_c * i)1613 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqR(bxInstruction_c *i)
1614 {
1615 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1616 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1617 unsigned len = i->getVL();
1618
1619 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
1620
1621 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
1622 if (mask & 0x1)
1623 dst.ymm32u(n) = (Bit32u) src.vmm64u(n);
1624 else
1625 if (i->isZeroMasking()) dst.ymm32u(n) = 0;
1626 }
1627
1628 if (len == BX_VL128) dst.ymm64u(1) = 0;
1629 if (len != BX_VL512) dst.ymm128(1).clear();
1630
1631 BX_WRITE_YMM_REGZ(i->dst(), dst);
1632 BX_NEXT_INSTR(i);
1633 }
1634
VPMOVSQD_MASK_WdqVdqM(bxInstruction_c * i)1635 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqM(bxInstruction_c *i)
1636 {
1637 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1638 unsigned len = i->getVL();
1639
1640 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1641 dst.vmm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
1642 }
1643
1644 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1645 opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1646
1647 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1648 avx_masked_store32(i, eaddr, &dst, opmask);
1649
1650 BX_NEXT_INSTR(i);
1651 }
1652
VPMOVSQD_WdqVdqR(bxInstruction_c * i)1653 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_WdqVdqR(bxInstruction_c *i)
1654 {
1655 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1656 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1657 unsigned len = i->getVL();
1658
1659 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1660 dst.ymm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
1661 }
1662
1663 if (len == BX_VL128) dst.ymm64u(1) = 0;
1664 if (len != BX_VL512) dst.ymm128(1).clear();
1665
1666 BX_WRITE_YMM_REGZ(i->dst(), dst);
1667 BX_NEXT_INSTR(i);
1668 }
1669
VPMOVSQD_MASK_WdqVdqR(bxInstruction_c * i)1670 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqR(bxInstruction_c *i)
1671 {
1672 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1673 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1674 unsigned len = i->getVL();
1675
1676 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
1677
1678 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
1679 if (mask & 0x1)
1680 dst.ymm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
1681 else
1682 if (i->isZeroMasking()) dst.ymm32u(n) = 0;
1683 }
1684
1685 if (len == BX_VL128) dst.ymm64u(1) = 0;
1686 if (len != BX_VL512) dst.ymm128(1).clear();
1687
1688 BX_WRITE_YMM_REGZ(i->dst(), dst);
1689 BX_NEXT_INSTR(i);
1690 }
1691
VPMOVUSQD_MASK_WdqVdqM(bxInstruction_c * i)1692 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqM(bxInstruction_c *i)
1693 {
1694 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1695 unsigned len = i->getVL();
1696
1697 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1698 dst.vmm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
1699 }
1700
1701 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1702 opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1703
1704 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1705 avx_masked_store32(i, eaddr, &dst, opmask);
1706
1707 BX_NEXT_INSTR(i);
1708 }
1709
VPMOVUSQD_WdqVdqR(bxInstruction_c * i)1710 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_WdqVdqR(bxInstruction_c *i)
1711 {
1712 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1713 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1714 unsigned len = i->getVL();
1715
1716 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1717 dst.ymm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
1718 }
1719
1720 if (len == BX_VL128) dst.ymm64u(1) = 0;
1721 if (len != BX_VL512) dst.ymm128(1).clear();
1722
1723 BX_WRITE_YMM_REGZ(i->dst(), dst);
1724 BX_NEXT_INSTR(i);
1725 }
1726
VPMOVUSQD_MASK_WdqVdqR(bxInstruction_c * i)1727 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqR(bxInstruction_c *i)
1728 {
1729 BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
1730 BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
1731 unsigned len = i->getVL();
1732
1733 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
1734
1735 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
1736 if (mask & 0x1)
1737 dst.ymm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
1738 else
1739 if (i->isZeroMasking()) dst.ymm32u(n) = 0;
1740 }
1741
1742 if (len == BX_VL128) dst.ymm64u(1) = 0;
1743 if (len != BX_VL512) dst.ymm128(1).clear();
1744
1745 BX_WRITE_YMM_REGZ(i->dst(), dst);
1746 BX_NEXT_INSTR(i);
1747 }
1748
1749 //////////////////////////
1750 // load with up convert //
1751 //////////////////////////
1752
VPMOVSXBW_MASK_VdqWdqR(bxInstruction_c * i)1753 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBW_MASK_VdqWdqR(bxInstruction_c *i)
1754 {
1755 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
1756 BxPackedAvxRegister result;
1757 unsigned len = i->getVL();
1758
1759 for (unsigned n=0; n < WORD_ELEMENTS(len); n++)
1760 result.vmm16s(n) = (Bit16s) op.ymmsbyte(n);
1761
1762 avx512_write_regw_masked(i, &result, len, BX_READ_32BIT_OPMASK(i->opmask()));
1763 BX_NEXT_INSTR(i);
1764 }
1765
VPMOVSXBD_MASK_VdqWdqR(bxInstruction_c * i)1766 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR(bxInstruction_c *i)
1767 {
1768 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1769 BxPackedAvxRegister result;
1770 unsigned len = i->getVL();
1771
1772 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
1773 result.vmm32s(n) = (Bit32s) op.xmmsbyte(n);
1774
1775 avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
1776 BX_NEXT_INSTR(i);
1777 }
1778
VPMOVSXBQ_MASK_VdqWdqR(bxInstruction_c * i)1779 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR(bxInstruction_c *i)
1780 {
1781 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1782 BxPackedAvxRegister result;
1783 unsigned len = i->getVL();
1784
1785 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
1786 result.vmm64s(n) = (Bit64s) op.xmmsbyte(n);
1787
1788 avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
1789 BX_NEXT_INSTR(i);
1790 }
1791
VPMOVSXWD_MASK_VdqWdqR(bxInstruction_c * i)1792 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR(bxInstruction_c *i)
1793 {
1794 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
1795 BxPackedAvxRegister result;
1796 unsigned len = i->getVL();
1797
1798 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
1799 result.vmm32s(n) = (Bit32s) op.ymm16s(n);
1800
1801 avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
1802 BX_NEXT_INSTR(i);
1803 }
1804
VPMOVSXWQ_MASK_VdqWdqR(bxInstruction_c * i)1805 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR(bxInstruction_c *i)
1806 {
1807 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1808 BxPackedAvxRegister result;
1809 unsigned len = i->getVL();
1810
1811 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
1812 result.vmm64s(n) = (Bit64s) op.xmm16s(n);
1813
1814 avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
1815 BX_NEXT_INSTR(i);
1816 }
1817
VPMOVSXDQ_MASK_VdqWdqR(bxInstruction_c * i)1818 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR(bxInstruction_c *i)
1819 {
1820 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
1821 BxPackedAvxRegister result;
1822 unsigned len = i->getVL();
1823
1824 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
1825 result.vmm64s(n) = (Bit64s) op.ymm32s(n);
1826
1827 avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
1828 BX_NEXT_INSTR(i);
1829 }
1830
VPMOVZXBW_MASK_VdqWdqR(bxInstruction_c * i)1831 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBW_MASK_VdqWdqR(bxInstruction_c *i)
1832 {
1833 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
1834 BxPackedAvxRegister result;
1835 unsigned len = i->getVL();
1836
1837 for (unsigned n=0; n < WORD_ELEMENTS(len); n++)
1838 result.vmm16u(n) = (Bit16u) op.ymmubyte(n);
1839
1840 avx512_write_regw_masked(i, &result, len, BX_READ_32BIT_OPMASK(i->opmask()));
1841 BX_NEXT_INSTR(i);
1842 }
1843
VPMOVZXBD_MASK_VdqWdqR(bxInstruction_c * i)1844 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR(bxInstruction_c *i)
1845 {
1846 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1847 BxPackedAvxRegister result;
1848 unsigned len = i->getVL();
1849
1850 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
1851 result.vmm32u(n) = (Bit32u) op.xmmubyte(n);
1852
1853 avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
1854 BX_NEXT_INSTR(i);
1855 }
1856
VPMOVZXBQ_MASK_VdqWdqR(bxInstruction_c * i)1857 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR(bxInstruction_c *i)
1858 {
1859 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1860 BxPackedAvxRegister result;
1861 unsigned len = i->getVL();
1862
1863 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
1864 result.vmm64u(n) = (Bit64u) op.xmmubyte(n);
1865
1866 avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
1867 BX_NEXT_INSTR(i);
1868 }
1869
VPMOVZXWD_MASK_VdqWdqR(bxInstruction_c * i)1870 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR(bxInstruction_c *i)
1871 {
1872 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
1873 BxPackedAvxRegister result;
1874 unsigned len = i->getVL();
1875
1876 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
1877 result.vmm32u(n) = (Bit32u) op.ymm16u(n);
1878
1879 avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
1880 BX_NEXT_INSTR(i);
1881 }
1882
VPMOVZXWQ_MASK_VdqWdqR(bxInstruction_c * i)1883 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR(bxInstruction_c *i)
1884 {
1885 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1886 BxPackedAvxRegister result;
1887 unsigned len = i->getVL();
1888
1889 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
1890 result.vmm64u(n) = (Bit64u) op.xmm16u(n);
1891
1892 avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
1893 BX_NEXT_INSTR(i);
1894 }
1895
VPMOVZXDQ_MASK_VdqWdqR(bxInstruction_c * i)1896 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR(bxInstruction_c *i)
1897 {
1898 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
1899 BxPackedAvxRegister result;
1900 unsigned len = i->getVL();
1901
1902 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
1903 result.vmm64u(n) = (Bit64u) op.ymm32u(n);
1904
1905 avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
1906 BX_NEXT_INSTR(i);
1907 }
1908
1909 #endif
1910