/////////////////////////////////////////////////////////////////////////
// $Id: avx512_move.cc 13716 2019-12-21 20:07:03Z sshwarts $
/////////////////////////////////////////////////////////////////////////
//
//   Copyright (c) 2013-2018 Stanislav Shwartsman
//          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
//
/////////////////////////////////////////////////////////////////////////

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#define LOG_THIS BX_CPU_THIS_PTR

#if BX_SUPPORT_AVX
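
// Helper routines for masked vector loads: in 64-bit mode the address of
// every element selected by the mask is checked for canonicality up front,
// so a fault is raised before any element is actually read; elements whose
// mask bit is clear are zeroed in the destination without touching memory.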
void BX_CPU_C::avx_masked_load8(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit64u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
       if (mask & (BX_CONST64(1)<<n)) {
          if (! IsCanonical(laddr + n))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

  for (int n=BYTE_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (BX_CONST64(1)<<n))
       op->vmmubyte(n) = read_virtual_byte(i->seg(), eaddr + n);
    else
       op->vmmubyte(n) = 0;
  }
}

void BX_CPU_C::avx_masked_load16(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
       if (mask & (1<<n)) {
          if (! IsCanonical(laddr + 2*n))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

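  // alignment checks (#AC) are suppressed for the individual element
  // accesses; the saved mask is restored once all elements have been read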
#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  for (int n=WORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       op->vmm16u(n) = read_virtual_word(i->seg(), eaddr + 2*n);
    else
       op->vmm16u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_load32(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
       if (mask & (1<<n)) {
          if (! IsCanonical(laddr + 4*n))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       op->vmm32u(n) = read_virtual_dword(i->seg(), eaddr + 4*n);
    else
       op->vmm32u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_load64(bxInstruction_c *i, bx_address eaddr, BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
       if (mask & (1<<n)) {
          if (! IsCanonical(laddr + 8*n))
             exception(int_number(i->seg()), 0);
       }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       op->vmm64u(n) = read_virtual_qword(i->seg(), eaddr + 8*n);
    else
       op->vmm64u(n) = 0;
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_store8(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit64u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
      if (mask & (BX_CONST64(1)<<n)) {
        if (! IsCanonical(laddr + n))
           exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

  // probe all selected elements for write access first, so that a fault
  // cannot leave the store partially completed
  for (int n=BYTE_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (BX_CONST64(1)<<n))
       read_RMW_virtual_byte(i->seg(), eaddr + n);
  }

  for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
    if (mask & (BX_CONST64(1)<<n))
       write_virtual_byte(i->seg(), eaddr + n, op->vmmubyte(n));
  }
}

void BX_CPU_C::avx_masked_store16(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(laddr + 2*n))
           exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // probe all selected elements for write access first, so that a fault
  // cannot leave the store partially completed
  for (int n=WORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       read_RMW_virtual_word(i->seg(), eaddr + 2*n);
  }

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
       write_virtual_word(i->seg(), eaddr + 2*n, op->vmm16u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_store32(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(laddr + 4*n))
           exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // probe all selected elements for write access first, so that a fault
  // cannot leave the store partially completed
  for (int n=DWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       read_RMW_virtual_dword(i->seg(), eaddr + 4*n);
  }

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
       write_virtual_dword(i->seg(), eaddr + 4*n, op->vmm32u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}

void BX_CPU_C::avx_masked_store64(bxInstruction_c *i, bx_address eaddr, const BxPackedAvxRegister *op, Bit32u mask)
{
  unsigned len = i->getVL();

#if BX_SUPPORT_X86_64
  if (i->as64L()) {
    Bit64u laddr = get_laddr64(i->seg(), eaddr);
    for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
      if (mask & (1<<n)) {
        if (! IsCanonical(laddr + 8*n))
           exception(int_number(i->seg()), 0);
      }
    }
  }
#endif

#if BX_SUPPORT_ALIGNMENT_CHECK
  unsigned save_alignment_check_mask = BX_CPU_THIS_PTR alignment_check_mask;
  BX_CPU_THIS_PTR alignment_check_mask = 0;
#endif

  // probe all selected elements for write access first, so that a fault
  // cannot leave the store partially completed
  for (int n=QWORD_ELEMENTS(len)-1; n >= 0; n--) {
    if (mask & (1<<n))
       read_RMW_virtual_qword(i->seg(), eaddr + 8*n);
  }

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    if (mask & (1<<n))
       write_virtual_qword(i->seg(), eaddr + 8*n, op->vmm64u(n));
  }

#if BX_SUPPORT_ALIGNMENT_CHECK
  BX_CPU_THIS_PTR alignment_check_mask = save_alignment_check_mask;
#endif
}
#endif // BX_SUPPORT_AVX

#if BX_SUPPORT_EVEX

#include "simd_int.h"

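// Write a result back to the destination register under opmask control,
// one 128-bit lane at a time; a lane consumes 16 mask bits for byte
// elements, 8 for word, 4 for dword and 2 for qword elements. Zero-masking
// clears elements whose mask bit is 0, merge-masking keeps the old value.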
void BX_CPU_C::avx512_write_regb_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit64u opmask)
{
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, opmask >>= 16)
      xmm_zero_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), (Bit32u) opmask);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 16)
      xmm_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), (Bit32u) opmask);
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}

void BX_CPU_C::avx512_write_regw_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, opmask >>= 8)
      xmm_zero_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 8)
      xmm_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}

void BX_CPU_C::avx512_write_regd_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, opmask >>= 4)
      xmm_zero_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}

void BX_CPU_C::avx512_write_regq_masked(bxInstruction_c *i, const BxPackedAvxRegister *op, unsigned len, Bit32u opmask)
{
  if (i->isZeroMasking()) {
    for (unsigned n=0; n < len; n++, opmask >>= 2)
      xmm_zero_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }
  else {
    for (unsigned n=0; n < len; n++, opmask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op->vmm128(n), opmask);
  }

  BX_CLEAR_AVX_REGZ(i->dst(), len);
}

//////////////////////////
// masked register move //
//////////////////////////

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU8_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regb_masked(i, &op, i->getVL(), BX_READ_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU16_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regw_masked(i, &op, i->getVL(), BX_READ_32BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regd_masked(i, &op, i->getVL(), BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdR(bxInstruction_c *i)
{
  BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
  avx512_write_regq_masked(i, &op, i->getVL(), BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

////////////////////////////////////////
// masked packed load/store - aligned //
////////////////////////////////////////
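
// The aligned forms raise #GP when the address is not aligned to the full
// vector length, even if the misaligned elements are masked away.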

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len = i->getVL(), len_in_bytes = BYTE_ELEMENTS(len);
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len = i->getVL(), len_in_bytes = BYTE_ELEMENTS(len);
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked read len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len_in_bytes = BYTE_ELEMENTS(i->getVL());
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_16BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVAPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

  unsigned len_in_bytes = BYTE_ELEMENTS(i->getVL());
  if (laddr & (len_in_bytes-1)) {
    BX_ERROR(("AVX masked write len=%d: #GP misaligned access", len_in_bytes));
    exception(BX_GP_EXCEPTION, 0);
  }

  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_8BIT_OPMASK(i->opmask()));

  BX_NEXT_INSTR(i);
}

//////////////////////////////
// masked packed load/store //
//////////////////////////////
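
// The unaligned forms perform no alignment check; exceptions can come only
// from the masked element accesses themselves.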

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU8_MASK_VdqWdqM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  Bit64u mask = BX_READ_OPMASK(i->opmask());
  avx_masked_load8(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 16)
      xmm_pblendb(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU16_MASK_VdqWdqM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());
  avx_masked_load16(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 8)
      xmm_pblendw(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_VpsWpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
  avx_masked_load32(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_VpdWpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  unsigned len = i->getVL();

  BxPackedAvxRegister reg;
  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
  avx_masked_load64(i, eaddr, &reg, mask);

  if (i->isZeroMasking()) {
    BX_WRITE_AVX_REGZ(i->dst(), reg, len);
  }
  else {
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &reg.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU8_MASK_WdqVdqM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVDQU16_MASK_WdqVdqM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_32BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPS_MASK_WpsVpsM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store32(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVUPD_MASK_WpdVpdM(bxInstruction_c *i)
{
  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store64(i, eaddr, &BX_READ_AVX_REG(i->src()), BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

//////////////////////////////
// masked scalar load/store //
//////////////////////////////
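
// Scalar masked moves consult only bit 0 of the opmask register
// (BX_SCALAR_ELEMENT_MASK); a masked-off scalar store writes nothing.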

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdWsdM(bxInstruction_c *i)
{
  BxPackedXmmRegister op;

  op.xmm64u(1) = 0;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
    op.xmm64u(0) = read_virtual_qword(i->seg(), eaddr);
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
    }
    else {
      op.xmm64u(0) = 0;
    }
  }

  BX_WRITE_XMM_REGZ(i->dst(), op, i->getVL());
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssWssM(bxInstruction_c *i)
{
  BxPackedXmmRegister op;

  op.xmm64u(1) = 0;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
    op.xmm64u(0) = (Bit64u) read_virtual_dword(i->seg(), eaddr);
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm64u(0) = (Bit64u) BX_READ_XMM_REG_LO_DWORD(i->dst());
    }
    else {
      op.xmm64u(0) = 0;
    }
  }

  BX_WRITE_XMM_REGZ(i->dst(), op, i->getVL());
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_WsdVsdM(bxInstruction_c *i)
{
  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
    write_virtual_qword(i->seg(), eaddr, BX_READ_XMM_REG_LO_QWORD(i->src()));
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_WssVssM(bxInstruction_c *i)
{
  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
    write_virtual_dword(i->seg(), eaddr, BX_READ_XMM_REG_LO_DWORD(i->src()));
  }

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
{
  BxPackedXmmRegister op;

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->src2());
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
    }
    else {
      op.xmm64u(0) = 0;
    }
  }
  op.xmm64u(1) = BX_READ_XMM_REG_HI_QWORD(i->src1());

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssHpsWssR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src1());

  if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
    op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->src2());
  }
  else {
    if (! i->isZeroMasking()) {
      op.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
    }
    else {
      op.xmm32u(0) = 0;
    }
  }

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op);

  BX_NEXT_INSTR(i);
}

////////////////////////////////////
// masked store with down convert //
////////////////////////////////////
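
// Each VPMOV* flavor narrows the source elements: plain VPMOV truncates,
// VPMOVS converts with signed saturation and VPMOVUS with unsigned
// saturation. Register destinations zero the untouched upper part of the
// destination; memory destinations store only the elements selected by
// the opmask.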

// quad-word to byte
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm64u(n);
  }

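  // no opmask register encoded => store all elements; otherwise cut the
  // mask to the number of elements produced, e.g. QWORD_ELEMENTS(BX_VL128)
  // qwords yield a 2-bit effective mask for the 128-bit form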
  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = (Bit8u) src.vmm64u(n);
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = (Bit8u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmsbyte(n) = SaturateQwordSToByteS(src.vmm64s(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  dst.xmm64u(1) = 0;

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = SaturateQwordUToByteU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm16u(1) = 0;
  if (len != BX_VL512) dst.xmm32u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// double-word to byte
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm32u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = (Bit8u) src.vmm32u(n);
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = (Bit8u) src.vmm32u(n);
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmsbyte(n) = SaturateDwordSToByteS(src.vmm32s(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.xmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmmubyte(n) = SaturateDwordUToByteU(src.vmm32u(n));
    else
      if (i->isZeroMasking()) dst.xmmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// word to byte
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVWB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = (Bit8u) src.vmm16u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_32BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVWB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    dst.ymmubyte(n) = (Bit8u) src.vmm16u(n);
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVWB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymmubyte(n) = (Bit8u) src.vmm16u(n);
    else
      if (i->isZeroMasking()) dst.ymmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSWB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    dst.vmmsbyte(n) = SaturateWordSToByteS(src.vmm16s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_32BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSWB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    dst.ymmsbyte(n) = SaturateWordSToByteS(src.vmm16s(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSWB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymmsbyte(n) = SaturateWordSToByteS(src.vmm16s(n));
    else
      if (i->isZeroMasking()) dst.ymmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSWB_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    dst.vmmubyte(n) = SaturateWordUToByteU(src.vmm16u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_32BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(WORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store8(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSWB_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
    dst.ymmubyte(n) = SaturateWordUToByteU(src.vmm16u(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSWB_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  Bit32u mask = BX_READ_32BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymmubyte(n) = SaturateWordUToByteU(src.vmm16u(n));
    else
      if (i->isZeroMasking()) dst.ymmubyte(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// double-word to word
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = (Bit16u) src.vmm32u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.ymm16u(n) = (Bit16u) src.vmm32u(n);
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm16u(n) = (Bit16u) src.vmm32u(n);
    else
      if (i->isZeroMasking()) dst.ymm16u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.ymm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm16s(n) = SaturateDwordSToWordS(src.vmm32s(n));
    else
      if (i->isZeroMasking()) dst.ymm16u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(DWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
    dst.ymm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSDW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm16u(n) = SaturateDwordUToWordU(src.vmm32u(n));
    else
      if (i->isZeroMasking()) dst.ymm16u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

// quad-word to word
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = (Bit16u) src.vmm64u(n);
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmm16u(n) = (Bit16u) src.vmm64u(n);
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmm16u(n) = (Bit16u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.xmm16u(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmm16s(n) = SaturateQwordSToWordS(src.vmm64s(n));
    else
      if (i->isZeroMasking()) dst.xmm16u(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store16(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.xmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQW_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.xmm16u(n) = SaturateQwordUToWordU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.xmm16u(n) = 0;
  }

  if (len == BX_VL128) dst.xmm32u(1) = 0;
  if (len != BX_VL512) dst.xmm64u(1) = 0;

  BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

1577 // quad-word to double-word
VPMOVQD_MASK_WdqVdqM(bxInstruction_c * i)1578 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqM(bxInstruction_c *i)
1579 {
1580   BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
1581   unsigned len = i->getVL();
1582 
1583   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
1584     dst.vmm32u(n) = (Bit32u) src.vmm64u(n);
1585   }
1586 
1587   Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1588   opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));
1589 
1590   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
1591   avx_masked_store32(i, eaddr, &dst, opmask);
1592 
1593   BX_NEXT_INSTR(i);
1594 }
1595 
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.ymm32u(n) = (Bit32u) src.vmm64u(n);
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

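// VPMOVQD (register destination, masked): truncate under opmask control,
// merging or zeroing the unselected dwords depending on the masking mode.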
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm32u(n) = (Bit32u) src.vmm64u(n);
    else
      if (i->isZeroMasking()) dst.ymm32u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

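// VPMOVSQD (memory destination): signed saturation to the dword range
// [INT32_MIN, INT32_MAX], e.g. SaturateQwordSToDwordS(0x100000000) =
// 0x7FFFFFFF; only the opmask-selected elements are stored.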
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store32(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

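// VPMOVSQD (register destination, no masking): signed-saturating down-convert
// with the unused upper part of the destination cleared.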
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.ymm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

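// VPMOVSQD (register destination, masked): signed-saturating down-convert
// under opmask control.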
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm32s(n) = SaturateQwordSToDwordS(src.vmm64s(n));
    else
      if (i->isZeroMasking()) dst.ymm32u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

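// VPMOVUSQD (memory destination): unsigned saturation to the dword range
// [0, 0xFFFFFFFF], so any source value above 0xFFFFFFFF clamps to 0xFFFFFFFF;
// only the opmask-selected elements are stored.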
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqM(bxInstruction_c *i)
{
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src()), dst;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.vmm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
  }

  Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
  opmask &= CUT_OPMASK_TO(QWORD_ELEMENTS(len));

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  avx_masked_store32(i, eaddr, &dst, opmask);

  BX_NEXT_INSTR(i);
}

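// VPMOVUSQD (register destination, no masking): unsigned-saturating
// down-convert with the unused upper part of the destination cleared.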
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
    dst.ymm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

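// VPMOVUSQD (register destination, masked): unsigned-saturating down-convert
// under opmask control.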
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVUSQD_MASK_WdqVdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister dst = BX_READ_YMM_REG(i->dst());
  BxPackedAvxRegister src = BX_READ_AVX_REG(i->src());
  unsigned len = i->getVL();

  unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++, mask >>= 1) {
    if (mask & 0x1)
      dst.ymm32u(n) = SaturateQwordUToDwordU(src.vmm64u(n));
    else
      if (i->isZeroMasking()) dst.ymm32u(n) = 0;
  }

  if (len == BX_VL128) dst.ymm64u(1) = 0;
  if (len != BX_VL512) dst.ymm128(1).clear();

  BX_WRITE_YMM_REGZ(i->dst(), dst);
  BX_NEXT_INSTR(i);
}

//////////////////////////
// load with up convert //
//////////////////////////

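// VPMOVSX*: sign-extend each packed source element to the wider destination
// element (e.g. the byte 0x80 = -128 becomes the word 0xFF80); the result is
// committed through the avx512 masked-write helpers, which apply the opmask
// and merge or zero the unselected elements.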
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBW_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++)
    result.vmm16s(n) = (Bit16s) op.ymmsbyte(n);

  avx512_write_regw_masked(i, &result, len, BX_READ_32BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32s(n) = (Bit32s) op.xmmsbyte(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXBQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.xmmsbyte(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32s(n) = (Bit32s) op.ymm16s(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXWQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.xmm16s(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVSXDQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64s(n) = (Bit64s) op.ymm32s(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

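// VPMOVZX*: same up-conversion pattern as VPMOVSX* above, but the source
// elements are zero-extended instead (e.g. the byte 0x80 becomes the word
// 0x0080).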
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBW_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < WORD_ELEMENTS(len); n++)
    result.vmm16u(n) = (Bit16u) op.ymmubyte(n);

  avx512_write_regw_masked(i, &result, len, BX_READ_32BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32u(n) = (Bit32u) op.xmmubyte(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXBQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.xmmubyte(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWD_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
    result.vmm32u(n) = (Bit32u) op.ymm16u(n);

  avx512_write_regd_masked(i, &result, len, BX_READ_16BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXWQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.xmm16u(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::VPMOVZXDQ_MASK_VdqWdqR(bxInstruction_c *i)
{
  BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
  BxPackedAvxRegister result;
  unsigned len = i->getVL();

  for (unsigned n=0; n < QWORD_ELEMENTS(len); n++)
    result.vmm64u(n) = (Bit64u) op.ymm32u(n);

  avx512_write_regq_masked(i, &result, len, BX_READ_8BIT_OPMASK(i->opmask()));
  BX_NEXT_INSTR(i);
}

#endif