1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx_pfp.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 //   Copyright (c) 2011-2018 Stanislav Shwartsman
6 //          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 //  You should have received a copy of the GNU Lesser General Public
19 //  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23 
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28 
29 #if BX_SUPPORT_AVX
30 
/* Defined outside this file: produces a softfloat status word from an MXCSR value.
 * Every handler below that performs softfloat arithmetic starts from it. */
extern float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr);

/* Defined outside this file: float32 helpers called by the VRSQRTPS/VRSQRTSS
 * and VRCPPS/VRCPSS handlers below. */
extern float32 approximate_rsqrt(float32 op);
extern float32 approximate_rcp(float32 op);
35 
36 #include "fpu/softfloat-compare.h"
37 #include "simd_pfp.h"
38 #include "simd_int.h"
39 
print_state_AVX(void)40 void BX_CPU_C::print_state_AVX(void)
41 {
42   BX_DEBUG(("MXCSR: 0x%08x", BX_MXCSR_REGISTER));
43   for(int n=0;n<BX_XMM_REGISTERS;n++) {
44 #if BX_SUPPORT_EVEX
45     BxPackedZmmRegister vmm = BX_READ_AVX_REG(n);
46     BX_DEBUG(("VMM%02u: %08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x", n,
47        vmm.zmm32u(15), vmm.zmm32u(14), vmm.zmm32u(13), vmm.zmm32u(12),
48        vmm.zmm32u(11), vmm.zmm32u(10), vmm.zmm32u(9),  vmm.zmm32u(8),
49        vmm.zmm32u(7),  vmm.zmm32u(6),  vmm.zmm32u(5),  vmm.zmm32u(4),
50        vmm.zmm32u(3),  vmm.zmm32u(2),  vmm.zmm32u(1),  vmm.zmm32u(0)));
51 #else
52     BxPackedYmmRegister vmm = BX_READ_YMM_REG(n);
53     BX_DEBUG(("VMM%02u: %08x%08x:%08x%08x:%08x%08x:%08x%08x", n,
54        vmm.ymm32u(7), vmm.ymm32u(6), vmm.ymm32u(5), vmm.ymm32u(4),
55        vmm.ymm32u(3), vmm.ymm32u(2), vmm.ymm32u(1), vmm.ymm32u(0)));
56 #endif
57   }
58 }
59 
/* Comparison predicate table for VCMPSS/VCMPPS instructions.
 * The VCMPPS/VCMPSS handlers in this file index it with the low 5 bits of the
 * instruction's immediate byte (Ib & 0x1F), so the entry order is significant.
 * The trailing comment on each entry is its index. */
float32_compare_method avx_compare32[32] = {
  float32_eq_ordered_quiet,             /* 0x00 */
  float32_lt_ordered_signalling,        /* 0x01 */
  float32_le_ordered_signalling,        /* 0x02 */
  float32_unordered_quiet,              /* 0x03 */
  float32_neq_unordered_quiet,          /* 0x04 */
  float32_nlt_unordered_signalling,     /* 0x05 */
  float32_nle_unordered_signalling,     /* 0x06 */
  float32_ordered_quiet,                /* 0x07 */
  float32_eq_unordered_quiet,           /* 0x08 */
  float32_nge_unordered_signalling,     /* 0x09 */
  float32_ngt_unordered_signalling,     /* 0x0A */
  float32_false_quiet,                  /* 0x0B */
  float32_neq_ordered_quiet,            /* 0x0C */
  float32_ge_ordered_signalling,        /* 0x0D */
  float32_gt_ordered_signalling,        /* 0x0E */
  float32_true_quiet,                   /* 0x0F */
  float32_eq_ordered_signalling,        /* 0x10 */
  float32_lt_ordered_quiet,             /* 0x11 */
  float32_le_ordered_quiet,             /* 0x12 */
  float32_unordered_signalling,         /* 0x13 */
  float32_neq_unordered_signalling,     /* 0x14 */
  float32_nlt_unordered_quiet,          /* 0x15 */
  float32_nle_unordered_quiet,          /* 0x16 */
  float32_ordered_signalling,           /* 0x17 */
  float32_eq_unordered_signalling,      /* 0x18 */
  float32_nge_unordered_quiet,          /* 0x19 */
  float32_ngt_unordered_quiet,          /* 0x1A */
  float32_false_signalling,             /* 0x1B */
  float32_neq_ordered_signalling,       /* 0x1C */
  float32_ge_ordered_quiet,             /* 0x1D */
  float32_gt_ordered_quiet,             /* 0x1E */
  float32_true_signalling               /* 0x1F */
};
95 
/* Comparison predicate table for VCMPSD/VCMPPD instructions.
 * The VCMPPD/VCMPSD handlers in this file index it with the low 5 bits of the
 * instruction's immediate byte (Ib & 0x1F), so the entry order is significant.
 * The trailing comment on each entry is its index. */
float64_compare_method avx_compare64[32] = {
  float64_eq_ordered_quiet,             /* 0x00 */
  float64_lt_ordered_signalling,        /* 0x01 */
  float64_le_ordered_signalling,        /* 0x02 */
  float64_unordered_quiet,              /* 0x03 */
  float64_neq_unordered_quiet,          /* 0x04 */
  float64_nlt_unordered_signalling,     /* 0x05 */
  float64_nle_unordered_signalling,     /* 0x06 */
  float64_ordered_quiet,                /* 0x07 */
  float64_eq_unordered_quiet,           /* 0x08 */
  float64_nge_unordered_signalling,     /* 0x09 */
  float64_ngt_unordered_signalling,     /* 0x0A */
  float64_false_quiet,                  /* 0x0B */
  float64_neq_ordered_quiet,            /* 0x0C */
  float64_ge_ordered_signalling,        /* 0x0D */
  float64_gt_ordered_signalling,        /* 0x0E */
  float64_true_quiet,                   /* 0x0F */
  float64_eq_ordered_signalling,        /* 0x10 */
  float64_lt_ordered_quiet,             /* 0x11 */
  float64_le_ordered_quiet,             /* 0x12 */
  float64_unordered_signalling,         /* 0x13 */
  float64_neq_unordered_signalling,     /* 0x14 */
  float64_nlt_unordered_quiet,          /* 0x15 */
  float64_nle_unordered_quiet,          /* 0x16 */
  float64_ordered_signalling,           /* 0x17 */
  float64_eq_unordered_signalling,      /* 0x18 */
  float64_nge_unordered_quiet,          /* 0x19 */
  float64_ngt_unordered_quiet,          /* 0x1A */
  float64_false_signalling,             /* 0x1B */
  float64_neq_ordered_signalling,       /* 0x1C */
  float64_ge_ordered_quiet,             /* 0x1D */
  float64_gt_ordered_quiet,             /* 0x1E */
  float64_true_signalling               /* 0x1F */
};
131 
132 /* Opcode: VEX.0F 51 (VEX.W ignore, VEX.VVV #UD) */
VSQRTPS_VpsWpsR(bxInstruction_c * i)133 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_VpsWpsR(bxInstruction_c *i)
134 {
135   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
136   unsigned len = i->getVL();
137 
138   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
139   softfloat_status_word_rc_override(status, i);
140 
141   for (unsigned n=0; n < len; n++) {
142     xmm_sqrtps(&op.vmm128(n), status);
143   }
144 
145   check_exceptionsSSE(get_exception_flags(status));
146   BX_WRITE_AVX_REGZ(i->dst(), op, len);
147   BX_NEXT_INSTR(i);
148 }
149 
150 /* Opcode: VEX.66.0F 51 (VEX.W ignore, VEX.VVV #UD) */
VSQRTPD_VpdWpdR(bxInstruction_c * i)151 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPD_VpdWpdR(bxInstruction_c *i)
152 {
153   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
154   unsigned len = i->getVL();
155 
156   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
157   softfloat_status_word_rc_override(status, i);
158 
159   for (unsigned n=0; n < len; n++) {
160     xmm_sqrtpd(&op.vmm128(n), status);
161   }
162 
163   check_exceptionsSSE(get_exception_flags(status));
164   BX_WRITE_AVX_REGZ(i->dst(), op, len);
165   BX_NEXT_INSTR(i);
166 }
167 
168 /* Opcode: VEX.NDS.F3.0F 51 (VEX.W ignore, VEX.L ignore) */
VSQRTSS_VssHpsWssR(bxInstruction_c * i)169 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSS_VssHpsWssR(bxInstruction_c *i)
170 {
171   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
172   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
173 
174   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
175   softfloat_status_word_rc_override(status, i);
176   op1.xmm32u(0) = float32_sqrt(op2, status);
177   check_exceptionsSSE(get_exception_flags(status));
178 
179   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
180   BX_NEXT_INSTR(i);
181 }
182 
183 /* Opcode: VEX.NDS.F2.0F 51 (VEX.W ignore, VEX.L ignore) */
VSQRTSD_VsdHpdWsdR(bxInstruction_c * i)184 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSD_VsdHpdWsdR(bxInstruction_c *i)
185 {
186   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
187   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
188 
189   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
190   softfloat_status_word_rc_override(status, i);
191   op1.xmm64u(0) = float64_sqrt(op2, status);
192   check_exceptionsSSE(get_exception_flags(status));
193 
194   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
195   BX_NEXT_INSTR(i);
196 }
197 
198 /* Opcode: VEX.0F 52 (VEX.W ignore, VEX.VVV #UD) */
VRSQRTPS_VpsWpsR(bxInstruction_c * i)199 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTPS_VpsWpsR(bxInstruction_c *i)
200 {
201   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
202   unsigned len = i->getVL();
203 
204   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
205     op.ymm32u(n) = approximate_rsqrt(op.ymm32u(n));
206 
207   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
208 
209   BX_NEXT_INSTR(i);
210 }
211 
212 /* Opcode: VEX.NDS.F3.0F 52 (VEX.W ignore, VEX.L ignore) */
VRSQRTSS_VssHpsWssR(bxInstruction_c * i)213 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTSS_VssHpsWssR(bxInstruction_c *i)
214 {
215   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
216   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
217 
218   op1.xmm32u(0) = approximate_rsqrt(op2);
219 
220   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
221   BX_NEXT_INSTR(i);
222 }
223 
224 /* Opcode: VEX.0F 53 (VEX.W ignore, VEX.VVV #UD) */
VRCPPS_VpsWpsR(bxInstruction_c * i)225 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPPS_VpsWpsR(bxInstruction_c *i)
226 {
227   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
228   unsigned len = i->getVL();
229 
230   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
231     op.vmm32u(n) = approximate_rcp(op.vmm32u(n));
232 
233   BX_WRITE_AVX_REGZ(i->dst(), op, len);
234 
235   BX_NEXT_INSTR(i);
236 }
237 
238 /* Opcode: VEX.NDS.F3.0F 53 (VEX.W ignore, VEX.L ignore) */
VRCPSS_VssHpsWssR(bxInstruction_c * i)239 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPSS_VssHpsWssR(bxInstruction_c *i)
240 {
241   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
242   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
243 
244   op1.xmm32u(0) = approximate_rcp(op2);
245 
246   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
247   BX_NEXT_INSTR(i);
248 }
249 
250 /* Opcode: VEX.NDS.0F 58 (VEX.W ignore) */
VADDPS_VpsHpsWpsR(bxInstruction_c * i)251 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPS_VpsHpsWpsR(bxInstruction_c *i)
252 {
253   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
254   unsigned len = i->getVL();
255 
256   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
257   softfloat_status_word_rc_override(status, i);
258 
259   for (unsigned n=0; n < len; n++) {
260     xmm_addps(&op1.vmm128(n), &op2.vmm128(n), status);
261   }
262 
263   check_exceptionsSSE(get_exception_flags(status));
264 
265   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
266 
267   BX_NEXT_INSTR(i);
268 }
269 
270 /* Opcode: VEX.NDS.66.0F 58 (VEX.W ignore) */
VADDPD_VpdHpdWpdR(bxInstruction_c * i)271 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPD_VpdHpdWpdR(bxInstruction_c *i)
272 {
273   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
274   unsigned len = i->getVL();
275 
276   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
277   softfloat_status_word_rc_override(status, i);
278 
279   for (unsigned n=0; n < len; n++) {
280     xmm_addpd(&op1.vmm128(n), &op2.vmm128(n), status);
281   }
282 
283   check_exceptionsSSE(get_exception_flags(status));
284 
285   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
286 
287   BX_NEXT_INSTR(i);
288 }
289 
290 /* Opcode: VEX.NDS.F3.0F 58 (VEX.W ignore, VEX.L ignore) */
VADDSS_VssHpsWssR(bxInstruction_c * i)291 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSS_VssHpsWssR(bxInstruction_c *i)
292 {
293   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
294   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
295 
296   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
297   softfloat_status_word_rc_override(status, i);
298 
299   op1.xmm32u(0) = float32_add(op1.xmm32u(0), op2, status);
300 
301   check_exceptionsSSE(get_exception_flags(status));
302   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
303 
304   BX_NEXT_INSTR(i);
305 }
306 
307 /* Opcode: VEX.NDS.F2.0F 58 (VEX.W ignore, VEX.L ignore) */
VADDSD_VsdHpdWsdR(bxInstruction_c * i)308 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSD_VsdHpdWsdR(bxInstruction_c *i)
309 {
310   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
311   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
312 
313   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
314   softfloat_status_word_rc_override(status, i);
315 
316   op1.xmm64u(0) = float64_add(op1.xmm64u(0), op2, status);
317 
318   check_exceptionsSSE(get_exception_flags(status));
319   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
320 
321   BX_NEXT_INSTR(i);
322 }
323 
324 /* Opcode: VEX.NDS.0F 59 (VEX.W ignore) */
VMULPS_VpsHpsWpsR(bxInstruction_c * i)325 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPS_VpsHpsWpsR(bxInstruction_c *i)
326 {
327   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
328   unsigned len = i->getVL();
329 
330   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
331   softfloat_status_word_rc_override(status, i);
332 
333   for (unsigned n=0; n < len; n++) {
334     xmm_mulps(&op1.vmm128(n), &op2.vmm128(n), status);
335   }
336 
337   check_exceptionsSSE(get_exception_flags(status));
338 
339   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
340 
341   BX_NEXT_INSTR(i);
342 }
343 
344 /* Opcode: VEX.NDS.66.0F 59 (VEX.W ignore) */
VMULPD_VpdHpdWpdR(bxInstruction_c * i)345 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPD_VpdHpdWpdR(bxInstruction_c *i)
346 {
347   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
348   unsigned len = i->getVL();
349 
350   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
351   softfloat_status_word_rc_override(status, i);
352 
353   for (unsigned n=0; n < len; n++) {
354     xmm_mulpd(&op1.vmm128(n), &op2.vmm128(n), status);
355   }
356 
357   check_exceptionsSSE(get_exception_flags(status));
358 
359   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
360 
361   BX_NEXT_INSTR(i);
362 }
363 
364 /* Opcode: VEX.NDS.F3.0F 59 (VEX.W ignore, VEX.L ignore) */
VMULSS_VssHpsWssR(bxInstruction_c * i)365 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSS_VssHpsWssR(bxInstruction_c *i)
366 {
367   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
368   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
369 
370   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
371   softfloat_status_word_rc_override(status, i);
372 
373   op1.xmm32u(0) = float32_mul(op1.xmm32u(0), op2, status);
374 
375   check_exceptionsSSE(get_exception_flags(status));
376   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
377 
378   BX_NEXT_INSTR(i);
379 }
380 
381 /* Opcode: VEX.NDS.F2.0F 59 (VEX.W ignore, VEX.L ignore) */
VMULSD_VsdHpdWsdR(bxInstruction_c * i)382 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSD_VsdHpdWsdR(bxInstruction_c *i)
383 {
384   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
385   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
386 
387   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
388   softfloat_status_word_rc_override(status, i);
389 
390   op1.xmm64u(0) = float64_mul(op1.xmm64u(0), op2, status);
391 
392   check_exceptionsSSE(get_exception_flags(status));
393   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
394 
395   BX_NEXT_INSTR(i);
396 }
397 
398 /* Opcode: VEX.NDS.0F 5C (VEX.W ignore) */
VSUBPS_VpsHpsWpsR(bxInstruction_c * i)399 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPS_VpsHpsWpsR(bxInstruction_c *i)
400 {
401   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
402   unsigned len = i->getVL();
403 
404   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
405   softfloat_status_word_rc_override(status, i);
406 
407   for (unsigned n=0; n < len; n++) {
408     xmm_subps(&op1.vmm128(n), &op2.vmm128(n), status);
409   }
410 
411   check_exceptionsSSE(get_exception_flags(status));
412 
413   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
414 
415   BX_NEXT_INSTR(i);
416 }
417 
418 /* Opcode: VEX.NDS.66.0F 5C (VEX.W ignore) */
VSUBPD_VpdHpdWpdR(bxInstruction_c * i)419 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPD_VpdHpdWpdR(bxInstruction_c *i)
420 {
421   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
422   unsigned len = i->getVL();
423 
424   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
425   softfloat_status_word_rc_override(status, i);
426 
427   for (unsigned n=0; n < len; n++) {
428     xmm_subpd(&op1.vmm128(n), &op2.vmm128(n), status);
429   }
430 
431   check_exceptionsSSE(get_exception_flags(status));
432 
433   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
434 
435   BX_NEXT_INSTR(i);
436 }
437 
438 /* Opcode: VEX.NDS.F3.0F 5C (VEX.W ignore, VEX.L ignore) */
VSUBSS_VssHpsWssR(bxInstruction_c * i)439 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSS_VssHpsWssR(bxInstruction_c *i)
440 {
441   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
442   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
443 
444   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
445   softfloat_status_word_rc_override(status, i);
446 
447   op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op2, status);
448 
449   check_exceptionsSSE(get_exception_flags(status));
450   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
451 
452   BX_NEXT_INSTR(i);
453 }
454 
455 /* Opcode: VEX.NDS.F2.0F 5C (VEX.W ignore, VEX.L ignore) */
VSUBSD_VsdHpdWsdR(bxInstruction_c * i)456 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSD_VsdHpdWsdR(bxInstruction_c *i)
457 {
458   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
459   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
460 
461   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
462   softfloat_status_word_rc_override(status, i);
463 
464   op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op2, status);
465 
466   check_exceptionsSSE(get_exception_flags(status));
467   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
468 
469   BX_NEXT_INSTR(i);
470 }
471 
472 /* Opcode: VEX.NDS.0F 5D (VEX.W ignore) */
VMINPS_VpsHpsWpsR(bxInstruction_c * i)473 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPS_VpsHpsWpsR(bxInstruction_c *i)
474 {
475   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
476   unsigned len = i->getVL();
477 
478   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
479   softfloat_status_word_rc_override(status, i);
480 
481   for (unsigned n=0; n < len; n++) {
482     xmm_minps(&op1.vmm128(n), &op2.vmm128(n), status);
483   }
484 
485   check_exceptionsSSE(get_exception_flags(status));
486 
487   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
488 
489   BX_NEXT_INSTR(i);
490 }
491 
492 /* Opcode: VEX.NDS.66.0F 5D (VEX.W ignore) */
VMINPD_VpdHpdWpdR(bxInstruction_c * i)493 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPD_VpdHpdWpdR(bxInstruction_c *i)
494 {
495   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
496   unsigned len = i->getVL();
497 
498   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
499   softfloat_status_word_rc_override(status, i);
500 
501   for (unsigned n=0; n < len; n++) {
502     xmm_minpd(&op1.vmm128(n), &op2.vmm128(n), status);
503   }
504 
505   check_exceptionsSSE(get_exception_flags(status));
506 
507   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
508 
509   BX_NEXT_INSTR(i);
510 }
511 
512 /* Opcode: VEX.NDS.F3.0F 5D (VEX.W ignore, VEX.L ignore) */
VMINSS_VssHpsWssR(bxInstruction_c * i)513 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSS_VssHpsWssR(bxInstruction_c *i)
514 {
515   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
516   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
517 
518   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
519   softfloat_status_word_rc_override(status, i);
520 
521   op1.xmm32u(0) = float32_min(op1.xmm32u(0), op2, status);
522 
523   check_exceptionsSSE(get_exception_flags(status));
524 
525   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
526 
527   BX_NEXT_INSTR(i);
528 }
529 
530 /* Opcode: VEX.NDS.F2.0F 5D (VEX.W ignore, VEX.L ignore) */
VMINSD_VsdHpdWsdR(bxInstruction_c * i)531 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSD_VsdHpdWsdR(bxInstruction_c *i)
532 {
533   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
534   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
535 
536   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
537   softfloat_status_word_rc_override(status, i);
538 
539   op1.xmm64u(0) = float64_min(op1.xmm64u(0), op2, status);
540 
541   check_exceptionsSSE(get_exception_flags(status));
542 
543   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
544 
545   BX_NEXT_INSTR(i);
546 }
547 
548 /* Opcode: VEX.NDS.0F 5E (VEX.W ignore) */
VDIVPS_VpsHpsWpsR(bxInstruction_c * i)549 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPS_VpsHpsWpsR(bxInstruction_c *i)
550 {
551   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
552   unsigned len = i->getVL();
553 
554   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
555   softfloat_status_word_rc_override(status, i);
556 
557   for (unsigned n=0; n < len; n++) {
558     xmm_divps(&op1.vmm128(n), &op2.vmm128(n), status);
559   }
560 
561   check_exceptionsSSE(get_exception_flags(status));
562 
563   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
564 
565   BX_NEXT_INSTR(i);
566 }
567 
568 /* Opcode: VEX.NDS.66.0F 5E (VEX.W ignore) */
VDIVPD_VpdHpdWpdR(bxInstruction_c * i)569 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPD_VpdHpdWpdR(bxInstruction_c *i)
570 {
571   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
572   unsigned len = i->getVL();
573 
574   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
575   softfloat_status_word_rc_override(status, i);
576 
577   for (unsigned n=0; n < len; n++) {
578     xmm_divpd(&op1.vmm128(n), &op2.vmm128(n), status);
579   }
580 
581   check_exceptionsSSE(get_exception_flags(status));
582 
583   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
584 
585   BX_NEXT_INSTR(i);
586 }
587 
588 /* Opcode: VEX.NDS.F3.0F 5E (VEX.W ignore, VEX.L ignore) */
VDIVSS_VssHpsWssR(bxInstruction_c * i)589 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSS_VssHpsWssR(bxInstruction_c *i)
590 {
591   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
592   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
593 
594   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
595   softfloat_status_word_rc_override(status, i);
596 
597   op1.xmm32u(0) = float32_div(op1.xmm32u(0), op2, status);
598 
599   check_exceptionsSSE(get_exception_flags(status));
600   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
601 
602   BX_NEXT_INSTR(i);
603 }
604 
605 /* Opcode: VEX.NDS.F2.0F 5E (VEX.W ignore, VEX.L ignore) */
VDIVSD_VsdHpdWsdR(bxInstruction_c * i)606 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSD_VsdHpdWsdR(bxInstruction_c *i)
607 {
608   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
609   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
610 
611   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
612   softfloat_status_word_rc_override(status, i);
613 
614   op1.xmm64u(0) = float64_div(op1.xmm64u(0), op2, status);
615 
616   check_exceptionsSSE(get_exception_flags(status));
617   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
618 
619   BX_NEXT_INSTR(i);
620 }
621 
622 /* Opcode: VEX.NDS.0F 5F (VEX.W ignore) */
VMAXPS_VpsHpsWpsR(bxInstruction_c * i)623 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPS_VpsHpsWpsR(bxInstruction_c *i)
624 {
625   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
626   unsigned len = i->getVL();
627 
628   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
629   softfloat_status_word_rc_override(status, i);
630 
631   for (unsigned n=0; n < len; n++) {
632     xmm_maxps(&op1.vmm128(n), &op2.vmm128(n), status);
633   }
634 
635   check_exceptionsSSE(get_exception_flags(status));
636 
637   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
638 
639   BX_NEXT_INSTR(i);
640 }
641 
642 /* Opcode: VEX.NDS.66.0F 5F (VEX.W ignore) */
VMAXPD_VpdHpdWpdR(bxInstruction_c * i)643 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPD_VpdHpdWpdR(bxInstruction_c *i)
644 {
645   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
646   unsigned len = i->getVL();
647 
648   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
649   softfloat_status_word_rc_override(status, i);
650 
651   for (unsigned n=0; n < len; n++) {
652     xmm_maxpd(&op1.vmm128(n), &op2.vmm128(n), status);
653   }
654 
655   check_exceptionsSSE(get_exception_flags(status));
656 
657   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
658 
659   BX_NEXT_INSTR(i);
660 }
661 
662 /* Opcode: VEX.NDS.F3.0F 5F (VEX.W ignore, VEX.L ignore) */
VMAXSS_VssHpsWssR(bxInstruction_c * i)663 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSS_VssHpsWssR(bxInstruction_c *i)
664 {
665   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
666   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
667 
668   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
669   softfloat_status_word_rc_override(status, i);
670 
671   op1.xmm32u(0) = float32_max(op1.xmm32u(0), op2, status);
672 
673   check_exceptionsSSE(get_exception_flags(status));
674 
675   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
676 
677   BX_NEXT_INSTR(i);
678 }
679 
680 /* Opcode: VEX.NDS.F2.0F 5F (VEX.W ignore, VEX.L ignore) */
VMAXSD_VsdHpdWsdR(bxInstruction_c * i)681 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSD_VsdHpdWsdR(bxInstruction_c *i)
682 {
683   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
684   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
685 
686   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
687   softfloat_status_word_rc_override(status, i);
688 
689   op1.xmm64u(0) = float64_max(op1.xmm64u(0), op2, status);
690 
691   check_exceptionsSSE(get_exception_flags(status));
692 
693   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
694 
695   BX_NEXT_INSTR(i);
696 }
697 
698 /* Opcode: VEX.NDS.66.0F 7C (VEX.W ignore) */
VHADDPD_VpdHpdWpdR(bxInstruction_c * i)699 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPD_VpdHpdWpdR(bxInstruction_c *i)
700 {
701   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
702   unsigned len = i->getVL();
703 
704   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
705   softfloat_status_word_rc_override(status, i);
706 
707   for (unsigned n=0; n < len; n++) {
708     xmm_haddpd(&op1.vmm128(n), &op2.vmm128(n), status);
709   }
710 
711   check_exceptionsSSE(get_exception_flags(status));
712 
713   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
714 
715   BX_NEXT_INSTR(i);
716 }
717 
718 /* Opcode: VEX.NDS.F2.0F 7C (VEX.W ignore) */
VHADDPS_VpsHpsWpsR(bxInstruction_c * i)719 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPS_VpsHpsWpsR(bxInstruction_c *i)
720 {
721   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
722   unsigned len = i->getVL();
723 
724   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
725   softfloat_status_word_rc_override(status, i);
726 
727   for (unsigned n=0; n < len; n++) {
728     xmm_haddps(&op1.vmm128(n), &op2.vmm128(n), status);
729   }
730 
731   check_exceptionsSSE(get_exception_flags(status));
732 
733   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
734 
735   BX_NEXT_INSTR(i);
736 }
737 
738 /* Opcode: VEX.NDS.66.0F 7D (VEX.W ignore) */
VHSUBPD_VpdHpdWpdR(bxInstruction_c * i)739 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPD_VpdHpdWpdR(bxInstruction_c *i)
740 {
741   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
742   unsigned len = i->getVL();
743 
744   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
745   softfloat_status_word_rc_override(status, i);
746 
747   for (unsigned n=0; n < len; n++) {
748     xmm_hsubpd(&op1.vmm128(n), &op2.vmm128(n), status);
749   }
750 
751   check_exceptionsSSE(get_exception_flags(status));
752 
753   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
754 
755   BX_NEXT_INSTR(i);
756 }
757 
758 /* Opcode: VEX.NDS.F2.0F 7D (VEX.W ignore) */
VHSUBPS_VpsHpsWpsR(bxInstruction_c * i)759 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPS_VpsHpsWpsR(bxInstruction_c *i)
760 {
761   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
762   unsigned len = i->getVL();
763 
764   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
765   softfloat_status_word_rc_override(status, i);
766 
767   for (unsigned n=0; n < len; n++) {
768     xmm_hsubps(&op1.vmm128(n), &op2.vmm128(n), status);
769   }
770 
771   check_exceptionsSSE(get_exception_flags(status));
772 
773   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
774 
775   BX_NEXT_INSTR(i);
776 }
777 
778 /* Opcode: VEX.NDS.0F C2 (VEX.W ignore) */
VCMPPS_VpsHpsWpsIbR(bxInstruction_c * i)779 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPS_VpsHpsWpsIbR(bxInstruction_c *i)
780 {
781   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
782   unsigned len = i->getVL();
783 
784   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
785   int ib = i->Ib() & 0x1F;
786 
787   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
788     op1.ymm32u(n) = avx_compare32[ib](op1.ymm32u(n), op2.ymm32u(n), status) ? 0xFFFFFFFF : 0;
789   }
790 
791   check_exceptionsSSE(get_exception_flags(status));
792   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
793 
794   BX_NEXT_INSTR(i);
795 }
796 
797 /* Opcode: VEX.NDS.66.0F C2 (VEX.W ignore) */
VCMPPD_VpdHpdWpdIbR(bxInstruction_c * i)798 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPD_VpdHpdWpdIbR(bxInstruction_c *i)
799 {
800   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
801   unsigned len = i->getVL();
802 
803   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
804   int ib = i->Ib() & 0x1F;
805 
806   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
807     op1.ymm64u(n) = avx_compare64[ib](op1.ymm64u(n), op2.ymm64u(n), status) ?
808        BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
809   }
810 
811   check_exceptionsSSE(get_exception_flags(status));
812   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
813 
814   BX_NEXT_INSTR(i);
815 }
816 
817 /* Opcode: VEX.NDS.F2.0F C2 (VEX.W ignore) */
VCMPSD_VsdHpdWsdIbR(bxInstruction_c * i)818 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSD_VsdHpdWsdIbR(bxInstruction_c *i)
819 {
820   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
821   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
822 
823   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
824   int ib = i->Ib() & 0x1F;
825 
826   if(avx_compare64[ib](op1.xmm64u(0), op2, status)) {
827     op1.xmm64u(0) = BX_CONST64(0xFFFFFFFFFFFFFFFF);
828   } else {
829     op1.xmm64u(0) = 0;
830   }
831 
832   check_exceptionsSSE(get_exception_flags(status));
833   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
834 
835   BX_NEXT_INSTR(i);
836 }
837 
838 /* Opcode: VEX.NDS.F3.0F C2 (VEX.W ignore) */
VCMPSS_VssHpsWssIbR(bxInstruction_c * i)839 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSS_VssHpsWssIbR(bxInstruction_c *i)
840 {
841   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
842   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
843 
844   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
845   int ib = i->Ib() & 0x1F;
846 
847   if(avx_compare32[ib](op1.xmm32u(0), op2, status)) {
848     op1.xmm32u(0) = 0xFFFFFFFF;
849   } else {
850     op1.xmm32u(0) = 0;
851   }
852 
853   check_exceptionsSSE(get_exception_flags(status));
854   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
855 
856   BX_NEXT_INSTR(i);
857 }
858 
/* Opcode: VEX.NDS.66.0F D0 (VEX.W ignore) */
VADDSUBPD_VpdHpdWpdR(bxInstruction_c * i)860 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPD_VpdHpdWpdR(bxInstruction_c *i)
861 {
862   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
863   unsigned len = i->getVL();
864 
865   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
866   softfloat_status_word_rc_override(status, i);
867 
868   for (unsigned n=0; n < len; n++) {
869     xmm_addsubpd(&op1.vmm128(n), &op2.vmm128(n), status);
870   }
871 
872   check_exceptionsSSE(get_exception_flags(status));
873 
874   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
875 
876   BX_NEXT_INSTR(i);
877 }
878 
879 /* Opcode: VEX.NDS.F2.0F D0 (VEX.W ignore) */
VADDSUBPS_VpsHpsWpsR(bxInstruction_c * i)880 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPS_VpsHpsWpsR(bxInstruction_c *i)
881 {
882   BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
883   unsigned len = i->getVL();
884 
885   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
886   softfloat_status_word_rc_override(status, i);
887 
888   for (unsigned n=0; n < len; n++) {
889     xmm_addsubps(&op1.vmm128(n), &op2.vmm128(n), status);
890   }
891 
892   check_exceptionsSSE(get_exception_flags(status));
893 
894   BX_WRITE_AVX_REGZ(i->dst(), op1, len);
895 
896   BX_NEXT_INSTR(i);
897 }
898 
899 /* Opcode: VEX.66.0F.38.0E (VEX.W=0, VEX.VVV #UD) */
VTESTPS_VpsWpsR(bxInstruction_c * i)900 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPS_VpsWpsR(bxInstruction_c *i)
901 {
902   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->dst()), op2 = BX_READ_YMM_REG(i->src());
903   unsigned len = i->getVL();
904 
905   unsigned result = EFlagsZFMask | EFlagsCFMask;
906 
907   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
908     if ((op2.ymm64u(n) &  op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0)
909       result &= ~EFlagsZFMask;
910 
911     if ((op2.ymm64u(n) & ~op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0)
912       result &= ~EFlagsCFMask;
913   }
914 
915   setEFlagsOSZAPC(result);
916 
917   BX_NEXT_INSTR(i);
918 }
919 
920 /* Opcode: VEX.66.0F.38.0F (VEX.W=0, VEX.VVV #UD) */
VTESTPD_VpdWpdR(bxInstruction_c * i)921 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPD_VpdWpdR(bxInstruction_c *i)
922 {
923   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->dst()), op2 = BX_READ_YMM_REG(i->src());
924   unsigned len = i->getVL();
925 
926   unsigned result = EFlagsZFMask | EFlagsCFMask;
927 
928   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
929     if ((op2.ymm64u(n) &  op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0)
930       result &= ~EFlagsZFMask;
931 
932     if ((op2.ymm64u(n) & ~op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0)
933       result &= ~EFlagsCFMask;
934   }
935 
936   setEFlagsOSZAPC(result);
937 
938   BX_NEXT_INSTR(i);
939 }
940 
941 /* Opcode: VEX.66.0F.3A.08 (VEX.W ignore, VEX.VVV #UD) */
VROUNDPS_VpsWpsIbR(bxInstruction_c * i)942 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPS_VpsWpsIbR(bxInstruction_c *i)
943 {
944   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
945   unsigned len = i->getVL();
946 
947   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
948   Bit8u control = i->Ib();
949 
950   // override MXCSR rounding mode with control coming from imm8
951   if ((control & 0x4) == 0)
952     status.float_rounding_mode = control & 0x3;
953   // ignore precision exception result
954   if (control & 0x8)
955     status.float_suppress_exception |= float_flag_inexact;
956 
957   for(unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
958     op.ymm32u(n) = float32_round_to_int(op.ymm32u(n), status);
959   }
960 
961   check_exceptionsSSE(get_exception_flags(status));
962 
963   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
964 
965   BX_NEXT_INSTR(i);
966 }
967 
968 /* Opcode: VEX.66.0F.3A.09 (VEX.W ignore, VEX.VVV #UD) */
VROUNDPD_VpdWpdIbR(bxInstruction_c * i)969 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPD_VpdWpdIbR(bxInstruction_c *i)
970 {
971   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
972   unsigned len = i->getVL();
973 
974   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
975   Bit8u control = i->Ib();
976 
977   // override MXCSR rounding mode with control coming from imm8
978   if ((control & 0x4) == 0)
979     status.float_rounding_mode = control & 0x3;
980   // ignore precision exception result
981   if (control & 0x8)
982     status.float_suppress_exception |= float_flag_inexact;
983 
984   for(unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
985     op.ymm64u(n) = float64_round_to_int(op.ymm64u(n), status);
986   }
987 
988   check_exceptionsSSE(get_exception_flags(status));
989 
990   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
991 
992   BX_NEXT_INSTR(i);
993 }
994 
995 /* Opcode: VEX.66.0F.3A.0A (VEX.W ignore, VEX.L ignore) */
VROUNDSS_VssHpsWssIbR(bxInstruction_c * i)996 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDSS_VssHpsWssIbR(bxInstruction_c *i)
997 {
998   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
999   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
1000 
1001   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1002   Bit8u control = i->Ib();
1003 
1004   // override MXCSR rounding mode with control coming from imm8
1005   if ((control & 0x4) == 0)
1006     status.float_rounding_mode = control & 0x3;
1007   // ignore precision exception result
1008   if (control & 0x8)
1009     status.float_suppress_exception |= float_flag_inexact;
1010 
1011   op1.xmm32u(0) = float32_round_to_int(op2, status);
1012 
1013   check_exceptionsSSE(get_exception_flags(status));
1014 
1015   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1016 
1017   BX_NEXT_INSTR(i);
1018 }
1019 
1020 /* Opcode: VEX.66.0F.3A.0B (VEX.W ignore, VEX.L ignore) */
VROUNDSD_VsdHpdWsdIbR(bxInstruction_c * i)1021 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDSD_VsdHpdWsdIbR(bxInstruction_c *i)
1022 {
1023   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1024   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
1025 
1026   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1027   Bit8u control = i->Ib();
1028 
1029   // override MXCSR rounding mode with control coming from imm8
1030   if ((control & 0x4) == 0)
1031     status.float_rounding_mode = control & 0x3;
1032   // ignore precision exception result
1033   if (control & 0x8)
1034     status.float_suppress_exception |= float_flag_inexact;
1035 
1036   op1.xmm64u(0) = float64_round_to_int(op2, status);
1037 
1038   check_exceptionsSSE(get_exception_flags(status));
1039 
1040   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1041 
1042   BX_NEXT_INSTR(i);
1043 }
1044 
1045 /* Opcode: VEX.66.0F.3A.40 (VEX.W ignore) */
VDPPS_VpsHpsWpsIbR(bxInstruction_c * i)1046 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDPPS_VpsHpsWpsIbR(bxInstruction_c *i)
1047 {
1048   BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
1049   unsigned len = i->getVL();
1050   Bit8u mask = i->Ib();
1051 
1052   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1053 
1054   for (unsigned n=0; n < len; n++) {
1055 
1056     // op1: [A, B, C, D]
1057     // op2: [E, F, G, H]
1058 
1059     // after multiplication: op1 = [AE, BF, CG, DH]
1060     xmm_mulps_mask(&op1.ymm128(n), &op2.ymm128(n), status, mask >> 4);
1061 
1062     // shuffle op2 = [BF, AE, DH, CG]
1063     xmm_shufps(&op2.ymm128(n), &op1.ymm128(n), &op1.ymm128(n), 0xb1);
1064 
1065     // op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
1066     xmm_addps(&op2.ymm128(n), &op1.ymm128(n), status);
1067 
1068     // shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
1069     xmm_shufpd(&op1.ymm128(n), &op2.ymm128(n), &op2.ymm128(n), 0x1);
1070 
1071     // op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
1072     xmm_addps_mask(&op2.ymm128(n), &op1.ymm128(n), status, mask);
1073   }
1074 
1075   check_exceptionsSSE(get_exception_flags(status));
1076 
1077   BX_WRITE_YMM_REGZ_VLEN(i->dst(), op2, len);
1078 
1079   BX_NEXT_INSTR(i);
1080 }
1081 
1082 #endif // BX_SUPPORT_AVX
1083