1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx_pfp.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2011-2018 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28
29 #if BX_SUPPORT_AVX
30
31 extern float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr);
32
33 extern float32 approximate_rsqrt(float32 op);
34 extern float32 approximate_rcp(float32 op);
35
36 #include "fpu/softfloat-compare.h"
37 #include "simd_pfp.h"
38 #include "simd_int.h"
39
print_state_AVX(void)40 void BX_CPU_C::print_state_AVX(void)
41 {
42 BX_DEBUG(("MXCSR: 0x%08x", BX_MXCSR_REGISTER));
43 for(int n=0;n<BX_XMM_REGISTERS;n++) {
44 #if BX_SUPPORT_EVEX
45 BxPackedZmmRegister vmm = BX_READ_AVX_REG(n);
46 BX_DEBUG(("VMM%02u: %08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x:%08x%08x", n,
47 vmm.zmm32u(15), vmm.zmm32u(14), vmm.zmm32u(13), vmm.zmm32u(12),
48 vmm.zmm32u(11), vmm.zmm32u(10), vmm.zmm32u(9), vmm.zmm32u(8),
49 vmm.zmm32u(7), vmm.zmm32u(6), vmm.zmm32u(5), vmm.zmm32u(4),
50 vmm.zmm32u(3), vmm.zmm32u(2), vmm.zmm32u(1), vmm.zmm32u(0)));
51 #else
52 BxPackedYmmRegister vmm = BX_READ_YMM_REG(n);
53 BX_DEBUG(("VMM%02u: %08x%08x:%08x%08x:%08x%08x:%08x%08x", n,
54 vmm.ymm32u(7), vmm.ymm32u(6), vmm.ymm32u(5), vmm.ymm32u(4),
55 vmm.ymm32u(3), vmm.ymm32u(2), vmm.ymm32u(1), vmm.ymm32u(0)));
56 #endif
57 }
58 }
59
60 /* Comparison predicate for VCMPSS/VCMPPS instructions */
61 float32_compare_method avx_compare32[32] = {
62 float32_eq_ordered_quiet,
63 float32_lt_ordered_signalling,
64 float32_le_ordered_signalling,
65 float32_unordered_quiet,
66 float32_neq_unordered_quiet,
67 float32_nlt_unordered_signalling,
68 float32_nle_unordered_signalling,
69 float32_ordered_quiet,
70 float32_eq_unordered_quiet,
71 float32_nge_unordered_signalling,
72 float32_ngt_unordered_signalling,
73 float32_false_quiet,
74 float32_neq_ordered_quiet,
75 float32_ge_ordered_signalling,
76 float32_gt_ordered_signalling,
77 float32_true_quiet,
78 float32_eq_ordered_signalling,
79 float32_lt_ordered_quiet,
80 float32_le_ordered_quiet,
81 float32_unordered_signalling,
82 float32_neq_unordered_signalling,
83 float32_nlt_unordered_quiet,
84 float32_nle_unordered_quiet,
85 float32_ordered_signalling,
86 float32_eq_unordered_signalling,
87 float32_nge_unordered_quiet,
88 float32_ngt_unordered_quiet,
89 float32_false_signalling,
90 float32_neq_ordered_signalling,
91 float32_ge_ordered_quiet,
92 float32_gt_ordered_quiet,
93 float32_true_signalling
94 };
95
96 /* Comparison predicate for VCMPSD/VCMPPD instructions */
97 float64_compare_method avx_compare64[32] = {
98 float64_eq_ordered_quiet,
99 float64_lt_ordered_signalling,
100 float64_le_ordered_signalling,
101 float64_unordered_quiet,
102 float64_neq_unordered_quiet,
103 float64_nlt_unordered_signalling,
104 float64_nle_unordered_signalling,
105 float64_ordered_quiet,
106 float64_eq_unordered_quiet,
107 float64_nge_unordered_signalling,
108 float64_ngt_unordered_signalling,
109 float64_false_quiet,
110 float64_neq_ordered_quiet,
111 float64_ge_ordered_signalling,
112 float64_gt_ordered_signalling,
113 float64_true_quiet,
114 float64_eq_ordered_signalling,
115 float64_lt_ordered_quiet,
116 float64_le_ordered_quiet,
117 float64_unordered_signalling,
118 float64_neq_unordered_signalling,
119 float64_nlt_unordered_quiet,
120 float64_nle_unordered_quiet,
121 float64_ordered_signalling,
122 float64_eq_unordered_signalling,
123 float64_nge_unordered_quiet,
124 float64_ngt_unordered_quiet,
125 float64_false_signalling,
126 float64_neq_ordered_signalling,
127 float64_ge_ordered_quiet,
128 float64_gt_ordered_quiet,
129 float64_true_signalling
130 };
131
132 /* Opcode: VEX.0F 51 (VEX.W ignore, VEX.VVV #UD) */
VSQRTPS_VpsWpsR(bxInstruction_c * i)133 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_VpsWpsR(bxInstruction_c *i)
134 {
135 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
136 unsigned len = i->getVL();
137
138 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
139 softfloat_status_word_rc_override(status, i);
140
141 for (unsigned n=0; n < len; n++) {
142 xmm_sqrtps(&op.vmm128(n), status);
143 }
144
145 check_exceptionsSSE(get_exception_flags(status));
146 BX_WRITE_AVX_REGZ(i->dst(), op, len);
147 BX_NEXT_INSTR(i);
148 }
149
150 /* Opcode: VEX.66.0F 51 (VEX.W ignore, VEX.VVV #UD) */
VSQRTPD_VpdWpdR(bxInstruction_c * i)151 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPD_VpdWpdR(bxInstruction_c *i)
152 {
153 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
154 unsigned len = i->getVL();
155
156 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
157 softfloat_status_word_rc_override(status, i);
158
159 for (unsigned n=0; n < len; n++) {
160 xmm_sqrtpd(&op.vmm128(n), status);
161 }
162
163 check_exceptionsSSE(get_exception_flags(status));
164 BX_WRITE_AVX_REGZ(i->dst(), op, len);
165 BX_NEXT_INSTR(i);
166 }
167
168 /* Opcode: VEX.NDS.F3.0F 51 (VEX.W ignore, VEX.L ignore) */
VSQRTSS_VssHpsWssR(bxInstruction_c * i)169 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSS_VssHpsWssR(bxInstruction_c *i)
170 {
171 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
172 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
173
174 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
175 softfloat_status_word_rc_override(status, i);
176 op1.xmm32u(0) = float32_sqrt(op2, status);
177 check_exceptionsSSE(get_exception_flags(status));
178
179 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
180 BX_NEXT_INSTR(i);
181 }
182
183 /* Opcode: VEX.NDS.F2.0F 51 (VEX.W ignore, VEX.L ignore) */
VSQRTSD_VsdHpdWsdR(bxInstruction_c * i)184 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSD_VsdHpdWsdR(bxInstruction_c *i)
185 {
186 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
187 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
188
189 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
190 softfloat_status_word_rc_override(status, i);
191 op1.xmm64u(0) = float64_sqrt(op2, status);
192 check_exceptionsSSE(get_exception_flags(status));
193
194 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
195 BX_NEXT_INSTR(i);
196 }
197
198 /* Opcode: VEX.0F 52 (VEX.W ignore, VEX.VVV #UD) */
VRSQRTPS_VpsWpsR(bxInstruction_c * i)199 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTPS_VpsWpsR(bxInstruction_c *i)
200 {
201 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
202 unsigned len = i->getVL();
203
204 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
205 op.ymm32u(n) = approximate_rsqrt(op.ymm32u(n));
206
207 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
208
209 BX_NEXT_INSTR(i);
210 }
211
212 /* Opcode: VEX.NDS.F3.0F 52 (VEX.W ignore, VEX.L ignore) */
VRSQRTSS_VssHpsWssR(bxInstruction_c * i)213 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRSQRTSS_VssHpsWssR(bxInstruction_c *i)
214 {
215 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
216 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
217
218 op1.xmm32u(0) = approximate_rsqrt(op2);
219
220 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
221 BX_NEXT_INSTR(i);
222 }
223
224 /* Opcode: VEX.0F 53 (VEX.W ignore, VEX.VVV #UD) */
VRCPPS_VpsWpsR(bxInstruction_c * i)225 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPPS_VpsWpsR(bxInstruction_c *i)
226 {
227 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
228 unsigned len = i->getVL();
229
230 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++)
231 op.vmm32u(n) = approximate_rcp(op.vmm32u(n));
232
233 BX_WRITE_AVX_REGZ(i->dst(), op, len);
234
235 BX_NEXT_INSTR(i);
236 }
237
238 /* Opcode: VEX.NDS.F3.0F 53 (VEX.W ignore, VEX.L ignore) */
VRCPSS_VssHpsWssR(bxInstruction_c * i)239 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRCPSS_VssHpsWssR(bxInstruction_c *i)
240 {
241 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
242 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
243
244 op1.xmm32u(0) = approximate_rcp(op2);
245
246 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
247 BX_NEXT_INSTR(i);
248 }
249
250 /* Opcode: VEX.NDS.0F 58 (VEX.W ignore) */
VADDPS_VpsHpsWpsR(bxInstruction_c * i)251 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPS_VpsHpsWpsR(bxInstruction_c *i)
252 {
253 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
254 unsigned len = i->getVL();
255
256 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
257 softfloat_status_word_rc_override(status, i);
258
259 for (unsigned n=0; n < len; n++) {
260 xmm_addps(&op1.vmm128(n), &op2.vmm128(n), status);
261 }
262
263 check_exceptionsSSE(get_exception_flags(status));
264
265 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
266
267 BX_NEXT_INSTR(i);
268 }
269
270 /* Opcode: VEX.NDS.66.0F 58 (VEX.W ignore) */
VADDPD_VpdHpdWpdR(bxInstruction_c * i)271 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDPD_VpdHpdWpdR(bxInstruction_c *i)
272 {
273 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
274 unsigned len = i->getVL();
275
276 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
277 softfloat_status_word_rc_override(status, i);
278
279 for (unsigned n=0; n < len; n++) {
280 xmm_addpd(&op1.vmm128(n), &op2.vmm128(n), status);
281 }
282
283 check_exceptionsSSE(get_exception_flags(status));
284
285 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
286
287 BX_NEXT_INSTR(i);
288 }
289
290 /* Opcode: VEX.NDS.F3.0F 58 (VEX.W ignore, VEX.L ignore) */
VADDSS_VssHpsWssR(bxInstruction_c * i)291 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSS_VssHpsWssR(bxInstruction_c *i)
292 {
293 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
294 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
295
296 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
297 softfloat_status_word_rc_override(status, i);
298
299 op1.xmm32u(0) = float32_add(op1.xmm32u(0), op2, status);
300
301 check_exceptionsSSE(get_exception_flags(status));
302 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
303
304 BX_NEXT_INSTR(i);
305 }
306
307 /* Opcode: VEX.NDS.F2.0F 58 (VEX.W ignore, VEX.L ignore) */
VADDSD_VsdHpdWsdR(bxInstruction_c * i)308 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSD_VsdHpdWsdR(bxInstruction_c *i)
309 {
310 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
311 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
312
313 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
314 softfloat_status_word_rc_override(status, i);
315
316 op1.xmm64u(0) = float64_add(op1.xmm64u(0), op2, status);
317
318 check_exceptionsSSE(get_exception_flags(status));
319 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
320
321 BX_NEXT_INSTR(i);
322 }
323
324 /* Opcode: VEX.NDS.0F 59 (VEX.W ignore) */
VMULPS_VpsHpsWpsR(bxInstruction_c * i)325 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPS_VpsHpsWpsR(bxInstruction_c *i)
326 {
327 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
328 unsigned len = i->getVL();
329
330 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
331 softfloat_status_word_rc_override(status, i);
332
333 for (unsigned n=0; n < len; n++) {
334 xmm_mulps(&op1.vmm128(n), &op2.vmm128(n), status);
335 }
336
337 check_exceptionsSSE(get_exception_flags(status));
338
339 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
340
341 BX_NEXT_INSTR(i);
342 }
343
344 /* Opcode: VEX.NDS.66.0F 59 (VEX.W ignore) */
VMULPD_VpdHpdWpdR(bxInstruction_c * i)345 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULPD_VpdHpdWpdR(bxInstruction_c *i)
346 {
347 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
348 unsigned len = i->getVL();
349
350 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
351 softfloat_status_word_rc_override(status, i);
352
353 for (unsigned n=0; n < len; n++) {
354 xmm_mulpd(&op1.vmm128(n), &op2.vmm128(n), status);
355 }
356
357 check_exceptionsSSE(get_exception_flags(status));
358
359 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
360
361 BX_NEXT_INSTR(i);
362 }
363
364 /* Opcode: VEX.NDS.F3.0F 59 (VEX.W ignore, VEX.L ignore) */
VMULSS_VssHpsWssR(bxInstruction_c * i)365 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSS_VssHpsWssR(bxInstruction_c *i)
366 {
367 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
368 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
369
370 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
371 softfloat_status_word_rc_override(status, i);
372
373 op1.xmm32u(0) = float32_mul(op1.xmm32u(0), op2, status);
374
375 check_exceptionsSSE(get_exception_flags(status));
376 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
377
378 BX_NEXT_INSTR(i);
379 }
380
381 /* Opcode: VEX.NDS.F2.0F 59 (VEX.W ignore, VEX.L ignore) */
VMULSD_VsdHpdWsdR(bxInstruction_c * i)382 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMULSD_VsdHpdWsdR(bxInstruction_c *i)
383 {
384 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
385 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
386
387 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
388 softfloat_status_word_rc_override(status, i);
389
390 op1.xmm64u(0) = float64_mul(op1.xmm64u(0), op2, status);
391
392 check_exceptionsSSE(get_exception_flags(status));
393 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
394
395 BX_NEXT_INSTR(i);
396 }
397
398 /* Opcode: VEX.NDS.0F 5C (VEX.W ignore) */
VSUBPS_VpsHpsWpsR(bxInstruction_c * i)399 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPS_VpsHpsWpsR(bxInstruction_c *i)
400 {
401 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
402 unsigned len = i->getVL();
403
404 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
405 softfloat_status_word_rc_override(status, i);
406
407 for (unsigned n=0; n < len; n++) {
408 xmm_subps(&op1.vmm128(n), &op2.vmm128(n), status);
409 }
410
411 check_exceptionsSSE(get_exception_flags(status));
412
413 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
414
415 BX_NEXT_INSTR(i);
416 }
417
418 /* Opcode: VEX.NDS.66.0F 5C (VEX.W ignore) */
VSUBPD_VpdHpdWpdR(bxInstruction_c * i)419 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBPD_VpdHpdWpdR(bxInstruction_c *i)
420 {
421 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
422 unsigned len = i->getVL();
423
424 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
425 softfloat_status_word_rc_override(status, i);
426
427 for (unsigned n=0; n < len; n++) {
428 xmm_subpd(&op1.vmm128(n), &op2.vmm128(n), status);
429 }
430
431 check_exceptionsSSE(get_exception_flags(status));
432
433 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
434
435 BX_NEXT_INSTR(i);
436 }
437
438 /* Opcode: VEX.NDS.F3.0F 5C (VEX.W ignore, VEX.L ignore) */
VSUBSS_VssHpsWssR(bxInstruction_c * i)439 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSS_VssHpsWssR(bxInstruction_c *i)
440 {
441 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
442 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
443
444 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
445 softfloat_status_word_rc_override(status, i);
446
447 op1.xmm32u(0) = float32_sub(op1.xmm32u(0), op2, status);
448
449 check_exceptionsSSE(get_exception_flags(status));
450 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
451
452 BX_NEXT_INSTR(i);
453 }
454
455 /* Opcode: VEX.NDS.F2.0F 5C (VEX.W ignore, VEX.L ignore) */
VSUBSD_VsdHpdWsdR(bxInstruction_c * i)456 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSUBSD_VsdHpdWsdR(bxInstruction_c *i)
457 {
458 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
459 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
460
461 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
462 softfloat_status_word_rc_override(status, i);
463
464 op1.xmm64u(0) = float64_sub(op1.xmm64u(0), op2, status);
465
466 check_exceptionsSSE(get_exception_flags(status));
467 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
468
469 BX_NEXT_INSTR(i);
470 }
471
472 /* Opcode: VEX.NDS.0F 5D (VEX.W ignore) */
VMINPS_VpsHpsWpsR(bxInstruction_c * i)473 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPS_VpsHpsWpsR(bxInstruction_c *i)
474 {
475 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
476 unsigned len = i->getVL();
477
478 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
479 softfloat_status_word_rc_override(status, i);
480
481 for (unsigned n=0; n < len; n++) {
482 xmm_minps(&op1.vmm128(n), &op2.vmm128(n), status);
483 }
484
485 check_exceptionsSSE(get_exception_flags(status));
486
487 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
488
489 BX_NEXT_INSTR(i);
490 }
491
492 /* Opcode: VEX.NDS.66.0F 5D (VEX.W ignore) */
VMINPD_VpdHpdWpdR(bxInstruction_c * i)493 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINPD_VpdHpdWpdR(bxInstruction_c *i)
494 {
495 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
496 unsigned len = i->getVL();
497
498 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
499 softfloat_status_word_rc_override(status, i);
500
501 for (unsigned n=0; n < len; n++) {
502 xmm_minpd(&op1.vmm128(n), &op2.vmm128(n), status);
503 }
504
505 check_exceptionsSSE(get_exception_flags(status));
506
507 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
508
509 BX_NEXT_INSTR(i);
510 }
511
512 /* Opcode: VEX.NDS.F3.0F 5D (VEX.W ignore, VEX.L ignore) */
VMINSS_VssHpsWssR(bxInstruction_c * i)513 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSS_VssHpsWssR(bxInstruction_c *i)
514 {
515 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
516 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
517
518 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
519 softfloat_status_word_rc_override(status, i);
520
521 op1.xmm32u(0) = float32_min(op1.xmm32u(0), op2, status);
522
523 check_exceptionsSSE(get_exception_flags(status));
524
525 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
526
527 BX_NEXT_INSTR(i);
528 }
529
530 /* Opcode: VEX.NDS.F2.0F 5D (VEX.W ignore, VEX.L ignore) */
VMINSD_VsdHpdWsdR(bxInstruction_c * i)531 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMINSD_VsdHpdWsdR(bxInstruction_c *i)
532 {
533 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
534 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
535
536 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
537 softfloat_status_word_rc_override(status, i);
538
539 op1.xmm64u(0) = float64_min(op1.xmm64u(0), op2, status);
540
541 check_exceptionsSSE(get_exception_flags(status));
542
543 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
544
545 BX_NEXT_INSTR(i);
546 }
547
548 /* Opcode: VEX.NDS.0F 5E (VEX.W ignore) */
VDIVPS_VpsHpsWpsR(bxInstruction_c * i)549 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPS_VpsHpsWpsR(bxInstruction_c *i)
550 {
551 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
552 unsigned len = i->getVL();
553
554 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
555 softfloat_status_word_rc_override(status, i);
556
557 for (unsigned n=0; n < len; n++) {
558 xmm_divps(&op1.vmm128(n), &op2.vmm128(n), status);
559 }
560
561 check_exceptionsSSE(get_exception_flags(status));
562
563 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
564
565 BX_NEXT_INSTR(i);
566 }
567
568 /* Opcode: VEX.NDS.66.0F 5E (VEX.W ignore) */
VDIVPD_VpdHpdWpdR(bxInstruction_c * i)569 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVPD_VpdHpdWpdR(bxInstruction_c *i)
570 {
571 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
572 unsigned len = i->getVL();
573
574 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
575 softfloat_status_word_rc_override(status, i);
576
577 for (unsigned n=0; n < len; n++) {
578 xmm_divpd(&op1.vmm128(n), &op2.vmm128(n), status);
579 }
580
581 check_exceptionsSSE(get_exception_flags(status));
582
583 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
584
585 BX_NEXT_INSTR(i);
586 }
587
588 /* Opcode: VEX.NDS.F3.0F 5E (VEX.W ignore, VEX.L ignore) */
VDIVSS_VssHpsWssR(bxInstruction_c * i)589 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSS_VssHpsWssR(bxInstruction_c *i)
590 {
591 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
592 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
593
594 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
595 softfloat_status_word_rc_override(status, i);
596
597 op1.xmm32u(0) = float32_div(op1.xmm32u(0), op2, status);
598
599 check_exceptionsSSE(get_exception_flags(status));
600 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
601
602 BX_NEXT_INSTR(i);
603 }
604
605 /* Opcode: VEX.NDS.F2.0F 5E (VEX.W ignore, VEX.L ignore) */
VDIVSD_VsdHpdWsdR(bxInstruction_c * i)606 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDIVSD_VsdHpdWsdR(bxInstruction_c *i)
607 {
608 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
609 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
610
611 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
612 softfloat_status_word_rc_override(status, i);
613
614 op1.xmm64u(0) = float64_div(op1.xmm64u(0), op2, status);
615
616 check_exceptionsSSE(get_exception_flags(status));
617 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
618
619 BX_NEXT_INSTR(i);
620 }
621
622 /* Opcode: VEX.NDS.0F 5F (VEX.W ignore) */
VMAXPS_VpsHpsWpsR(bxInstruction_c * i)623 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPS_VpsHpsWpsR(bxInstruction_c *i)
624 {
625 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
626 unsigned len = i->getVL();
627
628 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
629 softfloat_status_word_rc_override(status, i);
630
631 for (unsigned n=0; n < len; n++) {
632 xmm_maxps(&op1.vmm128(n), &op2.vmm128(n), status);
633 }
634
635 check_exceptionsSSE(get_exception_flags(status));
636
637 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
638
639 BX_NEXT_INSTR(i);
640 }
641
642 /* Opcode: VEX.NDS.66.0F 5F (VEX.W ignore) */
VMAXPD_VpdHpdWpdR(bxInstruction_c * i)643 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXPD_VpdHpdWpdR(bxInstruction_c *i)
644 {
645 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
646 unsigned len = i->getVL();
647
648 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
649 softfloat_status_word_rc_override(status, i);
650
651 for (unsigned n=0; n < len; n++) {
652 xmm_maxpd(&op1.vmm128(n), &op2.vmm128(n), status);
653 }
654
655 check_exceptionsSSE(get_exception_flags(status));
656
657 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
658
659 BX_NEXT_INSTR(i);
660 }
661
662 /* Opcode: VEX.NDS.F3.0F 5F (VEX.W ignore, VEX.L ignore) */
VMAXSS_VssHpsWssR(bxInstruction_c * i)663 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSS_VssHpsWssR(bxInstruction_c *i)
664 {
665 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
666 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
667
668 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
669 softfloat_status_word_rc_override(status, i);
670
671 op1.xmm32u(0) = float32_max(op1.xmm32u(0), op2, status);
672
673 check_exceptionsSSE(get_exception_flags(status));
674
675 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
676
677 BX_NEXT_INSTR(i);
678 }
679
680 /* Opcode: VEX.NDS.F2.0F 5F (VEX.W ignore, VEX.L ignore) */
VMAXSD_VsdHpdWsdR(bxInstruction_c * i)681 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VMAXSD_VsdHpdWsdR(bxInstruction_c *i)
682 {
683 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
684 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
685
686 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
687 softfloat_status_word_rc_override(status, i);
688
689 op1.xmm64u(0) = float64_max(op1.xmm64u(0), op2, status);
690
691 check_exceptionsSSE(get_exception_flags(status));
692
693 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
694
695 BX_NEXT_INSTR(i);
696 }
697
698 /* Opcode: VEX.NDS.66.0F 7C (VEX.W ignore) */
VHADDPD_VpdHpdWpdR(bxInstruction_c * i)699 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPD_VpdHpdWpdR(bxInstruction_c *i)
700 {
701 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
702 unsigned len = i->getVL();
703
704 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
705 softfloat_status_word_rc_override(status, i);
706
707 for (unsigned n=0; n < len; n++) {
708 xmm_haddpd(&op1.vmm128(n), &op2.vmm128(n), status);
709 }
710
711 check_exceptionsSSE(get_exception_flags(status));
712
713 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
714
715 BX_NEXT_INSTR(i);
716 }
717
718 /* Opcode: VEX.NDS.F2.0F 7C (VEX.W ignore) */
VHADDPS_VpsHpsWpsR(bxInstruction_c * i)719 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHADDPS_VpsHpsWpsR(bxInstruction_c *i)
720 {
721 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
722 unsigned len = i->getVL();
723
724 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
725 softfloat_status_word_rc_override(status, i);
726
727 for (unsigned n=0; n < len; n++) {
728 xmm_haddps(&op1.vmm128(n), &op2.vmm128(n), status);
729 }
730
731 check_exceptionsSSE(get_exception_flags(status));
732
733 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
734
735 BX_NEXT_INSTR(i);
736 }
737
738 /* Opcode: VEX.NDS.66.0F 7D (VEX.W ignore) */
VHSUBPD_VpdHpdWpdR(bxInstruction_c * i)739 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPD_VpdHpdWpdR(bxInstruction_c *i)
740 {
741 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
742 unsigned len = i->getVL();
743
744 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
745 softfloat_status_word_rc_override(status, i);
746
747 for (unsigned n=0; n < len; n++) {
748 xmm_hsubpd(&op1.vmm128(n), &op2.vmm128(n), status);
749 }
750
751 check_exceptionsSSE(get_exception_flags(status));
752
753 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
754
755 BX_NEXT_INSTR(i);
756 }
757
758 /* Opcode: VEX.NDS.F2.0F 7D (VEX.W ignore) */
VHSUBPS_VpsHpsWpsR(bxInstruction_c * i)759 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VHSUBPS_VpsHpsWpsR(bxInstruction_c *i)
760 {
761 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
762 unsigned len = i->getVL();
763
764 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
765 softfloat_status_word_rc_override(status, i);
766
767 for (unsigned n=0; n < len; n++) {
768 xmm_hsubps(&op1.vmm128(n), &op2.vmm128(n), status);
769 }
770
771 check_exceptionsSSE(get_exception_flags(status));
772
773 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
774
775 BX_NEXT_INSTR(i);
776 }
777
778 /* Opcode: VEX.NDS.0F C2 (VEX.W ignore) */
VCMPPS_VpsHpsWpsIbR(bxInstruction_c * i)779 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPS_VpsHpsWpsIbR(bxInstruction_c *i)
780 {
781 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
782 unsigned len = i->getVL();
783
784 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
785 int ib = i->Ib() & 0x1F;
786
787 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
788 op1.ymm32u(n) = avx_compare32[ib](op1.ymm32u(n), op2.ymm32u(n), status) ? 0xFFFFFFFF : 0;
789 }
790
791 check_exceptionsSSE(get_exception_flags(status));
792 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
793
794 BX_NEXT_INSTR(i);
795 }
796
797 /* Opcode: VEX.NDS.66.0F C2 (VEX.W ignore) */
VCMPPD_VpdHpdWpdIbR(bxInstruction_c * i)798 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPD_VpdHpdWpdIbR(bxInstruction_c *i)
799 {
800 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
801 unsigned len = i->getVL();
802
803 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
804 int ib = i->Ib() & 0x1F;
805
806 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
807 op1.ymm64u(n) = avx_compare64[ib](op1.ymm64u(n), op2.ymm64u(n), status) ?
808 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
809 }
810
811 check_exceptionsSSE(get_exception_flags(status));
812 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
813
814 BX_NEXT_INSTR(i);
815 }
816
817 /* Opcode: VEX.NDS.F2.0F C2 (VEX.W ignore) */
VCMPSD_VsdHpdWsdIbR(bxInstruction_c * i)818 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSD_VsdHpdWsdIbR(bxInstruction_c *i)
819 {
820 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
821 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
822
823 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
824 int ib = i->Ib() & 0x1F;
825
826 if(avx_compare64[ib](op1.xmm64u(0), op2, status)) {
827 op1.xmm64u(0) = BX_CONST64(0xFFFFFFFFFFFFFFFF);
828 } else {
829 op1.xmm64u(0) = 0;
830 }
831
832 check_exceptionsSSE(get_exception_flags(status));
833 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
834
835 BX_NEXT_INSTR(i);
836 }
837
838 /* Opcode: VEX.NDS.F3.0F C2 (VEX.W ignore) */
VCMPSS_VssHpsWssIbR(bxInstruction_c * i)839 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSS_VssHpsWssIbR(bxInstruction_c *i)
840 {
841 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
842 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
843
844 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
845 int ib = i->Ib() & 0x1F;
846
847 if(avx_compare32[ib](op1.xmm32u(0), op2, status)) {
848 op1.xmm32u(0) = 0xFFFFFFFF;
849 } else {
850 op1.xmm32u(0) = 0;
851 }
852
853 check_exceptionsSSE(get_exception_flags(status));
854 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
855
856 BX_NEXT_INSTR(i);
857 }
858
/* Opcode: VEX.NDS.66.0F D0 (VEX.W ignore) */
VADDSUBPD_VpdHpdWpdR(bxInstruction_c * i)860 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPD_VpdHpdWpdR(bxInstruction_c *i)
861 {
862 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
863 unsigned len = i->getVL();
864
865 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
866 softfloat_status_word_rc_override(status, i);
867
868 for (unsigned n=0; n < len; n++) {
869 xmm_addsubpd(&op1.vmm128(n), &op2.vmm128(n), status);
870 }
871
872 check_exceptionsSSE(get_exception_flags(status));
873
874 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
875
876 BX_NEXT_INSTR(i);
877 }
878
879 /* Opcode: VEX.NDS.F2.0F D0 (VEX.W ignore) */
VADDSUBPS_VpsHpsWpsR(bxInstruction_c * i)880 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VADDSUBPS_VpsHpsWpsR(bxInstruction_c *i)
881 {
882 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
883 unsigned len = i->getVL();
884
885 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
886 softfloat_status_word_rc_override(status, i);
887
888 for (unsigned n=0; n < len; n++) {
889 xmm_addsubps(&op1.vmm128(n), &op2.vmm128(n), status);
890 }
891
892 check_exceptionsSSE(get_exception_flags(status));
893
894 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
895
896 BX_NEXT_INSTR(i);
897 }
898
899 /* Opcode: VEX.66.0F.38.0E (VEX.W=0, VEX.VVV #UD) */
VTESTPS_VpsWpsR(bxInstruction_c * i)900 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPS_VpsWpsR(bxInstruction_c *i)
901 {
902 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->dst()), op2 = BX_READ_YMM_REG(i->src());
903 unsigned len = i->getVL();
904
905 unsigned result = EFlagsZFMask | EFlagsCFMask;
906
907 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
908 if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0)
909 result &= ~EFlagsZFMask;
910
911 if ((op2.ymm64u(n) & ~op1.ymm64u(n) & BX_CONST64(0x8000000080000000)) != 0)
912 result &= ~EFlagsCFMask;
913 }
914
915 setEFlagsOSZAPC(result);
916
917 BX_NEXT_INSTR(i);
918 }
919
920 /* Opcode: VEX.66.0F.38.0F (VEX.W=0, VEX.VVV #UD) */
VTESTPD_VpdWpdR(bxInstruction_c * i)921 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VTESTPD_VpdWpdR(bxInstruction_c *i)
922 {
923 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->dst()), op2 = BX_READ_YMM_REG(i->src());
924 unsigned len = i->getVL();
925
926 unsigned result = EFlagsZFMask | EFlagsCFMask;
927
928 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
929 if ((op2.ymm64u(n) & op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0)
930 result &= ~EFlagsZFMask;
931
932 if ((op2.ymm64u(n) & ~op1.ymm64u(n) & BX_CONST64(0x8000000000000000)) != 0)
933 result &= ~EFlagsCFMask;
934 }
935
936 setEFlagsOSZAPC(result);
937
938 BX_NEXT_INSTR(i);
939 }
940
941 /* Opcode: VEX.66.0F.3A.08 (VEX.W ignore, VEX.VVV #UD) */
VROUNDPS_VpsWpsIbR(bxInstruction_c * i)942 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPS_VpsWpsIbR(bxInstruction_c *i)
943 {
944 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
945 unsigned len = i->getVL();
946
947 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
948 Bit8u control = i->Ib();
949
950 // override MXCSR rounding mode with control coming from imm8
951 if ((control & 0x4) == 0)
952 status.float_rounding_mode = control & 0x3;
953 // ignore precision exception result
954 if (control & 0x8)
955 status.float_suppress_exception |= float_flag_inexact;
956
957 for(unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
958 op.ymm32u(n) = float32_round_to_int(op.ymm32u(n), status);
959 }
960
961 check_exceptionsSSE(get_exception_flags(status));
962
963 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
964
965 BX_NEXT_INSTR(i);
966 }
967
968 /* Opcode: VEX.66.0F.3A.09 (VEX.W ignore, VEX.VVV #UD) */
VROUNDPD_VpdWpdIbR(bxInstruction_c * i)969 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDPD_VpdWpdIbR(bxInstruction_c *i)
970 {
971 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
972 unsigned len = i->getVL();
973
974 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
975 Bit8u control = i->Ib();
976
977 // override MXCSR rounding mode with control coming from imm8
978 if ((control & 0x4) == 0)
979 status.float_rounding_mode = control & 0x3;
980 // ignore precision exception result
981 if (control & 0x8)
982 status.float_suppress_exception |= float_flag_inexact;
983
984 for(unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
985 op.ymm64u(n) = float64_round_to_int(op.ymm64u(n), status);
986 }
987
988 check_exceptionsSSE(get_exception_flags(status));
989
990 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op, len);
991
992 BX_NEXT_INSTR(i);
993 }
994
995 /* Opcode: VEX.66.0F.3A.0A (VEX.W ignore, VEX.L ignore) */
VROUNDSS_VssHpsWssIbR(bxInstruction_c * i)996 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDSS_VssHpsWssIbR(bxInstruction_c *i)
997 {
998 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
999 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
1000
1001 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1002 Bit8u control = i->Ib();
1003
1004 // override MXCSR rounding mode with control coming from imm8
1005 if ((control & 0x4) == 0)
1006 status.float_rounding_mode = control & 0x3;
1007 // ignore precision exception result
1008 if (control & 0x8)
1009 status.float_suppress_exception |= float_flag_inexact;
1010
1011 op1.xmm32u(0) = float32_round_to_int(op2, status);
1012
1013 check_exceptionsSSE(get_exception_flags(status));
1014
1015 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1016
1017 BX_NEXT_INSTR(i);
1018 }
1019
1020 /* Opcode: VEX.66.0F.3A.0B (VEX.W ignore, VEX.L ignore) */
VROUNDSD_VsdHpdWsdIbR(bxInstruction_c * i)1021 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VROUNDSD_VsdHpdWsdIbR(bxInstruction_c *i)
1022 {
1023 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1024 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
1025
1026 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1027 Bit8u control = i->Ib();
1028
1029 // override MXCSR rounding mode with control coming from imm8
1030 if ((control & 0x4) == 0)
1031 status.float_rounding_mode = control & 0x3;
1032 // ignore precision exception result
1033 if (control & 0x8)
1034 status.float_suppress_exception |= float_flag_inexact;
1035
1036 op1.xmm64u(0) = float64_round_to_int(op2, status);
1037
1038 check_exceptionsSSE(get_exception_flags(status));
1039
1040 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1041
1042 BX_NEXT_INSTR(i);
1043 }
1044
1045 /* Opcode: VEX.66.0F.3A.40 (VEX.W ignore) */
VDPPS_VpsHpsWpsIbR(bxInstruction_c * i)1046 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VDPPS_VpsHpsWpsIbR(bxInstruction_c *i)
1047 {
1048 BxPackedYmmRegister op1 = BX_READ_YMM_REG(i->src1()), op2 = BX_READ_YMM_REG(i->src2());
1049 unsigned len = i->getVL();
1050 Bit8u mask = i->Ib();
1051
1052 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1053
1054 for (unsigned n=0; n < len; n++) {
1055
1056 // op1: [A, B, C, D]
1057 // op2: [E, F, G, H]
1058
1059 // after multiplication: op1 = [AE, BF, CG, DH]
1060 xmm_mulps_mask(&op1.ymm128(n), &op2.ymm128(n), status, mask >> 4);
1061
1062 // shuffle op2 = [BF, AE, DH, CG]
1063 xmm_shufps(&op2.ymm128(n), &op1.ymm128(n), &op1.ymm128(n), 0xb1);
1064
1065 // op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
1066 xmm_addps(&op2.ymm128(n), &op1.ymm128(n), status);
1067
1068 // shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
1069 xmm_shufpd(&op1.ymm128(n), &op2.ymm128(n), &op2.ymm128(n), 0x1);
1070
1071 // op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
1072 xmm_addps_mask(&op2.ymm128(n), &op1.ymm128(n), status, mask);
1073 }
1074
1075 check_exceptionsSSE(get_exception_flags(status));
1076
1077 BX_WRITE_YMM_REGZ_VLEN(i->dst(), op2, len);
1078
1079 BX_NEXT_INSTR(i);
1080 }
1081
1082 #endif // BX_SUPPORT_AVX
1083