1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx512_pfp.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2013-2018 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28
29 #if BX_SUPPORT_EVEX
30
31 extern float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr);
32
33 #include "fpu/softfloat-compare.h"
34 #include "simd_int.h"
35 #include "simd_pfp.h"
36
// Expand to an EVEX-masked packed single-precision binary-op handler.
// 'func' processes one 128-bit lane at a time under a 4-bit slice of the
// 16-bit opmask (one mask bit per float32 element).  Merge-masking blends
// the unwritten elements back from the old destination; zero-masking
// writes the lane results directly (func already zeroed masked-off
// elements).  Rounding control may be overridden by the EVEX RC bits.
#define EVEX_OP_PACKED_SINGLE(HANDLER, func) \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
  { \
    BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
    unsigned mask = BX_READ_16BIT_OPMASK(i->opmask()); \
    unsigned len = i->getVL(); \
    \
    float_status_t status = mxcsr_to_softfloat_status_word(MXCSR); \
    softfloat_status_word_rc_override(status, i); \
    \
    for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4) \
      (func)(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask); \
    \
    check_exceptionsSSE(get_exception_flags(status)); \
    \
    if (! i->isZeroMasking()) { \
      for (unsigned n=0; n < len; n++, mask >>= 4) \
        xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
      \
      BX_CLEAR_AVX_REGZ(i->dst(), len); \
    } \
    else { \
      BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
    } \
    \
    BX_NEXT_INSTR(i); \
  }
64
// Packed float32 arithmetic handlers with EVEX write masking
EVEX_OP_PACKED_SINGLE(VADDPS_MASK_VpsHpsWpsR, xmm_addps_mask)
EVEX_OP_PACKED_SINGLE(VSUBPS_MASK_VpsHpsWpsR, xmm_subps_mask)
EVEX_OP_PACKED_SINGLE(VMULPS_MASK_VpsHpsWpsR, xmm_mulps_mask)
EVEX_OP_PACKED_SINGLE(VDIVPS_MASK_VpsHpsWpsR, xmm_divps_mask)
EVEX_OP_PACKED_SINGLE(VMAXPS_MASK_VpsHpsWpsR, xmm_maxps_mask)
EVEX_OP_PACKED_SINGLE(VMINPS_MASK_VpsHpsWpsR, xmm_minps_mask)
EVEX_OP_PACKED_SINGLE(VSCALEFPS_MASK_VpsHpsWpsR, xmm_scalefps_mask)
72
// Expand to an EVEX-masked packed double-precision binary-op handler.
// Same structure as EVEX_OP_PACKED_SINGLE, but each 128-bit lane holds two
// float64 elements, so the 8-bit opmask advances 2 bits per lane and the
// merge step uses xmm_blendpd.
#define EVEX_OP_PACKED_DOUBLE(HANDLER, func) \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
  { \
    BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()); \
    unsigned mask = BX_READ_8BIT_OPMASK(i->opmask()); \
    unsigned len = i->getVL(); \
    \
    float_status_t status = mxcsr_to_softfloat_status_word(MXCSR); \
    softfloat_status_word_rc_override(status, i); \
    \
    for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2) \
      (func)(&op1.vmm128(n), &op2.vmm128(n), status, tmp_mask); \
    \
    check_exceptionsSSE(get_exception_flags(status)); \
    \
    if (! i->isZeroMasking()) { \
      for (unsigned n=0; n < len; n++, mask >>= 2) \
        xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask); \
      \
      BX_CLEAR_AVX_REGZ(i->dst(), len); \
    } \
    else { \
      BX_WRITE_AVX_REGZ(i->dst(), op1, len); \
    } \
    \
    BX_NEXT_INSTR(i); \
  }
100
// Packed float64 arithmetic handlers with EVEX write masking
EVEX_OP_PACKED_DOUBLE(VADDPD_MASK_VpdHpdWpdR, xmm_addpd_mask)
EVEX_OP_PACKED_DOUBLE(VSUBPD_MASK_VpdHpdWpdR, xmm_subpd_mask)
EVEX_OP_PACKED_DOUBLE(VMULPD_MASK_VpdHpdWpdR, xmm_mulpd_mask)
EVEX_OP_PACKED_DOUBLE(VDIVPD_MASK_VpdHpdWpdR, xmm_divpd_mask)
EVEX_OP_PACKED_DOUBLE(VMAXPD_MASK_VpdHpdWpdR, xmm_maxpd_mask)
EVEX_OP_PACKED_DOUBLE(VMINPD_MASK_VpdHpdWpdR, xmm_minpd_mask)
EVEX_OP_PACKED_DOUBLE(VSCALEFPD_MASK_VpdHpdWpdR, xmm_scalefpd_mask)
108
// Expand to an EVEX-masked scalar single-precision binary-op handler.
// Only element 0 is computed (op1[0] = func(op1[0], op2[0])); if bit 0 of
// the opmask is clear, element 0 is either zeroed (zero-masking) or kept
// from the old destination (merge-masking).  Bits [127:32] come from src1,
// the upper part of the destination register is cleared.
#define EVEX_OP_SCALAR_SINGLE(HANDLER, func) \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
  { \
    BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()); \
    \
    if (BX_SCALAR_ELEMENT_MASK(i->opmask())) { \
      float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2()); \
      \
      float_status_t status = mxcsr_to_softfloat_status_word(MXCSR); \
      softfloat_status_word_rc_override(status, i); \
      op1.xmm32u(0) = (func)(op1.xmm32u(0), op2, status); \
      check_exceptionsSSE(get_exception_flags(status)); \
    } \
    else { \
      if (i->isZeroMasking()) \
        op1.xmm32u(0) = 0; \
      else \
        op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst()); \
    } \
    \
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); \
    BX_NEXT_INSTR(i); \
  }
132
// Scalar float32 arithmetic handlers with EVEX write masking
EVEX_OP_SCALAR_SINGLE(VADDSS_MASK_VssHpsWssR, float32_add)
EVEX_OP_SCALAR_SINGLE(VSUBSS_MASK_VssHpsWssR, float32_sub)
EVEX_OP_SCALAR_SINGLE(VMULSS_MASK_VssHpsWssR, float32_mul)
EVEX_OP_SCALAR_SINGLE(VDIVSS_MASK_VssHpsWssR, float32_div)
EVEX_OP_SCALAR_SINGLE(VMINSS_MASK_VssHpsWssR, float32_min)
EVEX_OP_SCALAR_SINGLE(VMAXSS_MASK_VssHpsWssR, float32_max)
EVEX_OP_SCALAR_SINGLE(VSCALEFSS_MASK_VssHpsWssR, float32_scalef)
140
// Expand to an EVEX-masked scalar double-precision binary-op handler.
// Same structure as EVEX_OP_SCALAR_SINGLE but operating on the low float64
// element; bits [127:64] come from src1 and the upper destination part is
// cleared.
#define EVEX_OP_SCALAR_DOUBLE(HANDLER, func) \
  void BX_CPP_AttrRegparmN(1) BX_CPU_C:: HANDLER (bxInstruction_c *i) \
  { \
    BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1()); \
    \
    if (BX_SCALAR_ELEMENT_MASK(i->opmask())) { \
      float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2()); \
      \
      float_status_t status = mxcsr_to_softfloat_status_word(MXCSR); \
      softfloat_status_word_rc_override(status, i); \
      op1.xmm64u(0) = (func)(op1.xmm64u(0), op2, status); \
      check_exceptionsSSE(get_exception_flags(status)); \
    } \
    else { \
      if (i->isZeroMasking()) \
        op1.xmm64u(0) = 0; \
      else \
        op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst()); \
    } \
    \
    BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1); \
    BX_NEXT_INSTR(i); \
  }
164
// Scalar float64 arithmetic handlers with EVEX write masking
EVEX_OP_SCALAR_DOUBLE(VADDSD_MASK_VsdHpdWsdR, float64_add)
EVEX_OP_SCALAR_DOUBLE(VSUBSD_MASK_VsdHpdWsdR, float64_sub)
EVEX_OP_SCALAR_DOUBLE(VMULSD_MASK_VsdHpdWsdR, float64_mul)
EVEX_OP_SCALAR_DOUBLE(VDIVSD_MASK_VsdHpdWsdR, float64_div)
EVEX_OP_SCALAR_DOUBLE(VMINSD_MASK_VsdHpdWsdR, float64_min)
EVEX_OP_SCALAR_DOUBLE(VMAXSD_MASK_VsdHpdWsdR, float64_max)
EVEX_OP_SCALAR_DOUBLE(VSCALEFSD_MASK_VsdHpdWsdR, float64_scalef)
172
173 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPS_MASK_VpsWpsR(bxInstruction_c *i)
174 {
175 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
176 unsigned mask = BX_READ_16BIT_OPMASK(i->opmask());
177 unsigned len = i->getVL();
178
179 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
180 softfloat_status_word_rc_override(status, i);
181
182 for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4)
183 xmm_sqrtps_mask(&op.vmm128(n), status, tmp_mask);
184
185 check_exceptionsSSE(get_exception_flags(status));
186
187 if (! i->isZeroMasking()) {
188 for (unsigned n=0; n < len; n++, mask >>= 4)
189 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
190 BX_CLEAR_AVX_REGZ(i->dst(), len);
191 }
192 else {
193 BX_WRITE_AVX_REGZ(i->dst(), op, len);
194 }
195
196 BX_NEXT_INSTR(i);
197 }
198
VSQRTPD_MASK_VpdWpdR(bxInstruction_c * i)199 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTPD_MASK_VpdWpdR(bxInstruction_c *i)
200 {
201 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
202 unsigned mask = BX_READ_8BIT_OPMASK(i->opmask());
203 unsigned len = i->getVL();
204
205 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
206 softfloat_status_word_rc_override(status, i);
207
208 for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2)
209 xmm_sqrtpd_mask(&op.vmm128(n), status, tmp_mask);
210
211 check_exceptionsSSE(get_exception_flags(status));
212
213 if (! i->isZeroMasking()) {
214 for (unsigned n=0; n < len; n++, mask >>= 2)
215 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
216 BX_CLEAR_AVX_REGZ(i->dst(), len);
217 }
218 else {
219 BX_WRITE_AVX_REGZ(i->dst(), op, len);
220 }
221
222 BX_NEXT_INSTR(i);
223 }
224
VSQRTSS_MASK_VssHpsWssR(bxInstruction_c * i)225 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSS_MASK_VssHpsWssR(bxInstruction_c *i)
226 {
227 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
228
229 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
230 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
231
232 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
233 softfloat_status_word_rc_override(status, i);
234 op1.xmm32u(0) = float32_sqrt(op2, status);
235 check_exceptionsSSE(get_exception_flags(status));
236 }
237 else {
238 if (i->isZeroMasking())
239 op1.xmm32u(0) = 0;
240 else
241 op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
242 }
243
244 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
245 BX_NEXT_INSTR(i);
246 }
247
VSQRTSD_MASK_VsdHpdWsdR(bxInstruction_c * i)248 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSQRTSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
249 {
250 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
251
252 if (BX_SCALAR_ELEMENT_MASK(i->opmask())) {
253 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
254
255 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
256 softfloat_status_word_rc_override(status, i);
257 op1.xmm64u(0) = float64_sqrt(op2, status);
258 check_exceptionsSSE(get_exception_flags(status));
259 }
260 else {
261 if (i->isZeroMasking())
262 op1.xmm64u(0) = 0;
263 else
264 op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
265 }
266
267 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
268 BX_NEXT_INSTR(i);
269 }
270
271 // compare
272
273 extern float32_compare_method avx_compare32[32];
274 extern float64_compare_method avx_compare64[32];
275
VCMPPS_MASK_KGwHpsWpsIbR(bxInstruction_c * i)276 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPS_MASK_KGwHpsWpsIbR(bxInstruction_c *i)
277 {
278 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
279 unsigned num_elements = DWORD_ELEMENTS(i->getVL());
280
281 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
282 Bit32u result = 0;
283
284 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
285 softfloat_status_word_rc_override(status, i);
286 int ib = i->Ib() & 0x1F;
287
288 for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) {
289 if (opmask & mask) {
290 if (avx_compare32[ib](op1.vmm32u(n), op2.vmm32u(n), status)) result |= mask;
291 }
292 }
293
294 check_exceptionsSSE(get_exception_flags(status));
295 BX_WRITE_OPMASK(i->dst(), result);
296
297 BX_NEXT_INSTR(i);
298 }
299
VCMPPD_MASK_KGbHpdWpdIbR(bxInstruction_c * i)300 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPPD_MASK_KGbHpdWpdIbR(bxInstruction_c *i)
301 {
302 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
303 unsigned num_elements = QWORD_ELEMENTS(i->getVL());
304
305 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
306 Bit32u result = 0;
307
308 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
309 softfloat_status_word_rc_override(status, i);
310 int ib = i->Ib() & 0x1F;
311
312 for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) {
313 if (opmask & mask) {
314 if (avx_compare64[ib](op1.vmm64u(n), op2.vmm64u(n), status)) result |= mask;
315 }
316 }
317
318 check_exceptionsSSE(get_exception_flags(status));
319 BX_WRITE_OPMASK(i->dst(), result);
320
321 BX_NEXT_INSTR(i);
322 }
323
VCMPSD_MASK_KGbHsdWsdIbR(bxInstruction_c * i)324 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSD_MASK_KGbHsdWsdIbR(bxInstruction_c *i)
325 {
326 Bit32u result = 0;
327
328 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
329 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->src1());
330 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
331
332 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
333 softfloat_status_word_rc_override(status, i);
334 if (avx_compare64[i->Ib() & 0x1F](op1, op2, status)) result = 1;
335 check_exceptionsSSE(get_exception_flags(status));
336 }
337
338 BX_WRITE_OPMASK(i->dst(), result);
339 BX_NEXT_INSTR(i);
340 }
341
VCMPSS_MASK_KGbHssWssIbR(bxInstruction_c * i)342 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCMPSS_MASK_KGbHssWssIbR(bxInstruction_c *i)
343 {
344 Bit32u result = 0;
345
346 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
347 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->src1());
348 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
349
350 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
351 softfloat_status_word_rc_override(status, i);
352 if (avx_compare32[i->Ib() & 0x1F](op1, op2, status)) result = 1;
353 check_exceptionsSSE(get_exception_flags(status));
354 }
355
356 BX_WRITE_OPMASK(i->dst(), result);
357 BX_NEXT_INSTR(i);
358 }
359
360 // fixup
361
// Input-class tokens for VFIXUPIMM*: the classified input selects a 4-bit
// response field in the op2 response table (token N -> op2 bits [4N+3:4N]).
enum {
  BX_FIXUPIMM_QNAN_TOKEN = 0,           // input is a QNaN
  BX_FIXUPIMM_SNAN_TOKEN = 1,           // input is a SNaN
  BX_FIXUPIMM_ZERO_VALUE_TOKEN = 2,     // input is +/-0
  BX_FIXUPIMM_POS_ONE_VALUE_TOKEN = 3,  // input is exactly +1.0
  BX_FIXUPIMM_NEG_INF_TOKEN = 4,        // input is -inf
  BX_FIXUPIMM_POS_INF_TOKEN = 5,        // input is +inf
  BX_FIXUPIMM_NEG_VALUE_TOKEN = 6,      // any other negative finite value
  BX_FIXUPIMM_POS_VALUE_TOKEN = 7       // any other positive finite value
};
372
373 #include "fpu/softfloat-specialize.h"
374
// Constants returned by the VFIXUPIMM response table (raw IEEE-754 encodings)
const float32 float32_value_90 = 0x42b40000;                            // 90.0f
const float32 float32_pi_half = 0x3fc90fdb;                             // pi/2
const float32 float32_positive_half = 0x3f000000;                       // 0.5f

const float64 float64_value_90 = BX_CONST64(0x4056800000000000);        // 90.0
const float64 float64_pi_half = BX_CONST64(0x3ff921fb54442d18);         // pi/2
const float64 float64_positive_half = BX_CONST64(0x3fe0000000000000);   // 0.5
382
// VFIXUPIMM single-precision helper.
// Classifies op1 (after applying DAZ if enabled), maps the class to a token,
// raises #IE/#ZE faults for the input classes enabled in imm8, then returns
// the value selected by the token's 4-bit response field in the op2 table.
// 'dst' is the original destination element, preserved for response code 0.
float32 float32_fixupimm(float32 dst, float32 op1, Bit32u op2, unsigned imm8, float_status_t &status)
{
  float32 tmp_op1 = op1;
  if (get_denormals_are_zeros(status))
    tmp_op1 = float32_denormal_to_zero(op1);

  float_class_t op1_class = float32_class(tmp_op1);
  int sign = float32_sign(tmp_op1);
  // imm8 bit positions of the faults each token may raise
  unsigned token = 0, ie_fault_mask = 0, divz_fault_mask = 0;

  switch(op1_class)
  {
  case float_zero:
    token = BX_FIXUPIMM_ZERO_VALUE_TOKEN;
    divz_fault_mask = 0x01;
    ie_fault_mask = 0x02;
    break;

  case float_negative_inf:
    token = BX_FIXUPIMM_NEG_INF_TOKEN;
    ie_fault_mask = 0x20;
    break;

  case float_positive_inf:
    token = BX_FIXUPIMM_POS_INF_TOKEN;
    ie_fault_mask = 0x80;
    break;

  case float_SNaN:
    token = BX_FIXUPIMM_SNAN_TOKEN;
    ie_fault_mask = 0x10;
    break;

  case float_QNaN:
    token = BX_FIXUPIMM_QNAN_TOKEN;
    break;

  case float_denormal:
  case float_normalized:
    if (tmp_op1 == float32_positive_one) {
      token = BX_FIXUPIMM_POS_ONE_VALUE_TOKEN;
      divz_fault_mask = 0x04;
      ie_fault_mask = 0x08;
    }
    else {
      if (sign) {
        token = BX_FIXUPIMM_NEG_VALUE_TOKEN;
        ie_fault_mask = 0x40;
      }
      else {
        token = BX_FIXUPIMM_POS_VALUE_TOKEN;
      }
    }
    break;

  default:
    break;
  }

  // raise the faults enabled by imm8 for this input class
  if (imm8 & ie_fault_mask)
    float_raise(status, float_flag_invalid);

  if (imm8 & divz_fault_mask)
    float_raise(status, float_flag_divbyzero);

  // access response table, each response is encoded with 4-bit value in the op2
  unsigned token_response = (op2 >> (token*4)) & 0xf;

  switch(token_response) {
  case 0x1: // apply DAZ to the op1 value
    op1 = tmp_op1;
    break;
  case 0x2: op1 = convert_to_QNaN(tmp_op1); break;
  case 0x3: op1 = float32_default_nan; break;
  case 0x4: op1 = float32_negative_inf; break;
  case 0x5: op1 = float32_positive_inf; break;
  case 0x6: // infinity with the sign of the (DAZ'd) input
    op1 = sign ? float32_negative_inf : float32_positive_inf;
    break;
  case 0x7: op1 = float32_negative_zero; break;
  case 0x8: op1 = float32_positive_zero; break;
  case 0x9: op1 = float32_negative_one; break;
  case 0xA: op1 = float32_positive_one; break;
  case 0xB: op1 = float32_positive_half; break;
  case 0xC: op1 = float32_value_90; break;
  case 0xD: op1 = float32_pi_half; break;
  case 0xE: op1 = float32_max_float; break;
  case 0xF: op1 = float32_min_float; break;
  default: // response 0: preserve the destination value
    op1 = dst; break;
  }

  return op1;
}
477
// VFIXUPIMM double-precision helper.
// Same logic as float32_fixupimm: classify op1 (with DAZ applied if enabled),
// raise #IE/#ZE faults enabled in imm8 for that class, and return the value
// selected by the token's 4-bit response field in the op2 table.
float64 float64_fixupimm(float64 dst, float64 op1, Bit32u op2, unsigned imm8, float_status_t &status)
{
  float64 tmp_op1 = op1;
  if (get_denormals_are_zeros(status))
    tmp_op1 = float64_denormal_to_zero(op1);

  float_class_t op1_class = float64_class(tmp_op1);
  int sign = float64_sign(tmp_op1);
  // imm8 bit positions of the faults each token may raise
  unsigned token = 0, ie_fault_mask = 0, divz_fault_mask = 0;

  switch(op1_class)
  {
  case float_zero:
    token = BX_FIXUPIMM_ZERO_VALUE_TOKEN;
    divz_fault_mask = 0x01;
    ie_fault_mask = 0x02;
    break;

  case float_negative_inf:
    token = BX_FIXUPIMM_NEG_INF_TOKEN;
    ie_fault_mask = 0x20;
    break;

  case float_positive_inf:
    token = BX_FIXUPIMM_POS_INF_TOKEN;
    ie_fault_mask = 0x80;
    break;

  case float_SNaN:
    token = BX_FIXUPIMM_SNAN_TOKEN;
    ie_fault_mask = 0x10;
    break;

  case float_QNaN:
    token = BX_FIXUPIMM_QNAN_TOKEN;
    break;

  case float_denormal:
  case float_normalized:
    if (tmp_op1 == float64_positive_one) {
      token = BX_FIXUPIMM_POS_ONE_VALUE_TOKEN;
      divz_fault_mask = 0x04;
      ie_fault_mask = 0x08;
    }
    else {
      if (sign) {
        token = BX_FIXUPIMM_NEG_VALUE_TOKEN;
        ie_fault_mask = 0x40;
      }
      else {
        token = BX_FIXUPIMM_POS_VALUE_TOKEN;
      }
    }
    break;

  default:
    break;
  }

  // raise the faults enabled by imm8 for this input class
  if (imm8 & ie_fault_mask)
    float_raise(status, float_flag_invalid);

  if (imm8 & divz_fault_mask)
    float_raise(status, float_flag_divbyzero);

  // access response table, each response is encoded with 4-bit value in the op2
  unsigned token_response = (op2 >> (token*4)) & 0xf;

  switch(token_response) {
  case 0x1: // apply DAZ to the op1 value
    op1 = tmp_op1;
    break;
  case 0x2: op1 = convert_to_QNaN(tmp_op1); break;
  case 0x3: op1 = float64_default_nan; break;
  case 0x4: op1 = float64_negative_inf; break;
  case 0x5: op1 = float64_positive_inf; break;
  case 0x6: // infinity with the sign of the (DAZ'd) input
    op1 = sign ? float64_negative_inf : float64_positive_inf;
    break;
  case 0x7: op1 = float64_negative_zero; break;
  case 0x8: op1 = float64_positive_zero; break;
  case 0x9: op1 = float64_negative_one; break;
  case 0xA: op1 = float64_positive_one; break;
  case 0xB: op1 = float64_positive_half; break;
  case 0xC: op1 = float64_value_90; break;
  case 0xD: op1 = float64_pi_half; break;
  case 0xE: op1 = float64_max_float; break;
  case 0xF: op1 = float64_min_float; break;
  default: // response 0: preserve the destination value
    op1 = dst; break;
  }

  return op1;
}
572
VFIXUPIMMSS_MASK_VssHssWssIbR(bxInstruction_c * i)573 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFIXUPIMMSS_MASK_VssHssWssIbR(bxInstruction_c *i)
574 {
575 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
576 Bit32u op_dst = BX_READ_XMM_REG_LO_DWORD(i->dst());
577
578 if (i->opmask() == 0 || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
579 Bit32u op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
580
581 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
582 softfloat_status_word_rc_override(status, i);
583 op1.xmm32u(0) = float32_fixupimm(op_dst, op1.xmm32u(0), op2, i->Ib(), status);
584 check_exceptionsSSE(get_exception_flags(status));
585 }
586 else {
587 if (i->isZeroMasking())
588 op1.xmm32u(0) = 0;
589 else
590 op1.xmm32u(0) = op_dst;
591 }
592
593 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
594 BX_NEXT_INSTR(i);
595 }
596
VFIXUPIMMSD_MASK_VsdHsdWsdIbR(bxInstruction_c * i)597 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFIXUPIMMSD_MASK_VsdHsdWsdIbR(bxInstruction_c *i)
598 {
599 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
600 Bit64u op_dst = BX_READ_XMM_REG_LO_QWORD(i->dst());
601
602 if (i->opmask() == 0 || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
603 Bit32u op2 = (Bit32u) BX_READ_XMM_REG_LO_QWORD(i->src2());
604
605 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
606 softfloat_status_word_rc_override(status, i);
607 op1.xmm64u(0) = float64_fixupimm(op_dst, op1.xmm64u(0), op2, i->Ib(), status);
608 check_exceptionsSSE(get_exception_flags(status));
609 }
610 else {
611 if (i->isZeroMasking())
612 op1.xmm64u(0) = 0;
613 else
614 op1.xmm64u(0) = op_dst;
615 }
616
617 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
618 BX_NEXT_INSTR(i);
619 }
620
VFIXUPIMMPS_VpsHpsWpsIbR(bxInstruction_c * i)621 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFIXUPIMMPS_VpsHpsWpsIbR(bxInstruction_c *i)
622 {
623 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()), dst = BX_READ_AVX_REG(i->dst());
624 unsigned len = i->getVL();
625
626 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
627 softfloat_status_word_rc_override(status, i);
628
629 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
630 op1.vmm32u(n) = float32_fixupimm(dst.vmm32u(n), op1.vmm32u(n), op2.vmm32u(n), i->Ib(), status);
631 }
632
633 check_exceptionsSSE(get_exception_flags(status));
634
635 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
636 BX_NEXT_INSTR(i);
637 }
638
// VFIXUPIMMPS with EVEX write masking: per-element fixup of packed float32.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFIXUPIMMPS_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()), dst = BX_READ_AVX_REG(i->dst());
  Bit32u mask = BX_READ_16BIT_OPMASK(i->opmask());
  unsigned len = i->getVL();

  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  // fix up the active elements and zero the inactive ones; for merge masking
  // the inactive elements are restored from the destination below
  for (unsigned n=0, tmp_mask = mask; n < DWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
    if (tmp_mask & 0x1)
      op1.vmm32u(n) = float32_fixupimm(dst.vmm32u(n), op1.vmm32u(n), op2.vmm32u(n), i->Ib(), status);
    else
      op1.vmm32u(n) = 0;
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (! i->isZeroMasking()) {
    // merge masking: per 128-bit lane (4 mask bits), keep old destination
    // elements whose mask bit is clear
    for (unsigned n=0; n < len; n++, mask >>= 4)
      xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);
  }

  BX_NEXT_INSTR(i);
}
669
VFIXUPIMMPD_VpdHpdWpdIbR(bxInstruction_c * i)670 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFIXUPIMMPD_VpdHpdWpdIbR(bxInstruction_c *i)
671 {
672 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()), dst = BX_READ_AVX_REG(i->dst());
673 unsigned len = i->getVL();
674
675 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
676 softfloat_status_word_rc_override(status, i);
677
678 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
679 op1.vmm64u(n) = float64_fixupimm(dst.vmm64u(n), op1.vmm64u(n), (Bit32u) op2.vmm64u(n), i->Ib(), status);
680 }
681
682 check_exceptionsSSE(get_exception_flags(status));
683
684 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
685 BX_NEXT_INSTR(i);
686 }
687
// VFIXUPIMMPD with EVEX write masking: per-element fixup of packed float64.
void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFIXUPIMMPD_MASK_VpdHpdWpdIbR(bxInstruction_c *i)
{
  BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2()), dst = BX_READ_AVX_REG(i->dst());
  Bit32u mask = BX_READ_8BIT_OPMASK(i->opmask());
  unsigned len = i->getVL();

  float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
  softfloat_status_word_rc_override(status, i);

  // fix up the active elements and zero the inactive ones; for merge masking
  // the inactive elements are restored from the destination below.
  // Only the low 32 bits of each src2 element hold the response table.
  for (unsigned n=0, tmp_mask = mask; n < QWORD_ELEMENTS(len); n++, tmp_mask >>= 1) {
    if (tmp_mask & 0x1)
      op1.vmm64u(n) = float64_fixupimm(dst.vmm64u(n), op1.vmm64u(n), (Bit32u) op2.vmm64u(n), i->Ib(), status);
    else
      op1.vmm64u(n) = 0;
  }

  check_exceptionsSSE(get_exception_flags(status));

  if (! i->isZeroMasking()) {
    // merge masking: per 128-bit lane (2 mask bits), keep old destination
    // elements whose mask bit is clear
    for (unsigned n=0; n < len; n++, mask >>= 2)
      xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), mask);

    BX_CLEAR_AVX_REGZ(i->dst(), len);
  }
  else {
    BX_WRITE_AVX_REGZ(i->dst(), op1, len);
  }

  BX_NEXT_INSTR(i);
}
718
719 // fpclass
720
// VFPCLASS predicate: return non-zero when the classified value matches any
// category enabled in the imm8 'selector' bitmask.
static int fpclass(float_class_t op_class, int sign, int selector)
{
  return ((op_class == float_QNaN) && (selector & 0x01) != 0) ||                                        // QNaN
         ((op_class == float_zero) && ! sign && (selector & 0x02) != 0) ||                              // positive zero
         ((op_class == float_zero) && sign && (selector & 0x04) != 0) ||                                // negative zero
         ((op_class == float_positive_inf) && (selector & 0x08) != 0) ||                                // positive inf
         ((op_class == float_negative_inf) && (selector & 0x10) != 0) ||                                // negative inf
         ((op_class == float_denormal) && (selector & 0x20) != 0) ||                                    // denormal
         ((op_class == float_denormal || op_class == float_normalized) && sign && (selector & 0x40) != 0) || // negative finite
         ((op_class == float_SNaN) && (selector & 0x80) != 0);                                          // SNaN
}
732
float32_fpclass(float32 op,int selector,int daz)733 static BX_CPP_INLINE int float32_fpclass(float32 op, int selector, int daz)
734 {
735 if (daz)
736 op = float32_denormal_to_zero(op);
737
738 return fpclass(float32_class(op), float32_sign(op), selector);
739 }
740
float64_fpclass(float64 op,int selector,int daz)741 static BX_CPP_INLINE int float64_fpclass(float64 op, int selector, int daz)
742 {
743 if (daz)
744 op = float64_denormal_to_zero(op);
745
746 return fpclass(float64_class(op), float64_sign(op), selector);
747 }
748
VFPCLASSPS_MASK_KGwWpsIbR(bxInstruction_c * i)749 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFPCLASSPS_MASK_KGwWpsIbR(bxInstruction_c *i)
750 {
751 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
752 unsigned num_elements = DWORD_ELEMENTS(i->getVL());
753
754 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
755 Bit32u result = 0;
756 int selector = i->Ib(), daz = MXCSR.get_DAZ();
757
758 for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) {
759 if (opmask & mask) {
760 if (float32_fpclass(op.vmm32u(n), selector, daz)) result |= mask;
761 }
762 }
763
764 BX_WRITE_OPMASK(i->dst(), result);
765 BX_NEXT_INSTR(i);
766 }
767
VFPCLASSPD_MASK_KGbWpdIbR(bxInstruction_c * i)768 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFPCLASSPD_MASK_KGbWpdIbR(bxInstruction_c *i)
769 {
770 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
771 unsigned num_elements = QWORD_ELEMENTS(i->getVL());
772
773 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
774 Bit32u result = 0;
775 int selector = i->Ib(), daz = MXCSR.get_DAZ();
776
777 for (unsigned n=0, mask = 0x1; n < num_elements; n++, mask <<= 1) {
778 if (opmask & mask) {
779 if (float64_fpclass(op.vmm64u(n), selector, daz)) result |= mask;
780 }
781 }
782
783 BX_WRITE_OPMASK(i->dst(), result);
784 BX_NEXT_INSTR(i);
785 }
786
VFPCLASSSS_MASK_KGbWssIbR(bxInstruction_c * i)787 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFPCLASSSS_MASK_KGbWssIbR(bxInstruction_c *i)
788 {
789 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
790 BX_WRITE_OPMASK(i->dst(), float32_fpclass(BX_READ_XMM_REG_LO_DWORD(i->src()), i->Ib(), MXCSR.get_DAZ()));
791 }
792 else {
793 BX_WRITE_OPMASK(i->dst(), 0);
794 }
795
796 BX_NEXT_INSTR(i);
797 }
798
VFPCLASSSD_MASK_KGbWsdIbR(bxInstruction_c * i)799 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VFPCLASSSD_MASK_KGbWsdIbR(bxInstruction_c *i)
800 {
801 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
802 BX_WRITE_OPMASK(i->dst(), float64_fpclass(BX_READ_XMM_REG_LO_QWORD(i->src()), i->Ib(), MXCSR.get_DAZ()));
803 }
804 else {
805 BX_WRITE_OPMASK(i->dst(), 0);
806 }
807
808 BX_NEXT_INSTR(i);
809 }
810
811 // getexp
812
VGETEXPPS_MASK_VpsWpsR(bxInstruction_c * i)813 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPPS_MASK_VpsWpsR(bxInstruction_c *i)
814 {
815 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
816 Bit32u mask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
817 unsigned len = i->getVL();
818
819 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
820 softfloat_status_word_rc_override(status, i);
821
822 for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 4)
823 xmm_getexpps_mask(&op.vmm128(n), status, tmp_mask);
824
825 check_exceptionsSSE(get_exception_flags(status));
826
827 if (! i->isZeroMasking()) {
828 for (unsigned n=0; n < len; n++, mask >>= 4)
829 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
830 BX_CLEAR_AVX_REGZ(i->dst(), len);
831 }
832 else {
833 BX_WRITE_AVX_REGZ(i->dst(), op, len);
834 }
835
836 BX_NEXT_INSTR(i);
837 }
838
VGETEXPPD_MASK_VpdWpdR(bxInstruction_c * i)839 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPPD_MASK_VpdWpdR(bxInstruction_c *i)
840 {
841 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
842 Bit32u mask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
843 unsigned len = i->getVL();
844
845 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
846 softfloat_status_word_rc_override(status, i);
847
848 for (unsigned n=0, tmp_mask = mask; n < len; n++, tmp_mask >>= 2)
849 xmm_getexppd_mask(&op.vmm128(n), status, tmp_mask);
850
851 check_exceptionsSSE(get_exception_flags(status));
852
853 if (! i->isZeroMasking()) {
854 for (unsigned n=0; n < len; n++, mask >>= 2)
855 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), mask);
856 BX_CLEAR_AVX_REGZ(i->dst(), len);
857 }
858 else {
859 BX_WRITE_AVX_REGZ(i->dst(), op, len);
860 }
861
862 BX_NEXT_INSTR(i);
863 }
864
VGETEXPSS_MASK_VssHpsWssR(bxInstruction_c * i)865 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPSS_MASK_VssHpsWssR(bxInstruction_c *i)
866 {
867 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
868
869 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
870 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
871
872 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
873 softfloat_status_word_rc_override(status, i);
874 op1.xmm32u(0) = float32_getexp(op2, status);
875 check_exceptionsSSE(get_exception_flags(status));
876 }
877 else {
878 if (i->isZeroMasking())
879 op1.xmm32u(0) = 0;
880 else
881 op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
882 }
883
884 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
885 BX_NEXT_INSTR(i);
886 }
887
VGETEXPSD_MASK_VsdHpdWsdR(bxInstruction_c * i)888 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETEXPSD_MASK_VsdHpdWsdR(bxInstruction_c *i)
889 {
890 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
891
892 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
893 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
894
895 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
896 softfloat_status_word_rc_override(status, i);
897 op1.xmm64u(0) = float64_getexp(op2, status);
898 check_exceptionsSSE(get_exception_flags(status));
899 }
900 else {
901 if (i->isZeroMasking())
902 op1.xmm64u(0) = 0;
903 else
904 op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
905 }
906
907 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
908 BX_NEXT_INSTR(i);
909 }
910
911 // getmant
912
VGETMANTSS_MASK_VssHpsWssIbR(bxInstruction_c * i)913 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETMANTSS_MASK_VssHpsWssIbR(bxInstruction_c *i)
914 {
915 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
916
917 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
918 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
919
920 int sign_ctrl = (i->Ib() >> 2) & 0x3;
921 int interv = i->Ib() & 0x3;
922
923 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
924 softfloat_status_word_rc_override(status, i);
925 op1.xmm32u(0) = float32_getmant(op2, status, sign_ctrl, interv);
926 check_exceptionsSSE(get_exception_flags(status));
927 }
928 else {
929 if (i->isZeroMasking())
930 op1.xmm32u(0) = 0;
931 else
932 op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
933 }
934
935 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
936 BX_NEXT_INSTR(i);
937 }
938
VGETMANTSD_MASK_VsdHpdWsdIbR(bxInstruction_c * i)939 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETMANTSD_MASK_VsdHpdWsdIbR(bxInstruction_c *i)
940 {
941 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
942
943 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
944 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
945
946 int sign_ctrl = (i->Ib() >> 2) & 0x3;
947 int interv = i->Ib() & 0x3;
948
949 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
950 softfloat_status_word_rc_override(status, i);
951 op1.xmm64u(0) = float64_getmant(op2, status, sign_ctrl, interv);
952 check_exceptionsSSE(get_exception_flags(status));
953 }
954 else {
955 if (i->isZeroMasking())
956 op1.xmm64u(0) = 0;
957 else
958 op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
959 }
960
961 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
962 BX_NEXT_INSTR(i);
963 }
964
VGETMANTPS_MASK_VpsWpsIbR(bxInstruction_c * i)965 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETMANTPS_MASK_VpsWpsIbR(bxInstruction_c *i)
966 {
967 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
968 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
969 unsigned len = i->getVL();
970
971 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
972 softfloat_status_word_rc_override(status, i);
973
974 int sign_ctrl = (i->Ib() >> 2) & 0x3;
975 int interv = i->Ib() & 0x3;
976
977 for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
978 if (opmask & mask)
979 op.vmm32u(n) = float32_getmant(op.vmm32u(n), status, sign_ctrl, interv);
980 else
981 op.vmm32u(n) = 0;
982 }
983
984 check_exceptionsSSE(get_exception_flags(status));
985
986 if (! i->isZeroMasking()) {
987 for (unsigned n=0; n < len; n++, opmask >>= 4)
988 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
989 BX_CLEAR_AVX_REGZ(i->dst(), len);
990 }
991 else {
992 BX_WRITE_AVX_REGZ(i->dst(), op, len);
993 }
994
995 BX_NEXT_INSTR(i);
996 }
997
VGETMANTPD_MASK_VpdWpdIbR(bxInstruction_c * i)998 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VGETMANTPD_MASK_VpdWpdIbR(bxInstruction_c *i)
999 {
1000 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
1001 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1002 unsigned len = i->getVL();
1003
1004 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1005 softfloat_status_word_rc_override(status, i);
1006
1007 int sign_ctrl = (i->Ib() >> 2) & 0x3;
1008 int interv = i->Ib() & 0x3;
1009
1010 for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
1011 if (opmask & mask)
1012 op.vmm64u(n) = float64_getmant(op.vmm64u(n), status, sign_ctrl, interv);
1013 else
1014 op.vmm64u(n) = 0;
1015 }
1016
1017 check_exceptionsSSE(get_exception_flags(status));
1018
1019 if (! i->isZeroMasking()) {
1020 for (unsigned n=0; n < len; n++, opmask >>= 2)
1021 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
1022 BX_CLEAR_AVX_REGZ(i->dst(), len);
1023 }
1024 else {
1025 BX_WRITE_AVX_REGZ(i->dst(), op, len);
1026 }
1027
1028 BX_NEXT_INSTR(i);
1029 }
1030
1031 // rndscale
1032
VRNDSCALEPS_MASK_VpsWpsIbR(bxInstruction_c * i)1033 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALEPS_MASK_VpsWpsIbR(bxInstruction_c *i)
1034 {
1035 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
1036 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1037 unsigned len = i->getVL();
1038
1039 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1040 softfloat_status_word_rc_override(status, i);
1041
1042 Bit8u control = i->Ib(), scale = control >> 4;
1043
1044 // override MXCSR rounding mode with control coming from imm8
1045 if ((control & 0x4) == 0)
1046 status.float_rounding_mode = control & 0x3;
1047 // ignore precision exception result
1048 if (control & 0x8)
1049 status.float_suppress_exception |= float_flag_inexact;
1050
1051 for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
1052 if (opmask & mask)
1053 op.vmm32u(n) = float32_round_to_int(op.vmm32u(n), scale, status);
1054 else
1055 op.vmm32u(n) = 0;
1056 }
1057
1058 check_exceptionsSSE(get_exception_flags(status));
1059
1060 if (! i->isZeroMasking()) {
1061 for (unsigned n=0; n < len; n++, opmask >>= 4)
1062 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
1063 BX_CLEAR_AVX_REGZ(i->dst(), len);
1064 }
1065 else {
1066 BX_WRITE_AVX_REGZ(i->dst(), op, len);
1067 }
1068
1069 BX_NEXT_INSTR(i);
1070 }
1071
VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c * i)1072 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESS_MASK_VssHpsWssIbR(bxInstruction_c *i)
1073 {
1074 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1075
1076 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
1077 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
1078
1079 Bit8u control = i->Ib(), scale = control >> 4;
1080
1081 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1082 softfloat_status_word_rc_override(status, i);
1083
1084 // override MXCSR rounding mode with control coming from imm8
1085 if ((control & 0x4) == 0)
1086 status.float_rounding_mode = control & 0x3;
1087 // ignore precision exception result
1088 if (control & 0x8)
1089 status.float_suppress_exception |= float_flag_inexact;
1090
1091 op1.xmm32u(0) = float32_round_to_int(op2, scale, status);
1092
1093 check_exceptionsSSE(get_exception_flags(status));
1094 }
1095 else {
1096 if (i->isZeroMasking())
1097 op1.xmm32u(0) = 0;
1098 else
1099 op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
1100 }
1101
1102 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1103 BX_NEXT_INSTR(i);
1104 }
1105
VRNDSCALEPD_MASK_VpdWpdIbR(bxInstruction_c * i)1106 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALEPD_MASK_VpdWpdIbR(bxInstruction_c *i)
1107 {
1108 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
1109 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1110 unsigned len = i->getVL();
1111
1112 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1113 softfloat_status_word_rc_override(status, i);
1114
1115 Bit8u control = i->Ib(), scale = control >> 4;
1116
1117 // override MXCSR rounding mode with control coming from imm8
1118 if ((control & 0x4) == 0)
1119 status.float_rounding_mode = control & 0x3;
1120 // ignore precision exception result
1121 if (control & 0x8)
1122 status.float_suppress_exception |= float_flag_inexact;
1123
1124 for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
1125 if (opmask & mask)
1126 op.vmm64u(n) = float64_round_to_int(op.vmm64u(n), scale, status);
1127 else
1128 op.vmm64u(n) = 0;
1129 }
1130
1131 check_exceptionsSSE(get_exception_flags(status));
1132
1133 if (! i->isZeroMasking()) {
1134 for (unsigned n=0; n < len; n++, opmask >>= 2)
1135 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
1136 BX_CLEAR_AVX_REGZ(i->dst(), len);
1137 }
1138 else {
1139 BX_WRITE_AVX_REGZ(i->dst(), op, len);
1140 }
1141
1142 BX_NEXT_INSTR(i);
1143 }
1144
VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c * i)1145 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRNDSCALESD_MASK_VsdHpdWsdIbR(bxInstruction_c *i)
1146 {
1147 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1148
1149 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
1150 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
1151
1152 Bit8u control = i->Ib(), scale = control >> 4;
1153
1154 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1155 softfloat_status_word_rc_override(status, i);
1156
1157 // override MXCSR rounding mode with control coming from imm8
1158 if ((control & 0x4) == 0)
1159 status.float_rounding_mode = control & 0x3;
1160 // ignore precision exception result
1161 if (control & 0x8)
1162 status.float_suppress_exception |= float_flag_inexact;
1163
1164 op1.xmm64u(0) = float64_round_to_int(op2, scale, status);
1165
1166 check_exceptionsSSE(get_exception_flags(status));
1167 }
1168 else {
1169 if (i->isZeroMasking())
1170 op1.xmm64u(0) = 0;
1171 else
1172 op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
1173 }
1174
1175 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1176 BX_NEXT_INSTR(i);
1177 }
1178
1179 // scalef
1180
VSCALEFPS_VpsHpsWpsR(bxInstruction_c * i)1181 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPS_VpsHpsWpsR(bxInstruction_c *i)
1182 {
1183 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
1184 unsigned len = i->getVL();
1185
1186 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1187 softfloat_status_word_rc_override(status, i);
1188
1189 for (unsigned n=0; n < len; n++) {
1190 xmm_scalefps(&op1.vmm128(n), &op2.vmm128(n), status);
1191 }
1192
1193 check_exceptionsSSE(get_exception_flags(status));
1194
1195 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
1196
1197 BX_NEXT_INSTR(i);
1198 }
1199
VSCALEFPD_VpdHpdWpdR(bxInstruction_c * i)1200 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFPD_VpdHpdWpdR(bxInstruction_c *i)
1201 {
1202 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
1203 unsigned len = i->getVL();
1204
1205 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1206 softfloat_status_word_rc_override(status, i);
1207
1208 for (unsigned n=0; n < len; n++) {
1209 xmm_scalefpd(&op1.vmm128(n), &op2.vmm128(n), status);
1210 }
1211
1212 check_exceptionsSSE(get_exception_flags(status));
1213
1214 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
1215
1216 BX_NEXT_INSTR(i);
1217 }
1218
VSCALEFSS_VssHpsWssR(bxInstruction_c * i)1219 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSS_VssHpsWssR(bxInstruction_c *i)
1220 {
1221 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1222 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
1223
1224 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1225 softfloat_status_word_rc_override(status, i);
1226
1227 op1.xmm32u(0) = float32_scalef(op1.xmm32u(0), op2, status);
1228
1229 check_exceptionsSSE(get_exception_flags(status));
1230 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1231
1232 BX_NEXT_INSTR(i);
1233 }
1234
VSCALEFSD_VsdHpdWsdR(bxInstruction_c * i)1235 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VSCALEFSD_VsdHpdWsdR(bxInstruction_c *i)
1236 {
1237 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1238 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
1239
1240 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1241 softfloat_status_word_rc_override(status, i);
1242
1243 op1.xmm64u(0) = float64_scalef(op1.xmm64u(0), op2, status);
1244
1245 check_exceptionsSSE(get_exception_flags(status));
1246 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1247
1248 BX_NEXT_INSTR(i);
1249 }
1250
1251 // range
1252
float32_range(float32 a,float32 b,int opselect,int sign_ctrl,float_status_t & status)1253 static BX_CPP_INLINE float32 float32_range(float32 a, float32 b, int opselect, int sign_ctrl, float_status_t &status)
1254 {
1255 float32 minmax = float32_minmax(a, b, opselect & 0x1, (opselect >> 1) & 0x1, status);
1256
1257 if (! float32_is_signaling_nan(a) && ! float32_is_signaling_nan(b)) {
1258 if (sign_ctrl == 0) {
1259 minmax = (minmax & ~0x80000000) | (a & 0x80000000); // keep sign of a
1260 }
1261 else if (sign_ctrl == 2) {
1262 minmax &= ~0x80000000; // zero out sign it
1263 }
1264 else if (sign_ctrl == 3) {
1265 minmax |= 0x80000000; // set the sign it
1266 }
1267 // else preserve the sign of compare result
1268 }
1269
1270 return minmax;
1271 }
1272
float64_range(float64 a,float64 b,int opselect,int sign_ctrl,float_status_t & status)1273 static BX_CPP_INLINE float64 float64_range(float64 a, float64 b, int opselect, int sign_ctrl, float_status_t &status)
1274 {
1275 float64 minmax = float64_minmax(a, b, opselect & 0x1, (opselect >> 1) & 0x1, status);
1276
1277 if (! float64_is_signaling_nan(a) && ! float64_is_signaling_nan(b)) {
1278 if (sign_ctrl == 0) {
1279 minmax = (minmax & ~BX_CONST64(0x8000000000000000)) | (a & BX_CONST64(0x8000000000000000)); // keep sign of a
1280 }
1281 else if (sign_ctrl == 2) {
1282 minmax &= ~BX_CONST64(0x8000000000000000); // zero out sign it
1283 }
1284 else if (sign_ctrl == 3) {
1285 minmax |= BX_CONST64(0x8000000000000000); // set the sign it
1286 }
1287 // else preserve the sign of compare result
1288 }
1289
1290 return minmax;
1291 }
1292
VRANGEPS_MASK_VpsHpsWpsIbR(bxInstruction_c * i)1293 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRANGEPS_MASK_VpsHpsWpsIbR(bxInstruction_c *i)
1294 {
1295 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
1296 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1297 unsigned len = i->getVL();
1298
1299 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1300 softfloat_status_word_rc_override(status, i);
1301
1302 int sign_ctrl = (i->Ib() >> 2) & 0x3;
1303 int opselect = i->Ib() & 0x3;
1304
1305 for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
1306 if (opmask & mask)
1307 op1.vmm32u(n) = float32_range(op1.vmm32u(n), op2.vmm32u(n), opselect, sign_ctrl, status);
1308 else
1309 op1.vmm32u(n) = 0;
1310 }
1311
1312 check_exceptionsSSE(get_exception_flags(status));
1313
1314 if (! i->isZeroMasking()) {
1315 for (unsigned n=0; n < len; n++, opmask >>= 4)
1316 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), opmask);
1317 BX_CLEAR_AVX_REGZ(i->dst(), len);
1318 }
1319 else {
1320 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
1321 }
1322
1323 BX_NEXT_INSTR(i);
1324 }
1325
VRANGEPD_MASK_VpdHpdWpdIbR(bxInstruction_c * i)1326 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRANGEPD_MASK_VpdHpdWpdIbR(bxInstruction_c *i)
1327 {
1328 BxPackedAvxRegister op1 = BX_READ_AVX_REG(i->src1()), op2 = BX_READ_AVX_REG(i->src2());
1329 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1330 unsigned len = i->getVL();
1331
1332 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1333 softfloat_status_word_rc_override(status, i);
1334
1335 int sign_ctrl = (i->Ib() >> 2) & 0x3;
1336 int opselect = i->Ib() & 0x3;
1337
1338 for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
1339 if (opmask & mask)
1340 op1.vmm64u(n) = float64_range(op1.vmm64u(n), op2.vmm64u(n), opselect, sign_ctrl, status);
1341 else
1342 op1.vmm64u(n) = 0;
1343 }
1344
1345 check_exceptionsSSE(get_exception_flags(status));
1346
1347 if (! i->isZeroMasking()) {
1348 for (unsigned n=0; n < len; n++, opmask >>= 2)
1349 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op1.vmm128(n), opmask);
1350 BX_CLEAR_AVX_REGZ(i->dst(), len);
1351 }
1352 else {
1353 BX_WRITE_AVX_REGZ(i->dst(), op1, len);
1354 }
1355
1356 BX_NEXT_INSTR(i);
1357 }
1358
VRANGESS_MASK_VssHpsWssIbR(bxInstruction_c * i)1359 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRANGESS_MASK_VssHpsWssIbR(bxInstruction_c *i)
1360 {
1361 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1362
1363 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
1364 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
1365
1366 int sign_ctrl = (i->Ib() >> 2) & 0x3;
1367 int opselect = i->Ib() & 0x3;
1368
1369 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1370 softfloat_status_word_rc_override(status, i);
1371 op1.xmm32u(0) = float32_range(op1.xmm32u(0), op2, opselect, sign_ctrl, status);
1372 check_exceptionsSSE(get_exception_flags(status));
1373 }
1374 else {
1375 if (i->isZeroMasking())
1376 op1.xmm32u(0) = 0;
1377 else
1378 op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
1379 }
1380
1381 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1382 BX_NEXT_INSTR(i);
1383 }
1384
VRANGESD_MASK_VsdHpdWsdIbR(bxInstruction_c * i)1385 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VRANGESD_MASK_VsdHpdWsdIbR(bxInstruction_c *i)
1386 {
1387 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1388
1389 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
1390 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
1391
1392 int sign_ctrl = (i->Ib() >> 2) & 0x3;
1393 int opselect = i->Ib() & 0x3;
1394
1395 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1396 softfloat_status_word_rc_override(status, i);
1397 op1.xmm64u(0) = float64_range(op1.xmm64u(0), op2, opselect, sign_ctrl, status);
1398 check_exceptionsSSE(get_exception_flags(status));
1399 }
1400 else {
1401 if (i->isZeroMasking())
1402 op1.xmm64u(0) = 0;
1403 else
1404 op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
1405 }
1406
1407 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1408 BX_NEXT_INSTR(i);
1409 }
1410
1411 // reduce
1412
float32_reduce(float32 a,Bit8u scale,float_status_t & status)1413 static BX_CPP_INLINE float32 float32_reduce(float32 a, Bit8u scale, float_status_t &status)
1414 {
1415 if (a == float32_negative_inf || a == float32_positive_inf)
1416 return 0;
1417
1418 float32 tmp = float32_round_to_int(a, scale, status);
1419 return float32_sub(a, tmp, status);
1420 }
1421
float64_reduce(float64 a,Bit8u scale,float_status_t & status)1422 static BX_CPP_INLINE float64 float64_reduce(float64 a, Bit8u scale, float_status_t &status)
1423 {
1424 if (a == float64_negative_inf || a == float64_positive_inf)
1425 return 0;
1426
1427 float64 tmp = float64_round_to_int(a, scale, status);
1428 return float64_sub(a, tmp, status);
1429 }
1430
VREDUCEPS_MASK_VpsWpsIbR(bxInstruction_c * i)1431 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VREDUCEPS_MASK_VpsWpsIbR(bxInstruction_c *i)
1432 {
1433 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
1434 Bit32u opmask = i->opmask() ? BX_READ_16BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1435 unsigned len = i->getVL();
1436
1437 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1438 softfloat_status_word_rc_override(status, i);
1439
1440 Bit8u control = i->Ib(), scale = control >> 4;
1441
1442 // override MXCSR rounding mode with control coming from imm8
1443 if ((control & 0x4) == 0)
1444 status.float_rounding_mode = control & 0x3;
1445 // ignore precision exception result
1446 if (control & 0x8)
1447 status.float_suppress_exception |= float_flag_inexact;
1448
1449 for (unsigned n=0, mask = 0x1; n < DWORD_ELEMENTS(len); n++, mask <<= 1) {
1450 if (opmask & mask)
1451 op.vmm32u(n) = float32_reduce(op.vmm32u(n), scale, status);
1452 else
1453 op.vmm32u(n) = 0;
1454 }
1455
1456 check_exceptionsSSE(get_exception_flags(status));
1457
1458 if (! i->isZeroMasking()) {
1459 for (unsigned n=0; n < len; n++, opmask >>= 4)
1460 xmm_blendps(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
1461 BX_CLEAR_AVX_REGZ(i->dst(), len);
1462 }
1463 else {
1464 BX_WRITE_AVX_REGZ(i->dst(), op, len);
1465 }
1466
1467 BX_NEXT_INSTR(i);
1468 }
1469
VREDUCESS_MASK_VssHpsWssIbR(bxInstruction_c * i)1470 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VREDUCESS_MASK_VssHpsWssIbR(bxInstruction_c *i)
1471 {
1472 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1473
1474 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
1475 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
1476
1477 Bit8u control = i->Ib(), scale = control >> 4;
1478
1479 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1480 softfloat_status_word_rc_override(status, i);
1481
1482 // override MXCSR rounding mode with control coming from imm8
1483 if ((control & 0x4) == 0)
1484 status.float_rounding_mode = control & 0x3;
1485 // ignore precision exception result
1486 if (control & 0x8)
1487 status.float_suppress_exception |= float_flag_inexact;
1488
1489 op1.xmm32u(0) = float32_reduce(op2, scale, status);
1490
1491 check_exceptionsSSE(get_exception_flags(status));
1492 }
1493 else {
1494 if (i->isZeroMasking())
1495 op1.xmm32u(0) = 0;
1496 else
1497 op1.xmm32u(0) = BX_READ_XMM_REG_LO_DWORD(i->dst());
1498 }
1499
1500 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1501 BX_NEXT_INSTR(i);
1502 }
1503
VREDUCEPD_MASK_VpdWpdIbR(bxInstruction_c * i)1504 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VREDUCEPD_MASK_VpdWpdIbR(bxInstruction_c *i)
1505 {
1506 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
1507 Bit32u opmask = i->opmask() ? BX_READ_8BIT_OPMASK(i->opmask()) : (Bit32u) -1;
1508 unsigned len = i->getVL();
1509
1510 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1511 softfloat_status_word_rc_override(status, i);
1512
1513 Bit8u control = i->Ib(), scale = control >> 4;
1514
1515 // override MXCSR rounding mode with control coming from imm8
1516 if ((control & 0x4) == 0)
1517 status.float_rounding_mode = control & 0x3;
1518 // ignore precision exception result
1519 if (control & 0x8)
1520 status.float_suppress_exception |= float_flag_inexact;
1521
1522 for (unsigned n=0, mask = 0x1; n < QWORD_ELEMENTS(len); n++, mask <<= 1) {
1523 if (opmask & mask)
1524 op.vmm64u(n) = float64_reduce(op.vmm64u(n), scale, status);
1525 else
1526 op.vmm64u(n) = 0;
1527 }
1528
1529 check_exceptionsSSE(get_exception_flags(status));
1530
1531 if (! i->isZeroMasking()) {
1532 for (unsigned n=0; n < len; n++, opmask >>= 2)
1533 xmm_blendpd(&BX_READ_AVX_REG_LANE(i->dst(), n), &op.vmm128(n), opmask);
1534 BX_CLEAR_AVX_REGZ(i->dst(), len);
1535 }
1536 else {
1537 BX_WRITE_AVX_REGZ(i->dst(), op, len);
1538 }
1539
1540 BX_NEXT_INSTR(i);
1541 }
1542
VREDUCESD_MASK_VsdHpdWsdIbR(bxInstruction_c * i)1543 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VREDUCESD_MASK_VsdHpdWsdIbR(bxInstruction_c *i)
1544 {
1545 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1546
1547 if (! i->opmask() || BX_SCALAR_ELEMENT_MASK(i->opmask())) {
1548 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
1549
1550 Bit8u control = i->Ib(), scale = control >> 4;
1551
1552 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1553 softfloat_status_word_rc_override(status, i);
1554
1555 // override MXCSR rounding mode with control coming from imm8
1556 if ((control & 0x4) == 0)
1557 status.float_rounding_mode = control & 0x3;
1558 // ignore precision exception result
1559 if (control & 0x8)
1560 status.float_suppress_exception |= float_flag_inexact;
1561
1562 op1.xmm64u(0) = float64_reduce(op2, scale, status);
1563
1564 check_exceptionsSSE(get_exception_flags(status));
1565 }
1566 else {
1567 if (i->isZeroMasking())
1568 op1.xmm64u(0) = 0;
1569 else
1570 op1.xmm64u(0) = BX_READ_XMM_REG_LO_QWORD(i->dst());
1571 }
1572
1573 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
1574 BX_NEXT_INSTR(i);
1575 }
1576
1577 #endif
1578