1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_pfp.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2003-2018 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28
29 #if BX_CPU_LEVEL >= 6
30
31 #include "fpu/softfloat-compare.h"
32
33 #include "simd_pfp.h"
34 #include "simd_int.h"
35
check_exceptionsSSE(int exceptions_flags)36 void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
37 {
38 exceptions_flags &= MXCSR_EXCEPTIONS;
39 int unmasked = ~(MXCSR.get_exceptions_masks()) & exceptions_flags;
40 // unmasked pre-computational exception detected (#IA, #DE or #DZ)
41 if (unmasked & 0x7) exceptions_flags &= 0x7;
42 MXCSR.set_exceptions(exceptions_flags);
43
44 if (unmasked)
45 {
46 if(BX_CPU_THIS_PTR cr4.get_OSXMMEXCPT())
47 exception(BX_XM_EXCEPTION, 0);
48 else
49 exception(BX_UD_EXCEPTION, 0);
50 }
51 }
52
mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr)53 float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr)
54 {
55 float_status_t status;
56
57 status.float_exception_flags = 0; // clear exceptions before execution
58 status.float_nan_handling_mode = float_first_operand_nan;
59 status.float_rounding_mode = mxcsr.get_rounding_mode();
60 // if underflow is masked and FUZ is 1, set it to 1, else to 0
61 status.flush_underflow_to_zero =
62 (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
63 status.float_exception_masks = mxcsr.get_exceptions_masks();
64 status.float_suppress_exception = 0;
65 status.denormals_are_zeros = mxcsr.get_DAZ();
66
67 return status;
68 }
69
/*
 * Comparison predicate for CMPSS/CMPPS instructions.
 * Eight single precision compare routines; the table position of each
 * entry is noted next to it.
 * NOTE(review): presumably indexed by the predicate encoded in the
 * instruction immediate - confirm against the users of this table.
 */
static float32_compare_method compare32[8] = {
  float32_eq_ordered_quiet,          /* 0: equal, ordered, quiet */
  float32_lt_ordered_signalling,     /* 1: less than, ordered, signalling */
  float32_le_ordered_signalling,     /* 2: less or equal, ordered, signalling */
  float32_unordered_quiet,           /* 3: unordered, quiet */
  float32_neq_unordered_quiet,       /* 4: not equal, unordered, quiet */
  float32_nlt_unordered_signalling,  /* 5: not less than, unordered, signalling */
  float32_nle_unordered_signalling,  /* 6: not less or equal, unordered, signalling */
  float32_ordered_quiet              /* 7: ordered, quiet */
};
81
/*
 * Comparison predicate for CMPSD/CMPPD instructions.
 * Eight double precision compare routines; the table position of each
 * entry is noted next to it.
 * NOTE(review): presumably indexed by the predicate encoded in the
 * instruction immediate - confirm against the users of this table.
 */
static float64_compare_method compare64[8] = {
  float64_eq_ordered_quiet,          /* 0: equal, ordered, quiet */
  float64_lt_ordered_signalling,     /* 1: less than, ordered, signalling */
  float64_le_ordered_signalling,     /* 2: less or equal, ordered, signalling */
  float64_unordered_quiet,           /* 3: unordered, quiet */
  float64_neq_unordered_quiet,       /* 4: not equal, unordered, quiet */
  float64_nlt_unordered_signalling,  /* 5: not less than, unordered, signalling */
  float64_nle_unordered_signalling,  /* 6: not less or equal, unordered, signalling */
  float64_ordered_quiet              /* 7: ordered, quiet */
};
93
94 #endif // BX_CPU_LEVEL >= 6
95
96 /*
97 * Opcode: 0F 2A
98 * Convert two 32bit signed integers from MMX/MEM to two single precision FP
99 * When a conversion is inexact, the value returned is rounded according
100 * to rounding control bits in MXCSR register.
101 * Possible floating point exceptions: #P
102 */
CVTPI2PS_VpsQqR(bxInstruction_c * i)103 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PS_VpsQqR(bxInstruction_c *i)
104 {
105 #if BX_CPU_LEVEL >= 6
106 /* check floating point status word for a pending FPU exceptions */
107 FPU_check_pending_exceptions();
108
109 BxPackedMmxRegister op = BX_READ_MMX_REG(i->src());
110
111 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
112
113 MMXUD0(op) = int32_to_float32(MMXSD0(op), status);
114 MMXUD1(op) = int32_to_float32(MMXSD1(op), status);
115
116 prepareFPU2MMX(); /* cause FPU2MMX state transition */
117 check_exceptionsSSE(get_exception_flags(status));
118 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), MMXUQ(op));
119 #endif
120
121 BX_NEXT_INSTR(i);
122 }
123
CVTPI2PS_VpsQqM(bxInstruction_c * i)124 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PS_VpsQqM(bxInstruction_c *i)
125 {
126 #if BX_CPU_LEVEL >= 6
127 // do not cause transition to MMX state because no MMX register touched
128 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
129 BxPackedMmxRegister op = read_virtual_qword(i->seg(), eaddr);
130
131 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
132
133 MMXUD0(op) = int32_to_float32(MMXSD0(op), status);
134 MMXUD1(op) = int32_to_float32(MMXSD1(op), status);
135
136 check_exceptionsSSE(get_exception_flags(status));
137 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), MMXUQ(op));
138 #endif
139
140 BX_NEXT_INSTR(i);
141 }
142
143 /*
144 * Opcode: 66 0F 2A
145 * Convert two 32bit signed integers from MMX/MEM to two double precision FP
146 * Possible floating point exceptions: -
147 */
CVTPI2PD_VpdQqR(bxInstruction_c * i)148 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PD_VpdQqR(bxInstruction_c *i)
149 {
150 #if BX_CPU_LEVEL >= 6
151 BxPackedXmmRegister result;
152
153 /* check floating point status word for a pending FPU exceptions */
154 FPU_check_pending_exceptions();
155 prepareFPU2MMX(); /* cause FPU2MMX state transition */
156
157 BxPackedMmxRegister op = BX_READ_MMX_REG(i->src());
158
159 result.xmm64u(0) = int32_to_float64(MMXSD0(op));
160 result.xmm64u(1) = int32_to_float64(MMXSD1(op));
161
162 BX_WRITE_XMM_REG(i->dst(), result);
163 #endif
164
165 BX_NEXT_INSTR(i);
166 }
167
CVTPI2PD_VpdQqM(bxInstruction_c * i)168 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PD_VpdQqM(bxInstruction_c *i)
169 {
170 #if BX_CPU_LEVEL >= 6
171 BxPackedXmmRegister result;
172
173 // do not cause transition to MMX state because no MMX register touched
174 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
175 BxPackedMmxRegister op = read_virtual_qword(i->seg(), eaddr);
176
177 result.xmm64u(0) = int32_to_float64(MMXSD0(op));
178 result.xmm64u(1) = int32_to_float64(MMXSD1(op));
179
180 BX_WRITE_XMM_REG(i->dst(), result);
181 #endif
182
183 BX_NEXT_INSTR(i);
184 }
185
186 /*
187 * Opcode: F2 0F 2A
188 * Convert one 32bit signed integer to one double precision FP
189 * Possible floating point exceptions: -
190 */
CVTSI2SD_VsdEdR(bxInstruction_c * i)191 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SD_VsdEdR(bxInstruction_c *i)
192 {
193 #if BX_CPU_LEVEL >= 6
194 float64 result = int32_to_float64(BX_READ_32BIT_REG(i->src()));
195 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), result);
196 #endif
197
198 BX_NEXT_INSTR(i);
199 }
200
201 #if BX_SUPPORT_X86_64
CVTSI2SD_VsdEqR(bxInstruction_c * i)202 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SD_VsdEqR(bxInstruction_c *i)
203 {
204 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
205 float64 result = int64_to_float64(BX_READ_64BIT_REG(i->src()), status);
206 check_exceptionsSSE(get_exception_flags(status));
207 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), result);
208
209 BX_NEXT_INSTR(i);
210 }
211 #endif
212
213 /*
214 * Opcode: F3 0F 2A
215 * Convert one 32bit signed integer to one single precision FP
216 * When a conversion is inexact, the value returned is rounded according
217 * to rounding control bits in MXCSR register.
218 * Possible floating point exceptions: #P
219 */
CVTSI2SS_VssEdR(bxInstruction_c * i)220 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SS_VssEdR(bxInstruction_c *i)
221 {
222 #if BX_CPU_LEVEL >= 6
223 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
224 float32 result = int32_to_float32(BX_READ_32BIT_REG(i->src()), status);
225 check_exceptionsSSE(get_exception_flags(status));
226 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), result);
227 #endif
228
229 BX_NEXT_INSTR(i);
230 }
231
232 #if BX_SUPPORT_X86_64
CVTSI2SS_VssEqR(bxInstruction_c * i)233 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SS_VssEqR(bxInstruction_c *i)
234 {
235 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
236
237 float32 result = int64_to_float32(BX_READ_64BIT_REG(i->src()), status);
238
239 check_exceptionsSSE(get_exception_flags(status));
240 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), result);
241
242 BX_NEXT_INSTR(i);
243 }
244 #endif
245
246 /*
247 * Opcode: 0F 2C
248 * Convert two single precision FP numbers to two signed doubleword integers
249 * in MMX using truncation if the conversion is inexact
250 * Possible floating point exceptions: #I, #P
251 */
CVTTPS2PI_PqWps(bxInstruction_c * i)252 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2PI_PqWps(bxInstruction_c *i)
253 {
254 #if BX_CPU_LEVEL >= 6
255 /* check floating point status word for a pending FPU exceptions */
256 FPU_check_pending_exceptions();
257
258 BxPackedMmxRegister op;
259
260 /* op is a register or memory reference */
261 if (i->modC0()) {
262 op = BX_READ_XMM_REG_LO_QWORD(i->src());
263 }
264 else {
265 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
266 /* pointer, segment address pair */
267 op = read_virtual_qword(i->seg(), eaddr);
268 }
269
270 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
271
272 MMXSD0(op) = float32_to_int32_round_to_zero(MMXUD0(op), status);
273 MMXSD1(op) = float32_to_int32_round_to_zero(MMXUD1(op), status);
274
275 prepareFPU2MMX(); /* cause FPU2MMX state transition */
276 check_exceptionsSSE(get_exception_flags(status));
277 BX_WRITE_MMX_REG(i->dst(), op);
278 #endif
279
280 BX_NEXT_INSTR(i);
281 }
282
283 /*
284 * Opcode: 66 0F 2C
285 * Convert two double precision FP numbers to two signed doubleword integers
286 * in MMX using truncation if the conversion is inexact
287 * Possible floating point exceptions: #I, #P
288 */
CVTTPD2PI_PqWpd(bxInstruction_c * i)289 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2PI_PqWpd(bxInstruction_c *i)
290 {
291 #if BX_CPU_LEVEL >= 6
292 /* check floating point status word for a pending FPU exceptions */
293 FPU_check_pending_exceptions();
294
295 BxPackedXmmRegister op;
296 BxPackedMmxRegister result;
297
298 /* op is a register or memory reference */
299 if (i->modC0()) {
300 op = BX_READ_XMM_REG(i->src());
301 }
302 else {
303 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
304
305 if (BX_CPU_THIS_PTR mxcsr.get_MM())
306 read_virtual_xmmword(i->seg(), eaddr, &op);
307 else
308 read_virtual_xmmword_aligned(i->seg(), eaddr, &op);
309 }
310
311 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
312
313 MMXSD0(result) = float64_to_int32_round_to_zero(op.xmm64u(0), status);
314 MMXSD1(result) = float64_to_int32_round_to_zero(op.xmm64u(1), status);
315
316 prepareFPU2MMX(); /* cause FPU2MMX state transition */
317 check_exceptionsSSE(get_exception_flags(status));
318 BX_WRITE_MMX_REG(i->dst(), result);
319 #endif
320
321 BX_NEXT_INSTR(i);
322 }
323
324 /*
325 * Opcode: F2 0F 2C
326 * Convert one double precision FP number to doubleword integer using
327 * truncation if the conversion is inexact
328 * Possible floating point exceptions: #I, #P
329 */
CVTTSD2SI_GdWsdR(bxInstruction_c * i)330 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GdWsdR(bxInstruction_c *i)
331 {
332 #if BX_CPU_LEVEL >= 6
333 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
334
335 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
336 softfloat_status_word_rc_override(status, i);
337 Bit32s result = float64_to_int32_round_to_zero(op, status);
338 check_exceptionsSSE(get_exception_flags(status));
339
340 BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
341 #endif
342
343 BX_NEXT_INSTR(i);
344 }
345
346 #if BX_SUPPORT_X86_64
CVTTSD2SI_GqWsdR(bxInstruction_c * i)347 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GqWsdR(bxInstruction_c *i)
348 {
349 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
350
351 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
352 softfloat_status_word_rc_override(status, i);
353 Bit64s result = float64_to_int64_round_to_zero(op, status);
354 check_exceptionsSSE(get_exception_flags(status));
355
356 BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result);
357 BX_NEXT_INSTR(i);
358 }
359 #endif
360
361 /*
362 * Opcode: F3 0F 2C
363 * Convert one single precision FP number to doubleword integer using
364 * truncation if the conversion is inexact
365 * Possible floating point exceptions: #I, #P
366 */
CVTTSS2SI_GdWssR(bxInstruction_c * i)367 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GdWssR(bxInstruction_c *i)
368 {
369 #if BX_CPU_LEVEL >= 6
370 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
371
372 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
373 softfloat_status_word_rc_override(status, i);
374 Bit32s result = float32_to_int32_round_to_zero(op, status);
375 check_exceptionsSSE(get_exception_flags(status));
376
377 BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
378 #endif
379
380 BX_NEXT_INSTR(i);
381 }
382
383 #if BX_SUPPORT_X86_64
CVTTSS2SI_GqWssR(bxInstruction_c * i)384 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GqWssR(bxInstruction_c *i)
385 {
386 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
387
388 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
389 softfloat_status_word_rc_override(status, i);
390 Bit64s result = float32_to_int64_round_to_zero(op, status);
391 check_exceptionsSSE(get_exception_flags(status));
392
393 BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result);
394 BX_NEXT_INSTR(i);
395 }
396 #endif
397
398 /*
399 * Opcode: 0F 2D
400 * Convert two single precision FP numbers to two signed doubleword integers
401 * in MMX register. When a conversion is inexact, the value returned is
402 * rounded according to rounding control bits in MXCSR register.
403 * Possible floating point exceptions: #I, #P
404 */
CVTPS2PI_PqWps(bxInstruction_c * i)405 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PI_PqWps(bxInstruction_c *i)
406 {
407 #if BX_CPU_LEVEL >= 6
408 /* check floating point status word for a pending FPU exceptions */
409 FPU_check_pending_exceptions();
410
411 BxPackedMmxRegister op;
412
413 /* op is a register or memory reference */
414 if (i->modC0()) {
415 op = BX_READ_XMM_REG_LO_QWORD(i->src());
416 }
417 else {
418 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
419 /* pointer, segment address pair */
420 op = read_virtual_qword(i->seg(), eaddr);
421 }
422
423 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
424
425 MMXSD0(op) = float32_to_int32(MMXUD0(op), status);
426 MMXSD1(op) = float32_to_int32(MMXUD1(op), status);
427
428 prepareFPU2MMX(); /* cause FPU2MMX state transition */
429 check_exceptionsSSE(get_exception_flags(status));
430 BX_WRITE_MMX_REG(i->dst(), op);
431 #endif
432
433 BX_NEXT_INSTR(i);
434 }
435
436 /*
437 * Opcode: 66 0F 2D
438 * Convert two double precision FP numbers to two signed doubleword integers
439 * in MMX register. When a conversion is inexact, the value returned is
440 * rounded according to rounding control bits in MXCSR register.
441 * Possible floating point exceptions: #I, #P
442 */
CVTPD2PI_PqWpd(bxInstruction_c * i)443 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PI_PqWpd(bxInstruction_c *i)
444 {
445 #if BX_CPU_LEVEL >= 6
446 /* check floating point status word for a pending FPU exceptions */
447 FPU_check_pending_exceptions();
448
449 BxPackedXmmRegister op;
450 BxPackedMmxRegister result;
451
452 /* op is a register or memory reference */
453 if (i->modC0()) {
454 op = BX_READ_XMM_REG(i->src());
455 }
456 else {
457 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
458
459 #if BX_SUPPORT_MISALIGNED_SSE
460 if (BX_CPU_THIS_PTR mxcsr.get_MM())
461 read_virtual_xmmword(i->seg(), eaddr, &op);
462 else
463 #endif
464 read_virtual_xmmword_aligned(i->seg(), eaddr, &op);
465 }
466
467 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
468
469 MMXSD0(result) = float64_to_int32(op.xmm64u(0), status);
470 MMXSD1(result) = float64_to_int32(op.xmm64u(1), status);
471
472 prepareFPU2MMX(); /* cause FPU2MMX state transition */
473 check_exceptionsSSE(get_exception_flags(status));
474 BX_WRITE_MMX_REG(i->dst(), result);
475 #endif
476
477 BX_NEXT_INSTR(i);
478 }
479
480 /*
481 * Opcode: F2 0F 2D
482 * Convert one double precision FP number to doubleword integer
483 * When a conversion is inexact, the value returned is rounded according
484 * to rounding control bits in MXCSR register.
485 * Possible floating point exceptions: #I, #P
486 */
CVTSD2SI_GdWsdR(bxInstruction_c * i)487 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GdWsdR(bxInstruction_c *i)
488 {
489 #if BX_CPU_LEVEL >= 6
490 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
491
492 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
493 softfloat_status_word_rc_override(status, i);
494 Bit32s result = float64_to_int32(op, status);
495 check_exceptionsSSE(get_exception_flags(status));
496
497 BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
498 #endif
499
500 BX_NEXT_INSTR(i);
501 }
502
503 #if BX_SUPPORT_X86_64
CVTSD2SI_GqWsdR(bxInstruction_c * i)504 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GqWsdR(bxInstruction_c *i)
505 {
506 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
507
508 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
509 softfloat_status_word_rc_override(status, i);
510 Bit64s result = float64_to_int64(op, status);
511 check_exceptionsSSE(get_exception_flags(status));
512
513 BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result);
514 BX_NEXT_INSTR(i);
515 }
516 #endif
517
518 /*
519 * Opcode: F3 0F 2D
520 * Convert one single precision FP number to doubleword integer.
521 * When a conversion is inexact, the value returned is rounded according
522 * to rounding control bits in MXCSR register.
523 * Possible floating point exceptions: #I, #P
524 */
CVTSS2SI_GdWssR(bxInstruction_c * i)525 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GdWssR(bxInstruction_c *i)
526 {
527 #if BX_CPU_LEVEL >= 6
528 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
529
530 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
531 softfloat_status_word_rc_override(status, i);
532 Bit32s result = float32_to_int32(op, status);
533 check_exceptionsSSE(get_exception_flags(status));
534
535 BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
536 #endif
537
538 BX_NEXT_INSTR(i);
539 }
540
541 #if BX_SUPPORT_X86_64
CVTSS2SI_GqWssR(bxInstruction_c * i)542 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GqWssR(bxInstruction_c *i)
543 {
544 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
545
546 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
547 softfloat_status_word_rc_override(status, i);
548 Bit64s result = float32_to_int64(op, status);
549 check_exceptionsSSE(get_exception_flags(status));
550
551 BX_WRITE_64BIT_REG(i->dst(), (Bit64u) result);
552 BX_NEXT_INSTR(i);
553 }
554 #endif
555
556 /*
557 * Opcode: 0F 5A
558 * Convert two single precision FP numbers to two double precision FP numbers
559 * Possible floating point exceptions: #I, #D
560 */
CVTPS2PD_VpdWpsR(bxInstruction_c * i)561 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PD_VpdWpsR(bxInstruction_c *i)
562 {
563 #if BX_CPU_LEVEL >= 6
564 BxPackedXmmRegister result;
565 BxPackedRegister op;
566
567 // use packed register as 64-bit value with convinient accessors
568 op.u64 = BX_READ_XMM_REG_LO_QWORD(i->src());
569
570 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
571
572 result.xmm64u(0) = float32_to_float64(op.u32(0), status);
573 result.xmm64u(1) = float32_to_float64(op.u32(1), status);
574
575 check_exceptionsSSE(get_exception_flags(status));
576 BX_WRITE_XMM_REG(i->dst(), result);
577 #endif
578
579 BX_NEXT_INSTR(i);
580 }
581
582 /*
583 * Opcode: 66 0F 5A
584 * Convert two double precision FP numbers to two single precision FP.
585 * When a conversion is inexact, the value returned is rounded according
586 * to rounding control bits in MXCSR register.
 * Possible floating point exceptions: #I, #D, #O, #U, #P
588 */
CVTPD2PS_VpsWpdR(bxInstruction_c * i)589 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PS_VpsWpdR(bxInstruction_c *i)
590 {
591 #if BX_CPU_LEVEL >= 6
592 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
593
594 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
595
596 op.xmm32u(0) = float64_to_float32(op.xmm64u(0), status);
597 op.xmm32u(1) = float64_to_float32(op.xmm64u(1), status);
598 op.xmm64u(1) = 0;
599
600 check_exceptionsSSE(get_exception_flags(status));
601 BX_WRITE_XMM_REG(i->dst(), op);
602 #endif
603
604 BX_NEXT_INSTR(i);
605 }
606
607 /*
608 * Opcode: F2 0F 5A
609 * Convert one double precision FP number to one single precision FP.
610 * When a conversion is inexact, the value returned is rounded according
611 * to rounding control bits in MXCSR register.
 * Possible floating point exceptions: #I, #D, #O, #U, #P
613 */
CVTSD2SS_VssWsdR(bxInstruction_c * i)614 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SS_VssWsdR(bxInstruction_c *i)
615 {
616 #if BX_CPU_LEVEL >= 6
617 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
618
619 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
620 float32 result = float64_to_float32(op, status);
621 check_exceptionsSSE(get_exception_flags(status));
622 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), result);
623 #endif
624
625 BX_NEXT_INSTR(i);
626 }
627
628 /*
629 * Opcode: F3 0F 5A
630 * Convert one single precision FP number to one double precision FP.
631 * Possible floating point exceptions: #I, #D
632 */
CVTSS2SD_VsdWssR(bxInstruction_c * i)633 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SD_VsdWssR(bxInstruction_c *i)
634 {
635 #if BX_CPU_LEVEL >= 6
636 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
637
638 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
639 float64 result = float32_to_float64(op, status);
640 check_exceptionsSSE(get_exception_flags(status));
641 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), result);
642 #endif
643
644 BX_NEXT_INSTR(i);
645 }
646
647 /*
648 * Opcode: 0F 5B
649 * Convert four signed integers to four single precision FP numbers.
650 * When a conversion is inexact, the value returned is rounded according
651 * to rounding control bits in MXCSR register.
652 * Possible floating point exceptions: #P
653 */
CVTDQ2PS_VpsWdqR(bxInstruction_c * i)654 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PS_VpsWdqR(bxInstruction_c *i)
655 {
656 #if BX_CPU_LEVEL >= 6
657 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
658
659 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
660
661 op.xmm32u(0) = int32_to_float32(op.xmm32s(0), status);
662 op.xmm32u(1) = int32_to_float32(op.xmm32s(1), status);
663 op.xmm32u(2) = int32_to_float32(op.xmm32s(2), status);
664 op.xmm32u(3) = int32_to_float32(op.xmm32s(3), status);
665
666 check_exceptionsSSE(get_exception_flags(status));
667 BX_WRITE_XMM_REG(i->dst(), op);
668 #endif
669
670 BX_NEXT_INSTR(i);
671 }
672
673 /*
674 * Opcode: 66 0F 5B
675 * Convert four single precision FP to four doubleword integers.
676 * When a conversion is inexact, the value returned is rounded according
677 * to rounding control bits in MXCSR register.
678 * Possible floating point exceptions: #I, #P
679 */
CVTPS2DQ_VdqWpsR(bxInstruction_c * i)680 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2DQ_VdqWpsR(bxInstruction_c *i)
681 {
682 #if BX_CPU_LEVEL >= 6
683 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
684
685 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
686
687 op.xmm32s(0) = float32_to_int32(op.xmm32u(0), status);
688 op.xmm32s(1) = float32_to_int32(op.xmm32u(1), status);
689 op.xmm32s(2) = float32_to_int32(op.xmm32u(2), status);
690 op.xmm32s(3) = float32_to_int32(op.xmm32u(3), status);
691
692 check_exceptionsSSE(get_exception_flags(status));
693 BX_WRITE_XMM_REG(i->dst(), op);
694 #endif
695
696 BX_NEXT_INSTR(i);
697 }
698
699 /*
700 * Opcode: F3 0F 5B
701 * Convert four single precision FP to four doubleword integers using
702 * truncation if the conversion is inexact.
703 * Possible floating point exceptions: #I, #P
704 */
CVTTPS2DQ_VdqWpsR(bxInstruction_c * i)705 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2DQ_VdqWpsR(bxInstruction_c *i)
706 {
707 #if BX_CPU_LEVEL >= 6
708 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
709
710 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
711
712 op.xmm32s(0) = float32_to_int32_round_to_zero(op.xmm32u(0), status);
713 op.xmm32s(1) = float32_to_int32_round_to_zero(op.xmm32u(1), status);
714 op.xmm32s(2) = float32_to_int32_round_to_zero(op.xmm32u(2), status);
715 op.xmm32s(3) = float32_to_int32_round_to_zero(op.xmm32u(3), status);
716
717 check_exceptionsSSE(get_exception_flags(status));
718 BX_WRITE_XMM_REG(i->dst(), op);
719 #endif
720
721 BX_NEXT_INSTR(i);
722 }
723
724 /*
725 * Opcode: 66 0F E6
726 * Convert two double precision FP to two signed doubleword integers using
727 * truncation if the conversion is inexact.
728 * Possible floating point exceptions: #I, #P
729 */
CVTTPD2DQ_VqWpdR(bxInstruction_c * i)730 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2DQ_VqWpdR(bxInstruction_c *i)
731 {
732 #if BX_CPU_LEVEL >= 6
733 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
734
735 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
736
737 op.xmm32s(0) = float64_to_int32_round_to_zero(op.xmm64u(0), status);
738 op.xmm32s(1) = float64_to_int32_round_to_zero(op.xmm64u(1), status);
739 op.xmm64u(1) = 0;
740
741 check_exceptionsSSE(get_exception_flags(status));
742 BX_WRITE_XMM_REG(i->dst(), op);
743 #endif
744
745 BX_NEXT_INSTR(i);
746 }
747
748 /*
749 * Opcode: F2 0F E6
750 * Convert two double precision FP to two signed doubleword integers.
751 * When a conversion is inexact, the value returned is rounded according
752 * to rounding control bits in MXCSR register.
753 * Possible floating point exceptions: #I, #P
754 */
CVTPD2DQ_VqWpdR(bxInstruction_c * i)755 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2DQ_VqWpdR(bxInstruction_c *i)
756 {
757 #if BX_CPU_LEVEL >= 6
758 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
759
760 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
761
762 op.xmm32s(0) = float64_to_int32(op.xmm64u(0), status);
763 op.xmm32s(1) = float64_to_int32(op.xmm64u(1), status);
764 op.xmm64u(1) = 0;
765
766 check_exceptionsSSE(get_exception_flags(status));
767 BX_WRITE_XMM_REG(i->dst(), op);
768 #endif
769
770 BX_NEXT_INSTR(i);
771 }
772
773 /*
774 * Opcode: F3 0F E6
775 * Convert two 32bit signed integers from XMM/MEM to two double precision FP
776 * Possible floating point exceptions: -
777 */
CVTDQ2PD_VpdWqR(bxInstruction_c * i)778 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PD_VpdWqR(bxInstruction_c *i)
779 {
780 #if BX_CPU_LEVEL >= 6
781 BxPackedXmmRegister result;
782 BxPackedRegister op;
783
784 // use packed register as 64-bit value with convinient accessors
785 op.u64 = BX_READ_XMM_REG_LO_QWORD(i->src());
786
787 result.xmm64u(0) = int32_to_float64(op.s32(0));
788 result.xmm64u(1) = int32_to_float64(op.s32(1));
789
790 BX_WRITE_XMM_REG(i->dst(), result);
791 #endif
792
793 BX_NEXT_INSTR(i);
794 }
795
796 /*
797 * Opcode: 0F 2E
 * Compare two single precision FP numbers and set EFLAGS accordingly.
799 * Possible floating point exceptions: #I, #D
800 */
UCOMISS_VssWssR(bxInstruction_c * i)801 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISS_VssWssR(bxInstruction_c *i)
802 {
803 #if BX_CPU_LEVEL >= 6
804 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
805
806 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
807 softfloat_status_word_rc_override(status, i);
808 int rc = float32_compare_quiet(op1, op2, status);
809 check_exceptionsSSE(get_exception_flags(status));
810 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
811 #endif
812
813 BX_NEXT_INSTR(i);
814 }
815
816 /*
817 * Opcode: 66 0F 2E
 * Compare two double precision FP numbers and set EFLAGS accordingly.
819 * Possible floating point exceptions: #I, #D
820 */
UCOMISD_VsdWsdR(bxInstruction_c * i)821 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISD_VsdWsdR(bxInstruction_c *i)
822 {
823 #if BX_CPU_LEVEL >= 6
824 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
825
826 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
827 softfloat_status_word_rc_override(status, i);
828 int rc = float64_compare_quiet(op1, op2, status);
829 check_exceptionsSSE(get_exception_flags(status));
830 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
831 #endif
832
833 BX_NEXT_INSTR(i);
834 }
835
836 /*
837 * Opcode: 0F 2F
 * Compare two single precision FP numbers and set EFLAGS accordingly.
839 * Possible floating point exceptions: #I, #D
840 */
COMISS_VssWssR(bxInstruction_c * i)841 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISS_VssWssR(bxInstruction_c *i)
842 {
843 #if BX_CPU_LEVEL >= 6
844 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
845
846 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
847 softfloat_status_word_rc_override(status, i);
848 int rc = float32_compare(op1, op2, status);
849 check_exceptionsSSE(get_exception_flags(status));
850 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
851 #endif
852
853 BX_NEXT_INSTR(i);
854 }
855
856 /*
857 * Opcode: 66 0F 2F
 * Compare two double precision FP numbers and set EFLAGS accordingly.
859 * Possible floating point exceptions: #I, #D
860 */
COMISD_VsdWsdR(bxInstruction_c * i)861 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISD_VsdWsdR(bxInstruction_c *i)
862 {
863 #if BX_CPU_LEVEL >= 6
864 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
865
866 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
867 softfloat_status_word_rc_override(status, i);
868 int rc = float64_compare(op1, op2, status);
869 check_exceptionsSSE(get_exception_flags(status));
870 BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
871 #endif
872
873 BX_NEXT_INSTR(i);
874 }
875
876 /*
877 * Opcode: 0F 51
878 * Square Root packed single precision.
879 * Possible floating point exceptions: #I, #D, #P
880 */
SQRTPS_VpsWpsR(bxInstruction_c * i)881 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPS_VpsWpsR(bxInstruction_c *i)
882 {
883 #if BX_CPU_LEVEL >= 6
884 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
885 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
886 xmm_sqrtps(&op, status);
887 check_exceptionsSSE(get_exception_flags(status));
888 BX_WRITE_XMM_REG(i->dst(), op);
889 #endif
890
891 BX_NEXT_INSTR(i);
892 }
893
894 /*
895 * Opcode: 66 0F 51
896 * Square Root packed double precision.
897 * Possible floating point exceptions: #I, #D, #P
898 */
SQRTPD_VpdWpdR(bxInstruction_c * i)899 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPD_VpdWpdR(bxInstruction_c *i)
900 {
901 #if BX_CPU_LEVEL >= 6
902 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
903 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
904 xmm_sqrtpd(&op, status);
905 check_exceptionsSSE(get_exception_flags(status));
906 BX_WRITE_XMM_REG(i->dst(), op);
907 #endif
908
909 BX_NEXT_INSTR(i);
910 }
911
912 /*
913 * Opcode: F2 0F 51
914 * Square Root scalar double precision.
915 * Possible floating point exceptions: #I, #D, #P
916 */
SQRTSD_VsdWsdR(bxInstruction_c * i)917 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSD_VsdWsdR(bxInstruction_c *i)
918 {
919 #if BX_CPU_LEVEL >= 6
920 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
921
922 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
923 op = float64_sqrt(op, status);
924 check_exceptionsSSE(get_exception_flags(status));
925 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op);
926 #endif
927
928 BX_NEXT_INSTR(i);
929 }
930
931 /*
932 * Opcode: F3 0F 51
933 * Square Root scalar single precision.
934 * Possible floating point exceptions: #I, #D, #P
935 */
SQRTSS_VssWssR(bxInstruction_c * i)936 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSS_VssWssR(bxInstruction_c *i)
937 {
938 #if BX_CPU_LEVEL >= 6
939 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
940
941 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
942 op = float32_sqrt(op, status);
943 check_exceptionsSSE(get_exception_flags(status));
944 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op);
945 #endif
946
947 BX_NEXT_INSTR(i);
948 }
949
950 /*
951 * Opcode: 0F 58
952 * Add packed single precision FP numbers from XMM2/MEM to XMM1.
953 * Possible floating point exceptions: #I, #D, #O, #U, #P
954 */
ADDPS_VpsWpsR(bxInstruction_c * i)955 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPS_VpsWpsR(bxInstruction_c *i)
956 {
957 #if BX_CPU_LEVEL >= 6
958 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
959
960 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
961 xmm_addps(&op1, &op2, status);
962 check_exceptionsSSE(get_exception_flags(status));
963
964 BX_WRITE_XMM_REG(i->dst(), op1);
965 #endif
966
967 BX_NEXT_INSTR(i);
968 }
969
970 /*
971 * Opcode: 66 0F 58
972 * Add packed double precision FP numbers from XMM2/MEM to XMM1.
973 * Possible floating point exceptions: #I, #D, #O, #U, #P
974 */
ADDPD_VpdWpdR(bxInstruction_c * i)975 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPD_VpdWpdR(bxInstruction_c *i)
976 {
977 #if BX_CPU_LEVEL >= 6
978 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
979
980 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
981 xmm_addpd(&op1, &op2, status);
982 check_exceptionsSSE(get_exception_flags(status));
983
984 BX_WRITE_XMM_REG(i->dst(), op1);
985 #endif
986
987 BX_NEXT_INSTR(i);
988 }
989
990 /*
991 * Opcode: F2 0F 58
992 * Add the lower double precision FP number from XMM2/MEM to XMM1.
993 * Possible floating point exceptions: #I, #D, #O, #U, #P
994 */
ADDSD_VsdWsdR(bxInstruction_c * i)995 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSD_VsdWsdR(bxInstruction_c *i)
996 {
997 #if BX_CPU_LEVEL >= 6
998 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
999
1000 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1001 op1 = float64_add(op1, op2, status);
1002 check_exceptionsSSE(get_exception_flags(status));
1003 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1004 #endif
1005
1006 BX_NEXT_INSTR(i);
1007 }
1008
1009 /*
1010 * Opcode: F3 0F 58
1011 * Add the lower single precision FP number from XMM2/MEM to XMM1.
1012 * Possible floating point exceptions: #I, #D, #O, #U, #P
1013 */
ADDSS_VssWssR(bxInstruction_c * i)1014 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSS_VssWssR(bxInstruction_c *i)
1015 {
1016 #if BX_CPU_LEVEL >= 6
1017 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1018
1019 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1020 op1 = float32_add(op1, op2, status);
1021 check_exceptionsSSE(get_exception_flags(status));
1022 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1023 #endif
1024
1025 BX_NEXT_INSTR(i);
1026 }
1027
1028 /*
1029 * Opcode: 0F 59
1030 * Multiply packed single precision FP numbers from XMM2/MEM to XMM1.
1031 * Possible floating point exceptions: #I, #D, #O, #U, #P
1032 */
MULPS_VpsWpsR(bxInstruction_c * i)1033 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPS_VpsWpsR(bxInstruction_c *i)
1034 {
1035 #if BX_CPU_LEVEL >= 6
1036 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1037
1038 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1039 xmm_mulps(&op1, &op2, status);
1040 check_exceptionsSSE(get_exception_flags(status));
1041
1042 BX_WRITE_XMM_REG(i->dst(), op1);
1043 #endif
1044
1045 BX_NEXT_INSTR(i);
1046 }
1047
1048 /*
1049 * Opcode: 66 0F 59
1050 * Multiply packed double precision FP numbers from XMM2/MEM to XMM1.
1051 * Possible floating point exceptions: #I, #D, #O, #U, #P
1052 */
MULPD_VpdWpdR(bxInstruction_c * i)1053 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPD_VpdWpdR(bxInstruction_c *i)
1054 {
1055 #if BX_CPU_LEVEL >= 6
1056 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1057
1058 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1059 xmm_mulpd(&op1, &op2, status);
1060 check_exceptionsSSE(get_exception_flags(status));
1061
1062 BX_WRITE_XMM_REG(i->dst(), op1);
1063 #endif
1064
1065 BX_NEXT_INSTR(i);
1066 }
1067
1068 /*
1069 * Opcode: F2 0F 59
1070 * Multiply the lower double precision FP number from XMM2/MEM to XMM1.
1071 * Possible floating point exceptions: #I, #D, #O, #U, #P
1072 */
MULSD_VsdWsdR(bxInstruction_c * i)1073 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSD_VsdWsdR(bxInstruction_c *i)
1074 {
1075 #if BX_CPU_LEVEL >= 6
1076 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1077
1078 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1079 op1 = float64_mul(op1, op2, status);
1080 check_exceptionsSSE(get_exception_flags(status));
1081 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1082 #endif
1083
1084 BX_NEXT_INSTR(i);
1085 }
1086
1087 /*
1088 * Opcode: F3 0F 59
1089 * Multiply the lower single precision FP number from XMM2/MEM to XMM1.
1090 * Possible floating point exceptions: #I, #D, #O, #U, #P
1091 */
MULSS_VssWssR(bxInstruction_c * i)1092 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSS_VssWssR(bxInstruction_c *i)
1093 {
1094 #if BX_CPU_LEVEL >= 6
1095 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1096
1097 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1098 op1 = float32_mul(op1, op2, status);
1099 check_exceptionsSSE(get_exception_flags(status));
1100 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1101 #endif
1102
1103 BX_NEXT_INSTR(i);
1104 }
1105
1106 /*
1107 * Opcode: 0F 5C
1108 * Subtract packed single precision FP numbers from XMM2/MEM to XMM1.
1109 * Possible floating point exceptions: #I, #D, #O, #U, #P
1110 */
SUBPS_VpsWpsR(bxInstruction_c * i)1111 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPS_VpsWpsR(bxInstruction_c *i)
1112 {
1113 #if BX_CPU_LEVEL >= 6
1114 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1115
1116 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1117 xmm_subps(&op1, &op2, status);
1118 check_exceptionsSSE(get_exception_flags(status));
1119
1120 BX_WRITE_XMM_REG(i->dst(), op1);
1121 #endif
1122
1123 BX_NEXT_INSTR(i);
1124 }
1125
1126 /*
1127 * Opcode: 66 0F 5C
1128 * Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
1129 * Possible floating point exceptions: #I, #D, #O, #U, #P
1130 */
SUBPD_VpdWpdR(bxInstruction_c * i)1131 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPD_VpdWpdR(bxInstruction_c *i)
1132 {
1133 #if BX_CPU_LEVEL >= 6
1134 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1135
1136 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1137 xmm_subpd(&op1, &op2, status);
1138 check_exceptionsSSE(get_exception_flags(status));
1139
1140 BX_WRITE_XMM_REG(i->dst(), op1);
1141 #endif
1142
1143 BX_NEXT_INSTR(i);
1144 }
1145
1146 /*
1147 * Opcode: F2 0F 5C
1148 * Subtract the lower double precision FP number from XMM2/MEM to XMM1.
1149 * Possible floating point exceptions: #I, #D, #O, #U, #P
1150 */
SUBSD_VsdWsdR(bxInstruction_c * i)1151 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSD_VsdWsdR(bxInstruction_c *i)
1152 {
1153 #if BX_CPU_LEVEL >= 6
1154 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1155
1156 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1157 op1 = float64_sub(op1, op2, status);
1158 check_exceptionsSSE(get_exception_flags(status));
1159 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1160 #endif
1161
1162 BX_NEXT_INSTR(i);
1163 }
1164
1165 /*
1166 * Opcode: F3 0F 5C
1167 * Subtract the lower single precision FP number from XMM2/MEM to XMM1.
1168 * Possible floating point exceptions: #I, #D, #O, #U, #P
1169 */
SUBSS_VssWssR(bxInstruction_c * i)1170 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSS_VssWssR(bxInstruction_c *i)
1171 {
1172 #if BX_CPU_LEVEL >= 6
1173 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1174
1175 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1176 op1 = float32_sub(op1, op2, status);
1177 check_exceptionsSSE(get_exception_flags(status));
1178 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1179 #endif
1180
1181 BX_NEXT_INSTR(i);
1182 }
1183
1184 /*
1185 * Opcode: 0F 5D
1186 * Calculate the minimum single precision FP between XMM2/MEM to XMM1.
1187 * Possible floating point exceptions: #I, #D
1188 */
MINPS_VpsWpsR(bxInstruction_c * i)1189 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPS_VpsWpsR(bxInstruction_c *i)
1190 {
1191 #if BX_CPU_LEVEL >= 6
1192 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1193
1194 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1195 xmm_minps(&op1, &op2, status);
1196 check_exceptionsSSE(get_exception_flags(status));
1197
1198 BX_WRITE_XMM_REG(i->dst(), op1);
1199 #endif
1200
1201 BX_NEXT_INSTR(i);
1202 }
1203
1204 /*
1205 * Opcode: 66 0F 5D
1206 * Calculate the minimum double precision FP between XMM2/MEM to XMM1.
1207 * Possible floating point exceptions: #I, #D
1208 */
MINPD_VpdWpdR(bxInstruction_c * i)1209 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPD_VpdWpdR(bxInstruction_c *i)
1210 {
1211 #if BX_CPU_LEVEL >= 6
1212 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1213
1214 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1215 xmm_minpd(&op1, &op2, status);
1216 check_exceptionsSSE(get_exception_flags(status));
1217
1218 BX_WRITE_XMM_REG(i->dst(), op1);
1219 #endif
1220
1221 BX_NEXT_INSTR(i);
1222 }
1223
1224 /*
1225 * Opcode: F2 0F 5D
1226 * Calculate the minimum scalar double precision FP between XMM2/MEM to XMM1.
1227 * Possible floating point exceptions: #I, #D
1228 */
MINSD_VsdWsdR(bxInstruction_c * i)1229 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSD_VsdWsdR(bxInstruction_c *i)
1230 {
1231 #if BX_CPU_LEVEL >= 6
1232 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1233
1234 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1235 op1 = float64_min(op1, op2, status);
1236 check_exceptionsSSE(get_exception_flags(status));
1237 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1238 #endif
1239
1240 BX_NEXT_INSTR(i);
1241 }
1242
1243 /*
1244 * Opcode: F3 0F 5D
1245 * Calculate the minimum scalar single precision FP between XMM2/MEM to XMM1.
1246 * Possible floating point exceptions: #I, #D
1247 */
MINSS_VssWssR(bxInstruction_c * i)1248 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSS_VssWssR(bxInstruction_c *i)
1249 {
1250 #if BX_CPU_LEVEL >= 6
1251 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1252
1253 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1254 op1 = float32_min(op1, op2, status);
1255 check_exceptionsSSE(get_exception_flags(status));
1256 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1257 #endif
1258
1259 BX_NEXT_INSTR(i);
1260 }
1261
1262 /*
1263 * Opcode: 0F 5E
1264 * Divide packed single precision FP numbers from XMM2/MEM to XMM1.
1265 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1266 */
DIVPS_VpsWpsR(bxInstruction_c * i)1267 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPS_VpsWpsR(bxInstruction_c *i)
1268 {
1269 #if BX_CPU_LEVEL >= 6
1270 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1271
1272 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1273 xmm_divps(&op1, &op2, status);
1274 check_exceptionsSSE(get_exception_flags(status));
1275
1276 BX_WRITE_XMM_REG(i->dst(), op1);
1277 #endif
1278
1279 BX_NEXT_INSTR(i);
1280 }
1281
1282 /*
1283 * Opcode: 66 0F 5E
1284 * Divide packed double precision FP numbers from XMM2/MEM to XMM1.
1285 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1286 */
DIVPD_VpdWpdR(bxInstruction_c * i)1287 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPD_VpdWpdR(bxInstruction_c *i)
1288 {
1289 #if BX_CPU_LEVEL >= 6
1290 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1291
1292 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1293 xmm_divpd(&op1, &op2, status);
1294 check_exceptionsSSE(get_exception_flags(status));
1295
1296 BX_WRITE_XMM_REG(i->dst(), op1);
1297 #endif
1298
1299 BX_NEXT_INSTR(i);
1300 }
1301
1302 /*
1303 * Opcode: F2 0F 5E
1304 * Divide the lower double precision FP number from XMM2/MEM to XMM1.
1305 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1306 */
DIVSD_VsdWsdR(bxInstruction_c * i)1307 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSD_VsdWsdR(bxInstruction_c *i)
1308 {
1309 #if BX_CPU_LEVEL >= 6
1310 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1311
1312 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1313 op1 = float64_div(op1, op2, status);
1314 check_exceptionsSSE(get_exception_flags(status));
1315 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1316 #endif
1317
1318 BX_NEXT_INSTR(i);
1319 }
1320
1321 /*
1322 * Opcode: F3 0F 5E
1323 * Divide the lower single precision FP number from XMM2/MEM to XMM1.
1324 * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1325 */
DIVSS_VssWssR(bxInstruction_c * i)1326 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSS_VssWssR(bxInstruction_c *i)
1327 {
1328 #if BX_CPU_LEVEL >= 6
1329 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1330
1331 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1332 op1 = float32_div(op1, op2, status);
1333 check_exceptionsSSE(get_exception_flags(status));
1334 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1335 #endif
1336
1337 BX_NEXT_INSTR(i);
1338 }
1339
1340 /*
1341 * Opcode: 0F 5F
1342 * Calculate the maximum single precision FP between XMM2/MEM to XMM1.
1343 * Possible floating point exceptions: #I, #D
1344 */
MAXPS_VpsWpsR(bxInstruction_c * i)1345 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPS_VpsWpsR(bxInstruction_c *i)
1346 {
1347 #if BX_CPU_LEVEL >= 6
1348 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1349
1350 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1351 xmm_maxps(&op1, &op2, status);
1352 check_exceptionsSSE(get_exception_flags(status));
1353
1354 BX_WRITE_XMM_REG(i->dst(), op1);
1355 #endif
1356
1357 BX_NEXT_INSTR(i);
1358 }
1359
1360 /*
1361 * Opcode: 66 0F 5F
1362 * Calculate the maximum double precision FP between XMM2/MEM to XMM1.
1363 * Possible floating point exceptions: #I, #D
1364 */
MAXPD_VpdWpdR(bxInstruction_c * i)1365 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPD_VpdWpdR(bxInstruction_c *i)
1366 {
1367 #if BX_CPU_LEVEL >= 6
1368 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1369
1370 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1371 xmm_maxpd(&op1, &op2, status);
1372 check_exceptionsSSE(get_exception_flags(status));
1373
1374 BX_WRITE_XMM_REG(i->dst(), op1);
1375 #endif
1376
1377 BX_NEXT_INSTR(i);
1378 }
1379
1380 /*
1381 * Opcode: F2 0F 5F
1382 * Calculate the maximum scalar double precision FP between XMM2/MEM to XMM1.
1383 * Possible floating point exceptions: #I, #D
1384 */
MAXSD_VsdWsdR(bxInstruction_c * i)1385 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSD_VsdWsdR(bxInstruction_c *i)
1386 {
1387 #if BX_CPU_LEVEL >= 6
1388 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1389
1390 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1391 op1 = float64_max(op1, op2, status);
1392 check_exceptionsSSE(get_exception_flags(status));
1393 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1394 #endif
1395
1396 BX_NEXT_INSTR(i);
1397 }
1398
1399 /*
1400 * Opcode: F3 0F 5F
 * Calculate the maximum scalar single precision FP between XMM2/MEM to XMM1.
1402 * Possible floating point exceptions: #I, #D
1403 */
MAXSS_VssWssR(bxInstruction_c * i)1404 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSS_VssWssR(bxInstruction_c *i)
1405 {
1406 #if BX_CPU_LEVEL >= 6
1407 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1408
1409 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1410 op1 = float32_max(op1, op2, status);
1411 check_exceptionsSSE(get_exception_flags(status));
1412 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1413 #endif
1414
1415 BX_NEXT_INSTR(i);
1416 }
1417
1418 /*
1419 * Opcode: 66 0F 7C
 * Horizontally add packed double precision FP values of XMM1 and XMM2/MEM.
1421 * Possible floating point exceptions: #I, #D, #O, #U, #P
1422 */
HADDPD_VpdWpdR(bxInstruction_c * i)1423 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPD_VpdWpdR(bxInstruction_c *i)
1424 {
1425 #if BX_CPU_LEVEL >= 6
1426 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1427
1428 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1429 xmm_haddpd(&op1, &op2, status);
1430 check_exceptionsSSE(get_exception_flags(status));
1431
1432 BX_WRITE_XMM_REG(i->dst(), op1);
1433 #endif
1434
1435 BX_NEXT_INSTR(i);
1436 }
1437
1438 /*
1439 * Opcode: F2 0F 7C
 * Horizontally add packed single precision FP values of XMM1 and XMM2/MEM.
1441 * Possible floating point exceptions: #I, #D, #O, #U, #P
1442 */
HADDPS_VpsWpsR(bxInstruction_c * i)1443 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPS_VpsWpsR(bxInstruction_c *i)
1444 {
1445 #if BX_CPU_LEVEL >= 6
1446 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1447
1448 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1449 xmm_haddps(&op1, &op2, status);
1450 check_exceptionsSSE(get_exception_flags(status));
1451
1452 BX_WRITE_XMM_REG(i->dst(), op1);
1453 #endif
1454
1455 BX_NEXT_INSTR(i);
1456 }
1457
1458 /*
1459 * Opcode: 66 0F 7D
1460 * Subtract horizontally packed double precision FP in XMM2/MEM from XMM1.
1461 * Possible floating point exceptions: #I, #D, #O, #U, #P
1462 */
HSUBPD_VpdWpdR(bxInstruction_c * i)1463 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPD_VpdWpdR(bxInstruction_c *i)
1464 {
1465 #if BX_CPU_LEVEL >= 6
1466 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1467
1468 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1469 xmm_hsubpd(&op1, &op2, status);
1470 check_exceptionsSSE(get_exception_flags(status));
1471
1472 BX_WRITE_XMM_REG(i->dst(), op1);
1473 #endif
1474
1475 BX_NEXT_INSTR(i);
1476 }
1477
1478 /*
1479 * Opcode: F2 0F 7D
1480 * Subtract horizontally packed single precision FP in XMM2/MEM from XMM1.
1481 * Possible floating point exceptions: #I, #D, #O, #U, #P
1482 */
HSUBPS_VpsWpsR(bxInstruction_c * i)1483 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPS_VpsWpsR(bxInstruction_c *i)
1484 {
1485 #if BX_CPU_LEVEL >= 6
1486 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1487
1488 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1489 xmm_hsubps(&op1, &op2, status);
1490 check_exceptionsSSE(get_exception_flags(status));
1491
1492 BX_WRITE_XMM_REG(i->dst(), op1);
1493 #endif
1494
1495 BX_NEXT_INSTR(i);
1496 }
1497
1498 /*
1499 * Opcode: 0F C2
1500 * Compare packed single precision FP values using Ib as comparison predicate.
1501 * Possible floating point exceptions: #I, #D
1502 */
CMPPS_VpsWpsIbR(bxInstruction_c * i)1503 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPS_VpsWpsIbR(bxInstruction_c *i)
1504 {
1505 #if BX_CPU_LEVEL >= 6
1506 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1507
1508 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1509 int ib = i->Ib() & 7;
1510
1511 op1.xmm32u(0) = compare32[ib](op1.xmm32u(0), op2.xmm32u(0), status) ? 0xFFFFFFFF : 0;
1512 op1.xmm32u(1) = compare32[ib](op1.xmm32u(1), op2.xmm32u(1), status) ? 0xFFFFFFFF : 0;
1513 op1.xmm32u(2) = compare32[ib](op1.xmm32u(2), op2.xmm32u(2), status) ? 0xFFFFFFFF : 0;
1514 op1.xmm32u(3) = compare32[ib](op1.xmm32u(3), op2.xmm32u(3), status) ? 0xFFFFFFFF : 0;
1515
1516 check_exceptionsSSE(get_exception_flags(status));
1517 BX_WRITE_XMM_REG(i->dst(), op1);
1518 #endif
1519
1520 BX_NEXT_INSTR(i);
1521 }
1522
1523 /*
1524 * Opcode: 66 0F C2
1525 * Compare packed double precision FP values using Ib as comparison predicate.
1526 * Possible floating point exceptions: #I, #D
1527 */
CMPPD_VpdWpdIbR(bxInstruction_c * i)1528 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPD_VpdWpdIbR(bxInstruction_c *i)
1529 {
1530 #if BX_CPU_LEVEL >= 6
1531 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1532
1533 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1534 int ib = i->Ib() & 7;
1535
1536 op1.xmm64u(0) = compare64[ib](op1.xmm64u(0), op2.xmm64u(0), status) ?
1537 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
1538 op1.xmm64u(1) = compare64[ib](op1.xmm64u(1), op2.xmm64u(1), status) ?
1539 BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
1540
1541 check_exceptionsSSE(get_exception_flags(status));
1542 BX_WRITE_XMM_REG(i->dst(), op1);
1543 #endif
1544
1545 BX_NEXT_INSTR(i);
1546 }
1547
1548 /*
1549 * Opcode: F2 0F C2
1550 * Compare double precision FP values using Ib as comparison predicate.
1551 * Possible floating point exceptions: #I, #D
1552 */
CMPSD_VsdWsdIbR(bxInstruction_c * i)1553 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIbR(bxInstruction_c *i)
1554 {
1555 #if BX_CPU_LEVEL >= 6
1556 float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1557
1558 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1559 int ib = i->Ib() & 7;
1560
1561 if(compare64[ib](op1, op2, status)) {
1562 op1 = BX_CONST64(0xFFFFFFFFFFFFFFFF);
1563 } else {
1564 op1 = 0;
1565 }
1566
1567 check_exceptionsSSE(get_exception_flags(status));
1568 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1569 #endif
1570
1571 BX_NEXT_INSTR(i);
1572 }
1573
1574 /*
1575 * Opcode: F3 0F C2
1576 * Compare single precision FP values using Ib as comparison predicate.
1577 * Possible floating point exceptions: #I, #D
1578 */
CMPSS_VssWssIbR(bxInstruction_c * i)1579 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSS_VssWssIbR(bxInstruction_c *i)
1580 {
1581 #if BX_CPU_LEVEL >= 6
1582 float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1583
1584 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1585 int ib = i->Ib() & 7;
1586
1587 op1 = compare32[ib](op1, op2, status) ? 0xFFFFFFFF : 0;
1588
1589 check_exceptionsSSE(get_exception_flags(status));
1590 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1591 #endif
1592
1593 BX_NEXT_INSTR(i);
1594 }
1595
1596 /*
1597 * Opcode: 66 0F D0
1598 * Add/Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
1599 * Possible floating point exceptions: #I, #D, #O, #U, #P
1600 */
ADDSUBPD_VpdWpdR(bxInstruction_c * i)1601 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPD_VpdWpdR(bxInstruction_c *i)
1602 {
1603 #if BX_CPU_LEVEL >= 6
1604 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1605
1606 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1607 xmm_addsubpd(&op1, &op2, status);
1608 check_exceptionsSSE(get_exception_flags(status));
1609
1610 BX_WRITE_XMM_REG(i->dst(), op1);
1611 #endif
1612
1613 BX_NEXT_INSTR(i);
1614 }
1615
1616 /*
1617 * Opcode: F2 0F D0
 * Add/Subtract packed single precision FP numbers from XMM2/MEM to XMM1.
1619 * Possible floating point exceptions: #I, #D, #O, #U, #P
1620 */
ADDSUBPS_VpsWpsR(bxInstruction_c * i)1621 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPS_VpsWpsR(bxInstruction_c *i)
1622 {
1623 #if BX_CPU_LEVEL >= 6
1624 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1625
1626 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1627 xmm_addsubps(&op1, &op2, status);
1628 check_exceptionsSSE(get_exception_flags(status));
1629
1630 BX_WRITE_XMM_REG(i->dst(), op1);
1631 #endif
1632
1633 BX_NEXT_INSTR(i);
1634 }
1635
1636 #if BX_CPU_LEVEL >= 6
1637
1638 /* 66 0F 3A 08 */
ROUNDPS_VpsWpsIbR(bxInstruction_c * i)1639 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPS_VpsWpsIbR(bxInstruction_c *i)
1640 {
1641 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1642
1643 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1644 Bit8u control = i->Ib();
1645
1646 // override MXCSR rounding mode with control coming from imm8
1647 if ((control & 0x4) == 0)
1648 status.float_rounding_mode = control & 0x3;
1649 // ignore precision exception result
1650 if (control & 0x8)
1651 status.float_suppress_exception |= float_flag_inexact;
1652
1653 op.xmm32u(0) = float32_round_to_int(op.xmm32u(0), status);
1654 op.xmm32u(1) = float32_round_to_int(op.xmm32u(1), status);
1655 op.xmm32u(2) = float32_round_to_int(op.xmm32u(2), status);
1656 op.xmm32u(3) = float32_round_to_int(op.xmm32u(3), status);
1657
1658 check_exceptionsSSE(get_exception_flags(status));
1659 BX_WRITE_XMM_REG(i->dst(), op);
1660
1661 BX_NEXT_INSTR(i);
1662 }
1663
1664 /* 66 0F 3A 09 */
ROUNDPD_VpdWpdIbR(bxInstruction_c * i)1665 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPD_VpdWpdIbR(bxInstruction_c *i)
1666 {
1667 BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1668
1669 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1670 Bit8u control = i->Ib();
1671
1672 // override MXCSR rounding mode with control coming from imm8
1673 if ((control & 0x4) == 0)
1674 status.float_rounding_mode = control & 0x3;
1675 // ignore precision exception result
1676 if (control & 0x8)
1677 status.float_suppress_exception |= float_flag_inexact;
1678
1679 op.xmm64u(0) = float64_round_to_int(op.xmm64u(0), status);
1680 op.xmm64u(1) = float64_round_to_int(op.xmm64u(1), status);
1681
1682 check_exceptionsSSE(get_exception_flags(status));
1683 BX_WRITE_XMM_REG(i->dst(), op);
1684
1685 BX_NEXT_INSTR(i);
1686 }
1687
1688 /* 66 0F 3A 0A */
ROUNDSS_VssWssIbR(bxInstruction_c * i)1689 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSS_VssWssIbR(bxInstruction_c *i)
1690 {
1691 float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
1692
1693 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1694 Bit8u control = i->Ib();
1695
1696 // override MXCSR rounding mode with control coming from imm8
1697 if ((control & 0x4) == 0)
1698 status.float_rounding_mode = control & 0x3;
1699 // ignore precision exception result
1700 if (control & 0x8)
1701 status.float_suppress_exception |= float_flag_inexact;
1702
1703 op = float32_round_to_int(op, status);
1704
1705 check_exceptionsSSE(get_exception_flags(status));
1706 BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op);
1707
1708 BX_NEXT_INSTR(i);
1709 }
1710
1711 /* 66 0F 3A 0B */
ROUNDSD_VsdWsdIbR(bxInstruction_c * i)1712 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSD_VsdWsdIbR(bxInstruction_c *i)
1713 {
1714 float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
1715
1716 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1717 Bit8u control = i->Ib();
1718
1719 // override MXCSR rounding mode with control coming from imm8
1720 if ((control & 0x4) == 0)
1721 status.float_rounding_mode = control & 0x3;
1722 // ignore precision exception result
1723 if (control & 0x8)
1724 status.float_suppress_exception |= float_flag_inexact;
1725
1726 op = float64_round_to_int(op, status);
1727
1728 check_exceptionsSSE(get_exception_flags(status));
1729 BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op);
1730
1731 BX_NEXT_INSTR(i);
1732 }
1733
1734 /* Opcode: 66 0F 3A 40
1735 * Selectively multiply packed SP floating-point values from xmm1 with
1736 * packed SP floating-point values from xmm2, add and selectively
1737 * store the packed SP floating-point values or zero values to xmm1
1738 */
DPPS_VpsWpsIbR(bxInstruction_c * i)1739 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIbR(bxInstruction_c *i)
1740 {
1741 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst());
1742 BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src());
1743 Bit8u mask = i->Ib();
1744
1745 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1746
1747 // op1: [A, B, C, D]
1748 // op2: [E, F, G, H]
1749
1750 // after multiplication: op1 = [EA, BF, CG, DH]
1751 xmm_mulps_mask(&op1, &op2, status, mask >> 4);
1752 check_exceptionsSSE(get_exception_flags(status));
1753
1754 // shuffle op2 = [BF, AE, DH, CG]
1755 xmm_shufps(&op2, &op1, &op1, 0xb1);
1756
1757 // op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
1758 xmm_addps(&op2, &op1, status);
1759 check_exceptionsSSE(get_exception_flags(status));
1760
1761 // shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
1762 xmm_shufpd(&op1, &op2, &op2, 0x1);
1763
1764 // op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
1765 xmm_addps_mask(&op2, &op1, status, mask);
1766 check_exceptionsSSE(get_exception_flags(status));
1767
1768 BX_WRITE_XMM_REG(i->dst(), op2);
1769
1770 BX_NEXT_INSTR(i);
1771 }
1772
1773 /* Opcode: 66 0F 3A 41
1774 * Selectively multiply packed DP floating-point values from xmm1 with
1775 * packed DP floating-point values from xmm2, add and selectively
1776 * store the packed DP floating-point values or zero values to xmm1
1777 */
DPPD_VpdHpdWpdIbR(bxInstruction_c * i)1778 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdHpdWpdIbR(bxInstruction_c *i)
1779 {
1780 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1781 BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
1782 Bit8u mask = i->Ib();
1783
1784 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1785
1786 // op1: [A, B]
1787 // op2: [C, D]
1788
1789 // after multiplication: op1 = [AC, BD]
1790 xmm_mulpd_mask(&op1, &op2, status, mask >> 4);
1791 check_exceptionsSSE(get_exception_flags(status));
1792
1793 // shuffle op2 = [BD, AC]
1794 xmm_shufpd(&op2, &op1, &op1, 0x1);
1795
1796 // op1 = [AC+BD, BD+AC]
1797 xmm_addpd_mask(&op1, &op2, status, mask);
1798 check_exceptionsSSE(get_exception_flags(status));
1799
1800 BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
1801
1802 BX_NEXT_INSTR(i);
1803 }
1804
1805 #endif // BX_CPU_LEVEL >= 6
1806