1 /////////////////////////////////////////////////////////////////////////
2 // $Id: sse_pfp.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 //   Copyright (c) 2003-2018 Stanislav Shwartsman
6 //          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 //  You should have received a copy of the GNU Lesser General Public
19 //  License along with this library; if not, write to the Free Software
20 //  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA B 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23 
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28 
29 #if BX_CPU_LEVEL >= 6
30 
31 #include "fpu/softfloat-compare.h"
32 
33 #include "simd_pfp.h"
34 #include "simd_int.h"
35 
check_exceptionsSSE(int exceptions_flags)36 void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
37 {
38   exceptions_flags &= MXCSR_EXCEPTIONS;
39   int unmasked = ~(MXCSR.get_exceptions_masks()) & exceptions_flags;
40   // unmasked pre-computational exception detected (#IA, #DE or #DZ)
41   if (unmasked & 0x7) exceptions_flags &= 0x7;
42   MXCSR.set_exceptions(exceptions_flags);
43 
44   if (unmasked)
45   {
46      if(BX_CPU_THIS_PTR cr4.get_OSXMMEXCPT())
47         exception(BX_XM_EXCEPTION, 0);
48      else
49         exception(BX_UD_EXCEPTION, 0);
50   }
51 }
52 
mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr)53 float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr)
54 {
55   float_status_t status;
56 
57   status.float_exception_flags = 0; // clear exceptions before execution
58   status.float_nan_handling_mode = float_first_operand_nan;
59   status.float_rounding_mode = mxcsr.get_rounding_mode();
60   // if underflow is masked and FUZ is 1, set it to 1, else to 0
61   status.flush_underflow_to_zero =
62        (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
63   status.float_exception_masks = mxcsr.get_exceptions_masks();
64   status.float_suppress_exception = 0;
65   status.denormals_are_zeros = mxcsr.get_DAZ();
66 
67   return status;
68 }
69 
70 /* Comparison predicate for CMPSS/CMPPS instructions */
71 static float32_compare_method compare32[8] = {
72   float32_eq_ordered_quiet,
73   float32_lt_ordered_signalling,
74   float32_le_ordered_signalling,
75   float32_unordered_quiet,
76   float32_neq_unordered_quiet,
77   float32_nlt_unordered_signalling,
78   float32_nle_unordered_signalling,
79   float32_ordered_quiet
80 };
81 
82 /* Comparison predicate for CMPSD/CMPPD instructions */
83 static float64_compare_method compare64[8] = {
84   float64_eq_ordered_quiet,
85   float64_lt_ordered_signalling,
86   float64_le_ordered_signalling,
87   float64_unordered_quiet,
88   float64_neq_unordered_quiet,
89   float64_nlt_unordered_signalling,
90   float64_nle_unordered_signalling,
91   float64_ordered_quiet
92 };
93 
94 #endif // BX_CPU_LEVEL >= 6
95 
96 /*
97  * Opcode: 0F 2A
98  * Convert two 32bit signed integers from MMX/MEM to two single precision FP
99  * When a conversion is inexact, the value returned is rounded according
100  * to rounding control bits in MXCSR register.
101  * Possible floating point exceptions: #P
102  */
CVTPI2PS_VpsQqR(bxInstruction_c * i)103 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PS_VpsQqR(bxInstruction_c *i)
104 {
105 #if BX_CPU_LEVEL >= 6
106   /* check floating point status word for a pending FPU exceptions */
107   FPU_check_pending_exceptions();
108 
109   BxPackedMmxRegister op = BX_READ_MMX_REG(i->src());
110 
111   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
112 
113   MMXUD0(op) = int32_to_float32(MMXSD0(op), status);
114   MMXUD1(op) = int32_to_float32(MMXSD1(op), status);
115 
116   prepareFPU2MMX(); /* cause FPU2MMX state transition */
117   check_exceptionsSSE(get_exception_flags(status));
118   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), MMXUQ(op));
119 #endif
120 
121   BX_NEXT_INSTR(i);
122 }
123 
CVTPI2PS_VpsQqM(bxInstruction_c * i)124 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PS_VpsQqM(bxInstruction_c *i)
125 {
126 #if BX_CPU_LEVEL >= 6
127   // do not cause transition to MMX state because no MMX register touched
128   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
129   BxPackedMmxRegister op = read_virtual_qword(i->seg(), eaddr);
130 
131   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
132 
133   MMXUD0(op) = int32_to_float32(MMXSD0(op), status);
134   MMXUD1(op) = int32_to_float32(MMXSD1(op), status);
135 
136   check_exceptionsSSE(get_exception_flags(status));
137   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), MMXUQ(op));
138 #endif
139 
140   BX_NEXT_INSTR(i);
141 }
142 
143 /*
144  * Opcode: 66 0F 2A
145  * Convert two 32bit signed integers from MMX/MEM to two double precision FP
146  * Possible floating point exceptions: -
147  */
CVTPI2PD_VpdQqR(bxInstruction_c * i)148 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PD_VpdQqR(bxInstruction_c *i)
149 {
150 #if BX_CPU_LEVEL >= 6
151   BxPackedXmmRegister result;
152 
153   /* check floating point status word for a pending FPU exceptions */
154   FPU_check_pending_exceptions();
155   prepareFPU2MMX(); /* cause FPU2MMX state transition */
156 
157   BxPackedMmxRegister op = BX_READ_MMX_REG(i->src());
158 
159   result.xmm64u(0) = int32_to_float64(MMXSD0(op));
160   result.xmm64u(1) = int32_to_float64(MMXSD1(op));
161 
162   BX_WRITE_XMM_REG(i->dst(), result);
163 #endif
164 
165   BX_NEXT_INSTR(i);
166 }
167 
CVTPI2PD_VpdQqM(bxInstruction_c * i)168 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPI2PD_VpdQqM(bxInstruction_c *i)
169 {
170 #if BX_CPU_LEVEL >= 6
171   BxPackedXmmRegister result;
172 
173   // do not cause transition to MMX state because no MMX register touched
174   bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
175   BxPackedMmxRegister op = read_virtual_qword(i->seg(), eaddr);
176 
177   result.xmm64u(0) = int32_to_float64(MMXSD0(op));
178   result.xmm64u(1) = int32_to_float64(MMXSD1(op));
179 
180   BX_WRITE_XMM_REG(i->dst(), result);
181 #endif
182 
183   BX_NEXT_INSTR(i);
184 }
185 
186 /*
187  * Opcode: F2 0F 2A
188  * Convert one 32bit signed integer to one double precision FP
189  * Possible floating point exceptions: -
190  */
CVTSI2SD_VsdEdR(bxInstruction_c * i)191 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SD_VsdEdR(bxInstruction_c *i)
192 {
193 #if BX_CPU_LEVEL >= 6
194   float64 result = int32_to_float64(BX_READ_32BIT_REG(i->src()));
195   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), result);
196 #endif
197 
198   BX_NEXT_INSTR(i);
199 }
200 
#if BX_SUPPORT_X86_64
/*
 * CVTSI2SD with a 64-bit source: convert one 64bit signed integer from a
 * general purpose register to one double precision FP number, written to
 * the low quadword of the destination XMM register. Exception flags raised
 * by the conversion are reported before the destination is written.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SD_VsdEqR(bxInstruction_c *i)
{
  float_status_t sw = mxcsr_to_softfloat_status_word(MXCSR);

  float64 converted = int64_to_float64(BX_READ_64BIT_REG(i->src()), sw);

  check_exceptionsSSE(get_exception_flags(sw));
  BX_WRITE_XMM_REG_LO_QWORD(i->dst(), converted);

  BX_NEXT_INSTR(i);
}
#endif
212 
213 /*
214  * Opcode: F3 0F 2A
215  * Convert one 32bit signed integer to one single precision FP
216  * When a conversion is inexact, the value returned is rounded according
217  * to rounding control bits in MXCSR register.
218  * Possible floating point exceptions: #P
219  */
CVTSI2SS_VssEdR(bxInstruction_c * i)220 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SS_VssEdR(bxInstruction_c *i)
221 {
222 #if BX_CPU_LEVEL >= 6
223   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
224   float32 result = int32_to_float32(BX_READ_32BIT_REG(i->src()), status);
225   check_exceptionsSSE(get_exception_flags(status));
226   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), result);
227 #endif
228 
229   BX_NEXT_INSTR(i);
230 }
231 
#if BX_SUPPORT_X86_64
/*
 * CVTSI2SS with a 64-bit source: convert one 64bit signed integer from a
 * general purpose register to one single precision FP number, written to
 * the low dword of the destination XMM register. Exception flags raised by
 * the conversion are reported before the destination is written.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSI2SS_VssEqR(bxInstruction_c *i)
{
  float_status_t sw = mxcsr_to_softfloat_status_word(MXCSR);
  float32 converted = int64_to_float32(BX_READ_64BIT_REG(i->src()), sw);

  check_exceptionsSSE(get_exception_flags(sw));
  BX_WRITE_XMM_REG_LO_DWORD(i->dst(), converted);

  BX_NEXT_INSTR(i);
}
#endif
245 
246 /*
247  * Opcode: 0F 2C
248  * Convert two single precision FP numbers to two signed doubleword integers
249  * in MMX using truncation if the conversion is inexact
250  * Possible floating point exceptions: #I, #P
251  */
CVTTPS2PI_PqWps(bxInstruction_c * i)252 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2PI_PqWps(bxInstruction_c *i)
253 {
254 #if BX_CPU_LEVEL >= 6
255   /* check floating point status word for a pending FPU exceptions */
256   FPU_check_pending_exceptions();
257 
258   BxPackedMmxRegister op;
259 
260   /* op is a register or memory reference */
261   if (i->modC0()) {
262     op = BX_READ_XMM_REG_LO_QWORD(i->src());
263   }
264   else {
265     bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
266     /* pointer, segment address pair */
267     op = read_virtual_qword(i->seg(), eaddr);
268   }
269 
270   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
271 
272   MMXSD0(op) = float32_to_int32_round_to_zero(MMXUD0(op), status);
273   MMXSD1(op) = float32_to_int32_round_to_zero(MMXUD1(op), status);
274 
275   prepareFPU2MMX(); /* cause FPU2MMX state transition */
276   check_exceptionsSSE(get_exception_flags(status));
277   BX_WRITE_MMX_REG(i->dst(), op);
278 #endif
279 
280   BX_NEXT_INSTR(i);
281 }
282 
283 /*
284  * Opcode: 66 0F 2C
285  * Convert two double precision FP numbers to two signed doubleword integers
286  * in MMX using truncation if the conversion is inexact
287  * Possible floating point exceptions: #I, #P
288  */
CVTTPD2PI_PqWpd(bxInstruction_c * i)289 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2PI_PqWpd(bxInstruction_c *i)
290 {
291 #if BX_CPU_LEVEL >= 6
292   /* check floating point status word for a pending FPU exceptions */
293   FPU_check_pending_exceptions();
294 
295   BxPackedXmmRegister op;
296   BxPackedMmxRegister result;
297 
298   /* op is a register or memory reference */
299   if (i->modC0()) {
300     op = BX_READ_XMM_REG(i->src());
301   }
302   else {
303     bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
304 
305     if (BX_CPU_THIS_PTR mxcsr.get_MM())
306       read_virtual_xmmword(i->seg(), eaddr, &op);
307     else
308       read_virtual_xmmword_aligned(i->seg(), eaddr, &op);
309   }
310 
311   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
312 
313   MMXSD0(result) = float64_to_int32_round_to_zero(op.xmm64u(0), status);
314   MMXSD1(result) = float64_to_int32_round_to_zero(op.xmm64u(1), status);
315 
316   prepareFPU2MMX(); /* cause FPU2MMX state transition */
317   check_exceptionsSSE(get_exception_flags(status));
318   BX_WRITE_MMX_REG(i->dst(), result);
319 #endif
320 
321   BX_NEXT_INSTR(i);
322 }
323 
324 /*
325  * Opcode: F2 0F 2C
326  * Convert one double precision FP number to doubleword integer using
327  * truncation if the conversion is inexact
328  * Possible floating point exceptions: #I, #P
329  */
CVTTSD2SI_GdWsdR(bxInstruction_c * i)330 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GdWsdR(bxInstruction_c *i)
331 {
332 #if BX_CPU_LEVEL >= 6
333   float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
334 
335   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
336   softfloat_status_word_rc_override(status, i);
337   Bit32s result = float64_to_int32_round_to_zero(op, status);
338   check_exceptionsSSE(get_exception_flags(status));
339 
340   BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
341 #endif
342 
343   BX_NEXT_INSTR(i);
344 }
345 
#if BX_SUPPORT_X86_64
/*
 * CVTTSD2SI with a 64-bit destination: convert one double precision FP
 * number (low quadword of the source XMM register) to a quadword integer
 * using truncation, written to a 64-bit general purpose register.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSD2SI_GqWsdR(bxInstruction_c *i)
{
  float_status_t sw = mxcsr_to_softfloat_status_word(MXCSR);
  /* presumably applies an instruction supplied rounding control override -
   * confirm against the helper */
  softfloat_status_word_rc_override(sw, i);

  float64 src = BX_READ_XMM_REG_LO_QWORD(i->src());
  Bit64s truncated = float64_to_int64_round_to_zero(src, sw);

  /* exceptions are reported before the destination is written */
  check_exceptionsSSE(get_exception_flags(sw));
  BX_WRITE_64BIT_REG(i->dst(), (Bit64u) truncated);

  BX_NEXT_INSTR(i);
}
#endif
360 
361 /*
362  * Opcode: F3 0F 2C
363  * Convert one single precision FP number to doubleword integer using
364  * truncation if the conversion is inexact
365  * Possible floating point exceptions: #I, #P
366  */
CVTTSS2SI_GdWssR(bxInstruction_c * i)367 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GdWssR(bxInstruction_c *i)
368 {
369 #if BX_CPU_LEVEL >= 6
370   float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
371 
372   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
373   softfloat_status_word_rc_override(status, i);
374   Bit32s result = float32_to_int32_round_to_zero(op, status);
375   check_exceptionsSSE(get_exception_flags(status));
376 
377   BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
378 #endif
379 
380   BX_NEXT_INSTR(i);
381 }
382 
#if BX_SUPPORT_X86_64
/*
 * CVTTSS2SI with a 64-bit destination: convert one single precision FP
 * number (low dword of the source XMM register) to a quadword integer
 * using truncation, written to a 64-bit general purpose register.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTSS2SI_GqWssR(bxInstruction_c *i)
{
  float_status_t sw = mxcsr_to_softfloat_status_word(MXCSR);
  /* presumably applies an instruction supplied rounding control override -
   * confirm against the helper */
  softfloat_status_word_rc_override(sw, i);

  float32 src = BX_READ_XMM_REG_LO_DWORD(i->src());
  Bit64s truncated = float32_to_int64_round_to_zero(src, sw);

  /* exceptions are reported before the destination is written */
  check_exceptionsSSE(get_exception_flags(sw));
  BX_WRITE_64BIT_REG(i->dst(), (Bit64u) truncated);

  BX_NEXT_INSTR(i);
}
#endif
397 
398 /*
399  * Opcode: 0F 2D
400  * Convert two single precision FP numbers to two signed doubleword integers
401  * in MMX register. When a conversion is inexact, the value returned is
402  * rounded according to rounding control bits in MXCSR register.
403  * Possible floating point exceptions: #I, #P
404  */
CVTPS2PI_PqWps(bxInstruction_c * i)405 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PI_PqWps(bxInstruction_c *i)
406 {
407 #if BX_CPU_LEVEL >= 6
408   /* check floating point status word for a pending FPU exceptions */
409   FPU_check_pending_exceptions();
410 
411   BxPackedMmxRegister op;
412 
413   /* op is a register or memory reference */
414   if (i->modC0()) {
415     op = BX_READ_XMM_REG_LO_QWORD(i->src());
416   }
417   else {
418     bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
419     /* pointer, segment address pair */
420     op = read_virtual_qword(i->seg(), eaddr);
421   }
422 
423   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
424 
425   MMXSD0(op) = float32_to_int32(MMXUD0(op), status);
426   MMXSD1(op) = float32_to_int32(MMXUD1(op), status);
427 
428   prepareFPU2MMX(); /* cause FPU2MMX state transition */
429   check_exceptionsSSE(get_exception_flags(status));
430   BX_WRITE_MMX_REG(i->dst(), op);
431 #endif
432 
433   BX_NEXT_INSTR(i);
434 }
435 
436 /*
437  * Opcode: 66 0F 2D
438  * Convert two double precision FP numbers to two signed doubleword integers
439  * in MMX register. When a conversion is inexact, the value returned is
440  * rounded according to rounding control bits in MXCSR register.
441  * Possible floating point exceptions: #I, #P
442  */
CVTPD2PI_PqWpd(bxInstruction_c * i)443 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PI_PqWpd(bxInstruction_c *i)
444 {
445 #if BX_CPU_LEVEL >= 6
446   /* check floating point status word for a pending FPU exceptions */
447   FPU_check_pending_exceptions();
448 
449   BxPackedXmmRegister op;
450   BxPackedMmxRegister result;
451 
452   /* op is a register or memory reference */
453   if (i->modC0()) {
454     op = BX_READ_XMM_REG(i->src());
455   }
456   else {
457     bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
458 
459 #if BX_SUPPORT_MISALIGNED_SSE
460     if (BX_CPU_THIS_PTR mxcsr.get_MM())
461       read_virtual_xmmword(i->seg(), eaddr, &op);
462     else
463 #endif
464       read_virtual_xmmword_aligned(i->seg(), eaddr, &op);
465   }
466 
467   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
468 
469   MMXSD0(result) = float64_to_int32(op.xmm64u(0), status);
470   MMXSD1(result) = float64_to_int32(op.xmm64u(1), status);
471 
472   prepareFPU2MMX(); /* cause FPU2MMX state transition */
473   check_exceptionsSSE(get_exception_flags(status));
474   BX_WRITE_MMX_REG(i->dst(), result);
475 #endif
476 
477   BX_NEXT_INSTR(i);
478 }
479 
480 /*
481  * Opcode: F2 0F 2D
482  * Convert one double precision FP number to doubleword integer
483  * When a conversion is inexact, the value returned is rounded according
484  * to rounding control bits in MXCSR register.
485  * Possible floating point exceptions: #I, #P
486  */
CVTSD2SI_GdWsdR(bxInstruction_c * i)487 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GdWsdR(bxInstruction_c *i)
488 {
489 #if BX_CPU_LEVEL >= 6
490   float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
491 
492   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
493   softfloat_status_word_rc_override(status, i);
494   Bit32s result = float64_to_int32(op, status);
495   check_exceptionsSSE(get_exception_flags(status));
496 
497   BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
498 #endif
499 
500   BX_NEXT_INSTR(i);
501 }
502 
#if BX_SUPPORT_X86_64
/*
 * CVTSD2SI with a 64-bit destination: convert one double precision FP
 * number (low quadword of the source XMM register) to a quadword integer,
 * written to a 64-bit general purpose register.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SI_GqWsdR(bxInstruction_c *i)
{
  float_status_t sw = mxcsr_to_softfloat_status_word(MXCSR);
  /* presumably applies an instruction supplied rounding control override -
   * confirm against the helper */
  softfloat_status_word_rc_override(sw, i);

  float64 src = BX_READ_XMM_REG_LO_QWORD(i->src());
  Bit64s rounded = float64_to_int64(src, sw);

  /* exceptions are reported before the destination is written */
  check_exceptionsSSE(get_exception_flags(sw));
  BX_WRITE_64BIT_REG(i->dst(), (Bit64u) rounded);

  BX_NEXT_INSTR(i);
}
#endif
517 
518 /*
519  * Opcode: F3 0F 2D
520  * Convert one single precision FP number to doubleword integer.
521  * When a conversion is inexact, the value returned is rounded according
522  * to rounding control bits in MXCSR register.
523  * Possible floating point exceptions: #I, #P
524  */
CVTSS2SI_GdWssR(bxInstruction_c * i)525 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GdWssR(bxInstruction_c *i)
526 {
527 #if BX_CPU_LEVEL >= 6
528   float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
529 
530   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
531   softfloat_status_word_rc_override(status, i);
532   Bit32s result = float32_to_int32(op, status);
533   check_exceptionsSSE(get_exception_flags(status));
534 
535   BX_WRITE_32BIT_REGZ(i->dst(), (Bit32u) result);
536 #endif
537 
538   BX_NEXT_INSTR(i);
539 }
540 
#if BX_SUPPORT_X86_64
/*
 * CVTSS2SI with a 64-bit destination: convert one single precision FP
 * number (low dword of the source XMM register) to a quadword integer,
 * written to a 64-bit general purpose register.
 */
void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SI_GqWssR(bxInstruction_c *i)
{
  float_status_t sw = mxcsr_to_softfloat_status_word(MXCSR);
  /* presumably applies an instruction supplied rounding control override -
   * confirm against the helper */
  softfloat_status_word_rc_override(sw, i);

  float32 src = BX_READ_XMM_REG_LO_DWORD(i->src());
  Bit64s rounded = float32_to_int64(src, sw);

  /* exceptions are reported before the destination is written */
  check_exceptionsSSE(get_exception_flags(sw));
  BX_WRITE_64BIT_REG(i->dst(), (Bit64u) rounded);

  BX_NEXT_INSTR(i);
}
#endif
555 
556 /*
557  * Opcode: 0F 5A
558  * Convert two single precision FP numbers to two double precision FP numbers
559  * Possible floating point exceptions: #I, #D
560  */
CVTPS2PD_VpdWpsR(bxInstruction_c * i)561 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2PD_VpdWpsR(bxInstruction_c *i)
562 {
563 #if BX_CPU_LEVEL >= 6
564   BxPackedXmmRegister result;
565   BxPackedRegister op;
566 
567   // use packed register as 64-bit value with convinient accessors
568   op.u64 = BX_READ_XMM_REG_LO_QWORD(i->src());
569 
570   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
571 
572   result.xmm64u(0) = float32_to_float64(op.u32(0), status);
573   result.xmm64u(1) = float32_to_float64(op.u32(1), status);
574 
575   check_exceptionsSSE(get_exception_flags(status));
576   BX_WRITE_XMM_REG(i->dst(), result);
577 #endif
578 
579   BX_NEXT_INSTR(i);
580 }
581 
582 /*
583  * Opcode: 66 0F 5A
584  * Convert two double precision FP numbers to two single precision FP.
585  * When a conversion is inexact, the value returned is rounded according
586  * to rounding control bits in MXCSR register.
587  * Possible floating point exceptions: #I, #D, #O, #I, #P
588  */
CVTPD2PS_VpsWpdR(bxInstruction_c * i)589 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2PS_VpsWpdR(bxInstruction_c *i)
590 {
591 #if BX_CPU_LEVEL >= 6
592   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
593 
594   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
595 
596   op.xmm32u(0) = float64_to_float32(op.xmm64u(0), status);
597   op.xmm32u(1) = float64_to_float32(op.xmm64u(1), status);
598   op.xmm64u(1) = 0;
599 
600   check_exceptionsSSE(get_exception_flags(status));
601   BX_WRITE_XMM_REG(i->dst(), op);
602 #endif
603 
604   BX_NEXT_INSTR(i);
605 }
606 
607 /*
608  * Opcode: F2 0F 5A
609  * Convert one double precision FP number to one single precision FP.
610  * When a conversion is inexact, the value returned is rounded according
611  * to rounding control bits in MXCSR register.
612  * Possible floating point exceptions: #I, #D, #O, #I, #P
613  */
CVTSD2SS_VssWsdR(bxInstruction_c * i)614 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSD2SS_VssWsdR(bxInstruction_c *i)
615 {
616 #if BX_CPU_LEVEL >= 6
617   float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
618 
619   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
620   float32 result = float64_to_float32(op, status);
621   check_exceptionsSSE(get_exception_flags(status));
622   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), result);
623 #endif
624 
625   BX_NEXT_INSTR(i);
626 }
627 
628 /*
629  * Opcode: F3 0F 5A
630  * Convert one single precision FP number to one double precision FP.
631  * Possible floating point exceptions: #I, #D
632  */
CVTSS2SD_VsdWssR(bxInstruction_c * i)633 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTSS2SD_VsdWssR(bxInstruction_c *i)
634 {
635 #if BX_CPU_LEVEL >= 6
636   float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
637 
638   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
639   float64 result = float32_to_float64(op, status);
640   check_exceptionsSSE(get_exception_flags(status));
641   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), result);
642 #endif
643 
644   BX_NEXT_INSTR(i);
645 }
646 
647 /*
648  * Opcode: 0F 5B
649  * Convert four signed integers to four single precision FP numbers.
650  * When a conversion is inexact, the value returned is rounded according
651  * to rounding control bits in MXCSR register.
652  * Possible floating point exceptions: #P
653  */
CVTDQ2PS_VpsWdqR(bxInstruction_c * i)654 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PS_VpsWdqR(bxInstruction_c *i)
655 {
656 #if BX_CPU_LEVEL >= 6
657   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
658 
659   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
660 
661   op.xmm32u(0) = int32_to_float32(op.xmm32s(0), status);
662   op.xmm32u(1) = int32_to_float32(op.xmm32s(1), status);
663   op.xmm32u(2) = int32_to_float32(op.xmm32s(2), status);
664   op.xmm32u(3) = int32_to_float32(op.xmm32s(3), status);
665 
666   check_exceptionsSSE(get_exception_flags(status));
667   BX_WRITE_XMM_REG(i->dst(), op);
668 #endif
669 
670   BX_NEXT_INSTR(i);
671 }
672 
673 /*
674  * Opcode: 66 0F 5B
675  * Convert four single precision FP to four doubleword integers.
676  * When a conversion is inexact, the value returned is rounded according
677  * to rounding control bits in MXCSR register.
678  * Possible floating point exceptions: #I, #P
679  */
CVTPS2DQ_VdqWpsR(bxInstruction_c * i)680 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPS2DQ_VdqWpsR(bxInstruction_c *i)
681 {
682 #if BX_CPU_LEVEL >= 6
683   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
684 
685   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
686 
687   op.xmm32s(0) = float32_to_int32(op.xmm32u(0), status);
688   op.xmm32s(1) = float32_to_int32(op.xmm32u(1), status);
689   op.xmm32s(2) = float32_to_int32(op.xmm32u(2), status);
690   op.xmm32s(3) = float32_to_int32(op.xmm32u(3), status);
691 
692   check_exceptionsSSE(get_exception_flags(status));
693   BX_WRITE_XMM_REG(i->dst(), op);
694 #endif
695 
696   BX_NEXT_INSTR(i);
697 }
698 
699 /*
700  * Opcode: F3 0F 5B
701  * Convert four single precision FP to four doubleword integers using
702  * truncation if the conversion is inexact.
703  * Possible floating point exceptions: #I, #P
704  */
CVTTPS2DQ_VdqWpsR(bxInstruction_c * i)705 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPS2DQ_VdqWpsR(bxInstruction_c *i)
706 {
707 #if BX_CPU_LEVEL >= 6
708   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
709 
710   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
711 
712   op.xmm32s(0) = float32_to_int32_round_to_zero(op.xmm32u(0), status);
713   op.xmm32s(1) = float32_to_int32_round_to_zero(op.xmm32u(1), status);
714   op.xmm32s(2) = float32_to_int32_round_to_zero(op.xmm32u(2), status);
715   op.xmm32s(3) = float32_to_int32_round_to_zero(op.xmm32u(3), status);
716 
717   check_exceptionsSSE(get_exception_flags(status));
718   BX_WRITE_XMM_REG(i->dst(), op);
719 #endif
720 
721   BX_NEXT_INSTR(i);
722 }
723 
724 /*
725  * Opcode: 66 0F E6
726  * Convert two double precision FP to two signed doubleword integers using
727  * truncation if the conversion is inexact.
728  * Possible floating point exceptions: #I, #P
729  */
CVTTPD2DQ_VqWpdR(bxInstruction_c * i)730 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTTPD2DQ_VqWpdR(bxInstruction_c *i)
731 {
732 #if BX_CPU_LEVEL >= 6
733   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
734 
735   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
736 
737   op.xmm32s(0) = float64_to_int32_round_to_zero(op.xmm64u(0), status);
738   op.xmm32s(1) = float64_to_int32_round_to_zero(op.xmm64u(1), status);
739   op.xmm64u(1) = 0;
740 
741   check_exceptionsSSE(get_exception_flags(status));
742   BX_WRITE_XMM_REG(i->dst(), op);
743 #endif
744 
745   BX_NEXT_INSTR(i);
746 }
747 
748 /*
749  * Opcode: F2 0F E6
750  * Convert two double precision FP to two signed doubleword integers.
751  * When a conversion is inexact, the value returned is rounded according
752  * to rounding control bits in MXCSR register.
753  * Possible floating point exceptions: #I, #P
754  */
CVTPD2DQ_VqWpdR(bxInstruction_c * i)755 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTPD2DQ_VqWpdR(bxInstruction_c *i)
756 {
757 #if BX_CPU_LEVEL >= 6
758   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
759 
760   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
761 
762   op.xmm32s(0) = float64_to_int32(op.xmm64u(0), status);
763   op.xmm32s(1) = float64_to_int32(op.xmm64u(1), status);
764   op.xmm64u(1) = 0;
765 
766   check_exceptionsSSE(get_exception_flags(status));
767   BX_WRITE_XMM_REG(i->dst(), op);
768 #endif
769 
770   BX_NEXT_INSTR(i);
771 }
772 
773 /*
774  * Opcode: F3 0F E6
775  * Convert two 32bit signed integers from XMM/MEM to two double precision FP
776  * Possible floating point exceptions: -
777  */
CVTDQ2PD_VpdWqR(bxInstruction_c * i)778 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CVTDQ2PD_VpdWqR(bxInstruction_c *i)
779 {
780 #if BX_CPU_LEVEL >= 6
781   BxPackedXmmRegister result;
782   BxPackedRegister op;
783 
784   // use packed register as 64-bit value with convinient accessors
785   op.u64 = BX_READ_XMM_REG_LO_QWORD(i->src());
786 
787   result.xmm64u(0) = int32_to_float64(op.s32(0));
788   result.xmm64u(1) = int32_to_float64(op.s32(1));
789 
790   BX_WRITE_XMM_REG(i->dst(), result);
791 #endif
792 
793   BX_NEXT_INSTR(i);
794 }
795 
796 /*
797  * Opcode: 0F 2E
798  * Compare two single precision FP numbers and set EFLAGS accordintly.
799  * Possible floating point exceptions: #I, #D
800  */
UCOMISS_VssWssR(bxInstruction_c * i)801 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISS_VssWssR(bxInstruction_c *i)
802 {
803 #if BX_CPU_LEVEL >= 6
804   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
805 
806   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
807   softfloat_status_word_rc_override(status, i);
808   int rc = float32_compare_quiet(op1, op2, status);
809   check_exceptionsSSE(get_exception_flags(status));
810   BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
811 #endif
812 
813   BX_NEXT_INSTR(i);
814 }
815 
816 /*
817  * Opcode: 66 0F 2E
818  * Compare two double precision FP numbers and set EFLAGS accordintly.
819  * Possible floating point exceptions: #I, #D
820  */
UCOMISD_VsdWsdR(bxInstruction_c * i)821 void BX_CPP_AttrRegparmN(1) BX_CPU_C::UCOMISD_VsdWsdR(bxInstruction_c *i)
822 {
823 #if BX_CPU_LEVEL >= 6
824   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
825 
826   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
827   softfloat_status_word_rc_override(status, i);
828   int rc = float64_compare_quiet(op1, op2, status);
829   check_exceptionsSSE(get_exception_flags(status));
830   BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
831 #endif
832 
833   BX_NEXT_INSTR(i);
834 }
835 
836 /*
837  * Opcode: 0F 2F
838  * Compare two single precision FP numbers and set EFLAGS accordintly.
839  * Possible floating point exceptions: #I, #D
840  */
COMISS_VssWssR(bxInstruction_c * i)841 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISS_VssWssR(bxInstruction_c *i)
842 {
843 #if BX_CPU_LEVEL >= 6
844   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
845 
846   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
847   softfloat_status_word_rc_override(status, i);
848   int rc = float32_compare(op1, op2, status);
849   check_exceptionsSSE(get_exception_flags(status));
850   BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
851 #endif
852 
853   BX_NEXT_INSTR(i);
854 }
855 
856 /*
857  * Opcode: 66 0F 2F
858  * Compare two double precision FP numbers and set EFLAGS accordintly.
859  * Possible floating point exceptions: #I, #D
860  */
COMISD_VsdWsdR(bxInstruction_c * i)861 void BX_CPP_AttrRegparmN(1) BX_CPU_C::COMISD_VsdWsdR(bxInstruction_c *i)
862 {
863 #if BX_CPU_LEVEL >= 6
864   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
865 
866   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
867   softfloat_status_word_rc_override(status, i);
868   int rc = float64_compare(op1, op2, status);
869   check_exceptionsSSE(get_exception_flags(status));
870   BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
871 #endif
872 
873   BX_NEXT_INSTR(i);
874 }
875 
876 /*
877  * Opcode: 0F 51
878  * Square Root packed single precision.
879  * Possible floating point exceptions: #I, #D, #P
880  */
SQRTPS_VpsWpsR(bxInstruction_c * i)881 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPS_VpsWpsR(bxInstruction_c *i)
882 {
883 #if BX_CPU_LEVEL >= 6
884   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
885   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
886   xmm_sqrtps(&op, status);
887   check_exceptionsSSE(get_exception_flags(status));
888   BX_WRITE_XMM_REG(i->dst(), op);
889 #endif
890 
891   BX_NEXT_INSTR(i);
892 }
893 
894 /*
895  * Opcode: 66 0F 51
896  * Square Root packed double precision.
897  * Possible floating point exceptions: #I, #D, #P
898  */
SQRTPD_VpdWpdR(bxInstruction_c * i)899 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTPD_VpdWpdR(bxInstruction_c *i)
900 {
901 #if BX_CPU_LEVEL >= 6
902   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
903   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
904   xmm_sqrtpd(&op, status);
905   check_exceptionsSSE(get_exception_flags(status));
906   BX_WRITE_XMM_REG(i->dst(), op);
907 #endif
908 
909   BX_NEXT_INSTR(i);
910 }
911 
912 /*
913  * Opcode: F2 0F 51
914  * Square Root scalar double precision.
915  * Possible floating point exceptions: #I, #D, #P
916  */
SQRTSD_VsdWsdR(bxInstruction_c * i)917 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSD_VsdWsdR(bxInstruction_c *i)
918 {
919 #if BX_CPU_LEVEL >= 6
920   float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
921 
922   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
923   op = float64_sqrt(op, status);
924   check_exceptionsSSE(get_exception_flags(status));
925   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op);
926 #endif
927 
928   BX_NEXT_INSTR(i);
929 }
930 
931 /*
932  * Opcode: F3 0F 51
933  * Square Root scalar single precision.
934  * Possible floating point exceptions: #I, #D, #P
935  */
SQRTSS_VssWssR(bxInstruction_c * i)936 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SQRTSS_VssWssR(bxInstruction_c *i)
937 {
938 #if BX_CPU_LEVEL >= 6
939   float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
940 
941   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
942   op = float32_sqrt(op, status);
943   check_exceptionsSSE(get_exception_flags(status));
944   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op);
945 #endif
946 
947   BX_NEXT_INSTR(i);
948 }
949 
950 /*
951  * Opcode: 0F 58
952  * Add packed single precision FP numbers from XMM2/MEM to XMM1.
953  * Possible floating point exceptions: #I, #D, #O, #U, #P
954  */
ADDPS_VpsWpsR(bxInstruction_c * i)955 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPS_VpsWpsR(bxInstruction_c *i)
956 {
957 #if BX_CPU_LEVEL >= 6
958   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
959 
960   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
961   xmm_addps(&op1, &op2, status);
962   check_exceptionsSSE(get_exception_flags(status));
963 
964   BX_WRITE_XMM_REG(i->dst(), op1);
965 #endif
966 
967   BX_NEXT_INSTR(i);
968 }
969 
970 /*
971  * Opcode: 66 0F 58
972  * Add packed double precision FP numbers from XMM2/MEM to XMM1.
973  * Possible floating point exceptions: #I, #D, #O, #U, #P
974  */
ADDPD_VpdWpdR(bxInstruction_c * i)975 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDPD_VpdWpdR(bxInstruction_c *i)
976 {
977 #if BX_CPU_LEVEL >= 6
978   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
979 
980   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
981   xmm_addpd(&op1, &op2, status);
982   check_exceptionsSSE(get_exception_flags(status));
983 
984   BX_WRITE_XMM_REG(i->dst(), op1);
985 #endif
986 
987   BX_NEXT_INSTR(i);
988 }
989 
990 /*
991  * Opcode: F2 0F 58
992  * Add the lower double precision FP number from XMM2/MEM to XMM1.
993  * Possible floating point exceptions: #I, #D, #O, #U, #P
994  */
ADDSD_VsdWsdR(bxInstruction_c * i)995 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSD_VsdWsdR(bxInstruction_c *i)
996 {
997 #if BX_CPU_LEVEL >= 6
998   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
999 
1000   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1001   op1 = float64_add(op1, op2, status);
1002   check_exceptionsSSE(get_exception_flags(status));
1003   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1004 #endif
1005 
1006   BX_NEXT_INSTR(i);
1007 }
1008 
1009 /*
1010  * Opcode: F3 0F 58
1011  * Add the lower single precision FP number from XMM2/MEM to XMM1.
1012  * Possible floating point exceptions: #I, #D, #O, #U, #P
1013  */
ADDSS_VssWssR(bxInstruction_c * i)1014 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSS_VssWssR(bxInstruction_c *i)
1015 {
1016 #if BX_CPU_LEVEL >= 6
1017   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1018 
1019   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1020   op1 = float32_add(op1, op2, status);
1021   check_exceptionsSSE(get_exception_flags(status));
1022   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1023 #endif
1024 
1025   BX_NEXT_INSTR(i);
1026 }
1027 
1028 /*
1029  * Opcode: 0F 59
1030  * Multiply packed single precision FP numbers from XMM2/MEM to XMM1.
1031  * Possible floating point exceptions: #I, #D, #O, #U, #P
1032  */
MULPS_VpsWpsR(bxInstruction_c * i)1033 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPS_VpsWpsR(bxInstruction_c *i)
1034 {
1035 #if BX_CPU_LEVEL >= 6
1036   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1037 
1038   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1039   xmm_mulps(&op1, &op2, status);
1040   check_exceptionsSSE(get_exception_flags(status));
1041 
1042   BX_WRITE_XMM_REG(i->dst(), op1);
1043 #endif
1044 
1045   BX_NEXT_INSTR(i);
1046 }
1047 
1048 /*
1049  * Opcode: 66 0F 59
1050  * Multiply packed double precision FP numbers from XMM2/MEM to XMM1.
1051  * Possible floating point exceptions: #I, #D, #O, #U, #P
1052  */
MULPD_VpdWpdR(bxInstruction_c * i)1053 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULPD_VpdWpdR(bxInstruction_c *i)
1054 {
1055 #if BX_CPU_LEVEL >= 6
1056   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1057 
1058   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1059   xmm_mulpd(&op1, &op2, status);
1060   check_exceptionsSSE(get_exception_flags(status));
1061 
1062   BX_WRITE_XMM_REG(i->dst(), op1);
1063 #endif
1064 
1065   BX_NEXT_INSTR(i);
1066 }
1067 
1068 /*
1069  * Opcode: F2 0F 59
1070  * Multiply the lower double precision FP number from XMM2/MEM to XMM1.
1071  * Possible floating point exceptions: #I, #D, #O, #U, #P
1072  */
MULSD_VsdWsdR(bxInstruction_c * i)1073 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSD_VsdWsdR(bxInstruction_c *i)
1074 {
1075 #if BX_CPU_LEVEL >= 6
1076   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1077 
1078   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1079   op1 = float64_mul(op1, op2, status);
1080   check_exceptionsSSE(get_exception_flags(status));
1081   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1082 #endif
1083 
1084   BX_NEXT_INSTR(i);
1085 }
1086 
1087 /*
1088  * Opcode: F3 0F 59
1089  * Multiply the lower single precision FP number from XMM2/MEM to XMM1.
1090  * Possible floating point exceptions: #I, #D, #O, #U, #P
1091  */
MULSS_VssWssR(bxInstruction_c * i)1092 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MULSS_VssWssR(bxInstruction_c *i)
1093 {
1094 #if BX_CPU_LEVEL >= 6
1095   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1096 
1097   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1098   op1 = float32_mul(op1, op2, status);
1099   check_exceptionsSSE(get_exception_flags(status));
1100   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1101 #endif
1102 
1103   BX_NEXT_INSTR(i);
1104 }
1105 
1106 /*
1107  * Opcode: 0F 5C
1108  * Subtract packed single precision FP numbers from XMM2/MEM to XMM1.
1109  * Possible floating point exceptions: #I, #D, #O, #U, #P
1110  */
SUBPS_VpsWpsR(bxInstruction_c * i)1111 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPS_VpsWpsR(bxInstruction_c *i)
1112 {
1113 #if BX_CPU_LEVEL >= 6
1114   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1115 
1116   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1117   xmm_subps(&op1, &op2, status);
1118   check_exceptionsSSE(get_exception_flags(status));
1119 
1120   BX_WRITE_XMM_REG(i->dst(), op1);
1121 #endif
1122 
1123   BX_NEXT_INSTR(i);
1124 }
1125 
1126 /*
1127  * Opcode: 66 0F 5C
1128  * Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
1129  * Possible floating point exceptions: #I, #D, #O, #U, #P
1130  */
SUBPD_VpdWpdR(bxInstruction_c * i)1131 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBPD_VpdWpdR(bxInstruction_c *i)
1132 {
1133 #if BX_CPU_LEVEL >= 6
1134   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1135 
1136   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1137   xmm_subpd(&op1, &op2, status);
1138   check_exceptionsSSE(get_exception_flags(status));
1139 
1140   BX_WRITE_XMM_REG(i->dst(), op1);
1141 #endif
1142 
1143   BX_NEXT_INSTR(i);
1144 }
1145 
1146 /*
1147  * Opcode: F2 0F 5C
1148  * Subtract the lower double precision FP number from XMM2/MEM to XMM1.
1149  * Possible floating point exceptions: #I, #D, #O, #U, #P
1150  */
SUBSD_VsdWsdR(bxInstruction_c * i)1151 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSD_VsdWsdR(bxInstruction_c *i)
1152 {
1153 #if BX_CPU_LEVEL >= 6
1154   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1155 
1156   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1157   op1 = float64_sub(op1, op2, status);
1158   check_exceptionsSSE(get_exception_flags(status));
1159   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1160 #endif
1161 
1162   BX_NEXT_INSTR(i);
1163 }
1164 
1165 /*
1166  * Opcode: F3 0F 5C
1167  * Subtract the lower single precision FP number from XMM2/MEM to XMM1.
1168  * Possible floating point exceptions: #I, #D, #O, #U, #P
1169  */
SUBSS_VssWssR(bxInstruction_c * i)1170 void BX_CPP_AttrRegparmN(1) BX_CPU_C::SUBSS_VssWssR(bxInstruction_c *i)
1171 {
1172 #if BX_CPU_LEVEL >= 6
1173   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1174 
1175   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1176   op1 = float32_sub(op1, op2, status);
1177   check_exceptionsSSE(get_exception_flags(status));
1178   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1179 #endif
1180 
1181   BX_NEXT_INSTR(i);
1182 }
1183 
1184 /*
1185  * Opcode: 0F 5D
1186  * Calculate the minimum single precision FP between XMM2/MEM to XMM1.
1187  * Possible floating point exceptions: #I, #D
1188  */
MINPS_VpsWpsR(bxInstruction_c * i)1189 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPS_VpsWpsR(bxInstruction_c *i)
1190 {
1191 #if BX_CPU_LEVEL >= 6
1192   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1193 
1194   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1195   xmm_minps(&op1, &op2, status);
1196   check_exceptionsSSE(get_exception_flags(status));
1197 
1198   BX_WRITE_XMM_REG(i->dst(), op1);
1199 #endif
1200 
1201   BX_NEXT_INSTR(i);
1202 }
1203 
1204 /*
1205  * Opcode: 66 0F 5D
1206  * Calculate the minimum double precision FP between XMM2/MEM to XMM1.
1207  * Possible floating point exceptions: #I, #D
1208  */
MINPD_VpdWpdR(bxInstruction_c * i)1209 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINPD_VpdWpdR(bxInstruction_c *i)
1210 {
1211 #if BX_CPU_LEVEL >= 6
1212   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1213 
1214   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1215   xmm_minpd(&op1, &op2, status);
1216   check_exceptionsSSE(get_exception_flags(status));
1217 
1218   BX_WRITE_XMM_REG(i->dst(), op1);
1219 #endif
1220 
1221   BX_NEXT_INSTR(i);
1222 }
1223 
1224 /*
1225  * Opcode: F2 0F 5D
1226  * Calculate the minimum scalar double precision FP between XMM2/MEM to XMM1.
1227  * Possible floating point exceptions: #I, #D
1228  */
MINSD_VsdWsdR(bxInstruction_c * i)1229 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSD_VsdWsdR(bxInstruction_c *i)
1230 {
1231 #if BX_CPU_LEVEL >= 6
1232   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1233 
1234   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1235   op1 = float64_min(op1, op2, status);
1236   check_exceptionsSSE(get_exception_flags(status));
1237   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1238 #endif
1239 
1240   BX_NEXT_INSTR(i);
1241 }
1242 
1243 /*
1244  * Opcode: F3 0F 5D
1245  * Calculate the minimum scalar single precision FP between XMM2/MEM to XMM1.
1246  * Possible floating point exceptions: #I, #D
1247  */
MINSS_VssWssR(bxInstruction_c * i)1248 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MINSS_VssWssR(bxInstruction_c *i)
1249 {
1250 #if BX_CPU_LEVEL >= 6
1251   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1252 
1253   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1254   op1 = float32_min(op1, op2, status);
1255   check_exceptionsSSE(get_exception_flags(status));
1256   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1257 #endif
1258 
1259   BX_NEXT_INSTR(i);
1260 }
1261 
1262 /*
1263  * Opcode: 0F 5E
1264  * Divide packed single precision FP numbers from XMM2/MEM to XMM1.
1265  * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1266  */
DIVPS_VpsWpsR(bxInstruction_c * i)1267 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPS_VpsWpsR(bxInstruction_c *i)
1268 {
1269 #if BX_CPU_LEVEL >= 6
1270   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1271 
1272   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1273   xmm_divps(&op1, &op2, status);
1274   check_exceptionsSSE(get_exception_flags(status));
1275 
1276   BX_WRITE_XMM_REG(i->dst(), op1);
1277 #endif
1278 
1279   BX_NEXT_INSTR(i);
1280 }
1281 
1282 /*
1283  * Opcode: 66 0F 5E
1284  * Divide packed double precision FP numbers from XMM2/MEM to XMM1.
1285  * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1286  */
DIVPD_VpdWpdR(bxInstruction_c * i)1287 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVPD_VpdWpdR(bxInstruction_c *i)
1288 {
1289 #if BX_CPU_LEVEL >= 6
1290   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1291 
1292   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1293   xmm_divpd(&op1, &op2, status);
1294   check_exceptionsSSE(get_exception_flags(status));
1295 
1296   BX_WRITE_XMM_REG(i->dst(), op1);
1297 #endif
1298 
1299   BX_NEXT_INSTR(i);
1300 }
1301 
1302 /*
1303  * Opcode: F2 0F 5E
1304  * Divide the lower double precision FP number from XMM2/MEM to XMM1.
1305  * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1306  */
DIVSD_VsdWsdR(bxInstruction_c * i)1307 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSD_VsdWsdR(bxInstruction_c *i)
1308 {
1309 #if BX_CPU_LEVEL >= 6
1310   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1311 
1312   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1313   op1 = float64_div(op1, op2, status);
1314   check_exceptionsSSE(get_exception_flags(status));
1315   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1316 #endif
1317 
1318   BX_NEXT_INSTR(i);
1319 }
1320 
1321 /*
1322  * Opcode: F3 0F 5E
1323  * Divide the lower single precision FP number from XMM2/MEM to XMM1.
1324  * Possible floating point exceptions: #I, #D, #Z, #O, #U, #P
1325  */
DIVSS_VssWssR(bxInstruction_c * i)1326 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DIVSS_VssWssR(bxInstruction_c *i)
1327 {
1328 #if BX_CPU_LEVEL >= 6
1329   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1330 
1331   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1332   op1 = float32_div(op1, op2, status);
1333   check_exceptionsSSE(get_exception_flags(status));
1334   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1335 #endif
1336 
1337   BX_NEXT_INSTR(i);
1338 }
1339 
1340 /*
1341  * Opcode: 0F 5F
1342  * Calculate the maximum single precision FP between XMM2/MEM to XMM1.
1343  * Possible floating point exceptions: #I, #D
1344  */
MAXPS_VpsWpsR(bxInstruction_c * i)1345 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPS_VpsWpsR(bxInstruction_c *i)
1346 {
1347 #if BX_CPU_LEVEL >= 6
1348   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1349 
1350   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1351   xmm_maxps(&op1, &op2, status);
1352   check_exceptionsSSE(get_exception_flags(status));
1353 
1354   BX_WRITE_XMM_REG(i->dst(), op1);
1355 #endif
1356 
1357   BX_NEXT_INSTR(i);
1358 }
1359 
1360 /*
1361  * Opcode: 66 0F 5F
1362  * Calculate the maximum double precision FP between XMM2/MEM to XMM1.
1363  * Possible floating point exceptions: #I, #D
1364  */
MAXPD_VpdWpdR(bxInstruction_c * i)1365 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXPD_VpdWpdR(bxInstruction_c *i)
1366 {
1367 #if BX_CPU_LEVEL >= 6
1368   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1369 
1370   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1371   xmm_maxpd(&op1, &op2, status);
1372   check_exceptionsSSE(get_exception_flags(status));
1373 
1374   BX_WRITE_XMM_REG(i->dst(), op1);
1375 #endif
1376 
1377   BX_NEXT_INSTR(i);
1378 }
1379 
1380 /*
1381  * Opcode: F2 0F 5F
1382  * Calculate the maximum scalar double precision FP between XMM2/MEM to XMM1.
1383  * Possible floating point exceptions: #I, #D
1384  */
MAXSD_VsdWsdR(bxInstruction_c * i)1385 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSD_VsdWsdR(bxInstruction_c *i)
1386 {
1387 #if BX_CPU_LEVEL >= 6
1388   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1389 
1390   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1391   op1 = float64_max(op1, op2, status);
1392   check_exceptionsSSE(get_exception_flags(status));
1393   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1394 #endif
1395 
1396   BX_NEXT_INSTR(i);
1397 }
1398 
1399 /*
1400  * Opcode: F3 0F 5F
1401  * Calculate the maxumim scalar single precision FP between XMM2/MEM to XMM1.
1402  * Possible floating point exceptions: #I, #D
1403  */
MAXSS_VssWssR(bxInstruction_c * i)1404 void BX_CPP_AttrRegparmN(1) BX_CPU_C::MAXSS_VssWssR(bxInstruction_c *i)
1405 {
1406 #if BX_CPU_LEVEL >= 6
1407   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1408 
1409   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1410   op1 = float32_max(op1, op2, status);
1411   check_exceptionsSSE(get_exception_flags(status));
1412   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1413 #endif
1414 
1415   BX_NEXT_INSTR(i);
1416 }
1417 
1418 /*
1419  * Opcode: 66 0F 7C
1420  * Add horizontally packed double precision FP in XMM2/MEM from XMM1.
1421  * Possible floating point exceptions: #I, #D, #O, #U, #P
1422  */
HADDPD_VpdWpdR(bxInstruction_c * i)1423 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPD_VpdWpdR(bxInstruction_c *i)
1424 {
1425 #if BX_CPU_LEVEL >= 6
1426   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1427 
1428   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1429   xmm_haddpd(&op1, &op2, status);
1430   check_exceptionsSSE(get_exception_flags(status));
1431 
1432   BX_WRITE_XMM_REG(i->dst(), op1);
1433 #endif
1434 
1435   BX_NEXT_INSTR(i);
1436 }
1437 
1438 /*
1439  * Opcode: F2 0F 7C
1440  * Add horizontally packed single precision FP in XMM2/MEM from XMM1.
1441  * Possible floating point exceptions: #I, #D, #O, #U, #P
1442  */
HADDPS_VpsWpsR(bxInstruction_c * i)1443 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HADDPS_VpsWpsR(bxInstruction_c *i)
1444 {
1445 #if BX_CPU_LEVEL >= 6
1446   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1447 
1448   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1449   xmm_haddps(&op1, &op2, status);
1450   check_exceptionsSSE(get_exception_flags(status));
1451 
1452   BX_WRITE_XMM_REG(i->dst(), op1);
1453 #endif
1454 
1455   BX_NEXT_INSTR(i);
1456 }
1457 
1458 /*
1459  * Opcode: 66 0F 7D
1460  * Subtract horizontally packed double precision FP in XMM2/MEM from XMM1.
1461  * Possible floating point exceptions: #I, #D, #O, #U, #P
1462  */
HSUBPD_VpdWpdR(bxInstruction_c * i)1463 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPD_VpdWpdR(bxInstruction_c *i)
1464 {
1465 #if BX_CPU_LEVEL >= 6
1466   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1467 
1468   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1469   xmm_hsubpd(&op1, &op2, status);
1470   check_exceptionsSSE(get_exception_flags(status));
1471 
1472   BX_WRITE_XMM_REG(i->dst(), op1);
1473 #endif
1474 
1475   BX_NEXT_INSTR(i);
1476 }
1477 
1478 /*
1479  * Opcode: F2 0F 7D
1480  * Subtract horizontally packed single precision FP in XMM2/MEM from XMM1.
1481  * Possible floating point exceptions: #I, #D, #O, #U, #P
1482  */
HSUBPS_VpsWpsR(bxInstruction_c * i)1483 void BX_CPP_AttrRegparmN(1) BX_CPU_C::HSUBPS_VpsWpsR(bxInstruction_c *i)
1484 {
1485 #if BX_CPU_LEVEL >= 6
1486   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1487 
1488   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1489   xmm_hsubps(&op1, &op2, status);
1490   check_exceptionsSSE(get_exception_flags(status));
1491 
1492   BX_WRITE_XMM_REG(i->dst(), op1);
1493 #endif
1494 
1495   BX_NEXT_INSTR(i);
1496 }
1497 
1498 /*
1499  * Opcode: 0F C2
1500  * Compare packed single precision FP values using Ib as comparison predicate.
1501  * Possible floating point exceptions: #I, #D
1502  */
CMPPS_VpsWpsIbR(bxInstruction_c * i)1503 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPS_VpsWpsIbR(bxInstruction_c *i)
1504 {
1505 #if BX_CPU_LEVEL >= 6
1506   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1507 
1508   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1509   int ib = i->Ib() & 7;
1510 
1511   op1.xmm32u(0) = compare32[ib](op1.xmm32u(0), op2.xmm32u(0), status) ? 0xFFFFFFFF : 0;
1512   op1.xmm32u(1) = compare32[ib](op1.xmm32u(1), op2.xmm32u(1), status) ? 0xFFFFFFFF : 0;
1513   op1.xmm32u(2) = compare32[ib](op1.xmm32u(2), op2.xmm32u(2), status) ? 0xFFFFFFFF : 0;
1514   op1.xmm32u(3) = compare32[ib](op1.xmm32u(3), op2.xmm32u(3), status) ? 0xFFFFFFFF : 0;
1515 
1516   check_exceptionsSSE(get_exception_flags(status));
1517   BX_WRITE_XMM_REG(i->dst(), op1);
1518 #endif
1519 
1520   BX_NEXT_INSTR(i);
1521 }
1522 
1523 /*
1524  * Opcode: 66 0F C2
1525  * Compare packed double precision FP values using Ib as comparison predicate.
1526  * Possible floating point exceptions: #I, #D
1527  */
CMPPD_VpdWpdIbR(bxInstruction_c * i)1528 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPPD_VpdWpdIbR(bxInstruction_c *i)
1529 {
1530 #if BX_CPU_LEVEL >= 6
1531   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1532 
1533   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1534   int ib = i->Ib() & 7;
1535 
1536   op1.xmm64u(0) = compare64[ib](op1.xmm64u(0), op2.xmm64u(0), status) ?
1537      BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
1538   op1.xmm64u(1) = compare64[ib](op1.xmm64u(1), op2.xmm64u(1), status) ?
1539      BX_CONST64(0xFFFFFFFFFFFFFFFF) : 0;
1540 
1541   check_exceptionsSSE(get_exception_flags(status));
1542   BX_WRITE_XMM_REG(i->dst(), op1);
1543 #endif
1544 
1545   BX_NEXT_INSTR(i);
1546 }
1547 
1548 /*
1549  * Opcode: F2 0F C2
1550  * Compare double precision FP values using Ib as comparison predicate.
1551  * Possible floating point exceptions: #I, #D
1552  */
CMPSD_VsdWsdIbR(bxInstruction_c * i)1553 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSD_VsdWsdIbR(bxInstruction_c *i)
1554 {
1555 #if BX_CPU_LEVEL >= 6
1556   float64 op1 = BX_READ_XMM_REG_LO_QWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_QWORD(i->src());
1557 
1558   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1559   int ib = i->Ib() & 7;
1560 
1561   if(compare64[ib](op1, op2, status)) {
1562     op1 = BX_CONST64(0xFFFFFFFFFFFFFFFF);
1563   } else {
1564     op1 = 0;
1565   }
1566 
1567   check_exceptionsSSE(get_exception_flags(status));
1568   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op1);
1569 #endif
1570 
1571   BX_NEXT_INSTR(i);
1572 }
1573 
1574 /*
1575  * Opcode: F3 0F C2
1576  * Compare single precision FP values using Ib as comparison predicate.
1577  * Possible floating point exceptions: #I, #D
1578  */
CMPSS_VssWssIbR(bxInstruction_c * i)1579 void BX_CPP_AttrRegparmN(1) BX_CPU_C::CMPSS_VssWssIbR(bxInstruction_c *i)
1580 {
1581 #if BX_CPU_LEVEL >= 6
1582   float32 op1 = BX_READ_XMM_REG_LO_DWORD(i->dst()), op2 = BX_READ_XMM_REG_LO_DWORD(i->src());
1583 
1584   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1585   int ib = i->Ib() & 7;
1586 
1587   op1 = compare32[ib](op1, op2, status) ? 0xFFFFFFFF : 0;
1588 
1589   check_exceptionsSSE(get_exception_flags(status));
1590   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op1);
1591 #endif
1592 
1593   BX_NEXT_INSTR(i);
1594 }
1595 
1596 /*
1597  * Opcode: 66 0F D0
1598  * Add/Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
1599  * Possible floating point exceptions: #I, #D, #O, #U, #P
1600  */
ADDSUBPD_VpdWpdR(bxInstruction_c * i)1601 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPD_VpdWpdR(bxInstruction_c *i)
1602 {
1603 #if BX_CPU_LEVEL >= 6
1604   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1605 
1606   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1607   xmm_addsubpd(&op1, &op2, status);
1608   check_exceptionsSSE(get_exception_flags(status));
1609 
1610   BX_WRITE_XMM_REG(i->dst(), op1);
1611 #endif
1612 
1613   BX_NEXT_INSTR(i);
1614 }
1615 
1616 /*
1617  * Opcode: F2 0F D0
1618  * Add/Substract packed single precision FP numbers from XMM2/MEM to XMM1.
1619  * Possible floating point exceptions: #I, #D, #O, #U, #P
1620  */
ADDSUBPS_VpsWpsR(bxInstruction_c * i)1621 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ADDSUBPS_VpsWpsR(bxInstruction_c *i)
1622 {
1623 #if BX_CPU_LEVEL >= 6
1624   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst()), op2 = BX_READ_XMM_REG(i->src());
1625 
1626   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1627   xmm_addsubps(&op1, &op2, status);
1628   check_exceptionsSSE(get_exception_flags(status));
1629 
1630   BX_WRITE_XMM_REG(i->dst(), op1);
1631 #endif
1632 
1633   BX_NEXT_INSTR(i);
1634 }
1635 
1636 #if BX_CPU_LEVEL >= 6
1637 
1638 /* 66 0F 3A 08 */
ROUNDPS_VpsWpsIbR(bxInstruction_c * i)1639 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPS_VpsWpsIbR(bxInstruction_c *i)
1640 {
1641   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1642 
1643   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1644   Bit8u control = i->Ib();
1645 
1646   // override MXCSR rounding mode with control coming from imm8
1647   if ((control & 0x4) == 0)
1648     status.float_rounding_mode = control & 0x3;
1649   // ignore precision exception result
1650   if (control & 0x8)
1651     status.float_suppress_exception |= float_flag_inexact;
1652 
1653   op.xmm32u(0) = float32_round_to_int(op.xmm32u(0), status);
1654   op.xmm32u(1) = float32_round_to_int(op.xmm32u(1), status);
1655   op.xmm32u(2) = float32_round_to_int(op.xmm32u(2), status);
1656   op.xmm32u(3) = float32_round_to_int(op.xmm32u(3), status);
1657 
1658   check_exceptionsSSE(get_exception_flags(status));
1659   BX_WRITE_XMM_REG(i->dst(), op);
1660 
1661   BX_NEXT_INSTR(i);
1662 }
1663 
1664 /* 66 0F 3A 09 */
ROUNDPD_VpdWpdIbR(bxInstruction_c * i)1665 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDPD_VpdWpdIbR(bxInstruction_c *i)
1666 {
1667   BxPackedXmmRegister op = BX_READ_XMM_REG(i->src());
1668 
1669   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1670   Bit8u control = i->Ib();
1671 
1672   // override MXCSR rounding mode with control coming from imm8
1673   if ((control & 0x4) == 0)
1674     status.float_rounding_mode = control & 0x3;
1675   // ignore precision exception result
1676   if (control & 0x8)
1677     status.float_suppress_exception |= float_flag_inexact;
1678 
1679   op.xmm64u(0) = float64_round_to_int(op.xmm64u(0), status);
1680   op.xmm64u(1) = float64_round_to_int(op.xmm64u(1), status);
1681 
1682   check_exceptionsSSE(get_exception_flags(status));
1683   BX_WRITE_XMM_REG(i->dst(), op);
1684 
1685   BX_NEXT_INSTR(i);
1686 }
1687 
1688 /* 66 0F 3A 0A */
ROUNDSS_VssWssIbR(bxInstruction_c * i)1689 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSS_VssWssIbR(bxInstruction_c *i)
1690 {
1691   float32 op = BX_READ_XMM_REG_LO_DWORD(i->src());
1692 
1693   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1694   Bit8u control = i->Ib();
1695 
1696   // override MXCSR rounding mode with control coming from imm8
1697   if ((control & 0x4) == 0)
1698     status.float_rounding_mode = control & 0x3;
1699   // ignore precision exception result
1700   if (control & 0x8)
1701     status.float_suppress_exception |= float_flag_inexact;
1702 
1703   op = float32_round_to_int(op, status);
1704 
1705   check_exceptionsSSE(get_exception_flags(status));
1706   BX_WRITE_XMM_REG_LO_DWORD(i->dst(), op);
1707 
1708   BX_NEXT_INSTR(i);
1709 }
1710 
1711 /* 66 0F 3A 0B */
ROUNDSD_VsdWsdIbR(bxInstruction_c * i)1712 void BX_CPP_AttrRegparmN(1) BX_CPU_C::ROUNDSD_VsdWsdIbR(bxInstruction_c *i)
1713 {
1714   float64 op = BX_READ_XMM_REG_LO_QWORD(i->src());
1715 
1716   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1717   Bit8u control = i->Ib();
1718 
1719   // override MXCSR rounding mode with control coming from imm8
1720   if ((control & 0x4) == 0)
1721     status.float_rounding_mode = control & 0x3;
1722   // ignore precision exception result
1723   if (control & 0x8)
1724     status.float_suppress_exception |= float_flag_inexact;
1725 
1726   op = float64_round_to_int(op, status);
1727 
1728   check_exceptionsSSE(get_exception_flags(status));
1729   BX_WRITE_XMM_REG_LO_QWORD(i->dst(), op);
1730 
1731   BX_NEXT_INSTR(i);
1732 }
1733 
1734 /* Opcode: 66 0F 3A 40
1735  * Selectively multiply packed SP floating-point values from xmm1 with
1736  * packed SP floating-point values from xmm2, add and selectively
1737  * store the packed SP floating-point values or zero values to xmm1
1738  */
DPPS_VpsWpsIbR(bxInstruction_c * i)1739 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIbR(bxInstruction_c *i)
1740 {
1741   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->dst());
1742   BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src());
1743   Bit8u mask = i->Ib();
1744 
1745   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1746 
1747   // op1: [A, B, C, D]
1748   // op2: [E, F, G, H]
1749 
1750   // after multiplication: op1 = [EA, BF, CG, DH]
1751   xmm_mulps_mask(&op1, &op2, status, mask >> 4);
1752   check_exceptionsSSE(get_exception_flags(status));
1753 
1754   // shuffle op2 = [BF, AE, DH, CG]
1755   xmm_shufps(&op2, &op1, &op1, 0xb1);
1756 
1757   // op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
1758   xmm_addps(&op2, &op1, status);
1759   check_exceptionsSSE(get_exception_flags(status));
1760 
1761   // shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
1762   xmm_shufpd(&op1, &op2, &op2, 0x1);
1763 
1764   // op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
1765   xmm_addps_mask(&op2, &op1, status, mask);
1766   check_exceptionsSSE(get_exception_flags(status));
1767 
1768   BX_WRITE_XMM_REG(i->dst(), op2);
1769 
1770   BX_NEXT_INSTR(i);
1771 }
1772 
1773 /* Opcode: 66 0F 3A 41
1774  * Selectively multiply packed DP floating-point values from xmm1 with
1775  * packed DP floating-point values from xmm2, add and selectively
1776  * store the packed DP floating-point values or zero values to xmm1
1777  */
DPPD_VpdHpdWpdIbR(bxInstruction_c * i)1778 void BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdHpdWpdIbR(bxInstruction_c *i)
1779 {
1780   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
1781   BxPackedXmmRegister op2 = BX_READ_XMM_REG(i->src2());
1782   Bit8u mask = i->Ib();
1783 
1784   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
1785 
1786   // op1: [A, B]
1787   // op2: [C, D]
1788 
1789   // after multiplication: op1 = [AC, BD]
1790   xmm_mulpd_mask(&op1, &op2, status, mask >> 4);
1791   check_exceptionsSSE(get_exception_flags(status));
1792 
1793   // shuffle op2 = [BD, AC]
1794   xmm_shufpd(&op2, &op1, &op1, 0x1);
1795 
1796   // op1 = [AC+BD, BD+AC]
1797   xmm_addpd_mask(&op1, &op2, status, mask);
1798   check_exceptionsSSE(get_exception_flags(status));
1799 
1800   BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
1801 
1802   BX_NEXT_INSTR(i);
1803 }
1804 
1805 #endif // BX_CPU_LEVEL >= 6
1806