1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx_cvt.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 //   Copyright (c) 2011-2018 Stanislav Shwartsman
6 //          Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 //  This library is free software; you can redistribute it and/or
9 //  modify it under the terms of the GNU Lesser General Public
10 //  License as published by the Free Software Foundation; either
11 //  version 2 of the License, or (at your option) any later version.
12 //
13 //  This library is distributed in the hope that it will be useful,
14 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 //  Lesser General Public License for more details.
17 //
18 //  You should have received a copy of the GNU Lesser General Public
19 //  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23 
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28 
29 #if BX_SUPPORT_AVX
30 
31 extern float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr);
32 
33 /* Opcode: VEX.F3.0F 2A (VEX.W=0) */
VCVTSI2SS_VssEdR(bxInstruction_c * i)34 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SS_VssEdR(bxInstruction_c *i)
35 {
36   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
37 
38   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
39   softfloat_status_word_rc_override(status, i);
40 
41   op1.xmm32u(0) = int32_to_float32(BX_READ_32BIT_REG(i->src2()), status);
42 
43   check_exceptionsSSE(get_exception_flags(status));
44 
45   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
46   BX_NEXT_INSTR(i);
47 }
48 
49 /* Opcode: VEX.F3.0F 2A (VEX.W=1) */
VCVTSI2SS_VssEqR(bxInstruction_c * i)50 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SS_VssEqR(bxInstruction_c *i)
51 {
52   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
53 
54   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
55   softfloat_status_word_rc_override(status, i);
56 
57   op1.xmm32u(0) = int64_to_float32(BX_READ_64BIT_REG(i->src2()), status);
58 
59   check_exceptionsSSE(get_exception_flags(status));
60 
61   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
62   BX_NEXT_INSTR(i);
63 }
64 
65 /* Opcode: VEX.F2.0F 2A (VEX.W=0) */
VCVTSI2SD_VsdEdR(bxInstruction_c * i)66 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SD_VsdEdR(bxInstruction_c *i)
67 {
68   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
69   op1.xmm64u(0) = int32_to_float64(BX_READ_32BIT_REG(i->src2()));
70   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
71 
72   BX_NEXT_INSTR(i);
73 }
74 
75 /* Opcode: VEX.F2.0F 2A (VEX.W=1) */
VCVTSI2SD_VsdEqR(bxInstruction_c * i)76 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SD_VsdEqR(bxInstruction_c *i)
77 {
78   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
79 
80   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
81   softfloat_status_word_rc_override(status, i);
82 
83   op1.xmm64u(0) = int64_to_float64(BX_READ_64BIT_REG(i->src2()), status);
84 
85   check_exceptionsSSE(get_exception_flags(status));
86 
87   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
88   BX_NEXT_INSTR(i);
89 }
90 
91 /* Opcode: VEX.0F 5A (VEX.W ignore, VEX.VVV #UD) */
VCVTPS2PD_VpdWpsR(bxInstruction_c * i)92 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c *i)
93 {
94   BxPackedAvxRegister result;
95   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
96   unsigned len = i->getVL();
97 
98   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
99   softfloat_status_word_rc_override(status, i);
100 
101   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
102      result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status);
103   }
104 
105   check_exceptionsSSE(get_exception_flags(status));
106 
107   BX_WRITE_AVX_REGZ(i->dst(), result, len);
108   BX_NEXT_INSTR(i);
109 }
110 
111 /* Opcode: VEX.66.0F 5A (VEX.W ignore, VEX.VVV #UD) */
VCVTPD2PS_VpsWpdR(bxInstruction_c * i)112 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_VpsWpdR(bxInstruction_c *i)
113 {
114   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
115   unsigned len = i->getVL();
116 
117   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
118   softfloat_status_word_rc_override(status, i);
119 
120   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
121     result.vmm32u(n) = float64_to_float32(op.vmm64u(n), status);
122   }
123 
124   check_exceptionsSSE(get_exception_flags(status));
125 
126   if (len == BX_VL128) {
127     BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
128   }
129   else {
130     BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
131   }
132 
133   BX_NEXT_INSTR(i);
134 }
135 
136 /* Opcode: VEX.F3.0F 5A (VEX.W ignore) */
VCVTSS2SD_VsdWssR(bxInstruction_c * i)137 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_VsdWssR(bxInstruction_c *i)
138 {
139   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
140   float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
141 
142   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
143   softfloat_status_word_rc_override(status, i);
144   op1.xmm64u(0) = float32_to_float64(op2, status);
145   check_exceptionsSSE(get_exception_flags(status));
146 
147   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
148 
149   BX_NEXT_INSTR(i);
150 }
151 
/* Opcode: VEX.F2.0F 5A (VEX.W ignore) */
VCVTSD2SS_VssWsdR(bxInstruction_c * i)153 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_VssWsdR(bxInstruction_c *i)
154 {
155   BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
156   float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
157 
158   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
159   softfloat_status_word_rc_override(status, i);
160   op1.xmm32u(0) = float64_to_float32(op2, status);
161   check_exceptionsSSE(get_exception_flags(status));
162 
163   BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
164 
165   BX_NEXT_INSTR(i);
166 }
167 
168 /* Opcode: VEX.NDS.0F 5B (VEX.W ignore, VEX.VVV #UD) */
VCVTDQ2PS_VpsWdqR(bxInstruction_c * i)169 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c *i)
170 {
171   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
172   unsigned len = i->getVL();
173 
174   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
175   softfloat_status_word_rc_override(status, i);
176 
177   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
178     op.vmm32u(n) = int32_to_float32(op.vmm32s(n), status);
179   }
180 
181   check_exceptionsSSE(get_exception_flags(status));
182 
183   BX_WRITE_AVX_REGZ(i->dst(), op, len);
184   BX_NEXT_INSTR(i);
185 }
186 
187 /* Opcode: VEX.NDS.66.0F 5B (VEX.W ignore, VEX.VVV #UD) */
VCVTPS2DQ_VdqWpsR(bxInstruction_c * i)188 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c *i)
189 {
190   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
191   unsigned len = i->getVL();
192 
193   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
194   softfloat_status_word_rc_override(status, i);
195 
196   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
197     op.vmm32s(n) = float32_to_int32(op.vmm32u(n), status);
198   }
199 
200   check_exceptionsSSE(get_exception_flags(status));
201 
202   BX_WRITE_AVX_REGZ(i->dst(), op, len);
203   BX_NEXT_INSTR(i);
204 }
205 
206 /* Opcode: VEX.NDS.F3.0F 5B (VEX.W ignore, VEX.VVV #UD) */
VCVTTPS2DQ_VdqWpsR(bxInstruction_c * i)207 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c *i)
208 {
209   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
210   unsigned len = i->getVL();
211 
212   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
213   softfloat_status_word_rc_override(status, i);
214 
215   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
216     op.vmm32s(n) = float32_to_int32_round_to_zero(op.vmm32u(n), status);
217   }
218 
219   check_exceptionsSSE(get_exception_flags(status));
220 
221   BX_WRITE_AVX_REGZ(i->dst(), op, len);
222   BX_NEXT_INSTR(i);
223 }
224 
225 /* Opcode: VEX.66.0F.E6 (VEX.W ignore, VEX.VVV #UD) */
VCVTTPD2DQ_VdqWpdR(bxInstruction_c * i)226 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VdqWpdR(bxInstruction_c *i)
227 {
228   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
229   unsigned len = i->getVL();
230 
231   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
232   softfloat_status_word_rc_override(status, i);
233 
234   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
235     result.vmm32s(n) = float64_to_int32_round_to_zero(op.vmm64u(n), status);
236   }
237 
238   check_exceptionsSSE(get_exception_flags(status));
239 
240   if (len == BX_VL128) {
241     BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
242   }
243   else {
244     BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
245   }
246 
247   BX_NEXT_INSTR(i);
248 }
249 
250 /* Opcode: VEX.F2.0F.E6 (VEX.W ignore, VEX.VVV #UD) */
VCVTPD2DQ_VdqWpdR(bxInstruction_c * i)251 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VdqWpdR(bxInstruction_c *i)
252 {
253   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
254   unsigned len = i->getVL();
255 
256   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
257   softfloat_status_word_rc_override(status, i);
258 
259   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
260     result.vmm32s(n) = float64_to_int32(op.vmm64u(n), status);
261   }
262 
263   check_exceptionsSSE(get_exception_flags(status));
264 
265   if (len == BX_VL128) {
266     BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
267   }
268   else {
269     BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
270   }
271 
272   BX_NEXT_INSTR(i);
273 }
274 
275 /* Opcode: VEX.F3.0F.E6 (VEX.W ignore, VEX.VVV #UD) */
VCVTDQ2PD_VpdWdqR(bxInstruction_c * i)276 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_VpdWdqR(bxInstruction_c *i)
277 {
278   BxPackedAvxRegister result;
279   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
280   unsigned len = i->getVL();
281 
282   for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
283      result.vmm64u(n) = int32_to_float64(op.ymm32s(n));
284   }
285 
286   BX_WRITE_AVX_REGZ(i->dst(), result, len);
287   BX_NEXT_INSTR(i);
288 }
289 
290 // float16 convert
291 
/* Opcode: VEX.66.0F.38.13 (VEX.W=0) */
VCVTPH2PS_VpsWpsR(bxInstruction_c * i)293 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsR(bxInstruction_c *i)
294 {
295   BxPackedAvxRegister result;
296   BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
297   unsigned len = i->getVL();
298 
299   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
300   status.denormals_are_zeros = 0; // ignore MXCSR.DAZ
301   // no denormal exception is reported on MXCSR
302   status.float_suppress_exception = float_flag_denormal;
303 
304   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
305      result.vmm32u(n) = float16_to_float32(op.ymm16u(n), status);
306   }
307 
308   check_exceptionsSSE(get_exception_flags(status));
309 
310   BX_WRITE_AVX_REGZ(i->dst(), result, len);
311   BX_NEXT_INSTR(i);
312 }
313 
314 /* Opcode: VEX.66.0F.3A.1D (VEX.W=0) */
VCVTPS2PH_WpsVpsIb(bxInstruction_c * i)315 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c *i)
316 {
317   BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
318 
319   float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
320   unsigned len = i->getVL();
321 
322   Bit8u control = i->Ib();
323 
324   status.flush_underflow_to_zero = 0; // ignore MXCSR.FUZ
325   // override MXCSR rounding mode with control coming from imm8
326   if ((control & 0x4) == 0)
327     status.float_rounding_mode = control & 0x3;
328 
329   for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
330     result.vmm16u(n) = float32_to_float16(op.vmm32u(n), status);
331   }
332 
333   check_exceptionsSSE(get_exception_flags(status));
334 
335   if (i->modC0()) {
336     if (len == BX_VL128) {
337       BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
338     }
339     else {
340       BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
341     }
342   }
343   else {
344     bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
345 
346 #if BX_SUPPORT_EVEX
347     if (len == BX_VL512)
348       write_virtual_ymmword(i->seg(), eaddr, &result.vmm256(0));
349     else
350 #endif
351     {
352       if (len == BX_VL256)
353         write_virtual_xmmword(i->seg(), eaddr, &result.vmm128(0));
354       else
355         write_virtual_qword(i->seg(), eaddr, result.vmm64u(0));
356     }
357   }
358 
359   BX_NEXT_INSTR(i);
360 }
361 
362 #endif // BX_SUPPORT_AVX
363