1 /////////////////////////////////////////////////////////////////////////
2 // $Id: avx_cvt.cc 13466 2018-02-16 07:57:32Z sshwarts $
3 /////////////////////////////////////////////////////////////////////////
4 //
5 // Copyright (c) 2011-2018 Stanislav Shwartsman
6 // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 /////////////////////////////////////////////////////////////////////////
23
24 #define NEED_CPU_REG_SHORTCUTS 1
25 #include "bochs.h"
26 #include "cpu.h"
27 #define LOG_THIS BX_CPU_THIS_PTR
28
29 #if BX_SUPPORT_AVX
30
31 extern float_status_t mxcsr_to_softfloat_status_word(bx_mxcsr_t mxcsr);
32
33 /* Opcode: VEX.F3.0F 2A (VEX.W=0) */
VCVTSI2SS_VssEdR(bxInstruction_c * i)34 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SS_VssEdR(bxInstruction_c *i)
35 {
36 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
37
38 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
39 softfloat_status_word_rc_override(status, i);
40
41 op1.xmm32u(0) = int32_to_float32(BX_READ_32BIT_REG(i->src2()), status);
42
43 check_exceptionsSSE(get_exception_flags(status));
44
45 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
46 BX_NEXT_INSTR(i);
47 }
48
49 /* Opcode: VEX.F3.0F 2A (VEX.W=1) */
VCVTSI2SS_VssEqR(bxInstruction_c * i)50 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SS_VssEqR(bxInstruction_c *i)
51 {
52 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
53
54 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
55 softfloat_status_word_rc_override(status, i);
56
57 op1.xmm32u(0) = int64_to_float32(BX_READ_64BIT_REG(i->src2()), status);
58
59 check_exceptionsSSE(get_exception_flags(status));
60
61 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
62 BX_NEXT_INSTR(i);
63 }
64
65 /* Opcode: VEX.F2.0F 2A (VEX.W=0) */
VCVTSI2SD_VsdEdR(bxInstruction_c * i)66 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SD_VsdEdR(bxInstruction_c *i)
67 {
68 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
69 op1.xmm64u(0) = int32_to_float64(BX_READ_32BIT_REG(i->src2()));
70 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
71
72 BX_NEXT_INSTR(i);
73 }
74
75 /* Opcode: VEX.F2.0F 2A (VEX.W=1) */
VCVTSI2SD_VsdEqR(bxInstruction_c * i)76 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSI2SD_VsdEqR(bxInstruction_c *i)
77 {
78 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
79
80 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
81 softfloat_status_word_rc_override(status, i);
82
83 op1.xmm64u(0) = int64_to_float64(BX_READ_64BIT_REG(i->src2()), status);
84
85 check_exceptionsSSE(get_exception_flags(status));
86
87 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
88 BX_NEXT_INSTR(i);
89 }
90
91 /* Opcode: VEX.0F 5A (VEX.W ignore, VEX.VVV #UD) */
VCVTPS2PD_VpdWpsR(bxInstruction_c * i)92 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PD_VpdWpsR(bxInstruction_c *i)
93 {
94 BxPackedAvxRegister result;
95 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
96 unsigned len = i->getVL();
97
98 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
99 softfloat_status_word_rc_override(status, i);
100
101 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
102 result.vmm64u(n) = float32_to_float64(op.ymm32u(n), status);
103 }
104
105 check_exceptionsSSE(get_exception_flags(status));
106
107 BX_WRITE_AVX_REGZ(i->dst(), result, len);
108 BX_NEXT_INSTR(i);
109 }
110
111 /* Opcode: VEX.66.0F 5A (VEX.W ignore, VEX.VVV #UD) */
VCVTPD2PS_VpsWpdR(bxInstruction_c * i)112 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2PS_VpsWpdR(bxInstruction_c *i)
113 {
114 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
115 unsigned len = i->getVL();
116
117 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
118 softfloat_status_word_rc_override(status, i);
119
120 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
121 result.vmm32u(n) = float64_to_float32(op.vmm64u(n), status);
122 }
123
124 check_exceptionsSSE(get_exception_flags(status));
125
126 if (len == BX_VL128) {
127 BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
128 }
129 else {
130 BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
131 }
132
133 BX_NEXT_INSTR(i);
134 }
135
136 /* Opcode: VEX.F3.0F 5A (VEX.W ignore) */
VCVTSS2SD_VsdWssR(bxInstruction_c * i)137 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSS2SD_VsdWssR(bxInstruction_c *i)
138 {
139 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
140 float32 op2 = BX_READ_XMM_REG_LO_DWORD(i->src2());
141
142 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
143 softfloat_status_word_rc_override(status, i);
144 op1.xmm64u(0) = float32_to_float64(op2, status);
145 check_exceptionsSSE(get_exception_flags(status));
146
147 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
148
149 BX_NEXT_INSTR(i);
150 }
151
/* Opcode: VEX.F2.0F 5A (VEX.W ignore) */
VCVTSD2SS_VssWsdR(bxInstruction_c * i)153 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTSD2SS_VssWsdR(bxInstruction_c *i)
154 {
155 BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->src1());
156 float64 op2 = BX_READ_XMM_REG_LO_QWORD(i->src2());
157
158 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
159 softfloat_status_word_rc_override(status, i);
160 op1.xmm32u(0) = float64_to_float32(op2, status);
161 check_exceptionsSSE(get_exception_flags(status));
162
163 BX_WRITE_XMM_REG_CLEAR_HIGH(i->dst(), op1);
164
165 BX_NEXT_INSTR(i);
166 }
167
/* Opcode: VEX.0F 5B (VEX.W ignore, VEX.VVV #UD) */
VCVTDQ2PS_VpsWdqR(bxInstruction_c * i)169 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PS_VpsWdqR(bxInstruction_c *i)
170 {
171 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
172 unsigned len = i->getVL();
173
174 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
175 softfloat_status_word_rc_override(status, i);
176
177 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
178 op.vmm32u(n) = int32_to_float32(op.vmm32s(n), status);
179 }
180
181 check_exceptionsSSE(get_exception_flags(status));
182
183 BX_WRITE_AVX_REGZ(i->dst(), op, len);
184 BX_NEXT_INSTR(i);
185 }
186
/* Opcode: VEX.66.0F 5B (VEX.W ignore, VEX.VVV #UD) */
VCVTPS2DQ_VdqWpsR(bxInstruction_c * i)188 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2DQ_VdqWpsR(bxInstruction_c *i)
189 {
190 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
191 unsigned len = i->getVL();
192
193 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
194 softfloat_status_word_rc_override(status, i);
195
196 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
197 op.vmm32s(n) = float32_to_int32(op.vmm32u(n), status);
198 }
199
200 check_exceptionsSSE(get_exception_flags(status));
201
202 BX_WRITE_AVX_REGZ(i->dst(), op, len);
203 BX_NEXT_INSTR(i);
204 }
205
/* Opcode: VEX.F3.0F 5B (VEX.W ignore, VEX.VVV #UD) */
VCVTTPS2DQ_VdqWpsR(bxInstruction_c * i)207 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPS2DQ_VdqWpsR(bxInstruction_c *i)
208 {
209 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src());
210 unsigned len = i->getVL();
211
212 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
213 softfloat_status_word_rc_override(status, i);
214
215 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
216 op.vmm32s(n) = float32_to_int32_round_to_zero(op.vmm32u(n), status);
217 }
218
219 check_exceptionsSSE(get_exception_flags(status));
220
221 BX_WRITE_AVX_REGZ(i->dst(), op, len);
222 BX_NEXT_INSTR(i);
223 }
224
225 /* Opcode: VEX.66.0F.E6 (VEX.W ignore, VEX.VVV #UD) */
VCVTTPD2DQ_VdqWpdR(bxInstruction_c * i)226 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTTPD2DQ_VdqWpdR(bxInstruction_c *i)
227 {
228 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
229 unsigned len = i->getVL();
230
231 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
232 softfloat_status_word_rc_override(status, i);
233
234 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
235 result.vmm32s(n) = float64_to_int32_round_to_zero(op.vmm64u(n), status);
236 }
237
238 check_exceptionsSSE(get_exception_flags(status));
239
240 if (len == BX_VL128) {
241 BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
242 }
243 else {
244 BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
245 }
246
247 BX_NEXT_INSTR(i);
248 }
249
250 /* Opcode: VEX.F2.0F.E6 (VEX.W ignore, VEX.VVV #UD) */
VCVTPD2DQ_VdqWpdR(bxInstruction_c * i)251 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPD2DQ_VdqWpdR(bxInstruction_c *i)
252 {
253 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
254 unsigned len = i->getVL();
255
256 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
257 softfloat_status_word_rc_override(status, i);
258
259 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
260 result.vmm32s(n) = float64_to_int32(op.vmm64u(n), status);
261 }
262
263 check_exceptionsSSE(get_exception_flags(status));
264
265 if (len == BX_VL128) {
266 BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
267 }
268 else {
269 BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
270 }
271
272 BX_NEXT_INSTR(i);
273 }
274
275 /* Opcode: VEX.F3.0F.E6 (VEX.W ignore, VEX.VVV #UD) */
VCVTDQ2PD_VpdWdqR(bxInstruction_c * i)276 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTDQ2PD_VpdWdqR(bxInstruction_c *i)
277 {
278 BxPackedAvxRegister result;
279 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
280 unsigned len = i->getVL();
281
282 for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
283 result.vmm64u(n) = int32_to_float64(op.ymm32s(n));
284 }
285
286 BX_WRITE_AVX_REGZ(i->dst(), result, len);
287 BX_NEXT_INSTR(i);
288 }
289
290 // float16 convert
291
/* Opcode: VEX.66.0F.38.13 (VEX.W=0) */
VCVTPH2PS_VpsWpsR(bxInstruction_c * i)293 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPH2PS_VpsWpsR(bxInstruction_c *i)
294 {
295 BxPackedAvxRegister result;
296 BxPackedYmmRegister op = BX_READ_YMM_REG(i->src());
297 unsigned len = i->getVL();
298
299 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
300 status.denormals_are_zeros = 0; // ignore MXCSR.DAZ
301 // no denormal exception is reported on MXCSR
302 status.float_suppress_exception = float_flag_denormal;
303
304 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
305 result.vmm32u(n) = float16_to_float32(op.ymm16u(n), status);
306 }
307
308 check_exceptionsSSE(get_exception_flags(status));
309
310 BX_WRITE_AVX_REGZ(i->dst(), result, len);
311 BX_NEXT_INSTR(i);
312 }
313
314 /* Opcode: VEX.66.0F.3A.1D (VEX.W=0) */
VCVTPS2PH_WpsVpsIb(bxInstruction_c * i)315 void BX_CPP_AttrRegparmN(1) BX_CPU_C::VCVTPS2PH_WpsVpsIb(bxInstruction_c *i)
316 {
317 BxPackedAvxRegister op = BX_READ_AVX_REG(i->src()), result;
318
319 float_status_t status = mxcsr_to_softfloat_status_word(MXCSR);
320 unsigned len = i->getVL();
321
322 Bit8u control = i->Ib();
323
324 status.flush_underflow_to_zero = 0; // ignore MXCSR.FUZ
325 // override MXCSR rounding mode with control coming from imm8
326 if ((control & 0x4) == 0)
327 status.float_rounding_mode = control & 0x3;
328
329 for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
330 result.vmm16u(n) = float32_to_float16(op.vmm32u(n), status);
331 }
332
333 check_exceptionsSSE(get_exception_flags(status));
334
335 if (i->modC0()) {
336 if (len == BX_VL128) {
337 BX_WRITE_XMM_REG_LO_QWORD_CLEAR_HIGH(i->dst(), result.vmm64u(0));
338 }
339 else {
340 BX_WRITE_AVX_REGZ(i->dst(), result, len >> 1); // write half vector
341 }
342 }
343 else {
344 bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
345
346 #if BX_SUPPORT_EVEX
347 if (len == BX_VL512)
348 write_virtual_ymmword(i->seg(), eaddr, &result.vmm256(0));
349 else
350 #endif
351 {
352 if (len == BX_VL256)
353 write_virtual_xmmword(i->seg(), eaddr, &result.vmm128(0));
354 else
355 write_virtual_qword(i->seg(), eaddr, result.vmm64u(0));
356 }
357 }
358
359 BX_NEXT_INSTR(i);
360 }
361
362 #endif // BX_SUPPORT_AVX
363