1 /*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include <math.h>
22 #include "cpu.h"
23 #include "tcg-cpu.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/helper-proto.h"
27 #include "fpu/softfloat.h"
28 #include "fpu/softfloat-macros.h"
29 #include "helper-tcg.h"
30 #include "access.h"
31
/*
 * float macros
 *
 * Shorthand accessors for the x87 register state; they all expect a
 * variable named `env` to be in scope at the point of use.
 */
/* Temporary operand register. */
#define FT0 (env->ft0)
/* Register at the current top of stack (slot env->fpstt). */
#define ST0 (env->fpregs[env->fpstt].d)
/* Register n slots past the top of stack, wrapping modulo 8. */
#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d)
#define ST1 ST(1)

/*
 * Rounding-control field of the control word (env->fpuc) and its four
 * encodings; update_fp_status() shifts the field down by FPU_RC_SHIFT
 * to index the softfloat rounding-mode table.
 */
#define FPU_RC_SHIFT 10
#define FPU_RC_MASK (3 << FPU_RC_SHIFT)
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

/* 2^63 as a double constant; its users are outside this part of the file. */
#define MAXTAN 9223372036854775808.0
46
/*
 * The following deal with x86 long double-precision numbers.
 *
 * The accessor macros take an lvalue with an `.l.upper` member (sign and
 * exponent) and an `.l.lower` member (significand), e.g. a CPU_LDoubleU.
 * Every argument and every expansion is parenthesized so the macros stay
 * correct for non-trivial arguments such as `*p` and inside larger
 * expressions.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
/* Exponent field: the low 15 bits of the upper word. */
#define EXPD(fp) ((fp).l.upper & 0x7fff)
/* Sign bit of the upper word (non-zero when set). */
#define SIGND(fp) ((fp).l.upper & 0x8000)
/* Significand word; still usable as an lvalue. */
#define MANTD(fp) ((fp).l.lower)
/* Replace the exponent field with EXPBIAS, leaving the sign bit alone. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
54
/*
 * Bits of the FPU status word (env->fpus).  merge_exception_flags()
 * sets IE/DE/ZE/OE/UE/PE from the softfloat invalid, input-denormal,
 * divide-by-zero, overflow, underflow and inexact flags respectively.
 * fpu_set_exception() additionally sets SE and B when any exception
 * that is not masked in the control word is pending.
 */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6) /* not set anywhere in this part of the file */
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/*
 * Exception-mask bits of the control word (env->fpuc), aligned with the
 * six exception bits above; a set bit masks the matching exception.
 */
#define FPUC_EM 0x3f
66
/*
 * floatx80 constants loaded by the helper_fld*_ST0() helpers.  Each
 * pair differs only in the last significand bit: the helpers select the
 * `_d` variant when rounding control is DOWN or CHOP and the `_u`
 * variant when it is UP; the unsuffixed value is used otherwise.
 * NOTE(review): going by the helper names these are log10(2), log2(e),
 * log2(10), ln(2) and pi; the unsuffixed ln2 and pi values are defined
 * outside this file.
 */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL)
#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL)
#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL)
75
/*
 * Push an empty slot onto the register stack: step the top-of-stack
 * index back by one (modulo 8) and tag the new top as valid.  The
 * register contents are left for the caller to fill in.
 */
static inline void fpush(CPUX86State *env)
{
    int new_top = (env->fpstt - 1) & 7;

    env->fpstt = new_top;
    env->fptags[new_top] = 0; /* validate stack entry */
}
81
/*
 * Pop the register stack: tag the current top as empty, then advance
 * the top-of-stack index by one (modulo 8).
 */
static inline void fpop(CPUX86State *env)
{
    int old_top = env->fpstt;

    env->fptags[old_top] = 1; /* invalidate stack entry */
    env->fpstt = (old_top + 1) & 7;
}
87
do_fldt(X86Access * ac,target_ulong ptr)88 static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
89 {
90 CPU_LDoubleU temp;
91
92 temp.l.lower = access_ldq(ac, ptr);
93 temp.l.upper = access_ldw(ac, ptr + 8);
94 return temp.d;
95 }
96
/*
 * Write @f as a 10-byte extended-precision value through a prepared
 * access descriptor: the lower word as 8 bytes at @ptr, then the upper
 * word as 2 bytes at @ptr + 8.
 */
static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
    CPU_LDoubleU raw;

    raw.d = f;

    access_stq(ac, ptr, raw.l.lower);
    access_stw(ac, ptr + 8, raw.l.upper);
}
105
106 /* x87 FPU helpers */
107
/*
 * Convert @a to float64 using env->fp_status and reinterpret the
 * resulting bits as a host double.
 */
static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
{
    union {
        float64 f64;
        double d;
    } pun = { .f64 = floatx80_to_float64(a, &env->fp_status) };

    return pun.d;
}
118
double_to_floatx80(CPUX86State * env,double a)119 static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
120 {
121 union {
122 float64 f64;
123 double d;
124 } u;
125
126 u.d = a;
127 return float64_to_floatx80(u.f64, &env->fp_status);
128 }
129
/*
 * Record the exception bits in @mask in the status word.  If any
 * exception bit is then set whose mask bit in the control word is
 * clear, also set the summary (SE) and busy (B) bits.
 */
static void fpu_set_exception(CPUX86State *env, int mask)
{
    int unmasked;

    env->fpus |= mask;

    unmasked = ~env->fpuc & FPUC_EM;
    if ((env->fpus & unmasked) != 0) {
        env->fpus |= FPUS_SE | FPUS_B;
    }
}
137
/*
 * Initialise the non-runtime-varying fields of the various float_status
 * words to x86 behaviour.
 *
 * This must be called at CPU reset, because the float_status words live
 * in the "zeroed on reset" portion of the CPU state struct.  Fields
 * that vary under guest control are set via the codepath for setting
 * the corresponding register, e.g. cpu_set_fpuc().
 */
void cpu_init_fp_statuses(CPUX86State *env)
{
    /*
     * fp_status uses the x87 NaN propagation rules:
     *   SNaN + QNaN        => return the QNaN
     *   two SNaNs          => return the one with the larger
     *                         significand, silenced
     *   two QNaNs          => return the one with the larger significand
     *   SNaN and a non-NaN => return the SNaN, silenced
     *   QNaN and a non-NaN => return the QNaN
     *
     * If we get down to comparing significands and they are the same,
     * return the NaN with the positive sign bit (if any).
     *
     * TODO: using the same rule for the other two status words is
     * incorrect.  The x86 Software Developer's Manual vol 1 section
     * 4.8.3.5 "Operating on SNaNs and QNaNs" says that the "larger
     * significand" behaviour is only used for x87 FPU operations.  For
     * SSE the required behaviour is to always return the first NaN,
     * which is float_2nan_prop_ab.
     *
     * mmx_status is used only for the AMD 3DNow! instructions, which
     * are documented in the "3DNow! Technology Manual" as not
     * supporting NaNs or infinities as inputs.  The result of passing
     * two NaNs is documented as "undefined", so we can do what we
     * choose.  (Strictly there is some behaviour we don't implement
     * correctly for these "unsupported" NaN and Inf values, like
     * "NaN * 0 == 0".)
     */
    float_status *const statuses[] = {
        &env->fp_status,
        &env->mmx_status,
        &env->sse_status,
    };
    size_t i;

    for (i = 0; i < sizeof(statuses) / sizeof(statuses[0]); i++) {
        set_float_2nan_prop_rule(float_2nan_prop_x87, statuses[i]);
    }
}
177
/*
 * Clear the softfloat exception flags in env->fp_status and return the
 * flags that were set beforehand, so that an operation's own flags can
 * be observed in isolation.  Pairs with merge_exception_flags().
 */
static inline uint8_t save_exception_flags(CPUX86State *env)
{
    uint8_t old_flags = get_float_exception_flags(&env->fp_status);
    set_float_exception_flags(0, &env->fp_status);
    return old_flags;
}
184
/*
 * Finish an operation bracketed by save_exception_flags(): re-raise the
 * previously saved softfloat flags @old_flags in env->fp_status, and
 * translate the flags raised since the save into status-word exception
 * bits via fpu_set_exception().
 */
static void merge_exception_flags(CPUX86State *env, uint8_t old_flags)
{
    uint8_t raised = get_float_exception_flags(&env->fp_status);
    int fpus_bits = 0;

    float_raise(old_flags, &env->fp_status);

    if (raised & float_flag_invalid) {
        fpus_bits |= FPUS_IE;
    }
    if (raised & float_flag_divbyzero) {
        fpus_bits |= FPUS_ZE;
    }
    if (raised & float_flag_overflow) {
        fpus_bits |= FPUS_OE;
    }
    if (raised & float_flag_underflow) {
        fpus_bits |= FPUS_UE;
    }
    if (raised & float_flag_inexact) {
        fpus_bits |= FPUS_PE;
    }
    if (raised & float_flag_input_denormal) {
        fpus_bits |= FPUS_DE;
    }

    fpu_set_exception(env, fpus_bits);
}
197
helper_fdiv(CPUX86State * env,floatx80 a,floatx80 b)198 static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
199 {
200 uint8_t old_flags = save_exception_flags(env);
201 floatx80 ret = floatx80_div(a, b, &env->fp_status);
202 merge_exception_flags(env, old_flags);
203 return ret;
204 }
205
/*
 * Deliver a pending FPU exception.  With CR0.NE set this raises
 * EXCP10_COPR at @retaddr; otherwise, in system emulation builds, it
 * calls fpu_check_raise_ferr_irq().  In user-only builds with CR0.NE
 * clear nothing happens.
 */
static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
{
    if (env->cr[0] & CR0_NE_MASK) {
        raise_exception_ra(env, EXCP10_COPR, retaddr);
        return;
    }
#if !defined(CONFIG_USER_ONLY)
    fpu_check_raise_ferr_irq(env);
#endif
}
217
/*
 * Load FT0 from the 32-bit value @val, interpreted as float32 bits and
 * widened to floatx80; conversion exceptions go to the status word.
 */
void helper_flds_FT0(CPUX86State *env, uint32_t val)
{
    uint8_t saved_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } bits = { .i = val };

    FT0 = float32_to_floatx80(bits.f, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
230
/*
 * Load FT0 from the 64-bit value @val, interpreted as float64 bits and
 * widened to floatx80; conversion exceptions go to the status word.
 */
void helper_fldl_FT0(CPUX86State *env, uint64_t val)
{
    uint8_t saved_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits = { .i = val };

    FT0 = float64_to_floatx80(bits.f, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
243
/*
 * Load FT0 with the 32-bit signed integer @val converted to floatx80.
 *
 * The conversion is done at full extended precision, with the guest's
 * rounding precision restored afterwards, matching helper_fildl_ST0()
 * and helper_fildll_ST0().  Without this, a reduced precision-control
 * setting would round the integer operand during the conversion, before
 * the arithmetic that consumes FT0 has even started.
 */
void helper_fildl_FT0(CPUX86State *env, int32_t val)
{
    FloatX80RoundPrec old_prec =
        get_floatx80_rounding_precision(&env->fp_status);

    set_floatx80_rounding_precision(floatx80_precision_x, &env->fp_status);
    FT0 = int32_to_floatx80(val, &env->fp_status);
    set_floatx80_rounding_precision(old_prec, &env->fp_status);
}
248
/*
 * Push the 32-bit value @val, interpreted as float32 bits and widened
 * to floatx80, onto the register stack; conversion exceptions go to the
 * status word.
 */
void helper_flds_ST0(CPUX86State *env, uint32_t val)
{
    uint8_t saved_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } bits = { .i = val };
    floatx80 widened = float32_to_floatx80(bits.f, &env->fp_status);

    /* fpush() moves the top of stack and validates the new entry. */
    fpush(env);
    ST0 = widened;
    merge_exception_flags(env, saved_flags);
}
265
/*
 * Push the 64-bit value @val, interpreted as float64 bits and widened
 * to floatx80, onto the register stack; conversion exceptions go to the
 * status word.
 */
void helper_fldl_ST0(CPUX86State *env, uint64_t val)
{
    uint8_t saved_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits = { .i = val };
    floatx80 widened = float64_to_floatx80(bits.f, &env->fp_status);

    /* fpush() moves the top of stack and validates the new entry. */
    fpush(env);
    ST0 = widened;
    merge_exception_flags(env, saved_flags);
}
282
/*
 * Switch @st to full extended (floatx80_precision_x) rounding precision
 * and return the precision that was in effect before, so the caller can
 * restore it with set_floatx80_rounding_precision() when done.
 */
static FloatX80RoundPrec tmp_maximise_precision(float_status *st)
{
    FloatX80RoundPrec old = get_floatx80_rounding_precision(st);
    set_floatx80_rounding_precision(floatx80_precision_x, st);
    return old;
}
289
/*
 * Push the 32-bit signed integer @val onto the register stack as a
 * floatx80.  The conversion runs at full extended precision; the
 * guest's rounding precision is restored before returning.
 */
void helper_fildl_ST0(CPUX86State *env, int32_t val)
{
    FloatX80RoundPrec guest_prec = tmp_maximise_precision(&env->fp_status);
    floatx80 converted = int32_to_floatx80(val, &env->fp_status);

    /* fpush() moves the top of stack and validates the new entry. */
    fpush(env);
    ST0 = converted;

    set_floatx80_rounding_precision(guest_prec, &env->fp_status);
}
302
/*
 * Push the 64-bit signed integer @val onto the register stack as a
 * floatx80.  The conversion runs at full extended precision; the
 * guest's rounding precision is restored before returning.
 */
void helper_fildll_ST0(CPUX86State *env, int64_t val)
{
    FloatX80RoundPrec guest_prec = tmp_maximise_precision(&env->fp_status);
    floatx80 converted = int64_to_floatx80(val, &env->fp_status);

    /* fpush() moves the top of stack and validates the new entry. */
    fpush(env);
    ST0 = converted;

    set_floatx80_rounding_precision(guest_prec, &env->fp_status);
}
315
/*
 * Convert ST0 to float32 and return its raw 32-bit pattern; conversion
 * exceptions go to the status word.  The stack is not popped.
 */
uint32_t helper_fsts_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    union {
        float32 f;
        uint32_t i;
    } bits;

    bits.f = floatx80_to_float32(ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);

    return bits.i;
}
328
/*
 * Convert ST0 to float64 and return its raw 64-bit pattern; conversion
 * exceptions go to the status word.  The stack is not popped.
 */
uint64_t helper_fstl_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    union {
        float64 f;
        uint64_t i;
    } bits;

    bits.f = floatx80_to_float64(ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);

    return bits.i;
}
341
/*
 * Convert ST0 to a 16-bit signed integer using the current rounding
 * mode.  If the 32-bit conversion result does not fit in 16 bits, the
 * softfloat flags are replaced by "invalid" alone and -32768 is
 * returned.  Exceptions go to the status word.
 */
int32_t helper_fist_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    int32_t result = floatx80_to_int32(ST0, &env->fp_status);

    if (result < INT16_MIN || result > INT16_MAX) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        result = INT16_MIN;
    }
    merge_exception_flags(env, saved_flags);

    return result;
}
355
/*
 * Convert ST0 to a 32-bit signed integer using the current rounding
 * mode.  If the conversion raised "invalid", the result is forced to
 * 0x80000000.  Exceptions go to the status word.
 */
int32_t helper_fistl_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    int32_t result = floatx80_to_int32(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if (raised & float_flag_invalid) {
        result = INT32_MIN; /* bit pattern 0x80000000 */
    }
    merge_exception_flags(env, saved_flags);

    return result;
}
368
/*
 * Convert ST0 to a 64-bit signed integer using the current rounding
 * mode.  If the conversion raised "invalid", the result is forced to
 * 0x8000000000000000.  Exceptions go to the status word.
 */
int64_t helper_fistll_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    int64_t result = floatx80_to_int64(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if (raised & float_flag_invalid) {
        result = INT64_MIN; /* bit pattern 0x8000000000000000 */
    }
    merge_exception_flags(env, saved_flags);

    return result;
}
381
/*
 * Convert ST0 to a 16-bit signed integer, always truncating toward
 * zero.  If the 32-bit conversion result does not fit in 16 bits, the
 * softfloat flags are replaced by "invalid" alone and -32768 is
 * returned.  Exceptions go to the status word.
 */
int32_t helper_fistt_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    int32_t result = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);

    if (result < INT16_MIN || result > INT16_MAX) {
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        result = INT16_MIN;
    }
    merge_exception_flags(env, saved_flags);

    return result;
}
395
/*
 * Convert ST0 to a 32-bit signed integer, always truncating toward
 * zero.  If the conversion raised "invalid", the result is forced to
 * 0x80000000.  Exceptions go to the status word.
 */
int32_t helper_fisttl_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    int32_t result = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if (raised & float_flag_invalid) {
        result = INT32_MIN; /* bit pattern 0x80000000 */
    }
    merge_exception_flags(env, saved_flags);

    return result;
}
408
/*
 * Convert ST0 to a 64-bit signed integer, always truncating toward
 * zero.  If the conversion raised "invalid", the result is forced to
 * 0x8000000000000000.  Exceptions go to the status word.
 */
int64_t helper_fisttll_ST0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    int64_t result = floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
    int raised = get_float_exception_flags(&env->fp_status);

    if (raised & float_flag_invalid) {
        result = INT64_MIN; /* bit pattern 0x8000000000000000 */
    }
    merge_exception_flags(env, saved_flags);

    return result;
}
421
/*
 * Push the 10-byte extended-precision value at guest address @ptr onto
 * the register stack.  The value is fully read from memory before any
 * stack state is modified.
 */
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    floatx80 loaded;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
    loaded = do_fldt(&ac, ptr);

    /* fpush() moves the top of stack and validates the new entry. */
    fpush(env);
    ST0 = loaded;
}
434
/*
 * Store ST0 as a 10-byte extended-precision value at guest address
 * @ptr.  The stack is not popped here.
 */
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;

    /* Prepare the whole 10-byte store before writing any of it. */
    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    do_fstt(&ac, ptr, ST0);
}
442
/* TCG-callable wrapper around fpush(): push an empty, valid stack slot. */
void helper_fpush(CPUX86State *env)
{
    fpush(env);
}
447
/* TCG-callable wrapper around fpop(): invalidate and pop the stack top. */
void helper_fpop(CPUX86State *env)
{
    fpop(env);
}
452
/*
 * Decrement the top-of-stack index (modulo 8) without touching tags or
 * register contents, and clear status-word bits 0x4700 (bits 8-10 and
 * 14; presumably the condition-code bits).
 */
void helper_fdecstp(CPUX86State *env)
{
    int new_top = (env->fpstt - 1) & 7;

    env->fpstt = new_top;
    env->fpus &= ~0x4700;
}
458
/*
 * Increment the top-of-stack index (modulo 8) without touching tags or
 * register contents, and clear status-word bits 0x4700 (bits 8-10 and
 * 14; presumably the condition-code bits).
 */
void helper_fincstp(CPUX86State *env)
{
    int new_top = (env->fpstt + 1) & 7;

    env->fpstt = new_top;
    env->fpus &= ~0x4700;
}
464
465 /* FPU move */
466
/*
 * Tag the register @st_index slots past the top of stack as empty.  The
 * register contents and the top-of-stack index are left unchanged.
 */
void helper_ffree_STN(CPUX86State *env, int st_index)
{
    env->fptags[(env->fpstt + st_index) & 7] = 1;
}
471
/* Copy the temporary register FT0 into the top of stack (ST0). */
void helper_fmov_ST0_FT0(CPUX86State *env)
{
    ST0 = FT0;
}
476
/* Copy stack register ST(@st_index) into the temporary register FT0. */
void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
{
    FT0 = ST(st_index);
}
481
/* Copy stack register ST(@st_index) into the top of stack (ST0). */
void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
{
    ST0 = ST(st_index);
}
486
/* Copy the top of stack (ST0) into stack register ST(@st_index). */
void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
{
    ST(st_index) = ST0;
}
491
/*
 * Exchange the contents of ST0 and ST(@st_index).  Tags are not
 * touched.
 */
void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
{
    floatx80 *top = &ST0;
    floatx80 *other = &ST(st_index);
    floatx80 saved = *other;

    *other = *top;
    *top = saved;
}
500
501 /* FPU operations */
502
/*
 * Status-word condition bits for each comparison outcome, indexed by
 * the FloatRelation result plus one.
 * NOTE(review): assumes FloatRelation runs from -1 (less) through
 * 0 (equal), 1 (greater) to 2 (unordered) - confirm against softfloat.
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
504
/*
 * Compare ST0 with FT0 using floatx80_compare() and store the outcome
 * in status-word bits 0x4500; exceptions raised by the comparison go to
 * the status word.
 */
void helper_fcom_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    FloatRelation rel = floatx80_compare(ST0, FT0, &env->fp_status);
    int cc = fcom_ccval[rel + 1];

    env->fpus = (env->fpus & ~0x4500) | cc;
    merge_exception_flags(env, saved_flags);
}
514
/*
 * Compare ST0 with FT0 using floatx80_compare_quiet() and store the
 * outcome in status-word bits 0x4500; exceptions raised by the
 * comparison go to the status word.
 */
void helper_fucom_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    FloatRelation rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    int cc = fcom_ccval[rel + 1];

    env->fpus = (env->fpus & ~0x4500) | cc;
    merge_exception_flags(env, saved_flags);
}
524
/*
 * EFLAGS bits (CC_Z / CC_P / CC_C) for each comparison outcome, indexed
 * by the FloatRelation result plus one.
 * NOTE(review): assumes FloatRelation runs from -1 (less) through
 * 0 (equal), 1 (greater) to 2 (unordered) - confirm against softfloat.
 */
static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
526
/*
 * Compare ST0 with FT0 using floatx80_compare() and report the outcome
 * in the CC_Z / CC_P / CC_C flags; the other flags keep their current
 * values.  Exceptions raised by the comparison go to the status word.
 */
void helper_fcomi_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    FloatRelation rel = floatx80_compare(ST0, FT0, &env->fp_status);
    int flags;

    /* Materialise the current flags before CC_OP is overwritten. */
    flags = cpu_cc_compute_all(env);
    flags &= ~(CC_Z | CC_P | CC_C);
    flags |= fcomi_ccval[rel + 1];

    CC_SRC = flags;
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, saved_flags);
}
539
/*
 * Compare ST0 with FT0 using floatx80_compare_quiet() and report the
 * outcome in the CC_Z / CC_P / CC_C flags; the other flags keep their
 * current values.  Exceptions raised by the comparison go to the status
 * word.
 */
void helper_fucomi_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    FloatRelation rel = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
    int flags;

    /* Materialise the current flags before CC_OP is overwritten. */
    flags = cpu_cc_compute_all(env);
    flags &= ~(CC_Z | CC_P | CC_C);
    flags |= fcomi_ccval[rel + 1];

    CC_SRC = flags;
    CC_OP = CC_OP_EFLAGS;
    merge_exception_flags(env, saved_flags);
}
552
/* ST0 = ST0 + FT0; exceptions raised by the add go to the status word. */
void helper_fadd_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *top = &ST0;

    *top = floatx80_add(*top, FT0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
559
/* ST0 = ST0 * FT0; exceptions raised by the multiply go to the status word. */
void helper_fmul_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *top = &ST0;

    *top = floatx80_mul(*top, FT0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
566
/* ST0 = ST0 - FT0; exceptions raised by the subtract go to the status word. */
void helper_fsub_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *top = &ST0;

    *top = floatx80_sub(*top, FT0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
573
/*
 * Reversed subtract: ST0 = FT0 - ST0; exceptions raised by the subtract
 * go to the status word.
 */
void helper_fsubr_ST0_FT0(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *top = &ST0;

    *top = floatx80_sub(FT0, *top, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
580
/*
 * ST0 = ST0 / FT0.  Exception bookkeeping is done inside helper_fdiv().
 */
void helper_fdiv_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, ST0, FT0);
}
585
/*
 * Reversed divide: ST0 = FT0 / ST0.  Exception bookkeeping is done
 * inside helper_fdiv().
 */
void helper_fdivr_ST0_FT0(CPUX86State *env)
{
    ST0 = helper_fdiv(env, FT0, ST0);
}
590
591 /* fp operations between STN and ST0 */
592
/*
 * ST(@st_index) = ST(@st_index) + ST0; exceptions raised by the add go
 * to the status word.
 */
void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_add(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
599
/*
 * ST(@st_index) = ST(@st_index) * ST0; exceptions raised by the
 * multiply go to the status word.
 */
void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_mul(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
606
/*
 * ST(@st_index) = ST(@st_index) - ST0; exceptions raised by the
 * subtract go to the status word.
 */
void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_sub(*dst, ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
613
/*
 * Reversed subtract: ST(@st_index) = ST0 - ST(@st_index); exceptions
 * raised by the subtract go to the status word.
 */
void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 *dst = &ST(st_index);

    *dst = floatx80_sub(ST0, *dst, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
620
/*
 * ST(@st_index) = ST(@st_index) / ST0.  Exception bookkeeping is done
 * inside helper_fdiv().
 */
void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 dividend = ST(st_index);

    ST(st_index) = helper_fdiv(env, dividend, ST0);
}
628
/*
 * Reversed divide: ST(@st_index) = ST0 / ST(@st_index).  Exception
 * bookkeeping is done inside helper_fdiv().
 */
void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
{
    floatx80 divisor = ST(st_index);

    ST(st_index) = helper_fdiv(env, ST0, divisor);
}
636
637 /* misc FPU operations */
/* Flip the sign of ST0; the status word is not modified here. */
void helper_fchs_ST0(CPUX86State *env)
{
    ST0 = floatx80_chs(ST0);
}
642
/* Replace ST0 with its absolute value; the status word is not modified here. */
void helper_fabs_ST0(CPUX86State *env)
{
    ST0 = floatx80_abs(ST0);
}
647
/*
 * Overwrite ST0 with the constant 1.0.  No push is performed here; the
 * caller is expected to have made room on the stack already.
 */
void helper_fld1_ST0(CPUX86State *env)
{
    ST0 = floatx80_one;
}
652
/*
 * Overwrite ST0 with the floatx80_l2t constant.  When rounding control
 * is "up" the variant with the last significand bit rounded up is used;
 * all other rounding modes share the default value.
 */
void helper_fldl2t_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_UP) {
        ST0 = floatx80_l2t_u;
    } else {
        ST0 = floatx80_l2t;
    }
}
664
/*
 * Overwrite ST0 with the floatx80_l2e constant.  When rounding control
 * is "down" or "chop" the variant with the last significand bit rounded
 * down is used; the other modes share the default value.
 */
void helper_fldl2e_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_l2e_d;
    } else {
        ST0 = floatx80_l2e;
    }
}
677
/*
 * Overwrite ST0 with the floatx80_pi constant.  When rounding control
 * is "down" or "chop" the variant with the last significand bit rounded
 * down is used; the other modes share the default value.
 */
void helper_fldpi_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_pi_d;
    } else {
        ST0 = floatx80_pi;
    }
}
690
/*
 * Overwrite ST0 with the floatx80_lg2 constant.  When rounding control
 * is "down" or "chop" the variant with the last significand bit rounded
 * down is used; the other modes share the default value.
 */
void helper_fldlg2_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_lg2_d;
    } else {
        ST0 = floatx80_lg2;
    }
}
703
/*
 * Overwrite ST0 with the floatx80_ln2 constant.  When rounding control
 * is "down" or "chop" the variant with the last significand bit rounded
 * down is used; the other modes share the default value.
 */
void helper_fldln2_ST0(CPUX86State *env)
{
    int rc = env->fpuc & FPU_RC_MASK;

    if (rc == FPU_RC_DOWN || rc == FPU_RC_CHOP) {
        ST0 = floatx80_ln2_d;
    } else {
        ST0 = floatx80_ln2;
    }
}
716
/*
 * Overwrite ST0 with the constant zero.  No push is performed here; the
 * caller is expected to have made room on the stack already.
 */
void helper_fldz_ST0(CPUX86State *env)
{
    ST0 = floatx80_zero;
}
721
/* Set the temporary register FT0 to the constant zero. */
void helper_fldz_FT0(CPUX86State *env)
{
    FT0 = floatx80_zero;
}
726
/*
 * Return the status word as the guest sees it: env->fpus with bits
 * 11-13 (mask 0x3800) replaced by the current top-of-stack index, which
 * is tracked separately in env->fpstt.
 */
uint32_t helper_fnstsw(CPUX86State *env)
{
    uint32_t status = env->fpus & ~0x3800;
    uint32_t top_field = (env->fpstt & 0x7) << 11;

    return status | top_field;
}
731
/* Return the current FPU control word (env->fpuc). */
uint32_t helper_fnstcw(CPUX86State *env)
{
    return env->fpuc;
}
736
/*
 * Program @status with the softfloat rounding mode that corresponds to
 * the 2-bit x86 rounding-control encoding @mode:
 *   0 -> nearest-even, 1 -> down, 2 -> up, 3 -> toward zero.
 * @mode must be below 4; this is asserted.
 */
static void set_x86_rounding_mode(unsigned mode, float_status *status)
{
    /* Read-only lookup table, so keep it const. */
    static const FloatRoundMode x86_round_mode[4] = {
        float_round_nearest_even,
        float_round_down,
        float_round_up,
        float_round_to_zero
    };
    assert(mode < ARRAY_SIZE(x86_round_mode));
    set_float_rounding_mode(x86_round_mode[mode], status);
}
748
/*
 * Propagate the rounding-control and precision-control fields of the
 * control word (env->fpuc) into env->fp_status.
 *
 * Precision control (bits 8-9): 0 selects single, 2 selects double, and
 * any other value (including 1) selects full extended precision.
 */
void update_fp_status(CPUX86State *env)
{
    int rc_field = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT;
    int pc_field = (env->fpuc >> 8) & 3;
    FloatX80RoundPrec prec;

    /* set rounding mode */
    set_x86_rounding_mode(rc_field, &env->fp_status);

    /* set rounding precision */
    if (pc_field == 0) {
        prec = floatx80_precision_s;
    } else if (pc_field == 2) {
        prec = floatx80_precision_d;
    } else {
        prec = floatx80_precision_x;
    }
    set_floatx80_rounding_precision(prec, &env->fp_status);
}
772
/* Load a new FPU control word by forwarding @val to cpu_set_fpuc(). */
void helper_fldcw(CPUX86State *env, uint32_t val)
{
    cpu_set_fpuc(env, val);
}
777
/*
 * Clear the exception state in the status word: only bits 8-14 (mask
 * 0x7f00) are kept, so the exception flags, FPUS_SF, FPUS_SE and FPUS_B
 * are all cleared.
 */
void helper_fclex(CPUX86State *env)
{
    env->fpus &= 0x7f00;
}
782
/*
 * If the status word has the exception-summary bit (FPUS_SE) set,
 * deliver the pending FPU exception; otherwise do nothing.
 */
void helper_fwait(CPUX86State *env)
{
    if (!(env->fpus & FPUS_SE)) {
        return;
    }
    fpu_raise_exception(env, GETPC());
}
789
/*
 * Reset the x87 state: zero the status word, the top-of-stack index and
 * the saved instruction/data pointers and selectors, load control word
 * 0x37f via cpu_set_fpuc(), and tag all eight registers as empty.
 */
static void do_fninit(CPUX86State *env)
{
    int reg;

    env->fpus = 0;
    env->fpstt = 0;
    env->fpcs = 0;
    env->fpds = 0;
    env->fpip = 0;
    env->fpdp = 0;
    cpu_set_fpuc(env, 0x37f);

    /* A tag of 1 marks the stack entry as invalid (empty). */
    for (reg = 0; reg < 8; reg++) {
        env->fptags[reg] = 1;
    }
}
808
/* TCG-callable wrapper around do_fninit(): reset the x87 state. */
void helper_fninit(CPUX86State *env)
{
    do_fninit(env);
}
813
814 /* BCD ops */
815
/*
 * Push the 10-byte packed-BCD value at guest address @ptr onto the
 * register stack.
 *
 * Bytes 0-8 each hold two decimal digits (high nibble = tens), least
 * significant byte first; bit 7 of byte 9 is the sign.  Nibbles are not
 * validated.
 *
 * The integer-to-floatx80 conversion is done at full extended
 * precision, with the guest's rounding precision restored afterwards,
 * in the same way as helper_fildl_ST0()/helper_fildll_ST0(): an
 * 18-digit decimal always fits the 64-bit significand, so the load must
 * not be rounded by a reduced precision-control setting.
 */
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    FloatX80RoundPrec old_prec;
    floatx80 tmp;
    uint64_t val;
    unsigned int v;
    int i;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());

    /* Accumulate the digit pairs from most to least significant. */
    val = 0;
    for (i = 8; i >= 0; i--) {
        v = access_ldb(&ac, ptr + i);
        val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
    }

    old_prec = tmp_maximise_precision(&env->fp_status);
    tmp = int64_to_floatx80(val, &env->fp_status);
    set_floatx80_rounding_precision(old_prec, &env->fp_status);

    if (access_ldb(&ac, ptr + 9) & 0x80) {
        tmp = floatx80_chs(tmp);
    }
    fpush(env);
    ST0 = tmp;
}
838
/*
 * Store ST0 at guest address @ptr as a 10-byte packed-BCD integer.
 *
 * ST0 is first converted to a 64-bit integer with the current rounding
 * mode.  If the magnitude is 10^18 or more, the softfloat flags are set
 * to "invalid" and a fixed pattern is written instead: bytes 0-6 zero,
 * byte 7 = 0xc0, bytes 8-9 = 0xff.  Otherwise byte 9 receives the sign
 * (0x80 if the sign bit of ST0 is set, else 0x00) and bytes 0-8 receive
 * the decimal digits, two per byte, least significant pair first.
 * Exceptions go to the status word.  The stack is not popped here.
 */
void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
{
    uint8_t saved_flags = save_exception_flags(env);
    X86Access ac;
    CPU_LDoubleU st0_bits;
    target_ulong cur, sign_addr;
    int64_t magnitude;
    int pair;

    access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
    st0_bits.d = ST0;

    magnitude = floatx80_to_int64(ST0, &env->fp_status);
    if (magnitude >= 1000000000000000000LL ||
        magnitude <= -1000000000000000000LL) {
        /* Not representable in 18 digits: write the fixed pattern. */
        set_float_exception_flags(float_flag_invalid, &env->fp_status);
        for (cur = ptr; cur < ptr + 7; cur++) {
            access_stb(&ac, cur, 0);
        }
        access_stb(&ac, cur++, 0xc0);
        access_stb(&ac, cur++, 0xff);
        access_stb(&ac, cur++, 0xff);
        merge_exception_flags(env, saved_flags);
        return;
    }

    /* The sign byte is written first, from the sign bit of ST0 itself. */
    sign_addr = ptr + 9;
    if (SIGND(st0_bits)) {
        access_stb(&ac, sign_addr, 0x80);
        magnitude = -magnitude;
    } else {
        access_stb(&ac, sign_addr, 0x00);
    }

    /*
     * Emit two digits per byte.  Once the value is exhausted the same
     * arithmetic produces zero bytes, which pads the remaining positions.
     */
    for (cur = ptr; cur < sign_addr; cur++) {
        pair = magnitude % 100;
        magnitude = magnitude / 100;
        pair = ((pair / 10) << 4) | (pair % 10);
        access_stb(&ac, cur, pair);
    }
    merge_exception_flags(env, saved_flags);
}
885
/*
 * 128-bit significand of log(2), split into two 64-bit halves.  The
 * high half equals the significand of floatx80_ln2_d.
 */
#define ln2_sig_high 0xb17217f7d1cf79abULL
#define ln2_sig_low 0xc9e3b39803f2f6afULL

/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * NOTE(review): f2xm1_coeff_0_low has a far smaller exponent than
 * f2xm1_coeff_0 and looks like a low-order correction term for it;
 * confirm against the code that evaluates the polynomial, which is
 * outside this part of the file.
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)
903
/*
 * One entry of the f2xm1 lookup table: a sample point t together with
 * precomputed 2^t and 2^t - 1.  The table initialisers are positional,
 * so the member order must not change.
 */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t. */
    floatx80 exp2;
    /* The value of 2^t - 1. */
    floatx80 exp2m1;
};
915
916 static const struct f2xm1_data f2xm1_table[65] = {
917 { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
918 make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
919 make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
920 { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
921 make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
922 make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
923 { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
924 make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
925 make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
926 { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
927 make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
928 make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
929 { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
930 make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
931 make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
932 { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
933 make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
934 make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
935 { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
936 make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
937 make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
938 { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
939 make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
940 make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
941 { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
942 make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
943 make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
944 { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
945 make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
946 make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
947 { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
948 make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
949 make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
950 { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
951 make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
952 make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
953 { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
954 make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
955 make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
956 { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
957 make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
958 make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
959 { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
960 make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
961 make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
962 { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
963 make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
964 make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
965 { make_floatx80_init(0xbffe, 0x800000000000227dULL),
966 make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
967 make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
968 { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
969 make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
970 make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
971 { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
972 make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
973 make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
974 { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
975 make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
976 make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
977 { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
978 make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
979 make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
980 { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
981 make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
982 make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
983 { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
984 make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
985 make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
986 { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
987 make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
988 make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
989 { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
990 make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
991 make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
992 { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
993 make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
994 make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
995 { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
996 make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
997 make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
998 { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
999 make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
1000 make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
1001 { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
1002 make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
1003 make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
1004 { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
1005 make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
1006 make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
1007 { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
1008 make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
1009 make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
1010 { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
1011 make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
1012 make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
1013 { floatx80_zero_init,
1014 make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1015 floatx80_zero_init },
1016 { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
1017 make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
1018 make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
1019 { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
1020 make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
1021 make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
1022 { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
1023 make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
1024 make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
1025 { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
1026 make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
1027 make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
1028 { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
1029 make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
1030 make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
1031 { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
1032 make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
1033 make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
1034 { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
1035 make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
1036 make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
1037 { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
1038 make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
1039 make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
1040 { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
1041 make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
1042 make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
1043 { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
1044 make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
1045 make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
1046 { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
1047 make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
1048 make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
1049 { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
1050 make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
1051 make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
1052 { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
1053 make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
1054 make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
1055 { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
1056 make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
1057 make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
1058 { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
1059 make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
1060 make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
1061 { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
1062 make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
1063 make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
1064 { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
1065 make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
1066 make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
1067 { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
1068 make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
1069 make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
1070 { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
1071 make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
1072 make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
1073 { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
1074 make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
1075 make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
1076 { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
1077 make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
1078 make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
1079 { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
1080 make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
1081 make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
1082 { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
1083 make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
1084 make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
1085 { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
1086 make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
1087 make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
1088 { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
1089 make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
1090 make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
1091 { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
1092 make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
1093 make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
1094 { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
1095 make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
1096 make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
1097 { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
1098 make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
1099 make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
1100 { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
1101 make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
1102 make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
1103 { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
1104 make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
1105 make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
1106 { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
1107 make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
1108 make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
1109 { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
1110 make_floatx80_init(0x4000, 0x8000000000000000ULL),
1111 make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
1112 };
1113
/*
 * F2XM1: replace ST0 with 2^ST0 - 1.
 *
 * Handling by argument class:
 *  - invalid encodings and out-of-range arguments (|ST0| > 1) raise the
 *    invalid exception and produce the default NaN;
 *  - signaling NaNs are silenced (raising invalid), quiet NaNs pass through;
 *  - +1 and -1 give the exact results 1 and -0.5;
 *  - very small arguments (exponent below 0x3fb0) are simply multiplied by
 *    an extra-precision ln(2); zeros are returned unchanged;
 *  - everything else is split as ST0 = t + y with t taken from f2xm1_table
 *    (close to the nearest multiple of 1/32), and the result is
 *    2^t * (2^y - 1) + (2^t - 1), with 2^y - 1 from a polynomial in y.
 *
 * Intermediate arithmetic runs in round-to-nearest-even at full floatx80
 * precision; the caller's rounding mode is restored before the final
 * rounding and the rounding precision once the result has been stored.
 */
void helper_f2xm1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t sig = extractFloatx80Frac(ST0);
    int32_t exp = extractFloatx80Exp(ST0);
    bool sign = extractFloatx80Sign(ST0);

    if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (exp > 0x3fff ||
               (exp == 0x3fff && sig != (0x8000000000000000ULL))) {
        /* Out of range for the instruction, treat as invalid.  */
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (exp == 0x3fff) {
        /* Argument 1 or -1, exact result 1 or -0.5.  */
        if (sign) {
            ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL);
        }
    } else if (exp < 0x3fb0) {
        if (!floatx80_is_zero(ST0)) {
            /*
             * Multiplying the argument by an extra-precision version
             * of log(2) is sufficiently precise.  Zero arguments are
             * returned unchanged.
             */
            uint64_t sig0, sig1, sig2;
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1,
                            &sig2);
            /* This result is inexact.  */
            sig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig0, sig1,
                                                &env->fp_status);
        }
    } else {
        floatx80 tmp, y, accum;
        bool asign, bsign;
        int32_t n, aexp, bexp;
        uint64_t asig0, asig1, asig2, bsig0, bsig1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /* Find the nearest multiple of 1/32 to the argument.  */
        tmp = floatx80_scalbn(ST0, 5, &env->fp_status);
        n = 32 + floatx80_to_int32(tmp, &env->fp_status);
        y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status);

        if (floatx80_is_zero(y)) {
            /*
             * The argument is exactly t, so the answer is the
             * tabulated value of 2^t - 1.  Using the table here avoids
             * needing to special-case zero as a result of
             * multiplication below.
             */
            ST0 = f2xm1_table[n].exp2m1;
            set_float_exception_flags(float_flag_inexact, &env->fp_status);
            env->fp_status.float_rounding_mode = save_mode;
        } else {
            /*
             * Compute the lower parts of a polynomial expansion for
             * (2^y - 1) / y.
             */
            accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status);
            accum = floatx80_mul(accum, y, &env->fp_status);
            accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status);

            /*
             * The full polynomial expansion is f2xm1_coeff_0 + accum
             * (where accum has much lower magnitude, and so, in
             * particular, carry out of the addition is not possible).
             * (This expansion is only accurate to about 70 bits, not
             * 128 bits.)
             */
            aexp = extractFloatx80Exp(f2xm1_coeff_0);
            asign = extractFloatx80Sign(f2xm1_coeff_0);
            shift128RightJamming(extractFloatx80Frac(accum), 0,
                                 aexp - extractFloatx80Exp(accum),
                                 &asig0, &asig1);
            bsig0 = extractFloatx80Frac(f2xm1_coeff_0);
            bsig1 = 0;
            if (asign == extractFloatx80Sign(accum)) {
                add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            } else {
                sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
            }
            /* And thus compute an approximation to 2^y - 1.  */
            mul128By64To192(asig0, asig1, extractFloatx80Frac(y),
                            &asig0, &asig1, &asig2);
            aexp += extractFloatx80Exp(y) - 0x3ffe;
            asign ^= extractFloatx80Sign(y);
            if (n != 32) {
                /*
                 * Multiply this by the precomputed value of 2^t and
                 * add that of 2^t - 1.  (For n == 32, t is zero and
                 * neither step is needed.)
                 */
                mul128By64To192(asig0, asig1,
                                extractFloatx80Frac(f2xm1_table[n].exp2),
                                &asig0, &asig1, &asig2);
                aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe;
                bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1);
                bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1);
                bsig1 = 0;
                /* Align the two operands on the larger exponent.  */
                if (bexp < aexp) {
                    shift128RightJamming(bsig0, bsig1, aexp - bexp,
                                         &bsig0, &bsig1);
                } else if (aexp < bexp) {
                    shift128RightJamming(asig0, asig1, bexp - aexp,
                                         &asig0, &asig1);
                    aexp = bexp;
                }
                /* The sign of 2^t - 1 is always that of the result.  */
                bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1);
                if (asign == bsign) {
                    /* Avoid possible carry out of the addition.  */
                    shift128RightJamming(asig0, asig1, 1,
                                         &asig0, &asig1);
                    shift128RightJamming(bsig0, bsig1, 1,
                                         &bsig0, &bsig1);
                    ++aexp;
                    add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1);
                } else {
                    sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
                    asign = bsign;
                }
            }
            /* The final rounding uses the caller's rounding mode.  */
            env->fp_status.float_rounding_mode = save_mode;
            /* This result is inexact.  */
            asig1 |= 1;
            ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign, aexp, asig0, asig1,
                                                &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    merge_exception_flags(env, old_flags);
}
1274
/*
 * FPTAN: replace ST0 with tan(ST0) and then push 1.0.
 *
 * The tangent is evaluated with the host tan() on a double, so the
 * result is only double precision.  Arguments whose magnitude exceeds
 * MAXTAN are rejected: C2 (0x400 in the status word) is set and the
 * stack is left untouched.  Otherwise C2 is cleared.
 */
void helper_fptan(CPUX86State *env)
{
    double arg = floatx80_to_double(env, ST0);

    if (arg > MAXTAN || arg < -MAXTAN) {
        /* Operand out of range: C2 <-- 1, no result is produced. */
        env->fpus |= 0x400;
        return;
    }

    ST0 = double_to_floatx80(env, tan(arg));
    fpush(env);
    ST0 = floatx80_one;
    env->fpus &= ~0x400; /* C2 <-- 0 */
    /* the above code is for |arg| < 2**52 only */
}
1290
/*
 * Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision.
 *
 * Each constant is given as a biased floatx80 exponent plus a 128-bit
 * significand split into high and low 64-bit halves.  pi/4, pi/2 and pi
 * differ only by powers of two and therefore share a significand.
 */
#define pi_4_exp        0x3ffe
#define pi_4_sig_high   0xc90fdaa22168c234ULL
#define pi_4_sig_low    0xc4c6628b80dc1cd1ULL

#define pi_2_exp        0x3fff
#define pi_2_sig_high   0xc90fdaa22168c234ULL
#define pi_2_sig_low    0xc4c6628b80dc1cd1ULL

#define pi_34_exp       0x4000
#define pi_34_sig_high  0x96cbe3f9990e91a7ULL
#define pi_34_sig_low   0x9394c9e8a0a5159dULL

#define pi_exp          0x4000
#define pi_sig_high     0xc90fdaa22168c234ULL
#define pi_sig_low      0xc4c6628b80dc1cd1ULL
1304
/*
 * Polynomial coefficients for an approximation to atan(x), with only
 * odd powers of x used, for x in the interval [-1/16, 1/16].  (Unlike
 * for some other approximations, no low part is needed for the first
 * coefficient here to achieve a sufficiently accurate result, because
 * the coefficient in this minimax approximation is very close to
 * exactly 1.)
 *
 * fpatan_coeff_k multiplies x^(2k+1); helper_fpatan evaluates the series
 * by Horner's rule in x^2 and multiplies by x at the end.
 */
#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL)
#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL)
#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL)
#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL)
#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL)
#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL)
#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL)
1320
1321 struct fpatan_data {
1322 /* High and low parts of atan(x). */
1323 floatx80 atan_high, atan_low;
1324 };
1325
1326 static const struct fpatan_data fpatan_table[9] = {
1327 { floatx80_zero_init,
1328 floatx80_zero_init },
1329 { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL),
1330 make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) },
1331 { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL),
1332 make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) },
1333 { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL),
1334 make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) },
1335 { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL),
1336 make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) },
1337 { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL),
1338 make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) },
1339 { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL),
1340 make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) },
1341 { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL),
1342 make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) },
1343 { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL),
1344 make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) },
1345 };
1346
/*
 * FPATAN: compute the arctangent of ST1/ST0, choosing the quadrant from
 * the signs of both operands, store the result in ST1 and pop the stack.
 *
 * Special operands (NaNs, invalid encodings, zeros, infinities, and
 * operands whose exponents differ by 80 or more) are dispatched first.
 * For the general case the code:
 *   1. forms x = min(|ST0|,|ST1|) / max(|ST0|,|ST1|), so 0 < x <= 1, and
 *      remembers which of 0, pi/2 or pi the partial result has to be
 *      added to or subtracted from;
 *   2. splits x = t + y with t = n/8 the nearest multiple of 1/8;
 *   3. computes z = y / (1 + t*x), so atan(x) = atan(t) + atan(z), where
 *      atan(t) comes from fpatan_table and atan(z) from a polynomial;
 *   4. combines the pieces in 128-bit fixed point and rounds once, in the
 *      caller's rounding mode, with the inexact bit forced.
 *
 * Intermediate floatx80 operations run in round-to-nearest-even at full
 * precision; both settings are restored before the final rounding.
 */
void helper_fpatan(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (floatx80_is_zero(ST1) && !arg0_sign) {
        /* Pass this zero through.  */
    } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) ||
                 arg0_exp - arg1_exp >= 80) &&
               !arg0_sign) {
        /*
         * Dividing ST1 by ST0 gives the correct result up to
         * rounding, and avoids spurious underflow exceptions that
         * might result from passing some small values through the
         * polynomial approximation, but if a finite nonzero result of
         * division is exact, the result of fpatan is still inexact
         * (and underflowing where appropriate).
         */
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST1 = floatx80_div(ST1, ST0, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
        if (!floatx80_is_zero(ST1) &&
            !(get_float_exception_flags(&env->fp_status) &
              float_flag_inexact)) {
            /*
             * The mathematical result is very slightly closer to zero
             * than this exact result.  Round a value with the
             * significand adjusted accordingly to get the correct
             * exceptions, and possibly an adjusted result depending
             * on the rounding mode.
             */
            uint64_t sig = extractFloatx80Frac(ST1);
            int32_t exp = extractFloatx80Exp(ST1);
            bool sign = extractFloatx80Sign(ST1);
            if (exp == 0) {
                normalizeFloatx80Subnormal(sig, &exp, &sig);
            }
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                sign, exp, sig - 1,
                                                -1, &env->fp_status);
        }
    } else {
        /* The result is inexact.  */
        bool rsign = arg1_sign;
        int32_t rexp;
        uint64_t rsig0, rsig1;
        if (floatx80_is_zero(ST1)) {
            /*
             * ST0 is negative.  The result is pi with the sign of
             * ST1.
             */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else if (floatx80_is_infinity(ST1)) {
            if (floatx80_is_infinity(ST0)) {
                if (arg0_sign) {
                    rexp = pi_34_exp;
                    rsig0 = pi_34_sig_high;
                    rsig1 = pi_34_sig_low;
                } else {
                    rexp = pi_4_exp;
                    rsig0 = pi_4_sig_high;
                    rsig1 = pi_4_sig_low;
                }
            } else {
                rexp = pi_2_exp;
                rsig0 = pi_2_sig_high;
                rsig1 = pi_2_sig_low;
            }
        } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) {
            rexp = pi_2_exp;
            rsig0 = pi_2_sig_high;
            rsig1 = pi_2_sig_low;
        } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) {
            /* ST0 is negative.  */
            rexp = pi_exp;
            rsig0 = pi_sig_high;
            rsig1 = pi_sig_low;
        } else {
            /*
             * ST0 and ST1 are finite, nonzero and with exponents not
             * too far apart.
             */
            int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp;
            int32_t azexp, axexp;
            bool adj_sub, ysign, zsign;
            uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1;
            uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2;
            uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1;
            uint64_t azsig0, azsig1;
            uint64_t azsig2, azsig3, axsig0, axsig1;
            floatx80 x8;
            FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
            FloatX80RoundPrec save_prec =
                env->fp_status.floatx80_rounding_precision;
            env->fp_status.float_rounding_mode = float_round_nearest_even;
            env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

            if (arg0_exp == 0) {
                normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
            }
            if (arg1_exp == 0) {
                normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
            }
            if (arg0_exp > arg1_exp ||
                (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) {
                /* Work with abs(ST1) / abs(ST0).  */
                num_exp = arg1_exp;
                num_sig = arg1_sig;
                den_exp = arg0_exp;
                den_sig = arg0_sig;
                if (arg0_sign) {
                    /* The result is subtracted from pi.  */
                    adj_exp = pi_exp;
                    adj_sig0 = pi_sig_high;
                    adj_sig1 = pi_sig_low;
                    adj_sub = true;
                } else {
                    /* The result is used as-is.  */
                    adj_exp = 0;
                    adj_sig0 = 0;
                    adj_sig1 = 0;
                    adj_sub = false;
                }
            } else {
                /* Work with abs(ST0) / abs(ST1).  */
                num_exp = arg0_exp;
                num_sig = arg0_sig;
                den_exp = arg1_exp;
                den_sig = arg1_sig;
                /* The result is added to or subtracted from pi/2.  */
                adj_exp = pi_2_exp;
                adj_sig0 = pi_2_sig_high;
                adj_sig1 = pi_2_sig_low;
                adj_sub = !arg0_sign;
            }

            /*
             * Compute x = num/den, where 0 < x <= 1 and x is not too
             * small.
             */
            xexp = num_exp - den_exp + 0x3ffe;
            remsig0 = num_sig;
            remsig1 = 0;
            if (den_sig <= remsig0) {
                shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                ++xexp;
            }
            xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig);
            mul64To128(den_sig, xsig0, &msig0, &msig1);
            sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1);
            /* Correct an overestimated quotient digit.  */
            while ((int64_t) remsig0 < 0) {
                --xsig0;
                add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1);
            }
            xsig1 = estimateDiv128To64(remsig1, 0, den_sig);
            /*
             * No need to correct any estimation error in xsig1; even
             * with such error, it is accurate enough.
             */

            /*
             * Split x as x = t + y, where t = n/8 is the nearest
             * multiple of 1/8 to x.
             */
            x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                               false, xexp + 3, xsig0,
                                               xsig1, &env->fp_status);
            n = floatx80_to_int32(x8, &env->fp_status);
            if (n == 0) {
                ysign = false;
                yexp = xexp;
                ysig0 = xsig0;
                ysig1 = xsig1;
                texp = 0;
                tsig = 0;
            } else {
                int shift = clz32(n) + 32;
                texp = 0x403b - shift;
                tsig = n;
                tsig <<= shift;
                if (texp == xexp) {
                    sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1);
                    if ((int64_t) ysig0 >= 0) {
                        ysign = false;
                        if (ysig0 == 0) {
                            if (ysig1 == 0) {
                                yexp = 0;
                            } else {
                                shift = clz64(ysig1) + 64;
                                yexp = xexp - shift;
                                shift128Left(ysig0, ysig1, shift,
                                             &ysig0, &ysig1);
                            }
                        } else {
                            shift = clz64(ysig0);
                            yexp = xexp - shift;
                            shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                        }
                    } else {
                        ysign = true;
                        sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1);
                        if (ysig0 == 0) {
                            shift = clz64(ysig1) + 64;
                        } else {
                            shift = clz64(ysig0);
                        }
                        yexp = xexp - shift;
                        shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                    }
                } else {
                    /*
                     * t's exponent must be greater than x's because t
                     * is positive and the nearest multiple of 1/8 to
                     * x, and if x has a greater exponent, the power
                     * of 2 with that exponent is also a multiple of
                     * 1/8.
                     */
                    uint64_t usig0, usig1;
                    shift128RightJamming(xsig0, xsig1, texp - xexp,
                                         &usig0, &usig1);
                    ysign = true;
                    sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1);
                    if (ysig0 == 0) {
                        shift = clz64(ysig1) + 64;
                    } else {
                        shift = clz64(ysig0);
                    }
                    yexp = texp - shift;
                    shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1);
                }
            }

            /*
             * Compute z = y/(1+tx), so arctan(x) = arctan(t) +
             * arctan(z).
             */
            zsign = ysign;
            if (texp == 0 || yexp == 0) {
                zexp = yexp;
                zsig0 = ysig0;
                zsig1 = ysig1;
            } else {
                /*
                 * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1.
                 */
                int32_t dexp = texp + xexp - 0x3ffe;
                uint64_t dsig0, dsig1, dsig2;
                mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2);
                /*
                 * dexp <= 0x3fff (and if equal, dsig0 has a leading 0
                 * bit).  Add 1 to produce the denominator 1+tx.
                 */
                shift128RightJamming(dsig0, dsig1, 0x3fff - dexp,
                                     &dsig0, &dsig1);
                dsig0 |= 0x8000000000000000ULL;
                zexp = yexp - 1;
                remsig0 = ysig0;
                remsig1 = ysig1;
                remsig2 = 0;
                if (dsig0 <= remsig0) {
                    shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1);
                    ++zexp;
                }
                zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0);
                mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2);
                sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2,
                       &remsig0, &remsig1, &remsig2);
                /* Correct an overestimated quotient digit.  */
                while ((int64_t) remsig0 < 0) {
                    --zsig0;
                    add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1,
                           &remsig0, &remsig1, &remsig2);
                }
                zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0);
                /* No need to correct any estimation error in zsig1.  */
            }

            if (zexp == 0) {
                azexp = 0;
                azsig0 = 0;
                azsig1 = 0;
            } else {
                floatx80 z2, accum;
                uint64_t z2sig0, z2sig1, z2sig2, z2sig3;
                /* Compute z^2.  */
                mul128To256(zsig0, zsig1, zsig0, zsig1,
                            &z2sig0, &z2sig1, &z2sig2, &z2sig3);
                z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                                   zexp + zexp - 0x3ffe,
                                                   z2sig0, z2sig1,
                                                   &env->fp_status);

                /* Compute the lower parts of the polynomial expansion.  */
                accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);
                accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status);
                accum = floatx80_mul(accum, z2, &env->fp_status);

                /*
                 * The full polynomial expansion is z*(fpatan_coeff_0 + accum).
                 * fpatan_coeff_0 is 1, and accum is negative and much smaller.
                 */
                aexp = extractFloatx80Exp(fpatan_coeff_0);
                shift128RightJamming(extractFloatx80Frac(accum), 0,
                                     aexp - extractFloatx80Exp(accum),
                                     &asig0, &asig1);
                sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1,
                       &asig0, &asig1);
                /* Multiply by z to compute arctan(z).  */
                azexp = aexp + zexp - 0x3ffe;
                mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1,
                            &azsig2, &azsig3);
            }

            /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign).  */
            if (texp == 0) {
                /* z is positive.  */
                axexp = azexp;
                axsig0 = azsig0;
                axsig1 = azsig1;
            } else {
                bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low);
                int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low);
                uint64_t low_sig0 =
                    extractFloatx80Frac(fpatan_table[n].atan_low);
                uint64_t low_sig1 = 0;
                axexp = extractFloatx80Exp(fpatan_table[n].atan_high);
                axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high);
                axsig1 = 0;
                /* Fold the low part of atan(t) into the 128-bit value.  */
                shift128RightJamming(low_sig0, low_sig1, axexp - low_exp,
                                     &low_sig0, &low_sig1);
                if (low_sign) {
                    sub128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, low_sig0, low_sig1,
                           &axsig0, &axsig1);
                }
                /*
                 * Align both terms on the larger exponent plus one, so
                 * that the addition below cannot carry out.
                 */
                if (azexp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, azexp - axexp + 1,
                                         &axsig0, &axsig1);
                    axexp = azexp + 1;
                    shift128RightJamming(azsig0, azsig1, 1,
                                         &azsig0, &azsig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(azsig0, azsig1, axexp - azexp + 1,
                                         &azsig0, &azsig1);
                    ++axexp;
                }
                if (zsign) {
                    sub128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                } else {
                    add128(axsig0, axsig1, azsig0, azsig1,
                           &axsig0, &axsig1);
                }
            }

            if (adj_exp == 0) {
                rexp = axexp;
                rsig0 = axsig0;
                rsig1 = axsig1;
            } else {
                /*
                 * Add or subtract arctan(x) (exponent axexp,
                 * significand axsig0 and axsig1, positive, not
                 * necessarily normalized) to the number given by
                 * adj_exp, adj_sig0 and adj_sig1, according to
                 * adj_sub.
                 */
                if (adj_exp >= axexp) {
                    shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1,
                                         &axsig0, &axsig1);
                    rexp = adj_exp + 1;
                    shift128RightJamming(adj_sig0, adj_sig1, 1,
                                         &adj_sig0, &adj_sig1);
                } else {
                    shift128RightJamming(axsig0, axsig1, 1,
                                         &axsig0, &axsig1);
                    shift128RightJamming(adj_sig0, adj_sig1,
                                         axexp - adj_exp + 1,
                                         &adj_sig0, &adj_sig1);
                    rexp = axexp + 1;
                }
                if (adj_sub) {
                    sub128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                } else {
                    add128(adj_sig0, adj_sig1, axsig0, axsig1,
                           &rsig0, &rsig1);
                }
            }

            env->fp_status.float_rounding_mode = save_mode;
            env->fp_status.floatx80_rounding_precision = save_prec;
        }
        /* This result is inexact.  */
        rsig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp,
                                            rsig0, rsig1, &env->fp_status);
    }

    fpop(env);
    merge_exception_flags(env, old_flags);
}
1783
/*
 * FXTRACT: split ST0 into exponent and significand.
 *
 * The unbiased exponent replaces ST0 and the significand (the original
 * value with its exponent field reset to the bias) is then pushed, so on
 * return ST1 holds the exponent and ST0 the significand.
 *
 * Special operands:
 *  - zero:             ST1 = -inf (raising divide-by-zero), ST0 = the zero;
 *  - invalid encoding: invalid is raised, both results are the default NaN;
 *  - NaN:              both results are the (silenced) NaN;
 *  - infinity:         ST1 = +inf, ST0 = the original infinity.
 */
void helper_fxtract(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    CPU_LDoubleU temp;

    temp.d = ST0;

    if (floatx80_is_zero(ST0)) {
        /* Easy way to generate -inf and raising division by 0 exception */
        ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
                           &env->fp_status);
        fpush(env);
        ST0 = temp.d;
    } else if (floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
        /* After the push the NaN sits in ST1; duplicate it into ST0. */
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_any_nan(ST0)) {
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
        fpush(env);
        ST0 = ST1;
    } else if (floatx80_is_infinity(ST0)) {
        fpush(env);
        ST0 = ST1;
        ST1 = floatx80_infinity;
    } else {
        int expdif;

        if (EXPD(temp) == 0) {
            /* Denormal: normalize the significand and adjust the exponent. */
            int shift = clz64(temp.l.lower);
            temp.l.lower <<= shift;
            expdif = 1 - EXPBIAS - shift;
            float_raise(float_flag_input_denormal, &env->fp_status);
        } else {
            expdif = EXPD(temp) - EXPBIAS;
        }
        /* DP exponent bias */
        ST0 = int32_to_floatx80(expdif, &env->fp_status);
        fpush(env);
        /* Significand: same sign and fraction, exponent forced to the bias. */
        BIASEXPONENT(temp);
        ST0 = temp.d;
    }
    merge_exception_flags(env, old_flags);
}
1832
/*
 * Shared implementation of FPREM and FPREM1: replace ST0 with the
 * (partial) remainder of ST0 divided by ST1.
 *
 * @mod is passed straight through to floatx80_modrem() and selects which
 * of the two remainder flavours is computed.
 *
 * Condition codes in env->fpus are first cleared, then:
 *  - if the exponent difference is below 64 the reduction is complete and
 *    the low three quotient bits are reported in C0 (q2), C3 (q1), C1 (q0);
 *  - otherwise only a partial reduction is performed and C2 is set so that
 *    the guest knows to iterate.
 * Zero, infinite, NaN and invalid operands are handed to
 * floatx80_modrem() directly and leave all four flags clear.
 */
static void helper_fprem_common(CPUX86State *env, bool mod)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t quotient;
    CPU_LDoubleU temp0, temp1;
    int exp0, exp1, expdiff;

    temp0.d = ST0;
    temp1.d = ST1;
    exp0 = EXPD(temp0);
    exp1 = EXPD(temp1);

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
        exp0 == 0x7fff || exp1 == 0x7fff ||
        floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
        ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
    } else {
        /* Use the effective exponent of a denormal operand. */
        if (exp0 == 0) {
            exp0 = 1 - clz64(temp0.l.lower);
        }
        if (exp1 == 0) {
            exp1 = 1 - clz64(temp1.l.lower);
        }
        expdiff = exp0 - exp1;
        if (expdiff < 64) {
            ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
            env->fpus |= (quotient & 0x4) << (8 - 2);  /* (C0) <-- q2 */
            env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
            env->fpus |= (quotient & 0x1) << (9 - 0);  /* (C1) <-- q0 */
        } else {
            /*
             * Partial remainder.  This choice of how many bits to
             * process at once is specified in AMD instruction set
             * manuals, and empirically is followed by Intel
             * processors as well; it ensures that the final remainder
             * operation in a loop does produce the correct low three
             * bits of the quotient.  AMD manuals specify that the
             * flags other than C2 are cleared, and empirically Intel
             * processors clear them as well.
             */
            int n = 32 + (expdiff % 32);
            temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
            ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
            env->fpus |= 0x400;  /* C2 <-- 1 */
        }
    }
    merge_exception_flags(env, old_flags);
}
1882
/*
 * FPREM1 helper: run the shared remainder code with the "mod" selector
 * cleared.
 */
void helper_fprem1(CPUX86State *env)
{
    const bool use_mod = false;

    helper_fprem_common(env, use_mod);
}
1887
/*
 * FPREM helper: run the shared remainder code with the "mod" selector set.
 */
void helper_fprem(CPUX86State *env)
{
    const bool use_mod = true;

    helper_fprem_common(env, use_mod);
}
1892
/*
 * 128-bit significand of log2(e), split into high and low 64-bit words.
 * Used as the multiplier in the small-argument path of helper_fyl2xp1().
 */
#define log2_e_sig_high 0xb8aa3b295c17f0bbULL
#define log2_e_sig_low 0xbe87fed0691d3e89ULL

/*
 * Polynomial coefficients for an approximation to log2((1+x)/(1-x)),
 * with only odd powers of x used, for x in the interval [2*sqrt(2)-3,
 * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the
 * interval [sqrt(2)/2, sqrt(2)].
 *
 * fyl2x_coeff_0_low is added into the accumulated lower-order terms,
 * and fyl2x_coeff_0 is then combined with that sum using 128-bit
 * integer arithmetic in helper_fyl2x_common().
 */
#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL)
#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL)
#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL)
#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL)
#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL)
#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL)
#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL)
#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL)
#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL)
#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL)
#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL)
1914
/*
 * Compute an approximation of log2(1+arg), where 1+arg is in the
 * interval [sqrt(2)/2, sqrt(2)].  It is assumed that when this
 * function is called, rounding precision is set to 80 and the
 * round-to-nearest mode is in effect.  arg must not be exactly zero,
 * and must not be so close to zero that underflow might occur.
 *
 * @env:  CPU state; only env->fp_status is used, for the softfloat calls.
 * @arg:  the value whose log2(1+arg) is wanted.
 * @exp:  output, exponent of the result.
 * @sig0: output, high 64 bits of the result significand.
 * @sig1: output, next 64 bits of the result significand.
 *
 * No sign is returned; the callers in this file take the result sign from
 * the sign of the argument they passed in.
 */
static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp,
                                uint64_t *sig0, uint64_t *sig1)
{
    uint64_t arg0_sig = extractFloatx80Frac(arg);
    int32_t arg0_exp = extractFloatx80Exp(arg);
    bool arg0_sign = extractFloatx80Sign(arg);
    bool asign;
    int32_t dexp, texp, aexp;
    uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2;
    uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3;
    uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1;
    floatx80 t2, accum;

    /*
     * Compute an approximation of arg/(2+arg), with extra precision,
     * as the argument to a polynomial approximation.  The extra
     * precision is only needed for the first term of the
     * approximation, with subsequent terms being significantly
     * smaller; the approximation only uses odd exponents, and the
     * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029....
     */
    /* Build the 128-bit denominator (dsig0:dsig1, exponent dexp). */
    if (arg0_sign) {
        dexp = 0x3fff;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1);
    } else {
        dexp = 0x4000;
        shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1);
        dsig0 |= 0x8000000000000000ULL;
    }
    /* Long division of the numerator (rsig*) by the denominator. */
    texp = arg0_exp - dexp + 0x3ffe;
    rsig0 = arg0_sig;
    rsig1 = 0;
    rsig2 = 0;
    if (dsig0 <= rsig0) {
        /* Halve the numerator and compensate in the quotient exponent. */
        shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1);
        ++texp;
    }
    tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0);
    mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2);
    sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2,
           &rsig0, &rsig1, &rsig2);
    /* Correct an over-estimated quotient word until the remainder is >= 0. */
    while ((int64_t) rsig0 < 0) {
        --tsig0;
        add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1,
               &rsig0, &rsig1, &rsig2);
    }
    tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0);
    /*
     * No need to correct any estimation error in tsig1; even with
     * such error, it is accurate enough.  Now compute the square of
     * that approximation.
     */
    mul128To256(tsig0, tsig1, tsig0, tsig1,
                &t2sig0, &t2sig1, &t2sig2, &t2sig3);
    t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false,
                                       texp + texp - 0x3ffe,
                                       t2sig0, t2sig1, &env->fp_status);

    /* Compute the lower parts of the polynomial expansion.  */
    /* Horner evaluation in t2, from coefficient 9 down to coefficient 1. */
    accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status);
    accum = floatx80_mul(accum, t2, &env->fp_status);
    accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status);

    /*
     * The full polynomial expansion is fyl2x_coeff_0 + accum (where
     * accum has much lower magnitude, and so, in particular, carry
     * out of the addition is not possible), multiplied by t.  (This
     * expansion is only accurate to about 70 bits, not 128 bits.)
     */
    aexp = extractFloatx80Exp(fyl2x_coeff_0);
    asign = extractFloatx80Sign(fyl2x_coeff_0);
    /* Align accum to the exponent of fyl2x_coeff_0 before combining. */
    shift128RightJamming(extractFloatx80Frac(accum), 0,
                         aexp - extractFloatx80Exp(accum),
                         &asig0, &asig1);
    bsig0 = extractFloatx80Frac(fyl2x_coeff_0);
    bsig1 = 0;
    if (asign == extractFloatx80Sign(accum)) {
        add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    } else {
        sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1);
    }
    /* Multiply by t to compute the required result.  */
    mul128To256(asig0, asig1, tsig0, tsig1,
                &asig0, &asig1, &asig2, &asig3);
    aexp += texp - 0x3ffe;
    /* Only the top 128 bits of the 256-bit product are handed back. */
    *exp = aexp;
    *sig0 = asig0;
    *sig1 = asig1;
}
2027
/*
 * FYL2XP1 helper: ST1 <- ST1 * log2(ST0 + 1), then fpop().
 *
 * NaNs, invalid encodings and out-of-range ST0 values are dealt with
 * first; exact cases (a zero operand or an infinite ST1) use a plain
 * multiplication; very small ST0 uses ST0 * ST1 * log2(e); everything
 * else goes through helper_fyl2x_common().
 */
void helper_fyl2xp1(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        /* Signaling NaN in ST0 wins: raise invalid, return it quieted. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        /* Quiet NaN in ST0 is propagated as the result. */
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (arg0_exp > 0x3ffd ||
               (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ?
                                                  0x95f619980c4336f7ULL :
                                                  0xd413cccfe7799211ULL))) {
        /*
         * Out of range for the instruction (ST0 must have absolute
         * value less than 1 - sqrt(2)/2 = 0.292..., according to
         * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1
         * to sqrt(2) - 1, which we allow here), treat as invalid.
         */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
               arg1_exp == 0x7fff) {
        /*
         * One argument is zero, or multiplying by infinity; correct
         * result is exact and can be obtained by multiplying the
         * arguments.
         */
        ST1 = floatx80_mul(ST0, ST1, &env->fp_status);
    } else if (arg0_exp < 0x3fb0) {
        /*
         * Multiplying both arguments and an extra-precision version
         * of log2(e) is sufficiently precise.
         */
        uint64_t sig0, sig1, sig2;
        int32_t exp;
        /* Normalize operands whose exponent field is zero. */
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig,
                        &sig0, &sig1, &sig2);
        exp = arg0_exp + 1;
        mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2);
        exp += arg1_exp - 0x3ffe;
        /* This result is inexact.  */
        sig1 |= 1;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, exp,
                                            sig0, sig1, &env->fp_status);
    } else {
        int32_t aexp;
        uint64_t asig0, asig1, asig2;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /*
         * helper_fyl2x_common() requires round-to-nearest and full
         * 80-bit precision; both are restored below.
         */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1);
        /*
         * Multiply by the second argument to compute the required
         * result.
         */
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
        aexp += arg1_exp - 0x3ffe;
        /* This result is inexact.  */
        asig1 |= 1;
        /* The final rounding uses the caller's rounding mode. */
        env->fp_status.float_rounding_mode = save_mode;
        ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                            arg0_sign ^ arg1_sign, aexp,
                                            asig0, asig1, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
2125
/*
 * FYL2X helper: ST1 <- ST1 * log2(ST0), then fpop().
 *
 * NaNs, invalid encodings, negative ST0, infinities, zeros and ST0 == 1
 * are handled as special cases.  Otherwise ST0 is scaled by a power of
 * two into [sqrt(2)/2, sqrt(2)], the logarithm of the scaled value is
 * obtained from helper_fyl2x_common(), the integer exponent is added
 * back, and the sum is multiplied by ST1.
 */
void helper_fyl2x(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    uint64_t arg0_sig = extractFloatx80Frac(ST0);
    int32_t arg0_exp = extractFloatx80Exp(ST0);
    bool arg0_sign = extractFloatx80Sign(ST0);
    uint64_t arg1_sig = extractFloatx80Frac(ST1);
    int32_t arg1_exp = extractFloatx80Exp(ST1);
    bool arg1_sign = extractFloatx80Sign(ST1);

    if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST0, &env->fp_status);
    } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_silence_nan(ST1, &env->fp_status);
    } else if (floatx80_invalid_encoding(ST0) ||
               floatx80_invalid_encoding(ST1)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST0)) {
        ST1 = ST0;
    } else if (floatx80_is_any_nan(ST1)) {
        /* Pass this NaN through.  */
    } else if (arg0_sign && !floatx80_is_zero(ST0)) {
        /* Logarithm of a negative nonzero number is invalid. */
        float_raise(float_flag_invalid, &env->fp_status);
        ST1 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_infinity(ST1)) {
        FloatRelation cmp = floatx80_compare(ST0, floatx80_one,
                                             &env->fp_status);
        switch (cmp) {
        case float_relation_less:
            /* log2(ST0) is negative, so the infinity changes sign. */
            ST1 = floatx80_chs(ST1);
            break;
        case float_relation_greater:
            /* Result is infinity of the same sign as ST1.  */
            break;
        default:
            /* ST0 == 1: infinity times zero is invalid. */
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
            break;
        }
    } else if (floatx80_is_infinity(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else if (arg1_sign) {
            ST1 = floatx80_chs(ST0);
        } else {
            ST1 = ST0;
        }
    } else if (floatx80_is_zero(ST0)) {
        if (floatx80_is_zero(ST1)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST1 = floatx80_default_nan(&env->fp_status);
        } else {
            /* Result is infinity with opposite sign to ST1.  */
            float_raise(float_flag_divbyzero, &env->fp_status);
            ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff,
                                0x8000000000000000ULL);
        }
    } else if (floatx80_is_zero(ST1)) {
        if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) {
            ST1 = floatx80_chs(ST1);
        }
        /* Otherwise, ST1 is already the correct result.  */
    } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) {
        /* log2(1) is zero; the result is a zero carrying the sign of ST1. */
        if (arg1_sign) {
            ST1 = floatx80_chs(floatx80_zero);
        } else {
            ST1 = floatx80_zero;
        }
    } else {
        int32_t int_exp;
        floatx80 arg0_m1;
        FloatRoundMode save_mode = env->fp_status.float_rounding_mode;
        FloatX80RoundPrec save_prec =
            env->fp_status.floatx80_rounding_precision;
        /* helper_fyl2x_common() needs round-to-nearest, 80-bit precision. */
        env->fp_status.float_rounding_mode = float_round_nearest_even;
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;

        /*
         * Normalize operands whose exponent field is zero.  This is done
         * exactly once per operand: normalizeFloatx80Subnormal() may
         * legitimately yield an exponent of 0 (one leading zero bit), and
         * normalizing such a value again would wrongly bump it to 1.
         */
        if (arg0_exp == 0) {
            normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig);
        }
        if (arg1_exp == 0) {
            normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig);
        }
        /* Choose int_exp so that ST0 * 2^-int_exp is in [sqrt(2)/2, sqrt(2)]. */
        int_exp = arg0_exp - 0x3fff;
        if (arg0_sig > 0xb504f333f9de6484ULL) {
            ++int_exp;
        }
        arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp,
                                               &env->fp_status),
                               floatx80_one, &env->fp_status);
        if (floatx80_is_zero(arg0_m1)) {
            /* Exact power of 2; multiply by ST1.  */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status),
                               ST1, &env->fp_status);
        } else {
            bool asign = extractFloatx80Sign(arg0_m1);
            int32_t aexp;
            uint64_t asig0, asig1, asig2;
            helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1);
            if (int_exp != 0) {
                /* Add the integer part of the logarithm at 128-bit width. */
                bool isign = (int_exp < 0);
                int32_t iexp;
                uint64_t isig;
                int shift;
                int_exp = isign ? -int_exp : int_exp;
                shift = clz32(int_exp) + 32;
                isig = int_exp;
                isig <<= shift;
                iexp = 0x403e - shift;
                shift128RightJamming(asig0, asig1, iexp - aexp,
                                     &asig0, &asig1);
                if (asign == isign) {
                    add128(isig, 0, asig0, asig1, &asig0, &asig1);
                } else {
                    sub128(isig, 0, asig0, asig1, &asig0, &asig1);
                }
                aexp = iexp;
                asign = isign;
            }
            /*
             * Multiply by the second argument to compute the required
             * result.  arg1_sig/arg1_exp were already normalized above.
             */
            mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2);
            aexp += arg1_exp - 0x3ffe;
            /* This result is inexact.  */
            asig1 |= 1;
            /* The final rounding uses the caller's rounding mode. */
            env->fp_status.float_rounding_mode = save_mode;
            ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x,
                                                asign ^ arg1_sign, aexp,
                                                asig0, asig1, &env->fp_status);
        }

        env->fp_status.floatx80_rounding_precision = save_prec;
    }
    fpop(env);
    merge_exception_flags(env, old_flags);
}
2272
/*
 * FSQRT helper: ST0 <- sqrt(ST0).  For a negative operand the condition
 * codes C3, C1 and C0 are cleared and C2 is set before the square root is
 * taken.
 */
void helper_fsqrt(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);

    if (floatx80_is_neg(ST0)) {
        /* (C3,C2,C1,C0) <-- 0100 */
        env->fpus = (env->fpus & ~0x4700) | 0x400;
    }
    ST0 = floatx80_sqrt(ST0, &env->fp_status);
    merge_exception_flags(env, saved_flags);
}
2283
/*
 * FSINCOS helper, computed through the host's double-precision sin()
 * and cos().  Operands with magnitude above MAXTAN are rejected by
 * setting C2 and leaving the register stack untouched.
 */
void helper_fsincos(CPUX86State *env)
{
    double angle = floatx80_to_double(env, ST0);

    if (angle > MAXTAN || angle < -MAXTAN) {
        env->fpus |= 0x400; /* C2 <-- 1: operand out of range */
        return;
    }

    /* The sine replaces ST0; after fpush() the cosine goes into ST0. */
    ST0 = double_to_floatx80(env, sin(angle));
    fpush(env);
    ST0 = double_to_floatx80(env, cos(angle));
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
2298
/*
 * FRNDINT helper: round ST0 to an integral value using the rounding
 * settings held in env->fp_status.
 */
void helper_frndint(CPUX86State *env)
{
    uint8_t saved_flags = save_exception_flags(env);
    floatx80 rounded = floatx80_round_to_int(ST0, &env->fp_status);

    ST0 = rounded;
    merge_exception_flags(env, saved_flags);
}
2305
/*
 * FSCALE helper: ST0 <- ST0 * 2^trunc(ST1).
 *
 * Invalid encodings, a NaN in ST1 and an infinite ST1 are handled
 * explicitly; every other case converts ST1 to an int32 (truncating)
 * and scales ST0 with floatx80_scalbn() at full 80-bit precision.
 */
void helper_fscale(CPUX86State *env)
{
    uint8_t old_flags = save_exception_flags(env);
    if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) {
        float_raise(float_flag_invalid, &env->fp_status);
        ST0 = floatx80_default_nan(&env->fp_status);
    } else if (floatx80_is_any_nan(ST1)) {
        /* A signaling NaN in either operand raises invalid. */
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
        }
        /* The result is ST1's NaN, quieted if it was signaling. */
        ST0 = ST1;
        if (floatx80_is_signaling_nan(ST0, &env->fp_status)) {
            float_raise(float_flag_invalid, &env->fp_status);
            ST0 = floatx80_silence_nan(ST0, &env->fp_status);
        }
    } else if (floatx80_is_infinity(ST1) &&
               !floatx80_invalid_encoding(ST0) &&
               !floatx80_is_any_nan(ST0)) {
        if (floatx80_is_neg(ST1)) {
            /* Scaling by 2^-inf: infinity is invalid, otherwise signed zero. */
            if (floatx80_is_infinity(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_zero) :
                       floatx80_zero);
            }
        } else {
            /* Scaling by 2^+inf: zero is invalid, otherwise signed infinity. */
            if (floatx80_is_zero(ST0)) {
                float_raise(float_flag_invalid, &env->fp_status);
                ST0 = floatx80_default_nan(&env->fp_status);
            } else {
                ST0 = (floatx80_is_neg(ST0) ?
                       floatx80_chs(floatx80_infinity) :
                       floatx80_infinity);
            }
        }
    } else {
        int n;
        FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision;
        uint8_t save_flags = get_float_exception_flags(&env->fp_status);
        /*
         * Convert ST1 to an integer with the exception flags temporarily
         * cleared, then put the saved flags back so that anything raised
         * by the conversion itself is discarded.
         */
        set_float_exception_flags(0, &env->fp_status);
        n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
        set_float_exception_flags(save_flags, &env->fp_status);
        /* Scale at full extended precision, then restore the precision. */
        env->fp_status.floatx80_rounding_precision = floatx80_precision_x;
        ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
        env->fp_status.floatx80_rounding_precision = save;
    }
    merge_exception_flags(env, old_flags);
}
2356
/*
 * FSIN helper, computed through the host's double-precision sin().
 * Operands with magnitude above MAXTAN are rejected by setting C2 and
 * leaving ST0 unchanged.
 */
void helper_fsin(CPUX86State *env)
{
    double angle = floatx80_to_double(env, ST0);

    if (angle > MAXTAN || angle < -MAXTAN) {
        env->fpus |= 0x400; /* C2 <-- 1: operand out of range */
        return;
    }

    ST0 = double_to_floatx80(env, sin(angle));
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
2369
/*
 * FCOS helper, computed through the host's double-precision cos().
 * Operands with magnitude above MAXTAN are rejected by setting C2 and
 * leaving ST0 unchanged.
 */
void helper_fcos(CPUX86State *env)
{
    double angle = floatx80_to_double(env, ST0);

    if (angle > MAXTAN || angle < -MAXTAN) {
        env->fpus |= 0x400; /* C2 <-- 1: operand out of range */
        return;
    }

    ST0 = double_to_floatx80(env, cos(angle));
    env->fpus &= ~0x400; /* C2 <-- 0 */
}
2382
/*
 * FXAM helper: classify ST0 into the condition codes C3/C2/C1/C0.
 * C1 mirrors the sign bit; the other three encode the class (empty,
 * infinity, NaN, zero, denormal or normal).  Encodings matching none of
 * these leave C3, C2 and C0 clear.
 */
void helper_fxam_ST0(CPUX86State *env)
{
    CPU_LDoubleU value;
    uint64_t mantissa;
    int exponent;

    value.d = ST0;

    env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
    if (SIGND(value)) {
        env->fpus |= 0x200; /* C1 <-- 1 */
    }

    /* An empty register is reported regardless of its contents. */
    if (env->fptags[env->fpstt]) {
        env->fpus |= 0x4100; /* Empty */
        return;
    }

    exponent = EXPD(value);
    mantissa = MANTD(value);

    switch (exponent) {
    case MAXEXPD:
        if (mantissa == 0x8000000000000000ULL) {
            env->fpus |= 0x500; /* Infinity */
        } else if (mantissa & 0x8000000000000000ULL) {
            env->fpus |= 0x100; /* NaN */
        }
        break;
    case 0:
        if (mantissa == 0) {
            env->fpus |= 0x4000; /* Zero */
        } else {
            env->fpus |= 0x4400; /* Denormal */
        }
        break;
    default:
        /* Top mantissa bit set with an ordinary exponent: C2 only. */
        if (mantissa & 0x8000000000000000ULL) {
            env->fpus |= 0x400;
        }
        break;
    }
}
2417
/*
 * Store the x87 environment (control word, status word, tag word and
 * the instruction/operand pointer fields) at @ptr.
 *
 * @ac:     prepared access descriptor; ac->env is the CPU state.
 * @ptr:    guest address of the environment image.
 * @data32: nonzero selects the layout with 32-bit fields at 4-byte
 *          spacing; zero selects 16-bit fields at 2-byte spacing.
 */
static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int fpus, fptag, exp, i;
    uint64_t mant;
    CPU_LDoubleU tmp;

    /* The status word carries the stack top (fpstt) in bits 11..13. */
    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;

    /*
     * Rebuild the 2-bit-per-register tag word: 3 for a register marked
     * in env->fptags, otherwise 1 for zero, 2 for the special classes
     * below, and 0 for everything else.  Register 7 is processed first
     * so that it ends up in the most significant bit pair.
     */
    fptag = 0;
    for (i = 7; i >= 0; i--) {
        fptag <<= 2;
        if (env->fptags[i]) {
            fptag |= 3;
        } else {
            tmp.d = env->fpregs[i].d;
            exp = EXPD(tmp);
            mant = MANTD(tmp);
            if (exp == 0 && mant == 0) {
                /* zero */
                fptag |= 1;
            } else if (exp == 0 || exp == MAXEXPD
                       || (mant & 0x8000000000000000ULL) == 0) {
                /*
                 * NaNs, infinity, denormal.  The top-bit test uses an
                 * unsigned constant: shifting a signed 1 left by 63 bits
                 * overflows long long.
                 */
                fptag |= 2;
            }
        }
    }
    if (data32) {
        /* 32 bit */
        access_stl(ac, ptr, env->fpuc);
        access_stl(ac, ptr + 4, fpus);
        access_stl(ac, ptr + 8, fptag);
        access_stl(ac, ptr + 12, env->fpip); /* fpip */
        access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
        access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
        access_stl(ac, ptr + 24, env->fpds); /* fpos */
    } else {
        /* 16 bit */
        access_stw(ac, ptr, env->fpuc);
        access_stw(ac, ptr + 2, fpus);
        access_stw(ac, ptr + 4, fptag);
        access_stw(ac, ptr + 6, env->fpip);
        access_stw(ac, ptr + 8, env->fpcs);
        access_stw(ac, ptr + 10, env->fpdp);
        access_stw(ac, ptr + 12, env->fpds);
    }
}
2465
/*
 * FSTENV helper: prepare a store access covering the environment image
 * (14 bytes, doubled when data32 is 1) and write it out.
 */
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t retaddr = GETPC();
    int image_size = 14 << data32;
    X86Access ac;

    access_prepare(&ac, env, ptr, image_size, MMU_DATA_STORE, retaddr);
    do_fstenv(&ac, ptr, data32);
}
2473
/*
 * Install a status word image: bits 11..13 go to env->fpstt, the rest
 * goes to env->fpus with the B flag recomputed from the SE flag.
 */
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
{
    uint16_t status = fpus & ~0x3800 & ~FPUS_B;

    /* B is not taken from the image; it simply mirrors SE. */
    if (status & FPUS_SE) {
        status |= FPUS_B;
    }

    env->fpstt = (fpus >> 11) & 7;
    env->fpus = status;
#if !defined(CONFIG_USER_ONLY)
    if ((env->fpus & FPUS_SE) == 0) {
        /*
         * Here the processor deasserts FERR#; in response, the chipset
         * deasserts IGNNE#.
         */
        cpu_clear_ignne();
    }
#endif
}
2489
/*
 * Load the control, status and tag words of an x87 environment image at
 * @ptr.  The field spacing is 2 bytes, doubled when @data32 is 1.  Only
 * the "empty" tag value (3) is retained, as a per-register flag.
 */
static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    int status_word, tag_word;

    cpu_set_fpuc(env, access_ldw(ac, ptr));
    status_word = access_ldw(ac, ptr + (2 << data32));
    tag_word = access_ldw(ac, ptr + (4 << data32));

    cpu_set_fpus(env, status_word);
    for (int reg = 0; reg < 8; reg++) {
        /* Two tag bits per register, register 0 in the lowest pair. */
        env->fptags[reg] = (((tag_word >> (2 * reg)) & 3) == 3);
    }
}
2505
/*
 * FLDENV helper: load the x87 environment from guest memory at @ptr.
 *
 * The image is 14 bytes, doubled when @data32 is 1.  do_fldenv() only
 * reads from it, so the access is prepared as a data load; preparing it
 * as a store would demand write permission on memory that is never
 * written.
 */
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
    X86Access ac;

    access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_LOAD, GETPC());
    do_fldenv(&ac, ptr, data32);
}
2513
/*
 * Write a full x87 state image at @ptr: the environment, followed by
 * ST(0)..ST(7) at 10-byte intervals, then run do_fninit() on the CPU
 * state.
 */
static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    target_ulong reg_addr = ptr + (14 << data32);

    do_fstenv(ac, ptr, data32);

    for (int n = 0; n < 8; n++, reg_addr += 10) {
        floatx80 value = ST(n);

        do_fstt(ac, reg_addr, value);
    }

    do_fninit(env);
}
2529
/*
 * FSAVE helper: prepare a store access covering the environment plus
 * eight 10-byte registers, then write the image.
 */
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t retaddr = GETPC();
    int env_bytes = 14 << data32;
    int reg_bytes = 80;
    X86Access ac;

    access_prepare(&ac, env, ptr, env_bytes + reg_bytes,
                   MMU_DATA_STORE, retaddr);
    do_fsave(&ac, ptr, data32);
}
2538
/*
 * Read a full x87 state image from @ptr: the environment first, then
 * ST(0)..ST(7) from consecutive 10-byte slots.
 */
static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
{
    CPUX86State *env = ac->env;
    target_ulong reg_addr = ptr + (14 << data32);

    do_fldenv(ac, ptr, data32);

    for (int n = 0; n < 8; n++, reg_addr += 10) {
        floatx80 value = do_fldt(ac, reg_addr);

        ST(n) = value;
    }
}
2552
/*
 * FRSTOR helper: prepare a load access covering the environment plus
 * eight 10-byte registers, then read the image.
 */
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    uintptr_t retaddr = GETPC();
    int env_bytes = 14 << data32;
    int reg_bytes = 80;
    X86Access ac;

    access_prepare(&ac, env, ptr, env_bytes + reg_bytes,
                   MMU_DATA_LOAD, retaddr);
    do_frstor(&ac, ptr, data32);
}
2561
/*
 * Byte offset of member X within X86XSaveArea; added to the save-area
 * base address to locate individual fields and components.
 */
#define XO(X) offsetof(X86XSaveArea, X)
2563
/*
 * Write the x87 part of the legacy save area at @ptr: control word,
 * status word, the one-bit-per-register tag byte, zeroed instruction and
 * data pointers, and ST(0)..ST(7) in 16-byte slots.
 */
static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    int status_word = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    int empty_mask = 0;
    target_ulong slot = ptr + XO(legacy.fpregs);

    /* Collect the per-register flags from env->fptags into one mask. */
    for (int reg = 0; reg < 8; reg++) {
        empty_mask |= (env->fptags[reg] << reg);
    }

    access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
    access_stw(ac, ptr + XO(legacy.fsw), status_word);
    /* The stored tag byte is the inverse of the mask built above. */
    access_stw(ac, ptr + XO(legacy.ftw), empty_mask ^ 0xff);

    /*
     * In 32-bit mode this is eip, sel, dp, sel.
     * In 64-bit mode this is rip, rdp.
     * But in either case we don't write actual data, just zeros.
     */
    access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
    access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */

    for (int reg = 0; reg < 8; reg++, slot += 16) {
        floatx80 value = ST(reg);

        do_fstt(ac, slot, value);
    }
}
2594
/*
 * Write MXCSR and the MXCSR mask (fixed at 0x0000ffff) into the legacy
 * save area at @ptr, after refreshing env->mxcsr via
 * update_mxcsr_from_sse_status().
 */
static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);
    const target_ulong mask_addr = ptr + XO(legacy.mxcsr_mask);

    update_mxcsr_from_sse_status(env);
    access_stl(ac, mxcsr_addr, env->mxcsr);
    access_stl(ac, mask_addr, 0x0000ffff);
}
2603
/*
 * Write the low 128 bits of each XMM register into the legacy save
 * area at @ptr.  Sixteen registers are stored when HF_CS64_MASK is set
 * in hflags, eight otherwise.
 */
static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int reg_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong slot = ptr + XO(legacy.xmm_regs);

    for (int reg = 0; reg < reg_count; reg++, slot += 16) {
        access_stq(ac, slot, env->xmm_regs[reg].ZMM_Q(0));
        access_stq(ac, slot + 8, env->xmm_regs[reg].ZMM_Q(1));
    }
}
2623
/*
 * Write quadwords 2 and 3 of each vector register (bits 128..255) to
 * consecutive 16-byte slots starting at @ptr.  Sixteen registers are
 * stored when HF_CS64_MASK is set in hflags, eight otherwise.
 */
static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int reg_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    target_ulong slot = ptr;

    for (int reg = 0; reg < reg_count; reg++) {
        access_stq(ac, slot, env->xmm_regs[reg].ZMM_Q(2));
        access_stq(ac, slot + 8, env->xmm_regs[reg].ZMM_Q(3));
        slot += 16;
    }
}
2640
/*
 * Write the four bound registers (lower bound, then upper bound) into
 * the XSaveBNDREG component located at @ptr.
 */
static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    target_ulong slot = ptr + offsetof(XSaveBNDREG, bnd_regs);

    for (int reg = 0; reg < 4; reg++) {
        access_stq(ac, slot, env->bnd_regs[reg].lb);
        access_stq(ac, slot + 8, env->bnd_regs[reg].ub);
        slot += 16;
    }
}
2652
/*
 * Write the bound configuration (cfgu) and status (sts) registers into
 * the XSaveBNDCSR component located at @ptr.
 */
static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const target_ulong cfgu_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu);
    const target_ulong sts_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.sts);

    access_stq(ac, cfgu_addr, env->bndcs_regs.cfgu);
    access_stq(ac, sts_addr, env->bndcs_regs.sts);
}
2662
/* Write the PKRU value as one quadword at @ptr. */
static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    access_stq(ac, ptr, env->pkru);
}
2667
/*
 * Write an FXSAVE image at @ptr.  The x87 part is always stored.  MXCSR
 * is stored only when CR4.OSFXSR is set, and the XMM registers are
 * stored in addition unless the fast-FXSAVE conditions all hold
 * (EFER.FFXSR set, CPL bits clear, LMA set).
 */
static void do_fxsave(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool fast_fxsave;

    do_xsave_fpu(ac, ptr);
    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }

    do_xsave_mxcsr(ac, ptr);

    /* Fast FXSAVE leaves out the XMM registers */
    fast_fxsave = (env->efer & MSR_EFER_FFXSR)
                  && !(env->hflags & HF_CPL_MASK)
                  && (env->hflags & HF_LMA_MASK);
    if (!fast_fxsave) {
        do_xsave_sse(ac, ptr);
    }
}
2683
/*
 * FXSAVE helper: raise #GP for an operand that is not 16-byte aligned,
 * otherwise prepare a store access the size of X86LegacyXSaveArea and
 * write the image.
 */
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    X86Access ac;
    uintptr_t retaddr = GETPC();

    /* The operand must be 16 byte aligned */
    if ((ptr & 0xf) != 0) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_STORE, retaddr);
    do_fxsave(&ac, ptr);
}
2698
get_xinuse(CPUX86State * env)2699 static uint64_t get_xinuse(CPUX86State *env)
2700 {
2701 uint64_t inuse = -1;
2702
2703 /* For the most part, we don't track XINUSE. We could calculate it
2704 here for all components, but it's probably less work to simply
2705 indicate in use. That said, the state of BNDREGS is important
2706 enough to track in HFLAGS, so we might as well use that here. */
2707 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
2708 inuse &= ~XSTATE_BNDREGS_MASK;
2709 }
2710 return inuse;
2711 }
2712
/*
 * Store the state components selected by @opt into the XSAVE area at
 * @ptr, then update the header's XSTATE_BV field.
 *
 * @rfbm:  requested-feature bitmap; selects which XSTATE_BV bits are
 *         rewritten and whether MXCSR is stored.
 * @inuse: in-use bitmap supplying the new values of those bits.
 * @opt:   components whose contents are actually written.
 */
static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
                            uint64_t inuse, uint64_t opt)
{
    const target_ulong bv_addr = ptr + XO(header.xstate_bv);
    uint64_t bv;

    if (opt & XSTATE_FP_MASK) {
        do_xsave_fpu(ac, ptr);
    }
    /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
    if (rfbm & XSTATE_SSE_MASK) {
        do_xsave_mxcsr(ac, ptr);
    }
    if (opt & XSTATE_SSE_MASK) {
        do_xsave_sse(ac, ptr);
    }
    if (opt & XSTATE_YMM_MASK) {
        do_xsave_ymmh(ac, ptr + XO(avx_state));
    }
    if (opt & XSTATE_BNDREGS_MASK) {
        do_xsave_bndregs(ac, ptr + XO(bndreg_state));
    }
    if (opt & XSTATE_BNDCSR_MASK) {
        do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
    }
    if (opt & XSTATE_PKRU_MASK) {
        do_xsave_pkru(ac, ptr + XO(pkru_state));
    }

    /*
     * Update the XSTATE_BV field: bits outside rfbm keep their stored
     * value, bits inside rfbm are taken from inuse.
     */
    bv = access_ldq(ac, bv_addr);
    bv &= ~rfbm;
    bv |= inuse & rfbm;
    access_stq(ac, bv_addr, bv);
}
2746
/*
 * Common precondition checks for the XSAVE family: raise #UD when
 * CR4.OSXSAVE is clear, then #GP when @ptr is not 64-byte aligned.
 */
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    const bool os_enabled = (env->cr[4] & CR4_OSXSAVE_MASK) != 0;
    const bool aligned = (ptr & 63) == 0;

    /* The OS must have enabled XSAVE. */
    if (!os_enabled) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned. */
    if (!aligned) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }
}
2759
/*
 * Common body of the XSAVE-style helpers: check preconditions, narrow the
 * requested masks, probe the destination for stores and write the state.
 *
 * @rfbm:  requested-feature bitmap supplied by the caller.
 * @inuse: in-use bitmap forwarded to do_xsave_access().
 * @opt:   components whose contents should be written (before masking).
 * @ra:    return address used if an exception has to be raised.
 */
static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    X86Access ac;
    uint64_t enabled_rfbm;
    uint64_t to_write;
    unsigned area_size;

    do_xsave_chk(env, ptr, ra);

    /* Never save anything not enabled by XCR0. */
    enabled_rfbm = rfbm & env->xcr0;
    to_write = opt & enabled_rfbm;

    /* The probed size depends only on the components actually written. */
    area_size = xsave_area_size(to_write, false);
    access_prepare(&ac, env, ptr, area_size, MMU_DATA_STORE, ra);

    do_xsave_access(&ac, ptr, enabled_rfbm, inuse, to_write);
}
2776
/*
 * XSAVE helper: every component requested in @rfbm is also a candidate
 * for being written (opt == rfbm).
 */
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t ra = GETPC();
    const uint64_t in_use = get_xinuse(env);

    do_xsave(env, ptr, rfbm, in_use, rfbm, ra);
}
2781
/*
 * XSAVEOPT helper: only components reported by get_xinuse() are candidates
 * for being written, so the in-use bitmap doubles as the opt mask.
 */
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    const uintptr_t ra = GETPC();
    const uint64_t in_use = get_xinuse(env);

    do_xsave(env, ptr, rfbm, in_use, in_use, ra);
}
2787
/*
 * Load the x87 state (control word, status word, tags and the eight
 * stack registers) from the legacy region of the save area at @ptr.
 */
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int ctrl_word = access_ldw(ac, ptr + XO(legacy.fcw));
    const int status_word = access_ldw(ac, ptr + XO(legacy.fsw));
    const int tag_bits = access_ldw(ac, ptr + XO(legacy.ftw));
    const target_ulong regs_base = ptr + XO(legacy.fpregs);
    int reg;

    /*
     * ST() indexes relative to env->fpstt; presumably cpu_set_fpus()
     * refreshes it, so the status word is installed before the register
     * loop below -- confirm before reordering.
     */
    cpu_set_fpuc(env, ctrl_word);
    cpu_set_fpus(env, status_word);

    /* The stored tag bits are inverted relative to env->fptags[]. */
    for (reg = 0; reg < 8; reg++) {
        env->fptags[reg] = !((tag_bits >> reg) & 1);
    }

    /* Each register occupies a 16-byte slot in the save area. */
    for (reg = 0; reg < 8; reg++) {
        floatx80 value = do_fldt(ac, regs_base + 16 * reg);

        ST(reg) = value;
    }
}
2813
/* Load MXCSR from the legacy region at @ptr and install it via cpu_set_mxcsr(). */
static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
    const target_ulong mxcsr_addr = ptr + XO(legacy.mxcsr);

    cpu_set_mxcsr(ac->env, access_ldl(ac, mxcsr_addr));
}
2819
/*
 * Load the low 128 bits of the XMM registers from the legacy region at
 * @ptr: 16 registers when HF_CS64_MASK is set in hflags, otherwise 8.
 */
static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int reg_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    const target_ulong base = ptr + XO(legacy.xmm_regs);
    int reg;

    for (reg = 0; reg < reg_count; reg++) {
        /* One 16-byte slot per register: low quadword, then high. */
        const target_ulong slot = base + 16 * reg;

        env->xmm_regs[reg].ZMM_Q(0) = access_ldq(ac, slot);
        env->xmm_regs[reg].ZMM_Q(1) = access_ldq(ac, slot + 8);
    }
}
2839
/*
 * Zero the low 128 bits of the XMM registers: 16 registers when
 * HF_CS64_MASK is set in hflags, otherwise 8.
 */
static void do_clear_sse(CPUX86State *env)
{
    const int reg_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int reg = 0;

    while (reg < reg_count) {
        env->xmm_regs[reg].ZMM_Q(0) = 0;
        env->xmm_regs[reg].ZMM_Q(1) = 0;
        reg++;
    }
}
2855
/*
 * Load quadwords 2 and 3 (the upper YMM half) of each vector register
 * from @ptr: 16 registers when HF_CS64_MASK is set, otherwise 8.
 */
static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const int reg_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int reg;

    for (reg = 0; reg < reg_count; reg++) {
        /* One 16-byte slot per register. */
        const target_ulong slot = ptr + 16 * reg;

        env->xmm_regs[reg].ZMM_Q(2) = access_ldq(ac, slot);
        env->xmm_regs[reg].ZMM_Q(3) = access_ldq(ac, slot + 8);
    }
}
2872
/*
 * Zero quadwords 2 and 3 (the upper YMM half) of each vector register:
 * 16 registers when HF_CS64_MASK is set in hflags, otherwise 8.
 */
static void do_clear_ymmh(CPUX86State *env)
{
    const int reg_count = (env->hflags & HF_CS64_MASK) ? 16 : 8;
    int reg = 0;

    while (reg < reg_count) {
        env->xmm_regs[reg].ZMM_Q(2) = 0;
        env->xmm_regs[reg].ZMM_Q(3) = 0;
        reg++;
    }
}
2888
/*
 * Load the four bound registers from the XSaveBNDREG area at @ptr.
 * Each register is stored as a lower bound followed by an upper bound.
 */
static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const target_ulong base = ptr + offsetof(XSaveBNDREG, bnd_regs);
    int reg;

    for (reg = 0; reg < 4; reg++) {
        const target_ulong slot = base + 16 * reg;

        env->bnd_regs[reg].lb = access_ldq(ac, slot);
        env->bnd_regs[reg].ub = access_ldq(ac, slot + 8);
    }
}
2900
/* Load the bounds configuration and status values from the XSaveBNDCSR area at @ptr. */
static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    const target_ulong cfgu_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu);
    const target_ulong sts_addr = ptr + offsetof(XSaveBNDCSR, bndcsr.sts);

    /* FIXME: Extend highest implemented bit of linear address. */
    env->bndcs_regs.cfgu = access_ldq(ac, cfgu_addr);
    env->bndcs_regs.sts = access_ldq(ac, sts_addr);
}
2911
/* Load PKRU from the 64-bit value stored at @ptr. */
static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;

    env->pkru = access_ldq(ac, ptr);
}
2916
/*
 * Restore state from an FXSAVE image at @ptr.
 *
 * The x87 state is always loaded.  MXCSR is loaded only when CR4.OSFXSR
 * is set, and the XMM registers additionally require that the fast
 * FXRSTOR conditions do not apply.
 */
static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
    CPUX86State *env = ac->env;
    bool fast_fxrstor;

    do_xrstor_fpu(ac, ptr);

    if (!(env->cr[4] & CR4_OSFXSR_MASK)) {
        return;
    }

    do_xrstor_mxcsr(ac, ptr);

    /*
     * Fast FXRSTOR leaves out the XMM registers.  It applies only when
     * EFER.FFXSR is set, the CPL bits in hflags are zero and
     * HF_LMA_MASK is set.
     */
    fast_fxrstor = (env->efer & MSR_EFER_FFXSR)
                   && !(env->hflags & HF_CPL_MASK)
                   && (env->hflags & HF_LMA_MASK);
    if (!fast_fxrstor) {
        do_xrstor_sse(ac, ptr);
    }
}
2932
/*
 * FXRSTOR helper.  Raises EXCP0D_GPF if @ptr is not 16-byte aligned,
 * otherwise probes the legacy save area for loads and restores from it.
 */
void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    const uintptr_t ra = GETPC();
    const bool aligned_16 = (ptr & 0xf) == 0;
    X86Access ac;

    /* The operand must be 16 byte aligned */
    if (!aligned_16) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
                   MMU_DATA_LOAD, ra);
    do_fxrstor(&ac, ptr);
}
2947
/*
 * Read the XSAVE header at @ptr and check that XRSTOR can accept it.
 *
 * The XSTATE_BV value read from the header is stored through @pxsbv
 * regardless of the result.  Returns true when the header is usable.
 */
static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
                                target_ulong ptr)
{
    const uint64_t bv = access_ldq(ac, ptr + XO(header.xstate_bv));
    const uint64_t comp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
    const uint64_t reserved = access_ldq(ac, ptr + XO(header.reserve0));
    bool standard_form;
    bool within_xcr0;

    *pxsbv = bv;

    /*
     * XCOMP_BV bit 63 indicates compact form, which we do not support,
     * and thus must raise #GP.  That leaves us in standard form.
     * In standard form, bytes 23:8 must be zero -- which is both
     * XCOMP_BV and the following 64-bit field.
     */
    standard_form = (comp_bv | reserved) == 0;

    /* The XSTATE_BV field must not set bits not present in XCR0. */
    within_xcr0 = (bv & ~ac->env->xcr0) == 0;

    return standard_form && within_xcr0;
}
2971
/*
 * Restore the components selected by @rfbm from the XSAVE area at @ptr.
 *
 * A requested component whose bit is set in @xstate_bv is loaded from
 * memory; a requested component whose bit is clear is reset instead.
 * Components outside @rfbm are left untouched.
 */
static void do_xrstor(X86Access *ac, target_ulong ptr,
                      uint64_t rfbm, uint64_t xstate_bv)
{
    CPUX86State *env = ac->env;
    /* Requested and present in the image: load from memory. */
    const uint64_t load = rfbm & xstate_bv;
    /* Requested but absent from the image: reset. */
    const uint64_t reset = rfbm & ~xstate_bv;

    if (load & XSTATE_FP_MASK) {
        do_xrstor_fpu(ac, ptr);
    } else if (reset & XSTATE_FP_MASK) {
        do_fninit(env);
        memset(env->fpregs, 0, sizeof(env->fpregs));
    }

    /*
     * Note that the standard form of XRSTOR loads MXCSR from memory
     * whether or not the XSTATE_BV bit is set.
     */
    if (rfbm & XSTATE_SSE_MASK) {
        do_xrstor_mxcsr(ac, ptr);
    }
    if (load & XSTATE_SSE_MASK) {
        do_xrstor_sse(ac, ptr);
    } else if (reset & XSTATE_SSE_MASK) {
        do_clear_sse(env);
    }

    if (load & XSTATE_YMM_MASK) {
        do_xrstor_ymmh(ac, ptr + XO(avx_state));
    } else if (reset & XSTATE_YMM_MASK) {
        do_clear_ymmh(env);
    }

    /* HF_MPX_IU_MASK in hflags mirrors whether BNDREGS was loaded. */
    if (load & XSTATE_BNDREGS_MASK) {
        do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
        env->hflags |= HF_MPX_IU_MASK;
    } else if (reset & XSTATE_BNDREGS_MASK) {
        memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
        env->hflags &= ~HF_MPX_IU_MASK;
    }

    if (rfbm & XSTATE_BNDCSR_MASK) {
        if (load & XSTATE_BNDCSR_MASK) {
            do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }

    if (rfbm & XSTATE_PKRU_MASK) {
        const uint64_t previous_pkru = env->pkru;

        if (load & XSTATE_PKRU_MASK) {
            do_xrstor_pkru(ac, ptr + XO(pkru_state));
        } else {
            env->pkru = 0;
        }

        /* The TLB is flushed only when the PKRU value actually changed. */
        if (env->pkru != previous_pkru) {
            tlb_flush(env_cpu(env));
        }
    }
}
3032
3033 #undef XO
3034
/*
 * XRSTOR helper.
 *
 * Runs the common XSAVE checks, validates the header (raising
 * EXCP0D_GPF if it is rejected) and restores the components selected
 * by @rfbm after masking it with XCR0.
 */
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    /* Minimum size needed to validate the header. */
    const unsigned header_end = sizeof(X86LegacyXSaveArea)
                                + sizeof(X86XSaveHeader);
    const uintptr_t ra = GETPC();
    X86Access ac;
    uint64_t xstate_bv;
    unsigned needed;

    do_xsave_chk(env, ptr, ra);

    access_prepare(&ac, env, ptr, header_end, MMU_DATA_LOAD, ra);
    if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    rfbm &= env->xcr0;

    /* Probe again only if the components to load extend past the header. */
    needed = xsave_area_size(rfbm & xstate_bv, false);
    if (needed > header_end) {
        /* TODO: See if existing page probe has covered extra size. */
        access_prepare(&ac, env, ptr, needed, MMU_DATA_LOAD, ra);
    }

    do_xrstor(&ac, ptr, rfbm, xstate_bv);
}
3060
3061 #if defined(CONFIG_USER_ONLY)
/*
 * Write an FSAVE image into the host buffer @host of @len bytes.
 * Asserts that the buffer is large enough for the image.
 */
void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
{
    /*
     * Presumably seven 4-byte environment fields plus eight 10-byte
     * registers -- confirm against do_fsave().
     */
    enum { FSAVE_IMAGE_SIZE = 4 * 7 + 8 * 10 };
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = FSAVE_IMAGE_SIZE,
    };

    assert(ac.size <= len);
    do_fsave(&ac, 0, true);
}
3073
/*
 * Load an FSAVE-format image from the host buffer @host of @len bytes.
 * Asserts that the buffer is large enough for the image.
 */
void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
{
    /*
     * Presumably seven 4-byte environment fields plus eight 10-byte
     * registers -- confirm against do_frstor().
     */
    enum { FSAVE_IMAGE_SIZE = 4 * 7 + 8 * 10 };
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = FSAVE_IMAGE_SIZE,
    };

    assert(ac.size <= len);
    do_frstor(&ac, 0, true);
}
3085
/*
 * Write an FXSAVE image (sizeof(X86LegacyXSaveArea) bytes) into the host
 * buffer @host of @len bytes.  Asserts that the buffer is large enough.
 */
void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
    };

    assert(ac.size <= len);

    /* The accessor is host-backed, so the image starts at offset 0. */
    do_fxsave(&ac, 0);
}
3097
/*
 * Load an FXSAVE image (sizeof(X86LegacyXSaveArea) bytes) from the host
 * buffer @host of @len bytes.  Asserts that the buffer is large enough.
 */
void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
{
    X86Access ac = {
        .env = env,
        .haddr1 = host,
        .size = sizeof(X86LegacyXSaveArea),
    };

    assert(ac.size <= len);

    /* The accessor is host-backed, so the image starts at offset 0. */
    do_fxrstor(&ac, 0);
}
3109
/*
 * Write an XSAVE image for the components in @rfbm into the host buffer
 * @host of @len bytes.  Asserts that @rfbm is a subset of XCR0 and that
 * the buffer is large enough.
 */
void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = {
        .env = env,
        .haddr1 = host,
    };
    uint64_t in_use;

    /*
     * Since this is only called from user-level signal handling,
     * we should have done the job correctly there.
     */
    assert((rfbm & ~env->xcr0) == 0);

    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);

    /* All requested components are written: opt is rfbm itself. */
    in_use = get_xinuse(env);
    do_xsave_access(&ac, 0, rfbm, in_use, rfbm);
}
3126
/*
 * Restore the components in @rfbm from an XSAVE image held in the host
 * buffer @host of @len bytes.
 *
 * Asserts that @rfbm is a subset of XCR0 and that the buffer is large
 * enough.  Returns false, restoring nothing, when the header is
 * rejected by valid_xrstor_header(); returns true otherwise.
 */
bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
    X86Access ac = {
        .env = env,
        .haddr1 = host,
    };
    uint64_t xstate_bv;
    bool header_ok;

    /*
     * Since this is only called from user-level signal handling,
     * we should have done the job correctly there.
     */
    assert((rfbm & ~env->xcr0) == 0);

    ac.size = xsave_area_size(rfbm, false);
    assert(ac.size <= len);

    header_ok = valid_xrstor_header(&ac, &xstate_bv, 0);
    if (header_ok) {
        do_xrstor(&ac, 0, rfbm, xstate_bv);
    }
    return header_ok;
}
3149 #endif
3150
/*
 * XGETBV helper.
 *
 * @ecx == 0 returns XCR0.  @ecx == 1 returns XCR0 masked by the in-use
 * bitmap, but only when CPUID_XSAVE_XGETBV1 is set in the XSAVE feature
 * word.  Raises EXCP06_ILLOP when CR4.OSXSAVE is clear and EXCP0D_GPF
 * for any other @ecx.
 */
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    const uintptr_t ra = GETPC();

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    if (ecx == 0) {
        return env->xcr0;
    }

    if (ecx == 1 && (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1)) {
        return env->xcr0 & get_xinuse(env);
    }

    raise_exception_ra(env, EXCP0D_GPF, ra);
}
3169
/*
 * Decide whether XSETBV may write @mask into the register selected by
 * @ecx.  Returns true when the write is acceptable.
 */
static bool xsetbv_request_is_valid(CPUX86State *env, uint32_t ecx,
                                    uint64_t mask)
{
    uint32_t unused, supported_lo, supported_hi;
    uint64_t supported;
    bool bndregs_on, bndcsr_on;

    /* Only XCR0 is defined at present; the FPU may not be disabled. */
    if (ecx != 0 || !(mask & XSTATE_FP_MASK)) {
        return false;
    }

    /* SSE can be disabled, but only if AVX is disabled too. */
    if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) {
        return false;
    }

    /* Disallow enabling unimplemented features. */
    cpu_x86_cpuid(env, 0x0d, 0, &supported_lo, &unused, &unused,
                  &supported_hi);
    supported = ((uint64_t)supported_hi << 32) | supported_lo;
    if (mask & ~supported) {
        return false;
    }

    /* Disallow enabling only half of MPX. */
    bndregs_on = (mask & XSTATE_BNDREGS_MASK) != 0;
    bndcsr_on = (mask & XSTATE_BNDCSR_MASK) != 0;
    return bndregs_on == bndcsr_on;
}

/*
 * XSETBV helper.
 *
 * Raises EXCP06_ILLOP when CR4.OSXSAVE is clear and EXCP0D_GPF when the
 * request is rejected.  Otherwise stores @mask into XCR0 and
 * resynchronises the hflags derived from it.
 */
void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    const uintptr_t ra = GETPC();

    /* The OS must have enabled XSAVE. */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    if (!xsetbv_request_is_valid(env, ecx, mask)) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    cpu_sync_avx_hflag(env);
}
3211
3212 /* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
3214
/* MXCSR fields consumed by update_mxcsr_status(). */
/* Denormals-are-zero control bit. */
#define SSE_DAZ 0x0040
/* Position and mask of the two-bit rounding-control field. */
#define SSE_RC_SHIFT 13
#define SSE_RC_MASK (3 << SSE_RC_SHIFT)
/* Flush-to-zero control bit. */
#define SSE_FZ 0x8000
3219
/*
 * Push the current MXCSR value into env->sse_status: rounding mode,
 * exception flags, denormals-are-zero and flush-to-zero.
 */
void update_mxcsr_status(CPUX86State *env)
{
    const uint32_t mxcsr = env->mxcsr;
    int rounding;
    int exc_flags = 0;

    /* set rounding mode */
    rounding = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT;
    set_x86_rounding_mode(rounding, &env->sse_status);

    /* Set exception flags. */
    if (mxcsr & FPUS_IE) {
        exc_flags |= float_flag_invalid;
    }
    if (mxcsr & FPUS_ZE) {
        exc_flags |= float_flag_divbyzero;
    }
    if (mxcsr & FPUS_OE) {
        exc_flags |= float_flag_overflow;
    }
    if (mxcsr & FPUS_UE) {
        exc_flags |= float_flag_underflow;
    }
    if (mxcsr & FPUS_PE) {
        exc_flags |= float_flag_inexact;
    }
    set_float_exception_flags(exc_flags, &env->sse_status);

    /* set denormals are zero */
    set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);

    /* set flush to zero */
    set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status);
}
3243
/*
 * Fold the exception flags accumulated in env->sse_status into the
 * corresponding MXCSR bits.  Bits are only ever set here, never cleared.
 */
void update_mxcsr_from_sse_status(CPUX86State *env)
{
    /*
     * Keep the flags in an int rather than a uint8_t so that no flag
     * bit above 0x80 can be truncated away.
     * NOTE(review): presumably this matches the return type of
     * get_float_exception_flags() -- confirm against its declaration.
     */
    int flags = get_float_exception_flags(&env->sse_status);

    /*
     * The MXCSR denormal flag has opposite semantics to
     * float_flag_input_denormal (the softfloat code sets that flag
     * only when flushing input denormals to zero, but SSE sets it
     * only when not flushing them to zero), so is not converted
     * here.
     */
    env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) |
                   (flags & float_flag_divbyzero ? FPUS_ZE : 0) |
                   (flags & float_flag_overflow ? FPUS_OE : 0) |
                   (flags & float_flag_underflow ? FPUS_UE : 0) |
                   (flags & float_flag_inexact ? FPUS_PE : 0) |
                   /* An output denormal is reported as underflow + inexact. */
                   (flags & float_flag_output_denormal ? FPUS_UE | FPUS_PE :
                    0));
}
3262
/* Helper entry point: forwards to update_mxcsr_from_sse_status(). */
void helper_update_mxcsr(CPUX86State *env)
{
    update_mxcsr_from_sse_status(env);
}
3267
/* Helper entry point: installs @val as the new MXCSR via cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
3272
/*
 * Enter MMX mode: reset the x87 stack top to 0 and clear every tag.
 */
void helper_enter_mmx(CPUX86State *env)
{
    env->fpstt = 0;

    /*
     * Clear all tags with memset() instead of storing through a cast
     * uint32_t pointer, which type-puns the tag array and assumes it is
     * 4-byte aligned.
     * NOTE(review): assumes env->fptags is an array of eight one-byte
     * tags, as the previous pair of 32-bit stores did -- confirm.
     */
    memset(env->fptags, 0, sizeof(env->fptags));
}
3279
/*
 * EMMS helper: set every x87 tag to 1, the empty state.
 */
void helper_emms(CPUX86State *env)
{
    /*
     * set to empty state
     *
     * Use memset() instead of storing 0x01010101 through a cast uint32_t
     * pointer, which type-puns the tag array and assumes it is 4-byte
     * aligned.
     * NOTE(review): assumes env->fptags is an array of eight one-byte
     * tags, as the previous pair of 32-bit stores did -- confirm.
     */
    memset(env->fptags, 1, sizeof(env->fptags));
}
3286
3287 #define SHIFT 0
3288 #include "ops_sse.h"
3289
3290 #define SHIFT 1
3291 #include "ops_sse.h"
3292
3293 #define SHIFT 2
3294 #include "ops_sse.h"
3295