/*-
 * Copyright (c) 1990 William Jolitz.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)npx.c	7.2 (Berkeley) 5/12/91
 */

#include <sys/cdefs.h>
#include "opt_cpu.h"
#include "opt_isa.h"
#include "opt_npx.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <machine/bus.h>
#include <sys/rman.h>
#ifdef NPX_DEBUG
#include <sys/syslog.h>
#endif
#include <sys/signalvar.h>
#include <vm/uma.h>

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/resource.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/ucontext.h>
#include <x86/ifunc.h>

#include <machine/intr_machdep.h>

#ifdef DEV_ISA
#include <isa/isavar.h>
#endif

/*
 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
 */

#define	fldcw(cw)		__asm __volatile("fldcw %0" : : "m" (cw))
#define	fnclex()		__asm __volatile("fnclex")
#define	fninit()		__asm __volatile("fninit")
#define	fnsave(addr)		__asm __volatile("fnsave %0" : "=m" (*(addr)))
#define	fnstcw(addr)		__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	fnstsw(addr)		__asm __volatile("fnstsw %0" : "=am" (*(addr)))
#define	fp_divide_by_0()	__asm __volatile( \
				    "fldz; fld1; fdiv %st,%st(1); fnop")
#define	frstor(addr)		__asm __volatile("frstor %0" : : "m" (*(addr)))
#define	fxrstor(addr)		__asm __volatile("fxrstor %0" : : "m" (*(addr)))
#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
#define	ldmxcsr(csr)		__asm __volatile("ldmxcsr %0" : : "m" (csr))
#define	stmxcsr(addr)		__asm __volatile("stmxcsr %0" : : "m" (*(addr)))

static __inline void
xrstor(char *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	__asm __volatile("xrstor %0" : : "m" (*addr), "a" (low), "d" (hi));
}

static __inline void
xsave(char *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	__asm __volatile("xsave %0" : "=m" (*addr) : "a" (low), "d" (hi) :
	    "memory");
}

static __inline void
xsaveopt(char *addr, uint64_t mask)
{
	uint32_t low, hi;

	low = mask;
	hi = mask >> 32;
	__asm __volatile("xsaveopt %0" : "=m" (*addr) : "a" (low), "d" (hi) :
	    "memory");
}
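
/*
 * Usage sketch (illustrative only, compiled out): the feature bitmap
 * passed to the XSAVE family is split into EDX:EAX by the wrappers
 * above.  The mask value here is an assumption for the example; the
 * driver normally passes the global xsave_mask.
 */
#if 0
static void
xsave_usage_example(union savefpu *area)
{
	/* Save x87 and SSE state; %eax gets the low word, %edx the high. */
	xsave((char *)area, XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE);
}
#endif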

#define	start_emulating()	load_cr0(rcr0() | CR0_TS)
#define	stop_emulating()	clts()

#define	GET_FPU_CW(thread) \
	(cpu_fxsr ? \
		(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
		(thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
#define	GET_FPU_SW(thread) \
	(cpu_fxsr ? \
		(thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
		(thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
#define	SET_FPU_CW(savefpu, value) do { \
	if (cpu_fxsr) \
		(savefpu)->sv_xmm.sv_env.en_cw = (value); \
	else \
		(savefpu)->sv_87.sv_env.en_cw = (value); \
} while (0)

CTASSERT(sizeof(union savefpu) == 512);
CTASSERT(sizeof(struct xstate_hdr) == 64);
CTASSERT(sizeof(struct savefpu_ymm) == 832);

/*
 * This requirement is to make it easier for asm code to calculate
 * offset of the fpu save area from the pcb address.  FPU save area
 * must be 64-byte aligned.
 */
CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0);

/*
 * Ensure the copy of XCR0 saved in a core is contained in the padding
 * area.
 */
CTASSERT(X86_XSTATE_XCR0_OFFSET >= offsetof(struct savexmm, sv_pad) &&
    X86_XSTATE_XCR0_OFFSET + sizeof(uint64_t) <= sizeof(struct savexmm));

static void	fpu_clean_state(void);

static void	fpurstor(union savefpu *);

int hw_float;

SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
    &hw_float, 0, "Floating point instructions executed in hardware");

int lazy_fpu_switch = 0;
SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
    &lazy_fpu_switch, 0,
    "Lazily load FPU context after context switch");

u_int cpu_fxsr;		/* SSE enabled */
int use_xsave;
uint64_t xsave_mask;
static uma_zone_t fpu_save_area_zone;
static union savefpu *npx_initialstate;

static struct xsave_area_elm_descr {
	u_int	offset;
	u_int	size;
} *xsave_area_desc;

static volatile u_int npx_traps_while_probing;

alias_for_inthand_t probetrap;
__asm("								\n\
	.text							\n\
	.p2align 2,0x90						\n\
	.type " __XSTRING(CNAME(probetrap)) ",@function		\n\
" __XSTRING(CNAME(probetrap)) ":				\n\
	ss							\n\
	incl	" __XSTRING(CNAME(npx_traps_while_probing)) "	\n\
	fnclex							\n\
	iret							\n\
");

/*
 * Determine if an FPU is present and how to use it.
 */
static int
npx_probe(void)
{
	struct gate_descriptor save_idt_npxtrap;
	u_short control, status;

	/*
	 * Modern CPUs all have an FPU that uses the INT16 interface
	 * and provide a simple way to verify that, so handle the
	 * common case right away.
	 */
	if (cpu_feature & CPUID_FPU) {
		hw_float = 1;
		return (1);
	}

	save_idt_npxtrap = idt[IDT_MF];
	setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	/*
	 * Don't trap while we're probing.
	 */
	stop_emulating();

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();

	/*
	 * Don't use fwait here because it might hang.
	 * Don't use fnop here because it usually hangs if there is no FPU.
	 */
	DELAY(1000);		/* wait for any IRQ13 */
#ifdef DIAGNOSTIC
	if (npx_traps_while_probing != 0)
		printf("fninit caused %u bogus npx trap(s)\n",
		    npx_traps_while_probing);
#endif
	/*
	 * Check for a status of mostly zero.
	 */
	status = 0x5a5a;
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(control);
			npx_traps_while_probing = 0;
			fp_divide_by_0();
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				hw_float = 1;
				goto cleanup;
			}
			printf(
	"FPU does not use exception 16 for error reporting\n");
			goto cleanup;
		}
	}

	/*
	 * Probe failed.  Floating point simply won't work.
	 * Notify user and disable FPU/MMX/SSE instruction execution.
	 */
	printf("WARNING: no FPU!\n");
	__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
	    "n" (CR0_EM | CR0_MP) : "ax");

cleanup:
	idt[IDT_MF] = save_idt_npxtrap;
	return (hw_float);
}

static void
fpusave_xsaveopt(union savefpu *addr)
{

	xsaveopt((char *)addr, xsave_mask);
}

static void
fpusave_xsave(union savefpu *addr)
{

	xsave((char *)addr, xsave_mask);
}

static void
fpusave_fxsave(union savefpu *addr)
{

	fxsave((char *)addr);
}

static void
fpusave_fnsave(union savefpu *addr)
{

	fnsave((char *)addr);
}

DEFINE_IFUNC(, void, fpusave, (union savefpu *))
{
	if (use_xsave)
		return ((cpu_stdext_feature & CPUID_EXTSTATE_XSAVEOPT) != 0 ?
		    fpusave_xsaveopt : fpusave_xsave);
	if (cpu_fxsr)
		return (fpusave_fxsave);
	return (fpusave_fnsave);
}

/*
 * Enable XSAVE if supported and allowed by user.
 * Calculate the xsave_mask.
 */
static void
npxinit_bsp1(void)
{
	u_int cp[4];
	uint64_t xsave_mask_user;

	TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
	if (!use_xsave)
		return;
	cpuid_count(0xd, 0x0, cp);
	xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
	if ((cp[0] & xsave_mask) != xsave_mask)
		panic("CPU0 does not support X87 or SSE: %x", cp[0]);
	xsave_mask = ((uint64_t)cp[3] << 32) | cp[0];
	xsave_mask_user = xsave_mask;
	TUNABLE_QUAD_FETCH("hw.xsave_mask", &xsave_mask_user);
	xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
	xsave_mask &= xsave_mask_user;
	if ((xsave_mask & XFEATURE_AVX512) != XFEATURE_AVX512)
		xsave_mask &= ~XFEATURE_AVX512;
	if ((xsave_mask & XFEATURE_MPX) != XFEATURE_MPX)
		xsave_mask &= ~XFEATURE_MPX;
}

/*
 * Calculate the fpu save area size.
 */
static void
npxinit_bsp2(void)
{
	u_int cp[4];

	if (use_xsave) {
		cpuid_count(0xd, 0x0, cp);
		cpu_max_ext_state_size = cp[1];

		/*
		 * Reload the cpu_feature2, since we enabled OSXSAVE.
		 */
		do_cpuid(1, cp);
		cpu_feature2 = cp[2];
	} else
		cpu_max_ext_state_size = sizeof(union savefpu);
}

/*
 * Initialize floating point unit.
 */
void
npxinit(bool bsp)
{
	static union savefpu dummy;
	register_t saveintr;
	u_int mxcsr;
	u_short control;

	if (bsp) {
		if (!npx_probe())
			return;
		npxinit_bsp1();
	}

	if (use_xsave) {
		load_cr4(rcr4() | CR4_XSAVE);
		load_xcr(XCR0, xsave_mask);
	}

	/*
	 * XCR0 shall be set up before CPU can report the save area size.
	 */
	if (bsp)
		npxinit_bsp2();

	/*
	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
	 * fnsave to throw away any junk in the fpu.  fpusave() initializes
	 * the fpu.
	 *
	 * It is too early for critical_enter() to work on AP.
	 */
	saveintr = intr_disable();
	stop_emulating();
	if (cpu_fxsr)
		fninit();
	else
		fnsave(&dummy);
	control = __INITIAL_NPXCW__;
	fldcw(control);
	if (cpu_fxsr) {
		mxcsr = __INITIAL_MXCSR__;
		ldmxcsr(mxcsr);
	}
	start_emulating();
	intr_restore(saveintr);
}
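
/*
 * Example (an assumed /boot/loader.conf fragment, shown for context
 * only): the tunables fetched in npxinit_bsp1() above let the
 * administrator restrict the kernel-managed extended states and enable
 * lazy FPU switching.  The mask value is illustrative; 0x7 would keep
 * x87, SSE, and AVX only.
 *
 *	hw.xsave_mask="0x7"
 *	hw.lazy_fpu_switch="1"
 */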

/*
 * On the boot CPU we generate a clean state that is used to
 * initialize the floating point unit when it is first used by a
 * process.
 */
static void
npxinitstate(void *arg __unused)
{
	uint64_t *xstate_bv;
	register_t saveintr;
	int cp[4], i, max_ext_n;

	if (!hw_float)
		return;

	/* Do potentially blocking operations before disabling interrupts. */
	fpu_save_area_zone = uma_zcreate("FPU_save_area",
	    cpu_max_ext_state_size, NULL, NULL, NULL, NULL,
	    XSAVE_AREA_ALIGN - 1, 0);
	npx_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);
	if (use_xsave) {
		if (xsave_mask >> 32 != 0)
			max_ext_n = fls(xsave_mask >> 32) + 32;
		else
			max_ext_n = fls(xsave_mask);
		xsave_area_desc = malloc(max_ext_n * sizeof(struct
		    xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
	}

	saveintr = intr_disable();
	stop_emulating();

	if (cpu_fxsr)
		fpusave_fxsave(npx_initialstate);
	else
		fpusave_fnsave(npx_initialstate);
	if (cpu_fxsr) {
		if (npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask)
			cpu_mxcsr_mask =
			    npx_initialstate->sv_xmm.sv_env.en_mxcsr_mask;
		else
			cpu_mxcsr_mask = 0xFFBF;

		/*
		 * The fninit instruction does not modify XMM
		 * registers or x87 registers (MM/ST).  The fpusave
		 * call dumped the garbage contained in the registers
		 * after reset to the initial state saved.  Clear XMM
		 * and x87 registers file image to make the startup
		 * program state and signal handler XMM/x87 register
		 * content predictable.
		 */
		bzero(npx_initialstate->sv_xmm.sv_fp,
		    sizeof(npx_initialstate->sv_xmm.sv_fp));
		bzero(npx_initialstate->sv_xmm.sv_xmm,
		    sizeof(npx_initialstate->sv_xmm.sv_xmm));

	} else
		bzero(npx_initialstate->sv_87.sv_ac,
		    sizeof(npx_initialstate->sv_87.sv_ac));

	/*
	 * Create a table describing the layout of the CPU Extended
	 * Save Area.  See Intel SDM rev. 075 Vol. 1 13.4.1 "Legacy
	 * Region of an XSAVE Area" for the source of offsets/sizes.
	 * Note that 32bit XSAVE does not use %xmm8-%xmm15, see
	 * 10.5.1.2 and 13.5.2 "SSE State".
	 */
	if (use_xsave) {
		xstate_bv = (uint64_t *)((char *)(npx_initialstate + 1) +
		    offsetof(struct xstate_hdr, xstate_bv));
		*xstate_bv = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;

		/* x87 state */
		xsave_area_desc[0].offset = 0;
		xsave_area_desc[0].size = 160;
		/* XMM */
		xsave_area_desc[1].offset = 160;
		xsave_area_desc[1].size = 288 - 160;

		for (i = 2; i < max_ext_n; i++) {
			cpuid_count(0xd, i, cp);
			xsave_area_desc[i].offset = cp[1];
			xsave_area_desc[i].size = cp[0];
		}
	}

	start_emulating();
	intr_restore(saveintr);
}
SYSINIT(npxinitstate, SI_SUB_CPU, SI_ORDER_ANY, npxinitstate, NULL);

/*
 * Free coprocessor (if we have it).
 */
void
npxexit(struct thread *td)
{

	critical_enter();
	if (curthread == PCPU_GET(fpcurthread)) {
		stop_emulating();
		fpusave(curpcb->pcb_save);
		start_emulating();
		PCPU_SET(fpcurthread, NULL);
	}
	critical_exit();
#ifdef NPX_DEBUG
	if (hw_float) {
		u_int masked_exceptions;

		masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
		/*
		 * Log exceptions that would have trapped with the old
		 * control word (overflow, divide by 0, and invalid operand).
		 */
		if (masked_exceptions & 0x0d)
			log(LOG_ERR,
	"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
			    td->td_proc->p_pid, td->td_proc->p_comm,
			    masked_exceptions);
	}
#endif
}

int
npxformat(void)
{

	if (!hw_float)
		return (_MC_FPFMT_NODEV);
	if (cpu_fxsr)
		return (_MC_FPFMT_XMM);
	return (_MC_FPFMT_387);
}

/*
 * The following mechanism is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if the user process modifies the control
 * word while a status word bit is already set.  While this is a sign
 * of bad coding, we have no choice but to narrow them down to one
 * bit, since we must not send a trapcode that is not exactly one of
 * the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *   1  Invalid operation (FP_X_INV)
 *     1a  Stack underflow
 *     1b  Stack overflow
 *     1c  Operand of unsupported format
 *     1d  SNaN operand.
 *   2  QNaN operand (not an exception, irrelevant here)
 *   3  Any other invalid-operation not mentioned above or zero divide
 *        (FP_X_INV, FP_X_DZ)
 *   4  Denormal operand (FP_X_DNML)
 *   5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 *   6  Inexact result (FP_X_IMP)
 */
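
/*
 * Worked example of the index computation (values are illustrative
 * assumptions; the real lookup is in npxtrap_x87() below):
 */
#if 0
static int
fpetable_index_example(void)
{
	u_short status = 0x0024;	/* DZ and IMP pending */
	u_short control = 0x0020;	/* IMP masked, DZ unmasked */

	/*
	 * Keep only the unmasked maskable exceptions (bits 0-5), then
	 * reinsert the unmaskable stack-fault bit (0x40).  Here the
	 * index is 0x04, so the trapcode is FPE_FLTDIV.
	 */
	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
#endif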
static char fpetable[128] = {
	0,
	FPE_FLTINV,	/*  1 - INV */
	FPE_FLTUND,	/*  2 - DNML */
	FPE_FLTINV,	/*  3 - INV | DNML */
	FPE_FLTDIV,	/*  4 - DZ */
	FPE_FLTINV,	/*  5 - INV | DZ */
	FPE_FLTDIV,	/*  6 - DNML | DZ */
	FPE_FLTINV,	/*  7 - INV | DNML | DZ */
	FPE_FLTOVF,	/*  8 - OFL */
	FPE_FLTINV,	/*  9 - INV | OFL */
	FPE_FLTUND,	/*  A - DNML | OFL */
	FPE_FLTINV,	/*  B - INV | DNML | OFL */
	FPE_FLTDIV,	/*  C - DZ | OFL */
	FPE_FLTINV,	/*  D - INV | DZ | OFL */
	FPE_FLTDIV,	/*  E - DNML | DZ | OFL */
	FPE_FLTINV,	/*  F - INV | DNML | DZ | OFL */
	FPE_FLTUND,	/* 10 - UFL */
	FPE_FLTINV,	/* 11 - INV | UFL */
	FPE_FLTUND,	/* 12 - DNML | UFL */
	FPE_FLTINV,	/* 13 - INV | DNML | UFL */
	FPE_FLTDIV,	/* 14 - DZ | UFL */
	FPE_FLTINV,	/* 15 - INV | DZ | UFL */
	FPE_FLTDIV,	/* 16 - DNML | DZ | UFL */
	FPE_FLTINV,	/* 17 - INV | DNML | DZ | UFL */
	FPE_FLTOVF,	/* 18 - OFL | UFL */
	FPE_FLTINV,	/* 19 - INV | OFL | UFL */
	FPE_FLTUND,	/* 1A - DNML | OFL | UFL */
	FPE_FLTINV,	/* 1B - INV | DNML | OFL | UFL */
	FPE_FLTDIV,	/* 1C - DZ | OFL | UFL */
	FPE_FLTINV,	/* 1D - INV | DZ | OFL | UFL */
	FPE_FLTDIV,	/* 1E - DNML | DZ | OFL | UFL */
	FPE_FLTINV,	/* 1F - INV | DNML | DZ | OFL | UFL */
	FPE_FLTRES,	/* 20 - IMP */
	FPE_FLTINV,	/* 21 - INV | IMP */
	FPE_FLTUND,	/* 22 - DNML | IMP */
	FPE_FLTINV,	/* 23 - INV | DNML | IMP */
	FPE_FLTDIV,	/* 24 - DZ | IMP */
	FPE_FLTINV,	/* 25 - INV | DZ | IMP */
	FPE_FLTDIV,	/* 26 - DNML | DZ | IMP */
	FPE_FLTINV,	/* 27 - INV | DNML | DZ | IMP */
	FPE_FLTOVF,	/* 28 - OFL | IMP */
	FPE_FLTINV,	/* 29 - INV | OFL | IMP */
	FPE_FLTUND,	/* 2A - DNML | OFL | IMP */
	FPE_FLTINV,	/* 2B - INV | DNML | OFL | IMP */
	FPE_FLTDIV,	/* 2C - DZ | OFL | IMP */
	FPE_FLTINV,	/* 2D - INV | DZ | OFL | IMP */
	FPE_FLTDIV,	/* 2E - DNML | DZ | OFL | IMP */
	FPE_FLTINV,	/* 2F - INV | DNML | DZ | OFL | IMP */
	FPE_FLTUND,	/* 30 - UFL | IMP */
	FPE_FLTINV,	/* 31 - INV | UFL | IMP */
	FPE_FLTUND,	/* 32 - DNML | UFL | IMP */
	FPE_FLTINV,	/* 33 - INV | DNML | UFL | IMP */
	FPE_FLTDIV,	/* 34 - DZ | UFL | IMP */
	FPE_FLTINV,	/* 35 - INV | DZ | UFL | IMP */
	FPE_FLTDIV,	/* 36 - DNML | DZ | UFL | IMP */
	FPE_FLTINV,	/* 37 - INV | DNML | DZ | UFL | IMP */
	FPE_FLTOVF,	/* 38 - OFL | UFL | IMP */
	FPE_FLTINV,	/* 39 - INV | OFL | UFL | IMP */
	FPE_FLTUND,	/* 3A - DNML | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3B - INV | DNML | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3C - DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3D - INV | DZ | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3E - DNML | DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3F - INV | DNML | DZ | OFL | UFL | IMP */
	FPE_FLTSUB,	/* 40 - STK */
	FPE_FLTSUB,	/* 41 - INV | STK */
	FPE_FLTUND,	/* 42 - DNML | STK */
	FPE_FLTSUB,	/* 43 - INV | DNML | STK */
	FPE_FLTDIV,	/* 44 - DZ | STK */
	FPE_FLTSUB,	/* 45 - INV | DZ | STK */
	FPE_FLTDIV,	/* 46 - DNML | DZ | STK */
	FPE_FLTSUB,	/* 47 - INV | DNML | DZ | STK */
	FPE_FLTOVF,	/* 48 - OFL | STK */
	FPE_FLTSUB,	/* 49 - INV | OFL | STK */
	FPE_FLTUND,	/* 4A - DNML | OFL | STK */
	FPE_FLTSUB,	/* 4B - INV | DNML | OFL | STK */
	FPE_FLTDIV,	/* 4C - DZ | OFL | STK */
	FPE_FLTSUB,	/* 4D - INV | DZ | OFL | STK */
	FPE_FLTDIV,	/* 4E - DNML | DZ | OFL | STK */
	FPE_FLTSUB,	/* 4F - INV | DNML | DZ | OFL | STK */
	FPE_FLTUND,	/* 50 - UFL | STK */
	FPE_FLTSUB,	/* 51 - INV | UFL | STK */
	FPE_FLTUND,	/* 52 - DNML | UFL | STK */
	FPE_FLTSUB,	/* 53 - INV | DNML | UFL | STK */
	FPE_FLTDIV,	/* 54 - DZ | UFL | STK */
	FPE_FLTSUB,	/* 55 - INV | DZ | UFL | STK */
	FPE_FLTDIV,	/* 56 - DNML | DZ | UFL | STK */
	FPE_FLTSUB,	/* 57 - INV | DNML | DZ | UFL | STK */
	FPE_FLTOVF,	/* 58 - OFL | UFL | STK */
	FPE_FLTSUB,	/* 59 - INV | OFL | UFL | STK */
	FPE_FLTUND,	/* 5A - DNML | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5B - INV | DNML | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5C - DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5D - INV | DZ | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5E - DNML | DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5F - INV | DNML | DZ | OFL | UFL | STK */
	FPE_FLTRES,	/* 60 - IMP | STK */
	FPE_FLTSUB,	/* 61 - INV | IMP | STK */
	FPE_FLTUND,	/* 62 - DNML | IMP | STK */
	FPE_FLTSUB,	/* 63 - INV | DNML | IMP | STK */
	FPE_FLTDIV,	/* 64 - DZ | IMP | STK */
	FPE_FLTSUB,	/* 65 - INV | DZ | IMP | STK */
	FPE_FLTDIV,	/* 66 - DNML | DZ | IMP | STK */
	FPE_FLTSUB,	/* 67 - INV | DNML | DZ | IMP | STK */
	FPE_FLTOVF,	/* 68 - OFL | IMP | STK */
	FPE_FLTSUB,	/* 69 - INV | OFL | IMP | STK */
	FPE_FLTUND,	/* 6A - DNML | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6B - INV | DNML | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6C - DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6D - INV | DZ | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6E - DNML | DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6F - INV | DNML | DZ | OFL | IMP | STK */
	FPE_FLTUND,	/* 70 - UFL | IMP | STK */
	FPE_FLTSUB,	/* 71 - INV | UFL | IMP | STK */
	FPE_FLTUND,	/* 72 - DNML | UFL | IMP | STK */
	FPE_FLTSUB,	/* 73 - INV | DNML | UFL | IMP | STK */
	FPE_FLTDIV,	/* 74 - DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 75 - INV | DZ | UFL | IMP | STK */
	FPE_FLTDIV,	/* 76 - DNML | DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 77 - INV | DNML | DZ | UFL | IMP | STK */
	FPE_FLTOVF,	/* 78 - OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 79 - INV | OFL | UFL | IMP | STK */
	FPE_FLTUND,	/* 7A - DNML | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7B - INV | DNML | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7C - DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7D - INV | DZ | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7E - DNML | DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
};

/*
 * Read the FP status and control words, then generate si_code value
 * for SIGFPE.  The error code chosen will be one of the
 * FPE_... macros.  It will be sent as the second argument to old
 * BSD-style signal handlers and as "siginfo_t->si_code" (second
 * argument) to SA_SIGINFO signal handlers.
 *
 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs.  The
 * usermode code which understands the FPU hardware enough to enable
 * the exceptions can also handle clearing the exception state in the
 * handler.  The only consequence of not clearing the exception is the
 * rethrow of the SIGFPE on return from the signal handler and
 * reexecution of the corresponding instruction.
 *
 * For XMM traps, the exceptions were never cleared.
 */
int
npxtrap_x87(void)
{
	u_short control, status;

	if (!hw_float) {
		printf(
	"npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();

	/*
	 * Interrupt handling (for another interrupt) may have pushed the
	 * state to memory.  Fetch the relevant parts of the state from
	 * wherever they are.
	 */
	if (PCPU_GET(fpcurthread) != curthread) {
		control = GET_FPU_CW(curthread);
		status = GET_FPU_SW(curthread);
	} else {
		fnstcw(&control);
		fnstsw(&status);
	}
	critical_exit();
	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}

int
npxtrap_sse(void)
{
	u_int mxcsr;

	if (!hw_float) {
		printf(
	"npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();
	if (PCPU_GET(fpcurthread) != curthread)
		mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
	else
		stmxcsr(&mxcsr);
	critical_exit();
	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}

static void
restore_npx_curthread(struct thread *td, struct pcb *pcb)
{

	/*
	 * Record new context early in case frstor causes a trap.
	 */
	PCPU_SET(fpcurthread, td);

	stop_emulating();
	if (cpu_fxsr)
		fpu_clean_state();

	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/*
		 * This is the first time this thread has used the FPU or
		 * the PCB doesn't contain a clean FPU state.  Explicitly
		 * load an initial state.
		 *
		 * We prefer to restore the state from the actual save
		 * area in PCB instead of directly loading from
		 * npx_initialstate, to ignite the XSAVEOPT
		 * tracking engine.
		 */
		bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
		fpurstor(pcb->pcb_save);
		if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
			fldcw(pcb->pcb_initial_npxcw);
		pcb->pcb_flags |= PCB_NPXINITDONE;
		if (PCB_USER_FPU(pcb))
			pcb->pcb_flags |= PCB_NPXUSERINITDONE;
	} else {
		fpurstor(pcb->pcb_save);
	}
}

/*
 * Implement device not available (DNA) exception
 *
 * It would be better to switch FP context here (if curthread != fpcurthread)
 * and not necessarily for every context switch, but it is too hard to
 * access foreign pcb's.
 */
int
npxdna(void)
{
	struct thread *td;

	if (!hw_float)
		return (0);
	td = curthread;
	critical_enter();

	KASSERT((curpcb->pcb_flags & PCB_NPXNOSAVE) == 0,
	    ("npxdna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
	if (__predict_false(PCPU_GET(fpcurthread) == td)) {
		/*
		 * Some virtual machines seem to set %cr0.TS at
		 * arbitrary moments.  Silently clear the TS bit
		 * regardless of the eager/lazy FPU context switch
		 * mode.
		 */
		stop_emulating();
	} else {
		if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
			printf(
	"npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
			    PCPU_GET(fpcurthread),
			    PCPU_GET(fpcurthread)->td_proc->p_pid,
			    td, td->td_proc->p_pid);
			panic("npxdna");
		}
		restore_npx_curthread(td, td->td_pcb);
	}
	critical_exit();
	return (1);
}
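
/*
 * Sketch of the lazy-switch protocol that npxdna() serves (an assumed
 * caller, not code from this file): the context switch leaves CR0.TS
 * set instead of loading the incoming thread's FPU state, so the
 * thread's first FPU instruction raises #NM and the trap handler asks
 * npxdna() to load the state and retry.
 */
#if 0
static void
dna_trap_sketch(void)
{
	if (npxdna())
		return;		/* state loaded; retry the instruction */
	/* No FPU present: the trap handler signals the process instead. */
}
#endif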

/*
 * Wrapper for fpusave() called from context switch routines.
 *
 * npxsave() must be called with interrupts disabled, so that it clears
 * fpcurthread atomically with saving the state.  We require callers to do the
 * disabling, since most callers need to disable interrupts anyway to call
 * npxsave() atomically with checking fpcurthread.
 */
void
npxsave(union savefpu *addr)
{

	stop_emulating();
	fpusave(addr);
}

void npxswitch(struct thread *td, struct pcb *pcb);
void
npxswitch(struct thread *td, struct pcb *pcb)
{

	if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
	    !PCB_USER_FPU(pcb)) {
		start_emulating();
		PCPU_SET(fpcurthread, NULL);
	} else if (PCPU_GET(fpcurthread) != td) {
		restore_npx_curthread(td, pcb);
	}
}

/*
 * Unconditionally save the current co-processor state across suspend and
 * resume.
 */
void
npxsuspend(union savefpu *addr)
{
	register_t cr0;

	if (!hw_float)
		return;
	if (PCPU_GET(fpcurthread) == NULL) {
		bcopy(npx_initialstate, addr, cpu_max_ext_state_size);
		return;
	}
	cr0 = rcr0();
	stop_emulating();
	fpusave(addr);
	load_cr0(cr0);
}

void
npxresume(union savefpu *addr)
{
	register_t cr0;

	if (!hw_float)
		return;

	cr0 = rcr0();
	npxinit(false);
	stop_emulating();
	fpurstor(addr);
	load_cr0(cr0);
}

void
npxdrop(void)
{
	struct thread *td;

	/*
	 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
	 * ones don't cause a panic on the next frstor.
	 */
	if (!cpu_fxsr)
		fnclex();

	td = PCPU_GET(fpcurthread);
	KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
	CRITICAL_ASSERT(td);
	PCPU_SET(fpcurthread, NULL);
	td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
	start_emulating();
}

/*
 * Get the user state of the FPU into pcb->pcb_user_save without
 * dropping ownership (if possible).  It returns the FPU ownership
 * status.
 */
int
npxgetregs(struct thread *td)
{
	struct pcb *pcb;
	uint64_t *xstate_bv, bit;
	char *sa;
	int max_ext_n, i;
	int owned;

	if (!hw_float)
		return (_MC_FPOWNED_NONE);

	pcb = td->td_pcb;
	critical_enter();
	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		bcopy(npx_initialstate, get_pcb_user_save_pcb(pcb),
		    cpu_max_ext_state_size);
		SET_FPU_CW(get_pcb_user_save_pcb(pcb), pcb->pcb_initial_npxcw);
		npxuserinited(td);
		critical_exit();
		return (_MC_FPOWNED_PCB);
	}
	if (td == PCPU_GET(fpcurthread)) {
		fpusave(get_pcb_user_save_pcb(pcb));
		if (!cpu_fxsr)
			/*
			 * fnsave initializes the FPU and destroys whatever
			 * context it contains.  Make sure the FPU owner
			 * starts with a clean state next time.
			 */
			npxdrop();
		owned = _MC_FPOWNED_FPU;
	} else {
		owned = _MC_FPOWNED_PCB;
	}
	if (use_xsave) {
		/*
		 * Handle partially saved state.
		 */
		sa = (char *)get_pcb_user_save_pcb(pcb);
		xstate_bv = (uint64_t *)(sa + sizeof(union savefpu) +
		    offsetof(struct xstate_hdr, xstate_bv));
		if (xsave_mask >> 32 != 0)
			max_ext_n = fls(xsave_mask >> 32) + 32;
		else
			max_ext_n = fls(xsave_mask);
		for (i = 0; i < max_ext_n; i++) {
			bit = 1ULL << i;
			if ((xsave_mask & bit) == 0 || (*xstate_bv & bit) != 0)
				continue;
			bcopy((char *)npx_initialstate +
			    xsave_area_desc[i].offset,
			    sa + xsave_area_desc[i].offset,
			    xsave_area_desc[i].size);
			*xstate_bv |= bit;
		}
	}
	critical_exit();
	return (owned);
}

void
npxuserinited(struct thread *td)
{
	struct pcb *pcb;

	CRITICAL_ASSERT(td);
	pcb = td->td_pcb;
	if (PCB_USER_FPU(pcb))
		pcb->pcb_flags |= PCB_NPXINITDONE;
	pcb->pcb_flags |= PCB_NPXUSERINITDONE;
}

int
npxsetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size)
{
	struct xstate_hdr *hdr, *ehdr;
	size_t len, max_len;
	uint64_t bv;

	/* XXXKIB should we clear all extended state in xstate_bv instead? */
	if (xfpustate == NULL)
		return (0);
	if (!use_xsave)
		return (EOPNOTSUPP);

	len = xfpustate_size;
	if (len < sizeof(struct xstate_hdr))
		return (EINVAL);
	max_len = cpu_max_ext_state_size - sizeof(union savefpu);
	if (len > max_len)
		return (EINVAL);

	ehdr = (struct xstate_hdr *)xfpustate;
	bv = ehdr->xstate_bv;

	/*
	 * Avoid #gp.
	 */
	if (bv & ~xsave_mask)
		return (EINVAL);

	hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1);

	hdr->xstate_bv = bv;
	bcopy(xfpustate + sizeof(struct xstate_hdr),
	    (char *)(hdr + 1), len - sizeof(struct xstate_hdr));

	return (0);
}

int
npxsetregs(struct thread *td, union savefpu *addr, char *xfpustate,
    size_t xfpustate_size)
{
	struct pcb *pcb;
	int error;

	if (!hw_float)
		return (ENXIO);

	if (cpu_fxsr)
		addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
	pcb = td->td_pcb;
	error = 0;
	critical_enter();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
		error = npxsetxstate(td, xfpustate, xfpustate_size);
		if (error == 0) {
			if (!cpu_fxsr)
				fnclex();	/* As in npxdrop(). */
			bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
			fpurstor(get_pcb_user_save_td(td));
			pcb->pcb_flags |= PCB_NPXUSERINITDONE |
			    PCB_NPXINITDONE;
		}
	} else {
		error = npxsetxstate(td, xfpustate, xfpustate_size);
		if (error == 0) {
			bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr));
			npxuserinited(td);
		}
	}
	critical_exit();
	return (error);
}

static void
npx_fill_fpregs_xmm1(struct savexmm *sv_xmm, struct save87 *sv_87)
{
	struct env87 *penv_87;
	struct envxmm *penv_xmm;
	struct fpacc87 *fx_reg;
	int i, st;
	uint64_t mantissa;
	uint16_t tw, exp;
	uint8_t ab_tw;

	penv_87 = &sv_87->sv_env;
	penv_xmm = &sv_xmm->sv_env;

	/* FPU control/status */
	penv_87->en_cw = penv_xmm->en_cw;
	penv_87->en_sw = penv_xmm->en_sw;
	penv_87->en_fip = penv_xmm->en_fip;
	penv_87->en_fcs = penv_xmm->en_fcs;
	penv_87->en_opcode = penv_xmm->en_opcode;
	penv_87->en_foo = penv_xmm->en_foo;
	penv_87->en_fos = penv_xmm->en_fos;

	/*
	 * FPU registers and tags.
	 * For ST(i), i = fpu_reg - top; we start with fpu_reg = 7.
	 */
	st = 7 - ((penv_xmm->en_sw >> 11) & 7);
	ab_tw = penv_xmm->en_tw;
	tw = 0;
	for (i = 0x80; i != 0; i >>= 1) {
		sv_87->sv_ac[st] = sv_xmm->sv_fp[st].fp_acc;
		tw <<= 2;
		if (ab_tw & i) {
			/* Non-empty - we need to check ST(i) */
			fx_reg = &sv_xmm->sv_fp[st].fp_acc;
			/* The first 64 bits contain the mantissa. */
			mantissa = *((uint64_t *)fx_reg->fp_bytes);
			/*
			 * The final 16 bits contain the sign bit and the
			 * exponent.  Mask the sign bit since it is of no
			 * consequence to these tests.
			 */
			exp = *((uint16_t *)&fx_reg->fp_bytes[8]) & 0x7fff;
			if (exp == 0) {
				if (mantissa == 0)
					tw |= 1;	/* Zero */
				else
					tw |= 2;	/* Denormal */
			} else if (exp == 0x7fff)
				tw |= 2;	/* Infinity or NaN */
		} else
			tw |= 3;	/* Empty */
		st = (st - 1) & 7;
	}
	penv_87->en_tw = tw;
}

void
npx_fill_fpregs_xmm(struct savexmm *sv_xmm, struct save87 *sv_87)
{

	bzero(sv_87, sizeof(*sv_87));
	npx_fill_fpregs_xmm1(sv_xmm, sv_87);
}

void
npx_set_fpregs_xmm(struct save87 *sv_87, struct savexmm *sv_xmm)
{
	struct env87 *penv_87;
	struct envxmm *penv_xmm;
	int i;

	penv_87 = &sv_87->sv_env;
	penv_xmm = &sv_xmm->sv_env;

	/* FPU control/status */
	penv_xmm->en_cw = penv_87->en_cw;
	penv_xmm->en_sw = penv_87->en_sw;
	penv_xmm->en_fip = penv_87->en_fip;
	penv_xmm->en_fcs = penv_87->en_fcs;
	penv_xmm->en_opcode = penv_87->en_opcode;
	penv_xmm->en_foo = penv_87->en_foo;
	penv_xmm->en_fos = penv_87->en_fos;

	/*
	 * FPU registers and tags.
	 * Abridged / Full translation (values in binary), see FXSAVE spec.
	 *   0		11
	 *   1		00, 01, 10
	 */
	penv_xmm->en_tw = 0;
	for (i = 0; i < 8; ++i) {
		sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
		if ((penv_87->en_tw & (3 << i * 2)) != (3 << i * 2))
			penv_xmm->en_tw |= 1 << i;
	}
}
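
/*
 * Worked example for the translation above (values chosen for
 * illustration): a full tag word of 0xfff0 (physical registers 0 and 1
 * tagged 00 "valid", the rest 11 "empty") yields the abridged tag byte
 * 0x03.  The reverse direction in npx_fill_fpregs_xmm1() cannot simply
 * invert this mapping; it must inspect each register's exponent and
 * mantissa to recover the zero/denormal/special distinction that the
 * abridged form discards.
 */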

void
npx_get_fsave(void *addr)
{
	struct thread *td;
	union savefpu *sv;

	td = curthread;
	npxgetregs(td);
	sv = get_pcb_user_save_td(td);
	if (cpu_fxsr)
		npx_fill_fpregs_xmm1(&sv->sv_xmm, addr);
	else
		bcopy(sv, addr, sizeof(struct env87) +
		    sizeof(struct fpacc87[8]));
}

int
npx_set_fsave(void *addr)
{
	union savefpu sv;
	int error;

	bzero(&sv, sizeof(sv));
	if (cpu_fxsr)
		npx_set_fpregs_xmm(addr, &sv.sv_xmm);
	else
		bcopy(addr, &sv, sizeof(struct env87) +
		    sizeof(struct fpacc87[8]));
	error = npxsetregs(curthread, &sv, NULL, 0);
	return (error);
}

/*
 * On AuthenticAMD processors, the fxrstor instruction does not restore
 * the x87's stored last instruction pointer, last data pointer, and last
 * opcode values, except in the rare case in which the exception summary
 * (ES) bit in the x87 status word is set to 1.
 *
 * In order to avoid leaking this information across processes, we clean
 * these values by performing a dummy load before executing fxrstor().
 */
static void
fpu_clean_state(void)
{
	static float dummy_variable = 0.0;
	u_short status;

	/*
	 * Clear the ES bit in the x87 status word if it is currently
	 * set, in order to avoid causing a fault in the upcoming load.
	 */
	fnstsw(&status);
	if (status & 0x80)
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}

static void
fpurstor(union savefpu *addr)
{

	if (use_xsave)
		xrstor((char *)addr, xsave_mask);
	else if (cpu_fxsr)
		fxrstor(addr);
	else
		frstor(addr);
}

#ifdef DEV_ISA
/*
 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
 */
static struct isa_pnp_id npxisa_ids[] = {
	{ 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
	{ 0 }
};

static int
npxisa_probe(device_t dev)
{
	int result;

	if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev,
	    npxisa_ids)) <= 0) {
		device_quiet(dev);
	}
	return (result);
}

static int
npxisa_attach(device_t dev)
{

	return (0);
}

static device_method_t npxisa_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		npxisa_probe),
	DEVMETHOD(device_attach,	npxisa_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),
	{ 0, 0 }
};

static driver_t npxisa_driver = {
	"npxisa",
	npxisa_methods,
	1,			/* no softc */
};

DRIVER_MODULE(npxisa, isa, npxisa_driver, 0, 0);
DRIVER_MODULE(npxisa, acpi, npxisa_driver, 0, 0);
ISA_PNP_INFO(npxisa_ids);
#endif /* DEV_ISA */

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for FPU state");

#define	FPU_KERN_CTX_NPXINITDONE 0x01
#define	FPU_KERN_CTX_DUMMY	0x02
#define	FPU_KERN_CTX_INUSE	0x04

struct fpu_kern_ctx {
	union savefpu *prev;
	uint32_t flags;
	char hwstate1[];
};

struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	struct fpu_kern_ctx *res;
	size_t sz;

	sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
	    cpu_max_ext_state_size;
	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
	    M_NOWAIT : M_WAITOK) | M_ZERO);
	return (res);
}

void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
	/* XXXKIB clear the memory? */
	free(ctx, M_FPUKERN_CTX);
}

static union savefpu *
fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
{
	vm_offset_t p;

	p = (vm_offset_t)&ctx->hwstate1;
	p = roundup2(p, XSAVE_AREA_ALIGN);
	return ((union savefpu *)p);
}

void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_flags & PCB_NPXNOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_NPXNOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		stop_emulating();
		if (curthread == PCPU_GET(fpcurthread)) {
			fpusave(curpcb->pcb_save);
			PCPU_SET(fpcurthread, NULL);
		} else {
			KASSERT(PCPU_GET(fpcurthread) == NULL,
			    ("invalid fpcurthread"));
		}

		/*
		 * This breaks XSAVEOPT tracker, but
		 * PCB_NPXNOSAVE state is supposed to never need to
		 * save FPU context at all.
		 */
		fpurstor(npx_initialstate);
		pcb->pcb_flags |= PCB_KERNNPX | PCB_NPXNOSAVE |
		    PCB_NPXINITDONE;
		return;
	}
	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	pcb = td->td_pcb;
	critical_enter();
	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save ==
	    get_pcb_user_save_pcb(pcb), ("mangled pcb_save"));
	ctx->flags = FPU_KERN_CTX_INUSE;
	if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
		ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
	npxexit(td);
	ctx->prev = pcb->pcb_save;
	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
	pcb->pcb_flags |= PCB_KERNNPX;
	pcb->pcb_flags &= ~PCB_NPXINITDONE;
	critical_exit();
}

int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_flags & PCB_NPXNOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_NPXNOSAVE"));
		CRITICAL_ASSERT(td);

		pcb->pcb_flags &= ~(PCB_NPXNOSAVE | PCB_NPXINITDONE);
		start_emulating();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("leaving not inuse ctx"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0,
		    ("dummy ctx"));
		critical_enter();
		if (curthread == PCPU_GET(fpcurthread))
			npxdrop();
		pcb->pcb_save = ctx->prev;
	}

	if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) {
		if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) {
			pcb->pcb_flags |= PCB_NPXINITDONE;
			if ((pcb->pcb_flags & PCB_KERNNPX_THR) == 0)
				pcb->pcb_flags &= ~PCB_KERNNPX;
		} else if ((pcb->pcb_flags & PCB_KERNNPX_THR) == 0)
			pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_KERNNPX);
	} else {
		if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
	}
	critical_exit();
	return (0);
}
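
/*
 * Usage sketch (a hypothetical consumer, not part of this file): kernel
 * code that wants to execute FPU instructions brackets them with
 * fpu_kern_enter()/fpu_kern_leave().  The flag choice and the work done
 * inside the bracket are illustrative assumptions.
 */
#if 0
static void
fpu_kern_usage_example(void)
{
	struct fpu_kern_ctx *ctx;

	ctx = fpu_kern_alloc_ctx(0);		/* may sleep */
	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
	/* ... SSE/AVX computation goes here ... */
	fpu_kern_leave(curthread, ctx);
	fpu_kern_free_ctx(ctx);
}
#endif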

int
fpu_kern_thread(u_int flags)
{

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(curpcb->pcb_save == get_pcb_user_save_pcb(curpcb),
	    ("mangled pcb_save"));
	KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));

	curpcb->pcb_flags |= PCB_KERNNPX | PCB_KERNNPX_THR;
	return (0);
}

int
is_fpu_kern_thread(u_int flags)
{

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	return ((curpcb->pcb_flags & PCB_KERNNPX_THR) != 0);
}

/*
 * FPU save area alloc/free/init utility routines
 */
union savefpu *
fpu_save_area_alloc(void)
{

	return (uma_zalloc(fpu_save_area_zone, M_WAITOK));
}

void
fpu_save_area_free(union savefpu *fsa)
{

	uma_zfree(fpu_save_area_zone, fsa);
}

void
fpu_save_area_reset(union savefpu *fsa)
{

	bcopy(npx_initialstate, fsa, cpu_max_ext_state_size);
}