1 /* $OpenBSD: npx.c,v 1.70 2018/07/30 14:19:12 kettenis Exp $ */ 2 /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */ 3 4 #if 0 5 #define IPRINTF(x) printf x 6 #else 7 #define IPRINTF(x) 8 #endif 9 10 /*- 11 * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved. 12 * Copyright (c) 1990 William Jolitz. 13 * Copyright (c) 1991 The Regents of the University of California. 14 * All rights reserved. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 
39 * 40 * @(#)npx.c 7.2 (Berkeley) 5/12/91 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/conf.h> 46 #include <sys/proc.h> 47 #include <sys/signalvar.h> 48 #include <sys/user.h> 49 #include <sys/ioctl.h> 50 #include <sys/device.h> 51 52 #include <uvm/uvm_extern.h> 53 54 #include <machine/cpu.h> 55 #include <machine/intr.h> 56 #include <machine/npx.h> 57 #include <machine/pio.h> 58 #include <machine/cpufunc.h> 59 #include <machine/pcb.h> 60 #include <machine/trap.h> 61 #include <machine/specialreg.h> 62 #include <machine/i8259.h> 63 64 #include <dev/isa/isareg.h> 65 #include <dev/isa/isavar.h> 66 67 /* 68 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 69 * 70 * We do lazy initialization and switching using the TS bit in cr0 and the 71 * MDP_USEDFPU bit in mdproc. 72 * 73 * DNA exceptions are handled like this: 74 * 75 * 1) If there is no NPX, return and go to the emulator. 76 * 2) If someone else has used the NPX, save its state into that process's PCB. 77 * 3a) If MDP_USEDFPU is not set, set it and initialize the NPX. 78 * 3b) Otherwise, reload the process's previous NPX state. 79 * 80 * When a process is created or exec()s, its saved cr0 image has the TS bit 81 * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the 82 * process first gets a DNA and the NPX is initialized. The TS bit is turned 83 * off when the NPX is used, and turned on again later when the process's NPX 84 * state is saved. 
 */

/*
 * Inline wrappers for the x87 instructions used below.
 */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
#define	fnclex()		__asm("fnclex")
#define	fninit()		__asm("fninit")
#define	fnsave(addr)		__asm("fnsave %0" : "=m" (*addr))
#define	fnstcw(addr)		__asm("fnstcw %0" : "=m" (*addr))
#define	fnstsw(addr)		__asm("fnstsw %0" : "=m" (*addr))
#define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fwait")
#define	frstor(addr)		__asm("frstor %0" : : "m" (*addr))
#define	fwait()			__asm("fwait")
#define	clts()			__asm("clts")
#define	stts()			lcr0(rcr0() | CR0_TS)

/*
 * The mxcsr_mask for this host, taken from fxsave() on the primary CPU
 */
uint32_t	fpu_mxcsr_mask;

int npxintr(void *);
static int npxprobe1(struct isa_attach_args *);
static int x86fpflags_to_siginfo(u_int32_t);


/* Per-device softc; there is at most one npx device. */
struct npx_softc {
	struct device sc_dev;
	void *sc_ih;		/* IRQ13 handle (NPX_INTERRUPT only) */
};

int npxprobe(struct device *, void *, void *);
void npxattach(struct device *, struct device *, void *);

struct cfattach npx_ca = {
	sizeof(struct npx_softc), npxprobe, npxattach
};

struct cfdriver npx_cd = {
	NULL, "npx", DV_DULL
};

/* How (or whether) the FPU reports errors, as determined by the probe. */
enum npx_type {
	NPX_NONE = 0,		/* no usable FPU */
	NPX_INTERRUPT,		/* errors reported via IRQ13 */
	NPX_EXCEPTION,		/* errors reported via exception 16 */
	NPX_BROKEN,		/* FPU present but error reporting broken */
	NPX_CPUID,		/* presence reported by CPUID */
};

static enum npx_type npx_type;
/* Counters bumped by the probe-time handlers below; must be in .kudata */
static volatile u_int npx_intrs_while_probing
    __attribute__((section(".kudata")));
static volatile u_int npx_traps_while_probing
    __attribute__((section(".kudata")));

extern int i386_fpu_present;
extern int i386_fpu_exception;
extern int i386_fpu_fdivbug;

#define	fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
#define	ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*addr))

/*
 * Save the FPU state into *addr using FXSAVE when available, FNSAVE
 * otherwise.  In both cases the FPU is left (re)initialized: FNSAVE
 * reinitializes implicitly, and for FXSAVE we do it explicitly.
 */
static __inline void
fpu_save(union savefpu *addr)
{

	if (i386_use_fxsave) {
		fxsave(&addr->sv_xmm);
		/* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
		fninit();
	} else
		fnsave(&addr->sv_87);
}

/*
 * Placeholder DNA handler installed until npxattach() selects the real
 * one; reaching it means the FPU was used before attachment finished.
 */
static int
npxdna_notset(struct cpu_info *ci)
{
	panic("npxdna vector not initialized");
}

int	(*npxdna_func)(struct cpu_info *) = npxdna_notset;
int	npxdna_s87(struct cpu_info *);
int	npxdna_xmm(struct cpu_info *);
void	npxexit(void);

/*
 * Special interrupt handlers.  Someday intr0-intr15 will be used to count
 * interrupts.  We'll still need a special exception 16 handler.  The busy
 * latch stuff in probintr() can be moved to npxprobe().
 */
void probeintr(void);
asm (".text\n\t"
	"probeintr:\n\t"
	"ss\n\t"
	"incl npx_intrs_while_probing\n\t"
	"pushl %eax\n\t"
	"movb $0x20,%al # EOI (asm in strings loses cpp features)\n\t"
	"outb %al,$0xa0 # IO_ICU2\n\t"
	"outb %al,$0x20 # IO_ICU1\n\t"
	"movb $0,%al\n\t"
	"outb %al,$0xf0 # clear BUSY# latch\n\t"
	"popl %eax\n\t"
	"iret\n\t");

void probetrap(void);
asm (".text\n\t"
	"probetrap:\n\t"
	"ss\n\t"
	"incl npx_traps_while_probing\n\t"
	"fnclex\n\t"
	"iret\n\t");

/*
 * Poke at a suspected FPU with fninit/fnstsw/fnstcw, then force a
 * divide-by-zero to discover how errors are reported.  Sets npx_type and
 * adjusts *ia to match.  Returns 1 if an FPU is present (even a broken
 * one), 0 if the probe failed.
 */
static inline int
npxprobe1(struct isa_attach_args *ia)
{
	int control;
	int status;

	ia->ia_iosize = 16;
	ia->ia_msize = 0;

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but probeintr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();
	delay(1000);		/* wait for any IRQ13 (fwait might hang) */

	/*
	 * Check for a status of mostly zero.
	 */
	status = 0x5a5a;
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(&control);
			npx_traps_while_probing = npx_intrs_while_probing = 0;
			fp_divide_by_0();
			delay(1);
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				npx_type = NPX_EXCEPTION;
				ia->ia_irq = IRQUNK;	/* zap the interrupt */
				i386_fpu_exception = 1;
			} else if (npx_intrs_while_probing != 0) {
				/*
				 * Bad, we are stuck with IRQ13.
				 */
				npx_type = NPX_INTERRUPT;
			} else {
				/*
				 * Worse, even IRQ13 is broken.
				 */
				npx_type = NPX_BROKEN;
				ia->ia_irq = IRQUNK;
			}
			return 1;
		}
	}

	/*
	 * Probe failed.  There is no usable FPU.
	 */
	npx_type = NPX_NONE;
	return 0;
}

/*
 * Probe routine.  Initialize cr0 to give correct behaviour for [f]wait
 * whether the device exists or not (XXX should be elsewhere).  Set flags
 * to tell npxattach() what to do.  Modify device struct if npx doesn't
 * need to use interrupts.  Return 1 if device exists.
 */
int
npxprobe(struct device *parent, void *match, void *aux)
{
	struct isa_attach_args *ia = aux;
	int irq;
	int result;
	u_long s;
	unsigned save_imen;
	struct gate_descriptor save_idt_npxintr;
	struct gate_descriptor save_idt_npxtrap;

	/* Modern CPUs advertise the FPU via CPUID; no poking needed. */
	if (cpu_feature & CPUID_FPU) {
		npx_type = NPX_CPUID;
		i386_fpu_exception = 1;
		ia->ia_irq = IRQUNK;	/* Don't want the interrupt vector */
		ia->ia_iosize = 16;
		ia->ia_msize = 0;
		return 1;
	}

	/*
	 * This routine is now just a wrapper for npxprobe1(), to install
	 * special npx interrupt and trap handlers, to enable npx interrupts
	 * and to disable other interrupts.  Someday isa_configure() will
	 * install suitable handlers and run with interrupts enabled so we
	 * won't need to do so much here.
	 */
	irq = NRSVIDT + ia->ia_irq;
	s = intr_disable();
	/* Temporarily swap in the probe handlers for IRQ13 and trap 16. */
	save_idt_npxintr = idt[irq];
	save_idt_npxtrap = idt[16];
	setgate(&idt[irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
	setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
	save_imen = imen;
	/* Mask everything except the cascade and the npx IRQ. */
	imen = ~((1 << IRQ_SLAVE) | (1 << ia->ia_irq));
	SET_ICUS();

	/*
	 * Partially reset the coprocessor, if any.  Some BIOS's don't reset
	 * it after a warm boot.
	 */
	outb(0xf1, 0);		/* full reset on some systems, NOP on others */
	delay(1000);
	outb(0xf0, 0);		/* clear BUSY# latch */

	/*
	 * We set CR0 in locore to trap all ESC and WAIT instructions.
	 * We have to turn off the CR0_EM bit temporarily while probing.
	 */
	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
	intr_restore(s);
	result = npxprobe1(ia);
	s = intr_disable();
	lcr0(rcr0() | (CR0_EM|CR0_TS));

	/* Restore the interrupt mask and the saved IDT entries. */
	imen = save_imen;
	SET_ICUS();
	idt[irq] = save_idt_npxintr;
	idt[16] = save_idt_npxtrap;
	intr_restore(s);
	return (result);
}

/*
 * Pentium FDIV bug detector: computes x - (x/y)*y for a magic value
 * pair; a correct FPU returns 0, a buggy one does not.
 */
int npx586bug1(int, int);
asm (".text\n\t"
	"npx586bug1:\n\t"
	"fildl 4(%esp) # x\n\t"
	"fildl 8(%esp) # y\n\t"
	"fld %st(1)\n\t"
	"fdiv %st(1),%st # x/y\n\t"
	"fmulp %st,%st(1) # (x/y)*y\n\t"
	"fsubrp %st,%st(1) # x-(x/y)*y\n\t"
	"pushl $0\n\t"
	"fistpl (%esp)\n\t"
	"popl %eax\n\t"
	"ret\n\t");

/*
 * Per-CPU FPU initialization: check for the Pentium FDIV bug, determine
 * the valid MXCSR bits once on the first fxsave-capable CPU, then leave
 * TS set so the first FPU use traps through the DNA vector.
 */
void
npxinit(struct cpu_info *ci)
{
	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
	fninit();
	if (npx586bug1(4195835, 3145727) != 0) {
		i386_fpu_fdivbug = 1;
		printf("%s: WARNING: Pentium FDIV bug detected!\n",
		    ci->ci_dev->dv_xname);
	}
	if (fpu_mxcsr_mask == 0 && i386_use_fxsave) {
		/* fxsave requires a 16-byte aligned save area */
		struct savexmm xm __attribute__((aligned(16)));

		bzero(&xm, sizeof(xm));
		fxsave(&xm);
		if (xm.sv_env.en_mxcsr_mask)
			fpu_mxcsr_mask = xm.sv_env.en_mxcsr_mask;
		else
			/* a zero mask means the CPU reports the default */
			fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
	}
	lcr0(rcr0() | (CR0_TS));
}

/*
 * Attach routine - announce which it is, and wire into system
 */
void
npxattach(struct device *parent, struct device *self, void *aux)
{
	struct npx_softc *sc = (void *)self;
	struct isa_attach_args *ia = aux;

	switch (npx_type) {
	case NPX_INTERRUPT:
		printf("\n");
		lcr0(rcr0() & ~CR0_NE);
		sc->sc_ih = isa_intr_establish(ia->ia_ic, ia->ia_irq,
		    IST_EDGE, IPL_NONE, npxintr, 0, sc->sc_dev.dv_xname);
		break;
	case NPX_EXCEPTION:
		printf(": using exception 16\n");
		break;
	case NPX_CPUID:
		printf(": reported by CPUID; using exception 16\n");
		npx_type = NPX_EXCEPTION;
		break;
	case NPX_BROKEN:
		printf(": error reporting broken; not using\n");
		npx_type = NPX_NONE;
		return;
	case NPX_NONE:
		return;
	}

	npxinit(&cpu_info_primary);
	i386_fpu_present = 1;

	/* Select the DNA handler matching the save format in use. */
	if (i386_use_fxsave)
		npxdna_func = npxdna_xmm;
	else
		npxdna_func = npxdna_s87;
}

/*
 * Record the FPU state and reinitialize it all except for the control word.
 * Then generate a SIGFPE.
 *
 * Reinitializing the state allows naive SIGFPE handlers to longjmp without
 * doing any fixups.
 *
 * XXX there is currently no way to pass the full error state to signal
 * handlers, and if this is a nested interrupt there is no way to pass even
 * a status code!  So there is no way to have a non-naive SIGFPE handler.  At
 * best a handler could do an fninit followed by an fldcw of a static value.
 * fnclex would be of little use because it would leave junk on the FPU stack.
 * Returning from the handler would be even less safe than usual because
 * IRQ13 exception handling makes exceptions even less precise than usual.
 */
int
npxintr(void *arg)
{
	struct cpu_info *ci = curcpu();
	struct proc *p = ci->ci_fpcurproc;
	union savefpu *addr;
	struct intrframe *frame = arg;
	int code;
	union sigval sv;

	uvmexp.traps++;
	IPRINTF(("%s: fp intr\n", ci->ci_dev->dv_xname));

	if (p == NULL || npx_type == NPX_NONE) {
		/* XXX no %p in stand/printf.c.  Cast to quiet gcc -Wall. */
		printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
		    (u_long) p, (u_long) curproc, npx_type);
		panic("npxintr from nowhere");
	}
	/*
	 * Clear the interrupt latch.
	 */
	outb(0xf0, 0);
	/*
	 * If we're saving, ignore the interrupt.  The FPU will happily
	 * generate another one when we restore the state later.
	 */
	if (ci->ci_fpsaving)
		return (1);

#ifdef DIAGNOSTIC
	/*
	 * At this point, fpcurproc should be curproc.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	if (p != curproc)
		panic("npxintr: wrong process");
#endif

	/*
	 * Find the address of fpcurproc's saved FPU state.  (Given the
	 * invariant above, this is always the one in curpcb.)
	 */
	addr = &p->p_addr->u_pcb.pcb_savefpu;
	/*
	 * Save state.  This does an implied fninit.  It had better not halt
	 * the cpu or we'll hang.
	 */
	fpu_save(addr);
	fwait();
	/*
	 * Restore control word (was clobbered by fpu_save).
	 */
	if (i386_use_fxsave) {
		fldcw(&addr->sv_xmm.sv_env.en_cw);
		/*
		 * FNINIT doesn't affect MXCSR or the XMM registers;
		 * no need to re-load MXCSR here.
		 */
	} else
		fldcw(&addr->sv_87.sv_env.en_cw);
	fwait();
	/*
	 * Remember the exception status word and tag word.  The current
	 * (almost fninit'ed) fpu state is in the fpu and the exception
	 * state just saved will soon be junk.  However, the implied fninit
	 * doesn't change the error pointers or register contents, and we
	 * preserved the control word and will copy the status and tag
	 * words, so the complete exception state can be recovered.
	 */
	if (i386_use_fxsave) {
		addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
		addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
	} else {
		addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
		addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
	}

	/*
	 * Pass exception to process.  If it's the current process, try to do
	 * it immediately.
	 */
	if (p == curproc && USERMODE(frame->if_cs, frame->if_eflags)) {
		/*
		 * Interrupt is essentially a trap, so we can afford to call
		 * the SIGFPE handler (if any) as soon as the interrupt
		 * returns.
		 *
		 * XXX little or nothing is gained from this, and plenty is
		 * lost - the interrupt frame has to contain the trap frame
		 * (this is otherwise only necessary for the rescheduling trap
		 * in doreti, and the frame for that could easily be set up
		 * just before it is used).
		 */
		p->p_md.md_regs = (struct trapframe *)&frame->if_fs;

		/*
		 * Encode the appropriate code for detailed information on
		 * this exception.
		 */
		if (i386_use_fxsave)
			code = x86fpflags_to_siginfo(addr->sv_xmm.sv_ex_sw);
		else
			code = x86fpflags_to_siginfo(addr->sv_87.sv_ex_sw);
		sv.sival_int = frame->if_eip;
		KERNEL_LOCK();
		trapsignal(p, SIGFPE, T_ARITHTRAP, code, sv);
		KERNEL_UNLOCK();
	} else {
		/*
		 * Nested interrupt.  These losers occur when:
		 *	o an IRQ13 is bogusly generated at a bogus time, e.g.:
		 *		o immediately after an fnsave or frstor of an
		 *		  error state.
		 *		o a couple of 386 instructions after
		 *		  "fstpl _memvar" causes a stack overflow.
		 *	  These are especially nasty when combined with a
		 *	  trace trap.
		 *	o an IRQ13 occurs at the same time as another higher-
		 *	  priority interrupt.
		 *
		 * Treat them like a true async interrupt.
		 */
		KERNEL_LOCK();
		psignal(p, SIGFPE);
		KERNEL_UNLOCK();
	}

	return (1);
}

/*
 * Handle an SSE arithmetic fault on fxsave-capable CPUs: save the FPU
 * state, clear the MXCSR exception flag bits, and deliver a SIGFPE whose
 * si_code is derived from the raised MXCSR flags.
 */
void
npxtrap(struct trapframe *frame)
{
	struct proc *p = curcpu()->ci_fpcurproc;
	union savefpu *addr = &p->p_addr->u_pcb.pcb_savefpu;
	u_int32_t mxcsr, statbits;
	int code;
	union sigval sv;

#ifdef DIAGNOSTIC
	/*
	 * At this point, fpcurproc should be curproc.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	if (p != curproc)
		panic("npxtrap: wrong process");
#endif

	fxsave(&addr->sv_xmm);
	mxcsr = addr->sv_xmm.sv_env.en_mxcsr;
	statbits = mxcsr;		/* keep the raised flags for si_code */
	mxcsr &= ~0x3f;			/* clear the 6 MXCSR exception flags */
	ldmxcsr(&mxcsr);
	addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
	addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
	code = x86fpflags_to_siginfo (statbits);
	sv.sival_int = frame->tf_eip;
	KERNEL_LOCK();
	trapsignal(p, SIGFPE, frame->tf_err, code, sv);
	KERNEL_UNLOCK();
}

/*
 * Map the low bits of an x87 status word / MXCSR value to a SIGFPE
 * si_code.  The lowest set flag wins; FPE_FLTINV is the fallback when
 * no recognized flag is set.
 */
static int
x86fpflags_to_siginfo(u_int32_t flags)
{
	int i;
	static int x86fp_siginfo_table[] = {
		FPE_FLTINV,	/* bit 0 - invalid operation */
		FPE_FLTRES,	/* bit 1 - denormal operand */
		FPE_FLTDIV,	/* bit 2 - divide by zero */
		FPE_FLTOVF,	/* bit 3 - fp overflow */
		FPE_FLTUND,	/* bit 4 - fp underflow */
		FPE_FLTRES,	/* bit 5 - fp precision */
		FPE_FLTINV,	/* bit 6 - stack fault */
	};

	for (i = 0; i < sizeof(x86fp_siginfo_table)/sizeof(int); i++) {
		if (flags & (1 << i))
			return (x86fp_siginfo_table[i]);
	}
	/* punt if flags not set */
	return (FPE_FLTINV);
}

/*
 * Implement device not available (DNA) exception
 *
 * If we were the last process to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore our last
 * saved state.
 */
int
npxdna_xmm(struct cpu_info *ci)
{
	union savefpu *sfp;
	struct proc *p;
	int s;

	/* A DNA while we are mid-save means something is badly wrong. */
	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	s = splipi();		/* lock out IPI's while we clean house.. */

#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev->dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * XXX should have a fast-path here when no save/restore is necessary
	 */
	/*
	 * Initialize the FPU state to clear any exceptions.  If someone else
	 * was using the FPU, save their state (which does an implicit
	 * initialization).
	 */
	if (ci->ci_fpcurproc != NULL) {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev->dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
	} else {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
		fninit();
		fwait();
		stts();
	}
	splx(s);

	IPRINTF(("%s: done saving\n", ci->ci_dev->dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
	/* p's state may still live on another CPU; pull it back first. */
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#endif
	/* Claim the FPU for p: clear TS now and in p's saved cr0. */
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	s = splipi();
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(s);
	uvmexp.fpswtch++;

	sfp = &p->p_addr->u_pcb.pcb_savefpu;

	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
		/* First use: load a clean state with default cw/mxcsr. */
		bzero(&sfp->sv_xmm, sizeof(sfp->sv_xmm));
		sfp->sv_xmm.sv_env.en_cw = __INITIAL_NPXCW__;
		sfp->sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
		fxrstor(&sfp->sv_xmm);
		p->p_md.md_flags |= MDP_USEDFPU;
	} else {
		static double zero = 0.0;

		/*
		 * amd fpu does not restore fip, fdp, fop on fxrstor
		 * thus leaking other process's execution history.
		 */
		fnclex();
		__asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
		fxrstor(&sfp->sv_xmm);
	}

	return (1);
}

/*
 * DNA handler for CPUs without fxsave: same claiming protocol as
 * npxdna_xmm() above, but using the fnsave/frstor state format.
 */
int
npxdna_s87(struct cpu_info *ci)
{
	union savefpu *sfp;
	struct proc *p;
	int s;

	KDASSERT(i386_use_fxsave == 0);

	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	s = splipi();		/* lock out IPI's while we clean house.. */
#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev->dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * If someone else was using our FPU, save their state (which does an
	 * implicit initialization); otherwise, initialize the FPU state to
	 * clear any exceptions.
	 */
	if (ci->ci_fpcurproc != NULL) {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev->dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
	} else {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
		fninit();
		fwait();
		stts();
	}
	splx(s);

	IPRINTF(("%s: done saving\n", ci->ci_dev->dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#endif
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	s = splipi();
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(s);
	uvmexp.fpswtch++;

	sfp = &p->p_addr->u_pcb.pcb_savefpu;

	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
		/* First use: load a clean state; 0xffff tags = all empty. */
		bzero(&sfp->sv_87, sizeof(sfp->sv_87));
		sfp->sv_87.sv_env.en_cw = __INITIAL_NPXCW__;
		sfp->sv_87.sv_env.en_tw = 0xffff;
		frstor(&sfp->sv_87);
		p->p_md.md_flags |= MDP_USEDFPU;
	} else {
		/*
		 * The following frstor may cause an IRQ13 when the state being
		 * restored has a pending error.  The error will appear to have
		 * been triggered by the current (npx) user instruction even
		 * when that instruction is a no-wait instruction that should
		 * not trigger an error (e.g., fnclex).  On at least one 486
		 * system all of the no-wait instructions are broken the same
		 * as frstor, so our treatment does not amplify the breakage.
		 * On at least one 386/Cyrix 387 system, fnclex works correctly
		 * while frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		frstor(&sfp->sv_87);
	}

	return (1);
}

/*
 * The FNSAVE instruction clears the FPU state.  Rather than reloading the FPU
 * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a
 * reload of the FPU state the next time we try to use it.  This routine
 * is only called when forking, core dumping, or debugging, or swapping,
 * so the lazy reload at worst forces us to trap once per fork(), and at best
 * saves us a reload once per fork().
 */
void
npxsave_cpu(struct cpu_info *ci, int save)
{
	struct proc *p;
	int s;

	KDASSERT(ci == curcpu());

	p = ci->ci_fpcurproc;
	if (p == NULL)
		return;

	IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev->dv_xname,
	    save ? "save" : "flush", (u_long)p));

	if (save) {
#ifdef DIAGNOSTIC
		if (ci->ci_fpsaving != 0)
			panic("npxsave_cpu: recursive save!");
#endif
		/*
		 * Set ci->ci_fpsaving, so that any pending exception will be
		 * thrown away.  (It will be caught again if/when the FPU
		 * state is restored.)
		 *
		 * XXX on i386 and earlier, this routine should always be
		 * called at spl0; if it might called with the NPX interrupt
		 * masked, it would be necessary to forcibly unmask the NPX
		 * interrupt so that it could succeed.
		 * XXX this is irrelevant on 486 and above (systems
		 * which report FP failures via traps rather than irq13).
		 * XXX punting for now..
		 */
		clts();
		ci->ci_fpsaving = 1;
		fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
		ci->ci_fpsaving = 0;
		/* It is unclear if this is needed. */
		fwait();
	}

	/*
	 * We set the TS bit in the saved CR0 for this process, so that it
	 * will get a DNA exception on any FPU instruction and force a reload.
	 */
	stts();
	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;

	/* Break the proc <-> cpu FPU ownership link atomically wrt IPIs. */
	s = splipi();
	p->p_addr->u_pcb.pcb_fpcpu = NULL;
	ci->ci_fpcurproc = NULL;
	splx(s);
}

/*
 * Save p's FPU state, which may be on this processor or another processor.
 */
void
npxsave_proc(struct proc *p, int save)
{
	struct cpu_info *ci = curcpu();
	struct cpu_info *oci;

	KDASSERT(p->p_addr != NULL);

	oci = p->p_addr->u_pcb.pcb_fpcpu;
	if (oci == NULL)
		return;		/* no live FPU state anywhere */

	IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev->dv_xname,
	    save ? "save" : "flush", (u_long)p));

#if defined(MULTIPROCESSOR)
	if (oci == ci) {
		/* State is on this CPU; save it directly. */
		int s = splipi();
		npxsave_cpu(ci, save);
		splx(s);
	} else {
		/*
		 * State lives on another CPU: ask it via IPI, then spin
		 * until that CPU has dropped ownership (pcb_fpcpu == NULL).
		 */
		IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev->dv_xname,
		    oci->ci_dev->dv_xname, save ? "save" : "flush", (u_long)p));

		oci->ci_fpsaveproc = p;
		i386_send_ipi(oci,
		    save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
		while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
			CPU_BUSY_CYCLE();
	}
#else
	KASSERT(ci->ci_fpcurproc == p);
	npxsave_cpu(ci, save);
#endif
}

/*
 * Borrow the FPU for in-kernel use: claim it for proc0 (saving any user
 * state first) and load a known control word / MXCSR.  Must be paired
 * with fpu_kernel_exit().
 */
void
fpu_kernel_enter(void)
{
	struct cpu_info *ci = curcpu();
	uint32_t cw;
	int s;

	/*
	 * Fast path.  If the kernel was using the FPU before, there
	 * is no work to do besides clearing TS.
	 */
	if (ci->ci_fpcurproc == &proc0) {
		clts();
		return;
	}

	s = splipi();

	if (ci->ci_fpcurproc != NULL) {
		npxsave_cpu(ci, 1);
		uvmexp.fpswtch++;
	}

	/* Claim the FPU */
	ci->ci_fpcurproc = &proc0;

	splx(s);

	/* Disable DNA exceptions */
	clts();

	/* Initialize the FPU */
	fninit();
	cw = __INITIAL_NPXCW__;
	fldcw(&cw);
	if (i386_has_sse || i386_has_sse2) {
		cw = __INITIAL_MXCSR__;
		ldmxcsr(&cw);
	}
}

/*
 * Release the FPU after fpu_kernel_enter(): set CR0_TS so the next FPU
 * use traps through the DNA vector and reloads proper state.
 */
void
fpu_kernel_exit(void)
{
	/* Enable DNA exceptions */
	stts();
}