1 /* $OpenBSD: npx.c,v 1.74 2023/01/30 10:49:05 jsg Exp $ */ 2 /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */ 3 4 #if 0 5 #define IPRINTF(x) printf x 6 #else 7 #define IPRINTF(x) 8 #endif 9 10 /*- 11 * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved. 12 * Copyright (c) 1990 William Jolitz. 13 * Copyright (c) 1991 The Regents of the University of California. 14 * All rights reserved. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 
39 * 40 * @(#)npx.c 7.2 (Berkeley) 5/12/91 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/proc.h> 46 #include <sys/signalvar.h> 47 #include <sys/user.h> 48 #include <sys/device.h> 49 50 #include <uvm/uvm_extern.h> 51 52 #include <machine/intr.h> 53 #include <machine/npx.h> 54 #include <machine/pio.h> 55 #include <machine/cpufunc.h> 56 #include <machine/pcb.h> 57 #include <machine/trap.h> 58 #include <machine/specialreg.h> 59 #include <machine/i8259.h> 60 61 #include <dev/isa/isavar.h> 62 63 /* 64 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 65 * 66 * We do lazy initialization and switching using the TS bit in cr0 and the 67 * MDP_USEDFPU bit in mdproc. 68 * 69 * DNA exceptions are handled like this: 70 * 71 * 1) If there is no NPX, return and go to the emulator. 72 * 2) If someone else has used the NPX, save its state into that process's PCB. 73 * 3a) If MDP_USEDFPU is not set, set it and initialize the NPX. 74 * 3b) Otherwise, reload the process's previous NPX state. 75 * 76 * When a process is created or exec()s, its saved cr0 image has the TS bit 77 * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the 78 * process first gets a DNA and the NPX is initialized. The TS bit is turned 79 * off when the NPX is used, and turned on again later when the process's NPX 80 * state is saved. 
 */

/*
 * Single-instruction wrappers for the x87 control instructions used below.
 */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
#define	fnclex()		__asm("fnclex")
#define	fninit()		__asm("fninit")
#define	fnsave(addr)		__asm("fnsave %0" : "=m" (*addr))
#define	fnstcw(addr)		__asm("fnstcw %0" : "=m" (*addr))
#define	fnstsw(addr)		__asm("fnstsw %0" : "=m" (*addr))
#define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fwait")
#define	frstor(addr)		__asm("frstor %0" : : "m" (*addr))
#define	fwait()			__asm("fwait")
#define	clts()			__asm("clts")
#define	stts()			lcr0(rcr0() | CR0_TS)

/*
 * The mxcsr_mask for this host, taken from fxsave() on the primary CPU
 */
uint32_t	fpu_mxcsr_mask;

int npxintr(void *);
static int npxprobe1(struct isa_attach_args *);
static int x86fpflags_to_siginfo(u_int32_t);


struct npx_softc {
	struct device sc_dev;
	void *sc_ih;		/* IRQ13 handle, when NPX_INTERRUPT */
};

int npxprobe(struct device *, void *, void *);
void npxattach(struct device *, struct device *, void *);

const struct cfattach npx_ca = {
	sizeof(struct npx_softc), npxprobe, npxattach
};

struct cfdriver npx_cd = {
	NULL, "npx", DV_DULL
};

/* How this system reports FPU errors, as determined by npxprobe(). */
enum npx_type {
	NPX_NONE = 0,		/* no usable FPU */
	NPX_INTERRUPT,		/* errors arrive via IRQ13 */
	NPX_EXCEPTION,		/* errors arrive via exception 16 */
	NPX_BROKEN,		/* FPU present, error reporting broken */
	NPX_CPUID,		/* FPU reported by CPUID */
};

static enum npx_type npx_type;
/* Bumped by probeintr()/probetrap() so npxprobe1() can tell which fired. */
static volatile u_int npx_intrs_while_probing
    __attribute__((section(".kudata")));
static volatile u_int npx_traps_while_probing
    __attribute__((section(".kudata")));

extern int i386_fpu_present;
extern int i386_fpu_exception;
extern int i386_fpu_fdivbug;

#define	fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
#define	ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*addr))

/*
 * Save the FPU state into *addr, using fxsave when available and fnsave
 * otherwise.  Either way the FPU is left freshly initialized afterwards.
 */
static __inline void
fpu_save(union savefpu *addr)
{

	if (i386_use_fxsave) {
		fxsave(&addr->sv_xmm);
		/* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
		fninit();
	} else
		fnsave(&addr->sv_87);
}

/* Placeholder DNA handler; npxattach() installs the real one. */
static int
npxdna_notset(struct cpu_info *ci)
{
	panic("npxdna vector not initialized");
}

int (*npxdna_func)(struct cpu_info *) = npxdna_notset;
int npxdna_s87(struct cpu_info *);
int npxdna_xmm(struct cpu_info *);
void npxexit(void);

/*
 * Special interrupt handlers.  Someday intr0-intr15 will be used to count
 * interrupts.  We'll still need a special exception 16 handler.  The busy
 * latch stuff in probeintr() can be moved to npxprobe().
 */
void probeintr(void);
asm (".text\n\t"
    "probeintr:\n\t"
    "ss\n\t"
    "incl npx_intrs_while_probing\n\t"
    "pushl %eax\n\t"
    "movb $0x20,%al # EOI (asm in strings loses cpp features)\n\t"
    "outb %al,$0xa0 # IO_ICU2\n\t"
    "outb %al,$0x20 # IO_ICU1\n\t"
    "movb $0,%al\n\t"
    "outb %al,$0xf0 # clear BUSY# latch\n\t"
    "popl %eax\n\t"
    "iret\n\t");

void probetrap(void);
asm (".text\n\t"
    "probetrap:\n\t"
    "ss\n\t"
    "incl npx_traps_while_probing\n\t"
    "fnclex\n\t"
    "iret\n\t");

/*
 * Probe for a pre-CPUID FPU and classify its error-reporting mechanism
 * into npx_type.  Returns 1 if an FPU was found, 0 otherwise.  Caller
 * (npxprobe()) has already installed probeintr/probetrap and unmasked
 * IRQ13 so we can see which mechanism delivers the divide-by-zero error.
 */
static inline int
npxprobe1(struct isa_attach_args *ia)
{
	int control;
	int status;

	ia->ia_iosize = 16;
	ia->ia_msize = 0;

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but probeintr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();
	delay(1000);		/* wait for any IRQ13 (fwait might hang) */

	/*
	 * Check for a status of mostly zero.
	 */
	status = 0x5a5a;	/* poison value so a dead fnstsw is detected */
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(&control);
			npx_traps_while_probing = npx_intrs_while_probing = 0;
			fp_divide_by_0();
			delay(1);
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				npx_type = NPX_EXCEPTION;
				ia->ia_irq = IRQUNK;	/* zap the interrupt */
				i386_fpu_exception = 1;
			} else if (npx_intrs_while_probing != 0) {
				/*
				 * Bad, we are stuck with IRQ13.
				 */
				npx_type = NPX_INTERRUPT;
			} else {
				/*
				 * Worse, even IRQ13 is broken.
				 */
				npx_type = NPX_BROKEN;
				ia->ia_irq = IRQUNK;
			}
			return 1;
		}
	}

	/*
	 * Probe failed.  There is no usable FPU.
	 */
	npx_type = NPX_NONE;
	return 0;
}

/*
 * Probe routine.  Initialize cr0 to give correct behaviour for [f]wait
 * whether the device exists or not (XXX should be elsewhere).  Set flags
 * to tell npxattach() what to do.  Modify device struct if npx doesn't
 * need to use interrupts.  Return 1 if device exists.
 */
int
npxprobe(struct device *parent, void *match, void *aux)
{
	struct isa_attach_args *ia = aux;
	int irq;
	int result;
	u_long s;
	unsigned save_imen;
	struct gate_descriptor save_idt_npxintr;
	struct gate_descriptor save_idt_npxtrap;

	/* Modern CPUs: trust CPUID and skip the legacy probe entirely. */
	if (cpu_feature & CPUID_FPU) {
		npx_type = NPX_CPUID;
		i386_fpu_exception = 1;
		ia->ia_irq = IRQUNK;	/* Don't want the interrupt vector */
		ia->ia_iosize = 16;
		ia->ia_msize = 0;
		return 1;
	}

	/*
	 * This routine is now just a wrapper for npxprobe1(), to install
	 * special npx interrupt and trap handlers, to enable npx interrupts
	 * and to disable other interrupts.  Someday isa_configure() will
	 * install suitable handlers and run with interrupts enabled so we
	 * won't need to do so much here.
	 */
	irq = NRSVIDT + ia->ia_irq;
	s = intr_disable();
	/* Save the IDT entries we are about to clobber for the probe. */
	save_idt_npxintr = idt[irq];
	save_idt_npxtrap = idt[16];
	setgate(&idt[irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
	setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
	save_imen = imen;
	/* Mask everything except the cascade and the FPU IRQ. */
	imen = ~((1 << IRQ_SLAVE) | (1 << ia->ia_irq));
	SET_ICUS();

	/*
	 * Partially reset the coprocessor, if any.  Some BIOS's don't reset
	 * it after a warm boot.
	 */
	outb(0xf1, 0);		/* full reset on some systems, NOP on others */
	delay(1000);
	outb(0xf0, 0);		/* clear BUSY# latch */

	/*
	 * We set CR0 in locore to trap all ESC and WAIT instructions.
	 * We have to turn off the CR0_EM bit temporarily while probing.
	 */
	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
	intr_restore(s);
	result = npxprobe1(ia);
	s = intr_disable();
	lcr0(rcr0() | (CR0_EM|CR0_TS));

	/* Restore the interrupt masks and IDT entries we borrowed. */
	imen = save_imen;
	SET_ICUS();
	idt[irq] = save_idt_npxintr;
	idt[16] = save_idt_npxtrap;
	intr_restore(s);
	return (result);
}

/*
 * Returns non-zero on a CPU afflicted by the Pentium FDIV bug: computes
 * x - (x/y)*y for a known-bad operand pair, which is 0 on correct FPUs.
 */
int npx586bug1(int, int);
asm (".text\n\t"
    "npx586bug1:\n\t"
    "fildl 4(%esp) # x\n\t"
    "fildl 8(%esp) # y\n\t"
    "fld %st(1)\n\t"
    "fdiv %st(1),%st # x/y\n\t"
    "fmulp %st,%st(1) # (x/y)*y\n\t"
    "fsubrp %st,%st(1) # x-(x/y)*y\n\t"
    "pushl $0\n\t"
    "fistpl (%esp)\n\t"
    "popl %eax\n\t"
    "ret\n\t");

/*
 * Per-CPU FPU initialization: check for the FDIV bug, determine the
 * host's MXCSR mask (once, on the first CPU through here), and leave
 * the FPU lazy-switched (TS set).
 */
void
npxinit(struct cpu_info *ci)
{
	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
	fninit();
	if (npx586bug1(4195835, 3145727) != 0) {
		i386_fpu_fdivbug = 1;
		printf("%s: WARNING: Pentium FDIV bug detected!\n",
		    ci->ci_dev->dv_xname);
	}
	if (fpu_mxcsr_mask == 0 && i386_use_fxsave) {
		/* fxsave requires a 16-byte aligned save area */
		struct savexmm xm __attribute__((aligned(16)));

		bzero(&xm, sizeof(xm));
		fxsave(&xm);
		if (xm.sv_env.en_mxcsr_mask)
			fpu_mxcsr_mask = xm.sv_env.en_mxcsr_mask;
		else
			fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
	}
	lcr0(rcr0() | (CR0_TS));
}

/*
 * Attach routine - announce which it is, and wire into system
 */
void
npxattach(struct device *parent, struct device *self, void *aux)
{
	struct npx_softc *sc = (void *)self;
	struct isa_attach_args *ia = aux;

	switch (npx_type) {
	case NPX_INTERRUPT:
		printf("\n");
		lcr0(rcr0() & ~CR0_NE);
		sc->sc_ih = isa_intr_establish(ia->ia_ic, ia->ia_irq,
		    IST_EDGE, IPL_NONE, npxintr, 0, sc->sc_dev.dv_xname);
		break;
	case NPX_EXCEPTION:
		printf(": using exception 16\n");
		break;
	case NPX_CPUID:
		printf(": reported by CPUID; using exception 16\n");
		npx_type = NPX_EXCEPTION;
		break;
	case NPX_BROKEN:
		printf(": error reporting broken; not using\n");
		npx_type = NPX_NONE;
		return;
	case NPX_NONE:
		return;
	}

	npxinit(&cpu_info_primary);
	i386_fpu_present = 1;

	/* Install the DNA handler matching the save format in use. */
	if (i386_use_fxsave)
		npxdna_func = npxdna_xmm;
	else
		npxdna_func = npxdna_s87;
}

/*
 * Record the FPU state and reinitialize it all except for the control word.
 * Then generate a SIGFPE.
 *
 * Reinitializing the state allows naive SIGFPE handlers to longjmp without
 * doing any fixups.
 *
 * XXX there is currently no way to pass the full error state to signal
 * handlers, and if this is a nested interrupt there is no way to pass even
 * a status code!  So there is no way to have a non-naive SIGFPE handler.  At
 * best a handler could do an fninit followed by an fldcw of a static value.
 * fnclex would be of little use because it would leave junk on the FPU stack.
 * Returning from the handler would be even less safe than usual because
 * IRQ13 exception handling makes exceptions even less precise than usual.
 */
int
npxintr(void *arg)
{
	struct cpu_info *ci = curcpu();
	struct proc *p = ci->ci_fpcurproc;
	union savefpu *addr;
	struct intrframe *frame = arg;
	int code;
	union sigval sv;

	uvmexp.traps++;
	IPRINTF(("%s: fp intr\n", ci->ci_dev->dv_xname));

	if (p == NULL || npx_type == NPX_NONE) {
		/* XXX no %p in stand/printf.c.  Cast to quiet gcc -Wall. */
		printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
		    (u_long) p, (u_long) curproc, npx_type);
		panic("npxintr from nowhere");
	}
	/*
	 * Clear the interrupt latch.
	 */
	outb(0xf0, 0);
	/*
	 * If we're saving, ignore the interrupt.  The FPU will happily
	 * generate another one when we restore the state later.
	 */
	if (ci->ci_fpsaving)
		return (1);

#ifdef DIAGNOSTIC
	/*
	 * At this point, fpcurproc should be curproc.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	if (p != curproc)
		panic("npxintr: wrong process");
#endif

	/*
	 * Find the address of fpcurproc's saved FPU state.  (Given the
	 * invariant above, this is always the one in curpcb.)
	 */
	addr = &p->p_addr->u_pcb.pcb_savefpu;
	/*
	 * Save state.  This does an implied fninit.  It had better not halt
	 * the cpu or we'll hang.
	 */
	fpu_save(addr);
	fwait();
	/*
	 * Restore control word (was clobbered by fpu_save).
	 */
	if (i386_use_fxsave) {
		fldcw(&addr->sv_xmm.sv_env.en_cw);
		/*
		 * FNINIT doesn't affect MXCSR or the XMM registers;
		 * no need to re-load MXCSR here.
		 */
	} else
		fldcw(&addr->sv_87.sv_env.en_cw);
	fwait();
	/*
	 * Remember the exception status word and tag word.  The current
	 * (almost fninit'ed) fpu state is in the fpu and the exception
	 * state just saved will soon be junk.  However, the implied fninit
	 * doesn't change the error pointers or register contents, and we
	 * preserved the control word and will copy the status and tag
	 * words, so the complete exception state can be recovered.
	 */
	if (i386_use_fxsave) {
		addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
		addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
	} else {
		addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
		addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
	}

	/*
	 * Pass exception to process.  If it's the current process, try to do
	 * it immediately.
	 */
	if (p == curproc && USERMODE(frame->if_cs, frame->if_eflags)) {
		/*
		 * Interrupt is essentially a trap, so we can afford to call
		 * the SIGFPE handler (if any) as soon as the interrupt
		 * returns.
		 *
		 * XXX little or nothing is gained from this, and plenty is
		 * lost - the interrupt frame has to contain the trap frame
		 * (this is otherwise only necessary for the rescheduling trap
		 * in doreti, and the frame for that could easily be set up
		 * just before it is used).
		 */
		p->p_md.md_regs = (struct trapframe *)&frame->if_fs;

		/*
		 * Encode the appropriate code for detailed information on
		 * this exception.
		 */
		if (i386_use_fxsave)
			code = x86fpflags_to_siginfo(addr->sv_xmm.sv_ex_sw);
		else
			code = x86fpflags_to_siginfo(addr->sv_87.sv_ex_sw);
		sv.sival_int = frame->if_eip;
		trapsignal(p, SIGFPE, T_ARITHTRAP, code, sv);
	} else {
		/*
		 * Nested interrupt.  These losers occur when:
		 *	o an IRQ13 is bogusly generated at a bogus time, e.g.:
		 *		o immediately after an fnsave or frstor of an
		 *		  error state.
		 *		o a couple of 386 instructions after
		 *		  "fstpl _memvar" causes a stack overflow.
		 *	  These are especially nasty when combined with a
		 *	  trace trap.
		 *	o an IRQ13 occurs at the same time as another higher-
		 *	  priority interrupt.
		 *
		 * Treat them like a true async interrupt.
		 */
		KERNEL_LOCK();
		psignal(p, SIGFPE);
		KERNEL_UNLOCK();
	}

	return (1);
}

/*
 * Handle an FP exception reported via a trap (not IRQ13): save the state,
 * clear the pending MXCSR exception bits, and deliver a SIGFPE with a
 * siginfo code derived from the exception flags.
 *
 * NOTE(review): this path uses fxsave/ldmxcsr unconditionally, so it
 * presumably is only reached on fxsave-capable (SSE) CPUs -- confirm
 * against the trap dispatch in machdep/trap code.
 */
void
npxtrap(struct trapframe *frame)
{
	struct proc *p = curcpu()->ci_fpcurproc;
	union savefpu *addr = &p->p_addr->u_pcb.pcb_savefpu;
	u_int32_t mxcsr, statbits;
	int code;
	union sigval sv;

#ifdef DIAGNOSTIC
	/*
	 * At this point, fpcurproc should be curproc.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	if (p != curproc)
		panic("npxtrap: wrong process");
#endif

	fxsave(&addr->sv_xmm);
	mxcsr = addr->sv_xmm.sv_env.en_mxcsr;
	statbits = mxcsr;
	/* Clear the six exception-flag bits so the fault doesn't re-fire. */
	mxcsr &= ~0x3f;
	ldmxcsr(&mxcsr);
	addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
	addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
	code = x86fpflags_to_siginfo(statbits);
	sv.sival_int = frame->tf_eip;
	trapsignal(p, SIGFPE, frame->tf_err, code, sv);
}

/*
 * Map x87/MXCSR exception-flag bits to an FPE_* siginfo code.  The lowest
 * set bit wins; FPE_FLTINV is the fallback when no known flag is set.
 */
static int
x86fpflags_to_siginfo(u_int32_t flags)
{
	int i;
	static int x86fp_siginfo_table[] = {
		FPE_FLTINV,	/* bit 0 - invalid operation */
		FPE_FLTRES,	/* bit 1 - denormal operand */
		FPE_FLTDIV,	/* bit 2 - divide by zero */
		FPE_FLTOVF,	/* bit 3 - fp overflow */
		FPE_FLTUND,	/* bit 4 - fp underflow */
		FPE_FLTRES,	/* bit 5 - fp precision */
		FPE_FLTINV,	/* bit 6 - stack fault */
	};

	for (i=0;i < sizeof(x86fp_siginfo_table)/sizeof(int); i++) {
		if (flags & (1 << i))
			return (x86fp_siginfo_table[i]);
	}
	/* punt if flags not set */
	return (FPE_FLTINV);
}

/*
 * Implement device not available (DNA) exception
 *
 * If we were the last process to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore our last
 * saved state.
 */
int
npxdna_xmm(struct cpu_info *ci)
{
	union savefpu *sfp;
	struct proc *p;
	int s;

	/* A DNA while saving means the save itself faulted; bail out. */
	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	s = splipi();		/* lock out IPI's while we clean house.. */

#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev->dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * XXX should have a fast-path here when no save/restore is necessary
	 */
	/*
	 * Initialize the FPU state to clear any exceptions.  If someone else
	 * was using the FPU, save their state (which does an implicit
	 * initialization).
	 */
	if (ci->ci_fpcurproc != NULL) {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev->dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
	} else {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
		fninit();
		fwait();
		stts();
	}
	splx(s);

	IPRINTF(("%s: done saving\n", ci->ci_dev->dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
	/* Our state may still live on another CPU; pull it back first. */
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#endif
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	s = splipi();
	/* Claim the FPU for p on this CPU. */
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(s);
	uvmexp.fpswtch++;

	sfp = &p->p_addr->u_pcb.pcb_savefpu;

	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
		/* First FPU use by this process: load a clean initial state. */
		bzero(&sfp->sv_xmm, sizeof(sfp->sv_xmm));
		sfp->sv_xmm.sv_env.en_cw = __INITIAL_NPXCW__;
		sfp->sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
		fxrstor(&sfp->sv_xmm);
		p->p_md.md_flags |= MDP_USEDFPU;
	} else {
		static double zero = 0.0;

		/*
		 * amd fpu does not restore fip, fdp, fop on fxrstor
		 * thus leaking other process's execution history.
		 */
		fnclex();
		__asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
		fxrstor(&sfp->sv_xmm);
	}

	return (1);
}

/*
 * DNA handler for CPUs without fxsave: same flow as npxdna_xmm() but
 * using fnsave/frstor and the 87-format save area.
 */
int
npxdna_s87(struct cpu_info *ci)
{
	union savefpu *sfp;
	struct proc *p;
	int s;

	KDASSERT(i386_use_fxsave == 0);

	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	s = splipi();		/* lock out IPI's while we clean house.. */
#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev->dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * If someone else was using our FPU, save their state (which does an
	 * implicit initialization); otherwise, initialize the FPU state to
	 * clear any exceptions.
	 */
	if (ci->ci_fpcurproc != NULL) {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev->dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
	} else {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev->dv_xname));
		fninit();
		fwait();
		stts();
	}
	splx(s);

	IPRINTF(("%s: done saving\n", ci->ci_dev->dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#endif
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	s = splipi();
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(s);
	uvmexp.fpswtch++;

	sfp = &p->p_addr->u_pcb.pcb_savefpu;

	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
		bzero(&sfp->sv_87, sizeof(sfp->sv_87));
		sfp->sv_87.sv_env.en_cw = __INITIAL_NPXCW__;
		sfp->sv_87.sv_env.en_tw = 0xffff;	/* all registers empty */
		frstor(&sfp->sv_87);
		p->p_md.md_flags |= MDP_USEDFPU;
	} else {
		/*
		 * The following frstor may cause an IRQ13 when the state being
		 * restored has a pending error.  The error will appear to have
		 * been triggered by the current (npx) user instruction even
		 * when that instruction is a no-wait instruction that should
		 * not trigger an error (e.g., fnclex).  On at least one 486
		 * system all of the no-wait instructions are broken the same
		 * as frstor, so our treatment does not amplify the breakage.
		 * On at least one 386/Cyrix 387 system, fnclex works correctly
		 * while frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		frstor(&sfp->sv_87);
	}

	return (1);
}

/*
 * The FNSAVE instruction clears the FPU state.  Rather than reloading the FPU
 * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a
 * reload of the FPU state the next time we try to use it.  This routine
 * is only called when forking, core dumping, or debugging, or swapping,
 * so the lazy reload at worst forces us to trap once per fork(), and at best
 * saves us a reload once per fork().
 */
void
npxsave_cpu(struct cpu_info *ci, int save)
{
	struct proc *p;
	int s;

	KDASSERT(ci == curcpu());

	p = ci->ci_fpcurproc;
	if (p == NULL)
		return;

	IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev->dv_xname,
	    save ? "save" : "flush", (u_long)p));

	if (save) {
#ifdef DIAGNOSTIC
		if (ci->ci_fpsaving != 0)
			panic("npxsave_cpu: recursive save!");
#endif
		/*
		 * Set ci->ci_fpsaving, so that any pending exception will be
		 * thrown away.  (It will be caught again if/when the FPU
		 * state is restored.)
		 *
		 * XXX on i386 and earlier, this routine should always be
		 * called at spl0; if it might called with the NPX interrupt
		 * masked, it would be necessary to forcibly unmask the NPX
		 * interrupt so that it could succeed.
		 * XXX this is irrelevant on 486 and above (systems
		 * which report FP failures via traps rather than irq13).
		 * XXX punting for now..
		 */
		clts();
		ci->ci_fpsaving = 1;
		fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
		ci->ci_fpsaving = 0;
		/* It is unclear if this is needed. */
		fwait();
	}

	/*
	 * We set the TS bit in the saved CR0 for this process, so that it
	 * will get a DNA exception on any FPU instruction and force a reload.
	 */
	stts();
	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;

	s = splipi();
	p->p_addr->u_pcb.pcb_fpcpu = NULL;
	ci->ci_fpcurproc = NULL;
	splx(s);
}

/*
 * Save p's FPU state, which may be on this processor or another processor.
 */
void
npxsave_proc(struct proc *p, int save)
{
	struct cpu_info *ci = curcpu();
	struct cpu_info *oci;

	KDASSERT(p->p_addr != NULL);

	oci = p->p_addr->u_pcb.pcb_fpcpu;
	if (oci == NULL)
		return;

	IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev->dv_xname,
	    save ? "save" : "flush", (u_long)p));

#if defined(MULTIPROCESSOR)
	if (oci == ci) {
		int s = splipi();
		npxsave_cpu(ci, save);
		splx(s);
	} else {
		IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev->dv_xname,
		    oci->ci_dev->dv_xname, save ? "save" : "flush", (u_long)p));

		/* Ask the owning CPU to save/flush, then spin until done. */
		oci->ci_fpsaveproc = p;
		i386_send_ipi(oci,
		    save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
		while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
			CPU_BUSY_CYCLE();
	}
#else
	KASSERT(ci->ci_fpcurproc == p);
	npxsave_cpu(ci, save);
#endif
}

/*
 * Claim the FPU for in-kernel use.  proc0 serves as the "kernel owns the
 * FPU" marker in ci_fpcurproc; any user state is saved first, and the FPU
 * is loaded with a clean default state.  Must be paired with
 * fpu_kernel_exit().
 */
void
fpu_kernel_enter(void)
{
	struct cpu_info *ci = curcpu();
	uint32_t cw;
	int s;

	/*
	 * Fast path.  If the kernel was using the FPU before, there
	 * is no work to do besides clearing TS.
	 */
	if (ci->ci_fpcurproc == &proc0) {
		clts();
		return;
	}

	s = splipi();

	if (ci->ci_fpcurproc != NULL) {
		npxsave_cpu(ci, 1);
		uvmexp.fpswtch++;
	}

	/* Claim the FPU */
	ci->ci_fpcurproc = &proc0;

	splx(s);

	/* Disable DNA exceptions */
	clts();

	/* Initialize the FPU */
	fninit();
	cw = __INITIAL_NPXCW__;
	fldcw(&cw);
	if (i386_has_sse || i386_has_sse2) {
		cw = __INITIAL_MXCSR__;
		ldmxcsr(&cw);
	}
}

/*
 * Release the FPU after in-kernel use; re-arm the DNA trap so the next
 * user touch of the FPU reloads the proper state.
 */
void
fpu_kernel_exit(void)
{
	/* Enable DNA exceptions */
	stts();
}