/*	$NetBSD: fpu.c,v 1.10 2014/11/27 14:22:09 uebayasi Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
 * rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

/*-
 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1990 William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

/*
 * XXXfvdl update copyright notice. this started out as a stripped isa/npx.c
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.10 2014/11/27 14:22:09 uebayasi Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>

#include <machine/cpu.h>
#include <machine/intr.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <x86/cpu.h>
#include <x86/fpu.h>

/* Check some duplicate definitions match */
#include <machine/fenv.h>

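/*
 * A Xen PV guest cannot toggle CR0.TS itself, so clts()/stts() are routed
 * through the fpu_taskswitch hypercall (0 clears TS, 1 sets it).
 */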
#ifdef XEN
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif

static inline union savefpu *
process_fpframe(struct lwp *lwp)
{
	struct pcb *pcb = lwp_getpcb(lwp);

	return &pcb->pcb_savefpu;
}

/*
 * We do lazy initialization and switching using the TS bit in cr0 and the
 * MDL_USEDFPU bit in mdlwp.
 *
 * DNA exceptions are handled like this:
 *
 * 1) If there is no FPU, send SIGILL.
 * 2) If someone else has used the FPU, save its state into that lwp's PCB.
 * 3a) If MDL_USEDFPU is not set, set it and initialize the FPU.
 * 3b) Otherwise, reload the lwp's previous FPU state.
 *
 * When a lwp is created or exec()s, its saved cr0 image has the TS bit
 * set and the MDL_USEDFPU bit clear.  The MDL_USEDFPU bit is set when the
 * lwp first gets a DNA and the FPU is initialized.  The TS bit is turned
 * off when the FPU is used, and turned on again later when the lwp's FPU
 * state is saved.
 */

/*
 * The following table is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if SSE SIMD instructions generate errors
 * on more than one value or if the user process modifies the control
 * word while a status word bit is already set (which is a sign
 * of bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
 *
 * The mechanism has a static table with 128 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The code to choose one of these values does these steps:
 * 1) Throw away status word bits that cannot be masked.
 * 2) Throw away the bits currently masked in the control word,
 *    assuming the user isn't interested in them anymore.
 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
 *    'Stack fault' is a sub-class of 'invalid operation'.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 * 1  Invalid operation (FP_X_INV)
 * 1a   Stack underflow
 * 1b   Stack overflow
 * 1c   Operand of unsupported format
 * 1d   SNaN operand.
 * 2  QNaN operand (not an exception, irrelevant here)
 * 3  Any other invalid-operation not mentioned above or zero divide
 *      (FP_X_INV, FP_X_DZ)
 * 4  Denormal operand (FP_X_DNML)
 * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 * 6  Inexact result (FP_X_IMP)
 *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
 * They are in the same order, but there is no EN_SW_STACK_FAULT in the
 * mxcsr status.
 *
 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
 * are swapped).
 *
 * This table assumes that any stack fault is cleared - so that an INVOP
 * fault will only be reported as FLTSUB once.
 * This might not happen if the mask is being changed.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP \
		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
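/*
 * Each FPE_xxxN macro expands to N consecutive table entries, so the four
 * FPE_xxx32 expansions below fill all 128 combinations of the low 7
 * status-word bits.
 */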
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32

/*
 * Init the FPU.
 *
 * This might not be strictly necessary since it will be initialised
 * for each process.  However it does no harm.
 */
void
fpuinit(struct cpu_info *ci)
{
	if (!i386_fpu_present)
		return;

	clts();
	fninit();
	stts();
}

void
fpu_set_default_cw(struct lwp *lwp, unsigned int x87_cw)
{
	union savefpu *fpu_save = process_fpframe(lwp);

	if (i386_use_fxsave)
		fpu_save->sv_xmm.fx_cw = x87_cw;
	else
		fpu_save->sv_87.s87_cw = x87_cw;
	fpu_save->sv_os.fxo_dflt_cw = x87_cw;
}

static void
send_sigill(void *rip)
{
	/* No fpu (486SX) - send SIGILL */
	ksiginfo_t ksi;

	x86_enable_intr();
	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGILL;
	ksi.ksi_addr = rip;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
	return;
}

/*
 * This is a synchronous trap on either an x87 instruction (due to an
 * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
 * instruction due to an error on the instruction itself.
 *
 * If the trap actually generates a signal, then the fpu state is saved
 * and then copied onto the process's user-stack, and then recovered
 * from there when the signal returns (or from the jmp_buf if the
 * signal handler exits with a longjmp()).
 *
 * All this code needs to do is save the reason for the trap.
 * For x87 interrupts the status word bits need clearing to stop the
 * trap re-occurring.
 *
 * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
 *
 * Since this is a synchronous trap, the fpu registers must still belong
 * to the correct process (we trap through an interrupt gate so that
 * interrupts are disabled on entry).
 * Interrupts (these better include IPIs) are left disabled until we've
 * finished looking at fpu registers.
 *
 * For amd64 the calling code (in amd64_trap.S) has already checked
 * that we trapped from usermode.
 */

void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpu trap from kernel, trapframe %p\n", frame);

	if (i386_fpu_present == 0) {
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	/*
	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
	 * should be set, and we should have gotten a DNA exception.
	 */
	KASSERT(curcpu()->ci_fpcurlwp == curlwp);

	if (frame->tf_trapno == T_XMM) {
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/* Remove masked interrupts and non-status bits */
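		/* (the mxcsr mask bits sit 7 bits above the exception flags) */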
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this as an XMM status */
		statbits |= 0x10000;
	} else {
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked interrupts */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get pre-empted */
	x86_enable_intr();

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}

/*
 * Implement device not available (DNA) exception
 *
 * If we were the last lwp to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore
 * our last saved state.
 *
 * Called directly from the trap 0x13 entry with interrupts still disabled.
 */
void
fpudna(struct trapframe *frame)
{
	struct cpu_info *ci;
	struct lwp *l, *fl;
	struct pcb *pcb;
	int s;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpudna from kernel, ip %p, trapframe %p\n",
		    (void *)X86_TF_RIP(frame), frame);

	if (i386_fpu_present == 0) {
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	ci = curcpu();

	/* Save soft spl level - interrupts are hard disabled */
	s = splhigh();

	/* Save state on current CPU. */
	l = ci->ci_curlwp;
	pcb = lwp_getpcb(l);
	fl = ci->ci_fpcurlwp;
	if (fl != NULL) {
		/*
		 * It seems we can get here on Xen even if we didn't
		 * switch lwp.  In this case do nothing.
		 */
		if (fl == l) {
			KASSERT(pcb->pcb_fpcpu == ci);
			clts();
			splx(s);
			return;
		}
		fpusave_cpu(true);
	}

	/* Save our state if on a remote CPU. */
	if (pcb->pcb_fpcpu != NULL) {
		/* Explicitly disable preemption before dropping spl. */
		kpreempt_disable();
		splx(s);

		/* Actually enable interrupts */
		x86_enable_intr();

		fpusave_lwp(l, true);
		KASSERT(pcb->pcb_fpcpu == NULL);
		s = splhigh();
		kpreempt_enable();
	}

	/*
	 * Restore state on this CPU, or initialize.  Ensure that
	 * the entire update is atomic with respect to FPU-sync IPIs.
	 */
	clts();
	ci->ci_fpcurlwp = l;
	pcb->pcb_fpcpu = ci;

	if (i386_use_fxsave) {
		if (x86_xsave_features != 0) {
			xrstor(&pcb->pcb_savefpu, x86_xsave_features);
		} else {
			/*
			 * AMD FPUs do not restore FIP, FDP, and FOP on
			 * fxrstor, leaking another process's execution
			 * history.  Clear them manually by loading a zero.
			 *
			 * Clear the ES bit in the x87 status word if it is
			 * currently set, in order to avoid causing a fault
			 * in the upcoming load.
			 */
			if (fngetsw() & 0x80)
				fnclex();
			fldummy();

			fxrstor(&pcb->pcb_savefpu);
		}
	} else {
		frstor(&pcb->pcb_savefpu);
	}

	KASSERT(ci == curcpu());
	splx(s);
}

/*
 * Save current CPU's FPU state.  Must be called at IPL_HIGH.
 */
void
fpusave_cpu(bool save)
{
	struct cpu_info *ci;
	struct pcb *pcb;
	struct lwp *l;

	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);

	ci = curcpu();
	l = ci->ci_fpcurlwp;
	if (l == NULL) {
		return;
	}
	pcb = lwp_getpcb(l);

	if (save) {
		clts();
		if (i386_use_fxsave) {
			if (x86_xsave_features != 0)
				xsave(&pcb->pcb_savefpu, x86_xsave_features);
			else
				fxsave(&pcb->pcb_savefpu);
		} else {
			fnsave(&pcb->pcb_savefpu);
		}
	}

	stts();
	pcb->pcb_fpcpu = NULL;
	ci->ci_fpcurlwp = NULL;
}

/*
 * Save l's FPU state, which may be on this processor or another processor.
 * It may take some time, so we avoid disabling preemption where possible.
 * Caller must know that the target LWP is stopped, otherwise this routine
 * may race against it.
 */
void
fpusave_lwp(struct lwp *l, bool save)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct cpu_info *oci;
	int s, spins, ticks;

	spins = 0;
	ticks = hardclock_ticks;
	for (;;) {
		s = splhigh();
		oci = pcb->pcb_fpcpu;
		if (oci == NULL) {
			splx(s);
			break;
		}
		if (oci == curcpu()) {
			KASSERT(oci->ci_fpcurlwp == l);
			fpusave_cpu(save);
			splx(s);
			break;
		}
		splx(s);
#ifdef XEN
		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
			    cpu_name(oci));
		}
#else /* XEN */
		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
#endif
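		/*
		 * Wait for the IPI handler on the remote CPU to save the
		 * state and clear pcb_fpcpu; if a clock tick passes first,
		 * go around the loop and send the IPI again.
		 */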
		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
			x86_pause();
			spins++;
		}
		if (spins > 100000000) {
			panic("fpusave_lwp: did not");
		}
	}
}

/*
 * exec needs to clear the fpu save area to avoid leaking info from the
 * old process to userspace.
 * We must also (later) load these values into the fpu - otherwise the
 * process will see another process's fpu registers.
 */
void
fpu_save_area_clear(struct lwp *lwp, unsigned int x87_cw)
{
	union savefpu *fpu_save;

	fpusave_lwp(lwp, false);

	fpu_save = process_fpframe(lwp);

	if (i386_use_fxsave) {
		memset(&fpu_save->sv_xmm, 0, sizeof fpu_save->sv_xmm);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
		fpu_save->sv_xmm.fx_cw = x87_cw;
	} else {
		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = x87_cw;
	}
	fpu_save->sv_os.fxo_dflt_cw = x87_cw;
}

/* For signal handlers the register values don't matter */
void
fpu_save_area_reset(struct lwp *lwp)
{
	union savefpu *fpu_save = process_fpframe(lwp);

	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
		fpu_save->sv_xmm.fx_tw = 0;
		fpu_save->sv_xmm.fx_cw = fpu_save->sv_os.fxo_dflt_cw;
	} else {
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = fpu_save->sv_os.fxo_dflt_cw;
	}
}

/* During fork the xsave data needs to be copied */
void
fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1)
{
	ssize_t extra;

	/*
	 * The pcb itself has been copied, but the xsave area
	 * extends further.
	 */

	extra = offsetof(struct pcb, pcb_savefpu) + x86_fpu_save_size -
	    sizeof (struct pcb);

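	/*
	 * pcb + 1 is the first byte past the pcb, i.e. the start of the
	 * portion of the xsave area that spills beyond struct pcb.
	 */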
	if (extra > 0)
		memcpy(pcb2 + 1, pcb1 + 1, extra);
}


/*
 * Write the FP registers.
 * Buffer has usually come from userspace so should not be trusted.
 */
void
process_write_fpregs_xmm(struct lwp *lwp, const struct fxsave *fpregs)
{
	union savefpu *fpu_save;

	fpusave_lwp(lwp, false);
	fpu_save = process_fpframe(lwp);

	if (i386_use_fxsave) {
		memcpy(&fpu_save->sv_xmm, fpregs,
		    sizeof fpu_save->sv_xmm);
		/* Invalid bits in the mxcsr_mask will cause faults */
		fpu_save->sv_xmm.fx_mxcsr_mask &= __INITIAL_MXCSR_MASK__;
	} else {
		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
	}
}

/* We need to use x87 format for 32bit ptrace */
void
process_write_fpregs_s87(struct lwp *lwp, const struct save87 *fpregs)
{

	if (i386_use_fxsave) {
		/* Save so we don't lose the xmm registers */
		fpusave_lwp(lwp, true);
		process_s87_to_xmm(fpregs, &process_fpframe(lwp)->sv_xmm);
	} else {
		fpusave_lwp(lwp, false);
		memcpy(&process_fpframe(lwp)->sv_87, fpregs,
		    sizeof process_fpframe(lwp)->sv_87);
	}
}

/*
 * Read the fpu registers; the buffer is usually copied out to userspace.
 * Ensure we write to the entire structure.
 */
void
process_read_fpregs_xmm(struct lwp *lwp, struct fxsave *fpregs)
{
	fpusave_lwp(lwp, true);

	if (i386_use_fxsave) {
		memcpy(fpregs, &process_fpframe(lwp)->sv_xmm,
		    sizeof process_fpframe(lwp)->sv_xmm);
	} else {
		/* This usually gets copied to userspace */
		memset(fpregs, 0, sizeof *fpregs);
		process_s87_to_xmm(&process_fpframe(lwp)->sv_87, fpregs);
	}
}

void
process_read_fpregs_s87(struct lwp *lwp, struct save87 *fpregs)
{
	fpusave_lwp(lwp, true);

	if (i386_use_fxsave) {
		memset(fpregs, 0, 12);
		process_xmm_to_s87(&process_fpframe(lwp)->sv_xmm, fpregs);
	} else {
		memcpy(fpregs, &process_fpframe(lwp)->sv_87,
		    sizeof process_fpframe(lwp)->sv_87);
	}
}