1 /* $NetBSD: fpu.c,v 1.10 2014/11/27 14:22:09 uebayasi Exp $ */
2
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc. All
5 * rights reserved.
6 *
7 * This code is derived from software developed for The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1991 The Regents of the University of California.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)npx.c 7.2 (Berkeley) 5/12/91
61 */
62
63 /*-
64 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum. All rights reserved.
65 * Copyright (c) 1990 William Jolitz.
66 *
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
69 * are met:
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 * 3. All advertising materials mentioning features or use of this software
76 * must display the following acknowledgement:
77 * This product includes software developed by the University of
78 * California, Berkeley and its contributors.
79 * 4. Neither the name of the University nor the names of its contributors
80 * may be used to endorse or promote products derived from this software
81 * without specific prior written permission.
82 *
83 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93 * SUCH DAMAGE.
94 *
95 * @(#)npx.c 7.2 (Berkeley) 5/12/91
96 */
97
98 /*
99 * XXXfvdl update copyright notice. this started out as a stripped isa/npx.c
100 */
101
102 #include <sys/cdefs.h>
103 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.10 2014/11/27 14:22:09 uebayasi Exp $");
104
105 #include "opt_multiprocessor.h"
106
107 #include <sys/param.h>
108 #include <sys/systm.h>
109 #include <sys/conf.h>
110 #include <sys/cpu.h>
111 #include <sys/file.h>
112 #include <sys/proc.h>
113 #include <sys/kernel.h>
114
115 #include <machine/cpu.h>
116 #include <machine/intr.h>
117 #include <machine/cpufunc.h>
118 #include <machine/pcb.h>
119 #include <machine/trap.h>
120 #include <machine/specialreg.h>
121 #include <x86/cpu.h>
122 #include <x86/fpu.h>
123
124 /* Check some duplicate definitions match */
125 #include <machine/fenv.h>
126
127 #ifdef XEN
128 #define clts() HYPERVISOR_fpu_taskswitch(0)
129 #define stts() HYPERVISOR_fpu_taskswitch(1)
130 #endif
131
132 static inline union savefpu *
process_fpframe(struct lwp * lwp)133 process_fpframe(struct lwp *lwp)
134 {
135 struct pcb *pcb = lwp_getpcb(lwp);
136
137 return &pcb->pcb_savefpu;
138 }
139
140 /*
141 * We do lazy initialization and switching using the TS bit in cr0 and the
142 * MDL_USEDFPU bit in mdlwp.
143 *
144 * DNA exceptions are handled like this:
145 *
146 * 1) If there is no FPU, send SIGILL.
147 * 2) If someone else has used the FPU, save its state into that lwp's PCB.
148 * 3a) If MDL_USEDFPU is not set, set it and initialize the FPU.
149 * 3b) Otherwise, reload the lwp's previous FPU state.
150 *
151 * When a lwp is created or exec()s, its saved cr0 image has the TS bit
152 * set and the MDL_USEDFPU bit clear. The MDL_USEDFPU bit is set when the
153 * lwp first gets a DNA and the FPU is initialized. The TS bit is turned
154 * off when the FPU is used, and turned on again later when the lwp's FPU
155 * state is saved.
156 */
157
158 /*
159 * The following table is used to ensure that the FPE_... value
160 * that is passed as a trapcode to the signal handler of the user
161 * process does not have more than one bit set.
162 *
 * Multiple bits may be set if SSE simd instructions generate errors
 * on more than one value or if the user process modifies the control
 * word while a status word bit is already set (which is a sign of
 * bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
169 *
 * The mechanism has a static table with 128 entries.  Each combination
171 * of the 7 FPU status word exception bits directly translates to a
172 * position in this table, where a single FPE_... value is stored.
173 * This FPE_... value stored there is considered the "most important"
174 * of the exception bits and will be sent as the signal code. The
175 * precedence of the bits is based upon Intel Document "Numerical
176 * Applications", Chapter "Special Computational Situations".
177 *
178 * The code to choose one of these values does these steps:
179 * 1) Throw away status word bits that cannot be masked.
180 * 2) Throw away the bits currently masked in the control word,
181 * assuming the user isn't interested in them anymore.
 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
 *    'Stack fault' is a sub-class of 'invalid operation'.
185 * 4) Use the remaining bits to point into the trapcode table.
186 *
187 * The 6 maskable bits in order of their preference, as stated in the
188 * above referenced Intel manual:
189 * 1 Invalid operation (FP_X_INV)
190 * 1a Stack underflow
191 * 1b Stack overflow
192 * 1c Operand of unsupported format
193 * 1d SNaN operand.
 * 2  QNaN operand (not an exception, irrelevant here)
195 * 3 Any other invalid-operation not mentioned above or zero divide
196 * (FP_X_INV, FP_X_DZ)
197 * 4 Denormal operand (FP_X_DNML)
198 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
199 * 6 Inexact result (FP_X_IMP)
200 *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
202 * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx
203 * status.
204 *
205 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
206 * are swapped).
207 *
208 * This table assumes that any stack fault is cleared - so that an INVOP
209 * fault will only be reported as FLTSUB once.
210 * This might not happen if the mask is being changed.
211 */
/*
 * FPE_xxx1(f) maps one 7-bit exception pattern 'f' to the single most
 * important FPE_ code, in the precedence order documented above.  The
 * FPE_xxx2/4/8/16/32 helpers expand it over runs of 2/4/8/16/32
 * consecutive patterns so the whole 128-entry table is generated at
 * compile time.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP \
	? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define FPE_xxx2(f)	FPE_xxx1(f), FPE_xxx1((f + 1))
#define FPE_xxx4(f)	FPE_xxx2(f), FPE_xxx2((f + 2))
#define FPE_xxx8(f)	FPE_xxx4(f), FPE_xxx4((f + 4))
#define FPE_xxx16(f)	FPE_xxx8(f), FPE_xxx8((f + 8))
#define FPE_xxx32(f)	FPE_xxx16(f), FPE_xxx16((f + 16))
/* Indexed by the (masked) exception status bits computed in fputrap(). */
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32
234
235 /*
236 * Init the FPU.
237 *
238 * This might not be structly necessary since it will be initialised
239 * for each process. However it does no harm.
240 */
241 void
fpuinit(struct cpu_info * ci)242 fpuinit(struct cpu_info *ci)
243 {
244 if (!i386_fpu_present)
245 return;
246
247 clts();
248 fninit();
249 stts();
250 }
251
252 void
fpu_set_default_cw(struct lwp * lwp,unsigned int x87_cw)253 fpu_set_default_cw(struct lwp *lwp, unsigned int x87_cw)
254 {
255 union savefpu *fpu_save = process_fpframe(lwp);
256
257 if (i386_use_fxsave)
258 fpu_save->sv_xmm.fx_cw = x87_cw;
259 else
260 fpu_save->sv_87.s87_cw = x87_cw;
261 fpu_save->sv_os.fxo_dflt_cw = x87_cw;
262 }
263
264 static void
send_sigill(void * rip)265 send_sigill(void *rip)
266 {
267 /* No fpu (486SX) - send SIGILL */
268 ksiginfo_t ksi;
269
270 x86_enable_intr();
271 KSI_INIT_TRAP(&ksi);
272 ksi.ksi_signo = SIGILL;
273 ksi.ksi_addr = rip;
274 (*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
275 return;
276 }
277
278 /*
279 * This is a synchronous trap on either an x87 instruction (due to an
280 * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
281 * instruction due to an error on the instruction itself.
282 *
283 * If trap actually generates a signal, then the fpu state is saved
284 * and then copied onto the process's user-stack, and then recovered
285 * from there when the signal returns (or from the jmp_buf if the
286 * signal handler exits with a longjmp()).
287 *
 * All this code needs to do is save the reason for the trap.
289 * For x87 interrupts the status word bits need clearing to stop the
290 * trap re-occurring.
291 *
292 * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
293 *
294 * Since this is a synchronous trap, the fpu registers must still belong
295 * to the correct process (we trap through an interrupt gate so that
296 * interrupts are disabled on entry).
297 * Interrupts (these better include IPIs) are left disabled until we've
298 * finished looking at fpu registers.
299 *
300 * For amd64 the calling code (in amd64_trap.S) has already checked
301 * that we trapped from usermode.
302 */
303
/*
 * Handle an FPU exception trap: read back the pending exception bits
 * (from mxcsr for T_XMM, from the x87 status word otherwise), clear
 * them so the trap does not immediately recur, and deliver SIGFPE
 * with a single FPE_ code chosen via fpetable[].
 */
void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpu trap from kernel, trapframe %p\n", frame);

	if (i386_fpu_present == 0) {
		/* No FPU at all - the instruction itself is illegal. */
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	/*
	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	KASSERT(curcpu()->ci_fpcurlwp == curlwp);

	if (frame->tf_trapno == T_XMM) {
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits so a later trap isn't confused */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/* Remove masked interrupts (mask bits are at bits 7..12) */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this as an XMM status for ksi_trap consumers */
		statbits |= 0x10000;
	} else {
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove exceptions masked in the control word */
		statbits = sw & ~(cw & 0x3f);
	}

	/* FPU state has been read back; safe to be pre-empted now */
	x86_enable_intr();

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}
358
359 /*
360 * Implement device not available (DNA) exception
361 *
362 * If we were the last lwp to use the FPU, we can simply return.
363 * Otherwise, we save the previous state, if necessary, and restore
364 * our last saved state.
365 *
366 * Called directly from the trap 0x13 entry with interrupts still disabled.
367 */
/*
 * DNA trap handler: give the current lwp the FPU, saving any previous
 * owner's state first, then restore (or leave default) this lwp's
 * state.  Entered with hardware interrupts disabled.
 */
void
fpudna(struct trapframe *frame)
{
	struct cpu_info *ci;
	struct lwp *l, *fl;
	struct pcb *pcb;
	int s;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpudna from kernel, ip %p, trapframe %p\n",
		    (void *)X86_TF_RIP(frame), frame);

	if (i386_fpu_present == 0) {
		/* No FPU (486SX) - the instruction cannot execute at all. */
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	ci = curcpu();

	/* Save soft spl level - interrupts are hard disabled */
	s = splhigh();

	/* Save state on current CPU. */
	l = ci->ci_curlwp;
	pcb = lwp_getpcb(l);
	fl = ci->ci_fpcurlwp;
	if (fl != NULL) {
		/*
		 * It seems we can get here on Xen even if we didn't
		 * switch lwp.  In this case do nothing
		 */
		if (fl == l) {
			KASSERT(pcb->pcb_fpcpu == ci);
			clts();
			splx(s);
			return;
		}
		/* Evict the previous owner's state to its pcb. */
		fpusave_cpu(true);
	}

	/* Save our state if on a remote CPU. */
	if (pcb->pcb_fpcpu != NULL) {
		/* Explicitly disable preemption before dropping spl. */
		kpreempt_disable();
		splx(s);

		/* Actually enable interrupts */
		x86_enable_intr();

		/*
		 * fpusave_lwp() IPIs the remote CPU and spins until our
		 * state has been written back to the pcb.
		 */
		fpusave_lwp(l, true);
		KASSERT(pcb->pcb_fpcpu == NULL);
		s = splhigh();
		kpreempt_enable();
	}

	/*
	 * Restore state on this CPU, or initialize.  Ensure that
	 * the entire update is atomic with respect to FPU-sync IPIs.
	 */
	clts();
	ci->ci_fpcurlwp = l;
	pcb->pcb_fpcpu = ci;

	if (i386_use_fxsave) {
		if (x86_xsave_features != 0) {
			xrstor(&pcb->pcb_savefpu, x86_xsave_features);
		} else {
			/*
			 * AMD FPU's do not restore FIP, FDP, and FOP on
			 * fxrstor, leaking other process's execution history.
			 * Clear them manually by loading a zero.
			 *
			 * Clear the ES bit in the x87 status word if it is
			 * currently set, in order to avoid causing a fault
			 * in the upcoming load.
			 */
			if (fngetsw() & 0x80)
				fnclex();
			fldummy();

			fxrstor(&pcb->pcb_savefpu);
		}
	} else {
		frstor(&pcb->pcb_savefpu);
	}

	KASSERT(ci == curcpu());
	splx(s);
}
457
458 /*
459 * Save current CPU's FPU state. Must be called at IPL_HIGH.
460 */
461 void
fpusave_cpu(bool save)462 fpusave_cpu(bool save)
463 {
464 struct cpu_info *ci;
465 struct pcb *pcb;
466 struct lwp *l;
467
468 KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
469
470 ci = curcpu();
471 l = ci->ci_fpcurlwp;
472 if (l == NULL) {
473 return;
474 }
475 pcb = lwp_getpcb(l);
476
477 if (save) {
478 clts();
479 if (i386_use_fxsave) {
480 if (x86_xsave_features != 0)
481 xsave(&pcb->pcb_savefpu, x86_xsave_features);
482 else
483 fxsave(&pcb->pcb_savefpu);
484 } else {
485 fnsave(&pcb->pcb_savefpu);
486 }
487 }
488
489 stts();
490 pcb->pcb_fpcpu = NULL;
491 ci->ci_fpcurlwp = NULL;
492 }
493
494 /*
495 * Save l's FPU state, which may be on this processor or another processor.
496 * It may take some time, so we avoid disabling preemption where possible.
497 * Caller must know that the target LWP is stopped, otherwise this routine
498 * may race against it.
499 */
void
fpusave_lwp(struct lwp *l, bool save)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct cpu_info *oci;
	int s, spins, ticks;

	spins = 0;
	ticks = hardclock_ticks;
	for (;;) {
		s = splhigh();
		oci = pcb->pcb_fpcpu;
		if (oci == NULL) {
			/* State not loaded on any CPU - nothing to do. */
			splx(s);
			break;
		}
		if (oci == curcpu()) {
			/* It's ours; save/release it directly. */
			KASSERT(oci->ci_fpcurlwp == l);
			fpusave_cpu(save);
			splx(s);
			break;
		}
		splx(s);
		/* Ask the owning CPU to flush the state to the pcb. */
#ifdef XEN
		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
			    cpu_name(oci));
		}
#else /* XEN */
		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
#endif
		/*
		 * Spin until the remote CPU releases the state; on a clock
		 * tick the outer loop re-checks and re-sends the IPI.
		 * NOTE(review): 'ticks' is never refreshed, so after the
		 * first tick this inner loop exits immediately each
		 * iteration - verify that the resulting IPI rate is intended.
		 */
		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
			x86_pause();
			spins++;
		}
		if (spins > 100000000) {
			panic("fpusave_lwp: did not");
		}
	}
}
540
541 /*
542 * exec needs to clear the fpu save area to avoid leaking info from the
543 * old process to userspace.
 * We must also (later) load these values into the fpu - otherwise the
 * process will see another process's fpu registers.
546 */
547 void
fpu_save_area_clear(struct lwp * lwp,unsigned int x87_cw)548 fpu_save_area_clear(struct lwp *lwp, unsigned int x87_cw)
549 {
550 union savefpu *fpu_save;
551
552 fpusave_lwp(lwp, false);
553
554 fpu_save = process_fpframe(lwp);
555
556 if (i386_use_fxsave) {
557 memset(&fpu_save->sv_xmm, 0, sizeof fpu_save->sv_xmm);
558 fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
559 fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
560 fpu_save->sv_xmm.fx_cw = x87_cw;
561 } else {
562 memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
563 fpu_save->sv_87.s87_tw = 0xffff;
564 fpu_save->sv_87.s87_cw = x87_cw;
565 }
566 fpu_save->sv_os.fxo_dflt_cw = x87_cw;
567 }
568
569 /* For signal handlers the register values don't matter */
570 void
fpu_save_area_reset(struct lwp * lwp)571 fpu_save_area_reset(struct lwp *lwp)
572 {
573 union savefpu *fpu_save = process_fpframe(lwp);
574
575 if (i386_use_fxsave) {
576 fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
577 fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
578 fpu_save->sv_xmm.fx_tw = 0;
579 fpu_save->sv_xmm.fx_cw = fpu_save->sv_os.fxo_dflt_cw;
580 } else {
581 fpu_save->sv_87.s87_tw = 0xffff;
582 fpu_save->sv_87.s87_cw = fpu_save->sv_os.fxo_dflt_cw;
583 }
584 }
585
586 /* During fork the xsave data needs to be copied */
587 void
fpu_save_area_fork(struct pcb * pcb2,const struct pcb * pcb1)588 fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1)
589 {
590 ssize_t extra;
591
592 /* The pcb itself has been copied, but the xsave area
593 * extends further. */
594
595 extra = offsetof(struct pcb, pcb_savefpu) + x86_fpu_save_size -
596 sizeof (struct pcb);
597
598 if (extra > 0)
599 memcpy(pcb2 + 1, pcb1 + 1, extra);
600 }
601
602
603 /*
604 * Write the FP registers.
605 * Buffer has usually come from userspace so should not be trusted.
606 */
607 void
process_write_fpregs_xmm(struct lwp * lwp,const struct fxsave * fpregs)608 process_write_fpregs_xmm(struct lwp *lwp, const struct fxsave *fpregs)
609 {
610 union savefpu *fpu_save;
611
612 fpusave_lwp(lwp, false);
613 fpu_save = process_fpframe(lwp);
614
615 if (i386_use_fxsave) {
616 memcpy(&fpu_save->sv_xmm, fpregs,
617 sizeof fpu_save->sv_xmm);
618 /* Invalid bits in the mxcsr_mask will cause faults */
619 fpu_save->sv_xmm.fx_mxcsr_mask &= __INITIAL_MXCSR_MASK__;
620 } else {
621 process_xmm_to_s87(fpregs, &fpu_save->sv_87);
622 }
623 }
624
625 /* We need to use x87 format for 32bit ptrace */
626 void
process_write_fpregs_s87(struct lwp * lwp,const struct save87 * fpregs)627 process_write_fpregs_s87(struct lwp *lwp, const struct save87 *fpregs)
628 {
629
630 if (i386_use_fxsave) {
631 /* Save so we don't lose the xmm registers */
632 fpusave_lwp(lwp, true);
633 process_s87_to_xmm(fpregs, &process_fpframe(lwp)->sv_xmm);
634 } else {
635 fpusave_lwp(lwp, false);
636 memcpy(&process_fpframe(lwp)->sv_87, fpregs,
637 sizeof process_fpframe(lwp)->sv_87);
638 }
639 }
640
641 /*
642 * Read fpu registers, the buffer is usually copied out to userspace.
643 * Ensure we write to the entire structure.
644 */
645 void
process_read_fpregs_xmm(struct lwp * lwp,struct fxsave * fpregs)646 process_read_fpregs_xmm(struct lwp *lwp, struct fxsave *fpregs)
647 {
648 fpusave_lwp(lwp, true);
649
650 if (i386_use_fxsave) {
651 memcpy(fpregs, &process_fpframe(lwp)->sv_xmm,
652 sizeof process_fpframe(lwp)->sv_xmm);
653 } else {
654 /* This usually gets copied to userspace */
655 memset(fpregs, 0, sizeof *fpregs);
656 process_s87_to_xmm(&process_fpframe(lwp)->sv_87, fpregs);
657
658 }
659 }
660
661 void
process_read_fpregs_s87(struct lwp * lwp,struct save87 * fpregs)662 process_read_fpregs_s87(struct lwp *lwp, struct save87 *fpregs)
663 {
664 fpusave_lwp(lwp, true);
665
666 if (i386_use_fxsave) {
667 memset(fpregs, 0, 12);
668 process_xmm_to_s87(&process_fpframe(lwp)->sv_xmm, fpregs);
669 } else {
670 memcpy(fpregs, &process_fpframe(lwp)->sv_87,
671 sizeof process_fpframe(lwp)->sv_87);
672 }
673 }
674