1 /* $OpenBSD: fpu.h,v 1.20 2024/04/14 09:59:04 kettenis Exp $ */
2 /* $NetBSD: fpu.h,v 1.1 2003/04/26 18:39:40 fvdl Exp $ */
3
4 #ifndef _MACHINE_FPU_H_
5 #define _MACHINE_FPU_H_
6
7 #include <sys/types.h>
8
9 /*
10 * If the CPU supports xsave/xrstor then we use them so that we can provide
11 * AVX support. Otherwise we require fxsave/fxrstor, as the SSE registers
12 * are part of the ABI for passing floating point values.
 * While fxsave/fxrstor only require 16-byte alignment for the save area,
 * xsave/xrstor require the save area to have 64-byte alignment.
15 */
16
/*
 * Legacy x87/SSE save image used with fxsave/fxrstor.  The members add up
 * to exactly 512 bytes (32 bytes of control/status, 128 bytes of x87
 * registers, 256 bytes of XMM registers, 96 bytes of tail) and the struct
 * is __packed so the compiler inserts no padding.
 *
 * The per-field descriptions follow the FXSAVE memory layout in the Intel
 * SDM; the field order and sizes must not be changed.
 */
struct fxsave64 {
	u_int16_t	fx_fcw;		/* x87 control word */
	u_int16_t	fx_fsw;		/* x87 status word */
	u_int8_t	fx_ftw;		/* abridged x87 tag word */
	u_int8_t	fx_unused1;	/* reserved byte, keeps fx_fop at offset 6 */
	u_int16_t	fx_fop;		/* last x87 opcode */
	u_int64_t	fx_rip;		/* last x87 instruction pointer */
	u_int64_t	fx_rdp;		/* last x87 data (operand) pointer */
	u_int32_t	fx_mxcsr;	/* SSE control/status register */
	u_int32_t	fx_mxcsr_mask;	/* writable-bit mask for fx_mxcsr */
	/* each register slot below is 16 bytes wide (two u_int64_t) */
	u_int64_t	fx_st[8][2];	/* 8 normal FP regs */
	u_int64_t	fx_xmm[16][2];	/* 16 SSE2 registers */
	u_int8_t	fx_unused3[96];	/* tail padding up to 512 bytes */
} __packed;
31
/*
 * Header that follows the 512-byte legacy region in an xsave area.  The
 * members add up to 64 bytes and the struct is __packed, so it occupies
 * exactly that much.  Field meanings follow the XSAVE header description
 * in the Intel SDM.
 */
struct xstate_hdr {
	uint64_t	xstate_bv;		/* bitmap of state components present */
	uint64_t	xstate_xcomp_bv;	/* compacted-format component bitmap */
	uint8_t		xstate_rsrv0[8];	/* reserved */
	uint8_t		xstate_rsrv[40];	/* reserved, pads header to 64 bytes */
} __packed;
38
/*
 * Complete FPU save area: the 512-byte fxsave image, the 64-byte xsave
 * header, and room for the extended state components.  Total size is
 * 512 + 64 + 256 + 1856 = 2688 bytes.
 *
 * NOTE(review): no alignment attribute is applied here; the 64-byte
 * alignment xsave/xrstor need must be provided by whatever embeds or
 * allocates this struct -- confirm at the point of use.
 */
struct savefpu {
	struct fxsave64	fp_fxsave;	/* see above */
	struct xstate_hdr fp_xstate;	/* xsave header, directly after legacy area */
	/* 16 slots of 16 bytes; presumably the upper halves of YMM0-15 */
	u_int64_t	fp_ymm[16][2];
	u_int8_t	fp_components[1856];	/* enough for AVX-512 */
};
45
/*
 * The i387 defaults to Intel extended precision mode and round to nearest,
 * with all exceptions masked.
 *
 * __INITIAL_NPXCW__ (0x037f): x87 control word with the low exception-mask
 * bits (0x3f) set, precision control 11b (extended) and rounding control
 * 00b (nearest).
 * __INITIAL_MXCSR__ (0x1f80): MXCSR with the exception-mask bits 7-12 set
 * and every other bit clear.
 * __INITIAL_MXCSR_MASK__ (0xffbf): the low 16 bits with bit 6 cleared;
 * presumably the fallback used when fxsave reports an fx_mxcsr_mask of 0 --
 * confirm at the use site.
 */
#define	__INITIAL_NPXCW__	0x037f
#define	__INITIAL_MXCSR__	0x1f80
#define	__INITIAL_MXCSR_MASK__	0xffbf
53
54 #ifdef _KERNEL
/*
 * XXX
 * Forward declarations only: the prototypes below use these types through
 * pointers, so the full definitions (which live outside this header) are
 * not needed here.
 */
struct trapframe;
struct cpu_info;

/*
 * Kernel-wide FPU configuration, defined outside this header.
 * NOTE(review): meanings below are inferred from the names -- confirm
 * against the definitions.
 */
extern size_t	fpu_save_len;	/* presumably bytes of save area actually in use */
extern uint32_t	fpu_mxcsr_mask;	/* presumably the valid MXCSR bits for this CPU */
extern uint64_t	xsave_mask;	/* component mask; passed to xrstor_kern by fpureset() */
extern int	cpu_use_xsaves;	/* presumably nonzero when xsaves/xrstors are used */

/* FPU management entry points; implemented outside this header. */
void	fpuinit(struct cpu_info *);
int	fputrap(int _type);
void	fpusave(struct savefpu *);
void	fpusavereset(struct savefpu *);
void	fpu_kernel_enter(void);
void	fpu_kernel_exit(void);
72
/*
 * pointer to fxsave/xsave/xsaves data with everything reset
 *
 * Expands to the address of proc0's pcb_savefpu, so users of this macro
 * need the declarations of proc0 and its pcb in scope (not provided here).
 */
#define	fpu_cleandata	(&proc0.p_addr->u_pcb.pcb_savefpu)

/*
 * Restore FPU state from *_addr for the components selected by _mask.
 * NOTE(review): xrstor_user returns int while xrstor_kern returns void;
 * presumably the int reports a fault on a user-supplied image -- confirm
 * against the implementation.
 */
int	xrstor_user(struct savefpu *_addr, uint64_t _mask);
void	xrstor_kern(struct savefpu *_addr, uint64_t _mask);
/* Reload the clean image (fpu_cleandata) for every component in xsave_mask. */
#define	fpureset() \
	xrstor_kern(fpu_cleandata, xsave_mask)
/* NOTE(review): presumably xsetbv with fault reporting via the return value. */
int	xsetbv_user(uint32_t _reg, uint64_t _mask);
81
/*
 * Thin wrappers that each emit a single x87/SSE instruction.
 *
 * The wrappers taking `addr' use the pointed-to object as the memory
 * operand.  The argument is parenthesized in the expansion so that an
 * expression such as fldcw(p + 1) dereferences the whole expression
 * rather than expanding to `*p + 1'.
 */
#define	fninit()		__asm("fninit")
#define	fwait()			__asm("fwait")
/* should be fxsave64, but where we use this it doesn't matter */
#define	fxsave(addr)		__asm("fxsave %0" : "=m" (*(addr)))
#define	ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*(addr)))
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
88
/*
 * Execute xsave64 on the save area at addr.
 *
 * The instruction takes its 64-bit component mask split across two
 * registers, so mask is handed over as its low 32 bits in EAX ("a") and
 * its high 32 bits in EDX ("d").  *addr is a read-write memory operand.
 * See the comment at the top of this file for the alignment xsave needs.
 */
static inline void
xsave(struct savefpu *addr, uint64_t mask)
{
	const uint32_t eax = (uint32_t)mask;
	const uint32_t edx = (uint32_t)(mask >> 32);

	__asm volatile("xsave64 %0" : "+m" (*addr) : "a" (eax), "d" (edx));
}
98
/*
 * Execute xrstors64 on the save area at addr.
 *
 * mask is split into its low 32 bits (EAX, "a") and high 32 bits
 * (EDX, "d") as the instruction expects.  *addr is only an input operand
 * here, hence the const-qualified pointer.
 */
static inline void
xrstors(const struct savefpu *addr, uint64_t mask)
{
	const uint32_t eax = (uint32_t)mask;
	const uint32_t edx = (uint32_t)(mask >> 32);

	__asm volatile("xrstors64 %0" : : "m" (*addr), "a" (eax), "d" (edx));
}
108
109 #endif
110
111 #endif /* _MACHINE_FPU_H_ */
112