1*664d632dSanton /* $OpenBSD */
2*664d632dSanton
3*664d632dSanton #include <sys/types.h>
4*664d632dSanton #include <sys/ptrace.h>
5*664d632dSanton #include <sys/wait.h>
6*664d632dSanton
7*664d632dSanton #include <err.h>
8*664d632dSanton #include <signal.h>
9*664d632dSanton #include <stdint.h>
10*664d632dSanton #include <stdio.h>
11*664d632dSanton #include <stdlib.h>
12*664d632dSanton #include <string.h>
13*664d632dSanton #include <unistd.h>
14*664d632dSanton
/* Register values produced by one execution of the CPUID instruction. */
struct cpuid {
	uint32_t a;	/* EAX */
	uint32_t b;	/* EBX */
	uint32_t c;	/* ECX */
	uint32_t d;	/* EDX */
};
18*664d632dSanton
/* Indices into the components array of struct xstate. */
#define XSTATE_COMPONENT_X87	0
#define XSTATE_COMPONENT_SSE	1
#define XSTATE_COMPONENT_AVX	2

/*
 * Local copy of a traced process' XSAVE area together with the location of
 * the individual state components inside of it.
 */
struct xstate {
	/* Raw bytes exchanged with the kernel through ptrace. */
	struct {
		uint8_t		buf[1024];
		uint32_t	size;		/* bytes of buf in use */
	} area;

	/* Layout of each component, indexed by XSTATE_COMPONENT_*. */
	struct {
		uint32_t	supported;	/* non-zero if usable */
		uint32_t	offset;		/* byte offset into area.buf */
		uint32_t	size;		/* length of the component */
	} components[XSTATE_COMPONENT_AVX + 1];
};
34*664d632dSanton
/*
 * 128-bit quantity represented as two 64-bit words.  Packed so that it
 * carries no alignment requirement of its own.
 */
struct u128 {
	uint64_t	v[2];
} __attribute__((__packed__));
38*664d632dSanton
39*664d632dSanton struct ymm {
40*664d632dSanton struct u128 xmm;
41*664d632dSanton struct u128 ymm;
42*664d632dSanton } __attribute__((packed));
43*664d632dSanton
44*664d632dSanton extern void ymm_write(void);
45*664d632dSanton extern void ymm_read(struct ymm[16]);
46*664d632dSanton
47*664d632dSanton static inline void
cpuid(uint32_t leaf,uint32_t subleaf,struct cpuid * out)48*664d632dSanton cpuid(uint32_t leaf, uint32_t subleaf, struct cpuid *out)
49*664d632dSanton {
50*664d632dSanton __asm__("cpuid"
51*664d632dSanton : "=a" (out->a), "=b" (out->b), "=c" (out->c), "=d" (out->d)
52*664d632dSanton : "a" (leaf), "c" (subleaf));
53*664d632dSanton }
54*664d632dSanton
55*664d632dSanton static int
xstate_init(struct xstate * xstate,pid_t pid)56*664d632dSanton xstate_init(struct xstate *xstate, pid_t pid)
57*664d632dSanton {
58*664d632dSanton #define CPUID_01_C_XSAVE_MASK (1 << 26)
59*664d632dSanton #define XCR0_XMM_MASK (1 << 1)
60*664d632dSanton #define XCR0_YMM_MASK (1 << 2)
61*664d632dSanton
62*664d632dSanton struct cpuid leaf;
63*664d632dSanton struct ptrace_xstate_info info;
64*664d632dSanton
65*664d632dSanton cpuid(0x1, 0, &leaf);
66*664d632dSanton if ((leaf.c & CPUID_01_C_XSAVE_MASK) == 0) {
67*664d632dSanton printf("SKIPPED: XSAVE not enumerated");
68*664d632dSanton return 1;
69*664d632dSanton }
70*664d632dSanton
71*664d632dSanton memset(xstate, 0, sizeof(*xstate));
72*664d632dSanton
73*664d632dSanton if (ptrace(PT_GETXSTATE_INFO, pid,
74*664d632dSanton (caddr_t)&info, sizeof(info)) == -1)
75*664d632dSanton err(1, "ptrace: PT_GETXSTATE_INFO");
76*664d632dSanton if (info.xsave_len > sizeof(xstate->area.buf))
77*664d632dSanton errx(1, "xstate buffer too small");
78*664d632dSanton xstate->area.size = info.xsave_len;
79*664d632dSanton
80*664d632dSanton if ((info.xsave_mask & XCR0_XMM_MASK) == 0 ||
81*664d632dSanton (info.xsave_mask & XCR0_YMM_MASK) == 0) {
82*664d632dSanton printf("SKIPPED: SSE/AVX disabled in XCR0\n");
83*664d632dSanton return 1;
84*664d632dSanton }
85*664d632dSanton
86*664d632dSanton xstate->components[XSTATE_COMPONENT_SSE].supported = 1;
87*664d632dSanton /* Part of legacy region in XSAVE area. */
88*664d632dSanton xstate->components[XSTATE_COMPONENT_SSE].offset = 160;
89*664d632dSanton xstate->components[XSTATE_COMPONENT_SSE].size = 256;
90*664d632dSanton
91*664d632dSanton cpuid(0xd, XSTATE_COMPONENT_AVX, &leaf);
92*664d632dSanton xstate->components[XSTATE_COMPONENT_AVX].supported = 1;
93*664d632dSanton xstate->components[XSTATE_COMPONENT_AVX].offset = leaf.b;
94*664d632dSanton xstate->components[XSTATE_COMPONENT_AVX].size = leaf.a;
95*664d632dSanton
96*664d632dSanton return 0;
97*664d632dSanton }
98*664d632dSanton
99*664d632dSanton static void
xstate_ymm_read(struct xstate * xstate,int regno,struct ymm * rd)100*664d632dSanton xstate_ymm_read(struct xstate *xstate, int regno, struct ymm *rd)
101*664d632dSanton {
102*664d632dSanton struct u128 *xmm = (struct u128 *)(xstate->area.buf +
103*664d632dSanton xstate->components[XSTATE_COMPONENT_SSE].offset);
104*664d632dSanton struct u128 *ymm = (struct u128 *)(xstate->area.buf +
105*664d632dSanton xstate->components[XSTATE_COMPONENT_AVX].offset);
106*664d632dSanton
107*664d632dSanton rd->xmm = xmm[regno];
108*664d632dSanton rd->ymm = ymm[regno];
109*664d632dSanton }
110*664d632dSanton
111*664d632dSanton static void
xstate_ymm_write(struct xstate * xstate,int regno,struct ymm * wr)112*664d632dSanton xstate_ymm_write(struct xstate *xstate, int regno, struct ymm *wr)
113*664d632dSanton {
114*664d632dSanton struct u128 *xmm = (struct u128 *)(xstate->area.buf +
115*664d632dSanton xstate->components[XSTATE_COMPONENT_SSE].offset);
116*664d632dSanton struct u128 *ymm = (struct u128 *)(xstate->area.buf +
117*664d632dSanton xstate->components[XSTATE_COMPONENT_AVX].offset);
118*664d632dSanton
119*664d632dSanton xmm[regno] = wr->xmm;
120*664d632dSanton ymm[regno] = wr->ymm;
121*664d632dSanton }
122*664d632dSanton
/*
 * Block until pid changes state and insist that the new state is "stopped".
 * A waitpid failure or any other state terminates the program.
 */
static void
wait_until_stopped(pid_t pid)
{
	int wstatus;
	pid_t rv;

	rv = waitpid(pid, &wstatus, 0);
	if (rv == -1)
		err(1, "waitpid");

	if (WIFSTOPPED(wstatus))
		return;

	errx(1, "expected traced process to be stopped");
}
133*664d632dSanton
134*664d632dSanton static int
check_ymm(const struct ymm ymm[16])135*664d632dSanton check_ymm(const struct ymm ymm[16])
136*664d632dSanton {
137*664d632dSanton int error = 0;
138*664d632dSanton int i;
139*664d632dSanton
140*664d632dSanton for (i = 0; i < 16; i++) {
141*664d632dSanton struct ymm exp;
142*664d632dSanton
143*664d632dSanton memset(&exp, (i << 4) | i, 32);
144*664d632dSanton if (memcmp(&exp, &ymm[i], 32) == 0)
145*664d632dSanton continue;
146*664d632dSanton
147*664d632dSanton warnx("ymm%d: expected %016llx%016llx%016llx%016llx,"
148*664d632dSanton " got %016llx%016llx%016llx%016llx", i,
149*664d632dSanton exp.ymm.v[1], exp.ymm.v[0],
150*664d632dSanton exp.xmm.v[1], exp.xmm.v[0],
151*664d632dSanton ymm[i].ymm.v[1], ymm[i].ymm.v[0],
152*664d632dSanton ymm[i].xmm.v[1], ymm[i].xmm.v[0]);
153*664d632dSanton error = 1;
154*664d632dSanton }
155*664d632dSanton
156*664d632dSanton return error;
157*664d632dSanton }
158*664d632dSanton
159*664d632dSanton static int
test_ymm_get(struct xstate * xstate)160*664d632dSanton test_ymm_get(struct xstate *xstate)
161*664d632dSanton {
162*664d632dSanton struct ymm ymm[16];
163*664d632dSanton pid_t pid;
164*664d632dSanton int i;
165*664d632dSanton
166*664d632dSanton pid = fork();
167*664d632dSanton if (pid == 0) {
168*664d632dSanton ptrace(PT_TRACE_ME, 0, 0, 0);
169*664d632dSanton ymm_write();
170*664d632dSanton raise(SIGSTOP);
171*664d632dSanton /* UNREACHABLE */
172*664d632dSanton }
173*664d632dSanton
174*664d632dSanton wait_until_stopped(pid);
175*664d632dSanton
176*664d632dSanton if (xstate_init(xstate, pid))
177*664d632dSanton return 0;
178*664d632dSanton
179*664d632dSanton if (ptrace(PT_GETXSTATE, pid,
180*664d632dSanton xstate->area.buf, xstate->area.size) == -1)
181*664d632dSanton err(1, "ptrace: PT_GETXSTATE");
182*664d632dSanton for (i = 0; i < 16; i++)
183*664d632dSanton xstate_ymm_read(xstate, i, &ymm[i]);
184*664d632dSanton return check_ymm(ymm);
185*664d632dSanton }
186*664d632dSanton
187*664d632dSanton static int
test_ymm_set(struct xstate * xstate)188*664d632dSanton test_ymm_set(struct xstate *xstate)
189*664d632dSanton {
190*664d632dSanton pid_t pid;
191*664d632dSanton int i, status;
192*664d632dSanton
193*664d632dSanton pid = fork();
194*664d632dSanton if (pid == 0) {
195*664d632dSanton struct ymm ymm[16];
196*664d632dSanton
197*664d632dSanton ptrace(PT_TRACE_ME, 0, 0, 0);
198*664d632dSanton raise(SIGSTOP);
199*664d632dSanton ymm_read(ymm);
200*664d632dSanton _exit(check_ymm(ymm));
201*664d632dSanton }
202*664d632dSanton
203*664d632dSanton wait_until_stopped(pid);
204*664d632dSanton
205*664d632dSanton if (xstate_init(xstate, pid))
206*664d632dSanton return 0;
207*664d632dSanton
208*664d632dSanton if (ptrace(PT_GETXSTATE, pid,
209*664d632dSanton xstate->area.buf, xstate->area.size) == -1)
210*664d632dSanton err(1, "ptrace: PT_GETXSTATE");
211*664d632dSanton for (i = 0; i < 16; i++) {
212*664d632dSanton struct ymm ymm;
213*664d632dSanton
214*664d632dSanton memset(&ymm, (i << 4) | i, 32);
215*664d632dSanton xstate_ymm_write(xstate, i, &ymm);
216*664d632dSanton }
217*664d632dSanton
218*664d632dSanton if (ptrace(PT_SETXSTATE, pid,
219*664d632dSanton xstate->area.buf, xstate->area.size) == -1)
220*664d632dSanton err(1, "ptrace: PT_SETXSTATE");
221*664d632dSanton
222*664d632dSanton if (ptrace(PT_CONTINUE, pid, (caddr_t)1, 0) == -1)
223*664d632dSanton err(1, "ptrace: PT_CONTINUE");
224*664d632dSanton if (waitpid(pid, &status, 0) == -1)
225*664d632dSanton err(1, "waitpid");
226*664d632dSanton return WIFEXITED(status) && WEXITSTATUS(status) == 0 ? 0 : 1;
227*664d632dSanton }
228*664d632dSanton
/* Print the command line synopsis on stderr and exit with status 1. */
static void __attribute__((noreturn))
usage(void)
{
	fputs("usage: xstate test-case\n", stderr);
	exit(1);
}
235*664d632dSanton
236*664d632dSanton int
main(int argc,char * argv[])237*664d632dSanton main(int argc, char *argv[])
238*664d632dSanton {
239*664d632dSanton struct {
240*664d632dSanton const char *name;
241*664d632dSanton int (*test)(struct xstate *);
242*664d632dSanton } tests[] = {
243*664d632dSanton { "xstate-ymm-get", test_ymm_get },
244*664d632dSanton { "xstate-ymm-set", test_ymm_set },
245*664d632dSanton };
246*664d632dSanton struct xstate xstate;
247*664d632dSanton unsigned int i;
248*664d632dSanton
249*664d632dSanton if (argc != 2)
250*664d632dSanton usage();
251*664d632dSanton
252*664d632dSanton for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
253*664d632dSanton if (strcmp(argv[1], tests[i].name) == 0)
254*664d632dSanton return tests[i].test(&xstate);
255*664d632dSanton }
256*664d632dSanton
257*664d632dSanton warnx("no such test case");
258*664d632dSanton return 1;
259*664d632dSanton }
260