xref: /freebsd/sys/arm64/arm64/vfp.c (revision 1719886f)
/*-
 * Copyright (c) 2015-2016 The FreeBSD Foundation
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#ifdef VFP
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/proc.h>

#include <vm/uma.h>

#include <machine/armreg.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/vfp.h>

/* Sanity check we can store all the VFP registers */
CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32);

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");

struct fpu_kern_ctx {
	struct vfpstate	*prev;
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t	 flags;
	struct vfpstate	 state;
};

static uma_zone_t fpu_save_area_zone;
static struct vfpstate *fpu_initialstate;

void
vfp_enable(void)
{
	uint32_t cpacr;

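	/*
	 * Clear the CPACR_EL1.FPEN trap bits so FP/SIMD instructions no
	 * longer fault; the isb() makes the change visible before any
	 * following FP/SIMD access.
	 */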
	cpacr = READ_SPECIALREG(cpacr_el1);
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

void
vfp_disable(void)
{
	uint32_t cpacr;

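	/*
	 * Set CPACR_EL1.FPEN to trap FP/SIMD accesses so the next use of
	 * the unit faults and the state can be restored lazily.
	 */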
	cpacr = READ_SPECIALREG(cpacr_el1);
	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
	WRITE_SPECIALREG(cpacr_el1, cpacr);
	isb();
}

/*
 * Called when the thread is dying or when discarding the kernel VFP state.
 * If the thread was the last to use the VFP unit, mark it as unused to tell
 * the kernel the FP state is unowned. Ensure the VFP unit is off so we get
 * an exception on the next access.
 */
void
vfp_discard(struct thread *td)
{

#ifdef INVARIANTS
	if (td != NULL)
		CRITICAL_ASSERT(td);
#endif
	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, NULL);

	vfp_disable();
}

void
vfp_store(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
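	/* Read the control/status registers and store all 32 Q registers. */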
	__asm __volatile(
	    ".arch_extension fp\n"
	    "mrs	%0, fpcr		\n"
	    "mrs	%1, fpsr		\n"
	    "stp	q0,  q1,  [%2, #16 *  0]\n"
	    "stp	q2,  q3,  [%2, #16 *  2]\n"
	    "stp	q4,  q5,  [%2, #16 *  4]\n"
	    "stp	q6,  q7,  [%2, #16 *  6]\n"
	    "stp	q8,  q9,  [%2, #16 *  8]\n"
	    "stp	q10, q11, [%2, #16 * 10]\n"
	    "stp	q12, q13, [%2, #16 * 12]\n"
	    "stp	q14, q15, [%2, #16 * 14]\n"
	    "stp	q16, q17, [%2, #16 * 16]\n"
	    "stp	q18, q19, [%2, #16 * 18]\n"
	    "stp	q20, q21, [%2, #16 * 20]\n"
	    "stp	q22, q23, [%2, #16 * 22]\n"
	    "stp	q24, q25, [%2, #16 * 24]\n"
	    "stp	q26, q27, [%2, #16 * 26]\n"
	    "stp	q28, q29, [%2, #16 * 28]\n"
	    "stp	q30, q31, [%2, #16 * 30]\n"
	    ".arch_extension nofp\n"
	    : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));

	state->vfp_fpcr = fpcr;
	state->vfp_fpsr = fpsr;
}

void
vfp_restore(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	fpcr = state->vfp_fpcr;
	fpsr = state->vfp_fpsr;

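	/* Reload all 32 Q registers, then restore the control/status registers. */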
	__asm __volatile(
	    ".arch_extension fp\n"
	    "ldp	q0,  q1,  [%2, #16 *  0]\n"
	    "ldp	q2,  q3,  [%2, #16 *  2]\n"
	    "ldp	q4,  q5,  [%2, #16 *  4]\n"
	    "ldp	q6,  q7,  [%2, #16 *  6]\n"
	    "ldp	q8,  q9,  [%2, #16 *  8]\n"
	    "ldp	q10, q11, [%2, #16 * 10]\n"
	    "ldp	q12, q13, [%2, #16 * 12]\n"
	    "ldp	q14, q15, [%2, #16 * 14]\n"
	    "ldp	q16, q17, [%2, #16 * 16]\n"
	    "ldp	q18, q19, [%2, #16 * 18]\n"
	    "ldp	q20, q21, [%2, #16 * 20]\n"
	    "ldp	q22, q23, [%2, #16 * 22]\n"
	    "ldp	q24, q25, [%2, #16 * 24]\n"
	    "ldp	q26, q27, [%2, #16 * 26]\n"
	    "ldp	q28, q29, [%2, #16 * 28]\n"
	    "ldp	q30, q31, [%2, #16 * 30]\n"
	    "msr	fpcr, %0		\n"
	    "msr	fpsr, %1		\n"
	    ".arch_extension nofp\n"
	    : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
}

static void
vfp_save_state_common(struct thread *td, struct pcb *pcb)
{
	uint32_t cpacr;

	critical_enter();
	/*
	 * Only store the registers if the VFP is enabled,
	 * i.e. return if we are trapping on FP access.
	 */
	cpacr = READ_SPECIALREG(cpacr_el1);
	if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) {
		KASSERT(PCPU_GET(fpcurthread) == td,
		    ("Storing an invalid VFP state"));

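		/*
		 * Write the state out to the current save area, then turn
		 * the unit back off so the next access traps again.
		 */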
		vfp_store(pcb->pcb_fpusaved);
		dsb(ish);
		vfp_disable();
	}
	critical_exit();
}

void
vfp_save_state(struct thread *td, struct pcb *pcb)
{
	KASSERT(td != NULL, ("NULL vfp thread"));
	KASSERT(pcb != NULL, ("NULL vfp pcb"));
	KASSERT(td->td_pcb == pcb, ("Invalid vfp pcb"));

	vfp_save_state_common(td, pcb);
}

void
vfp_save_state_savectx(struct pcb *pcb)
{
	/*
	 * savectx() will be called on panic with dumppcb as an argument;
	 * dumppcb doesn't have pcb_fpusaved set, so set it here to save
	 * the VFP registers.
	 */
	MPASS(pcb->pcb_fpusaved == NULL);
	pcb->pcb_fpusaved = &pcb->pcb_fpustate;

	vfp_save_state_common(curthread, pcb);
}

/*
 * Update the VFP state for a forked process or new thread. The PCB will
 * have been copied from the old thread.
 */
void
vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
{
	struct pcb *newpcb;

	newpcb = newtd->td_pcb;

	/* Kernel threads start with clean VFP */
	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
		newpcb->pcb_fpflags &=
		    ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
	} else {
		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
		if (!fork) {
			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
		}
	}

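	/*
	 * The new thread always starts with its own in-PCB save area and
	 * with the state not resident on any CPU.
	 */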
	newpcb->pcb_fpusaved = &newpcb->pcb_fpustate;
	newpcb->pcb_vfpcpu = UINT_MAX;
}

/*
 * Reset the FP state to avoid leaking state from the parent process across
 * execve() (and to ensure that we get a consistent floating point environment
 * in every new process).
 */
void
vfp_reset_state(struct thread *td, struct pcb *pcb)
{
	/* Discard the thread's VFP state before resetting it */
	critical_enter();
	vfp_discard(td);
	critical_exit();

	/*
	 * Clear the thread state. The VFP is disabled and this thread is no
	 * longer the current VFP thread, so none of this state will be
	 * changed on a context switch.
	 */
	bzero(&pcb->pcb_fpustate.vfp_regs, sizeof(pcb->pcb_fpustate.vfp_regs));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("pcb_fpusaved should point to pcb_fpustate."));
	pcb->pcb_fpustate.vfp_fpcr = VFPCR_INIT;
	pcb->pcb_fpustate.vfp_fpsr = 0;
	pcb->pcb_vfpcpu = UINT_MAX;
	pcb->pcb_fpflags = 0;
}

void
vfp_restore_state(void)
{
	struct pcb *curpcb;
	u_int cpu;

	critical_enter();

	cpu = PCPU_GET(cpuid);
	curpcb = curthread->td_pcb;
	curpcb->pcb_fpflags |= PCB_FP_STARTED;

	vfp_enable();

	/*
	 * If the previous thread on this cpu to use the VFP was not the
	 * current thread, or the current thread last used it on a different
	 * cpu, we need to restore the old state.
	 */
	if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
		vfp_restore(curthread->td_pcb->pcb_fpusaved);
		PCPU_SET(fpcurthread, curthread);
		curpcb->pcb_vfpcpu = cpu;
	}

	critical_exit();
}

void
vfp_init_secondary(void)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

	/* Disable the VFP; it will be enabled when it is first used */
	vfp_disable();
}

static void
vfp_init(const void *dummy __unused)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

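	/* uma_zcreate() takes an alignment mask, hence the "- 1" below. */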
	fpu_save_area_zone = uma_zcreate("VFP_save_area",
	    sizeof(struct vfpstate), NULL, NULL, NULL, NULL,
	    _Alignof(struct vfpstate) - 1, 0);
	fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);

	/* Ensure the VFP is enabled before accessing it in vfp_store */
	vfp_enable();
	vfp_store(fpu_initialstate);

	/* Disable the VFP; it will be enabled when it is first used */
	vfp_disable();

	/* Zero the VFP registers but keep fpcr and fpsr */
	bzero(fpu_initialstate->vfp_regs, sizeof(fpu_initialstate->vfp_regs));

	thread0.td_pcb->pcb_fpusaved->vfp_fpcr = VFPCR_INIT;
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);

struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	struct fpu_kern_ctx *res;
	size_t sz;

	sz = sizeof(struct fpu_kern_ctx);
	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
	    M_NOWAIT : M_WAITOK) | M_ZERO);
	return (res);
}

void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
	/* XXXAndrew clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}

void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

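	/*
	 * With FPU_KERN_NOCTX no kernel context is saved or restored: any
	 * live user state is written back to the PCB, the unit is enabled
	 * and the thread stays in a critical section until fpu_kern_leave().
	 */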
	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check that we are either already using the VFP in the kernel, or
	 * that the saved state points to the default user-space state.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags,
	    pcb->pcb_fpusaved, &pcb->pcb_fpustate));
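	/*
	 * Save the current state into its previous save area, then redirect
	 * pcb_fpusaved at the caller's context so kernel FP use is captured
	 * there instead.
	 */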
	ctx->flags = FPU_KERN_CTX_INUSE;
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_fpusaved;
	pcb->pcb_fpusaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}

int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_fpusaved = ctx->prev;
	}

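	/*
	 * Only clear PCB_FP_KERN once the outermost context has been left
	 * and pcb_fpusaved points back at the user state.
	 */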
	if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}

int
fpu_kern_thread(u_int flags __unused)
{
	struct pcb *pcb = curthread->td_pcb;

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
	    ("Thread already setup for the VFP"));
	pcb->pcb_fpflags |= PCB_FP_KERN;
	return (0);
}

int
is_fpu_kern_thread(u_int flags __unused)
{
	struct pcb *curpcb;

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	curpcb = curthread->td_pcb;
	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
}

/*
 * FPU save area alloc/free/init utility routines
 */
struct vfpstate *
fpu_save_area_alloc(void)
{
	return (uma_zalloc(fpu_save_area_zone, M_WAITOK));
}

void
fpu_save_area_free(struct vfpstate *fsa)
{
	uma_zfree(fpu_save_area_zone, fsa);
}

void
fpu_save_area_reset(struct vfpstate *fsa)
{
	memcpy(fsa, fpu_initialstate, sizeof(*fsa));
}
#endif