xref: /freebsd/sys/arm/arm/vfp.c (revision f552d7ad)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
5  * Copyright (c) 2012 Mark Tinguely
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/malloc.h>
36 #include <sys/proc.h>
37 
38 #include <machine/armreg.h>
39 #include <machine/elf.h>
40 #include <machine/frame.h>
41 #include <machine/md_var.h>
42 #include <machine/pcb.h>
43 #include <machine/undefined.h>
44 #include <machine/vfp.h>
45 
/* function prototypes */
static int vfp_bounce(u_int, u_int, struct trapframe *, int);
static void vfp_restore(struct vfp_state *);

/* Set to 1 by vfp_init() when a hardware VFP implementation is detected. */
extern int vfp_exists;
/* Undefined-instruction handlers for coprocessors 10 and 11 (the VFP). */
static struct undefined_handler vfp10_uh, vfp11_uh;
/* If true the VFP unit has 32 double registers, otherwise it has 16 */
static int is_d32;

/* malloc(9) type for fpu_kern_alloc_ctx() allocations. */
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");
57 
/*
 * Context handed to fpu_kern_enter()/fpu_kern_leave() so kernel code can
 * borrow the VFP unit while preserving the interrupted VFP state.
 */
struct fpu_kern_ctx {
	struct vfp_state	*prev;	/* pcb_vfpsaved to restore on leave */
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t	 flags;		/* FPU_KERN_CTX_* flags */
	struct vfp_state	 state;	/* save area used while ctx is active */
};
65 
66 /*
67  * About .fpu directives in this file...
68  *
69  * We should need simply .fpu vfpv3, but clang 3.5 has a quirk where setting
70  * vfpv3 doesn't imply that vfp2 features are also available -- both have to be
71  * explicitly set to get all the features of both.  This is probably a bug in
72  * clang, so it may get fixed and require changes here some day.  Other changes
73  * are probably coming in clang too, because there is email and open PRs
74  * indicating they want to completely disable the ability to use .fpu and
75  * similar directives in inline asm.  That would be catastrophic for us,
 * hopefully they come to their senses.  There was also some discussion of a new
77  * syntax such as .push fpu=vfpv3; ...; .pop fpu; and that would be ideal for
78  * us, better than what we have now really.
79  *
80  * For gcc, each .fpu directive completely overrides the prior directive, unlike
81  * with clang, but luckily on gcc saying v3 implies all the v2 features as well.
82  */
83 
/* Write a VFP system register (vmsr; "fmxr" in pre-UAL mnemonics). */
#define fmxr(reg, val) \
    __asm __volatile("	.fpu vfpv2\n .fpu vfpv3\n"			\
		     "	vmsr	" __STRING(reg) ", %0"   :: "r"(val));
87 
/* Read a VFP system register (vmrs; "fmrx" in pre-UAL mnemonics). */
#define fmrx(reg) \
({ u_int val = 0;\
    __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n"			\
		     "	vmrs	%0, " __STRING(reg) : "=r"(val));	\
    val; \
})
94 
/* Read CPACR, the CP15 coprocessor access control register (c1, c0, 2). */
static u_int
get_coprocessorACR(void)
{
	u_int val;
	__asm __volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (val) : : "cc");
	return val;
}
102 
/* Write CPACR; the isb() makes the new access rights take effect. */
static void
set_coprocessorACR(u_int val)
{
	__asm __volatile("mcr p15, 0, %0, c1, c0, 2\n\t"
	 : : "r" (val) : "cc");
	isb();
}
110 
/* Turn the VFP unit on by setting the enable bit in FPEXC. */
static void
vfp_enable(void)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);
	fmxr(fpexc, fpexc | VFPEXC_EN);
	isb();	/* ensure the enable is visible before any FP instruction */
}
120 
/* Turn the VFP unit off; subsequent FP use traps to vfp_bounce(). */
static void
vfp_disable(void)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);
	fmxr(fpexc, fpexc & ~VFPEXC_EN);
	isb();	/* ensure the disable is visible before returning */
}
130 
	/*
	 * Probe and initialize the VFP unit; called for each cpu.
	 *
	 * Grants access to coprocessors 10/11, reads FPSID to detect a
	 * hardware implementation, publishes capabilities via elf_hwcap and
	 * per-CPU data, then leaves the unit disabled so the first FP use
	 * traps to vfp_bounce().
	 */
void
vfp_init(void)
{
	u_int fpsid, tmp;
	u_int coproc, vfp_arch;

	/* Allow access to coprocessors 10 and 11 (the VFP). */
	coproc = get_coprocessorACR();
	coproc |= COPROC10 | COPROC11;
	set_coprocessorACR(coproc);

	fpsid = fmrx(fpsid);		/* read the vfp system id */

	/* HARDSOFT_IMP clear means a hardware implementation is present. */
	if (!(fpsid & VFPSID_HARDSOFT_IMP)) {
		vfp_exists = 1;
		is_d32 = 0;
		PCPU_SET(vfpsid, fpsid);	/* save the fpsid */
		elf_hwcap |= HWCAP_VFP;

		vfp_arch =
		    (fpsid & VFPSID_SUBVERSION2_MASK) >> VFPSID_SUBVERSION_OFF;

		if (vfp_arch >= VFP_ARCH3) {
			tmp = fmrx(mvfr0);
			PCPU_SET(vfpmvfr0, tmp);
			elf_hwcap |= HWCAP_VFPv3;

			/* Register-bank field 2 => 32 double registers. */
			if ((tmp & VMVFR0_RB_MASK) == 2) {
				elf_hwcap |= HWCAP_VFPD32;
				is_d32 = 1;
			} else
				elf_hwcap |= HWCAP_VFPv3D16;

			tmp = fmrx(mvfr1);
			PCPU_SET(vfpmvfr1, tmp);

			/* Boot-time FPSCR default is adjusted only once. */
			if (PCPU_GET(cpuid) == 0) {
				if ((tmp & VMVFR1_FZ_MASK) == 0x1) {
					/* Denormals arithmetic support */
					initial_fpscr &= ~VFPSCR_FZ;
					thread0.td_pcb->pcb_vfpstate.fpscr =
					    initial_fpscr;
				}
			}

			/*
			 * Advertise NEON only when the load/store, integer
			 * and single-precision MVFR1 fields are all 1.
			 */
			if ((tmp & VMVFR1_LS_MASK) >> VMVFR1_LS_OFF == 1 &&
			    (tmp & VMVFR1_I_MASK) >> VMVFR1_I_OFF == 1 &&
			    (tmp & VMVFR1_SP_MASK) >> VMVFR1_SP_OFF == 1)
				elf_hwcap |= HWCAP_NEON;
			if ((tmp & VMVFR1_FMAC_MASK) >>  VMVFR1_FMAC_OFF == 1)
				elf_hwcap |= HWCAP_VFPv4;
		}

		vfp_disable();

		/* initialize the coprocessor 10 and 11 calls
		 * These are called to restore the registers and enable
		 * the VFP hardware.
		 */
		if (vfp10_uh.uh_handler == NULL) {
			vfp10_uh.uh_handler = vfp_bounce;
			vfp11_uh.uh_handler = vfp_bounce;
			install_coproc_handler_static(10, &vfp10_uh);
			install_coproc_handler_static(11, &vfp11_uh);
		}
	}
}
198 
/* Run vfp_init() at CPU-setup time during boot. */
SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
200 
/*
 * Start the VFP unit, restore the VFP registers from the PCB and retry
 * the instruction.
 *
 * Undefined-instruction handler for coprocessors 10/11.  Returns 0 when
 * the faulting instruction should be retried (VFP now enabled with this
 * thread's state, or a signal was delivered), 1 when the instruction is
 * genuinely undefined and generic undefined handling should proceed.
 * 'insn' and 'frame' are unused here but required by the handler ABI.
 */
static int
vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code)
{
	u_int cpu, fpexc;
	struct pcb *curpcb;
	ksiginfo_t ksi;

	critical_enter();

	/*
	 * If the VFP is already on and we got an undefined instruction, then
	 * something tried to execute a truly invalid instruction that maps to
	 * the VFP.
	 */
	fpexc = fmrx(fpexc);
	if (fpexc & VFPEXC_EN) {
		/* Clear any exceptions */
		fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V));

		/* kill the process - we do not handle emulation */
		critical_exit();

		if (fpexc & VFPEXC_EX) {
			/* We have an exception, signal a SIGFPE */
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGFPE;
			if (fpexc & VFPEXC_UFC)
				ksi.ksi_code = FPE_FLTUND;
			else if (fpexc & VFPEXC_OFC)
				ksi.ksi_code = FPE_FLTOVF;
			else if (fpexc & VFPEXC_IOC)
				ksi.ksi_code = FPE_FLTINV;
			ksi.ksi_addr = (void *)addr;
			trapsignal(curthread, &ksi);
			return 0;
		}

		return 1;
	}

	/*
	 * Kernel-mode FP use is only permitted for threads that entered via
	 * fpu_kern_enter() (PCB_FP_KERN set); anything else is a bug.
	 */
	curpcb = curthread->td_pcb;
	if ((code & FAULT_USER) == 0 &&
	    (curpcb->pcb_fpflags & PCB_FP_KERN) == 0) {
		critical_exit();
		return (1);
	}

	/*
	 * If the last time this thread used the VFP it was on this core, and
	 * the last thread to use the VFP on this core was this thread, then the
	 * VFP state is valid, otherwise restore this thread's state to the VFP.
	 */
	fmxr(fpexc, fpexc | VFPEXC_EN);
	cpu = PCPU_GET(cpuid);
	if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) {
		vfp_restore(curpcb->pcb_vfpsaved);
		curpcb->pcb_vfpcpu = cpu;
		PCPU_SET(fpcurthread, curthread);
	}

	critical_exit();

	KASSERT((code & FAULT_USER) == 0 ||
	    curpcb->pcb_vfpsaved == &curpcb->pcb_vfpstate,
	    ("Kernel VFP state in use when entering userspace"));

	return (0);
}
273 
274 /*
275  * Update the VFP state for a forked process or new thread. The PCB will
276  * have been copied from the old thread.
277  * The code is heavily based on arm64 logic.
278  */
279 void
280 vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
281 {
282 	struct pcb *newpcb;
283 
284 	newpcb = newtd->td_pcb;
285 
286 	/* Kernel threads start with clean VFP */
287 	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
288 		newpcb->pcb_fpflags &=
289 		    ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
290 	} else {
291 		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
292 		if (!fork) {
293 			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
294 		}
295 	}
296 
297 	newpcb->pcb_vfpsaved = &newpcb->pcb_vfpstate;
298 	newpcb->pcb_vfpcpu = UINT_MAX;
299 }
/*
 * Restore the given state to the VFP hardware.  Caller must ensure the
 * VFP is enabled and that no context switch can occur.
 */
static void
vfp_restore(struct vfp_state *vfpsave)
{
	uint32_t fpexc;

	/* On vfpv3 we may need to restore FPINST and FPINST2 */
	fpexc = vfpsave->fpexec;
	if (fpexc & VFPEXC_EX) {
		fmxr(fpinst, vfpsave->fpinst);
		if (fpexc & VFPEXC_FP2V)
			fmxr(fpinst2, vfpsave->fpinst2);
	}
	fmxr(fpscr, vfpsave->fpscr);

	/*
	 * Load d0-d15 with post-increment writeback, then either load
	 * d16-d31 (D32 units) or step the pointer past the 128 bytes that
	 * would hold them, based on the condition codes set by the cmp.
	 */
	__asm __volatile(
	    " .fpu	vfpv2\n"
	    " .fpu	vfpv3\n"
	    " vldmia	%0!, {d0-d15}\n"	/* d0-d15 */
	    " cmp	%1, #0\n"		/* -D16 or -D32? */
	    " vldmiane	%0!, {d16-d31}\n"	/* d16-d31 */
	    " addeq	%0, %0, #128\n"		/* skip missing regs */
	    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
	    );

	/* Re-arm the saved exception state last. */
	fmxr(fpexc, fpexc);
}
329 
/*
 * If the VFP is on, save its current state and turn it off if requested to do
 * so.  If the VFP is not on, does not change the values at *vfpsave.  Caller is
 * responsible for preventing a context switch while this is running.
 */
void
vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);		/* Is the vfp enabled? */
	if (fpexc & VFPEXC_EN) {
		vfpsave->fpexec = fpexc;
		vfpsave->fpscr = fmrx(fpscr);

		/* On vfpv3 we may need to save FPINST and FPINST2 */
		if (fpexc & VFPEXC_EX) {
			vfpsave->fpinst = fmrx(fpinst);
			if (fpexc & VFPEXC_FP2V)
				vfpsave->fpinst2 = fmrx(fpinst2);
			/* Clear EX in the copy we may write back below. */
			fpexc &= ~VFPEXC_EX;
		}

		/*
		 * Store d0-d15 with post-increment writeback, then either
		 * store d16-d31 (D32 units) or skip the 128 bytes reserved
		 * for them, based on the condition codes set by the cmp.
		 */
		__asm __volatile(
		    " .fpu	vfpv2\n"
		    " .fpu	vfpv3\n"
		    " vstmia	%0!, {d0-d15}\n"	/* d0-d15 */
		    " cmp	%1, #0\n"		/* -D16 or -D32? */
		    " vstmiane	%0!, {d16-d31}\n"	/* d16-d31 */
		    " addeq	%0, %0, #128\n"		/* skip missing regs */
		    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
		    );

		if (disable_vfp)
			fmxr(fpexc , fpexc & ~VFPEXC_EN);
	}
}
367 
368 /*
369  * The current thread is dying.  If the state currently in the hardware belongs
370  * to the current thread, set fpcurthread to NULL to indicate that the VFP
371  * hardware state does not belong to any thread.  If the VFP is on, turn it off.
372  */
373 void
374 vfp_discard(struct thread *td)
375 {
376 	u_int tmp;
377 
378 	if (PCPU_GET(fpcurthread) == td)
379 		PCPU_SET(fpcurthread, NULL);
380 
381 	tmp = fmrx(fpexc);
382 	if (tmp & VFPEXC_EN)
383 		fmxr(fpexc, tmp & ~VFPEXC_EN);
384 }
385 
386 void
387 vfp_save_state(struct thread *td, struct pcb *pcb)
388 {
389 	int32_t fpexc;
390 
391 	KASSERT(pcb != NULL, ("NULL vfp pcb"));
392 	KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));
393 
394 	/*
395 	 * savectx() will be called on panic with dumppcb as an argument,
396 	 * dumppcb doesn't have pcb_vfpsaved set, so set it to save
397 	 * the VFP registers.
398 	 */
399 	if (pcb->pcb_vfpsaved == NULL)
400 		pcb->pcb_vfpsaved = &pcb->pcb_vfpstate;
401 
402 	if (td == NULL)
403 		td = curthread;
404 
405 	critical_enter();
406 	/*
407 	 * Only store the registers if the VFP is enabled,
408 	 * i.e. return if we are trapping on FP access.
409 	 */
410 	fpexc = fmrx(fpexc);
411 	if (fpexc & VFPEXC_EN) {
412 		KASSERT(PCPU_GET(fpcurthread) == td,
413 		    ("Storing an invalid VFP state"));
414 
415 		vfp_store(pcb->pcb_vfpsaved, true);
416 	}
417 	critical_exit();
418 }
419 
420 struct fpu_kern_ctx *
421 fpu_kern_alloc_ctx(u_int flags)
422 {
423 	return (malloc(sizeof(struct fpu_kern_ctx), M_FPUKERN_CTX,
424 	    ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO));
425 }
426 
427 void
428 fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
429 {
430 	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("freeing in-use ctx"));
431 
432 	free(ctx, M_FPUKERN_CTX);
433 }
434 
/*
 * Begin a section of kernel code that may use the VFP.  With FPU_KERN_NOCTX
 * the current user state is saved and the section runs inside a critical
 * section; otherwise the user state is parked in *ctx and restored by
 * fpu_kern_leave().  FPU_KERN_KTHR lets registered FPU kthreads skip the
 * save entirely (a "dummy" context).
 */
void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		/* Critical section is exited in fpu_kern_leave(). */
		critical_enter();
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check either we are already using the VFP in the kernel, or
	 * the saved state points to the default user space.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
	    ("Mangled pcb_vfpsaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_vfpsaved,
	     &pcb->pcb_vfpstate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	/* Park the user state, then redirect saves into the context. */
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_vfpsaved;
	pcb->pcb_vfpsaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}
482 
/*
 * End a section started by fpu_kern_enter().  Undoes the NOCTX or ctx-based
 * bookkeeping, discards the kernel FP state, and restores pcb_vfpsaved so
 * the thread's user state is reloaded lazily on its next FP trap.
 * Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		/* Matches the critical_enter() in fpu_kern_enter(). */
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		/* A dummy context did not park any state; nothing to undo. */
		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_vfpsaved = ctx->prev;
	}

	/* PCB_FP_KERN stays set only while saves are still redirected. */
	if (pcb->pcb_vfpsaved == &pcb->pcb_vfpstate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}
524 
525 int
526 fpu_kern_thread(u_int flags __unused)
527 {
528 	struct pcb *pcb = curthread->td_pcb;
529 
530 	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
531 	    ("Only kthread may use fpu_kern_thread"));
532 	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
533 	    ("Mangled pcb_vfpsaved"));
534 	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
535 	    ("Thread already setup for the VFP"));
536 	pcb->pcb_fpflags |= PCB_FP_KERN;
537 	return (0);
538 }
539 
540 int
541 is_fpu_kern_thread(u_int flags __unused)
542 {
543 	struct pcb *curpcb;
544 
545 	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
546 		return (0);
547 	curpcb = curthread->td_pcb;
548 	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
549 }
550