1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Ian Lepore <ian@freebsd.org> 5 * Copyright (c) 2012 Mark Tinguely 6 * 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <machine/armreg.h>
#include <machine/elf.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vfp.h>

/* function prototypes */
static int vfp_bounce(u_int, u_int, struct trapframe *, int);
static void vfp_restore(struct vfp_state *);

extern int vfp_exists;
static struct undefined_handler vfp10_uh, vfp11_uh;
/* If true the VFP unit has 32 double registers, otherwise it has 16 */
static int is_d32;

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");

/*
 * Per-use context for the fpu_kern_enter()/fpu_kern_leave() API: saves the
 * pointer to the previously active VFP save area so it can be restored on
 * leave, and holds the in-kernel VFP register state itself.
 */
struct fpu_kern_ctx {
	struct vfp_state *prev;
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t flags;
	struct vfp_state state;
};

/*
 * About .fpu directives in this file...
 *
 * We should need simply .fpu vfpv3, but clang 3.5 has a quirk where setting
 * vfpv3 doesn't imply that vfp2 features are also available -- both have to be
 * explicitly set to get all the features of both.  This is probably a bug in
 * clang, so it may get fixed and require changes here some day.  Other changes
 * are probably coming in clang too, because there is email and open PRs
 * indicating they want to completely disable the ability to use .fpu and
 * similar directives in inline asm.  That would be catastrophic for us,
 * hopefully they come to their senses.  There was also some discussion of a new
 * syntax such as .push fpu=vfpv3; ...; .pop fpu; and that would be ideal for
 * us, better than what we have now really.
 *
 * For gcc, each .fpu directive completely overrides the prior directive, unlike
 * with clang, but luckily on gcc saying v3 implies all the v2 features as well.
 */

/* Write a VFP system register (e.g. fpexc, fpscr) via VMSR. */
#define fmxr(reg, val) \
    __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n" \
	" vmsr " __STRING(reg) ", %0" :: "r"(val));

/* Read a VFP system register via VMRS. */
#define fmrx(reg) \
({ u_int val = 0;\
	__asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n" \
	" vmrs %0, " __STRING(reg) : "=r"(val)); \
	val; \
})

/* Read the Coprocessor Access Control Register (CP15 CPACR). */
static u_int
get_coprocessorACR(void)
{
	u_int val;
	__asm __volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (val) : : "cc");
	return val;
}

/*
 * Write the Coprocessor Access Control Register.  The isb ensures the new
 * access permissions are visible before any subsequent coprocessor access.
 */
static void
set_coprocessorACR(u_int val)
{
	__asm __volatile("mcr p15, 0, %0, c1, c0, 2\n\t"
	 : : "r" (val) : "cc");
	isb();
}

/* Turn on the VFP unit by setting the enable bit in FPEXC. */
static void
vfp_enable(void)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);
	fmxr(fpexc, fpexc | VFPEXC_EN);
	isb();
}

/*
 * Turn off the VFP unit.  Subsequent VFP instructions will trap to the
 * undefined-instruction handler (vfp_bounce).
 */
static void
vfp_disable(void)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);
	fmxr(fpexc, fpexc & ~VFPEXC_EN);
	isb();
}

/*
 * Probe and initialize the VFP unit; called for each cpu.  Grants access to
 * coprocessors 10/11, records the hardware's capabilities in per-cpu data and
 * elf_hwcap, then leaves the unit disabled so the first FP use traps into
 * vfp_bounce().
 */
void
vfp_init(void)
{
	u_int fpsid, tmp;
	u_int coproc, vfp_arch;

	coproc = get_coprocessorACR();
	coproc |= COPROC10 | COPROC11;
	set_coprocessorACR(coproc);

	fpsid = fmrx(fpsid);	/* read the vfp system id */

	/* HARDSOFT_IMP clear means a hardware VFP implementation exists */
	if (!(fpsid & VFPSID_HARDSOFT_IMP)) {
		vfp_exists = 1;
		is_d32 = 0;
		PCPU_SET(vfpsid, fpsid);	/* save the fpsid */
		elf_hwcap |= HWCAP_VFP;

		vfp_arch =
		    (fpsid & VFPSID_SUBVERSION2_MASK) >> VFPSID_SUBVERSION_OFF;

		if (vfp_arch >= VFP_ARCH3) {
			tmp = fmrx(mvfr0);
			PCPU_SET(vfpmvfr0, tmp);
			elf_hwcap |= HWCAP_VFPv3;

			/* MVFR0 register-bank field: 2 means 32 d-registers */
			if ((tmp & VMVFR0_RB_MASK) == 2) {
				elf_hwcap |= HWCAP_VFPD32;
				is_d32 = 1;
			} else
				elf_hwcap |= HWCAP_VFPv3D16;

			tmp = fmrx(mvfr1);
			PCPU_SET(vfpmvfr1, tmp);

			/* Boot cpu decides the system-wide initial fpscr */
			if (PCPU_GET(cpuid) == 0) {
				if ((tmp & VMVFR1_FZ_MASK) == 0x1) {
					/* Denormals arithmetic support */
					initial_fpscr &= ~VFPSCR_FZ;
					thread0.td_pcb->pcb_vfpstate.fpscr =
					    initial_fpscr;
				}
			}

			/* NEON requires load/store, integer and SP support */
			if ((tmp & VMVFR1_LS_MASK) >> VMVFR1_LS_OFF == 1 &&
			    (tmp & VMVFR1_I_MASK) >> VMVFR1_I_OFF == 1 &&
			    (tmp & VMVFR1_SP_MASK) >> VMVFR1_SP_OFF == 1)
				elf_hwcap |= HWCAP_NEON;
			if ((tmp & VMVFR1_FMAC_MASK) >> VMVFR1_FMAC_OFF == 1)
				elf_hwcap |= HWCAP_VFPv4;
		}

		vfp_disable();

		/* initialize the coprocessor 10 and 11 calls
		 * These are called to restore the registers and enable
		 * the VFP hardware.
		 */
		if (vfp10_uh.uh_handler == NULL) {
			vfp10_uh.uh_handler = vfp_bounce;
			vfp11_uh.uh_handler = vfp_bounce;
			install_coproc_handler_static(10, &vfp10_uh);
			install_coproc_handler_static(11, &vfp11_uh);
		}
	}
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);

/*
 * Start the VFP unit, restore the VFP registers from the PCB and retry
 * the instruction.
 *
 * Undefined-instruction handler for coprocessors 10/11.  Returns 0 when the
 * trap was handled (retry the instruction) and 1 when it was not.
 */
static int
vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code)
{
	u_int cpu, fpexc;
	struct pcb *curpcb;
	ksiginfo_t ksi;

	critical_enter();

	/*
	 * If the VFP is already on and we got an undefined instruction, then
	 * something tried to execute a truly invalid instruction that maps to
	 * the VFP.
	 */
	fpexc = fmrx(fpexc);
	if (fpexc & VFPEXC_EN) {
		/* Clear any exceptions */
		fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V));

		/* kill the process - we do not handle emulation */
		critical_exit();

		if (fpexc & VFPEXC_EX) {
			/* We have an exception, signal a SIGFPE */
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGFPE;
			if (fpexc & VFPEXC_UFC)
				ksi.ksi_code = FPE_FLTUND;
			else if (fpexc & VFPEXC_OFC)
				ksi.ksi_code = FPE_FLTOVF;
			else if (fpexc & VFPEXC_IOC)
				ksi.ksi_code = FPE_FLTINV;
			ksi.ksi_addr = (void *)addr;
			trapsignal(curthread, &ksi);
			return 0;
		}

		return 1;
	}

	/*
	 * Kernel-mode FP use is only legal inside an fpu_kern_enter()
	 * section (PCB_FP_KERN set); otherwise refuse to handle the trap.
	 */
	curpcb = curthread->td_pcb;
	if ((code & FAULT_USER) == 0 &&
	    (curpcb->pcb_fpflags & PCB_FP_KERN) == 0) {
		critical_exit();
		return (1);
	}

	/*
	 * If the last time this thread used the VFP it was on this core, and
	 * the last thread to use the VFP on this core was this thread, then the
	 * VFP state is valid, otherwise restore this thread's state to the VFP.
	 */
	fmxr(fpexc, fpexc | VFPEXC_EN);
	cpu = PCPU_GET(cpuid);
	if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) {
		vfp_restore(curpcb->pcb_vfpsaved);
		curpcb->pcb_vfpcpu = cpu;
		PCPU_SET(fpcurthread, curthread);
	}

	critical_exit();

	KASSERT((code & FAULT_USER) == 0 ||
	    curpcb->pcb_vfpsaved == &curpcb->pcb_vfpstate,
	    ("Kernel VFP state in use when entering userspace"));

	return (0);
}

/*
 * Update the VFP state for a forked process or new thread. The PCB will
 * have been copied from the old thread.
 * The code is heavily based on arm64 logic.
 */
void
vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
{
	struct pcb *newpcb;

	newpcb = newtd->td_pcb;

	/* Kernel threads start with clean VFP */
	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
		newpcb->pcb_fpflags &=
		    ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
	} else {
		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
		/* On a new thread (not fork) the FP unit starts untouched */
		if (!fork) {
			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
		}
	}

	/* Point the save area at the thread's own state; never used a cpu yet */
	newpcb->pcb_vfpsaved = &newpcb->pcb_vfpstate;
	newpcb->pcb_vfpcpu = UINT_MAX;
}

/*
 * Restore the given state to the VFP hardware.
 */
static void
vfp_restore(struct vfp_state *vfpsave)
{
	uint32_t fpexc;

	/* On vfpv3 we may need to restore FPINST and FPINST2 */
	fpexc = vfpsave->fpexec;
	if (fpexc & VFPEXC_EX) {
		fmxr(fpinst, vfpsave->fpinst);
		if (fpexc & VFPEXC_FP2V)
			fmxr(fpinst2, vfpsave->fpinst2);
	}
	fmxr(fpscr, vfpsave->fpscr);

	__asm __volatile(
	    " .fpu vfpv2\n"
	    " .fpu vfpv3\n"
	    " vldmia %0!, {d0-d15}\n"		/* d0-d15 */
	    " cmp %1, #0\n"			/* -D16 or -D32? */
	    " vldmiane %0!, {d16-d31}\n"	/* d16-d31 */
	    " addeq %0, %0, #128\n"		/* skip missing regs */
	    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
	    );

	/* Restore FPEXC last; this may re-set VFPEXC_EX */
	fmxr(fpexc, fpexc);
}

/*
 * If the VFP is on, save its current state and turn it off if requested to do
 * so. If the VFP is not on, does not change the values at *vfpsave. Caller is
 * responsible for preventing a context switch while this is running.
 */
void
vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);	/* Is the vfp enabled? */
	if (fpexc & VFPEXC_EN) {
		vfpsave->fpexec = fpexc;
		vfpsave->fpscr = fmrx(fpscr);

		/* On vfpv3 we may need to save FPINST and FPINST2 */
		if (fpexc & VFPEXC_EX) {
			vfpsave->fpinst = fmrx(fpinst);
			if (fpexc & VFPEXC_FP2V)
				vfpsave->fpinst2 = fmrx(fpinst2);
			/* Clear the exception flag in the local copy so the
			 * register save below runs without a pending trap */
			fpexc &= ~VFPEXC_EX;
		}

		__asm __volatile(
		    " .fpu vfpv2\n"
		    " .fpu vfpv3\n"
		    " vstmia %0!, {d0-d15}\n"		/* d0-d15 */
		    " cmp %1, #0\n"			/* -D16 or -D32? */
		    " vstmiane %0!, {d16-d31}\n"	/* d16-d31 */
		    " addeq %0, %0, #128\n"		/* skip missing regs */
		    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
		    );

		if (disable_vfp)
			fmxr(fpexc , fpexc & ~VFPEXC_EN);
	}
}

/*
 * The current thread is dying.  If the state currently in the hardware belongs
 * to the current thread, set fpcurthread to NULL to indicate that the VFP
 * hardware state does not belong to any thread.  If the VFP is on, turn it off.
 */
void
vfp_discard(struct thread *td)
{
	u_int tmp;

	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, NULL);

	tmp = fmrx(fpexc);
	if (tmp & VFPEXC_EN)
		fmxr(fpexc, tmp & ~VFPEXC_EN);
}

/*
 * Save the current VFP hardware state into the given pcb's save area and
 * disable the unit.  With td == NULL, operates on curthread (this is the
 * panic/savectx path, which may pass dumppcb rather than a thread's pcb).
 */
void
vfp_save_state(struct thread *td, struct pcb *pcb)
{
	int32_t fpexc;

	KASSERT(pcb != NULL, ("NULL vfp pcb"));
	KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));

	/*
	 * savectx() will be called on panic with dumppcb as an argument,
	 * dumppcb doesn't have pcb_vfpsaved set, so set it to save
	 * the VFP registers.
	 */
	if (pcb->pcb_vfpsaved == NULL)
		pcb->pcb_vfpsaved = &pcb->pcb_vfpstate;

	if (td == NULL)
		td = curthread;

	critical_enter();
	/*
	 * Only store the registers if the VFP is enabled,
	 * i.e. return if we are trapping on FP access.
	 */
	fpexc = fmrx(fpexc);
	if (fpexc & VFPEXC_EN) {
		KASSERT(PCPU_GET(fpcurthread) == td,
		    ("Storing an invalid VFP state"));

		vfp_store(pcb->pcb_vfpsaved, true);
	}
	critical_exit();
}

/*
 * Allocate an fpu_kern_ctx for later use with fpu_kern_enter().  May sleep
 * unless FPU_KERN_NOWAIT is given.
 */
struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	return (malloc(sizeof(struct fpu_kern_ctx), M_FPUKERN_CTX,
	    ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO));
}

/* Free a context previously allocated with fpu_kern_alloc_ctx(). */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{
	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("freeing in-use ctx"));

	free(ctx, M_FPUKERN_CTX);
}

/*
 * Begin a section of kernel code that may use the VFP.  The current user FP
 * state is saved into *ctx and the pcb is switched to use the context's save
 * area.  With FPU_KERN_NOCTX the thread instead runs with the unit enabled
 * and unsaved inside a critical section until fpu_kern_leave().
 */
void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		/* Evict any live hardware state before taking over the unit */
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check either we are already using the VFP in the kernel, or
	 * the saved state points to the default user space.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
	    ("Mangled pcb_vfpsaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_vfpsaved,
	    &pcb->pcb_vfpstate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_vfpsaved;
	pcb->pcb_vfpsaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}

/*
 * End a section begun by fpu_kern_enter(), restoring the previous save-area
 * pointer (or simply disabling the unit for FPU_KERN_NOCTX sections).
 * Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		/* Dummy contexts (FPU_KERN_KTHR) saved nothing to restore */
		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_vfpsaved = ctx->prev;
	}

	if (pcb->pcb_vfpsaved == &pcb->pcb_vfpstate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}

/*
 * Mark the calling kernel thread as a permanent VFP user, so FPU_KERN_KTHR
 * sections can skip the save/restore dance.  Always returns 0.
 */
int
fpu_kern_thread(u_int flags __unused)
{
	struct pcb *pcb = curthread->td_pcb;

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
	    ("Mangled pcb_vfpsaved"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
	    ("Thread already setup for the VFP"));
	pcb->pcb_fpflags |= PCB_FP_KERN;
	return (0);
}

/*
 * Return non-zero if the current thread is a kernel thread set up for VFP use
 * via fpu_kern_thread().
 */
int
is_fpu_kern_thread(u_int flags __unused)
{
	struct pcb *curpcb;

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	curpcb = curthread->td_pcb;
	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
}